forked from prabhakar267/WA-Reader
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
60 lines (51 loc) · 2 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import io
import os
from dateutil.parser import parse as parse_datetime
# Separators tried, in this order, to cut a line's leading timestamp off from
# the rest of the line.
TIMESTAMP_SPLITTERS = ["-", "]", ": "]
# Characters removed from the timestamp candidate before it is handed to the
# date parser: brackets, plus U+200E (left-to-right mark) and U+FEFF (BOM).
REMOVE_CHARACTERS = ["[", "]", "(", ")", "{", "}", '\u200e', '\ufeff']
def _get_parsed_line(input_line, persons_list):
    """Parse one chat-export line into a message record.

    The line is expected to look like ``<timestamp><splitter><sender>:<text>``,
    where ``<splitter>`` is one of ``TIMESTAMP_SPLITTERS``. Each splitter is
    tried in order until the text before it parses as a date.

    Args:
        input_line: A single line of the export.
        persons_list: Sender names seen so far. A sender not yet present is
            appended to this list in place.

    Returns:
        A ``(message, persons_list)`` tuple. ``message`` is a dict with keys
        ``"t"`` (the parsed datetime), ``"p"`` (the message text) and ``"i"``
        (index of the sender in ``persons_list``). ``message`` is ``None``
        when the line carries a timestamp but has no text after the sender
        separator, or has an empty sender name.

    Raises:
        IndexError: If no leading timestamp could be parsed from the line.
    """
    parsed_timestamp = None
    remainder = ""
    for splitter in TIMESTAMP_SPLITTERS:
        candidate, _, tail = input_line.partition(splitter)
        for character in REMOVE_CHARACTERS:
            candidate = candidate.replace(character, "")
        try:
            parsed_timestamp = parse_datetime(candidate, dayfirst=True)
        except (ValueError, OverflowError):
            # OverflowError is raised for numeric fields too large to convert;
            # like ValueError it just means "not a timestamp with this splitter".
            continue
        remainder = tail.strip()
        break

    if parsed_timestamp is None:
        raise IndexError

    user_name, _, text_string = remainder.partition(":")
    text_string = text_string.strip()
    if not text_string:
        return None, persons_list
    if not user_name:
        # An empty sender is never registered in persons_list, so there is no
        # index to report; skip the line rather than let list.index() raise a
        # ValueError.
        return None, persons_list

    if user_name not in persons_list:
        persons_list.append(user_name)
    obj = {
        "t": parsed_timestamp,
        "p": text_string,
        "i": persons_list.index(user_name),
    }
    return obj, persons_list
def get_parsed_file(filepath):
    """Read a chat export and return its messages and participants.

    Every line is stripped and passed to ``_get_parsed_line``. A line that
    helper rejects with ``IndexError`` is treated as a continuation of the
    previous message and appended to its text on a new line.

    Args:
        filepath: Path of the UTF-8 text file to read.

    Returns:
        A ``(parsed_chats, persons_list)`` tuple: the list of message records
        and the list of sender names collected while parsing.

    Raises:
        Exception: If ``filepath`` does not exist, or if a line is rejected
            before any message has been parsed.
    """
    if not os.path.exists(filepath):
        raise Exception("File not uploaded properly. Try Again!")

    messages = []
    persons_list = []
    with io.open(filepath, "r", encoding='utf-8') as chat_file:
        for raw_line in chat_file:
            stripped = raw_line.strip()
            try:
                record, persons_list = _get_parsed_line(stripped, persons_list)
            except IndexError:
                if not messages:
                    raise Exception("It wasn't a valid text file or we were not able to convert it")
                # continuation of the previous message
                messages[-1]["p"] += "\n{}".format(stripped)
                continue
            if record:
                messages.append(record)
    return messages, persons_list