-
Notifications
You must be signed in to change notification settings - Fork 1
/
dump.py
165 lines (112 loc) · 5.31 KB
/
dump.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
from pytg import sender
from pytg.exceptions import IllegalResponseException
import os
import logging
import yaml
import datetime
import time
# Emit log records of level INFO and above for the whole script.
logging.basicConfig(level=logging.INFO)
# Ugly hack: increase timeout for document reception
# Sub hack: use a list to assign a new value (the entry is written back as a
# tuple below, so it is copied into a mutable list first)
# NOTE(review): assumes sender.FUNC_TIME is the index of the timeout slot in
# the "load_document" entry and that 3600.0 is in seconds - confirm against pytg.
tmp_f = list(sender.functions["load_document"])
tmp_f[sender.FUNC_TIME] = 3600.0
sender.functions["load_document"] = tuple(tmp_f)
# Shared pytg Sender used by build_dialogs_list() and work_on_dialog().
# NOTE(review): presumably the host and port of a running telegram-cli - confirm.
x = sender.Sender("127.0.0.1", 4458)
def build_dialogs_list():
    """Collect every dialog by paging through ``x.dialog_list``.

    Dialogs are requested 100 at a time, using the number of dialogs
    gathered so far as the offset, until a request returns an empty page.

    Returns:
        The list of all dialogs returned by the successive requests.
    """
    dialogs = []
    while True:
        page = x.dialog_list(100, len(dialogs))
        # Extend before testing the page, so the final (empty) page is
        # handled exactly like any other one.
        dialogs += page
        if not page:
            return dialogs
def _load_yaml_mapping(path):
    """Return the mapping stored in the YAML file at ``path``.

    An empty file parses to ``None``; it is normalised to an empty dict so
    callers can always treat the result as a mapping.
    """
    with open(path, 'r') as yaml_f:
        # The Loader is passed explicitly because a bare yaml.load(f) raises
        # TypeError on PyYAML >= 6. yaml.Loader is what older releases used by
        # default, so files already on disk keep loading the same way.
        # SECURITY NOTE(review): yaml.Loader can build arbitrary Python
        # objects. These files are written by this script itself; switch to
        # yaml.safe_load if the archives only ever contain plain types.
        return yaml.load(yaml_f, Loader=yaml.Loader) or {}


def _fetch_history(print_name, last_checkpoint):
    """Page through a dialog's history until ``last_checkpoint`` is reached.

    Pages of 250 messages are requested with the number of distinct messages
    collected so far as the offset. Fetching stops when the checkpoint id has
    been collected, when a page brings nothing new, or on an unexpected
    ``IllegalResponseException``.

    Returns:
        A dict mapping message id -> message.
    """
    messages = {}
    while last_checkpoint not in messages:
        known = len(messages)
        try:
            page = x.history(print_name, 250, known, retry_connect=-1)
        except IllegalResponseException as e:
            if str(e) == "Result parser does not allow exceptions.":
                # Treated as a "too fast" signal: wait, then retry the same offset.
                logging.warning("Slowing down...")
                time.sleep(5)
                continue
            # NOTE(review): the caller still saves a checkpoint after this,
            # so messages that were not fetched may be skipped by later runs.
            logging.error("Could not load history of %s: %s", print_name, e)
            break
        for message in page or ():
            messages[message['id']] = message
        logging.info("Loading, offset %s", len(messages))
        if len(messages) == known:
            # Empty page, or a page made only of already-known ids: the
            # offset would not advance, so another request cannot make progress.
            break
    return messages


def _archive_media(downloaded, working_dir, date, message_id):
    """Move a downloaded file into the dialog's monthly ``files_Y_M`` directory.

    Returns:
        The path of the moved file relative to ``working_dir``.
    """
    file_dir = "files_{}_{}/".format(date.year, date.month)
    file_dir_full = os.path.join(working_dir, file_dir)
    if not os.path.isdir(file_dir_full):
        os.mkdir(file_dir_full)
    # Extension = text after the last dot of the downloaded path; slashes are
    # dropped so it can never introduce a path component.
    extension = downloaded.split('.')[-1].replace('/', '')
    media_name = "{}.{}".format(message_id, extension)
    os.rename(downloaded, os.path.join(file_dir_full, media_name))
    return file_dir + media_name


def work_on_dialog(d):
    """Backup a particular dialog.

    Messages newer than the saved checkpoint are fetched through ``x`` and
    merged into one YAML file per year/month under ``logs/by_ids/<id>/``.
    Media attached to messages (except ``webpage`` and ``contact``) is
    downloaded and moved next to those files. A ``logs/<type>,<print_name>``
    symlink points at the dialog directory, and the highest message id seen is
    stored in ``_checkpoint.yaml``.

    Args:
        d: dialog mapping; the keys ``type``, ``print_name`` and ``id`` are read.

    Returns:
        ``True`` once the dialog has been saved, ``None`` when the dialog has
        no ``print_name`` and is skipped.
    """
    logging.info("Working on %s %s %s", d['type'], d['print_name'], d['id'])
    if not d['print_name']:
        logging.error("%s has no print_name, cannot continue.", d['id'])
        return

    working_dir = "logs/by_ids/{}/".format(d['id'])
    if not os.path.isdir(working_dir):
        logging.debug("Creating working_dir %s", working_dir)
        # makedirs also creates logs/ and logs/by_ids/ on a first run.
        os.makedirs(working_dir)

    symlink = "logs/{},{}".format(d['type'], d['print_name'].replace('/', ''))
    # lexists: a dangling symlink still occupies the name, and os.symlink
    # would fail on it.
    if not os.path.lexists(symlink):
        logging.debug("Creating symlink %s", symlink)
        # The target is relative to logs/ (the leading "logs/" is stripped).
        os.symlink(working_dir[5:], symlink)

    # "Eat" history until the last message, but stop at the last checkpoint
    checkpoint_file = os.path.join(working_dir, "_checkpoint.yaml")
    last_checkpoint = None
    if os.path.exists(checkpoint_file):
        logging.debug("Loading checkpoing")
        last_checkpoint = _load_yaml_mapping(checkpoint_file).get('checkpoint', None)
        logging.info("Last checkpoint is %s", last_checkpoint)

    messages = _fetch_history(d['print_name'], last_checkpoint)
    logging.info("Found %s messages to process", len(messages))

    # Save messages by date
    loaded_data = {}
    for message in messages.values():
        if 'date' not in message:
            logging.error("Not date in message %s", message['id'])
            continue

        date = datetime.datetime.fromtimestamp(message['date'])
        file_key = '{}.{}.yaml'.format(date.year, date.month)
        if file_key not in loaded_data:
            file_key_name = os.path.join(working_dir, file_key)
            if os.path.isfile(file_key_name):
                loaded_data[file_key] = _load_yaml_mapping(file_key_name)
                logging.info("Loaded datafile %s", file_key)
            else:
                loaded_data[file_key] = {}
                logging.info("Created datafile %s", file_key)

        month_data = loaded_data[file_key]
        # Messages already present in the datafile are left untouched (and
        # their media is not downloaded again).
        if message['id'] not in month_data:
            if message['event'] == 'message':
                entry = {
                    'from': message['from']['print_name'],
                    'text': message.get('text', ''),
                    'date': message['date'],
                }
                month_data[message['id']] = entry
                if 'media' in message and message['media']['type'] not in ['webpage', 'contact']:
                    result = x.load_document(message['id'])
                    if os.path.exists(result['result']):
                        entry['media'] = _archive_media(
                            result['result'], working_dir, date, message['id'])
                    else:
                        # Nothing on disk to move: keep whatever was returned.
                        entry['media'] = result['result']
            elif message['event'] == 'service':
                pass
            else:
                logging.error("Unknow type %s", message['event'])

        if not last_checkpoint or last_checkpoint < message['id']:
            last_checkpoint = message['id']

    # Save messages
    for file_key, data in loaded_data.items():
        with open(os.path.join(working_dir, file_key), 'w') as file_key_f:
            yaml.dump(data, file_key_f, default_flow_style=False)
        logging.info("Saved datafile %s", file_key)

    # Save checkpoint
    with open(checkpoint_file, 'w') as checkpoint_f:
        yaml.dump({'checkpoint': last_checkpoint}, checkpoint_f)
    logging.info("Saved checkpoint")
    return True
# Script entry: list every dialog first, then back them up one after another.
all_dialogs = build_dialogs_list()
for d in all_dialogs:
    work_on_dialog(d)