-
Notifications
You must be signed in to change notification settings - Fork 2
/
extract-files.py
160 lines (130 loc) · 5.69 KB
/
extract-files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import os
import time
import shutil
import sqlite3
from lib.config import read_config
from lib.downloader import Downloader
# Dry-run switch: while true, files are only listed in the log and nothing is copied.
no_op = True

config = read_config()
db_path = config['db_path']
input_dir = config['folder_path']
output_dir = config['extracted_folder_path']

# Duplicate output names are routed into the 'extra' subfolder, so it has to exist
# even when output_dir itself was already created by an earlier run.
os.makedirs(os.path.join(output_dir, 'extra'), exist_ok=True)

# Separator between the request number and the title in a file/folder name.
# A falsy value means the whole name is treated as the number.
num_title_splitter = '. '
# True: take the number from the file name; False: take it from the parent folder name.
nums_in_filenames = False

sql_queery = """
SELECT
requests.number as '№',
card_code,
list.title as nom,
voting_number,
voting_title,
sound_start
FROM list, requests
LEFT JOIN (SELECT request_id, value as sound_start FROM [values]
WHERE title LIKE 'Начало%')
ON request_id = requests.id
WHERE list.id = topic_id
AND status != 'disapproved'
AND (default_duration > 0 OR card_code = 'AGR')
"""
# Result column whose value (converted to str) keys the per-request lookup table.
num_field = '№'

video_exts = {'avi', 'mp4', 'mov', 'wmv', 'mkv'}
audio_exts = {'mp3', 'wav', 'flac', 'ogg', 'm4a', 'aac'}
img_exts = {'jpeg', 'png', 'jpg'}
# Extensions that get extracted; swap in the commented line to extract images instead.
# target_exts = img_exts
target_exts = video_exts | audio_exts

# Text accumulators that end up in the run log.
processed_log = ""
title_differences = ""
skipped_files = ""
errors = ""
# Maps the full sound-start option text stored in the database to the short tag
# placed in front of the title in the output file name.
_SOUND_START_TAGS = {
    'Трек запускается сразу после объявления (выход из за кулис под играющий трек)': 'Сразу',
    'Трек запускается после выхода на сцену (с точки, без реквизита)': 'С точки',
    'Трек запускается сразу после выноса реквизита (когда на сцене никого)': 'Стафф,Сразу',
    'Трек запускается после выноса реквизита и выхода на точку': 'Стафф,ТЧК',
    'Трек содержит превью (выход из за кулис во время превью танца)': 'Превью',
}
# Tag used when the sound start is missing or not one of the known options.
_UNKNOWN_SOUND_START = 'Неизвестно'


def make_filename(data, num, dl_title=None):
    """Build the output file name parts for the request stored under *num*.

    Args:
        data: Mapping from request number to a row dict with the keys
            'voting_number', 'card_code', 'voting_title', 'sound_start' and '№'.
        num: Key to look up in *data*.
        dl_title: Title taken from the downloaded file or folder name. When it is
            truthy and the database title differs from it (or from its sanitized
            form), the difference is appended to the global ``title_differences``.

    Returns:
        ``(False, num, None)`` when *num* is not in *data*; otherwise
        ``(True, code, title)`` where *code* is "<voting_number> <card_code>" and
        *title* is "[<sound start tag>] <voting_title> №<number>", both passed
        through ``Downloader.to_filename``.
    """
    global title_differences

    if num not in data:
        return False, num, None

    req_data = data[num]
    real_title = req_data['voting_title']
    code = Downloader.to_filename(f"{req_data['voting_number']} {req_data['card_code']}")
    title = Downloader.to_filename(real_title)

    if dl_title and (real_title != title or real_title != dl_title):
        title_differences += f"\n{code}" \
                             f"\nDownloaded: {dl_title}" \
                             f"\nExtracted: {title}" \
                             f"\nReal: {real_title}"

    raw_sound_start = req_data['sound_start']
    if raw_sound_start:
        # An option text that is not in the table gets the "unknown" tag instead of
        # raising KeyError and aborting the whole run.
        sound_start = _SOUND_START_TAGS.get(raw_sound_start, _UNKNOWN_SOUND_START)
    elif str(req_data['card_code'] or '').startswith('V'):  # Videos
        # startswith() is safe for an empty or NULL card code, unlike indexing [0].
        sound_start = 'Сразу'
    else:
        sound_start = _UNKNOWN_SOUND_START

    title = f"[{sound_start}] {title}"
    title = Downloader.to_filename(f"{title} №{req_data['№']:d}")
    return True, code, title
# --- Load request data from the database ---
print('Connecting to %s...' % os.path.abspath(db_path))
data, headers = [], []
db = sqlite3.connect(db_path, isolation_level=None)
try:
    c = db.cursor()
    c.execute('PRAGMA encoding = "UTF-8"')
    c.execute(sql_queery)
    print('Fetching data...')
    data = c.fetchall()
    headers = [description[0] for description in c.description]
finally:
    # Using the connection as a context manager would not close it, so close explicitly.
    print('Closing the database...')
    db.close()

# One dict per row keyed by column name, then indexed by the request number as a string.
data_dicts = [dict(zip(headers, row)) for row in data]
data_by_num = {str(req[num_field]): req for req in data_dicts}
# Starts as every known request; numbers are removed as their files are found.
missing_files = set(data_by_num)

# --- Walk the input folder and copy (or just list) matching files ---
print('Processing files...')
for dirpath, dirnames, filenames in os.walk(input_dir):
    root, dir_name = os.path.split(dirpath)
    if dir_name.endswith('-not-extract'):
        continue
    # Per-directory counter used to number duplicate output names.
    rep = 1
    for filename in filenames:
        # Compare the real extension (text after the last dot), case-insensitively,
        # so 'clip.MP4' is accepted while 'foo.xmp3' or a dotless 'foomp3' is not.
        stem, dot, ext = filename.rpartition('.')
        if not dot or ext.lower() not in target_exts:
            skipped_files += f"{dir_name} | {filename}\n"
            continue

        name = stem if nums_in_filenames else dir_name
        if num_title_splitter:
            # partition() never raises; a name without the splitter is looked up
            # as a bare number and ends up in the error log if it is unknown.
            num, found, title = name.partition(num_title_splitter)
            num = num.lstrip('№')
            if not found:
                title = None
        else:
            num, title = name, None
        # Lookup keys come from str() of the database number, so drop zero padding.
        num = num.lstrip('0')

        success, code, name = make_filename(data_by_num, num, title)
        if not success:
            msg = "|>>> ERROR <<<| Failed to make title for %s." % os.path.join(dirpath, filename).replace(input_dir, '.')
            errors += msg + '\n'
            print(msg)
            continue
        missing_files.discard(num)

        new_filename = f"{code}. {name}.{ext}"
        if os.path.exists(os.path.join(output_dir, new_filename)):
            # Name already taken in the output folder: put this one under 'extra'.
            rep += 1
            new_filename = os.path.join('extra', f"{code}. {name} ({rep}).{ext}")

        msg = f"{dir_name} | {filename} -> {new_filename}"
        processed_log += msg + '\n'
        print(msg)

        old_path = os.path.join(dirpath, filename)
        new_path = os.path.join(output_dir, new_filename)
        if not no_op:
            os.makedirs(os.path.dirname(new_path), exist_ok=True)
            shutil.copy(old_path, new_path)

# --- Summarize and write the log ---
missing_files_msg = "\n".join(". ".join(make_filename(data_by_num, num)[1:]) for num in missing_files)
info_log = "\n--- Skipped by extension ---\n" + skipped_files + \
           "\n--- Title differences ---\n" + title_differences + \
           "\n--- Errors ---\n" + errors + \
           "\n--- Missing files ---\n" + missing_files_msg
print(info_log)

log_file = os.path.join(output_dir, time.strftime("log-%d%m%y%H%M%S.txt", time.localtime()))
# Text mode already translates '\n' to the platform line ending on write, so the
# text is written as-is (replacing it by hand produced '\r\r\n' on Windows).
with open(log_file, 'w', encoding='utf-8') as log:
    log.write(processed_log + info_log)