-
Notifications
You must be signed in to change notification settings - Fork 0
/
pysearch.py
265 lines (205 loc) · 9.08 KB
/
pysearch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
""" this script assists in sifting through and comparing key attributes of different instances/versions of an album to stratify the population based on
key attributes of:
- track count
- dynamic range
- bit depth
- sampling rate
- sampling frequency
- channels
Dependencies are Python 3.x and associated libraries coupled with a SQLite database containing the fields specified in 'criteria' below. The base database
can be generated by running https://github.com/audiomuze/audiodb against a folder tree.
DR data is generated using https://github.com/audiomuze/dr14_t.meter (or the most recent version thereof in the master repository) then collected using
getdr14.sh and imported into table.
This version of the script compares two different data sources and stratifies the population on the basis of matches. A seperate version to be adapted to
stratify different'versions' in same table / directory tree.
Note, this code has not been optimised, it's a top down first cut and there are no doubt many opporunities to optimise"""
import pandas as pd
import sqlite3
from itertools import product
def run_query(query1, query2, iteration, dbconn):
""" generate a dataframe containing the query results and if a result is returned,
write it out to file and delete the corresponding records from the table """
df = pd.read_sql( query1, dbconn)
if len(df) > 0:
df.to_csv(f"/tmp/db/results/{iteration}alib_has_{tc}tc_{dr}dr_{bd}bd_{fr}fr_{ch}ch.csv", index = False, sep="|")
dbcursor.execute( query2 )
""" set up all the global variables for the script """
""" define the fields we want to return from the query """
"""fields = ("a.__dirpath alib_dirpath, "
"b.__dirpath jbod_dirpath, "
"a.__dirname alib_dirname, "
"b.__dirname jbod_dirname, "
"a.albumartist alib_albumartist, "
"b.albumartist jbod_albumartist, "
"a.artist alib_artist, "
"b.artist jbod_artist, "
"a.album alib_album, "
"b.album jbod_album, "
"a.track_count alib_tracks, "
"b.track_count jbod_tracks, "
"a.dynamicrange alib_dr, "
"b.dynamicrange jbod_dr, "
"a.__bitspersample alib_bits, "
"b.__bitspersample jbod_bits, "
"a.__frequency_num alib_freq, "
"b.__frequency_num jbod_freq, "
"a.__channels alib_channels, "
"b.__channels jbod_channels")"""
fields = ("b.__dirpath jbod_dirpath")
""" name the tables we will be iterating """
table1 = "alib_dirnames"
table2 = "jbod_dirnames"
""" Define the datapoints and their possible values to iterate """
tc = ("=", ">", "<")
dr = ("=", ">", "<")
bd = ("=", ">", "<")
fr = ("=", ">", "<")
ch = ("=", ">", "<")
""" Generate the resulting Cartesian product """
permutations = list(product(tc, dr, bd, fr, ch))
""" establish database connection """
conn = sqlite3.connect("/tmp/db/alib_jbod.db")
dbcursor = conn.cursor()
""" now iterate the queries """
for tc, dr, bd, fr, ch in permutations:
""" define select criteria - 1st query is highest order match: albumartist, album & version"""
criteria = (f"a.albumartist IS NOT NULL AND "
f"b.albumartist IS NOT NULL AND "
f"a.album IS NOT NULL AND "
f"b.album IS NOT NULL AND "
f"a.version IS NOT NULL AND "
f"b.version IS NOT NULL AND "
f"a.albumartist = b.albumartist AND "
f"a.album = b.album AND "
f"a.version = b.version AND "
f"a.track_count {tc} b.track_count AND "
f"a.dynamicrange {dr} b.dynamicrange AND "
f"a.__bitspersample {bd} b.__bitspersample AND "
f"a.__frequency_num {fr} b.__frequency_num AND "
f"a.__channels {ch} b.__channels")
""" define the SQL query to be run """
query1 = (f"SELECT {fields} "
f"FROM {table1} a "
f"INNER JOIN {table2} b "
f"ON ( {criteria} ) "
f"WHERE a.__dirname NOT LIKE 'CD%' "
f"ORDER BY b.__dirpath;")
""" define the corresponding delete query """
query2 = (f"DELETE FROM {table2} "
f"WHERE {table2}.__dirpath IN ( "
f"SELECT b.__dirpath "
f"FROM {table1} a "
f"INNER JOIN {table2} b "
f"ON ( {criteria} ) "
f"WHERE a.__dirname NOT LIKE 'CD%');")
run_query(query1, query2, 1, conn)
for tc, dr, bd, fr, ch in permutations:
""" define select criteria - 2nd query is matched albumartist and album """
criteria = (f"a.albumartist IS NOT NULL AND "
f"b.albumartist IS NOT NULL AND "
f"a.album IS NOT NULL AND "
f"b.album IS NOT NULL AND "
f"a.albumartist = b.albumartist AND "
f"a.album = b.album AND "
f"a.track_count {tc} b.track_count AND "
f"a.dynamicrange {dr} b.dynamicrange AND "
f"a.__bitspersample {bd} b.__bitspersample AND "
f"a.__frequency_num {fr} b.__frequency_num AND "
f"a.__channels {ch} b.__channels")
""" define the SQL query to be run """
query1 = (f"SELECT {fields} "
f"FROM {table1} a "
f"INNER JOIN {table2} b "
f"ON ( {criteria} ) "
f"WHERE a.__dirname NOT LIKE 'CD%' "
f"ORDER BY b.__dirpath;")
""" define the corresponding delete query """
query2 = (f"DELETE FROM {table2} "
f"WHERE {table2}.__dirpath IN ( "
f"SELECT b.__dirpath "
f"FROM {table1} a "
f"INNER JOIN {table2} b "
f"ON ( {criteria} ) "
f"WHERE a.__dirname NOT LIKE 'CD%');")
run_query(query1, query2, 2, conn)
for tc, dr, bd, fr, ch in permutations:
""" define select criteria - 3rd query: matched artist and album only """
criteria = (f"a.artist IS NOT NULL AND "
f"b.artist IS NOT NULL AND "
f"a.album IS NOT NULL AND "
f"b.album IS NOT NULL AND "
f"a.artist = b.artist AND "
f"a.album = b.album AND "
f"a.track_count {tc} b.track_count AND "
f"a.dynamicrange {dr} b.dynamicrange AND "
f"a.__bitspersample {bd} b.__bitspersample AND "
f"a.__frequency_num {fr} b.__frequency_num AND "
f"a.__channels {ch} b.__channels")
""" define the SQL query to be run """
query1 = (f"SELECT {fields} "
f"FROM {table1} a "
f"INNER JOIN {table2} b "
f"ON ( {criteria} ) "
f"WHERE a.__dirname NOT LIKE 'CD%' "
f"ORDER BY b.__dirpath;")
""" define the corresponding delete query """
query2 = (f"DELETE FROM {table2} "
f"WHERE {table2}.__dirpath IN ( "
f"SELECT b.__dirpath "
f"FROM {table1} a "
f"INNER JOIN {table2} b "
f"ON ( {criteria} ) "
f"WHERE a.__dirname NOT LIKE 'CD%');")
run_query(query1, query2, 3, conn)
for tc, dr, bd, fr, ch in permutations:
""" define select criteria - 4th query is the weakest: matched __dirname only """
criteria = (f"a.__dirname = b.__dirname AND "
f"a.track_count {tc} b.track_count AND "
f"a.dynamicrange {dr} b.dynamicrange AND "
f"a.__bitspersample {bd} b.__bitspersample AND "
f"a.__frequency_num {fr} b.__frequency_num AND "
f"a.__channels {ch} b.__channels")
""" define the SQL query to be run """
query1 = (f"SELECT {fields} "
f"FROM {table1} a "
f"INNER JOIN {table2} b "
f"ON ( {criteria} ) "
f"WHERE a.__dirname NOT LIKE 'CD%' "
f"ORDER BY b.__dirpath;")
""" define the corresponding delete query """
query2 = (f"DELETE FROM {table2} "
f"WHERE {table2}.__dirpath IN ( "
f"SELECT b.__dirpath "
f"FROM {table1} a "
f"INNER JOIN {table2} b "
f"ON ( {criteria} ) "
f"WHERE a.__dirname NOT LIKE 'CD%');")
run_query(query1, query2, 4, conn)
for tc, dr, bd, fr, ch in permutations:
""" define select criteria - deal with compilations: matched __dirname like "%VA - %" and album only """
criteria = (f"a.__dirname LIKE '%VA - %' AND "
f"b.__dirname LIKE '%VA - %' AND "
f"a.album IS NOT NULL AND "
f"b.album IS NOT NULL AND "
f"a.album = b.album AND "
f"a.track_count {tc} b.track_count AND "
f"a.dynamicrange {dr} b.dynamicrange AND "
f"a.__bitspersample {bd} b.__bitspersample AND "
f"a.__frequency_num {fr} b.__frequency_num AND "
f"a.__channels {ch} b.__channels")
""" define the SQL query to be run """
query1 = (f"SELECT {fields} "
f"FROM {table1} a "
f"INNER JOIN {table2} b "
f"ON ( {criteria} ) "
f"WHERE a.__dirname NOT LIKE 'CD%' "
f"ORDER BY b.__dirpath;")
""" define the corresponding delete query """
query2 = (f"DELETE FROM {table2} "
f"WHERE {table2}.__dirpath IN ( "
f"SELECT b.__dirpath "
f"FROM {table1} a "
f"INNER JOIN {table2} b "
f"ON ( {criteria} ) "
f"WHERE a.__dirname NOT LIKE 'CD%');")
run_query(query1, query2, 5, conn)