From 85a455752586ddec223bcce4f282656ea1f60e4f Mon Sep 17 00:00:00 2001 From: Bruno Cabral Date: Mon, 19 Feb 2024 07:19:33 -0800 Subject: [PATCH] match all orientations (#1127) * match all orientations * use rotation as option --------- Co-authored-by: Andrew Senetar Co-authored-by: Luke --- core/engine.py | 6 ++--- core/fs.py | 4 +++- core/pe/cache_sqlite.py | 28 +++++++++++------------ core/pe/matchblock.py | 42 ++++++++++++++++++++++------------ core/pe/photo.py | 7 ++++-- core/pe/scanner.py | 2 ++ help/en/preferences.rst | 4 ++++ locale/en/LC_MESSAGES/ui.po | 4 ++++ locale/es/LC_MESSAGES/ui.po | 4 ++++ locale/pt_BR/LC_MESSAGES/ui.po | 4 ++++ qt/app.py | 1 + qt/pe/preferences_dialog.py | 4 ++++ qt/preferences.py | 3 +++ 13 files changed, 78 insertions(+), 35 deletions(-) diff --git a/core/engine.py b/core/engine.py index ca05a61f..934152fb 100644 --- a/core/engine.py +++ b/core/engine.py @@ -304,12 +304,12 @@ def getmatches_by_contents(files, bigsize=0, j=job.nulljob): result.append(Match(first, second, 100)) continue # if digests are the same (and not None) then files match - if first.digest_partial == second.digest_partial and first.digest_partial is not None: + if first.digest_partial is not None and first.digest_partial == second.digest_partial: if bigsize > 0 and first.size > bigsize: - if first.digest_samples == second.digest_samples and first.digest_samples is not None: + if first.digest_samples is not None and first.digest_samples == second.digest_samples: result.append(Match(first, second, 100)) else: - if first.digest == second.digest and first.digest is not None: + if first.digest is not None and first.digest == second.digest: result.append(Match(first, second, 100)) group_count += 1 j.add_progress(desc=PROGRESS_MESSAGE % (len(result), group_count)) diff --git a/core/fs.py b/core/fs.py index 13e6ab52..a62d29ce 100644 --- a/core/fs.py +++ b/core/fs.py @@ -206,7 +206,7 @@ class File: # Slots for File make us save quite a bit of memory. In a memory test I've made with a lot of # files, I saved 35% memory usage with "unread" files (no _read_info() call) and gains become # even greater when we take into account read attributes (70%!). Yeah, it's worth it. - __slots__ = ("path", "is_ref", "words") + tuple(INITIAL_INFO.keys()) + __slots__ = ("path", "unicode_path", "is_ref", "words") + tuple(INITIAL_INFO.keys()) def __init__(self, path): for attrname in self.INITIAL_INFO: @@ -217,6 +217,8 @@ def __init__(self, path): self.mtime = nonone(path.stat().st_mtime, 0) else: self.path = path + if self.path: + self.unicode_path = str(self.path) def __repr__(self): return f"<{self.__class__.__name__} {str(self.path)}>" diff --git a/core/pe/cache_sqlite.py b/core/pe/cache_sqlite.py index 4cb3c588..5aaf2940 100644 --- a/core/pe/cache_sqlite.py +++ b/core/pe/cache_sqlite.py @@ -15,10 +15,10 @@ class SqliteCache: """A class to cache picture blocks in a sqlite backend.""" - schema_version = 1 - schema_version_description = "Changed from string to bytes for blocks." + schema_version = 2 + schema_version_description = "Added blocks for all 8 orientations." - create_table_query = "CREATE TABLE IF NOT EXISTS pictures(path TEXT, mtime_ns INTEGER, blocks BLOB)" + create_table_query = "CREATE TABLE IF NOT EXISTS pictures(path TEXT, mtime_ns INTEGER, blocks BLOB, blocks2 BLOB, blocks3 BLOB, blocks4 BLOB, blocks5 BLOB, blocks6 BLOB, blocks7 BLOB, blocks8 BLOB)" create_index_query = "CREATE INDEX IF NOT EXISTS idx_path on pictures (path)" drop_table_query = "DROP TABLE IF EXISTS pictures" drop_index_query = "DROP INDEX IF EXISTS idx_path" @@ -43,12 +43,12 @@ def __delitem__(self, key): # Optimized def __getitem__(self, key): if isinstance(key, int): - sql = "select blocks from pictures where rowid = ?" + sql = "select blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 from pictures where rowid = ?" else: - sql = "select blocks from pictures where path = ?" - result = self.con.execute(sql, [key]).fetchone() - if result: - result = bytes_to_colors(result[0]) + sql = "select blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 from pictures where path = ?" + blocks = self.con.execute(sql, [key]).fetchone() + if blocks: + result = [bytes_to_colors(block) for block in blocks] return result else: raise KeyError(key) @@ -64,17 +64,17 @@ def __len__(self): return result[0][0] def __setitem__(self, path_str, blocks): - blocks = colors_to_bytes(blocks) + blocks = [colors_to_bytes(block) for block in blocks] if op.exists(path_str): mtime = int(os.stat(path_str).st_mtime) else: mtime = 0 if path_str in self: - sql = "update pictures set blocks = ?, mtime_ns = ? where path = ?" + sql = "update pictures set blocks = ?, blocks2 = ?, blocks3 = ?, blocks4 = ?, blocks5 = ?, blocks6 = ?, blocks7 = ?, blocks8 = ?, mtime_ns = ? where path = ?" else: - sql = "insert into pictures(blocks,mtime_ns,path) values(?,?,?)" + sql = "insert into pictures(blocks,blocks2,blocks3,blocks4,blocks5,blocks6,blocks7,blocks8,mtime_ns,path) values(?,?,?,?,?,?,?,?,?,?)" try: - self.con.execute(sql, [blocks, mtime, path_str]) + self.con.execute(sql, blocks + [mtime, path_str]) except sqlite.OperationalError: logging.warning("Picture cache could not set value for key %r", path_str) except sqlite.DatabaseError as e: @@ -136,9 +136,9 @@ def get_id(self, path): raise ValueError(path) def get_multiple(self, rowids): - sql = "select rowid, blocks from pictures where rowid in (%s)" % ",".join(map(str, rowids)) + sql = "select rowid, blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 from pictures where rowid in (%s)" % ",".join(map(str, rowids)) cur = self.con.execute(sql) - return ((rowid, bytes_to_colors(blocks)) for rowid, blocks in cur) + return ((rowid, [bytes_to_colors(blocks), bytes_to_colors(blocks2), bytes_to_colors(blocks3), bytes_to_colors(blocks4), bytes_to_colors(blocks5), bytes_to_colors(blocks6), bytes_to_colors(blocks7), bytes_to_colors(blocks8)]) for rowid, blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 in cur) def purge_outdated(self): """Go through the cache and purge outdated records. diff --git a/core/pe/matchblock.py b/core/pe/matchblock.py index 9af739bd..f312b2ec 100644 --- a/core/pe/matchblock.py +++ b/core/pe/matchblock.py @@ -72,13 +72,12 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob): # entry in iPhoto library. logging.warning("We have a picture with a null path here") continue - picture.unicode_path = str(picture.path) logging.debug("Analyzing picture at %s", picture.unicode_path) if with_dimensions: picture.dimensions # pre-read dimensions try: if picture.unicode_path not in cache: - blocks = picture.get_blocks(BLOCK_COUNT_PER_SIDE) + blocks = [picture.get_blocks(BLOCK_COUNT_PER_SIDE, orientation) for orientation in range(1, 9)] cache[picture.unicode_path] = blocks prepared.append(picture) except (OSError, ValueError) as e: @@ -119,13 +118,13 @@ def get_match(first, second, percentage): return Match(first, second, percentage) -def async_compare(ref_ids, other_ids, dbname, threshold, picinfo): +def async_compare(ref_ids, other_ids, dbname, threshold, picinfo, match_rotated=False): # The list of ids in ref_ids have to be compared to the list of ids in other_ids. other_ids # can be None. In this case, ref_ids has to be compared with itself # picinfo is a dictionary {pic_id: (dimensions, is_ref)} cache = get_cache(dbname, readonly=True) limit = 100 - threshold - ref_pairs = list(cache.get_multiple(ref_ids)) + ref_pairs = list(cache.get_multiple(ref_ids)) # (rowid, [b, b2, ..., b8]) if other_ids is not None: other_pairs = list(cache.get_multiple(other_ids)) comparisons_to_do = [(r, o) for r in ref_pairs for o in other_pairs] @@ -138,22 +137,35 @@ def async_compare(ref_ids, other_ids, dbname, threshold, picinfo): if ref_is_ref and other_is_ref: continue if ref_dimensions != other_dimensions: - continue - try: - diff = avgdiff(ref_blocks, other_blocks, limit, MIN_ITERATIONS) - percentage = 100 - diff - except (DifferentBlockCountError, NoBlocksError): - percentage = 0 - if percentage >= threshold: - results.append((ref_id, other_id, percentage)) + if match_rotated: + rotated_ref_dimensions = (ref_dimensions[1], ref_dimensions[0]) + if rotated_ref_dimensions != other_dimensions: + continue + else: + continue + + orientation_range = 1 + if match_rotated: + orientation_range = 8 + + for orientation_ref in range(orientation_range): + try: + diff = avgdiff(ref_blocks[orientation_ref], other_blocks[0], limit, MIN_ITERATIONS) + percentage = 100 - diff + except (DifferentBlockCountError, NoBlocksError): + percentage = 0 + if percentage >= threshold: + results.append((ref_id, other_id, percentage)) + break + cache.close() return results -def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljob): +def getmatches(pictures, cache_path, threshold, match_scaled=False, match_rotated=False, j=job.nulljob): def get_picinfo(p): if match_scaled: - return (None, p.is_ref) + return ((None, None), p.is_ref) else: return (p.dimensions, p.is_ref) @@ -205,7 +217,7 @@ def collect_results(collect_all=False): picinfo.update({p.cache_id: get_picinfo(p) for p in other_chunk}) else: other_ids = None - args = (ref_ids, other_ids, cache_path, threshold, picinfo) + args = (ref_ids, other_ids, cache_path, threshold, picinfo, match_rotated) async_results.append(pool.apply_async(async_compare, args)) collect_results() collect_results(collect_all=True) diff --git a/core/pe/photo.py b/core/pe/photo.py index 128e3c1f..5bc8356f 100644 --- a/core/pe/photo.py +++ b/core/pe/photo.py @@ -100,5 +100,8 @@ def _read_info(self, field): elif field == "exif_timestamp": self.exif_timestamp = self._get_exif_timestamp() - def get_blocks(self, block_count_per_side): - return self._plat_get_blocks(block_count_per_side, self._get_orientation()) + def get_blocks(self, block_count_per_side, orientation: int = None): + if orientation is None: + return self._plat_get_blocks(block_count_per_side, self._get_orientation()) + else: + return self._plat_get_blocks(block_count_per_side, orientation) diff --git a/core/pe/scanner.py b/core/pe/scanner.py index e58c4a90..8a1d53d2 100644 --- a/core/pe/scanner.py +++ b/core/pe/scanner.py @@ -14,6 +14,7 @@ class ScannerPE(Scanner): cache_path = None match_scaled = False + match_rotated = False @staticmethod def get_scan_options(): @@ -29,6 +30,7 @@ def _getmatches(self, files, j): cache_path=self.cache_path, threshold=self.min_match_percentage, match_scaled=self.match_scaled, + match_rotated=self.match_rotated, j=j, ) elif self.scan_type == ScanType.EXIFTIMESTAMP: diff --git a/help/en/preferences.rst b/help/en/preferences.rst index eadc4f3c..0a1423d5 100644 --- a/help/en/preferences.rst +++ b/help/en/preferences.rst @@ -14,6 +14,10 @@ Preferences If you check this box, pictures of different dimensions will be allowed in the same duplicate group. +**Match pictures of different rotations:** + If you check this box, pictures of different rotations will be allowed in the same + duplicate group. + .. _filter-hardness: **Filter Hardness:** diff --git a/locale/en/LC_MESSAGES/ui.po b/locale/en/LC_MESSAGES/ui.po index 7ef3b1d9..f77da262 100644 --- a/locale/en/LC_MESSAGES/ui.po +++ b/locale/en/LC_MESSAGES/ui.po @@ -307,6 +307,10 @@ msgstr "Debug mode (restart required)" msgid "Match pictures of different dimensions" msgstr "Match pictures of different dimensions" +#: qt/pe/preferences_dialog.py:19 cocoa/en.lproj/Localizable.strings:0 +msgid "Match pictures of different rotations" +msgstr "Match pictures of different rotations" + #: qt/preferences_dialog.py:43 msgid "Filter Hardness:" msgstr "Filter Hardness:" diff --git a/locale/es/LC_MESSAGES/ui.po b/locale/es/LC_MESSAGES/ui.po index c0bcee6f..7089a348 100644 --- a/locale/es/LC_MESSAGES/ui.po +++ b/locale/es/LC_MESSAGES/ui.po @@ -316,6 +316,10 @@ msgstr "Mode de depuración (se requiere reinicio)" msgid "Match pictures of different dimensions" msgstr "Coincidencia de imágenes de distintas dimensiones" +#: qt/pe/preferences_dialog.py:19 cocoa/en.lproj/Localizable.strings:0 +msgid "Match pictures of different rotations" +msgstr "Coincidencia de imágenes de distintas rotaciones" + #: qt/preferences_dialog.py:43 msgid "Filter Hardness:" msgstr "Dureza del Filtro:" diff --git a/locale/pt_BR/LC_MESSAGES/ui.po b/locale/pt_BR/LC_MESSAGES/ui.po index 92e80b73..56eec41e 100644 --- a/locale/pt_BR/LC_MESSAGES/ui.po +++ b/locale/pt_BR/LC_MESSAGES/ui.po @@ -314,6 +314,10 @@ msgstr "Modo de Depuração (requer reinício)" msgid "Match pictures of different dimensions" msgstr "Coincidir fotos de dimensões diferentes" +#: qt/pe/preferences_dialog.py:19 cocoa/en.lproj/Localizable.strings:0 +msgid "Match pictures of different rotations" +msgstr "Coincidir fotos de rotações diferentes" + #: qt/preferences_dialog.py:43 msgid "Filter Hardness:" msgstr "Pressão do Filtro:" diff --git a/qt/app.py b/qt/app.py index 5e6271c0..30312cd8 100644 --- a/qt/app.py +++ b/qt/app.py @@ -192,6 +192,7 @@ def _update_options(self): scanned_tags.add("year") self.model.options["scanned_tags"] = scanned_tags self.model.options["match_scaled"] = self.prefs.match_scaled + self.model.options["match_rotated"] = self.prefs.match_rotated self.model.options["include_exists_check"] = self.prefs.include_exists_check self.model.options["rehash_ignore_mtime"] = self.prefs.rehash_ignore_mtime diff --git a/qt/pe/preferences_dialog.py b/qt/pe/preferences_dialog.py index 375cc779..735d4eda 100644 --- a/qt/pe/preferences_dialog.py +++ b/qt/pe/preferences_dialog.py @@ -21,6 +21,8 @@ def _setupPreferenceWidgets(self): self.widgetsVLayout.addLayout(self.filterHardnessHLayout) self._setupAddCheckbox("matchScaledBox", tr("Match pictures of different dimensions")) self.widgetsVLayout.addWidget(self.matchScaledBox) + self._setupAddCheckbox("matchRotatedBox", tr("Match pictures of different rotations")) + self.widgetsVLayout.addWidget(self.matchRotatedBox) self._setupAddCheckbox("mixFileKindBox", tr("Can mix file kind")) self.widgetsVLayout.addWidget(self.mixFileKindBox) self._setupAddCheckbox("useRegexpBox", tr("Use regular expressions when filtering")) @@ -57,6 +59,7 @@ def _setupDisplayPage(self): def _load(self, prefs, setchecked, section): setchecked(self.matchScaledBox, prefs.match_scaled) + setchecked(self.matchRotatedBox, prefs.match_rotated) # Update UI state based on selected scan type scan_type = prefs.get_scan_type(AppMode.PICTURE) @@ -67,5 +70,6 @@ def _load(self, prefs, setchecked, section): def _save(self, prefs, ischecked): prefs.match_scaled = ischecked(self.matchScaledBox) + prefs.match_rotated = ischecked(self.matchRotatedBox) prefs.details_dialog_override_theme_icons = ischecked(self.details_dialog_override_theme_icons) prefs.details_dialog_viewers_show_scrollbars = ischecked(self.details_dialog_viewers_show_scrollbars) diff --git a/qt/preferences.py b/qt/preferences.py index 17ae3bf9..1f88cc31 100644 --- a/qt/preferences.py +++ b/qt/preferences.py @@ -225,6 +225,7 @@ def _load_values(self, settings): self.scan_tag_genre = get("ScanTagGenre", self.scan_tag_genre) self.scan_tag_year = get("ScanTagYear", self.scan_tag_year) self.match_scaled = get("MatchScaled", self.match_scaled) + self.match_rotated = get("MatchRotated", self.match_rotated) def reset(self): self.filter_hardness = 95 @@ -277,6 +278,7 @@ def reset(self): self.scan_tag_genre = False self.scan_tag_year = False self.match_scaled = False + self.match_rotated = False def _save_values(self, settings): set_ = self.set_value @@ -330,6 +332,7 @@ def _save_values(self, settings): set_("ScanTagGenre", self.scan_tag_genre) set_("ScanTagYear", self.scan_tag_year) set_("MatchScaled", self.match_scaled) + set_("MatchRotated", self.match_rotated) # scan_type is special because we save it immediately when we set it. def get_scan_type(self, app_mode):