From 6e05a7aab2c436132f863cf797a7f1bc2beadf17 Mon Sep 17 00:00:00 2001
From: Jakob Nybo Nissen
Date: Tue, 27 Aug 2024 09:19:43 +0200
Subject: [PATCH] Fixup: Add comment

---
 vamb/reclustering.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/vamb/reclustering.py b/vamb/reclustering.py
index 914e6b36..63f9c94d 100644
--- a/vamb/reclustering.py
+++ b/vamb/reclustering.py
@@ -23,6 +23,11 @@
 EPS_VALUES = np.arange(0.01, 0.35, 0.02)
 
 
+# TODO: This might be slightly algorithmically inefficient. The big problem is that it re-heapifies
+# the heap whenever a bin is emitted, which makes it O(N^2).
+# To solve this, I need some datastructure which is like a heap, but which allows me to update
+# arbitrary elements in the heap.
+# This can be solved with a MutableBinaryMaxHeap in Julia's DataStructures.jl, for inspiration.
 def deduplicate(
     scoring: Callable[[set[ContigId]], float],
     bins: dict[BinId, set[ContigId]],
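
The TODO above asks for a heap-like structure that allows updating the priority of arbitrary elements, so that emitting a bin does not force a full re-heapify. The following is a minimal Python sketch of such an indexed max-heap. It is not part of the patch or of Vamb's code; the class name UpdatableMaxHeap, its methods (push, update, pop_max), and the bin names in the usage example are illustrative assumptions.

# Sketch only: an indexed binary max-heap with O(log n) priority updates.
# Not Vamb code; names and API are hypothetical.
class UpdatableMaxHeap:
    """Max-heap keyed by hashable items, supporting update of arbitrary elements."""

    def __init__(self) -> None:
        self._heap: list[tuple[float, object]] = []  # (score, item) pairs
        self._pos: dict[object, int] = {}            # item -> index into self._heap

    def __len__(self) -> int:
        return len(self._heap)

    def push(self, item, score: float) -> None:
        """Insert a new item, or update its score if it is already present."""
        if item in self._pos:
            self.update(item, score)
            return
        self._heap.append((score, item))
        self._pos[item] = len(self._heap) - 1
        self._sift_up(len(self._heap) - 1)

    def update(self, item, score: float) -> None:
        """Change the score of an existing item and restore the heap property."""
        i = self._pos[item]
        old_score, _ = self._heap[i]
        self._heap[i] = (score, item)
        if score > old_score:
            self._sift_up(i)
        else:
            self._sift_down(i)

    def pop_max(self):
        """Remove and return (item, score) with the highest score."""
        score, item = self._heap[0]
        self._swap(0, len(self._heap) - 1)
        self._heap.pop()
        del self._pos[item]
        if self._heap:
            self._sift_down(0)
        return item, score

    def _swap(self, i: int, j: int) -> None:
        self._heap[i], self._heap[j] = self._heap[j], self._heap[i]
        self._pos[self._heap[i][1]] = i
        self._pos[self._heap[j][1]] = j

    def _sift_up(self, i: int) -> None:
        while i > 0:
            parent = (i - 1) // 2
            if self._heap[i][0] <= self._heap[parent][0]:
                break
            self._swap(i, parent)
            i = parent

    def _sift_down(self, i: int) -> None:
        n = len(self._heap)
        while True:
            largest = i
            for child in (2 * i + 1, 2 * i + 2):
                if child < n and self._heap[child][0] > self._heap[largest][0]:
                    largest = child
            if largest == i:
                break
            self._swap(i, largest)
            i = largest

# Hypothetical usage: pop the best-scoring bin, then update() only the bins
# affected by it, instead of re-heapifying the entire heap after each emission.
heap = UpdatableMaxHeap()
heap.push("bin_a", 0.9)
heap.push("bin_b", 0.7)
heap.update("bin_b", 0.95)    # O(log n) priority change of an arbitrary element
best, score = heap.pop_max()  # ("bin_b", 0.95)

With this kind of structure, each emitted bin costs O(log N) to pop plus O(log N) per affected bin to update, rather than an O(N) re-heapify per emission, which is the improvement the TODO sketches via Julia's MutableBinaryMaxHeap.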