Skip to content

Commit

Permalink
Bugfix: Do not RELU latent in clustering
Browse files Browse the repository at this point in the history
Commit fc31a8a accidentally set `matrix[matrix < 0] = 0`, hence destroying 50%
of information in the latent space. This led to a serious quality regression
in Vamb, as discovered by Xinyuan.

Also check that the new PCG64-based shuffling works correctly.
  • Loading branch information
jakobnissen committed Jun 1, 2023
1 parent 3f491b6 commit d691729
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
5 changes: 3 additions & 2 deletions test/test_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,10 @@ def test_bad_params(self):
# This depends on the implementation of shuffling and permute being
# the same, which I test here.
def test_shuffling(self):
seed = 0
cp = self.data.copy()
np.random.RandomState(0).shuffle(cp)
indices = np.random.RandomState(0).permutation(len(cp))
np.random.Generator(np.random.PCG64(seed)).shuffle((cp))
indices = np.random.Generator(np.random.PCG64(seed)).permutation(len(cp))
cplike = self.data[indices]
self.assertTrue(np.all(cplike == cp))

Expand Down
3 changes: 1 addition & 2 deletions vamb/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,8 +479,7 @@ def _normalize(matrix: _Tensor, inplace: bool = False) -> _Tensor:

# If any rows are kept all zeros, the distance function will return 0.5 to all points
# inclusive itself, which can break the code in this module
matrix[matrix < 0] = 0
zeromask = matrix.max(dim=1).values == 0
zeromask = (matrix == 0).all(dim=1)
matrix[zeromask] = 1 / matrix.shape[1]
matrix /= matrix.norm(dim=1).reshape(-1, 1) * (2**0.5)
return matrix
Expand Down

0 comments on commit d691729

Please sign in to comment.