Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewrite reclustering #340

Merged
merged 16 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/cli_vamb.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
cache-dependency-path: '**/pyproject.toml'
- name: Download fixtures
run: |
wget https://www.dropbox.com/scl/fi/xzc0tro7oe6tfm3igygpj/ci_data.zip\?rlkey\=xuv6b5eoynfryp4fba1kfp5jm\&st\=9oz24ych\&dl\=0 -O ci_data.zip
wget https://www.dropbox.com/scl/fi/xzc0tro7oe6tfm3igygpj/ci_data.zip\?rlkey\=xuv6b5eoynfryp4fba1kfp5jm\&st\=rjb1xccw\&dl\=0 -O ci_data.zip
unzip -o ci_data.zip
- name: Install dependencies
run: |
Expand Down Expand Up @@ -61,6 +61,6 @@ jobs:
cat outdir_taxometer/log.txt
- name: Run k-means reclustering
run: |
vamb recluster --outdir outdir_recluster --fasta catalogue_mock.fna.gz --abundance abundance_mock.npz --latent_path outdir_taxvamb/vaevae_latent.npz --clusters_path outdir_taxvamb/vaevae_clusters_split.tsv --hmmout_path markers_mock.hmmout --algorithm kmeans --minfasta 200000
vamb recluster --outdir outdir_recluster --fasta catalogue_mock.fna.gz --abundance abundance_mock.npz --latent_path outdir_taxvamb/vaevae_latent.npz --clusters_path outdir_taxvamb/vaevae_clusters_split.tsv --markers markers_mock.npz --algorithm kmeans --minfasta 200000
ls -la outdir_recluster
cat outdir_recluster/log.txt
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ dependencies = [
"pycoverm == 0.6.0",
"networkx == 3.2", # 3.3 drops Python 3.9 support
"scikit-learn == 1.5.0",
"pandas == 2.2.0",
"dadaptation == 3.2",
"loguru == 0.7.2",
"pyhmmer == 0.10.12",
Expand Down
2 changes: 2 additions & 0 deletions test/data/marker.fna
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>abc
GTGGTGAGTGCTTGGACGTGTGTGGTGATGCTCATGGTGTCTTCCCCCCTTCCCCGTGTTGTTACCACCCACTCTACCCGCTCACACTTTCGAATACAAGGGTATTTTTCGAAATGGAAAAGCTGTCGAGCACTCACATGTTCGACAGCTTCATATGTCCTTGAACTACGTAACTTTTTCCTTACAATCAGCGCAAAGTCCAAAGATTTCGGCCTCGTGACTACTGAGAGCAAAGCCGTTTTTAGTGGCAATTTCCTGTGCCCATGTCTCTACTGGACCGCCATCGATTTCGACTGTGTGACCGCAATTGGTGCAGACCAAGTGATGGTGGTGTCCCTCGTCGTGGCATTGGCGGTACAGAGTTTCTCCACCTGTGACGGTAAGTACGTCGACTGCCCTGATGTCCGCGAGGGATTGGAGGGTTCGGTAGACGGTTGTGAGGCCGACGTTGTGTTCCCTGGTGGATAGCTCGTGATGGATTTCTTTGGCGGAAGCGAAGTTATCGATTTCCTCAAGAACGTCAATTACGGCTTTTCGCTGTCTGGTGCTTCGCACTCCCAGCTTCGGGGCAGAGCCTTGGCTGATGCGATTGATACCCACCGTTGATCCTCCTCAATGACACAAAATGTACTTCGATAGTCTACCCAGATGTGTCAACCCCTGCGTTTAGTGCCAGGAGAAGTATGTCGAGGACGAGTGGTTCGGCAAGTGAATAAGTCATTTGCCGGCCTTGACGTTCTGCGTCGACGATACCTGCAGTTTTAAGGACTTTGAGGTGTTGGCTCACTAGTGGTTGCGAACTTTTTACTAGCTTGACCAATTCGTGGACGTAGTGGGGCCTTTCGTTGAGGGCGAGGATGATTTCGATTCTTAAGGGGGAATCTAGTGCCCTAATCAGCAGGCTGATCGCTTTGATGTTTTTTGCTGTTGCAAGTTTCTGAAGCTCAGCTGATGCTGTGGATTCGGACTCTTCTGCAGGGGTGACGAAATTCCGATTTGAGTGTTGAGCCACGGGGAAGTCCTTCCGTCCTTAGGCTAGGTCTGGAATGGATCTAGCACGCTTGCTATTTTACCTTCTATATAAACCTTTTATGAGGGAAATGAAAAAATAGTTATTAGAACTAGTTTACATCGCGAAGGCCGCAAAATGACGGGGTCAGCGGAAGCAACATCGTTAGTTGGGCTAGGATTGGTTGGGTATGTCCTAAAAGGGACGGTTATTTTTTCATTCGACGTGGAGGAGAGCATCCGACGTGGCTCAGCAATCGATCATCGACACCGTGGTTAACCTGTGTAAACGACGTGGACTGGTGTACCCCTGTGGTGAGATCTACGGCGGTACCCGCTCTGCGTGGGACTACGGCCCGCTGGGTGTGGAGCTGAAGGAAAACATCAAGCGCCAGTGGTGGCGTTCTATGGTTACTTCCCGCCCAGATGTTGTGGGTGTTGATACTTCTGTCATCCTTCCTCGCCAGGTGTGGGTAACTTCCGGCCACGTTGAGGTCTTCACTGACCCACTGGTTGAGTCTTTGAACACCCACAAGCGTTACCGTGCGGACCACCTGCTGGAGCAGTACGAAGAGAAGCATGGTCACCCACCTGTAAACGGCTTGGCTGACATCAACGATCCAGAGACCGGCCAGCCAGGTAACTGGACTGAGCCTAAGGCGTTCTCTGGTCTTCTGAAGACTTTCTTGGGACCTGTGGACGACGAAGAGGGTCTGCACTACCTGCGCCCTGAAACTGCTCAGGGTATCTTCGTGAACTTCAAGAACGTGATGAACACTTCACGCATGAAGCCACCTTTCGGTATCGCGAACATCGGTAAGTCTTTCCGTAACGAGATCACCCCAGGTAACTTCATTTTCCGTACTCGTGAGTTCGAGCAGATGGAGATGGAGTTCTTCGTCAAGCCTGGTGAGGACGAAGAGTGGCACCAGCACTGGATTGATACTCGCCTGCAGTGGTACATCAACCTGGGCATTAAGCCTGAGAACCTG
CGTCTGTACGAGCACCCTCAGGAGAAGCTGTCTCACTACTCCAAGCGCACTGTTGATATTGAGTACGCATTCAACTTTGCTAACACCAAGTGGGGCGAGTTAGAGGGTATCGCGAACCGTACTGATTACGATCTTCGCGTGCACTCTGAGGGCTCTGGTGAGGACCTGTCATTCTTCGATCAGGAGACCAATGAGCGTTGGATTCCTTTCGTAATCGAGCCTGCTGCAGGTCTTGGTCGCGCAATGATGATGTTCCTGATGGATGCTTATCACGAGGACGAGGCACCAAACTCAAAGGGTGGCGTCGATAAGCGTGTTGTTCTGAAGCTTGACCGTCGCCTTGCGCCGGTTAAGGTTGCGGTCTTGCCGCTGTCAAAGAAGGACACTTTGACGCCTTTGGCGGAAAAGCTCGCAGCAGAGCTGCGTGAATTCTGGAACGTTGATTACGACACTTCAGGTGCGATTGGTCGCCGTTACCGTCGTCAGGACGAGATCGGTACTCCATTCTGCGTCACCGTTGACTTTGATTCTCTCGAGGACAACGCTGTGACCGTGCGTGAGCGCGACACCATGGAGCAGGTTCGTGTTCCACTTGATGAGCTGCAGGGTTACTTGGCTCAGCGCCTCATCGGCTGCTAAACGGCAACCAATAGAGCGATAATTCGCTAAGACGAATGTAATCGCAGCAACATATAGCACCGGCTTAACAGGCCGGTGCTATTCTGTTCGCATGACTTCGAAGGATCTGATTGTGACCTCCTATACGTCTTGGGGCAAGCGTTTCAAGAATGACGGGAAGCTTTTTATTAACCTACTTCGCAGCACCACTGATAGTGCTGATGAAAAGGTTTTAGCCACTTTCGGTGAAGTTCCCAGCAAATCATTTGAAACCACCGCAACGGTTGATGAGCAGCAGTGGGAACTGTCCTTCAATATTGATGGAACGGCAACTGCCAAGCTTCCTGATGGTCGTGTGTTCAGCGCGAATGCAGGTGAGAAGACCTTTACCAAGTCCAAGCGGATTGAAATCGACATGGACGGCACCGCGATGGCTGCTGTTAATGAAGATAAAAACAATTGGATTATCGACGATTCTGAAGAGAATAAAGTCGCTCAGTTTACCGGTATGAACAACGGTGTGCGTCGCGCGATTGTGGAGTTTGAGCCTGACGTAGAAGTCACCCAGGAGCAGGAAATTTTCTTGTCGTGGGTTGCTCGGAAAACTCTGGAATCCCGCATGTTGGGCTCCAGTTGGGGACTGACTCTGTTTTTGATCATTTTGACGCCAATCATTATTTTTCTCACTTTCAGCTAAAAGGACCATGCAATGGTAGACGCTCAGCGCCCCAAAGCAGGCATCTTCGGTAGCCACACAGAAGAAACATGGGTGTGGCTCGGTAATGAACTTTTCGACGAGTCCGGCGAGGTCATCGCCGACGTTCGCTCCGACGTCCTCTACGTGGATCGCGAACGACTACTCATCGAATCCACCCCCGGCACCATGCGTTTTCGTTGCCGCGCAACACTGTCCGGGGGTGAGGTCTATACAATGACTCAGAATTCTTTCACTGTGGGGGATCTCACTGCGGTGTGCGGGCGCCGGACGTATTCGCTAAAAAGGGTGTCGCCGTGGCGTAAAGAACGCCTGATCACCAACAATGGGGTGGAAGTGGCGCGACTTCGCCCGATGACCAGCGGTAAAGTCGAATTCATTGTGGGCACCGCGGACAGCGAGGCGTTGCCGTTCGTCGACGCAGTATTTTTGAGCTGGGCGTGCGTCCTGGTGGATTCGGCCGTGCGCCGGCCGAAAATTTAAAAGCTTTTTGCTTATCGACGCACCCCTCCACCTGTTTTTTGTAGCCGGGGGATCATTTCCTTTGAAGGATCCAATCTCCGCACTTAGTTTCCTTCGGTGTGAAGGAAAGAGTTCCGTAAAGACCTCTATCTCATTTAAAGAAGTGGAGGATTAGGGTCGTTGACTCGCCTTCGGCACTAATTTG
AGCCAAGTTCAAGTTTGCTGCCATCCCAGGTGACCGAAAATGTCCTATGCGAGGTCTCTTCGGTCACTTGGTTTTGCTCGTTTCAGGCTAGAAGCGGCCTCCGCGGAACCCTCCTCCGCCACCGCCACCACCGCTGAAGCCGCCACCGCCTCCACCGAAGCCTCCACCGAAACCGCCACCGCGGCCGCTGTTGAGAATCGAGTTGATCACCATGCCGGTGACAATCGCACCGGTGGTTCCGCCACCGGAATTGTGGCGATTGTTGTAGGTGGTGATGTCGTTTTGTGCTGACTTGCTGGCGCGTTGGGCTGCGACTGCTGCTTGACGTCCGTAATCAATTCCTGCACGGGTGTCGCGGGTGCGGTTTTGTTGTGCCATGGCGTACAGTTTTTGTGCGTTGGCCAGGTGGGTGCGGGCTTCGGATTTTACGATGCGACCGCGGGTGGAGATGAGGTCTTCGGCCTTTTGGATTTGGCTTCTTGCAGATTGCAGCTGTTGGTCGAATACGCGTAGCTGGCGGGCTTGATCAGCTGCGGTGGCGCGAAGTGTGTCAAGTTGAGTGTCGAGGGCGGAGTCGACATCGACAAGTTCTGTGTAGGTTCCGAGCGGATCCTTTTCGGCGTCTGCTGATGCGGTGGTTAGTGCTGCGCTGGCTGCGCGGACAGCATCGTCGAGGGAGGCCCAGTCGGCACGGGCACCGTCGGCTCCTGCGCTTTGTTTGAGTTGGCCGGCTTCGTTGATTTCGTCTGAGATTTCTTGAATCAGATCGGCAACGTTTGCTTTGGCTGTGGAGATGTTTTCATCGGCATGCTCGACGCCCTCGAGGAGTTTGTCTGCGGTAGTGATGGCGTGCTCGATGTGACGGATCGCGTCGATAAGCCCGCCCTGCTCGCCTGCGGGCATGGACTCTATCTTGTACGCCTGTGGCAGGACTTCTTCTGCTTCGTCGAGCGAAGCGCTGGCGAGGTCGACGTTGTCGTCGATGCTTTCAAGGACCTCTGCTGAGTAGCGAGCGCGCAGGCCAGCGAGTGTTTCTTGAGCCTTGGGGAGGCGGGTGCGCAGGTCGACGGATTTTTGGGTGAGAGCATCCAATTTGCTGCCCGCGTTGATCAGCAGGTTGCGCATATCGGCAAAGTTTTGGGCCTCGGCGTCGAGGGCATCGTCGGCTTGGCCACAGGATGAAATGATTTCTACCAGCATGGATCGACGTTCGGCTTCGGATTCTGGGATAGAATCGTTGAGGCGCTGCTGAATCTCAAAGGCTTTTTGCAGGGTGCCGGTGGAGTGGTTCATGGCGCGGTTGAAGCTGCGGGTGCGCTCTGGTCCGAACTCGGAGGTAGCGATAGCGAGCTCTTCTTTTCCGCGACGGATGGAGTCATCAGTGGAGGTGAGCTCTTCTTGGGCAAGGTGTTCGAGAGTTTCCATGGGAAGCTGCATGAGGCGGTTGGTATCGCGAGGGTCGATCTCACGTGCATCTTCCAAGGTTGCAGCACTTGTTTTCTTCTTGCGGCTGCGGGAATAGGCCCAAATTCCGCCACCAGCGGCCACTGTGCCAACGCCCGCAGCAGCCAACCAAGCGCCGGAAGATCCAGAAGAGCCTGAGGTTCCTGAGGCACCAGAACTAGAACCAACTGATTCTGCCAGCGCTAGTGCGGAGCCTGCCCAATCTTCTTGGGAAAGCGCCTGGAAAGCAGCGTTGTTGGCGGCGTCGAGTTCAGCGTCGGTCCATTGAGTACCACCTTGGATGCCGTACTGCCGTTCCTCGGGAGCGAGTGCATAAACCAAGACGTTTCCGCCGCCGTTGGCTTGGAGTGCTTGCTGCGTCCACGTTTCAGGGTCAACTCCGTCGAAAGAGCTTAGGAAAACAACGAAAATAACCTTTTGTTCAGATGCCTTTACATCATCGATGGCAGCCTGAATGTTGGTGATATCGGACGAGGAAATCTGGCCGGTGTAGTCAGTGACATTGTCTTGGTAAAATTCTGGTGATTCAGCCAAGA
CATATGTTTCTGTGGCTTCTGCAGTGTGAGCAGTAAAAAATGGTCCACTGATAAGGAGCGCGCCAGCTCCAATTGCCACAGTGACCGATACACGGCGGACGTTTTCCCGAAGATGCACCAAACTAAAGTTCATGGTCCCCACCTTAGACGAGTCCAGCTGGCACACTAGTTAACGTGAGAAGATTTTTAGCCAAGAGTTTACTCTTAACCGCAGTAGCGCAACCAGCCCTGAGGGTGGTCGCGTATTCGATGCTCAGAACGCCTAATAATCGGCACAAAATTGATTCAATTTTGGTGTTGGGCACAGCTCAATATGATGGGGTTCCATCGAGGCAGTTTGCTGCTCGTTTGAGGCATGCCGCGAAGCTGTGGCGTCTTCATGAAATCCAGCATGTATATACTGTCGGCGGAAAACTTCCTGGTGATCGTTTCACCGAAGCAGAAGTCGCGCGGGAGTATTTGATCAAAGAGGGCGTGGATCCGGATCTGATTTTTGTCTCTGCAGTTGGCAATGACACTGTCTCCTCCTATGAGGCGCTTGATCCGGAAAAGCTTGGTCGGGTGCTGATTGTTACTGATCCGAACCATTCGTATCGGGCGGTGCGCATCGCGCGACGCATGGGCTTTGACGCGAAACCTTCCCCGACAACCTATAGTCCCGCGAAGTTTCCGTCGATAGTTTATTTTCTGACCTTGTCCCATGAGTGGGGCGGGGTAGTGGTACAGGACGTGTCGTGGCTCTTGGGCGAACGGGTGGCCGATAAGGTGGAAGCATCTTTGCGAACTATCCAAGGCCTGCTGCGCCCTTCGAGGCGTGCGCGCCATGAGCAACTTCGGAGGCTGAAAAAGTAGATGTACCCCTATTCCGACGCAGACGCTTTTCGACGCCACCCTGAGCGCGCCAAGTCCAGCCAACTGCGCACCAGCGCCGTAGACACCCGCAGCGCGTTCGCCCGCGACCGGGCTCGCGTGCTGCATTCTGCTGCTCTTCGACGCCTCGCGGATAAAACCCAAGTGGTTGGCCCCAATGATGGTGATACTCCGCGCACCCGGCTGACGCACTCTTTGGAAGTAGCTCAAATTGCACGGGGAATCGGAGCTGGACTGGATTTGGATCCTGATCTGTGCGATCTGGCAGGGCTGTGCCATGACATTGGGCATCCGCCGTATGGACACAACGGTGAAAACGCGTTGAATGAAGTTGCTGCGGCCTGTGGAGGATTTGAGGGCAACGCCCAAACCTTGCGTATTCTCACGCGGCTGGAGCCAAAAATTGTCTCTGATGAGGGGGAGAGCTTTGGGCTGAACTTGTCGCGGGCTGCTCTTGATGCTGCATGTAAGTATCCGTGGGCTAAAACAAATGCGGATGGCAGTGTCAATAAGAAATACAGTGCTTATGATGAGGACGCAGAAATTCTTGCTTGGATCAGGCAAGGCCATGAAGACCTCAGACCACCAATCGAAGCTCAGGTCATGGACTTTTCCGATGATATTGCCTACTCAGTACACGATGTAGAAGACGGCATTGTTTCCGGTCGCATCGATTTGAAAGTGCTGTGGGACCTGGTGGAATTAGCAGCACTGGCGGACAAAGGCGCAGCAGCTTTCGGAGGCTCGCCTGCAGAACTCATCGAGGGCGCAGCCTCGTTGCGGGAGCTTCCTGTGGTAGCGGCCGCTGCAGATTTTGATTTCTCACTGCGTTCCTACGCTGCGCTGAAGGCCATGACCTCAGAACTAGTGGGAAGATACGTTGGCTCTACCATCGAGTCAACAAAGAAAACACACGCCGGCATTGATGTGGGACGCATGCACGGCGATTTGATCATTCCAGAAACAGCGGCCAGTGAAGTAAAACTGCTCAAAACGTTAGCGGTTCTCTATGTGATGGATGACCCAGGGCACCTTGCGCGCCAAAACAGGCAACGGGATCGTATCTTCCGGGTTTTTGACTACCTGGTGCTGGGGGCTCCGGGATCGTTGGATCCGATGTATCGCCAGTG
GTTTATTGAAGCGGATTCAGAATCGGAACAGATCCGTGTGATTGTTGATCAGATTGCGTCGATGACGGAGTCTCGTCTGGAACGCCTTGCCCGGAATGCTGCTGACATCTCAGGATTCTTGGGATAGTTGGTTAGAGCAGCAGCGATTTTTAGTAAGGCCAATAACATGTTTTGGCTTAAACCTGTGTCGTGTCAGATGGTGGCGAAGTAGAGTTCGCAAAGCTAGCGAACATGAATTCGTGTTCAGGAACTTAACAGGGATCAAACAGAGAACAGAGAACAGATCACGCTGCCCAAAAATCGCACTTTTAAGGTTTGTGGGCGTCTGTGTGTGGTTTGCCGCTGTAAAGTATCACCACGTTATGCGCCCTGGTGTGATCAAGCGTTCGTTCTGGGTCGAAACCCCAAAAGTCACAATTCCCCAGAAGCGGGTCAAACCCATTTAGCTTATTGCTTACATATCGAGGGTTTAGAAAAGTGATTTGTCGGATCAGTCGGTTTCTGCCAAGTAAATAGAACTTTATAAATTTTGTGGCTCTCAAATCTTAGGCCACGGCTTCCGATTTGAACCGGAGGTTCAAAAGGCTTATATAGACAAGATTCTGCATCGTCTCACGAGCCCCTCATTGCCTGACACGGTCAATCGTGTGGGAGGTACCAATCCGTGAGATTTCTGCCAACGAGCGATTCATTGGCCCCGCTGCAGAGCTGGCAGAACACGGACATAACCCAAATAATCTGAGGTCTGCCGTTTGCAGCAGCATTAGCGTTTGATGTGGAAGGTGATGCAGAGGCTGTTGATCTGCAAGCGCGTCTTTCCCAAGCACGGGGGAACCCTGAAGCATCGGATGCTCTAGTTGCTGAGCTGACTGGTGTTACTGCTAATCATCCGTTGGTCAGTGCTTGTCTGAAGTTTCCGCTCAATCCTAAGCTTCTCAAGATTTCGTAAAAAAGCTGCCAACTACCGTAAAACCGCACTACTAGAGGAGTGCGTTTTTCGTTCCTGAACACATTGCGTGCTGCAACTTAATTATGGTCCTCCCAGCTCAGTGTGCTGTGTGGATTGTTTATTCTCGTCCATTAAGTGATCGAGAAAAAGTTGTTGTAAAGTCATGCGCATGTGTGGAATTGTTGGATATATTGGCCAGGCGGGCGACTCCCGTGATTACTTTGCGCTTGACGTCGTTTTAGAAGGACTGCGCCGACTTGAATACCGCGGTTATGATTCCGCAGGTGTAGCTGTTCATGCGAACGGTGAAATCAGCTACCGAAAGAAGGCTGGAAAGGTAGCTGCGCTGGACGCTGAGATCGCTCGCGCTCCTTTGGCGGATTCCATTTTGGCTATTGGTCACACCCGGTGGGCAACTCACGGTGGACCAACCGATGCAAATGCACACCCCCATGTTGTTGATGGCGGCAAGTTAGCTGTCGTACACAACGGTATTATTGAAAACTTTGCAGAGCTGCGCGCAGAGCTTTCAGCTAAGGGCTACAACTTTGTTTCCGTTACTGACACTGAAGTTGCCGCCACATTGCTGGCAGAAATCTACAACACCCAGGCTAATGGCGATCTGACCAAGGCTATGCAGCTTACTGGTCAGCGTCTTGAGGGTGCGTTCACCCTGCTGGCTATCCATGCTGATCATGATGATCGTATTGTTGCAGCGCGCCGTAACTCTCCTTTGGTTATTGGCTTGGGTGAAGGCGAAAACTTCCTCGGCTCTGACGTTTCTGGCTTCATCGATTACACCCGCAAGGCTGTTGAGATGGGCAACGATCAGATTGTGACCATCACTGCGAACGACTACCAGATCACCAACTTCGATGGTTCTGAGGCAACCGGAAAACCTTTCGACGTGGAGTGGGATGCGGCTGCTGCTGAAAAGGGTGGCTTTGATTCCTTCATGGATAAGGAAATCCACGACCAGCCAGCTGCAGTGCGTGACACCCTCCTCGGACGTTTAGATGAGGATGGCAAGCTGGTCCTTGATGAGCTTCGT
37 changes: 37 additions & 0 deletions test/test_parsemarkers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import unittest
import vamb
import testtools
from pathlib import Path
import tempfile
import shutil
import io


class TestParseMarkers(unittest.TestCase):
    """Tests for constructing, saving and loading `vamb.parsemarkers.Markers`."""

    def test_instantiate(self):
        """Build Markers from the fixture files, then round-trip via save/load."""
        # `from_files` must be given a directory path that does not exist yet
        # (presumably it creates the directory itself -- confirm against
        # vamb.parsemarkers). Instead of creating a temp dir and deleting it
        # to "reserve" a name, which is racy and leaks anything created there
        # if the test fails, use a fresh child of a managed temporary
        # directory that is always removed when the test finishes.
        tmp_parent = tempfile.TemporaryDirectory()
        self.addCleanup(tmp_parent.cleanup)
        tmp_path = Path(tmp_parent.name).joinpath("markers_tmp")

        markers = vamb.parsemarkers.Markers.from_files(
            Path(testtools.DATADIR).joinpath("marker.fna"),
            Path(testtools.PARENTDIR).joinpath("vamb").joinpath("marker.hmm"),
            ["abc"],
            tmp_path,
            4,
            None,
        )

        # The single contig "abc" is expected to carry exactly one marker, id 39.
        self.assertIsNotNone(markers.markers[0])
        self.assertEqual(len(markers.markers), 1)
        self.assertEqual(set(markers.markers[0]), {39})
        self.assertEqual(
            markers.refhash, vamb.vambtools.RefHasher.hash_refnames(["abc"])
        )

        # Serialise to an in-memory text buffer and read it back.
        buf = io.StringIO()
        markers.save(buf)
        buf.seek(0)

        # The reloaded object must agree with the original one.
        markers2 = vamb.parsemarkers.Markers.load(buf, markers.refhash)
        self.assertEqual(len(markers.markers), len(markers2.markers))
        self.assertEqual(set(markers.markers[0]), set(markers2.markers[0]))
        self.assertEqual(markers.marker_names, markers2.marker_names)
8 changes: 5 additions & 3 deletions test/test_reclustering.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
import unittest

# Invariants:
# It produces disjoint clusters, a subset of the input points

# For CAMI dataset, compute comp, abundance, taxonomy, markers
# Subset to e.g. 5 genera plus a few unclassified contigs

Expand All @@ -22,3 +19,8 @@ class TestKmeansReclustering(unittest.TestCase):
# Create latent

# Initial clustering


class TestDBScanReclustering(unittest.TestCase):
    """Placeholder for DBSCAN reclustering tests; no test methods yet.

    Invariant to cover: it produces disjoint clusters, a subset of the
    input points.
    """
6 changes: 3 additions & 3 deletions test/test_semisupervised_encode.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ def make_random_annotation(self):
phylum = np.random.choice(self.phyla, 1)[0]
clas = np.random.choice(self.classes[phylum], 1)[0]
if np.random.random() <= 0.2:
return vamb.vambtools.ContigTaxonomy.from_semicolon_sep(
return vamb.taxonomy.ContigTaxonomy.from_semicolon_sep(
";".join([self.domain])
)
if 0.2 < np.random.random() <= 0.5:
return vamb.vambtools.ContigTaxonomy.from_semicolon_sep(
return vamb.taxonomy.ContigTaxonomy.from_semicolon_sep(
";".join([self.domain, phylum])
)
return vamb.vambtools.ContigTaxonomy.from_semicolon_sep(
return vamb.taxonomy.ContigTaxonomy.from_semicolon_sep(
";".join([self.domain, phylum, clas])
)

Expand Down
2 changes: 2 additions & 0 deletions vamb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from . import parsebam
from . import parsecontigs
from . import parsemarkers
from . import taxonomy
from . import cluster
from . import encode
from . import aamb_encode
Expand All @@ -35,6 +36,7 @@
"parsebam",
"parsecontigs",
"parsemarkers",
"taxonomy",
"cluster",
"encode",
"aamb_encode",
Expand Down
Loading
Loading