From 2931283818e30090afaacb2ad96c6c6aecb3c216 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Fri, 16 Feb 2024 13:20:13 -0500 Subject: [PATCH 1/7] bugfix: taxdb is integers, not strings --- metagenomics.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/metagenomics.py b/metagenomics.py index 08e72d40..5a20752d 100755 --- a/metagenomics.py +++ b/metagenomics.py @@ -728,18 +728,16 @@ def filter_taxids_to_focal_hits(taxids_tsv, focal_report_tsv, taxdb_dir, min_rea with util.file.open_or_gzopen(focal_report_tsv, "rt") as inf: for row in csv.DictReader(inf, delimiter='\t'): if int(row['reads_excl_children']) >= min_read_count: - hits.add(row['taxon_id']) + hits.add(int(row['taxon_id'])) # filter taxids_tsv -> output_tsv with util.file.open_or_gzopen(taxids_tsv, "rt") as inf: with util.file.open_or_gzopen(output_tsv, "wt") as outf: for line in inf: - taxid = line.rstrip('\r\n').split('\t')[0] + taxid = int(line.rstrip('\r\n').split('\t')[0]) ancestors = taxdb.get_ordered_ancestors(taxid) - for node in [taxid] + ancestors: - if taxid in hits: - outf.write(line) - break + if any(node in hits for node in [taxid] + ancestors): + outf.write(line) __commands__.append(('filter_taxids_to_focal_hits', parser_filter_taxids_to_focal_hits)) From 493fbce56e92ffa427202bcff66a37269fce3c8a Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Fri, 16 Feb 2024 13:50:27 -0500 Subject: [PATCH 2/7] fix build: include viral-core requirements-conda.txt with mamba solve --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 21730250..03aaf024 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ ENV VIRAL_CLASSIFY_PATH=$INSTALL_PATH/viral-classify \ COPY requirements-conda.txt requirements-conda-env2.txt $VIRAL_CLASSIFY_PATH/ # install most dependencies to the main environment -RUN $VIRAL_NGS_PATH/docker/install-conda-dependencies.sh $VIRAL_CLASSIFY_PATH/requirements-conda.txt +RUN $VIRAL_NGS_PATH/docker/install-conda-dependencies.sh $VIRAL_CLASSIFY_PATH/requirements-conda.txt $VIRAL_NGS_PATH/requirements-conda.txt # install packages with dependency incompatibilities to the second environment RUN CONDA_PREFIX="$MINICONDA_PATH/envs/env2"; \ From 08b8e65245daeb968b166871232d76a5334b87a8 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Fri, 16 Feb 2024 13:50:36 -0500 Subject: [PATCH 3/7] bump versions --- requirements-conda.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/requirements-conda.txt b/requirements-conda.txt index 4132ffc5..f0d9119c 100644 --- a/requirements-conda.txt +++ b/requirements-conda.txt @@ -1,7 +1,7 @@ -blast=2.9.0 +blast>=2.9.0 bmtagger>=3.101 -diamond>=2.1.6 -kmc>=3.1.1rc1 -kraken2>=2.1.2 +diamond>=2.1.9 +kmc>=3.2.1 +kraken2>=2.1.3 krona>=2.8.1 -last>=876 +last>=1541 From 5d05561db724f623aadd4077ca751bc63fbfad20 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Fri, 16 Feb 2024 14:00:32 -0500 Subject: [PATCH 4/7] pin blast version for now --- requirements-conda.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-conda.txt b/requirements-conda.txt index f0d9119c..69458412 100644 --- a/requirements-conda.txt +++ b/requirements-conda.txt @@ -1,4 +1,4 @@ -blast>=2.9.0 +blast=2.9.0 bmtagger>=3.101 diamond>=2.1.9 kmc>=3.2.1 From c42e48ec387b790c71e0c73874e95188dfb5c686 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Fri, 16 Feb 2024 16:23:17 -0500 Subject: [PATCH 5/7] revert pinning of blast, breaks resolver --- requirements-conda.txt | 2 +- test/unit/test_taxon_filter.py | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/requirements-conda.txt b/requirements-conda.txt index 69458412..f0d9119c 100644 --- a/requirements-conda.txt +++ b/requirements-conda.txt @@ -1,4 +1,4 @@ -blast=2.9.0 +blast>=2.9.0 bmtagger>=3.101 diamond>=2.1.9 kmc>=3.2.1 diff --git a/test/unit/test_taxon_filter.py b/test/unit/test_taxon_filter.py index 901a0782..7afa75d8 100644 --- a/test/unit/test_taxon_filter.py +++ b/test/unit/test_taxon_filter.py @@ -4,6 +4,7 @@ + "hlevitin@broadinstitute.org" import unittest +import glob import os, os.path import tempfile import shutil @@ -364,13 +365,11 @@ def setUp(self): # tar one db, but not the other tar_db_tgz = util.file.mkstempfname('-humanChr9Subset.blastn.db.tar.gz') - cmd = ['tar', '-C', self.tempDir, '-cvzf', tar_db_tgz] - for ext in ('nhr', 'nin', 'nsq'): - cmd.append('humanChr9Subset.'+ext) + cmd = ['tar', '-C', self.tempDir, '-cvzf', tar_db_tgz] + glob.glob("humanChr9Subset.n*", root_dir=self.tempDir) subprocess.check_call(cmd) self.blastdbs_multi[1] = tar_db_tgz - for ext in ('nhr', 'nin', 'nsq'): - os.unlink(os.path.join(self.tempDir, 'humanChr9Subset.'+ext)) + for idx in glob.glob("humanChr9Subset.n*", root_dir=self.tempDir): + os.unlink(idx) def test_deplete_blastn_bam(self): tempDir = tempfile.mkdtemp() From 056d343ea95f7e79815d06453c5f679c7621302d Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Fri, 16 Feb 2024 16:44:11 -0500 Subject: [PATCH 6/7] our glob.glob doesnt have a root_dir option --- test/unit/test_taxon_filter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit/test_taxon_filter.py b/test/unit/test_taxon_filter.py index 7afa75d8..1d64ff3c 100644 --- a/test/unit/test_taxon_filter.py +++ b/test/unit/test_taxon_filter.py @@ -365,10 +365,10 @@ def setUp(self): # tar one db, but not the other tar_db_tgz = util.file.mkstempfname('-humanChr9Subset.blastn.db.tar.gz') - cmd = ['tar', '-C', self.tempDir, '-cvzf', tar_db_tgz] + glob.glob("humanChr9Subset.n*", root_dir=self.tempDir) + cmd = ['tar', '-C', self.tempDir, '-cvzf', tar_db_tgz] + map(os.path.basename, glob.glob(os.path.join(self.tempDir, "humanChr9Subset.n*"))) subprocess.check_call(cmd) self.blastdbs_multi[1] = tar_db_tgz - for idx in glob.glob("humanChr9Subset.n*", root_dir=self.tempDir): + for idx in glob.glob(os.path.join(self.tempDir, "humanChr9Subset.n*")): os.unlink(idx) def test_deplete_blastn_bam(self): From 30bc8bd83d94f4885f22d11fd40251b2ebd914f1 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Fri, 16 Feb 2024 16:56:59 -0500 Subject: [PATCH 7/7] fix list append --- test/unit/test_taxon_filter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/test_taxon_filter.py b/test/unit/test_taxon_filter.py index 1d64ff3c..83fc52a3 100644 --- a/test/unit/test_taxon_filter.py +++ b/test/unit/test_taxon_filter.py @@ -365,7 +365,7 @@ def setUp(self): # tar one db, but not the other tar_db_tgz = util.file.mkstempfname('-humanChr9Subset.blastn.db.tar.gz') - cmd = ['tar', '-C', self.tempDir, '-cvzf', tar_db_tgz] + map(os.path.basename, glob.glob(os.path.join(self.tempDir, "humanChr9Subset.n*"))) + cmd = ['tar', '-C', self.tempDir, '-cvzf', tar_db_tgz] + list(os.path.basename(f) for f in glob.glob(os.path.join(self.tempDir, "humanChr9Subset.n*"))) subprocess.check_call(cmd) self.blastdbs_multi[1] = tar_db_tgz for idx in glob.glob(os.path.join(self.tempDir, "humanChr9Subset.n*")):