From b3104a85bfd2eee70e7d93c0a6df683fd47c1b63 Mon Sep 17 00:00:00 2001 From: Amine Ghozlane Date: Wed, 28 Aug 2024 15:53:23 +0200 Subject: [PATCH] Fix consensus with large catalogue --- meteor/counter.py | 8 +++++--- meteor/downloader.py | 9 +++++++-- meteor/mapper.py | 6 ++---- meteor/phylogeny.py | 29 +++++++++++++++++++---------- meteor/tests/test_counter.py | 6 +++--- meteor/tests/test_fastq_importer.py | 1 + meteor/variantcalling.py | 2 ++ 7 files changed, 39 insertions(+), 22 deletions(-) diff --git a/meteor/counter.py b/meteor/counter.py index 9998d19..45a82b0 100644 --- a/meteor/counter.py +++ b/meteor/counter.py @@ -387,7 +387,7 @@ def save_cram_strain( "wc", template=cramdesc, reference_filename=str(reference.resolve()), - threads=self.meteor.threads + threads=self.meteor.threads, ) as total_reads: for element in read_chain: if int(element.reference_name) in gene_id_set: @@ -417,7 +417,9 @@ def launch_counting( else: logging.info("Launch counting") pysam.set_verbosity(0) - with AlignmentFile(str(raw_cramfile.resolve()), threads=self.meteor.threads) as cramdesc: + with AlignmentFile( + str(raw_cramfile.resolve()), threads=self.meteor.threads + ) as cramdesc: # create a dictionary containing the length of reference genes # get name of reference sequence references = [int(ref) for ref in cramdesc.references] @@ -463,7 +465,7 @@ def launch_counting( "wc", template=cramdesc, reference_filename=str(reference.resolve()), - threads=self.meteor.threads + threads=self.meteor.threads, ) as total_reads: for element in chain.from_iterable(reads.values()): # if int(element.reference_name) in ref_json["reference_file"]: diff --git a/meteor/downloader.py b/meteor/downloader.py index fedb442..c57da00 100644 --- a/meteor/downloader.py +++ b/meteor/downloader.py @@ -44,12 +44,17 @@ class Downloader(Session): @staticmethod def load_catalogues_config() -> dict: try: - config_data = importlib.resources.files("meteor") / str(Downloader.CONFIG_DATA_FILE) + config_data = importlib.resources.files("meteor") / str( + Downloader.CONFIG_DATA_FILE + ) with importlib.resources.as_file(config_data) as configuration_path: with configuration_path.open("rt", encoding="UTF-8") as config: return json.load(config) except FileNotFoundError: - logging.error("The file %s is missing in meteor source", Downloader.CONFIG_DATA_FILE.name) + logging.error( + "The file %s is missing in meteor source", + Downloader.CONFIG_DATA_FILE.name, + ) sys.exit(1) @staticmethod diff --git a/meteor/mapper.py b/meteor/mapper.py index b02a906..524be16 100644 --- a/meteor/mapper.py +++ b/meteor/mapper.py @@ -153,9 +153,7 @@ def execute(self) -> None: ) as mapping_exec: assert mapping_exec.stdout is not None and mapping_exec.stderr is not None with pysam.AlignmentFile( - mapping_exec.stdout, - "r", - threads=self.meteor.threads + mapping_exec.stdout, "r", threads=self.meteor.threads ) as samdesc: with pysam.AlignmentFile( str(cram_file.resolve()), @@ -163,7 +161,7 @@ def execute(self) -> None: "wc", template=samdesc, reference_filename=str(reference.resolve()), - threads=self.meteor.threads + threads=self.meteor.threads, ) as cram: for element in samdesc: cram.write(element) diff --git a/meteor/phylogeny.py b/meteor/phylogeny.py index 57bc2c0..cb384bd 100644 --- a/meteor/phylogeny.py +++ b/meteor/phylogeny.py @@ -13,14 +13,17 @@ """Effective phylogeny""" import re import logging + # import sys from dataclasses import dataclass, field from meteor.session import Session, Component + # from subprocess import check_call, run, DEVNULL from time import perf_counter from pathlib import Path import tempfile from tempfile import NamedTemporaryFile + # from packaging.version import parse from collections import OrderedDict from datetime import datetime @@ -142,7 +145,10 @@ def execute(self) -> None: msp_count = len(self.msp_file_list) for idx, msp_file in enumerate(self.msp_file_list, start=1): logging.info( - "%d/%d %s: Start analysis", idx, msp_count, msp_file.name.replace(".fasta", "") + "%d/%d %s: Start analysis", + idx, + msp_count, + msp_file.name.replace(".fasta", ""), ) with NamedTemporaryFile( mode="wt", dir=self.meteor.tmp_dir, suffix=".fasta" @@ -157,7 +163,7 @@ def execute(self) -> None: logging.info( "Only %d informative sites (< %d threshold) left after cleaning, skip.", info_sites, - self.min_info_sites + self.min_info_sites, ) # elif len(cleaned_seqs) >= 4: # # Compute trees @@ -182,8 +188,8 @@ def execute(self) -> None: # ], # stdout = DEVNULL # ) - # if result != 0: - # logging.error("raxml-ng failed with return code %d", result) + # if result != 0: + # logging.error("raxml-ng failed with return code %d", result) else: # logging.info( # "Less than 4 sequences, run cogent3" @@ -208,16 +214,19 @@ def execute(self) -> None: # ) if tree_file.with_suffix(".tree").exists(): self.tree_files.append(tree_file.with_suffix(".tree")) - logging.info("Completed MSP tree for MSP %s", msp_file.name.replace(".fasta", "")) + logging.info( + "Completed MSP tree for MSP %s", + msp_file.name.replace(".fasta", ""), + ) # elif tree_file.with_suffix(".raxml.bestTree").exists(): # self.tree_files.append(tree_file.with_suffix(".raxml.bestTree")) - # logging.info("Completed MSP tree with raxml") + # logging.info("Completed MSP tree with raxml") else: - logging.info( - "No tree file generated" - ) + logging.info("No tree file generated") logging.info("Completed phylogeny in %f seconds", perf_counter() - start) - logging.info("Trees were generated for %d/%d MSPs", len(self.tree_files), msp_count) + logging.info( + "Trees were generated for %d/%d MSPs", len(self.tree_files), msp_count + ) # config = self.set_tree_config(raxml_ng_version) config = self.set_tree_config() self.save_config(config, self.meteor.tree_dir / "census_stage_4.json") diff --git a/meteor/tests/test_counter.py b/meteor/tests/test_counter.py index 2540979..6f47e26 100644 --- a/meteor/tests/test_counter.py +++ b/meteor/tests/test_counter.py @@ -55,7 +55,7 @@ def counter_unique(datadir: Path, tmp_path: Path) -> Counter: alignment_number=10000, keep_filtered_alignments=True, identity_threshold=0.95, - core_size=100 + core_size=100, ) @@ -77,7 +77,7 @@ def counter_smart_shared(datadir: Path, tmp_path: Path) -> Counter: alignment_number=10000, keep_filtered_alignments=True, identity_threshold=0.95, - core_size=100 + core_size=100, ) @@ -100,7 +100,7 @@ def counter_total(datadir: Path, tmp_path: Path) -> Counter: keep_all_alignments=False, keep_filtered_alignments=True, identity_threshold=0.95, - core_size=100 + core_size=100, ) diff --git a/meteor/tests/test_fastq_importer.py b/meteor/tests/test_fastq_importer.py index adf23fb..e42a4d6 100644 --- a/meteor/tests/test_fastq_importer.py +++ b/meteor/tests/test_fastq_importer.py @@ -89,6 +89,7 @@ def test_replace_ext(builder: FastqImporter, fastq_filename: str, name: str) -> def test_get_tag_none(builder: FastqImporter, fastq_filename: str) -> None: assert builder.get_tag(fastq_filename) is None + @pytest.mark.parametrize( ("fastq_filename", "tag"), (("simple_case_R1.fastq", "1"), ("pretty.complex_pain_2.fq.xz", "2")), diff --git a/meteor/variantcalling.py b/meteor/variantcalling.py index 024326c..292a775 100644 --- a/meteor/variantcalling.py +++ b/meteor/variantcalling.py @@ -351,6 +351,8 @@ def create_consensus( consensus[row["startpos"] : row["endpos"]] = ( self.meteor.DEFAULT_GAP_CHAR ) + else: + continue consensus_f.write(f">{gene_id}\n") consensus_f.write("".join(consensus) + "\n") del consensus