From 9405cdc5c8e08aa0e142a6b600e4d4ae8a6e975e Mon Sep 17 00:00:00 2001 From: Florian Plaza Onate Date: Tue, 2 Apr 2024 15:12:54 +0000 Subject: [PATCH 1/6] Hide feature not implemented --- meteor/meteor.py | 52 +++++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/meteor/meteor.py b/meteor/meteor.py index 8ec2ec1..3388eb1 100644 --- a/meteor/meteor.py +++ b/meteor/meteor.py @@ -460,13 +460,13 @@ def get_arguments() -> Namespace: # pragma: no cover help="Remove samples with no detected species (MSPs) " "(default: %(default)s).", ) - merging_parser.add_argument( - "-m", - dest="output_mpa", - action="store_true", - help="Save the merged species abundance table in the style of MetaPhlan " - "(default: %(default)s).", - ) + #merging_parser.add_argument( + # "-m", + # dest="output_mpa", + # action="store_true", + # help="Save the merged species abundance table in the style of MetaPhlan " + # "(default: %(default)s).", + #) merging_parser.add_argument( "-b", dest="output_biom", @@ -474,22 +474,22 @@ def get_arguments() -> Namespace: # pragma: no cover help="Save the merged species abundance table in biom format " "(default: %(default)s).", ) - merging_parser.add_argument( - "--tax_lev", - dest="taxonomic_level", - default=Merging.DEFAULT_MPA_TAXONOMIC_LEVEL, - choices=Merging.MPA_TAXONOMIC_LEVELS, - help="""The taxonomic level for mpa output (default: %(default)s): - 'a' : all taxonomic levels - 'k' : kingdoms - 'p' : phyla only - 'c' : classes only - 'o' : orders only - 'f' : families only - 'g' : genera only - 's' : species only - 't' : MSPs only""", - ) + #merging_parser.add_argument( + # "--tax_lev", + # dest="taxonomic_level", + # default=Merging.DEFAULT_MPA_TAXONOMIC_LEVEL, + # choices=Merging.MPA_TAXONOMIC_LEVELS, + # help="""The taxonomic level for mpa output (default: %(default)s): + # 'a' : all taxonomic levels + # 'k' : kingdoms + # 'p' : phyla only + # 'c' : classes only + # 'o' : orders only + # 'f' : families only + # 'g' : genera only + # 's' : species only + # 't' : MSPs only""", + #) merging_parser.add_argument( "-o", dest="merging_dir", @@ -782,8 +782,10 @@ def main() -> None: # pragma: no cover args.min_msp_abundance, args.min_msp_occurrence, args.remove_sample_with_no_msp, - args.output_mpa, - args.taxonomic_level, + None, + None, + #args.output_mpa, + #args.taxonomic_level, args.output_biom, args.output_gene_matrix, ) From 16c34aeada3a48cd1d3c1198f4efe455af7a8f4e Mon Sep 17 00:00:00 2001 From: Florian Plaza Onate Date: Tue, 2 Apr 2024 15:38:36 +0000 Subject: [PATCH 2/6] Fix mypy warning + check counting and mapping types --- meteor/counter.py | 5 +++++ meteor/mapper.py | 7 ++++++- meteor/merging.py | 4 ++-- meteor/meteor.py | 8 ++++---- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/meteor/counter.py b/meteor/counter.py index 98a1c6d..46945ce 100644 --- a/meteor/counter.py +++ b/meteor/counter.py @@ -50,6 +50,9 @@ class Counter(Session): json_data: dict = field(default_factory=dict) def __post_init__(self) -> None: + if self.counting_type not in Counter.COUNTING_TYPES: + raise ValueError(f'{self.counting_type} is not a valid counting type') + if self.meteor.tmp_path: self.meteor.tmp_path.mkdir(exist_ok=True) self.meteor.tmp_dir = Path(mkdtemp(dir=self.meteor.tmp_path)) @@ -133,6 +136,8 @@ def filter_alignments( # contains a list of alignment of each read reads: dict[str, list[AlignedSegment]] = {} for element in cramdesc: + assert element.query_name is not None and element.reference_name is not None + # identity = (element.query_length - element.get_tag("NM")) / element.query_length # identity = 1.0 - (element.get_tag("NM") / element.query_alignment_length) ali = sum(self.get_aligned_nucleotides(element)) diff --git a/meteor/mapper.py b/meteor/mapper.py index 556949f..53c8b33 100644 --- a/meteor/mapper.py +++ b/meteor/mapper.py @@ -33,7 +33,7 @@ class Mapper(Session): """Run the bowtie""" DEFAULT_NUM_THREADS : ClassVar[int] = 1 - MAPPING_TYPES: ClassVar[list[str]] = ['end_to_end', 'local'] + MAPPING_TYPES: ClassVar[list[str]] = ['end-to-end', 'local'] DEFAULT_MAPPING_TYPE: ClassVar[str] = 'end-to-end' DEFAULT_TRIM: ClassVar[int] = 80 NO_TRIM: ClassVar[int] = 0 @@ -48,6 +48,10 @@ class Mapper(Session): counting_type: str identity_threshold: float + def __post_init__(self) -> None: + if self.mapping_type not in Mapper.MAPPING_TYPES: + raise ValueError(f'{self.mapping_type} is not a valid mapping type') + def set_mapping_config( self, cram_file: Path, @@ -147,6 +151,7 @@ def execute(self) -> None: stderr=PIPE, ) # cramfile_unsorted = Path(mkstemp(dir=self.meteor.tmp_dir)[1]) + assert mapping_exec.stdout is not None and mapping_exec.stderr is not None with pysam.AlignmentFile( mapping_exec.stdout, "r", diff --git a/meteor/merging.py b/meteor/merging.py index b95d102..6d7ae80 100644 --- a/meteor/merging.py +++ b/meteor/merging.py @@ -19,7 +19,7 @@ import logging import sys import numpy as np -from biom.table import Table +from biom.table import Table # type: ignore from typing import ClassVar @@ -348,7 +348,7 @@ def execute(self) -> None: / ref_json["annotation"]["taxonomy"]["filename"], sep="\t", header=0, - usecols=self.ranks.keys(), + usecols=list(self.ranks.keys()), ) annotation = annotation[ diff --git a/meteor/meteor.py b/meteor/meteor.py index 3388eb1..80c90da 100644 --- a/meteor/meteor.py +++ b/meteor/meteor.py @@ -782,8 +782,8 @@ def main() -> None: # pragma: no cover args.min_msp_abundance, args.min_msp_occurrence, args.remove_sample_with_no_msp, - None, - None, + False, + "a", #args.output_mpa, #args.taxonomic_level, args.output_biom, @@ -806,10 +806,10 @@ def main() -> None: # pragma: no cover fastq_importer.execute() meteor.fastq_dir = Path(tmpdirname) / "test" meteor.ref_dir = meteor.ref_dir / "mock" - counter = Counter(meteor, "best", "end-to-end", 80, 0.97, 100, False, True) + counter = Counter(meteor, "total", "end-to-end", 80, 0.97, 100, False, True) counter.execute() meteor.fastq_dir = Path(tmpdirname) / "test2" - counter = Counter(meteor, "best", "end-to-end", 80, 0.97, 100, False, True) + counter = Counter(meteor, "total", "end-to-end", 80, 0.97, 100, False, True) counter.execute() # Remove the mapping directory and its contents shutil.rmtree(Path(tmpdirname) / "test") From 3c5765692957e9536e1505d5db96a6d158b4479b Mon Sep 17 00:00:00 2001 From: Florian Plaza Onate Date: Tue, 2 Apr 2024 16:35:08 +0000 Subject: [PATCH 3/6] Check normalization and tree output format --- meteor/mapper.py | 4 ++-- meteor/profiler.py | 5 ++++- meteor/tests/test_profiler.py | 2 +- meteor/treebuilder.py | 5 ++++- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/meteor/mapper.py b/meteor/mapper.py index 53c8b33..951101c 100644 --- a/meteor/mapper.py +++ b/meteor/mapper.py @@ -33,8 +33,8 @@ class Mapper(Session): """Run the bowtie""" DEFAULT_NUM_THREADS : ClassVar[int] = 1 - MAPPING_TYPES: ClassVar[list[str]] = ['end-to-end', 'local'] - DEFAULT_MAPPING_TYPE: ClassVar[str] = 'end-to-end' + MAPPING_TYPES: ClassVar[list[str]] = ["end-to-end", "local"] + DEFAULT_MAPPING_TYPE: ClassVar[str] = "end-to-end" DEFAULT_TRIM: ClassVar[int] = 80 NO_TRIM: ClassVar[int] = 0 DEFAULT_ALIGNMENT_NUMBER: ClassVar[int] = 10000 diff --git a/meteor/profiler.py b/meteor/profiler.py index 61e7125..74daea4 100644 --- a/meteor/profiler.py +++ b/meteor/profiler.py @@ -33,7 +33,7 @@ class Profiler(Session): NO_RAREFACTION: ClassVar[int] = 0 DEFAULT_RAREFACTION_LEVEL: ClassVar[int] = NO_RAREFACTION DEFAULT_RANDOM_SEED: ClassVar[int] = 1234 - NORMALIZATIONS: ClassVar[list[str]] = ["coverage", "fpkm", "raw"] + NORMALIZATIONS: ClassVar[list[str|None]] = [None, "coverage", "fpkm", "raw"] DEFAULT_NORMALIZATION: ClassVar[str] = "coverage" DEFAULT_COVERAGE_FACTOR: ClassVar[float] = 100.0 DEFAULT_CORE_SIZE: ClassVar[int] = 100 @@ -50,6 +50,9 @@ class Profiler(Session): coverage_factor: float def __post_init__(self): + if self.normalization not in Profiler.NORMALIZATIONS: + raise ValueError(f'{self.normalization} is not a valid normalization') + # Get the json file self.sample_config = self.get_census_stage(self.meteor.mapping_dir, 1) diff --git a/meteor/tests/test_profiler.py b/meteor/tests/test_profiler.py index f375cc6..cd6283f 100644 --- a/meteor/tests/test_profiler.py +++ b/meteor/tests/test_profiler.py @@ -31,7 +31,7 @@ def profiler_standard(datadir: Path, tmp_path: Path) -> Profiler: rarefaction_level=-1, seed=12345, coverage_factor=100.0, - normalization="", + normalization=None, core_size=4, msp_filter=0.5, completeness=0.6, diff --git a/meteor/treebuilder.py b/meteor/treebuilder.py index 9eca09d..921374f 100644 --- a/meteor/treebuilder.py +++ b/meteor/treebuilder.py @@ -32,7 +32,7 @@ class TreeBuilder(Session): DEFAULT_MAX_GAP: ClassVar[float] = 0.5 DEFAULT_GAP_CHAR: ClassVar[str] = "-" - OUTPUT_FORMATS: ClassVar[list[str]] = ["png", "svg", "pdf", "txt"] + OUTPUT_FORMATS: ClassVar[list[str|None]] = [None, "png", "svg", "pdf", "txt"] DEFAULT_OUTPUT_FORMAT: ClassVar[str|None] = None DEFAULT_WIDTH: ClassVar[int] = 500 DEFAULT_HEIGHT: ClassVar[int] = 500 @@ -46,6 +46,9 @@ class TreeBuilder(Session): gap_char: str def __post_init__(self) -> None: + if self.format not in TreeBuilder.OUTPUT_FORMATS: + raise ValueError(f'{self.format} is not a valid output format') + self.meteor.tmp_dir = Path(mkdtemp(dir=self.meteor.tmp_path)) self.meteor.tree_dir.mkdir(exist_ok=True, parents=True) From 672921b590ba84a4c6ce1d1731acda458b3b8438 Mon Sep 17 00:00:00 2001 From: Florian Plaza Onate Date: Wed, 3 Apr 2024 11:25:03 +0000 Subject: [PATCH 4/6] Remove used parameter --- meteor/counter.py | 1 - meteor/mapper.py | 2 -- meteor/tests/test_mapper.py | 1 - 3 files changed, 4 deletions(-) diff --git a/meteor/counter.py b/meteor/counter.py index 46945ce..2505db2 100644 --- a/meteor/counter.py +++ b/meteor/counter.py @@ -79,7 +79,6 @@ def launch_mapping(self) -> None: self.mapping_type, self.trim, self.alignment_number, - self.counting_type, self.identity_threshold, ) mapping_process.execute() diff --git a/meteor/mapper.py b/meteor/mapper.py index 951101c..f8a9a45 100644 --- a/meteor/mapper.py +++ b/meteor/mapper.py @@ -45,7 +45,6 @@ class Mapper(Session): mapping_type: str trim: int alignment_number: int - counting_type: str identity_threshold: float def __post_init__(self) -> None: @@ -117,7 +116,6 @@ def execute(self) -> None: if self.trim > Mapper.NO_TRIM: parameters += f"--trim-to {self.trim} " if self.alignment_number > 1: - # and self.counting_type != "best" parameters += f"-k {self.alignment_number} " # Check the bowtie2 version bowtie_exec = run(["bowtie2", "--version"], capture_output=True) diff --git a/meteor/tests/test_mapper.py b/meteor/tests/test_mapper.py index dbf2f77..5627ff6 100644 --- a/meteor/tests/test_mapper.py +++ b/meteor/tests/test_mapper.py @@ -52,7 +52,6 @@ def mapping_builder(datadir: Path, tmp_path: Path) -> Mapper: "end-to-end", 80, 10000, - "smart_shared_reads", 0.95, ) From a9fc8511f7cf868223a07d268fadbdd9ad846e57 Mon Sep 17 00:00:00 2001 From: Florian Plaza Onate Date: Wed, 3 Apr 2024 11:29:44 +0000 Subject: [PATCH 5/6] Fix mypy warnings --- meteor/merging.py | 2 +- meteor/meteor.py | 2 +- meteor/profiler.py | 2 +- meteor/tests/test_counter.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/meteor/merging.py b/meteor/merging.py index 6d7ae80..c0c9d4c 100644 --- a/meteor/merging.py +++ b/meteor/merging.py @@ -41,7 +41,7 @@ class Merging(Session): min_msp_occurrence: int remove_sample_with_no_msp: bool output_mpa: bool - mpa_taxonomic_level: str + mpa_taxonomic_level: str|None output_biom: bool output_gene_matrix: bool ranks: dict[str, str] = field( diff --git a/meteor/meteor.py b/meteor/meteor.py index 80c90da..f05afab 100644 --- a/meteor/meteor.py +++ b/meteor/meteor.py @@ -783,7 +783,7 @@ def main() -> None: # pragma: no cover args.min_msp_occurrence, args.remove_sample_with_no_msp, False, - "a", + None, #args.output_mpa, #args.taxonomic_level, args.output_biom, diff --git a/meteor/profiler.py b/meteor/profiler.py index 74daea4..057a0e5 100644 --- a/meteor/profiler.py +++ b/meteor/profiler.py @@ -43,7 +43,7 @@ class Profiler(Session): meteor: type[Component] rarefaction_level: int seed: int - normalization: str + normalization: str|None core_size: int msp_filter: float completeness: float diff --git a/meteor/tests/test_counter.py b/meteor/tests/test_counter.py index 34b5832..f128f5c 100644 --- a/meteor/tests/test_counter.py +++ b/meteor/tests/test_counter.py @@ -284,8 +284,8 @@ def test_save_cram(counter_unique: Counter, datadir: Path, tmp_path: Path) -> No reads, _ = counter_unique.filter_alignments( cramdesc ) # pylint: disable=unused-variable - read_list = list(chain(reads.values())) - merged_list = list(chain.from_iterable(read_list)) + read_list = reads.values() + merged_list = chain.from_iterable(read_list) tmpcramfile = tmp_path / "test" counter_unique.save_cram_strain(tmpcramfile, cramdesc, merged_list, ref_json) assert tmpcramfile.exists() From d65c8a595d913f495c594d8994d90b84fc4b6c62 Mon Sep 17 00:00:00 2001 From: Florian Plaza Onate Date: Wed, 3 Apr 2024 13:25:06 +0000 Subject: [PATCH 6/6] Fix pylint warnings --- meteor/counter.py | 2 +- meteor/downloader.py | 4 +- meteor/mapper.py | 82 ++++++++++++----------------- meteor/merging.py | 25 ++++----- meteor/phylogeny.py | 14 +++-- meteor/profiler.py | 4 +- meteor/referencebuilder.py | 2 +- meteor/strain.py | 6 +-- meteor/tests/test_fastq_importer.py | 8 +-- meteor/tests/test_parser.py | 8 +-- meteor/tests/test_variantcalling.py | 5 +- meteor/treebuilder.py | 8 +-- meteor/variantcalling.py | 28 +++++----- 13 files changed, 88 insertions(+), 108 deletions(-) diff --git a/meteor/counter.py b/meteor/counter.py index 2505db2..796bc78 100644 --- a/meteor/counter.py +++ b/meteor/counter.py @@ -51,7 +51,7 @@ class Counter(Session): def __post_init__(self) -> None: if self.counting_type not in Counter.COUNTING_TYPES: - raise ValueError(f'{self.counting_type} is not a valid counting type') + raise ValueError(f"{self.counting_type} is not a valid counting type") if self.meteor.tmp_path: self.meteor.tmp_path.mkdir(exist_ok=True) diff --git a/meteor/downloader.py b/meteor/downloader.py index de93196..fedb442 100644 --- a/meteor/downloader.py +++ b/meteor/downloader.py @@ -51,7 +51,7 @@ def load_catalogues_config() -> dict: except FileNotFoundError: logging.error("The file %s is missing in meteor source", Downloader.CONFIG_DATA_FILE.name) sys.exit(1) - + @staticmethod def get_available_catalogues() -> list[str]: catalogues_config = Downloader.load_catalogues_config() @@ -130,7 +130,7 @@ def execute(self) -> None: print(flush=True) if self.choice == Downloader.TEST_CATALOGUE: for sample in self.catalogues_config[self.choice]["samples"]: - logging.info(f"Download {sample} fastq file") + logging.info("Download %s fastq file", sample) url_fastq = self.catalogues_config[self.choice]["samples"][sample][ "catalogue" ] diff --git a/meteor/mapper.py b/meteor/mapper.py index f8a9a45..50b7301 100644 --- a/meteor/mapper.py +++ b/meteor/mapper.py @@ -49,7 +49,7 @@ class Mapper(Session): def __post_init__(self) -> None: if self.mapping_type not in Mapper.MAPPING_TYPES: - raise ValueError(f'{self.mapping_type} is not a valid mapping type') + raise ValueError(f"{self.mapping_type} is not a valid mapping type") def set_mapping_config( self, @@ -118,8 +118,8 @@ def execute(self) -> None: if self.alignment_number > 1: parameters += f"-k {self.alignment_number} " # Check the bowtie2 version - bowtie_exec = run(["bowtie2", "--version"], capture_output=True) - bowtie_version = str(bowtie_exec.stdout).split("\\n")[0].split(" ")[2] + bowtie_exec = run(["bowtie2", "--version"], check=False, capture_output=True) + bowtie_version = str(bowtie_exec.stdout).split("\\n", maxsplit=1)[0].split(" ")[2] if bowtie_exec.returncode != 0: logging.error( "Checking bowtie2 version failed:\n%s", @@ -134,7 +134,7 @@ def execute(self) -> None: sys.exit(1) # Start mapping start = perf_counter() - mapping_exec = Popen( + with Popen( [ "bowtie2", parameters, @@ -147,51 +147,37 @@ def execute(self) -> None: ], stdout=PIPE, stderr=PIPE, - ) - # cramfile_unsorted = Path(mkstemp(dir=self.meteor.tmp_dir)[1]) - assert mapping_exec.stdout is not None and mapping_exec.stderr is not None - with pysam.AlignmentFile( - mapping_exec.stdout, - "r", - ) as samdesc: + ) as mapping_exec: + assert mapping_exec.stdout is not None and mapping_exec.stderr is not None with pysam.AlignmentFile( - str(cram_file.resolve()), - # cramfile_unsorted, - "wc", - template=samdesc, - reference_filename=str(reference.resolve()), - ) as cram: - for element in samdesc: - cram.write(element) - # pysam.sort( - # "-o", - # str(cram_file.resolve()), - # "-@", - # str(self.meteor.threads), - # "-O", - # "cram", - # str(cramfile_unsorted.resolve()), - # catch_stdout=False, - # ) - # pysam.index(str(cram_file.resolve())) - # Read standard error from the process (non-blocking read) - mapping_result = mapping_exec.stderr.read().decode("utf-8") - mapping_exec.stderr.close() - - # Wait for the process to finish and get the exit code - exit_code = mapping_exec.wait() - - # Check for errors and print the error output if necessary - if exit_code != 0: - logging.error("bowtie2 failed:\n%s" % mapping_result) - sys.exit(1) - try: - mapping_log = findall(r"([0-9]+)\s+\(", mapping_result) - assert len(mapping_log) == 4 - mapping_data = [int(i) for i in mapping_log] - except AssertionError: - logging.error("Could not access the mapping result from bowtie2") - sys.exit(1) + mapping_exec.stdout, + "r", + ) as samdesc: + with pysam.AlignmentFile( + str(cram_file.resolve()), + # cramfile_unsorted, + "wc", + template=samdesc, + reference_filename=str(reference.resolve()), + ) as cram: + for element in samdesc: + cram.write(element) + # Read standard error from the process (non-blocking read) + mapping_result = mapping_exec.stderr.read().decode("utf-8") + mapping_exec.stderr.close() + # Wait for the process to finish and get the exit code + exit_code = mapping_exec.wait() + # Check for errors and print the error output if necessary + if exit_code != 0: + logging.error("bowtie2 failed:\n%s", mapping_result) + sys.exit(1) + try: + mapping_log = findall(r"([0-9]+)\s+\(", mapping_result) + assert len(mapping_log) == 4 + mapping_data = [int(i) for i in mapping_log] + except AssertionError: + logging.error("Could not access the mapping result from bowtie2") + sys.exit(1) logging.info("Completed mapping creation in %f seconds", perf_counter() - start) config = self.set_mapping_config(cram_file, bowtie_version, mapping_data) self.save_config(config, self.census["Stage1FileName"]) diff --git a/meteor/merging.py b/meteor/merging.py index c0c9d4c..da99511 100644 --- a/meteor/merging.py +++ b/meteor/merging.py @@ -18,9 +18,9 @@ from pathlib import Path import logging import sys -import numpy as np from biom.table import Table # type: ignore from typing import ClassVar +from functools import partial @dataclass @@ -84,7 +84,7 @@ def find_files_to_merge( for my_sample, my_dir in input_dir.items() } # Check that there is exactly one element in each list - len_list = list(set([len(value) for value in list(dict_to_merge.values())])) + len_list = list({len(value) for value in list(dict_to_merge.values())}) assert len(len_list) == 1 assert len_list[0] == 1 files_to_merge = { @@ -103,7 +103,7 @@ def extract_json_info( """ # Check that sections are present try: - assert all([my_section in config for my_section in list(param_dict.keys())]) + assert all(my_section in config for my_section in param_dict.keys()) except AssertionError: logging.error("Missing required section in census json file.") sys.exit(1) @@ -115,11 +115,9 @@ def extract_json_info( # Check that required fields are present try: assert all( - [ - my_field in config[my_section] - for my_section in param_dict - for my_field in param_dict[my_section] - ] + my_field in config[my_section] + for my_section in param_dict + for my_field in param_dict[my_section] ) except AssertionError: logging.error("Missing required fields in census ini file.") @@ -259,12 +257,7 @@ def execute(self) -> None: # Save database_type for later use try: database_type_all = list( - set( - [ - my_info["database_type"] - for my_info in list(all_information.values()) - ] - ) + {my_info["database_type"] for my_info in list(all_information.values())} ) assert len(database_type_all) == 1 database_type = database_type_all[0] @@ -365,7 +358,7 @@ def execute(self) -> None: # Apply the prefixes to each taxonomic rank for rank, prefix in self.ranks.items(): annotation[rank] = annotation[rank].apply( - lambda x: f"{prefix}{x}" + partial(lambda prefix, x: f"{prefix}{x}", prefix) ) observ_metadata = [ {"taxonomy": row.iloc[1:].tolist()} @@ -382,7 +375,7 @@ def execute(self) -> None: # Generate JSON representation of the BIOM table biom_json = biom_table.to_json(generated_by="Meteor") # Write the JSON to a file - with open(output_name.with_suffix(".biom"), "wt") as f: + with open(output_name.with_suffix(".biom"), "wt", encoding="UTF-8") as f: f.write(biom_json) # with h5py.File(output_name.with_suffix(".biom"), "w") as f: # table.to_hdf5(f, generated_by="Meteor", compress=True) diff --git a/meteor/phylogeny.py b/meteor/phylogeny.py index a888474..3c25010 100644 --- a/meteor/phylogeny.py +++ b/meteor/phylogeny.py @@ -98,7 +98,7 @@ def execute(self) -> None: # Define the regex pattern to match the version number version_pattern = re.compile(r"RAxML-NG v\. (\d+\.\d+\.\d+)") raxml_ng_help = run( - ["raxml-ng", "--version"], capture_output=True + ["raxml-ng", "--version"], check=False, capture_output=True ).stdout.decode("utf-8") match = version_pattern.search(raxml_ng_help) # Check if a match is found @@ -152,7 +152,7 @@ def execute(self) -> None: "MSP %s have less than 4 sequences, we compute the mutation rate", msp_file.name, ) - with open(tree_file.parent / "cleaned_sequences.fasta", "w") as f: + with open(tree_file.parent / "cleaned_sequences.fasta", "w", encoding="UTF-8") as f: for seq_name, sequence in cleaned_seqs.items(): f.write(f">{seq_name}\n{sequence}\n") mutation_rate = [] @@ -163,7 +163,7 @@ def execute(self) -> None: seq2 = cleaned_seqs[seq_ids[j]] mutation_rate += [self.compute_mutation_rate(seq1, seq2)] # Construct Newick format string - with open(tree_file.with_suffix(".tree"), "wt") as tree: + with open(tree_file.with_suffix(".tree"), "wt", encoding="UTF-8") as tree: if len(seq_ids) == 2: tree.write( f"({seq_ids[0]}:{mutation_rate[0]}, {seq_ids[1]}:{mutation_rate[0]});" @@ -174,11 +174,15 @@ def execute(self) -> None: min_rate_idx == 0 ): # seq1 and seq2 have the smallest distance tree.write( - f"(({seq_ids[0]}:{mutation_rate[0]}, {seq_ids[1]}:{mutation_rate[0]}):{mutation_rate[1]}, {seq_ids[2]}:{mutation_rate[1]});" + f"(({seq_ids[0]}:{mutation_rate[0]}, " + f"{seq_ids[1]}:{mutation_rate[0]}):{mutation_rate[1]}, " + f"{seq_ids[2]}:{mutation_rate[1]});" ) else: # seq1 and seq3 have the smallest distance tree.write( - f"(({seq_ids[0]}:{mutation_rate[1]}, {seq_ids[2]}:{mutation_rate[1]}):{mutation_rate[0]}, {seq_ids[1]}:{mutation_rate[0]});" + f"(({seq_ids[0]}:{mutation_rate[1]}, " + f"{seq_ids[2]}:{mutation_rate[1]}):{mutation_rate[0]}, " + f"{seq_ids[1]}:{mutation_rate[0]});" ) tree_files.append(tree_file) logging.info("Completed MSP tree %d/%d", idx, msp_count) diff --git a/meteor/profiler.py b/meteor/profiler.py index 057a0e5..3b2a8cc 100644 --- a/meteor/profiler.py +++ b/meteor/profiler.py @@ -51,8 +51,8 @@ class Profiler(Session): def __post_init__(self): if self.normalization not in Profiler.NORMALIZATIONS: - raise ValueError(f'{self.normalization} is not a valid normalization') - + raise ValueError(f"{self.normalization} is not a valid normalization") + # Get the json file self.sample_config = self.get_census_stage(self.meteor.mapping_dir, 1) diff --git a/meteor/referencebuilder.py b/meteor/referencebuilder.py index 613664d..e9f7946 100644 --- a/meteor/referencebuilder.py +++ b/meteor/referencebuilder.py @@ -132,7 +132,7 @@ def execute(self) -> None: # Prepare the reference for meteor self.create_reference() # Check the bowtie2 version - bowtie_exec = run(["bowtie2", "--version"], capture_output=True) + bowtie_exec = run(["bowtie2", "--version"], check=False, capture_output=True) bowtie_version = bowtie_exec.stdout.decode("utf-8").split(" ")[2].split("\n")[0] if bowtie_exec.returncode != 0: logging.error( diff --git a/meteor/strain.py b/meteor/strain.py index 793e673..7f4e9c5 100644 --- a/meteor/strain.py +++ b/meteor/strain.py @@ -31,7 +31,7 @@ @dataclass class Strain(Session): """Counter session map and count""" - + DEFAULT_MAX_DEPTH: ClassVar[int] = 100 MIN_MIN_SNP_DEPTH: ClassVar[int] = 1 MAX_MIN_SNP_DEPTH: ClassVar[int] = 10000 @@ -166,9 +166,7 @@ def get_msp_variant( consensus_file, ) sys.exit(1) - gene_dict = { - gene_id: seq for gene_id, seq in self.get_sequences(consensus_file) - } + gene_dict = dict(self.get_sequences(consensus_file)) logging.info( "%s MSPs have sufficient signal for SNP analysis ", len(msp_with_overlapping_genes["msp_name"].values), diff --git a/meteor/tests/test_fastq_importer.py b/meteor/tests/test_fastq_importer.py index 0534757..adf23fb 100644 --- a/meteor/tests/test_fastq_importer.py +++ b/meteor/tests/test_fastq_importer.py @@ -80,13 +80,13 @@ def test_replace_ext(builder: FastqImporter, fastq_filename: str, name: str) -> @pytest.mark.parametrize( - ("fastq_filename", "tag"), + ("fastq_filename"), ( - ("test.fastq.gz", ""), - pytest.param("pretty.complex_pain.fasta", "", id="fasta"), + ("test.fastq.gz"), + pytest.param("pretty.complex_pain.fasta", id="fasta"), ), ) -def test_get_tag_none(builder: FastqImporter, fastq_filename: str, tag: str) -> None: +def test_get_tag_none(builder: FastqImporter, fastq_filename: str) -> None: assert builder.get_tag(fastq_filename) is None @pytest.mark.parametrize( diff --git a/meteor/tests/test_parser.py b/meteor/tests/test_parser.py index 3ed7a3f..11bf111 100644 --- a/meteor/tests/test_parser.py +++ b/meteor/tests/test_parser.py @@ -74,7 +74,7 @@ def test_find_all_alt(parser_standard: Parser) -> None: mod_dict = {"M01": "K01", "M03": "K0123+K0124"} real_alt = parser_standard.find_all_alt("K01 K02+K03+(K04,K05)", mod_dict) true_alt = [{"K01", "K02", "K03", "K04"}, {"K01", "K02", "K03", "K05"}] - assert all([x in true_alt for x in real_alt]) + assert all(x in true_alt for x in real_alt) assert len(real_alt) == len(true_alt) real_alt = parser_standard.find_all_alt("(K01,K02) K03+(K04,K05)", mod_dict) true_alt = [ @@ -83,7 +83,7 @@ def test_find_all_alt(parser_standard: Parser) -> None: {"K02", "K03", "K04"}, {"K02", "K03", "K05"}, ] - assert all([x in true_alt for x in real_alt]) + assert all(x in true_alt for x in real_alt) assert len(real_alt) == len(true_alt) real_alt = parser_standard.find_all_alt( "((K16154+K16155),(K16157+K16158),K08684)", mod_dict @@ -93,7 +93,7 @@ def test_find_all_alt(parser_standard: Parser) -> None: {"K16157", "K16158"}, {"K08684"}, ] - assert all([x in true_alt for x in real_alt]) + assert all(x in true_alt for x in real_alt) assert len(real_alt) == len(true_alt) real_alt = parser_standard.find_all_alt( "(K13811,(K00957+K00956)) ((K00394+K00395)) ((K11180+K11181))", mod_dict @@ -102,7 +102,7 @@ def test_find_all_alt(parser_standard: Parser) -> None: {"K13811", "K00394", "K00395", "K11180", "K11181"}, {"K00957", "K00956", "K00394", "K00395", "K11180", "K11181"}, ] - assert all([x in true_alt for x in real_alt]) + assert all(x in true_alt for x in real_alt) assert len(real_alt) == len(true_alt) diff --git a/meteor/tests/test_variantcalling.py b/meteor/tests/test_variantcalling.py index 13f7f31..beecee7 100644 --- a/meteor/tests/test_variantcalling.py +++ b/meteor/tests/test_variantcalling.py @@ -16,11 +16,10 @@ from pathlib import Path import pytest import json -import pandas as pd -@pytest.fixture -def vc_builder(datadir: Path, tmp_path: Path) -> VariantCalling: +@pytest.fixture(name="vc_builder") +def fixture_vc_builder(datadir: Path, tmp_path: Path) -> VariantCalling: meteor = Component meteor.ref_dir = datadir / "eva71" meteor.ref_name = "test" diff --git a/meteor/treebuilder.py b/meteor/treebuilder.py index 921374f..fbd40d9 100644 --- a/meteor/treebuilder.py +++ b/meteor/treebuilder.py @@ -14,11 +14,11 @@ from pathlib import Path from collections import defaultdict from meteor.session import Session, Component +from meteor.phylogeny import Phylogeny from dataclasses import dataclass from tempfile import mkdtemp import ete3 # type: ignore[import] from ete3 import Tree # , TreeStyle -from meteor.phylogeny import Phylogeny import logging import sys import pandas as pd @@ -47,7 +47,7 @@ class TreeBuilder(Session): def __post_init__(self) -> None: if self.format not in TreeBuilder.OUTPUT_FORMATS: - raise ValueError(f'{self.format} is not a valid output format') + raise ValueError(f"{self.format} is not a valid output format") self.meteor.tmp_dir = Path(mkdtemp(dir=self.meteor.tmp_path)) self.meteor.tree_dir.mkdir(exist_ok=True, parents=True) @@ -69,11 +69,11 @@ def concatenate(self, msp_file_dict: dict[str, list[Path]]) -> list[Path]: with lzma.open(path, "rt") as infile: outfile.write(infile.read()) msp_list += [res] - logging.info(f"{len(msp_list)} MSPs are available for tree analysis.") + logging.info("%d MSPs are available for tree analysis.", len(msp_list)) return msp_list def get_msp_distance(self, tree: ete3.TreeNode) -> pd.DataFrame: - samples = [leaf for leaf in tree] + samples = list(tree) distance_matrix = pd.DataFrame( index=[n.name for n in samples], columns=[n.name for n in samples] ) diff --git a/meteor/variantcalling.py b/meteor/variantcalling.py index c32ea69..efc712a 100644 --- a/meteor/variantcalling.py +++ b/meteor/variantcalling.py @@ -245,13 +245,13 @@ def execute(self) -> None: / self.census["reference"]["reference_file"]["database_dir"] / self.census["reference"]["annotation"]["bed"]["filename"] ) - bcftools_exec = run(["bcftools", "--version"], capture_output=True) + bcftools_exec = run(["bcftools", "--version"], check=False, capture_output=True) bcftools_version = ( bcftools_exec.stdout.decode("utf-8").split("\n")[0].split(" ")[1] ) if bcftools_exec.returncode != 0: logging.error( - "Checking bcftools failed:\n%s" % bcftools_exec.stderr.decode("utf-8") + "Checking bcftools failed:\n%s", bcftools_exec.stderr.decode("utf-8") ) sys.exit(1) elif parse(bcftools_version) < Version("0.1.19"): @@ -260,11 +260,11 @@ def execute(self) -> None: bcftools_version, ) sys.exit(1) - bedtools_exec = run(["bedtools", "--version"], capture_output=True) + bedtools_exec = run(["bedtools", "--version"], check=False, capture_output=True) bedtools_version = bedtools_exec.stdout.decode("utf-8").split(" ")[1][1:] if bedtools_exec.returncode != 0: logging.error( - "Check bedtools failed:\n%s" % bedtools_exec.stderr.decode("utf-8") + "Check bedtools failed:\n%s", bedtools_exec.stderr.decode("utf-8") ) sys.exit() elif parse(bedtools_version) < Version("2.18"): @@ -381,7 +381,7 @@ def execute(self) -> None: "-ibam", str(cram_file.resolve()), ], - capture_output=True, + check=False, capture_output=True, ).stdout.decode("utf-8") self.filter_low_cov_sites(output, temp_low_cov_sites) logging.info( @@ -389,7 +389,7 @@ def execute(self) -> None: perf_counter() - startlowcovbed, ) startlowcov = perf_counter() - bcftools_process = Popen( + with Popen( [ "bcftools", "consensus", @@ -402,16 +402,16 @@ def execute(self) -> None: str(vcf_file.resolve()), ], stdout=PIPE, - ) - # capture output of bcftools_process - bcftools_output = bcftools_process.communicate()[0] + ) as bcftools_process: + # capture output of bcftools_process + bcftools_output = bcftools_process.communicate()[0] - # compress output using lzma - compressed_output = lzma.compress(bcftools_output) + # compress output using lzma + compressed_output = lzma.compress(bcftools_output) - # write compressed output to file - with open(str(consensus_file.resolve()), "wb") as f: - f.write(compressed_output) + # write compressed output to file + with open(str(consensus_file.resolve()), "wb") as f: + f.write(compressed_output) logging.info( "Completed low coverage regions filtering step in %f seconds", perf_counter() - startlowcov,