From 9405cdc5c8e08aa0e142a6b600e4d4ae8a6e975e Mon Sep 17 00:00:00 2001
From: Florian Plaza Onate <florian.plaza-onate@inrae.fr>
Date: Tue, 2 Apr 2024 15:12:54 +0000
Subject: [PATCH 1/6] Hide unimplemented MetaPhlAn-style output options (-m, --tax_lev)

---
 meteor/meteor.py | 52 +++++++++++++++++++++++++-----------------------
 1 file changed, 27 insertions(+), 25 deletions(-)

diff --git a/meteor/meteor.py b/meteor/meteor.py
index 8ec2ec1..3388eb1 100644
--- a/meteor/meteor.py
+++ b/meteor/meteor.py
@@ -460,13 +460,13 @@ def get_arguments() -> Namespace:  # pragma: no cover
         help="Remove samples with no detected species (MSPs) "
         "(default: %(default)s).",
     )
-    merging_parser.add_argument(
-        "-m",
-        dest="output_mpa",
-        action="store_true",
-        help="Save the merged species abundance table in the style of MetaPhlan "
-        "(default: %(default)s).",
-    )
+    #merging_parser.add_argument(
+    #    "-m",
+    #    dest="output_mpa",
+    #    action="store_true",
+    #    help="Save the merged species abundance table in the style of MetaPhlan "
+    #    "(default: %(default)s).",
+    #)
     merging_parser.add_argument(
         "-b",
         dest="output_biom",
@@ -474,22 +474,22 @@ def get_arguments() -> Namespace:  # pragma: no cover
         help="Save the merged species abundance table in biom format "
         "(default: %(default)s).",
     )
-    merging_parser.add_argument(
-        "--tax_lev",
-        dest="taxonomic_level",
-        default=Merging.DEFAULT_MPA_TAXONOMIC_LEVEL,
-        choices=Merging.MPA_TAXONOMIC_LEVELS,
-        help="""The taxonomic level for mpa output (default: %(default)s):
-                        'a' : all taxonomic levels
-                        'k' : kingdoms
-                        'p' : phyla only
-                        'c' : classes only
-                        'o' : orders only
-                        'f' : families only
-                        'g' : genera only
-                        's' : species only
-                        't' : MSPs only""",
-    )
+    #merging_parser.add_argument(
+    #    "--tax_lev",
+    #    dest="taxonomic_level",
+    #    default=Merging.DEFAULT_MPA_TAXONOMIC_LEVEL,
+    #    choices=Merging.MPA_TAXONOMIC_LEVELS,
+    #    help="""The taxonomic level for mpa output (default: %(default)s):
+    #                    'a' : all taxonomic levels
+    #                    'k' : kingdoms
+    #                    'p' : phyla only
+    #                    'c' : classes only
+    #                    'o' : orders only
+    #                    'f' : families only
+    #                    'g' : genera only
+    #                    's' : species only
+    #                    't' : MSPs only""",
+    #)
     merging_parser.add_argument(
         "-o",
         dest="merging_dir",
@@ -782,8 +782,10 @@ def main() -> None:  # pragma: no cover
             args.min_msp_abundance,
             args.min_msp_occurrence,
             args.remove_sample_with_no_msp,
-            args.output_mpa,
-            args.taxonomic_level,
+            None,
+            None,
+            #args.output_mpa,
+            #args.taxonomic_level,
             args.output_biom,
             args.output_gene_matrix,
         )

From 16c34aeada3a48cd1d3c1198f4efe455af7a8f4e Mon Sep 17 00:00:00 2001
From: Florian Plaza Onate <florian.plaza-onate@inrae.fr>
Date: Tue, 2 Apr 2024 15:38:36 +0000
Subject: [PATCH 2/6] Fix mypy warning + check counting and mapping types

---
 meteor/counter.py | 5 +++++
 meteor/mapper.py  | 7 ++++++-
 meteor/merging.py | 4 ++--
 meteor/meteor.py  | 8 ++++----
 4 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/meteor/counter.py b/meteor/counter.py
index 98a1c6d..46945ce 100644
--- a/meteor/counter.py
+++ b/meteor/counter.py
@@ -50,6 +50,9 @@ class Counter(Session):
     json_data: dict = field(default_factory=dict)
 
     def __post_init__(self) -> None:
+        if self.counting_type not in Counter.COUNTING_TYPES:
+            raise ValueError(f'{self.counting_type} is not a valid counting type')
+
         if self.meteor.tmp_path:
             self.meteor.tmp_path.mkdir(exist_ok=True)
         self.meteor.tmp_dir = Path(mkdtemp(dir=self.meteor.tmp_path))
@@ -133,6 +136,8 @@ def filter_alignments(
         # contains a list of alignment of each read
         reads: dict[str, list[AlignedSegment]] = {}
         for element in cramdesc:
+            assert element.query_name is not None and element.reference_name is not None
+
             # identity = (element.query_length - element.get_tag("NM")) / element.query_length
             # identity = 1.0 - (element.get_tag("NM") / element.query_alignment_length)
             ali = sum(self.get_aligned_nucleotides(element))
diff --git a/meteor/mapper.py b/meteor/mapper.py
index 556949f..53c8b33 100644
--- a/meteor/mapper.py
+++ b/meteor/mapper.py
@@ -33,7 +33,7 @@ class Mapper(Session):
     """Run the bowtie"""
 
     DEFAULT_NUM_THREADS : ClassVar[int] = 1
-    MAPPING_TYPES: ClassVar[list[str]] = ['end_to_end', 'local']
+    MAPPING_TYPES: ClassVar[list[str]] = ['end-to-end', 'local']
     DEFAULT_MAPPING_TYPE: ClassVar[str] = 'end-to-end'
     DEFAULT_TRIM: ClassVar[int] = 80
     NO_TRIM: ClassVar[int] = 0
@@ -48,6 +48,10 @@ class Mapper(Session):
     counting_type: str
     identity_threshold: float
 
+    def __post_init__(self) -> None:
+        if self.mapping_type not in Mapper.MAPPING_TYPES:
+            raise ValueError(f'{self.mapping_type} is not a valid mapping type')
+
     def set_mapping_config(
         self,
         cram_file: Path,
@@ -147,6 +151,7 @@ def execute(self) -> None:
             stderr=PIPE,
         )
         # cramfile_unsorted = Path(mkstemp(dir=self.meteor.tmp_dir)[1])
+        assert mapping_exec.stdout is not None and mapping_exec.stderr is not None
         with pysam.AlignmentFile(
             mapping_exec.stdout,
             "r",
diff --git a/meteor/merging.py b/meteor/merging.py
index b95d102..6d7ae80 100644
--- a/meteor/merging.py
+++ b/meteor/merging.py
@@ -19,7 +19,7 @@
 import logging
 import sys
 import numpy as np
-from biom.table import Table
+from biom.table import Table # type: ignore
 from typing import ClassVar
 
 
@@ -348,7 +348,7 @@ def execute(self) -> None:
                     / ref_json["annotation"]["taxonomy"]["filename"],
                     sep="\t",
                     header=0,
-                    usecols=self.ranks.keys(),
+                    usecols=list(self.ranks.keys()),
                 )
 
                 annotation = annotation[
diff --git a/meteor/meteor.py b/meteor/meteor.py
index 3388eb1..80c90da 100644
--- a/meteor/meteor.py
+++ b/meteor/meteor.py
@@ -782,8 +782,8 @@ def main() -> None:  # pragma: no cover
             args.min_msp_abundance,
             args.min_msp_occurrence,
             args.remove_sample_with_no_msp,
-            None,
-            None,
+            False,
+            "a",
             #args.output_mpa,
             #args.taxonomic_level,
             args.output_biom,
@@ -806,10 +806,10 @@ def main() -> None:  # pragma: no cover
             fastq_importer.execute()
             meteor.fastq_dir = Path(tmpdirname) / "test"
             meteor.ref_dir = meteor.ref_dir / "mock"
-            counter = Counter(meteor, "best", "end-to-end", 80, 0.97, 100, False, True)
+            counter = Counter(meteor, "total", "end-to-end", 80, 0.97, 100, False, True)
             counter.execute()
             meteor.fastq_dir = Path(tmpdirname) / "test2"
-            counter = Counter(meteor, "best", "end-to-end", 80, 0.97, 100, False, True)
+            counter = Counter(meteor, "total", "end-to-end", 80, 0.97, 100, False, True)
             counter.execute()
             # Remove the mapping directory and its contents
             shutil.rmtree(Path(tmpdirname) / "test")

From 3c5765692957e9536e1505d5db96a6d158b4479b Mon Sep 17 00:00:00 2001
From: Florian Plaza Onate <florian.plaza-onate@inrae.fr>
Date: Tue, 2 Apr 2024 16:35:08 +0000
Subject: [PATCH 3/6] Check normalization and tree output format

---
 meteor/mapper.py              | 4 ++--
 meteor/profiler.py            | 5 ++++-
 meteor/tests/test_profiler.py | 2 +-
 meteor/treebuilder.py         | 5 ++++-
 4 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/meteor/mapper.py b/meteor/mapper.py
index 53c8b33..951101c 100644
--- a/meteor/mapper.py
+++ b/meteor/mapper.py
@@ -33,8 +33,8 @@ class Mapper(Session):
     """Run the bowtie"""
 
     DEFAULT_NUM_THREADS : ClassVar[int] = 1
-    MAPPING_TYPES: ClassVar[list[str]] = ['end-to-end', 'local']
-    DEFAULT_MAPPING_TYPE: ClassVar[str] = 'end-to-end'
+    MAPPING_TYPES: ClassVar[list[str]] = ["end-to-end", "local"]
+    DEFAULT_MAPPING_TYPE: ClassVar[str] = "end-to-end"
     DEFAULT_TRIM: ClassVar[int] = 80
     NO_TRIM: ClassVar[int] = 0
     DEFAULT_ALIGNMENT_NUMBER: ClassVar[int] = 10000
diff --git a/meteor/profiler.py b/meteor/profiler.py
index 61e7125..74daea4 100644
--- a/meteor/profiler.py
+++ b/meteor/profiler.py
@@ -33,7 +33,7 @@ class Profiler(Session):
     NO_RAREFACTION: ClassVar[int] = 0
     DEFAULT_RAREFACTION_LEVEL: ClassVar[int] = NO_RAREFACTION
     DEFAULT_RANDOM_SEED: ClassVar[int] = 1234
-    NORMALIZATIONS: ClassVar[list[str]] = ["coverage", "fpkm", "raw"]
+    NORMALIZATIONS: ClassVar[list[str|None]] = [None, "coverage", "fpkm", "raw"]
     DEFAULT_NORMALIZATION: ClassVar[str] = "coverage"
     DEFAULT_COVERAGE_FACTOR: ClassVar[float] = 100.0
     DEFAULT_CORE_SIZE: ClassVar[int] = 100
@@ -50,6 +50,9 @@ class Profiler(Session):
     coverage_factor: float
 
     def __post_init__(self):
+        if self.normalization not in Profiler.NORMALIZATIONS:
+            raise ValueError(f'{self.normalization} is not a valid normalization')
+        
         # Get the json file
         self.sample_config = self.get_census_stage(self.meteor.mapping_dir, 1)
 
diff --git a/meteor/tests/test_profiler.py b/meteor/tests/test_profiler.py
index f375cc6..cd6283f 100644
--- a/meteor/tests/test_profiler.py
+++ b/meteor/tests/test_profiler.py
@@ -31,7 +31,7 @@ def profiler_standard(datadir: Path, tmp_path: Path) -> Profiler:
         rarefaction_level=-1,
         seed=12345,
         coverage_factor=100.0,
-        normalization="",
+        normalization=None,
         core_size=4,
         msp_filter=0.5,
         completeness=0.6,
diff --git a/meteor/treebuilder.py b/meteor/treebuilder.py
index 9eca09d..921374f 100644
--- a/meteor/treebuilder.py
+++ b/meteor/treebuilder.py
@@ -32,7 +32,7 @@ class TreeBuilder(Session):
 
     DEFAULT_MAX_GAP: ClassVar[float] = 0.5
     DEFAULT_GAP_CHAR: ClassVar[str] = "-"
-    OUTPUT_FORMATS: ClassVar[list[str]] = ["png", "svg", "pdf", "txt"]
+    OUTPUT_FORMATS: ClassVar[list[str|None]] = [None, "png", "svg", "pdf", "txt"]
     DEFAULT_OUTPUT_FORMAT: ClassVar[str|None] = None
     DEFAULT_WIDTH: ClassVar[int] = 500
     DEFAULT_HEIGHT: ClassVar[int] = 500
@@ -46,6 +46,9 @@ class TreeBuilder(Session):
     gap_char: str
 
     def __post_init__(self) -> None:
+        if self.format not in TreeBuilder.OUTPUT_FORMATS:
+            raise ValueError(f'{self.format} is not a valid output format')
+
         self.meteor.tmp_dir = Path(mkdtemp(dir=self.meteor.tmp_path))
         self.meteor.tree_dir.mkdir(exist_ok=True, parents=True)
 

From 672921b590ba84a4c6ce1d1731acda458b3b8438 Mon Sep 17 00:00:00 2001
From: Florian Plaza Onate <florian.plaza-onate@inrae.fr>
Date: Wed, 3 Apr 2024 11:25:03 +0000
Subject: [PATCH 4/6] Remove unused parameter

---
 meteor/counter.py           | 1 -
 meteor/mapper.py            | 2 --
 meteor/tests/test_mapper.py | 1 -
 3 files changed, 4 deletions(-)

diff --git a/meteor/counter.py b/meteor/counter.py
index 46945ce..2505db2 100644
--- a/meteor/counter.py
+++ b/meteor/counter.py
@@ -79,7 +79,6 @@ def launch_mapping(self) -> None:
                 self.mapping_type,
                 self.trim,
                 self.alignment_number,
-                self.counting_type,
                 self.identity_threshold,
             )
             mapping_process.execute()
diff --git a/meteor/mapper.py b/meteor/mapper.py
index 951101c..f8a9a45 100644
--- a/meteor/mapper.py
+++ b/meteor/mapper.py
@@ -45,7 +45,6 @@ class Mapper(Session):
     mapping_type: str
     trim: int
     alignment_number: int
-    counting_type: str
     identity_threshold: float
 
     def __post_init__(self) -> None:
@@ -117,7 +116,6 @@ def execute(self) -> None:
         if self.trim > Mapper.NO_TRIM:
             parameters += f"--trim-to {self.trim} "
         if self.alignment_number > 1:
-            # and self.counting_type != "best"
             parameters += f"-k {self.alignment_number} "
         # Check the bowtie2 version
         bowtie_exec = run(["bowtie2", "--version"], capture_output=True)
diff --git a/meteor/tests/test_mapper.py b/meteor/tests/test_mapper.py
index dbf2f77..5627ff6 100644
--- a/meteor/tests/test_mapper.py
+++ b/meteor/tests/test_mapper.py
@@ -52,7 +52,6 @@ def mapping_builder(datadir: Path, tmp_path: Path) -> Mapper:
         "end-to-end",
         80,
         10000,
-        "smart_shared_reads",
         0.95,
     )
 

From a9fc8511f7cf868223a07d268fadbdd9ad846e57 Mon Sep 17 00:00:00 2001
From: Florian Plaza Onate <florian.plaza-onate@inrae.fr>
Date: Wed, 3 Apr 2024 11:29:44 +0000
Subject: [PATCH 5/6] Fix mypy warnings

---
 meteor/merging.py            | 2 +-
 meteor/meteor.py             | 2 +-
 meteor/profiler.py           | 2 +-
 meteor/tests/test_counter.py | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/meteor/merging.py b/meteor/merging.py
index 6d7ae80..c0c9d4c 100644
--- a/meteor/merging.py
+++ b/meteor/merging.py
@@ -41,7 +41,7 @@ class Merging(Session):
     min_msp_occurrence: int
     remove_sample_with_no_msp: bool
     output_mpa: bool
-    mpa_taxonomic_level: str
+    mpa_taxonomic_level: str|None
     output_biom: bool
     output_gene_matrix: bool
     ranks: dict[str, str] = field(
diff --git a/meteor/meteor.py b/meteor/meteor.py
index 80c90da..f05afab 100644
--- a/meteor/meteor.py
+++ b/meteor/meteor.py
@@ -783,7 +783,7 @@ def main() -> None:  # pragma: no cover
             args.min_msp_occurrence,
             args.remove_sample_with_no_msp,
             False,
-            "a",
+            None,
             #args.output_mpa,
             #args.taxonomic_level,
             args.output_biom,
diff --git a/meteor/profiler.py b/meteor/profiler.py
index 74daea4..057a0e5 100644
--- a/meteor/profiler.py
+++ b/meteor/profiler.py
@@ -43,7 +43,7 @@ class Profiler(Session):
     meteor: type[Component]
     rarefaction_level: int
     seed: int
-    normalization: str
+    normalization: str|None
     core_size: int
     msp_filter: float
     completeness: float
diff --git a/meteor/tests/test_counter.py b/meteor/tests/test_counter.py
index 34b5832..f128f5c 100644
--- a/meteor/tests/test_counter.py
+++ b/meteor/tests/test_counter.py
@@ -284,8 +284,8 @@ def test_save_cram(counter_unique: Counter, datadir: Path, tmp_path: Path) -> No
         reads, _ = counter_unique.filter_alignments(
             cramdesc
         )  # pylint: disable=unused-variable
-        read_list = list(chain(reads.values()))
-        merged_list = list(chain.from_iterable(read_list))
+        read_list = reads.values()
+        merged_list = chain.from_iterable(read_list)
         tmpcramfile = tmp_path / "test"
         counter_unique.save_cram_strain(tmpcramfile, cramdesc, merged_list, ref_json)
         assert tmpcramfile.exists()

From d65c8a595d913f495c594d8994d90b84fc4b6c62 Mon Sep 17 00:00:00 2001
From: Florian Plaza Onate <florian.plaza-onate@inrae.fr>
Date: Wed, 3 Apr 2024 13:25:06 +0000
Subject: [PATCH 6/6] Fix pylint warnings

---
 meteor/counter.py                   |  2 +-
 meteor/downloader.py                |  4 +-
 meteor/mapper.py                    | 82 ++++++++++++-----------------
 meteor/merging.py                   | 25 ++++-----
 meteor/phylogeny.py                 | 14 +++--
 meteor/profiler.py                  |  4 +-
 meteor/referencebuilder.py          |  2 +-
 meteor/strain.py                    |  6 +--
 meteor/tests/test_fastq_importer.py |  8 +--
 meteor/tests/test_parser.py         |  8 +--
 meteor/tests/test_variantcalling.py |  5 +-
 meteor/treebuilder.py               |  8 +--
 meteor/variantcalling.py            | 28 +++++-----
 13 files changed, 88 insertions(+), 108 deletions(-)

diff --git a/meteor/counter.py b/meteor/counter.py
index 2505db2..796bc78 100644
--- a/meteor/counter.py
+++ b/meteor/counter.py
@@ -51,7 +51,7 @@ class Counter(Session):
 
     def __post_init__(self) -> None:
         if self.counting_type not in Counter.COUNTING_TYPES:
-            raise ValueError(f'{self.counting_type} is not a valid counting type')
+            raise ValueError(f"{self.counting_type} is not a valid counting type")
 
         if self.meteor.tmp_path:
             self.meteor.tmp_path.mkdir(exist_ok=True)
diff --git a/meteor/downloader.py b/meteor/downloader.py
index de93196..fedb442 100644
--- a/meteor/downloader.py
+++ b/meteor/downloader.py
@@ -51,7 +51,7 @@ def load_catalogues_config() -> dict:
         except FileNotFoundError:
             logging.error("The file %s is missing in meteor source", Downloader.CONFIG_DATA_FILE.name)
             sys.exit(1)
-    
+
     @staticmethod
     def get_available_catalogues() -> list[str]:
         catalogues_config = Downloader.load_catalogues_config()
@@ -130,7 +130,7 @@ def execute(self) -> None:
             print(flush=True)
             if self.choice == Downloader.TEST_CATALOGUE:
                 for sample in self.catalogues_config[self.choice]["samples"]:
-                    logging.info(f"Download {sample} fastq file")
+                    logging.info("Download %s fastq file", sample)
                     url_fastq = self.catalogues_config[self.choice]["samples"][sample][
                         "catalogue"
                     ]
diff --git a/meteor/mapper.py b/meteor/mapper.py
index f8a9a45..50b7301 100644
--- a/meteor/mapper.py
+++ b/meteor/mapper.py
@@ -49,7 +49,7 @@ class Mapper(Session):
 
     def __post_init__(self) -> None:
         if self.mapping_type not in Mapper.MAPPING_TYPES:
-            raise ValueError(f'{self.mapping_type} is not a valid mapping type')
+            raise ValueError(f"{self.mapping_type} is not a valid mapping type")
 
     def set_mapping_config(
         self,
@@ -118,8 +118,8 @@ def execute(self) -> None:
         if self.alignment_number > 1:
             parameters += f"-k {self.alignment_number} "
         # Check the bowtie2 version
-        bowtie_exec = run(["bowtie2", "--version"], capture_output=True)
-        bowtie_version = str(bowtie_exec.stdout).split("\\n")[0].split(" ")[2]
+        bowtie_exec = run(["bowtie2", "--version"], check=False, capture_output=True)
+        bowtie_version = str(bowtie_exec.stdout).split("\\n", maxsplit=1)[0].split(" ")[2]
         if bowtie_exec.returncode != 0:
             logging.error(
                 "Checking bowtie2 version failed:\n%s",
@@ -134,7 +134,7 @@ def execute(self) -> None:
             sys.exit(1)
         # Start mapping
         start = perf_counter()
-        mapping_exec = Popen(
+        with Popen(
             [
                 "bowtie2",
                 parameters,
@@ -147,51 +147,37 @@ def execute(self) -> None:
             ],
             stdout=PIPE,
             stderr=PIPE,
-        )
-        # cramfile_unsorted = Path(mkstemp(dir=self.meteor.tmp_dir)[1])
-        assert mapping_exec.stdout is not None and mapping_exec.stderr is not None
-        with pysam.AlignmentFile(
-            mapping_exec.stdout,
-            "r",
-        ) as samdesc:
+        ) as mapping_exec:
+            assert mapping_exec.stdout is not None and mapping_exec.stderr is not None
             with pysam.AlignmentFile(
-                str(cram_file.resolve()),
-                # cramfile_unsorted,
-                "wc",
-                template=samdesc,
-                reference_filename=str(reference.resolve()),
-            ) as cram:
-                for element in samdesc:
-                    cram.write(element)
-        # pysam.sort(
-        #     "-o",
-        #     str(cram_file.resolve()),
-        #     "-@",
-        #     str(self.meteor.threads),
-        #     "-O",
-        #     "cram",
-        #     str(cramfile_unsorted.resolve()),
-        #     catch_stdout=False,
-        # )
-        # pysam.index(str(cram_file.resolve()))
-        # Read standard error from the process (non-blocking read)
-        mapping_result = mapping_exec.stderr.read().decode("utf-8")
-        mapping_exec.stderr.close()
-
-        # Wait for the process to finish and get the exit code
-        exit_code = mapping_exec.wait()
-
-        # Check for errors and print the error output if necessary
-        if exit_code != 0:
-            logging.error("bowtie2 failed:\n%s" % mapping_result)
-            sys.exit(1)
-        try:
-            mapping_log = findall(r"([0-9]+)\s+\(", mapping_result)
-            assert len(mapping_log) == 4
-            mapping_data = [int(i) for i in mapping_log]
-        except AssertionError:
-            logging.error("Could not access the mapping result from bowtie2")
-            sys.exit(1)
+                mapping_exec.stdout,
+                "r",
+            ) as samdesc:
+                with pysam.AlignmentFile(
+                    str(cram_file.resolve()),
+                    # cramfile_unsorted,
+                    "wc",
+                    template=samdesc,
+                    reference_filename=str(reference.resolve()),
+                ) as cram:
+                    for element in samdesc:
+                        cram.write(element)
+            # Read standard error from the process (blocks until bowtie2 closes stderr)
+            mapping_result = mapping_exec.stderr.read().decode("utf-8")
+            mapping_exec.stderr.close()
+            # Wait for the process to finish and get the exit code
+            exit_code = mapping_exec.wait()
+            # Check for errors and print the error output if necessary
+            if exit_code != 0:
+                logging.error("bowtie2 failed:\n%s", mapping_result)
+                sys.exit(1)
+            try:
+                mapping_log = findall(r"([0-9]+)\s+\(", mapping_result)
+                assert len(mapping_log) == 4
+                mapping_data = [int(i) for i in mapping_log]
+            except AssertionError:
+                logging.error("Could not access the mapping result from bowtie2")
+                sys.exit(1)
         logging.info("Completed mapping creation in %f seconds", perf_counter() - start)
         config = self.set_mapping_config(cram_file, bowtie_version, mapping_data)
         self.save_config(config, self.census["Stage1FileName"])
diff --git a/meteor/merging.py b/meteor/merging.py
index c0c9d4c..da99511 100644
--- a/meteor/merging.py
+++ b/meteor/merging.py
@@ -18,9 +18,9 @@
 from pathlib import Path
 import logging
 import sys
-import numpy as np
 from biom.table import Table # type: ignore
 from typing import ClassVar
+from functools import partial
 
 
 @dataclass
@@ -84,7 +84,7 @@ def find_files_to_merge(
             for my_sample, my_dir in input_dir.items()
         }
         # Check that there is exactly one element in each list
-        len_list = list(set([len(value) for value in list(dict_to_merge.values())]))
+        len_list = list({len(value) for value in list(dict_to_merge.values())})
         assert len(len_list) == 1
         assert len_list[0] == 1
         files_to_merge = {
@@ -103,7 +103,7 @@ def extract_json_info(
         """
         # Check that sections are present
         try:
-            assert all([my_section in config for my_section in list(param_dict.keys())])
+            assert all(my_section in config for my_section in param_dict.keys())
         except AssertionError:
             logging.error("Missing required section in census json file.")
             sys.exit(1)
@@ -115,11 +115,9 @@ def extract_json_info(
         # Check that required fields are present
         try:
             assert all(
-                [
-                    my_field in config[my_section]
-                    for my_section in param_dict
-                    for my_field in param_dict[my_section]
-                ]
+                my_field in config[my_section]
+                for my_section in param_dict
+                for my_field in param_dict[my_section]
             )
         except AssertionError:
             logging.error("Missing required fields in census ini file.")
@@ -259,12 +257,7 @@ def execute(self) -> None:
         # Save database_type for later use
         try:
             database_type_all = list(
-                set(
-                    [
-                        my_info["database_type"]
-                        for my_info in list(all_information.values())
-                    ]
-                )
+                {my_info["database_type"] for my_info in list(all_information.values())}
             )
             assert len(database_type_all) == 1
             database_type = database_type_all[0]
@@ -365,7 +358,7 @@ def execute(self) -> None:
                     # Apply the prefixes to each taxonomic rank
                     for rank, prefix in self.ranks.items():
                         annotation[rank] = annotation[rank].apply(
-                            lambda x: f"{prefix}{x}"
+                            partial(lambda prefix, x: f"{prefix}{x}", prefix)
                         )
                     observ_metadata = [
                         {"taxonomy": row.iloc[1:].tolist()}
@@ -382,7 +375,7 @@ def execute(self) -> None:
                     # Generate JSON representation of the BIOM table
                     biom_json = biom_table.to_json(generated_by="Meteor")
                     # Write the JSON to a file
-                    with open(output_name.with_suffix(".biom"), "wt") as f:
+                    with open(output_name.with_suffix(".biom"), "wt", encoding="UTF-8") as f:
                         f.write(biom_json)
                     # with h5py.File(output_name.with_suffix(".biom"), "w") as f:
                     #     table.to_hdf5(f, generated_by="Meteor", compress=True)
diff --git a/meteor/phylogeny.py b/meteor/phylogeny.py
index a888474..3c25010 100644
--- a/meteor/phylogeny.py
+++ b/meteor/phylogeny.py
@@ -98,7 +98,7 @@ def execute(self) -> None:
         # Define the regex pattern to match the version number
         version_pattern = re.compile(r"RAxML-NG v\. (\d+\.\d+\.\d+)")
         raxml_ng_help = run(
-            ["raxml-ng", "--version"], capture_output=True
+            ["raxml-ng", "--version"], check=False, capture_output=True
         ).stdout.decode("utf-8")
         match = version_pattern.search(raxml_ng_help)
         # Check if a match is found
@@ -152,7 +152,7 @@ def execute(self) -> None:
                         "MSP %s have less than 4 sequences, we compute the mutation rate",
                         msp_file.name,
                     )
-                    with open(tree_file.parent / "cleaned_sequences.fasta", "w") as f:
+                    with open(tree_file.parent / "cleaned_sequences.fasta", "w", encoding="UTF-8") as f:
                         for seq_name, sequence in cleaned_seqs.items():
                             f.write(f">{seq_name}\n{sequence}\n")
                     mutation_rate = []
@@ -163,7 +163,7 @@ def execute(self) -> None:
                             seq2 = cleaned_seqs[seq_ids[j]]
                             mutation_rate += [self.compute_mutation_rate(seq1, seq2)]
                     # Construct Newick format string
-                    with open(tree_file.with_suffix(".tree"), "wt") as tree:
+                    with open(tree_file.with_suffix(".tree"), "wt", encoding="UTF-8") as tree:
                         if len(seq_ids) == 2:
                             tree.write(
                                 f"({seq_ids[0]}:{mutation_rate[0]}, {seq_ids[1]}:{mutation_rate[0]});"
@@ -174,11 +174,15 @@ def execute(self) -> None:
                                 min_rate_idx == 0
                             ):  # seq1 and seq2 have the smallest distance
                                 tree.write(
-                                    f"(({seq_ids[0]}:{mutation_rate[0]}, {seq_ids[1]}:{mutation_rate[0]}):{mutation_rate[1]}, {seq_ids[2]}:{mutation_rate[1]});"
+                                    f"(({seq_ids[0]}:{mutation_rate[0]}, "
+                                    f"{seq_ids[1]}:{mutation_rate[0]}):{mutation_rate[1]}, "
+                                    f"{seq_ids[2]}:{mutation_rate[1]});"
                                 )
                             else:  # seq1 and seq3 have the smallest distance
                                 tree.write(
-                                    f"(({seq_ids[0]}:{mutation_rate[1]}, {seq_ids[2]}:{mutation_rate[1]}):{mutation_rate[0]}, {seq_ids[1]}:{mutation_rate[0]});"
+                                    f"(({seq_ids[0]}:{mutation_rate[1]}, "
+                                    f"{seq_ids[2]}:{mutation_rate[1]}):{mutation_rate[0]}, "
+                                    f"{seq_ids[1]}:{mutation_rate[0]});"
                                 )
                 tree_files.append(tree_file)
             logging.info("Completed MSP tree %d/%d", idx, msp_count)
diff --git a/meteor/profiler.py b/meteor/profiler.py
index 057a0e5..3b2a8cc 100644
--- a/meteor/profiler.py
+++ b/meteor/profiler.py
@@ -51,8 +51,8 @@ class Profiler(Session):
 
     def __post_init__(self):
         if self.normalization not in Profiler.NORMALIZATIONS:
-            raise ValueError(f'{self.normalization} is not a valid normalization')
-        
+            raise ValueError(f"{self.normalization} is not a valid normalization")
+
         # Get the json file
         self.sample_config = self.get_census_stage(self.meteor.mapping_dir, 1)
 
diff --git a/meteor/referencebuilder.py b/meteor/referencebuilder.py
index 613664d..e9f7946 100644
--- a/meteor/referencebuilder.py
+++ b/meteor/referencebuilder.py
@@ -132,7 +132,7 @@ def execute(self) -> None:
         # Prepare the reference for meteor
         self.create_reference()
         # Check the bowtie2 version
-        bowtie_exec = run(["bowtie2", "--version"], capture_output=True)
+        bowtie_exec = run(["bowtie2", "--version"], check=False, capture_output=True)
         bowtie_version = bowtie_exec.stdout.decode("utf-8").split(" ")[2].split("\n")[0]
         if bowtie_exec.returncode != 0:
             logging.error(
diff --git a/meteor/strain.py b/meteor/strain.py
index 793e673..7f4e9c5 100644
--- a/meteor/strain.py
+++ b/meteor/strain.py
@@ -31,7 +31,7 @@
 @dataclass
 class Strain(Session):
     """Counter session map and count"""
-    
+
     DEFAULT_MAX_DEPTH: ClassVar[int] = 100
     MIN_MIN_SNP_DEPTH: ClassVar[int] = 1
     MAX_MIN_SNP_DEPTH: ClassVar[int] = 10000
@@ -166,9 +166,7 @@ def get_msp_variant(
                 consensus_file,
             )
             sys.exit(1)
-        gene_dict = {
-            gene_id: seq for gene_id, seq in self.get_sequences(consensus_file)
-        }
+        gene_dict = dict(self.get_sequences(consensus_file))
         logging.info(
             "%s MSPs have sufficient signal for SNP analysis ",
             len(msp_with_overlapping_genes["msp_name"].values),
diff --git a/meteor/tests/test_fastq_importer.py b/meteor/tests/test_fastq_importer.py
index 0534757..adf23fb 100644
--- a/meteor/tests/test_fastq_importer.py
+++ b/meteor/tests/test_fastq_importer.py
@@ -80,13 +80,13 @@ def test_replace_ext(builder: FastqImporter, fastq_filename: str, name: str) ->
 
 
 @pytest.mark.parametrize(
-    ("fastq_filename", "tag"),
+    ("fastq_filename"),
     (
-        ("test.fastq.gz", ""),
-        pytest.param("pretty.complex_pain.fasta", "", id="fasta"),
+        ("test.fastq.gz"),
+        pytest.param("pretty.complex_pain.fasta", id="fasta"),
     ),
 )
-def test_get_tag_none(builder: FastqImporter, fastq_filename: str, tag: str) -> None:
+def test_get_tag_none(builder: FastqImporter, fastq_filename: str) -> None:
     assert builder.get_tag(fastq_filename) is None
 
 @pytest.mark.parametrize(
diff --git a/meteor/tests/test_parser.py b/meteor/tests/test_parser.py
index 3ed7a3f..11bf111 100644
--- a/meteor/tests/test_parser.py
+++ b/meteor/tests/test_parser.py
@@ -74,7 +74,7 @@ def test_find_all_alt(parser_standard: Parser) -> None:
     mod_dict = {"M01": "K01", "M03": "K0123+K0124"}
     real_alt = parser_standard.find_all_alt("K01 K02+K03+(K04,K05)", mod_dict)
     true_alt = [{"K01", "K02", "K03", "K04"}, {"K01", "K02", "K03", "K05"}]
-    assert all([x in true_alt for x in real_alt])
+    assert all(x in true_alt for x in real_alt)
     assert len(real_alt) == len(true_alt)
     real_alt = parser_standard.find_all_alt("(K01,K02) K03+(K04,K05)", mod_dict)
     true_alt = [
@@ -83,7 +83,7 @@ def test_find_all_alt(parser_standard: Parser) -> None:
         {"K02", "K03", "K04"},
         {"K02", "K03", "K05"},
     ]
-    assert all([x in true_alt for x in real_alt])
+    assert all(x in true_alt for x in real_alt)
     assert len(real_alt) == len(true_alt)
     real_alt = parser_standard.find_all_alt(
         "((K16154+K16155),(K16157+K16158),K08684)", mod_dict
@@ -93,7 +93,7 @@ def test_find_all_alt(parser_standard: Parser) -> None:
         {"K16157", "K16158"},
         {"K08684"},
     ]
-    assert all([x in true_alt for x in real_alt])
+    assert all(x in true_alt for x in real_alt)
     assert len(real_alt) == len(true_alt)
     real_alt = parser_standard.find_all_alt(
         "(K13811,(K00957+K00956)) ((K00394+K00395)) ((K11180+K11181))", mod_dict
@@ -102,7 +102,7 @@ def test_find_all_alt(parser_standard: Parser) -> None:
         {"K13811", "K00394", "K00395", "K11180", "K11181"},
         {"K00957", "K00956", "K00394", "K00395", "K11180", "K11181"},
     ]
-    assert all([x in true_alt for x in real_alt])
+    assert all(x in true_alt for x in real_alt)
     assert len(real_alt) == len(true_alt)
 
 
diff --git a/meteor/tests/test_variantcalling.py b/meteor/tests/test_variantcalling.py
index 13f7f31..beecee7 100644
--- a/meteor/tests/test_variantcalling.py
+++ b/meteor/tests/test_variantcalling.py
@@ -16,11 +16,10 @@
 from pathlib import Path
 import pytest
 import json
-import pandas as pd
 
 
-@pytest.fixture
-def vc_builder(datadir: Path, tmp_path: Path) -> VariantCalling:
+@pytest.fixture(name="vc_builder")
+def fixture_vc_builder(datadir: Path, tmp_path: Path) -> VariantCalling:
     meteor = Component
     meteor.ref_dir = datadir / "eva71"
     meteor.ref_name = "test"
diff --git a/meteor/treebuilder.py b/meteor/treebuilder.py
index 921374f..fbd40d9 100644
--- a/meteor/treebuilder.py
+++ b/meteor/treebuilder.py
@@ -14,11 +14,11 @@
 from pathlib import Path
 from collections import defaultdict
 from meteor.session import Session, Component
+from meteor.phylogeny import Phylogeny
 from dataclasses import dataclass
 from tempfile import mkdtemp
 import ete3  # type: ignore[import]
 from ete3 import Tree  # , TreeStyle
-from meteor.phylogeny import Phylogeny
 import logging
 import sys
 import pandas as pd
@@ -47,7 +47,7 @@ class TreeBuilder(Session):
 
     def __post_init__(self) -> None:
         if self.format not in TreeBuilder.OUTPUT_FORMATS:
-            raise ValueError(f'{self.format} is not a valid output format')
+            raise ValueError(f"{self.format} is not a valid output format")
 
         self.meteor.tmp_dir = Path(mkdtemp(dir=self.meteor.tmp_path))
         self.meteor.tree_dir.mkdir(exist_ok=True, parents=True)
@@ -69,11 +69,11 @@ def concatenate(self, msp_file_dict: dict[str, list[Path]]) -> list[Path]:
                         with lzma.open(path, "rt") as infile:
                             outfile.write(infile.read())
                 msp_list += [res]
-        logging.info(f"{len(msp_list)} MSPs are available for tree analysis.")
+        logging.info("%d MSPs are available for tree analysis.", len(msp_list))
         return msp_list
 
     def get_msp_distance(self, tree: ete3.TreeNode) -> pd.DataFrame:
-        samples = [leaf for leaf in tree]
+        samples = list(tree)
         distance_matrix = pd.DataFrame(
             index=[n.name for n in samples], columns=[n.name for n in samples]
         )
diff --git a/meteor/variantcalling.py b/meteor/variantcalling.py
index c32ea69..efc712a 100644
--- a/meteor/variantcalling.py
+++ b/meteor/variantcalling.py
@@ -245,13 +245,13 @@ def execute(self) -> None:
             / self.census["reference"]["reference_file"]["database_dir"]
             / self.census["reference"]["annotation"]["bed"]["filename"]
         )
-        bcftools_exec = run(["bcftools", "--version"], capture_output=True)
+        bcftools_exec = run(["bcftools", "--version"], check=False, capture_output=True)
         bcftools_version = (
             bcftools_exec.stdout.decode("utf-8").split("\n")[0].split(" ")[1]
         )
         if bcftools_exec.returncode != 0:
             logging.error(
-                "Checking bcftools failed:\n%s" % bcftools_exec.stderr.decode("utf-8")
+                "Checking bcftools failed:\n%s", bcftools_exec.stderr.decode("utf-8")
             )
             sys.exit(1)
         elif parse(bcftools_version) < Version("0.1.19"):
@@ -260,11 +260,11 @@ def execute(self) -> None:
                 bcftools_version,
             )
             sys.exit(1)
-        bedtools_exec = run(["bedtools", "--version"], capture_output=True)
+        bedtools_exec = run(["bedtools", "--version"], check=False, capture_output=True)
         bedtools_version = bedtools_exec.stdout.decode("utf-8").split(" ")[1][1:]
         if bedtools_exec.returncode != 0:
             logging.error(
-                "Check bedtools failed:\n%s" % bedtools_exec.stderr.decode("utf-8")
+                "Check bedtools failed:\n%s", bedtools_exec.stderr.decode("utf-8")
             )
             sys.exit()
         elif parse(bedtools_version) < Version("2.18"):
@@ -381,7 +381,7 @@ def execute(self) -> None:
                             "-ibam",
                             str(cram_file.resolve()),
                         ],
-                        capture_output=True,
+                        check=False, capture_output=True,
                     ).stdout.decode("utf-8")
                     self.filter_low_cov_sites(output, temp_low_cov_sites)
                     logging.info(
@@ -389,7 +389,7 @@ def execute(self) -> None:
                         perf_counter() - startlowcovbed,
                     )
                     startlowcov = perf_counter()
-                    bcftools_process = Popen(
+                    with Popen(
                         [
                             "bcftools",
                             "consensus",
@@ -402,16 +402,16 @@ def execute(self) -> None:
                             str(vcf_file.resolve()),
                         ],
                         stdout=PIPE,
-                    )
-                    # capture output of bcftools_process
-                    bcftools_output = bcftools_process.communicate()[0]
+                    ) as bcftools_process:
+                        # capture output of bcftools_process
+                        bcftools_output = bcftools_process.communicate()[0]
 
-                    # compress output using lzma
-                    compressed_output = lzma.compress(bcftools_output)
+                        # compress output using lzma
+                        compressed_output = lzma.compress(bcftools_output)
 
-                    # write compressed output to file
-                    with open(str(consensus_file.resolve()), "wb") as f:
-                        f.write(compressed_output)
+                        # write compressed output to file
+                        with open(str(consensus_file.resolve()), "wb") as f:
+                            f.write(compressed_output)
                 logging.info(
                     "Completed low coverage regions filtering step in %f seconds",
                     perf_counter() - startlowcov,