Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix mypy and pylint warnings #41

Merged
merged 7 commits into from
Apr 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions meteor/counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class Counter(Session):

def __post_init__(self) -> None:
if self.counting_type not in Counter.COUNTING_TYPES:
raise ValueError(f'{self.counting_type} is not a valid counting type')
raise ValueError(f"{self.counting_type} is not a valid counting type")

if self.meteor.tmp_path:
self.meteor.tmp_path.mkdir(exist_ok=True)
Expand Down Expand Up @@ -79,7 +79,6 @@ def launch_mapping(self) -> None:
self.mapping_type,
self.trim,
self.alignment_number,
self.counting_type,
self.identity_threshold,
)
mapping_process.execute()
Expand Down
4 changes: 2 additions & 2 deletions meteor/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def load_catalogues_config() -> dict:
except FileNotFoundError:
logging.error("The file %s is missing in meteor source", Downloader.CONFIG_DATA_FILE.name)
sys.exit(1)

@staticmethod
def get_available_catalogues() -> list[str]:
catalogues_config = Downloader.load_catalogues_config()
Expand Down Expand Up @@ -130,7 +130,7 @@ def execute(self) -> None:
print(flush=True)
if self.choice == Downloader.TEST_CATALOGUE:
for sample in self.catalogues_config[self.choice]["samples"]:
logging.info(f"Download {sample} fastq file")
logging.info("Download %s fastq file", sample)
url_fastq = self.catalogues_config[self.choice]["samples"][sample][
"catalogue"
]
Expand Down
84 changes: 34 additions & 50 deletions meteor/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,11 @@ class Mapper(Session):
mapping_type: str
trim: int
alignment_number: int
counting_type: str
identity_threshold: float

def __post_init__(self) -> None:
if self.mapping_type not in Mapper.MAPPING_TYPES:
raise ValueError(f'{self.mapping_type} is not a valid mapping type')
raise ValueError(f"{self.mapping_type} is not a valid mapping type")

def set_mapping_config(
self,
Expand Down Expand Up @@ -117,11 +116,10 @@ def execute(self) -> None:
if self.trim > Mapper.NO_TRIM:
parameters += f"--trim-to {self.trim} "
if self.alignment_number > 1:
# and self.counting_type != "best"
parameters += f"-k {self.alignment_number} "
# Check the bowtie2 version
bowtie_exec = run(["bowtie2", "--version"], capture_output=True)
bowtie_version = str(bowtie_exec.stdout).split("\\n")[0].split(" ")[2]
bowtie_exec = run(["bowtie2", "--version"], check=False, capture_output=True)
bowtie_version = str(bowtie_exec.stdout).split("\\n", maxsplit=1)[0].split(" ")[2]
if bowtie_exec.returncode != 0:
logging.error(
"Checking bowtie2 version failed:\n%s",
Expand All @@ -136,7 +134,7 @@ def execute(self) -> None:
sys.exit(1)
# Start mapping
start = perf_counter()
mapping_exec = Popen(
with Popen(
[
"bowtie2",
parameters,
Expand All @@ -149,51 +147,37 @@ def execute(self) -> None:
],
stdout=PIPE,
stderr=PIPE,
)
# cramfile_unsorted = Path(mkstemp(dir=self.meteor.tmp_dir)[1])
assert mapping_exec.stdout is not None and mapping_exec.stderr is not None
with pysam.AlignmentFile(
mapping_exec.stdout,
"r",
) as samdesc:
) as mapping_exec:
assert mapping_exec.stdout is not None and mapping_exec.stderr is not None
with pysam.AlignmentFile(
str(cram_file.resolve()),
# cramfile_unsorted,
"wc",
template=samdesc,
reference_filename=str(reference.resolve()),
) as cram:
for element in samdesc:
cram.write(element)
# pysam.sort(
# "-o",
# str(cram_file.resolve()),
# "-@",
# str(self.meteor.threads),
# "-O",
# "cram",
# str(cramfile_unsorted.resolve()),
# catch_stdout=False,
# )
# pysam.index(str(cram_file.resolve()))
# Read standard error from the process (non-blocking read)
mapping_result = mapping_exec.stderr.read().decode("utf-8")
mapping_exec.stderr.close()

# Wait for the process to finish and get the exit code
exit_code = mapping_exec.wait()

# Check for errors and print the error output if necessary
if exit_code != 0:
logging.error("bowtie2 failed:\n%s" % mapping_result)
sys.exit(1)
try:
mapping_log = findall(r"([0-9]+)\s+\(", mapping_result)
assert len(mapping_log) == 4
mapping_data = [int(i) for i in mapping_log]
except AssertionError:
logging.error("Could not access the mapping result from bowtie2")
sys.exit(1)
mapping_exec.stdout,
"r",
) as samdesc:
with pysam.AlignmentFile(
str(cram_file.resolve()),
# cramfile_unsorted,
"wc",
template=samdesc,
reference_filename=str(reference.resolve()),
) as cram:
for element in samdesc:
cram.write(element)
# Read standard error from the process (non-blocking read)
mapping_result = mapping_exec.stderr.read().decode("utf-8")
mapping_exec.stderr.close()
# Wait for the process to finish and get the exit code
exit_code = mapping_exec.wait()
# Check for errors and print the error output if necessary
if exit_code != 0:
logging.error("bowtie2 failed:\n%s", mapping_result)
sys.exit(1)
try:
mapping_log = findall(r"([0-9]+)\s+\(", mapping_result)
assert len(mapping_log) == 4
mapping_data = [int(i) for i in mapping_log]
except AssertionError:
logging.error("Could not access the mapping result from bowtie2")
sys.exit(1)
logging.info("Completed mapping creation in %f seconds", perf_counter() - start)
config = self.set_mapping_config(cram_file, bowtie_version, mapping_data)
self.save_config(config, self.census["Stage1FileName"])
27 changes: 10 additions & 17 deletions meteor/merging.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
from pathlib import Path
import logging
import sys
import numpy as np
from biom.table import Table # type: ignore
from typing import ClassVar
from functools import partial


@dataclass
Expand All @@ -41,7 +41,7 @@ class Merging(Session):
min_msp_occurrence: int
remove_sample_with_no_msp: bool
output_mpa: bool
mpa_taxonomic_level: str
mpa_taxonomic_level: str|None
output_biom: bool
output_gene_matrix: bool
ranks: dict[str, str] = field(
Expand Down Expand Up @@ -84,7 +84,7 @@ def find_files_to_merge(
for my_sample, my_dir in input_dir.items()
}
# Check that there is exactly one element in each list
len_list = list(set([len(value) for value in list(dict_to_merge.values())]))
len_list = list({len(value) for value in list(dict_to_merge.values())})
assert len(len_list) == 1
assert len_list[0] == 1
files_to_merge = {
Expand All @@ -103,7 +103,7 @@ def extract_json_info(
"""
# Check that sections are present
try:
assert all([my_section in config for my_section in list(param_dict.keys())])
assert all(my_section in config for my_section in param_dict.keys())
except AssertionError:
logging.error("Missing required section in census json file.")
sys.exit(1)
Expand All @@ -115,11 +115,9 @@ def extract_json_info(
# Check that required fields are present
try:
assert all(
[
my_field in config[my_section]
for my_section in param_dict
for my_field in param_dict[my_section]
]
my_field in config[my_section]
for my_section in param_dict
for my_field in param_dict[my_section]
)
except AssertionError:
logging.error("Missing required fields in census ini file.")
Expand Down Expand Up @@ -259,12 +257,7 @@ def execute(self) -> None:
# Save database_type for later use
try:
database_type_all = list(
set(
[
my_info["database_type"]
for my_info in list(all_information.values())
]
)
{my_info["database_type"] for my_info in list(all_information.values())}
)
assert len(database_type_all) == 1
database_type = database_type_all[0]
Expand Down Expand Up @@ -365,7 +358,7 @@ def execute(self) -> None:
# Apply the prefixes to each taxonomic rank
for rank, prefix in self.ranks.items():
annotation[rank] = annotation[rank].apply(
lambda x: f"{prefix}{x}"
partial(lambda prefix, x: f"{prefix}{x}", prefix)
)
observ_metadata = [
{"taxonomy": row.iloc[1:].tolist()}
Expand All @@ -382,7 +375,7 @@ def execute(self) -> None:
# Generate JSON representation of the BIOM table
biom_json = biom_table.to_json(generated_by="Meteor")
# Write the JSON to a file
with open(output_name.with_suffix(".biom"), "wt") as f:
with open(output_name.with_suffix(".biom"), "wt", encoding="UTF-8") as f:
f.write(biom_json)
# with h5py.File(output_name.with_suffix(".biom"), "w") as f:
# table.to_hdf5(f, generated_by="Meteor", compress=True)
Expand Down
2 changes: 1 addition & 1 deletion meteor/meteor.py
Original file line number Diff line number Diff line change
Expand Up @@ -783,7 +783,7 @@ def main() -> None: # pragma: no cover
args.min_msp_occurrence,
args.remove_sample_with_no_msp,
False,
"a",
None,
#args.output_mpa,
#args.taxonomic_level,
args.output_biom,
Expand Down
14 changes: 9 additions & 5 deletions meteor/phylogeny.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def execute(self) -> None:
# Define the regex pattern to match the version number
version_pattern = re.compile(r"RAxML-NG v\. (\d+\.\d+\.\d+)")
raxml_ng_help = run(
["raxml-ng", "--version"], capture_output=True
["raxml-ng", "--version"], check=False, capture_output=True
).stdout.decode("utf-8")
match = version_pattern.search(raxml_ng_help)
# Check if a match is found
Expand Down Expand Up @@ -152,7 +152,7 @@ def execute(self) -> None:
"MSP %s have less than 4 sequences, we compute the mutation rate",
msp_file.name,
)
with open(tree_file.parent / "cleaned_sequences.fasta", "w") as f:
with open(tree_file.parent / "cleaned_sequences.fasta", "w", encoding="UTF-8") as f:
for seq_name, sequence in cleaned_seqs.items():
f.write(f">{seq_name}\n{sequence}\n")
mutation_rate = []
Expand All @@ -163,7 +163,7 @@ def execute(self) -> None:
seq2 = cleaned_seqs[seq_ids[j]]
mutation_rate += [self.compute_mutation_rate(seq1, seq2)]
# Construct Newick format string
with open(tree_file.with_suffix(".tree"), "wt") as tree:
with open(tree_file.with_suffix(".tree"), "wt", encoding="UTF-8") as tree:
if len(seq_ids) == 2:
tree.write(
f"({seq_ids[0]}:{mutation_rate[0]}, {seq_ids[1]}:{mutation_rate[0]});"
Expand All @@ -174,11 +174,15 @@ def execute(self) -> None:
min_rate_idx == 0
): # seq1 and seq2 have the smallest distance
tree.write(
f"(({seq_ids[0]}:{mutation_rate[0]}, {seq_ids[1]}:{mutation_rate[0]}):{mutation_rate[1]}, {seq_ids[2]}:{mutation_rate[1]});"
f"(({seq_ids[0]}:{mutation_rate[0]}, "
f"{seq_ids[1]}:{mutation_rate[0]}):{mutation_rate[1]}, "
f"{seq_ids[2]}:{mutation_rate[1]});"
)
else: # seq1 and seq3 have the smallest distance
tree.write(
f"(({seq_ids[0]}:{mutation_rate[1]}, {seq_ids[2]}:{mutation_rate[1]}):{mutation_rate[0]}, {seq_ids[1]}:{mutation_rate[0]});"
f"(({seq_ids[0]}:{mutation_rate[1]}, "
f"{seq_ids[2]}:{mutation_rate[1]}):{mutation_rate[0]}, "
f"{seq_ids[1]}:{mutation_rate[0]});"
)
tree_files.append(tree_file)
logging.info("Completed MSP tree %d/%d", idx, msp_count)
Expand Down
6 changes: 3 additions & 3 deletions meteor/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,16 @@ class Profiler(Session):
meteor: type[Component]
rarefaction_level: int
seed: int
normalization: str
normalization: str|None
core_size: int
msp_filter: float
completeness: float
coverage_factor: float

def __post_init__(self):
if self.normalization not in Profiler.NORMALIZATIONS:
raise ValueError(f'{self.normalization} is not a valid normalization')
raise ValueError(f"{self.normalization} is not a valid normalization")

# Get the json file
self.sample_config = self.get_census_stage(self.meteor.mapping_dir, 1)

Expand Down
2 changes: 1 addition & 1 deletion meteor/referencebuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def execute(self) -> None:
# Prepare the reference for meteor
self.create_reference()
# Check the bowtie2 version
bowtie_exec = run(["bowtie2", "--version"], capture_output=True)
bowtie_exec = run(["bowtie2", "--version"], check=False, capture_output=True)
bowtie_version = bowtie_exec.stdout.decode("utf-8").split(" ")[2].split("\n")[0]
if bowtie_exec.returncode != 0:
logging.error(
Expand Down
6 changes: 2 additions & 4 deletions meteor/strain.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
@dataclass
class Strain(Session):
"""Counter session map and count"""

DEFAULT_MAX_DEPTH: ClassVar[int] = 100
MIN_MIN_SNP_DEPTH: ClassVar[int] = 1
MAX_MIN_SNP_DEPTH: ClassVar[int] = 10000
Expand Down Expand Up @@ -166,9 +166,7 @@ def get_msp_variant(
consensus_file,
)
sys.exit(1)
gene_dict = {
gene_id: seq for gene_id, seq in self.get_sequences(consensus_file)
}
gene_dict = dict(self.get_sequences(consensus_file))
logging.info(
"%s MSPs have sufficient signal for SNP analysis ",
len(msp_with_overlapping_genes["msp_name"].values),
Expand Down
4 changes: 2 additions & 2 deletions meteor/tests/test_counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,8 +284,8 @@ def test_save_cram(counter_unique: Counter, datadir: Path, tmp_path: Path) -> No
reads, _ = counter_unique.filter_alignments(
cramdesc
) # pylint: disable=unused-variable
read_list = list(chain(reads.values()))
merged_list = list(chain.from_iterable(read_list))
read_list = reads.values()
merged_list = chain.from_iterable(read_list)
tmpcramfile = tmp_path / "test"
counter_unique.save_cram_strain(tmpcramfile, cramdesc, merged_list, ref_json)
assert tmpcramfile.exists()
Expand Down
8 changes: 4 additions & 4 deletions meteor/tests/test_fastq_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,13 @@ def test_replace_ext(builder: FastqImporter, fastq_filename: str, name: str) ->


@pytest.mark.parametrize(
("fastq_filename", "tag"),
("fastq_filename"),
(
("test.fastq.gz", ""),
pytest.param("pretty.complex_pain.fasta", "", id="fasta"),
("test.fastq.gz"),
pytest.param("pretty.complex_pain.fasta", id="fasta"),
),
)
def test_get_tag_none(builder: FastqImporter, fastq_filename: str, tag: str) -> None:
def test_get_tag_none(builder: FastqImporter, fastq_filename: str) -> None:
assert builder.get_tag(fastq_filename) is None

@pytest.mark.parametrize(
Expand Down
1 change: 0 additions & 1 deletion meteor/tests/test_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ def mapping_builder(datadir: Path, tmp_path: Path) -> Mapper:
"end-to-end",
80,
10000,
"smart_shared_reads",
0.95,
)

Expand Down
Loading
Loading