Skip to content

Commit

Permalink
Fix consensus with large catalogue
Browse files Browse the repository at this point in the history
  • Loading branch information
aghozlane committed Aug 28, 2024
1 parent 5999d27 commit b3104a8
Show file tree
Hide file tree
Showing 7 changed files with 39 additions and 22 deletions.
8 changes: 5 additions & 3 deletions meteor/counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ def save_cram_strain(
"wc",
template=cramdesc,
reference_filename=str(reference.resolve()),
threads=self.meteor.threads
threads=self.meteor.threads,
) as total_reads:
for element in read_chain:
if int(element.reference_name) in gene_id_set:
Expand Down Expand Up @@ -417,7 +417,9 @@ def launch_counting(
else:
logging.info("Launch counting")
pysam.set_verbosity(0)
with AlignmentFile(str(raw_cramfile.resolve()), threads=self.meteor.threads) as cramdesc:
with AlignmentFile(
str(raw_cramfile.resolve()), threads=self.meteor.threads
) as cramdesc:
# create a dictionary containing the length of reference genes
# get name of reference sequence
references = [int(ref) for ref in cramdesc.references]
Expand Down Expand Up @@ -463,7 +465,7 @@ def launch_counting(
"wc",
template=cramdesc,
reference_filename=str(reference.resolve()),
threads=self.meteor.threads
threads=self.meteor.threads,
) as total_reads:
for element in chain.from_iterable(reads.values()):
# if int(element.reference_name) in ref_json["reference_file"]:
Expand Down
9 changes: 7 additions & 2 deletions meteor/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,17 @@ class Downloader(Session):
@staticmethod
def load_catalogues_config() -> dict:
try:
config_data = importlib.resources.files("meteor") / str(Downloader.CONFIG_DATA_FILE)
config_data = importlib.resources.files("meteor") / str(
Downloader.CONFIG_DATA_FILE
)
with importlib.resources.as_file(config_data) as configuration_path:
with configuration_path.open("rt", encoding="UTF-8") as config:
return json.load(config)
except FileNotFoundError:
logging.error("The file %s is missing in meteor source", Downloader.CONFIG_DATA_FILE.name)
logging.error(
"The file %s is missing in meteor source",
Downloader.CONFIG_DATA_FILE.name,
)
sys.exit(1)

@staticmethod
Expand Down
6 changes: 2 additions & 4 deletions meteor/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,17 +153,15 @@ def execute(self) -> None:
) as mapping_exec:
assert mapping_exec.stdout is not None and mapping_exec.stderr is not None
with pysam.AlignmentFile(
mapping_exec.stdout,
"r",
threads=self.meteor.threads
mapping_exec.stdout, "r", threads=self.meteor.threads
) as samdesc:
with pysam.AlignmentFile(
str(cram_file.resolve()),
# cramfile_unsorted,
"wc",
template=samdesc,
reference_filename=str(reference.resolve()),
threads=self.meteor.threads
threads=self.meteor.threads,
) as cram:
for element in samdesc:
cram.write(element)
Expand Down
29 changes: 19 additions & 10 deletions meteor/phylogeny.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,17 @@
"""Effective phylogeny"""
import re
import logging

# import sys
from dataclasses import dataclass, field
from meteor.session import Session, Component

# from subprocess import check_call, run, DEVNULL
from time import perf_counter
from pathlib import Path
import tempfile
from tempfile import NamedTemporaryFile

# from packaging.version import parse
from collections import OrderedDict
from datetime import datetime
Expand Down Expand Up @@ -142,7 +145,10 @@ def execute(self) -> None:
msp_count = len(self.msp_file_list)
for idx, msp_file in enumerate(self.msp_file_list, start=1):
logging.info(
"%d/%d %s: Start analysis", idx, msp_count, msp_file.name.replace(".fasta", "")
"%d/%d %s: Start analysis",
idx,
msp_count,
msp_file.name.replace(".fasta", ""),
)
with NamedTemporaryFile(
mode="wt", dir=self.meteor.tmp_dir, suffix=".fasta"
Expand All @@ -157,7 +163,7 @@ def execute(self) -> None:
logging.info(
"Only %d informative sites (< %d threshold) left after cleaning, skip.",
info_sites,
self.min_info_sites
self.min_info_sites,
)
# elif len(cleaned_seqs) >= 4:
# # Compute trees
Expand All @@ -182,8 +188,8 @@ def execute(self) -> None:
# ],
# stdout = DEVNULL
# )
# if result != 0:
# logging.error("raxml-ng failed with return code %d", result)
# if result != 0:
# logging.error("raxml-ng failed with return code %d", result)
else:
# logging.info(
# "Less than 4 sequences, run cogent3"
Expand All @@ -208,16 +214,19 @@ def execute(self) -> None:
# )
if tree_file.with_suffix(".tree").exists():
self.tree_files.append(tree_file.with_suffix(".tree"))
logging.info("Completed MSP tree for MSP %s", msp_file.name.replace(".fasta", ""))
logging.info(
"Completed MSP tree for MSP %s",
msp_file.name.replace(".fasta", ""),
)
# elif tree_file.with_suffix(".raxml.bestTree").exists():
# self.tree_files.append(tree_file.with_suffix(".raxml.bestTree"))
# logging.info("Completed MSP tree with raxml")
# logging.info("Completed MSP tree with raxml")
else:
logging.info(
"No tree file generated"
)
logging.info("No tree file generated")
logging.info("Completed phylogeny in %f seconds", perf_counter() - start)
logging.info("Trees were generated for %d/%d MSPs", len(self.tree_files), msp_count)
logging.info(
"Trees were generated for %d/%d MSPs", len(self.tree_files), msp_count
)
# config = self.set_tree_config(raxml_ng_version)
config = self.set_tree_config()
self.save_config(config, self.meteor.tree_dir / "census_stage_4.json")
6 changes: 3 additions & 3 deletions meteor/tests/test_counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def counter_unique(datadir: Path, tmp_path: Path) -> Counter:
alignment_number=10000,
keep_filtered_alignments=True,
identity_threshold=0.95,
core_size=100
core_size=100,
)


Expand All @@ -77,7 +77,7 @@ def counter_smart_shared(datadir: Path, tmp_path: Path) -> Counter:
alignment_number=10000,
keep_filtered_alignments=True,
identity_threshold=0.95,
core_size=100
core_size=100,
)


Expand All @@ -100,7 +100,7 @@ def counter_total(datadir: Path, tmp_path: Path) -> Counter:
keep_all_alignments=False,
keep_filtered_alignments=True,
identity_threshold=0.95,
core_size=100
core_size=100,
)


Expand Down
1 change: 1 addition & 0 deletions meteor/tests/test_fastq_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def test_replace_ext(builder: FastqImporter, fastq_filename: str, name: str) ->
def test_get_tag_none(builder: FastqImporter, fastq_filename: str) -> None:
assert builder.get_tag(fastq_filename) is None


@pytest.mark.parametrize(
("fastq_filename", "tag"),
(("simple_case_R1.fastq", "1"), ("pretty.complex_pain_2.fq.xz", "2")),
Expand Down
2 changes: 2 additions & 0 deletions meteor/variantcalling.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,8 @@ def create_consensus(
consensus[row["startpos"] : row["endpos"]] = (
self.meteor.DEFAULT_GAP_CHAR
)
else:
continue
consensus_f.write(f">{gene_id}\n")
consensus_f.write("".join(consensus) + "\n")
del consensus
Expand Down

0 comments on commit b3104a8

Please sign in to comment.