Skip to content

Commit

Permalink
Fix issue #56, fix reference builder, fix default parameters in strain.
Browse files: browse the repository at this point in the history
  • Loading branch information
aghozlane committed Nov 19, 2024
1 parent 18f296f commit dbe9387
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 19 deletions.
10 changes: 3 additions & 7 deletions meteor/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class Profiler(Session):
NO_RAREFACTION: ClassVar[int] = 0
DEFAULT_RAREFACTION_LEVEL: ClassVar[int] = NO_RAREFACTION
DEFAULT_RANDOM_SEED: ClassVar[int] = 1234
NORMALIZATIONS: ClassVar[list[str | None]] = [None, "coverage", "fpkm", "raw"]
NORMALIZATIONS: ClassVar[list[str]] = ["coverage", "fpkm", "raw"]
DEFAULT_NORMALIZATION: ClassVar[str] = "coverage"
DEFAULT_COVERAGE_FACTOR: ClassVar[float] = 100.0
DEFAULT_MSP_FILTER_COMPLETE: ClassVar[float] = 0.1
Expand Down Expand Up @@ -101,19 +101,15 @@ def __post_init__(self):
self.msp_filter = self.msp_filter_user

# Get the associated count table
self.input_count_table = (
self.meteor.mapping_dir / self.sample_name
).with_suffix(".tsv.xz")
self.input_count_table = self.meteor.mapping_dir / f"{self.sample_name}.tsv.xz"
try:
assert self.input_count_table.is_file()
except AssertionError:
logging.error("The count table %s does not exist.", self.input_count_table)
sys.exit(1)

# Add a symlink to get the raw count table in the profile directory (for merging purpose)
raw_count_table_symlink = (
self.stage2_dir / f"{self.sample_name}_raw"
).with_suffix(".tsv.xz")
raw_count_table_symlink = self.stage2_dir / f"{self.sample_name}_raw.tsv.xz"
try:
raw_count_table_symlink.symlink_to(self.input_count_table.resolve())
except FileExistsError:
Expand Down
17 changes: 10 additions & 7 deletions meteor/referencebuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import bz2
import lzma
import sys
import bgzip
from subprocess import check_call, run
from pathlib import Path
from dataclasses import dataclass, field
Expand Down Expand Up @@ -55,7 +56,8 @@ def __post_init__(self) -> None:
self.output_annotation_file = (
self.database_dir / f"{self.meteor.ref_name}_annotation.tsv"
)
self.output_fasta_file = self.fasta_dir / f"{self.meteor.ref_name}.fasta"

self.output_fasta_file = self.fasta_dir / f"{self.meteor.ref_name}.fasta.gz"
self.output_index_file = self.fasta_dir / f"{self.meteor.ref_name}.dict"

# Write configuration file
Expand Down Expand Up @@ -119,12 +121,13 @@ def create_reference(self):
"wt", encoding="UTF-8"
) as output_annotation:
output_annotation.write("gene_id\tgene_name\tgene_length\n")
with self.output_fasta_file.open("wt", encoding="UTF-8") as output_fasta:
for gene_id, (header, len_seq, seq) in enumerate(
self.read_reference(), start=1
):
output_annotation.write(f"{gene_id}\t{header}\t{len_seq}\n")
output_fasta.write(f">{gene_id}\n{seq}\n")
with self.output_fasta_file.open("wb") as output_fasta:
with bgzip.BGZipWriter(output_fasta) as fh:
for gene_id, (header, len_seq, seq) in enumerate(
self.read_reference(), start=1
):
output_annotation.write(f"{gene_id}\t{header}\t{len_seq}\n")
fh.write(f">{gene_id}\n{seq}\n".encode("utf-8"))

def execute(self) -> None:
"""Build the database"""
Expand Down
2 changes: 1 addition & 1 deletion meteor/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ def load_data(self, file_path: Path):
:return: pd.DataFrame: Data loaded into a pandas DataFrame.
"""
# Choose the appropriate pandas function based on extension
if "".join(file_path.suffixes) in [".tsv", ".tsv.xz"]:
if "".join(file_path.suffixes[-2:]) in [".tsv", ".tsv.xz"]:
return pd.read_csv(
file_path,
sep="\t",
Expand Down
4 changes: 2 additions & 2 deletions meteor/strain.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ class Strain(Session):
MIN_MIN_SNP_DEPTH: ClassVar[int] = 1
MAX_MIN_SNP_DEPTH: ClassVar[int] = 10000
DEFAULT_MIN_SNP_DEPTH: ClassVar[int] = 3
DEFAULT_MIN_FREQUENCY: ClassVar[float] = 0.01
DEFAULT_MIN_FREQUENCY: ClassVar[float] = 0.1
DEFAULT_PLOIDY: ClassVar[int] = 1
MIN_MIN_MSP_COVERAGE: ClassVar[int] = 1
MAX_MIN_MSP_COVERAGE: ClassVar[int] = 100
DEFAULT_MIN_MSP_COVERAGE: ClassVar[int] = 50
DEFAULT_MIN_MSP_COVERAGE: ClassVar[int] = 80
DEFAULT_MIN_GENE_COVERAGE: ClassVar[float] = 0.5
DEFAULT_NUM_THREADS: ClassVar[int] = 1
DEFAULT_MIN_DEPTH: ClassVar[int] = 3
Expand Down
2 changes: 1 addition & 1 deletion meteor/tests/test_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def profiler_standard(datadir: Path, tmp_path: Path) -> Profiler:
rarefaction_level=-1,
seed=12345,
coverage_factor=100.0,
normalization=None,
normalization="raw",
core_size=4,
msp_filter_user=0.5,
completeness=0.6,
Expand Down
2 changes: 1 addition & 1 deletion meteor/tests/test_reference_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def test_read_reference(builder_defec: ReferenceBuilder):
(
pytest.param(
"be4ea162246d2f23ed8b33bdf9b209d8",
"2912b682a8e7554025cc5feadd641570",
"55b4a418bd2814f14dd84b7217762b8b",
id="Accurate output",
),
),
Expand Down

0 comments on commit dbe9387

Please sign in to comment.