From d1e364e9f18f06f871d1d099ab88a2a08de3e220 Mon Sep 17 00:00:00 2001 From: Amine GHOZLANE Date: Tue, 26 Nov 2024 10:25:16 +0100 Subject: [PATCH] Improve phylogeny speed --- meteor/meteor.py | 15 +++++++-------- meteor/phylogeny.py | 8 ++++++-- meteor/tests/test_phylogeny.py | 2 +- meteor/tests/test_treebuilder.py | 2 +- meteor/treebuilder.py | 3 ++- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/meteor/meteor.py b/meteor/meteor.py index 96192f1..7c3de95 100644 --- a/meteor/meteor.py +++ b/meteor/meteor.py @@ -649,6 +649,12 @@ def get_arguments() -> Namespace: # pragma: no cover type=isborned01, help="Minimum number of informative sites in the alignment (default: >= %(default)d).", ) + tree_parser.add_argument( + "-r", + dest="gtr", + action="store_true", + help="Compute GTR model (default: False, slower).", + ) tree_parser.add_argument( "-f", dest="format", @@ -857,14 +863,7 @@ def main() -> None: # pragma: no cover strain_detector = Strain(meteor, 100, 2, 2, 0.2, 1, 1, 0.2, 10, False) strain_detector.execute() meteor.tree_dir = Path(tmpdirname) / "tree" - trees = TreeBuilder( - meteor, - 0.1, - 4, - 800, - 600, - None, - ) + trees = TreeBuilder(meteor, 0.1, 4, False, 800, 600, None) trees.execute() # Close logging logger.handlers[0].close() diff --git a/meteor/phylogeny.py b/meteor/phylogeny.py index 25dd2ca..fc72877 100644 --- a/meteor/phylogeny.py +++ b/meteor/phylogeny.py @@ -48,6 +48,7 @@ class Phylogeny(Session): msp_file_list: list[Path] max_gap: float min_info_sites: int + gtr: bool tree_files: list[Path] = field(default_factory=list) def compute_site_info(self, sequences: Iterable[str]) -> list[float]: @@ -171,8 +172,11 @@ def process_msp_file( ) # cleaned_alignment = load_aligned_seqs(ali_file, moltype="dna") # d = EstimateDistances(cleaned_alignment, submodel=GTR()) - d = EstimateDistances(aligned_seqs, submodel=GTR()) - d.run(show_progress=False) + if self.gtr: + d = EstimateDistances(aligned_seqs, submodel=GTR()) + d.run(show_progress=False) + else: + d = aligned_seqs.distance_matrix(calc="tn93", show_progress=False) # Create UPGMA Tree mycluster = upgma(d.get_pairwise_distances()) diff --git a/meteor/tests/test_phylogeny.py b/meteor/tests/test_phylogeny.py index 91acbc1..41c97e2 100644 --- a/meteor/tests/test_phylogeny.py +++ b/meteor/tests/test_phylogeny.py @@ -27,7 +27,7 @@ def phylogeny_builder(datadir: Path, tmp_path: Path) -> Phylogeny: meteor.tree_dir.mkdir() meteor.threads = 1 meteor.DEFAULT_GAP_CHAR = "?" - return Phylogeny(meteor, [Path(datadir / "msp_0864.fasta")], 0.5, 4) + return Phylogeny(meteor, [Path(datadir / "msp_0864.fasta")], 0.5, 4, False) def test_compute_site_info(phylogeny_builder: Phylogeny): diff --git a/meteor/tests/test_treebuilder.py b/meteor/tests/test_treebuilder.py index 27045be..40e467b 100644 --- a/meteor/tests/test_treebuilder.py +++ b/meteor/tests/test_treebuilder.py @@ -29,7 +29,7 @@ def treebuilder_builder(datadir: Path, tmp_path: Path) -> TreeBuilder: meteor.tree_dir.mkdir() meteor.threads = 1 meteor.strain_dir = datadir / "strain" - return TreeBuilder(meteor, 0.5, 4, 500, 500, None) + return TreeBuilder(meteor, 0.5, 4, False, 500, 500, None) def test_concatenate(treebuilder_builder: TreeBuilder, datadir: Path): diff --git a/meteor/treebuilder.py b/meteor/treebuilder.py index bef96f0..b874a62 100644 --- a/meteor/treebuilder.py +++ b/meteor/treebuilder.py @@ -42,6 +42,7 @@ class TreeBuilder(Session): meteor: type[Component] max_gap: float min_info_sites: int + gtr: bool width: int height: int format: str | None @@ -103,7 +104,7 @@ def execute(self) -> None: msp_file_list = self.concatenate(msp_file_dict) # Compute phylogenies phylogeny_process = Phylogeny( - self.meteor, msp_file_list, self.max_gap, self.min_info_sites + self.meteor, msp_file_list, self.max_gap, self.min_info_sites, self.gtr ) phylogeny_process.execute() # Analyze tree data