From 6f8bb0f2f8a5e72cc86044d2b3fe83745302fcc0 Mon Sep 17 00:00:00 2001 From: murphycj Date: Sun, 29 Sep 2024 22:23:56 -0400 Subject: [PATCH] update unit test, use Ensembl ID if name is empty --- agfusion/cli.py | 19 ++++++++++--------- agfusion/model.py | 11 ++++++++++- test/test_parsers.py | 32 ++++++++++++++++---------------- 3 files changed, 36 insertions(+), 26 deletions(-) diff --git a/agfusion/cli.py b/agfusion/cli.py index 4c2f18a..a65dc51 100644 --- a/agfusion/cli.py +++ b/agfusion/cli.py @@ -111,15 +111,17 @@ def annotate( if batch_out_dir is not None: + gene1_name = fusion.gene5prime.gene.name + if gene1_name == "": + gene1_name = fusion.gene5prime.gene.id + + gene2_name = fusion.gene3prime.gene.name + if gene2_name == "": + gene2_name = fusion.gene3prime.gene.id + outdir = join( batch_out_dir, - fusion.gene5prime.gene.name - + "-" - + str(junction5prime) - + "_" - + fusion.gene3prime.gene.name - + "-" - + str(junction3prime), + gene1_name + "-" + str(junction5prime) + "_" + gene2_name + "-" + str(junction3prime), ) fusion.save_transcript_cdna(out_dir=outdir, middlestar=args.middlestar) @@ -155,8 +157,7 @@ def batch_mode(args, agfusion_db, pyensembl_data, rename, colors): agfusion_db.logger.warn(f"Output directory {args.out} already exists! Overwriting...") if not Path(args.file).exists(): - FileNotFoundError(f"File not found {args.file}") - sys.exit(1) + raise FileNotFoundError(f"File not found {args.file}") if args.algorithm in parsers.parsers: for fusion in parsers.parsers[args.algorithm](args.file, agfusion_db.logger): diff --git a/agfusion/model.py b/agfusion/model.py index 432ccce..ac08ae9 100644 --- a/agfusion/model.py +++ b/agfusion/model.py @@ -1,6 +1,7 @@ """ Holds classes for containing information for Gene and Fusion exon and protein information. 
""" + import itertools import os import re @@ -342,7 +343,15 @@ def __init__( noncanonical=noncanonical, ) - self.name = self.gene5prime.gene.name + "_" + self.gene3prime.gene.name + gene1_name = self.gene5prime.gene.name + if gene1_name == "": + gene1_name = self.gene5prime.gene.id + + gene2_name = self.gene3prime.gene.name + if gene2_name == "": + gene2_name = self.gene3prime.gene.id + + self.name = gene1_name + "_" + gene2_name self.name = self.name.replace("/", "-") # construct all the fusion transcript combinations diff --git a/test/test_parsers.py b/test/test_parsers.py index 57930de..4378101 100644 --- a/test/test_parsers.py +++ b/test/test_parsers.py @@ -16,9 +16,9 @@ db_human = database.AGFusionDB(abspath(join(curdir, "agfusion.homo_sapiens.75.db"))) db_human.build = "homo_sapiens_75" -data_human95 = pyensembl.EnsemblRelease(111, "human") -db_human95 = database.AGFusionDB(abspath(join(curdir, "agfusion.homo_sapiens.111.db"))) -db_human95.build = "homo_sapiens_111" +data_human_hg38 = pyensembl.EnsemblRelease(111, "human") +db_human_hg38 = database.AGFusionDB(abspath(join(curdir, "agfusion.homo_sapiens.111.db"))) +db_human_hg38.build = "homo_sapiens_111" BASEDIR = "./data/FusionsFindingAlgorithms" @@ -112,15 +112,15 @@ def test_with_coding_effect(self): all_fusions = ["ARID3B_MYCNUT", "ARID3B_MYCN", "TVP23C_CDRT4"] for fusion in parsers.parsers["starfusion"]( f"{BASEDIR}/STARFusion/" + "star-fusion.fusion_predictions.abridged.coding_effect.tsv", - db_human95.logger, + db_human_hg38.logger, ): fusion = model.Fusion( gene5prime=fusion["gene5prime"], gene5primejunction=fusion["gene5prime_junction"], gene3prime=fusion["gene3prime"], gene3primejunction=fusion["gene3prime_junction"], - db=db_human95, - pyensembl_data=data_human95, + db=db_human_hg38, + pyensembl_data=data_human_hg38, protein_databases=["pfam"], noncanonical=False, ) @@ -156,15 +156,15 @@ def test_parse_human(self): all_fusions = ["BCAS4_BCAS3", "HNRNPC_ACIN1"] for fusion in 
parsers.parsers["longgf"]( f"{BASEDIR}/LongGF/fusions_hg38.log", - db_human95.logger, + db_human_hg38.logger, ): fusion = model.Fusion( gene5prime=fusion["gene5prime"], gene5primejunction=fusion["gene5prime_junction"], gene3prime=fusion["gene3prime"], gene3primejunction=fusion["gene3prime_junction"], - db=db_human95, - pyensembl_data=data_human95, + db=db_human_hg38, + pyensembl_data=data_human_hg38, protein_databases=["pfam"], noncanonical=False, ) @@ -177,19 +177,19 @@ class TestFusionInspector(unittest.TestCase): def test_parse_human(self): """Test basic parsing.""" - all_fusions = ["AL627171.2_TPM3", "STAT3_AL627171.2"] + all_fusions = ["ENSG00000282885_TPM3", "STAT3_ENSG00000282885"] for fusion in parsers.parsers["fusioninspector"]( f"{BASEDIR}/FusionInspector/test.FusionInspector.fusions.abridged.txt", - db_human95.logger, + db_human_hg38.logger, ): fusion = model.Fusion( gene5prime=fusion["gene5prime"], gene5primejunction=fusion["gene5prime_junction"], gene3prime=fusion["gene3prime"], gene3primejunction=fusion["gene3prime_junction"], - db=db_human95, - pyensembl_data=data_human95, + db=db_human_hg38, + pyensembl_data=data_human_hg38, protein_databases=["pfam"], noncanonical=False, ) @@ -197,15 +197,15 @@ def test_parse_human(self): for fusion in parsers.parsers["fusioninspector"]( f"{BASEDIR}/FusionInspector/test.FusionInspector.fusions.txt", - db_human95.logger, + db_human_hg38.logger, ): fusion = model.Fusion( gene5prime=fusion["gene5prime"], gene5primejunction=fusion["gene5prime_junction"], gene3prime=fusion["gene3prime"], gene3primejunction=fusion["gene3prime_junction"], - db=db_human95, - pyensembl_data=data_human95, + db=db_human_hg38, + pyensembl_data=data_human_hg38, protein_databases=["pfam"], noncanonical=False, )