Skip to content

Commit

Permalink
Update unit test; use the Ensembl gene ID if the gene name is empty
Browse files (browse the repository at this point in the history)
  • Loading branch information
murphycj committed Sep 30, 2024
1 parent 323cdf8 commit 6f8bb0f
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 26 deletions.
19 changes: 10 additions & 9 deletions agfusion/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,15 +111,17 @@ def annotate(

if batch_out_dir is not None:

gene1_name = fusion.gene5prime.gene.name
if gene1_name == "":
gene1_name = fusion.gene5prime.gene.id

gene2_name = fusion.gene3prime.gene.name
if gene2_name == "":
gene2_name = fusion.gene3prime.gene.id

outdir = join(
batch_out_dir,
fusion.gene5prime.gene.name
+ "-"
+ str(junction5prime)
+ "_"
+ fusion.gene3prime.gene.name
+ "-"
+ str(junction3prime),
gene1_name + "-" + str(junction5prime) + "_" + gene2_name + "-" + str(junction3prime),
)

fusion.save_transcript_cdna(out_dir=outdir, middlestar=args.middlestar)
Expand Down Expand Up @@ -155,8 +157,7 @@ def batch_mode(args, agfusion_db, pyensembl_data, rename, colors):
agfusion_db.logger.warn(f"Output directory {args.out} already exists! Overwriting...")

if not Path(args.file).exists():
FileNotFoundError(f"File not found {args.file}")
sys.exit(1)
raise FileNotFoundError(f"File not found {args.file}")

if args.algorithm in parsers.parsers:
for fusion in parsers.parsers[args.algorithm](args.file, agfusion_db.logger):
Expand Down
11 changes: 10 additions & 1 deletion agfusion/model.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Holds classes for containing information for Gene and Fusion exon and protein information.
"""

import itertools
import os
import re
Expand Down Expand Up @@ -342,7 +343,15 @@ def __init__(
noncanonical=noncanonical,
)

self.name = self.gene5prime.gene.name + "_" + self.gene3prime.gene.name
gene1_name = self.gene5prime.gene.name
if gene1_name == "":
gene1_name = self.gene5prime.gene.id

gene2_name = self.gene3prime.gene.name
if gene2_name == "":
gene2_name = self.gene3prime.gene.id

self.name = gene1_name + "_" + gene2_name
self.name = self.name.replace("/", "-")

# construct all the fusion transcript combinations
Expand Down
32 changes: 16 additions & 16 deletions test/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
db_human = database.AGFusionDB(abspath(join(curdir, "agfusion.homo_sapiens.75.db")))
db_human.build = "homo_sapiens_75"

data_human95 = pyensembl.EnsemblRelease(111, "human")
db_human95 = database.AGFusionDB(abspath(join(curdir, "agfusion.homo_sapiens.111.db")))
db_human95.build = "homo_sapiens_111"
data_human_hg38 = pyensembl.EnsemblRelease(111, "human")
db_human_hg38 = database.AGFusionDB(abspath(join(curdir, "agfusion.homo_sapiens.111.db")))
db_human_hg38.build = "homo_sapiens_111"


BASEDIR = "./data/FusionsFindingAlgorithms"
Expand Down Expand Up @@ -112,15 +112,15 @@ def test_with_coding_effect(self):
all_fusions = ["ARID3B_MYCNUT", "ARID3B_MYCN", "TVP23C_CDRT4"]
for fusion in parsers.parsers["starfusion"](
f"{BASEDIR}/STARFusion/" + "star-fusion.fusion_predictions.abridged.coding_effect.tsv",
db_human95.logger,
db_human_hg38.logger,
):
fusion = model.Fusion(
gene5prime=fusion["gene5prime"],
gene5primejunction=fusion["gene5prime_junction"],
gene3prime=fusion["gene3prime"],
gene3primejunction=fusion["gene3prime_junction"],
db=db_human95,
pyensembl_data=data_human95,
db=db_human_hg38,
pyensembl_data=data_human_hg38,
protein_databases=["pfam"],
noncanonical=False,
)
Expand Down Expand Up @@ -156,15 +156,15 @@ def test_parse_human(self):
all_fusions = ["BCAS4_BCAS3", "HNRNPC_ACIN1"]
for fusion in parsers.parsers["longgf"](
f"{BASEDIR}/LongGF/fusions_hg38.log",
db_human95.logger,
db_human_hg38.logger,
):
fusion = model.Fusion(
gene5prime=fusion["gene5prime"],
gene5primejunction=fusion["gene5prime_junction"],
gene3prime=fusion["gene3prime"],
gene3primejunction=fusion["gene3prime_junction"],
db=db_human95,
pyensembl_data=data_human95,
db=db_human_hg38,
pyensembl_data=data_human_hg38,
protein_databases=["pfam"],
noncanonical=False,
)
Expand All @@ -177,35 +177,35 @@ class TestFusionInspector(unittest.TestCase):
def test_parse_human(self):
"""Test basic parsing."""

all_fusions = ["AL627171.2_TPM3", "STAT3_AL627171.2"]
all_fusions = ["ENSG00000282885_TPM3", "STAT3_ENSG00000282885"]

for fusion in parsers.parsers["fusioninspector"](
f"{BASEDIR}/FusionInspector/test.FusionInspector.fusions.abridged.txt",
db_human95.logger,
db_human_hg38.logger,
):
fusion = model.Fusion(
gene5prime=fusion["gene5prime"],
gene5primejunction=fusion["gene5prime_junction"],
gene3prime=fusion["gene3prime"],
gene3primejunction=fusion["gene3prime_junction"],
db=db_human95,
pyensembl_data=data_human95,
db=db_human_hg38,
pyensembl_data=data_human_hg38,
protein_databases=["pfam"],
noncanonical=False,
)
assert fusion.name in all_fusions, f"{fusion.name} not in list!"

for fusion in parsers.parsers["fusioninspector"](
f"{BASEDIR}/FusionInspector/test.FusionInspector.fusions.txt",
db_human95.logger,
db_human_hg38.logger,
):
fusion = model.Fusion(
gene5prime=fusion["gene5prime"],
gene5primejunction=fusion["gene5prime_junction"],
gene3prime=fusion["gene3prime"],
gene3primejunction=fusion["gene3prime_junction"],
db=db_human95,
pyensembl_data=data_human95,
db=db_human_hg38,
pyensembl_data=data_human_hg38,
protein_databases=["pfam"],
noncanonical=False,
)
Expand Down

0 comments on commit 6f8bb0f

Please sign in to comment.