Skip to content

Commit

Permalink
feat: Endpoint for retrieving all transcripts for a given HGNC ID (#11)…
Browse files Browse the repository at this point in the history
… (#12)

Co-authored-by: Manuel Holtgrewe <[email protected]>
  • Loading branch information
gromdimon and holtgrewe authored Oct 15, 2023
1 parent 5f82241 commit 2b425bf
Show file tree
Hide file tree
Showing 9 changed files with 76,258 additions and 136 deletions.
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ mypy = "*"
pytest = "*"
pytest-coverage = "*"
httpx = "*"
pytest-snapshot = "*"

[requires]
python_version = "3.10"
209 changes: 74 additions & 135 deletions Pipfile.lock

Large diffs are not rendered by default.

128 changes: 128 additions & 0 deletions dotty/main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import typing
from contextlib import asynccontextmanager

import bioutils.assemblies
Expand All @@ -15,6 +16,10 @@

#: The global Driver instance.
#: Declared as ``None`` at import time (hence the ``type: ignore``); the
#: ``lifespan`` handler constructs and loads a ``Driver`` at startup.
driver: Driver = None # type: ignore[assignment]
#: Map from HGNC ID to transcripts.
#: Keys are strings of the form ``HGNC:<id>``; values are the transcript
#: records returned by the data providers.  Filled once by ``lifespan``
#: while iterating over every assembly.
hgnc_to_transcripts: dict[str, list[typing.Any]] = {}
#: Map from Assembly to map from hgnc_id to transcripts.
#: NOTE(review): ``lifespan`` stores the *same* ``hgnc_to_transcripts`` object
#: under each assembly key, so the inner maps are not separate per assembly;
#: readers have to filter the records by genome build themselves.
assembly_to_hgnc_to_transcripts: dict[Assembly, dict[str, list[typing.Any]]] = {}

#: Contig names per assembly.
contig_names: dict[Assembly, set[str]] = {
Expand All @@ -32,6 +37,21 @@ async def lifespan(app: FastAPI): # pragma: no cover
driver = Driver(cdot_dir=settings.DATA_DIR)
driver.load()
_logger.info("driver loaded")
for assembly in Assembly:
for transcript in driver.data_providers[assembly].transcripts.keys():
if (
assembly.value
not in driver.data_providers[assembly]
._get_transcript(transcript)["genome_builds"]
.keys()
):
continue
hgnc_id = f"HGNC:{driver.data_providers[assembly]._get_transcript(transcript)['hgnc']}"
hgnc_to_transcripts.setdefault(hgnc_id, []).append(
driver.data_providers[assembly]._get_transcript(transcript)
)
assembly_to_hgnc_to_transcripts[assembly] = hgnc_to_transcripts
_logger.info("map built")
yield


Expand Down Expand Up @@ -63,6 +83,93 @@ class Result(pydantic.BaseModel):
spdi: Spdi


class ExonAlignment(pydantic.BaseModel):
    """A single exon of a transcript as aligned to a reference assembly."""

    #: Start position of the exon on the reference.
    ref_start: int
    #: End position of the exon on the reference.
    ref_end: int
    #: Number of the exon.
    exon_no: int
    #: Start position of the exon on the transcript.
    tx_start: int
    #: End position of the exon on the transcript.
    tx_end: int
    #: Description of the gapped alignment; may be ``None``.
    alignment: str | None

    @staticmethod
    def _from_list(lst: list[typing.Any]) -> "ExonAlignment":
        """Build an ``ExonAlignment`` from a positional record.

        The record is read by index in the order ``ref_start``, ``ref_end``,
        ``exon_no``, ``tx_start``, ``tx_end``, ``alignment``.  Entries beyond
        the sixth are ignored; a shorter record raises ``IndexError``.
        """
        field_order = ("ref_start", "ref_end", "exon_no", "tx_start", "tx_end", "alignment")
        values = {name: lst[position] for position, name in enumerate(field_order)}
        return ExonAlignment(**values)


class TanscriptAlignment(pydantic.BaseModel):
    """How a `Transcript` is placed on one assembly."""

    #: Name of the assembly the alignment refers to.
    assembly: str
    #: Contig the transcript is aligned to.
    contig: str
    #: Start of the CDS.
    cds_start: int
    #: End of the CDS.
    cds_end: int
    #: One ``ExonAlignment`` per exon record of the alignment.
    exons: list[ExonAlignment]

    @staticmethod
    def _from_dict(assembly, dct: dict[str, typing.Any]) -> "TanscriptAlignment":
        """Build a ``TanscriptAlignment`` from a per-assembly record.

        ``dct`` must provide the keys ``contig``, ``cds_start``, ``cds_end``
        and ``exons``; a missing key raises ``KeyError``.  Each entry of
        ``exons`` is converted with ``ExonAlignment._from_list``.
        """
        # Keys are read in the same order as the model fields so that the
        # first missing key is the one reported.
        contig = dct["contig"]
        cds_start = dct["cds_start"]
        cds_end = dct["cds_end"]
        exon_alignments = list(map(ExonAlignment._from_list, dct["exons"]))
        return TanscriptAlignment(
            assembly=assembly,
            contig=contig,
            cds_start=cds_start,
            cds_end=cds_end,
            exons=exon_alignments,
        )


class Transcript(pydantic.BaseModel):
    """A transcript together with its gene and alignment information."""

    #: Identifier of the transcript.
    id: str
    #: HGNC ID of the gene, formatted as ``HGNC:<id>``.
    hgnc_id: str
    #: HGNC symbol of the gene.
    hgnc_symbol: str
    #: Alignments of the transcript; empty when the requested assembly is absent.
    alignments: list[TanscriptAlignment]

    @staticmethod
    def _from_dict(assembly: str, dct: dict[str, typing.Any]) -> "Transcript":
        """Build a ``Transcript`` from a transcript record.

        Reads ``id``, ``hgnc``, ``gene_name`` and ``genome_builds`` from
        ``dct``.  At most one alignment is produced: the one stored under
        ``assembly`` in ``genome_builds``, if that key exists.
        """
        transcript_id = dct["id"]
        gene_id = f"HGNC:{dct['hgnc']}"
        gene_symbol = dct["gene_name"]

        found_alignments = []
        if assembly in dct["genome_builds"]:
            build_record = dct["genome_builds"][assembly]
            found_alignments.append(TanscriptAlignment._from_dict(assembly, build_record))

        return Transcript(
            id=transcript_id,
            hgnc_id=gene_id,
            hgnc_symbol=gene_symbol,
            alignments=found_alignments,
        )


class TranscriptResult(pydantic.BaseModel):
    """Response payload of the transcript search endpoint."""

    #: Transcripts matching the query.
    transcripts: list[Transcript]


@app.get("/api/v1/to-spdi", response_model=Result)
async def to_spdi(q: str, assembly: Assembly = Assembly.GRCH38) -> Result:
"""Resolve the given HGVS variant to SPDI representation."""
Expand Down Expand Up @@ -93,3 +200,24 @@ async def to_spdi(q: str, assembly: Assembly = Assembly.GRCH38) -> Result:
alternate_inserted=alternative,
)
)


@app.get("/api/v1/find-transcripts", response_model=TranscriptResult)
async def find_transcripts(hgnc_id: str, assembly: Assembly = Assembly.GRCH38) -> TranscriptResult:
    """Find transcripts for the given HGNC ID.

    :param hgnc_id: HGNC ID to look up, in the ``HGNC:<id>`` form used as key
        of the transcript map.
    :param assembly: Assembly whose alignments should be returned.
    :return: The transcripts that carry a complete alignment on ``assembly``.
        The list may be empty when transcripts are known for the gene but
        none of them has a complete alignment on that assembly.
    :raises HTTPException: With status 404 when no transcript is registered
        for ``hgnc_id``.
    """
    candidates = assembly_to_hgnc_to_transcripts[assembly].get(hgnc_id, [])
    if not candidates:
        raise HTTPException(status_code=404, detail="No transcripts found")

    # Every key that ``TanscriptAlignment._from_dict`` reads unconditionally.
    # ``contig`` is part of the check so that an incomplete record is skipped
    # instead of failing the whole request with a ``KeyError``.
    required_keys = ("contig", "cds_start", "cds_end", "exons")

    result = []
    for record in candidates:
        builds = record["genome_builds"]
        if assembly.value not in builds:
            continue
        build = builds[assembly.value]
        if any(key not in build for key in required_keys):
            continue
        result.append(Transcript._from_dict(assembly.value, record))
    return TranscriptResult(transcripts=result)
5 changes: 4 additions & 1 deletion stubs/cdot/hgvs/dataproviders/json_data_provider.pyi
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import typing

from hgvs.dataproviders.interface import Interface

class AbstractJSONDataProvider(Interface):
Expand All @@ -7,4 +9,5 @@ class LocalDataProvider(AbstractJSONDataProvider):
pass

class JSONDataProvider(LocalDataProvider):
    # Type stub: declares only the members listed below.
    # NOTE(review): ``pass`` is redundant once the class body has members.
    pass
    # Transcript records; presumably keyed by transcript ID -- verify against cdot.
    transcripts: dict[str, typing.Any]
    # Returns the record for ``tx_id`` as a plain dict.
    def _get_transcript(self, tx_id: str) -> dict[str, typing.Any]: ...
Empty file.
6 changes: 6 additions & 0 deletions stubs/pytest_snapshot/plugin.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from pathlib import Path
from typing import Union

class Snapshot:
    # Type stub for ``pytest_snapshot.plugin.Snapshot``; only two methods are declared.
    # Presumably compares ``value`` against the stored snapshot ``snapshot_name`` -- verify against the plugin.
    def assert_match(self, value: Union[str, bytes], snapshot_name: Union[str, Path]): ...
    # Presumably the directory variant: ``dir_dict`` is checked against ``snapshot_dir_name`` -- verify against the plugin.
    def assert_match_dir(self, dir_dict: dict, snapshot_dir_name: Union[str, Path]): ...
Loading

0 comments on commit 2b425bf

Please sign in to comment.