-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
113 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,7 @@ | ||
from enum import Enum | ||
from Bio import Entrez | ||
from typing import Optional, Dict, List | ||
import time | ||
|
||
""" | ||
SYNOPSIS | ||
|
@@ -62,3 +65,113 @@ def get_translation_table(marker: Marker, taxonomy_dict: dict) -> int: | |
return 1 | ||
|
||
|
||
class TaxonomyResolver: | ||
|
||
""" | ||
SYNOPSIS | ||
# Example usage: | ||
>>> resolver = TaxonomyResolver("[email protected]") | ||
>>> | ||
>>> # Example 1: Get full taxonomy for a species | ||
>>> taxonomy = resolver.get_taxonomy_dict("Homo sapiens", kingdom="Metazoa") | ||
>>> print("Full taxonomy:", taxonomy) | ||
>>> | ||
>>> # Example 2: Get only specific ranks needed for genetic code determination | ||
>>> required_ranks = ['phylum', 'class', 'family'] | ||
>>> specific_taxonomy = resolver.get_lineage_at_ranks("Homo sapiens", required_ranks) | ||
>>>print("Specific ranks:", specific_taxonomy) | ||
>>> | ||
>>> # Example 3: Resolve a genus name | ||
>>> genus_taxonomy = resolver.get_taxonomy_dict("Drosophila", kingdom="Metazoa") | ||
>>> print("Genus taxonomy:", genus_taxonomy) | ||
""" | ||
|
||
def __init__(self, email: str): | ||
""" | ||
Initialize the taxonomy resolver. | ||
Args: | ||
email: Email address for NCBI Entrez queries (required by NCBI) | ||
""" | ||
Entrez.email = email | ||
self.tax_ranks = ['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species'] | ||
|
||
def get_taxonomy_dict(self, taxon_name: str, kingdom: Optional[str] = None) -> Optional[Dict[str, str]]: | ||
""" | ||
Resolve a taxon name to its full taxonomy using NCBI taxonomy database. | ||
Args: | ||
taxon_name: The taxonomic name to resolve | ||
kingdom: Optional kingdom name to filter results | ||
Returns: | ||
Dictionary containing taxonomic classification or None if not found | ||
""" | ||
try: | ||
# First, search for the taxon ID | ||
search_term = f"{taxon_name}[Scientific Name]" | ||
if kingdom: | ||
search_term = f"{search_term} AND {kingdom}[Kingdom]" | ||
|
||
# Search in taxonomy database | ||
handle = Entrez.esearch(db="taxonomy", term=search_term) | ||
record = Entrez.read(handle) | ||
handle.close() | ||
|
||
if not record['IdList']: | ||
return None | ||
|
||
# Get the first (most relevant) taxonomy ID | ||
taxid = record['IdList'][0] | ||
|
||
# Fetch the full taxonomy record | ||
handle = Entrez.efetch(db="taxonomy", id=taxid, retmode="xml") | ||
records = Entrez.read(handle) | ||
handle.close() | ||
|
||
if not records: | ||
return None | ||
|
||
record = records[0] | ||
|
||
# Extract lineage information | ||
lineage_ex = record.get('LineageEx', []) | ||
taxonomy_dict = {} | ||
|
||
# Build dictionary of rank -> name | ||
for entry in lineage_ex: | ||
rank = entry.get('Rank', '').lower() | ||
if rank in self.tax_ranks: | ||
taxonomy_dict[rank] = entry.get('ScientificName', '') | ||
|
||
# Add the query taxon's rank and name | ||
query_rank = record.get('Rank', '').lower() | ||
if query_rank in self.tax_ranks: | ||
taxonomy_dict[query_rank] = record.get('ScientificName', '') | ||
|
||
return taxonomy_dict | ||
|
||
except Exception as e: | ||
print(f"Error resolving taxonomy: {str(e)}") | ||
return None | ||
|
||
def get_lineage_at_ranks(self, taxon_name: str, required_ranks: List[str], | ||
kingdom: Optional[str] = None) -> Optional[Dict[str, str]]: | ||
""" | ||
Get taxonomy information for specific required ranks. | ||
Args: | ||
taxon_name: The taxonomic name to resolve | ||
required_ranks: List of taxonomic ranks required | ||
kingdom: Optional kingdom name to filter results | ||
Returns: | ||
Dictionary containing only the required ranks or None if not found | ||
""" | ||
full_taxonomy = self.get_taxonomy_dict(taxon_name, kingdom) | ||
if not full_taxonomy: | ||
return None | ||
|
||
return {rank: full_taxonomy.get(rank, '') for rank in required_ranks} | ||
|
||
|