Skip to content

Commit

Permalink
added TNRS Entrez client
Browse files Browse the repository at this point in the history
  • Loading branch information
rvosa committed Dec 5, 2024
1 parent 7e195c2 commit ae44a77
Showing 1 changed file with 113 additions and 0 deletions.
113 changes: 113 additions & 0 deletions barcode_validator/translation_tables.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from enum import Enum
from Bio import Entrez
from typing import Optional, Dict, List
import time

"""
SYNOPSIS
Expand Down Expand Up @@ -62,3 +65,113 @@ def get_translation_table(marker: Marker, taxonomy_dict: dict) -> int:
return 1


class TaxonomyResolver:

"""
SYNOPSIS
# Example usage:
>>> resolver = TaxonomyResolver("[email protected]")
>>>
>>> # Example 1: Get full taxonomy for a species
>>> taxonomy = resolver.get_taxonomy_dict("Homo sapiens", kingdom="Metazoa")
>>> print("Full taxonomy:", taxonomy)
>>>
>>> # Example 2: Get only specific ranks needed for genetic code determination
>>> required_ranks = ['phylum', 'class', 'family']
>>> specific_taxonomy = resolver.get_lineage_at_ranks("Homo sapiens", required_ranks)
>>>print("Specific ranks:", specific_taxonomy)
>>>
>>> # Example 3: Resolve a genus name
>>> genus_taxonomy = resolver.get_taxonomy_dict("Drosophila", kingdom="Metazoa")
>>> print("Genus taxonomy:", genus_taxonomy)
"""

def __init__(self, email: str):
"""
Initialize the taxonomy resolver.
Args:
email: Email address for NCBI Entrez queries (required by NCBI)
"""
Entrez.email = email
self.tax_ranks = ['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species']

def get_taxonomy_dict(self, taxon_name: str, kingdom: Optional[str] = None) -> Optional[Dict[str, str]]:
"""
Resolve a taxon name to its full taxonomy using NCBI taxonomy database.
Args:
taxon_name: The taxonomic name to resolve
kingdom: Optional kingdom name to filter results
Returns:
Dictionary containing taxonomic classification or None if not found
"""
try:
# First, search for the taxon ID
search_term = f"{taxon_name}[Scientific Name]"
if kingdom:
search_term = f"{search_term} AND {kingdom}[Kingdom]"

# Search in taxonomy database
handle = Entrez.esearch(db="taxonomy", term=search_term)
record = Entrez.read(handle)
handle.close()

if not record['IdList']:
return None

# Get the first (most relevant) taxonomy ID
taxid = record['IdList'][0]

# Fetch the full taxonomy record
handle = Entrez.efetch(db="taxonomy", id=taxid, retmode="xml")
records = Entrez.read(handle)
handle.close()

if not records:
return None

record = records[0]

# Extract lineage information
lineage_ex = record.get('LineageEx', [])
taxonomy_dict = {}

# Build dictionary of rank -> name
for entry in lineage_ex:
rank = entry.get('Rank', '').lower()
if rank in self.tax_ranks:
taxonomy_dict[rank] = entry.get('ScientificName', '')

# Add the query taxon's rank and name
query_rank = record.get('Rank', '').lower()
if query_rank in self.tax_ranks:
taxonomy_dict[query_rank] = record.get('ScientificName', '')

return taxonomy_dict

except Exception as e:
print(f"Error resolving taxonomy: {str(e)}")
return None

def get_lineage_at_ranks(self, taxon_name: str, required_ranks: List[str],
kingdom: Optional[str] = None) -> Optional[Dict[str, str]]:
"""
Get taxonomy information for specific required ranks.
Args:
taxon_name: The taxonomic name to resolve
required_ranks: List of taxonomic ranks required
kingdom: Optional kingdom name to filter results
Returns:
Dictionary containing only the required ranks or None if not found
"""
full_taxonomy = self.get_taxonomy_dict(taxon_name, kingdom)
if not full_taxonomy:
return None

return {rank: full_taxonomy.get(rank, '') for rank in required_ranks}


0 comments on commit ae44a77

Please sign in to comment.