From 8067226bd32420a4609713c94362cb1bd427c254 Mon Sep 17 00:00:00 2001 From: tibvdm Date: Wed, 28 Aug 2024 14:58:49 +0200 Subject: [PATCH] split the functional annotations --- sa-index/src/peptide_search.rs | 8 ++++++-- sa-mappings/src/proteins.rs | 25 +++++++++++++++++++------ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/sa-index/src/peptide_search.rs b/sa-index/src/peptide_search.rs index 55d629f..5f4d8b3 100644 --- a/sa-index/src/peptide_search.rs +++ b/sa-index/src/peptide_search.rs @@ -16,7 +16,9 @@ pub struct SearchResult { pub struct ProteinInfo { pub taxon: u32, pub uniprot_accession: String, - pub functional_annotations: String + pub ec_numbers: String, + pub go_terms: String, + pub interpro_entries: String } impl From<&Protein> for ProteinInfo { @@ -24,7 +26,9 @@ impl From<&Protein> for ProteinInfo { ProteinInfo { taxon: protein.taxon_id, uniprot_accession: protein.uniprot_id.clone(), - functional_annotations: protein.get_functional_annotations() + ec_numbers: protein.get_ec_numbers(), + go_terms: protein.get_go_terms(), + interpro_entries: protein.get_interpro_entries() } } } diff --git a/sa-mappings/src/proteins.rs b/sa-mappings/src/proteins.rs index f2b24cc..7597b8d 100644 --- a/sa-mappings/src/proteins.rs +++ b/sa-mappings/src/proteins.rs @@ -22,7 +22,9 @@ pub struct Protein { pub taxon_id: u32, /// The encoded functional annotations of the protein - pub functional_annotations: Vec + pub ec_numbers: Vec, + pub go_terms: Vec, + pub interpro_entries: Vec } /// A struct that represents a collection of proteins @@ -35,9 +37,16 @@ pub struct Proteins { } impl Protein { - /// Returns the decoded functional annotations of the protein - pub fn get_functional_annotations(&self) -> String { - decode(&self.functional_annotations) + pub fn get_ec_numbers(&self) -> String { + decode(&self.ec_numbers) + } + + pub fn get_go_terms(&self) -> String { + decode(&self.go_terms) + } + + pub fn get_interpro_entries(&self) -> String { + decode(&self.interpro_entries) } } @@ -72,7 +81,9 @@ impl Proteins { let uniprot_id = from_utf8(fields.next().unwrap())?; let taxon_id = from_utf8(fields.next().unwrap())?.parse()?; let sequence = from_utf8(fields.next().unwrap())?; - let functional_annotations: Vec = encode(from_utf8(fields.next().unwrap())?); + let ec_numbers: Vec = encode(from_utf8(fields.next().unwrap())?); + let go_terms: Vec = encode(from_utf8(fields.next().unwrap())?); + let interpro_entries: Vec = encode(from_utf8(fields.next().unwrap())?); input_string.push_str(&sequence.to_uppercase()); input_string.push(SEPARATION_CHARACTER.into()); @@ -80,7 +91,9 @@ impl Proteins { proteins.push(Protein { uniprot_id: uniprot_id.to_string(), taxon_id, - functional_annotations + ec_numbers, + go_terms, + interpro_entries }); }