Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Split summarization for functional annotations #49

Draft
wants to merge 10 commits into
base: develop
Choose a base branch
from
10 changes: 5 additions & 5 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
resolver = "2"

members = [
"api",
"api",
"database",
"datastore",
"index"
Expand Down
82 changes: 80 additions & 2 deletions api/src/controllers/api/pept2ec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ use crate::{
},
helpers::{
ec_helper::{ec_numbers_from_map, EcNumber},
fa_helper::calculate_fa
},
AppState
};
use crate::controllers::request::{GetContent, PostContent};
use crate::helpers::fa_helper::calculate_ec;
use crate::helpers::sanitize_peptides;

#[derive(Deserialize)]
Expand Down Expand Up @@ -52,7 +53,7 @@ async fn handler(
let mut final_results = Vec::new();
for (unique_peptide, item) in unique_peptides.iter().zip(result.into_iter()) {
if let Some(count) = peptide_counts.get(unique_peptide) {
let fa = calculate_fa(&item.proteins);
let fa = calculate_ec(item.proteins(&index.searcher));
let total_protein_count = *fa.counts.get("all").unwrap_or(&0);

for _ in 0..*count {
Expand All @@ -78,3 +79,80 @@ generate_handlers!(
Ok(Json(handler(state, params).await?))
}
);

/// GET variant of the EC lookup: returns one [`EcInformation`] entry per
/// occurrence of each peptide in the sanitized input.
///
/// The input is deduplicated before it is handed to `index.analyse`, and every
/// analysed peptide is then emitted as many times as it occurred in the
/// request. Entries are grouped per unique peptide; the order of those groups
/// follows the iteration order of the internal `HashMap`, not the order of the
/// request.
///
/// This function never produces the `Err(())` variant; it always returns `Ok`.
pub async fn test_get_json_handler(
    State(AppState { index, datastore, .. }): State<AppState>,
    GetContent(Parameters { input, equate_il, extra }): GetContent<Parameters>
) -> Result<Json<Vec<EcInformation>>, ()> {
    // Tally how often every sanitized peptide occurs in the request.
    let mut occurrences: HashMap<String, usize> = HashMap::new();
    for peptide in sanitize_peptides(input) {
        *occurrences.entry(peptide).or_default() += 1;
    }

    // Only the distinct peptides are analysed.
    let distinct: Vec<String> = occurrences.keys().cloned().collect();
    let analysis = index.analyse(&distinct, equate_il, None);

    let ec_store = datastore.ec_store();

    // Expand the per-peptide results back to the multiplicity of the input.
    // NOTE(review): the zip assumes `analyse` yields its results in the same
    // order as `distinct` — confirm against the index implementation.
    let mut expanded: Vec<EcInformation> = Vec::new();
    for (peptide, item) in distinct.iter().zip(analysis) {
        let repeats = match occurrences.get(peptide) {
            Some(n) => *n,
            None => continue
        };

        let fa = calculate_ec(item.proteins(&index.searcher));
        // A missing "all" counter is reported as zero proteins.
        let total_protein_count = fa.counts.get("all").copied().unwrap_or(0);

        expanded.extend((0..repeats).map(|_| {
            let ecs = ec_numbers_from_map(&fa.data, ec_store, extra);

            EcInformation {
                peptide: item.sequence.clone(),
                total_protein_count,
                ec: ecs
            }
        }));
    }

    Ok(Json(expanded))
}


/// POST variant of the EC lookup: returns one [`EcInformation`] entry per
/// occurrence of each peptide in the sanitized input.
///
/// The input is deduplicated before it is handed to `index.analyse`, and every
/// analysed peptide is then emitted as many times as it occurred in the
/// request. Entries are grouped per unique peptide; the order of those groups
/// follows the iteration order of the internal `HashMap`, not the order of the
/// request.
///
/// This function never produces the `Err(())` variant; it always returns `Ok`.
pub async fn test_post_json_handler(
    State(AppState { index, datastore, .. }): State<AppState>,
    PostContent(Parameters { input, equate_il, extra }): PostContent<Parameters>
) -> Result<Json<Vec<EcInformation>>, ()> {
    // Tally how often every sanitized peptide occurs in the request.
    let mut occurrences: HashMap<String, usize> = HashMap::new();
    for peptide in sanitize_peptides(input) {
        *occurrences.entry(peptide).or_default() += 1;
    }

    // Only the distinct peptides are analysed.
    let distinct: Vec<String> = occurrences.keys().cloned().collect();
    let analysis = index.analyse(&distinct, equate_il, None);

    let ec_store = datastore.ec_store();

    // Expand the per-peptide results back to the multiplicity of the input.
    // NOTE(review): the zip assumes `analyse` yields its results in the same
    // order as `distinct` — confirm against the index implementation.
    let mut expanded: Vec<EcInformation> = Vec::new();
    for (peptide, item) in distinct.iter().zip(analysis) {
        let repeats = match occurrences.get(peptide) {
            Some(n) => *n,
            None => continue
        };

        let fa = calculate_ec(item.proteins(&index.searcher));
        // A missing "all" counter is reported as zero proteins.
        let total_protein_count = fa.counts.get("all").copied().unwrap_or(0);

        expanded.extend((0..repeats).map(|_| {
            let ecs = ec_numbers_from_map(&fa.data, ec_store, extra);

            EcInformation {
                peptide: item.sequence.clone(),
                total_protein_count,
                ec: ecs
            }
        }));
    }

    Ok(Json(expanded))
}
2 changes: 1 addition & 1 deletion api/src/controllers/api/pept2funct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ async fn handler(
Ok(result
.into_iter()
.map(|item| {
let fa = calculate_fa(&item.proteins);
let fa = calculate_fa(item.proteins(&index.searcher));

let total_protein_count = *fa.counts.get("all").unwrap_or(&0);
let ecs = ec_numbers_from_map(&fa.data, ec_store, extra);
Expand Down
4 changes: 2 additions & 2 deletions api/src/controllers/api/pept2go.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ use crate::{
generate_handlers
},
helpers::{
fa_helper::calculate_fa,
go_helper::{go_terms_from_map, GoTerms}
},
AppState
};
use crate::helpers::fa_helper::calculate_go;
use crate::helpers::sanitize_peptides;

#[derive(Deserialize)]
Expand Down Expand Up @@ -45,7 +45,7 @@ async fn handler(
Ok(result
.into_iter()
.map(|item| {
let fa = calculate_fa(&item.proteins);
let fa = calculate_go(item.proteins(&index.searcher));

let total_protein_count = *fa.counts.get("all").unwrap_or(&0);
let gos = go_terms_from_map(&fa.data, go_store, extra, domains);
Expand Down
4 changes: 2 additions & 2 deletions api/src/controllers/api/pept2interpro.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ use crate::{
generate_handlers
},
helpers::{
fa_helper::calculate_fa,
interpro_helper::{interpro_entries_from_map, InterproEntries}
},
AppState
};
use crate::helpers::fa_helper::calculate_ipr;
use crate::helpers::sanitize_peptides;

#[derive(Deserialize)]
Expand Down Expand Up @@ -45,7 +45,7 @@ async fn handler(
Ok(result
.into_iter()
.map(|item| {
let fa = calculate_fa(&item.proteins);
let fa = calculate_ipr(item.proteins(&index.searcher));

let total_protein_count = *fa.counts.get("all").unwrap_or(&0);
let iprs = interpro_entries_from_map(&fa.data, interpro_store, extra, domains);
Expand Down
2 changes: 1 addition & 1 deletion api/src/controllers/api/pept2lca.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ async fn handler(
.into_iter()
.filter_map(|item| {
let lca = calculate_lca(
item.proteins.iter().map(|protein| protein.taxon).collect(),
item.proteins(&index.searcher).map(|protein| protein.taxon_id).collect(),
version,
taxon_store,
lineage_store
Expand Down
10 changes: 5 additions & 5 deletions api/src/controllers/api/pept2prot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ async fn handler(

let accession_numbers: Vec<String> = result
.iter()
.flat_map(|item| item.proteins.iter().map(|protein| protein.uniprot_accession.clone()))
.flat_map(|item| item.proteins(&index.searcher).map(|protein| protein.uniprot_id.clone()))
.collect();

let accessions_map = connection.interact(move |conn| get_accessions_map(conn, &accession_numbers)).await??;
Expand All @@ -67,10 +67,10 @@ async fn handler(
Ok(result
.into_iter()
.flat_map(|item| {
item.proteins
item.proteins(&index.searcher)
.into_iter()
.filter_map(|protein| {
let uniprot_entry = accessions_map.get(&protein.uniprot_accession)?;
let uniprot_entry = accessions_map.get(&protein.uniprot_id)?;

if extra {
let taxon_name = taxon_store.get_name(uniprot_entry.taxon_id)?;
Expand All @@ -97,7 +97,7 @@ async fn handler(

Some(ProtInformation::Extra {
peptide: item.sequence.clone(),
uniprot_id: protein.uniprot_accession.clone(),
uniprot_id: protein.uniprot_id.clone(),
protein_name: uniprot_entry.name.clone(),
taxon_id: uniprot_entry.taxon_id,
taxon_name: taxon_name.clone(),
Expand All @@ -109,7 +109,7 @@ async fn handler(
} else {
Some(ProtInformation::Default {
peptide: item.sequence.clone(),
uniprot_id: protein.uniprot_accession.clone(),
uniprot_id: protein.uniprot_id.clone(),
protein_name: uniprot_entry.name.clone(),
taxon_id: uniprot_entry.taxon_id,
protein: uniprot_entry.protein.clone()
Expand Down
2 changes: 1 addition & 1 deletion api/src/controllers/api/pept2taxa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ async fn handler(
Ok(result
.into_iter()
.flat_map(|item| {
item.proteins.iter().map(|protein| protein.taxon).collect::<HashSet<u32>>().into_iter().filter_map(
item.proteins(&index.searcher).map(|protein| protein.taxon_id).collect::<HashSet<u32>>().into_iter().filter_map(
move |taxon| {
let (name, rank, _) = taxon_store.get(taxon)?;
let lineage = match (extra, names) {
Expand Down
4 changes: 2 additions & 2 deletions api/src/controllers/api/peptinfo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,15 @@ async fn handler(
Ok(result
.into_iter()
.filter_map(|item| {
let fa = calculate_fa(&item.proteins);
let fa = calculate_fa(item.proteins(&index.searcher));

let total_protein_count = *fa.counts.get("all").unwrap_or(&0);
let ecs = ec_numbers_from_map(&fa.data, ec_store, extra);
let gos = go_terms_from_map(&fa.data, go_store, extra, domains);
let iprs = interpro_entries_from_map(&fa.data, interpro_store, extra, domains);

let lca = calculate_lca(
item.proteins.iter().map(|protein| protein.taxon).collect(),
item.proteins(&index.searcher).map(|protein| protein.taxon_id).collect(),
version,
taxon_store,
lineage_store
Expand Down
5 changes: 3 additions & 2 deletions api/src/controllers/mpa/pept2data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,19 @@ async fn handler(
.into_iter()
.map(|item| {
let lca = calculate_lca(
item.proteins.iter().map(|protein| protein.taxon).collect(),
item.proteins(&index.searcher).map(|protein| protein.taxon_id).collect(),
LineageVersion::V2,
taxon_store,
lineage_store
);
let lineage = get_lineage_array(lca as u32, LineageVersion::V2, lineage_store);

let fa = calculate_fa(item.proteins(&index.searcher));
DataItem {
sequence: item.sequence,
lca: Some(lca as u32),
lineage,
fa: calculate_fa(&item.proteins)
fa
}
})
.collect()
Expand Down
4 changes: 2 additions & 2 deletions api/src/controllers/mpa/pept2filtered.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,14 @@ async fn handler(
peptides: result
.into_iter()
.filter_map(|item| {
let item_taxa: Vec<u32> = item.proteins.iter().map(|protein| protein.taxon).collect();
let item_taxa: Vec<u32> = item.proteins(&index.searcher).map(|protein| protein.taxon_id).collect();

if item_taxa.is_empty() {
return None;
}

let fa = if include_fa {
Some(calculate_fa(&item.proteins))
Some(calculate_fa(item.proteins(&index.searcher)))
} else {
None
};
Expand Down
11 changes: 5 additions & 6 deletions api/src/controllers/private_api/proteins.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,14 @@ async fn handler(
}

let accession_numbers: Vec<String> =
result[0].proteins.iter().map(|protein| protein.uniprot_accession.clone()).collect();
result[0].proteins(&index.searcher).map(|protein| protein.uniprot_id.clone()).collect();

let accessions_map = connection.interact(move |conn| get_accessions_map(conn, &accession_numbers)).await??;

let taxon_store = datastore.taxon_store();
let lineage_store = datastore.lineage_store();

let taxa = result[0].proteins.iter().map(|protein| protein.taxon).collect();
let taxa = result[0].proteins(&index.searcher).map(|protein| protein.taxon_id).collect();
let lca = calculate_lca(taxa, LineageVersion::V2, taxon_store, lineage_store);

let common_lineage = get_lineage_array(lca as u32, LineageVersion::V2, lineage_store)
Expand All @@ -77,10 +77,9 @@ async fn handler(
lca,
common_lineage,
proteins: result[0]
.proteins
.iter()
.proteins(&index.searcher)
.filter_map(|protein| {
let uniprot_entry = accessions_map.get(&protein.uniprot_accession)?;
let uniprot_entry = accessions_map.get(&protein.uniprot_id)?;

let fa: Vec<&str> = uniprot_entry.fa.split(';').collect();
let ec_numbers =
Expand All @@ -94,7 +93,7 @@ async fn handler(
.collect::<Vec<String>>();

Some(Protein {
uniprot_accession_id: protein.uniprot_accession.clone(),
uniprot_accession_id: protein.uniprot_id.clone(),
name: uniprot_entry.name.clone(),
organism: uniprot_entry.taxon_id,
ec_numbers,
Expand Down
Loading