Merge pull request #48 from unipept/fix/proteins_in_batches
Retrieve protein data in batches from database
pverscha authored Sep 19, 2024
2 parents be6416f + 0131986 commit 97d6385
Showing 8 changed files with 39 additions and 7 deletions.
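The gist of the change: the API handlers now collect the UniProt accession numbers they need into a `HashSet` (so duplicates are only looked up once), and the database layer loads the matching `uniprot_entries` rows in chunks of 1000 accessions rather than with a single query over the whole set. A minimal, self-contained sketch of that idea, with a hypothetical `load_batch` function standing in for the Diesel query shown further down:

```rust
use std::collections::HashSet;
use itertools::Itertools;

// Hypothetical stand-in for the real Diesel query on `uniprot_entries`
// (see database/src/lib.rs below); here it just echoes the accessions.
fn load_batch(accessions: &[&String]) -> Vec<String> {
    accessions.iter().map(|a| (*a).clone()).collect()
}

fn main() {
    // Duplicates disappear as soon as the accessions are collected into a set.
    let accessions: HashSet<String> = ["P12345", "Q67890", "P12345"]
        .iter()
        .map(|s| s.to_string())
        .collect();

    // Query the database in chunks of (at most) 1000 accessions at a time.
    let mut result: Vec<String> = Vec::new();
    for chunk in &accessions.iter().chunks(1000) {
        let batch: Vec<&String> = chunk.collect();
        result.extend(load_batch(&batch));
    }

    assert_eq!(result.len(), 2);
}
```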
11 changes: 11 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion README.md
@@ -52,4 +52,4 @@ Follow these steps in order to easily work on the Unipept API in the devcontainer

* You first have to build the binaries by running `cargo build --release`.
* Then, you should start the mariadb server: `sudo service mariadb start`
- * Finally, the Unipept API can be started with this command: `./target/release/unipept-api -i "/unipept-index-data" -d "mysql://root:[email protected]:3306/unipept" -p 80`.
+ * Finally, the Unipept API can be started with this command: `./target/release/unipept-api -i "/unipept-index-data" -d "mysql://root:root_pass@localhost:3306/unipept" -p 80`.
1 change: 1 addition & 0 deletions api/Cargo.toml
@@ -25,3 +25,4 @@ urlencoding = "2.1.3"
http = "1.1.0"
tower-layer = "0.3.2"
tower-service = "0.3.2"
+ itertools = "0.13.0"
4 changes: 3 additions & 1 deletion api/src/controllers/api/pept2prot.rs
@@ -1,4 +1,6 @@
+ use std::collections::HashSet;
use axum::{extract::State, Json};
+ use itertools::Itertools;
use database::get_accessions_map;
use serde::{Deserialize, Serialize};

@@ -55,7 +57,7 @@ async fn handler(

    let result = index.analyse(&input, equate_il, None);

-     let accession_numbers: Vec<String> = result
+     let accession_numbers: HashSet<String> = result
        .iter()
        .flat_map(|item| item.proteins.iter().map(|protein| protein.uniprot_accession.clone()))
        .collect();
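Switching the collected type from `Vec<String>` to `HashSet<String>` means each accession number reaches the database at most once, even when several peptides in the request map to the same protein. A tiny illustration with made-up accessions:

```rust
use std::collections::HashSet;

fn main() {
    // Two different peptides matching the same protein used to yield the
    // accession twice; a HashSet keeps it once.
    let matched = ["P12345", "Q67890", "P12345"];

    let as_vec: Vec<String> = matched.iter().map(|p| p.to_string()).collect();
    let as_set: HashSet<String> = matched.iter().map(|p| p.to_string()).collect();

    assert_eq!(as_vec.len(), 3);
    assert_eq!(as_set.len(), 2);
}
```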
2 changes: 2 additions & 0 deletions api/src/controllers/api/protinfo.rs
@@ -1,3 +1,4 @@
+ use std::collections::HashSet;
use axum::{extract::State, Json};
use database::get_accessions;
use serde::{Deserialize, Serialize};
@@ -58,6 +59,7 @@ async fn handler(
    version: LineageVersion
) -> Result<Vec<ProtInformation>, ApiError> {
    let input = sanitize_proteins(input);
+     let input = HashSet::from_iter(input.into_iter());

    let connection = database.get_conn().await?;
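`HashSet::from_iter(input.into_iter())` deduplicates the sanitized accession list before it is passed on to `get_accessions`; it does the same thing as collecting the iterator into a `HashSet`. A short sketch with made-up input:

```rust
use std::collections::HashSet;

fn main() {
    let input = vec![
        "P12345".to_string(),
        "P12345".to_string(),
        "Q67890".to_string(),
    ];

    // Equivalent ways to drop duplicate accessions before querying:
    let unique: HashSet<String> = HashSet::from_iter(input.clone().into_iter());
    let unique_alt: HashSet<String> = input.into_iter().collect();

    assert_eq!(unique, unique_alt);
    assert_eq!(unique.len(), 2);
}
```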

3 changes: 2 additions & 1 deletion api/src/controllers/private_api/proteins.rs
@@ -1,3 +1,4 @@
+ use std::collections::HashSet;
use axum::{extract::State, Json};
use database::get_accessions_map;
use serde::{Deserialize, Serialize};
@@ -57,7 +58,7 @@ async fn handler(
        return Ok(ProteinInformation::default());
    }

-     let accession_numbers: Vec<String> =
+     let accession_numbers: HashSet<String> =
        result[0].proteins.iter().map(|protein| protein.uniprot_accession.clone()).collect();

    let accessions_map = connection.interact(move |conn| get_accessions_map(conn, &accession_numbers)).await??;
1 change: 1 addition & 0 deletions database/Cargo.toml
@@ -6,4 +6,5 @@ edition = "2021"
[dependencies]
deadpool-diesel = { version = "0.4.1", features = ["mysql"] }
diesel = { version = "2", features = ["mysql"] }
+ itertools = "0.13.0"
thiserror = "1.0"
22 changes: 18 additions & 4 deletions database/src/lib.rs
@@ -1,10 +1,11 @@
use std::{collections::HashMap, ops::Deref};

+ use std::collections::HashSet;
use deadpool_diesel::mysql::{Manager, Object, Pool};
pub use deadpool_diesel::InteractError;
use diesel::{prelude::*, MysqlConnection, QueryDsl};
pub use errors::DatabaseError;
use models::UniprotEntry;
+ use itertools::Itertools;

mod errors;
mod models;
@@ -36,16 +37,29 @@ impl Deref for Database {

pub fn get_accessions(
    conn: &mut MysqlConnection,
-     accessions: &Vec<String>
+     accessions: &HashSet<String>
) -> Result<Vec<UniprotEntry>, DatabaseError> {
    use schema::uniprot_entries::dsl::*;

-     Ok(uniprot_entries.filter(uniprot_accession_number.eq_any(accessions)).load(conn)?)
+     let mut result: Vec<UniprotEntry> = Vec::new();
+
+     accessions
+         .into_iter()
+         .chunks(1000)
+         .into_iter()
+         .for_each(|chunk| {
+             let data = uniprot_entries.filter(uniprot_accession_number.eq_any(chunk)).load(conn);
+             if data.is_ok() {
+                 result.extend(data.unwrap());
+             }
+         });
+
+     Ok(result)
}
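Note that the chunked loop above silently drops any chunk whose query fails (`if data.is_ok()`), whereas the replaced one-liner propagated the error with `?`. If propagating the first database error is preferred, a variant could look roughly like the sketch below; it assumes the same module context (`schema`, `UniprotEntry`, `DatabaseError`, `Itertools`) as database/src/lib.rs and is not part of this commit:

```rust
// Sketch of an error-propagating alternative; assumes the same `schema`,
// `UniprotEntry`, `DatabaseError`, and `Itertools` imports as lib.rs above.
pub fn get_accessions_strict(
    conn: &mut MysqlConnection,
    accessions: &HashSet<String>
) -> Result<Vec<UniprotEntry>, DatabaseError> {
    use schema::uniprot_entries::dsl::*;

    let mut result: Vec<UniprotEntry> = Vec::new();

    for chunk in &accessions.iter().chunks(1000) {
        // `?` bubbles the Diesel error up instead of skipping the chunk.
        result.extend(
            uniprot_entries
                .filter(uniprot_accession_number.eq_any(chunk))
                .load::<UniprotEntry>(conn)?
        );
    }

    Ok(result)
}
```

Whether to skip failed chunks or fail the whole request is a design choice; this commit opts for skipping.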

pub fn get_accessions_map(
    conn: &mut MysqlConnection,
-     accessions: &Vec<String>
+     accessions: &HashSet<String>
) -> Result<HashMap<String, UniprotEntry>, DatabaseError> {
    Ok(get_accessions(conn, accessions)?
        .into_iter()
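The hunk is truncated here; the untouched remainder of `get_accessions_map` presumably keys each loaded entry by its accession number to produce the `HashMap<String, UniprotEntry>` return value. A self-contained sketch of that pattern, with a simplified stand-in for the `UniprotEntry` model (field name assumed from the `uniprot_accession_number` column used above):

```rust
use std::collections::HashMap;

// Simplified stand-in for the Diesel model; field name assumed.
struct UniprotEntry {
    uniprot_accession_number: String,
}

fn main() {
    let entries = vec![
        UniprotEntry { uniprot_accession_number: "P12345".to_string() },
        UniprotEntry { uniprot_accession_number: "Q67890".to_string() },
    ];

    // What `get_accessions_map` presumably does with the loaded entries:
    // key each one by its accession number.
    let map: HashMap<String, UniprotEntry> = entries
        .into_iter()
        .map(|entry| (entry.uniprot_accession_number.clone(), entry))
        .collect();

    assert!(map.contains_key("P12345"));
}
```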
