Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retrieve protein data in batches from database #48

Merged
merged 2 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,4 @@ Follow these steps in order to easily work on the Unipept API in the devcontaine

* You first have to build the binaries by running `cargo build --release`.
* Then, you should start the mariadb server: `sudo service mariadb start`
* Finally, the Unipept API can be started with this command: `./target/release/unipept-api -i "/unipept-index-data" -d "mysql://root:[email protected]:3306/unipept" -p 80`.
* Finally, the Unipept API can be started with this command: `./target/release/unipept-api -i "/unipept-index-data" -d "mysql://root:root_pass@localhost:3306/unipept" -p 80`.
1 change: 1 addition & 0 deletions api/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ urlencoding = "2.1.3"
http = "1.1.0"
tower-layer = "0.3.2"
tower-service = "0.3.2"
itertools = "0.13.0"
4 changes: 3 additions & 1 deletion api/src/controllers/api/pept2prot.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use std::collections::HashSet;
use axum::{extract::State, Json};
use itertools::Itertools;
use database::get_accessions_map;
use serde::{Deserialize, Serialize};

Expand Down Expand Up @@ -55,7 +57,7 @@ async fn handler(

let result = index.analyse(&input, equate_il, None);

let accession_numbers: Vec<String> = result
let accession_numbers: HashSet<String> = result
.iter()
.flat_map(|item| item.proteins.iter().map(|protein| protein.uniprot_accession.clone()))
.collect();
Expand Down
2 changes: 2 additions & 0 deletions api/src/controllers/api/protinfo.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::collections::HashSet;
use axum::{extract::State, Json};
use database::get_accessions;
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -58,6 +59,7 @@ async fn handler(
version: LineageVersion
) -> Result<Vec<ProtInformation>, ApiError> {
let input = sanitize_proteins(input);
let input = HashSet::from_iter(input.into_iter());

let connection = database.get_conn().await?;

Expand Down
3 changes: 2 additions & 1 deletion api/src/controllers/private_api/proteins.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::collections::HashSet;
use axum::{extract::State, Json};
use database::get_accessions_map;
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -57,7 +58,7 @@ async fn handler(
return Ok(ProteinInformation::default());
}

let accession_numbers: Vec<String> =
let accession_numbers: HashSet<String> =
result[0].proteins.iter().map(|protein| protein.uniprot_accession.clone()).collect();

let accessions_map = connection.interact(move |conn| get_accessions_map(conn, &accession_numbers)).await??;
Expand Down
1 change: 1 addition & 0 deletions database/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ edition = "2021"
[dependencies]
deadpool-diesel = { version = "0.4.1", features = ["mysql"] }
diesel = { version = "2", features = ["mysql"] }
itertools = "0.13.0"
thiserror = "1.0"
22 changes: 18 additions & 4 deletions database/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use std::{collections::HashMap, ops::Deref};

use std::collections::HashSet;
use deadpool_diesel::mysql::{Manager, Object, Pool};
pub use deadpool_diesel::InteractError;
use diesel::{prelude::*, MysqlConnection, QueryDsl};
pub use errors::DatabaseError;
use models::UniprotEntry;
use itertools::Itertools;

mod errors;
mod models;
Expand Down Expand Up @@ -36,16 +37,29 @@ impl Deref for Database {

/// Loads the `UniprotEntry` rows whose accession number is contained in `accessions`.
///
/// The lookup is split into batches of at most 1000 accession numbers, each executed as a
/// separate `IN (...)` query, and the rows of all batches are concatenated into one vector.
/// NOTE(review): batching presumably keeps individual statements within database limits for
/// very large inputs; confirm the intended batch size with the maintainers.
///
/// # Arguments
///
/// * `conn` - Open MySQL connection on which the queries are executed.
/// * `accessions` - Set of UniProt accession numbers to look up.
///
/// # Errors
///
/// Returns the error of the first batch whose query fails. Rows collected from earlier
/// batches are discarded in that case, so callers never receive a silently truncated result.
pub fn get_accessions(
    conn: &mut MysqlConnection,
    accessions: &HashSet<String>
) -> Result<Vec<UniprotEntry>, DatabaseError> {
    use schema::uniprot_entries::dsl::*;

    let mut result: Vec<UniprotEntry> = Vec::new();

    // `chunks` borrows from the `IntoChunks` adaptor, hence the `&` in front of the chain.
    for chunk in &accessions.iter().chunks(1000) {
        // Propagate query failures instead of skipping the batch: a partial answer wrapped in
        // `Ok` would be indistinguishable from "these accessions do not exist".
        let entries: Vec<UniprotEntry> =
            uniprot_entries.filter(uniprot_accession_number.eq_any(chunk)).load(conn)?;
        result.extend(entries);
    }

    Ok(result)
}

pub fn get_accessions_map(
conn: &mut MysqlConnection,
accessions: &Vec<String>
accessions: &HashSet<String>
) -> Result<HashMap<String, UniprotEntry>, DatabaseError> {
Ok(get_accessions(conn, accessions)?
.into_iter()
Expand Down