Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix requested changes from Pieter for Rust-only version #42

Merged
merged 2 commits into from
Mar 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions scripts/build_database.sh
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ END
# This function removes all temporary files that have been created by this script.
clean() {
# Clean contents of temporary directory
# rm -rf "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT"
rm -rf "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT"
export TMPDIR="$OLD_TMPDIR"
}

Expand Down Expand Up @@ -365,7 +365,8 @@ download_and_convert_all_sources() {
DB_TYPES_ARRAY=($DB_TYPES)
DB_SOURCES_ARRAY=($DB_SOURCES)

IFS="$OLDIFS"
# Set IFS to newline to properly split the $CHUNKS variable for folders with newlines
IFS=$'\n'

while [[ "$IDX" -ne "${#DB_TYPES_ARRAY}" ]] && [[ -n $(echo "${DB_TYPES_ARRAY[$IDX]}" | sed "s/\s//g") ]]
do
Expand Down Expand Up @@ -462,6 +463,8 @@ download_and_convert_all_sources() {

IDX=$((IDX + 1))
done

IFS="$OLDIFS"
}

filter_sources_by_taxa() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use anyhow::{Context, Result};
use clap::Parser;
use unipept_database::dat_parser::uniprot_dat_parser;
use unipept_database::dat_parser::utils::write_header;
use unipept_database::uniprot::UniprotType;

use unipept_database::utils::files::open_sin;

Expand All @@ -24,8 +23,8 @@ fn main() -> Result<()> {

#[derive(Parser, Debug)]
struct Cli {
#[clap(value_enum, short = 't', long, default_value_t = UniprotType::Swissprot)]
db_type: UniprotType,
#[clap(short = 't', long, default_value = "swissprot")]
db_type: String,
#[clap(long, default_value_t = 0)]
threads: usize,
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use std::num::NonZeroUsize;
use anyhow::{Context, Result};
use clap::Parser;
use smartstring::{LazyCompact, SmartString};
use unipept_database::uniprot::UniprotType;
use uniprot::uniprot::{SequentialParser, ThreadedParser};

use unipept_database::utils::files::open_sin;
Expand Down Expand Up @@ -50,8 +49,8 @@ type SmartStr = SmartString<LazyCompact>;
// Parse a Uniprot XML file and convert it into a TSV-file
#[derive(Parser, Debug)]
struct Cli {
#[clap(value_enum, short = 't', long, default_value_t = UniprotType::Swissprot)]
uniprot_type: UniprotType,
#[clap(short = 't', long, default_value = "swissprot")]
uniprot_type: String,
#[clap(long, default_value_t = 0)]
threads: u32,
#[clap(short, long, default_value_t = false)]
Expand Down Expand Up @@ -123,7 +122,7 @@ fn parse_name(entry: &uniprot::uniprot::Entry) -> SmartStr {
}

/// Write a single UniProt entry to stdout
fn write_entry(entry: &uniprot::uniprot::Entry, db_type: &UniprotType, verbose: bool) {
fn write_entry(entry: &uniprot::uniprot::Entry, db_type: &str, verbose: bool) {
let accession_number: SmartStr = entry.accessions[0].clone();
let sequence: SmartStr = entry.sequence.value.clone();

Expand Down Expand Up @@ -165,7 +164,7 @@ fn write_entry(entry: &uniprot::uniprot::Entry, db_type: &UniprotType, verbose:
SmartStr::from(ec_references.join(";")),
SmartStr::from(go_references.join(";")),
SmartStr::from(ip_references.join(";")),
SmartStr::from(db_type.to_str()),
SmartStr::from(db_type),
taxon_id,
];

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use crate::uniprot::UniprotType;
use anyhow::Context;
use std::collections::HashSet;

Expand Down Expand Up @@ -56,7 +55,7 @@ impl UniProtDATEntry {
}

/// Write an entry to stdout
pub fn write(&self, db_type: &UniprotType) {
pub fn write(&self, db_type: &str) {
if self.name.is_empty() {
eprintln!(
"Could not find a name for entry AC-{}",
Expand All @@ -73,7 +72,7 @@ impl UniProtDATEntry {
self.ec_references.join(";"),
self.go_references.join(";"),
self.ip_references.join(";"),
db_type.to_str(),
db_type,
self.taxon_id
)
}
Expand Down
1 change: 0 additions & 1 deletion scripts/helper_scripts/unipept-database-rs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@ pub mod calculate_lcas;
pub mod dat_parser;
pub mod taxons_lineages;
pub mod taxons_uniprots_tables;
pub mod uniprot;
pub mod utils;
15 changes: 0 additions & 15 deletions scripts/helper_scripts/unipept-database-rs/src/uniprot/mod.rs

This file was deleted.

Loading