Skip to content

Commit

Permalink
Finished MZTab parsing #19
Browse files Browse the repository at this point in the history
  • Loading branch information
douweschulte committed Oct 24, 2024
1 parent ffcef71 commit 1b54e65
Show file tree
Hide file tree
Showing 8 changed files with 393 additions and 195 deletions.
133 changes: 75 additions & 58 deletions rustyms/src/error/custom_error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,23 @@ use serde::*;
use std::error;
use std::fmt;

/// An error
/// An error. Stored as a pointer to a structure on the heap to prevent large sizes which could be
/// detrimental to performance for the happy path.
#[derive(Serialize, Deserialize, PartialEq, Clone, Eq, Hash)]
pub struct CustomError {
/// The actual error data, boxed so that this struct only holds a single pointer
content: Box<InnerError>,
}

#[derive(Serialize, Deserialize, PartialEq, Clone, Eq, Hash)]
struct InnerError {
/// The level of the error, defining how it should be handled
warning: bool,
/// A short description of the error, generally used as title line
short_description: String,
/// A longer description of the error, presented below the context to give more information and helpful feedback
long_description: String,
/// Possible suggestion(s) for the indicated text
pub(crate) suggestions: Vec<String>,
suggestions: Vec<String>,
/// The context, in the most general sense this produces output which leads the user to the right place in the code or file
context: Context,
/// Underlying errors
Expand All @@ -35,32 +41,14 @@ impl CustomError {
context: Context,
) -> Self {
Self {
warning: false,
short_description: short_desc.to_string(),
long_description: long_desc.to_string(),
suggestions: Vec::new(),
context,
underlying_errors: Vec::new(),
}
}
/// Create a new `CustomError`
///
/// ## Arguments
/// * `short_desc` - A short description of the error, generally used as title line
/// * `long_desc` - A longer description of the error, presented below the context to give more information and helpful feedback
/// * `context` - The context, in the most general sense this produces output which leads the user to the right place in the code or file
pub(crate) const fn const_error(
short_desc: String,
long_desc: String,
context: Context,
) -> Self {
Self {
warning: false,
short_description: short_desc,
long_description: long_desc,
suggestions: Vec::new(),
context,
underlying_errors: Vec::new(),
content: Box::new(InnerError {
warning: false,
short_description: short_desc.to_string(),
long_description: long_desc.to_string(),
suggestions: Vec::new(),
context,
underlying_errors: Vec::new(),
}),
}
}
/// Create a new `CustomError`
Expand All @@ -75,45 +63,54 @@ impl CustomError {
context: Context,
) -> Self {
Self {
warning: true,
short_description: short_desc.to_string(),
long_description: long_desc.to_string(),
suggestions: Vec::new(),
context,
underlying_errors: Vec::new(),
content: Box::new(InnerError {
warning: true,
short_description: short_desc.to_string(),
long_description: long_desc.to_string(),
suggestions: Vec::new(),
context,
underlying_errors: Vec::new(),
}),
}
}

/// The level of the error
pub const fn level(&self) -> &str {
if self.warning {
if self.content.warning {
"warning"
} else {
"error"
}
}

/// Gives the suggestions attached to this error, empty if there are none
pub fn suggestions(&self) -> &[String] {
    self.content.suggestions.as_slice()
}

/// Tests if this error is a warning
pub const fn is_warning(&self) -> bool {
self.warning
self.content.warning
}

/// Gives the short description or title for this error
pub fn short_description(&self) -> &str {
&self.short_description
&self.content.short_description
}

/// Gives the long description for this error
pub fn long_description(&self) -> &str {
&self.long_description
&self.content.long_description
}

/// Create a copy of the error with a new long description
#[must_use]
pub fn with_long_description(&self, long_desc: impl std::string::ToString) -> Self {
Self {
long_description: long_desc.to_string(),
..self.clone()
content: Box::new(InnerError {
long_description: long_desc.to_string(),
..(*self.content).clone()
}),
}
}

Expand All @@ -124,41 +121,53 @@ impl CustomError {
suggestions: impl IntoIterator<Item = impl std::string::ToString>,
) -> Self {
Self {
suggestions: suggestions.into_iter().map(|s| s.to_string()).collect(),
..self.clone()
content: Box::new(InnerError {
suggestions: suggestions.into_iter().map(|s| s.to_string()).collect(),
..(*self.content).clone()
}),
}
}

/// Create a copy of the error with a new context
#[must_use]
pub fn with_context(&self, context: Context) -> Self {
Self {
context,
..self.clone()
content: Box::new(InnerError {
context,
..(*self.content).clone()
}),
}
}

/// Create a copy of the error with the given underlying errors
#[must_use]
pub fn with_underlying_errors(&self, underlying_errors: Vec<Self>) -> Self {
Self {
underlying_errors,
..self.clone()
content: Box::new(InnerError {
underlying_errors,
..(*self.content).clone()
}),
}
}

/// Overwrite the line number with the given number, if applicable
#[must_use]
pub fn overwrite_line_number(&self, line_number: usize) -> Self {
Self {
context: self.context.clone().overwrite_line_number(line_number),
..self.clone()
content: Box::new(InnerError {
context: self
.content
.context
.clone()
.overwrite_line_number(line_number),
..(*self.content).clone()
}),
}
}

/// Gives the context for this error
pub const fn context(&self) -> &Context {
&self.context
&self.content.context
}
}

Expand All @@ -168,23 +177,31 @@ impl fmt::Debug for CustomError {
f,
"{}: {}{}\n{}",
self.level(),
self.short_description,
self.context,
self.long_description
self.content.short_description,
self.content.context,
self.content.long_description
)?;
match self.suggestions.len() {
match self.content.suggestions.len() {
0 => Ok(()),
1 => writeln!(f, "Did you mean: {}?", self.suggestions[0]),
_ => writeln!(f, "Did you mean any of: {}?", self.suggestions.join(", ")),
1 => writeln!(f, "Did you mean: {}?", self.content.suggestions[0]),
_ => writeln!(
f,
"Did you mean any of: {}?",
self.content.suggestions.join(", ")
),
}
.unwrap();
match self.underlying_errors.len() {
match self.content.underlying_errors.len() {
0 => Ok(()),
1 => writeln!(f, "Underlying error:\n{}", self.underlying_errors[0]),
1 => writeln!(
f,
"Underlying error:\n{}",
self.content.underlying_errors[0]
),
_ => writeln!(
f,
"Underlying errors:\n{}",
self.underlying_errors.iter().join("\n")
self.content.underlying_errors.iter().join("\n")
),
}
}
Expand Down
10 changes: 8 additions & 2 deletions rustyms/src/identification/general.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,16 @@ use super::{
error::{Context, CustomError},
ontologies::CustomDatabase,
FastaData, IdentifiedPeptide, IdentifiedPeptideIter, IdentifiedPeptideSource, MSFraggerData,
MaxQuantData, NovorData, OpairData, PeaksData, SageData,
MZTabData, MaxQuantData, NovorData, OpairData, PeaksData, SageData,
};

// TODO:
// * Merge multiple annotations for the same spectrum (e.g. all candidates peaks export, take care not to lose info on chimeric spectra)
// * Merge identical (or similar?) peptide sequences (for faster processing)

/// Open the selected path and automatically determine the file type.
/// Open the selected path and automatically determine the file type. It will uncompress gzipped
/// files automatically.
///
/// # Errors
/// It errors if the file type could not be determined or if opening the file errors.
pub fn open_identified_peptides_file<'a>(
Expand Down Expand Up @@ -64,6 +66,10 @@ pub fn open_identified_peptides_file<'a>(
Some("txt") => {
MaxQuantData::parse_file(path, custom_database).map(IdentifiedPeptideIter::into_box)
}
Some("mztab") => MZTabData::parse_file(path, custom_database).map(|peptides| {
Box::new(peptides.into_iter().map(|p| p.map(Into::into)))
as Box<dyn Iterator<Item = Result<IdentifiedPeptide, CustomError>> + 'a>
}),
_ => Err(CustomError::error(
"Unknown extension",
"Use CSV, TSV, TXT, PSMTSV, or Fasta, or any of these as a gzipped file (eg csv.gz).",
Expand Down
17 changes: 14 additions & 3 deletions rustyms/src/identification/identified_peptide.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,22 +124,33 @@ impl IdentifiedPeptide {
}
MetaData::MaxQuant(MaxQuantData { scan_number, .. }) => Some(scan_number.clone()),
MetaData::MSFragger(MSFraggerData { spectrum, .. }) => Some(vec![spectrum.scan.0]),
MetaData::Sage(_) | MetaData::Fasta(_) | MetaData::None | MetaData::MZTab(_) => None,
MetaData::MZTab(MZTabData { spectra_ref, .. }) => Some(
spectra_ref
.iter()
.filter_map(|(_, _, id, _)| id.index())
.collect(),
),
MetaData::Sage(_) | MetaData::Fasta(_) | MetaData::None => None,
}
}

/// The native ids of the spectrum for this identified peptide, if known.
pub fn spectrum_native_ids(&self) -> Option<Vec<String>> {
match &self.metadata {
MetaData::Sage(SageData { native_id, .. }) => Some(vec![native_id.clone()]),
MetaData::MZTab(MZTabData { spectra_ref, .. }) => Some(
spectra_ref
.iter()
.filter_map(|(_, _, id, _)| id.native().map(ToString::to_string))
.collect(),
),
MetaData::MaxQuant(_)
| MetaData::Opair(_)
| MetaData::Novor(_)
| MetaData::Peaks(_)
| MetaData::Fasta(_)
| MetaData::MSFragger(_)
| MetaData::None
| MetaData::MZTab(_) => None,
| MetaData::None => None,
}
}

Expand Down
Loading

0 comments on commit 1b54e65

Please sign in to comment.