Skip to content

Commit

Permalink
Merge pull request #103 from lazear/main
Browse files (browse the repository at this point in the history)
Add additional search engine, some fixes
  • Loading branch information
RobbinBouwmeester authored Sep 26, 2023
2 parents 584aa2d + 75aa06d commit 5ce715e
Show file tree
Hide file tree
Showing 14 changed files with 42,835 additions and 41 deletions.
3 changes: 2 additions & 1 deletion proteobench/modules/dda_quant/datapoint.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import numpy as np
from dataclasses import asdict, dataclass
from datetime import datetime

Expand Down Expand Up @@ -44,7 +45,7 @@ def calculate_plot_data(self, df):
nr_missing_0 = 0
for spec in species:
f = len(df[df[spec] == True])
sum_s = (df[df[spec] == True]["1|2_expected_ratio_diff"]).sum()
sum_s = np.nan_to_num(df[df[spec] == True]["1|2_expected_ratio_diff"], nan=0, neginf=-1000, posinf=1000).sum()
ratio = sum_s / f
prop_ratio = (f / len(df)) * ratio
prop_ratios.append(prop_ratio)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03 = "LFQ_Orbitrap_DDA_Condition_B_Sam

[species_expected_ratio]
[species_expected_ratio.YEAST]
"1|2" = 0.5
"1|2" = 2.0

[species_expected_ratio.ECOLI]
"1|2" = 1.5
"1|2" = 0.25

[species_expected_ratio.HUMAN]
"1|2" = 1.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,16 @@ LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03 = 2

[species_expected_ratio]
[species_expected_ratio.YEAST]
"1|2" = 0.5
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01|LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01" = 0.5
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02|LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02" = 0.5
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03|LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03" = 0.5
"1|2" = 2.0
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01|LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01" = 2.0
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02|LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02" = 2.0
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03|LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03" = 2.0

[species_expected_ratio.ECOLI]
"1|2" = 1.5
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01|LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01" = 1.5
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02|LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02" = 1.5
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03|LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03" = 1.5
"1|2" = 0.25
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01|LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01" = 0.25
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02|LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02" = 0.25
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03|LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03" = 0.25

[species_expected_ratio.HUMAN]
"1|2" = 1.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ Charge = "Charge"

[species_expected_ratio]
[species_expected_ratio.YEAST]
"1|2" = 0.5
"1|2" = 2.0

[species_expected_ratio.ECOLI]
"1|2" = 1.5
"1|2" = 0.25

[species_expected_ratio.HUMAN]
"1|2" = 1.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ abundance_DDA_Condition_B_Sample_Alpha_03 = 2

[species_expected_ratio]
[species_expected_ratio.YEAST]
"1|2" = 0.5
"1|2" = 2.0

[species_expected_ratio.ECOLI]
"1|2" = 1.5
"1|2" = 0.25

[species_expected_ratio.HUMAN]
"1|2" = 1.0
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Parse settings for a tab-separated search-engine result table.
# NOTE(review): presumably this is the Sage settings file
# (parse_settings_sage.toml) — confirm the file name against parse_settings.py.

# Column-name pairs for the protein, peptide sequence and charge columns.
# NOTE(review): which side is the input column and which is the internal name
# is not visible here — confirm against the code that consumes [mapper].
[mapper]
"proteins" = "Proteins"
"peptide" = "Sequence"
"charge" = "Charge"

# Assigns each raw file to a numeric group: Condition_A files -> 1,
# Condition_B files -> 2. Keys carry the ".mzML.gz" extension.
[replicate_mapper]
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML.gz" = 1
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.mzML.gz" = 1
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.mzML.gz" = 1
"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML.gz" = 2
"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.mzML.gz" = 2
"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.mzML.gz" = 2

# Maps each run file name to the same name with the extension removed.
# NOTE(review): keys here end in ".mzML" while [replicate_mapper] keys end in
# ".mzML.gz" — confirm that both spellings match what the parser looks up.
[run_mapper]
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML" = "LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01"
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.mzML" = "LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02"
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.mzML" = "LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03"
"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML" = "LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01"
"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.mzML" = "LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02"
"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.mzML" = "LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03"

# Species name -> marker string for that species.
# NOTE(review): presumably the marker is searched for in protein identifiers
# (e.g. a "_YEAST" suffix) — verify against the species-flagging code.
[species_dict]
"YEAST" = "_YEAST"
"ECOLI" = "_ECOLI"
"HUMAN" = "_HUMAN"

# Expected ratio per species under the key "1|2".
# NOTE(review): "1|2" presumably means group 1 versus group 2 as numbered in
# [replicate_mapper]; whether it is 1/2 or 2/1 is not visible here — confirm.
[species_expected_ratio]
[species_expected_ratio.YEAST]
"1|2" = 2.0

[species_expected_ratio.ECOLI]
"1|2" = 0.25

[species_expected_ratio.HUMAN]
"1|2" = 1.0

# General parsing options.
# NOTE(review): presumably contaminant_flag is a marker identifying contaminant
# entries, decoy_flag toggles decoy handling, and min_count_multispec is a
# threshold for multi-species assignments — confirm exact semantics in the parser.
[general]
contaminant_flag = "Cont_"
decoy_flag = false
min_count_multispec = 1
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ abundance_B_3 = "LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03"

[species_expected_ratio]
[species_expected_ratio.YEAST]
"1|2" = 0.5
"1|2" = 2.0

[species_expected_ratio.ECOLI]
"1|2" = 1.5
"1|2" = 0.25

[species_expected_ratio.HUMAN]
"1|2" = 1.0
Expand Down
2 changes: 2 additions & 0 deletions proteobench/modules/dda_quant/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ def load_input_file(self, input_csv: str, input_format: str) -> pd.DataFrame:
input_data_frame = pd.read_csv(input_csv, sep="\t", low_memory=False)
elif input_format == "AlphaPept":
input_data_frame = pd.read_csv(input_csv, low_memory=False)
elif input_format == "Sage":
input_data_frame = pd.read_csv(input_csv, sep='\t', low_memory=False)
elif input_format == "MSFragger":
input_data_frame = pd.read_csv(input_csv, low_memory=False, sep="\t")
elif input_format == "WOMBAT":
Expand Down
2 changes: 2 additions & 0 deletions proteobench/modules/dda_quant/parse_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"MSFragger" : os.path.join(PARSE_SETTINGS_DIR, 'parse_settings_msfragger.toml'),
"Proline" : os.path.join(PARSE_SETTINGS_DIR, 'parse_settings_proline.toml'),
"AlphaPept" : os.path.join(PARSE_SETTINGS_DIR, 'parse_settings_alphapept.toml'),
"Sage" : os.path.join(PARSE_SETTINGS_DIR, 'parse_settings_sage.toml'),
"Custom" : os.path.join(PARSE_SETTINGS_DIR, 'parse_settings_custom.toml')
}

Expand All @@ -28,6 +29,7 @@
"MSFragger",
"Proline",
"WOMBAT",
"Sage",
"Custom")

LOCAL_DEVELOPMENT = False
Expand Down
34 changes: 19 additions & 15 deletions proteobench/modules/dda_quant/plot.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import pandas as pd
import plotly.figure_factory as ff
import plotly.graph_objects as go
import plotly.express as px
import streamlit as st
from streamlit_plotly_events import plotly_events

Expand All @@ -11,18 +11,21 @@ class PlotDataPoint:
def plot_bench(self, result_df: pd.DataFrame) -> go.Figure:
"""Plot results with Plotly Express."""

hist_data = [
np.array(result_df[result_df["YEAST"] == True]["1|2_ratio"]),
np.array(result_df[result_df["HUMAN"] == True]["1|2_ratio"]),
np.array(result_df[result_df["ECOLI"] == True]["1|2_ratio"]),
]
group_labels = [
"YEAST",
"HUMAN",
"ECOLI",
]

fig = ff.create_distplot(hist_data, group_labels, show_hist=False)
# Remove any precursors not arising from a known organism... contaminants?
result_df = result_df[result_df[["YEAST", "ECOLI", "HUMAN"]].any(axis=1)]
result_df["kind"] = result_df[["YEAST", "ECOLI", "HUMAN"]].apply(
lambda x: ["YEAST", "ECOLI", "HUMAN"][np.argmax(x)], axis=1
)
fig = px.histogram(
result_df,
x=np.log2(result_df["1|2_ratio"]),
color="kind",
marginal="rug",
histnorm="probability density",
barmode="overlay",
opacity=0.7,
nbins=100
)

fig.update_layout(
width=700,
Expand All @@ -39,9 +42,9 @@ def plot_bench(self, result_df: pd.DataFrame) -> go.Figure:
gridwidth=2,
),
)
fig.update_xaxes(range=[0, 4])
fig.update_xaxes(range=[-4, 4])
fig.update_xaxes(showgrid=True, gridcolor="lightgray", gridwidth=1)
# fig.update_yaxes(showgrid=True, gridcolor="lightgray", gridwidth=1)
fig.update_yaxes(showgrid=True, gridcolor="lightgray", gridwidth=1)

return fig

Expand All @@ -66,6 +69,7 @@ def plot_metric(self, benchmark_metrics_df: pd.DataFrame) -> go.Figure:
"MSFragger": "#ff7f0e",
"WOMBAT": "#7f7f7f",
"Proline": "#d62728",
"Sage": "#f74c00",
"Custom": "#9467bd",
}

Expand Down
Loading

0 comments on commit 5ce715e

Please sign in to comment.