Skip to content

Commit

Permalink
Merge branch 'main' into parse_settings
Browse files Browse the repository at this point in the history
  • Loading branch information
Henry committed Oct 12, 2023
2 parents 532157f + fb53dc1 commit 9b69350
Show file tree
Hide file tree
Showing 17 changed files with 42,985 additions and 196 deletions.
9 changes: 6 additions & 3 deletions proteobench/modules/dda_quant/datapoint.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import json
import numpy as np
from dataclasses import asdict, dataclass
from datetime import datetime


@dataclass
class Datapoint:
"""Data used to store a single benchmark datapoint."""

# TODO add threshold value used for presence ion/peptidoform
id: str = None
search_engine: str = None
software_version: int = 0
Expand Down Expand Up @@ -44,13 +45,14 @@ def calculate_plot_data(self, df):
nr_missing_0 = 0
for spec in species:
f = len(df[df[spec] == True])
sum_s = (df[df[spec] == True]["1|2_expected_ratio_diff"]).sum()
sum_s = np.nan_to_num(df[df[spec] == True]["1|2_expected_ratio_diff"], nan=0, neginf=-1000, posinf=1000).sum()
ratio = sum_s / f
prop_ratio = (f / len(df)) * ratio
prop_ratios.append(prop_ratio)
sum_ratios += prop_ratio
nr_missing_0 += f


# TODO rename/document code
self.weighted_sum = round(sum_ratios, ndigits=3)
self.nr_prec = len(df)

Expand All @@ -64,6 +66,7 @@ def generate_id(self):
)
print(self.id)

# TODO, not used? Can be removed?
def dump_json_object(self, file_name):
f = open(file_name, "a")
f.write(json.dumps(asdict(self)))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Expected ratio for each species (YEAST, ECOLI, HUMAN).
# NOTE(review): the "1|2" key presumably denotes replicate group 1 versus
# replicate group 2 (the group numbers assigned in the parsers'
# [replicate_mapper] tables) — confirm against the code that reads this file.
[species_expected_ratio]
[species_expected_ratio.YEAST]
"1|2" = 2.0

[species_expected_ratio.ECOLI]
"1|2" = 0.25

[species_expected_ratio.HUMAN]
"1|2" = 1.0

[general]
# NOTE(review): presumably a minimum-count threshold applied to entries that
# match multiple species — confirm the exact meaning against the consumer.
min_count_multispec = 1
Original file line number Diff line number Diff line change
Expand Up @@ -16,29 +16,18 @@ LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02 = 2
LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03 = 2

[run_mapper]
LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01 = "LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01"
LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02 = "LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02"
LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03 = "LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03"
LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01 = "LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01"
LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02 = "LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02"
LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03 = "LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03"
LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01 = "Condition_A_Sample_Alpha_01"
LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02 = "Condition_A_Sample_Alpha_02"
LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03 = "Condition_A_Sample_Alpha_03"
LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01 = "Condition_B_Sample_Alpha_01"
LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02 = "Condition_B_Sample_Alpha_02"
LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03 = "Condition_B_Sample_Alpha_03"

[species_dict]
"YEAST" = "_YEAST"
"ECOLI" = "_ECOLI"
"HUMAN" = "_HUMAN"

[species_expected_ratio]
[species_expected_ratio.YEAST]
"1|2" = 0.5

[species_expected_ratio.ECOLI]
"1|2" = 1.5

[species_expected_ratio.HUMAN]
"1|2" = 1.0
[species_mapper]
"_YEAST" = "YEAST"
"_ECOLI" = "ECOLI"
"_HUMAN" = "HUMAN"

[general]
contaminant_flag = "Cont_"
decoy_flag = true
min_count_multispec = 1
Original file line number Diff line number Diff line change
Expand Up @@ -13,29 +13,18 @@ LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02 = 2
LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03 = 2

[run_mapper]
LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01 = "LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01"
LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02 = "LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02"
LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03 = "LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03"
LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01 = "LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01"
LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02 = "LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02"
LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03 = "LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03"
LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01 = "Condition_A_Sample_Alpha_01"
LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02 = "Condition_A_Sample_Alpha_02"
LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03 = "Condition_A_Sample_Alpha_03"
LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01 = "Condition_B_Sample_Alpha_01"
LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02 = "Condition_B_Sample_Alpha_02"
LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03 = "Condition_B_Sample_Alpha_03"

[species_dict]
"YEAST" = "_YEAST"
"ECOLI" = "_ECOLI"
"HUMAN" = "_HUMAN"

[species_expected_ratio]
[species_expected_ratio.YEAST]
"1|2" = 0.5

[species_expected_ratio.ECOLI]
"1|2" = 1.5

[species_expected_ratio.HUMAN]
"1|2" = 1.0
[species_mapper]
"_YEAST" = "YEAST"
"_ECOLI" = "ECOLI"
"_HUMAN" = "HUMAN"

[general]
contaminant_flag = "Cont_"
decoy_flag = true
min_count_multispec = 1
Original file line number Diff line number Diff line change
Expand Up @@ -14,38 +14,18 @@ LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02 = 2
LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03 = 2

[run_mapper]
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01" = "LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01"
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02" = "LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02"
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03" = "LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03"
"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01" = "LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01"
"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02" = "LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02"
"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03" = "LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03"
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01" = "Condition_A_Sample_Alpha_01"
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02" = "Condition_A_Sample_Alpha_02"
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03" = "Condition_A_Sample_Alpha_03"
"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01" = "Condition_B_Sample_Alpha_01"
"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02" = "Condition_B_Sample_Alpha_02"
"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03" = "Condition_B_Sample_Alpha_03"

[species_dict]
"YEAST" = "_YEAST"
"ECOLI" = "_ECOLI"
"HUMAN" = "_HUMAN"

[species_expected_ratio]
[species_expected_ratio.YEAST]
"1|2" = 0.5
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01|LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01" = 0.5
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02|LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02" = 0.5
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03|LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03" = 0.5

[species_expected_ratio.ECOLI]
"1|2" = 1.5
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01|LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01" = 1.5
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02|LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02" = 1.5
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03|LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03" = 1.5

[species_expected_ratio.HUMAN]
"1|2" = 1.0
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01|LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01" = 1.0
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02|LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02" = 1.0
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03|LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03" = 1.0
[species_mapper]
"_YEAST" = "YEAST"
"_ECOLI" = "ECOLI"
"_HUMAN" = "HUMAN"

[general]
contaminant_flag = "Cont_"
decoy_flag = true
min_count_multispec = 1
Original file line number Diff line number Diff line change
Expand Up @@ -13,30 +13,19 @@ Charge = "Charge"
"B_3 Intensity" = 2

[run_mapper]
"A_1 Intensity" = "LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01"
"A_2 Intensity" = "LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02"
"A_3 Intensity" = "LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03"
"B_1 Intensity" = "LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01"
"B_2 Intensity" = "LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02"
"B_3 Intensity" = "LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03"
"A_1 Intensity" = "Condition_A_Sample_Alpha_01"
"A_2 Intensity" = "Condition_A_Sample_Alpha_02"
"A_3 Intensity" = "Condition_A_Sample_Alpha_03"
"B_1 Intensity" = "Condition_B_Sample_Alpha_01"
"B_2 Intensity" = "Condition_B_Sample_Alpha_02"
"B_3 Intensity" = "Condition_B_Sample_Alpha_03"


[species_dict]
"YEAST" = "_YEAST"
"ECOLI" = "_ECOLI"
"HUMAN" = "_HUMAN"

[species_expected_ratio]
[species_expected_ratio.YEAST]
"1|2" = 0.5

[species_expected_ratio.ECOLI]
"1|2" = 1.5

[species_expected_ratio.HUMAN]
"1|2" = 1.0
[species_mapper]
"_YEAST" = "YEAST"
"_ECOLI" = "ECOLI"
"_HUMAN" = "HUMAN"

[general]
contaminant_flag = "Cont_"
decoy_flag = false
min_count_multispec = 1
Original file line number Diff line number Diff line change
Expand Up @@ -14,29 +14,18 @@ abundance_DDA_Condition_B_Sample_Alpha_02 = 2
abundance_DDA_Condition_B_Sample_Alpha_03 = 2

[run_mapper]
"abundance_DDA_Condition_A_Sample_Alpha_01" = "LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01"
"abundance_DDA_Condition_A_Sample_Alpha_02" = "LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02"
"abundance_DDA_Condition_A_Sample_Alpha_03" = "LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03"
"abundance_DDA_Condition_B_Sample_Alpha_01" = "LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01"
"abundance_DDA_Condition_B_Sample_Alpha_02" = "LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02"
"abundance_DDA_Condition_B_Sample_Alpha_03" = "LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03"
"abundance_DDA_Condition_A_Sample_Alpha_01" = "Condition_A_Sample_Alpha_01"
"abundance_DDA_Condition_A_Sample_Alpha_02" = "Condition_A_Sample_Alpha_02"
"abundance_DDA_Condition_A_Sample_Alpha_03" = "Condition_A_Sample_Alpha_03"
"abundance_DDA_Condition_B_Sample_Alpha_01" = "Condition_B_Sample_Alpha_01"
"abundance_DDA_Condition_B_Sample_Alpha_02" = "Condition_B_Sample_Alpha_02"
"abundance_DDA_Condition_B_Sample_Alpha_03" = "Condition_B_Sample_Alpha_03"

[species_dict]
"YEAST" = "_YEAST"
"ECOLI" = "_ECOLI"
"HUMAN" = "_HUMAN"

[species_expected_ratio]
[species_expected_ratio.YEAST]
"1|2" = 0.5

[species_expected_ratio.ECOLI]
"1|2" = 1.5

[species_expected_ratio.HUMAN]
"1|2" = 1.0
[species_mapper]
"_YEAST" = "YEAST"
"_ECOLI" = "ECOLI"
"_HUMAN" = "HUMAN"

[general]
contaminant_flag = "Cont_"
decoy_flag = true
min_count_multispec = 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Column-name mapping for the protein, peptide-sequence and charge columns.
# NOTE(review): presumably key = column name in the tool's output and
# value = standardized column name — confirm against the parser.
[mapper]
"proteins" = "Proteins"
"peptide" = "Sequence"
"charge" = "Charge"

# Assigns each run to a replicate group: Condition A runs -> 1, Condition B runs -> 2.
# NOTE(review): the keys here end in ".mzML.gz" whereas the [run_mapper] keys
# below end in ".mzML" — confirm that this difference is intentional and that
# both spellings match the column names actually present in the input.
[replicate_mapper]
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML.gz" = 1
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.mzML.gz" = 1
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.mzML.gz" = 1
"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML.gz" = 2
"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.mzML.gz" = 2
"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.mzML.gz" = 2

# Maps each run's file name to a short run label ("Condition_<A|B>_Sample_Alpha_<NN>").
[run_mapper]
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML" = "Condition_A_Sample_Alpha_01"
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.mzML" = "Condition_A_Sample_Alpha_02"
"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.mzML" = "Condition_A_Sample_Alpha_03"
"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML" = "Condition_B_Sample_Alpha_01"
"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.mzML" = "Condition_B_Sample_Alpha_02"
"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.mzML" = "Condition_B_Sample_Alpha_03"

# Maps a species flag (e.g. "_YEAST") to the species name (e.g. "YEAST").
# NOTE(review): presumably the flag is searched for in protein identifiers — confirm.
[species_mapper]
"_YEAST" = "YEAST"
"_ECOLI" = "ECOLI"
"_HUMAN" = "HUMAN"

[general]
# NOTE(review): presumably the marker string that identifies contaminant entries — confirm.
contaminant_flag = "Cont_"
# NOTE(review): exact effect of this switch is not visible here — confirm against the parser.
decoy_flag = false
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
protein_group = "Proteins"
"modified_peptide" = "Modified sequence"


[replicate_mapper]
abundance_A_1 = 1
abundance_A_2 = 1
Expand All @@ -12,29 +11,18 @@ abundance_B_2 = 2
abundance_B_3 = 2

[run_mapper]
abundance_A_1 = "LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01"
abundance_A_2 = "LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02"
abundance_A_3 = "LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03"
abundance_B_1 = "LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01"
abundance_B_2 = "LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02"
abundance_B_3 = "LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03"

[species_dict]
"YEAST" = "_YEAST"
"ECOLI" = "_ECOLI"
"HUMAN" = "_HUMAN"

[species_expected_ratio]
[species_expected_ratio.YEAST]
"1|2" = 0.5
abundance_A_1 = "Condition_A_Sample_Alpha_01"
abundance_A_2 = "Condition_A_Sample_Alpha_02"
abundance_A_3 = "Condition_A_Sample_Alpha_03"
abundance_B_1 = "Condition_B_Sample_Alpha_01"
abundance_B_2 = "Condition_B_Sample_Alpha_02"
abundance_B_3 = "Condition_B_Sample_Alpha_03"

[species_expected_ratio.ECOLI]
"1|2" = 1.5

[species_expected_ratio.HUMAN]
"1|2" = 1.0
[species_mapper]
"_YEAST" = "YEAST"
"_ECOLI" = "ECOLI"
"_HUMAN" = "HUMAN"

[general]
contaminant_flag = "Cont_"
decoy_flag = true
min_count_multispec = 1
decoy_flag = true
Loading

0 comments on commit 9b69350

Please sign in to comment.