Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make the id field in the JSON output easier to interpret. Closes #125 #126

Merged
merged 1 commit into from
Nov 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion proteobench/modules/dda_quant/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,12 +126,16 @@ def generate_datapoint(
self, intermediate: pd.DataFrame, input_format: str, user_input: dict
) -> Datapoint:
"""Method used to compute metadata for the provided result."""
current_datetime = datetime.datetime.now()
formatted_datetime = current_datetime.strftime("%Y%m%d_%H%M%S_%f")


result_datapoint = Datapoint(
id=input_format
+ "_"
+ user_input["version"]
+ "_"
+ str(datetime.datetime.now()),
+ formatted_datetime,
search_engine=input_format,
software_version=user_input["version"],
fdr_psm=user_input["fdr_psm"],
Expand Down
81 changes: 67 additions & 14 deletions test/test_module_dda_quant.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import datetime
import os
import unittest

Expand All @@ -8,15 +9,19 @@
from proteobench.modules.dda_quant.module import Module
from proteobench.modules.dda_quant.parse import ParseInputs
from proteobench.modules.dda_quant.parse_settings import (
DDA_QUANT_RESULTS_REPO, INPUT_FORMATS, ParseSettings)
DDA_QUANT_RESULTS_REPO,
INPUT_FORMATS,
ParseSettings,
)
from proteobench.modules.dda_quant.plot import PlotDataPoint
from proteobench.modules.dda_quant.module import Datapoint

# generate_input_field


TESTDATA_DIR = os.path.join(os.path.dirname(__file__), "data")
TESTDATA_FILES = {
#"WOMBAT": os.path.join(TESTDATA_DIR, "WOMBAT_stand_pep_quant_mergedproline.csv"),
# "WOMBAT": os.path.join(TESTDATA_DIR, "WOMBAT_stand_pep_quant_mergedproline.csv"),
"MaxQuant": os.path.join(TESTDATA_DIR, "MaxQuant_evidence_sample.txt"),
"MSFragger": os.path.join(TESTDATA_DIR, "MSFragger_combined_ion.tsv"),
"AlphaPept": os.path.join(TESTDATA_DIR, "AlphaPept_subset.csv"),
Expand Down Expand Up @@ -59,12 +64,18 @@ def process_file(format_name: str):


class TestOutputFileReading(unittest.TestCase):
supported_formats = ("MaxQuant", "MSFragger", "AlphaPept", "Sage") #"WOMBAT",
supported_formats = ("MaxQuant", "MSFragger", "AlphaPept", "Sage") # "WOMBAT",
""" Simple tests for reading csv input files."""

def test_search_engines_supported(self):
"""Test whether the expected formats are supported."""
for format_name in ("MaxQuant", "AlphaPept", "MSFragger", "Proline", "Sage"): #, "WOMBAT"
for format_name in (
"MaxQuant",
"AlphaPept",
"MSFragger",
"Proline",
"Sage",
): # , "WOMBAT"
self.assertTrue(format_name in INPUT_FORMATS)

def test_input_file_loading(self):
Expand Down Expand Up @@ -128,7 +139,7 @@ class TestPlot(unittest.TestCase):
"""Test if the plots return a figure."""

def test_plot_metric(self):
#all_datapoints = pd.read_json(DDA_QUANT_RESULTS_PATH)
# all_datapoints = pd.read_json(DDA_QUANT_RESULTS_PATH)
all_datapoints = read_results_json_repo(DDA_QUANT_RESULTS_REPO)
all_datapoints["old_new"] = "old"
fig = PlotDataPoint().plot_metric(all_datapoints)
Expand All @@ -141,30 +152,72 @@ def test_plot_bench(self):
Nyeast = 1000
Necoli = 500
Nhuman = 2000

yeastRatio = np.random.normal(loc=-1, scale=1, size=Nyeast)
humanRatio = np.random.normal(loc=0, scale=1, size=Nhuman)
ecoliRatio = np.random.normal(loc=2, scale=1, size=Necoli)
combined_ratios = np.concatenate([yeastRatio, humanRatio, ecoliRatio])

human_strings = ["HUMAN"] * Nhuman
ecoli_strings = ["ECOLI"] * Necoli
yeast_strings = ["YEAST"] * Nyeast

# Concatenate the lists to create a single list
combined_list = human_strings + ecoli_strings + yeast_strings

combineddf = pd.DataFrame({'SPECIES': combined_list, '1|2_ratio': combined_ratios})
combineddf['HUMAN'] = combineddf['SPECIES'] == 'HUMAN'
combineddf['ECOLI'] = combineddf['SPECIES'] == 'ECOLI'
combineddf['YEAST'] = combineddf['SPECIES'] == 'YEAST'


combineddf = pd.DataFrame(
{"SPECIES": combined_list, "1|2_ratio": combined_ratios}
)
combineddf["HUMAN"] = combineddf["SPECIES"] == "HUMAN"
combineddf["ECOLI"] = combineddf["SPECIES"] == "ECOLI"
combineddf["YEAST"] = combineddf["SPECIES"] == "YEAST"

fig = PlotDataPoint().plot_bench(combineddf)
#fig.write_html("dummy.html")
# fig.write_html("dummy.html")
self.assertIsNotNone(fig)


class TestDatapoint(unittest.TestCase):
    """Test that a Datapoint can be constructed from user-supplied metadata."""

    def test_Datapoint_constructor(self):
        """Build a Datapoint from a representative set of search parameters.

        The id is assembled as ``<input_format>_<version>_<timestamp>``, with
        the timestamp rendered as ``%Y%m%d_%H%M%S_%f``.
        """
        input_format = "MaxQuant"
        user_input = {
            "version": "1.0",
            "fdr_psm": 0.01,
            "fdr_peptide": 0.05,
            "fdr_protein": 0.1,
            "mbr": 1,
            "precursor_mass_tolerance": 0.02,
            "precursor_mass_tolerance_unit": "Da",
            "fragment_mass_tolerance": 0.02,
            "fragment_mass_tolerance_unit": "Da",
            "search_enzyme_name": "Trypsin",
            "allowed_missed_cleavage": 1,
            "min_peptide_length": 6,
            "max_peptide_length": 30,
        }
        current_datetime = datetime.datetime.now()
        formatted_datetime = current_datetime.strftime("%Y%m%d_%H%M%S_%f")
        datapoint_id = f"{input_format}_{user_input['version']}_{formatted_datetime}"

        result_datapoint = Datapoint(
            id=datapoint_id,
            search_engine=input_format,
            software_version=user_input["version"],
            fdr_psm=user_input["fdr_psm"],
            fdr_peptide=user_input["fdr_peptide"],
            fdr_protein=user_input["fdr_protein"],
            MBR=user_input["mbr"],
            precursor_tol=user_input["precursor_mass_tolerance"],
            precursor_tol_unit=user_input["precursor_mass_tolerance_unit"],
            # NOTE(review): "fragmnent_tol" is kept exactly as written in the
            # original call; confirm it matches the Datapoint field name.
            fragmnent_tol=user_input["fragment_mass_tolerance"],
            fragment_tol_unit=user_input["fragment_mass_tolerance_unit"],
            enzyme_name=user_input["search_enzyme_name"],
            missed_cleavages=user_input["allowed_missed_cleavage"],
            min_pep_length=user_input["min_peptide_length"],
            max_pep_length=user_input["max_peptide_length"],
        )

        # Make the success condition explicit rather than relying only on the
        # constructor not raising.
        self.assertIsInstance(result_datapoint, Datapoint)


if __name__ == "__main__":
unittest.main()