Skip to content

Commit

Permalink
Merge branch 'main' into improveplots
Browse files Browse the repository at this point in the history
  • Loading branch information
scaramonche authored Sep 25, 2023
2 parents ff0a867 + a8a3102 commit a38e7c7
Show file tree
Hide file tree
Showing 7 changed files with 109 additions and 80 deletions.
72 changes: 16 additions & 56 deletions proteobench/github/gh.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,30 @@
import os
from tempfile import TemporaryDirectory

import pandas as pd
from git import Repo

from proteobench.modules.dda_quant.module import Module

def write_json_local_development(
temporary_datapoints
):
t_dir = TemporaryDirectory().name
os.mkdir(t_dir)

current_datapoint = temporary_datapoints.iloc[-1]
current_datapoint["is_temporary"] = False
all_datapoints = Module().add_current_data_point(None, current_datapoint)

# TODO write below to logger instead of std.out
fname = os.path.join(t_dir, "results.json")
print(f"Writing the json to: {fname}")

f = open(os.path.join(t_dir, "results.json"), "w")

all_datapoints.to_json(
f,
orient="records",
indent=2
)
def clone_repo_anon(
    clone_dir="K:/pb/",
    remote_git="https://github.com/Proteobench/Results_Module2_quant_DDA.git",
):
    """Clone ``remote_git`` into ``clone_dir`` without passing any credentials.

    Args:
        clone_dir: Target directory handed to ``Repo.clone_from``.
        remote_git: URL of the repository to clone.

    Returns:
        ``clone_dir`` unchanged, so callers can chain on the clone location.
    """
    # The Repo object returned by clone_from is not needed; only the files on disk are used.
    Repo.clone_from(remote_git, clone_dir)
    return clone_dir

return os.path.join(t_dir, "results.json")

def clone_pr(
temporary_datapoints,
token,
username="Proteobot",
remote_git="github.com/Proteobot/Results_Module2_quant_DDA.git",
branch_name="new_branch",
def read_results_json_repo(
remote_git_repo= "https://github.com/Proteobench/Results_Module2_quant_DDA.git"
):
t_dir = TemporaryDirectory().name

clone_repo(clone_dir=t_dir, token=token, remote_git=remote_git, username=username)
current_datapoint = temporary_datapoints.iloc[-1]
current_datapoint["is_temporary"] = False
all_datapoints = Module().add_current_data_point(None, current_datapoint)
branch_name = current_datapoint["id"]

# do the pd.write_json() here!!!
print(os.path.join(t_dir, "results.json"))
f = open(os.path.join(t_dir, "results.json"), "w")

all_datapoints.to_json(
f,
orient="records",
indent=2
)

f.close()
commit_message = "Added new run with id " + branch_name

pr_github(
clone_dir=t_dir,
token=token,
remote_git=remote_git,
username=username,
branch_name=branch_name,
commit_message=commit_message,
os.mkdir(t_dir)
clone_repo_anon(
t_dir,
remote_git_repo
)
fname = os.path.join(t_dir, "results.json")
all_datapoints = pd.read_json(fname)
return all_datapoints


def clone_repo(
Expand Down
80 changes: 75 additions & 5 deletions proteobench/modules/dda_quant/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,19 @@

import datetime
import itertools
import os
import re
from dataclasses import asdict
from tempfile import TemporaryDirectory

import numpy as np
import pandas as pd
import streamlit as st
from proteobench.github.gh import clone_repo, pr_github, read_results_json_repo
from proteobench.modules.dda_quant.datapoint import Datapoint
from proteobench.modules.dda_quant.parse import ParseInputs
from proteobench.modules.dda_quant.parse_settings import (
DDA_QUANT_RESULTS_PATH, ParseSettings)
DDA_QUANT_RESULTS_REPO, ParseSettings)
from proteobench.modules.interfaces import ModuleInterface


Expand Down Expand Up @@ -166,9 +170,10 @@ def load_input_file(self, input_csv: str, input_format: str) -> pd.DataFrame:
def add_current_data_point(self, all_datapoints, current_datapoint):
"""Add current data point to all data points and load them from file if empty. TODO: Not clear why is the df transposed here."""
if not isinstance(all_datapoints, pd.DataFrame):
all_datapoints = pd.read_json(DDA_QUANT_RESULTS_PATH)
all_datapoints["old_new"] = "old"
#all_datapoints = pd.read_json(DDA_QUANT_RESULTS_PATH)
all_datapoints = read_results_json_repo(DDA_QUANT_RESULTS_REPO)

all_datapoints["old_new"] = "old"
all_datapoints = all_datapoints.T

current_datapoint["old_new"] = "new"
Expand Down Expand Up @@ -198,7 +203,72 @@ def benchmarking(
intermediate_data_structure, input_format, user_input
)


all_datapoints = self.add_current_data_point(all_datapoints, current_datapoint)

return intermediate_data_structure, all_datapoints
# TODO check why there are NA and inf/-inf values
return intermediate_data_structure.fillna(0.0).replace([np.inf, -np.inf], 0), all_datapoints


def clone_pr(
    self,
    temporary_datapoints,
    token,
    username="Proteobot",
    remote_git="github.com/Proteobot/Results_Module2_quant_DDA.git",
    branch_name="new_branch",
):
    """Write the newest data point into a clone of the results repo and hand it to ``pr_github``.

    Args:
        temporary_datapoints: Table of data points; only the last row
            (``iloc[-1]``) is submitted.
        token: Passed through to ``clone_repo`` and ``pr_github``.
        username: Passed through to ``clone_repo`` and ``pr_github``.
        remote_git: Passed through to ``clone_repo`` and ``pr_github``.
        branch_name: Kept for backward compatibility. The value is ignored
            because it is overwritten with the ``"id"`` of the submitted data point.
    """
    # Only the *name* of a temporary directory is taken here; it is passed to
    # clone_repo as the clone target.
    # NOTE(review): presumably clone_repo creates the directory itself - confirm.
    t_dir = TemporaryDirectory().name

    clone_repo(clone_dir=t_dir, token=token, remote_git=remote_git, username=username)

    current_datapoint = temporary_datapoints.iloc[-1]
    current_datapoint["is_temporary"] = False
    all_datapoints = self.add_current_data_point(None, current_datapoint)
    # The data point id doubles as the branch name and is part of the commit message.
    branch_name = current_datapoint["id"]

    results_path = os.path.join(t_dir, "results.json")
    print(results_path)
    # The context manager closes (and flushes) the file even if to_json raises,
    # so pr_github never sees a half-written results.json.
    with open(results_path, "w") as f:
        all_datapoints.to_json(
            f,
            orient="records",
            indent=2
        )

    commit_message = "Added new run with id " + branch_name

    pr_github(
        clone_dir=t_dir,
        token=token,
        remote_git=remote_git,
        username=username,
        branch_name=branch_name,
        commit_message=commit_message,
    )


def write_json_local_development(
    self,
    temporary_datapoints
):
    """Write all data points, including the newest one, to a local ``results.json``.

    Args:
        temporary_datapoints: Table of data points; only the last row
            (``iloc[-1]``) is marked as non-temporary and added.

    Returns:
        Path of the written ``results.json`` inside a freshly created
        temporary directory. The directory is not removed by this method.
    """
    # Function-scope import keeps this block self-contained.
    from tempfile import mkdtemp

    # mkdtemp creates the directory and leaves it in place, replacing the
    # earlier "take TemporaryDirectory().name, then os.mkdir" sequence.
    t_dir = mkdtemp()

    current_datapoint = temporary_datapoints.iloc[-1]
    current_datapoint["is_temporary"] = False
    all_datapoints = self.add_current_data_point(None, current_datapoint)

    # TODO write below to logger instead of std.out
    fname = os.path.join(t_dir, "results.json")
    print(f"Writing the json to: {fname}")

    # Close the file before returning so the JSON is fully flushed to disk
    # by the time a caller reads the returned path.
    with open(fname, "w") as f:
        all_datapoints.to_json(
            f,
            orient="records",
            indent=2
        )

    return fname
3 changes: 2 additions & 1 deletion proteobench/modules/dda_quant/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ def convert_to_standard_format(

df = df[df["MULTI_SPEC"] == False]

df.loc[df.index, "peptidoform"] = df.loc[df.index, "Sequence"]
# TODO, if "Charge" is not available return a sensible error
df.loc[df.index, "peptidoform"] = df.loc[df.index, "Sequence"]+"|Z="+df.loc[df.index, "Charge"].map(str)
count_non_zero = (
df.groupby(["Sequence", "Raw file"])["Intensity"].sum() > 0.0
).groupby(level=[0]).sum() == 6
Expand Down
2 changes: 2 additions & 0 deletions proteobench/modules/dda_quant/parse_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
# For local development, point the path below at a local JSON file if you do not want to download the results from GitHub
DDA_QUANT_RESULTS_PATH = "https://raw.githubusercontent.com/Proteobench/Results_Module2_quant_DDA/main/results.json" #e.g., K:/results.json

DDA_QUANT_RESULTS_REPO = "https://github.com/Proteobench/Results_Module2_quant_DDA.git"

class ParseSettings:
""" Structure that contains all the parameters used to parse the given database search output. """

Expand Down
11 changes: 6 additions & 5 deletions test/test_module_dda_quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import pandas as pd

from proteobench.github.gh import read_results_json_repo
from proteobench.modules.dda_quant.module import Module
from proteobench.modules.dda_quant.parse import ParseInputs
from proteobench.modules.dda_quant.parse_settings import (
Expand All @@ -17,7 +18,7 @@

TESTDATA_DIR = os.path.join(os.path.dirname(__file__), "data")
TESTDATA_FILES = {
"WOMBAT": os.path.join(TESTDATA_DIR, "WOMBAT_stand_pep_quant_mergedproline.csv"),
#"WOMBAT": os.path.join(TESTDATA_DIR, "WOMBAT_stand_pep_quant_mergedproline.csv"),
"MaxQuant": os.path.join(TESTDATA_DIR, "MaxQuant_evidence_sample.txt"),
"MSFragger": os.path.join(TESTDATA_DIR, "MSFragger_combined_ion.tsv"),
"AlphaPept": os.path.join(TESTDATA_DIR, "AlphaPept_subset.csv"),
Expand Down Expand Up @@ -59,12 +60,12 @@ def process_file(format_name: str):


class TestOutputFileReading(unittest.TestCase):
supported_formats = ("MaxQuant", "WOMBAT", "MSFragger", "AlphaPept")
supported_formats = ("MaxQuant", "MSFragger", "AlphaPept") #"WOMBAT",
""" Simple tests for reading csv input files."""

def test_search_engines_supported(self):
"""Test whether the expected formats are supported."""
for format_name in ("MaxQuant", "AlphaPept", "MSFragger", "Proline", "WOMBAT"):
for format_name in ("MaxQuant", "AlphaPept", "MSFragger", "Proline"): #, "WOMBAT"
self.assertTrue(format_name in INPUT_FORMATS)

def test_input_file_loading(self):
Expand Down Expand Up @@ -128,8 +129,8 @@ class TestPlot(unittest.TestCase):
"""Test if the plots return a figure."""

def test_plot_metric(self):
all_datapoints = pd.read_json(DDA_QUANT_RESULTS_PATH)

#all_datapoints = pd.read_json(DDA_QUANT_RESULTS_PATH)
all_datapoints = read_results_json_repo(DDA_QUANT_RESULTS_REPO)
all_datapoints["old_new"] = "old"
fig = PlotDataPoint().plot_metric(all_datapoints)
self.assertIsNotNone(fig)
Expand Down
15 changes: 6 additions & 9 deletions webinterface/pages/DDA_Quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@

from proteobench.modules.dda_quant.module import Module
from proteobench.modules.dda_quant.parse_settings import (
DDA_QUANT_RESULTS_PATH,
INPUT_FORMATS,
LOCAL_DEVELOPMENT,
)
DDA_QUANT_RESULTS_PATH, INPUT_FORMATS, LOCAL_DEVELOPMENT)
from proteobench.modules.dda_quant.plot import PlotDataPoint

try:
Expand All @@ -21,7 +18,7 @@
import streamlit_utils
from streamlit_extras.let_it_rain import rain

from proteobench.github.gh import clone_pr, write_json_local_development
#from proteobench.github.gh import clone_pr, write_json_local_development

logger = logging.getLogger(__name__)

Expand All @@ -46,8 +43,8 @@ def __init__(self):
st.set_page_config(
page_title="Proteobench web server",
page_icon=":rocket:",
layout="centered",
initial_sidebar_state="expanded",
layout="wide",
initial_sidebar_state="expanded"
)
if SUBMIT not in st.session_state:
st.session_state[SUBMIT] = False
Expand Down Expand Up @@ -265,15 +262,15 @@ def generate_results(
if submit_pr:
st.session_state[SUBMIT] = True
if not LOCAL_DEVELOPMENT:
clone_pr(
Module().clone_pr(
st.session_state[ALL_DATAPOINTS],
st.secrets["gh"]["token"],
username="Proteobot",
remote_git="github.com/Proteobot/Results_Module2_quant_DDA.git",
branch_name="new_branch",
)
else:
DDA_QUANT_RESULTS_PATH = write_json_local_development(
DDA_QUANT_RESULTS_PATH = Module().write_json_local_development(
st.session_state[ALL_DATAPOINTS]
)
if SUBMIT in st.session_state:
Expand Down
6 changes: 2 additions & 4 deletions webinterface/pages/TEMPLATE.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
import streamlit_utils
from streamlit_extras.let_it_rain import rain

from proteobench.github.gh import clone_pr, write_json_local_development

logger = logging.getLogger(__name__)

## Different parts of the web application
Expand Down Expand Up @@ -225,15 +223,15 @@ def generate_results(
if submit_pr:
st.session_state[SUBMIT] = True
if not LOCAL_DEVELOPMENT:
clone_pr(
Module().clone_pr(
st.session_state[ALL_DATAPOINTS],
st.secrets["gh"]["token"],
username="Proteobot",
remote_git="github.com/Proteobot/Results_Module2_quant_DDA.git",
branch_name="new_branch",
)
else:
DDA_QUANT_RESULTS_PATH = write_json_local_development(
DDA_QUANT_RESULTS_PATH = Module().write_json_local_development(
st.session_state[ALL_DATAPOINTS]
)
if SUBMIT in st.session_state:
Expand Down

0 comments on commit a38e7c7

Please sign in to comment.