Merge pull request #93 from Proteobench/improveplots
Changing dot size in the plot depending on whether the submission is new (#75)
RobbinBouwmeester authored Sep 25, 2023
2 parents a8a3102 + 867c10a commit 763c54a
Showing 5 changed files with 62 additions and 14 deletions.
20 changes: 20 additions & 0 deletions .vscode/launch.json
@@ -0,0 +1,20 @@

{
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python: BStreamlit",
            "type": "python",
            "request": "launch",
            "module": "streamlit",
            "args": [
                "run",
                "Home.py",
                "--server.port",
                "8080"
            ],
            "cwd": "${workspaceFolder}/webinterface/",
            "justMyCode": false
        }
    ]
}
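
For context, this new VS Code launch configuration just starts the Streamlit web interface from webinterface/ on port 8080 with the debugger attached. A minimal sketch of the equivalent launch from plain Python, assuming Streamlit is installed and the script is run from the repository root:

```python
import subprocess

# Equivalent of the launch.json entry above:
# `streamlit run Home.py --server.port 8080`, executed from webinterface/.
subprocess.run(
    ["streamlit", "run", "Home.py", "--server.port", "8080"],
    cwd="webinterface",
    check=True,
)
```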
5 changes: 4 additions & 1 deletion proteobench/modules/dda_quant/module.py
@@ -10,7 +10,6 @@
import numpy as np
import pandas as pd
import streamlit as st

from proteobench.github.gh import clone_repo, pr_github, read_results_json_repo
from proteobench.modules.dda_quant.datapoint import Datapoint
from proteobench.modules.dda_quant.parse import ParseInputs
@@ -173,7 +172,11 @@ def add_current_data_point(self, all_datapoints, current_datapoint):
        if not isinstance(all_datapoints, pd.DataFrame):
            #all_datapoints = pd.read_json(DDA_QUANT_RESULTS_PATH)
            all_datapoints = read_results_json_repo(DDA_QUANT_RESULTS_REPO)

        all_datapoints["old_new"] = "old"
        all_datapoints = all_datapoints.T

        current_datapoint["old_new"] = "new"
        all_datapoints = pd.concat([all_datapoints, current_datapoint], axis=1)
        all_datapoints = all_datapoints.T.reset_index(drop=True)
        return all_datapoints
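The new lines tag every existing datapoint as "old" before the incoming submission is appended as "new", which is what the plotting code keys on later. A minimal, self-contained sketch of that tagging-and-concatenation pattern, using made-up columns rather than the real ProteoBench datapoint fields:

```python
import pandas as pd

# Existing benchmark results, e.g. loaded from the results repository.
all_datapoints = pd.DataFrame(
    {"id": ["run_a", "run_b"], "weighted_sum": [0.91, 0.87], "nr_prec": [3200, 2950]}
)
# The submission that was just uploaded, as a single datapoint (Series).
current_datapoint = pd.Series({"id": "run_c", "weighted_sum": 0.95, "nr_prec": 3400})

# Tag what is already on record vs. the new submission.
all_datapoints["old_new"] = "old"
current_datapoint["old_new"] = "new"

# Concatenate column-wise on the transposed frame, then flip back,
# mirroring the pd.concat(..., axis=1) pattern used in module.py.
combined = pd.concat([all_datapoints.T, current_datapoint], axis=1).T.reset_index(drop=True)
print(combined)
```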
30 changes: 25 additions & 5 deletions proteobench/modules/dda_quant/plot.py
@@ -27,7 +27,7 @@ def plot_bench(self, result_df: pd.DataFrame) -> go.Figure:
        fig.update_layout(
            width=700,
            height=700,
            title="Distplot",
            # title="Distplot",
            xaxis=dict(
                title="1|2_ratio",
                color="white",
@@ -40,6 +40,8 @@
            ),
        )
        fig.update_xaxes(range=[0, 4])
        fig.update_xaxes(showgrid=True, gridcolor="lightgray", gridwidth=1)
        # fig.update_yaxes(showgrid=True, gridcolor="lightgray", gridwidth=1)

        return fig

@@ -75,26 +77,41 @@ def plot_metric(self, benchmark_metrics_df: pd.DataFrame) -> go.Figure:

        # Add hover text
        hover_texts = [
            f"Search Engine: {benchmark_metrics_df.search_engine[idx]} {benchmark_metrics_df.software_version[idx]}<br>FDR psm: {benchmark_metrics_df.fdr_psm[idx]}<br>FDR Peptide: {benchmark_metrics_df.fdr_peptide[idx]}<br>FRD Protein: {benchmark_metrics_df.fdr_protein[idx]}<br>MBR: {benchmark_metrics_df.MBR[idx]}<br>Precursor Tolerance: {benchmark_metrics_df.precursor_tol[idx]} {benchmark_metrics_df.precursor_tol_unit[idx]}<br>Fragment Tolerance: {benchmark_metrics_df.fragment_tol_unit[idx]}<br>Enzyme: {benchmark_metrics_df.enzyme_name[idx]} <br>Missed Cleavages: {benchmark_metrics_df.missed_cleavages[idx]}<br>Min peptide length: {benchmark_metrics_df.min_pep_length[idx]}<br>Max peptide length: {benchmark_metrics_df.max_pep_length[idx]}"
            f"Search Engine: {benchmark_metrics_df.search_engine[idx]} {benchmark_metrics_df.software_version[idx]}<br>"
            + f"FDR psm: {benchmark_metrics_df.fdr_psm[idx]}<br>"
            + f"FDR Peptide: {benchmark_metrics_df.fdr_peptide[idx]}<br>"
            + f"FRD Protein: {benchmark_metrics_df.fdr_protein[idx]}<br>"
            + f"MBR: {benchmark_metrics_df.MBR[idx]}<br>"
            + f"Precursor Tolerance: {benchmark_metrics_df.precursor_tol[idx]} {benchmark_metrics_df.precursor_tol_unit[idx]}<br>"
            + f"Fragment Tolerance: {benchmark_metrics_df.fragment_tol_unit[idx]}<br>"
            + f"Enzyme: {benchmark_metrics_df.enzyme_name[idx]} <br>"
            + f"Missed Cleavages: {benchmark_metrics_df.missed_cleavages[idx]}<br>"
            + f"Min peptide length: {benchmark_metrics_df.min_pep_length[idx]}<br>"
            + f"Max peptide length: {benchmark_metrics_df.max_pep_length[idx]}"
            for idx, row in benchmark_metrics_df.iterrows()
        ]

        # spellerror {meta_data.fragmnent_tol[idx]}

        mapping = {"old": 10, "new": 20}

        fig = go.Figure(
            data=[
                go.Scatter(
                    x=benchmark_metrics_df["weighted_sum"],
                    y=benchmark_metrics_df["nr_prec"],
                    mode="markers",
                    text=hover_texts,
                    marker=dict(color=colors, showscale=True, size=20),
                    marker=dict(color=colors, showscale=False, size=20),
                    marker_size=[
                        mapping[item] for item in benchmark_metrics_df["old_new"]
                    ],
                )
            ]
        )

        fig.update_layout(
            title="Metric",
            # title="Metric",
            width=700,
            height=700,
            xaxis=dict(
@@ -107,8 +124,11 @@ def plot_metric(self, benchmark_metrics_df: pd.DataFrame) -> go.Figure:
                gridcolor="white",
                gridwidth=2,
            ),
            # paper_bgcolor='rgb(243, 243, 243)',
            # plot_bgcolor="rgb(243, 243, 243)",
        )

        fig.update_xaxes(showgrid=True, gridcolor="lightgray", gridwidth=1)
        fig.update_yaxes(showgrid=True, gridcolor="lightgray", gridwidth=1)
        # selected_points = plotly_events(
        # fig,
        # select_event=True,
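The marker_size mapping above is what makes the freshly uploaded submission stand out from the already-published datapoints. A minimal, self-contained sketch of the same old/new size mapping with Plotly, using toy data; hover text, colors, and axis titles are omitted here:

```python
import pandas as pd
import plotly.graph_objects as go

# Toy benchmark table: two published datapoints plus one new submission.
benchmark_metrics_df = pd.DataFrame(
    {
        "weighted_sum": [0.91, 0.87, 0.95],
        "nr_prec": [3200, 2950, 3400],
        "old_new": ["old", "old", "new"],
    }
)

# Map the tag to a dot size: existing points small, the new submission large.
mapping = {"old": 10, "new": 20}

fig = go.Figure(
    data=[
        go.Scatter(
            x=benchmark_metrics_df["weighted_sum"],
            y=benchmark_metrics_df["nr_prec"],
            mode="markers",
            marker_size=[mapping[item] for item in benchmark_metrics_df["old_new"]],
        )
    ]
)
# Light gray gridlines, as added in plot.py above.
fig.update_xaxes(showgrid=True, gridcolor="lightgray", gridwidth=1)
fig.update_yaxes(showgrid=True, gridcolor="lightgray", gridwidth=1)
fig.show()
```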
3 changes: 1 addition & 2 deletions test/test_module_dda_quant.py
@@ -126,10 +126,9 @@ class TestPlot(unittest.TestCase):
    """Test if the plots return a figure."""

    def test_plot_metric(self):

        #all_datapoints = pd.read_json(DDA_QUANT_RESULTS_PATH)
        all_datapoints = read_results_json_repo(DDA_QUANT_RESULTS_REPO)

        all_datapoints["old_new"] = "old"
        fig = PlotDataPoint().plot_metric(all_datapoints)
        self.assertIsNotNone(fig)

18 changes: 12 additions & 6 deletions webinterface/pages/DDA_Quant.py
@@ -73,7 +73,8 @@ def _main_page(self):
"""Format main page."""
st.title("Module 2: DDA quantification")
st.header("Description of the module")
st.markdown("""
st.markdown(
"""
This module compares the MS1-level quantification tools for
data-dependent acquisition (DDA). The raw files provided for
this module are presented in the comprehensive LFQ benchmark
@@ -95,9 +96,11 @@ def _main_page(self):
sets of parameters for the search and quantification.
The full description of the pre-processing steps and metrics
calculation is available here: LINK.
""")
"""
)
st.header("Downloading associated files")
st.markdown("""
st.markdown(
"""
The raw files used for this module were acquired on an Orbitrap
Q-Exactive H-FX (ThermoScientific). They can be downloaded from the
proteomeXchange repository PXD028735. You can download them here:
@@ -109,13 +112,16 @@ def _main_page(self):
[LFQ_Orbitrap_AIF_Condition_B_Sample_Alpha_03.raw](https://ftp.pride.ebi.ac.uk/pride/data/archive/2022/02/PXD028735/LFQ_Orbitrap_AIF_Condition_B_Sample_Alpha_03.raw)
**It is imperative not to rename the files once downloaded!**
""")
st.markdown("""
"""
)
st.markdown(
"""
Download the fasta file here: [TODO]
The fasta file provided for this module contains the three species
present in the samples and contaminant proteins
([Frankenfield et al., JPR](https://pubs.acs.org/doi/10.1021/acs.jproteome.2c00145))
""")
"""
)

st.header("Input and configuration")

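The webinterface change above is purely stylistic: each st.markdown call is split so the triple-quoted text sits on its own lines. A minimal sketch of that call pattern as a standalone Streamlit page, with the text abbreviated rather than the module's full description:

```python
import streamlit as st

st.title("Module 2: DDA quantification")
st.header("Description of the module")
st.markdown(
    """
This module compares MS1-level quantification tools for
data-dependent acquisition (DDA). See the full description of the
pre-processing steps and metrics calculation for details.
"""
)
st.header("Downloading associated files")
st.markdown(
    """
The raw files were acquired on an Orbitrap Q-Exactive H-FX and can be
downloaded from the proteomeXchange repository PXD028735.
**It is imperative not to rename the files once downloaded!**
"""
)
```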
