From ee2e6f74c39f349cb7e4e923b82f58c2c570aff1 Mon Sep 17 00:00:00 2001
From: RobbinBouwmeester <robbin.bouwmeester@ugent.be>
Date: Thu, 19 Dec 2024 15:47:19 +0100
Subject: [PATCH 1/2] Update Server resubmission.ipynb

---
 jupyter_notebooks/Server resubmission.ipynb | 461 +++++++++++++++++---
 1 file changed, 408 insertions(+), 53 deletions(-)

diff --git a/jupyter_notebooks/Server resubmission.ipynb b/jupyter_notebooks/Server resubmission.ipynb
index 4179d6cd..057a336d 100644
--- a/jupyter_notebooks/Server resubmission.ipynb	
+++ b/jupyter_notebooks/Server resubmission.ipynb	
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 21,
    "id": "1affdd3a",
    "metadata": {},
    "outputs": [
@@ -57,12 +57,20 @@
     "from bs4 import BeautifulSoup\n",
     "import os\n",
     "import zipfile\n",
-    "from tqdm import tqdm"
+    "from tqdm import tqdm\n",
+    "from pathlib import Path\n",
+    "\n",
+    "import json\n",
+    "\n",
+    "from collections import defaultdict\n",
+    "import toml\n",
+    "\n",
+    "from proteobench.modules.quant.lfq.ion.DDA.quant_lfq_ion_DDA import DDAQuantIonModule"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 2,
    "id": "56b53d36",
    "metadata": {},
    "outputs": [
@@ -272,12 +280,12 @@
        "3         0.605836                     0.213234                    0.333518   \n",
        "4         0.514675                     0.199873                    0.272681   \n",
        "\n",
-       "   results.1.variance_epsilon results.1.nr_prec  results.1.CV_median  \\\n",
-       "0                    0.162885             51193             0.204522   \n",
-       "1                    0.267728             59609             0.226330   \n",
-       "2                    0.337984             82533             0.132900   \n",
-       "3                    0.291254             51338             0.218630   \n",
-       "4                    0.155889             51345             0.202474   \n",
+       "   results.1.variance_epsilon  results.1.nr_prec  results.1.CV_median  \\\n",
+       "0                    0.162885              51193             0.204522   \n",
+       "1                    0.267728              59609             0.226330   \n",
+       "2                    0.337984              82533             0.132900   \n",
+       "3                    0.291254              51338             0.218630   \n",
+       "4                    0.155889              51345             0.202474   \n",
        "\n",
        "  results.1.CV_q90  results.1.CV_q75  results.1.CV_q95  \n",
        "0         0.426579          0.295455          0.524623  \n",
@@ -289,7 +297,7 @@
        "[5 rows x 87 columns]"
       ]
      },
-     "execution_count": 16,
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -311,14 +319,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 3,
    "id": "90eec923",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "e25afbdf0b454e70be200328304947a8",
+       "model_id": "b078ebc7a5db4ad59ecebb05d7112052",
        "version_major": 2,
        "version_minor": 0
       },
@@ -332,7 +340,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "2658f30c03824c1d88ee8381d75f04d1",
+       "model_id": "8879c41130a64a0dabef9b18ec8c85cf",
        "version_major": 2,
        "version_minor": 0
       },
@@ -346,7 +354,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "387cecf52a67429aba2c69e330a84817",
+       "model_id": "6c86a298774b461e9fcd59d3bb7a8e15",
        "version_major": 2,
        "version_minor": 0
       },
@@ -393,21 +401,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 5,
    "id": "c79455a0",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "14    8cbc0bce20eee581ad10326e02a09dbc316c30e1\n",
-       "15    36b7b01b380f641722b3b34633bb53d72348eb80\n",
-       "16    0280a06fabdbe84746419d0810deae56e7ab2406\n",
-       "17    47db7ef37a0fb5fec79f3bedbfb4f67835774f10\n",
+       "20    1bfa914c771321b285a9ca40d4aa538cb9fdc42e\n",
+       "21    e8e80290fb48ff02de5ee54eb6b0114ff661bace\n",
        "Name: intermediate_hash, dtype: object"
       ]
      },
-     "execution_count": 19,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -418,7 +424,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 6,
    "id": "093bd9d5",
    "metadata": {},
    "outputs": [
@@ -426,65 +432,42 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Processing folder: https://proteobench.cubimed.rub.de/datasets/0280a06fabdbe84746419d0810deae56e7ab2406/\n",
-      "Downloading: https://proteobench.cubimed.rub.de/datasets/0280a06fabdbe84746419d0810deae56e7ab2406/0280a06fabdbe84746419d0810deae56e7ab2406_data.zip\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Downloading 0280a06fabdbe84746419d0810deae56e7ab2406_data.zip: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.6M/33.6M [00:24<00:00, 1.42MB/s]\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Extracted contents to: extracted_files/0280a06fabdbe84746419d0810deae56e7ab2406\n",
-      "Processing folder: https://proteobench.cubimed.rub.de/datasets/36b7b01b380f641722b3b34633bb53d72348eb80/\n",
-      "Downloading: https://proteobench.cubimed.rub.de/datasets/36b7b01b380f641722b3b34633bb53d72348eb80/36b7b01b380f641722b3b34633bb53d72348eb80_data.zip\n"
+      "Processing folder: https://proteobench.cubimed.rub.de/datasets/1bfa914c771321b285a9ca40d4aa538cb9fdc42e/\n",
+      "Downloading: https://proteobench.cubimed.rub.de/datasets/1bfa914c771321b285a9ca40d4aa538cb9fdc42e/1bfa914c771321b285a9ca40d4aa538cb9fdc42e_data.zip\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Downloading 36b7b01b380f641722b3b34633bb53d72348eb80_data.zip: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 25.4M/25.4M [00:14<00:00, 1.83MB/s]\n"
+      "Downloading 1bfa914c771321b285a9ca40d4aa538cb9fdc42e_data.zip: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 66.4M/66.4M [00:15<00:00, 4.50MB/s]\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Extracted contents to: extracted_files/36b7b01b380f641722b3b34633bb53d72348eb80\n",
-      "Processing folder: https://proteobench.cubimed.rub.de/datasets/8cbc0bce20eee581ad10326e02a09dbc316c30e1/\n",
-      "Downloading: https://proteobench.cubimed.rub.de/datasets/8cbc0bce20eee581ad10326e02a09dbc316c30e1/8cbc0bce20eee581ad10326e02a09dbc316c30e1_data.zip\n"
+      "Extracted contents to: extracted_files/1bfa914c771321b285a9ca40d4aa538cb9fdc42e\n",
+      "Processing folder: https://proteobench.cubimed.rub.de/datasets/e8e80290fb48ff02de5ee54eb6b0114ff661bace/\n",
+      "Downloading: https://proteobench.cubimed.rub.de/datasets/e8e80290fb48ff02de5ee54eb6b0114ff661bace/e8e80290fb48ff02de5ee54eb6b0114ff661bace_data.zip\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Downloading 8cbc0bce20eee581ad10326e02a09dbc316c30e1_data.zip: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.2M/33.2M [00:18<00:00, 1.94MB/s]\n"
+      "Downloading e8e80290fb48ff02de5ee54eb6b0114ff661bace_data.zip: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 46.5M/46.5M [00:08<00:00, 5.71MB/s]\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Extracted contents to: extracted_files/8cbc0bce20eee581ad10326e02a09dbc316c30e1\n"
+      "Extracted contents to: extracted_files/e8e80290fb48ff02de5ee54eb6b0114ff661bace\n"
      ]
     }
    ],
    "source": [
-    "import pandas as pd\n",
-    "import requests\n",
-    "from bs4 import BeautifulSoup\n",
-    "import os\n",
-    "import zipfile\n",
-    "from tqdm import tqdm\n",
-    "\n",
     "# Step 1: Extract the hash list from the DataFrame\n",
     "hash_list = filtered_df[\"intermediate_hash\"].tolist()\n",
     "\n",
@@ -549,10 +532,382 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 28,
+   "id": "593739e9",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>id</th>\n",
+       "      <th>old_new</th>\n",
+       "      <th>software_name</th>\n",
+       "      <th>software_version</th>\n",
+       "      <th>search_engine</th>\n",
+       "      <th>search_engine_version</th>\n",
+       "      <th>ident_fdr_psm</th>\n",
+       "      <th>ident_fdr_peptide</th>\n",
+       "      <th>ident_fdr_protein</th>\n",
+       "      <th>enable_match_between_runs</th>\n",
+       "      <th>...</th>\n",
+       "      <th>color</th>\n",
+       "      <th>hover_text</th>\n",
+       "      <th>scatter_size</th>\n",
+       "      <th>scan_window</th>\n",
+       "      <th>quantification_method_DIANN</th>\n",
+       "      <th>second_pass</th>\n",
+       "      <th>protein_inference</th>\n",
+       "      <th>predictors_library</th>\n",
+       "      <th>quantification_method</th>\n",
+       "      <th>mean_abs_epsilon</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>MaxQuant_20241216_100704</td>\n",
+       "      <td>old</td>\n",
+       "      <td>MaxQuant</td>\n",
+       "      <td>1.5.2.8</td>\n",
+       "      <td>Andromeda</td>\n",
+       "      <td>None</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.010000</td>\n",
+       "      <td>0.010000</td>\n",
+       "      <td>False</td>\n",
+       "      <td>...</td>\n",
+       "      <td>#377eb8</td>\n",
+       "      <td>ProteoBench ID: MaxQuant_20241216_100704&lt;br&gt;So...</td>\n",
+       "      <td>20</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.265490</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>ProlineStudio_20241216_103006</td>\n",
+       "      <td>old</td>\n",
+       "      <td>ProlineStudio</td>\n",
+       "      <td>2.3.0-SNAPSHOT_2024-09-11T06:45:20Z_jenkins</td>\n",
+       "      <td>Mascot</td>\n",
+       "      <td>2.8.3</td>\n",
+       "      <td>0.010000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "      <td>...</td>\n",
+       "      <td>#5f0f40</td>\n",
+       "      <td>ProteoBench ID: ProlineStudio_20241216_103006&lt;...</td>\n",
+       "      <td>20</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.319847</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>i2MassChroQ_20241216_103323</td>\n",
+       "      <td>old</td>\n",
+       "      <td>i2MassChroQ</td>\n",
+       "      <td>1.0.16</td>\n",
+       "      <td>X! Tandem</td>\n",
+       "      <td>X! Tandem Alanine (2017.2.1.4)</td>\n",
+       "      <td>0.008998</td>\n",
+       "      <td>0.011963</td>\n",
+       "      <td>0.009873</td>\n",
+       "      <td>True</td>\n",
+       "      <td>...</td>\n",
+       "      <td>#984ea3</td>\n",
+       "      <td>ProteoBench ID: i2MassChroQ_20241216_103323&lt;br...</td>\n",
+       "      <td>20</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.369880</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>MaxQuant_20241216_130203</td>\n",
+       "      <td>old</td>\n",
+       "      <td>MaxQuant</td>\n",
+       "      <td>1.5.3.30</td>\n",
+       "      <td>Andromeda</td>\n",
+       "      <td>None</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.010000</td>\n",
+       "      <td>0.010000</td>\n",
+       "      <td>True</td>\n",
+       "      <td>...</td>\n",
+       "      <td>#377eb8</td>\n",
+       "      <td>ProteoBench ID: MaxQuant_20241216_130203&lt;br&gt;So...</td>\n",
+       "      <td>20</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.322391</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>MaxQuant_20241216_120735</td>\n",
+       "      <td>old</td>\n",
+       "      <td>MaxQuant</td>\n",
+       "      <td>1.5.3.30</td>\n",
+       "      <td>Andromeda</td>\n",
+       "      <td>None</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.010000</td>\n",
+       "      <td>0.010000</td>\n",
+       "      <td>False</td>\n",
+       "      <td>...</td>\n",
+       "      <td>#377eb8</td>\n",
+       "      <td>ProteoBench ID: MaxQuant_20241216_120735&lt;br&gt;So...</td>\n",
+       "      <td>20</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.259993</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 40 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                              id old_new  software_name  \\\n",
+       "0       MaxQuant_20241216_100704     old       MaxQuant   \n",
+       "1  ProlineStudio_20241216_103006     old  ProlineStudio   \n",
+       "2    i2MassChroQ_20241216_103323     old    i2MassChroQ   \n",
+       "3       MaxQuant_20241216_130203     old       MaxQuant   \n",
+       "4       MaxQuant_20241216_120735     old       MaxQuant   \n",
+       "\n",
+       "                              software_version search_engine  \\\n",
+       "0                                      1.5.2.8     Andromeda   \n",
+       "1  2.3.0-SNAPSHOT_2024-09-11T06:45:20Z_jenkins        Mascot   \n",
+       "2                                       1.0.16     X! Tandem   \n",
+       "3                                     1.5.3.30     Andromeda   \n",
+       "4                                     1.5.3.30     Andromeda   \n",
+       "\n",
+       "            search_engine_version  ident_fdr_psm  ident_fdr_peptide  \\\n",
+       "0                            None            NaN           0.010000   \n",
+       "1                           2.8.3       0.010000                NaN   \n",
+       "2  X! Tandem Alanine (2017.2.1.4)       0.008998           0.011963   \n",
+       "3                            None            NaN           0.010000   \n",
+       "4                            None            NaN           0.010000   \n",
+       "\n",
+       "   ident_fdr_protein  enable_match_between_runs  ...    color  \\\n",
+       "0           0.010000                      False  ...  #377eb8   \n",
+       "1                NaN                       True  ...  #5f0f40   \n",
+       "2           0.009873                       True  ...  #984ea3   \n",
+       "3           0.010000                       True  ...  #377eb8   \n",
+       "4           0.010000                      False  ...  #377eb8   \n",
+       "\n",
+       "                                          hover_text scatter_size  \\\n",
+       "0  ProteoBench ID: MaxQuant_20241216_100704<br>So...           20   \n",
+       "1  ProteoBench ID: ProlineStudio_20241216_103006<...           20   \n",
+       "2  ProteoBench ID: i2MassChroQ_20241216_103323<br...           20   \n",
+       "3  ProteoBench ID: MaxQuant_20241216_130203<br>So...           20   \n",
+       "4  ProteoBench ID: MaxQuant_20241216_120735<br>So...           20   \n",
+       "\n",
+       "   scan_window  quantification_method_DIANN  second_pass  protein_inference  \\\n",
+       "0          NaN                          NaN          NaN                NaN   \n",
+       "1          NaN                          NaN          NaN                NaN   \n",
+       "2          NaN                          NaN          NaN                NaN   \n",
+       "3          NaN                          NaN          NaN                NaN   \n",
+       "4          NaN                          NaN          NaN                NaN   \n",
+       "\n",
+       "  predictors_library quantification_method  mean_abs_epsilon  \n",
+       "0                NaN                   NaN          0.265490  \n",
+       "1                NaN                   NaN          0.319847  \n",
+       "2                NaN                   NaN          0.369880  \n",
+       "3                NaN                   NaN          0.322391  \n",
+       "4                NaN                   NaN          0.259993  \n",
+       "\n",
+       "[5 rows x 40 columns]"
+      ]
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "token = toml.load(\"../webinterface/.streamlit/secrets.toml\")[\"gh\"][\"token\"]  # presumably the GitHub token used when opening PRs - confirm\n",
+    "\n",
+    "# TODO: swap in the module class that matches the submissions being resubmitted\n",
+    "module_obj = DDAQuantIonModule(token=token)\n",
+    "results_df = module_obj.obtain_all_data_points(all_datapoints=None)  # existing data points; passed to benchmarking() later\n",
+    "\n",
+    "results_df.head(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
    "id": "ecb9cf7c",
    "metadata": {},
    "outputs": [],
+   "source": [
+    "extra_path = Path(\"extracted_files\")\n",
+    "\n",
+    "# Each entry of submission_files describes one resubmission, for example:\n",
+    "#    {\n",
+    "#        \"input_file\" : \"../test/data/dda_quant/MaxQuant_evidence_sample.txt\",\n",
+    "#        \"param_file\" : \"../test/params/mqpar_MQ1.6.3.3_MBR.xml\",\n",
+    "#        \"input_type\" : \"MaxQuant\",\n",
+    "#        \"default_cutoff_min_prec\" : 3,\n",
+    "#        \"user_comments\" : \"Put comments here.\"\n",
+    "#    }\n",
+    "# The entries are built below from the folders extracted per intermediate_hash.\n",
+    "\n",
+    "submission_files = []\n",
+    "\n",
+    "for idx, row in filtered_df.iterrows():\n",
+    "    base_path = extra_path / row[\"intermediate_hash\"]\n",
+    "    comments = (base_path / \"comment.txt\").read_text()  # closes the file; keeps line breaks as written\n",
+    "    input_file = base_path / \"input_file.txt\"\n",
+    "    parameter_file = base_path / \"param_0.txt\"\n",
+    "    \n",
+    "    submission_files.append({\n",
+    "        \"input_file\" : input_file,\n",
+    "        \"param_file\" : parameter_file,\n",
+    "        \"input_type\" : row[\"software_name\"],\n",
+    "        \"default_cutoff_min_prec\" : 3,\n",
+    "        \"user_comments\" : comments\n",
+    "    })"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "a40c51ba",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Not all columns required for making the ion are available.\n",
+      "Load locally: extracted_files\\1bfa914c771321b285a9ca40d4aa538cb9fdc42e\\param_0.txt\n",
+      "ProteoBenchParameters(software_name='AlphaPept', software_version='0.5.0', search_engine='AlphaPept', search_engine_version='0.5.0', ident_fdr_psm=None, ident_fdr_peptide=0.01, ident_fdr_protein=0.01, enable_match_between_runs=True, precursor_mass_tolerance='[-20 ppm, 20 ppm]', fragment_mass_tolerance='[-50 ppm, 50 ppm]', enzyme='Trypsin', allowed_miscleavages=2, min_peptide_length=7, max_peptide_length=27, fixed_mods='cC', variable_mods='oxM', max_mods=3, min_precursor_charge=1, max_precursor_charge=6, scan_window=None, quantification_method=None, second_pass=None, protein_inference=None, predictors_library=None)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Following Github server redirection from /repos/Proteobot/Results_Module2_quant_DDA to /repositories/594032348\n",
+      "INFO:github.Requester:Following Github server redirection from /repos/Proteobot/Results_Module2_quant_DDA to /repositories/594032348\n",
+      "Following Github server redirection from /repos/Proteobot/Results_quant_ion_DDA/branches/master to /repos/Proteobot/Results_quant_ion_DDA/branches/main\n",
+      "INFO:github.Requester:Following Github server redirection from /repos/Proteobot/Results_quant_ion_DDA/branches/master to /repos/Proteobot/Results_quant_ion_DDA/branches/main\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Submitted: {'input_file': WindowsPath('extracted_files/1bfa914c771321b285a9ca40d4aa538cb9fdc42e/input_file.txt'), 'param_file': WindowsPath('extracted_files/1bfa914c771321b285a9ca40d4aa538cb9fdc42e/param_0.txt'), 'input_type': 'AlphaPept', 'default_cutoff_min_prec': 3, 'user_comments': 'Fixed mod of N-acetyl (N-term)'}\n",
+      "------------------------\n",
+      "Not all columns required for making the ion are available.\n",
+      "Load locally: extracted_files\\e8e80290fb48ff02de5ee54eb6b0114ff661bace\\param_0.txt\n",
+      "ProteoBenchParameters(software_name='AlphaPept', software_version='0.5.0', search_engine='AlphaPept', search_engine_version='0.5.0', ident_fdr_psm=None, ident_fdr_peptide=0.01, ident_fdr_protein=0.01, enable_match_between_runs=True, precursor_mass_tolerance='[-10 ppm, 10 ppm]', fragment_mass_tolerance='[-20 ppm, 20 ppm]', enzyme='Trypsin', allowed_miscleavages=1, min_peptide_length=7, max_peptide_length=27, fixed_mods='cC', variable_mods='oxM', max_mods=3, min_precursor_charge=1, max_precursor_charge=6, scan_window=None, quantification_method=None, second_pass=None, protein_inference=None, predictors_library=None)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Following Github server redirection from /repos/Proteobot/Results_Module2_quant_DDA to /repositories/594032348\n",
+      "INFO:github.Requester:Following Github server redirection from /repos/Proteobot/Results_Module2_quant_DDA to /repositories/594032348\n",
+      "Following Github server redirection from /repos/Proteobot/Results_quant_ion_DDA/branches/master to /repos/Proteobot/Results_quant_ion_DDA/branches/main\n",
+      "INFO:github.Requester:Following Github server redirection from /repos/Proteobot/Results_quant_ion_DDA/branches/master to /repos/Proteobot/Results_quant_ion_DDA/branches/main\n",
+      "ERROR:root:Error in PR: 422 {\"message\": \"Validation Failed\", \"errors\": [{\"resource\": \"PullRequest\", \"code\": \"custom\", \"message\": \"A pull request already exists for Proteobot:AlphaPept_20241217_084044.\"}], \"documentation_url\": \"https://docs.github.com/rest/pulls/pulls#create-a-pull-request\", \"status\": \"422\"}\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Submitted: {'input_file': WindowsPath('extracted_files/e8e80290fb48ff02de5ee54eb6b0114ff661bace/input_file.txt'), 'param_file': WindowsPath('extracted_files/e8e80290fb48ff02de5ee54eb6b0114ff661bace/param_0.txt'), 'input_type': 'AlphaPept', 'default_cutoff_min_prec': 3, 'user_comments': ''}\n",
+      "------------------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Benchmark each prepared submission (passing the existing results_df) and open a PR for it.\n",
+    "for submission_settings in submission_files:\n",
+    "    param_file = submission_settings[\"param_file\"]\n",
+    "    input_file = submission_settings[\"input_file\"]\n",
+    "    input_type = submission_settings[\"input_type\"]\n",
+    "    default_cutoff_min_prec = submission_settings[\"default_cutoff_min_prec\"]\n",
+    "    user_comments = submission_settings[\"user_comments\"]\n",
+    "    user_config = defaultdict(str)  # any option that was not set reads as \"\"\n",
+    "\n",
+    "    results_intermediates, results_df_new, parsed_input = module_obj.benchmarking(\n",
+    "        input_file,\n",
+    "        input_type,\n",
+    "        user_config,\n",
+    "        results_df,\n",
+    "        default_cutoff_min_prec=default_cutoff_min_prec,\n",
+    "    )\n",
+    "\n",
+    "    param_obj = module_obj.load_params_file(\n",
+    "        [param_file], input_type\n",
+    "    )\n",
+    "    print(param_obj)\n",
+    "\n",
+    "    # A failed PR is only logged (not raised), so report the returned value explicitly.\n",
+    "    pr_url = module_obj.clone_pr(\n",
+    "        results_df_new,\n",
+    "        param_obj,\n",
+    "        remote_git=\"\",\n",
+    "        submission_comments=user_comments,\n",
+    "    )\n",
+    "    \n",
+    "    print(f\"Pull request: {pr_url}\")\n",
+    "    print(f\"Submitted: {submission_settings}\")\n",
+    "    print(\"------------------------\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7c8b7f6f",
+   "metadata": {},
+   "outputs": [],
    "source": []
   }
  ],

From f8f5650248021af91831c3f6587aa8cb86cd9de3 Mon Sep 17 00:00:00 2001
From: RobbinBouwmeester <robbin.bouwmeester@ugent.be>
Date: Thu, 19 Dec 2024 15:49:19 +0100
Subject: [PATCH 2/2] Update .gitignore

---
 .gitignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore
index 49c006d2..3a54c1d9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,8 @@ build/
 develop-eggs/
 dist/
 downloads/
+jupyter_notebooks/extracted_files/
+jupyter_notebooks/result_dir/
 eggs/
 .eggs/
 lib/