✨ allow more recent Python versions #67

Merged: 3 commits, May 31, 2024
9 changes: 4 additions & 5 deletions .github/workflows/ci.yaml
@@ -20,13 +20,13 @@ jobs:
"macos-13",
# "windows-latest" # rrcovNA cannot be built from source on windows-server
]
-python-version: ["3.8"]
+python-version: ["3.8", "3.9", "3.10"]
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Miniconda
# ! change action https://github.com/mamba-org/setup-micromamba
-uses: conda-incubator/setup-miniconda@v2
+uses: conda-incubator/setup-miniconda@v3
with:
miniforge-variant: Mambaforge
# miniforge-version: latest
@@ -82,9 +82,9 @@ jobs:
snakemake -p -c4 -k --configfile config/single_dev_dataset/example/config.yaml
- name: Archive results
# https://github.com/actions/upload-artifact
-uses: actions/upload-artifact@v3
+uses: actions/upload-artifact@v4
with:
-name: example-workflow-results-${{ matrix.os }}
+name: ${{ matrix.os }}-${{ matrix.python-version }}-example-workflow-results
path: |
project/runs/example/
environment.yml
@@ -114,7 +114,6 @@ jobs:
- name: Run pytest
run: pytest .


publish:
name: Publish package
if: startsWith(github.ref, 'refs/tags')
55 changes: 55 additions & 0 deletions .github/workflows/ci_workflow.yaml
@@ -0,0 +1,55 @@
name: run workflow with conda envs
on:
push:
branches: [main, dev]
pull_request:
branches: [main, dev]
release:
# schedule:
# - cron: '0 2 * * 3,6'
jobs:
run-integration-tests-with-conda-install:
runs-on: ${{ matrix.os }}
defaults:
run:
shell: bash -el {0}
strategy:
fail-fast: false
matrix:
os: [
"ubuntu-latest",
"macos-13",
# "windows-latest" # rrcovNA cannot be built from source on windows-server
]
python-version: ["3.10"]
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Miniconda
# ! change action https://github.com/mamba-org/setup-micromamba
uses: conda-incubator/setup-miniconda@v3
with:
miniforge-variant: Mambaforge
use-mamba: true
channel-priority: disabled
python-version: ${{ matrix.python-version }}
environment-file: snakemake_env.yml
activate-environment: snakemake
auto-activate-base: true
- name: inspect-conda-environment
run: |
conda info
conda list
- name: Dry-run workflow
run: |
cd project
snakemake -p -c1 --configfile config/single_dev_dataset/example/config.yaml -n --use-conda
- name: Run demo workflow (integration test)
continue-on-error: true
run: |
cd project
snakemake -p -c4 -k --configfile config/single_dev_dataset/example/config.yaml --use-conda
- name: Run demo workflow again (in case of installation issues)
run: |
cd project
snakemake -p -c1 -k --configfile config/single_dev_dataset/example/config.yaml --use-conda
26 changes: 26 additions & 0 deletions .github/workflows/test_pkg_on_colab.yaml
@@ -0,0 +1,26 @@
name: Test that tutorial runs on latest colab image

on:
push:
branches: [dev]
pull_request:
branches: [main, dev]
schedule:
- cron: '0 2 3 * *'

jobs:
test-tutorial-on-colab:
name: Test tutorial on latest colab image
runs-on: ubuntu-latest-4core # increase disk space
# https://console.cloud.google.com/artifacts/docker/colab-images/europe/public/runtime
container:
image: europe-docker.pkg.dev/colab-images/public/runtime:latest
steps:
- uses: actions/checkout@v4
- name: Install pimms-learn and papermill
run: |
python3 -m pip install pimms-learn papermill
- name: Run tutorial
run: |
cd project
papermill 04_1_train_pimms_models.ipynb 04_1_train_pimms_models_output.ipynb
27 changes: 17 additions & 10 deletions .github/workflows/workflow_website.yaml
@@ -1,4 +1,4 @@
-name: Build workflow website on smaller development dataset (for protein groups)
+name: Build workflow website on public Alzheimer dataset (for protein groups)
on:
pull_request:
branches: [main, dev]
@@ -29,32 +29,39 @@ jobs:
activate-environment: vaep
auto-activate-base: true
# auto-update-conda: true
- name: Dry-run workflow
run: |
cd project
snakemake -s workflow/Snakefile_v2.smk --configfile config/alzheimer_study/config.yaml -p -c1 -n
- name: Run demo workflow (integration test)
continue-on-error: true
run: |
cd project
-snakemake -p -c1 -n
-snakemake -p -c4 -k
+snakemake -s workflow/Snakefile_v2.smk --configfile config/alzheimer_study/config.yaml -p -c4 -k
- name: Run demo workflow again (in case of installation issues)
run: |
cd project
-snakemake -p -c1 -n
-snakemake -p -c4 -k
+snakemake -s workflow/Snakefile_v2.smk --configfile config/alzheimer_study/config.yaml -p -c4 -k
- name: Run differential analysis workflow
run: |
cd project
snakemake -s workflow/Snakefile_ald_comparison.smk --configfile config/alzheimer_study/comparison.yaml -p -c4
- name: Install website dependencies
run: |
pip install .[docs]
- name: Build imputation comparison website
run: |
-pimms-setup-imputation-comparison -f project/runs/dev_dataset_small/proteinGroups_N50/
-cd project/runs/dev_dataset_small/proteinGroups_N50/
+pimms-setup-imputation-comparison -f project/runs/alzheimer_study/
+pimms-add-diff-comp -f project/runs/alzheimer_study/ -sf_cp project/runs/alzheimer_study/diff_analysis/AD
+cd project/runs/alzheimer_study/
sphinx-build -n --keep-going -b html ./ ./_build/
- name: Archive results
uses: actions/upload-artifact@v3
with:
-name: example-workflow-results-${{ matrix.os }}
-path: project/runs/dev_dataset_small/proteinGroups_N50/_build/
+name: alzheimer-study
+path: project/runs/alzheimer_study/
- name: Publish workflow as website
uses: peaceiris/actions-gh-pages@v4
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
-publish_dir: project/runs/dev_dataset_small/proteinGroups_N50/_build/
+publish_dir: project/runs/alzheimer_study/_build/
121 changes: 79 additions & 42 deletions README.md
@@ -1,24 +1,33 @@
# PIMMS
-[![Read the Docs](https://img.shields.io/readthedocs/pimms)](https://readthedocs.org/projects/pimms/) [![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/RasmussenLab/pimms/ci.yaml)](https://github.com/RasmussenLab/pimms/actions)
+[![Read the Docs](https://img.shields.io/readthedocs/pimms)](https://readthedocs.org/projects/pimms/) [![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/RasmussenLab/pimms/ci.yaml)](https://github.com/RasmussenLab/pimms/actions) [![Documentation Status](https://readthedocs.org/projects/pimms/badge/?version=latest)](https://pimms.readthedocs.io/en/latest/?badge=latest)


PIMMS stands for Proteomics Imputation Modeling Mass Spectrometry
and is a homage to our dear British friends
who are missing as part of the EU for far too long already
-(Pimms is also a British summer drink).
+(Pimms is a British summer drink).

-The pre-print is available [on biorxiv](https://doi.org/10.1101/2023.01.12.523792).
+The publication is accepted in Nature Communications
+and the pre-print is available [on biorxiv](https://doi.org/10.1101/2023.01.12.523792).

> `PIMMS` was called `vaep` during development.
-> Before entire refactoring has to been completed the imported package will be
-`vaep`.
+> Until the refactoring is completed, the imported package will be `vaep`.

-We provide functionality as a python package, an excutable workflow and notebooks.
+We provide functionality as a Python package, an executable workflow, or simply in notebooks.

The models can be used with the scikit-learn interface in the spirit of other scikit-learn imputers. You can try this in colab. [![open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/RasmussenLab/pimms/blob/HEAD/project/04_1_train_pimms_models.ipynb)
For any questions, please [open an issue](https://github.com/RasmussenLab/pimms/issues) or contact me directly.

## Getting started

## Python package
The models can be used with the scikit-learn interface in the spirit of other scikit-learn imputers. You can try this using our tutorial in Colab:

[![open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/RasmussenLab/pimms/blob/HEAD/project/04_1_train_pimms_models.ipynb)

The PIMMS models can be fitted on the entire dataset or with a validation split
to monitor the training process.
In our experiments overfitting wasn't a big issue, but it is easy to check.
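The imputer pattern that PIMMS follows is sketched below with scikit-learn's own `SimpleImputer`; the PIMMS model classes themselves are deliberately not named here, so see the Colab tutorial for the exact `pimms-learn` API:

```python
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer

# Toy intensity matrix: rows are samples, columns are protein groups.
df = pd.DataFrame(
    {"P1": [10.2, np.nan, 9.8], "P2": [8.1, 8.3, np.nan], "P3": [np.nan, 12.0, 11.5]}
)

# Scikit-learn-style imputers expose fit/transform; PIMMS models mimic this pattern.
imputer = SimpleImputer(strategy="mean")
imputed = pd.DataFrame(imputer.fit_transform(df), columns=df.columns, index=df.index)

assert not imputed.isna().any().any()  # every missing value has been filled
```

Swapping in a PIMMS model should only change the imputer class, not the surrounding fit/transform code.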

## Install Python package

For interactive use of the models provided in PIMMS, you can use our
[python package `pimms-learn`](https://pypi.org/project/pimms-learn/).
@@ -28,7 +37,7 @@ The interface is similar to scikit-learn.
pip install pimms-learn
```

-Then you can use the models on a pandas DataFrame with missing values. Try this in the tutorial on Colab:
+Then you can use the models on a pandas DataFrame with missing values. You can try this in the tutorial on Colab by uploading your data:
[![open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/RasmussenLab/pimms/blob/HEAD/project/04_1_train_pimms_models.ipynb)

## Notebooks as scripts using papermill
@@ -37,27 +46,71 @@ If you want to run a model on your prepared data, you can run notebooks prefixed
`01_`, i.e. [`project/01_*.ipynb`](https://github.com/RasmussenLab/pimms/tree/HEAD/project), after cloning the repository. Thanks to jupytext, Python percentage-script versions of the notebooks are kept in sync as well.

```bash
# navigate to your desired folder
git clone https://github.com/RasmussenLab/pimms.git # get all notebooks
cd project # project folder as pwd
# pip install pimms-learn papermill # if not already installed
papermill 01_0_split_data.ipynb --help-notebook
papermill 01_1_train_vae.ipynb --help-notebook
```
> :warning: Mistyped argument names won't throw an error when using papermill, but a warning is printed on the console thanks to my contributions:)

> Mistyped argument names won't throw an error when using papermill
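Parameters are overridden at execution time with papermill's `-p name value` flag; the parameter names below are made-up placeholders, so inspect the `--help-notebook` output for the real ones:

```shell
# Inspect the notebook's parameters cell, then run the notebook with overrides.
# NOTE: the parameter names and values here are hypothetical, for illustration only.
papermill 01_0_split_data.ipynb --help-notebook
papermill 01_0_split_data.ipynb 01_0_split_data_run.ipynb \
    -p fn_rawfile_metadata data/my_metadata.csv \
    -p sample_completeness 0.5
```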

-## PIMMS comparison workflow
+## PIMMS comparison workflow and differential analysis workflow

The PIMMS comparison workflow is a snakemake workflow that runs all selected PIMMS models and R-models on
-a user-provided dataset and compares the results. An example for the smaller HeLa development dataset on the
+a user-provided dataset and compares the results. An example for a publicly available Alzheimer dataset on the
protein groups level is re-built regularly and available at: [rasmussenlab.org/pimms](https://www.rasmussenlab.org/pimms/)

It is built on top of
- the [Snakefile_v2.smk](https://github.com/RasmussenLab/pimms/blob/HEAD/project/workflow/Snakefile_v2.smk) (v2 of the imputation workflow), specified in one configuration file
- the [Snakefile_ald_comparison](https://github.com/RasmussenLab/pimms/blob/HEAD/project/workflow/Snakefile_ald_comparison.smk) workflow for differential analysis

The associated notebooks are indexed with `01_*` for the comparison workflow and `10_*` for the differential analysis workflow. The `project` folder can be copied separately to any location if the package is installed; it is a standalone folder. Its main subfolders are:

```bash
# project folder:
project
│ README.md # see description of notebooks and hints on execution in project folder
|---config # configuration files for experiments ("workflows")
|---data # data for experiments
|---runs # results of experiments
|---src # source code or binaries for some R packages
|---tutorials # some tutorials for libraries used in the project
|---workflow # snakemake workflows
```

To re-execute the entire workflow locally, have a look at the [configuration files](https://github.com/RasmussenLab/pimms/tree/HEAD/project/config/alzheimer_study) for the published Alzheimer workflow:

- [`config/alzheimer_study/config.yaml`](https://github.com/RasmussenLab/pimms/blob/HEAD/project/config/alzheimer_study/config.yaml)
- [`config/alzheimer_study/comparison.yaml`](https://github.com/RasmussenLab/pimms/blob/HEAD/project/config/alzheimer_study/comparison.yaml)

To execute that workflow, follow the Setup instructions below and run the following command in the project folder:

```bash
# being in the project folder
snakemake -s workflow/Snakefile_v2.smk --configfile config/alzheimer_study/config.yaml -p -c1 -n # one core/process, dry-run
snakemake -s workflow/Snakefile_v2.smk --configfile config/alzheimer_study/config.yaml -p -c2 # two cores/process, execute
# after imputation workflow, execute the comparison workflow
snakemake -s workflow/Snakefile_ald_comparison.smk --configfile config/alzheimer_study/comparison.yaml -p -c1
# If you want to build the website locally: https://www.rasmussenlab.org/pimms/
pip install .[docs]
pimms-setup-imputation-comparison -f project/runs/alzheimer_study/
pimms-add-diff-comp -f project/runs/alzheimer_study/ -sf_cp project/runs/alzheimer_study/diff_analysis/AD
cd project/runs/alzheimer_study/
sphinx-build -n --keep-going -b html ./ ./_build/
# open ./_build/index.html
```

## Setup workflow and development environment

### Setup comparison workflow

The core functionality is available as standalone software on PyPI under the name `pimms-learn`. However, running the entire snakemake workflow is enabled using
conda (or mamba) and pip to set up an analysis environment. For a detailed description of setting up
conda (or mamba), see [instructions on setting up a virtual environment](https://github.com/RasmussenLab/pimms/blob/HEAD/docs/venv_setup.md).

-Download the repository
+Download the repository:

```
git clone https://github.com/RasmussenLab/pimms.git
Expand All @@ -74,14 +127,14 @@ mamba env create -n pimms -f environment.yml # faster, less then 5mins

If you are on a Mac with an M1 or M2 chip, or otherwise have issues using your accelerator (e.g. GPUs): install the pytorch dependencies first, then the rest of the environment:

-### Install development dependencies
+### Install pytorch first (M-chips)

Check how to install pytorch for your system [here](https://pytorch.org/get-started).

- Select the version compatible with your CUDA version if you have an NVIDIA GPU, or the build for a Mac M-chip.

```bash
-conda create -n vaep python=3.8 pip
+conda create -n vaep python=3.9 pip
conda activate vaep
# Follow instructions on https://pytorch.org/get-started
# conda env update -f environment.yml -n vaep # should not install the rest.
@@ -95,29 +148,17 @@ papermill 04_1_train_pimms_models.ipynb 04_1_train_pimms_models_test.ipynb # sec
python 04_1_train_pimms_models.py # just execute the code
```

### Entire development installation


```bash
conda create -n pimms_dev -c pytorch -c nvidia -c fastai -c bioconda -c plotly -c conda-forge --file requirements.txt --file requirements_R.txt --file requirements_dev.txt
pip install -e . # other pip dependencies missing
snakemake --configfile config/single_dev_dataset/example/config.yaml -F -n
```

or if you want to update an existing environment
### Let Snakemake handle installation

If you only want to execute the workflow, you can use snakemake to build the environments for you:

```
conda update -c defaults -c conda-forge -c fastai -c bioconda -c plotly --file requirements.txt --file requirements_R.txt --file requirements_dev.txt
```
> Only the v1 Snakefile imputation workflow supports this at the moment.

or using the environment.yml file (can fail on certain systems)

```
conda env create -f environment.yml
```

```bash
snakemake -p -c1 --configfile config/single_dev_dataset/example/config.yaml --use-conda -n # dry-run
snakemake -p -c1 --configfile config/single_dev_dataset/example/config.yaml --use-conda # execute with one core
```


### Troubleshooting

Troubleshoot your R installation by opening jupyter lab
@@ -127,16 +168,16 @@
jupyter lab # open 01_1_train_NAGuideR.ipynb
```

-## Run an analysis
+## Run example

Change to the [`project` folder](./project) and see its [README](project/README.md).
-You can subselect models by editing the config file: [`config.yaml`](project/config/single_dev_dataset/proteinGroups_N50/config.yaml) file.
+You can subselect models by editing the config file: [`config.yaml`](https://github.com/RasmussenLab/pimms/tree/HEAD/project/config/single_dev_dataset/proteinGroups_N50).
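As a rough sketch of such a subselection (the key and model names below are assumptions for illustration; copy the authoritative keys from the shipped `config.yaml`):

```yaml
# Hypothetical config fragment; use the real keys from the shipped config.yaml.
models:
  - Median  # baseline imputation
  - CF      # collaborative filtering
  - DAE     # denoising autoencoder
  - VAE     # variational autoencoder
```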

```
conda activate pimms # activate virtual environment
cd project # go to project folder
pwd # so be in ./pimms/project
-snakemake -c1 -p -n # dryrun demo workflow
+snakemake -c1 -p -n # dry-run demo workflow, potentially add --use-conda
snakemake -c1 -p
```

@@ -228,7 +269,3 @@ From the brief description in the table the exact procedure is not always clear.
| MSIMPUTE_MNAR | msImpute | BIOCONDUCTOR | | Missing not at random algorithm using low rank approximation
| ~~grr~~ | DreamAI | - | Fails to install | Ridge regression
| ~~GMS~~ | GMSimpute | tar file | Fails on Windows | Lasso model


-## Build status
-[![Documentation Status](https://readthedocs.org/projects/pimms/badge/?version=latest)](https://pimms.readthedocs.io/en/latest/?badge=latest)