From 05eb01cabd9c0e19ad070c9cc379d6992d6afc64 Mon Sep 17 00:00:00 2001 From: Henry Webel Date: Tue, 28 May 2024 13:46:04 +0000 Subject: [PATCH 01/11] :test_tube: add failing tutorial execution KeyError: 'array'? (Python 3.10) --- .github/workflows/test_tutorial.yaml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 .github/workflows/test_tutorial.yaml diff --git a/.github/workflows/test_tutorial.yaml b/.github/workflows/test_tutorial.yaml new file mode 100644 index 0000000..dd3091b --- /dev/null +++ b/.github/workflows/test_tutorial.yaml @@ -0,0 +1,27 @@ +name: Test tutorial + +on: + - push + +jobs: + format: + name: Check tutorial + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install Claster requirements + run: | + pip install numpy pandas matplotlib jupyter ipykernel eir-dl + - name: Run papermill for cmd-line execution of notebooks + run: | + pip install papermill + - name: Test Tutorial + run: | + cd scripts + papermill 0_Tutorial.ipynb 0_Tutorial_out.ipynb + From 522b5198da5abe4dc542f3f436e90a70b0948688 Mon Sep 17 00:00:00 2001 From: Henry Webel Date: Tue, 28 May 2024 19:32:47 +0000 Subject: [PATCH 02/11] :art: speed up tutorial execution in CI, add some hyperlinks --- .github/workflows/test_tutorial.yaml | 2 +- scripts/0_Tutorial.ipynb | 39 ++++++++++++++++++---------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/.github/workflows/test_tutorial.yaml b/.github/workflows/test_tutorial.yaml index dd3091b..12e1d12 100644 --- a/.github/workflows/test_tutorial.yaml +++ b/.github/workflows/test_tutorial.yaml @@ -23,5 +23,5 @@ jobs: - name: Test Tutorial run: | cd scripts - papermill 0_Tutorial.ipynb 0_Tutorial_out.ipynb + papermill 0_Tutorial.ipynb 0_Tutorial_out.ipynb -p epochs 4 diff --git a/scripts/0_Tutorial.ipynb b/scripts/0_Tutorial.ipynb index b0cb135..c3a52f9 100644 --- a/scripts/0_Tutorial.ipynb +++ 
b/scripts/0_Tutorial.ipynb @@ -9,13 +9,13 @@ "\n", "*Authors:* \n", "\n", - "Marc Pielies Avelli (marc.pielies-avelli@cpr.ku.dk, mpielies@broadinstitute.org )\n", + "Marc Pielies Avelli ([marc.pielies-avelli@cpr.ku.dk](mailto:marc.pielies-avelli@cpr.ku.dk), [mpielies@broadinstitute.org](mailto:mpielies@broadinstitute.org) )\n", "\n", - "Arnor Sigurdsson (arnor.sigurdsson@sund.ku.dk, asigurds@broadinstitute.org)\n", + "Arnor Sigurdsson ([arnor.sigurdsson@sund.ku.dk](mailto:arnor.sigurdsson@sund.ku.dk), [asigurds@broadinstitute.org](mailto:asigurds@broadinstitute.org))\n", "\n", - "Henry Webel (henry.webel@sund.ku.dk)\n", + "Henry Webel ([henry.webel@sund.ku.dk](mailto:henry.webel@sund.ku.dk))\n", " \n", - "Simon Rasmussen (srasmuss@sund.ku.dk, srasmuss@broadinstitute.org)\n", + "Simon Rasmussen ([srasmuss@sund.ku.dk](mailto:srasmuss@sund.ku.dk), [srasmuss@broadinstitute.org](mailto:srasmuss@broadinstitute.org))\n", "\n", "## Introduction ##\n", "\n", @@ -47,9 +47,7 @@ "- **Fusion module**: The extracted high-level, abstract features are then combined in a number of dense layers.\n", "- **Output module**: A final set of dense layers maps the feature vectors to the targets in a regression task per node, which will represent the EU-seq signal at a distance from the TSS of the gene defining the sample (located in the middle).\n", "\n", - "CLASTER was built using the EIR framework, a python package developed by Arnor Sigurdsson (arnor.sigurdsson@sund.ku.dk) which makes it easy to replicate and adapt it to new tasks. Documentation on EIR can be found at https://eir.readthedocs.io/en/latest/. Have a look at the tutorials to get a feel for the config files required and all possibilities that EIR offers. 
The framework uses hydra (https://hydra.cc/docs/intro/) to manage a set of configuration files, which allow you to streamline the process.\n", - "\n", - "\n" + "CLASTER was built using the EIR framework, a python package developed by Arnor Sigurdsson ([arnor.sigurdsson@sund.ku.dk](mailto:arnor.sigurdsson@sund.ku.dk)) which makes it easy to replicate and adapt it to new tasks. Documentation on EIR can be found at [eir.readthedocs.io](https://eir.readthedocs.io/en/latest/). Have a look at the tutorials to get a feel for the config files required and all possibilities that EIR offers. The framework uses hydra (https://hydra.cc/docs/intro/) to manage a set of configuration files, which allow you to streamline the process." ] }, { @@ -812,6 +810,20 @@ "## 2. Training and validating CLASTER" ] }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "tags": [ + "Parameters", + "parameters" + ] + }, + "outputs": [], + "source": [ + "epochs: int = 120" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -845,11 +857,11 @@ "output_path.mkdir(parents=True, exist_ok=True)\n", "\n", "# Write config files explicitly:\n", - "training_microc_rotated_yaml_contents = {\"globals.yaml\":\"\"\"\n", + "training_microc_rotated_yaml_contents = {\"globals.yaml\": f\"\"\"\n", "output_folder: ../runs_tutorial/gene_expression_microc_rotated_pure_conv_tutorial/ \n", - "checkpoint_interval: 860\n", - "sample_interval: 860 \n", - "n_epochs: 120\n", + "checkpoint_interval: {int(860 /120 * epochs)}\n", + "sample_interval: {int(860 /120 * epochs)} \n", + "n_epochs: {epochs}\n", "batch_size: 64\n", "optimizer: \"adamw\"\n", "lr: 0.0001 \n", @@ -1570,7 +1582,8 @@ "results_path = Path(\"../runs_tutorial/gene_expression_microc_rotated_pure_conv_tutorial/results/\")\n", "N_BINS= 200\n", "condition_list = [\"_ctrl\"]\n", - "SPLIT = 4300 #860\n", + "SPLIT = int(860 /120 * epochs)\n", + "# SPLIT = 4300 # for 860 samples, 120 epochs\n", "\n", "ids, predicted, actual = 
_get_predictions(results_path, N_BINS, condition_list, SPLIT)\n", "\n", @@ -1597,7 +1610,7 @@ "\n", "CLASTER was designed to predict nascent transcription, but the EIR framework can be reused and expanded for any other purpose. It can handle plenty of other data modalities as well! You just need a well defined input containing some information or signal that can be used to infer the values of another, as an output. You can then customize your network using EIR, provide the inputs and targets in one of the supported data modalities, and name samples with the same ID for input and output.\n", "\n", - "If in doubt, feel free to reach us!\n" + "If in doubt, feel free to reach out to us!\n" ] } ], From ff83c17bfd72a3916b2b0c5fda83281c5d712c8f Mon Sep 17 00:00:00 2001 From: Henry Webel Date: Tue, 28 May 2024 19:35:22 +0000 Subject: [PATCH 03/11] :memo: specify minimum eir-dl version (to catch the wrong python version) - 3.12 does not yet seem to work either --- scripts/0_Tutorial.ipynb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/0_Tutorial.ipynb b/scripts/0_Tutorial.ipynb index c3a52f9..390d244 100644 --- a/scripts/0_Tutorial.ipynb +++ b/scripts/0_Tutorial.ipynb @@ -58,17 +58,17 @@ "\n", ">💻 **Create an environment for the project:**\n", ">The following steps will be performed from the terminal, and once the environment is set up we will run everything else from this notebook.\n", - ">We will first create an environment for this project, where we will install all the required dependencies. The python package EIR, which is the core framework used to build, train and test CLASTER, will need python >= 3.11. \n", + ">We will first create an environment for this project, where we will install all the required dependencies. The python package EIR, which is the core framework used to build, train and test CLASTER, will need `python >= 3.11`. 
\n", ">If you have anaconda, the environment can be created from the terminal by typing:\n", ">\n", ">```bash\n", - ">conda create -n claster_env_tutorial python=3.11 -y #Create environment\n", - ">conda activate claster_env_tutorial #Activate it\n", + ">conda create -n claster_env_tutorial python=3.11 pip -y # Create environment\n", + ">conda activate claster_env_tutorial # Activate it\n", ">```\n", "> We can now install all the required packages by typing:\n", ">\n", ">``` bash\n", - ">pip install numpy pandas matplotlib jupyter ipykernel eir-dl #Install these packages to run the notebook\n", + ">pip install numpy pandas matplotlib jupyter ipykernel 'eir-dl>=0.1.43' # Install these packages to run the notebook\n", ">```\n", "> We ran this tutorial on eir version 0.1.43." ] From 7cd797982ebf5673958d7e09486449f5e5728a07 Mon Sep 17 00:00:00 2001 From: Henry Webel Date: Wed, 29 May 2024 12:22:30 +0000 Subject: [PATCH 04/11] :sparkles: build website based on notebooks for documentation --- .github/workflows/test_tutorial.yaml | 25 ++++++- docs/conf.py | 98 ++++++++++++++++++++++++++++ docs/index.rst | 14 ++++ docs/nbs/0_Tutorial.ipynb | 1 + 4 files changed, 135 insertions(+), 3 deletions(-) create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 120000 docs/nbs/0_Tutorial.ipynb diff --git a/.github/workflows/test_tutorial.yaml b/.github/workflows/test_tutorial.yaml index 12e1d12..173bbaf 100644 --- a/.github/workflows/test_tutorial.yaml +++ b/.github/workflows/test_tutorial.yaml @@ -1,10 +1,10 @@ -name: Test tutorial +name: Test tutorial and publish website on: - push jobs: - format: + test_tutorial: name: Check tutorial runs-on: ubuntu-latest steps: @@ -24,4 +24,23 @@ jobs: run: | cd scripts papermill 0_Tutorial.ipynb 0_Tutorial_out.ipynb -p epochs 4 - + website: + name: Publish notebooks as website + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Install 
dependencies + run: | + pip install sphinx sphinx-book-theme myst-nb + - name: Build website + run: | + cd docs + sphinx-build -n --keep-going -b html ./ ./_build/ + - name: Publish workflow as website + uses: peaceiris/actions-gh-pages@v4 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: docs/_build diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..0829eca --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,98 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. + +# -- Project information ----------------------------------------------------- + +project = 'CLASTER' +copyright = '2022, Marc Pielies Avelli' +author = 'Marc Pielies Avelli' +version = '2024.05.29' + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. 
+extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autodoc.typehints', + 'sphinx.ext.viewcode', + 'sphinx.ext.intersphinx', + 'myst_nb', + 'sphinx.ext.napoleon', + # 'sphinx_new_tab_link', +] + +# https://myst-nb.readthedocs.io/en/latest/computation/execute.html +nb_execution_mode = "off" + +myst_enable_extensions = ["dollarmath", "amsmath"] + +# Plotly support through require javascript library +# https://myst-nb.readthedocs.io/en/latest/render/interactive.html#plotly +# html_js_files = ["https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"] + +# https://myst-nb.readthedocs.io/en/latest/configuration.html +# Execution +nb_execution_raise_on_error = True +# Rendering +nb_merge_streams = True + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', 'jupyter_execute','.DS_Store'] + + +# Intersphinx options +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), + "scikit-learn": ("https://scikit-learn.org/stable/", None), + "matplotlib": ("https://matplotlib.org/stable/", None), +} + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. 
+# See: +# https://github.com/executablebooks/MyST-NB/blob/master/docs/conf.py +html_title = "CLASTER" +html_theme = "sphinx_book_theme" +# html_logo = "_static/logo-wide.svg" +# html_favicon = "_static/logo-square.svg" +html_theme_options = { + "github_url": "https://github.com/RasmussenLab/CLASTER", + "repository_url": "https://github.com/RasmussenLab/CLASTER", + "repository_branch": "master", + "home_page_in_toc": True, + "path_to_docs": "docs", + "show_navbar_depth": 1, + "use_edit_page_button": True, + "use_repository_button": True, + "use_download_button": True, + "launch_buttons": { + "colab_url": "https://colab.research.google.com" + # "binderhub_url": "https://mybinder.org", + # "notebook_interface": "jupyterlab", + }, + "navigation_with_keys": False, +} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +# html_static_path = ["_static"] diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..8ec5567 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,14 @@ +Claster +======= +Modeling nascent RNA transcription from chromatin landscape and structure + + +.. include:: ../Readme.md + :parser: myst_parser.sphinx_ + :start-line: 3 + +.. 
toctree:: + :maxdepth: 2 + :caption: Tutorial: + + nbs/0_Tutorial.ipynb diff --git a/docs/nbs/0_Tutorial.ipynb b/docs/nbs/0_Tutorial.ipynb new file mode 120000 index 0000000..5c3ed8f --- /dev/null +++ b/docs/nbs/0_Tutorial.ipynb @@ -0,0 +1 @@ +../../scripts/0_Tutorial.ipynb \ No newline at end of file From af5e573e689c1dd76702eceafc92be77356996e2 Mon Sep 17 00:00:00 2001 From: Henry Webel Date: Wed, 29 May 2024 13:42:28 +0000 Subject: [PATCH 05/11] :art: specify headlines (in hierarchy) --- Readme.md | 21 +++++++++------------ scripts/0_Tutorial.ipynb | 11 ++++++++--- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/Readme.md b/Readme.md index 41cf60c..a2a6577 100644 --- a/Readme.md +++ b/Readme.md @@ -1,31 +1,31 @@ #
CLASTER ##
Modeling nascent RNA transcription from chromatin landscape and structure
-**Abstract** +## Abstract _Different cell types and their associated functionalities can emerge from a single genomic sequence when certain regions are expressed while others remain silenced. The study of gene regulation and its potential malfunctioning in different cellular contexts is hence pivotal to understand both development and disease. We present the Chromatin Landscape and Structure to Expression Regressor (CLASTER), an epigenetic-based deep neural network that can integrate different data modalities describing the chromatin landscape and its 3D structure in their raw format. CLASTER effectively translates them into nascent transcription levels measured by EU-seq at a kilobasepair resolution. Our predictions reached a Pearson correlation with targets above r=0.86 at both bin and gene levels, without relying on DNA sequence nor explicitly extracted chromatin features. The model mostly used the information found within 10 kbp of the predicted locus to perform the predictions, even when a wide genomic region of 1 Mbp was available. Explicit modeling of long-range interactions using multi-headed attention and high-resolution chromatin contact maps had little impact on model performance, despite the model correctly identifying elements in these inputs influencing nascent transcription. The trained model then served as a platform to predict the transcriptional impact of simulated epigenetic silencing perturbations. Our results point towards a rather local, integrative and combinatorial paradigm of gene regulation, where changes in the chromatin environment surrounding a gene shape its context-specific transcription. 
We conclude that the predominant locality and limitations of current machine learning approaches might emerge as a genuine signature of genomic organization, having broad implications for future modeling approaches._ -![Claster image](./images/Claster_image.png) +![Claster image](https://raw.githubusercontent.com/RasmussenLab/CLASTER/master/images/Claster_image.png) -**CLASTER overview:** CLASTER integrates the chromatin landscape (accessibility, promoter and enhancer activities and chromatin silencing) and structure (Micro-C) to predict nascent transcription levels measured by EU-seq. +**CLASTER overview** CLASTER integrates the chromatin landscape (accessibility, promoter and enhancer activities and chromatin silencing) and structure (Micro-C) to predict nascent transcription levels measured by EU-seq. ## In this repository This repository contains the files and scripts required to reproduce the results of the paper and a short tutorial. The repository consists of the following folders: -```configurations```: +### `configurations` - Configuration files (.yaml) required to build different flavours of CLASTER. -```images```: +### `images` - Overview of CLASTER's architecture. -```inputs```: +### `inputs` The folder contains the test set inputs for both data modalities, i.e. samples exploring regions of 1 Mbp centered at the TSS of protein coding genes found in chr4 (in mice). They will be used in the tutorial to exemplify how can we train and validate CLASTER. -```scripts```: +### `scripts` -- [`0_Tutorial.ipynb`](scripts/0_Tutorial.ipynb): The notebook provides a rapid overview of the most important steps in CLASTER's pipeline, including training and validating the network using the EIR framework. +- [`0_Tutorial.ipynb`](https://github.com/RasmussenLab/CLASTER/blob/master/scripts/0_Tutorial.ipynb): The notebook provides a rapid overview of the most important steps in CLASTER's pipeline, including training and validating the network using the EIR framework. 
- `1_Data_obtention.ipynb`: This notebook guides the user through the data obtention process, including: - Data download from publicly available repositories: - Inputs: Chromatin landscape (ATAC-seq, H3K4me3, H3K27ac and H3K27me3 in mESCs) and structure (Micro-C maps in mESCs) @@ -45,9 +45,6 @@ These were used to benchmark CLASTER. It includes: - Code to fine-tune Hyena-DNA's backbone and the added head together. - `3_Data_analysis.ipynb`: The notebook contains the functions used to perform the data analysis and create the figures included in the manuscript. -```targets```: +### `targets` The folder contains the target EU-seq profiles matching the input (test) samples. - - - diff --git a/scripts/0_Tutorial.ipynb b/scripts/0_Tutorial.ipynb index 390d244..a8c0f54 100644 --- a/scripts/0_Tutorial.ipynb +++ b/scripts/0_Tutorial.ipynb @@ -4,8 +4,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#
TUTORIAL
\n", - "#
_Training and validating CLASTER_
\n", + "# TUTORIAL\n", + "**_Training and validating CLASTER_**\n", "\n", "*Authors:* \n", "\n", @@ -100,7 +100,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# 1. Inputs and outputs\n", + "## 1. Inputs and outputs\n", "The obtention of inputs and outputs from publicly available sources is detailed in notebook ```1_Data_obtention.ipynb```. In this tutorial we will however provide you with the already created inputs and targets for all samples in the test set.\n", "\n", "Input samples and their matching targets are named after the ENSEMBL ID code for the protein coding gene located at the center of the region of interest. We kept the orientation of the genes, and hence the EU-seq signal can go both towards the right or towards the left. \n", @@ -1612,6 +1612,11 @@ "\n", "If in doubt, feel free to reach out to us!\n" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] } ], "metadata": { From bd0ad14a9e6fb8fc1ccbf2befc72bcc50924dd6b Mon Sep 17 00:00:00 2001 From: Henry Webel Date: Wed, 29 May 2024 13:55:18 +0000 Subject: [PATCH 06/11] :bug: avoid symlinks - otherwise colab link does not work --- .github/workflows/test_tutorial.yaml | 3 +-- docs/conf.py => conf.py | 2 +- docs/nbs/0_Tutorial.ipynb | 1 - docs/index.rst => index.rst | 3 ++- 4 files changed, 4 insertions(+), 5 deletions(-) rename docs/conf.py => conf.py (99%) delete mode 120000 docs/nbs/0_Tutorial.ipynb rename docs/index.rst => index.rst (88%) diff --git a/.github/workflows/test_tutorial.yaml b/.github/workflows/test_tutorial.yaml index 173bbaf..9f16bba 100644 --- a/.github/workflows/test_tutorial.yaml +++ b/.github/workflows/test_tutorial.yaml @@ -37,10 +37,9 @@ jobs: pip install sphinx sphinx-book-theme myst-nb - name: Build website run: | - cd docs sphinx-build -n --keep-going -b html ./ ./_build/ - name: Publish workflow as website uses: peaceiris/actions-gh-pages@v4 with: github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: docs/_build + publish_dir: _build diff 
--git a/docs/conf.py b/conf.py similarity index 99% rename from docs/conf.py rename to conf.py index 0829eca..917af33 100644 --- a/docs/conf.py +++ b/conf.py @@ -70,7 +70,7 @@ # a list of builtin themes. # See: # https://github.com/executablebooks/MyST-NB/blob/master/docs/conf.py -html_title = "CLASTER" +html_title = "" html_theme = "sphinx_book_theme" # html_logo = "_static/logo-wide.svg" # html_favicon = "_static/logo-square.svg" diff --git a/docs/nbs/0_Tutorial.ipynb b/docs/nbs/0_Tutorial.ipynb deleted file mode 120000 index 5c3ed8f..0000000 --- a/docs/nbs/0_Tutorial.ipynb +++ /dev/null @@ -1 +0,0 @@ -../../scripts/0_Tutorial.ipynb \ No newline at end of file diff --git a/docs/index.rst b/index.rst similarity index 88% rename from docs/index.rst rename to index.rst index 8ec5567..9daec87 100644 --- a/docs/index.rst +++ b/index.rst @@ -11,4 +11,5 @@ Modeling nascent RNA transcription from chromatin landscape and structure :maxdepth: 2 :caption: Tutorial: - nbs/0_Tutorial.ipynb + scripts/0_Tutorial.ipynb + From 43b3539e293b57938df2ea7a0c9a548d8c6540a0 Mon Sep 17 00:00:00 2001 From: Henry Webel Date: Wed, 29 May 2024 14:01:13 +0000 Subject: [PATCH 07/11] :bug: remove path to docs folder --- conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf.py b/conf.py index 917af33..9ddc366 100644 --- a/conf.py +++ b/conf.py @@ -79,7 +79,7 @@ "repository_url": "https://github.com/RasmussenLab/CLASTER", "repository_branch": "master", "home_page_in_toc": True, - "path_to_docs": "docs", + # "path_to_docs": "", "show_navbar_depth": 1, "use_edit_page_button": True, "use_repository_button": True, From e4a244ec2b9713db0ce1ddca0d56efb37643baea Mon Sep 17 00:00:00 2001 From: Henry Webel Date: Wed, 29 May 2024 14:11:46 +0000 Subject: [PATCH 08/11] :sparkles: add other notebooks (scripts) --- conf.py | 3 ++- index.rst | 8 ++++++++ scripts/0_Tutorial.ipynb | 2 +- scripts/1_Data_obtention.ipynb | 6 +++--- scripts/2_Run_CLASTER.ipynb | 2 +- 
scripts/2b_Run_HyenaDNA_and_Enformer.ipynb | 2 +- scripts/3_Data_analysis.ipynb | 2 +- 7 files changed, 17 insertions(+), 8 deletions(-) diff --git a/conf.py b/conf.py index 9ddc366..4f9c87c 100644 --- a/conf.py +++ b/conf.py @@ -16,6 +16,7 @@ copyright = '2022, Marc Pielies Avelli' author = 'Marc Pielies Avelli' version = '2024.05.29' +release = '2024.05.29' # -- General configuration --------------------------------------------------- @@ -70,7 +71,7 @@ # a list of builtin themes. # See: # https://github.com/executablebooks/MyST-NB/blob/master/docs/conf.py -html_title = "" +html_title = "CLASTER" html_theme = "sphinx_book_theme" # html_logo = "_static/logo-wide.svg" # html_favicon = "_static/logo-square.svg" diff --git a/index.rst b/index.rst index 9daec87..789cd8b 100644 --- a/index.rst +++ b/index.rst @@ -13,3 +13,11 @@ Modeling nascent RNA transcription from chromatin landscape and structure scripts/0_Tutorial.ipynb +.. toctree:: + :maxdepth: 2 + :caption: Scripts: + + scripts/1_Data_obtention.ipynb + scripts/2_Run_CLASTER.ipynb + scripts/2b_Run_HyenaDNA_and_Enformer.ipynb + scripts/3_Data_analysis.ipynb \ No newline at end of file diff --git a/scripts/0_Tutorial.ipynb b/scripts/0_Tutorial.ipynb index a8c0f54..aa90d09 100644 --- a/scripts/0_Tutorial.ipynb +++ b/scripts/0_Tutorial.ipynb @@ -1635,7 +1635,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.undefined.undefined" }, "orig_nbformat": 4, "vscode": { diff --git a/scripts/1_Data_obtention.ipynb b/scripts/1_Data_obtention.ipynb index 3c86d43..7409fc5 100644 --- a/scripts/1_Data_obtention.ipynb +++ b/scripts/1_Data_obtention.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#
1. DATA OBTENTION & PREPROCESSING
\n", + "# 1. DATA OBTENTION & PREPROCESSING\n", "\n", "Hi!\n", "This notebook is a guide to obtain and preprocess the data used to train and test CLASTER. It is meant to be run sequentially, i.e. one cell after the other, and it mainly contains:\n", @@ -18,7 +18,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Before we start ...\n", + "## Before we start ...\n", "\n", "A) It would be highly recommendable to create an environment for this project. The python package EIR, which is the core framework used to build, train and test CLASTER, will need python >= 3.11. If you have anaconda, it can be done as follows from the terminal:\n", "\n", @@ -69,7 +69,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# 1. Data obtention" + "## 1. Data obtention" ] }, { diff --git a/scripts/2_Run_CLASTER.ipynb b/scripts/2_Run_CLASTER.ipynb index 9e597fe..b93f142 100644 --- a/scripts/2_Run_CLASTER.ipynb +++ b/scripts/2_Run_CLASTER.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#
2. CREATE & RUN CLASTER
" + "# 2. CREATE & RUN CLASTER" ] }, { diff --git a/scripts/2b_Run_HyenaDNA_and_Enformer.ipynb b/scripts/2b_Run_HyenaDNA_and_Enformer.ipynb index d66548a..f1412af 100644 --- a/scripts/2b_Run_HyenaDNA_and_Enformer.ipynb +++ b/scripts/2b_Run_HyenaDNA_and_Enformer.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#
2b. BENCHMARKING CLASTER WITH HYENA-DNA AND ENFORMER
" + "# 2b. BENCHMARKING CLASTER WITH HYENA-DNA AND ENFORMER" ] }, { diff --git a/scripts/3_Data_analysis.ipynb b/scripts/3_Data_analysis.ipynb index 615755d..94966ea 100644 --- a/scripts/3_Data_analysis.ipynb +++ b/scripts/3_Data_analysis.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#
DATA ANALYSIS NOTEBOOK
\n", + "# DATA ANALYSIS NOTEBOOK\n", "\n", "This notebook was used to run the analyses and produce most of the images in the paper.\n", "\n", From bb270c722170b8e2db8c4c0718b062195769998b Mon Sep 17 00:00:00 2001 From: Henry Webel Date: Wed, 29 May 2024 14:23:31 +0000 Subject: [PATCH 09/11] :art: remove centering - just does not look good in table of contents --- scripts/2_Run_CLASTER.ipynb | 6 +++--- scripts/3_Data_analysis.ipynb | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/2_Run_CLASTER.ipynb b/scripts/2_Run_CLASTER.ipynb index b93f142..a41227f 100644 --- a/scripts/2_Run_CLASTER.ipynb +++ b/scripts/2_Run_CLASTER.ipynb @@ -5617,7 +5617,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##
Training CLASTER
\n", + "## Training CLASTER\n", "The model can be trained by running the following command:\n", "\n", "```bash\n", @@ -5689,7 +5689,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##
Testing CLASTER
\n", + "## Testing CLASTER\n", "\n", "> _Note:_ \n", ">\n", @@ -5776,7 +5776,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##
Predicting _in silico_ perturbed chromatin landscapes
\n", + "## Predicting _in silico_ perturbed chromatin landscapes\n", "\n", ">Note: Make sure to create the output folder beforehand: ./runs/perturbation_runs/gene_expression_only_chrom_pure_conv/" ] diff --git a/scripts/3_Data_analysis.ipynb b/scripts/3_Data_analysis.ipynb index 94966ea..2aebdbe 100644 --- a/scripts/3_Data_analysis.ipynb +++ b/scripts/3_Data_analysis.ipynb @@ -958,7 +958,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##
Model performance on the baseline prediction task
\n", + "## Model performance on the baseline prediction task\n", "**Figure 1 and related supplementary figures**\n", "\n", "> Note: ⏰ This took 42 min when analysing all genes and enhancers in chr4:\n", @@ -1256,7 +1256,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##
_In silico_ perturbations
\n", + "## _In silico_ perturbations\n", "\n", "**Figure 2 and related supplementary figures**\n", "\n" @@ -1962,7 +1962,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#
Attributions
\n", + "# Attributions\n", "\n", "Now we will compute the attribution scores for every input position (4,10.001) towards every output (401). This mechanism tells how important is every input position (i.e. a given bin in each input track in the landscape or a given bin in a Micro-C matrix) for the prediction of the EU-seq levels at the position described by a given target node (401 targets). It also tells us the direction of the association.\n", "The original paper can be found at:\n", From b428c2ab42e44a5efdd29d32e96f39fdee321c4f Mon Sep 17 00:00:00 2001 From: Henry Webel Date: Wed, 29 May 2024 14:41:53 +0000 Subject: [PATCH 10/11] :art: collapse most code cell - adding "hide-input" tag to cell using jupytext --- scripts/0_Tutorial.ipynb | 60 ++++++++--- scripts/1_Data_obtention.ipynb | 103 +++++++++++++++---- scripts/2_Run_CLASTER.ipynb | 28 ++++-- scripts/2b_Run_HyenaDNA_and_Enformer.ipynb | 22 +++- scripts/3_Data_analysis.ipynb | 112 +++++++++++++++++---- 5 files changed, 260 insertions(+), 65 deletions(-) diff --git a/scripts/0_Tutorial.ipynb b/scripts/0_Tutorial.ipynb index aa90d09..0fa3f9d 100644 --- a/scripts/0_Tutorial.ipynb +++ b/scripts/0_Tutorial.ipynb @@ -83,7 +83,11 @@ { "cell_type": "code", "execution_count": 3, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "# Import the required packages\n", @@ -118,7 +122,11 @@ { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "def visualize_input_array(a,\n", @@ -246,7 +254,12 @@ { "cell_type": "code", "execution_count": 59, - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2, + "tags": [ + "hide-input" + ] + }, "outputs": [ { "data": { @@ -309,7 +322,7 @@ " scaling_factor,\n", " track_dict)\n", " fig.show()\n", - " plot_microc_original_and_rotated(pathlist, num_bins, window_of_observation, name=f\"{sample_name}_{orientation}.npy\")\n" + " 
plot_microc_original_and_rotated(pathlist, num_bins, window_of_observation, name=f\"{sample_name}_{orientation}.npy\")" ] }, { @@ -327,7 +340,11 @@ { "cell_type": "code", "execution_count": 60, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [ { "data": { @@ -762,7 +779,11 @@ { "cell_type": "code", "execution_count": 64, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [ { "name": "stdout", @@ -844,7 +865,11 @@ { "cell_type": "code", "execution_count": 5, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "# Create folder to store configuration files\n", @@ -1430,7 +1455,11 @@ { "cell_type": "code", "execution_count": 22, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "#### Functions:\n", @@ -1541,7 +1570,7 @@ " fig.legend(bbox_to_anchor=(.48,.8), fancybox=True, shadow=False, fontsize=7)\n", "\n", " return fig\n", - " " + " \n" ] }, { @@ -1555,7 +1584,12 @@ { "cell_type": "code", "execution_count": 23, - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2, + "tags": [ + "hide-input" + ] + }, "outputs": [ { "data": { @@ -1597,7 +1631,7 @@ " line_a = actual.loc[f\"{sample_name}_forward\"].values\n", "\n", " fig = plot_target_predictions(line_p, line_a)\n", - " fig.show()\n" + " fig.show()" ] }, { @@ -1620,6 +1654,9 @@ } ], "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, "kernelspec": { "display_name": "Python 3.11.9 ('claster_env_tutorial')", "language": "python", @@ -1637,7 +1674,6 @@ "pygments_lexer": "ipython3", "version": "3.undefined.undefined" }, - "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "12187645b0fe86aeeeb7c7a27e94503590520c2ee4730b6cfd81a67885c82988" diff --git a/scripts/1_Data_obtention.ipynb b/scripts/1_Data_obtention.ipynb index 7409fc5..53de9db 100644 --- a/scripts/1_Data_obtention.ipynb +++ b/scripts/1_Data_obtention.ipynb @@ -82,7 +82,11 @@ { 
"cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "import os \n", @@ -107,7 +111,11 @@ { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "def download_files(links, destination_folder):\n", @@ -130,7 +138,8 @@ " wget.download(link, out=output_path)\n", " print(f\"\\nSuccessfully downloaded {name} to {destination_folder}\")\n", " except Exception as e:\n", - " print(f\"\\nFailed to download {link} because : {e}\")\n" + " print(f\"\\nFailed to download {link} because : {e}\")\n", + "\n" ] }, { @@ -146,7 +155,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2 + }, "outputs": [], "source": [ "\n", @@ -178,7 +189,7 @@ " if gene_type == '\"protein_coding\"' and chrom !='chrM' : # or gene_type == '\"lincRNA\"':\n", " final_df.loc[id[1:-1]] = [chrom,start,end,strand,name[1:-1], gene_type[1:-1]]\n", "\n", - "final_df.to_csv(outfile, sep='\\t')\n" + "final_df.to_csv(outfile, sep='\\t')" ] }, { @@ -252,7 +263,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2, + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "%%writefile create_microC_arrays.py\n", @@ -390,13 +406,17 @@ "min_dataset, max_dataset = find_dataset_min_max(microc_path)\n", "print(\"Log 10 (min)=\",np.log10(min_dataset), \"Max:\", max_dataset)\n", "\n", - "create_Micro_C_arrays(gene_annotations_path, microc_path, savepath , imputation_value = min_dataset)\n" + "create_Micro_C_arrays(gene_annotations_path, microc_path, savepath , imputation_value = min_dataset)" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "%%writefile create_microC_arrays.py\n", @@ -545,7 +565,9 @@ { "cell_type": "code", 
"execution_count": null, - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2 + }, "outputs": [], "source": [ "links = {\"EU_Seq_Ctrl.bw\":\"https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE146326&format=file&file=GSE146326%5FEUSeq%5FmESC%5FA485%5FTimecourse%5F0min%2En2%2Esmooth%2Ebw\", #p300 paper\n", @@ -558,7 +580,7 @@ "\n", "savepath = Path(\"../GEO_files/\")\n", "\n", - "download_files(links, savepath)\n" + "download_files(links, savepath)" ] }, { @@ -583,7 +605,11 @@ { "cell_type": "code", "execution_count": 3, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [ { "name": "stdout", @@ -820,7 +846,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "%%writefile keep_only_sample_intersection.py\n", @@ -874,7 +904,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2, + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "%%writefile remove_missing_samples.py\n", @@ -912,7 +947,7 @@ " split, missing_modality, sample = row['Split'], row['Modality'], row['Sample']\n", " for modality in data_modalities:\n", " if modality != missing_modality: # Remove the sample from all modalities except where it's missing\n", - " remove_sample_from_modality(split, modality, sample)\n" + " remove_sample_from_modality(split, modality, sample)" ] }, { @@ -938,7 +973,11 @@ { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [ { "name": "stderr", @@ -1045,7 +1084,11 @@ { "cell_type": "code", "execution_count": 3, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [ { "name": "stdout", @@ -1225,7 +1268,12 @@ { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2, + "tags": [ + "hide-input" + ] + }, "outputs": [ { "name": "stdout", 
@@ -1309,7 +1357,7 @@ " fi\n", "done < \"$input_file\"\n", "\n", - "echo \"Processing complete. Files generated: $output_file1, $output_file2, $output_file3\"\n" + "echo \"Processing complete. Files generated: $output_file1, $output_file2, $output_file3\"" ] }, { @@ -1339,7 +1387,11 @@ { "cell_type": "code", "execution_count": 6, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "from pathlib import Path\n", @@ -1387,7 +1439,12 @@ { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2, + "tags": [ + "hide-input" + ] + }, "outputs": [ { "name": "stdout", @@ -1425,7 +1482,7 @@ "\n", "for input_name,output_name in zip(['../targets/training_targets.csv','../targets/test_targets.csv'],['../targets/training_targets_Enformer.csv','../targets/test_targets_Enformer.csv']):\n", " TEST = True if 'test' in input_name else False\n", - " filter_columns_by_bins(input_name, output_name, 57, TEST)\n" + " filter_columns_by_bins(input_name, output_name, 57, TEST)" ] }, { @@ -1443,6 +1500,9 @@ } ], "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, "kernelspec": { "display_name": "Python 3.11.8 (conda)", "language": "python", @@ -1460,7 +1520,6 @@ "pygments_lexer": "ipython3", "version": "3.11.5" }, - "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "e44f9b6c2865d2d1964da278f2e72d55afaa9cfc23528d4e8089ad3422c3db6e" diff --git a/scripts/2_Run_CLASTER.ipynb b/scripts/2_Run_CLASTER.ipynb index a41227f..5e10e53 100644 --- a/scripts/2_Run_CLASTER.ipynb +++ b/scripts/2_Run_CLASTER.ipynb @@ -36,7 +36,11 @@ { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "from pathlib import Path\n", @@ -62,7 +66,11 @@ { "cell_type": "code", "execution_count": 4, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "config_paths = 
[Path(\"../configurations/conf_pure_conv/\"), \n", @@ -5418,7 +5426,11 @@ { "cell_type": "code", "execution_count": 4, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [ { "name": "stdout", @@ -5700,12 +5712,14 @@ { "cell_type": "code", "execution_count": 7, - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2 + }, "outputs": [], "source": [ "! mkdir -p ../runs/test_runs/gene_expression_only_chrom_pure_conv\n", "! mkdir -p ../runs/test_runs/gene_expression_microc_pure_conv\n", - "! mkdir -p ../runs/test_runs/gene_expression_microc_rotated_pure_conv\n" + "! mkdir -p ../runs/test_runs/gene_expression_microc_rotated_pure_conv" ] }, { @@ -5825,6 +5839,9 @@ } ], "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, "kernelspec": { "display_name": "Python 3.11.5", "language": "python", @@ -5842,7 +5859,6 @@ "pygments_lexer": "ipython3", "version": "3.11.5" }, - "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "c305a8ed7bd3fb5b79b1e9049e998ba6b84af3f1b497d7bcc87b4717f669b9d3" diff --git a/scripts/2b_Run_HyenaDNA_and_Enformer.ipynb b/scripts/2b_Run_HyenaDNA_and_Enformer.ipynb index f1412af..8e10d4f 100644 --- a/scripts/2b_Run_HyenaDNA_and_Enformer.ipynb +++ b/scripts/2b_Run_HyenaDNA_and_Enformer.ipynb @@ -160,7 +160,11 @@ { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [ { "name": "stdout", @@ -2196,7 +2200,11 @@ { "cell_type": "code", "execution_count": 25, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [ { "name": "stdout", @@ -2302,7 +2310,11 @@ { "cell_type": "code", "execution_count": 4, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [ { "name": "stdout", @@ -2827,6 +2839,9 @@ } ], "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, "kernelspec": { "display_name": "Python 3.11.5", "language": "python", @@ -2844,7 +2859,6 @@ "pygments_lexer": 
"ipython3", "version": "3.11.5" }, - "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "c305a8ed7bd3fb5b79b1e9049e998ba6b84af3f1b497d7bcc87b4717f669b9d3" diff --git a/scripts/3_Data_analysis.ipynb b/scripts/3_Data_analysis.ipynb index 2aebdbe..25d3c59 100644 --- a/scripts/3_Data_analysis.ipynb +++ b/scripts/3_Data_analysis.ipynb @@ -27,7 +27,11 @@ { "cell_type": "code", "execution_count": 5, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "import os\n", @@ -63,7 +67,11 @@ { "cell_type": "code", "execution_count": 69, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "def read_gene_positions(csv_path, resolution=1):\n", @@ -990,7 +998,12 @@ { "cell_type": "code", "execution_count": 9, - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2, + "tags": [ + "hide-input" + ] + }, "outputs": [ { "name": "stdout", @@ -1042,7 +1055,7 @@ "\n", "\n", "pointwise_avg = plot_correlations(figure_path, np.concatenate([pred_list_values_dict[condition] for condition in condition_list]), np.concatenate([actual_list_values_dict[condition] for condition in condition_list]), \"_binpred_noH3K27ac_both_conditions\", cmap=\"afmhot\", binlims=(-10,100))\n", - "area = plot_correlations(figure_path, np.concatenate([pred_list_A_per_bin_dict[condition] for condition in condition_list]), np.concatenate([actual_list_A_per_bin_dict[condition] for condition in condition_list]), condition + \"_gene_area_norm_noH3K27ac_both_conditions\", cmap=\"afmhot\", density=False)\n" + "area = plot_correlations(figure_path, np.concatenate([pred_list_A_per_bin_dict[condition] for condition in condition_list]), np.concatenate([actual_list_A_per_bin_dict[condition] for condition in condition_list]), condition + \"_gene_area_norm_noH3K27ac_both_conditions\", cmap=\"afmhot\", density=False)" ] }, { @@ -1057,7 +1070,11 @@ { "cell_type": "code", "execution_count": 17, - "metadata": {}, + "metadata": { + "tags": [ 
+ "hide-input" + ] + }, "outputs": [ { "name": "stdout", @@ -1205,7 +1222,11 @@ { "cell_type": "code", "execution_count": 70, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [ { "name": "stdout", @@ -1302,7 +1323,11 @@ { "cell_type": "code", "execution_count": 17, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [ { "data": { @@ -1637,7 +1662,11 @@ { "cell_type": "code", "execution_count": 34, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [ { "data": { @@ -1705,7 +1734,12 @@ { "cell_type": "code", "execution_count": 30, - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2, + "tags": [ + "hide-input" + ] + }, "outputs": [ { "name": "stderr", @@ -1932,7 +1966,7 @@ " fig.savefig(figure_path / \"Normalized_frequency_hist.png\", dpi=200)\n", "\n", "\n", - "plot_perturbation_histograms(table, Path(\"../figures/Figure_2/\"))\n" + "plot_perturbation_histograms(table, Path(\"../figures/Figure_2/\"))" ] }, { @@ -1947,7 +1981,11 @@ { "cell_type": "code", "execution_count": 55, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "array_path = Path(\"../inputs/landscape_arrays/test/\")\n", @@ -1980,7 +2018,11 @@ { "cell_type": "code", "execution_count": 61, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [ { "name": "stderr", @@ -2066,7 +2108,11 @@ { "cell_type": "code", "execution_count": 17, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "name = \"ENSMUSG00000059552.13_forward.npy\" #\"ENSMUSG00000024406.16_forward.npy\" # ENSMUSG00000028948.16_forward ENSMUSG00000078626.2_forward ENSMUSG00000046667.14_rev # Original paper example \"ENSMUST00000105369.7.npy\" #\"ENSMUST00000002350.10_flipped\"\n", @@ -2089,7 +2135,11 @@ { "cell_type": "code", "execution_count": 67, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": 
[ { "name": "stdout", @@ -2135,7 +2185,11 @@ { "cell_type": "code", "execution_count": 4, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [ { "name": "stdout", @@ -2217,7 +2271,12 @@ { "cell_type": "code", "execution_count": 33, - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2, + "tags": [ + "hide-input" + ] + }, "outputs": [ { "data": { @@ -2291,7 +2350,7 @@ " TSS,\n", " chrom,\n", " sigma,\n", - " binsize)\n" + " binsize)" ] }, { @@ -2304,7 +2363,11 @@ { "cell_type": "code", "execution_count": 26, - "metadata": {}, + "metadata": { + "tags": [ + "hide-input" + ] + }, "outputs": [ { "data": { @@ -2398,7 +2461,12 @@ { "cell_type": "code", "execution_count": 36, - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2, + "tags": [ + "hide-input" + ] + }, "outputs": [ { "data": { @@ -2480,11 +2548,14 @@ "savepath = '../figures/supplementary_figures/Train_test_loss_curves_benchmark.png'\n", "\n", "# Plot the losses\n", - "plot_losses(hyena_log_file, enformer_log_file, savepath)\n" + "plot_losses(hyena_log_file, enformer_log_file, savepath)" ] } ], "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, "kernelspec": { "display_name": "Python 3.11.8", "language": "python", @@ -2502,7 +2573,6 @@ "pygments_lexer": "ipython3", "version": "3.11.5" }, - "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "c305a8ed7bd3fb5b79b1e9049e998ba6b84af3f1b497d7bcc87b4717f669b9d3" From 2b30f569353d73cd956b4a35cf4f5a40f87a4520 Mon Sep 17 00:00:00 2001 From: Henry Webel Date: Thu, 30 May 2024 11:51:35 +0000 Subject: [PATCH 11/11] :bug: fix path to Readme for website --- index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.rst b/index.rst index 789cd8b..b6f6f4f 100644 --- a/index.rst +++ b/index.rst @@ -3,7 +3,7 @@ Claster Modeling nascent RNA transcription from chromatin landscape and structure -.. include:: ../Readme.md +.. include:: Readme.md :parser: myst_parser.sphinx_ :start-line: 3