From f119b66dd7666683be08c7eb2a260bc51c8eb05b Mon Sep 17 00:00:00 2001 From: Joseph Ellaway Date: Fri, 1 Nov 2024 15:49:44 +0000 Subject: [PATCH 1/6] Investigations folder --- .gitignore | 11 ++++++++++- .../PDB_release_stats.ipynb | 0 .../PDBe_yearly_release_stats_2022.ipynb | 0 .../PDBe_yearly_release_stats_2023.ipynb | 0 4 files changed, 10 insertions(+), 1 deletion(-) rename PDB_release_stats.ipynb => example_api_investigations/PDB_release_stats.ipynb (100%) rename PDBe_yearly_release_stats_2022.ipynb => example_api_investigations/PDBe_yearly_release_stats_2022.ipynb (100%) rename PDBe_yearly_release_stats_2023.ipynb => example_api_investigations/PDBe_yearly_release_stats_2023.ipynb (100%) diff --git a/.gitignore b/.gitignore index 28aef85..d5858a0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,5 @@ .ipynb_checkpoints .idea -venv *.pyc summer_school_2024/similar_proteins* @@ -8,3 +7,13 @@ summer_school_2024/search_results* pdbe_tutorial_2024/similar_proteins* pdbe_tutorial_2024/search_results* + + +# Environments +env/ +venv/ +.venv/ +.env/ + +# Editors +.vscode/ \ No newline at end of file diff --git a/PDB_release_stats.ipynb b/example_api_investigations/PDB_release_stats.ipynb similarity index 100% rename from PDB_release_stats.ipynb rename to example_api_investigations/PDB_release_stats.ipynb diff --git a/PDBe_yearly_release_stats_2022.ipynb b/example_api_investigations/PDBe_yearly_release_stats_2022.ipynb similarity index 100% rename from PDBe_yearly_release_stats_2022.ipynb rename to example_api_investigations/PDBe_yearly_release_stats_2022.ipynb diff --git a/PDBe_yearly_release_stats_2023.ipynb b/example_api_investigations/PDBe_yearly_release_stats_2023.ipynb similarity index 100% rename from PDBe_yearly_release_stats_2023.ipynb rename to example_api_investigations/PDBe_yearly_release_stats_2023.ipynb From a0c68c80358e7b82d9b900311b4d68ca030451bd Mon Sep 17 00:00:00 2001 From: Joseph Ellaway <69363546+Joseph-Ellaway@users.noreply.github.com> Date: 
Fri, 1 Nov 2024 15:55:14 +0000 Subject: [PATCH 2/6] Delete .vscode directory --- .vscode/settings.json | 1 - 1 file changed, 1 deletion(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 9e26dfe..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file From 038c24b67c6f8baf3daba5a2f26c840b4d87c44e Mon Sep 17 00:00:00 2001 From: jellaway Date: Tue, 12 Nov 2024 15:28:38 +0000 Subject: [PATCH 3/6] Dockerised --- .dockerignore | 34 + .gitignore | 1 - Dockerfile | 13 + LICENSE.md | 2 +- README.md | 14 +- pdbe_tutorial_2024/6_complexes.ipynb | 1078 +++++++++++++++++++++++++- requirements.txt | 115 ++- requirements_nonversioned.txt | 5 + 8 files changed, 1242 insertions(+), 20 deletions(-) create mode 100644 .dockerignore create mode 100644 Dockerfile create mode 100644 requirements_nonversioned.txt diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..945ca0e --- /dev/null +++ b/.dockerignore @@ -0,0 +1,34 @@ + + +# Common +README.md +CHANGELOG.md +docker-compose.yml +Dockerfile + +# Virtual environments +venv +env +.venv +.env + +# Python +*.pyc +.Python + +# Editors +vscode +.idea + +# Git +.git +.gitignore +.gitattributes + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml \ No newline at end of file diff --git a/.gitignore b/.gitignore index d5858a0..8dd27a7 100644 --- a/.gitignore +++ b/.gitignore @@ -8,7 +8,6 @@ summer_school_2024/search_results* pdbe_tutorial_2024/similar_proteins* pdbe_tutorial_2024/search_results* - # Environments env/ venv/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..de53709 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,13 @@ +FROM python:3.10 + +WORKDIR /pdbe_api_tutorial + +COPY ./requirements.txt /pdbe_api_tutorial/requirements.txt + +RUN pip install --no-cache-dir -r requirements.txt + +EXPOSE 8888 + +COPY 
./pdbe_tutorial_2024 /pdbe_api_tutorial/pdbe_tutorial_2024 + +CMD ["jupyter-server", "--allow-root", "--ip", "0.0.0.0", "--port", "8888", "--no-browser", "pdbe_tutorial_2024/"] diff --git a/LICENSE.md b/LICENSE.md index eb027a0..3c9e5ed 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,4 +1,4 @@ -Copyright 2018 EMBL - European Bioinformatics Institute +Copyright 2024 EMBL - European Bioinformatics Institute Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index 245609d..99172da 100644 --- a/README.md +++ b/README.md @@ -17,8 +17,8 @@ pip3 install --upgrade pip pip3 install jupyter ``` -For Windows machines (optionally also for Linux/OSX) you can install Jupyter with Anaconda: -https://www.anaconda.com/download/ +For Windows machines (optionally also for Linux/OSX) you can install Jupyter with conda-forge: +https://conda-forge.org/ ### Prerequisites @@ -37,6 +37,16 @@ jupyter notebook Jupyter Notebook will open a window in your browser, and you can select the specific notebooks you would like to view. + +## Install and run with Docker + +```shell +docker build -t pdbe-api-training . 
+docker run -p 8888:8888 pdbe-api-training +``` + + + ## Authors * **Mihaly Varadi** - *Initial work* - [github](https://github.com/mvaradi) diff --git a/pdbe_tutorial_2024/6_complexes.ipynb b/pdbe_tutorial_2024/6_complexes.ipynb index 4f507a9..5e04394 100755 --- a/pdbe_tutorial_2024/6_complexes.ipynb +++ b/pdbe_tutorial_2024/6_complexes.ipynb @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "bb161de1", "metadata": {}, "outputs": [], @@ -58,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "d9bbb9e5", "metadata": {}, "outputs": [], @@ -102,10 +102,427 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "7570fae8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://www.ebi.ac.uk/pdbe/graph-api/uniprot/complex/P43681\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'complex_id': 'PDB-CPX-142097',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P0ABE7',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 562},\n", + " {'complex_id': 'PDB-CPX-142097',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-142097',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-142097',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-142097',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P0ABE7',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 562},\n", + " {'complex_id': 'PDB-CPX-142097',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-142097',\n", + " 'subcomplexes': [],\n", + " 
'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-142097',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-148175',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-148175',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-148175',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-148175',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-148175',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-148175',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-148175',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-148175',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-155120',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 1,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-155120',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 1,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-155120',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 1,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 
'PDB-CPX-155120',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 1,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-209547',\n", + " 'subcomplexes': ['PDB-CPX-142097'],\n", + " 'accession': 'P0ABE7',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 562},\n", + " {'complex_id': 'PDB-CPX-209547',\n", + " 'subcomplexes': ['PDB-CPX-142097'],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-209547',\n", + " 'subcomplexes': ['PDB-CPX-142097'],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-209547',\n", + " 'subcomplexes': ['PDB-CPX-142097'],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-209547',\n", + " 'subcomplexes': ['PDB-CPX-142097'],\n", + " 'accession': 'P0ABE7',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 562},\n", + " {'complex_id': 'PDB-CPX-209547',\n", + " 'subcomplexes': ['PDB-CPX-142097'],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-209547',\n", + " 'subcomplexes': ['PDB-CPX-142097'],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-209547',\n", + " 'subcomplexes': ['PDB-CPX-142097'],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-210711',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-210711',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-210711',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 
9606},\n", + " {'complex_id': 'PDB-CPX-210711',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-210711',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-210711',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-210711',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-210711',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-210712',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-210712',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-210712',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-210712',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-210712',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-210712',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-210712',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 
'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-210712',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239666',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239666',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239666',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239666',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239666',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239666',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239666',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239666',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239667',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239667',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239667',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239667',\n", + " 'subcomplexes': 
[],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239667',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239667',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239667',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239667',\n", + " 'subcomplexes': [],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239668',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239668',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239669',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239669',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239670',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239670',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239670',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239670',\n", + " 
'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239670',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239670',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239670',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239670',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239671',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239671',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239671',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239671',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239671',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239671',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P17787',\n", + " 'stoichiometry': 3,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239671',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P43681',\n", + " 
'stoichiometry': 2,\n", + " 'taxonomy_id': 9606},\n", + " {'complex_id': 'PDB-CPX-239671',\n", + " 'subcomplexes': ['PDB-CPX-148175'],\n", + " 'accession': 'P43681',\n", + " 'stoichiometry': 2,\n", + " 'taxonomy_id': 9606}]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "uniprot_accession = 'P43681'\n", "results = get_complexes_protein_data(uniprot_accession) \n", @@ -135,10 +552,277 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "b50176b8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
complex_idaccessionsubcomplexesstoichiometrytaxonomy_id
0PDB-CPX-142097P0ABE7[]2562
1PDB-CPX-142097P17787[]39606
2PDB-CPX-142097P43681[]29606
3PDB-CPX-148175P17787[]39606
4PDB-CPX-148175P43681[]29606
5PDB-CPX-155120P43681[]19606
6PDB-CPX-209547P0ABE7[PDB-CPX-142097]2562
7PDB-CPX-209547P17787[PDB-CPX-142097]39606
8PDB-CPX-209547P43681[PDB-CPX-142097]29606
9PDB-CPX-210711P17787[]29606
10PDB-CPX-210711P43681[]39606
11PDB-CPX-210712P17787[PDB-CPX-148175]39606
12PDB-CPX-210712P43681[PDB-CPX-148175]29606
13PDB-CPX-239666P17787[]29606
14PDB-CPX-239666P43681[]39606
15PDB-CPX-239667P17787[]29606
16PDB-CPX-239667P43681[]39606
17PDB-CPX-239668P17787[PDB-CPX-148175]39606
18PDB-CPX-239668P43681[PDB-CPX-148175]29606
19PDB-CPX-239669P17787[PDB-CPX-148175]39606
20PDB-CPX-239669P43681[PDB-CPX-148175]29606
21PDB-CPX-239670P17787[PDB-CPX-148175]39606
22PDB-CPX-239670P43681[PDB-CPX-148175]29606
23PDB-CPX-239671P17787[PDB-CPX-148175]39606
24PDB-CPX-239671P43681[PDB-CPX-148175]29606
\n", + "
" + ], + "text/plain": [ + " complex_id accession subcomplexes stoichiometry taxonomy_id\n", + "0 PDB-CPX-142097 P0ABE7 [] 2 562\n", + "1 PDB-CPX-142097 P17787 [] 3 9606\n", + "2 PDB-CPX-142097 P43681 [] 2 9606\n", + "3 PDB-CPX-148175 P17787 [] 3 9606\n", + "4 PDB-CPX-148175 P43681 [] 2 9606\n", + "5 PDB-CPX-155120 P43681 [] 1 9606\n", + "6 PDB-CPX-209547 P0ABE7 [PDB-CPX-142097] 2 562\n", + "7 PDB-CPX-209547 P17787 [PDB-CPX-142097] 3 9606\n", + "8 PDB-CPX-209547 P43681 [PDB-CPX-142097] 2 9606\n", + "9 PDB-CPX-210711 P17787 [] 2 9606\n", + "10 PDB-CPX-210711 P43681 [] 3 9606\n", + "11 PDB-CPX-210712 P17787 [PDB-CPX-148175] 3 9606\n", + "12 PDB-CPX-210712 P43681 [PDB-CPX-148175] 2 9606\n", + "13 PDB-CPX-239666 P17787 [] 2 9606\n", + "14 PDB-CPX-239666 P43681 [] 3 9606\n", + "15 PDB-CPX-239667 P17787 [] 2 9606\n", + "16 PDB-CPX-239667 P43681 [] 3 9606\n", + "17 PDB-CPX-239668 P17787 [PDB-CPX-148175] 3 9606\n", + "18 PDB-CPX-239668 P43681 [PDB-CPX-148175] 2 9606\n", + "19 PDB-CPX-239669 P17787 [PDB-CPX-148175] 3 9606\n", + "20 PDB-CPX-239669 P43681 [PDB-CPX-148175] 2 9606\n", + "21 PDB-CPX-239670 P17787 [PDB-CPX-148175] 3 9606\n", + "22 PDB-CPX-239670 P43681 [PDB-CPX-148175] 2 9606\n", + "23 PDB-CPX-239671 P17787 [PDB-CPX-148175] 3 9606\n", + "24 PDB-CPX-239671 P43681 [PDB-CPX-148175] 2 9606" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Reformat data using groupby remove repetition in the dataset \n", "df_complexes_with_duplicates = pd.DataFrame(results)\n", @@ -156,10 +840,383 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "29952a0c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
complex_idsubcomplexesaccessionprotein_namestoichiometrytaxonomy_idtaxonomy_name
0PDB-CPX-142097[]P0ABE7Soluble cytochrome b5622562Escherichia coli
1PDB-CPX-142097[]P17787Neuronal acetylcholine receptor subunit beta-239606Homo sapiens
2PDB-CPX-142097[]P43681Neuronal acetylcholine receptor subunit alpha-429606Homo sapiens
3PDB-CPX-148175[]P17787Neuronal acetylcholine receptor subunit beta-239606Homo sapiens
4PDB-CPX-148175[]P43681Neuronal acetylcholine receptor subunit alpha-429606Homo sapiens
5PDB-CPX-155120[]P43681Neuronal acetylcholine receptor subunit alpha-419606Homo sapiens
6PDB-CPX-209547[PDB-CPX-142097]P0ABE7Soluble cytochrome b5622562Escherichia coli
7PDB-CPX-209547[PDB-CPX-142097]P17787Neuronal acetylcholine receptor subunit beta-239606Homo sapiens
8PDB-CPX-209547[PDB-CPX-142097]P43681Neuronal acetylcholine receptor subunit alpha-429606Homo sapiens
9PDB-CPX-210711[]P17787Neuronal acetylcholine receptor subunit beta-229606Homo sapiens
10PDB-CPX-210711[]P43681Neuronal acetylcholine receptor subunit alpha-439606Homo sapiens
11PDB-CPX-210712[PDB-CPX-148175]P17787Neuronal acetylcholine receptor subunit beta-239606Homo sapiens
12PDB-CPX-210712[PDB-CPX-148175]P43681Neuronal acetylcholine receptor subunit alpha-429606Homo sapiens
13PDB-CPX-239666[]P17787Neuronal acetylcholine receptor subunit beta-229606Homo sapiens
14PDB-CPX-239666[]P43681Neuronal acetylcholine receptor subunit alpha-439606Homo sapiens
15PDB-CPX-239667[]P17787Neuronal acetylcholine receptor subunit beta-229606Homo sapiens
16PDB-CPX-239667[]P43681Neuronal acetylcholine receptor subunit alpha-439606Homo sapiens
17PDB-CPX-239668[PDB-CPX-148175]P17787Neuronal acetylcholine receptor subunit beta-239606Homo sapiens
18PDB-CPX-239668[PDB-CPX-148175]P43681Neuronal acetylcholine receptor subunit alpha-429606Homo sapiens
19PDB-CPX-239669[PDB-CPX-148175]P17787Neuronal acetylcholine receptor subunit beta-239606Homo sapiens
20PDB-CPX-239669[PDB-CPX-148175]P43681Neuronal acetylcholine receptor subunit alpha-429606Homo sapiens
21PDB-CPX-239670[PDB-CPX-148175]P17787Neuronal acetylcholine receptor subunit beta-239606Homo sapiens
22PDB-CPX-239670[PDB-CPX-148175]P43681Neuronal acetylcholine receptor subunit alpha-429606Homo sapiens
23PDB-CPX-239671[PDB-CPX-148175]P17787Neuronal acetylcholine receptor subunit beta-239606Homo sapiens
24PDB-CPX-239671[PDB-CPX-148175]P43681Neuronal acetylcholine receptor subunit alpha-429606Homo sapiens
\n", + "
" + ], + "text/plain": [ + " complex_id subcomplexes accession \\\n", + "0 PDB-CPX-142097 [] P0ABE7 \n", + "1 PDB-CPX-142097 [] P17787 \n", + "2 PDB-CPX-142097 [] P43681 \n", + "3 PDB-CPX-148175 [] P17787 \n", + "4 PDB-CPX-148175 [] P43681 \n", + "5 PDB-CPX-155120 [] P43681 \n", + "6 PDB-CPX-209547 [PDB-CPX-142097] P0ABE7 \n", + "7 PDB-CPX-209547 [PDB-CPX-142097] P17787 \n", + "8 PDB-CPX-209547 [PDB-CPX-142097] P43681 \n", + "9 PDB-CPX-210711 [] P17787 \n", + "10 PDB-CPX-210711 [] P43681 \n", + "11 PDB-CPX-210712 [PDB-CPX-148175] P17787 \n", + "12 PDB-CPX-210712 [PDB-CPX-148175] P43681 \n", + "13 PDB-CPX-239666 [] P17787 \n", + "14 PDB-CPX-239666 [] P43681 \n", + "15 PDB-CPX-239667 [] P17787 \n", + "16 PDB-CPX-239667 [] P43681 \n", + "17 PDB-CPX-239668 [PDB-CPX-148175] P17787 \n", + "18 PDB-CPX-239668 [PDB-CPX-148175] P43681 \n", + "19 PDB-CPX-239669 [PDB-CPX-148175] P17787 \n", + "20 PDB-CPX-239669 [PDB-CPX-148175] P43681 \n", + "21 PDB-CPX-239670 [PDB-CPX-148175] P17787 \n", + "22 PDB-CPX-239670 [PDB-CPX-148175] P43681 \n", + "23 PDB-CPX-239671 [PDB-CPX-148175] P17787 \n", + "24 PDB-CPX-239671 [PDB-CPX-148175] P43681 \n", + "\n", + " protein_name stoichiometry \\\n", + "0 Soluble cytochrome b562 2 \n", + "1 Neuronal acetylcholine receptor subunit beta-2 3 \n", + "2 Neuronal acetylcholine receptor subunit alpha-4 2 \n", + "3 Neuronal acetylcholine receptor subunit beta-2 3 \n", + "4 Neuronal acetylcholine receptor subunit alpha-4 2 \n", + "5 Neuronal acetylcholine receptor subunit alpha-4 1 \n", + "6 Soluble cytochrome b562 2 \n", + "7 Neuronal acetylcholine receptor subunit beta-2 3 \n", + "8 Neuronal acetylcholine receptor subunit alpha-4 2 \n", + "9 Neuronal acetylcholine receptor subunit beta-2 2 \n", + "10 Neuronal acetylcholine receptor subunit alpha-4 3 \n", + "11 Neuronal acetylcholine receptor subunit beta-2 3 \n", + "12 Neuronal acetylcholine receptor subunit alpha-4 2 \n", + "13 Neuronal acetylcholine receptor subunit beta-2 2 \n", + "14 Neuronal 
acetylcholine receptor subunit alpha-4 3 \n", + "15 Neuronal acetylcholine receptor subunit beta-2 2 \n", + "16 Neuronal acetylcholine receptor subunit alpha-4 3 \n", + "17 Neuronal acetylcholine receptor subunit beta-2 3 \n", + "18 Neuronal acetylcholine receptor subunit alpha-4 2 \n", + "19 Neuronal acetylcholine receptor subunit beta-2 3 \n", + "20 Neuronal acetylcholine receptor subunit alpha-4 2 \n", + "21 Neuronal acetylcholine receptor subunit beta-2 3 \n", + "22 Neuronal acetylcholine receptor subunit alpha-4 2 \n", + "23 Neuronal acetylcholine receptor subunit beta-2 3 \n", + "24 Neuronal acetylcholine receptor subunit alpha-4 2 \n", + "\n", + " taxonomy_id taxonomy_name \n", + "0 562 Escherichia coli \n", + "1 9606 Homo sapiens \n", + "2 9606 Homo sapiens \n", + "3 9606 Homo sapiens \n", + "4 9606 Homo sapiens \n", + "5 9606 Homo sapiens \n", + "6 562 Escherichia coli \n", + "7 9606 Homo sapiens \n", + "8 9606 Homo sapiens \n", + "9 9606 Homo sapiens \n", + "10 9606 Homo sapiens \n", + "11 9606 Homo sapiens \n", + "12 9606 Homo sapiens \n", + "13 9606 Homo sapiens \n", + "14 9606 Homo sapiens \n", + "15 9606 Homo sapiens \n", + "16 9606 Homo sapiens \n", + "17 9606 Homo sapiens \n", + "18 9606 Homo sapiens \n", + "19 9606 Homo sapiens \n", + "20 9606 Homo sapiens \n", + "21 9606 Homo sapiens \n", + "22 9606 Homo sapiens \n", + "23 9606 Homo sapiens \n", + "24 9606 Homo sapiens " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Map UniProt IDs to Protein Names\n", "accession_mapping = {\n", @@ -181,7 +1238,6 @@ "new_column_order = ['complex_id', 'subcomplexes', 'accession', 'protein_name', 'stoichiometry', 'taxonomy_id', 'taxonomy_name' ]\n", "df_complexes = df_complexes[new_column_order]\n", "\n", - "\n", "df_complexes" ] }, @@ -300,7 +1356,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ 
-314,7 +1370,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.10.15" } }, "nbformat": 4, diff --git a/requirements.txt b/requirements.txt index d4e08f4..23bc824 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,110 @@ -requests -matplotlib -pandas -jupyter -solrq +anyio==4.6.2.post1 +appnope==0.1.4 +argon2-cffi==23.1.0 +argon2-cffi-bindings==21.2.0 +arrow==1.3.0 +asttokens==2.4.1 +async-lru==2.0.4 +attrs==24.2.0 +babel==2.16.0 +beautifulsoup4==4.12.3 +bleach==6.2.0 +certifi==2024.8.30 +cffi==1.17.1 +charset-normalizer==3.4.0 +comm==0.2.2 +contourpy==1.3.1 +cycler==0.12.1 +debugpy==1.8.8 +decorator==5.1.1 +defusedxml==0.7.1 +exceptiongroup==1.2.2 +executing==2.1.0 +fastjsonschema==2.20.0 +fonttools==4.54.1 +fqdn==1.5.1 +h11==0.14.0 +httpcore==1.0.6 +httpx==0.27.2 +idna==3.10 +ipykernel==6.29.5 +ipython==8.29.0 +ipywidgets==8.1.5 +isoduration==20.11.0 +jedi==0.19.2 +Jinja2==3.1.4 +json5==0.9.28 +jsonpointer==3.0.0 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +jupyter==1.1.1 +jupyter-console==6.6.3 +jupyter-events==0.10.0 +jupyter-lsp==2.2.5 +jupyter_client==8.6.3 +jupyter_core==5.7.2 +jupyter_server==2.14.2 +jupyter_server_terminals==0.5.3 +jupyterlab==4.2.5 +jupyterlab_pygments==0.3.0 +jupyterlab_server==2.27.3 +jupyterlab_widgets==3.0.13 +kiwisolver==1.4.7 +MarkupSafe==3.0.2 +matplotlib==3.9.2 +matplotlib-inline==0.1.7 +mistune==3.0.2 +nbclient==0.10.0 +nbconvert==7.16.4 +nbformat==5.10.4 +nest-asyncio==1.6.0 +notebook==7.2.2 +notebook_shim==0.2.4 +numpy==2.1.3 +overrides==7.7.0 +packaging==24.2 +pandas==2.2.3 +pandocfilters==1.5.1 +parso==0.8.4 +pexpect==4.9.0 +pillow==11.0.0 +platformdirs==4.3.6 +prometheus_client==0.21.0 +prompt_toolkit==3.0.48 +psutil==6.1.0 +ptyprocess==0.7.0 +pure_eval==0.2.3 +pycparser==2.22 +Pygments==2.18.0 +pyparsing==3.2.0 +python-dateutil==2.9.0.post0 +python-json-logger==2.0.7 +pytz==2024.2 +PyYAML==6.0.2 +pyzmq==26.2.0 
+referencing==0.35.1 +requests==2.32.3 +rfc3339-validator==0.1.4 +rfc3986-validator==0.1.1 +rpds-py==0.21.0 +Send2Trash==1.8.3 +six==1.16.0 +sniffio==1.3.1 +solrq==1.1.2 +soupsieve==2.6 +stack-data==0.6.3 +terminado==0.18.1 +tinycss2==1.4.0 +tomli==2.1.0 +tornado==6.4.1 +traitlets==5.14.3 +types-python-dateutil==2.9.0.20241003 +typing_extensions==4.12.2 +tzdata==2024.2 +uri-template==1.3.0 +urllib3==2.2.3 +wcwidth==0.2.13 +webcolors==24.11.1 +webencodings==0.5.1 +websocket-client==1.8.0 +widgetsnbextension==4.0.13 diff --git a/requirements_nonversioned.txt b/requirements_nonversioned.txt new file mode 100644 index 0000000..d4e08f4 --- /dev/null +++ b/requirements_nonversioned.txt @@ -0,0 +1,5 @@ +requests +matplotlib +pandas +jupyter +solrq From 6b3b0722e3dc80f7000d4b0092843ba98a3f78ab Mon Sep 17 00:00:00 2001 From: jellaway Date: Thu, 14 Nov 2024 17:38:01 +0000 Subject: [PATCH 4/6] Mount dir to docker Allows work to be saved in jupyter-server and then loaded in later --- .dockerignore | 6 +++++- .gitignore | 6 +++++- README.md | 6 +++++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.dockerignore b/.dockerignore index 945ca0e..8d78f03 100644 --- a/.dockerignore +++ b/.dockerignore @@ -31,4 +31,8 @@ htmlcov/ .coverage .cache nosetests.xml -coverage.xml \ No newline at end of file +coverage.xml + +# Trash +pdbe_tutorial_2024/.Trash-0/ +.Trash-0/ \ No newline at end of file diff --git a/.gitignore b/.gitignore index 8dd27a7..4039ed8 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,8 @@ venv/ .env/ # Editors -.vscode/ \ No newline at end of file +.vscode/ + +# Trash +pdbe_tutorial_2024/.Trash-0/ +.Trash-0/ \ No newline at end of file diff --git a/README.md b/README.md index 99172da..4270052 100644 --- a/README.md +++ b/README.md @@ -40,11 +40,15 @@ Jupyter Notebook will open a window in your browser, and you can select the spec ## Install and run with Docker +Make sure your Docker client is running and then run: + ```shell +cd 
/path/to/your/pdbe-api-training/ docker build -t pdbe-api-training . -docker run -p 8888:8888 pdbe-api-training +docker run -p 8888:8888 -v /path/to/your/pdbe-api-training/pdbe_tutorial_2024/:/pdbe_api_tutorial/pdbe_tutorial_2024 pdbe-api-training ``` +Now open your browser and go to `http://localhost:8888/` and you should see the Jupyter Notebook interface. The `-v` flag ensures changes you make to the notebooks will be saved in the `pdbe_tutorial_2024` directory in the repository cloned to your local machine. ## Authors From f514dd4493da528e820570f7a17747b6dcda443a Mon Sep 17 00:00:00 2001 From: jellaway Date: Thu, 14 Nov 2024 17:58:31 +0000 Subject: [PATCH 5/6] Improved install docs --- README.md | 65 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 4270052..cb3e3de 100644 --- a/README.md +++ b/README.md @@ -2,53 +2,66 @@ This repository contains Jupyter Notebooks that can be used as training materials for understanding how the PDBe REST API works, and provides a number of examples of answering specific questions about PDB entries using the API. -## Getting Started -Either use Binder to use the notebook: +## Getting started with Docker (recommended) -[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/PDBeurope/pdbe-api-training/master) +This option launches the summer school notebooks in a Docker container. This is the recommended way to run the notebooks as it ensures that all dependencies are correctly installed and that the notebooks will run as expected. -Or +Make sure your [Docker](https://docs.docker.com/engine/install/) is running on your machine before executing the following commands: -You will need to have Jupyter installed on your machine. For Linux/OSX machines, simply type: +### Clone the repository +```console +git clone https://github.com/PDBeurope/pdbe-api-training . 
+cd /path/to/your/pdbe-api-training/ +``` +### Build the Docker container (you only need to do this once) +```console +docker build -t pdbe-api-training . ``` -pip3 install --upgrade pip -pip3 install jupyter + +### Run the Docker container +```console +docker run -p 8888:8888 -v /path/to/your/pdbe-api-training/pdbe_tutorial_2024/:/pdbe_api_tutorial/pdbe_tutorial_2024 pdbe-api-training ``` -For Windows machines (optionally also for Linux/OSX) you can install Jupyter with conda-forge: -https://conda-forge.org/ +Now open your browser and go to `http://localhost:8888/` and you should see the Jupyter Notebook interface. The `-v` flag ensures changes you make to the notebooks will be saved in the `pdbe_tutorial_2024` directory in the repository cloned to your local machine. -### Prerequisites +## Getting started with Binder -If installing using Anaconda, there is no additional prerequisites. If installing using pip3, you will need to have Python3 on your machine, and of course pip3 +Notebooks can also be run in the cloud via Binder. Click on the badge below to launch the notebooks in Binder: -### Installing +[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/PDBeurope/pdbe-api-training/master) -From command line on Unix machine these would be the steps setting up everyone: +## Getting started with Python and Jupyter + +You will need to have Python 3.10, pip3 and Jupyter installed on your machine. + +### Upgrade pip3 +```console +pip3 install --upgrade pip ``` + +For Windows machines (optionally also for Linux/OSX) you can install Jupyter with conda-forge: +https://conda-forge.org/ + +### Clone the repository + +```console mkdir pdbe_jupyter cd pdbe_jupyter git clone https://github.com/PDBeurope/pdbe-api-training . -jupyter notebook ``` -Jupyter Notebook will open a window in your browser, and you can select the specific notebooks you would like to view.
- - -## Install and run with Docker - -Make sure your Docker client is running and then run: - -```shell -cd /path/to/your/pdbe-api-training/ -docker build -t pdbe-api-training . -docker run -p 8888:8888 -v /path/to/your/pdbe-api-training/pdbe_tutorial_2024/:/pdbe_api_tutorial/pdbe_tutorial_2024 pdbe-api-training +### Install the dependencies and start Jupyter +```console +cd pdbe-api-training +pip3 install -r requirements.txt +jupyter notebook ./pdbe_tutorial_2024 ``` -Now open your browser and go to `http://localhost:8888/` and you should see the Jupyter Notebook interface. The `-v` flag ensures changes you make to the notebooks will be saved in the `pdbe_tutorial_2024` directory in the repository cloned to your local machine. +Jupyter Notebook will open a window in your browser, and you can select the specific notebooks you would like to view. ## Authors From 991003b397729394e8039f65408d72b44e90d03e Mon Sep 17 00:00:00 2001 From: Joseph Ellaway <69363546+Joseph-Ellaway@users.noreply.github.com> Date: Thu, 5 Dec 2024 16:32:46 +0000 Subject: [PATCH 6/6] Author label --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index de53709..8eb76b1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,7 @@ FROM python:3.10 +LABEL maintainer="pdbegroup" + WORKDIR /pdbe_api_tutorial COPY ./requirements.txt /pdbe_api_tutorial/requirements.txt