From 2cf758d47d8dcce60e201b85cb8d5d3c609107ea Mon Sep 17 00:00:00 2001 From: gbouras13 Date: Tue, 19 Mar 2024 15:20:23 +1030 Subject: [PATCH 1/5] add ability to utilise gpu on apple silicon --- README.md | 11 ++++++++++- docs/install.md | 10 +++++++++- pyproject.toml | 2 +- src/phold/features/predict_3Di.py | 28 ++++++++++++++++------------ src/phold/utils/VERSION | 2 +- 5 files changed, 37 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 593f027..6669207 100644 --- a/README.md +++ b/README.md @@ -60,12 +60,21 @@ mamba create -n pholdENV -c conda-forge -c bioconda phold To utilise `phold` with GPU, a GPU compatible version of `pytorch` must be installed. By default conda/mamba will install a CPU-only version. -Therefore, if you have an NVIDIA GPU, please try: +If you have an NVIDIA GPU, please try: ```bash mamba create -n pholdENV -c conda-forge -c bioconda phold pytorch=*=cuda* ``` +If you have a Mac running Apple Silicon chip (M1/2/3), `phold` should be able to use the GPU. Please try: + +```bash +mamba create -n pholdENV python==3.11 +conda activate pholdENV +mamba install pytorch::pytorch torchvision torchaudio -c pytorch +mamba install -c conda-forge -c bioconda phold +``` + If you are having trouble with `pytorch` see [this link](https://pytorch.org) for more instructions. If you have an older version of CUDA installed, then you might find [this link useful](https://pytorch.org/get-started/previous-versions/). Once `phold` is installed, to download and install the database run: diff --git a/docs/install.md b/docs/install.md index 77fe6b6..ab50225 100644 --- a/docs/install.md +++ b/docs/install.md @@ -92,11 +92,19 @@ conda config --add channels conda-forge We would recommend installing `phold` into a fresh environment. 
Assuming you installed miniforge, to create an environment called `pholdENV` with `phold` installed (assuming you have an NVIDIA GPU): - ```bash mamba create -n pholdENV -c conda-forge -c bioconda phold pytorch=*=cuda* ``` +If you have a Mac that runs Apple Silicon (M1/M2/M3), please try: + +```bash +mamba create -n pholdENV python==3.11 +conda activate pholdENV +mamba install pytorch::pytorch torchvision torchaudio -c pytorch +mamba install -c conda-forge -c bioconda phold +``` + If you don't have a GPU: ```bash diff --git a/pyproject.toml b/pyproject.toml index 498322a..32739d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ requires = ["setuptools>=61.0", "wheel>=0.37.1"] [project] # https://packaging.python.org/en/latest/specifications/declaring-project-metadata/ name = "phold" -version = "0.1.2" # change VERSION too +version = "0.1.3" # change VERSION too description = "Phage Annotations using Protein Structures" readme = "README.md" requires-python = ">=3.8, <3.12" diff --git a/src/phold/features/predict_3Di.py b/src/phold/features/predict_3Di.py index b547360..487e420 100644 --- a/src/phold/features/predict_3Di.py +++ b/src/phold/features/predict_3Di.py @@ -96,20 +96,24 @@ def get_T5_model( global device - if torch.cuda.is_available(): - if cpu is True: - device = torch.device("cpu") - dev_name = "cpu" - else: - device = torch.device("cuda:0") - dev_name = "cuda:0" - else: + if cpu is True: device = torch.device("cpu") dev_name = "cpu" - if cpu is not True: - logger.warning("No available GPU was found, but --cpu was not specified") - logger.warning("ProstT5 will be run with CPU only") - + else: + # check for NVIDIA/cuda + if torch.cuda.is_available(): + device = torch.device("cuda:0") + dev_name = "cuda:0" + # check for apple silicon/metal + elif torch.backends.mps.is_available(): + device = torch.device("mps") + dev_name = "mps" + else: + device = torch.device("cpu") + dev_name = "cpu" + if cpu is not True: + logger.warning("No available 
GPU was found, but --cpu was not specified") + logger.warning("ProstT5 will be run with CPU only") # logger device only if the function is called logger.info("Using device: {}".format(dev_name)) diff --git a/src/phold/utils/VERSION b/src/phold/utils/VERSION index 8294c18..7693c96 100644 --- a/src/phold/utils/VERSION +++ b/src/phold/utils/VERSION @@ -1 +1 @@ -0.1.2 \ No newline at end of file +0.1.3 \ No newline at end of file From b6a2453f9b0532af8e6a76cbe5d426b0a4254988 Mon Sep 17 00:00:00 2001 From: gbouras13 Date: Tue, 19 Mar 2024 15:28:46 +1030 Subject: [PATCH 2/5] fix docs a bit --- README.md | 2 +- docs/install.md | 23 ++++++++++++----------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 6669207..8081dd6 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ If you have an NVIDIA GPU, please try: mamba create -n pholdENV -c conda-forge -c bioconda phold pytorch=*=cuda* ``` -If you have a Mac running Apple Silicon chip (M1/2/3), `phold` should be able to use the GPU. Please try: +If you have a Mac running an Apple Silicon chip (M1/M2/M3), `phold` should be able to use the GPU. Please try: ```bash mamba create -n pholdENV python==3.11 diff --git a/docs/install.md b/docs/install.md index ab50225..7834d32 100644 --- a/docs/install.md +++ b/docs/install.md @@ -38,6 +38,17 @@ cd phold pip install -e . ``` +## Mac (M1/M2/M3) + +If you have a Mac that runs Apple Silicon (M1/M2/M3), please try: + +```bash +mamba create -n pholdENV python==3.11 +conda activate pholdENV +mamba install pytorch::pytorch torchvision torchaudio -c pytorch +mamba install -c conda-forge -c bioconda phold +``` + ## Torch To utilise `phold` with GPU, a GPU compatible version of `pytorch` must be installed. 
@@ -46,7 +57,7 @@ If it is not automatically installed via the installation methods above, please If you have an older version of the CUDA driver installed on your NVIDIA GPU, then you might find [this link useful](https://pytorch.org/get-started/previous-versions/). -Phold has been tested on NVIDIA GPUs (A100, RTX4090) and AMD GPUs (Radeon). +Phold has been tested on NVIDIA (A100, RTX4090), AMD (MI250) and Mac (M1 Pro) GPUs. Installation on AMD GPUs requires a version of `torch` compatible with rocm e.g. @@ -95,16 +106,6 @@ We would recommend installing `phold` into a fresh environment. Assuming you ins ```bash mamba create -n pholdENV -c conda-forge -c bioconda phold pytorch=*=cuda* ``` - -If you have a Mac that runs Apple Silicon (M1/M2/M3), please try: - -```bash -mamba create -n pholdENV python==3.11 -conda activate pholdENV -mamba install pytorch::pytorch torchvision torchaudio -c pytorch -mamba install -c conda-forge -c bioconda phold -``` - If you don't have a GPU: ```bash From a65048de3199871c9d9aa51491904bd77e05327a Mon Sep 17 00:00:00 2001 From: gbouras13 Date: Tue, 19 Mar 2024 15:42:02 +1030 Subject: [PATCH 3/5] fix memory bug with phold plot for lots of plots --- src/phold/plot/plot.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/phold/plot/plot.py b/src/phold/plot/plot.py index 7b15d4a..c6c844e 100644 --- a/src/phold/plot/plot.py +++ b/src/phold/plot/plot.py @@ -5,6 +5,7 @@ from pycirclize import Circos from pycirclize.parser import Genbank from matplotlib.lines import Line2D +import matplotlib.pyplot as plt from matplotlib.patches import Patch import numpy as np from Bio import SeqUtils @@ -625,3 +626,5 @@ def create_circos_plot( # Save the image as an SVG fig.savefig(svg_plot_file, format="svg", dpi=dpi) + + plt.close(fig) From ddbf116cc4c5f8044ad0fa06b19ea56db2ea4574 Mon Sep 17 00:00:00 2001 From: gbouras13 Date: Tue, 19 Mar 2024 16:08:27 +1030 Subject: [PATCH 4/5] add test for phold proteins-compare when pdb input is specified --- 
HISTORY.md | 6 +++++ src/phold/features/create_foldseek_db.py | 2 +- tests/test_data/NC_043029_aa.fasta | 30 ++++++++++++++++++++++++ tests/test_integration.py | 8 ++++++- 4 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 tests/test_data/NC_043029_aa.fasta diff --git a/HISTORY.md b/HISTORY.md index cfe87c6..e184333 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,11 @@ # History +0.1.3 (2024-03-19) +------------------ + +* Adds compatibility with Apple Silicon (M1/M2/M3) GPUs +* Fixes memory issue for `phold plot` with many contigs + 0.1.2 (2024-03-06) ------------------ diff --git a/src/phold/features/create_foldseek_db.py b/src/phold/features/create_foldseek_db.py index 54ff8af..f93246f 100644 --- a/src/phold/features/create_foldseek_db.py +++ b/src/phold/features/create_foldseek_db.py @@ -210,7 +210,7 @@ def generate_foldseek_db_from_pdbs( if num_pdbs == 0: logger.error( - f"No pdbs with matching CDS ids were found at all. Check the {pdb_dir}" + f"No pdbs with matching CDS ids were found at all. 
Check the {pdb_dir} directory" ) # generate the db diff --git a/tests/test_data/NC_043029_aa.fasta b/tests/test_data/NC_043029_aa.fasta new file mode 100644 index 0000000..f60c617 --- /dev/null +++ b/tests/test_data/NC_043029_aa.fasta @@ -0,0 +1,30 @@ +>NC_043029_CDS_0001 +MAVDRARFRMAVEGGAGGFSPLSPGEKGQRAAAEIGPGSNTGQKGQQDAIIDYLTIVVPLSALEEVNCKKLDLLLFRIFGFRGEVVAGAIREKNWNFYEQSAVLIDRENEVVGRVGIGGKKSTVCLSLTGMGCKWIRDWARVYKQCSMLDAKITRVDCAHDDYEGERLDVHALREVAAQGGFTEGGCPPRHRFISDEGHNTGCTLYVGGKGHKELCVYEKGKAEGLPSSRWVRAEVRLYGKHMEIPLDVLLNPGAYLRGSYSALQDLIKGVCTRLRTIRKHVEVSAEAMVLWMERQVGPALSVLRGAFGDSWSDFCEARIVRDGHPGRFRGIAKGDALHRFVREELCPSAA* +>NC_043029_CDS_0002 +MPICRVKSAAVEERHNSKTNTINRSQTVGLDLGNGFELPFRVGLGSRPPYTPGEYDIDPQSFALSQYGDLVLKRYVDLVPLQAKAAAAPAKP* +>NC_043029_CDS_0003 +MAVLIPACREADLDTAAGTCTAVIWIPQPALLPELPIEDAQAIGAKIALLWAVAYVFRLIRKKIEQS* +>NC_043029_CDS_0004 +MHKMFNALKGKGAALAAVGTAALASAPAFASGGGGVDVGPVVTSINGALGPVGQIGAAVLLVLVGIKVYKWVRRAM* +>NC_043029_CDS_0005 +MGAPRDVTATGGQGRLPPPGLLTPWIGQGAWDGRVDLAVRMARGLRDHLRGLQLMRWVARVFASAFIRRVAVLLVAALVGWCFSGRAHAAACASYTDQCTEGAAKQGALAWGGAQSKCVAVAGPNGRAGGNVSSKKSEGAGRGYFTVKAECLLNGNVVTYVEPAPPAEQGQWFYTQSCDAQPSYTGTGPWGSGGSAKNGSLGCRNGCDGIWQTNADASKTWTPLGNTCPDDEKKTCETYGDGYYWNSLLKVCEPPEGKCQGGGRPNSLGQCAPEPCPEGMAQQADGTCKKKDNECPAGQVRSPDGKCLPGDGQCAKGEVRGQDGTCKKDADNDGNPDPVNEDSFSGGDDCSAPPSCSGSPIMCGQARIQWRIDCNTRKNRNIAGGTCASMPICTGEKCDAMEYSALLMQWRSACALEKMAQGNGNGGGDNGDTKAIRDALTGTGGAVTTAPDRPSSDVWAPRSGTPVKPDTGGYGWGRTCPQPPSFEVFGNVIQINTAPLCNWLILGGYFVMGLAALASLRIIASRDA* +>NC_043029_CDS_0006 +MPMLISTLLTALAALFRSKWGPWVAEAMVWLGLSWATNEFLVQPWIDQMEQAMRAGTPGGEFGALVIAYAGLMKFDVACTMIASAVTAKFAVGAAKTFLTKRA* +>NC_043029_CDS_0007 
+MPIELFTGQPGNGKTALMMERLVAEAKAASRPIFAAGIDGLDPGLATVLDDPRHWNNKDADGNYIVPDGSLIFVDEAWKWFGHLHDATRQQTPRHVLELAEHRHRGLDFVWTTQQPNQLYPFVRGLIGSHAHVVRRFGTKMLDVYRWGELNEEIKSLAKRDMAQRTTRLLPSQVFGQYKSAEVHTIKARIPFKVMLLPVLAIAAIVFAYLAYTSLRPSSFAGGEGKEGTQSASADAAPSPFRPAGAKEDAPRWPTAAAYAKDHLPRISTMPWTAPVFDERQARSDPQLVCMSSLEGLDAQGVRQEASCRCLTEQGTAYELSQPECRTLARNGPVYNPYRERSEERSTQRIEDLERSRPGVATTSAGGVAQHVERSMGTFPESPSYRSDSYMTTAPGPNKL* +>NC_043029_CDS_0008 +MTSSARELLKWLAVILMTGDHVAKVIYGGYVPGLSEAGRVAFPLFALVMAYNLAQPGADVGKSVRRLALWGAIAQPVHALAFGYWLPLNILLTFGVCAAAVYAACQRNWIVLAFAAVVLPAFVDYQWAGVAFVLLAWLGFRTGRLLLTLVAFAPLCAFNGNLWALVAIPAALGLSHTAWSVPRGRWTFYGYYVAHLACLGLLAPILRP* +>NC_043029_CDS_0009 +MERERPEYLQPIPRRRWEFPWLGMWAVLLLGGAGAGIWLHLKTGDAWNTRFMAAAETSDAAAPIEPSQADTDASRQVMIAEIRARRELAEIAAKRARAGRSDTPAHTDELRCINGIAFRRIPGGWENVPGAPCP* +>NC_043029_CDS_0010 +MRDRKLTGPWAGFSFKGGRLVTPEGRELEPQDLAWLSLTAAQAQEWRRMMESSRAIDKPRNPLSFNAASVVNLSDALAQRRKKRSPGAMAGPDAEPPAAVLPVPGPKRRQRV* +>NC_043029_CDS_0011 +MRSIDLLLDKAREKCERPSDRALAEKLRVTASAVSKWRKGGVITEMHATALAAIAGLDGEIVVRVMEEQAETPAQRRVWRSVLDRLSAAAAVLMLVVFAAPGAARAKAIDSQGSSGSDQPHSVYYVRIILGWLARLLPLPRHLLWHGA* +>NC_043029_CDS_0012 +MIDPFIAFVLLAAIVAVSIGSAKLVSWCLDRRGESARRSAREAAIVAEACAELAATGWTAEDEASFQAIRGQQLVFLKHLQEVRHG* +>NC_043029_CDS_0013 +MVKVLLFSAVLFGAVAILKDELYFAVVSALLGLLAYGFQAAEDRSNGR* +>NC_043029_CDS_0014 +MAVDQFREFLRDPFVVSVLGGVLLTGLYWSLVLALRAKGAGNGR* +>NC_043029_CDS_0015 +MAAECLVITKADWDQLMQLFAGMFLLLAFCAVFSPFDLHSWEYRVRRYLRRRRIARIRESV diff --git a/tests/test_integration.py b/tests/test_integration.py index ae2ffde..b84c229 100755 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -46,6 +46,7 @@ remote_fasta_dir: Path = f"{output_dir}/combined_truncated_phold_remote_fasta" proteins_predict_dir: Path = f"{output_dir}/combined_truncated_phold_proteins_predict" proteins_compare_dir: Path = f"{output_dir}/combined_truncated_phold_proteins_compare" +proteins_compare_pdb_dir: Path = f"{output_dir}/NC_043029_phold_proteins_compare_pdb" plots_dir: Path = f"{output_dir}/plot_output" 
@@ -57,7 +58,6 @@ def remove_directory(dir_path): if os.path.exists(dir_path): shutil.rmtree(dir_path) - @pytest.fixture(scope="session") def gpu_available(pytestconfig): return pytestconfig.getoption("gpu_available") @@ -129,6 +129,12 @@ def test_compare_pdb(): cmd = f"phold compare -i {input_gbk} -o {compare_pdb_dir} -t {threads} -d {database_dir} --pdb --pdb_dir {pdb_dir} -f" exec_command(cmd) +def test_proteins_compare_pdb(): + """test phold proteins-compare with pdbs input""" + input_faa: Path = f"{test_data}/NC_043029_aa.fasta" + cmd = f"phold proteins-compare -i {input_faa} -o {proteins_compare_pdb_dir} -t {threads} -d {database_dir} --pdb --pdb_dir {pdb_dir} -f" + exec_command(cmd) + def test_predict_fasta(gpu_available): """test phold predict with fasta input""" From aa7cd41bb91995bf859ee6e0167f9e318adb7c73 Mon Sep 17 00:00:00 2001 From: gbouras13 Date: Tue, 19 Mar 2024 17:19:46 +1030 Subject: [PATCH 5/5] add options for --threads with tests --- tests/conftest.py | 1 + tests/test_integration.py | 35 ++++++++++++++++++++++------------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index c4c2fa0..9897f3f 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,4 +6,5 @@ def pytest_addoption(parser): parser.addoption("--gpu_available", action="store_true") parser.addoption("--run_remote", action="store_true") + parser.addoption("--threads", action="store", default=1) diff --git a/tests/test_integration.py b/tests/test_integration.py index b84c229..56a81c6 100755 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -10,6 +10,10 @@ # to run with remote and with gpu pytest --run_remote --gpu_available . +# to run with 8 threads + +pytest --run_remote --gpu_available --threads 8 . 
+ """ # import @@ -51,7 +55,7 @@ logger.add(lambda _: sys.exit(1), level="ERROR") -threads = 1 +# threads = 1 def remove_directory(dir_path): @@ -66,6 +70,11 @@ def gpu_available(pytestconfig): def run_remote(pytestconfig): return pytestconfig.getoption("run_remote") +@pytest.fixture(scope="session") +def threads(pytestconfig): + return pytestconfig.getoption("threads") + + def exec_command(cmnd, stdout=subprocess.PIPE, stderr=subprocess.PIPE): """executes shell command and returns stdout if completes exit code 0 @@ -90,7 +99,7 @@ def test_install(): exec_command(cmd) -def test_run_genbank(gpu_available): +def test_run_genbank(gpu_available, threads): """test phold run with genbank input""" input_gbk: Path = f"{test_data}/combined_truncated_acr_defense_vfdb_card.gbk" cmd = f"phold run -i {input_gbk} -o {run_gbk_dir} -t {threads} -d {database_dir} -f" @@ -98,7 +107,7 @@ def test_run_genbank(gpu_available): cmd = f"{cmd} --cpu" exec_command(cmd) -def test_run_fasta(gpu_available): +def test_run_fasta(gpu_available, threads): """test phold run with genbank input""" input_fasta: Path = f"{test_data}/combined_truncated_acr_defense_vfdb_card.fasta" cmd = f"phold run -i {input_fasta} -o {run_fasta_dir} -t {threads} -d {database_dir} -f" @@ -107,7 +116,7 @@ def test_run_fasta(gpu_available): exec_command(cmd) -def test_predict_genbank(gpu_available): +def test_predict_genbank(gpu_available, threads): """test phold predict with genbank input""" input_gbk: Path = f"{test_data}/combined_truncated_acr_defense_vfdb_card.gbk" cmd = f"phold predict -i {input_gbk} -o {predict_gbk_dir} -t {threads} -d {database_dir} -f" @@ -116,27 +125,27 @@ def test_predict_genbank(gpu_available): exec_command(cmd) -def test_compare_genbank(): +def test_compare_genbank(threads): """test phold compare with genbank input""" input_gbk: Path = f"{test_data}/combined_truncated_acr_defense_vfdb_card.gbk" cmd = f"phold compare -i {input_gbk} -o {compare_gbk_dir} --predictions_dir {predict_gbk_dir} -t 
{threads} -d {database_dir} -f" exec_command(cmd) -def test_compare_pdb(): +def test_compare_pdb(threads): """test phold compare with pdbs input""" input_gbk: Path = f"{test_data}/NC_043029.gbk" cmd = f"phold compare -i {input_gbk} -o {compare_pdb_dir} -t {threads} -d {database_dir} --pdb --pdb_dir {pdb_dir} -f" exec_command(cmd) -def test_proteins_compare_pdb(): +def test_proteins_compare_pdb(threads): """test phold proteins-compare with pdbs input""" input_faa: Path = f"{test_data}/NC_043029_aa.fasta" cmd = f"phold proteins-compare -i {input_faa} -o {proteins_compare_pdb_dir} -t {threads} -d {database_dir} --pdb --pdb_dir {pdb_dir} -f" exec_command(cmd) -def test_predict_fasta(gpu_available): +def test_predict_fasta(gpu_available, threads): """test phold predict with fasta input""" input_fasta: Path = f"{test_data}/combined_truncated_acr_defense_vfdb_card.fasta" cmd = f"phold predict -i {input_fasta} -o {predict_fasta_dir} -t {threads} -d {database_dir} -f" @@ -145,14 +154,14 @@ def test_predict_fasta(gpu_available): exec_command(cmd) -def test_compare_fasta(): +def test_compare_fasta(threads): """test phold compare with fasta input""" input_fasta: Path = f"{test_data}/combined_truncated_acr_defense_vfdb_card.fasta" cmd = f"phold compare -i {input_fasta} -o {compare_fasta_dir} --predictions_dir {predict_fasta_dir} -t {threads} -d {database_dir} -f" exec_command(cmd) -def test_proteins_predict(gpu_available): +def test_proteins_predict(gpu_available, threads): """test phold proteins-predict""" input_fasta: Path = f"{test_data}/phanotate.faa" cmd = f"phold proteins-predict -i {input_fasta} -o {proteins_predict_dir} -t {threads} -d {database_dir} -f" @@ -161,7 +170,7 @@ def test_proteins_predict(gpu_available): exec_command(cmd) -def test_proteins_compare(): +def test_proteins_compare(threads): """test phold proteins-compare""" input_fasta: Path = f"{test_data}/phanotate.faa" cmd = f"phold proteins-compare -i {input_fasta} --predictions_dir {proteins_predict_dir} -o 
{proteins_compare_dir} -t {threads} -d {database_dir} -f" @@ -176,14 +185,14 @@ def test_plot(): -def test_remote_genbank(run_remote): +def test_remote_genbank(run_remote, threads): """test phold remote with genbank input""" input_gbk: Path = f"{test_data}/combined_truncated_acr_defense_vfdb_card.gbk" if run_remote is True: cmd = f"phold remote -i {input_gbk} -o {remote_gbk_dir} -t {threads} -d {database_dir} -f" exec_command(cmd) -def test_remote_fasta(run_remote): +def test_remote_fasta(run_remote, threads): """test phold remote with fasta input""" input_fasta: Path = ( f"{test_data}/combined_truncated_acr_defense_vfdb_card.fasta"