From 7160efcaf0b37b796cd843627c426e0e7336f451 Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Fri, 1 Nov 2024 08:27:09 -0500 Subject: [PATCH 01/12] latest vcflib --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index dd66275..bcf9d1b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -113,7 +113,7 @@ RUN git clone https://github.com/pangenome/vcfbub \ RUN git clone --recursive https://github.com/vcflib/vcflib.git \ && cd vcflib \ - && git checkout 404b98a6a0601a8668fb039eae5196fa1ae12525 \ + && git checkout 0272f2d8ebcb70ca9b7f23a0aed3991e0a63ae6b \ && mkdir -p build \ && cd build \ && cmake -DZIG=OFF -DCMAKE_BUILD_TYPE=Debug -DWFA_GITMODULE=ON .. && cmake --build . -- -j $(nproc) \ From 74d9afb82190b8efa6bbba2b59ef8b3fd616f035 Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Fri, 1 Nov 2024 09:01:09 -0500 Subject: [PATCH 02/12] try default R --- Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index bcf9d1b..dd47cd5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -165,10 +165,10 @@ RUN wget https://github.com/RealTimeGenomics/rtg-tools/releases/download/3.12.1/ # Install base R # NOTE: we might have to go the conda way on the long run # https://www.reddit.com/r/Rlanguage/comments/oi31xn/installing_r41_on_debian_bullseye_testing/ -RUN apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-key B8F25A8A73EACF41 \ - && echo "deb http://cloud.r-project.org/bin/linux/debian bullseye-cran40/" > /etc/apt/sources.list.d/r-packages.list \ - && apt update \ - && apt install -y r-base \ +#RUN apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-key B8F25A8A73EACF41 \ +# && echo "deb http://cloud.r-project.org/bin/linux/debian bullseye-cran40/" > /etc/apt/sources.list.d/r-packages.list \ +# && apt update \ +RUN apt install -y r-base \ && apt-get clean \ && apt-get purge \ && rm -rf /var/lib/apt/lists/* \ From e4982cb781319ec19c6e8b3b1518ffe8fde822ac Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Fri, 1 Nov 2024 09:27:07 -0500 Subject: [PATCH 03/12] rework R installation; reduce docker image size --- Dockerfile | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/Dockerfile b/Dockerfile index dd47cd5..a009aa4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,7 +9,7 @@ LABEL about.license="SPDX:MIT" # dependencies RUN apt-get update \ - && apt-get install -y \ + && apt-get install -y --no-install-recommends \ git \ bash \ cmake \ @@ -162,18 +162,22 @@ RUN wget https://github.com/RealTimeGenomics/rtg-tools/releases/download/3.12.1/ && unzip rtg-tools-3.12.1-linux-x64.zip && sed -i 's/read -r -p "Would you like to enable automatic usage logging (y\/n)? " REPLY/REPLY="n"/g' /rtg-tools-3.12.1/rtg \ && ln -s /rtg-tools-3.12.1/rtg /usr/local/bin/ && rtg help -# Install base R -# NOTE: we might have to go the conda way on the long run -# https://www.reddit.com/r/Rlanguage/comments/oi31xn/installing_r41_on_debian_bullseye_testing/ -#RUN apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-key B8F25A8A73EACF41 \ -# && echo "deb http://cloud.r-project.org/bin/linux/debian bullseye-cran40/" > /etc/apt/sources.list.d/r-packages.list \ -# && apt update \ -RUN apt install -y r-base \ +# Install R and required packages +RUN apt-get install -y --no-install-recommends \ + software-properties-common \ + dirmngr \ + wget \ + gpg-agent \ + && wget -qO- https://cloud.r-project.org/bin/linux/debian/marutter_pubkey.asc | tee -a /etc/apt/trusted.gpg.d/cran_debian_key.asc \ + && echo "deb https://cloud.r-project.org/bin/linux/debian bullseye-cran40/" > /etc/apt/sources.list.d/r-packages.list \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + r-base \ && apt-get clean \ - && apt-get purge \ && rm -rf /var/lib/apt/lists/* \ && wget https://cran.r-project.org/src/contrib/Archive/data.table/data.table_1.15.2.tar.gz \ - && R CMD INSTALL data.table_1.15.2.tar.gz + && R CMD INSTALL data.table_1.15.2.tar.gz \ + && rm data.table_1.15.2.tar.gz RUN wget https://github.com/arq5x/bedtools2/releases/download/v2.31.0/bedtools.static \ && mv bedtools.static /usr/local/bin/bedtools \ From 668889173f2a6bb5fc033736bd65584f65badbf9 Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Fri, 1 Nov 2024 09:45:05 -0500 Subject: [PATCH 04/12] RUN apt-get update --- Dockerfile | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index a009aa4..465d07a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -163,11 +163,12 @@ RUN wget https://github.com/RealTimeGenomics/rtg-tools/releases/download/3.12.1/ && ln -s /rtg-tools-3.12.1/rtg /usr/local/bin/ && rtg help # Install R and required packages -RUN apt-get install -y --no-install-recommends \ - software-properties-common \ - dirmngr \ - wget \ - gpg-agent \ +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + software-properties-common \ + dirmngr \ + wget \ + gpg-agent \ && wget -qO- https://cloud.r-project.org/bin/linux/debian/marutter_pubkey.asc | tee -a /etc/apt/trusted.gpg.d/cran_debian_key.asc \ && echo "deb https://cloud.r-project.org/bin/linux/debian bullseye-cran40/" > /etc/apt/sources.list.d/r-packages.list \ && apt-get update \ From 28c0014697a7b87675b484e167701e3d7abbd9e9 Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Fri, 1 Nov 2024 10:02:31 -0500 Subject: [PATCH 05/12] fix key --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 465d07a..69d7c9f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -169,8 +169,8 @@ RUN apt-get update \ dirmngr \ wget \ gpg-agent \ - && wget -qO- https://cloud.r-project.org/bin/linux/debian/marutter_pubkey.asc | tee -a /etc/apt/trusted.gpg.d/cran_debian_key.asc \ - && echo "deb https://cloud.r-project.org/bin/linux/debian bullseye-cran40/" > /etc/apt/sources.list.d/r-packages.list \ + && wget -qO- https://cloud.r-project.org/bin/linux/debian/marutter_pubkey.asc | gpg --dearmor -o /etc/apt/trusted.gpg.d/cran.gpg \ + && echo "deb [signed-by=/etc/apt/trusted.gpg.d/cran.gpg] https://cloud.r-project.org/bin/linux/debian bullseye-cran40/" > /etc/apt/sources.list.d/r-packages.list \ && apt-get update \ && apt-get install -y --no-install-recommends \ r-base \ From a7bb41a2cd5e08cd8b35eb58e4bff2ce63a7530f Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Fri, 1 Nov 2024 10:18:06 -0500 Subject: [PATCH 06/12] add gnupg --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 69d7c9f..54a7bc6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -169,6 +169,7 @@ RUN apt-get update \ dirmngr \ wget \ gpg-agent \ + gnupg \ && wget -qO- https://cloud.r-project.org/bin/linux/debian/marutter_pubkey.asc | gpg --dearmor -o /etc/apt/trusted.gpg.d/cran.gpg \ && echo "deb [signed-by=/etc/apt/trusted.gpg.d/cran.gpg] https://cloud.r-project.org/bin/linux/debian bullseye-cran40/" > /etc/apt/sources.list.d/r-packages.list \ && apt-get update \ From 96623a3d3ad7ed1bbc9ab287110537ba71fe3930 Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Fri, 1 Nov 2024 10:44:12 -0500 Subject: [PATCH 07/12] update --- Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 54a7bc6..eb07348 100644 --- a/Dockerfile +++ b/Dockerfile @@ -170,8 +170,9 @@ RUN apt-get update \ wget \ gpg-agent \ gnupg \ - && wget -qO- https://cloud.r-project.org/bin/linux/debian/marutter_pubkey.asc | gpg --dearmor -o /etc/apt/trusted.gpg.d/cran.gpg \ - && echo "deb [signed-by=/etc/apt/trusted.gpg.d/cran.gpg] https://cloud.r-project.org/bin/linux/debian bullseye-cran40/" > /etc/apt/sources.list.d/r-packages.list \ + && wget -O- https://cloud.r-project.org/bin/linux/debian/marutter_pubkey.asc > /etc/apt/trusted.gpg.d/marutter.asc \ + && apt-key add /etc/apt/trusted.gpg.d/marutter.asc \ + && echo "deb https://cloud.r-project.org/bin/linux/debian bullseye-cran40/" > /etc/apt/sources.list.d/r-packages.list \ && apt-get update \ && apt-get install -y --no-install-recommends \ r-base \ From 473fe4ef0c604468ad1064500d250ec1d03844a4 Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Fri, 1 Nov 2024 11:14:42 -0500 Subject: [PATCH 08/12] update --- Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index eb07348..e55a4ca 100644 --- a/Dockerfile +++ b/Dockerfile @@ -170,9 +170,9 @@ RUN apt-get update \ wget \ gpg-agent \ gnupg \ - && wget -O- https://cloud.r-project.org/bin/linux/debian/marutter_pubkey.asc > /etc/apt/trusted.gpg.d/marutter.asc \ - && apt-key add /etc/apt/trusted.gpg.d/marutter.asc \ - && echo "deb https://cloud.r-project.org/bin/linux/debian bullseye-cran40/" > /etc/apt/sources.list.d/r-packages.list \ + && gpg --keyserver keyserver.ubuntu.com --recv-key '95C0FAF38DB3CCAD0C080A7BDC78B2DDEABC47B7' \ + && gpg --armor --export '95C0FAF38DB3CCAD0C080A7BDC78B2DDEABC47B7' | tee /etc/apt/trusted.gpg.d/cran_debian_key.asc \ + && echo "deb http://cloud.r-project.org/bin/linux/debian bullseye-cran40/" > /etc/apt/sources.list.d/r-packages.list \ && apt-get update \ && apt-get install -y --no-install-recommends \ r-base \ From 802e1d7fd5118652397b3188ec15ac9ea08a2ece Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Fri, 1 Nov 2024 13:50:14 -0500 Subject: [PATCH 09/12] update debian --- Dockerfile | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/Dockerfile b/Dockerfile index e55a4ca..2510da5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,8 @@ -FROM debian:bullseye-slim AS binary +FROM debian:bookworm-slim AS binary LABEL authors="Erik Garrison, Simon Heumos, Andrea Guarracino" LABEL description="Preliminary docker image containing all requirements for pggb pipeline" -LABEL base_image="debian:bullseye-slim" +LABEL base_image="debian:bookworm-slim" LABEL software="pggb" LABEL about.home="https://github.com/pangenome/pggb" LABEL about.license="SPDX:MIT" @@ -164,16 +164,6 @@ RUN wget https://github.com/RealTimeGenomics/rtg-tools/releases/download/3.12.1/ # Install R and required packages RUN apt-get update \ - && apt-get install -y --no-install-recommends \ - software-properties-common \ - dirmngr \ - wget \ - gpg-agent \ - gnupg \ - && gpg --keyserver keyserver.ubuntu.com --recv-key '95C0FAF38DB3CCAD0C080A7BDC78B2DDEABC47B7' \ - && gpg --armor --export '95C0FAF38DB3CCAD0C080A7BDC78B2DDEABC47B7' | tee /etc/apt/trusted.gpg.d/cran_debian_key.asc \ - && echo "deb http://cloud.r-project.org/bin/linux/debian bullseye-cran40/" > /etc/apt/sources.list.d/r-packages.list \ - && apt-get update \ && apt-get install -y --no-install-recommends \ r-base \ && apt-get clean \ From afba6a44fa65622add60c32dfbd5c55d8583ff6d Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Fri, 1 Nov 2024 14:09:44 -0500 Subject: [PATCH 10/12] update python stuff --- Dockerfile | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2510da5..daf45d4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,6 +16,8 @@ RUN apt-get update \ make \ g++-11 \ python3-dev \ + python3-pip \ + python3-venv \ pybind11-dev \ libbz2-dev \ bc \ @@ -38,6 +40,7 @@ RUN apt-get update \ libcairo2-dev \ unzip \ parallel \ + r-base \ && apt-get clean \ && apt-get purge \ && rm -rf /var/lib/apt/lists/* @@ -98,8 +101,6 @@ RUN git clone https://github.com/marschall-lab/GFAffix.git \ && cd ../ \ && rm -rf GFAffix -RUN pip install multiqc==1.22.2 - RUN wget https://github.com/vgteam/vg/releases/download/v1.59.0/vg && chmod +x vg && mv vg /usr/local/bin/vg RUN git clone https://github.com/pangenome/vcfbub \ @@ -122,10 +123,6 @@ RUN git clone --recursive https://github.com/vcflib/vcflib.git \ && cd ../ \ && rm -rf vcflib -# Community detection dependencies -RUN pip install igraph==0.11.5 -RUN pip install pycairo==1.26.1 - # Additional tools RUN git clone https://github.com/ekg/fastix.git \ && cd fastix \ @@ -162,13 +159,8 @@ RUN wget https://github.com/RealTimeGenomics/rtg-tools/releases/download/3.12.1/ && unzip rtg-tools-3.12.1-linux-x64.zip && sed -i 's/read -r -p "Would you like to enable automatic usage logging (y\/n)? " REPLY/REPLY="n"/g' /rtg-tools-3.12.1/rtg \ && ln -s /rtg-tools-3.12.1/rtg /usr/local/bin/ && rtg help -# Install R and required packages -RUN apt-get update \ - && apt-get install -y --no-install-recommends \ - r-base \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* \ - && wget https://cran.r-project.org/src/contrib/Archive/data.table/data.table_1.15.2.tar.gz \ +# Install R package +RUN wget https://cran.r-project.org/src/contrib/Archive/data.table/data.table_1.15.2.tar.gz \ && R CMD INSTALL data.table_1.15.2.tar.gz \ && rm data.table_1.15.2.tar.gz @@ -176,6 +168,15 @@ RUN wget https://github.com/arq5x/bedtools2/releases/download/v2.31.0/bedtools.s && mv bedtools.static /usr/local/bin/bedtools \ && chmod +x /usr/local/bin/bedtools +# Set up Python virtual environment +RUN python3 -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +# Install Python packages in virtual environment +RUN pip install multiqc==1.22.2 \ + && pip install igraph==0.11.5 \ + && pip install pycairo==1.26.1 + # copy required scripts COPY scripts/* /usr/local/bin/ COPY scripts /usr/local/bin/scripts/ From 74c6a4b3cfe2286a749688ce97e6a6bf6aec7c06 Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Fri, 1 Nov 2024 14:26:38 -0500 Subject: [PATCH 11/12] add libclang --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index daf45d4..477367e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,6 +41,7 @@ RUN apt-get update \ unzip \ parallel \ r-base \ + libclang-dev \ && apt-get clean \ && apt-get purge \ && rm -rf /var/lib/apt/lists/* From 7f35ae6567b7bb0b03e59a3b1da8e0ee1ff6a1b8 Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Fri, 1 Nov 2024 14:49:55 -0500 Subject: [PATCH 12/12] local abPOA is broken --- .github/workflows/build_and_test_docker.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_and_test_docker.yml b/.github/workflows/build_and_test_docker.yml index d499623..0236580 100644 --- a/.github/workflows/build_and_test_docker.yml +++ b/.github/workflows/build_and_test_docker.yml @@ -11,11 +11,11 @@ jobs: run: docker build . --file Dockerfile --target binary --tag pggb - name: Run a test on the DRB1-3123 dataset (SPOA) run: docker run -v ${PWD}/data/:/data pggb /bin/bash -c "pggb -i data/HLA/DRB1-3123.fa.gz -p 70 -s 3000 -G 800,900,1100 -n 10 -t 2 -Z -M -m -o drib1_spoa && ls drib1_spoa/* && head drib1_spoa/*.log -n 63" - - name: Run a test on the DRB1-3123 dataset (abPOA) - run: docker run -v ${PWD}/data/:/data pggb /bin/bash -c "pggb -i data/HLA/DRB1-3123.fa.gz -p 70 -s 3000 -G 800,900,1100 -n 10 -t 2 -Z -M -m -b -o drib1_abpoa && ls drib1_abpoa/* && head drib1_abpoa/*.log -n 63" + - name: Run a test on the DRB1-3123 dataset (abPOA global) + run: docker run -v ${PWD}/data/:/data pggb /bin/bash -c "pggb -i data/HLA/DRB1-3123.fa.gz -p 70 -s 3000 -G 800,900,1100 -n 10 -t 2 -Z -M -m --global-poa -b -o drib1_abpoa && ls drib1_abpoa/* && head drib1_abpoa/*.log -n 63" - name: Run a test on the DRB1-3123 dataset (paf) run: docker run -v ${PWD}/data/:/data pggb /bin/bash -c "pggb -i data/HLA/DRB1-3123.fa.gz -a data/paf/DRB1-3123.fa.15a1009.wfmash.paf -p 70 -s 3000 -G 2000 -n 10 -t 2 -Z -M -m -o drib1_paf && ls drib1_paf/* && head drib1_paf/*.log -n 63" - name: Run a test on the LPA dataset (SPOA global) - run: docker run -v ${PWD}/data/:/data pggb /bin/bash -c "pggb -i data/LPA/LPA.fa.gz -p 95 -s 20000 -G 800,900 -k 79 -t 2 -Z -O 0.001 -m -z -o lpa -V 'chm13,chm1:1000' && ls lpa/* && head lpa/*.log -n 63" + run: docker run -v ${PWD}/data/:/data pggb /bin/bash -c "pggb -i data/LPA/LPA.fa.gz -p 95 -s 20000 -G 800,900 -k 79 -t 2 -Z -O 0.001 -m --global-poa -o lpa -V 'chm13,chm1:1000' && ls lpa/* && head lpa/*.log -n 63" - name: Run a test for the gfa2evaluation script on a mini HPRC chrMT dataset run: docker run -v ${PWD}/data/:/data pggb /bin/bash -c "gfa2evaluation.sh data/chrM.pan.4.gfa chm13 data/test_eval 2"