# Dockerfile.tmpl

# Build arguments declared ahead of the first FROM so they can be substituted into the
# image references of the FROM instructions below.
ARG BASE_IMAGE_REPO
ARG BASE_IMAGE_TAG
ARG CPU_BASE_IMAGE_NAME
ARG GPU_BASE_IMAGE_NAME
ARG LIGHTGBM_VERSION
ARG TORCH_VERSION
ARG TORCHAUDIO_VERSION
ARG TORCHVISION_VERSION
ARG JAX_VERSION

{{ if eq .Accelerator "gpu" }}
# GPU build: declare the stages that hold the LightGBM, PyTorch and jaxlib wheels (their
# /tmp/whl/*.whl files are copied later with COPY --from), then start the final image from
# the GPU base image.
FROM gcr.io/kaggle-images/python-lightgbm-whl:${GPU_BASE_IMAGE_NAME}-${BASE_IMAGE_TAG}-${LIGHTGBM_VERSION} AS lightgbm_whl
FROM gcr.io/kaggle-images/python-torch-whl:${GPU_BASE_IMAGE_NAME}-${BASE_IMAGE_TAG}-${TORCH_VERSION} AS torch_whl
FROM gcr.io/kaggle-images/python-jaxlib-whl:${GPU_BASE_IMAGE_NAME}-${BASE_IMAGE_TAG}-${JAX_VERSION} AS jaxlib_whl
FROM ${BASE_IMAGE_REPO}/${GPU_BASE_IMAGE_NAME}:${BASE_IMAGE_TAG}
{{ else }}
# CPU build: start directly from the CPU base image; no wheel stages are declared.
FROM ${BASE_IMAGE_REPO}/${CPU_BASE_IMAGE_NAME}:${BASE_IMAGE_TAG}
{{ end }}

# Ensures shared libraries installed with conda can be found by the dynamic link loader.
# The ${VAR:+...} form emits the "<old value>:" prefix only when the variable is already set
# and non-empty. Without it, an unset variable would produce ":/opt/conda/lib", whose empty
# first entry is interpreted as the current working directory.
ENV LIBRARY_PATH="${LIBRARY_PATH:+${LIBRARY_PATH}:}/opt/conda/lib" \
    LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/opt/conda/lib"

{{ if eq .Accelerator "gpu" }}
# The CUDA version is supplied as build arguments and also exported to the image environment.
ARG CUDA_MAJOR_VERSION
ARG CUDA_MINOR_VERSION
ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}
ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION}
# Select the matching CUDA installation through update-alternatives.
RUN update-alternatives --set cuda /usr/local/cuda-$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION
# NVIDIA binaries from the host are mounted to /opt/bin.
ENV PATH=/opt/bin:${PATH}
# Save the stub-free search path first (it is restored at the end of this file), then append
# the CUDA stubs to LD_LIBRARY_PATH to support building the GPU image on a CPU machine.
ENV LD_LIBRARY_PATH_NO_STUBS="$LD_LIBRARY_PATH"
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64/stubs"
# Expose the stub library under the libcuda.so.1 name as well.
RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1
{{ end }}

# TensorFlow version pins. Keep these variables in sync if base image is updated.
ENV TENSORFLOW_VERSION=2.16.1
# See https://github.com/tensorflow/io#tensorflow-version-compatibility
ENV TENSORFLOW_IO_VERSION=0.37.0

# An ARG declared before FROM is only visible to FROM itself, so the version arguments are
# declared again here to make their values available inside the build stage.
# See: https://docs.docker.com/engine/reference/builder/#understand-how-arg-and-from-interact
ARG LIGHTGBM_VERSION
ARG TORCH_VERSION
ARG TORCHAUDIO_VERSION
ARG TORCHVISION_VERSION
ARG JAX_VERSION

# Silence logs such as: KMP_AFFINITY: pid 6121 tid 6121 thread 0 bound to OS proc set 0
# See: https://stackoverflow.com/questions/57385766/disable-tensorflow-log-information
ENV KMP_WARNINGS=0
# Also make the KMP logs non-verbose.
# https://stackoverflow.com/questions/70250304/stop-tensorflow-from-printing-warning-message
ENV KMP_SETTINGS=false
# Suppress pip's "running as the root user" warning.
ENV PIP_ROOT_USER_ACTION=ignore

# Build helper script and nbconvert configuration files.
# COPY is used rather than ADD: these are plain local files, so none of ADD's extra
# behaviors (archive extraction, remote URLs) is wanted.
COPY clean-layer.sh /tmp/clean-layer.sh
COPY patches/nbconvert-extensions.tpl /opt/kaggle/nbconvert-extensions.tpl
COPY patches/template_conf.json /opt/kaggle/conf.json

# Update GPG key per documentation at https://cloud.google.com/compute/docs/troubleshooting/known-issues
# The key is downloaded once to a file instead of being piped into apt-key twice: in a
# pipeline the default /bin/sh only reports the status of the last command, so a failed
# download would not by itself fail the step. `curl -f` also turns HTTP errors into a
# non-zero exit. sudo is dropped because the apt-get commands in this file already run
# without it.
RUN curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg -o /tmp/apt-key.gpg && \
    apt-key add /tmp/apt-key.gpg && \
    apt-key --keyring /usr/share/keyrings/cloud.google.gpg add /tmp/apt-key.gpg && \
    rm /tmp/apt-key.gpg

# Point apt at a fixed mirror to stop intermittent failures due to flaky httpredir
# connections, as described by Lionel Chan at http://stackoverflow.com/a/37426929/5881346
RUN sed -i "s/httpredir.debian.org/debian.uchicago.edu/" /etc/apt/sources.list \
    && apt-get update --allow-releaseinfo-change \
    # Build tooling needed by lightGBM (GPU build)
    # https://lightgbm.readthedocs.io/en/latest/GPU-Tutorial.html#build-lightgbm
    && apt-get install -y build-essential unzip cmake libboost-dev libboost-system-dev libboost-filesystem-dev p7zip-full \
    # b/182601974: ssh client was removed from the base image but is required for packages such as stable-baselines.
    && apt-get install -y openssh-client \
    # graphviz: the system package first, then the Python package of the same name.
    && apt-get install -y graphviz \
    && pip install graphviz \
    && /tmp/clean-layer.sh

# b/128333086: Point PROJ_DATA at the data directory of the proj4 cartographic library.
ENV PROJ_DATA="/opt/conda/share/proj"

# Install micromamba, configure its channels, and replace conda with micromamba.
ENV MAMBA_ROOT_PREFIX="/opt/conda"
RUN curl -L "https://micro.mamba.pm/install.sh" -o /tmp/micromamba-install.sh && \
    bash /tmp/micromamba-install.sh && \
    rm /tmp/micromamba-install.sh && \
    mv ~/.local/bin/micromamba /usr/bin/micromamba && \
    # When a conda executable is found on PATH, overwrite it with the micromamba binary.
    (!(which conda) || cp /usr/bin/micromamba $(which conda)) && \
    micromamba config append channels nvidia && \
    micromamba config append channels rapidsai && \
    micromamba config append channels conda-forge && \
    micromamba config set channel_priority flexible && \
    python -m nb_conda_kernels.install --disable

# Packages that are not available on pip are installed with micromamba.
# In a conda environment the conda commands should run before the remaining pip commands:
# https://www.anaconda.com/using-pip-in-a-conda-environment/
RUN micromamba install -y mkl cartopy imagemagick pyproj "shapely<2" \
    && rm -rf /opt/conda/lib/python3.10/site-packages/pyproj/proj_dir/ \
    && /tmp/clean-layer.sh

# Install spacy
# b/232247930: uninstall pyarrow to avoid double installation with the GPU specific version.
# b/341938540: uninstall grpc-cpp to allow >=v24.4 cudf and cuml to be installed.
# NOTE(review): no grpc-cpp removal is visible in this step — confirm the note above is current.
{{ if eq .Accelerator "gpu" }}
# GPU: spacy comes from micromamba together with cudf, cuml and cupy for the configured CUDA version.
RUN pip uninstall -y pyarrow \
    && micromamba install -vvvy spacy "cudf>=24.4" "cuml>=24.4" cupy cuda-version=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION \
    && /tmp/clean-layer.sh
{{ else }}
# CPU: spacy is installed from pip.
RUN pip install spacy \
    && /tmp/clean-layer.sh
{{ end }}

# Install PyTorch
{{ if eq .Accelerator "gpu" }}
# GPU: install the wheels copied from the torch_whl stage.
COPY --from=torch_whl /tmp/whl/*.whl /tmp/torch/
# b/356397043: We are currently using cuda 12.3,
# but magma-cuda121 is the latest compatible version
RUN micromamba install -y -c pytorch magma-cuda121 && \
    pip install /tmp/torch/*.whl && \
    # apt-get is the script-stable apt front end, and no sudo is needed: the other
    # apt-get commands in this file run without it.
    apt-get install -y libsox-dev && \
    rm -rf /tmp/torch && \
    /tmp/clean-layer.sh
{{ else }}
# CPU: install the +cpu builds of the pinned versions from the PyTorch CPU wheel index.
RUN pip install \
        torch==$TORCH_VERSION+cpu \
        torchvision==$TORCHVISION_VERSION+cpu \
        torchaudio==$TORCHAUDIO_VERSION+cpu \
        --index-url https://download.pytorch.org/whl/cpu && \
    /tmp/clean-layer.sh
{{ end }}

# Install LightGBM
{{ if eq .Accelerator "gpu" }}
# GPU: use the wheel(s) copied from the lightgbm_whl stage.
COPY --from=lightgbm_whl /tmp/whl/*.whl /tmp/lightgbm/
# OpenCL is required by the LightGBM GPU version; the ICD file registers the NVIDIA OpenCL library.
RUN apt-get install -y ocl-icd-libopencl1 clinfo \
    && mkdir -p /etc/OpenCL/vendors \
    && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd \
    && pip install /tmp/lightgbm/*.whl \
    && rm -rf /tmp/lightgbm \
    && /tmp/clean-layer.sh
{{ else }}
# CPU: install the pinned release from pip.
RUN pip install lightgbm==$LIGHTGBM_VERSION \
    && /tmp/clean-layer.sh
{{ end }}

# Install JAX
{{ if eq .Accelerator "gpu" }}
# GPU: install the wheel(s) copied from the jaxlib_whl stage together with the pinned jax.
COPY --from=jaxlib_whl /tmp/whl/*.whl /tmp/jax/
# b/319722433#comment9: Use pip wheels once versions matches our CUDA version.
RUN pip install /tmp/jax/*.whl jax==$JAX_VERSION && \
    /tmp/clean-layer.sh
{{ else }}
# CPU: the requirement is quoted because the square brackets of an extras specifier are
# a shell glob pattern and could otherwise be expanded against files in the working directory.
RUN pip install "jax[cpu]" && \
    /tmp/clean-layer.sh
{{ end }}


# GPU-only packages
{{ if eq .Accelerator "gpu" }}
# No specific package for nnabla-ext-cuda 12.x minor versions.
# PATH and CUDA_ROOT are exported for this RUN step only, ahead of the pip install
# (presumably so the pycuda build can locate the CUDA toolkit — confirm).
RUN export PATH=/usr/local/cuda/bin:$PATH \
    && export CUDA_ROOT=/usr/local/cuda \
    && pip install pycuda \
        pynvrtc \
        pynvml \
    && /tmp/clean-layer.sh
{{ end }}

# b/308525631: Matplotlib stays pinned until seaborn can be upgraded
# to >0.13.0 (now it's stuck by a package conflict with ydata-profiling 4.5.1).
# The installed "jax==<version>" line is captured from `pip freeze` and passed back to pip
# as a requirement of this `pip install --upgrade`.
RUN JAXVER=$(pip freeze | grep -e "^jax==") \
    && pip install --upgrade \
        "matplotlib<3.8.0" \
        # ipympl adds interactive widget support for matplotlib
        ipympl==0.7.0 \
        "seaborn==0.12.2" \
        pyupset \
        python-dateutil dask dask-expr igraph \
        pyyaml joblib geopy mne pyshp \
        pandas \
        polars \
        flax \
        "${JAXVER}" \
    && /tmp/clean-layer.sh

# Java runtime (presumably required by h2o, installed next — confirm).
RUN apt-get update \
    && apt-get install -y default-jre \
    && /tmp/clean-layer.sh

# NOTE(review): the find-links page is fetched over plain http; check whether pip honors a
# non-HTTPS find-links location here or ignores it and resolves h2o from the default index.
RUN pip install -f http://h2o-release.s3.amazonaws.com/h2o/latest_stable_Py.html h2o \
    && /tmp/clean-layer.sh

# TensorFlow and Keras stack; tensorflow and tensorflow-io use the versions set in
# TENSORFLOW_VERSION and TENSORFLOW_IO_VERSION.
RUN pip install \
        "tensorflow==${TENSORFLOW_VERSION}" \
        "tensorflow-io==${TENSORFLOW_IO_VERSION}" \
        tensorflow-probability \
        tensorflow_decision_forests \
        tensorflow-text \
        "tensorflow_hub>=0.16.0" \
        tf-keras \
        "keras>3" \
        keras-cv \
        keras-nlp \
    && /tmp/clean-layer.sh

# Place the patched keras_internal module and its test inside the installed
# tensorflow_decision_forests package. COPY is used rather than ADD because these are
# plain local files.
COPY patches/keras_internal.py \
     patches/keras_internal_test.py \
     /opt/conda/lib/python3.10/site-packages/tensorflow_decision_forests/keras/

# System libraries, a pinned numpy, NLP/ML Python packages and the NLTK corpora, all in one layer.
# b/350573866: xgboost v2.1.0 breaks learntools
RUN apt-get install -y libfreetype6-dev && \
    apt-get install -y libglib2.0-0 libxext6 libsm6 libxrender1 libfontconfig1 --fix-missing && \
    # Remove every numpy* entry from site-packages, then install the pinned numpy release.
    rm -rf /opt/conda/lib/python3.10/site-packages/numpy* && \
    pip install "numpy==1.26.4" && \
    # xgboost is pinned to 2.0.3 for b/350573866 (noted at the top of this step).
    pip install gensim \
        textblob \
        wordcloud \
        "xgboost==2.0.3" \
        pydot \
        hep_ml && \
    # NLTK Project datasets
    mkdir -p /usr/share/nltk_data && \
    # NLTK Downloader no longer continues smoothly after an error, so we explicitly list
    # the corpuses that work
    # "yes | ..." answers yes to the retry prompt in case of an error. See b/133762095.
    yes | python -m nltk.downloader -d /usr/share/nltk_data abc alpino averaged_perceptron_tagger \
    basque_grammars biocreative_ppi bllip_wsj_no_aux \
    book_grammars brown brown_tei cess_cat cess_esp chat80 city_database cmudict \
    comtrans conll2000 conll2002 conll2007 crubadan dependency_treebank \
    europarl_raw floresta gazetteers genesis gutenberg \
    ieer inaugural indian jeita kimmo knbc large_grammars lin_thesaurus mac_morpho machado \
    masc_tagged maxent_ne_chunker maxent_treebank_pos_tagger moses_sample movie_reviews \
    mte_teip5 names nps_chat omw opinion_lexicon paradigms \
    pil pl196x porter_test ppattach problem_reports product_reviews_1 product_reviews_2 propbank \
    pros_cons ptb punkt qc reuters rslp rte sample_grammars semcor senseval sentence_polarity \
    sentiwordnet shakespeare sinica_treebank smultron snowball_data spanish_grammars \
    state_union stopwords subjectivity swadesh switchboard tagsets timit toolbox treebank \
    twitter_samples udhr2 udhr unicode_samples universal_tagset universal_treebanks_v20 \
    vader_lexicon verbnet webtext word2vec_sample wordnet wordnet_ic words ycoe && \
    pip install scikit-image && \
    # NOTE(review): opencv-contrib-python and opencv-python are requested again by the next
    # pip install step in this file — one of the two requests looks redundant.
    pip install opencv-contrib-python opencv-python && \
    /tmp/clean-layer.sh

# General-purpose scientific, ML and tooling packages, followed by pandoc from apt.
RUN pip install cython \
        fasttext \
        opencv-contrib-python \
        opencv-python \
        "scipy<1.14.0" \
        # Accelerated scikit-learn for x86
        "scikit-learn-intelex>=2023.0.1" \
        # HDF5 file support
        h5py \
        # PUDB debugger, handy for local debugging
        pudb \
        imbalanced-learn \
        # Profiling and other utilities
        line_profiler \
        bokeh \
        numba \
        datashader \
        # b/328788268: libpysal 4.10 seems to fail with "module 'shapely' has no attribute 'Geometry'. Did you mean: 'geometry'"
        "libpysal==4.9.2" \
        # b/276344496: boto3 is pinned because 1.26.103 is broken.
        "boto3==1.26.100" \
        Boruta \
    # Pandoc is a dependency of deap
    && apt-get install -y pandoc \
    && /tmp/clean-layer.sh

# git-lfs, plus vtk and xvfbwrapper together with the system packages each one needs.
RUN apt-get install -y git-lfs \
    # vtk and its system dependency
    && apt-get install -y libgl1-mesa-glx \
    && pip install vtk \
    # xvfbwrapper and its system dependency
    && apt-get install -y xvfb \
    && pip install xvfbwrapper \
    && /tmp/clean-layer.sh

# Broad set of data-science packages, installed with two separate pip invocations in one layer.
RUN pip install mpld3 \
        gpxpy \
        arrow \
        nilearn \
        nibabel \
        imgaug \
        preprocessing \
        path.py && \
    # Second pip invocation; tpot is the only pinned requirement in it.
    pip install deap \
        # b/302136621: Fix eli5 import for learntools, newer version require scikit-learn > 1.3
        "tpot==0.12.1" \
        scikit-optimize \
        haversine \
        toolz cytoolz \
        plotly \
        hyperopt \
        langid \
        # Useful data exploration libraries (for missing data and generating reports)
        missingno \
        pandas-profiling \
        bayesian-optimization \
        matplotlib-venn \
        pyldavis \
        mlxtend \
        altair \
        ImageHash \
        ecos \
        CVXcanon \
        pymc3 \
        tifffile \
        geojson \
        pydicom \
        wavio \
        SimpleITK \
        squarify \
        fuzzywuzzy \
        python-louvain \
        pyexcel-ods \
        sklearn-pandas \
        prophet \
        holidays \
        holoviews \
        scikit-multilearn \
        leven \
        catboost \
        folium \
        scikit-plot \
        fury dipy \
        plotnine \
        scikit-surprise \
        pymongo \
        eli5 \
        kaggle \
        kagglehub \
        google-generativeai \
        pytest && \
    /tmp/clean-layer.sh

# Add Google PAIR-code Facets
# Clone Facets into /opt/facets and register its notebook extension, then install a few
# audio/NLP/profiling packages. The PYTHONPATH export only lasts for this RUN step.
RUN cd /opt/ \
    && git clone https://github.com/PAIR-code/facets \
    && cd facets/ \
    && jupyter nbextension install facets-dist/ --user \
    && export PYTHONPATH=$PYTHONPATH:/opt/facets/facets_overview/python/ \
    && pip install librosa \
        sentencepiece \
        cufflinks \
        lime \
        memory_profiler \
    && /tmp/clean-layer.sh

# annoy/category_encoders, the Google Cloud client libraries and a few text/visualization packages.
RUN pip install annoy \
        category_encoders && \
    # b/183041606#comment5: the Kaggle data proxy doesn't support these APIs. If the library is missing, it falls back to using a regular BigQuery query to fetch data.
    pip uninstall -y google-cloud-bigquery-storage && \
    # google-cloud-automl 2.0.0 introduced incompatible API changes, need to pin to 1.0.1
    # After launch this should be installed from pip
    pip install git+https://github.com/googleapis/python-aiplatform.git@mb-release \
        google-cloud-automl==1.0.1 \
        google-api-core==1.33.2 \
        google-cloud-bigquery \
        google-cloud-storage && \
    # Split these installations to avoid `pip._vendor.resolvelib.resolvers.ResolutionTooDeep: 200000`
    # b/315753846: Unpin translate package.
    # The "==2.*" specifiers are quoted so the shell passes them to pip verbatim instead of
    # treating the "*" as a pathname pattern.
    pip install google-cloud-translate==3.12.1 \
        "google-cloud-language==2.*" \
        "google-cloud-videointelligence==2.*" \
        "google-cloud-vision==2.*" \
        protobuf==3.20.3 \
        # Pandas data reader
        pandas-datareader \
        emoji \
        # Add Japanese morphological analysis engine
        janome \
        # yellowbrick machine learning visualization library
        yellowbrick \
        mlcrate && \
    /tmp/clean-layer.sh

# b/273059949: The pre-installed nbconvert is slow on html conversions and has to be force-uninstalled.
# b/274619697: learntools also requires a specific nbconvert right now
# Each pattern is spelled out: `{a,b}*` brace expansion is a bash feature, and shell-form RUN
# uses /bin/sh by default. A shell without brace expansion would pass the braces through
# literally, nothing would match, and `rm -rf` would still succeed without deleting anything.
RUN rm -rf \
        /opt/conda/lib/python3.10/site-packages/nbconvert* \
        /opt/conda/lib/python3.10/site-packages/nbclient* \
        /opt/conda/lib/python3.10/site-packages/mistune* \
        /opt/conda/lib/python3.10/site-packages/platformdirs*

# Jupyter/IPython stack and assorted utilities, plus openslide and its Python binding.
RUN pip install bleach \
        certifi \
        cycler \
        decorator \
        entrypoints \
        html5lib \
        ipykernel \
        ipython \
        ipython-genutils \
        # Fix qgrid by pinning ipywidgets https://github.com/quantopian/qgrid/issues/376
        ipywidgets==7.7.1 \
        isoweek \
        jedi \
        jsonschema \
        jupyter-client \
        jupyter-console \
        jupyter-core \
        jupyterlab-lsp \
        MarkupSafe \
        mistune \
        nbformat \
        notebook \
        "nbconvert==6.4.5" \
        papermill \
        # Quoted: the square brackets of an extras specifier are a shell glob pattern.
        "python-lsp-server[all]" \
        olefile \
        kornia \
        pandas_summary \
        pandocfilters \
        pexpect \
        pickleshare \
        Pillow && \
    # Install openslide and its python binding
    apt-get install -y openslide-tools && \
    pip install openslide-python \
        ptyprocess \
        Pygments \
        pyparsing \
        pytz \
        PyYAML \
        pyzmq \
        qtconsole \
        six \
        terminado \
        tornado \
        tqdm \
        traitlets \
        wcwidth \
        webencodings \
        widgetsnbextension \
        # Require pyarrow newer than https://github.com/advisories/GHSA-5wvp-7f3h-6wmm
        {{ if eq .Accelerator "gpu" }} pyarrow {{ else }} "pyarrow>=14.0.1" {{ end }} && \
    # Clean up in the same layer, as the other install steps in this file do.
    /tmp/clean-layer.sh

# Download the small and large English spaCy models, then install ffmpeg from apt.
RUN python -m spacy download en_core_web_sm \
    && python -m spacy download en_core_web_lg \
    && apt-get update \
    && apt-get install -y ffmpeg \
    && /tmp/clean-layer.sh

    ###########
    #
    #      NEW CONTRIBUTORS:
    # Please add new pip/apt installs in this block. Don't forget a "&& \" at the end
    # of all non-final lines. Thanks!
    #
    ###########

# Delete the direct_url.json and REQUESTED files from every google* entry in site-packages
# (presumably pip install metadata left by the git-based install above — confirm).
RUN rm /opt/conda/lib/python3.10/site-packages/google*/direct_url.json \
    && rm /opt/conda/lib/python3.10/site-packages/google*/REQUESTED
# NOTE(review): neither dlib nor nnabla is installed in this step; the notes below presumably
# record why — confirm before removing them.
# dlib has a libmkl incompatibility:
# test_dlib_face_detector (test_dlib.TestDLib) ... INTEL MKL ERROR: /opt/conda/bin/../lib/libmkl_avx512.so.2: undefined symbol: mkl_sparse_optimize_bsr_trsm_i8.
# Intel MKL FATAL ERROR: Cannot load libmkl_avx512.so.2 or libmkl_def.so.2.
# nnabla breaks protobuf compatibility.
RUN pip install wandb \
        pyemd \
        pympler \
        featuretools \
        # Disabled requirement: -e git+https://github.com/SohierDane/BigQuery_Helper#egg=bq_helper
        git+https://github.com/Kaggle/learntools \
        ray \
        gym \
        pyarabic \
        pandasql \
        # b/302136621: Fix eli5 import for learntools
        scikit-learn==1.2.2 \
        # b/329869023 shap 0.45.0 breaks learntools
        shap==0.44.1 \
        cesium \
        rgf_python \
        jieba \
        tsfresh \
        optuna \
        plotly_express \
        albumentations \
        Rtree \
        accelerate \
    && apt-get -y install libspatialindex-dev \
    # b/370860329: newer versions are not compatible with current tensorflow
    && rm -rf /opt/conda/lib/python3.10/site-packages/numpy* \
    && pip install "numpy==1.26.4" \
    && pip install pytorch-ignite \
        qgrid \
        bqplot \
        earthengine-api \
        transformers \
        datasets \
        s3fs \
        gcsfs \
        kaggle-environments \
        # geopandas > v0.14.4 breaks learn tools
        geopandas==v0.14.4 \
        "shapely<2" \
        pydub \
        pydegensac \
        torchmetrics \
        pytorch-lightning \
        sympy \
        # flask is used by agents in the simulation competitions.
        flask \
        # pycrypto is used by competitions team.
        pycryptodome \
        nbdev \
        easyocr \
        onnx \
        tables \
        openpyxl \
        timm \
        torchinfo \
    && pip install git+https://github.com/facebookresearch/segment-anything.git \
    # b/370860329: newer versions are not compatible with current tensorflow
    && pip install --no-dependencies fastai fastdownload \
    # b/343971718: remove duplicate aiohttp installs, and reinstall it
    && rm -rf /opt/conda/lib/python3.10/site-packages/aiohttp* \
    && micromamba install --force-reinstall -y aiohttp \
    && /tmp/clean-layer.sh

# Fetch the base EasyOCR models into /root/.EasyOCR/model: each archive is downloaded,
# unpacked in place and then deleted.
# https://github.com/JaidedAI/EasyOCR#usage
RUN mkdir -p /root/.EasyOCR/model \
    # latin_g2
    && wget --no-verbose "https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/latin_g2.zip" -O /root/.EasyOCR/model/latin.zip \
    && unzip /root/.EasyOCR/model/latin.zip -d /root/.EasyOCR/model/ \
    && rm /root/.EasyOCR/model/latin.zip \
    # english_g2
    && wget --no-verbose "https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/english_g2.zip" -O /root/.EasyOCR/model/english.zip \
    && unzip /root/.EasyOCR/model/english.zip -d /root/.EasyOCR/model/ \
    && rm /root/.EasyOCR/model/english.zip \
    # craft_mlt_25k
    && wget --no-verbose "https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip" -O /root/.EasyOCR/model/craft_mlt_25k.zip \
    && unzip /root/.EasyOCR/model/craft_mlt_25k.zip -d /root/.EasyOCR/model/ \
    && rm /root/.EasyOCR/model/craft_mlt_25k.zip \
    && /tmp/clean-layer.sh

# Tesseract OCR from apt, plus related Python utility packages from pip.
RUN apt-get install tesseract-ocr -y \
    && pip install pytesseract \
        wand \
        pdf2image \
        PyPDF \
    && /tmp/clean-layer.sh

# Location of the tesseract binary.
ENV TESSERACT_PATH=/usr/bin/tesseract
# For Facets
ENV PYTHONPATH=$PYTHONPATH:/opt/facets/facets_overview/python/
# For Theano with MKL
ENV MKL_THREADING_LAYER=GNU

# Temporary fixes and patches, applied in a single layer.
# Temporary patch for Dask getting downgraded, which breaks Keras
RUN pip install --upgrade dask && \
    # Stop jupyter nbconvert trying to rewrite its folder hierarchy
    # (the marker files are created under both /root/.jupyter and /.jupyter).
    mkdir -p /root/.jupyter && touch /root/.jupyter/jupyter_nbconvert_config.py && touch /root/.jupyter/migrated && \
    mkdir -p /.jupyter && touch /.jupyter/jupyter_nbconvert_config.py && touch /.jupyter/migrated && \
    # Stop Matplotlib printing junk to the console on first load
    # (the matching line of font_manager.py is replaced by a comment).
    sed -i "s/^.*Matplotlib is building the font cache using fc-list.*$/# Warning removed by Kaggle/g" /opt/conda/lib/python3.10/site-packages/matplotlib/font_manager.py && \
    # Make matplotlib output in Jupyter notebooks display correctly
    mkdir -p /etc/ipython/ && echo "c = get_config(); c.IPKernelApp.matplotlib = 'inline'" > /etc/ipython/ipython_config.py && \
    # Temporary patch for broken libpixman 0.38 in conda-forge, symlink to system libpixman 0.34 until conda package gets updated to 0.38.5 or higher.
    ln -sf /usr/lib/x86_64-linux-gnu/libpixman-1.so.0.34.0 /opt/conda/lib/libpixman-1.so.0.38.0 && \
    # b/333854354: pin jupyter-server to version 2.12.5; later versions break LSP (b/333854354)
    pip install --force-reinstall --no-deps jupyter_server==2.12.5 && \
    /tmp/clean-layer.sh

# Make bq_helper importable without downgrading setuptools: clone the repository, move its
# modules into a bq_helper/ package directory, patch setup.py to declare that package, and
# install it in editable mode.
RUN mkdir -p ~/src \
    && git clone https://github.com/SohierDane/BigQuery_Helper ~/src/BigQuery_Helper \
    && mkdir -p ~/src/BigQuery_Helper/bq_helper \
    && mv ~/src/BigQuery_Helper/bq_helper.py ~/src/BigQuery_Helper/bq_helper/__init__.py \
    && mv ~/src/BigQuery_Helper/test_helper.py ~/src/BigQuery_Helper/bq_helper/ \
    && sed -i 's/)/packages=["bq_helper"])/g' ~/src/BigQuery_Helper/setup.py \
    && pip install -e ~/src/BigQuery_Helper \
    && /tmp/clean-layer.sh

# Add BigQuery client proxy settings
# key=value is the current ENV syntax; the space-separated form is a legacy format.
ENV PYTHONUSERBASE="/root/.local"
# Copy the Kaggle patch modules into the site-packages directory under PYTHONUSERBASE.
# COPY is used rather than ADD because these are plain local files.
COPY patches/kaggle_gcp.py \
     patches/kaggle_secrets.py \
     patches/kaggle_session.py \
     patches/kaggle_web_client.py \
     patches/kaggle_datasets.py \
     patches/log.py \
     patches/sitecustomize.py \
     /root/.local/lib/python3.10/site-packages/

# Override default imagemagick policies
# COPY is used rather than ADD because this is a plain local file.
COPY patches/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml

# Add Kaggle module resolver
# COPY is used rather than ADD because this is a plain local file.
COPY patches/kaggle_module_resolver.py /opt/conda/lib/python3.10/site-packages/tensorflow_hub/kaggle_module_resolver.py
# Patch tensorflow_hub/config.py: append an import of kaggle_module_resolver after the
# uncompressed_module_resolver import, and append a line registering KaggleFileResolver
# after each line matching `_install_default_resolvers()`.
RUN sed -i '/from tensorflow_hub import uncompressed_module_resolver/a from tensorflow_hub import kaggle_module_resolver' /opt/conda/lib/python3.10/site-packages/tensorflow_hub/config.py && \
    sed -i '/_install_default_resolvers()/a \ \ registry.resolver.add_implementation(kaggle_module_resolver.KaggleFileResolver())' /opt/conda/lib/python3.10/site-packages/tensorflow_hub/config.py && \
    # Disable preloaded jupyter modules (they add to startup, and break when they are missing)
    sed -i /bq_stats/d /etc/ipython/ipython_kernel_config.py && \
    sed -i /beatrix/d /etc/ipython/ipython_kernel_config.py && \
    sed -i /bigquery/d /etc/ipython/ipython_kernel_config.py && \
    sed -i /sql/d /etc/ipython/ipython_kernel_config.py

# Force only one libcusolver
{{ if eq .Accelerator "gpu" }}
# GPU image: delete the libcusolver.so.11 under /opt/conda/lib and link that name to the
# copy under /usr/local/cuda/lib64.
RUN rm /opt/conda/bin/../lib/libcusolver.so.11 && ln -s /usr/local/cuda/lib64/libcusolver.so.11 /opt/conda/bin/../lib/libcusolver.so.11
{{ else }}
# CPU image: only the link is created, nothing is removed first.
# NOTE(review): whether /usr/local/cuda/lib64/libcusolver.so.11 exists in the CPU base image
# is not visible here; if it does not, this link dangles — confirm it is intended.
RUN ln -s /usr/local/cuda/lib64/libcusolver.so.11 /opt/conda/bin/../lib/libcusolver.so.11
{{ end }}

# b/270147159: conda's libtinfo lacks version info, which causes warnings; swap it for a
# link to the system library.
RUN rm /opt/conda/lib/libtinfo.so.6 \
    && ln -s /usr/lib/x86_64-linux-gnu/libtinfo.so.6 /opt/conda/lib/libtinfo.so.6 \
    # b/276358430: fix Jupyter lsp freezing up the jupyter server
    && pip install "jupyter-lsp==1.5.1"

# Matplotlib backend.
ENV MPLBACKEND="agg"
# Locale setting (LC_ALL), see
# https://github.com/explosion/spaCy/issues/12872#issuecomment-1661847588
ENV LC_ALL="POSIX"

# Build provenance: both arguments default to "unknown" and are recorded as image labels,
# environment variables and files under /etc.
ARG GIT_COMMIT=unknown
ARG BUILD_DATE=unknown

LABEL git-commit=$GIT_COMMIT
LABEL build-date=$BUILD_DATE

ENV GIT_COMMIT=${GIT_COMMIT}
ENV BUILD_DATE=${BUILD_DATE}

LABEL tensorflow-version=$TENSORFLOW_VERSION
# Used in the Jenkins `Docker GPU Build` step to restrict the images being pruned.
LABEL kaggle-lang=python

# Lets the current release be matched to its git hash from the kernel editor by running `!cat /etc/git_commit`.
RUN echo "$GIT_COMMIT" > /etc/git_commit \
    && echo "$BUILD_DATE" > /etc/build_date

{{ if eq .Accelerator "gpu" }}
# Drop the CUDA stubs by restoring the search path saved in LD_LIBRARY_PATH_NO_STUBS.
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH_NO_STUBS"
# Add the CUDA home.
ENV CUDA_HOME=/usr/local/cuda
{{ end }}