-
Notifications
You must be signed in to change notification settings - Fork 40
/
Dockerfile-base-cuda
96 lines (72 loc) · 3.62 KB
/
Dockerfile-base-cuda
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
ARG BASE_IMAGE
FROM ${BASE_IMAGE}
RUN rm -f /etc/apt/sources.list.d/*
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 PIP_NO_CACHE_DIR=1
# We need to create sym links for the Slurm PMI headers if we are using
# Ubuntu 18.04 because they are not installed in a standard location.
ARG UBUNTU_VERSION
COPY dockerfile_scripts /tmp/det_dockerfile_scripts
RUN /tmp/det_dockerfile_scripts/install_deb_packages.sh
ARG WITH_NCCL
# Install debuild util, etc. for later compiling GDRcopy libraries
RUN if [ "$WITH_NCCL" = "1" ]; then apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y devscripts debhelper; fi
# Install GDRcopy so OMPI, NCCL, AWS plugin, etc, can see it
RUN if [ "$WITH_NCCL" = "1" ]; then /tmp/det_dockerfile_scripts/build_gdrcopy.sh; fi
ENV PATH="/opt/conda/bin:${PATH}"
ARG CONDA="${PATH}"
ENV PYTHONUNBUFFERED=1 PYTHONFAULTHANDLER=1 PYTHONHASHSEED=0
ARG PYTHON_VERSION
RUN /tmp/det_dockerfile_scripts/install_python.sh ${PYTHON_VERSION}
# Install fixed version of FFI package for Ubuntu 20.04.
# This is done after above stuff to make sure we get right version.
RUN /tmp/det_dockerfile_scripts/install_package_fixes.sh
ARG WITH_MPI
ARG WITH_OFI
ARG UCX_INSTALL_DIR=/container/ucx
ARG OMPI_INSTALL_DIR=/container/ompi
ARG OFI_INSTALL_DIR=/container/ofi
ARG OMPI_WITH_CUDA=1
RUN if [ "$WITH_MPI" = "1" ]; then /tmp/det_dockerfile_scripts/ompi.sh "$UBUNTU_VERSION" "$WITH_OFI" "$OMPI_WITH_CUDA"; fi
# Make sure OMPI/UCX show up in the right paths
ARG VERBS_LIB_DIR=/usr/lib/libibverbs
ARG UCX_LIB_DIR=${UCX_INSTALL_DIR}/lib:${UCX_INSTALL_DIR}/lib64
ARG UCX_PATH_DIR=${UCX_INSTALL_DIR}/bin
ARG OFI_LIB_DIR=${OFI_INSTALL_DIR}/lib:${OFI_INSTALL_DIR}/lib64
ARG OFI_PATH_DIR=${OFI_INSTALL_DIR}/bin
ARG OMPI_LIB_DIR=${OMPI_INSTALL_DIR}/lib
ARG OMPI_PATH_DIR=${OMPI_INSTALL_DIR}/bin
# Set up UCX_LIBS and OFI_LIBS
ENV UCX_LIBS="${VERBS_LIB_DIR}:${UCX_LIB_DIR}:${OMPI_LIB_DIR}:"
ENV OFI_LIBS="${VERBS_LIB_DIR}:${OFI_LIB_DIR}:${OMPI_LIB_DIR}:"
# If WITH_OFI is true, then set EXTRA_LIBS to OFI libs, else set to empty string
ENV EXTRA_LIBS="${WITH_OFI:+${OFI_LIBS}}"
# If EXTRA_LIBS is empty, set to UCX libs, else leave as OFI libs
ENV EXTRA_LIBS="${EXTRA_LIBS:-${UCX_LIBS}}"
# But, only add them if WITH_MPI
ENV LD_LIBRARY_PATH=${WITH_MPI:+$EXTRA_LIBS}$LD_LIBRARY_PATH
#USING OFI
ENV PATH=${WITH_OFI:+$PATH:${WITH_MPI:+$OFI_PATH_DIR:$OMPI_PATH_DIR}}
#USING UCX
ENV PATH=${PATH:-$CONDA:${WITH_MPI:+$UCX_PATH_DIR:$OMPI_PATH_DIR}}
# Enable running OMPI as root
ENV OMPI_ALLOW_RUN_AS_ROOT ${WITH_MPI:+1}
ENV OMPI_ALLOW_RUN_AS_ROOT_CONFIRM ${WITH_MPI:+1}
# We uninstall these packages after installing. This ensures that we can
# successfully install these packages into containers running as non-root.
# `pip` does not uninstall dependencies, so we still have all the dependencies
# installed.
RUN python -m pip install determined && python -m pip uninstall -y determined
RUN python -m pip install -r /tmp/det_dockerfile_scripts/notebook-requirements.txt && jupyter labextension disable "@jupyterlab/apputils-extension:announcements"
ENV JUPYTER_CONFIG_DIR=/run/determined/jupyter/config
ENV JUPYTER_DATA_DIR=/run/determined/jupyter/data
ENV JUPYTER_RUNTIME_DIR=/run/determined/jupyter/runtime
RUN /tmp/det_dockerfile_scripts/add_det_nobody_user.sh
RUN /tmp/det_dockerfile_scripts/install_libnss_determined.sh
# Set an entrypoint that can scrape up the host libfabric.so and then
# run the user command. This is intended to enable performant execution
# on non-IB systems that have a proprietary libfabric.
RUN mkdir -p /container/bin && \
cp /tmp/det_dockerfile_scripts/scrape_libs.sh /container/bin
ENTRYPOINT ["/container/bin/scrape_libs.sh"]
CMD ["/bin/bash"]
RUN rm -r /tmp/*