forked from determined-ai/environments
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile-base-gpu
130 lines (107 loc) · 4.32 KB
/
Dockerfile-base-gpu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# The base image is chosen by the caller through the BASE_IMAGE build arg;
# no default is declared here.
ARG BASE_IMAGE
FROM $BASE_IMAGE

# Delete every extra apt source list file that came with the base image.
# NOTE(review): presumably this keeps third-party repositories in the base
# image from breaking the `apt-get update` calls below -- confirm.
RUN rm -f /etc/apt/sources.list.d/*

# UTF-8 locale, plus pip's no-cache-dir option set through the environment.
ENV LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    PIP_NO_CACHE_DIR=1
# UBUNTU_VERSION (e.g. "ubuntu18.04") selects the Slurm PMI header workaround
# performed as the last step of the package-install RUN below.
ARG UBUNTU_VERSION

# NOTE(review): presumably the runtime directory the OpenSSH server installed
# below expects to exist -- confirm.
RUN mkdir -p /var/run/sshd

# Install the OS-level packages (kept in alphabetical order for easy diffing),
# apply pending unattended upgrades, and remove the apt lists in the same layer
# so they do not end up in the image.
#
# The SSH host private keys are then deleted so they are not baked into the
# image (presumably they are generated when openssh-server is installed).
# `rm -f` is used so the build does not abort when a given key type is absent.
#
# Finally, on Ubuntu 18.04 we need to create a symlink for the Slurm PMI
# headers because they are not installed in a standard location.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        autoconf \
        automake \
        autotools-dev \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        daemontools \
        g++ \
        git \
        ibverbs-providers \
        krb5-user \
        libibverbs1 \
        libkrb5-dev \
        libnuma-dev \
        libnuma1 \
        libpmi2-0-dev \
        librdmacm1 \
        libssl-dev \
        libtool \
        make \
        nfs-common \
        openssh-client \
        openssh-server \
        pkg-config \
        unattended-upgrades \
        wget \
    && unattended-upgrade \
    && rm -rf /var/lib/apt/lists/* \
    && rm -f \
        /etc/ssh/ssh_host_ecdsa_key \
        /etc/ssh/ssh_host_ed25519_key \
        /etc/ssh/ssh_host_rsa_key \
    && if [ "$UBUNTU_VERSION" = "ubuntu18.04" ]; then ln -s /usr/include/slurm-wlm /usr/include/slurm; fi
# Helper scripts invoked by the build steps that follow.
COPY dockerfile_scripts /tmp/det_dockerfile_scripts

ARG WITH_NCCL
# Only when WITH_NCCL is exactly "1":
#   1. install the debuild util, etc. needed for compiling the GDRcopy libraries,
#   2. install GDRcopy so OMPI, NCCL, AWS plugin, etc, can see it,
#   3. remove the apt lists.
# All three happen in one RUN so the apt lists are still available while the
# GDRcopy script runs but never persist in an image layer. Recommended packages
# are intentionally still installed here, since what build_gdrcopy.sh relies on
# is not visible from this file.
RUN if [ "$WITH_NCCL" = "1" ]; then \
        apt-get update \
        && DEBIAN_FRONTEND=noninteractive apt-get install -y devscripts debhelper \
        && /tmp/det_dockerfile_scripts/build_gdrcopy.sh \
        && rm -rf /var/lib/apt/lists/*; \
    fi
# Put /opt/conda/bin at the front of PATH.
ENV PATH=/opt/conda/bin:${PATH}
# Snapshot of PATH as it stands right now (with /opt/conda/bin prepended).
# A later ENV PATH instruction in this file falls back to this value.
ARG CONDA=${PATH}

# Python interpreter settings applied to every process in the image.
ENV PYTHONUNBUFFERED=1 \
    PYTHONFAULTHANDLER=1 \
    PYTHONHASHSEED=0

# The requested Python version is handed to the install script as its only
# argument.
ARG PYTHON_VERSION
RUN /tmp/det_dockerfile_scripts/install_python.sh ${PYTHON_VERSION}

# Install fixed version of FFI package for Ubuntu 20.04.
# This runs after the steps above to make sure we get the right version.
RUN /tmp/det_dockerfile_scripts/install_package_fixes.sh
# Build switches: the MPI step below runs only when WITH_MPI is exactly "1";
# WITH_OFI is forwarded to the script as-is.
ARG WITH_MPI
ARG WITH_OFI
# Install prefixes. They are not passed on the script's command line.
# NOTE(review): presumably ompi.sh picks them up from the build-arg
# environment of the RUN step -- confirm against the script.
ARG UCX_INSTALL_DIR=/container/ucx
ARG OMPI_INSTALL_DIR=/container/ompi
ARG OFI_INSTALL_DIR=/container/ofi
ARG OMPI_WITH_CUDA=1
RUN if test "$WITH_MPI" = "1"; then \
        /tmp/det_dockerfile_scripts/ompi.sh "$UBUNTU_VERSION" "$WITH_OFI" "$OMPI_WITH_CUDA"; \
    fi
# Make sure OMPI/UCX show up in the right paths.
#
# Note on the `${VAR:+word}` expansions used throughout this section: they
# treat ANY non-empty value as "enabled" (so "0" counts as enabled), unlike the
# RUN steps in this file, which compare against the literal string "1".
ARG VERBS_LIB_DIR=/usr/lib/libibverbs
ARG UCX_LIB_DIR=${UCX_INSTALL_DIR}/lib:${UCX_INSTALL_DIR}/lib64
ARG UCX_PATH_DIR=${UCX_INSTALL_DIR}/bin
ARG OFI_LIB_DIR=${OFI_INSTALL_DIR}/lib:${OFI_INSTALL_DIR}/lib64
ARG OFI_PATH_DIR=${OFI_INSTALL_DIR}/bin
ARG OMPI_LIB_DIR=${OMPI_INSTALL_DIR}/lib
ARG OMPI_PATH_DIR=${OMPI_INSTALL_DIR}/bin

# Set up UCX_LIBS and OFI_LIBS. Both deliberately end in ":" so they can be
# prepended directly to LD_LIBRARY_PATH below.
ENV UCX_LIBS="${VERBS_LIB_DIR}:${UCX_LIB_DIR}:${OMPI_LIB_DIR}:"
ENV OFI_LIBS="${VERBS_LIB_DIR}:${OFI_LIB_DIR}:${OMPI_LIB_DIR}:"
# If WITH_OFI is non-empty, set EXTRA_LIBS to OFI libs, else set to empty string
ENV EXTRA_LIBS="${WITH_OFI:+${OFI_LIBS}}"
# If EXTRA_LIBS is empty, set to UCX libs, else leave as OFI libs
ENV EXTRA_LIBS="${EXTRA_LIBS:-${UCX_LIBS}}"
# But, only add them if WITH_MPI
# NOTE(review): if the base image leaves LD_LIBRARY_PATH empty, the value
# produced here ends in ":" (an empty search-path entry) -- confirm the base
# images always set LD_LIBRARY_PATH.
ENV LD_LIBRARY_PATH=${WITH_MPI:+$EXTRA_LIBS}$LD_LIBRARY_PATH

# PATH is computed in two steps because Dockerfile substitution has no
# if/else. In both steps the ":" separator lives INSIDE the WITH_MPI expansion
# so that PATH never ends with a bare ":" (an empty PATH entry is searched as
# the current directory) when WITH_MPI is empty.
#
# Step 1 (using OFI): when WITH_OFI is non-empty, keep PATH and, if WITH_MPI is
# also non-empty, append the OFI and OMPI bin directories. When WITH_OFI is
# empty this intentionally leaves PATH empty so that step 2 takes over.
ENV PATH=${WITH_OFI:+${PATH}${WITH_MPI:+:$OFI_PATH_DIR:$OMPI_PATH_DIR}}
# Step 2 (using UCX): when step 1 left PATH empty, restore it from the CONDA
# snapshot taken earlier in this file and, if WITH_MPI is non-empty, append the
# UCX and OMPI bin directories. Otherwise PATH is left exactly as step 1 set it.
ENV PATH=${PATH:-${CONDA}${WITH_MPI:+:$UCX_PATH_DIR:$OMPI_PATH_DIR}}

# Enable running OMPI as root (both variables are "1" when WITH_MPI is
# non-empty, and empty otherwise).
ENV OMPI_ALLOW_RUN_AS_ROOT=${WITH_MPI:+1}
ENV OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=${WITH_MPI:+1}
# Install the `determined` package and uninstall it again right away. `pip`
# does not uninstall dependencies, so every dependency stays installed in the
# image; this ensures the package can later be installed successfully in
# containers running as non-root.
RUN python -m pip install determined \
    && python -m pip uninstall -y determined

# Notebook-related Python requirements shipped with the helper scripts.
RUN python -m pip install -r /tmp/det_dockerfile_scripts/notebook-requirements.txt

# Point Jupyter's config, data and runtime directories under /run/determined.
ENV JUPYTER_CONFIG_DIR=/run/determined/jupyter/config \
    JUPYTER_DATA_DIR=/run/determined/jupyter/data \
    JUPYTER_RUNTIME_DIR=/run/determined/jupyter/runtime

# User and NSS setup, delegated to the helper scripts.
RUN /tmp/det_dockerfile_scripts/add_det_nobody_user.sh
RUN /tmp/det_dockerfile_scripts/install_libnss_determined.sh
# Copy the entrypoint wrapper out of the temporary script directory so it
# survives the /tmp cleanup below. The wrapper can scrape up the host
# libfabric.so and then run the user command; this is intended to enable
# performant execution on non-IB systems that have a proprietary libfabric.
RUN mkdir -p /container/bin \
    && cp /tmp/det_dockerfile_scripts/scrape_libs.sh /container/bin

# Remove the helper scripts and anything else left under /tmp from the final
# filesystem. Files added by earlier layers still occupy space in those layers.
RUN rm -r /tmp/*

# Every container command goes through the wrapper; the default command is an
# interactive shell.
ENTRYPOINT ["/container/bin/scrape_libs.sh"]
CMD ["/bin/bash"]