Skip to content

Commit

Permalink
Publish 3.4.4 to docker registry (#75)
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?

Publish 3.4.4 to docker registry

### Why are the changes needed?

To provide the docker image.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Pass the CIs.
  • Loading branch information
dongjoon-hyun authored Oct 27, 2024
1 parent cf333e1 commit 18c599e
Show file tree
Hide file tree
Showing 10 changed files with 368 additions and 1 deletion.
41 changes: 41 additions & 0 deletions .github/workflows/build_3.4.4.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

name: "Build and Test (3.4.4)"

on:
pull_request:
branches:
- 'master'
paths:
- '3.4.4/**'

jobs:
run-build:
strategy:
matrix:
image-type: ["all", "python", "scala", "r"]
name: Run
secrets: inherit
uses: ./.github/workflows/main.yml
with:
spark: 3.4.4
scala: 2.12
java: 11
image-type: ${{ matrix.image-type }}
3 changes: 2 additions & 1 deletion .github/workflows/publish-java11.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,10 @@ on:
spark:
description: 'The Spark version of Spark image.'
required: true
default: '3.4.3'
default: '3.4.4'
type: choice
options:
- 3.4.4
- 3.4.3
- 3.4.2
- 3.4.1
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ on:
- 3.5.2
- 3.5.1
- 3.5.0
- 3.4.4
- 3.4.3
- 3.4.2
- 3.4.1
Expand Down
29 changes: 29 additions & 0 deletions 3.4.4/scala2.12-java11-python3-r-ubuntu/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM spark:3.4.4-scala2.12-java11-ubuntu

USER root

RUN set -ex; \
apt-get update; \
apt-get install -y python3 python3-pip; \
apt-get install -y r-base r-base-dev; \
rm -rf /var/lib/apt/lists/*

ENV R_HOME /usr/lib/R

USER spark
26 changes: 26 additions & 0 deletions 3.4.4/scala2.12-java11-python3-ubuntu/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM spark:3.4.4-scala2.12-java11-ubuntu

USER root

RUN set -ex; \
apt-get update; \
apt-get install -y python3 python3-pip; \
rm -rf /var/lib/apt/lists/*

USER spark
28 changes: 28 additions & 0 deletions 3.4.4/scala2.12-java11-r-ubuntu/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM spark:3.4.4-scala2.12-java11-ubuntu

USER root

RUN set -ex; \
apt-get update; \
apt-get install -y r-base r-base-dev; \
rm -rf /var/lib/apt/lists/*

ENV R_HOME /usr/lib/R

USER spark
81 changes: 81 additions & 0 deletions 3.4.4/scala2.12-java11-ubuntu/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM eclipse-temurin:11-jre-focal

ARG spark_uid=185

RUN groupadd --system --gid=${spark_uid} spark && \
useradd --system --uid=${spark_uid} --gid=spark spark

RUN set -ex; \
apt-get update; \
apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \
mkdir -p /opt/spark; \
mkdir /opt/spark/python; \
mkdir -p /opt/spark/examples; \
mkdir -p /opt/spark/work-dir; \
chmod g+w /opt/spark/work-dir; \
touch /opt/spark/RELEASE; \
chown -R spark:spark /opt/spark; \
echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \
rm -rf /var/lib/apt/lists/*

# Install Apache Spark
# https://downloads.apache.org/spark/KEYS
ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.4.4/spark-3.4.4-bin-hadoop3.tgz \
SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.4.4/spark-3.4.4-bin-hadoop3.tgz.asc \
GPG_KEY=F28C9C925C188C35E345614DEDA00CE834F0FC5C

RUN set -ex; \
export SPARK_TMP="$(mktemp -d)"; \
cd $SPARK_TMP; \
wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \
wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \
export GNUPGHOME="$(mktemp -d)"; \
gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \
gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \
gpg --batch --verify spark.tgz.asc spark.tgz; \
gpgconf --kill all; \
rm -rf "$GNUPGHOME" spark.tgz.asc; \
\
tar -xf spark.tgz --strip-components=1; \
chown -R spark:spark .; \
mv jars /opt/spark/; \
mv RELEASE /opt/spark/; \
mv bin /opt/spark/; \
mv sbin /opt/spark/; \
mv kubernetes/dockerfiles/spark/decom.sh /opt/; \
mv examples /opt/spark/; \
ln -s "$(basename /opt/spark/examples/jars/spark-examples_*.jar)" /opt/spark/examples/jars/spark-examples.jar; \
mv kubernetes/tests /opt/spark/; \
mv data /opt/spark/; \
mv python/pyspark /opt/spark/python/pyspark/; \
mv python/lib /opt/spark/python/lib/; \
mv R /opt/spark/; \
chmod a+x /opt/decom.sh; \
cd ..; \
rm -rf "$SPARK_TMP";

COPY entrypoint.sh /opt/

ENV SPARK_HOME /opt/spark

WORKDIR /opt/spark/work-dir

USER spark

ENTRYPOINT [ "/opt/entrypoint.sh" ]
130 changes: 130 additions & 0 deletions 3.4.4/scala2.12-java11-ubuntu/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Prevent any errors from being silently ignored
set -eo pipefail

attempt_setup_fake_passwd_entry() {
# Check whether there is a passwd entry for the container UID
local myuid; myuid="$(id -u)"
# If there is no passwd entry for the container UID, attempt to fake one
# You can also refer to the https://github.com/docker-library/official-images/pull/13089#issuecomment-1534706523
# It's to resolve OpenShift random UID case.
# See also: https://github.com/docker-library/postgres/pull/448
if ! getent passwd "$myuid" &> /dev/null; then
local wrapper
for wrapper in {/usr,}/lib{/*,}/libnss_wrapper.so; do
if [ -s "$wrapper" ]; then
NSS_WRAPPER_PASSWD="$(mktemp)"
NSS_WRAPPER_GROUP="$(mktemp)"
export LD_PRELOAD="$wrapper" NSS_WRAPPER_PASSWD NSS_WRAPPER_GROUP
local mygid; mygid="$(id -g)"
printf 'spark:x:%s:%s:${SPARK_USER_NAME:-anonymous uid}:%s:/bin/false\n' "$myuid" "$mygid" "$SPARK_HOME" > "$NSS_WRAPPER_PASSWD"
printf 'spark:x:%s:\n' "$mygid" > "$NSS_WRAPPER_GROUP"
break
fi
done
fi
}

if [ -z "$JAVA_HOME" ]; then
JAVA_HOME=$(java -XshowSettings:properties -version 2>&1 > /dev/null | grep 'java.home' | awk '{print $3}')
fi

SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*"
for v in "${!SPARK_JAVA_OPT_@}"; do
SPARK_EXECUTOR_JAVA_OPTS+=( "${!v}" )
done

if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then
SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH"
fi

if ! [ -z "${PYSPARK_PYTHON+x}" ]; then
export PYSPARK_PYTHON
fi
if ! [ -z "${PYSPARK_DRIVER_PYTHON+x}" ]; then
export PYSPARK_DRIVER_PYTHON
fi

# If HADOOP_HOME is set and SPARK_DIST_CLASSPATH is not set, set it here so Hadoop jars are available to the executor.
# It does not set SPARK_DIST_CLASSPATH if already set, to avoid overriding customizations of this value from elsewhere e.g. Docker/K8s.
if [ -n "${HADOOP_HOME}" ] && [ -z "${SPARK_DIST_CLASSPATH}" ]; then
export SPARK_DIST_CLASSPATH="$($HADOOP_HOME/bin/hadoop classpath)"
fi

if ! [ -z "${HADOOP_CONF_DIR+x}" ]; then
SPARK_CLASSPATH="$HADOOP_CONF_DIR:$SPARK_CLASSPATH";
fi

if ! [ -z "${SPARK_CONF_DIR+x}" ]; then
SPARK_CLASSPATH="$SPARK_CONF_DIR:$SPARK_CLASSPATH";
elif ! [ -z "${SPARK_HOME+x}" ]; then
SPARK_CLASSPATH="$SPARK_HOME/conf:$SPARK_CLASSPATH";
fi

# SPARK-43540: add current working directory into executor classpath
SPARK_CLASSPATH="$SPARK_CLASSPATH:$PWD"

# Switch to spark if no USER specified (root by default) otherwise use USER directly
switch_spark_if_root() {
if [ $(id -u) -eq 0 ]; then
echo gosu spark
fi
}

case "$1" in
driver)
shift 1
CMD=(
"$SPARK_HOME/bin/spark-submit"
--conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
--conf "spark.executorEnv.SPARK_DRIVER_POD_IP=$SPARK_DRIVER_BIND_ADDRESS"
--deploy-mode client
"$@"
)
attempt_setup_fake_passwd_entry
# Execute the container CMD under tini for better hygiene
exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
;;
executor)
shift 1
CMD=(
${JAVA_HOME}/bin/java
"${SPARK_EXECUTOR_JAVA_OPTS[@]}"
-Xms"$SPARK_EXECUTOR_MEMORY"
-Xmx"$SPARK_EXECUTOR_MEMORY"
-cp "$SPARK_CLASSPATH:$SPARK_DIST_CLASSPATH"
org.apache.spark.scheduler.cluster.k8s.KubernetesExecutorBackend
--driver-url "$SPARK_DRIVER_URL"
--executor-id "$SPARK_EXECUTOR_ID"
--cores "$SPARK_EXECUTOR_CORES"
--app-id "$SPARK_APPLICATION_ID"
--hostname "$SPARK_EXECUTOR_POD_IP"
--resourceProfileId "$SPARK_RESOURCE_PROFILE_ID"
--podName "$SPARK_EXECUTOR_POD_NAME"
)
attempt_setup_fake_passwd_entry
# Execute the container CMD under tini for better hygiene
exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
;;

*)
# Non-spark-on-k8s command provided, proceeding in pass-through mode...
exec "$@"
;;
esac
2 changes: 2 additions & 0 deletions tools/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
"3.4.2": "F28C9C925C188C35E345614DEDA00CE834F0FC5C",
# issuer "[email protected]"
"3.4.3": "F28C9C925C188C35E345614DEDA00CE834F0FC5C",
# issuer "[email protected]"
"3.4.4": "F28C9C925C188C35E345614DEDA00CE834F0FC5C",
# issuer "[email protected]"
"3.5.0": "FC3AE3A7EAA1BAC98770840E7E1ABCC53AAA2216",
# issuer "[email protected]"
Expand Down
Loading

0 comments on commit 18c599e

Please sign in to comment.