Skip to content

Commit

Permalink
init spark 3.1.2
Browse files Browse the repository at this point in the history
  • Loading branch information
AugustinPeyridieux committed Oct 1, 2021
1 parent 7a3036a commit 076edf7
Show file tree
Hide file tree
Showing 39 changed files with 2,475 additions and 0 deletions.
72 changes: 72 additions & 0 deletions technologies/job/spark/spark-3.1/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Base image: the "8-jre-slim-buster" tag of the official openjdk image.
FROM openjdk:8-jre-slim-buster

# Build-time parameters (override with --build-arg).
# SPARK_VERSION and HADOOP_VERSION together select the Spark binary
# distribution that is downloaded; TINI_VERSION is the tini release tag.
ARG SPARK_VERSION=3.1.2
ARG HADOOP_VERSION=3.2
ARG TINI_VERSION="v0.18.0"

# Keeps debconf from prompting during package installation.
# NOTE(review): as an ENV this value is also visible at runtime and in images
# built FROM this one. It is kept as ENV on purpose in case derived images
# rely on it; consider switching to ARG once that is confirmed not to be so.
ENV DEBIAN_FRONTEND=noninteractive

# Spark installation root. PATH is set in a separate instruction because
# $SPARK_HOME is only expanded once the ENV that defines it has been applied.
ENV SPARK_HOME=/opt/spark
ENV PATH="$PATH:$SPARK_HOME/bin"

# Add the Hadoop native library directory to the loader search path.
# The ${VAR:+...} form only emits the "existing value + ':'" prefix when
# LD_LIBRARY_PATH is already set and non-empty, so the result never starts
# with ':' (an empty entry, which the dynamic loader treats as the current
# working directory).
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/lib/hadoop/lib/native"

# LIGHT DEPENDENCIES START
# Installs the command-line tools used by later build steps (curl, wget, zip)
# together with general-purpose client utilities. apt-get is used instead of
# apt because apt's command-line interface is not meant for scripts. The
# package index is refreshed and removed in the same layer so no stale or
# unnecessary apt lists end up in the image.
RUN apt-get update -qq && \
    apt-get install -yqq --no-install-recommends \
        curl \
        ftp \
        krb5-user \
        openssh-client \
        telnet \
        unzip \
        wget \
        zip && \
    rm -rf /var/lib/apt/lists/*
# LIGHT DEPENDENCIES END

# TINI INSTALL START
# Optional hex SHA-256 digest of the tini binary. When non-empty, the
# downloaded file is checked against it and the build fails on mismatch.
# Empty (the default) skips the check.
ARG TINI_SHA256=""

# This step does the following, in order:
#   1. creates the Spark directory layout (/opt/spark, /opt/spark/work-dir)
#      and an empty /opt/spark/RELEASE marker file;
#   2. repoints /bin/sh at bash;
#   3. appends a pam_wheel rule to /etc/pam.d/su;
#   4. gives group root read/write access to /etc/passwd
#      (presumably so the entrypoint can register an arbitrary runtime UID;
#      verify against entrypoint.sh);
#   5. downloads the tini binary for TINI_VERSION into /usr/local/sbin,
#      optionally verifies it, links it to /sbin/tini and runs `tini -h`
#      as a smoke test.
RUN set -ex && \
    mkdir -p /opt/spark/work-dir && \
    touch /opt/spark/RELEASE && \
    ln -sfv /bin/bash /bin/sh && \
    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
    chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \
    TINI_HOME="/usr/local/sbin" && \
    curl -fSL "https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini" -o "${TINI_HOME}/tini" && \
    if [ -n "${TINI_SHA256}" ]; then echo "${TINI_SHA256}  ${TINI_HOME}/tini" | sha256sum -c -; fi && \
    chmod +x "${TINI_HOME}/tini" && \
    ln -s "${TINI_HOME}/tini" /sbin/tini && \
    "${TINI_HOME}/tini" -h
# TINI INSTALL END

# SPARK INSTALL START
# Downloads the Spark binary distribution selected by SPARK_VERSION and
# HADOOP_VERSION from the Apache archive, and keeps only its jars/, bin/ and
# sbin/ directories under /opt/spark. Download, extraction and cleanup happen
# in a single layer so the tarball never persists in the image.
#
# - /opt/spark is created up front so that `cp -R` always copies *into* it.
# - --progress=dot:giga keeps the build log short for this large download.
#
# NOTE(review): the tarball is not checked against a published checksum or
# signature; consider adding verification.
RUN set -ex && \
    mkdir -p /tmp/spark /opt/spark && \
    cd /tmp/spark && \
    wget --progress=dot:giga "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
    tar xf spark-*.tgz && \
    rm spark-*.tgz && \
    cp -R /tmp/spark/*/jars /tmp/spark/*/bin /tmp/spark/*/sbin /opt/spark && \
    rm -Rf /tmp/spark
# SPARK INSTALL END
# SPARK UI FIX SDKTECHNO#83 START
# Replaces two static Spark UI scripts (stagepage.js, utils.js) inside the
# spark-core jar with the patched copies shipped in assets/.
#
# The patched files are staged directly at their in-jar path
# (org/apache/spark/ui/static/) so that zip, run from the staging root,
# records them under the right entry names.
COPY assets/stagepage.js assets/utils.js /tmp/spark-ui-fix/org/apache/spark/ui/static/

# The jar is updated in place. `test -f` makes the build fail if the expected
# jar is missing, instead of letting zip silently create a new archive.
# The staging directory is removed afterwards so no patch inputs remain in
# the final filesystem.
RUN set -ex && \
    core_jar="/opt/spark/jars/spark-core_2.12-${SPARK_VERSION}.jar" && \
    test -f "${core_jar}" && \
    cd /tmp/spark-ui-fix && \
    zip "${core_jar}" \
        org/apache/spark/ui/static/stagepage.js \
        org/apache/spark/ui/static/utils.js && \
    rm -Rf /tmp/spark-ui-fix
# SPARK UI FIX SDKTECHNO#83 END

# Additional connector jars added to Spark's classpath directory, fetched
# from Maven Central: the Qubole Kinesis source for Spark SQL, the AWS SDK
# bundle, hadoop-aws, and the MongoDB Spark connector assembly.
#
# Files are downloaded into a dedicated temporary directory rather than the
# current working directory, so the `*.jar` glob below can only match the
# jars fetched here. wget returns a non-zero status if any URL fails, which
# aborts the build.
RUN set -ex && \
    mkdir -p /tmp/extra-jars && \
    wget --progress=dot:giga -P /tmp/extra-jars \
        https://repo1.maven.org/maven2/com/qubole/spark/spark-sql-kinesis_2.12/1.2.0_spark-3.0/spark-sql-kinesis_2.12-1.2.0_spark-3.0.jar \
        https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.11.375/aws-java-sdk-bundle-1.11.375.jar \
        https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.2.0/hadoop-aws-3.2.0.jar \
        https://repo1.maven.org/maven2/org/mongodb/spark/mongo-spark-connector_2.12/3.0.1/mongo-spark-connector_2.12-3.0.1-assembly.jar && \
    mv /tmp/extra-jars/*.jar /opt/spark/jars/ && \
    rm -Rf /tmp/extra-jars

# Directory the container process starts in (created if it does not exist).
WORKDIR /sandbox/

# Install the launcher script from the build context and make it executable
# (rwx for owner, rx for group and others).
COPY entrypoint.sh /opt/entrypoint.sh
RUN chmod 0755 /opt/entrypoint.sh

# Exec form: the script is started directly, without a wrapping shell.
ENTRYPOINT ["/opt/entrypoint.sh"]
Loading

0 comments on commit 076edf7

Please sign in to comment.