Skip to content

Commit

Permalink
+ Updated Hadoop to 3.2.2
Browse files Browse the repository at this point in the history
+ Updated Spark to 3.1.1
  • Loading branch information
Genarito committed Mar 3, 2021
1 parent 3d14fcf commit b9aacda
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 21 deletions.
38 changes: 22 additions & 16 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,31 @@ RUN apt update \
&& ssh-keygen -q -t rsa -N '' -f ~/.ssh/id_rsa \
&& cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys \
&& echo 'PermitRootLogin yes' >> /etc/ssh/sshd_config \
&& service ssh restart \
# Downloads and extracts Hadoop
&& wget http://apache.dattatec.com/hadoop/common/hadoop-3.1.3/hadoop-3.1.3.tar.gz \
&& service ssh restart

# Downloads and extracts Hadoop
RUN wget http://apache.dattatec.com/hadoop/common/hadoop-3.2.2/hadoop-3.2.2.tar.gz

# Configures Hadoop and removes downloaded .tar.gz file
&& tar -xzvf hadoop-3.1.3.tar.gz \
&& mv hadoop-3.1.3 $HADOOP_HOME \
RUN tar -xzvf hadoop-3.2.2.tar.gz \
&& mv hadoop-3.2.2 $HADOOP_HOME \
&& echo 'export JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:bin/java::")' >> $HADOOP_HOME/etc/hadoop/hadoop-env.sh \
&& echo 'export PATH=$PATH:$HADOOP_HOME/bin' >> ~/.bashrc \
&& echo 'export PATH=$PATH:$HADOOP_HOME/sbin' >> ~/.bashrc \
&& rm hadoop-3.1.3.tar.gz
# Downloads Apache Spark
RUN wget apache.dattatec.com/spark/spark-3.0.0/spark-3.0.0-bin-hadoop2.7.tgz \
# Decompress, adds to PATH and then removes .tgz Apache Spark file
&& tar -xvzf spark-3.0.0-bin-hadoop2.7.tgz \
&& mv spark-3.0.0-bin-hadoop2.7 sbin/ \
&& echo 'export PATH=$PATH:/sbin/spark-3.0.0-bin-hadoop2.7/sbin/' >> ~/.bashrc \
&& echo 'export PATH=$PATH:/sbin/spark-3.0.0-bin-hadoop2.7/bin/' >> ~/.bashrc \
&& rm spark-3.0.0-bin-hadoop2.7.tgz
RUN mv ${HADOOP_STREAMING_HOME}/hadoop-streaming-3.1.3.jar ${HADOOP_STREAMING_HOME}/hadoop-streaming.jar \
&& rm hadoop-3.2.2.tar.gz

# Downloads Apache Spark
RUN wget http://apache.dattatec.com/spark/spark-3.1.1/spark-3.1.1-bin-without-hadoop.tgz

# Decompress, adds to PATH and then removes .tgz Apache Spark file
# NOTE: the Spark bin folder is prepended to PATH so that its binaries take precedence over same-named binaries in /usr/local/bin
RUN tar -xvzf spark-3.1.1-bin-without-hadoop.tgz \
&& mv spark-3.1.1-bin-without-hadoop sbin/ \
&& echo 'export PATH=$PATH:/sbin/spark-3.1.1-bin-without-hadoop/sbin/' >> ~/.bashrc \
&& echo 'export PATH=/sbin/spark-3.1.1-bin-without-hadoop/bin/:$PATH' >> ~/.bashrc \
&& rm spark-3.1.1-bin-without-hadoop.tgz

RUN mv ${HADOOP_STREAMING_HOME}/hadoop-streaming-3.2.2.jar ${HADOOP_STREAMING_HOME}/hadoop-streaming.jar \
&& source ~/.bashrc

# Installs some extra libraries
Expand Down Expand Up @@ -66,7 +72,7 @@ COPY ./config/mapred-site.xml .
COPY ./config/yarn-site.xml .

# Spark settings
WORKDIR /sbin/spark-3.0.0-bin-hadoop2.7/conf/
WORKDIR /sbin/spark-3.1.1-bin-without-hadoop/conf/
COPY ./config/spark-env.sh .
COPY ./config/log4j.properties .

Expand Down
8 changes: 4 additions & 4 deletions config/spark-cmd.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,19 @@ sleep 5

# "start" action: launch a Spark daemon, then keep this script alive.
# NOTE(review): this is a rendered diff hunk without +/- markers. Each
# spark-3.0.0-bin-hadoop2.7 line looks like the removed side and the
# spark-3.1.1-bin-without-hadoop line right after it like its replacement;
# presumably only the 3.1.1 lines exist in the committed script.
if [[ $1 = "start" ]]; then
# Second argument "master-node" selects the master launcher.
if [[ $2 = "master-node" ]]; then
/sbin/spark-3.0.0-bin-hadoop2.7/sbin/start-master.sh
/sbin/spark-3.1.1-bin-without-hadoop/sbin/start-master.sh
# Block forever once the launcher returns (presumably so the container's
# main process does not exit; confirm). The `exit` below is only reached
# if the sleep terminates.
sleep infinity
exit
fi
# Any other second argument: run the worker launcher, passing master-node:7077
# as the master address. The launcher is start-slave.sh on the 3.0.0 line and
# start-worker.sh on the 3.1.1 line.
/sbin/spark-3.0.0-bin-hadoop2.7/sbin/start-slave.sh master-node:7077
/sbin/spark-3.1.1-bin-without-hadoop/sbin/start-worker.sh master-node:7077
sleep infinity
exit
fi

# "stop" action: shut down the Spark daemon for this node.
# NOTE(review): rendered diff hunk without +/- markers. The
# spark-3.0.0-bin-hadoop2.7 lines look like the removed side and the
# spark-3.1.1-bin-without-hadoop lines like their replacements.
if [[ $1 = "stop" ]]; then
# Second argument "master-node" selects the master stop script; `exit`
# keeps the worker stop script below from also running.
if [[ $2 = "master-node" ]]; then
/sbin/spark-3.0.0-bin-hadoop2.7/sbin/stop-master.sh
/sbin/spark-3.1.1-bin-without-hadoop/sbin/stop-master.sh
exit
fi
# Any other second argument: run the worker stop script (stop-slave.sh on the
# 3.0.0 line, stop-worker.sh on the 3.1.1 line).
/sbin/spark-3.0.0-bin-hadoop2.7/sbin/stop-slave.sh
/sbin/spark-3.1.1-bin-without-hadoop/sbin/stop-worker.sh
fi
3 changes: 2 additions & 1 deletion config/spark-env.sh
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
# Spark environment settings.
# NOTE(review): rendered diff (@@ -1 +1,2 @@) without +/- markers, so the
# HADOOP_CONF_DIR line appears twice: once as the old side, once as the new.
# Point HADOOP_CONF_DIR at the configuration directory under HADOOP_HOME.
export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
# Set SPARK_DIST_CLASSPATH to the output of `hadoop classpath`. Presumably
# needed because the Spark distribution in use is a "without-hadoop" build
# and must be told where the Hadoop jars are; confirm against the Spark docs.
export SPARK_DIST_CLASSPATH=$($HADOOP_HOME/bin/hadoop classpath)

0 comments on commit b9aacda

Please sign in to comment.