Skip to content

Commit

Permalink
[DAPHNE-daphne-eu#767] libhdfs3 + dependencies
Browse files Browse the repository at this point in the history
This commit adds the necessary packages for the container scripts and code in the build script to build the dependencies for HDFS support.

Co-authored-by: Mark Dokter <[email protected]>
  • Loading branch information
psomas and corepointer committed Sep 13, 2024
1 parent c1fb18d commit 7470486
Show file tree
Hide file tree
Showing 7 changed files with 135 additions and 9 deletions.
67 changes: 62 additions & 5 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ function printHelp {
echo " --debug Compile with support for debug mode"
echo " --fpgaopencl Compile with support for Intel PAC D5005 FPGA"
echo " --mpi Compile with support for MPI"
echo " --hdfs Compile with support for HDFS"
echo " --no-papi Compile without support for PAPI"
}

Expand Down Expand Up @@ -449,6 +450,7 @@ BUILD_CUDA="-DUSE_CUDA=OFF"
BUILD_FPGAOPENCL="-DUSE_FPGAOPENCL=OFF"
BUILD_DEBUG="-DCMAKE_BUILD_TYPE=Release"
BUILD_MPI="-DUSE_MPI=OFF"
BUILD_HDFS="-DUSE_HDFS=OFF"
BUILD_PAPI="-DUSE_PAPI=ON"
WITH_DEPS=1
WITH_SUBMODULE_UPDATE=1
Expand Down Expand Up @@ -498,6 +500,10 @@ while [[ $# -gt 0 ]]; do
echo using MPI
export BUILD_MPI="-DUSE_MPI=ON"
;;
--hdfs)
echo using HDFS
export BUILD_HDFS="-DUSE_HDFS=ON"
;;
--no-papi)
echo not using PAPI
export BUILD_PAPI="-DUSE_PAPI=OFF"
Expand Down Expand Up @@ -894,7 +900,7 @@ if [ $WITH_DEPS -gt 0 ]; then
daphne_msg "No need to build Arrow again."
fi
#------------------------------------------------------------------------------
# spdlog
# #8.9 spdlog
#------------------------------------------------------------------------------
spdlogDirName="spdlog-$spdlogVersion"
spdlogArtifactFileName=$spdlogDirName.tar.gz
Expand All @@ -915,7 +921,34 @@ if [ $WITH_DEPS -gt 0 ]; then
daphne_msg "No need to build spdlog again."
fi
#------------------------------------------------------------------------------
# Eigen
# #8.12 HAWQ (libhdfs3)
#------------------------------------------------------------------------------
# Fetch, patch, build and install libhdfs3 from the Apache HAWQ source release.
# Reads hawqVersion, cacheDir, sourcePrefix, patchDir and installPrefix.
# Download and install progress is tracked with the "hawq_v<version>" markers.
hawqDirName="hawq-rel-v$hawqVersion"
hawqTarName="v${hawqVersion}.tar.gz"
hawqInstDirName=$installPrefix
if ! is_dependency_downloaded "hawq_v${hawqVersion}"; then
    daphne_msg "Get HAWQ (libhdfs3) version ${hawqVersion}"
    wget "https://github.com/apache/hawq/archive/refs/tags/rel/${hawqTarName}" \
        -qO "${cacheDir}/${hawqTarName}"
    tar -xf "$cacheDir/$hawqTarName" -C "$sourcePrefix"
    daphne_msg "Applying 0005-libhdfs3-remove-gtest-dep.patch"
    patch -Np1 -i "${patchDir}/0005-libhdfs3-remove-gtest-dep.patch" -d "$sourcePrefix/$hawqDirName"
    # The download marker set below is shared with the other HAWQ section of this
    # script, which is then skipped. The cstdint patch therefore has to be applied
    # here as well, otherwise it would never be applied at all.
    daphne_msg "Applying 0006-libhdfs3-add-cstdint-include.patch"
    patch -Np1 -i "${patchDir}/0006-libhdfs3-add-cstdint-include.patch" -d "$sourcePrefix/$hawqDirName"
    dependency_download_success "hawq_v${hawqVersion}"
fi
if ! is_dependency_installed "hawq_v${hawqVersion}"; then
    hawqBuildDir="$sourcePrefix/$hawqDirName/depends/libhdfs3/build"
    mkdir -p "$hawqBuildDir"
    # pushd/popd restores the caller's working directory. The previous
    # "cd A; cd build; cd -" sequence only went back to A.
    pushd "$hawqBuildDir" > /dev/null
    ../bootstrap --prefix="${hawqInstDirName}"
    # Plain make: the DYNAMIC_ARCH/TARGET/PREFIX variables passed before appear to be
    # leftovers from an OpenBLAS build; the install prefix is set by bootstrap above.
    make -j"$(nproc)"
    make install
    popd > /dev/null
    dependency_install_success "hawq_v${hawqVersion}"
else
    daphne_msg "No need to build HAWQ (libhdfs3) again."
fi
#------------------------------------------------------------------------------
# #8.10 Eigen
#------------------------------------------------------------------------------
eigenDirName="eigen-${eigenVersion}"
if ! is_dependency_downloaded "eigen_v${eigenVersion}"; then
Expand All @@ -933,9 +966,33 @@ if [ $WITH_DEPS -gt 0 ]; then
else
daphne_msg "No need to build eigen again."
fi

#------------------------------------------------------------------------------
# #8.9 Build MLIR
# #8.11 HAWQ (libhdfs3)
#------------------------------------------------------------------------------
# Fetch, patch, build and install libhdfs3 from the Apache HAWQ source release,
# using an out-of-tree CMake/Ninja build under ${buildPrefix}.
# NOTE(review): this section uses the same "hawq_v${hawqVersion}" markers as the
# earlier HAWQ section of this script, so presumably it is skipped whenever that
# section has already run. Confirm and keep only one of the two.
hawqDirName="hawq-rel-v$hawqVersion"
hawqTarName="v${hawqVersion}.tar.gz"
hawqInstDirName=$installPrefix
# Download step: fetch the release tarball into the cache, unpack it into the
# source prefix and apply both local patches to the unpacked tree.
if ! is_dependency_downloaded "hawq_v${hawqVersion}"; then
daphne_msg "Get HAWQ (libhdfs3) version ${hawqVersion}"
wget "https://github.com/apache/hawq/archive/refs/tags/rel/${hawqTarName}" \
-qO "${cacheDir}/${hawqTarName}"
tar -xf "$cacheDir/$hawqTarName" -C "$sourcePrefix"
daphne_msg "Applying 0005-libhdfs3-remove-gtest-dep.patch"
patch -Np1 -i "${patchDir}/0005-libhdfs3-remove-gtest-dep.patch" -d "$sourcePrefix/$hawqDirName"
daphne_msg "Applying 0006-libhdfs3-add-cstdint-include.patch"
patch -Np1 -i "${patchDir}/0006-libhdfs3-add-cstdint-include.patch" -d "$sourcePrefix/$hawqDirName"
dependency_download_success "hawq_v${hawqVersion}"
fi
# Install step: configure only the depends/libhdfs3 subdirectory of the HAWQ tree
# as a Release build, then build and install it (stripped) into $installPrefix.
if ! is_dependency_installed "hawq_v${hawqVersion}"; then
cmake -G Ninja -S "$sourcePrefix/$hawqDirName/depends/libhdfs3" -B "${buildPrefix}/${hawqDirName}" \
-DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX="$installPrefix"
cmake --build "${buildPrefix}/${hawqDirName}" --target install/strip
dependency_install_success "hawq_v${hawqVersion}"
else
daphne_msg "No need to build HAWQ (libhdfs3) again."
fi
#------------------------------------------------------------------------------
# #8.12 Build MLIR
#------------------------------------------------------------------------------
# We rarely need to build MLIR/LLVM, only during the first build of the
# prototype and after upgrades of the LLVM sub-module. To avoid unnecessary
Expand Down Expand Up @@ -1016,7 +1073,7 @@ daphne_msg "Build Daphne"

cmake -S "$projectRoot" -B "$daphneBuildDir" -G Ninja -DANTLR_VERSION="$antlrVersion" \
-DCMAKE_PREFIX_PATH="$installPrefix" \
$BUILD_CUDA $BUILD_FPGAOPENCL $BUILD_DEBUG $BUILD_MPI $BUILD_PAPI
$BUILD_CUDA $BUILD_FPGAOPENCL $BUILD_DEBUG $BUILD_MPI $BUILD_HDFS $BUILD_PAPI

cmake --build "$daphneBuildDir" --target "$target"

Expand Down
3 changes: 1 addition & 2 deletions containers/daphne-deps.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,8 @@ ARG TZ
RUN apt-get -qq -y update && apt-get -y upgrade \
&& apt-get -y --no-install-recommends install \
ca-certificates file git openssh-client unzip wget tar \
libomp-dev libpfm4-dev libssl-dev libxml2-dev uuid-dev zlib1g-dev \
libgsasl-dev libkrb5-dev libomp-dev libpfm4-dev libssl-dev libxml2-dev uuid-dev zlib1g-dev \
build-essential clang gfortran lld llvm llvm-18-tools ninja-build openjdk-11-jdk-headless pkg-config python3 \

&& apt-get clean && rm -rf /var/lib/apt/lists/*
RUN ln -fs /usr/share/zoneinfo/$TZ /etc/localtime

Expand Down
2 changes: 1 addition & 1 deletion containers/daphne-dev.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ ARG DEBIAN_FRONTEND="noninteractive"
ARG TZ
RUN apt-get -qq -y update && apt-get -y upgrade && apt-get -y --no-install-recommends install \
ca-certificates file git openssh-client unzip wget tar \
libomp-dev libpfm4-dev libssl-dev libxml2-dev uuid-dev zlib1g-dev \
libomp-dev libpfm4-dev libssl-dev libxml2-dev uuid-dev zlib1g-dev libgsasl-dev libkrb5-dev \
build-essential clang gfortran lld llvm llvm-18-tools ninja-build openjdk-11-jdk-headless pkg-config python3-numpy python3-pandas \
vim nano rsync sudo iputils-ping virtualenv openssh-server iproute2 git htop gdb lldb lld gpg-agent net-tools \
software-properties-common ca-certificates file unzip wget tar zstd \
Expand Down
3 changes: 2 additions & 1 deletion containers/daphne.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ LABEL "org.opencontainers.image.version"="$TIMESTAMP"
LABEL "org.opencontainers.image.created"="${CREATION_DATE}"
LABEL "org.opencontainers.image.revision"="${GIT_HASH}"
RUN apt-get -qq -y update && apt-get -y upgrade && apt-get -y --no-install-recommends install \
libtinfo6 openssl zlib1g python3-numpy python3-pandas libxml2 \
libtinfo6 openssl zlib1g python3-numpy python3-pandas libxml2 libgsasl18 libkrb5-3 libuuid1 \
libuuid1 libgsasl7 libkrb5 \
&& apt-get clean && rm -rf /var/lib/apt/lists/*
COPY --from=daphne-build $DAPHNE_DIR/bin/* /usr/local/bin
COPY --from=daphne-build $DAPHNE_DIR/lib/* /usr/local/lib
Expand Down
1 change: 1 addition & 0 deletions software-package-versions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@ openMPIVersion=4.1.5
papiVersion=7.0.1
spdlogVersion=1.11.0
ubuntuVersion=24.04
hawqVersion=3.0.0.0
28 changes: 28 additions & 0 deletions thirdparty/patches/0005-libhdfs3-remove-gtest-dep.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
diff --git a/depends/libhdfs3/CMake/Platform.cmake b/depends/libhdfs3/CMake/Platform.cmake
index ea00fa3f..55fbf646 100644
--- a/depends/libhdfs3/CMake/Platform.cmake
+++ b/depends/libhdfs3/CMake/Platform.cmake
@@ -16,7 +16,7 @@ IF(CMAKE_COMPILER_IS_GNUCXX)
STRING(REGEX MATCHALL "[0-9]+" GCC_COMPILER_VERSION ${GCC_COMPILER_VERSION})

LIST(GET GCC_COMPILER_VERSION 0 GCC_COMPILER_VERSION_MAJOR)
- LIST(GET GCC_COMPILER_VERSION 1 GCC_COMPILER_VERSION_MINOR)
+ LIST(GET GCC_COMPILER_VERSION 0 GCC_COMPILER_VERSION_MINOR)

SET(GCC_COMPILER_VERSION_MAJOR ${GCC_COMPILER_VERSION_MAJOR} CACHE INTERNAL "gcc major version")
SET(GCC_COMPILER_VERSION_MINOR ${GCC_COMPILER_VERSION_MINOR} CACHE INTERNAL "gcc minor version")
diff --git a/depends/libhdfs3/CMakeLists.txt b/depends/libhdfs3/CMakeLists.txt
index f49e68dc..b96603d2 100644
--- a/depends/libhdfs3/CMakeLists.txt
+++ b/depends/libhdfs3/CMakeLists.txt
@@ -22,8 +22,8 @@ FIND_PACKAGE(LibXml2 REQUIRED)
FIND_PACKAGE(Protobuf REQUIRED)
FIND_PACKAGE(KERBEROS REQUIRED)
FIND_PACKAGE(GSasl REQUIRED)
-FIND_PACKAGE(GoogleTest REQUIRED)
-INCLUDE_DIRECTORIES(${GoogleTest_INCLUDE_DIR})
+#FIND_PACKAGE(GoogleTest REQUIRED)
+#INCLUDE_DIRECTORIES(${GoogleTest_INCLUDE_DIR})
LINK_LIBRARIES(${GoogleTest_LIBRARIES})

IF(OS_LINUX)
40 changes: 40 additions & 0 deletions thirdparty/patches/0006-libhdfs3-add-cstdint-include.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
--- hawq-rel-v3.0.0.0/depends/libhdfs3/src/rpc/RpcServerInfo.h.orig 2024-09-11 15:11:43.190472355 +0200
+++ hawq-rel-v3.0.0.0/depends/libhdfs3/src/rpc/RpcServerInfo.h 2024-09-09 16:00:30.659750184 +0200
@@ -24,6 +24,7 @@

#include "Hash.h"

+#include <cstdint>
#include <string>
#include <sstream>

--- hawq-rel-v3.0.0.0/depends/libhdfs3/src/server/DatanodeInfo.h.orig 2024-09-12 18:02:36.241330171 +0200
+++ hawq-rel-v3.0.0.0/depends/libhdfs3/src/server/DatanodeInfo.h 2024-09-09 16:00:30.668750292 +0200
@@ -22,6 +22,7 @@
#ifndef _HDFS_LIBHDFS3_SERVER_DATANODEINFO_H_
#define _HDFS_LIBHDFS3_SERVER_DATANODEINFO_H_

+#include <cstdint>
#include <string>
#include <sstream>

--- hawq-rel-v3.0.0.0/depends/libhdfs3/src/client/Permission.h.orig 2024-09-12 18:03:12.371739127 +0200
+++ hawq-rel-v3.0.0.0/depends/libhdfs3/src/client/Permission.h 2024-09-09 16:00:30.672750340 +0200
@@ -22,6 +22,7 @@
#ifndef _HDFS_LIBHDFS3_CLIENT_PERMISSION_H_
#define _HDFS_LIBHDFS3_CLIENT_PERMISSION_H_

+#include <cstdint>
#include <string>

namespace Hdfs {
--- hawq-rel-v3.0.0.0/depends/libhdfs3/src/common/FileWrapper.h.orig 2024-09-12 18:03:30.748947136 +0200
+++ hawq-rel-v3.0.0.0/depends/libhdfs3/src/common/FileWrapper.h 2024-09-09 16:00:30.664750244 +0200
@@ -22,6 +22,7 @@
#ifndef _HDFS_LIBHDFS3_COMMON_FILEWRAPPER_H_
#define _HDFS_LIBHDFS3_COMMON_FILEWRAPPER_H_

+#include <cstdint>
#include <string>
#include <cassert>
#include <cstdio>

0 comments on commit 7470486

Please sign in to comment.