Skip to content

Commit

Permalink
GEOMESA-3425 CLI - Additional Hadoop dependency fixes (#3253)
Browse files Browse the repository at this point in the history
  • Loading branch information
elahrvivaz authored Jan 6, 2025
1 parent 56c339e commit 2175eea
Show file tree
Hide file tree
Showing 13 changed files with 149 additions and 205 deletions.
24 changes: 24 additions & 0 deletions geomesa-accumulo/geomesa-accumulo-dist/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -64,21 +64,45 @@
<groupId>org.locationtech.geomesa</groupId>
<artifactId>geomesa-accumulo-spark-runtime-accumulo20_${scala.binary.version}</artifactId>
<version>${project.version}</version>
<exclusions>
<exclusion>
<groupId>*</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<!-- Spark runtime artifact for Accumulo 2.1, pinned to the project version. -->
<groupId>org.locationtech.geomesa</groupId>
<artifactId>geomesa-accumulo-spark-runtime-accumulo21_${scala.binary.version}</artifactId>
<version>${project.version}</version>
<!-- Wildcard exclusion: drops every transitive dependency of this artifact, so only the artifact itself is resolved. -->
<!-- NOTE(review): presumably done to keep transitive (e.g. Hadoop) jars out of the distribution - confirm against the assembly descriptor. -->
<exclusions>
<exclusion>
<groupId>*</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<!-- Spark converter module; no version is declared here, so it must be supplied by inherited dependency management. -->
<groupId>org.locationtech.geomesa</groupId>
<artifactId>geomesa-spark-converter_${scala.binary.version}</artifactId>
<!-- Wildcard exclusion: drops every transitive dependency of this artifact, so only the artifact itself is resolved. -->
<!-- NOTE(review): presumably done to keep transitive (e.g. Hadoop) jars out of the distribution - confirm against the assembly descriptor. -->
<exclusions>
<exclusion>
<groupId>*</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>

<!-- Yarn (dist)-->
<dependency>
<!-- Accumulo jobs module (listed under the "Yarn (dist)" group); no version is declared here, so it must be supplied by inherited dependency management. -->
<groupId>org.locationtech.geomesa</groupId>
<artifactId>geomesa-accumulo-jobs_${scala.binary.version}</artifactId>
<!-- Wildcard exclusion: drops every transitive dependency of this artifact, so only the artifact itself is resolved. -->
<!-- NOTE(review): presumably done to keep transitive (e.g. Hadoop) jars out of the distribution - confirm against the assembly descriptor. -->
<exclusions>
<exclusion>
<groupId>*</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>

<!-- Tools (bin,lib,etc)-->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,25 +30,27 @@ function dependencies() {
if [[ -n "$classpath" ]]; then
accumulo_version="$(get_classpath_version accumulo-core "$classpath" "$accumulo_version")"
hadoop_version="$(get_classpath_version hadoop-common "$classpath" "$hadoop_version")"
hadoop_version="$(get_classpath_version hadoop-client-api "$classpath" "$hadoop_version")"
zk_version="$(get_classpath_version zookeeper "$classpath" "$zk_version")"
fi

if [[ "$hadoop_version" == "3.2.3" ]]; then
echo >&2 "WARNING Updating Hadoop version from 3.2.3 to 3.2.4 due to invalid client-api Maven artifacts"
hadoop_version="3.2.4"
fi

declare -a gavs=(
"org.apache.accumulo:accumulo-core:${accumulo_version}:jar"
"org.apache.accumulo:accumulo-server-base:${accumulo_version}:jar"
"org.apache.accumulo:accumulo-start:${accumulo_version}:jar"
"org.apache.accumulo:accumulo-hadoop-mapreduce:${accumulo_version}:jar"
"org.apache.zookeeper:zookeeper:${zk_version}:jar"
"org.apache.hadoop:hadoop-client-api:${hadoop_version}:jar"
"org.apache.hadoop:hadoop-client-runtime:${hadoop_version}:jar"
"org.apache.commons:commons-configuration2:2.10.1:jar"
"org.apache.commons:commons-text:1.11.0:jar"
"org.apache.commons:commons-collections4:4.4:jar"
"org.apache.commons:commons-vfs2:2.9.0:jar"
"commons-logging:commons-logging:1.3.3:jar"
"org.apache.hadoop:hadoop-auth:${hadoop_version}:jar"
"org.apache.hadoop:hadoop-common:${hadoop_version}:jar"
"org.apache.hadoop:hadoop-hdfs:${hadoop_version}:jar"
"com.fasterxml.woodstox:woodstox-core:5.3.0:jar"
"org.codehaus.woodstox:stax2-api:4.2.1:jar"
"org.apache.commons:commons-text:%%commons.text.version%%:jar"
"org.apache.commons:commons-vfs2:2.9.0:jar"
"com.google.guava:guava:${guava_install_version}:jar"
"io.netty:netty-codec:%%netty.version%%:jar"
"io.netty:netty-handler:%%netty.version%%:jar"
Expand Down Expand Up @@ -90,20 +92,9 @@ function dependencies() {
)
fi

# add hadoop 3+ jars if needed
if version_ge "${hadoop_version}" 3.0.0; then
gavs+=(
"org.apache.hadoop:hadoop-client-api:${hadoop_version}:jar"
"org.apache.hadoop:hadoop-client-runtime:${hadoop_version}:jar"
)
else
gavs+=(
"commons-configuration:commons-configuration:1.6:jar"
)
fi
if ! version_ge "${hadoop_version}" 3.4.0; then
if ! version_ge "${hadoop_version}" 3.3.0; then
gavs+=(
"commons-collections:commons-collections:3.2.2:jar"
"org.apache.htrace:htrace-core4:4.1.0-incubating:jar"
)
fi

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,12 @@ object FileSystemDataStoreFactory extends GeoMesaDataStoreInfo {
FileSystemDataStoreParams.ConfigsParam
)

// lazy to avoid masking classpath errors with missing hadoop
private lazy val configuration = new Configuration()

/**
 * Reports whether this factory can handle the supplied connection parameters.
 *
 * The decision is delegated entirely to the `exists` check of
 * `FileSystemDataStoreParams.PathParam`; no other parameter is consulted.
 *
 * @param params data store connection parameters
 * @return the result of `PathParam.exists(params)`
 */
override def canProcess(params: java.util.Map[String, _]): Boolean = {
  val pathParam = FileSystemDataStoreParams.PathParam
  pathParam.exists(params)
}

private val configuration = new Configuration()

object FileSystemDataStoreParams extends NamespaceParams {

val WriterFileTimeout: SystemProperty = SystemProperty("geomesa.fs.writer.partition.timeout", "60s")
Expand Down
60 changes: 18 additions & 42 deletions geomesa-fs/geomesa-fs-tools/conf-filtered/dependencies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
# Update the versions as required to match the target environment.

hadoop_install_version="%%hadoop.version.recommended%%"
aws_sdk_v1_install_version="1.12.735" # latest version as of 2024/06
aws_sdk_v2_install_version="2.25.64" # latest version as of 2024/06
aws_crt_install_version="0.29.18"
aws_sdk_v1_install_version="1.12.780" # latest version as of 2025/01
aws_sdk_v2_install_version="2.29.44" # latest version as of 2025/01
aws_crt_install_version="0.33.7"
# this should match the parquet desired version
snappy_install_version="1.1.1.6"

Expand All @@ -31,55 +31,27 @@ function dependencies() {

if [[ -n "$classpath" ]]; then
hadoop_version="$(get_classpath_version hadoop-common "$classpath" "$hadoop_version")"
hadoop_version="$(get_classpath_version hadoop-client-api "$classpath" "$hadoop_version")"
aws_sdk_v1_version="$(get_classpath_version aws-java-sdk-core "$classpath" "$aws_sdk_v1_version")"
aws_sdk_v2_version="$(get_classpath_version aws-core "$classpath" "$aws_sdk_v2_version")"
snappy_version="$(get_classpath_version snappy-java "$classpath" "$snappy_version")"
fi

if [[ "$hadoop_version" == "3.2.3" ]]; then
echo >&2 "WARNING Updating Hadoop version from 3.2.3 to 3.2.4 due to invalid client-api Maven artifacts"
hadoop_version="3.2.4"
fi

declare -a gavs=(
"org.apache.hadoop:hadoop-auth:${hadoop_version}:jar"
"org.apache.hadoop:hadoop-common:${hadoop_version}:jar"
"org.apache.hadoop:hadoop-hdfs:${hadoop_version}:jar"
"org.apache.hadoop:hadoop-hdfs-client:${hadoop_version}:jar"
"org.apache.hadoop:hadoop-mapreduce-client-core:${hadoop_version}:jar"
"org.apache.hadoop:hadoop-client-api:${hadoop_version}:jar"
"org.apache.hadoop:hadoop-client-runtime:${hadoop_version}:jar"
"org.apache.hadoop:hadoop-aws:${hadoop_version}:jar"
"org.xerial.snappy:snappy-java:${snappy_version}:jar"
"com.fasterxml.woodstox:woodstox-core:5.3.0:jar"
"org.codehaus.woodstox:stax2-api:4.2.1:jar"
"org.apache.commons:commons-configuration2:2.8.0:jar"
"commons-configuration:commons-configuration:1.6:jar"
"commons-collections:commons-collections:3.2.2:jar"
"commons-lang:commons-lang:2.6:jar"
"commons-logging:commons-logging:1.1.3:jar"
"commons-cli:commons-cli:1.2:jar"
"commons-io:commons-io:2.5:jar"
"com.google.protobuf:protobuf-java:2.5.0:jar"
"org.apache.htrace:htrace-core:3.1.0-incubating:jar"
"org.apache.htrace:htrace-core4:4.1.0-incubating:jar"
# these are the versions used by hadoop 2.8 and 3.1
"org.apache.httpcomponents:httpclient:4.5.2:jar"
"org.apache.httpcomponents:httpcore:4.4.4:jar"
"commons-httpclient:commons-httpclient:3.1:jar"
"commons-logging:commons-logging:1.3.3:jar"
"org.apache.httpcomponents:httpclient:4.5.13:jar"
"org.apache.httpcomponents:httpcore:4.4.13:jar"
)

# add hadoop 3+ jars if needed
if version_ge "${hadoop_version}" 3.0.0; then
gavs+=(
"org.apache.hadoop:hadoop-client-api:${hadoop_version}:jar"
"org.apache.hadoop:hadoop-client-runtime:${hadoop_version}:jar"
"com.google.guava:guava:27.0-jre:jar"
)
else
gavs+=(
"com.google.guava:guava:11.0.2:jar"
)
fi
if ! version_ge "${hadoop_version}" 3.4.0; then
gavs+=(
"commons-collections:commons-collections:3.2.2:jar"
)
fi

# aws sdk
if version_ge "${hadoop_version}" 3.4.0; then
gavs+=(
Expand All @@ -105,6 +77,8 @@ function dependencies() {
"software.amazon.awssdk:profiles:${aws_sdk_v2_version}:jar"
"software.amazon.awssdk:protocol-core:${aws_sdk_v2_version}:jar"
"software.amazon.awssdk:regions:${aws_sdk_v2_version}:jar"
"software.amazon.awssdk:retries:${aws_sdk_v2_version}:jar"
"software.amazon.awssdk:retries-spi:${aws_sdk_v2_version}:jar"
"software.amazon.awssdk:s3:${aws_sdk_v2_version}:jar"
"software.amazon.awssdk:s3-transfer-manager:${aws_sdk_v2_version}:jar"
"software.amazon.awssdk:sdk-core:${aws_sdk_v2_version}:jar"
Expand All @@ -119,6 +93,8 @@ function dependencies() {
"com.amazonaws:aws-java-sdk-core:${aws_sdk_v1_version}:jar"
"com.amazonaws:aws-java-sdk-s3:${aws_sdk_v1_version}:jar"
"com.amazonaws:aws-java-sdk-dynamodb:${aws_sdk_v1_version}:jar"
"org.apache.htrace:htrace-core4:4.1.0-incubating:jar"
"com.google.guava:guava:27.0-jre:jar"
"joda-time:joda-time:2.8.1:jar"
)
fi
Expand Down
39 changes: 11 additions & 28 deletions geomesa-gt/geomesa-gt-tools/conf-filtered/dependencies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,11 @@
# http://www.opensource.org/licenses/apache2.0.php.
#

# This file lists the dependencies required for running the geomesa-fs command-line tools.
# This file lists the dependencies required for running the geomesa command-line tools.
# Usually these dependencies will be provided by the environment (e.g. HADOOP_HOME).
# Update the versions as required to match the target environment.

hadoop_install_version="%%hadoop.version.recommended%%"
# required for hadoop - make sure it corresponds to the hadoop installed version
guava_install_version="%%geotools.guava.version%%"

# gets the dependencies for this module
# args:
Expand All @@ -25,37 +23,22 @@ function dependencies() {

if [[ -n "$classpath" ]]; then
hadoop_version="$(get_classpath_version hadoop-common "$classpath" "$hadoop_version")"
hadoop_version="$(get_classpath_version hadoop-client-api "$classpath" "$hadoop_version")"
fi

if [[ "$hadoop_version" == "3.2.3" ]]; then
echo >&2 "WARNING Updating Hadoop version from 3.2.3 to 3.2.4 due to invalid client-api Maven artifacts"
hadoop_version="3.2.4"
fi

declare -a gavs=(
"org.apache.hadoop:hadoop-auth:${hadoop_version}:jar"
"org.apache.hadoop:hadoop-common:${hadoop_version}:jar"
"org.apache.hadoop:hadoop-hdfs:${hadoop_version}:jar"
"org.apache.hadoop:hadoop-hdfs-client:${hadoop_version}:jar"
"commons-configuration:commons-configuration:1.6:jar"
"commons-logging:commons-logging:1.1.3:jar"
# htrace 3 required for hadoop before 2.8
# htrace 4 required for hadoop 2.8 and later
# since they have separate package names, should be safe to install both
"org.apache.htrace:htrace-core:3.1.0-incubating:jar"
"org.apache.htrace:htrace-core4:4.1.0-incubating:jar"
"com.google.guava:guava:${guava_install_version}:jar"
"org.apache.hadoop:hadoop-client-api:${hadoop_version}:jar"
"org.apache.hadoop:hadoop-client-runtime:${hadoop_version}:jar"
)

# add hadoop 3+ jars if needed
if version_ge "${hadoop_version}" 3.0.0; then
gavs+=(
"org.apache.hadoop:hadoop-client-api:${hadoop_version}:jar"
"org.apache.hadoop:hadoop-client-runtime:${hadoop_version}:jar"
)
else
gavs+=(
"commons-configuration:commons-configuration:1.6:jar"
)
fi
if ! version_ge "${hadoop_version}" 3.4.0; then
if ! version_ge "${hadoop_version}" 3.3.0; then
gavs+=(
"commons-collections:commons-collections:3.2.2:jar"
"org.apache.htrace:htrace-core4:4.1.0-incubating:jar"
)
fi

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import org.apache.hadoop.hbase.security.token.AuthenticationTokenIdentifier
import org.apache.hadoop.hbase.{HBaseConfiguration, HConstants}
import org.apache.hadoop.security.authentication.util.KerberosUtil
import org.apache.hadoop.security.{SecurityUtil, UserGroupInformation}
import org.locationtech.geomesa.hbase.HBaseSystemProperties
import org.locationtech.geomesa.hbase.data.HBaseDataStoreFactory.{HBaseGeoMesaKeyTab, HBaseGeoMesaPrincipal}
import org.locationtech.geomesa.hbase.data.HBaseDataStoreParams.{ConfigPathsParam, ConfigsParam, ConnectionParam, ZookeeperParam}
import org.locationtech.geomesa.hbase.utils.HBaseVersions
Expand All @@ -38,7 +39,7 @@ object HBaseConnectionPool extends LazyLogging {
// add common resources from system property - lazy to allow object initialization if there's an error
private lazy val configuration = {
val base = HBaseConfiguration.create()
HBaseDataStoreFactory.ConfigPathProperty.option.foreach(addResources(base, _))
HBaseSystemProperties.ConfigPathProperty.option.foreach(addResources(base, _))
base
}

Expand Down
Loading

0 comments on commit 2175eea

Please sign in to comment.