From 36cba800b88f437061d4876255ab5fb3a979714a Mon Sep 17 00:00:00 2001 From: suxiaogang223 Date: Wed, 10 Apr 2024 13:10:38 +0800 Subject: [PATCH] change hive docker to bridge network --- .../{hadoop-hive.env.tpl => hadoop-hive.env} | 22 ++---- .../docker-compose/hive/hive-2x.yaml.tpl | 69 +++++++++---------- .../docker-compose/hive/hive-2x_settings.env | 19 ++--- .../docker-compose/hive/hive-3x.yaml.tpl | 60 ++++++++-------- .../docker-compose/hive/hive-3x_settings.env | 19 ++--- .../thirdparties/run-thirdparties-docker.sh | 8 +-- 6 files changed, 79 insertions(+), 118 deletions(-) rename docker/thirdparties/docker-compose/hive/{hadoop-hive.env.tpl => hadoop-hive.env} (71%) diff --git a/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl b/docker/thirdparties/docker-compose/hive/hadoop-hive.env similarity index 71% rename from docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl rename to docker/thirdparties/docker-compose/hive/hadoop-hive.env index e97e71b444eda22..5f24a742831e1b3 100644 --- a/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl +++ b/docker/thirdparties/docker-compose/hive/hadoop-hive.env @@ -15,36 +15,24 @@ # limitations under the License. 
# -HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://${externalEnvIp}:${PG_PORT}/metastore +HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore-postgresql/metastore HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive HIVE_SITE_CONF_datanucleus_autoCreateSchema=false -HIVE_SITE_CONF_hive_metastore_uris=thrift://${externalEnvIp}:${HMS_PORT} -HIVE_SITE_CONF_hive_metastore_port=${HMS_PORT} +HIVE_SITE_CONF_hive_metastore_uris=thrift://hive-metastore:9083 HIVE_SITE_CONF_hive_server2_thrift_bind_host=0.0.0.0 -HIVE_SITE_CONF_hive_server2_thrift_port=${HS_PORT} -HIVE_SITE_CONF_hive_compactor_initiator_on=true -HIVE_SITE_CONF_hive_compactor_worker_threads=2 +HIVE_SITE_CONF_hive_server2_thrift_port=10000 HIVE_SITE_CONF_metastore_storage_schema_reader_impl=org.apache.hadoop.hive.metastore.SerDeStorageSchemaReader +CORE_CONF_fs_defaultFS=hdfs://namenode:8020 CORE_CONF_hadoop_http_staticuser_user=root CORE_CONF_hadoop_proxyuser_hue_hosts=* CORE_CONF_hadoop_proxyuser_hue_groups=* -CORE_CONF_hadoop_proxyuser_hive_hosts=* -CORE_CONF_fs_defaultFS=hdfs://${externalEnvIp}:${FS_PORT} HDFS_CONF_dfs_webhdfs_enabled=true HDFS_CONF_dfs_permissions_enabled=false HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false -HDFS_CONF_dfs_namenode_http___address=0.0.0.0:${NAMENODE_HTTP_PORT} -HDFS_CONF_dfs_namenode_https___address=0.0.0.0:${NAMENODE_HTTPS_PORT} -HDFS_CONF_dfs_namenode_secondary_http___address=0.0.0.0:${NAMENODE_SECONDARY_HTTP_PORT} -HDFS_CONF_dfs_namenode_secondary_https___address=0.0.0.0:${NAMENODE_SECONDARY_HTTPS_PORT} -HDFS_CONF_dfs_datanode_address=0.0.0.0:${DATANODE_PORT} -HDFS_CONF_dfs_datanode_http_address=0.0.0.0:${DATANODE_HTTP_PORT} -HDFS_CONF_dfs_datanode_https_address=0.0.0.0:${DATANODE_HTTPS_PORT} -HDFS_CONF_dfs_datanode_ipc_address=0.0.0.0:${DATANODE_IPC_PORT} 
YARN_CONF_yarn_log___aggregation___enable=true YARN_CONF_yarn_resourcemanager_recovery_enabled=true @@ -59,4 +47,4 @@ YARN_CONF_yarn_resourcemanager_hostname=resourcemanager YARN_CONF_yarn_timeline___service_hostname=historyserver YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032 YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030 -YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031 +YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031 \ No newline at end of file diff --git a/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl b/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl index 256dd2ba509375d..7af6ea3fbde52a9 100644 --- a/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl +++ b/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl @@ -19,86 +19,83 @@ version: "3.8" services: - ${CONTAINER_UID}namenode: + namenode: image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8 environment: - CLUSTER_NAME=test env_file: - ./hadoop-hive.env - container_name: ${CONTAINER_UID}namenode - expose: - - "${NAMENODE_HTTP_PORT}" - - "${FS_PORT}" + container_name: ${CONTAINER_UID}hadoop2-namenode + ports: + - "${FS_PORT}:8020" healthcheck: - test: [ "CMD", "curl", "http://localhost:${NAMENODE_HTTP_PORT}/" ] + test: [ "CMD", "curl", "http://localhost:50070/" ] interval: 5s timeout: 120s retries: 120 - network_mode: "host" - ${CONTAINER_UID}datanode: + datanode: image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8 env_file: - ./hadoop-hive.env environment: - SERVICE_PRECONDITION: "${externalEnvIp}:${NAMENODE_HTTP_PORT}" - container_name: ${CONTAINER_UID}datanode - expose: - - "${DATANODE_HTTP_PORT}" + SERVICE_PRECONDITION: "namenode:50070" + container_name: ${CONTAINER_UID}hadoop2-datanode healthcheck: - test: [ "CMD", "curl", "http://localhost:${DATANODE_HTTP_PORT}" ] + test: [ "CMD", "curl", "http://localhost:50075" ] interval: 5s timeout: 60s retries: 120 - network_mode: "host" 
- ${CONTAINER_UID}hive-server: + hive-server: image: bde2020/hive:2.3.2-postgresql-metastore env_file: - ./hadoop-hive.env environment: - HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://${externalEnvIp}:${PG_PORT}/metastore" - SERVICE_PRECONDITION: "${externalEnvIp}:${HMS_PORT}" - container_name: ${CONTAINER_UID}hive-server - expose: - - "${HS_PORT}" + HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://hive-metastore/metastore" + SERVICE_PRECONDITION: "hive-metastore:9083" + container_name: ${CONTAINER_UID}hive2-server + ports: + - "${HS_PORT}:10000" depends_on: - - ${CONTAINER_UID}datanode - - ${CONTAINER_UID}namenode + - datanode + - namenode healthcheck: - test: beeline -u "jdbc:hive2://127.0.0.1:${HS_PORT}/default" -n health_check -e "show databases;" + test: beeline -u "jdbc:hive2://127.0.0.1:10000/default" -n health_check -e "show databases;" interval: 10s timeout: 120s retries: 120 - network_mode: "host" - ${CONTAINER_UID}hive-metastore: + hive-metastore: image: bde2020/hive:2.3.2-postgresql-metastore env_file: - ./hadoop-hive.env command: /bin/bash /mnt/scripts/hive-metastore.sh # command: /opt/hive/bin/hive --service metastore environment: - SERVICE_PRECONDITION: "${externalEnvIp}:${NAMENODE_HTTP_PORT} ${externalEnvIp}:${DATANODE_HTTP_PORT} ${externalEnvIp}:${PG_PORT}" - container_name: ${CONTAINER_UID}hive-metastore - expose: - - "${HMS_PORT}" + SERVICE_PRECONDITION: "namenode:50070 datanode:50075 hive-metastore-postgresql:5432" + container_name: ${CONTAINER_UID}hive2-metastore + ports: + - "${HMS_PORT}:9083" volumes: - ./scripts:/mnt/scripts depends_on: - - ${CONTAINER_UID}hive-metastore-postgresql - network_mode: "host" + - hive-metastore-postgresql - ${CONTAINER_UID}hive-metastore-postgresql: + hive-metastore-postgresql: image: bde2020/hive-metastore-postgresql:2.3.0 - restart: always - container_name: ${CONTAINER_UID}hive-metastore-postgresql + container_name: ${CONTAINER_UID}hive2-metastore-postgresql ports: - - 
${PG_PORT}:5432 + - "${PG_PORT}:5432" healthcheck: test: ["CMD-SHELL", "pg_isready -U postgres"] interval: 5s timeout: 60s retries: 120 - network_mode: "bridge" + +# solve HiveServer2 connect error: +# java.net.URISyntaxException Illegal character in hostname :thrift://${CONTAINER_UID}hive2_default:9083 +networks: + default: + name: ${CONTAINER_UID}hive2-default diff --git a/docker/thirdparties/docker-compose/hive/hive-2x_settings.env b/docker/thirdparties/docker-compose/hive/hive-2x_settings.env index 347142e2cddfa25..8baad99cbc81b79 100644 --- a/docker/thirdparties/docker-compose/hive/hive-2x_settings.env +++ b/docker/thirdparties/docker-compose/hive/hive-2x_settings.env @@ -18,18 +18,9 @@ # Change this to a specific string. # Do not use "_" or other sepcial characters, only number and alphabeta. -# NOTICE: change this uid will modify hadoop-hive.env and hive-*.yaml +# NOTICE: change this uid will modify hive-*.yaml -export FS_PORT=8220 #should be same in regression-conf.groovy -export HMS_PORT=9283 #should be same in regression-conf.groovy -export HS_PORT=12000 #should be same in regression-conf.groovy -export PG_PORT=5632 #should be same in regression-conf.groovy - -export NAMENODE_HTTP_PORT=52070 -export NAMENODE_HTTPS_PORT=52470 -export NAMENODE_SECONDARY_HTTP_PORT=52090 -export NAMENODE_SECONDARY_HTTPS_PORT=52091 -export DATANODE_PORT=52010 -export DATANODE_HTTP_PORT=52075 -export DATANODE_HTTPS_PORT=52475 -export DATANODE_IPC_PORT=52020 +export FS_PORT=8120 #should be same in regression-conf.groovy +export HMS_PORT=9183 #should be same in regression-conf.groovy +export HS_PORT=11000 #should be same in regression-conf.groovy +export PG_PORT=5532 #should be same in regression-conf.groovy \ No newline at end of file diff --git a/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl b/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl index 150be0a9d8955a4..67ce71fd1aee563 100644 --- a/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl +++ 
b/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl @@ -19,86 +19,84 @@ version: "3.8" services: - ${CONTAINER_UID}hadoop3-namenode: + namenode: image: bde2020/hadoop-namenode:2.0.0-hadoop3.2.1-java8 environment: - CLUSTER_NAME=test env_file: - ./hadoop-hive.env container_name: ${CONTAINER_UID}hadoop3-namenode - expose: - - "${NAMENODE_HTTP_PORT}" - - "${FS_PORT}" + ports: + - "${FS_PORT}:8020" healthcheck: - test: [ "CMD", "curl", "http://localhost:${NAMENODE_HTTP_PORT}/" ] + test: [ "CMD", "curl", "http://localhost:9870/" ] interval: 5s timeout: 120s retries: 120 - network_mode: "host" - ${CONTAINER_UID}hadoop3-datanode: + datanode: image: bde2020/hadoop-datanode:2.0.0-hadoop3.2.1-java8 env_file: - ./hadoop-hive.env environment: - SERVICE_PRECONDITION: "${externalEnvIp}:${NAMENODE_HTTP_PORT}" + SERVICE_PRECONDITION: "namenode:9870" container_name: ${CONTAINER_UID}hadoop3-datanode - expose: - - "${DATANODE_HTTP_PORT}" healthcheck: - test: [ "CMD", "curl", "http://localhost:${DATANODE_HTTP_PORT}" ] + test: [ "CMD", "curl", "http://localhost:9864" ] interval: 5s timeout: 60s retries: 120 - network_mode: "host" - ${CONTAINER_UID}hive-server: + hive-server: image: lishizhen/hive:3.1.2-postgresql-metastore env_file: - ./hadoop-hive.env environment: - HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://${externalEnvIp}:${PG_PORT}/metastore" - SERVICE_PRECONDITION: "${externalEnvIp}:${HMS_PORT}" + HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://hive-metastore/metastore" + SERVICE_PRECONDITION: "hive-metastore:9083" container_name: ${CONTAINER_UID}hive3-server - expose: - - "${HS_PORT}" + ports: + - "${HS_PORT}:10000" depends_on: - - ${CONTAINER_UID}hadoop3-datanode - - ${CONTAINER_UID}hadoop3-namenode + - datanode + - namenode healthcheck: - test: beeline -u "jdbc:hive2://127.0.0.1:${HS_PORT}/default" -n health_check -e "show databases;" + test: beeline -u "jdbc:hive2://127.0.0.1:10000/default" -n health_check -e "show databases;" 
interval: 10s timeout: 120s retries: 120 - network_mode: "host" - ${CONTAINER_UID}hive3-metastore: + hive-metastore: image: lishizhen/hive:3.1.2-postgresql-metastore env_file: - ./hadoop-hive.env command: /bin/bash /mnt/scripts/hive-metastore.sh # command: /opt/hive/bin/hive --service metastore environment: - SERVICE_PRECONDITION: "${externalEnvIp}:${NAMENODE_HTTP_PORT} ${externalEnvIp}:${DATANODE_HTTP_PORT} ${externalEnvIp}:${PG_PORT}" + SERVICE_PRECONDITION: "namenode:9870 datanode:9864 hive-metastore-postgresql:5432" container_name: ${CONTAINER_UID}hive3-metastore - expose: - - "${HMS_PORT}" + ports: + - "${HMS_PORT}:9083" volumes: - ./scripts:/mnt/scripts depends_on: - - ${CONTAINER_UID}hive3-metastore-postgresql - network_mode: "host" + - hive-metastore-postgresql - ${CONTAINER_UID}hive3-metastore-postgresql: + hive-metastore-postgresql: image: bde2020/hive-metastore-postgresql:3.1.0 - restart: always container_name: ${CONTAINER_UID}hive3-metastore-postgresql ports: - - ${PG_PORT}:5432 + - "${PG_PORT}:5432" healthcheck: test: ["CMD-SHELL", "pg_isready -U postgres"] interval: 5s timeout: 60s retries: 120 - network_mode: "bridge" + +# solve HiveServer2 connect error: +# java.net.URISyntaxException Illegal character in hostname :thrift://${CONTAINER_UID}hive3_default:9083 + +networks: + default: + name: ${CONTAINER_UID}hive3-default diff --git a/docker/thirdparties/docker-compose/hive/hive-3x_settings.env b/docker/thirdparties/docker-compose/hive/hive-3x_settings.env index dbf403f41e98f30..82486a17f0aa3b2 100644 --- a/docker/thirdparties/docker-compose/hive/hive-3x_settings.env +++ b/docker/thirdparties/docker-compose/hive/hive-3x_settings.env @@ -18,18 +18,9 @@ # Change this to a specific string. # Do not use "_" or other sepcial characters, only number and alphabeta. 
-# NOTICE: change this uid will modify hadoop-hive.env and hive-*.yaml +# NOTICE: change this uid will modify hive-*.yaml -export FS_PORT=8320 #should be same in regression-conf.groovy -export HMS_PORT=9383 #should be same in regression-conf.groovy -export HS_PORT=13000 #should be same in regression-conf.groovy -export PG_PORT=5732 #should be same in regression-conf.groovy - -export NAMENODE_HTTP_PORT=53070 -export NAMENODE_HTTPS_PORT=53470 -export NAMENODE_SECONDARY_HTTP_PORT=53090 -export NAMENODE_SECONDARY_HTTPS_PORT=53091 -export DATANODE_PORT=53010 -export DATANODE_HTTP_PORT=53075 -export DATANODE_HTTPS_PORT=53475 -export DATANODE_IPC_PORT=53020 +export FS_PORT=8020 #should be same in regression-conf.groovy +export HMS_PORT=9083 #should be same in regression-conf.groovy +export HS_PORT=10000 #should be same in regression-conf.groovy +export PG_PORT=5432 #should be same in regression-conf.groovy \ No newline at end of file diff --git a/docker/thirdparties/run-thirdparties-docker.sh b/docker/thirdparties/run-thirdparties-docker.sh index c83b8dadf415cad..4f7f1d24627adaf 100755 --- a/docker/thirdparties/run-thirdparties-docker.sh +++ b/docker/thirdparties/run-thirdparties-docker.sh @@ -315,12 +315,10 @@ if [[ "${RUN_HIVE}" -eq 1 ]]; then # before start it, you need to download parquet file package, see "README" in "docker-compose/hive/scripts/" sed -i "s/s3Endpoint/${s3Endpoint}/g" "${ROOT}"/docker-compose/hive/scripts/hive-metastore.sh sed -i "s/s3BucketName/${s3BucketName}/g" "${ROOT}"/docker-compose/hive/scripts/hive-metastore.sh - # generate hive-3x.yaml and hadoop-hive.env - export externalEnvIp=${IP_HOST} + # generate hive-2x.yaml export CONTAINER_UID=${CONTAINER_UID} .
"${ROOT}"/docker-compose/hive/hive-2x_settings.env envsubst < "${ROOT}"/docker-compose/hive/hive-2x.yaml.tpl > "${ROOT}"/docker-compose/hive/hive-2x.yaml - envsubst < "${ROOT}"/docker-compose/hive/hadoop-hive.env.tpl > "${ROOT}"/docker-compose/hive/hadoop-hive.env sudo docker compose -p ${CONTAINER_UID}hive2 -f "${ROOT}"/docker-compose/hive/hive-2x.yaml --env-file "${ROOT}"/docker-compose/hive/hadoop-hive.env down if [[ "${STOP}" -ne 1 ]]; then sudo docker compose -p ${CONTAINER_UID}hive2 -f "${ROOT}"/docker-compose/hive/hive-2x.yaml --env-file "${ROOT}"/docker-compose/hive/hadoop-hive.env up --build --remove-orphans -d @@ -341,12 +339,10 @@ if [[ "${RUN_HIVE3}" -eq 1 ]]; then # before start it, you need to download parquet file package, see "README" in "docker-compose/hive/scripts/" sed -i "s/s3Endpoint/${s3Endpoint}/g" "${ROOT}"/docker-compose/hive/scripts/hive-metastore.sh sed -i "s/s3BucketName/${s3BucketName}/g" "${ROOT}"/docker-compose/hive/scripts/hive-metastore.sh - # generate hive-3x.yaml and hadoop-hive.env - export externalEnvIp=${IP_HOST} + # generate hive-3x.yaml export CONTAINER_UID=${CONTAINER_UID} . "${ROOT}"/docker-compose/hive/hive-3x_settings.env envsubst < "${ROOT}"/docker-compose/hive/hive-3x.yaml.tpl > "${ROOT}"/docker-compose/hive/hive-3x.yaml - envsubst < "${ROOT}"/docker-compose/hive/hadoop-hive.env.tpl > "${ROOT}"/docker-compose/hive/hadoop-hive.env sudo docker compose -p ${CONTAINER_UID}hive3 -f "${ROOT}"/docker-compose/hive/hive-3x.yaml --env-file "${ROOT}"/docker-compose/hive/hadoop-hive.env down if [[ "${STOP}" -ne 1 ]]; then sudo docker compose -p ${CONTAINER_UID}hive3 -f "${ROOT}"/docker-compose/hive/hive-3x.yaml --env-file "${ROOT}"/docker-compose/hive/hadoop-hive.env up --build --remove-orphans -d