From d26e21e5f88a04ca79427b7316ba93f4b35f6e62 Mon Sep 17 00:00:00 2001
From: Jorge Ortiz
Date: Thu, 12 Dec 2024 19:01:46 +0100
Subject: [PATCH] MET-6248 add solr and s3 local support

Add a local Solr and an S3-compatible MinIO service to docker-compose:

- docker-compose: add the solr (built from docker/solr) and minio services
  and bump the rabbitmq and mongo images.
- docker/solr: image based on solr:7.7.3-slim that creates the
  metis_sandbox_publish_local collection, plus solr-schema.sh which uploads
  the solr configuration to zookeeper and applies it to that collection.
- docker/minio: script that creates the test bucket and an access key.
- S3Config: select the S3 client bean through sandbox.s3.local-enabled; the
  local variant is configured for plain HTTP.
---
NOTE(review): this patch was rebuilt from a copy whose line breaks and
indentation had been collapsed. Indentation and blank context lines are
reconstructed; confirm them against the repository before applying.
NOTE(review): the pom.xml hunk only shows bare values (the XML element names
seem to have been lost) and fewer context lines than its header declares;
restore that hunk from the repository.
NOTE(review): solr-schema.sh and S3Config.java no longer match the blobs named
in their "index" lines. Changes: COMMIT_HASH is resolved from HEAD, all git
calls use -C, zookeeper commands are run with eval instead of being wrapped in
a command substitution, a failed upconfig aborts the script, curl is called
directly with quoted arguments, and both S3 beans got Javadoc. Hunk headers and
the diffstat below were updated to the new line counts.

 docker-compose.yml                            |  32 +++-
 docker/minio/minio-bucket.sh                  |   5 +
 docker/solr/Dockerfile                        |  20 +++
 docker/solr/solr-schema.sh                    | 175 ++++++++++++++++++
 pom.xml                                       |   2 +-
 .../metis/sandbox/config/S3Config.java        |  36 ++-
 src/main/resources/sample.application.yml     |   1 +
 src/test/resources/application.yml            |   1 +
 8 files changed, 268 insertions(+), 4 deletions(-)
 create mode 100644 docker/minio/minio-bucket.sh
 create mode 100644 docker/solr/Dockerfile
 create mode 100755 docker/solr/solr-schema.sh

diff --git a/docker-compose.yml b/docker-compose.yml
index 3cdb72a7..bcec7ac4 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -18,7 +18,7 @@ services:
       - ./src/main/resources/database/schema_lockrepository.sql:/docker-entrypoint-initdb.d/schema_lockrepository.sql
       - ./src/main/resources/database/schema_validation.sql:/docker-entrypoint-initdb.d/schema_validation.sql
   rabbitmq:
-    image: rabbitmq:3.9.12-management
+    image: rabbitmq:3.11.2-management-alpine
     container_name: metis-sandbox-rabbitmq
     environment:
       - RABBIT_DEFAULT_VHOST=/
@@ -28,7 +28,7 @@ services:
       - '5672:5672'
       - '15672:15672'
   mongo:
-    image: mongo:4.2.9
+    image: mongo:6.0.12
     container_name: metis-sandbox-mongo
     environment:
       MONGO_INITDB_DATABASE: metis-sandbox
@@ -36,6 +36,34 @@ services:
       MONGO_INITDB_ROOT_PASSWORD: guest
     ports:
       - '27017:27017'
+  solr:
+    build:
+      context: docker/solr/
+      dockerfile: Dockerfile
+    container_name: metis-sandbox-solr
+    restart: always
+    ports:
+      - "8983:8983"
+      - "9983:9983"
+    entrypoint:
+      - docker-entrypoint.sh
+      - solr
+      - start
+      - -c
+      - -f
+  minio:
+    image: minio/minio
+    container_name: metis-sandbox-minio
+    restart: always
+    environment:
+      MINIO_ROOT_USER: sandbox
+      MINIO_ROOT_PASSWORD: metis-sandbox
+    ports:
+      - "9000:9000"
+      - "9001:9001"
+    volumes:
+      - .data/minio:/data
+    command: minio server /data/minio --console-address ":9001"
   metis-sandbox-local:
     image: europeana/metis-sandbox:develop
     container_name: metis-sandbox-local
diff --git a/docker/minio/minio-bucket.sh b/docker/minio/minio-bucket.sh
new file mode 100644
index 00000000..611c6c2f
--- /dev/null
+++ b/docker/minio/minio-bucket.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+mc alias set minio http://localhost:9000 sandbox metis-sandbox # setup Minio client
+mc mb minio/metis-sandbox-bucket || true # create a test bucket
+mc admin accesskey create minio/ --access-key bT3iWI27KcAQyLQCIOYT --secret-key pMDcycDwMnKbLvkqa2Cxb2KJVeU1u67lE7Fb1Ie # create accesskey
diff --git a/docker/solr/Dockerfile b/docker/solr/Dockerfile
new file mode 100644
index 00000000..fdfbcb3d
--- /dev/null
+++ b/docker/solr/Dockerfile
@@ -0,0 +1,20 @@
+FROM solr:7.7.3-slim
+USER 0
+RUN apt-get update \
+    && apt-get install git -y \
+    && apt-get install rsync -y \
+    && apt-get install curl -y \
+    && apt-get install nano -y \
+    && apt-get clean \
+    && git clone https://github.com/europeana/search
+COPY solr-schema.sh /opt/solr/search
+COPY europeana-collection-aliasing-solr6.6.5-0.0.1-SNAPSHOT.jar /opt/solr/contrib/lib/europeana-collection-aliasing-solr6.6.5-0.0.1-SNAPSHOT.jar
+RUN chown -R solr:solr /opt/solr/search \
+    && chown -R solr:solr /opt/solr/contrib/lib \
+    && chmod ug+x /opt/solr/search/solr-schema.sh
+USER solr
+RUN solr start -c \
+    && solr create_collection -c metis_sandbox_publish_local -p 8983 \
+    && solr stop \
+    && mkdir -p /opt/solr/server/solr/metis_sandbox_publish_local_shard1_replica_n1/conf \
+    && cp /opt/solr/search/solr_confs/metadata/conf/query_aliases.xml /opt/solr/server/solr/metis_sandbox_publish_local_shard1_replica_n1/conf/query_aliases.xml
diff --git a/docker/solr/solr-schema.sh b/docker/solr/solr-schema.sh
new file mode 100755
index 00000000..d8f03d5c
--- /dev/null
+++ b/docker/solr/solr-schema.sh
@@ -0,0 +1,175 @@
+#!/bin/bash
+
+#Publishes the solr configuration of the checked out git repository to zookeeper and applies it to the collection.
+function main(){
+  declare_common_fields
+  declare_multiple_environments_fields
+  set_chosen_environment_fields
+  print_chosen_environment_and_options
+  git_get_selected_branch # Should set the COMMIT_HASH
+  rsync_local_dir_to_remote_dir
+  git_remove_pull_request_branches
+  zookeeper_find_current_and_old_configurations
+  zookeeper_upload_and_apply_new_configuration #Expects COMMIT_HASH
+  zookeeper_remove_current_and_old_configurations
+}
+
+function declare_common_fields() {
+  #We assume git is present on workspace from the configuration of the job. Ideally same git directory should not be accessible from multiple jobs, to avoid conflicts.
+  GIT_REPO_DIR=$(pwd)/
+  GIT_SOLR_CONF_SUBDIR=solr_confs/metadata/conf/
+  TARGET_SOLR_CONF_ROOT_DIR=/opt/solr/solr_configurations/
+  PULL_REQUEST_PREFIX=pull_request_
+}
+
+function declare_multiple_environments_fields() {
+  #The server has to have a zookeeper running for uploading the configuration.
+  ENVIRONMENT="LOCAL"
+  INDEX_ENVIRONMENT="PUBLISH"
+  BRANCH_OR_PR_NUMBER="master"
+  LOCAL_SOLR_SERVER=metis-sandbox-solr
+  LOCAL_ZOOKEEPER_SERVER=localhost
+  LOCAL_ZOOKEEPER_PORT="9983"
+  LOCAL_SOLR_PORT="8983"
+  LOCAL_SOLR_BINARIES_DIR=/opt/solr/
+  LOCAL_PUBLISH_COLLECTION=metis_sandbox_publish_local
+  LOCAL_PUBLISH_SOLR_CONF_DIR=local_publishConf
+}
+
+function set_chosen_environment_fields() {
+  #Initialize variables based on the chosen environment
+  TARGET_COMMAND_SERVER=${LOCAL_SOLR_SERVER}
+  ZOOKEEPER_PORT=${LOCAL_ZOOKEEPER_PORT}
+  SOLR_PORT=${LOCAL_SOLR_PORT}
+  SOLR_BINARIES_DIR=${LOCAL_SOLR_BINARIES_DIR}
+
+  COLLECTION_NAME=${LOCAL_PUBLISH_COLLECTION}
+  TARGET_SOLR_CONF_DIR=${LOCAL_PUBLISH_SOLR_CONF_DIR}
+}
+
+function print_chosen_environment_and_options() {
+  printf "%-40s \n" "Selected environment is:"
+  printf "%-40s %s\n" "Environment selected:" "${ENVIRONMENT}"
+  printf "%-40s %s\n" "Index environment selected:" "${INDEX_ENVIRONMENT}"
+  printf "%-40s %s\n" "Branch or PR specified:" "${BRANCH_OR_PR_NUMBER}"
+  printf "%-40s %s\n" "Server to execute update:" "${TARGET_COMMAND_SERVER}"
+  printf "%-40s %s\n" "Collection name chosen:" "${COLLECTION_NAME}"
+  printf "%-40s %s\n" "Target solr configuration directory:" "${TARGET_SOLR_CONF_DIR}"
+  printf "%-40s %s\n" "Zookeeper port:" "${ZOOKEEPER_PORT}"
+  printf "%-40s %s\n" "Solr port:" "${SOLR_PORT}"
+}
+
+function git_get_selected_branch() {
+  #Check first if there is a branch
+  git -C "${GIT_REPO_DIR}" checkout "${BRANCH_OR_PR_NUMBER}"
+  if [ "$?" -ne "0" ]; then
+    printf "WARNING: Branch: %s, could not be found. Trying pull request..\n" "${BRANCH_OR_PR_NUMBER}"
+    #Verify first if the value is actually a number
+    number_regex='^[0-9]+$'
+    if ! [[ ${BRANCH_OR_PR_NUMBER} =~ ${number_regex} ]]; then
+      printf "ERROR: Value %s is not a number. Exiting..\n" "${BRANCH_OR_PR_NUMBER}"
+      exit 1
+    fi
+    git -C "${GIT_REPO_DIR}" fetch -u origin "pull/${BRANCH_OR_PR_NUMBER}/head:${PULL_REQUEST_PREFIX}${BRANCH_OR_PR_NUMBER}"
+    #Verify if BRANCH_OR_PR_NUMBER specified could be fetched
+    if [ "$?" -ne "0" ]; then
+      printf "ERROR: Could not create branch from PR with number: %s. Exiting..\n" "${BRANCH_OR_PR_NUMBER}"
+      exit 1
+    fi
+    git -C "${GIT_REPO_DIR}" checkout "${PULL_REQUEST_PREFIX}${BRANCH_OR_PR_NUMBER}"
+    printf "Show git summary of PR:\n"
+    git -C "${GIT_REPO_DIR}" show --summary
+  else
+    git -C "${GIT_REPO_DIR}" pull
+  fi
+  #Resolve HEAD instead of BRANCH_OR_PR_NUMBER: for a pull request the checked out branch is ${PULL_REQUEST_PREFIX}NUMBER, not the bare number
+  COMMIT_HASH="$(git -C "${GIT_REPO_DIR}" rev-parse --short HEAD | tr -d '\n')"
+}
+
+function rsync_local_dir_to_remote_dir() {
+  #TODO This could be avoided and the config sent directly to zookeeper. The downside would be that we always send all files.
+  local source="${GIT_REPO_DIR}${GIT_SOLR_CONF_SUBDIR}"
+  #local destination="$TARGET_COMMAND_SERVER:${TARGET_SOLR_CONF_ROOT_DIR}${TARGET_SOLR_CONF_DIR}"
+  local destination="${TARGET_SOLR_CONF_ROOT_DIR}${TARGET_SOLR_CONF_DIR}"
+  printf "Starting rsync from local directory: %s -> to directory: %s\n" "${source}" "${destination}"
+  mkdir -p "${destination}"
+  rsync --archive --compress --verbose --delete "${source}" "${destination}"
+}
+
+function git_remove_pull_request_branches() {
+  #Delete pull request branches to avoid excessive disk space usage
+  #Sed trims leading and trailing spaces
+  git -C "${GIT_REPO_DIR}" checkout master
+  local pull_request_branches
+  pull_request_branches=$(git -C "${GIT_REPO_DIR}" branch | sed 's/^ *//;s/ *$//' | grep "${PULL_REQUEST_PREFIX}")
+
+  #Set the field separator to new line
+  IFS=$'\n'
+  for pull_request_branch in $pull_request_branches
+  do
+    printf "Delete pull request: %s\n" "${pull_request_branch}"
+    git -C "${GIT_REPO_DIR}" branch --delete --force "${pull_request_branch}"
+  done
+  #Reset IFS
+  IFS=$' \t\n'
+}
+
+function zookeeper_find_current_and_old_configurations() {
+  printf "Check if there is current and old configuration.\n"
+  local zookeeper_command
+  zookeeper_command=$(zookeeper_create_command "-cmd ls /configs")
+
+  #Finds configurations of format #/configs/${COLLECTION_NAME}_ or auto created configurations such as /configs/${COLLECTION_NAME}.AUTOCREATED
+  #Temporarily also remove the #/configs/${TARGET_SOLR_CONF_DIR}_ (the first sed group)
+  CURRENT_AND_OLD_CONFIGURATION_PATHS=$(eval "$(echo "${zookeeper_command} | sed -n 's/^\s*\(\/configs\/\(${TARGET_SOLR_CONF_DIR}_[^\/]*\|${COLLECTION_NAME}_[^\/]*\|${COLLECTION_NAME}.AUTOCREATED\)\)\s.*$/\1/p'")")
+  echo "current and old configurations: ${CURRENT_AND_OLD_CONFIGURATION_PATHS}"
+}
+
+function zookeeper_upload_and_apply_new_configuration() {
+  local date_stamp
+  date_stamp=$(date --iso-8601=seconds)
+  local new_configuration_name=${COLLECTION_NAME}_${COMMIT_HASH}_${date_stamp}
+
+  printf "Uploading zookeeper new configuration: %s\n" "${new_configuration_name}"
+  local zookeeper_command
+  zookeeper_command=$(zookeeper_create_command "-cmd upconfig --confdir ${TARGET_SOLR_CONF_ROOT_DIR}${TARGET_SOLR_CONF_DIR} --confname ${new_configuration_name}")
+  #Stop if the upload failed, otherwise the collection would be pointed to a configuration that does not exist
+  if ! eval "${zookeeper_command}"; then
+    printf "ERROR: Could not upload configuration: %s. Exiting..\n" "${new_configuration_name}"
+    exit 1
+  fi
+  #Update collection to take effect of the new configuration.
+  #Using MODIFYCOLLECTION instead of RELOAD command because MODIFYCOLLECTION will re-apply the mapping between the collection with the configuration name and then reload it. This helps to avoid the collection being mapped to another configuration name.
+  printf "Starting solr MODIFYCOLLECTION command\n"
+  curl -v --get --data-urlencode "collection=${COLLECTION_NAME}" --data-urlencode "collection.configName=${new_configuration_name}" "http://localhost:${SOLR_PORT}/solr/admin/collections?action=MODIFYCOLLECTION"
+}
+
+function zookeeper_remove_current_and_old_configurations() {
+  local zookeeper_command
+  #Set the field separator to new line
+  echo "Remove current and old configuration"
+  IFS=$'\n'
+  for configuration_path in $CURRENT_AND_OLD_CONFIGURATION_PATHS
+  do
+    printf "Removing zookeeper configuration: %s\n" "${configuration_path}"
+    zookeeper_command=$(zookeeper_create_command "-cmd clear ${configuration_path}")
+    #Run the command itself; wrapping it in a command substitution would execute whatever it prints as a second command
+    eval "${zookeeper_command}"
+  done
+  #Reset IFS
+  IFS=$' \t\n'
+}
+
+function zookeeper_create_command() {
+  #First argument should be the '-cmd' part onwards
+  local common_command_part="java -Dlog4j.configurationFile=file://${SOLR_BINARIES_DIR}server/resources/log4j2.xml -classpath .:${SOLR_BINARIES_DIR}server/lib/ext/*:${SOLR_BINARIES_DIR}server/solr-webapp/webapp/WEB-INF/lib/* org.apache.solr.cloud.ZkCLI -zkhost ${LOCAL_ZOOKEEPER_SERVER}:${ZOOKEEPER_PORT}"
+  local unique_command_part="${1}"
+  echo "${common_command_part} ${unique_command_part}"
+}
+
+function execute_remote_ssh_command(){
+  #ssh ${TARGET_COMMAND_SERVER} "${1}"
+  "${1}"
+}
+
+main "$@"
diff --git a/pom.xml b/pom.xml
index c55c5cfb..c7993dad 100644
--- a/pom.xml
+++ b/pom.xml
@@ -32,7 +32,7 @@
     21
     2.3.0
-    14-SNAPSHOT
+    15-SNAPSHOT
     14-SNAPSHOT
     3.2.3
     2.16.7
diff --git a/src/main/java/eu/europeana/metis/sandbox/config/S3Config.java b/src/main/java/eu/europeana/metis/sandbox/config/S3Config.java
index 50df7c76..a6a64d8b 100644
--- a/src/main/java/eu/europeana/metis/sandbox/config/S3Config.java
+++ b/src/main/java/eu/europeana/metis/sandbox/config/S3Config.java
@@ -1,5 +1,7 @@
 package eu.europeana.metis.sandbox.config;
 
+import com.amazonaws.ClientConfiguration;
+import com.amazonaws.Protocol;
 import com.amazonaws.auth.AWSCredentials;
 import com.amazonaws.auth.AWSStaticCredentialsProvider;
 import com.amazonaws.auth.BasicAWSCredentials;
@@ -8,6 +10,7 @@
 import com.amazonaws.services.s3.AmazonS3ClientBuilder;
 import eu.europeana.metis.sandbox.domain.Bucket;
 import org.springframework.beans.factory.annotation.Value;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
 import org.springframework.context.annotation.Bean;
 import org.springframework.context.annotation.Configuration;
 
@@ -33,11 +36,42 @@ class S3Config {
   private String thumbnailsBucket;
 
+  /**
+   * S3 client for a local S3-compatible store, created only when
+   * {@code sandbox.s3.local-enabled} is {@code true}; it is set to plain HTTP and path-style access.
+   */
   @Bean
-  AmazonS3 s3Client() {
+  @ConditionalOnProperty(
+      name = "sandbox.s3.local-enabled",
+      havingValue = "true")
+  AmazonS3 s3ClientHttp() {
     AWSCredentials credentials = new BasicAWSCredentials(accessKey, secretKey);
+    ClientConfiguration clientConfig = new ClientConfiguration();
+    clientConfig.setProtocol(Protocol.HTTP);
     return AmazonS3ClientBuilder
         .standard()
         .withCredentials(new AWSStaticCredentialsProvider(credentials))
+        .withPathStyleAccessEnabled(true)
+        .withClientConfiguration(clientConfig)
+        .withEndpointConfiguration(
+            new EndpointConfiguration(endpoint, signingRegion))
+        .build();
+  }
+
+  /**
+   * Default S3 client, created when {@code sandbox.s3.local-enabled} is {@code false} or not set;
+   * it keeps the SDK default client configuration and uses path-style access.
+   */
+  @Bean
+  @ConditionalOnProperty(
+      name = "sandbox.s3.local-enabled",
+      havingValue = "false",
+      matchIfMissing = true)
+  AmazonS3 s3ClientHttps() {
+    AWSCredentials credentials = new BasicAWSCredentials(accessKey, secretKey);
+    return AmazonS3ClientBuilder
+        .standard()
+        .withCredentials(new AWSStaticCredentialsProvider(credentials))
+        .withPathStyleAccessEnabled(true)
         .withEndpointConfiguration(
             new EndpointConfiguration(endpoint, signingRegion))
         .build();
diff --git a/src/main/resources/sample.application.yml b/src/main/resources/sample.application.yml
index ceb6e22c..f5ee3463 100644
--- a/src/main/resources/sample.application.yml
+++ b/src/main/resources/sample.application.yml
@@ -138,6 +138,7 @@ sandbox:
     endpoint:
     signing-region:
     thumbnails-bucket:
+    local-enabled: true
   portal:
     publish:
       dataset-base-url:
diff --git a/src/test/resources/application.yml b/src/test/resources/application.yml
index 5fe58df3..53085069 100644
--- a/src/test/resources/application.yml
+++ b/src/test/resources/application.yml
@@ -138,6 +138,7 @@ sandbox:
     endpoint:
     signing-region:
     thumbnails-bucket:
+    local-enabled: false
   portal:
     publish:
       dataset-base-url: "http://metis-test"