From beb44c000dafc8df70922d0039a27961573ed1aa Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 17 Apr 2023 21:09:58 -0400 Subject: [PATCH 1/6] fix flaky WPS provider responses causing failure during their registration in weaver --- birdhouse/components/weaver/default.env | 8 +- .../components/weaver/post-docker-compose-up | 119 +++++++++++------- 2 files changed, 81 insertions(+), 46 deletions(-) diff --git a/birdhouse/components/weaver/default.env b/birdhouse/components/weaver/default.env index f608be972..bb3e66835 100644 --- a/birdhouse/components/weaver/default.env +++ b/birdhouse/components/weaver/default.env @@ -27,6 +27,8 @@ EXTRA_VARS=' $WEAVER_MANAGER_LOG_LEVEL $WEAVER_WORKER_LOG_LEVEL $WEAVER_WPS_PROVIDERS_MAX_TIME + $WEAVER_WPS_PROVIDERS_RETRY_COUNT + $WEAVER_WPS_PROVIDERS_RETRY_AFTER ' # extend the original 'VARS' from 'birdhouse/pavics-compose.sh' to employ them for template substitution # adding them to 'VARS', they will also be validated in case of override of 'default.env' using 'env.local' @@ -87,8 +89,12 @@ export WEAVER_WPS_WORKDIR="/tmp/wps_workdir/weaver" export WEAVER_MANAGER_LOG_LEVEL=INFO export WEAVER_WORKER_LOG_LEVEL=INFO -# control maximum timeout to abandon registration (duration in seconds) +# control maximum timeout to abandon registration (duration in seconds, across whole procedure) export WEAVER_WPS_PROVIDERS_MAX_TIME=120 +# control maximum retries to abandon registration (retries per provider) +export WEAVER_WPS_PROVIDERS_RETRY_COUNT=5 +# control interval time between retries (duration in seconds, counts toward maximum timeout) +export WEAVER_WPS_PROVIDERS_RETRY_AFTER=5 export DELAYED_EVAL=" $DELAYED_EVAL diff --git a/birdhouse/components/weaver/post-docker-compose-up b/birdhouse/components/weaver/post-docker-compose-up index 794a60973..04e5146f3 100755 --- a/birdhouse/components/weaver/post-docker-compose-up +++ b/birdhouse/components/weaver/post-docker-compose-up @@ -25,7 +25,11 @@ # list of provider names (comma or space delimited), all are assumed to be available at # "https://${PAVICS_FQDN_PUBLIC}${TWITCHER_PROTECTED_PATH}/" # WEAVER_WPS_PROVIDERS_MAX_TIME: -# limit script execution up to maximum this number of seconds +# limit script execution up to a maximum of this number of seconds +# WEAVER_WPS_PROVIDERS_RETRY_COUNT: +# number of permitted retries to register a given WPS provider +# WEAVER_WPS_PROVIDERS_RETRY_AFTER: +# number of seconds between each retry request as needed # # Following configurations are expected to be inherited from bird-house/weaver-component env.local/default.env: # - MAGPIE_ADMIN_USERNAME @@ -62,13 +66,6 @@ reset_state() { set +vx; eval "${old_state}" } -echo "Running: $0" - -MAGPIE_URL="https://${PAVICS_FQDN_PUBLIC}/magpie" -WEAVER_URL="https://${PAVICS_FQDN_PUBLIC}${TWITCHER_PROTECTED_PATH}/${WEAVER_MANAGER_NAME}" -WEAVER_WPS_PROVIDERS_MAX_TIME=${WEAVER_WPS_PROVIDERS_MAX_TIME:-120} -REQUEST_TIMEOUT=2 - # logging if [ ! -z "$TERM" ]; then YELLOW=${YELLOW:-$(tput setaf 3)} @@ -81,16 +78,37 @@ else fi PREFIX="[Weaver] " ERROR="${PREFIX}${RED}ERROR${NORMAL}: " -WARNING="${PREFIX}${YELLOW}WARNING${NORMAL}: " +WARN="${PREFIX}${YELLOW}WARNING${NORMAL}: " + +echo "${PREFIX}Running: $0" + +MAGPIE_URL="https://${PAVICS_FQDN_PUBLIC}/magpie" +WEAVER_URL="https://${PAVICS_FQDN_PUBLIC}${TWITCHER_PROTECTED_PATH}/${WEAVER_MANAGER_NAME}" +WEAVER_WPS_PROVIDERS_MAX_TIME=${WEAVER_WPS_PROVIDERS_MAX_TIME:-120} +WEAVER_WPS_PROVIDERS_RETRY_AFTER=${WEAVER_WPS_PROVIDERS_RETRY_AFTER:-5} +WEAVER_WPS_PROVIDERS_RETRY_COUNT=${WEAVER_WPS_PROVIDERS_RETRY_COUNT:-5} +# double echo and no quotes used on purpose to remove empty/extra newlines/spaces +WEAVER_WPS_PROVIDERS=$(echo $(echo "${WEAVER_WPS_PROVIDERS}" | tr ',' ' ')) +REQUEST_TIMEOUT=2 if [ -z "${WEAVER_WPS_PROVIDERS}" ]; then - echo "${WARNING}Nothing specified in WEAVER_WPS_PROVIDERS to register WPS remote providers." + echo "${WARN}Nothing specified in WEAVER_WPS_PROVIDERS to register WPS remote providers." reset_state exit 0 fi -echo " Requested Weaver WPS providers: [${WEAVER_WPS_PROVIDERS}]" -echo " Will retry requests at most for ${WEAVER_WPS_PROVIDERS_MAX_TIME}s" +if [ "${WEAVER_WPS_PROVIDERS_RETRY_COUNT}" -lt 0 ]; then + WEAVER_WPS_PROVIDERS_RETRY_AFTER=0 + WEAVER_WPS_PROVIDERS_RETRY_COUNT=0 +fi +if [ "${WEAVER_WPS_PROVIDERS_RETRY_AFTER}" -lt 0 ]; then + WEAVER_WPS_PROVIDERS_RETRY_AFTER=0 +fi + +echo "${PREFIX}Requested Weaver WPS providers: [${WEAVER_WPS_PROVIDERS}]" +echo "${PREFIX}Will retry requests at most for ${WEAVER_WPS_PROVIDERS_MAX_TIME}s" +echo "${PREFIX}Will retry registration of each provider up to ${WEAVER_WPS_PROVIDERS_RETRY_COUNT} times" +echo "${PREFIX}Will retry registration of each provider with ${WEAVER_WPS_PROVIDERS_RETRY_AFTER}s intervals" if [ -z "$WEAVER_CURL_IMAGE" ]; then WEAVER_CURL_IMAGE="curlimages/curl:7.87.0" @@ -220,9 +238,6 @@ while true; do printf "." done -# parse providers -WEAVER_WPS_PROVIDERS="$(echo "${WEAVER_WPS_PROVIDERS}" | tr ',' ' ')" - # move on to actual registration of WPS providers echo "${PREFIX}Using URL: [${WEAVER_URL}]" start_time="$(date -u +%s)" @@ -268,37 +283,51 @@ for prov in ${WEAVER_WPS_PROVIDERS}; do printf "." done - # unregister in case of multiple up/down to regenerate from scratch, don't care if NotFound returned - echo "${PREFIX}Unregistering any remote WPS provider matching [${prov}]." - curl_cmd --insecure --silent --location \ - -m ${REQUEST_TIMEOUT} \ - -w "${PREFIX}Delete [${prov}] response: %{http_code}" -o /dev/null \ - -b "${cookie}" \ - -X DELETE \ - "${WEAVER_URL}/providers/${prov}" - - # register the new provider and validate - printf "\n%s" "${PREFIX}Registering remote WPS provider [${prov}] on [${prov_url}]... " - resp=$( \ + retry=0 + retry_msg="" + total=${WEAVER_WPS_PROVIDERS_RETRY_COUNT} + while true; do + if [ ${retry} -ne 0 ]; then + retry_msg=" (retry: ${retry}/${total})" + fi + # unregister in case of multiple up/down to regenerate from scratch, don't care if NotFound returned + echo "${PREFIX}Unregistering any remote WPS provider matching [${prov}]${retry_msg}." curl_cmd --insecure --silent --location \ -m ${REQUEST_TIMEOUT} \ - -w "\n%{http_code}" \ + -w "${PREFIX}Delete [${prov}] response: %{http_code}${retry_msg}" -o /dev/null \ -b "${cookie}" \ - -H "Content-Type: application/json" \ - -X POST \ - -d "{\"id\": \"${prov}\", \"url\": \"${prov_url}\"}" \ - "${WEAVER_URL}/providers" \ - ) - ret=$? - code=$(echo "${resp}" | tail -n -1) - body=$(echo "${resp}" | head -n -1) - if [ ${ret} -ne 0 ] || [ "${code}" -ne 201 ]; then - printf "\n%s\n" "${ERROR}Failed registration of remote WPS provider [${prov}] on [${prov_url}]." - printf "Error:\n%s\n" "${body}" - reset_state - exit 23 - fi - echo "OK!" + -X DELETE \ + "${WEAVER_URL}/providers/${prov}" + + # register the new provider and validate + printf "\n%s" "${PREFIX}Registering remote WPS provider [${prov}] on [${prov_url}]${retry_msg}... " + resp=$( \ + curl_cmd --insecure --silent --location \ + -m ${REQUEST_TIMEOUT} \ + -w "\n%{http_code}" \ + -b "${cookie}" \ + -H "Content-Type: application/json" \ + -X POST \ + -d "{\"id\": \"${prov}\", \"url\": \"${prov_url}\"}" \ + "${WEAVER_URL}/providers" \ + ) + ret=$? + code=$(echo "${resp}" | tail -n -1) + body=$(echo "${resp}" | head -n -1) + if [ ${ret} -ne 0 ] || [ "${code}" -ne 201 ]; then + printf "\n%s\n" "${WARN}Failed registration of remote WPS provider [${prov}] on [${prov_url}]${retry_msg}." + printf "Error:\n%s\n" "${body}" + if [ ${retry} -gt ${total} ]; then + echo "${ERROR}Maximum retry attempts ${total} reached for WPS provider [${prov}]. Aborting." + reset_state + exit 23 + fi + echo "${WARN}Will retry after ${WEAVER_WPS_PROVIDERS_RETRY_AFTER}s..." + sleep ${WEAVER_WPS_PROVIDERS_RETRY_AFTER} + retry=$((retry+1)) + fi + echo "OK!" # displayed on same line after first registration printf + done done echo "${PREFIX}All Weaver remote WPS providers registered successfully!" @@ -317,10 +346,10 @@ ${PAVICS_COMPOSE} exec weaver-worker bash "${CELERY_HEALTHCHECK}" | tee "${PAVIC ret_worker=$? out_worker=$(cat "${PAVICS_LOG_DIR}/weaver-worker.log" | tail -n 1 | grep -c "ERROR") if [ ${ret_weaver} -ne 0 ] || [ ${ret_worker} -ne 0 ] || [ "${out_weaver}" -ne 0 ] || [ "${out_worker}" -ne 0 ]; then - echo "Weaver WebApp and/or Worker Celery tasks were not ready. Restarting both..." + echo "${PREFIX}Weaver WebApp and/or Worker Celery tasks were not ready. Restarting both..." ${PAVICS_COMPOSE} restart weaver weaver-worker else - echo "Weaver WebApp and/or Worker Celery tasks are both ready." + echo "${PREFIX}Weaver WebApp and/or Worker Celery tasks are both ready." fi reset_state From 2ed1ba7d3b29ca6504576a66483dec9425e6c18a Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 17 Apr 2023 21:32:34 -0400 Subject: [PATCH 2/6] patch for loop break --- birdhouse/components/weaver/post-docker-compose-up | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/birdhouse/components/weaver/post-docker-compose-up b/birdhouse/components/weaver/post-docker-compose-up index 04e5146f3..213043f8b 100755 --- a/birdhouse/components/weaver/post-docker-compose-up +++ b/birdhouse/components/weaver/post-docker-compose-up @@ -325,8 +325,10 @@ for prov in ${WEAVER_WPS_PROVIDERS}; do echo "${WARN}Will retry after ${WEAVER_WPS_PROVIDERS_RETRY_AFTER}s..." sleep ${WEAVER_WPS_PROVIDERS_RETRY_AFTER} retry=$((retry+1)) + else + echo "OK!" # displayed on same line after first registration printf + break fi - echo "OK!" # displayed on same line after first registration printf done done echo "${PREFIX}All Weaver remote WPS providers registered successfully!" From 073fffc2749359e6e37ae1cdce35a18ae2545de0 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 17 Apr 2023 21:35:31 -0400 Subject: [PATCH 3/6] add missing prefix --- birdhouse/components/weaver/post-docker-compose-up | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/birdhouse/components/weaver/post-docker-compose-up b/birdhouse/components/weaver/post-docker-compose-up index 213043f8b..6fc35cab0 100755 --- a/birdhouse/components/weaver/post-docker-compose-up +++ b/birdhouse/components/weaver/post-docker-compose-up @@ -178,7 +178,7 @@ if [ -z "${cookie}" ]; then fi # validate that Magpie token retrieved is adequate -printf "Validate Magpie token..." +printf "%s" "${PREFIX}Validate Magpie token..." resp=$( \ curl_cmd --insecure --silent --location \ -m ${REQUEST_TIMEOUT} \ From 09697ead5e6dd09707467f786e389c7dc526eecd Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 17 Apr 2023 22:02:48 -0400 Subject: [PATCH 4/6] fix broken pipe error --- birdhouse/components/weaver/post-docker-compose-up | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/birdhouse/components/weaver/post-docker-compose-up b/birdhouse/components/weaver/post-docker-compose-up index 6fc35cab0..daa0da9ce 100755 --- a/birdhouse/components/weaver/post-docker-compose-up +++ b/birdhouse/components/weaver/post-docker-compose-up @@ -115,10 +115,10 @@ if [ -z "$WEAVER_CURL_IMAGE" ]; then fi # POSIX portable RNG if RANDOM does not exist on the current shell -RANDOM_NUMBER=${RANDOM:-$(tr -dc 0-9 < /dev/urandom | head -c 5)} +RANDOM_NUMBER=${RANDOM:-$(tr -dc 0-9 < /dev/urandom | (head -c 5 ; dd of=/dev/null))} # To know when a docker run was started in case it hangs. -DOCKER_RUN_TAG="weaver_post_curl_`date -Isecond | sed 's/:/_/g' | sed 's/+/p/g'`_${RANDOM_NUMBER}" +DOCKER_RUN_TAG="weaver_post_curl_$(date -Isecond | sed 's/:/_/g' | sed 's/+/p/g')_${RANDOM_NUMBER}" curl_cmd() { docker run --rm --name "${DOCKER_RUN_TAG}" "${WEAVER_CURL_IMAGE}" "$@" } From 9709bd2fea5cca351a280b05a169dbd9c1813dfd Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 18 Apr 2023 11:39:16 -0400 Subject: [PATCH 5/6] update random number broken pipe handling strategy --- birdhouse/components/weaver/post-docker-compose-up | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/birdhouse/components/weaver/post-docker-compose-up b/birdhouse/components/weaver/post-docker-compose-up index daa0da9ce..10de53a85 100755 --- a/birdhouse/components/weaver/post-docker-compose-up +++ b/birdhouse/components/weaver/post-docker-compose-up @@ -115,7 +115,7 @@ if [ -z "$WEAVER_CURL_IMAGE" ]; then fi # POSIX portable RNG if RANDOM does not exist on the current shell -RANDOM_NUMBER=${RANDOM:-$(tr -dc 0-9 < /dev/urandom | (head -c 5 ; dd of=/dev/null))} +RANDOM_NUMBER=${RANDOM:-$(tr -dc 0-9 < /dev/urandom 2>/dev/null | head -c 5)} # To know when a docker run was started in case it hangs. DOCKER_RUN_TAG="weaver_post_curl_$(date -Isecond | sed 's/:/_/g' | sed 's/+/p/g')_${RANDOM_NUMBER}" From fef989e58eb4af58bd028160c65959a35fd10c4c Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 19 Apr 2023 11:04:32 -0400 Subject: [PATCH 6/6] remove extra comma causing tuple as release time in canarieapi configs --- .../config/canarie-api/canarie_api_monitoring.py.template | 2 +- .../config/canarie-api/canarie_api_monitoring.py.template | 2 +- .../raven/config/canarie-api/canarie_api_monitoring.py.template | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/birdhouse/config/flyingpigeon/config/canarie-api/canarie_api_monitoring.py.template b/birdhouse/config/flyingpigeon/config/canarie-api/canarie_api_monitoring.py.template index 949136c27..6af7a86c8 100644 --- a/birdhouse/config/flyingpigeon/config/canarie-api/canarie_api_monitoring.py.template +++ b/birdhouse/config/flyingpigeon/config/canarie-api/canarie_api_monitoring.py.template @@ -1,5 +1,5 @@ FLYINGPIGEON_VERSION = "${FLYINGPIGEON_VERSION}" -FLYINGPIGEON_RELEASE = get_release_time_from_repo_tag("github", "bird-house/flyingpigeon", FLYINGPIGEON_VERSION), +FLYINGPIGEON_RELEASE = get_release_time_from_repo_tag("github", "bird-house/flyingpigeon", FLYINGPIGEON_VERSION) SERVICES['flyingpigeon'] = { 'info': { diff --git a/birdhouse/config/hummingbird/config/canarie-api/canarie_api_monitoring.py.template b/birdhouse/config/hummingbird/config/canarie-api/canarie_api_monitoring.py.template index 669f28346..32b19dd48 100644 --- a/birdhouse/config/hummingbird/config/canarie-api/canarie_api_monitoring.py.template +++ b/birdhouse/config/hummingbird/config/canarie-api/canarie_api_monitoring.py.template @@ -1,5 +1,5 @@ HUMMINGBIRD_VERSION = "${HUMMINGBIRD_VERSION}" -HUMMINGBIRD_RELEASE = get_release_time_from_repo_tag("github", "bird-house/hummingbird", HUMMINGBIRD_VERSION), +HUMMINGBIRD_RELEASE = get_release_time_from_repo_tag("github", "bird-house/hummingbird", HUMMINGBIRD_VERSION) SERVICES['hummingbird'] = { 'info': { diff --git a/birdhouse/config/raven/config/canarie-api/canarie_api_monitoring.py.template b/birdhouse/config/raven/config/canarie-api/canarie_api_monitoring.py.template index 1de91359f..8b22c8441 100644 --- a/birdhouse/config/raven/config/canarie-api/canarie_api_monitoring.py.template +++ b/birdhouse/config/raven/config/canarie-api/canarie_api_monitoring.py.template @@ -1,5 +1,5 @@ RAVEN_VERSION = "${RAVEN_VERSION}" -RAVEN_RELEASE = get_release_time_from_repo_tag("github", "Ouranosinc/raven", RAVEN_VERSION), +RAVEN_RELEASE = get_release_time_from_repo_tag("github", "Ouranosinc/raven", RAVEN_VERSION) SERVICES['raven'] = { 'info': {