From 686d51e028c07888ec59ff7c7e5aa705d1ce8cb4 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 20 Dec 2024 12:44:09 -0500 Subject: [PATCH 1/7] update weaver 6.1.0 --- CHANGES.md | 23 +++++++- .../weaver/config/magpie/config.yml.template | 7 +++ .../conf.extra-service.d/weaver.conf.template | 56 +++++++++++++------ birdhouse/components/weaver/default.env | 26 ++++++++- .../weaver/docker-compose-extra.yml | 4 ++ 5 files changed, 96 insertions(+), 20 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 7be494e32..3924de9d2 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -15,7 +15,28 @@ [Unreleased](https://github.com/bird-house/birdhouse-deploy/tree/master) (latest) ------------------------------------------------------------------------------------------------------------------ -[//]: # (list changes here, using '-' for each new entry, remove this when items are added) +## Changes + +- Weaver: update `weaver` component default version to [6.1.0](https://github.com/crim-ca/weaver/tree/6.1.0). + + ### Relevant changes + * Add support of *OGC API - Processes - Part 3: Workflows and Chaining* with *Nested Process* ad-hoc workflow. + * Add support of *OGC API - Processes - Part 3: Workflows and Chaining* with *Remote Collection* (STAC and OGC). + * Add support of *OGC API - Processes - Part 4: Job Management* endpoints for job "pending" creation and execution. + * Add support of *OGC API - Processes - Part 4: Job Management* endpoints for job provenance as *W3C PROV* metadata. + * Multiple alignment and fixes related to latest *OGC API - Processes - Part 1: Core* definitions regarding handling + of input parameters and headers when submitting jobs to obtain alternate result representations and behavior. + * Add HTML responses by default via web browsers or as requested by `Accept` headers or `f` query parameter. 
+ * Add improved CWL schema validation with `Weaver`-specific definitions where applicable + (see https://github.com/crim-ca/weaver/tree/master/weaver/schemas/cwl). + +- Weaver: modifications to `proxy` configurations for `weaver` + + * Add `WEAVER_ALT_PREFIX` optional variable that auto-configures `WEAVER_ALT_PREFIX_PROXY_LOCATION`, + which allows setting an alternate endpoint to redirect requests to `weaver`. + It uses `/ogcapi` by default which is a very common expectation from servers supporting OGC standards. + * Use the `TWITCHER_VERIFY_PATH` approach to accelerate access of `weaver` resources authorization. + * Modify proxy pass definitions and URL prefixes to resolve correctly with HTML resources. [2.7.0](https://github.com/bird-house/birdhouse-deploy/tree/2.7.0) (2024-12-19) ------------------------------------------------------------------------------------------------------------------ diff --git a/birdhouse/components/weaver/config/magpie/config.yml.template b/birdhouse/components/weaver/config/magpie/config.yml.template index cacab5393..5ce8dcaa7 100644 --- a/birdhouse/components/weaver/config/magpie/config.yml.template +++ b/birdhouse/components/weaver/config/magpie/config.yml.template @@ -109,6 +109,13 @@ permissions: group: anonymous action: create + # HTML rendering files + - service: ${WEAVER_MANAGER_NAME} + resource: /static + permission: read + group: anonymous + action: create + # Process deployment (write) and listing (read) # use 'read-match' to allow only listing, and not describe underlying processes (require 'read' on them individually) - service: ${WEAVER_MANAGER_NAME} diff --git a/birdhouse/components/weaver/config/proxy/conf.extra-service.d/weaver.conf.template b/birdhouse/components/weaver/config/proxy/conf.extra-service.d/weaver.conf.template index 9a3d5b91e..1be17aff5 100644 --- a/birdhouse/components/weaver/config/proxy/conf.extra-service.d/weaver.conf.template +++ 
b/birdhouse/components/weaver/config/proxy/conf.extra-service.d/weaver.conf.template @@ -1,25 +1,47 @@ + location = /weaver-auth { + internal; + # note: using 'TWITCHER_VERIFY_PATH' path to avoid performing the request via proxy 'TWITCHER_PROTECTED_PATH' + # This ensures that access is validated for the user, but does not trigger its access/download twice. + # It is also more efficient, since less contents are transferred/buffered. + proxy_pass ${BIRDHOUSE_PROXY_SCHEME}://${BIRDHOUSE_FQDN_PUBLIC}${TWITCHER_VERIFY_PATH}/$request_uri; + proxy_pass_request_body off; + proxy_set_header Host $host; + proxy_set_header Content-Length ""; + proxy_set_header X-Original-URI $request_uri; + proxy_set_header X-Forwarded-Proto $real_scheme; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Host $host:$server_port; + } + + location = /${WEAVER_MANAGER_NAME} { return 302 /${WEAVER_MANAGER_NAME}/; } + location ~ ^/${WEAVER_MANAGER_NAME}(.*)$ { + auth_request /weaver-auth; + auth_request_set $auth_status $upstream_status; + + # NOTE: + # Inject the 'WEAVER_MANAGER_NAME' prefix here to align with 'SCRIPT_NAME' in the docker-compose config. + # This is needed to help UI elements resolve the full URI path with proxy service prefixes since the + # generated locations returned that must be interpreted/retrieved by the client/browser would otherwise + # not be aware of the proxy redirection path prefix, leading to unresolved resources. 
+ proxy_pass http://weaver:4001/${WEAVER_MANAGER_NAME}$1; + proxy_set_header Host $http_host; + proxy_set_header X-Original-URI $request_uri; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $real_scheme; + proxy_set_header X-Forwarded-Host $http_host:$server_port; + proxy_buffering off; + } + # NOTE: # Redirect to internal network of twitcher with Weaver root endpoint and alias allows to set # the same 'magpie' permissions on the 'weaver' service defined by "WEAVER_MANAGER_NAME". # This allows verification of the same service user/group permissions references regardless # whether the *shortcut* Weaver endpoint, the alias or the explicit 'twitcher' proxy route is used. - # redirect EMS/ADES to actual secured Weaver path - #location /${WEAVER_CONFIG} { - # return 302 ${BIRDHOUSE_PROXY_SCHEME}://${BIRDHOUSE_FQDN_PUBLIC}${TWITCHER_PROTECTED_PATH}/${WEAVER_MANAGER_NAME}; - #} - - location /${WEAVER_MANAGER_NAME} { - proxy_pass ${BIRDHOUSE_PROXY_SCHEME}://${BIRDHOUSE_FQDN_PUBLIC}${TWITCHER_PROTECTED_PATH}/${WEAVER_MANAGER_NAME}; - proxy_set_header Host $host; - proxy_buffering off; - include /etc/nginx/conf.d/cors.include; + location = ${TWITCHER_PROTECTED_PATH}/${WEAVER_MANAGER_NAME} { return 302 /${WEAVER_MANAGER_NAME}/; } + location ~ ^${TWITCHER_PROTECTED_PATH}/${WEAVER_MANAGER_NAME}(.*)\$ { + return 302 /${WEAVER_MANAGER_NAME}$1; } - # NOTE: - # this is needed only if not using the location already provided by the core configuration - # see 'birdhouse/components/proxy/conf.d/all-services.include.template' - # location where process job outputs will be accessible - #location ^~ ${WEAVER_WPS_OUTPUTS_PATH}/ { - # alias ${WEAVER_WPS_OUTPUTS_DIR}/; - #} + # optional alternate endpoint to access weaver (see 'components/weaver/default.env') + ${WEAVER_ALT_PREFIX_PROXY_LOCATION} diff --git a/birdhouse/components/weaver/default.env b/birdhouse/components/weaver/default.env index 97ab1623e..7f048c200 100644 --- 
a/birdhouse/components/weaver/default.env +++ b/birdhouse/components/weaver/default.env @@ -29,6 +29,7 @@ EXTRA_VARS=' $WEAVER_WPS_PROVIDERS_MAX_TIME $WEAVER_WPS_PROVIDERS_RETRY_COUNT $WEAVER_WPS_PROVIDERS_RETRY_AFTER + $WEAVER_ALT_PREFIX_PROXY_LOCATION ' # extend the original 'VARS' from 'birdhouse/birdhouse-compose.sh' to employ them for template substitution # adding them to 'VARS', they will also be validated in case of override of 'default.env' using 'env.local' @@ -36,6 +37,7 @@ VARS="$VARS $EXTRA_VARS" OPTIONAL_VARS=" $OPTIONAL_VARS + \$WEAVER_ALT_PREFIX \$WEAVER_DOCKER \$WEAVER_VERSION \$WEAVER_WORKER_IMAGE @@ -53,7 +55,7 @@ OPTIONAL_VARS=" export WEAVER_CONFIG=HYBRID # default release version that will be used to fetch docker images (API mananger & celery workers services) -export WEAVER_VERSION=5.6.1 +export WEAVER_VERSION=6.1.0 export WEAVER_DOCKER=pavics/weaver export WEAVER_IMAGE='${WEAVER_DOCKER}:${WEAVER_VERSION}' export WEAVER_MANAGER_IMAGE='${WEAVER_IMAGE}-manager' @@ -63,7 +65,8 @@ export WEAVER_IMAGE_URI='registry.hub.docker.com/${WEAVER_IMAGE}' # default release of the MongoDB version employed by Weaver # NOTE: # MongoDB>=5.0 is REQUIRED for Weaver>=4.5.0 -export WEAVER_MONGODB_VERSION=5.0.4 +# MongoDB==7.x works, but default remains 5.0 to avoid DB migration issues (update manually as desired) +export WEAVER_MONGODB_VERSION=5.0 # URL is used by both Weaver API and Celery Worker # it should contain the docker service name as host to map using shared link between images # if credentials are desired, they can be defined with the override of the URL variable @@ -96,6 +99,17 @@ export WEAVER_WPS_OUTPUTS_PATH="/wpsoutputs/weaver" export WEAVER_WPS_OUTPUTS_DIR='${BIRDHOUSE_WPS_OUTPUTS_DIR}/weaver' export WEAVER_WPS_WORKDIR="/tmp/wps_workdir/weaver" +# Optional alternate endpoint that will redirect to Weaver. +# If explicitly set to empty value, it will not be configured in the proxy. 
+export WEAVER_ALT_PREFIX=/ogcapi +export WEAVER_ALT_PREFIX_PROXY_LOCATION=' + $([ -z "${WEAVER_ALT_PREFIX}" ] && echo "" || echo " + location ~ ^${WEAVER_ALT_PREFIX}(.*)\$ { + return 302 /${WEAVER_MANAGER_NAME}\$1; + } +") +' + # logging export WEAVER_MANAGER_LOG_LEVEL=INFO export WEAVER_WORKER_LOG_LEVEL=INFO @@ -116,6 +130,7 @@ export WEAVER_UNREGISTER_DROPPED_PROVIDERS="False" export DELAYED_EVAL=" $DELAYED_EVAL + WEAVER_ALT_PREFIX_PROXY_LOCATION WEAVER_WPS_OUTPUTS_DIR WEAVER_MONGODB_DATA_DIR WEAVER_MONGODB_URL @@ -124,3 +139,10 @@ export DELAYED_EVAL=" WEAVER_MANAGER_IMAGE WEAVER_WORKER_IMAGE " + +COMPONENT_DEPENDENCIES=" + $COMPONENT_DEPENDENCIES + ./components/wps_outputs-volume + ./components/magpie + ./components/twitcher +" diff --git a/birdhouse/components/weaver/docker-compose-extra.yml b/birdhouse/components/weaver/docker-compose-extra.yml index 4fa6d5ee8..0b2871519 100644 --- a/birdhouse/components/weaver/docker-compose-extra.yml +++ b/birdhouse/components/weaver/docker-compose-extra.yml @@ -19,6 +19,10 @@ services: image: ${WEAVER_MANAGER_IMAGE} environment: HOSTNAME: ${BIRDHOUSE_FQDN} + # 'HTTP_HOST' and 'SCRIPT_NAMe' are used to guide pyramid in the resolution of resources, such as + # when invoking the 'static_url' endpoint, so it can be made aware of reverse-proxy context + HTTP_HOST: ${BIRDHOUSE_FQDN} + SCRIPT_NAME: /${WEAVER_MANAGER_NAME} FORWARDED_ALLOW_IPS: "*" #env_file: # - ./components/mongodb/credentials.env From be78fa57dfe25f2690d113707c463e135517d53c Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 20 Dec 2024 17:17:26 -0500 Subject: [PATCH 2/7] fix typo --- birdhouse/components/weaver/docker-compose-extra.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/birdhouse/components/weaver/docker-compose-extra.yml b/birdhouse/components/weaver/docker-compose-extra.yml index 0b2871519..58f5eed3f 100644 --- a/birdhouse/components/weaver/docker-compose-extra.yml +++ 
b/birdhouse/components/weaver/docker-compose-extra.yml @@ -19,7 +19,7 @@ services: image: ${WEAVER_MANAGER_IMAGE} environment: HOSTNAME: ${BIRDHOUSE_FQDN} - # 'HTTP_HOST' and 'SCRIPT_NAMe' are used to guide pyramid in the resolution of resources, such as + # 'HTTP_HOST' and 'SCRIPT_NAME' are used to guide pyramid in the resolution of resources, such as # when invoking the 'static_url' endpoint, so it can be made aware of reverse-proxy context HTTP_HOST: ${BIRDHOUSE_FQDN} SCRIPT_NAME: /${WEAVER_MANAGER_NAME} From aba090e8be02a92882c593f6ca9241061533476c Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 20 Dec 2024 17:31:55 -0500 Subject: [PATCH 3/7] fix missing query params forwarding for weaver + partial patch twitcher-proxy-weaver redirect to weaver alias --- .../conf.extra-service.d/weaver.conf.template | 16 ++++++++++------ birdhouse/components/weaver/default.env | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/birdhouse/components/weaver/config/proxy/conf.extra-service.d/weaver.conf.template b/birdhouse/components/weaver/config/proxy/conf.extra-service.d/weaver.conf.template index 1be17aff5..6d6d2d774 100644 --- a/birdhouse/components/weaver/config/proxy/conf.extra-service.d/weaver.conf.template +++ b/birdhouse/components/weaver/config/proxy/conf.extra-service.d/weaver.conf.template @@ -14,8 +14,10 @@ proxy_set_header X-Forwarded-Host $host:$server_port; } - location = /${WEAVER_MANAGER_NAME} { return 302 /${WEAVER_MANAGER_NAME}/; } - location ~ ^/${WEAVER_MANAGER_NAME}(.*)$ { + location = /${WEAVER_MANAGER_NAME} { + return 302 /${WEAVER_MANAGER_NAME}/$is_args$args; + } + location ~ ^/${WEAVER_MANAGER_NAME}/(.*)$ { auth_request /weaver-auth; auth_request_set $auth_status $upstream_status; @@ -24,7 +26,7 @@ # This is needed to help UI elements resolve the full URI path with proxy service prefixes since the # generated locations returned that must be interpreted/retrieved by the client/browser would otherwise # not be aware 
of the proxy redirection path prefix, leading to unresolved resources. - proxy_pass http://weaver:4001/${WEAVER_MANAGER_NAME}$1; + proxy_pass http://weaver:4001/${WEAVER_MANAGER_NAME}$1$is_args$args; proxy_set_header Host $http_host; proxy_set_header X-Original-URI $request_uri; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; @@ -38,9 +40,11 @@ # the same 'magpie' permissions on the 'weaver' service defined by "WEAVER_MANAGER_NAME". # This allows verification of the same service user/group permissions references regardless # whether the *shortcut* Weaver endpoint, the alias or the explicit 'twitcher' proxy route is used. - location = ${TWITCHER_PROTECTED_PATH}/${WEAVER_MANAGER_NAME} { return 302 /${WEAVER_MANAGER_NAME}/; } - location ~ ^${TWITCHER_PROTECTED_PATH}/${WEAVER_MANAGER_NAME}(.*)\$ { - return 302 /${WEAVER_MANAGER_NAME}$1; + location = ${TWITCHER_PROTECTED_PATH}/${WEAVER_MANAGER_NAME} { + return 301 /${WEAVER_MANAGER_NAME}/$is_args$args; + } + location ${TWITCHER_PROTECTED_PATH}/${WEAVER_MANAGER_NAME}/ { + return 301 /${WEAVER_MANAGER_NAME}/$is_args$args; } # optional alternate endpoint to access weaver (see 'components/weaver/default.env') diff --git a/birdhouse/components/weaver/default.env b/birdhouse/components/weaver/default.env index 7f048c200..84dcb3544 100644 --- a/birdhouse/components/weaver/default.env +++ b/birdhouse/components/weaver/default.env @@ -105,7 +105,7 @@ export WEAVER_ALT_PREFIX=/ogcapi export WEAVER_ALT_PREFIX_PROXY_LOCATION=' $([ -z "${WEAVER_ALT_PREFIX}" ] && echo "" || echo " location ~ ^${WEAVER_ALT_PREFIX}(.*)\$ { - return 302 /${WEAVER_MANAGER_NAME}\$1; + return 302 /${WEAVER_MANAGER_NAME}\$1\$is_args\$args; } ") ' From f16df97196f918ba34bdcb92369c343b78b7837d Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 20 Dec 2024 17:32:58 -0500 Subject: [PATCH 4/7] replace 302 by 301 for weaver redirects --- .../config/proxy/conf.extra-service.d/weaver.conf.template | 2 +- 
birdhouse/components/weaver/default.env | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/birdhouse/components/weaver/config/proxy/conf.extra-service.d/weaver.conf.template b/birdhouse/components/weaver/config/proxy/conf.extra-service.d/weaver.conf.template index 6d6d2d774..6de09bee4 100644 --- a/birdhouse/components/weaver/config/proxy/conf.extra-service.d/weaver.conf.template +++ b/birdhouse/components/weaver/config/proxy/conf.extra-service.d/weaver.conf.template @@ -15,7 +15,7 @@ } location = /${WEAVER_MANAGER_NAME} { - return 302 /${WEAVER_MANAGER_NAME}/$is_args$args; + return 301 /${WEAVER_MANAGER_NAME}/$is_args$args; } location ~ ^/${WEAVER_MANAGER_NAME}/(.*)$ { auth_request /weaver-auth; diff --git a/birdhouse/components/weaver/default.env b/birdhouse/components/weaver/default.env index 84dcb3544..c66932797 100644 --- a/birdhouse/components/weaver/default.env +++ b/birdhouse/components/weaver/default.env @@ -105,7 +105,7 @@ export WEAVER_ALT_PREFIX=/ogcapi export WEAVER_ALT_PREFIX_PROXY_LOCATION=' $([ -z "${WEAVER_ALT_PREFIX}" ] && echo "" || echo " location ~ ^${WEAVER_ALT_PREFIX}(.*)\$ { - return 302 /${WEAVER_MANAGER_NAME}\$1\$is_args\$args; + return 301 /${WEAVER_MANAGER_NAME}\$1\$is_args\$args; } ") ' From e551143f45f0a77e001836a3ec928301e5a63ebe Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 20 Dec 2024 17:44:17 -0500 Subject: [PATCH 5/7] add info about WEAVER_ALT_PREFIX to docs --- birdhouse/components/README.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/birdhouse/components/README.rst b/birdhouse/components/README.rst index 0adb96657..d0c64a5b4 100644 --- a/birdhouse/components/README.rst +++ b/birdhouse/components/README.rst @@ -448,6 +448,16 @@ Customizing the Component Further ``docker-compose-extra.yml`` could be needed to define any other ``volumes`` entries where these component would need to be mounted to. 
+ - Optionally, set ``WEAVER_ALT_PREFIX`` with any desired prefix location to use as an alternate alias + for the ``/weaver/`` endpoint. The ``/weaver/`` endpoint will remain available. + The ``WEAVER_ALT_PREFIX`` alias defines an *additional* equivalent location to access the service. + By default, ``/ogcapi`` is used, as it is a common prefix for this suite of OGC standards. + + Note that custom prefix values, if specified, should start with a leading ``/`` and leave out any trailing ``/``. + The prefix can also use multiple levels as desired (e.g.: ``/my/custom/path``). + + If the original ``/weaver/`` endpoint is deemed sufficient, and you would rather omit this additional alias + entirely, the ``WEAVER_ALT_PREFIX`` variable should be explicitly set to an empty value. .. _finch: https://github.com/bird-house/finch From af450b56e2609230e1256673bf48419af7ee7d2d Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 20 Dec 2024 17:59:50 -0500 Subject: [PATCH 6/7] revert slashes breaking for weaver script-name --- .../config/proxy/conf.extra-service.d/weaver.conf.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/birdhouse/components/weaver/config/proxy/conf.extra-service.d/weaver.conf.template b/birdhouse/components/weaver/config/proxy/conf.extra-service.d/weaver.conf.template index 6de09bee4..cd47bfff4 100644 --- a/birdhouse/components/weaver/config/proxy/conf.extra-service.d/weaver.conf.template +++ b/birdhouse/components/weaver/config/proxy/conf.extra-service.d/weaver.conf.template @@ -26,7 +26,7 @@ # This is needed to help UI elements resolve the full URI path with proxy service prefixes since the # generated locations returned that must be interpreted/retrieved by the client/browser would otherwise # not be aware of the proxy redirection path prefix, leading to unresolved resources. 
- proxy_pass http://weaver:4001/${WEAVER_MANAGER_NAME}$1$is_args$args; + proxy_pass http://weaver:4001/${WEAVER_MANAGER_NAME}/$1$is_args$args; proxy_set_header Host $http_host; proxy_set_header X-Original-URI $request_uri; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; From 5a983130a0456ad2b0bd2ce53389bd3db7b9205e Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Sat, 21 Dec 2024 00:26:37 -0500 Subject: [PATCH 7/7] bump weaver to 6.1.1 - bugfix for PROV endpoints --- CHANGES.md | 2 +- birdhouse/components/weaver/default.env | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index c0a507540..d332c8edf 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -17,7 +17,7 @@ ## Changes -- Weaver: update `weaver` component default version to [6.1.0](https://github.com/crim-ca/weaver/tree/6.1.0). +- Weaver: update `weaver` component default version to [6.1.1](https://github.com/crim-ca/weaver/tree/6.1.1). ### Relevant changes * Add support of *OGC API - Processes - Part 3: Workflows and Chaining* with *Nested Process* ad-hoc workflow. diff --git a/birdhouse/components/weaver/default.env b/birdhouse/components/weaver/default.env index c66932797..1b96d4cef 100644 --- a/birdhouse/components/weaver/default.env +++ b/birdhouse/components/weaver/default.env @@ -55,7 +55,7 @@ OPTIONAL_VARS=" export WEAVER_CONFIG=HYBRID # default release version that will be used to fetch docker images (API mananger & celery workers services) -export WEAVER_VERSION=6.1.0 +export WEAVER_VERSION=6.1.1 export WEAVER_DOCKER=pavics/weaver export WEAVER_IMAGE='${WEAVER_DOCKER}:${WEAVER_VERSION}' export WEAVER_MANAGER_IMAGE='${WEAVER_IMAGE}-manager'