diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 16fd47ca3..2bac4e005 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 2.5.3 +current_version = 2.6.0 commit = True tag = False tag_name = {new_version} @@ -30,11 +30,11 @@ search = {current_version} replace = {new_version} [bumpversion:file:RELEASE.txt] -search = {current_version} 2024-09-11T22:57:09Z +search = {current_version} 2024-11-19T13:53:14Z replace = {new_version} {utcnow:%Y-%m-%dT%H:%M:%SZ} [bumpversion:part:releaseTime] -values = 2024-09-11T22:57:09Z +values = 2024-11-19T13:53:14Z [bumpversion:file(version):birdhouse/components/canarie-api/docker_configuration.py.template] search = 'version': '{current_version}' diff --git a/CHANGES.md b/CHANGES.md index 6e4b5261a..4de3b4958 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -46,8 +46,80 @@ * ``/thanos-query``: a prometheus-like query interface to inspect the data stored by thanos * ``/thanos-minio``: a minio web console to inspect the data stored by minio. -- Update the prometheus version from `v2.19.0` to the current latest `v2.52.0`. This is required to support the interaction between - prometheus and thanos. +[2.6.0](https://github.com/bird-house/birdhouse-deploy/tree/2.6.0) (2024-11-19) +------------------------------------------------------------------------------------------------------------------ + +## Changes + +- Add the `prometheus-log-parser` optional component + + This component parses log files from other components and converts their logs to prometheus + metrics that are then ingested by the monitoring Prometheus instance (the one created by the + `components/monitoring` component). + + For more information on how this component reads log files and converts them to prometheus components see + the [log-parser](https://github.com/DACCS-Climate/log-parser/) documentation. 
+ + To configure this component: + + * set the `PROMETHEUS_LOG_PARSER_POLL_DELAY` variable to a number of seconds to set how often the log parser + checks if new lines have been added to log files (default: 1) + * set the `PROMETHEUS_LOG_PARSER_TAIL` variable to `"true"` to only parse new lines in log files. If unset, + this will parse all existing lines in the log file as well (default: `"true"`) + + To view all metrics exported by the log parser: + + * Navigate to the `https:///prometheus/graph` search page + * Put `{job="log_parser"}` in the search bar and click the "Execute" button + +- Update the prometheus version to the current latest `v2.53.3`. This is required to support + loading multiple prometheus scrape configuration files with the `scrape_config_files` + configuration option. + +[2.5.5](https://github.com/bird-house/birdhouse-deploy/tree/2.5.5) (2024-11-14) +------------------------------------------------------------------------------------------------------------------ + +## Changes +- Jupyter env: new full build with latest of everything + + See [Ouranosinc/PAVICS-e2e-workflow-tests#137](https://github.com/Ouranosinc/PAVICS-e2e-workflow-tests/pull/137) + for more info. + + +[2.5.4](https://github.com/bird-house/birdhouse-deploy/tree/2.5.4) (2024-10-31) +------------------------------------------------------------------------------------------------------------------ + +## Changes + +- THREDDS: add more options to configure `catalog.xml` + - The default THREDDS configuration creates two default datasets, the *Service Data* dataset and the + *Main* dataset. The *Service Data* dataset is used internally and hosts WPS outputs. The *Main* dataset is the + place where users can access data served by THREDDS. 
Both of these are configured to serve files with the following + extensions: .nc .ncml .txt .md .rst .csv + + - In order to allow the THREDDS server to serve files with additional extensions, this introduces two new + variables: + - `THREDDS_SERVICE_DATA_EXTRA_FILE_FILTERS`: this allows users to specify additional [filter + elements](https://docs.unidata.ucar.edu/tds/current/userguide/tds_dataset_scan_ref.html#including-only-desired-files) to the *Service Data* dataset. This is especially useful if a WPS + outputs files with an extension other than the default (eg: .h5) to the `wps_outputs/` directory. + - `THREDDS_DATASET_DATASETSCAN_BODY`: this allows users to specify the whole body of the *Main* dataset's + [``](https://docs.unidata.ucar.edu/tds/current/userguide/tds_dataset_scan_ref.html) element. + This allows users to fully customize how this dataset serves files. + + - We limit the configuration options for the *Service Data* dataset more than the *Main* dataset because the *Service + Data* dataset requires a basic configuration in order to properly serve WPS outputs. Making significant changes + to this configuration could have unexpected negative impacts on WPS usage. + + - In order to allow customization of the Magpie THREDDS configuration in case new file extensions are added we introduce + two additional variables: + - `THREDDS_MAGPIE_EXTRA_METADATA_PREFIXES`: additional file prefixes (ie. regular expression match patterns) that Magpie + should treat as metadata (accessible with "browse" permissions). + - `THREDDS_MAGPIE_EXTRA_DATA_PREFIXES`: additional file prefixes (ie. regular expression match patterns) that Magpie + should treat as data (accessible with "read" permissions). + + - The defaults for these new variables are fully backwards compatible. 
Without changing these variables, the THREDDS + server should behave exactly the same as before except that .md files and .rst files are now considered metadata + files according to the Magpie configuration, meaning that they can now be viewed with "browse" permissions. [2.5.3](https://github.com/bird-house/birdhouse-deploy/tree/2.5.3) (2024-09-11) ------------------------------------------------------------------------------------------------------------------ diff --git a/Makefile b/Makefile index 190c95210..178116bd2 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # Generic variables override SHELL := bash override APP_NAME := birdhouse-deploy -override APP_VERSION := 2.5.3 +override APP_VERSION := 2.6.0 # utility to remove comments after value of an option variable override clean_opt = $(shell echo "$(1)" | $(_SED) -r -e "s/[ '$'\t'']+$$//g") diff --git a/README.rst b/README.rst index e0924ff64..3d0a9963c 100644 --- a/README.rst +++ b/README.rst @@ -18,13 +18,13 @@ for a full-fledged production platform. * - citation - | |citation| -.. |commits-since| image:: https://img.shields.io/github/commits-since/bird-house/birdhouse-deploy/2.5.3.svg +.. |commits-since| image:: https://img.shields.io/github/commits-since/bird-house/birdhouse-deploy/2.6.0.svg :alt: Commits since latest release - :target: https://github.com/bird-house/birdhouse-deploy/compare/2.5.3...master + :target: https://github.com/bird-house/birdhouse-deploy/compare/2.6.0...master -.. |latest-version| image:: https://img.shields.io/badge/tag-2.5.3-blue.svg?style=flat +.. |latest-version| image:: https://img.shields.io/badge/tag-2.6.0-blue.svg?style=flat :alt: Latest Tag - :target: https://github.com/bird-house/birdhouse-deploy/tree/2.5.3 + :target: https://github.com/bird-house/birdhouse-deploy/tree/2.6.0 .. 
|readthedocs| image:: https://readthedocs.org/projects/birdhouse-deploy/badge/?version=latest :alt: ReadTheDocs Build Status (latest version) diff --git a/RELEASE.txt b/RELEASE.txt index 17a7e48f6..9200ea1cd 100644 --- a/RELEASE.txt +++ b/RELEASE.txt @@ -1 +1 @@ -2.5.3 2024-09-11T22:57:09Z +2.6.0 2024-11-19T13:53:14Z diff --git a/birdhouse/components/README.rst b/birdhouse/components/README.rst index b816f2c74..cf29abda8 100644 --- a/birdhouse/components/README.rst +++ b/birdhouse/components/README.rst @@ -276,6 +276,7 @@ Here is a sample setup to test autodeploy: # then remove the running scheduler container birdhouse compose up -d --remove-orphans +.. _Monitoring: Monitoring ========== diff --git a/birdhouse/components/canarie-api/docker_configuration.py.template b/birdhouse/components/canarie-api/docker_configuration.py.template index b6eef6344..fc82b7d34 100644 --- a/birdhouse/components/canarie-api/docker_configuration.py.template +++ b/birdhouse/components/canarie-api/docker_configuration.py.template @@ -108,8 +108,8 @@ SERVICES = { # NOTE: # Below version and release time auto-managed by 'make VERSION=x.y.z bump'. # Do NOT modify it manually. See 'Tagging policy' in 'birdhouse/README.rst'. - 'version': '2.5.3', - 'releaseTime': '2024-09-11T22:57:09Z', + 'version': '2.6.0', + 'releaseTime': '2024-11-19T13:53:14Z', 'institution': '${BIRDHOUSE_INSTITUTION}', 'researchSubject': '${BIRDHOUSE_SUBJECT}', 'supportEmail': '${BIRDHOUSE_SUPPORT_EMAIL}', @@ -141,8 +141,8 @@ PLATFORMS = { # NOTE: # Below version and release time auto-managed by 'make VERSION=x.y.z bump'. # Do NOT modify it manually. See 'Tagging policy' in 'birdhouse/README.rst'. 
- 'version': '2.5.3', - 'releaseTime': '2024-09-11T22:57:09Z', + 'version': '2.6.0', + 'releaseTime': '2024-11-19T13:53:14Z', 'institution': '${BIRDHOUSE_INSTITUTION}', 'researchSubject': '${BIRDHOUSE_SUBJECT}', 'supportEmail': '${BIRDHOUSE_SUPPORT_EMAIL}', diff --git a/birdhouse/components/jupyterhub/default.env b/birdhouse/components/jupyterhub/default.env index b52b7ff2e..91f40f08e 100644 --- a/birdhouse/components/jupyterhub/default.env +++ b/birdhouse/components/jupyterhub/default.env @@ -10,7 +10,7 @@ export JUPYTERHUB_IMAGE='${JUPYTERHUB_DOCKER}:${JUPYTERHUB_VERSION}' export JUPYTERHUB_IMAGE_URI='registry.hub.docker.com/${JUPYTERHUB_IMAGE}' # Jupyter single-user server images, can be overriden in env.local to have a space separated list of multiple images -export JUPYTERHUB_DOCKER_NOTEBOOK_IMAGES="pavics/workflow-tests:py311-240506-update240508" +export JUPYTERHUB_DOCKER_NOTEBOOK_IMAGES="pavics/workflow-tests:py311-241111" # Name of the image displayed on the JupyterHub image selection page # Can be overriden in env.local to have a space separated list of multiple images, the name order must correspond diff --git a/birdhouse/components/monitoring/default.env b/birdhouse/components/monitoring/default.env index a42c1a1a8..76188073d 100644 --- a/birdhouse/components/monitoring/default.env +++ b/birdhouse/components/monitoring/default.env @@ -8,7 +8,7 @@ export GRAFANA_VERSION="7.0.3" export GRAFANA_DOCKER=grafana/grafana export GRAFANA_IMAGE='${GRAFANA_DOCKER}:${GRAFANA_VERSION}' -export PROMETHEUS_VERSION="v2.52.0" +export PROMETHEUS_VERSION="v2.53.3" export PROMETHEUS_DOCKER=prom/prometheus export PROMETHEUS_IMAGE='${PROMETHEUS_DOCKER}:${PROMETHEUS_VERSION}' diff --git a/birdhouse/components/monitoring/prometheus.yml.template b/birdhouse/components/monitoring/prometheus.yml.template index f5d7cd09e..737331edd 100644 --- a/birdhouse/components/monitoring/prometheus.yml.template +++ b/birdhouse/components/monitoring/prometheus.yml.template @@ -20,6 +20,9 @@ 
scrape_configs: - targets: - ${BIRDHOUSE_FQDN}:9100 +scrape_config_files: + - "/etc/prometheus/scrape_config.d/*.yml" + rule_files: - "/etc/prometheus/*.rules" diff --git a/birdhouse/components/proxy/nginx.conf.template b/birdhouse/components/proxy/nginx.conf.template index 46479d035..a7acb6e8a 100755 --- a/birdhouse/components/proxy/nginx.conf.template +++ b/birdhouse/components/proxy/nginx.conf.template @@ -15,6 +15,8 @@ http { include /etc/nginx/mime.types; default_type application/octet-stream; + # NOTE: if this log_format ever changes, make sure to update the relevant code in the + # prometheus-log-parser component as well to match the change. log_format main '$remote_addr - $remote_user [$time_iso8601] "$request" ' '$status $body_bytes_sent "$http_referer" ' '"$http_user_agent" "$http_x_forwarded_for"'; diff --git a/birdhouse/components/thredds/catalog.xml.template b/birdhouse/components/thredds/catalog.xml.template index 231b266c8..f7979d9b3 100644 --- a/birdhouse/components/thredds/catalog.xml.template +++ b/birdhouse/components/thredds/catalog.xml.template @@ -4,14 +4,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" > - - - - - - - - + + + + + + + + @@ -27,24 +27,14 @@ + ${THREDDS_SERVICE_DATA_EXTRA_FILE_FILTERS} - - all - - - - - - - - - - + ${THREDDS_DATASET_DATASETSCAN_BODY} diff --git a/birdhouse/components/thredds/config/magpie/providers.cfg.template b/birdhouse/components/thredds/config/magpie/providers.cfg.template index 3ca5f9d84..499944ccb 100644 --- a/birdhouse/components/thredds/config/magpie/providers.cfg.template +++ b/birdhouse/components/thredds/config/magpie/providers.cfg.template @@ -15,21 +15,24 @@ providers: - ".+\\.ncml" # match longest extension first to avoid tuncating it by match of sorter '.nc' - ".+\\.nc" metadata_type: - prefixes: - - null # note: special YAML value evaluated as `no-prefix`, use quotes if literal value is needed - - "\\w+\\.gif" # threddsIcon, folder icon, etc. 
- - "\\w+\\.ico" # favicon - - "\\w+\\.txt" # licence - - "\\w+\\.css" # tds.css - - "catalog\\.\\w+" # note: special case for `THREDDS` top-level directory (root) accessed for `BROWSE` - - catalog - - ncml - - uddc - - iso + prefixes: [ + null, # note: special YAML value evaluated as `no-prefix`, use quotes if literal value is needed + "\\w+\\.gif", # threddsIcon, folder icon, etc. + "\\w+\\.ico", # favicon + "\\w+\\.css", # tds.css + "catalog\\.\\w+", # note: special case for `THREDDS` top-level directory (root) accessed for `BROWSE` + catalog, + ncml, + uddc, + iso, + ${THREDDS_MAGPIE_EXTRA_METADATA_PREFIXES} + ] data_type: - prefixes: - - fileServer - - dodsC - - wcs - - wms - - ncss + prefixes: [ + fileServer, + dodsC, + wcs, + wms, + ncss, + ${THREDDS_MAGPIE_EXTRA_DATA_PREFIXES} + ] diff --git a/birdhouse/components/thredds/default.env b/birdhouse/components/thredds/default.env index e39a11007..9af3f0204 100644 --- a/birdhouse/components/thredds/default.env +++ b/birdhouse/components/thredds/default.env @@ -17,7 +17,25 @@ export THREDDS_SERVICE_DATA_LOCATION_NAME='Birdhouse' export THREDDS_DATASET_URL_PATH='datasets' export THREDDS_SERVICE_DATA_URL_PATH='birdhouse' +export THREDDS_MAGPIE_EXTRA_METADATA_PREFIXES='".+\\.txt", ".+\\.md", ".+\\.rst"' +export THREDDS_MAGPIE_EXTRA_DATA_PREFIXES='' +export THREDDS_SERVICE_DATA_EXTRA_FILE_FILTERS='' + +export THREDDS_DATASET_DATASETSCAN_BODY=' + + all + + + + + + + + + + +' # add any new variables not already in 'VARS' or 'OPTIONAL_VARS' that must be replaced in templates here VARS=" @@ -28,6 +46,7 @@ VARS=" \$THREDDS_DATASET_LOCATION_NAME \$THREDDS_DATASET_URL_PATH \$THREDDS_DATASET_LOCATION_ON_CONTAINER + \$THREDDS_DATASET_DATASETSCAN_BODY " OPTIONAL_VARS=" @@ -39,6 +58,9 @@ OPTIONAL_VARS=" \$THREDDS_IMAGE \$THREDDS_IMAGE_URI \$THREDDS_ADDITIONAL_CATALOG + \$THREDDS_SERVICE_DATA_EXTRA_FILE_FILTERS + \$THREDDS_MAGPIE_EXTRA_METADATA_PREFIXES + \$THREDDS_MAGPIE_EXTRA_DATA_PREFIXES " export DELAYED_EVAL=" diff --git 
a/birdhouse/env.local.example b/birdhouse/env.local.example index 28a2f7059..24f37af5c 100644 --- a/birdhouse/env.local.example +++ b/birdhouse/env.local.example @@ -262,7 +262,7 @@ export GEOSERVER_ADMIN_PASSWORD="${__DEFAULT__GEOSERVER_ADMIN_PASSWORD}" #export BIRDHOUSE_ALLOW_UNSECURE_HTTP="" # Jupyter single-user server images -#export JUPYTERHUB_DOCKER_NOTEBOOK_IMAGES="pavics/workflow-tests:py311-240506-update240508 \ +#export JUPYTERHUB_DOCKER_NOTEBOOK_IMAGES="pavics/workflow-tests:py311-241111 \ # pavics/crim-jupyter-eo:0.3.0 \ # pavics/crim-jupyter-nlp:0.4.0 \ # birdhouse/pavics-jupyter-base:mlflow-proxy" @@ -456,26 +456,84 @@ export GEOSERVER_ADMIN_PASSWORD="${__DEFAULT__GEOSERVER_ADMIN_PASSWORD}" # Additional catalogs for THREDDS. Add as many datasetScan XML blocks as needed to THREDDS_ADDITIONAL_CATALOG. # Each block defines a new top-level catalog. See birdhouse/components/thredds/catalog.xml.template for more information. -export THREDDS_ADDITIONAL_CATALOG="" -#export THREDDS_ADDITIONAL_CATALOG=" -# +export THREDDS_ADDITIONAL_CATALOG='' +#export THREDDS_ADDITIONAL_CATALOG=' +# # -# +# # all # # # -# -# -# -# -# -# +# +# +# +# +# +# # # # +#' +# It is possible to define additional compound services in the THREDDS_ADDITIONAL_CATALOG variable as well. +# This may be useful if you are creating a catalog that only provides a subset of the services defined in the +# compound service named "all" (see birdhouse/components/thredds/catalog.xml.template). +# DO NOT define any non-compound services in THREDDS_ADDITIONAL_CATALOG that is not an exact copy of one of the +# variables defined in "all"! Especially, do not change the "base" attribute of any existing service. +# Doing so may break the way that access permissions are enforced when accessing data through this service. + +# Additional file filters to add for the Service Data THREDDS dataset. 
By default, the Service Data dataset will only +# serve files with the following extensions: .nc .ncml .txt .md .rst .csv +# If you need this dataset to serve other files you should update the THREDDS_SERVICE_DATA_EXTRA_FILE_FILTERS to add +# additional file filters. +# This may be useful to set if a WPS outputs files to the wps_outputs/ directory (hosted under the Service Data dataset) +# in a file format other than one of the defaults. +# See the example below which would also enable serving .png and .h5 files. +#export THREDDS_SERVICE_DATA_EXTRA_FILE_FILTERS=" +# +# #" +# Set this variable to customize the body of the <datasetScan> XML element for the main THREDDS dataset. This is typically +# the dataset where you would store most of the data served by THREDDS (additional datasets can be configured by setting the +# THREDDS_ADDITIONAL_CATALOG variable). +# By default, the main dataset will only serve files with the following extensions: .nc .ncml .txt .md .rst .csv and will use +# the THREDDS service named "all" (see components/thredds/catalog.xml.template). However this can be customized if desired. +# See the example below which would change the configuration to serve .h5, .md, and .json files. +# See the THREDDS documentation for the <datasetScan> element for all configuration options. +#export THREDDS_DATASET_DATASETSCAN_BODY=" +# +# all +# +# +# +# +# +# +#" + +# Files served by THREDDS are considered to either contain data or metadata (or both). The THREDDS Magpie service allows +# us to handle access permissions differently for metadata vs. data. Magpie lets users with "browse" permissions access +# metadata but only users with "read" permissions can access data. +# By accessing files through different THREDDS services (see THREDDS documentation), we can either read the metadata with +# "browse" permissions or the data itself with "read" permissions. 
For example, by default a NetCDF file can be accessed +# using the NCML service to get its metadata or through the NCSS service to access the data itself. +# +# If you have a file that you would like to be treated as metadata (Magpie will allow users with "browse" permissions to +# access it) no matter which THREDDS service is used to access it, add the file pattern to the `THREDDS_MAGPIE_EXTRA_METADATA_PREFIXES` +# variable. Similarly, if you have a file that you would like to be treated as data no matter which THREDDS service is used +# to access it, add the file pattern to the `THREDDS_MAGPIE_EXTRA_DATA_PREFIXES` variable. +# +# For example, if you want all files with a .h5 extension to be treated as data files in all cases, add '".+\\.h5"' to the +# `THREDDS_MAGPIE_EXTRA_DATA_PREFIXES` variable. Note that values are regular expressions (python) where slashes are double +# escaped. Expressions should be surrounded by double quotes and if multiple expressions are included they should be comma +# delimited. 
+# +# Current defaults are: +#export THREDDS_MAGPIE_EXTRA_METADATA_PREFIXES='".+\\.txt", ".+\\.md", ".+\\.rst"' +#export THREDDS_MAGPIE_EXTRA_DATA_PREFIXES='' + # Allow using Github as external AuthN/AuthZ provider with Magpie # To setup Github as login, goto under section [OAuth Apps] # and create a new Magpie application with configurations: @@ -582,6 +640,10 @@ export THREDDS_ADDITIONAL_CATALOG="" #export THANOS_MINIO_ROOT_USER="${__DEFAULT__THANOS_MINIO_ROOT_USER}" #export THANOS_MINIO_ROOT_PASSWORD="${__DEFAULT__THANOS_MINIO_ROOT_PASSWORD}" +# Below are for the prometheus-log-parser optional component +#export PROMETHEUS_LOG_PARSER_POLL_DELAY=1 # time in seconds +#export PROMETHEUS_LOG_PARSER_TAIL=true + ############################################################################# # Emu optional vars ############################################################################# diff --git a/birdhouse/optional-components/README.rst b/birdhouse/optional-components/README.rst index fca9c8333..a10d75b69 100644 --- a/birdhouse/optional-components/README.rst +++ b/birdhouse/optional-components/README.rst @@ -517,3 +517,47 @@ Enabling this component creates the additional endpoints: `thanos` needs access to the data stored by prometheus on disk (in docker this is acheived by sharing a named volume). .. _minio: https://min.io/ + +.. _prometheus-log-parser: + +Prometheus Log Parser +--------------------- + +Parses log files from other components and converts their logs to prometheus metrics that are then ingested by the +monitoring Prometheus instance (the one created by the :ref:`Monitoring` component). + +For more information on how this component reads log files and converts them to prometheus metrics see +the log-parser_ documentation. 
+ +To configure this component: + + * set the ``PROMETHEUS_LOG_PARSER_POLL_DELAY`` variable to a number of seconds to set how often the log parser + checks if new lines have been added to log files (default: 1) + * set the ``PROMETHEUS_LOG_PARSER_TAIL`` variable to ``"true"`` to only parse new lines in log files. If unset, + this will parse all existing lines in the log file as well (default: ``"true"``) + +To view all metrics exported by the log parser: + + * Navigate to the ``https:///prometheus/graph`` search page + * Put ``{job="log_parser"}`` in the search bar and click the "Execute" button + +For developers, to create a new parser that can be used to track log files: + + 1. create a python file that can be mounted as a volume to the ``PROMETHEUS_LOG_PARSER_PARSERS_DIR`` + directory on the ``prometheus-log-parser`` container. + 2. mount any log files that you want to parse as a volume on the ``prometheus-log-parser`` container. + 3. the python script should create at least one `prometheus metric using the prometheus_client + library `_ and must contain a global constant named ``LOG_PARSER_CONFIG`` + which is a dictionary where keys are paths to log files (mounted in the container) and values are a + list of "line parser" functions. + * a "line parser" is any function that takes a string as a single argument (a single line from a + log file). These functions are where you'd write the code that parses the line and converts it + into a prometheus metric. + * your line parser function should update one of the prometheus metrics you created previously. + + For an example of a working log parser, see + `birdhouse/optional-components/prometheus-log-parser/config/thredds/prometheus-log-exporter.py`_ + (:download:`download `). + +.. _log-parser: https://github.com/DACCS-Climate/log-parser/ +.. 
_prometheus_python_metrics: https://prometheus.github.io/client_python/instrumenting/ diff --git a/birdhouse/optional-components/prometheus-log-parser/config/monitoring/.gitignore b/birdhouse/optional-components/prometheus-log-parser/config/monitoring/.gitignore new file mode 100644 index 000000000..add998b9b --- /dev/null +++ b/birdhouse/optional-components/prometheus-log-parser/config/monitoring/.gitignore @@ -0,0 +1 @@ +scrape_configs.yml diff --git a/birdhouse/optional-components/prometheus-log-parser/config/monitoring/docker-compose-extra.yml b/birdhouse/optional-components/prometheus-log-parser/config/monitoring/docker-compose-extra.yml new file mode 100644 index 000000000..c16e2f8ef --- /dev/null +++ b/birdhouse/optional-components/prometheus-log-parser/config/monitoring/docker-compose-extra.yml @@ -0,0 +1,5 @@ +version: "3.4" +services: + prometheus: + volumes: + - ./optional-components/prometheus-log-parser/config/monitoring/scrape_configs.yml:/etc/prometheus/scrape_config.d/log_parser.yml:ro diff --git a/birdhouse/optional-components/prometheus-log-parser/config/monitoring/scrape_configs.yml.template b/birdhouse/optional-components/prometheus-log-parser/config/monitoring/scrape_configs.yml.template new file mode 100644 index 000000000..025cadc67 --- /dev/null +++ b/birdhouse/optional-components/prometheus-log-parser/config/monitoring/scrape_configs.yml.template @@ -0,0 +1,6 @@ +scrape_configs: + - job_name: log_parser + honor_labels: true + static_configs: + - targets: + - prometheus-log-parser:${PROMETHEUS_LOG_PARSER_CLIENT_PORT} diff --git a/birdhouse/optional-components/prometheus-log-parser/config/proxy/docker-compose-extra.yml b/birdhouse/optional-components/prometheus-log-parser/config/proxy/docker-compose-extra.yml new file mode 100644 index 000000000..0b0308c40 --- /dev/null +++ b/birdhouse/optional-components/prometheus-log-parser/config/proxy/docker-compose-extra.yml @@ -0,0 +1,13 @@ +version: "3.4" +services: + proxy: + volumes: + - 
proxy-logs:${PROXY_LOG_DIR} + prometheus-log-parser: + volumes: + - proxy-logs:/var/log/proxy + environment: + - PROXY_LOG_FILE=${PROXY_LOG_FILE} + +volumes: + proxy-logs: diff --git a/birdhouse/optional-components/prometheus-log-parser/config/thredds/docker-compose-extra.yml b/birdhouse/optional-components/prometheus-log-parser/config/thredds/docker-compose-extra.yml new file mode 100644 index 000000000..eea8c0b8d --- /dev/null +++ b/birdhouse/optional-components/prometheus-log-parser/config/thredds/docker-compose-extra.yml @@ -0,0 +1,5 @@ +version: "3.4" +services: + prometheus-log-parser: + volumes: + - ./optional-components/prometheus-log-parser/config/thredds/prometheus-log-exporter.py:${PROMETHEUS_LOG_PARSER_PARSERS_DIR}/thredds-prometheus-log-exporter.py:ro diff --git a/birdhouse/optional-components/prometheus-log-parser/config/thredds/prometheus-log-exporter.py b/birdhouse/optional-components/prometheus-log-parser/config/thredds/prometheus-log-exporter.py new file mode 100644 index 000000000..8869ff183 --- /dev/null +++ b/birdhouse/optional-components/prometheus-log-parser/config/thredds/prometheus-log-exporter.py @@ -0,0 +1,44 @@ +import os +import re + +import prometheus_client + + +# This matches a request to the THREDDS data services as defined in birdhouse/components/thredds/catalog.xml.template +THREDDS_REQ_URI_REGEX = r'\/[^\s]+\/thredds\/(?PdodsC|fileServer|ncss)\/(?P[^\s]*)(?:\?(?P\w+))?' + +# This matches the nginx log_fomat as defined in birdhouse/components/proxy/nginx.conf.template +REGEX = re.compile( + r'(?P(?:^|\b(?-|[a-z_][a-z0-9_]{0,30})\s' + r'(?P\[(?P\d\d\d\d-\d\d-\d\d)T(?P