diff --git a/Makefile b/Makefile index dddd13a72..0b3bb6138 100644 --- a/Makefile +++ b/Makefile @@ -139,7 +139,9 @@ lifemonitor: docker/lifemonitor.Dockerfile certs app.py gunicorn.conf.py ## Buil printf "\n$(yellow)WARNING: $(bold)Skip build of LifeMonitor Docker image !!! $(reset)\n" ; \ else \ printf "\n$(bold)Building LifeMonitor Docker image...$(reset)\n" ; \ - $(build_kit) docker $(build_cmd) $(cache_from_opt) $(cache_to_opt) \ + $(build_kit) docker $(build_cmd) \ + --build-arg USER_ID=$$(id -u) --build-arg GROUP_ID=$$(id -g) \ + $(cache_from_opt) $(cache_to_opt) \ ${sw_version_arg} ${build_number_arg} ${tags_opt} ${labels_opt} ${platforms_opt} \ -f docker/lifemonitor.Dockerfile -t crs4/lifemonitor . ;\ printf "$(done)\n" ; \ @@ -221,20 +223,34 @@ start-dev: images compose-files dev reset_compose permissions ## Start LifeMonit start-testing: compose-files aux_images ro_crates images reset_compose permissions ## Start LifeMonitor in a Testing environment @printf "\n$(bold)Starting testing services...$(reset)\n" ; \ base=$$(if [[ -f "docker-compose.yml" ]]; then echo "-f docker-compose.yml"; fi) ; \ - echo "$$(USER_UID=$$(id -u) USER_GID=$$(id -g) \ - $(docker_compose) $${base} \ - -f docker-compose.extra.yml \ - -f docker-compose.base.yml \ - -f docker-compose.monitoring.yml \ - -f docker-compose.dev.yml \ - -f docker-compose.test.yml \ - config)" > docker-compose.yml \ + echo "$$($(docker_compose) $${base} \ + -f docker-compose.extra.yml \ + -f docker-compose.base.yml \ + -f docker-compose.monitoring.yml \ + -f docker-compose.dev.yml \ + -f docker-compose.test.yml \ + config)" > docker-compose.yml \ && cp {,.test.}docker-compose.yml \ && $(docker_compose) -f docker-compose.yml up -d db lmtests seek jenkins webserver worker ws_server ;\ $(docker_compose) -f ./docker-compose.yml \ exec -T lmtests /bin/bash -c "tests/wait-for-seek.sh 600"; \ printf "$(done)\n" +start-maintenance: compose-files aux_images ro_crates images reset_compose permissions ## Start 
LifeMonitor in a Maintenance environment + @printf "\n$(bold)Starting maintenance services...$(reset)\n" ; \ + base=$$(if [[ -f "docker-compose.yml" ]]; then echo "-f docker-compose.yml"; fi) ; \ + echo "$$(USER_UID=$$(id -u) USER_GID=$$(id -g) \ + $(docker_compose) $${base} \ + -f docker-compose.extra.yml \ + -f docker-compose.base.yml \ + -f docker-compose.monitoring.yml \ + -f docker-compose.dev.yml \ + -f docker-compose.maintenance.yml \ + config)" > docker-compose.yml \ + && cp {,.maintenance.}docker-compose.yml \ + && $(docker_compose) -f docker-compose.yml up -d db redis lm ws_server nginx console ;\ + printf "$(done)\n" + start-nginx: certs docker-compose.base.yml permissions ## Start a nginx front-end proxy for the LifeMonitor back-end @printf "\n$(bold)Starting nginx proxy...$(reset)\n" ; \ base=$$(if [[ -f "docker-compose.yml" ]]; then echo "-f docker-compose.yml"; fi) ; \ @@ -262,7 +278,6 @@ start-aux-services: aux_images ro_crates docker-compose.extra.yml permissions ## run-tests: start-testing ## Run all tests in the Testing Environment @printf "\n$(bold)Running tests...$(reset)\n" ; \ - USER_UID=$$(id -u) USER_GID=$$(id -g) \ $(docker_compose) exec -T lmtests /bin/bash -c "pytest --durations=10 --color=yes tests" diff --git a/docker-compose.monitoring.yml b/docker-compose.monitoring.yml index 0d6aae3a3..2e17806e1 100644 --- a/docker-compose.monitoring.yml +++ b/docker-compose.monitoring.yml @@ -2,7 +2,7 @@ version: "3.5" services: prometheus: - image: prom/prometheus:v2.24.1 + image: prom/prometheus:v2.48.0 ports: - "9090:9090" volumes: diff --git a/docker/lifemonitor.Dockerfile b/docker/lifemonitor.Dockerfile index 3f8d67f3a..ed6bfe018 100644 --- a/docker/lifemonitor.Dockerfile +++ b/docker/lifemonitor.Dockerfile @@ -8,8 +8,18 @@ RUN apt-get update -q \ postgresql-client-11 default-jre \ && apt-get clean -y && rm -rf /var/lib/apt/lists +# Set the parametric USER ID +ARG USER_ID +ENV USER_ID=${USER_ID:-1000} + +# Set the parametric GROUP ID +ARG GROUP_ID 
+ENV GROUP_ID=${GROUP_ID:-1000} + # Create a user 'lm' with HOME at /lm and set 'lm' as default git user -RUN useradd -d /lm -m lm +RUN groupadd -g ${GROUP_ID} lm && \ + useradd -u ${USER_ID} -g lm -d /lm -m lm + # Set the default user ENV USER=lm @@ -67,6 +77,7 @@ RUN mkdir -p /var/data/lm \ && chown -R lm:lm /var/data/lm \ && ln -s /var/data/lm /lm/data \ && chown -R lm:lm /lm/data \ + && mkdir -p /var/log/lm && chown -R lm:lm /var/log/lm \ && mkdir /lm/.nextflow && chmod -R 777 /lm/.nextflow # Set the default user diff --git a/docker/worker_entrypoint.sh b/docker/worker_entrypoint.sh index cdc5544df..d82d65a4d 100755 --- a/docker/worker_entrypoint.sh +++ b/docker/worker_entrypoint.sh @@ -84,7 +84,6 @@ while : ; do ${threads:-} \ lifemonitor.tasks.worker:broker lifemonitor.tasks ${queues} exit_code=$? - exit_code=$? if [[ $exit_code == 3 ]]; then log "dramatiq worker could not connect to message broker (exit code ${exit_code})" log "Restarting..." diff --git a/k8s/Chart.yaml b/k8s/Chart.yaml index 591bec1f5..1f5ad15c4 100644 --- a/k8s/Chart.yaml +++ b/k8s/Chart.yaml @@ -7,12 +7,12 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.10.0 +version: 0.11.0 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. 
-appVersion: 0.11.7 +appVersion: 0.11.8 # Chart dependencies dependencies: diff --git a/k8s/backup-key.secret.yaml b/k8s/backup-key.secret.yaml new file mode 100644 index 000000000..932768594 --- /dev/null +++ b/k8s/backup-key.secret.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Secret +metadata: + name: lifemonitor-api-backup-key +type: Opaque +data: + encryptionKey: \ No newline at end of file diff --git a/k8s/templates/_helpers.tpl b/k8s/templates/_helpers.tpl index 2ab056c4c..cd046970a 100644 --- a/k8s/templates/_helpers.tpl +++ b/k8s/templates/_helpers.tpl @@ -67,6 +67,13 @@ Define lifemonitor TLS secret name {{- printf "%s-tls" .Release.Name }} {{- end }} +{{/* +Define lifemonitor secret name for backup key +*/}} +{{- define "chart.lifemonitor.backup.key" -}} +{{- printf "%s-backup-key" .Release.Name }} +{{- end }} + {{/* Define volume name of LifeMonitor backup data @@ -167,6 +174,8 @@ Define mount points shared by some pods. - mountPath: "/lm/certs/" name: lifemonitor-tls readOnly: true +- name: lifemonitor-logs + mountPath: "/var/log" - name: lifemonitor-settings mountPath: "/lm/settings.conf" subPath: settings.conf diff --git a/k8s/templates/backend.deployment.yaml b/k8s/templates/backend.deployment.yaml index 3e94f0d5b..baea295c9 100644 --- a/k8s/templates/backend.deployment.yaml +++ b/k8s/templates/backend.deployment.yaml @@ -54,6 +54,8 @@ spec: {{- include "lifemonitor.common-env" . | nindent 12 }} volumeMounts: {{- include "lifemonitor.common-volume-mounts" . | nindent 12 }} + resources: + {{- toYaml .Values.lifemonitor.initContainers.initBackend.resources | nindent 12 }} containers: - name: backend securityContext: @@ -61,7 +63,11 @@ spec: image: {{ include "chart.lifemonitor.image" . }} imagePullPolicy: {{ .Values.lifemonitor.imagePullPolicy }} env: - {{- include "lifemonitor.common-env" . | nindent 12 }} + {{- include "lifemonitor.common-env" . 
| nindent 12 }} + {{- if .Values.maintenanceMode.enabled }} + - name: FLASK_ENV + value: "maintenance" + {{- end }} volumeMounts: {{- include "lifemonitor.common-volume-mounts" . | nindent 12 }} ports: @@ -85,8 +91,27 @@ spec: periodSeconds: 3 resources: {{- toYaml .Values.lifemonitor.resources | nindent 12 }} + {{- if .Values.monitoring.loki.enabled }} + - name: log-exporter + image: {{ .Values.monitoring.loki.exporter.image }} + imagePullPolicy: {{ $.Values.monitoring.loki.exporter.imagePullPolicy }} + args: + - "-config.file=/etc/promtail/promtail.yaml" # Found in the ConfigMap + resources: + {{- toYaml $.Values.monitoring.loki.exporter.resources | nindent 12 }} + volumeMounts: + - name: promtail-config + mountPath: /etc/promtail + - name: lifemonitor-logs + mountPath: /var/log + {{- end }} volumes: {{- include "lifemonitor.common-volume" . | nindent 8 }} + {{- if .Values.monitoring.loki.enabled }} + - name: promtail-config + configMap: + name: "{{.Release.Name}}-promtail-backend-configmap" + {{- end }} {{- with .Values.lifemonitor.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} diff --git a/k8s/templates/backup.job.yaml b/k8s/templates/backup.job.yaml index 109e8d2dc..f320994e0 100644 --- a/k8s/templates/backup.job.yaml +++ b/k8s/templates/backup.job.yaml @@ -29,12 +29,22 @@ spec: {{- include "lifemonitor.common-volume-mounts" . | nindent 12 }} - name: lifemonitor-backup mountPath: "/var/data/backup" + {{- if .Values.backup.encryptionKeySecret }} + - name: lifemonitor-backup-encryption-key + mountPath: "/lm/backup/encryption.key" + subPath: encryptionKey + {{- end }} restartPolicy: OnFailure volumes: {{- include "lifemonitor.common-volume" . 
| nindent 10 }} - name: lifemonitor-backup persistentVolumeClaim: claimName: {{ .Values.backup.existingClaim }} + {{- if .Values.backup.encryptionKeySecret }} + - name: lifemonitor-backup-encryption-key + secret: + secretName: {{ .Values.backup.encryptionKeySecret }} + {{- end }} {{- with .Values.lifemonitor.nodeSelector }} nodeSelector: {{- toYaml . | nindent 10 }} diff --git a/k8s/templates/console.deployment.yaml b/k8s/templates/console.deployment.yaml new file mode 100644 index 000000000..4ead8a13f --- /dev/null +++ b/k8s/templates/console.deployment.yaml @@ -0,0 +1,73 @@ +{{- if or (.Values.maintenanceMode.enabled) (.Values.console.enabled ) }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "chart.fullname" . }}-console + labels: + {{- include "chart.labels" . | nindent 4 }} +spec: + {{- if not .Values.lifemonitor.autoscaling.enabled }} + replicas: {{ .Values.lifemonitor.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "chart.selectorLabels" . | nindent 6 }} + template: + metadata: + annotations: + checksum/settings: {{ include (print $.Template.BasePath "/settings.secret.yaml") . | sha256sum }} + {{- with .Values.lifemonitor.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "chart.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.lifemonitor.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "chart.serviceAccountName" . }} + securityContext: + {{- toYaml .Values.lifemonitor.podSecurityContext | nindent 8 }} + containers: + - name: app + securityContext: + {{- toYaml .Values.lifemonitor.securityContext | nindent 12 }} + image: {{ include "chart.lifemonitor.image" . }} + imagePullPolicy: {{ .Values.lifemonitor.imagePullPolicy }} + command: ["/bin/sh","-c"] + args: ["wait-for-redis.sh && wait-for-postgres.sh && sleep infinity"] + env: + {{- include "lifemonitor.common-env" . 
| nindent 12 }} + - name: LIFEMONITOR_MAINTENANCE_MODE + value: {{ .Values.maintenanceMode.enabled | quote }} + - name: LIFEMONITOR_CONSOLE_ENABLED + value: {{ .Values.console.enabled | quote }} + volumeMounts: + {{- include "lifemonitor.common-volume-mounts" . | nindent 12 }} + - name: lifemonitor-backup + mountPath: "/var/data/backup" + ports: + - name: http + containerPort: 8000 + protocol: TCP + resources: + {{- toYaml .Values.lifemonitor.resources | nindent 12 }} + volumes: + - name: lifemonitor-backup + persistentVolumeClaim: + claimName: {{ .Values.backup.existingClaim }} + {{- include "lifemonitor.common-volume" . | nindent 8 }} + {{- with .Values.lifemonitor.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.lifemonitor.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.lifemonitor.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/k8s/templates/data-logs.pvc.yaml b/k8s/templates/data-logs.pvc.yaml deleted file mode 100644 index 7abd1a9cc..000000000 --- a/k8s/templates/data-logs.pvc.yaml +++ /dev/null @@ -1,15 +0,0 @@ -{{- $dataLogsExists := (lookup "v1" "PersistentVolumeClaim" .Release.Namespace ( include "chart.lifemonitor.data.logs" . ) ) }} -{{- if not $dataLogsExists }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "chart.lifemonitor.data.logs" . 
}} - namespace: {{ .Release.Namespace }} -spec: - storageClassName: {{ .Values.global.readWriteManyStorageClass }} - accessModes: - - ReadWriteMany - resources: - requests: - storage: 1Gi -{{- end -}} diff --git a/k8s/templates/promtail.configmap.yaml b/k8s/templates/promtail.configmap.yaml new file mode 100644 index 000000000..b997c876c --- /dev/null +++ b/k8s/templates/promtail.configmap.yaml @@ -0,0 +1,110 @@ +{{- if .Values.monitoring.loki.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: "{{$.Release.Name}}-promtail-proxy-configmap" +data: + promtail.yaml: | + server: + http_listen_port: 9080 + grpc_listen_port: 0 + log_level: "debug" + positions: + filename: /tmp/positions.yaml + clients: # Specify target + - url: {{ $.Values.monitoring.loki.url }}/loki/api/v1/push + scrape_configs: + - job_name: "lifemonitor-api-proxy-logger" + static_configs: + - targets: + - localhost + labels: + app: "lifemonitor-backend" + component: "{{$.Release.Name}}-proxy" + environment: "{{$.Release.Namespace}}" + format: "extended" + level: "INFO" + __path__: /var/log/nginx/access.log + - targets: + - localhost + labels: + app: "lifemonitor-backend" + component: "{{ $.Release.Name }}-proxy" + environment: "{{ $.Release.Namespace }}" + format: "extended" + level: "ERROR" + __path__: /var/log/nginx/*error.log + pipeline_stages: + - drop: + expression: ".*(DEBUG|health|heartbeat).*" +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: "{{ $.Release.Name }}-promtail-backend-configmap" +data: + promtail.yaml: | + server: + http_listen_port: 9080 + grpc_listen_port: 0 + log_level: "debug" + positions: + filename: /tmp/positions.yaml + clients: # Specify target + - url: {{ $.Values.monitoring.loki.url }}/loki/api/v1/push + scrape_configs: + - job_name: "lifemonitor-api-backend-logger" + static_configs: + - targets: + - localhost + labels: + app: "lifemonitor-backend" + component: "api-backend" + environment: "{{ $.Release.Namespace }}" + format: "backend" + __path__: 
/var/log/lm/*.log + pipeline_stages: + - drop: + expression: ".*(DEBUG|health|heartbeat).*" + - regex: + expression: '(.*)(?P<log_level>ERROR|INFO|DEBUG|WARNING)(.*)' + - labels: + level: log_level + + +{{- range $i, $queue := .Values.worker.queues }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: "{{ $.Release.Name }}-promtail-worker-{{ $queue.name }}-configmap" +data: + promtail.yaml: | + server: + http_listen_port: 9080 + grpc_listen_port: 0 + log_level: "debug" + positions: + filename: /tmp/positions.yaml + clients: # Specify target + - url: {{ $.Values.monitoring.loki.url }}/loki/api/v1/push + scrape_configs: + - job_name: "lifemonitor-api-backend-logger" + static_configs: + - targets: + - localhost + labels: + app: "lifemonitor-backend" + component: "api-worker-{{ $queue.name }}" + environment: "{{ $.Release.Namespace }}" + format: "backend" + __path__: /var/log/lm/*.log + pipeline_stages: + - drop: + expression: ".*(DEBUG|health|heartbeat).*" + - regex: + expression: '(.*)(?P<log_level>ERROR|INFO|DEBUG|WARNING)(.*)' + - labels: + level: log_level +{{- end }} +{{- end }} \ No newline at end of file diff --git a/k8s/templates/settings.secret.yaml b/k8s/templates/settings.secret.yaml index 8c19c8c02..7a0e00d9f 100644 --- a/k8s/templates/settings.secret.yaml +++ b/k8s/templates/settings.secret.yaml @@ -78,6 +78,9 @@ stringData: {{- if .Values.backup.retain_days }} BACKUP_RETAIN_DAYS={{ .Values.backup.retain_days }} {{- end }} + {{- if .Values.backup.encryptionKeySecret }} + BACKUP_ENCRYPTION_KEY_PATH=/lm/backup/encryption.key + {{- end }} {{- if .Values.backup.remote.enabled }} BACKUP_REMOTE_PATH={{ .Values.backup.remote.path }} BACKUP_REMOTE_HOST={{ .Values.backup.remote.host }} @@ -87,6 +90,12 @@ stringData: {{- end }} {{- end }} + # Maintenance Mode Settings + {{- if .Values.maintenanceMode.enabled }} + MAINTENANCE_MODE={{.Values.maintenanceMode.enabled}} + MAINTENANCE_MODE_MAIN_MESSAGE={{.Values.maintenanceMode.mainMessage}} + 
MAINTENANCE_MODE_SECONDARY_MESSAGE={{.Values.maintenanceMode.secondaryMessage}} + {{- end }} # Set admin credentials LIFEMONITOR_ADMIN_PASSWORD={{ .Values.lifemonitor.administrator.password }} diff --git a/k8s/templates/worker.deployment.yaml b/k8s/templates/worker.deployment.yaml index f5ff41e8c..e622bde25 100644 --- a/k8s/templates/worker.deployment.yaml +++ b/k8s/templates/worker.deployment.yaml @@ -50,6 +50,8 @@ spec: {{- include "lifemonitor.common-env" $ | nindent 12 }} volumeMounts: {{- include "lifemonitor.common-volume-mounts" $ | nindent 12 }} + resources: + {{- toYaml $.Values.worker.initContainers.initWorker.resources | nindent 12 }} containers: - name: worker securityContext: @@ -70,6 +72,10 @@ spec: {{ else }} value: {{ $queue.name }} {{ end }} + {{- if $.Values.maintenanceMode.enabled }} + - name: FLASK_ENV + value: "maintenance" + {{- end }} ports: - containerPort: 9191 volumeMounts: @@ -88,8 +94,27 @@ spec: # periodSeconds: 3 resources: {{- toYaml $.Values.worker.resources | nindent 12 }} + {{- if $.Values.monitoring.loki.enabled }} + - name: log-exporter + image: {{ $.Values.monitoring.loki.exporter.image }} + imagePullPolicy: {{ $.Values.monitoring.loki.exporter.imagePullPolicy }} + args: + - "-config.file=/etc/promtail/promtail.yaml" # Found in the ConfigMap + resources: + {{- toYaml $.Values.monitoring.loki.exporter.resources | nindent 12 }} + volumeMounts: + - name: promtail-config + mountPath: /etc/promtail + - name: lifemonitor-logs + mountPath: /var/log + {{- end }} volumes: {{- include "lifemonitor.common-volume" $ | nindent 8 }} + {{- if $.Values.monitoring.loki.enabled }} + - name: promtail-config + configMap: + name: "{{$.Release.Name}}-promtail-worker-{{ $queue.name }}-configmap" + {{- end }} {{- with $.Values.worker.nodeSelector }} nodeSelector: {{- toYaml . 
| nindent 8 }} diff --git a/k8s/templates/wss.deployment.yaml b/k8s/templates/wss.deployment.yaml index a127f74e2..8bae0a963 100644 --- a/k8s/templates/wss.deployment.yaml +++ b/k8s/templates/wss.deployment.yaml @@ -60,6 +60,10 @@ spec: imagePullPolicy: {{ .Values.lifemonitor.imagePullPolicy }} env: {{- include "lifemonitor.common-env" . | nindent 12 }} + {{- if .Values.maintenanceMode.enabled }} + - name: FLASK_ENV + value: "maintenance" + {{- end }} volumeMounts: {{- include "lifemonitor.common-volume-mounts" . | nindent 12 }} ports: diff --git a/k8s/values.yaml b/k8s/values.yaml index f02c2dee6..b9052247d 100644 --- a/k8s/values.yaml +++ b/k8s/values.yaml @@ -6,6 +6,12 @@ nameOverride: "" fullnameOverride: "" +# manage maintenance mode +maintenanceMode: + enabled: false + # mainMessage: "We're busy updating the Life-Monitor service for you." + # secondaryMessage: "We'll be back shortly." + # The name and port number of the server (e.g.: 'lm.local:8000'), # used as base_url on all the links returned by the API externalServerName: &hostname api.lifemonitor.eu @@ -120,6 +126,7 @@ backup: successfulJobsHistoryLimit: 30 failedJobsHistoryLimit: 30 existingClaim: data-api-backup + # encryptionKeySecret: lifemonitor-api-backup-key # Settings to mirror the (cluster) local backup # to a remote site via FTPS or SFTP remote: @@ -136,11 +143,28 @@ monitoring: enabled: false prometheus: namespace: kube-prometheus-stack + loki: + enabled: false + url: http://loki:3100 + exporter: + image: grafana/promtail:main-60ea954 + imagePullPolicy: IfNotPresent + resources: + requests: + memory: 128Mi + cpu: 0.1 + limits: + memory: 256Mi + cpu: 0.2 + +# management console settings +console: + enabled: false rateLimiting: zone: accounts: - enabled: false + enabled: false size: 60m rate: 2r/s burst: 20 @@ -193,7 +217,6 @@ lifemonitor: enableTestConnection: false resources: - {} # We usually recommend not to specify default resources and to leave this as a conscious # choice for the user. 
This also increases chances charts run on environments with little # resources, such as Minikube. If you do want to specify resources, uncomment the following @@ -204,6 +227,20 @@ lifemonitor: # requests: # cpu: 100m # memory: 128Mi + limits: + cpu: 1.8 + memory: 7936Mi + requests: + cpu: 0.5 + memory: 1024Mi + + # configure resources for the init containers + initContainers: + initBackend: + resources: + limits: + memory: 256Mi + cpu: 200m autoscaling: enabled: false @@ -260,7 +297,6 @@ worker: replicaCount: 1 resources: - {} # We usually recommend not to specify default resources and to leave this as a conscious # choice for the user. This also increases chances charts run on environments with little # resources, such as Minikube. If you do want to specify resources, uncomment the following @@ -271,6 +307,20 @@ worker: # requests: # cpu: 100m # memory: 128Mi + limits: + cpu: 1.8 + memory: 7936Mi + requests: + cpu: 0.5 + memory: 1024Mi + + # configure resources for the init containers + initContainers: + initWorker: + resources: + limits: + memory: 256Mi + cpu: 200m autoscaling: enabled: false diff --git a/lifemonitor/app.py b/lifemonitor/app.py index 2074dce5e..6917ce07c 100644 --- a/lifemonitor/app.py +++ b/lifemonitor/app.py @@ -27,14 +27,13 @@ from flask_migrate import Migrate import lifemonitor.config as config -from lifemonitor import redis from lifemonitor import errors as errors_controller +from lifemonitor import redis from lifemonitor.auth.services import current_user from lifemonitor.integrations import init_integrations from lifemonitor.metrics import init_metrics from lifemonitor.routes import register_routes from lifemonitor.tasks import init_task_queues -from lifemonitor.utils import get_domain from . 
import commands from .cache import init_cache @@ -47,8 +46,15 @@ logger = logging.getLogger(__name__) -def create_app(env=None, settings=None, init_app=True, init_integrations=True, - worker=False, load_jobs=True, **kwargs): +def create_app( + env=None, + settings=None, + init_app=True, + init_integrations=True, + worker=False, + load_jobs=True, + **kwargs, +): """ App factory method :param env: @@ -58,10 +64,10 @@ def create_app(env=None, settings=None, init_app=True, init_integrations=True, """ # set app env app_env = env or os.environ.get("FLASK_ENV", "production") - if app_env != 'production': + if app_env != "production": # Set the DEBUG_METRICS env var to also enable the # prometheus metrics exporter when running in development mode - os.environ['DEBUG_METRICS'] = 'true' + os.environ["DEBUG_METRICS"] = "true" # load app config app_config = config.get_config_by_name(app_env, settings=settings) # set the FlaskApp instance path @@ -77,24 +83,26 @@ def create_app(env=None, settings=None, init_app=True, init_integrations=True, if os.environ.get("FLASK_APP_CONFIG_FILE", None): app.config.from_envvar("FLASK_APP_CONFIG_FILE") # set worker flag - app.config['WORKER'] = worker + app.config["WORKER"] = worker # append proxy settings - app.config['PROXY_ENTRIES'] = config.load_proxy_entries(app.config) + app.config["PROXY_ENTRIES"] = config.load_proxy_entries(app.config) # initialize the application if init_app: with app.app_context() as ctx: - initialize_app(app, ctx, load_jobs=load_jobs, load_integrations=init_integrations) + initialize_app( + app, ctx, load_jobs=load_jobs, load_integrations=init_integrations + ) @app.route("/") def index(): if not current_user.is_authenticated: return render_template("index.j2") - return redirect(url_for('auth.index')) + return redirect(url_for("auth.index")) @app.route("/profile") def profile(): - return redirect(url_for('auth.index', back=request.args.get('back', False))) + return redirect(url_for("auth.index", 
back=request.args.get("back", False))) # append routes to check app health @app.route("/health") @@ -103,11 +111,23 @@ def health(): @app.route("/openapi.html") def openapi(): - return redirect('/static/specs/apidocs.html', code=302) + return redirect("/static/specs/apidocs.html", code=302) + + @app.route("/maintenance") + def maintenance(): + if not app.config.get("MAINTENANCE_MODE", False): + return redirect(url_for("index")) + return render_template("maintenance/maintenance.j2", + main_message=app.config.get("MAINTENANCE_MODE_MAIN_MESSAGE", None), + secondary_message=app.config.get("MAINTENANCE_MODE_SECONDARY_MESSAGE", None)) @app.before_request def set_request_start_time(): request.start_time = time.time() + if app.config.get("MAINTENANCE_MODE", False): + logger.debug("Application is running in maintenance mode. Request %s cannot be served!", request.path) + if not request.path.startswith("/maintenance"): + return redirect('/maintenance') @app.after_request def log_response(response): @@ -120,7 +140,7 @@ def log_response(response): # for h in request.headers: # logger.debug("header: %s %s", h, request.headers.get(h, None)) # log the request - processing_time = (time.time() * 1000.0 - request.start_time * 1000.0) + processing_time = time.time() * 1000.0 - request.start_time * 1000.0 logger.info( "resp: %s %s %s %s %s %s %s %s %0.3fms", request.remote_addr, @@ -131,7 +151,7 @@ def log_response(response): response.content_length, request.referrer, request.user_agent, - processing_time + processing_time, ) # return the response return response @@ -139,37 +159,45 @@ def log_response(response): return app -def initialize_app(app: Flask, app_context, prom_registry=None, load_jobs: bool = True, load_integrations: bool = True): +def initialize_app( + app: Flask, + app_context, + prom_registry=None, + load_jobs: bool = True, + load_integrations: bool = True, +): # init tmp folder - os.makedirs(app.config.get('BASE_TEMP_FOLDER'), exist_ok=True) + 
os.makedirs(app.config.get("BASE_TEMP_FOLDER"), exist_ok=True) # enable CORS CORS(app, expose_headers=["Content-Type", "X-CSRFToken"], supports_credentials=True) # configure logging config.configure_logging(app) - # register error handlers - errors_controller.register_api(app) - # init Redis connection - redis.init(app) - # configure app DB - db.init_app(app) - # initialize Migration engine - Migrate(app, db) - # initialize cache - init_cache(app) - # configure serializer engine (Flask Marshmallow) - ma.init_app(app) - # configure app routes - register_routes(app) - # init scheduler/worker for async tasks - init_task_queues(app, load_jobs=load_jobs) - # init mail system - init_mail(app) - # initialize integrations - if load_integrations: - init_integrations(app) - # initialize metrics engine - init_metrics(app, prom_registry) - # register commands - commands.register_commands(app) - # register the domain filter with Jinja - app.jinja_env.filters['domain'] = get_domain + # check if the app is running in maintenance mode + if app.config.get("MAINTENANCE_MODE", False): + logger.warning("Application is running in maintenance mode") + else: + # register error handlers + errors_controller.register_api(app) + # init Redis connection + redis.init(app) + # configure app DB + db.init_app(app) + # initialize Migration engine + Migrate(app, db) + # initialize cache + init_cache(app) + # configure serializer engine (Flask Marshmallow) + ma.init_app(app) + # configure app routes + register_routes(app) + # init scheduler/worker for async tasks + init_task_queues(app, load_jobs=load_jobs) + # init mail system + init_mail(app) + # initialize integrations + if load_integrations: + init_integrations(app) + # initialize metrics engine + init_metrics(app, prom_registry) + # register commands + commands.register_commands(app) diff --git a/lifemonitor/commands/backup.py b/lifemonitor/commands/backup.py index 0ad6020cd..9acb367ce 100644 --- a/lifemonitor/commands/backup.py +++ 
b/lifemonitor/commands/backup.py @@ -25,6 +25,7 @@ import sys import time from pathlib import Path +from typing import BinaryIO import click from click_option_group import GroupedOption, optgroup @@ -32,7 +33,8 @@ from flask.blueprints import Blueprint from flask.cli import with_appcontext from flask.config import Config -from lifemonitor.utils import FtpUtils + +from lifemonitor.utils import FtpUtils, encrypt_folder from .db import backup, backup_options @@ -45,6 +47,15 @@ # set help for the CLI command _blueprint.cli.help = "Manage backups of database and RO-Crates" +# define the encryption key options +encryption_key_option = click.option("-k", "--encryption-key", default=None, help="Encryption key") +encryption_key_file_option = click.option("-kf", "--encryption-key-file", + type=click.File("rb"), default=None, + help="File containing the encryption key") +encryption_asymmetric_option = click.option("-a", "--encryption-asymmetric", is_flag=True, default=False, + show_default=True, + help="Use asymmetric encryption") + class RequiredIf(GroupedOption): def __init__(self, *args, **kwargs): @@ -113,17 +124,29 @@ def bck(ctx): @backup_options @synch_otptions @with_appcontext -def db_cmd(file, directory, verbose, *args, **kwargs): +def db_cmd(file, directory, + encryption_key, encryption_key_file, encryption_asymmetric, + verbose, *args, **kwargs): """ Make a backup of the database """ - result = backup_db(directory, file, verbose, *args, **kwargs) + result = backup_db(directory, file, + encryption_key=encryption_key, + encryption_key_file=encryption_key_file, + encryption_asymmetric=encryption_asymmetric, + verbose=verbose, *args, **kwargs) sys.exit(result) -def backup_db(directory, file=None, verbose=False, *args, **kwargs): +def backup_db(directory, file=None, + encryption_key=None, encryption_key_file=None, encryption_asymmetric=False, + verbose=False, *args, **kwargs): logger.debug(sys.argv) - result = backup(directory, file, verbose) + logger.debug("Backup DB: 
%r - %r - %r - %r", directory, file, encryption_asymmetric, verbose) + logger.warning(f"Encryption asymmetric: {encryption_asymmetric}") + result = backup(directory, file, + encryption_key=encryption_key, encryption_key_file=encryption_key_file, + encryption_asymmetric=encryption_asymmetric, verbose=verbose) if result.returncode == 0: synch = kwargs.pop('synch', False) if synch: @@ -134,21 +157,45 @@ def backup_db(directory, file=None, verbose=False, *args, **kwargs): @bck.command("crates") @click.option("-d", "--directory", default="./", show_default=True, help="Local path to store RO-Crates") +@encryption_key_option +@encryption_key_file_option +@encryption_asymmetric_option @synch_otptions @with_appcontext -def crates_cmd(directory, *args, **kwargs): +def crates_cmd(directory, + encryption_key, encryption_key_file, encryption_asymmetric, + *args, **kwargs): """ Make a backup of the registered workflow RO-Crates """ - result = backup_crates(current_app.config, directory, *args, **kwargs) - sys.exit(result) + result = backup_crates(current_app.config, directory, + encryption_key=encryption_key, encryption_key_file=encryption_key_file, + encryption_asymmetric=encryption_asymmetric, + *args, **kwargs) + sys.exit(result.returncode) -def backup_crates(config, directory, *args, **kwargs): +def backup_crates(config, directory, + encryption_key: bytes = None, encryption_key_file: BinaryIO = None, + encryption_asymmetric: bool = False, + *args, **kwargs) -> subprocess.CompletedProcess: assert config.get("DATA_WORKFLOWS", None), "DATA_WORKFLOWS not configured" + # get the path of the RO-Crates rocrate_source_path = config.get("DATA_WORKFLOWS").removesuffix('/') + # create the directory if not exists os.makedirs(directory, exist_ok=True) - result = subprocess.run(f'rsync -avh --delete {rocrate_source_path}/ {directory} ', shell=True, capture_output=True) + # flag to check the result of the rsync or encrypt command + result = False + # encrypt the RO-Crates if an encryption key is provided + if 
encryption_key or encryption_key_file: + if not encryption_key: + encryption_key = encryption_key_file.read() + result = encrypt_folder(rocrate_source_path, directory, encryption_key, + encryption_asymmetric=encryption_asymmetric) + result = subprocess.CompletedProcess(returncode=0 if result else 1, args=()) + else: + result = subprocess.run(f'rsync -avh --delete {rocrate_source_path}/ {directory} ', + shell=True, capture_output=True) if result.returncode == 0: print("Created backup of workflow RO-Crates @ '%s'" % directory) synch = kwargs.pop('synch', False) @@ -156,8 +203,11 @@ def backup_crates(config, directory, *args, **kwargs): logger.debug("Remaining args: %r", kwargs) return __remote_synch__(source=directory, **kwargs) else: - print("Unable to backup workflow RO-Crates\n%s", result.stderr.decode()) - return result.returncode + try: + print("Unable to backup workflow RO-Crates\n%s", result.stderr.decode()) + except Exception: + print("Unable to backup workflow RO-Crates\n") + return result def auto(config: Config): @@ -167,19 +217,38 @@ def auto(config: Config): click.echo("No BACKUP_LOCAL_PATH found in your settings") sys.exit(0) + # search for an encryption key file + encryption_key = None + encryption_key_file = config.get("BACKUP_ENCRYPTION_KEY_PATH", None) + if not encryption_key_file: + click.echo("WARNING: No BACKUP_ENCRYPTION_KEY_PATH found in your settings") + logger.warning("No BACKUP_ENCRYPTION_KEY_PATH found in your settings") + else: + # read the encryption key from the file if the key is not provided + if isinstance(encryption_key_file, str): + with open(encryption_key_file, "rb") as encryption_key_file: + encryption_key = encryption_key_file.read() + elif isinstance(encryption_key_file, BinaryIO): + encryption_key = encryption_key_file.read() + else: + raise ValueError("Invalid encryption key file") + # set paths base_path = base_path.removesuffix('/') # remove trailing '/' db_backups = f"{base_path}/db" rc_backups = f"{base_path}/crates" 
logger.debug("Backup paths: %r - %r - %r", base_path, db_backups, rc_backups) # backup database - result = backup(db_backups) + result = backup(db_backups, + encryption_key=encryption_key, + encryption_asymmetric=True) if result.returncode != 0: sys.exit(result.returncode) # backup crates - result = backup_crates(config, rc_backups) - if result != 0: - sys.exit(result) + result = backup_crates(config, rc_backups, + encryption_key=encryption_key, encryption_asymmetric=True) + if result.returncode != 0: + sys.exit(result.returncode) # clean up old files retain_days = int(config.get("BACKUP_RETAIN_DAYS", -1)) logger.debug("RETAIN DAYS: %d", retain_days) diff --git a/lifemonitor/commands/db.py b/lifemonitor/commands/db.py index 13f55194b..7bd09bfd0 100644 --- a/lifemonitor/commands/db.py +++ b/lifemonitor/commands/db.py @@ -24,13 +24,15 @@ import subprocess import sys from datetime import datetime +from typing import BinaryIO import click from flask import current_app from flask.cli import with_appcontext from flask_migrate import cli, current, stamp, upgrade + from lifemonitor.auth.models import User -from lifemonitor.utils import hide_secret +from lifemonitor.utils import decrypt_file, encrypt_file, hide_secret # set module level logger logger = logging.getLogger() @@ -105,11 +107,20 @@ def wait_for_db(): # define common options verbose_option = click.option("-v", "--verbose", default=False, is_flag=True, help="Enable verbose mode") +encryption_key_option = click.option("-k", "--encryption-key", default=None, help="Encryption key") +encryption_key_file_option = click.option("-kf", "--encryption-key-file", + type=click.File("rb"), + default=None, help="File containing the encryption key") +encryption_asymmetric_option = click.option("-a", "--encryption-asymmetric", is_flag=True, default=False, + help="Use asymmetric encryption", show_default=True) def backup_options(func): # backup command options (evaluated in reverse order!) 
func = verbose_option(func) + func = encryption_asymmetric_option(func) + func = encryption_key_file_option(func) + func = encryption_key_option(func) func = click.option("-f", "--file", default=None, help="Backup filename (default 'hhmmss_yyyymmdd.tar')")(func) func = click.option("-d", "--directory", default="./", help="Directory path for the backup file (default '.')")(func) return func @@ -118,20 +129,34 @@ def backup_options(func): @cli.db.command("backup") @backup_options @with_appcontext -def backup_cmd(directory, file, verbose): +def backup_cmd(directory, file, + encryption_key, encryption_key_file, encryption_asymmetric, + verbose): """ Make a backup of the current app database """ - result = backup(directory, file, verbose) + logger.debug("%r - %r - %r - %r - %r - %r ", file, directory, + encryption_key, encryption_key_file, encryption_asymmetric, verbose) + result = backup(directory, file, + encryption_key=encryption_key, + encryption_key_file=encryption_key_file, + encryption_asymmetric=encryption_asymmetric, + verbose=verbose) # report exit code to the main process sys.exit(result.returncode) -def backup(directory, file=None, verbose=False) -> subprocess.CompletedProcess: +def backup(directory, file=None, + encryption_key=None, encryption_key_file: BinaryIO = None, + encryption_asymmetric=False, + verbose=False) -> subprocess.CompletedProcess: """ Make a backup of the current app database """ - logger.debug("%r - %r - %r", file, directory, verbose) + logger.debug("%r - %r - %r - %r - %r - %r", + file, directory, + encryption_key, encryption_key_file, encryption_asymmetric, + verbose) from lifemonitor.db import db_connection_params params = db_connection_params() if not file: @@ -148,6 +173,30 @@ def backup(directory, file=None, verbose=False) -> subprocess.CompletedProcess: msg = f"Created backup of database {params['dbname']} @ {target_path}" logger.debug(msg) print(msg) + if encryption_key is not None or encryption_key_file is not None: + msg = 
f"Encrypting backup file {target_path}..." + logger.debug(msg) + print(msg) + # read the encryption key from the file if the key is not provided + if encryption_key is None: + encryption_key = encryption_key_file.read() + # encrypt the backup file using the encryption key with the Fernet algorithm + try: + with open(target_path, "rb") as input_file: + with open(target_path + ".enc", "wb") as output_file: + encrypt_file(input_file, output_file, encryption_key, + encryption_asymmetric=encryption_asymmetric, + raise_error=True) + # remove the original backup file + os.remove(target_path) + msg = f"Backup file {target_path} encrypted" + logger.debug(msg) + print(msg) + except ValueError as e: + logger.error("Unable to encrypt backup file '%s'. ERROR: %s", target_path, str(e)) + except Exception as e: + print("Unable to encrypt backup file '%s'. ERROR: %s" % (target_path, str(e))) + sys.exit(1) else: click.echo("\nERROR Unable to backup the database: %s" % result.stderr.decode()) if verbose and result.stderr: @@ -159,79 +208,117 @@ def backup(directory, file=None, verbose=False) -> subprocess.CompletedProcess: @click.argument("file") @click.option("-s", "--safe", default=False, is_flag=True, help="Preserve the current database renaming it as '_yyyymmdd_hhmmss'") +@encryption_key_option +@encryption_key_file_option +@encryption_asymmetric_option @verbose_option @with_appcontext -def restore(file, safe, verbose): +def restore(file, safe, + encryption_key, encryption_key_file, encryption_asymmetric, + verbose): """ Restore a backup of the app database """ from lifemonitor.db import (create_db, db_connection_params, db_exists, drop_db, rename_db) + # initialize the encrypted file reference + encrypted_file = None + # check if DB file exists if not os.path.isfile(file): print("File '%s' not found!" 
% file) sys.exit(128) - # check if delete or preserve the current app database (if exists) - new_db_name = None - params = db_connection_params() - db_copied = False - if db_exists(params['dbname']): - if safe: - answer = input(f"The database '{params['dbname']}' will be renamed. Continue? (y/n): ") - if not answer.lower() in ('y', 'yes'): - sys.exit(0) - else: - answer = input(f"The database '{params['dbname']}' will be delete. Continue? (y/n): ") - if not answer.lower() in ('y', 'yes'): - sys.exit(0) - # create a snapshot of the current database - new_db_name = f"{params['dbname']}_{datetime.now().strftime('%Y%m%d_%H%M%S')}" - rename_db(params['dbname'], new_db_name) - db_copied = True - msg = f"Created a DB snapshot: data '{params['dbname']}' temporarily renamed as '{new_db_name}'" - logger.debug(msg) - if verbose: - print(msg) - # restore database - create_db(current_app.config) - cmd = f"PGPASSWORD={params['password']} pg_restore -h {params['host']} -U {params['user']} -d {params['dbname']} -v {file}" - if verbose: - print("Dabaset file: %s" % file) - print("Backup command: %s" % hide_secret(cmd, params['password'])) - result = subprocess.run(cmd, shell=True) - logger.debug("Restore result: %r", hide_secret(cmd, params['password'])) - if result.returncode == 0: - if db_copied and safe: - print(f"Existing database '{params['dbname']}' renamed as '{new_db_name}'") - msg = f"Backup {file} restored to database '{params['dbname']}'" - logger.debug(msg) - print(msg) - # if mode is set to 'not safe' - # delete the temp snapshot of the current database - if not safe: - drop_db(db_name=new_db_name) - msg = f"Current database '{params['dbname']}' deleted" - logger.debug(msg) - if verbose: - print(msg) - else: - # if any error occurs - # restore the previous latest version of the DB - # previously saved as temp snapshot - if new_db_name: - # delete the db just created - drop_db() - # restore the old database snapshot - rename_db(new_db_name, params['dbname']) + + try: + 
+ # check if the DB backup is encrypted and the key or key file is provided + if file.endswith(".enc"): + if encryption_key is None and encryption_key_file is None: + print("The backup file '%s' is encrypted but no encryption key is provided!" % file) + sys.exit(128) + + # read the encryption key from the file if the key is not provided + if encryption_key is None: + encryption_key = encryption_key_file.read() + + # Set the reference to the encrypted file + encrypted_file = file + + # decrypt the backup file using the encryption key with the Fernet algorithm + file = file.removesuffix(".enc") + with open(encrypted_file, "rb") as input_file: + with open(file, "wb") as output_file: + decrypt_file(input_file, output_file, + encryption_key, encryption_asymmetric=encryption_asymmetric) + logger.debug("Decrypted backup file '%s' to '%s'", encrypted_file, file) + + # check if delete or preserve the current app database (if exists) + new_db_name = None + params = db_connection_params() + db_copied = False + if db_exists(params['dbname']): + if safe: + answer = input(f"The database '{params['dbname']}' will be renamed. Continue? (y/n): ") + if not answer.lower() in ('y', 'yes'): + sys.exit(0) + else: + answer = input(f"The database '{params['dbname']}' will be delete. Continue? 
(y/n): ") + if not answer.lower() in ('y', 'yes'): + sys.exit(0) + # create a snapshot of the current database + new_db_name = f"{params['dbname']}_{datetime.now().strftime('%Y%m%d_%H%M%S')}" + rename_db(params['dbname'], new_db_name) db_copied = True - msg = f"Database restored '{params['dbname']}' renamed as '{new_db_name}'" + msg = f"Created a DB snapshot: data '{params['dbname']}' temporarily renamed as '{new_db_name}'" logger.debug(msg) if verbose: print(msg) - print("ERROR: Unable to restore the database backup") - if verbose and result.stderr: - print("ERROR [stderr]: %s" % result.stderr.decode()) + # restore database + create_db(current_app.config) + cmd = f"PGPASSWORD={params['password']} pg_restore -h {params['host']} -U {params['user']} -d {params['dbname']} -v {file}" + if verbose: + print("Dabaset file: %s" % file) + print("Backup command: %s" % hide_secret(cmd, params['password'])) + result = subprocess.run(cmd, shell=True) + logger.debug("Restore result: %r", hide_secret(cmd, params['password'])) + if result.returncode == 0: + if db_copied and safe: + print(f"Existing database '{params['dbname']}' renamed as '{new_db_name}'") + msg = f"Backup {file} restored to database '{params['dbname']}'" + logger.debug(msg) + print(msg) + # if mode is set to 'not safe' + # delete the temp snapshot of the current database + if not safe: + drop_db(db_name=new_db_name) + msg = f"Current database '{params['dbname']}' deleted" + logger.debug(msg) + if verbose: + print(msg) + else: + # if any error occurs + # restore the previous latest version of the DB + # previously saved as temp snapshot + if new_db_name: + # delete the db just created + drop_db() + # restore the old database snapshot + rename_db(new_db_name, params['dbname']) + db_copied = True + msg = f"Database restored '{params['dbname']}' renamed as '{new_db_name}'" + logger.debug(msg) + if verbose: + print(msg) + print("ERROR: Unable to restore the database backup") + if verbose and result.stderr: + 
print("ERROR [stderr]: %s" % result.stderr.decode()) + finally: + if encrypted_file and os.path.isfile(file): + # remove the decrypted file + os.remove(file) + logger.debug("Removed decrypted backup file '%s'", file) + # report exit code to the main process sys.exit(result.returncode) diff --git a/lifemonitor/commands/encrypt.py b/lifemonitor/commands/encrypt.py new file mode 100644 index 000000000..3f7b3f7e2 --- /dev/null +++ b/lifemonitor/commands/encrypt.py @@ -0,0 +1,251 @@ +# Copyright (c) 2020-2022 CRS4 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +import base64 +import logging +import os +import sys + +import click +from flask import Blueprint + +from ..utils import (decrypt_file, decrypt_folder, encrypt_file, + encrypt_folder, generate_asymmetric_encryption_keys, + generate_symmetric_encryption_key, serialization) + +# set module level logger +logger = logging.getLogger(__name__) + +# define the blueprint for DB commands +blueprint = Blueprint('ed', __name__) + +# set CLI help +blueprint.cli.help = "Manage files encryption/decryption" + +# define the encryption key options +encryption_asymmetric_option = click.option("-a", "--encryption-asymmetric", is_flag=True, default=False, + help="Use asymmetric encryption", show_default=True) +encryption_key_option = click.option("-k", "--encryption-key", default=None, help="Encryption key") +encryption_key_file_option = click.option("-kf", "--encryption-key-file", + type=click.File("rb"), default="lifemonitor.key", + help="File containing the encryption key") + + +@blueprint.cli.command('gen-keys') +@click.option("-f", "--key-file", type=click.Path(exists=False), default="lifemonitor.key", show_default=True) +@encryption_asymmetric_option +def generate_encryption_keys_cmd(key_file, encryption_asymmetric): + """Generate a new pair of encryption keys""" + try: + # init reference to the key (symmetric or asymmetric) bytes + key = None + # check if the key file already exists + if os.path.exists(key_file): + print("Key file '%s' already exists" % os.path.abspath(key_file)) + sys.exit(1) + if not encryption_asymmetric: + # generate the key + key = generate_symmetric_encryption_key() + print("Key generated: %s" % key.decode("utf-8")) + # save the key + with open(key_file, "wb") as f: + f.write(key) + print("Key saved in '%s'" % os.path.abspath(key_file)) + else: + # generate the key pair + priv, pub = generate_asymmetric_encryption_keys(key_filename=key_file) + logger.debug(f"Keys saved: private={key_file}, public={key_file + '.pub'}") + logger.debug(f"Private key: 
{priv}") + logger.debug(f"Public key: {pub}") + print("Keys saved: private=%s, public=%s" % (key_file, key_file + ".pub")) + # set reference to the public key + key = pub.public_bytes(encoding=serialization.Encoding.PEM, + format=serialization.PublicFormat.SubjectPublicKeyInfo) + # generate the kubernetes secret containing the key + with open(key_file + ".secret.yaml", "w") as f: + with open(os.path.join("k8s", "backup-key.secret.yaml"), "r") as t: + # base 64 encode the key + f.write(t.read().replace("", + base64.b64encode(key).decode("utf-8"))) + sys.exit(0) + except Exception as e: + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + else: + logger.error(f"Error generating key: {e}") + sys.exit(1) + + +@blueprint.cli.command('encrypt') +@click.argument("input_file", metavar="input", type=click.File("rb")) +@click.option("-o", "--out", type=click.File("wb"), default=".enc", show_default=True, help="Output file") +@encryption_key_option +@encryption_key_file_option +@encryption_asymmetric_option +def encrypt_cmd(input_file, out, encryption_key, encryption_key_file, encryption_asymmetric): + """Encrypt a file""" + try: + # log the parameters + logger.debug(f"Input file: {input_file.name}") + logger.debug(f"Output file: {out.name}") + logger.debug(f"Encryption key: {encryption_key}") + logger.debug(f"Encryption key file: {encryption_key_file.name}") + + # # check if the key or key file are not set + if encryption_key is None and encryption_key_file is None: + print("ERROR: Key or key file should be set") + sys.exit(1) + # check if the output file already exists + if os.path.exists(out.name): + print("ERROR: Output file '%s' already exists" % os.path.abspath(out.name)) + sys.exit(1) + # initialize the output file + if out.name == ".enc": + out.name = "%s.enc" % os.path.abspath(input_file.name) + + # read the encryption key from the file if the key is not provided + if encryption_key is None: + encryption_key = encryption_key_file.read() + + # encrypt the 
file + encrypt_file(input_file, out, encryption_key, encryption_asymmetric=encryption_asymmetric) + logger.debug(f"File encrypted: {out.name}") + print(f"File encrypted: {out.name}") + sys.exit(0) + except Exception as e: + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + else: + logger.error(f"Error encrypting file: {e}") + sys.exit(1) + + +@blueprint.cli.command('encrypt-folder') +@click.argument("input_folder", type=click.Path(exists=True)) +@click.option("-o", "--output_folder", type=click.Path(exists=False), default="", show_default=True, help="Output file") +@encryption_key_option +@encryption_key_file_option +@encryption_asymmetric_option +def encrypt_folder_cmd(input_folder, output_folder, + encryption_key, encryption_key_file, encryption_asymmetric): + + # log the parameters + logger.debug(f"Input folder: {input_folder}") + logger.debug(f"Output file: {output_folder}") + logger.debug(f"Encryption key: {encryption_key}") + logger.debug(f"Encryption key file: {encryption_key_file.name}") + + # # check if the key or key file are not set + if encryption_key is None and encryption_key_file is None: + print("ERROR: Key or key file should be set") + sys.exit(1) + + # init the output folder + if output_folder == "": + output_folder = input_folder + logger.debug(f"Using Output folder: {output_folder}") + + # read the encryption key from the file if the key is not provided + if encryption_key is None: + encryption_key = encryption_key_file.read() + + # encrypt the folder + encrypted_files = encrypt_folder(input_folder, output_folder, encryption_key, encryption_asymmetric=encryption_asymmetric) + print(f"Encryption completed: {encrypted_files} files encrypted on {output_folder}") + sys.exit(0) + + +@blueprint.cli.command('decrypt') +@click.argument("input_file", metavar="input", type=click.File("rb")) +@click.option("-o", "--out", type=click.File("wb"), default="", show_default=True, help="Output file") +@encryption_key_option 
+@encryption_key_file_option +@encryption_asymmetric_option +def decrypt_cmd(input_file, out, encryption_key, encryption_key_file, encryption_asymmetric): + """Decrypt a file""" + try: + # log the parameters + logger.debug(f"Input file: {input_file.name}") + logger.debug(f"Output file: {out.name}") + logger.debug(f"Encryption key: {encryption_key}") + logger.debug(f"Encryption key file: {encryption_key_file.name}") + + # check if the key or key file are not set + if encryption_key is None and encryption_key_file is None: + print("ERROR: Key or key file should be set") + sys.exit(1) + + # check if the output file already exists + if os.path.exists(out.name): + print("Output file '%s' already exists" % os.path.abspath(out.name)) + sys.exit(1) + # initialize the output file + if out.name == "": + out.name = "%s" % os.path.abspath(input_file.name).removesuffix(".enc") + # read the encryption key from the file if the key is not provided + if encryption_key is None: + encryption_key = encryption_key_file.read() + # decrypt the file + decrypt_file(input_file, out, encryption_key, encryption_asymmetric=encryption_asymmetric) + logger.debug(f"File decrypted: {out.name}") + print(f"File decrypted: {out.name}") + sys.exit(0) + except Exception as e: + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + else: + logger.error(f"Error decrypting file: {e}") + sys.exit(1) + + +@blueprint.cli.command('decrypt-folder') +@click.argument("input_folder", type=click.Path(exists=True)) +@click.option("-o", "--output_folder", type=click.Path(exists=False), default="", show_default=True, help="Output file") +@encryption_key_option +@encryption_key_file_option +@encryption_asymmetric_option +def decrypt_folder_cmd(input_folder, output_folder, + encryption_key, encryption_key_file, encryption_asymmetric): + + # log the parameters + logger.debug(f"Input folder: {input_folder}") + logger.debug(f"Output file: {output_folder}") + logger.debug(f"Encryption key: {encryption_key}") + 
logger.debug(f"Encryption key file: {encryption_key_file.name}") + + # check if the key or key file are not set + if encryption_key is None and encryption_key_file is None: + print("ERROR: Key or key file should be set") + sys.exit(1) + + # init the output folder + if output_folder == "": + output_folder = input_folder + logger.debug(f"Using Output folder: {output_folder}") + + # read the encryption key from the file if the key is not provided + if encryption_key is None: + encryption_key = encryption_key_file.read() + + # decrypt the folder + decrypted_files = decrypt_folder(input_folder, output_folder, + encryption_key, asymmetric_encryption=encryption_asymmetric) + print(f"Decryption completed: {decrypted_files} files decrypted on {output_folder}") + sys.exit(0) diff --git a/lifemonitor/config.py b/lifemonitor/config.py index e2469f9f9..069cf4987 100644 --- a/lifemonitor/config.py +++ b/lifemonitor/config.py @@ -130,6 +130,8 @@ class BaseConfig: SERVICE_AVAILABILITY_TIMEOUT = 1 # Cookie Settings SESSION_COOKIE_NAME = 'lifemonitor_session' + # Disable Maintenance Mode by default + MAINTENANCE_MODE = False class DevelopmentConfig(BaseConfig): @@ -149,6 +151,13 @@ class ProductionConfig(BaseConfig): CACHE_TYPE = "flask_caching.backends.rediscache.RedisCache" +class MaintenanceConfig(BaseConfig): + CONFIG_NAME = "maintenance" + TESTING = False + CACHE_TYPE = "flask_caching.backends.rediscache.RedisCache" + MAINTENANCE_MODE = True + + class TestingConfig(BaseConfig): CONFIG_NAME = "testing" SETTINGS_FILE = "tests/settings.conf" @@ -178,7 +187,8 @@ class TestingSupportConfig(TestingConfig): DevelopmentConfig, TestingConfig, ProductionConfig, - TestingSupportConfig + TestingSupportConfig, + MaintenanceConfig ] _config_by_name = {cfg.CONFIG_NAME: cfg for cfg in _EXPORT_CONFIGS} @@ -293,6 +303,12 @@ def configure_logging(app): if level_value == logging.DEBUG: log_format = f'[{COLOR_SEQ % (90)}%(asctime)s{RESET_SEQ}] %(levelname)s in %(module)s::%(funcName)s @ line: 
%(lineno)s: {COLOR_SEQ % (90)}%(message)s{RESET_SEQ}' + # configure and initialize log_path + log_file_path = app.config.get('LOG_FILE_PATH', '/var/log/lm') + if not os.path.exists(log_file_path): + os.makedirs(log_file_path, exist_ok=True) + + # configure logging dictConfig({ 'version': 1, 'formatters': {'default': { @@ -305,24 +321,34 @@ def configure_logging(app): # 'param': '', } }, - 'handlers': {'wsgi': { - 'class': 'logging.StreamHandler', - 'stream': 'ext://flask.logging.wsgi_errors_stream', - 'formatter': 'default', - 'filters': ['myfilter'] - }}, + 'handlers': { + 'wsgi': { + 'class': 'logging.StreamHandler', + 'stream': 'ext://flask.logging.wsgi_errors_stream', + 'formatter': 'default', + 'filters': ['myfilter'] + }, + 'rotating_to_file': { + 'level': logging.INFO, + 'class': "logging.handlers.RotatingFileHandler", + 'formatter': 'default', + "filename": os.path.join(log_file_path, 'lifemonitor.log'), + "maxBytes": 10485760, + "backupCount": 10, + }, + }, 'response': { 'level': logging.INFO, - 'handlers': ['wsgi'], + 'handlers': ['wsgi', 'rotating_to_file'], }, 'root': { 'level': level_value, - 'handlers': ['wsgi'] + 'handlers': ['wsgi', 'rotating_to_file'] }, # Lower the log level for the github.Requester object -- else it'll flood us with messages 'Requester': { 'level': logging.ERROR, - 'handlers': ['wsgi'] + 'handlers': ['wsgi', 'rotating_to_file'] }, 'disable_existing_loggers': False, }) diff --git a/lifemonitor/errors.py b/lifemonitor/errors.py index f01e2aae8..234915289 100644 --- a/lifemonitor/errors.py +++ b/lifemonitor/errors.py @@ -83,6 +83,22 @@ def handle_404(e: Exception = None): ) +@blueprint.route("/405") +def handle_405(e: Exception = None): + resource = request.args.get("resource", None, type=str) + logger.debug(f"Method not allowed for resource {resource}") + return handle_error( + { + "title": "LifeMonitor: Method not allowed", + "code": "404", + "description": str(e) + if e and logger.isEnabledFor(logging.DEBUG) + else "Method not 
allowed for this resource", + "resource": resource, + } + ) + + @blueprint.route("/429") def handle_429(e: Exception = None): return handle_error( diff --git a/lifemonitor/metrics/__init__.py b/lifemonitor/metrics/__init__.py index 238bb8022..828f1a8c2 100644 --- a/lifemonitor/metrics/__init__.py +++ b/lifemonitor/metrics/__init__.py @@ -60,7 +60,7 @@ def init_metrics(app, prom_registry=None): else: logger.warning("Unable to start multiprocess prometheus exporter: 'PROMETHEUS_MULTIPROC_DIR' not set." f"Metrics will be exposed through the `{__METRICS_ENDPOINT__}` endpoint.") - logger.warning("Configured class for metrics: %r", metrics_class) + logger.info("Configured class for metrics: %r", metrics_class) # init metrics metrics = metrics_class(app, defaults_prefix=model.PREFIX, registry=prom_registry) diff --git a/lifemonitor/static/img/icons/maintenance-1.png b/lifemonitor/static/img/icons/maintenance-1.png new file mode 100644 index 000000000..485af25cc Binary files /dev/null and b/lifemonitor/static/img/icons/maintenance-1.png differ diff --git a/lifemonitor/static/img/icons/maintenance-2.png b/lifemonitor/static/img/icons/maintenance-2.png new file mode 100644 index 000000000..485af25cc Binary files /dev/null and b/lifemonitor/static/img/icons/maintenance-2.png differ diff --git a/lifemonitor/static/src/package.json b/lifemonitor/static/src/package.json index 856f2e8a4..d7702bce1 100644 --- a/lifemonitor/static/src/package.json +++ b/lifemonitor/static/src/package.json @@ -1,7 +1,7 @@ { "name": "lifemonitor", "description": "Workflow Testing Service", - "version": "0.11.7", + "version": "0.11.8", "license": "MIT", "author": "CRS4", "main": "../dist/js/lifemonitor.min.js", diff --git a/lifemonitor/tasks/scheduler.py b/lifemonitor/tasks/scheduler.py index d59d35971..e986b538a 100644 --- a/lifemonitor/tasks/scheduler.py +++ b/lifemonitor/tasks/scheduler.py @@ -29,7 +29,7 @@ def _on_event(self, event: events.JobEvent): logger.debug("List of current jobs: %r", 
self.get_jobs()) logger.debug("List of deferred jobs: %r", self._not_scheduled_jobs) if event.code in [events.EVENT_JOB_EXECUTED, events.EVENT_JOB_ERROR]: - logger.warning("List of current jobs: %r", self.get_jobs()) + logger.debug("List of current jobs: %r", self.get_jobs()) @staticmethod def __enqueue_dramatiq_job__(**message): diff --git a/lifemonitor/tasks/worker.py b/lifemonitor/tasks/worker.py index e8e5fb638..8888c20ca 100644 --- a/lifemonitor/tasks/worker.py +++ b/lifemonitor/tasks/worker.py @@ -5,9 +5,13 @@ logger = logging.getLogger(__name__) - app = create_app(worker=True, load_jobs=True) app.app_context().push() - -broker = app.broker +# check if the app is in maintenance mode +if app.config.get("MAINTENANCE_MODE", False): + logger.warning("Application is in maintenance mode") + app.run() +else: + # initialise the message broker + broker = app.broker diff --git a/lifemonitor/templates/maintenance/base.j2 b/lifemonitor/templates/maintenance/base.j2 new file mode 100644 index 000000000..8f2229b98 --- /dev/null +++ b/lifemonitor/templates/maintenance/base.j2 @@ -0,0 +1,58 @@ + + + + + {% block title %}Life Monitor{% endblock %} + + + + {% block stylesheets %} + + + + + + + + + + + + + + + + + + + + + + {% endblock stylesheets %} + + {% block extra_stylesheets %} {%endblock extra_stylesheets %} + + + + {% block body %}{% endblock %} + + + {% block javascripts_libraries %} + + + + + + + + + {# Enable notifications #} + {{ macros.messages() }} + + {% endblock javascripts_libraries %} + + {% block javascripts %} {% endblock javascripts %} + + + diff --git a/lifemonitor/templates/maintenance/maintenance.j2 b/lifemonitor/templates/maintenance/maintenance.j2 new file mode 100644 index 000000000..f460b5867 --- /dev/null +++ b/lifemonitor/templates/maintenance/maintenance.j2 @@ -0,0 +1,59 @@ +{% extends 'maintenance/base.j2' %} +{% import 'macros.j2' as macros %} + + +{% block title %} {{ title }} {% endblock title %} + + +{% block extra_stylesheets %} {% endblock 
extra_stylesheets %} + + +{% block body_class %} error-page {% endblock %} + + +{% block body %} + +
+
+ {{ macros.render_logo(style="width: 200px; margin: auto;") }} +
+

+ We'll be back soon! +

+
+ +
+ + {% if not main_message %} + +

+ We're busy updating the + Life-Monitor + service for you.
Please check back soon! +

+ {% else %} +

+ {{ main_message }} +

+ {% endif %} + + + {% if secondary_message %} +

+ {{ secondary_message }} +

+ {% endif %} + + + + + +
+ +{% endblock body %} + + +{% block javascripts %} +{% endblock javascripts %} \ No newline at end of file diff --git a/lifemonitor/utils.py b/lifemonitor/utils.py index a912ce020..b19438fa0 100644 --- a/lifemonitor/utils.py +++ b/lifemonitor/utils.py @@ -34,6 +34,7 @@ import shutil import socket import string +import struct import subprocess import tempfile import time @@ -43,7 +44,8 @@ from datetime import datetime, timezone from importlib import import_module from os.path import basename, dirname, isfile, join -from typing import Dict, Iterable, List, Literal, Optional, Tuple, Type +from typing import (BinaryIO, Dict, Iterable, List, Literal, Optional, Tuple, + Type) from urllib.parse import urlparse import flask @@ -53,6 +55,9 @@ import pygit2 import requests import yaml +from cryptography.fernet import Fernet +from cryptography.hazmat.primitives import hashes, serialization +from cryptography.hazmat.primitives.asymmetric import padding, rsa from dateutil import parser from lifemonitor.cache import cached @@ -1248,3 +1253,257 @@ def rm_tree(self, path): self.ftp.rmd(path) except ftplib.all_errors as e: logger.debug('Could not remove {0}: {1}'.format(path, e)) + + +def generate_symmetric_encryption_key() -> bytes: + """Generate a new encryption key""" + key = None + try: + key = Fernet.generate_key() + logger.debug("Encryption key generated: %r", key) + except Exception as e: + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + return key + + +def generate_asymmetric_encryption_keys( + key_filename: str = "lifemonitor.key", + public_exponent=65537, key_size=2048) -> Tuple[rsa.RSAPrivateKey, rsa.RSAPublicKey]: + + # Generate the RSA private key + private_key = rsa.generate_private_key( + public_exponent=public_exponent, + key_size=key_size, + ) + + # Write the private key to a file + with open(f"{key_filename}", "wb") as key_file: + key_file.write( + private_key.private_bytes( + encoding=serialization.Encoding.PEM, + 
format=serialization.PrivateFormat.PKCS8, + encryption_algorithm=serialization.NoEncryption() + ) + ) + + # Extract the corresponding public key + public_key = private_key.public_key() + + # Write the public key to a file + with open(f"{key_filename}.pub", "wb") as key_file: + key_file.write( + public_key.public_bytes( + encoding=serialization.Encoding.PEM, + format=serialization.PublicFormat.SubjectPublicKeyInfo + ) + ) + + return private_key, public_key + + +def encrypt_file(input_file: BinaryIO, output_file: BinaryIO, key: bytes, + encryption_asymmetric: bool = False, + raise_error: bool = True, block=65536) -> bool: + """Encrypt a file using AES-256-CBC""" + # check if input and output are valid + if not input_file or not output_file: + raise ValueError("Invalid input/output file") + # check if the input file exists + if not os.path.exists(input_file.name): + raise ValueError(f"Input file {input_file.name} does not exist") + # check if the key is valid + if not key: + raise ValueError("Invalid encryption key") + try: + logger.warning("Encryption asymmetric: %r", encryption_asymmetric) + # encrypt the file chunk by chunk + # using a symmetric encryption algorithm + if not encryption_asymmetric: + cipher = Fernet(key) + while True: + chunk = input_file.read(block) + if not chunk or len(chunk) == 0: + break + enc = cipher.encrypt(chunk) + output_file.write(struct.pack(' bool: + + # check if the input folder exists + if not os.path.exists(input_folder): + raise ValueError(f"Input folder {input_folder} does not exist") + + # check if the key is valid + if not key: + raise ValueError("Invalid encryption key") + + # initialize the counter + count = 0 + try: + # walk on the input folder + for root, dirs, files in os.walk(input_folder): + for file in files: + input_file = os.path.join(root, file) + logger.debug(f"Input file: {input_file}") + file_output_folder = root.replace(input_folder, output_folder) + logger.debug(f"File output folder: {file_output_folder}") + if 
not os.path.exists(file_output_folder): + os.makedirs(file_output_folder, exist_ok=True) + logger.debug(f"Created folder: {file_output_folder}") + output_file = f"{os.path.join(file_output_folder, file)}.enc" + logger.debug(f"Encrypting file: {input_file}") + logger.debug(f"Output file: {output_file}") + with open(input_file, "rb") as f: + with open(output_file, "wb") as o: + encrypt_file(f, o, key, raise_error=raise_error, block=block, + encryption_asymmetric=encryption_asymmetric) + logger.debug(f"File encrypted: {output_file}") + print(f"File encrypted: {output_file}") + count += 1 + logger.debug(f"File encrypted: {count}") + logger.debug(f"Encryption completed: {count} files encrypted on {output_folder}") + except Exception as e: + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + if raise_error: + raise lm_exceptions.LifeMonitorException(detail=str(e)) + return count + + +def decrypt_file(input_file: BinaryIO, output_file: BinaryIO, key: bytes, + encryption_asymmetric: bool = False, block=65536, + raise_error: bool = True) -> bool: + """Decrypt a file using AES-256-CBC""" + # check if input and output are valid + if not input_file or not output_file: + raise ValueError("Invalid input/output file") + # check if the input file exists + if not os.path.exists(input_file.name): + raise ValueError(f"Input file {input_file.name} does not exist") + # check if the key is valid + if not key: + raise ValueError("Invalid encryption key") + try: + # decrypt the file chunk by chunk + # using a symmetric encryption algorithm + if not encryption_asymmetric: + cipher = Fernet(key) + while True: + size_data = input_file.read(4) + if len(size_data) == 0: + break + chunk = input_file.read(struct.unpack(' int: + + # check if the input folder exists + if not os.path.exists(input_folder): + raise ValueError(f"Input folder {input_folder} does not exist") + + # check if the key is valid + if not key: + raise ValueError("Invalid encryption key") + + # walk on the input 
folder + count = 0 + try: + for root, dirs, files in os.walk(input_folder): + for file in files: + input_file = os.path.join(root, file) + file_output_folder = root.replace(input_folder, output_folder) + logger.debug(f"File output folder: {file_output_folder}") + if not os.path.exists(file_output_folder): + os.makedirs(file_output_folder, exist_ok=True) + logger.debug(f"Created folder: {file_output_folder}") + output_file = f"{os.path.join(file_output_folder, file).removesuffix('.enc')}" + logger.debug(f"Decrypting file: {input_file}") + logger.debug(f"Output file: {output_file}") + with open(input_file, "rb") as f: + with open(output_file, "wb") as o: + decrypt_file(f, o, key, raise_error=raise_error, + encryption_asymmetric=asymmetric_encryption) + logger.debug(f"File decrypted: {output_file}") + print(f"File decrypted: {output_file}") + count += 1 + logger.debug(f"File decrypted: {count}") + logger.debug(f"Decryption completed: {count} files decrypted on {output_folder}") + except Exception as e: + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + if raise_error: + raise lm_exceptions.LifeMonitorException(detail=str(e)) + return count diff --git a/requirements.txt b/requirements.txt index 922726e29..38792ea0e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -51,7 +51,7 @@ SQLAlchemy==1.4.41 wheel~=0.40.0 Werkzeug~=2.2.3 repo2rocrate~=0.1.2 -urllib3==1.26.17 +urllib3==1.26.18 flask-socketio==5.3.4 gevent-websocket==0.10.1 eventlet==0.33.3 diff --git a/settings.conf b/settings.conf index 5407e8a07..6953728f9 100644 --- a/settings.conf +++ b/settings.conf @@ -4,6 +4,14 @@ FLASK_ENV=development # Set the LOG_LEVEL LOG_LEVEL=INFO # default: 'INFO' on production, 'DEBUG' on development +# Set the path for the log file +# LOG_FILE_PATH=/var/log/lm # default: /var/log/lm + +# Manage the maintenance mode +# MAINTENANCE_MODE=True # default: False +# MAINTENANCE_MODE_MAIN_MESSAGE="We're busy updating the Life-Monitor service for you.Please check back 
soon!" +# MAINTENANCE_MODE_SECONDARY_MESSAGE="We are currently performing maintenance on the LifeMonitor service. Please try again later." + # The name and port number of the back-end server (e.g., 'localhost:8000'). # If the back-end is served through a reverse proxy, # then you have to set SERVER_NAME to the appropriate proxy entry @@ -91,6 +99,7 @@ CACHE_WORKFLOW_TIMEOUT=1800 # Backup settings BACKUP_LOCAL_PATH="./backups" BACKUP_RETAIN_DAYS=30 +# BACKUP_ENCRYPTION_KEY_PATH= # BACKUP_REMOTE_PATH="lm-backups" # BACKUP_REMOTE_HOST="ftp-site.domain.it" # BACKUP_REMOTE_USER="lm" diff --git a/specs/api.yaml b/specs/api.yaml index 78dc09749..00719c62a 100644 --- a/specs/api.yaml +++ b/specs/api.yaml @@ -3,7 +3,7 @@ openapi: "3.0.0" info: - version: "0.11.7" + version: "0.11.8" title: "Life Monitor API" description: | *Workflow sustainability service* @@ -18,7 +18,7 @@ info: servers: - url: / description: > - Version 0.11.7 of API. + Version 0.11.8 of API. tags: - name: GitHub Integration diff --git a/utils/grafana/lifemonitor-backend.dashboard.json b/utils/grafana/lifemonitor-backend.dashboard.json new file mode 100644 index 000000000..fb0d6198f --- /dev/null +++ b/utils/grafana/lifemonitor-backend.dashboard.json @@ -0,0 +1,1580 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "panel", + "id": "geomap", + "name": "Geomap", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.1.1" + }, + { + "type": "panel", + "id": "piechart", + "name": "Pie chart", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + 
} + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Dashboard to monitor to the LifeMonitor Back-end", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 53, + "panels": [], + "title": "Direct API Back-end Connections and Requests", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlYlRd" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 52, + "options": { + "basemap": { + "config": {}, + "name": "Layer 0", + "type": "default" + }, + "controls": { + "mouseWheelZoom": true, + "showAttribution": true, + "showDebug": true, + "showMeasure": false, + "showScale": true, + "showZoom": true + }, + "layers": [ + { + "config": { + "blur": 15, + "radius": 5, + "weight": { + "fixed": 1, + "max": 1, + "min": 0 + } + }, + "filterData": { + "id": "byRefId", + "options": "A" + }, + "name": "Requests", + "tooltip": true, + "type": "heatmap" + } + ], + "tooltip": { + "mode": "details" + }, + "view": { + "allLayers": true, + "id": "europe", + "lat": 46, + "lon": 14, + "zoom": 4 + } + }, + "pluginVersion": "10.1.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": true, + "expr": "sum by(country_long, country_short, city, latitude, longitude) (lifemonitor_api_proxy_http_connection_requests{environment=\"$environment\", request!=\"/metrics\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "requests", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Requests Location", + "transformations": [ + { + "id": "reduce", + "options": { + "includeTimeField": false, + "labelsToFields": true, + "mode": "seriesToRows", + "reducers": [ + "last" + ] + } + } + ], + "type": "geomap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 11 + }, + "id": 55, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.1.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "lifemonitor_api_proxy_http_connection_status{environment=\"$environment\", state=\"active\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Active Http Connections", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 8, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 18, + "x": 6, + "y": 11 + }, + "id": 49, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.1.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "lifemonitor_api_proxy_http_connection_status{environment=\"$environment\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{state}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Number of HTTP Connections by state", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 18 + }, + "id": 56, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": 
"auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "diff" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.1.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum(lifemonitor_api_proxy_http_connection_requests{environment=\"$environment\", upstream_address!=\"-\"})", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Upstream Connection Requests", + "transformations": [], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Number of requests By Country", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 18 + }, + "id": 47, + "options": { + "displayLabels": [], + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "range" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.1.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(country_long) (lifemonitor_api_proxy_http_connection_requests{environment=\"$environment\", upstream_address!=\"-\"})", + "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{country_long}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Requests by 
Country", + "transformations": [], + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Percentage of request errors by type", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "failure" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 18 + }, + "id": 57, + "options": { + "displayLabels": [], + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "range" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.1.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(status) (lifemonitor_api_proxy_http_connection_requests{environment=\"$environment\", status=~\"2.*\", upstream_address!=\"-\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "success", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by() (lifemonitor_api_proxy_http_requests_error_total{environment=\"$environment\", upstream_address!=\"-\", status!~\"2.+\"})", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + 
"instant": false, + "legendFormat": "failure", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Requests Success/Failure %", + "transformations": [], + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Percentage of request errors by type", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 18 + }, + "id": 51, + "options": { + "displayLabels": [], + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "range" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.1.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(status) (lifemonitor_api_proxy_http_requests_error_total{environment=\"$environment\", upstream_address!=\"-\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "% Request Errors", + "transformations": [], + "type": "piechart" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 25 + }, + "id": 46, + "panels": [], + "title": "Performance of API Back-end Requests", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + 
"drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "HTTP 500" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#bf1b00", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 13, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.1.0", + "targets": [ + { + "$$hashKey": "object:140", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": true, + "expr": "increase(lifemonitor_api_http_request_total{environment=\"$environment\"}[1m])", + "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "HTTP {{ status }}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Total requests per minute", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": 
"auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 14, + "w": 24, + "x": 0, + "y": 34 + }, + "id": 2, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.1.0", + "targets": [ + { + "$$hashKey": "object:214", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": true, + "expr": "rate(lifemonitor_api_http_request_duration_seconds_count{status=\"200\", environment=\"$environment\"}[30s])", + "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ path }}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Requests per second", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + 
"tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "errors" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#c15c17", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 48 + }, + "id": 4, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.1.0", + "targets": [ + { + "$$hashKey": "object:766", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": true, + "expr": "sum(rate(lifemonitor_api_http_request_duration_seconds_count{status!=\"200\", environment=\"$environment\"}[30s]))", + "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "errors", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Errors per second", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": 
"none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 55 + }, + "id": 6, + "links": [], + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.1.0", + "targets": [ + { + "$$hashKey": "object:146", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": true, + "expr": "rate(lifemonitor_api_http_request_duration_seconds_sum{status=\"200\", environment=\"$environment\"}[30s]) / rate(lifemonitor_api_http_request_duration_seconds_count{status=\"200\", environment=\"$environment\"}[30s])", + "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ path }}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Average response time [30s]", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": 
false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 64 + }, + "id": 11, + "links": [], + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.1.0", + "targets": [ + { + "$$hashKey": "object:1079", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": true, + "expr": "increase(lifemonitor_api_http_request_duration_seconds_bucket{status=\"200\", le=\"0.25\", environment=\"$environment\"}[30s]) / ignoring(le) increase(lifemonitor_api_http_request_duration_seconds_count{status=\"200\", environment=\"$environment\"}[30s])", + "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ path }}", + "refId": "A", + "useBackend": false + } + ], + "title": "Requests under 250ms", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + 
"gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 75 + }, + "id": 15, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.1.0", + "targets": [ + { + "$$hashKey": "object:426", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": true, + "expr": "histogram_quantile(0.5, rate(lifemonitor_api_http_request_duration_seconds_bucket{status=\"200\", environment=\"$environment\"}[30s]))", + "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ path }}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Request duration [s] - p50", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": 
false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 86 + }, + "id": 16, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.1.0", + "targets": [ + { + "$$hashKey": "object:426", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": true, + "expr": "histogram_quantile(0.9, rate(lifemonitor_api_http_request_duration_seconds_bucket{status=\"200\", environment=\"$environment\"}[30s]))", + "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ path }}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Request duration [s] - p90", + "type": "timeseries" + } + ], + "refresh": "5s", + "schemaVersion": 38, + "style": "dark", + "tags": [ + "monitoring", + "lifemonitor", + "back-end" + ], + "templating": { + "list": [ + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(environment)", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": true, + "name": "environment", + "options": [], + "query": { + "query": "label_values(environment)", + "refId": 
"PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "3s" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "Europe/Rome", + "title": "LifeMonitor Dashboard: Back-end", + "uid": "f76cf34a-7e8c-4f49-89e5-d9e616517125", + "version": 10, + "weekStart": "" +} \ No newline at end of file diff --git a/utils/grafana/LifeMonitorDashboard.json b/utils/grafana/lifemonitor-frontend.dashboard.json similarity index 64% rename from utils/grafana/LifeMonitorDashboard.json rename to utils/grafana/lifemonitor-frontend.dashboard.json index 976656905..55a5cd41e 100644 --- a/utils/grafana/LifeMonitorDashboard.json +++ b/utils/grafana/lifemonitor-frontend.dashboard.json @@ -11,12 +11,6 @@ ], "__elements": {}, "__requires": [ - { - "type": "panel", - "id": "gauge", - "name": "Gauge", - "version": "" - }, { "type": "panel", "id": "geomap", @@ -76,6 +70,7 @@ } ] }, + "description": "Dashboard to monitor to the LifeMonitor Back-end", "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, @@ -84,16 +79,14 @@ "liveNow": false, "panels": [ { - "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 22, - "panels": [], - "title": "LifeMonitor Objects", + "id": 60, + "title": "Active WebApp Clients", "type": "row" }, { @@ -101,15 +94,11 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "description": "Number of users registered on this LifeMonitor instance", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, - "custom": { - "neutral": 38 - }, "mappings": [], "thresholds": { "mode": "absolute", @@ -117,6 +106,10 @@ { "color": "green", "value": null + }, + { + "color": "red", + "value": 80 } ] } @@ -124,13 +117,16 @@ "overrides": [] }, "gridPos": { - "h": 6, - "w": 4, + "h": 9, + "w": 
5, "x": 0, "y": 1 }, - "id": 18, + "id": 59, "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ @@ -139,8 +135,7 @@ "fields": "", "values": false }, - "showThresholdLabels": false, - "showThresholdMarkers": true + "textMode": "auto" }, "pluginVersion": "10.1.1", "targets": [ @@ -149,27 +144,38 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "disableTextWrap": false, "editorMode": "builder", - "expr": "sum(lifemonitor_api_users)", + "expr": "sum(lifemonitor_webapp_proxy_http_websocket_connections_total{environment=\"$environment\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, "legendFormat": "__auto", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], - "title": "Users", - "type": "gauge" + "title": "Active WebApp Clients", + "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "description": "Number of workflows registered on this LifeMonitor instance", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, "mappings": [], "thresholds": { "mode": "absolute", @@ -177,6 +183,10 @@ { "color": "green", "value": null + }, + { + "color": "red", + "value": 80 } ] } @@ -184,23 +194,56 @@ "overrides": [] }, "gridPos": { - "h": 6, - "w": 4, - "x": 4, + "h": 9, + "w": 19, + "x": 5, "y": 1 }, - "id": 20, + "id": 58, "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "basemap": { + "config": {}, + "name": "Layer 0", + "type": "default" + }, + "controls": { + "mouseWheelZoom": true, + "showAttribution": true, + "showDebug": false, + "showMeasure": false, + "showScale": false, + "showZoom": true + }, + "layers": [ + { + "config": { + "blur": 15, + "radius": 5, + "weight": { + "fixed": 1, + "max": 1, + "min": 0 + } + 
}, + "filterData": { + "id": "byRefId", + "options": "A" + }, + "name": "Layer 1", + "tooltip": true, + "type": "heatmap" + } + ], + "tooltip": { + "mode": "details" }, - "showThresholdLabels": false, - "showThresholdMarkers": true + "view": { + "allLayers": true, + "id": "europe", + "lat": 46, + "lon": 14, + "zoom": 4 + } }, "pluginVersion": "10.1.1", "targets": [ @@ -209,122 +252,127 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "disableTextWrap": false, "editorMode": "builder", - "expr": "sum(lifemonitor_api_workflows)", + "expr": "lifemonitor_webapp_proxy_http_websocket_connections_total{environment=\"$environment\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, "legendFormat": "__auto", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], - "title": "Workflows", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "description": "Number of workflow versions registered on this LifeMonitor instance", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } + "title": "Localisation of Active WebApp Clients", + "transformations": [ + { + "id": "reduce", + "options": { + "labelsToFields": true, + "reducers": [ + "lastNotNull" ] } - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 8, - "y": 1 - }, - "id": 24, - "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true - }, - "pluginVersion": "10.1.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "builder", - "expr": "sum(lifemonitor_api_workflow_versions)", - "legendFormat": "__auto", - "range": true, - "refId": "A" } ], - "title": "Workflow Versions", - 
"type": "gauge" + "type": "geomap" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 53, + "panels": [], + "title": "WebApp Connections and Requests", + "type": "row" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "description": "Number of Workflow registries registered on the LifeMonitor instance", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "continuous-BlYlRd" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", + "color": "text", "value": null - }, - { - "color": "red", - "value": 80 } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 6, - "w": 4, - "x": 12, - "y": 1 + "h": 10, + "w": 24, + "x": 0, + "y": 11 }, - "id": 26, + "id": 52, "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "basemap": { + "config": {}, + "name": "Layer 0", + "type": "default" + }, + "controls": { + "mouseWheelZoom": true, + "showAttribution": true, + "showDebug": true, + "showMeasure": false, + "showScale": true, + "showZoom": true + }, + "layers": [ + { + "config": { + "blur": 15, + "radius": 5, + "weight": { + "fixed": 1, + "max": 1, + "min": 0 + } + }, + "filterData": { + "id": "byRefId", + "options": "A" + }, + "name": "Requests", + "tooltip": true, + "type": "heatmap" + } + ], + "tooltip": { + "mode": "details" }, - "showThresholdLabels": false, - "showThresholdMarkers": true + "view": { + "allLayers": true, + "id": "europe", + "lat": 46, + "lon": 14, + "zoom": 4 + } }, "pluginVersion": "10.1.1", "targets": [ @@ -333,22 +381,40 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "disableTextWrap": false, "editorMode": "builder", - "expr": "sum(lifemonitor_api_workflow_registries)", - "legendFormat": "__auto", + "exemplar": true, + "expr": "sum 
by(country_long, country_short, city, latitude, longitude) (lifemonitor_webapp_proxy_http_connection_requests{environment=\"$environment\", request!=\"/metrics\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "requests", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false + } + ], + "title": "Requests Location", + "transformations": [ + { + "id": "reduce", + "options": { + "includeTimeField": false, + "labelsToFields": true, + "mode": "seriesToRows", + "reducers": [ + "last" + ] + } } ], - "title": "Workflow Registries", - "type": "gauge" + "type": "geomap" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "description": "Number of workflow suites registered on the LifeMonitor instance", "fieldConfig": { "defaults": { "color": { @@ -368,13 +434,16 @@ "overrides": [] }, "gridPos": { - "h": 6, - "w": 4, - "x": 16, - "y": 1 + "h": 7, + "w": 6, + "x": 0, + "y": 21 }, - "id": 28, + "id": 55, "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ @@ -383,8 +452,7 @@ "fields": "", "values": false }, - "showThresholdLabels": false, - "showThresholdMarkers": true + "textMode": "auto" }, "pluginVersion": "10.1.1", "targets": [ @@ -393,681 +461,21 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "disableTextWrap": false, "editorMode": "builder", - "expr": "sum(lifemonitor_api_workflow_suites)", + "expr": "sum(lifemonitor_webapp_proxy_http_connection_status{environment=\"$environment\", state=\"active\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, "legendFormat": "__auto", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], - "title": "Workflow Suites", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "description": "Number of workflow test instances registered on the LifeMonitor 
instance", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 20, - "y": 1 - }, - "id": 30, - "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true - }, - "pluginVersion": "10.1.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "builder", - "expr": "sum(lifemonitor_api_workflow_test_instances)", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Workflow Test Instances", - "type": "gauge" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 7 - }, - "id": 45, - "panels": [], - "title": "LifeMonitor WebApp Active Clients", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "description": "Number of Current Users on the Web App", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 5, - "x": 0, - "y": 8 - }, - "id": 36, - "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true - }, - "pluginVersion": "10.1.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "sum(lifemonitor_webapp_http_websocket_connections_total)", - "fullMetaSearch": false, - 
"includeNullMetadata": true, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Active Web App Clients", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-BlYlRd" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "text", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 19, - "x": 5, - "y": 8 - }, - "id": 52, - "options": { - "basemap": { - "config": {}, - "name": "Layer 0", - "type": "default" - }, - "controls": { - "mouseWheelZoom": true, - "showAttribution": true, - "showDebug": true, - "showMeasure": false, - "showScale": true, - "showZoom": true - }, - "layers": [ - { - "config": { - "showLegend": true, - "style": { - "color": { - "field": "Last", - "fixed": "dark-green" - }, - "opacity": 0.4, - "rotation": { - "field": "Last", - "fixed": 0, - "max": 360, - "min": -360, - "mode": "mod" - }, - "size": { - "field": "Total", - "fixed": 5, - "max": 15, - "min": 2 - }, - "symbol": { - "fixed": "img/icons/marker/circle.svg", - "mode": "fixed" - }, - "text": { - "field": "Last", - "fixed": "", - "mode": "field" - }, - "textConfig": { - "fontSize": 12, - "offsetX": 10, - "offsetY": 0, - "textAlign": "left", - "textBaseline": "middle" - } - } - }, - "filterData": { - "id": "byRefId", - "options": "A" - }, - "location": { - "latitude": "lifemonitor_api_proxy_http_requests_total", - "mode": "auto" - }, - "name": "Web Clients", - "tooltip": true, - "type": "markers" - } - ], - "tooltip": { - "mode": "details" - }, - "view": { - "allLayers": true, - "id": "zero", - "lat": 0, - "lon": 0, - "zoom": 1 - } - }, - "pluginVersion": "10.1.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": 
"${DS_PROMETHEUS}" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "lifemonitor_webapp_http_websocket_connections_total", - "fullMetaSearch": false, - "includeNullMetadata": true, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Location of Active Web App Clients", - "transformations": [ - { - "id": "reduce", - "options": { - "includeTimeField": false, - "labelsToFields": true, - "mode": "seriesToRows", - "reducers": [ - "last" - ] - } - } - ], - "type": "geomap" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 16 - }, - "id": 44, - "panels": [], - "title": "Rate of LifeMonitor WebApp HTTP Connections to Back-end API", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "mappings": [] - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 7, - "x": 0, - "y": 17 - }, - "id": 38, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "pieType": "pie", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.3.6", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "sum by(client_country_long) (rate(lifemonitor_webapp_http_requests_total[30s]))", - "fullMetaSearch": false, - "includeNullMetadata": true, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Requests by Country", - "type": "piechart" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - 
"fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "status=200": { - "color": "purple", - "index": 0, - "text": "OK" - } - }, - "type": "value" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 5, - "x": 7, - "y": 17 - }, - "id": 39, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "10.1.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "rate(lifemonitor_webapp_http_connections{status=\"200\"}[30s])", - "fullMetaSearch": false, - "includeNullMetadata": true, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Status 200", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "yellow", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 4, - "x": 12, - "y": 17 - }, - "id": 40, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "10.1.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": 
"sum(rate(lifemonitor_webapp_http_connections{status=~\"3[0-9][0-9]\"}[30s]))", - "fullMetaSearch": false, - "includeNullMetadata": true, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Status ~= 3**", - "transformations": [], - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 4, - "x": 16, - "y": 17 - }, - "id": 42, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "10.1.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "sum(rate(lifemonitor_webapp_http_connections{status=~\"4[0-9][0-9]\"}[30s]))", - "fullMetaSearch": false, - "includeNullMetadata": true, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Status ~= 4**", - "transformations": [], - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 4, - "x": 20, - "y": 17 - }, - "id": 43, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - 
"fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "10.1.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "sum(rate(lifemonitor_webapp_http_connections{status=~\"5[0-9][0-9]\"}[30s]))", - "fullMetaSearch": false, - "includeNullMetadata": true, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Status ~= 5**", - "transformations": [], + "title": "Active Http Connections", "type": "stat" }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 26 - }, - "id": 48, - "panels": [], - "title": "WebApp Http Connections to Back-end API by status", - "type": "row" - }, { "datasource": { "type": "prometheus", @@ -1076,14 +484,45 @@ "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 8, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "#5f675e", + "color": "green", "value": null } ] @@ -1092,25 +531,23 @@ "overrides": [] }, "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 27 + "h": 7, + "w": 18, + "x": 6, + "y": 21 }, - "id": 41, + "id": 49, "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - 
"fields": "", - "values": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "textMode": "auto" + "tooltip": { + "mode": "single", + "sort": "none" + } }, "pluginVersion": "10.1.1", "targets": [ @@ -1121,37 +558,25 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "sum by(status) (lifemonitor_webapp_http_connections)", + "expr": "sum by(state) (lifemonitor_webapp_proxy_http_connection_status{environment=\"$environment\"})", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, - "legendFormat": "__auto", + "legendFormat": "{{state}}", "range": true, "refId": "A", "useBackend": false } ], - "title": "WebApp HTTP Connections 200 status", - "type": "stat" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 33 - }, - "id": 53, - "panels": [], - "title": "Direct API Back-end Requests", - "type": "row" + "title": "Number of HTTP Connections by state", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "description": "", "fieldConfig": { "defaults": { "color": { @@ -1164,6 +589,10 @@ { "color": "green", "value": null + }, + { + "color": "red", + "value": 80 } ] } @@ -1174,20 +603,22 @@ "h": 7, "w": 6, "x": 0, - "y": 34 + "y": 28 }, - "id": 49, + "id": 56, "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "diff" ], "fields": "", "values": false }, - "showThresholdLabels": false, - "showThresholdMarkers": true + "textMode": "auto" }, "pluginVersion": "10.1.1", "targets": [ @@ -1198,9 +629,9 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "increase(lifemonitor_api_proxy_http_active_connections_number[30s])", + "expr": "sum(lifemonitor_webapp_proxy_http_connection_requests{environment=\"$environment\", upstream_address!=\"-\"})", "fullMetaSearch": false, - "includeNullMetadata": 
true, + "includeNullMetadata": false, "instant": false, "legendFormat": "__auto", "range": true, @@ -1208,29 +639,30 @@ "useBackend": false } ], - "title": "Active HTTP Connections", - "type": "gauge" + "title": "Upstream Connection Requests", + "transformations": [], + "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "description": "Number of requests By Country", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } }, "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "yellow", - "value": null - } - ] - } + "unit": "none" }, "overrides": [] }, @@ -1238,20 +670,31 @@ "h": 7, "w": 6, "x": 6, - "y": 34 + "y": 28 }, - "id": 50, + "id": 47, "options": { - "orientation": "auto", + "displayLabels": [], + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "pie", "reduceOptions": { "calcs": [ - "lastNotNull" + "range" ], "fields": "", "values": false }, - "showThresholdLabels": false, - "showThresholdMarkers": true + "tooltip": { + "mode": "single", + "sort": "none" + } }, "pluginVersion": "10.1.1", "targets": [ @@ -1262,25 +705,26 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "increase(lifemonitor_api_proxy_http_upstream_connections_total[30s])", + "expr": "sum by(country_long) (lifemonitor_webapp_proxy_http_connection_requests{environment=\"$environment\", upstream_address!=\"-\"})", + "format": "time_series", "fullMetaSearch": false, "includeNullMetadata": true, - "instant": false, - "legendFormat": "__auto", + "legendFormat": "{{country_long}}", "range": true, "refId": "A", "useBackend": false } ], - "title": "Active Upstream Connections", - "type": "gauge" + "title": "Requests by Country", + "transformations": [], + "type": "piechart" }, { "datasource": { "type": 
"prometheus", "uid": "${DS_PROMETHEUS}" }, - "description": "Number of Active Clients By Country", + "description": "Percentage of request errors by type", "fieldConfig": { "defaults": { "color": { @@ -1295,25 +739,60 @@ }, "mappings": [] }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "failure" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { "h": 7, "w": 6, "x": 12, - "y": 34 + "y": 28 }, - "id": 47, + "id": 57, "options": { + "displayLabels": [], "legend": { "displayMode": "list", "placement": "bottom", - "showLegend": true + "showLegend": true, + "values": [ + "percent" + ] }, "pieType": "pie", "reduceOptions": { "calcs": [ - "lastNotNull" + "range" ], "fields": "", "values": false @@ -1323,7 +802,7 @@ "sort": "none" } }, - "pluginVersion": "10.1.0", + "pluginVersion": "10.1.1", "targets": [ { "datasource": { @@ -1332,16 +811,34 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "sum by(client_country_long) (increase(lifemonitor_api_proxy_http_requests_total[30s]))", - "fullMetaSearch": false, + "expr": "sum by(status) (lifemonitor_webapp_proxy_http_connection_requests{environment=\"$environment\", status=~\"2.*\", upstream_address!=\"-\"})", + "fullMetaSearch": true, "includeNullMetadata": true, - "legendFormat": "__auto", + "legendFormat": "success", "range": true, "refId": "A", + "useBackend": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by() (lifemonitor_webapp_proxy_http_connection_requests{environment=\"$environment\", upstream_address!=\"-\", status!~\"2.+\"})", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": 
true, + "instant": false, + "legendFormat": "failure", + "range": true, + "refId": "B", "useBackend": false } ], - "title": "Active Requests by Country", + "title": "Requests Success/Failure %", + "transformations": [], "type": "piechart" }, { @@ -1349,7 +846,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "description": "Number of Active Clients By Country", + "description": "Percentage of request errors by type", "fieldConfig": { "defaults": { "color": { @@ -1370,19 +867,23 @@ "h": 7, "w": 6, "x": 18, - "y": 34 + "y": 28 }, "id": 51, "options": { + "displayLabels": [], "legend": { "displayMode": "list", "placement": "bottom", - "showLegend": true + "showLegend": true, + "values": [ + "percent" + ] }, "pieType": "pie", "reduceOptions": { "calcs": [ - "lastNotNull" + "range" ], "fields": "", "values": false @@ -1392,7 +893,7 @@ "sort": "none" } }, - "pluginVersion": "10.1.0", + "pluginVersion": "10.1.1", "targets": [ { "datasource": { @@ -1401,7 +902,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "sum by(client_country_long) (lifemonitor_api_proxy_http_requests_total)", + "expr": "sum by(status) (lifemonitor_webapp_proxy_http_connection_requests{environment=\"$environment\", upstream_address!=\"-\"})", "fullMetaSearch": false, "includeNullMetadata": true, "legendFormat": "__auto", @@ -1410,7 +911,8 @@ "useBackend": false } ], - "title": "# Requests by Country", + "title": "% Request Errors", + "transformations": [], "type": "piechart" }, { @@ -1419,11 +921,11 @@ "h": 1, "w": 24, "x": 0, - "y": 41 + "y": 35 }, "id": 46, "panels": [], - "title": "LifeMonitor API Backend Requests", + "title": "Performance of API Back-end Requests", "type": "row" }, { @@ -1506,7 +1008,7 @@ "h": 8, "w": 24, "x": 0, - "y": 42 + "y": 36 }, "id": 13, "links": [], @@ -1533,15 +1035,19 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "disableTextWrap": false, "editorMode": "builder", "exemplar": true, - "expr": 
"increase(lifemonitor_api_http_request_total[1m])", + "expr": "increase(lifemonitor_api_http_request_total{environment=\"$environment\"}[1m])", "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, "interval": "", "intervalFactor": 1, "legendFormat": "HTTP {{ status }}", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], "title": "Total requests per minute", @@ -1604,38 +1110,13 @@ }, "unit": "short" }, - "overrides": [ - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "/workflows" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 14, "w": 24, "x": 0, - "y": 50 + "y": 44 }, "id": 2, "links": [], @@ -1662,15 +1143,19 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "editorMode": "code", + "disableTextWrap": false, + "editorMode": "builder", "exemplar": true, - "expr": "rate(lifemonitor_api_http_request_duration_seconds_count{status=\"200\"}[30s])", + "expr": "rate(lifemonitor_api_http_request_duration_seconds_count{status=\"200\", environment=\"$environment\"}[30s])", "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, "interval": "", "intervalFactor": 1, "legendFormat": "{{ path }}", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], "title": "Requests per second", @@ -1722,8 +1207,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1755,7 +1239,7 @@ "h": 7, "w": 24, "x": 0, - "y": 64 + "y": 58 }, "id": 4, "links": [], @@ -1783,15 +1267,19 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "editorMode": "code", + "disableTextWrap": false, + "editorMode": "builder", "exemplar": true, - "expr": 
"sum(rate(lifemonitor_api_http_request_duration_seconds_count{status!=\"200\"}[30s]))", + "expr": "sum(rate(lifemonitor_api_http_request_duration_seconds_count{status!=\"200\", environment=\"$environment\"}[30s]))", "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, "interval": "", "intervalFactor": 1, "legendFormat": "errors", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], "title": "Errors per second", @@ -1843,8 +1331,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1860,7 +1347,7 @@ "h": 9, "w": 24, "x": 0, - "y": 71 + "y": 65 }, "id": 6, "links": [], @@ -1886,15 +1373,19 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "editorMode": "code", + "disableTextWrap": false, + "editorMode": "builder", "exemplar": true, - "expr": "rate(lifemonitor_api_http_request_duration_seconds_sum{status=\"200\"}[30s])\n/\nrate(lifemonitor_api_http_request_duration_seconds_count{status=\"200\"}[30s])", + "expr": "rate(lifemonitor_api_http_request_duration_seconds_sum{status=\"200\", environment=\"$environment\"}[30s]) / rate(lifemonitor_api_http_request_duration_seconds_count{status=\"200\", environment=\"$environment\"}[30s])", "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, "interval": "", "intervalFactor": 1, "legendFormat": "{{ path }}", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], "title": "Average response time [30s]", @@ -1948,8 +1439,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1965,7 +1455,7 @@ "h": 11, "w": 24, "x": 0, - "y": 80 + "y": 74 }, "id": 11, "links": [], @@ -1991,15 +1481,19 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "editorMode": "code", + "disableTextWrap": false, + "editorMode": "builder", "exemplar": true, - "expr": 
"increase(lifemonitor_api_http_request_duration_seconds_bucket{status=\"200\",le=\"0.25\"}[30s]) \n/ ignoring (le) increase(lifemonitor_api_http_request_duration_seconds_count{status=\"200\"}[30s])", + "expr": "increase(lifemonitor_api_http_request_duration_seconds_bucket{status=\"200\", le=\"0.25\", environment=\"$environment\"}[30s]) / ignoring(le) increase(lifemonitor_api_http_request_duration_seconds_count{status=\"200\", environment=\"$environment\"}[30s])", "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "{{ path }}", - "refId": "A" + "refId": "A", + "useBackend": false } ], "title": "Requests under 250ms", @@ -2052,8 +1546,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2069,7 +1562,7 @@ "h": 11, "w": 24, "x": 0, - "y": 91 + "y": 85 }, "id": 15, "links": [], @@ -2098,15 +1591,19 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "editorMode": "code", + "disableTextWrap": false, + "editorMode": "builder", "exemplar": true, - "expr": "histogram_quantile(0.5, rate(lifemonitor_api_http_request_duration_seconds_bucket{status=\"200\"}[30s]))", + "expr": "histogram_quantile(0.5, rate(lifemonitor_api_http_request_duration_seconds_bucket{status=\"200\", environment=\"$environment\"}[30s]))", "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, "interval": "", "intervalFactor": 1, "legendFormat": "{{ path }}", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], "title": "Request duration [s] - p50", @@ -2158,8 +1655,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2175,7 +1671,7 @@ "h": 7, "w": 24, "x": 0, - "y": 102 + "y": 96 }, "id": 16, "links": [], @@ -2204,15 +1700,19 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "editorMode": "code", + "disableTextWrap": 
false, + "editorMode": "builder", "exemplar": true, - "expr": "histogram_quantile(0.9, rate(lifemonitor_api_http_request_duration_seconds_bucket{status=\"200\"}[30s]))", + "expr": "histogram_quantile(0.9, rate(lifemonitor_api_http_request_duration_seconds_bucket{status=\"200\", environment=\"$environment\"}[30s]))", "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, "interval": "", "intervalFactor": 1, "legendFormat": "{{ path }}", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], "title": "Request duration [s] - p90", @@ -2222,12 +1722,40 @@ "refresh": "auto", "schemaVersion": 38, "style": "dark", - "tags": [], + "tags": [ + "monitoring", + "lifemonitor", + "back-end" + ], "templating": { - "list": [] + "list": [ + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(environment)", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": true, + "name": "environment", + "options": [], + "query": { + "query": "label_values(environment)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] }, "time": { - "from": "now-5m", + "from": "now-15m", "to": "now" }, "timepicker": { @@ -2247,8 +1775,8 @@ ] }, "timezone": "Europe/Rome", - "title": "LifeMonitor Dashboard", - "uid": "_eX4mpl3", - "version": 12, + "title": "LifeMonitor Dashboard: Front-end", + "uid": "d6ae2f51-d538-4ba8-b9b6-143e66d6151b", + "version": 8, "weekStart": "" } \ No newline at end of file diff --git a/utils/grafana/lifemonitor-logs.dashboard.json b/utils/grafana/lifemonitor-logs.dashboard.json new file mode 100644 index 000000000..769a2c652 --- /dev/null +++ b/utils/grafana/lifemonitor-logs.dashboard.json @@ -0,0 +1,594 @@ +{ + "__inputs": [ + { + "name": "DS_LOKI", + "label": "Loki", + "description": "", + "type": "datasource", + "pluginId": "loki", + 
"pluginName": "Loki" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "panel", + "id": "geomap", + "name": "Geomap", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.1.1" + }, + { + "type": "panel", + "id": "logs", + "name": "Logs", + "version": "" + }, + { + "type": "datasource", + "id": "loki", + "name": "Loki", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "$$hashKey": "object:75", + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Explore logs reported via Promtail", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 12019, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "${DS_LOKI}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 15, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 7, + "options": { + "basemap": { + "config": {}, + "name": "Layer 0", + "type": "default" + }, + "controls": { + "mouseWheelZoom": true, + "showAttribution": true, + "showDebug": false, + "showMeasure": false, + "showScale": false, + "showZoom": true + }, + "layers": [ + { + "config": { + "showLegend": true, + "style": { + "color": { + "fixed": "dark-green" + }, + "opacity": 
0.4, + "rotation": { + "fixed": 0, + "max": 360, + "min": -360, + "mode": "mod" + }, + "size": { + "fixed": 5, + "max": 15, + "min": 2 + }, + "symbol": { + "fixed": "img/icons/marker/circle.svg", + "mode": "fixed" + }, + "text": { + "fixed": "", + "mode": "field" + }, + "textConfig": { + "fontSize": 12, + "offsetX": 0, + "offsetY": 0, + "textAlign": "center", + "textBaseline": "middle" + } + } + }, + "filterData": { + "id": "byRefId", + "options": "A" + }, + "location": { + "mode": "auto" + }, + "name": "Layer 1", + "tooltip": true, + "type": "markers" + } + ], + "tooltip": { + "mode": "details" + }, + "view": { + "allLayers": true, + "id": "zero", + "lat": 0, + "lon": 0, + "zoom": 1 + } + }, + "pluginVersion": "10.1.1", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${DS_LOKI}" + }, + "editorMode": "builder", + "expr": "{app=\"$app\", component=\"$component\"} | pattern ` - [