diff --git a/.github/environments/values.pr.yaml b/.github/environments/values.pr.yaml index bbbc682..5be54e3 100644 --- a/.github/environments/values.pr.yaml +++ b/.github/environments/values.pr.yaml @@ -5,5 +5,8 @@ features: persistentVolumeClaim: enabled: false +cronJob: + enabled: false + fluentBit: enabled: false diff --git a/app/cacheCleaner.js b/app/cacheCleaner.js new file mode 100644 index 0000000..98c2be4 --- /dev/null +++ b/app/cacheCleaner.js @@ -0,0 +1,115 @@ +const config = require('config'); +const { readdirSync, realpathSync, rmSync, statSync } = require('fs-extra'); +const { tmpdir } = require('os'); +const { join } = require('path'); + +const log = require('./src/components/log')(module.filename); + +const RATIO = 0.8; // Best practice is to keep the cache no more than 80% full + +const osTempDir = realpathSync(tmpdir()); +const cacheDir = (() => { + if (config.has('carbone.cacheDir')) { + return realpathSync(config.get('carbone.cacheDir')); + } else { + return osTempDir; + } +})(); + +const cacheSize = (() => { + const parseRegex = /^(\d+(?:\.\d+)?) *([kmgtp]?b)$/i; + const unitMap = { + b: Math.pow(10, 0), + kb: Math.pow(10, 3), + mb: Math.pow(10, 6), + gb: Math.pow(10, 9), + tb: Math.pow(10, 12), + pb: Math.pow(10, 15) + }; + + if (config.has('carbone.cacheSize')) { + const result = parseRegex.exec(config.get('carbone.cacheSize')); + if (result && Array.isArray(result)) { + return parseInt(result[1]) * unitMap[result[2].toLowerCase()]; + } + } else { + return null; + } +})(); +const cacheSizeLimit = Math.ceil(cacheSize * RATIO); + +log.info(`Cache directory ${cacheDir} with max size of ${cacheSizeLimit}`); + +// Short circuit exits +if (!cacheSize) { + log.info('Maximum cache size not defined - Exiting'); + process.exit(0); +} else if (cacheDir === osTempDir) { + log.info('Cache points to OS temp directory - Exiting'); + process.exit(0); +} + +// Check cache size and prune oldest files away as needed +try { + const currCacheSize = dirSize(cacheDir); + const files = getSortedFiles(cacheDir); + const status = currCacheSize < cacheSizeLimit ? 
'below' : 'above'; + + log.info(`Current cache size ${currCacheSize} ${status} threshold of ${cacheSizeLimit}`, { + cacheLimit: cacheSizeLimit, + cacheSize: currCacheSize + }); + + // Prune files if necessary + let rmCount = 0; + for (const file of files) { + if (dirSize(cacheDir) < cacheSizeLimit) break; + rmSync(`${cacheDir}/${file}`, { recursive: true, force: true }); + rmCount++; + } + + log.info(`${rmCount} objects were pruned from the cache - Exiting`, { removeCount: rmCount }); + process.exit(0); +} catch(err) { + log.error(err.message); + process.exit(1); +} + +/** + * @function dirSize + * Recursively calculates the size of directory `dir` + * @param {string} dir The directory to calculate + * @returns {number} The size of the directory in bytes + */ +function dirSize(dir) { + const files = readdirSync(dir, { withFileTypes: true }); + const paths = files.map(file => { + const path = join(dir, file.name); + + if (file.isDirectory()) return dirSize(path); + if (file.isFile()) { + const { size } = statSync(path); + return size; + } + return 0; + }); + + return paths.flat(Infinity).reduce((i, size) => i + size, 0); +} + +/** + * @function getSortedFiles + * Acquires a list of files and directories ordered from oldest to newest modified + * @param {string} dir The directory to inspect + * @returns {Array} The list of files and directories in directory `dir` + */ +function getSortedFiles(dir) { + const files = readdirSync(dir); + return files + .map(fileName => ({ + name: fileName, + time: statSync(`${dir}/${fileName}`).mtime.getTime(), + })) + .sort((a, b) => a.time - b.time) + .map(file => file.name); +} diff --git a/app/package.json b/app/package.json index 2383a8c..6c3b227 100644 --- a/app/package.json +++ b/app/package.json @@ -7,6 +7,7 @@ "build": "echo Nothing to build", "serve": "nodemon ./bin/www", "start": "node ./bin/www", + "prune": "node ./cacheCleaner.js", "lint": "eslint . --no-fix --ignore-pattern 'node_modules' --ext .js", "lint:fix": "eslint . --fix --ignore-pattern 'node_modules' --ext .js", "test": "jest --verbose --forceExit --detectOpenHandles", diff --git a/app/src/components/upload.js b/app/src/components/upload.js index cd40e0f..2be4575 100644 --- a/app/src/components/upload.js +++ b/app/src/components/upload.js @@ -9,23 +9,24 @@ const fileUploadsDir = config.get('carbone.cacheDir'); const formFieldName = config.get('carbone.formFieldName'); const maxFileSize = bytes.parse(config.get('carbone.uploadSize')); const maxFileCount = parseInt(config.get('carbone.uploadCount')); +const osTempDir = fs.realpathSync(os.tmpdir()); let storage = undefined; let uploader = undefined; -// Upload directory checks +// Cache directory check try { fs.ensureDirSync(fileUploadsDir); } catch (e) { - console.warn(`Unable to use directory "${fileUploadsDir}". Falling back to default OS temp directory`); - fs.realpathSync(os.tmpdir()); + console.warn(`Unable to use cache directory "${fileUploadsDir}". Cache will fall back to default OS temp directory "${osTempDir}"`); } // Setup storage location if (!storage) { storage = multer.diskStorage({ destination: (_req, _file, cb) => { - cb(null, fileUploadsDir); + // Always write transiently uploaded files to os temp scratch space + cb(null, osTempDir); } }); } diff --git a/charts/cdogs/Chart.yaml b/charts/cdogs/Chart.yaml index eca82dc..f89ce32 100644 --- a/charts/cdogs/Chart.yaml +++ b/charts/cdogs/Chart.yaml @@ -3,7 +3,7 @@ name: common-document-generation-service # This is the chart version. 
This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.0.2 +version: 0.0.3 kubeVersion: ">= 1.13.0" description: A microservice for merging JSON data into xml-based templates (powered by Carbone.io) # A chart can be either an 'application' or a 'library' chart. diff --git a/charts/cdogs/README.md b/charts/cdogs/README.md index 6df856f..b8ac4d5 100644 --- a/charts/cdogs/README.md +++ b/charts/cdogs/README.md @@ -1,6 +1,6 @@ # common-document-generation-service -![Version: 0.0.2](https://img.shields.io/badge/Version-0.0.2-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.4.2](https://img.shields.io/badge/AppVersion-2.4.2-informational?style=flat-square) +![Version: 0.0.3](https://img.shields.io/badge/Version-0.0.3-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.4.2](https://img.shields.io/badge/AppVersion-2.4.2-informational?style=flat-square) A microservice for merging JSON data into xml-based templates (powered by Carbone.io) @@ -34,10 +34,13 @@ Kubernetes: `>= 1.13.0` | config.configMap | object | `{"CACHE_DIR":"/var/lib/file-cache/data","CACHE_SIZE":"2GB","CONVERTER_FACTORY_TIMEOUT":"60000","KC_PUBLICKEY":null,"KC_REALM":null,"KC_SERVERURL":null,"SERVER_BODYLIMIT":"100mb","SERVER_LOGLEVEL":"http","SERVER_PORT":"3000","START_CARBONE":"true","UPLOAD_FIELD_NAME":"template","UPLOAD_FILE_COUNT":"1","UPLOAD_FILE_SIZE":"25MB"}` | These values will be wholesale added to the configmap as is; refer to the cdogs documentation for what each of these values mean and whether you need them defined. Ensure that all values are represented explicitly as strings, as non-string values will not translate over as expected into container environment variables. For configuration keys named `*_ENABLED`, either leave them commented/undefined, or set them to string value "true". | | config.enabled | bool | `false` | | | config.releaseScoped | bool | `false` | This should be set to true if and only if you require configmaps and secrets to be release scoped. 
In the event you want all instances in the same namespace to share a similar configuration, this should be set to false | +| cronJob.enabled | bool | `true` | Specifies whether a cache cleaning cronjob should be created | +| cronJob.schedule | string | `"0 0 * * 1,4"` | Every Monday & Thursday - https://crontab.guru/#0_0_*_*_1,4 | +| cronJob.suspend | bool | `false` | In test environments, you might want to create the cronjob for consistency, but suspend it | | failurePolicy | string | `"Retry"` | | | features.authentication | bool | `false` | Specifies whether to run in authenticated mode | | fluentBit.config.aws.defaultRegion | string | `"ca-central-1"` | AWS Kinesis default region | -| fluentBit.config.aws.kinesisStream | string | `"nress-prod-iit-logs"` | AWS Kinesis stream name | +| fluentBit.config.aws.kinesisStream | string | `"nr-apm-stack-documents"` | AWS Kinesis stream name | | fluentBit.config.aws.roleArn | string | `nil` | AWS Kinesis role ARN | | fluentBit.config.logHostname | string | `"fluentd-csst.apps.silver.devops.gov.bc.ca"` | Fluentd logging hostname endpoint | | fluentBit.config.namespace | string | `nil` | The openshift/k8s namespace identifier | diff --git a/charts/cdogs/templates/cronjob.yaml b/charts/cdogs/templates/cronjob.yaml new file mode 100644 index 0000000..4832500 --- /dev/null +++ b/charts/cdogs/templates/cronjob.yaml @@ -0,0 +1,60 @@ +{{- if and .Values.cronJob.enabled .Values.persistentVolumeClaim.enabled }} +apiVersion: batch/v1 +kind: CronJob +metadata: + name: {{ template "cdogs.fullname" . }} + labels: {{ include "cdogs.labels" . | nindent 4 }} +spec: + concurrencyPolicy: Forbid + failedJobsHistoryLimit: 3 + successfulJobsHistoryLimit: 3 + startingDeadlineSeconds: 60 + jobTemplate: + metadata: + labels: {{ include "cdogs.labels" . | nindent 8 }} + spec: + backoffLimit: 6 + activeDeadlineSeconds: 300 + parallelism: 1 + completions: 1 + template: + metadata: + labels: {{ include "cdogs.labels" . | nindent 12 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: {{ toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.serviceAccount.create }} + serviceAccountName: {{ include "cdogs.serviceAccountName" . }} + {{- end }} + {{- with .Values.podSecurityContext }} + securityContext: {{ toYaml . | nindent 8 }} + {{- end }} + containers: + - name: job + {{- with .Values.securityContext }} + securityContext: {{ toYaml . | nindent 12 }} + {{- end }} + image: "{{ .Values.image.repository }}/{{ .Chart.Name }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: + - node + - ./cacheCleaner.js + resources: {{ toYaml .Values.resources | nindent 16 }} + env: + - name: NODE_ENV + value: production + envFrom: + - configMapRef: + name: {{ include "cdogs.configname" . }}-config + volumeMounts: + - name: file-cache-data + mountPath: /var/lib/file-cache/data + restartPolicy: Never + volumes: + - name: file-cache-data + persistentVolumeClaim: + claimName: {{ include "cdogs.configname" . 
}}-cache + schedule: {{ .Values.cronJob.schedule }} + suspend: {{ .Values.cronJob.suspend }} +{{- end }} diff --git a/charts/cdogs/values.yaml b/charts/cdogs/values.yaml index 51db929..c36c6e5 100644 --- a/charts/cdogs/values.yaml +++ b/charts/cdogs/values.yaml @@ -125,6 +125,14 @@ persistentVolumeClaim: # -- PVC Storage size (use M or G, not Mi or Gi) storageSize: 2G +cronJob: + # -- Specifies whether a cache cleaning cronjob should be created + enabled: true + # -- Every Monday & Thursday - https://crontab.guru/#0_0_*_*_1,4 + schedule: 0 0 * * 1,4 + # -- In test environments, you might want to create the cronjob for consistency, but suspend it + suspend: false + config: # Set to true if you want to let Helm manage and overwrite your configmaps. enabled: false @@ -183,7 +191,7 @@ fluentBit: # -- AWS Kinesis default region defaultRegion: ca-central-1 # -- AWS Kinesis stream name - kinesisStream: nress-prod-iit-logs + kinesisStream: nr-apm-stack-documents # -- AWS Kinesis role ARN roleArn: ~ # -- Fluentd logging hostname endpoint
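For reference, a rough sketch of how the new `app/cacheCleaner.js` interprets the chart's default cache settings. The keys below are the existing `config.configMap` defaults documented in `charts/cdogs/README.md`, shown only for illustration, and this assumes `CACHE_DIR` / `CACHE_SIZE` are the environment variables mapped to `carbone.cacheDir` / `carbone.cacheSize` in the app's config (that mapping is not part of this diff). The byte figures follow the script's decimal unit map and its hard-coded 80% ratio:

```yaml
config:
  configMap:
    # Assumed to map to carbone.cacheDir; if it resolves to the OS temp
    # directory, the cleaner logs a message and exits without pruning.
    CACHE_DIR: "/var/lib/file-cache/data"
    # Assumed to map to carbone.cacheSize; parsed with decimal units, so
    # 2GB = 2 * 10^9 = 2,000,000,000 bytes. The prune threshold is
    # ceil(2,000,000,000 * 0.8) = 1,600,000,000 bytes; the job removes the
    # oldest-modified cache entries until the directory drops below that size.
    CACHE_SIZE: "2GB"
```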
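The CronJob template only renders when both `cronJob.enabled` and `persistentVolumeClaim.enabled` are true, so each environment's values file can opt in or out independently. A minimal sketch of the overrides this makes possible; the file split is illustrative, and only the PR-environment settings appear verbatim in this diff:

```yaml
# PR environments (mirrors .github/environments/values.pr.yaml):
# no cache PVC, so no cleanup CronJob either.
persistentVolumeClaim:
  enabled: false
cronJob:
  enabled: false
---
# Test environments: keep the CronJob for consistency with production,
# but pause it, as suggested by the comment on cronJob.suspend.
cronJob:
  suspend: true
  # schedule stays at the chart default of "0 0 * * 1,4"
  # (midnight on Mondays and Thursdays)
```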