From 560b7d45ac1c7c7e162ee72b7f26dc39ccc5ed26 Mon Sep 17 00:00:00 2001 From: nielm Date: Fri, 24 Nov 2023 17:39:18 +0100 Subject: [PATCH] refactor config and proxy-server into separate files Proxy-server needs to be started before malware-scanner --- cloudrun-malware-scanner/bootstrap.sh | 16 +- cloudrun-malware-scanner/config.js | 136 +++++++++++++++ cloudrun-malware-scanner/gcs-proxy-server.js | 113 +++++++++++++ cloudrun-malware-scanner/package.json | 3 +- cloudrun-malware-scanner/server.js | 168 +------------------ 5 files changed, 265 insertions(+), 171 deletions(-) create mode 100644 cloudrun-malware-scanner/config.js create mode 100644 cloudrun-malware-scanner/gcs-proxy-server.js diff --git a/cloudrun-malware-scanner/bootstrap.sh b/cloudrun-malware-scanner/bootstrap.sh index 2c72d43..90a9e5e 100755 --- a/cloudrun-malware-scanner/bootstrap.sh +++ b/cloudrun-malware-scanner/bootstrap.sh @@ -47,13 +47,6 @@ apt-get -qqy install --no-install-recommends clamav-daemon clamav-freshclam export PATH="$PATH:$HOME/.local/bin" # add pipx locations to path. pipx install cvdupdate -# The node server includes a reverse proxy which adds authentication -# to requests to GCS REST API, allowing freshclam to access the GCS -# CVD mirror as if it was an unauthenticated HTPP server -# -export PROXY_PORT=8888 -PROXY_SERVER_ADDRESS=127.0.0.1:${PROXY_PORT} - # Ensure clamav services are shut down, as we do not have the config files set up yet. service clamav-daemon stop & service clamav-freshclam stop & @@ -82,6 +75,15 @@ if ! gsutil ls "gs://${CVD_MIRROR_BUCKET}/" > /dev/null ; then exit 1 fi +# Start the reverse proxy which adds authentication +# to requests to GCS REST API, allowing freshclam to access the GCS +# CVD mirror bucket as if it was an unauthenticated HTTP server +# +export PROXY_PORT=8888 +PROXY_SERVER_ADDRESS=127.0.0.1:${PROXY_PORT} +npm run start-proxy "${CONFIG_FILE}" & +sleep 5 + # This function is used to update clam and freshclam config files. 
# Use by specifying the config file on the command line and # piping the config file updates in. diff --git a/cloudrun-malware-scanner/config.js b/cloudrun-malware-scanner/config.js new file mode 100644 index 0000000..2450785 --- /dev/null +++ b/cloudrun-malware-scanner/config.js @@ -0,0 +1,136 @@ +/* +* Copyright 2022 Google LLC + +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at + +* https://www.apache.org/licenses/LICENSE-2.0 + +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +const {Storage} = require('@google-cloud/storage'); +const {logger} = require('./logger.js'); +const pkgJson = require('./package.json'); + + +/** + * Configuration object. + * + * Values are read from the JSON configuration file. + * See {@link readAndVerifyConfig}. + * + * @typedef {{ + * buckets: Array< + * { + * unscanned: string, + * clean: string, + * quarantined: string + * }>, + * ClamCvdMirrorBucket: string + * }} + */ +const Config = null; + +const storage = new Storage({userAgent: `${pkgJson.name}/${pkgJson.version}`}); + +/** + * Read configuration from JSON configuration file. 
+ * and return the verified configuration + * + * @async + * @param {string} configFile + * @return {Promise<Config>} + */ +async function readAndVerifyConfig(configFile) { + logger.info(`Using configuration file: ${configFile}`); + + + /** @type {Config} */ + let config; + + try { + config = require(configFile); + delete config.comments; + } catch (e) { + logger.fatal( + {err: e}, + `Unable to read JSON file from ${configFile}`); + throw new Error(`Invalid configuration ${configFile}`); + } + + if (!Array.isArray(config.buckets) || config.buckets.length === 0) { + logger.fatal(`No buckets configured for scanning in ${configFile}`); + throw new Error('No buckets configured'); + } + + logger.info('BUCKET_CONFIG: '+JSON.stringify(config, null, 2)); + + // Check buckets are specified and exist. + let success = true; + for (let x = 0; x < config.buckets.length; x++) { + const buckets = config.buckets[x]; + for (const bucketType of ['unscanned', 'clean', 'quarantined']) { + if ( !(await checkBucketExists( + buckets[bucketType], + `config.buckets[${x}].${bucketType}`))) { + success=false; + } + } + if (buckets.unscanned === buckets.clean || + buckets.unscanned === buckets.quarantined || + buckets.clean === buckets.quarantined) { + logger.fatal( + `Error in ${configFile} buckets[${x}]: bucket names are not unique`); + success = false; + } + } + if ( !(await checkBucketExists( + config.ClamCvdMirrorBucket, + 'ClamCvdMirrorBucket'))) { + success=false; + } + + if (!success) { + throw new Error('Invalid configuration'); + } + return config; +} + + +/** + * Check that given bucket exists. Returns true on success + * + * @param {string} bucketName + * @param {string} configName + * @return {Promise<boolean>} + */ +async function checkBucketExists(bucketName, configName) { + if (!bucketName) { + logger.fatal(`Error in config: no "${configName}" bucket defined`); + return false; + } + // Check for bucket existence by listing files in bucket, will throw + // an exception if the bucket is not readable. 
+ // This is used in place of Bucket.exists() to avoid the need for + // Project/viewer permission. + try { + await storage.bucket(bucketName).getFiles( + {maxResults: 1, prefix: 'zzz', autoPaginate: false}); + return true; + } catch (e) { + logger.fatal( + `Error in config: cannot view files in "${ + configName}" : ${bucketName} : ${e.message}`); + logger.debug({err: e}); + return false; + } +} + +exports.Config = Config; +exports.readAndVerifyConfig = readAndVerifyConfig; diff --git a/cloudrun-malware-scanner/gcs-proxy-server.js b/cloudrun-malware-scanner/gcs-proxy-server.js new file mode 100644 index 0000000..e026665 --- /dev/null +++ b/cloudrun-malware-scanner/gcs-proxy-server.js @@ -0,0 +1,113 @@ +/* +* Copyright 2022 Google LLC + +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at + +* https://www.apache.org/licenses/LICENSE-2.0 + +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +const {GoogleAuth} = require('google-auth-library'); +const {logger} = require('./logger.js'); +// eslint-disable-next-line no-unused-vars +const {Config, readAndVerifyConfig} = require('./config.js'); +const httpProxy = require('http-proxy'); + +const googleAuth = new GoogleAuth(); + +// access token for GCS requests - will be refreshed every 50 mins +let accessToken; +const ACCESS_TOKEN_REFRESH_INTERVAL = 50*60*1000; + +/** + * Set up a reverse proxy to add authentication to HTTP requests from + * freshclam and proxy it to the GCS API + * + * @param {string} clamCvdMirrorBucket + */ +async function setupGcsReverseProxy(clamCvdMirrorBucket) { + // Get an OAuth2 access token and refresh it every 50mins. + accessToken = await googleAuth.getAccessToken(); + + setInterval(async () => { + logger.info(`Refreshing Oauth2 Access Token for GCS proxy.`); + accessToken = await googleAuth.getAccessToken(); + }, ACCESS_TOKEN_REFRESH_INTERVAL); + + const proxy = httpProxy.createProxyServer({ + target: 'https://storage.googleapis.com/', + changeOrigin: true, + autoRewrite: true, + secure: true, + ws: false, + }); + + // Error handling... 
+ proxy.on('error', function(err, req, res) { + let statusCode = 500; + if (res && res.statusCode && res.statusCode !== 200) { + statusCode = res.statusCode; + } + logger.error(`Failed to proxy to GCS for path ${req.url}, returning code ${ + statusCode}: ${err}`); + res.writeHead(statusCode, { + 'Content-Type': 'text/plain', + }); + res.end(`Failed to proxy to GCS: status ${statusCode}\n`); + }); + + // Add auth header. + proxy.on('proxyReq', function(proxyReq, req, res) { + if (proxyReq.path.startsWith( + '/' + clamCvdMirrorBucket + '/')) { + logger.info(`Proxying request for ${proxyReq.path} to GCS`); + proxyReq.setHeader('Authorization', 'Bearer ' + accessToken); + } else { + logger.error( + `Denying Proxy request for ${proxyReq.path} to GCS - invalid path`); + res.writeHead(404, { + 'Content-Type': 'text/plain', + }); + res.end('Failed to proxy to GCS - invalid path: status 404\n'); + } + }); + + const PROXY_PORT = process.env.PROXY_PORT || 8888; + + proxy.listen(PROXY_PORT, '127.0.0.1'); + logger.info( + `GCS authenticating reverse proxy listenting on port ${PROXY_PORT}`); +} + +/** + * Perform async setup and start the app. + * + * @async + */ +async function run() { + let configFile; + if (process.argv.length >= 3) { + configFile = process.argv[2]; + } else { + configFile = './config.json'; + } + + /** @type {Config} */ + const config = await readAndVerifyConfig(configFile); + + await setupGcsReverseProxy(config.ClamCvdMirrorBucket); +} + +// Start the service, exiting on error. 
+run().catch((e) => { + logger.fatal(e); + logger.fatal('Exiting'); + process.exit(1); +}); diff --git a/cloudrun-malware-scanner/package.json b/cloudrun-malware-scanner/package.json index e9c937b..54e3253 100644 --- a/cloudrun-malware-scanner/package.json +++ b/cloudrun-malware-scanner/package.json @@ -1,10 +1,11 @@ { "name": "gcs-malware-scanner", - "version": "2.2.0", + "version": "2.3.0", "description": "Service to scan GCS documents for the malware and move the analyzed documents to appropriate buckets", "main": "index.js", "scripts": { "start": "node server.js", + "start-proxy": "node gcs-proxy-server.js", "test": "echo \"Error: no test specified\" && exit 1", "eslint": "eslint *.js", "eslint-fix": "eslint --fix *.js" diff --git a/cloudrun-malware-scanner/server.js b/cloudrun-malware-scanner/server.js index 9051e24..ade93e5 100644 --- a/cloudrun-malware-scanner/server.js +++ b/cloudrun-malware-scanner/server.js @@ -24,7 +24,8 @@ const metrics = require('./metrics.js'); const util = require('node:util'); const execFile = util.promisify(require('node:child_process').execFile); const {setTimeout} = require('timers/promises'); -const httpProxy = require('http-proxy'); +// eslint-disable-next-line no-unused-vars +const {Config, readAndVerifyConfig} = require('./config.js'); const PORT = process.env.PORT || 8080; const CLAMD_HOST = '127.0.0.1'; @@ -50,15 +51,7 @@ const MAX_FILE_SIZE = 500000000; // 500MiB * Values are read from the JSON configuration file. * See {@link readAndVerifyConfig}. 
* - * @type {{ - * buckets: Array< - * { - * unscanned: string, - * clean: string, - * quarantined: string - * }>, - * ClamCvdMirrorBucket: string - * }} + * @type {Config} */ const BUCKET_CONFIG = { buckets: [], @@ -72,10 +65,6 @@ const scanner = clamd.createScanner(CLAMD_HOST, CLAMD_PORT); const storage = new Storage({userAgent: `${pkgJson.name}/${pkgJson.version}`}); const googleAuth = new GoogleAuth(); -// access token for GCS requests - will be refreshed every 50 mins -let accessToken; -const ACCESS_TOKEN_REFRESH_INTERVAL = 50*60*1000; - /** * Route that is invoked by Cloud Run when a malware scan is requested * for a document uploaded to GCS. @@ -323,95 +312,6 @@ async function moveProcessedFile(filename, isClean, config) { await srcfile.move(destinationBucket); } -/** - * Read configuration from JSON configuration file. - * and store in BUCKET_CONFIG global - * - * @async - * @param {string} configFile - */ -async function readAndVerifyConfig(configFile) { - logger.info(`Using configuration file: ${configFile}`); - - try { - const config = require(configFile); - delete config.comments; - Object.assign(BUCKET_CONFIG, config); - } catch (e) { - logger.fatal( - {err: e}, - `Unable to read JSON file from ${configFile}`); - throw new Error(`Invalid configuration ${configFile}`); - } - - if (BUCKET_CONFIG.buckets.length === 0) { - logger.fatal(`No buckets configured for scanning in ${configFile}`); - throw new Error('No buckets configured'); - } - - logger.info('BUCKET_CONFIG: '+JSON.stringify(BUCKET_CONFIG, null, 2)); - - // Check buckets are specified and exist. 
- let success = true; - for (let x = 0; x < BUCKET_CONFIG.buckets.length; x++) { - const config = BUCKET_CONFIG.buckets[x]; - for (const bucketType of ['unscanned', 'clean', 'quarantined']) { - if ( !(await checkBucketExists( - config[bucketType], - `config.buckets[${x}].${bucketType}`))) { - success=false; - } - } - if (config.unscanned === config.clean || - config.unscanned === config.quarantined || - config.clean === config.quarantined) { - logger.fatal( - `Error in ${configFile} buckets[${x}]: bucket names are not unique`); - success = false; - } - } - if ( !(await checkBucketExists( - BUCKET_CONFIG.ClamCvdMirrorBucket, - 'ClamCvdMirrorBucket'))) { - success=false; - } - - if (!success) { - throw new Error('Invalid configuration'); - } -} - - -/** - * Check that given bucket exists. Returns true on success - * - * @param {string} bucketName - * @param {string} configName - * @return {Promise} - */ -async function checkBucketExists(bucketName, configName) { - if (!bucketName) { - logger.fatal(`Error in config: no "${configName}" bucket defined`); - success = false; - } - // Check for bucket existence by listing files in bucket, will throw - // an exception if the bucket is not readable. - // This is used in place of Bucket.exists() to avoid the need for - // Project/viewer permission. - try { - await storage.bucket(bucketName).getFiles( - {maxResults: 1, prefix: 'zzz', autoPaginate: false}); - return true; - } catch (e) { - logger.fatal( - `Error in config: cannot view files in "${ - configName}" : ${bucketName} : ${e.message}`); - logger.debug({err: e}); - return false; - } -} - - /** * Wait up to 5 mins for ClamD to respond */ @@ -434,64 +334,6 @@ async function waitForClamD() { process.exit(1); } -/** - * Set up a reverse proxy to add authentication to HTTP requests from - * freshclam and proxy it to the GCS API - */ -async function setupGcsReverseProxy() { - // Get an OAuth2 access token and refresh it every 50mins. 
- accessToken = await googleAuth.getAccessToken(); - - setInterval(async () => { - logger.info(`Refreshing Oauth2 Access Token for GCS proxy.`); - accessToken = await googleAuth.getAccessToken(); - }, ACCESS_TOKEN_REFRESH_INTERVAL); - - const proxy = httpProxy.createProxyServer({ - target: 'https://storage.googleapis.com/', - changeOrigin: true, - autoRewrite: true, - secure: true, - ws: false, - }); - - // Error handling... - proxy.on('error', function(err, req, res) { - let statusCode = 500; - if (res && res.statusCode && res.statusCode != 200) { - statusCode = res.statusCode; - } - logger.error(`Failed to proxy to GCS for path ${req.url}, returning code ${ - statusCode}: ${err}`); - res.writeHead(statusCode, { - 'Content-Type': 'text/plain', - }); - res.end(`Failed to proxy to GCS: status ${statusCode}\n`); - }); - - // Add auth header/ - proxy.on('proxyReq', function(proxyReq, req, res) { - if (proxyReq.path.startsWith( - '/' + BUCKET_CONFIG.ClamCvdMirrorBucket + '/')) { - logger.info(`Proxying request for ${proxyReq.path} to GCS`); - proxyReq.setHeader('Authorization', 'Bearer ' + accessToken); - } else { - logger.error( - `Denying Proxy request for ${proxyReq.path} to GCS - invalid path`); - res.writeHead(404, { - 'Content-Type': 'text/plain', - }); - res.end('Failed to proxy to GCS - invalid path: status 404\n'); - } - }); - - const PROXY_PORT = process.env.PROXY_PORT || 8888; - - proxy.listen(PROXY_PORT, 'localhost'); - logger.info( - `GCS authenticating reverse proxy listenting on port ${PROXY_PORT}`); -} - /** * Perform async setup and start the app. * @@ -511,9 +353,9 @@ async function run() { } else { configFile = './config.json'; } - await readAndVerifyConfig(configFile); + const config = await readAndVerifyConfig(configFile); - await setupGcsReverseProxy(); + Object.assign(BUCKET_CONFIG, config); await waitForClamD();