From 647cca7759bd702bc1d60f8376cd14717c858c92 Mon Sep 17 00:00:00 2001 From: Mira Kuntz Date: Fri, 16 Feb 2024 12:23:58 +0100 Subject: [PATCH 01/19] increase verbosity --- templates/walle.py | 60 +++++++++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/templates/walle.py b/templates/walle.py index 53275fc..d71bd6b 100644 --- a/templates/walle.py +++ b/templates/walle.py @@ -138,21 +138,25 @@ def make_parser() -> argparse.ArgumentParser: class Job: def __init__( self, - galaxy_id: int, - object_store_id: int, user_id: int, user_name: str, + user_mail: str, tool_id: str, - job_runner_name: str, + galaxy_id: int, + runner_id: int, + runner_name: str, + object_store_id: int, jwd=None, ) -> None: - self.galaxy_id = galaxy_id - self.object_store_id = object_store_id - self.jwd = jwd self.user_id = user_id self.user_name = user_name + self.user_mail = user_mail self.tool_id = tool_id - self.job_runner_name = job_runner_name + self.galaxy_id = galaxy_id + self.runner_id = runner_id + self.runner_name = runner_name + self.object_store_id = object_store_id + self.jwd = jwd def report_id_and_user_name(self) -> str: return f"{self.galaxy_id} {self.user_name}" @@ -211,7 +215,7 @@ def file_in_size_range(file_stat: os.stat_result, min_size=None, max_size=None) return True -def all_files_in_dir(dir: pathlib.Path, args) -> [pathlib.Path]: +def all_files_in_dir(dir: pathlib.Path, args) -> list[pathlib.Path]: """ Gets all files of given directory and its subdirectories and appends file to a list of pathlib.Path objects, if atime @@ -255,8 +259,8 @@ def digest_file_sha1(chunksize: int, path: pathlib.Path) -> str: def scan_file_for_malware( - chunksize: int, file: pathlib.Path, lib: [Malware] -) -> [Malware]: + chunksize: int, file: pathlib.Path, lib: list[Malware] +) -> list[Malware]: """ Returning a list of Malware, because it could potentially happen (even if it should not), @@ -286,11 +290,12 @@ def report_matching_malware(job: Job, malware: Malware, path: pathlib.Path) -> s """ Create log line depending on verbosity """ - return f"{datetime.datetime.now()} {job.user_name} {job.galaxy_id} \ + return f"{datetime.datetime.now()} {job.user_id} {job.user_name} {job.user_mail} \ +{job.tool_id} {job.galaxy_id} {job.runner_id} {job.runner_name} {job.object_store_id} \ {malware.malware_class} {malware.program} {malware.version} {path}" -def construct_malware_list(malware_yaml: dict) -> [Malware]: +def construct_malware_list(malware_yaml: dict) -> list[Malware]: """ creates a flat list of malware objects, that hold all info The nested structure in yaml is for better optical structuring @@ -359,7 +364,7 @@ def get_jwd_path(self, job: Job): job.galaxy_id, [job.object_store_id], self.backends, - job.job_runner_name, + job.runner_name, ) return jwd @@ -397,10 +402,13 @@ def __init__(self): db_password, ) - def get_running_jobs(self, tool=None) -> [Job]: + def get_running_jobs(self, tool=None) -> list[Job]: query = f""" - SELECT id, object_store_id, tool_id, user_id, user, job_runner_name - FROM job + SELECT j.user_id, u.username, u.email, j.tool_id, j.id, + j.job_runner_external_id, j.job_runner_name, j.object_store_id + FROM + job j + INNER JOIN galaxy_user u ON j.user_id = u.id WHERE state = 'running' AND object_store_id IS NOT NULL AND user_id IS NOT NULL @@ -422,21 +430,25 @@ def get_running_jobs(self, tool=None) -> [Job]: sys.exit(1) running_jobs_list = [] for ( - job_id, - object_store_id, - tool_id, user_id, user_name, - job_runner_name, + user_mail, + tool_id, + job_id, + runner_id, + runner_name, + object_store_id, ) in running_jobs: running_jobs_list.append( Job( - galaxy_id=job_id, - object_store_id=object_store_id, - tool_id=tool_id, user_id=user_id, user_name=user_name, - job_runner_name=job_runner_name, + user_mail=user_mail, + tool_id=tool_id, + galaxy_id=job_id, + runner_id=runner_id, + runner_name=runner_name, + object_store_id=object_store_id, ) ) return running_jobs_list From b41a2bd4265ad2dc3cddada268b305d2965579d6 Mon Sep 17 00:00:00 2001 From: Mira Kuntz Date: Mon, 19 Feb 2024 15:51:25 +0100 Subject: [PATCH 02/19] add delete user function (wip) --- defaults/main.yml | 5 +++ tasks/main.yml | 10 +++++ templates/walle.py | 102 ++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 107 insertions(+), 10 deletions(-) diff --git a/defaults/main.yml b/defaults/main.yml index 5e95d70..adc0f3e 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -15,6 +15,11 @@ walle_script_location: /usr/local/bin/walle.py walle_filesize_min: 1 walle_filesize_max: 10 walle_since_hours: 24 +#delete users when malware was found and malware severity reached walle_delete_threshold +walle_delete_users: false +walle_delete_threshold: medium +#walle_api_key: #admin api key to delete users, goes to VAULT +#walle_galaxy_url: #galaxy_hostname, no leading slash #walle_tool: walle_verbose: false diff --git a/tasks/main.yml b/tasks/main.yml index abe9750..d740529 100644 --- a/tasks/main.yml +++ b/tasks/main.yml @@ -33,6 +33,14 @@ - 'export MALWARE_LIB="{{ walle_malware_database_location }}/{{ walle_database_file }}"' - 'export PGPASSFILE="{{ walle_pgpass_file }}"' +- name: Add optinal env variables for database (WallE) + ansible.builtin.lineinfile: + path: "{{ walle_bashrc }}" + line: "{{ item }}" + with_items: + - 'export GALAXY_API_KEY="{{ walle_api_key }}"' + - 'export GALAXY_BASE_URL="{{ walle_galaxy_url }}"' + when: walle_delete_users - name: Create logfile (WallE) ansible.builtin.file: @@ -79,4 +87,6 @@ {% if walle_filesize_max %} --max-size {{ walle_filesize_max }} {% endif %} {% if walle_since_hours %} --since {{ walle_since_hours }} {% endif %} {% if walle_verbose %} -v {% endif %} + {% if walle_delete_users %} --delete-users {{ walle_delete_threshold }} + {% endif %} >> {{ walle_log_dir }}/walle.log 2>&1 diff --git a/templates/walle.py b/templates/walle.py index d71bd6b..8fd3467 100644 --- a/templates/walle.py +++ b/templates/walle.py @@ -8,6 +8,7 @@ import hashlib import os import pathlib +import requests import sys import time import zlib @@ -29,6 +30,45 @@ def convert_arg_to_seconds(hours: str) -> int: return int(hours) * 60 * 60 +class Severity: + def __init__(self, number: int, name: str): + self.value = number + self.name = name + + def __eq__(self, other) -> bool: + if not isinstance(other, Severity): + raise ValueError("The other must be an instance of the Severity") + if self.value == other.value and self.name == other.name: + return True + else: + return False + + def __le__(self, other) -> bool: + if not isinstance(other, Severity): + raise ValueError("The other must be an instance of the Severity") + if self.value <= other.value: + return True + else: + return False + + def __ge__(self, other) -> bool: + if not isinstance(other, Severity): + raise ValueError("The other must be an instance of the Severity") + if self.value >= other.value: + return True + else: + return False + + +VALID_SEVERITIES = (Severity(0, "low"), Severity(1, "medium"), Severity(2, "high")) + + +def convert_str_to_severity(test_level: str) -> Severity: + for level in VALID_SEVERITIES: + if (level.name.casefold()).__eq__(test_level.casefold()): + return level + + def make_parser() -> argparse.ArgumentParser: my_parser = argparse.ArgumentParser( prog="WALL·E", @@ -64,6 +104,10 @@ def make_parser() -> argparse.ArgumentParser: PGPASSFILE: path to .pgpass file (same as gxadmin's) in format: :5432:*:: + The '--delete-user' flag requires additional environment variables: + GALAXY_BASE_URL: Instance hostname including scheme (https://examplegalaxy.org) + GALAXY_API_KEY: Galaxy API key with admin privileges + GALAXY_ROOT: Galaxy root directiory (e.g. /srv/galaxy) """, formatter_class=argparse.RawTextHelpFormatter, ) @@ -131,7 +175,16 @@ def make_parser() -> argparse.ArgumentParser: action="store_true", help="Show progress bar. Leave unset for cleaner logs and slightly higher performance", ) - + my_parser.add_argument( + "--delete-user", + metavar="MIN_SEVERITY", + choices=VALID_SEVERITIES, + type=convert_str_to_severity, + help="Delete user when severity level is equal or higher. \ + Following additional environment variables are expected: \ + GALAXY_API_KEY \ + GALAXY_BASE_URL", + ) return my_parser @@ -182,7 +235,7 @@ def __init__( malware_class: str, program: str, version: str, - severity: str, + severity: Severity, description: str, crc32: str, sha1: str, @@ -292,7 +345,7 @@ def report_matching_malware(job: Job, malware: Malware, path: pathlib.Path) -> s """ return f"{datetime.datetime.now()} {job.user_id} {job.user_name} {job.user_mail} \ {job.tool_id} {job.galaxy_id} {job.runner_id} {job.runner_name} {job.object_store_id} \ -{malware.malware_class} {malware.program} {malware.version} {path}" +{malware.malware_class} {malware.program} {malware.severity.name} {malware.version} {path}" def construct_malware_list(malware_yaml: dict) -> list[Malware]: @@ -309,9 +362,9 @@ def construct_malware_list(malware_yaml: dict) -> list[Malware]: malware_class=malware_class, program=program, version=version, - severity=malware_yaml[malware_class][program][version][ - "severity" - ], + severity=convert_str_to_severity( + malware_yaml[malware_class][program][version]["severity"] + ), description=malware_yaml[malware_class][program][version][ "description" ], @@ -454,6 +507,20 @@ def get_running_jobs(self, tool=None) -> list[Job]: return running_jobs_list +def delete_user(user_id: int, base_url: str, api_key: str) -> bool: + url = f"{base_url}/api/users/{encode_galaxy_user_id(user_id)}" + header = {"x-api-key": api_key} + response = requests.delete(url=url, headers=header) + if response.status_code == 200: + print(f"User {user_id} deleted successfully.") + else: + print(f"Failed to delete user {user_id}!") + + +def encode_galaxy_user_id(id: int) -> str: + pass + + def main(): """ Miner Finder's main function. Shows a status bar while processing the jobs found in Galaxy @@ -463,11 +530,16 @@ def main(): db = RunningJobDatabase() malware_library = construct_malware_list(load_malware_lib_from_env()) jobs = db.get_running_jobs(args.tool) + if args.delete_user: + api_key = get_str_from_env_or_error("GALAXY_API_KEY") + galaxy_url = get_path_from_env_or_error("GALAXY_BASE_URL") + galaxy_root = get_path_from_env_or_error("GALAXY_ROOT") if args.interactive: if args.verbose: print( - "TIMESTAMP GALAXY_USER JOB_ID \ -MALWARE_CLASS MALWARE MALWARE_VERSION PATH" + "TIMESTAMP GALAXY_USER_ID GALAXY_USER_MAIL TOOL_ID \ + GALAXY_JOB_ID RUNNER_JOB_ID RUNNER_NAME MALWARE_CLASS \ + OBJECT_STORE_ID MALWARE SEVERITY MALWARE_VERSION PATH" ) else: print("GALAXY_USER JOB_ID") @@ -487,8 +559,8 @@ def main(): ) if len(matching_malware) > 0: print("\n") - if args.verbose: - for malware in matching_malware: + for malware in matching_malware: + if args.verbose: print( report_matching_malware( job=job, @@ -496,6 +568,16 @@ def main(): path=file, ) ) + if args.delete_user: + print(type(args.delete_user)) + print(type(malware.severity)) + if args.delete_user <= malware.severity: + delete_user( + user_id=job.user_id, + api_key=api_key, + base_url=galaxy_url, + ) + else: print(job.report_id_and_user_name()) break From 2a26524d23feb8167020fd6ec3d952b8a9c2de7a Mon Sep 17 00:00:00 2001 From: Mira Kuntz Date: Mon, 19 Feb 2024 16:22:24 +0100 Subject: [PATCH 03/19] use helper functions to get envs --- templates/walle.py | 97 ++++++++++++++++++++++------------------------ 1 file changed, 46 insertions(+), 51 deletions(-) diff --git a/templates/walle.py b/templates/walle.py index 53275fc..0ffd1b9 100644 --- a/templates/walle.py +++ b/templates/walle.py @@ -229,11 +229,8 @@ def all_files_in_dir(dir: pathlib.Path, args) -> [pathlib.Path]: return files -def load_malware_lib_from_env(env=CHECKSUM_FILE_ENV) -> dict: - if not os.environ.get(env): - raise ValueError(env) - malware_lib_path = os.environ.get(env).strip() - with open(malware_lib_path, "r") as malware_yaml: +def load_malware_lib_from_env(malware_file: pathlib.Path) -> dict: + with open(malware_file, "r") as malware_yaml: malware_lib = yaml.safe_load(malware_yaml) return malware_lib @@ -326,31 +323,17 @@ class JWDGetter: This class is a workaround for calling 'galaxy_jwd.py's main function. """ - def __init__(self) -> None: + def __init__( + self, galaxy_config_file: pathlib.Path, pulsar_app_conf: pathlib.Path + ) -> None: """ Reads the storage backend configuration (might deserve it's own function in galaxy_jwd.py) """ - if not os.environ.get("GALAXY_CONFIG_FILE"): - raise ValueError("Please set ENV GALAXY_CONFIG_FILE") - galaxy_config_file = os.environ.get("GALAXY_CONFIG_FILE").strip() - - # Check if the given galaxy.yml file exists - if not os.path.isfile(galaxy_config_file): - raise ValueError( - f"The given galaxy.yml file {galaxy_config_file} does not exist" - ) - if not os.environ.get("GALAXY_PULSAR_APP_CONF"): - raise ValueError("Please set ENV GALAXY_PULSAR_APP_CONF") - galaxy_pulsar_app_conf = os.environ.get("GALAXY_PULSAR_APP_CONF").strip() - object_store_conf = galaxy_jwd.get_object_store_conf_path(galaxy_config_file) backends = galaxy_jwd.parse_object_store(object_store_conf) - # Add pulsar staging directory (runner: pulsar_embedded) to backends - backends["pulsar_embedded"] = galaxy_jwd.get_pulsar_staging_dir( - galaxy_pulsar_app_conf - ) + backends["pulsar_embedded"] = galaxy_jwd.get_pulsar_staging_dir(pulsar_app_conf) self.backends = backends # might deserve it's own function in galaxy_jwd.py @@ -365,31 +348,7 @@ def get_jwd_path(self, job: Job): class RunningJobDatabase(galaxy_jwd.Database): - def __init__(self): - if not os.environ.get("PGDATABASE"): - raise ValueError("Please set ENV PGDATABASE") - db_name = os.environ.get("PGDATABASE").strip() - - if not os.environ.get("PGUSER"): - raise ValueError("Please set ENV PGUSER") - db_user = os.environ.get("PGUSER").strip() - - if not os.environ.get("PGHOST"): - raise ValueError("Please set ENV PGHOST") - db_host = os.environ.get("PGHOST").strip() - - # Check if ~/.pgpass file exists and is not empty - if ( - not os.path.isfile(os.path.expanduser("~/.pgpass")) - or os.stat(os.path.expanduser("~/.pgpass")).st_size == 0 - ): - raise ValueError( - "Please create a ~/.pgpass file in format: " - ":5432:*::" - ) - db_password = galaxy_jwd.extract_password_from_pgpass( - pgpass_file=os.environ.get("PGPASSFILE").strip() - ) + def __init__(self, db_host: str, db_name: str, db_user: str, db_password: str): super().__init__( db_name, db_user, @@ -442,14 +401,50 @@ def get_running_jobs(self, tool=None) -> [Job]: return running_jobs_list +def get_path_from_env_or_error(env: str) -> pathlib.Path: + if os.environ.get(env): + if (path := pathlib.Path(os.environ.get(env).strip())).exists(): + return path + else: + raise ValueError(f"Path for {env} is invalid") + else: + raise ValueError(f"Please set ENV {env}") + + +def get_str_from_env_or_error(env: str) -> str: + if os.environ.get(env): + if len(from_env := os.environ.get(env).strip()) > 0: + return from_env + else: + raise ValueError(f"Path for {env} is invalid") + else: + raise ValueError(f"Please set ENV {env}") + + def main(): """ Miner Finder's main function. Shows a status bar while processing the jobs found in Galaxy """ args = make_parser().parse_args() - jwd_getter = JWDGetter() - db = RunningJobDatabase() - malware_library = construct_malware_list(load_malware_lib_from_env()) + galaxy_config_file = get_path_from_env_or_error("GALAXY_CONFIG_FILE") + + jwd_getter = JWDGetter( + galaxy_config_file=galaxy_config_file, + pulsar_app_conf=get_path_from_env_or_error("GALAXY_PULSAR_APP_CONF"), + ) + db = RunningJobDatabase( + db_host=get_str_from_env_or_error("PGHOST"), + db_password=galaxy_jwd.extract_password_from_pgpass( + get_path_from_env_or_error("PGPASSFILE") + ), + db_name=get_str_from_env_or_error("PGDATABASE"), + db_user=get_str_from_env_or_error("PGUSER"), + ) + malware_library = construct_malware_list( + malware_yaml=load_malware_lib_from_env( + malware_file=get_path_from_env_or_error("MALWARE_LIB") + ) + ) jobs = db.get_running_jobs(args.tool) if args.interactive: if args.verbose: From 85de10d69eae3f0f6b19e21d82f46f3c4d8b5be4 Mon Sep 17 00:00:00 2001 From: Mira Kuntz Date: Thu, 22 Feb 2024 16:02:18 +0100 Subject: [PATCH 04/19] delete users as api method --- templates/walle.py | 66 ++++++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/templates/walle.py b/templates/walle.py index 02058c8..e101b20 100644 --- a/templates/walle.py +++ b/templates/walle.py @@ -8,12 +8,12 @@ import hashlib import os import pathlib -import requests import sys import time import zlib import galaxy_jwd +import requests import yaml from tqdm import tqdm @@ -486,18 +486,28 @@ def get_str_from_env_or_error(env: str) -> str: raise ValueError(f"Please set ENV {env}") -def delete_user(user_id: int, base_url: str, api_key: str) -> bool: - url = f"{base_url}/api/users/{encode_galaxy_user_id(user_id)}" - header = {"x-api-key": api_key} - response = requests.delete(url=url, headers=header) - if response.status_code == 200: - print(f"User {user_id} deleted successfully.") - else: - print(f"Failed to delete user {user_id}!") +class GalaxyAPI: + def __init__(self, base_url: str, api_key: str) -> None: + self.base_url = base_url + self.api_key = api_key + self.auth_header = {"x-api-key": self.api_key} + def delete_user(self, encoded_user_id: str) -> bool: + url = f"{self.base_url}/api/users/{encoded_user_id}" + response = requests.delete(url=url, headers=self.auth_header) + if response.status_code == 200: + return True + else: + print(f"Failed to delete user {encoded_user_id}!") -def encode_galaxy_user_id(id: int) -> str: - pass + def encode_galaxy_user_id(self, decoded_id: int) -> str: + url = f"{self.base_url}/api/configuration/encode/{decoded_id}" + response = requests.get(url=url, headers=self.auth_header) + if response.status_code != 200: + print(f"Failed to encode id {decoded_id}!") + else: + json_response = response.json() + return json_response["encoded_id"] def main(): @@ -526,9 +536,13 @@ def main(): ) jobs = db.get_running_jobs(args.tool) if args.delete_user: - api_key = get_str_from_env_or_error("GALAXY_API_KEY") - galaxy_url = get_path_from_env_or_error("GALAXY_BASE_URL") - galaxy_root = get_path_from_env_or_error("GALAXY_ROOT") + api = GalaxyAPI( + api_key=get_str_from_env_or_error("GALAXY_API_KEY"), + base_url=get_str_from_env_or_error("GALAXY_BASE_URL"), + ) + delete_users = set() + if not args.verbose: + report_users = set() if args.interactive: if args.verbose: print( @@ -555,7 +569,10 @@ def main(): if len(matching_malware) > 0: print("\n") for malware in matching_malware: - if args.verbose: + if not args.verbose and job.user_id not in report_users: + print(job.report_id_and_user_name()) + report_users.add(job.user_id) + else: print( report_matching_malware( job=job, @@ -563,25 +580,18 @@ def main(): path=file, ) ) - if args.delete_user: - print(type(args.delete_user)) - print(type(malware.severity)) + if args.delete_user and job.user_id not in delete_users: if args.delete_user <= malware.severity: - delete_user( - user_id=job.user_id, - api_key=api_key, - base_url=galaxy_url, - ) - - else: - print(job.report_id_and_user_name()) - break - + delete_users.add(job.user_id) else: print( f"JWD for Job {job.galaxy_id} found but does not exist in FS", file=sys.stderr, ) + # Deletes users at the end, to report all malicious jobs of a user + for user_id in delete_users: + # add notification api call here + api.delete_user(encoded_user_id=api.encode_galaxy_user_id(decoded_id=user_id)) if args.interactive: print("Complete.") From 855585110e05722f26f02c8d368d24e309f6f796 Mon Sep 17 00:00:00 2001 From: Mira Kuntz Date: Thu, 22 Feb 2024 16:02:54 +0100 Subject: [PATCH 05/19] env vars should use regex --- defaults/main.yml | 29 +++++++++++++++++++++++++---- tasks/main.yml | 20 +++++++++----------- 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/defaults/main.yml b/defaults/main.yml index adc0f3e..bf8e3f3 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -15,12 +15,33 @@ walle_script_location: /usr/local/bin/walle.py walle_filesize_min: 1 walle_filesize_max: 10 walle_since_hours: 24 -#delete users when malware was found and malware severity reached walle_delete_threshold +walle_envs_database: + - key: MALWARE_LIB + value: "{{ walle_malware_database_location }}/{{ walle_database_file }}" + - key: PGPASSFILE + value: "{{ walle_pgpass_file }}" + - key: PGHOST + value: 127.0.0.1 + - key: PGUSER + value: galaxy + - key: PGDATABASE + value: galaxy + +# delete users when malware was found and malware severity reached walle_delete_threshold walle_delete_users: false walle_delete_threshold: medium -#walle_api_key: #admin api key to delete users, goes to VAULT -#walle_galaxy_url: #galaxy_hostname, no leading slash -#walle_tool: +walle_envs_user_deletion: + - key: GALAXY_API_KEY + value: "{{ walle_api_key }}" + - key: GALAXY_BASE_URL + value: "{{ walle_galaxy_url }}" + - key: GALAXY_CONFIG_FILE + value: "{{ galaxy_config_dir }}/galaxy.yml" + - key: GALAXY_LIB + value: "{{ galaxy_root }}/server/lib" +# walle_api_key: #admin api key to delete users, goes to VAULT +# walle_galaxy_url: #galaxy_hostname, no leading slash +# walle_tool: walle_verbose: false # Cron diff --git a/tasks/main.yml b/tasks/main.yml index d740529..0a168a6 100644 --- a/tasks/main.yml +++ b/tasks/main.yml @@ -28,18 +28,16 @@ - name: Add env variable for database (WallE) ansible.builtin.lineinfile: path: "{{ walle_bashrc }}" - line: "{{ item }}" - with_items: - - 'export MALWARE_LIB="{{ walle_malware_database_location }}/{{ walle_database_file }}"' - - 'export PGPASSFILE="{{ walle_pgpass_file }}"' + regexp: "^export {{ item.key }}=" + line: 'export {{ item.key }}="{{ item.value}}"' + with_items: "{{ walle_envs_database }}" -- name: Add optinal env variables for database (WallE) +- name: Add env variables for user deletion (WallE) ansible.builtin.lineinfile: path: "{{ walle_bashrc }}" - line: "{{ item }}" - with_items: - - 'export GALAXY_API_KEY="{{ walle_api_key }}"' - - 'export GALAXY_BASE_URL="{{ walle_galaxy_url }}"' + regexp: "^export {{ item.key }}=" + line: 'export {{ item.key }}="{{ item.value}}"' + with_items: "{{ walle_envs_user_deletion }}" when: walle_delete_users - name: Create logfile (WallE) @@ -81,12 +79,12 @@ {{ walle_virtualenv }}/bin/python {% else %} {{ walle_python }} {% endif %} {{ walle_script_location }} - {% if walle_tool %} {{ walle_tool }} {% endif %} + {% if walle_tool %} --tool {{ walle_tool }} {% endif %} {% if walle_filesize_min %} --min-size {{ walle_filesize_min }} {% endif %} {% if walle_filesize_max %} --max-size {{ walle_filesize_max }} {% endif %} {% if walle_since_hours %} --since {{ walle_since_hours }} {% endif %} {% if walle_verbose %} -v {% endif %} - {% if walle_delete_users %} --delete-users {{ walle_delete_threshold }} + {% if walle_delete_users %} --delete-user {{ walle_delete_threshold }} {% endif %} >> {{ walle_log_dir }}/walle.log 2>&1 From 9ff105142bf281ad9b2f1f2f824f6d7ef9044913 Mon Sep 17 00:00:00 2001 From: Mira Kuntz Date: Thu, 22 Feb 2024 17:08:05 +0100 Subject: [PATCH 06/19] change order of verbose log --- templates/walle.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/templates/walle.py b/templates/walle.py index e101b20..9e37303 100644 --- a/templates/walle.py +++ b/templates/walle.py @@ -60,7 +60,7 @@ def __ge__(self, other) -> bool: return False -VALID_SEVERITIES = (Severity(0, "low"), Severity(1, "medium"), Severity(2, "high")) +VALID_SEVERITIES = (Severity(0, "LOW"), Severity(1, "MEDIUM"), Severity(2, "HIGH")) def convert_str_to_severity(test_level: str) -> Severity: @@ -181,6 +181,8 @@ def make_parser() -> argparse.ArgumentParser: choices=VALID_SEVERITIES, type=convert_str_to_severity, help="Delete user when severity level is equal or higher. \ + Make sure that you know what the consequences are on your \ + instance, when a user is set to deleted. \ Following additional environment variables are expected: \ GALAXY_API_KEY \ GALAXY_BASE_URL", @@ -340,9 +342,9 @@ def report_matching_malware(job: Job, malware: Malware, path: pathlib.Path) -> s """ Create log line depending on verbosity """ - return f"{datetime.datetime.now()} {job.user_id} {job.user_name} {job.user_mail} \ + return f"{datetime.datetime.now()} {malware.severity.name} {job.user_id} {job.user_name} {job.user_mail} \ {job.tool_id} {job.galaxy_id} {job.runner_id} {job.runner_name} {job.object_store_id} \ -{malware.malware_class} {malware.program} {malware.severity.name} {malware.version} {path}" +{malware.malware_class} {malware.program} {malware.version} {path}" def construct_malware_list(malware_yaml: dict) -> list[Malware]: @@ -546,9 +548,9 @@ def main(): if args.interactive: if args.verbose: print( - "TIMESTAMP GALAXY_USER_ID GALAXY_USER_MAIL TOOL_ID \ - GALAXY_JOB_ID RUNNER_JOB_ID RUNNER_NAME MALWARE_CLASS \ - OBJECT_STORE_ID MALWARE SEVERITY MALWARE_VERSION PATH" + "TIMESTAMP MALWARE_SEVERITY USER_ID USER_NAME USER_MAIL TOOL_ID GALAXY_JOB_ID \ + RUNNER_JOB_ID RUNNER_NAME OBJECT_STORE_ID MALWARE_CLASS \ + MALWARE_NAME MALWARE_VERSION PATH" ) else: print("GALAXY_USER JOB_ID") @@ -590,7 +592,7 @@ def main(): ) # Deletes users at the end, to report all malicious jobs of a user for user_id in delete_users: - # add notification api call here + # add notification here api.delete_user(encoded_user_id=api.encode_galaxy_user_id(decoded_id=user_id)) if args.interactive: print("Complete.") From a0d9aea459dc9460f734b2f2c5d0eea9319385df Mon Sep 17 00:00:00 2001 From: Mira <86979912+mira-miracoli@users.noreply.github.com> Date: Wed, 28 Feb 2024 09:40:27 +0100 Subject: [PATCH 07/19] spacing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Björn Grüning --- tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/main.yml b/tasks/main.yml index 0a168a6..875b706 100644 --- a/tasks/main.yml +++ b/tasks/main.yml @@ -36,7 +36,7 @@ ansible.builtin.lineinfile: path: "{{ walle_bashrc }}" regexp: "^export {{ item.key }}=" - line: 'export {{ item.key }}="{{ item.value}}"' + line: 'export {{ item.key }}="{{ item.value }}"' with_items: "{{ walle_envs_user_deletion }}" when: walle_delete_users From 3169764ce3f97a0ff3c1ecf1c8e5831cbeb3a1ec Mon Sep 17 00:00:00 2001 From: Mira <86979912+mira-miracoli@users.noreply.github.com> Date: Wed, 28 Feb 2024 09:45:29 +0100 Subject: [PATCH 08/19] spacing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Björn Grüning --- tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/main.yml b/tasks/main.yml index 875b706..42837fe 100644 --- a/tasks/main.yml +++ b/tasks/main.yml @@ -29,7 +29,7 @@ ansible.builtin.lineinfile: path: "{{ walle_bashrc }}" regexp: "^export {{ item.key }}=" - line: 'export {{ item.key }}="{{ item.value}}"' + line: 'export {{ item.key }}="{{ item.value }}"' with_items: "{{ walle_envs_database }}" - name: Add env variables for user deletion (WallE) From c75cdf8d442a6fa72a5d99da88657b0e9e26c40b Mon Sep 17 00:00:00 2001 From: Mira <86979912+mira-miracoli@users.noreply.github.com> Date: Wed, 28 Feb 2024 09:45:54 +0100 Subject: [PATCH 09/19] example with purge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Björn Grüning --- templates/walle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/walle.py b/templates/walle.py index 9e37303..ee58a36 100644 --- a/templates/walle.py +++ b/templates/walle.py @@ -182,7 +182,7 @@ def make_parser() -> argparse.ArgumentParser: type=convert_str_to_severity, help="Delete user when severity level is equal or higher. \ Make sure that you know what the consequences are on your \ - instance, when a user is set to deleted. \ + instance, when a user is set to deleted (e.g. when a user is purged automatically after deletion). \ Following additional environment variables are expected: \ GALAXY_API_KEY \ GALAXY_BASE_URL", From 3d1252fba97a3d54cd7a51eda55800116a6f92aa Mon Sep 17 00:00:00 2001 From: Mira Kuntz Date: Wed, 28 Feb 2024 11:39:42 +0100 Subject: [PATCH 10/19] add user to report users unconditionally --- templates/walle.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/templates/walle.py b/templates/walle.py index ee58a36..2de58db 100644 --- a/templates/walle.py +++ b/templates/walle.py @@ -542,9 +542,8 @@ def main(): api_key=get_str_from_env_or_error("GALAXY_API_KEY"), base_url=get_str_from_env_or_error("GALAXY_BASE_URL"), ) - delete_users = set() - if not args.verbose: - report_users = set() + delete_users = set() + report_users = set() if args.interactive: if args.verbose: print( @@ -571,9 +570,9 @@ def main(): if len(matching_malware) > 0: print("\n") for malware in matching_malware: + # report only once if not args.verbose and job.user_id not in report_users: print(job.report_id_and_user_name()) - report_users.add(job.user_id) else: print( report_matching_malware( @@ -582,6 +581,7 @@ def main(): path=file, ) ) + report_users.add(job.user_id) if args.delete_user and job.user_id not in delete_users: if args.delete_user <= malware.severity: delete_users.add(job.user_id) From c92be149197d893804801cb11bb36823f88bccd8 Mon Sep 17 00:00:00 2001 From: Mira Kuntz Date: Wed, 28 Feb 2024 16:36:04 +0100 Subject: [PATCH 11/19] catch broken symlinks --- templates/walle.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/templates/walle.py b/templates/walle.py index 2de58db..570ecde 100644 --- a/templates/walle.py +++ b/templates/walle.py @@ -279,12 +279,16 @@ def all_files_in_dir(dir: pathlib.Path, args) -> list[pathlib.Path]: files = [] for root, _, filenames in os.walk(dir): for filename in filenames: - file = pathlib.Path(os.path.join(root, filename)) - file_stat = file.stat() - if file_in_size_range( - file_stat, args.min_size, args.max_size - ) and file_accessed_in_range(file_stat, args.since): - files.append(file) + try: + file = pathlib.Path(os.path.join(root, filename)) + file_stat = file.stat() + if file_in_size_range( + file_stat, args.min_size, args.max_size + ) and file_accessed_in_range(file_stat, args.since): + files.append(file) + except FileNotFoundError: + pass + return files From 8d6cee894cfb0403b8eada1bb4637db723f726d4 Mon Sep 17 00:00:00 2001 From: Mira Kuntz Date: Thu, 12 Sep 2024 09:16:37 +0200 Subject: [PATCH 12/19] delete users pre-testing pre-doc update --- templates/walle.py | 513 +++++++++++++++++++++++++++++++-------------- 1 file changed, 357 insertions(+), 156 deletions(-) diff --git a/templates/walle.py b/templates/walle.py index 570ecde..9afde44 100644 --- a/templates/walle.py +++ b/templates/walle.py @@ -11,6 +11,10 @@ import sys import time import zlib +import logging +from datetime import datetime, timezone +from dateutil.relativedelta import relativedelta +from typing import Dict import galaxy_jwd import requests @@ -21,13 +25,46 @@ CURRENT_TIME = int(time.time()) +DEFAULT_SUBJECT = "Galaxy Account deleted due to ToS violations" +DEFAULT_MESSAGE = """ +Our systems have detected activity related to your Galaxy account that most likely violate our terms of service. +To prevent damage and in accordance with our terms of service, we automatically deleted your account. +This means your jobs were terminated and you can not login anymore. +However it is possible to restore the account and its data. +If you think your account was deleted due to an error, please contact +""" +DEFAULT_NOTIFICATION_EXPIRATION = 6 +ONLY_ONE_INSTANCE = "The other must be an instance of the Severity" + +UserId = str +UserMail = str +UserIdMail = Dict[UserId, UserMail] + +logging.basicConfig( + format="{asctime} - {levelname} - {message}", + style="{", + datefmt="%Y-%m-%d %H:%M", +) +logger = logging.getLogger(__name__) + def convert_arg_to_byte(mb: str) -> int: return int(mb) << 20 -def convert_arg_to_seconds(hours: str) -> int: - return int(hours) * 60 * 60 +def convert_arg_to_seconds(hours: str) -> float: + return float(hours) * 60 * 60 + + +def get_iso_time_utc_add_months(months_in_future: int): + """ + Get the current UTC time and format it to ISO 8601 format + """ + calculated_time = datetime.now(timezone.utc) + relativedelta( + months=months_in_future + ) + formatted_time = calculated_time.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" + return formatted_time class Severity: @@ -37,7 +74,7 @@ def __init__(self, number: int, name: str): def __eq__(self, other) -> bool: if not isinstance(other, Severity): - raise ValueError("The other must be an instance of the Severity") + raise ValueError(ONLY_ONE_INSTANCE) if self.value == other.value and self.name == other.name: return True else: @@ -45,7 +82,7 @@ def __eq__(self, other) -> bool: def __le__(self, other) -> bool: if not isinstance(other, Severity): - raise ValueError("The other must be an instance of the Severity") + raise ValueError(ONLY_ONE_INSTANCE) if self.value <= other.value: return True else: @@ -53,7 +90,7 @@ def __le__(self, other) -> bool: def __ge__(self, other) -> bool: if not isinstance(other, Severity): - raise ValueError("The other must be an instance of the Severity") + raise ValueError(ONLY_ONE_INSTANCE) if self.value >= other.value: return True else: @@ -67,6 +104,7 @@ def convert_str_to_severity(test_level: str) -> Severity: for level in VALID_SEVERITIES: if (level.name.casefold()).__eq__(test_level.casefold()): return level + raise ValueError("{test_level} is not a valid severity level") def make_parser() -> argparse.ArgumentParser: @@ -94,7 +132,7 @@ def make_parser() -> argparse.ArgumentParser: and convert it to integer representation. e.g. with: gzip -1 -c /path/to/file | tail -c8 | hexdump -n4 -e '"%u"' - + The following ENVs (same as gxadmin's) should be set: GALAXY_CONFIG_FILE: Path to the galaxy.yml file GALAXY_LOG_DIR: Path to the Galaxy log directory @@ -106,8 +144,12 @@ def make_parser() -> argparse.ArgumentParser: :5432:*:: The '--delete-user' flag requires additional environment variables: GALAXY_BASE_URL: Instance hostname including scheme (https://examplegalaxy.org) + GALAXY_ADMIN_EMAIL: The email users can contact to file complaints GALAXY_API_KEY: Galaxy API key with admin privileges GALAXY_ROOT: Galaxy root directiory (e.g. /srv/galaxy) + WALLE_USER_DELETION_MESSAGE: The message Galaxy should send as notification to a user before it deletes their account + WALLE_USER_DELETION_SUBJECT: The message's subject line. + WALLE_NOTIFICATION_EXP_MONTHS: When the notification expires (is deleted from the database) in months from when it is sent. """, formatter_class=argparse.RawTextHelpFormatter, ) @@ -137,22 +179,27 @@ def make_parser() -> argparse.ArgumentParser: my_parser.add_argument( "--min-size", metavar="MIN_SIZE_MB", - help="Minimum filesize im MB to limit the files to scan.", + help="Minimum filesize im MB to limit the files to scan. \ + The check will be skipped if value is 0 (default)", type=convert_arg_to_byte, + default=0, ) my_parser.add_argument( "--max-size", metavar="MAX_SIZE_MB", help="Maximum filesize im MB to limit the files to scan. \ - CAUTION: Not setting this value can lead to very long computation times", + CAUTION: Not setting this value can lead to very long computation times. \ + The check will be skipped if value is 0 (default)", type=convert_arg_to_byte, + default=0, ) my_parser.add_argument( "--since", - help="Access time in hours backwards from now", + help="Access time in hours backwards from now, default=0 (skip check)", type=convert_arg_to_seconds, + default=0, ) my_parser.add_argument( @@ -190,33 +237,6 @@ def make_parser() -> argparse.ArgumentParser: return my_parser -class Job: - def __init__( - self, - user_id: int, - user_name: str, - user_mail: str, - tool_id: str, - galaxy_id: int, - runner_id: int, - runner_name: str, - object_store_id: int, - jwd=None, - ) -> None: - self.user_id = user_id - self.user_name = user_name - self.user_mail = user_mail - self.tool_id = tool_id - self.galaxy_id = galaxy_id - self.runner_id = runner_id - self.runner_name = runner_name - self.object_store_id = object_store_id - self.jwd = jwd - - def report_id_and_user_name(self) -> str: - return f"{self.galaxy_id} {self.user_name}" - - class Malware: """ Loads a yaml with the following schema @@ -251,45 +271,152 @@ def __init__( self.sha1 = sha1 -def file_accessed_in_range( - file_stat: os.stat_result, since: int, now=CURRENT_TIME -) -> bool: - if since is not None: - if now - since > file_stat.st_atime: - return False - return True - +class Job: + def __init__( + self, + user_id: UserId, + user_name: str, + user_mail: UserMail, + tool_id: str, + galaxy_id: int, + runner_id: int, + runner_name: str, + object_store_id: int, + jwd=pathlib.Path(), + files=None, + ) -> None: + self.user_id = user_id + self.user_name = user_name + self.user_mail = user_mail + self.tool_id = tool_id + self.galaxy_id = galaxy_id + self.runner_id = runner_id + self.runner_name = runner_name + self.object_store_id = object_store_id + self.jwd = jwd + self.files = files | [] -def file_in_size_range(file_stat: os.stat_result, min_size=None, max_size=None) -> bool: - if min_size is not None: - if file_stat.st_size < min_size: - return False - if max_size is not None: - if file_stat.st_size > max_size: + def set_jwd_path(self, jwd: str) -> bool: + jwd_path = pathlib.Path(jwd) + if jwd_path.exists(): # Move to job initialization + self.jwd = jwd_path + return True + else: return False - return True + def check_if_jwd_exists_and_get_files(self, args: argparse.Namespace) -> bool: + """ + Gets all files of given directory and its subdirectories and + appends file to a list of pathlib.Path objects, if atime + and the filesize is within the specified range. + """ + if self.jwd.exists(): + for dirpath, _, filenames in os.walk(self.jwd): + self.collect_files_in_a_directory( + args=args, dirpath=dirpath, filenames=filenames + ) + if len(self.files) > 0: + return True + return False -def all_files_in_dir(dir: pathlib.Path, args) -> list[pathlib.Path]: - """ - Gets all files of given directory and its subdirectories and - appends file to a list of pathlib.Path objects, if atime - and the filesize is within the specified range. - """ - files = [] - for root, _, filenames in os.walk(dir): + def collect_files_in_a_directory( + self, args: argparse.Namespace, dirpath: str, filenames: list[str] + ): for filename in filenames: - try: - file = pathlib.Path(os.path.join(root, filename)) - file_stat = file.stat() - if file_in_size_range( - file_stat, args.min_size, args.max_size - ) and file_accessed_in_range(file_stat, args.since): - files.append(file) - except FileNotFoundError: - pass + file = pathlib.Path(os.path.join(dirpath, filename)) + if not os.path.islink(file): + self.check_if_file_in_range_and_accessed(args=args, filepath=file) - return files + def check_if_file_in_range_and_accessed( + self, filepath: pathlib.Path, args: argparse.Namespace + ): + try: + file_stat = filepath.stat() + if file_in_size_range( + file_stat, args.min_size, args.max_size + ) and file_accessed_in_range(file_stat, args.since): + self.files.append(filepath) + except OSError: + pass + + def report_id_and_user_name(self) -> UserIdMail: + logger.info(self.user_id, self.user_name) + return {self.user_id: self.user_mail} + + def report_matching_malware(self, index: int, malware: Malware): + """ + Create log line depending on verbosity + """ + logger.debug( + get_iso_time_utc_add_months(0), + malware.severity.name, + self.user_id, + self.user_name, + self.user_mail, + self.tool_id, + self.galaxy_id, + self.runner_id, + self.runner_name, + self.object_store_id, + malware.malware_class, + malware.program, + malware.version, + self.files[index], + ) + + +class Case: + def __init__( + self, + job: Job, + malware: Malware, + fileindex: int, + reported_users: UserIdMail, + delete_users: UserIdMail, + severity: Severity, + ) -> None: + self.job = job + self.malware = malware + self.fileindex = fileindex + self.reported_users = reported_users + self.delete_users = delete_users + self.severity = severity + + def report_according_to_verbosity(self, verbose: bool) -> UserIdMail: + if verbose: + self.job.report_matching_malware(index=self.fileindex, malware=self.malware) + return {} + elif self.job.user_id not in self.reported_users: + return self.job.report_id_and_user_name() + else: + return {} + + def mark_user_for_deletion(self, delete: bool) -> UserIdMail: + if delete and self.job.user_id not in self.delete_users: + return self.check_severity_level() + return {} + + def check_severity_level(self) -> UserIdMail: + if self.malware.severity >= self.severity: + return {self.job.user_id: self.job.user_mail} + else: + return {} + + +def file_accessed_in_range( + file_stat: os.stat_result, since: float, now=time.time() +) -> bool: + if since != 0 and now - since > file_stat.st_atime: + return False + return True + + +def file_in_size_range(file_stat: os.stat_result, min_size: int, max_size: int) -> bool: + if min_size is not None and file_stat.st_size < min_size: + return False + if max_size is not None and file_stat.st_size > max_size: + return False + return True def load_malware_lib_from_env(malware_file: pathlib.Path) -> dict: @@ -342,15 +469,6 @@ def scan_file_for_malware( return matches -def report_matching_malware(job: Job, malware: Malware, path: pathlib.Path) -> str: - """ - Create log line depending on verbosity - """ - return f"{datetime.datetime.now()} {malware.severity.name} {job.user_id} {job.user_name} {job.user_mail} \ -{job.tool_id} {job.galaxy_id} {job.runner_id} {job.runner_name} {job.object_store_id} \ -{malware.malware_class} {malware.program} {malware.version} {path}" - - def construct_malware_list(malware_yaml: dict) -> list[Malware]: """ creates a flat list of malware objects, that hold all info @@ -401,7 +519,7 @@ def __init__( self.backends = backends # might deserve it's own function in galaxy_jwd.py - def get_jwd_path(self, job: Job): + def get_jwd_path(self, job: Job) -> str: jwd = galaxy_jwd.decode_path( job.galaxy_id, [job.object_store_id], @@ -420,8 +538,8 @@ def __init__(self, db_host: str, db_name: str, db_user: str, db_password: str): db_password, ) - def get_running_jobs(self, tool=None) -> list[Job]: - query = f""" + def get_running_jobs(self, tool: str) -> list[Job]: + query = """ SELECT j.user_id, u.username, u.email, j.tool_id, j.id, j.job_runner_external_id, j.job_runner_name, j.object_store_id FROM @@ -432,7 +550,7 @@ def get_running_jobs(self, tool=None) -> list[Job]: AND user_id IS NOT NULL """ cur = self.conn.cursor() - if len(tool) > 0: + if tool is not None and len(tool) > 0: query += f"AND tool_id LIKE '%{tool}%'" cur.execute(query + ";") running_jobs = cur.fetchall() @@ -441,11 +559,8 @@ def get_running_jobs(self, tool=None) -> list[Job]: # Create a dictionary with job_id as key and object_store_id, and # update_time as values if not running_jobs: - print( - f"No running jobs with tool_id like {tool} found.", - file=sys.stderr, - ) - sys.exit(1) + logger.warn(f"No running jobs with tool_id like {tool} found.") + sys.exit(0) running_jobs_list = [] for ( user_id, @@ -472,59 +587,146 @@ def get_running_jobs(self, tool=None) -> list[Job]: return running_jobs_list +def evaluate_match_for_deletion( + job: Job, + match: Malware, + delete_users: UserIdMail, + severity: Severity, +) -> UserIdMail: + """ + If in verbose mode, print detailed information for every match. No updates on 'reported' needed. + """ + if job.user_id not in delete_users and (severity <= match.severity): + return {job.user_id: job.user_mail} + return {} + + def get_path_from_env_or_error(env: str) -> pathlib.Path: - if os.environ.get(env): - if (path := pathlib.Path(os.environ.get(env).strip())).exists(): + try: + os.environ.get(env) + try: + (path := pathlib.Path(os.environ.get(env, "").strip())).exists() return path - else: - raise ValueError(f"Path for {env} is invalid") - else: - raise ValueError(f"Please set ENV {env}") + except ValueError: + logger.error(f"Path for {env} is invalid") + raise ValueError + except ValueError: + logger.error(f"Please set ENV {env}") + raise ValueError def get_str_from_env_or_error(env: str) -> str: - if os.environ.get(env): - if len(from_env := os.environ.get(env).strip()) > 0: - return from_env - else: - raise ValueError(f"Path for {env} is invalid") - else: - raise ValueError(f"Please set ENV {env}") + try: + os.environ.get(env) + try: + if len(from_env := os.environ.get(env, "").strip()) == 0: + raise ValueError + else: + return from_env + except ValueError: + logger.error(f"Path for {env} is invalid") + raise ValueError + except ValueError: + logger.error(f"Please set ENV {env}") + raise ValueError class GalaxyAPI: - def __init__(self, base_url: str, api_key: str) -> None: + def __init__( + self, + base_url: str, + api_key: str, + admin_email: str, + delete_subject: str, + delete_message: str, + notification_expiration_months: int, + ) -> None: self.base_url = base_url self.api_key = api_key self.auth_header = {"x-api-key": self.api_key} + self.delete_subject = delete_subject + self.delete_message = delete_message + self.notification_expiration_months = notification_expiration_months + + def notify_user(self, encoded_user_id: UserId) -> bool: + url = f"{self.base_url}/api/notifications" + response = requests.post( + url=url, + json={ + "recipients": { + "user_ids": [encoded_user_id], + "group_ids": [], + "role_ids": [], + }, + "notification": { + "source": "string", + "category": "notification", + "variant": "urgent", + "content": { + "subject": self.delete_subject, + "message": self.delete_message, + "category": "message", + }, + "publication_time": get_iso_time_utc_add_months(0), + "expiration_time": get_iso_time_utc_add_months( + self.notification_expiration_months + ), + }, + }, + ) + if response.status_code == 200: + if response.json()["total_notifications_sent"] == 1: + return True + return False - def delete_user(self, encoded_user_id: str) -> bool: + def delete_user(self, encoded_user_id: UserId) -> bool: url = f"{self.base_url}/api/users/{encoded_user_id}" response = requests.delete(url=url, headers=self.auth_header) if response.status_code == 200: return True else: - print(f"Failed to delete user {encoded_user_id}!") + return False - def encode_galaxy_user_id(self, decoded_id: int) -> str: + def encode_galaxy_user_id(self, decoded_id: UserId) -> str: url = f"{self.base_url}/api/configuration/encode/{decoded_id}" response = requests.get(url=url, headers=self.auth_header) if response.status_code != 200: - print(f"Failed to encode id {decoded_id}!") + return "" else: json_response = response.json() return json_response["encoded_id"] + def encode_id_notify_and_delete_user(self, user_id: UserId): + encoded_user_id = self.encode_galaxy_user_id(decoded_id=user_id) + if self.notify_user(encoded_user_id): + logger.debug(f"User {user_id} notified.") + if self.delete_user(encoded_user_id): + logger.info(f"User {user_id} notified and deleted.") + else: + logger.error(f"Failed to delete user {encoded_user_id}!") + + +def print_table_header(verbose: bool, interactive: bool): + if interactive: + if verbose: + logger.debug( + "TIMESTAMP MALWARE_SEVERITY USER_ID USER_NAME USER_MAIL TOOL_ID GALAXY_JOB_ID \ + RUNNER_JOB_ID RUNNER_NAME OBJECT_STORE_ID MALWARE_CLASS \ + MALWARE_NAME MALWARE_VERSION PATH" + ) + else: + logger.info("GALAXY_USER JOB_ID") + def main(): """ Miner Finder's main function. Shows a status bar while processing the jobs found in Galaxy """ args = make_parser().parse_args() - galaxy_config_file = get_path_from_env_or_error("GALAXY_CONFIG_FILE") + logger.setLevel(logging.DEBUG if args.verbose else logging.INFO) jwd_getter = JWDGetter( - galaxy_config_file=galaxy_config_file, + galaxy_config_file=get_path_from_env_or_error("GALAXY_CONFIG_FILE"), pulsar_app_conf=get_path_from_env_or_error("GALAXY_PULSAR_APP_CONF"), ) db = RunningJobDatabase( @@ -541,22 +743,11 @@ def main(): ) ) jobs = db.get_running_jobs(args.tool) - if args.delete_user: - api = GalaxyAPI( - api_key=get_str_from_env_or_error("GALAXY_API_KEY"), - base_url=get_str_from_env_or_error("GALAXY_BASE_URL"), - ) - delete_users = set() - report_users = set() - if args.interactive: - if args.verbose: - print( - "TIMESTAMP MALWARE_SEVERITY USER_ID USER_NAME USER_MAIL TOOL_ID GALAXY_JOB_ID \ - RUNNER_JOB_ID RUNNER_NAME OBJECT_STORE_ID MALWARE_CLASS \ - MALWARE_NAME MALWARE_VERSION PATH" - ) - else: - print("GALAXY_USER JOB_ID") + delete_users = dict() + reported_users: UserIdMail = {} + + print_table_header(verbose=args.verbose, interactive=args.interactive) + for job in tqdm( jobs, disable=(not args.interactive), @@ -564,42 +755,52 @@ def main(): ascii=False, ncols=75, ): - jwd_path = jwd_getter.get_jwd_path(job) - if pathlib.Path(jwd_path).exists(): - job.jwd = pathlib.Path(jwd_path) - for file in all_files_in_dir(job.jwd, args): - matching_malware = scan_file_for_malware( - chunksize=args.chunksize, file=file, lib=malware_library - ) - if len(matching_malware) > 0: - print("\n") - for malware in matching_malware: - # report only once - if not args.verbose and job.user_id not in report_users: - print(job.report_id_and_user_name()) - else: - print( - report_matching_malware( - job=job, - malware=malware, - path=file, - ) - ) - report_users.add(job.user_id) - if args.delete_user and job.user_id not in delete_users: - if args.delete_user <= malware.severity: - delete_users.add(job.user_id) - else: - print( - f"JWD for Job {job.galaxy_id} found but does not exist in FS", - file=sys.stderr, + if not job.set_jwd_path(jwd_getter.get_jwd_path(job)): + continue + if not job.check_if_jwd_exists_and_get_files(args): + continue + for index, file in enumerate(job.files): + matching_malware = scan_file_for_malware( + chunksize=args.chunksize, file=file, lib=malware_library ) + for malware in matching_malware: + case = Case( + job=job, + malware=malware, + fileindex=index, + reported_users=reported_users, + severity=args.severity, + delete_users=delete_users, + ) + reported_users = case.report_according_to_verbosity( + verbose=args.verbose + ) + delete_users = case.mark_user_for_deletion(args.delete_user) # Deletes users at the end, to report all malicious jobs of a user - for user_id in delete_users: - # add notification here - api.delete_user(encoded_user_id=api.encode_galaxy_user_id(decoded_id=user_id)) + if args.delete_user: + api = GalaxyAPI( + api_key=get_str_from_env_or_error("GALAXY_API_KEY"), + base_url=get_str_from_env_or_error("GALAXY_BASE_URL"), + admin_email=get_str_from_env_or_error("GALAXY_ADMIN_EMAIL"), + delete_subject=os.environ.get( + "WALLE_USER_DELETION_SUBJECT", default=DEFAULT_SUBJECT + ), + delete_message=os.environ.get( + "WALLE_USER_DELETION_MESSAGE", default=DEFAULT_MESSAGE + ), + notification_expiration_months=int( + os.environ.get( + "WALLE_NOTIFICATION_EXP_MONTHS", + default=DEFAULT_NOTIFICATION_EXPIRATION, + ) + ), + ) + for user_id in delete_users: + # add notification here + api.encode_id_notify_and_delete_user(user_id) + if args.interactive: - print("Complete.") + logger.debug("Complete.") if __name__ == "__main__": From e5dd0ccc2847b710d81125b272051b2b0e604e9f Mon Sep 17 00:00:00 2001 From: Mira Kuntz Date: Tue, 17 Sep 2024 15:14:19 +0200 Subject: [PATCH 13/19] fixes from testing, notifications still broken --- templates/walle.py | 146 +++++++++++++++++++++++---------------------- 1 file changed, 74 insertions(+), 72 deletions(-) diff --git a/templates/walle.py b/templates/walle.py index 44a5c16..a644f29 100644 --- a/templates/walle.py +++ b/templates/walle.py @@ -283,7 +283,7 @@ def __init__( runner_name: str, object_store_id: int, jwd=pathlib.Path(), - files=None, + files=[], ) -> None: self.user_id = user_id self.user_name = user_name @@ -294,7 +294,7 @@ def __init__( self.runner_name = runner_name self.object_store_id = object_store_id self.jwd = jwd - self.files = files | [] + self.files = files def set_jwd_path(self, jwd: str) -> bool: jwd_path = pathlib.Path(jwd) @@ -339,68 +339,71 @@ def check_if_file_in_range_and_accessed( except OSError: pass - def report_id_and_user_name(self) -> UserIdMail: - logger.info(self.user_id, self.user_name) - return {self.user_id: self.user_mail} - - def report_matching_malware(self, index: int, malware: Malware): - """ - Create log line depending on verbosity - """ - logger.debug( - get_iso_time_utc_add_months(0), - malware.severity.name, - self.user_id, - self.user_name, - self.user_mail, - self.tool_id, - self.galaxy_id, - self.runner_id, - self.runner_name, - self.object_store_id, - malware.malware_class, - malware.program, - malware.version, - self.files[index], - ) class Case: def __init__( self, + verbose: bool, job: Job, malware: Malware, fileindex: int, reported_users: UserIdMail, delete_users: UserIdMail, - severity: Severity, ) -> None: self.job = job + self.verbose = verbose self.malware = malware self.fileindex = fileindex self.reported_users = reported_users self.delete_users = delete_users - self.severity = severity - def report_according_to_verbosity(self, verbose: bool) -> UserIdMail: - if verbose: - self.job.report_matching_malware(index=self.fileindex, malware=self.malware) + def report_according_to_verbosity(self) -> UserIdMail: + if self.verbose: + self.report_matching_malware() return {} elif self.job.user_id not in self.reported_users: - return self.job.report_id_and_user_name() + return self.report_id_and_user_name() else: return {} - def mark_user_for_deletion(self, delete: bool) -> UserIdMail: - if delete and self.job.user_id not in self.delete_users: - return self.check_severity_level() + def mark_user_for_deletion(self, severity: Severity) -> UserIdMail: + if self.job.user_id not in self.delete_users: + return self.check_severity_level(severity) return {} - def check_severity_level(self) -> UserIdMail: - if self.malware.severity >= self.severity: + def check_severity_level(self, severity: Severity) -> UserIdMail: + if self.malware.severity >= severity: + logger.debug(f"User {self.job.user_id} marked for deletion") return {self.job.user_id: self.job.user_mail} else: return {} + def report_id_and_user_name(self) -> UserIdMail: + logger.info(self.job.user_id, self.job.user_name) + return {self.job.user_id: self.job.user_mail} + + def report_matching_malware(self): + """ + Create log line depending on verbosity + """ + logger.debug( + "%s %s %s %s %s %s %s %s %s %s %s %s %s %s", + get_iso_time_utc_add_months(0), + self.malware.severity.name, + self.job.user_id, + self.job.user_name, + self.job.user_mail, + self.job.tool_id, + self.job.galaxy_id, + self.job.runner_id, + self.job.runner_name, + self.job.object_store_id, + self.malware.malware_class, + self.malware.program, + self.malware.version, + self.job.files[self.fileindex], + ) + def file_accessed_in_range( file_stat: os.stat_result, since: float, now=time.time() @@ -529,7 +532,7 @@ def get_jwd_path(self, job: Job) -> str: class RunningJobDatabase(galaxy_jwd.Database): - def __init__(self, db_host: str, db_name: str, db_user: str, db_password: str): + def __init__(self, db_name: str, db_host=None, db_user=None, db_password=None): super().__init__( db_name, db_user, @@ -558,7 +561,7 @@ def get_running_jobs(self, tool: str) -> list[Job]: # Create a dictionary with job_id as key and object_store_id, and # update_time as values if not running_jobs: - logger.warn(f"No running jobs with tool_id like {tool} found.") + logger.warning(f"No running jobs with tool_id like {tool} found.") sys.exit(0) running_jobs_list = [] for ( @@ -651,6 +654,7 @@ def notify_user(self, encoded_user_id: UserId) -> bool: url = f"{self.base_url}/api/notifications" response = requests.post( url=url, + headers=self.auth_header, json={ "recipients": { "user_ids": [encoded_user_id], @@ -659,7 +663,7 @@ def notify_user(self, encoded_user_id: UserId) -> bool: }, "notification": { "source": "string", - "category": "notification", + "category": "message", "variant": "urgent", "content": { "subject": self.delete_subject, @@ -676,20 +680,23 @@ def notify_user(self, encoded_user_id: UserId) -> bool: if response.status_code == 200: if response.json()["total_notifications_sent"] == 1: return True + logger.error("Can not notify user %s, response from Galaxy: %s", encoded_user_id, response.content) return False def delete_user(self, encoded_user_id: UserId) -> bool: url = f"{self.base_url}/api/users/{encoded_user_id}" response = requests.delete(url=url, headers=self.auth_header) - if response.status_code == 200: - return True - else: + if response.status_code != 200: + logger.error("Can not encode delete user %s, response from Galaxy: %s", encoded_user_id, response.content) return False + else: + return True def encode_galaxy_user_id(self, decoded_id: UserId) -> str: url = f"{self.base_url}/api/configuration/encode/{decoded_id}" response = requests.get(url=url, headers=self.auth_header) if response.status_code != 200: + logger.error("Can not encode user id, response from Galaxy: %s", response.content) return "" else: json_response = response.json() @@ -701,22 +708,32 @@ def encode_id_notify_and_delete_user(self, user_id: UserId): logger.debug(f"User {user_id} notified.") if self.delete_user(encoded_user_id): logger.info(f"User {user_id} notified and deleted.") - else: - logger.error(f"Failed to delete user {encoded_user_id}!") def print_table_header(verbose: bool, interactive: bool): if interactive: if verbose: logger.debug( - "TIMESTAMP MALWARE_SEVERITY USER_ID USER_NAME USER_MAIL TOOL_ID GALAXY_JOB_ID \ - RUNNER_JOB_ID RUNNER_NAME OBJECT_STORE_ID MALWARE_CLASS \ - MALWARE_NAME MALWARE_VERSION PATH" + "TIMESTAMP MALWARE_SEVERITY USER_ID USER_NAME USER_MAIL \ + TOOL_ID GALAXY_JOB_ID RUNNER_JOB_ID RUNNER_NAME \ + OBJECT_STORE_ID MALWARE_CLASS MALWARE_NAME \ + MALWARE_VERSION PATH" ) else: logger.info("GALAXY_USER JOB_ID") +def get_database_with_password() -> RunningJobDatabase: + return RunningJobDatabase( + db_host=get_str_from_env_or_error("PGHOST"), + db_password=galaxy_jwd.extract_password_from_pgpass( + get_path_from_env_or_error("PGPASSFILE") + ), + db_name=get_str_from_env_or_error("PGDATABASE"), + db_user=get_str_from_env_or_error("PGUSER"), + ) + + def main(): """ Miner Finder's main function. Shows a status bar while processing the jobs found in Galaxy @@ -728,14 +745,7 @@ def main(): galaxy_config_file=get_path_from_env_or_error("GALAXY_CONFIG_FILE"), pulsar_app_conf=get_path_from_env_or_error("GALAXY_PULSAR_APP_CONF"), ) - db = RunningJobDatabase( - db_host=get_str_from_env_or_error("PGHOST"), - db_password=galaxy_jwd.extract_password_from_pgpass( - get_path_from_env_or_error("PGPASSFILE") - ), - db_name=get_str_from_env_or_error("PGDATABASE"), - db_user=get_str_from_env_or_error("PGUSER"), - ) + db = get_database_with_password() malware_library = construct_malware_list( malware_yaml=load_malware_lib_from_env( malware_file=get_path_from_env_or_error("MALWARE_LIB") @@ -746,14 +756,7 @@ def main(): reported_users: UserIdMail = {} print_table_header(verbose=args.verbose, interactive=args.interactive) - - for job in tqdm( - jobs, - disable=(not args.interactive), - desc="Processing jobs…", - ascii=False, - ncols=75, - ): + for job in jobs: if not job.set_jwd_path(jwd_getter.get_jwd_path(job)): continue if not job.check_if_jwd_exists_and_get_files(args): @@ -765,16 +768,15 @@ def main(): for malware in matching_malware: case = Case( job=job, + verbose=args.verbose, malware=malware, fileindex=index, reported_users=reported_users, - severity=args.severity, delete_users=delete_users, ) - reported_users = case.report_according_to_verbosity( - verbose=args.verbose - ) - delete_users = case.mark_user_for_deletion(args.delete_user) + reported_users = case.report_according_to_verbosity() + if args.delete_user: + delete_users = case.mark_user_for_deletion(args.delete_user) # Deletes users at the end, to report all malicious jobs of a user if args.delete_user: api = GalaxyAPI( @@ -782,15 +784,15 @@ def main(): base_url=get_str_from_env_or_error("GALAXY_BASE_URL"), admin_email=get_str_from_env_or_error("GALAXY_ADMIN_EMAIL"), delete_subject=os.environ.get( - "WALLE_USER_DELETION_SUBJECT", default=DEFAULT_SUBJECT + "WALLE_USER_DELETION_SUBJECT", DEFAULT_SUBJECT ), delete_message=os.environ.get( - "WALLE_USER_DELETION_MESSAGE", default=DEFAULT_MESSAGE + "WALLE_USER_DELETION_MESSAGE", DEFAULT_MESSAGE ), notification_expiration_months=int( os.environ.get( "WALLE_NOTIFICATION_EXP_MONTHS", - default=DEFAULT_NOTIFICATION_EXPIRATION, + DEFAULT_NOTIFICATION_EXPIRATION, ) ), ) From 38c80e9aeb900a6d8a312582f9ed9171442a5f74 Mon Sep 17 00:00:00 2001 From: Mira Kuntz Date: Tue, 17 Sep 2024 16:16:05 +0200 Subject: [PATCH 14/19] remove publish and expiration time --- templates/walle.py | 33 ++------------------------------- 1 file changed, 2 insertions(+), 31 deletions(-) diff --git a/templates/walle.py b/templates/walle.py index a644f29..9fb4f68 100644 --- a/templates/walle.py +++ b/templates/walle.py @@ -33,7 +33,6 @@ However it is possible to restore the account and its data. If you think your account was deleted due to an error, please contact """ -DEFAULT_NOTIFICATION_EXPIRATION = 6 ONLY_ONE_INSTANCE = "The other must be an instance of the Severity" UserId = str @@ -55,18 +54,6 @@ def convert_arg_to_byte(mb: str) -> int: def convert_arg_to_seconds(hours: str) -> float: return float(hours) * 60 * 60 - -def get_iso_time_utc_add_months(months_in_future: int): - """ - Get the current UTC time and format it to ISO 8601 format - """ - calculated_time = datetime.now(timezone.utc) + relativedelta( - months=months_in_future - ) - formatted_time = calculated_time.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" - return formatted_time - - class Severity: def __init__(self, number: int, name: str): self.value = number @@ -387,8 +374,7 @@ def report_matching_malware(self): Create log line depending on verbosity """ logger.debug( - "%s %s %s %s %s %s %s %s %s %s %s %s %s %s", - get_iso_time_utc_add_months(0), + "%s %s %s %s %s %s %s %s %s %s %s %s %s", self.malware.severity.name, self.job.user_id, self.job.user_name, @@ -641,14 +627,12 @@ def __init__( admin_email: str, delete_subject: str, delete_message: str, - notification_expiration_months: int, ) -> None: self.base_url = base_url self.api_key = api_key self.auth_header = {"x-api-key": self.api_key} self.delete_subject = delete_subject self.delete_message = delete_message - self.notification_expiration_months = notification_expiration_months def notify_user(self, encoded_user_id: UserId) -> bool: url = f"{self.base_url}/api/notifications" @@ -670,10 +654,6 @@ def notify_user(self, encoded_user_id: UserId) -> bool: "message": self.delete_message, "category": "message", }, - "publication_time": get_iso_time_utc_add_months(0), - "expiration_time": get_iso_time_utc_add_months( - self.notification_expiration_months - ), }, }, ) @@ -735,9 +715,6 @@ def get_database_with_password() -> RunningJobDatabase: def main(): - """ - Miner Finder's main function. Shows a status bar while processing the jobs found in Galaxy - """ args = make_parser().parse_args() logger.setLevel(logging.DEBUG if args.verbose else logging.INFO) @@ -788,13 +765,7 @@ def main(): ), delete_message=os.environ.get( "WALLE_USER_DELETION_MESSAGE", DEFAULT_MESSAGE - ), - notification_expiration_months=int( - os.environ.get( - "WALLE_NOTIFICATION_EXP_MONTHS", - DEFAULT_NOTIFICATION_EXPIRATION, - ) - ), + ) ) for user_id in delete_users: # add notification here From 37dc3f1bc37da450f90efe854895ad40f97b92b3 Mon Sep 17 00:00:00 2001 From: Mira Kuntz Date: Wed, 18 Sep 2024 10:23:27 +0200 Subject: [PATCH 15/19] updated help --- templates/walle.py | 68 ++++++++++++++++++++-------------------------- 1 file changed, 30 insertions(+), 38 deletions(-) diff --git a/templates/walle.py b/templates/walle.py index 9fb4f68..f9bbd03 100644 --- a/templates/walle.py +++ b/templates/walle.py @@ -4,7 +4,6 @@ # when a file in the JWD matches to a list of hashes import argparse -import datetime import hashlib import os import pathlib @@ -12,6 +11,7 @@ import time import zlib import logging +from argparse import RawTextHelpFormatter from datetime import datetime, timezone from dateutil.relativedelta import relativedelta from typing import Dict @@ -19,12 +19,9 @@ import galaxy_jwd import requests import yaml -from tqdm import tqdm CHECKSUM_FILE_ENV = "MALWARE_LIB" -CURRENT_TIME = int(time.time()) - DEFAULT_SUBJECT = "Galaxy Account deleted due to ToS violations" DEFAULT_MESSAGE = """ Our systems have detected activity related to your Galaxy account that most likely violate our terms of service. @@ -33,7 +30,7 @@ However it is possible to restore the account and its data. If you think your account was deleted due to an error, please contact """ -ONLY_ONE_INSTANCE = "The other must be an instance of the Severity" +ONLY_ONE_INSTANCE = "The other must be an instance of the Severity class" UserId = str UserMail = str @@ -102,7 +99,7 @@ def make_parser() -> argparse.ArgumentParser: from the environment variable "MALWARE_LIB", searches in JWDs of currently running jobs for matching files and reports jobs, users and malware details if specified. - Malware library file has the following schema: + The malware library file has the following schema: class: program: version: @@ -112,6 +109,7 @@ def make_parser() -> argparse.ArgumentParser: crc32: sha1: WARNING: + ---------------------------------------------------------------- Be careful with how you generate the CRC32 hashes: There are multiple algorithms, this script is using the one specified by RFC in the GZIP specification. @@ -119,6 +117,7 @@ def make_parser() -> argparse.ArgumentParser: and convert it to integer representation. e.g. with: gzip -1 -c /path/to/file | tail -c8 | hexdump -n4 -e '"%u"' + ---------------------------------------------------------------- The following ENVs (same as gxadmin's) should be set: GALAXY_CONFIG_FILE: Path to the galaxy.yml file @@ -126,9 +125,9 @@ def make_parser() -> argparse.ArgumentParser: PGDATABASE: Name of the Galaxy database PGUSER: Galaxy database user PGHOST: Galaxy database host - PGPASSFILE: path to .pgpass file (same as gxadmin's) in format: :5432:*:: + The '--delete-user' flag requires additional environment variables: GALAXY_BASE_URL: Instance hostname including scheme (https://examplegalaxy.org) GALAXY_ADMIN_EMAIL: The email users can contact to file complaints @@ -136,7 +135,6 @@ def make_parser() -> argparse.ArgumentParser: GALAXY_ROOT: Galaxy root directiory (e.g. /srv/galaxy) WALLE_USER_DELETION_MESSAGE: The message Galaxy should send as notification to a user before it deletes their account WALLE_USER_DELETION_SUBJECT: The message's subject line. - WALLE_NOTIFICATION_EXP_MONTHS: When the notification expires (is deleted from the database) in months from when it is sent. """, formatter_class=argparse.RawTextHelpFormatter, ) @@ -156,18 +154,11 @@ def make_parser() -> argparse.ArgumentParser: default=100, ) - # not yet implemented - # my_parser.add_argument( - # "--remove-jobs", - # action="store_true", - # help="Removes the jobs from condor and fails them in Galaxy", - # ) - my_parser.add_argument( "--min-size", metavar="MIN_SIZE_MB", help="Minimum filesize im MB to limit the files to scan. \ - The check will be skipped if value is 0 (default)", +The check will be skipped if value is 0 (default)", type=convert_arg_to_byte, default=0, ) @@ -175,9 +166,9 @@ def make_parser() -> argparse.ArgumentParser: my_parser.add_argument( "--max-size", metavar="MAX_SIZE_MB", - help="Maximum filesize im MB to limit the files to scan. \ - CAUTION: Not setting this value can lead to very long computation times. \ - The check will be skipped if value is 0 (default)", + help="Maximum filesize im MB to limit the files to scan.\n \ +CAUTION: Not setting this value can lead to very long computation times.\n \ +The check will be skipped if value is 0 (default)", type=convert_arg_to_byte, default=0, ) @@ -191,8 +182,8 @@ def make_parser() -> argparse.ArgumentParser: my_parser.add_argument( "--tool", - help="A string to filter tools in the tool_id column of currently running jobs. \ - Use like 'grep' after the gxadmin query queue-details command.", + help="A string to filter tools in the tool_id column of currently running jobs.\n \ +Use like 'grep' after the gxadmin query queue-details command.", type=str, default="", ) @@ -200,8 +191,8 @@ def make_parser() -> argparse.ArgumentParser: "-v", "--verbose", action="store_true", - help="Report not only the job and user ID that matched, but also Path of matched file and malware info. \ - If set, the scanning process will quit after the first match in a JWD to save resources.", + help="Report not only the job and user ID that matched, but also Path of matched file and malware info.\n \ +If set, the scanning process will quit after the first match in a JWD to save resources.", ) my_parser.add_argument( "-i", @@ -214,12 +205,13 @@ def make_parser() -> argparse.ArgumentParser: metavar="MIN_SEVERITY", choices=VALID_SEVERITIES, type=convert_str_to_severity, - help="Delete user when severity level is equal or higher. \ - Make sure that you know what the consequences are on your \ - instance, when a user is set to deleted (e.g. when a user is purged automatically after deletion). \ - Following additional environment variables are expected: \ - GALAXY_API_KEY \ - GALAXY_BASE_URL", + help="Delete user when the found malware's severity level is equal or higher than this value.\n \ +Possible values are 'LOW', 'MEDIUM' or 'HIGH'.\n \ +Make sure that you know the consequences on your instance, especially regarding GDPR and\n \ +what happens when a user is set to deleted (e.g. when a user is purged automatically after deletion).\n \ +Following additional environment variables are expected:\n \ +GALAXY_API_KEY\n \ +GALAXY_BASE_URL", ) return my_parser @@ -360,13 +352,13 @@ def mark_user_for_deletion(self, severity: Severity) -> UserIdMail: def check_severity_level(self, severity: Severity) -> UserIdMail: if self.malware.severity >= severity: - logger.debug(f"User {self.job.user_id} marked for deletion") + logger.debug(f"User %s marked for deletion",self.job.user_id) return {self.job.user_id: self.job.user_mail} else: return {} def report_id_and_user_name(self) -> UserIdMail: - logger.info(self.job.user_id, self.job.user_name) + logger.info("%s %s", self.job.user_id, self.job.user_name) return {self.job.user_id: self.job.user_mail} def report_matching_malware(self): @@ -547,7 +539,7 @@ def get_running_jobs(self, tool: str) -> list[Job]: # Create a dictionary with job_id as key and object_store_id, and # update_time as values if not running_jobs: - logger.warning(f"No running jobs with tool_id like {tool} found.") + logger.warning("No running jobs with tool_id like '%s' found.", tool) sys.exit(0) running_jobs_list = [] for ( @@ -596,10 +588,10 @@ def get_path_from_env_or_error(env: str) -> pathlib.Path: (path := pathlib.Path(os.environ.get(env, "").strip())).exists() return path except ValueError: - logger.error(f"Path for {env} is invalid") + logger.error(f"Path for %s is invalid", env) raise ValueError except ValueError: - logger.error(f"Please set ENV {env}") + logger.error(f"Please set ENV %s", env) raise ValueError @@ -612,10 +604,10 @@ def get_str_from_env_or_error(env: str) -> str: else: return from_env except ValueError: - logger.error(f"Path for {env} is invalid") + logger.error(f"Path for %s is invalid", env) raise ValueError except ValueError: - logger.error(f"Please set ENV {env}") + logger.error(f"Please set ENV %s", env) raise ValueError @@ -685,9 +677,9 @@ def encode_galaxy_user_id(self, decoded_id: UserId) -> str: def encode_id_notify_and_delete_user(self, user_id: UserId): encoded_user_id = self.encode_galaxy_user_id(decoded_id=user_id) if self.notify_user(encoded_user_id): - logger.debug(f"User {user_id} notified.") + logger.debug("User %s notified.", user_id) if self.delete_user(encoded_user_id): - logger.info(f"User {user_id} notified and deleted.") + logger.info("User %s notified and deleted.", user_id) def print_table_header(verbose: bool, interactive: bool): From d79a28f981795f239d55dbce99d2e4fbd41b1037 Mon Sep 17 00:00:00 2001 From: Mira Kuntz Date: Wed, 18 Sep 2024 13:54:28 +0200 Subject: [PATCH 16/19] fix dict append --- templates/walle.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/templates/walle.py b/templates/walle.py index f9bbd03..715741b 100644 --- a/templates/walle.py +++ b/templates/walle.py @@ -51,6 +51,7 @@ def convert_arg_to_byte(mb: str) -> int: def convert_arg_to_seconds(hours: str) -> float: return float(hours) * 60 * 60 + class Severity: def __init__(self, number: int, name: str): self.value = number @@ -352,7 +353,7 @@ def mark_user_for_deletion(self, severity: Severity) -> UserIdMail: def check_severity_level(self, severity: Severity) -> UserIdMail: if self.malware.severity >= severity: - logger.debug(f"User %s marked for deletion",self.job.user_id) + logger.debug(f"User %s marked for deletion", self.job.user_id) return {self.job.user_id: self.job.user_mail} else: return {} @@ -652,14 +653,22 @@ def notify_user(self, encoded_user_id: UserId) -> bool: if response.status_code == 200: if response.json()["total_notifications_sent"] == 1: return True - logger.error("Can not notify user %s, response from Galaxy: %s", encoded_user_id, response.content) + logger.error( + "Can not notify user %s, response from Galaxy: %s", + encoded_user_id, + response.content, + ) return False def delete_user(self, encoded_user_id: UserId) -> bool: url = f"{self.base_url}/api/users/{encoded_user_id}" response = requests.delete(url=url, headers=self.auth_header) if response.status_code != 200: - logger.error("Can not encode delete user %s, response from Galaxy: %s", encoded_user_id, response.content) + logger.error( + "Can not encode delete user %s, response from Galaxy: %s", + encoded_user_id, + response.content, + ) return False else: return True @@ -668,7 +677,9 @@ def encode_galaxy_user_id(self, decoded_id: UserId) -> str: url = f"{self.base_url}/api/configuration/encode/{decoded_id}" response = requests.get(url=url, headers=self.auth_header) if response.status_code != 200: - logger.error("Can not encode user id, response from Galaxy: %s", response.content) + logger.error( + "Can not encode user id, response from Galaxy: %s", response.content + ) return "" else: json_response = response.json() @@ -686,10 +697,8 @@ def print_table_header(verbose: bool, interactive: bool): if interactive: if verbose: logger.debug( - "TIMESTAMP MALWARE_SEVERITY USER_ID USER_NAME USER_MAIL \ - TOOL_ID GALAXY_JOB_ID RUNNER_JOB_ID RUNNER_NAME \ - OBJECT_STORE_ID MALWARE_CLASS MALWARE_NAME \ - MALWARE_VERSION PATH" + "MALWARE_SEVERITY USER_ID USER_NAME USER_MAIL TOOL_ID GALAXY_JOB_ID \ +RUNNER_JOB_ID RUNNER_NAME OBJECT_STORE_ID MALWARE_CLASS MALWARE_NAME MALWARE_VERSION PATH" ) else: logger.info("GALAXY_USER JOB_ID") @@ -743,9 +752,9 @@ def main(): reported_users=reported_users, delete_users=delete_users, ) - reported_users = case.report_according_to_verbosity() + reported_users.update(case.report_according_to_verbosity()) if args.delete_user: - delete_users = case.mark_user_for_deletion(args.delete_user) + delete_users.update(case.mark_user_for_deletion(args.delete_user)) # Deletes users at the end, to report all malicious jobs of a user if args.delete_user: api = GalaxyAPI( @@ -757,7 +766,7 @@ def main(): ), delete_message=os.environ.get( "WALLE_USER_DELETION_MESSAGE", DEFAULT_MESSAGE - ) + ), ) for user_id in delete_users: # add notification here From 6789c3d4c23fddfab1fb8c629663d55ac9cb95b9 Mon Sep 17 00:00:00 2001 From: Mira Kuntz Date: Wed, 18 Sep 2024 16:07:55 +0200 Subject: [PATCH 17/19] update env variables and doc --- README.md | 85 ++++++++++++++++++-------- defaults/main.yml | 8 ++- templates/tests.py | 144 +++++++++++++++++++++++++++++++++++++++++++++ templates/walle.py | 65 ++++++++++---------- 4 files changed, 239 insertions(+), 63 deletions(-) create mode 100644 templates/tests.py diff --git a/README.md b/README.md index c19d6bc..5f3ffc8 100644 --- a/README.md +++ b/README.md @@ -17,27 +17,53 @@ This role expect several requirements. 2. the python packages imported in `walle.py` and `galaxy_jwd.py` must be present 3. Following environment vars must be set: - `GALAXY_CONFIG_FILE`: Path to the galaxy.yml file - - `GALAXY_LOG_DIR`: Path to the Galaxy log directory - `PGDATABASE`: Name of the Galaxy database - `PGUSER`: Galaxy database user - `PGHOST`: Galaxy database host - `PGPASSFILE`: path to Postgres' `.pgpass` file (defaults to `/home//.pgpass`) - - `GALAXY_PULSAR_APP_CONF`: + - `GALAXY_PULSAR_APP_CONF`: [Galaxy's Pulsar configuration file](https://github.com/galaxyproject/pulsar/blob/master/app.yml.sample) (for the staging directory)[^1] +4. These environment vars must be set when using `--delete-user` mode + - `GALAXY_BASE_URL`: Instance hostname including scheme (https://examplegalaxy.org) + - `GALAXY_API_KEY`: Galaxy API key with admin privileges\ + Optional, for default values see the constants in `walle.py`: + - `WALLE_USER_DELETION_MESSAGE`: Message that tells the user why their account is deleted. + - `WALLE_USER_DELETION_SUBJECT`: The message's subject line. + +[^1]: You should always run 'dangerous' jobs in embedded Pulsar. ## Ansible -For ansible details consult `defaults/main.yml`, it should be pretty much self-explanatory. +For ansible details consult `defaults/main.yml`, it should be pretty much self-explanatory. ## Usage From the tools help command: ~~~ -usage: Wall·E [-h] [--chunksize CHUNKSIZE] [--min-size MIN_SIZE_MB] [--max-size MAX_SIZE_MB] [--since SINCE] [--tool TOOL] [-v] [-i] +usage: WALL·E [-h] [--chunksize CHUNKSIZE] [--min-size MIN_SIZE_MB] [--max-size MAX_SIZE_MB] [--since SINCE] [--tool TOOL] [-v] [-i] [--delete-user MIN_SEVERITY] + + Galaxy's Static Malware Scanner + + DESCRIPTION + Loads a yaml malware library with CRC32 and SHA1 hashes + from the environment variable "MALWARE_LIB". + Gets a list of running jobs from Galaxy's database, + optionally filtered by a '--tool ' substring. + Then iterates over the jobs, scans all files in the Job Working Directory, + optionally filtered by size and access time, + for files that match both hashes and reports details to stdout. + If '--delete-user' flag is set it notifies and deletes the user. + + REQUIREMENTS + galaxy_jwd.py as well as all other imported packages must be present. + The following environment variables (same as gxadmin's) should be set: + GALAXY_CONFIG_FILE: Path to the galaxy.yml file + PGDATABASE: Name of the Galaxy database + PGUSER: Galaxy database user + PGHOST: Galaxy database host + PGPASSFILE: path to .pgpass file (same as gxadmin's) in format: + :5432:*:: - Loads a yaml malware library with CRC32 and SHA1 hashes as arguments - from the environment variable "MALWARE_LIB", - searches in JWDs of currently running jobs for matching files - and reports jobs, users and malware details if specified. - Malware library file has the following schema: + MALWARE LIBRARY SCHEMA + The malware library file has the following schema: class: - name: + program: version: severity: [high, medium, low] description: "optional info" @@ -45,6 +71,7 @@ usage: Wall·E [-h] [--chunksize CHUNKSIZE] [--min-size MIN_SIZE_MB] [--max-size crc32: sha1: WARNING: + ---------------------------------------------------------------- Be careful with how you generate the CRC32 hashes: There are multiple algorithms, this script is using the one specified by RFC in the GZIP specification. @@ -52,28 +79,34 @@ usage: Wall·E [-h] [--chunksize CHUNKSIZE] [--min-size MIN_SIZE_MB] [--max-size and convert it to integer representation. e.g. with: gzip -1 -c /path/to/file | tail -c8 | hexdump -n4 -e '"%u"' - - The following ENVs (same as gxadmin's) should be set: - GALAXY_CONFIG_FILE: Path to the galaxy.yml file - GALAXY_LOG_DIR: Path to the Galaxy log directory - PGDATABASE: Name of the Galaxy database - PGUSER: Galaxy database user - PGHOST: Galaxy database host + ---------------------------------------------------------------- - PGPASSFILE: path to a ~/.pgpass file (same as gxadmin's) in format: - :5432:*:: - optional arguments: -h, --help show this help message and exit --chunksize CHUNKSIZE Chunksize in MiB for hashing the files in JWDs, defaults to 100 MiB --min-size MIN_SIZE_MB - Minimum filesize im MB to limit the files to scan. + Minimum filesize im MB to limit the files to scan. The check will be skipped if value is 0 (default) --max-size MAX_SIZE_MB - Maximum filesize im MB to limit the files to scan. CAUTION: Not setting this value can lead to very long computation times - --since SINCE Access time in hours backwards from now - --tool TOOL A string to filter tools in the tool_id column of currently running jobs. Use like 'grep' after the gxadmin query queue-details command. - -v, --verbose Report not only the job and user ID that matched, but also Path of matched file and malware info. If set, the scanning process will quit after the first match in a JWD to save resources. - -i, --interactive Show progress bar. Leave unset for cleaner logs and slightly higher performance + Maximum filesize im MB to limit the files to scan. + CAUTION: Not setting this value can lead to very long computation times. + The check will be skipped if value is 0 (default) + --since SINCE Access time in hours backwards from now, default=0 (skip check) + --tool TOOL A string to filter tools in the tool_id column of currently running jobs. + Use like 'grep' after the gxadmin query queue-details command. + -v, --verbose Report details for every match. + -i, --interactive Show table header. + --delete-user MIN_SEVERITY + Delete user when the found malware's severity level is equal or higher than this value. + Possible values are 'LOW', 'MEDIUM' or 'HIGH'. + This feature requires Galaxy's notification framework to be enabled. + Make sure that you know the consequences on your instance, especially regarding GDPR and + what happens when a user is set to deleted (e.g. when a user is purged automatically after deletion). + Following additional environment variables are expected: + GALAXY_BASE_URL: Instance hostname including scheme (https://examplegalaxy.org) + GALAXY_API_KEY: Galaxy API key with admin privileges + Optional, for default see documentation: + WALLE_USER_DELETION_MESSAGE: Message that tells the user why their account is deleted. + WALLE_USER_DELETION_SUBJECT: The message's subject line. ~~~ diff --git a/defaults/main.yml b/defaults/main.yml index bf8e3f3..2b9a2e9 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -12,7 +12,7 @@ walle_log_dir: /var/log/walle walle_script_location: /usr/local/bin/walle.py # Script args -walle_filesize_min: 1 +walle_filesize_min: 0 walle_filesize_max: 10 walle_since_hours: 24 walle_envs_database: @@ -37,8 +37,10 @@ walle_envs_user_deletion: value: "{{ walle_galaxy_url }}" - key: GALAXY_CONFIG_FILE value: "{{ galaxy_config_dir }}/galaxy.yml" - - key: GALAXY_LIB - value: "{{ galaxy_root }}/server/lib" +# - key: GALAXY_USER_DELETION_MESSAGE +# value: "Your personalized message" +# - key: GALAXY_USER_DELETION_SUBJECT +# value: "Your personalized subject line" # walle_api_key: #admin api key to delete users, goes to VAULT # walle_galaxy_url: #galaxy_hostname, no leading slash # walle_tool: diff --git a/templates/tests.py b/templates/tests.py new file mode 100644 index 0000000..a4b6901 --- /dev/null +++ b/templates/tests.py @@ -0,0 +1,144 @@ +import unittest +from unittest.mock import patch, MagicMock, call +import pathlib +import os +import argparse +import walle + +# Assuming the following classes are defined elsewhere +# UserId, UserMail, UserIdMail, Malware, Severity, etc. +# I'll mock them here for the purpose of testing + + +class TestJobClass(unittest.TestCase): + + def setUp(self): + self.mock_user_id = MagicMock() + self.mock_user_mail = MagicMock() + self.mock_tool_id = "tool_123" + self.mock_galaxy_id = 1 + self.mock_runner_id = 1 + self.mock_runner_name = "runner_1" + self.mock_object_store_id = 1 + self.mock_jwd = pathlib.Path("/mock/jwd") + self.mock_files = [] + + self.job = walle.Job( + user_id=self.mock_user_id, + user_name="test_user", + user_mail=self.mock_user_mail, + tool_id=self.mock_tool_id, + galaxy_id=self.mock_galaxy_id, + runner_id=self.mock_runner_id, + runner_name=self.mock_runner_name, + object_store_id=self.mock_object_store_id, + jwd=self.mock_jwd, + files=self.mock_files, + ) + + @patch("pathlib.Path.exists") + def test_set_jwd_success(self, mock_exists): + # Test when the jwd path exists + mock_exists.return_value = True + result = self.job.set_jwd("/new/jwd") + self.assertTrue(result) + self.assertEqual(self.job.jwd, pathlib.Path("/new/jwd")) + + @patch("pathlib.Path.exists") + def test_set_jwd_failure(self, mock_exists): + # Test when the jwd path does not exist + mock_exists.return_value = False + result = self.job.set_jwd("/new/jwd") + self.assertFalse(result) + self.assertNotEqual(self.job.jwd, pathlib.Path("/new/jwd")) + + @patch("os.walk") + @patch("pathlib.Path.stat") + @patch("pathlib.Path.exists") + @patch("pathlib.Path.is_file") + @patch("builtins.open", new_callable=MagicMock) + def test_get_files( + self, mock_open, mock_is_file, mock_exists, mock_stat, mock_walk + ): + # Set up the mocks + mock_exists.return_value = True + mock_is_file.return_value = True + mock_walk.return_value = [ + ("/mock/jwd", ("subdir",), ("file1", "file2")), + ] + mock_stat.return_value = MagicMock() + + # Mocking file_in_size_range and file_accessed_in_range + with patch("file_in_size_range", return_value=True), patch( + "file_accessed_in_range", return_value=True + ): + args = argparse.Namespace(min_size=0, max_size=1000, since=0) + result = self.job.get_files(args) + self.assertTrue(result) + + @patch("file_in_size_range") + @patch("file_accessed_in_range") + @patch("pathlib.Path.exists") + def test_get_files_no_files_found( + self, mock_exists, mock_file_in_size_range, mock_file_accessed_in_range + ): + mock_exists.return_value = True + mock_file_in_size_range.return_value = False + mock_file_accessed_in_range.return_value = False + + args = argparse.Namespace(min_size=0, max_size=1000, since=0) + result = self.job.get_files(args) + self.assertFalse(result) + + @patch("logging.Logger.info") + def test_report_id_and_user_name(self, mock_logger_info): + result = self.job.report_id_and_user_name() + mock_logger_info.assert_called_once_with(self.mock_user_id, "test_user") + self.assertEqual(result, {self.mock_user_id: self.mock_user_mail}) + + @patch("logging.Logger.debug") + @patch("get_iso_time_utc_add_months") + def test_report_matching_malware(self, mock_get_iso_time, mock_logger_debug): + mock_malware = MagicMock() + mock_malware.severity.name = "High" + mock_malware.malware_class = "ClassA" + mock_malware.program = "ProgramA" + mock_malware.version = "1.0" + + # Add a mock file to the job + self.job.files = [pathlib.Path("/mock/file")] + + # Call the method + self.job.report_matching_malware(0, mock_malware) + + mock_get_iso_time.assert_called_once_with(0) + mock_logger_debug.assert_called_once_with( + mock_get_iso_time.return_value, + "High", + self.mock_user_id, + "test_user", + self.mock_user_mail, + self.mock_tool_id, + self.mock_galaxy_id, + self.mock_runner_id, + self.mock_runner_name, + self.mock_object_store_id, + "ClassA", + "ProgramA", + "1.0", + pathlib.Path("/mock/file"), + ) + + def test_mark_for_deletion_case_severity_high(self): + mock_severity = MagicMock() + result = self.job.mark_for_deletion(case_severity=5, severity_level=3) + self.assertEqual(result, {self.mock_user_id: self.mock_user_mail}) + + def test_mark_for_deletion_case_severity_low(self): + mock_severity = MagicMock() + result = self.job.mark_for_deletion(case_severity=2, severity_level=3) + self.assertEqual(result, {}) + + +if __name__ == "__main__": + unittest.main() diff --git a/templates/walle.py b/templates/walle.py index 715741b..73e91e7 100644 --- a/templates/walle.py +++ b/templates/walle.py @@ -1,7 +1,4 @@ #!/usr/bin/env python -# Keep your system clean! -# A command line script that iterates over the currently running jobs and stops them as well as logs the user, -# when a file in the JWD matches to a list of hashes import argparse import hashlib @@ -11,9 +8,6 @@ import time import zlib import logging -from argparse import RawTextHelpFormatter -from datetime import datetime, timezone -from dateutil.relativedelta import relativedelta from typing import Dict import galaxy_jwd @@ -96,10 +90,29 @@ def make_parser() -> argparse.ArgumentParser: my_parser = argparse.ArgumentParser( prog="WALL·E", description=""" - Loads a yaml malware library with CRC32 and SHA1 hashes as arguments - from the environment variable "MALWARE_LIB", - searches in JWDs of currently running jobs for matching files - and reports jobs, users and malware details if specified. + Galaxy's Static Malware Scanner + + DESCRIPTION + Loads a yaml malware library with CRC32 and SHA1 hashes + from the environment variable "MALWARE_LIB". + Gets a list of running jobs from Galaxy's database, + optionally filtered by a '--tool ' substring. + Then iterates over the jobs, scans all files in the Job Working Directory, + optionally filtered by size and access time, + for files that match both hashes and reports details to stdout. + If '--delete-user' flag is set it notifies and deletes the user. + + REQUIREMENTS + galaxy_jwd.py as well as all other imported packages must be present. + The following environment variables (same as gxadmin's) should be set: + GALAXY_CONFIG_FILE: Path to the galaxy.yml file + PGDATABASE: Name of the Galaxy database + PGUSER: Galaxy database user + PGHOST: Galaxy database host + PGPASSFILE: path to .pgpass file (same as gxadmin's) in format: + :5432:*:: + + MALWARE LIBRARY SCHEMA The malware library file has the following schema: class: program: @@ -119,23 +132,6 @@ def make_parser() -> argparse.ArgumentParser: e.g. with: gzip -1 -c /path/to/file | tail -c8 | hexdump -n4 -e '"%u"' ---------------------------------------------------------------- - - The following ENVs (same as gxadmin's) should be set: - GALAXY_CONFIG_FILE: Path to the galaxy.yml file - GALAXY_LOG_DIR: Path to the Galaxy log directory - PGDATABASE: Name of the Galaxy database - PGUSER: Galaxy database user - PGHOST: Galaxy database host - PGPASSFILE: path to .pgpass file (same as gxadmin's) in format: - :5432:*:: - - The '--delete-user' flag requires additional environment variables: - GALAXY_BASE_URL: Instance hostname including scheme (https://examplegalaxy.org) - GALAXY_ADMIN_EMAIL: The email users can contact to file complaints - GALAXY_API_KEY: Galaxy API key with admin privileges - GALAXY_ROOT: Galaxy root directiory (e.g. /srv/galaxy) - WALLE_USER_DELETION_MESSAGE: The message Galaxy should send as notification to a user before it deletes their account - WALLE_USER_DELETION_SUBJECT: The message's subject line. """, formatter_class=argparse.RawTextHelpFormatter, ) @@ -192,14 +188,13 @@ def make_parser() -> argparse.ArgumentParser: "-v", "--verbose", action="store_true", - help="Report not only the job and user ID that matched, but also Path of matched file and malware info.\n \ -If set, the scanning process will quit after the first match in a JWD to save resources.", + help="Report details for every match." ) my_parser.add_argument( "-i", "--interactive", action="store_true", - help="Show progress bar. Leave unset for cleaner logs and slightly higher performance", + help="Show table header.", ) my_parser.add_argument( "--delete-user", @@ -208,11 +203,15 @@ def make_parser() -> argparse.ArgumentParser: type=convert_str_to_severity, help="Delete user when the found malware's severity level is equal or higher than this value.\n \ Possible values are 'LOW', 'MEDIUM' or 'HIGH'.\n \ +This feature requires Galaxy's notification framework to be enabled.\n \ Make sure that you know the consequences on your instance, especially regarding GDPR and\n \ what happens when a user is set to deleted (e.g. when a user is purged automatically after deletion).\n \ Following additional environment variables are expected:\n \ -GALAXY_API_KEY\n \ -GALAXY_BASE_URL", +GALAXY_BASE_URL: Instance hostname including scheme (https://examplegalaxy.org)\n \ +GALAXY_API_KEY: Galaxy API key with admin privileges\n \ +Optional, for default see documentation:\n \ +WALLE_USER_DELETION_MESSAGE: Message that tells the user why their account is deleted.\n \ +WALLE_USER_DELETION_SUBJECT: The message's subject line." ) return my_parser @@ -617,7 +616,6 @@ def __init__( self, base_url: str, api_key: str, - admin_email: str, delete_subject: str, delete_message: str, ) -> None: @@ -760,7 +758,6 @@ def main(): api = GalaxyAPI( api_key=get_str_from_env_or_error("GALAXY_API_KEY"), base_url=get_str_from_env_or_error("GALAXY_BASE_URL"), - admin_email=get_str_from_env_or_error("GALAXY_ADMIN_EMAIL"), delete_subject=os.environ.get( "WALLE_USER_DELETION_SUBJECT", DEFAULT_SUBJECT ), From 13c320ab6ea66b5a29a25ac8e5da6237d414cc32 Mon Sep 17 00:00:00 2001 From: Mira Kuntz Date: Wed, 18 Sep 2024 16:14:19 +0200 Subject: [PATCH 18/19] remove empty test file and format --- templates/tests.py | 144 --------------------------------------------- templates/walle.py | 7 +-- 2 files changed, 2 insertions(+), 149 deletions(-) delete mode 100644 templates/tests.py diff --git a/templates/tests.py b/templates/tests.py deleted file mode 100644 index a4b6901..0000000 --- a/templates/tests.py +++ /dev/null @@ -1,144 +0,0 @@ -import unittest -from unittest.mock import patch, MagicMock, call -import pathlib -import os -import argparse -import walle - -# Assuming the following classes are defined elsewhere -# UserId, UserMail, UserIdMail, Malware, Severity, etc. -# I'll mock them here for the purpose of testing - - -class TestJobClass(unittest.TestCase): - - def setUp(self): - self.mock_user_id = MagicMock() - self.mock_user_mail = MagicMock() - self.mock_tool_id = "tool_123" - self.mock_galaxy_id = 1 - self.mock_runner_id = 1 - self.mock_runner_name = "runner_1" - self.mock_object_store_id = 1 - self.mock_jwd = pathlib.Path("/mock/jwd") - self.mock_files = [] - - self.job = walle.Job( - user_id=self.mock_user_id, - user_name="test_user", - user_mail=self.mock_user_mail, - tool_id=self.mock_tool_id, - galaxy_id=self.mock_galaxy_id, - runner_id=self.mock_runner_id, - runner_name=self.mock_runner_name, - object_store_id=self.mock_object_store_id, - jwd=self.mock_jwd, - files=self.mock_files, - ) - - @patch("pathlib.Path.exists") - def test_set_jwd_success(self, mock_exists): - # Test when the jwd path exists - mock_exists.return_value = True - result = self.job.set_jwd("/new/jwd") - self.assertTrue(result) - self.assertEqual(self.job.jwd, pathlib.Path("/new/jwd")) - - @patch("pathlib.Path.exists") - def test_set_jwd_failure(self, mock_exists): - # Test when the jwd path does not exist - mock_exists.return_value = False - result = self.job.set_jwd("/new/jwd") - self.assertFalse(result) - self.assertNotEqual(self.job.jwd, pathlib.Path("/new/jwd")) - - @patch("os.walk") - @patch("pathlib.Path.stat") - @patch("pathlib.Path.exists") - @patch("pathlib.Path.is_file") - @patch("builtins.open", new_callable=MagicMock) - def test_get_files( - self, mock_open, mock_is_file, mock_exists, mock_stat, mock_walk - ): - # Set up the mocks - mock_exists.return_value = True - mock_is_file.return_value = True - mock_walk.return_value = [ - ("/mock/jwd", ("subdir",), ("file1", "file2")), - ] - mock_stat.return_value = MagicMock() - - # Mocking file_in_size_range and file_accessed_in_range - with patch("file_in_size_range", return_value=True), patch( - "file_accessed_in_range", return_value=True - ): - args = argparse.Namespace(min_size=0, max_size=1000, since=0) - result = self.job.get_files(args) - self.assertTrue(result) - - @patch("file_in_size_range") - @patch("file_accessed_in_range") - @patch("pathlib.Path.exists") - def test_get_files_no_files_found( - self, mock_exists, mock_file_in_size_range, mock_file_accessed_in_range - ): - mock_exists.return_value = True - mock_file_in_size_range.return_value = False - mock_file_accessed_in_range.return_value = False - - args = argparse.Namespace(min_size=0, max_size=1000, since=0) - result = self.job.get_files(args) - self.assertFalse(result) - - @patch("logging.Logger.info") - def test_report_id_and_user_name(self, mock_logger_info): - result = self.job.report_id_and_user_name() - mock_logger_info.assert_called_once_with(self.mock_user_id, "test_user") - self.assertEqual(result, {self.mock_user_id: self.mock_user_mail}) - - @patch("logging.Logger.debug") - @patch("get_iso_time_utc_add_months") - def test_report_matching_malware(self, mock_get_iso_time, mock_logger_debug): - mock_malware = MagicMock() - mock_malware.severity.name = "High" - mock_malware.malware_class = "ClassA" - mock_malware.program = "ProgramA" - mock_malware.version = "1.0" - - # Add a mock file to the job - self.job.files = [pathlib.Path("/mock/file")] - - # Call the method - self.job.report_matching_malware(0, mock_malware) - - mock_get_iso_time.assert_called_once_with(0) - mock_logger_debug.assert_called_once_with( - mock_get_iso_time.return_value, - "High", - self.mock_user_id, - "test_user", - self.mock_user_mail, - self.mock_tool_id, - self.mock_galaxy_id, - self.mock_runner_id, - self.mock_runner_name, - self.mock_object_store_id, - "ClassA", - "ProgramA", - "1.0", - pathlib.Path("/mock/file"), - ) - - def test_mark_for_deletion_case_severity_high(self): - mock_severity = MagicMock() - result = self.job.mark_for_deletion(case_severity=5, severity_level=3) - self.assertEqual(result, {self.mock_user_id: self.mock_user_mail}) - - def test_mark_for_deletion_case_severity_low(self): - mock_severity = MagicMock() - result = self.job.mark_for_deletion(case_severity=2, severity_level=3) - self.assertEqual(result, {}) - - -if __name__ == "__main__": - unittest.main() diff --git a/templates/walle.py b/templates/walle.py index 73e91e7..4fd981a 100644 --- a/templates/walle.py +++ b/templates/walle.py @@ -185,10 +185,7 @@ def make_parser() -> argparse.ArgumentParser: default="", ) my_parser.add_argument( - "-v", - "--verbose", - action="store_true", - help="Report details for every match." + "-v", "--verbose", action="store_true", help="Report details for every match." ) my_parser.add_argument( "-i", @@ -211,7 +208,7 @@ def make_parser() -> argparse.ArgumentParser: GALAXY_API_KEY: Galaxy API key with admin privileges\n \ Optional, for default see documentation:\n \ WALLE_USER_DELETION_MESSAGE: Message that tells the user why their account is deleted.\n \ -WALLE_USER_DELETION_SUBJECT: The message's subject line." +WALLE_USER_DELETION_SUBJECT: The message's subject line.", ) return my_parser From f5646303f7c98269f8794bc7e626c8db824c47c9 Mon Sep 17 00:00:00 2001 From: Mira Kuntz Date: Wed, 18 Sep 2024 16:19:27 +0200 Subject: [PATCH 19/19] forgot vars --- defaults/main.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/defaults/main.yml b/defaults/main.yml index 2b9a2e9..71232fd 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -26,6 +26,10 @@ walle_envs_database: value: galaxy - key: PGDATABASE value: galaxy + - key: GALAXY_CONFIG_FILE + value: "{{ galaxy_config_dir }}/galaxy.yml" + - key: GALAXY_PULSAR_APP_CONF + value: "{{ galaxy_config_dir }}/pulsar_app.yml" # delete users when malware was found and malware severity reached walle_delete_threshold walle_delete_users: false @@ -35,8 +39,6 @@ walle_envs_user_deletion: value: "{{ walle_api_key }}" - key: GALAXY_BASE_URL value: "{{ walle_galaxy_url }}" - - key: GALAXY_CONFIG_FILE - value: "{{ galaxy_config_dir }}/galaxy.yml" # - key: GALAXY_USER_DELETION_MESSAGE # value: "Your personalized message" # - key: GALAXY_USER_DELETION_SUBJECT