Skip to content

Commit

Permalink
add cleaning updates
Browse files Browse the repository at this point in the history
  • Loading branch information
Atticus1806 committed Nov 13, 2024
1 parent d256f43 commit 0a3c1d1
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 7 deletions.
45 changes: 38 additions & 7 deletions sisyphus/cleaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import sys
import tempfile

from typing import List, Optional

from sisyphus import graph
import sisyphus.global_settings as gs

Expand Down Expand Up @@ -206,7 +208,7 @@ def search_for_unused(job_dirs, current=gs.WORK_DIR, verbose=True):
return unused


def remove_directories(dirs, message, move_postfix=".cleanup", mode="remove", force=False):
def remove_directories(dirs, message, move_postfix=".cleanup", mode="remove", force=False, filter_affected=None):
"""list all directories that will be deleted and add a security check"""
if isinstance(dirs, str):
dirs = load_remove_list(dirs)
Expand All @@ -228,7 +230,19 @@ def remove_directories(dirs, message, move_postfix=".cleanup", mode="remove", fo
if input_var.lower() != "n":
logging.info("Affected directories:")
for i in tmp:
logging.info(i)
if os.path.exists(i + "/info") and gs.CLEANER_PRINT_ALIAS:
with open(i + "/info") as f:
lines = f.readlines()
if lines[-1].strip().startswith("ALIAS"):
s = lines[-1].strip()
s.replace("ALIAS:", "ALIAS AT CREATION:")
else:
s = ""
else:
s = ""
if filter_affected is None or any(x in i for x in filter_affected):
logging.info(i + " " + s)

else:
with tempfile.NamedTemporaryFile(mode="w") as tmp_file:
for directory in dirs:
Expand Down Expand Up @@ -280,8 +294,17 @@ def cleanup_jobs():
job._sis_cleanup()


def cleanup_keep_value(min_keep_value, load_from: str = "", mode: str = "remove"):
"""Go through all jobs in the current graph to remove all jobs with a lower keep value that the given minimum"""
def cleanup_keep_value(
min_keep_value, load_from: str = "", mode: str = "remove", filter_affected: Optional[List[str]] = None
):
"""Go through all jobs in the current graph to remove all jobs with a lower keep value that the given minimum
:param min_keep_value: Remove jobs with lower keep value than this
:param load_from: File name to load list with used directories
:param mode: Cleanup mode ('remove', 'move', or 'dryrun')
:param filter_affected: If given, only affected directories whose path contains at least one of these
substrings are printed when listing affected directories; None prints all of them
"""
if min_keep_value <= 0:
logging.error("Keep value must be larger than 0")
if load_from:
Expand All @@ -291,17 +314,25 @@ def cleanup_keep_value(min_keep_value, load_from: str = "", mode: str = "remove"

to_remove = find_too_low_keep_value(job_dirs, min_keep_value)
remove_directories(
to_remove, "Remove jobs with lower keep value than min", move_postfix=".cleanup", mode=mode, force=False
to_remove,
"Remove jobs with lower keep value than min",
move_postfix=".cleanup",
mode=mode,
force=False,
filter_affected=filter_affected,
)


def cleanup_unused(load_from: str = "", job_dirs=None, mode="remove"):
def cleanup_unused(
load_from: str = "", job_dirs: List = None, mode: str = "remove", filter_affected: Optional[List[str]] = None
):
"""Check work directory and remove all subdirectories which do not belong to the given list of directories.
If no input is given it removes everything that is not in the current graph
:param load_from: File name to load list with used directories
:param job_dirs: Already loaded list of used directories
:param mode: Cleanup mode ('remove', 'move', or 'dryrun')
:param filter_affected: If given, only affected directories whose path contains at least one of these
substrings are printed when listing affected directories; None prints all of them
:return:
"""
if job_dirs:
Expand All @@ -311,4 +342,4 @@ def cleanup_unused(load_from: str = "", job_dirs=None, mode="remove"):
else:
job_dirs = list_all_graph_directories()
to_remove = search_for_unused(job_dirs, verbose=True)
remove_directories(to_remove, "Not used in graph", mode=mode, force=False)
remove_directories(to_remove, "Not used in graph", mode=mode, force=False, filter_affected=filter_affected)
3 changes: 3 additions & 0 deletions sisyphus/global_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,9 @@ def file_caching(path):
JOB_CLEANUP_KEEP_INPUT = True
#: Default value for job used by tk.cleaner to determine if a job should be removed or not
JOB_DEFAULT_KEEP_VALUE = 50
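#: If True, tk.cleaner appends the ALIAS line found at the end of a job's info file when listing affected directories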
CLEANER_PRINT_ALIAS = True

#: How many threads should update the graph in parallel, useful if the filesystem has a high latency
GRAPH_WORKER = 16

Expand Down

0 comments on commit 0a3c1d1

Please sign in to comment.