From 1bfbc2d05c181b3cdcb0361cfc981b01c48d866f Mon Sep 17 00:00:00 2001
From: Nicola Soranzo
Date: Wed, 1 Nov 2023 08:20:49 +0000
Subject: [PATCH] Write extracted tools only once

Also:
- Use `.extend()` instead of `+=` (slightly faster)
- Write error messages to `sys.stderr`
---
Review note: `print(msg, sys.stderr)` passes the stream as a second value to
print, so the message and the stream's repr both go to stdout. The two
affected calls below now use `file=sys.stderr`, matching the handler added in
the last hunk.

 bin/extract_galaxy_tools.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/bin/extract_galaxy_tools.py b/bin/extract_galaxy_tools.py
index 1191db12..54bfbcea 100644
--- a/bin/extract_galaxy_tools.py
+++ b/bin/extract_galaxy_tools.py
@@ -2,6 +2,7 @@
 
 import argparse
 import base64
+import sys
 import time
 import xml.etree.ElementTree as et
 from pathlib import Path
@@ -60,7 +61,7 @@ def get_tool_github_repositories(g: Github) -> List[str]:
     for i in range(1, 5):
         repo_f = repo.get_contents(f"repositories0{i}.list")
         repo_l = get_string_content(repo_f).rstrip()
-        repo_list += repo_l.split("\n")
+        repo_list.extend(repo_l.split("\n"))
     return repo_list
 
 
@@ -221,7 +222,7 @@ def get_tool_metadata(tool: ContentFile, repo: Repository) -> Optional[Dict[str,
             try:
                 root = et.fromstring(file_content)
             except Exception:
-                print(file_content)
+                print(file_content, file=sys.stderr)
             else:
                 # version
                 if metadata["Galaxy wrapper version"] is None:
@@ -293,7 +294,7 @@ def parse_tools(repo: Repository) -> List[Dict[str, Any]]:
         try:
             repo_tools = repo.get_contents("wrappers")
         except Exception:
-            print("No tool folder found")
+            print("No tool folder found", file=sys.stderr)
             return []
     assert isinstance(repo_tools, list)
     tool_folders.append(repo_tools)
@@ -418,10 +419,12 @@ def filter_tools(tools: List[Dict], ts_cat: List[str], excluded_tools: List[str]
             print(r)
             if "github" not in r:
                 continue
-            repo = get_github_repo(r, g)
-            tools += parse_tools(repo)
-            export_tools(tools, args.all_tools, format_list_col=True)
-            print()
+            try:
+                repo = get_github_repo(r, g)
+                tools.extend(parse_tools(repo))
+            except Exception as e:
+                print(f"Error while extracting tools from repo {r}: {e}", file=sys.stderr)
+        export_tools(tools, args.all_tools, format_list_col=True)
     elif args.command == "filtertools":
         tools = pd.read_csv(Path(args.tools), sep="\t", keep_default_na=False).to_dict("records")
         # get categories and tools to exclude