diff --git a/bin/compare_tools.py b/bin/compare_tools.py index e89d06c9..29450573 100644 --- a/bin/compare_tools.py +++ b/bin/compare_tools.py @@ -82,5 +82,3 @@ def update_excl_keep_tool_lists(tuto_tool_suites: Set, excl_tool_fp: str, keep_t tuto_tools = get_tutorials_tool_suites(args.filtered_tutorials, args.all_tools) update_excl_keep_tool_lists(tuto_tools, args.exclude, args.keep) - - \ No newline at end of file diff --git a/bin/extract_galaxy_tools.py b/bin/extract_galaxy_tools.py index eb777193..a726bc98 100644 --- a/bin/extract_galaxy_tools.py +++ b/bin/extract_galaxy_tools.py @@ -94,9 +94,6 @@ def get_tool_stats_from_stats_file(tool_stats_df: pd.DataFrame, tool_ids: List[s return int(agg_count) - - - def get_string_content(cf: ContentFile) -> str: """ Get string of the content from a ContentFile @@ -524,7 +521,9 @@ def export_tools_to_tsv( df["EDAM operation"] = shared_functions.format_list_column(df["EDAM operation"]) df["EDAM topic"] = shared_functions.format_list_column(df["EDAM topic"]) - df["EDAM operation (no superclasses)"] = shared_functions.format_list_column(df["EDAM operation (no superclasses)"]) + df["EDAM operation (no superclasses)"] = shared_functions.format_list_column( + df["EDAM operation (no superclasses)"] + ) df["EDAM topic (no superclasses)"] = shared_functions.format_list_column(df["EDAM topic (no superclasses)"]) df["bio.tool ids"] = shared_functions.format_list_column(df["bio.tool ids"]) diff --git a/bin/extract_gtn_tutorials.py b/bin/extract_gtn_tutorials.py index d1e9c864..bbac8a4d 100644 --- a/bin/extract_gtn_tutorials.py +++ b/bin/extract_gtn_tutorials.py @@ -46,7 +46,7 @@ def get_short_tool_ids(tuto: dict) -> None: Get tool ids without toolshed URL """ tuto["short_tools"] = set() - if "tools" in tuto: + if "tools" in tuto: for tool in tuto["tools"]: if "toolshed" in tool: tuto["short_tools"].add(tool.split("/")[-2]) @@ -97,9 +97,7 @@ def get_visit_results(url: str, tuto: dict, plausible_api: str) -> None: """ Extract visit results from Plausible URL """ - headers = { - 'Authorization' : f"Bearer {plausible_api}" - } + headers = {"Authorization": f"Bearer {plausible_api}"} results = get_request_json(url, headers) if "results" in results: for metric in ["visitors", "pageviews", "visit_duration"]: @@ -128,10 +126,7 @@ def get_youtube_stats(tuto: dict) -> None: """ tuto["video_versions"] = 0 tuto["video_view"] = 0 - ydl_opts = { - "ignoreerrors": True, - "quiet": True - } + ydl_opts = {"ignoreerrors": True, "quiet": True} if "video_library" in tuto and tuto["video_library"]["tutorial"]: tuto["video_versions"] = len(tuto["video_library"]["tutorial"]["versions"]) for v in tuto["video_library"]["tutorial"]["versions"]: @@ -142,7 +137,7 @@ def get_youtube_stats(tuto: dict) -> None: if info: tuto["video_view"] += info["view_count"] - + def format_tutorial(tuto: dict, edam_ontology, tools: dict, feedback: dict, plausible_api: str) -> None: tuto["url"] = f'https://training.galaxyproject.org/{tuto["url"]}' tuto["mod_date"] = format_date(tuto["mod_date"]) @@ -174,11 +169,15 @@ def get_feedback_per_tutorials() -> Dict: return feedback_per_tuto -def get_tutorials(tool_fp: str, plausible_api: str, run_test: bool,) -> List[Dict]: +def get_tutorials( + tool_fp: str, + plausible_api: str, + run_test: bool, +) -> List[Dict]: """ Extract training material from the GTN API, format them, extract EDAM operations from tools, feedback stats, view stats, etc """ - tools = shared_functions.read_suite_per_tool_id(tool_fp) + tools = shared_functions.read_suite_per_tool_id(tool_fp) feedback = get_feedback_per_tutorials() edam_ontology = get_ontology("https://edamontology.org/EDAM_unstable.owl").load() topics = get_request_json("https://training.galaxyproject.org/training-material/api/topics.json") @@ -186,7 +185,9 @@ def get_tutorials(tool_fp: str, plausible_api: str, run_test: bool,) -> List[Dic topics = ["microbiome"] tutos = [] for topic in topics: - topic_information = get_request_json(f"https://training.galaxyproject.org/training-material/api/topics/{topic}.json") + topic_information = get_request_json( + f"https://training.galaxyproject.org/training-material/api/topics/{topic}.json" + ) for tuto in topic_information["materials"]: if tuto is None: continue @@ -195,7 +196,7 @@ def get_tutorials(tool_fp: str, plausible_api: str, run_test: bool,) -> List[Dic return tutos -def filter_tutorials(tutorials: List[Dict], tags: List) -> List[Dict]: +def filter_tutorials(tutorials: List[Dict], tags: List) -> List[Dict]: """ Filter training based on a list of tags """ @@ -215,66 +216,69 @@ def export_tutorials_to_tsv(tutorials: List[Dict], output_fp: str) -> None: """ Export tutorials to a TSV file """ - df = (pd.DataFrame(tutorials) - .assign( - Workflows=lambda df: df.workflows.notna(), - exact_supported_servers= lambda df: df.exact_supported_servers.fillna("").apply(list), - inexact_supported_servers= lambda df: df.inexact_supported_servers.fillna("").apply(list), - visit_duration= lambda df: df.visit_duration/60 + df = (pd.DataFrame(tutorials).assign( + Workflows=lambda df: df.workflows.notna(), + exact_supported_servers=lambda df: df.exact_supported_servers.fillna("").apply(list), + inexact_supported_servers=lambda df: df.inexact_supported_servers.fillna("").apply(list), + visit_duration=lambda df: df.visit_duration/60 ) ) for col in ["exact_supported_servers", "inexact_supported_servers", "short_tools", "edam_operation", "edam_topic"]: df[col] = shared_functions.format_list_column(df[col]) - - df = (df - .rename(columns = { - "title": "Title", - "hands_on": "Tutorial", - "url": "Link", - "slides": "Slides", - "mod_date": "Last modification", - "pub_date": "Creation", - "version": "Version", - "short_tools": "Tools", - "exact_supported_servers": "Servers with precise tool versions", - "inexact_supported_servers": "Servers with tool but different versions", - "topic_name_human": "Topic", - "video": "Video", - "edam_topic": "EDAM topic", - "edam_operation": "EDAM operation", - "feedback_number": "Feedback number", - "feedback_mean_note": "Feedback mean note", - "visitors": "Visitors", - "pageviews": "Page views", - "visit_duration": "Visit duration", - "video_versions": "Video versions", - "video_view": "Video views" - }) + + df = ( + df.rename( + columns={ + "title": "Title", + "hands_on": "Tutorial", + "url": "Link", + "slides": "Slides", + "mod_date": "Last modification", + "pub_date": "Creation", + "version": "Version", + "short_tools": "Tools", + "exact_supported_servers": "Servers with precise tool versions", + "inexact_supported_servers": "Servers with tool but different versions", + "topic_name_human": "Topic", + "video": "Video", + "edam_topic": "EDAM topic", + "edam_operation": "EDAM operation", + "feedback_number": "Feedback number", + "feedback_mean_note": "Feedback mean note", + "visitors": "Visitors", + "pageviews": "Page views", + "visit_duration": "Visit duration", + "video_versions": "Video versions", + "video_view": "Video views", + } + ) .fillna("") - .reindex(columns = [ - "Topic", - "Title", - "Link", - "EDAM topic", - "EDAM operation", - "Creation", - "Last modification", - "Version", - "Tutorial", - "Slides", - "Video", - "Workflows", - "Tools", - "Servers with precise tool versions", - "Servers with tool but different versions", - "Feedback number", - "Feedback mean note", - "Visitors", - "Page views", - "Visit duration", - "Video views" - ]) + .reindex( + columns=[ + "Topic", + "Title", + "Link", + "EDAM topic", + "EDAM operation", + "Creation", + "Last modification", + "Version", + "Tutorial", + "Slides", + "Video", + "Workflows", + "Tools", + "Servers with precise tool versions", + "Servers with tool but different versions", + "Feedback number", + "Feedback mean note", + "Visitors", + "Page views", + "Visit duration", + "Video views", + ] + ) ) df.to_csv(output_fp, sep="\t", index=False) @@ -287,7 +291,9 @@ def export_tutorials_to_tsv(tutorials: List[Dict], output_fp: str) -> None: subparser = parser.add_subparsers(dest="command") # Extract tutorials extracttutorials = subparser.add_parser("extracttutorials", help="Extract all training materials") - extracttutorials.add_argument("--all_tutorials", "-o", required=True, help="Filepath to JSON with all extracted training materials") + extracttutorials.add_argument( + "--all_tutorials", "-o", required=True, help="Filepath to JSON with all extracted training materials" + ) extracttutorials.add_argument( "--tools", "-t", @@ -336,5 +342,3 @@ def export_tutorials_to_tsv(tutorials: List[Dict], output_fp: str) -> None: # filter training lists filtered_tutorials = filter_tutorials(tutorials, tags) export_tutorials_to_tsv(filtered_tutorials, args.filtered_tutorials) - - \ No newline at end of file diff --git a/bin/shared_functions.py b/bin/shared_functions.py index 87bf5e66..bc1786d8 100644 --- a/bin/shared_functions.py +++ b/bin/shared_functions.py @@ -47,7 +47,7 @@ def load_json(input_df: str): Read a JSON file """ with Path(input_df).open("r") as t: - content = json.load(t) + content = json.load(t) return content @@ -64,4 +64,4 @@ def read_suite_per_tool_id(tool_fp: str) -> Dict: "Galaxy wrapper owner": suite["Galaxy wrapper id"], "EDAM operation": suite["EDAM operation"], } - return tools \ No newline at end of file + return tools