Skip to content

Commit

Permalink
Fix lint
Browse files Browse the repository at this point in the history
  • Loading branch information
bebatut committed Jun 24, 2024
1 parent 9ff0c0a commit e68f466
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 78 deletions.
2 changes: 0 additions & 2 deletions bin/compare_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,5 +82,3 @@ def update_excl_keep_tool_lists(tuto_tool_suites: Set, excl_tool_fp: str, keep_t

tuto_tools = get_tutorials_tool_suites(args.filtered_tutorials, args.all_tools)
update_excl_keep_tool_lists(tuto_tools, args.exclude, args.keep)


7 changes: 3 additions & 4 deletions bin/extract_galaxy_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,6 @@ def get_tool_stats_from_stats_file(tool_stats_df: pd.DataFrame, tool_ids: List[s
return int(agg_count)





def get_string_content(cf: ContentFile) -> str:
"""
Get string of the content from a ContentFile
Expand Down Expand Up @@ -524,7 +521,9 @@ def export_tools_to_tsv(
df["EDAM operation"] = shared_functions.format_list_column(df["EDAM operation"])
df["EDAM topic"] = shared_functions.format_list_column(df["EDAM topic"])

df["EDAM operation (no superclasses)"] = shared_functions.format_list_column(df["EDAM operation (no superclasses)"])
df["EDAM operation (no superclasses)"] = shared_functions.format_list_column(
df["EDAM operation (no superclasses)"]
)
df["EDAM topic (no superclasses)"] = shared_functions.format_list_column(df["EDAM topic (no superclasses)"])

df["bio.tool ids"] = shared_functions.format_list_column(df["bio.tool ids"])
Expand Down
144 changes: 74 additions & 70 deletions bin/extract_gtn_tutorials.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def get_short_tool_ids(tuto: dict) -> None:
Get tool ids without toolshed URL
"""
tuto["short_tools"] = set()
if "tools" in tuto:
if "tools" in tuto:
for tool in tuto["tools"]:
if "toolshed" in tool:
tuto["short_tools"].add(tool.split("/")[-2])
Expand Down Expand Up @@ -97,9 +97,7 @@ def get_visit_results(url: str, tuto: dict, plausible_api: str) -> None:
"""
Extract visit results from Plausible URL
"""
headers = {
'Authorization' : f"Bearer {plausible_api}"
}
headers = {"Authorization": f"Bearer {plausible_api}"}
results = get_request_json(url, headers)
if "results" in results:
for metric in ["visitors", "pageviews", "visit_duration"]:
Expand Down Expand Up @@ -128,10 +126,7 @@ def get_youtube_stats(tuto: dict) -> None:
"""
tuto["video_versions"] = 0
tuto["video_view"] = 0
ydl_opts = {
"ignoreerrors": True,
"quiet": True
}
ydl_opts = {"ignoreerrors": True, "quiet": True}
if "video_library" in tuto and tuto["video_library"]["tutorial"]:
tuto["video_versions"] = len(tuto["video_library"]["tutorial"]["versions"])
for v in tuto["video_library"]["tutorial"]["versions"]:
Expand All @@ -142,7 +137,7 @@ def get_youtube_stats(tuto: dict) -> None:
if info:
tuto["video_view"] += info["view_count"]


def format_tutorial(tuto: dict, edam_ontology, tools: dict, feedback: dict, plausible_api: str) -> None:
tuto["url"] = f'https://training.galaxyproject.org/{tuto["url"]}'
tuto["mod_date"] = format_date(tuto["mod_date"])
Expand Down Expand Up @@ -174,19 +169,25 @@ def get_feedback_per_tutorials() -> Dict:
return feedback_per_tuto


def get_tutorials(tool_fp: str, plausible_api: str, run_test: bool,) -> List[Dict]:
def get_tutorials(
tool_fp: str,
plausible_api: str,
run_test: bool,
) -> List[Dict]:
"""
Extract training material from the GTN API, format them, extract EDAM operations from tools, feedback stats, view stats, etc
"""
tools = shared_functions.read_suite_per_tool_id(tool_fp)
tools = shared_functions.read_suite_per_tool_id(tool_fp)
feedback = get_feedback_per_tutorials()
edam_ontology = get_ontology("https://edamontology.org/EDAM_unstable.owl").load()
topics = get_request_json("https://training.galaxyproject.org/training-material/api/topics.json")
if run_test:
topics = ["microbiome"]
tutos = []
for topic in topics:
topic_information = get_request_json(f"https://training.galaxyproject.org/training-material/api/topics/{topic}.json")
topic_information = get_request_json(
f"https://training.galaxyproject.org/training-material/api/topics/{topic}.json"
)
for tuto in topic_information["materials"]:
if tuto is None:
continue
Expand All @@ -195,7 +196,7 @@ def get_tutorials(tool_fp: str, plausible_api: str, run_test: bool,) -> List[Dic
return tutos


def filter_tutorials(tutorials: List[Dict], tags: List) -> List[Dict]:
def filter_tutorials(tutorials: List[Dict], tags: List) -> List[Dict]:
"""
Filter training based on a list of tags
"""
Expand All @@ -215,66 +216,69 @@ def export_tutorials_to_tsv(tutorials: List[Dict], output_fp: str) -> None:
"""
Export tutorials to a TSV file
"""
df = (pd.DataFrame(tutorials)
.assign(
Workflows=lambda df: df.workflows.notna(),
exact_supported_servers= lambda df: df.exact_supported_servers.fillna("").apply(list),
inexact_supported_servers= lambda df: df.inexact_supported_servers.fillna("").apply(list),
visit_duration= lambda df: df.visit_duration/60
df = (pd.DataFrame(tutorials).assign(
Workflows=lambda df: df.workflows.notna(),
exact_supported_servers=lambda df: df.exact_supported_servers.fillna("").apply(list),
inexact_supported_servers=lambda df: df.inexact_supported_servers.fillna("").apply(list),
visit_duration=lambda df: df.visit_duration/60
)
)

for col in ["exact_supported_servers", "inexact_supported_servers", "short_tools", "edam_operation", "edam_topic"]:
df[col] = shared_functions.format_list_column(df[col])

df = (df
.rename(columns = {
"title": "Title",
"hands_on": "Tutorial",
"url": "Link",
"slides": "Slides",
"mod_date": "Last modification",
"pub_date": "Creation",
"version": "Version",
"short_tools": "Tools",
"exact_supported_servers": "Servers with precise tool versions",
"inexact_supported_servers": "Servers with tool but different versions",
"topic_name_human": "Topic",
"video": "Video",
"edam_topic": "EDAM topic",
"edam_operation": "EDAM operation",
"feedback_number": "Feedback number",
"feedback_mean_note": "Feedback mean note",
"visitors": "Visitors",
"pageviews": "Page views",
"visit_duration": "Visit duration",
"video_versions": "Video versions",
"video_view": "Video views"
})

df = (
df.rename(
columns={
"title": "Title",
"hands_on": "Tutorial",
"url": "Link",
"slides": "Slides",
"mod_date": "Last modification",
"pub_date": "Creation",
"version": "Version",
"short_tools": "Tools",
"exact_supported_servers": "Servers with precise tool versions",
"inexact_supported_servers": "Servers with tool but different versions",
"topic_name_human": "Topic",
"video": "Video",
"edam_topic": "EDAM topic",
"edam_operation": "EDAM operation",
"feedback_number": "Feedback number",
"feedback_mean_note": "Feedback mean note",
"visitors": "Visitors",
"pageviews": "Page views",
"visit_duration": "Visit duration",
"video_versions": "Video versions",
"video_view": "Video views",
}
)
.fillna("")
.reindex(columns = [
"Topic",
"Title",
"Link",
"EDAM topic",
"EDAM operation",
"Creation",
"Last modification",
"Version",
"Tutorial",
"Slides",
"Video",
"Workflows",
"Tools",
"Servers with precise tool versions",
"Servers with tool but different versions",
"Feedback number",
"Feedback mean note",
"Visitors",
"Page views",
"Visit duration",
"Video views"
])
.reindex(
columns=[
"Topic",
"Title",
"Link",
"EDAM topic",
"EDAM operation",
"Creation",
"Last modification",
"Version",
"Tutorial",
"Slides",
"Video",
"Workflows",
"Tools",
"Servers with precise tool versions",
"Servers with tool but different versions",
"Feedback number",
"Feedback mean note",
"Visitors",
"Page views",
"Visit duration",
"Video views",
]
)
)

df.to_csv(output_fp, sep="\t", index=False)
Expand All @@ -287,7 +291,9 @@ def export_tutorials_to_tsv(tutorials: List[Dict], output_fp: str) -> None:
subparser = parser.add_subparsers(dest="command")
# Extract tutorials
extracttutorials = subparser.add_parser("extracttutorials", help="Extract all training materials")
extracttutorials.add_argument("--all_tutorials", "-o", required=True, help="Filepath to JSON with all extracted training materials")
extracttutorials.add_argument(
"--all_tutorials", "-o", required=True, help="Filepath to JSON with all extracted training materials"
)
extracttutorials.add_argument(
"--tools",
"-t",
Expand Down Expand Up @@ -336,5 +342,3 @@ def export_tutorials_to_tsv(tutorials: List[Dict], output_fp: str) -> None:
# filter training lists
filtered_tutorials = filter_tutorials(tutorials, tags)
export_tutorials_to_tsv(filtered_tutorials, args.filtered_tutorials)


4 changes: 2 additions & 2 deletions bin/shared_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def load_json(input_df: str):
Read a JSON file
"""
with Path(input_df).open("r") as t:
content = json.load(t)
content = json.load(t)
return content


Expand All @@ -64,4 +64,4 @@ def read_suite_per_tool_id(tool_fp: str) -> Dict:
"Galaxy wrapper owner": suite["Galaxy wrapper id"],
"EDAM operation": suite["EDAM operation"],
}
return tools
return tools

0 comments on commit e68f466

Please sign in to comment.