Skip to content

Commit

Permalink
Remove tool duplication (galaxyproject#104)
Browse files Browse the repository at this point in the history
* Add to export script an option to avoid parsing extra repositories in conf

* Use the avoid-extra-repositories option for all steps except the 1st

* Snake case argument names

* Remove FROGS from extra repo
  • Loading branch information
bebatut authored Jun 3, 2024
1 parent 1917c45 commit 0cf3343
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 18 deletions.
2 changes: 1 addition & 1 deletion bin/extract_all_tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ mkdir -p 'results/'
python bin/extract_galaxy_tools.py \
extractools \
--api $GITHUB_API_KEY \
--all_tools 'results/all_tools.tsv'
--all-tools 'results/all_tools.tsv'

python bin/create_interactive_table.py \
--table "results/all_tools.tsv" \
Expand Down
17 changes: 14 additions & 3 deletions bin/extract_all_tools_stepwise.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,20 @@ mkdir -p 'results/'

output="results/${1}_tools.tsv"

python bin/extract_galaxy_tools.py \
if [[ $1 =~ "01" ]]; then
python bin/extract_galaxy_tools.py \
extractools \
--api $GITHUB_API_KEY \
--all_tools $output \
--planemorepository $1
--all-tools $output \
--planemo-repository-list $1
else
python bin/extract_galaxy_tools.py \
extractools \
--api $GITHUB_API_KEY \
--all-tools $output \
--planemo-repository-list $1 \
--avoid-extra-repositories
fi



4 changes: 2 additions & 2 deletions bin/extract_all_tools_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ output="results/${1}_tools.tsv"
python bin/extract_galaxy_tools.py \
extractools \
--api $GITHUB_API_KEY \
--all_tools $output \
--planemorepository $1 \
--all-tools $output \
--planemo-repository-list $1 \
--test

35 changes: 25 additions & 10 deletions bin/extract_galaxy_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,14 +123,14 @@ def get_string_content(cf: ContentFile) -> str:


def get_tool_github_repositories(
g: Github, RepoSelection: Optional[str], run_test: bool, add_extra_repositories: bool = True
g: Github, repository_list: Optional[str], run_test: bool, add_extra_repositories: bool = True
) -> List[str]:
"""
Get list of tool GitHub repositories to parse
:param g: GitHub instance
:param RepoSelection: The selection to use from the repository (needed to split the process for CI jobs)
:run_test: for testing only parse the repository
:param repository_list: The selection to use from the repository (needed to split the process for CI jobs)
:param run_test: for testing only parse the repository
"""

if run_test:
Expand All @@ -140,8 +140,8 @@ def get_tool_github_repositories(
repo_list: List[str] = []
for i in range(1, 5):
repo_selection = f"repositories0{i}.list"
if RepoSelection: # only get these repositories
if RepoSelection == repo_selection:
if repository_list: # only get these repositories
if repository_list == repo_selection:
repo_f = repo.get_contents(repo_selection)
repo_l = get_string_content(repo_f).rstrip()
repo_list.extend(repo_l.split("\n"))
Expand Down Expand Up @@ -614,11 +614,21 @@ def filter_tools(
# Extract tools
extractools = subparser.add_parser("extractools", help="Extract tools")
extractools.add_argument("--api", "-a", required=True, help="GitHub access token")
extractools.add_argument("--all_tools", "-o", required=True, help="Filepath to TSV with all extracted tools")
extractools.add_argument("--all-tools", "-o", required=True, help="Filepath to TSV with all extracted tools")
extractools.add_argument(
"--planemorepository", "-pr", required=False, help="Repository list to use from the planemo-monitor repository"
"--planemo-repository-list",
"-pr",
required=False,
help="Repository list to use from the planemo-monitor repository",
)
extractools.add_argument(
"--avoid-extra-repositories",
"-e",
action="store_true",
default=False,
required=False,
help="Do not parse extra repositories in conf file",
)

extractools.add_argument(
"--test",
"-t",
Expand All @@ -637,7 +647,7 @@ def filter_tools(
help="Filepath to TSV with all extracted tools, generated by extractools command",
)
filtertools.add_argument(
"--filtered_tools",
"--filtered-tools",
"-f",
required=True,
help="Filepath to TSV with filtered tools",
Expand All @@ -663,7 +673,12 @@ def filter_tools(
# connect to GitHub
g = Github(args.api)
# get list of GitHub repositories to parse
repo_list = get_tool_github_repositories(g, args.planemorepository, args.test)
repo_list = get_tool_github_repositories(
g=g,
repository_list=args.planemo_repository_list,
run_test=args.test,
add_extra_repositories=not args.avoid_extra_repositories,
)
# parse tools in GitHub repositories to extract metadata, filter by TS categories and export to output file
tools: List[Dict] = []
for r in repo_list:
Expand Down
2 changes: 1 addition & 1 deletion bin/get_community_tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ for com_data_fp in data/communities/* ; do
python bin/extract_galaxy_tools.py \
filtertools \
--tools "results/all_tools.tsv" \
--filtered_tools "results/$community/tools.tsv" \
--filtered-tools "results/$community/tools.tsv" \
--categories "data/communities/$community/categories" \
--exclude "data/communities/$community/tools_to_exclude" \
--keep "data/communities/$community/tools_to_keep"
Expand Down
1 change: 0 additions & 1 deletion data/conf.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
extra-repositories:
- https://github.com/qiime2/galaxy-tools
- https://github.com/geraldinepascal/FROGS-wrappers

0 comments on commit 0cf3343

Please sign in to comment.