# GitHub Actions workflow: preprocess user-selected datasets on manual dispatch.
name: Preprocessing of specified datasets

# Manually triggered only: the person dispatching the workflow chooses the
# datasets and which preprocessing tasks should run on them.
on:
  workflow_dispatch:
    inputs:
      # Space-separated list of dataset identifiers; consumed word-by-word
      # by the job's shell steps.
      datasets:
        description: 'Datasets to preprocess, separated by space. Example: 0001 0002 0044'
        required: true
        type: string
      # Toggles the "Standardize compounds" step.
      do_standardize:
        description: 'Whether to standardize compounds'
        type: boolean
        default: true
      # Toggles the "Compounds classyfire classes" step.
      do_classyfire:
        description: 'Whether to compute ClassyFire classes'
        type: boolean
        default: true

jobs:
  # Runs the selected preprocessing scripts for the requested datasets and
  # commits the resulting files back to the branch the workflow ran on.
  preprocess:
    name: Preprocess raw data
    # Linux runner: the preprocessing scripts are executed inside a Docker
    # image built from the repository's Dockerfile.
    runs-on: ubuntu-latest
    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}  # needed for pulling R packages from github
      # Free-text input is handed to the shell through the environment rather
      # than spliced into the script text, so it cannot inject shell commands.
      DATASETS: ${{ inputs.datasets }}
      # Local tag for the image built from ./Dockerfile.
      PREPROCESS_IMAGE: preprocess-ci
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          lfs: true

      # Purely informational; never allowed to fail the job.
      - name: List all files for selected datasets
        run: |
          # $DATASETS is intentionally unquoted: one iteration per dataset id.
          for f in $DATASETS; do
            ls -lh "raw_data/$f" || true
            ls -lh "processed_data/$f" || true
          done
        continue-on-error: true

      # `using:` / `image:` / `args:` are action-metadata keys and are not
      # valid on a workflow step, so the image is built and run explicitly.
      - name: Build preprocessing image
        if: ${{ inputs.do_standardize || inputs.do_classyfire }}
        run: docker build -t "$PREPROCESS_IMAGE" .

      # NOTE(review): assumes the image's ENTRYPOINT (if any) executes the
      # arguments it receives as a command — confirm against the Dockerfile.
      - name: Standardize compounds
        if: ${{ inputs.do_standardize }}
        run: |
          # $DATASETS is intentionally unquoted: one argument per dataset id.
          docker run --rm -e GITHUB_PAT \
            -v "$GITHUB_WORKSPACE:/github/workspace" -w /github/workspace \
            "$PREPROCESS_IMAGE" \
            Rscript scripts/R_ci/compounds_standardize.R $DATASETS

      - name: Compounds classyfire classes
        if: ${{ inputs.do_classyfire }}
        run: |
          # $DATASETS is intentionally unquoted: one argument per dataset id.
          docker run --rm -e GITHUB_PAT \
            -v "$GITHUB_WORKSPACE:/github/workspace" -w /github/workspace \
            "$PREPROCESS_IMAGE" \
            Rscript scripts/R_ci/compounds_classyfire.R $DATASETS

      - name: Commit preprocessing
        env:
          DO_STANDARDIZE: ${{ inputs.do_standardize }}
          DO_CLASSYFIRE: ${{ inputs.do_classyfire }}
        run: |
          # NOTE(review): the address in the scraped source was obfuscated;
          # this is the github-actions[bot] noreply address — confirm intended author.
          git config --global user.email '41898282+github-actions[bot]@users.noreply.github.com'
          git config --global user.name 'Github Actions'
          # Use LFS storage of main repository: no push access to fork LFS storage
          # TODO: change once repository is moved
          git add processed_data raw_data
          # Nothing staged (e.g. all tasks disabled): finish cleanly instead of
          # failing on an empty commit and triggering the debug session.
          if git diff --cached --quiet; then
            echo "No preprocessing changes to commit"
            exit 0
          fi
          # Only tasks this workflow actually defines are listed in the message.
          git commit -m "Preprocessing $DATASETS" -m "Tasks:
          - standardize compounds: $DO_STANDARDIZE
          - compute classyfire classes: $DO_CLASSYFIRE"
          git lfs push origin HEAD  # first push LFS, otherwise failure because of lfs.url
          git push origin HEAD

      # Opens an interactive tmate session only when an earlier step failed.
      - name: Debug with tmate on failure
        if: ${{ failure() }}
        uses: mxschmitt/action-tmate@v3