# Add dataset 39098 #150 - workflow file for this run

name: Preprocessing

# Trigger: pull requests whose changes touch files below raw_data/ or
# processed_data/.
on:
  pull_request_target:
    # A PR was opened or reopened, or new commits were pushed to its fork
    types:
      - opened
      - synchronize
      - reopened
    paths:
      - 'raw_data/**'
      - 'processed_data/**'
jobs:
  # Preprocesses every dataset touched by the pull request, commits the
  # results back to the PR branch, posts a report comment and labels the PR
  # according to the outcome.
  #
  # NOTE(review): this job is triggered by `pull_request_target`, checks out
  # the fork's head and then runs scripts/ from that checkout. Confirm that
  # executing contributor-controlled code with this job's token is acceptable.
  preprocess:
    name: Preprocess raw data
    runs-on: ubuntu-latest
    container:
      image: ghcr.io/${{ github.repository_owner }}/repo_rt_preprocessing:latest
    env:
      RENV_PATHS_LIBRARY: '/renv/library'
    defaults:
      run:
        shell: bash
    steps:
      - name: Checkout fork repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
          repository: ${{ github.event.pull_request.head.repo.full_name }}
          ref: ${{ github.head_ref }}
          lfs: true

      # NOTE(review): the action reference was obfuscated in the captured
      # source ("Ana06/[email protected]"); the action name follows from the
      # `added_modified_renamed` output used below, but confirm the version tag.
      - name: Get changed files
        id: files
        uses: Ana06/get-changed-files@v2.2.0

      # Reduces the changed paths to the unique, purely numeric names of their
      # parent directories (the dataset ids) and exposes them, space separated,
      # as the step output `files`.
      - name: Get new/changed datasets
        id: filesfolders
        # custom shell without the -e / -o pipefail flags of the `bash` default
        shell: bash {0}
        env:
          # File names come from the fork and are untrusted: hand them over as
          # an environment variable instead of expanding them into the script
          # text, where shell metacharacters would be executed.
          CHANGED_FILES: ${{ steps.files.outputs.added_modified_renamed }}
        run: |
          echo "files=$(for f in $CHANGED_FILES; do basename "$(dirname "$f")"; done | grep -E '^[0-9]+$' | sort | uniq | tr '\n' ' ')" >> "$GITHUB_OUTPUT"

      # From here on `steps.filesfolders.outputs.files` only contains digits
      # and spaces (enforced by the grep filter above).
      - name: List all added files
        run: |
          for f in ${{ steps.filesfolders.outputs.files }}; do
            ls -lh raw_data/$f
          done

      - name: Standardize compounds
        run: Rscript scripts/R_ci/compounds_standardize.R ${{ steps.filesfolders.outputs.files }}

      - name: Compounds classyfire classes
        run: Rscript scripts/R_ci/compounds_classyfire.R ${{ steps.filesfolders.outputs.files }}

      - name: Compounds descriptors
        run: Rscript scripts/R_ci/compounds_descriptors.R ${{ steps.filesfolders.outputs.files }}

      - name: Compounds fingerprints
        run: Rscript scripts/R_ci/compounds_fingerprints.R ${{ steps.filesfolders.outputs.files }}

      - name: Metadata standardization
        run: Rscript scripts/R_ci/metadata_standardize.R ${{ steps.filesfolders.outputs.files }}

      - name: Generate dataset reports
        run: Rscript scripts/R_ci/compounds_overview.R ${{ steps.filesfolders.outputs.files }}

      - name: Verify that required files are present
        run: Rscript scripts/R_ci/files_complete.R ${{ steps.filesfolders.outputs.files }}

      # The overview and validation steps below are best effort: a failure is
      # recorded but does not fail the job.
      - name: Update overview table of all datasets
        run: python3 scripts/Python/datasets_overview.py
        continue-on-error: true

      - name: QSPR-based validation
        run: python3 scripts/Python/validation_qspr.py ${{ steps.filesfolders.outputs.files }}
        continue-on-error: true

      - name: Retention order-based validation for datasets with nominally identical setups
        run: python3 scripts/Python/validation_order.py --mode same_condition ${{ steps.filesfolders.outputs.files }}
        continue-on-error: true

      - name: Retention order-based validation for datasets of systematic measurements
        run: python3 scripts/Python/validation_order.py --mode systematic ${{ steps.filesfolders.outputs.files }}
        continue-on-error: true

      - name: Commit preprocessing
        run: |
          # NOTE(review): the address was obfuscated in the captured source
          # ('[email protected]'); confirm the intended committer e-mail.
          git config --global user.email 'github-actions[bot]@users.noreply.github.com'
          git config --global user.name 'Github Actions'
          # because of dockerized environment, git will otherwise complain about "dubious ownership of directory"
          git config --global safe.directory '*'
          # Use LFS storage of main repository: no push access to fork LFS storage
          # TODO: change once repository is moved
          git config lfs.url 'https://github.com/f-kretschmer/pr_test_repo.git/info/lfs'
          git add processed_data raw_data
          # `git commit` exits non-zero when nothing is staged, which would mark
          # an otherwise successful run (e.g. a re-run) as failed.
          if git diff --cached --quiet; then
            echo "No preprocessing changes to commit"
            exit 0
          fi
          git commit -m "Preprocessing ${{ steps.filesfolders.outputs.files }}"
          git lfs push origin HEAD # first push LFS, otherwise failure because of lfs.url
          git push origin HEAD

      - name: Add comment with report to PR
        uses: actions/github-script@v6
        with:
          script: |
            const datasets = '${{ steps.filesfolders.outputs.files }}'.trim().split(' ')
            const report = await exec.getExecOutput('python3 scripts/Python/report.py', datasets)
            // await the request so that API errors surface in this step
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: report.stdout
            })
        continue-on-error: true

      # NOTE(review): `@master` is a mutable ref; consider pinning the labeler
      # action to a release tag or commit.
      - name: Label as successfully preprocessed
        if: ${{ success() }}
        uses: andymckay/labeler@master
        with:
          add-labels: "preprocessing successful"
          remove-labels: "preprocessing failed"

      # Label before opening the debug session: the tmate step blocks until the
      # session ends, which would otherwise delay the label.
      - name: Label as failed
        if: ${{ failure() }}
        uses: andymckay/labeler@master
        with:
          add-labels: "preprocessing failed"
          remove-labels: "preprocessing successful"

      # NOTE(review): consider restricting who may connect to the session and
      # bounding its duration.
      - name: Debug with tmate on failure
        if: ${{ failure() }}
        uses: mxschmitt/action-tmate@v3