diff --git a/.bumpversion.cfg b/.bumpversion.cfg new file mode 100644 index 00000000..079dcdaa --- /dev/null +++ b/.bumpversion.cfg @@ -0,0 +1,25 @@ +[bumpversion] +current_version = 1.0.0 +commit = True +tag = False +parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>[a-z0-9+]+) + +[bumpversion:file:nextflow.config] +search = version = '{current_version}' +replace = version = '{new_version}' + +[bumpversion:file:README.md] +search = -revision {current_version} +replace = -revision {new_version} + +[bumpversion:file (example commands):docs/usage.md] +search = -revision {current_version} +replace = -revision {new_version} + +[bumpversion:file (urls):docs/usage.md] +search = /{current_version}/ +replace = /{new_version}/ + +[bumpversion:file (templated example):docs/usage.md] +search = {current_version}` +replace = {new_version}` diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..b290e090 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,20 @@ +{ + "name": "nfcore", + "image": "nfcore/gitpod:latest", + "remoteUser": "gitpod", + "runArgs": ["--privileged"], + + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + // Set *default* container specific settings.json values on container create. + "settings": { + "python.defaultInterpreterPath": "/opt/conda/bin/python" + }, + + // Add the IDs of extensions you want installed when the container is created. + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + } + } +} diff --git a/.editorconfig b/.editorconfig index b78de6e6..72dda289 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,7 +8,7 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{md,yml,yaml,html,css,scss,js,cff}] +[*.{md,yml,yaml,html,css,scss,js}] indent_size = 2 # These files are edited and tested upstream in nf-core/modules @@ -18,7 +18,16 @@ end_of_line = unset insert_final_newline = unset trim_trailing_whitespace = unset indent_style = unset -indent_size = unset +[/subworkflows/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset [/assets/email*] indent_size = unset + +# ignore python and markdown +[*.{py,md}] +indent_style = unset diff --git a/.gitattributes b/.gitattributes index 050bb120..7a2dabc2 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,4 @@ *.config linguist-language=nextflow +*.nf.test linguist-language=nextflow modules/nf-core/** linguist-generated subworkflows/nf-core/** linguist-generated diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index c54ec775..2509ca62 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -9,6 +9,7 @@ Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) +> [!NOTE] > If you need help using or modifying nf-core/oncoanalyser then the best place to ask is on the nf-core Slack [#oncoanalyser](https://nfcore.slack.com/channels/oncoanalyser) channel ([join our Slack here](https://nf-co.re/join/slack)). ## Contribution workflow @@ -25,6 +26,12 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests +You have the option to test your changes locally by running the pipeline. 
For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command: + +```bash +nf-test test --profile debug,test,docker --verbose +``` + When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. @@ -85,7 +92,7 @@ Once there, use `nf-core schema build` to add to `nextflow_schema.json`. Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. -The process resources can be passed on to the tool dynamically within the process with the `${task.cpu}` and `${task.memory}` variables in the `script:` block. +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. ### Naming schemes @@ -101,3 +108,18 @@ If you are using a new feature from core Nextflow, you may bump the minimum requ ### Images and figures For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). + +## GitHub Codespaces + +This repo includes a devcontainer configuration which will create a GitHub Codespaces for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal. + +To get started: + +- Open the repo in [Codespaces](https://github.com/nf-core/oncoanalyser/codespaces) +- Tools installed + - nf-core + - Nextflow + +Devcontainer specs: + +- [DevContainer config](.devcontainer/devcontainer.json) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 3ed51a88..93b3c04b 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -42,9 +42,9 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 21.10.3)_ + * Nextflow version _(eg. 23.04.0)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ - * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ * OS _(eg. CentOS Linux, macOS, Linux Mint)_ * Version of nf-core/oncoanalyser _(eg. 1.1, 1.5, 1.8.2)_ diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 045d9396..19d3a06b 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -15,9 +15,11 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/onco - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! 
-- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/oncoanalyser/tree/master/.github/CONTRIBUTING.md)- [ ] If necessary, also make a PR on the nf-core/oncoanalyser _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/oncoanalyser/tree/master/.github/CONTRIBUTING.md) +- [ ] If necessary, also make a PR on the nf-core/oncoanalyser _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index f32fa1b6..f8fecdac 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -8,13 +8,13 @@ on: types: [published] workflow_dispatch: jobs: - run-tower: + run-platform: name: Run AWS full tests if: github.repository == 'nf-core/oncoanalyser' runs-on: ubuntu-latest steps: - - name: Launch workflow via tower - uses: nf-core/tower-action@v3 + - name: Launch workflow via Seqera Platform + uses: seqeralabs/action-tower-launch@v2 # TODO nf-core: You can customise AWS full pipeline tests as required # Add full size test data (but still relatively small datasets for few samples) # on the `test_full.config` test runs with only one set of parameters @@ -22,13 +22,18 @@ jobs: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/oncoanalyser/work-${{ github.sha }} parameters: | { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/oncoanalyser/results-${{ github.sha }}" } - profiles: test_full,aws_tower - - uses: actions/upload-artifact@v3 + profiles: test_full + + - uses: actions/upload-artifact@v4 with: - name: Tower debug log file - path: tower_action_*.log + name: Seqera Platform debug log file + path: | + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 9a4ec76d..1ddcaa95 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -5,25 +5,29 @@ name: nf-core AWS test on: workflow_dispatch: jobs: - run-tower: + run-platform: name: Run AWS tests if: github.repository == 'nf-core/oncoanalyser' runs-on: ubuntu-latest steps: - # Launch workflow using Tower CLI tool action - - name: Launch workflow via tower - uses: nf-core/tower-action@v3 + # Launch workflow using Seqera Platform CLI tool action + - name: Launch workflow via Seqera Platform + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/oncoanalyser/work-${{ github.sha }} parameters: | { "outdir": "s3://${{ secrets.AWS_S3_BUCKET 
}}/oncoanalyser/results-test-${{ github.sha }}" } - profiles: test,aws_tower - - uses: actions/upload-artifact@v3 + profiles: test + + - uses: actions/upload-artifact@v4 with: - name: Tower debug log file - path: tower_action_*.log + name: Seqera Platform debug log file + path: | + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 0a1cdd32..0494f13f 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -13,13 +13,13 @@ jobs: - name: Check PRs if: github.repository == 'nf-core/oncoanalyser' run: | - { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/oncoanalyser ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/oncoanalyser ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] # If the above check failed, post a comment on the PR explaining the failure # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - name: Post PR comment if: failure() - uses: mshick/add-pr-comment@v1 + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 with: message: | ## This PR is against the `master` branch :x: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e1f80dbd..0f0e4cc0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,29 +11,33 @@ on: env: NXF_ANSI_LOG: false +concurrency: + group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + cancel-in-progress: true + jobs: test: - name: Run pipeline with test data + name: Run pipeline stubs # Only run on push if this is the nf-core dev branch (merged PRs) if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/oncoanalyser') }}" runs-on: ubuntu-latest strategy: matrix: NXF_VER: - - "21.10.3" + - "23.04.0" - "latest-everything" steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 with: version: "${{ matrix.NXF_VER }}" - - name: Run pipeline with test data - # TODO nf-core: You can customise CI pipeline run tests as required - # For example: adding multiple test runs with different parameters - # Remember that you can parallelise this by using strategy.matrix + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - name: Run pipeline stubs only run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -stub -profile test_stub --outdir ./results diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml new file mode 100644 index 00000000..0b6b1f27 --- /dev/null +++ b/.github/workflows/clean-up.yml @@ -0,0 +1,24 @@ +name: "Close user-tagged issues and PRs" +on: + schedule: + - cron: "0 0 * * 0" # Once a week + +jobs: + clean-up: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 + with: + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." 
+ stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." + close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." + days-before-stale: 30 + days-before-close: 20 + days-before-pr-close: -1 + any-of-labels: "awaiting-changes,awaiting-feedback" + exempt-issue-labels: "WIP" + exempt-pr-labels: "WIP" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 00000000..d09995d0 --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,79 @@ +name: Test successful pipeline download with 'nf-core download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. +on: + workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core download." + required: true + default: "dev" + pull_request: + types: + - opened + - edited + - synchronize + branches: + - master + pull_request_target: + branches: + - master + +env: + NXF_ANSI_LOG: false + +jobs: + download: + runs-on: ubuntu-latest + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + architecture: "x64" + - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 + with: + singularity-version: 3.8.3 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git@dev + + - name: Get the repository name and current branch set as environment variable + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + run: | + nf-core download ${{ env.REPO_LOWERCASE }} \ + --revision ${{ env.REPO_BRANCH }} \ + --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ + --compress "none" \ + --container-system 'singularity' \ + --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ + --container-cache-utilisation 'amend' \ + --download-configuration + + - name: Inspect download + run: tree ./${{ env.REPOTITLE_LOWERCASE }} + + - name: Run the downloaded pipeline (stub) + id: stub_run_pipeline + continue-on-error: true + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test_stub,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index 50f455de..02ca9338 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -4,7 +4,7 @@ on: types: [created] jobs: - deploy: + fix-linting: # Only run if comment is on a PR with the main repo, and if it contains the magic keywords if: > 
contains(github.event.comment.html_url, '/pull/') && @@ -13,10 +13,17 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v3 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 with: token: ${{ secrets.nf_core_bot_auth_token }} + # indication that the linting is being fixed + - name: React on comment + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: eyes + # Action runs on the issue comment, so we don't get the PR by default # Use the gh cli to check out the PR - name: Checkout Pull Request @@ -24,32 +31,59 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v2 + # Install and run pre-commit + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" - - name: Install Prettier - run: npm install -g prettier @prettier/plugin-php + - name: Install pre-commit + run: pip install pre-commit - # Check that we actually need to fix something - - name: Run 'prettier --check' - id: prettier_status - run: | - if prettier --check ${GITHUB_WORKSPACE}; then - echo "::set-output name=result::pass" - else - echo "::set-output name=result::fail" - fi + - name: Run pre-commit + id: pre-commit + run: pre-commit run --all-files + continue-on-error: true - - name: Run 'prettier --write' - if: steps.prettier_status.outputs.result == 'fail' - run: prettier --write ${GITHUB_WORKSPACE} + # indication that the linting has finished + - name: react if linting finished succesfully + if: steps.pre-commit.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: "+1" - name: Commit & push changes - if: steps.prettier_status.outputs.result == 'fail' + id: commit-and-push + if: steps.pre-commit.outcome == 'failure' run: | git config user.email "core@nf-co.re" git config user.name "nf-core-bot" git config push.default upstream git add . git status - git commit -m "[automated] Fix linting with Prettier" + git commit -m "[automated] Fix code linting" git push + + - name: react if linting errors were fixed + id: react-if-fixed + if: steps.commit-and-push.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: hooray + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: confused + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + issue-number: ${{ github.event.issue.number }} + body: | + @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. + See [CI log](https://github.com/nf-core/oncoanalyser/actions/runs/${{ github.run_id }}) for more details. 
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 8a5ce69b..1fcafe88 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -4,79 +4,41 @@ name: nf-core linting # that the code meets the nf-core guidelines. on: push: + branches: + - dev pull_request: release: types: [published] jobs: - EditorConfig: + pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - - uses: actions/setup-node@v2 - - - name: Install editorconfig-checker - run: npm install -g editorconfig-checker - - - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') - - Prettier: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - - uses: actions/setup-node@v2 - - - name: Install Prettier - run: npm install -g prettier - - - name: Run Prettier --check - run: prettier --check ${GITHUB_WORKSPACE} - - PythonBlack: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - - name: Check code lints with Black - uses: psf/black@stable - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - message: | - ## Python linting (`black`) is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. - To fix this CI test, please run: - - * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` - * Fix formatting errors in your pipeline: `black .` - - Once you push these changes the test should pass, and you can hide this comment :+1: + python-version: "3.12" - We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + - name: Install pre-commit + run: pip install pre-commit - Thanks again for your contribution! 
- repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false + - name: Run pre-commit + run: pre-commit run --all-files nf-core: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@v3 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: "3.7" + python-version: "3.12" architecture: "x64" - name: Install dependencies @@ -97,7 +59,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 04758f61..40acc23f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,17 +11,17 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@v2 + uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 with: workflow: linting.yml workflow_conclusion: completed - name: Get PR number id: pr_number - run: echo "::set-output name=pr_number::$(cat linting-logs/PR_number.txt)" + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@v2 + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml new file mode 100644 index 00000000..03ecfcf7 --- /dev/null +++ b/.github/workflows/release-announcements.yml @@ -0,0 +1,75 @@ +name: release-announcements +# Automatic release toot and tweet anouncements +on: + release: + types: [published] + workflow_dispatch: + +jobs: + toot: + runs-on: ubuntu-latest + steps: + - name: get topics and convert to hashtags + id: get_topics + run: | + echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" >> $GITHUB_OUTPUT + + - uses: rzr/fediverse-action@master + with: + access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} + host: "mstdn.science" # custom host if not "mastodon.social" (default) + # GitHub event payload + # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release + message: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
+ + Please see the changelog: ${{ github.event.release.html_url }} + + ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics + + send-tweet: + runs-on: ubuntu-latest + + steps: + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.10" + - name: Install dependencies + run: pip install tweepy==4.14.0 + - name: Send tweet + shell: python + run: | + import os + import tweepy + + client = tweepy.Client( + access_token=os.getenv("TWITTER_ACCESS_TOKEN"), + access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), + consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), + consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), + ) + tweet = os.getenv("TWEET") + client.create_tweet(text=tweet) + env: + TWEET: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} + TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} + TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} + TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} + + bsky-post: + runs-on: ubuntu-latest + steps: + - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0 + with: + post: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + env: + BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} + BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} + # diff --git a/.gitignore b/.gitignore index 5124c9ac..5b62d021 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +examples/ +*.tar.gz .nextflow* work/ data/ diff --git a/.gitpod.yml b/.gitpod.yml index 85d95ecc..105a1821 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,14 +1,20 @@ image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack - - codezombiech.gitignore # Language support for .gitignore files - # - cssho.vscode-svgviewer # SVG viewer - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code - - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar - mechatroner.rainbow-csv # Highlight columns in csv files in different colors - # - nextflow.nextflow # Nextflow syntax highlighting + # - nextflow.nextflow # Nextflow syntax highlighting - oderwat.indent-rainbow # Highlight indentation level - streetsidesoftware.code-spell-checker # Spelling checker for source code + - charliermarsh.ruff # Code linter Ruff diff --git a/.nf-core.yml b/.nf-core.yml index 3805dc81..12d00da0 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1 +1,9 @@ repository_type: pipeline +lint: + actions_ci: False + multiqc_config: False + files_exist: + - lib/Utils.groovy + - lib/WorkflowMain.groovy + - lib/WorkflowOncoanalyser.groovy +nf_core_version: "2.14.1" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..4dc0f1dc 
--- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,13 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v3.1.0" + hooks: + - id: prettier + additional_dependencies: + - prettier@3.2.5 + + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python + rev: "2.7.3" + hooks: + - id: editorconfig-checker + alias: ec diff --git a/.prettierignore b/.prettierignore index eb74a574..437d763d 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,5 +1,6 @@ email_template.html adaptivecard.json +slackreport.json .nextflow* work/ data/ @@ -8,3 +9,4 @@ results/ testing/ testing* *.pyc +bin/ diff --git a/CHANGELOG.md b/CHANGELOG.md index f5b610f4..09612f63 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,16 +1,8 @@ # nf-core/oncoanalyser: Changelog -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project mostly adheres to +[Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0dev - [date] +## 1.0.0 - [date] Initial release of nf-core/oncoanalyser, created with the [nf-core](https://nf-co.re/) template. - -### `Added` - -### `Fixed` - -### `Dependencies` - -### `Deprecated` diff --git a/CITATION.cff b/CITATION.cff deleted file mode 100644 index 017666c0..00000000 --- a/CITATION.cff +++ /dev/null @@ -1,56 +0,0 @@ -cff-version: 1.2.0 -message: "If you use `nf-core tools` in your work, please cite the `nf-core` publication" -authors: - - family-names: Ewels - given-names: Philip - - family-names: Peltzer - given-names: Alexander - - family-names: Fillinger - given-names: Sven - - family-names: Patel - given-names: Harshil - - family-names: Alneberg - given-names: Johannes - - family-names: Wilm - given-names: Andreas - - family-names: Garcia - given-names: Maxime Ulysse - - family-names: Di Tommaso - given-names: Paolo - - family-names: Nahnsen - given-names: Sven -title: "The nf-core framework for community-curated bioinformatics pipelines." -version: 2.4.1 -doi: 10.1038/s41587-020-0439-x -date-released: 2022-05-16 -url: https://github.com/nf-core/tools -prefered-citation: - type: article - authors: - - family-names: Ewels - given-names: Philip - - family-names: Peltzer - given-names: Alexander - - family-names: Fillinger - given-names: Sven - - family-names: Patel - given-names: Harshil - - family-names: Alneberg - given-names: Johannes - - family-names: Wilm - given-names: Andreas - - family-names: Garcia - given-names: Maxime Ulysse - - family-names: Di Tommaso - given-names: Paolo - - family-names: Nahnsen - given-names: Sven - doi: 10.1038/s41587-020-0439-x - journal: nature biotechnology - start: 276 - end: 278 - title: "The nf-core framework for community-curated bioinformatics pipelines." - issue: 3 - volume: 38 - year: 2020 - url: https://dx.doi.org/10.1038/s41587-020-0439-x diff --git a/CITATIONS.md b/CITATIONS.md index dfebcc85..19dc7afc 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,10 +10,61 @@ ## Pipeline tools -- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) +- [BCFtools](https://doi.org/10.1093/gigascience/giab008) -- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) - > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. 
PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + > Danecek, P., Bonfield, J. K., Liddle, J., Marshall, J., Ohan, V., Pollard, M. O., Whitwham, A., Keane, T., McCarthy, S. A., Davies, R. M., & Li, H. (2021). Twelve years of SAMtools and BCFtools. GigaScience, 10(2), giab008. https://doi.org/10.1093/gigascience/giab008 + +- [BWA](https://doi.org/10.1093/bioinformatics/btp324) + + > Li, H., & Durbin, R. (2009). Fast and accurate short read alignment with Burrows–Wheeler transform. Bioinformatics, 25(14), 1754–1760. https://doi.org/10.1093/bioinformatics/btp324 + +- [bwa-mem2](https://doi.org/10.1109/IPDPS.2019.00041) + + > Vasimuddin, Md., Misra, S., Li, H., & Aluru, S. (2019). Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems. 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS), 314–324. https://doi.org/10.1109/IPDPS.2019.00041 + +- [CHORD](https://doi.org/10.1038/s41467-020-19406-4) + + > Nguyen, L., W. M. Martens, J., Van Hoeck, A., & Cuppen, E. (2020). Pan-cancer landscape of homologous recombination deficiency. Nature Communications, 11(1), 5584. https://doi.org/10.1038/s41467-020-19406-4 + +- [fastp](https://doi.org/10.1093/bioinformatics/bty560) + + > Chen, S., Zhou, Y., Chen, Y., & Gu, J. (2018). fastp: An ultra-fast all-in-one FASTQ preprocessor. Bioinformatics, 34(17), i884–i890. https://doi.org/10.1093/bioinformatics/bty560 + +- [GATK](https://doi.org/10.1093/bioinformatics/btp324) + + > McKenna, A., Hanna, M., Banks, E., Sivachenko, A., Cibulskis, K., Kernytsky, A., Garimella, K., Altshuler, D., Gabriel, S., Daly, M., & DePristo, M. A. (2010). The Genome Analysis Toolkit: A MapReduce framework for analyzing next-generation DNA sequencing data. Genome Research, 20(9), 1297–1303. https://doi.org/10.1101/gr.107524.110 + +- [GRIDSS2](https://doi.org/10.1186/s13059-021-02423-x) + + > Cameron, D. L., Baber, J., Shale, C., Valle-Inclan, J. E., Besselink, N., van Hoeck, A., Janssen, R., Cuppen, E., Priestley, P., & Papenfuss, A. T. (2021). GRIDSS2: Comprehensive characterisation of somatic structural variation using single breakend variants and structural variant phasing. Genome Biology, 22(1), Article 1. https://doi.org/10.1186/s13059-021-02423-x + +- [LILAC](https://doi.org/10.1038/s41588-023-01367-1) + + > Martínez-Jiménez, F., Priestley, P., Shale, C., Baber, J., Rozemuller, E., & Cuppen, E. (2023). Genetic immune escape landscape in primary and metastatic cancer. Nature Genetics, 55(5), 820–831. https://doi.org/10.1038/s41588-023-01367-1 + +- [LINX](https://doi.org/10.1016/j.xgen.2022.100112) + + > Shale, C., Cameron, D. L., Baber, J., Wong, M., Cowley, M. J., Papenfuss, A. T., Cuppen, E., & Priestley, P. (2022). Unscrambling cancer genomes via integrated analysis of structural variation and copy number. Cell Genomics, 2(4). https://doi.org/10.1016/j.xgen.2022.100112 + +- [PURPLE](https://doi.org/10.1038/s41586-019-1689-y) + + > Priestley, P., Baber, J., Lolkema, M. P., Steeghs, N., de Bruijn, E., Shale, C., Duyvesteyn, K., Haidari, S., van Hoeck, A., Onstenk, W., Roepman, P., Voda, M., Bloemendal, H. J., Tjan-Heijnen, V. C. G., van Herpen, C. M. L., Labots, M., Witteveen, P. O., Smit, E. F., Sleijfer, S., … Cuppen, E. (2019). Pan-cancer whole-genome analyses of metastatic solid tumours. Nature, 575(7781), 210–216. https://doi.org/10.1038/s41586-019-1689-y + +- [Sambamba](https://doi.org/10.1093/bioinformatics/btv098) + + > Tarasov, A., Vilella, A. J., Cuppen, E., Nijman, I. J., & Prins, P. (2015). 
Sambamba: Fast processing of NGS alignment formats. Bioinformatics, 31(12), 2032–2034. https://doi.org/10.1093/bioinformatics/btv098 + +- [SAMtools](https://doi.org/10.1093/gigascience/giab008) + + > Danecek, P., Bonfield, J. K., Liddle, J., Marshall, J., Ohan, V., Pollard, M. O., Whitwham, A., Keane, T., McCarthy, S. A., Davies, R. M., & Li, H. (2021). Twelve years of SAMtools and BCFtools. GigaScience, 10(2), giab008. https://doi.org/10.1093/gigascience/giab008 + +- [STAR](https://doi.org/10.1093/bioinformatics/bts635) + + > Dobin, A., Davis, C. A., Schlesinger, F., Drenkow, J., Zaleski, C., Jha, S., Batut, P., Chaisson, M., & Gingeras, T. R. (2013). STAR: Ultrafast universal RNA-seq aligner. Bioinformatics, 29(1), 15–21. https://doi.org/10.1093/bioinformatics/bts635 + +- [VIRUSBreakend](https://doi.org/10.1093/bioinformatics/btab343) + + > Cameron, D. L., Jacobs, N., Roepman, P., Priestley, P., Cuppen, E., & Papenfuss, A. T. (2021). VIRUSBreakend: Viral Integration Recognition Using Single Breakends. Bioinformatics, 37(19), 3115–3119. https://doi.org/10.1093/bioinformatics/btab343 ## Software packaging/containerisation tools @@ -31,5 +82,8 @@ - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index f4fd052f..c089ec78 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,18 +1,20 @@ -# Code of Conduct at nf-core (v1.0) +# Code of Conduct at nf-core (v1.4) ## Our Pledge -In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: - Age +- Ability - Body size +- Caste - Familial status - Gender identity and expression - Geographical location - Level of experience - Nationality and national origins - Native language -- Physical and neurological ability +- Neurodiversity - Race or ethnicity - Religion - Sexual identity and orientation @@ -22,80 +24,133 @@ Please note that the list above is alphabetised and is therefore not ranked in a ## Preamble -> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. 
"We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: -An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. -We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. -Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. +Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. -We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. -Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. ## Our Responsibilities -The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. -The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 
+The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. -Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC. +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. -## When are where does this Code of Conduct apply? +## When and where does this Code of Conduct apply? -Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference: +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): - Communicating with an official project email address. - Communicating with community members within the nf-core Slack channel. - Participating in hackathons organised by nf-core (both online and in-person events). -- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. -- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. - Representing nf-core on social media. This includes both official and personal accounts. ## nf-core cares 😊 -nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): - Ask for consent before sharing another community member’s personal information (including photographs) on social media. - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. -- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) - Demonstrate empathy towards other community members. 
(We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) - Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) - Focus on what is best for the team and the community. (When in doubt, ask) -- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. +- Accept feedback, yet be unafraid to question, deliberate, and learn. - Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) -- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) - Take breaks when you feel like you need them. -- Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) +- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) ## nf-core frowns on 😕 -The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. - “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. - Spamming or trolling of individuals on social media. -- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. -- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. ### Online Trolling -The majority of nf-core interactions and events are held online. 
Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. -All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. -## Procedures for Reporting CoC violations +## Procedures for reporting CoC violations If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. -You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s). +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the nf-core core team [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. + +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. + +All reports will be handled with the utmost discretion and confidentiality. + +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: + +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable. +- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. + +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. 
+ +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. -Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course. +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. -All reports will be handled with utmost discretion and confidentially. +## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. + +## Enforcement + +Actions taken by the nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. +- Removing access to the gather.town and Slack, either temporarily or permanently. +- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. ## Attribution and Acknowledgements @@ -106,6 +161,22 @@ All reports will be handled with utmost discretion and confidentially. ## Changelog -### v1.0 - March 12th, 2021 +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. + +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. + +### v1.1 - October 14th, 2021 + +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. + +### v1.0 - March 15th, 2021 - Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. 
diff --git a/README.md b/README.md index 5275a6a3..89e4062d 100644 --- a/README.md +++ b/README.md @@ -1,92 +1,166 @@ -# ![nf-core/oncoanalyser](docs/images/nf-core-oncoanalyser_logo_light.png#gh-light-mode-only) ![nf-core/oncoanalyser](docs/images/nf-core-oncoanalyser_logo_dark.png#gh-dark-mode-only) - -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/oncoanalyser/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) - -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/) +

+ [nf-core/oncoanalyser logo block: docs/images/nf-core-oncoanalyser_logo_light.png / nf-core-oncoanalyser_logo_dark.png]

+ +[![GitHub Actions CI Status](https://github.com/nf-core/oncoanalyser/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/oncoanalyser/actions/workflows/ci.yml) +[![GitHub Actions Linting Status](https://github.com/nf-core/oncoanalyser/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/oncoanalyser/actions/workflows/linting.yml) +[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/oncoanalyser/results) +[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) + +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.5-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) -[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/oncoanalyser) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/oncoanalyser) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23oncoanalyser-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/oncoanalyser)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23oncoanalyser-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/oncoanalyser) +[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core) +[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core) +[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction - +**nf-core/oncoanalyser** is a Nextflow implementation of the comprehensive cancer DNA/RNA analysis and reporting +workflow from the Hartwig Medical Foundation (HMF). The workflow starts from FASTQ or BAM and calls genomic variants, +analyses transcript data, infers important biomarkers and features (e.g. TMB, HRD, mutational signatures, HLA alleles, +oncoviral content, tissue of origin, etc), annotates and interprets results in the clinical context, and more. -**nf-core/oncoanalyser** is a bioinformatics best-practice analysis pipeline for A comprehensive cancer WGS/WTS analysis and reporting pipeline. +Both the HMF WGS/WTS workflow and targeted sequencing workflow are available in oncoanalyser. 
The targeted sequencing +workflow has built-in support for the TSO500 panel and can also run custom panels with externally-generated +normalisation data. -The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! +The key analysis results for each sample are summarised and presented in an ORANGE report (summary page excerpt shown +below from _[COLO829_wgts.orange_report.pdf](https://pub-29f2e5b2b7384811bdbbcba44f8b5083.r2.dev/oncoanalyser/other/example_report/COLO829_wgts.orange_report.pdf)_): - +

-On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources.The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/oncoanalyser/results). +For detailed information on each component of the HMF workflow, please refer to +[hartwigmedical/hmftools](https://github.com/hartwigmedical/hmftools/). ## Pipeline summary - +The following processes and tools can be run with `oncoanalyser`: + +- Simple DNA/RNA alignment (`bwa-mem2`, `STAR`) +- Post-alignment processing (`MarkDups`, `Picard MarkDuplicates`) +- SNV, MNV, and INDEL calling (`SAGE`, `PAVE`) +- CNV calling (`AMBER`, `COBALT`, `PURPLE`) +- SV calling (`SvPrep`, `GRIDSS`, `GRIPSS`) +- SV event interpretation (`LINX`) +- Transcript analysis (`Isofox`) +- Oncoviral detection (`VIRUSBreakend`, `Virus Interpreter`) +- HLA calling (`LILAC`) +- HRD status prediction (`CHORD`) +- Mutational signature fitting (`Sigs`) +- Tissue of origin prediction (`CUPPA`) +- Report generation (`ORANGE`, `linxreport`) + +## Usage + +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. + +Create a samplesheet with your inputs (WGS/WTS FASTQs in this example): -1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) -2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +```csv +group_id,subject_id,sample_id,sample_type,sequence_type,filetype,info,filepath +P1__wgts,P1,SA,normal,dna,fastq,library_id:SA_library;lane:001,/path/to/SA.normal.dna.wgs.001.R1.fastq.gz;/path/to/SA.normal.dna.wgs.001.R2.fastq.gz +P1__wgts,P1,SB,tumor,dna,fastq,library_id:SB_library;lane:001,/path/to/SB.tumor.dna.wgs.001.R1.fastq.gz;/path/to/SB.tumor.dna.wgs.001.R2.fastq.gz +P1__wgts,P1,SC,tumor,rna,fastq,library_id:SC_library;lane:001,/path/to/SC.tumor.rna.wts.001.R1.fastq.gz;/path/to/SC.tumor.rna.wts.001.R2.fastq.gz +``` -## Quick Start +Launch `oncoanalyser`: -1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=21.10.3`) +```bash +nextflow run nf-core/oncoanalyser \ + -profile docker \ + -revision 1.0.0 \ + --mode wgts \ + --genome GRCh38_hmf \ + --input samplesheet.csv \ + --outdir output/ +``` -2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_. +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -3. Download the pipeline and test it on a minimal dataset with a single command: +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/oncoanalyser/usage) and the [parameter documentation](https://nf-co.re/oncoanalyser/parameters). - ```bash - nextflow run nf-core/oncoanalyser -profile test,YOURPROFILE --outdir - ``` +## Pipeline output - Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string. +To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/oncoanalyser/results) tab on the nf-core website pipeline page. +For more details about the output files and reports, please refer to the +[output documentation](https://nf-co.re/oncoanalyser/output). - > - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`. - > - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile ` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. - > - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs. - > - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs. +## Version information -4. Start running your own analysis! +### Extended support - +As `oncoanalyser` is used in clinical settings and subject to accreditation standards in some instances, there is a need +for long-term stability and reliability for feature releases in order to meet operational requirements. This is +accomplished through long-term support of several nominated feature releases, which all receive bug fixes and security +fixes during the period of extended support. - ```bash - nextflow run nf-core/oncoanalyser --input samplesheet.csv --outdir --genome GRCh37 -profile - ``` +Each release that is given extended support is allocated a separate long-lived git branch with the 'stable' prefix, e.g. +`stable/1.2.x`, `stable/1.5.x`. Feature development otherwise occurs on the `dev` branch with stable releases pushed to +`master`. 
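As a quick sketch of how this is intended to be used, a run can be pinned to an extended-support branch with Nextflow's `-revision` option. The `stable/1.2.x` name below is purely illustrative (it echoes the example prefix above); substitute a branch from the nominated versions listed below, which are currently TBD.

```bash
# Illustrative only: pin oncoanalyser to a hypothetical extended-support branch.
# Replace 'stable/1.2.x' with a branch that is actually nominated for long-term support.
nextflow run nf-core/oncoanalyser \
    -profile docker \
    -revision stable/1.2.x \
    --mode wgts \
    --genome GRCh38_hmf \
    --input samplesheet.csv \
    --outdir output/
```

Re-running with `-latest`, or running `nextflow pull nf-core/oncoanalyser` beforehand, should then pick up any bug-fix commits pushed to that branch.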
-## Documentation
-The nf-core/oncoanalyser pipeline comes with documentation about the pipeline [usage](https://nf-co.re/oncoanalyser/usage), [parameters](https://nf-co.re/oncoanalyser/parameters) and [output](https://nf-co.re/oncoanalyser/output).
+Versions nominated to have current long-term support: + +- TBD + +### Release parity + +Versioning between `oncoanalyser` and hmftools naturally differs; however, it is often necessary to relate the functional +equivalence of these two pieces of software. The functional/feature parity with regard to version releases is detailed +in the table below. + +| oncoanalyser | hmftools | +| ------------------- | -------- | +| 0.1.0 through 0.2.7 | 5.33 | +| 0.3.0 through 1.0.0 | 5.34 | + +## Known issues + +There are currently no known issues.
 ## Credits
-nf-core/oncoanalyser was originally written by Stephen Watts.
+The `oncoanalyser` pipeline was written by Stephen Watts while in the [Genomics Platform +Group](https://mdhs.unimelb.edu.au/centre-for-cancer-research/our-research/genomics-platform-group) at the [University +of Melbourne Centre for Cancer Research](https://mdhs.unimelb.edu.au/centre-for-cancer-research).
-We thank the following people for their extensive assistance in the development of this pipeline: +We thank the following organisations and people for their extensive assistance in the development of this pipeline, +listed in alphabetical order:
-
+- [Hartwig Medical Foundation + Australia](https://www.hartwigmedicalfoundation.nl/en/partnerships/hartwig-medical-foundation-australia/) +- Oliver Hofmann
 ## Contributions and Support
 If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).
-For further information or help, don't hesitate to get in touch on the [Slack `#oncoanalyser` channel](https://nfcore.slack.com/channels/oncoanalyser) (you can join with [this invite](https://nf-co.re/join/slack)). +For further information or help, don't hesitate to get in touch on the [Slack `#oncoanalyser` +channel](https://nfcore.slack.com/channels/oncoanalyser) (you can join with [this invite](https://nf-co.re/join/slack)).
 ## Citations
- - - -
+You can cite the `oncoanalyser` Zenodo record for a specific version using the following DOI: +[10.5281/zenodo.XXXXXXX](https://doi.org/10.5281/zenodo.XXXXXXX)
-An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. +An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) +file.
 You can cite the `nf-core` publication as follows:
 > **The nf-core framework for community-curated bioinformatics pipelines.** > -> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen. +> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, +> Paolo Di Tommaso & Sven Nahnsen. > > _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).
 diff --git a/assets/email_template.html b/assets/email_template.html index 098117ff..b906e78c 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -4,7 +4,7 @@ - + nf-core/oncoanalyser Pipeline Report @@ -12,7 +12,7 @@ -

nf-core/oncoanalyser v${version}

+

nf-core/oncoanalyser ${version}

Run Name: $runName

<% if (!success){ diff --git a/assets/email_template.txt b/assets/email_template.txt index 9058d515..30511616 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -4,7 +4,7 @@ |\\ | |__ __ / ` / \\ |__) |__ } { | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,' - nf-core/oncoanalyser v${version} + nf-core/oncoanalyser ${version} ---------------------------------------------------- Run Name: $runName diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 5e1aba5b..822724fb 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,17 +3,19 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "nf-core/oncoanalyser Methods Description" section_href: "https://github.com/nf-core/oncoanalyser" plot_type: "html" -## TODO nf-core: Update the HTML below to your prefered methods description, e.g. add publication citation for this pipeline -## You inject any metadata in the Nextflow '${workflow}' object data: |

Methods

-

Data was processed using nf-core/oncoanalyser v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).

+

Data was processed using nf-core/oncoanalyser v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

${workflow.commandLine}
+

${tool_citations}

References

    -
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820
  • -
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x
  • +
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820
  • +
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x
  • +
  • Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7
  • +
  • da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192
  • + ${tool_bibliography}
Notes:
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml deleted file mode 100644 index dfb73cca..00000000 --- a/assets/multiqc_config.yml +++ /dev/null @@ -1,13 +0,0 @@ -report_comment: > - This report has been generated by the nf-core/oncoanalyser - analysis pipeline. For information about how to interpret these results, please see the - documentation. -report_section_order: - "nf-core-oncoanalyser-methods-description": - order: -1000 - software_versions: - order: -1001 - "nf-core-oncoanalyser-summary": - order: -1002 - -export_plots: true diff --git a/assets/nf-core-oncoanalyser_logo_light.png b/assets/nf-core-oncoanalyser_logo_light.png index 72327aa5..f529ea38 100644 Binary files a/assets/nf-core-oncoanalyser_logo_light.png and b/assets/nf-core-oncoanalyser_logo_light.png differ diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 5f653ab7..1a8d4b39 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,12 @@ -sample,fastq_1,fastq_2 -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, +group_id,subject_id,sample_id,sample_type,sequence_type,filetype,filepath +subject_one__to__dna,subject_one,sample_a,tumor,dna,bam,/path/to/subject_one/sample_a.tumor.bam + +subject_one__tn__dna,subject_one,sample_a,tumor,dna,bam,/path/to/subject_one/sample_a.tumor.bam +subject_one__tn__dna,subject_one,sample_b,normal,dna,bam,/path/to/subject_one/sample_b.normal.bam + +subject_one__tn__dna_rna,subject_one,sample_a,tumor,dna,bam,/path/to/subject_one/sample_a.tumor.bam +subject_one__tn__dna_rna,subject_one,sample_b,normal,dna,bam,/path/to/subject_one/sample_b.normal.bam +subject_one__tn__dna_rna,subject_one,sample_c,tumor,rna,bam,/path/to/subject_one/sample_c.tumor_rna.bam + +subject_one__to__dna_rna,subject_one,sample_a,tumor,dna,bam,/path/to/subject_one/sample_a.tumor.bam +subject_one__to__dna_rna,subject_one,sample_c,tumor,rna,bam,/path/to/subject_one/sample_c.tumor_rna.bam diff --git a/assets/schema_input.json b/assets/schema_input.json index 20402902..36182151 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -7,30 +7,35 @@ "items": { "type": "object", "properties": { - "sample": { + "group_id": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" + "errorMessage": "group_id must be provided and cannot contain spaces" }, - "fastq_1": { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "subject_id": { + "pattern": "^\\S+$", + "errorMessage": "subject_id must be provided and cannot contain spaces" + }, + "sample_id": { + "pattern": "^\\S+$", + "errorMessage": "subject_id must be provided and cannot contain spaces" + }, + "sample_type": { + "pattern": "^\\S+$", + "errorMessage": "sample_type must be provided and cannot contain spaces" }, - "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "sequence_type": { + "pattern": "^\\S+$", + "errorMessage": "sequence_type must be provided and cannot contain spaces" + }, + "filetype": { + "pattern": "^\\S+$", + "errorMessage": "filetype must be 
provided and cannot contain spaces" + }, + "filepath": { + "pattern": "^\\S+$", + "errorMessage": "filepath must be provided and cannot contain spaces" } - }, - "required": ["sample", "fastq_1"] + } } } diff --git a/assets/slackreport.json b/assets/slackreport.json new file mode 100644 index 00000000..c1b840e2 --- /dev/null +++ b/assets/slackreport.json @@ -0,0 +1,34 @@ +{ + "attachments": [ + { + "fallback": "Plain-text summary of the attachment.", + "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "author_name": "nf-core/oncoanalyser ${version} - ${runName}", + "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", + "fields": [ + { + "title": "Command used to launch the workflow", + "value": "```${commandLine}```", + "short": false + } + <% + if (!success) { %> + , + { + "title": "Full error message", + "value": "```${errorReport}```", + "short": false + }, + { + "title": "Pipeline configuration", + "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? ("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py deleted file mode 100755 index 11b15572..00000000 --- a/bin/check_samplesheet.py +++ /dev/null @@ -1,262 +0,0 @@ -#!/usr/bin/env python - - -"""Provide a command line tool to validate and transform tabular samplesheets.""" - - -import argparse -import csv -import logging -import sys -from collections import Counter -from pathlib import Path - -logger = logging.getLogger() - - -class RowChecker: - """ - Define a service that can validate and transform each given row. - - Attributes: - modified (list): A list of dicts, where each dict corresponds to a previously - validated and transformed row. The order of rows is maintained. - - """ - - VALID_FORMATS = ( - ".fq.gz", - ".fastq.gz", - ) - - def __init__( - self, - sample_col="sample", - first_col="fastq_1", - second_col="fastq_2", - single_col="single_end", - **kwargs, - ): - """ - Initialize the row checker with the expected column names. - - Args: - sample_col (str): The name of the column that contains the sample name - (default "sample"). - first_col (str): The name of the column that contains the first (or only) - FASTQ file path (default "fastq_1"). - second_col (str): The name of the column that contains the second (if any) - FASTQ file path (default "fastq_2"). - single_col (str): The name of the new column that will be inserted and - records whether the sample contains single- or paired-end sequencing - reads (default "single_end"). - - """ - super().__init__(**kwargs) - self._sample_col = sample_col - self._first_col = first_col - self._second_col = second_col - self._single_col = single_col - self._seen = set() - self.modified = [] - - def validate_and_transform(self, row): - """ - Perform all validations on the given row and insert the read pairing status. 
- - Args: - row (dict): A mapping from column headers (keys) to elements of that row - (values). - - """ - self._validate_sample(row) - self._validate_first(row) - self._validate_second(row) - self._validate_pair(row) - self._seen.add((row[self._sample_col], row[self._first_col])) - self.modified.append(row) - - def _validate_sample(self, row): - """Assert that the sample name exists and convert spaces to underscores.""" - if len(row[self._sample_col]) <= 0: - raise AssertionError("Sample input is required.") - # Sanitize samples slightly. - row[self._sample_col] = row[self._sample_col].replace(" ", "_") - - def _validate_first(self, row): - """Assert that the first FASTQ entry is non-empty and has the right format.""" - if len(row[self._first_col]) <= 0: - raise AssertionError("At least the first FASTQ file is required.") - self._validate_fastq_format(row[self._first_col]) - - def _validate_second(self, row): - """Assert that the second FASTQ entry has the right format if it exists.""" - if len(row[self._second_col]) > 0: - self._validate_fastq_format(row[self._second_col]) - - def _validate_pair(self, row): - """Assert that read pairs have the same file extension. Report pair status.""" - if row[self._first_col] and row[self._second_col]: - row[self._single_col] = False - first_col_suffix = Path(row[self._first_col]).suffixes[-2:] - second_col_suffix = Path(row[self._second_col]).suffixes[-2:] - if first_col_suffix != second_col_suffix: - raise AssertionError("FASTQ pairs must have the same file extensions.") - else: - row[self._single_col] = True - - def _validate_fastq_format(self, filename): - """Assert that a given filename has one of the expected FASTQ extensions.""" - if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): - raise AssertionError( - f"The FASTQ file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) - - def validate_unique_samples(self): - """ - Assert that the combination of sample name and FASTQ filename is unique. - - In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the - number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. - - """ - if len(self._seen) != len(self.modified): - raise AssertionError("The pair of sample name and FASTQ must be unique.") - seen = Counter() - for row in self.modified: - sample = row[self._sample_col] - seen[sample] += 1 - row[self._sample_col] = f"{sample}_T{seen[sample]}" - - -def read_head(handle, num_lines=10): - """Read the specified number of lines from the current position in the file.""" - lines = [] - for idx, line in enumerate(handle): - if idx == num_lines: - break - lines.append(line) - return "".join(lines) - - -def sniff_format(handle): - """ - Detect the tabular format. - - Args: - handle (text file): A handle to a `text file`_ object. The read position is - expected to be at the beginning (index 0). - - Returns: - csv.Dialect: The detected tabular format. - - .. _text file: - https://docs.python.org/3/glossary.html#term-text-file - - """ - peek = read_head(handle) - handle.seek(0) - sniffer = csv.Sniffer() - if not sniffer.has_header(peek): - logger.critical("The given sample sheet does not appear to contain a header.") - sys.exit(1) - dialect = sniffer.sniff(peek) - return dialect - - -def check_samplesheet(file_in, file_out): - """ - Check that the tabular samplesheet has the structure expected by nf-core pipelines. 
- - Validate the general shape of the table, expected columns, and each row. Also add - an additional column which records whether one or two FASTQ reads were found. - - Args: - file_in (pathlib.Path): The given tabular samplesheet. The format can be either - CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. - file_out (pathlib.Path): Where the validated and transformed samplesheet should - be created; always in CSV format. - - Example: - This function checks that the samplesheet follows the following structure, - see also the `viral recon samplesheet`_:: - - sample,fastq_1,fastq_2 - SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz - SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz - SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz, - - .. _viral recon samplesheet: - https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv - - """ - required_columns = {"sample", "fastq_1", "fastq_2"} - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_in.open(newline="") as in_handle: - reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) - # Validate the existence of the expected header columns. - if not required_columns.issubset(reader.fieldnames): - req_cols = ", ".join(required_columns) - logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") - sys.exit(1) - # Validate each row. - checker = RowChecker() - for i, row in enumerate(reader): - try: - checker.validate_and_transform(row) - except AssertionError as error: - logger.critical(f"{str(error)} On line {i + 2}.") - sys.exit(1) - checker.validate_unique_samples() - header = list(reader.fieldnames) - header.insert(1, "single_end") - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. 
- with file_out.open(mode="w", newline="") as out_handle: - writer = csv.DictWriter(out_handle, header, delimiter=",") - writer.writeheader() - for row in checker.modified: - writer.writerow(row) - - -def parse_args(argv=None): - """Define and immediately parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Validate and transform a tabular samplesheet.", - epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", - ) - parser.add_argument( - "file_in", - metavar="FILE_IN", - type=Path, - help="Tabular input samplesheet in CSV or TSV format.", - ) - parser.add_argument( - "file_out", - metavar="FILE_OUT", - type=Path, - help="Transformed output samplesheet in CSV format.", - ) - parser.add_argument( - "-l", - "--log-level", - help="The desired log level (default WARNING).", - choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), - default="WARNING", - ) - return parser.parse_args(argv) - - -def main(argv=None): - """Coordinate argument parsing and program execution.""" - args = parse_args(argv) - logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") - if not args.file_in.is_file(): - logger.error(f"The given input file {args.file_in} was not found!") - sys.exit(2) - args.file_out.parent.mkdir(parents=True, exist_ok=True) - check_samplesheet(args.file_in, args.file_out) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/conf/base.config b/conf/base.config index 05eac721..e8443c5e 100644 --- a/conf/base.config +++ b/conf/base.config @@ -9,13 +9,12 @@ */ process { - - // TODO nf-core: Check the defaults for all processes + // Defaults for all processes cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' @@ -24,7 +23,6 @@ process { // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. // If possible, it would be nice to keep the same label naming convention when // adding in your local modules too. - // TODO nf-core: Customise requirements for specific processes. 
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { cpus = { check_max( 1 , 'cpus' ) } @@ -49,6 +47,9 @@ process { withLabel:process_long { time = { check_max( 20.h * task.attempt, 'time' ) } } + withLabel:process_medium_memory { + memory = { check_max( 50.GB * task.attempt, 'memory' ) } + } withLabel:process_high_memory { memory = { check_max( 200.GB * task.attempt, 'memory' ) } } @@ -59,7 +60,4 @@ process { errorStrategy = 'retry' maxRetries = 2 } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - } } diff --git a/conf/hmf_data.config b/conf/hmf_data.config new file mode 100644 index 00000000..6fdcba0b --- /dev/null +++ b/conf/hmf_data.config @@ -0,0 +1,108 @@ +params { + hmf_data_paths { + '37' { + // AMBER + heterozygous_sites = 'dna_pipeline/copy_number/AmberGermlineSites.37.tsv.gz' + // COBALT + gc_profile = 'dna_pipeline/copy_number/GC_profile.1000bp.37.cnp' + diploid_bed = 'dna_pipeline/copy_number/DiploidRegions.37.bed.gz' + // CUPPA + cuppa_resources = 'cuppa/' + // SV Prep + sv_prep_blocklist = 'dna_pipeline/sv/sv_prep_blacklist.37.bed' + // GRIDSS, GRIPSS + gridss_pon_breakends = 'dna_pipeline/sv/sgl_pon.37.bed.gz' + gridss_pon_breakpoints = 'dna_pipeline/sv/sv_pon.37.bedpe.gz' + gridss_region_blocklist = 'dna_pipeline/sv/gridss_blacklist.37.bed.gz' + repeatmasker_annotations = 'dna_pipeline/sv/repeat_mask_data.37.fa.gz' + // Isofox + isofox_counts = 'rna_pipeline/read_151_exp_counts.csv' + isofox_gc_ratios = 'rna_pipeline/read_100_exp_gc_ratios.csv' + // LILAC + lilac_resources = 'dna_pipeline/immune/' + // ORANGE + cohort_mapping = 'orange/cohort_mapping.tsv' + cohort_percentiles = 'orange/cohort_percentiles.tsv' + alt_sj_distribution = 'rna_pipeline/isofox.hmf_3444.alt_sj_cohort.37.csv' + gene_exp_distribution = 'rna_pipeline/isofox.hmf_3444.gene_distribution.37.csv' + // SAGE + clinvar_annotations = 'dna_pipeline/variants/clinvar.37.vcf.gz' + sage_blocklist_regions = 'dna_pipeline/variants/KnownBlacklist.germline.37.bed' + sage_blocklist_sites = 'dna_pipeline/variants/KnownBlacklist.germline.37.vcf.gz' + sage_actionable_panel = 'dna_pipeline/variants/ActionableCodingPanel.37.bed.gz' + sage_coverage_panel = 'dna_pipeline/variants/CoverageCodingPanel.37.bed.gz' + sage_highconf_regions = 'dna_pipeline/variants/NA12878_GIAB_highconf_IllFB-IllGATKHC-CG-Ion-Solid_ALLCHROM_v3.2.2_highconf.bed.gz' + sage_known_hotspots_germline = 'dna_pipeline/variants/KnownHotspots.germline.37.vcf.gz' + sage_known_hotspots_somatic = 'dna_pipeline/variants/KnownHotspots.somatic.37.vcf.gz' + sage_pon = 'dna_pipeline/variants/SageGermlinePon.1000x.37.tsv.gz' + // Sigs + sigs_signatures = 'sigs/snv_cosmic_signatures.csv' + // Virus Interpreter + virus_reporting_db = 'virusinterpreter/virus_reporting_db.tsv' + virus_taxonomy_db = 'virusinterpreter/taxonomy_db.tsv' + // Misc + disease_ontology = 'disease_ontology/doid.json' + driver_gene_panel = 'dna_pipeline/common/DriverGenePanel.37.tsv' + ensembl_data_resources = 'dna_pipeline/common/ensembl_data/' + gnomad_resource = 'dna_pipeline/variants/gnomad_variants_v37.csv.gz' + gridss_config = 'dna_pipeline/sv/gridss.properties' + known_fusion_data = 'dna_pipeline/sv/known_fusion_data.37.csv' + known_fusions = 'dna_pipeline/sv/known_fusions.37.bedpe' + purple_germline_del = 'dna_pipeline/copy_number/cohort_germline_del_freq.37.csv' + segment_mappability = 'dna_pipeline/variants/mappability_150.37.bed.gz' + unmap_regions = 'dna_pipeline/common/unmap_regions.37.tsv' + } + '38' { + // AMBER + 
heterozygous_sites = 'dna_pipeline/copy_number/AmberGermlineSites.38.tsv.gz' + // COBALT + gc_profile = 'dna_pipeline/copy_number/GC_profile.1000bp.38.cnp' + diploid_bed = 'dna_pipeline/copy_number/DiploidRegions.38.bed.gz' + // CUPPA + cuppa_resources = 'cuppa/' + // SV Prep + sv_prep_blocklist = 'dna_pipeline/sv/sv_prep_blacklist.38.bed' + // GRIDSS, GRIPSS + gridss_pon_breakends = 'dna_pipeline/sv/sgl_pon.38.bed.gz' + gridss_pon_breakpoints = 'dna_pipeline/sv/sv_pon.38.bedpe.gz' + gridss_region_blocklist = 'dna_pipeline/sv/gridss_blacklist.38.bed.gz' + repeatmasker_annotations = 'dna_pipeline/sv/repeat_mask_data.38.fa.gz' + // Isofox + isofox_counts = 'rna_pipeline/read_151_exp_counts.csv' + isofox_gc_ratios = 'rna_pipeline/read_100_exp_gc_ratios.csv' + // LILAC + lilac_resources = 'dna_pipeline/immune/' + // ORANGE + cohort_mapping = 'orange/cohort_mapping.tsv' + cohort_percentiles = 'orange/cohort_percentiles.tsv' + alt_sj_distribution = 'rna_pipeline/isofox.hmf_3444.alt_sj_cohort.38.csv' + gene_exp_distribution = 'rna_pipeline/isofox.hmf_3444.gene_distribution.38.csv' + // SAGE + clinvar_annotations = 'dna_pipeline/variants/clinvar.38.vcf.gz' + sage_blocklist_regions = 'dna_pipeline/variants/KnownBlacklist.germline.38.bed' + sage_blocklist_sites = 'dna_pipeline/variants/KnownBlacklist.germline.38.vcf.gz' + sage_actionable_panel = 'dna_pipeline/variants/ActionableCodingPanel.38.bed.gz' + sage_coverage_panel = 'dna_pipeline/variants/CoverageCodingPanel.38.bed.gz' + sage_highconf_regions = 'dna_pipeline/variants/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_nosomaticdel_noCENorHET7.bed.gz' + sage_known_hotspots_germline = 'dna_pipeline/variants/KnownHotspots.germline.38.vcf.gz' + sage_known_hotspots_somatic = 'dna_pipeline/variants/KnownHotspots.somatic.38.vcf.gz' + sage_pon = 'dna_pipeline/variants/SageGermlinePon.98x.38.tsv.gz' + // Sigs + sigs_signatures = 'sigs/snv_cosmic_signatures.csv' + // Virus Interpreter + virus_reporting_db = 'virusinterpreter/virus_reporting_db.tsv' + virus_taxonomy_db = 'virusinterpreter/taxonomy_db.tsv' + // Misc + disease_ontology = 'disease_ontology/doid.json' + driver_gene_panel = 'dna_pipeline/common/DriverGenePanel.38.tsv' + ensembl_data_resources = 'dna_pipeline/common/ensembl_data/' + gnomad_resource = 'dna_pipeline/variants/gnomad/' + gridss_config = 'dna_pipeline/sv/gridss.properties' + known_fusion_data = 'dna_pipeline/sv/known_fusion_data.38.csv' + known_fusions = 'dna_pipeline/sv/known_fusions.38.bedpe' + purple_germline_del = 'dna_pipeline/copy_number/cohort_germline_del_freq.38.csv' + segment_mappability = 'dna_pipeline/variants/mappability_150.38.bed.gz' + unmap_regions = 'dna_pipeline/common/unmap_regions.38.tsv' + } + } +} diff --git a/conf/hmf_genomes.config b/conf/hmf_genomes.config new file mode 100644 index 00000000..0e4a6aa5 --- /dev/null +++ b/conf/hmf_genomes.config @@ -0,0 +1,29 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for HMF genome paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines reference genomes using HMF genome paths. 
+---------------------------------------------------------------------------------------- +*/ + + +params { + genomes { + 'GRCh37_hmf' { + fasta = "${params.hmf_genome_base}/GRCh37_hmf/24.0/Homo_sapiens.GRCh37.GATK.illumina.fasta" + fai = "${params.hmf_genome_base}/GRCh37_hmf/24.0/samtools_index/1.16/Homo_sapiens.GRCh37.GATK.illumina.fasta.fai" + dict = "${params.hmf_genome_base}/GRCh37_hmf/24.0/samtools_index/1.16/Homo_sapiens.GRCh37.GATK.illumina.fasta.dict" + bwamem2_index = "${params.hmf_genome_base}/GRCh37_hmf/24.1/bwa-mem2_index/2.2.1.tar.gz" + gridss_index = "${params.hmf_genome_base}/GRCh37_hmf/24.1/gridss_index/2.13.2.tar.gz" + star_index = "${params.hmf_genome_base}/GRCh37_hmf/24.0/star_index/gencode_19/2.7.3a.tar.gz" + } + 'GRCh38_hmf' { + fasta = "${params.hmf_genome_base}/GRCh38_hmf/24.0/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna" + fai = "${params.hmf_genome_base}/GRCh38_hmf/24.0/samtools_index/1.16/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.fai" + dict = "${params.hmf_genome_base}/GRCh38_hmf/24.0/samtools_index/1.16/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.dict" + bwamem2_index = "${params.hmf_genome_base}/GRCh38_hmf/24.1/bwa-mem2_index/2.2.1.tar.gz" + gridss_index = "${params.hmf_genome_base}/GRCh38_hmf/24.1/gridss_index/2.13.2.tar.gz" + star_index = "${params.hmf_genome_base}/GRCh38_hmf/24.0/star_index/gencode_38/2.7.3a.tar.gz" + } + } +} diff --git a/conf/igenomes.config b/conf/igenomes.config index 7a1b3ac6..3a19bc37 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -10,19 +10,13 @@ params { // illumina iGenomes reference file paths + // NOTE(SW): the HMF reference data files are incompatible with hg19 due to different contig naming genomes { 'GRCh37' { - fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed" + fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" + fai = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa.fai" + dict = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.dict" + bwa_index = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" } 'GRCh38' { fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" @@ -36,6 +30,14 @@ params { macs_gsize = "2.7e9" blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" } + 'CHM13' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" + gff = 
"ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" + mito_name = "chrM" + } 'GRCm38' { fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" @@ -272,161 +274,10 @@ params { mito_name = "Mt" } 'hg38' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" - } - 'hg19' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed" - } - 'mm10' { - fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "1.87e9" - blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" - } - 'bosTau8' { - fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" - mito_name = "chrM" - } - 'ce10' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "9e7" - } - 'canFam3' { - fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" - mito_name = "chrM" - } - 'danRer10' { - fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "1.37e9" - } - 'dm6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "1.2e8" - } - 'equCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" - readme = 
"${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" - mito_name = "chrM" - } - 'galGal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" - mito_name = "chrM" - } - 'panTro4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" - mito_name = "chrM" - } - 'rn6' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" - mito_name = "chrM" - } - 'sacCer3' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" - readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "1.2e7" - } - 'susScr3' { - fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" - mito_name = "chrM" + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" + fai = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa.fai" + dict = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.dict" + bwa_image = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" } } } diff --git a/conf/modules.config b/conf/modules.config index da58a5d8..cc09c657 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -12,29 +12,241 @@ process { - publishDir = [ - path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + withName: 'WRITE_REFERENCE_DATA' { + def date = new java.util.Date().format('yyyyMMdd_HHmmss'); + publishDir = [ + path: { "${params.outdir}/reference_data/${workflow_version}/${date}" }, + mode: params.publish_dir_mode, + ] + } + + withName: 'STAR_GENOMEGENERATE' { + ext.args = '--genomeSAindexNbases 14 --sjdbOverhang 200 --genomeChrBinNbits 15' + } + + withName: 'GATK4_MARKDUPLICATES' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/alignments/rna/${filename}" }, + ] + } + + withName: 'MARKDUPS' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/alignments/dna/${filename}" }, + ] + } + + withName: 'AMBER' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" }, + ] + } + + withName: 'COBALT' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" }, + ] + } + + withName: '.*:GRIDSS_SVPREP_CALLING:(?:PREPROCESS|ASSEMBLE|CALL)' { + ext.otherJvmHeap = 4.GB + } + + withName: '.*:GRIDSS_SVPREP_CALLING:DEPTH_ANNOTATOR' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/gridss/${filename}" } + ] + } + + withName: '.*:GRIPSS_FILTERING:GERMLINE' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/gripss/germline/${filename}" }, + ] + } + + withName: '.*:GRIPSS_FILTERING:SOMATIC' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/gripss/somatic/${filename}" }, + ] + } + + withName: '.*:SAGE_CALLING:GERMLINE' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/sage/${filename}" }, + ] + } + + withName: '.*:SAGE_CALLING:SOMATIC' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : "${meta.key}/sage/${filename}" }, + ] + } + + withName: '.*:SAGE_APPEND:(?:GERMLINE|SOMATIC)' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/sage/append/${filename}" }, + ] + } + + withName: '.*:PAVE_ANNOTATION:(?:GERMLINE|SOMATIC)' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/pave/${filename}" }, + ] + } + + withName: 'PURPLE' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" }, + ] + } + + withName: '.*:LINX_ANNOTATION:GERMLINE' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/linx/germline_annotations/" }, + ] + } + + withName: '.*:LINX_ANNOTATION:SOMATIC' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/linx/somatic_annotations/" }, + ] + } + + withName: '.*:LINX_PLOTTING:VISUALISER' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/linx/somatic_plots/" }, + ] + } + + withName: '.*:LINX_PLOTTING:REPORT' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/linx/${filename}" }, + ] + } + + withName: 'BAMTOOLS' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/bamtools/${filename}" }, + ] + } + + withName: 'CHORD' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" }, + ] + } + + withName: 'EXTRACTCONTIG' { + // Run is an boolean input value passed to the process that is conditioned on whether there are runnable + // inputs for LILAC + ext.when = { run } + } + + withName: 'LILAC' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" } + ] + } + + withName: 'SIGS' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" }, + ] + } + + withName: 'VIRUSBREAKEND' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/virusbreakend/${filename}" }, + ] + } + + withName: 'VIRUSINTERPRETER' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" }, + ] + } - withName: SAMPLESHEET_CHECK { + withName: 'ISOFOX' { publishDir = [ - path: { "${params.outdir}/pipeline_info" }, + path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? 
null : "${meta.key}/${filename}" }, ] } - withName: FASTQC { - ext.args = '--quiet' + withName: 'CUPPA' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" }, + ] + } + + withName: 'SAMTOOLS_FLAGSTAT' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/flagstats/${filename}" }, + ] } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { + withName: 'ORANGE' { publishDir = [ - path: { "${params.outdir}/pipeline_info" }, + path: { "${params.outdir}" }, mode: params.publish_dir_mode, - pattern: '*_versions.yml' + // NOTE(SW): java.io.File and Nextflow's file do not work here, resorting to string splitting + saveAs: { filename -> + if (filename.equals('versions.yml')) { + return null + } else { + def tokens = filename.split('[/]') + return "${meta.key}/orange/${tokens[-1]}" + } + } ] } diff --git a/conf/panel_data.config b/conf/panel_data.config new file mode 100644 index 00000000..dbb979ce --- /dev/null +++ b/conf/panel_data.config @@ -0,0 +1,39 @@ +params { + + panel_data_paths { + + tso500 { + + '37' { + driver_gene_panel = 'common/DriverGenePanel.tso500.37.tsv' + sage_actionable_panel = 'variants/ActionableCodingPanel.tso500.37.bed.gz' + sage_coverage_panel = 'variants/CoverageCodingPanel.tso500.37.bed.gz' + pon_artefacts = 'variants/pon_artefacts.tso500.37.tsv.gz' + target_region_bed = 'copy_number/target_regions_definition.tso500.37.bed.gz' + target_region_normalisation = 'copy_number/cobalt_normalisation.tso500.37.tsv' + target_region_ratios = 'copy_number/target_regions_ratios.tso500.37.tsv' + target_region_msi_indels = 'copy_number/target_regions_msi_indels.tso500.37.tsv' + isofox_tpm_norm = 'rna_resources/isofox.tso500_gene_normalisation.csv' + isofox_gene_ids = 'rna_resources/tso500_rna_gene_ids.csv' + isofox_counts = 'rna_resources/read_93_exp_counts.37.csv' + isofox_gc_ratios = 'rna_resources/read_93_exp_gc_ratios.37.csv' + } + + '38' { + driver_gene_panel = 'common/DriverGenePanel.tso500.38.tsv' + sage_actionable_panel = 'variants/ActionableCodingPanel.tso500.38.bed.gz' + sage_coverage_panel = 'variants/CoverageCodingPanel.tso500.38.bed.gz' + pon_artefacts = 'variants/pon_artefacts.tso500.38.tsv.gz' + target_region_bed = 'copy_number/target_regions_definition.tso500.38.bed.gz' + target_region_normalisation = 'copy_number/cobalt_normalisation.tso500.38.tsv' + target_region_ratios = 'copy_number/target_regions_ratios.tso500.38.tsv' + target_region_msi_indels = 'copy_number/target_regions_msi_indels.tso500.38.tsv' + isofox_tpm_norm = 'rna_resources/isofox.tso500_gene_normalisation.csv' + isofox_gene_ids = 'rna_resources/tso500_rna_gene_ids.csv' + isofox_counts = 'rna_resources/read_93_exp_counts.38.csv' + isofox_gc_ratios = 'rna_resources/read_93_exp_gc_ratios.38.csv' + } + + } + } +} diff --git a/conf/targeted_parameters.config b/conf/targeted_parameters.config new file mode 100644 index 00000000..6345042a --- /dev/null +++ b/conf/targeted_parameters.config @@ -0,0 +1,14 @@ +process { + withName: '^.*COBALT_PROFILING:COBALT' { + ext.args = '-pcf_gamma 50' + } + + withName: '^.*:SAGE_CALLING:SOMATIC' { + ext.args = [ + '-high_depth_mode', + '-hard_min_tumor_vaf 0.002', + '-hotspot_min_tumor_vaf 0.015', + '-panel_min_tumor_qual 150', + ].join(' ').trim() + } +} diff --git a/conf/test.config b/conf/test.config index 42f23058..c9ede07f 100644 --- a/conf/test.config +++ 
b/conf/test.config @@ -10,20 +10,30 @@ ---------------------------------------------------------------------------------------- */ +process { + withName: 'PURPLE' { + ext.args = '-min_purity 1 -max_purity 1 -min_ploidy 2 -max_ploidy 2' + } +} + params { config_profile_name = 'Test profile' config_profile_description = 'Minimal test dataset to check pipeline function' + // NOTE(SW): incompatible with GHA given size of reference data, STAR align requires ~30 GB memory // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' + max_cpus = 1 + max_memory = '30.GB' max_time = '6.h' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' + input = 'https://github.com/nf-core/test-datasets/raw/oncoanalyser/samplesheet/fastq_eval.subject_a.wgts.tndna_trna.minimal.csv' + + // Reference data + ref_data_virusbreakenddb_path = 'https://pub-349bcb8decb44bf7acbddf90b270a061.r2.dev/virusbreakend/virusbreakenddb_test-24.04.0.tar.gz' - // Genome references - genome = 'R64-1-1' + // Analysis config + mode = 'wgts' + genome = 'GRCh38_hmf' + max_fastq_records = 0 } diff --git a/conf/test_full.config b/conf/test_full.config index 3a0c73e0..a2bd6f4c 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -10,15 +10,21 @@ ---------------------------------------------------------------------------------------- */ +process { + withName: 'PURPLE' { + ext.args = '-min_purity 1 -max_purity 1 -min_ploidy 2 -max_ploidy 2' + } +} + params { config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + input = 'https://github.com/nf-core/test-datasets/raw/oncoanalyser/samplesheet/fastq_eval.subject_a.wgts.tndna_trna.minimal.csv' - // Genome references - genome = 'R64-1-1' + // Analysis config + mode = 'wgts' + genome = 'GRCh38_hmf' + max_fastq_records = 0 } diff --git a/conf/test_stub.config b/conf/test_stub.config new file mode 100644 index 00000000..e7d08d0a --- /dev/null +++ b/conf/test_stub.config @@ -0,0 +1,47 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running stub tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/oncoanalyser -profile test_stub -stub --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Stub test profile' + config_profile_description = 'Stub test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 1 + max_memory = '8.GB' + max_time = '6.h' + + // Input data + input = 'https://github.com/nf-core/test-datasets/raw/oncoanalyser/samplesheet/bam_eval.subject_a.wgts.tndna_trna.minimal.stub.csv' + + // Reference data + genomes { + + 'GRCh38_hmf' { + fasta = "temp/GRCh38.fasta" + fai = "temp/GRCh38.fai" + dict = "temp/GRCh38.dict" + bwamem2_index = "temp/GRCh38_bwa-mem2_index/" + gridss_index = "temp/GRCh38_gridss_index/" + star_index = "temp/GRCh38_star_index/" + } + + } + + ref_data_virusbreakenddb_path = "temp/virusbreakenddb_20210401/" + ref_data_hmf_data_path = "temp/hmf_bundle_38/" + ref_data_panel_data_path = "temp/panel_bundle/tso500_38/" + + // Analysis config + mode = 'wgts' + genome = 'GRCh38_hmf' + create_stub_placeholders = true +} diff --git a/docs/images/COLO829_wgts.orange_report.summary_section.png b/docs/images/COLO829_wgts.orange_report.summary_section.png new file mode 100644 index 00000000..a5b9e6fa Binary files /dev/null and b/docs/images/COLO829_wgts.orange_report.summary_section.png differ diff --git a/docs/images/mqc_fastqc_adapter.png b/docs/images/mqc_fastqc_adapter.png deleted file mode 100755 index 361d0e47..00000000 Binary files a/docs/images/mqc_fastqc_adapter.png and /dev/null differ diff --git a/docs/images/mqc_fastqc_counts.png b/docs/images/mqc_fastqc_counts.png deleted file mode 100755 index cb39ebb8..00000000 Binary files a/docs/images/mqc_fastqc_counts.png and /dev/null differ diff --git a/docs/images/mqc_fastqc_quality.png b/docs/images/mqc_fastqc_quality.png deleted file mode 100755 index a4b89bf5..00000000 Binary files a/docs/images/mqc_fastqc_quality.png and /dev/null differ diff --git a/docs/images/nf-core-oncoanalyser_logo_dark.png b/docs/images/nf-core-oncoanalyser_logo_dark.png index 6e9f66c1..8be36007 100644 Binary files a/docs/images/nf-core-oncoanalyser_logo_dark.png and b/docs/images/nf-core-oncoanalyser_logo_dark.png differ diff --git a/docs/images/nf-core-oncoanalyser_logo_light.png b/docs/images/nf-core-oncoanalyser_logo_light.png index 5d6f1c71..8a840262 100644 Binary files a/docs/images/nf-core-oncoanalyser_logo_light.png and b/docs/images/nf-core-oncoanalyser_logo_light.png differ diff --git a/docs/output.md b/docs/output.md index fd696f58..1d9ac032 100644 --- a/docs/output.md +++ b/docs/output.md @@ -2,56 +2,504 @@ ## Introduction -This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. +This document describes the output produced by the pipeline. The directories listed below will be created in the results +directory after the pipeline has finished. All paths are relative to the top-level results directory. -The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. 
- - +```tree +output/ +│   +├── subject_1/ +│   ├── alignments/ +│   ├── amber/ +│   ├── bamtools/ +│   ├── chord/ +│   ├── cobalt/ +│   ├── cuppa/ +│   ├── flagstats/ +│   ├── gridss/ +│   ├── gripss/ +│   ├── isofox/ +│   ├── lilac/ +│   ├── linx/ +│   ├── orange/ +│   ├── pave/ +│   ├── purple/ +│   ├── sage/ +│   ├── sigs/ +│   ├── virusbreakend/ +│   └── virusinterpreter/ +│   +├── subject_2/ +│   └── ... +│   +... +│   +└── pipeline_info/ +``` ## Pipeline overview -The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: +- [Simple DNA/RNA alignment](#simple-dnarna-alignment) + - [bwa-mem2](#bwa-mem2) - DNA alignment + - [STAR](#star) - RNA alignment +- [Alignment post-processing](#alignment-post-processing) + - [MarkDups](#markdups) - General alignment processing + - [Picard Markduplicates](#picard-markduplicates) - Duplicate read marking +- [SNV, MNV, INDEL calling](#snv-mnv-indel-calling) + - [SAGE](#sage) - SNV, MNV, INDEL calling + - [PAVE](#pave) - Small variant annotation (transcript/coding effects) +- [SV calling](#sv-calling) + - [SvPrep](#svprep) - Read filtering for SV calling + - [GRIDSS](#gridss) - SV calling + - [GRIPSS](#gripss) - SV filtering and post-processing +- [CNV calling](#cnv-calling) + - [AMBER](#amber) - β-allele frequencies + - [COBALT](#cobalt) - Read depth ratios + - [PURPLE](#purple) - Purity/ploid estimation, variant annotation +- [SV event interpretation](#sv-event-interpretation) + - [LINX](#linx) - SV event clustering and annotation +- [Transcript analysis](#transcript-analysis) + - [Isofox](#isofox) - transcript counts, novel splicing and fusion calling +- [Oncoviral detection](#oncoviral-detection) + - [VIRUSBreakend](#virusbreakend) - viral content and integration calling + - [Virus Interpreter](#virus-interpreter) - oncoviral calling post-processing +- [HLA calling](#hla-calling) + - [LILAC](#lilac) - HLA calling +- [HRD status prediction](#hrd-status-prediction) + - [CHORD](#chord) - HRD status prediction +- [Mutational signature fitting](#mutational-signature-fitting) + - [Sigs](#sigs) - Mutational signature fitting +- [Tissue of origin prediction](#tissue-of-origin-prediction) + - [CUPPA](#cuppa) - Tissue of origin prediction +- [Report generation](#report-generation) + - [ORANGE](#orange) - Key results summary + - [linxreport](#linxreport) - Interactive LINX report +- [Pipeline information](#pipeline-information) - Workflow execution metrics + +### Simple DNA/RNA alignment + +Alignment functionality in `oncoanalyser` is simple and rigid, and exists only to meet the exact requirements of the +hmftools. + +#### bwa-mem2 + +[bwa-mem2](https://github.com/bwa-mem2/bwa-mem2) is a short-read mapping tool used to align reads to a large reference +sequences. In `oncoanalyser`, bwa-mem2 is used to align DNA reads to the human genome. + +_No outputs are published directly from bwa-mem2, see [MarkDups](#markdups) for the fully processed alignment outputs_ + +#### STAR + +[STAR](https://github.com/alexdobin/STAR) is a specialised mapping to used to align RNA reads to a reference +transcriptome. + +_No outputs are published directly from STAR, see [Picard MarkDuplicates](#picard-markduplicates) for the fully processed alignment outputs_ + +### Alignment post-processing + +#### MarkDups + +
+Output files + +- `/alignments/dna/` + - `.duplicate_freq.tsv`: Normal DNA sample read duplicate frequencies. + - `.markdups.bam`: Normal DNA sample output read alignments. + - `.markdups.bam.bai`: Normal DNA sample output read alignments index. + - `.duplicate_freq.tsv`: Tumor DNA sample read duplicate frequencies. + - `.markdups.bam`: Tumor DNA sample output read alignments. + - `.markdups.bam.bai`: Tumor DNA sample output read alignments index. + +
+ +[MarkDups](https://github.com/hartwigmedical/hmftools/tree/mark-dups-v1.1.7/mark-dups) applies various alignment +post-processing routines such as duplicate marking and unmapping of problematic regions. It can also handle UMIs when +configured to do so. + +_MarkDups is only run on DNA alignments_ + +#### Picard MarkDuplicates + +
+Output files + +- `/alignments/rna/` + - `.md.bam`: Tumor RNA sample read alignments. + - `.md.bam.bai`: Tumor RNA sample read alignments index. + - `.md.metrics`: Tumor RNA sample read duplicate marking metrics. + +
+ +[Picard MarkDuplicates](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard) is used to +mark duplicate reads following alignment. + +_Picard MarkDuplicates is only run on RNA alignments_ + +### SNV, MNV, INDEL calling + +#### SAGE + +
+Output files + +- `/sage/append/` + + - `.sage.append.vcf.gz`: Tumor DNA sample small variant VCF with RNA data appended. + - `.sage.append.vcf.gz`: Normal DNA sample small variant VCF with RNA data appended. + +- `/sage/somatic/` + + - `.sage.bqr.png`: Normal DNA sample base quality recalibration metrics plot. + - `.sage.bqr.tsv`: Normal DNA sample base quality recalibration metrics. + - `.sage.bqr.png`: Tumor DNA sample base quality recalibration metrics plot. + - `.sage.bqr.tsv`: Tumor DNA sample base quality recalibration metrics. + - `.sage.exon.medians.tsv`: Tumor DNA sample exon median depths. + - `.sage.gene.coverage.tsv`: Tumor DNA sample gene coverages. + - `.sage.somatic.filtered.vcf.gz.tbi`: Tumor DNA sample filtered small variant calls index. + - `.sage.somatic.filtered.vcf.gz`: Tumor DNA sample filtered small variant calls. + - `.sage.somatic.vcf.gz.tbi`: Tumor DNA sample small variant calls index. + - `.sage.somatic.vcf.gz`: Tumor DNA sample small variant calls. + +- `/sage/germline/` + - `.sage.bqr.png`: Tumor DNA sample base quality recalibration metrics plot. + - `.sage.bqr.tsv`: Tumor DNA sample base quality recalibration metrics. + - `.sage.exon.medians.tsv`: Normal DNA sample exon median depths. + - `.sage.gene.coverage.tsv`: Normal DNA sample gene coverages. + - `.sage.bqr.png`: Normal DNA sample base quality recalibration metrics plot. + - `.sage.bqr.tsv`: Normal DNA sample base quality recalibration metrics. + - `.sage.germline.filtered.vcf.gz.tbi`: Normal DNA sample filtered small variant calls index. + - `.sage.germline.filtered.vcf.gz`: Normal DNA sample filtered small variant calls. + - `.sage.germline.vcf.gz.tbi`: Normal DNA sample small variant calls index. + - `.sage.germline.vcf.gz`: Normal DNA sample small variant calls. + +
+ +[SAGE](https://github.com/hartwigmedical/hmftools/tree/master/sage) is a SNV, MNV, and INDEL caller optimised for 100x +tumor and 40x normal. + +#### PAVE -- [FastQC](#fastqc) - Raw read QC -- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline -- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution +
+Output files + +- `/pave/` + - `.sage.germline.filtered.pave.vcf.gz.tbi`: Annotated SAGE germline small variants index. + - `.sage.germline.filtered.pave.vcf.gz`: Annotated SAGE germline small variants. + - `.sage.somatic.filtered.pave.vcf.gz.tbi`: Annotated SAGE somatic small variants index. + - `.sage.somatic.filtered.pave.vcf.gz`: Annotated SAGE somatic small variants. + +
+ +[PAVE](https://github.com/hartwigmedical/hmftools/tree/master/pave) annotates variants called by SAGE with impact +information regarding transcript and coding effects. + +### SV calling + +#### SvPrep + +[SvPrep](https://github.com/hartwigmedical/hmftools/tree/master/sv-prep) runs prior to SV calling to reduce runtime by +rapidly identifying reads that are likely to be involved in an SV event. + +_No outputs are published directly from SvPrep, see [GRIPSS](#gripss) for the fully processed SV calling outputs_ + +#### GRIDSS + +
+Output files + +- `/gridss/` + - `.gridss.vcf.gz`: GRIDSS structural variants. + - `.gridss.vcf.gz.tbi`: GRIDSS structural variants index. + +
+ +[GRIDSS](https://github.com/PapenfussLab/gridss) is an SV caller that uses both read support and local +breakend/breakpoint assemblies to call variants. + +#### GRIPSS + +
+Output files + +- `/gripss/germline/` + + - `.gripss.filtered.germline.vcf.gz`: Filtered GRIDSS germline structural variants. + - `.gripss.filtered.germline.vcf.gz.tbi`: Filtered GRIDSS germline structural variants index. + - `.gripss.germline.vcf.gz`: GRIDSS structural variants (GRIPSS filters set but not applied). + - `.gripss.germline.vcf.gz.tbi`: GRIDSS structural variants index (GRIPSS filters set but not applied). + +- `/gripss/somatic/` + - `.gripss.filtered.somatic.vcf.gz`: Filtered GRIDSS somatic structural variants. + - `.gripss.filtered.somatic.vcf.gz.tbi`: Filtered GRIDSS somatic structural variants index. + - `.gripss.somatic.vcf.gz`: GRIDSS structural variants (GRIPSS filters set but not applied). + - `.gripss.somatic.vcf.gz.tbi`: GRIDSS structural variants index (GRIPSS filters set but not applied). + +
-### FastQC +[GRIPSS](https://github.com/hartwigmedical/hmftools/tree/master/gripss) applies filtering and post-processing to SV calls. + +### CNV calling + +#### AMBER
Output files -- `fastqc/` - - `*_fastqc.html`: FastQC report containing quality metrics. - - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. +- `/amber/` + - `amber.version`: AMBER version file. + - `.amber.baf.pcf`: Tumor DNA sample piecewise constant fit. + - `.amber.baf.tsv.gz`: Tumor DNA sample β-allele frequencies. + - `.amber.contamination.tsv`: Tumor DNA sample contamination TSV. + - `.amber.contamination.vcf.gz`: Tumor DNA sample contamination sites. + - `.amber.contamination.vcf.gz.tbi`: Tumor DNA sample contamination sites index. + - `.amber.qc`: AMBER QC file. + - `.amber.homozygousregion.tsv`: Normal DNA sample regions of homozygosity.
-[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). +[AMBER](https://github.com/hartwigmedical/hmftools/tree/master/amber) generates β-allele frequencies in tumor samples +for CNV calling in PURPLE. -![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png) +#### COBALT -![MultiQC - FastQC mean quality scores plot](images/mqc_fastqc_quality.png) +
+Output files -![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png) +- `/cobalt/` + - `cobalt.version`: COBALT version file. + - `.cobalt.gc.median.tsv`: Tumor DNA sample GC median read depths. + - `.cobalt.ratio.pcf`: Tumor DNA sample piecewise constant fit. + - `.cobalt.ratio.tsv.gz`: Tumor DNA sample read counts and ratios (with reference or supposed diploid + regions). + - `.cobalt.gc.median.tsv`: Normal DNA sample GC median read depths. + - `.cobalt.ratio.median.tsv`: Normal DNA sample chromosome median ratios. + - `.cobalt.ratio.pcf`: Normal DNA sample piecewise constant fit. -> **NB:** The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. +
-### MultiQC +[COBALT](https://github.com/hartwigmedical/hmftools/tree/master/cobalt) generates read depth ratios (or an estimation +for tumor-only) for CNV calling in PURPLE. + +#### PURPLE
Output files -- `multiqc/` - - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - - `multiqc_plots/`: directory containing static images from the report in various formats. +- `/purple/` + - `circos/`: Circos plot data. + - `.purple.cnv.gene.tsv`: Somatic gene copy number. + - `.purple.cnv.somatic.tsv`: Copy number variant segments. + - `.purple.driver.catalog.germline.tsv`: Normal DNA sample driver catalogue. + - `.purple.driver.catalog.somatic.tsv`: Tumor DNA sample driver catalogue. + - `.purple.germline.deletion.tsv`: Normal DNA deletions. + - `.purple.germline.vcf.gz`: Normal DNA SAGE small variants with PURPLE annotations. + - `.purple.germline.vcf.gz.tbi`: Normal DNA SAGE small variants with PURPLE annotations index. + - `.purple.purity.range.tsv`: Purity/ploid model fit scores across a range of purity values. + - `.purple.purity.tsv`: Purity/ploidy summary. + - `.purple.qc`: PURPLE QC file. + - `.purple.segment.tsv`: Genomic copy number segments. + - `.purple.somatic.clonality.tsv`: Clonality peak model data. + - `.purple.somatic.hist.tsv`: Somatic variants histogram data. + - `.purple.somatic.vcf.gz`: Tumor DNA sample small variants with PURPLE annotations. + - `.purple.somatic.vcf.gz.tbi`: Tumor DNA sample small variants with PURPLE annotations index. + - `.purple.sv.germline.vcf.gz`: Germline structural variants with PURPLE annotations. + - `.purple.sv.germline.vcf.gz.tbi`: Germline structural variants with PURPLE annotations index. + - `.purple.sv.vcf.gz`: Somatic structural variants with PURPLE annotations. + - `.purple.sv.vcf.gz.tbi`: Somatic structural variants with PURPLE annotations. + - `plot/`: PURPLE plots. + - `purple.version`: PURPLE version file.
-[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. +[PURPLE](https://github.com/hartwigmedical/hmftools/tree/master/purple) is a CNV caller that also infers tumor +purity/ploidy and annotates both small and structural variant calls with copy-number information. + +### SV event interpretation + +#### LINX + +
+Output files + +- `/linx/germline_annotations/` + + - `linx.version`: LINX version file. + - `.linx.germline.breakend.tsv`: Normal DNA sample breakend data. + - `.linx.germline.clusters.tsv`: Normal DNA sample clustered events. + - `.linx.germline.disruption.tsv`: Normal DNA sample breakend data. + - `.linx.germline.driver.catalog.tsv`: Normal DNA sample driver catalogue. + - `.linx.germline.links.tsv`: Normal DNA sample cluster links. + - `.linx.germline.svs.tsv`: Normal DNA sample structural variants. + +- `/linx/somatic_annotations/` + + - `linx.version`: LINX version file. + - `.linx.breakend.tsv`: Tumor DNA sample breakend data. + - `.linx.clusters.tsv`: Tumor DNA sample clustered events. + - `.linx.driver.catalog.tsv`: Tumor DNA sample driver catalogue. + - `.linx.drivers.tsv`: Tumor DNA sample LINX driver drivers. + - `.linx.fusion.tsv`: Tumor DNA sample fusions. + - `.linx.links.tsv`: Tumor DNA sample cluster links. + - `.linx.svs.tsv`: Tumor DNA sample structural variants. + - `.linx.vis_*`: Tumor DNA sample visualisation data. -Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . +- `/linx/somatic_plots/` + - `all/*png`: All available tumor DNA sample cluster plots. + - `reportable/*png`: Driver-only tumor DNA sample cluster plots. + +
+ +[LINX](https://github.com/hartwigmedical/hmftools/tree/master/linx) clusters PURPLE-annotated SVs into high-order events +and classifies these events within a biological context. Following clustering and interpretation, events are visualised +as LINX plots. + +### Transcript analysis + +#### Isofox + +
+Output files + +- `/isofox/` + - `.isf.alt_splice_junc.csv`: Tumor RNA sample alternative splice junctions. + - `.isf.fusions.csv`: Tumor RNA sample fusions, unfiltered. + - `.isf.gene_collection.csv`: Tumor RNA sample gene-collection fragment counts. + - `.isf.gene_data.csv`: Tumor RNA sample gene fragment counts. + - `.isf.pass_fusions.csv`: Tumor RNA sample fusions, filtered. + - `.isf.retained_intron.csv`: Tumor RNA sample retained introns. + - `.isf.summary.csv`: Tumor RNA sample analysis summary file. + - `.isf.transcript_data.csv`: Tumor RNA sample transcript fragment counts. + +
+ +[Isofox](https://github.com/hartwigmedical/hmftools/tree/master/isofox) analyses RNA alignment data to quantify +transcripts, identify novel splice junctions, and call fusions. + +### Oncoviral detection + +#### VIRUSBreakend + +
+Output files + +- `/virusbreakend/` + - `.virusbreakend.vcf`: Tumor DNA sample viral integration sites. + - `.virusbreakend.vcf.summary.tsv`: Tumor DNA sample analysis summary file. + +
+ +[VIRUSBreakend](https://github.com/PapenfussLab/gridss) detects the presence of oncoviruses and integration sites in tumor samples. + +#### Virus Interpreter + +
+Output files + +- `/virusinterpreter/` + - `.virus.annotated.tsv`: Processed oncoviral call/annotation data. + +
+ +[Virus Interpreter](https://github.com/hartwigmedical/hmftools/tree/master/virus-interpreter) performs post-processing of +VIRUSBreakend calls, providing higher-level interpretation of the data. + +### HLA calling + +#### LILAC + +
+Output files + +- `/lilac/` + - `.lilac.candidates.coverage.tsv`: Coverage of high scoring candidates. + - `.lilac.qc.tsv`: LILAC QC file. + - `.lilac.tsv`: Analysis summary. + +
+ +[LILAC](https://github.com/hartwigmedical/hmftools/tree/master/lilac) calls HLA Class I and characterises allelic status +(copy-number alterations, somatic mutations) in the tumor sample. Analysis can also incorporate RNA data as an +indirect measurement of allele expression. + +### HRD status prediction + +#### CHORD + +
+Output files + +- `/chord/` + - `_chord_prediction.txt`: Tumor DNA sample analysis summary file. + - `_chord_signatures.txt`: Tumor DNA sample variant counts contributing to signatures. + +
+ +[CHORD](https://github.com/UMCUGenetics/CHORD) predicts the HRD status of a tumor using statistical inference on the +basis of relative somatic mutation counts. + +### Mutational signature fitting + +#### Sigs + +
+Output files + +- `/sigs/` + - `.sig.allocation.tsv`: Tumor DNA sample signature allocations. + - `.sig.snv_counts.csv`: Tumor DNA sample variant counts contributing to signatures. + +
+ +[Sigs](https://github.com/hartwigmedical/hmftools/tree/master/sigs) fits defined COSMIC trinucleotide mutational +signatures to tumor sample data. + +### Tissue of origin prediction + +#### CUPPA + +
+Output files + +- `/cuppa/` + - `_cup_report.pdf`: Combined figure of summary and feature plot. + - `.cup.data.csv`: Model feature scores. + - `.cup.report.features.png`: Feature plot. + - `.cup.report.summary.png`: Summary plot. + - `.cuppa.chart.png`: CUPPA chart plot. + - `.cuppa.conclusion.txt`: Prediction conclusion file. + +
+ +[CUPPA](https://github.com/hartwigmedical/hmftools/tree/master/cuppa) predicts tissue of origin for a given tumor sample +using DNA and/or RNA features generated by upstream hmftools components. + +### Report generation + +#### ORANGE + +
+Output files + +- `/orange/` + - `.orange.json`: Aggregated report data. + - `.orange.pdf`: Static report PDF. + +
+ +[ORANGE](https://github.com/hartwigmedical/hmftools/tree/master/orange) summarises and integrates key results from +hmftools components into a single static PDF report. + +#### linxreport + +
+Output files + +- `/linx/` + - `_linx.html`: Interactive HTML report. + +
+ +[linxreport](https://github.com/umccr/linxreport) generates an interactive report containing LINX annotations and plots. ### Pipeline information @@ -62,7 +510,6 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + - Parameters used by the pipeline run: `params.json`. - -[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. diff --git a/docs/usage.md b/docs/usage.md index 8d291b27..31aa71c3 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -6,61 +6,173 @@ ## Introduction - +The `oncoanalyser` pipeline typically runs from FASTQs or BAMs and supports two modes: (1) whole genome and/or +transcriptome, and (2) targeted panel. Launching an analysis requires only the creation of a samplesheet that describes +details of each input such as the sample type (tumor or normal), sequence type (DNA or RNA), and filepath. -## Samplesheet input +Various aspects of an `oncoanalyser` analysis can be configured to fit a range of needs, and many of these are +considered [advanced usage](#advanced-usage) of the pipeline. The most useful include: -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. +- precise process selection +- starting from existing data +- granular control over reference/resource files -```bash ---input '[path to samplesheet file]' +These features enable `oncoanalyser` to be run in a highly flexible way. For example, an analysis can be run with +existing PURPLE data as the starting point and skip variant calling processes. Additionally, reference/resource files +can be staged locally to optimise execution or modified to create user-defined driver gene panels. + +:::danger + +When starting from BAMs rather than FASTQ it is expected that: + +- RNA read alignments are generated with STAR using [specific + parameters](https://github.com/hartwigmedical/hmftools/tree/master/isofox#a-note-on-alignment-and-multi-mapping), this + is **critical** for WTS data, and +- reads are aligned to a Hartwig-distributed reference genome ([custom genomes](#custom-genomes) can be used but are not + recommended) + +::: + +## Supported analyses + +A variety of analyses are accessible in `oncoanalyser` and are implicitly run according to the data described in the +samplesheet. The supported analysis types for each workflow are listed below. + +| Input sequence data | WGS/WTS workflow | Targeted sequencing workflow\* | +| ----------------------------------- | :----------------: | :---------------------------------------: | +| • Tumor/normal DNA
• Tumor RNA | :white_check_mark: | - | +| • Tumor only DNA
• Tumor RNA | :white_check_mark: | :white_check_mark: | +| • Tumor/normal DNA | :white_check_mark: | - | +| • Tumor only DNA | :white_check_mark: | :white_check_mark: | +| • Tumor only RNA | :white_check_mark: | - | + +\* Supported analyses relate to the TSO500 panel only + +:::note + +The default settings of `oncoanalyser` will accommodate typical sequencing depths for sample inputs and each individual +tool is generally sequencing depth agnostic. However, variant calling is optimised for 100x tumor and 40x normal when +invoked in `wgts` mode and expects sparse high-depth read data characteristic of panel sequencing when run in `targeted` +mode. For atypical input sequence data you may consult the [hmftools +documentation](https://github.com/hartwigmedical/hmftools) and [configure](#custom-tool-arguments) `oncoanalyser` +accordingly. + +::: + +## Samplesheet + +A samplesheet that contains information of each input in CSV format is needed to run `oncoanalyser`. The required input +details and columns are [described below](#column-descriptions). + +Several different input filetypes beyond FASTQ and BAM are recognised, including intermediate output files generated +during execution such as the PURPLE output directory. The full list of recognised input filetypes is available +[here](https://github.com/nf-core/oncoanalyser/blob/1.0.0/lib/Constants.groovy#L58-L90). + +### Simple example + +#### FASTQ + +:::note + +Currently only non-interleaved paired-end reads are accepted as FASTQ input. + +::: + +```csv title="samplesheet.csv" +group_id,subject_id,sample_id,sample_type,sequence_type,filetype,info,filepath +P1_wgts,P1,SA,normal,dna,fastq,library_id:SA_library;lane:001,/path/to/P1.SA.normal.dna.wgs.001.R1.fastq.gz;/path/to/P1.SA.normal.dna.wgs.001.R2.fastq.gz +P1_wgts,P1,SB,tumor,dna,fastq,library_id:SB_library;lane:001,/path/to/P1.SB.tumor.dna.wgs.001.R1.fastq.gz;/path/to/P1.SB.tumor.dna.wgs.001.R2.fastq.gz +P1_wgts,P1,SC,tumor,rna,fastq,library_id:SC_library;lane:001,/path/to/P1.SC.tumor.rna.wts.001.R1.fastq.gz;/path/to/P1.SC.tumor.rna.wts.001.R2.fastq.gz ``` -### Multiple runs of the same sample +#### BAM + +:::note -The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: +Inputs with the `bam` filetype will be processed by MarkDups as required by hmftools. Where an input BAM has already +been processed specifically by [HMF +MarkDups](https://github.com/hartwigmedical/hmftools/blob/mark-dups-v1.1.7/mark-dups/README.md), you can avoid needless +reprocessing by setting `bam_markdups` as the filetype instead. It is important to understand that duplicate marking by +other tools (e.g. GATK) cannot be used as a substitute since HMF MarkDups performs key operations beyond just duplicate +marking. -```console -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz +
+ +Please note there are other essential requirements around the use of BAMs as inputs, see the warning above in the +[Introduction](#introduction). + +::: + +```csv title="samplesheet.csv" +group_id,subject_id,sample_id,sample_type,sequence_type,filetype,filepath +P1_wgts,P1,SA,normal,dna,bam,/path/to/P1.SA.normal.dna.wgs.bam +P1_wgts,P1,SB,tumor,dna,bam,/path/to/P1.SB.tumor.dna.wgs.bam +P1_wgts,P1,SC,tumor,rna,bam,/path/to/P1.SC.tumor.rna.wts.bam ``` -### Full samplesheet +### Multiple lanes -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. +```csv title="samplesheet.csv" +group_id,subject_id,sample_id,sample_type,sequence_type,filetype,info,filepath +P1_wgts,P1,SA,normal,dna,fastq,library_id:SA_library;lane:001,/path/to/P1.SA.normal.dna.wgs.001.R1.fastq.gz;/path/to/P1.SA.normal.dna.wgs.001.R2.fastq.gz +P1_wgts,P1,SA,normal,dna,fastq,library_id:SA_library;lane:002,/path/to/P1.SA.normal.dna.wgs.002.R1.fastq.gz;/path/to/P1.SA.normal.dna.wgs.002.R2.fastq.gz +P1_wgts,P1,SB,tumor,dna,fastq,library_id:SB_library;lane:001,/path/to/P1.SB.tumor.dna.wgs.001.R1.fastq.gz;/path/to/P1.SB.tumor.dna.wgs.001.R2.fastq.gz +P1_wgts,P1,SB,tumor,dna,fastq,library_id:SB_library;lane:002,/path/to/P1.SB.tumor.dna.wgs.002.R1.fastq.gz;/path/to/P1.SB.tumor.dna.wgs.002.R2.fastq.gz +P1_wgts,P1,SC,tumor,rna,fastq,library_id:SC_library;lane:001,/path/to/P1.SC.tumor.rna.wts.001.R1.fastq.gz;/path/to/P1.SC.tumor.rna.wts.001.R2.fastq.gz +``` -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. +### Multiple patients -```console -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +```csv title="samplesheet.csv" +group_id,subject_id,sample_id,sample_type,sequence_type,filetype,info,filepath +P1_wgts,P1,SA,normal,dna,fastq,library_id:SA_library;lane:001,/path/to/P1.SA.normal.dna.wgs.001.R1.fastq.gz;/path/to/P1.SA.normal.dna.wgs.001.R2.fastq.gz +P1_wgts,P1,SB,tumor,dna,fastq,library_id:SB_library;lane:001,/path/to/P1.SB.tumor.dna.wgs.001.R1.fastq.gz;/path/to/P1.SB.tumor.dna.wgs.001.R2.fastq.gz +P2_wgts,P2,SA,normal,dna,fastq,library_id:SA_library;lane:001,/path/to/P2.SA.normal.dna.wgs.001.R1.fastq.gz;/path/to/P2.SA.normal.dna.wgs.001.R2.fastq.gz +P2_wgts,P2,SB,tumor,dna,fastq,library_id:SB_library;lane:001,/path/to/P2.SB.tumor.dna.wgs.001.R1.fastq.gz;/path/to/P2.SB.tumor.dna.wgs.001.R2.fastq.gz ``` -| Column | Description | -| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). 
| -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +### Column descriptions -An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. +| Column | Description | +| ------------- | ------------------------------------------------------------------------------ | +| group_id | Group ID for a set of samples and inputs | +| subject_id | Subject/patient ID | +| sample_id | Sample ID | +| sample_type | Sample type: `tumor`, `normal` | +| sequence_type | Sequence type: `dna`, `rna` | +| filetype | File type: e.g. `fastq`, `bam`, `bai` | +| info | Additional input information: `library_id`, `lane`, `cancer_type` _[optional]_ | +| filepath | Absolute filepath to input file (can be local filepath, URL, S3 URI) | + +The identifiers provided in the samplesheet are used to set output file paths: + +- `group_id`: top-level output directory for analysis files e.g. `output/COLO829_example/` +- tumor `sample_id`: output prefix for most filenames e.g. `COLO829T.purple.sv.vcf.gz` +- normal `sample_id`: output prefix for some filenames e.g. `COLO829R.cobalt.ratio.pcf` ## Running the pipeline The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/oncoanalyser --input samplesheet.csv --outdir --genome GRCh37 -profile docker +nextflow run nf-core/oncoanalyser \ + -profile docker \ + -revision 1.0.0 \ + --mode \ + --genome \ + --input samplesheet.csv \ + --outdir ``` -This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. +This will launch the pipeline with the `docker` configuration profile. See below for more information on profiles. + +:::note + +Reference data will be retrieved by `oncoanalyser` for every analysis run. It is therefore strongly recommended when +running multiple analyses to pre-stage reference data locally to avoid it being retrieved multiple times. See [Staging +reference data](#staging-reference-data). + +::: Note that the pipeline will create the following files in your working directory: @@ -71,6 +183,37 @@ work # Directory containing the nextflow working files # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` +If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. + +Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. + +:::warning + +Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must +only be used for [tuning process resource +specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such +as output directories), or module arguments (args). + +::: + +The above pipeline run specified with a params file in yaml format: + +```bash +nextflow run nf-core/oncoanalyser -profile docker -params-file params.yaml +``` + +with `params.yaml` containing the following as an example: + +```yaml +mode: 'wgts' +genome: 'GRCh38_hmf' +input: './samplesheet.csv' +outdir: './results/' +<...> +``` + +You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). 
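+As noted in the warning above, a custom config supplied with `-c` is the place for process resource tuning and module arguments (`ext.args`) rather than pipeline parameters. The sketch below illustrates this kind of file; the file name, process selectors, and values are purely illustrative and should be adapted to your own compute environment:
+
+```groovy title="custom_tuning.config"
+process {
+    // Example only: raise resources for a memory-hungry indexing step
+    withName: 'STAR_GENOMEGENERATE' {
+        cpus   = 8
+        memory = 64.GB
+    }
+
+    // Example only: pass extra tool arguments without touching pipeline parameters
+    withName: 'PURPLE' {
+        ext.args = '-min_purity 0.2 -max_purity 0.99'
+    }
+}
+```
+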
+ ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: @@ -83,29 +226,293 @@ nextflow pull nf-core/oncoanalyser It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. -First, go to the [nf-core/oncoanalyser releases page](https://github.com/nf-core/oncoanalyser/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. +First, go to the [nf-core/oncoanalyser releases page](https://github.com/nf-core/oncoanalyser/releases) and find the latest pipeline version - numeric only (eg. `1.0.0`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.0.0`. Of course, you can switch to another version by changing the number after the `-r` flag. + +This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, in the `/pipeline_info/software_versions.yml` file. + +To further assist in reproducbility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. + +:::tip +If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. +::: + +## Advanced usage + +### Selecting processes + +Most of the major components in `oncoanalyser` can be skipped using `--processes_exclude` (the full list of available +processes can be viewed [here](https://github.com/nf-core/oncoanalyser/blob/1.0.0/lib/Constants.groovy#L36-L56)). +Multiple processes can be given as a comma-separated list. While there are some use-cases for this feature (e.g. +skipping resource intensive processes such as VIRUSBreakend), it becomes more powerful when combined with existing +inputs as described in the following section. + +:::warning + +When skipping components no checks are done to identify orphan processes in the execution DAG or for redundant +processes. + +::: + +### Existing inputs + +The `oncoanalyser` pipeline has been designed to allow entry at arbitrary points, which is particularly useful in +situations where previous outputs exist and re-running `oncoanalyser` is desired (e.g. to subsequently execute an +optional sensor/workflow or re-run an analysis with an upgraded tool such as PURPLE). The primary advantage of this +approach is that only the required processes are executed, reducing costs and runtimes by skipping unnecessary +processes. + +In order to effectively utilise this feature, existing inputs must be set in the [samplesheet](#samplesheet) and the +appropriate [processes selected](#selecting-processes). 
Take the below example where existing PURPLE inputs are used so +that all upstream variant calling can be skipped: + +```csv title='samplesheet.existing_purple.csv' +group_id,subject_id,sample_id,sample_type,sequence_type,filetype,filepath +P1_wgts,P1,SA,normal,dna,bam,/path/to/P1.SA.normal.dna.wgs.bam +P1_wgts,P1,SB,tumor,dna,bam,/path/to/P1.SB.tumor.dna.wgs.bam +P1_wgts,P1,SB,tumor,dna,purple_dir,/path/to/P1.purple_dir/ +``` + +:::note + +The original source input file (i.e. BAM or FASTQ) must always be provided for `oncoanalyser` to infer the correct +analysis type. + +::: + +And now run and skip variant calling: + +```bash +nextflow run nf-core/oncoanalyser \ + -profile docker \ + -revision 1.0.0 \ + --mode wgts \ + --processes_exclude markdups,amber,cobalt,gridss,gripss,sage,pave \ + --genome GRCh38_hmf \ + --input samplesheet.csv \ + --outdir output/ +``` + +:::warning + +Providing existing inputs will cause `oncoanalyser` to skip the corresponding process but _not any_ of the upstream +processes. It is the responsibility of the user to skip all relevant processes. + +::: -This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. +### Configuring reference data + +All reference data can be configured as needed, and are defined in the following locations: + +| Reference data | Filepath | Note | +| ----------------------- | ------------------------- | --------------------------------------- | +| Genomes and indexes | `conf/hmf_genomes.config` | Absolute paths | +| hmftools resource files | `conf/hmf_data.config` | Paths relative to data bundle directory | +| Panel resource files | `conf/panel_data.config` | Paths relative to data bundle directory | + +See the below sections for further details on customising reference data. + +#### Customising hmf data + +To override hmftools resource files, first [stage the bundle](#staging-reference-data) locally then copy in your +custom file under the bundle directory and create a new config with relevant file paths: + +```groovy title="hmf_data.custom.config" +params { + hmf_data_paths { + '38' { + driver_gene_panel = 'custom_files/DriverGenePanel.tsv' + sage_actionable_panel = 'custom_files/ActionableCodingPanel.bed.gz' + sage_coverage_panel = 'custom_files/CoverageCodingPanel.bed.gz' + } + } +} +``` + +To use these hmftools resource file overrides in `oncoanalyser` the local bundle directory must be provided with +`--ref_data_hmf_data_path`. + +#### Customising other data + +The path or URI to the VIRUSBreakend database can also be explicitly set with `--ref_data_virusbreakenddb_path`. There +are additional arguments to manually set various other reference data files, please review the parameters documentation +for the complete list. + +#### Staging reference data + +Default reference data can be staged locally with `oncoanalyser` by providing a samplesheet for the desired analysis and +setting the `--prepare_reference_only` argument. The samplesheet and `oncoanalyser` configuration will determine the +relevant reference data to download. 
For example the following command will download the `GRCh38_hmf` genome plus +indices, reference data, and databases required to run a WGTS analysis for tumor/normal DNA with tumor RNA: + +```csv title="samplesheet.csv" +group_id,subject_id,sample_id,sample_type,sequence_type,filetype,filepath +P1_wgts,P1,SA,normal,dna,bam,/path/to/P1.SA.normal.dna.wgs.bam +P1_wgts,P1,SB,tumor,dna,bam,/path/to/P1.SB.tumor.dna.wgs.bam +P1_wgts,P1,SC,tumor,rna,bam,/path/to/P1.SC.tumor.rna.wts.bam +``` + +```bash +nextflow run nf-core/oncoanalyser \ + -profile docker \ + -revision 1.0.0 \ + --mode wgts \ + --genome GRCh38_hmf \ + --prepare_reference_only \ + --input samplesheet.csv \ + --outdir prepare_reference/ +``` + +Executing the above command will download and unpack default reference data without running any analysis, and once +complete the prepared reference files can be found in `./prepare_reference/reference_data/1.0.0//`. It is +recommended to remove the Nextflow work directory after staging data to free disk space. + +For `oncoanalyser` to use locally staged reference data, a custom config can be used: + +```groovy title="refdata.local.config" +params { + + genomes { + GRCh38_hmf { + fasta = "/path/to/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna" + fai = "/path/to/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.fai" + dict = "/path/to/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.dict" + bwamem2_index = "/path/to/bwa-mem2_index/" + gridss_index = "/path/to/gridss_index/" + star_index = "/path/to/star_index/" + } + } + + ref_data_hmf_data_path = "/path/to/hmftools_data/" + ref_data_panel_data_path = "/path/to/tso500_panel_data/" + ref_data_virusbreakenddb_path = "/path/to/virusbreakenddb/" +} +``` + +Specific reference files can also be downloaded directly from the hosting service with the corresponding URL.
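+These URLs can also be passed directly to `oncoanalyser` in place of local paths, as the bundled test profile does for the VIRUSBreakend database. A minimal sketch using the VIRUSBreakend database URL from the table below (the file name is illustrative):
+
+```groovy title="refdata.remote.config"
+params {
+    // A remote tarball works for this parameter (the bundled test profile does the same);
+    // pre-staging locally is still recommended when running multiple analyses (see note above).
+    ref_data_virusbreakenddb_path = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/virusbreakend/virusbreakenddb_20210401.tar.gz'
+}
+```
+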
+ +##### Reference data URLs + +_GRCh37 genome (Hartwig) [`GRCh37_hmf`]_ + +| Type | Name | +| -------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FASTA | [Homo_sapiens.GRCh37.GATK.illumina.fasta](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.0/Homo_sapiens.GRCh37.GATK.illumina.fasta) | +| FASTA index | [Homo_sapiens.GRCh37.GATK.illumina.fasta.fai](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.0/samtools_index/1.16/Homo_sapiens.GRCh37.GATK.illumina.fasta.fai) | +| FASTA seq dictionary | [Homo_sapiens.GRCh37.GATK.illumina.fasta.dict](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.0/samtools_index/1.16/Homo_sapiens.GRCh37.GATK.illumina.fasta.dict) | +| bwa-mem2 index | [bwa-mem2_index/2.2.1.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.1/bwa-mem2_index/2.2.1.tar.gz) | +| GRIDSS index | [gridss_index/2.13.2.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.1/gridss_index/2.13.2.tar.gz) | +| STAR index | [star_index/gencode_19/2.7.3a.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.0/star_index/gencode_19/2.7.3a.tar.gz) | + +_GRCh38 genome (Hartwig) [`GRCh38_hmf`]_ + +| Type | Name | +| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FASTA | [GCA_000001405.15_GRCh38_no_alt_analysis_set.fna](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.0/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna) | +| FASTA index | [GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.fai](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.0/samtools_index/1.16/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.fai) | +| FASTA seq dictionary | [GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.dict](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.0/samtools_index/1.16/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.dict) | +| bwa-mem2 index | [bwa-mem2_index/2.2.1.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.1/bwa-mem2_index/2.2.1.tar.gz) | +| GRIDSS index | [gridss_index/2.13.2.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.1/gridss_index/2.13.2.tar.gz) | +| STAR index | [star_index/gencode_38/2.7.3a.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.0/star_index/gencode_38/2.7.3a.tar.gz) | + +_Other reference data_ + +| Type | Name | +| ---------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | +| hmftools data (GRCh37) | [hmftools/5.34_37--2.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/hmftools/5.34_37--2.tar.gz) | +| hmftools data (GRCh38) | [hmftools/5.34_38--2.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/hmftools/5.34_38--2.tar.gz) | +| TSO500 data (GRCh37) | [panels/tso500_5.34_37--1.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/panels/tso500_5.34_37--1.tar.gz) | +| TSO500 data (GRCh38) | 
[panels/tso500_5.34_38--1.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/panels/tso500_5.34_38--1.tar.gz) |
+| HLA slice BED          | [hla_slice/grch38_alt.plus_homologous.bed](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/other/hla_slice/grch38_alt.plus_homologous.bed) |
+| VIRUSBreakend database | [virusbreakenddb_20210401.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/virusbreakend/virusbreakenddb_20210401.tar.gz) |
+
+#### Custom genomes
+
+It is strongly recommended to use a Hartwig-distributed reference genome for alignments and subsequent analysis
+(`GRCh37_hmf` or `GRCh38_hmf`). Where it is not feasible to do so, a custom genome can instead be used by providing the
+relevant FASTA file in a configuration file:
+
+```groovy title='genome.custom.config'
+params {
+    genomes {
+        CustomGenome {
+            fasta = "/path/to/custom_genome.fa"
+        }
+    }
+}
+```
+
+When running the following command, each index required for the analysis will first be created before the rest of the
+`oncoanalyser` analysis is run:
+
+:::note
+
+In a process similar to [staging reference data](#staging-reference-data), you can first generate the required indexes
+by setting `--prepare_reference_only` and then provide the prepared reference files to `oncoanalyser` through a custom
+config file. This avoids having to regenerate indexes for each new analysis.
+
+:::
+
+```bash
+nextflow run nf-core/oncoanalyser \
+  -profile docker \
+  -revision 1.0.0 \
+  -config genome.custom.config \
+  --mode wgts \
+  \
+  --genome CustomGenome \
+  --genome_version <37|38> \
+  --genome_type <alt|no_alt> \
+  --force_genome \
+  \
+  --input samplesheet.csv \
+  --outdir output/
+```
+
+Creation of a STAR index also requires transcript annotations; please provide the appropriate GTF file for your genome
+build (decompressed) via the `--ref_data_genome_gtf` option:
+
+- GRCh37: [GENCODE v19 (Ensembl v74)
+  annotations](https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_19/gencode.v19.annotation.gtf.gz)
+- GRCh38: [GENCODE v38 (Ensembl v104)
+  annotations](https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_38/gencode.v38.annotation.gtf.gz)
+
+:::warning
+
+The STAR index must use transcript annotations from Ensembl versions that match hmftools resource data (GRCh37: v74;
+GRCh38: v104).
+
+:::
+
+When creating indexes for reference genomes with alternative haplotypes, an ALT file must be given with
+`--ref_data_genome_alt`. Importantly, a STAR index will not be generated for reference genomes with alternative
+haplotypes since this requires careful processing and is hence left to the user.

 ## Core Nextflow arguments

-> **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen).
+:::note
+These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen).
+:::

 ### `-profile`

 Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments.

-Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below.
When using Biocontainers, most of these software packaging methods pull Docker containers from quay.io e.g [FastQC](https://quay.io/repository/biocontainers/fastqc) except for Singularity which directly downloads Singularity images via https hosted by the [Galaxy project](https://depot.galaxyproject.org/singularity/) and Conda which downloads and installs software locally from [Bioconda](https://bioconda.github.io/). +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. -> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +:::info +We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +::: The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! They are loaded in sequence, so later profiles can overwrite earlier profiles. -If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended. +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer environment. +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters - `docker` - A generic configuration profile to be used with [Docker](https://docker.com/) - `singularity` @@ -116,11 +523,12 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) - `charliecloud` - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `apptainer` + - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `wave` + - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow ` 24.03.0-edge` or later). - `conda` - - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. -- `test` - - A profile with a complete configuration for automated testing - - Includes links to test data so needs no other parameters + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. 
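+
+For example, the `test` profile listed above can be combined with a container profile for a quick sanity-check run; a
+minimal sketch (the output directory name is arbitrary):
+
+```bash
+nextflow run nf-core/oncoanalyser -profile test,docker -revision 1.0.0 --outdir test_results/
+```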
### `-resume` @@ -138,96 +546,19 @@ Specify the path to a specific config file (this is a core Nextflow command). Se Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. -For example, if the nf-core/rnaseq pipeline is failing after multiple re-submissions of the `STAR_ALIGN` process due to an exit code of `137` this would indicate that there is an out of memory issue: - -```console -[62/149eb0] NOTE: Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -- Execution is retried (1) -Error executing process > 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)' - -Caused by: - Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) - -Command executed: - STAR \ - --genomeDir star \ - --readFilesIn WT_REP1_trimmed.fq.gz \ - --runThreadN 2 \ - --outFileNamePrefix WT_REP1. \ - - -Command exit status: - 137 - -Command output: - (empty) - -Command error: - .command.sh: line 9: 30 Killed STAR --genomeDir star --readFilesIn WT_REP1_trimmed.fq.gz --runThreadN 2 --outFileNamePrefix WT_REP1. -Work dir: - /home/pipelinetest/work/9d/172ca5881234073e8d76f2a19c88fb - -Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run` -``` - -To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN). -We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/software/star/align/main.nf`. -If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9). -The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements. -The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB. -Providing you haven't set any other standard nf-core parameters to **cap** the [maximum resources](https://nf-co.re/usage/configuration#max-resources) used by the pipeline then we can try and bypass the `STAR_ALIGN` process failure by creating a custom config file that sets at least 72GB of memory, in this case increased to 100GB. 
-The custom config below can then be provided to the pipeline via the [`-c`](#-c) parameter as highlighted in previous sections. - -```nextflow -process { - withName: 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN' { - memory = 100.GB - } -} -``` - -> **NB:** We specify the full process name i.e. `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN` in the config file because this takes priority over the short name (`STAR_ALIGN`) and allows existing configuration using the full process name to be correctly overridden. -> -> If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly. - -### Updating containers - -The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration. For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. Given that the lineage assignments change quite frequently it doesn't make sense to re-release the nf-core/viralrecon everytime a new version of Pangolin has been released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`. - -1. Check the default version used by the pipeline in the module file for [Pangolin](https://github.com/nf-core/viralrecon/blob/a85d5969f9025409e3618d6c280ef15ce417df65/modules/nf-core/software/pangolin/main.nf#L14-L19) -2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags) -3. Create the custom config accordingly: - - - For Docker: +To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. - ```nextflow - process { - withName: PANGOLIN { - container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` +### Custom containers - - For Singularity: +In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However in some cases the pipeline specified version maybe out of date. - ```nextflow - process { - withName: PANGOLIN { - container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` +To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. 
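+
+As a rough sketch of what such an override looks like (the process selector and container image below are purely
+illustrative and are not taken from this pipeline), a custom config supplied with `-c` can redefine the `container`
+directive for a single process:
+
+```groovy title="custom_container.config"
+process {
+    // Hypothetical example only: substitute the process name and the
+    // container image/version you actually need.
+    withName: 'SOMETOOL' {
+        container = 'quay.io/biocontainers/sometool:1.2.3--h87f3376_0'
+    }
+}
+```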
- - For Conda: +### Custom tool arguments - ```nextflow - process { - withName: PANGOLIN { - conda = 'bioconda::pangolin=3.0.5' - } - } - ``` +A pipeline might not always support every possible argument or option of a particular tool used in pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default. -> **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch. +To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. ### nf-core/configs @@ -237,13 +568,13 @@ See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). -## Azure Resource Requests +## Azure resource requests To be used with the `azurebatch` profile by specifying the `-profile azurebatch`. We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default but these options can be changed if required. Note that the choice of VM size depends on your quota and the overall workload during the analysis. -For a thorough list, please refer the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes). +For a thorough list, please refer to the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes). 
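+
+For instance, the VM size could be set in a small custom config supplied with `-c` (a sketch that simply restates the
+recommended default; swap in another size from the linked Azure documentation if required):
+
+```groovy title="azure.custom.config"
+params {
+    // VM size used for Azure Batch compute nodes with -profile azurebatch
+    vm_type = 'Standard_D16_v3'
+}
+```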
## Running in the background diff --git a/lib/Constants.groovy b/lib/Constants.groovy new file mode 100644 index 00000000..cb827975 --- /dev/null +++ b/lib/Constants.groovy @@ -0,0 +1,347 @@ +class Constants { + + // NOTE(SW): the HMF reference data files are incompatible with hg19 due to different contig naming + static List GENOMES_VERSION_37 = ['GRCh37_hmf', 'GRCh37'] + static List GENOMES_VERSION_38 = ['GRCh38_hmf', 'GRCh38', 'hg38'] + static List GENOMES_ALT = ['GRCh38', 'hg38'] + + static List GENOMES_SUPPORTED = ['GRCh37_hmf', 'GRCh38_hmf'] + static List GENOMES_DEFINED = Constants.GENOMES_VERSION_37 + Constants.GENOMES_VERSION_38 + + static List PANELS_DEFINED = ['tso500'] + + + static String HMF_DATA_37_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/hmftools/5.34_37--2.tar.gz' + static String HMF_DATA_38_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/hmftools/5.34_38--2.tar.gz' + + + static String TSO500_PANEL_37_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/panels/tso500_5.34_37--1.tar.gz' + static String TSO500_PANEL_38_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/panels/tso500_5.34_38--1.tar.gz' + + + static String VIRUSBREAKENDDB_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/virusbreakend/virusbreakenddb_20210401.tar.gz' + + static String HLA_SLICE_BED_GRCH38_ALT_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/other/hla_slice/grch38_alt.plus_homologous.bed' + + + static Integer DEFAULT_ISOFOX_READ_LENGTH_WTS = 151 + static Integer DEFAULT_ISOFOX_READ_LENGTH_TARGETED = 93 + + + static enum RunMode { + TARGETED, + WGTS, + } + + static enum Process { + ALIGNMENT, + AMBER, + BAMTOOLS, + CHORD, + COBALT, + CUPPA, + FLAGSTAT, + GRIDSS, + GRIPSS, + ISOFOX, + LILAC, + LINX, + MARKDUPS, + ORANGE, + PAVE, + PURPLE, + SAGE, + SIGS, + VIRUSINTERPRETER, + } + + static enum FileType { + // Generic + BAM, + BAM_MARKDUPS, + BAI, + FASTQ, + // Process + AMBER_DIR, + BAMTOOLS, + COBALT_DIR, + GRIDSS_VCF, + GRIDSS_VCF_TBI, + GRIPSS_VCF, + GRIPSS_VCF_TBI, + GRIPSS_UNFILTERED_VCF, + GRIPSS_UNFILTERED_VCF_TBI, + ISOFOX_DIR, + LILAC_DIR, + LINX_ANNO_DIR, + PAVE_VCF, + PURPLE_DIR, + SAGE_VCF, + SAGE_VCF_TBI, + SAGE_APPEND_VCF, + VIRUSINTERPRETER_DIR, + // ORANGE specific + CHORD_DIR, + SIGS_DIR, + CUPPA_DIR, + FLAGSTAT, + LINX_PLOT_DIR, + SAGE_DIR, + } + + static enum SampleType { + TUMOR, + NORMAL, + TUMOR_NORMAL, + } + + static enum SequenceType { + DNA, + RNA, + DNA_RNA, + } + + static enum InfoField { + CANCER_TYPE, + LANE, + LIBRARY_ID, + } + + static Map PLACEHOLDER_META = [meta_placeholder: null] + static List PLACEHOLDER_OPTIONAL_CHANNEL = [] + + static Map INPUT = [ + + BAM_DNA_TUMOR: [ + FileType.BAM, + SampleType.TUMOR, + SequenceType.DNA, + ], + + BAM_MARKDUPS_DNA_TUMOR: [ + FileType.BAM_MARKDUPS, + SampleType.TUMOR, + SequenceType.DNA, + ], + + BAM_DNA_NORMAL: [ + FileType.BAM, + SampleType.NORMAL, + SequenceType.DNA, + ], + + BAM_MARKDUPS_DNA_NORMAL: [ + FileType.BAM_MARKDUPS, + SampleType.NORMAL, + SequenceType.DNA, + ], + + BAM_RNA_TUMOR: [ + FileType.BAM, + SampleType.TUMOR, + SequenceType.RNA, + ], + + BAI_DNA_TUMOR: [ + FileType.BAI, + SampleType.TUMOR, + SequenceType.DNA, + ], + + BAI_DNA_NORMAL: [ + FileType.BAI, + SampleType.NORMAL, + SequenceType.DNA, + ], + + BAI_RNA_TUMOR: [ + FileType.BAI, + SampleType.TUMOR, + SequenceType.RNA, + ], + + ISOFOX_DIR: [ + FileType.ISOFOX_DIR, + SampleType.TUMOR, + SequenceType.RNA, + ], + 
+ AMBER_DIR: [ + FileType.AMBER_DIR, + [SampleType.TUMOR, SampleType.TUMOR_NORMAL], + SequenceType.DNA, + ], + COBALT_DIR: [ + FileType.COBALT_DIR, + [SampleType.TUMOR, SampleType.TUMOR_NORMAL], + SequenceType.DNA, + ], + + BAMTOOLS_TUMOR: [ + FileType.BAMTOOLS, + SampleType.TUMOR, + SequenceType.DNA, + ], + BAMTOOLS_NORMAL: [ + FileType.BAMTOOLS, + SampleType.NORMAL, + SequenceType.DNA, + ], + + FLAGSTAT_TUMOR: [ + FileType.FLAGSTAT, + SampleType.TUMOR, + SequenceType.DNA, + ], + FLAGSTAT_NORMAL: [ + FileType.FLAGSTAT, + SampleType.NORMAL, + SequenceType.DNA, + ], + + SAGE_VCF_TUMOR: [ + FileType.SAGE_VCF, + SampleType.TUMOR, + SequenceType.DNA, + ], + SAGE_VCF_NORMAL: [ + FileType.SAGE_VCF, + SampleType.NORMAL, + SequenceType.DNA, + ], + SAGE_VCF_TBI_TUMOR: [ + FileType.SAGE_VCF_TBI, + SampleType.TUMOR, + SequenceType.DNA, + ], + SAGE_VCF_TBI_NORMAL: [ + FileType.SAGE_VCF_TBI, + SampleType.NORMAL, + SequenceType.DNA, + ], + SAGE_DIR_TUMOR: [ + FileType.SAGE_DIR, + SampleType.TUMOR, + SequenceType.DNA, + ], + SAGE_DIR_NORMAL: [ + FileType.SAGE_DIR, + SampleType.NORMAL, + SequenceType.DNA, + ], + SAGE_APPEND_VCF_TUMOR: [ + FileType.SAGE_APPEND_VCF, + SampleType.TUMOR, + SequenceType.DNA_RNA, + ], + SAGE_APPEND_VCF_NORMAL: [ + FileType.SAGE_APPEND_VCF, + SampleType.NORMAL, + SequenceType.DNA_RNA, + ], + + PAVE_VCF_TUMOR: [ + FileType.PAVE_VCF, + SampleType.TUMOR, + SequenceType.DNA, + ], + PAVE_VCF_NORMAL: [ + FileType.PAVE_VCF, + SampleType.NORMAL, + SequenceType.DNA, + ], + + GRIDSS_VCF: [ + FileType.GRIDSS_VCF, + [SampleType.TUMOR, SampleType.TUMOR_NORMAL], + SequenceType.DNA, + ], + + GRIPSS_VCF_TUMOR: [ + FileType.GRIPSS_VCF, + [SampleType.TUMOR, SampleType.TUMOR_NORMAL], + SequenceType.DNA, + ], + GRIPSS_VCF_TUMOR_TBI: [ + FileType.GRIPSS_VCF_TBI, + [SampleType.TUMOR, SampleType.TUMOR_NORMAL], + SequenceType.DNA, + ], + GRIPSS_VCF_NORMAL: [ + FileType.GRIPSS_VCF, + SampleType.NORMAL, + SequenceType.DNA, + ], + GRIPSS_VCF_NORMAL_TBI: [ + FileType.GRIPSS_VCF_TBI, + SampleType.NORMAL, + SequenceType.DNA, + ], + GRIPSS_UNFILTERED_VCF_TUMOR: [ + FileType.GRIPSS_UNFILTERED_VCF, + [SampleType.TUMOR, SampleType.TUMOR_NORMAL], + SequenceType.DNA, + ], + GRIPSS_UNFILTERED_VCF_TUMOR_TBI: [ + FileType.GRIPSS_UNFILTERED_VCF_TBI, + [SampleType.TUMOR, SampleType.TUMOR_NORMAL], + SequenceType.DNA, + ], + GRIPSS_UNFILTERED_VCF_NORMAL: [ + FileType.GRIPSS_UNFILTERED_VCF, + SampleType.NORMAL, + SequenceType.DNA, + ], + + PURPLE_DIR: [ + FileType.PURPLE_DIR, + [SampleType.TUMOR, SampleType.TUMOR_NORMAL], + SequenceType.DNA, + ], + + LINX_PLOT_DIR_TUMOR: [ + FileType.LINX_PLOT_DIR, + SampleType.TUMOR, + SequenceType.DNA, + ], + LINX_ANNO_DIR_TUMOR: [ + FileType.LINX_ANNO_DIR, + SampleType.TUMOR, + SequenceType.DNA, + ], + LINX_ANNO_DIR_NORMAL: [ + FileType.LINX_ANNO_DIR, + SampleType.NORMAL, + SequenceType.DNA, + ], + + CHORD_DIR: [ + FileType.CHORD_DIR, + SampleType.TUMOR, + SequenceType.DNA, + ], + SIGS_DIR: [ + FileType.SIGS_DIR, + SampleType.TUMOR, + SequenceType.DNA, + ], + LILAC_DIR: [ + FileType.LILAC_DIR, + [SampleType.TUMOR, SampleType.NORMAL, SampleType.TUMOR_NORMAL], + [SequenceType.DNA, SequenceType.DNA_RNA], + ], + + VIRUSINTERPRETER_DIR: [ + FileType.VIRUSINTERPRETER_DIR, + SampleType.TUMOR, + SequenceType.DNA, + ], + + CUPPA_DIR: [ + FileType.CUPPA_DIR, + SampleType.TUMOR, + [SequenceType.DNA, SequenceType.RNA, SequenceType.DNA_RNA], + ], + + ] +} diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy deleted file mode 100755 index b3d092f8..00000000 --- a/lib/NfcoreSchema.groovy 
+++ /dev/null @@ -1,529 +0,0 @@ -// -// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. -// - -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - 'version', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. 
Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' - JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - System.exit(1) - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def defaultValue = 
group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && params_value != 
"" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } - // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... 
- // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index 27feb009..00000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,313 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. -// - -import org.yaml.snakeyaml.Yaml - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Warn if a -profile or Nextflow config has not been provided to run the pipeline - // - public static void checkConfigProvided(workflow, log) { - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " - } - } - - // - // Construct and send completion email - // - public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = workflow.manifest.version - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = 
engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - } - - // - // Construct and send adaptive card - // https://adaptivecards.io - // - public static void adaptivecard(workflow, params, summary_params, projectDir, log) { - def hook_url = params.hook_url - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId - if (workflow.repository) misc_fields['repository'] = workflow.repository - if (workflow.commitId) misc_fields['commitid'] = workflow.commitId - if (workflow.revision) misc_fields['revision'] = workflow.revision - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp - - def msg_fields = [:] - msg_fields['version'] = workflow.manifest.version - msg_fields['runName'] = workflow.runName - msg_fields['success'] = workflow.success - msg_fields['dateComplete'] = workflow.complete - msg_fields['duration'] = workflow.duration - msg_fields['exitStatus'] = workflow.exitStatus - msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine - msg_fields['projectDir'] = workflow.projectDir - msg_fields['summary'] = summary << misc_fields - - // Render the JSON template - def engine = new groovy.text.GStringTemplateEngine() - def hf = new File("$projectDir/assets/adaptivecard.json") - def json_template = engine.createTemplate(hf).make(msg_fields) - def json_message = json_template.toString() - - // POST - def post = new URL(hook_url).openConnection(); - post.setRequestMethod("POST") - post.setDoOutput(true) - post.setRequestProperty("Content-Type", "application/json") - post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); - if (! 
postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); - } - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? 
'' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // nf-core logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Processes.groovy b/lib/Processes.groovy new file mode 100644 index 00000000..480e10bb --- /dev/null +++ b/lib/Processes.groovy @@ -0,0 +1,61 @@ +import nextflow.Nextflow + +import Constants +import Utils + + +class Processes { + + public static getRunStages(include, exclude, manual_select, log) { + def processes = manual_select ? [] : Constants.Process.values().toList() + def include_list = this.getProcessList(include, log) + def exclude_list = this.getProcessList(exclude, log) + this.checkIncludeExcludeList(include_list, exclude_list, log) + + processes.addAll(include_list) + processes.removeAll(exclude_list) + + return Constants.Process + .values() + .collectEntries { p -> [p.name().toLowerCase(), p in processes] } + } + + public static getProcessList(process_str, log) { + if (!process_str) { + return [] + } + return process_str + .tokenize(',') + .collect { name -> + try { + return Constants.Process.valueOf(name.toUpperCase()) + } catch(java.lang.IllegalArgumentException e) { + def processes_str = Processes.getProcessNames().join('\n - ') + log.error "recieved invalid process: '${name}'. 
Valid options are:\n - ${processes_str}" + Nextflow.exit(1) + } + } + .unique() + } + + public static checkIncludeExcludeList(include_list, exclude_list, log) { + def processes_shared = [*include_list, *exclude_list] + .countBy { it } + .findAll { k, v -> v > 1 } + .keySet() + + if (processes_shared) { + def processes_shared_str = processes_shared.join('\n - ') + def message_base = 'the following processes was found in the include and the exclude list' + log.error "${message_base}:\n - ${processes_shared_str}" + Nextflow.exit(1) + } + } + + public static getProcessNames() { + Constants.Process + .values() + *.name() + *.toLowerCase() + } +} diff --git a/lib/Utils.groovy b/lib/Utils.groovy index 8d030f4e..38fae00c 100644 --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -4,44 +4,548 @@ import org.yaml.snakeyaml.Yaml +import nextflow.Nextflow +import nextflow.splitter.SplitterEx + class Utils { - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] + public static parseInput(input_fp_str, stub_run, log) { + + // NOTE(SW): using NF .splitCsv channel operator, hence should be easily interchangable with NF syntax + + def input_fp = Utils.getFileObject(input_fp_str) + def inputs = nextflow.splitter.SplitterEx.splitCsv(input_fp, [header: true]) + .groupBy { it['group_id'] } + .collect { group_id, entries -> + + def meta = [group_id: group_id] + def sample_keys = [] as Set + + // Process each entry + entries.each { + // Add subject id if absent or check if current matches existing + if (meta.containsKey('subject_id') && meta.subject_id != it.subject_id) { + log.error "got unexpected subject name for ${group_id} ${meta.subject_id}: ${it.subject_id}" + Nextflow.exit(1) + } else { + meta.subject_id = it.subject_id + } + + // Sample type + def sample_type_enum = Utils.getEnumFromString(it.sample_type, Constants.SampleType) + if (!sample_type_enum) { + def sample_type_str = Utils.getEnumNames(Constants.SampleType).join('\n - ') + log.error "received invalid sample type: '${it.sample_type}'. Valid options are:\n - ${sample_type_str}" + Nextflow.exit(1) + } + + // Sequence type + def sequence_type_enum = Utils.getEnumFromString(it.sequence_type, Constants.SequenceType) + if (!sequence_type_enum) { + def sequence_type_str = Utils.getEnumNames(Constants.SequenceType).join('\n - ') + log.error "received invalid sequence type: '${it.sequence_type}'. Valid options are:\n - ${sequence_type_str}" + Nextflow.exit(1) + } + + // Filetype + def filetype_enum = Utils.getEnumFromString(it.filetype, Constants.FileType) + if (!filetype_enum) { + def filetype_str = Utils.getEnumNames(Constants.FileType).join('\n - ') + log.error "received invalid file type: '${it.filetype}'. 
Valid options are:\n - ${filetype_str}" + Nextflow.exit(1) + } + + def sample_key = [sample_type_enum, sequence_type_enum] + def meta_sample = meta.get(sample_key, [sample_id: it.sample_id]) + + if (meta_sample.sample_id != it.sample_id) { + log.error "got unexpected sample name for ${group_id} ${sample_type_enum}/${sequence_type_enum}: ${it.sample_id}" + Nextflow.exit(1) + } + + if (meta_sample.containsKey(filetype_enum) & filetype_enum != Constants.FileType.FASTQ) { + log.error "got duplicate file for ${group_id} ${sample_type_enum}/${sequence_type_enum}: ${filetype_enum}" + Nextflow.exit(1) + } + + // Info data + def info_data = [:] + if (it.containsKey('info')) { + // Parse + it.info + .tokenize(';') + .each { e -> + def (k, v) = e.tokenize(':') + def info_field_enum = Utils.getEnumFromString(k, Constants.InfoField) + + if (!info_field_enum) { + def info_field_str = Utils.getEnumNames(Constants.InfoField).join('\n - ') + log.error "received invalid info field: '${k}'. Valid options are:\n - ${info_field_str}" + Nextflow.exit(1) + } + + if (info_data.containsKey(info_field_enum)) { + log.error "got duplicate info field for ${group_id} ${sample_type_enum}/${sequence_type_enum}: ${info_field_enum}" + Nextflow.exit(1) + } + + info_data[info_field_enum] = v + } + + // Process + if (info_data.containsKey(Constants.InfoField.CANCER_TYPE)) { + meta[Constants.InfoField.CANCER_TYPE] = info_data[Constants.InfoField.CANCER_TYPE] + } + + } + + + // Handle inputs appropriately + if (filetype_enum === Constants.FileType.FASTQ) { + + if (!info_data.containsKey(Constants.InfoField.LIBRARY_ID)) { + log.error "missing 'library_id' info field for ${group_id} ${sample_type_enum}/${sequence_type_enum}" + Nextflow.exit(1) + } + + if (!info_data.containsKey(Constants.InfoField.LANE)) { + log.error "missing 'lane' info field for ${group_id} ${sample_type_enum}/${sequence_type_enum}" + Nextflow.exit(1) + } + + def (fwd, rev) = it.filepath.tokenize(';') + def fastq_key = [info_data[Constants.InfoField.LIBRARY_ID], info_data[Constants.InfoField.LANE]] + + if (meta_sample.containsKey(fastq_key)) { + log.error "got duplicate lane + library_id data for ${group_id} ${sample_type_enum}/${sequence_type_enum}: ${fastq_key}" + Nextflow.exit(1) + } + + if (!meta_sample.containsKey(filetype_enum)) { + meta_sample[filetype_enum] = [:] + } + + meta_sample[filetype_enum][fastq_key] = ['fwd': fwd, 'rev': rev] + + } else { + + meta_sample[filetype_enum] = Utils.getFileObject(it.filepath) + + } + + // Record sample key to simplify iteration later on + sample_keys << sample_key + } + + // Check that required indexes are provided or are accessible + sample_keys.each { sample_key -> + + meta[sample_key]*.key.each { key -> + + // NOTE(SW): I was going to use two maps but was unable to get an enum map to compile + + def index_enum + def index_str + + if (key === Constants.FileType.BAM) { + index_enum = Constants.FileType.BAI + index_str = 'bai' + } else if (key === Constants.FileType.BAM_MARKDUPS) { + index_enum = Constants.FileType.BAI + index_str = 'bai' + } else if (key === Constants.FileType.GRIDSS_VCF) { + index_enum = Constants.FileType.GRIDSS_VCF_TBI + index_str = 'tbi' + } else if (key === Constants.FileType.GRIPSS_VCF) { + index_enum = Constants.FileType.GRIPSS_VCF_TBI + index_str = 'tbi' + } else if (key === Constants.FileType.GRIPSS_UNFILTERED_VCF) { + index_enum = Constants.FileType.GRIPSS_UNFILTERED_VCF_TBI + index_str = 'tbi' + } else if (key === Constants.FileType.SAGE_VCF) { + index_enum = 
Constants.FileType.SAGE_VCF_TBI + index_str = 'tbi' + } else { + return + } + + if (meta[sample_key].containsKey(index_enum)) { + return + } + + def fp = meta[sample_key][key].toUriString() + def index_fp = nextflow.Nextflow.file("${fp}.${index_str}") + + if (!index_fp.exists() && !stub_run) { + def (sample_type, sequence_type) = sample_key + log.error "no index provided or found for ${meta.group_id} ${sample_type}/${sequence_type}: ${key}: ${fp}" + Nextflow.exit(1) + } + + meta[sample_key][index_enum] = index_fp + + } + } + + return meta + } + + return inputs + } + + public static void createStubPlaceholders(params) { + + def fps = [ + params.ref_data_genome_alt, + params.ref_data_genome_bwamem2_index, + params.ref_data_genome_dict, + params.ref_data_genome_fai, + params.ref_data_genome_fasta, + params.ref_data_genome_gridss_index, + params.ref_data_genome_gtf, + params.ref_data_genome_star_index, + params.ref_data_virusbreakenddb_path, + ] + + params.hmf_data_paths[params.genome_version.toString()] + .each { k, v -> + fps << "${params.ref_data_hmf_data_path.replaceAll('/$', '')}/${v}" + } + + if(params.panel !== null) { + params.panel_data_paths[params.panel][params.genome_version.toString()] + .each { k, v -> + fps << "${params.ref_data_panel_data_path.replaceAll('/$', '')}/${v}" + } + } + + fps.each { fp_str -> + if (fp_str === null) return + + def fp = Utils.getFileObject(fp_str) + + if (!fp_str || fp.exists()) return + + if (fp_str.endsWith('/')) { + fp.mkdirs() + } else { + fp.getParent().mkdirs() + fp.toFile().createNewFile() + } + } + + } + + public static void validateInput(inputs, run_config, params, log) { + + def sample_keys = [ + [Constants.SampleType.TUMOR, Constants.SequenceType.DNA], + [Constants.SampleType.TUMOR, Constants.SequenceType.RNA], + [Constants.SampleType.NORMAL, Constants.SequenceType.DNA], + ] + + inputs.each { meta -> + + // Require BAMs or BAM_MARKDUPs or FASTQs for each defined sample type + // NOTE(SW): repeating key pairs above to avoid having to duplicate error messages + sample_keys.each { key -> + + if (!meta.containsKey(key)) { + return + } + + def (sample_type, sequence_type) = key + + if (!meta[key].containsKey(Constants.FileType.BAM) && + !meta[key].containsKey(Constants.FileType.BAM_MARKDUPS) && + !meta[key].containsKey(Constants.FileType.FASTQ)) { + + log.error "no BAMs nor BAM_MARKDUPs nor FASTQs provided for ${meta.group_id} ${sample_type}/${sequence_type}\n\n" + + "NB: BAMs or BAM_MARKDUPs or FASTQs are always required as they are the basis to determine input sample type." + Nextflow.exit(1) + } + + } + + // Apply some required restrictions to targeted mode + if (run_config.mode === Constants.RunMode.TARGETED) { + + // Do not allow normal DNA + if (Utils.hasNormalDna(meta)) { + log.error "targeted mode is not compatible with the normal DNA BAM provided for ${meta.group_id}\n\n" + + "The targeted workflow supports only tumor DNA BAMs (and tumor RNA BAMs for TSO500)" + Nextflow.exit(1) + } + + // Do not allow only tumor RNA + if (Utils.hasTumorRnaBam(meta) && !Utils.hasTumorDna(meta)) { + log.error "targeted mode is not compatible with only tumor RNA provided for ${meta.group_id}\n\n" + + "The targeted workflow requires tumor DNA and can optionally take tumor RNA, depending on " + + "the configured panel." 
+ Nextflow.exit(1) + } + + // Restrict tumor RNA inputs to the TSO500 panel + if (Utils.hasTumorRnaBam(meta) && run_config.panel != 'tso500') { + def panel = run_config.panel.toUpperCase() + log.error "only the TSO500 panel supports tumor RNA analysis but got: ${panel}" + Nextflow.exit(1) + } + + } + + // Do not allow normal DNA only + if (Utils.hasNormalDna(meta) && !Utils.hasTumorDna(meta)) { + log.error "germline only mode not supported, found only a normal DNA BAM for ${meta.group_id}" + Nextflow.exit(1) + } + + // Enforce unique samples names within groups + def sample_ids_duplicated = sample_keys + .groupBy { meta.getOrDefault(it, [:]).getOrDefault('sample_id', null) } + .findResults { k, v -> k !== null & v.size() > 1 ? [k, v] : null } + + if (sample_ids_duplicated) { + def duplicate_message_strs = sample_ids_duplicated.collect { sample_id, keys -> + def key_strs = keys.collect { sample_type, sequence_type -> "${sample_type}/${sequence_type}" } + return " * ${sample_id}: ${key_strs.join(", ")}" + } + log.error "duplicate sample names found for ${meta.group_id}:\n\n${duplicate_message_strs.join("\n")}" + Nextflow.exit(1) + } + + } + + + // NOTE(SW): the follwing final config checks are performed here since they require additional information + // regarding processes that are run and also inputs + + def has_alt_contigs = params.genome_type == 'alt' + + // Ensure that custom genomes with ALT contigs that need indexes built have the required .alt file + def has_bwa_indexes = (params.ref_data_genome_bwamem2_index && params.ref_data_genome_gridss_index) + def has_alt_file = params.containsKey('ref_data_genome_alt') && params.ref_data_genome_alt + def run_bwa_or_gridss_index = run_config.stages.alignment && run_config.has_dna_fastq && !has_bwa_indexes + + if (run_bwa_or_gridss_index && has_alt_contigs && !has_alt_file) { + log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " The genome .alt file is required when building bwa-mem2 or GRIDSS indexes\n" + + " for reference genomes containing ALT contigs\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.exit(1) + } + + // Refuse to create STAR index for reference genome containing ALTs, refer to Slack channel + def run_star_index = run_config.stages.alignment && run_config.has_rna_fastq && !params.ref_data_genome_star_index + + if (run_star_index && has_alt_contigs) { + log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Refusing to create the STAR index for a reference genome with ALT contigs.\n" + + " Please review https://github.com/alexdobin/STAR docs or contact us on Slack.\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.exit(1) + } + + // Require that an input GTF file is provided when creating STAR index + if (run_star_index && !params.ref_data_genome_gtf) { + log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Creating a STAR index requires the appropriate genome transcript annotations\n" + + " as a GTF file. Please contact us on Slack for further information." 
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.exit(1) + } + + } + + static public getEnumFromString(s, e) { try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return + return e.valueOf(s.toUpperCase()) + } catch(java.lang.IllegalArgumentException err) { + return null } + } + + public static getEnumNames(e) { + e + .values() + *.name() + *.toLowerCase() + } + - // Check that all channels are present - // This channel list is ordered by required channel priority. - def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] - def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + static public getFileObject(path) { + return path ? nextflow.Nextflow.file(path) : [] + } - // Check that they are in the right order - def channel_priority_violation = false - def n = required_channels_in_order.size() - for (int i = 0; i < n - 1; i++) { - channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + static public getRunMode(run_mode, log) { + def run_mode_enum = Utils.getEnumFromString(run_mode, Constants.RunMode) + if (!run_mode_enum) { + def run_modes_str = Utils.getEnumNames(Constants.RunMode).join('\n - ') + log.error "recieved an invalid run mode: '${run_mode}'. Valid options are:\n - ${run_modes_str}" + Nextflow.exit(1) } + return run_mode_enum + } + + + // Sample records + static public getTumorDnaSample(meta) { + return meta.getOrDefault([Constants.SampleType.TUMOR, Constants.SequenceType.DNA], [:]) + } + + static public getTumorRnaSample(meta) { + return meta.getOrDefault([Constants.SampleType.TUMOR, Constants.SequenceType.RNA], [:]) + } + + static public getNormalDnaSample(meta) { + return meta.getOrDefault([Constants.SampleType.NORMAL, Constants.SequenceType.DNA], [:]) + } + + + // Sample names + static public getTumorDnaSampleName(meta) { + return getTumorDnaSample(meta)['sample_id'] + } + + static public getTumorRnaSampleName(meta) { + return getTumorRnaSample(meta)['sample_id'] + } + + static public getNormalDnaSampleName(meta) { + return getNormalDnaSample(meta)['sample_id'] + } + - if (channels_missing | channel_priority_violation) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/\n" + - " The observed channel order is \n" + - " ${channels}\n" + - " but the following channel order is required:\n" + - " ${required_channels_in_order}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + // Files + static public getTumorDnaFastq(meta) { + return getTumorDnaSample(meta).getOrDefault(Constants.FileType.FASTQ, null) + } + + static public getTumorDnaBam(meta) { + return getTumorDnaSample(meta).getOrDefault(Constants.FileType.BAM, null) + } + + static public getTumorDnaMarkdupsBam(meta) { + return getTumorDnaSample(meta).getOrDefault(Constants.FileType.BAM_MARKDUPS, null) + } + + static public getTumorDnaBai(meta) { + return getTumorDnaSample(meta).getOrDefault(Constants.FileType.BAI, null) + } + + + static public hasTumorDnaFastq(meta) { + return getTumorDnaFastq(meta) !== null 
+ } + + static public hasTumorDnaBam(meta) { + return getTumorDnaBam(meta) !== null + } + + static public hasTumorDnaMarkdupsBam(meta) { + return getTumorDnaMarkdupsBam(meta) !== null + } + + + static public getNormalDnaFastq(meta) { + return getNormalDnaSample(meta).getOrDefault(Constants.FileType.FASTQ, null) + } + + static public getNormalDnaBam(meta) { + return getNormalDnaSample(meta).getOrDefault(Constants.FileType.BAM, null) + } + + static public getNormalDnaMarkdupsBam(meta) { + return getNormalDnaSample(meta).getOrDefault(Constants.FileType.BAM_MARKDUPS, null) + } + static public getNormalDnaBai(meta) { + return getNormalDnaSample(meta).getOrDefault(Constants.FileType.BAI, null) + } + + + static public hasNormalDnaFastq(meta) { + return getNormalDnaFastq(meta) !== null + } + + static public hasNormalDnaBam(meta) { + return getNormalDnaBam(meta) !== null + } + + static public hasNormalDnaMarkdupsBam(meta) { + return getNormalDnaMarkdupsBam(meta) !== null + } + + + static public hasDnaFastq(meta) { + return hasNormalDnaFastq(meta) || hasTumorDnaFastq(meta) + } + + static public hasDnaMarkdupsBam(meta) { + return hasNormalDnaMarkdupsBam(meta) || hasTumorDnaMarkdupsBam(meta) + } + + + static public getTumorRnaFastq(meta) { + return getTumorRnaSample(meta).getOrDefault(Constants.FileType.FASTQ, null) + } + + static public getTumorRnaBam(meta) { + return getTumorRnaSample(meta).getOrDefault(Constants.FileType.BAM, null) + } + + static public getTumorRnaBai(meta) { + return getTumorRnaSample(meta).getOrDefault(Constants.FileType.BAI, null) + } + + + static public hasTumorRnaFastq(meta) { + return getTumorRnaFastq(meta) !== null + } + + static public hasTumorRnaBam(meta) { + return getTumorRnaBam(meta) !== null + } + + + // Status + static public hasTumorDna(meta) { + return hasTumorDnaBam(meta) || hasTumorDnaMarkdupsBam(meta) || hasTumorDnaFastq(meta) + } + + static public hasNormalDna(meta) { + return hasNormalDnaBam(meta) || hasNormalDnaMarkdupsBam(meta) || hasNormalDnaFastq(meta) + } + + static public hasTumorRna(meta) { + return hasTumorRnaBam(meta) || hasTumorRnaFastq(meta) + } + + + // Misc + public static getInput(meta, key) { + + def result = [] + def (key_filetype, key_filetypes, key_sequencetypes) = key + + for (key_sample in [key_filetypes, key_sequencetypes].combinations()) { + if (meta.containsKey(key_sample) && meta[key_sample].containsKey(key_filetype)) { + result = meta[key_sample].get(key_filetype) + break + } + } + return result + } + + public static hasExistingInput(meta, key) { + return getInput(meta, key) != [] + } + + public static selectCurrentOrExisting(val, meta, key) { + if (hasExistingInput(meta, key)) { + return getInput(meta, key) + } else { + return val } } + } diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 07f95372..56047a62 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -2,92 +2,247 @@ // This file holds several functions specific to the main.nf workflow in the nf-core/oncoanalyser pipeline // +import nextflow.Nextflow + +import Utils + class WorkflowMain { // - // Citation string for pipeline + // Set parameter defaults where required // - public static String citation(workflow) { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - // TODO nf-core: Add Zenodo DOI for pipeline after first release - //"* The pipeline\n" + - //" https://doi.org/10.5281/zenodo.XXXXXXX\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software 
dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" - } + public static void setParamsDefaults(params, log) { - // - // Print help to screen if required - // - public static String help(workflow, params, log) { - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - def help_string = '' - help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } + def default_invalid = false - // - // Print parameter summary log to screen - // - public static String paramsSummaryLog(workflow, params, log) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } + // Set defaults common to all run configuration - // - // Validate parameters and print summary to screen - // - public static void initialise(workflow, params, log) { - // Print help to screen if required - if (params.help) { - log.info help(workflow, params, log) - System.exit(0) + if (!params.containsKey('genome_version')) { + if (Constants.GENOMES_VERSION_37.contains(params.genome)) { + params.genome_version = '37' + } else if (Constants.GENOMES_VERSION_38.contains(params.genome)) { + params.genome_version = '38' + } else { + default_invalid = true + } } - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) + if (!params.containsKey('genome_type')) { + if (Constants.GENOMES_ALT.contains(params.genome)) { + params.genome_type = 'alt' + } else if (Constants.GENOMES_DEFINED.contains(params.genome)) { + params.genome_type = 'no_alt' + } else { + default_invalid = true + } } - // Print parameter summary log to screen + if (!params.containsKey('ref_hmf_data_path')) { + if (params.genome_version.toString() == '37') { + params.ref_data_hmf_data_path = Constants.HMF_DATA_37_PATH + } else if (params.genome_version.toString() == '38') { + params.ref_data_hmf_data_path = Constants.HMF_DATA_38_PATH + } else { + default_invalid = true + } + } - log.info paramsSummaryLog(workflow, params, log) + // Bad configuration, catch in validateParams + if (default_invalid) { + return + } - // Check that a -profile or Nextflow config has been provided to run the pipeline - NfcoreTemplate.checkConfigProvided(workflow, log) + // Set defaults specific to run configuration without attempting to validate - // Check that conda channels are set-up correctly - if (params.enable_conda) { - Utils.checkCondaChannels(log) + def run_mode + if (params.mode !== null) { + run_mode = Utils.getRunMode(params.mode, log) + } else { + // Bad configuration, catch in validateParams + return } - // Check AWS batch settings - NfcoreTemplate.awsBatch(workflow, params) + if (run_mode === Constants.RunMode.TARGETED) { - // Check input has been provided - if (!params.input) { - log.error "Please provide an input samplesheet to the pipeline e.g. 
'--input samplesheet.csv'" - System.exit(1) + // Attempt to set default panel data path; make no assumption on valid 'panel' value + + if (params.containsKey('panel')) { + if (params.panel == 'tso500' && params.genome_version.toString() == '37') { + params.ref_data_panel_data_path = Constants.TSO500_PANEL_37_PATH + } else if (params.panel == 'tso500' && params.genome_version.toString() == '38') { + params.ref_data_panel_data_path = Constants.TSO500_PANEL_38_PATH + } + } + } + + def stages = Processes.getRunStages( + params.processes_include, + params.processes_exclude, + params.processes_manual, + log, + ) + + if (!params.containsKey('ref_data_virusbreakenddb_path') && stages.virusinterpreter && run_mode === Constants.RunMode.WGTS){ + params.ref_data_virusbreakenddb_path = Constants.VIRUSBREAKENDDB_PATH + } + + if (!params.containsKey('ref_data_hla_slice_bed') && stages.lilac) { + if (params.genome_version.toString() == '38' && params.genome_type == 'alt') { + params.ref_data_hla_slice_bed = Constants.HLA_SLICE_BED_GRCH38_ALT_PATH + } } + + // Final point to set any default to avoid access to undefined parameters during nf-validation + if (!params.containsKey('panel')) { params.panel = null } + if (!params.containsKey('ref_data_genome_alt')) { params.ref_data_genome_alt = null } + if (!params.containsKey('ref_data_genome_gtf')) { params.ref_data_genome_gtf = null } + if (!params.containsKey('ref_data_hla_slice_bed')) { params.ref_data_hla_slice_bed = null } + if (!params.containsKey('ref_data_panel_data_path')) { params.ref_data_panel_data_path = null } + if (!params.containsKey('ref_data_virusbreakenddb_path')) { params.ref_data_virusbreakenddb_path = null } + } + // - // Get attribute from genome config file e.g. fasta + // Check and validate parameters // - public static Object getGenomeAttribute(params, attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] + public static void validateParams(params, log) { + + // Common parameters + + if (!params.genome) { + log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome must be set using the --genome CLI argument or in a configuration file.\n" + + " Currently, the available genome are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.exit(1) + } else if (!params.genomes.containsKey(params.genome)) { + log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.exit(1) + } + + if (!Constants.GENOMES_SUPPORTED.contains(params.genome)) { + if (!params.force_genome) { + log.error "currently only the GRCh37_hmf and GRCh38_hmf genomes are supported but got ${params.genome}" + + ", please adjust the --genome argument accordingly or override with --force_genome." 
+ Nextflow.exit(1) + } else { + log.warn "currently only the GRCh37_hmf and GRCh38_hmf genomes are supported but forcing to " + + "proceed with \"${params.genome}\"" } } - return null + + if (!params.genome_version) { + log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome version wasn't provided and genome '${params.genome}' is not defined in \n" + + " genome version list.\n" + + " Currently, the list of genomes in the version list include:\n" + + " ${Constants.GENOMES_DEFINED.join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.exit(1) + } + + if (!params.genome_type) { + log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome type wasn't provided and genome '${params.genome}' is not defined in \n" + + " genome type list.\n" + + " Currently, the list of genomes in the type list include:\n" + + " ${Constants.GENOMES_DEFINED.join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.exit(1) + } + + if (!params.ref_data_hmf_data_path) { + log.error "HMF data path wasn't provided" + Nextflow.exit(1) + } + + // Run configuration specific parameters + + if (!params.mode) { + def run_modes = Utils.getEnumNames(Constants.RunMode).join('\n - ') + log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Run mode must be set using the --mode CLI argument or in a configuration \n" + + " file.\n" + + " Currently, the available run modes are:\n" + + " - ${run_modes}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.exit(1) + } + + def run_mode = Utils.getRunMode(params.mode, log) + + if (run_mode === Constants.RunMode.TARGETED) { + + if (!params.containsKey('panel')) { + + def panels = Constants.PANELS_DEFINED.join('\n - ') + log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " A panel is required to be set using the --panel CLI argument or in a \n" + + " configuration file.\n" + + " Currently, the available panels are:\n" + + " - ${panels}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.exit(1) + + } else if (!Constants.PANELS_DEFINED.contains(params.panel)) { + + def panels = Constants.PANELS_DEFINED.join('\n - ') + log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " The ${params.panel} is not defined. 
Currently, the available panels are:\n" + + " - ${panels}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.exit(1) + + } + + } + + if (params.ref_data_genome_alt !== null) { + if (params.genome_type != 'alt') { + log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Using a reference genome without ALT contigs but found an .alt file\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.exit(1) + } + + def ref_data_genome_alt_fn = nextflow.Nextflow.file(params.ref_data_genome_alt).name + def ref_data_genome_fasta_fn = nextflow.Nextflow.file(params.ref_data_genome_fasta).name + if (ref_data_genome_alt_fn != "${ref_data_genome_fasta_fn}.alt") { + log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Found .alt file with filename of ${ref_data_genome_alt_fn} but it is required to match\n" + + " reference genome FASTA filename stem: ${ref_data_genome_fasta_fn}.alt\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.exit(1) + } + } + + + } + + public static getRunConfig(params, inputs, log) { + + def run_mode = Utils.getRunMode(params.mode, log) + + def stages = Processes.getRunStages( + params.processes_include, + params.processes_exclude, + params.processes_manual, + log, + ) + + return [ + mode: run_mode, + panel: run_mode === Constants.RunMode.TARGETED ? params.panel : null, + stages: stages, + has_dna: inputs.any { Utils.hasTumorDna(it) }, + has_rna: inputs.any { Utils.hasTumorRna(it) }, + has_rna_fastq: inputs.any { Utils.hasTumorRnaFastq(it) }, + has_dna_fastq: inputs.any { Utils.hasTumorDnaFastq(it) || Utils.hasNormalDnaFastq(it) }, + ] } } diff --git a/lib/WorkflowOncoanalyser.groovy b/lib/WorkflowOncoanalyser.groovy index ae6958ad..b4701e1f 100755 --- a/lib/WorkflowOncoanalyser.groovy +++ b/lib/WorkflowOncoanalyser.groovy @@ -2,76 +2,125 @@ // This file holds several functions specific to the workflow/oncoanalyser.nf in the nf-core/oncoanalyser pipeline // -import groovy.text.SimpleTemplateEngine +import static groovy.io.FileType.FILES + +import nextflow.Channel +import nextflow.Nextflow + +import Constants +import Processes +import Utils class WorkflowOncoanalyser { - // - // Check and validate parameters - // - public static void initialise(params, log) { - genomeExistsError(params, log) + public static groupByMeta(Map named_args, ... channels) { + def r = channels + // Set position; required to use non-blocking .mix operator + // NOTE(SW): operating on native list object containing channels + def i = 0 + r = r + .collect { ch -> + def ii = i + def d = ch.map { data -> + def meta = data[0] + def values = data[1..-1] + return [meta, [position: ii, values: values]] + } + i++ + return d + } + r = Channel.empty().mix(*r) - if (!params.fasta) { - log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." 
- System.exit(1) - } - } + // NOTE(SW): As of Nextflow 22.10.6, groupTuple requires a matching meta /and/ an additional element to complete without error, these placeholders are filtered in the groupByMeta function + r = r.filter { it[0] != Constants.PLACEHOLDER_META } + + r = r + .groupTuple(size: channels.size()) + .map { data -> + def meta = data[0] + def values_map = data[1] - // - // Get workflow summary for MultiQC - // - public static String paramsSummaryMultiqc(workflow, summary) { - String summary_section = '' - for (group in summary.keySet()) { - def group_params = summary.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "

<p style=\"font-size:110%\"><b>$group</b></p>\n"
-                summary_section += "    <dl class=\"dl-horizontal\">\n"
-                for (param in group_params.keySet()) {
-                    summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>
\n" + def values_list = values_map + .sort(false) { it.position } + .collect { it.values } + return [meta, *values_list] + } + + if (named_args.getOrDefault('flatten', true)) { + def flatten_mode = named_args.getOrDefault('flatten_mode', 'nonrecursive') + if (flatten_mode == 'recursive') { + r = r.map { it.flatten() } + } else if (flatten_mode == 'nonrecursive') { + r = r.map { data -> + def meta = data[0] + def inputs = data[1..-1].collectMany { it } + return [meta, *inputs] } - summary_section += "
\n" + } else { + System.err.println "ERROR: got bad flatten_mode: ${flatten_mode}" + Nextflow.exit(1) } } - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" - return yaml_file_text + return r + } + + // NOTE(SW): function signature required to catch where no named arguments are passed + public static groupByMeta(... channels) { + return groupByMeta([:], *channels) } - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { - // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file - def meta = [:] - meta.workflow = run_workflow.toMap() - meta["manifest_map"] = run_workflow.manifest.toMap() - - meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" - meta["nodoi_text"] = meta.manifest_map.doi ? "": "
<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>
  • " - - def methods_text = mqc_methods_yaml.text - - def engine = new SimpleTemplateEngine() - def description_html = engine.createTemplate(methods_text).make(meta) - - return description_html - }// - // Exit pipeline if incorrect --genome key provided - // - private static void genomeExistsError(params, log) { - if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - System.exit(1) + public static getInput(Map named_args, meta, key) { + + def result + def (key_filetype, key_filetypes, key_sequencetypes) = key + + for (key_sample in [key_filetypes, key_sequencetypes].combinations()) { + if (meta.containsKey(key_sample) && meta[key_sample].containsKey(key_filetype)) { + // NOTE(SW): could return early here then false below + return meta[key_sample].getAt(key_filetype) + break + } + } + + if (result) { + return result + } else { + return false } + + } + + // NOTE(SW): function signature required to catch where no named arguments are passed + public static getInput(ch, key) { + return getInput([:], ch, key) + } + + public static joinMeta(Map named_args, ch_a, ch_b) { + // NOTE(SW): the cross operator is used to allow many-to-one relationship between ch_output + // and ch_metas + def key_a = named_args.getOrDefault('key_a', 'group_id') + def key_b = named_args.getOrDefault('key_b', 'key') + def ch_ready_a = ch_a.map { [it[0].getAt(key_b), it[1..-1]] } + def ch_ready_b = ch_b.map { meta -> [meta.getAt(key_a), meta] } + return ch_ready_b + .cross(ch_ready_a) + .map { b, a -> + def (ka, values) = a + def (kb, meta) = b + return [meta, *values] + } + } + + // NOTE(SW): function signature required to catch where no named arguments are passed + public static joinMeta(ch_output, ch_metas) { + joinMeta([:], ch_output, ch_metas) + } + + public static restoreMeta(ch_output, ch_metas) { + // NOTE(SW): ch_output must contain a Map in the first position with a key named 'key' that + // contains the corresponding meta.id value, for example: [val(meta_process), *process_outputs] + joinMeta([:], ch_output, ch_metas) } } diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb5..00000000 Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ diff --git a/main.nf b/main.nf index 5fa2e2fb..91add502 100644 --- a/main.nf +++ b/main.nf @@ -1,10 +1,12 @@ #!/usr/bin/env nextflow +import Constants +import Utils + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nf-core/oncoanalyser ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Github : https://github.com/nf-core/oncoanalyser - Website: https://nf-co.re/oncoanalyser Slack : https://nfcore.slack.com/channels/oncoanalyser ---------------------------------------------------------------------------------------- @@ -14,47 +16,112 @@ nextflow.enable.dsl = 2 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - GENOME PARAMETER VALUES + IMPORT NF-CORE UTILITY FUNCTIONS / SUBWORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ 
-params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_oncoanalyser_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_oncoanalyser_pipeline' + +include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_oncoanalyser_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY + SET DEFAULT VALUES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -WorkflowMain.initialise(workflow, params, log) +params.ref_data_genome_fasta = getGenomeAttribute('fasta') +params.ref_data_genome_fai = getGenomeAttribute('fai') +params.ref_data_genome_dict = getGenomeAttribute('dict') +params.ref_data_genome_bwamem2_index = getGenomeAttribute('bwamem2_index') +params.ref_data_genome_gridss_index = getGenomeAttribute('gridss_index') +params.ref_data_genome_star_index = getGenomeAttribute('star_index') + +WorkflowMain.setParamsDefaults(params, log) +WorkflowMain.validateParams(params, log) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOW FOR PIPELINE + CREATE PLACEHOLDER FILES FOR STUB RUNS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { ONCOANALYSER } from './workflows/oncoanalyser' +// NOTE(SW): required prior to workflow import + +if (workflow.stubRun && params.create_stub_placeholders) { + Utils.createStubPlaceholders(params) +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT WORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { TARGETED } from './workflows/targeted' +include { WGTS } from './workflows/wgts' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NAMED WORKFLOWS FOR PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ // -// WORKFLOW: Run main nf-core/oncoanalyser analysis pipeline +// WORKFLOW: Run main analysis pipeline depending on type of input // +run_mode = Utils.getRunMode(params.mode, log) + workflow NFCORE_ONCOANALYSER { - ONCOANALYSER () + + if (run_mode === Constants.RunMode.WGTS) { + WGTS() + } else if (run_mode === Constants.RunMode.TARGETED) { + TARGETED() + } else { + log.error("received bad run mode: ${run_mode}") + Nextflow.exit(1) + } + } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN ALL WORKFLOWS + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// WORKFLOW: Execute a single named workflow for the pipeline -// See: https://github.com/nf-core/rnaseq/issues/619 -// workflow { - NFCORE_ONCOANALYSER () + + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION( + params.version, + params.help, + params.validate_params, + params.monochrome_logs, + args, + params.outdir, + ) + + // + // WORKFLOW: Run main workflow + // + NFCORE_ONCOANALYSER() + + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + ) } /* diff --git a/modules.json b/modules.json index 4ce9f998..be653409 100644 --- a/modules.json +++ b/modules.json @@ -5,17 +5,69 
@@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { - "custom/dumpsoftwareversions": { + "bwa/index": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + "git_sha": "6278bf9afd4a4b2d00fa6052250e73da3d91546f", + "installed_by": ["modules"], + "patch": "modules/nf-core/bwa/index/bwa-index.diff" }, - "fastqc": { + "bwamem2/index": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + "git_sha": "7081e04c18de9480948d34513a1c1e2d0fa9126d", + "installed_by": ["modules"], + "patch": "modules/nf-core/bwamem2/index/bwamem2-index.diff" }, - "multiqc": { + "gatk4/markduplicates": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + "git_sha": "e726b1730dff525bde4a6839e544dabfea4cd7fd", + "installed_by": ["modules"] + }, + "samtools/dict": { + "branch": "master", + "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", + "installed_by": ["modules"], + "patch": "modules/nf-core/samtools/dict/samtools-dict.diff" + }, + "samtools/faidx": { + "branch": "master", + "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", + "installed_by": ["modules"], + "patch": "modules/nf-core/samtools/faidx/samtools-faidx.diff" + }, + "samtools/flagstat": { + "branch": "master", + "git_sha": "bbb99cb8d679555cc01c98766de7869f83283545", + "installed_by": ["modules"] + }, + "samtools/sort": { + "branch": "master", + "git_sha": "d5d785b3d8b422cda9c6d84a23f629a8e9ff8cd8", + "installed_by": ["modules"] + }, + "star/genomegenerate": { + "branch": "master", + "git_sha": "a21faa6a3481af92a343a10926f59c189a2c16c9", + "installed_by": ["modules"], + "patch": "modules/nf-core/star/genomegenerate/star-genomegenerate.diff" + } + } + }, + "subworkflows": { + "nf-core": { + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", + "installed_by": ["subworkflows"] + }, + "utils_nfvalidation_plugin": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] } } } diff --git a/modules/local/amber/environment.yml b/modules/local/amber/environment.yml new file mode 100644 index 00000000..f5e73626 --- /dev/null +++ b/modules/local/amber/environment.yml @@ -0,0 +1,7 @@ +name: amber +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-amber=4.0.1 diff --git a/modules/local/amber/main.nf b/modules/local/amber/main.nf new file mode 100644 index 00000000..84b33026 --- /dev/null +++ b/modules/local/amber/main.nf @@ -0,0 +1,58 @@ +process AMBER { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-amber:4.0.1--hdfd78af_0' : + 'biocontainers/hmftools-amber:4.0.1--hdfd78af_0' }" + + input: + tuple val(meta), path(tumor_bam), path(normal_bam), path(tumor_bai), path(normal_bai) + val genome_ver + path heterozygous_sites + path target_region_bed + + output: + tuple val(meta), path('amber/'), emit: amber_dir + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def reference_arg = meta.containsKey('normal_id') ? "-reference ${meta.normal_id}" : '' + def reference_bam_arg = normal_bam ? 
"-reference_bam ${normal_bam}" : '' + + def target_regions_bed_arg = target_region_bed ? "-target_regions_bed ${target_region_bed}" : '' + + """ + amber \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ + -tumor ${meta.tumor_id} \\ + -tumor_bam ${tumor_bam} \\ + ${reference_arg} \\ + ${reference_bam_arg} \\ + ${target_regions_bed_arg} \\ + -ref_genome_version ${genome_ver} \\ + -loci ${heterozygous_sites} \\ + -threads ${task.cpus} \\ + -output_dir amber/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + amber: \$(amber -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + mkdir -p amber/ + touch amber/placeholder + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/amber/meta.yml b/modules/local/amber/meta.yml new file mode 100644 index 00000000..dbc5fc90 --- /dev/null +++ b/modules/local/amber/meta.yml @@ -0,0 +1,59 @@ +name: amber +description: Generate a tumor BAF file for PURPLE copy number fit +keywords: + - baf + - cnv +tools: + - amber: + description: Generate a tumor BAF file for PURPLE copy number fit. + homepage: https://github.com/hartwigmedical/hmftools/tree/master/amber + documentation: https://github.com/hartwigmedical/hmftools/tree/master/amber + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - tumor_bam: + type: file + description: Tumor BAM file + pattern: "*.{bam}" + - normal_bam: + type: file + description: Normal BAM file + pattern: "*.{bam}" + - tumor_bai: + type: file + description: Tumor BAI file + pattern: "*.{bai}" + - normal_bai: + type: file + description: Normal BAI file + pattern: "*.{bai}" + - genome_ver: + type: string + description: Reference genome version + - heterozygous_sites: + type: file + description: AMBER heterozygous sites file + pattern: "*.{vcf.gz}" + - target_region_bed: + type: file + description: Target region BED file (optional) + pattern: "*.{bed}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - amber_dir: + type: directory + description: AMBER output directory + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/bamtools/environment.yml b/modules/local/bamtools/environment.yml new file mode 100644 index 00000000..3423794a --- /dev/null +++ b/modules/local/bamtools/environment.yml @@ -0,0 +1,7 @@ +name: bamtools +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-bam-tools=1.2.1 diff --git a/modules/local/bamtools/main.nf b/modules/local/bamtools/main.nf new file mode 100644 index 00000000..5b397403 --- /dev/null +++ b/modules/local/bamtools/main.nf @@ -0,0 +1,51 @@ +process BAMTOOLS { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/hmftools-bam-tools:1.2.1--hdfd78af_0' : + 'biocontainers/hmftools-bam-tools:1.2.1--hdfd78af_0' }" + + input: + tuple val(meta), path(bam), path(bai) + path genome_fasta + val genome_ver + + output: + tuple val(meta), path('*wgsmetrics'), emit: metrics + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + bamtools \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + com.hartwig.hmftools.bamtools.metrics.BamMetrics \\ + ${args} \\ + -sample ${meta.sample_id} \\ + -bam_file ${bam} \\ + -ref_genome ${genome_fasta} \\ + -ref_genome_version ${genome_ver} \\ + -threads ${task.cpus} \\ + -write_old_style \\ + -log_level INFO \\ + -output_dir ./ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bamtools: \$(bamtools -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + touch ${meta.sample_id}.wgsmetrics + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/bamtools/meta.yml b/modules/local/bamtools/meta.yml new file mode 100644 index 00000000..ac8cee66 --- /dev/null +++ b/modules/local/bamtools/meta.yml @@ -0,0 +1,46 @@ +name: bamtools +description: Rapidly process BAMs for various tasks +keywords: + - bam +tools: + - bamtools: + description: Performs rapid processing of BAM files + homepage: https://github.com/hartwigmedical/hmftools/tree/master/bam-tools + documentation: https://github.com/hartwigmedical/hmftools/tree/master/bam-tools + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - bai: + type: file + description: BAI file + pattern: "*.{bai}" + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_ver: + type: string + description: Reference genome version +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - metrics: + description: BAM metrics file + pattern: "*.{wgsmetrics}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/bwa-mem2/mem/environment.yml b/modules/local/bwa-mem2/mem/environment.yml new file mode 100644 index 00000000..8ae51aa4 --- /dev/null +++ b/modules/local/bwa-mem2/mem/environment.yml @@ -0,0 +1,9 @@ +name: bwa-mem2_mem +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bwa-mem2=2.2.1 + - bioconda::samtools=1.19.2 + - bioconda::sambamba=1.0.1 diff --git a/modules/local/bwa-mem2/mem/main.nf b/modules/local/bwa-mem2/mem/main.nf new file mode 100644 index 00000000..d12f7356 --- /dev/null +++ b/modules/local/bwa-mem2/mem/main.nf @@ -0,0 +1,73 @@ +process BWAMEM2_ALIGN { + tag "${meta.id}" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-4dde50190ae599f2bb2027cb2c8763ea00fb5084:4163e62e1daead7b7ea0228baece715bec295c22-0' : + 'biocontainers/mulled-v2-4dde50190ae599f2bb2027cb2c8763ea00fb5084:4163e62e1daead7b7ea0228baece715bec295c22-0' }" + + input: + tuple val(meta), path(reads_fwd), path(reads_rev) + path genome_fasta + path genome_bwamem2_index + + output: + tuple val(meta), path('*.bam'), path('*.bai'), emit: bam + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + + def read_group_tag = "@RG\\tID:${meta.read_group}\\tSM:${meta.sample_id}" + def output_fn = meta.split ? "${meta.split}.${meta.sample_id}.${meta.read_group}.bam" : "${meta.sample_id}.${meta.read_group}.bam" + + """ + ln -fs \$(find -L ${genome_bwamem2_index} -type f) ./ + + bwa-mem2 mem \\ + ${args} \\ + -Y \\ + -K 100000000 \\ + -R '${read_group_tag}' \\ + -t ${task.cpus} \\ + ${genome_fasta} \\ + ${reads_fwd} \\ + ${reads_rev} | \\ + \\ + sambamba view \\ + ${args2} \\ + --sam-input \\ + --format bam \\ + --compression-level 0 \\ + --nthreads ${task.cpus} \\ + /dev/stdin | \\ + \\ + sambamba sort \\ + ${args3} \\ + --nthreads ${task.cpus} \\ + --out ${output_fn} \\ + /dev/stdin + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa-mem2: \$(bwa-mem2 version 2>/dev/null) + sambamba: \$(sambamba --version 2>&1 | egrep '^sambamba' | head -n 1 | awk '{ print \$NF }') + END_VERSIONS + """ + + stub: + def output_fn = meta.split ? "${meta.split}.${meta.sample_id}.${meta.read_group}.bam" : "${meta.sample_id}.${meta.read_group}.bam" + + """ + touch ${output_fn} + touch ${output_fn}.bai + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/bwa-mem2/mem/meta.yml b/modules/local/bwa-mem2/mem/meta.yml new file mode 100644 index 00000000..da61d816 --- /dev/null +++ b/modules/local/bwa-mem2/mem/meta.yml @@ -0,0 +1,51 @@ +name: bwa-mem2_mem +description: The mem alignment algorithm of bwa-mem2 +keywords: + - bwa + - mem + - read alignment + - bwa-mem2 +tools: + - bwa-mem2: + description: Burrow-Wheeler Aligner for short-read alignment + homepage: https://github.com/bwa-mem2/bwa-mem2 + documentation: https://github.com/bwa-mem2/bwa-mem2 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - reads_fwd: + type: file + description: Forward reads FASTQ file + pattern: "*.{fastq.gz}" + - reads_rev: + type: file + description: Reverse reads FASTQ file + pattern: "*.{fastq.gz}" + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_bwamem2_index: + type: directory + description: bwa-mem2 index directory +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - bam: + type: list + description: BAM and BAI file + pattern: "*.{bam,bam.bai}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" + - "@mkcmkc" diff --git a/modules/local/chord/environment.yml b/modules/local/chord/environment.yml new file mode 100644 index 00000000..b698f67e --- /dev/null +++ b/modules/local/chord/environment.yml @@ -0,0 +1,7 @@ +name: chord +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::r-chord=2.03 diff --git a/modules/local/chord/main.nf b/modules/local/chord/main.nf new file mode 100644 index 00000000..bd835166 --- /dev/null +++ b/modules/local/chord/main.nf @@ -0,0 +1,86 @@ +process CHORD { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/r-chord:2.03--r43hdfd78af_0' : + 'biocontainers/r-chord:2.03--r43hdfd78af_0' }" + + input: + tuple val(meta), path(smlv_vcf), path(sv_vcf) + val genome_ver + + output: + tuple val(meta), path('chord/'), emit: chord_dir + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + #!/usr/bin/env Rscript + library('CHORD') + + sampleName <- '${meta.sample_id}' + snvIndVcf <- '${smlv_vcf}' + svVcf <- '${sv_vcf}' + refGenomeVsn <- '${genome_ver}' + + sigOutTxt <- 'chord/${meta.sample_id}_chord_signatures.txt' + prdOutTxt <- 'chord/${meta.sample_id}_chord_prediction.txt' + + dir.create('chord/') + + if (refGenomeVsn == '37') { + library(BSgenome.Hsapiens.UCSC.hg19) + refGenome <- BSgenome.Hsapiens.UCSC.hg19 + } else if (refGenomeVsn == '38') { + library(BSgenome.Hsapiens.UCSC.hg38) + refGenome <- BSgenome.Hsapiens.UCSC.hg38 + } else { + stop('Unsupported ref genome version: ', refGenomeVsn, ' (should be 37 or 38)\\n') + } + + cat('[INFO] Performing chord signature extraction\\n') + signatures <- CHORD::extractSigsChord( + vcf.snv=snvIndVcf, + vcf.indel=snvIndVcf, + vcf.sv=svVcf, + sample.name=sampleName, + sv.caller='gridss', + vcf.filters=list(snv='PASS', indel='PASS', sv='PASS'), + ref.genome=refGenome + ) + + cat('[INFO] Performing chord HRD prediction\\n') + prediction <- chordPredict( + signatures, + hrd.cutoff=0.5 + ) + + cat('[INFO] Writing output file:', sigOutTxt,'\\n') + write.table(signatures, file=sigOutTxt, sep='\\t') + + cat('[INFO] Writing output file:', prdOutTxt,'\\n') + write.table(prediction, file=prdOutTxt, sep='\\t', quote=FALSE, row.names=FALSE) + + cat('[INFO] FINISHED CHORD signature extraction and HRD prediction\\n') + + sink('versions.yml') + writeLines('"${task.process}":') + writeLines(paste(' CHORD:', packageVersion('CHORD'))) + writeLines(paste(' mutSigExtractor:', packageVersion('mutSigExtractor'))) + sink() + """ + + stub: + """ + mkdir -p chord/ + touch chord/${meta.sample_id}_chord_signatures.txt + touch chord/${meta.sample_id}_chord_prediction.txt + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/chord/meta.yml b/modules/local/chord/meta.yml new file mode 100644 index 00000000..fd53d09f --- /dev/null +++ b/modules/local/chord/meta.yml @@ -0,0 +1,43 @@ +name: chord +description: Predict HR deficiency from mutation contexts +keywords: + - hrd + - mutational signatures +tools: + - chord: + description: Prediction of HR deficiency from 
mutation contexts. + homepage: https://github.com/UMCUGenetics/CHORD + documentation: https://github.com/UMCUGenetics/CHORD + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - smlv_vcf: + type: file + description: Tumor small variant VCF file + pattern: "*.{vcf.gz}" + - sv_vcf: + type: file + description: PURPLE SV VCF file + pattern: "*.{vcf.gz}" + - genome_ver: + type: string + description: Reference genome version +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - chord_dir: + type: directory + description: CHORD output directory + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/cobalt/environment.yml b/modules/local/cobalt/environment.yml new file mode 100644 index 00000000..727b5a89 --- /dev/null +++ b/modules/local/cobalt/environment.yml @@ -0,0 +1,7 @@ +name: cobalt +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-cobalt=1.16 diff --git a/modules/local/cobalt/main.nf b/modules/local/cobalt/main.nf new file mode 100644 index 00000000..b3142b70 --- /dev/null +++ b/modules/local/cobalt/main.nf @@ -0,0 +1,59 @@ +process COBALT { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-cobalt:1.16--hdfd78af_0' : + 'biocontainers/hmftools-cobalt:1.16--hdfd78af_0' }" + + input: + tuple val(meta), path(tumor_bam), path(normal_bam), path(tumor_bai), path(normal_bai) + path gc_profile + path diploid_regions + path target_region_normalisation + + output: + tuple val(meta), path('cobalt/'), emit: cobalt_dir + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def reference_arg = meta.containsKey('normal_id') ? "-reference ${meta.normal_id}" : '' + def reference_bam_arg = normal_bam ? "-reference_bam ${normal_bam}" : '' + + def diploid_regions_arg = diploid_regions ? "-tumor_only_diploid_bed ${diploid_regions}" : '' + def target_region_arg = target_region_normalisation ? "-target_region ${target_region_normalisation}" : '' + + """ + cobalt \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ + -tumor ${meta.tumor_id} \\ + -tumor_bam ${tumor_bam} \\ + ${reference_arg} \\ + ${reference_bam_arg} \\ + -threads ${task.cpus} \\ + -gc_profile ${gc_profile} \\ + ${diploid_regions_arg} \\ + ${target_region_arg} \\ + -output_dir cobalt/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cobalt: \$(cobalt -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + mkdir -p cobalt/ + touch cobalt/placeholder + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/cobalt/meta.yml b/modules/local/cobalt/meta.yml new file mode 100644 index 00000000..61812410 --- /dev/null +++ b/modules/local/cobalt/meta.yml @@ -0,0 +1,61 @@ +name: cobalt +description: Count bam lines determines the read depth ratios of the supplied tumor and reference genomes +keywords: + - cobalt + - read depth ratios + - cnv +tools: + - cobalt: + description: Count bam lines determines the read depth ratios of the supplied tumor and reference genomes. 
+ homepage: https://github.com/hartwigmedical/hmftools/tree/master/cobalt + documentation: https://github.com/hartwigmedical/hmftools/tree/master/cobalt + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - tumor_bam: + type: file + description: Tumor BAM file + pattern: "*.{bam}" + - normal_bam: + type: file + description: Normal BAM file (optional) + pattern: "*.{bam}" + - tumor_bai: + type: file + description: Tumor BAI file + pattern: "*.{bai}" + - normal_bai: + type: file + description: Normal BAI file (optional) + pattern: "*.{bai}" + - gc_profile: + type: file + description: GC profile file + pattern: "*.{cnp}" + - diploid_regions: + type: file + description: Diploid regions file (optional) + pattern: "*.{bed.gz}" + - target_region_normalisation: + type: file + description: Normalisation file (optional) + pattern: "*.{tsv}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - cobalt_dir: + type: directory + description: COBALT output directory + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/cuppa/environment.yml b/modules/local/cuppa/environment.yml new file mode 100644 index 00000000..eb974257 --- /dev/null +++ b/modules/local/cuppa/environment.yml @@ -0,0 +1,7 @@ +name: cuppa +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-cuppa=1.8.1 diff --git a/modules/local/cuppa/main.nf b/modules/local/cuppa/main.nf new file mode 100644 index 00000000..8a4971f6 --- /dev/null +++ b/modules/local/cuppa/main.nf @@ -0,0 +1,83 @@ +process CUPPA { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/hmftools-cuppa:1.8.1--hdfd78af_0' : + 'biocontainers/hmftools-cuppa:1.8.1--hdfd78af_0' }" + + input: + tuple val(meta), path(isofox_dir), path(purple_dir), path(linx_dir), path(virusinterpreter_dir) + val genome_ver + path cuppa_resources, stageAs: 'cuppa_reference_data' + val classifier + + output: + tuple val(meta), path('cuppa/'), emit: cuppa_dir + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + # Symlink input files into a single directory + mkdir -p sample_data/ + if [[ ${classifier} == 'DNA' || ${classifier} == 'ALL' ]]; then + find -L ${purple_dir} ${linx_dir} ${virusinterpreter_dir} -maxdepth 1 -type f -exec ln -fs ../{} sample_data/ \\; + fi + + if [[ ${classifier} == 'RNA' ]]; then + find -L ${isofox_dir} -maxdepth 1 -type f -exec ln -fs ../{} sample_data/ \\; + elif [[ ${classifier} == 'ALL' ]]; then + # NOTE(SW): CUPPA requires that the RNA sample name matches the DNA sample name + for fp in \$(find -L ${isofox_dir} -maxdepth 1 -type f); do + fn_out=\$(sed 's/^${meta.sample_rna_id}/${meta.sample_id}/' <<< \${fp##*/}); + cp \${fp} sample_data/\${fn_out} + done; + # Rename identifier in the summary file + sed -i 's/^${meta.sample_rna_id}/${meta.sample_id}/g' sample_data/${meta.sample_id}.isf.summary.csv; + fi; + + mkdir -p cuppa/ + + cuppa \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ + -sample ${meta.sample_id} \\ + -sample_data_dir sample_data/ \\ + -categories ${classifier} \\ + -ref_data_dir ${cuppa_resources} \\ + -ref_genome_version ${genome_ver} \\ + -create_pdf \\ + -output_dir cuppa/ + + if [[ ${classifier} == 'DNA' || ${classifier} == 'ALL' ]]; then + cuppa-chart \\ + -sample ${meta.sample_id} \\ + -sample_data cuppa/${meta.sample_id}.cup.data.csv \\ + -output_dir cuppa/; + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cuppa: \$(cuppa | sed -n '1s/^.* //p') + END_VERSIONS + """ + + stub: + """ + mkdir -p cuppa/ + touch cuppa/${meta.sample_id}.cup.data.csv + touch cuppa/${meta.sample_id}.cuppa.conclusion.txt + touch cuppa/${meta.sample_id}_cup_report.pdf + touch cuppa/${meta.sample_id}.cup.report.summary.png + touch cuppa/${meta.sample_id}.cup.report.features.png + touch cuppa/${meta.sample_id}.cuppa.chart.png + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/cuppa/meta.yml b/modules/local/cuppa/meta.yml new file mode 100644 index 00000000..e2000a73 --- /dev/null +++ b/modules/local/cuppa/meta.yml @@ -0,0 +1,54 @@ +name: cuppa_classifier +description: Predict tumor sample tissue of origin +keywords: + - cup + - tumor + - classification +tools: + - cuppa: + description: Prediction of tumor sample tissue of origin + homepage: https://github.com/hartwigmedical/hmftools/tree/master/cuppa + documentation: https://github.com/hartwigmedical/hmftools/tree/master/cuppa + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id'] + - isofox_dir: + type: directory + description: Isofox output directory (optional) + - purple_dir: + type: directory + description: PURPLE output directory (optional) + - linx_dir: + type: directory + description: LINX output directory (optional) + - virusinterpreter_dir: + type: directory + description: Virus Interpreter output directory (optional) + - genome_ver: + type: string + description: Reference genome version + - cuppa_resources: + type: directory + description: CUPPA resource directory + - classifer: + type: string + description: CUPPA classifier to use +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - cuppa_dir: + type: directory + description: CUPPA output directory + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/custom/extract_tarball/main.nf b/modules/local/custom/extract_tarball/main.nf new file mode 100644 index 00000000..e956b0ad --- /dev/null +++ b/modules/local/custom/extract_tarball/main.nf @@ -0,0 +1,31 @@ +process CUSTOM_EXTRACTTARBALL { + label 'process_single' + + conda "conda-forge::tar=1.34" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'quay.io/nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(tarball) + + output: + path "${meta.id}/", emit: extracted_dir + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + mkdir -p ${meta.id}/ + + tar ${args} -xzvf ${tarball} --strip-components 1 -C ${meta.id}/ + """ + + stub: + """ + mkdir -p ${meta.id}/ + """ +} diff --git a/modules/local/custom/lilac_extract_and_index_contig/main.nf b/modules/local/custom/lilac_extract_and_index_contig/main.nf new file mode 100644 index 00000000..f55ccb10 --- /dev/null +++ b/modules/local/custom/lilac_extract_and_index_contig/main.nf @@ -0,0 +1,50 @@ +process CUSTOM_EXTRACTCONTIG { + tag "${contig_name}" + label 'process_single' + + conda "bwa-mem2=2.2.1 samtools=1.19.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-4dde50190ae599f2bb2027cb2c8763ea00fb5084:4163e62e1daead7b7ea0228baece715bec295c22-0' : + 'biocontainers/mulled-v2-4dde50190ae599f2bb2027cb2c8763ea00fb5084:4163e62e1daead7b7ea0228baece715bec295c22-0' }" + + input: + val contig_name + path genome_fasta + path genome_fai + val run + + output: + path "*extracted.fa" , emit: contig + path "*extracted.fa.*", emit: bwamem2_index + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + + """ + samtools faidx \\ + ${args} \\ + -o ${contig_name}_extracted.fa \\ + ${genome_fasta} \\ + ${contig_name} + + bwa-mem2 index ${args2} ${contig_name}_extracted.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa-mem2: \$(bwa-mem2 version 2>/dev/null) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${contig_name}_extracted.fa ${contig_name}_extracted.fa.amb + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/custom/lilac_realign_reads_lilac/main.nf b/modules/local/custom/lilac_realign_reads_lilac/main.nf new file mode 100644 index 00000000..33ddd63a --- /dev/null +++ b/modules/local/custom/lilac_realign_reads_lilac/main.nf @@ -0,0 +1,74 @@ +process CUSTOM_REALIGNREADS { + tag "${meta.id}" + label 'process_low' + + conda "bioconda::bwa-mem2=2.2.1 bioconda::samtools=1.19.2 bioconda::sambamba=1.0.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-4dde50190ae599f2bb2027cb2c8763ea00fb5084:4163e62e1daead7b7ea0228baece715bec295c22-0' : + 'biocontainers/mulled-v2-4dde50190ae599f2bb2027cb2c8763ea00fb5084:4163e62e1daead7b7ea0228baece715bec295c22-0' }" + + input: + tuple val(meta), path(bam), path(bai) + path reference + path reference_indices + + output: + tuple val(meta), path("*realigned.bam"), path("*realigned.bam.bai"), emit: bam + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def args4 = task.ext.args4 ?: '' + def args5 = task.ext.args5 ?: '' + + """ + sambamba sort ${args} -n ${bam} -o ${meta.sample_id}_sorted.bam + + samtools fastq ${args2} -@${task.threads} ${meta.sample_id}_sorted.bam \\ + -1 ${meta.sample_id}_R1.fastq.gz \\ + -2 ${meta.sample_id}_R2.fastq.gz \\ + -0 ${meta.sample_id}_other.fastq.gz \\ + -s ${meta.sample_id}_singleton.fastq.gz; + + bwa-mem2 mem \\ + ${args3} \\ + -Y \\ + -t ${task.cpus} \\ + ${reference} \\ + ${meta.sample_id}_R1.fastq.gz \\ + ${meta.sample_id}_R2.fastq.gz | \\ + \\ + sambamba view \\ + ${args4} \\ + --sam-input \\ + --format bam \\ + --compression-level 0 \\ + --nthreads ${task.cpus} \\ + /dev/stdin | \\ + \\ + sambamba sort \\ + ${args5} \\ + --nthreads ${task.cpus} \\ + --out ${bam.baseName}.realigned.bam \\ + /dev/stdin + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa-mem2: \$(bwa-mem2 version 2>/dev/null) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + sambamba: \$(sambamba --version 2>&1 | egrep '^sambamba' | head -n 1 | awk '{ print \$NF }') + END_VERSIONS + """ + + stub: + """ + touch ${bam.baseName}.realigned.bam ${bam.baseName}.realigned.bam.bai + + echo -e 
'${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/custom/lilac_slice/main.nf b/modules/local/custom/lilac_slice/main.nf new file mode 100644 index 00000000..0a86f06a --- /dev/null +++ b/modules/local/custom/lilac_slice/main.nf @@ -0,0 +1,47 @@ +process CUSTOM_SLICE { + tag "${meta.id}" + label 'process_single' + + conda "samtools=1.19.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + + input: + tuple val(meta), path(bam), path(bai) + path bed + + output: + tuple val(meta), path("*sliced.bam"), path("*sliced.bam.bai"), emit: bam + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + samtools view \\ + ${args} \\ + --regions-file ${bed} \\ + -@${task.cpus} \\ + -Obam \\ + ${bam} | \\ + samtools sort -T tmp -o ${bam.baseName}.sliced.bam + + samtools index ${bam.baseName}.sliced.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${bam.baseName}.sliced.bam ${bam.baseName}.sliced.bam.bai + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/custom/write_reference_data/main.nf b/modules/local/custom/write_reference_data/main.nf new file mode 100644 index 00000000..04a1151e --- /dev/null +++ b/modules/local/custom/write_reference_data/main.nf @@ -0,0 +1,22 @@ +process WRITE_REFERENCE_DATA { + tag "${fp.name}" + label 'process_single' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'quay.io/nf-core/ubuntu:20.04' }" + + input: + path fp + val workflow_version + + output: + path fp, includeInputs: true + + when: + task.ext.when == null || task.ext.when + + script: + """ + """ +} diff --git a/modules/local/fastp/environment.yml b/modules/local/fastp/environment.yml new file mode 100644 index 00000000..70389e66 --- /dev/null +++ b/modules/local/fastp/environment.yml @@ -0,0 +1,7 @@ +name: fastp +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastp=0.23.4 diff --git a/modules/local/fastp/main.nf b/modules/local/fastp/main.nf new file mode 100644 index 00000000..0690204c --- /dev/null +++ b/modules/local/fastp/main.nf @@ -0,0 +1,51 @@ +process FASTP { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--hadf994f_2' : + 'biocontainers/fastp:0.23.4--hadf994f_2' }" + + input: + tuple val(meta), path(reads_fwd), path(reads_rev) + val max_fastq_records + + output: + tuple val(meta), path('*_R1.fastp.fastq.gz'), path('*_R2.fastp.fastq.gz'), emit: fastq + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + fastp \\ + ${args} \\ + --in1 ${reads_fwd} \\ + --in2 ${reads_rev} \\ + --disable_quality_filtering \\ + --disable_length_filtering \\ + --disable_adapter_trimming \\ + --disable_trim_poly_g \\ + --split_by_lines ${4 * max_fastq_records} \\ + --thread ${task.cpus} \\ + --out1 ${meta.sample_id}_${meta.library_id}_${meta.lane}_R1.fastp.fastq.gz \\ + --out2 ${meta.sample_id}_${meta.library_id}_${meta.lane}_R2.fastp.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + touch 00{1..4}.${meta.sample_id}_${meta.library_id}_${meta.lane}_R1.fastp.fastq.gz + touch 00{1..4}.${meta.sample_id}_${meta.library_id}_${meta.lane}_R2.fastp.fastq.gz + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/fastp/meta.yml b/modules/local/fastp/meta.yml new file mode 100644 index 00000000..23aa5fb6 --- /dev/null +++ b/modules/local/fastp/meta.yml @@ -0,0 +1,47 @@ +name: fastp +description: An ultra-fast all-in-one FASTQ preprocessor +keywords: + - fastp + - fastq + - processing + - quality control +tools: + - fastp: + description: An ultra-fast all-in-one FASTQ preprocessor + homepage: https://github.com/OpenGene/fastp + documentation: https://github.com/OpenGene/fastp + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - reads_fwd: + type: file + description: Forward reads FASTQ file + pattern: "*.{fastq.gz}" + - reads_rev: + type: file + description: Reverse reads FASTQ file + pattern: "*.{fastq.gz}" + - max_fastq_records: + type: integer + description: Maximum number of reads per file +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - fastq: + type: list + description: Forward and reverse FASTQ files + pattern: "*.{fastq.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" + - "@mkcmkc" diff --git a/modules/local/gridss/index/environment.yml b/modules/local/gridss/index/environment.yml new file mode 100644 index 00000000..ed9cb011 --- /dev/null +++ b/modules/local/gridss/index/environment.yml @@ -0,0 +1,7 @@ +name: gridss_index +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gridss=2.13.2=h50ea8bc_3 diff --git a/modules/local/gridss/index/main.nf b/modules/local/gridss/index/main.nf new file mode 100644 index 00000000..d947daa6 --- /dev/null +++ b/modules/local/gridss/index/main.nf @@ -0,0 +1,67 @@ +process GRIDSS_INDEX { + tag "${genome_fasta.name}" + label 'process_single' + label 'process_medium_memory' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gridss:2.13.2--h50ea8bc_3' : + 'biocontainers/gridss:2.13.2--h50ea8bc_3' }" + + input: + path genome_fasta + path genome_fai + path genome_dict + path genome_bwa_index + + output: + path 'gridss_index/', emit: index + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + # Symlink BWA indices next to assembly FASTA + ln -s \$(find -L ${genome_bwa_index} -type f) ./ + + # Create indexes + PrepareReference \\ + ${args} \\ + REFERENCE_SEQUENCE=${genome_fasta} \\ + CREATE_SEQUENCE_DICTIONARY='false' \\ + CREATE_BWA_INDEX_IMAGE='true' \\ + CREATE_GRIDSS_REFERENCE_CACHE='true' + + # Move under single directory for output + mkdir -p gridss_index/ + mv ${genome_fasta.name}.img gridss_index/ + mv ${genome_fasta.name}.gridsscache gridss_index/ + + # Symlink BWA index files into output directory + ln -s ../${genome_fasta.name}.{amb,ann,bwt,pac,sa} gridss_index/ + + # Also include the ALT file if present + if [[ -e ${genome_fasta.name}.alt || -L ${genome_fasta.name}.alt ]]; then + ln -s ../${genome_fasta.name}.alt gridss_index/; + fi; + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gridss: \$(CallVariants --version 2>&1 | sed 's/-gridss\$//') + END_VERSIONS + """ + + stub: + """ + mkdir -p gridss_index/ + touch gridss_index/${genome_fasta.name}.{sa,pac,bwt,ann,amb} + touch gridss_index/${genome_fasta.name}.img + touch gridss_index/${genome_fasta.name}.gridsscache + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/gridss/index/meta.yml b/modules/local/gridss/index/meta.yml new file mode 100644 index 00000000..9f0ee56f --- /dev/null +++ b/modules/local/gridss/index/meta.yml @@ -0,0 +1,42 @@ +name: gridss_index +description: Create GRIDSS indices +keywords: + - index + - sv +tools: + - gridss: + description: GRIDSS is a module software suite containing tools useful for the detection of genomic rearrangements. + homepage: https://github.com/PapenfussLab/gridss + documentation: https://github.com/PapenfussLab/gridss + licence: ["GPL >=3"] +input: + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_fai: + type: file + description: Reference genome assembly fai file + pattern: "*.{fai}" + - genome_dict: + type: file + description: Reference genome assembly dict file (optional) + pattern: "*.{dict}" + - genome_bwa_index: + type: directory + description: Directory containing reference genome assembly BWA index (optional) +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id'] + - index: + type: directory + description: Directory containing GRIDSS index + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/gripss/germline/environment.yml b/modules/local/gripss/germline/environment.yml new file mode 100644 index 00000000..c2e0d23f --- /dev/null +++ b/modules/local/gripss/germline/environment.yml @@ -0,0 +1,7 @@ +name: gripss_germline +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-gripss=2.4 diff --git a/modules/local/gripss/germline/main.nf b/modules/local/gripss/germline/main.nf new file mode 100644 index 00000000..8f28b7a5 --- /dev/null +++ b/modules/local/gripss/germline/main.nf @@ -0,0 +1,63 @@ +process GRIPSS_GERMLINE { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-gripss:2.4--hdfd78af_0' : + 'biocontainers/hmftools-gripss:2.4--hdfd78af_0' }" + + input: + tuple val(meta), path(gridss_vcf) + path genome_fasta + val genome_ver + path genome_fai + path pon_breakends + path pon_breakpoints + path known_fusions + path repeatmasker_annotations + + output: + tuple val(meta), path('*.filtered.germline.vcf.gz'), path('*.filtered.germline.vcf.gz.tbi'), emit: vcf + tuple val(meta), path('*gripss.germline.vcf.gz'), path('*gripss.germline.vcf.gz.tbi') , emit: vcf_unfiltered + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + gripss \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ + -sample ${meta.normal_id} \\ + -reference ${meta.tumor_id} \\ + -vcf ${gridss_vcf} \\ + -germline \\ + -ref_genome ${genome_fasta} \\ + -ref_genome_version ${genome_ver} \\ + -pon_sgl_file ${pon_breakends} \\ + -pon_sv_file ${pon_breakpoints} \\ + -known_hotspot_file ${known_fusions} \\ + -repeat_mask_file ${repeatmasker_annotations} \\ + -output_id germline \\ + -output_dir ./ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gripss: \$(gripss -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + touch ${meta.normal_id}.gripss.filtered.germline.vcf.gz + touch ${meta.normal_id}.gripss.filtered.germline.vcf.gz.tbi + touch ${meta.normal_id}.gripss.germline.vcf.gz + touch ${meta.normal_id}.gripss.germline.vcf.gz.tbi + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/gripss/germline/meta.yml b/modules/local/gripss/germline/meta.yml new file mode 100644 index 00000000..03ae9328 --- /dev/null +++ b/modules/local/gripss/germline/meta.yml @@ -0,0 +1,68 @@ +name: gripss_germline +description: Filter and process GRIDSS germline structural variants +keywords: + - sv + - filtering + - germline +tools: + - gripss: + description: Apply filtering and post-processing to GRIDSS structural variants. + homepage: https://github.com/hartwigmedical/hmftools/tree/master/gripss + documentation: https://github.com/hartwigmedical/hmftools/tree/master/gripss + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id', normal_id: 'normal_name'] + - gridss_vcf: + type: file + description: VCF file + pattern: "*.{vcf.gz}" + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_ver: + type: string + description: Reference genome version + - genome_fai: + type: file + description: Reference genome assembly fai file + pattern: "*.{fai}" + - pon_breakends: + type: file + description: GRIDSS breakend PON file + pattern: "*.{bed.gz}" + - pon_breakpoints: + type: file + description: GRIDSS breakpoint PON file + pattern: "*.{bedpe.gz}" + - known_fusions: + type: file + description: HMF Known Fusions file + pattern: "*.{bedpe}" + - repeatmasker_annotations: + type: file + description: RepeatMasker annotations file +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id', normal_id: 'normal_name'] + - vcf: + type: list + description: Filtered VCF file and index file + pattern: "*.{vcf.gz,vcf.gz.tbi}" + - vcf_unfiltered: + type: list + description: Unfiltered VCF file and index file + pattern: "*.{vcf.gz,vcf.gz.tbi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/gripss/somatic/environment.yml b/modules/local/gripss/somatic/environment.yml new file mode 100644 index 00000000..5ae83f45 --- /dev/null +++ b/modules/local/gripss/somatic/environment.yml @@ -0,0 +1,7 @@ +name: gripss_somatic +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-gripss=2.4 diff --git a/modules/local/gripss/somatic/main.nf b/modules/local/gripss/somatic/main.nf new file mode 100644 index 00000000..24815d4d --- /dev/null +++ b/modules/local/gripss/somatic/main.nf @@ -0,0 +1,68 @@ +process GRIPSS_SOMATIC { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-gripss:2.4--hdfd78af_0' : + 'biocontainers/hmftools-gripss:2.4--hdfd78af_0' }" + + input: + tuple val(meta), path(gridss_vcf) + path genome_fasta + val genome_ver + path genome_fai + path pon_breakends + path pon_breakpoints + path known_fusions + path repeatmasker_annotations + path target_region_bed + + output: + tuple val(meta), path('*.gripss.filtered{,.somatic}.vcf.gz'), path('*.gripss.filtered{,.somatic}.vcf.gz.tbi'), emit: vcf + tuple val(meta), path('*.gripss{,.somatic}.vcf.gz'), path('*.gripss{,.somatic}.vcf.gz.tbi') , emit: vcf_unfiltered + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def reference_arg = meta.containsKey('normal_id') ? "-reference ${meta.normal_id}" : '' + def target_regions_bed_arg = target_region_bed ? "-target_regions_bed ${target_region_bed}" : '' + def output_id_arg = meta.containsKey('normal_id') ? 
'-output_id somatic' : '' + + """ + gripss \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ + -sample ${meta.tumor_id} \\ + ${reference_arg} \\ + -vcf ${gridss_vcf} \\ + -ref_genome ${genome_fasta} \\ + -ref_genome_version ${genome_ver} \\ + -pon_sgl_file ${pon_breakends} \\ + -pon_sv_file ${pon_breakpoints} \\ + -known_hotspot_file ${known_fusions} \\ + -repeat_mask_file ${repeatmasker_annotations} \\ + ${target_regions_bed_arg} \\ + ${output_id_arg} \\ + -output_dir ./ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gripss: \$(gripss -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + touch ${meta.tumor_id}.gripss.filtered.somatic.vcf.gz + touch ${meta.tumor_id}.gripss.filtered.somatic.vcf.gz.tbi + touch ${meta.tumor_id}.gripss.somatic.vcf.gz + touch ${meta.tumor_id}.gripss.somatic.vcf.gz.tbi + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/gripss/somatic/meta.yml b/modules/local/gripss/somatic/meta.yml new file mode 100644 index 00000000..878ef0e8 --- /dev/null +++ b/modules/local/gripss/somatic/meta.yml @@ -0,0 +1,72 @@ +name: gripss_somatic +description: Filter and process GRIDSS somatic structural variants +keywords: + - sv + - filtering + - somatic +tools: + - gripss: + description: Apply filtering and post-processing to GRIDSS structural variants. + homepage: https://github.com/hartwigmedical/hmftools/tree/master/gripss + documentation: https://github.com/hartwigmedical/hmftools/tree/master/gripss + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - gridss_vcf: + type: file + description: VCF file + pattern: "*.{vcf.gz}" + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_ver: + type: string + description: Reference genome version + - genome_fai: + type: file + description: Reference genome assembly fai file + pattern: "*.{fai}" + - pon_breakends: + type: file + description: GRIDSS breakend PON file + pattern: "*.{bed.gz}" + - pon_breakpoints: + type: file + description: GRIDSS breakpoint PON file + pattern: "*.{bedpe.gz}" + - known_fusions: + type: file + description: HMF Known Fusions file + pattern: "*.{bedpe}" + - repeatmasker_annotations: + type: file + description: RepeatMasker annotations file + - target_region_bed: + type: file + description: Target region BED file (optional) + pattern: "*.{bed}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - vcf: + type: list + description: Hard filtered VCF file and index file + pattern: "*.{vcf.gz,vcf.gz.tbi}" + - vcf_unfiltered: + type: list + description: Unfiltered VCF file and index file + pattern: "*.{vcf.gz,vcf.gz.tbi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/isofox/environment.yml b/modules/local/isofox/environment.yml new file mode 100644 index 00000000..d4251c57 --- /dev/null +++ b/modules/local/isofox/environment.yml @@ -0,0 +1,7 @@ +name: isofox +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-isofox=1.7.1 diff --git a/modules/local/isofox/main.nf b/modules/local/isofox/main.nf new file mode 100644 index 00000000..5e298ced --- /dev/null +++ b/modules/local/isofox/main.nf @@ -0,0 +1,74 @@ +process ISOFOX { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-isofox:1.7.1--hdfd78af_0' : + 'biocontainers/hmftools-isofox:1.7.1--hdfd78af_0' }" + + input: + tuple val(meta), path(bam), path(bai) + val functions + val read_length + path genome_fasta + val genome_ver + path genome_fai + path ensembl_data_resources + path exp_counts + path exp_gc_ratios + path gene_ids + path tpm_norm + + output: + tuple val(meta), path('isofox/'), emit: isofox_dir + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def functions_arg = functions ? "-functions \'${functions}\'" : '' + + def exp_counts_arg = exp_counts ? "-exp_counts_file ${exp_counts}" : '' + def exp_gc_ratios_arg = exp_gc_ratios ? "-exp_gc_ratios_file ${exp_gc_ratios}" : '' + + def gene_ids_arg = gene_ids ? "-gene_id_file ${gene_ids}" : '' + def tpm_norm_arg = tpm_norm ? "-panel_tpm_norm_file ${tpm_norm}" : '' + + """ + mkdir -p isofox/ + + isofox \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ + -sample ${meta.sample_id} \\ + -bam_file ${bam} \\ + ${functions_arg} \\ + -read_length ${read_length} \\ + -ref_genome ${genome_fasta} \\ + -ref_genome_version ${genome_ver} \\ + -ensembl_data_dir ${ensembl_data_resources} \\ + ${exp_counts_arg} \\ + ${exp_gc_ratios_arg} \\ + ${gene_ids_arg} \\ + ${tpm_norm_arg} \\ + -threads ${task.cpus} \\ + -output_dir isofox/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + isofox: \$(isofox -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + mkdir -p isofox/ + touch isofox/placeholder + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/isofox/meta.yml b/modules/local/isofox/meta.yml new file mode 100644 index 00000000..2e0d1930 --- /dev/null +++ b/modules/local/isofox/meta.yml @@ -0,0 +1,76 @@ +name: isofox +description: Characterise and count gene, transcript features +keywords: + - rna + - rnaseq +tools: + - isofox: + description: Characterises and counts gene, transcript features + homepage: https://github.com/hartwigmedical/hmftools/tree/master/isofox + documentation: https://github.com/hartwigmedical/hmftools/tree/master/isofox + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id'] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - bai: + type: file + description: BAI file + pattern: "*.{bai}" + - functions: + type: string + description: Isofox functions to run + - read_length: + type: integer + description: Read length + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_ver: + type: string + description: Reference genome version + - genome_fai: + type: file + description: Reference genome assembly fai file + pattern: "*.{fai}" + - ensembl_data_resources: + type: directory + description: HMF ensembl data resources directory + - exp_counts: + type: file + description: Isofox expected counts file (optional) + pattern: "*.{csv}" + - exp_gc_ratios: + type: file + description: Isofox expected GC ratio counts file (optional) + pattern: "*.{csv}" + - gene_ids: + type: file + description: Isofox gene ID file (optional) + pattern: "*.{csv}" + - tmp_norm: + type: file + description: Isofox TPM noramlisation file (optional) + pattern: "*.{csv}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - isofox_dir: + type: directory + description: Isofox output directory + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/lilac/environment.yml b/modules/local/lilac/environment.yml new file mode 100644 index 00000000..8832e639 --- /dev/null +++ b/modules/local/lilac/environment.yml @@ -0,0 +1,7 @@ +name: lilac +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-lilac=1.6 diff --git a/modules/local/lilac/main.nf b/modules/local/lilac/main.nf new file mode 100644 index 00000000..b4517b31 --- /dev/null +++ b/modules/local/lilac/main.nf @@ -0,0 +1,72 @@ +process LILAC { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-lilac:1.6--hdfd78af_0' : + 'biocontainers/hmftools-lilac:1.6--hdfd78af_0' }" + + input: + tuple val(meta), path(normal_dna_bam), path(normal_dna_bai), path(tumor_dna_bam), path(tumor_dna_bai), path(tumor_rna_bam), path(tumor_rna_bai), path(purple_dir) + path genome_fasta + val genome_ver + path lilac_resources, stageAs: 'lilac_resources' + + output: + tuple val(meta), path('lilac/'), emit: lilac_dir + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def sample_name = getSampleName(meta, tumor_dna_bam, normal_dna_bam) + + def normal_bam_arg = normal_dna_bam ? "-reference_bam ${normal_dna_bam}" : '' + def tumor_dna_bam_arg = tumor_dna_bam ? "-tumor_bam ${tumor_dna_bam}" : '' + def tumor_rna_bam_arg = tumor_rna_bam ? "-rna_bam ${tumor_rna_bam}" : '' + + def purple_dir_arg = purple_dir ? 
"-purple_dir ${purple_dir}" : '' + + """ + lilac \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ + -sample ${sample_name} \\ + ${normal_bam_arg} \\ + ${tumor_dna_bam_arg} \\ + ${tumor_rna_bam_arg} \\ + ${purple_dir_arg} \\ + -ref_genome ${genome_fasta} \\ + -ref_genome_version ${genome_ver} \\ + -resource_dir ${lilac_resources} \\ + -threads ${task.cpus} \\ + -output_dir lilac/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + lilac: \$(lilac -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + mkdir -p lilac/ + touch lilac/placeholder + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} + +def getSampleName(meta, tumor_bam, normal_bam) { + if (tumor_bam) { + return meta.tumor_id + } else if (normal_bam) { + return meta.normal_id + } else { + Sys.exit(1) + } +} diff --git a/modules/local/lilac/meta.yml b/modules/local/lilac/meta.yml new file mode 100644 index 00000000..b9b79617 --- /dev/null +++ b/modules/local/lilac/meta.yml @@ -0,0 +1,70 @@ +name: lilac +description: Type HLA alleles and call somatic structural variants +keywords: + - hla + - typing + - sv +tools: + - lilac: + description: Performs HLA typing and calls somatic structural variants + homepage: https://github.com/hartwigmedical/hmftools/tree/master/lilac + documentation: https://github.com/hartwigmedical/hmftools/tree/master/lilac + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - normal_dna_bam: + type: file + description: Normal DNA BAM file (optional) + pattern: "*.{bam}" + - normal_dna_bai: + type: file + description: Normal DNA BAI file (optional) + pattern: "*.{bai}" + - tumor_dna_bam: + type: file + description: Normal DNA BAM file (optional) + pattern: "*.{bam}" + - tumor_dna_bai: + type: file + description: Tumor DNA BAI file (optional) + pattern: "*.{bai}" + - tumor_rna_bam: + type: file + description: Normal RNA BAM file (optional) + pattern: "*.{bam}" + - tumor_rna_bai: + type: file + description: Tumor RNA BAI file (optional) + pattern: "*.{bai}" + - purple_dir: + type: directory + description: PURPLE output directory (optional) + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_ver: + type: string + description: Reference genome version + - lilac_resources: + type: directory + description: LILAC resources directory +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - lilac_dir: + type: file + description: LILAC output directory + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/linx/germline/environment.yml b/modules/local/linx/germline/environment.yml new file mode 100644 index 00000000..2b587a0d --- /dev/null +++ b/modules/local/linx/germline/environment.yml @@ -0,0 +1,7 @@ +name: linx_germline +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-linx=1.25 diff --git a/modules/local/linx/germline/main.nf b/modules/local/linx/germline/main.nf new file mode 100644 index 00000000..c28491e3 --- /dev/null +++ b/modules/local/linx/germline/main.nf @@ -0,0 +1,51 @@ +process LINX_GERMLINE { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-linx:1.25--hdfd78af_0' : + 'biocontainers/hmftools-linx:1.25--hdfd78af_0' }" + + input: + tuple val(meta), path(sv_vcf) + val genome_ver + path ensembl_data_resources + path driver_gene_panel + + output: + tuple val(meta), path('linx_germline/'), emit: annotation_dir + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + linx \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ + -sample ${meta.sample_id} \\ + -sv_vcf ${sv_vcf} \\ + -germline \\ + -ref_genome_version ${genome_ver} \\ + -ensembl_data_dir ${ensembl_data_resources} \\ + -driver_gene_panel ${driver_gene_panel} \\ + -output_dir linx_germline/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + linx: \$(linx -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + mkdir linx_germline/ + touch linx_germline/placeholder + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/linx/germline/meta.yml b/modules/local/linx/germline/meta.yml new file mode 100644 index 00000000..50c7b694 --- /dev/null +++ b/modules/local/linx/germline/meta.yml @@ -0,0 +1,47 @@ +name: linx_germline +description: Generate LINX germline annotation data +keywords: + - germline + - sv + - annotation +tools: + - linx: + description: An annotation, interpretation and visualisation tool for structural variants. + homepage: https://github.com/hartwigmedical/hmftools/tree/master/linx + documentation: https://github.com/hartwigmedical/hmftools/tree/master/linx + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - sv_vcf: + type: directory + description: PURPLE germline SV VCF file + pattern: "*.{vcf.gz}" + - genome_ver: + type: string + description: Reference genome version + - ensembl_data_resources: + type: directory + description: HMF ensembl data resources directory + - driver_gene_panel: + type: file + description: Driver gene panel file + pattern: "*.{csv}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id'] + - annotation_dir: + type: directory + description: LINX annotation output directory + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/linx/somatic/environment.yml b/modules/local/linx/somatic/environment.yml new file mode 100644 index 00000000..2c7883f5 --- /dev/null +++ b/modules/local/linx/somatic/environment.yml @@ -0,0 +1,7 @@ +name: linx_somatic +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-linx=1.25 diff --git a/modules/local/linx/somatic/main.nf b/modules/local/linx/somatic/main.nf new file mode 100644 index 00000000..310fbd9c --- /dev/null +++ b/modules/local/linx/somatic/main.nf @@ -0,0 +1,54 @@ +process LINX_SOMATIC { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-linx:1.25--hdfd78af_0' : + 'biocontainers/hmftools-linx:1.25--hdfd78af_0' }" + + input: + tuple val(meta), path(purple_dir) + val genome_ver + path ensembl_data_resources + path known_fusion_data + path driver_gene_panel + + output: + tuple val(meta), path('linx_somatic/'), emit: annotation_dir + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + linx \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ + -sample ${meta.sample_id} \\ + -sv_vcf ${purple_dir}/${meta.sample_id}.purple.sv.vcf.gz \\ + -purple_dir ${purple_dir} \\ + -ref_genome_version ${genome_ver} \\ + -ensembl_data_dir ${ensembl_data_resources} \\ + -known_fusion_file ${known_fusion_data} \\ + -driver_gene_panel ${driver_gene_panel} \\ + -write_vis_data \\ + -output_dir linx_somatic/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + linx: \$(linx -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + mkdir linx_somatic/ + touch linx_somatic/placeholder + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/linx/somatic/meta.yml b/modules/local/linx/somatic/meta.yml new file mode 100644 index 00000000..7e8694dd --- /dev/null +++ b/modules/local/linx/somatic/meta.yml @@ -0,0 +1,50 @@ +name: linx_somatic +description: Generate LINX somatic annotation data +keywords: + - somatic + - sv + - annotation +tools: + - linx: + description: An annotation, interpretation and visualisation tool for structural variants. + homepage: https://github.com/hartwigmedical/hmftools/tree/master/linx + documentation: https://github.com/hartwigmedical/hmftools/tree/master/linx + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - purple_dir: + type: directory + description: PURPLE output directory + - genome_ver: + type: string + description: Reference genome version + - ensembl_data_resources: + type: directory + description: HMF ensembl data resources directory + - known_fusion_data: + type: file + description: Known fusions data file + pattern: "*.{bedpe}" + - driver_gene_panel: + type: file + description: Driver Gene Panel file + pattern: "*.{csv}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id'] + - annotation_dir: + type: directory + description: LINX annotation output directory + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/linx/visualiser/environment.yml b/modules/local/linx/visualiser/environment.yml new file mode 100644 index 00000000..a0fa994a --- /dev/null +++ b/modules/local/linx/visualiser/environment.yml @@ -0,0 +1,7 @@ +name: linx_visualiser +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-linx=1.25 diff --git a/modules/local/linx/visualiser/main.nf b/modules/local/linx/visualiser/main.nf new file mode 100644 index 00000000..951ea1dc --- /dev/null +++ b/modules/local/linx/visualiser/main.nf @@ -0,0 +1,100 @@ +process LINX_VISUALISER { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-linx:1.25--hdfd78af_0' : + 'biocontainers/hmftools-linx:1.25--hdfd78af_0' }" + + input: + tuple val(meta), path(linx_annotation_dir) + val genome_ver + path ensembl_data_resources + + output: + tuple val(meta), path('plots/'), emit: plots + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + + """ + # NOTE(SW): the output plot directories are always required for ORANGE, which is straightfoward to handle with POSIX + # fs but more involved with FusionFS since it will not write empty directories to S3. A placeholder file can't be + # used in the plot directory to force FusionFS to create the directory as ORANGE will treat the placeholder as a PNG + # and fail. Optional outputs are possible but requires further channel logic and output to detect when complete. + # Instead I place the two plot output directories under a parent directory, only operating on that to allow use of a + # placeholder and support empty outputs when using FusionFS. Handling missing/non-existent directories are deferred + # to downstream processes, bypassing the need to implement further channel operations. + + mkdir -p plots/ + + # NOTE(SW): LINX v1.24.1 require trailing slashes for the -plot_out and -data_out arguments since no filesystem + # separator is used when constructing fusion plot output filepaths. + + # https://github.com/hartwigmedical/hmftools/blob/linx-v1.24.1/linx/src/main/java/com/hartwig/hmftools/linx/visualiser/circos/ChromosomeRangeExecution.java#L22-L29 + # https://github.com/hartwigmedical/hmftools/blob/linx-v1.24.1/linx/src/main/java/com/hartwig/hmftools/linx/visualiser/circos/FusionExecution.java#L18-L23 + + # Generate all chromosome and cluster plots by default + + linx \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + com.hartwig.hmftools.linx.visualiser.SvVisualiser \\ + ${args} \\ + -sample ${meta.sample_id} \\ + -vis_file_dir ${linx_annotation_dir} \\ + -ref_genome_version ${genome_ver} \\ + -ensembl_data_dir ${ensembl_data_resources} \\ + -circos \$(which circos) \\ + -threads ${task.cpus} \\ + -plot_out plots/all/ \\ + -data_out data/all/ + + # Rerun LINX to render only reportable cluster plots in a separate directory. 
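    # (This second pass reads its extra options from task.ext.args2, while the first pass
    # above uses task.ext.args, so the reportable-only invocation can be tuned independently
    # in the pipeline configuration.)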
While this is regenerating existing + # cluster plots, the number of reportable plots is generally very small and I prefer to rely on the internal LINX + # logic to determine whether a cluster is reportable rather than attempting to infer manually to copy out target + # plot files. + + # The ORANGE report receives only reportable clusters while the gpgr LINX report receives chromosome and all cluster + # plots. + + # https://github.com/hartwigmedical/hmftools/blob/linx-v1.24.1/linx/src/main/java/com/hartwig/hmftools/linx/visualiser/SampleData.java#L220-L236 + + linx \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + com.hartwig.hmftools.linx.visualiser.SvVisualiser \\ + ${args2} \\ + -sample ${meta.sample_id} \\ + -vis_file_dir ${linx_annotation_dir} \\ + -ref_genome_version ${genome_ver} \\ + -ensembl_data_dir ${ensembl_data_resources} \\ + -circos \$(which circos) \\ + -plot_reportable \\ + -threads ${task.cpus} \\ + -plot_out plots/reportable/ \\ + -data_out data/reportable/ + + # Create placeholders to force FusionFS to create parent plot directory on S3 + if [[ \$(ls plots/ | wc -l) -eq 0 ]]; then + touch plots/.keep; + fi; + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + linx: \$(linx -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + mkdir -p plots/{all,reportable}/ + touch plots/{all,reportable}/placeholder + + echo -e '${task.process}:\n stub: noversions\n' > versions.yml + """ +} diff --git a/modules/local/linx/visualiser/meta.yml b/modules/local/linx/visualiser/meta.yml new file mode 100644 index 00000000..d48c1fac --- /dev/null +++ b/modules/local/linx/visualiser/meta.yml @@ -0,0 +1,43 @@ +name: linx_visualiser +description: Visualise LINX somatic annotations +keywords: + - somatic + - sv + - annotation + - visualisation +tools: + - linx: + description: An annotation, interpretation and visualisation tool for structural variants. + homepage: https://github.com/hartwigmedical/hmftools/tree/master/linx + documentation: https://github.com/hartwigmedical/hmftools/tree/master/linx + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - linx_annotation_dir: + type: directory + description: LINX somatic annotation output directory + - genome_ver: + type: string + description: Reference genome version + - ensembl_data_resources: + type: directory + description: HMF ensembl data resources directory +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - plots: + type: directory + description: Directory containing output plots + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/linxreport/environment.yml b/modules/local/linxreport/environment.yml new file mode 100644 index 00000000..33d28665 --- /dev/null +++ b/modules/local/linxreport/environment.yml @@ -0,0 +1,7 @@ +name: linxreport +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::r-linxreport=1.0.0 diff --git a/modules/local/linxreport/main.nf b/modules/local/linxreport/main.nf new file mode 100644 index 00000000..f3d3a55e --- /dev/null +++ b/modules/local/linxreport/main.nf @@ -0,0 +1,51 @@ +process LINXREPORT { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/r-linxreport:1.0.0--r43hdfd78af_0' : + 'biocontainers/r-linxreport:1.0.0--r43hdfd78af_0' }" + + input: + tuple val(meta), path(linx_annotation_dir), path(linx_visualiser_dir) + + output: + tuple val(meta), path('*_linx.html'), emit: html + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def plot_dir = linx_visualiser_dir.resolve('all/').toUriString() + + """ + # Set input plot directory and create it doesn't exist. See the LINX visualiser module for further info. + if [[ ! -e ${plot_dir} ]]; then + mkdir -p ${plot_dir}; + fi; + + linxreport.R \\ + ${args} \\ + --sample ${meta.sample_id} \\ + --plot ${plot_dir} \\ + --table ${linx_annotation_dir} \\ + --out ${meta.sample_id}_linx.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + R: \$(R --version | head -n1 | sed 's/^R version \\([0-9.]\\+\\).\\+/\\1/') + linxreport: \$(linxreport.R --version) + END_VERSIONS + """ + + stub: + """ + touch ${meta.sample_id}_linx.html + + echo -e '${task.process}:\n stub: noversions\n' > versions.yml + """ +} diff --git a/modules/local/linxreport/meta.yml b/modules/local/linxreport/meta.yml new file mode 100644 index 00000000..7bf2e0ab --- /dev/null +++ b/modules/local/linxreport/meta.yml @@ -0,0 +1,41 @@ +name: linxreport +description: LINX result reporter +keywords: + - report + - linx + - sv + - cnv +tools: + - linxreport: + description: LINX result reporter + homepage: https://github.com/umccr/linxreport + documentation: https://github.com/umccr/linxreport + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - linx_annotation_dir: + type: directory + description: Somatic LINX annotation output directory + - linx_visualiser_dir: + type: directory + description: Somatic LINX visualiser output directory +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - html: + type: file + description: gpgr LINX report file + pattern: "*.{html}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/markdups/environment.yml b/modules/local/markdups/environment.yml new file mode 100644 index 00000000..9fa2cb52 --- /dev/null +++ b/modules/local/markdups/environment.yml @@ -0,0 +1,7 @@ +name: markdups +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-mark-dups=1.1.7 diff --git a/modules/local/markdups/main.nf b/modules/local/markdups/main.nf new file mode 100644 index 00000000..649f85a3 --- /dev/null +++ b/modules/local/markdups/main.nf @@ -0,0 +1,76 @@ +process MARKDUPS { + tag "${meta.id}" + label 'process_medium' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-mark-dups:1.1.7--hdfd78af_0' : + 'biocontainers/hmftools-mark-dups:1.1.7--hdfd78af_0' }" + + input: + tuple val(meta), path(bams), path(bais) + path genome_fasta + val genome_ver + path genome_fai + path genome_dict + path unmap_regions + val has_umis + + output: + tuple val(meta), path('*bam'), path('*bai'), emit: bam + path 'versions.yml' , emit: versions + path '*.tsv' + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def umi_flags = has_umis ? 
'-umi_enabled -umi_duplex -umi_duplex_delim +' : '' + + """ + markdups \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ + \\ + -samtools \$(which samtools) \\ + -sambamba \$(which sambamba) \\ + \\ + -sample ${meta.sample_id} \\ + -input_bam ${bams.join(',')} \\ + \\ + -form_consensus \\ + ${umi_flags} \\ + \\ + -unmap_regions ${unmap_regions} \\ + -ref_genome ${genome_fasta} \\ + -ref_genome_version ${genome_ver} \\ + \\ + -write_stats \\ + -threads ${task.cpus} \\ + \\ + -output_bam ${meta.sample_id}.markdups.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + markdups: \$(markdups -version | awk '{ print \$NF }') + sambamba: \$(sambamba --version 2>&1 | egrep '^sambamba' | head -n 1 | awk '{ print \$NF }') + samtools: \$(samtools --version 2>&1 | egrep '^samtools\\s' | head -n 1 | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + touch ${meta.sample_id}.markdups.bam + touch ${meta.sample_id}.markdups.bam.bai + touch ${meta.sample_id}.duplicate_freq.tsv + + if [[ -n "${has_umis}" ]]; then + touch ${meta.sample_id}.umi_coord_freq.tsv + touch ${meta.sample_id}.umi_edit_distance.tsv + touch ${meta.sample_id}.umi_nucleotide_freq.tsv + fi; + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/markdups/meta.yml b/modules/local/markdups/meta.yml new file mode 100644 index 00000000..b621748f --- /dev/null +++ b/modules/local/markdups/meta.yml @@ -0,0 +1,62 @@ +name: markdups +description: Identify and mark duplicate reads ifrom alignment data +keywords: + - duplicating marking + - markdups +tools: + - MarkDups: + description: Identify and mark duplicate reads ifrom alignment data + homepage: https://github.com/hartwigmedical/hmftools/tree/mark-dups-v1.1.7/mark-dups + documentation: https://github.com/hartwigmedical/hmftools/tree/mark-dups-v1.1.7/mark-dups + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - bams: + type: list + description: List BAM files + - bais: + type: list + description: List BAI files + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_ver: + type: string + description: Reference genome version + - genome_fai: + type: file + description: Reference genome assembly fai file + pattern: "*.{fai}" + - genome_dict: + type: file + description: Reference genome assembly dict file + pattern: "*.{dict}" + - unmap_regions: + type: file + description: Unmapped regions file + pattern: "*.{tsv}" + - has_umis: + type: boolean + description: Flag indicating presence of UMIs in reads +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - bam: + type: list + description: BAM and BAI file + pattern: "*.{bam,bam.bai}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" + - "@mkcmkc" diff --git a/modules/local/orange/environment.yml b/modules/local/orange/environment.yml new file mode 100644 index 00000000..f3d03cc2 --- /dev/null +++ b/modules/local/orange/environment.yml @@ -0,0 +1,7 @@ +name: orange +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-orange=2.7.1 diff --git a/modules/local/orange/main.nf b/modules/local/orange/main.nf new file mode 100644 index 00000000..0490df97 --- /dev/null +++ b/modules/local/orange/main.nf @@ -0,0 +1,164 @@ +process ORANGE { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-orange:2.7.1--hdfd78af_0' : + 'biocontainers/hmftools-orange:2.7.1--hdfd78af_0' }" + + input: + tuple val(meta), path(bam_metrics_somatic), path(bam_metrics_germline), path(flagstat_somatic), path(flagstat_germline), path(sage_somatic_dir), path(sage_germline_dir), path(smlv_somatic_vcf), path(smlv_germline_vcf), path(purple_dir), path(linx_somatic_anno_dir), path(linx_somatic_plot_dir), path(linx_germline_anno_dir), path(virusinterpreter_dir), path(chord_dir), path(sigs_dir), path(lilac_dir), path(cuppa_dir), path(isofox_dir) + val genome_ver + path disease_ontology + path cohort_mapping + path cohort_percentiles + path known_fusion_data + path driver_gene_panel + path ensembl_data_resources + path isofox_alt_sj + path isofox_gene_distribution + val pipeline_version + + output: + tuple val(meta), path('output/*.orange.pdf') , emit: pdf + tuple val(meta), path('output/*.orange.json'), emit: json + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def pipeline_version_str = pipeline_version ?: 'not specified' + + def virus_dir_arg = virusinterpreter_dir ? "-virus_dir ${virusinterpreter_dir}" : '' + def chord_dir_arg = chord_dir ? "-chord_dir ${chord_dir}" : '' + def sigs_dir_arg = sigs_dir ? "-sigs_dir ${sigs_dir}" : '' + def cuppa_dir_arg = cuppa_dir ? "-cuppa_dir ${cuppa_dir}" : '' + def plot_dir = linx_somatic_plot_dir.resolve('reportable/').toUriString().replaceAll('/$', '') + + def normal_id_arg = meta.containsKey('normal_dna_id') ? "-reference_sample_id ${meta.normal_dna_id}" : '' + def normal_metrics_arg = bam_metrics_germline ? "-ref_sample_wgs_metrics_file ${bam_metrics_germline}" : '' + def normal_flagstat_arg = flagstat_germline ? "-ref_sample_flagstat_file ${flagstat_germline}" : '' + def normal_sage_dir = sage_germline_dir ? "-sage_germline_dir ${sage_germline_dir}" : '' + def normal_linx_arg = linx_germline_anno_dir ? "-linx_germline_dir ${linx_germline_anno_dir}" : '' + + def rna_id_arg = meta.containsKey('tumor_rna_id') ? "-rna_sample_id ${meta.tumor_rna_id}" : '' + def isofox_dir_arg = isofox_dir ? '-isofox_dir isofox_dir__prepared/' : '' + + def isofox_gene_distribution_arg = isofox_gene_distribution ? "-isofox_gene_distribution ${isofox_gene_distribution}" : '' + def isofox_alt_sj_arg = isofox_alt_sj ? 
"-isofox_alt_sj_cohort ${isofox_alt_sj}" : '' + + """ + echo "${pipeline_version_str}" > pipeline_version.txt + + # When WTS data is present, ORANGE expects the somatic SAGE VCF to have appended WTS data; CS indicates this should + # occur after PURPLE. Since ORANGE only collects the somatic SAGE VCF from the PURPLE output directory, we must + # prepare accordingly + + # Isofox inputs are also expected to have the tumor sample ID in the filename + + # NOTES(SW): Use of symlinks was causing reliability issues on HPC with Singularity, switched to full file copy instead + + purple_dir_local=${purple_dir} + if [[ -n "${rna_id_arg}" ]]; then + + purple_dir_local=purple__prepared; + + if [[ -d \${purple_dir_local}/ ]]; then + rm -r \${purple_dir_local}/; + fi + + cp -rL ${purple_dir} \${purple_dir_local}/ + cp -L ${smlv_somatic_vcf} \${purple_dir_local}/${meta.tumor_id}.purple.somatic.vcf.gz; + + if [[ -n "${smlv_germline_vcf}" ]]; then + cp -L ${smlv_germline_vcf} \${purple_dir_local}/${meta.tumor_id}.purple.germline.vcf.gz; + fi; + + mkdir -p isofox_dir__prepared/; + for fp in ${isofox_dir}/*; do + cp -L \${fp} isofox_dir__prepared/\$(sed 's/${meta.tumor_rna_id}/${meta.tumor_id}/' <<< \${fp##*/}); + done; + + fi + + # Set input plot directory and create it doesn't exist. See the LINX visualiser module for further info. + if [[ ! -e ${plot_dir}/ ]]; then + mkdir -p ${plot_dir}/; + fi; + + # NOTE(SW): '--add-opens java.base/java.time=ALL-UNNAMED' resolves issue writing JSON, see: + # https://stackoverflow.com/questions/70412805/what-does-this-error-mean-java-lang-reflect-inaccessibleobjectexception-unable/70878195#70878195 + + # NOTE(SW): DOID label: 162 [cancer]; Hartwig cohort group: unknown + + mkdir -p output/ + + # NOTE(SW): manually locating ORANGE install directory so that we can applu `--add-opens`, won't fix old bioconda recipe + orange_bin_fp=\$(which orange) + orange_install_dir=\$(readlink \${orange_bin_fp} | xargs dirname) + orange_jar=\$(dirname \${orange_bin_fp})/\${orange_install_dir}/orange.jar + + java \\ + --add-opens java.base/java.time=ALL-UNNAMED \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + -jar \${orange_jar} \\ + ${args} \\ + \\ + -experiment_date \$(date +%y%m%d) \\ + -add_disclaimer \\ + -pipeline_version_file pipeline_version.txt \\ + \\ + -tumor_sample_id ${meta.tumor_id} \\ + -primary_tumor_doids 162 \\ + -tumor_sample_wgs_metrics_file ${bam_metrics_somatic} \\ + -tumor_sample_flagstat_file ${flagstat_somatic} \\ + -sage_dir ${sage_somatic_dir} \\ + -purple_dir \${purple_dir_local} \\ + -purple_plot_dir \${purple_dir_local}/plot/ \\ + -linx_dir ${linx_somatic_anno_dir} \\ + -linx_plot_dir ${plot_dir}/ \\ + -lilac_dir ${lilac_dir} \\ + ${virus_dir_arg} \\ + ${chord_dir_arg} \\ + ${sigs_dir_arg} \\ + ${cuppa_dir_arg} \\ + \\ + ${normal_id_arg} \\ + ${normal_metrics_arg} \\ + ${normal_flagstat_arg} \\ + ${normal_sage_dir} \\ + ${normal_linx_arg} \\ + \\ + ${rna_id_arg} \\ + ${isofox_dir_arg} \\ + \\ + -ref_genome_version ${genome_ver} \\ + -doid_json ${disease_ontology} \\ + -cohort_mapping_tsv ${cohort_mapping} \\ + -cohort_percentiles_tsv ${cohort_percentiles} \\ + -known_fusion_file ${known_fusion_data} \\ + -driver_gene_panel ${driver_gene_panel} \\ + -ensembl_data_dir ${ensembl_data_resources} \\ + ${isofox_gene_distribution_arg} \\ + ${isofox_alt_sj_arg} \\ + -output_dir output/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + orange: \$(orange -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + mkdir -p output/ + touch 
output/${meta.tumor_id}.orange.json
+    touch output/${meta.tumor_id}.orange.pdf
+
+    echo -e '${task.process}:\\n    stub: noversions\\n' > versions.yml
+    """
+}
diff --git a/modules/local/orange/meta.yml b/modules/local/orange/meta.yml
new file mode 100644
index 00000000..3c6d9450
--- /dev/null
+++ b/modules/local/orange/meta.yml
@@ -0,0 +1,127 @@
+name: orange
+description: Summarise key outputs of the HMF toolkit
+keywords:
+  - cancer report
+tools:
+  - orange:
+      description: Summarises key outputs of the HMF toolkit
+      homepage: https://github.com/hartwigmedical/hmftools/tree/master/orange
+      documentation: https://github.com/hartwigmedical/hmftools/tree/master/orange
+      licence: ["GPL v3"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+  - bam_metrics_somatic:
+      type: file
+      description: Somatic collectwgsmetrics file
+  - bam_metrics_germline:
+      type: file
+      description: Germline collectwgsmetrics file (optional)
+  - flagstat_somatic:
+      type: file
+      description: Somatic SAMtools flagstat output file
+  - flagstat_germline:
+      type: file
+      description: Germline SAMtools flagstat output file (optional)
+  - sage_somatic_dir:
+      type: directory
+      description: SAGE somatic output directory
+  - sage_germline_dir:
+      type: directory
+      description: SAGE germline output directory (optional)
+  - smlv_somatic_vcf:
+      type: file
+      description: Small somatic variant VCF file (annotated with RNA data) (optional)
+  - smlv_germline_vcf:
+      type: file
+      description: Small germline variant VCF file (annotated with RNA data) (optional)
+  - purple_dir:
+      type: directory
+      description: PURPLE output directory
+  - linx_somatic_anno_dir:
+      type: directory
+      description: LINX somatic annotation output directory
+  - linx_somatic_plot_dir:
+      type: directory
+      description: LINX somatic plot output directory
+  - linx_germline_anno_dir:
+      type: directory
+      description: LINX germline annotation output directory (optional)
+  - virusinterpreter_dir:
+      type: directory
+      description: Virus Interpreter output directory (optional)
+  - chord_dir:
+      type: directory
+      description: CHORD output directory (optional)
+  - sigs_dir:
+      type: directory
+      description: Sigs output directory (optional)
+  - lilac_dir:
+      type: directory
+      description: LILAC output directory
+  - cuppa_dir:
+      type: directory
+      description: CUPPA output directory (optional)
+  - isofox_dir:
+      type: directory
+      description: Isofox output directory (optional)
+  - genome_ver:
+      type: string
+      description: Reference genome version
+  - disease_ontology:
+      type: file
+      description: Disease ontology file
+      pattern: "*.{json}"
+  - cohort_mapping:
+      type: file
+      description: HMF cohort mapping file
+      pattern: "*.{tsv}"
+  - cohort_percentiles:
+      type: file
+      description: HMF cohort percentiles file
+      pattern: "*.{tsv}"
+  - known_fusion_data:
+      type: file
+      description: Known fusions data file
+      pattern: "*.{bedpe}"
+  - driver_gene_panel:
+      type: file
+      description: Driver gene panel file
+      pattern: "*.{csv}"
+  - ensembl_data_resources:
+      type: directory
+      description: HMF ensembl data resources directory
+  - isofox_alt_sj:
+      type: file
+      description: Isofox cohort alternate splice junction file
+      pattern: "*.{csv}"
+  - isofox_gene_distribution:
+      type: file
+      description: Isofox cohort gene expression file
+      pattern: "*.{csv}"
+  - pipeline_version:
+      type: string
+      description: Pipeline version to display in report
+output:
+  - meta:
+      type: map
+      description: |
Groovy Map containing sample information + e.g. [id: 'sample_id'] + - pdf: + type: file + description: Report file + pattern: "*.{pdf}" + - json: + type: file + description: Compiled JSON output file + pattern: "*.{json}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/pave/germline/environment.yml b/modules/local/pave/germline/environment.yml new file mode 100644 index 00000000..c7f743e7 --- /dev/null +++ b/modules/local/pave/germline/environment.yml @@ -0,0 +1,7 @@ +name: pave_germline +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-pave=1.6 diff --git a/modules/local/pave/germline/main.nf b/modules/local/pave/germline/main.nf new file mode 100644 index 00000000..fdbdeead --- /dev/null +++ b/modules/local/pave/germline/main.nf @@ -0,0 +1,79 @@ +// NOTE(SW): use of tumor sample name here is consistent with Pipeline5 +// - https://github.com/hartwigmedical/pipeline5/blob/v5.33/cluster/src/main/java/com/hartwig/pipeline/tertiary/pave/PaveGermline.java#L36-L41 +// - https://github.com/hartwigmedical/pipeline5/blob/v5.33/cluster/src/main/java/com/hartwig/pipeline/tertiary/pave/PaveArguments.java#L31-L43 + +process PAVE_GERMLINE { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-pave:1.6--hdfd78af_0' : + 'biocontainers/hmftools-pave:1.6--hdfd78af_0' }" + + input: + tuple val(meta), path(sage_vcf), path(sage_tbi) + path genome_fasta + val genome_ver + path genome_fai + path sage_blocklist_regions + path sage_blocklist_sites + path clinvar_annotations + path segment_mappability + path driver_gene_panel + path ensembl_data_resources + path gnomad_resource + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.vcf.gz.tbi"), emit: index + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def gnomad_args + if (genome_ver.toString() == '37') { + gnomad_args = "-gnomad_freq_file ${gnomad_resource}" + } else if (genome_ver.toString() == '38') { + gnomad_args = "-gnomad_freq_dir ${gnomad_resource}" + } else { + error "got bad genome version: ${genome_ver}" + } + + """ + pave \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ + -sample ${meta.sample_id} \\ + -vcf_file ${sage_vcf} \\ + -ref_genome ${genome_fasta} \\ + -ref_genome_version ${genome_ver} \\ + -clinvar_vcf ${clinvar_annotations} \\ + -driver_gene_panel ${driver_gene_panel} \\ + -mappability_bed ${segment_mappability} \\ + -ensembl_data_dir ${ensembl_data_resources} \\ + -blacklist_bed ${sage_blocklist_regions} \\ + -blacklist_vcf ${sage_blocklist_sites} \\ + -gnomad_pon_filter -1 \\ + ${gnomad_args} \\ + -read_pass_only \\ + -threads ${task.cpus} \\ + -output_dir ./ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pave: \$(pave -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + touch ${meta.sample_id}.sage.pave_germline.vcf.gz{,.tbi} + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/pave/germline/meta.yml b/modules/local/pave/germline/meta.yml new file mode 100644 index 00000000..5d209cec --- /dev/null +++ b/modules/local/pave/germline/meta.yml @@ -0,0 +1,84 @@ +name: pave_germline 
+description: Annotate small variant VCF with gene, transcript coding and protein effects +keywords: + - annotation + - gene + - transcript + - protein + - vcf +tools: + - pave: + description: Annotates small variant VCF with gene, transcript coding and protein effects. + homepage: https://github.com/hartwigmedical/hmftools/tree/master/pave + documentation: https://github.com/hartwigmedical/hmftools/tree/master/pave + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - sage_vcf: + type: file + description: SAGE VCF file + pattern: "*.{vcf.gz}" + - sage_tbi: + type: file + description: SAGE VCF index file + pattern: "*.{tbi}" + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_ver: + type: string + description: Reference genome version + - genome_fai: + type: file + description: Reference genome assembly fai file + pattern: "*.{fai}" + - sage_blocklist_regions: + type: file + description: SAGE regions blocklist file + pattern: "*.{bed}" + - sage_blocklist_sites: + type: file + description: SAGE sites blocklist file + pattern: "*.{vcf.gz}" + - clinvar_annotations: + type: file + description: ClinVar annotations VCF file + pattern: "*.{vcf.gz}" + - segment_mappability: + type: file + description: Segment mappability file + pattern: "*.{bed.gz}" + - driver_gene_panel: + type: file + description: Driver gene panel file + pattern: "*.{tsv}" + - ensembl_data_resources: + type: directory + description: HMF ensembl data resources directory + - gnomad_resource: + description: gnomAD resource +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - vcf: + type: file + description: PAVE VCF file + pattern: "*.{vcf.gz}" + - index: + type: file + description: PAVE VCF index file + pattern: "*.{vcf.gz.tbi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/pave/somatic/environment.yml b/modules/local/pave/somatic/environment.yml new file mode 100644 index 00000000..686998c2 --- /dev/null +++ b/modules/local/pave/somatic/environment.yml @@ -0,0 +1,7 @@ +name: pave_somatic +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-pave=1.6 diff --git a/modules/local/pave/somatic/main.nf b/modules/local/pave/somatic/main.nf new file mode 100644 index 00000000..cb747971 --- /dev/null +++ b/modules/local/pave/somatic/main.nf @@ -0,0 +1,88 @@ +process PAVE_SOMATIC { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/hmftools-pave:1.6--hdfd78af_0' : + 'biocontainers/hmftools-pave:1.6--hdfd78af_0' }" + + input: + tuple val(meta), path(sage_vcf), path(sage_tbi) + path genome_fasta + val genome_ver + path genome_fai + path sage_pon + path pon_artefacts + path sage_blocklist_regions + path sage_blocklist_sites + path clinvar_annotations + path segment_mappability + path driver_gene_panel + path ensembl_data_resources + path gnomad_resource + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.vcf.gz.tbi"), emit: index + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def pon_filters + def gnomad_args + if (genome_ver.toString() == '37') { + pon_filters = 'HOTSPOT:10:5;PANEL:6:5;UNKNOWN:6:0' + gnomad_args = "-gnomad_freq_file ${gnomad_resource}" + } else if (genome_ver.toString() == '38') { + pon_filters = 'HOTSPOT:5:5;PANEL:2:5;UNKNOWN:2:0' + gnomad_args = "-gnomad_freq_dir ${gnomad_resource}" + } else { + error "got bad genome version: ${genome_ver}" + } + + // Targeted mode + def pon_artefact_arg = pon_artefacts ? "-pon_artefact_file ${pon_artefacts}" : '' + def sage_blocklist_regions_arg = sage_blocklist_regions ? "-blacklist_bed ${sage_blocklist_regions}" : '' + def sage_blocklist_sites_arg = sage_blocklist_sites ? "-blacklist_vcf ${sage_blocklist_sites}" : '' + def clinvar_annotations = clinvar_annotations ? "-clinvar_vcf ${clinvar_annotations}" : '' + + """ + pave \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ + -sample ${meta.sample_id} \\ + -vcf_file ${sage_vcf} \\ + -ref_genome ${genome_fasta} \\ + -ref_genome_version ${genome_ver} \\ + -pon_file ${sage_pon} \\ + -pon_filters "${pon_filters}" \\ + ${pon_artefact_arg} \\ + ${clinvar_annotations} \\ + -driver_gene_panel ${driver_gene_panel} \\ + -mappability_bed ${segment_mappability} \\ + -ensembl_data_dir ${ensembl_data_resources} \\ + ${sage_blocklist_regions_arg} \\ + ${sage_blocklist_sites_arg} \\ + ${gnomad_args} \\ + -read_pass_only \\ + -threads ${task.cpus} \\ + -output_dir ./ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pave: \$(pave -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + touch ${meta.sample_id}.sage.pave_somatic.vcf.gz{,.tbi} + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/pave/somatic/meta.yml b/modules/local/pave/somatic/meta.yml new file mode 100644 index 00000000..399efd0c --- /dev/null +++ b/modules/local/pave/somatic/meta.yml @@ -0,0 +1,92 @@ +name: pave_somatic +description: Annotate small variant VCF with gene, transcript coding and protein effects +keywords: + - pave + - annotation + - gene + - transcript + - protein + - vcf +tools: + - pave: + description: Annotates small variant VCF with gene, transcript coding and protein effects. + homepage: https://github.com/hartwigmedical/hmftools/tree/master/pave + documentation: https://github.com/hartwigmedical/hmftools/tree/master/pave + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id'] + - sage_vcf: + type: file + description: SAGE VCF file + pattern: "*.{vcf.gz}" + - sage_tbi: + type: file + description: SAGE VCF index file + pattern: "*.{tbi}" + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_ver: + type: string + description: Reference genome version + - genome_fai: + type: file + description: Reference genome assembly fai file + pattern: "*.{fai}" + - sage_pon: + type: file + description: SAGE PON file + pattern: "*.{tsv.gz}" + - pon_artefacts: + type: file + description: Taregeted sequencing PON artefacts file (optional) + - sage_blocklist_regions: + type: file + description: SAGE regions blocklist file + pattern: "*.{bed}" + - sage_blocklist_sites: + type: file + description: SAGE sites blocklist file + pattern: "*.{vcf.gz}" + - clinvar_annotations: + type: file + description: ClinVar annotations VCF file + pattern: "*.{vcf.gz}" + - segment_mappability: + type: file + description: Segment mappability file + pattern: "*.{bed.gz}" + - driver_gene_panel: + type: file + description: Driver gene panel file + pattern: "*.{tsv}" + - ensembl_data_resources: + type: directory + description: HMF ensembl data resources directory + - gnomad_resource: + description: gnomAD resource +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - vcf: + type: file + description: PAVE VCF file + pattern: "*.{vcf.gz}" + - index: + type: file + description: PAVE VCF index file + pattern: "*.{vcf.gz.tbi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/purple/environment.yml b/modules/local/purple/environment.yml new file mode 100644 index 00000000..5226454d --- /dev/null +++ b/modules/local/purple/environment.yml @@ -0,0 +1,7 @@ +name: purple +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-purple=4.0.2 diff --git a/modules/local/purple/main.nf b/modules/local/purple/main.nf new file mode 100644 index 00000000..b571bd07 --- /dev/null +++ b/modules/local/purple/main.nf @@ -0,0 +1,104 @@ +process PURPLE { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-purple:4.0.2--hdfd78af_0' : + 'biocontainers/hmftools-purple:4.0.2--hdfd78af_0' }" + + input: + tuple val(meta), path(amber), path(cobalt), path(sv_tumor_vcf), path(sv_tumor_tbi), path(sv_tumor_unfiltered_vcf), path(sv_tumor_unfiltered_tbi), path(sv_normal_vcf), path(sv_normal_tbi), path(smlv_tumor_vcf), path(smlv_normal_vcf) + path genome_fasta + val genome_ver + path genome_fai + path genome_dict + path gc_profile + path sage_known_hotspots_somatic + path sage_known_hotspots_germline + path driver_gene_panel + path ensembl_data_resources + path germline_del + path target_region_bed + path target_region_ratios + path target_region_msi_indels + + output: + tuple val(meta), path('purple/'), emit: purple_dir + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def reference_arg = meta.containsKey('normal_id') ? "-reference ${meta.normal_id}" : '' + + def sv_tumor_vcf_arg = sv_tumor_vcf ? "-somatic_sv_vcf ${sv_tumor_vcf}" : '' + def sv_normal_vcf_arg = sv_normal_vcf ? 
"-germline_sv_vcf ${sv_normal_vcf}" : '' + + def sv_tumor_recovery_vcf_arg = sv_tumor_unfiltered_vcf ? "-sv_recovery_vcf ${sv_tumor_unfiltered_vcf}" : '' + + def smlv_tumor_vcf_arg = smlv_tumor_vcf ? "-somatic_vcf ${smlv_tumor_vcf}" : '' + def smlv_normal_vcf_arg = smlv_normal_vcf ? "-germline_vcf ${smlv_normal_vcf}" : '' + + def sage_known_hotspots_germline_arg = sage_known_hotspots_germline ? "-germline_hotspots ${sage_known_hotspots_germline}" : '' + def germline_del_arg = germline_del ? "-germline_del_freq_file ${germline_del}" : '' + + def target_region_bed_arg = target_region_bed ? "-target_regions_bed ${target_region_bed}" : '' + def target_region_ratios_arg = target_region_ratios ? "-target_regions_ratios ${target_region_ratios}" : '' + def target_region_msi_indels_arg = target_region_msi_indels ? "-target_regions_msi_indels ${target_region_msi_indels}" : '' + + """ + purple \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ + -tumor ${meta.tumor_id} \\ + ${reference_arg} \\ + -amber ${amber} \\ + -cobalt ${cobalt} \\ + ${sv_tumor_vcf_arg} \\ + ${sv_normal_vcf_arg} \\ + ${sv_tumor_recovery_vcf_arg} \\ + ${smlv_tumor_vcf_arg} \\ + ${smlv_normal_vcf_arg} \\ + -ref_genome ${genome_fasta} \\ + -ref_genome_version ${genome_ver} \\ + -driver_gene_panel ${driver_gene_panel} \\ + -ensembl_data_dir ${ensembl_data_resources} \\ + -somatic_hotspots ${sage_known_hotspots_somatic} \\ + ${sage_known_hotspots_germline_arg} \\ + ${target_region_bed_arg} \\ + ${target_region_ratios_arg} \\ + ${target_region_msi_indels_arg} \\ + ${germline_del_arg} \\ + -gc_profile ${gc_profile} \\ + -circos \$(which circos) \\ + -threads ${task.cpus} \\ + -output_dir purple/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + purple: \$(purple -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + mkdir purple/ + touch purple/${meta.tumor_id}.purple.cnv.gene.tsv + touch purple/${meta.tumor_id}.purple.cnv.somatic.tsv + touch purple/${meta.tumor_id}.purple.driver.catalog.germline.tsv + touch purple/${meta.tumor_id}.purple.driver.catalog.somatic.tsv + touch purple/${meta.tumor_id}.purple.germline.vcf.gz + touch purple/${meta.tumor_id}.purple.germline.vcf.gz + touch purple/${meta.tumor_id}.purple.purity.tsv + touch purple/${meta.tumor_id}.purple.qc + touch purple/${meta.tumor_id}.purple.somatic.vcf.gz + touch purple/${meta.tumor_id}.purple.sv.germline.vcf.gz + touch purple/${meta.tumor_id}.purple.sv.vcf.gz + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/purple/meta.yml b/modules/local/purple/meta.yml new file mode 100644 index 00000000..3a027a82 --- /dev/null +++ b/modules/local/purple/meta.yml @@ -0,0 +1,123 @@ +name: purple +description: Estimate purity and ploidy from WGS data +keywords: + - variant caller + - ploidy + - purity + - cnv + - sv +tools: + - purple: + description: Estimates purity and ploidy from WGS data + homepage: https://github.com/hartwigmedical/hmftools/tree/master/purple + documentation: https://github.com/hartwigmedical/hmftools/tree/master/purple + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - amber: + type: directory + description: AMBER output directory + - cobalt: + type: directory + description: COBALT output directory + - sv_tumor_vcf: + type: file + description: GRIPSS somatic VCF file (optional) + pattern: "*.{vcf.gz}" + - sv_tumor_tbi: + type: file + description: GRIPSS somatic VCF index file (optional) + pattern: "*.{vcf.gz.tbi}" + - sv_tumor_unfiltered_vcf: + type: file + description: GRIPSS unfiltered somatic VCF file (optional) + pattern: "*.{vcf.gz}" + - sv_tumor_unfiltered_tbi: + type: file + description: GRIPSS unfiltered somatic VCF index file (optional) + pattern: "*.{vcf.gz.tbi}" + - sv_normal_vcf: + type: file + description: GRIPSS germline VCF file (optional) + pattern: "*.{vcf.gz}" + - sv_normal_tbi: + type: file + description: GRIPSS germline VCF index file (optional) + pattern: "*.{vcf.gz.tbi}" + - smlv_tumor_vcf: + type: file + description: Small variant tumor VCF file (optional) + pattern: "*.{vcf.gz}" + - smlv_normal_vcf: + type: file + description: Small variant normal VCF file (optional) + pattern: "*.{vcf.gz}" + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_ver: + type: string + description: Reference genome version + - genome_fai: + type: file + description: Reference genome assembly fai file + pattern: "*.{fai}" + - genome_dict: + type: file + description: Reference genome assembly dict file + pattern: "*.{dict}" + - gc_profile: + type: file + description: GC profile file + pattern: "*.{cnp}" + - sage_known_hotspots_somatic: + type: file + description: SAGE somatic known hotspots file + pattern: "*.{vcf.gz}" + - sage_known_hotspots_germline: + type: file + description: SAGE germline known hotspots file + pattern: "*.{vcf.gz}" + - driver_gene_panel: + type: file + description: Driver gene panel file + pattern: "*.{csv}" + - ensembl_data_resources: + type: directory + description: HMF ensembl data resources directory + - germline_del: + type: file + description: Cohort frequency for germline deletions (optional) + pattern: "*.{csv}" + - target_region_bed: + type: file + description: Target regions BED file (optional) + pattern: "*.{bed}" + - target_regions_ratios: + type: file + description: Target regions ratios file (optional) + pattern: "*.{tsv}" + - target_regions_msi_indels: + type: file + description: Target regions MSI/INDELS file (optional) + pattern: "*.{tsv}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - purple_dir: + type: directory + description: PURPLE output directory + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/sage/append/environment.yml b/modules/local/sage/append/environment.yml new file mode 100644 index 00000000..e6f84927 --- /dev/null +++ b/modules/local/sage/append/environment.yml @@ -0,0 +1,7 @@ +name: sage_append +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-sage=3.4.4 diff --git a/modules/local/sage/append/main.nf b/modules/local/sage/append/main.nf new file mode 100644 index 00000000..a00d96d1 --- /dev/null +++ b/modules/local/sage/append/main.nf @@ -0,0 +1,52 @@ +process SAGE_APPEND { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-sage:3.4.4--hdfd78af_0' : + 'biocontainers/hmftools-sage:3.4.4--hdfd78af_0' }" + + input: + tuple val(meta), path(vcf), path(bam), path(bai) + path genome_fasta + val genome_ver + path genome_fai + path genome_dict + + output: + tuple val(meta), path('*.append.vcf.gz'), emit: vcf + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + sage \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + com.hartwig.hmftools.sage.append.SageAppendApplication \\ + ${args} \\ + -input_vcf ${vcf} \\ + -reference ${meta.tumor_rna_id} \\ + -reference_bam ${bam} \\ + -ref_genome ${genome_fasta} \\ + -ref_genome_version ${genome_ver} \\ + -threads ${task.cpus} \\ + -output_vcf ${meta.dna_id}.sage.append.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sage: \$(sage -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + touch "${meta.dna_id}.sage.append.vcf.gz" + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/sage/append/meta.yml b/modules/local/sage/append/meta.yml new file mode 100644 index 00000000..970d2539 --- /dev/null +++ b/modules/local/sage/append/meta.yml @@ -0,0 +1,61 @@ +name: sage_append +description: A tool to append data to existing SAGE calls +keywords: + - append + - snv + - mnv +tools: + - sage: + description: A precise and highly sensitive somatic SNV, MNV and small INDEL caller. + homepage: https://github.com/hartwigmedical/hmftools/tree/master/sage + documentation: https://github.com/hartwigmedical/hmftools/tree/master/sage + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample informatio + e.g. 
[id: 'sample_id', append_id: 'sample_id_append'] + - vcf: + type: file + description: VCF file + pattern: "*.{vcf.gz}" + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - bai: + type: file + description: BAI file + pattern: "*.{bai}" + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_ver: + type: string + description: Reference genome version + - genome_fai: + type: file + description: Reference genome assembly fai file + pattern: "*.{fai}" + - genome_dict: + type: file + description: Reference genome assembly dict file + pattern: "*.{dict}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id', append_id: 'sample_id_append'] + - vcf: + type: file + description: VCF file + pattern: "*.{vcf.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/sage/germline/environment.yml b/modules/local/sage/germline/environment.yml new file mode 100644 index 00000000..40abd45c --- /dev/null +++ b/modules/local/sage/germline/environment.yml @@ -0,0 +1,7 @@ +name: sage_germline +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-sage=3.4.4 diff --git a/modules/local/sage/germline/main.nf b/modules/local/sage/germline/main.nf new file mode 100644 index 00000000..a79155e3 --- /dev/null +++ b/modules/local/sage/germline/main.nf @@ -0,0 +1,82 @@ +process SAGE_GERMLINE { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-sage:3.4.4--hdfd78af_0' : + 'biocontainers/hmftools-sage:3.4.4--hdfd78af_0' }" + + input: + tuple val(meta), path(tumor_bam), path(normal_bam), path(tumor_bai), path(normal_bai) + path genome_fasta + val genome_ver + path genome_fai + path genome_dict + path sage_known_hotspots_germline + path sage_actionable_panel + path sage_coverage_panel + path sage_highconf_regions + path ensembl_data_resources + + output: + tuple val(meta), path('germline/*.sage.germline.vcf.gz'), path('germline/*.sage.germline.vcf.gz.tbi'), emit: vcf + tuple val(meta), path('germline/') , emit: sage_dir + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + mkdir germline/ + + sage \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ + -tumor ${meta.normal_id} \\ + -tumor_bam ${normal_bam} \\ + -reference ${meta.tumor_id} \\ + -reference_bam ${tumor_bam} \\ + -ref_genome ${genome_fasta} \\ + -ref_genome_version ${genome_ver} \\ + -hotspots ${sage_known_hotspots_germline} \\ + -panel_bed ${sage_actionable_panel} \\ + -coverage_bed ${sage_coverage_panel} \\ + -high_confidence_bed ${sage_highconf_regions} \\ + -ensembl_data_dir ${ensembl_data_resources} \\ + -hotspot_min_tumor_qual 50 \\ + -panel_min_tumor_qual 75 \\ + -hotspot_max_germline_vaf 100 \\ + -hotspot_max_germline_rel_raw_base_qual 100 \\ + -panel_max_germline_vaf 100 \\ + -panel_max_germline_rel_raw_base_qual 100 \\ + -ref_sample_count 0 \\ + -panel_only \\ + -write_bqr_data \\ + -write_bqr_plot \\ + -threads ${task.cpus} \\ + -output_vcf germline/${meta.tumor_id}.sage.germline.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sage: \$(sage -version | sed 's/^.* 
//') + END_VERSIONS + """ + + stub: + """ + mkdir -p germline/ + touch germline/${meta.tumor_id}.sage.germline.vcf.gz + touch germline/${meta.tumor_id}.sage.germline.vcf.gz.tbi + touch germline/${meta.tumor_id}.sage.bqr.png + touch germline/${meta.tumor_id}.sage.bqr.tsv + touch germline/${meta.normal_id}.sage.bqr.png + touch germline/${meta.normal_id}.sage.bqr.tsv + touch germline/${meta.normal_id}.gene.coverage.tsv + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/sage/germline/meta.yml b/modules/local/sage/germline/meta.yml new file mode 100644 index 00000000..1f7882e2 --- /dev/null +++ b/modules/local/sage/germline/meta.yml @@ -0,0 +1,88 @@ +name: sage_germline +description: A precise and highly sensitive somatic SNV, MNV and small INDEL caller +keywords: + - germline + - variant caller + - snv + - mnv +tools: + - sage: + description: A precise and highly sensitive somatic SNV, MNV and small INDEL caller. + homepage: https://github.com/hartwigmedical/hmftools/tree/master/sage + documentation: https://github.com/hartwigmedical/hmftools/tree/master/sage + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample informatio + e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - tumor_bam: + type: file + description: Tumor BAM file + pattern: "*.{bam}" + - normal_bam: + type: file + description: Normal BAM file + pattern: "*.{bam}" + - tumor_bai: + type: file + description: Tumor BAI file + pattern: "*.{bai}" + - normal_bai: + type: file + description: Normal BAI file + pattern: "*.{bai}" + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_ver: + type: string + description: Reference genome version + - genome_fai: + type: file + description: Reference genome assembly fai file + pattern: "*.{fai}" + - genome_dict: + type: file + description: Reference genome assembly dict file + pattern: "*.{dict}" + - sage_known_hotspots_germline: + type: file + description: SAGE germline known hotspots file + pattern: "*.{vcf.gz}" + - sage_actionable_panel: + type: file + description: SAGE actionable panel file + pattern: "*.{bed.gz}" + - sage_coverage_panel: + type: file + description: SAGE coverage gene panel file + pattern: "*.{bed.gz}" + - sage_highconf_regions: + type: file + description: SAGE high confidence regions file + pattern: "*.{bed.gz}" + - ensembl_data_resources: + type: directory + description: HMF ensembl data resources directory +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - vcf: + type: file + description: VCF file + pattern: "*.{vcf.gz}" + - sage_dir: + type: directory + description: SAGE output directory + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/sage/somatic/environment.yml b/modules/local/sage/somatic/environment.yml new file mode 100644 index 00000000..5befb17f --- /dev/null +++ b/modules/local/sage/somatic/environment.yml @@ -0,0 +1,7 @@ +name: sage_somatic +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-sage=3.4.4 diff --git a/modules/local/sage/somatic/main.nf b/modules/local/sage/somatic/main.nf new file mode 100644 index 00000000..fc7f282c --- /dev/null +++ b/modules/local/sage/somatic/main.nf @@ -0,0 +1,79 @@ +// NOTE(SW): logic that determines BQR outputs assumes '-output_vcf' is a path that includes at least leading one directory + +process SAGE_SOMATIC { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-sage:3.4.4--hdfd78af_0' : + 'biocontainers/hmftools-sage:3.4.4--hdfd78af_0' }" + + input: + tuple val(meta), path(tumor_bam), path(normal_bam), path(tumor_bai), path(normal_bai) + path genome_fasta + val genome_ver + path genome_fai + path genome_dict + path sage_known_hotspots_somatic + path sage_actionable_panel + path sage_coverage_panel + path sage_highconf_regions + path ensembl_data_resources + + output: + tuple val(meta), path('somatic/*.sage.somatic.vcf.gz'), path('somatic/*.sage.somatic.vcf.gz.tbi'), emit: vcf + tuple val(meta), path('somatic/') , emit: sage_dir + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def reference_arg = meta.containsKey('normal_id') ? "-reference ${meta.normal_id}" : '' + def reference_bam_arg = normal_bam ? 
"-reference_bam ${normal_bam}" : '' + + """ + mkdir -p somatic/ + + sage \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ + ${reference_arg} \\ + ${reference_bam_arg} \\ + -tumor ${meta.tumor_id} \\ + -tumor_bam ${tumor_bam} \\ + -ref_genome ${genome_fasta} \\ + -ref_genome_version ${genome_ver} \\ + -hotspots ${sage_known_hotspots_somatic} \\ + -panel_bed ${sage_actionable_panel} \\ + -coverage_bed ${sage_coverage_panel} \\ + -high_confidence_bed ${sage_highconf_regions} \\ + -ensembl_data_dir ${ensembl_data_resources} \\ + -write_bqr_data \\ + -write_bqr_plot \\ + -threads ${task.cpus} \\ + -output_vcf somatic/${meta.tumor_id}.sage.somatic.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sage: \$(sage -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + mkdir -p somatic/ + touch somatic/${meta.tumor_id}.sage.somatic.vcf.gz + touch somatic/${meta.tumor_id}.sage.somatic.vcf.gz.tbi + touch somatic/${meta.tumor_id}.gene.coverage.tsv + touch somatic/${meta.tumor_id}.sage.bqr.png + touch somatic/${meta.tumor_id}.sage.bqr.tsv + touch somatic/${meta.normal_id}.sage.bqr.png + touch somatic/${meta.normal_id}.sage.bqr.tsv + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/sage/somatic/meta.yml b/modules/local/sage/somatic/meta.yml new file mode 100644 index 00000000..912dc7a0 --- /dev/null +++ b/modules/local/sage/somatic/meta.yml @@ -0,0 +1,88 @@ +name: sage_somatic +description: A precise and highly sensitive somatic SNV, MNV and small INDEL caller +keywords: + - somatic + - variant caller + - snv + - mnv +tools: + - sage: + description: A precise and highly sensitive somatic SNV, MNV and small INDEL caller. + homepage: https://github.com/hartwigmedical/hmftools/tree/master/sage + documentation: https://github.com/hartwigmedical/hmftools/tree/master/sage + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample informatio + e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - tumor_bam: + type: file + description: Tumor BAM file + pattern: "*.{bam}" + - normal_bam: + type: file + description: Normal BAM file + pattern: "*.{bam}" + - tumor_bai: + type: file + description: Tumor BAI file + pattern: "*.{bai}" + - normal_bai: + type: file + description: Normal BAI file + pattern: "*.{bai}" + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_ver: + type: string + description: Reference genome version + - genome_fai: + type: file + description: Reference genome assembly fai file + pattern: "*.{fai}" + - genome_dict: + type: file + description: Reference genome assembly dict file + pattern: "*.{dict}" + - sage_known_hotspots_somatic: + type: file + description: SAGE somatic known hotspots file + pattern: "*.{vcf.gz}" + - sage_actionable_panel: + type: file + description: SAGE actionable gene panel file + pattern: "*.{bed.gz}" + - sage_coverage_panel: + type: file + description: SAGE coverage gene panel file + pattern: "*.{bed.gz}" + - sage_highconf_regions: + type: file + description: SAGE high confidence regions file + pattern: "*.{bed.gz}" + - ensembl_data_resources: + type: directory + description: HMF ensembl data resources directory +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - vcf: + type: file + description: SAGE VCF file + pattern: "*.{vcf.gz}" + - sage_dir: + type: directory + description: SAGE output directory + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/sambamba/merge/environment.yml b/modules/local/sambamba/merge/environment.yml new file mode 100644 index 00000000..1e158af2 --- /dev/null +++ b/modules/local/sambamba/merge/environment.yml @@ -0,0 +1,7 @@ +name: sambamba_merge +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::sambamba=1.0.1 diff --git a/modules/local/sambamba/merge/main.nf b/modules/local/sambamba/merge/main.nf new file mode 100644 index 00000000..b8b3828f --- /dev/null +++ b/modules/local/sambamba/merge/main.nf @@ -0,0 +1,41 @@ +process SAMBAMBA_MERGE { + tag "${meta.id}" + label 'process_medium' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/sambamba:1.0.1--h6f6fda4_0' : + 'biocontainers/sambamba:1.0.1--h6f6fda4_0' }" + + input: + tuple val(meta), path(bams) + + output: + tuple val(meta), path('*bam'), emit: bam + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + sambamba merge \\ + ${args} \\ + --nthreads ${task.cpus} \\ + ${meta.sample_id}.bam \\ + ${bams} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sambamba: \$(sambamba --version 2>&1 | grep -m1 sambamba | sed 's/^sambamba //') + END_VERSIONS + """ + + stub: + """ + touch ${meta.sample_id}.bam + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/sambamba/merge/meta.yml b/modules/local/sambamba/merge/meta.yml new file mode 100644 index 00000000..c4424055 --- /dev/null +++ b/modules/local/sambamba/merge/meta.yml @@ -0,0 +1,38 @@ +name: sambamba_merge +description: Merge several BAM files into one +keywords: + - sambamba + - bam + - merge +tools: + - sambamba: + description: Tools for working with SAM/BAM data + homepage: https://github.com/biod/sambamba + documentation: https://lomereiter.github.io/sambamba/index.html + licence: ["GPL v2"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - bams: + type: list + description: List BAM files +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" + - "@mkcmkc" diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf deleted file mode 100644 index 6df76a07..00000000 --- a/modules/local/samplesheet_check.nf +++ /dev/null @@ -1,27 +0,0 @@ -process SAMPLESHEET_CHECK { - tag "$samplesheet" - - conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'quay.io/biocontainers/python:3.8.3' }" - - input: - path samplesheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - script: // This script is bundled with the pipeline, in nf-core/oncoanalyser/bin/ - """ - check_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/modules/local/sigs/environment.yml b/modules/local/sigs/environment.yml new file mode 100644 index 00000000..6804c9a4 --- /dev/null +++ b/modules/local/sigs/environment.yml @@ -0,0 +1,7 @@ +name: sigs +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-sigs=1.2.1 diff --git a/modules/local/sigs/main.nf b/modules/local/sigs/main.nf new file mode 100644 index 00000000..929bfda3 --- /dev/null +++ b/modules/local/sigs/main.nf @@ -0,0 +1,48 @@ +process SIGS { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-sigs:1.2.1--hdfd78af_0' : + 'biocontainers/hmftools-sigs:1.2.1--hdfd78af_0' }" + + input: + tuple val(meta), path(smlv_vcf) + path signatures + + output: + tuple val(meta), path('sigs/'), emit: sigs_dir + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + mkdir -p sigs/ + + sigs \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ + -sample ${meta.sample_id} \\ + -somatic_vcf_file ${smlv_vcf} \\ + -signatures_file ${signatures} \\ + -output_dir sigs/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sigs: \$(sigs -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + mkdir -p sigs/ + touch sigs/placeholder + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/sigs/meta.yml b/modules/local/sigs/meta.yml new file mode 100644 index 00000000..70dd85b6 --- /dev/null +++ b/modules/local/sigs/meta.yml @@ -0,0 +1,40 @@ +name: sigs +description: Fit somatic small variants to signature definitions +keywords: + - signatures + - variants +tools: + - sigs: + description: Fits somatic small variants to signature definitions. + homepage: https://github.com/hartwigmedical/hmftools/tree/master/sigs + documentation: https://github.com/hartwigmedical/hmftools/tree/master/sigs + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id', tumor_id: 'tumor_name'] + - smlv_vcf: + type: file + description: Small somatic variant VCF file + pattern: "*.{vcf.gz}" + - signatures: + type: file + description: Signatures file + pattern: "*.{csv}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - sigs_dir: + type: directory + description: Sigs output directory + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/star/align/environment.yml b/modules/local/star/align/environment.yml new file mode 100644 index 00000000..e694b27f --- /dev/null +++ b/modules/local/star/align/environment.yml @@ -0,0 +1,7 @@ +name: star_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::star=2.7.3a diff --git a/modules/local/star/align/main.nf b/modules/local/star/align/main.nf new file mode 100644 index 00000000..6694a23e --- /dev/null +++ b/modules/local/star/align/main.nf @@ -0,0 +1,66 @@ +process STAR_ALIGN { + tag "${meta.id}" + label 'process_high' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/star:2.7.3a--0' : + 'biocontainers/star:2.7.3a--0' }" + + input: + tuple val(meta), path(reads_fwd), path(reads_rev) + path genome_star_index + + output: + tuple val(meta), path('*bam'), emit: bam + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + STAR \\ + ${args} \\ + --readFilesIn ${reads_fwd} ${reads_rev} \\ + --genomeDir ${genome_star_index} \\ + --runThreadN ${task.cpus} \\ + --readFilesCommand zcat \\ + --alignSJstitchMismatchNmax 5 -1 5 5 \\ + --alignSplicedMateMapLmin 35 \\ + --alignSplicedMateMapLminOverLmate 0.33 \\ + --chimJunctionOverhangMin 10 \\ + --chimOutType WithinBAM SoftClip \\ + --chimScoreDropMax 30 \\ + --chimScoreJunctionNonGTAG 0 \\ + --chimScoreMin 1 \\ + --chimScoreSeparation 1 \\ + --chimSegmentMin 10 \\ + --chimSegmentReadGapMax 3 \\ + --limitOutSJcollapsed 3000000 \\ + --outBAMcompression 0 \\ + --outFilterMatchNmin 35 \\ + --outFilterMatchNminOverLread 0.33 \\ + --outFilterMismatchNmax 3 \\ + --outFilterMultimapNmax 10 \\ + --outFilterScoreMinOverLread 0.33 \\ + --outSAMattributes All \\ + --outSAMattrRGline ID:${meta.read_group} SM:${meta.sample_id} \\ + --outSAMtype BAM Unsorted \\ + --outSAMunmapped Within \\ + --runRNGseed 0 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + END_VERSIONS + """ + + stub: + """ + touch Aligned.out.bam + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/star/align/meta.yml b/modules/local/star/align/meta.yml new file mode 100644 index 00000000..19bb83c5 --- /dev/null +++ b/modules/local/star/align/meta.yml @@ -0,0 +1,46 @@ +name: star_align +description: An ultrafast universal RNA-seq aligner +keywords: + - rna-seq + - rna + - aligner + - star +tools: + - star: + description: An ultrafast universal RNA-seq aligner + homepage: https://github.com/alexdobin/STAR + documentation: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - reads_fwd: + type: file + description: Forward reads FASTQ file + pattern: "*.{fastq.gz}" + - reads_rev: + type: file + description: Reverse reads FASTQ file + pattern: "*.{fastq.gz}" + - genome_star_index: + type: directory + description: STAR index directory +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/svprep/assemble/environment.yml b/modules/local/svprep/assemble/environment.yml new file mode 100644 index 00000000..ff1c9e1a --- /dev/null +++ b/modules/local/svprep/assemble/environment.yml @@ -0,0 +1,7 @@ +name: svprep_assemble +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-sv-prep=1.2.4 diff --git a/modules/local/svprep/assemble/main.nf b/modules/local/svprep/assemble/main.nf new file mode 100644 index 00000000..498930ef --- /dev/null +++ b/modules/local/svprep/assemble/main.nf @@ -0,0 +1,104 @@ +process GRIDSS_ASSEMBLE { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-sv-prep:1.2.4--hdfd78af_0' : + 'biocontainers/hmftools-sv-prep:1.2.4--hdfd78af_0' }" + + input: + tuple val(meta), path(bams), path(bams_filtered), path(preprocess_dirs), val(labels) + path genome_fasta + path genome_fai + path genome_dict + path genome_gridss_index + path blocklist + path gridss_config + + output: + tuple val(meta), path('gridss_assemble/'), emit: assemble_dir + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def config_arg = gridss_config ? "--configuration ${gridss_config}" : '' + def output_dirname = 'gridss_assemble' + def labels_list = labels instanceof List ? labels : [labels] + def labels_arg = labels_list.join(',') + // NOTE(SW): Nextflow implicitly casts List to an atomic TaskPath, hence the required check below + def bams_list = bams instanceof List ? bams : [bams] + def bams_arg = "--bams ${bams_list.join(',')}" + def bams_filtered_list = bams_filtered instanceof List ? bams_filtered : [bams_filtered] + def bams_filtered_arg = "--filtered_bams ${bams_filtered_list.join(',')}" + + // JVM heap for other tasks must be no greater than 1/4 of task memory, defaults to 1 GB if not provided + def otherJvmHeap = Math.min( + Math.round(task.memory.bytes * 0.25), + task.ext.otherJvmHeap ? task.ext.otherJvmHeap.bytes : 1.GB.bytes + ) + + """ + # Create shadow directory with file symlinks of GRIDSS 'working' dir to prevent cache invalidation + # NOTE(SW): for reasons that elude me, NF doesn't always stage in the workingdir; remove if it is present + shadow_input_directory() { + src=\${1} + dst="${output_dirname}/work/\${src##*/}" + for filepath_src in \$(find -L \${src} ! 
-type d); do + # Get destination location for symlink + filepath_src_rel=\$(sed 's#^'\${src}'/*##' <<< \${filepath_src}) + filepath_dst=\${dst%/}/\${filepath_src_rel} + # Create directory for symlink + mkdir -p \${filepath_dst%/*}; + # Get path for symlink source file, then create it + # NOTE(SW): ideally we would get the relative path using the --relative-to but this is only + # supported for GNU realpath and fails for others such as BusyBox, which is used in Biocontainers + symlinkpath=\$(realpath \${filepath_src}) + ln -s "\${symlinkpath}" \${filepath_dst}; + done + if [[ -L "\${src##*/}" ]]; then + rm "\${src}" + fi + } + for preprocess_dir in ${preprocess_dirs}; do + shadow_input_directory \${preprocess_dir}; + done + + # Symlink indices next to assembly FASTA + ln -s \$(find -L ${genome_gridss_index} -regex '.*\\.\\(amb\\|ann\\|pac\\|gridsscache\\|sa\\|bwt\\|img\\|alt\\)') ./ + + # Run + gridss_svprep \\ + ${args} \\ + --jvmheap ${Math.round((task.memory.bytes - otherJvmHeap) * 0.95)} \\ + --otherjvmheap ${otherJvmHeap} \\ + --steps assemble \\ + --labels ${labels_arg} \\ + --reference ${genome_fasta} \\ + --blacklist ${blocklist} \\ + --workingdir ${output_dirname}/work \\ + --assembly ${output_dirname}/sv_assemblies.bam \\ + --threads ${task.cpus} \\ + ${config_arg} \\ + ${bams_arg} \\ + ${bams_filtered_arg} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gridss: \$(CallVariants --version 2>&1 | sed 's/-gridss\$//') + svprep: \$(svprep -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + mkdir -p gridss_assemble/ + touch gridss_assemble/placeholder + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/svprep/assemble/meta.yml b/modules/local/svprep/assemble/meta.yml new file mode 100644 index 00000000..1a71333d --- /dev/null +++ b/modules/local/svprep/assemble/meta.yml @@ -0,0 +1,67 @@ +name: gridss_assemble +description: Assemble SVs with GRIDSS +keywords: + - assemble + - sv +tools: + - gridss: + description: GRIDSS is a module software suite containing tools useful for the detection of genomic rearrangements. + homepage: https://github.com/PapenfussLab/gridss + documentation: https://github.com/PapenfussLab/gridss + licence: ["GPL >=3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - bams: + type: list + description: List of BAM files + - bams_filtered: + type: list + description: List of filtered BAM files + - preprocess_dirs: + type: list + description: List of GRIDSS preprocess output directories + - labels: + type: list + description: List of labels corresponding to BAMs and GRIDSS preprocess output directories + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_fai: + type: file + description: Reference genome assembly fai file + pattern: "*.{fai}" + - genome_dict: + type: file + description: Reference genome assembly dict file + pattern: "*.{dict}" + - genome_gridss_index: + type: file + description: Reference genome assembly GRIDSS index file + pattern: "*.{gridsscache}" + - blocklist: + type: file + description: GRIDSS blocklist file + pattern: "*.{bed.gz}" + - gridss_config: + type: file + description: GRIDSS configuration file (optional) +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id'] + - assembly_dir: + type: directory + description: GRIDSS assemble output directory + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/svprep/call/environment.yml b/modules/local/svprep/call/environment.yml new file mode 100644 index 00000000..90d888f4 --- /dev/null +++ b/modules/local/svprep/call/environment.yml @@ -0,0 +1,7 @@ +name: svprep_call +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-sv-prep=1.2.4 diff --git a/modules/local/svprep/call/main.nf b/modules/local/svprep/call/main.nf new file mode 100644 index 00000000..b5ea5973 --- /dev/null +++ b/modules/local/svprep/call/main.nf @@ -0,0 +1,108 @@ +process GRIDSS_CALL { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-sv-prep:1.2.4--hdfd78af_0' : + 'biocontainers/hmftools-sv-prep:1.2.4--hdfd78af_0' }" + + input: + tuple val(meta), path(bams), path(bams_filtered), path(assemble_dir), val(labels) + path genome_fasta + path genome_fai + path genome_dict + path genome_gridss_index + path blocklist + path gridss_config + + output: + tuple val(meta), path('gridss_call/sv.svprep.gridss.vcf.gz'), emit: vcf + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def config_arg = gridss_config ? "--configuration ${gridss_config}" : '' + def output_dirname = 'gridss_call' + def labels_list = labels instanceof List ? labels : [labels] + def labels_arg = labels_list.join(',') + // NOTE(SW): Nextflow implicitly casts List to an atomic TaskPath, hence the required check below + def bams_list = bams instanceof List ? bams : [bams] + def bams_arg = "--bams ${bams_list.join(',')}" + def bams_filtered_list = bams_filtered instanceof List ? bams_filtered : [bams_filtered] + def bams_filtered_arg = "--filtered_bams ${bams_filtered_list.join(',')}" + + // JVM heap for other tasks must be no greater than 1/4 of task memory, defaults to 1 GB if not provided + def otherJvmHeap = Math.min( + Math.round(task.memory.bytes * 0.25), + task.ext.otherJvmHeap ? task.ext.otherJvmHeap.bytes : 1.GB.bytes + ) + + """ + # Create shadow directory with file symlinks of GRIDSS 'working' dir to prevent cache invalidation + # NOTE(SW): for reasons that elude me, NF doesn't always stage in the workingdir; remove if it is present + shadow_input_directory() { + src=\${1} + dst="${output_dirname}/" + for filepath_src in \$(find -L \${src} ! 
-type d); do + # Get destination location for symlink + filepath_src_rel=\$(sed 's#^'\${src}'/*##' <<< \${filepath_src}) + filepath_dst=\${dst%/}/\${filepath_src_rel} + # Create directory for symlink + mkdir -p \${filepath_dst%/*}; + # Get path for symlink source file, then create it + # NOTE(SW): ideally we would get the relative path using the --relative-to but this is only + # supported for GNU realpath and fails for others such as BusyBox, which is used in Biocontainers + symlinkpath=\$(realpath \${filepath_src}) + ln -s "\${symlinkpath}" \${filepath_dst}; + done + if [[ -L "\${src##*/}" ]]; then + rm "\${src}" + fi + } + shadow_input_directory ${assemble_dir} + + # Symlink indices next to assembly FASTA + ln -s \$(find -L ${genome_gridss_index} -regex '.*\\.\\(amb\\|ann\\|pac\\|gridsscache\\|sa\\|bwt\\|img\\|alt\\)') ./ + + # Run + gridss_svprep \\ + ${args} \\ + --jvmheap ${Math.round((task.memory.bytes - otherJvmHeap) * 0.95)} \\ + --otherjvmheap ${otherJvmHeap} \\ + --steps call \\ + --labels ${labels_arg} \\ + --reference ${genome_fasta} \\ + --blacklist ${blocklist} \\ + --workingdir ${output_dirname}/work/ \\ + --assembly ${output_dirname}/sv_assemblies.bam \\ + --output ${output_dirname}/sv.svprep.gridss.vcf.gz \\ + --threads ${task.cpus} \\ + ${config_arg} \\ + ${bams_arg} \\ + ${bams_filtered_arg} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gridss: \$(CallVariants --version 2>&1 | sed 's/-gridss\$//') + svprep: \$(svprep -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + mkdir -p gridss_call/ + cat < gridss_call/sv.svprep.gridss.vcf.gz + ##fileformat=VCFv4.1 + ##contig= + #CHROM POS ID REF ALT QUAL FILTER INFO + . . . . . . . + EOF + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/svprep/call/meta.yml b/modules/local/svprep/call/meta.yml new file mode 100644 index 00000000..da81cdb5 --- /dev/null +++ b/modules/local/svprep/call/meta.yml @@ -0,0 +1,68 @@ +name: gridss_call +description: Call SVs with GRIDSS +keywords: + - calling + - sv +tools: + - gridss: + description: GRIDSS is a module software suite containing tools useful for the detection of genomic rearrangements. + homepage: https://github.com/PapenfussLab/gridss + documentation: https://github.com/PapenfussLab/gridss + licence: ["GPL >=3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - bams: + type: list + description: List of BAM files + - bams_filtered: + type: list + description: List of filtered BAM files + - assemble_dir: + type: directory + description: GRIDSS assemble output directory + - labels: + type: list + description: List of labels corresponding to BAMs and GRIDSS preprocess output directories + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_fai: + type: file + description: Reference genome assembly fai file + pattern: "*.{fai}" + - genome_dict: + type: file + description: Reference genome assembly dict file + pattern: "*.{dict}" + - genome_gridss_index: + type: file + description: Reference genome assembly GRIDSS index file + pattern: "*.{gridsscache}" + - blocklist: + type: file + description: GRIDSS blocklist file + pattern: "*.{bed.gz}" + - gridss_config: + type: file + description: GRIDSS configuration file (optional) +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id'] + - vcf: + type: file + description: GRIDSS SV VCF file + pattern: "*.{vcf.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/svprep/depth_annotator/environment.yml b/modules/local/svprep/depth_annotator/environment.yml new file mode 100644 index 00000000..dc6c4f3c --- /dev/null +++ b/modules/local/svprep/depth_annotator/environment.yml @@ -0,0 +1,7 @@ +name: svprep_depth_annotator +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-sv-prep=1.2.4 diff --git a/modules/local/svprep/depth_annotator/main.nf b/modules/local/svprep/depth_annotator/main.nf new file mode 100644 index 00000000..0a75529e --- /dev/null +++ b/modules/local/svprep/depth_annotator/main.nf @@ -0,0 +1,58 @@ +process SVPREP_DEPTH_ANNOTATOR { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-sv-prep:1.2.4--hdfd78af_0' : + 'biocontainers/hmftools-sv-prep:1.2.4--hdfd78af_0' }" + + input: + tuple val(meta), path(bams), path(bais), path(vcf), val(labels) + path genome_fasta + val genome_ver + + output: + tuple val(meta), path("${meta.tumor_id}.gridss.vcf.gz"), emit: vcf + path "${meta.tumor_id}.gridss.vcf.gz.tbi" , emit: tbi + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def labels_list = labels instanceof List ? labels : [labels] + def labels_arg = labels_list.join(',') + // NOTE(SW): Nextflow implicitly casts List to an atomic TaskPath, hence the required check below + def bams_list = bams instanceof List ? bams : [bams] + def bams_arg = "${bams_list.join(',')}" + + """ + svprep \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + com.hartwig.hmftools.svprep.depth.DepthAnnotator \\ + ${args} \\ + -input_vcf ${vcf} \\ + -samples ${labels_arg} \\ + -bam_files ${bams_arg} \\ + -ref_genome ${genome_fasta} \\ + -ref_genome_version ${genome_ver} \\ + -threads ${task.cpus} \\ + -output_vcf ${meta.tumor_id}.gridss.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + svprep: \$(svprep -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + touch ${meta.tumor_id}.gridss.vcf.gz + touch ${meta.tumor_id}.gridss.vcf.gz.tbi + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/svprep/depth_annotator/meta.yml b/modules/local/svprep/depth_annotator/meta.yml new file mode 100644 index 00000000..893d9ad8 --- /dev/null +++ b/modules/local/svprep/depth_annotator/meta.yml @@ -0,0 +1,58 @@ +name: depth_annotator +description: Annotate GRIDSS BAM with depth information +keywords: + - depth + - annotation + - sv +tools: + - svprep: + description: Selects reads associated with SV events for input to GRIDSS + homepage: https://github.com/hartwigmedical/hmftools/tree/master/sv-prep + documentation: https://github.com/hartwigmedical/hmftools/tree/master/sv-prep + licence: ["GPL >=3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id'] + - bams: + type: list + description: BAM files + - bai: + type: list + description: BAI files + - vcf: + type: file + description: VCF file + pattern: "*.{vcf.gz}" + - labels: + type: list + description: List of labels corresponding to BAMs and GRIDSS preprocess output directories + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_ver: + type: string + description: Reference genome version +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - vcf: + type: file + description: VCF file + pattern: "*.{vcf.gz}" + - tbi: + type: file + description: VCF index file + pattern: "*.{vcf.gz.tbi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/svprep/preprocess/environment.yml b/modules/local/svprep/preprocess/environment.yml new file mode 100644 index 00000000..6414c016 --- /dev/null +++ b/modules/local/svprep/preprocess/environment.yml @@ -0,0 +1,7 @@ +name: svprep_preprocess +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-sv-prep=1.2.4 diff --git a/modules/local/svprep/preprocess/main.nf b/modules/local/svprep/preprocess/main.nf new file mode 100644 index 00000000..525fa43a --- /dev/null +++ b/modules/local/svprep/preprocess/main.nf @@ -0,0 +1,61 @@ +process GRIDSS_PREPROCESS { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-sv-prep:1.2.4--hdfd78af_0' : + 'biocontainers/hmftools-sv-prep:1.2.4--hdfd78af_0' }" + + input: + tuple val(meta), path(bam), path(bam_filtered) + path genome_fasta + path genome_fai + path genome_dict + path genome_gridss_index + path gridss_config + + output: + tuple val(meta), path("gridss_preprocess/${meta.sample_id}.sv_prep.sorted.bam.gridss.working/"), emit: preprocess_dir + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def config_arg = gridss_config ? 
"--configuration ${gridss_config}" : '' + + """ + # Symlink indices next to assembly FASTA + ln -s \$(find -L ${genome_gridss_index} -regex '.*\\.\\(amb\\|ann\\|pac\\|gridsscache\\|sa\\|bwt\\|img\\|alt\\)') ./ + + gridss_svprep \\ + ${args} \\ + --jvmheap ${Math.round(task.memory.bytes * 0.95)} \\ + --steps preprocess \\ + --reference ${genome_fasta} \\ + --workingdir gridss_preprocess/ \\ + --threads ${task.cpus} \\ + ${config_arg} \\ + --labels ${meta.sample_id} \\ + --bams ${bam} \\ + --filtered_bams ${bam_filtered} \\ + --output placeholder + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gridss: \$(CallVariants --version 2>&1 | sed 's/-gridss\$//') + svprep: \$(svprep -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + mkdir -p gridss_preprocess/${meta.sample_id}.sv_prep.sorted.bam.gridss.working/ + touch gridss_preprocess/${meta.sample_id}.sv_prep.sorted.bam.gridss.working/placeholder + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/svprep/preprocess/meta.yml b/modules/local/svprep/preprocess/meta.yml new file mode 100644 index 00000000..dd63074b --- /dev/null +++ b/modules/local/svprep/preprocess/meta.yml @@ -0,0 +1,59 @@ +name: gridss_preprocess +description: Preprocess reads for GRIDSS +keywords: + - preprocessing + - sv +tools: + - gridss: + description: GRIDSS is a module software suite containing tools useful for the detection of genomic rearrangements. + homepage: https://github.com/PapenfussLab/gridss + documentation: https://github.com/PapenfussLab/gridss + licence: ["GPL >=3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - bam_filtered: + type: file + description: Filtered BAM file + pattern: "*.{bam}" + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_fai: + type: file + description: Reference genome assembly fai file + pattern: "*.{fai}" + - genome_dict: + type: file + description: Reference genome assembly dict file + pattern: "*.{dict}" + - genome_gridss_index: + type: file + description: Reference genome assembly GRIDSS index file + pattern: "*.{gridsscache}" + - gridss_config: + type: file + description: GRIDSS configuration file (optional) +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - preprocess_dir: + type: directory + description: GRIDSS preprocess output directory + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/svprep/svprep/environment.yml b/modules/local/svprep/svprep/environment.yml new file mode 100644 index 00000000..9435340f --- /dev/null +++ b/modules/local/svprep/svprep/environment.yml @@ -0,0 +1,7 @@ +name: svprep_svprep +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-sv-prep=1.2.4 diff --git a/modules/local/svprep/svprep/main.nf b/modules/local/svprep/svprep/main.nf new file mode 100644 index 00000000..bba5bc5a --- /dev/null +++ b/modules/local/svprep/svprep/main.nf @@ -0,0 +1,68 @@ +process SVPREP { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/hmftools-sv-prep:1.2.4--hdfd78af_0' : + 'biocontainers/hmftools-sv-prep:1.2.4--hdfd78af_0' }" + + input: + tuple val(meta), path(bam), path(bai), path(junctions) + path genome_fasta + val genome_ver + path sv_blocklist + path known_fusions + val write_types + + output: + tuple val(meta), path("*.sorted.bam") , emit: bam + tuple val(meta), path("*.sv_prep.junctions.tsv"), emit: junctions + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + + def write_types_arg = write_types ? "-write_types \'${write_types}\'" : '' + def existing_juction_file_arg = junctions ? "-existing_junction_file ${junctions}" : '' + + """ + svprep \\ + -Xmx${Math.round(task.memory.bytes * 0.75)} \\ + ${args} \\ + -sample ${meta.sample_id} \\ + -bam_file ${bam} \\ + -ref_genome ${genome_fasta} \\ + -ref_genome_version ${genome_ver} \\ + -blacklist_bed ${sv_blocklist} \\ + -known_fusion_bed ${known_fusions} \\ + ${write_types_arg} \\ + ${existing_juction_file_arg} \\ + -threads ${task.cpus} \\ + -output_dir ./ + + samtools sort \\ + ${args2} \\ + -@ ${task.cpus} \\ + -T ${meta.sample_id}.sv_prep.tmp \\ + -o ${meta.sample_id}.sv_prep.sorted.bam \\ + ${meta.sample_id}.sv_prep.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + svprep: \$(svprep -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + touch "${meta.sample_id}.sv_prep.sorted.bam" + touch "${meta.sample_id}.sv_prep.junctions.tsv" + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/svprep/svprep/meta.yml b/modules/local/svprep/svprep/meta.yml new file mode 100644 index 00000000..d214e268 --- /dev/null +++ b/modules/local/svprep/svprep/meta.yml @@ -0,0 +1,68 @@ +name: svprep +description: Select reads associated with SV events +keywords: + - filtering + - reads + - sv +tools: + - svprep: + description: Selects reads associated with SV events for input to GRIDSS + homepage: https://github.com/hartwigmedical/hmftools/tree/master/sv-prep + documentation: https://github.com/hartwigmedical/hmftools/tree/master/sv-prep + licence: ["GPL >=3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - bai: + type: file + description: BAI file + pattern: "*.{bai}" + - junctions: + type: file + description: Existing candidate SV junctions file (optional) + pattern: "*.{csv}" + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_ver: + type: string + description: Reference genome version + - sv_blocklist: + type: file + description: SV Prep blocklist file + pattern: "*.{bed}" + - known_fusions: + type: file + description: Known fusions file + pattern: "*.{bedpe}" + - write_types: + type: string + description: Type of output files to write +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id'] + - bam: + type: file + description: BAM file containing selected reads + pattern: "*.{bam}" + - junctions: + type: file + description: Candidate SV junctions file + pattern: "*.{csv}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/virusbreakend/Dockerfile b/modules/local/virusbreakend/Dockerfile new file mode 100644 index 00000000..f8cc9d06 --- /dev/null +++ b/modules/local/virusbreakend/Dockerfile @@ -0,0 +1,34 @@ +FROM continuumio/miniconda3:23.5.2-0-alpine as build + +RUN \ + conda install conda-libmamba-solver + +RUN \ + echo -e > ~/.condarc '\ +solver: libmamba\n\ +channels:\n\ + - conda-forge\n\ + - bioconda\n\ + - defaults' + +RUN \ + conda create -y -p /env/ \ + 'gridss=2.13.2=h50ea8bc_3' \ + 'grep' + +# Install Dfam database required for RepeatMasker +RUN \ + conda install -y curl && \ + curl -s https://www.dfam.org/releases/Dfam_3.7/families/Dfam_curatedonly.h5.gz | \ + gzip -cd > /env/share/RepeatMasker/Libraries/Dfam.h5 + +RUN \ + conda clean -yaf + +# Move Conda environment into standard BioContainers base image +FROM quay.io/bioconda/base-glibc-busybox-bash:2.1.0 + +COPY --from=build /env/ /env/ + +ENV PATH="/env/bin:${PATH}" +ENV LD_LIBRARY_PATH="/env/lib/:${LD_LIBRARY_PATH}" diff --git a/modules/local/virusbreakend/environment.yml b/modules/local/virusbreakend/environment.yml new file mode 100644 index 00000000..cb3dd2e9 --- /dev/null +++ b/modules/local/virusbreakend/environment.yml @@ -0,0 +1,7 @@ +name: virusbreakend +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gridss=2.13.2=h50ea8bc_3 diff --git a/modules/local/virusbreakend/main.nf b/modules/local/virusbreakend/main.nf new file mode 100644 index 00000000..ae262ef8 --- /dev/null +++ b/modules/local/virusbreakend/main.nf @@ -0,0 +1,55 @@ +// NOTE(SW): the --db argument for the virusbreakend command must have a trailing slash if it is a symlink + +process VIRUSBREAKEND { + tag "${meta.id}" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "nf-core/gridss:2.13.2--1" + + input: + tuple val(meta), path(bam) + path genome_fasta + path genome_fai + path genome_dict + path genome_gridss_index + path virusbreakenddb + path gridss_config + + output: + tuple val(meta), path("*.summary.tsv"), emit: tsv + path "*.virusbreakend.vcf" , emit: vcf + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + # Symlink indices next to assembly FASTA + ln -s \$(find -L ${genome_gridss_index} -regex '.*\\.\\(amb\\|ann\\|pac\\|gridsscache\\|sa\\|bwt\\|img\\|alt\\)') ./ + + virusbreakend \\ + ${args} \\ + --gridssargs "--jvmheap ${Math.round(task.memory.bytes * 0.95)}" \\ + --threads ${task.cpus} \\ + --db ${virusbreakenddb.toString().replaceAll("/\$", "")}/ \\ + --output ${meta.sample_id}.virusbreakend.vcf \\ + --reference ${genome_fasta} \\ + ${bam} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gridss: \$(CallVariants --version 2>&1 | sed 's/-gridss\$//') + END_VERSIONS + """ + + stub: + """ + touch ${meta.sample_id}.virusbreakend.vcf ${meta.sample_id}.summary.tsv + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/virusbreakend/meta.yml b/modules/local/virusbreakend/meta.yml new file mode 100644 index 00000000..a61ff589 --- /dev/null +++ b/modules/local/virusbreakend/meta.yml @@ -0,0 +1,63 @@ +name: 
virusbreakend +description: Detect viral integration from WGS data +keywords: + - viral + - integration +tools: + - virusbreakend: + description: Performs detection of viral intergation from WGS data + homepage: https://github.com/PapenfussLab/gridss/blob/master/VIRUSBreakend_Readme.md + documentation: https://github.com/PapenfussLab/gridss/blob/master/VIRUSBreakend_Readme.md + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_fai: + type: file + description: Reference genome assembly fai file + pattern: "*.{fai}" + - genome_dict: + type: file + description: Reference genome assembly dict file + pattern: "*.{dict}" + - genome_gridss_index: + type: file + description: Reference genome assembly GRIDSS index file + pattern: "*.{gridsscache}" + - virusbreakenddb: + type: directory + description: VIRUSBreakend database directory + - gridss_config: + type: file + description: GRIDSS configuration file (optional) +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - tsv: + type: file + description: Summary file + pattern: "*.{tsv}" + - vcf: + type: file + description: VCF file + pattern: "*.{vcf.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/virusinterpreter/environment.yml b/modules/local/virusinterpreter/environment.yml new file mode 100644 index 00000000..ff968f29 --- /dev/null +++ b/modules/local/virusinterpreter/environment.yml @@ -0,0 +1,7 @@ +name: virusinterpreter +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-virus-interpreter=1.3 diff --git a/modules/local/virusinterpreter/main.nf b/modules/local/virusinterpreter/main.nf new file mode 100644 index 00000000..fac520e8 --- /dev/null +++ b/modules/local/virusinterpreter/main.nf @@ -0,0 +1,52 @@ +process VIRUSINTERPRETER { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/hmftools-virus-interpreter:1.3--hdfd78af_0' : + 'biocontainers/hmftools-virus-interpreter:1.3--hdfd78af_0' }" + + input: + tuple val(meta), path(virus_tsv), path(purple_dir), path(wgs_metrics) + path taxonomy_db + path reporting_db + + output: + tuple val(meta), path('virusinterpreter/'), emit: virusinterpreter_dir + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + mkdir -p virusinterpreter/ + + virusinterpreter \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ + -sample ${meta.sample_id} \\ + -purple_dir ${purple_dir} \\ + -tumor_sample_wgs_metrics_file ${wgs_metrics} \\ + -virus_breakend_tsv ${virus_tsv} \\ + -taxonomy_db_tsv ${taxonomy_db} \\ + -virus_reporting_db_tsv ${reporting_db} \\ + -output_dir virusinterpreter/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + virusinterpreter: \$(virusinterpreter | sed -n '1s/^.*Interpreter v//p') + END_VERSIONS + """ + + stub: + """ + mkdir -p virusinterpreter/ + touch virusinterpreter/${meta.sample_id}.virus.annotated.tsv + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/virusinterpreter/meta.yml b/modules/local/virusinterpreter/meta.yml new file mode 100644 index 00000000..251ef87f --- /dev/null +++ b/modules/local/virusinterpreter/meta.yml @@ -0,0 +1,52 @@ +name: virusinterpreter +description: Filters and annotates VIRUSBreakend calls +keywords: + - viral + - integration + - annotation + - filtering +tools: + - virusinterpreter: + description: Performs filtering and annotation of VIRUSBreakend calls + homepage: https://github.com/hartwigmedical/hmftools/tree/master/virus-interpreter + documentation: https://github.com/hartwigmedical/hmftools/tree/master/virus-interpreter + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - virus_tsv: + type: file + description: VIRUSBreakend summary file + pattern: "*.{tsv}" + - purple_dir: + type: directory + description: PURPLE output directory + - wgs_metrics: + type: file + description: Somatic collectwgsmetrics file + - taxonomy_db: + type: file + description: Virus Interpreter taxonomy database file + pattern: "*.{tsv}" + - reporting_db: + type: file + description: Virus Interpreter reporting database file + pattern: "*.{tsv}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id'] + - virusinterpreter_dir: + type: directory + description: Virus Interpreter output direcotry + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/nf-core/bwa/index/bwa-index.diff b/modules/nf-core/bwa/index/bwa-index.diff new file mode 100644 index 00000000..1fdcefb0 --- /dev/null +++ b/modules/nf-core/bwa/index/bwa-index.diff @@ -0,0 +1,67 @@ +Changes in module 'nf-core/bwa/index' +--- modules/nf-core/bwa/index/main.nf ++++ modules/nf-core/bwa/index/main.nf +@@ -8,25 +8,32 @@ + 'biocontainers/bwa:0.7.17--hed695b0_7' }" + + input: +- tuple val(meta), path(fasta) ++ path fasta ++ path alt + + output: +- tuple val(meta), path(bwa) , emit: index +- path "versions.yml" , emit: versions ++ path bwa_index , emit: index ++ path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: +- def prefix = task.ext.prefix ?: "${fasta.baseName}" ++ def prefix = task.ext.prefix ?: "${fasta.name}" + def args = task.ext.args ?: '' ++ + """ +- mkdir bwa ++ mkdir -p bwa_index/ + bwa \\ + index \\ + $args \\ +- -p bwa/${prefix} \\ ++ -p bwa_index/${prefix} \\ + $fasta ++ ++ # Include ALT file where necessary ++ if [[ -n "${alt}" ]]; then ++ ln -s ../${alt} bwa_index/; ++ fi; + + cat <<-END_VERSIONS > versions.yml + "${task.process}": +@@ -35,15 +42,15 @@ + """ + + stub: +- def prefix = task.ext.prefix ?: "${fasta.baseName}" ++ def prefix = task.ext.prefix ?: "${fasta.name}" + """ +- mkdir bwa ++ mkdir -p bwa_index/ + +- touch bwa/${prefix}.amb +- touch bwa/${prefix}.ann +- touch bwa/${prefix}.bwt +- touch bwa/${prefix}.pac +- touch bwa/${prefix}.sa ++ touch bwa_index/${prefix}.amb ++ touch bwa_index/${prefix}.ann ++ touch bwa_index/${prefix}.bwt ++ touch bwa_index/${prefix}.pac ++ touch bwa_index/${prefix}.sa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + +************************************************************ diff --git a/modules/nf-core/bwa/index/environment.yml b/modules/nf-core/bwa/index/environment.yml new file mode 100644 index 00000000..5d3cb323 --- /dev/null +++ b/modules/nf-core/bwa/index/environment.yml @@ -0,0 +1,7 @@ +name: bwa_index +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bwa=0.7.17 diff --git a/modules/nf-core/bwa/index/main.nf b/modules/nf-core/bwa/index/main.nf new file mode 100644 index 00000000..efb59703 --- /dev/null +++ b/modules/nf-core/bwa/index/main.nf @@ -0,0 +1,60 @@ +process BWA_INDEX { + tag "$fasta" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' : + 'biocontainers/bwa:0.7.17--hed695b0_7' }" + + input: + path fasta + path alt + + output: + path bwa_index , emit: index + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${fasta.name}" + def args = task.ext.args ?: '' + + """ + mkdir -p bwa_index/ + bwa \\ + index \\ + $args \\ + -p bwa_index/${prefix} \\ + $fasta + + # Include ALT file where necessary + if [[ -n "${alt}" ]]; then + ln -s ../${alt} bwa_index/; + fi; + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${fasta.name}" + """ + mkdir -p bwa_index/ + + touch bwa_index/${prefix}.amb + touch bwa_index/${prefix}.ann + touch bwa_index/${prefix}.bwt + touch bwa_index/${prefix}.pac + touch bwa_index/${prefix}.sa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bwa/index/meta.yml b/modules/nf-core/bwa/index/meta.yml new file mode 100644 index 00000000..730628d0 --- /dev/null +++ b/modules/nf-core/bwa/index/meta.yml @@ -0,0 +1,45 @@ +name: bwa_index +description: Create BWA index for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - bwa: + description: | + BWA is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: http://bio-bwa.sourceforge.net/ + documentation: http://www.htslib.org/doc/samtools.html + arxiv: arXiv:1303.3997 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file +output: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. 
[ id:'test', single_end:false ] + - index: + type: file + description: BWA genome index files + pattern: "*.{amb,ann,bwt,pac,sa}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@maxulysse" +maintainers: + - "@drpatelh" + - "@maxulysse" diff --git a/modules/nf-core/bwa/index/tests/main.nf.test b/modules/nf-core/bwa/index/tests/main.nf.test new file mode 100644 index 00000000..af33e73c --- /dev/null +++ b/modules/nf-core/bwa/index/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process BWA_INDEX" + tag "modules_nfcore" + tag "modules" + tag "bwa" + tag "bwa/index" + script "../main.nf" + process "BWA_INDEX" + + test("BWA index") { + + when { + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bwa/index/tests/main.nf.test.snap b/modules/nf-core/bwa/index/tests/main.nf.test.snap new file mode 100644 index 00000000..e51ad5bf --- /dev/null +++ b/modules/nf-core/bwa/index/tests/main.nf.test.snap @@ -0,0 +1,43 @@ +{ + "BWA index": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567", + "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da", + "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66", + "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1" + ] + ] + ], + "1": [ + "versions.yml:md5,0f20525da90e7489a7ebb02adca3265f" + ], + "index": [ + [ + { + "id": "test" + }, + [ + "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567", + "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da", + "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66", + "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1" + ] + ] + ], + "versions": [ + "versions.yml:md5,0f20525da90e7489a7ebb02adca3265f" + ] + } + ], + "timestamp": "2023-10-17T17:20:20.180927714" + } +} \ No newline at end of file diff --git a/modules/nf-core/bwa/index/tests/tags.yml b/modules/nf-core/bwa/index/tests/tags.yml new file mode 100644 index 00000000..28bb483c --- /dev/null +++ b/modules/nf-core/bwa/index/tests/tags.yml @@ -0,0 +1,2 @@ +bwa/index: + - modules/nf-core/bwa/index/** diff --git a/modules/nf-core/bwamem2/index/bwamem2-index.diff b/modules/nf-core/bwamem2/index/bwamem2-index.diff new file mode 100644 index 00000000..0fa349b5 --- /dev/null +++ b/modules/nf-core/bwamem2/index/bwamem2-index.diff @@ -0,0 +1,74 @@ +Changes in module 'nf-core/bwamem2/index' +--- modules/nf-core/bwamem2/index/main.nf ++++ modules/nf-core/bwamem2/index/main.nf +@@ -1,6 +1,7 @@ + process BWAMEM2_INDEX { + tag "$fasta" + label 'process_single' ++ label 'process_high_memory' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+@@ -8,11 +9,12 @@ + 'biocontainers/bwa-mem2:2.2.1--he513fc3_0' }" + + input: +- tuple val(meta), path(fasta) ++ path fasta ++ path alt + + output: +- tuple val(meta), path("bwamem2"), emit: index +- path "versions.yml" , emit: versions ++ path "bwa-mem2_index", emit: index ++ path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when +@@ -20,12 +22,18 @@ + script: + def prefix = task.ext.prefix ?: "${fasta}" + def args = task.ext.args ?: '' ++ + """ +- mkdir bwamem2 ++ mkdir -p bwa-mem2_index/ + bwa-mem2 \\ + index \\ + $args \\ +- $fasta -p bwamem2/${prefix} ++ $fasta -p bwa-mem2_index/${prefix} ++ ++ # Include ALT file where necessary ++ if [[ -n "${alt}" ]]; then ++ ln -s ../${alt} bwa-mem2_index/; ++ fi; + + cat <<-END_VERSIONS > versions.yml + "${task.process}": +@@ -37,12 +45,17 @@ + def prefix = task.ext.prefix ?: "${fasta}" + + """ +- mkdir bwamem2 +- touch bwamem2/${prefix}.0123 +- touch bwamem2/${prefix}.ann +- touch bwamem2/${prefix}.pac +- touch bwamem2/${prefix}.amb +- touch bwamem2/${prefix}.bwt.2bit.64 ++ mkdir -p bwa-mem2_index/ ++ touch bwa-mem2_index/${prefix}.0123 ++ touch bwa-mem2_index/${prefix}.ann ++ touch bwa-mem2_index/${prefix}.pac ++ touch bwa-mem2_index/${prefix}.amb ++ touch bwa-mem2_index/${prefix}.bwt.2bit.64 ++ ++ # Include ALT file where necessary ++ if [[ -n "${alt}" ]]; then ++ ln -s ../${alt} bwa-mem2_index/; ++ fi; + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + +************************************************************ diff --git a/modules/nf-core/bwamem2/index/environment.yml b/modules/nf-core/bwamem2/index/environment.yml new file mode 100644 index 00000000..26b43917 --- /dev/null +++ b/modules/nf-core/bwamem2/index/environment.yml @@ -0,0 +1,7 @@ +name: bwamem2_index +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bwa-mem2=2.2.1 diff --git a/modules/nf-core/bwamem2/index/main.nf b/modules/nf-core/bwamem2/index/main.nf new file mode 100644 index 00000000..9c4171d8 --- /dev/null +++ b/modules/nf-core/bwamem2/index/main.nf @@ -0,0 +1,65 @@ +process BWAMEM2_INDEX { + tag "$fasta" + label 'process_single' + label 'process_high_memory' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bwa-mem2:2.2.1--he513fc3_0' : + 'biocontainers/bwa-mem2:2.2.1--he513fc3_0' }" + + input: + path fasta + path alt + + output: + path "bwa-mem2_index", emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${fasta}" + def args = task.ext.args ?: '' + + """ + mkdir -p bwa-mem2_index/ + bwa-mem2 \\ + index \\ + $args \\ + $fasta -p bwa-mem2_index/${prefix} + + # Include ALT file where necessary + if [[ -n "${alt}" ]]; then + ln -s ../${alt} bwa-mem2_index/; + fi; + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${fasta}" + + """ + mkdir -p bwa-mem2_index/ + touch bwa-mem2_index/${prefix}.0123 + touch bwa-mem2_index/${prefix}.ann + touch bwa-mem2_index/${prefix}.pac + touch bwa-mem2_index/${prefix}.amb + touch bwa-mem2_index/${prefix}.bwt.2bit.64 + + # Include ALT file where necessary + if [[ -n "${alt}" ]]; then + ln -s ../${alt} bwa-mem2_index/; + fi; + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bwamem2/index/meta.yml b/modules/nf-core/bwamem2/index/meta.yml new file mode 100644 index 00000000..c14a1092 --- /dev/null +++ b/modules/nf-core/bwamem2/index/meta.yml @@ -0,0 +1,42 @@ +name: bwamem2_index +description: Create BWA-mem2 index for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - bwamem2: + description: | + BWA-mem2 is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: https://github.com/bwa-mem2/bwa-mem2 + documentation: https://github.com/bwa-mem2/bwa-mem2#usage + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - index: + type: file + description: BWA genome index files + pattern: "*.{0123,amb,ann,bwt.2bit.64,pac}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/bwamem2/index/tests/main.nf.test b/modules/nf-core/bwamem2/index/tests/main.nf.test new file mode 100644 index 00000000..dbf11132 --- /dev/null +++ b/modules/nf-core/bwamem2/index/tests/main.nf.test @@ -0,0 +1,31 @@ +nextflow_process { + + name "Test Process BWAMEM2_INDEX" + tag "modules_nfcore" + tag "modules" + tag "bwamem2" + tag "bwamem2/index" + script "../main.nf" + process "BWAMEM2_INDEX" + + test("BWAMEM2 index") { + + when { + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/bwamem2/index/tests/main.nf.test.snap b/modules/nf-core/bwamem2/index/tests/main.nf.test.snap new file mode 100644 index 00000000..69b268ee --- /dev/null +++ b/modules/nf-core/bwamem2/index/tests/main.nf.test.snap @@ -0,0 +1,47 @@ +{ + "BWAMEM2 index": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "genome.fasta.0123:md5,b02870de80106104abcb03cd9463e7d8", + "genome.fasta.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.fasta.ann:md5,c32e11f6c859f166c7525a9c1d583567", + "genome.fasta.bwt.2bit.64:md5,d097a1b82dee375d41a1ea69895a9216", + "genome.fasta.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66" + ] + ] + ], + "1": [ + "versions.yml:md5,9ffd13d12e7108ed15c58566bc4717d6" + ], + "index": [ + [ + { + "id": "test" + }, + [ + "genome.fasta.0123:md5,b02870de80106104abcb03cd9463e7d8", + "genome.fasta.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.fasta.ann:md5,c32e11f6c859f166c7525a9c1d583567", + "genome.fasta.bwt.2bit.64:md5,d097a1b82dee375d41a1ea69895a9216", + "genome.fasta.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66" + ] + ] + ], + "versions": [ + "versions.yml:md5,9ffd13d12e7108ed15c58566bc4717d6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.02.0" + }, + "timestamp": "2024-03-18T12:59:39.132616" + } +} \ No newline at end of file diff --git a/modules/nf-core/bwamem2/index/tests/tags.yml b/modules/nf-core/bwamem2/index/tests/tags.yml new file mode 100644 index 00000000..3953018e --- /dev/null +++ b/modules/nf-core/bwamem2/index/tests/tags.yml @@ -0,0 +1,2 @@ +bwamem2/index: + - modules/nf-core/bwamem2/index/** diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf deleted file mode 100644 index cebb6e05..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ /dev/null @@ -1,24 +0,0 @@ -process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_single' - - // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" - - input: - path versions - - output: - path "software_versions.yml" , emit: yml - path "software_versions_mqc.yml", emit: mqc_yml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - template 'dumpsoftwareversions.py' -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml deleted file mode 100644 index 60b546a0..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: custom_dumpsoftwareversions -description: Custom module used to dump software versions within the nf-core pipeline template -keywords: - - custom - - version -tools: - - custom: - description: Custom module used to dump software versions within the nf-core pipeline template - homepage: https://github.com/nf-core/tools - documentation: https://github.com/nf-core/tools - licence: ["MIT"] -input: - - versions: - type: file - description: YML file containing software versions - pattern: "*.yml" - -output: - - yml: - type: file - description: Standard YML file containing software versions - pattern: "software_versions.yml" - - mqc_yml: - type: file - description: MultiQC custom content YML file containing software versions - pattern: "software_versions_mqc.yml" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@drpatelh" - - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py deleted file mode 100644 index 787bdb7b..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env python - -import platform -from textwrap import dedent - -import yaml - - -def _make_versions_html(versions): - html = [ - dedent( - """\\ - - - - - - - - - - """ - ) - ] - for process, tmp_versions in sorted(versions.items()): - html.append("") - for i, (tool, version) in enumerate(sorted(tmp_versions.items())): - html.append( - dedent( - f"""\\ - - - - - - """ - ) - ) - html.append("") - html.append("
    Process Name Software Version
    {process if (i == 0) else ''}{tool}{version}
    ") - return "\\n".join(html) - - -versions_this_module = {} -versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, -} - -with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - -# aggregate versions by the module name (derived from fully-qualified process name) -versions_by_module = {} -for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - if versions_by_module[module] != process_versions: - raise AssertionError( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) - except KeyError: - versions_by_module[module] = process_versions - -versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", -} - -versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), -} - -with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) -with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - -with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf deleted file mode 100644 index 05730368..00000000 --- a/modules/nf-core/fastqc/main.nf +++ /dev/null @@ -1,59 +0,0 @@ -process FASTQC { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'quay.io/biocontainers/fastqc:0.11.9--0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip") , emit: zip - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - // Add soft-links to original FastQs for consistent naming in pipeline - def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - """ - [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz - fastqc $args --threads $task.cpus ${prefix}.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } else { - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! 
-f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz - fastqc $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.html - touch ${prefix}.zip - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml deleted file mode 100644 index 4da5bb5a..00000000 --- a/modules/nf-core/fastqc/meta.yml +++ /dev/null @@ -1,52 +0,0 @@ -name: fastqc -description: Run FastQC on sequenced reads -keywords: - - quality control - - qc - - adapters - - fastq -tools: - - fastqc: - description: | - FastQC gives general quality metrics about your reads. - It provides information about the quality score distribution - across your reads, the per base sequence content (%A/C/G/T). - You get information about adapter contamination and other - overrepresented sequences. - homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ - documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ - licence: ["GPL-2.0-only"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - html: - type: file - description: FastQC report - pattern: "*_{fastqc.html}" - - zip: - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" diff --git a/modules/nf-core/gatk4/markduplicates/main.nf b/modules/nf-core/gatk4/markduplicates/main.nf new file mode 100644 index 00000000..7e0cffa7 --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/main.nf @@ -0,0 +1,65 @@ +process GATK4_MARKDUPLICATES { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::gatk4=4.4.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(bam) + path fasta + path fasta_fai + + output: + tuple val(meta), path("*cram"), emit: cram, optional: true + tuple val(meta), path("*bam"), emit: bam, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.bai"), emit: bai, optional: true + tuple val(meta), path("*.metrics"), emit: metrics + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.sample_id}" + def input_list = bam.collect{"--INPUT $it"}.join(' ') + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M" MarkDuplicates \\ + $input_list \\ + --OUTPUT ${prefix}.md.bam \\ + --METRICS_FILE ${prefix}.md.metrics \\ + --TMP_DIR . \\ + --CREATE_INDEX \\ + ${reference} \\ + $args + + mv ${prefix}.md.bai ${prefix}.md.bam.bai + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.sample_id}" + + """ + touch ${prefix}.md.bam + touch ${prefix}.md.bam.bai + touch ${prefix}.md.metrics + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/nf-core/gatk4/markduplicates/meta.yml b/modules/nf-core/gatk4/markduplicates/meta.yml new file mode 100644 index 00000000..ddf98d2f --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/meta.yml @@ -0,0 +1,72 @@ +name: gatk4_markduplicates +description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. +keywords: + - markduplicates + - bam + - sort +tools: + - gatk4: + description: + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM file + pattern: "*.{bam}" + - fasta: + type: file + description: Fasta file + pattern: "*.{fasta}" + - fasta_fai: + type: file + description: Fasta index file + pattern: "*.{fai}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: Marked duplicates BAM file + pattern: "*.{bam}" + - cram: + type: file + description: Marked duplicates CRAM file + pattern: "*.{cram}" + - bai: + type: file + description: BAM index file + pattern: "*.{bam.bai}" + - crai: + type: file + description: CRAM index file + pattern: "*.{cram.crai}" + - metrics: + type: file + description: Duplicate metrics file generated by GATK + pattern: "*.{metrics.txt}" + +authors: + - "@ajodeh-juma" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf deleted file mode 100644 index a8159a57..00000000 --- a/modules/nf-core/multiqc/main.nf +++ /dev/null @@ -1,53 +0,0 @@ -process MULTIQC { - label 'process_single' - - conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" - - input: - path multiqc_files, stageAs: "?/*" - path(multiqc_config) - path(extra_multiqc_config) - path(multiqc_logo) - - output: - path "*multiqc_report.html", emit: report - path "*_data" , emit: data - path "*_plots" , optional:true, emit: plots - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def config = multiqc_config ? "--config $multiqc_config" : '' - def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' - """ - multiqc \\ - --force \\ - $args \\ - $config \\ - $extra_config \\ - . - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS - """ - - stub: - """ - touch multiqc_data - touch multiqc_plots - touch multiqc_report.html - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml deleted file mode 100644 index ebc29b27..00000000 --- a/modules/nf-core/multiqc/meta.yml +++ /dev/null @@ -1,55 +0,0 @@ -name: MultiQC -description: Aggregate results from bioinformatics analyses across many samples into a single report -keywords: - - QC - - bioinformatics tools - - Beautiful stand-alone HTML report -tools: - - multiqc: - description: | - MultiQC searches a given directory for analysis logs and compiles a HTML report. - It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. - homepage: https://multiqc.info/ - documentation: https://multiqc.info/docs/ - licence: ["GPL-3.0-or-later"] - -input: - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - - multiqc_config: - type: file - description: Optional config yml for MultiQC - pattern: "*.{yml,yaml}" - - extra_multiqc_config: - type: file - description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. - pattern: "*.{yml,yaml}" - - multiqc_logo: - type: file - description: Optional logo file for MultiQC - pattern: "*.{png}" - -output: - - report: - type: file - description: MultiQC report file - pattern: "multiqc_report.html" - - data: - type: dir - description: MultiQC data dir - pattern: "multiqc_data" - - plots: - type: file - description: Plots created by MultiQC - pattern: "*_data" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@abhi18av" - - "@bunop" - - "@drpatelh" - - "@jfy133" diff --git a/modules/nf-core/samtools/dict/main.nf b/modules/nf-core/samtools/dict/main.nf new file mode 100644 index 00000000..fb019510 --- /dev/null +++ b/modules/nf-core/samtools/dict/main.nf @@ -0,0 +1,44 @@ +process SAMTOOLS_DICT { + tag "$fasta" + label 'process_single' + + conda (params.enable_conda ? "bioconda::samtools=1.16.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : + 'biocontainers/samtools:1.16.1--h6899075_1' }" + + input: + path fasta + + output: + path "*.dict" , emit: dict + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + dict \\ + $args \\ + $fasta \\ + > ${fasta}.dict + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${fasta}.dict + cat <<-END_VERSIONS > versions.yml + + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/dict/meta.yml b/modules/nf-core/samtools/dict/meta.yml new file mode 100644 index 00000000..e3eeccc8 --- /dev/null +++ b/modules/nf-core/samtools/dict/meta.yml @@ -0,0 +1,41 @@ +name: samtools_dict +description: Create a sequence dictionary file from a FASTA file +keywords: + - dict + - fasta +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - dict: + type: file + description: FASTA dictionary file + pattern: "*.{dict}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@muffato" diff --git a/modules/nf-core/samtools/dict/samtools-dict.diff b/modules/nf-core/samtools/dict/samtools-dict.diff new file mode 100644 index 00000000..d80d13fb --- /dev/null +++ b/modules/nf-core/samtools/dict/samtools-dict.diff @@ -0,0 +1,20 @@ +Changes in module 'nf-core/samtools/dict' +--- modules/nf-core/samtools/dict/main.nf ++++ modules/nf-core/samtools/dict/main.nf +@@ -8,11 +8,11 @@ + 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + + input: +- tuple val(meta), path(fasta) ++ path fasta + + output: +- tuple val(meta), path ("*.dict"), emit: dict +- path "versions.yml" , emit: versions ++ path "*.dict" , emit: dict ++ path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + +************************************************************ diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf new file mode 100644 index 00000000..75bfdb96 --- /dev/null +++ b/modules/nf-core/samtools/faidx/main.nf @@ -0,0 +1,44 @@ +process SAMTOOLS_FAIDX { + tag "$fasta" + label 'process_single' + + conda (params.enable_conda ? "bioconda::samtools=1.16.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : + 'biocontainers/samtools:1.16.1--h6899075_1' }" + + input: + path fasta + + output: + path "*.fai" , emit: fai + path "*.gzi" , emit: gzi, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + faidx \\ + $args \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${fasta}.fai + cat <<-END_VERSIONS > versions.yml + + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml new file mode 100644 index 00000000..fe2fe9a1 --- /dev/null +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -0,0 +1,47 @@ +name: samtools_faidx +description: Index FASTA file +keywords: + - index + - fasta +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@phue" diff --git a/modules/nf-core/samtools/faidx/samtools-faidx.diff b/modules/nf-core/samtools/faidx/samtools-faidx.diff new file mode 100644 index 00000000..aef567c6 --- /dev/null +++ b/modules/nf-core/samtools/faidx/samtools-faidx.diff @@ -0,0 +1,22 @@ +Changes in module 'nf-core/samtools/faidx' +--- modules/nf-core/samtools/faidx/main.nf ++++ modules/nf-core/samtools/faidx/main.nf +@@ -8,12 +8,12 @@ + 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + + input: +- tuple val(meta), path(fasta) ++ path fasta + + output: +- tuple val(meta), path ("*.fai"), emit: fai +- tuple val(meta), path ("*.gzi"), emit: gzi, optional: true +- path "versions.yml" , emit: versions ++ path "*.fai" , emit: fai ++ path "*.gzi" , emit: gzi, optional: true ++ path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + +************************************************************ diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf new file mode 100644 index 00000000..610178dd --- /dev/null +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -0,0 +1,42 @@ +process SAMTOOLS_FLAGSTAT { + tag "$meta.id" + label 'process_single' + + conda "bioconda::samtools=1.16.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : + 'biocontainers/samtools:1.16.1--h6899075_1' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.flagstat"), emit: flagstat + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.sample_id}" + """ + samtools \\ + flagstat \\ + --threads ${task.cpus} \\ + $bam \\ + > ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.sample_id}" + """ + touch ${prefix}.flagstat + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml new file mode 100644 index 00000000..95269063 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -0,0 +1,49 @@ +name: samtools_flagstat +description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type +keywords: + - stats + - mapping + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" diff --git a/modules/nf-core/samtools/flagstat/samtools-flagstat.diff b/modules/nf-core/samtools/flagstat/samtools-flagstat.diff new file mode 100644 index 00000000..462e26c9 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/samtools-flagstat.diff @@ -0,0 +1,26 @@ +Changes in module 'nf-core/samtools/flagstat' +--- modules/nf-core/samtools/flagstat/main.nf ++++ modules/nf-core/samtools/flagstat/main.nf +@@ -19,7 +19,7 @@ + + script: + def args = task.ext.args ?: '' +- def prefix = task.ext.prefix ?: "${meta.id}" ++ def prefix = task.ext.prefix ?: "${meta.sample_id}" + """ + samtools \\ + flagstat \\ +@@ -32,4 +32,11 @@ + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ ++ ++ stub: ++ def prefix = task.ext.prefix ?: "${meta.sample_id}" ++ """ ++ touch ${prefix}.flagstat ++ echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml ++ """ + } + +************************************************************ diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf new file mode 100644 index 00000000..18a3e1dc --- /dev/null +++ b/modules/nf-core/samtools/sort/main.nf @@ -0,0 +1,49 @@ +process SAMTOOLS_SORT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : + 'biocontainers/samtools:1.18--h50ea8bc_1' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.csi"), emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.prefix}" + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + samtools sort \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.bam \\ + -T $prefix \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.prefix}" + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml new file mode 100644 index 00000000..2200de72 --- /dev/null +++ b/modules/nf-core/samtools/sort/meta.yml @@ -0,0 +1,51 @@ +name: samtools_sort +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. 
+ homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - csi: + type: file + description: BAM index file (optional) + pattern: "*.csi" +authors: + - "@drpatelh" + - "@ewels" +maintainers: + - "@drpatelh" + - "@ewels" diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml new file mode 100644 index 00000000..9d03dd2e --- /dev/null +++ b/modules/nf-core/star/genomegenerate/environment.yml @@ -0,0 +1,7 @@ +name: star_genomegenerate +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::star=2.7.3a diff --git a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf new file mode 100644 index 00000000..0b1964d9 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/main.nf @@ -0,0 +1,92 @@ +process STAR_GENOMEGENERATE { + tag "$fasta" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/star:2.7.3a--0' : + 'quay.io/biocontainers/star:2.7.3a--0' }" + + input: + path fasta + path gtf + + output: + path "star_index" , emit: index + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args_list = args.tokenize() + def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + def include_gtf = gtf ? 
"--sjdbGTFfile $gtf" : '' + + """ + mkdir -p star_index/ + + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star_index/ \\ + --genomeFastaFiles $fasta \\ + $include_gtf \\ + --runThreadN $task.cpus \\ + $memory \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + END_VERSIONS + """ + + stub: + if (gtf) { + """ + mkdir -p star_index/ + touch star_index/Genome + touch star_index/Log.out + touch star_index/SA + touch star_index/SAindex + touch star_index/chrLength.txt + touch star_index/chrName.txt + touch star_index/chrNameLength.txt + touch star_index/chrStart.txt + touch star_index/exonGeTrInfo.tab + touch star_index/exonInfo.tab + touch star_index/geneInfo.tab + touch star_index/genomeParameters.txt + touch star_index/sjdbInfo.txt + touch star_index/sjdbList.fromGTF.out.tab + touch star_index/sjdbList.out.tab + touch star_index/transcriptInfo.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + } else { + """ + mkdir -p star_index/ + touch star_index/Genome + touch star_index/Log.out + touch star_index/SA + touch star_index/SAindex + touch star_index/chrLength.txt + touch star_index/chrName.txt + touch star_index/chrNameLength.txt + touch star_index/chrStart.txt + touch star_index/genomeParameters.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/star/genomegenerate/meta.yml b/modules/nf-core/star/genomegenerate/meta.yml new file mode 100644 index 00000000..1061e1b8 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/meta.yml @@ -0,0 +1,53 @@ +name: star_genomegenerate +description: Create index for STAR +keywords: + - index + - fasta + - genome + - reference +tools: + - star: + description: | + STAR is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: https://github.com/alexdobin/STAR + manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf + doi: 10.1093/bioinformatics/bts635 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Fasta file of the reference genome + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - gtf: + type: file + description: GTF file of the reference genome +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - index: + type: directory + description: Folder containing the star index files + pattern: "star" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" + - "@drpatelh" +maintainers: + - "@kevinmenden" + - "@drpatelh" diff --git a/modules/nf-core/star/genomegenerate/star-genomegenerate.diff b/modules/nf-core/star/genomegenerate/star-genomegenerate.diff new file mode 100644 index 00000000..ae33e85b --- /dev/null +++ b/modules/nf-core/star/genomegenerate/star-genomegenerate.diff @@ -0,0 +1,185 @@ +Changes in module 'nf-core/star/genomegenerate' +--- modules/nf-core/star/genomegenerate/environment.yml ++++ modules/nf-core/star/genomegenerate/environment.yml +@@ -4,7 +4,4 @@ + - bioconda + - defaults + dependencies: +- - bioconda::samtools=1.18 +- - bioconda::htslib=1.18 +- - bioconda::star=2.7.10a +- - conda-forge::gawk=5.1.0 ++ - bioconda::star=2.7.3a + +--- modules/nf-core/star/genomegenerate/main.nf ++++ modules/nf-core/star/genomegenerate/main.nf +@@ -4,16 +4,16 @@ + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? +- 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' : +- 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' }" ++ 'https://depot.galaxyproject.org/singularity/star:2.7.3a--0' : ++ 'quay.io/biocontainers/star:2.7.3a--0' }" + + input: +- tuple val(meta), path(fasta) +- tuple val(meta2), path(gtf) ++ path fasta ++ path gtf + + output: +- tuple val(meta), path("star") , emit: index +- path "versions.yml" , emit: versions ++ path "star_index" , emit: index ++ path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when +@@ -23,96 +23,69 @@ + def args_list = args.tokenize() + def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + def include_gtf = gtf ? 
"--sjdbGTFfile $gtf" : '' +- if (args_list.contains('--genomeSAindexNbases')) { ++ ++ """ ++ mkdir -p star_index/ ++ ++ STAR \\ ++ --runMode genomeGenerate \\ ++ --genomeDir star_index/ \\ ++ --genomeFastaFiles $fasta \\ ++ $include_gtf \\ ++ --runThreadN $task.cpus \\ ++ $memory \\ ++ $args ++ ++ cat <<-END_VERSIONS > versions.yml ++ "${task.process}": ++ star: \$(STAR --version | sed -e "s/STAR_//g") ++ END_VERSIONS + """ +- mkdir star +- STAR \\ +- --runMode genomeGenerate \\ +- --genomeDir star/ \\ +- --genomeFastaFiles $fasta \\ +- $include_gtf \\ +- --runThreadN $task.cpus \\ +- $memory \\ +- $args ++ ++ stub: ++ if (gtf) { ++ """ ++ mkdir -p star_index/ ++ touch star_index/Genome ++ touch star_index/Log.out ++ touch star_index/SA ++ touch star_index/SAindex ++ touch star_index/chrLength.txt ++ touch star_index/chrName.txt ++ touch star_index/chrNameLength.txt ++ touch star_index/chrStart.txt ++ touch star_index/exonGeTrInfo.tab ++ touch star_index/exonInfo.tab ++ touch star_index/geneInfo.tab ++ touch star_index/genomeParameters.txt ++ touch star_index/sjdbInfo.txt ++ touch star_index/sjdbList.fromGTF.out.tab ++ touch star_index/sjdbList.out.tab ++ touch star_index/transcriptInfo.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') +- gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } else { + """ +- samtools faidx $fasta +- NUM_BASES=`gawk '{sum = sum + \$2}END{if ((log(sum)/log(2))/2 - 1 > 14) {printf "%.0f", 14} else {printf "%.0f", (log(sum)/log(2))/2 - 1}}' ${fasta}.fai` +- +- mkdir star +- STAR \\ +- --runMode genomeGenerate \\ +- --genomeDir star/ \\ +- --genomeFastaFiles $fasta \\ +- $include_gtf \\ +- --runThreadN $task.cpus \\ +- --genomeSAindexNbases \$NUM_BASES \\ +- $memory \\ +- $args ++ mkdir -p star_index/ ++ touch star_index/Genome ++ touch star_index/Log.out ++ touch star_index/SA ++ touch star_index/SAindex ++ touch star_index/chrLength.txt ++ touch star_index/chrName.txt ++ touch star_index/chrNameLength.txt ++ touch star_index/chrStart.txt ++ touch star_index/genomeParameters.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') +- gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') +- END_VERSIONS +- """ +- } +- +- stub: +- if (gtf) { +- """ +- mkdir star +- touch star/Genome +- touch star/Log.out +- touch star/SA +- touch star/SAindex +- touch star/chrLength.txt +- touch star/chrName.txt +- touch star/chrNameLength.txt +- touch star/chrStart.txt +- touch star/exonGeTrInfo.tab +- touch star/exonInfo.tab +- touch star/geneInfo.tab +- touch star/genomeParameters.txt +- touch star/sjdbInfo.txt +- touch star/sjdbList.fromGTF.out.tab +- touch star/sjdbList.out.tab +- touch star/transcriptInfo.tab +- +- cat <<-END_VERSIONS > versions.yml +- "${task.process}": +- star: \$(STAR --version | sed -e "s/STAR_//g") +- samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') +- gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') +- END_VERSIONS +- """ +- } else { +- """ +- mkdir star +- touch star/Genome +- touch star/Log.out +- touch star/SA +- touch star/SAindex +- touch star/chrLength.txt +- touch star/chrName.txt +- touch star/chrNameLength.txt +- touch 
star/chrStart.txt +- touch star/genomeParameters.txt +- +- cat <<-END_VERSIONS > versions.yml +- "${task.process}": +- star: \$(STAR --version | sed -e "s/STAR_//g") +- samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') +- gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } + +************************************************************ diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test b/modules/nf-core/star/genomegenerate/tests/main.nf.test new file mode 100644 index 00000000..c17c8ba4 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test @@ -0,0 +1,115 @@ +nextflow_process { + + name "Test Process STAR_GENOMEGENERATE" + script "../main.nf" + process "STAR_GENOMEGENERATE" + tag "modules" + tag "modules_nfcore" + tag "star" + tag "star/genomegenerate" + + test("fasta_gtf") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_gtf_index") }, + { assert snapshot(process.out.versions).match("fasta_gtf_versions") } + ) + } + } + + test("fasta_gtf_stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_gtf_stub_index") }, + { assert snapshot(process.out.versions).match("fasta_gtf_stub_versions") } + ) + } + } + + test("fasta") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ [], [] ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_index") }, + { assert snapshot(process.out.versions).match("fasta_versions") } + ) + } + + } + + test("fasta_stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ [], [] ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_stub_index") }, + { assert snapshot(process.out.versions).match("fasta_stub_versions") } + ) + } + + } + +} diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap new file mode 100644 index 00000000..5653d6e6 --- 
/dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap @@ -0,0 +1,90 @@ +{ + "fasta_gtf_versions": { + "content": [ + [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:31.798555" + }, + "fasta_stub_versions": { + "content": [ + [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:55:07.521209" + }, + "fasta_gtf_stub_index": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:46.478098" + }, + "fasta_gtf_stub_versions": { + "content": [ + [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:46.491657" + }, + "fasta_index": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:57.552329" + }, + "fasta_versions": { + "content": [ + [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:57.560541" + }, + "fasta_gtf_index": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:31.786814" + }, + "fasta_stub_index": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:55:07.517472" + } +} \ No newline at end of file diff --git a/modules/nf-core/star/genomegenerate/tests/tags.yml b/modules/nf-core/star/genomegenerate/tests/tags.yml new file mode 100644 index 00000000..79f619bf --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/tags.yml @@ -0,0 +1,2 @@ +star/genomegenerate: + - modules/nf-core/star/genomegenerate/** diff --git a/nextflow.config b/nextflow.config index 0eef462b..21529327 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,58 +9,119 @@ // Global default params, used in configs params { - // TODO nf-core: Specify your pipeline's command line flags // Input options - input = null + input = null + // Workflow mode + mode = null - // References - genome = null - igenomes_base = 's3://ngi-igenomes/igenomes' - igenomes_ignore = false - // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' - multiqc_methods_description = null + // Process configuration + processes_manual = false + processes_include = null + processes_exclude = null - // Boilerplate options - outdir = null - tracedir = "${params.outdir}/pipeline_info" - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false 
- monochrome_logs = false - hook_url = null - help = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes' - enable_conda = false + // Reference genome information; iGenomes is effectively disabled but retained for linting + genome = null + force_genome = false + igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_ignore = true + hmf_genome_base = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes' + + // Other reference data and config exposed to user on CLI + max_fastq_records = 10000000 + + isofox_counts = null + isofox_gc_ratios = null + isofox_gene_ids = null + isofox_tpm_norm = null + isofox_read_length = null + isofox_functions = 'TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS;RETAINED_INTRONS' + + gridss_config = null + + prepare_reference_only = false + create_stub_placeholders = false + // Boilerplate options + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + version = false // Config options + config_profile_name = null + config_profile_description = null custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null config_profile_contact = null config_profile_url = null - config_profile_name = null - // Max resource options // Defaults only, expecting to be overwritten - max_memory = '128.GB' - max_cpus = 16 - max_time = '240.h' + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' + + // Parameter lint ignore list + // NOTE(SW): entries here are generally have conditional defaults or are for internal use only + lint_ignore = [ + 'lint_ignore', + 'genome_type', + 'genome_version', + 'genomes', + 'hmf_genome_base', + 'hmf_data_paths', + 'panel', + 'panel_data_paths', + 'ref_data', + 'ref_data_genome_alt', + 'ref_data_genome_bwamem2_index', + 'ref_data_genome_dict', + 'ref_data_genome_fai', + 'ref_data_genome_fasta', + 'ref_data_genome_gridss_index', + 'ref_data_genome_gtf', + 'ref_data_genome_star_index', + 'ref_data_hla_slice_bed', + 'ref_data_hmf_data_path', + 'ref_data_panel_data_path', + 'ref_data_virusbreakenddb_path', + ] + + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = true + validationSchemaIgnoreParams = "igenomes_base,${lint_ignore.join(',')}" + validationShowHiddenParams = false + validate_params = true + +} +// Load igenomes.config if required +if (!params.igenomes_ignore) { + includeConfig 'conf/igenomes.config' +} else { + params.genomes = [:] } // Load base.config by default for all pipelines includeConfig 'conf/base.config' +// Load data configs +includeConfig 'conf/hmf_data.config' +includeConfig 'conf/hmf_genomes.config' +includeConfig 'conf/panel_data.config' + +// Load panel configuration if needed +if (params.containsKey('mode') && params.mode == 'targeted') { + includeConfig 'conf/targeted_parameters.config' +} + // Load nf-core custom profiles from different Institutions try { includeConfig "${params.custom_config_base}/nfcore_custom.config" @@ -69,88 +130,128 @@ try { } // Load nf-core/oncoanalyser custom profiles from different institutions. -// Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs! 
-// try { -// includeConfig "${params.custom_config_base}/pipeline/oncoanalyser.config" -// } catch (Exception e) { -// System.err.println("WARNING: Could not load nf-core/config/oncoanalyser profiles: ${params.custom_config_base}/pipeline/oncoanalyser.config") -// } - - +try { + includeConfig "${params.custom_config_base}/pipeline/oncoanalyser.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config/oncoanalyser profiles: ${params.custom_config_base}/pipeline/oncoanalyser.config") +} profiles { - debug { process.beforeScript = 'echo $HOSTNAME' } + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + nextflow.enable.configProcessNamesValidation = true + } conda { - params.enable_conda = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda', 'defaults'] + apptainer.enabled = false } mamba { - params.enable_conda = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - docker.enabled = true - docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' + } + arm { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + apptainer.autoMounts = true + 
conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' } gitpod { - executor.name = 'local' - executor.cpus = 16 - executor.memory = 60.GB + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } + test_stub { includeConfig 'conf/test_stub.config' } } +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' -// Load igenomes.config if required -if (!params.igenomes_ignore) { - includeConfig 'conf/igenomes.config' -} else { - params.genomes = [:] +// Nextflow plugins +plugins { + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } - // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -165,32 +266,35 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. 
+nextflow.enable.configProcessNamesValidation = false + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { name = 'nf-core/oncoanalyser' - author = 'Stephen Watts' + author = """Stephen Watts""" homePage = 'https://github.com/nf-core/oncoanalyser' - description = 'A comprehensive cancer WGS/WTS analysis and reporting pipeline' + description = """A comprehensive cancer DNA/RNA analysis and reporting pipeline""" mainScript = 'main.nf' - nextflowVersion = '!>=21.10.3' - version = '1.0dev' + nextflowVersion = '!>=22.10.5' + version = '1.0.0' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index d570597b..a64d8932 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -2,7 +2,7 @@ "$schema": "http://json-schema.org/draft-07/schema", "$id": "https://raw.githubusercontent.com/nf-core/oncoanalyser/master/nextflow_schema.json", "title": "nf-core/oncoanalyser pipeline parameters", - "description": "A comprehensive cancer WGS/WTS analysis and reporting pipeline", + "description": "A comprehensive cancer DNA/RNA analysis and reporting pipeline", "type": "object", "definitions": { "input_output_options": { @@ -15,9 +15,10 @@ "input": { "type": "string", "format": "file-path", + "exists": true, + "schema": "assets/schema_input.json", "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/oncoanalyser/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" @@ -27,7 +28,15 @@ "format": "directory-path", "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", "fa_icon": "fas fa-folder-open" - }, + } + } + }, + "other_options": { + "title": "Other options", + "type": "object", + "fa_icon": "fas fa-book", + "description": "Other options specific to this pipeline.", + "properties": { "email": { "type": "string", "description": "Email address for completion summary.", @@ -35,40 +44,116 @@ "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. 
If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" }, - "multiqc_title": { + "mode": { + "type": "string", + "description": "Workflow run mode.", + "fa_icon": "fas fa-diagram-project", + "pattern": "^(wgts|targeted)" + }, + "panel": { + "type": "string", + "description": "Name of panel to use.", + "fa_icon": "fas fa-book" + }, + "force_genome": { + "type": "boolean", + "description": "Skip check for restricted genome.", + "default": false, + "fa_icon": "fas fa-palette" + }, + "processes_manual": { + "type": "boolean", + "description": "Run only processes manually provided in processes_include.", + "fa_icon": "fas fa-diagram-project" + }, + "processes_exclude": { "type": "string", - "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", - "fa_icon": "fas fa-file-signature" + "description": "Pipeline processes to exclude.", + "fa_icon": "fas fa-diagram-project" + }, + "processes_include": { + "type": "string", + "description": "Pipeline processes to include.", + "fa_icon": "fas fa-diagram-project" + }, + "prepare_reference_only": { + "type": "boolean", + "description": "Prepare and write reference output only.", + "default": false, + "fa_icon": "fas fa-diagram-project" + }, + "create_stub_placeholders": { + "type": "boolean", + "description": "Create placeholders for reference data during stub run.", + "default": false, + "fa_icon": "fas fa-diagram-project" + }, + "max_fastq_records": { + "type": "integer", + "description": "When positive, will use fastp to split fastq files so that each resultant fastq file has no more than max_fastq_records records. 
When nonpositive, fastp is not used and the provided fastq files are passed as-is to the aligner.", + "fa_icon": "fas fa-cog" + }, + "gridss_config": { + "type": "string", + "description": "Path to GRIDSS configuration file.", + "fa_icon": "fas fa-cog" + }, + "isofox_read_length": { + "type": "integer", + "description": "User defined RNA read length used for Isofox.", + "fa_icon": "fas fa-cog" + }, + "isofox_gc_ratios": { + "type": "string", + "description": "User defined Isofox expected GC ratios file.", + "fa_icon": "fas fa-cog" + }, + "isofox_counts": { + "type": "string", + "description": "User defined Isofox expected counts files (read length dependent).", + "fa_icon": "fas fa-cog" + }, + "isofox_tpm_norm": { + "type": "string", + "description": "User defined Isofox TPM normalisation file for panel data.", + "fa_icon": "fas fa-cog" + }, + "isofox_gene_ids": { + "type": "string", + "description": "User defined Isofox gene list file for panel data.", + "fa_icon": "fas fa-cog" + }, + "isofox_functions": { + "type": "string", + "description": "Semicolon-separated list of Isofox functions to run", + "default": "TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS;RETAINED_INTRONS", + "fa_icon": "fas fa-cog" } } }, - "reference_genome_options": { - "title": "Reference genome options", + "reference_data_options": { + "title": "Reference data options", "type": "object", "fa_icon": "fas fa-dna", - "description": "Reference genome related files and options required for the workflow.", + "description": "Reference data files and options required for the workflow.", "properties": { "genome": { "type": "string", - "description": "Name of iGenomes reference.", - "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + "description": "Name of genome reference.", + "fa_icon": "fas fa-book" }, - "fasta": { + "genome_version": { "type": "string", - "format": "file-path", - "mimetype": "text/plain", - "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", - "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", - "fa_icon": "far fa-file-code" + "description": "Version of reference genome.", + "fa_icon": "far fa-file-code", + "enum": ["37", "38", 37, 38], + "hidden": true }, - "igenomes_base": { + "genome_type": { "type": "string", - "format": "directory-path", - "description": "Directory / URL base for iGenomes references.", - "default": "s3://ngi-igenomes/igenomes", - "fa_icon": "fas fa-cloud-download-alt", + "description": "Type of reference genome.", + "fa_icon": "far fa-file-code", + "enum": ["alt", "no_alt"], "hidden": true }, "igenomes_ignore": { @@ -76,7 +161,28 @@ "description": "Do not load the iGenomes reference config.", "fa_icon": "fas fa-ban", "hidden": true, - "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + "help_text": "Do not load `igenomes.config` when running the pipeline. 
You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + }, + "ref_data_hmf_data_path": { + "type": "string", + "description": "Path to HMF data.", + "fa_icon": "far fa-folder-open" + }, + "ref_data_panel_data_path": { + "type": "string", + "description": "Path to panel data.", + "fa_icon": "far fa-folder-open" + }, + "ref_data_virusbreakenddb_path": { + "type": "string", + "description": "Path to VIRUSBreakend database (directory or tarball).", + "fa_icon": "far fa-file-code" + }, + "ref_data_hla_slice_bed": { + "format": "file-path", + "pattern": "^\\S+\\.bed$", + "description": "Path to HLA slice BED file.", + "fa_icon": "far fa-file-code" + } + } + }, @@ -157,7 +263,7 @@ "description": "Maximum amount of time that can be requested for any single job.", "default": "240.h", "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" } @@ -176,6 +282,12 @@ "fa_icon": "fas fa-question-circle", "hidden": true }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, "publish_dir_mode": { "type": "string", "default": "copy", @@ -199,14 +311,6 @@ "fa_icon": "fas fa-remove-format", "hidden": true }, - "max_multiqc_email_size": { - "type": "string", - "description": "File size limit when attaching MultiQC reports to summary emails.", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "default": "25.MB", - "fa_icon": "fas fa-file-upload", - "hidden": true - }, "monochrome_logs": { "type": "boolean", "description": "Do not use coloured log outputs.", @@ -217,31 +321,7 @@ "type": "string", "description": "Incoming hook URL for messaging service", "fa_icon": "fas fa-people-group", - "help_text": "Incoming hook URL for messaging service. Currently, only MS Teams is supported.", - "hidden": true - }, - "multiqc_config": { - "type": "string", - "description": "Custom config file to supply to MultiQC.", - "fa_icon": "fas fa-cog", - "hidden": true - }, - "multiqc_logo": { - "type": "string", - "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file", - "fa_icon": "fas fa-image", - "hidden": true - }, - "multiqc_methods_description": { - "type": "string", - "description": "Custom MultiQC yaml file containing HTML including a methods description.", - "fa_icon": "fas fa-cog" - }, - "tracedir": { - "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", - "fa_icon": "fas fa-cogs", + "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", "hidden": true }, "validate_params": { @@ -251,18 +331,26 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "show_hidden_params": { + "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
}, - "enable_conda": { + "validationFailUnrecognisedParams": { "type": "boolean", - "description": "Run this workflow with Conda. You can also use '-profile conda' instead of providing this parameter.", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", "hidden": true, - "fa_icon": "fas fa-bacon" + "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." + }, + "validationLenientMode": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient more.", + "hidden": true, + "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." } } } @@ -272,7 +360,10 @@ "$ref": "#/definitions/input_output_options" }, { - "$ref": "#/definitions/reference_genome_options" + "$ref": "#/definitions/other_options" + }, + { + "$ref": "#/definitions/reference_data_options" }, { "$ref": "#/definitions/institutional_config_options" diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 0d62beb6..00000000 --- a/pyproject.toml +++ /dev/null @@ -1,10 +0,0 @@ -# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. -# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. -[tool.black] -line-length = 120 -target_version = ["py37", "py38", "py39", "py310"] - -[tool.isort] -profile = "black" -known_first_party = ["nf_core"] -multi_line_output = 3 diff --git a/subworkflows/local/amber_profiling/main.nf b/subworkflows/local/amber_profiling/main.nf new file mode 100644 index 00000000..179888bf --- /dev/null +++ b/subworkflows/local/amber_profiling/main.nf @@ -0,0 +1,90 @@ +// +// AMBER determines b-allele frequencies at predetermined positions +// + +import Constants +import Utils + +include { AMBER } from '../../../modules/local/amber/main' + +workflow AMBER_PROFILING { + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] + ch_normal_bam // channel: [mandatory] [ meta, bam, bai ] + + // Reference data + genome_version // channel: [mandatory] genome version + heterozygous_sites // channel: [optional] /path/to/heterozygous_sites + target_region_bed // channel: [optional] /path/to/target_region_bed + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select input sources and sort + // channel: runnable: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai] + // channel: skip: [ meta ] + ch_inputs_sorted = WorkflowOncoanalyser.groupByMeta( + ch_tumor_bam, + ch_normal_bam, + ) + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + return [ + meta, + Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR), + tumor_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR), + Utils.selectCurrentOrExisting(normal_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL), + normal_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL), + ] + } + .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.AMBER_DIR) + runnable: tumor_bam && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_amber, tumor_bam, normal_bam, tumor_bai, normal_bai ] + ch_amber_inputs 
= ch_inputs_sorted.runnable + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + + def meta_amber = [ + key: meta.group_id, + id: meta.group_id, + tumor_id: Utils.getTumorDnaSampleName(meta), + ] + + if (normal_bam) { + meta_amber.normal_id = Utils.getNormalDnaSampleName(meta) + } + + [meta_amber, tumor_bam, normal_bam, tumor_bai, normal_bai] + } + + // Run process + AMBER( + ch_amber_inputs, + genome_version, + heterozygous_sites, + target_region_bed, + ) + + ch_versions = ch_versions.mix(AMBER.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, amber_dir ] + ch_outputs = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(AMBER.out.amber_dir, ch_inputs), + ch_inputs_sorted.skip.map { meta -> [meta, []] }, + ) + + emit: + amber_dir = ch_outputs // channel: [ meta, amber_dir ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bamtools_metrics/main.nf b/subworkflows/local/bamtools_metrics/main.nf new file mode 100644 index 00000000..6e16c412 --- /dev/null +++ b/subworkflows/local/bamtools_metrics/main.nf @@ -0,0 +1,117 @@ +// +// Bam Tools calculates summary statistics for BAMs +// + +import Constants +import Utils + +include { BAMTOOLS } from '../../../modules/local/bamtools/main' + +workflow BAMTOOLS_METRICS { + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] + ch_normal_bam // channel: [mandatory] [ meta, bam, bai ] + + // Reference data + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_version // channel: [mandatory] genome version + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Sort inputs, separate by tumor and normal + // channel: runnable: [ meta, bam, bai ] + // channel: skip: [ meta ] + ch_inputs_tumor_sorted = ch_tumor_bam + .map { meta, bam, bai -> + return [ + meta, + Utils.selectCurrentOrExisting(bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR), + bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR), + ] + } + .branch { meta, bam, bai -> + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.BAMTOOLS_TUMOR) + runnable: bam && !has_existing + skip: true + return meta + } + + // channel: runnable: [ meta, bam, bai ] + // channel: skip: [ meta ] + ch_inputs_normal_sorted = ch_normal_bam + .map { meta, bam, bai -> + return [ + meta, + Utils.selectCurrentOrExisting(bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL), + bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL), + ] + } + .branch { meta, bam, bai -> + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.BAMTOOLS_NORMAL) + runnable: bam && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_bamtools, bam, bai ] + ch_bamtools_inputs = Channel.empty() + .mix( + ch_inputs_tumor_sorted.runnable.map { meta, bam, bai -> [meta, Utils.getTumorDnaSample(meta), 'tumor', bam, bai] }, + ch_inputs_normal_sorted.runnable.map { meta, bam, bai -> [meta, Utils.getNormalDnaSample(meta), 'normal', bam, bai] }, + ) + .map { meta, meta_sample, sample_type, bam, bai -> + + def meta_bamtools = [ + key: meta.group_id, + id: "${meta.group_id}_${meta_sample.sample_id}", + sample_id: meta_sample.sample_id, + sample_type: sample_type, + ] + + return [meta_bamtools, bam, bai] + } + + // Run process + BAMTOOLS( + ch_bamtools_inputs, + genome_fasta, + genome_version, + ) + + ch_versions = 
ch_versions.mix(BAMTOOLS.out.versions) + + // Sort into a tumor and normal channel + ch_bamtools_out = BAMTOOLS.out.metrics + .branch { meta_bamtools, metrics -> + assert ['tumor', 'normal'].contains(meta_bamtools.sample_type) + tumor: meta_bamtools.sample_type == 'tumor' + normal: meta_bamtools.sample_type == 'normal' + placeholder: true + } + + // Set outputs, restoring original meta + // channel: [ meta, metrics ] + ch_somatic_metrics = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(ch_bamtools_out.tumor, ch_inputs), + ch_inputs_tumor_sorted.skip.map { meta -> [meta, []] }, + ) + + ch_germline_metrics = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(ch_bamtools_out.normal, ch_inputs), + ch_inputs_normal_sorted.skip.map { meta -> [meta, []] }, + ) + + emit: + somatic = ch_somatic_metrics // channel: [ meta, metrics ] + germline = ch_germline_metrics // channel: [ meta, metrics ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/chord_prediction/main.nf b/subworkflows/local/chord_prediction/main.nf new file mode 100644 index 00000000..a26d068f --- /dev/null +++ b/subworkflows/local/chord_prediction/main.nf @@ -0,0 +1,95 @@ +// +// CHORD predicts HR status for tumor samples +// + +import Constants +import Utils + +include { CHORD } from '../../../modules/local/chord/main' + +workflow CHORD_PREDICTION { + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_purple // channel: [mandatory] [ meta, purple_dir ] + + // Reference data + genome_version // channel: [mandatory] genome version + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select input sources + // channel: [ meta, purple_dir ] + ch_inputs_selected = ch_purple + .map { meta, purple_dir -> + return [meta, Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR)] + } + + // Sort inputs + // channel: runnable: [ meta, purple_dir ] + // channel: skip: [ meta ] + ch_inputs_sorted = ch_inputs_selected + .branch { meta, purple_dir -> + + def has_dna = Utils.hasTumorDna(meta) + + def tumor_id + def has_smlv_vcf + def has_sv_vcf + + if (has_dna) { + tumor_id = Utils.getTumorDnaSampleName(meta) + has_smlv_vcf = purple_dir ? file(purple_dir).resolve("${tumor_id}.purple.somatic.vcf.gz") : [] + has_sv_vcf = purple_dir ? 
file(purple_dir).resolve("${tumor_id}.purple.sv.vcf.gz") : [] + } + + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.CHORD_DIR) + + runnable: has_dna && purple_dir && has_smlv_vcf && has_sv_vcf && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_chord, smlv_vcf, sv_vcf ] + ch_chord_inputs = ch_inputs_sorted.runnable + .map { meta, purple_dir -> + + def tumor_id = Utils.getTumorDnaSampleName(meta) + + def meta_chord = [ + key: meta.group_id, + id: meta.group_id, + sample_id: tumor_id, + ] + + def smlv_vcf = file(purple_dir).resolve("${tumor_id}.purple.somatic.vcf.gz") + def sv_vcf = file(purple_dir).resolve("${tumor_id}.purple.sv.vcf.gz") + + return [meta_chord, smlv_vcf, sv_vcf] + } + + // Run process + CHORD( + ch_chord_inputs, + genome_version, + ) + + ch_versions = ch_versions.mix(CHORD.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, chord_dir ] + ch_outputs = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(CHORD.out.chord_dir, ch_inputs), + ch_inputs_sorted.skip.map { meta -> [meta, []] }, + ) + + emit: + chord_dir = ch_outputs // channel: [ meta, chord_dir ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/cobalt_profiling/main.nf b/subworkflows/local/cobalt_profiling/main.nf new file mode 100644 index 00000000..10b06485 --- /dev/null +++ b/subworkflows/local/cobalt_profiling/main.nf @@ -0,0 +1,103 @@ +// +// COBALT calculates read ratios between tumor and normal samples +// + +import Constants +import Utils + +include { COBALT } from '../../../modules/local/cobalt/main' + +workflow COBALT_PROFILING { + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] + ch_normal_bam // channel: [mandatory] [ meta, bam, bai ] + + // Reference data + gc_profile // channel: [mandatory] /path/to/gc_profile + diploid_bed // channel: [optional] /path/to/diploid_bed + target_region_normalisation // channel: [optional] /path/to/target_region_normalisation + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select input sources and sort + // NOTE(SW): germline mode is not currently supported + // channel: runnable: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai] + // channel: skip: [ meta ] + ch_inputs_sorted = WorkflowOncoanalyser.groupByMeta( + ch_tumor_bam, + ch_normal_bam, + ) + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + return [ + meta, + Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR), + tumor_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR), + Utils.selectCurrentOrExisting(normal_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL), + normal_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL), + ] + } + .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.COBALT_DIR) + runnable_tn: tumor_bam && normal_bam && !has_existing + runnable_to: tumor_bam && !has_existing + skip: true + return meta + } + + // First set diploid BED input for tumor/normal and tumor only samples + // NOTE(SW): since the diploid BED is provided as a channel, I seem to be only able to include via channel ops + // channel: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai, diploid_bed ] + ch_inputs_runnable = Channel.empty() + .mix( + ch_inputs_sorted.runnable_tn.map { [*it, []] }, + 
ch_inputs_sorted.runnable_to.combine(diploid_bed), + ) + + // Create process input channel + // channel: sample_data: [ meta_cobalt, tumor_bam, normal_bam, tumor_bai, normal_bai ] + // channel: diploid_bed: [ diploid_bed ] + ch_cobalt_inputs = ch_inputs_runnable + .multiMap { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, diploid_bed -> + + def meta_cobalt = [ + key: meta.group_id, + id: meta.group_id, + tumor_id: Utils.getTumorDnaSampleName(meta), + ] + + if (normal_bam) { + meta_cobalt.normal_id = Utils.getNormalDnaSampleName(meta) + } + + sample_data: [meta_cobalt, tumor_bam, normal_bam, tumor_bai, normal_bai] + diploid_bed: diploid_bed + } + + // Run process + COBALT( + ch_cobalt_inputs.sample_data, + gc_profile, + ch_cobalt_inputs.diploid_bed, + target_region_normalisation, + ) + + ch_versions = ch_versions.mix(COBALT.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, cobalt_dir ] + ch_outputs = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(COBALT.out.cobalt_dir, ch_inputs), + ch_inputs_sorted.skip.map { meta -> [meta, []] }, + ) + + emit: + cobalt_dir = ch_outputs // channel: [ meta, cobalt_dir ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/cuppa_prediction/main.nf b/subworkflows/local/cuppa_prediction/main.nf new file mode 100644 index 00000000..5c839073 --- /dev/null +++ b/subworkflows/local/cuppa_prediction/main.nf @@ -0,0 +1,149 @@ +// +// CUPPA predicts tissue of origin from molecular profiles +// + +import Constants +import Utils + +include { CUPPA } from '../../../modules/local/cuppa/main' + +workflow CUPPA_PREDICTION { + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_isofox // channel: [mandatory] [ meta, isofox_dir ] + ch_purple // channel: [mandatory] [ meta, purple_dir ] + ch_linx // channel: [mandatory] [ meta, linx_annotation_dir ] + ch_virusinterpreter // channel: [mandatory] [ meta, virusinterpreter_dir ] + + // Reference data + genome_version // channel: [mandatory] genome version + cuppa_resources // channel: [mandatory] /path/to/cuppa_resources/ + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select input sources + // channel: [ meta, isofox_dir, purple_dir, linx_annotation_dir, virusinterpreter_dir ] + ch_inputs_selected = WorkflowOncoanalyser.groupByMeta( + ch_isofox, + ch_purple, + ch_linx, + ch_virusinterpreter, + ) + .map { meta, isofox_dir, purple_dir, linx_annotation_dir, virusinterpreter_dir -> + + def inputs = [ + Utils.selectCurrentOrExisting(isofox_dir, meta, Constants.INPUT.ISOFOX_DIR), + Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR), + Utils.selectCurrentOrExisting(linx_annotation_dir, meta, Constants.INPUT.LINX_ANNO_DIR_TUMOR), + Utils.selectCurrentOrExisting(virusinterpreter_dir, meta, Constants.INPUT.VIRUSINTERPRETER_DIR), + ] + + return [meta, *inputs] + } + + // Sort inputs + // channel: runnable: [ meta, isofox_dir, purple_dir, linx_annotation_dir, virusinterpreter_dir ] + // channel: skip: [ meta ] + ch_inputs_sorted = ch_inputs_selected + .branch { meta, isofox_dir, purple_dir, linx_annotation_dir, virusinterpreter_dir -> + + // Run the following: + // - tumor DNA and normal DNA + // - tumor DNA and normal DNA, and tumor RNA + // - tumor RNA only + // + // Do not run the following: + // - tumor DNA only + // - panel mode (controlled by excluded from targeted subworkflow) + // + // (run exclusions currently done basis for presence of 
normal DNA) + + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.CUPPA_DIR) + def has_normal_dna = Utils.hasNormalDna(meta) + + def has_runnable_inputs = isofox_dir || (purple_dir && linx_annotation_dir && has_normal_dna) + + runnable: has_runnable_inputs && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: sample_data: [ meta, isofox_dir, purple_dir, linx_annotation_dir, virusinterpreter_dir ] + // channel: classifer: [ classifier ] + ch_cuppa_inputs = ch_inputs_sorted.runnable + .multiMap{ meta, isofox_dir, purple_dir, linx_annotation_dir, virusinterpreter_dir -> + + def meta_cuppa = [ + key: meta.group_id, + id: meta.group_id, + ] + + def has_tumor_dna = Utils.hasTumorDna(meta) + def has_normal_dna = Utils.hasNormalDna(meta) + def has_tumor_rna = Utils.hasTumorRna(meta) + + def has_dna_inputs = (purple_dir && linx_annotation_dir) + def has_rna_inputs = isofox_dir + + def run_dna = has_dna_inputs && has_tumor_dna && has_normal_dna + def run_rna = has_rna_inputs && has_tumor_rna + + def classifier + + if (run_dna && run_rna) { + + classifier = 'ALL' + + meta_cuppa.sample_id = Utils.getTumorDnaSampleName(meta) + meta_cuppa.sample_rna_id = Utils.getTumorRnaSampleName(meta) + + } else if (run_dna) { + + classifier = 'DNA' + + meta_cuppa.sample_id = Utils.getTumorDnaSampleName(meta) + + } else if (run_rna) { + + classifier = 'RNA' + + meta_cuppa.sample_id = Utils.getTumorRnaSampleName(meta) + + } else { + + assert false + + } + + sample_data: [meta_cuppa, isofox_dir, purple_dir, linx_annotation_dir, virusinterpreter_dir] + classifier: classifier + } + + // Run process + CUPPA( + ch_cuppa_inputs.sample_data, + genome_version, + cuppa_resources, + ch_cuppa_inputs.classifier, + ) + + ch_versions = ch_versions.mix(CUPPA.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, cuppa_dir ] + ch_outputs = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(CUPPA.out.cuppa_dir, ch_inputs), + ch_inputs_sorted.skip.map { meta -> [meta, []] }, + ) + + emit: + cuppa_dir = ch_outputs // channel: [ meta, cuppa_dir ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/flagstat_metrics/main.nf b/subworkflows/local/flagstat_metrics/main.nf new file mode 100644 index 00000000..2ad4798a --- /dev/null +++ b/subworkflows/local/flagstat_metrics/main.nf @@ -0,0 +1,111 @@ +// +// SAMtools flagstat generates statistics for read alignments from the SAM FLAG field +// + +import Constants +import Utils + +include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main' + +workflow FLAGSTAT_METRICS { + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] + ch_normal_bam // channel: [mandatory] [ meta, bam, bai ] + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Sort inputs, separate by tumor and normal + // channel: runnable: [ meta, bam, bai ] + // channel: skip: [ meta ] + ch_inputs_tumor_sorted = ch_tumor_bam + .map { meta, bam, bai -> + return [ + meta, + Utils.selectCurrentOrExisting(bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR), + bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR), + ] + } + .branch { meta, bam, bai -> + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.FLAGSTAT_TUMOR) + runnable: bam && !has_existing + skip: true + return meta + } + + // channel: runnable: [ meta, bam, bai ] + // channel: 
skip: [ meta ] + ch_inputs_normal_sorted = ch_normal_bam + .map { meta, bam, bai -> + return [ + meta, + Utils.selectCurrentOrExisting(bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL), + bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL), + ] + } + .branch { meta, bam, bai -> + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.FLAGSTAT_NORMAL) + runnable: bam && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_flagstat, bam, bai ] + ch_flagstat_inputs = Channel.empty() + .mix( + ch_inputs_tumor_sorted.runnable.map { meta, bam, bai -> [meta, Utils.getTumorDnaSample(meta), 'tumor', bam, bai] }, + ch_inputs_normal_sorted.runnable.map { meta, bam, bai -> [meta, Utils.getNormalDnaSample(meta), 'normal', bam, bai] }, + ) + .map { meta, meta_sample, sample_type, bam, bai -> + + def meta_flagstat = [ + key: meta.group_id, + id: "${meta.group_id}_${meta_sample.sample_id}", + sample_id: meta_sample.sample_id, + sample_type: sample_type, + ] + + return [meta_flagstat, bam, bai] + } + + // Run process + SAMTOOLS_FLAGSTAT( + ch_flagstat_inputs, + ) + + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) + + // Sort into a tumor and normal channel + ch_flagstat_out = SAMTOOLS_FLAGSTAT.out.flagstat + .branch { meta_flagstat, flagstat -> + assert ['tumor', 'normal'].contains(meta_flagstat.sample_type) + tumor: meta_flagstat.sample_type == 'tumor' + normal: meta_flagstat.sample_type == 'normal' + placeholder: true + } + + // Set outputs, restoring original meta + // channel: [ meta, flagstat ] + ch_somatic_flagstat = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(ch_flagstat_out.tumor, ch_inputs), + ch_inputs_tumor_sorted.skip.map { meta -> [meta, []] }, + ) + + ch_germline_flagstat = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(ch_flagstat_out.normal, ch_inputs), + ch_inputs_normal_sorted.skip.map { meta -> [meta, []] }, + ) + + emit: + somatic = ch_somatic_flagstat // channel: [ meta, flagstat ] + germline = ch_germline_flagstat // channel: [ meta, flagstat ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/gridss_svprep_calling/main.nf b/subworkflows/local/gridss_svprep_calling/main.nf new file mode 100644 index 00000000..4811f741 --- /dev/null +++ b/subworkflows/local/gridss_svprep_calling/main.nf @@ -0,0 +1,383 @@ +// +// SV Prep selects only reads relevant to SV events run prior to execution of GRIDSS. +// GRIDSS detects structural variants, and reports breakends and breakpoints. 
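+//
+// The subworkflow proceeds in the following stages, each implemented as a module below:
+//   1. SV Prep read filtering of the tumor BAM, then of the normal BAM (tumor/normal runs only)
+//   2. GRIDSS preprocessing of each filtered BAM
+//   3. GRIDSS assembly across the tumor/normal (or tumor-only) BAM set, followed by variant calling
+//   4. SV Prep depth annotation of the resulting VCF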
+// + +import Constants +import Utils + +include { GRIDSS_ASSEMBLE as ASSEMBLE } from '../../../modules/local/svprep/assemble/main' +include { GRIDSS_CALL as CALL } from '../../../modules/local/svprep/call/main' +include { SVPREP_DEPTH_ANNOTATOR as DEPTH_ANNOTATOR } from '../../../modules/local/svprep/depth_annotator/main' +include { GRIDSS_PREPROCESS as PREPROCESS } from '../../../modules/local/svprep/preprocess/main' +include { SVPREP as SVPREP_NORMAL } from '../../../modules/local/svprep/svprep/main' +include { SVPREP as SVPREP_TUMOR } from '../../../modules/local/svprep/svprep/main' + +workflow GRIDSS_SVPREP_CALLING { + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] + ch_normal_bam // channel: [mandatory] [ meta, bam, bai ] + + // Reference data + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_version // channel: [mandatory] genome version + genome_fai // channel: [mandatory] /path/to/genome_fai + genome_dict // channel: [mandatory] /path/to/genome_dict + genome_gridss_index // channel: [mandatory] /path/to/genome_gridss_index + gridss_blocklist // channel: [mandatory] /path/to/gridss_blocklist + sv_prep_blocklist // channel: [mandatory] /path/to/sv_prep_blocklist + known_fusions // channel: [mandatory] /path/to/known_fusions + + // Params + gridss_config // channel: [optional] /path/to/gridss_config + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select input sources and sort + // channel: runnable_tn: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai ] + // channel: runnable_to: [ meta, tumor_bam, tumor_bai ] + // channel: skip: [ meta ] + ch_inputs_sorted = WorkflowOncoanalyser.groupByMeta( + ch_tumor_bam, + ch_normal_bam, + ) + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + return [ + meta, + Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR), + tumor_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR), + Utils.selectCurrentOrExisting(normal_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL), + normal_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL), + ] + } + .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.GRIDSS_VCF) + + runnable_tn: tumor_bam && normal_bam && !has_existing + runnable_to: tumor_bam && !has_existing + return [meta, tumor_bam, tumor_bai] + skip: true + return meta + } + + // + // MODULE: SV Prep (tumor) + // + // Create process input channel + // channel: [ meta_svprep, bam_tumor, bai_tumor, [] ] + ch_svprep_tumor_inputs = Channel.empty() + .mix( + ch_inputs_sorted.runnable_to.map { [*it, [], []] }, + ch_inputs_sorted.runnable_tn, + ) + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + + def meta_svprep = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorDnaSampleName(meta), + sample_type: 'tumor', + // NOTE(SW): slightly redundant since we have this information then lose it with .mix above + group_size: normal_bam ? 
2 : 1 + ] + + return [meta_svprep, tumor_bam, tumor_bai, []] + + } + + // Run process + SVPREP_TUMOR( + ch_svprep_tumor_inputs, + genome_fasta, + genome_version, + sv_prep_blocklist, + known_fusions, + 'JUNCTIONS;BAM;FRAGMENT_LENGTH_DIST', // -write_types argument + ) + + ch_versions = ch_versions.mix(SVPREP_TUMOR.out.versions) + + // channel: [ meta_gridss, bam_tumor, bam_tumor_filtered ] + ch_preprocess_inputs_tumor = WorkflowOncoanalyser.groupByMeta( + SVPREP_TUMOR.out.bam, + ch_svprep_tumor_inputs, + ) + .map { meta_svprep, bam_filtered, bam, bai, jnc_optional -> + return [meta_svprep, bam, bam_filtered] + } + + // + // MODULE: SV Prep (normal) + // + // Create process input channel + // channel: [ meta_svprep, bam_normal, bai_normal, junctions_tumor ] + ch_svprep_normal_inputs = WorkflowOncoanalyser.groupByMeta( + ch_inputs_sorted.runnable_tn, + // NOTE(SW): this implicitly selects only entries present in ch_inputs_sorted.runnable_tn + WorkflowOncoanalyser.restoreMeta(SVPREP_TUMOR.out.junctions, ch_inputs_sorted.runnable_tn.map { it[0] }) + ) + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, junctions_tumor -> + + def meta_svprep = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getNormalDnaSampleName(meta), + sample_type: 'normal', + group_size: 2, // Assumption holds since germline only is not supported and we source from runnable_tn + ] + + return [meta_svprep, normal_bam, normal_bai, junctions_tumor] + + } + + // Run process + SVPREP_NORMAL( + ch_svprep_normal_inputs, + genome_fasta, + genome_version, + sv_prep_blocklist, + known_fusions, + 'JUNCTIONS;BAM;FRAGMENT_LENGTH_DIST', // -write_types argument + ) + + ch_versions = ch_versions.mix(SVPREP_NORMAL.out.versions) + + // channel: [ meta_gridss, bam_normal, bam_normal_filtered ] + ch_preprocess_inputs_normal = WorkflowOncoanalyser.groupByMeta( + SVPREP_NORMAL.out.bam, + ch_svprep_normal_inputs, + ) + // Switching meta name here from meta_svprep + .map { meta_gridss, bam_filtered, bam, bai, junctions -> + return [meta_gridss, bam, bam_filtered] + } + + // + // MODULE: GRIDSS preprocess + // + // Create process input channel + // channel: [ meta_gridss, bam, bam_filtered ] + ch_preprocess_inputs = Channel.empty() + .mix( + ch_preprocess_inputs_tumor, + ch_preprocess_inputs_normal, + ) + .map { meta_svprep, bam, bam_filtered -> + + def meta_gridss = [ + key: meta_svprep.key, + id: "${meta_svprep.id}__${meta_svprep.sample_id}", + sample_id: meta_svprep.sample_id, + sample_type: meta_svprep.sample_type, + group_size: meta_svprep.group_size, + ] + + return [meta_gridss, bam, bam_filtered] + } + + // Run process + PREPROCESS( + ch_preprocess_inputs, + genome_fasta, + genome_fai, + genome_dict, + genome_gridss_index, + gridss_config, + ) + + ch_versions = ch_versions.mix(PREPROCESS.out.versions) + + // Gather BAMs and outputs from preprocessing for each tumor/normal and tumor only set + // channel: [key, [[meta_gridss, bam, bam_filtered, preprocess_dir], ...] 
] + ch_bams_and_preprocess = WorkflowOncoanalyser.groupByMeta( + ch_preprocess_inputs, + PREPROCESS.out.preprocess_dir, + ) + .map { + def meta_gridss = it[0] + def other = it[1..-1] + [groupKey(meta_gridss.key, meta_gridss.group_size), [meta_gridss, *other]] + } + .groupTuple() + + // + // MODULE: GRIDSS assemble + // + // Create process input channel + // channel: tumor/normal: [ meta_gridss, [bams], [bams_filtered], [preprocess_dirs], [labels] ] + // channel: tumor only: [ meta_gridss, bam, bam_filtered, preprocess_dir, label ] + ch_assemble_inputs = ch_bams_and_preprocess + .map { key, entries -> + + assert entries.size() == 1 || entries.size() == 2 + + def tumor_entry = entries.find { e -> e[0].sample_type == 'tumor' } + def normal_entry = entries.find { e -> e[0].sample_type == 'normal' } + + assert tumor_entry !== null + + def (tmeta, tbam, tbam_filtered, tpreprocess) = tumor_entry + def meta_gridss = [ + // Effectively meta.group_id, and both are required. Reminder: + // * key: channel element grouping + // * id: task tag + key: tmeta.key, + id: tmeta.key, + ] + + def data = [] + + if (normal_entry === null) { + + data = [ + meta_gridss, + tbam, + tbam_filtered, + tpreprocess, + tmeta.sample_id, + ] + + } else { + + def (nmeta, nbam, nbam_filtered, npreprocess) = normal_entry + data = [ + meta_gridss, + [nbam, tbam], + [nbam_filtered, tbam_filtered], + [npreprocess, tpreprocess], + [nmeta.sample_id, tmeta.sample_id], + ] + + } + + return data + } + + // Run process + ASSEMBLE( + ch_assemble_inputs, + genome_fasta, + genome_fai, + genome_dict, + genome_gridss_index, + gridss_blocklist, + gridss_config, + ) + + ch_versions = ch_versions.mix(ASSEMBLE.out.versions) + + // + // MODULE: GRIDSS call + // + // Create process input channel + // channel: [ meta_gridss, [bams], [bams_filtered], assemble_dir, [labels] ] + ch_call_inputs = WorkflowOncoanalyser.groupByMeta( + ch_assemble_inputs, + ASSEMBLE.out.assemble_dir, + flatten: false, + ) + .map { data -> + def meta_gridss = data[0] + def (bams, bams_filtered, preprocess_dirs, labels) = data[1] + def (assemble_dir) = data[2] + return [meta_gridss, bams, bams_filtered, assemble_dir, labels] + } + + // Run process + CALL( + ch_call_inputs, + genome_fasta, + genome_fai, + genome_dict, + genome_gridss_index, + gridss_blocklist, + gridss_config, + ) + + ch_versions = ch_versions.mix(CALL.out.versions) + + // + // MODULE: SV Prep depth annotation + // + // Restore original meta, create process input channel + // channel: [ meta, [bams], [bais], vcf, [labels] ] + ch_depth_inputs_tn = WorkflowOncoanalyser.groupByMeta( + ch_inputs_sorted.runnable_tn, + // NOTE(SW): this implicitly selects only entries present in ch_inputs_sorted.runnable_tn + WorkflowOncoanalyser.restoreMeta(CALL.out.vcf, ch_inputs_sorted.runnable_tn.map { it[0] }) + ) + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, vcf -> + return [ + meta, + [normal_bam, tumor_bam], + [normal_bai, tumor_bai], + vcf, + [Utils.getNormalDnaSampleName(meta), Utils.getTumorDnaSampleName(meta)], + ] + } + + // channel: [ meta, bam, bai, vcf, label ] + ch_depth_inputs_to = WorkflowOncoanalyser.groupByMeta( + ch_inputs_sorted.runnable_to, + // NOTE(SW): this implicitly selects only entries present in ch_inputs_sorted.runnable_to + WorkflowOncoanalyser.restoreMeta(CALL.out.vcf, ch_inputs_sorted.runnable_to.map { it[0] }) + ) + .map { meta, tumor_bam, tumor_bai, vcf -> + return [ + meta, + tumor_bam, + tumor_bai, + vcf, + Utils.getTumorDnaSampleName(meta), + ] + } + + // channel: runnable_tn: 
[ meta_svprep, [bams], [bais], vcf, [labels] ] + // channel: runnable_to: [ meta_svprep, bam, bai, vcf, label ] + ch_depth_inputs = Channel.empty() + .mix( + ch_depth_inputs_tn, + ch_depth_inputs_to, + ) + .map { d -> + + def meta = d[0] + def fps = d[1..-1] + + def meta_svprep = [ + key: meta.group_id, + id: meta.group_id, + tumor_id: Utils.getTumorDnaSampleName(meta) + ] + + return [meta_svprep, *fps] + } + + // Add depth annotations to calls + DEPTH_ANNOTATOR( + ch_depth_inputs, + genome_fasta, + genome_version, + ) + + ch_versions = ch_versions.mix(DEPTH_ANNOTATOR.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, gridss_vcf ] + ch_outputs = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(DEPTH_ANNOTATOR.out.vcf, ch_inputs), + ch_inputs_sorted.skip.map { meta -> [meta, []] }, + ) + + emit: + vcf = ch_outputs // channel: [ meta, vcf ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/gripss_filtering/main.nf b/subworkflows/local/gripss_filtering/main.nf new file mode 100644 index 00000000..324e6761 --- /dev/null +++ b/subworkflows/local/gripss_filtering/main.nf @@ -0,0 +1,179 @@ +// +// GRIPSS performs SV filtering. +// + +import Constants +import Utils + +include { GRIPSS_GERMLINE as GERMLINE } from '../../../modules/local/gripss/germline/main' +include { GRIPSS_SOMATIC as SOMATIC } from '../../../modules/local/gripss/somatic/main' + +workflow GRIPSS_FILTERING { + take: + // Sample inputs + ch_inputs // channel: [mandatory] [ meta ] + ch_gridss // channel: [mandatory] [ meta, gridss_vcf ] + + // Reference data + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_version // channel: [mandatory] genome version + genome_fai // channel: [mandatory] /path/to/genome_fai + breakend_pon // channel: [mandatory] /path/to/breakend_pon + breakpoint_pon // channel: [mandatory] /path/to/breakpoint_pon + known_fusions // channel: [mandatory] /path/to/known_fusions + repeatmasker_annotations // channel: [mandatory] /path/to/repeatmasker_annotations + target_region_bed // channel: [optional] /path/to/target_region_bed + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select input sources and sort + // channel: runnable: [ meta, gridss_vcf ] + // channel: skip: [ meta ] + ch_inputs_sorted = ch_gridss + .map { meta, gridss_vcf -> + return [ + meta, + Utils.selectCurrentOrExisting(gridss_vcf, meta, Constants.INPUT.GRIDSS_VCF), + ] + } + .branch { meta, gridss_vcf -> + runnable: gridss_vcf + skip: true + return meta + } + + // + // MODULE: GRIPSS germline + // + // Select inputs that are eligible to run + // channel: runnable: [ meta, gridss_vcf ] + // channel: skip: [ meta ] + ch_inputs_germline_sorted = ch_inputs_sorted.runnable + .branch { meta, gridss_vcf -> + def has_tumor_normal = Utils.hasTumorDna(meta) && Utils.hasNormalDna(meta) + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.GRIPSS_VCF_NORMAL) + + runnable: has_tumor_normal && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_gripss, gridss_vcf ] + ch_gripss_germline_inputs = ch_inputs_germline_sorted.runnable + .map { meta, gridss_vcf -> + + def meta_gripss = [ + key: meta.group_id, + id: meta.group_id, + tumor_id: Utils.getTumorDnaSampleName(meta), + normal_id: Utils.getNormalDnaSampleName(meta), + ] + + return [meta_gripss, gridss_vcf] + } + + // Run process + GERMLINE( + ch_gripss_germline_inputs, + genome_fasta, 
+ genome_version, + genome_fai, + breakend_pon, + breakpoint_pon, + known_fusions, + repeatmasker_annotations, + ) + + ch_versions = ch_versions.mix(GERMLINE.out.versions) + + // + // MODULE: GRIPSS somatic + // + // Select inputs that are eligible to run + // channel: runnable: [ meta, gridss_vcf ] + // channel: skip: [ meta ] + ch_inputs_somatic_sorted = ch_inputs_sorted.runnable + .branch { meta, gridss_vcf -> + def has_tumor = Utils.hasTumorDna(meta) + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.GRIPSS_VCF_TUMOR) + + runnable: has_tumor && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_gripss, gridss_vcf ] + ch_gripss_somatic_inputs = ch_inputs_somatic_sorted.runnable + .map { meta, gridss_vcf -> + + def meta_gripss = [ + key: meta.group_id, + id: meta.group_id, + tumor_id: Utils.getTumorDnaSampleName(meta), + ] + + if (Utils.hasNormalDna(meta)) { + meta_gripss.normal_id = Utils.getNormalDnaSampleName(meta) + } + + return [meta_gripss, gridss_vcf] + } + + // Run process + SOMATIC( + ch_gripss_somatic_inputs, + genome_fasta, + genome_version, + genome_fai, + breakend_pon, + breakpoint_pon, + known_fusions, + repeatmasker_annotations, + target_region_bed, + ) + + ch_versions = ch_versions.mix(SOMATIC.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, gripss_vcf, gripss_tbi ] + ch_somatic_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(SOMATIC.out.vcf, ch_inputs), + ch_inputs_somatic_sorted.skip.map { meta -> [meta, [], []] }, + ch_inputs_sorted.skip.map { meta -> [meta, [], []] }, + ) + + ch_somatic_unfiltered_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(SOMATIC.out.vcf_unfiltered, ch_inputs), + ch_inputs_somatic_sorted.skip.map { meta -> [meta, [], []] }, + ch_inputs_sorted.skip.map { meta -> [meta, [], []] }, + ) + + ch_germline_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(GERMLINE.out.vcf, ch_inputs), + ch_inputs_germline_sorted.skip.map { meta -> [meta, [], []] }, + ch_inputs_sorted.skip.map { meta -> [meta, [], []] }, + ) + + ch_germline_unfiltered_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(GERMLINE.out.vcf_unfiltered, ch_inputs), + ch_inputs_germline_sorted.skip.map { meta -> [meta, [], []] }, + ch_inputs_sorted.skip.map { meta -> [meta, [], []] }, + ) + + emit: + somatic = ch_somatic_out // channel: [ meta, gripss_vcf, gripss_tbi ] + germline = ch_germline_out // channel: [ meta, gripss_vcf, gripss_tbi ] + somatic_unfiltered = ch_somatic_unfiltered_out // channel: [ meta, gripss_vcf, gripss_tbi ] + germline_unfiltered = ch_germline_unfiltered_out // channel: [ meta, gripss_vcf, gripss_tbi ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf deleted file mode 100644 index 0aecf87f..00000000 --- a/subworkflows/local/input_check.nf +++ /dev/null @@ -1,44 +0,0 @@ -// -// Check input samplesheet and get read channels -// - -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' - -workflow INPUT_CHECK { - take: - samplesheet // file: /path/to/samplesheet.csv - - main: - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channel(it) } - .set { reads } - - emit: - reads // channel: [ val(meta), [ reads ] ] - versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] -} - -// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -def 
create_fastq_channel(LinkedHashMap row) { - // create meta map - def meta = [:] - meta.id = row.sample - meta.single_end = row.single_end.toBoolean() - - // add path(s) of the fastq file(s) to the meta map - def fastq_meta = [] - if (!file(row.fastq_1).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" - } - if (meta.single_end) { - fastq_meta = [ meta, [ file(row.fastq_1) ] ] - } else { - if (!file(row.fastq_2).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" - } - fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] - } - return fastq_meta -} diff --git a/subworkflows/local/isofox_quantification/main.nf b/subworkflows/local/isofox_quantification/main.nf new file mode 100644 index 00000000..5cb79e4b --- /dev/null +++ b/subworkflows/local/isofox_quantification/main.nf @@ -0,0 +1,96 @@ +// +// Isofox estimates transcript abundance, detects novel SJs, and identifies fusion events +// + +import Constants +import Utils + +include { ISOFOX } from '../../../modules/local/isofox/main' + +workflow ISOFOX_QUANTIFICATION { + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_tumor_rna_bam // channel: [mandatory] [ meta, bam, bai ] + + // Reference data + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_version // channel: [mandatory] genome version + genome_fai // channel: [mandatory] /path/to/genome_fai + ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ + isofox_counts // channel: [mandatory] /path/to/isofox_counts + isofox_gc_ratios // channel: [mandatory] /path/to/isofox_gc_ratios + isofox_gene_ids // channel: [optional] /path/to/gene_ids + isofox_tpm_norm // channel: [optional] /path/to/tpm_norm + + // Params + isofox_functions // string: [optional] Isofox functions + isofox_read_length // string: [mandatory] Isofox read length + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select input sources and sort + // channel: runnable: [ meta, tumor_bam, tumor_bai ] + // channel: skip: [ meta ] + ch_inputs_sorted = ch_tumor_rna_bam + .map { meta, tumor_bam, tumor_bai -> + return [ + meta, + Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_RNA_TUMOR), + Utils.selectCurrentOrExisting(tumor_bai, meta, Constants.INPUT.BAI_RNA_TUMOR), + ] + } + .branch { meta, tumor_bam, tumor_bai -> + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.ISOFOX_DIR) + runnable: tumor_bam && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_isofox, tumor_bam, tumor_bai ] + ch_isofox_inputs = ch_inputs_sorted.runnable + .map { meta, tumor_bam, tumor_bai -> + + def meta_isofox = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorRnaSampleName(meta), + ] + + return [meta_isofox, tumor_bam, tumor_bai] + } + + // Run process + ISOFOX( + ch_isofox_inputs, + isofox_functions, + isofox_read_length, + genome_fasta, + genome_version, + genome_fai, + ensembl_data_resources, + isofox_counts, + isofox_gc_ratios, + isofox_gene_ids, + isofox_tpm_norm, + ) + + ch_versions = ch_versions.mix(ISOFOX.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, isofox_dir ] + ch_outputs = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(ISOFOX.out.isofox_dir, ch_inputs), + ch_inputs_sorted.skip.map { meta -> [meta, []] }, + ) + + emit: + 
isofox_dir = ch_outputs // channel: [ meta, isofox_dir ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/lilac_calling/main.nf b/subworkflows/local/lilac_calling/main.nf new file mode 100644 index 00000000..5ee68fd4 --- /dev/null +++ b/subworkflows/local/lilac_calling/main.nf @@ -0,0 +1,224 @@ +// +// LILAC is a WGS tool for HLA typing and somatic CNV and SNV calling +// + +import Constants +import Utils + +include { CUSTOM_EXTRACTCONTIG as EXTRACTCONTIG } from '../../../modules/local/custom/lilac_extract_and_index_contig/main' +include { CUSTOM_REALIGNREADS as REALIGNREADS } from '../../../modules/local/custom/lilac_realign_reads_lilac/main' +include { CUSTOM_SLICE as SLICEBAM } from '../../../modules/local/custom/lilac_slice/main' +include { LILAC } from '../../../modules/local/lilac/main' + +workflow LILAC_CALLING { + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] + ch_normal_bam // channel: [mandatory] [ meta, bam, bai ] + ch_tumor_rna_bam // channel: [mandatory] [ meta, bam, bai ] + ch_purple // channel: [mandatory] [ meta, purple_dir ] + + // Reference data + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_version // channel: [mandatory] genome version + genome_fai // channel: [mandatory] /path/to/genome_fai + lilac_resource_dir // channel: [mandatory] /path/to/lilac_resource_dir/ + hla_slice_bed // channel: [mandatory] /path/to/hla_slice_bed + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select input sources and sort for DNA BAMs + // channel: runnable: [ meta, tumor_dna_bam, tumor_dna_bai, normal_dna_bam, normal_dna_bai ] + // channel: skip: [ meta ] + ch_dna_inputs_sorted = WorkflowOncoanalyser.groupByMeta( + ch_tumor_bam, + ch_normal_bam, + ) + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + return [ + meta, + Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR), + tumor_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR), + Utils.selectCurrentOrExisting(normal_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL), + normal_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL), + ] + } + .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.LILAC_DIR) + + runnable: (tumor_bam || normal_bam) && !has_existing + skip: true + return meta + } + + // Realign reads mapping to HLA regions and homologus regions if using reference genome with ALT contigs + // NOTE(SW): the aim of this process is to take reads mapping to ALT contigs and align them to the three + // relevant HLA genes on chr6. All reads including those previously mapped to chr6 are realigned for + // consistency. + if (params.genome_type == 'alt') { + + // Flatten into BAM/BAI pairs, select inputs that are eligible to run + // channel: runnable: [ meta_extra, bam, bai ] + // channel: skip: [ meta_extra ] + ch_realign_inputs_sorted = ch_dna_inputs_sorted.runnable + .flatMap { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + + def tumor_sample_id = Utils.hasTumorDna(meta) ? Utils.getTumorDnaSampleName(meta) : [] + def normal_sample_id = Utils.hasNormalDna(meta) ? 
Utils.getNormalDnaSampleName(meta) : [] + + return [ + [[key: meta.group_id, *:meta, sample_id: tumor_sample_id, sample_type: 'tumor'], tumor_bam, tumor_bai], + [[key: meta.group_id, *:meta, sample_id: normal_sample_id, sample_type: 'normal'], normal_bam, normal_bai], + ] + } + .branch { meta_extra, bam, bai -> + runnable: bam && bai + skip: true + return meta_extra + } + + // + // MODULE: Custom BAM slice (LILAC) + // + // Create process input channel + // channel: [ meta_realign, bam, bai ] + ch_slice_inputs = ch_realign_inputs_sorted.runnable + .map { meta_extra, bam, bai -> + + def meta_realign = [ + key: meta_extra.group_id, + id: "${meta_extra.group_id}__${meta_extra.sample_id}", + sample_id: meta_extra.sample_id, + sample_type: meta_extra.sample_type, + ] + + return [meta_realign, bam, bai] + } + + // Run process + SLICEBAM( + ch_slice_inputs, + hla_slice_bed, + ) + + ch_versions = ch_versions.mix(SLICEBAM.out.versions) + + // + // MODULE: Custom extract contig (LILAC) + // + // Only run if we have runnable inputs, no blocking since operating only on input metas + ch_extract_contig_run = ch_realign_inputs_sorted.runnable + .toList() + .map { !it.isEmpty() } + + EXTRACTCONTIG( + 'chr6', + genome_fasta, + genome_fai, + ch_extract_contig_run, + ) + + ch_versions = ch_versions.mix(EXTRACTCONTIG.out.versions) + + // + // MODULE: Custom realign reads (LILAC) + // + REALIGNREADS( + SLICEBAM.out.bam, + EXTRACTCONTIG.out.contig, + EXTRACTCONTIG.out.bwamem2_index, + ) + + ch_versions = ch_versions.mix(REALIGNREADS.out.versions) + + // Separate all BAMs by sample type so they can be merged with desired order + // channel: [ < meta_extra OR meta_realign >, bam, bai ] + ch_slice_reunited_bams = Channel.empty() + .mix( + ch_realign_inputs_sorted.skip.map { meta_extra -> [meta_extra, [], []] }, + REALIGNREADS.out.bam, + ) + .branch { meta_ambiguous, bam, bai -> + tumor: meta_ambiguous.sample_type == 'tumor' + normal: meta_ambiguous.sample_type == 'normal' + } + + // Restore meta, pair tumor and normal BAMs + // channel: [ meta, tumor_dna_bam, tumor_dna_bai, normal_dna_bam, normal_dna_bai ] + ch_dna_inputs_ready = WorkflowOncoanalyser.groupByMeta( + WorkflowOncoanalyser.restoreMeta(ch_slice_reunited_bams.tumor, ch_inputs), + WorkflowOncoanalyser.restoreMeta(ch_slice_reunited_bams.normal, ch_inputs), + ) + + } else { + + // channel: [ meta, tumor_dna_bam, tumor_dna_bai, normal_dna_bam, normal_dna_bai ] + ch_dna_inputs_ready = ch_dna_inputs_sorted.runnable + + } + + // + // MODULE: LILAC + // + // Create process input channel + // channel: [ meta_lilac, normal_dna_bam, normal_dna_bai, tumor_dna_bam, tumor_dna_bai, tumor_rna_bam, tumor_rna_bai, purple_dir ] + ch_lilac_inputs = WorkflowOncoanalyser.groupByMeta( + ch_dna_inputs_ready, + ch_tumor_rna_bam, + ch_purple, + ) + .map { meta, tbam_dna, tbai_dna, nbam_dna, nbai_dna, tbam_rna, tbai_rna, purple_dir -> + + def meta_lilac = [ + key: meta.group_id, + id: meta.group_id, + ] + + if (Utils.hasTumorDna(meta)) { + meta_lilac.tumor_id = Utils.getTumorDnaSampleName(meta) + } + + if (Utils.hasNormalDna(meta)) { + meta_lilac.normal_id = Utils.getNormalDnaSampleName(meta) + } + + return [ + meta_lilac, + nbam_dna, + nbai_dna, + tbam_dna, + tbai_dna, + Utils.selectCurrentOrExisting(tbam_rna, meta, Constants.INPUT.BAM_RNA_TUMOR), + Utils.selectCurrentOrExisting(tbai_rna, meta, Constants.INPUT.BAI_RNA_TUMOR), + Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR), + ] + } + + // Run process + LILAC( + ch_lilac_inputs, + genome_fasta, + 
genome_version, + lilac_resource_dir, + ) + + ch_versions = ch_versions.mix(LILAC.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, amber_dir ] + ch_outputs = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(LILAC.out.lilac_dir, ch_inputs), + ch_dna_inputs_sorted.skip.map { meta -> [meta, []] }, + ) + + emit: + lilac_dir = ch_outputs // channel: [ meta, lilac_dir ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/linx_annotation/main.nf b/subworkflows/local/linx_annotation/main.nf new file mode 100644 index 00000000..0381a05c --- /dev/null +++ b/subworkflows/local/linx_annotation/main.nf @@ -0,0 +1,156 @@ +// +// LINX annotates and interprets structural variants +// + +import Constants +import Utils + +include { LINX_GERMLINE as GERMLINE } from '../../../modules/local/linx/germline/main' +include { LINX_SOMATIC as SOMATIC } from '../../../modules/local/linx/somatic/main' + +workflow LINX_ANNOTATION { + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_purple // channel: [mandatory] [ meta, purple_dir ] + + // Reference data + genome_version // channel: [mandatory] genome version + ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ + known_fusion_data // channel: [mandatory] /path/to/known_fusion_data + driver_gene_panel // channel: [mandatory] /path/to/driver_gene_panel + + main: + // Channel for versions.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select input sources and sort + // channel: runnable: [ meta, purple_dir ] + // channel: skip: [ meta ] + ch_inputs_sorted = ch_purple + .map { meta, purple_dir -> + return [ + meta, + Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR), + ] + } + .branch { meta, purple_dir -> + runnable: purple_dir + skip: true + return meta + } + + // + // MODULE: LINX germline annotation + // + // Select inputs that are eligible to run + // channel: runnable: [ meta, purple_dir ] + // channel: skip: [ meta ] + ch_inputs_germline_sorted = ch_inputs_sorted.runnable + .branch { meta, purple_dir -> + + def tumor_id = Utils.getTumorDnaSampleName(meta) + + def has_tumor_normal = Utils.hasTumorDna(meta) && Utils.hasNormalDna(meta) + def has_sv_germline_vcf = file(purple_dir).resolve("${tumor_id}.purple.sv.germline.vcf.gz") + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.PURPLE_DIR) + + runnable: has_tumor_normal && has_sv_germline_vcf && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta, sv_vcf ] + ch_linx_germline_inputs = ch_inputs_germline_sorted.runnable + .map { meta, purple_dir -> + + def tumor_id = Utils.getTumorDnaSampleName(meta) + + def meta_linx = [ + key: meta.group_id, + id: meta.group_id, + sample_id: tumor_id, + ] + + def sv_vcf = file(purple_dir).resolve("${tumor_id}.purple.sv.germline.vcf.gz") + + return [meta_linx, sv_vcf] + } + + // Run process + GERMLINE( + ch_linx_germline_inputs, + genome_version, + ensembl_data_resources, + driver_gene_panel, + ) + + ch_versions = ch_versions.mix(GERMLINE.out.versions) + + // + // MODULE: LINX somatic annotation + // + // Select inputs that are eligible to run + // channel: runnable: [ meta, purple_dir ] + // channel: skip: [ meta ] + ch_inputs_somatic_sorted = ch_inputs_sorted.runnable + .branch { meta, purple_dir -> + + def has_tumor = Utils.hasTumorDna(meta) + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.PURPLE_DIR) + + runnable: 
has_tumor && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta, purple_dir ] + ch_linx_somatic_inputs = ch_inputs_somatic_sorted.runnable + .map { meta, purple_dir -> + + def meta_linx = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorDnaSampleName(meta), + ] + + return [meta_linx, purple_dir] + } + + // Run process + SOMATIC( + ch_linx_somatic_inputs, + genome_version, + ensembl_data_resources, + known_fusion_data, + driver_gene_panel, + ) + + ch_versions = ch_versions.mix(SOMATIC.out.versions) + + + // Set outputs, restoring original meta + // channel: [ meta, linx_annotation_dir ] + ch_somatic_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(SOMATIC.out.annotation_dir, ch_inputs), + ch_inputs_somatic_sorted.skip.map { meta -> [meta, []] }, + ch_inputs_sorted.skip.map { meta -> [meta, []] }, + ) + + ch_germline_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(GERMLINE.out.annotation_dir, ch_inputs), + ch_inputs_germline_sorted.skip.map { meta -> [meta, []] }, + ch_inputs_sorted.skip.map { meta -> [meta, []] }, + ) + + emit: + somatic = ch_somatic_out // channel: [ meta, linx_annotation_dir ] + germline = ch_germline_out // channel: [ meta, linx_annotation_dir ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/linx_plotting/main.nf b/subworkflows/local/linx_plotting/main.nf new file mode 100644 index 00000000..4b52da0f --- /dev/null +++ b/subworkflows/local/linx_plotting/main.nf @@ -0,0 +1,110 @@ +// +// LINX plotting visualises clusters structural variants +// + +import Constants +import Utils + +include { LINXREPORT as REPORT } from '../../../modules/local/linxreport/main' +include { LINX_VISUALISER as VISUALISER } from '../../../modules/local/linx/visualiser/main' + +workflow LINX_PLOTTING { + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_annotations // channel: [mandatory] [ meta, annotation_dir ] + + // Reference data + genome_version // channel: [mandatory] genome version + ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ + + main: + // Channel for versions.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select input sources and sort + // channel: runnable: [ meta, annotation_dir ] + // channel: skip: [ meta ] + ch_inputs_sorted = ch_annotations + .map { meta, annotation_dir -> + return [ + meta, + Utils.selectCurrentOrExisting(annotation_dir, meta, Constants.INPUT.LINX_ANNO_DIR_TUMOR), + ] + } + .branch { meta, annotation_dir -> + + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.LINX_PLOT_DIR_TUMOR) + + runnable: annotation_dir && !has_existing + skip: true + return meta + } + + // + // MODULE: LINX visualiser + // + // Create process input channel + // channel: [ meta_linx, annotation_dir ] + ch_linx_visualiser_inputs = ch_inputs_sorted.runnable + .map { meta, annotation_dir -> + + def meta_linx = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorDnaSampleName(meta), + ] + + return [meta_linx, annotation_dir] + } + + // Run process + VISUALISER( + ch_linx_visualiser_inputs, + genome_version, + ensembl_data_resources, + ) + + ch_versions = ch_versions.mix(VISUALISER.out.versions) + + // + // MODULE: gpgr LINX report + // + // Create process input channel + // channel: [ meta_gpgr, annotation_dir, visualiser_dir ] + ch_gpgr_linx_inputs = WorkflowOncoanalyser.groupByMeta( + ch_inputs_sorted.runnable, + 
WorkflowOncoanalyser.restoreMeta(VISUALISER.out.plots, ch_inputs), + ) + .map { meta, annotation_dir, visualiser_dir -> + + def meta_gpgr_linx = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorDnaSampleName(meta), + ] + + return [meta_gpgr_linx, annotation_dir, visualiser_dir] + } + + // Run process + REPORT( + ch_gpgr_linx_inputs, + ) + + ch_versions = ch_versions.mix(REPORT.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, visualiser_dir ] + ch_visualiser_dir_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(VISUALISER.out.plots, ch_inputs), + ch_inputs_sorted.skip.map { meta -> [meta, []] }, + ) + + emit: + visualiser_dir = ch_visualiser_dir_out // channel: [ meta, visualiser_dir ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/orange_reporting/main.nf b/subworkflows/local/orange_reporting/main.nf new file mode 100644 index 00000000..451fa9d0 --- /dev/null +++ b/subworkflows/local/orange_reporting/main.nf @@ -0,0 +1,242 @@ +// +// ORANGE collates outputs of hmftools into a static PDF report +// + +import Constants +import Utils + +include { ORANGE } from '../../../modules/local/orange/main' + +workflow ORANGE_REPORTING { + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_bamtools_somatic // channel: [mandatory] [ meta, metrics ] + ch_bamtools_germline // channel: [mandatory] [ meta, metrics ] + ch_flagstat_somatic // channel: [mandatory] [ meta, metrics ] + ch_flagstat_germline // channel: [mandatory] [ meta, metrics ] + ch_sage_somatic // channel: [mandatory] [ meta, sage_dir ] + ch_sage_germline // channel: [mandatory] [ meta, sage_dir ] + ch_sage_somatic_append // channel: [mandatory] [ meta, sage_append_vcf ] + ch_sage_germline_append // channel: [mandatory] [ meta, sage_append_vcf ] + ch_purple // channel: [mandatory] [ meta, purple_dir ] + ch_linx_somatic_annotation // channel: [mandatory] [ meta, linx_annotation_dir ] + ch_linx_somatic_plot // channel: [mandatory] [ meta, linx_visualiser_dir ] + ch_linx_germline_annotation // channel: [mandatory] [ meta, linx_annotation_dir ] + ch_virusinterpreter // channel: [mandatory] [ meta, virusinterpreter_dir ] + ch_chord // channel: [mandatory] [ meta, chord_dir ] + ch_sigs // channel: [mandatory] [ meta, sigs_dir ] + ch_lilac // channel: [mandatory] [ meta, lilac_dir ] + ch_cuppa // channel: [mandatory] [ meta, cuppa_dir ] + ch_isofox // channel: [mandatory] [ meta, isofox_dir ] + + // Reference data + genome_version // channel: [mandatory] genome version + disease_ontology // channel: [mandatory] /path/to/disease_ontology + cohort_mapping // channel: [mandatory] /path/to/cohort_mapping + cohort_percentiles // channel: [mandatory] /path/to/cohort_percentiles + known_fusion_data // channel: [mandatory] /path/to/known_fusion_data + driver_gene_panel // channel: [mandatory] /path/to/driver_gene_panel + ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ + isofox_alt_sj // channel: [optional] /path/to/isofox_alt_sj + isofox_gene_distribution // channel: [optional] /path/to/isofox_gene_distribution + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Set expected input ordering and size + input_expected_size = 18 + + dna_tumor_input_indexes = [ + 0, // bamtools_somatic + 2, // flagstat_somatic + 4, // sage_somatic + 8, // purple_dir + 9, // linx_somatic_annotation + 10, // linx_somatic_plot_dir + 15, // lilac_dir + ] + 
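+    // NOTE: each index above and below is the zero-based position of an input within the
+    // tuple assembled by WorkflowOncoanalyser.groupByMeta in ch_inputs_selected further down;
+    // the index groups are used to check that all DNA tumor, DNA normal, or RNA tumor inputs
+    // are present, and to clear inputs belonging to groups that are incomplete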
+ dna_normal_input_indexes = [ + 1, // bamtools_germline + 3, // flagstat_germline + 5, // sage_germline + 11, // linx_germline_annotation + ] + + rna_tumor_input_indexes = [ + 6, // sage_somatic_append + 17, // isofox_dir + ] + + rna_sage_germline_append_index = 7 // sage_germline_append + + // Select input sources + // channel: [ meta, tbt_metrics, nbt_metrics, tfs_metrics, nfs_metrics, tsage_dir, nsage_dir, tsage_append, nsage_append, purple_dir, tlinx_anno_dir, tlinx_plot_dir, nlinx_anno_dir, virusinterpreter_dir, chord_dir, sigs_dir, lilac_dir, cuppa_dir, isofox_dir ] + ch_inputs_selected = WorkflowOncoanalyser.groupByMeta( + ch_bamtools_somatic, + ch_bamtools_germline, + ch_flagstat_somatic, + ch_flagstat_germline, + ch_sage_somatic, + ch_sage_germline, + ch_sage_somatic_append, + ch_sage_germline_append, + ch_purple, + ch_linx_somatic_annotation, + ch_linx_somatic_plot, + ch_linx_germline_annotation, + ch_virusinterpreter, + ch_chord, + ch_sigs, + ch_lilac, + ch_cuppa, + ch_isofox, + ) + .map { d -> + + def meta = d[0] + def inputs = d[1..-1] + + assert inputs.size() == input_expected_size + + // NOTE(SW): avoiding further complexity with loops etc + + def inputs_selected = [ + Utils.selectCurrentOrExisting(inputs[0], meta, Constants.INPUT.BAMTOOLS_TUMOR), + Utils.selectCurrentOrExisting(inputs[1], meta, Constants.INPUT.BAMTOOLS_NORMAL), + Utils.selectCurrentOrExisting(inputs[2], meta, Constants.INPUT.FLAGSTAT_TUMOR), + Utils.selectCurrentOrExisting(inputs[3], meta, Constants.INPUT.FLAGSTAT_NORMAL), + Utils.selectCurrentOrExisting(inputs[4], meta, Constants.INPUT.SAGE_DIR_TUMOR), + Utils.selectCurrentOrExisting(inputs[5], meta, Constants.INPUT.SAGE_DIR_NORMAL), + Utils.selectCurrentOrExisting(inputs[6], meta, Constants.INPUT.SAGE_APPEND_VCF_TUMOR), + Utils.selectCurrentOrExisting(inputs[7], meta, Constants.INPUT.SAGE_APPEND_VCF_NORMAL), + Utils.selectCurrentOrExisting(inputs[8], meta, Constants.INPUT.PURPLE_DIR), + Utils.selectCurrentOrExisting(inputs[9], meta, Constants.INPUT.LINX_ANNO_DIR_TUMOR), + Utils.selectCurrentOrExisting(inputs[10], meta, Constants.INPUT.LINX_PLOT_DIR_TUMOR), + Utils.selectCurrentOrExisting(inputs[11], meta, Constants.INPUT.LINX_ANNO_DIR_NORMAL), + Utils.selectCurrentOrExisting(inputs[12], meta, Constants.INPUT.VIRUSINTERPRETER_DIR), + Utils.selectCurrentOrExisting(inputs[13], meta, Constants.INPUT.CHORD_DIR), + Utils.selectCurrentOrExisting(inputs[14], meta, Constants.INPUT.SIGS_DIR), + Utils.selectCurrentOrExisting(inputs[15], meta, Constants.INPUT.LILAC_DIR), + Utils.selectCurrentOrExisting(inputs[16], meta, Constants.INPUT.CUPPA_DIR), + Utils.selectCurrentOrExisting(inputs[17], meta, Constants.INPUT.ISOFOX_DIR), + ] + + return [meta, *inputs_selected] + } + + // Sort inputs + // channel: runnable: [ meta, tbt_metrics, nbt_metrics, tfs_metrics, nfs_metrics, tsage_dir, nsage_dir, tsage_append, nsage_append, purple_dir, tlinx_anno_dir, tlinx_plot_dir, nlinx_anno_dir, virusinterpreter_dir, chord_dir, sigs_dir, lilac_dir, cuppa_dir, isofox_dir ] + // channel: skip: [ meta ] + ch_inputs_sorted = ch_inputs_selected + .branch { d -> + + def meta = d[0] + def inputs = d[1..-1] + + def has_dna_tumor = dna_tumor_input_indexes + .collect { i -> inputs[i] } + .every() + + def has_rna_tumor = rna_tumor_input_indexes + .collect { i -> inputs[i] } + .every() + + runnable_dna_and_rna: has_dna_tumor && has_rna_tumor + runnable_dna: has_dna_tumor + skip: true + return meta + } + + // First set RNA reference files + // NOTE(SW): since the RNA reference files are provided 
as channels, I seem to be only able to include via channel ops + // channel: [ meta, tbt_metrics, nbt_metrics, tfs_metrics, nfs_metrics, tsage_dir, nsage_dir, tsage_append, nsage_append, purple_dir, tlinx_anno_dir, tlinx_plot_dir, nlinx_anno_dir, virusinterpreter_dir, chord_dir, sigs_dir, lilac_dir, cuppa_dir, isofox_dir, isofox_alt_sj, isofox_gene_distribution ] + ch_inputs_runnable = Channel.empty() + .mix( + ch_inputs_sorted.runnable_dna.map { d -> [*d, [], []] }, + ch_inputs_sorted.runnable_dna_and_rna + .combine(isofox_alt_sj) + .combine(isofox_gene_distribution), + ) + + // Create process input channel + // channel: sample_data: [ meta, tbt_metrics, nbt_metrics, tfs_metrics, nfs_metrics, tsage_dir, nsage_dir, tsmlv_vcf, nsmlv_vcf, purple_dir, tlinx_anno_dir, tlinx_plot_dir, nlinx_anno_dir, virusinterpreter_dir, chord_dir, sigs_dir, lilac_dir, cuppa_dir, isofox_dir ] + // channel: isofox_alt_sj: [ isofox_alt_sj ] + // channel: isofox_gene_distribution: [ isofox_gene_distribution ] + ch_orange_inputs = ch_inputs_runnable + .multiMap { d -> + + def meta = d[0] + def inputs = d[1..-3] + + def isofox_alt_sj = d[-2] + def isofox_gene_distribution = d[-1] + + def meta_orange = [ + key: meta.group_id, + id: meta.group_id, + tumor_id: Utils.getTumorDnaSampleName(meta), + ] + + def inputs_selected = inputs.clone() + + // Require all normal DNA inputs to be present else clear them + def has_dna_normal = dna_normal_input_indexes + .collect { i -> inputs[i] } + .every() + + if (has_dna_normal) { + meta_orange.normal_dna_id = Utils.getNormalDnaSampleName(meta) + } else { + dna_normal_input_indexes.each { i -> inputs_selected[i] = [] } + } + + // Require all tumor RNA inputs to be present else clear them + // SAGE append germline is only required when normal DNA is present + def rna_tumor_input_indexes_ready + if (has_dna_normal) { + rna_tumor_input_indexes_ready = [*rna_tumor_input_indexes, rna_sage_germline_append_index] + } else { + rna_tumor_input_indexes_ready = rna_tumor_input_indexes.clone() + } + + def has_rna_tumor = rna_tumor_input_indexes_ready + .collect { i -> inputs[i] } + .every() + + if (has_rna_tumor) { + meta_orange.tumor_rna_id = Utils.getTumorRnaSampleName(meta) + } else { + rna_tumor_input_indexes.each { i -> inputs_selected[i] = [] } + } + + assert inputs_selected.size() == input_expected_size + + sample_data: [meta_orange, *inputs_selected] + isofox_alt_sj: isofox_alt_sj + isofox_gene_distribution: isofox_gene_distribution + } + + // Run process + ORANGE( + ch_orange_inputs.sample_data, + genome_version, + disease_ontology, + cohort_mapping, + cohort_percentiles, + known_fusion_data, + driver_gene_panel, + ensembl_data_resources, + ch_orange_inputs.isofox_alt_sj, + ch_orange_inputs.isofox_gene_distribution, + "5.34 [oncoanalyser]", + ) + + ch_versions = ch_versions.mix(ORANGE.out.versions) + + emit: + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/pave_annotation/main.nf b/subworkflows/local/pave_annotation/main.nf new file mode 100644 index 00000000..2092581e --- /dev/null +++ b/subworkflows/local/pave_annotation/main.nf @@ -0,0 +1,179 @@ +// +// PAVE annotates somatic and germline variant VCFs with gene and transcript coding and protein effects +// + +import Constants +import Utils + +include { PAVE_GERMLINE as GERMLINE } from '../../../modules/local/pave/germline/main' +include { PAVE_SOMATIC as SOMATIC } from '../../../modules/local/pave/somatic/main' + +workflow PAVE_ANNOTATION { + take: + // Sample data + ch_inputs // channel: 
[mandatory] [ meta ] + ch_sage_germline_vcf // channel: [mandatory] [ meta, sage_germline_vcf, sage_somatic_tbi ] + ch_sage_somatic_vcf // channel: [mandatory] [ meta, sage_somatic_vcf, sage_somatic_tbi ] + + // Reference data + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_version // channel: [mandatory] genome version + genome_fai // channel: [mandatory] /path/to/genome_fai + sage_pon // channel: [mandatory] /path/to/sage_pon + pon_artefacts // channel: [optional] /path/to/pon_artefacts + sage_blocklist_regions // channel: [mandatory] /path/to/sage_blocklist_regions + sage_blocklist_sites // channel: [mandatory] /path/to/sage_blocklist_sites + clinvar_annotations // channel: [mandatory] /path/to/clinvar_annotations + segment_mappability // channel: [mandatory] /path/to/segment_mappability + driver_gene_panel // channel: [mandatory] /path/to/driver_gene_panel + ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ + gnomad_resource // channel: [mandatory] /path/to/gnomad_resource + + main: + // Channel for version.yml files + ch_versions = Channel.empty() + + // + // MODULE: PAVE germline + // + // Select input sources and sort + // channel: runnable: [ meta, sage_vcf, sage_tbi ] + // channel: skip: [ meta ] + ch_sage_germline_inputs_sorted = ch_sage_germline_vcf + .map { meta, sage_vcf, sage_tbi -> + return [ + meta, + Utils.selectCurrentOrExisting(sage_vcf, meta, Constants.INPUT.SAGE_VCF_NORMAL), + Utils.selectCurrentOrExisting(sage_tbi, meta, Constants.INPUT.SAGE_VCF_TBI_NORMAL), + ] + } + .branch { meta, sage_vcf, sage_tbi -> + + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.PAVE_VCF_NORMAL) + + runnable: Utils.hasTumorDna(meta) && Utils.hasNormalDna(meta) && sage_vcf && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_pave, sage_vcf, sage_tbi ] + ch_pave_germline_inputs = ch_sage_germline_inputs_sorted.runnable + .map { meta, sage_vcf, sage_tbi -> + + def meta_pave = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorDnaSampleName(meta), + ] + + return [meta_pave, sage_vcf, sage_tbi] + } + + // Run process + GERMLINE( + ch_pave_germline_inputs, + genome_fasta, + genome_version, + genome_fai, + sage_blocklist_regions, + sage_blocklist_sites, + clinvar_annotations, + segment_mappability, + driver_gene_panel, + ensembl_data_resources, + gnomad_resource, + ) + + ch_versions = ch_versions.mix(GERMLINE.out.versions) + + // + // MODULE: PAVE somatic + // + // Select input sources and sort + // channel: runnable: [ meta, sage_vcf, sage_tbi ] + // channel: skip: [ meta ] + ch_sage_somatic_inputs_sorted = ch_sage_somatic_vcf + .map { meta, sage_vcf, sage_tbi -> + return [ + meta, + Utils.selectCurrentOrExisting(sage_vcf, meta, Constants.INPUT.SAGE_VCF_TUMOR), + Utils.selectCurrentOrExisting(sage_tbi, meta, Constants.INPUT.SAGE_VCF_TBI_TUMOR), + ] + } + .branch { meta, sage_vcf, sage_tbi -> + + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.PAVE_VCF_TUMOR) + + runnable: Utils.hasTumorDna(meta) && sage_vcf && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_pave, sage_vcf, sage_tbi ] + ch_pave_somatic_inputs = ch_sage_somatic_inputs_sorted.runnable + .map { meta, sage_vcf, sage_tbi -> + + def meta_pave = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorDnaSampleName(meta), + ] + + return [meta_pave, sage_vcf, sage_tbi] + } + + // Set resource files according to run 
mode + // NOTE(SW): required since certain files can be used in germline and somatic depending on mode + // but want to avoid duplicating as multiple inputs + // NOTE(SW): this pattern should be used only sparingly; implicit config from workflows is prefered + sage_blocklist_regions_somatic = sage_blocklist_regions + sage_blocklist_sites_somatic = sage_blocklist_sites + clinvar_annotations_somatic = clinvar_annotations + run_mode = Utils.getEnumFromString(params.mode, Constants.RunMode) + if (run_mode === Constants.RunMode.WGTS) { + sage_blocklist_regions_somatic = [] + sage_blocklist_sites_somatic = [] + clinvar_annotations_somatic = [] + } + + // Run process + SOMATIC( + ch_pave_somatic_inputs, + genome_fasta, + genome_version, + genome_fai, + sage_pon, + pon_artefacts, + sage_blocklist_regions_somatic, + sage_blocklist_sites_somatic, + clinvar_annotations_somatic, + segment_mappability, + driver_gene_panel, + ensembl_data_resources, + gnomad_resource, + ) + + ch_versions = ch_versions.mix(SOMATIC.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, pave_vcf ] + ch_somatic_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(SOMATIC.out.vcf, ch_inputs), + ch_sage_somatic_inputs_sorted.skip.map { meta -> [meta, []] }, + ) + + ch_germline_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(GERMLINE.out.vcf, ch_inputs), + ch_sage_germline_inputs_sorted.skip.map { meta -> [meta, []] }, + ) + + emit: + germline = ch_germline_out // channel: [ meta, pave_vcf ] + somatic = ch_somatic_out // channel: [ meta, pave_vcf ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/prepare_inputs/main.nf b/subworkflows/local/prepare_inputs/main.nf new file mode 100644 index 00000000..8a10e735 --- /dev/null +++ b/subworkflows/local/prepare_inputs/main.nf @@ -0,0 +1,24 @@ +// +// Prepare inputs (tests only) +// + +// NOTE(SW): inputs for the pipeline are prepared outside of NF +// workflow/channels to allow higher-level conditionals, however nf-test +// well-formed meta (including Constants) that can only be made available +// through running workflows/processes with 'setup'. Hence, this subworkflow +// isn't used in the main pipeline and is only used for execution of tests. 
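Since parsing happens outside of channels, the test-only wrapper below simply lifts the pre-parsed list into a channel of `[ meta ]`. A minimal, self-contained sketch of the same idea, using made-up meta maps in place of what `Utils.parseInput` would actually return (the keys shown are illustrative):

```groovy
// Sketch only: real meta maps are produced by Utils.parseInput from the samplesheet
workflow {

    def parsed_input = [
        [group_id: 'PATIENT1_GROUP', subject_id: 'PATIENT1'],   // illustrative keys
        [group_id: 'PATIENT2_GROUP', subject_id: 'PATIENT2'],
    ]

    // Lift the parsed list into a channel, one meta map per sample group
    ch_inputs = Channel.fromList(parsed_input)

    ch_inputs.view { meta -> "parsed meta: ${meta}" }
}
```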
+ +import Utils + +workflow PREPARE_INPUTS { + take: + input_fp_str + + main: + ch_inputs = Channel.fromList( + Utils.parseInput(input_fp_str, workflow.stubRun, log) + ) + + emit: + inputs = ch_inputs // channel: [ meta ] +} diff --git a/subworkflows/local/prepare_reference/main.nf b/subworkflows/local/prepare_reference/main.nf new file mode 100644 index 00000000..29479dd2 --- /dev/null +++ b/subworkflows/local/prepare_reference/main.nf @@ -0,0 +1,306 @@ +// +// Prepare reference data as required +// + +import Constants + +include { BWAMEM2_INDEX } from '../../../modules/nf-core/bwamem2/index/main' +include { BWA_INDEX } from '../../../modules/nf-core/bwa/index/main' +include { SAMTOOLS_DICT } from '../../../modules/nf-core/samtools/dict/main' +include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main' +include { STAR_GENOMEGENERATE } from '../../../modules/nf-core/star/genomegenerate/main' + +include { CUSTOM_EXTRACTTARBALL as DECOMP_BWAMEM2_INDEX } from '../../../modules/local/custom/extract_tarball/main' +include { CUSTOM_EXTRACTTARBALL as DECOMP_GRIDSS_INDEX } from '../../../modules/local/custom/extract_tarball/main' +include { CUSTOM_EXTRACTTARBALL as DECOMP_HMF_DATA } from '../../../modules/local/custom/extract_tarball/main' +include { CUSTOM_EXTRACTTARBALL as DECOMP_PANEL_DATA } from '../../../modules/local/custom/extract_tarball/main' +include { CUSTOM_EXTRACTTARBALL as DECOMP_STAR_INDEX } from '../../../modules/local/custom/extract_tarball/main' +include { CUSTOM_EXTRACTTARBALL as DECOMP_VIRUSBREAKEND_DB } from '../../../modules/local/custom/extract_tarball/main' +include { GRIDSS_INDEX } from '../../../modules/local/gridss/index/main' +include { WRITE_REFERENCE_DATA } from '../../../modules/local/custom/write_reference_data/main' + +workflow PREPARE_REFERENCE { + take: + run_config // channel: [mandatory] run configuration + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // + // Set some variables for brevity + // + ch_genome_fasta = Channel.fromPath(params.ref_data_genome_fasta) + ch_genome_version = Channel.value(params.genome_version) + run_virusinterpreter = run_config.mode !== Constants.RunMode.TARGETED && run_config.stages.virusinterpreter + + // + // Set .fai and .dict indexes, create if required + // + ch_genome_fai = getRefFileChannel('ref_data_genome_fai') + if (!params.ref_data_genome_fai) { + SAMTOOLS_FAIDX(ch_genome_fasta) + ch_genome_fai = SAMTOOLS_FAIDX.out.fai + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) + } + + ch_genome_dict = getRefFileChannel('ref_data_genome_dict') + if (!params.ref_data_genome_dict) { + SAMTOOLS_DICT(ch_genome_fasta) + ch_genome_dict = SAMTOOLS_DICT.out.dict + ch_versions = ch_versions.mix(SAMTOOLS_DICT.out.versions) + } + + // + // Set bwa-mem2 index, unpack or create if required + // + ch_genome_bwamem2_index = Channel.empty() + if (run_config.has_dna_fastq && run_config.stages.alignment) { + if (!params.ref_data_genome_bwamem2_index) { + + BWAMEM2_INDEX( + ch_genome_fasta, + params.ref_data_genome_alt ? 
file(params.ref_data_genome_alt) : [], + ) + ch_genome_bwamem2_index = BWAMEM2_INDEX.out.index + ch_versions = ch_versions.mix(BWAMEM2_INDEX.out.versions) + + } else if (params.ref_data_genome_bwamem2_index.endsWith('.tar.gz')) { + + ch_genome_bwamem2_index_inputs = Channel.fromPath(params.ref_data_genome_bwamem2_index) + .map { [[id: "bwa-mem2_index_${it.name.replaceAll('\\.tar\\.gz$', '')}"], it] } + + DECOMP_BWAMEM2_INDEX(ch_genome_bwamem2_index_inputs) + ch_genome_bwamem2_index = DECOMP_BWAMEM2_INDEX.out.extracted_dir + + } else { + + ch_genome_bwamem2_index = getRefFileChannel('ref_data_genome_bwamem2_index') + + } + } + + // + // Set GRIDSS index, unpack or create if required + // + ch_genome_gridss_index = Channel.empty() + if (run_config.has_dna && (run_config.stages.gridss || run_virusinterpreter)) { + if (!params.ref_data_genome_gridss_index) { + + BWA_INDEX( + ch_genome_fasta, + params.ref_data_genome_alt ? file(params.ref_data_genome_alt) : [], + ) + ch_versions = ch_versions.mix(BWA_INDEX.out.versions) + + GRIDSS_INDEX( + ch_genome_fasta, + ch_genome_fai, + ch_genome_dict, + BWA_INDEX.out.index, + ) + ch_genome_gridss_index = GRIDSS_INDEX.out.index + ch_versions = ch_versions.mix(GRIDSS_INDEX.out.versions) + + } else if (params.ref_data_genome_gridss_index.endsWith('.tar.gz')) { + + ch_genome_gridss_index_inputs = Channel.fromPath(params.ref_data_genome_gridss_index) + .map { [[id: "gridss_index_${it.name.replaceAll('\\.tar\\.gz$', '')}"], it] } + + DECOMP_GRIDSS_INDEX(ch_genome_gridss_index_inputs) + ch_genome_gridss_index = DECOMP_GRIDSS_INDEX.out.extracted_dir + + } else { + + ch_genome_gridss_index = getRefFileChannel('ref_data_genome_gridss_index') + + } + } + + // + // Set STAR index , unpack or create if required + // + ch_genome_star_index = Channel.empty() + if (run_config.has_rna_fastq && run_config.stages.alignment) { + if (!params.ref_data_genome_star_index) { + + STAR_GENOMEGENERATE( + ch_genome_fasta, + file(params.ref_data_genome_gtf), + ) + ch_genome_star_index = STAR_GENOMEGENERATE.out.index + ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) + + } else if (params.ref_data_genome_star_index.endsWith('.tar.gz')) { + + ch_genome_star_index_inputs = Channel.fromPath(params.ref_data_genome_star_index) + .map { [[id: "star_index_${it.name.replaceAll('\\.tar\\.gz$', '')}"], it] } + + DECOMP_STAR_INDEX(ch_genome_star_index_inputs) + ch_genome_star_index = DECOMP_STAR_INDEX.out.extracted_dir + + } else { + + ch_genome_star_index = getRefFileChannel('ref_data_genome_star_index') + + } + } + + // + // Set VIRUSBreakend database, unpack if required + // + ch_virusbreakenddb = Channel.empty() + if (run_config.has_dna && run_virusinterpreter) { + if (params.ref_data_virusbreakenddb_path.endsWith('.tar.gz')) { + + ch_virusbreakenddb_inputs = Channel.fromPath(params.ref_data_virusbreakenddb_path) + .map { [[id: it.name.replaceAll('\\.tar\\.gz$', '')], it] } + + DECOMP_VIRUSBREAKEND_DB(ch_virusbreakenddb_inputs) + ch_virusbreakenddb = DECOMP_VIRUSBREAKEND_DB.out.extracted_dir + + } else { + + ch_virusbreakenddb = Channel.fromPath(params.ref_data_virusbreakenddb_path) + + } + } + + // + // Set HMF reference data, unpack if required + // + ch_hmf_data = Channel.empty() + hmf_data_paths = params.hmf_data_paths[params.genome_version.toString()] + if (params.ref_data_hmf_data_path.endsWith('tar.gz')) { + + ch_hmf_data_inputs = Channel.fromPath(params.ref_data_hmf_data_path) + .map { [[id: "hmf_data_${it.name.replaceAll('\\.tar\\.gz$', '')}"], it] } + + 
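As with the bwa-mem2 and GRIDSS index archives above, the HMF data bundle is keyed for the shared extraction process by stripping the `.tar.gz` suffix from the archive name. A quick Groovy sketch of that naming step (the archive name here is hypothetical; the real path comes from `--ref_data_hmf_data_path`):

```groovy
// Hypothetical archive name; the real path is supplied via --ref_data_hmf_data_path
def archive_name = 'hmf_reference_data.38.tar.gz'

// Meta id: fixed prefix plus the archive name without its .tar.gz suffix
def id = "hmf_data_${archive_name.replaceAll('\\.tar\\.gz$', '')}"

assert id == 'hmf_data_hmf_reference_data.38'

// The tuple handed to the extraction process then takes the form: [ [id: id], file(archive_path) ]
```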
DECOMP_HMF_DATA(ch_hmf_data_inputs) + + ch_hmf_data = DECOMP_HMF_DATA.out.extracted_dir + .collect() + .map { dir_list -> + assert dir_list.size() == 1 + def dirpath = dir_list[0].toUriString() + return createDataMap(hmf_data_paths, dirpath) + } + + } else { + + ch_hmf_data = Channel.value(createDataMap(hmf_data_paths, params.ref_data_hmf_data_path)) + + } + + // + // Set panel reference data, unpack if required + // + ch_panel_data = Channel.empty() + if (run_config.mode === Constants.RunMode.TARGETED) { + + panel_data_paths_versions = params.panel_data_paths[params.panel] + panel_data_paths = panel_data_paths_versions[params.genome_version.toString()] + + if (params.ref_data_panel_data_path.endsWith('tar.gz')) { + + ch_panel_data_inputs = Channel.fromPath(params.ref_data_panel_data_path) + .map { [[id: "panel_data_${it.name.replaceAll('\\.tar\\.gz$', '')}"], it] } + + DECOMP_PANEL_DATA(ch_panel_data_inputs) + + ch_panel_data = DECOMP_PANEL_DATA.out.extracted_dir + .collect() + .map { dir_list -> + assert dir_list.size() == 1 + def dirpath = dir_list[0].toUriString() + return createDataMap(panel_data_paths, dirpath) + } + + } else { + + ch_panel_data = Channel.value(createDataMap(panel_data_paths, params.ref_data_panel_data_path)) + + } + } + + // + // Write prepared reference data if requested + // + if (params.prepare_reference_only) { + + // Create channel of data files to stage (if not already local) and write + ch_refdata = Channel.empty() + .mix( + ch_genome_fasta, + ch_genome_fai, + ch_genome_dict, + ch_genome_bwamem2_index, + ch_genome_gridss_index, + ch_genome_star_index, + ch_virusbreakenddb, + // Also include base paths for hmf_data and panel_data + Channel.empty() + .mix( + ch_hmf_data, + ch_panel_data, + ) + .map { getDataBaseDirectory(it) } + ) + + WRITE_REFERENCE_DATA( + ch_refdata, + workflow.manifest.version, + ) + + // Clear all stages to prevent running any analysis + run_config.stages = [:] + } + + emit: + genome_fasta = ch_genome_fasta.first() // path: genome_fasta + genome_fai = ch_genome_fai.first() // path: genome_fai + genome_dict = ch_genome_dict.first() // path: genome_dict + genome_bwamem2_index = ch_genome_bwamem2_index.first() // path: genome_bwa-mem2_index + genome_gridss_index = ch_genome_gridss_index.first() // path: genome_gridss_index + genome_star_index = ch_genome_star_index.first() // path: genome_star_index + genome_version = ch_genome_version // val: genome_version + + virusbreakenddb = ch_virusbreakenddb.first() // path: VIRUSBreakend database + hmf_data = ch_hmf_data // map: HMF data paths + panel_data = ch_panel_data // map: Panel data paths + + versions = ch_versions // channel: [ versions.yml ] +} + +def getRefFileChannel(key) { + def fp = params.get(key) ? 
file(params.getAt(key)) : [] + return Channel.of(fp) +} + +def createDataMap(entries, ref_data_path) { + return entries + .collectEntries { name, path -> + def ref_data_file = getRefdataFile(path, ref_data_path) + return [name, ref_data_file] + } +} + +def getRefdataFile(filepath, ref_data_path) { + def data_path_noslash = ref_data_path.toString().replaceAll('/$', '') + return file("${data_path_noslash}/${filepath}", checkIfExists: true) +} + +def getDataBaseDirectory(data) { + def c = [] + data + .collect { it.value.toUriString().getChars() } + .transpose() + .findIndexOf { + def cs = it.unique() + if (cs.size() != 1) return true + c << cs.pop() + return false + } + return file("${c.join('')}") +} diff --git a/subworkflows/local/purple_calling/main.nf b/subworkflows/local/purple_calling/main.nf new file mode 100644 index 00000000..48142ae9 --- /dev/null +++ b/subworkflows/local/purple_calling/main.nf @@ -0,0 +1,145 @@ +// +// PURPLE is a CNV caller that infers purity/ploidy and recovers low-confidence SVs +// + +import Constants +import Utils + +include { PURPLE } from '../../../modules/local/purple/main' + +workflow PURPLE_CALLING { + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_amber // channel: [mandatory] [ meta, amber_dir ] + ch_cobalt // channel: [mandatory] [ meta, cobalt_dir ] + ch_smlv_somatic // channel: [mandatory] [ meta, pave_vcf ] + ch_smlv_germline // channel: [mandatory] [ meta, pave_vcf ] + ch_sv_somatic // channel: [mandatory] [ meta, gripss_vcf, gripss_tbi ] + ch_sv_germline // channel: [mandatory] [ meta, gripss_vcf, gripss_tbi ] + ch_sv_somatic_unfiltered // channel: [mandatory] [ meta, gripss_vcf, gripss_tbi ] + + // Reference data + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_version // channel: [mandatory] genome version + genome_fai // channel: [mandatory] /path/to/genome_fai + genome_dict // channel: [mandatory] /path/to/genome_dict + gc_profile // channel: [mandatory] /path/to/gc_profile + sage_known_hotspots_somatic // channel: [mandatory] /path/to/sage_known_hotspots_somatic + sage_known_hotspots_germline // channel: [optional] /path/to/sage_known_hotspots_germline + driver_gene_panel // channel: [mandatory] /path/to/driver_gene_panel + ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ + purple_germline_del // channel: [optional] /path/to/purple_germline_del + target_region_bed // channel: [optional] /path/to/target_region_bed + target_region_ratios // channel: [optional] /path/to/target_region_ratios + target_region_msi_indels // channel: [optional] /path/to/target_region_msi_indels + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select input sources + // channel: [ meta, amber_dir, cobalt_dir, sv_somatic_vcf, sv_somatic_tbi, sv_somatic_unfiltered_vcf, sv_somatic_unfiltered_tbi, sv_germline_vcf, sv_germline_tbi, smlv_somatic_vcf, smlv_germline_vcf ] + ch_inputs_selected = WorkflowOncoanalyser.groupByMeta( + ch_amber, + ch_cobalt, + ch_sv_somatic, + ch_sv_somatic_unfiltered, + ch_sv_germline, + ch_smlv_somatic, + ch_smlv_germline, + ) + .map { d -> + + def meta = d[0] + + // NOTE(SW): avoiding further complexity with loops etc + + def inputs = [ + Utils.selectCurrentOrExisting(d[1], meta, Constants.INPUT.AMBER_DIR), + Utils.selectCurrentOrExisting(d[2], meta, Constants.INPUT.COBALT_DIR), + Utils.selectCurrentOrExisting(d[3], meta, Constants.INPUT.GRIPSS_VCF_TUMOR), + Utils.selectCurrentOrExisting(d[4], meta, 
Constants.INPUT.GRIPSS_VCF_TUMOR_TBI), + Utils.selectCurrentOrExisting(d[5], meta, Constants.INPUT.GRIPSS_UNFILTERED_VCF_TUMOR), + Utils.selectCurrentOrExisting(d[6], meta, Constants.INPUT.GRIPSS_UNFILTERED_VCF_TUMOR_TBI), + Utils.selectCurrentOrExisting(d[7], meta, Constants.INPUT.GRIPSS_VCF_NORMAL), + Utils.selectCurrentOrExisting(d[8], meta, Constants.INPUT.GRIPSS_VCF_NORMAL_TBI), + Utils.selectCurrentOrExisting(d[9], meta, Constants.INPUT.PAVE_VCF_TUMOR), + Utils.selectCurrentOrExisting(d[10], meta, Constants.INPUT.PAVE_VCF_NORMAL), + ] + + return [meta, *inputs] + } + + // Sort inputs + // channel: runnable: [ meta, amber_dir, cobalt_dir, sv_somatic_vcf, sv_somatic_tbi, sv_somatic_unfiltered_vcf, sv_somatic_unfiltered_tbi, sv_germline_vcf, sv_germline_tbi, smlv_somatic_vcf, smlv_germline_vcf ] + // channel: skip: [ meta ] + ch_inputs_sorted = ch_inputs_selected + .branch { d -> + def meta = d[0] + def amber_dir = d[1] + def cobalt_dir = d[2] + + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.PURPLE_DIR) + + runnable: amber_dir && cobalt_dir && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_purple, amber_dir, cobalt_dir, sv_somatic_vcf, sv_somatic_tbi, sv_somatic_unfiltered_vcf, sv_somatic_unfiltered_tbi, sv_germline_vcf, sv_germline_tbi, smlv_somatic_vcf, smlv_germline_vcf ] + ch_purple_inputs = ch_inputs_sorted.runnable + .map { d -> + + def meta = d[0] + def inputs = d[1..-1] + + def meta_purple = [ + key: meta.group_id, + id: meta.group_id, + tumor_id: Utils.getTumorDnaSampleName(meta), + ] + + if (Utils.hasNormalDna(meta)) { + meta_purple.normal_id = Utils.getNormalDnaSampleName(meta) + } + + return [meta_purple, *inputs] + + } + + // Run process + PURPLE( + ch_purple_inputs, + genome_fasta, + genome_version, + genome_fai, + genome_dict, + gc_profile, + sage_known_hotspots_somatic, + sage_known_hotspots_germline, + driver_gene_panel, + ensembl_data_resources, + purple_germline_del, + target_region_bed, + target_region_ratios, + target_region_msi_indels, + ) + + ch_versions = ch_versions.mix(PURPLE.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, purple_dir ] + ch_outputs = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(PURPLE.out.purple_dir, ch_inputs), + ch_inputs_sorted.skip.map { meta -> [meta, []] }, + ) + + emit: + purple_dir = ch_outputs // channel: [ meta, purple_dir ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/read_alignment_dna/main.nf b/subworkflows/local/read_alignment_dna/main.nf new file mode 100644 index 00000000..18764169 --- /dev/null +++ b/subworkflows/local/read_alignment_dna/main.nf @@ -0,0 +1,213 @@ +// +// Align DNA reads +// + +import Constants +import Utils + +include { BWAMEM2_ALIGN } from '../../../modules/local/bwa-mem2/mem/main' +include { FASTP } from '../../../modules/local/fastp/main' + +workflow READ_ALIGNMENT_DNA { + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + + // Reference data + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_bwamem2_index // channel: [mandatory] /path/to/genome_bwa-mem2_index_dir/ + + // Params + max_fastq_records // numeric: [mandatory] max number of FASTQ records per split + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Sort inputs, separate by tumor and normal + // channel: [ meta ] + ch_inputs_tumor_sorted = ch_inputs + .branch { meta -> + def 
has_existing = Utils.hasExistingInput(meta, Constants.INPUT.BAM_DNA_TUMOR) + runnable: Utils.hasTumorDnaFastq(meta) && !has_existing + skip: true + } + + ch_inputs_normal_sorted = ch_inputs + .branch { meta -> + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.BAM_DNA_NORMAL) + runnable: Utils.hasNormalDnaFastq(meta) && !has_existing + skip: true + } + + // Create FASTQ input channel + // channel: [ meta_fastq, fastq_fwd, fastq_rev ] + ch_fastq_inputs = Channel.empty() + .mix( + ch_inputs_tumor_sorted.runnable.map { meta -> [meta, Utils.getTumorDnaSample(meta), 'tumor'] }, + ch_inputs_normal_sorted.runnable.map { meta -> [meta, Utils.getNormalDnaSample(meta), 'normal'] }, + ) + .flatMap { meta, meta_sample, sample_type -> + meta_sample + .getAt(Constants.FileType.FASTQ) + .collect { key, fps -> + def (library_id, lane) = key + + def meta_fastq = [ + key: meta.group_id, + id: "${meta.group_id}_${meta_sample.sample_id}", + sample_id: meta_sample.sample_id, + library_id: library_id, + lane: lane, + sample_type: sample_type, + ] + + return [meta_fastq, fps['fwd'], fps['rev']] + } + } + + // + // MODULE: fastp + // + // Split FASTQ into chunks if requested for distributed processing + // channel: [ meta_fastq_ready, fastq_fwd, fastq_fwd ] + ch_fastqs_ready = Channel.empty() + if (max_fastq_records > 0) { + + // Run process + FASTP( + ch_fastq_inputs, + max_fastq_records, + ) + + ch_versions = ch_versions.mix(FASTP.out.versions) + + // Prepare outputs within conditional block + ch_fastqs_ready = FASTP.out.fastq + .flatMap { meta_fastq, reads_fwd, reads_rev -> + + def data = [reads_fwd, reads_rev] + .transpose() + .collect { fwd, rev -> + + def split_fwd = fwd.name.replaceAll('\\..+$', '') + def split_rev = rev.name.replaceAll('\\..+$', '') + + assert split_fwd == split_rev + + // NOTE(SW): split allows meta_fastq_ready to be unique, which is required during reunite below + def meta_fastq_ready = [ + *:meta_fastq, + id: "${meta_fastq.id}_${split_fwd}", + split: split_fwd, + ] + + return [meta_fastq_ready, fwd, rev] + } + + return data + } + + } else { + + ch_fastqs_ready = ch_fastq_inputs + .map { meta_fastq, fastq_fwd, fastq_rev -> + + def meta_fastq_ready = [ + *:meta_fastq, + split: null, + ] + + return [meta_fastq_ready, fastq_fwd, fastq_rev] + } + + } + + // + // MODULE: BWA-MEM2 + // + // Create process input channel + // channel: [ meta_bwamem2, fastq_fwd, fastq_rev ] + ch_bwamem2_inputs = ch_fastqs_ready + .map { meta_fastq_ready, fastq_fwd, fastq_rev -> + + def meta_bwamem2 = [ + *:meta_fastq_ready, + read_group: "${meta_fastq_ready.sample_id}.${meta_fastq_ready.library_id}.${meta_fastq_ready.lane}", + ] + + return [meta_bwamem2, fastq_fwd, fastq_rev] + } + + // Run process + BWAMEM2_ALIGN( + ch_bwamem2_inputs, + genome_fasta, + genome_bwamem2_index, + ) + + ch_versions = ch_versions.mix(BWAMEM2_ALIGN.out.versions) + + // Reunite BAMs + // First, count expected BAMs per sample for non-blocking groupTuple op + // channel: [ meta_count, group_size ] + ch_sample_fastq_counts = ch_bwamem2_inputs + .map { meta_bwamem2, reads_fwd, reads_rev -> + + def meta_count = [ + key: meta_bwamem2.key, + sample_type: meta_bwamem2.sample_type, + ] + + return [meta_count, meta_bwamem2] + } + .groupTuple() + .map { meta_count, metas_bwamem2 -> return [meta_count, metas_bwamem2.size()] } + + // Now, group with expected size then sort into tumor and normal channels + // channel: [ meta_group, [bam, ...], [bai, ...] 
] + ch_bams_united = ch_sample_fastq_counts + .cross( + // First element to match meta_count above for `cross` + BWAMEM2_ALIGN.out.bam.map { meta_bwamem2, bam, bai -> [[key: meta_bwamem2.key, sample_type: meta_bwamem2.sample_type], bam, bai] } + ) + .map { count_tuple, bam_tuple -> + + def group_size = count_tuple[1] + def (meta_bam, bam, bai) = bam_tuple + + def meta_group = [ + *:meta_bam, + ] + + return tuple(groupKey(meta_group, group_size), bam, bai) + } + .groupTuple() + .branch { meta_group, bams, bais -> + assert ['tumor', 'normal'].contains(meta_group.sample_type) + tumor: meta_group.sample_type == 'tumor' + normal: meta_group.sample_type == 'normal' + placeholder: true + } + + // Set outputs, restoring original meta + // channel: [ meta, [bam, ...], [bai, ...] ] + ch_bam_tumor_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(ch_bams_united.tumor, ch_inputs), + ch_inputs_tumor_sorted.skip.map { meta -> [meta, [], []] }, + ) + + ch_bam_normal_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(ch_bams_united.normal, ch_inputs), + ch_inputs_normal_sorted.skip.map { meta -> [meta, [], []] }, + ) + + emit: + dna_tumor = ch_bam_tumor_out // channel: [ meta, [bam, ...], [bai, ...] ] + dna_normal = ch_bam_normal_out // channel: [ meta, [bam, ...], [bai, ...] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/read_alignment_rna/main.nf b/subworkflows/local/read_alignment_rna/main.nf new file mode 100644 index 00000000..2a9b7876 --- /dev/null +++ b/subworkflows/local/read_alignment_rna/main.nf @@ -0,0 +1,212 @@ +// +// Align RNA reads +// + +import Constants +import Utils + +include { GATK4_MARKDUPLICATES } from '../../../modules/nf-core/gatk4/markduplicates/main' +include { SAMBAMBA_MERGE } from '../../../modules/local/sambamba/merge/main' +include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' +include { STAR_ALIGN } from '../../../modules/local/star/align/main' + +workflow READ_ALIGNMENT_RNA { + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + + // Reference data + genome_star_index // channel: [mandatory] /path/to/genome_star_index/ + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Sort inputs + // channel: [ meta ] + ch_inputs_sorted = ch_inputs + .branch { meta -> + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.BAM_RNA_TUMOR) + runnable: Utils.hasTumorRnaFastq(meta) && !has_existing + skip: true + } + + // Create FASTQ input channel + // channel: [ meta_fastq, fastq_fwd, fastq_rev ] + ch_fastq_inputs = ch_inputs_sorted.runnable + .flatMap { meta -> + def meta_sample = Utils.getTumorRnaSample(meta) + meta_sample + .getAt(Constants.FileType.FASTQ) + .collect { key, fps -> + def (library_id, lane) = key + + def meta_fastq = [ + key: meta.group_id, + id: "${meta.group_id}_${meta_sample.sample_id}", + sample_id: meta_sample.sample_id, + library_id: library_id, + lane: lane, + ] + + return [meta_fastq, fps['fwd'], fps['rev']] + } + } + + // + // MODULE: STAR alignment + // + // Create process input channel + // channel: [ meta_star, fastq_fwd, fastq_rev ] + ch_star_inputs = ch_fastq_inputs + .map { meta_fastq, fastq_fwd, fastq_rev -> + def meta_star = [ + *:meta_fastq, + read_group: "${meta_fastq.sample_id}.${meta_fastq.library_id}.${meta_fastq.lane}", + ] + + return [meta_star, fastq_fwd, fastq_rev] + } + + // Run process + STAR_ALIGN( + ch_star_inputs, + genome_star_index, + ) + + ch_versions = 
ch_versions.mix(STAR_ALIGN.out.versions) + + // + // MODULE: SAMtools sort + // + // Create process input channel + // channel: [ meta_sort, bam ] + ch_sort_inputs = STAR_ALIGN.out.bam + .map { meta_star, bam -> + def meta_sort = [ + *:meta_star, + prefix: meta_star.read_group, + ] + + return [meta_sort, bam] + } + + // Run process + SAMTOOLS_SORT( + ch_sort_inputs, + ) + + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions) + + // + // MODULE: Sambamba merge + // + // Reunite BAMs + // First, count expected BAMs per sample for non-blocking groupTuple op + // channel: [ meta_count, group_size ] + ch_sample_fastq_counts = ch_star_inputs + .map { meta_star, reads_fwd, reads_rev -> + def meta_count = [key: meta_star.key] + return [meta_count, meta_star] + } + .groupTuple() + .map { meta_count, meta_stars -> return [meta_count, meta_stars.size()] } + + // Now, group with expected size then sort into tumor and normal channels + // channel: [ meta_group, [bam, ...] ] + ch_bams_united = ch_sample_fastq_counts + .cross( + // First element to match meta_count above for `cross` + SAMTOOLS_SORT.out.bam.map { meta_star, bam -> [[key: meta_star.key], bam] } + ) + .map { count_tuple, bam_tuple -> + + def group_size = count_tuple[1] + def (meta_bam, bam) = bam_tuple + + def meta_group = [ + *:meta_bam, + ] + + return tuple(groupKey(meta_group, group_size), bam) + } + .groupTuple() + + // Sort into merge-eligible BAMs (at least two BAMs required) + // channel: runnable: [ meta_group, [bam, ...] ] + // channel: skip: [ meta_group, bam ] + ch_bams_united_sorted = ch_bams_united + .branch { meta_group, bams -> + runnable: bams.size() > 1 + skip: true + return [meta_group, bams[0]] + } + + // Create process input channel + // channel: [ meta_merge, [bams, ...] ] + ch_merge_inputs = WorkflowOncoanalyser.restoreMeta(ch_bams_united_sorted.runnable, ch_inputs) + .map { meta, bams -> + def meta_merge = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorRnaSampleName(meta), + ] + return [meta_merge, bams] + } + + // Run process + SAMBAMBA_MERGE( + ch_merge_inputs, + ) + + ch_versions = ch_versions.mix(SAMBAMBA_MERGE.out.versions) + + // + // MODULE: GATK4 markduplicates + // + // Create process input channel + // channel: [ meta_markdups, bam ] + ch_markdups_inputs = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(SAMBAMBA_MERGE.out.bam, ch_inputs), + WorkflowOncoanalyser.restoreMeta(ch_bams_united_sorted.skip, ch_inputs), + ) + .map { meta, bam -> + def meta_markdups = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorRnaSampleName(meta), + ] + return [meta_markdups, bam] + } + + // Run process + GATK4_MARKDUPLICATES( + ch_markdups_inputs, + [], + [], + ) + + ch_versions = ch_versions.mix(GATK4_MARKDUPLICATES.out.versions) + + // Combine BAMs and BAIs + // channel: [ meta, bam, bai ] + ch_bams_ready = WorkflowOncoanalyser.groupByMeta( + WorkflowOncoanalyser.restoreMeta(GATK4_MARKDUPLICATES.out.bam, ch_inputs), + WorkflowOncoanalyser.restoreMeta(GATK4_MARKDUPLICATES.out.bai, ch_inputs), + ) + + // Set outputs + // channel: [ meta, bam, bai ] + ch_bam_out = Channel.empty() + .mix( + ch_bams_ready, + ch_inputs_sorted.skip.map { meta -> [meta, [], []] }, + ) + + emit: + rna_tumor = ch_bam_out // channel: [ meta, bam, bai ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/read_processing/main.nf b/subworkflows/local/read_processing/main.nf new file mode 100644 index 00000000..4a0967ca --- /dev/null +++ 
b/subworkflows/local/read_processing/main.nf @@ -0,0 +1,123 @@ +// +// Apply post-alignment processing +// + +import Constants +import Utils + +include { MARKDUPS } from '../../../modules/local/markdups/main' + +workflow READ_PROCESSING { + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_dna_tumor // channel: [mandatory] [ meta, [bam, ...], [bai, ...] ] + ch_dna_normal // channel: [mandatory] [ meta, [bam, ...], [bai, ...] ] + + // Reference data + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_ver // channel: [mandatory] genome version + genome_fai // channel: [mandatory] /path/to/genome_fai + genome_dict // channel: [mandatory] /path/to/genome_dict + unmap_regions // channel: [mandatory] /path/to/unmap_regions + + // Params + has_umis // boolean: [mandatory] UMI processing flag + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select and sort input sources, separating bytumor and normal + // channel: runnable: [ meta, [bam, ...], [bai, ...] ] + // channel: skip: [ meta ] + ch_inputs_tumor_sorted = ch_dna_tumor + .map { meta, bams, bais -> + return [ + meta, + Utils.hasExistingInput(meta, Constants.INPUT.BAM_DNA_TUMOR) ? [Utils.getInput(meta, Constants.INPUT.BAM_DNA_TUMOR)] : bams, + Utils.hasExistingInput(meta, Constants.INPUT.BAI_DNA_TUMOR) ? [Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR)] : bais, + ] + } + .branch { meta, bams, bais -> + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR) + runnable: bams && !has_existing + skip: true + return meta + } + + ch_inputs_normal_sorted = ch_dna_normal + .map { meta, bams, bais -> + return [ + meta, + Utils.hasExistingInput(meta, Constants.INPUT.BAM_DNA_NORMAL) ? [Utils.getInput(meta, Constants.INPUT.BAM_DNA_NORMAL)] : bams, + Utils.hasExistingInput(meta, Constants.INPUT.BAI_DNA_NORMAL) ? [Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL)] : bais, + ] + } + .branch { meta, bams, bais -> + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL) + runnable: bams && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_markdups, [bam, ...], [bai, ...] 
] + ch_markdups_inputs = Channel.empty() + .mix( + ch_inputs_tumor_sorted.runnable.map { meta, bams, bais -> [meta, Utils.getTumorDnaSample(meta), 'tumor', bams, bais] }, + ch_inputs_normal_sorted.runnable.map { meta, bams, bais -> [meta, Utils.getNormalDnaSample(meta), 'normal', bams, bais] }, + ) + .map { meta, meta_sample, sample_type, bams, bais -> + + def meta_markdups = [ + key: meta.group_id, + id: "${meta.group_id}_${meta_sample.sample_id}", + sample_id: meta_sample.sample_id, + sample_type: sample_type, + ] + + return [meta_markdups, bams, bais] + } + + // Run process + MARKDUPS( + ch_markdups_inputs, + genome_fasta, + genome_ver, + genome_fai, + genome_dict, + unmap_regions, + has_umis, + ) + + // Sort into a tumor and normal channel + ch_markdups_out = MARKDUPS.out.bam + .branch { meta_markdups, bam, bai -> + assert ['tumor', 'normal'].contains(meta_markdups.sample_type) + tumor: meta_markdups.sample_type == 'tumor' + normal: meta_markdups.sample_type == 'normal' + placeholder: true + } + + // Set outputs, restoring original meta + // channel: [ meta, bam, bai ] + ch_bam_tumor_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(ch_markdups_out.tumor, ch_inputs), + ch_inputs_tumor_sorted.skip.map { meta -> [meta, [], []] }, + ) + + ch_bam_normal_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(ch_markdups_out.normal, ch_inputs), + ch_inputs_normal_sorted.skip.map { meta -> [meta, [], []] }, + ) + + emit: + dna_tumor = ch_bam_tumor_out // channel: [ meta, bam, bai ] + dna_normal = ch_bam_normal_out // channel: [ meta, bam, bai ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/sage_append/main.nf b/subworkflows/local/sage_append/main.nf new file mode 100644 index 00000000..a7ded0be --- /dev/null +++ b/subworkflows/local/sage_append/main.nf @@ -0,0 +1,172 @@ +// +// SAGE append adds WTS data to an existing SAGE VCF +// + +import Constants +import Utils + +include { SAGE_APPEND as SOMATIC } from '../../../modules/local/sage/append/main' +include { SAGE_APPEND as GERMLINE } from '../../../modules/local/sage/append/main' + +workflow SAGE_APPEND { + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_tumor_rna_bam // channel: [mandatory] [ meta, bam, bai ] + ch_purple_dir // channel: [mandatory] [ meta, purple_dir ] + + // Reference data + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_version // channel: [mandatory] genome version + genome_fai // channel: [mandatory] /path/to/genome_fai + genome_dict // channel: [mandatory] /path/to/genome_dict + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select input sources and sort + // channel: runnable: [ meta, tumor_bam, tumor_bai, purple_dir ] + // channel: skip: [ meta ] + ch_inputs_sorted = WorkflowOncoanalyser.groupByMeta( + ch_tumor_rna_bam, + ch_purple_dir, + ) + .map { meta, tumor_bam, tumor_bai, purple_dir -> + return [ + meta, + Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_RNA_TUMOR), + Utils.selectCurrentOrExisting(tumor_bai, meta, Constants.INPUT.BAI_RNA_TUMOR), + Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR), + ] + } + .branch { meta, tumor_bam, tumor_bai, purple_dir -> + runnable: tumor_bam && purple_dir + skip: true + return meta + } + + // + // MODULE: SAGE append germline + // + // Select inputs that are eligible to run + // channel: runnable: [ meta, tumor_bam, tumor_bai, purple_dir ] + // channel: 
skip: [ meta ] + ch_inputs_germline_sorted = ch_inputs_sorted.runnable + .branch { meta, tumor_bam, tumor_bai, purple_dir -> + + def tumor_dna_id = Utils.getTumorDnaSampleName(meta) + + def has_normal_dna = Utils.hasNormalDna(meta) + def has_tumor_rna = Utils.hasTumorRna(meta) + def has_smlv_germline = file(purple_dir).resolve("${tumor_dna_id}.purple.germline.vcf.gz") + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SAGE_APPEND_VCF_NORMAL) + + runnable: has_normal_dna && has_tumor_rna && has_smlv_germline && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_append, purple_smlv_vcf, tumor_bam, tumor_bai ] + ch_sage_append_germline_inputs = ch_inputs_germline_sorted.runnable + .map { meta, tumor_bam, tumor_bai, purple_dir -> + + def tumor_dna_id = Utils.getTumorDnaSampleName(meta) + + def meta_append = [ + key: meta.group_id, + id: meta.group_id, + tumor_rna_id: Utils.getTumorRnaSampleName(meta), + dna_id: Utils.getNormalDnaSampleName(meta), + ] + + def purple_smlv_vcf = file(purple_dir).resolve("${tumor_dna_id}.purple.germline.vcf.gz") + + return [meta_append, purple_smlv_vcf, tumor_bam, tumor_bai] + } + + // Run process + GERMLINE( + ch_sage_append_germline_inputs, + genome_fasta, + genome_version, + genome_fai, + genome_dict, + ) + + ch_versions = ch_versions.mix(GERMLINE.out.versions) + + // + // MODULE: SAGE append somatic + // + // Select inputs that are eligible to run + // channel: runnable: [ meta, tumor_bam, tumor_bai, purple_dir ] + // channel: skip: [ meta ] + ch_inputs_somatic_sorted = ch_inputs_sorted.runnable + .branch { meta, tumor_bam, tumor_bai, purple_dir -> + def tumor_dna_id = Utils.getTumorDnaSampleName(meta) + + def has_tumor_dna = Utils.hasTumorDna(meta) + def has_tumor_rna = Utils.hasTumorRna(meta) + def has_smlv_somatic = file(purple_dir).resolve("${tumor_dna_id}.purple.somatic.vcf.gz") + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SAGE_APPEND_VCF_TUMOR) + + runnable: has_tumor_dna && has_tumor_rna && has_smlv_somatic && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_append, purple_smlv_vcf, tumor_bam, tumor_bai ] + ch_sage_append_somatic_inputs = ch_inputs_somatic_sorted.runnable + .map { meta, tumor_bam, tumor_bai, purple_dir -> + + def tumor_dna_id = Utils.getTumorDnaSampleName(meta) + + def meta_append = [ + key: meta.group_id, + id: meta.group_id, + tumor_rna_id: Utils.getTumorRnaSampleName(meta), + dna_id: Utils.getTumorDnaSampleName(meta), + ] + + def purple_smlv_vcf = file(purple_dir).resolve("${tumor_dna_id}.purple.somatic.vcf.gz") + + return [meta_append, purple_smlv_vcf, tumor_bam, tumor_bai] + } + + // Run process + SOMATIC( + ch_sage_append_somatic_inputs, + genome_fasta, + genome_version, + genome_fai, + genome_dict, + ) + + ch_versions = ch_versions.mix(SOMATIC.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, sage_append_vcf ] + ch_somatic_vcf = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(SOMATIC.out.vcf, ch_inputs), + ch_inputs_somatic_sorted.skip.map { meta -> [meta, []] }, + ch_inputs_sorted.skip.map { meta -> [meta, []] }, + ) + + ch_germline_vcf = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(GERMLINE.out.vcf, ch_inputs), + ch_inputs_germline_sorted.skip.map { meta -> [meta, []] }, + ch_inputs_sorted.skip.map { meta -> [meta, []] }, + ) + + emit: + somatic_vcf = ch_somatic_vcf // channel: [ meta, sage_append_vcf ] + germline_vcf = ch_germline_vcf 
// channel: [ meta, sage_append_vcf ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/sage_calling/main.nf b/subworkflows/local/sage_calling/main.nf new file mode 100644 index 00000000..9cc81222 --- /dev/null +++ b/subworkflows/local/sage_calling/main.nf @@ -0,0 +1,197 @@ +// +// SAGE is a precise and highly sensitive somatic SNV, MNV and small INDEL caller +// + +import Constants +import Utils + +include { SAGE_GERMLINE as GERMLINE } from '../../../modules/local/sage/germline/main' +include { SAGE_SOMATIC as SOMATIC } from '../../../modules/local/sage/somatic/main' + +workflow SAGE_CALLING { + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] + ch_normal_bam // channel: [mandatory] [ meta, bam, bai ] + + // Reference data + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_version // channel: [mandatory] genome version + genome_fai // channel: [mandatory] /path/to/genome_fai + genome_dict // channel: [mandatory] /path/to/genome_dict + sage_known_hotspots_somatic // channel: [mandatory] /path/to/sage_known_hotspots_somatic + sage_known_hotspots_germline // channel: [optional] /path/to/sage_known_hotspots_germline + sage_actionable_panel // channel: [mandatory] /path/to/sage_actionable_panel + sage_coverage_panel // channel: [mandatory] /path/to/sage_coverage_panel + sage_highconf_regions // channel: [mandatory] /path/to/sage_highconf_regions + segment_mappability // channel: [mandatory] /path/to/segment_mappability + driver_gene_panel // channel: [mandatory] /path/to/driver_gene_panel + ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Sort inputs + // channel: runnable: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai ] + // channel: skip: [ meta ] + ch_inputs_sorted = WorkflowOncoanalyser.groupByMeta( + ch_tumor_bam, + ch_normal_bam, + ) + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + return [ + meta, + Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR), + tumor_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR), + Utils.selectCurrentOrExisting(normal_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL), + normal_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL), + ] + } + .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + runnable: tumor_bam + skip: true + return meta + } + + // + // MODULE: SAGE germline + // + // Select inputs that are eligible to run + // channel: runnable: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai ] + // channel: skip: [ meta ] + ch_inputs_germline_sorted = ch_inputs_sorted.runnable + .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + def has_tumor_normal = tumor_bam && normal_bam + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SAGE_VCF_NORMAL) + + runnable: has_tumor_normal && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_sage, tumor_bam, normal_bam, tumor_bai, normal_bai ] + ch_sage_germline_inputs = ch_inputs_germline_sorted.runnable + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + + def meta_sage = [ + key: meta.group_id, + id: meta.group_id, + tumor_id: Utils.getTumorDnaSampleName(meta), + normal_id: Utils.getNormalDnaSampleName(meta), + ] + + return [meta_sage, tumor_bam, 
normal_bam, tumor_bai, normal_bai] + } + + // Run process + GERMLINE( + ch_sage_germline_inputs, + genome_fasta, + genome_version, + genome_fai, + genome_dict, + sage_known_hotspots_germline, + sage_actionable_panel, + sage_coverage_panel, + sage_highconf_regions, + ensembl_data_resources, + ) + + ch_versions = ch_versions.mix(GERMLINE.out.versions) + + // + // MODULE: SAGE somatic + // + // Select inputs that are eligible to run + // channel: runnable: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai ] + // channel: skip: [ meta ] + ch_inputs_somatic_sorted = ch_inputs_sorted.runnable + .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + def has_tumor = tumor_bam + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SAGE_VCF_TUMOR) + + runnable: has_tumor && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: tumor/normal: [ meta_sage, tumor_bam, normal_bam, tumor_bai, normal_bai ] + // channel: tumor only: [ meta_sage, tumor_bam, [], tumor_bai, [] ] + ch_sage_somatic_inputs = ch_inputs_somatic_sorted.runnable + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + + def meta_sage = [ + key: meta.group_id, + id: meta.group_id, + tumor_id: Utils.getTumorDnaSampleName(meta), + ] + + if (normal_bam) { + meta_sage.normal_id = Utils.getNormalDnaSampleName(meta) + } + + return [meta_sage, tumor_bam, normal_bam, tumor_bai, normal_bai] + } + + // Run process + SOMATIC( + ch_sage_somatic_inputs, + genome_fasta, + genome_version, + genome_fai, + genome_dict, + sage_known_hotspots_somatic, + sage_actionable_panel, + sage_coverage_panel, + sage_highconf_regions, + ensembl_data_resources, + ) + + ch_versions = ch_versions.mix(SOMATIC.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, sage_vcf, sage_tbi ] + ch_somatic_vcf_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(SOMATIC.out.vcf, ch_inputs), + ch_inputs_somatic_sorted.skip.map { meta -> [meta, [], []] }, + ch_inputs_sorted.skip.map { meta -> [meta, [], []] }, + ) + + // channel: [ meta, sage_vcf, sage_tbi ] + ch_germline_vcf_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(GERMLINE.out.vcf, ch_inputs), + ch_inputs_germline_sorted.skip.map { meta -> [meta, [], []] }, + ch_inputs_sorted.skip.map { meta -> [meta, [], []] }, + ) + + // channel: [ meta, sage_dir ] + ch_somatic_dir = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(SOMATIC.out.sage_dir, ch_inputs), + ch_inputs_somatic_sorted.skip.map { meta -> [meta, []] }, + ch_inputs_sorted.skip.map { meta -> [meta, []] }, + ) + + // channel: [ meta, sage_dir ] + ch_germline_dir = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(GERMLINE.out.sage_dir, ch_inputs), + ch_inputs_germline_sorted.skip.map { meta -> [meta, []] }, + ch_inputs_sorted.skip.map { meta -> [meta, []] }, + ) + + emit: + germline_vcf = ch_germline_vcf_out // channel: [ meta, sage_vcf, sage_tbi ] + somatic_vcf = ch_somatic_vcf_out // channel: [ meta, sage_vcf, sage_tbi ] + germline_dir = ch_germline_dir // channel: [ meta, sage_dir ] + somatic_dir = ch_somatic_dir // channel: [ meta, sage_dir ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/sigs_fitting/main.nf b/subworkflows/local/sigs_fitting/main.nf new file mode 100644 index 00000000..240ebc35 --- /dev/null +++ b/subworkflows/local/sigs_fitting/main.nf @@ -0,0 +1,91 @@ +// +// Sigs fits trinucleotide signature definitions with sample SNV counts +// + +import Constants 
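A recurring pattern across these subworkflows (including the SAGE calling code above) is to branch inputs into `runnable` and `skip`, run the process only on the runnable branch, and then mix the skipped samples back in with empty placeholders so every original meta reappears downstream. A self-contained sketch of that pattern with made-up sample data (the `vcf` field and group ids are illustrative, not pipeline fields):

```groovy
// Minimal sketch of the runnable/skip pattern used throughout these subworkflows
workflow {

    ch_inputs = Channel.of(
        [group_id: 'GROUP1', vcf: 'group1.vcf.gz'],
        [group_id: 'GROUP2', vcf: null],
    )

    // Branch into samples that can run the process and those that cannot
    ch_sorted = ch_inputs.branch { meta ->
        runnable: meta.vcf
        skip: true
    }

    // Stand-in for a real process call on the runnable samples
    ch_process_out = ch_sorted.runnable.map { meta -> [meta, "${meta.group_id}.result.tsv"] }

    // Skipped samples are re-added with an empty placeholder so downstream joins see every sample
    ch_out = Channel.empty()
        .mix(
            ch_process_out,
            ch_sorted.skip.map { meta -> [meta, []] },
        )

    ch_out.view()
}
```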
+import Utils + +include { SIGS } from '../../../modules/local/sigs/main' + +workflow SIGS_FITTING { + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_purple // channel: [mandatory] [ meta, purple_dir ] + + // Reference data + sigs_signatures // channel: [mandatory] /path/to/sigs_signatures + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select input sources + // channel: [ meta, purple_dir ] + ch_inputs_selected = ch_purple + .map { meta, purple_dir -> + return [meta, Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR)] + } + + // Sort inputs + // channel: runnable: [ meta, purple_dir ] + // channel: skip: [ meta ] + ch_inputs_sorted = ch_inputs_selected + .branch { meta, purple_dir -> + + def has_dna = Utils.hasTumorDna(meta) + + def tumor_id + def has_smlv_vcf + if (has_dna) { + tumor_id = Utils.getTumorDnaSampleName(meta) + has_smlv_vcf = purple_dir ? file(purple_dir).resolve("${tumor_id}.purple.somatic.vcf.gz") : [] + } + + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SIGS_DIR) + + runnable: has_dna && purple_dir && has_smlv_vcf && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_sigs, smlv_vcf ] + ch_sigs_inputs = ch_inputs_sorted.runnable + .map { meta, purple_dir -> + + def tumor_id = Utils.getTumorDnaSampleName(meta) + + def meta_sigs = [ + key: meta.group_id, + id: meta.group_id, + sample_id: tumor_id, + ] + + def smlv_vcf = file(purple_dir).resolve("${tumor_id}.purple.somatic.vcf.gz") + + return [meta_sigs, smlv_vcf] + } + + // Run process + SIGS( + ch_sigs_inputs, + sigs_signatures, + ) + + ch_versions = ch_versions.mix(SIGS.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, sigs_dir ] + ch_outputs = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(SIGS.out.sigs_dir, ch_inputs), + ch_inputs_sorted.skip.map { meta -> [meta, []] }, + ) + + emit: + sigs_dir = ch_outputs // channel: [ meta, sigs_dir ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/utils_nfcore_oncoanalyser_pipeline/main.nf b/subworkflows/local/utils_nfcore_oncoanalyser_pipeline/main.nf new file mode 100644 index 00000000..f36242b9 --- /dev/null +++ b/subworkflows/local/utils_nfcore_oncoanalyser_pipeline/main.nf @@ -0,0 +1,241 @@ +// +// Subworkflow with functionality specific to the nf-core/oncoanalyser pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' + +/* 
+======================================================================================== + SUBWORKFLOW TO INITIALISE PIPELINE +======================================================================================== +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + help // boolean: Display help text + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + + main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + pre_help_text = nfCoreLogo(monochrome_logs) + post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def String workflow_command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " + UTILS_NFVALIDATION_PLUGIN ( + help, + workflow_command, + pre_help_text, + post_help_text, + validate_params, + "nextflow_schema.json" + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + // + // Custom validation for pipeline parameters + // + validateInputParameters() + + emit: + versions = ch_versions +} + +/* +======================================================================================== + SUBWORKFLOW FOR PIPELINE COMPLETION +======================================================================================== +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + + main: + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) + } + + completionSummary(monochrome_logs) + + if (hook_url) { + imNotification(summary_params, hook_url) + } + } + + workflow.onError { + log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + } +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + genomeExistsError() +} + +// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(input) { + def (metas, fastqs) = input[1..2] + + // Check that multiple runs of the same sample are of the same datatype i.e. 
single-end / paired-end + def endedness_ok = metas.collect{ it.single_end }.unique().size == 1 + if (!endedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") + } + + return [ metas[0], fastqs ] +} +// +// Get attribute from genome config file e.g. fasta +// +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + +// +// Exit pipeline if incorrect --genome key provided +// +def genomeExistsError() { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } +} + +// +// Generate methods description for MultiQC +// +def toolCitationText() { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " The toolCitationText function is not currently implemented and should not be used." + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + + /* + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text + */ +} + +def toolBibliographyText() { + // TODO nf-core: Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "",
+    // Uncomment function in methodsDescriptionText to render in MultiQC report
+    def reference_text = [
+            "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>",
+            "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>"
+        ].join(' ').trim()
+
+    return reference_text
+}
+
+def methodsDescriptionText(mqc_methods_yaml) {
+    // Convert to a named map so it can be used with the familiar NXF ${workflow} variable syntax in the MultiQC YML file
+    def meta = [:]
+    meta.workflow = workflow.toMap()
+    meta["manifest_map"] = workflow.manifest.toMap()
+
+    // Pipeline DOI
+    if (meta.manifest_map.doi) {
+        // Using a loop to handle multiple DOIs
+        // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers
+        // Removing ` ` since the manifest.doi is a string and not a proper list
+        def temp_doi_ref = ""
+        String[] manifest_doi = meta.manifest_map.doi.tokenize(",")
+        for (String doi_ref: manifest_doi) temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), "
+        meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2)
+    } else meta["doi_text"] = ""
+    meta["nodoi_text"] = meta.manifest_map.doi ? "" : "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>
  • " + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + // meta["tool_bibliography"] = toolBibliographyText() + + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} diff --git a/subworkflows/local/virusbreakend_calling/main.nf b/subworkflows/local/virusbreakend_calling/main.nf new file mode 100644 index 00000000..bd73af0e --- /dev/null +++ b/subworkflows/local/virusbreakend_calling/main.nf @@ -0,0 +1,154 @@ +// +// VIRUSBreakend and Virus Interpreter identify viral content and insertion sites +// + +import Constants +import Utils + +include { VIRUSBREAKEND } from '../../../modules/local/virusbreakend/main' +include { VIRUSINTERPRETER } from '../../../modules/local/virusinterpreter/main' + +workflow VIRUSBREAKEND_CALLING { + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] + ch_purple // channel: [mandatory] [ meta, purple_dir ] + ch_bamtools_somatic // channel: [mandatory] [ meta, metrics ] + + // Reference data + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_fai // channel: [mandatory] /path/to/genome_fai + genome_dict // channel: [mandatory] /path/to/genome_dict + genome_gridss_index // channel: [mandatory] /path/to/genome_gridss_index + virusbreakenddb // channel: [mandatory] /path/to/virusbreakenddb/ + virus_taxonomy_db // channel: [mandatory] /path/to/virus_taxonomy_db + virus_reporting_db // channel: [mandatory] /path/to/virus_reporting_db + + // Params + gridss_config // channel: [optional] /path/to/gridss_config + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Sort inputs + // NOTE(SW): VIRUSBreakend inputs are not allowed in the samplesheet, so aren't considered + // channel: [ meta, tumor_bam, tumor_bai ] + ch_inputs_sorted = ch_tumor_bam + .map { meta, tumor_bam, tumor_bai -> + return [ + meta, + Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR), + Utils.selectCurrentOrExisting(tumor_bai, meta, Constants.INPUT.BAI_DNA_TUMOR), + ] + } + .branch { meta, tumor_bam, tumor_bai -> + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.VIRUSINTERPRETER_DIR) + runnable: tumor_bam && !has_existing + skip: true + return meta + } + + // + // MODULE: VIRUSBreakend + // + // Create process input channel + // channel: [ meta_virus, tumor_bam ] + ch_virusbreakend_inputs = ch_inputs_sorted.runnable + .map { meta, tumor_bam, tumor_bai -> + + def meta_virus = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorDnaSampleName(meta), + ] + + return [meta_virus, tumor_bam] + } + + // Run process + VIRUSBREAKEND( + ch_virusbreakend_inputs, + genome_fasta, + genome_fai, + genome_dict, + genome_gridss_index, + virusbreakenddb, + gridss_config, + ) + + ch_versions = ch_versions.mix(VIRUSBREAKEND.out.versions) + + // + // MODULE: Virus Interpreter + // + // Select input sources + // channel: [ meta, virus_tsv, purple_dir, metrics ] + ch_virusinterpreter_inputs_selected = WorkflowOncoanalyser.groupByMeta( + 
WorkflowOncoanalyser.restoreMeta(VIRUSBREAKEND.out.tsv, ch_inputs), + ch_purple, + ch_bamtools_somatic, + ) + .map { meta, virus_tsv, purple_dir, metrics -> + + def inputs = [ + virus_tsv, + Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR), + Utils.selectCurrentOrExisting(metrics, meta, Constants.INPUT.BAMTOOLS_TUMOR), + ] + + return [meta, *inputs] + } + + // Sort inputs + // channel: [ meta, virus_tsv, purple_dir, metrics ] + // channel: skip: [ meta ] + ch_virusinterpreter_inputs_sorted = ch_virusinterpreter_inputs_selected + .branch { meta, virus_tsv, purple_dir, metrics -> + runnable: virus_tsv && purple_dir && metrics + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_virus, virus_tsv, purple_dir, metrics ] + ch_virusinterpreter_inputs = ch_virusinterpreter_inputs_sorted.runnable + .map { d -> + + def meta = d[0] + def inputs = d[1..-1] + + def meta_virus = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorDnaSampleName(meta), + ] + + return [meta_virus, *inputs] + } + + // Run process + VIRUSINTERPRETER( + ch_virusinterpreter_inputs, + virus_taxonomy_db, + virus_reporting_db, + ) + + ch_versions = ch_versions.mix(VIRUSINTERPRETER.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, virusinterpreter_dir ] + ch_outputs = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(VIRUSINTERPRETER.out.virusinterpreter_dir, ch_inputs), + ch_virusinterpreter_inputs_sorted.skip.map { meta -> [meta, []] }, + ch_inputs_sorted.skip.map { meta -> [meta, []] }, + ) + + emit: + virusinterpreter_dir = ch_outputs // channel: [ meta, virusinterpreter_dir ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 00000000..ac31f28f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 00000000..e5c3a0a8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. 
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..68718e4f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..e3f0baf4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..ca964ce8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,111 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert 
workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 00000000..f8476112 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 00000000..14558c39 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,446 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +import org.yaml.snakeyaml.Yaml +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFCORE_PIPELINE { + + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + valid_config = true + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } + if (nextflow_cli_args[0]) { + log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + def temp_doi_ref = "" + String[] manifest_doi = workflow.manifest.doi.tokenize(",") + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + temp_doi_ref + "\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + Yaml yaml = new Yaml() + versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + $workflow.manifest.name: ${getWorkflowVersion()} + Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { processVersionsFromYAML(it) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + for (group in summary_params.keySet()) { + def group_params = summary_params.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "
    <p style=\"font-size:110%\"><b>$group</b></p>\n"
+            summary_section += "    <dl class=\"dl-horizontal\">\n"
+            for (param in group_params.keySet()) {
+                summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>\n"
+            }
+            summary_section += "    </dl>
    \n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? 
'' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + def 
email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(monochrome_logs) + if (email_address) { + try { + if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } +} + +// +// Construct and send a notification to a web server as JSON e.g. 
Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! 
postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 00000000..d08d2434 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..1dc317f8 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..1037232c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,166 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": 
"\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..8940d32d --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 00000000..859d1030 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 00000000..ac8523c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf new file mode 100644 index 00000000..2585b65d --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -0,0 +1,62 @@ +// +// Subworkflow that uses the nf-validation plugin to render help text and parameter summary +// + +/* +======================================================================================== + IMPORT NF-VALIDATION PLUGIN +======================================================================================== +*/ + +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { validateParameters } from 
'plugin/nf-validation' + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFVALIDATION_PLUGIN { + + take: + print_help // boolean: print help + workflow_command // string: default commmand used to run pipeline + pre_help_text // string: string to be printed before help text and summary log + post_help_text // string: string to be printed after help text and summary log + validate_params // boolean: validate parameters + schema_filename // path: JSON schema file, null to use default value + + main: + + log.debug "Using schema file: ${schema_filename}" + + // Default values for strings + pre_help_text = pre_help_text ?: '' + post_help_text = post_help_text ?: '' + workflow_command = workflow_command ?: '' + + // + // Print help message if needed + // + if (print_help) { + log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text + System.exit(0) + } + + // + // Print parameter summary to stdout + // + log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text + + // + // Validate parameters relative to the parameter JSON schema + // + if (validate_params){ + validateParameters(parameters_schema: schema_filename) + } + + emit: + dummy_emit = true +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml new file mode 100644 index 00000000..3d4a6b04 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFVALIDATION_PLUGIN" +description: Use nf-validation to initiate and validate a pipeline +keywords: + - utility + - pipeline + - initialise + - validation +components: [] +input: + - print_help: + type: boolean + description: | + Print help message and exit + - workflow_command: + type: string + description: | + The command to run the workflow e.g. "nextflow run main.nf" + - pre_help_text: + type: string + description: | + Text to print before the help message + - post_help_text: + type: string + description: | + Text to print after the help message + - validate_params: + type: boolean + description: | + Validate the parameters and error if invalid. + - schema_filename: + type: string + description: | + The filename of the schema to validate against. 
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test new file mode 100644 index 00000000..5784a33f --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -0,0 +1,200 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFVALIDATION_PLUGIN" + script "../main.nf" + workflow "UTILS_NFVALIDATION_PLUGIN" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "plugin/nf-validation" + tag "'plugin/nf-validation'" + tag "utils_nfvalidation_plugin" + tag "subworkflows/utils_nfvalidation_plugin" + + test("Should run nothing") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should run help") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with command") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with extra text") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = "pre-help-text" + post_help_text = "post-help-text" + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { 
it.contains('pre-help-text') } }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } }, + { assert workflow.stdout.any { it.contains('post-help-text') } } + ) + } + } + + test("Should validate params") { + + when { + + params { + monochrome_logs = true + test_data = '' + outdir = 1 + } + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = true + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json new file mode 100644 index 00000000..7626c1c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". pipeline parameters", + "description": "", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + "description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml new file mode 100644 index 00000000..60b1cfff --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfvalidation_plugin: + - subworkflows/nf-core/utils_nfvalidation_plugin/** diff --git a/tower.yml b/tower.yml new file mode 100644 index 00000000..787aedfe --- /dev/null +++ b/tower.yml @@ -0,0 +1,5 @@ +reports: + multiqc_report.html: + display: "MultiQC HTML report" + samplesheet.csv: + display: "Auto-created samplesheet with collated metadata and FASTQ paths" diff --git a/workflows/oncoanalyser.nf b/workflows/oncoanalyser.nf deleted file mode 100644 index 50bc12ad..00000000 --- a/workflows/oncoanalyser.nf +++ /dev/null @@ -1,133 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) - -// Validate input parameters -WorkflowOncoanalyser.initialise(params, log) - -// TODO nf-core: Add all file path parameters for the pipeline to the list below -// Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.multiqc_config, params.fasta ] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - -// Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// -include { INPUT_CHECK } from '../subworkflows/local/input_check' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// MODULE: Installed directly from nf-core/modules -// -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// Info required for completion email and summary -def multiqc_report = [] - -workflow ONCOANALYSER { - - ch_versions = Channel.empty() - - // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files - // - INPUT_CHECK ( - ch_input - ) - ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - - // - // MODULE: Run FastQC - // - FASTQC ( - INPUT_CHECK.out.reads - ) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) - - // - // MODULE: MultiQC - // - workflow_summary = WorkflowOncoanalyser.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - methods_description = WorkflowOncoanalyser.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) - ch_methods_description = Channel.value(methods_description) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.collect().ifEmpty([]), - ch_multiqc_custom_config.collect().ifEmpty([]), - ch_multiqc_logo.collect().ifEmpty([]) - ) - multiqc_report = MULTIQC.out.report.toList() - ch_versions = ch_versions.mix(MULTIQC.out.versions) -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } - NfcoreTemplate.summary(workflow, params, log) - if (params.hook_url) { - NfcoreTemplate.adaptivecard(workflow, params, summary_params, projectDir, log) - } -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ diff --git a/workflows/targeted.nf b/workflows/targeted.nf new file mode 100644 index 00000000..f0b5649d --- /dev/null +++ b/workflows/targeted.nf @@ -0,0 +1,692 @@ +import Constants +import Processes +import Utils + + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +// Parse input samplesheet +// NOTE(SW): this is done early and outside of gpars so that we can access synchronously and prior to pipeline execution +inputs = Utils.parseInput(params.input, workflow.stubRun, log) + +// Get run config +run_config = WorkflowMain.getRunConfig(params, inputs, log) + +// Validate inputs +Utils.validateInput(inputs, run_config, params, log) + +// Check input path parameters to see if they exist +def checkPathParamList = [ + params.isofox_counts, + params.isofox_gc_ratios, + params.isofox_gene_ids, + params.isofox_tpm_norm, +] + +// Conditional requirements +if (run_config.stages.gridss) { + if (params.gridss_config !== null) { + checkPathParamList.add(params.gridss_config) + } +} + +if (run_config.stages.lilac) { + if (params.genome_version.toString() == '38' && params.genome_type == 'alt' && params.containsKey('ref_data_hla_slice_bed')) { + checkPathParamList.add(params.ref_data_hla_slice_bed) + } +} + +// TODO(SW): consider whether we should check for null entries here for errors to be more informative +for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } + +// Check mandatory parameters +if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' 
} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' + +include { AMBER_PROFILING } from '../subworkflows/local/amber_profiling' +include { BAMTOOLS_METRICS } from '../subworkflows/local/bamtools_metrics' +include { COBALT_PROFILING } from '../subworkflows/local/cobalt_profiling' +include { FLAGSTAT_METRICS } from '../subworkflows/local/flagstat_metrics' +include { GRIDSS_SVPREP_CALLING } from '../subworkflows/local/gridss_svprep_calling' +include { GRIPSS_FILTERING } from '../subworkflows/local/gripss_filtering' +include { ISOFOX_QUANTIFICATION } from '../subworkflows/local/isofox_quantification' +include { LILAC_CALLING } from '../subworkflows/local/lilac_calling' +include { LINX_ANNOTATION } from '../subworkflows/local/linx_annotation' +include { LINX_PLOTTING } from '../subworkflows/local/linx_plotting' +include { ORANGE_REPORTING } from '../subworkflows/local/orange_reporting' +include { PAVE_ANNOTATION } from '../subworkflows/local/pave_annotation' +include { PREPARE_REFERENCE } from '../subworkflows/local/prepare_reference' +include { PURPLE_CALLING } from '../subworkflows/local/purple_calling' +include { READ_ALIGNMENT_DNA } from '../subworkflows/local/read_alignment_dna' +include { READ_ALIGNMENT_RNA } from '../subworkflows/local/read_alignment_rna' +include { READ_PROCESSING } from '../subworkflows/local/read_processing' +include { SAGE_APPEND } from '../subworkflows/local/sage_append' +include { SAGE_CALLING } from '../subworkflows/local/sage_calling' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Get absolute file paths +samplesheet = Utils.getFileObject(params.input) + +workflow TARGETED { + // Create channel for versions + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Create input channel from parsed CSV + // channel: [ meta ] + ch_inputs = Channel.fromList(inputs) + + // Set up reference data, assign more human readable variables + PREPARE_REFERENCE( + run_config, + ) + ref_data = PREPARE_REFERENCE.out + hmf_data = PREPARE_REFERENCE.out.hmf_data + panel_data = PREPARE_REFERENCE.out.panel_data + + // Set GRIDSS config + gridss_config = params.gridss_config !== null ? file(params.gridss_config) : hmf_data.gridss_config + + // + // SUBWORKFLOW: Run read alignment to generate BAMs + // + // channel: [ meta, [bam, ...], [bai, ...] 
] + ch_align_dna_tumor_out = Channel.empty() + ch_align_dna_normal_out = Channel.empty() + ch_align_rna_tumor_out = Channel.empty() + if (run_config.stages.alignment) { + + READ_ALIGNMENT_DNA( + ch_inputs, + ref_data.genome_fasta, + ref_data.genome_bwamem2_index, + params.max_fastq_records, + ) + + READ_ALIGNMENT_RNA( + ch_inputs, + ref_data.genome_star_index, + ) + + ch_versions = ch_versions.mix( + READ_ALIGNMENT_DNA.out.versions, + READ_ALIGNMENT_RNA.out.versions, + ) + + ch_align_dna_tumor_out = ch_align_dna_tumor_out.mix(READ_ALIGNMENT_DNA.out.dna_tumor) + ch_align_dna_normal_out = ch_align_dna_normal_out.mix(READ_ALIGNMENT_DNA.out.dna_normal) + ch_align_rna_tumor_out = ch_align_rna_tumor_out.mix(READ_ALIGNMENT_RNA.out.rna_tumor) + + } else { + + ch_align_dna_tumor_out = ch_inputs.map { meta -> [meta, [], []] } + ch_align_dna_normal_out = ch_inputs.map { meta -> [meta, [], []] } + ch_align_rna_tumor_out = ch_inputs.map { meta -> [meta, [], []] } + + } + + // + // SUBWORKFLOW: Run MarkDups for DNA BAMs + // + // channel: [ meta, bam, bai ] + ch_process_dna_tumor_out = Channel.empty() + ch_process_dna_normal_out = Channel.empty() + if (run_config.stages.markdups) { + + has_umis = run_config.panel.equalsIgnoreCase('tso500') + + READ_PROCESSING( + ch_inputs, + ch_align_dna_tumor_out, + ch_align_dna_normal_out, + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + ref_data.genome_dict, + hmf_data.unmap_regions, + has_umis, + ) + + ch_versions = ch_versions.mix(READ_PROCESSING.out.versions) + + ch_process_dna_tumor_out = ch_process_dna_tumor_out.mix(READ_PROCESSING.out.dna_tumor) + ch_process_dna_normal_out = ch_process_dna_normal_out.mix(READ_PROCESSING.out.dna_normal) + + } else { + + ch_process_dna_tumor_out = ch_inputs.map { meta -> [meta, [], []] } + ch_process_dna_normal_out = ch_inputs.map { meta -> [meta, [], []] } + + } + + // + // MODULE: Run Isofox to analyse RNA data + // + // channel: [ meta, isofox_dir ] + ch_isofox_out = Channel.empty() + if (run_config.stages.isofox) { + + isofox_counts = params.isofox_counts ? file(params.isofox_counts) : panel_data.isofox_counts + isofox_gc_ratios = params.isofox_gc_ratios ? file(params.isofox_gc_ratios) : panel_data.isofox_gc_ratios + isofox_read_length = params.isofox_read_length !== null ? params.isofox_read_length : Constants.DEFAULT_ISOFOX_READ_LENGTH_TARGETED + + isofox_gene_ids = params.isofox_gene_ids ? file(params.isofox_gene_ids) : panel_data.isofox_gene_ids + isofox_tpm_norm = params.isofox_tpm_norm ? 
file(params.isofox_tpm_norm) : panel_data.isofox_tpm_norm + + ISOFOX_QUANTIFICATION( + ch_inputs, + ch_align_rna_tumor_out, + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + hmf_data.ensembl_data_resources, + isofox_counts, + isofox_gc_ratios, + isofox_gene_ids, + isofox_tpm_norm, + params.isofox_functions, + isofox_read_length, + ) + + ch_versions = ch_versions.mix(ISOFOX_QUANTIFICATION.out.versions) + + ch_isofox_out = ch_isofox_out.mix(ISOFOX_QUANTIFICATION.out.isofox_dir) + + } else { + + ch_isofox_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Run AMBER to obtain b-allele frequencies + // + // channel: [ meta, amber_dir ] + ch_amber_out = Channel.empty() + if (run_config.stages.amber) { + + AMBER_PROFILING( + ch_inputs, + ch_process_dna_tumor_out, + ch_process_dna_normal_out, + ref_data.genome_version, + hmf_data.heterozygous_sites, + panel_data.target_region_bed, + ) + + ch_versions = ch_versions.mix(AMBER_PROFILING.out.versions) + ch_amber_out = ch_amber_out.mix(AMBER_PROFILING.out.amber_dir) + + } else { + + ch_amber_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Run COBALT to obtain read ratios + // + // channel: [ meta, cobalt_dir ] + ch_cobalt_out = Channel.empty() + if (run_config.stages.cobalt) { + + COBALT_PROFILING( + ch_inputs, + ch_process_dna_tumor_out, + ch_process_dna_normal_out, + hmf_data.gc_profile, + hmf_data.diploid_bed, + panel_data.target_region_normalisation, + ) + + ch_versions = ch_versions.mix(COBALT_PROFILING.out.versions) + + ch_cobalt_out = ch_cobalt_out.mix(COBALT_PROFILING.out.cobalt_dir) + + } else { + + ch_cobalt_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Call structural variants with GRIDSS + // + // channel: [ meta, gridss_vcf ] + ch_gridss_out = Channel.empty() + if (run_config.stages.gridss) { + + GRIDSS_SVPREP_CALLING( + ch_inputs, + ch_process_dna_tumor_out, + ch_process_dna_normal_out, + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + ref_data.genome_dict, + ref_data.genome_gridss_index, + hmf_data.gridss_region_blocklist, + hmf_data.sv_prep_blocklist, + hmf_data.known_fusions, + gridss_config, + ) + + ch_versions = ch_versions.mix(GRIDSS_SVPREP_CALLING.out.versions) + + ch_gridss_out = ch_gridss_out.mix(GRIDSS_SVPREP_CALLING.out.vcf) + + } else { + + ch_gridss_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Run GRIPSS to filter GRIDSS SV calls + // + // channel: [ meta, vcf, tbi ] + ch_gripss_somatic_out = Channel.empty() + ch_gripss_germline_out = Channel.empty() + ch_gripss_somatic_unfiltered_out = Channel.empty() + if (run_config.stages.gripss) { + + GRIPSS_FILTERING( + ch_inputs, + ch_gridss_out, + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + hmf_data.gridss_pon_breakends, + hmf_data.gridss_pon_breakpoints, + hmf_data.known_fusions, + hmf_data.repeatmasker_annotations, + panel_data.target_region_bed, + ) + + ch_versions = ch_versions.mix(GRIPSS_FILTERING.out.versions) + + ch_gripss_somatic_out = ch_gripss_somatic_out.mix(GRIPSS_FILTERING.out.somatic) + ch_gripss_germline_out = ch_gripss_germline_out.mix(GRIPSS_FILTERING.out.germline) + ch_gripss_somatic_unfiltered_out = ch_gripss_somatic_unfiltered_out.mix(GRIPSS_FILTERING.out.somatic_unfiltered) + + } else { + + ch_gripss_somatic_out = ch_inputs.map { meta -> [meta, [], []] } + ch_gripss_germline_out = ch_inputs.map { meta -> [meta, [], []] } + ch_gripss_somatic_unfiltered_out = ch_inputs.map { 
meta -> [meta, [], []] } + + } + + // + // SUBWORKFLOW: call SNV, MNV, and small INDELS with SAGE + // + // channel: [ meta, sage_vcf, sage_tbi ] + ch_sage_germline_vcf_out = Channel.empty() + ch_sage_somatic_vcf_out = Channel.empty() + // channel: [ meta, sage_dir ] + ch_sage_germline_dir_out = Channel.empty() + ch_sage_somatic_dir_out = Channel.empty() + if (run_config.stages.sage) { + + SAGE_CALLING( + ch_inputs, + ch_process_dna_tumor_out, + ch_process_dna_normal_out, + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + ref_data.genome_dict, + hmf_data.sage_known_hotspots_somatic, + [], // sage_known_hotspots_germline + panel_data.sage_actionable_panel, + panel_data.sage_coverage_panel, + hmf_data.sage_highconf_regions, + hmf_data.segment_mappability, + panel_data.driver_gene_panel, + hmf_data.ensembl_data_resources, + ) + + ch_versions = ch_versions.mix(SAGE_CALLING.out.versions) + + ch_sage_germline_vcf_out = ch_sage_germline_vcf_out.mix(SAGE_CALLING.out.germline_vcf) + ch_sage_somatic_vcf_out = ch_sage_somatic_vcf_out.mix(SAGE_CALLING.out.somatic_vcf) + ch_sage_germline_dir_out = ch_sage_germline_dir_out.mix(SAGE_CALLING.out.germline_dir) + ch_sage_somatic_dir_out = ch_sage_somatic_dir_out.mix(SAGE_CALLING.out.somatic_dir) + + } else { + + ch_sage_germline_vcf_out = ch_inputs.map { meta -> [meta, [], []] } + ch_sage_somatic_vcf_out = ch_inputs.map { meta -> [meta, [], []] } + ch_sage_germline_dir_out = ch_inputs.map { meta -> [meta, []] } + ch_sage_somatic_dir_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Annotate variants with PAVE + // + // channel: [ meta, pave_vcf ] + ch_pave_germline_out = Channel.empty() + ch_pave_somatic_out = Channel.empty() + if (run_config.stages.pave) { + + PAVE_ANNOTATION( + ch_inputs, + ch_sage_germline_vcf_out, + ch_sage_somatic_vcf_out, + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + hmf_data.sage_pon, + panel_data.pon_artefacts, + hmf_data.sage_blocklist_regions, + hmf_data.sage_blocklist_sites, + hmf_data.clinvar_annotations, + hmf_data.segment_mappability, + panel_data.driver_gene_panel, + hmf_data.ensembl_data_resources, + hmf_data.gnomad_resource, + ) + + ch_versions = ch_versions.mix(PAVE_ANNOTATION.out.versions) + + ch_pave_germline_out = ch_pave_germline_out.mix(PAVE_ANNOTATION.out.germline) + ch_pave_somatic_out = ch_pave_somatic_out.mix(PAVE_ANNOTATION.out.somatic) + + } else { + + ch_pave_germline_out = ch_inputs.map { meta -> [meta, []] } + ch_pave_somatic_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Call CNVs, infer purity and ploidy, and recover low quality SVs with PURPLE + // + // channel: [ meta, purple_dir ] + ch_purple_out = Channel.empty() + if (run_config.stages.purple) { + + PURPLE_CALLING( + ch_inputs, + ch_amber_out, + ch_cobalt_out, + ch_pave_somatic_out, + ch_pave_germline_out, + ch_gripss_somatic_out, + ch_gripss_germline_out, + ch_gripss_somatic_unfiltered_out, + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + ref_data.genome_dict, + hmf_data.gc_profile, + hmf_data.sage_known_hotspots_somatic, + [], // sage_known_hotspots_germline + panel_data.driver_gene_panel, + hmf_data.ensembl_data_resources, + [], // purple_germline_del + panel_data.target_region_bed, + panel_data.target_region_ratios, + panel_data.target_region_msi_indels, + ) + + ch_versions = ch_versions.mix(PURPLE_CALLING.out.versions) + + ch_purple_out = ch_purple_out.mix(PURPLE_CALLING.out.purple_dir) + + } else { + + 
ch_purple_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Append RNA data to SAGE VCF + // + // channel: [ meta, sage_append_vcf ] + ch_sage_somatic_append_out = Channel.empty() + ch_sage_germline_append_out = Channel.empty() + if (run_config.stages.orange) { + + // NOTE(SW): currently used only for ORANGE but will also be used for Neo once implemented + + SAGE_APPEND( + ch_inputs, + ch_align_rna_tumor_out, + ch_purple_out, + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + ref_data.genome_dict, + ) + + ch_versions = ch_versions.mix(SAGE_APPEND.out.versions) + + ch_sage_somatic_append_out = ch_sage_somatic_append_out.mix(SAGE_APPEND.out.somatic_vcf) + ch_sage_germline_append_out = ch_sage_germline_append_out.mix(SAGE_APPEND.out.germline_vcf) + + } else { + + ch_sage_somatic_append_out = ch_inputs.map { meta -> [meta, []] } + ch_sage_germline_append_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Group structural variants into higher order events with LINX + // + // channel: [ meta, linx_annotation_dir ] + ch_linx_somatic_out = Channel.empty() + ch_linx_germline_out = Channel.empty() + if (run_config.stages.linx) { + + LINX_ANNOTATION( + ch_inputs, + ch_purple_out, + ref_data.genome_version, + hmf_data.ensembl_data_resources, + hmf_data.known_fusion_data, + panel_data.driver_gene_panel, + ) + + ch_versions = ch_versions.mix(LINX_ANNOTATION.out.versions) + + ch_linx_somatic_out = ch_linx_somatic_out.mix(LINX_ANNOTATION.out.somatic) + ch_linx_germline_out = ch_linx_germline_out.mix(LINX_ANNOTATION.out.germline) + + } else { + + ch_linx_somatic_out = ch_inputs.map { meta -> [meta, []] } + ch_linx_germline_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Visualise LINX annotations + // + // channel: [ meta, linx_visualiser_dir ] + ch_linx_somatic_visualiser_dir_out = Channel.empty() + if (run_config.stages.linx) { + + LINX_PLOTTING( + ch_inputs, + ch_linx_somatic_out, + ref_data.genome_version, + hmf_data.ensembl_data_resources, + ) + + ch_versions = ch_versions.mix(LINX_PLOTTING.out.versions) + + ch_linx_somatic_visualiser_dir_out = ch_linx_somatic_visualiser_dir_out.mix(LINX_PLOTTING.out.visualiser_dir) + + } else { + + ch_linx_somatic_visualiser_dir_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Run SAMtools flagstat to generate stats required for ORANGE + // + // channel: [ meta, metrics ] + ch_flagstat_somatic_out = Channel.empty() + ch_flagstat_germline_out = Channel.empty() + if (run_config.stages.orange && run_config.stages.flagstat) { + + FLAGSTAT_METRICS( + ch_inputs, + ch_process_dna_tumor_out, + ch_process_dna_normal_out, + ) + + ch_versions = ch_versions.mix(FLAGSTAT_METRICS.out.versions) + + ch_flagstat_somatic_out = ch_flagstat_somatic_out.mix(FLAGSTAT_METRICS.out.somatic) + ch_flagstat_germline_out = ch_flagstat_germline_out.mix(FLAGSTAT_METRICS.out.germline) + + } else { + + ch_flagstat_somatic_out = ch_inputs.map { meta -> [meta, []] } + ch_flagstat_germline_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Run Bam Tools to generate stats required for downstream processes + // + // channel: [ meta, metrics ] + ch_bamtools_somatic_out = Channel.empty() + ch_bamtools_germline_out = Channel.empty() + if (run_config.stages.bamtools) { + + BAMTOOLS_METRICS( + ch_inputs, + ch_process_dna_tumor_out, + ch_process_dna_normal_out, + ref_data.genome_fasta, + ref_data.genome_version, + ) + + ch_versions = 
ch_versions.mix(BAMTOOLS_METRICS.out.versions) + + ch_bamtools_somatic_out = ch_bamtools_somatic_out.mix(BAMTOOLS_METRICS.out.somatic) + ch_bamtools_germline_out = ch_bamtools_germline_out.mix(BAMTOOLS_METRICS.out.germline) + + } else { + + ch_bamtools_somatic_out = ch_inputs.map { meta -> [meta, []] } + ch_bamtools_germline_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Run LILAC for HLA typing and somatic CNV and SNV calling + // + // channel: [ meta, lilac_dir ] + ch_lilac_out = Channel.empty() + if (run_config.stages.lilac) { + + // Set HLA slice BED if provided in params + ref_data_hla_slice_bed = params.containsKey('ref_data_hla_slice_bed') ? params.ref_data_hla_slice_bed : [] + + LILAC_CALLING( + ch_inputs, + ch_process_dna_tumor_out, + ch_process_dna_normal_out, + ch_align_rna_tumor_out, + ch_purple_out, + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + hmf_data.lilac_resources, + ref_data_hla_slice_bed, + ) + + ch_versions = ch_versions.mix(LILAC_CALLING.out.versions) + + ch_lilac_out = ch_lilac_out.mix(LILAC_CALLING.out.lilac_dir) + + } else { + + ch_lilac_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Run ORANGE to generate static PDF report + // + if (run_config.stages.orange) { + + // Create placeholder channels for empty remaining channels + ch_chord_out = ch_inputs.map { meta -> [meta, []] } + ch_cuppa_out = ch_inputs.map { meta -> [meta, []] } + ch_sigs_out = ch_inputs.map { meta -> [meta, []] } + ch_virusinterpreter_out = ch_inputs.map { meta -> [meta, []] } + + ORANGE_REPORTING( + ch_inputs, + ch_bamtools_somatic_out, + ch_bamtools_germline_out, + ch_flagstat_somatic_out, + ch_flagstat_germline_out, + ch_sage_somatic_dir_out, + ch_sage_germline_dir_out, + ch_sage_somatic_append_out, + ch_sage_germline_append_out, + ch_purple_out, + ch_linx_somatic_out, + ch_linx_somatic_visualiser_dir_out, + ch_linx_germline_out, + ch_virusinterpreter_out, + ch_chord_out, + ch_sigs_out, + ch_lilac_out, + ch_cuppa_out, + ch_isofox_out, + ref_data.genome_version, + hmf_data.disease_ontology, + hmf_data.cohort_mapping, + hmf_data.cohort_percentiles, + hmf_data.known_fusion_data, + panel_data.driver_gene_panel, + hmf_data.ensembl_data_resources, + hmf_data.alt_sj_distribution, + hmf_data.gene_exp_distribution, + ) + + ch_versions = ch_versions.mix(ORANGE_REPORTING.out.versions) + } + + // + // TASK: Aggregate software versions + // + softwareVersionsToYAML(ch_versions) + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'software_versions.yml', + sort: true, + newLine: true, + ) +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/workflows/wgts.nf b/workflows/wgts.nf new file mode 100644 index 00000000..9facc5ac --- /dev/null +++ b/workflows/wgts.nf @@ -0,0 +1,795 @@ +import Constants +import Processes +import Utils + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +// Parse input samplesheet +// NOTE(SW): this is done early and outside of gpars so that we can access synchronously and prior to pipeline execution +inputs = Utils.parseInput(params.input, workflow.stubRun, log) + +// Get run config +run_config = WorkflowMain.getRunConfig(params, inputs, log) + +// Validate inputs 
+Utils.validateInput(inputs, run_config, params, log) + +// Check input path parameters to see if they exist +def checkPathParamList = [ + params.isofox_counts, + params.isofox_gc_ratios, +] + +// Conditional requirements +if (run_config.stages.gridss) { + if (params.gridss_config !== null) { + checkPathParamList.add(params.gridss_config) + } +} + +// Mode check required as evaluated regardless of workflow selection +if (run_config.stages.virusinterpreter && run_config.mode !== Constants.RunMode.TARGETED) { + checkPathParamList.add(params.ref_data_virusbreakenddb_path) +} + +if (run_config.stages.lilac) { + if (params.genome_version.toString() == '38' && params.genome_type == 'alt' && params.containsKey('ref_data_hla_slice_bed')) { + checkPathParamList.add(params.ref_data_hla_slice_bed) + } +} + +// TODO(SW): consider whether we should check for null entries here for errors to be more informative +for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } + +// Check mandatory parameters +if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' + +include { AMBER_PROFILING } from '../subworkflows/local/amber_profiling' +include { BAMTOOLS_METRICS } from '../subworkflows/local/bamtools_metrics' +include { CHORD_PREDICTION } from '../subworkflows/local/chord_prediction' +include { COBALT_PROFILING } from '../subworkflows/local/cobalt_profiling' +include { CUPPA_PREDICTION } from '../subworkflows/local/cuppa_prediction' +include { FLAGSTAT_METRICS } from '../subworkflows/local/flagstat_metrics' +include { GRIDSS_SVPREP_CALLING } from '../subworkflows/local/gridss_svprep_calling' +include { GRIPSS_FILTERING } from '../subworkflows/local/gripss_filtering' +include { ISOFOX_QUANTIFICATION } from '../subworkflows/local/isofox_quantification' +include { LILAC_CALLING } from '../subworkflows/local/lilac_calling' +include { LINX_ANNOTATION } from '../subworkflows/local/linx_annotation' +include { LINX_PLOTTING } from '../subworkflows/local/linx_plotting' +include { ORANGE_REPORTING } from '../subworkflows/local/orange_reporting' +include { PAVE_ANNOTATION } from '../subworkflows/local/pave_annotation' +include { PREPARE_REFERENCE } from '../subworkflows/local/prepare_reference' +include { PURPLE_CALLING } from '../subworkflows/local/purple_calling' +include { READ_ALIGNMENT_DNA } from '../subworkflows/local/read_alignment_dna' +include { READ_ALIGNMENT_RNA } from '../subworkflows/local/read_alignment_rna' +include { READ_PROCESSING } from '../subworkflows/local/read_processing' +include { SAGE_APPEND } from '../subworkflows/local/sage_append' +include { SAGE_CALLING } from '../subworkflows/local/sage_calling' +include { SIGS_FITTING } from '../subworkflows/local/sigs_fitting' +include { VIRUSBREAKEND_CALLING } from '../subworkflows/local/virusbreakend_calling' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Get absolute file paths +samplesheet = Utils.getFileObject(params.input) + +workflow WGTS { + // Create channel for versions + // channel: 
[ versions.yml ] + ch_versions = Channel.empty() + + // Create input channel from parsed CSV + // channel: [ meta ] + ch_inputs = Channel.fromList(inputs) + + // Set up reference data, assign more human readable variables + PREPARE_REFERENCE( + run_config, + ) + ref_data = PREPARE_REFERENCE.out + hmf_data = PREPARE_REFERENCE.out.hmf_data + + ch_versions = ch_versions.mix( + PREPARE_REFERENCE.out.versions, + ) + + // Set GRIDSS config + gridss_config = params.gridss_config !== null ? file(params.gridss_config) : hmf_data.gridss_config + + // + // SUBWORKFLOW: Run read alignment to generate BAMs + // + // channel: [ meta, [bam, ...], [bai, ...] ] + ch_align_dna_tumor_out = Channel.empty() + ch_align_dna_normal_out = Channel.empty() + ch_align_rna_tumor_out = Channel.empty() + if (run_config.stages.alignment) { + + READ_ALIGNMENT_DNA( + ch_inputs, + ref_data.genome_fasta, + ref_data.genome_bwamem2_index, + params.max_fastq_records, + ) + + READ_ALIGNMENT_RNA( + ch_inputs, + ref_data.genome_star_index, + ) + + ch_versions = ch_versions.mix( + READ_ALIGNMENT_DNA.out.versions, + READ_ALIGNMENT_RNA.out.versions, + ) + + ch_align_dna_tumor_out = ch_align_dna_tumor_out.mix(READ_ALIGNMENT_DNA.out.dna_tumor) + ch_align_dna_normal_out = ch_align_dna_normal_out.mix(READ_ALIGNMENT_DNA.out.dna_normal) + ch_align_rna_tumor_out = ch_align_rna_tumor_out.mix(READ_ALIGNMENT_RNA.out.rna_tumor) + + } else { + + ch_align_dna_tumor_out = ch_inputs.map { meta -> [meta, [], []] } + ch_align_dna_normal_out = ch_inputs.map { meta -> [meta, [], []] } + ch_align_rna_tumor_out = ch_inputs.map { meta -> [meta, [], []] } + + } + + // + // SUBWORKFLOW: Run MarkDups for DNA BAMs + // + // channel: [ meta, bam, bai ] + ch_process_dna_tumor_out = Channel.empty() + ch_process_dna_normal_out = Channel.empty() + if (run_config.stages.markdups) { + + READ_PROCESSING( + ch_inputs, + ch_align_dna_tumor_out, + ch_align_dna_normal_out, + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + ref_data.genome_dict, + hmf_data.unmap_regions, + false, // has_umis + ) + + ch_versions = ch_versions.mix(READ_PROCESSING.out.versions) + + ch_process_dna_tumor_out = ch_process_dna_tumor_out.mix(READ_PROCESSING.out.dna_tumor) + ch_process_dna_normal_out = ch_process_dna_normal_out.mix(READ_PROCESSING.out.dna_normal) + + } else { + + ch_process_dna_tumor_out = ch_inputs.map { meta -> [meta, [], []] } + ch_process_dna_normal_out = ch_inputs.map { meta -> [meta, [], []] } + + } + + // + // MODULE: Run Isofox to analyse RNA data + // + // channel: [ meta, isofox_dir ] + ch_isofox_out = Channel.empty() + if (run_config.stages.isofox) { + + isofox_counts = params.isofox_counts ? file(params.isofox_counts) : hmf_data.isofox_counts + isofox_gc_ratios = params.isofox_gc_ratios ? file(params.isofox_gc_ratios) : hmf_data.isofox_gc_ratios + isofox_read_length = params.isofox_read_length !== null ? 
params.isofox_read_length : Constants.DEFAULT_ISOFOX_READ_LENGTH_WTS + + ISOFOX_QUANTIFICATION( + ch_inputs, + ch_align_rna_tumor_out, + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + hmf_data.ensembl_data_resources, + isofox_counts, + isofox_gc_ratios, + [], // isofox_gene_ids + [], // isofox_tpm_norm + params.isofox_functions, + isofox_read_length, + ) + + ch_versions = ch_versions.mix(ISOFOX_QUANTIFICATION.out.versions) + + ch_isofox_out = ch_isofox_out.mix(ISOFOX_QUANTIFICATION.out.isofox_dir) + + } else { + + ch_isofox_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Run AMBER to obtain b-allele frequencies + // + // channel: [ meta, amber_dir ] + ch_amber_out = Channel.empty() + if (run_config.stages.amber) { + + AMBER_PROFILING( + ch_inputs, + ch_process_dna_tumor_out, + ch_process_dna_normal_out, + ref_data.genome_version, + hmf_data.heterozygous_sites, + [], // target_region_bed + ) + + ch_versions = ch_versions.mix(AMBER_PROFILING.out.versions) + + ch_amber_out = ch_amber_out.mix(AMBER_PROFILING.out.amber_dir) + + } else { + + ch_amber_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Run COBALT to obtain read ratios + // + // channel: [ meta, cobalt_dir ] + ch_cobalt_out = Channel.empty() + if (run_config.stages.cobalt) { + + COBALT_PROFILING( + ch_inputs, + ch_process_dna_tumor_out, + ch_process_dna_normal_out, + hmf_data.gc_profile, + hmf_data.diploid_bed, + [], // panel_target_region_normalisation + ) + + ch_versions = ch_versions.mix(COBALT_PROFILING.out.versions) + + ch_cobalt_out = ch_cobalt_out.mix(COBALT_PROFILING.out.cobalt_dir) + + } else { + + ch_cobalt_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Call structural variants with GRIDSS + // + // channel: [ meta, gridss_vcf ] + ch_gridss_out = Channel.empty() + if (run_config.stages.gridss) { + + GRIDSS_SVPREP_CALLING( + ch_inputs, + ch_process_dna_tumor_out, + ch_process_dna_normal_out, + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + ref_data.genome_dict, + ref_data.genome_gridss_index, + hmf_data.gridss_region_blocklist, + hmf_data.sv_prep_blocklist, + hmf_data.known_fusions, + gridss_config, + ) + + ch_versions = ch_versions.mix(GRIDSS_SVPREP_CALLING.out.versions) + + ch_gridss_out = ch_gridss_out.mix(GRIDSS_SVPREP_CALLING.out.vcf) + + } else { + + ch_gridss_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Run GRIPSS to filter GRIDSS SV calls + // + // channel: [ meta, gripss_vcf, gripss_tbi ] + ch_gripss_somatic_out = Channel.empty() + ch_gripss_germline_out = Channel.empty() + ch_gripss_somatic_unfiltered_out = Channel.empty() + if (run_config.stages.gripss) { + + GRIPSS_FILTERING( + ch_inputs, + ch_gridss_out, + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + hmf_data.gridss_pon_breakends, + hmf_data.gridss_pon_breakpoints, + hmf_data.known_fusions, + hmf_data.repeatmasker_annotations, + [], // target_region_bed + ) + + ch_versions = ch_versions.mix(GRIPSS_FILTERING.out.versions) + + ch_gripss_somatic_out = ch_gripss_somatic_out.mix(GRIPSS_FILTERING.out.somatic) + ch_gripss_germline_out = ch_gripss_germline_out.mix(GRIPSS_FILTERING.out.germline) + ch_gripss_somatic_unfiltered_out = ch_gripss_somatic_unfiltered_out.mix(GRIPSS_FILTERING.out.somatic_unfiltered) + + } else { + + ch_gripss_somatic_out = ch_inputs.map { meta -> [meta, [], []] } + ch_gripss_germline_out = ch_inputs.map { meta -> [meta, [], []] } + 
ch_gripss_somatic_unfiltered_out = ch_inputs.map { meta -> [meta, [], []] } + + } + + // + // SUBWORKFLOW: Call SNV, MNV, and small INDELS with SAGE + // + // channel: [ meta, sage_vcf, sage_tbi ] + ch_sage_germline_vcf_out = Channel.empty() + ch_sage_somatic_vcf_out = Channel.empty() + // channel: [ meta, sage_dir ] + ch_sage_germline_dir_out = Channel.empty() + ch_sage_somatic_dir_out = Channel.empty() + if (run_config.stages.sage) { + + SAGE_CALLING( + ch_inputs, + ch_process_dna_tumor_out, + ch_process_dna_normal_out, + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + ref_data.genome_dict, + hmf_data.sage_known_hotspots_somatic, + hmf_data.sage_known_hotspots_germline, + hmf_data.sage_actionable_panel, + hmf_data.sage_coverage_panel, + hmf_data.sage_highconf_regions, + hmf_data.segment_mappability, + hmf_data.driver_gene_panel, + hmf_data.ensembl_data_resources, + ) + + ch_versions = ch_versions.mix(SAGE_CALLING.out.versions) + + ch_sage_germline_vcf_out = ch_sage_germline_vcf_out.mix(SAGE_CALLING.out.germline_vcf) + ch_sage_somatic_vcf_out = ch_sage_somatic_vcf_out.mix(SAGE_CALLING.out.somatic_vcf) + ch_sage_germline_dir_out = ch_sage_germline_dir_out.mix(SAGE_CALLING.out.germline_dir) + ch_sage_somatic_dir_out = ch_sage_somatic_dir_out.mix(SAGE_CALLING.out.somatic_dir) + + } else { + + ch_sage_germline_vcf_out = ch_inputs.map { meta -> [meta, [], []] } + ch_sage_somatic_vcf_out = ch_inputs.map { meta -> [meta, [], []] } + ch_sage_germline_dir_out = ch_inputs.map { meta -> [meta, []] } + ch_sage_somatic_dir_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Annotate variants with PAVE + // + // channel: [ meta, pave_vcf ] + ch_pave_germline_out = Channel.empty() + ch_pave_somatic_out = Channel.empty() + if (run_config.stages.pave) { + + PAVE_ANNOTATION( + ch_inputs, + ch_sage_germline_vcf_out, + ch_sage_somatic_vcf_out, + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + hmf_data.sage_pon, + [], // sage_pon_artefacts + hmf_data.sage_blocklist_regions, + hmf_data.sage_blocklist_sites, + hmf_data.clinvar_annotations, + hmf_data.segment_mappability, + hmf_data.driver_gene_panel, + hmf_data.ensembl_data_resources, + hmf_data.gnomad_resource, + ) + + ch_versions = ch_versions.mix(PAVE_ANNOTATION.out.versions) + + ch_pave_germline_out = ch_pave_germline_out.mix(PAVE_ANNOTATION.out.germline) + ch_pave_somatic_out = ch_pave_somatic_out.mix(PAVE_ANNOTATION.out.somatic) + + } else { + + ch_pave_germline_out = ch_inputs.map { meta -> [meta, []] } + ch_pave_somatic_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Call CNVs, infer purity and ploidy, and recover low quality SVs with PURPLE + // + // channel: [ meta, purple_dir ] + ch_purple_out = Channel.empty() + if (run_config.stages.purple) { + + PURPLE_CALLING( + ch_inputs, + ch_amber_out, + ch_cobalt_out, + ch_pave_somatic_out, + ch_pave_germline_out, + ch_gripss_somatic_out, + ch_gripss_germline_out, + ch_gripss_somatic_unfiltered_out, + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + ref_data.genome_dict, + hmf_data.gc_profile, + hmf_data.sage_known_hotspots_somatic, + hmf_data.sage_known_hotspots_germline, + hmf_data.driver_gene_panel, + hmf_data.ensembl_data_resources, + hmf_data.purple_germline_del, + [], // target_region_bed + [], // target_region_ratios + [], // target_region_msi_indels + ) + + ch_versions = ch_versions.mix(PURPLE_CALLING.out.versions) + + ch_purple_out = 
ch_purple_out.mix(PURPLE_CALLING.out.purple_dir) + + } else { + + ch_purple_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Append RNA data to SAGE VCF + // + // channel: [ meta, sage_append_vcf ] + ch_sage_somatic_append_out = Channel.empty() + ch_sage_germline_append_out = Channel.empty() + if (run_config.stages.orange) { + + // NOTE(SW): currently used only for ORANGE but will also be used for Neo once implemented + + SAGE_APPEND( + ch_inputs, + ch_align_rna_tumor_out, + ch_purple_out, + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + ref_data.genome_dict, + ) + + ch_versions = ch_versions.mix(SAGE_APPEND.out.versions) + ch_sage_somatic_append_out = ch_sage_somatic_append_out.mix(SAGE_APPEND.out.somatic_vcf) + ch_sage_germline_append_out = ch_sage_germline_append_out.mix(SAGE_APPEND.out.germline_vcf) + + } else { + + ch_sage_somatic_append_out = ch_inputs.map { meta -> [meta, []] } + ch_sage_germline_append_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Group structural variants into higher order events with LINX + // + // channel: [ meta, linx_annotation_dir ] + ch_linx_somatic_out = Channel.empty() + ch_linx_germline_out = Channel.empty() + if (run_config.stages.linx) { + + LINX_ANNOTATION( + ch_inputs, + ch_purple_out, + ref_data.genome_version, + hmf_data.ensembl_data_resources, + hmf_data.known_fusion_data, + hmf_data.driver_gene_panel, + ) + + ch_versions = ch_versions.mix(LINX_ANNOTATION.out.versions) + + ch_linx_somatic_out = ch_linx_somatic_out.mix(LINX_ANNOTATION.out.somatic) + ch_linx_germline_out = ch_linx_germline_out.mix(LINX_ANNOTATION.out.germline) + + } else { + + ch_linx_somatic_out = ch_inputs.map { meta -> [meta, []] } + ch_linx_germline_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Visualise LINX annotations + // + // channel: [ meta, linx_visualiser_dir ] + ch_linx_somatic_visualiser_dir_out = Channel.empty() + if (run_config.stages.linx) { + + LINX_PLOTTING( + ch_inputs, + ch_linx_somatic_out, + ref_data.genome_version, + hmf_data.ensembl_data_resources, + ) + + ch_versions = ch_versions.mix(LINX_PLOTTING.out.versions) + + ch_linx_somatic_visualiser_dir_out = ch_linx_somatic_visualiser_dir_out.mix(LINX_PLOTTING.out.visualiser_dir) + + } else { + + ch_linx_somatic_visualiser_dir_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Run SAMtools flagstat to generate stats required for ORANGE + // + // channel: [ meta, metrics ] + ch_flagstat_somatic_out = Channel.empty() + ch_flagstat_germline_out = Channel.empty() + if (run_config.stages.orange && run_config.stages.flagstat) { + + FLAGSTAT_METRICS( + ch_inputs, + ch_process_dna_tumor_out, + ch_process_dna_normal_out, + ) + + ch_versions = ch_versions.mix(FLAGSTAT_METRICS.out.versions) + + ch_flagstat_somatic_out = ch_flagstat_somatic_out.mix(FLAGSTAT_METRICS.out.somatic) + ch_flagstat_germline_out = ch_flagstat_germline_out.mix(FLAGSTAT_METRICS.out.germline) + + } else { + + ch_flagstat_somatic_out = ch_inputs.map { meta -> [meta, []] } + ch_flagstat_germline_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Run Bam Tools to generate stats required for downstream processes + // + // channel: [ meta, metrics ] + ch_bamtools_somatic_out = Channel.empty() + ch_bamtools_germline_out = Channel.empty() + if (run_config.stages.bamtools) { + + BAMTOOLS_METRICS( + ch_inputs, + ch_process_dna_tumor_out, + ch_process_dna_normal_out, + ref_data.genome_fasta, + 
ref_data.genome_version, + ) + + ch_versions = ch_versions.mix(BAMTOOLS_METRICS.out.versions) + + ch_bamtools_somatic_out = ch_bamtools_somatic_out.mix(BAMTOOLS_METRICS.out.somatic) + ch_bamtools_germline_out = ch_bamtools_germline_out.mix(BAMTOOLS_METRICS.out.germline) + + } else { + + ch_bamtools_somatic_out = ch_inputs.map { meta -> [meta, []] } + ch_bamtools_germline_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Run Sigs to fit somatic smlv to signature definitions + // + // channel: [ meta, sigs_dir ] + ch_sigs_out = Channel.empty() + if (run_config.stages.sigs) { + + SIGS_FITTING( + ch_inputs, + ch_purple_out, + hmf_data.sigs_signatures, + ) + + ch_versions = ch_versions.mix(SIGS_FITTING.out.versions) + + ch_sigs_out = ch_sigs_out.mix(SIGS_FITTING.out.sigs_dir) + + } else { + + ch_sigs_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Run CHORD to predict HR deficiency status + // + // channel: [ meta, chord_dir ] + ch_chord_out = Channel.empty() + if (run_config.stages.chord) { + + CHORD_PREDICTION( + ch_inputs, + ch_purple_out, + ref_data.genome_version, + ) + + ch_versions = ch_versions.mix(CHORD_PREDICTION.out.versions) + + ch_chord_out = ch_chord_out.mix(CHORD_PREDICTION.out.chord_dir) + + } else { + + ch_chord_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Run LILAC for HLA typing and somatic CNV and SNV calling + // + // channel: [ meta, lilac_dir ] + ch_lilac_out = Channel.empty() + if (run_config.stages.lilac) { + + // Use HLA slice BED if provided in params or set as default requirement + ref_data_hla_slice_bed = params.containsKey('ref_data_hla_slice_bed') ? params.ref_data_hla_slice_bed : [] + + LILAC_CALLING( + ch_inputs, + ch_process_dna_tumor_out, + ch_process_dna_normal_out, + ch_align_rna_tumor_out, + ch_purple_out, + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + hmf_data.lilac_resources, + ref_data_hla_slice_bed, + ) + + ch_versions = ch_versions.mix(LILAC_CALLING.out.versions) + + ch_lilac_out = ch_lilac_out.mix(LILAC_CALLING.out.lilac_dir) + + } else { + + ch_lilac_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Run VIRUSBreakend and Virus Interpreter to quantify viral content + // + // channel: [ meta, virusinterpreter_dir ] + ch_virusinterpreter_out = Channel.empty() + if (run_config.stages.virusinterpreter) { + + VIRUSBREAKEND_CALLING( + ch_inputs, + ch_process_dna_tumor_out, + ch_purple_out, + ch_bamtools_somatic_out, + ref_data.genome_fasta, + ref_data.genome_fai, + ref_data.genome_dict, + ref_data.genome_gridss_index, + ref_data.virusbreakenddb, + hmf_data.virus_taxonomy_db, + hmf_data.virus_reporting_db, + gridss_config, + ) + + ch_versions = ch_versions.mix(VIRUSBREAKEND_CALLING.out.versions) + + ch_virusinterpreter_out = ch_virusinterpreter_out.mix(VIRUSBREAKEND_CALLING.out.virusinterpreter_dir) + + } else { + + ch_virusinterpreter_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Run CUPPA predict tissue of origin + // + // channel: [ meta, cuppa_dir ] + ch_cuppa_out = Channel.empty() + if (run_config.stages.cuppa) { + + CUPPA_PREDICTION( + ch_inputs, + ch_isofox_out, + ch_purple_out, + ch_linx_somatic_out, + ch_virusinterpreter_out, + ref_data.genome_version, + hmf_data.cuppa_resources, + ) + + ch_versions = ch_versions.mix(CUPPA_PREDICTION.out.versions) + + ch_cuppa_out = ch_cuppa_out.mix(CUPPA_PREDICTION.out.cuppa_dir) + + } else { + + ch_cuppa_out = ch_inputs.map { meta -> [meta, []] } + + } + + 
// + // SUBWORKFLOW: Run ORANGE to generate static PDF report + // + if (run_config.stages.orange) { + + ORANGE_REPORTING( + ch_inputs, + ch_bamtools_somatic_out, + ch_bamtools_germline_out, + ch_flagstat_somatic_out, + ch_flagstat_germline_out, + ch_sage_somatic_dir_out, + ch_sage_germline_dir_out, + ch_sage_somatic_append_out, + ch_sage_germline_append_out, + ch_purple_out, + ch_linx_somatic_out, + ch_linx_somatic_visualiser_dir_out, + ch_linx_germline_out, + ch_virusinterpreter_out, + ch_chord_out, + ch_sigs_out, + ch_lilac_out, + ch_cuppa_out, + ch_isofox_out, + ref_data.genome_version, + hmf_data.disease_ontology, + hmf_data.cohort_mapping, + hmf_data.cohort_percentiles, + hmf_data.known_fusion_data, + hmf_data.driver_gene_panel, + hmf_data.ensembl_data_resources, + hmf_data.alt_sj_distribution, + hmf_data.gene_exp_distribution, + ) + + ch_versions = ch_versions.mix(ORANGE_REPORTING.out.versions) + } + + // + // TASK: Aggregate software versions + // + softwareVersionsToYAML(ch_versions) + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'software_versions.yml', + sort: true, + newLine: true, + ) +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/