diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 00000000..ed7650c7 --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,48 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + release: + types: [published] + workflow_dispatch: + +name: pkgdown + +jobs: + pkgdown: + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. 
+ needs: website + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.4.1 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/.gitignore b/.gitignore index 24ec0037..4654003f 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,4 @@ inst/extdata/COSMIC_v3.4_SBS_GRCh37.rds inst/extdata/COSMIC_v3.4_SV_GRCh38.rds inst/extdata/human_T2T_gene_info.rds inst/extdata/ce11_gene_info.rds +docs diff --git a/DESCRIPTION b/DESCRIPTION index b6a4debe..e0a31f2d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: sigminer Title: Extract, Analyze and Visualize Mutational Signatures for Genomic Variations -Version: 2.2.2 +Version: 2.3.0 Authors@R: c( person("Shixiang", "Wang", , "w_shixiang@163.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-9855-7357")), @@ -26,7 +26,7 @@ Description: Genomic alterations including single nucleotide substitution, extract, analyze and visualize signatures from genomic alteration records, thus providing new insight into cancer study. 
License: MIT + file LICENSE -URL: https://github.com/ShixiangWang/sigminer +URL: https://github.com/ShixiangWang/sigminer, https://shixiangwang.github.io/sigminer/, https://shixiangwang.github.io/sigminer-book/ BugReports: https://github.com/ShixiangWang/sigminer/issues Depends: R (>= 3.5) diff --git a/R/data.R b/R/data.R index d59e6df2..dcfd4ceb 100644 --- a/R/data.R +++ b/R/data.R @@ -19,6 +19,15 @@ NULL #' data(centromeres.hg38) NULL +#' Location of Centromeres at Genome Build T2T +#' @docType data +#' @name centromeres.T2T +#' @format A data.frame +#' @source from T2T study +#' @examples +#' data(centromeres.T2T) +NULL + #' Location of Centromeres at Genome Build mm10 #' @docType data #' @name centromeres.mm10 @@ -62,6 +71,15 @@ NULL #' data(chromsize.hg38) NULL +#' Chromosome Size of Genome Build T2T +#' @docType data +#' @name chromsize.T2T +#' @format A data.frame +#' @source from T2T study +#' @examples +#' data(chromsize.T2T) +NULL + #' Chromosome Size of Genome Build mm10 #' @docType data #' @name chromsize.mm10 @@ -96,6 +114,15 @@ NULL #' data(cytobands.hg38) NULL +#' Location of Chromosome Cytobands at Genome Build T2T +#' @docType data +#' @name cytobands.T2T +#' @format A data.frame +#' @source from T2T study +#' @examples +#' data(cytobands.T2T) +NULL + #' Location of Chromosome Cytobands at Genome Build mm10 #' @docType data #' @name cytobands.mm10 @@ -130,6 +157,15 @@ NULL #' data(transcript.hg38) NULL +#' Merged Transcript Location at Genome Build T2T +#' @docType data +#' @name transcript.T2T +#' @format A `data.table` +#' @source from T2T study. 
+#' @examples +#' data(transcript.T2T) +NULL + #' Merged Transcript Location at Genome Build mm10 #' @docType data #' @name transcript.mm10 diff --git a/_pkgdown.yml b/_pkgdown.yml index 4a77b734..9b69d80e 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -9,9 +9,6 @@ template: primary: "#0054AD" border-radius: 0.5rem btn-border-radius: 0.25rem - # params: - # bootswatch: united -destination: docs authors: Shixiang Wang: @@ -126,18 +123,22 @@ reference: - CN.features - centromeres.hg19 - centromeres.hg38 + - centromeres.T2T - centromeres.mm10 - centromeres.mm9 - chromsize.hg19 - chromsize.hg38 + - chromsize.T2T - chromsize.mm10 - - - chromsize.mm9 + - chromsize.mm9 - cytobands.hg19 - cytobands.hg38 + - cytobands.T2T - cytobands.mm10 - cytobands.mm9 - transcript.hg19 - transcript.hg38 + - transcript.T2T - transcript.mm10 - transcript.mm9 - title: Helpers diff --git a/docs/404.html b/docs/404.html deleted file mode 100644 index 60a20e21..00000000 --- a/docs/404.html +++ /dev/null @@ -1,125 +0,0 @@ - - - - - - - -Page not found (404) • sigminer - - - - - - - - - - - - - - - - - - -
-
- - - - -
-
- - -Content not found. Please use links in the navbar. - -
- - - -
- - - - -
- - - - - - - - diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html deleted file mode 100644 index 1469ea1f..00000000 --- a/docs/LICENSE-text.html +++ /dev/null @@ -1,92 +0,0 @@ - -License • sigminer - - -
-
- - - -
-
- - -
YEAR: 2019
-COPYRIGHT HOLDER: Shixiang Wang, Xue-Song Liu
-
- -
- - - -
- - - -
- - - - - - - - diff --git a/docs/LICENSE.html b/docs/LICENSE.html deleted file mode 100644 index 6a3559f4..00000000 --- a/docs/LICENSE.html +++ /dev/null @@ -1,97 +0,0 @@ - -MIT License • sigminer - - -
-
- - - -
-
- - -
- -

Copyright (c) 2019 Shixiang Wang, Xue-Song Liu

-

Copyright (c) 2018 Geoffrey Macintyre, Anand Mayakonda

-

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

-

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

-

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

-
- -
- - - -
- - - -
- - - - - - - - diff --git a/docs/apple-touch-icon-120x120.png b/docs/apple-touch-icon-120x120.png deleted file mode 100644 index 6d84805b..00000000 Binary files a/docs/apple-touch-icon-120x120.png and /dev/null differ diff --git a/docs/apple-touch-icon-152x152.png b/docs/apple-touch-icon-152x152.png deleted file mode 100644 index 5c43980d..00000000 Binary files a/docs/apple-touch-icon-152x152.png and /dev/null differ diff --git a/docs/apple-touch-icon-180x180.png b/docs/apple-touch-icon-180x180.png deleted file mode 100644 index ea2c5f0a..00000000 Binary files a/docs/apple-touch-icon-180x180.png and /dev/null differ diff --git a/docs/apple-touch-icon-60x60.png b/docs/apple-touch-icon-60x60.png deleted file mode 100644 index e376916b..00000000 Binary files a/docs/apple-touch-icon-60x60.png and /dev/null differ diff --git a/docs/apple-touch-icon-76x76.png b/docs/apple-touch-icon-76x76.png deleted file mode 100644 index a5f20e77..00000000 Binary files a/docs/apple-touch-icon-76x76.png and /dev/null differ diff --git a/docs/apple-touch-icon.png b/docs/apple-touch-icon.png deleted file mode 100644 index 15cae703..00000000 Binary files a/docs/apple-touch-icon.png and /dev/null differ diff --git a/docs/articles/cnsignature.html b/docs/articles/cnsignature.html deleted file mode 100644 index 46f2ba50..00000000 --- a/docs/articles/cnsignature.html +++ /dev/null @@ -1,375 +0,0 @@ - - - - - - - -Analyze Copy Number Signatures with sigminer • sigminer - - - - - - - - - - - - - - - - - - - -
-
- - - - -
-
- - - - -
-

Exploring copy number signatures with a recently developed approach -has been described in The -repertoire of copy number alteration signatures in human cancer.

-

For a more general introduction, please read Extract, -Analyze and Visualize Mutational Signatures with Sigminer.

-
-
-library(sigminer)
-#> sigminer version 2.2.2
-#> - Star me at https://github.com/ShixiangWang/sigminer
-#> - Run hello() to see usage and citation.
-

For this analysis, data with six columns are required.

-
    -
  • Chromosome
  • -
  • Start.bp
  • -
  • End.bp
  • -
  • modal_cn (i.e. total copy number, integer)
  • -
  • minor_cn (i.e. copy number for minor allele, integer)
  • -
  • sample
  • -
-
-

Generate allele-specific copy number profile -

-
-load(system.file("extdata", "toy_segTab.RData",
-  package = "sigminer", mustWork = TRUE
-))
-
-set.seed(1234)
-segTabs$minor_cn <- sample(c(0, 1), size = nrow(segTabs), replace = TRUE)
-cn <- read_copynumber(segTabs,
-  seg_cols = c("chromosome", "start", "end", "segVal"),
-  genome_measure = "wg", complement = TRUE, add_loh = TRUE
-)
-#>  [2023-12-12 17:49:02.252718]: Started.
-#>  [2023-12-12 17:49:02.275248]: Genome build  : hg19.
-#>  [2023-12-12 17:49:02.27739]: Genome measure: wg.
-#>  [2023-12-12 17:49:02.279307]: When add_loh is TRUE, use_all is forced to TRUE.
-#> Please drop columns you don't want to keep before reading.
-#>  [2023-12-12 17:49:02.311056]: Chromosome size database for build obtained.
-#>  [2023-12-12 17:49:02.312975]: Reading input.
-#>  [2023-12-12 17:49:02.314494]: A data frame as input detected.
-#>  [2023-12-12 17:49:02.316376]: Column names checked.
-#>  [2023-12-12 17:49:02.324201]: Column order set.
-#>  [2023-12-12 17:49:02.332668]: Chromosomes unified.
-#>  [2023-12-12 17:49:02.352654]: Value 2 (normal copy) filled to uncalled chromosomes.
-#>  [2023-12-12 17:49:02.359564]: Data imported.
-#>  [2023-12-12 17:49:02.3611]: Segments info:
-#>  [2023-12-12 17:49:02.362549]:     Keep - 477
-#>  [2023-12-12 17:49:02.363963]:     Drop - 0
-#>  [2023-12-12 17:49:02.365851]: Segments sorted.
-#>  [2023-12-12 17:49:02.367199]: Adding LOH labels...
-#>  [2023-12-12 17:49:02.369816]: Joining adjacent segments with same copy number value. Be patient...
-#>  [2023-12-12 17:49:02.500946]: 410 segments left after joining.
-#>  [2023-12-12 17:49:02.502757]: Segmental table cleaned.
-#>  [2023-12-12 17:49:02.504189]: Annotating.
-#>  [2023-12-12 17:49:02.518467]: Annotation done.
-#>  [2023-12-12 17:49:02.520063]: Summarizing per sample.
-#>  [2023-12-12 17:49:02.539559]: Summarized.
-#>  [2023-12-12 17:49:02.541093]: Generating CopyNumber object.
-#>  [2023-12-12 17:49:02.542976]: Generated.
-#>  [2023-12-12 17:49:02.544371]: Validating object.
-#>  [2023-12-12 17:49:02.545781]: Done.
-#>  [2023-12-12 17:49:02.547409]: 0.295 secs elapsed.
-
-cn
-#> An object of class CopyNumber 
-#> =============================
-#>                           sample n_of_seg n_of_cnv n_of_amp n_of_del n_of_vchr
-#>  1: TCGA-DF-A2KN-01A-11D-A17U-01       34        6        5        1         4
-#>  2: TCGA-19-2621-01B-01D-0911-01       34        8        5        3         5
-#>  3: TCGA-B6-A0X5-01A-21D-A107-01       29        8        4        4         2
-#>  4: TCGA-A8-A07S-01A-11D-A036-01       39       11        2        9         4
-#>  5: TCGA-26-6174-01A-21D-1842-01       44       13        8        5         8
-#>  6: TCGA-CV-7432-01A-11D-2128-01       41       16        7        9         9
-#>  7: TCGA-06-0644-01A-02D-0310-01       47       19        5       14         8
-#>  8: TCGA-A5-A0G2-01A-11D-A042-01       40       21        5       16        10
-#>  9: TCGA-99-7458-01A-11D-2035-01       49       26       10       16        13
-#> 10: TCGA-05-4417-01A-22D-1854-01       53       37       33        4        17
-#>     n_loh cna_burden
-#>  1:    15      0.000
-#>  2:    20      0.095
-#>  3:    18      0.083
-#>  4:    21      0.106
-#>  5:    24      0.113
-#>  6:    24      0.188
-#>  7:    33      0.158
-#>  8:    23      0.375
-#>  9:    33      0.304
-#> 10:    29      0.617
-cn@data
-#>      chromosome     start       end segVal                       sample
-#>   1:       chr1   3218923 116319008      2 TCGA-05-4417-01A-22D-1854-01
-#>   2:       chr1 116324707 120523902      1 TCGA-05-4417-01A-22D-1854-01
-#>   3:       chr1 149879545 247812431      4 TCGA-05-4417-01A-22D-1854-01
-#>   4:      chr10    423671 135224372      3 TCGA-05-4417-01A-22D-1854-01
-#>   5:      chr11    458784  19461653      3 TCGA-05-4417-01A-22D-1854-01
-#>  ---                                                                   
-#> 406:       chr6   1016984 170898549      2 TCGA-DF-A2KN-01A-11D-A17U-01
-#> 407:       chr7    746917 158385118      2 TCGA-DF-A2KN-01A-11D-A17U-01
-#> 408:       chr8    617885 145225107      2 TCGA-DF-A2KN-01A-11D-A17U-01
-#> 409:       chr9    790234 140938075      2 TCGA-DF-A2KN-01A-11D-A17U-01
-#> 410:       chrX         1 155270560      2 TCGA-DF-A2KN-01A-11D-A17U-01
-#>       minor_cn   loh .loh_frac
-#>   1: 1.0000000 FALSE        NA
-#>   2: 0.0000000  TRUE        NA
-#>   3: 0.5000000  TRUE 0.1175943
-#>   4: 1.0000000 FALSE        NA
-#>   5: 1.0000000 FALSE        NA
-#>  ---                          
-#> 406: 0.3333333  TRUE 0.9979494
-#> 407: 1.0000000 FALSE        NA
-#> 408: 1.0000000 FALSE        NA
-#> 409: 0.5000000  TRUE 0.8328715
-#> 410:        NA FALSE        NA
-
-
-

Classify the segments with Steele et al method -

-
-

If you want to try other types of copy number signatures, change the -method argument.

-
-
-tally_s <- sig_tally(cn, method = "S")
-#>  [2023-12-12 17:49:02.67201]: Started.
-#>  [2023-12-12 17:49:02.676258]: When you use method 'S', please make sure you have set 'join_adj_seg' to FALSE and 'add_loh' to TRUE in 'read_copynumber() in the previous step!
-#>  [2023-12-12 17:49:02.706049]: Matrix generated.
-#>  [2023-12-12 17:49:02.707747]: 0.036 secs elapsed.
-
-str(tally_s$all_matrices, max.level = 1)
-#> List of 2
-#>  $ CN_40: int [1:10, 1:40] 0 0 0 0 0 0 0 0 0 0 ...
-#>   ..- attr(*, "dimnames")=List of 2
-#>  $ CN_48: int [1:10, 1:48] 0 0 0 0 0 0 0 0 0 0 ...
-#>   ..- attr(*, "dimnames")=List of 2
-
-
-

Find de novo signatures -

-
-sig_denovo = sig_auto_extract(tally_s$all_matrices$CN_48)
-#> Select Run 3, which K = 2 as best solution.
-head(sig_denovo$Signature)
-#>                         Sig1          Sig2
-#> 0:homdel:0-100Kb    0.000000  0.000000e+00
-#> 0:homdel:100Kb-1Mb  0.000000  0.000000e+00
-#> 0:homdel:>1Mb       0.000000  0.000000e+00
-#> 1:LOH:0-100Kb       3.609460 3.819129e-242
-#> 1:LOH:100Kb-1Mb     6.316554 2.814800e-127
-#> 1:LOH:1Mb-10Mb     13.535473 2.784288e-190
-
-
-

Refit (19) reference signatures -

-

This directly calculates the contribution of 19 reference -signatures.

-
-act_refit = sig_fit(t(tally_s$all_matrices$CN_48), sig_index = "ALL", sig_db = "CNS_TCGA")
-#>  [2023-12-12 17:49:03.675575]: Started.
-#>  [2023-12-12 17:49:03.677097]: Signature index detected.
-#>  [2023-12-12 17:49:03.678478]: Checking signature database in package.
-#>  [2023-12-12 17:49:03.694931]: Checking signature index.
-#>  [2023-12-12 17:49:03.699371]: Valid index for db 'CNS_TCGA':
-#> CN1 CN2 CN3 CN4 CN5 CN6 CN7 CN8 CN9 CN10 CN11 CN12 CN13 CN14 CN15 CN16 CN17 CN18 CN19
-#>  [2023-12-12 17:49:03.700851]: Database and index checked.
-#>  [2023-12-12 17:49:03.702355]: Signature normalized.
-#>  [2023-12-12 17:49:03.703705]: Checking row number for catalog matrix and signature matrix.
-#>  [2023-12-12 17:49:03.705038]: Checked.
-#>  [2023-12-12 17:49:03.706365]: Checking rownames for catalog matrix and signature matrix.
-#>  [2023-12-12 17:49:03.707705]: Checked.
-#>  [2023-12-12 17:49:03.709023]: Method 'QP' detected.
-#>  [2023-12-12 17:49:03.715184]: Corresponding function generated.
-#>  [2023-12-12 17:49:03.716575]: Calling function.
-#>  [2023-12-12 17:49:03.718255]: Fitting sample: TCGA-05-4417-01A-22D-1854-01
-#>  [2023-12-12 17:49:03.719948]: Fitting sample: TCGA-06-0644-01A-02D-0310-01
-#>  [2023-12-12 17:49:03.721412]: Fitting sample: TCGA-19-2621-01B-01D-0911-01
-#>  [2023-12-12 17:49:03.722872]: Fitting sample: TCGA-26-6174-01A-21D-1842-01
-#>  [2023-12-12 17:49:03.724322]: Fitting sample: TCGA-99-7458-01A-11D-2035-01
-#>  [2023-12-12 17:49:03.725782]: Fitting sample: TCGA-A5-A0G2-01A-11D-A042-01
-#>  [2023-12-12 17:49:03.727225]: Fitting sample: TCGA-A8-A07S-01A-11D-A036-01
-#>  [2023-12-12 17:49:03.728763]: Fitting sample: TCGA-B6-A0X5-01A-21D-A107-01
-#>  [2023-12-12 17:49:03.730274]: Fitting sample: TCGA-CV-7432-01A-11D-2128-01
-#>  [2023-12-12 17:49:03.73176]: Fitting sample: TCGA-DF-A2KN-01A-11D-A17U-01
-#>  [2023-12-12 17:49:03.733244]: Done.
-#>  [2023-12-12 17:49:03.734592]: Generating output signature exposures.
-#>  [2023-12-12 17:49:03.736503]: Done.
-#>  [2023-12-12 17:49:03.737906]: 0.062 secs elapsed.
-

We can use a threshold to keep only the signatures that really contribute.

-
-act_refit2 = act_refit[apply(act_refit, 1, function(x) sum(x) > 0.1),]
-
-rownames(act_refit2)
-#>  [1] "CN1"  "CN2"  "CN3"  "CN4"  "CN9"  "CN11" "CN12" "CN13" "CN14" "CN19"
-
-
-

Plot signatures -

-

For de novo signatures:

-
-show_sig_profile(sig_denovo, mode = "copynumber", method = "S", style = "cosmic")
-

-

Show the activity/exposure.

-
-show_sig_exposure(sig_denovo)
-

-

For reference signatures, you can just select what you want:

-
-show_sig_profile(
-  get_sig_db("CNS_TCGA")$db[, rownames(act_refit2)],
-  style = "cosmic", 
-  mode = "copynumber", method = "S", check_sig_names = FALSE)
-

-

Similarly for showing activity.

-
-show_sig_exposure(act_refit2)
-

-

NOTE that this case shows relatively large differences between the different -approaches, so you need to pick one based on your data size/quality and -double-check the results. In general, for small data sets, the -refitting approach is recommended.

-
-
-

Signature assignment -

-

To assign the de-novo signatures to reference signatures, we use -cosine similarity.

-
-get_sig_similarity(sig_denovo, sig_db = "CNS_TCGA")
-#> -Comparing against COSMIC signatures
-#> ------------------------------------
-#> --Found Sig1 most similar to CN1
-#>    Aetiology: See https://cancer.sanger.ac.uk/signatures/cn/ [similarity: 0.706]
-#> --Found Sig2 most similar to CN2
-#>    Aetiology: See https://cancer.sanger.ac.uk/signatures/cn/ [similarity: 0.771]
-#> ------------------------------------
-#> Return result invisiblely.
-
-
- - - -
- - - - -
- - - - - - - - diff --git a/docs/articles/cnsignature_files/figure-html/unnamed-chunk-10-1.png b/docs/articles/cnsignature_files/figure-html/unnamed-chunk-10-1.png deleted file mode 100644 index 59f7af6b..00000000 Binary files a/docs/articles/cnsignature_files/figure-html/unnamed-chunk-10-1.png and /dev/null differ diff --git a/docs/articles/cnsignature_files/figure-html/unnamed-chunk-11-1.png b/docs/articles/cnsignature_files/figure-html/unnamed-chunk-11-1.png deleted file mode 100644 index d219ba03..00000000 Binary files a/docs/articles/cnsignature_files/figure-html/unnamed-chunk-11-1.png and /dev/null differ diff --git a/docs/articles/cnsignature_files/figure-html/unnamed-chunk-8-1.png b/docs/articles/cnsignature_files/figure-html/unnamed-chunk-8-1.png deleted file mode 100644 index 043f8af3..00000000 Binary files a/docs/articles/cnsignature_files/figure-html/unnamed-chunk-8-1.png and /dev/null differ diff --git a/docs/articles/cnsignature_files/figure-html/unnamed-chunk-9-1.png b/docs/articles/cnsignature_files/figure-html/unnamed-chunk-9-1.png deleted file mode 100644 index 1fbf3bda..00000000 Binary files a/docs/articles/cnsignature_files/figure-html/unnamed-chunk-9-1.png and /dev/null differ diff --git a/docs/articles/index.html b/docs/articles/index.html deleted file mode 100644 index 49d00755..00000000 --- a/docs/articles/index.html +++ /dev/null @@ -1,91 +0,0 @@ - -Articles • sigminer - - -
-
- - - -
-
- - - -
-
- - -
- - - - - - - - diff --git a/docs/articles/sigminer.html b/docs/articles/sigminer.html deleted file mode 100644 index 91b7a36f..00000000 --- a/docs/articles/sigminer.html +++ /dev/null @@ -1,214 +0,0 @@ - - - - - - - -A Quick Start of sigminer Package • sigminer - - - - - - - - - - - - - - - - - - - -
-
- - - - -
-
- - - - -

Assume you have already gotten a catalog matrix (sample-by-component) -like below:

-
-library(sigminer)
-#> sigminer version 2.2.2
-#> - Star me at https://github.com/ShixiangWang/sigminer
-#> - Run hello() to see usage and citation.
-data("simulated_catalogs")
-mat <- t(simulated_catalogs$set1)
-
-mat[1:5, 1:5]
-#>          A[C>A]A A[C>A]C A[C>A]G A[C>A]T C[C>A]A
-#> Sample_1     911     761      88     744     883
-#> Sample_2     195     175      19     174     225
-#> Sample_3      95      51      12      55     142
-#> Sample_4     131      71      14      77     170
-#> Sample_5      33      10       2      14      55
-

Extract signatures with:

-
-# Here I reduce the values for n_bootstrap and n_nmf_run for reducing the run time.  In practice,
-# you should keep default or increase the values for better estimation.  The input data here is
-# simulated from 10 mutational signatures
-e1 <- bp_extract_signatures(mat, range = 8:12, n_bootstrap = 5, n_nmf_run = 10)
-

Check which signature number is proper:

-
-bp_show_survey2(e1, highlight = 10)
-#> Variables can be used: signature_number, silhouette, sample_cosine_distance, L1_error, L2_error, exposure_positive_correlation, signature_similarity_within_cluster, signature_similarity_across_cluster, silhouette_sample
-

-

Get the 10 signatures:

-
-obj <- bp_get_sig_obj(e1, 10)
-

Show signature profile:

-
-show_sig_profile(obj, mode = "SBS", style = "cosmic")
-

-Show signature activity (a.k.a. exposure) profile:

-
-show_sig_exposure(obj, rm_space = TRUE)
-

-

Calculate the similarity to COSMIC reference signatures:

-
-sim <- get_sig_similarity(obj, sig_db = "SBS")
-#> -Comparing against COSMIC signatures
-#> ------------------------------------
-#> --Found Sig1 most similar to SBS12
-#>    Aetiology: Unknown [similarity: 0.932]
-#> --Found Sig2 most similar to SBS13
-#>    Aetiology: Activity of APOBEC family of cytidine deaminases [similarity: 0.97]
-#> --Found Sig3 most similar to SBS5
-#>    Aetiology: Unknown (clock-like signature) [similarity: 0.95]
-#> --Found Sig4 most similar to SBS3
-#>    Aetiology: Defective homologous recombination DNA damage repair [similarity: 0.927]
-#> --Found Sig5 most similar to SBS8
-#>    Aetiology: Unknown [similarity: 0.934]
-#> --Found Sig6 most similar to SBS18
-#>    Aetiology: Damage by reactive oxygen species [similarity: 0.975]
-#> --Found Sig7 most similar to SBS1
-#>    Aetiology: Spontaneous deamination of 5-methylcytosine (clock-like signature) [similarity: 0.76]
-#> --Found Sig8 most similar to SBS6
-#>    Aetiology: Defective DNA mismatch repair [similarity: 0.957]
-#> --Found Sig9 most similar to SBS17b
-#>    Aetiology: Unknown [similarity: 0.888]
-#> --Found Sig10 most similar to SBS2
-#>    Aetiology: Activity of APOBEC family of cytidine deaminases [similarity: 0.986]
-#> ------------------------------------
-#> Return result invisiblely.
-
-if (require(pheatmap)) {
-    pheatmap::pheatmap(sim$similarity)
-}
-

-
-

More -

-

Please go to the reference -list for well-organized functions and documentation.

-

For more about mutational signature and sigminer -usage, you can read sigminer-book.

-
-
- - - -
- - - - -
- - - - - - - - diff --git a/docs/articles/sigminer_files/figure-html/unnamed-chunk-10-1.png b/docs/articles/sigminer_files/figure-html/unnamed-chunk-10-1.png deleted file mode 100644 index a49a29f8..00000000 Binary files a/docs/articles/sigminer_files/figure-html/unnamed-chunk-10-1.png and /dev/null differ diff --git a/docs/articles/sigminer_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/sigminer_files/figure-html/unnamed-chunk-5-1.png deleted file mode 100644 index a11fa46b..00000000 Binary files a/docs/articles/sigminer_files/figure-html/unnamed-chunk-5-1.png and /dev/null differ diff --git a/docs/articles/sigminer_files/figure-html/unnamed-chunk-7-1.png b/docs/articles/sigminer_files/figure-html/unnamed-chunk-7-1.png deleted file mode 100644 index ca3a16c1..00000000 Binary files a/docs/articles/sigminer_files/figure-html/unnamed-chunk-7-1.png and /dev/null differ diff --git a/docs/articles/sigminer_files/figure-html/unnamed-chunk-8-1.png b/docs/articles/sigminer_files/figure-html/unnamed-chunk-8-1.png deleted file mode 100644 index 04d5cb79..00000000 Binary files a/docs/articles/sigminer_files/figure-html/unnamed-chunk-8-1.png and /dev/null differ diff --git a/docs/authors.html b/docs/authors.html deleted file mode 100644 index e5b1eea3..00000000 --- a/docs/authors.html +++ /dev/null @@ -1,131 +0,0 @@ - -Authors and Citation • sigminer - - -
-
- - - -
-
-
- - - -
  • -

    Shixiang Wang. Author, maintainer. -

    -
  • -
  • -

    Ziyu Tao. Author. -

    -
  • -
  • -

    Huimin Li. Author. -

    -
  • -
  • -

    Tao Wu. Author. -

    -
  • -
  • -

    Xue-Song Liu. Author, contributor. -

    -
  • -
  • -

    Anand Mayakonda. Contributor. -

    -
  • -
-
-
-

Citation

- Source: inst/CITATION -
-
- - -

Wang S, Li H, Song M, Tao Z, Wu T, He Z, et al. (2021) Copy number signature analysis tool and its application in prostate cancer reveals distinct mutational processes and clinical outcomes. PLoS Genet 17(5): e1009557. https://doi.org/10.1371/journal.pgen.1009557

-
@Article{,
-  title = {Copy number signature analysis tool and its application in prostate cancer reveals distinct mutational processes and clinical outcomes},
-  author = {Shixiang Wang and Huimin Li and Minfang Song and Zaoke He and Tao Wu and Xuan Wang and Ziyu Tao and Kai Wu and Xue-Song Liu},
-  journal = {Plos Genetics},
-  year = {2021},
-  volume = {17},
-  number = {5},
-  pages = {e1009557},
-  url = {https://doi.org/10.1371/journal.pgen.1009557},
-}
- -
- -
- - - -
- - - - - - - - diff --git a/docs/bootstrap-toc.css b/docs/bootstrap-toc.css deleted file mode 100644 index 5a859415..00000000 --- a/docs/bootstrap-toc.css +++ /dev/null @@ -1,60 +0,0 @@ -/*! - * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) - * Copyright 2015 Aidan Feldman - * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ - -/* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ - -/* All levels of nav */ -nav[data-toggle='toc'] .nav > li > a { - display: block; - padding: 4px 20px; - font-size: 13px; - font-weight: 500; - color: #767676; -} -nav[data-toggle='toc'] .nav > li > a:hover, -nav[data-toggle='toc'] .nav > li > a:focus { - padding-left: 19px; - color: #563d7c; - text-decoration: none; - background-color: transparent; - border-left: 1px solid #563d7c; -} -nav[data-toggle='toc'] .nav > .active > a, -nav[data-toggle='toc'] .nav > .active:hover > a, -nav[data-toggle='toc'] .nav > .active:focus > a { - padding-left: 18px; - font-weight: bold; - color: #563d7c; - background-color: transparent; - border-left: 2px solid #563d7c; -} - -/* Nav: second level (shown on .active) */ -nav[data-toggle='toc'] .nav .nav { - display: none; /* Hide by default, but at >768px, show it */ - padding-bottom: 10px; -} -nav[data-toggle='toc'] .nav .nav > li > a { - padding-top: 1px; - padding-bottom: 1px; - padding-left: 30px; - font-size: 12px; - font-weight: normal; -} -nav[data-toggle='toc'] .nav .nav > li > a:hover, -nav[data-toggle='toc'] .nav .nav > li > a:focus { - padding-left: 29px; -} -nav[data-toggle='toc'] .nav .nav > .active > a, -nav[data-toggle='toc'] .nav .nav > .active:hover > a, -nav[data-toggle='toc'] .nav .nav > .active:focus > a { - padding-left: 28px; - font-weight: 500; -} - -/* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ 
-nav[data-toggle='toc'] .nav > .active > ul { - display: block; -} diff --git a/docs/bootstrap-toc.js b/docs/bootstrap-toc.js deleted file mode 100644 index 1cdd573b..00000000 --- a/docs/bootstrap-toc.js +++ /dev/null @@ -1,159 +0,0 @@ -/*! - * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) - * Copyright 2015 Aidan Feldman - * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ -(function() { - 'use strict'; - - window.Toc = { - helpers: { - // return all matching elements in the set, or their descendants - findOrFilter: function($el, selector) { - // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/ - // http://stackoverflow.com/a/12731439/358804 - var $descendants = $el.find(selector); - return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])'); - }, - - generateUniqueIdBase: function(el) { - var text = $(el).text(); - var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-'); - return anchor || el.tagName.toLowerCase(); - }, - - generateUniqueId: function(el) { - var anchorBase = this.generateUniqueIdBase(el); - for (var i = 0; ; i++) { - var anchor = anchorBase; - if (i > 0) { - // add suffix - anchor += '-' + i; - } - // check if ID already exists - if (!document.getElementById(anchor)) { - return anchor; - } - } - }, - - generateAnchor: function(el) { - if (el.id) { - return el.id; - } else { - var anchor = this.generateUniqueId(el); - el.id = anchor; - return anchor; - } - }, - - createNavList: function() { - return $(''); - }, - - createChildNavList: function($parent) { - var $childList = this.createNavList(); - $parent.append($childList); - return $childList; - }, - - generateNavEl: function(anchor, text) { - var $a = $(''); - $a.attr('href', '#' + anchor); - $a.text(text); - var $li = $('
  • '); - $li.append($a); - return $li; - }, - - generateNavItem: function(headingEl) { - var anchor = this.generateAnchor(headingEl); - var $heading = $(headingEl); - var text = $heading.data('toc-text') || $heading.text(); - return this.generateNavEl(anchor, text); - }, - - // Find the first heading level (`

    `, then `

    `, etc.) that has more than one element. Defaults to 1 (for `

    `). - getTopLevel: function($scope) { - for (var i = 1; i <= 6; i++) { - var $headings = this.findOrFilter($scope, 'h' + i); - if ($headings.length > 1) { - return i; - } - } - - return 1; - }, - - // returns the elements for the top level, and the next below it - getHeadings: function($scope, topLevel) { - var topSelector = 'h' + topLevel; - - var secondaryLevel = topLevel + 1; - var secondarySelector = 'h' + secondaryLevel; - - return this.findOrFilter($scope, topSelector + ',' + secondarySelector); - }, - - getNavLevel: function(el) { - return parseInt(el.tagName.charAt(1), 10); - }, - - populateNav: function($topContext, topLevel, $headings) { - var $context = $topContext; - var $prevNav; - - var helpers = this; - $headings.each(function(i, el) { - var $newNav = helpers.generateNavItem(el); - var navLevel = helpers.getNavLevel(el); - - // determine the proper $context - if (navLevel === topLevel) { - // use top level - $context = $topContext; - } else if ($prevNav && $context === $topContext) { - // create a new level of the tree and switch to it - $context = helpers.createChildNavList($prevNav); - } // else use the current $context - - $context.append($newNav); - - $prevNav = $newNav; - }); - }, - - parseOps: function(arg) { - var opts; - if (arg.jquery) { - opts = { - $nav: arg - }; - } else { - opts = arg; - } - opts.$scope = opts.$scope || $(document.body); - return opts; - } - }, - - // accepts a jQuery object, or an options object - init: function(opts) { - opts = this.helpers.parseOps(opts); - - // ensure that the data attribute is in place for styling - opts.$nav.attr('data-toggle', 'toc'); - - var $topContext = this.helpers.createChildNavList(opts.$nav); - var topLevel = this.helpers.getTopLevel(opts.$scope); - var $headings = this.helpers.getHeadings(opts.$scope, topLevel); - this.helpers.populateNav($topContext, topLevel, $headings); - } - }; - - $(function() { - $('nav[data-toggle="toc"]').each(function(i, el) { - var $nav = $(el); - 
Toc.init($nav); - }); - }); -})(); diff --git a/docs/docsearch.css b/docs/docsearch.css deleted file mode 100644 index e5f1fe1d..00000000 --- a/docs/docsearch.css +++ /dev/null @@ -1,148 +0,0 @@ -/* Docsearch -------------------------------------------------------------- */ -/* - Source: https://github.com/algolia/docsearch/ - License: MIT -*/ - -.algolia-autocomplete { - display: block; - -webkit-box-flex: 1; - -ms-flex: 1; - flex: 1 -} - -.algolia-autocomplete .ds-dropdown-menu { - width: 100%; - min-width: none; - max-width: none; - padding: .75rem 0; - background-color: #fff; - background-clip: padding-box; - border: 1px solid rgba(0, 0, 0, .1); - box-shadow: 0 .5rem 1rem rgba(0, 0, 0, .175); -} - -@media (min-width:768px) { - .algolia-autocomplete .ds-dropdown-menu { - width: 175% - } -} - -.algolia-autocomplete .ds-dropdown-menu::before { - display: none -} - -.algolia-autocomplete .ds-dropdown-menu [class^=ds-dataset-] { - padding: 0; - background-color: rgb(255,255,255); - border: 0; - max-height: 80vh; -} - -.algolia-autocomplete .ds-dropdown-menu .ds-suggestions { - margin-top: 0 -} - -.algolia-autocomplete .algolia-docsearch-suggestion { - padding: 0; - overflow: visible -} - -.algolia-autocomplete .algolia-docsearch-suggestion--category-header { - padding: .125rem 1rem; - margin-top: 0; - font-size: 1.3em; - font-weight: 500; - color: #00008B; - border-bottom: 0 -} - -.algolia-autocomplete .algolia-docsearch-suggestion--wrapper { - float: none; - padding-top: 0 -} - -.algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column { - float: none; - width: auto; - padding: 0; - text-align: left -} - -.algolia-autocomplete .algolia-docsearch-suggestion--content { - float: none; - width: auto; - padding: 0 -} - -.algolia-autocomplete .algolia-docsearch-suggestion--content::before { - display: none -} - -.algolia-autocomplete .ds-suggestion:not(:first-child) .algolia-docsearch-suggestion--category-header { - padding-top: .75rem; - margin-top: 
.75rem; - border-top: 1px solid rgba(0, 0, 0, .1) -} - -.algolia-autocomplete .ds-suggestion .algolia-docsearch-suggestion--subcategory-column { - display: block; - padding: .1rem 1rem; - margin-bottom: 0.1; - font-size: 1.0em; - font-weight: 400 - /* display: none */ -} - -.algolia-autocomplete .algolia-docsearch-suggestion--title { - display: block; - padding: .25rem 1rem; - margin-bottom: 0; - font-size: 0.9em; - font-weight: 400 -} - -.algolia-autocomplete .algolia-docsearch-suggestion--text { - padding: 0 1rem .5rem; - margin-top: -.25rem; - font-size: 0.8em; - font-weight: 400; - line-height: 1.25 -} - -.algolia-autocomplete .algolia-docsearch-footer { - width: 110px; - height: 20px; - z-index: 3; - margin-top: 10.66667px; - float: right; - font-size: 0; - line-height: 0; -} - -.algolia-autocomplete .algolia-docsearch-footer--logo { - background-image: url("data:image/svg+xml;utf8,"); - background-repeat: no-repeat; - background-position: 50%; - background-size: 100%; - overflow: hidden; - text-indent: -9000px; - width: 100%; - height: 100%; - display: block; - transform: translate(-8px); -} - -.algolia-autocomplete .algolia-docsearch-suggestion--highlight { - color: #FF8C00; - background: rgba(232, 189, 54, 0.1) -} - - -.algolia-autocomplete .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight { - box-shadow: inset 0 -2px 0 0 rgba(105, 105, 105, .5) -} - -.algolia-autocomplete .ds-suggestion.ds-cursor .algolia-docsearch-suggestion--content { - background-color: rgba(192, 192, 192, .15) -} diff --git a/docs/docsearch.js b/docs/docsearch.js deleted file mode 100644 index b35504cd..00000000 --- a/docs/docsearch.js +++ /dev/null @@ -1,85 +0,0 @@ -$(function() { - - // register a handler to move the focus to the search bar - // upon pressing shift + "/" (i.e. 
"?") - $(document).on('keydown', function(e) { - if (e.shiftKey && e.keyCode == 191) { - e.preventDefault(); - $("#search-input").focus(); - } - }); - - $(document).ready(function() { - // do keyword highlighting - /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ - var mark = function() { - - var referrer = document.URL ; - var paramKey = "q" ; - - if (referrer.indexOf("?") !== -1) { - var qs = referrer.substr(referrer.indexOf('?') + 1); - var qs_noanchor = qs.split('#')[0]; - var qsa = qs_noanchor.split('&'); - var keyword = ""; - - for (var i = 0; i < qsa.length; i++) { - var currentParam = qsa[i].split('='); - - if (currentParam.length !== 2) { - continue; - } - - if (currentParam[0] == paramKey) { - keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); - } - } - - if (keyword !== "") { - $(".contents").unmark({ - done: function() { - $(".contents").mark(keyword); - } - }); - } - } - }; - - mark(); - }); -}); - -/* Search term highlighting ------------------------------*/ - -function matchedWords(hit) { - var words = []; - - var hierarchy = hit._highlightResult.hierarchy; - // loop to fetch from lvl0, lvl1, etc. 
- for (var idx in hierarchy) { - words = words.concat(hierarchy[idx].matchedWords); - } - - var content = hit._highlightResult.content; - if (content) { - words = words.concat(content.matchedWords); - } - - // return unique words - var words_uniq = [...new Set(words)]; - return words_uniq; -} - -function updateHitURL(hit) { - - var words = matchedWords(hit); - var url = ""; - - if (hit.anchor) { - url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; - } else { - url = hit.url + '?q=' + escape(words.join(" ")); - } - - return url; -} diff --git a/docs/favicon-16x16.png b/docs/favicon-16x16.png deleted file mode 100644 index c7dadaac..00000000 Binary files a/docs/favicon-16x16.png and /dev/null differ diff --git a/docs/favicon-32x32.png b/docs/favicon-32x32.png deleted file mode 100644 index a5db0780..00000000 Binary files a/docs/favicon-32x32.png and /dev/null differ diff --git a/docs/favicon.ico b/docs/favicon.ico deleted file mode 100644 index e0bfcc97..00000000 Binary files a/docs/favicon.ico and /dev/null differ diff --git a/docs/index.html b/docs/index.html deleted file mode 100644 index cf985d03..00000000 --- a/docs/index.html +++ /dev/null @@ -1,295 +0,0 @@ - - - - - - - -Mutational Signature Miner in R • sigminer - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    -
    -
    - -

    CRAN status lifecycle R-CMD-check Closed issues Hits install with biocondacheck in Biotreasury

    -
    -

    -📊 Overview -

    -

    The cancer genome is shaped by various mutational processes over its lifetime, stemming from exogenous and cell-intrinsic DNA damage, and error-prone DNA replication, leaving behind characteristic mutational spectra, termed mutational signatures. This package, sigminer, helps users to extract, analyze and visualize signatures from genome alteration records, thus providing new insight into cancer study.

    -

    For pipeline tool, please see its co-evolutionary CLI sigflow.

    -

    SBS signatures:

    -

    -

    Copy number signatures:

    -

    -

    -

    DBS signatures:

    -

    -

    INDEL (i.e. ID) signatures:

    -

    -

    Genome rearrangement signatures:

    -

    -
    -

    -✈️ Features -

    -
      -
    • supports a standard de novo pipeline for identification of 5 types of signatures: copy number, SBS, DBS, INDEL and RS (genome rearrangement signature).
    • -
    • supports quantify exposure for one sample based on known signatures.
    • -
    • supports association and group analysis and visualization for signatures.
    • -
    • supports two types of signature exposures: relative exposure (relative contribution of signatures in each sample) and absolute exposure (estimated variation records of signatures in each sample).
    • -
    • supports basic summary and visualization for profile of mutation (powered by maftools) and copy number.
    • -
    • supports parallel computation by R packages foreach, future and NMF.
    • -
    • efficient code powered by R packages data.table and tidyverse.
    • -
    • elegant plots powered by R packages ggplot2, ggpubr, cowplot and patchwork.
    • -
    • well tested by R package testthat and documented by R package roxygen2, roxytest, pkgdown, and etc. for both reliable and reproducible research.
    • -
    -
    -
    -
    -

    - Installation -

    -

    You can install the stable release of sigminer from CRAN with:

    -
    -install.packages("BiocManager")
    -BiocManager::install("sigminer", dependencies = TRUE)
    -

    You can install the development version of sigminer from Github with:

    -
    -remotes::install_github("ShixiangWang/sigminer", dependencies = TRUE)
    -# For Chinese users, run 
    -remotes::install_git("https://gitee.com/ShixiangWang/sigminer", dependencies = TRUE)
    -

    You can also install sigminer from conda bioconda channel with

    -
    # Please note version number of the bioconda release
    -
    -# You can install an individual environment firstly with
    -# conda create -n sigminer
    -# conda activate sigminer
    -conda install -c bioconda -c conda-forge r-sigminer
    -
    -
    -

    -🔰 Usage -

    -

    A complete documentation of sigminer can be read online at https://shixiangwang.github.io/sigminer-book/. All functions are well organized and documented at https://shixiangwang.github.io/sigminer/reference/index.html. For usage of a specific function fun, run ?fun in your R console to see its documentation.

    -
    -
    -

    -📎 Citation -

    -

    If you use sigminer in academic field, please cite one of the following papers.

    -
    -
      -
    • -Wang S, Li H, Song M, Tao Z, Wu T, He Z, et al. (2021) Copy number signature analysis tool and its application in prostate cancer reveals distinct mutational processes and clinical outcomes. PLoS Genet 17(5): e1009557. https://doi.org/10.1371/journal.pgen.1009557 -
    • -
    • -Wang, S., Tao, Z., Wu, T., & Liu, X. S. (2021). Sigflow: an automated and comprehensive pipeline for cancer genome mutational signature analysis. Bioinformatics, 37(11), 1590-1592. https://doi.org/10.1093/bioinformatics/btaa895 -
    • -
    • -Ziyu Tao, Shixiang Wang, Chenxu Wu, Tao Wu, Xiangyu Zhao, Wei Ning, Guangshuai Wang, Jinyu Wang, Jing Chen, Kaixuan Diao, Fuxiang Chen, Xue-Song Liu, The repertoire of copy number alteration signatures in human cancer, Briefings in Bioinformatics, 2023, bbad053. https://doi.org/10.1093/bib/bbad053 -
    • -
    -
    -
    -
    -

    -⬇️ Download Stats -

    -

    -
    -
    -

    -📃 References -

    -

    Please properly cite the following references when you are using any corresponding features. The references are also listed in the function documentation. Very thanks to the works, sigminer cannot be created without the giants.

    -
      -
    1. Mayakonda, Anand, et al. “Maftools: efficient and comprehensive analysis of somatic variants in cancer.” Genome research 28.11 (2018): 1747-1756.
    2. -
    3. Gaujoux, Renaud, and Cathal Seoighe. “A Flexible R Package for Nonnegative Matrix Factorization.”” BMC Bioinformatics 11, no. 1 (December 2010).
    4. -
    5. H. Wickham. ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York, 2016.
    6. -
    7. Kim, Jaegil, et al. “Somatic ERCC2 mutations are associated with a distinct genomic signature in urothelial tumors.” Nature genetics 48.6 (2016): 600.
    8. -
    9. Alexandrov, Ludmil B., et al. “Deciphering signatures of mutational processes operative in human cancer.” Cell reports 3.1 (2013): 246-259.
    10. -
    11. Degasperi, Andrea, et al. “A practical framework and online tool for mutational signature analyses show intertissue variation and driver dependencies.” Nature cancer 1.2 (2020): 249-263.
    12. -
    13. Alexandrov, Ludmil B., et al. “The repertoire of mutational signatures in human cancer.” Nature 578.7793 (2020): 94-101.
    14. -
    15. Macintyre, Geoff, et al. “Copy number signatures and mutational processes in ovarian carcinoma.” Nature genetics 50.9 (2018): 1262.
    16. -
    17. Tan, Vincent YF, and Cédric Févotte. “Automatic relevance determination in nonnegative matrix factorization with the/spl beta/-divergence.” IEEE Transactions on Pattern Analysis and Machine Intelligence 35.7 (2012): 1592-1605.
    18. -
    19. Bergstrom EN, Huang MN, Mahto U, Barnes M, Stratton MR, Rozen SG, Alexandrov LB: SigProfilerMatrixGenerator: a tool for visualizing and exploring patterns of small mutational events. BMC Genomics 2019, 20:685 https://bmcgenomics.biomedcentral.com/articles/10.1186/s12864-019-6041-2 -
    20. -
    -
    -
    -

    -📄 LICENSE -

    -

    The software is made available for non commercial research purposes only under the MIT. However, notwithstanding any provision of the MIT License, the software currently may not be used for commercial purposes without explicit written permission after contacting patents’ authors.

    -

    Related patents:

    -
      -
    • -CN202011516653.7 https://kms.shanghaitech.edu.cn/handle/2MSLDSTB/127042 -
    • -
    -

    MIT © 2019-Present Shixiang Wang, Xue-Song Liu

    -

    MIT © 2018 Anand Mayakonda

    -
    -

    Sigminer v1-v2 are supported by Cancer Biology Group @ShanghaiTech

    -
    -Alt
    Alt
    -
    -
    -
    -
    - - -
    - - - -
    - - - - - - - - diff --git a/docs/link.svg b/docs/link.svg deleted file mode 100644 index 88ad8276..00000000 --- a/docs/link.svg +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - diff --git a/docs/logo.png b/docs/logo.png deleted file mode 100644 index a44d4f12..00000000 Binary files a/docs/logo.png and /dev/null differ diff --git a/docs/news/index.html b/docs/news/index.html deleted file mode 100644 index c28c12a1..00000000 --- a/docs/news/index.html +++ /dev/null @@ -1,572 +0,0 @@ - -Changelog • sigminer - - -
    -
    - - - -
    -
    - - -
    - -
    • Supported human T2T genome and corresponding annotation data.
    • -
    • Updated COSMIC database to v3.4. SV and RNA-SBS signatures are included.
    • -
    -
    -get_sig_db("latest_RNA-SBS_GRCh37")
    -get_sig_db("latest_SV_GRCh38")
    -
    • Fixed a bug in generating matrix for variation categories with strand bias due to problematic counting. (#445)
    • -
    -
    - -
    • Updated pkg doc following the new CRAN feature (thanks to K from the CRAN team).
    • -
    • Added samps option to show_sig_exposure().
    • -

    Example:

    -
    -
    -load(system.file("extdata", "toy_mutational_signature.RData",
    -                 package = "sigminer", mustWork = TRUE
    -))
    -# Show signature exposure
    -p1 <- show_sig_exposure(sig2, rm_space = TRUE)
    -p1
    -
    -expo = sig_exposure(sig2)
    -show_sig_exposure(expo,
    -                  rm_space = TRUE,
    -                  samps = colnames(expo)[order(colSums(expo))])
    -
    -
    - -
    • Fixed the error in generating SBS matrix when only one sample input (#432).
    • -
    -
    - -
    • Removed package ‘copynumber’ from suggests filed.
    • -
    • Supported Ziyu Tao et al approach for copy number segment classification.
    • -
    • Supported ce11 genome in read_vcf().
    • -
    • Added read_maf_minimal() to support a minimal MAF-like data as input.
    • -
    -
    - -
    • Fixed the issue about the latest CN signatures from COSMIC have inconsistent labels with built-in CN signatures (#421).
    • -
    -
    - -
    -
    - -
    • Fixed the bug about plotting CN chromosome distribution (#420, thanks to @jrcodina96).
    • -
    -
    - -
    -
    - -
    -
    - -
    • Added a vignette to introduce the analysis of copy number signatures.
    • -
    • Updated CNS_TCGA.
    • -
    • Enhanced group_enrichment() with reference group support.
    • -

    Example:

    -
    -
    -set.seed(1234)
    -df <- dplyr::tibble(
    -  g1 = rep(LETTERS[1:3], c(50, 40, 10)),
    -  g2 = rep(c("AA", "VV", "XX"), c(50, 40, 10)),
    -  e1 = sample(c("P", "N"), 100, replace = TRUE),
    -  e2 = rnorm(100)
    -)
    -
    -x1 = group_enrichment(df, grp_vars = c("g1", "g2"), 
    -                      enrich_vars = c("e1", "e2"), 
    -                      ref_group = c("B", "VV"))
    -x1
    -
    -
    - -
    • Added option for reading ASCAT objects in parallel.
    • -
    -
    - -
    • Fixed error in extracting invalid regions (#396, thanks to @KirsieMin).
    • -
    -
    - -
    -
    - -
    -
    - -
    • Added sig_unify_extract() as an unified signature extractor.
    • -
    • Fixed error showing reference signature profile for CNS_TCGA database.
    • -
    -
    - -
    -
    - -
    • Implemented Cohen-Sharir method-like Aneuploidy Score.
    • -
    • Enhanced error handling in show_sig_feature_corrplot() (#376).
    • -
    • Fixed INDEL classification.
    • -
    • Fixed end position determination in read_vcf().
    • -
    • Updated INDEL adjustment.
    • -
    • Included TCGA copy number signatures from SigProfiler.
    • -
    • Updated docs.
    • -
    -
    - -
    -
    - -
    • Fixed output_sig() error in handling exposure plot with >9 signatures (#366).
    • -
    • Added limitsize = FALSE for ggsave() or ggsave2() for handling big figure.
    • -
    -
    - -
    • Supported mm9 genome build.
    • -
    • Removed FTP link as CRAN suggested (#359).
    • -
    • Updated README.
    • -
    -
    - -
    -

    BUG REPORTS

    -
    • Fixed the SigProfiler installation error due to Python version in conda environment.
    • -
    • Fixed classification bug due to repeated function name call_component.
    • -
    • Fixed the bug when read_vcf() with ## commented VCF files.
    • -
    -
    -

    ENHANCEMENTS

    -
    • Added support for latest COSMIC v3.2 as reference signatures. You can obtain them by
    • -
    -
    -for (i in c("latest_SBS_GRCh37", "latest_DBS_GRCh37", "latest_ID_GRCh37",
    -            "latest_SBS_GRCh38", "latest_DBS_GRCh38",
    -            "latest_SBS_mm9", "latest_DBS_mm9",
    -            "latest_SBS_mm10", "latest_DBS_mm10",
    -            "latest_SBS_rn6", "latest_DBS_rn6")) {
    -  message(i)
    -  get_sig_db(i)
    -}
    -
    -
    -

    NEW FUNCTIONS

    -
    -
    -

    DEPRECATED

    -
    • Dropped copy number “M”” method to avoid misguiding user to use/read wrong signature profile and keep code simple.
    • -
    -
    -
    - -
    -

    BUG REPORTS

    -
    -
    -

    ENHANCEMENTS

    -
    • Modified the default visualization of bp_show_survey().
    • -
    • Enhanced torch check.
    • -
    -
    -

    NEW FUNCTIONS

    -
    -
    -

    DEPRECATED

    -
    -
    -
    - -
    -

    BUG REPORTS

    -
    • Fixed the assign problem about match pair in bp_extract_signatures() with lpSolve package instead of using my problematic code.
    • -
    -
    -

    ENHANCEMENTS

    -
    • Supported mm10 in read_vcf().
    • -
    • Removed large data files and store them in Zenodo to reduce package size.
    • -
    • Added cores check.
    • -
    • Upgraded SP to v1.1.0 (need test).
    • -
    • Tried installing Torch before SP (need test).
    • -
    -
    -

    NEW FUNCTIONS

    -
    -
    -

    DEPRECATED

    -
    -
    -
    - -
    -

    BUG REPORTS

    -
    -
    -

    ENHANCEMENTS

    -
    • Subset signatures to plot is available by sig_names option.
    • -
    • sigminer is available in bioconda channel: https://anaconda.org/bioconda/r-sigminer/ -
    • -
    • Updated ms strategy in sig_auto_extract() by assigning each signature to its best matched reference signatures.
    • -
    • Added get_shannon_diversity_index() to get diversity index for signatures (#333).
    • -
    • Added new method “S” (from Steele et al. 2019) for tallying copy number data (#329).
    • -
    • Included new (RS) reference signatures (related to #331).
    • -
    • Updated the internal code for getting relative activity in get_sig_exposure().
    • -
    -
    -

    NEW FUNCTIONS

    -
    -
    -

    DEPRECATED

    -
    -
    -
    - -
    • Updated author list.
    • -
    -

    BUG REPORTS

    -
    -
    -

    ENHANCEMENTS

    -
    -
    -

    NEW FUNCTIONS

    -
    -
    -

    DEPRECATED

    -
    -
    -
    - -
    -

    BUG REPORTS

    -
    -
    -

    ENHANCEMENTS

    -
    • A new option cut_p_value is added to show_group_enrichment() to cut continous p values as binned regions.
    • -
    • A Python backend for sig_extract() is provided.
    • -
    • User now can directly use sig_extract() and sig_auto_extract() instead of loading NMF package firstly.
    • -
    • Added benchmark results for different extraction approaches in README.
    • -
    • The threshold for auto_reduce in sig_fit() is modified from 0.99 to 0.95 and similarity update threshold updated from >0 to >=0.01.
    • -
    • Removed pConstant option from sig_extract() and sig_estimate(). Now a auto-check function is created for avoiding the error from NMF package due to no contribution of a component in all samples.
    • -
    -
    -

    NEW FUNCTIONS

    -
    • -bp_show_survey2() to plot a simplified version for signature number survey (#330).
    • -
    • -read_xena_variants() to read variant data from UCSC Xena as a MAF object for signature analysis.
    • -
    • -get_sig_rec_similarity() for getting reconstructed profile similarity for Signature object (#293).
    • -
    • Added functions start with bp_ which are combined to provide a best practice for extracting signatures in cancer researches. See more details, run ?bp in your R console.
    • -
    -
    -

    DEPRECATED

    -
    -
    -
    - -
    -
    - -
    -
    - -
    • Fixed bugs when outputing only 1 signatures.
    • -
    • Fixed label inverse bug in add_labels(), thanks to TaoTao for reporting.
    • -
    -
    - -
    -
    - -
    • Added auto_reduce option in sig_fit* functions to improve signature fitting.
    • -
    • Return cosine similarity for sample profile in sig_fit().
    • -
    • Set default strategy in sig_auto_extract() to ‘optimal’.
    • -
    • Supported search reference signature index in get_sig_cancer_type_index().
    • -
    • Outputed legacy COSMIC similarity for SBS signatures.
    • -
    • Added new option in sigprofiler_extract() to reduce failure in when refit is enabled.
    • -
    • Outputed both relative and absolute signature exposure in output_sig().
    • -
    • Updated background color in show_group_distribution().
    • -
    • Modified the default theme for signature profile in COSMIC style.
    • -
    • Updated the copy number classification method.
    • -
    -
    - -
    • Handled null catalogue.
    • -
    • Supported ordering the signatures for results from SigProfiler.
    • -
    • Supported importing refit results from SigProfiler.
    • -
    • Set optimize option in sig_extract() and sig_auto_extract().
    • -
    -
    - -
    -
    - -
    • Supported BSgenome.Hsapiens.1000genomes.hs37d5 in sig_tally().
    • -
    • Remove changing MT to M in mutation data.
    • -
    • Fixed bug in extract numeric signature names and signature orderings in show_sig_exposure().
    • -
    • Added letter_colors as an unexported discrete palette.
    • -
    -
    - -
    -
    - -
    • Added option to control the SigProfilerExtractor to avoid issue in docker image build.
    • -
    -
    - -
    • Some updates.
    • -
    • Compatible with SigProfiler 1.0.15
    • -
    -
    - -
    • Tried to speed up joining adjacent segments in read_copynumber(), got 200% improvement.
    • -
    -
    - -
    -
    - -
    • Fixed bug in OsCN feature calculation.
    • -
    • Removed useless options in read_maf().
    • -
    • Modify method ‘LS’ in sig_fit() to ‘NNLS’ and implement it with pracma package (#216).
    • -
    • Made use_all option in read_copynumber() working correctly.
    • -
    • Fixed potential problem raised by unordered copy number segments (#217).
    • -
    • Fixed a typo, correct MRSE to RMSE.
    • -
    • Added feature in show_sig_bootstrap_*() for plotting aggregated values.
    • -
    • Fixed bug when use get_groups() for clustering.
    • -
    • Fixed bug about using reference components from NatGen 2018 paper.
    • -
    • Added option highlight_size for show_sig_bootstrap_*().
    • -
    • Fixed bug about signature profile plotting for method ‘M’.
    • -
    -
    - -
    • Added “scatter” in sig_fit() function to better visualize a few samples.
    • -
    • Added “highlight” option.
    • -
    • -lsei package was removed from CRAN, here I reset default method to ‘QP’ and tried best to keep the LS usage in sigminer (#189).
    • -
    • Made consistent copy number labels in show_sig_profile() and added input checking for this function.
    • -
    • Fixed unconsistent bootstrap when use furrr, solution is from https://github.com/DavisVaughan/furrr/issues/107.
    • -
    • Properly handled null-count sample in sig_fit() for methods QP and SA.
    • -
    • Supported boxplot or violin in show_sig_fit() and show_sig_bootstrap_* functions.
    • -
    • Added job mode for sig_fit_bootstrap_batch for more useful in practice.
    • -
    • Added show_groups() to show the signature contribution in each group from get_groups().
    • -
    • Expanded clustering in get_groups() to result of sig_fit().
    • -
    • Properly handled null-count samples in sig_fit_bootstrap_batch().
    • -
    • Added strand bias labeling for INDEL.
    • -
    • Added COSMIC TSB signatures.
    • -
    -
    - -
    • Exported APOBEC result when the mode is ‘ALL’ in sig_tally().
    • -
    • Added batch bootstrap analysis feature (#158).
    • -
    • Supported all common signature plotting.
    • -
    • Added strand feature to signature profile.
    • -
    -
    - -
    • Added profile plot for DBS and INDEL.
    • -
    • Fixed error for signature extraction in mode ‘DBS’ or ‘ID’.
    • -
    • Fixed method ‘M’ for CN tally cannot work when cores > 1 (#161).
    • -
    -
    - -
    • Added multiple methods for sig_fit().
    • -
    • Added feature sig_fit_bootstrap() for bootstrap results.
    • -
    • Added multiple classification method for SBS signature.
    • -
    • Added strand bias enrichment analysis for SBS signature.
    • -
    • Moved multiple packages from field Imports to Suggests.
    • -
    • Added feature report_bootstrap_p_value() to report p values.
    • -
    • Added common DBS and ID signature.
    • -
    • Updated citation.
    • -
    -
    - -
    • Added merged transcript info for hg19 and hg38 build, this is available by data().
    • -
    • Added gene info for hg19 and hg38 build to extdata directory.
    • -
    -
    - -
    • Removed fuzzyjoin package from dependency.
    • -
    • Moved ggalluvial package to field Suggests.
    • -
    -
    - -

    All users, this is a break-through version of sigminer, most of functions have been modified, more features are implemented. Please read the reference list to see the function groups and their functionalities.

    -

    Please read the vignette for usage.

    -

    I hope it helps your research work and makes a new contribution to the scientific community.

    -
    -
    - - - -
    - - -
    - - - - - - - - diff --git a/docs/pkgdown.css b/docs/pkgdown.css deleted file mode 100644 index 80ea5b83..00000000 --- a/docs/pkgdown.css +++ /dev/null @@ -1,384 +0,0 @@ -/* Sticky footer */ - -/** - * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/ - * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css - * - * .Site -> body > .container - * .Site-content -> body > .container .row - * .footer -> footer - * - * Key idea seems to be to ensure that .container and __all its parents__ - * have height set to 100% - * - */ - -html, body { - height: 100%; -} - -body { - position: relative; -} - -body > .container { - display: flex; - height: 100%; - flex-direction: column; -} - -body > .container .row { - flex: 1 0 auto; -} - -footer { - margin-top: 45px; - padding: 35px 0 36px; - border-top: 1px solid #e5e5e5; - color: #666; - display: flex; - flex-shrink: 0; -} -footer p { - margin-bottom: 0; -} -footer div { - flex: 1; -} -footer .pkgdown { - text-align: right; -} -footer p { - margin-bottom: 0; -} - -img.icon { - float: right; -} - -/* Ensure in-page images don't run outside their container */ -.contents img { - max-width: 100%; - height: auto; -} - -/* Fix bug in bootstrap (only seen in firefox) */ -summary { - display: list-item; -} - -/* Typographic tweaking ---------------------------------*/ - -.contents .page-header { - margin-top: calc(-60px + 1em); -} - -dd { - margin-left: 3em; -} - -/* Section anchors ---------------------------------*/ - -a.anchor { - display: none; - margin-left: 5px; - width: 20px; - height: 20px; - - background-image: url(./link.svg); - background-repeat: no-repeat; - background-size: 20px 20px; - background-position: center center; -} - -h1:hover .anchor, -h2:hover .anchor, -h3:hover .anchor, -h4:hover .anchor, -h5:hover .anchor, -h6:hover .anchor { - display: inline-block; -} - -/* Fixes for fixed navbar --------------------------*/ - -.contents h1, 
.contents h2, .contents h3, .contents h4 { - padding-top: 60px; - margin-top: -40px; -} - -/* Navbar submenu --------------------------*/ - -.dropdown-submenu { - position: relative; -} - -.dropdown-submenu>.dropdown-menu { - top: 0; - left: 100%; - margin-top: -6px; - margin-left: -1px; - border-radius: 0 6px 6px 6px; -} - -.dropdown-submenu:hover>.dropdown-menu { - display: block; -} - -.dropdown-submenu>a:after { - display: block; - content: " "; - float: right; - width: 0; - height: 0; - border-color: transparent; - border-style: solid; - border-width: 5px 0 5px 5px; - border-left-color: #cccccc; - margin-top: 5px; - margin-right: -10px; -} - -.dropdown-submenu:hover>a:after { - border-left-color: #ffffff; -} - -.dropdown-submenu.pull-left { - float: none; -} - -.dropdown-submenu.pull-left>.dropdown-menu { - left: -100%; - margin-left: 10px; - border-radius: 6px 0 6px 6px; -} - -/* Sidebar --------------------------*/ - -#pkgdown-sidebar { - margin-top: 30px; - position: -webkit-sticky; - position: sticky; - top: 70px; -} - -#pkgdown-sidebar h2 { - font-size: 1.5em; - margin-top: 1em; -} - -#pkgdown-sidebar h2:first-child { - margin-top: 0; -} - -#pkgdown-sidebar .list-unstyled li { - margin-bottom: 0.5em; -} - -/* bootstrap-toc tweaks ------------------------------------------------------*/ - -/* All levels of nav */ - -nav[data-toggle='toc'] .nav > li > a { - padding: 4px 20px 4px 6px; - font-size: 1.5rem; - font-weight: 400; - color: inherit; -} - -nav[data-toggle='toc'] .nav > li > a:hover, -nav[data-toggle='toc'] .nav > li > a:focus { - padding-left: 5px; - color: inherit; - border-left: 1px solid #878787; -} - -nav[data-toggle='toc'] .nav > .active > a, -nav[data-toggle='toc'] .nav > .active:hover > a, -nav[data-toggle='toc'] .nav > .active:focus > a { - padding-left: 5px; - font-size: 1.5rem; - font-weight: 400; - color: inherit; - border-left: 2px solid #878787; -} - -/* Nav: second level (shown on .active) */ - -nav[data-toggle='toc'] .nav .nav { - 
display: none; /* Hide by default, but at >768px, show it */ - padding-bottom: 10px; -} - -nav[data-toggle='toc'] .nav .nav > li > a { - padding-left: 16px; - font-size: 1.35rem; -} - -nav[data-toggle='toc'] .nav .nav > li > a:hover, -nav[data-toggle='toc'] .nav .nav > li > a:focus { - padding-left: 15px; -} - -nav[data-toggle='toc'] .nav .nav > .active > a, -nav[data-toggle='toc'] .nav .nav > .active:hover > a, -nav[data-toggle='toc'] .nav .nav > .active:focus > a { - padding-left: 15px; - font-weight: 500; - font-size: 1.35rem; -} - -/* orcid ------------------------------------------------------------------- */ - -.orcid { - font-size: 16px; - color: #A6CE39; - /* margins are required by official ORCID trademark and display guidelines */ - margin-left:4px; - margin-right:4px; - vertical-align: middle; -} - -/* Reference index & topics ----------------------------------------------- */ - -.ref-index th {font-weight: normal;} - -.ref-index td {vertical-align: top; min-width: 100px} -.ref-index .icon {width: 40px;} -.ref-index .alias {width: 40%;} -.ref-index-icons .alias {width: calc(40% - 40px);} -.ref-index .title {width: 60%;} - -.ref-arguments th {text-align: right; padding-right: 10px;} -.ref-arguments th, .ref-arguments td {vertical-align: top; min-width: 100px} -.ref-arguments .name {width: 20%;} -.ref-arguments .desc {width: 80%;} - -/* Nice scrolling for wide elements --------------------------------------- */ - -table { - display: block; - overflow: auto; -} - -/* Syntax highlighting ---------------------------------------------------- */ - -pre, code, pre code { - background-color: #f8f8f8; - color: #333; -} -pre, pre code { - white-space: pre-wrap; - word-break: break-all; - overflow-wrap: break-word; -} - -pre { - border: 1px solid #eee; -} - -pre .img, pre .r-plt { - margin: 5px 0; -} - -pre .img img, pre .r-plt img { - background-color: #fff; -} - -code a, pre a { - color: #375f84; -} - -a.sourceLine:hover { - text-decoration: none; -} - -.fl 
{color: #1514b5;} -.fu {color: #000000;} /* function */ -.ch,.st {color: #036a07;} /* string */ -.kw {color: #264D66;} /* keyword */ -.co {color: #888888;} /* comment */ - -.error {font-weight: bolder;} -.warning {font-weight: bolder;} - -/* Clipboard --------------------------*/ - -.hasCopyButton { - position: relative; -} - -.btn-copy-ex { - position: absolute; - right: 0; - top: 0; - visibility: hidden; -} - -.hasCopyButton:hover button.btn-copy-ex { - visibility: visible; -} - -/* headroom.js ------------------------ */ - -.headroom { - will-change: transform; - transition: transform 200ms linear; -} -.headroom--pinned { - transform: translateY(0%); -} -.headroom--unpinned { - transform: translateY(-100%); -} - -/* mark.js ----------------------------*/ - -mark { - background-color: rgba(255, 255, 51, 0.5); - border-bottom: 2px solid rgba(255, 153, 51, 0.3); - padding: 1px; -} - -/* vertical spacing after htmlwidgets */ -.html-widget { - margin-bottom: 10px; -} - -/* fontawesome ------------------------ */ - -.fab { - font-family: "Font Awesome 5 Brands" !important; -} - -/* don't display links in code chunks when printing */ -/* source: https://stackoverflow.com/a/10781533 */ -@media print { - code a:link:after, code a:visited:after { - content: ""; - } -} - -/* Section anchors --------------------------------- - Added in pandoc 2.11: https://github.com/jgm/pandoc-templates/commit/9904bf71 -*/ - -div.csl-bib-body { } -div.csl-entry { - clear: both; -} -.hanging-indent div.csl-entry { - margin-left:2em; - text-indent:-2em; -} -div.csl-left-margin { - min-width:2em; - float:left; -} -div.csl-right-inline { - margin-left:2em; - padding-left:1em; -} -div.csl-indent { - margin-left: 2em; -} diff --git a/docs/pkgdown.js b/docs/pkgdown.js deleted file mode 100644 index 6f0eee40..00000000 --- a/docs/pkgdown.js +++ /dev/null @@ -1,108 +0,0 @@ -/* http://gregfranko.com/blog/jquery-best-practices/ */ -(function($) { - $(function() { - - $('.navbar-fixed-top').headroom(); 
- - $('body').css('padding-top', $('.navbar').height() + 10); - $(window).resize(function(){ - $('body').css('padding-top', $('.navbar').height() + 10); - }); - - $('[data-toggle="tooltip"]').tooltip(); - - var cur_path = paths(location.pathname); - var links = $("#navbar ul li a"); - var max_length = -1; - var pos = -1; - for (var i = 0; i < links.length; i++) { - if (links[i].getAttribute("href") === "#") - continue; - // Ignore external links - if (links[i].host !== location.host) - continue; - - var nav_path = paths(links[i].pathname); - - var length = prefix_length(nav_path, cur_path); - if (length > max_length) { - max_length = length; - pos = i; - } - } - - // Add class to parent
  • , and enclosing
  • if in dropdown - if (pos >= 0) { - var menu_anchor = $(links[pos]); - menu_anchor.parent().addClass("active"); - menu_anchor.closest("li.dropdown").addClass("active"); - } - }); - - function paths(pathname) { - var pieces = pathname.split("/"); - pieces.shift(); // always starts with / - - var end = pieces[pieces.length - 1]; - if (end === "index.html" || end === "") - pieces.pop(); - return(pieces); - } - - // Returns -1 if not found - function prefix_length(needle, haystack) { - if (needle.length > haystack.length) - return(-1); - - // Special case for length-0 haystack, since for loop won't run - if (haystack.length === 0) { - return(needle.length === 0 ? 0 : -1); - } - - for (var i = 0; i < haystack.length; i++) { - if (needle[i] != haystack[i]) - return(i); - } - - return(haystack.length); - } - - /* Clipboard --------------------------*/ - - function changeTooltipMessage(element, msg) { - var tooltipOriginalTitle=element.getAttribute('data-original-title'); - element.setAttribute('data-original-title', msg); - $(element).tooltip('show'); - element.setAttribute('data-original-title', tooltipOriginalTitle); - } - - if(ClipboardJS.isSupported()) { - $(document).ready(function() { - var copyButton = ""; - - $("div.sourceCode").addClass("hasCopyButton"); - - // Insert copy buttons: - $(copyButton).prependTo(".hasCopyButton"); - - // Initialize tooltips: - $('.btn-copy-ex').tooltip({container: 'body'}); - - // Initialize clipboard: - var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { - text: function(trigger) { - return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); - } - }); - - clipboardBtnCopies.on('success', function(e) { - changeTooltipMessage(e.trigger, 'Copied!'); - e.clearSelection(); - }); - - clipboardBtnCopies.on('error', function() { - changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); - }); - }); - } -})(window.jQuery || window.$) diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml deleted file mode 
100644 index 284cb590..00000000 --- a/docs/pkgdown.yml +++ /dev/null @@ -1,11 +0,0 @@ -pandoc: 3.1.1 -pkgdown: 2.0.7 -pkgdown_sha: ~ -articles: - cnsignature: cnsignature.html - sigminer: sigminer.html -last_built: 2023-12-12T09:47Z -urls: - reference: https://shixiangwang.github.io/sigminer/reference - article: https://shixiangwang.github.io/sigminer/articles - diff --git a/docs/reference/CN.features.html b/docs/reference/CN.features.html deleted file mode 100644 index e1a44b91..00000000 --- a/docs/reference/CN.features.html +++ /dev/null @@ -1,105 +0,0 @@ - -Classification Table of Copy Number Features Devised by Wang et al. for Method 'W' — CN.features • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Classification Table of Copy Number Features Devised by Wang et al. for Method 'W'

    -
    - - -
    -

    Format

    -

    A data.table with "sigminer.features" class name

    -
    -
    -

    Source

    -

    Generate from code under data_raw/

    -
    - -
    -

    Examples

    -
    data(CN.features)
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/CopyNumber-class.html b/docs/reference/CopyNumber-class.html deleted file mode 100644 index 7ac0342e..00000000 --- a/docs/reference/CopyNumber-class.html +++ /dev/null @@ -1,124 +0,0 @@ - -Class CopyNumber — CopyNumber-class • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    S4 class for storing summarized absolute copy number profile.

    -
    - - -
    -

    Slots

    - - -
    data
    -

    data.table of absolute copy number calling.

    - - -
    summary.per.sample
    -

    data.table of copy number variation summary per sample.

    - - -
    genome_build
    -

    genome build version, should be one of 'hg19' or 'hg38'.

    - - -
    genome_measure
    -

    Set 'called' will use autosome called segments size to compute total size -for CNA burden calculation, this option is useful for WES and target sequencing. -Set 'wg' will use autosome size from genome build, this option is useful for WGS, SNP etc.

    - - -
    annotation
    -

    data.table of annotation for copy number segments.

    - - -
    dropoff.segs
    -

    data.table of copy number segments dropped from raw input.

    - - -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/MAF-class.html b/docs/reference/MAF-class.html deleted file mode 100644 index a3b1d466..00000000 --- a/docs/reference/MAF-class.html +++ /dev/null @@ -1,134 +0,0 @@ - -Class MAF — MAF-class • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    S4 class for storing summarized MAF. It is from maftools package.

    -
    - - -
    -

    Details

    -

    More about MAF object please see maftools.

    -
    -
    -

    Slots

    - - -
    data
    -

    data.table of MAF file containing all non-synonymous variants.

    - - -
    variants.per.sample
    -

    table containing variants per sample

    - - -
    variant.type.summary
    -

    table containing variant types per sample

    - - -
    variant.classification.summary
    -

    table containing variant classification per sample

    - - -
    gene.summary
    -

    table containing variant classification per gene

    - - -
    summary
    -

    table with basic MAF summary stats

    - - -
    maf.silent
    -

    subset of main MAF containing only silent variants

    - - -
    clinical.data
    -

    clinical data associated with each sample/Tumor_Sample_Barcode in MAF.

    - - -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/add_h_arrow.html b/docs/reference/add_h_arrow.html deleted file mode 100644 index 0370a38c..00000000 --- a/docs/reference/add_h_arrow.html +++ /dev/null @@ -1,166 +0,0 @@ - -Add Horizontal Arrow with Text Label to a ggplot — add_h_arrow • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Add Horizontal Arrow with Text Label to a ggplot

    -
    - -
    -
    add_h_arrow(
    -  p,
    -  x,
    -  y,
    -  label = "optimal number",
    -  space = 0.01,
    -  vjust = 0.3,
    -  seg_len = 0.1,
    -  arrow_len = unit(2, "mm"),
    -  arrow_type = c("closed", "open"),
    -  font_size = 5,
    -  font_family = c("serif", "sans", "mono"),
    -  font_face = c("plain", "bold", "italic")
    -)
    -
    - -
    -

    Arguments

    -
    p
    -

    a ggplot.

    - - -
    x
    -

    position at x axis.

    - - -
    y
    -

    position at y axis.

    - - -
    label
    -

    text label.

    - - -
    space
    -

    a small space between arrow and text.

    - - -
    vjust
    -

    vertical adjustment, set to 0 to align with the bottom, -0.5 for the middle, and 1 (the default) for the top.

    - - -
    seg_len
    -

    length of the arrow segment.

    - - -
    arrow_len
    -

    length of the arrow.

    - - -
    arrow_type
    -

    type of the arrow.

    - - -
    font_size
    -

    font size.

    - - -
    font_family
    -

    font family.

    - - -
    font_face
    -

    font face.

    - -
    -
    -

    Value

    - - -

    a ggplot object.

    -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/add_labels.html b/docs/reference/add_labels.html deleted file mode 100644 index ea808a68..00000000 --- a/docs/reference/add_labels.html +++ /dev/null @@ -1,195 +0,0 @@ - -Add Text Labels to a ggplot — add_labels • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Add text labels to a ggplot object, such as the result -from show_sig_profile.

    -
    - -
    -
    add_labels(
    -  p,
    -  x,
    -  y,
    -  y_end = NULL,
    -  n_label = NULL,
    -  labels = NULL,
    -  revert_order = FALSE,
    -  font_size = 5,
    -  font_family = "serif",
    -  font_face = c("plain", "bold", "italic"),
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    p
    -

    a ggplot.

    - - -
    x
    -

    position at x axis.

    - - -
    y
    -

    position at y axis.

    - - -
    y_end
    -

    end position of y axis when n_label is set.

    - - -
    n_label
    -

    the number of label, when this is set, -the position of labels at y axis is auto-generated -according to y and y_end.

    - - -
    labels
    -

    text labels or a similarity object from get_sig_similarity.

    - - -
    revert_order
    -

    if TRUE, revert label order.

    - - -
    font_size
    -

    font size.

    - - -
    font_family
    -

    font family.

    - - -
    font_face
    -

    font face.

    - - -
    ...
    -

    other parameters passing to ggplot2::annotate.

    - -
    -
    -

    Value

    - - -

    a ggplot object.

    -
    - -
    -

    Examples

    -
    # Load mutational signature
    -load(system.file("extdata", "toy_mutational_signature.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -# Show signature profile
    -p <- show_sig_profile(sig2, mode = "SBS")
    -
    -# Method 1
    -p1 <- add_labels(p,
    -  x = 0.75, y = 0.3, y_end = 0.9, n_label = 3,
    -  labels = paste0("text", 1:3)
    -)
    -p1
    -
    -# Method 2
    -p2 <- add_labels(p,
    -  x = c(0.15, 0.6, 0.75), y = c(0.3, 0.6, 0.9),
    -  labels = paste0("text", 1:3)
    -)
    -p2
    -
    -# Method 3
    -sim <- get_sig_similarity(sig2)
    -p3 <- add_labels(p,
    -  x = c(0.15, 0.6, 0.75), y = c(0.25, 0.55, 0.8),
    -  labels = sim, font_size = 2
    -)
    -p3
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/bp.html b/docs/reference/bp.html deleted file mode 100644 index 834c8245..00000000 --- a/docs/reference/bp.html +++ /dev/null @@ -1,592 +0,0 @@ - -A Best Practice for Signature Extraction and Exposure (Activity) Attribution — bp • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    These functions are combined to provide a best practice for optimally -identifying mutational signatures and attributing their activities (exposures) -in tumor samples. They are listed in order to use.

    • bp_extract_signatures() for extracting signatures.

    • -
    • bp_show_survey() for showing measures change under different -signature numbers to help user select optimal signature number. -At default, an aggregated score (named score) is generated to -suggest the best solution.

    • -
    • bp_show_survey2() for showing simplified signature number survey like -show_sig_number_survey().

    • -
    • bp_get_sig_obj() for getting a (list of) Signature object which is commonly -used in sigminer for analysis and visualization.

    • -
    • bp_attribute_activity() for optimizing signature activities (exposures). -NOTE: the activities from extraction step may be better! -You can also use sig_extract to get optimal NMF result from multiple NMF runs. -Besides, you can use sig_fit to quantify exposures based on signatures extracted -from bp_extract_signatures().

    • -
    • bp_extract_signatures_iter() for extracting signatures in an iterative way.

    • -
    • bp_cluster_iter_list() for clustering (hclust with average linkage) -iterated signatures to help collapse -multiple signatures into one. The result cluster can be visualized by -plot() or factoextra::fviz_dend().

    • -
    • bp_get_clustered_sigs() for getting clustered (grouped) mean signatures from signature clusters.

    • -
    • Extra: bp_get_stats() for obtaining stats for signatures and samples of a solution. -These stats are aggregated (averaged) as the stats for a solution -(specific signature number).

    • -
    • Extra: bp_get_rank_score() for obtaining rank score for all signature numbers.

    • -
    - -
    -
    bp_extract_signatures(
    -  nmf_matrix,
    -  range = 2:5,
    -  n_bootstrap = 20L,
    -  n_nmf_run = 50,
    -  RTOL = 0.001,
    -  min_contribution = 0,
    -  cores = min(4L, future::availableCores()),
    -  cores_solution = min(cores, length(range)),
    -  seed = 123456L,
    -  handle_hyper_mutation = TRUE,
    -  report_integer_exposure = FALSE,
    -  only_core_stats = nrow(nmf_matrix) > 100,
    -  cache_dir = file.path(tempdir(), "sigminer_bp"),
    -  keep_cache = FALSE,
    -  pynmf = FALSE,
    -  use_conda = TRUE,
    -  py_path = "/Users/wsx/anaconda3/bin/python"
    -)
    -
    -bp_extract_signatures_iter(
    -  nmf_matrix,
    -  range = 2:5,
    -  sim_threshold = 0.95,
    -  max_iter = 10L,
    -  n_bootstrap = 20L,
    -  n_nmf_run = 50,
    -  RTOL = 0.001,
    -  min_contribution = 0,
    -  cores = min(4L, future::availableCores()),
    -  cores_solution = min(cores, length(range)),
    -  seed = 123456L,
    -  handle_hyper_mutation = TRUE,
    -  report_integer_exposure = FALSE,
    -  only_core_stats = nrow(nmf_matrix) > 100,
    -  cache_dir = file.path(tempdir(), "sigminer_bp"),
    -  keep_cache = FALSE,
    -  pynmf = FALSE,
    -  use_conda = FALSE,
    -  py_path = "/Users/wsx/anaconda3/bin/python"
    -)
    -
    -bp_cluster_iter_list(x, k = NULL, include_final_iteration = TRUE)
    -
    -bp_get_clustered_sigs(SigClusters, cluster_label)
    -
    -bp_get_sig_obj(obj, signum = NULL)
    -
    -bp_get_stats(obj)
    -
    -bp_get_rank_score(obj)
    -
    -bp_show_survey2(
    -  obj,
    -  x = "signature_number",
    -  left_y = "silhouette",
    -  right_y = "L2_error",
    -  left_name = left_y,
    -  right_name = right_y,
    -  left_color = "black",
    -  right_color = "red",
    -  left_shape = 16,
    -  right_shape = 18,
    -  shape_size = 4,
    -  highlight = NULL
    -)
    -
    -bp_show_survey(
    -  obj,
    -  add_score = FALSE,
    -  scales = c("free_y", "free"),
    -  fixed_ratio = TRUE
    -)
    -
    -bp_attribute_activity(
    -  input,
    -  sample_class = NULL,
    -  nmf_matrix = NULL,
    -  method = c("bt", "stepwise"),
    -  bt_use_prop = FALSE,
    -  return_class = c("matrix", "data.table"),
    -  use_parallel = FALSE,
    -  cache_dir = file.path(tempdir(), "sigminer_attribute_activity"),
    -  keep_cache = FALSE
    -)
    -
    - -
    -

    Arguments

    -
    nmf_matrix
    -

    a matrix used for NMF decomposition with rows indicate samples and columns indicate components.

    - - -
    range
    -

    a numeric vector containing the ranks of factorization to try. Note that duplicates are removed -and values are sorted in increasing order. The results are notably returned in this order.

    - - -
    n_bootstrap
    -

    number of bootstrapped (resampling) catalogs used. -When it is 0, the original (input) mutation catalog is used for NMF decomposition, -this is not recommended, just for testing, user should not set it to 0.

    - - -
    n_nmf_run
    -

    number of NMF runs for each bootstrapped or original catalog. -At default, in total n_bootstrap x n_nmf_run (i.e. 1000) NMF runs are used -for the task.

    - - -
    RTOL
    -

    a threshold proposed by Nature Cancer paper to control how to -filter solutions of NMF. Default is 0.1% (from reference #2), -only NMF solutions with KLD (KL deviance) <= 100.1% minimal KLD are kept.

    - - -
    min_contribution
    -

    a component contribution threshold to filter out small -contributed components.

    - - -
    cores
    -

    number of cpu cores to run NMF.

    - - -
    cores_solution
    -

    cores for processing solutions, default is equal to argument cores.

    - - -
    seed
    -

    a random seed to make reproducible result.

    - - -
    handle_hyper_mutation
    -

    default is TRUE, handle hyper-mutant samples.

    - - -
    report_integer_exposure
    -

    if TRUE, report integer signature -exposure by bootstrapping technique.

    - - -
    only_core_stats
    -

    if TRUE, only calculate the core stats for signatures and samples.

    - - -
    cache_dir
    -

    a directory for keeping temporary result files.

    - - -
    keep_cache
    -

    if TRUE, keep cache results.

    - - -
    pynmf
    -

    if TRUE, use Python NMF driver Nimfa. -The seed currently is not used by this implementation, so the only way to reproduce -your result is setting keep_cache = TRUE.

    - - -
    use_conda
    -

    if TRUE, create an independent conda environment to run NMF.

    - - -
    py_path
    -

    path to Python executable file, e.g. '/Users/wsx/anaconda3/bin/python'. In my -test, it is more stable than use_conda=TRUE. You can install the Nimfa package by yourself -or set use_conda to TRUE to install required Python environment, and then set this option.

    - - -
    sim_threshold
    -

    a similarity threshold for selecting samples to auto-rerun -the extraction procedure (i.e. bp_extract_signatures()), default is 0.95.

    - - -
    max_iter
    -

    the maximum iteration size, default is 10, i.e., at most run -the extraction procedure 10 times.

    - - -
    x
    -

    result from bp_extract_signatures_iter() or a list of -Signature objects.

    - - -
    k
    -

    an integer sequence specifying the cluster number to get silhouette.

    - - -
    include_final_iteration
    -

    if FALSE, exclude final iteration result -from clustering for input from bp_extract_signatures_iter(), not applied -if input is a list of Signature objects.

    - - -
    SigClusters
    -

    result from bp_cluster_iter_list().

    - - -
    cluster_label
    -

    cluster labels for a specified cluster number, obtain it -from SigClusters$sil_df.

    - - -
    obj
    -

    an ExtractionResult object from bp_extract_signatures().

    - - -
    signum
    -

    an integer vector to extract the corresponding Signature object(s). -If it is NULL (default), all will be returned.

    - - -
    left_y
    -

    column name for left y axis.

    - - -
    right_y
    -

    column name for right y axis.

    - - -
    left_name
    -

    label name for left y axis.

    - - -
    right_name
    -

    label name for right y axis.

    - - -
    left_color
    -

    color for left axis.

    - - -
    right_color
    -

    color for right axis.

    - - -
    left_shape, right_shape, shape_size
    -

    shape setting.

    - - -
    highlight
    -

    a integer to highlight a x.

    - - -
    add_score
    -

    if FALSE, don't show score and label optimal points by -rank score.

    - - -
    scales
    -

    one of "free_y" (default) and "free" to control the scales -of plot facet.

    - - -
    fixed_ratio
    -

    if TRUE (default), make the x/y axis ratio fixed.

    - - -
    input
    -

    result from bp_extract_signatures() or a Signature object.

    - - -
    sample_class
    -

    a named string vector whose names are sample names -and values are class labels (i.e. cancer subtype). If it is NULL (the default), -treat all samples as one group.

    - - -
    method
    -

    one of 'bt' (use bootstrap exposure median, from reference #2, -the most recommended way in my personal view) or 'stepwise' -(stepwise reduce and update signatures then do signature fitting -with last signature sets, from reference #2, the result tends to assign -the contribution of removed signatures to the remaining signatures, -maybe I misunderstand the paper method? PAY ATTENTION).

    - - -
    bt_use_prop
    -

    this parameter is only used for bt method to reset -low contributing signature activity (relative activity <0.01). If TRUE, -use empirical P value calculation way (i.e. proportion, used by reference #2), -otherwise a t.test is applied.

    - - -
    return_class
    -

    string, 'matrix' or 'data.table'.

    - - -
    use_parallel
    -

    if TRUE, use parallel computation based on furrr package. -It can also be an integer for specifying cores.

    - -
    -
    -

    Value

    - - -

    It depends on the called function.

    -
    -
    -

    Details

    -

    The signature extraction approach is adopted from reference #1, #2, and -the whole best practice is adopted from the pipeline used by reference #3. -I implement the whole procedure with R code based on the method description -of papers. The code is well organized, tested and documented so user will -find it pretty simple and useful. Besides, the structure of the results is -very clear to see and also visualize like other approaches provided by sigminer.

    -
    -
    -

    Measure Explanation in Survey Plot

    - - -

    The survey plot provides a pretty good way to facilitate the signature number -selection. A score measure is calculated as the weighted mean of selected -measures and visualized as the first sub-plot. The optimal number is highlighted -with red color dot and the best values for each measures are also -highlighted with orange color dots. The detail of 6 measures shown in plot are -explained as below.

    • score - an aggregated score based on rank scores from selected measures below. -The higher, the better. When two signature numbers have the same score, -the larger signature number is preferred (this is a rare situation, you -have to double check other measures).

    • -
    • silhouette - the average silhouette width for signatures, also named as ASW in reference #2. -The signature number with silhouette decreases sharply is preferred.

    • -
    • distance - the average sample reconstructed cosine distance, the lower value is better.

    • -
    • error - the average sample reconstructed error calculated with L2 formula -(i.e. L2 error). This lower value is better. This measure represents a -similar concept like distance above, they are all used to quantify how well -sample mutation profiles can be reconstructed from signatures, but distance -cares the whole mutation profile similarity while error here cares value difference.

    • -
    • pos cor - the average positive signature exposure correlation coefficient. -The lower value is better. This measure is constructed based on my understanding -about signatures: mutational signatures are typically treated as independent -recurrent patterns, so their activities are less correlated.

    • -
    • similarity - the average similarity within a signature cluster. -Like silhouette, the point at which it decreases sharply is preferred. -In practice, results from multiple NMF runs are clustered -with the "clustering with match" algorithm proposed by reference #2. This value -indicates whether the signature profiles extracted from different NMF runs are similar.

    • -
    -
    -

    References

    -

    Alexandrov, Ludmil B., et al. "Deciphering signatures of mutational processes operative in human cancer." Cell reports 3.1 (2013): 246-259.

    -

    Degasperi, Andrea, et al. "A practical framework and online tool for mutational signature analyses show intertissue variation and driver dependencies." Nature cancer 1.2 (2020): 249-263.

    -

    Alexandrov, Ludmil B., et al. “The repertoire of mutational signatures in human cancer.” Nature 578.7793 (2020): 94-101.

    -
    -
    -

    See also

    - -
    -
    -

    Author

    -

    Shixiang Wang w_shixiang@163.com

    -
    - -
    -

    Examples

    -
    data("simulated_catalogs")
    -# \donttest{
    -# Here I reduce the values for n_bootstrap and n_nmf_run
    -# for reducing the run time.
    -# In practice, you should keep default or increase the values
    -# for better estimation.
    -#
    -# The input data here is simulated from 10 mutational signatures
    -
    -# e1 <- bp_extract_signatures(
    -#   t(simulated_catalogs$set1),
    -#   range = 8:12,
    -#   n_bootstrap = 5,
    -#   n_nmf_run = 10
    -# )
    -#
    -# To avoid computation in examples,
    -# Here just load the result
    -# (e1$signature and e1$exposure set to NA to reduce package size)
    -load(system.file("extdata", "e1.RData", package = "sigminer"))
    -
    -
    -# See the survey for different signature numbers
    -# The suggested solution is marked as red dot
    -# with highest integrated score.
    -p1 <- bp_show_survey(e1)
    -p1
    -# You can also exclude plotting and highlighting the score
    -p2 <- bp_show_survey(e1, add_score = FALSE)
    -p2
    -
    -# You can also plot a simplified version
    -p3 <- bp_show_survey2(e1, highlight = 10)
    -p3
    -
    -# Obtain the suggested solution from extraction result
    -obj_suggested <- bp_get_sig_obj(e1, e1$suggested)
    -obj_suggested
    -# If you think the suggested signature number is not right
    -# Just pick up the solution you want
    -obj_s8 <- bp_get_sig_obj(e1, 8)
    -
    -# Track the reconstructed profile similarity
    -rec_sim <- get_sig_rec_similarity(obj_s8, t(simulated_catalogs$set1))
    -rec_sim
    -
    -# After extraction, you can assign the signatures
    -# to reference COSMIC signatures
    -# More see ?get_sig_similarity
    -sim <- get_sig_similarity(obj_suggested)
    -# Visualize the match result
    -if (require(pheatmap)) {
    -  pheatmap::pheatmap(sim$similarity)
    -}
    -
    -# You already got the activities of signatures
    -# in obj_suggested, however, you can still
    -# try to optimize the result.
    -# NOTE: the optimization step may not truly optimize the result!
    -expo <- bp_attribute_activity(e1, return_class = "data.table")
    -expo$abs_activity
    -# }
    -
    -if (FALSE) {
    -# Iterative extraction:
    -# This procedure will rerun extraction step
    -# for those samples with reconstructed catalog similarity
    -# lower than a threshold (default is 0.95)
    -e2 <- bp_extract_signatures_iter(
    -  t(simulated_catalogs$set1),
    -  range = 9:11,
    -  n_bootstrap = 5,
    -  n_nmf_run = 5,
    -  sim_threshold = 0.99
    -)
    -e2
    -# When the procedure run multiple rounds
    -# you can cluster the signatures from different rounds by
    -# the following command
    -# bp_cluster_iter_list(e2)
    -
    -## Extra utilities
    -rank_score <- bp_get_rank_score(e1)
    -rank_score
    -stats <- bp_get_stats(e2$iter1)
    -# Get the mean reconstructed similarity
    -1 - stats$stats_sample$cosine_distance_mean
    -}
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/centromeres.hg19.html b/docs/reference/centromeres.hg19.html deleted file mode 100644 index 6d11e05d..00000000 --- a/docs/reference/centromeres.hg19.html +++ /dev/null @@ -1,105 +0,0 @@ - -Location of Centromeres at Genome Build hg19 — centromeres.hg19 • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Location of Centromeres at Genome Build hg19

    -
    - - -
    -

    Format

    -

    A data.frame

    -
    -
    -

    Source

    -

    Generate from UCSC gold path

    -
    - -
    -

    Examples

    -
    data(centromeres.hg19)
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/centromeres.hg38.html b/docs/reference/centromeres.hg38.html deleted file mode 100644 index 5c78184c..00000000 --- a/docs/reference/centromeres.hg38.html +++ /dev/null @@ -1,105 +0,0 @@ - -Location of Centromeres at Genome Build hg38 — centromeres.hg38 • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Location of Centromeres at Genome Build hg38

    -
    - - -
    -

    Format

    -

    A data.frame

    -
    -
    -

    Source

    -

    Generate from Genome Reference Consortium

    -
    - -
    -

    Examples

    -
    data(centromeres.hg38)
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/centromeres.mm10.html b/docs/reference/centromeres.mm10.html deleted file mode 100644 index 8295f592..00000000 --- a/docs/reference/centromeres.mm10.html +++ /dev/null @@ -1,105 +0,0 @@ - -Location of Centromeres at Genome Build mm10 — centromeres.mm10 • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Location of Centromeres at Genome Build mm10

    -
    - - -
    -

    Format

    -

    A data.frame

    -
    - - -
    -

    Examples

    -
    data(centromeres.mm10)
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/centromeres.mm9.html b/docs/reference/centromeres.mm9.html deleted file mode 100644 index 7ce47d06..00000000 --- a/docs/reference/centromeres.mm9.html +++ /dev/null @@ -1,110 +0,0 @@ - -Location of Centromeres at Genome Build mm9 — centromeres.mm9 • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Location of Centromeres at Genome Build mm9

    -
    - - -
    -

    Format

    -

    A data.frame

    -
    -
    -

    Source

    -

    Generate from https://hgdownload.soe.ucsc.edu/goldenPath/mm9/database/ -with code:

    -

    for i in $(seq 1 19) X Y;
    
    -do
    
    -wget https://hgdownload.soe.ucsc.edu/goldenPath/mm9/database/chr${i}_gap.txt.gz
    
    -done

    -
    - -
    -

    Examples

    -
    data(centromeres.mm9)
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/chromsize.hg19.html b/docs/reference/chromsize.hg19.html deleted file mode 100644 index 6a5891c8..00000000 --- a/docs/reference/chromsize.hg19.html +++ /dev/null @@ -1,105 +0,0 @@ - -Chromosome Size of Genome Build hg19 — chromsize.hg19 • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Chromosome Size of Genome Build hg19

    -
    - - -
    -

    Format

    -

    A data.frame

    -
    -
    -

    Source

    -

    Generate from UCSC gold path

    -
    - -
    -

    Examples

    -
    data(chromsize.hg19)
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/chromsize.hg38.html b/docs/reference/chromsize.hg38.html deleted file mode 100644 index 9290cbba..00000000 --- a/docs/reference/chromsize.hg38.html +++ /dev/null @@ -1,105 +0,0 @@ - -Chromosome Size of Genome Build hg38 — chromsize.hg38 • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Chromosome Size of Genome Build hg38

    -
    - - -
    -

    Format

    -

    A data.frame

    -
    -
    -

    Source

    -

    Generate from UCSC gold path

    -
    - -
    -

    Examples

    -
    data(chromsize.hg38)
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/chromsize.mm10.html b/docs/reference/chromsize.mm10.html deleted file mode 100644 index f2b3b06e..00000000 --- a/docs/reference/chromsize.mm10.html +++ /dev/null @@ -1,105 +0,0 @@ - -Chromosome Size of Genome Build mm10 — chromsize.mm10 • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Chromosome Size of Genome Build mm10

    -
    - - -
    -

    Format

    -

    A data.frame

    -
    - - -
    -

    Examples

    -
    data(chromsize.mm10)
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/chromsize.mm9.html b/docs/reference/chromsize.mm9.html deleted file mode 100644 index 73036346..00000000 --- a/docs/reference/chromsize.mm9.html +++ /dev/null @@ -1,105 +0,0 @@ - -Chromosome Size of Genome Build mm9 — chromsize.mm9 • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Chromosome Size of Genome Build mm9

    -
    - - -
    -

    Format

    -

    A data.frame

    -
    - - -
    -

    Examples

    -
    data(chromsize.mm9)
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/cosine.html b/docs/reference/cosine.html deleted file mode 100644 index 51b24b87..00000000 --- a/docs/reference/cosine.html +++ /dev/null @@ -1,121 +0,0 @@ - -Calculate Cosine Measures — cosine • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Calculate Cosine Measures

    -
    - -
    -
    cosine(x, y)
    -
    - -
    -

    Arguments

    -
    x
    -

    a numeric vector or matrix with column representing vector to calculate similarity.

    - - -
    y
    -

    must be same format as x.

    - -
    -
    -

    Value

    - - -

    a numeric value or matrix.

    -
    - -
    -

    Examples

    -
    x <- c(1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0)
    -y <- c(0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0)
    -z1 <- cosine(x, y)
    -z1
    -z2 <- cosine(matrix(x), matrix(y))
    -z2
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/cytobands.hg19.html b/docs/reference/cytobands.hg19.html deleted file mode 100644 index 0d6b2167..00000000 --- a/docs/reference/cytobands.hg19.html +++ /dev/null @@ -1,105 +0,0 @@ - -Location of Chromosome Cytobands at Genome Build hg19 — cytobands.hg19 • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Location of Chromosome Cytobands at Genome Build hg19

    -
    - - -
    -

    Format

    -

    A data.frame

    -
    -
    -

    Source

    -

    from UCSC

    -
    - -
    -

    Examples

    -
    data(cytobands.hg19)
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/cytobands.hg38.html b/docs/reference/cytobands.hg38.html deleted file mode 100644 index 19d808f1..00000000 --- a/docs/reference/cytobands.hg38.html +++ /dev/null @@ -1,105 +0,0 @@ - -Location of Chromosome Cytobands at Genome Build hg38 — cytobands.hg38 • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Location of Chromosome Cytobands at Genome Build hg38

    -
    - - -
    -

    Format

    -

    A data.frame

    -
    -
    -

    Source

    -

    from UCSC

    -
    - -
    -

    Examples

    -
    data(cytobands.hg38)
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/cytobands.mm10.html b/docs/reference/cytobands.mm10.html deleted file mode 100644 index 597e8bf3..00000000 --- a/docs/reference/cytobands.mm10.html +++ /dev/null @@ -1,105 +0,0 @@ - -Location of Chromosome Cytobands at Genome Build mm10 — cytobands.mm10 • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Location of Chromosome Cytobands at Genome Build mm10

    -
    - - -
    -

    Format

    -

    A data.frame

    -
    - - -
    -

    Examples

    -
    data(cytobands.mm10)
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/cytobands.mm9.html b/docs/reference/cytobands.mm9.html deleted file mode 100644 index 747ed489..00000000 --- a/docs/reference/cytobands.mm9.html +++ /dev/null @@ -1,105 +0,0 @@ - -Location of Chromosome Cytobands at Genome Build mm9 — cytobands.mm9 • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Location of Chromosome Cytobands at Genome Build mm9

    -
    - - -
    -

    Format

    -

    A data.frame

    -
    - - -
    -

    Examples

    -
    data(cytobands.mm9)
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/enrich_component_strand_bias.html b/docs/reference/enrich_component_strand_bias.html deleted file mode 100644 index 3a28da86..00000000 --- a/docs/reference/enrich_component_strand_bias.html +++ /dev/null @@ -1,108 +0,0 @@ - -Performs Strand Bias Enrichment Analysis for a Given Sample-by-Component Matrix — enrich_component_strand_bias • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    See sig_tally for examples.

    -
    - -
    -
    enrich_component_strand_bias(mat)
    -
    - -
    -

    Arguments

    -
    mat
    -

    a sample-by-component matrix from sig_tally with strand bias labels "T:" and "B:".

    - -
    -
    -

    Value

    - - -

    a data.table sorted by p_value.

    -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/figures/README-unnamed-chunk-1-1.png b/docs/reference/figures/README-unnamed-chunk-1-1.png deleted file mode 100644 index dceb02ff..00000000 Binary files a/docs/reference/figures/README-unnamed-chunk-1-1.png and /dev/null differ diff --git a/docs/reference/figures/README-unnamed-chunk-2-1.png b/docs/reference/figures/README-unnamed-chunk-2-1.png deleted file mode 100644 index 6ccd39f2..00000000 Binary files a/docs/reference/figures/README-unnamed-chunk-2-1.png and /dev/null differ diff --git a/docs/reference/figures/README-unnamed-chunk-3-1.png b/docs/reference/figures/README-unnamed-chunk-3-1.png deleted file mode 100644 index fd389b35..00000000 Binary files a/docs/reference/figures/README-unnamed-chunk-3-1.png and /dev/null differ diff --git a/docs/reference/figures/README-unnamed-chunk-4-1.png b/docs/reference/figures/README-unnamed-chunk-4-1.png deleted file mode 100644 index 2ab10ccc..00000000 Binary files a/docs/reference/figures/README-unnamed-chunk-4-1.png and /dev/null differ diff --git a/docs/reference/figures/README-unnamed-chunk-5-1.png b/docs/reference/figures/README-unnamed-chunk-5-1.png deleted file mode 100644 index 475af4cf..00000000 Binary files a/docs/reference/figures/README-unnamed-chunk-5-1.png and /dev/null differ diff --git a/docs/reference/figures/README-unnamed-chunk-6-1.png b/docs/reference/figures/README-unnamed-chunk-6-1.png deleted file mode 100644 index 04b10332..00000000 Binary files a/docs/reference/figures/README-unnamed-chunk-6-1.png and /dev/null differ diff --git a/docs/reference/figures/README-unnamed-chunk-7-1.png b/docs/reference/figures/README-unnamed-chunk-7-1.png deleted file mode 100644 index b963e272..00000000 Binary files a/docs/reference/figures/README-unnamed-chunk-7-1.png and /dev/null differ diff --git a/docs/reference/figures/README-unnamed-chunk-8-1.png b/docs/reference/figures/README-unnamed-chunk-8-1.png deleted file mode 100644 index 1a9dab5a..00000000 Binary 
files a/docs/reference/figures/README-unnamed-chunk-8-1.png and /dev/null differ diff --git a/docs/reference/figures/README-unnamed-chunk-9-1.png b/docs/reference/figures/README-unnamed-chunk-9-1.png deleted file mode 100644 index 4fa58b80..00000000 Binary files a/docs/reference/figures/README-unnamed-chunk-9-1.png and /dev/null differ diff --git a/docs/reference/figures/logo.png b/docs/reference/figures/logo.png deleted file mode 100644 index a44d4f12..00000000 Binary files a/docs/reference/figures/logo.png and /dev/null differ diff --git a/docs/reference/get_Aneuploidy_score.html b/docs/reference/get_Aneuploidy_score.html deleted file mode 100644 index 5460694a..00000000 --- a/docs/reference/get_Aneuploidy_score.html +++ /dev/null @@ -1,164 +0,0 @@ - -Get Aneuploidy Score from Copy Number Profile — get_Aneuploidy_score • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    This implements a Cohen-Sharir method (see reference) like "Aneuploidy Score" computation. -You can read the source code to see how it works. Basically, it follows -the logic of the Cohen-Sharir method but with some differences in implementation details. -The results should be comparable, but this has not been validated with data yet. -Please raise an issue if you find problems/bugs in this function.

    -
    - -
    -
    get_Aneuploidy_score(
    -  data,
    -  ploidy_df = NULL,
    -  genome_build = "hg19",
    -  rm_black_arms = FALSE
    -)
    -
    - -
    -

    Arguments

    -
    data
    -

    a CopyNumber object or a data.frame containing at least -'chromosome', 'start', 'end', 'segVal', 'sample' these columns.

    - - -
    ploidy_df
    -

    default is NULL, compute ploidy by segment-size weighted copy number -across autosome, see get_cn_ploidy. You can also provide a data.frame with 'sample' -and 'ploidy' columns.

    - - -
    genome_build
    -

    genome build version, should be 'hg19', 'hg38', 'mm9' or 'mm10'.

    - - -
    rm_black_arms
    -

    if TRUE, remove short arms of chr13/14/15/21/22 from calculation -as documented in reference #3.

    - -
    -
    -

    Value

    - - -

    A data.frame

    - - -
    -
    -

    References

    - -
    • Cohen-Sharir, Y., McFarland, J. M., Abdusamad, M., Marquis, C., Bernhard, S. V., Kazachkova, M., ... & Ben-David, U. (2021). Aneuploidy renders cancer cells vulnerable to mitotic checkpoint inhibition. Nature, 1-6.

    • -
    • Logic reference: https://github.com/quevedor2/aneuploidy_score/.

    • -
    • Taylor, Alison M., et al. "Genomic and functional approaches to understanding cancer aneuploidy." Cancer cell 33.4 (2018): 676-689.

    • -
    - -
    -

    Examples

    -
    # Load copy number object
    -load(system.file("extdata", "toy_copynumber.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -
    -df <- get_Aneuploidy_score(cn)
    -df
    -
    -df2 <- get_Aneuploidy_score(cn@data)
    -df2
    -
    -df3 <- get_Aneuploidy_score(cn@data,
    -  ploidy_df = get_cn_ploidy(cn@data)
    -)
    -df3
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/get_adj_p.html b/docs/reference/get_adj_p.html deleted file mode 100644 index 9fd14241..00000000 --- a/docs/reference/get_adj_p.html +++ /dev/null @@ -1,203 +0,0 @@ - -Get Adjust P Values from Group Comparison — get_adj_p • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Setting aes(label=..p.adj..) in ggpubr::compare_means() does not -show adjust p values. The returned result of this function can be combined with ggpubr::stat_pvalue_manual() to fix -this problem.

    -
    - -
    -
    get_adj_p(
    -  data,
    -  .col,
    -  .grp = "Sample",
    -  comparisons = NULL,
    -  method = "wilcox.test",
    -  p.adjust.method = "fdr",
    -  p.digits = 3L,
    -  ...
    -)
    -
    - -
    -

    Source

    -

    https://github.com/kassambara/ggpubr/issues/143

    -
    -
    -

    Arguments

    -
    data
    -

    a data.frame containing column for groups and column for comparison.

    - - -
    .col
    -

    column name for comparison.

    - - -
    .grp
    -

    column name for groups.

    - - -
    comparisons
    -

    Default is NULL, use all combination in group column. -It can be a list of length-2 vectors. The entries in the vector are either -the names of 2 values on the x-axis or the 2 integers that correspond to the -index of the groups of interest, to be compared.

    - - -
    method
    -

    a character string indicating which method to be used for comparing means. -It can be 't.test', 'wilcox.test' etc..

    - - -
    p.adjust.method
    -

    correction method, default is 'fdr'. Run p.adjust.methods to -see all available options.

    - - -
    p.digits
    -

    how many significant digits are to be used.

    - - -
    ...
    -

    other arguments passed to ggpubr::compare_means()

    - -
    -
    -

    Value

    - - -

    a data.frame containing comparison result

    -
    - - -
    -

    Examples

    -
    library(ggpubr)
    -# T-test
    -stat.test <- compare_means(
    -  len ~ dose,
    -  data = ToothGrowth,
    -  method = "t.test",
    -  p.adjust.method = "fdr"
    -)
    -stat.test
    -# Create a simple box plot
    -p <- ggboxplot(ToothGrowth, x = "dose", y = "len")
    -p
    -
    -# Add p values
    -my_comparisons <- list(c("0.5", "1"), c("1", "2"), c("0.5", "2"))
    -p + stat_compare_means(method = "t.test", comparisons = my_comparisons)
    -
    -# Try adding adjust p values
    -# proposed by author of ggpubr
    -# however it does not work
    -p + stat_compare_means(aes(label = ..p.adj..), method = "t.test", comparisons = my_comparisons)
    -
    -# Solution:
    -# calculate adjust p values and their location
    -# then use stat_pvalue_manual() function
    -p_adj <- get_adj_p(ToothGrowth, .col = "len", .grp = "dose")
    -p_adj
    -p + stat_pvalue_manual(p_adj, label = "p.adj")
    -
    -# Show selected comparisons
    -# Of note, p value is adjusted
    -# for three comparisons, but only
    -# two are shown in figure
    -p_adj <- get_adj_p(ToothGrowth,
    -  .col = "len", .grp = "dose",
    -  comparisons = list(c("0.5", "1"), c("1", "2"))
    -)
    -p + stat_pvalue_manual(p_adj, label = "p.adj")
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/get_bayesian_result.html b/docs/reference/get_bayesian_result.html deleted file mode 100644 index 7a948039..00000000 --- a/docs/reference/get_bayesian_result.html +++ /dev/null @@ -1,126 +0,0 @@ - -Get Specified Bayesian NMF Result from Run — get_bayesian_result • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Sometimes, we may want to use or inspect specified run result from sig_auto_extract. -This function is designed for this purpose.

    -
    - -
    -
    get_bayesian_result(run_info)
    -
    - -
    -

    Arguments

    -
    run_info
    -

    a data.frame with 1 row and two necessary columns Run and file.

    - -
    -
    -

    Value

    - - -

    a list.

    -
    -
    -

    Author

    -

    Shixiang Wang

    -
    - -
    -

    Examples

    -
    load(system.file("extdata", "toy_copynumber_tally_W.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -
    -res <- sig_auto_extract(cn_tally_W$nmf_matrix, result_prefix = "Test_copynumber", nrun = 1)
    -
    -# All run info are stored in res$Raw$summary_run
    -# Obtain result of run 1
    -res_run1 <- get_bayesian_result(res$Raw$summary_run[1, ])
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/get_cn_freq_table.html b/docs/reference/get_cn_freq_table.html deleted file mode 100644 index 1dd34197..00000000 --- a/docs/reference/get_cn_freq_table.html +++ /dev/null @@ -1,130 +0,0 @@ - -Get CNV Frequency Table — get_cn_freq_table • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Get CNV Frequency Table

    -
    - -
    -
    get_cn_freq_table(
    -  data,
    -  genome_build = "hg19",
    -  cutoff = 2L,
    -  resolution_factor = 1L
    -)
    -
    - -
    -

    Arguments

    -
    data
    -

    a CopyNumber object or a data.frame containing -at least 'chromosome', 'start', 'end', 'segVal', 'sample' these columns.

    - - -
    genome_build
    -

    genome build version, used when data is a data.frame, should be 'hg19' or 'hg38'.

    - - -
    cutoff
    -

    copy number value cutoff for splitting data into AMP and DEL. -The values equal to cutoff are discarded. Default is 2, you can also set -a length-2 vector, e.g. c(2, 2).

    - - -
    resolution_factor
    -

    an integer to control the resolution. -When it is 1 (default), compute frequency in each cytoband. -When it is 2, compute frequency in each half cytoband.

    - -
    -
    -

    Value

    - - -

    a data.table.

    -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/get_cn_ploidy.html b/docs/reference/get_cn_ploidy.html deleted file mode 100644 index 094c257d..00000000 --- a/docs/reference/get_cn_ploidy.html +++ /dev/null @@ -1,121 +0,0 @@ - -Get Ploidy from Absolute Copy Number Profile — get_cn_ploidy • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Get Ploidy from Absolute Copy Number Profile

    -
    - -
    -
    get_cn_ploidy(data)
    -
    - -
    -

    Arguments

    -
    data
    -

    a CopyNumber object or a data.frame containing at least 'chromosome', 'start', -'end', 'segVal' these columns.

    - -
    -
    -

    Value

    - - -

    a value or a data.table

    - - -
    - -
    -

    Examples

    -
    # Load copy number object
    -load(system.file("extdata", "toy_copynumber.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -
    -df <- get_cn_ploidy(cn)
    -df
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/get_genome_annotation.html b/docs/reference/get_genome_annotation.html deleted file mode 100644 index c3a9833f..00000000 --- a/docs/reference/get_genome_annotation.html +++ /dev/null @@ -1,143 +0,0 @@ - -Get Genome Annotation — get_genome_annotation • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Get Genome Annotation

    -
    - -
    -
    get_genome_annotation(
    -  data_type = c("chr_size", "centro_loc", "cytobands", "transcript", "gene"),
    -  chrs = paste0("chr", c(1:22, "X", "Y")),
    -  genome_build = c("hg19", "hg38", "T2T", "mm10", "mm9", "ce11")
    -)
    -
    - -
    -

    Arguments

    -
    data_type
    -

    'chr_size' for chromosome size, -'centro_loc' for location of centromeres, -'cytobands' for location of chromosome cytobands -and 'transcript' for location of transcripts.

    - - -
    chrs
    -

    chromosomes start with 'chr'

    - - -
    genome_build
    -

    one of 'hg19', 'hg38', 'T2T', 'mm10', 'mm9' or 'ce11'

    - -
    -
    -

    Value

    - - -

    a data.frame containing annotation data

    -
    - -
    -

    Examples

    -
    df1 <- get_genome_annotation()
    -df1
    -
    -df2 <- get_genome_annotation(genome_build = "hg38")
    -df2
    -
    -df3 <- get_genome_annotation(data_type = "centro_loc")
    -df3
    -
    -df4 <- get_genome_annotation(data_type = "centro_loc", genome_build = "hg38")
    -df4
    -
    -df5 <- get_genome_annotation(data_type = "cytobands")
    -df5
    -
    -df6 <- get_genome_annotation(data_type = "cytobands", genome_build = "hg38")
    -df6
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/get_group_comparison.html b/docs/reference/get_group_comparison.html deleted file mode 100644 index 09b43458..00000000 --- a/docs/reference/get_group_comparison.html +++ /dev/null @@ -1,185 +0,0 @@ - -Get Comparison Result between Signature Groups — get_group_comparison • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Compare genotypes/phenotypes based on signature groups (samples are assigned to -several groups). For categorical -type, calculate fisher p value (using stats::fisher.test) and count table. -In larger than 2 by 2 tables, compute p-values by Monte Carlo simulation. -For continuous type, calculate anova p value (using stats::aov), -summary table and Tukey Honest significant difference (using stats::TukeyHSD). -The result of this function can be plotted by show_group_comparison().

    -
    - -
    -
    get_group_comparison(
    -  data,
    -  col_group,
    -  cols_to_compare,
    -  type = "ca",
    -  NAs = NA,
    -  verbose = FALSE
    -)
    -
    - -
    -

    Arguments

    -
    data
    -

    a data.frame containing signature groups and genotypes/phenotypes -(including categorical and continuous type data) want to analyze. User need to -construct this data.frame by him/herself.

    - - -
    col_group
    -

    column name of signature groups.

    - - -
    cols_to_compare
    -

    column names of genotypes/phenotypes want to summarize based on groups.

    - - -
    type
    -

    a character vector with length same as cols_to_compare, -'ca' for categorical type and 'co' for continuous type.

    - - -
    NAs
    -

    default is NA, filter NAs for categorical columns. -Otherwise a value (either length 1 or length same as cols_to_compare) fill NAs.

    - - -
    verbose
    -

    if TRUE, print extra information.

    - -
    -
    -

    Value

    - - -

    a list contains data, summary, p value etc..

    -
    -
    -

    Author

    -

    Shixiang Wang w_shixiang@163.com

    -
    - -
    -

    Examples

    -
    # \donttest{
    -load(system.file("extdata", "toy_copynumber_signature_by_W.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -
    -# Assign samples to clusters
    -groups <- get_groups(sig, method = "k-means")
    -
    -set.seed(1234)
    -
    -groups$prob <- rnorm(10)
    -groups$new_group <- sample(c("1", "2", "3", "4", NA), size = nrow(groups), replace = TRUE)
    -
    -# Compare groups (filter NAs for categorical columns)
    -groups.cmp <- get_group_comparison(groups[, -1],
    -  col_group = "group",
    -  cols_to_compare = c("prob", "new_group"),
    -  type = c("co", "ca"), verbose = TRUE
    -)
    -
    -# Compare groups (Set NAs of categorical columns to 'Rest')
    -groups.cmp2 <- get_group_comparison(groups[, -1],
    -  col_group = "group",
    -  cols_to_compare = c("prob", "new_group"),
    -  type = c("co", "ca"), NAs = "Rest", verbose = TRUE
    -)
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/get_groups.html b/docs/reference/get_groups.html deleted file mode 100644 index e719e330..00000000 --- a/docs/reference/get_groups.html +++ /dev/null @@ -1,176 +0,0 @@ - -Get Sample Groups from Signature Decomposition Information — get_groups • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    One of key results from signature analysis is to cluster samples into different -groups. This function takes Signature object as input -and return the membership in each cluster.

    -
    - -
    -
    get_groups(
    -  Signature,
    -  method = c("consensus", "k-means", "exposure", "samples"),
    -  n_cluster = NULL,
    -  match_consensus = TRUE
    -)
    -
    - -
    -

    Arguments

    -
    Signature
    -

    a Signature object obtained either from sig_extract or sig_auto_extract. -It can now also be applied to a relative exposure result in data.table format from sig_fit.

    - - -
    method
    -

    grouping method, more see details, could be one of the following:

    • 'consensus' - returns the cluster membership based on the hierarchical clustering of the consensus matrix, -it can only be used for the result obtained by sig_extract() with multiple runs using NMF package.

    • -
    • 'k-means' - returns the clusters by k-means.

    • -
    • 'exposure' - assigns a sample into a group whose signature exposure -is dominant.

    • -
    • 'samples' - returns the cluster membership based on the contribution of signature to each sample, -it can only be used for the result obtained by sig_extract() using NMF package.

    • -
    - - -
    n_cluster
    -

    only used when the method is 'k-means'.

    - - -
    match_consensus
    -

    only used when the method is 'consensus'. -If TRUE, the result will match order as shown in consensus map.

    - -
    -
    -

    Value

    - - -

    a data.table object

    -
    -
    -

    Details

    -

    Users may find large differences between the results of methods 'samples' and 'exposure' even though -they use a similar idea to find the dominant signature. Here is the reason:

    -

    Method 'samples' uses data directly from NMF decomposition, which means the two matrices -W (basis matrix or signature matrix) and H (coefficient matrix or exposure matrix) are -the results of NMF. Method 'exposure' uses the signature exposure loading matrix. -In this situation, each signature represents a number of mutations (alterations); -for implementation details please see the source code of the sig_extract() function.

    -
    -
    -

    See also

    - -
    - -
    -

    Examples

    -
    # \donttest{
    -# Load copy number prepare object
    -load(system.file("extdata", "toy_copynumber_tally_W.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -# Extract copy number signatures
    -library(NMF)
    -sig <- sig_extract(cn_tally_W$nmf_matrix, 2,
    -  nrun = 10
    -)
    -
    -# Methods 'consensus' and 'samples' are from NMF::predict()
    -g1 <- get_groups(sig, method = "consensus", match_consensus = TRUE)
    -g1
    -g2 <- get_groups(sig, method = "samples")
    -g2
    -
    -# Use k-means clustering
    -g3 <- get_groups(sig, method = "k-means")
    -g3
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/get_intersect_size.html b/docs/reference/get_intersect_size.html deleted file mode 100644 index b81ea622..00000000 --- a/docs/reference/get_intersect_size.html +++ /dev/null @@ -1,130 +0,0 @@ - -Get Overlap Size between Interval x and y — get_intersect_size • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Get Overlap Size between Interval x and y

    -
    - -
    -
    get_intersect_size(x.start, x.end, y.start, y.end)
    -
    - -
    -

    Arguments

    -
    x.start
    -

    start position of interval x.

    - - -
    x.end
    -

    end position of interval x.

    - - -
    y.start
    -

    start position of interval y.

    - - -
    y.end
    -

    end position of interval y.

    - -
    -
    -

    Value

    - - -

    a numeric vector.

    -
    - -
    -

    Examples

    -
    o1 <- get_intersect_size(1, 5, 3, 20)
    -o1
    -o2 <- get_intersect_size(3, 20, 1, 10)
    -o2
    -o3 <- get_intersect_size(c(1, 2, 1), c(10, 4, 6), c(4, 2, 5), c(10, 3, 22))
    -o3
    -
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/get_pLOH_score.html b/docs/reference/get_pLOH_score.html deleted file mode 100644 index 98f71312..00000000 --- a/docs/reference/get_pLOH_score.html +++ /dev/null @@ -1,146 +0,0 @@ - -Get proportions of pLOH score from Allele Specific Copy Number Profile — get_pLOH_score • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    pLOH score represents the proportion of the genome that displayed LOH.

    -
    - -
    -
    get_pLOH_score(data, rm_chrs = c("chrX", "chrY"), genome_build = "hg19")
    -
    - -
    -

    Arguments

    -
    data
    -

    a CopyNumber object or a data.frame containing at least -'chromosome', 'start', 'end', 'segVal', "minor_cn", 'sample' these columns.

    - - -
    rm_chrs
    -

    chromosomes to be removed in calculation. Default is sex -chromosomes (recommended).

    - - -
    genome_build
    -

    genome build version, should be 'hg19', 'hg38', 'mm9' or 'mm10'.

    - -
    -
    -

    Value

    - - -

    A data.frame

    - - -
    -
    -

    References

    -

    Steele, Christopher D., et al. "Signatures of copy number alterations in human cancer." bioRxiv (2021).

    -
    - -
    -

    Examples

    -
    # \donttest{
    -# Load toy dataset of absolute copynumber profile
    -load(system.file("extdata", "toy_segTab.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -
    -set.seed(1234)
    -segTabs$minor_cn <- sample(c(0, 1), size = nrow(segTabs), replace = TRUE)
    -cn <- read_copynumber(segTabs,
    -  seg_cols = c("chromosome", "start", "end", "segVal"),
    -  genome_measure = "wg", complement = TRUE, add_loh = TRUE
    -)
    -
    -df <- get_pLOH_score(cn)
    -df
    -
    -df2 <- get_pLOH_score(cn@data)
    -df2
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/get_shannon_diversity_index.html b/docs/reference/get_shannon_diversity_index.html deleted file mode 100644 index 6f9ca6e2..00000000 --- a/docs/reference/get_shannon_diversity_index.html +++ /dev/null @@ -1,143 +0,0 @@ - -Get Shannon Diversity Index for Signatures — get_shannon_diversity_index • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    $$H = - \sum_{i=1}^n{p_i \ln(p_i)}$$ -where n is the number -of signatures identified in the signature with exposure > cutoff, -and p_i is the normalized exposure of the i-th signature with -exposure > cutoff. Exposures of signatures were normalized to -sum to 1.

    -
    - -
    -
    get_shannon_diversity_index(rel_expo, cutoff = 0.001)
    -
    - -
    -

    Arguments

    -
    rel_expo
    -

    a data.frame with numeric columns indicating -relative signature exposures for each sample. Typically -this data can be obtained from get_sig_exposure().

    - - -
    cutoff
    -

    a relative exposure cutoff for filtering signatures, -default is 0.1%.

    - -
    -
    -

    Value

    - - -

    a data.frame

    - - -
    -
    -

    References

    -

    Steele, Christopher D., et al. "Undifferentiated sarcomas develop through distinct evolutionary pathways." Cancer Cell 35.3 (2019): 441-456.

    -
    - -
    -

    Examples

    -
    # Load mutational signature
    -load(system.file("extdata", "toy_mutational_signature.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -# Get signature exposure
    -rel_expo <- get_sig_exposure(sig2, type = "relative")
    -rel_expo
    -diversity_index <- get_shannon_diversity_index(rel_expo)
    -diversity_index
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/get_sig_cancer_type_index.html b/docs/reference/get_sig_cancer_type_index.html deleted file mode 100644 index aaf22dbd..00000000 --- a/docs/reference/get_sig_cancer_type_index.html +++ /dev/null @@ -1,138 +0,0 @@ - -Obtain Signature Index for Cancer Types — get_sig_cancer_type_index • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Obtain Signature Index for Cancer Types

    -
    - -
    -
    get_sig_cancer_type_index(
    -  sig_type = c("legacy", "SBS", "DBS", "ID"),
    -  seq_type = c("WGS", "WES"),
    -  source = c("PCAWG", "TCGA", "nonPCAWG"),
    -  keyword = NULL
    -)
    -
    - -
    -

    Arguments

    -
    sig_type
    -

    signature type.

    - - -
    seq_type
    -

    sequencing type.

    - - -
    source
    -

    data source.

    - - -
    keyword
    -

    keyword to search in the signature index database.

    - -
    -
    -

    Value

    - - -

    a list.

    -
    - -
    -

    Examples

    -
    l1 <- get_sig_cancer_type_index()
    -l2 <- get_sig_cancer_type_index(sig_type = "SBS")
    -l3 <- get_sig_cancer_type_index(sig_type = "DBS", source = "PCAWG", seq_type = "WGS")
    -l4 <- get_sig_cancer_type_index(sig_type = "ID")
    -l5 <- get_sig_cancer_type_index(keyword = "breast")
    -l1
    -l2
    -l3
    -l4
    -l5
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/get_sig_db.html b/docs/reference/get_sig_db.html deleted file mode 100644 index e68d0532..00000000 --- a/docs/reference/get_sig_db.html +++ /dev/null @@ -1,190 +0,0 @@ - -Get Curated Reference Signature Database — get_sig_db • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Reference mutational signatures and their aetiologies, -mainly obtained from COSMIC database -(SigProfiler results) and cleaned before saving into -sigminer package. You can obtain:

    • COSMIC legacy SBS signatures.

    • -
    • COSMIC v3 SBS signatures.

    • -
    • COSMIC v3 DBS signatures.

    • -
    • COSMIC v3 ID (indel) signatures.

    • -
    • SBS and RS (rearrangement) signatures from Nik lab 2020 Nature Cancer paper.

    • -
    • RS signatures from BRCA560 and USARC cohorts.

    • -
    • Copy number signatures from USARC cohort and TCGA.

    • -
    • Copy number signatures from Liu lab 2023. It supports both PCAWG and TCGA cohort.

    • -
    - -
    -
    get_sig_db(sig_db = "legacy")
    -
    - -
    -

    Arguments

    -
    sig_db
    -

    default 'legacy', it can be 'legacy' (for COSMIC v2 'SBS'), -'SBS', 'DBS', 'ID' and 'TSB' (for COSMIC v3.1 signatures) -for small scale mutations. -For more specific details, it can also be 'SBS_hg19', 'SBS_hg38', -'SBS_mm9', 'SBS_mm10', 'DBS_hg19', 'DBS_hg38', 'DBS_mm9', 'DBS_mm10' to use -COSMIC v3 reference signatures from Alexandrov, Ludmil B., et al. (2020) (reference #1). -In addition, it can be one of "SBS_Nik_lab_Organ", "RS_Nik_lab_Organ", -"SBS_Nik_lab", "RS_Nik_lab" to refer to reference signatures from -Degasperi, Andrea, et al. (2020) (reference #2); -"RS_BRCA560", "RS_USARC" to reference signatures from BRCA560 and USARC cohorts; -"CNS_USARC" (40 categories), "CNS_TCGA" (48 categories) to reference copy number signatures from USARC cohort and TCGA; -"CNS_TCGA176" (176 categories) and "CNS_PCAWG176" (176 categories) to reference copy number signatures from TCGA and PCAWG respectively. -UPDATE, the latest version of the reference signatures can be automatically -downloaded and loaded from https://cancer.sanger.ac.uk/signatures/downloads/ -when an option with latest_ prefix is specified (e.g. "latest_SBS_GRCh37"). -Note: the signature profile for different genome builds are basically same. -And specific database (e.g. 'SBS_mm10') contains fewer signatures than all COSMIC -signatures (because some signatures are not detected from Alexandrov, Ludmil B., et al. (2020)). -For all available options, check the parameter setting.

    - -
    -
    -

    Value

    - - -

    a list.

    -
    -
    -

    References

    - -
    • Steele, Christopher D., et al. "Signatures of copy number alterations in human cancer." Nature 606.7916 (2022): 984-991.

    • -
    • Alexandrov, Ludmil B., et al. "The repertoire of mutational signatures in human cancer." Nature 578.7793 (2020): 94-101.

    • -
    • Steele, Christopher D., et al. "Undifferentiated sarcomas develop through distinct evolutionary pathways." Cancer Cell 35.3 (2019): 441-456.

    • -
    • Ziyu Tao, et al. "The repertoire of copy number alteration signatures in human cancer." Briefings in Bioinformatics (2023): bbad053.

    • -
    - - -
    -

    Examples

    -
    s1 <- get_sig_db()
    -s2 <- get_sig_db("SBS")
    -s3 <- get_sig_db("DBS")
    -s4 <- get_sig_db("DBS_mm10")
    -s5 <- get_sig_db("SBS_Nik_lab")
    -s6 <- get_sig_db("ID")
    -s7 <- get_sig_db("RS_BRCA560")
    -s8 <- get_sig_db("RS_USARC")
    -s9 <- get_sig_db("RS_Nik_lab")
    -s10 <- get_sig_db("CNS_USARC")
    -s11 <- get_sig_db("CNS_TCGA")
    -s12 <- get_sig_db("CNS_TCGA176")
    -s13 <- get_sig_db("CNS_PCAWG176")
    -s1
    -s2
    -s3
    -s4
    -s5
    -s6
    -s7
    -s8
    -s9
    -s10
    -s11
    -s12
    -s13
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/get_sig_exposure.html b/docs/reference/get_sig_exposure.html deleted file mode 100644 index e4e1a582..00000000 --- a/docs/reference/get_sig_exposure.html +++ /dev/null @@ -1,161 +0,0 @@ - -Get Signature Exposure from 'Signature' Object — get_sig_exposure • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    The expected number of mutations (or copy number segment records) with each signature was -determined after a scaling transformation V ~ WH = W'H' where W' = WU' and H' = UH. -The scaling matrix U is a KxK diagonal matrix (K is signature number, U' is the inverse of U) -with the element corresponding to the L1-norm of column vectors of W -(i.e. the sum of the elements of the vector). As a result, the k-th row vector of the final -matrix H' represents the absolute exposure (activity) of the k-th process across samples -(e.g., for SBS, the estimated (or expected) number of mutations generated by the k-th process). -Of note, for copy number signatures, only components of feature CN was used for calculating H'.

    -
    - -
    -
    get_sig_exposure(
    -  Signature,
    -  type = c("absolute", "relative"),
    -  rel_threshold = 0.01
    -)
    -
    - -
    -

    Arguments

    -
    Signature
    -

    a Signature object obtained either from sig_extract or sig_auto_extract, -or just a raw exposure matrix with column representing samples (patients) and row -representing signatures.

    - - -
    type
    -

    'absolute' for signature exposure and 'relative' for signature relative exposure.

    - - -
    rel_threshold
    -

    only used when type is 'relative', relative exposure less -than (<=) this value will be set to 0 and thus all signature exposures -may not sum to 1. This is similar to this argument in sig_fit.

    - -
    -
    -

    Value

    - - -

    a data.table

    - - -
    -
    -

    References

    -

    Kim, Jaegil, et al. "Somatic ERCC2 mutations are associated with a distinct genomic signature in urothelial tumors." -Nature genetics 48.6 (2016): 600.

    -
    -
    -

    Author

    -

    Shixiang Wang w_shixiang@163.com

    -
    - -
    -

    Examples

    -
    # Load mutational signature
    -load(system.file("extdata", "toy_mutational_signature.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -# Get signature exposure
    -expo1 <- get_sig_exposure(sig2)
    -expo1
    -expo2 <- get_sig_exposure(sig2, type = "relative")
    -expo2
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/get_sig_feature_association.html b/docs/reference/get_sig_feature_association.html deleted file mode 100644 index 6171c348..00000000 --- a/docs/reference/get_sig_feature_association.html +++ /dev/null @@ -1,165 +0,0 @@ - -Calculate Association between Signature Exposures and Other Features — get_sig_feature_association • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Association of signature exposures with other features will be performed using one of two procedures: -for a continuous association variable (including ordinal variable), correlation is performed; -for a binary association variable, samples will be divided into two groups and Mann-Whitney U-test -is performed to test for differences in signature exposure medians between the two groups. -See get_tidy_association for cleaning association result.

    -
    - -
    -
    get_sig_feature_association(
    -  data,
    -  cols_to_sigs,
    -  cols_to_features,
    -  type = "ca",
    -  method_co = c("spearman", "pearson", "kendall"),
    -  method_ca = stats::wilcox.test,
    -  min_n = 0.01,
    -  verbose = FALSE,
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    data
    -

    a data.frame contains signature exposures and other features

    - - -
    cols_to_sigs
    -

    colnames for signature exposure

    - - -
    cols_to_features
    -

    colnames for other features

    - - -
    type
    -

    a character vector containing 'ca' for categorical variable and 'co' for continuous variable, -it must have the same length as cols_to_features.

    - - -
    method_co
    -

    method for continuous variable, default is "spearman", could also be "pearson" and "kendall".

    - - -
    method_ca
    -

    method for categorical variable, default is "wilcox.test"

    - - -
    min_n
    -

    a minimal fraction (e.g. 0.01) or an integer number (e.g. 10) for filtering some variables with few positive events. -Default is 0.01.

    - - -
    verbose
    -

    if TRUE, print extra message.

    - - -
    ...
    -

    other arguments passing to test functions, like cor.test.

    - -
    -
    -

    Value

    - - -

    a list. For 'co' features, 'measure' means correlation coefficient. -For 'ca' features, 'measure' means difference in means of signature exposure.

    -
    -
    -

    See also

    - -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/get_sig_rec_similarity.html b/docs/reference/get_sig_rec_similarity.html deleted file mode 100644 index 570d67b8..00000000 --- a/docs/reference/get_sig_rec_similarity.html +++ /dev/null @@ -1,112 +0,0 @@ - -Get Reconstructed Profile Cosine Similarity, RSS, etc. — get_sig_rec_similarity • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    See bp_extract_signatures for examples.

    -
    - -
    -
    get_sig_rec_similarity(Signature, nmf_matrix)
    -
    - -
    -

    Arguments

    -
    Signature
    -

    a Signature object.

    - - -
    nmf_matrix
    -

    a matrix used for NMF decomposition with rows indicate samples and columns indicate components.

    - -
    -
    -

    Value

    - - -

    a data.table.

    -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/get_sig_similarity.html b/docs/reference/get_sig_similarity.html deleted file mode 100644 index 9b1e979c..00000000 --- a/docs/reference/get_sig_similarity.html +++ /dev/null @@ -1,252 +0,0 @@ - -Calculate Similarity between Identified Signatures and Reference Signatures — get_sig_similarity • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    The reference signatures can be either a Signature object specified by Ref argument -or known COSMIC signatures specified by sig_db argument. -Two COSMIC databases are used for comparisons - "legacy" which includes 30 signatures, -and "SBS" - which includes updated/refined 65 signatures. This function is modified -from compareSignatures() in maftools package. -NOTE: all reference signatures are generated from gold standard tool: -SigProfiler.

    -
    - -
    -
    get_sig_similarity(
    -  Signature,
    -  Ref = NULL,
    -  sig_db = c("SBS", "legacy", "DBS", "ID", "TSB", "SBS_Nik_lab", "RS_Nik_lab",
    -    "RS_BRCA560", "RS_USARC", "CNS_USARC", "CNS_TCGA", "CNS_TCGA176", "CNS_PCAWG176",
    -    "SBS_hg19", "SBS_hg38", "SBS_mm9", "SBS_mm10", "DBS_hg19", "DBS_hg38", "DBS_mm9",
    -    "DBS_mm10", "SBS_Nik_lab_Organ", "RS_Nik_lab_Organ", "latest_SBS_GRCh37",
    -    "latest_DBS_GRCh37", "latest_ID_GRCh37", "latest_SBS_GRCh38", "latest_DBS_GRCh38",
    -    "latest_SBS_mm9", "latest_DBS_mm9", "latest_SBS_mm10", "latest_DBS_mm10",
    -    "latest_SBS_rn6", "latest_DBS_rn6", "latest_CN_GRCh37", 
    -    
    -    "latest_RNA-SBS_GRCh37", "latest_SV_GRCh38"),
    -  db_type = c("", "human-exome", "human-genome"),
    -  method = "cosine",
    -  normalize = c("row", "feature"),
    -  feature_setting = sigminer::CN.features,
    -  set_order = TRUE,
    -  pattern_to_rm = NULL,
    -  verbose = TRUE
    -)
    -
    - -
    -

    Arguments

    -
    Signature
    -

    a Signature object or a component-by-signature matrix/data.frame -(sum of each column is 1) or a normalized component-by-sample matrix/data.frame -(sum of each column is 1). -More please see examples.

    - - -
    Ref
    -

    default is NULL, can be a same object as Signature.

    - - -
    sig_db
    -

    default 'legacy', it can be 'legacy' (for COSMIC v2 'SBS'), -'SBS', 'DBS', 'ID' and 'TSB' (for COSMIC v3.1 signatures) -for small scale mutations. -For more specific details, it can also be 'SBS_hg19', 'SBS_hg38', -'SBS_mm9', 'SBS_mm10', 'DBS_hg19', 'DBS_hg38', 'DBS_mm9', 'DBS_mm10' to use -COSMIC v3 reference signatures from Alexandrov, Ludmil B., et al. (2020) (reference #1). -In addition, it can be one of "SBS_Nik_lab_Organ", "RS_Nik_lab_Organ", -"SBS_Nik_lab", "RS_Nik_lab" to refer to reference signatures from -Degasperi, Andrea, et al. (2020) (reference #2); -"RS_BRCA560", "RS_USARC" to reference signatures from BRCA560 and USARC cohorts; -"CNS_USARC" (40 categories), "CNS_TCGA" (48 categories) to reference copy number signatures from USARC cohort and TCGA; -"CNS_TCGA176" (176 categories) and "CNS_PCAWG176" (176 categories) to reference copy number signatures from TCGA and PCAWG respectively. -UPDATE, the latest version of the reference signatures can be automatically -downloaded and loaded from https://cancer.sanger.ac.uk/signatures/downloads/ -when an option with latest_ prefix is specified (e.g. "latest_SBS_GRCh37"). -Note: the signature profile for different genome builds are basically same. -And specific database (e.g. 'SBS_mm10') contains fewer signatures than all COSMIC -signatures (because some signatures are not detected from Alexandrov, Ludmil B., et al. (2020)). -For all available options, check the parameter setting.

    - - -
    db_type
    -

    only used when sig_db is enabled. -"" for keeping default, "human-exome" for transforming to exome frequency of component, -and "human-genome" for transforming to whole genome frequency of component. -Currently only works for 'SBS'.

    - - -
    method
    -

    default is 'cosine' for cosine similarity.

    - - -
    normalize
    -

    one of "row" and "feature". "row" is typically used -for common mutational signatures. "feature" is designed by me to use when input -are copy number signatures.

    - - -
    feature_setting
    -

    a data.frame used for classification. -Only used when method is "Wang" ("W"). -Default is CN.features. Users can also set custom input with "feature", -"min" and "max" columns available. Valid features can be printed by -unique(CN.features$feature).

    - - -
    set_order
    -

    if TRUE, order the return similarity matrix.

    - - -
    pattern_to_rm
    -

    patterns for removing some features/components in similarity -calculation. A vector of component name is also accepted. -The remove operation will be done after normalization. Default is NULL.

    - - -
    verbose
    -

    if TRUE, print extra info.

    - -
    -
    -

    Value

    - - -

    a list containing similarities, aetiologies if available, best match and RSS.

    -
    -
    -

    References

    -

    Alexandrov, Ludmil B., et al. "The repertoire of mutational signatures in human cancer." Nature 578.7793 (2020): 94-101.

    -

    Degasperi, Andrea, et al. "A practical framework and online tool for mutational signature analyses show intertissue variation and driver dependencies." Nature cancer 1.2 (2020): 249-263.

    -

    Steele, Christopher D., et al. "Undifferentiated sarcomas develop through distinct evolutionary pathways." Cancer Cell 35.3 (2019): 441-456.

    -

    Nik-Zainal, Serena, et al. "Landscape of somatic mutations in 560 breast cancer whole-genome sequences." Nature 534.7605 (2016): 47-54.

    -

    Steele, Christopher D., et al. "Signatures of copy number alterations in human cancer." Nature 606.7916 (2022): 984-991.

    -
    -
    -

    Author

    -

    Shixiang Wang w_shixiang@163.com

    -
    - -
    -

    Examples

    -
    # Load mutational signature
    -load(system.file("extdata", "toy_mutational_signature.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -
    -s1 <- get_sig_similarity(sig2, Ref = sig2)
    -s1
    -
    -s2 <- get_sig_similarity(sig2)
    -s2
    -s3 <- get_sig_similarity(sig2, sig_db = "SBS")
    -s3
    -
    -# Set order for result similarity matrix
    -s4 <- get_sig_similarity(sig2, sig_db = "SBS", set_order = TRUE)
    -s4
    -
    -## Remove some components
    -## in similarity calculation
    -s5 <- get_sig_similarity(sig2,
    -  Ref = sig2,
    -  pattern_to_rm = c("T[T>G]C", "T[T>G]G", "T[T>G]T")
    -)
    -s5
    -
    -## Same to DBS and ID signatures
    -x1 <- get_sig_db("DBS_hg19")
    -x2 <- get_sig_db("DBS_hg38")
    -s6 <- get_sig_similarity(x1$db, x2$db)
    -s6
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/get_tidy_association.html b/docs/reference/get_tidy_association.html deleted file mode 100644 index 09a4cc8e..00000000 --- a/docs/reference/get_tidy_association.html +++ /dev/null @@ -1,123 +0,0 @@ - -Get Tidy Signature Association Results — get_tidy_association • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Get Tidy Signature Association Results

    -
    - -
    -
    get_tidy_association(cor_res, p_adjust = FALSE, method = "fdr")
    -
    - -
    -

    Arguments

    -
    cor_res
    -

    data returned by get_sig_feature_association()

    - - -
    p_adjust
    -

    logical, if TRUE, adjust p values by data type.

    - - -
    method
    -

    p value correction method, see stats::p.adjust for -more detail.

    - -
    -
    -

    Value

    - - -

    a data.frame

    - - -
    - - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/group_enrichment.html b/docs/reference/group_enrichment.html deleted file mode 100644 index ce4e010f..00000000 --- a/docs/reference/group_enrichment.html +++ /dev/null @@ -1,201 +0,0 @@ - -General Group Enrichment Analysis — group_enrichment • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    This function takes a data.frame as input, compares proportion of positive -cases or mean measure in one subgroup and the remaining samples.

    -
    - -
    -
    group_enrichment(
    -  df,
    -  grp_vars = NULL,
    -  enrich_vars = NULL,
    -  cross = TRUE,
    -  co_method = c("t.test", "wilcox.test"),
    -  ref_group = NA
    -)
    -
    - -
    -

    Arguments

    -
    df
    -

    a data.frame.

    - - -
    grp_vars
    -

    character vector specifying group variables to split samples -into subgroups (at least 2 subgroups, otherwise this variable will be skipped).

    - - -
    enrich_vars
    -

    character vector specifying measure variables to be compared. -If variable is not numeric, only binary cases are accepted in the form of -TRUE/FALSE or P/N (P for positive cases and N for negative cases). -Of note, NA values set to negative cases.

    - - -
    cross
    -

    logical, default is TRUE, combine all situations provided by -grp_vars and enrich_vars. For examples, c('A', 'B') and c('C', 'D') -will construct 4 combinations(i.e. "AC", "AD", "BC" and "BD"). A variable can -not be in both grp_vars and enrich_vars, such cases will be automatically -drop. If FALSE, use pairwise combinations, see section "examples" for use cases.

    - - -
    co_method
    -

    test method for continuous variable, default is 't.test'.

    - - -
    ref_group
    -

    reference group set in grp_vars.

    - -
    -
    -

    Value

    - - -

    a data.table with following columns:

    • grp_var: group variable name.

    • -
    • enrich_var: enrich variable (variable to be compared) name.

    • -
    • grp1: the first group name, should be a member in grp_var column.

    • -
    • grp2: the remaining samples, marked as 'Rest'.

    • -
    • grp1_size: sample size for grp1.

    • -
    • grp1_pos_measure: for binary variable, it stores the proportion of -positive cases in grp1; for continuous variable, it stores mean value.

    • -
    • grp2_size: sample size for grp2.

    • -
    • grp2_pos_measure: same as grp1_pos_measure but for grp2.

    • -
    • measure_observed: for binary variable, it stores odds ratio; -for continuous variable, it stores scaled mean ratio.

    • -
    • measure_tested: only for binary variable, it stores -estimated odds ratio and its 95% CI from fisher.test().

    • -
    • p_value: for binary variable, it stores p value from fisher.test(); -for continuous variable, it stores value from wilcox.test() or t.test().

    • -
    • type: one of "binary" and "continuous".

    • -
    • method: one of "fish.test", "wilcox.test" and "t.test".

    • -
    -
    -

    See also

    - -
    - -
    -

    Examples

    -
    set.seed(1234)
    -df <- dplyr::tibble(
    -  g1 = factor(abs(round(rnorm(99, 0, 1)))),
    -  g2 = rep(LETTERS[1:4], c(50, 40, 8, 1)),
    -  e1 = sample(c("P", "N"), 99, replace = TRUE),
    -  e2 = rnorm(99)
    -)
    -
    -print(str(df))
    -print(head(df))
    -
    -# Compare g1:e1, g1:e2, g2:e1 and g2:e2
    -x1 <- group_enrichment(df, grp_vars = c("g1", "g2"), enrich_vars = c("e1", "e2"))
    -x1
    -
    -# Only compare g1:e1, g2:e2
    -x2 <- group_enrichment(df,
    -  grp_vars = c("g1", "g2"),
    -  enrich_vars = c("e1", "e2"),
    -  co_method = "wilcox.test",
    -  cross = FALSE
    -)
    -x2
    -
    -# \donttest{
    -# Visualization
    -p1 <- show_group_enrichment(x1, fill_by_p_value = TRUE)
    -p1
    -p2 <- show_group_enrichment(x1, fill_by_p_value = FALSE)
    -p2
    -p3 <- show_group_enrichment(x1, return_list = TRUE)
    -p3
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/group_enrichment2.html b/docs/reference/group_enrichment2.html deleted file mode 100644 index 5e6c1b20..00000000 --- a/docs/reference/group_enrichment2.html +++ /dev/null @@ -1,137 +0,0 @@ - -Group Enrichment Analysis with Subsets — group_enrichment2 • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    More details see group_enrichment().

    -
    - -
    -
    group_enrichment2(
    -  df,
    -  subset_var,
    -  grp_vars,
    -  enrich_vars,
    -  co_method = c("t.test", "wilcox.test"),
    -  ref_group = NA
    -)
    -
    - -
    -

    Arguments

    -
    df
    -

    a data.frame.

    - - -
    subset_var
    -

    a column for subsetting.

    - - -
    grp_vars
    -

    character vector specifying group variables to split samples -into subgroups (at least 2 subgroups, otherwise this variable will be skipped).

    - - -
    enrich_vars
    -

    character vector specifying measure variables to be compared. -If variable is not numeric, only binary cases are accepted in the form of -TRUE/FALSE or P/N (P for positive cases and N for negative cases). -Of note, NA values set to negative cases.

    - - -
    co_method
    -

    test method for continuous variable, default is 't.test'.

    - - -
    ref_group
    -

    reference group set in grp_vars.

    - -
    -
    -

    See also

    - -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/handle_hyper_mutation.html b/docs/reference/handle_hyper_mutation.html deleted file mode 100644 index db0c9d98..00000000 --- a/docs/reference/handle_hyper_mutation.html +++ /dev/null @@ -1,115 +0,0 @@ - -Handle Hypermutant Samples — handle_hyper_mutation • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    This can be used for SNV/INDEL count matrix. For copy number analysis, -please skip it.

    -
    - -
    -
    handle_hyper_mutation(nmf_matrix)
    -
    - -
    -

    Arguments

    -
    nmf_matrix
    -

    a matrix used for NMF decomposition with rows indicate samples and columns indicate components.

    - -
    -
    -

    Value

    - - -

    a matrix.

    -
    -
    -

    References

    -

    Kim, Jaegil, et al. "Somatic ERCC2 mutations are associated with a distinct genomic signature in urothelial tumors." -Nature genetics 48.6 (2016): 600.

    -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/hello.html b/docs/reference/hello.html deleted file mode 100644 index df49bb3e..00000000 --- a/docs/reference/hello.html +++ /dev/null @@ -1,100 +0,0 @@ - -Say Hello to Users — hello • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Say Hello to Users

    -
    - -
    -
    hello()
    -
    - - -
    -

    Examples

    -
    hello()
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/index.html b/docs/reference/index.html deleted file mode 100644 index ba6f5bf5..00000000 --- a/docs/reference/index.html +++ /dev/null @@ -1,546 +0,0 @@ - -Function reference • sigminer - - -
    -
    - - - -
    -
    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -

    Data input, classes and operation

    -

    Read genomic variation data as an R object and more.

    -
    -

    read_copynumber()

    -

    Read Absolute Copy Number Profile

    -

    read_copynumber_seqz()

    -

    Read Absolute Copy Number Profile from Sequenza Result Directory

    -

    read_copynumber_ascat()

    -

    Read Copy Number Data from ASCAT Result Files

    -

    read_maf() read_maf_minimal()

    -

    Read MAF Files

    -

    read_sv_as_rs()

    -

    Read Structural Variation Data as RS object

    -

    read_vcf()

    -

    Read VCF Files as MAF Object

    -

    read_xena_variants()

    -

    Read UCSC Xena Variant Format Data as MAF Object

    -

    CopyNumber-class CopyNumber

    -

    Class CopyNumber

    -

    MAF-class MAF

    -

    Class MAF

    -

    subset(<CopyNumber>)

    -

    Subsetting CopyNumber object

    -

    Core pipeline 1 - de novo signature identification

    -

    Identify signatures from genomic variation profile. After reading data, the following functions construct the standard pipeline for most users.

    -
    -

    sig_tally()

    -

    Tally a Genomic Alteration Object

    -

    handle_hyper_mutation()

    -

    Handle Hypermutant Samples

    -

    sig_estimate() show_sig_number_survey() show_sig_number_survey2()

    -

    Estimate Signature Number

    -

    sig_unify_extract()

    -

    A Unified Interface to Extract Signatures

    -

    sig_extract()

    -

    Extract Signatures through NMF

    -

    sig_auto_extract()

    -

    Extract Signatures through the Automatic Relevance Determination Technique

    -

    sigprofiler_extract() sigprofiler_import() sigprofiler_reorder()

    -

    Extract Signatures with SigProfiler

    -

    bp_extract_signatures() bp_extract_signatures_iter() bp_cluster_iter_list() bp_get_clustered_sigs() bp_get_sig_obj() bp_get_stats() bp_get_rank_score() bp_show_survey2() bp_show_survey() bp_attribute_activity()

    -

    A Best Practice for Signature Extraction and Exposure (Activity) Attribution

    -

    Core pipeline 2 - signature exposure fitting and optimization

    -

    Quantify exposure in at least one sample with reference signatures. It can also be used to optimize signature exposure from pipeline 1 and analyze exposure instability.

    -
    -

    sig_tally()

    -

    Tally a Genomic Alteration Object

    -

    handle_hyper_mutation()

    -

    Handle Hypermutant Samples

    -

    get_sig_cancer_type_index()

    -

    Obtain Signature Index for Cancer Types

    -

    sig_fit()

    -

    Fit Signature Exposures with Linear Combination Decomposition

    -

    sig_fit_bootstrap()

    -

    Obtain Bootstrap Distribution of Signature Exposures of a Certain Tumor Sample

    -

    sig_fit_bootstrap_batch()

    -

    Exposure Instability Analysis of Signature Exposures with Bootstrapping

    -

    report_bootstrap_p_value()

    -

    Report P Values from bootstrap Results

    -

    Signature visualization

    -

    Show signature profile and exposure.

    -
    -

    show_catalogue()

    -

    Show Alteration Catalogue Profile

    -

    show_sig_profile()

    -

    Show Signature Profile

    -

    show_sig_profile_loop()

    -

    Show Signature Profile with Loop Way

    -

    show_sig_profile_heatmap()

    -

    Show Signature Profile with Heatmap

    -

    show_sig_exposure()

    -

    Plot Signature Exposure

    -

    show_cosmic()

    -

    Show Signature Information in Web Browser

    -

    show_cosmic_sig_profile()

    -

    Plot Reference (Mainly COSMIC) Signature Profile

    -

    show_sig_consensusmap()

    -

    Show Signature Consensus Map

    -

    Fit and bootstrap visualization

    -

    Show signature fit and bootstrap analysis results.

    -
    -

    show_sig_fit()

    -

    Show Signature Fit Result

    -

    show_sig_bootstrap_exposure() show_sig_bootstrap_error() show_sig_bootstrap_stability()

    -

    Show Signature Bootstrap Analysis Results

    -

    Output analysis results to local machine

    -

    Save results and plots as files. For automatic analysis, please use sigflow.

    -
    -

    output_tally()

    -

    Output Tally Result in Barplots

    -

    output_sig()

    -

    Output Signature Results

    -

    output_fit()

    -

    Output Signature Fitting Results

    -

    output_bootstrap()

    -

    Output Signature Bootstrap Fitting Results

    -

    Signature object operation

    -

    Obtain or modify Signature object information.

    -
    -

    sig_names() sig_modify_names() sig_number() sig_attrs() sig_signature() sig_exposure()

    -

    Obtain or Modify Signature Information

    -

    Signature analysis and corresponding visualization

    -

    Analyze signatures and output the result to visualization.

    -
    -

    get_sig_similarity()

    -

    Calculate Similarity between Identified Signatures and Reference Signatures

    -

    get_sig_rec_similarity()

    -

    Get Reconstructed Profile Cosine Similarity, RSS, etc.

    -

    get_sig_exposure()

    -

    Get Signature Exposure from 'Signature' Object

    -

    get_sig_feature_association()

    -

    Calculate Association between Signature Exposures and Other Features

    -

    get_tidy_association()

    -

    Get Tidy Signature Association Results

    -

    show_cor()

    -

    A Simple and General Way for Association Analysis

    -

    show_sig_feature_corrplot()

    -

    Draw Corrplot for Signature Exposures and Other Features

    -

    show_groups()

    -

    Show Signature Contribution in Clusters

    -

    get_groups()

    -

    Get Sample Groups from Signature Decomposition Information

    -

    get_group_comparison()

    -

    Get Comparison Result between Signature Groups

    -

    show_group_comparison()

    -

    Plot Group Comparison Result

    -

    group_enrichment()

    -

    General Group Enrichment Analysis

    -

    show_group_enrichment()

    -

    Show Group Enrichment Result

    -

    group_enrichment2()

    -

    Group Enrichment Analysis with Subsets

    -

    show_group_distribution()

    -

    Show Grouped Variable Distribution

    -

    show_group_mapping()

    -

    Map Groups using Sankey

    -

    enrich_component_strand_bias()

    -

    Performs Strand Bias Enrichment Analysis for a Given Sample-by-Component Matrix

    -

    simulate_signature() simulate_catalogue() simulate_catalogue_matrix()

    -

    Simulation Analysis

    -

    get_shannon_diversity_index()

    -

    Get Shannon Diversity Index for Signatures

    -

    Package datasets

    -

    Datasets used for signature identification and analysis (some stored in extdata/).

    -
    -

    simulated_catalogs

    -

    A List of Simulated SBS-96 Catalog Matrix

    -

    CN.features

    -

    Classification Table of Copy Number Features Devised by Wang et al. for Method 'W'

    -

    centromeres.hg19

    -

    Location of Centromeres at Genome Build hg19

    -

    centromeres.hg38

    -

    Location of Centromeres at Genome Build hg38

    -

    centromeres.mm10

    -

    Location of Centromeres at Genome Build mm10

    -

    centromeres.mm9

    -

    Location of Centromeres at Genome Build mm9

    -

    chromsize.hg19

    -

    Chromosome Size of Genome Build hg19

    -

    chromsize.hg38

    -

    Chromosome Size of Genome Build hg38

    -

    chromsize.mm10

    -

    Chromosome Size of Genome Build mm10

    -

    chromsize.mm9

    -

    Chromosome Size of Genome Build mm9

    -

    cytobands.hg19

    -

    Location of Chromosome Cytobands at Genome Build hg19

    -

    cytobands.hg38

    -

    Location of Chromosome Cytobands at Genome Build hg38

    -

    cytobands.mm10

    -

    Location of Chromosome Cytobands at Genome Build mm10

    -

    cytobands.mm9

    -

    Location of Chromosome Cytobands at Genome Build mm9

    -

    transcript.hg19

    -

    Merged Transcript Location at Genome Build hg19

    -

    transcript.hg38

    -

    Merged Transcript Location at Genome Build hg38

    -

    transcript.mm10

    -

    Merged Transcript Location at Genome Build mm10

    -

    transcript.mm9

    -

    Merged Transcript Location at Genome Build mm9

    -

    Helpers

    -

    Helper functions used in the sigminer package.

    -
    -

    get_sig_db()

    -

    Get Curated Reference Signature Database

    -

    sig_convert()

    -

    Convert Signatures between different Genomic Distribution of Components

    -

    get_genome_annotation()

    -

    Get Genome Annotation

    -

    get_bayesian_result()

    -

    Get Specified Bayesian NMF Result from Run

    -

    get_adj_p()

    -

    Get Adjust P Values from Group Comparison

    -

    use_color_style()

    -

    Set Color Style for Plotting

    -

    add_h_arrow()

    -

    Add Horizontal Arrow with Text Label to a ggplot

    -

    add_labels()

    -

    Add Text Labels to a ggplot

    -

    cosine()

    -

    Calculate Cosine Measures

    -

    get_cn_freq_table()

    -

    Get CNV Frequency Table

    -

    same_size_clustering()

    -

    Same Size Clustering

    -

    get_intersect_size()

    -

    Get Overlap Size between Interval x and y

    -

    Copy number analysis and visualization

    -

    Functions for analyzing copy number data and visualization.

    -
    -

    transform_seg_table()

    -

    Transform Copy Number Table

    -

    get_cn_ploidy()

    -

    Get Ploidy from Absolute Copy Number Profile

    -

    get_Aneuploidy_score()

    -

    Get Aneuploidy Score from Copy Number Profile

    -

    get_pLOH_score()

    -

    Get proportions of pLOH score from Allele Specific Copy Number Profile

    -

    scoring()

    -

    Score Copy Number Profile

    -

    show_cn_profile()

    -

    Show Sample Copy Number Profile

    -

    show_cn_circos()

    -

    Show Copy Number Profile in Circos

    -

    show_cn_group_profile()

    -

    Show Summary Copy Number Profile for Sample Groups

    -

    show_cn_freq_circos()

    -

    Show Copy Number Variation Frequency Profile with Circos

    -

    show_cn_distribution()

    -

    Show Copy Number Distribution either by Length or Chromosome

    -

    show_cn_features()

    -

    Show Copy Number Feature Distributions

    -

    show_cn_components()

    -

    Show Copy Number Components

    -

    Extra documentation

    -

    Extra function and introduction.

    -
    -

    hello()

    -

    Say Hello to Users

    -

    sigminer sigminer-package

    -

    sigminer: Extract, Analyze and Visualize Signatures for Genomic Variations

    - - -
    - - -
    - - - - - - - - diff --git a/docs/reference/output_bootstrap.html b/docs/reference/output_bootstrap.html deleted file mode 100644 index 948197a5..00000000 --- a/docs/reference/output_bootstrap.html +++ /dev/null @@ -1,138 +0,0 @@ - -Output Signature Bootstrap Fitting Results — output_bootstrap • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Output Signature Bootstrap Fitting Results

    -
    - -
    -
    output_bootstrap(x, result_dir, mut_type = "SBS", sig_db = mut_type)
    -
    - -
    -

    Arguments

    -
    x
    -

    result from sig_fit_bootstrap_batch.

    - - -
    result_dir
    -

    a result directory.

    - - -
    mut_type
    -

    one of 'SBS', 'DBS', 'ID' or 'CN'.

    - - -
    sig_db
    -

    default 'legacy', it can be 'legacy' (for COSMIC v2 'SBS'), -'SBS', 'DBS', 'ID' and 'TSB' (for COSMIC v3.1 signatures) -for small scale mutations. -For more specific details, it can also be 'SBS_hg19', 'SBS_hg38', -'SBS_mm9', 'SBS_mm10', 'DBS_hg19', 'DBS_hg38', 'DBS_mm9', 'DBS_mm10' to use -COSMIC v3 reference signatures from Alexandrov, Ludmil B., et al. (2020) (reference #1). -In addition, it can be one of "SBS_Nik_lab_Organ", "RS_Nik_lab_Organ", -"SBS_Nik_lab", "RS_Nik_lab" to refer to reference signatures from -Degasperi, Andrea, et al. (2020) (reference #2); -"RS_BRCA560", "RS_USARC" to reference signatures from BRCA560 and USARC cohorts; -"CNS_USARC" (40 categories), "CNS_TCGA" (48 categories) to reference copy number signatures from USARC cohort and TCGA; -"CNS_TCGA176" (176 categories) and "CNS_PCAWG176" (176 categories) to reference copy number signatures from TCGA and PCAWG respectively. -UPDATE, the latest version of the reference signatures can be automatically -downloaded and loaded from https://cancer.sanger.ac.uk/signatures/downloads/ -when an option with latest_ prefix is specified (e.g. "latest_SBS_GRCh37"). -Note: the signature profiles for different genome builds are basically the same. -And a specific database (e.g. 'SBS_mm10') contains fewer signatures than all COSMIC -signatures (because some signatures are not detected from Alexandrov, Ludmil B., et al. (2020)). -For all available options, check the parameter setting.

    - -
    -
    -

    Value

    - - -

    Nothing.

    -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/output_fit.html b/docs/reference/output_fit.html deleted file mode 100644 index 85acbb57..00000000 --- a/docs/reference/output_fit.html +++ /dev/null @@ -1,138 +0,0 @@ - -Output Signature Fitting Results — output_fit • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Output Signature Fitting Results

    -
    - -
    -
    output_fit(x, result_dir, mut_type = "SBS", sig_db = mut_type)
    -
    - -
    -

    Arguments

    -
    x
    -

    result from sig_fit.

    - - -
    result_dir
    -

    a result directory.

    - - -
    mut_type
    -

    one of 'SBS', 'DBS', 'ID' or 'CN'.

    - - -
    sig_db
    -

    default 'legacy', it can be 'legacy' (for COSMIC v2 'SBS'), -'SBS', 'DBS', 'ID' and 'TSB' (for COSMIC v3.1 signatures) -for small scale mutations. -For more specific details, it can also be 'SBS_hg19', 'SBS_hg38', -'SBS_mm9', 'SBS_mm10', 'DBS_hg19', 'DBS_hg38', 'DBS_mm9', 'DBS_mm10' to use -COSMIC v3 reference signatures from Alexandrov, Ludmil B., et al. (2020) (reference #1). -In addition, it can be one of "SBS_Nik_lab_Organ", "RS_Nik_lab_Organ", -"SBS_Nik_lab", "RS_Nik_lab" to refer to reference signatures from -Degasperi, Andrea, et al. (2020) (reference #2); -"RS_BRCA560", "RS_USARC" to reference signatures from BRCA560 and USARC cohorts; -"CNS_USARC" (40 categories), "CNS_TCGA" (48 categories) to reference copy number signatures from USARC cohort and TCGA; -"CNS_TCGA176" (176 categories) and "CNS_PCAWG176" (176 categories) to reference copy number signatures from TCGA and PCAWG respectively. -UPDATE, the latest version of the reference signatures can be automatically -downloaded and loaded from https://cancer.sanger.ac.uk/signatures/downloads/ -when an option with latest_ prefix is specified (e.g. "latest_SBS_GRCh37"). -Note: the signature profiles for different genome builds are basically the same. -And a specific database (e.g. 'SBS_mm10') contains fewer signatures than all COSMIC -signatures (because some signatures are not detected from Alexandrov, Ludmil B., et al. (2020)). -For all available options, check the parameter setting.

    - -
    -
    -

    Value

    - - -

    Nothing.

    -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/output_sig.html b/docs/reference/output_sig.html deleted file mode 100644 index 871c94e4..00000000 --- a/docs/reference/output_sig.html +++ /dev/null @@ -1,138 +0,0 @@ - -Output Signature Results — output_sig • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Output Signature Results

    -
    - -
    -
    output_sig(sig, result_dir, mut_type = "SBS", sig_db = mut_type)
    -
    - -
    -

    Arguments

    -
    sig
    -

    a Signature object.

    - - -
    result_dir
    -

    a result directory.

    - - -
    mut_type
    -

    one of 'SBS', 'DBS', 'ID' or 'CN'.

    - - -
    sig_db
    -

    default 'legacy', it can be 'legacy' (for COSMIC v2 'SBS'), -'SBS', 'DBS', 'ID' and 'TSB' (for COSMIC v3.1 signatures) -for small scale mutations. -For more specific details, it can also be 'SBS_hg19', 'SBS_hg38', -'SBS_mm9', 'SBS_mm10', 'DBS_hg19', 'DBS_hg38', 'DBS_mm9', 'DBS_mm10' to use -COSMIC v3 reference signatures from Alexandrov, Ludmil B., et al. (2020) (reference #1). -In addition, it can be one of "SBS_Nik_lab_Organ", "RS_Nik_lab_Organ", -"SBS_Nik_lab", "RS_Nik_lab" to refer to reference signatures from -Degasperi, Andrea, et al. (2020) (reference #2); -"RS_BRCA560", "RS_USARC" to reference signatures from BRCA560 and USARC cohorts; -"CNS_USARC" (40 categories), "CNS_TCGA" (48 categories) to reference copy number signatures from USARC cohort and TCGA; -"CNS_TCGA176" (176 categories) and "CNS_PCAWG176" (176 categories) to reference copy number signatures from TCGA and PCAWG respectively. -UPDATE, the latest version of the reference signatures can be automatically -downloaded and loaded from https://cancer.sanger.ac.uk/signatures/downloads/ -when an option with latest_ prefix is specified (e.g. "latest_SBS_GRCh37"). -Note: the signature profiles for different genome builds are basically the same. -And a specific database (e.g. 'SBS_mm10') contains fewer signatures than all COSMIC -signatures (because some signatures are not detected from Alexandrov, Ludmil B., et al. (2020)). -For all available options, check the parameter setting.

    - -
    -
    -

    Value

    - - -

    Nothing.

    -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/output_tally.html b/docs/reference/output_tally.html deleted file mode 100644 index 2637093b..00000000 --- a/docs/reference/output_tally.html +++ /dev/null @@ -1,117 +0,0 @@ - -Output Tally Result in Barplots — output_tally • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Output Tally Result in Barplots

    -
    - -
    -
    output_tally(x, result_dir, mut_type = "SBS")
    -
    - -
    -

    Arguments

    -
    x
    -

    a matrix with row representing components (motifs) and column -representing samples.

    - - -
    result_dir
    -

    a result directory.

    - - -
    mut_type
    -

    one of 'SBS', 'DBS', 'ID' or 'CN'.

    - -
    -
    -

    Value

    - - -

    Nothing.

    -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/pipe.html b/docs/reference/pipe.html deleted file mode 100644 index 52a92235..00000000 --- a/docs/reference/pipe.html +++ /dev/null @@ -1,96 +0,0 @@ - -Pipe operator — %>% • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    See magrittr::%>% for details.

    -
    - -
    -
    lhs %>% rhs
    -
    - - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/read_copynumber.html b/docs/reference/read_copynumber.html deleted file mode 100644 index 64c97eb6..00000000 --- a/docs/reference/read_copynumber.html +++ /dev/null @@ -1,252 +0,0 @@ - -Read Absolute Copy Number Profile — read_copynumber • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Read absolute copy number profile for preparing CNV signature -analysis. See detail part of sig_tally() to see how to handle sex to get correct -summary.

    -
    - -
    -
    read_copynumber(
    -  input,
    -  pattern = NULL,
    -  ignore_case = FALSE,
    -  seg_cols = c("Chromosome", "Start.bp", "End.bp", "modal_cn"),
    -  samp_col = "sample",
    -  add_loh = FALSE,
    -  loh_min_len = 10000,
    -  loh_min_frac = 0.05,
    -  join_adj_seg = TRUE,
    -  skip_annotation = FALSE,
    -  use_all = add_loh,
    -  min_segnum = 0L,
    -  max_copynumber = 20L,
    -  genome_build = c("hg19", "hg38", "T2T", "mm10", "mm9", "ce11"),
    -  genome_measure = c("called", "wg"),
    -  complement = FALSE,
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    input
    -

    a data.frame or a file or a directory contains copy number profile.

    - - -
    pattern
    -

    an optional regular expression used to select part of files if -input is a directory, more detail please see list.files() function.

    - - -
    ignore_case
    -

    logical. Should pattern-matching be case-insensitive?

    - - -
    seg_cols
    -

    four strings used to specify chromosome, start position, -end position and copy number value in input, respectively. -Default use names from ABSOLUTE calling result.

    - - -
    samp_col
    -

    a character used to specify the sample column name. If input -is a directory and samp_col cannot be found, file names will be used as sample names -(setting this parameter to NULL is recommended in this case).

    - - -
    add_loh
    -

    if TRUE, add LOH labels to segments. NOTE a column -'minor_cn' must exist to indicate minor allele copy number value. -Sex chromosome will not be labeled.

    - - -
    loh_min_len
    -

    The length cut-off for labeling a segment as 'LOH'. -Default is 10Kb.

    - - -
    loh_min_frac
    -

    When join_adj_seg is set to TRUE, only segments whose length fraction -of LOH region is larger than this value will be labeled as 'LOH'. -Default is 5%.

    - - -
    join_adj_seg
    -

    if TRUE (default), join adjacent segments with -same copy number value. This is helpful for precisely counting the number of breakpoints. -When use_all=TRUE is set, the mean function will be applied to extra numeric columns -and unique string columns will be pasted by comma for joined records.

    - - -
    skip_annotation
    -

    if TRUE, skip annotation step, it may affect some analysis -and visualization functionality, but speed up reading data.

    - - -
    use_all
    -

    defaults to the value of add_loh (FALSE by default). If TRUE, use all columns from raw input.

    - - -
    min_segnum
    -

    minimal number of copy number segments within a sample.

    - - -
    max_copynumber
    -

    bigger copy number within a sample will be reset to this value.

    - - -
    genome_build
    -

    genome build version, should be 'hg19', 'hg38', 'T2T', 'mm10', 'mm9' or 'ce11'.

    - - -
    genome_measure
    -

    default is 'called', can be 'wg' or 'called'. -Set 'called' will use called segments size to compute total size for CNA burden calculation, -this option is useful for WES and target sequencing. -Set 'wg' will use autosome size from genome build, this option is useful for WGS, SNP etc..

    - - -
    complement
    -

    if TRUE, chromosomes (except 'Y') that do not appear in the input data are complemented -with normal copy number 2.

    - - -
    ...
    -

    other parameters pass to data.table::fread()

    - -
    -
    -

    Value

    - - -

    a CopyNumber object.

    -
    -
    -

    See also

    -

    read_maf for reading mutation data to MAF object.

    -
    -
    -

    Author

    -

    Shixiang Wang w_shixiang@163.com

    -
    - -
    -

    Examples

    -
    # Load toy dataset of absolute copynumber profile
    -load(system.file("extdata", "toy_segTab.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -
    -# \donttest{
    -cn <- read_copynumber(segTabs,
    -  seg_cols = c("chromosome", "start", "end", "segVal"),
    -  genome_build = "hg19", complement = FALSE
    -)
    -cn
    -cn_subset <- subset(cn, sample == "TCGA-DF-A2KN-01A-11D-A17U-01")
    -
    -# Add LOH
    -set.seed(1234)
    -segTabs$minor_cn <- sample(c(0, 1), size = nrow(segTabs), replace = TRUE)
    -cn <- read_copynumber(segTabs,
    -  seg_cols = c("chromosome", "start", "end", "segVal"),
    -  genome_measure = "wg", complement = TRUE, add_loh = TRUE
    -)
    -# Use tally method "S" (Steele et al.)
    -tally_s <- sig_tally(cn, method = "S")
    -
    -tab_file <- system.file("extdata", "metastatic_tumor.segtab.txt",
    -  package = "sigminer", mustWork = TRUE
    -)
    -cn2 <- read_copynumber(tab_file)
    -cn2
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/read_copynumber_ascat.html b/docs/reference/read_copynumber_ascat.html deleted file mode 100644 index 34cdf8fe..00000000 --- a/docs/reference/read_copynumber_ascat.html +++ /dev/null @@ -1,111 +0,0 @@ - -Read Copy Number Data from ASCAT Result Files — read_copynumber_ascat • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Note, the result is not a CopyNumber object, you need to generate it -by yourself.

    -
    - -
    -
    read_copynumber_ascat(x)
    -
    - -
    -

    Arguments

    -
    x
    -

    one or more .rds format files which contain the ASCAT object resulting from ascat.runAscat() -in the ASCAT package.

    - -
    -
    -

    Value

    - - -

    a tidy list.

    -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/read_copynumber_seqz.html b/docs/reference/read_copynumber_seqz.html deleted file mode 100644 index 9f7a0a36..00000000 --- a/docs/reference/read_copynumber_seqz.html +++ /dev/null @@ -1,117 +0,0 @@ - -Read Absolute Copy Number Profile from Sequenza Result Directory — read_copynumber_seqz • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Read Absolute Copy Number Profile from Sequenza Result Directory

    -
    - -
    -
    read_copynumber_seqz(target_dir, return_df = FALSE, ...)
    -
    - -
    -

    Arguments

    -
    target_dir
    -

    a directory path.

    - - -
    return_df
    -

    if TRUE, return a data.frame directly, otherwise return a -CopyNumber object.

    - - -
    ...
    -

    other parameters passing to read_copynumber().

    - -
    -
    -

    Value

    - - -

    a data.frame or a CopyNumber object.

    -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/read_maf.html b/docs/reference/read_maf.html deleted file mode 100644 index a638de6d..00000000 --- a/docs/reference/read_maf.html +++ /dev/null @@ -1,150 +0,0 @@ - -Read MAF Files — read_maf • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    This function is a wrapper of maftools::read.maf. -Useless options in maftools::read.maf are dropped here. -You can also use maftools::read.maf to read the data. -All reference alleles and mutation alleles should be recorded in -positive strand format.

    -
    - -
    -
    read_maf(maf, verbose = TRUE)
    -
    -read_maf_minimal(dt)
    -
    - -
    -

    Arguments

    -
    maf
    -

    tab delimited MAF file. File can also be gz compressed. Required. Alternatively, you can also provide already read MAF file as a dataframe.

    - - -
    verbose
    -

    TRUE logical. Default to be talkative and prints summary.

    - - -
    dt
    -

    A data.frame containing at least the following columns: -"Tumor_Sample_Barcode", "Chromosome", "Start_Position", "End_Position", "Reference_Allele", "Tumor_Seq_Allele2"

    - -
    -
    -

    Functions

    - -
    • read_maf_minimal(): Read Maf data.frame from a minimal maf-like data

    • -
    -
    -

    See also

    -

    read_copynumber for reading copy number data to CopyNumber object.

    -
    - -
    -

    Examples

    -
    # \donttest{
    -laml.maf <- system.file("extdata", "tcga_laml.maf.gz", package = "maftools", mustWork = TRUE)
    -if (!require("R.utils")) {
    -  message("Please install 'R.utils' package firstly")
    -} else {
    -  laml <- read_maf(maf = laml.maf)
    -  laml
    -
    -  laml_mini <- laml@data[, list(
    -    Tumor_Sample_Barcode, Chromosome,
    -    Start_Position, End_Position,
    -    Reference_Allele, Tumor_Seq_Allele2
    -  )]
    -  laml2 <- read_maf_minimal(laml_mini)
    -  laml2
    -}
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/read_sv_as_rs.html b/docs/reference/read_sv_as_rs.html deleted file mode 100644 index 57eb875d..00000000 --- a/docs/reference/read_sv_as_rs.html +++ /dev/null @@ -1,129 +0,0 @@ - -Read Structural Variation Data as RS object — read_sv_as_rs • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Read Structural Variation Data as RS object

    -
    - -
    -
    read_sv_as_rs(input)
    -
    - -
    -

    Arguments

    -
    input
    -

    a data.frame or a file with the following columns: -"sample", "chr1", "start1", "end1", "chr2", "start2", "end2", "strand1", "strand2", "svclass". -NOTE: If column "svclass" already exists in input, "strand1" and "strand2" are optional. -If "svclass" is not provided, read_sv_as_rs() will compute it by -"strand1","strand2"(strand1/strand2),"chr1" and "chr2":

    • translocation, if mates are on different chromosomes.

    • -
    • inversion (+/-) and (-/+), if mates on the same chromosome.

    • -
    • deletion (+/+), if mates on the same chromosome.

    • -
    • tandem-duplication (-/-), if mates on the same chromosome.

    • -
    - -
    -
    -

    Value

    - - -

    a list

    - - -
    - -
    -

    Examples

    -
    sv <- readRDS(system.file("extdata", "toy_sv.rds", package = "sigminer", mustWork = TRUE))
    -rs <- read_sv_as_rs(sv)
    -# svclass is optional
    -rs2 <- read_sv_as_rs(sv[, setdiff(colnames(sv), "svclass")])
    -identical(rs, rs2)
    -if (FALSE) {
    -tally_rs <- sig_tally(rs)
    -}
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/read_vcf.html b/docs/reference/read_vcf.html deleted file mode 100644 index a401af8d..00000000 --- a/docs/reference/read_vcf.html +++ /dev/null @@ -1,144 +0,0 @@ - -Read VCF Files as MAF Object — read_vcf • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    MAF file is more recommended. In this function, we will mimic -the MAF object from the key c(1, 2, 4, 5, 7) columns of VCF file.

    -
    - -
    -
    read_vcf(
    -  vcfs,
    -  samples = NULL,
    -  genome_build = c("hg19", "hg38", "T2T", "mm10", "mm9", "ce11"),
    -  keep_only_pass = FALSE,
    -  verbose = TRUE
    -)
    -
    - -
    -

    Arguments

    -
    vcfs
    -

    VCF file paths.

    - - -
    samples
    -

    sample names for VCF files.

    - - -
    genome_build
    -

    genome build version like "hg19".

    - - -
    keep_only_pass
    -

    if TRUE, keep only 'PASS' mutation for analysis.

    - - -
    verbose
    -

    if TRUE, print extra info.

    - -
    -
    -

    Value

    - - -

    a MAF.

    -
    -
    -

    See also

    - -
    - -
    -

    Examples

    -
    vcfs <- list.files(system.file("extdata", package = "sigminer"), "*.vcf", full.names = TRUE)
    -# \donttest{
    -maf <- read_vcf(vcfs)
    -maf <- read_vcf(vcfs, keep_only_pass = TRUE)
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/read_xena_variants.html b/docs/reference/read_xena_variants.html deleted file mode 100644 index ce08933e..00000000 --- a/docs/reference/read_xena_variants.html +++ /dev/null @@ -1,124 +0,0 @@ - -Read UCSC Xena Variant Format Data as MAF Object — read_xena_variants • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Read UCSC Xena Variant Format Data as MAF Object

    -
    - -
    -
    read_xena_variants(path)
    -
    - -
    -

    Arguments

    -
    path
    -

    a path to variant file.

    - -
    -
    -

    Value

    - - -

    a MAF object.

    -
    - -
    -

    Examples

    -
    # \donttest{
    -if (requireNamespace("UCSCXenaTools")) {
    -  library(UCSCXenaTools)
    -  options(use_hiplot = TRUE)
    -  example_file <- XenaGenerate(subset = XenaDatasets == "mc3/ACC_mc3.txt") %>%
    -    XenaQuery() %>%
    -    XenaDownload()
    -  x <- read_xena_variants(example_file$destfiles)
    -  x@data
    -  y <- sig_tally(x)
    -  y
    -}
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/report_bootstrap_p_value.html b/docs/reference/report_bootstrap_p_value.html deleted file mode 100644 index e3152aae..00000000 --- a/docs/reference/report_bootstrap_p_value.html +++ /dev/null @@ -1,114 +0,0 @@ - -Report P Values from bootstrap Results — report_bootstrap_p_value • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    See examples in sig_fit_bootstrap.

    -
    - -
    -
    report_bootstrap_p_value(x, thresholds = c(0.01, 0.05, 0.1))
    -
    - -
    -

    Arguments

    -
    x
    -

    a (list of) result from sig_fit_bootstrap.

    - - -
    thresholds
    -

    a vector of relative exposure threshold for calculating p values.

    - -
    -
    -

    Value

    - - -

    a (list of) matrix

    - - -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/same_size_clustering.html b/docs/reference/same_size_clustering.html deleted file mode 100644 index 823ac795..00000000 --- a/docs/reference/same_size_clustering.html +++ /dev/null @@ -1,151 +0,0 @@ - -Same Size Clustering — same_size_clustering • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    This is a wrapper for several implementations that classify samples into -same size clusters; for details, please see this blog. -The source code is modified based on code from the blog.

    -
    - -
    -
    same_size_clustering(
    -  mat,
    -  diss = FALSE,
    -  clsize = NULL,
    -  algo = c("nnit", "hcbottom", "kmvar"),
    -  method = c("maxd", "random", "mind", "elki", "ward.D", "average", "complete", "single")
    -)
    -
    - -
    -

    Arguments

    -
    mat
    -

    a data/distance matrix.

    - - -
    diss
    -

    if TRUE, treat mat as a distance matrix.

    - - -
    clsize
    -

    integer, number of samples within a cluster.

    - - -
    algo
    -

    algorithm.

    - - -
    method
    -

    method.

    - -
    -
    -

    Value

    - - -

    a vector.

    -
    - -
    -

    Examples

    -
    set.seed(1234L)
    -x <- rbind(
    -  matrix(rnorm(100, sd = 0.3), ncol = 2),
    -  matrix(rnorm(100, mean = 1, sd = 0.3), ncol = 2)
    -)
    -colnames(x) <- c("x", "y")
    -
    -y1 <- same_size_clustering(x, clsize = 10)
    -y11 <- same_size_clustering(as.matrix(dist(x)), clsize = 10, diss = TRUE)
    -
    -y2 <- same_size_clustering(x, clsize = 10, algo = "hcbottom", method = "ward.D")
    -
    -y3 <- same_size_clustering(x, clsize = 10, algo = "kmvar")
    -y33 <- same_size_clustering(as.matrix(dist(x)), clsize = 10, algo = "kmvar", diss = TRUE)
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/scoring.html b/docs/reference/scoring.html deleted file mode 100644 index 966b05a4..00000000 --- a/docs/reference/scoring.html +++ /dev/null @@ -1,177 +0,0 @@ - -Score Copy Number Profile — scoring • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Returns quantification of copy number profile and events including -tandem duplication and chromothripsis etc. -Only copy number data from autosomes is used here. -Some of the quantification methods are rough; -use them at your own risk. You should do some extra work to check the -result scores.

    -
    - -
    -
    scoring(object, TD_size_cutoff = c(1000, 100000, 2000000), TD_cn_cutoff = Inf)
    -
    - -
    -

    Arguments

    -
    object
    -

    an object of CopyNumber.

    - - -
    TD_size_cutoff
    -

    a length-3 numeric vector used to specify the start, midpoint, end -segment size for determining tandem duplication size range, midpoint is used to split -TD into short TD and long TD. Default is 1Kb to 100Kb for short TD, 100Kb to 2Mb for long -TD.

    - - -
    TD_cn_cutoff
    -

    a number defining the maximum copy number of TD, -default is Inf, i.e. no cutoff.

    - -
    -
    -

    Value

    - - -

    a data.table with following scores:

    • cnaBurden: CNA burden representing the altered genomic fraction as previously reported.

    • -
    • cnaLoad: CNA load representing the quantity of copy number alteration.

    • -
    • MACN: mean altered copy number (MACN) reflecting the property of altered copy number segments, -calculated as -$$MACN = \frac{\sum_{i} CN_i}{N_{cnv}}$$ -where \(CN_i\) is the copy number of altered segment \(i\), \(N_{cnv}\) is -the number of CNV.

    • -
    • weightedMACN: same as MACN but weighted with segment length. -$$MACN_{weighted} = \frac{\sum_{i} (CN_i \times L_{i})}{ \sum_{i} L_{i} }$$ -where \(L_{i}\) is the length of altered copy number segment \(i\).

    • -
    • Ploidy: ploidy, the formula is same as weightedMACN but using all copy number segments instead of -altered copy number segments.

    • -
    • TDP_pnas: tandem duplication phenotype score from https://www.pnas.org/doi/10.1073/pnas.1520010113, -the threshold k in reference is omitted. -$$TDP = - \frac{\sum_{chr} |TD_{obs}-TD_{exp}|}{TD_{total}}$$ -where \(TD_{total}\) is the number of TD, \(TD_{obs}\) and -\(TD_{exp}\) are the observed number of TD and the expected number of TD for each chromosome.

    • -
    • TDP: tandem duplication score defined by our group, -TD represents segments with copy number greater than 2. -$$TD = \frac{TD_{total}}{\sum_{chr} |TD_{obs}-TD_{exp}|+1}$$

    • -
    • sTDP: TDP score for short TD.

    • -
    • lTDP: TDP score for long TD.

    • -
    • TDP_size : TDP region size (Mb).

    • -
    • sTDP_size: sTDP region size (Mb).

    • -
    • lTDP_size: lTDP region size(Mb).

    • -
    • Chromoth_state: chromothripsis state score, -according to reference doi:10.1016/j.cell.2013.02.023 -, -chromothripsis frequently leads to massive loss of segments on -the affected chromosome with segmental losses being interspersed with regions displaying -normal (disomic) copy-number (e.g., copy-number states oscillating between -copy-number = 1 and copy-number = 2), forming tens to hundreds of locally clustered DNA rearrangements. -Most methods use both SV and CNV to infer chromothripsis, here we roughly quantify it with -$$\sum_{chr}{N_{OsCN}^2}$$ -where \(N_{OsCN}\) is the number of oscillating copy number patterns "2-1-2" for each chromosome.

    • -
    - -
    -

    Examples

    -
    # Load copy number object
    -load(system.file("extdata", "toy_copynumber.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -
    -d <- scoring(cn)
    -d
    -
    -d2 <- scoring(cn, TD_cn_cutoff = 4L)
    -d2
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_catalogue.html b/docs/reference/show_catalogue.html deleted file mode 100644 index b2f85fb9..00000000 --- a/docs/reference/show_catalogue.html +++ /dev/null @@ -1,168 +0,0 @@ - -Show Alteration Catalogue Profile — show_catalogue • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Show Alteration Catalogue Profile

    -
    - -
    -
    show_catalogue(
    -  catalogue,
    -  mode = c("SBS", "copynumber", "DBS", "ID", "RS"),
    -  method = "Wang",
    -  normalize = c("raw", "row", "feature"),
    -  style = c("default", "cosmic"),
    -  samples = NULL,
    -  samples_name = NULL,
    -  x_lab = "Components",
    -  y_lab = "Counts",
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    catalogue
    -

    result from sig_tally or a -matrix with row representing components (motifs) and -column representing samples

    - - -
    mode
    -

    signature type for plotting, now supports 'copynumber', 'SBS', -'DBS', 'ID' and 'RS' (genome rearrangement signature).

    - - -
    method
    -

    method for copy number feature classification in sig_tally, -can be one of "Wang" ("W"), "S".

    - - -
    normalize
    -

    normalize method.

    - - -
    style
    -

    plot style, one of 'default' and 'cosmic'.

    - - -
    samples
    -

    default is NULL, show sum of all samples in one row. -If not NULL, show specified samples.

    - - -
    samples_name
    -

    set the sample names shown in plot.

    - - -
    x_lab
    -

    x axis lab.

    - - -
    y_lab
    -

    y axis lab.

    - - -
    ...
    -

    other arguments passing to show_sig_profile.

    - -
    -
    -

    Value

    - - -

    a ggplot object

    -
    - -
    -

    Examples

    -
    # \donttest{
    -data("simulated_catalogs")
    -p <- show_catalogue(simulated_catalogs$set1, style = "cosmic")
    -p
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_cn_circos.html b/docs/reference/show_cn_circos.html deleted file mode 100644 index 53c28d39..00000000 --- a/docs/reference/show_cn_circos.html +++ /dev/null @@ -1,171 +0,0 @@ - -Show Copy Number Profile in Circos — show_cn_circos • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Another visualization method for copy number profile like show_cn_profile.

    -
    - -
    -
    show_cn_circos(
    -  data,
    -  samples = NULL,
    -  show_title = TRUE,
    -  chrs = paste0("chr", 1:22),
    -  genome_build = c("hg19", "hg38", "T2T", "mm10", "mm9", "ce11"),
    -  col = NULL,
    -  side = "inside",
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    data
    -

    a CopyNumber object or a data.frame containing at least 'chromosome', 'start', -'end', 'segVal' these columns.

    - - -
    samples
    -

    default is NULL, can be a character vector representing multiple samples or -number of samples to show. -If data argument is a data.frame, a column called sample must exist.

    - - -
    show_title
    -

    if TRUE (default), show title with sample ID.

    - - -
    chrs
    -

    chromosomes start with 'chr'.

    - - -
    genome_build
    -

    genome build version, used when data is a data.frame, should be 'hg19' or 'hg38'.

    - - -
    col
    -

    colors for the heatmaps. If it is NULL, set to -circlize::colorRamp2(c(1, 2, 4), c("blue", "black", "red")).

    - - -
    side
    -

    side of the heatmaps.

    - - -
    ...
    -

    other parameters passing to circlize::circos.genomicHeatmap.

    - -
    -
    -

    Value

    - - -

    a circos plot

    -
    - -
    -

    Examples

    -
    load(system.file("extdata", "toy_copynumber.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -# \donttest{
    -show_cn_circos(cn, samples = 1)
    -show_cn_circos(cn, samples = "TCGA-99-7458-01A-11D-2035-01")
    -
    -## Remove title
    -show_cn_circos(cn, samples = 1, show_title = FALSE)
    -
    -## Subset chromosomes
    -show_cn_circos(cn, samples = 1, chrs = c("chr1", "chr2", "chr3"))
    -
    -## Arrange plots
    -layout(matrix(1:4, 2, 2))
    -show_cn_circos(cn, samples = 4)
    -
    -layout(1) # reset layout
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_cn_components.html b/docs/reference/show_cn_components.html deleted file mode 100644 index bfc4c941..00000000 --- a/docs/reference/show_cn_components.html +++ /dev/null @@ -1,160 +0,0 @@ - -Show Copy Number Components — show_cn_components • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Show classified components ("Wang" ("W") method) for copy number data.

    -
    - -
    -
    show_cn_components(
    -  parameters,
    -  method = "Wang",
    -  show_weights = TRUE,
    -  log_y = FALSE,
    -  return_plotlist = FALSE,
    -  base_size = 12,
    -  nrow = 2,
    -  align = "hv",
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    parameters
    -

    a data.frame containing parameter components, obtained -from the sig_tally function.

    - - -
    method
    -

    method for feature classification, can be one of -"Wang" ("W"), "S" (for method described in Steele et al. 2019), -"X" (for method described in Tao et al. 2023).

    - - -
    show_weights
    -

    default is TRUE, show weights for each component. -Only used when method is "Macintyre".

    - - -
    log_y
    -

    logical, if TRUE, show log10 based y axis, only -works for input from "Wang" ("W") method.

    - - -
    return_plotlist
    -

    if TRUE, return a list of ggplot objects instead of a combined plot.

    - - -
    base_size
    -

    overall font size.

    - - -
    nrow
    -

    (optional) Number of rows in the plot grid.

    - - -
    align
    -

    (optional) Specifies whether graphs in the grid should be horizontally ("h") or -vertically ("v") aligned. Options are "none" (default), "hv" (align in both directions), "h", and "v".

    - - -
    ...
    -

    other options pass to plot_grid function of cowplot package.

    - -
    -
    -

    Value

    - - -

    a ggplot object

    -
    -
    -

    Author

    -

    Shixiang Wang w_shixiang@163.com

    -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_cn_distribution.html b/docs/reference/show_cn_distribution.html deleted file mode 100644 index 6a5b8419..00000000 --- a/docs/reference/show_cn_distribution.html +++ /dev/null @@ -1,157 +0,0 @@ - -Show Copy Number Distribution either by Length or Chromosome — show_cn_distribution • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Visually summarize copy number distribution either by copy number segment length -or chromosome. Input is a CopyNumber object, genome_build option will -read from genome_build slot of object.

    -
    - -
    -
    show_cn_distribution(
    -  data,
    -  rm_normal = TRUE,
    -  mode = c("ld", "cd"),
    -  fill = FALSE,
    -  scale_chr = TRUE,
    -  base_size = 14
    -)
    -
    - -
    -

    Arguments

    -
    data
    -

    a CopyNumber object.

    - - -
    rm_normal
    -

    logical. Whether to remove normal copy (i.e. "segVal" equals 2), default is TRUE.

    - - -
    mode
    -

    either "ld" for distribution by CN length or "cd" for distribution by chromosome.

    - - -
    fill
    -

    when mode is "cd" and fill is TRUE, plot percentage instead of count.

    - - -
    scale_chr
    -

    logical. If TRUE, normalize count to per Megabase unit.

    - - -
    base_size
    -

    overall font size.

    - -
    -
    -

    Value

    - - -

    a ggplot object

    -
    -
    -

    Author

    -

    Shixiang Wang w_shixiang@163.com

    -
    - -
    -

    Examples

    -
    # Load copy number object
    -load(system.file("extdata", "toy_copynumber.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -# Plot distribution
    -p1 <- show_cn_distribution(cn)
    -p1
    -p2 <- show_cn_distribution(cn, mode = "cd")
    -p2
    -p3 <- show_cn_distribution(cn, mode = "cd", fill = TRUE)
    -p3
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_cn_features.html b/docs/reference/show_cn_features.html deleted file mode 100644 index d3754d18..00000000 --- a/docs/reference/show_cn_features.html +++ /dev/null @@ -1,160 +0,0 @@ - -Show Copy Number Feature Distributions — show_cn_features • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Show Copy Number Feature Distributions

    -
    - -
    -
    show_cn_features(
    -  features,
    -  method = "Wang",
    -  rm_outlier = FALSE,
    -  ylab = NULL,
    -  log_y = FALSE,
    -  return_plotlist = FALSE,
    -  base_size = 12,
    -  nrow = 2,
    -  align = "hv",
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    features
    -

    a feature list generated from the sig_tally function.

    - - -
    method
    -

    method for feature classification, can be one of -"Wang" ("W"), "S" (for method described in Steele et al. 2019), -"X" (for method described in Tao et al. 2023).

    - - -
    rm_outlier
    -

    default is FALSE, if TRUE, remove outliers. Only -works when method is "Wang" ("W").

    - - -
    ylab
    -

    lab of y axis.

    - - -
    log_y
    -

    logical, if TRUE, show log10 based y axis, only -works for input from "Wang" ("W") method.

    - - -
    return_plotlist
    -

    if TRUE, return a list of ggplot objects instead of a combined plot.

    - - -
    base_size
    -

    overall font size.

    - - -
    nrow
    -

    (optional) Number of rows in the plot grid.

    - - -
    align
    -

    (optional) Specifies whether graphs in the grid should be horizontally ("h") or -vertically ("v") aligned. Options are "none" (default), "hv" (align in both directions), "h", and "v".

    - - -
    ...
    -

    other options pass to plot_grid function of cowplot package.

    - -
    -
    -

    Value

    - - -

    a ggplot object

    -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_cn_freq_circos.html b/docs/reference/show_cn_freq_circos.html deleted file mode 100644 index 90bc7ccc..00000000 --- a/docs/reference/show_cn_freq_circos.html +++ /dev/null @@ -1,182 +0,0 @@ - -Show Copy Number Variation Frequency Profile with Circos — show_cn_freq_circos • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Show Copy Number Variation Frequency Profile with Circos

    -
    - -
    -
    show_cn_freq_circos(
    -  data,
    -  groups = NULL,
    -  cutoff = 2L,
    -  resolution_factor = 1L,
    -  title = c("AMP", "DEL"),
    -  chrs = paste0("chr", 1:22),
    -  genome_build = c("hg19", "hg38", "T2T", "mm10", "mm9", "ce11"),
    -  cols = NULL,
    -  plot_ideogram = TRUE,
    -  track_height = 0.5,
    -  ideogram_height = 1,
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    data
    -

    a CopyNumber object or a data.frame containing -at least 'chromosome', 'start', 'end', 'segVal', 'sample' these columns.

    - - -
    groups
    -

    a named list or a column name for specifying groups.

    - - -
    cutoff
    -

    copy number value cutoff for splitting data into AMP and DEL. -The values equal to cutoff are discarded. Default is 2, you can also set -a length-2 vector, e.g. c(2, 2).

    - - -
    resolution_factor
    -

    an integer to control the resolution. -When it is 1 (default), compute frequency in each cytoband. -When it is 2, compute frequency in each half cytoband.

    - - -
    title
    -

    length-2 titles for AMP and DEL.

    - - -
    chrs
    -

    chromosomes start with 'chr'.

    - - -
    genome_build
    -

    genome build version, used when data is a data.frame, should be 'hg19' or 'hg38'.

    - - -
    cols
    -

    length-2 colors for AMP and DEL.

    - - -
    plot_ideogram
    -

    default is TRUE, show ideogram.

    - - -
    track_height
    -

    track height in mm unit.

    - - -
    ideogram_height
    -

    ideogram height in mm unit.

    - - -
    ...
    -

    other parameters passing to circlize::circos.genomicLines.

    - -
    -
    -

    Value

    - - -

    Nothing.

    -
    - -
    -

    Examples

    -
    # \donttest{
    -load(system.file("extdata", "toy_copynumber.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -
    -show_cn_freq_circos(cn)
    -ss <- unique(cn@data$sample)
    -show_cn_freq_circos(cn, groups = list(a = ss[1:5], b = ss[6:10]), cols = c("red", "green"))
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_cn_group_profile.html b/docs/reference/show_cn_group_profile.html deleted file mode 100644 index 7fbf58d6..00000000 --- a/docs/reference/show_cn_group_profile.html +++ /dev/null @@ -1,215 +0,0 @@ - -Show Summary Copy Number Profile for Sample Groups — show_cn_group_profile • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Show Summary Copy Number Profile for Sample Groups

    -
    - -
    -
    show_cn_group_profile(
    -  data,
    -  groups = NULL,
    -  fill_area = TRUE,
    -  cols = NULL,
    -  chrs = paste0("chr", c(1:22, "X")),
    -  genome_build = c("hg19", "hg38", "T2T", "mm10", "mm9", "ce11"),
    -  cutoff = 2L,
    -  resolution_factor = 1L,
    -  force_y_limit = TRUE,
    -  highlight_genes = NULL,
    -  repel = FALSE,
    -  nrow = NULL,
    -  ncol = NULL,
    -  return_plotlist = FALSE
    -)
    -
    - -
    -

    Arguments

    -
    data
    -

    a CopyNumber object or a data.frame containing -at least 'chromosome', 'start', 'end', 'segVal', 'sample' these columns.

    - - -
    groups
    -

    a named list or a column name for specifying groups.

    - - -
    fill_area
    -

    default is TRUE, fill area with colors.

    - - -
    cols
    -

    length-2 colors for AMP and DEL.

    - - -
    chrs
    -

    chromosomes start with 'chr'.

    - - -
    genome_build
    -

    genome build version, used when data is a data.frame, should be 'hg19' or 'hg38'.

    - - -
    cutoff
    -

    copy number value cutoff for splitting data into AMP and DEL. -The values equal to cutoff are discarded. Default is 2, you can also set -a length-2 vector, e.g. c(2, 2).

    - - -
    resolution_factor
    -

    an integer to control the resolution. -When it is 1 (default), compute frequency in each cytoband. -When it is 2, compute frequency in each half cytoband.

    - - -
    force_y_limit
    -

    default is TRUE, force multiple plots -to have same y ranges. You can also set a length-2 numeric value.

    - - -
    highlight_genes
    -

    gene list to highlight.

    - - -
    repel
    -

    if TRUE (default is FALSE), repel highlight genes to -avoid overlap.

    - - -
    nrow
    -

    number of rows in the plot grid when multiple samples are selected.

    - - -
    ncol
    -

    number of columns in the plot grid when multiple samples are selected.

    - - -
    return_plotlist
    -

    default is FALSE, if TRUE, return a plot list instead of a combined plot.

    - -
    -
    -

    Value

    - - -

    a (list of) ggplot object.

    -
    - -
    -

    Examples

    -
    load(system.file("extdata", "toy_copynumber.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -
    -p1 <- show_cn_group_profile(cn)
    -p1
    -# \donttest{
    -ss <- unique(cn@data$sample)
    -p2 <- show_cn_group_profile(cn, groups = list(a = ss[1:5], b = ss[6:10]))
    -p2
    -p3 <- show_cn_group_profile(cn,
    -  groups = list(g1 = ss[1:5], g2 = ss[6:10]),
    -  force_y_limit = c(-1, 1), nrow = 2
    -)
    -p3
    -
    -## Set custom cutoff for custom data
    -data <- cn@data
    -data$segVal <- data$segVal - 2L
    -p4 <- show_cn_group_profile(data,
    -  groups = list(g1 = ss[1:5], g2 = ss[6:10]),
    -  force_y_limit = c(-1, 1), nrow = 2,
    -  cutoff = c(0, 0)
    -)
    -p4
    -
    -## Add highlight gene
    -p5 <- show_cn_group_profile(cn, highlight_genes = c("TP53", "EGFR"))
    -p5
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_cn_profile.html b/docs/reference/show_cn_profile.html deleted file mode 100644 index add4c3e0..00000000 --- a/docs/reference/show_cn_profile.html +++ /dev/null @@ -1,192 +0,0 @@ - -Show Sample Copy Number Profile — show_cn_profile • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Sometimes it is very useful to check details about copy number profile for one or multiple -samples. This function is designed to do this job and can be further modified by ggplot2 -related packages.

    -
    - -
    -
    show_cn_profile(
    -  data,
    -  samples = NULL,
    -  show_n = NULL,
    -  show_title = FALSE,
    -  show_labels = NULL,
    -  chrs = paste0("chr", 1:22),
    -  position = NULL,
    -  genome_build = c("hg19", "hg38", "T2T", "mm10", "mm9", "ce11"),
    -  ylim = NULL,
    -  nrow = NULL,
    -  ncol = NULL,
    -  return_plotlist = FALSE
    -)
    -
    - -
    -

    Arguments

    -
    data
    -

    a CopyNumber object or a data.frame containing at least 'chromosome', 'start', -'end', 'segVal' these columns.

    - - -
    samples
    -

    default is NULL, can be a character vector representing multiple samples. If data argument -is a data.frame, a column called sample must exist.

    - - -
    show_n
    -

    number of samples to show, this is used for checking.

    - - -
    show_title
    -

    if TRUE, show title for multiple samples.

    - - -
    show_labels
    -

    one of NULL, "s" (for labelling short segments < 1e7) -or "a" (all segments).

    - - -
    chrs
    -

    chromosomes start with 'chr'.

    - - -
    position
    -

    a position range, e.g. "chr1:3218923-116319008". Only data -overlaps with this range will be shown.

    - - -
    genome_build
    -

    genome build version, used when data is a data.frame, should be 'hg19' or 'hg38'.

    - - -
    ylim
    -

    limits for y axis.

    - - -
    nrow
    -

    number of rows in the plot grid when multiple samples are selected.

    - - -
    ncol
    -

    number of columns in the plot grid when multiple samples are selected.

    - - -
    return_plotlist
    -

    default is FALSE, if TRUE, return a plot list instead of a combined plot.

    - -
    -
    -

    Value

    - - -

    a ggplot object or a list

    - - -
    - -
    -

    Examples

    -
    # Load copy number object
    -load(system.file("extdata", "toy_copynumber.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -
    -p <- show_cn_profile(cn, nrow = 2, ncol = 1)
    -p
    -# \donttest{
    -p2 <- show_cn_profile(cn,
    -  nrow = 2, ncol = 1,
    -  position = "chr1:3218923-116319008"
    -)
    -p2
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_cor.html b/docs/reference/show_cor.html deleted file mode 100644 index 86cbd904..00000000 --- a/docs/reference/show_cor.html +++ /dev/null @@ -1,187 +0,0 @@ - -A Simple and General Way for Association Analysis — show_cor • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    All variables must be continuous. -The matrix will be returned as an element of ggplot object. -This is basically a wrapper of R package -ggcorrplot.

    -
    - -
    -
    show_cor(
    -  data,
    -  x_vars = colnames(data),
    -  y_vars = x_vars,
    -  cor_method = "spearman",
    -  vis_method = "square",
    -  lab = TRUE,
    -  test = TRUE,
    -  hc_order = FALSE,
    -  p_adj = NULL,
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    data
    -

    a data.frame.

    - - -
    x_vars
    -

    variables/column names shown in x axis.

    - - -
    y_vars
    -

    variables/column names shown in y axis.

    - - -
    cor_method
    -

    method for correlation, default is 'spearman'.

    - - -
    vis_method
    -

    visualization method, default is 'square', -can also be 'circle'.

    - - -
    lab
    -

    logical value. If TRUE, add correlation coefficient on the plot.

    - - -
    test
    -

    if TRUE, run test for correlation and mark significance.

    - - -
    hc_order
    -

    logical value. If TRUE, -correlation matrix will be hc.ordered using hclust function.

    - - -
    p_adj
    -

    p adjust method, see stats::p.adjust for details.

    - - -
    ...
    -

    other parameters passing to ggcorrplot::ggcorrplot().

    - -
    -
    -

    Value

    - - -

    a ggplot object

    -
    -
    -

    See also

    -

    show_sig_feature_corrplot for specific and more powerful -association analysis and visualization.

    -
    - -
    -

    Examples

    -
    data("mtcars")
    -p1 <- show_cor(mtcars)
    -p2 <- show_cor(mtcars,
    -  x_vars = colnames(mtcars)[1:4],
    -  y_vars = colnames(mtcars)[5:8]
    -)
    -p3 <- show_cor(mtcars, vis_method = "circle", p_adj = "fdr")
    -p1
    -p1$cor
    -p2
    -p3
    -
    -## Auto detect problem variables
    -mtcars$xx <- 0L
    -p4 <- show_cor(mtcars)
    -p4
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_cosmic.html b/docs/reference/show_cosmic.html deleted file mode 100644 index 434f8e45..00000000 --- a/docs/reference/show_cosmic.html +++ /dev/null @@ -1,125 +0,0 @@ - -Show Signature Information in Web Browser — show_cosmic • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Show Signature Information in Web Browser

    -
    - -
    -
    show_cosmic(x = "home")
    -
    - -
    -

    Arguments

    -
    x
    -

    a string indicating location -("home" for COSMIC signature home, "legacy" for COSMIC v2 signatures, -"SBS" for COSMIC v3 SBS signatures, "DBS" for COSMIC v3 DBS signatures, -"ID" for COSMIC v3 INDEL signatures) or signature index (e.g. -"SBS1", "DBS2", "ID3").

    - -
    -
    -

    Value

    - - -

    Nothing.

    -
    - -
    -

    Examples

    -
    if (FALSE) {
    -show_cosmic()
    -show_cosmic("legacy")
    -show_cosmic("SBS")
    -show_cosmic("DBS")
    -show_cosmic("ID")
    -show_cosmic("SBS1")
    -show_cosmic("DBS2")
    -show_cosmic("ID3")
    -}
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_cosmic_sig_profile.html b/docs/reference/show_cosmic_sig_profile.html deleted file mode 100644 index b5264c90..00000000 --- a/docs/reference/show_cosmic_sig_profile.html +++ /dev/null @@ -1,159 +0,0 @@ - -Plot Reference (Mainly COSMIC) Signature Profile — show_cosmic_sig_profile • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Plot Reference (Mainly COSMIC) Signature Profile

    -
    - -
    -
    show_cosmic_sig_profile(
    -  sig_index = NULL,
    -  show_index = TRUE,
    -  sig_db = "legacy",
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    sig_index
    -

    a vector for signature index. "ALL" for all signatures.

    - - -
    show_index
    -

    if TRUE, show valid indices.

    - - -
    sig_db
    -

    default 'legacy', it can be 'legacy' (for COSMIC v2 'SBS'), -'SBS', 'DBS', 'ID' and 'TSB' (for COSMIC v3.1 signatures) -for small scale mutations. -For more specific details, it can also be 'SBS_hg19', 'SBS_hg38', -'SBS_mm9', 'SBS_mm10', 'DBS_hg19', 'DBS_hg38', 'DBS_mm9', 'DBS_mm10' to use -COSMIC v3 reference signatures from Alexandrov, Ludmil B., et al. (2020) (reference #1). -In addition, it can be one of "SBS_Nik_lab_Organ", "RS_Nik_lab_Organ", -"SBS_Nik_lab", "RS_Nik_lab" to refer to reference signatures from -Degasperi, Andrea, et al. (2020) (reference #2); -"RS_BRCA560", "RS_USARC" to reference signatures from BRCA560 and USARC cohorts; -"CNS_USARC" (40 categories), "CNS_TCGA" (48 categories) to reference copy number signatures from USARC cohort and TCGA; -"CNS_TCGA176" (176 categories) and "CNS_PCAWG176" (176 categories) to reference copy number signatures from PCAWG and TCGA separately. -UPDATE, the latest version of reference signatures can be automatically -downloaded and loaded from https://cancer.sanger.ac.uk/signatures/downloads/ -when an option with latest_ prefix is specified (e.g. "latest_SBS_GRCh37"). -Note: the signature profiles for different genome builds are basically the same. -And specific database (e.g. 'SBS_mm10') contains fewer signatures than all COSMIC -signatures (because some signatures are not detected from Alexandrov, Ludmil B., et al. (2020)). -For all available options, check the parameter setting.

    - - -
    ...
    -

    other arguments passing to show_sig_profile.

    - -
    -
    -

    Value

    - - -

    a ggplot object

    -
    -
    -

    Author

    -

    Shixiang Wang w_shixiang@163.com

    -
    - -
    -

    Examples

    -
    # \donttest{
    -show_cosmic_sig_profile()
    -show_cosmic_sig_profile(sig_db = "SBS")
    -show_cosmic_sig_profile(sig_index = 1:5)
    -show_cosmic_sig_profile(sig_db = "SBS", sig_index = c("10a", "17a"))
    -
    -gg <- show_cosmic_sig_profile(sig_index = 1:5)
    -gg$aetiology
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_group_comparison.html b/docs/reference/show_group_comparison.html deleted file mode 100644 index 95ea75db..00000000 --- a/docs/reference/show_group_comparison.html +++ /dev/null @@ -1,246 +0,0 @@ - -Plot Group Comparison Result — show_group_comparison • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Using result data from get_group_comparison, this function plots -genotypes/phenotypes comparison between signature groups using the ggplot2 package and returns -a list of ggplot objects containing individual and combined plots. The combined -plot can be easily saved locally using cowplot::save_plot(). Of note, by default Fisher -test p values are shown for categorical data and fdr values are shown for -continuous data.

    -
    - -
    -
    show_group_comparison(
    -  group_comparison,
    -  xlab = "group",
    -  ylab_co = NA,
    -  legend_title_ca = NA,
    -  legend_position_ca = "bottom",
    -  set_ca_sig_yaxis = FALSE,
    -  set_ca_custom_xlab = FALSE,
    -  show_pvalue = TRUE,
    -  ca_p_threshold = 0.01,
    -  method = "wilcox.test",
    -  p.adjust.method = "fdr",
    -  base_size = 12,
    -  font_size_x = 12,
    -  text_angle_x = 30,
    -  text_hjust_x = 0.2,
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    group_comparison
    -

    a list from result of get_group_comparison function.

    - - -
    xlab
    -

    lab name of x axis for all plots. if it is NA, remove title for x axis.

    - - -
    ylab_co
    -

    lab name of y axis for plots of continuous type data. Of note, -this argument should be a character vector with the same length as group_comparison, -the location for categorical type data should be marked with NA.

    - - -
    legend_title_ca
    -

    legend title for plots of categorical type data.

    - - -
    legend_position_ca
    -

    legend position for plots of categorical type data. -Of note, -this argument should be a character vector with the same length as group_comparison; -the positions for continuous type data should be marked with NA.

    - - -
    set_ca_sig_yaxis
    -

    if TRUE, use y axis to show signature proportion instead of -variable proportion.

    - - -
    set_ca_custom_xlab
    -

    only works when set_ca_sig_yaxis is TRUE. If -TRUE, set x labels using input xlab, otherwise variable names will be used.

    - - -
    show_pvalue
    -

    if TRUE, show p values.

    - - -
    ca_p_threshold
    -

    a p threshold for categorical variables, default is 0.01. -A p value less than 0.01 will be shown as P < 0.01.

    - - -
    method
    -

    a character string indicating which method to be used for comparing means. -It can be 't.test', 'wilcox.test', etc.

    - - -
    p.adjust.method
    -

    correction method, default is 'fdr'. Run p.adjust.methods to -see all available options.

    - - -
    base_size
    -

    overall font size.

    - - -
    font_size_x
    -

    font size for x.

    - - -
    text_angle_x
    -

    text angle for x.

    - - -
    text_hjust_x
    -

    adjust x axis text

    - - -
    ...
    -

    other parameters passed to ggpubr::compare_means() or ggpubr::stat_compare_means() -according to the specified method.

    - -
    -
    -

    Value

    - - -

    a list of ggplot objects.

    -
    -
    -

    Author

    -

    Shixiang Wang w_shixiang@163.com

    -
    - -
    -

    Examples

    -
    # \donttest{
    -load(system.file("extdata", "toy_copynumber_signature_by_W.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -
    -# Assign samples to clusters
    -groups <- get_groups(sig, method = "k-means")
    -
    -set.seed(1234)
    -
    -groups$prob <- rnorm(10)
    -groups$new_group <- sample(c("1", "2", "3", "4", NA), size = nrow(groups), replace = TRUE)
    -
    -# Compare groups (filter NAs for categorical columns)
    -groups.cmp <- get_group_comparison(groups[, -1],
    -  col_group = "group",
    -  cols_to_compare = c("prob", "new_group"),
    -  type = c("co", "ca"), verbose = TRUE
    -)
    -
    -# Compare groups (Set NAs of categorical columns to 'Rest')
    -groups.cmp2 <- get_group_comparison(groups[, -1],
    -  col_group = "group",
    -  cols_to_compare = c("prob", "new_group"),
    -  type = c("co", "ca"), NAs = "Rest", verbose = TRUE
    -)
    -
    -show_group_comparison(groups.cmp)
    -
    -ggcomp <- show_group_comparison(groups.cmp2)
    -ggcomp$co_comb
    -ggcomp$ca_comb
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_group_distribution.html b/docs/reference/show_group_distribution.html deleted file mode 100644 index b4e1cb20..00000000 --- a/docs/reference/show_group_distribution.html +++ /dev/null @@ -1,222 +0,0 @@ - -Show Groupped Variable Distribution — show_group_distribution • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    This is a general function, it can be used in any proper analysis.

    -
    - -
    -
    show_group_distribution(
    -  data,
    -  gvar,
    -  dvar,
    -  fun = stats::median,
    -  order_by_fun = FALSE,
    -  alpha = 0.8,
    -  g_label = "label",
    -  g_angle = 0,
    -  g_position = "top",
    -  point_size = 1L,
    -  segment_size = 1L,
    -  segment_color = "red",
    -  xlab = NULL,
    -  ylab = NULL,
    -  nrow = 1L,
    -  background_color = c("#DCDCDC", "#F5F5F5")
    -)
    -
    - -
    -

    Arguments

    -
    data
    -

    a data.frame.

    - - -
    gvar
    -

    a group variable name/index.

    - - -
    dvar
    -

    a distribution variable name/index.

    - - -
    fun
    -

    a function to summarize, default is stats::median, can also be mean.

    - - -
    order_by_fun
    -

    if TRUE, reorder the groups by summary measure computed -by argument fun.

    - - -
    alpha
    -

    alpha for points, range from 0 to 1.

    - - -
    g_label
    -

    a string 'label' (default) for labeling with sample size, -or 'norm' to show just group name, or a named vector to set facet labels.

    - - -
    g_angle
    -

    angle for facet labels, default is 0.

    - - -
    g_position
    -

    position for facet labels, default is 'top', can also -be 'bottom'.

    - - -
    point_size
    -

    size of point.

    - - -
    segment_size
    -

    size of segment.

    - - -
    segment_color
    -

    color of segment.

    - - -
    xlab
    -

    title for x axis.

    - - -
    ylab
    -

    title for y axis.

    - - -
    nrow
    -

    number of row.

    - - -
    background_color
    -

    background color for plot panel.

    - -
    -
    -

    Value

    - - -

    a ggplot object.

    -
    -
    -

    Author

    -

    Shixiang Wang w_shixiang@163.com

    -
    - -
    -

    Examples

    -
    # \donttest{
    -set.seed(1234)
    -data <- data.frame(
    -  yval = rnorm(120),
    -  gr = c(rep("A", 50), rep("B", 40), rep("C", 30))
    -)
    -p <- show_group_distribution(data,
    -  gvar = 2, dvar = 1,
    -  g_label = "norm",
    -  background_color = "grey"
    -)
    -p
    -p2 <- show_group_distribution(data,
    -  gvar = "gr", dvar = "yval",
    -  g_position = "bottom",
    -  order_by_fun = TRUE,
    -  alpha = 0.3
    -)
    -p2
    -
    -# Set custom group names
    -p3 <- show_group_distribution(data,
    -  gvar = 2, dvar = 1,
    -  g_label = c("A" = "X", "B" = "Y", "C" = "Z")
    -)
    -p3
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_group_enrichment.html b/docs/reference/show_group_enrichment.html deleted file mode 100644 index 52081d56..00000000 --- a/docs/reference/show_group_enrichment.html +++ /dev/null @@ -1,178 +0,0 @@ - -Show Group Enrichment Result — show_group_enrichment • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    See group_enrichment for examples. -NOTE the box fill and the box text have different meanings.

    -
    - -
    -
    show_group_enrichment(
    -  df_enrich,
    -  return_list = FALSE,
    -  scales = "free",
    -  add_text_annotation = TRUE,
    -  fill_by_p_value = TRUE,
    -  use_fdr = TRUE,
    -  cut_p_value = FALSE,
    -  cut_breaks = c(-Inf, -5, log10(0.05), -log10(0.05), 5, Inf),
    -  cut_labels = c("↓ 1e-5", "↓ 0.05", "non-significant", "↑ 0.05", "↑ 1e-5"),
    -  fill_scale = scale_fill_gradient2(low = "#08A76B", mid = "white", high = "red",
    -    midpoint = ifelse(fill_by_p_value, 0, 1)),
    -  cluster_row = FALSE,
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    df_enrich
    -

    result data.frame from group_enrichment.

    - - -
    return_list
    -

    if TRUE, return a list of ggplot objects so users -can combine multiple plots with other R packages like patchwork.

    - - -
    scales
    -

    Should scales be fixed ("fixed"), -free ("free", the default for this function), or free in one dimension ("free_x", -"free_y")?

    - - -
    add_text_annotation
    -

    if TRUE, add text annotation in box. -When p values are shown with filled color, the text indicates relative change; -when relative change is shown with filled color, the text indicates p value.

    - - -
    fill_by_p_value
    -

    if TRUE, show log10 based p values with filled color. -The +/- of p values indicates change direction. -If p values are mapped to fill, then text shows effect size, and vice versa.

    - - -
    use_fdr
    -

    if TRUE, show FDR values instead of raw p-values.

    - - -
    cut_p_value
    -

    if TRUE, cut p values into 5 regions for better visualization. -Only works when fill_by_p_value = TRUE.

    - - -
    cut_breaks
    -

    when cut_p_value is TRUE, this option sets the (log10 based) breaks.

    - - -
    cut_labels
    -

    when cut_p_value is TRUE, this option sets the labels.

    - - -
    fill_scale
    -

    a Scale object generated by ggplot2 package to -set color for continuous values.

    - - -
    cluster_row
    -

    if TRUE, cluster rows with Hierarchical Clustering ('complete' method).

    - - -
    ...
    -

    other parameters passing to ggplot2::facet_wrap, only used -when return_list is FALSE.

    - -
    -
    -

    Value

    - - -

    a (list of) ggplot object.

    -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_group_mapping.html b/docs/reference/show_group_mapping.html deleted file mode 100644 index 9a815141..00000000 --- a/docs/reference/show_group_mapping.html +++ /dev/null @@ -1,170 +0,0 @@ - -Map Groups using Sankey — show_group_mapping • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    This feature is designed for signature analysis. However, users can also use -it in other similar situations.

    -
    - -
    -
    show_group_mapping(
    -  data,
    -  col_to_flow,
    -  cols_to_map,
    -  include_sig = FALSE,
    -  fill_na = FALSE,
    -  title = NULL,
    -  xlab = NULL,
    -  ylab = NULL,
    -  custom_theme = cowplot::theme_minimal_hgrid()
    -)
    -
    - -
    -

    Arguments

    -
    data
    -

    a data.frame containing signature group and other categorical groups.

    - - -
    col_to_flow
    -

    length-1 character showing the column to flow, typically a signature group.

    - - -
    cols_to_map
    -

    character vector showing colnames of other groups.

    - - -
    include_sig
    -

    default is FALSE; if TRUE, show the signature group.

    - - -
    fill_na
    -

    length-1 string to fill NA, default is FALSE.

    - - -
    title
    -

    the title.

    - - -
    xlab
    -

    label for x axis.

    - - -
    ylab
    -

    label for y axis.

    - - -
    custom_theme
    -

    theme for plotting, default is cowplot::theme_minimal_hgrid().

    - -
    -
    -

    Value

    - - -

    a ggplot object

    -
    - -
    -

    Examples

    -
    # \donttest{
    -data <- dplyr::tibble(
    -  Group1 = rep(LETTERS[1:5], each = 10),
    -  Group2 = rep(LETTERS[6:15], each = 5),
    -  zzzz = c(rep("xx", 20), rep("yy", 20), rep(NA, 10))
    -)
    -p1 <- show_group_mapping(data, col_to_flow = "Group1", cols_to_map = colnames(data)[-1])
    -p1
    -
    -p2 <- show_group_mapping(data,
    -  col_to_flow = "Group1", cols_to_map = colnames(data)[-1],
    -  include_sig = TRUE
    -)
    -p2
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_groups.html b/docs/reference/show_groups.html deleted file mode 100644 index bab0885b..00000000 --- a/docs/reference/show_groups.html +++ /dev/null @@ -1,116 +0,0 @@ - -Show Signature Contribution in Clusters — show_groups • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    See the example section in sig_fit() for examples.

    -
    - -
    -
    show_groups(grp_dt, ...)
    -
    - -
    -

    Arguments

    -
    grp_dt
    -

    a result data.table from get_groups.

    - - -
    ...
    -

    parameters passing to legend(), e.g. x = "topleft".

    - -
    -
    -

    Value

    - - -

    nothing.

    -
    -
    -

    See also

    - -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_sig_bootstrap.html b/docs/reference/show_sig_bootstrap.html deleted file mode 100644 index 255d0f68..00000000 --- a/docs/reference/show_sig_bootstrap.html +++ /dev/null @@ -1,338 +0,0 @@ - -Show Signature Bootstrap Analysis Results — show_sig_bootstrap • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    See details for description.

    -
    - -
    -
    show_sig_bootstrap_exposure(
    -  bt_result,
    -  sample = NULL,
    -  signatures = NULL,
    -  methods = "QP",
    -  plot_fun = c("boxplot", "violin"),
    -  agg_fun = c("mean", "median", "min", "max"),
    -  highlight = "auto",
    -  highlight_size = 4,
    -  palette = "aaas",
    -  title = NULL,
    -  xlab = FALSE,
    -  ylab = "Signature exposure",
    -  width = 0.3,
    -  dodge_width = 0.8,
    -  outlier.shape = NA,
    -  add = "jitter",
    -  add.params = list(alpha = 0.3),
    -  ...
    -)
    -
    -show_sig_bootstrap_error(
    -  bt_result,
    -  sample = NULL,
    -  methods = "QP",
    -  plot_fun = c("boxplot", "violin"),
    -  agg_fun = c("mean", "median"),
    -  highlight = "auto",
    -  highlight_size = 4,
    -  palette = "aaas",
    -  title = NULL,
    -  xlab = FALSE,
    -  ylab = "Reconstruction error (L2 norm)",
    -  width = 0.3,
    -  dodge_width = 0.8,
    -  outlier.shape = NA,
    -  add = "jitter",
    -  add.params = list(alpha = 0.3),
    -  legend = "none",
    -  ...
    -)
    -
    -show_sig_bootstrap_stability(
    -  bt_result,
    -  signatures = NULL,
    -  measure = c("RMSE", "CV", "MAE", "AbsDiff"),
    -  methods = "QP",
    -  plot_fun = c("boxplot", "violin"),
    -  palette = "aaas",
    -  title = NULL,
    -  xlab = FALSE,
    -  ylab = "Signature instability",
    -  width = 0.3,
    -  outlier.shape = NA,
    -  add = "jitter",
    -  add.params = list(alpha = 0.3),
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    bt_result
    -

    result object from sig_fit_bootstrap_batch.

    - - -
    sample
    -

    a sample id.

    - - -
    signatures
    -

    signatures to show.

    - - -
    methods
    -

    a subset of c("NNLS", "QP", "SA").

    - - -
    plot_fun
    -

    set the plot function.

    - - -
    agg_fun
    -

    set the aggregation function when sample is NULL.

    - - -
    highlight
    -

    set the color for optimal solution. Default is "auto", which uses the same color as -bootstrap results, you can set it to color like "red", "gold", etc.

    - - -
    highlight_size
    -

    size for highlighting triangle, default is 4.

    - - -
    palette
    -

    the color palette to be used for coloring or filling by groups. -Allowed values include "grey" for grey color palettes; brewer palettes e.g. -"RdBu", "Blues", ...; or custom color palette e.g. c("blue", "red"); and -scientific journal palettes from ggsci R package, e.g.: "npg", "aaas", -"lancet", "jco", "ucscgb", "uchicago", "simpsons" and "rickandmorty".

    - - -
    title
    -

    plot main title.

    - - -
    xlab
    -

    character vector specifying x axis labels. Use xlab = FALSE to -hide xlab.

    - - -
    ylab
    -

    character vector specifying y axis labels. Use ylab = FALSE to -hide ylab.

    - - -
    width
    -

    numeric value between 0 and 1 specifying box width.

    - - -
    dodge_width
    -

    dodge width.

    - - -
    outlier.shape
    -

    point shape of outlier. Default is 19. To hide outlier, -specify outlier.shape = NA. When jitter is added, then outliers will -be automatically hidden.

    - - -
    add
    -

    character vector for adding another plot element (e.g.: dot plot or -error bars). Allowed values are one or the combination of: "none", -"dotplot", "jitter", "boxplot", "point", "mean", "mean_se", "mean_sd", -"mean_ci", "mean_range", "median", "median_iqr", "median_hilow", -"median_q1q3", "median_mad", "median_range"; see ?desc_statby for more -details.

    - - -
    add.params
    -

    parameters (color, shape, size, fill, linetype) for the -argument 'add'; e.g.: add.params = list(color = "red").

    - - -
    ...
    -

    other parameters passing to ggpubr::ggboxplot or ggpubr::ggviolin.

    - - -
    legend
    -

    character specifying legend position. Allowed values are one of -c("top", "bottom", "left", "right", "none"). To remove the legend use -legend = "none". Legend position can be also specified using a numeric -vector c(x, y); see details section.

    - - -
    measure
    -

    measure to estimate the exposure instability, can be one of 'RMSE', 'CV', 'MAE' and 'AbsDiff'.

    - -
    -
    -

    Value

    - - -

    a ggplot object

    -
    -
    -

    Details

    -

    Functions:

    • show_sig_bootstrap_exposure - this function plots exposures from bootstrap samples with both dotted boxplot. -The optimal exposure (the exposure from original input) is shown as triangle point. Only one sample can be plotted.

    • -
    • show_sig_bootstrap_error - this function plots decomposition errors from bootstrap samples with both dotted boxplot. -The error from optimal solution (the decomposition error from original input) is shown as triangle point. Only one sample can be plotted.

    • -
    • show_sig_bootstrap_stability - this function plots the signature exposure instability for specified signatures. Currently, -the instability measure supports 4 types:

      • 'RMSE' for Root Mean Squared Error (default) of bootstrap exposures and original exposures for each sample.

      • -
      • 'CV' for Coefficient of Variation (CV) based on RMSE (i.e. RMSE / btExposure_mean).

      • -
      • 'MAE' for Mean Absolute Error of bootstrap exposures and original exposures for each sample.

      • -
      • 'AbsDiff' for Absolute Difference between mean bootstrap exposure and original exposure.

      • -
    • -
    -
    -

    References

    -

    Huang X, Wojtowicz D, Przytycka TM. Detecting presence of mutational signatures in cancer with confidence. Bioinformatics. 2018;34(2):330–337. doi:10.1093/bioinformatics/btx604

    -
    - - -
    -

    Examples

    -
    # \donttest{
    -if (require("BSgenome.Hsapiens.UCSC.hg19")) {
    -  laml.maf <- system.file("extdata", "tcga_laml.maf.gz", package = "maftools")
    -  laml <- read_maf(maf = laml.maf)
    -  mt_tally <- sig_tally(
    -    laml,
    -    ref_genome = "BSgenome.Hsapiens.UCSC.hg19",
    -    use_syn = TRUE
    -  )
    -
    -  library(NMF)
    -  mt_sig <- sig_extract(mt_tally$nmf_matrix,
    -    n_sig = 3,
    -    nrun = 2,
    -    cores = 1
    -  )
    -
    -  mat <- t(mt_tally$nmf_matrix)
    -  mat <- mat[, colSums(mat) > 0]
    -  bt_result <- sig_fit_bootstrap_batch(mat, sig = mt_sig, n = 10)
    -  ## Parallel computation
    -  ## bt_result = sig_fit_bootstrap_batch(mat, sig = mt_sig, n = 10, use_parallel = TRUE)
    -
    -  ## At default, mean bootstrap exposure for each sample has been calculated
    -  p <- show_sig_bootstrap_exposure(bt_result, methods = c("QP"))
    -  ## Show bootstrap exposure (optimal exposure is shown as triangle)
    -  p1 <- show_sig_bootstrap_exposure(bt_result, methods = c("QP"), sample = "TCGA-AB-2802")
    -  p1
    -  p2 <- show_sig_bootstrap_exposure(bt_result,
    -    methods = c("QP"),
    -    sample = "TCGA-AB-3012",
    -    signatures = c("Sig1", "Sig2")
    -  )
    -  p2
    -
    -  ## Show bootstrap error
    -  ## Similar to exposure above
    -  p <- show_sig_bootstrap_error(bt_result, methods = c("QP"))
    -  p
    -  p3 <- show_sig_bootstrap_error(bt_result, methods = c("QP"), sample = "TCGA-AB-2802")
    -  p3
    -
    -  ## Show exposure (in)stability
    -  p4 <- show_sig_bootstrap_stability(bt_result, methods = c("QP"))
    -  p4
    -  p5 <- show_sig_bootstrap_stability(bt_result, methods = c("QP"), measure = "MAE")
    -  p5
    -  p6 <- show_sig_bootstrap_stability(bt_result, methods = c("QP"), measure = "AbsDiff")
    -  p6
    -  p7 <- show_sig_bootstrap_stability(bt_result, methods = c("QP"), measure = "CV")
    -  p7
    -} else {
    -  message("Please install package 'BSgenome.Hsapiens.UCSC.hg19' firstly!")
    -}
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_sig_consensusmap.html b/docs/reference/show_sig_consensusmap.html deleted file mode 100644 index 628aa958..00000000 --- a/docs/reference/show_sig_consensusmap.html +++ /dev/null @@ -1,147 +0,0 @@ - -Show Signature Consensus Map — show_sig_consensusmap • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    This function is a wrapper of NMF::consensusmap().

    -
    - -
    -
    show_sig_consensusmap(
    -  sig,
    -  main = "Consensus matrix",
    -  tracks = c("consensus:", "silhouette:"),
    -  lab_row = NA,
    -  lab_col = NA,
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    sig
    -

    a Signature object obtained from sig_extract.

    - - -
    main
    -

    Main title as a character string or a grob.

    - - -
    tracks
    -

    Special additional annotation tracks to - highlight associations between basis components and - sample clusters:

    basis
    -

    matches each row - (resp. column) to the most contributing basis component - in basismap (resp. coefmap). In - basismap (resp. coefmap), adding a track - ':basis' to annCol (resp. annRow) - makes the column (resp. row) corresponding to the - component being also highlighted using the matching - colours.

    - -
    - - -
    lab_row
    -

    labels for the rows.

    - - -
    lab_col
    -

    labels for the columns.

    - - -
    ...
    -

    other parameters passing to NMF::consensusmap().

    - -
    -
    -

    Value

    - - -

    nothing

    -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_sig_exposure.html b/docs/reference/show_sig_exposure.html deleted file mode 100644 index 9d244ec9..00000000 --- a/docs/reference/show_sig_exposure.html +++ /dev/null @@ -1,217 +0,0 @@ - -Plot Signature Exposure — show_sig_exposure • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Currently support copy number signatures and mutational signatures.

    -
    - -
    -
    show_sig_exposure(
    -  Signature,
    -  sig_names = NULL,
    -  groups = NULL,
    -  grp_order = NULL,
    -  grp_size = NULL,
    -  samps = NULL,
    -  cutoff = NULL,
    -  style = c("default", "cosmic"),
    -  palette = use_color_style(style),
    -  base_size = 12,
    -  font_scale = 1,
    -  rm_space = FALSE,
    -  rm_grid_line = TRUE,
    -  rm_panel_border = FALSE,
    -  hide_samps = TRUE,
    -  legend_position = "top"
    -)
    -
    - -
    -

    Arguments

    -
    Signature
    -

    a Signature object obtained either from sig_extract or sig_auto_extract, -or just a raw absolute exposure matrix with column representing samples (patients) and row -representing signatures (signature names must end with different digital numbers, -e.g. Sig1, Sig10, x12). If you named signatures with letters, -you can specify them by sig_names parameter.

    - - -
    sig_names
    -

    set name of signatures, can be a character vector.

    - - -
    groups
    -

    sample groups, default is NULL.

    - - -
    grp_order
    -

    order of groups, default is NULL.

    - - -
    grp_size
    -

    font size of groups.

    - - -
    samps
    -

    sample vector to filter samples or sort samples, default is NULL.

    - - -
    cutoff
    -

    a cutoff value to remove hyper-mutated samples.

    - - -
    style
    -

    plot style, one of 'default' and 'cosmic', works when -parameter set_gradient_color is FALSE.

    - - -
    palette
    -

    palette used to plot, default use a built-in palette -according to parameter style.

    - - -
    base_size
    -

    overall font size.

    - - -
    font_scale
    -

    a number used to set font scale.

    - - -
    rm_space
    -

    default is FALSE. If TRUE, it will remove border color -and expand the bar width to 1. This is useful when the sample size is big.

    - - -
    rm_grid_line
    -

    default is TRUE; if TRUE, remove grid lines of plot.

    - - -
    rm_panel_border
    -

    default is FALSE; if TRUE, -remove panel border to keep plot tight.

    - - -
    hide_samps
    -

    if TRUE, hide sample names.

    - - -
    legend_position
    -

    position of legend, default is 'top'.

    - -
    -
    -

    Value

    - - -

    a ggplot object

    -
    -
    -

    Author

    -

    Shixiang Wang

    -
    - -
    -

    Examples

    -
    # \donttest{
    -# Load mutational signature
    -load(system.file("extdata", "toy_mutational_signature.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -# Show signature exposure
    -p1 <- show_sig_exposure(sig2)
    -p1
    -
    -# Load copy number signature
    -load(system.file("extdata", "toy_copynumber_signature_by_W.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -# Show signature exposure
    -p2 <- show_sig_exposure(sig)
    -p2
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_sig_feature_corrplot.html b/docs/reference/show_sig_feature_corrplot.html deleted file mode 100644 index 6b2fa144..00000000 --- a/docs/reference/show_sig_feature_corrplot.html +++ /dev/null @@ -1,201 +0,0 @@ - -Draw Corrplot for Signature Exposures and Other Features — show_sig_feature_corrplot • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    This function is for association visualization. Of note, -the parameters p_val and drop will affect the visualization -of association results under p value threshold.

    -
    - -
    -
    show_sig_feature_corrplot(
    -  tidy_cor,
    -  feature_list,
    -  sort_features = FALSE,
    -  sig_orders = NULL,
    -  drop = TRUE,
    -  return_plotlist = FALSE,
    -  p_val = 0.05,
    -  xlab = "Signatures",
    -  ylab = "Features",
    -  co_gradient_colors = scale_color_gradient2(low = "blue", mid = "white", high = "red",
    -    midpoint = 0),
    -  ca_gradient_colors = co_gradient_colors,
    -  plot_ratio = "auto",
    -  breaks_count = NULL
    -)
    -
    - -
    -

    Arguments

    -
    tidy_cor
    -

    data returned by get_tidy_association.

    - - -
    feature_list
    -

    a character vector containing the features to be plotted. -If missing, all features will be used.

    - - -
    sort_features
    -

    default is FALSE, use feature order obtained from the previous -step. If TRUE, sort features as feature_list.

    - - -
    sig_orders
    -

    signature levels for ordering.

    - - -
    drop
    -

    if TRUE, when a feature has no association with any signature -(p value larger than threshold set by p_val), this feature will be removed -from the plot. Otherwise, this feature (a row) will be kept as an all-blank (white) row.

    - - -
    return_plotlist
    -

    if TRUE, return as a list of ggplot objects.

    - - -
    p_val
    -

    p value threshold. If p value larger than this threshold, -the result becomes blank white.

    - - -
    xlab
    -

    label for x axis.

    - - -
    ylab
    -

    label for y axis.

    - - -
    co_gradient_colors
    -

    a Scale object representing gradient colors used to plot for continuous features.

    - - -
    ca_gradient_colors
    -

    a Scale object representing gradient colors used to plot for categorical features.

    - - -
    plot_ratio
    -

    a length-2 numeric vector to set the height/width ratio.

    - - -
    breaks_count
    -

    breaks for sample count. If set to NULL, -ggplot bin scale will be used to automatically determine the -breaks. If set to NA, aes for sample will not be used.

    - -
    -
    -

    Value

    - - -

    a ggplot2 object

    -
    - - -
    -

    Examples

    -
    # \donttest{
    -# The data is generated from Wang, Shixiang et al.
    -load(system.file("extdata", "asso_data.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -
    -p <- show_sig_feature_corrplot(
    -            tidy_data.seqz.feature,
    -            p_val = 0.05,
    -            breaks_count = c(0L,200L, 400L, 600L, 800L, 1020L))
    -p
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_sig_fit.html b/docs/reference/show_sig_fit.html deleted file mode 100644 index 1ded60df..00000000 --- a/docs/reference/show_sig_fit.html +++ /dev/null @@ -1,198 +0,0 @@ - -Show Signature Fit Result — show_sig_fit • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    See sig_fit for examples.

    -
    - -
    -
    show_sig_fit(
    -  fit_result,
    -  samples = NULL,
    -  signatures = NULL,
    -  plot_fun = c("boxplot", "violin", "scatter"),
    -  palette = "aaas",
    -  title = NULL,
    -  xlab = FALSE,
    -  ylab = "Signature exposure",
    -  legend = "none",
    -  width = 0.3,
    -  outlier.shape = NA,
    -  add = "jitter",
    -  add.params = list(alpha = 0.3),
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    fit_result
    -

    result object from sig_fit.

    - - -
    samples
    -

    samples to show, if NULL, all samples are used.

    - - -
    signatures
    -

    signatures to show.

    - - -
    plot_fun
    -

    set the plot function.

    - - -
    palette
    -

    the color palette to be used for coloring or filling by groups. -Allowed values include "grey" for grey color palettes; brewer palettes e.g. -"RdBu", "Blues", ...; or custom color palette e.g. c("blue", "red"); and -scientific journal palettes from ggsci R package, e.g.: "npg", "aaas", -"lancet", "jco", "ucscgb", "uchicago", "simpsons" and "rickandmorty".

    - - -
    title
    -

    plot main title.

    - - -
    xlab
    -

    character vector specifying x axis labels. Use xlab = FALSE to -hide xlab.

    - - -
    ylab
    -

    character vector specifying y axis labels. Use ylab = FALSE to -hide ylab.

    - - -
    legend
    -

    character specifying legend position. Allowed values are one of -c("top", "bottom", "left", "right", "none"). To remove the legend use -legend = "none". Legend position can be also specified using a numeric -vector c(x, y); see details section.

    - - -
    width
    -

    numeric value between 0 and 1 specifying box width.

    - - -
    outlier.shape
    -

    point shape of outlier. Default is 19. To hide outlier, -specify outlier.shape = NA. When jitter is added, then outliers will -be automatically hidden.

    - - -
    add
    -

    character vector for adding another plot element (e.g.: dot plot or -error bars). Allowed values are one or the combination of: "none", -"dotplot", "jitter", "boxplot", "point", "mean", "mean_se", "mean_sd", -"mean_ci", "mean_range", "median", "median_iqr", "median_hilow", -"median_q1q3", "median_mad", "median_range"; see ?desc_statby for more -details.

    - - -
    add.params
    -

    parameters (color, shape, size, fill, linetype) for the -argument 'add'; e.g.: add.params = list(color = "red").

    - - -
    ...
    -

    other arguments to be passed to -geom_boxplot, ggpar and -facet.

    - -
    -
    -

    Value

    - - -

    a ggplot object.

    -
    - - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_sig_profile.html b/docs/reference/show_sig_profile.html deleted file mode 100644 index 18570d3f..00000000 --- a/docs/reference/show_sig_profile.html +++ /dev/null @@ -1,351 +0,0 @@ - -Show Signature Profile — show_sig_profile • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Who doesn't like to show a barplot for a signature profile? This function is for that.

    -
    - -
    -
    show_sig_profile(
    -  Signature,
    -  mode = c("SBS", "copynumber", "DBS", "ID", "RS"),
    -  method = "Wang",
    -  by_context = FALSE,
    -  normalize = c("row", "column", "raw", "feature"),
    -  y_tr = NULL,
    -  filters = NULL,
    -  feature_setting = sigminer::CN.features,
    -  style = c("default", "cosmic"),
    -  palette = use_color_style(style, ifelse(by_context, "SBS", mode), method),
    -  set_gradient_color = FALSE,
    -  free_space = "free_x",
    -  rm_panel_border = style == "cosmic",
    -  rm_grid_line = style == "cosmic",
    -  rm_axis_text = FALSE,
    -  bar_border_color = ifelse(style == "default", "grey50", "white"),
    -  bar_width = 0.7,
    -  paint_axis_text = TRUE,
    -  x_label_angle = ifelse(mode == "copynumber" & !(startsWith(method, "T") | method ==
    -    "X"), 60, 90),
    -  x_label_vjust = ifelse(mode == "copynumber" & !(startsWith(method, "T") | method ==
    -    "X"), 1, 0.5),
    -  x_label_hjust = 1,
    -  x_lab = "Components",
    -  y_lab = "auto",
    -  y_limits = NULL,
    -  params = NULL,
    -  show_cv = FALSE,
    -  params_label_size = 3,
    -  params_label_angle = 60,
    -  y_expand = 1,
    -  digits = 2,
    -  base_size = 12,
    -  font_scale = 1,
    -  sig_names = NULL,
    -  sig_orders = NULL,
    -  check_sig_names = TRUE
    -)
    -
    - -
    -

    Arguments

    -
    Signature
    -

    a Signature object obtained either from sig_extract or sig_auto_extract, -or just a raw signature matrix with row representing components (motifs) and column -representing signatures (column names must start with 'Sig').

    - - -
    mode
    -

    signature type for plotting, now supports 'copynumber', 'SBS', -'DBS', 'ID' and 'RS' (genome rearrangement signature).

    - - -
    method
    -

    method for copy number feature classification in sig_tally, -can be one of "Wang" ("W"), "S".

    - - -
    by_context
    -

    for specific use.

    - - -
    normalize
    -

    one of 'row', 'column', 'raw' and "feature", for row normalization (signature), -column normalization (component), raw data, row normalization by feature, respectively. -Of note, 'feature' only works when the mode is 'copynumber'.

    - - -
    y_tr
    -

    a function (e.g. log10) to transform y axis before plotting.

    - - -
    filters
    -

    a pattern used to select components to plot.

    - - -
    feature_setting
    -

    a data.frame used for classification. -Only used when method is "Wang" ("W"). -Default is CN.features. Users can also set custom input with "feature", -"min" and "max" columns available. Valid features can be printed by -unique(CN.features$feature).

    - - -
    style
    -

    plot style, one of 'default' and 'cosmic', works when -parameter set_gradient_color is FALSE.

    - - -
    palette
    -

    palette used to plot when set_gradient_color is FALSE, -default use a built-in palette according to parameter style.

    - - -
    set_gradient_color
    -

    default is FALSE, if TRUE, use gradient colors -to fill bars.

    - - -
    free_space
    -

    default is 'free_x'. If "fixed", all panels have the same size. -If "free_y" their height will be proportional to the length of the y scale; -if "free_x" their width will be proportional to the length of the x scale; -or if "free" both height and width will vary. -This setting has no effect unless the appropriate scales also vary.

    - - -
    rm_panel_border
    -

    default is TRUE for style 'cosmic', -remove panel border to keep plot tight.

    - - -
    rm_grid_line
    -

    default is TRUE for style 'cosmic' and FALSE otherwise; if TRUE, remove grid lines of plot.

    - - -
    rm_axis_text
    -

    default is FALSE, if TRUE, remove component texts. -This is useful when multiple signature profiles are plotted together.

    - - -
    bar_border_color
    -

    the color of bar border.

    - - -
    bar_width
    -

    bar width. By default, set to 70% of the resolution of the -data.

    - - -
    paint_axis_text
    -

    if TRUE, color on text of x axis.

    - - -
    x_label_angle
    -

    font angle for x label.

    - - -
    x_label_vjust
    -

    font vjust for x label.

    - - -
    x_label_hjust
    -

    font hjust for x label.

    - - -
    x_lab
    -

    x axis lab.

    - - -
    y_lab
    -

    y axis lab.

    - - -
    y_limits
    -

    limits to expand in y axis. e.g., 0.2, c(0, 0.3).

    - - -
    params
    -

    params data.frame of components, obtained from sig_tally.

    - - -
    show_cv
    -

    default is FALSE, if TRUE, show coefficient of variation when -params is not NULL.

    - - -
    params_label_size
    -

    font size for params label.

    - - -
    params_label_angle
    -

    font angle for params label.

    - - -
    y_expand
    -

    y expand height for plotting params of copy number signatures.

    - - -
    digits
    -

    digits for plotting params of copy number signatures.

    - - -
    base_size
    -

    overall font size.

    - - -
    font_scale
    -

    a number used to set font scale.

    - - -
    sig_names
    -

    subset signatures or set name of signatures, can be a character vector. -Default is NULL, prefix 'Sig' plus number is used.

    - - -
    sig_orders
    -

    set order of signatures, can be a character vector. -Default is NULL, the signatures are ordered by alphabetical order. -If an integer vector set, only specified signatures are plotted.

    - - -
    check_sig_names
    -

    if TRUE, check signature names when input is -a matrix, i.e., all signatures (colnames) must start with 'Sig'.

    - -
    -
    -

    Value

    - - -

    a ggplot object

    -
    - -
    -

    Author

    -

    Shixiang Wang

    -
    - -
    -

    Examples

    -
    # \donttest{
    -# Load SBS signature
    -load(system.file("extdata", "toy_mutational_signature.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -# Show signature profile
    -p1 <- show_sig_profile(sig2, mode = "SBS")
    -p1
    -
    -# Use 'y_tr' option to transform values in y axis
    -p11 <- show_sig_profile(sig2, mode = "SBS", y_tr = function(x) x * 100)
    -p11
    -
    -# Load copy number signature from method "W"
    -load(system.file("extdata", "toy_copynumber_signature_by_W.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -# Show signature profile
    -p2 <- show_sig_profile(sig,
    -  style = "cosmic",
    -  mode = "copynumber",
    -  method = "W",
    -  normalize = "feature"
    -)
    -p2
    -
    -# Visualize rearrangement signatures
    -s <- get_sig_db("RS_Nik_lab")
    -ss <- s$db[, 1:3]
    -colnames(ss) <- c("Sig1", "Sig2", "Sig3")
    -p3 <- show_sig_profile(ss, mode = "RS", style = "cosmic")
    -p3
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_sig_profile_heatmap.html b/docs/reference/show_sig_profile_heatmap.html deleted file mode 100644 index 863cee57..00000000 --- a/docs/reference/show_sig_profile_heatmap.html +++ /dev/null @@ -1,229 +0,0 @@ - -Show Signature Profile with Heatmap — show_sig_profile_heatmap • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    This is a complementary function to show_sig_profile(), it is used for visualizing -some big signatures, i.e. SBS-1536, not all signatures are supported. See details for -current supported signatures.

    -
    - -
    -
    show_sig_profile_heatmap(
    -  Signature,
    -  mode = c("SBS", "DBS"),
    -  normalize = c("row", "column", "raw"),
    -  filters = NULL,
    -  x_lab = NULL,
    -  y_lab = NULL,
    -  legend_name = "auto",
    -  palette = "red",
    -  x_label_angle = 90,
    -  x_label_vjust = 1,
    -  x_label_hjust = 0.5,
    -  y_label_angle = 0,
    -  y_label_vjust = 0.5,
    -  y_label_hjust = 1,
    -  flip_xy = FALSE,
    -  sig_names = NULL,
    -  sig_orders = NULL,
    -  check_sig_names = TRUE
    -)
    -
    - -
    -

    Arguments

    -
    Signature
    -

    a Signature object obtained either from sig_extract or sig_auto_extract, -or just a raw signature matrix with row representing components (motifs) and column -representing signatures (column names must start with 'Sig').

    - - -
    mode
    -

    one of "SBS" and "DBS".

    - - -
    normalize
    -

    one of 'row', 'column', 'raw' and "feature", for row normalization (signature), -column normalization (component), raw data, row normalization by feature, respectively. -Of note, 'feature' only works when the mode is 'copynumber'.

    - - -
    filters
    -

    a pattern used to select components to plot.

    - - -
    x_lab
    -

    x label.

    - - -
    y_lab
    -

    y label.

    - - -
    legend_name
    -

    name of figure legend.

    - - -
    palette
    -

    color for value.

    - - -
    x_label_angle
    -

    angle for x axis text.

    - - -
    x_label_vjust
    -

    vjust for x axis text.

    - - -
    x_label_hjust
    -

    hjust for x axis text.

    - - -
    y_label_angle
    -

    angle for y axis text.

    - - -
    y_label_vjust
    -

    vjust for y axis text.

    - - -
    y_label_hjust
    -

    hjust for y axis text.

    - - -
    flip_xy
    -

    if TRUE, flip x axis and y axis.

    - - -
    sig_names
    -

    subset signatures or set name of signatures, can be a character vector. -Default is NULL, prefix 'Sig' plus number is used.

    - - -
    sig_orders
    -

    set order of signatures, can be a character vector. -Default is NULL, the signatures are ordered by alphabetical order. -If an integer vector set, only specified signatures are plotted.

    - - -
    check_sig_names
    -

    if TRUE, check signature names when input is -a matrix, i.e., all signatures (colnames) must start with 'Sig'.

    - -
    -
    -

    Value

    - - -

    a ggplot object.

    -
    -
    -

    Details

    -

    Support:

    • SBS-24

    • -
    • SBS-96

    • -
    • SBS-384

    • -
    • SBS-1536

    • -
    • SBS-6144

    • -
    • DBS-78

    • -
    • DBS-186

    • -
    - -
    -

    Examples

    -
    # \donttest{
    -# Load SBS signature
    -load(system.file("extdata", "toy_mutational_signature.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -# Show signature profile
    -p1 <- show_sig_profile_heatmap(sig2, mode = "SBS")
    -p1
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/show_sig_profile_loop.html b/docs/reference/show_sig_profile_loop.html deleted file mode 100644 index 186349d3..00000000 --- a/docs/reference/show_sig_profile_loop.html +++ /dev/null @@ -1,155 +0,0 @@ - -Show Signature Profile with Loop Way — show_sig_profile_loop • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Show Signature Profile with Loop Way

    -
    - -
    -
    show_sig_profile_loop(
    -  Signature,
    -  sig_names = NULL,
    -  ncol = 1,
    -  nrow = NULL,
    -  x_lab = "Components",
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    Signature
    -

    a Signature object obtained either from sig_extract or sig_auto_extract, -or just a raw signature matrix with row representing components (motifs) and column -representing signatures (column names must start with 'Sig').

    - - -
    sig_names
    -

    subset signatures or set name of signatures, can be a character vector. -Default is NULL, prefix 'Sig' plus number is used.

    - - -
    ncol
    -

    (optional) Number of columns in the plot grid.

    - - -
    nrow
    -

    (optional) Number of rows in the plot grid.

    - - -
    x_lab
    -

    x axis lab.

    - - -
    ...
    -

    other parameters, except sig_orders, passed to show_sig_profile.

    - -
    -
    -

    Value

    - - -

    a ggplot result from cowplot::plot_grid().

    -
    -
    -

    See also

    - -
    - -
    -

    Examples

    -
    # \donttest{
    -load(system.file("extdata", "toy_mutational_signature.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -# Show signature profile
    -p1 <- show_sig_profile_loop(sig2, mode = "SBS")
    -p1
    -p2 <- show_sig_profile_loop(sig2, mode = "SBS", style = "cosmic", sig_names = c("A", "B", "C"))
    -p2
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/sig_auto_extract.html b/docs/reference/sig_auto_extract.html deleted file mode 100644 index cd2efc94..00000000 --- a/docs/reference/sig_auto_extract.html +++ /dev/null @@ -1,256 +0,0 @@ - -Extract Signatures through the Automatic Relevance Determination Technique — sig_auto_extract • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    A Bayesian variant of the NMF algorithm to enable optimal inferences for the -number of signatures through the automatic relevance determination technique. -This function delivers highly interpretable and sparse representations for -both signature profiles and attributions at a balance between data fitting and -model complexity (this method may introduce more signatures than expected, -especially for copy number signatures (thus I don't recommend you to use this feature -to extract copy number signatures)). See the Details section and references for more.

    -
    - -
    -
    sig_auto_extract(
    -  nmf_matrix = NULL,
    -  result_prefix = "BayesNMF",
    -  destdir = tempdir(),
    -  method = c("L1W.L2H", "L1KL", "L2KL"),
    -  strategy = c("stable", "optimal", "ms"),
    -  ref_sigs = NULL,
    -  K0 = 25,
    -  nrun = 10,
    -  niter = 200000,
    -  tol = 0.0000001,
    -  cores = 1,
    -  optimize = FALSE,
    -  skip = FALSE,
    -  recover = FALSE
    -)
    -
    - -
    -

    Arguments

    -
    nmf_matrix
    -

    a matrix used for NMF decomposition with rows indicate samples and columns indicate components.

    - - -
    result_prefix
    -

    prefix for result data files.

    - - -
    destdir
    -

    path to save data runs, default is tempdir().

    - - -
    method
    -

    default is "L1W.L2H", which uses an exponential prior for W and -a half-normal prior for H (This method is used by PCAWG project, see reference #3). -You can also use "L1KL" to set exponential priors for both W and H, and "L2KL" to -set half-normal priors for both W and H. The latter two methods are originally -implemented by SignatureAnalyzer software.

    - - -
    strategy
    -

    the selection strategy for returned data. Set 'stable' for getting optimal -result from the most frequent K. Set 'optimal' for getting optimal result from all Ks. -Set 'ms' for getting result with maximum mean cosine similarity with provided reference -signatures. See ref_sigs option for details. -If you want to select another solution, please check get_bayesian_result.

    - - -
    ref_sigs
    -

    A Signature object or matrix or string for specifying -reference signatures, only used when strategy = 'ms'. -See Signature and sig_db options in get_sig_similarity for details.

    - - -
    K0
    -

    number of initial signatures.

    - - -
    nrun
    -

    number of independent simulations.

    - - -
    niter
    -

    the maximum number of iterations.

    - - -
    tol
    -

    tolerance for convergence.

    - - -
    cores
    -

    number of cpu cores to run NMF.

    - - -
    optimize
    -

    if TRUE, then refit the denovo signatures with QP method, see sig_fit.

    - - -
    skip
    -

    if TRUE, it will skip running a previous stored result. This can be used to -extend run times, e.g. you try running 10 times firstly and then you want to extend it to -20 times.

    - - -
    recover
    -

    if TRUE, try to recover result from previous runs based on input result_prefix, -destdir and nrun. This is pretty useful for reproducing result. Please use skip if you want -to recover an unfinished job.

    - -
    -
    -

    Value

    - - -

    a list with Signature class.

    -
    -
    -

    Details

    -

    There are three methods available in this function: "L1W.L2H", "L1KL" and "L2KL". -They use different priors for the bayesian variant of NMF algorithm -(see method parameter) written by reference #1 and implemented in -SignatureAnalyzer software -(reference #2).

    -

    I copied source code for the three methods from Broad Institute and supplementary -files of reference #3, and wrote this higher function. It is more friendly for users -to extract, visualize and analyze signatures by combining with other powerful functions -in sigminer package. Besides, I implemented parallel computation to speed up -the calculation process and a similar input and output structure like sig_extract().

    -
    -
    -

    References

    -

    Tan, Vincent YF, and Cédric Févotte. "Automatic relevance determination in nonnegative matrix factorization with the β-divergence." -IEEE Transactions on Pattern Analysis and Machine Intelligence 35.7 (2012): 1592-1605.

    -

    Kim, Jaegil, et al. "Somatic ERCC2 mutations are associated with a distinct genomic signature in urothelial tumors." -Nature genetics 48.6 (2016): 600.

    -

    Alexandrov, Ludmil, et al. "The repertoire of mutational signatures in human cancer." BioRxiv (2018): 322859.

    -
    -
    -

    See also

    -

    sig_tally for getting variation matrix, -sig_extract for extracting signatures using NMF package, sig_estimate for -estimating signature number for sig_extract.

    -
    -
    -

    Author

    -

    Shixiang Wang

    -
    - -
    -

    Examples

    -
    # \donttest{
    -load(system.file("extdata", "toy_copynumber_tally_W.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -res <- sig_auto_extract(cn_tally_W$nmf_matrix, result_prefix = "Test_copynumber", nrun = 1)
    -# At default, all run files are stored in tempdir()
    -dir(tempdir(), pattern = "Test_copynumber")
    -
    -laml.maf <- system.file("extdata", "tcga_laml.maf.gz", package = "maftools")
    -laml <- read_maf(maf = laml.maf)
    -mt_tally <- sig_tally(
    -  laml,
    -  ref_genome = "BSgenome.Hsapiens.UCSC.hg19",
    -  use_syn = TRUE
    -)
    -
    -x <- sig_auto_extract(mt_tally$nmf_matrix,
    -  strategy = "ms", nrun = 3, ref_sigs = "legacy"
    -)
    -x
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/sig_convert.html b/docs/reference/sig_convert.html deleted file mode 100644 index f80372e2..00000000 --- a/docs/reference/sig_convert.html +++ /dev/null @@ -1,149 +0,0 @@ - -Convert Signatures between different Genomic Distribution of Components — sig_convert • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Converts signatures between two representations relative to different sets of mutational opportunities. -Currently, only SBS signature is supported.

    -
    - -
    -
    sig_convert(sig, from = "human-genome", to = "human-exome")
    -
    - -
    -

    Arguments

    -
    sig
    -

    a Signature object obtained either from sig_extract or sig_auto_extract, -or just a raw signature matrix/data.frame with row representing components (motifs) and -column representing signatures.

    - - -
    from
    -

    either one of "human-genome" and "human-exome" or an opportunity matrix -(repeated n columns with each row represents the total number of mutations for -a component, n is the number of signature).

    - - -
    to
    -

    same as from.

    - -
    -
    -

    Value

    - - -

    a matrix.

    -
    -
    -

    Details

    -

    The default opportunity matrix for "human-genome" and "human-exome" comes from COSMIC -signature database v2 and v3.

    -
    -
    -

    References

    -

    convert_signatures function from sigfit package.

    -
    - -
    -

    Examples

    -
    # Load SBS signature
    -load(system.file("extdata", "toy_mutational_signature.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -# Exome-relative to Genome-relative
    -sig_converted <- sig_convert(sig2,
    -  from = "human-exome",
    -  to = "human-genome"
    -)
    -sig_converted
    -
    -# \donttest{
    -show_sig_profile(sig2, style = "cosmic")
    -show_sig_profile(sig_converted, style = "cosmic")
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/sig_estimate.html b/docs/reference/sig_estimate.html deleted file mode 100644 index 8307614c..00000000 --- a/docs/reference/sig_estimate.html +++ /dev/null @@ -1,359 +0,0 @@ - -Estimate Signature Number — sig_estimate • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Use NMF package to evaluate the optimal number of signatures. -This is used along with sig_extract. -Users should call library(NMF) first. If NMF objects are returned, -the result can be further visualized by NMF plot methods like -NMF::consensusmap() and NMF::basismap().

    -

    sig_estimate() shows comprehensive rank survey generated by -NMF package, sometimes -it is hard to consider all measures. show_sig_number_survey() provides a -one or two y-axis visualization method to help users determine -the optimal signature number (showing both -stability ("cophenetic") and error (RSS) at default). -Users can also set custom measures to show.

    -

    show_sig_number_survey2() is modified from NMF package to -better help users to explore survey of signature number.

    -
    - -
    -
    sig_estimate(
    -  nmf_matrix,
    -  range = 2:5,
    -  nrun = 10,
    -  use_random = FALSE,
    -  method = "brunet",
    -  seed = 123456,
    -  cores = 1,
    -  keep_nmfObj = FALSE,
    -  save_plots = FALSE,
    -  plot_basename = file.path(tempdir(), "nmf"),
    -  what = "all",
    -  verbose = FALSE
    -)
    -
    -show_sig_number_survey(
    -  object,
    -  x = "rank",
    -  left_y = "cophenetic",
    -  right_y = "rss",
    -  left_name = left_y,
    -  right_name = toupper(right_y),
    -  left_color = "black",
    -  right_color = "red",
    -  left_shape = 16,
    -  right_shape = 18,
    -  shape_size = 4,
    -  highlight = NULL
    -)
    -
    -show_sig_number_survey2(
    -  x,
    -  y = NULL,
    -  what = c("all", "cophenetic", "rss", "residuals", "dispersion", "evar", "sparseness",
    -    "sparseness.basis", "sparseness.coef", "silhouette", "silhouette.coef",
    -    "silhouette.basis", "silhouette.consensus"),
    -  na.rm = FALSE,
    -  xlab = "Total signatures",
    -  ylab = "",
    -  main = "Signature number survey using NMF package"
    -)
    -
    - -
    -

    Arguments

    -
    nmf_matrix
    -

    a matrix used for NMF decomposition with rows indicate samples and columns indicate components.

    - - -
    range
    -

    a numeric vector containing the ranks of factorization to try. Note that duplicates are removed -and values are sorted in increasing order. The results are notably returned in this order.

    - - -
    nrun
    -

    a numeric giving the number of run to perform for each value in range, nrun set to 30~50 is -enough to achieve robust result.

    - - -
    use_random
    -

    Whether to generate random data from input to test measurements. Default is FALSE.

    - - -
    method
    -

    specification of the NMF algorithm. Use 'brunet' as default. -Available methods for NMF decompositions are 'brunet', 'lee', 'ls-nmf', 'nsNMF', 'offset'.

    - - -
    seed
    -

    specification of the starting point or seeding method, which will compute a starting point, -usually using data from the target matrix in order to provide a good guess.

    - - -
    cores
    -

    number of cpu cores to run NMF.

    - - -
    keep_nmfObj
    -

    default is FALSE, if TRUE, keep NMF objects from runs, and the result may be huge.

    - - -
    save_plots
    -

    if TRUE, save signature number survey plot to local machine.

    - - -
    plot_basename
    -

    when save plots, set custom basename for file path.

    - - -
    what
    -

    a character vector whose elements partially match one of the following item, -which correspond to the measures computed by summary() on each – multi-run – NMF result: -'all', 'cophenetic', 'rss', 'residuals', 'dispersion', 'evar', 'silhouette' -(and more specific *.coef, *.basis, *.consensus), 'sparseness' -(and more specific *.coef, *.basis). -It specifies which measure must be plotted (what='all' plots all the measures).

    - - -
    verbose
    -

    if TRUE, print extra message.

    - - -
    object
    -

    a Survey object generated from sig_estimate, or -a data.frame contains at least rank columns and columns for -one measure.

    - - -
    x
    -

    a data.frame or NMF.rank object obtained from sig_estimate().

    - - -
    left_y
    -

    column name for left y axis.

    - - -
    right_y
    -

    column name for right y axis.

    - - -
    left_name
    -

    label name for left y axis.

    - - -
    right_name
    -

    label name for right y axis.

    - - -
    left_color
    -

    color for left axis.

    - - -
    right_color
    -

    color for right axis.

    - - -
    left_shape, right_shape, shape_size
    -

    shape setting.

    - - -
    highlight
    -

    an integer to highlight an x value.

    - - -
    y
    -

    for random simulation, -a data.frame or NMF.rank object obtained from sig_estimate().

    - - -
    na.rm
    -

    single logical that specifies if the rank - for which the measures are NA values should be removed - from the graph or not (default to FALSE). This is - useful when plotting results which include NAs due to - error during the estimation process. See argument - stop for nmfEstimateRank.

    - - -
    xlab
    -

    x-axis label

    - - -
    ylab
    -

    y-axis label

    - - -
    main
    -

    main title

    - -
    -
    -

    Value

    - - -
    • sig_estimate: a list contains information of NMF run and rank survey.

    • -
    • show_sig_number_survey: a ggplot object

    • -
    • show_sig_number_survey2: a ggplot object

    • -
    -
    -

    Details

    -

    The most common approach is to choose the smallest rank for which cophenetic correlation coefficient -starts decreasing (Used by this function). Another approach is to choose the rank for which the plot -of the residual sum of squares (RSS) between the input matrix and its estimate shows an inflection point. -More custom features please directly use NMF::nmfEstimateRank.

    -
    -
    -

    References

    -

    Gaujoux, Renaud, and Cathal Seoighe. "A flexible R package for nonnegative matrix factorization." BMC bioinformatics 11.1 (2010): 367.

    -
    -
    -

    See also

    -

    sig_extract for extracting signatures using NMF package, sig_auto_extract for -extracting signatures using automatic relevance determination technique.

    -

    sig_estimate for estimating signature number for sig_extract, -show_sig_number_survey2 for more visualization method.

    -
    -
    -

    Author

    -

    Shixiang Wang

    -
    - -
    -

    Examples

    -
    # \donttest{
    -load(system.file("extdata", "toy_copynumber_tally_W.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -library(NMF)
    -cn_estimate <- sig_estimate(cn_tally_W$nmf_matrix,
    -  cores = 1, nrun = 5,
    -  verbose = TRUE
    -)
    -
    -p <- show_sig_number_survey2(cn_estimate$survey)
    -p
    -
    -# Show two measures
    -show_sig_number_survey(cn_estimate)
    -# Show one measure
    -p1 <- show_sig_number_survey(cn_estimate, right_y = NULL)
    -p1
    -p2 <- add_h_arrow(p, x = 4.1, y = 0.953, label = "selected number")
    -p2
    -
    -# Show data from a data.frame
    -p3 <- show_sig_number_survey(cn_estimate$survey)
    -p3
    -# Show other measures
    -head(cn_estimate$survey)
    -p4 <- show_sig_number_survey(cn_estimate$survey,
    -  right_y = "dispersion",
    -  right_name = "dispersion"
    -)
    -p4
    -p5 <- show_sig_number_survey(cn_estimate$survey,
    -  right_y = "evar",
    -  right_name = "evar"
    -)
    -p5
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/sig_extract.html b/docs/reference/sig_extract.html deleted file mode 100644 index 67a107b6..00000000 --- a/docs/reference/sig_extract.html +++ /dev/null @@ -1,191 +0,0 @@ - -Extract Signatures through NMF — sig_extract • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Do NMF de-composition and then extract signatures.

    -
    - -
    -
    sig_extract(
    -  nmf_matrix,
    -  n_sig,
    -  nrun = 10,
    -  cores = 1,
    -  method = "brunet",
    -  optimize = FALSE,
    -  pynmf = FALSE,
    -  use_conda = TRUE,
    -  py_path = "/Users/wsx/anaconda3/bin/python",
    -  seed = 123456,
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    nmf_matrix
    -

    a matrix used for NMF decomposition with rows indicate samples and columns indicate components.

    - - -
    n_sig
    -

    number of signature. Please run sig_estimate to select a suitable value.

    - - -
    nrun
    -

    a numeric giving the number of run to perform for each value in range, nrun set to 30~50 is -enough to achieve robust result.

    - - -
    cores
    -

    number of cpu cores to run NMF.

    - - -
    method
    -

    specification of the NMF algorithm. Use 'brunet' as default. -Available methods for NMF decompositions are 'brunet', 'lee', 'ls-nmf', 'nsNMF', 'offset'.

    - - -
    optimize
    -

    if TRUE, then refit the denovo signatures with QP method, see sig_fit.

    - - -
    pynmf
    -

    if TRUE, use Python NMF driver Nimfa. -The seed currently is not used by this implementation.

    - - -
    use_conda
    -

    if TRUE, create an independent conda environment to run NMF.

    - - -
    py_path
    -

    path to Python executable file, e.g. '/Users/wsx/anaconda3/bin/python'. In my -test, it is more stable than use_conda=TRUE. You can install the Nimfa package by yourself -or set use_conda to TRUE to install required Python environment, and then set this option.

    - - -
    seed
    -

    specification of the starting point or seeding method, which will compute a starting point, -usually using data from the target matrix in order to provide a good guess.

    - - -
    ...
    -

    other arguments passed to NMF::nmf().

    - -
    -
    -

    Value

    - - -

    a list with Signature class.

    -
    -
    -

    References

    -

    Gaujoux, Renaud, and Cathal Seoighe. "A flexible R package for nonnegative matrix factorization." BMC bioinformatics 11.1 (2010): 367.

    -

    Mayakonda, Anand, et al. "Maftools: efficient and comprehensive analysis of somatic variants in cancer." Genome research 28.11 (2018): 1747-1756.

    -
    -
    -

    See also

    -

    sig_tally for getting variation matrix, -sig_estimate for estimating signature number for sig_extract, sig_auto_extract for -extracting signatures using automatic relevance determination technique.

    -
    -
    -

    Author

    -

    Shixiang Wang

    -
    - -
    -

    Examples

    -
    # \donttest{
    -load(system.file("extdata", "toy_copynumber_tally_W.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -# Extract copy number signatures
    -res <- sig_extract(cn_tally_W$nmf_matrix, 2, nrun = 1)
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/sig_fit.html b/docs/reference/sig_fit.html deleted file mode 100644 index 0d42272e..00000000 --- a/docs/reference/sig_fit.html +++ /dev/null @@ -1,294 +0,0 @@ - -Fit Signature Exposures with Linear Combination Decomposition — sig_fit • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    The function performs a signatures decomposition of a given mutational -catalogue V with known signatures W by solving the minimization problem -min(||W*H - V||) where W and V are known.

    -
    - -
    -
    sig_fit(
    -  catalogue_matrix,
    -  sig,
    -  sig_index = NULL,
    -  sig_db = c("legacy", "SBS", "DBS", "ID", "TSB", "SBS_Nik_lab", "RS_Nik_lab",
    -    "RS_BRCA560", "RS_USARC", "CNS_USARC", "CNS_TCGA", "CNS_TCGA176", "CNS_PCAWG176",
    -    "SBS_hg19", "SBS_hg38", "SBS_mm9", "SBS_mm10", "DBS_hg19", "DBS_hg38", "DBS_mm9",
    -    "DBS_mm10", "SBS_Nik_lab_Organ", "RS_Nik_lab_Organ", "latest_SBS_GRCh37",
    -    "latest_DBS_GRCh37", "latest_ID_GRCh37", "latest_SBS_GRCh38", "latest_DBS_GRCh38",
    -    "latest_SBS_mm9", "latest_DBS_mm9", "latest_SBS_mm10", "latest_DBS_mm10",
    -    "latest_SBS_rn6", "latest_DBS_rn6", "latest_CN_GRCh37", 
    -    
    -    "latest_RNA-SBS_GRCh37", "latest_SV_GRCh38"),
    -  db_type = c("", "human-exome", "human-genome"),
    -  show_index = TRUE,
    -  method = c("QP", "NNLS", "SA"),
    -  auto_reduce = FALSE,
    -  type = c("absolute", "relative"),
    -  return_class = c("matrix", "data.table"),
    -  return_error = FALSE,
    -  rel_threshold = 0,
    -  mode = c("SBS", "DBS", "ID", "copynumber"),
    -  true_catalog = NULL,
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    catalogue_matrix
    -

    a numeric matrix V with row representing components and -columns representing samples, typically you can get nmf_matrix from sig_tally() and -transpose it by t().

    - - -
    sig
    -

    a Signature object obtained either from sig_extract or sig_auto_extract, -or just a raw signature matrix/data.frame with row representing components (motifs) and -column representing signatures.

    - - -
    sig_index
    -

    a vector for signature index. "ALL" for all signatures.

    - - -
    sig_db
    -

    default 'legacy', it can be 'legacy' (for COSMIC v2 'SBS'), -'SBS', 'DBS', 'ID' and 'TSB' (for COSMIC v3.1 signatures) -for small scale mutations. -For more specific details, it can also be 'SBS_hg19', 'SBS_hg38', -'SBS_mm9', 'SBS_mm10', 'DBS_hg19', 'DBS_hg38', 'DBS_mm9', 'DBS_mm10' to use -COSMIC v3 reference signatures from Alexandrov, Ludmil B., et al. (2020) (reference #1). -In addition, it can be one of "SBS_Nik_lab_Organ", "RS_Nik_lab_Organ", -"SBS_Nik_lab", "RS_Nik_lab" to refer to reference signatures from -Degasperi, Andrea, et al. (2020) (reference #2); -"RS_BRCA560", "RS_USARC" to reference signatures from BRCA560 and USARC cohorts; -"CNS_USARC" (40 categories), "CNS_TCGA" (48 categories) to reference copy number signatures from USARC cohort and TCGA; -"CNS_TCGA176" (176 categories) and "CNS_PCAWG176" (176 categories) to reference copy number signatures from TCGA and PCAWG, respectively. -UPDATE, the latest version of the reference signatures can be automatically -downloaded and loaded from https://cancer.sanger.ac.uk/signatures/downloads/ -when an option with latest_ prefix is specified (e.g. "latest_SBS_GRCh37"). -Note: the signature profiles for different genome builds are basically the same. -And a specific database (e.g. 'SBS_mm10') contains fewer signatures than all COSMIC -signatures (because some signatures are not detected from Alexandrov, Ludmil B., et al. (2020)). -For all available options, check the parameter setting.

    - - -
    db_type
    -

    only used when sig_db is enabled. -"" for keeping default, "human-exome" for transforming to exome frequency of component, -and "human-genome" for transforming to whole genome frequency of component. -Currently only works for 'SBS'.

    - - -
    show_index
    -

    if TRUE, show valid indices.

    - - -
    method
    -

    method to solve the minimization problem. -'NNLS' for non-negative least squares; 'QP' for quadratic programming; 'SA' for simulated annealing.

    - - -
    auto_reduce
    -

    if TRUE, try reducing the input reference signatures to increase -the cosine similarity of reconstructed profile to observed profile.

    - - -
    type
    -

    'absolute' for signature exposure and 'relative' for signature relative exposure.

    - - -
    return_class
    -

    string, 'matrix' or 'data.table'.

    - - -
    return_error
    -

    if TRUE, also return sample error (Frobenius norm) and cosine -similarity between observed sample profile (a.k.a. spectrum) and reconstructed profile. NOTE: -it is better to obtain the error when the type is 'absolute', because the error is -affected by relative exposure accuracy.

    - - -
    rel_threshold
    -

    numeric vector, a signature with relative exposure -lower than (equal is included, i.e. <=) this value will be set to 0 -(both absolute exposure and relative exposure). -In this case, sum of signature contribution may not equal to 1.

    - - -
    mode
    -

    signature type for plotting, now supports 'copynumber', 'SBS', -'DBS', 'ID' and 'RS' (genome rearrangement signature).

    - - -
    true_catalog
    -

    used internally by sig_fit_bootstrap; users should never set it.

    - - -
    ...
    -

    control parameters passing to argument control in GenSA function when use method 'SA'.

    - -
    -
    -

    Value

    - - -

    The exposure result either in matrix or data.table format. -If return_error set TRUE, a list is returned.

    -
    -
    -

    Details

    -

    The method 'NNLS' solves the minimization problem with nonnegative least-squares constraints. -The methods 'QP' and 'SA' are modified from the SignatureEstimation package. -See references for details. -Of note, when fitting exposures for copy number signatures, only components of -feature CN are used.

    -
    -
    -

    References

    -

    Daniel Huebschmann, Zuguang Gu and Matthias Schlesner (2019). YAPSA: Yet Another Package for Signature Analysis. R package version 1.12.0.

    -

    Huang X, Wojtowicz D, Przytycka TM. Detecting presence of mutational signatures in cancer with confidence. Bioinformatics. 2018;34(2):330–337. doi:10.1093/bioinformatics/btx604

    -

    Kim, Jaegil, et al. "Somatic ERCC2 mutations are associated with a distinct genomic signature in urothelial tumors." -Nature genetics 48.6 (2016): 600.

    -
    - - -
    -

    Examples

    -
    # \donttest{
    -W <- matrix(c(1, 2, 3, 4, 5, 6), ncol = 2)
    -colnames(W) <- c("sig1", "sig2")
    -W <- apply(W, 2, function(x) x / sum(x))
    -
    -H <- matrix(c(2, 5, 3, 6, 1, 9, 1, 2), ncol = 4)
    -colnames(H) <- paste0("samp", 1:4)
    -
    -V <- W %*% H
    -V
    -
    -if (requireNamespace("quadprog", quietly = TRUE)) {
    -  H_infer <- sig_fit(V, W, method = "QP")
    -  H_infer
    -  H
    -
    -  H_dt <- sig_fit(V, W, method = "QP", auto_reduce = TRUE, return_class = "data.table")
    -  H_dt
    -
    -  ## Show results
    -  show_sig_fit(H_infer)
    -  show_sig_fit(H_dt)
    -
    -  ## Get clusters/groups
    -  H_dt_rel <- sig_fit(V, W, return_class = "data.table", type = "relative")
    -  z <- get_groups(H_dt_rel, method = "k-means")
    -  show_groups(z)
    -}
    -
    -# if (requireNamespace("GenSA", quietly = TRUE)) {
    -#   H_infer <- sig_fit(V, W, method = "SA")
    -#   H_infer
    -#   H
    -#
    -#   H_dt <- sig_fit(V, W, method = "SA", return_class = "data.table")
    -#   H_dt
    -#
    -#   ## Modify arguments to method
    -#   sig_fit(V, W, method = "SA", maxit = 10, temperature = 100)
    -#
    -#   ## Show results
    -#   show_sig_fit(H_infer)
    -#   show_sig_fit(H_dt)
    -# }
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/sig_fit_bootstrap.html b/docs/reference/sig_fit_bootstrap.html deleted file mode 100644 index 0775a399..00000000 --- a/docs/reference/sig_fit_bootstrap.html +++ /dev/null @@ -1,272 +0,0 @@ - -Obtain Bootstrap Distribution of Signature Exposures of a Certain Tumor Sample — sig_fit_bootstrap • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    This can be used to obtain the confidence of signature exposures or search -the suboptimal decomposition solution.

    -
    - -
    -
    sig_fit_bootstrap(
    -  catalog,
    -  sig,
    -  n = 100L,
    -  sig_index = NULL,
    -  sig_db = "legacy",
    -  db_type = c("", "human-exome", "human-genome"),
    -  show_index = TRUE,
    -  method = c("QP", "NNLS", "SA"),
    -  auto_reduce = FALSE,
    -  SA_not_bootstrap = FALSE,
    -  type = c("absolute", "relative"),
    -  rel_threshold = 0,
    -  mode = c("SBS", "DBS", "ID", "copynumber"),
    -  find_suboptimal = FALSE,
    -  suboptimal_ref_error = NULL,
    -  suboptimal_factor = 1.05,
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    catalog
    -

    a named numeric vector or a numeric matrix with dimension Nx1. -N is the number of component, 1 is the sample.

    - - -
    sig
    -

    a Signature object obtained either from sig_extract or sig_auto_extract, -or just a raw signature matrix/data.frame with row representing components (motifs) and -column representing signatures.

    - - -
    n
    -

    the number of bootstrap replicates.

    - - -
    sig_index
    -

    a vector for signature index. "ALL" for all signatures.

    - - -
    sig_db
    -

    default 'legacy', it can be 'legacy' (for COSMIC v2 'SBS'), -'SBS', 'DBS', 'ID' and 'TSB' (for COSMIC v3.1 signatures) -for small scale mutations. -For more specific details, it can also be 'SBS_hg19', 'SBS_hg38', -'SBS_mm9', 'SBS_mm10', 'DBS_hg19', 'DBS_hg38', 'DBS_mm9', 'DBS_mm10' to use -COSMIC v3 reference signatures from Alexandrov, Ludmil B., et al. (2020) (reference #1). -In addition, it can be one of "SBS_Nik_lab_Organ", "RS_Nik_lab_Organ", -"SBS_Nik_lab", "RS_Nik_lab" to refer to reference signatures from -Degasperi, Andrea, et al. (2020) (reference #2); -"RS_BRCA560", "RS_USARC" to reference signatures from BRCA560 and USARC cohorts; -"CNS_USARC" (40 categories), "CNS_TCGA" (48 categories) to reference copy number signatures from USARC cohort and TCGA; -"CNS_TCGA176" (176 categories) and "CNS_PCAWG176" (176 categories) to reference copy number signatures from TCGA and PCAWG, respectively. -UPDATE, the latest version of the reference signatures can be automatically -downloaded and loaded from https://cancer.sanger.ac.uk/signatures/downloads/ -when an option with latest_ prefix is specified (e.g. "latest_SBS_GRCh37"). -Note: the signature profiles for different genome builds are basically the same. -And a specific database (e.g. 'SBS_mm10') contains fewer signatures than all COSMIC -signatures (because some signatures are not detected from Alexandrov, Ludmil B., et al. (2020)). -For all available options, check the parameter setting.

    - - -
    db_type
    -

    only used when sig_db is enabled. -"" for keeping default, "human-exome" for transforming to exome frequency of component, -and "human-genome" for transforming to whole genome frequency of component. -Currently only works for 'SBS'.

    - - -
    show_index
    -

    if TRUE, show valid indices.

    - - -
    method
    -

    method to solve the minimization problem. -'NNLS' for non-negative least squares; 'QP' for quadratic programming; 'SA' for simulated annealing.

    - - -
    auto_reduce
    -

    if TRUE, try reducing the input reference signatures to increase -the cosine similarity of reconstructed profile to observed profile.

    - - -
    SA_not_bootstrap
    -

    if TRUE, directly run 'SA' multiple times with original input instead of -bootstrap samples.

    - - -
    type
    -

    'absolute' for signature exposure and 'relative' for signature relative exposure.

    - - -
    rel_threshold
    -

    numeric vector, a signature with relative exposure -lower than (equal is included, i.e. <=) this value will be set to 0 -(both absolute exposure and relative exposure). -In this case, sum of signature contribution may not equal to 1.

    - - -
    mode
    -

    signature type for plotting, now supports 'copynumber', 'SBS', -'DBS', 'ID' and 'RS' (genome rearrangement signature).

    - - -
    find_suboptimal
    -

    logical, if TRUE, find suboptimal decomposition with -slightly higher error than the optimal solution by method 'SA'. This is useful -to explore hidden dependencies between signatures. More see reference.

    - - -
    suboptimal_ref_error
    -

    baseline error used for finding suboptimal solution. -if it is NULL, then use 'SA' method to obtain the optimal error.

    - - -
    suboptimal_factor
    -

    suboptimal factor to get suboptimal error, default is 1.05, -i.e., suboptimal error is 1.05 times baseline error.

    - - -
    ...
    -

    control parameters passing to argument control in GenSA function when use method 'SA'.

    - -
    -
    -

    Value

    - - -

    a list

    - - -
    -
    -

    References

    -

    Huang X, Wojtowicz D, Przytycka TM. Detecting presence of mutational signatures in cancer with confidence. Bioinformatics. 2018;34(2):330–337. doi:10.1093/bioinformatics/btx604

    -
    - - -
    -

    Examples

    -
    W <- matrix(c(1, 2, 3, 4, 5, 6), ncol = 2)
    -colnames(W) <- c("sig1", "sig2")
    -W <- apply(W, 2, function(x) x / sum(x))
    -
    -H <- matrix(c(2, 5, 3, 6, 1, 9, 1, 2), ncol = 4)
    -colnames(H) <- paste0("samp", 1:4)
    -
    -V <- W %*% H
    -V
    -
    -if (requireNamespace("quadprog", quietly = TRUE)) {
    -  H_bootstrap <- sig_fit_bootstrap(V[, 1], W, n = 10, type = "absolute")
    -  ## Typically, you have to run many times to get close to the answer
    -  boxplot(t(H_bootstrap$expo))
    -  H[, 1]
    -
    -  ## Return P values
    -  ## In practice, run times >= 100
    -  ## is recommended
    -  report_bootstrap_p_value(H_bootstrap)
    -  ## For multiple samples
    -  ## Input a list
    -  report_bootstrap_p_value(list(samp1 = H_bootstrap, samp2 = H_bootstrap))
    -
    -  #   ## Find suboptimal decomposition
    -  #   H_suboptimal <- sig_fit_bootstrap(V[, 1], W,
    -  #     n = 10,
    -  #     type = "absolute",
    -  #     method = "SA",
    -  #     find_suboptimal = TRUE
    -  #   )
    -}
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/sig_fit_bootstrap_batch.html b/docs/reference/sig_fit_bootstrap_batch.html deleted file mode 100644 index 711d0df3..00000000 --- a/docs/reference/sig_fit_bootstrap_batch.html +++ /dev/null @@ -1,183 +0,0 @@ - -Exposure Instability Analysis of Signature Exposures with Bootstrapping — sig_fit_bootstrap_batch • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Read sig_fit_bootstrap for more option setting.

    -
    - -
    -
    sig_fit_bootstrap_batch(
    -  catalogue_matrix,
    -  methods = c("QP"),
    -  n = 100L,
    -  min_count = 1L,
    -  p_val_thresholds = c(0.05),
    -  use_parallel = FALSE,
    -  seed = 123456L,
    -  job_id = NULL,
    -  result_dir = tempdir(),
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    catalogue_matrix
    -

    a numeric matrix V with row representing components and -columns representing samples, typically you can get nmf_matrix from sig_tally() and -transpose it by t().

    - - -
    methods
    -

    a subset of c("NNLS", "QP", "SA").

    - - -
    n
    -

    the number of bootstrap replicates.

    - - -
    min_count
    -

    minimal exposure in a sample, default is 1. Any patient with total exposure less -than this value will be filtered out.

    - - -
    p_val_thresholds
    -

    a vector of relative exposure threshold for calculating p values.

    - - -
    use_parallel
    -

    if TRUE, use parallel computation based on furrr package. -It can also be an integer for specifying cores.

    - - -
    seed
    -

    random seed to reproduce the result.

    - - -
    job_id
    -

    a job ID, default is NULL, can be a string. When not NULL, all bootstrapped results -will be saved to local machine location defined by result_dir. This is very useful for running -more than 10 times for more than 100 samples.

    - - -
    result_dir
    -

    see above, default is temp directory defined by R.

    - - -
    ...
    -

    other common parameters passing to sig_fit_bootstrap, including -sig, sig_index, sig_db, db_type, mode, auto_reduce etc.

    - -
    -
    -

    Value

    - - -

    a list of data.table.

    -
    -
    -

    See also

    - -
    - -
    -

    Examples

    -
    W <- matrix(c(1, 2, 3, 4, 5, 6), ncol = 2)
    -colnames(W) <- c("sig1", "sig2")
    -W <- apply(W, 2, function(x) x / sum(x))
    -
    -H <- matrix(c(2, 5, 3, 6, 1, 9, 1, 2), ncol = 4)
    -colnames(H) <- paste0("samp", 1:4)
    -
    -V <- W %*% H
    -V
    -
    -if (requireNamespace("quadprog")) {
    -  z10 <- sig_fit_bootstrap_batch(V, sig = W, n = 10)
    -  z10
    -}
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/sig_operation.html b/docs/reference/sig_operation.html deleted file mode 100644 index 74edde4e..00000000 --- a/docs/reference/sig_operation.html +++ /dev/null @@ -1,156 +0,0 @@ - -Obtain or Modify Signature Information — sig_operation • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Obtain or Modify Signature Information

    -
    - -
    -
    sig_names(sig)
    -
    -sig_modify_names(sig, new_names)
    -
    -sig_number(sig)
    -
    -sig_attrs(sig)
    -
    -sig_signature(sig, normalize = c("row", "column", "raw", "feature"))
    -
    -sig_exposure(sig, type = c("absolute", "relative"))
    -
    - -
    -

    Arguments

    -
    sig
    -

    a Signature object obtained either from sig_extract or sig_auto_extract.

    - - -
    new_names
    -

    new signature names.

    - - -
    normalize
    -

    one of 'row', 'column', 'raw' and "feature", for row normalization (signature), -column normalization (component), raw data, row normalization by feature, respectively.

    - - -
    type
    -

    one of 'absolute' and 'relative'.

    - -
    -
    -

    Value

    - - -

    a Signature object or data.

    -
    - -
    -

    Examples

    -
    ## Operate signature names
    -load(system.file("extdata", "toy_mutational_signature.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -sig_names(sig2)
    -cc <- sig_modify_names(sig2, new_names = c("Sig2", "Sig1", "Sig3"))
    -sig_names(cc)
    -
    -# The older names are stored in tags.
    -print(attr(cc, "tag"))
    -## Get signature number
    -sig_number(sig2)
    -## Get signature attributes
    -sig_number(sig2)
    -## Get signature matrix
    -z <- sig_signature(sig2)
    -z <- sig_signature(sig2, normalize = "raw")
    -## Get exposure matrix
    -## Of note, this is different from get_sig_exposure()
    -## it returns a matrix instead of data table.
    -z <- sig_exposure(sig2) # it is same as sig$Exposure
    -z <- sig_exposure(sig2, type = "relative") # it is same as sig2$Exposure.norm
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/sig_tally.html b/docs/reference/sig_tally.html deleted file mode 100644 index 3e1b7a99..00000000 --- a/docs/reference/sig_tally.html +++ /dev/null @@ -1,298 +0,0 @@ - -Tally a Genomic Alteration Object — sig_tally • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Tally a variation object like MAF, CopyNumber and return a matrix for NMF de-composition and more. -This is a generic function, -so it can be further extended to other mutation cases. -Please read details about how to set sex for identifying copy number signatures. -Please read https://osf.io/s93d5/ for the generation of SBS, DBS and ID (INDEL) -components.

    -
    - -
    -
    sig_tally(object, ...)
    -
    -# S3 method for CopyNumber
    -sig_tally(
    -  object,
    -  method = "Wang",
    -  ignore_chrs = NULL,
    -  indices = NULL,
    -  add_loh = FALSE,
    -  feature_setting = sigminer::CN.features,
    -  cores = 1,
    -  keep_only_matrix = FALSE,
    -  ...
    -)
    -
    -# S3 method for RS
    -sig_tally(object, keep_only_matrix = FALSE, ...)
    -
    -# S3 method for MAF
    -sig_tally(
    -  object,
    -  mode = c("SBS", "DBS", "ID", "ALL"),
    -  ref_genome = "BSgenome.Hsapiens.UCSC.hg19",
    -  genome_build = NULL,
    -  add_trans_bias = FALSE,
    -  ignore_chrs = NULL,
    -  use_syn = TRUE,
    -  keep_only_matrix = FALSE,
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    object
    -

    a CopyNumber object or MAF object or SV object (from read_sv_as_rs).

    - - -
    ...
    -

    custom setting for operating object. Detail see S3 method for -corresponding class (e.g. CopyNumber).

    - - -
    method
    -

    method for feature classification, can be one of -"Wang" ("W"), "S" (for method described in Steele et al. 2019), -"X" (for method described in Tao et al. 2023).

    - - -
    ignore_chrs
    -

    Chromosomes to ignore from analysis, e.g. chrX and chrY.

    - - -
    indices
    -

    integer vector indicating segments to keep.

    - - -
    add_loh
    -

    flag to add LOH classifications.

    - - -
    feature_setting
    -

    a data.frame used for classification. -Only used when method is "Wang" ("W"). -Default is CN.features. Users can also set custom input with "feature", -"min" and "max" columns available. Valid features can be printed by -unique(CN.features$feature).

    - - -
    cores
    -

    number of computer cores to run this task. -You can use future::availableCores() function to check how -many cores you can use.

    - - -
    keep_only_matrix
    -

    if TRUE, keep only matrix for signature extraction. -For a MAF object, this will just return the most useful matrix.

    - - -
    mode
    -

    type of mutation matrix to extract, can be one of 'SBS', 'DBS' and 'ID'.

    - - -
    ref_genome
    -

    'BSgenome.Hsapiens.UCSC.hg19', 'BSgenome.Hsapiens.UCSC.hg38', -'BSgenome.Mmusculus.UCSC.mm10', 'BSgenome.Mmusculus.UCSC.mm9', etc.

    - - -
    genome_build
    -

    genome build 'hg19', 'hg38', 'mm9' or "mm10", if not set, guess it by ref_genome.

    - - -
    add_trans_bias
    -

    if TRUE, consider transcriptional bias categories. -'T:' for Transcribed (the variant is on the transcribed strand); -'U:' for Un-transcribed (the variant is on the untranscribed strand); -'B:' for Bi-directional (the variant is on both strand and is transcribed either way); -'N:' for Non-transcribed (the variant is in a non-coding region and is untranslated); -'Q:' for Questionable. -NOTE: the result counts of 'B' and 'N' labels are a little different from -SigProfilerMatrixGenerator, the reason is unknown (may be caused by annotation file).

    - - -
    use_syn
    -

    Logical. If TRUE, include synonymous variants in analysis.

    - -
    -
    -

    Value

    - - -

    a list contains a matrix used for NMF de-composition.

    -
    -
    -

    Details

    -

    For identifying copy number signatures, we have to derive copy number -features firstly. Due to the difference of copy number values in sex chromosomes -between male and female, we have to do an extra step if we don't want to -ignore them.

    -

    I create two options to control this, the default values are shown as -the following, you can use the same way to set (per R session).

    -

    options(sigminer.sex = "female", sigminer.copynumber.max = NA_integer_)

    • If your cohort are all females, you can totally ignore this.

    • -
    • If your cohort are all males, set sigminer.sex to 'male' and -sigminer.copynumber.max to a proper value (the best is consistent -with read_copynumber).

    • -
    • If your cohort contains both males and females, set sigminer.sex -as a data.frame with two columns "sample" and "sex". And -set sigminer.copynumber.max to a proper value (the best is consistent -with read_copynumber).

    • -
    -
    -

    Methods (by class)

    - -
    • sig_tally(CopyNumber): Returns copy number features, components and component-by-sample matrix

    • -
    • sig_tally(RS): Returns genome rearrangement sample-by-component matrix

    • -
    • sig_tally(MAF): Returns SBS mutation sample-by-component matrix and APOBEC enrichment

    • -
    -
    -

    References

    -

    Wang, Shixiang, et al. "Copy number signature analyses in prostate cancer reveal -distinct etiologies and clinical outcomes." medRxiv (2020).

    -

    Steele, Christopher D., et al. "Undifferentiated sarcomas develop through -distinct evolutionary pathways." Cancer Cell 35.3 (2019): 441-456.

    -

    Mayakonda, Anand, et al. "Maftools: efficient and comprehensive analysis of somatic variants in cancer." Genome research 28.11 (2018): 1747-1756.

    -

    Roberts SA, Lawrence MS, Klimczak LJ, et al. An APOBEC Cytidine Deaminase Mutagenesis Pattern is Widespread in Human Cancers. Nature genetics. 2013;45(9):970-976. doi:10.1038/ng.2702.

    -

    Bergstrom EN, Huang MN, Mahto U, Barnes M, Stratton MR, Rozen SG, Alexandrov LB: SigProfilerMatrixGenerator: a tool for visualizing and exploring patterns of small mutational events. BMC Genomics 2019, 20:685 https://bmcgenomics.biomedcentral.com/articles/10.1186/s12864-019-6041-2

    -
    -
    -

    See also

    -

    sig_estimate for estimating signature number for sig_extract, -sig_auto_extract for extracting signatures using automatic relevance determination technique.

    -
    -
    -

    Author

    -

    Shixiang Wang

    -
    - -
    -

    Examples

    -
    # Load copy number object
    -load(system.file("extdata", "toy_copynumber.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -# \donttest{
    -# Use method designed by Wang, Shixiang et al.
    -cn_tally_W <- sig_tally(cn, method = "W")
    -# }
    -# Use method designed by Steele et al.
    -# See example in read_copynumber
    -# \donttest{
    -# Prepare SBS signature analysis
    -laml.maf <- system.file("extdata", "tcga_laml.maf.gz", package = "maftools")
    -laml <- read_maf(maf = laml.maf)
    -if (require("BSgenome.Hsapiens.UCSC.hg19")) {
    -  mt_tally <- sig_tally(
    -    laml,
    -    ref_genome = "BSgenome.Hsapiens.UCSC.hg19",
    -    use_syn = TRUE
    -  )
    -  mt_tally$nmf_matrix[1:5, 1:5]
    -
    -  ## Use strand bias categories
    -  mt_tally <- sig_tally(
    -    laml,
    -    ref_genome = "BSgenome.Hsapiens.UCSC.hg19",
    -    use_syn = TRUE, add_trans_bias = TRUE
    -  )
    -  ## Test it by enrichment analysis
    -  enrich_component_strand_bias(mt_tally$nmf_matrix)
    -  enrich_component_strand_bias(mt_tally$all_matrices$SBS_24)
    -} else {
    -  message("Please install package 'BSgenome.Hsapiens.UCSC.hg19' firstly!")
    -}
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/sig_unify_extract.html b/docs/reference/sig_unify_extract.html deleted file mode 100644 index 7a573ad1..00000000 --- a/docs/reference/sig_unify_extract.html +++ /dev/null @@ -1,170 +0,0 @@ - -An Unified Interface to Extract Signatures — sig_unify_extract • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    This function provides an unified interface to signature extractor -implemented in sigminer. If you determine a specific approach, -please also read the documentation of corresponding extractor. -See "Arguments" part.

    -
    - -
    -
    sig_unify_extract(
    -  nmf_matrix,
    -  range = 2:5,
    -  nrun = 10,
    -  approach = c("bayes_nmf", "repeated_nmf", "bootstrap_nmf", "sigprofiler"),
    -  cores = 1L,
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    nmf_matrix
    -

    a matrix used for NMF decomposition with rows indicate samples and columns indicate components.

    - - -
    range
    -

    signature number range, i.e. 2:5.

    - - -
    nrun
    -

    the number of iteration to be performed to extract each signature number.

    - - -
    approach
    -

    approach name.

    - - -
    cores
    -

    number of cores used for computation.

    - - -
    ...
    -

    other parameters passing to signature extractor based -on the approach setting.

    - -
    -
    -

    Value

    - - -

    Result dependent on the approach setting.

    -
    - - -
    -

    Examples

    -
    # \donttest{
    -load(system.file("extdata", "toy_copynumber_tally_W.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -# Extract signatures
    -# It is same as sig_extract(cn_tally_W$nmf_matrix, 2, nrun = 1)
    -res <- sig_unify_extract(cn_tally_W$nmf_matrix, 2,
    -  nrun = 1,
    -  approach = "repeated_nmf"
    -)
    -# Auto-extract signatures based on bayesian NMF
    -res2 <- sig_unify_extract(cn_tally_W$nmf_matrix,
    -  nrun = 1,
    -  approach = "bayes_nmf"
    -)
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/sigminer-package.html b/docs/reference/sigminer-package.html deleted file mode 100644 index eb5ac492..00000000 --- a/docs/reference/sigminer-package.html +++ /dev/null @@ -1,123 +0,0 @@ - -sigminer: Extract, Analyze and Visualize Signatures for Genomic Variations — sigminer-package • sigminer - - -
    -
    - - - -
    -
    - - -
    - -
    - - - -
    -

    Author

    -

    Maintainer: Shixiang Wang w_shixiang@163.com (ORCID)

    -

    Authors:

    Other contributors:

    • Anand Mayakonda [contributor]

    • -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/sigminer.html b/docs/reference/sigminer.html deleted file mode 100644 index 8c383972..00000000 --- a/docs/reference/sigminer.html +++ /dev/null @@ -1,108 +0,0 @@ - -sigminer: Extract, Analyze and Visualize Signatures for Genomic Variations — sigminer • sigminer - - -
    -
    - - - -
    -
    - - -
    - -
    - - - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/sigprofiler.html b/docs/reference/sigprofiler.html deleted file mode 100644 index 48eacad7..00000000 --- a/docs/reference/sigprofiler.html +++ /dev/null @@ -1,238 +0,0 @@ - -Extract Signatures with SigProfiler — sigprofiler • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    This function provides an interface to software SigProfiler. -More please see https://github.com/AlexandrovLab/SigProfilerExtractor. -Typically, a reference genome is not required because the input is a matrix (my understanding). -If you are using refitting result by SigProfiler, please make sure you have input the matrix same order as examples at https://github.com/AlexandrovLab/SigProfilerMatrixGenerator/tree/master/SigProfilerMatrixGenerator/references/matrix/BRCA_example. If not, use sigprofiler_reorder() firstly.

    -
    - -
    -
    sigprofiler_extract(
    -  nmf_matrix,
    -  output,
    -  output_matrix_only = FALSE,
    -  range = 2:5,
    -  nrun = 10L,
    -  refit = FALSE,
    -  refit_plot = FALSE,
    -  is_exome = FALSE,
    -  init_method = c("random", "nndsvd_min", "nndsvd", "nndsvda", "nndsvdar"),
    -  cores = -1L,
    -  genome_build = c("hg19", "hg38", "T2T", "mm10", "mm9", "ce11"),
    -  use_conda = FALSE,
    -  py_path = NULL,
    -  sigprofiler_version = "1.1.3"
    -)
    -
    -sigprofiler_import(
    -  output,
    -  order_by_expo = FALSE,
    -  type = c("suggest", "refit", "all")
    -)
    -
    -sigprofiler_reorder(
    -  nmf_matrix,
    -  type = c("SBS96", "SBS6", "SBS12", "SBS192", "SBS1536", "SBS3072", "DBS78", "DBS312",
    -    "DBS1248", "DBS4992")
    -)
    -
    - -
    -

    Arguments

    -
    nmf_matrix
    -

    a matrix used for NMF decomposition with rows indicate samples and columns indicate components.

    - - -
    output
    -

    output directory.

    - - -
    output_matrix_only
    -

    if TRUE, only generate matrix file for SigProfiler -so user can call SigProfiler with the input by himself.

    - - -
    range
    -

    signature number range, i.e. 2:5.

    - - -
    nrun
    -

    the number of iteration to be performed to extract each signature number.

    - - -
    refit
    -

    if TRUE, then refit the denovo signatures with nnls. Same -meaning as optimize option in sig_extract or sig_auto_extract.

    - - -
    refit_plot
    -

    if TRUE, SigProfiler will make -denovo to COSMIC signatures decomposition plots. However, this may fail because -some matrices cannot be identified by the SigProfiler plot program.

    - - -
    is_exome
    -

    if TRUE, the exomes will be extracted.

    - - -
    init_method
    -

    the initialization algorithm for W and H matrix of NMF. -Options are 'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'alexandrov-lab-custom' -and 'nndsvd_min'.

    - - -
    cores
    -

    number of cores used for computation.

    - - -
    genome_build
    -

    I think this option is useless when input is matrix, keep it -in case it is useful.

    - - -
    use_conda
    -

    if TRUE, create an independent conda environment to run SigProfiler.

    - - -
    py_path
    -

    path to Python executable file, e.g. '/Users/wsx/anaconda3/bin/python'.

    - - -
    sigprofiler_version
    -

    version of SigProfilerExtractor. If this -package is not installed, the specified package will be installed. -If this package is installed, this option is useless.

    - - -
    order_by_expo
    -

    if TRUE, order the import signatures by their exposures, e.g. the signature -contributed the most exposure in all samples will be named as Sig1.

    - - -
    type
    -

    mutational signature type.

    - -
    -
    -

    Value

    - - -

    For sigprofiler_extract(), returns nothing. See output directory.

    - - -

    For sigprofiler_import(), a list containing Signature object.

    - - -

    A NMF matrix for input of sigprofiler_extract().

    -
    - -
    -

    Examples

    -
    if (FALSE) {
    -  load(system.file("extdata", "toy_copynumber_tally_W.RData",
    -    package = "sigminer", mustWork = TRUE
    -  ))
    -
    -  reticulate::conda_list()
    -
    -  sigprofiler_extract(cn_tally_W$nmf_matrix, "~/test/test_sigminer",
    -    use_conda = TRUE
    -  )
    -
    -  sigprofiler_extract(cn_tally_W$nmf_matrix, "~/test/test_sigminer",
    -    use_conda = FALSE, py_path = "/Users/wsx/anaconda3/bin/python"
    -  )
    -}
    -
    -data("simulated_catalogs")
    -sigprofiler_reorder(t(simulated_catalogs$set1))
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/simulated_catalogs.html b/docs/reference/simulated_catalogs.html deleted file mode 100644 index 2b6d392c..00000000 --- a/docs/reference/simulated_catalogs.html +++ /dev/null @@ -1,121 +0,0 @@ - -A List of Simulated SBS-96 Catalog Matrix — simulated_catalogs • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Data from doi:10.1038/s43018-020-0027-5 -. -5 simulated mutation catalogs are used by the paper but only 4 are available. -The data are simulated from COSMIC mutational signatures 1, 2, 3, 5, 6, 8, -12, 13, 17 and 18. Each sample is a linear combination of 5 randomly selected -signatures with the addition of Poisson noise. The number of mutations in -each sample is randomly selected between 1,000 and 50,000 mutations, in log -scale so that a lower number of mutations is more likely to be selected. -The proportion of each signature in each sample is also random.

    -
    - - -
    -

    Format

    -

    A list of matrices

    -
    -
    -

    Source

    -

    Generated from code under data_raw/

    -
    - -
    -

    Examples

    -
    data(simulated_catalogs)
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/simulation.html b/docs/reference/simulation.html deleted file mode 100644 index cc885f50..00000000 --- a/docs/reference/simulation.html +++ /dev/null @@ -1,164 +0,0 @@ - -Simulation Analysis — simulation • sigminer - - -
    -
    - - - -
    -
    - - -
    - -
    • simulate_signature() - Simulate signatures from signature pool.

    • -
    • simulate_catalogue() - Simulate catalogs from signature/catalog pool.

    • -
    • simulate_catalogue_matrix() - Simulate a bootstrapped catalog matrix.

    • -
    - -
    -
    simulate_signature(x, weights = NULL)
    -
    -simulate_catalogue(x, n, weights = NULL)
    -
    -simulate_catalogue_matrix(x)
    -
    - -
    -

    Arguments

    -
    x
    -

    a numeric vector representing a signature/catalog or matrix with rows representing -signatures/samples and columns representing components.

    - - -
    weights
    -

    a numeric vector for weights.

    - - -
    n
    -

    an integer indicating mutation number to be generated in a catalog.

    - -
    -
    -

    Value

    - - -

    a matrix.

    -
    - -
    -

    Examples

    -
    # Generate a catalog
    -set.seed(1234)
    -catalog <- as.integer(table(sample(1:96, 1000, replace = TRUE)))
    -names(catalog) <- paste0("comp", 1:96)
    -# Generate a signature
    -sig <- catalog / sum(catalog)
    -
    -# Simulate catalogs
    -x1 <- simulate_catalogue(catalog, 10) # 10 mutations
    -x1
    -x2 <- simulate_catalogue(catalog, 100) # 100 mutations
    -x2
    -x3 <- simulate_catalogue(catalog, 1000) # 1000 mutations
    -x3
    -# Similar with a signature
    -x4 <- simulate_catalogue(sig, 10) # 10 mutations
    -x4
    -
    -# Load SBS signature
    -load(system.file("extdata", "toy_mutational_signature.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -s <- t(sig2$Signature.norm)
    -# Generate a signature from multiple signatures/catalogs
    -s1 <- simulate_signature(s)
    -s1
    -s2 <- simulate_signature(s, weights = 1:3)
    -s2
    -# Generate a catalog from multiple signatures/catalogs
    -c1 <- simulate_catalogue(s, 100, weights = 1:3)
    -c1
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/subset.CopyNumber.html b/docs/reference/subset.CopyNumber.html deleted file mode 100644 index 774387aa..00000000 --- a/docs/reference/subset.CopyNumber.html +++ /dev/null @@ -1,124 +0,0 @@ - -Subsetting CopyNumber object — subset.CopyNumber • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Subset data slot of CopyNumber object, un-selected rows will move to -dropoff.segs slot, annotation slot will update in the same way.

    -
    - -
    -
    # S3 method for CopyNumber
    -subset(x, subset = TRUE, ...)
    -
    - -
    -

    Arguments

    -
    x
    -

    a CopyNumber object to be subsetted.

    - - -
    subset
    -

    logical expression indicating rows to keep.

    - - -
    ...
    -

    further arguments to be passed to or from other methods. -Useless here.

    - -
    -
    -

    Value

    - - -

    a CopyNumber object

    -
    -
    -

    Author

    -

    Shixiang Wang

    -
    - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/tidyeval.html b/docs/reference/tidyeval.html deleted file mode 100644 index 993b2379..00000000 --- a/docs/reference/tidyeval.html +++ /dev/null @@ -1,154 +0,0 @@ - -Tidy eval helpers — tidyeval • sigminer - - -
    -
    - - - -
    -
    - - -
    - -
    • sym() creates a symbol from a string and -syms() creates a list of symbols from a -character vector.

    • -
    • enquo() and -enquos() delay the execution of one or -several function arguments. enquo() returns a single quoted -expression, which is like a blueprint for the delayed computation. -enquos() returns a list of such quoted expressions.

    • -
    • expr() quotes a new expression locally. It -is mostly useful to build new expressions around arguments -captured with enquo() or enquos(): -expr(mean(!!enquo(arg), na.rm = TRUE)).

    • -
    • as_name() transforms a quoted variable name -into a string. Supplying something else than a quoted variable -name is an error.

      -

      That's unlike as_label() which also returns -a single string but supports any kind of R object as input, -including quoted function calls and vectors. Its purpose is to -summarise that object into a single label. That label is often -suitable as a default name.

      -

      If you don't know what a quoted expression contains (for instance -expressions captured with enquo() could be a variable -name, a call to a function, or an unquoted constant), then use -as_label(). If you know you have quoted a simple variable -name, or would like to enforce this, use as_name().

    • -

    To learn more about tidy eval and how to use these tools, visit -https://dplyr.tidyverse.org/articles/programming.html and the -Metaprogramming -section of Advanced R.

    -
    - - - -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/transcript.hg19.html b/docs/reference/transcript.hg19.html deleted file mode 100644 index 5deb7ff7..00000000 --- a/docs/reference/transcript.hg19.html +++ /dev/null @@ -1,105 +0,0 @@ - -Merged Transcript Location at Genome Build hg19 — transcript.hg19 • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Merged Transcript Location at Genome Build hg19

    -
    - - -
    -

    Format

    -

    A data.table

    -
    -
    -

    Source

    -

    from GENCODE release v33.

    -
    - -
    -

    Examples

    -
    data(transcript.hg19)
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/transcript.hg38.html b/docs/reference/transcript.hg38.html deleted file mode 100644 index c0cdebee..00000000 --- a/docs/reference/transcript.hg38.html +++ /dev/null @@ -1,105 +0,0 @@ - -Merged Transcript Location at Genome Build hg38 — transcript.hg38 • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Merged Transcript Location at Genome Build hg38

    -
    - - -
    -

    Format

    -

    A data.table

    -
    -
    -

    Source

    -

    from GENCODE release v33.

    -
    - -
    -

    Examples

    -
    data(transcript.hg38)
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/transcript.mm10.html b/docs/reference/transcript.mm10.html deleted file mode 100644 index c95ea883..00000000 --- a/docs/reference/transcript.mm10.html +++ /dev/null @@ -1,105 +0,0 @@ - -Merged Transcript Location at Genome Build mm10 — transcript.mm10 • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Merged Transcript Location at Genome Build mm10

    -
    - - -
    -

    Format

    -

    A data.table

    -
    -
    -

    Source

    -

    from GENCODE release M25.

    -
    - -
    -

    Examples

    -
    data(transcript.mm10)
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/transcript.mm9.html b/docs/reference/transcript.mm9.html deleted file mode 100644 index f876c9da..00000000 --- a/docs/reference/transcript.mm9.html +++ /dev/null @@ -1,105 +0,0 @@ - -Merged Transcript Location at Genome Build mm9 — transcript.mm9 • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Merged Transcript Location at Genome Build mm9

    -
    - - -
    -

    Format

    -

    A data.table

    -
    - - -
    -

    Examples

    -
    data(transcript.mm9)
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/transform_seg_table.html b/docs/reference/transform_seg_table.html deleted file mode 100644 index 46fb2ede..00000000 --- a/docs/reference/transform_seg_table.html +++ /dev/null @@ -1,162 +0,0 @@ - -Transform Copy Number Table — transform_seg_table • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Transform Copy Number Table

    -
    - -
    -
    transform_seg_table(
    -  data,
    -  genome_build = c("hg19", "hg38", "T2T", "mm10", "mm9", "ce11"),
    -  ref_type = c("cytoband", "gene"),
    -  values_fill = NA,
    -  values_fn = function(x, ...) {
    -     round(mean(x, ...))
    - },
    -  resolution_factor = 1L
    -)
    -
    - -
    -

    Arguments

    -
    data
    -

    a CopyNumber object or a data.frame containing -at least 'chromosome', 'start', 'end', 'segVal', 'sample' these columns.

    - - -
    genome_build
    -

    genome build version, used when data is a data.frame, should be one of 'hg19', 'hg38', 'T2T', 'mm10', 'mm9' or 'ce11'.

    - - -
    ref_type
    -

    annotation data type used for constructing matrix.

    - - -
    values_fill
    -

    Optionally, a (scalar) value that specifies what each -value should be filled in with when missing.

    -

    This can be a named list if you want to apply different fill values to -different value columns.

    - - -
    values_fn
    -

    Optionally, a function applied to the value in each cell -in the output. You will typically use this when the combination of -id_cols and names_from columns does not uniquely identify an -observation.

    -

    This can be a named list if you want to apply different aggregations -to different values_from columns.

    - - -
    resolution_factor
    -

    an integer to control the resolution. -When it is 1 (default), compute frequency in each cytoband. -When it is 2, compute frequency in each half cytoband.

    - -
    -
    -

    Value

    - - -

    a data.table.

    -
    - -
    -

    Examples

    -
    # \donttest{
    -load(system.file("extdata", "toy_copynumber.RData",
    -  package = "sigminer", mustWork = TRUE
    -))
    -# Compute the mean segVal in each cytoband
    -x <- transform_seg_table(cn, resolution_factor = 1)
    -x
    -# Compute the mean segVal in each half-cytoband
    -x2 <- transform_seg_table(cn, resolution_factor = 2)
    -x2
    -# }
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/reference/use_color_style.html b/docs/reference/use_color_style.html deleted file mode 100644 index 93291fd3..00000000 --- a/docs/reference/use_color_style.html +++ /dev/null @@ -1,126 +0,0 @@ - -Set Color Style for Plotting — use_color_style • sigminer - - -
    -
    - - - -
    -
    - - -
    -

    Set Color Style for Plotting

    -
    - -
    -
    use_color_style(
    -  style,
    -  mode = c("SBS", "copynumber", "DBS", "ID", "RS"),
    -  method = "Wang"
    -)
    -
    - -
    -

    Arguments

    -
    style
    -

    one of 'default' and 'cosmic'.

    - - -
    mode
    -

    only used when the style is 'cosmic', can be one of -"SBS", "copynumber", "DBS", "ID", "RS".

    - - -
    method
    -

    used to set a more custom palette for different methods.

    - -
    -
    -

    Value

    - - -

    color values.

    -
    - -
    -

    Examples

    -
    use_color_style("default")
    -use_color_style("cosmic")
    -
    -
    - -
    - - -
    - - - - - - - - diff --git a/docs/sitemap.xml b/docs/sitemap.xml deleted file mode 100644 index 55de8748..00000000 --- a/docs/sitemap.xml +++ /dev/null @@ -1,345 +0,0 @@ - - - - https://shixiangwang.github.io/sigminer/404.html - - - https://shixiangwang.github.io/sigminer/articles/cnsignature.html - - - https://shixiangwang.github.io/sigminer/articles/index.html - - - https://shixiangwang.github.io/sigminer/articles/sigminer.html - - - https://shixiangwang.github.io/sigminer/authors.html - - - https://shixiangwang.github.io/sigminer/index.html - - - https://shixiangwang.github.io/sigminer/LICENSE-text.html - - - https://shixiangwang.github.io/sigminer/LICENSE.html - - - https://shixiangwang.github.io/sigminer/news/index.html - - - https://shixiangwang.github.io/sigminer/reference/add_h_arrow.html - - - https://shixiangwang.github.io/sigminer/reference/add_labels.html - - - https://shixiangwang.github.io/sigminer/reference/bp.html - - - https://shixiangwang.github.io/sigminer/reference/centromeres.hg19.html - - - https://shixiangwang.github.io/sigminer/reference/centromeres.hg38.html - - - https://shixiangwang.github.io/sigminer/reference/centromeres.mm10.html - - - https://shixiangwang.github.io/sigminer/reference/centromeres.mm9.html - - - https://shixiangwang.github.io/sigminer/reference/chromsize.hg19.html - - - https://shixiangwang.github.io/sigminer/reference/chromsize.hg38.html - - - https://shixiangwang.github.io/sigminer/reference/chromsize.mm10.html - - - https://shixiangwang.github.io/sigminer/reference/chromsize.mm9.html - - - https://shixiangwang.github.io/sigminer/reference/CN.features.html - - - https://shixiangwang.github.io/sigminer/reference/CopyNumber-class.html - - - https://shixiangwang.github.io/sigminer/reference/cosine.html - - - https://shixiangwang.github.io/sigminer/reference/cytobands.hg19.html - - - https://shixiangwang.github.io/sigminer/reference/cytobands.hg38.html - - - 
https://shixiangwang.github.io/sigminer/reference/cytobands.mm10.html - - - https://shixiangwang.github.io/sigminer/reference/cytobands.mm9.html - - - https://shixiangwang.github.io/sigminer/reference/enrich_component_strand_bias.html - - - https://shixiangwang.github.io/sigminer/reference/get_adj_p.html - - - https://shixiangwang.github.io/sigminer/reference/get_Aneuploidy_score.html - - - https://shixiangwang.github.io/sigminer/reference/get_bayesian_result.html - - - https://shixiangwang.github.io/sigminer/reference/get_cn_freq_table.html - - - https://shixiangwang.github.io/sigminer/reference/get_cn_ploidy.html - - - https://shixiangwang.github.io/sigminer/reference/get_genome_annotation.html - - - https://shixiangwang.github.io/sigminer/reference/get_groups.html - - - https://shixiangwang.github.io/sigminer/reference/get_group_comparison.html - - - https://shixiangwang.github.io/sigminer/reference/get_intersect_size.html - - - https://shixiangwang.github.io/sigminer/reference/get_pLOH_score.html - - - https://shixiangwang.github.io/sigminer/reference/get_shannon_diversity_index.html - - - https://shixiangwang.github.io/sigminer/reference/get_sig_cancer_type_index.html - - - https://shixiangwang.github.io/sigminer/reference/get_sig_db.html - - - https://shixiangwang.github.io/sigminer/reference/get_sig_exposure.html - - - https://shixiangwang.github.io/sigminer/reference/get_sig_feature_association.html - - - https://shixiangwang.github.io/sigminer/reference/get_sig_rec_similarity.html - - - https://shixiangwang.github.io/sigminer/reference/get_sig_similarity.html - - - https://shixiangwang.github.io/sigminer/reference/get_tidy_association.html - - - https://shixiangwang.github.io/sigminer/reference/group_enrichment.html - - - https://shixiangwang.github.io/sigminer/reference/group_enrichment2.html - - - https://shixiangwang.github.io/sigminer/reference/handle_hyper_mutation.html - - - https://shixiangwang.github.io/sigminer/reference/hello.html - - - 
https://shixiangwang.github.io/sigminer/reference/index.html - - - https://shixiangwang.github.io/sigminer/reference/MAF-class.html - - - https://shixiangwang.github.io/sigminer/reference/output_bootstrap.html - - - https://shixiangwang.github.io/sigminer/reference/output_fit.html - - - https://shixiangwang.github.io/sigminer/reference/output_sig.html - - - https://shixiangwang.github.io/sigminer/reference/output_tally.html - - - https://shixiangwang.github.io/sigminer/reference/pipe.html - - - https://shixiangwang.github.io/sigminer/reference/read_copynumber.html - - - https://shixiangwang.github.io/sigminer/reference/read_copynumber_ascat.html - - - https://shixiangwang.github.io/sigminer/reference/read_copynumber_seqz.html - - - https://shixiangwang.github.io/sigminer/reference/read_maf.html - - - https://shixiangwang.github.io/sigminer/reference/read_sv_as_rs.html - - - https://shixiangwang.github.io/sigminer/reference/read_vcf.html - - - https://shixiangwang.github.io/sigminer/reference/read_xena_variants.html - - - https://shixiangwang.github.io/sigminer/reference/report_bootstrap_p_value.html - - - https://shixiangwang.github.io/sigminer/reference/same_size_clustering.html - - - https://shixiangwang.github.io/sigminer/reference/scoring.html - - - https://shixiangwang.github.io/sigminer/reference/show_catalogue.html - - - https://shixiangwang.github.io/sigminer/reference/show_cn_circos.html - - - https://shixiangwang.github.io/sigminer/reference/show_cn_components.html - - - https://shixiangwang.github.io/sigminer/reference/show_cn_distribution.html - - - https://shixiangwang.github.io/sigminer/reference/show_cn_features.html - - - https://shixiangwang.github.io/sigminer/reference/show_cn_freq_circos.html - - - https://shixiangwang.github.io/sigminer/reference/show_cn_group_profile.html - - - https://shixiangwang.github.io/sigminer/reference/show_cn_profile.html - - - https://shixiangwang.github.io/sigminer/reference/show_cor.html - - - 
https://shixiangwang.github.io/sigminer/reference/show_cosmic.html - - - https://shixiangwang.github.io/sigminer/reference/show_cosmic_sig_profile.html - - - https://shixiangwang.github.io/sigminer/reference/show_groups.html - - - https://shixiangwang.github.io/sigminer/reference/show_group_comparison.html - - - https://shixiangwang.github.io/sigminer/reference/show_group_distribution.html - - - https://shixiangwang.github.io/sigminer/reference/show_group_enrichment.html - - - https://shixiangwang.github.io/sigminer/reference/show_group_mapping.html - - - https://shixiangwang.github.io/sigminer/reference/show_sig_bootstrap.html - - - https://shixiangwang.github.io/sigminer/reference/show_sig_consensusmap.html - - - https://shixiangwang.github.io/sigminer/reference/show_sig_exposure.html - - - https://shixiangwang.github.io/sigminer/reference/show_sig_feature_corrplot.html - - - https://shixiangwang.github.io/sigminer/reference/show_sig_fit.html - - - https://shixiangwang.github.io/sigminer/reference/show_sig_profile.html - - - https://shixiangwang.github.io/sigminer/reference/show_sig_profile_heatmap.html - - - https://shixiangwang.github.io/sigminer/reference/show_sig_profile_loop.html - - - https://shixiangwang.github.io/sigminer/reference/sigminer-package.html - - - https://shixiangwang.github.io/sigminer/reference/sigminer.html - - - https://shixiangwang.github.io/sigminer/reference/sigprofiler.html - - - https://shixiangwang.github.io/sigminer/reference/sig_auto_extract.html - - - https://shixiangwang.github.io/sigminer/reference/sig_convert.html - - - https://shixiangwang.github.io/sigminer/reference/sig_estimate.html - - - https://shixiangwang.github.io/sigminer/reference/sig_extract.html - - - https://shixiangwang.github.io/sigminer/reference/sig_fit.html - - - https://shixiangwang.github.io/sigminer/reference/sig_fit_bootstrap.html - - - https://shixiangwang.github.io/sigminer/reference/sig_fit_bootstrap_batch.html - - - 
https://shixiangwang.github.io/sigminer/reference/sig_operation.html - - - https://shixiangwang.github.io/sigminer/reference/sig_tally.html - - - https://shixiangwang.github.io/sigminer/reference/sig_unify_extract.html - - - https://shixiangwang.github.io/sigminer/reference/simulated_catalogs.html - - - https://shixiangwang.github.io/sigminer/reference/simulation.html - - - https://shixiangwang.github.io/sigminer/reference/subset.CopyNumber.html - - - https://shixiangwang.github.io/sigminer/reference/tidyeval.html - - - https://shixiangwang.github.io/sigminer/reference/transcript.hg19.html - - - https://shixiangwang.github.io/sigminer/reference/transcript.hg38.html - - - https://shixiangwang.github.io/sigminer/reference/transcript.mm10.html - - - https://shixiangwang.github.io/sigminer/reference/transcript.mm9.html - - - https://shixiangwang.github.io/sigminer/reference/transform_seg_table.html - - - https://shixiangwang.github.io/sigminer/reference/use_color_style.html - - diff --git a/man/centromeres.T2T.Rd b/man/centromeres.T2T.Rd new file mode 100644 index 00000000..19ede3a8 --- /dev/null +++ b/man/centromeres.T2T.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{centromeres.T2T} +\alias{centromeres.T2T} +\title{Location of Centromeres at Genome Build T2T} +\format{ +A data.frame +} +\source{ +from T2T study +} +\description{ +Location of Centromeres at Genome Build T2T +} +\examples{ +data(centromeres.T2T) +} diff --git a/man/chromsize.T2T.Rd b/man/chromsize.T2T.Rd new file mode 100644 index 00000000..db195165 --- /dev/null +++ b/man/chromsize.T2T.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{chromsize.T2T} +\alias{chromsize.T2T} +\title{Chromosome Size of Genome Build T2T} +\format{ +A data.frame +} +\source{ +from T2T study +} +\description{ +Chromosome Size of Genome Build T2T +} +\examples{ 
+data(chromsize.T2T) +} diff --git a/man/cytobands.T2T.Rd b/man/cytobands.T2T.Rd new file mode 100644 index 00000000..d708554a --- /dev/null +++ b/man/cytobands.T2T.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{cytobands.T2T} +\alias{cytobands.T2T} +\title{Location of Chromosome Cytobands at Genome Build T2T} +\format{ +A data.frame +} +\source{ +from T2T study +} +\description{ +Location of Chromosome Cytobands at Genome Build T2T +} +\examples{ +data(cytobands.T2T) +} diff --git a/man/transcript.T2T.Rd b/man/transcript.T2T.Rd new file mode 100644 index 00000000..3b4ecf20 --- /dev/null +++ b/man/transcript.T2T.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{transcript.T2T} +\alias{transcript.T2T} +\title{Merged Transcript Location at Genome Build T2T} +\format{ +A \code{data.table} +} +\source{ +from T2T study. +} +\description{ +Merged Transcript Location at Genome Build T2T +} +\examples{ +data(transcript.T2T) +}