update readme

qddyy · Dec 25, 2024 · faa4317 · faa4317
1 parent 2fec513
commit faa4317
Show file tree

Hide file tree

Showing 29 changed files with 306 additions and 162 deletions.
diff --git a/README.Rmd b/README.Rmd
@@ -23,7 +23,8 @@ options(
     asciicast_knitr_svg = TRUE,
     asciicast_padding_y = 0,
     asciicast_start_wait = 0,
-    asciicast_end_wait = 1
+    asciicast_end_wait = 1,
+    asciicast_timeout = Inf
 )
 ```
 
@@ -58,7 +59,6 @@ remotes::install_github("qddyy/LearnNonparam")
 
 ```{r, library, eval = FALSE}
 library(LearnNonparam)
-options(LearnNonparam.pmt_progress = TRUE)
 ```
 
 - Construct a test object
@@ -130,34 +130,80 @@ LearnNonparam::pmts()
 ```
 </details>
 
-`define_pmt` allows users to define new permutation tests. Take the two-sample Cramér-Von Mises test as an example:
+## Extending
 
-```{asciicast, define}
-t <- define_pmt(
+The `define_pmt` function allows users to define new permutation tests. Take the two-sample Wilcoxon test as an example:
+
+```{asciicast, define_r}
+t_custom <- define_pmt(
     # this is a two-sample permutation test
     inherit = "twosample",
     statistic = function(x, y) {
         # (optional) pre-calculate certain constants that remain invariant during permutation
-        n_x <- length(x)
-        n_y <- length(y)
-        F_x <- seq_len(n_x) / n_x
-        G_y <- seq_len(n_y) / n_y
+        m <- length(x)
+        n <- length(y)
         # return a closure to calculate the test statistic
-        function(x, y) {
-            x <- sort.int(x)
-            y <- sort.int(y)
-            F <- approxfun(x, F_x, "constant", 0, 1)
-            G <- approxfun(y, G_y, "constant", 0, 1)
-            sum(c(F_x - G(x), G_y - F(y))^2)
-        }
+        function(x, y) sum(x) / m - sum(y) / n
     },
-    # reject the null hypothesis when the test statistic is large
-    rejection = "r",
-    name = "Two-Sample Cramér-Von Mises Test",
-    alternative = "samples are from different continuous distributions"
+    # reject the null hypothesis when the test statistic is too large or too small
+    rejection = "lr", n_permu = 1e5
+)
+```
+
+The statistic can also be written in C++. Thanks to Rcpp sugar and C++14 features, only minor modifications are needed to make the R code compatible with C++ syntax.
+
+```{asciicast, define_cpp}
+t_cpp <- define_pmt(
+    inherit = "twosample", rejection = "lr", n_permu = 1e5,
+    statistic = "[](const auto& x, const auto& y) {
+        auto m = x.length();
+        auto n = y.length();
+        return [=](const auto& x, const auto& y) {
+            return sum(x) / m - sum(y) / n;
+        };
+    }"
+)
+```
+
+The following demonstrates that `t_custom` and `t_cpp` are equivalent:
+
+```{asciicast, prepare_data}
+x <- rnorm(10, mean = 0)
+y <- rnorm(10, mean = 5)
+```
+
+```{asciicast, t_custom_res}
+set.seed(0)
+t_custom$test(x, y)$print()
+```
+
+```{asciicast, t_cpp_res}
+set.seed(0)
+t_cpp$test(x, y)$print()
+```
+
+## Performance
+
+The `coin` package is a commonly used R package for performing permutation tests. Below is a benchmark comparing the computational efficiency of `coin` and `LearnNonparam`:
+
+```{asciicast, benchmark}
+library(coin)
+
+data <- c(x, y)
+group <- factor(c(rep("x", length(x)), rep("y", length(y))))
+
+options(LearnNonparam.pmt_progress = FALSE)
+benchmark <- microbenchmark::microbenchmark(
+    R = t_custom$test(x, y),
+    Rcpp = t_cpp$test(x, y),
+    coin = wilcox_test(data ~ group, distribution = approximate(nresample = 1e5, parallel = "no"))
 )
+```
 
-t$test(rnorm(10), runif(10))$print()
+```{asciicast, benchmark_res}
+benchmark
 ```
 
+The benchmark shows that the C++ statistic is significantly faster than the pure R one, even outperforming the `coin` package. However, all tests in this package are currently written in R, with no plans to migrate them to C++. This is because the primary goal of this package is not to maximize performance but to offer a flexible framework for permutation tests.
+
 ## References
diff --git a/README.md b/README.md
@@ -41,7 +41,6 @@ remotes::install_github("qddyy/LearnNonparam")
 
 ``` r
 library(LearnNonparam)
-options(LearnNonparam.pmt_progress = TRUE)
 ```
 
 - Construct a test object
@@ -162,42 +161,127 @@ See <code>pmts()</code> for tests implemented in this package.
 
 </details>
 
-`define_pmt` allows users to define new permutation tests. Take the
-two-sample Cramér-Von Mises test as an example:
+## Extending
+
+The `define_pmt` function allows users to define new permutation tests.
+Take the two-sample Wilcoxon test as an example:
 
 ``` r
-t <- define_pmt(
+t_custom <- define_pmt(
     # this is a two-sample permutation test
     inherit = "twosample",
     statistic = function(x, y) {
         # (optional) pre-calculate certain constants that remain invariant during permutation
-        n_x <- length(x)
-        n_y <- length(y)
-        F_x <- seq_len(n_x) / n_x
-        G_y <- seq_len(n_y) / n_y
+        m <- length(x)
+        n <- length(y)
         # return a closure to calculate the test statistic
-        function(x, y) {
-            x <- sort.int(x)
-            y <- sort.int(y)
-            F <- approxfun(x, F_x, "constant", 0, 1)
-            G <- approxfun(y, G_y, "constant", 0, 1)
-            sum(c(F_x - G(x), G_y - F(y))^2)
-        }
+        function(x, y) sum(x) / m - sum(y) / n
     },
-    # reject the null hypothesis when the test statistic is large
-    rejection = "r",
-    name = "Two-Sample Cramér-Von Mises Test",
-    alternative = "samples are from different continuous distributions"
+    # reject the null hypothesis when the test statistic is too large or too small
+    rejection = "lr", n_permu = 1e5
 )
+```
+
+<picture>
+<source media="(prefers-color-scheme: dark)" srcset="man/figures/README/define_r-dark.svg">
+<img src="man/figures/README/define_r.svg" width="100%" style="display: block; margin: auto;" />
+</picture>
+
+The statistic can also be written in C++. Thanks to Rcpp sugar and
+C++14 features, only minor modifications are needed to make the R code
+compatible with C++ syntax.
 
-t$test(rnorm(10), runif(10))$print()
+``` r
+t_cpp <- define_pmt(
+    inherit = "twosample", rejection = "lr", n_permu = 1e5,
+    statistic = "[](const auto& x, const auto& y) {
+        auto m = x.length();
+        auto n = y.length();
+        return [=](const auto& x, const auto& y) {
+            return sum(x) / m - sum(y) / n;
+        };
+    }"
+)
 ```
 
 <picture>
-<source media="(prefers-color-scheme: dark)" srcset="man/figures/README/define-dark.svg">
-<img src="man/figures/README/define.svg" width="100%" style="display: block; margin: auto;" />
+<source media="(prefers-color-scheme: dark)" srcset="man/figures/README/define_cpp-dark.svg">
+<img src="man/figures/README/define_cpp.svg" width="100%" style="display: block; margin: auto;" />
 </picture>
 
+The following demonstrates that `t_custom` and `t_cpp` are equivalent:
+
+``` r
+x <- rnorm(10, mean = 0)
+y <- rnorm(10, mean = 5)
+```
+
+<picture>
+<source media="(prefers-color-scheme: dark)" srcset="man/figures/README/prepare_data-dark.svg">
+<img src="man/figures/README/prepare_data.svg" width="100%" style="display: block; margin: auto;" />
+</picture>
+
+``` r
+set.seed(0)
+t_custom$test(x, y)$print()
+```
+
+<picture>
+<source media="(prefers-color-scheme: dark)" srcset="man/figures/README/t_custom_res-dark.svg">
+<img src="man/figures/README/t_custom_res.svg" width="100%" style="display: block; margin: auto;" />
+</picture>
+
+``` r
+set.seed(0)
+t_cpp$test(x, y)$print()
+```
+
+<picture>
+<source media="(prefers-color-scheme: dark)" srcset="man/figures/README/t_cpp_res-dark.svg">
+<img src="man/figures/README/t_cpp_res.svg" width="100%" style="display: block; margin: auto;" />
+</picture>
+
+## Performance
+
+The `coin` package is a commonly used R package for performing
+permutation tests. Below is a benchmark comparing the computational
+efficiency of `coin` and `LearnNonparam`:
+
+``` r
+library(coin)
+
+data <- c(x, y)
+group <- factor(c(rep("x", length(x)), rep("y", length(y))))
+
+options(LearnNonparam.pmt_progress = FALSE)
+benchmark <- microbenchmark::microbenchmark(
+    R = t_custom$test(x, y),
+    Rcpp = t_cpp$test(x, y),
+    coin = wilcox_test(data ~ group, distribution = approximate(nresample = 1e5, parallel = "no"))
+)
+```
+
+<picture>
+<source media="(prefers-color-scheme: dark)" srcset="man/figures/README/benchmark-dark.svg">
+<img src="man/figures/README/benchmark.svg" width="100%" style="display: block; margin: auto;" />
+</picture>
+
+``` r
+benchmark
+```
+
+<picture>
+<source media="(prefers-color-scheme: dark)" srcset="man/figures/README/benchmark_res-dark.svg">
+<img src="man/figures/README/benchmark_res.svg" width="100%" style="display: block; margin: auto;" />
+</picture>
+
+The benchmark shows that the C++ statistic is significantly faster than
+the pure R one, even outperforming the `coin` package. However, all
+tests in this package are currently written in R, with no plans to
+migrate them to C++. This is because the primary goal of this package
+is not to maximize performance but to offer a flexible framework for
+permutation tests.
+
 ## References
 
 <div id="refs" class="references csl-bib-body hanging-indent">

diff --git a/man/figures/README/benchmark-dark.svg b/man/figures/README/benchmark-dark.svg
diff --git a/man/figures/README/benchmark.svg b/man/figures/README/benchmark.svg
diff --git a/man/figures/README/benchmark_res-dark.svg b/man/figures/README/benchmark_res-dark.svg
diff --git a/man/figures/README/benchmark_res.svg b/man/figures/README/benchmark_res.svg
diff --git a/man/figures/README/define_cpp-dark.svg b/man/figures/README/define_cpp-dark.svg
diff --git a/man/figures/README/define_cpp.svg b/man/figures/README/define_cpp.svg
diff --git a/man/figures/README/define_r-dark.svg b/man/figures/README/define_r-dark.svg
diff --git a/man/figures/README/define_r.svg b/man/figures/README/define_r.svg