From 6c804e2523e4bf5d9afe6bdcfc0ce41f441071da Mon Sep 17 00:00:00 2001
From: etiennebacher
Date: Sat, 4 Jan 2025 17:17:57 +0100
Subject: [PATCH] init

---
Notes (ignored by `git am`; not part of the commit message):
  * Adds the string-namespace method `$str$to_decimal()`:
      - R/expr-string.R            user-facing `expr_str_to_decimal()` + roxygen docs
      - man/expr_str_to_decimal.Rd generated help page
      - R/000-wrappers.R,
        src/init.c, src/rust/api.h generated binding glue (one extra argument,
                                   `infer_len`, besides `self`)
      - src/rust/src/expr/string.rs  `PlRExpr::str_to_decimal`, which converts
                                   `infer_len` and forwards it to `.str().to_decimal()`
  * The unit test is added commented-out; its TODO says to enable it once
    https://github.com/pola-rs/polars/issues/20556 is solved.
  * NOTE(review): the type parameters in the Rust hunk (`Result<Self>`,
    `<Wrap<usize>>`) are assumed from the `.0` access and the usize argument of
    `to_decimal` -- confirm against the repository before applying.

 R/000-wrappers.R                  |  7 +++++++
 R/expr-string.R                   | 21 +++++++++++++++++++++
 man/expr_str_to_decimal.Rd        | 27 +++++++++++++++++++++++++++
 src/init.c                        |  6 ++++++
 src/rust/api.h                    |  1 +
 src/rust/src/expr/string.rs       |  5 +++++
 tests/testthat/test-expr-string.R | 16 ++++++++++++++++
 7 files changed, 83 insertions(+)
 create mode 100644 man/expr_str_to_decimal.Rd

diff --git a/R/000-wrappers.R b/R/000-wrappers.R
index 01ac14bd..f36666c4 100644
--- a/R/000-wrappers.R
+++ b/R/000-wrappers.R
@@ -2644,6 +2644,12 @@ class(`PlRDataType`) <- c("PlRDataType__bundle", "savvy_neopolars__sealed")
   }
 }
 
+`PlRExpr_str_to_decimal` <- function(self) {
+  function(`infer_len`) {
+    .savvy_wrap_PlRExpr(.Call(savvy_PlRExpr_str_to_decimal__impl, `self`, `infer_len`))
+  }
+}
+
 `PlRExpr_str_contains` <- function(self) {
   function(`pat`, `literal`, `strict`) {
     `pat` <- .savvy_extract_ptr(`pat`, "PlRExpr")
@@ -3194,6 +3200,7 @@ class(`PlRDataType`) <- c("PlRDataType__bundle", "savvy_neopolars__sealed")
   e$`str_zfill` <- `PlRExpr_str_zfill`(ptr)
   e$`str_pad_end` <- `PlRExpr_str_pad_end`(ptr)
   e$`str_pad_start` <- `PlRExpr_str_pad_start`(ptr)
+  e$`str_to_decimal` <- `PlRExpr_str_to_decimal`(ptr)
   e$`str_contains` <- `PlRExpr_str_contains`(ptr)
   e$`str_ends_with` <- `PlRExpr_str_ends_with`(ptr)
   e$`str_starts_with` <- `PlRExpr_str_starts_with`(ptr)
diff --git a/R/expr-string.R b/R/expr-string.R
index 45c5d59c..1b2c1307 100644
--- a/R/expr-string.R
+++ b/R/expr-string.R
@@ -420,6 +420,27 @@ expr_str_zfill <- function(alignment) {
     wrap()
 }
 
+#' Convert a String column into a Decimal column
+#'
+#' @description
+#' This method infers the needed parameters `precision` and `scale`.
+#'
+#' @param inference_length Number of elements to parse to determine the
+#' `precision` and `scale`.
+#' @inherit as_polars_expr return
+#'
+#' @examples
+#' df <- pl$DataFrame(
+#'   numbers = c(
+#'     "40.12", "3420.13", "120134.19", "3212.98",
+#'     "12.90", "143.09", "143.9"
+#'   )
+#' )
+#' df$with_columns(numbers_decimal = pl$col("numbers")$str$to_decimal())
+expr_str_to_decimal <- function(inference_length = 100) {
+  self$`_rexpr`$str_to_decimal(inference_length) |>
+    wrap()
+}
 
 #' Left justify strings
 #'
diff --git a/man/expr_str_to_decimal.Rd b/man/expr_str_to_decimal.Rd
new file mode 100644
index 00000000..4ef3f382
--- /dev/null
+++ b/man/expr_str_to_decimal.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/expr-string.R
+\name{expr_str_to_decimal}
+\alias{expr_str_to_decimal}
+\title{Convert a String column into a Decimal column}
+\usage{
+expr_str_to_decimal(inference_length = 100)
+}
+\arguments{
+\item{inference_length}{Number of elements to parse to determine the
+\code{precision} and \code{scale}.}
+}
+\value{
+A polars \link{expression}
+}
+\description{
+This method infers the needed parameters \code{precision} and \code{scale}.
+}
+\examples{
+df <- pl$DataFrame(
+  numbers = c(
+    "40.12", "3420.13", "120134.19", "3212.98",
+    "12.90", "143.09", "143.9"
+  )
+)
+df$with_columns(numbers_decimal = pl$col("numbers")$str$to_decimal())
+}
diff --git a/src/init.c b/src/init.c
index 21774650..195d0119 100644
--- a/src/init.c
+++ b/src/init.c
@@ -2039,6 +2039,11 @@ SEXP savvy_PlRExpr_str_pad_start__impl(SEXP self__, SEXP c_arg__width, SEXP c_ar
     return handle_result(res);
 }
 
+SEXP savvy_PlRExpr_str_to_decimal__impl(SEXP self__, SEXP c_arg__infer_len) {
+    SEXP res = savvy_PlRExpr_str_to_decimal__ffi(self__, c_arg__infer_len);
+    return handle_result(res);
+}
+
 SEXP savvy_PlRExpr_str_contains__impl(SEXP self__, SEXP c_arg__pat, SEXP c_arg__literal, SEXP c_arg__strict) {
     SEXP res = savvy_PlRExpr_str_contains__ffi(self__, c_arg__pat, c_arg__literal, c_arg__strict);
     return handle_result(res);
@@ -2917,6 +2922,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"savvy_PlRExpr_str_zfill__impl", (DL_FUNC) &savvy_PlRExpr_str_zfill__impl, 2},
     {"savvy_PlRExpr_str_pad_end__impl", (DL_FUNC) &savvy_PlRExpr_str_pad_end__impl, 3},
     {"savvy_PlRExpr_str_pad_start__impl", (DL_FUNC) &savvy_PlRExpr_str_pad_start__impl, 3},
+    {"savvy_PlRExpr_str_to_decimal__impl", (DL_FUNC) &savvy_PlRExpr_str_to_decimal__impl, 2},
     {"savvy_PlRExpr_str_contains__impl", (DL_FUNC) &savvy_PlRExpr_str_contains__impl, 4},
     {"savvy_PlRExpr_str_ends_with__impl", (DL_FUNC) &savvy_PlRExpr_str_ends_with__impl, 2},
     {"savvy_PlRExpr_str_starts_with__impl", (DL_FUNC) &savvy_PlRExpr_str_starts_with__impl, 2},
diff --git a/src/rust/api.h b/src/rust/api.h
index d39fb62b..25600543 100644
--- a/src/rust/api.h
+++ b/src/rust/api.h
@@ -409,6 +409,7 @@ SEXP savvy_PlRExpr_str_strip_chars_start__ffi(SEXP self__, SEXP c_arg__matches);
 SEXP savvy_PlRExpr_str_zfill__ffi(SEXP self__, SEXP c_arg__alignment);
 SEXP savvy_PlRExpr_str_pad_end__ffi(SEXP self__, SEXP c_arg__width, SEXP c_arg__fillchar);
 SEXP savvy_PlRExpr_str_pad_start__ffi(SEXP self__, SEXP c_arg__width, SEXP c_arg__fillchar);
+SEXP savvy_PlRExpr_str_to_decimal__ffi(SEXP self__, SEXP c_arg__infer_len);
 SEXP savvy_PlRExpr_str_contains__ffi(SEXP self__, SEXP c_arg__pat, SEXP c_arg__literal, SEXP c_arg__strict);
 SEXP savvy_PlRExpr_str_ends_with__ffi(SEXP self__, SEXP c_arg__sub);
 SEXP savvy_PlRExpr_str_starts_with__ffi(SEXP self__, SEXP c_arg__sub);
diff --git a/src/rust/src/expr/string.rs b/src/rust/src/expr/string.rs
index b9da2644..f8c4f9f2 100644
--- a/src/rust/src/expr/string.rs
+++ b/src/rust/src/expr/string.rs
@@ -80,6 +80,11 @@ impl PlRExpr {
         Ok(self.inner.clone().str().pad_start(width, fillchar).into())
     }
 
+    fn str_to_decimal(&self, infer_len: NumericScalar) -> Result<Self> {
+        let infer_len = <Wrap<usize>>::try_from(infer_len)?.0;
+        Ok(self.inner.clone().str().to_decimal(infer_len).into())
+    }
+
     fn str_contains(&self, pat: &PlRExpr, literal: bool, strict: bool) -> Result<Self> {
         if literal {
             Ok(self
diff --git a/tests/testthat/test-expr-string.R b/tests/testthat/test-expr-string.R
index fb92e107..5c59e382 100644
--- a/tests/testthat/test-expr-string.R
+++ b/tests/testthat/test-expr-string.R
@@ -1008,3 +1008,19 @@ test_that("$str$extract_many works", {
     pl$DataFrame(values = list("disco", c("rhap", "ody")))
   )
 })
+
+# TODO: uncomment when https://github.com/pola-rs/polars/issues/20556 is solved
+# test_that("to_decimal", {
+#   df <- pl$DataFrame(
+#     x = c(
+#       "40.12", "3420.13", "120134.19", "3212.98",
+#       "12.90", "143.09", "143.9"
+#     )
+#   )
+#   expect_equal(
+#     df$select(pl$col("x")$str$to_decimal()),
+#     pl$DataFrame(x = c(
+#       40.12, 3420.13, 120134.19, 3212.98, 12.90, 143.09, 143.9
+#     ), .schema_overrides = list(x = pl$Decimal(scale = 2)))
+#   )
+# })