Skip to content

Commit

Permalink
Merge pull request #1652 from rstudio/feature/as.raw
Browse files Browse the repository at this point in the history
Python bytes methods: add `as.raw()`, add `nul` arg to `as.character()`
  • Loading branch information
t-kalinowski authored Aug 22, 2024
2 parents c0fdb1c + ebf27d2 commit 588ff09
Show file tree
Hide file tree
Showing 6 changed files with 97 additions and 12 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ S3method(as.data.frame,polars.dataframe.frame.DataFrame)
S3method(as.double,numpy.ndarray)
S3method(as.environment,python.builtin.object)
S3method(as.matrix,numpy.ndarray)
S3method(as.raw,python.builtin.bytes)
S3method(as.vector,numpy.ndarray)
S3method(dim,pandas.core.frame.DataFrame)
S3method(dim,pandas.core.series.Series)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@

- Fixes for CRAN check failures (#1645)

- New `as.raw()` method for `python.builtin.bytes` (#1649, #1652)

- `as.character()` method for `python.builtin.bytes` gains a `nul` argument,
allowing for convenient handling of embedded NULs in the string. (#1652)

# reticulate 1.38.0

- Python Exceptions converted to R conditions are now R lists instead
Expand Down
54 changes: 49 additions & 5 deletions R/python.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,19 +84,61 @@ as.character.python.builtin.object <- function(x, ...) {
py_str(x)
}

#' Convert Python bytes to an R character vector
#' Convert Python bytes to an R character or raw vector
#'
#' @inheritParams base::as.character
#'
#' @param encoding Encoding to use for conversion (defaults to utf-8)
#' @param errors Policy for handling conversion errors. Default is 'strict'
#' which raises an error. Other possible values are 'ignore' and 'replace'.
#'
#' @export
as.character.python.builtin.bytes <- function(x, encoding = "utf-8", errors = "strict", ...) {
x$decode(encoding = encoding, errors = errors)
#' @param nul Action to take if the bytes contain an embedded NUL (`\x00`).
#' Python allows embedded `NUL`s in strings, while R does not. There are four
#' options for handling embedded `NUL`s:
#'
#' 1. Error: This is the default
#' 2. Replace: Supply a replacement string: `nul = "<NUL>"`
#' 3. Remove: Supply an empty string: `nul = ""`
#' 4. Split: Supply an R `NULL` to indicate that string should be split at embedded `NUL` bytes: `nul = NULL`
#'
#' @export
#' @examplesIf reticulate::py_available()
#' # A bytes object with an embedded NUL
#' b <- import_builtins(convert = FALSE)$bytes(
#' as.raw(c(0x61, 0x20, 0x62, 0x00, 0x63, 0x20, 0x64)) # "a b<NUL>c d"
#' )
#'
#' try(as.character(b)) # Error : Embedded NUL in string.
#' as.character(b, nul = "<NUL>") # Replace: "a b<NUL>c d"
#' as.character(b, nul = "") # Remove: "a bc d"
#' as.character(b, nul = NULL) # Split: "a b" "c d"
as.character.python.builtin.bytes <-
  function(x, encoding = "utf-8", errors = "strict",
           nul = stop("Embedded NUL in string."), ...) {
  # Decode a Python `bytes` object into an R character vector.
  #
  # `nul` selects how embedded NUL (`\x00`) bytes are handled:
  #   - missing:  plain decode; fails if the bytes contain an embedded NUL
  #   - NULL:     split at each NUL byte, returning one string per piece
  #   - a string: substitute that string for every NUL byte
  #
  # The default expression for `nul` is never evaluated: the `missing(nul)`
  # check below is reached first, and in the other branches the caller has
  # supplied a value. It only advertises the default behaviour in the
  # signature.

  # NOTE(review): presumably makes method calls on `x` return converted R
  # values for the duration of this call -- confirm against the helper.
  local_conversion_scope(x, TRUE)

  if (missing(nul)) {
    # will throw an error if bytes contain embedded nul
    x$decode(encoding = encoding, errors = errors)

  } else if (is.null(nul)) {
    # split the bytes at embedded NULs, then decode each piece on its own
    vapply(x$split(import("builtins")$bytes(list(0L))),
           function(slice) slice$decode(encoding = encoding, errors = errors),
           "")

  } else {
    # replace embedded NULs with the supplied string
    bt <- import("builtins", convert = FALSE)
    # The replacement is spliced in at the byte level and decoded together
    # with the rest of `x`, so it must be encoded with the same codec used
    # for decoding. A bare `$encode()` would always produce UTF-8 and garble
    # non-ASCII replacements whenever `encoding` is not UTF-8 compatible.
    nul <- bt$str(as.character(nul))$encode(encoding = encoding,
                                            errors = errors)
    x$replace(bt$bytes(list(0L)), nul)$decode(encoding = encoding,
                                              errors = errors)
  }

}

#' @export
#' @rdname as.character.python.builtin.bytes
as.raw.python.builtin.bytes <- function(x) {
  # Copy the bytes into a Python `bytearray` and return the result; this
  # relies on reticulate converting a `bytearray` to an R raw vector when
  # the call's result is handed back to R.
  builtins <- import_builtins()
  builtins$bytearray(x)
}

.operators <- new.env(parent = emptyenv())

Expand Down Expand Up @@ -943,6 +985,8 @@ py_list_attributes <- function(x) {
#'
#' @details The default implementation will call `PyObject_Str` on the object.
#'
#' @seealso [as.character.python.builtin.bytes()] For discussion on dealing with embedded `NUL`s in Python strings.
#'
#' @export
py_str <- function(object, ...) {
if (!is_py_object(object))
Expand Down
38 changes: 35 additions & 3 deletions man/as.character.python.builtin.bytes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions man/py_str.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions src/python.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1703,12 +1703,12 @@ SEXP py_to_r_cpp(PyObject* x, bool convert, bool simple) {
// bytearray
if (PyByteArray_Check(x)) {

if (PyByteArray_Size(x) == 0)
auto size = PyByteArray_Size(x);
if (size == 0)
return RawVector();

return RawVector(
PyByteArray_AsString(x),
PyByteArray_AsString(x) + PyByteArray_Size(x));
char* data = PyByteArray_AsString(x);
return RawVector(data, data + size);

}

Expand Down

0 comments on commit 588ff09

Please sign in to comment.