From 57d0f2d1679ca23ad96f7fb5d0b55e00dafb2963 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Tue, 12 Nov 2024 14:13:26 +0100 Subject: [PATCH 01/29] Handle loading CSV and TSV files Restructure loading to allow more file types --- R/Artifact.R | 15 +++++---------- R/file_handlers.R | 48 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 10 deletions(-) create mode 100644 R/file_handlers.R diff --git a/R/Artifact.R b/R/Artifact.R index 0f0cb16..9a2d061 100644 --- a/R/Artifact.R +++ b/R/Artifact.R @@ -9,21 +9,16 @@ ArtifactRecord <- R6::R6Class( # nolint object_name_linter inherit = Record, public = list( #' @description - #' Load the artifact into memory. This currently only supports AnnData - #' artifacts. + #' Load the artifact into memory. #' #' @return The artifact load = function() { - artifact_accessor <- private$get_value("_accessor") - file_path <- self$cache() - if (artifact_accessor == "AnnData") { - check_requires("Loading AnnData objects", "anndata") - anndata::read_h5ad(file_path) - } else { - cli_abort(paste0("Unsupported accessor: ", artifact_accessor)) - } + suffix <- private$get_value("suffix") + file_loader <- get_file_loader(suffix) + + file_loader(file_path) }, #' @description #' Cache the artifact to the local filesystem. This currently only supports diff --git a/R/file_handlers.R b/R/file_handlers.R new file mode 100644 index 0000000..69d9314 --- /dev/null +++ b/R/file_handlers.R @@ -0,0 +1,48 @@ +#' Get file loader +#' +#' Get the correct file loader function based on a file suffix +#' +#' @param suffix String giving a file suffix +#' +#' @return Function that can be used to load the file +#' @noRd +get_file_loader <- function(suffix) { + switch (suffix, + ".h5ad" = load_h5ad, + ".csv" = load_csv, + ".tsv" = load_tsv, + cli::cli_abort("Loading files with suffix {suffix} is not supported") + ) +} + +#' Load a H5AD file +#' +#' @param file Path to the file to load +#' +#' @return An `anndata::AnnDataR6` object +#' @noRd +load_h5ad <- function(file) { + check_requires("Loading AnnData objects", "anndata") + + anndata::read_h5ad(file) +} + +#' Load a CSV file +#' +#' @param file Path to the file to load +#' +#' @return A `data.frame` +#' @noRd +load_csv <- function(file) { + read.csv(file) +} + +#' Load a TSV file +#' +#' @param file Path to the file to load +#' +#' @return A `data.frame` +#' @noRd +load_tsv <- function(file) { + read.delim(file) +} From a06ad13cadaa262e50c9ee4124d0a6ac3f4ff3c4 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Tue, 12 Nov 2024 16:31:03 +0100 Subject: [PATCH 02/29] Add from_df() method to Registry --- R/Registry.R | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/R/Registry.R b/R/Registry.R index be0c1b8..b0ab1ec 100644 --- a/R/Registry.R +++ b/R/Registry.R @@ -138,6 +138,32 @@ Registry <- R6::R6Class( # nolint object_name_linter # Bind entries as rows list_rbind() }, + from_df = function(data_frame, key = NULL, description = NULL, run = NULL) { + if (private$.registry_name != "artifact") { + cli::cli_abort( + "Creating records from data frames is only supported for the Artifact registry" + ) + } + + check_requires("Creating records from data frames", "reticulate") + + py_lamin <- reticulate::import("lamindb") + + instance_settings <- private$.instance$get_settings() + system2("lamin", "lamin settings set auto-connect false") + py_lamin$connect( + paste0(instance_settings$owner, "/", instance_settings$name) + ) + + py_record <- py_lamin$Artifact$from_df( + data_frame, key = key, description = description, run = run + ) + + record_df <- reticulate::py_to_r(py_record$df()) + + record_class <- self$get_record_class() + record_class$new(as.list(record_df)) + }, #' @description #' Get the fields in the registry. #' From 9513d17fbd5d1f630776e14302f2d13b1af0b169 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Wed, 13 Nov 2024 17:14:59 +0100 Subject: [PATCH 03/29] Modify Record printing to avoid API calls --- R/Record.R | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/R/Record.R b/R/Record.R index 605ea32..e951257 100644 --- a/R/Record.R +++ b/R/Record.R @@ -120,12 +120,28 @@ Record <- R6::R6Class( # nolint object_name_linter "key" ) - record_fields <- private$.api$get_record( - module_name = private$.registry$module$name, - registry_name = private$.registry$name, - id_or_uid = private$.data[["uid"]], - include_foreign_keys = TRUE - ) + expected_fields <- private$.registry$get_fields() |> + discard(~ is.null(.x$column_name)) |> + map_chr("column_name") + + record_fields <- map(names(expected_fields), function(.field) { + value <- tryCatch( + self[[.field]], + error = function(err) { + if (!grepl("status code 404", conditionMessage(err))) { + cli::abort(conditionMessage(err)) + } + NULL + } + ) + + if (inherits(value, "Record")) { + value <- value$id + } + + value + }) |> + setNames(expected_fields) # Get the important fields that are in the record important_fields <- intersect(important_fields, names(record_fields)) From 4d2351ffb2ee2cf0133c71661a8e0ce7d3075507 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Wed, 13 Nov 2024 18:09:49 +0100 Subject: [PATCH 04/29] Recursively create records in Registry$from_df() --- R/Registry.R | 48 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/R/Registry.R b/R/Registry.R index b0ab1ec..a448bcd 100644 --- a/R/Registry.R +++ b/R/Registry.R @@ -138,7 +138,7 @@ Registry <- R6::R6Class( # nolint object_name_linter # Bind entries as rows list_rbind() }, - from_df = function(data_frame, key = NULL, description = NULL, run = NULL) { + from_df = function(dataframe, key = NULL, description = NULL, run = NULL) { if (private$.registry_name != "artifact") { cli::cli_abort( "Creating records from data frames is only supported for the Artifact registry" @@ -150,19 +150,16 @@ Registry <- R6::R6Class( # nolint object_name_linter py_lamin <- reticulate::import("lamindb") instance_settings <- private$.instance$get_settings() - system2("lamin", "lamin settings set auto-connect false") + system2("lamin", "settings set auto-connect false") py_lamin$connect( paste0(instance_settings$owner, "/", instance_settings$name) ) py_record <- py_lamin$Artifact$from_df( - data_frame, key = key, description = description, run = run + dataframe, key = key, description = description, run = run ) - record_df <- reticulate::py_to_r(py_record$df()) - - record_class <- self$get_record_class() - record_class$new(as.list(record_df)) + create_record_from_python(py_record, private$.instance) }, #' @description #' Get the fields in the registry. @@ -372,3 +369,40 @@ Registry <- R6::R6Class( # nolint object_name_linter } ) ) + +create_record_from_python <- function(py_record, instance) { + + py_classes <- class(py_record) + + # Skip related fields for now + if ("django.db.models.manager.Manager" %in% py_classes) { + return(NULL) + } + + class_split <- strsplit(py_classes[1], "\\.")[[1]] + module_name <- class_split[1] + if (module_name == "lnschema_core") { + module_name <- "core" + } + registry_name <- tolower(class_split[3]) + + registry <- instance$get_module(module_name)$get_registry(registry_name) + fields <- registry$get_field_names() + + record_list <- map(fields, function(.field) { + value <- tryCatch( + py_record[[.field]], + error = function(err) { + NULL + } + ) + if (inherits(value, "lnschema_core.models.Record")) { + value <- create_record_from_python(value, instance) + } + value + }) |> + setNames(fields) + + record_class <- registry$get_record_class() + suppressWarnings(record_class$new(record_list)) +} From d27dd95d7271690ff017a7a7437d802af18db159 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Thu, 14 Nov 2024 08:18:29 +0100 Subject: [PATCH 05/29] Add temporary record classes with saving --- R/Registry.R | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/R/Registry.R b/R/Registry.R index a448bcd..d400234 100644 --- a/R/Registry.R +++ b/R/Registry.R @@ -404,5 +404,40 @@ create_record_from_python <- function(py_record, instance) { setNames(fields) record_class <- registry$get_record_class() - suppressWarnings(record_class$new(record_list)) + temp_record_class <- create_temporary_record_class(record_class) + + suppressWarnings(temp_record_class$new(record_class, py_record, record_list)) +} + +create_temporary_record_class <- function(record_class) { + R6::R6Class( + paste0("Temporary", record_class$classname), + cloneable = FALSE, + inherit = record_class, + public = list( + initialize = function(record_class, py_record, data) { + private$.record_class <- record_class + private$.py_record <- py_record + + super$initialize(data) + }, + save = function() { + private$.py_record$save() + private$.registry$get(self$uid) + }, + #' @description + #' Print a `TemporaryRecord` + #' + #' @param style Logical, whether the output is styled using ANSI codes + print = function(style = TRUE) { + cat("!!! TEMPORARY RECORD !!!") + cat("\n\n") + super$print() + } + ), + private = list( + .record_class = NULL, + .py_record = NULL + ) + ) } From 433e278c81acce251c4a7dbb8b427c684c4f0c62 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Thu, 14 Nov 2024 10:22:48 +0100 Subject: [PATCH 06/29] Overwrite data after saving temporary record --- R/Registry.R | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/R/Registry.R b/R/Registry.R index d400234..a50e41f 100644 --- a/R/Registry.R +++ b/R/Registry.R @@ -422,22 +422,38 @@ create_temporary_record_class <- function(record_class) { super$initialize(data) }, save = function() { + if (isTRUE(private$.saved)) { + cli::cli_abort("This record has already been saved to the database") + } + private$.py_record$save() - private$.registry$get(self$uid) + + # Replace temporary data with data saved to the database + private$.data <- private$.api$get_record( + module_name = private$.registry$module$name, + registry_name = private$.registry$name, + id_or_uid = self$uid + ) + + private$.saved <- TRUE }, #' @description #' Print a `TemporaryRecord` #' #' @param style Logical, whether the output is styled using ANSI codes print = function(style = TRUE) { - cat("!!! TEMPORARY RECORD !!!") - cat("\n\n") + if (isFALSE(private$.saved)) { + cat("!!! TEMPORARY RECORD !!!") + cat("\n\n") + } + super$print() } ), private = list( .record_class = NULL, - .py_record = NULL + .py_record = NULL, + .saved = FALSE ) ) } From ccba12133cc1b4f376b8f0218885d90930061d13 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Thu, 14 Nov 2024 11:17:49 +0100 Subject: [PATCH 07/29] Create a default instance with connect(slug=NULL) --- R/Instance.R | 28 ++++++++++++++++++++++------ R/Registry.R | 7 +++++++ R/connect.R | 24 +++++++++++++++++++++++- 3 files changed, 52 insertions(+), 7 deletions(-) diff --git a/R/Instance.R b/R/Instance.R index f015da5..72723a9 100644 --- a/R/Instance.R +++ b/R/Instance.R @@ -1,4 +1,4 @@ -create_instance <- function(instance_settings) { +create_instance <- function(instance_settings, is_default) { super <- NULL # satisfy linter api <- InstanceAPI$new(instance_settings = instance_settings) @@ -46,11 +46,12 @@ create_instance <- function(instance_settings) { cloneable = FALSE, inherit = Instance, public = list( - initialize = function(settings, api, schema) { + initialize = function(settings, api, schema, is_default) { super$initialize( settings = settings, api = api, - schema = schema + schema = schema, + is_default = is_default ) } ), @@ -58,7 +59,12 @@ create_instance <- function(instance_settings) { ) # create the instance - RichInstance$new(settings = instance_settings, api = api, schema = schema) + RichInstance$new( + settings = instance_settings, + api = api, + schema = schema, + is_default = is_default + ) } #' @title Instance @@ -103,9 +109,11 @@ Instance <- R6::R6Class( # nolint object_name_linter #' @param settings The settings for the instance #' @param api The API for the instance #' @param schema The schema for the instance - initialize = function(settings, api, schema) { + #' @param is_default Logical, whether this is the default instance + initialize = function(settings, api, schema, is_default) { private$.settings <- settings private$.api <- api + private$.is_default <- is_default # create module classes from the schema private$.module_classes <- map( @@ -246,9 +254,17 @@ Instance <- R6::R6Class( # nolint object_name_linter ) } ), + active = list( + #' @field type (`logical(1)`)\cr + #' Whether this is the default instance. + is_default = function() { + private$.is_default + } + ), private = list( .settings = NULL, .api = NULL, - .module_classes = NULL + .module_classes = NULL, + .is_default = NULL ) ) diff --git a/R/Registry.R b/R/Registry.R index a50e41f..1d83d5c 100644 --- a/R/Registry.R +++ b/R/Registry.R @@ -139,6 +139,13 @@ Registry <- R6::R6Class( # nolint object_name_linter list_rbind() }, from_df = function(dataframe, key = NULL, description = NULL, run = NULL) { + if (isFALSE(private$.instance$is_default)) { + cli::cli_abort(c( + "Only the default instance can create records", + "i" = "Use {.code connect(slug = NULL)} to connect to the default instance" + )) + } + if (private$.registry_name != "artifact") { cli::cli_abort( "Creating records from data frames is only supported for the Artifact registry" diff --git a/R/connect.R b/R/connect.R index 7fc5594..15a295b 100644 --- a/R/connect.R +++ b/R/connect.R @@ -80,7 +80,29 @@ connect <- function(slug = NULL) { } } - create_instance(instance_settings = instance_settings) + is_default <- FALSE + if (is.null(slug)) { + instance_slug <- paste0( + instance_settings$owner, "/", name = instance_settings$name + ) + current_default <- getOption("LAMINR_DEFAULT_INSTANCE") + if (!is.null(current_default)) { + if (!identical(instance_slug, current_default)) { + cli::cli_abort(c( + "There is already a default instance {.field {current_default}}", + "i" = "To connect to another instance provide a slug" + )) + } + } else { + options(LAMINR_DEFAULT_INSTANCE = instance_slug) + } + is_default <- TRUE + } + + create_instance( + instance_settings = instance_settings, + is_default = is_default + ) } # nolint start: object_length_linter From d0bfe5a7cee50272675d13c38b1c80878216aaf2 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Thu, 14 Nov 2024 12:34:47 +0100 Subject: [PATCH 08/29] Adjust check_requires to output warnings --- R/utils.R | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/R/utils.R b/R/utils.R index c99b9b7..a696565 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,32 +1,40 @@ #' Check required packages #' -#' Check that required packages are available and give a nice error message with +#' Check that required packages are available and give a nice message with #' install instructions if not #' #' @param what A message stating what the packages are required for. Used at the #' start of the error message e.g. "{what} requires...". #' @param requires Character vector of required package names +#' @param type Type of message to give if packages are missing #' -#' @return `TRUE` invisibly if all packages are available, otherwise calls -#' [cli::cli_abort()] +#' @return Invisibly, Boolean whether or not all packages are available or +#' raises an error if any are missing and `type = "error"` #' @noRd -check_requires <- function(what, requires) { +check_requires <- function(what, requires, type = c("error", "warning")) { + type <- match.arg(type) + is_available <- map_lgl(requires, requireNamespace, quietly = TRUE) + msg_fun <- switch (type, + error = cli::cli_abort, + warning = cli::cli_warn + ) + if (any(!is_available)) { missing <- requires[!is_available] missing_str <- paste0("'", paste(missing, collapse = "', '"), "'") # nolint object_usage_linter - cli_abort( + msg_fun( c( "{what} requires the {.pkg {missing}} package{?s}", "i" = paste( - "To continue, install {cli::qty(missing)}{?it/them} using", - "{.code install.packages(c({missing_str}))}" + "Install {cli::qty(missing)}{?it/them} using", + "{.run install.packages(c({missing_str}))}" ) ), call = rlang::caller_env() ) } - invisible(TRUE) + invisible(all(is_available)) } From ca4eca999802abf26dee0b5648fb11102fb7455c Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Thu, 14 Nov 2024 12:35:13 +0100 Subject: [PATCH 09/29] Attempt to load Python lamin in connect() Gives a warning if it's not available. Module object is stored in the instance for further use. --- R/Instance.R | 21 +++++++++++++++------ R/Registry.R | 17 ++++++++--------- R/connect.R | 22 +++++++++++++++++++++- 3 files changed, 44 insertions(+), 16 deletions(-) diff --git a/R/Instance.R b/R/Instance.R index 72723a9..490eaa0 100644 --- a/R/Instance.R +++ b/R/Instance.R @@ -1,4 +1,4 @@ -create_instance <- function(instance_settings, is_default) { +create_instance <- function(instance_settings, is_default = FALSE, py_lamin = NULL) { super <- NULL # satisfy linter api <- InstanceAPI$new(instance_settings = instance_settings) @@ -46,12 +46,13 @@ create_instance <- function(instance_settings, is_default) { cloneable = FALSE, inherit = Instance, public = list( - initialize = function(settings, api, schema, is_default) { + initialize = function(settings, api, schema, is_default, py_lamin) { super$initialize( settings = settings, api = api, schema = schema, - is_default = is_default + is_default = is_default, + py_lamin = py_lamin ) } ), @@ -63,7 +64,8 @@ create_instance <- function(instance_settings, is_default) { settings = instance_settings, api = api, schema = schema, - is_default = is_default + is_default = is_default, + py_lamin = py_lamin ) } @@ -110,10 +112,11 @@ Instance <- R6::R6Class( # nolint object_name_linter #' @param api The API for the instance #' @param schema The schema for the instance #' @param is_default Logical, whether this is the default instance - initialize = function(settings, api, schema, is_default) { + initialize = function(settings, api, schema, is_default, py_lamin) { private$.settings <- settings private$.api <- api private$.is_default <- is_default + private$.py_lamin <- py_lamin # create module classes from the schema private$.module_classes <- map( @@ -259,12 +262,18 @@ Instance <- R6::R6Class( # nolint object_name_linter #' Whether this is the default instance. is_default = function() { private$.is_default + }, + #' @field type (`python.builtin.module`)\cr + #' Python lamindb module. + py_lamin = function() { + private$.py_lamin } ), private = list( .settings = NULL, .api = NULL, .module_classes = NULL, - .is_default = NULL + .is_default = NULL, + .py_lamin = NULL ) ) diff --git a/R/Registry.R b/R/Registry.R index 1d83d5c..83ec9f8 100644 --- a/R/Registry.R +++ b/R/Registry.R @@ -146,21 +146,20 @@ Registry <- R6::R6Class( # nolint object_name_linter )) } + if (is.null(private$.instance$py_lamin)) { + cli::cli_abort(c( + "Creating records requires the Python lamindb package", + "i" = "Check the output of {.code connect()} for warnings" + )) + } + if (private$.registry_name != "artifact") { cli::cli_abort( "Creating records from data frames is only supported for the Artifact registry" ) } - check_requires("Creating records from data frames", "reticulate") - - py_lamin <- reticulate::import("lamindb") - - instance_settings <- private$.instance$get_settings() - system2("lamin", "settings set auto-connect false") - py_lamin$connect( - paste0(instance_settings$owner, "/", instance_settings$name) - ) + py_lamin <- private$.instance$py_lamin py_record <- py_lamin$Artifact$from_df( dataframe, key = key, description = description, run = run diff --git a/R/connect.R b/R/connect.R index 15a295b..b542831 100644 --- a/R/connect.R +++ b/R/connect.R @@ -99,9 +99,29 @@ connect <- function(slug = NULL) { is_default <- TRUE } + py_lamin <- NULL + if (isTRUE(is_default)) { + check_requires("Connecting to Python", "reticulate", type = "warning") + + py_lamin <- tryCatch( + reticulate::import("lamindb"), + error = function(err) { + cli::cli_warn(c( + paste( + "Failed to connect to the Python {.pkg lamindb} package,", + "you will only be able to read from the database" + ), + "i" = "See {.run reticulate::py_config()} for more information" + )) + NULL + } + ) + } + create_instance( instance_settings = instance_settings, - is_default = is_default + is_default = is_default, + py_lamin = py_lamin ) } From 3deebfd8b7b7e47d7725f27759aa20417f8ef294 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Thu, 14 Nov 2024 13:41:34 +0100 Subject: [PATCH 10/29] Add reading for Parquet files --- DESCRIPTION | 1 + R/file_handlers.R | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3d00ea6..7e9d653 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,6 +25,7 @@ Imports: rlang Suggests: anndata, + nanoparquet, quarto, s3 (>= 1.1.0), testthat (>= 3.0.0), diff --git a/R/file_handlers.R b/R/file_handlers.R index 69d9314..2945030 100644 --- a/R/file_handlers.R +++ b/R/file_handlers.R @@ -11,7 +11,8 @@ get_file_loader <- function(suffix) { ".h5ad" = load_h5ad, ".csv" = load_csv, ".tsv" = load_tsv, - cli::cli_abort("Loading files with suffix {suffix} is not supported") + ".parquet" = load_parquet, + cli::cli_abort("Loading files with suffix {.val suffix} is not supported") ) } @@ -46,3 +47,15 @@ load_csv <- function(file) { load_tsv <- function(file) { read.delim(file) } + +#' Load a Parquet file +#' +#' @param file Path to the file to load +#' +#' @return A `data.frame` +#' @noRd +load_parquet <- function(file) { + check_requires("Reading Parquet files", "nanoparquet") + + nanoparquet::read_parquet(file) +} From cc8cd0aaa1a28b28a77546b3fc646cc07b98723c Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Thu, 14 Nov 2024 14:27:08 +0100 Subject: [PATCH 11/29] Document and pass checks --- DESCRIPTION | 1 + NAMESPACE | 4 ++- R/Instance.R | 5 +-- R/Record.R | 74 ++++++++++++++++++++++++++++++++++++++- R/Registry.R | 86 +++++++++++++++++++--------------------------- R/laminr-package.R | 6 ++-- man/Instance.Rd | 17 ++++++++- man/Registry.Rd | 34 ++++++++++++++++++ 8 files changed, 169 insertions(+), 58 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7e9d653..2fef0c9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,6 +27,7 @@ Suggests: anndata, nanoparquet, quarto, + reticulate, s3 (>= 1.1.0), testthat (>= 3.0.0), withr diff --git a/NAMESPACE b/NAMESPACE index 02250bf..db7fa84 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,8 @@ importFrom(purrr,map_lgl) importFrom(purrr,modify_depth) importFrom(purrr,pmap) importFrom(purrr,reduce) -importFrom(purrr,set_names) importFrom(purrr,transpose) importFrom(purrr,walk) +importFrom(rlang,set_names) +importFrom(utils,read.csv) +importFrom(utils,read.delim) diff --git a/R/Instance.R b/R/Instance.R index 490eaa0..ab3b9df 100644 --- a/R/Instance.R +++ b/R/Instance.R @@ -112,6 +112,7 @@ Instance <- R6::R6Class( # nolint object_name_linter #' @param api The API for the instance #' @param schema The schema for the instance #' @param is_default Logical, whether this is the default instance + #' @param py_lamin A Python `lamindb` module object initialize = function(settings, api, schema, is_default, py_lamin) { private$.settings <- settings private$.api <- api @@ -258,12 +259,12 @@ Instance <- R6::R6Class( # nolint object_name_linter } ), active = list( - #' @field type (`logical(1)`)\cr + #' @field is_default (`logical(1)`)\cr #' Whether this is the default instance. is_default = function() { private$.is_default }, - #' @field type (`python.builtin.module`)\cr + #' @field py_lamin (`python.builtin.module`)\cr #' Python lamindb module. py_lamin = function() { private$.py_lamin diff --git a/R/Record.R b/R/Record.R index e951257..17bd972 100644 --- a/R/Record.R +++ b/R/Record.R @@ -45,6 +45,78 @@ create_record_class <- function(instance, registry, api) { RichRecordClass } +#' Create a temporary record class +#' +#' @param record_class A generator for a standard record class +#' +#' @details +#' The classes generated by this function inherit from a standard record class +#' and represent the situation where a new record has being created using Python +#' `lamindb` but has not yet been saved to the database. It should behave the +#' same as the standard class but indicate to the user that it has not yet been +#' saved. Saving is performed using the Python record object stored in the R +#' temporary record. After saving, the data in the object is replaced with that +#' in the database, indications that it has not been saved are removed and +#' further saving is prevented. In examples etc. the `$save()` should usually be +#' called immediately to avoid users seeing the temporary record in it's unsaved +#' state. +#' +#' @return The temporary record class R6 generator +#' @noRd +create_temporary_record_class <- function(record_class) { + super <- NULL # Satisfy checks + self <- NULL # Satisfy checks + private <- NULL # Satisfy checks + + R6::R6Class( + paste0("Temporary", record_class$classname), + cloneable = FALSE, + inherit = record_class, + public = list( + initialize = function(record_class, py_record, data) { + private$.record_class <- record_class + private$.py_record <- py_record + + super$initialize(data) + }, + save = function() { + if (isTRUE(private$.saved)) { + cli::cli_abort("This record has already been saved to the database") + } + + private$.py_record$save() + + # Replace temporary data with data saved to the database + private$.data <- private$.api$get_record( + module_name = private$.registry$module$name, + registry_name = private$.registry$name, + id_or_uid = self$uid + ) + + private$.saved <- TRUE + }, + print = function(style = TRUE) { + if (isFALSE(private$.saved)) { + cli::cat_line(paste( + cli::bg_red(cli::col_black("TEMPORARY")), + cli::cli_fmt(cli::cli_text( + "This record has not been saved to the database. ", + "Save it using {.code $save()}." + )) + )) + } + + super$print() + } + ), + private = list( + .record_class = NULL, + .py_record = NULL, + .saved = FALSE + ) + ) +} + #' @title Record #' #' @description @@ -141,7 +213,7 @@ Record <- R6::R6Class( # nolint object_name_linter value }) |> - setNames(expected_fields) + set_names(expected_fields) # Get the important fields that are in the record important_fields <- intersect(important_fields, names(record_fields)) diff --git a/R/Registry.R b/R/Registry.R index 83ec9f8..2e22b38 100644 --- a/R/Registry.R +++ b/R/Registry.R @@ -138,6 +138,21 @@ Registry <- R6::R6Class( # nolint object_name_linter # Bind entries as rows list_rbind() }, + #' @description + #' Create a record from a data frame + #' + #' @param dataframe The `data.frame` to create a record from + #' @param key A relative path within the default storage + #' @param description A string describing the record + #' @param run A `Run` object that creates the record + #' + #' @details + #' Creating records is only possible for the default instance, requires the + #' Python `lamindb` module and is only implemented for the core `Artifact` + #' registry. + #' + #' @return A `TemporaryRecord` object containing the new record. This is not + #' saved to the database until `temp_record$save()` is called. from_df = function(dataframe, key = NULL, description = NULL, run = NULL) { if (isFALSE(private$.instance$is_default)) { cli::cli_abort(c( @@ -330,7 +345,7 @@ Registry <- R6::R6Class( # nolint object_name_linter list( paste0("[", paste(map_chr(module_fields, "field_name"), collapse = ", "), "]") ) |> - setNames(module_heading) |> + set_names(module_heading) |> make_key_value_strings(quote_strings = FALSE) }) @@ -376,6 +391,23 @@ Registry <- R6::R6Class( # nolint object_name_linter ) ) +#' Create record from Python +#' +#' @param py_record A Python record object +#' @param instance `Instance` object to create the record for +#' +#' @details +#' The new record is created by: +#' +#' 1. Getting the module and registry from the Python class +#' 2. Getting the fields for this registry +#' 3. Iteratively getting the data for each field. Values that are records are +#' converted by calling this function. +#' 4. Get the matching temporary record class +#' 5. Return the temporary record +#' +#' @return The created `TemporaryRecord` object +#' @noRd create_record_from_python <- function(py_record, instance) { py_classes <- class(py_record) @@ -407,59 +439,11 @@ create_record_from_python <- function(py_record, instance) { } value }) |> - setNames(fields) + set_names(fields) record_class <- registry$get_record_class() temp_record_class <- create_temporary_record_class(record_class) + # Suppress warnings because we deliberately add unexpected data fields suppressWarnings(temp_record_class$new(record_class, py_record, record_list)) } - -create_temporary_record_class <- function(record_class) { - R6::R6Class( - paste0("Temporary", record_class$classname), - cloneable = FALSE, - inherit = record_class, - public = list( - initialize = function(record_class, py_record, data) { - private$.record_class <- record_class - private$.py_record <- py_record - - super$initialize(data) - }, - save = function() { - if (isTRUE(private$.saved)) { - cli::cli_abort("This record has already been saved to the database") - } - - private$.py_record$save() - - # Replace temporary data with data saved to the database - private$.data <- private$.api$get_record( - module_name = private$.registry$module$name, - registry_name = private$.registry$name, - id_or_uid = self$uid - ) - - private$.saved <- TRUE - }, - #' @description - #' Print a `TemporaryRecord` - #' - #' @param style Logical, whether the output is styled using ANSI codes - print = function(style = TRUE) { - if (isFALSE(private$.saved)) { - cat("!!! TEMPORARY RECORD !!!") - cat("\n\n") - } - - super$print() - } - ), - private = list( - .record_class = NULL, - .py_record = NULL, - .saved = FALSE - ) - ) -} diff --git a/R/laminr-package.R b/R/laminr-package.R index 2b04b2d..79aa94e 100644 --- a/R/laminr-package.R +++ b/R/laminr-package.R @@ -3,8 +3,10 @@ ## usethis namespace: start #' @importFrom cli cli_abort cli_warn cli_inform -#' @importFrom R6 R6Class #' @importFrom httr GET POST content add_headers -#' @importFrom purrr map map_chr map_lgl map2 pmap set_names list_flatten transpose discard keep list_c walk reduce +#' @importFrom purrr map map_chr map_lgl map2 pmap list_flatten transpose discard keep list_c walk reduce +#' @importFrom R6 R6Class +#' @importFrom rlang set_names +#' @importFrom utils read.csv read.delim ## usethis namespace: end NULL diff --git a/man/Instance.Rd b/man/Instance.Rd index 2d1e7c6..854265e 100644 --- a/man/Instance.Rd +++ b/man/Instance.Rd @@ -33,6 +33,17 @@ artifact$id artifact$load() } } +\section{Active bindings}{ +\if{html}{\out{
}} +\describe{ +\item{\code{is_default}}{(\code{logical(1)})\cr +Whether this is the default instance.} + +\item{\code{py_lamin}}{(\code{python.builtin.module})\cr +Python lamindb module.} +} +\if{html}{\out{
}} +} \section{Methods}{ \subsection{Public methods}{ \itemize{ @@ -53,7 +64,7 @@ artifact$load() Creates an instance of this R6 class. This class should not be instantiated directly, but rather by connecting to a LaminDB instance using the \code{\link[=connect]{connect()}} function. \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Instance$new(settings, api, schema)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{Instance$new(settings, api, schema, is_default, py_lamin)}\if{html}{\out{
}} } \subsection{Arguments}{ @@ -64,6 +75,10 @@ but rather by connecting to a LaminDB instance using the \code{\link[=connect]{c \item{\code{api}}{The API for the instance} \item{\code{schema}}{The schema for the instance} + +\item{\code{is_default}}{Logical, whether this is the default instance} + +\item{\code{py_lamin}}{A Python \code{lamindb} module object} } \if{html}{\out{}} } diff --git a/man/Registry.Rd b/man/Registry.Rd index aade740..ef931ee 100644 --- a/man/Registry.Rd +++ b/man/Registry.Rd @@ -29,6 +29,7 @@ Whether the registry is a link table.} \item \href{#method-Registry-new}{\code{Registry$new()}} \item \href{#method-Registry-get}{\code{Registry$get()}} \item \href{#method-Registry-df}{\code{Registry$df()}} +\item \href{#method-Registry-from_df}{\code{Registry$from_df()}} \item \href{#method-Registry-get_fields}{\code{Registry$get_fields()}} \item \href{#method-Registry-get_field}{\code{Registry$get_field()}} \item \href{#method-Registry-get_field_names}{\code{Registry$get_field_names()}} @@ -110,6 +111,39 @@ A data.frame containing the available records } } \if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Registry-from_df}{}}} +\subsection{Method \code{from_df()}}{ +Create a record from a data frame +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Registry$from_df(dataframe, key = NULL, description = NULL, run = NULL)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{dataframe}}{The \code{data.frame} to create a record from} + +\item{\code{key}}{A relative path within the default storage} + +\item{\code{description}}{A string describing the record} + +\item{\code{run}}{A \code{Run} object that creates the record} +} +\if{html}{\out{
}} +} +\subsection{Details}{ +Creating records is only possible for the default instance, requires the +Python \code{lamindb} module and is only implemented for the core \code{Artifact} +registry. +} + +\subsection{Returns}{ +A \code{TemporaryRecord} object containing the new record. This is not +saved to the database until \code{temp_record$save()} is called. +} +} +\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Registry-get_fields}{}}} \subsection{Method \code{get_fields()}}{ From 2bddcfe45e2ba5d3d4aa1150e75af429b452a4b0 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Thu, 14 Nov 2024 15:02:18 +0100 Subject: [PATCH 12/29] Add get_temporary_record_class() to Registry --- R/Record.R | 8 ++++---- R/Registry.R | 23 +++++++++++++++++++---- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/R/Record.R b/R/Record.R index 17bd972..5d32bf5 100644 --- a/R/Record.R +++ b/R/Record.R @@ -73,7 +73,7 @@ create_temporary_record_class <- function(record_class) { cloneable = FALSE, inherit = record_class, public = list( - initialize = function(record_class, py_record, data) { + initialize = function(py_record, data) { private$.record_class <- record_class private$.py_record <- py_record @@ -99,8 +99,8 @@ create_temporary_record_class <- function(record_class) { if (isFALSE(private$.saved)) { cli::cat_line(paste( cli::bg_red(cli::col_black("TEMPORARY")), - cli::cli_fmt(cli::cli_text( - "This record has not been saved to the database. ", + cli::format_message(paste( + "This record has not been saved to the database.", "Save it using {.code $save()}." )) )) @@ -201,7 +201,7 @@ Record <- R6::R6Class( # nolint object_name_linter self[[.field]], error = function(err) { if (!grepl("status code 404", conditionMessage(err))) { - cli::abort(conditionMessage(err)) + cli::cli_abort(conditionMessage(err)) } NULL } diff --git a/R/Registry.R b/R/Registry.R index 2e22b38..19b92c1 100644 --- a/R/Registry.R +++ b/R/Registry.R @@ -215,6 +215,21 @@ Registry <- R6::R6Class( # nolint object_name_linter private$.record_class }, #' @description + #' Get the temporary record class for the registry. + #' + #' Note: This method is intended for internal use only and may be removed in the future. + #' + #' @return A [TemporaryRecord] class. + get_temporary_record_class = function() { + if (is.null(private$.temporary_record_class)) { + private$.temporary_record_class <- create_temporary_record_class( + private$.record_class + ) + } + + private$.temporary_record_class + }, + #' @description #' Print a `Registry` #' #' @param style Logical, whether the output is styled using ANSI codes @@ -365,7 +380,8 @@ Registry <- R6::R6Class( # nolint object_name_linter .class_name = NULL, .is_link_table = NULL, .fields = NULL, - .record_class = NULL + .record_class = NULL, + .temporary_record_class = NULL ), active = list( #' @field module ([Module])\cr @@ -441,9 +457,8 @@ create_record_from_python <- function(py_record, instance) { }) |> set_names(fields) - record_class <- registry$get_record_class() - temp_record_class <- create_temporary_record_class(record_class) + temp_record_class <- registry$get_temporary_record_class() # Suppress warnings because we deliberately add unexpected data fields - suppressWarnings(temp_record_class$new(record_class, py_record, record_list)) + suppressWarnings(temp_record_class$new(py_record, record_list)) } From d8a841d623961e12074d64196ba848614152998c Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Thu, 14 Nov 2024 15:08:38 +0100 Subject: [PATCH 13/29] Remove new file loaders --- DESCRIPTION | 1 - R/Artifact.R | 12 ++++++---- R/file_handlers.R | 61 ----------------------------------------------- 3 files changed, 8 insertions(+), 66 deletions(-) delete mode 100644 R/file_handlers.R diff --git a/DESCRIPTION b/DESCRIPTION index 2fef0c9..da27b72 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,7 +25,6 @@ Imports: rlang Suggests: anndata, - nanoparquet, quarto, reticulate, s3 (>= 1.1.0), diff --git a/R/Artifact.R b/R/Artifact.R index 9a2d061..172a23b 100644 --- a/R/Artifact.R +++ b/R/Artifact.R @@ -13,12 +13,16 @@ ArtifactRecord <- R6::R6Class( # nolint object_name_linter #' #' @return The artifact load = function() { - file_path <- self$cache() + artifact_accessor <- private$get_value("_accessor") - suffix <- private$get_value("suffix") - file_loader <- get_file_loader(suffix) + file_path <- self$cache() - file_loader(file_path) + if (artifact_accessor == "AnnData") { + check_requires("Loading AnnData objects", "anndata") + anndata::read_h5ad(file_path) + } else { + cli_abort(paste0("Unsupported accessor: ", artifact_accessor)) + } }, #' @description #' Cache the artifact to the local filesystem. This currently only supports diff --git a/R/file_handlers.R b/R/file_handlers.R deleted file mode 100644 index 2945030..0000000 --- a/R/file_handlers.R +++ /dev/null @@ -1,61 +0,0 @@ -#' Get file loader -#' -#' Get the correct file loader function based on a file suffix -#' -#' @param suffix String giving a file suffix -#' -#' @return Function that can be used to load the file -#' @noRd -get_file_loader <- function(suffix) { - switch (suffix, - ".h5ad" = load_h5ad, - ".csv" = load_csv, - ".tsv" = load_tsv, - ".parquet" = load_parquet, - cli::cli_abort("Loading files with suffix {.val suffix} is not supported") - ) -} - -#' Load a H5AD file -#' -#' @param file Path to the file to load -#' -#' @return An `anndata::AnnDataR6` object -#' @noRd -load_h5ad <- function(file) { - check_requires("Loading AnnData objects", "anndata") - - anndata::read_h5ad(file) -} - -#' Load a CSV file -#' -#' @param file Path to the file to load -#' -#' @return A `data.frame` -#' @noRd -load_csv <- function(file) { - read.csv(file) -} - -#' Load a TSV file -#' -#' @param file Path to the file to load -#' -#' @return A `data.frame` -#' @noRd -load_tsv <- function(file) { - read.delim(file) -} - -#' Load a Parquet file -#' -#' @param file Path to the file to load -#' -#' @return A `data.frame` -#' @noRd -load_parquet <- function(file) { - check_requires("Reading Parquet files", "nanoparquet") - - nanoparquet::read_parquet(file) -} From b9b93152f7007785e0ba99e8b70c981d731bd931 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Thu, 14 Nov 2024 15:13:10 +0100 Subject: [PATCH 14/29] Remove importing reading functions --- NAMESPACE | 2 -- R/laminr-package.R | 1 - 2 files changed, 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index db7fa84..8dc516f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,5 +26,3 @@ importFrom(purrr,reduce) importFrom(purrr,transpose) importFrom(purrr,walk) importFrom(rlang,set_names) -importFrom(utils,read.csv) -importFrom(utils,read.delim) diff --git a/R/laminr-package.R b/R/laminr-package.R index 79aa94e..1e2c4f5 100644 --- a/R/laminr-package.R +++ b/R/laminr-package.R @@ -7,6 +7,5 @@ #' @importFrom purrr map map_chr map_lgl map2 pmap list_flatten transpose discard keep list_c walk reduce #' @importFrom R6 R6Class #' @importFrom rlang set_names -#' @importFrom utils read.csv read.delim ## usethis namespace: end NULL From 6dc941de9b351bc86e8228225058a452cff93e73 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Thu, 14 Nov 2024 15:14:51 +0100 Subject: [PATCH 15/29] Style package --- R/Registry.R | 4 ++-- R/connect.R | 3 ++- R/utils.R | 2 +- man/Registry.Rd | 16 ++++++++++++++++ 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/R/Registry.R b/R/Registry.R index 19b92c1..5ddc895 100644 --- a/R/Registry.R +++ b/R/Registry.R @@ -177,7 +177,8 @@ Registry <- R6::R6Class( # nolint object_name_linter py_lamin <- private$.instance$py_lamin py_record <- py_lamin$Artifact$from_df( - dataframe, key = key, description = description, run = run + dataframe, + key = key, description = description, run = run ) create_record_from_python(py_record, private$.instance) @@ -425,7 +426,6 @@ Registry <- R6::R6Class( # nolint object_name_linter #' @return The created `TemporaryRecord` object #' @noRd create_record_from_python <- function(py_record, instance) { - py_classes <- class(py_record) # Skip related fields for now diff --git a/R/connect.R b/R/connect.R index b542831..3d5cc2b 100644 --- a/R/connect.R +++ b/R/connect.R @@ -83,7 +83,8 @@ connect <- function(slug = NULL) { is_default <- FALSE if (is.null(slug)) { instance_slug <- paste0( - instance_settings$owner, "/", name = instance_settings$name + instance_settings$owner, "/", + name = instance_settings$name ) current_default <- getOption("LAMINR_DEFAULT_INSTANCE") if (!is.null(current_default)) { diff --git a/R/utils.R b/R/utils.R index a696565..7f9573c 100644 --- a/R/utils.R +++ b/R/utils.R @@ -16,7 +16,7 @@ check_requires <- function(what, requires, type = c("error", "warning")) { is_available <- map_lgl(requires, requireNamespace, quietly = TRUE) - msg_fun <- switch (type, + msg_fun <- switch(type, error = cli::cli_abort, warning = cli::cli_warn ) diff --git a/man/Registry.Rd b/man/Registry.Rd index ef931ee..876755e 100644 --- a/man/Registry.Rd +++ b/man/Registry.Rd @@ -34,6 +34,7 @@ Whether the registry is a link table.} \item \href{#method-Registry-get_field}{\code{Registry$get_field()}} \item \href{#method-Registry-get_field_names}{\code{Registry$get_field_names()}} \item \href{#method-Registry-get_record_class}{\code{Registry$get_record_class()}} +\item \href{#method-Registry-get_temporary_record_class}{\code{Registry$get_temporary_record_class()}} \item \href{#method-Registry-print}{\code{Registry$print()}} \item \href{#method-Registry-to_string}{\code{Registry$to_string()}} } @@ -205,6 +206,21 @@ A \link{Record} class. } } \if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Registry-get_temporary_record_class}{}}} +\subsection{Method \code{get_temporary_record_class()}}{ +Get the temporary record class for the registry. + +Note: This method is intended for internal use only and may be removed in the future. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Registry$get_temporary_record_class()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +A \link{TemporaryRecord} class. +} +} +\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Registry-print}{}}} \subsection{Method \code{print()}}{ From 134cc68b7f4513eba069f16a33515feacb0aea02 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Thu, 14 Nov 2024 15:19:26 +0100 Subject: [PATCH 16/29] Remove broken docs link --- R/Registry.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Registry.R b/R/Registry.R index 5ddc895..2f5b635 100644 --- a/R/Registry.R +++ b/R/Registry.R @@ -220,7 +220,7 @@ Registry <- R6::R6Class( # nolint object_name_linter #' #' Note: This method is intended for internal use only and may be removed in the future. #' - #' @return A [TemporaryRecord] class. + #' @return A `TemporaryRecord` class. get_temporary_record_class = function() { if (is.null(private$.temporary_record_class)) { private$.temporary_record_class <- create_temporary_record_class( From 39f88322f2b06246a3f749e1be704d88496f9c64 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Fri, 15 Nov 2024 10:42:41 +0100 Subject: [PATCH 17/29] Store user settings in option --- R/connect.R | 4 +++- R/settings_load.R | 11 +++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/R/connect.R b/R/connect.R index 3d5cc2b..0c41759 100644 --- a/R/connect.R +++ b/R/connect.R @@ -20,6 +20,8 @@ #' instance #' } connect <- function(slug = NULL) { + user_settings <- .get_user_settings() + instance_file <- if (is.null(slug)) { # if the slug is null, see if we can load the default instance @@ -144,7 +146,7 @@ connect <- function(slug = NULL) { owner <- split[[1]] name <- split[[2]] } else { - user_settings <- .settings_load__load_or_create_user_settings() + user_settings <- .get_user_settings() owner <- user_settings$handle name <- identifier diff --git a/R/settings_load.R b/R/settings_load.R index 444cc25..1a088ed 100644 --- a/R/settings_load.R +++ b/R/settings_load.R @@ -77,3 +77,14 @@ .settings_load__setup_user_from_store <- function(store) { # nolint object_length_linter UserSettings$new(store) } + +.get_user_settings <- function() { + user_settings <- getOption("LAMINR_USER_SETTINGS") + + if (is.null(user_settings)) { + user_settings <- .settings_load__load_or_create_user_settings() + options("LAMINR_USER_SETTINGS" = user_settings) + } + + user_settings +} From 55e722266062748dd94d58a487afaedea24d0590 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Fri, 15 Nov 2024 11:04:46 +0100 Subject: [PATCH 18/29] Add delete() method to Record --- R/InstanceAPI.R | 38 ++++++++++++++++++++++++++++++++++++++ R/Record.R | 16 ++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/R/InstanceAPI.R b/R/InstanceAPI.R index ff4dd9b..7929b84 100644 --- a/R/InstanceAPI.R +++ b/R/InstanceAPI.R @@ -197,6 +197,44 @@ InstanceAPI <- R6::R6Class( # nolint object_name_linter private$process_response(response, "get record") }, #' @description + #' Delete a record from the instance. + delete_record = function(module_name, + registry_name, + id_or_uid, + verbose = FALSE) { + + user_settings <- .get_user_settings() + + url <- paste0( + private$.instance_settings$api_url, + "/instances/", + private$.instance_settings$id, + "/modules/", + module_name, + "/", + registry_name, + "/", + id_or_uid, + "?schema_id=", + private$.instance_settings$schema_id + ) + + if (verbose) { + cli_inform("URL: {url}") + } + + response <- httr::DELETE( + url, + httr::add_headers( + accept = "application/json", + `Content-Type` = "application/json", + Authorization = paste("Bearer", user_settings$access_token) + ) + ) + + private$process_response(response, "delete record") + }, + #' @description #' Print an `API` #' #' @param style Logical, whether the output is styled using ANSI codes diff --git a/R/Record.R b/R/Record.R index 5d32bf5..4989f68 100644 --- a/R/Record.R +++ b/R/Record.R @@ -170,6 +170,22 @@ Record <- R6::R6Class( # nolint object_name_linter } }, #' @description + #' Delete a `Record` + #' + #' @param verbose Whether to print details of the API call + #' + #' @return `TRUE` invisibly if the deletion is successful + delete = function(verbose = FALSE) { + response <- private$.api$delete_record( + module_name = private$.registry$module$name, + registry_name = private$.registry$name, + id_or_uid = self$uid, + verbose = verbose + ) + + invisible(TRUE) + }, + #' @description #' Print a `Record` #' #' @param style Logical, whether the output is styled using ANSI codes From c2aa3d34b7e46f6037c26111afa15e0ebc97356f Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Fri, 15 Nov 2024 11:40:36 +0100 Subject: [PATCH 19/29] Update architecture vignette --- vignettes/architecture.qmd | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/vignettes/architecture.qmd b/vignettes/architecture.qmd index ba945f5..41e5fff 100644 --- a/vignettes/architecture.qmd +++ b/vignettes/architecture.qmd @@ -75,7 +75,6 @@ Fields define the type of data that can be stored in a registry and provide a wa For more information about fields, see `?Field`. The fields of core registries are documented in the `module_core` vignette: `vignette("module_core", package = "laminr")`. - ### Record A **record** is a single entry within a registry. @@ -88,7 +87,6 @@ In essence, you have **instances** that contain **modules**. Each module contains **registries**, which in turn hold **records**. Every record is composed of multiple **fields**. This hierarchical structure allows for flexible and organized management of data and metadata within LaminDB. - ## Class structure The `laminr` package provides a set of classes that mirror the core concepts of LaminDB. @@ -164,6 +162,8 @@ classDiagram +initialize(InstanceSettings Instance_settings) +get_schema(): Map +get_record(...): Map + +get_records(...): Map + +delete_record(...): NULL } class Module{ +initialize( @@ -187,8 +187,10 @@ classDiagram +get_field(String field_name): Field +get_field_names(): String[] +get(String id_or_uid, Bool include_foreign_keys, List~String~ select, Bool verbose): RichRecord - +get_registry_class(): RichRecordClass + +get_record_class(): RichRecordClass + +get_temporary_record_class(): TemporaryRecordClass +df(Integer limit, Bool verbose): DataFrame + +from_df(DataFrame dataframe, String key, String description, String run)): TemporaryRecord } class Field{ +initialize( @@ -211,6 +213,7 @@ classDiagram class Record{ +initialize(Instance Instance, Registry registry, API api, Map data): Record +get_value(String field_name): Any + +delete(): NULL } class RelatedRecords{ +initialize( @@ -285,9 +288,11 @@ classDiagram Bionty --|> Module RichInstance --> Bionty Registry --> RichRecord + Registry --> TemporaryRecord RichRecord --|> Record + TemporaryRecord --|> RichRecord Registry --> Artifact - Artifact --|> Record + Artifact --|> RichRecord %% ------------------------------------------------------------------------- %% --- Copied from base diagram -------------------------------------------- @@ -324,6 +329,8 @@ classDiagram +initialize(InstanceSettings Instance_settings) +get_schema(): Map +get_record(...): Map + +get_records(...): Map + +delete_record(...): NULL } class Module{ +initialize( @@ -347,8 +354,10 @@ classDiagram +get_field(String field_name): Field +get_field_names(): String[] +get(String id_or_uid, Bool include_foreign_keys, List~String~ select, Bool verbose): RichRecord - +get_registry_class(): RichRecordClass + +get_record_class(): RichRecordClass + +get_temporary_record_class(): TemporaryRecordClass +df(Integer limit, Bool verbose): DataFrame + +from_df(DataFrame dataframe, String key, String description, String run)): TemporaryRecord } class Field{ +initialize( @@ -371,6 +380,7 @@ classDiagram class Record{ +initialize(Instance Instance, Registry registry, API api, Map data): Record +get_value(String field_name): Any + +delete(): NULL } class RelatedRecords{ +initialize( @@ -378,6 +388,7 @@ classDiagram #emsp;String related_to, API api ): RelatedRecords +df(): DataFrame + +field: Field } %% ------------------------------------------------------------------------- @@ -412,6 +423,10 @@ classDiagram +...field value accessors... } style RichRecord fill:#ffe1c9 + class TemporaryRecord{ + +save(): NULL + } + style TemporaryRecord fill:#ffe1c9 class Artifact{ +...field value accessors... +cache(): String From d079cb3e90c2e5ccb0691fcb5fe6d9c5897f2134 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Fri, 15 Nov 2024 11:55:03 +0100 Subject: [PATCH 20/29] Update development vignette --- vignettes/development.qmd | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/vignettes/development.qmd b/vignettes/development.qmd index 87b0a87..708874a 100644 --- a/vignettes/development.qmd +++ b/vignettes/development.qmd @@ -45,7 +45,8 @@ This document outlines the features of the **{laminr}** package and the roadmap ### Manage data & metadata * [ ] **Create artifacts**: Create new artifacts from various data sources (e.g., files, data frames, in-memory objects). -* [ ] **Save artifacts**: Save artifacts to LaminDB with appropriate metadata. + - [x] `$from_df()`: Create an artifact from a data frame. +* [x] **Save artifacts**: Save artifacts to LaminDB with appropriate metadata. * [ ] **Load artifacts**: Load artifacts from LaminDB into R: - [ ] `fcs`: Load flow cytometry data. - [ ] `tsv`: Load tabular data. @@ -59,6 +60,7 @@ This document outlines the features of the **{laminr}** package and the roadmap - [x] `s3`: Interact with S3 storage. - [ ] `gcp`: Interact with Google Cloud Storage. * [ ] **Version artifacts**: Create new versions of artifacts. +* [x] **Delete artifacts**: Delete an existing artifact. * [ ] **Manage artifact metadata**: Add, update, and delete artifact metadata. * [ ] **Work with collections**: Create, manage, and query collections of artifacts. @@ -97,7 +99,7 @@ This document outlines the features of the **{laminr}** package and the roadmap ### Transfer data -* [ ] **Upload data**: Upload data files to LaminDB storage. +* [x] **Upload data**: Upload data files to LaminDB storage. * [x] **Download data**: Download data files from LaminDB storage. * [ ] **(Advanced) Support zero-copy data transfer**: Implement efficient data transfer mechanisms. @@ -116,7 +118,7 @@ A first version of the package that allows users to: ### Version 0.2.0 * Expand query functionality with comparators, relationships, and pagination. -* Implement basic data and metadata management features (create, save, load artifacts). +* Implement basic data and metadata management features (create, save, load and delete artifacts). * Expand support for different data formats and storage backends. ### Version 0.3.0 From e6a2792ad78d3c4fd5b70de687da7c59104d3de6 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Fri, 15 Nov 2024 11:56:14 +0100 Subject: [PATCH 21/29] Roxygenise --- man/Record.Rd | 21 +++++++++++++++++++++ man/Registry.Rd | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/man/Record.Rd b/man/Record.Rd index 133a9a1..b414a80 100644 --- a/man/Record.Rd +++ b/man/Record.Rd @@ -10,6 +10,7 @@ A record from a registry. \subsection{Public methods}{ \itemize{ \item \href{#method-Record-new}{\code{Record$new()}} +\item \href{#method-Record-delete}{\code{Record$delete()}} \item \href{#method-Record-print}{\code{Record$print()}} \item \href{#method-Record-to_string}{\code{Record$to_string()}} } @@ -39,6 +40,26 @@ but rather by connecting to a LaminDB instance using the \code{\link[=connect]{c } } \if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Record-delete}{}}} +\subsection{Method \code{delete()}}{ +Delete a \code{Record} +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Record$delete(verbose = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{verbose}}{Whether to print details of the API call} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +\code{TRUE} invisibly if the deletion is successful +} +} +\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Record-print}{}}} \subsection{Method \code{print()}}{ diff --git a/man/Registry.Rd b/man/Registry.Rd index 876755e..d1d3c9c 100644 --- a/man/Registry.Rd +++ b/man/Registry.Rd @@ -217,7 +217,7 @@ Note: This method is intended for internal use only and may be removed in the fu } \subsection{Returns}{ -A \link{TemporaryRecord} class. +A \code{TemporaryRecord} class. } } \if{html}{\out{
}} From 62540e692fb8f648f55ab245567e733e7f31e63b Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Fri, 15 Nov 2024 13:23:29 +0100 Subject: [PATCH 22/29] Add test for Artifact$from_df() --- tests/testthat/test-Artifact.R | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 tests/testthat/test-Artifact.R diff --git a/tests/testthat/test-Artifact.R b/tests/testthat/test-Artifact.R new file mode 100644 index 0000000..8455c28 --- /dev/null +++ b/tests/testthat/test-Artifact.R @@ -0,0 +1,21 @@ +skip_if_offline() + +test_that("creating an artifact from a data frame works", { + skip_if_not_installed("reticulate") + skip_if_not(reticulate::py_available("lamindb")) + + local_setup_lamindata_instance() + + db <- connect() + + dataframe <- data.frame( + Description = "laminr test data frame", + Timestamp = Sys.time() + ) + + new_artifact <- db$Artifact$from_df( + dataframe, description = dataframe$Description + ) + + expect_s3_class(new_artifact, "TemporaryArtifact") +}) From 8d7a5e2fc8d874135a45f6f29593aae3a893f1f1 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Fri, 15 Nov 2024 13:36:32 +0100 Subject: [PATCH 23/29] Fix incorrect function call in test --- tests/testthat/test-Artifact.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-Artifact.R b/tests/testthat/test-Artifact.R index 8455c28..233c3c3 100644 --- a/tests/testthat/test-Artifact.R +++ b/tests/testthat/test-Artifact.R @@ -2,7 +2,7 @@ skip_if_offline() test_that("creating an artifact from a data frame works", { skip_if_not_installed("reticulate") - skip_if_not(reticulate::py_available("lamindb")) + skip_if_not(reticulate::py_module_available("lamindb")) local_setup_lamindata_instance() From ad09309026c4a1c068187e96ffdca0ffda301462 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Mon, 18 Nov 2024 08:49:41 +0100 Subject: [PATCH 24/29] Update CHANGELOG --- CHANGELOG.md | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index efe83e1..277fac8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,13 +2,27 @@ ## NEW FUNCTIONALITY -* Add support for more loaders (PR #81). +- Add support for more loaders (PR #81). Currently supported: `.csv`, `.h5ad`, `.html`, `.jpg`, `.json`, `.parquet`, `.png`, `.rds`, `.svg`, `.tsv`, `.yaml`. - Planned: `.fcs`, `.h5mu`, `.zarr`. +- Add a `from_df()` method to the `Registry` class to create new artifacts from data frames (PR #78) +- Create `TemporaryRecord` classes for new artifacts before they have been saved to the database (PR #78) +- Add a `delete()` method to the `Record` class (PR #78) + +## MAJOR CHANGES + +- Running `connect(slug = NULL)` now connects to the default instance that is allowed to create records. + The default instance must be changed using the Lamin CLI. (PR #78) +- User setting are stored in a global option the first time `connect()` is run (PR #78) + +## TESTING + +- Add a test for creating artifacts from data frames (PR #78). ## DOCUMENTATION -* Updated installation instructions after **{laminr}** was released on CRAN (PR #74). +- Updated installation instructions after **{laminr}** was released on CRAN (PR #74). +- Updated the architecture vignette to include new methods and the new `TemporaryRecord` class (PR #78) +- Updated the development vignette with new functionality (PR #78) # laminr v0.1.0 From 0c2753762e6b29027b937c520998860215086762 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Mon, 18 Nov 2024 08:57:04 +0100 Subject: [PATCH 25/29] Error in API$delete_record() if null access token --- R/InstanceAPI.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/InstanceAPI.R b/R/InstanceAPI.R index 7929b84..540c6dd 100644 --- a/R/InstanceAPI.R +++ b/R/InstanceAPI.R @@ -204,6 +204,12 @@ InstanceAPI <- R6::R6Class( # nolint object_name_linter verbose = FALSE) { user_settings <- .get_user_settings() + if (is.null(user_settings$access_token)) { + cli::cli_abort(c( + "There is no access token for the current user", + "i" = "Run {.code lamin login} and reconnect to the database in a new R session" + )) + } url <- paste0( private$.instance_settings$api_url, From fea16e1d3596637bb98905ef4253b9dcde1a7fc0 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Mon, 18 Nov 2024 09:05:32 +0100 Subject: [PATCH 26/29] Move importing lamindb to create_instance --- R/Instance.R | 21 ++++++++++++++++++++- R/connect.R | 25 +------------------------ 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/R/Instance.R b/R/Instance.R index ab3b9df..a6968c1 100644 --- a/R/Instance.R +++ b/R/Instance.R @@ -1,4 +1,4 @@ -create_instance <- function(instance_settings, is_default = FALSE, py_lamin = NULL) { +create_instance <- function(instance_settings, is_default = FALSE) { super <- NULL # satisfy linter api <- InstanceAPI$new(instance_settings = instance_settings) @@ -59,6 +59,25 @@ create_instance <- function(instance_settings, is_default = FALSE, py_lamin = NU active = active ) + py_lamin <- NULL + if (isTRUE(is_default)) { + check_requires("Connecting to Python", "reticulate", type = "warning") + + py_lamin <- tryCatch( + reticulate::import("lamindb"), + error = function(err) { + cli::cli_warn(c( + paste( + "Failed to connect to the Python {.pkg lamindb} package,", + "you will not be able to create records" + ), + "i" = "See {.run reticulate::py_config()} for more information" + )) + NULL + } + ) + } + # create the instance RichInstance$new( settings = instance_settings, diff --git a/R/connect.R b/R/connect.R index 0c41759..23720fb 100644 --- a/R/connect.R +++ b/R/connect.R @@ -102,30 +102,7 @@ connect <- function(slug = NULL) { is_default <- TRUE } - py_lamin <- NULL - if (isTRUE(is_default)) { - check_requires("Connecting to Python", "reticulate", type = "warning") - - py_lamin <- tryCatch( - reticulate::import("lamindb"), - error = function(err) { - cli::cli_warn(c( - paste( - "Failed to connect to the Python {.pkg lamindb} package,", - "you will only be able to read from the database" - ), - "i" = "See {.run reticulate::py_config()} for more information" - )) - NULL - } - ) - } - - create_instance( - instance_settings = instance_settings, - is_default = is_default, - py_lamin = py_lamin - ) + create_instance(instance_settings, is_default) } # nolint start: object_length_linter From 3dbf7bc17ce07e58921f2b17b76a28ad5337719e Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Mon, 18 Nov 2024 09:09:32 +0100 Subject: [PATCH 27/29] Make Python lamin getter and function not field --- R/Instance.R | 11 ++++++----- R/Registry.R | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/R/Instance.R b/R/Instance.R index a6968c1..61dc627 100644 --- a/R/Instance.R +++ b/R/Instance.R @@ -189,6 +189,12 @@ Instance <- R6::R6Class( # nolint object_name_linter get_api = function() { private$.api }, + #' @description Get the Python lamindb module + #' + #' @return Python lamindb module. + get_py_lamin = function() { + private$.py_lamin + } #' @description #' Print an `Instance` #' @@ -282,11 +288,6 @@ Instance <- R6::R6Class( # nolint object_name_linter #' Whether this is the default instance. is_default = function() { private$.is_default - }, - #' @field py_lamin (`python.builtin.module`)\cr - #' Python lamindb module. - py_lamin = function() { - private$.py_lamin } ), private = list( diff --git a/R/Registry.R b/R/Registry.R index 2f5b635..92f7fa1 100644 --- a/R/Registry.R +++ b/R/Registry.R @@ -161,7 +161,7 @@ Registry <- R6::R6Class( # nolint object_name_linter )) } - if (is.null(private$.instance$py_lamin)) { + if (is.null(private$.instance$get_py_lamin())) { cli::cli_abort(c( "Creating records requires the Python lamindb package", "i" = "Check the output of {.code connect()} for warnings" @@ -174,7 +174,7 @@ Registry <- R6::R6Class( # nolint object_name_linter ) } - py_lamin <- private$.instance$py_lamin + py_lamin <- private$.instance$get_py_lamin() py_record <- py_lamin$Artifact$from_df( dataframe, From 25ce8fd4e32202fdcf14a9d1e8f15c84d8465942 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Mon, 18 Nov 2024 09:14:53 +0100 Subject: [PATCH 28/29] Add features to README --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 745cbcd..c41ba00 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,8 @@ LaminDB is accompanied by LaminHub which is a data collaboration hub built on La - Load artifacts into memory. - Currently supported file formats: `.csv`, `.h5ad`, `.html`, `.jpg`, `.json`, `.parquet`, `.png`, `.rds`, `.svg`, `.tsv`, `.yaml`. - Planned: `.fcs`, `.h5mu`, `.zarr`. +- Create records from data frames. +- Delete records. See the development roadmap for more details (`vignette("development", package = "laminr")`). From 81274cb1539781ddcfd803d3ee4e20f3d8c9ae45 Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Mon, 18 Nov 2024 09:20:45 +0100 Subject: [PATCH 29/29] Pass checks and style --- R/Instance.R | 2 +- R/InstanceAPI.R | 1 - man/Instance.Rd | 17 ++++++++++++++--- man/laminr-package.Rd | 4 ++-- tests/testthat/test-Artifact.R | 3 ++- tests/testthat/test-core_loaders.R | 1 + 6 files changed, 20 insertions(+), 8 deletions(-) diff --git a/R/Instance.R b/R/Instance.R index 61dc627..6f4b111 100644 --- a/R/Instance.R +++ b/R/Instance.R @@ -194,7 +194,7 @@ Instance <- R6::R6Class( # nolint object_name_linter #' @return Python lamindb module. get_py_lamin = function() { private$.py_lamin - } + }, #' @description #' Print an `Instance` #' diff --git a/R/InstanceAPI.R b/R/InstanceAPI.R index 540c6dd..1038b53 100644 --- a/R/InstanceAPI.R +++ b/R/InstanceAPI.R @@ -202,7 +202,6 @@ InstanceAPI <- R6::R6Class( # nolint object_name_linter registry_name, id_or_uid, verbose = FALSE) { - user_settings <- .get_user_settings() if (is.null(user_settings$access_token)) { cli::cli_abort(c( diff --git a/man/Instance.Rd b/man/Instance.Rd index 854265e..7d09979 100644 --- a/man/Instance.Rd +++ b/man/Instance.Rd @@ -38,9 +38,6 @@ artifact$load() \describe{ \item{\code{is_default}}{(\code{logical(1)})\cr Whether this is the default instance.} - -\item{\code{py_lamin}}{(\code{python.builtin.module})\cr -Python lamindb module.} } \if{html}{\out{}} } @@ -53,6 +50,7 @@ Python lamindb module.} \item \href{#method-Instance-get_module_names}{\code{Instance$get_module_names()}} \item \href{#method-Instance-get_settings}{\code{Instance$get_settings()}} \item \href{#method-Instance-get_api}{\code{Instance$get_api()}} +\item \href{#method-Instance-get_py_lamin}{\code{Instance$get_py_lamin()}} \item \href{#method-Instance-print}{\code{Instance$print()}} \item \href{#method-Instance-to_string}{\code{Instance$to_string()}} } @@ -160,6 +158,19 @@ The API for the instance. } } \if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Instance-get_py_lamin}{}}} +\subsection{Method \code{get_py_lamin()}}{ +Get the Python lamindb module +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Instance$get_py_lamin()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +Python lamindb module. +} +} +\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Instance-print}{}}} \subsection{Method \code{print()}}{ diff --git a/man/laminr-package.Rd b/man/laminr-package.Rd index ea87da4..0f2dd61 100644 --- a/man/laminr-package.Rd +++ b/man/laminr-package.Rd @@ -4,9 +4,9 @@ \name{laminr-package} \alias{laminr} \alias{laminr-package} -\title{laminr: 'LaminDB' Interface in R} +\title{laminr: Interface for 'LaminDB'} \description{ -Interact with 'LaminDB' from R. 'LaminDB' is an open-source data framework for biology. This package allows you to query and download data from 'LaminDB' instances. +Interact with 'LaminDB'. 'LaminDB' is an open-source data framework for biology. This package allows you to query and download data from 'LaminDB' instances. } \seealso{ Useful links: diff --git a/tests/testthat/test-Artifact.R b/tests/testthat/test-Artifact.R index 233c3c3..acf0747 100644 --- a/tests/testthat/test-Artifact.R +++ b/tests/testthat/test-Artifact.R @@ -14,7 +14,8 @@ test_that("creating an artifact from a data frame works", { ) new_artifact <- db$Artifact$from_df( - dataframe, description = dataframe$Description + dataframe, + description = dataframe$Description ) expect_s3_class(new_artifact, "TemporaryArtifact") diff --git a/tests/testthat/test-core_loaders.R b/tests/testthat/test-core_loaders.R index 7bc3137..0107882 100644 --- a/tests/testthat/test-core_loaders.R +++ b/tests/testthat/test-core_loaders.R @@ -39,6 +39,7 @@ test_that("load_file with a .tsv works", { test_that("load_file with an .h5ad works", { skip_if_not_installed("anndata") skip_if_not_installed("reticulate") + skip_if_not(reticulate::py_module_available("anndata")) file <- withr::local_file(tempfile(fileext = ".h5ad"))