
Commit

added more file and bucket fxns #12 #13
- also reworked file and bucket fxns to use s3fs package wherever possible
- re-export s3_path for use by users of this pkg #9
- added prompts via cli pkg to file and bucket fxns to protect destructive actions #7
- using fork of s3fs with a minor fix
sckott committed Oct 19, 2023
1 parent a529636 commit fba7400
Showing 24 changed files with 651 additions and 132 deletions.
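The examples in the diffs below all follow the same pattern: the file and bucket functions now take full S3 paths built with the re-exported s3_path() helper instead of separate bucket/key arguments, and destructive actions prompt first. A minimal usage sketch assembled from those examples (bucket and object names are placeholders; it assumes the package is attached and `AWS_REGION` is set):

# upload a local file; the remote path is built with the re-exported s3_path()
desc_file <- file.path(system.file(), "DESCRIPTION")
aws_file_upload(desc_file, s3_path("my-test-bucket", "DESCRIPTION"))

# download and delete take the same s3:// style paths
tfile <- tempfile()
aws_file_download(s3_path("my-test-bucket", "DESCRIPTION"), tfile)
aws_file_delete(s3_path("my-test-bucket", "DESCRIPTION"))

# destructive bucket operations now prompt before acting
aws_bucket_delete("my-test-bucket")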
6 changes: 5 additions & 1 deletion DESCRIPTION
@@ -21,11 +21,15 @@ Imports:
purrr,
rlang,
tibble,
fs
fs,
s3fs (>= 0.1.3),
cli
Suggests:
roxyglobals,
testthat (>= 3.0.0),
vcr (>= 0.6.0)
Config/roxyglobals/filename: globals.R
Config/roxyglobals/unique: FALSE
Config/testthat/edition: 3
Remotes:
sckott/s3fs@sckott/file_download_vec
13 changes: 13 additions & 0 deletions NAMESPACE
@@ -1,18 +1,26 @@
# Generated by roxygen2: do not edit by hand

export(aws_bucket_create)
export(aws_bucket_delete)
export(aws_bucket_download)
export(aws_bucket_exists)
export(aws_bucket_list_objects)
export(aws_bucket_tree)
export(aws_bucket_upload)
export(aws_buckets)
export(aws_file_attr)
export(aws_file_delete)
export(aws_file_download)
export(aws_file_exists)
export(aws_file_upload)
export(billing)
export(create_user)
export(list_users)
export(s3_path)
importFrom(cli,cli_inform)
importFrom(dplyr,mutate)
importFrom(fs,file_exists)
importFrom(fs,fs_bytes)
importFrom(lubridate,as_datetime)
importFrom(magrittr,"%>%")
importFrom(paws,costexplorer)
@@ -22,5 +30,10 @@ importFrom(purrr,list_rbind)
importFrom(purrr,map)
importFrom(purrr,map_chr)
importFrom(rlang,":=")
importFrom(s3fs,s3_dir_info)
importFrom(s3fs,s3_dir_tree)
importFrom(s3fs,s3_file_copy)
importFrom(s3fs,s3_file_delete)
importFrom(s3fs,s3_path)
importFrom(tibble,as_tibble)
importFrom(tibble,tibble)
137 changes: 129 additions & 8 deletions R/bucket.R
@@ -30,29 +30,150 @@ aws_bucket_create <- function(bucket, ...) {
CreateBucketConfiguration = list(LocationConstraint = env_var("AWS_REGION")), ...)
}

#' Delete an S3 bucket
#'
#' @export
#' @param bucket (character) bucket name. required
#' @param ... named parameters passed on to [delete_bucket](https://www.paws-r-sdk.com/docs/s3_delete_bucket/)
#' @note Requires the env var `AWS_REGION`. This function prompts you to make
#' sure that you want to delete the bucket.
#' @return an empty list
#' @examples \dontrun{
#' aws_bucket_create(bucket="bucket-to-delete-111")
#' aws_buckets()
#' aws_bucket_delete(bucket="bucket-to-delete-111")
#' aws_buckets()
#' }
aws_bucket_delete <- function(bucket, ...) {
# TODO: add a package level option to override the prompt for adv. users
if (yesno("Are you sure you want to delete {.strong {bucket}}?")) {
return(invisible())
}
env64$s3$delete_bucket(Bucket = bucket, ...)
}
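The yesno() helper used here is not part of this diff. A minimal sketch of what such a prompt could look like, assuming (as the early returns above imply) that it returns TRUE when the user declines; the package's real helper may differ:

# Hypothetical sketch of a yesno() prompt helper; the actual implementation
# lives elsewhere in the package and is not shown in this commit.
yesno <- function(msg, .envir = parent.frame()) {
  cli::cli_inform(msg, .envir = .envir)
  ans <- utils::menu(c("Yes", "No"))
  # TRUE when the user does NOT confirm, so callers can write
  # `if (yesno(...)) return(invisible())`
  ans != 1
}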

#' Download an S3 bucket
#'
#' @export
#' @param bucket (character) bucket name. required
#' @param dest_path (character) destination directory to store files. required
#' @param ... named parameters passed on to [s3fs::s3_dir_download()]
#' @note Requires the env var `AWS_REGION`
#' @examples \dontrun{
#' aws_bucket_create(bucket="tmp-bucket-369")
#' desc_file <- file.path(system.file(), "DESCRIPTION")
#' aws_file_upload(bucket = "tmp-bucket-369", path = desc_file)
#' aws_file_upload(bucket = "tmp-bucket-369", path = desc_file, key = "d_file")
#' temp_dir <- file.path(tempdir(), "tmp-bucket-369")
#' aws_bucket_download(bucket="tmp-bucket-369", dest_path=temp_dir)
#'
#' # cleanup
#' aws_bucket_delete("tmp-bucket-369")
#' }
aws_bucket_download <- function(bucket, dest_path, ...) {
s3fs::s3_dir_download(path = bucket, new_path = dest_path, ...)
}

#' Upload a folder of files to create an S3 bucket
#'
#' @export
#' @importFrom fs fs_bytes
#' @param path (character) local path to a directory. required
#' @param bucket (character) bucket name. required
#' @param max_batch (fs_bytes) maximum batch size to upload with each
#' multipart request
#' @param ... named parameters passed on to [s3fs::s3_dir_upload()]
#' @note Requires the env var `AWS_REGION`. If the bucket does not exist,
#' this function prompts you before creating it.
#' @examples \dontrun{
#' library(fs)
#' tdir <- path(tempdir(), "apples")
#' dir.create(tdir)
#' tfiles <- replicate(n=10, file_temp(tmp_dir = tdir, ext=".txt"))
#' invisible(lapply(tfiles, function(x) write.csv(mtcars, x)))
#'
#' aws_bucket_upload(path=tdir, bucket="a-new-bucket-345")
#' aws_bucket_list_objects("a-new-bucket-345")
#'
#' # cleanup
#' objs <- aws_bucket_list_objects("a-new-bucket-345")
#' aws_file_delete(objs$uri)
#' aws_bucket_delete("a-new-bucket-345")
#' }
aws_bucket_upload <- function(path, bucket, max_batch = fs::fs_bytes("100MB"),
...) {

if (!aws_bucket_exists(bucket)) {
if (yesno("{.strong {bucket}} does not exist. Create it?")) {
cli::cli_inform("Exiting without uploading {.strong {basename(path)}}")
return(invisible())
}
aws_bucket_create(bucket)
}
s3fs::s3_dir_upload(path = path, new_path = bucket,
max_batch = max_batch, ...)
}

#' List objects in an S3 bucket
#'
#' @export
#' @importFrom s3fs s3_dir_info
#' @param bucket (character) bucket name. required
#' @param ... named parameters passed on to [list_objects](https://www.paws-r-sdk.com/docs/s3_list_objects/)
#' @param ... named parameters passed on to [s3fs::s3_dir_info()]
#' @return A tibble; empty if no objects are found. If there are rows, each
#' row is an S3 object, with 8 columns:
#' * bucket_name (character)
#' * key (character)
#' * uri (character)
#' * size (fs::bytes)
#' * type (character)
#' * owner (character)
#' * etag (character)
#' * last_modified (dttm)
#' @examples \dontrun{
#' aws_bucket_list_objects(bucket="s64-test-2")
#' }
aws_bucket_list_objects <- function(bucket, ...) {
env64$s3$list_objects(Bucket = bucket, ...)
out <- s3fs::s3_dir_info(bucket, ...)
if (is.data.frame(out) && NROW(out) > 0) {
as_tibble(out)
} else {
tibble()
}
}
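Since the return value is a tibble with typed columns (size is an fs::bytes vector), it can be filtered directly with dplyr, which the package already imports; for example (bucket name is a placeholder):

# list objects, then keep only those larger than 1 MB
objs <- aws_bucket_list_objects("my-test-bucket")
big <- dplyr::filter(objs, size > fs::fs_bytes("1MB"))
big$uri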

#' List S3 buckets
#'
#' @export
#' @param ... named parameters passed on to [list_buckets](https://www.paws-r-sdk.com/docs/s3_list_buckets/)
#' @return tibble with zero or more rows (each an S3 bucket), with two columns:
#' * Name (character)
#' * CreationDate (dttm)
#' @autoglobal
#' @importFrom s3fs s3_dir_info
#' @inherit aws_bucket_list_objects
#' @note we set `refresh=TRUE` internally to make sure we return up-to-date
#' information about your buckets rather than what's cached locally
#' @examples \dontrun{
#' aws_buckets()
#' }
aws_buckets <- function(...) {
env64$s3$list_buckets(...) %>% .$Buckets %>% map(., as_tibble) %>% list_rbind()
out <- s3fs::s3_dir_info(refresh = TRUE, ...)
if (is.data.frame(out) && NROW(out) > 0) {
as_tibble(out)
} else {
tibble()
}
}

#' Print a tree of the objects in a bucket
#'
#' @export
#' @importFrom s3fs s3_dir_tree
#' @inheritParams aws_bucket_exists
#' @param recurse (logical) whether to return all AWS S3 objects in lower subdirectories
#' @param ... Additional arguments passed to [s3fs::s3_dir_ls()]
#' @return character vector of objects/files within the bucket,
#' printed as a tree
#' @examples \dontrun{
#' aws_bucket_tree("s3://s64-test-2")
#' }
aws_bucket_tree <- function(bucket, recurse = TRUE, ...) {
s3fs::s3_dir_tree(bucket, recurse = recurse, ...)
}
144 changes: 97 additions & 47 deletions R/files.R
@@ -2,84 +2,134 @@
#'
#' @export
#' @importFrom fs file_exists
#' @param bucket (character) an S3 bucket. required
#' @param path (character) a file path to read from or write to. required
#' @param key (character) a key for an object in an S3 `bucket`. required
#' @param ... named parameters passed on to [put_object](https://www.paws-r-sdk.com/docs/s3_put_object/)
#' @details Wraps [put_object](https://www.paws-r-sdk.com/docs/s3_put_object/)
#' @return a tibble with two columns and many rows
#' @details `bucket` parameter:
#' @importFrom s3fs s3_file_copy
#' @importFrom cli cli_inform
#' @param path (character) a file path to read from. required
#' @param remote_path (character) a remote path where the file
#' should go. required
#' @param ... named parameters passed on to [s3fs::s3_file_copy()]
#' @return (character) a vector of remote s3 paths
#' @details
#' - For upload: if the bucket does not exist, it will be created (after a prompt)
#' - For download: if the remote file does not exist, the function returns an error
#' @examples \dontrun{
#' desc_file <- file.path(system.file(), "DESCRIPTION")
#' aws_file_upload(bucket = "s64-test-2", path = desc_file)
#' demo_rds_file <- file.path(system.file(), "Meta/demo.rds")
#' aws_file_upload(demo_rds_file, s3_path("s64-test-2", basename(demo_rds_file)))
#'
#' # supply a different key
#' aws_file_upload(bucket = "s64-test-2", path = desc_file, key = "d_file")
#' ## many files at once
#' links_rds_file <- file.path(system.file(), "Meta/links.rds")
#' aws_file_upload(
#' c(demo_rds_file, links_rds_file),
#' s3_path("s64-test-2", c(basename(demo_rds_file), basename(links_rds_file))))
#'
#' # set expiration, expire 1 minute from now
#' aws_file_upload(bucket = "s64-test-2", path = desc_file, key = "ddd",
#' Expires = Sys.time() + 60)
#' aws_file_upload(demo_rds_file, s3_path("s64-test-2", "ddd.rds"), Expires = Sys.time() + 60)
#'
#' # bucket doesn't exist
#' aws_file_upload(bucket = "not-a-bucket", path = desc_file)
#' # path doesn't exist
#' aws_file_upload(bucket = "s64-test-2", path = "file_doesnt_exist.txt")
#' aws_file_upload(demo_rds_file, "s3://not-a-bucket/eee.rds")
#' }
aws_file_upload <- function(bucket, path, key = basename(path), ...) {
#'
#' @examplesIf interactive()
#' # path doesn't exist
#' aws_file_upload("file_doesnt_exist.txt", s3_path("s64-test-2", "file_doesnt_exist.txt"))
aws_file_upload <- function(path, remote_path, ...) {
stopifnot(fs::file_exists(path))
if (!aws_bucket_exists(bucket)) aws_bucket_create(bucket)
env64$s3$put_object(Body = path, Bucket = bucket, Key = key, ...) %>%
tibble_transpose()
bucket <- path_s3_parser(remote_path)[[1]]$bucket
if (!aws_bucket_exists(bucket)) {
if (yesno("{.strong {bucket}} does not exist. Create it?")) {
cli::cli_inform("Exiting without uploading {.strong {basename(path)}}")
return(invisible())
}
aws_bucket_create(bucket)
}
s3fs::s3_file_copy(path, remote_path, ...)
}
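path_s3_parser() is an internal helper that is not part of this diff; a rough sketch of the bucket extraction it is used for here, assuming remote paths of the form s3://bucket/key (the real helper likely returns more fields and handles more cases):

# Hypothetical sketch: pull the bucket name out of an s3:// path.
# The package's actual path_s3_parser() is not shown in this commit.
parse_bucket <- function(remote_path) {
  stripped <- sub("^s3://", "", remote_path)
  strsplit(stripped, "/", fixed = TRUE)[[1]][1]
}
parse_bucket("s3://s64-test-2/Meta/demo.rds") # "s64-test-2"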

#' Download a file
#'
#' @export
#' @inheritParams aws_file_upload
#' @param ... named parameters passed on to [download_file](https://www.paws-r-sdk.com/docs/s3_download_file/)
#' @details Wraps [download_file](https://www.paws-r-sdk.com/docs/s3_download_file/)
#' @return Path of downloaded file
#' @param remote_path (character) one or more remote S3 paths. required
#' @param path (character) one or more file paths to write to. required
#' @param ... named parameters passed on to [s3fs::s3_file_download()]
#' @return (character) a vector of local file paths
#' @note uses a fork of s3fs for a minor fix that makes length > 1 inputs work
#' @examples \dontrun{
#' tfile <- tempfile()
#' aws_file_download(remote_path="s3://s64-test-2/DESCRIPTION", path=tfile)
#'
#' # many files
#' tfiles <- replicate(n=3, tempfile())
#' aws_file_download(remote_path=s3_path("s64-test-2", c("a_file", "c_file", "d_file")), path=tfiles)
#'
#' ## length of `remote_path` and `path` must be the same
#' tfiles <- replicate(n=2, tempfile())
#' aws_file_download(remote_path=s3_path("s64-test-2", c("a_file", "c_file", "d_file")), path=tfiles)
#'
#' # S3 file does not exist
#' temp_path <- tempfile()
#' aws_file_download(bucket = "s64-test-2", key = "DESCRIPTION",
#' path = temp_path)
#' aws_file_download(s3_path("s64-test-2", "TESTING123"), temp_path)
#' }
aws_file_download <- function(remote_path, path, ...) {
# FIXME: s3fs is not checking that length(remote_path) == length(path)
stopifnot(length(remote_path) == length(path))
s3fs::s3_file_download(remote_path, path, ...)
}

#' Delete a file
#'
#' @export
#' @importFrom s3fs s3_file_delete
#' @param remote_path (character) one or more remote S3 paths. required
#' @param ... named parameters passed on to [s3fs::s3_file_delete()]
#' @return (character) a vector of remote file paths
#' @examples \dontrun{
#' # create a file
#' tfile <- tempfile()
#' cat("Hello World!", file = tfile)
#' aws_file_upload(remote_path="s3://s64-test-2", path=tfile)
#'
#' # delete the file
#' aws_file_delete(s3_path("s64-test-2", basename(tfile)))
#'
#' # S3 key doesn't exist
#' aws_file_download(bucket = "s64-test-2", key = "TESTING123",
#' path = temp_path)
#' # file does not exist - no error is raised
#' aws_file_delete(s3_path("s64-test-2", "TESTING123"))
#' }
aws_file_download <- function(bucket, key, path, ...) {
env64$s3$download_file(Bucket = bucket, Key = key, Filename = path, ...)
return(path)
aws_file_delete <- function(remote_path, ...) {
s3fs::s3_file_delete(remote_path, ...)
}

#' File attributes
#'
#' @export
#' @inheritParams aws_file_upload
#' @param ... named parameters passed on to [head_object](https://www.paws-r-sdk.com/docs/s3_head_object/)
#' @return `list` of length 0
#' @inheritParams aws_file_download
#' @return a tibble with many columns, with number of rows matching length
#' of `remote_path`
#' @note uses [s3fs::s3_file_info()] internally
#' @examples \dontrun{
#' aws_file_attr(bucket = "s64-test-2", key = "DESCRIPTION")
#' aws_file_attr(bucket = "s64-test-2", key = "ddd")
#' aws_file_attr(bucket = "s64-test-2", key = "doesntexist")
#' # files one by one
#' aws_file_attr(s3_path("s64-test-2", "DESCRIPTION"))
#' aws_file_attr(s3_path("s64-test-2", "ddd"))
#' aws_file_attr(s3_path("s64-test-2", "doesntexist"))
#' # or all together
#' aws_file_attr(s3_path("s64-test-2", c("DESCRIPTION", "ddd")))
#' }
aws_file_attr <- function(bucket, key, ...) {
env64$s3$head_object(Bucket = bucket, Key = key, ...)
aws_file_attr <- function(remote_path) {
# TODO: error behavior isn't ideal b/c the error message doesn't indicate
# which file does not exist
s3fs::s3_file_info(remote_path) %>% as_tibble()
}
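Until that TODO is addressed, one way to get a clearer error is to check existence per path first with aws_file_exists() from this same commit; a short sketch (bucket and keys are placeholders):

# report missing remote files by name before asking for attributes
paths <- s3_path("s64-test-2", c("DESCRIPTION", "doesntexist"))
ok <- aws_file_exists(paths)
if (any(!ok)) {
  cli::cli_inform("These paths do not exist: {paths[!ok]}")
}
aws_file_attr(paths[ok])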

#' Check if a file exists
#'
#' @export
#' @inheritParams aws_file_upload
#' @return TRUE or FALSE
#' @inheritParams aws_file_attr
#' @return vector of booleans (`TRUE` or `FALSE`), length matches
#' `length(remote_path)`
#' @examples \dontrun{
#' aws_file_exists(bucket = "s64-test-2", key = "DESCRIPTION")
#' aws_file_exists(bucket = "s64-test-2", key = "doesntexist")
#' aws_file_exists(s3_path("s64-test-2", "DESCRIPTION"))
#' aws_file_exists(s3_path("s64-test-2", "doesntexist"))
#' aws_file_exists(s3_path("s64-test-2", c("DESCRIPTION", "doesntexist")))
#' }
aws_file_exists <- function(bucket, key, ...) {
res <- paws_handlr(aws_file_attr(bucket, key, ...))
!inherits(res, c("error", "error_response"))
aws_file_exists <- function(remote_path) {
s3fs::s3_file_exists(remote_path)
}