Consider utility function to provide summaries of multiple variables #52

JosiahParry · 2024-03-27T16:09:13Z

Semi-working idea

library(sfdep)
library(tidyverse)

# Summarise one variable (crime_pers): its mean, its standard deviation and a
# permutation-based global Moran's I, rendered as a single table row.
guerry |>
  mutate(
    # Neighbours and weights are derived from the geometry column, so they are
    # created before the geometry is dropped below.
    nb = st_contiguity(geometry),
    wt = st_weights(nb)
  ) |>
  sf::st_drop_geometry() |>
  summarise(
    avg = mean(crime_pers),
    std_dev = sd(crime_pers),
    # Fixed: this previously referenced `crimea_pers`, which does not match the
    # `crime_pers` column used for the mean and sd above.
    # list() stores the tidied test result in a single list-column cell.
    I = list(broom::tidy(global_moran_perm(crime_pers, nb, wt)))
  ) |>
  # Spread the fields of the tidied test result into their own columns.
  tidyr::unnest_wider(I) |>
  knitr::kable()

#' Summarise one variable together with its global Moran's I
#'
#' Computes the mean and standard deviation of `x` and appends the tidied
#' output of `global_moran_perm(x, nb, wt)` as additional columns.
#'
#' @param x A vector of values; it is passed to `mean()`, `sd()` and
#'   `global_moran_perm()`.
#' @param nb Neighbour list, forwarded unchanged to `global_moran_perm()`.
#' @param wt Weights list, forwarded unchanged to `global_moran_perm()`.
#' @return A tibble with columns `avg` and `std_dev`, followed by whatever
#'   columns `broom::tidy()` produces for the test result.
summarise_var <- function(x, nb, wt) {
  centre <- mean(x)
  spread <- sd(x)
  moran <- broom::tidy(global_moran_perm(x, nb, wt))

  # The tidied test is held in a list-column first, then unnested so that its
  # fields sit alongside the descriptive statistics.
  stats <- tibble::tibble(
    avg = centre,
    std_dev = spread,
    I = list(moran)
  )
  tidyr::unnest(stats, I)
}

#' Summarise every element of `x` with `summarise_var()`
#'
#' Applies `summarise_var()` to each element of `x` (each column, when `x` is
#' a data frame) using the same `nb` and `wt`, then row-binds the results.
#'
#' @param x A list or data frame whose elements are summarised one by one.
#' @param nb Neighbour list, forwarded to `summarise_var()`.
#' @param wt Weights list, forwarded to `summarise_var()`.
#' @return The per-element summaries combined by `collapse::rowbind()`.
summarize_all_vars <- function(x, nb, wt) {
  per_var <- lapply(x, function(values) summarise_var(values, nb, wt))

  # NOTE(review): the combined rows carry no column identifying the source
  # variable; presumably they follow the order of `x` -- confirm.
  collapse::rowbind(per_var)
}

DHLocke · 2024-03-27T19:22:44Z

# Another approach.. has to be an easier way
library(sfdep)
library(tidyverse)

sums_with_I <- guerry |>
  select(code_dept, region, department, starts_with("crime")) |>
  mutate(
    nb = st_contiguity(geometry),
    wt = st_weights(nb)
  ) |>
  sf::st_drop_geometry() |>
  summarise(
    across(
      where(is.numeric),
      # puts a second underscore between variable name and the function
      # (for easy splitting later)
      .names = "{.col}__{.fn}",
      .fns = list(
        mean = mean,
        sd = sd,
        I = ~ broom::tidy(global_moran_perm(.x, nb, wt))
      )
    )
  )

# what I like about the approach above is that:
# - it is easy to swap mean and sd to median and IQR, min, max, range.. etc..
# - the syntax builds logically on what the across vignette sets up.

# what I don't like
# - the output is wiiiide: variable, variable_function_1, variable_function_2,
#   next_variable, next_variable_function_1 and more

# here's how we might pry out and reshape
# but first, make a Not IN function and a list of items to exclude
`%nin%` <- Negate(`%in%`) # Trent Reznor function
exclude <- c("I.parameter", "I.method", "I.alternative")

sums_with_I # yuuck: too wide, gross column names with `$` embedded

sums_with_I |>
  t() |>
  data.frame() |>
  rownames_to_column() |>
  tibble() |>
  # double __ makes sep easy while allowing column names with "_"
  separate(rowname, into = c("variable", "statistic"), sep = "__") |>
  filter(statistic %nin% exclude) |>
  pivot_wider(
    id_cols = variable,
    names_from = statistic,
    values_from = t.sums_with_I.
  ) |>
  readr::type_convert()

# handsome output, but not a fun process

…

On Wed, Mar 27, 2024 at 12:09 PM Josiah Parry ***@***.***> wrote: Semi-working idea library(sfdep) library(tidyverse) guerry |> mutate( nb = st_contiguity(geometry), wt = st_weights(nb) ) |> sf::st_drop_geometry() |> summarise( avg = mean(crime_pers), std_dev = sd(crime_pers), I = list(broom::tidy(global_moran_perm(crimea_pers, nb, wt))) ) |> tidyr::unnest_wider(I) |> knitr::kable() summarise_var <- function(x, nb, wt) { tibble::tibble( avg = mean(x), std_dev = sd(x), I = list(broom::tidy(global_moran_perm(x, nb, wt))) ) |> tidyr::unnest(I) } summarize_all_vars <- function(x, nb, wt) { lapply(x, summarise_var, nb, wt) |> collapse::rowbind() } — Reply to this email directly, view it on GitHub <#52>, or unsubscribe <https://github.com/notifications/unsubscribe-auth/AD23UBQ5HLKXOSY3RHAV4A3Y2LVL5AVCNFSM6AAAAABFLFXFYGVHI2DSMVQWIX3LMV43ASLTON2WKOZSGIYTCMRQGY3DQOA> . You are receiving this because you are subscribed to this thread.Message ID: ***@***.***>

DHLocke · 2024-03-27T19:43:45Z

Maybe a more sophisticated pivot_longer fixes the second piece? https://stackoverflow.com/questions/70700654/pivot-longer-with-names-pattern-and-pairs-of-columns

…

-Dexter

On Wed, Mar 27, 2024 at 3:22 PM Dexter Locke ***@***.***> wrote: Another approach.. has to be an easier way library(sfdep) library(tidyverse) sums_with_I <- guerry |> select(code_dept, region, department, starts_with('crime')) |> mutate( nb = st_contiguity(geometry), wt = st_weights(nb) ) |> sf::st_drop_geometry() |> summarise(across(where(is.numeric) , .names = "{.col}__{.fn}" # puts a second underscore between variable name and the function (for easy splitting later) , list( mean = mean , sd = sd , I = ~broom::tidy(global_moran_perm(.x, nb, wt))))) # what I like about the approach above is that: # - it is easy to swap mean and sd to median and IQR, min, max, range.. etc.. # - the syntax builds logically on what the across vignette sets up. # what I don't like # - the output is wiiiide: variable, variable_function_1, variable_function_2, next_variable, next_variable_function_1 and more # here's how we might pry out and reshape # but first, make a Not IN function and a list of items to exclude `%nin%` <- Negate(`%in%`) # Trent Reznor function exclude <- c('I.parameter', 'I.method', 'I.alternative') sums_with_I # yuuck: too wide, gross column names with `$` embedded sums_with_I |> t() |> data.frame() |> rownames_to_column() |> tibble() |> separate(rowname, into = c('variable', 'statistic'), sep = '__') |> # double __ makes sep easy while allowing column names with "_" filter(statistic %nin% exclude) |> pivot_wider(id_cols = variable, names_from = statistic, values_from = t.sums_with_I.) 
|> readr::type_convert() # handsome output, but not a fun process On Wed, Mar 27, 2024 at 12:09 PM Josiah Parry ***@***.***> wrote: > Semi-working idea > > library(sfdep) > library(tidyverse) > guerry |> > mutate( > nb = st_contiguity(geometry), > wt = st_weights(nb) > ) |> > sf::st_drop_geometry() |> > summarise( > avg = mean(crime_pers), > std_dev = sd(crime_pers), > I = list(broom::tidy(global_moran_perm(crimea_pers, nb, wt))) > ) |> > tidyr::unnest_wider(I) |> > knitr::kable() > summarise_var <- function(x, nb, wt) { > tibble::tibble( > avg = mean(x), > std_dev = sd(x), > I = list(broom::tidy(global_moran_perm(x, nb, wt))) > ) |> > tidyr::unnest(I) > } > summarize_all_vars <- function(x, nb, wt) { > lapply(x, summarise_var, nb, wt) |> > collapse::rowbind() > } > > — > Reply to this email directly, view it on GitHub > <#52>, or unsubscribe > <https://github.com/notifications/unsubscribe-auth/AD23UBQ5HLKXOSY3RHAV4A3Y2LVL5AVCNFSM6AAAAABFLFXFYGVHI2DSMVQWIX3LMV43ASLTON2WKOZSGIYTCMRQGY3DQOA> > . > You are receiving this because you are subscribed to this thread.Message > ID: ***@***.***> >

DHLocke · 2024-04-18T16:05:01Z

# Here's another abomination
guerry |>
  select(code_dept, region, department, starts_with("crime")) |>
  mutate(
    nb = st_contiguity(geometry),
    wt = st_weights(nb)
  ) |>
  sf::st_drop_geometry() |>
  summarise(
    across(
      where(is.numeric),
      # puts a second underscore between variable name and the function
      # (for easy splitting later)
      .names = "{.col}__{.fn}",
      .fns = list(
        mean = mean,
        sd = sd,
        I = ~ broom::tidy(global_moran_perm(.x, nb, wt))
      )
    )
  ) |>
  # Keep only the statistic and p-value from each tidied test; the new names
  # start with "__" so the later split on "__" still applies to them.
  tidylog::mutate_if(
    is.tibble,
    ~ select(., `__Moran's I` = statistic, `__p` = p.value)
  ) |>
  # NOTE(review): unnest() is called without `cols`; recent tidyr versions
  # presumably warn about this -- confirm and name the columns explicitly.
  unnest() |>
  tibble::rownames_to_column() |>
  pivot_longer(-rowname) |>
  select(-rowname) |>
  separate(name, sep = "__", into = c("var", "stat"), convert = TRUE) |>
  mutate(
    # Names that began with "__" split into an empty `var`; blank it out so
    # fill() below can copy the preceding variable name down.
    var = ifelse(var == "", NA, var),
    stat = str_remove(stat, "[0-9]")
  ) |>
  tidylog::fill(var, .direction = "down") |>
  pivot_wider(id_cols = "var", names_from = "stat", values_from = "value")

# But it works?

…

On Wed, Mar 27, 2024 at 3:43 PM Dexter Locke ***@***.***> wrote: Maybe a more sophisticated pivot_longer fixes the second piece? https://stackoverflow.com/questions/70700654/pivot-longer-with-names-pattern-and-pairs-of-columns -Dexter On Wed, Mar 27, 2024 at 3:22 PM Dexter Locke ***@***.***> wrote: > Another approach.. has to be an easier way > > library(sfdep) > library(tidyverse) > > sums_with_I <- > guerry |> > select(code_dept, region, department, starts_with('crime')) |> > mutate( > nb = st_contiguity(geometry), > wt = st_weights(nb) > ) |> > sf::st_drop_geometry() |> > summarise(across(where(is.numeric) > , .names = "{.col}__{.fn}" # puts a second underscore > between variable name and the function (for easy splitting later) > , list( mean = mean > , sd = sd > , I = ~broom::tidy(global_moran_perm(.x, nb, > wt))))) > > # what I like about the approach above is that: > # - it is easy to swap mean and sd to median and IQR, min, max, range.. > etc.. > # - the syntax builds logically on what the across vignette sets up. > > # what I don't like > # - the output is wiiiide: variable, variable_function_1, > variable_function_2, next_variable, next_variable_function_1 and more > > # here's how we might pry out and reshape > # but first, make a Not IN function and a list of items to exclude > `%nin%` <- Negate(`%in%`) # Trent Reznor function > exclude <- c('I.parameter', 'I.method', 'I.alternative') > > sums_with_I # yuuck: too wide, gross column names with `$` embedded > > sums_with_I |> > t() |> > data.frame() |> > rownames_to_column() |> > tibble() |> > separate(rowname, into = c('variable', 'statistic'), sep = '__') |> # > double __ makes sep easy while allowing column names with "_" > filter(statistic %nin% exclude) |> > pivot_wider(id_cols = variable, names_from = statistic, values_from = > t.sums_with_I.) 
|> > readr::type_convert() > > # handsome output, but not a fun process > > > On Wed, Mar 27, 2024 at 12:09 PM Josiah Parry ***@***.***> > wrote: > >> Semi-working idea >> >> library(sfdep) >> library(tidyverse) >> guerry |> >> mutate( >> nb = st_contiguity(geometry), >> wt = st_weights(nb) >> ) |> >> sf::st_drop_geometry() |> >> summarise( >> avg = mean(crime_pers), >> std_dev = sd(crime_pers), >> I = list(broom::tidy(global_moran_perm(crimea_pers, nb, wt))) >> ) |> >> tidyr::unnest_wider(I) |> >> knitr::kable() >> summarise_var <- function(x, nb, wt) { >> tibble::tibble( >> avg = mean(x), >> std_dev = sd(x), >> I = list(broom::tidy(global_moran_perm(x, nb, wt))) >> ) |> >> tidyr::unnest(I) >> } >> summarize_all_vars <- function(x, nb, wt) { >> lapply(x, summarise_var, nb, wt) |> >> collapse::rowbind() >> } >> >> — >> Reply to this email directly, view it on GitHub >> <#52>, or unsubscribe >> <https://github.com/notifications/unsubscribe-auth/AD23UBQ5HLKXOSY3RHAV4A3Y2LVL5AVCNFSM6AAAAABFLFXFYGVHI2DSMVQWIX3LMV43ASLTON2WKOZSGIYTCMRQGY3DQOA> >> . >> You are receiving this because you are subscribed to this thread.Message >> ID: ***@***.***> >> >

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Consider utility function to provide summaries of multiple variables #52

Consider utility function to provide summaries of multiple variables #52

JosiahParry commented Mar 27, 2024

DHLocke commented Mar 27, 2024 via email

DHLocke commented Mar 27, 2024 via email

DHLocke commented Apr 18, 2024 via email

Consider utility function to provide summaries of multiple variables #52

Consider utility function to provide summaries of multiple variables #52

Comments

JosiahParry commented Mar 27, 2024

DHLocke commented Mar 27, 2024 via email

DHLocke commented Mar 27, 2024 via email

DHLocke commented Apr 18, 2024 via email