-
-
Notifications
You must be signed in to change notification settings - Fork 5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Consider utility function to provide summaries of multiple variables #52
Comments
Another approach.. has to be an easier way
library(sfdep)
library(tidyverse)
# Summarise every numeric column with its mean, standard deviation and the
# tidied result of global_moran_perm(), all inside one across() call.
sums_with_I <-
  guerry |>
  select(code_dept, region, department, starts_with('crime')) |>
  mutate(
    nb = st_contiguity(geometry),
    wt = st_weights(nb)
  ) |>
  sf::st_drop_geometry() |>
  summarise(across(
    where(is.numeric),
    # puts a second underscore between variable name and the function
    # (for easy splitting later)
    .names = "{.col}__{.fn}",
    .fns = list(
      mean = mean,
      sd = sd,
      I = ~ broom::tidy(global_moran_perm(.x, nb, wt))
    )
  ))

# what I like about the approach above is that:
# - it is easy to swap mean and sd to median and IQR, min, max, range.. etc..
# - the syntax builds logically on what the across vignette sets up.

# what I don't like
# - the output is wiiiide: variable, variable_function_1,
#   variable_function_2, next_variable, next_variable_function_1 and more

# here's how we might pry out and reshape
# but first, make a Not IN function and a list of items to exclude
`%nin%` <- Negate(`%in%`) # Trent Reznor function
exclude <- c('I.parameter', 'I.method', 'I.alternative')

sums_with_I # yuuck: too wide, gross column names with `$` embedded

sums_with_I |>
  t() |>
  data.frame() |>
  rownames_to_column() |>
  tibble() |>
  # double __ makes sep easy while allowing column names with "_"
  separate(rowname, into = c('variable', 'statistic'), sep = '__') |>
  filter(statistic %nin% exclude) |>
  pivot_wider(
    id_cols = variable,
    names_from = statistic,
    # `t.sums_with_I.` is the column name data.frame() gives the transposed
    # matrix, so it must change if `sums_with_I` is renamed
    values_from = t.sums_with_I.
  ) |>
  readr::type_convert()
# handsome output, but not a fun process
…On Wed, Mar 27, 2024 at 12:09 PM Josiah Parry ***@***.***> wrote:
Semi-working idea
library(sfdep)
library(tidyverse)
# Mean, standard deviation and global Moran's I test for one variable
# (crime_pers), printed as a single-row table.
guerry |>
  mutate(
    nb = st_contiguity(geometry),
    wt = st_weights(nb)
  ) |>
  sf::st_drop_geometry() |>
  summarise(
    avg = mean(crime_pers),
    std_dev = sd(crime_pers),
    # test the same column that is summarised above; the tidied result is
    # wrapped in a list so unnest_wider() can spread it into columns
    I = list(broom::tidy(global_moran_perm(crime_pers, nb, wt)))
  ) |>
  tidyr::unnest_wider(I) |>
  knitr::kable()
#' Summarise one variable with its mean, standard deviation and Moran's I
#'
#' @param x Vector of values to summarise; passed to `mean()`, `sd()` and
#'   `sfdep::global_moran_perm()`.
#' @param nb Neighbour list passed to `sfdep::global_moran_perm()`
#'   (presumably built with `st_contiguity()`, as in the example pipeline).
#' @param wt Weights list passed to `sfdep::global_moran_perm()`
#'   (presumably built with `st_weights(nb)`).
#' @return A tibble with columns `avg` and `std_dev` followed by the columns
#'   of the tidied permutation-test result.
summarise_var <- function(x, nb, wt) {
  tibble::tibble(
    avg = mean(x),
    std_dev = sd(x),
    # namespaced like every other call here so the helper does not depend on
    # sfdep being attached; list() lets the tidied result be unnested below
    I = list(broom::tidy(sfdep::global_moran_perm(x, nb, wt)))
  ) |>
    tidyr::unnest(I)
}
#' Apply `summarise_var()` to each element of `x` and stack the results
#'
#' @param x A list (e.g. a data frame) whose elements are each summarised.
#' @param nb Neighbour list forwarded to `summarise_var()`.
#' @param wt Weights list forwarded to `summarise_var()`.
#' @return The per-element summaries row-bound with `collapse::rowbind()`.
summarize_all_vars <- function(x, nb, wt) {
  per_variable <- lapply(x, function(column) summarise_var(column, nb, wt))
  collapse::rowbind(per_variable)
}
—
Reply to this email directly, view it on GitHub
<#52>, or unsubscribe
<https://github.com/notifications/unsubscribe-auth/AD23UBQ5HLKXOSY3RHAV4A3Y2LVL5AVCNFSM6AAAAABFLFXFYGVHI2DSMVQWIX3LMV43ASLTON2WKOZSGIYTCMRQGY3DQOA>
.
You are receiving this because you are subscribed to this thread.Message
ID: ***@***.***>
|
Maybe a more sophisticated pivot_longer fixes the second piece?
https://stackoverflow.com/questions/70700654/pivot-longer-with-names-pattern-and-pairs-of-columns
…-Dexter
On Wed, Mar 27, 2024 at 3:22 PM Dexter Locke ***@***.***> wrote:
Another approach.. has to be an easier way
library(sfdep)
library(tidyverse)
# Summarise every numeric column with its mean, standard deviation and the
# tidied result of global_moran_perm(), all inside one across() call.
sums_with_I <-
  guerry |>
  select(code_dept, region, department, starts_with('crime')) |>
  mutate(
    nb = st_contiguity(geometry),
    wt = st_weights(nb)
  ) |>
  sf::st_drop_geometry() |>
  summarise(across(
    where(is.numeric),
    # puts a second underscore between variable name and the function
    # (for easy splitting later)
    .names = "{.col}__{.fn}",
    .fns = list(
      mean = mean,
      sd = sd,
      I = ~ broom::tidy(global_moran_perm(.x, nb, wt))
    )
  ))

# what I like about the approach above is that:
# - it is easy to swap mean and sd to median and IQR, min, max, range.. etc..
# - the syntax builds logically on what the across vignette sets up.

# what I don't like
# - the output is wiiiide: variable, variable_function_1,
#   variable_function_2, next_variable, next_variable_function_1 and more

# here's how we might pry out and reshape
# but first, make a Not IN function and a list of items to exclude
`%nin%` <- Negate(`%in%`) # Trent Reznor function
exclude <- c('I.parameter', 'I.method', 'I.alternative')

sums_with_I # yuuck: too wide, gross column names with `$` embedded

sums_with_I |>
  t() |>
  data.frame() |>
  rownames_to_column() |>
  tibble() |>
  # double __ makes sep easy while allowing column names with "_"
  separate(rowname, into = c('variable', 'statistic'), sep = '__') |>
  filter(statistic %nin% exclude) |>
  pivot_wider(
    id_cols = variable,
    names_from = statistic,
    # `t.sums_with_I.` is the column name data.frame() gives the transposed
    # matrix, so it must change if `sums_with_I` is renamed
    values_from = t.sums_with_I.
  ) |>
  readr::type_convert()
# handsome output, but not a fun process
On Wed, Mar 27, 2024 at 12:09 PM Josiah Parry ***@***.***>
wrote:
> Semi-working idea
>
> library(sfdep)
> library(tidyverse)
> guerry |>
> mutate(
> nb = st_contiguity(geometry),
> wt = st_weights(nb)
> ) |>
> sf::st_drop_geometry() |>
> summarise(
> avg = mean(crime_pers),
> std_dev = sd(crime_pers),
> I = list(broom::tidy(global_moran_perm(crimea_pers, nb, wt)))
> ) |>
> tidyr::unnest_wider(I) |>
> knitr::kable()
> summarise_var <- function(x, nb, wt) {
> tibble::tibble(
> avg = mean(x),
> std_dev = sd(x),
> I = list(broom::tidy(global_moran_perm(x, nb, wt)))
> ) |>
> tidyr::unnest(I)
> }
> summarize_all_vars <- function(x, nb, wt) {
> lapply(x, summarise_var, nb, wt) |>
> collapse::rowbind()
> }
>
> —
> Reply to this email directly, view it on GitHub
> <#52>, or unsubscribe
> <https://github.com/notifications/unsubscribe-auth/AD23UBQ5HLKXOSY3RHAV4A3Y2LVL5AVCNFSM6AAAAABFLFXFYGVHI2DSMVQWIX3LMV43ASLTON2WKOZSGIYTCMRQGY3DQOA>
> .
> You are receiving this because you are subscribed to this thread.Message
> ID: ***@***.***>
>
|
Here's another abomination
# Summarise every numeric column (mean, sd, tidied global_moran_perm() result)
# and reshape the wide output to one row per variable.
guerry |>
  select(code_dept, region, department, starts_with('crime')) |>
  mutate(
    nb = st_contiguity(geometry),
    wt = st_weights(nb)
  ) |>
  sf::st_drop_geometry() |>
  summarise(across(
    where(is.numeric),
    # puts a second underscore between variable name and the function
    # (for easy splitting later)
    .names = "{.col}__{.fn}",
    list(
      mean = mean,
      sd = sd,
      I = ~broom::tidy(global_moran_perm(.x, nb, wt))
    )
  )) |>
  # keep only the statistic and p-value of each tidied test; the leading "__"
  # in the new names gives separate() below something to split on
  tidylog::mutate_if(
    is.tibble,
    ~select(., `__Moran's I` = statistic, `__p` = p.value)
  ) |>
  # NOTE(review): unnest() without `cols` relies on legacy tidyr behaviour;
  # left as written because the steps below depend on the names it produces
  unnest() |>
  tibble::rownames_to_column() |>
  pivot_longer(-rowname) |>
  select(-rowname) |>
  separate(name, sep = '__', into = c('var', 'stat'), convert = TRUE) |>
  mutate(
    # names that start with "__" split into an empty `var`; blank it to NA so
    # fill() can carry the preceding variable name down
    var = ifelse(var == '', NA, var),
    # removes the first digit found in `stat` (presumably a suffix added when
    # duplicate names were made unique -- confirm)
    stat = str_remove(stat, '[0-9]')
  ) |>
  tidylog::fill(var, .direction = 'down') |>
  pivot_wider(id_cols = 'var', names_from = 'stat', values_from = 'value')
But it works?
…On Wed, Mar 27, 2024 at 3:43 PM Dexter Locke ***@***.***> wrote:
Maybe a more sophisticated pivot_longer fixes the second piece?
https://stackoverflow.com/questions/70700654/pivot-longer-with-names-pattern-and-pairs-of-columns
-Dexter
On Wed, Mar 27, 2024 at 3:22 PM Dexter Locke ***@***.***>
wrote:
> Another approach.. has to be an easier way
>
> library(sfdep)
> library(tidyverse)
>
> sums_with_I <-
> guerry |>
> select(code_dept, region, department, starts_with('crime')) |>
> mutate(
> nb = st_contiguity(geometry),
> wt = st_weights(nb)
> ) |>
> sf::st_drop_geometry() |>
> summarise(across(where(is.numeric)
> , .names = "{.col}__{.fn}" # puts a second underscore
> between variable name and the function (for easy splitting later)
> , list( mean = mean
> , sd = sd
> , I = ~broom::tidy(global_moran_perm(.x, nb,
> wt)))))
>
> # what I like about the approach above is that:
> # - it is easy to swap mean and sd to median and IQR, min, max, range..
> etc..
> # - the syntax builds logically on what the across vignette sets up.
>
> # what I don't like
> # - the output is wiiiide: variable, variable_function_1,
> variable_function_2, next_variable, next_variable_function_1 and more
>
> # here's how we might pry out and reshape
> # but first, make a Not IN function and a list of items to exclude
> `%nin%` <- Negate(`%in%`) # Trent Reznor function
> exclude <- c('I.parameter', 'I.method', 'I.alternative')
>
> sums_with_I # yuuck: too wide, gross column names with `$` embedded
>
> sums_with_I |>
> t() |>
> data.frame() |>
> rownames_to_column() |>
> tibble() |>
> separate(rowname, into = c('variable', 'statistic'), sep = '__') |> #
> double __ makes sep easy while allowing column names with "_"
> filter(statistic %nin% exclude) |>
> pivot_wider(id_cols = variable, names_from = statistic, values_from =
> t.sums_with_I.) |>
> readr::type_convert()
>
> # handsome output, but not a fun process
>
>
> On Wed, Mar 27, 2024 at 12:09 PM Josiah Parry ***@***.***>
> wrote:
>
>> Semi-working idea
>>
>> library(sfdep)
>> library(tidyverse)
>> guerry |>
>> mutate(
>> nb = st_contiguity(geometry),
>> wt = st_weights(nb)
>> ) |>
>> sf::st_drop_geometry() |>
>> summarise(
>> avg = mean(crime_pers),
>> std_dev = sd(crime_pers),
>> I = list(broom::tidy(global_moran_perm(crimea_pers, nb, wt)))
>> ) |>
>> tidyr::unnest_wider(I) |>
>> knitr::kable()
>> summarise_var <- function(x, nb, wt) {
>> tibble::tibble(
>> avg = mean(x),
>> std_dev = sd(x),
>> I = list(broom::tidy(global_moran_perm(x, nb, wt)))
>> ) |>
>> tidyr::unnest(I)
>> }
>> summarize_all_vars <- function(x, nb, wt) {
>> lapply(x, summarise_var, nb, wt) |>
>> collapse::rowbind()
>> }
>>
>> —
>> Reply to this email directly, view it on GitHub
>> <#52>, or unsubscribe
>> <https://github.com/notifications/unsubscribe-auth/AD23UBQ5HLKXOSY3RHAV4A3Y2LVL5AVCNFSM6AAAAABFLFXFYGVHI2DSMVQWIX3LMV43ASLTON2WKOZSGIYTCMRQGY3DQOA>
>> .
>> You are receiving this because you are subscribed to this thread.Message
>> ID: ***@***.***>
>>
>
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Semi-working idea
The text was updated successfully, but these errors were encountered: