update summaries

AquaAuma · Jan 11, 2023 · ea56c56 · ea56c56
1 parent 0ac6ca0
commit ea56c56
Show file tree

Hide file tree

Showing 15 changed files with 1,445 additions and 1,384 deletions.
diff --git a/summary/scs.Rmd b/summary/scs.Rmd
diff --git a/summary/scs.log b/summary/scs.log
diff --git a/summary/scs.pdf b/summary/scs.pdf
diff --git a/summary/seus.Rmd b/summary/seus.Rmd
diff --git a/summary/seus.log b/summary/seus.log
diff --git a/summary/seus.pdf b/summary/seus.pdf
diff --git a/summary/swc-ibts.Rmd b/summary/swc-ibts.Rmd
diff --git a/summary/swc-ibts.log b/summary/swc-ibts.log
diff --git a/summary/swc-ibts.pdf b/summary/swc-ibts.pdf
diff --git a/summary/wcann.Rmd b/summary/wcann.Rmd
@@ -56,7 +56,7 @@ This document presents the cleaning code and summary of the West Coast US Annual
 
 ## Data cleaning in R
 
-```{r cleaning_code, code = readLines(here("./cleaning.codes/get.wcann.R")), eval = FALSE}
+```{r cleaning_code, code = readLines(here("./cleaning_codes/get_wcann.R")), eval = FALSE}
 
 ```
 
@@ -92,22 +92,22 @@ World_map <- rnaturalearth::ne_countries(scale = 'medium', returnclass = c("sf")
 ## 1. Overview of the survey data table
 
 ```{r head_survey, eval = T, echo = F}
-kable(survey[1:5,1:7], format = "latex", booktabs = T) %>%
+kable(survey[1:5,1:6], format = "latex", booktabs = T) %>%
 kable_styling(latex_options = c("striped","HOLD_position"))
 
-kable(survey[1:5,8:15], format = "latex", booktabs = T) %>%
+kable(survey[1:5,7:15], format = "latex", booktabs = T) %>%
 kable_styling(latex_options = c("striped","HOLD_position"))
 
-kable(survey[1:5,16:21], format = "latex", booktabs = T) %>%
+kable(survey[1:5,16:23], format = "latex", booktabs = T) %>%
 kable_styling(latex_options = c("striped","HOLD_position"))
 
-kable(survey[1:5,22:27], format = "latex", booktabs = T) %>%
+kable(survey[1:5,24:30], format = "latex", booktabs = T) %>%
 kable_styling(latex_options = c("striped","HOLD_position"))
 
-kable(survey[1:5,28:32], format = "latex", booktabs = T) %>%
+kable(survey[1:5,31:35], format = "latex", booktabs = T) %>%
 kable_styling(latex_options = c("striped","HOLD_position"))
 
-kable(survey[1:5,33:39], format = "latex", booktabs = T) %>%
+kable(survey[1:5,36:42], format = "latex", booktabs = T) %>%
 kable_styling(latex_options = c("striped","HOLD_position"))
 
 
@@ -184,18 +184,18 @@ var_plot
 
 Here we display the yearly total and average across hauls of the following variables recorded in the data:
 
-- *num_cpue*, number of individuals (abundance) in $\frac{individuals}{km^2}$
-- *num_h*, number of individuals (abundance) in $\frac{individuals}{h}$
+- *num_cpua*, number of individuals (abundance) in $\frac{individuals}{km^2}$
+- *num_cpue*, number of individuals (abundance) in $\frac{individuals}{h}$
 - *num*, number of individuals (abundance)
-- *wgt_cpue*, weight in $\frac{kg}{km^2}$
-- *wgt_h*, weight in $\frac{kg}{h}$
+- *wgt_cpua*, weight in $\frac{kg}{km^2}$
+- *wgt_cpue*, weight in $\frac{kg}{h}$
 - *wgt*, weight in ${kg}$
 
 ```{r summary_var_plot, eval = T, echo = F, message =  F,warning =  F}
 
 var_plot <-  survey %>%
   group_by(year) %>% 
-  summarise_at(vars(num:wgt_cpue),
+  summarise_at(vars(num:wgt_cpua),
                funs(sum,mean),na.rm=T) %>% 
   # head()
   gather("var","val",2:13) %>% 
@@ -234,15 +234,15 @@ var_plot
 
 Here we show a yearly total distribution of the biomass data to visualize outliers:
 
-- *wgt*, total weight in ${kg}$ per haul and year per haul and year, if available in the survey data
-- *num*, total number of individuals, if available in the survey data
+- *num_cpue*, number of individuals (abundance) in $\frac{individuals}{km^2}$
+- *wgt_cpue*, weight in $\frac{kg}{km^2}$
 
 ```{r extreme_biomass, eval = T, echo = F, message =  F,warning =  F}
 
-if(!is.na(mean(survey$num_cpue, na.rm=T)) & !is.na(mean(survey$wgt_cpue, na.rm=T))){
+if(!is.na(mean(survey$num_cpua, na.rm=T)) & !is.na(mean(survey$wgt_cpua, na.rm=T))){
   var_plot <-  survey %>%
   group_by(year, haul_id) %>% 
-  summarize(Weight = sum(wgt_cpue), Abundance = sum(num_cpue)) %>% 
+  summarize(Weight = sum(wgt_cpua), Abundance = sum(num_cpua)) %>% 
   gather("var","val",3:4) %>% 
   ggplot() +
   geom_boxplot(
@@ -259,10 +259,10 @@ if(!is.na(mean(survey$num_cpue, na.rm=T)) & !is.na(mean(survey$wgt_cpue, na.rm=T
   theme(axis.text.x = element_text(angle = 90))
 }
 
-if(!is.na(mean(survey$num_cpue, na.rm=T)) & is.na(mean(survey$wgt_cpue, na.rm=T))){
+if(!is.na(mean(survey$num_cpua, na.rm=T)) & is.na(mean(survey$wgt_cpua, na.rm=T))){
 var_plot <-  survey %>%
   group_by(year, haul_id) %>% 
-  summarize(Abundance = sum(num_cpue)) %>% 
+  summarize(Abundance = sum(num_cpua)) %>% 
   # head()
   ggplot() +
   geom_boxplot(
@@ -278,10 +278,10 @@ var_plot <-  survey %>%
   theme(axis.text.x = element_text(angle = 90))
 } 
 
-if(is.na(mean(survey$num_cpue, na.rm=T)) & !is.na(mean(survey$wgt_cpue, na.rm=T))){
+if(is.na(mean(survey$num_cpua, na.rm=T)) & !is.na(mean(survey$wgt_cpua, na.rm=T))){
 var_plot <-  survey %>%
   group_by(year, haul_id) %>% 
-  summarize(Weight = sum(wgt_cpue)) %>% 
+  summarize(Weight = sum(wgt_cpua)) %>% 
   # head()
   ggplot() +
   geom_boxplot(
@@ -297,7 +297,7 @@ var_plot <-  survey %>%
   theme(axis.text.x = element_text(angle = 90))
 } 
 var_plot
-rm(var_plot)
+
 ```
 
 
@@ -308,16 +308,17 @@ rm(var_plot)
 Here we show the total abundance and number of taxa relationships with the area swept:
 
 - *nbr_taxa*, number of marine fish taxa after taxonomic data cleaning
-- *num*, number of individuals, if available in the survey data
-- *wgt*, weight in ${kg}$, if available in the survey data
+- *num_cpua*, number of individuals (abundance) in $\frac{individuals}{km^2}$
+- *wgt_cpua*, weight in $\frac{kg}{km^2}$
+
 
 
 ```{r summary_var_swept, eval = T, echo = F, message =  F,warning =  F}
 
-if(!is.na(mean(survey$num, na.rm=T)) & !is.na(mean(survey$wgt, na.rm=T))){
+if(!is.na(mean(survey$num_cpua, na.rm=T)) & !is.na(mean(survey$wgt_cpua, na.rm=T))){
   var_plot <-  survey %>%
   group_by(haul_id, haul_dur, area_swept) %>%
-  summarize(Number_Taxa = length(accepted_name), Abundance = sum(num),Weight = sum(wgt)) %>% 
+  summarize(Number_Taxa = length(accepted_name), Abundance = sum(num_cpua),Weight = sum(wgt_cpua)) %>% 
   gather("var","val",4:6) %>% 
   # head()
   ggplot() +
@@ -327,10 +328,10 @@ if(!is.na(mean(survey$num, na.rm=T)) & !is.na(mean(survey$wgt, na.rm=T))){
   theme_bw()
 }
 
-if(!is.na(mean(survey$num, na.rm=T)) & is.na(mean(survey$wgt, na.rm=T))){
+if(!is.na(mean(survey$num_cpue, na.rm=T)) & mean(survey$wgt_cpue, na.rm=T)){
   var_plot <-  survey %>%
   group_by(haul_id, haul_dur, area_swept) %>%
-  summarize(Number_Taxa = length(accepted_name), Abundance = sum(num)) %>% 
+  summarize(Number_Taxa = length(accepted_name), Abundance = sum(num_cpue)) %>% 
   gather("var","val",4:5) %>% 
   # head()
   ggplot() +
@@ -340,10 +341,10 @@ if(!is.na(mean(survey$num, na.rm=T)) & is.na(mean(survey$wgt, na.rm=T))){
   theme_bw()
 }
 
-if(is.na(mean(survey$num, na.rm=T)) & !is.na(mean(survey$wgt, na.rm=T))){
+if(is.na(mean(survey$num_cpua, na.rm=T)) & !is.na(mean(survey$wgt_cpua, na.rm=T))){
   var_plot <-  survey %>%
   group_by(haul_id, haul_dur, area_swept) %>%
-  summarize(Number_Taxa = length(accepted_name), Weight = sum(wgt)) %>% 
+  summarize(Number_Taxa = length(accepted_name), Weight = sum(wgt_cpua)) %>% 
   gather("var","val",4:5) %>% 
   # head()
   ggplot() +
@@ -354,7 +355,6 @@ if(is.na(mean(survey$num, na.rm=T)) & !is.na(mean(survey$wgt, na.rm=T))){
 }
 
 var_plot
-
 ```
 
 \clearpage
@@ -363,10 +363,10 @@ var_plot
 
 ```{r abundant_spp, eval=T, echo=F, message=F, warning=F}
 
-if(!is.na(mean(survey$wgt_cpue, na.rm=T))){
+if(!is.na(mean(survey$num_cpua, na.rm=T))){
 spp <- survey %>% 
   group_by(year, accepted_name) %>% 
-  summarize(wgt = sum(wgt_cpue), nbr_years = length(year)) %>% 
+  summarize(wgt = sum(wgt_cpua), nbr_years = length(year)) %>% 
   filter(nbr_years>10) %>% 
   group_by(accepted_name) %>% 
   summarize(wgt = median(wgt)) %>% 
@@ -377,7 +377,7 @@ spp <- survey %>%
 spp_plot <- survey %>% 
   filter(accepted_name %in% spp) %>% 
   group_by(year, accepted_name) %>%
-  summarize(wgt = sum(wgt_cpue, na.rm=T)) %>% 
+  summarize(wgt = sum(wgt_cpua, na.rm=T)) %>% 
   ggplot() +
   geom_point( aes(x = year, y = wgt), size=0.5 ) +
   geom_line(aes(x = year,y = wgt), size=0.5) +
@@ -386,10 +386,10 @@ spp_plot <- survey %>%
   ylab("Species Weight (kg)") + xlab("Year")
 }
 
-if(is.na(mean(survey$wgt_cpue, na.rm=T))){
+if(is.na(mean(survey$wgt_cpua, na.rm=T))){
   spp <- survey %>% 
   group_by(year, accepted_name) %>% 
-  summarize(num = sum(num_cpue), nbr_years = length(year)) %>% 
+  summarize(num = sum(num_cpua), nbr_years = length(year)) %>% 
   filter(nbr_years>10) %>% 
   group_by(accepted_name) %>% 
   summarize(num = median(num)) %>% 
@@ -400,7 +400,7 @@ if(is.na(mean(survey$wgt_cpue, na.rm=T))){
 spp_plot <- survey %>% 
   filter(accepted_name %in% spp) %>% 
   group_by(year, accepted_name) %>%
-  summarize(num = sum(num_cpue, na.rm=T)) %>% 
+  summarize(num = sum(num_cpua, na.rm=T)) %>% 
   ggplot() +
   geom_point( aes(x = year, y = num), size=0.5 ) +
   geom_line(aes(x = year,y = num), size=0.5) +
@@ -410,7 +410,6 @@ spp_plot <- survey %>%
 }
 
 spp_plot
-
 ```
 
 \clearpage
@@ -420,8 +419,6 @@ spp_plot
 Map of the sampling distribution in space. Note that we only show one year per coordinate.
 
 ```{r fixed_point_map, eval = T, echo = F, fig.width=10, fig.height= 5, message =  F,warning =  F}
-
-# Fixed map
 survey %>% 
   select(longitude,latitude) %>% 
   distinct() %>% 
@@ -441,7 +438,7 @@ survey %>%
 
 ```
 
-
+\clearpage
 
 ## 9. Taxonomic flagging
 
@@ -450,16 +447,18 @@ This species flagging method was adapted from https://github.com/pinskylab/Ocean
 Visualization of flagged taxa
 
 ```{r, echo=FALSE, out.width = '80%'}
-knitr::include_graphics(here::here("standardization_steps", "outputs", "taxonomic_flagging", paste0(survey$survey[1],"_taxonomic_flagging.png")))
+knitr::include_graphics(here::here("outputs", "Flags","taxonomic_flagging", paste0(survey$survey[1],"_taxonomic_flagging.png")))
 ```
 
 Statistics related to the taxonomic flagging outputs
 
 ```{r, echo=FALSE}
-df <- read.csv(here::here("standardization_steps", "outputs", "taxonomic_flagging", paste0(survey$survey[1],'_stats.csv')))
+df <- read.csv(here::here("outputs", "Flags","taxonomic_flagging", paste0(survey$survey[1],'_stats.csv')))
 knitr::kable(df, col.names = NULL)
 ```
 
+\clearpage 
+
 ## 10. Spatio-temporal standardization
 
 ### a. Standardization method 1
@@ -471,32 +470,32 @@ It was run for hex resolution 7 and 8.
 Plot of number of cells x years with overlaid flagging options
 
 ```{r, echo=FALSE, out.width = '80%'}
-knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_plot.png")))
+knitr::include_graphics(here::here("outputs", "Flags","trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_plot.png")))
 ```
 ```{r, echo=FALSE, out.width = '80%'}
-knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_plot.png")))
+knitr::include_graphics(here::here("outputs", "Flags","trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_plot.png")))
 ```
 
 Map of hauls retained and removed per flagging method and threshold
 
 ```{r, echo=FALSE, out.width = '100%'}
-knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res7",  paste0(survey$survey[1],"_hex_res_7_map_per_haul.png")))
+knitr::include_graphics(here::here("outputs", "Flags","trimming_method1", "hex_res7",  paste0(survey$survey[1],"_hex_res_7_map_per_haul.png")))
 ```
 
 ```{r, echo=FALSE, out.width = '100%'}
-knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_map_per_haul.png")))
+knitr::include_graphics(here::here("outputs", "Flags", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_map_per_haul.png")))
 ```
 
 
 Map of numbers of years removed per grid cell and flagging method/threshold
 
 ```{r, echo=FALSE, out.width = '100%'}
-knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_map_per_grid_nyears.png")))
+knitr::include_graphics(here::here("outputs", "Flags","trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_map_per_grid_nyears.png")))
 ```
 
 
 ```{r, echo=FALSE, out.width = '100%'}
-knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_map_per_grid_nyears.png")))
+knitr::include_graphics(here::here("outputs", "Flags","trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_map_per_grid_nyears.png")))
 ```
 
 
@@ -507,7 +506,7 @@ This standardization method was adapted from BioTIME code from https://github.co
 Map of hauls retained and removed
 
 ```{r, echo=FALSE, out.width = '100%'}
-knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming_method2", 
+knitr::include_graphics(here::here("outputs", "Flags","trimming_method2", 
                                    paste0(survey$survey[1],"_map_per_haul.png")))
 ```
 
@@ -516,9 +515,9 @@ knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming
 Statistics of hauls removed for each standardization method
 
 ```{r, echo=FALSE}
-met1_7 <- read.csv(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_stats_hauls.csv")))
-met1_8 <- read.csv(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_stats_hauls.csv")))
-met2 <- read.csv(here::here("standardization_steps", "outputs", "trimming_method2", 
+met1_7 <- read.csv(here::here("outputs", "Flags","trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_stats_hauls.csv")))
+met1_8 <- read.csv(here::here("outputs", "Flags","trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_stats_hauls.csv")))
+met2 <- read.csv(here::here("outputs", "Flags", "trimming_method2", 
                             paste0(survey$survey[1],"_stats_hauls.csv")))
 knitr::kable(cbind(met1_7, met1_8[,2:3], met2[,2]), 
              col.names = c("summary", "grid cell 7, 0% threshold", "grid cell 7, 2% threshold",