Skip to content

Commit

Permalink
update summaries
Browse files Browse the repository at this point in the history
  • Loading branch information
AquaAuma committed Jan 11, 2023
1 parent 0ac6ca0 commit ea56c56
Show file tree
Hide file tree
Showing 15 changed files with 1,445 additions and 1,384 deletions.
134 changes: 66 additions & 68 deletions summary/scs.Rmd

Large diffs are not rendered by default.

537 changes: 274 additions & 263 deletions summary/scs.log

Large diffs are not rendered by default.

Binary file modified summary/scs.pdf
Binary file not shown.
132 changes: 66 additions & 66 deletions summary/seus.Rmd

Large diffs are not rendered by default.

611 changes: 309 additions & 302 deletions summary/seus.log

Large diffs are not rendered by default.

Binary file modified summary/seus.pdf
Binary file not shown.
120 changes: 59 additions & 61 deletions summary/swc-ibts.Rmd

Large diffs are not rendered by default.

472 changes: 243 additions & 229 deletions summary/swc-ibts.log

Large diffs are not rendered by default.

Binary file modified summary/swc-ibts.pdf
Binary file not shown.
103 changes: 51 additions & 52 deletions summary/wcann.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ This document presents the cleaning code and summary of the West Coast US Annual

## Data cleaning in R

```{r cleaning_code, code = readLines(here("./cleaning.codes/get.wcann.R")), eval = FALSE}
```{r cleaning_code, code = readLines(here("./cleaning_codes/get_wcann.R")), eval = FALSE}
```

Expand Down Expand Up @@ -92,22 +92,22 @@ World_map <- rnaturalearth::ne_countries(scale = 'medium', returnclass = c("sf")
## 1. Overview of the survey data table

```{r head_survey, eval = T, echo = F}
kable(survey[1:5,1:7], format = "latex", booktabs = T) %>%
kable(survey[1:5,1:6], format = "latex", booktabs = T) %>%
kable_styling(latex_options = c("striped","HOLD_position"))
kable(survey[1:5,8:15], format = "latex", booktabs = T) %>%
kable(survey[1:5,7:15], format = "latex", booktabs = T) %>%
kable_styling(latex_options = c("striped","HOLD_position"))
kable(survey[1:5,16:21], format = "latex", booktabs = T) %>%
kable(survey[1:5,16:23], format = "latex", booktabs = T) %>%
kable_styling(latex_options = c("striped","HOLD_position"))
kable(survey[1:5,22:27], format = "latex", booktabs = T) %>%
kable(survey[1:5,24:30], format = "latex", booktabs = T) %>%
kable_styling(latex_options = c("striped","HOLD_position"))
kable(survey[1:5,28:32], format = "latex", booktabs = T) %>%
kable(survey[1:5,31:35], format = "latex", booktabs = T) %>%
kable_styling(latex_options = c("striped","HOLD_position"))
kable(survey[1:5,33:39], format = "latex", booktabs = T) %>%
kable(survey[1:5,36:42], format = "latex", booktabs = T) %>%
kable_styling(latex_options = c("striped","HOLD_position"))
Expand Down Expand Up @@ -184,18 +184,18 @@ var_plot

Here we display the yearly total and average across hauls of the following variables recorded in the data:

- *num_cpue*, number of individuals (abundance) in $\frac{individuals}{km^2}$
- *num_h*, number of individuals (abundance) in $\frac{individuals}{h}$
- *num_cpua*, number of individuals (abundance) in $\frac{individuals}{km^2}$
- *num_cpue*, number of individuals (abundance) in $\frac{individuals}{h}$
- *num*, number of individuals (abundance)
- *wgt_cpue*, weight in $\frac{kg}{km^2}$
- *wgt_h*, weight in $\frac{kg}{h}$
- *wgt_cpua*, weight in $\frac{kg}{km^2}$
- *wgt_cpue*, weight in $\frac{kg}{h}$
- *wgt*, weight in ${kg}$

```{r summary_var_plot, eval = T, echo = F, message = F,warning = F}
var_plot <- survey %>%
group_by(year) %>%
summarise_at(vars(num:wgt_cpue),
summarise_at(vars(num:wgt_cpua),
funs(sum,mean),na.rm=T) %>%
# head()
gather("var","val",2:13) %>%
Expand Down Expand Up @@ -234,15 +234,15 @@ var_plot

Here we show a yearly total distribution of the biomass data to visualize outliers:

- *wgt*, total weight in ${kg}$ per haul and year per haul and year, if available in the survey data
- *num*, total number of individuals, if available in the survey data
- *num_cpue*, number of individuals (abundance) in $\frac{individuals}{km^2}$
- *wgt_cpue*, weight in $\frac{kg}{km^2}$

```{r extreme_biomass, eval = T, echo = F, message = F,warning = F}
if(!is.na(mean(survey$num_cpue, na.rm=T)) & !is.na(mean(survey$wgt_cpue, na.rm=T))){
if(!is.na(mean(survey$num_cpua, na.rm=T)) & !is.na(mean(survey$wgt_cpua, na.rm=T))){
var_plot <- survey %>%
group_by(year, haul_id) %>%
summarize(Weight = sum(wgt_cpue), Abundance = sum(num_cpue)) %>%
summarize(Weight = sum(wgt_cpua), Abundance = sum(num_cpua)) %>%
gather("var","val",3:4) %>%
ggplot() +
geom_boxplot(
Expand All @@ -259,10 +259,10 @@ if(!is.na(mean(survey$num_cpue, na.rm=T)) & !is.na(mean(survey$wgt_cpue, na.rm=T
theme(axis.text.x = element_text(angle = 90))
}
if(!is.na(mean(survey$num_cpue, na.rm=T)) & is.na(mean(survey$wgt_cpue, na.rm=T))){
if(!is.na(mean(survey$num_cpua, na.rm=T)) & is.na(mean(survey$wgt_cpua, na.rm=T))){
var_plot <- survey %>%
group_by(year, haul_id) %>%
summarize(Abundance = sum(num_cpue)) %>%
summarize(Abundance = sum(num_cpua)) %>%
# head()
ggplot() +
geom_boxplot(
Expand All @@ -278,10 +278,10 @@ var_plot <- survey %>%
theme(axis.text.x = element_text(angle = 90))
}
if(is.na(mean(survey$num_cpue, na.rm=T)) & !is.na(mean(survey$wgt_cpue, na.rm=T))){
if(is.na(mean(survey$num_cpua, na.rm=T)) & !is.na(mean(survey$wgt_cpua, na.rm=T))){
var_plot <- survey %>%
group_by(year, haul_id) %>%
summarize(Weight = sum(wgt_cpue)) %>%
summarize(Weight = sum(wgt_cpua)) %>%
# head()
ggplot() +
geom_boxplot(
Expand All @@ -297,7 +297,7 @@ var_plot <- survey %>%
theme(axis.text.x = element_text(angle = 90))
}
var_plot
rm(var_plot)
```


Expand All @@ -308,16 +308,17 @@ rm(var_plot)
Here we show the total abundance and number of taxa relationships with the area swept:

- *nbr_taxa*, number of marine fish taxa after taxonomic data cleaning
- *num*, number of individuals, if available in the survey data
- *wgt*, weight in ${kg}$, if available in the survey data
- *num_cpua*, number of individuals (abundance) in $\frac{individuals}{km^2}$
- *wgt_cpua*, weight in $\frac{kg}{km^2}$



```{r summary_var_swept, eval = T, echo = F, message = F,warning = F}
if(!is.na(mean(survey$num, na.rm=T)) & !is.na(mean(survey$wgt, na.rm=T))){
if(!is.na(mean(survey$num_cpua, na.rm=T)) & !is.na(mean(survey$wgt_cpua, na.rm=T))){
var_plot <- survey %>%
group_by(haul_id, haul_dur, area_swept) %>%
summarize(Number_Taxa = length(accepted_name), Abundance = sum(num),Weight = sum(wgt)) %>%
summarize(Number_Taxa = length(accepted_name), Abundance = sum(num_cpua),Weight = sum(wgt_cpua)) %>%
gather("var","val",4:6) %>%
# head()
ggplot() +
Expand All @@ -327,10 +328,10 @@ if(!is.na(mean(survey$num, na.rm=T)) & !is.na(mean(survey$wgt, na.rm=T))){
theme_bw()
}
if(!is.na(mean(survey$num, na.rm=T)) & is.na(mean(survey$wgt, na.rm=T))){
if(!is.na(mean(survey$num_cpue, na.rm=T)) & mean(survey$wgt_cpue, na.rm=T)){
var_plot <- survey %>%
group_by(haul_id, haul_dur, area_swept) %>%
summarize(Number_Taxa = length(accepted_name), Abundance = sum(num)) %>%
summarize(Number_Taxa = length(accepted_name), Abundance = sum(num_cpue)) %>%
gather("var","val",4:5) %>%
# head()
ggplot() +
Expand All @@ -340,10 +341,10 @@ if(!is.na(mean(survey$num, na.rm=T)) & is.na(mean(survey$wgt, na.rm=T))){
theme_bw()
}
if(is.na(mean(survey$num, na.rm=T)) & !is.na(mean(survey$wgt, na.rm=T))){
if(is.na(mean(survey$num_cpua, na.rm=T)) & !is.na(mean(survey$wgt_cpua, na.rm=T))){
var_plot <- survey %>%
group_by(haul_id, haul_dur, area_swept) %>%
summarize(Number_Taxa = length(accepted_name), Weight = sum(wgt)) %>%
summarize(Number_Taxa = length(accepted_name), Weight = sum(wgt_cpua)) %>%
gather("var","val",4:5) %>%
# head()
ggplot() +
Expand All @@ -354,7 +355,6 @@ if(is.na(mean(survey$num, na.rm=T)) & !is.na(mean(survey$wgt, na.rm=T))){
}
var_plot
```

\clearpage
Expand All @@ -363,10 +363,10 @@ var_plot

```{r abundant_spp, eval=T, echo=F, message=F, warning=F}
if(!is.na(mean(survey$wgt_cpue, na.rm=T))){
if(!is.na(mean(survey$num_cpua, na.rm=T))){
spp <- survey %>%
group_by(year, accepted_name) %>%
summarize(wgt = sum(wgt_cpue), nbr_years = length(year)) %>%
summarize(wgt = sum(wgt_cpua), nbr_years = length(year)) %>%
filter(nbr_years>10) %>%
group_by(accepted_name) %>%
summarize(wgt = median(wgt)) %>%
Expand All @@ -377,7 +377,7 @@ spp <- survey %>%
spp_plot <- survey %>%
filter(accepted_name %in% spp) %>%
group_by(year, accepted_name) %>%
summarize(wgt = sum(wgt_cpue, na.rm=T)) %>%
summarize(wgt = sum(wgt_cpua, na.rm=T)) %>%
ggplot() +
geom_point( aes(x = year, y = wgt), size=0.5 ) +
geom_line(aes(x = year,y = wgt), size=0.5) +
Expand All @@ -386,10 +386,10 @@ spp_plot <- survey %>%
ylab("Species Weight (kg)") + xlab("Year")
}
if(is.na(mean(survey$wgt_cpue, na.rm=T))){
if(is.na(mean(survey$wgt_cpua, na.rm=T))){
spp <- survey %>%
group_by(year, accepted_name) %>%
summarize(num = sum(num_cpue), nbr_years = length(year)) %>%
summarize(num = sum(num_cpua), nbr_years = length(year)) %>%
filter(nbr_years>10) %>%
group_by(accepted_name) %>%
summarize(num = median(num)) %>%
Expand All @@ -400,7 +400,7 @@ if(is.na(mean(survey$wgt_cpue, na.rm=T))){
spp_plot <- survey %>%
filter(accepted_name %in% spp) %>%
group_by(year, accepted_name) %>%
summarize(num = sum(num_cpue, na.rm=T)) %>%
summarize(num = sum(num_cpua, na.rm=T)) %>%
ggplot() +
geom_point( aes(x = year, y = num), size=0.5 ) +
geom_line(aes(x = year,y = num), size=0.5) +
Expand All @@ -410,7 +410,6 @@ spp_plot <- survey %>%
}
spp_plot
```

\clearpage
Expand All @@ -420,8 +419,6 @@ spp_plot
Map of the sampling distribution in space. Note that we only show one year per coordinate.

```{r fixed_point_map, eval = T, echo = F, fig.width=10, fig.height= 5, message = F,warning = F}
# Fixed map
survey %>%
select(longitude,latitude) %>%
distinct() %>%
Expand All @@ -441,7 +438,7 @@ survey %>%
```


\clearpage

## 9. Taxonomic flagging

Expand All @@ -450,16 +447,18 @@ This species flagging method was adapted from https://github.com/pinskylab/Ocean
Visualization of flagged taxa

```{r, echo=FALSE, out.width = '80%'}
knitr::include_graphics(here::here("standardization_steps", "outputs", "taxonomic_flagging", paste0(survey$survey[1],"_taxonomic_flagging.png")))
knitr::include_graphics(here::here("outputs", "Flags","taxonomic_flagging", paste0(survey$survey[1],"_taxonomic_flagging.png")))
```

Statistics related to the taxonomic flagging outputs

```{r, echo=FALSE}
df <- read.csv(here::here("standardization_steps", "outputs", "taxonomic_flagging", paste0(survey$survey[1],'_stats.csv')))
df <- read.csv(here::here("outputs", "Flags","taxonomic_flagging", paste0(survey$survey[1],'_stats.csv')))
knitr::kable(df, col.names = NULL)
```

\clearpage

## 10. Spatio-temporal standardization

### a. Standardization method 1
Expand All @@ -471,32 +470,32 @@ It was run for hex resolution 7 and 8.
Plot of number of cells x years with overlaid flagging options

```{r, echo=FALSE, out.width = '80%'}
knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_plot.png")))
knitr::include_graphics(here::here("outputs", "Flags","trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_plot.png")))
```
```{r, echo=FALSE, out.width = '80%'}
knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_plot.png")))
knitr::include_graphics(here::here("outputs", "Flags","trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_plot.png")))
```

Map of hauls retained and removed per flagging method and threshold

```{r, echo=FALSE, out.width = '100%'}
knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_map_per_haul.png")))
knitr::include_graphics(here::here("outputs", "Flags","trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_map_per_haul.png")))
```

```{r, echo=FALSE, out.width = '100%'}
knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_map_per_haul.png")))
knitr::include_graphics(here::here("outputs", "Flags", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_map_per_haul.png")))
```


Map of numbers of years removed per grid cell and flagging method/threshold

```{r, echo=FALSE, out.width = '100%'}
knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_map_per_grid_nyears.png")))
knitr::include_graphics(here::here("outputs", "Flags","trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_map_per_grid_nyears.png")))
```


```{r, echo=FALSE, out.width = '100%'}
knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_map_per_grid_nyears.png")))
knitr::include_graphics(here::here("outputs", "Flags","trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_map_per_grid_nyears.png")))
```


Expand All @@ -507,7 +506,7 @@ This standardization method was adapted from BioTIME code from https://github.co
Map of hauls retained and removed

```{r, echo=FALSE, out.width = '100%'}
knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming_method2",
knitr::include_graphics(here::here("outputs", "Flags","trimming_method2",
paste0(survey$survey[1],"_map_per_haul.png")))
```

Expand All @@ -516,9 +515,9 @@ knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming
Statistics of hauls removed for each standardization method

```{r, echo=FALSE}
met1_7 <- read.csv(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_stats_hauls.csv")))
met1_8 <- read.csv(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_stats_hauls.csv")))
met2 <- read.csv(here::here("standardization_steps", "outputs", "trimming_method2",
met1_7 <- read.csv(here::here("outputs", "Flags","trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_stats_hauls.csv")))
met1_8 <- read.csv(here::here("outputs", "Flags","trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_stats_hauls.csv")))
met2 <- read.csv(here::here("outputs", "Flags", "trimming_method2",
paste0(survey$survey[1],"_stats_hauls.csv")))
knitr::kable(cbind(met1_7, met1_8[,2:3], met2[,2]),
col.names = c("summary", "grid cell 7, 0% threshold", "grid cell 7, 2% threshold",
Expand Down
Loading

0 comments on commit ea56c56

Please sign in to comment.