-
Notifications
You must be signed in to change notification settings - Fork 1
/
ReformatModelData_allnodes.R
193 lines (163 loc) · 5.81 KB
/
ReformatModelData_allnodes.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#-- RDS_model_output.R:
# Create rds's of model outputs for shiny,
# including top and bottom layer timeseries for both models,
# and yearly mean surface and bottom TP.
#to read model output netCDF
library(ncdf4)
#for ymd_hms
library(lubridate)
#To append dataframes to list: list.append
library(rlist)
#df operations: arrange, distinct
library(dplyr)
#--Model data
##Compare no sink out w/sinkout
var1file <- "output_nosinkout/leem_0001.nc"
var2file <- "output_sinkout/leem_0001.nc"
#Compare sinkout w/LEEM
#var1file <- "output_sinkout/leem_0001.nc"
#var2file <- "output_leem/TP_crop.nc"
#Compare nosinkout w/LEEM
#var1file <- "output_nosinkout/leem_0001.nc"
#var2file <- "output_leem/TP_crop.nc"
#--Final Shiny rds files
#shiny_dir <- "data_nosinkout_vs_sinkout"
#shiny_dir <- "data_nosinkout_vs_leem"
#shiny_dir <- "data_sinkout_vs_leem"
shiny_dir <- "data"
#-----END USER MODIFY---
##Don't change this one
#--Shiny fixed rds files
fixed_dir <- "data_fixed"
#-------Directory checking-----
#--The inputs should exist
message <- paste("Var 1 file",var1file,"not found, exiting.")
if (!file.exists(var1file)) stop(message)
message <- paste("Var 2 file",var2file,"not found, exiting.")
if (!file.exists(var2file)) stop(message)
# and the grid, river, and station rds file
grid_rds <- file.path(fixed_dir,"le_grid.rds")
riv_rds <- file.path(fixed_dir,"le_rivers.rds")
st_rds <- file.path(fixed_dir,"le_stationdata.rds")
if (!file.exists(grid_rds)) stop("STEP1 output le_grid.rds not found, exiting.")
if (!file.exists(riv_rds)) stop("STEP2 output le_rivers.rds not found, exiting.")
if (!file.exists(st_rds)) stop("STEP3 output le_stationdata.rds not found, exiting.")
# Create a shiny model output directory if it does not exist
if (!dir.exists(shiny_dir)) dir.create(shiny_dir)
#--End Directory checking-----
# Grid df has node,depth,X,Y,lat,lon
df_grid <- readRDS(grid_rds)
# River df has node, name, image
df_riv <- readRDS(riv_rds)
# Station df_st has data
df_st <- readRDS(st_rds)
# Sort each dataframe by node so it is easier to check results
df_grid <- arrange(df_grid,node)
df_riv <- arrange(df_riv,node)
df_st <- arrange(df_st,node)
#Get list of all nodes with stations and rivers
inodes <- append(df_riv$node,df_st$node)
cat("There are",length(inodes),"total nodes.","\n")
#Get list of all unique nodes
uni_nodes <- unique(inodes)
#cat("Extracting data from the",length(uni_nodes),"unique nodes.","\n")
#Keep only the grid points with data
#df_grid <- df_grid[df_grid$node %in% uni_nodes,]
#Nope, in this version we keep ALL the data
#--Open model data
# netCDF files
ncvar1 <- nc_open(var1file)
ncvar2 <- nc_open(var2file)
# Use tp_file because of better metadata (MJD)
time_var1 <- ncvar_get(ncvar1, "time")
origin <- ymd_hms("1858-11-17 00:00:00")
time <- as.POSIXct(time_var1*86400, tz="UTC",origin=origin)
nt <- length(time)
#Create a list of dataframes
the_data <- list()
the_stats <- list()
#Loop through unique nodes
#for (i in uni_nodes) {
#Loop through ALL nodes
for (i in df_grid$node){
#create a data frame for data
df_data <- data.frame(matrix(ncol = 4, nrow = nt))
colnames(df_data) <- c("v1surf","v1bot","v2surf","v2bot")
#create a data frame for stats
df_stats <- data.frame(matrix(ncol = 4, nrow = 1))
colnames(df_stats) <- c("v1meansurf","v1meanbot","v2meansurf","v2meanbot")
#surface, j=1
j <- 1
df_data$v1surf <- ncvar_get(ncvar1, "TP", start=c(i,j,1), count=c(1,1,-1))
df_stats$v1meansurf <- mean(df_data$v1surf)
df_data$v2surf <- ncvar_get(ncvar2, "TP", start=c(i,j,1), count=c(1,1,-1))
df_stats$v2meansurf <- mean(df_data$v2surf)
#top, j=20
j <- 20
df_data$v1bot <- ncvar_get(ncvar1, "TP", start=c(i,j,1), count=c(1,1,-1))
df_stats$v1meanbot <- mean(df_data$v1bot)
df_data$v2bot <- ncvar_get(ncvar2, "TP", start=c(i,j,1), count=c(1,1,-1))
df_stats$v2meanbot <- mean(df_data$v2bot)
the_data <- list.append(the_data,df_data)
the_stats <- list.append(the_stats,df_stats)
}
#The node numbers are a subset of all nodes, but the list of dataframes must be
# accessed by index. Create a 'global id' that matches list index to node number.
df_grid$gid <- df_grid$node
#For rivers, add gid, lat, lon
df_riv$gid <- 0
df_riv$lat <- 0
df_riv$lon <- 0
ii <- 1
for (i in df_riv$node){
#i is a node
index <- which(df_grid$node == i)
#find the corresponding gid
df_riv$gid[ii] <- df_grid$gid[index]
#and the corresponding lat/lon
df_riv$lat[ii] <- df_grid$lat[index]
df_riv$lon[ii] <- df_grid$lon[index]
ii <- ii + 1
}
#For station data, add gid
df_st$gid <- 0
ii <- 1
for (i in df_st$node){
#i is a node
index <- which(df_grid$node == i)
#find the corresponding gid
df_st$gid[ii] <- df_grid$gid[index]
#and the corresponding lat/lon
df_st$lat[ii] <- df_grid$lat[index]
df_st$lon[ii] <- df_grid$lon[index]
ii <- ii + 1
}
#Create a dataframe for station popups
#Keep unique station ids
df_pop <- df_st %>% distinct(id, .keep_all = TRUE)
df_pop <- df_pop %>% select(gid,name,lat,lon)
#Clean up station data
df_st <- df_st %>% select(gid,date,TP)
#And grid
df_grid <- df_grid %>% select(gid,h,lat,lon,X,Y)
#Add the statistics to the dataframe
df_grid$v1meansurf = 0
df_grid$v1meanbot = 0
df_grid$v2meansurf = 0
df_grid$v2meanbot = 0
#For each grid point, each having a gid
for (j in df_grid$gid) {
df_grid$v1meansurf[j] <- the_stats[[j]]$v1meansurf
df_grid$v1meanbot[j] <- the_stats[[j]]$v1meanbot
df_grid$v2meansurf[j] <- the_stats[[j]]$v2meansurf
df_grid$v2meanbot[j] <- the_stats[[j]]$v2meanbot
}
#Save everything to the data_model directory
saveRDS(df_grid,file=file.path(shiny_dir,"le_grid.rds"))
saveRDS(df_riv,file=file.path(shiny_dir,"le_rivers.rds"))
saveRDS(time,file=file.path(shiny_dir,"le_time.rds"))
saveRDS(the_data,file=file.path(shiny_dir,"le_data.rds"))
saveRDS(df_st,file=file.path(shiny_dir,"le_stations.rds"))
saveRDS(df_pop,file=file.path(shiny_dir,"le_pop.rds"))
#Optional clean up
#rm(list=ls())