Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
bolliger32 committed May 21, 2021
0 parents commit 7f462c2
Show file tree
Hide file tree
Showing 106 changed files with 23,136 additions and 0 deletions.
152 changes: 152 additions & 0 deletions README.md

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions code/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Copyright ©2021 The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, [email protected], http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.

IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Binary file added code/__pycache__/version.cpython-38.pyc
Binary file not shown.
215 changes: 215 additions & 0 deletions code/analysis/0_grid_creation/create_grid_CONTUS_and_WORLD.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
############################################################

# This script saves the grid lat and lon files and generates a subsample of grid cells (i.e. "subgrid") from within the U.S. or from across the world
# It does this for both UAR and POP sampling in the US. In the global sample we only do UAR sampling.

############################################################

rm(list=ls())

## Packages
library(RcppCNPy)
library(reticulate)
library(raster)
library(rgdal)
library(maps)
library(mapdata)
library(rgeos)
library(doParallel)
library(here)

## Import config.R to set filepaths
mosaiks_code <- Sys.getenv("MOSAIKS_CODE")
if (mosaiks_code=="") {
mosaiks_code = here("code")
}
source(file.path(mosaiks_code,"mosaiks","config.R"))

## Source the necessary helper files
source(file.path(utils_dir, "R_utils.R"))

########### INPUTS REQUIRED ############
zoom <- 16 # Choose your zoom level
pixels <- 640 # Pixels per grid cell
multiplier <- 3.25 # This is how many extra draws you want to take from the grid to make sure in the end you get N
seed <- 0

# Number of cores when parallelizing:
args = commandArgs(trailingOnly=TRUE)
if (length(args)==0) {
no_cores = 4
} else {
no_cores = args[2]
}

########################################

########### Make the full grid ############
for (extent in c("global", "USA"))
{
# Set bounding box to global or USA
if (extent=="USA") {
latmin <- 25
latmax <- 50
lonmin <- -125
lonmax <- -66
N <- 100000
} else if (extent == "global") {
latmin <- -63
latmax <- 80
lonmin <- -180
lonmax <- 180
N <- 1000000
}

# Get vectors of lat and lon grid cell center values, which when crossed together, define the entire grid.
gridvals <- makegrid(zoom, pixels, lonmin, lonmax, latmin, latmax)
latVals <- gridvals[[2]]
lonVals <- gridvals[[1]]

###### Save just the grid values for future use.
np <- import("numpy")
if (extent=="USA") {
filename = "grid_CONTUS_16_640"
} else if (extent=="global") {
filename = "grid_WORLD_16_640"
}

np$savez(file.path(data_dir, "int/grids", paste0(filename, ".npz")), lon = lonVals, lat = latVals, zoom = zoom, pixels = pixels)
for (sampling in c("UAR", "POP"))
{

# don't need pop sample globally
if (extent=="global" && sampling=="POP") {
next
}
# Note that the original random samples were created prior to setting a RNG
# seed, so the random sample that is generated below will not contain the same
# grid points as those used in our published analysis.
set.seed(seed)

########### Sample obs ############

if (sampling=="UAR") {

subgridN2 <- as.data.frame(matrix(NaN, nrow=N*multiplier, ncol=2 ))
colnames(subgridN2) <- c("lon", "lat")
subgridN2$lon <- sample(lonVals, size = N*multiplier, replace = TRUE)
subgridN2$lat <- sample(latVals, size = N*multiplier, replace = TRUE)

# drop any duplicate entries
subgridN2 <- subgridN2[!duplicated(subgridN2), ]

} else if (sampling=="POP") { # POPULATION WEIGHTED SAMPLING
weightR = raster(file.path(data_dir, "raw/applications/population/gpw_v4_population_density_rev10_2015_30_sec.tif"))
weightR = crop(weightR, extent(lonmin, lonmax, latmin, latmax))
weightRPts = rasterToPoints(weightR)

vals = weightRPts[,3]
index = 1:length(vals)

starttime = Sys.time()
selectedInd = sample(x = index, size = N*multiplier, prob = vals, replace = T)
endtime = Sys.time()
endtime - starttime

## for each of the indicies, find the grid coordinates within the raster cell and randomly choose one of those.
selected = weightRPts[selectedInd,]

resRaster = xres(weightR)

registerDoParallel(no_cores)

starttime = Sys.time()
selectedLatLons = foreach (i = 1:nrow(selected),
.combine = rbind,
.export = c("selected","resRaster","lonVals","latVals"),
.packages = c("raster")) %dopar%
{
lon = selected[i,1]
lat = selected[i,2]

goodLons = lonVals[lonVals < (lon + resRaster) & lonVals > (lon - resRaster)]
goodLats = latVals[latVals < (lat + resRaster) & latVals > (lat - resRaster)]

if(min(length(goodLons),length(goodLats)) == 0) {
toReturn = (c(NA,NA))
} else {

if(length(goodLons) == 1) {
goodLons = c(goodLons,goodLons)
}
if(length(goodLats) == 1) {
goodLats = c(goodLats,goodLats)
}

toReturn = (c(sample(x = goodLons, size = 1), sample(x = goodLats, size = 1) ) )
}
toReturn

return(toReturn)
}
endtime = Sys.time()
endtime - starttime

stopImplicitCluster()

# drop any duplicate entries
selectedLatLons <- selectedLatLons[!is.na(selectedLatLons[,1]), ]
selectedLatLons <- selectedLatLons[!is.na(selectedLatLons[,2]), ]
selectedLatLons <- selectedLatLons[!duplicated(selectedLatLons), ]

subgridN2 = as.data.frame(selectedLatLons)
colnames(subgridN2) <- c("lon", "lat")

}

########### Crop observations to appropriate land areas (USA or global land area) ############
if (extent=="USA") {
subgrid <- subsetToLand(subgridN2, lonmin, lonmax, latmin, latmax, file.path(data_dir, "raw/shapefiles/gadm36_USA_shp/gadm36_USA_0.shp"))
} else if (extent=="global") {
subgrid <- subsetToLand(subgridN2, lonmin, lonmax, latmin, latmax, file.path(data_dir, "raw/shapefiles/world/land_polygons.shp"))
}

# Only keep the first N instances
if (dim(subgrid)[1] > N) {
subgrid <- subgrid[1:N,]
print("Excess draws dropped -- N observsations now saving...")
} else if (dim(subgrid)[1] < N) {
print("STOP: Too few observations -- need to increase the multiplier!")
} else {
print("Exactly N observations drawn and matched!")
}

############ Identify each obs as i,j ############

# find the i(lat), j(lon) location of each subgrid
S <- dim(subgrid)[1]

starttime <- Sys.time()
for (n in 1:S) { # This should take about 2 minutes
subgrid$i[n] <- which(latVals==subgrid$lat[n])
subgrid$j[n] <- which(lonVals==subgrid$lon[n])
}
endtime <- Sys.time()
endtime - starttime

subgrid$ID <- paste(subgrid$i, subgrid$j, sep=",")

########### Save ############
if (extent=="USA") {
filename <- paste0("CONTUS_", as.character(zoom), "_", as.character(pixels), "_",
sampling, "_", format(N, scientific=FALSE), "_", as.character(seed))
} else if (extent=="global") {
filename <- paste0("WORLD_", as.character(zoom), "_", as.character(pixels), "_",
sampling, "_", format(N, scientific=FALSE), "_", as.character(seed))
}

# Export to npz
np <- import("numpy")
np$savez(file.path(data_dir, "int/grids", paste0(filename, ".npz")), lon = subgrid$lon, lat = subgrid$lat,
ID = subgrid$ID, zoom = zoom, pixels = pixels)

print("-------- Saved! -----------")
}
}
Loading

0 comments on commit 7f462c2

Please sign in to comment.