Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion R/annotate.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,17 @@
#' @param envDates a vector of dates the same length as `env`. The vector should have class `POSIXct`, e.g., as obtained from using `lubridate::parse_date_time`
#' @param dateScale string: 'year', 'month', or 'day'
# @examples
#'
#' \dontrun{
#' # Declare the date scale
#' dateScale = "year"
#' # Declare dates for simulated env data
#' envDates <- parse_date_time(paste0("",2010:2014, ""), orders = c("Y", "Ym"))
#' ### Preparing data
#' # convert to spatial object
#' coordinates(datedOccs) <- c("x", "y")
#' ## Performing data-driven masking. First find the values of the environment at point locations
#' datedOccs <- annotate(datedOccs, env, envDates, dateScale)
#' }
#'
# @return
#' @author Cory Merow <cory.merow@@gmail.com>,
Expand Down
11 changes: 10 additions & 1 deletion R/mask.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,16 @@
#' \item{mask}{mask cells with values outside the bounds}
#' }
# @examples
#'
#' \dontrun{
#' # find suitable bounds
#' bounds <- min(datedOccs$env)
#' ## Create mask, and use it on SDM
#' logicString = paste0('maskLayers >', bounds)
#' # use 'most recent' environmental variable as base for mask
#' SDMmask <- env[[5]]
#' names(SDMmask) <- "SDM_mask"
#' maskedDist <- maskRanger(potentialDist = sdm, maskLayers = SDMmask, logicString = logicString)
#' }
#'
# @return
#' @author Cory Merow <cory.merow@@gmail.com>,
Expand Down
69 changes: 69 additions & 0 deletions R/simulateData.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#==================================================================================
#==================================================================================
#' @title Data Simulation
#'
#' @description Simulates data for vignette-like function demonstrations. This function returns a number of variables to the global environment so that examples can be run.
#'
#' @details
#' See Examples.
#' @param
# @examples
#' \dontrun{
#' system.time({simulateData()})
#' # Look at some of the example variables needed
#' datedOccs
#' class(env)
#' head(sp1.xy)
#' }
#'
# @return
#' @author Peter Galante <pgalante@@amnh.org>,
#' @note
# @seealso
# @references
# @aliases
# @family - a family name. All functions that have the same family tag will be linked in the documentation.
#' @export
# FUNCTION TO CREATE ALL SIMULATED DATA AND EXPORT A LIST USABLE FOR EACH EXAMPLE IN USECASE
simulateData <- function(){
options(warn=-1)
require(raster)
require(dismo)
require(maskRangeR)
library(devtools)
library(lubridate)
library(rgdal)
### Initiating data for use case # 1
# Initiate empty raster object with extent and resolution
r1 <<- raster(extent(c(-72, -64, 41, 50)), res = c(0.008333333, 0.008333333))
# Generate random occurrence points
datedOccs<<-data.frame(randomPoints(r1, 5))
# Pair dates (as years) with the occurrences
datedOccs$date <<- 2010:2014
# convert dates to formal date objects
datedOccs$date <<- parse_date_time(datedOccs$date, orders = c("Y", "Ym"))
# Create values showing distance from random points to simulate suitability surface
sdm <<- distanceFromPoints(r1, datedOccs[,1:2])
# Create 5 different rasters as env data
env1 <<- distanceFromPoints(r1, data.frame(randomPoints(sdm, 3)))
env2 <<- distanceFromPoints(r1, data.frame(randomPoints(sdm, 3)))
env3 <<- distanceFromPoints(r1, data.frame(randomPoints(sdm, 3)))
env4 <<- distanceFromPoints(r1, data.frame(randomPoints(sdm, 3)))
env5 <<- distanceFromPoints(r1, data.frame(randomPoints(sdm, 3)))
env <<- stack(env1, env2, env3, env4, env5)
### Initiating data for use case # 2
## Generate random polygon
coords <- randomPoints(sdm, 3)
polyg <- Polygon(coords)
polyg <<- SpatialPolygons(list(Polygons(list(polyg), ID = "a")))
### Initiating data for use case # 3

### Initiating data for use case # 4
## Generate some species occurrence records
r1.svm <<- raster(extent(c(-72, -64, 41, 50)), res = c(0.008333333, 0.008333333))
values(r1.svm) <<- (1:ncell(r1.svm))^2
r2.svm <<- raster(extent(c(-72, -64, 41, 50)), res = c(0.008333333, 0.008333333))
values(r2.svm) <<- (ncell(r2.svm):1)^2
sp1.xy <<- data.frame(randomPoints(r1.svm, 15, prob = T)); colnames(sp1.xy) <<- c("longitude", "latitude")
sp2.xy <<- data.frame(randomPoints(r2.svm, 15, prob = T)); colnames(sp2.xy) <<- c("longitude", "latitude")
}
4 changes: 3 additions & 1 deletion R/spatialUtils.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
#' @param expertMap A binary map, either as a polygon or a raster.
#' @param maskListRaw A list of rasters, each corresponding to layers from which masks will eventually be made (in another function).
# @examples
#'
#'\dontrun{
#'
#'}
#'
# @return
#' @author Cory Merow <cory.merow@@gmail.com>,
Expand Down
104 changes: 50 additions & 54 deletions R/svm.R
Original file line number Diff line number Diff line change
@@ -1,50 +1,51 @@
#' Classify species ranges based on occurrence coordinates and SDM scores.
#' Classify species ranges based on occurrence coordinates and continuous values from an SDM
#'
#' \code{rangeSVM()} returns a tuned support vector machine (SVM) model that predicts
#' species identity based on predictors that are solely spatial, based on occurrence coordinates,
#' or a combination of spatial and environmental, based on both occurrence coordinates and
#' environmental suitability values. Suitability values can be predicted with species distribution
#' models (SDMs; a.k.a. ecological niche models).
#' species identity based on predictors that are either (1) solely spatial, (2) based
#' on occurrence coordinates, or (3) a combination of spatial and environmental, based
#' on both occurrence coordinates and environmental suitability values. Suitability
#' values can be predicted with species distribution models (SDMs; a.k.a. ecological
#' niche models).
#'
#' @param xy1 Matrix or data frame of occurrence coordinates for species 1.
#' @param xy2 Matrix or data frame of occurrence coordinates for species 2.
#' @param sdm Raster or RasterStack representing environmental suitability (can be predictions from SDMs).
#' These must have the same extent as both species' occurrence points. Default is NULL.
#' @param sdm Raster or RasterStack representing environmental suitability (can be predictions from SDMs).
#' Default is NULL.
#' @param nrep Numeric for number of SVM tuning iterations. Default is 100.
#' @param weight Boolean. If TRUE, the species with less occurrence records is weighted higher in the SVM.
#' Default is FALSE.
#' @return The tuned SVM model.
#' @details The tuning operation uses \code{tune.svm()} from the e1071 package, which performs 10-fold
#' cross validation and selects the best model based on classification error. Ranges of the cost and gamma
#' parameters are explored in the tuning exercise. The tuning function is iterated \code{nrep} times, and the
#' parameter combination used most frequently across all iterations is used to build a final SVM model.
#' @details The tuning operation uses \code{tune.svm()} from the e1071 package,
#' which performs 10-fold cross validation and selects the best model based on
#' classification error. Ranges of the cost and gamma parameters are explored in
#' the tuning exercise. The tuning function is iterated \code{nrep} times, and the
#' parameter combination used most frequently across all iterations is used to
#' build a final SVM model.
#'
#' When \code{sdm = NULL}, the SVM is purely spatial, based only on the occurrence coordinates of
#' each species. Otherwise, the SVM is fit with both a spatial predictor and any additional ones added as
#' rasters. These extra predictors can be based on predictions from a species distribution model
#' (SDM; a.k.a. ecological niche model), and in this case would represent environmental or climatic
#' suitability, depending on the variables used in the SDM.
#' When \code{sdm = NULL}, the SVM is purely spatial, based only on the occurrence
#' coordinates of each species. Otherwise, the SVM is fit with both a spatial
#' predictor and any additional ones added as rasters. These extra predictors can be
#' based on predictions from a species distribution model (SDM; a.k.a. ecological
#' niche model), and in this case would represent environmental or climatic suitability,
#' depending on the variables used in the SDM.
#'
#'
#' @examples
#' \dontrun{
#' # tune SVMs on coordinates only (spatial)
#' rangeSVM(xy1, xy2)
#'
#' # tune SVMs on coordinates and SDM prediction rasters (spatial + environmental)
#' rangeSVM(xy1, xy2, raster::stack(sdm1, sdm2))
#' svmHYB <- rangeSVM(sp1.xy, as.matrix(sp2.xy), raster::stack(r1.svm, r2.svm), nrep=10)
#' spp.svmHYB <- rasterizeRangeSVM(svmHYB, r1.svm, stack(r1.svm, r2.svm))
#' plot(spp.svmHYB)
#' # These locations may not necessarily occupied however; we’ll use these as masks over the SDM predictions.
#' sp1.svmHYB <- spp.svmHYB == 1
#' sp1.svmHYB[sp1.svmHYB == 0] <- NA
#' sp2.svmHYB <- spp.svmHYB == 2
#' sp2.svmHYB[sp2.svmHYB == 0] <- NA
#' # Mask "suitability" maps to SVM
#' sp1.svmHYB.ext <- mask(r1.svm, sp1.svmHYB)
#' sp2.svmHYB.ext <- mask(r2.svm, sp2.svmHYB)
#' plot(stack(sp1.svmHYB.ext, sp2.svmHYB.ext))
#' }
#' @export
#'
#'

# to clarify:
# 1. specifics on optimality criterion (weights false positives and false negatives equally?)
# 2. nothing to break tie (how do we choose when there are ties?)
# 3. could easily allow users to change k
# 4. sensitivity to relative number of points near border (and assumes that sampling bias was addressed?)
# 5. which params are varied and what they mean
# 6. convey to Cory to make generalizable to more than 2 species

rangeSVM <- function(xy1, xy2, sdm = NULL, nrep = 100, weight = FALSE) {
# define class weights
Expand All @@ -54,15 +55,15 @@ rangeSVM <- function(xy1, xy2, sdm = NULL, nrep = 100, weight = FALSE) {
cw <- c("0" = 1, "1" = nrow(xy1)/nrow(xy2))
} else {
cw <- c("0" = nrow(xy2)/nrow(xy1), "1" = 1)
}
}
}
}
} else {
cw <- c("0" = 1, "1" = 1)
}

# bind both coordinate matrices
xy <- rbind(xy1, xy2)
# if using sdm prediction rasters as extra predictor variable, add the values to the matrix
# if sdm prediction raster input, add the values to the matrix
if(!is.null(sdm)) {
sdm.vals <- raster::extract(sdm, xy)
xy <- cbind(xy, sdm = sdm.vals)
Expand All @@ -83,17 +84,12 @@ rangeSVM <- function(xy1, xy2, sdm = NULL, nrep = 100, weight = FALSE) {
# year={2005},
# publisher={Department of Statistics and Mathematics, WU Vienna University of Economics and Business}
params_best <- list()
# performance_best <- list()
gamma_best <- numeric(nrep)
C_best <- numeric(nrep)

for(i in 1:nrep) {
m.tune <- e1071::tune.svm(sp ~ ., data = xy, gamma = gamma_range, cost = C_range, class.weights = cw)
# print(sum(as.numeric(as.character(predict(m.tune$best.model, aus.xy)))))
# print(sum(as.numeric(as.character(predict(m.tune$best.model, tel.xy)))))
# get optimal parameter values
params_best[[i]] <- m.tune$best.parameters
# performance_best[[i]] <- m.tune$best.performance
# gamma_best[i] <- m.tune$best.parameters[[1]]
# C_best[i] <- m.tune$best.parameters[[2]]
message(paste("Run", i, "complete."))
Expand All @@ -106,23 +102,23 @@ rangeSVM <- function(xy1, xy2, sdm = NULL, nrep = 100, weight = FALSE) {
params_best_df <- do.call(rbind, params_best)
params_best_df$params <- paste0(params_best_df$gamma, params_best_df$cost)
mostFreq <- names(which(table(params_best_df$params) == max(table(params_best_df$params))))
params_best_df_mostFreq <- params_best_df[params_best_df$params == mostFreq, 1:2]

params_best_df_mostFreq <- params_best_df[params_best_df$params == mostFreq,1:2]
# run final model
m <- e1071::svm(sp ~ ., data = xy, gamma = params_best_df_mostFreq$gamma[1],
cost = params_best_df_mostFreq$cost[1], class.weights = cw)

return(m)
}

###############################################################################

#' Generate binary raster based on predictions of SVM model.
#'
#' \code{rangeSVM_predict()} returns a binary raster representing the ranges of the two species
#' \code{rasterizeRangeSVM()} returns a binary raster representing the ranges of the two species
#' predicted by the fitted SVM tuned with \code{rangeSVM()}.
#'
#' @param svm Model object for the SVM, returned by \code{rangeSVM()}.
#' @param r Raster with the extent desired for the prediction. The values for cells used for predictions must
#' have non-NA values, but the particular values do not matter.
#' @param r Raster with the extent desired for the prediction. The values do not matter.
#' @param sdm Raster or RasterStack representing environmental suitability (can be predictions from SDMs).
#' These rasters must match the predictor variables used in the SVM. Default is NULL.
#' @return The binary Raster representing the SVM predictions.
Expand All @@ -131,20 +127,20 @@ rangeSVM <- function(xy1, xy2, sdm = NULL, nrep = 100, weight = FALSE) {
#'
#' @examples
#' \dontrun{
#' # raster prediction for xy SVM (spatial)
#' svm.xy <- rangeSVM(xy1, xy2)
#' svm.xy.r <- rangeSVM_predict(svm.xy, r)
#' plot(svm.xy.r)
#' # raster prediction for spatial SVM
#' svm.sp <- rangeSVM(xy1, xy2)
#' svm.sp.r <- rasterizeRangeSVM(svm.sp, r)
#' plot(svm.sp.r)
#'
#' # raster prediction for xyEnv SVM (spatial + SDM predictions)
#' svm.xyEnv <- rangeSVM(xy1, xy2, raster::stack(sdm1, sdm2))
#' svm.xyEnv.r <- rangeSVM_predict(svm.xyEnv, r, raster::stack(sdm1, sdm2))
#' plot(svm.xyEnv.r)
#' # raster prediction for hybrid SVM
#' svm.hyb <- rangeSVM(xy1, xy2, raster::stack(sdm1, sdm2))
#' svm.hyb.r <- rasterizeRangeSVM(svm.hyb, r, raster::stack(sdm1, sdm2))
#' plot(svm.hyb.r)
#' }
#' @export

rangeSVM_predict <- function(svm, r, sdm = NULL) {
# extract centroid coordinates from shared extent raster cells
rasterizeRangeSVM <- function(svm, r, sdm = NULL) {
# extract coordinates from shared extent
r.pts <- raster::rasterToPoints(r, spatial = TRUE)
r.xy <- sp::coordinates(r.pts)
# if sdm prediction raster input, add the values to the matrix
Expand Down