wallaceEcoMod · Pgalante · Jan 3, 2019
diff --git a/R/annotate.R b/R/annotate.R
@@ -10,7 +10,17 @@
 #' @param envDates a vector of dates the same length as `env`. The vector should have class `POSIXct`, e.g., as obtained from using `lubridate::parse_date_time`
 #' @param dateScale string: 'year', 'month', or 'day'
 # @examples
-#'
+#' \dontrun{
+#' # Declare the date scale
+#' dateScale = "year"
+#' # Declare dates for simulated env data
+#' envDates <- parse_date_time(paste0("",2010:2014, ""), orders = c("Y", "Ym"))
+#' ###  Preparing data
+#' # convert to spatial object
+#' coordinates(datedOccs) <- c("x", "y")
+#' ##  Performing data-driven masking. First find the values of the environment at point locations
+#' datedOccs <- annotate(datedOccs, env, envDates, dateScale)
+#' }
 #'
 # @return
 #' @author Cory Merow <cory.merow@@gmail.com>,

diff --git a/R/mask.R b/R/mask.R
@@ -16,7 +16,16 @@
 #'  \item{mask}{mask cells with values outside the bounds}
 #' }
 # @examples
-#'
+#' \dontrun{
+#' # find suitable bounds
+#' bounds <- min(datedOccs$env)
+#' ## Create mask, and use it on SDM
+#' logicString = paste0('maskLayers >', bounds)
+#' # use 'most recent' environmental variable as base for mask
+#' SDMmask <- env[[5]]
+#' names(SDMmask) <- "SDM_mask"
+#' maskedDist <- maskRanger(potentialDist = sdm, maskLayers = SDMmask, logicString = logicString)
+#' }
 #'
 # @return
 #' @author Cory Merow <cory.merow@@gmail.com>,

diff --git a/R/simulateData.R b/R/simulateData.R
@@ -0,0 +1,69 @@
+#==================================================================================
+#==================================================================================
+#' @title Data Simulation 
+#'
+#' @description Simulates data for vignette-like function demonstrations. This function returns a number of variables to the global environment so that examples can be run.
+#'  
+#' @details
+#' See Examples.
+#' @param 
+# @examples
+#' \dontrun{
+#' system.time({simulateData()})
+#' # Look at some of the example variables needed
+#' datedOccs
+#' class(env)
+#' head(sp1.xy)
+#' }
+#'
+# @return
+#' @author Peter Galante <pgalante@@amnh.org>,
+#' @note 
+# @seealso
+# @references
+# @aliases
+# @family - a family name. All functions that have the same family tag will be linked in the documentation.
+#' @export
+# FUNCTION TO CREATE ALL SIMULATED DATA AND EXPORT A LIST USABLE FOR EACH EXAMPLE IN USECASE
+simulateData <- function(){
+  options(warn=-1)
+  require(raster)
+  require(dismo)
+  require(maskRangeR)
+  library(devtools)
+  library(lubridate)
+  library(rgdal)
+  ###  Initiating data for use case # 1
+  # Initiate empty raster object with extent and resolution 
+  r1 <<- raster(extent(c(-72, -64, 41, 50)), res = c(0.008333333, 0.008333333))
+  # Generate random occurrence points
+  datedOccs<<-data.frame(randomPoints(r1, 5))
+  # Pair dates (as years) with the occurrences
+  datedOccs$date <<- 2010:2014
+  # convert dates to formal date objects
+  datedOccs$date <<- parse_date_time(datedOccs$date, orders = c("Y", "Ym"))
+  # Create values showing distance from random points to simulate suitability surface
+  sdm <<- distanceFromPoints(r1, datedOccs[,1:2])
+  # Create 5 different rasters as env data
+  env1 <<- distanceFromPoints(r1, data.frame(randomPoints(sdm, 3)))
+  env2 <<- distanceFromPoints(r1, data.frame(randomPoints(sdm, 3)))
+  env3 <<- distanceFromPoints(r1, data.frame(randomPoints(sdm, 3)))
+  env4 <<- distanceFromPoints(r1, data.frame(randomPoints(sdm, 3)))
+  env5 <<- distanceFromPoints(r1, data.frame(randomPoints(sdm, 3)))
+  env <<- stack(env1, env2, env3, env4, env5)
+  ### Initiating data for use case # 2
+  ## Generate random polygon
+  coords <- randomPoints(sdm, 3)
+  polyg <- Polygon(coords)
+  polyg <<- SpatialPolygons(list(Polygons(list(polyg), ID = "a")))
+  ### Initiating data for use case # 3
+
+  ### Initiating data for use case # 4
+  ## Generate some species occurrence records
+  r1.svm <<- raster(extent(c(-72, -64, 41, 50)), res = c(0.008333333, 0.008333333))
+  values(r1.svm) <<- (1:ncell(r1.svm))^2
+  r2.svm <<- raster(extent(c(-72, -64, 41, 50)), res = c(0.008333333, 0.008333333))
+  values(r2.svm) <<- (ncell(r2.svm):1)^2
+  sp1.xy <<- data.frame(randomPoints(r1.svm, 15, prob = T)); colnames(sp1.xy) <<- c("longitude", "latitude")
+  sp2.xy <<- data.frame(randomPoints(r2.svm, 15, prob = T)); colnames(sp2.xy) <<- c("longitude", "latitude")
+}
diff --git a/R/spatialUtils.R b/R/spatialUtils.R
@@ -8,7 +8,9 @@
 #' @param expertMap A binary map, either as a polygon or a raster.
 #' @param maskListRaw A list of rasters, each corresponding to layers from which masks will eventually be made (in another function).
 # @examples
-#'
+#'\dontrun{
+#' 
+#'}
 #'
 # @return
 #' @author Cory Merow <cory.merow@@gmail.com>,

diff --git a/R/svm.R b/R/svm.R
@@ -1,50 +1,51 @@
-#' Classify species ranges based on occurrence coordinates and SDM scores.
+#' Classify species ranges based on occurrence coordinates and continuous values from an SDM
 #'
 #' \code{rangeSVM()} returns a tuned support vector machine (SVM) model that predicts 
-#' species identity based on predictors that are solely spatial, based on occurrence coordinates,
-#' or a combination of spatial and environmental, based on both occurrence coordinates and 
-#' environmental suitability values. Suitability values can be predicted with species distribution
-#' models (SDMs; a.k.a. ecological niche models).
+#' species identity based on predictors that are either (1) solely spatial, (2) based 
+#' on occurrence coordinates, or (3) a combination of spatial and environmental, based 
+#' on both occurrence coordinates and environmental suitability values. Suitability 
+#' values can be predicted with species distribution models (SDMs; a.k.a. ecological 
+#' niche models).
 #'
 #' @param xy1 Matrix or data frame of occurrence coordinates for species 1.
 #' @param xy2 Matrix or data frame of occurrence coordinates for species 2.
-#' @param sdm Raster or RasterStack representing environmental suitability (can be predictions from SDMs). 
-#' These must have the same extent as both species' occurrence points. Default is NULL.
+#' @param sdm Raster or RasterStack representing environmental suitability (can be predictions from SDMs).
+#' Default is NULL.
 #' @param nrep Numeric for number of SVM tuning iterations. Default is 100.
 #' @param weight Boolean. If TRUE, the species with less occurrence records is weighted higher in the SVM.
 #' Default is FALSE.
 #' @return The tuned SVM model.
-#' @details The tuning operation uses \code{tune.svm()} from the e1071 package, which performs 10-fold
-#' cross validation and selects the best model based on classification error. Ranges of the cost and gamma
-#' parameters are explored in the tuning exercise. The tuning function is iterated \code{nrep} times, and the
-#' parameter combination used most frequently across all iterations is used to build a final SVM model.
+#' @details The tuning operation uses \code{tune.svm()} from the e1071 package, 
+#' which performs 10-fold cross validation and selects the best model based on 
+#' classification error. Ranges of the cost and gamma parameters are explored in 
+#' the tuning exercise. The tuning function is iterated \code{nrep} times, and the
+#' parameter combination used most frequently across all iterations is used to 
+#' build a final SVM model.
 #' 
-#' When \code{sdm = NULL}, the SVM is purely spatial, based only on the occurrence coordinates of
-#' each species. Otherwise, the SVM is fit with both a spatial predictor and any additional ones added as
-#' rasters. These extra predictors can be based on predictions from a species distribution model 
-#' (SDM; a.k.a. ecological niche model), and in this case would represent environmental or climatic
-#' suitability, depending on the variables used in the SDM.
+#' When \code{sdm = NULL}, the SVM is purely spatial, based only on the occurrence 
+#' coordinates of each species. Otherwise, the SVM is fit with both a spatial 
+#' predictor and any additional ones added as rasters. These extra predictors can be 
+#' based on predictions from a species distribution model (SDM; a.k.a. ecological 
+#' niche model), and in this case would represent environmental or climatic suitability, 
+#' depending on the variables used in the SDM.
 #' 
 #' 
 #' @examples
 #' \dontrun{
-#' # tune SVMs on coordinates only (spatial)
-#' rangeSVM(xy1, xy2)
-#' 
-#' # tune SVMs on coordinates and SDM prediction rasters (spatial + environmental)
-#' rangeSVM(xy1, xy2, raster::stack(sdm1, sdm2))
+#' svmHYB <- rangeSVM(sp1.xy, as.matrix(sp2.xy), raster::stack(r1.svm, r2.svm), nrep=10)
+#' spp.svmHYB <- rasterizeRangeSVM(svmHYB, r1.svm, stack(r1.svm, r2.svm))
+#' plot(spp.svmHYB)
+#' # These locations may not necessarily occupied however; we’ll use these as masks over the SDM predictions.
+#' sp1.svmHYB <- spp.svmHYB == 1
+#' sp1.svmHYB[sp1.svmHYB == 0] <- NA
+#' sp2.svmHYB <- spp.svmHYB == 2
+#' sp2.svmHYB[sp2.svmHYB == 0] <- NA
+#' # Mask "suitability" maps to SVM
+#' sp1.svmHYB.ext <- mask(r1.svm, sp1.svmHYB)
+#' sp2.svmHYB.ext <- mask(r2.svm, sp2.svmHYB)
+#' plot(stack(sp1.svmHYB.ext, sp2.svmHYB.ext))
 #' }
 #' @export
-#' 
-#' 
-
-# to clarify:
-# 1. specifics on optimality criterion (weights false positives and false negatives equally?)
-# 2. nothing to break tie (how do we choose when there are ties?)
-# 3. could easily allow users to change k
-# 4. sensitivity to relative number of points near border (and assumes that sampling bias was addressed?)
-# 5. which params are varied and what they mean
-# 6. convey to Cory to make generalizable to more than 2 species
 
 rangeSVM <- function(xy1, xy2, sdm = NULL, nrep = 100, weight = FALSE) {
   # define class weights
@@ -54,15 +55,15 @@ rangeSVM <- function(xy1, xy2, sdm = NULL, nrep = 100, weight = FALSE) {
         cw <- c("0" = 1, "1" = nrow(xy1)/nrow(xy2))
       } else {
         cw <- c("0" = nrow(xy2)/nrow(xy1), "1" = 1)
-      }
-    }
+      }  
+    }  
   } else {
     cw <- c("0" = 1, "1" = 1)
   }
 
   # bind both coordinate matrices
   xy <- rbind(xy1, xy2)
-  # if using sdm prediction rasters as extra predictor variable, add the values to the matrix 
+  # if sdm prediction raster input, add the values to the matrix 
   if(!is.null(sdm)) {
     sdm.vals <- raster::extract(sdm, xy)
     xy <- cbind(xy, sdm = sdm.vals)
@@ -83,17 +84,12 @@ rangeSVM <- function(xy1, xy2, sdm = NULL, nrep = 100, weight = FALSE) {
   # year={2005},
   # publisher={Department of Statistics and Mathematics, WU Vienna University of Economics and Business}
   params_best <- list()
-  # performance_best <- list()
   gamma_best <- numeric(nrep)
   C_best <- numeric(nrep)
-
   for(i in 1:nrep) {
     m.tune <- e1071::tune.svm(sp ~ ., data = xy, gamma = gamma_range, cost = C_range, class.weights = cw)  
-    # print(sum(as.numeric(as.character(predict(m.tune$best.model, aus.xy)))))
-    # print(sum(as.numeric(as.character(predict(m.tune$best.model, tel.xy)))))
     # get optimal parameter values
     params_best[[i]] <- m.tune$best.parameters
-    # performance_best[[i]] <- m.tune$best.performance
     # gamma_best[i] <- m.tune$best.parameters[[1]]
     # C_best[i] <- m.tune$best.parameters[[2]]
     message(paste("Run", i, "complete."))
@@ -106,23 +102,23 @@ rangeSVM <- function(xy1, xy2, sdm = NULL, nrep = 100, weight = FALSE) {
   params_best_df <- do.call(rbind, params_best)
   params_best_df$params <- paste0(params_best_df$gamma, params_best_df$cost)
   mostFreq <- names(which(table(params_best_df$params) == max(table(params_best_df$params))))
-  params_best_df_mostFreq <- params_best_df[params_best_df$params == mostFreq, 1:2]
-
+  params_best_df_mostFreq <- params_best_df[params_best_df$params == mostFreq,1:2]
   # run final model
   m <- e1071::svm(sp ~ ., data = xy, gamma = params_best_df_mostFreq$gamma[1], 
                   cost = params_best_df_mostFreq$cost[1], class.weights = cw)
 
   return(m)
 }
 
+###############################################################################
+
 #' Generate binary raster based on predictions of SVM model.
 #'
-#' \code{rangeSVM_predict()} returns a binary raster representing the ranges of the two species
+#' \code{rasterizeRangeSVM()} returns a binary raster representing the ranges of the two species
 #' predicted by the fitted SVM tuned with \code{rangeSVM()}.
 #'
 #' @param svm Model object for the SVM, returned by \code{rangeSVM()}.
-#' @param r Raster with the extent desired for the prediction. The values for cells used for predictions must 
-#' have non-NA values, but the particular values do not matter.
+#' @param r Raster with the extent desired for the prediction. The values do not matter.
 #' @param sdm Raster or RasterStack representing environmental suitability (can be predictions from SDMs).
 #' These rasters must match the predictor variables used in the SVM. Default is NULL.
 #' @return The binary Raster representing the SVM predictions.
@@ -131,20 +127,20 @@ rangeSVM <- function(xy1, xy2, sdm = NULL, nrep = 100, weight = FALSE) {
 #' 
 #' @examples
 #' \dontrun{
-#' # raster prediction for xy SVM (spatial)
-#' svm.xy <- rangeSVM(xy1, xy2)
-#' svm.xy.r <- rangeSVM_predict(svm.xy, r)
-#' plot(svm.xy.r)
+#' # raster prediction for spatial SVM
+#' svm.sp <- rangeSVM(xy1, xy2)
+#' svm.sp.r <- rasterizeRangeSVM(svm.sp, r)
+#' plot(svm.sp.r)
 #' 
-#' # raster prediction for xyEnv SVM (spatial + SDM predictions)
-#' svm.xyEnv <- rangeSVM(xy1, xy2, raster::stack(sdm1, sdm2))
-#' svm.xyEnv.r <- rangeSVM_predict(svm.xyEnv, r, raster::stack(sdm1, sdm2))
-#' plot(svm.xyEnv.r)
+#' # raster prediction for hybrid SVM
+#' svm.hyb <- rangeSVM(xy1, xy2, raster::stack(sdm1, sdm2))
+#' svm.hyb.r <- rasterizeRangeSVM(svm.hyb, r, raster::stack(sdm1, sdm2))
+#' plot(svm.hyb.r)
 #' }
 #' @export
 
-rangeSVM_predict <- function(svm, r, sdm = NULL) {
-  # extract centroid coordinates from shared extent raster cells
+rasterizeRangeSVM <- function(svm, r, sdm = NULL) {
+  # extract coordinates from shared extent
   r.pts <- raster::rasterToPoints(r, spatial = TRUE)
   r.xy <- sp::coordinates(r.pts)
   # if sdm prediction raster input, add the values to the matrix