diff --git a/R/annotate.R b/R/annotate.R index d70089c..bf49bac 100644 --- a/R/annotate.R +++ b/R/annotate.R @@ -10,7 +10,17 @@ #' @param envDates a vector of dates the same length as `env`. The vector should have class `POSIXct`, e.g., as obtained from using `lubridate::parse_date_time` #' @param dateScale string: 'year', 'month', or 'day' # @examples -#' +#' \dontrun{ +#' # Declare the date scale +#' dateScale = "year" +#' # Declare dates for simulated env data +#' envDates <- parse_date_time(paste0("",2010:2014, ""), orders = c("Y", "Ym")) +#' ### Preparing data +#' # convert to spatial object +#' coordinates(datedOccs) <- c("x", "y") +#' ## Performing data-driven masking. First find the values of the environment at point locations +#' datedOccs <- annotate(datedOccs, env, envDates, dateScale) +#' } #' # @return #' @author Cory Merow , diff --git a/R/mask.R b/R/mask.R index beba7db..c4419c6 100644 --- a/R/mask.R +++ b/R/mask.R @@ -16,7 +16,16 @@ #' \item{mask}{mask cells with values outside the bounds} #' } # @examples -#' +#' \dontrun{ +#' # find suitable bounds +#' bounds <- min(datedOccs$env) +#' ## Create mask, and use it on SDM +#' logicString = paste0('maskLayers >', bounds) +#' # use 'most recent' environmental variable as base for mask +#' SDMmask <- env[[5]] +#' names(SDMmask) <- "SDM_mask" +#' maskedDist <- maskRanger(potentialDist = sdm, maskLayers = SDMmask, logicString = logicString) +#' } #' # @return #' @author Cory Merow , diff --git a/R/simulateData.R b/R/simulateData.R new file mode 100644 index 0000000..d41dfec --- /dev/null +++ b/R/simulateData.R @@ -0,0 +1,69 @@ +#================================================================================== +#================================================================================== +#' @title Data Simulation +#' +#' @description Simulates data for vignette-like function demonstrations. This function returns a number of variables to the global environment so that examples can be run. +#' +#' @details +#' See Examples. +#' @param +# @examples +#' \dontrun{ +#' system.time({simulateData()}) +#' # Look at some of the example variables needed +#' datedOccs +#' class(env) +#' head(sp1.xy) +#' } +#' +# @return +#' @author Peter Galante , +#' @note +# @seealso +# @references +# @aliases +# @family - a family name. All functions that have the same family tag will be linked in the documentation. +#' @export +# FUNCTION TO CREATE ALL SIMULATED DATA AND EXPORT A LIST USABLE FOR EACH EXAMPLE IN USECASE +simulateData <- function(){ + options(warn=-1) + require(raster) + require(dismo) + require(maskRangeR) + library(devtools) + library(lubridate) + library(rgdal) + ### Initiating data for use case # 1 + # Initiate empty raster object with extent and resolution + r1 <<- raster(extent(c(-72, -64, 41, 50)), res = c(0.008333333, 0.008333333)) + # Generate random occurrence points + datedOccs<<-data.frame(randomPoints(r1, 5)) + # Pair dates (as years) with the occurrences + datedOccs$date <<- 2010:2014 + # convert dates to formal date objects + datedOccs$date <<- parse_date_time(datedOccs$date, orders = c("Y", "Ym")) + # Create values showing distance from random points to simulate suitability surface + sdm <<- distanceFromPoints(r1, datedOccs[,1:2]) + # Create 5 different rasters as env data + env1 <<- distanceFromPoints(r1, data.frame(randomPoints(sdm, 3))) + env2 <<- distanceFromPoints(r1, data.frame(randomPoints(sdm, 3))) + env3 <<- distanceFromPoints(r1, data.frame(randomPoints(sdm, 3))) + env4 <<- distanceFromPoints(r1, data.frame(randomPoints(sdm, 3))) + env5 <<- distanceFromPoints(r1, data.frame(randomPoints(sdm, 3))) + env <<- stack(env1, env2, env3, env4, env5) + ### Initiating data for use case # 2 + ## Generate random polygon + coords <- randomPoints(sdm, 3) + polyg <- Polygon(coords) + polyg <<- SpatialPolygons(list(Polygons(list(polyg), ID = "a"))) + ### Initiating data for use case # 3 + + ### Initiating data for use case # 4 + ## Generate some species occurrence records + r1.svm <<- raster(extent(c(-72, -64, 41, 50)), res = c(0.008333333, 0.008333333)) + values(r1.svm) <<- (1:ncell(r1.svm))^2 + r2.svm <<- raster(extent(c(-72, -64, 41, 50)), res = c(0.008333333, 0.008333333)) + values(r2.svm) <<- (ncell(r2.svm):1)^2 + sp1.xy <<- data.frame(randomPoints(r1.svm, 15, prob = T)); colnames(sp1.xy) <<- c("longitude", "latitude") + sp2.xy <<- data.frame(randomPoints(r2.svm, 15, prob = T)); colnames(sp2.xy) <<- c("longitude", "latitude") +} diff --git a/R/spatialUtils.R b/R/spatialUtils.R index 1820189..5ce5f68 100644 --- a/R/spatialUtils.R +++ b/R/spatialUtils.R @@ -8,7 +8,9 @@ #' @param expertMap A binary map, either as a polygon or a raster. #' @param maskListRaw A list of rasters, each corresponding to layers from which masks will eventually be made (in another function). # @examples -#' +#'\dontrun{ +#' +#'} #' # @return #' @author Cory Merow , diff --git a/R/svm.R b/R/svm.R index 44790fc..7cd7b2f 100644 --- a/R/svm.R +++ b/R/svm.R @@ -1,50 +1,51 @@ -#' Classify species ranges based on occurrence coordinates and SDM scores. +#' Classify species ranges based on occurrence coordinates and continuous values from an SDM #' #' \code{rangeSVM()} returns a tuned support vector machine (SVM) model that predicts -#' species identity based on predictors that are solely spatial, based on occurrence coordinates, -#' or a combination of spatial and environmental, based on both occurrence coordinates and -#' environmental suitability values. Suitability values can be predicted with species distribution -#' models (SDMs; a.k.a. ecological niche models). +#' species identity based on predictors that are either (1) solely spatial, (2) based +#' on occurrence coordinates, or (3) a combination of spatial and environmental, based +#' on both occurrence coordinates and environmental suitability values. Suitability +#' values can be predicted with species distribution models (SDMs; a.k.a. ecological +#' niche models). #' #' @param xy1 Matrix or data frame of occurrence coordinates for species 1. #' @param xy2 Matrix or data frame of occurrence coordinates for species 2. -#' @param sdm Raster or RasterStack representing environmental suitability (can be predictions from SDMs). -#' These must have the same extent as both species' occurrence points. Default is NULL. +#' @param sdm Raster or RasterStack representing environmental suitability (can be predictions from SDMs). +#' Default is NULL. #' @param nrep Numeric for number of SVM tuning iterations. Default is 100. #' @param weight Boolean. If TRUE, the species with less occurrence records is weighted higher in the SVM. #' Default is FALSE. #' @return The tuned SVM model. -#' @details The tuning operation uses \code{tune.svm()} from the e1071 package, which performs 10-fold -#' cross validation and selects the best model based on classification error. Ranges of the cost and gamma -#' parameters are explored in the tuning exercise. The tuning function is iterated \code{nrep} times, and the -#' parameter combination used most frequently across all iterations is used to build a final SVM model. +#' @details The tuning operation uses \code{tune.svm()} from the e1071 package, +#' which performs 10-fold cross validation and selects the best model based on +#' classification error. Ranges of the cost and gamma parameters are explored in +#' the tuning exercise. The tuning function is iterated \code{nrep} times, and the +#' parameter combination used most frequently across all iterations is used to +#' build a final SVM model. #' -#' When \code{sdm = NULL}, the SVM is purely spatial, based only on the occurrence coordinates of -#' each species. Otherwise, the SVM is fit with both a spatial predictor and any additional ones added as -#' rasters. These extra predictors can be based on predictions from a species distribution model -#' (SDM; a.k.a. ecological niche model), and in this case would represent environmental or climatic -#' suitability, depending on the variables used in the SDM. +#' When \code{sdm = NULL}, the SVM is purely spatial, based only on the occurrence +#' coordinates of each species. Otherwise, the SVM is fit with both a spatial +#' predictor and any additional ones added as rasters. These extra predictors can be +#' based on predictions from a species distribution model (SDM; a.k.a. ecological +#' niche model), and in this case would represent environmental or climatic suitability, +#' depending on the variables used in the SDM. #' #' #' @examples #' \dontrun{ -#' # tune SVMs on coordinates only (spatial) -#' rangeSVM(xy1, xy2) -#' -#' # tune SVMs on coordinates and SDM prediction rasters (spatial + environmental) -#' rangeSVM(xy1, xy2, raster::stack(sdm1, sdm2)) +#' svmHYB <- rangeSVM(sp1.xy, as.matrix(sp2.xy), raster::stack(r1.svm, r2.svm), nrep=10) +#' spp.svmHYB <- rasterizeRangeSVM(svmHYB, r1.svm, stack(r1.svm, r2.svm)) +#' plot(spp.svmHYB) +#' # These locations may not necessarily occupied however; we’ll use these as masks over the SDM predictions. +#' sp1.svmHYB <- spp.svmHYB == 1 +#' sp1.svmHYB[sp1.svmHYB == 0] <- NA +#' sp2.svmHYB <- spp.svmHYB == 2 +#' sp2.svmHYB[sp2.svmHYB == 0] <- NA +#' # Mask "suitability" maps to SVM +#' sp1.svmHYB.ext <- mask(r1.svm, sp1.svmHYB) +#' sp2.svmHYB.ext <- mask(r2.svm, sp2.svmHYB) +#' plot(stack(sp1.svmHYB.ext, sp2.svmHYB.ext)) #' } #' @export -#' -#' - -# to clarify: -# 1. specifics on optimality criterion (weights false positives and false negatives equally?) -# 2. nothing to break tie (how do we choose when there are ties?) -# 3. could easily allow users to change k -# 4. sensitivity to relative number of points near border (and assumes that sampling bias was addressed?) -# 5. which params are varied and what they mean -# 6. convey to Cory to make generalizable to more than 2 species rangeSVM <- function(xy1, xy2, sdm = NULL, nrep = 100, weight = FALSE) { # define class weights @@ -54,15 +55,15 @@ rangeSVM <- function(xy1, xy2, sdm = NULL, nrep = 100, weight = FALSE) { cw <- c("0" = 1, "1" = nrow(xy1)/nrow(xy2)) } else { cw <- c("0" = nrow(xy2)/nrow(xy1), "1" = 1) - } - } + } + } } else { cw <- c("0" = 1, "1" = 1) } # bind both coordinate matrices xy <- rbind(xy1, xy2) - # if using sdm prediction rasters as extra predictor variable, add the values to the matrix + # if sdm prediction raster input, add the values to the matrix if(!is.null(sdm)) { sdm.vals <- raster::extract(sdm, xy) xy <- cbind(xy, sdm = sdm.vals) @@ -83,17 +84,12 @@ rangeSVM <- function(xy1, xy2, sdm = NULL, nrep = 100, weight = FALSE) { # year={2005}, # publisher={Department of Statistics and Mathematics, WU Vienna University of Economics and Business} params_best <- list() - # performance_best <- list() gamma_best <- numeric(nrep) C_best <- numeric(nrep) - for(i in 1:nrep) { m.tune <- e1071::tune.svm(sp ~ ., data = xy, gamma = gamma_range, cost = C_range, class.weights = cw) - # print(sum(as.numeric(as.character(predict(m.tune$best.model, aus.xy))))) - # print(sum(as.numeric(as.character(predict(m.tune$best.model, tel.xy))))) # get optimal parameter values params_best[[i]] <- m.tune$best.parameters - # performance_best[[i]] <- m.tune$best.performance # gamma_best[i] <- m.tune$best.parameters[[1]] # C_best[i] <- m.tune$best.parameters[[2]] message(paste("Run", i, "complete.")) @@ -106,8 +102,7 @@ rangeSVM <- function(xy1, xy2, sdm = NULL, nrep = 100, weight = FALSE) { params_best_df <- do.call(rbind, params_best) params_best_df$params <- paste0(params_best_df$gamma, params_best_df$cost) mostFreq <- names(which(table(params_best_df$params) == max(table(params_best_df$params)))) - params_best_df_mostFreq <- params_best_df[params_best_df$params == mostFreq, 1:2] - + params_best_df_mostFreq <- params_best_df[params_best_df$params == mostFreq,1:2] # run final model m <- e1071::svm(sp ~ ., data = xy, gamma = params_best_df_mostFreq$gamma[1], cost = params_best_df_mostFreq$cost[1], class.weights = cw) @@ -115,14 +110,15 @@ rangeSVM <- function(xy1, xy2, sdm = NULL, nrep = 100, weight = FALSE) { return(m) } +############################################################################### + #' Generate binary raster based on predictions of SVM model. #' -#' \code{rangeSVM_predict()} returns a binary raster representing the ranges of the two species +#' \code{rasterizeRangeSVM()} returns a binary raster representing the ranges of the two species #' predicted by the fitted SVM tuned with \code{rangeSVM()}. #' #' @param svm Model object for the SVM, returned by \code{rangeSVM()}. -#' @param r Raster with the extent desired for the prediction. The values for cells used for predictions must -#' have non-NA values, but the particular values do not matter. +#' @param r Raster with the extent desired for the prediction. The values do not matter. #' @param sdm Raster or RasterStack representing environmental suitability (can be predictions from SDMs). #' These rasters must match the predictor variables used in the SVM. Default is NULL. #' @return The binary Raster representing the SVM predictions. @@ -131,20 +127,20 @@ rangeSVM <- function(xy1, xy2, sdm = NULL, nrep = 100, weight = FALSE) { #' #' @examples #' \dontrun{ -#' # raster prediction for xy SVM (spatial) -#' svm.xy <- rangeSVM(xy1, xy2) -#' svm.xy.r <- rangeSVM_predict(svm.xy, r) -#' plot(svm.xy.r) +#' # raster prediction for spatial SVM +#' svm.sp <- rangeSVM(xy1, xy2) +#' svm.sp.r <- rasterizeRangeSVM(svm.sp, r) +#' plot(svm.sp.r) #' -#' # raster prediction for xyEnv SVM (spatial + SDM predictions) -#' svm.xyEnv <- rangeSVM(xy1, xy2, raster::stack(sdm1, sdm2)) -#' svm.xyEnv.r <- rangeSVM_predict(svm.xyEnv, r, raster::stack(sdm1, sdm2)) -#' plot(svm.xyEnv.r) +#' # raster prediction for hybrid SVM +#' svm.hyb <- rangeSVM(xy1, xy2, raster::stack(sdm1, sdm2)) +#' svm.hyb.r <- rasterizeRangeSVM(svm.hyb, r, raster::stack(sdm1, sdm2)) +#' plot(svm.hyb.r) #' } #' @export -rangeSVM_predict <- function(svm, r, sdm = NULL) { - # extract centroid coordinates from shared extent raster cells +rasterizeRangeSVM <- function(svm, r, sdm = NULL) { + # extract coordinates from shared extent r.pts <- raster::rasterToPoints(r, spatial = TRUE) r.xy <- sp::coordinates(r.pts) # if sdm prediction raster input, add the values to the matrix