From 86ff67853994ca4342ed45d267ebd9566a9629cf Mon Sep 17 00:00:00 2001
From: jsocolar <jacob.socolar@gmail.com>
Date: Sat, 8 Feb 2025 00:28:12 -0500
Subject: [PATCH 1/2] add new_data argument to log_lik_flocker. still requires
 testing on all model types

---
 R/log_lik_occupancy.R | 134 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 112 insertions(+), 22 deletions(-)

diff --git a/R/log_lik_occupancy.R b/R/log_lik_occupancy.R
index fc2aceb..53c5e89 100644
--- a/R/log_lik_occupancy.R
+++ b/R/log_lik_occupancy.R
@@ -1,6 +1,14 @@
 #' Compute unit-wise or series-wise log-likelihood matrix for a flocker_fit object
 #' @param flocker_fit A flocker_fit object
-#' @param draw_ids the draw ids to compute log-likelihoods for. Defaults to using the full posterior. 
+#' @param new_data optional new data at which to compute log likelihood
+#' @param allow_new_levels allow new levels for random effect terms in 
+#'    'new_data'? Will error if set to 'FALSE' and new levels are provided in 
+#'    'new_data'.
+#' @param sample_new_levels If new_data is provided and contains random effect 
+#'    levels not present in the original data, how should predictions be 
+#'    handled? See '?brms::prepare_predictions' for options.
+#' @param draw_ids the draw ids to compute log-likelihoods for. Defaults to 
+#'    using the full posterior. 
 #' @return A posterior log-likelihood matrix, where iterations are rows and 
 #'    units, series, or species are columns.
 #' @details In single-season models, rows are units (e.g. points or 
@@ -12,7 +20,10 @@
 #' @examples 
 #' log_lik_flocker(example_flocker_model_single)
 
-log_lik_flocker <- function(flocker_fit, draw_ids = NULL) {
+log_lik_flocker <- function(
+    flocker_fit, new_data = NULL, allow_new_levels = FALSE,
+    sample_new_levels = "uncertainty", draw_ids = NULL
+    ) {
   assertthat::assert_that(is_flocker_fit(flocker_fit))
   ndraws <- brms::ndraws(flocker_fit)
   if(!is.null(draw_ids)){
@@ -23,21 +34,44 @@ log_lik_flocker <- function(flocker_fit, draw_ids = NULL) {
     ndraws <- length(draw_ids)
   }
   
+  if(!is.null(new_data)){
+    assertthat::assert_that(
+      is_flocker_data(new_data),
+      msg = paste0(
+        "new_data, if provided, must be a flocker_data object formatted ",
+        "by `make_flocker_data`"
+      )
+    )
+    a <- attributes(flocker_fit)
+    fit_datatype <- a$data_type
+    assertthat::assert_that(
+      flocker_data$type == fit_datatype,
+      msg = "the data type in new_data does not match the data type in flocker_fit"
+    )
+  }
   
   lik_type <- type_flocker_fit(flocker_fit)
   
   if (lik_type %in% c("single")) {
     lps <- fitted_flocker(
-      flocker_fit, draw_ids = draw_ids, new_data = NULL, allow_new_levels = FALSE, 
+      flocker_fit, draw_ids = draw_ids, new_data = new_data, 
+      allow_new_levels = allow_new_levels, 
+      sample_new_levels = sample_new_levels, 
       response = TRUE, unit_level = FALSE
     )
     
     psi_all <- lps$linpred_occ[ , 1, ] # first index is unit, second is visit, third is draw
     n_unit <- nrow(psi_all)
     theta_all <- lps$linpred_det
+    if (is.null(new_data)) {
+      gp <- get_positions(flocker_fit)
+      the_data <- flocker_fit$data
+    } else {
+      gp <- get_positions(new_data)
+      the_data <- new_data$data
+    }
     
-    gp <- get_positions(flocker_fit)
-    obs <- new_matrix(gp, flocker_fit$data$ff_y[gp])
+    obs <- new_matrix(gp, the_data$ff_y[gp])
 
     # get emission likelihoods
     el_0 <- el_1 <- matrix(nrow = nrow(psi_all), ncol = ncol(psi_all))
@@ -52,13 +86,23 @@ log_lik_flocker <- function(flocker_fit, draw_ids = NULL) {
     ll <- log((psi_all * el_1) + (1 - psi_all) * el_0) |>
       t()
   } else if (lik_type == "single_C") {
-    ll <- brms::log_lik(flocker_fit, draw_ids = draw_ids)
+    ll <- brms::log_lik(
+      flocker_fit, newdata = new_data, draw_ids = draw_ids, 
+      allow_new_levels = allow_new_levels,
+      sample_new_levels = sample_new_levels)
   } else if (lik_type == "augmented") {
-    gp <- get_positions(flocker_fit)
-    obs <- new_array(gp, flocker_fit$data$ff_y[gp])
-    
+    if(is.null(new_data)){
+      gp <- get_positions(flocker_fit)
+      obs <- new_array(gp, flocker_fit$data$ff_y[gp])
+    } else {
+      gp <- get_positions(new_data)
+      obs <- new_array(gp, new_data$data$ff_y[gp])
+    }
+
     lps <- fitted_flocker(
-      flocker_fit, draw_ids = draw_ids, new_data = NULL, allow_new_levels = FALSE, 
+      flocker_fit, draw_ids = draw_ids, new_data = new_data, 
+      allow_new_levels = allow_new_levels, 
+      sample_new_levels = sample_new_levels, 
       response = TRUE, unit_level = FALSE
     )
     lpo <- lps$linpred_occ[ , 1, , ] # first index is point, second is visit, third is species, fourth is draw
@@ -87,15 +131,26 @@ log_lik_flocker <- function(flocker_fit, draw_ids = NULL) {
     ll <- log(elw1 * Omega + elw0 * (1 - Omega)) |>
       t()
   } else if (lik_type %in% c("multi_colex")) {
-    gp <- get_positions(flocker_fit)
-    obs <- new_array(gp, flocker_fit$data$ff_y[gp])
+    if(is.null(new_data)){
+      gp <- get_positions(flocker_fit)
+      obs <- new_array(gp, flocker_fit$data$ff_y[gp])
+    } else {
+      gp <- get_positions(new_data)
+      obs <- new_array(gp, new_data$data$ff_y[gp])
+    }
     
     lps1 <- fitted_flocker(
       flocker_fit, components = c("det"),
+      new_data = new_data,
+      allow_new_levels = allow_new_levels,
+      sample_new_levels = sample_new_levels,
       draw_ids = draw_ids
     )
     lps2 <- fitted_flocker(
       flocker_fit, components = c("occ", "colo", "ex"),
+      new_data = new_data,
+      allow_new_levels = allow_new_levels,
+      sample_new_levels = sample_new_levels,
       draw_ids = draw_ids, unit_level = TRUE
     )
     init <- lps2$linpred_occ[,1,]
@@ -105,15 +160,26 @@ log_lik_flocker <- function(flocker_fit, draw_ids = NULL) {
     ll <- log_lik_dynamic(init, colo, ex, obs, det) |>
       t()
   } else if (lik_type %in% c("multi_colex_eq")) {
-    gp <- get_positions(flocker_fit)
-    obs <- new_array(gp, flocker_fit$data$ff_y[gp])
+    if(is.null(new_data)){
+      gp <- get_positions(flocker_fit)
+      obs <- new_array(gp, flocker_fit$data$ff_y[gp])
+    } else {
+      gp <- get_positions(new_data)
+      obs <- new_array(gp, new_data$data$ff_y[gp])
+    }
     
     lps1 <- fitted_flocker(
       flocker_fit, components = c("det"),
+      new_data = new_data,
+      allow_new_levels = allow_new_levels,
+      sample_new_levels = sample_new_levels,
       draw_ids = draw_ids
     )
     lps2 <- fitted_flocker(
       flocker_fit, components = c("col", "ex"),
+      new_data = new_data,
+      allow_new_levels = allow_new_levels,
+      sample_new_levels = sample_new_levels,
       draw_ids = draw_ids, unit_level = TRUE
     )
     colo <- lps2$linpred_col
@@ -123,14 +189,26 @@ log_lik_flocker <- function(flocker_fit, draw_ids = NULL) {
     ll <- log_lik_dynamic(init, colo, ex, obs, det) |>
       t()
   } else if (lik_type == "multi_autologistic") {
-    gp <- get_positions(flocker_fit)
-    obs <- new_array(gp, flocker_fit$data$ff_y[gp])
+    if(is.null(new_data)){
+      gp <- get_positions(flocker_fit)
+      obs <- new_array(gp, flocker_fit$data$ff_y[gp])
+    } else {
+      gp <- get_positions(new_data)
+      obs <- new_array(gp, new_data$data$ff_y[gp])
+    }
     
-    lps1 <- fitted_flocker(flocker_fit, components = c("det"), 
-                           draw_ids = draw_ids
+    lps1 <- fitted_flocker(
+      flocker_fit, components = c("det"), 
+      new_data = new_data,
+      allow_new_levels = allow_new_levels,
+      sample_new_levels = sample_new_levels,
+      draw_ids = draw_ids
     )
     lps2 <- fitted_flocker(
       flocker_fit, components = c("occ", "colo", "auto"),
+      new_data = new_data,
+      allow_new_levels = allow_new_levels,
+      sample_new_levels = sample_new_levels,
       draw_ids = draw_ids, unit_level = TRUE
     )
     init <- lps2$linpred_occ[,1,]
@@ -140,14 +218,26 @@ log_lik_flocker <- function(flocker_fit, draw_ids = NULL) {
     ll <- log_lik_dynamic(init, colo, ex, obs, det) |>
       t()
   } else if (lik_type == "multi_autologistic_eq") {
-    gp <- get_positions(flocker_fit)
-    obs <- new_array(gp, flocker_fit$data$ff_y[gp])
+    if(is.null(new_data)){
+      gp <- get_positions(flocker_fit)
+      obs <- new_array(gp, flocker_fit$data$ff_y[gp])
+    } else {
+      gp <- get_positions(new_data)
+      obs <- new_array(gp, new_data$data$ff_y[gp])
+    }
     
-    lps1 <- fitted_flocker(flocker_fit, components = c("det"), 
-                           draw_ids = draw_ids
+    lps1 <- fitted_flocker(
+      flocker_fit, components = c("det"), 
+      new_data = new_data,
+      allow_new_levels = allow_new_levels,
+      sample_new_levels = sample_new_levels,
+      draw_ids = draw_ids
     )
     lps2 <- fitted_flocker(
       flocker_fit, components = c("col", "auto"),
+      new_data = new_data,
+      allow_new_levels = allow_new_levels,
+      sample_new_levels = sample_new_levels,
       draw_ids = draw_ids, unit_level = TRUE
     )
     colo <- lps2$linpred_col

From fe56cee2703ae93e84e05a77d2a72d21820cbc2e Mon Sep 17 00:00:00 2001
From: jsocolar <jacob.socolar@gmail.com>
Date: Sat, 8 Feb 2025 00:29:15 -0500
Subject: [PATCH 2/2] update doc

---
 man/log_lik_flocker.Rd | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/man/log_lik_flocker.Rd b/man/log_lik_flocker.Rd
index 043ba70..98aa485 100644
--- a/man/log_lik_flocker.Rd
+++ b/man/log_lik_flocker.Rd
@@ -4,12 +4,29 @@
 \alias{log_lik_flocker}
 \title{Compute unit-wise or series-wise log-likelihood matrix for a flocker_fit object}
 \usage{
-log_lik_flocker(flocker_fit, draw_ids = NULL)
+log_lik_flocker(
+  flocker_fit,
+  new_data = NULL,
+  allow_new_levels = FALSE,
+  sample_new_levels = "uncertainty",
+  draw_ids = NULL
+)
 }
 \arguments{
 \item{flocker_fit}{A flocker_fit object}
 
-\item{draw_ids}{the draw ids to compute log-likelihoods for. Defaults to using the full posterior.}
+\item{new_data}{optional new data at which to compute log likelihood}
+
+\item{allow_new_levels}{allow new levels for random effect terms in 
+'new_data'? Will error if set to 'FALSE' and new levels are provided in 
+'new_data'.}
+
+\item{sample_new_levels}{If new_data is provided and contains random effect 
+levels not present in the original data, how should predictions be 
+handled? See '?brms::prepare_predictions' for options.}
+
+\item{draw_ids}{the draw ids to compute log-likelihoods for. Defaults to 
+using the full posterior.}
 }
 \value{
 A posterior log-likelihood matrix, where iterations are rows and