diff --git a/.gitignore b/.gitignore index a3c4e964..85e31240 100644 --- a/.gitignore +++ b/.gitignore @@ -23,7 +23,6 @@ !data/*.RData !data/*.rda *.csv - - inst/doc revdep/ +*.Rproj diff --git a/DESCRIPTION b/DESCRIPTION index 0927c386..32b7fea2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: eventstudyr Title: Estimation and Visualization of Linear Panel Event Studies -Version: 1.1.4 +Version: 1.1.5 Authors@R: c(person(given = "Simon", family = "Freyaldenhoven", diff --git a/NAMESPACE b/NAMESPACE index be899643..7867b6ac 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,7 +13,8 @@ importFrom(data.table,":=") importFrom(data.table,.SD) importFrom(data.table,CJ) importFrom(data.table,as.data.table) -importFrom(data.table,setDT) +importFrom(data.table,copy) +importFrom(data.table,is.data.table) importFrom(data.table,setnames) importFrom(data.table,setorder) importFrom(data.table,setorderv) diff --git a/R/AddMeans.R b/R/AddMeans.R index ed62b06e..d50e11b9 100644 --- a/R/AddMeans.R +++ b/R/AddMeans.R @@ -19,12 +19,11 @@ AddMeans <- function(df, normalization_column, policyvar, outcomevar, if (is.null(custom_scalar)) { if (grepl(paste0(policyvar, "_lead"), normalization_column)) { - df_change <- df[df[, normalization_column] == 0 & !is.na(df[, normalization_column]), ] - }else { - df_change <- df[df[, normalization_column] != 0 & !is.na(df[, normalization_column]), ] + df_change <- df[get(normalization_column) == 0 & !is.na(get(normalization_column))] + } else { + df_change <- df[get(normalization_column) != 0 & !is.na(get(normalization_column))] } - mean <- mean(df_change[[outcomevar]], na.rm = T) - + mean <- df_change[, mean(get(outcomevar), na.rm = TRUE)] } else { mean <- custom_scalar } diff --git a/R/ComputeFirstDifferences.R b/R/ComputeFirstDifferences.R index 2ebeb5eb..d2fc51ab 100644 --- a/R/ComputeFirstDifferences.R +++ b/R/ComputeFirstDifferences.R @@ -1,11 +1,10 @@ #' Adds first differences of a variable, robustly to gaps in time variable, as new columns in a panel dataset #' -#' @param df Data frame that will be modified. +#' @param dt Data frame that will be modified. #' @param idvar Character indicating column of units. #' @param timevar Character indicating column of time periods. #' @param diffvar Character indicating column of variable whose first difference will be taken. #' @param timevar_holes Logical indicating whether the panel contains gaps in the time variable. Defaults to FALSE. -#' @param return_df Logical indicating whether the function should return a data frame (TRUE) or data.table (FALSE). Defaults to TRUE. #' #' @return The passed dataset augmented with columns that reflect the desired first differences #' @@ -19,59 +18,51 @@ #' diffvar = "z" #' ) #' -#' @importFrom data.table setDT setorderv setnames shift := CJ .SD +#' @importFrom data.table setorderv setnames shift := CJ .SD #' #' @keywords internal #' @noRd -ComputeFirstDifferences <- function(df, idvar, timevar, diffvar, - timevar_holes = FALSE, return_df = TRUE) { - if (! is.data.frame(df)) { - stop("df should be a data frame.") +ComputeFirstDifferences <- function(dt, idvar, timevar, diffvar, + timevar_holes = FALSE) { + if (! data.table::is.data.table(dt)) { + stop("Input data should be a data.table.") } for (var in c(idvar, timevar, diffvar)) { if ((! is.character(var))) { stop(paste0(var, " should be a character.")) } - if (! var %in% colnames(df)) { + if (! var %in% colnames(dt)) { stop(paste0(var, " should be the name of a variable in the dataset.")) } } if (! is.logical(timevar_holes)) { stop("timevar_holes should be logical.") } - if (! is.logical(return_df)) { - stop("return_df should be logical.") - } - data.table::setDT(df) - data.table::setorderv(df, cols = c(idvar, timevar)) + data.table::setorderv(dt, cols = c(idvar, timevar)) if (!timevar_holes) { - df[, paste0(diffvar, "_fd") := get(diffvar) - shift((get(diffvar))), + dt[, paste0(diffvar, "_fd") := get(diffvar) - data.table::shift((get(diffvar))), by = idvar] } else { ## Create dataset with all combinations to compute first differences - all_combinations <- CJ(unique(df[[idvar]]), - min(df[[timevar]]):max(df[[timevar]])) - setnames(all_combinations, new = c(idvar, timevar)) + all_combinations <- data.table::CJ(unique(dt[[idvar]]), + min(dt[[timevar]]):max(dt[[timevar]])) + data.table::setnames(all_combinations, new = c(idvar, timevar)) - df_all <- merge(df, all_combinations, - by = c(idvar, timevar), all = TRUE) + dt_all <- data.table::merge.data.table(dt, all_combinations, + by = c(idvar, timevar), all = TRUE) - df_all[, paste0(diffvar, "_fd") := get(diffvar) - shift((get(diffvar))), + dt_all[, paste0(diffvar, "_fd") := get(diffvar) - data.table::shift((get(diffvar))), by = idvar] ## Bring first differences back to the original dataset vars_to_keep <- c(idvar, timevar, paste0(diffvar, "_fd")) - df <- merge(df, df_all[, .SD, .SDcols = vars_to_keep], - by = c(idvar, timevar), all.x = TRUE) - } - - if (return_df) { - df <- as.data.frame(df) + dt <- data.table::merge.data.table(dt, dt_all[, .SD, .SDcols = vars_to_keep], + by = c(idvar, timevar), all.x = TRUE) } - return(df) + return(dt) } diff --git a/R/ComputeShifts.R b/R/ComputeShifts.R index d3de31fe..0b07a154 100644 --- a/R/ComputeShifts.R +++ b/R/ComputeShifts.R @@ -6,7 +6,6 @@ #' @param shiftvar Character indicating column of variable that will be shifted. #' @param shiftvalues Numeric vector specifying the leads/lags to be computed. For example, c(-1, 0, 1) will compute the lead, current, and lag values. #' @param timevar_holes Logical indicating whether the panel contains gaps in the time variable. Defaults to FALSE. -#' @param return_df Logical indicating whether the function should return a data frame (TRUE) or data.table (FALSE). Defaults to TRUE. #' #' @return The passed dataset augmented with columns that reflect the desired shift values @@ -22,14 +21,14 @@ #' ) #' #' -#' @importFrom data.table setDT setorderv shift := CJ .SD +#' @importFrom data.table setorderv shift := CJ .SD #' @keywords internal #' @noRd ComputeShifts <- function(df, idvar, timevar, shiftvar, shiftvalues, - timevar_holes = FALSE, return_df = TRUE) { - if (! is.data.frame(df)) { - stop("df should be a data frame.") + timevar_holes = FALSE) { + if (! data.table::is.data.table(df)) { + stop("df should be a data.table.") } for (var in c(idvar, timevar, shiftvar)) { if ((! is.character(var))) { @@ -48,11 +47,7 @@ ComputeShifts <- function(df, idvar, timevar, shiftvar, shiftvalues, if (! is.logical(timevar_holes)) { stop("timevar_holes should be logical.") } - if (! is.logical(return_df)) { - stop("return_df should be logical.") - } - data.table::setDT(df) data.table::setorderv(df, cols = c(idvar, timevar)) lags <- shiftvalues[shiftvalues > 0] @@ -92,10 +87,5 @@ ComputeShifts <- function(df, idvar, timevar, shiftvar, shiftvalues, df <- merge(df, df_all, by = c(idvar, timevar, shiftvar), all.x = TRUE) } - - if (return_df) { - df <- as.data.frame(df) - } - return(df) } diff --git a/R/EventStudy.R b/R/EventStudy.R index 7e40a7e9..9d26f5b4 100644 --- a/R/EventStudy.R +++ b/R/EventStudy.R @@ -32,12 +32,14 @@ #' @param normalize Specifies the event-time coefficient to be normalized. Defaults to - pre - 1. #' @param anticipation_effects_normalization If set to TRUE, runs the default process and switches coefficient to be normalized to 0 #' when there are anticipation effects. If set to FALSE, does not make the switch. Defaults to TRUE. +#' @param allow_duplicate_id If TRUE, the function estimates a regression where duplicated ID-time rows are weighted by their duplication count. If FALSE, the function raises an error if duplicate unit-time keys exist in the input data. Default is FALSE. +#' @param avoid_internal_copy If TRUE, the function avoids making an internal deep copy of the input data, and instead directly modifies the input data.table. Default is FALSE. #' #' @return A list that contains, under "output", the estimation output as an lm_robust object, and under "arguments", the arguments passed to the function. #' @import dplyr #' @import estimatr #' @importFrom stats reformulate -#' @importFrom data.table setorderv as.data.table .SD +#' @importFrom data.table setorderv as.data.table is.data.table .SD copy #' @export #' #' @examples @@ -142,15 +144,20 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, controls = NULL, proxy = NULL, proxyIV = NULL, FE = TRUE, TFE = TRUE, post, overidpost = 1, pre, overidpre = post + pre, - normalize = -1 * (pre + 1), cluster = TRUE, anticipation_effects_normalization = TRUE) { + normalize = -1 * (pre + 1), cluster = TRUE, anticipation_effects_normalization = TRUE, + allow_duplicate_id = FALSE, avoid_internal_copy = FALSE) { # Check for errors in arguments if (! estimator %in% c("OLS", "FHS")) {stop("estimator should be either 'OLS' or 'FHS'.")} if (! is.data.frame(data)) {stop("data should be a data frame.")} - if (! is.character(outcomevar)) {stop("outcomevar should be a character.")} - if (! is.character(policyvar)) {stop("policyvar should be a character.")} - if (! is.character(idvar)) {stop("idvar should be a character.")} - if (! is.character(timevar)) {stop("timevar should be a character.")} + for (var in c(idvar, timevar, outcomevar, policyvar)) { + if ((! is.character(var))) { + stop(paste0(var, " should be a character.")) + } + if (! var %in% colnames(data)) { + stop(paste0(var, " should be the name of a variable in the dataset.")) + } + } if (! (is.null(controls) | is.character(controls))) {stop("controls should be either NULL or a character.")} if ((estimator == "OLS" & ! is.null(proxy))) {stop("proxy should only be specified when estimator = 'FHS'.")} @@ -162,11 +169,13 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c stop("When estimator is 'FHS' and there are no leads in the model, proxyIV must be specified explicitly.") } - if (! is.logical(FE)) {stop("FE should be either TRUE or FALSE.")} - if (! is.logical(TFE)) {stop("TFE should be either TRUE or FALSE.")} - if (! is.logical(cluster)) {stop("cluster should be either TRUE or FALSE.")} + for (var in c(FE, TFE, cluster, anticipation_effects_normalization, allow_duplicate_id, avoid_internal_copy)) { + if (! is.logical(var)) { + stop(paste0(var, " should be either TRUE or FALSE.")) + } + } + if (FE & !cluster) {stop("cluster=TRUE is required when FE=TRUE.")} - if (! is.logical(anticipation_effects_normalization)) {stop("anticipation_effects_normalization should be either TRUE or FALSE.")} if (! (is.numeric(post) & post >= 0 & post %% 1 == 0)) {stop("post should be a whole number.")} if (! (is.numeric(overidpost) & overidpost >= 0 & overidpost %% 1 == 0)) {stop("overidpost should be a whole number.")} @@ -177,6 +186,9 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c & normalize >= -(pre + overidpre + 1) & normalize <= post + overidpost)) { stop("normalize should be an integer between -(pre + overidpre + 1) and (post + overidpost).") } + if (avoid_internal_copy & ! data.table::is.data.table(data)) { + warning("`avoid_internal_copy` has no effect because dataset passed to `data` is not a `data.table`.") + } # Check for errors in data if (! is.numeric(data[[timevar]])) {stop("timevar column in dataset should be numeric.")} @@ -184,8 +196,17 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c stop("timevar column in dataset should be a vector of integers.") } - data_ids <- as.data.frame(data)[, c(idvar, timevar)] + if (data.table::is.data.table(data)) { + if (!avoid_internal_copy) { + data <- data.table::copy(data) + } + } else { + data <- data.table::as.data.table(data) + } + data.table::setorderv(data, c(idvar, timevar)) + data_ids <- data[, .SD, .SDcols = c(idvar, timevar)] + # Check panel balance and unique keys n_units <- length(base::unique(data[[idvar]])) n_periods <- length(base::unique(data[[timevar]])) n_unique_rows <- nrow(data[!base::duplicated(data_ids),]) @@ -195,11 +216,15 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c } else { unbalanced <- FALSE } - - data.table::setorderv(data, c(idvar, timevar)) + if (n_unique_rows != nrow(data)) { + if (allow_duplicate_id == TRUE) { + warning("idvar-timevar pairs do not uniquely identify all rows in the data.") + } else if (allow_duplicate_id == FALSE) { + stop("idvar-timevar pairs do not uniquely identify all rows in the data. Turn on allow_duplicate_id if you want to proceed with weighted duplicated rows.") + } + } detect_holes <- function(dt, idvar, timevar) { - dt <- data.table::as.data.table(dt) holes_per_id <- dt[, .SD[!is.na(base::get(timevar))], by = c(idvar) ][, list(holes = any(base::diff(base::get(timevar)) != 1)), by = c(idvar)] @@ -264,7 +289,7 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c timevar_holes = timevar_holes) lead_endpoint_var <- paste0(policyvar, "_lead", num_fd_leads) - data[lead_endpoint_var] <- 1 - data[lead_endpoint_var] + data[, (lead_endpoint_var) := 1 - get(lead_endpoint_var)] } if (pre != 0 & normalize == -1 & anticipation_effects_normalization) { diff --git a/man/EventStudy.Rd b/man/EventStudy.Rd index 115de017..6cb118ca 100644 --- a/man/EventStudy.Rd +++ b/man/EventStudy.Rd @@ -22,7 +22,9 @@ EventStudy( overidpre = post + pre, normalize = -1 * (pre + 1), cluster = TRUE, - anticipation_effects_normalization = TRUE + anticipation_effects_normalization = TRUE, + allow_duplicate_id = FALSE, + avoid_internal_copy = FALSE ) } \arguments{ @@ -73,6 +75,10 @@ Defaults to TRUE. Must be TRUE if FE is TRUE.} \item{anticipation_effects_normalization}{If set to TRUE, runs the default process and switches coefficient to be normalized to 0 when there are anticipation effects. If set to FALSE, does not make the switch. Defaults to TRUE.} + +\item{allow_duplicate_id}{If TRUE, the function estimates a regression where duplicated ID-time rows are weighted by their duplication count. If FALSE, the function raises an error if duplicate unit-time keys exist in the input data. Default is FALSE.} + +\item{avoid_internal_copy}{If TRUE, the function avoids making an internal deep copy of the input data, and instead directly modifies the input data.table. Default is FALSE.} } \value{ A list that contains, under "output", the estimation output as an lm_robust object, and under "arguments", the arguments passed to the function. diff --git a/tests/testthat/test-AddCIs.R b/tests/testthat/test-AddCIs.R index 9f53b5c1..52ddcbdd 100644 --- a/tests/testthat/test-AddCIs.R +++ b/tests/testthat/test-AddCIs.R @@ -1,5 +1,5 @@ test_that("correctly calculates conf_level at 0.95", { - df_test <- as.data.frame(data.table::fread("./input/df_test_AddCI.csv")) + df_test <- data.table::fread("./input/df_test_AddCI.csv") policyvar <- "z" eventstudy_coefficients <- c("z_fd_lag1", "z_fd_lead1") diff --git a/tests/testthat/test-AddMeans.R b/tests/testthat/test-AddMeans.R index b6d7e947..b19d6e56 100644 --- a/tests/testthat/test-AddMeans.R +++ b/tests/testthat/test-AddMeans.R @@ -13,8 +13,7 @@ test_that("means are computed correctly when a first differenced variable is nor sample_estimation[[2]]$policyvar, sample_estimation[[2]]$outcomevar) - mean_manual <- mean(df_estimation[df_estimation[,"z_fd_lead1"] != 0, ]$y_base, - na.rm = T) + mean_manual <- df_estimation[z_fd_lead1 != 0, mean(y_base, na.rm = TRUE)] expect_equal(mean_function, mean_manual) @@ -33,8 +32,7 @@ test_that("means are computed correctly when the furthest lead is normalized", { sample_estimation[[2]]$policyvar, sample_estimation[[2]]$outcomevar) - mean_manual <- mean(df_estimation[df_estimation[,"z_lead4"] == 0, ]$y_base, - na.rm = T) + mean_manual <- df_estimation[z_lead4 == 0, mean(y_base, na.rm = TRUE)] expect_equal(mean_function, mean_manual) }) diff --git a/tests/testthat/test-ComputeFirstDifferences.R b/tests/testthat/test-ComputeFirstDifferences.R index 313ff90b..ac35470f 100644 --- a/tests/testthat/test-ComputeFirstDifferences.R +++ b/tests/testthat/test-ComputeFirstDifferences.R @@ -1,25 +1,27 @@ +library(data.table) + test_that("correctly recognizes wrong variable type for grouping variable", { - df_test <- read.csv("./input/df_test.csv") + dt_test <- data.table::fread("./input/df_test.csv") - expect_error(ComputeFirstDifferences(df_test, country, "periods", "values")) + expect_error(ComputeFirstDifferences(dt_test, country, "periods", "values")) }) test_that("correctly recognizes wrong variable type for diffvar variable", { - df_test <- read.csv("./input/df_test.csv") + dt_test <- data.table::fread("./input/df_test.csv") - expect_error(ComputeFirstDifferences(df_test, "country", "periods", values)) + expect_error(ComputeFirstDifferences(dt_test, "country", "periods", values)) }) test_that("a column with _fd suffix is added", { - df_test <- read.csv("./input/df_test.csv") + dt_test <- data.table::fread("./input/df_test.csv") - df_diff <- ComputeFirstDifferences(df_test, "country", "periods", "values") + df_diff <- ComputeFirstDifferences(dt_test, "country", "periods", "values") expect_true("values_fd" %in% colnames(df_diff)) }) test_that("correctly computes differences with a balanced dataset", { - df <- data.frame( + df <- data.table( id = rep(c("A", "B"), each = 4), time = rep(1:4, times = 2), policy = c(10, 12, 11, 13, 8, 9, 10, 11), @@ -32,7 +34,7 @@ test_that("correctly computes differences with a balanced dataset", { }) test_that("correctly computes differences with a dataset that has holes in time var", { - df <- data.frame( + df <- data.table( id = c(rep("A", 4), rep("B", 2), rep("C", 3)), time = c(1, 2, 4, 5, 2, 3, 2, 3, 4), policy = c(10, 12, 13, 14, 8, 9, 10, 11, 12), diff --git a/tests/testthat/test-ComputeShifts.R b/tests/testthat/test-ComputeShifts.R index 89c93e86..adfca7d6 100644 --- a/tests/testthat/test-ComputeShifts.R +++ b/tests/testthat/test-ComputeShifts.R @@ -23,32 +23,39 @@ test_that("correctly recognizes variables missing from dataset", { expect_error(ComputeShifts(df_test, "state", "periods", "values", 3, F, T)) }) -test_that("correctly adds the desired number of shifts", { +test_that("correctly adds the desired column(s) of shifts", { df_test <- read.csv("./input/df_test.csv") + cols_original <- colnames(df_test) # Add 1 lead/lag - expect_equal(ncol(ComputeShifts(df_test, idvar = "country", timevar = "periods", + dt_test <- data.table::as.data.table(df_test) + cols_new <- colnames(ComputeShifts(dt_test, idvar = "country", timevar = "periods", shiftvar = "values", - shiftvalues = 2)), - ncol(df_test) + 1) - expect_equal(ncol(ComputeShifts(df_test, idvar = "country", timevar = "periods", + shiftvalues = 2)) + expect_equal(cols_new, c(cols_original, "values_lag2")) + + dt_test <- data.table::as.data.table(df_test) + cols_new <- colnames(ComputeShifts(dt_test, idvar = "country", timevar = "periods", shiftvar = "values", - shiftvalues = -2)), - ncol(df_test) + 1) + shiftvalues = -2)) + expect_equal(cols_new, c(cols_original, "values_lead2")) # Add multiple leads/lags - expect_equal(ncol(ComputeShifts(df_test, idvar = "country", timevar = "periods", + dt_test <- data.table::as.data.table(df_test) + cols_new <- colnames(ComputeShifts(dt_test, idvar = "country", timevar = "periods", shiftvar = "values", - shiftvalues = 1:2)), - ncol(df_test) + 2) - expect_equal(ncol(ComputeShifts(df_test, idvar = "country", timevar = "periods", + shiftvalues = 1:2)) + expect_equal(cols_new, c(cols_original, "values_lag1", "values_lag2")) + + dt_test <- data.table::as.data.table(df_test) + cols_new <- colnames(ComputeShifts(dt_test, idvar = "country", timevar = "periods", shiftvar = "values", - shiftvalues = -2:-1)), - ncol(df_test) + 2) + shiftvalues = -2:-1)) + expect_equal(cols_new, c(cols_original, "values_lead2", "values_lead1")) }) test_that("the columns added have correct suffixes", { - df_test <- read.csv("./input/df_test.csv") + df_test <- data.table::fread("./input/df_test.csv") df_lags <- ComputeShifts(df_test, idvar = "country", timevar = "periods", shiftvar = "values", @@ -68,7 +75,7 @@ test_that("the columns added have correct suffixes", { }) test_that("correctly shifts variable when there are no holes in timevar", { - df <- data.frame( + df <- data.table::data.table( id = c(rep("A", 4), rep("B", 2), rep("C", 3)), time = c(1, 2, 3, 4, 1, 2, 1, 2, 3), z = c(10, 12, 13, 14, 8, 9, 10, 11, 12), @@ -89,7 +96,7 @@ test_that("correctly shifts variable when there are no holes in timevar", { }) test_that("correctly shifts variable when there are holes in timevar", { - df <- data.frame( + df <- data.table::data.table( id = c(rep("A", 4), rep("B", 2), rep("C", 3)), time = c(1, 2, 4, 5, 2, 3, 2, 3, 4), z = c(10, 12, 13, 14, 8, 9, 10, 11, 12), diff --git a/tests/testthat/test-EventStudy.R b/tests/testthat/test-EventStudy.R index be8b888f..5c094dd3 100644 --- a/tests/testthat/test-EventStudy.R +++ b/tests/testthat/test-EventStudy.R @@ -1,4 +1,46 @@ +# Housekeeping ------------------------------------------------------------ + +test_that("does not modify input data (even if input is data.table) when avoid_internal_copy = FALSE", { + + example_dt <- data.table::as.data.table(example_data) + example_dt_copy <- data.table::copy(example_dt) + + outputs <- suppressWarnings( + EventStudy( + estimator = "OLS", data = example_dt, outcomevar = "y_base", + policyvar = "z", idvar = "id", timevar = "t", + controls = "x_r", FE = TRUE, TFE = TRUE, + post = 2, pre = 3, overidpre = 4, + overidpost = 11, normalize = - 1, + cluster = TRUE, anticipation_effects_normalization = TRUE) + ) + + expect_true(isTRUE(all.equal(example_dt, example_dt_copy, check.attributes = FALSE))) +}) + +test_that("input dt IS modified in-place when avoid_internal_copy = TRUE", { + + example_dt <- data.table::as.data.table(example_data) + example_dt_copy <- data.table::copy(example_dt) + address_before <- rlang::obj_address(example_dt) + + outputs <- suppressWarnings( + EventStudy( + estimator = "OLS", data = example_dt, outcomevar = "y_base", + policyvar = "z", idvar = "id", timevar = "t", + controls = "x_r", FE = TRUE, TFE = TRUE, + post = 2, pre = 3, overidpre = 4, + overidpost = 11, normalize = - 1, + cluster = TRUE, anticipation_effects_normalization = TRUE, + avoid_internal_copy = TRUE) + ) + address_after <- rlang::obj_address(example_dt) + + expect_equal(address_before, address_after) + expect_true(isFALSE(identical(example_dt, example_dt_copy))) +}) + # OLS --------------------------------------------------------------------- test_that("correctly creates highest order shiftvalues", { @@ -254,42 +296,6 @@ test_that("removes the correct column when normalize = post + overidpost", { expect_true(!normalization_column %in% shiftvalues) }) -test_that("subtraction is peformed on the correct column", { - - post <- 1 - pre <- 1 - overidpre <- 2 - overidpost <- 2 - - df_first_diff <- ComputeFirstDifferences(df = df_sample_static, idvar = "id", timevar = "t", diffvar = "z") - - num_fd_lag_periods <- post + overidpost - 1 - num_fd_lead_periods <- pre + overidpre - - furthest_lag_period <- num_fd_lag_periods + 1 - - df_fd_leads <- ComputeShifts(df_first_diff, idvar = "id", timevar = "t", - shiftvar = paste0("z", "_fd"), shiftvalues = -num_fd_lead_periods:-1) - df_fd_leads_shifted <- ComputeShifts(df_fd_leads, idvar = "id", timevar = "t", - shiftvar = paste0("z", "_fd"), shiftvalues = 1:num_fd_lag_periods) - - df_lag <- ComputeShifts(df_fd_leads_shifted, idvar = "id", timevar = "t", - shiftvar = "z", shiftvalues = furthest_lag_period) - df_lag_lead <- ComputeShifts(df_lag, idvar = "id", timevar = "t", - shiftvar = "z", shiftvalues = -num_fd_lead_periods) - - - col_subtract_1 <- paste0("z", "_lead", num_fd_lead_periods) - df_shift_minus_1 <- 1 - df_lag_lead[col_subtract_1] - - num_equal <- sum(df_shift_minus_1[col_subtract_1] == 1 - df_lag_lead[col_subtract_1], na.rm = TRUE) - num_na <- sum(is.na(df_shift_minus_1[col_subtract_1])) - column_subtract_degree <- as.double(stringr::str_extract(col_subtract_1, "(?<=lead)[0-9]+")) - - expect_equal(num_equal + num_na, nrow(df_lag_lead)) - expect_equal(column_subtract_degree, pre + overidpre) -}) - # FHS --------------------------------------------------------------------- test_that("correctly creates highest order leads and shiftvalues", {