diff --git a/.gitignore b/.gitignore
index a3c4e964..85e31240 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,7 +23,6 @@
 !data/*.RData
 !data/*.rda
 *.csv
-
-
 inst/doc
 revdep/
+*.Rproj
diff --git a/DESCRIPTION b/DESCRIPTION
index 0927c386..32b7fea2 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: eventstudyr
 Title: Estimation and Visualization of Linear Panel Event Studies
-Version: 1.1.4
+Version: 1.1.5
 Authors@R: 
     c(person(given = "Simon",
            family = "Freyaldenhoven",
diff --git a/NAMESPACE b/NAMESPACE
index be899643..7867b6ac 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -13,7 +13,8 @@ importFrom(data.table,":=")
 importFrom(data.table,.SD)
 importFrom(data.table,CJ)
 importFrom(data.table,as.data.table)
-importFrom(data.table,setDT)
+importFrom(data.table,copy)
+importFrom(data.table,is.data.table)
 importFrom(data.table,setnames)
 importFrom(data.table,setorder)
 importFrom(data.table,setorderv)
diff --git a/R/AddMeans.R b/R/AddMeans.R
index ed62b06e..d50e11b9 100644
--- a/R/AddMeans.R
+++ b/R/AddMeans.R
@@ -19,12 +19,11 @@ AddMeans <- function(df, normalization_column, policyvar, outcomevar,
 
     if (is.null(custom_scalar)) {
         if (grepl(paste0(policyvar, "_lead"), normalization_column)) {
-            df_change <- df[df[, normalization_column] == 0 & !is.na(df[, normalization_column]), ]
-        }else {
-            df_change <- df[df[, normalization_column] != 0 & !is.na(df[, normalization_column]), ]
+            df_change <- df[get(normalization_column) == 0 & !is.na(get(normalization_column))]
+        } else {
+            df_change <- df[get(normalization_column) != 0 & !is.na(get(normalization_column))]
         }
-        mean <- mean(df_change[[outcomevar]], na.rm = T)
-
+        mean <- df_change[, mean(get(outcomevar), na.rm = TRUE)]
     } else {
         mean <- custom_scalar
     }
diff --git a/R/ComputeFirstDifferences.R b/R/ComputeFirstDifferences.R
index 2ebeb5eb..d2fc51ab 100644
--- a/R/ComputeFirstDifferences.R
+++ b/R/ComputeFirstDifferences.R
@@ -1,11 +1,10 @@
 #' Adds first differences of a variable, robustly to gaps in time variable, as new columns in a panel dataset
 #'
-#' @param df Data frame that will be modified.
+#' @param dt Data frame that will be modified.
 #' @param idvar Character indicating column of units.
 #' @param timevar Character indicating column of time periods.
 #' @param diffvar Character indicating column of variable whose first difference will be taken.
 #' @param timevar_holes Logical indicating whether the panel contains gaps in the time variable. Defaults to FALSE.
-#' @param return_df Logical indicating whether the function should return a data frame (TRUE) or data.table (FALSE). Defaults to TRUE.
 #'
 #' @return The passed dataset augmented with columns that reflect the desired first differences
 #'
@@ -19,59 +18,51 @@
 #'     diffvar = "z"
 #' )
 #'
-#' @importFrom data.table setDT setorderv setnames shift := CJ .SD
+#' @importFrom data.table setorderv setnames shift := CJ .SD
 #'
 #' @keywords internal
 #' @noRd
 
-ComputeFirstDifferences <- function(df, idvar, timevar, diffvar,
-                                    timevar_holes = FALSE, return_df = TRUE) {
-    if (! is.data.frame(df)) {
-        stop("df should be a data frame.")
+ComputeFirstDifferences <- function(dt, idvar, timevar, diffvar,
+                                    timevar_holes = FALSE) {
+    if (! data.table::is.data.table(dt)) {
+        stop("Input data should be a data.table.")
     }
     for (var in c(idvar, timevar, diffvar)) {
         if ((! is.character(var))) {
             stop(paste0(var, " should be a character."))
         }
-        if (! var %in% colnames(df)) {
+        if (! var %in% colnames(dt)) {
             stop(paste0(var, " should be the name of a variable in the dataset."))
         }
     }
     if (! is.logical(timevar_holes)) {
         stop("timevar_holes should be logical.")
     }
-    if (! is.logical(return_df)) {
-        stop("return_df should be logical.")
-    }
 
-    data.table::setDT(df)
-    data.table::setorderv(df, cols = c(idvar, timevar))
+    data.table::setorderv(dt, cols = c(idvar, timevar))
 
     if (!timevar_holes) {
-        df[, paste0(diffvar, "_fd") := get(diffvar) - shift((get(diffvar))),
+        dt[, paste0(diffvar, "_fd") := get(diffvar) - data.table::shift((get(diffvar))),
            by = idvar]
     } else {
         ## Create dataset with all combinations to compute first differences
-        all_combinations <- CJ(unique(df[[idvar]]),
-                               min(df[[timevar]]):max(df[[timevar]]))
-        setnames(all_combinations, new = c(idvar, timevar))
+        all_combinations <- data.table::CJ(unique(dt[[idvar]]),
+                                           min(dt[[timevar]]):max(dt[[timevar]]))
+        data.table::setnames(all_combinations, new = c(idvar, timevar))
 
-        df_all <- merge(df, all_combinations,
-                        by = c(idvar, timevar), all = TRUE)
+        dt_all <- data.table::merge.data.table(dt, all_combinations,
+                                               by = c(idvar, timevar), all = TRUE)
 
-        df_all[, paste0(diffvar, "_fd") := get(diffvar) - shift((get(diffvar))),
+        dt_all[, paste0(diffvar, "_fd") := get(diffvar) - data.table::shift((get(diffvar))),
                 by = idvar]
 
         ## Bring first differences back to the original dataset
         vars_to_keep <- c(idvar, timevar, paste0(diffvar, "_fd"))
 
-        df <- merge(df, df_all[, .SD, .SDcols = vars_to_keep],
-                    by = c(idvar, timevar), all.x = TRUE)
-    }
-
-    if (return_df) {
-        df <- as.data.frame(df)
+        dt <- data.table::merge.data.table(dt, dt_all[, .SD, .SDcols = vars_to_keep],
+                                           by = c(idvar, timevar), all.x = TRUE)
     }
 
-    return(df)
+    return(dt)
 }
diff --git a/R/ComputeShifts.R b/R/ComputeShifts.R
index d3de31fe..0b07a154 100644
--- a/R/ComputeShifts.R
+++ b/R/ComputeShifts.R
@@ -6,7 +6,6 @@
 #' @param shiftvar Character indicating column of variable that will be shifted.
 #' @param shiftvalues Numeric vector specifying the leads/lags to be computed. For example, c(-1, 0, 1) will compute the lead, current, and lag values.
 #' @param timevar_holes Logical indicating whether the panel contains gaps in the time variable. Defaults to FALSE.
-#' @param return_df Logical indicating whether the function should return a data frame (TRUE) or data.table (FALSE). Defaults to TRUE.
 #'
 #' @return The passed dataset augmented with columns that reflect the desired shift values
 
@@ -22,14 +21,14 @@
 #' )
 #'
 #'
-#' @importFrom data.table setDT setorderv shift := CJ .SD
+#' @importFrom data.table setorderv shift := CJ .SD
 #' @keywords internal
 #' @noRd
 
 ComputeShifts <- function(df, idvar, timevar, shiftvar, shiftvalues,
-                          timevar_holes = FALSE, return_df = TRUE) {
-    if (! is.data.frame(df)) {
-        stop("df should be a data frame.")
+                          timevar_holes = FALSE) {
+    if (! data.table::is.data.table(df)) {
+        stop("df should be a data.table.")
     }
     for (var in c(idvar, timevar, shiftvar)) {
         if ((! is.character(var))) {
@@ -48,11 +47,7 @@ ComputeShifts <- function(df, idvar, timevar, shiftvar, shiftvalues,
     if (! is.logical(timevar_holes)) {
         stop("timevar_holes should be logical.")
     }
-    if (! is.logical(return_df)) {
-        stop("return_df should be logical.")
-    }
 
-    data.table::setDT(df)
     data.table::setorderv(df, cols = c(idvar, timevar))
 
     lags  <-    shiftvalues[shiftvalues >  0]
@@ -92,10 +87,5 @@ ComputeShifts <- function(df, idvar, timevar, shiftvar, shiftvalues,
         df <- merge(df, df_all,
                     by = c(idvar, timevar, shiftvar), all.x = TRUE)
     }
-
-    if (return_df) {
-        df <- as.data.frame(df)
-    }
-
     return(df)
 }
diff --git a/R/EventStudy.R b/R/EventStudy.R
index 7e40a7e9..9d26f5b4 100644
--- a/R/EventStudy.R
+++ b/R/EventStudy.R
@@ -32,12 +32,14 @@
 #' @param normalize Specifies the event-time coefficient to be normalized. Defaults to - pre - 1.
 #' @param anticipation_effects_normalization If set to TRUE, runs the default process and switches coefficient to be normalized to 0
 #' when there are anticipation effects. If set to FALSE, does not make the switch. Defaults to TRUE.
+#' @param allow_duplicate_id If TRUE, the function estimates a regression where duplicated ID-time rows are weighted by their duplication count. If FALSE, the function raises an error if duplicate unit-time keys exist in the input data. Default is FALSE.
+#' @param avoid_internal_copy If TRUE, the function avoids making an internal deep copy of the input data, and instead directly modifies the input data.table. Default is FALSE.
 #'
 #' @return A list that contains, under "output", the estimation output as an lm_robust object, and under "arguments", the arguments passed to the function.
 #' @import dplyr
 #' @import estimatr
 #' @importFrom stats reformulate
-#' @importFrom data.table setorderv as.data.table .SD
+#' @importFrom data.table setorderv as.data.table is.data.table .SD copy
 #' @export
 #'
 #' @examples
@@ -142,15 +144,20 @@
 
 EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, controls = NULL,
                        proxy = NULL, proxyIV = NULL, FE = TRUE, TFE = TRUE, post, overidpost = 1, pre, overidpre = post + pre,
-                       normalize = -1 * (pre + 1), cluster = TRUE, anticipation_effects_normalization = TRUE) {
+                       normalize = -1 * (pre + 1), cluster = TRUE, anticipation_effects_normalization = TRUE,
+                       allow_duplicate_id = FALSE, avoid_internal_copy = FALSE) {
 
     # Check for errors in arguments
     if (! estimator %in% c("OLS", "FHS")) {stop("estimator should be either 'OLS' or 'FHS'.")}
     if (! is.data.frame(data))            {stop("data should be a data frame.")}
-    if (! is.character(outcomevar))       {stop("outcomevar should be a character.")}
-    if (! is.character(policyvar))        {stop("policyvar should be a character.")}
-    if (! is.character(idvar))            {stop("idvar should be a character.")}
-    if (! is.character(timevar))          {stop("timevar should be a character.")}
+    for (var in c(idvar, timevar, outcomevar, policyvar)) {
+        if ((! is.character(var))) {
+            stop(paste0(var, " should be a character."))
+        }
+        if (! var %in% colnames(data)) {
+            stop(paste0(var, " should be the name of a variable in the dataset."))
+        }
+    }
     if (! (is.null(controls) | is.character(controls))) {stop("controls should be either NULL or a character.")}
 
     if ((estimator == "OLS" & ! is.null(proxy)))        {stop("proxy should only be specified when estimator = 'FHS'.")}
@@ -162,11 +169,13 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c
         stop("When estimator is 'FHS' and there are no leads in the model, proxyIV must be specified explicitly.")
     }
 
-    if (! is.logical(FE))      {stop("FE should be either TRUE or FALSE.")}
-    if (! is.logical(TFE))     {stop("TFE should be either TRUE or FALSE.")}
-    if (! is.logical(cluster)) {stop("cluster should be either TRUE or FALSE.")}
+    for (var in c(FE, TFE, cluster, anticipation_effects_normalization, allow_duplicate_id, avoid_internal_copy)) {
+        if (! is.logical(var)) {
+            stop(paste0(var, " should be either TRUE or FALSE."))
+        }
+    }
+
     if (FE & !cluster)         {stop("cluster=TRUE is required when FE=TRUE.")}
-    if (! is.logical(anticipation_effects_normalization)) {stop("anticipation_effects_normalization should be either TRUE or FALSE.")}
 
     if (! (is.numeric(post)       &  post >= 0      &  post %% 1 == 0))           {stop("post should be a whole number.")}
     if (! (is.numeric(overidpost) & overidpost >= 0 & overidpost %% 1 == 0))      {stop("overidpost should be a whole number.")}
@@ -177,6 +186,9 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c
            & normalize >= -(pre + overidpre + 1) & normalize <= post + overidpost)) {
         stop("normalize should be an integer between -(pre + overidpre + 1) and (post + overidpost).")
     }
+    if (avoid_internal_copy & ! data.table::is.data.table(data)) {
+        warning("`avoid_internal_copy` has no effect because dataset passed to `data` is not a `data.table`.")
+    }
 
     # Check for errors in data
     if (! is.numeric(data[[timevar]])) {stop("timevar column in dataset should be numeric.")}
@@ -184,8 +196,17 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c
         stop("timevar column in dataset should be a vector of integers.")
     }
 
-    data_ids <- as.data.frame(data)[, c(idvar, timevar)]
+    if (data.table::is.data.table(data)) {
+        if (!avoid_internal_copy) {
+            data <- data.table::copy(data)
+        }
+    } else {
+        data <- data.table::as.data.table(data)
+    }
+    data.table::setorderv(data, c(idvar, timevar))
+    data_ids <- data[, .SD, .SDcols = c(idvar, timevar)]
 
+    # Check panel balance and unique keys
     n_units       <- length(base::unique(data[[idvar]]))
     n_periods     <- length(base::unique(data[[timevar]]))
     n_unique_rows <- nrow(data[!base::duplicated(data_ids),])
@@ -195,11 +216,15 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c
     } else {
         unbalanced <- FALSE
     }
-
-    data.table::setorderv(data, c(idvar, timevar))
+    if (n_unique_rows != nrow(data)) {
+        if (allow_duplicate_id == TRUE) {
+            warning("idvar-timevar pairs do not uniquely identify all rows in the data.")
+        } else if (allow_duplicate_id == FALSE) {
+            stop("idvar-timevar pairs do not uniquely identify all rows in the data. Turn on allow_duplicate_id if you want to proceed with weighted duplicated rows.")
+        }
+    }
 
     detect_holes <- function(dt, idvar, timevar) {
-        dt <- data.table::as.data.table(dt)
         holes_per_id <- dt[, .SD[!is.na(base::get(timevar))], by = c(idvar)
                          ][, list(holes = any(base::diff(base::get(timevar)) != 1)),
                             by = c(idvar)]
@@ -264,7 +289,7 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c
                               timevar_holes = timevar_holes)
 
         lead_endpoint_var <- paste0(policyvar, "_lead", num_fd_leads)
-        data[lead_endpoint_var] <- 1 - data[lead_endpoint_var]
+        data[, (lead_endpoint_var) := 1 - get(lead_endpoint_var)]
     }
 
     if (pre != 0 & normalize == -1 & anticipation_effects_normalization) {
diff --git a/man/EventStudy.Rd b/man/EventStudy.Rd
index 115de017..6cb118ca 100644
--- a/man/EventStudy.Rd
+++ b/man/EventStudy.Rd
@@ -22,7 +22,9 @@ EventStudy(
   overidpre = post + pre,
   normalize = -1 * (pre + 1),
   cluster = TRUE,
-  anticipation_effects_normalization = TRUE
+  anticipation_effects_normalization = TRUE,
+  allow_duplicate_id = FALSE,
+  avoid_internal_copy = FALSE
 )
 }
 \arguments{
@@ -73,6 +75,10 @@ Defaults to TRUE. Must be TRUE if FE is TRUE.}
 
 \item{anticipation_effects_normalization}{If set to TRUE, runs the default process and switches coefficient to be normalized to 0
 when there are anticipation effects. If set to FALSE, does not make the switch. Defaults to TRUE.}
+
+\item{allow_duplicate_id}{If TRUE, the function estimates a regression where duplicated ID-time rows are weighted by their duplication count. If FALSE, the function raises an error if duplicate unit-time keys exist in the input data. Default is FALSE.}
+
+\item{avoid_internal_copy}{If TRUE, the function avoids making an internal deep copy of the input data, and instead directly modifies the input data.table. Default is FALSE.}
 }
 \value{
 A list that contains, under "output", the estimation output as an lm_robust object, and under "arguments", the arguments passed to the function.
diff --git a/tests/testthat/test-AddCIs.R b/tests/testthat/test-AddCIs.R
index 9f53b5c1..52ddcbdd 100644
--- a/tests/testthat/test-AddCIs.R
+++ b/tests/testthat/test-AddCIs.R
@@ -1,5 +1,5 @@
 test_that("correctly calculates conf_level at 0.95", {
-    df_test <- as.data.frame(data.table::fread("./input/df_test_AddCI.csv"))
+    df_test <- data.table::fread("./input/df_test_AddCI.csv")
 
     policyvar <- "z"
     eventstudy_coefficients <- c("z_fd_lag1", "z_fd_lead1")
diff --git a/tests/testthat/test-AddMeans.R b/tests/testthat/test-AddMeans.R
index b6d7e947..b19d6e56 100644
--- a/tests/testthat/test-AddMeans.R
+++ b/tests/testthat/test-AddMeans.R
@@ -13,8 +13,7 @@ test_that("means are computed correctly when a first differenced variable is nor
                               sample_estimation[[2]]$policyvar,
                               sample_estimation[[2]]$outcomevar)
 
-    mean_manual   <- mean(df_estimation[df_estimation[,"z_fd_lead1"] != 0, ]$y_base,
-                          na.rm = T)
+    mean_manual <- df_estimation[z_fd_lead1 != 0, mean(y_base, na.rm = TRUE)]
 
     expect_equal(mean_function, mean_manual)
 
@@ -33,8 +32,7 @@ test_that("means are computed correctly when the furthest lead is normalized", {
                               sample_estimation[[2]]$policyvar,
                               sample_estimation[[2]]$outcomevar)
 
-    mean_manual   <- mean(df_estimation[df_estimation[,"z_lead4"] == 0, ]$y_base,
-                          na.rm = T)
+    mean_manual <- df_estimation[z_lead4 == 0, mean(y_base, na.rm = TRUE)]
 
     expect_equal(mean_function, mean_manual)
 })
diff --git a/tests/testthat/test-ComputeFirstDifferences.R b/tests/testthat/test-ComputeFirstDifferences.R
index 313ff90b..ac35470f 100644
--- a/tests/testthat/test-ComputeFirstDifferences.R
+++ b/tests/testthat/test-ComputeFirstDifferences.R
@@ -1,25 +1,27 @@
+library(data.table)
+
 test_that("correctly recognizes wrong variable type for grouping variable", {
-    df_test <- read.csv("./input/df_test.csv")
+    dt_test <- data.table::fread("./input/df_test.csv")
 
-    expect_error(ComputeFirstDifferences(df_test, country, "periods", "values"))
+    expect_error(ComputeFirstDifferences(dt_test, country, "periods", "values"))
 })
 
 test_that("correctly recognizes wrong variable type for diffvar variable", {
-    df_test <- read.csv("./input/df_test.csv")
+    dt_test <- data.table::fread("./input/df_test.csv")
 
-    expect_error(ComputeFirstDifferences(df_test, "country", "periods", values))
+    expect_error(ComputeFirstDifferences(dt_test, "country", "periods", values))
 })
 
 test_that("a column with _fd suffix is added", {
-    df_test <- read.csv("./input/df_test.csv")
+    dt_test <- data.table::fread("./input/df_test.csv")
 
-    df_diff <- ComputeFirstDifferences(df_test, "country", "periods", "values")
+    df_diff <- ComputeFirstDifferences(dt_test, "country", "periods", "values")
 
     expect_true("values_fd" %in% colnames(df_diff))
 })
 
 test_that("correctly computes differences with a balanced dataset", {
-    df <- data.frame(
+    df <- data.table(
         id     = rep(c("A", "B"), each = 4),
         time   = rep(1:4, times = 2),
         policy = c(10, 12, 11, 13, 8, 9, 10, 11),
@@ -32,7 +34,7 @@ test_that("correctly computes differences with a balanced dataset", {
 })
 
 test_that("correctly computes differences with a dataset that has holes in time var", {
-    df <- data.frame(
+    df <- data.table(
         id = c(rep("A", 4), rep("B", 2), rep("C", 3)),
         time = c(1, 2, 4, 5, 2, 3, 2, 3, 4),
         policy = c(10, 12, 13, 14, 8, 9, 10, 11, 12),
diff --git a/tests/testthat/test-ComputeShifts.R b/tests/testthat/test-ComputeShifts.R
index 89c93e86..adfca7d6 100644
--- a/tests/testthat/test-ComputeShifts.R
+++ b/tests/testthat/test-ComputeShifts.R
@@ -23,32 +23,39 @@ test_that("correctly recognizes variables missing from dataset", {
     expect_error(ComputeShifts(df_test, "state",  "periods", "values", 3, F, T))
 })
 
-test_that("correctly adds the desired number of shifts", {
+test_that("correctly adds the desired column(s) of shifts", {
     df_test <- read.csv("./input/df_test.csv")
+    cols_original <- colnames(df_test)
 
     # Add 1 lead/lag
-    expect_equal(ncol(ComputeShifts(df_test, idvar = "country", timevar = "periods",
+    dt_test <- data.table::as.data.table(df_test)
+    cols_new <- colnames(ComputeShifts(dt_test, idvar = "country", timevar = "periods",
                                     shiftvar = "values",
-                                    shiftvalues = 2)),
-                 ncol(df_test) + 1)
-    expect_equal(ncol(ComputeShifts(df_test, idvar = "country", timevar = "periods",
+                                    shiftvalues = 2))
+    expect_equal(cols_new, c(cols_original, "values_lag2"))
+
+    dt_test <- data.table::as.data.table(df_test)
+    cols_new <- colnames(ComputeShifts(dt_test, idvar = "country", timevar = "periods",
                                     shiftvar = "values",
-                                    shiftvalues = -2)),
-                 ncol(df_test) + 1)
+                                    shiftvalues = -2))
+    expect_equal(cols_new, c(cols_original, "values_lead2"))
 
     # Add multiple leads/lags
-    expect_equal(ncol(ComputeShifts(df_test, idvar = "country", timevar = "periods",
+    dt_test <- data.table::as.data.table(df_test)
+    cols_new <- colnames(ComputeShifts(dt_test, idvar = "country", timevar = "periods",
                                     shiftvar = "values",
-                                    shiftvalues = 1:2)),
-                 ncol(df_test) + 2)
-    expect_equal(ncol(ComputeShifts(df_test, idvar = "country", timevar = "periods",
+                                    shiftvalues = 1:2))
+    expect_equal(cols_new, c(cols_original, "values_lag1", "values_lag2"))
+
+    dt_test <- data.table::as.data.table(df_test)
+    cols_new <- colnames(ComputeShifts(dt_test, idvar = "country", timevar = "periods",
                                     shiftvar = "values",
-                                    shiftvalues = -2:-1)),
-                 ncol(df_test) + 2)
+                                    shiftvalues = -2:-1))
+    expect_equal(cols_new, c(cols_original, "values_lead2", "values_lead1"))
 })
 
 test_that("the columns added have correct suffixes", {
-    df_test <- read.csv("./input/df_test.csv")
+    df_test <- data.table::fread("./input/df_test.csv")
 
     df_lags <- ComputeShifts(df_test, idvar = "country", timevar = "periods",
                              shiftvar = "values",
@@ -68,7 +75,7 @@ test_that("the columns added have correct suffixes", {
 })
 
 test_that("correctly shifts variable when there are no holes in timevar", {
-    df <- data.frame(
+    df <- data.table::data.table(
         id      = c(rep("A", 4), rep("B", 2), rep("C", 3)),
         time    = c(1, 2, 3, 4, 1, 2, 1, 2, 3),
         z       = c(10, 12, 13, 14, 8, 9, 10, 11, 12),
@@ -89,7 +96,7 @@ test_that("correctly shifts variable when there are no holes in timevar", {
 })
 
 test_that("correctly shifts variable when there are holes in timevar", {
-    df <- data.frame(
+    df <- data.table::data.table(
         id      = c(rep("A", 4), rep("B", 2), rep("C", 3)),
         time    = c(1, 2, 4, 5, 2, 3, 2, 3, 4),
         z       = c(10, 12, 13, 14, 8, 9, 10, 11, 12),
diff --git a/tests/testthat/test-EventStudy.R b/tests/testthat/test-EventStudy.R
index be8b888f..5c094dd3 100644
--- a/tests/testthat/test-EventStudy.R
+++ b/tests/testthat/test-EventStudy.R
@@ -1,4 +1,46 @@
 
+# Housekeeping ------------------------------------------------------------
+
+test_that("does not modify input data (even if input is data.table) when avoid_internal_copy = FALSE", {
+
+    example_dt <- data.table::as.data.table(example_data)
+    example_dt_copy <- data.table::copy(example_dt)
+
+    outputs <- suppressWarnings(
+        EventStudy(
+            estimator = "OLS", data = example_dt, outcomevar = "y_base",
+                          policyvar = "z", idvar = "id", timevar = "t",
+                          controls = "x_r", FE = TRUE, TFE = TRUE,
+                          post = 2, pre = 3, overidpre = 4,
+                          overidpost = 11, normalize = - 1,
+                          cluster = TRUE, anticipation_effects_normalization = TRUE)
+    )
+
+    expect_true(isTRUE(all.equal(example_dt, example_dt_copy, check.attributes = FALSE)))
+})
+
+test_that("input dt IS modified in-place when avoid_internal_copy = TRUE", {
+
+    example_dt <- data.table::as.data.table(example_data)
+    example_dt_copy <- data.table::copy(example_dt)
+    address_before  <- rlang::obj_address(example_dt)
+
+    outputs <- suppressWarnings(
+        EventStudy(
+            estimator = "OLS", data = example_dt, outcomevar = "y_base",
+            policyvar = "z", idvar = "id", timevar = "t",
+            controls = "x_r", FE = TRUE, TFE = TRUE,
+            post = 2, pre = 3, overidpre = 4,
+            overidpost = 11, normalize = - 1,
+            cluster = TRUE, anticipation_effects_normalization = TRUE,
+            avoid_internal_copy = TRUE)
+    )
+    address_after <- rlang::obj_address(example_dt)
+
+    expect_equal(address_before, address_after)
+    expect_true(isFALSE(identical(example_dt, example_dt_copy)))
+})
+
 # OLS ---------------------------------------------------------------------
 
 test_that("correctly creates highest order shiftvalues", {
@@ -254,42 +296,6 @@ test_that("removes the correct column when normalize = post + overidpost", {
     expect_true(!normalization_column %in% shiftvalues)
 })
 
-test_that("subtraction is peformed on the correct column", {
-
-    post       <- 1
-    pre        <- 1
-    overidpre  <- 2
-    overidpost <- 2
-
-    df_first_diff <- ComputeFirstDifferences(df = df_sample_static, idvar = "id", timevar = "t", diffvar = "z")
-
-    num_fd_lag_periods   <- post + overidpost - 1
-    num_fd_lead_periods  <- pre + overidpre
-
-    furthest_lag_period <- num_fd_lag_periods + 1
-
-    df_fd_leads         <- ComputeShifts(df_first_diff, idvar = "id", timevar = "t",
-                                         shiftvar = paste0("z", "_fd"), shiftvalues = -num_fd_lead_periods:-1)
-    df_fd_leads_shifted <- ComputeShifts(df_fd_leads, idvar = "id", timevar = "t",
-                                         shiftvar = paste0("z", "_fd"), shiftvalues = 1:num_fd_lag_periods)
-
-    df_lag           <- ComputeShifts(df_fd_leads_shifted, idvar = "id", timevar = "t",
-                                      shiftvar = "z", shiftvalues = furthest_lag_period)
-    df_lag_lead      <- ComputeShifts(df_lag, idvar = "id", timevar = "t",
-                                      shiftvar = "z", shiftvalues = -num_fd_lead_periods)
-
-
-    col_subtract_1   <- paste0("z", "_lead", num_fd_lead_periods)
-    df_shift_minus_1 <- 1 - df_lag_lead[col_subtract_1]
-
-    num_equal <- sum(df_shift_minus_1[col_subtract_1] == 1 - df_lag_lead[col_subtract_1], na.rm = TRUE)
-    num_na    <- sum(is.na(df_shift_minus_1[col_subtract_1]))
-    column_subtract_degree <- as.double(stringr::str_extract(col_subtract_1, "(?<=lead)[0-9]+"))
-
-    expect_equal(num_equal + num_na, nrow(df_lag_lead))
-    expect_equal(column_subtract_degree, pre + overidpre)
-})
-
 # FHS ---------------------------------------------------------------------
 
 test_that("correctly creates highest order leads and shiftvalues", {