diff --git a/docs/api/models.rst b/docs/api/models.rst index 7368dec94..e3bb4032c 100644 --- a/docs/api/models.rst +++ b/docs/api/models.rst @@ -184,6 +184,7 @@ API Reference models/pyhealth.models.SafeDrug models/pyhealth.models.MoleRec models/pyhealth.models.Deepr + models/pyhealth.models.DuETT models/pyhealth.models.EHRMamba models/pyhealth.models.JambaEHR models/pyhealth.models.ContraWR diff --git a/docs/api/models/pyhealth.models.DuETT.rst b/docs/api/models/pyhealth.models.DuETT.rst new file mode 100644 index 000000000..0e5c1493a --- /dev/null +++ b/docs/api/models/pyhealth.models.DuETT.rst @@ -0,0 +1,14 @@ +pyhealth.models.DuETT +=================================== + +The separate callable DuETTLayer and the complete DuETT model. + +.. autoclass:: pyhealth.models.DuETTLayer + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: pyhealth.models.DuETT + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/tasks.rst b/docs/api/tasks.rst index 399b8f1aa..381dcd3be 100644 --- a/docs/api/tasks.rst +++ b/docs/api/tasks.rst @@ -212,6 +212,7 @@ Available Tasks COVID-19 CXR Classification DKA Prediction (MIMIC-IV) Drug Recommendation + ICU Mortality DuETT (MIMIC-IV) Length of Stay Prediction Medical Transcriptions Classification Mortality Prediction (Next Visit) diff --git a/docs/api/tasks/pyhealth.tasks.ICUMortalityDuETTMIMIC4.rst b/docs/api/tasks/pyhealth.tasks.ICUMortalityDuETTMIMIC4.rst new file mode 100644 index 000000000..ff87cdd30 --- /dev/null +++ b/docs/api/tasks/pyhealth.tasks.ICUMortalityDuETTMIMIC4.rst @@ -0,0 +1,7 @@ +pyhealth.tasks.ICUMortalityDuETTMIMIC4 +======================================= + +.. 
"""DuETT for ICU Mortality Prediction on MIMIC-IV with Ablation Study.

This example demonstrates:
1. Loading MIMIC-IV data with the DuETT-specific mortality task
2. Training the DuETT model for ICU mortality prediction
3. Running an ablation study over model hyperparameters
4. Comparing ROC-AUC and PR-AUC across configurations

Paper: Labach et al. 2023. DuETT: Dual Event Time Transformer for
Electronic Health Records. ML4H 2023, PMLR 219:295-315.
https://proceedings.mlr.press/v219/labach23a.html

Ablation Study Design:
    - Primary: Vary d_embedding (64, 128, 256), dropout (0.1, 0.3, 0.5),
      and layer depth (1x1, 2x2) to compare ROC-AUC and PR-AUC.
    - Secondary: Vary n_time_bins (12, 24, 48) for preprocessing.

Usage:
    # Set the path to your MIMIC-IV root, then run:
    export MIMIC_ROOT=/path/to/mimic-iv/3.1
    python mortality_mimic4_duett.py

    # Full-data mode (disable the dev=True subset of ~1000 patients):
    export MIMIC_DEV=0
    python mortality_mimic4_duett.py

Notes:
    - Requires MIMIC-IV access (PhysioNet credentialing).
    - Use MIMIC-IV demo for testing: physionet.org/content/mimic-iv-demo/2.2/
    - Designed for GPU (RTX 4060 Ti / Colab T4). CPU works but is slower.
    - By default runs in dev mode (~1000-patient subset) for tractability.
"""

import os

import torch

from pyhealth.datasets import (
    MIMIC4Dataset,
    get_dataloader,
    split_by_patient,
)
from pyhealth.models import DuETT
from pyhealth.tasks import ICUMortalityDuETTMIMIC4
from pyhealth.trainer import Trainer


def _fmt_metric(value):
    """Format a metric value for display.

    Args:
        value: Metric value from a results dict; may be missing/None.

    Returns:
        The value rendered with 4 decimal places when numeric, otherwise
        the literal string "N/A".  Guarding here avoids the ValueError
        raised by applying the ``:.4f`` format spec to a non-numeric
        placeholder such as "N/A".
    """
    if isinstance(value, (int, float)):
        return f"{value:.4f}"
    return "N/A"


def run_experiment(
    sample_dataset,
    train_dataset,
    val_dataset,
    test_dataset,
    config,
    device="cuda" if torch.cuda.is_available() else "cpu",
):
    """Train and evaluate a single DuETT configuration.

    Args:
        sample_dataset: Full SampleDataset (for model init).
        train_dataset: Training split.
        val_dataset: Validation split.
        test_dataset: Test split.
        config: Dict with name, d_embedding, n_event_layers,
            n_time_layers, n_heads, dropout, and optionally
            fusion_method.
        device: Compute device.

    Returns:
        Dict with config name and test metrics.
    """
    train_loader = get_dataloader(train_dataset, batch_size=64, shuffle=True)
    val_loader = get_dataloader(val_dataset, batch_size=64, shuffle=False)
    test_loader = get_dataloader(test_dataset, batch_size=64, shuffle=False)

    model = DuETT(
        dataset=sample_dataset,
        d_embedding=config["d_embedding"],
        n_event_layers=config["n_event_layers"],
        n_time_layers=config["n_time_layers"],
        n_heads=config["n_heads"],
        dropout=config["dropout"],
        fusion_method=config.get("fusion_method", "rep_token"),
    )

    num_params = sum(p.numel() for p in model.parameters())
    print(f"\n  Config: {config['name']}")
    print(f"  Parameters: {num_params:,}")

    trainer = Trainer(
        model=model,
        device=device,
        metrics=["pr_auc", "roc_auc"],
    )

    trainer.train(
        train_dataloader=train_loader,
        val_dataloader=val_loader,
        epochs=20,
        monitor="roc_auc",
        optimizer_params={"lr": 1e-4},
    )

    results = trainer.evaluate(test_loader)
    results["config"] = config["name"]
    return results


if __name__ == "__main__":
    # ---- Configuration ----
    # Set the MIMIC_ROOT environment variable to your MIMIC-IV directory,
    # or update the default path below.
    MIMIC_ROOT = os.environ.get("MIMIC_ROOT", "/path/to/mimic-iv/2.2")
    DEV_MODE = os.environ.get("MIMIC_DEV", "1") == "1"
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    N_TIME_BINS = 24
    INPUT_WINDOW_HOURS = 48

    print("=" * 60)
    print("DuETT ICU Mortality Prediction - Ablation Study")
    print("=" * 60)

    # ---- Step 1: Load MIMIC-IV ----
    print(f"\n[1/4] Loading MIMIC-IV dataset (dev_mode={DEV_MODE})...")
    base_dataset = MIMIC4Dataset(
        ehr_root=MIMIC_ROOT,
        ehr_tables=["patients", "admissions", "labevents"],
        dev=DEV_MODE,
    )

    # ---- Step 2: Apply DuETT mortality task ----
    print("\n[2/4] Applying DuETT mortality prediction task...")
    task = ICUMortalityDuETTMIMIC4(
        n_time_bins=N_TIME_BINS,
        input_window_hours=INPUT_WINDOW_HOURS,
    )
    sample_dataset = base_dataset.set_task(task)
    print(f"  Total samples: {len(sample_dataset)}")

    # ---- Step 3: Split dataset ----
    print("\n[3/4] Splitting dataset (80/10/10)...")
    train_ds, val_ds, test_ds = split_by_patient(
        sample_dataset, [0.8, 0.1, 0.1]
    )
    print(f"  Train: {len(train_ds)}, Val: {len(val_ds)}, "
          f"Test: {len(test_ds)}")

    # ---- Step 4: Ablation Study ----
    print("\n[4/4] Running ablation study...")

    # Ablation configurations: each entry overrides the shared defaults.
    base_config = {
        "d_embedding": 128,
        "n_event_layers": 1,
        "n_time_layers": 1,
        "n_heads": 4,
        "dropout": 0.3,
    }
    configs = [
        {**base_config, "name": "Small (d=64)", "d_embedding": 64},
        {**base_config, "name": "Medium (d=128)"},
        {**base_config, "name": "Large (d=256)", "d_embedding": 256},
        {**base_config, "name": "Low dropout (0.1)", "dropout": 0.1},
        {**base_config, "name": "High dropout (0.5)", "dropout": 0.5},
        {**base_config, "name": "Deeper (2x2 layers)",
         "n_event_layers": 2, "n_time_layers": 2},
    ]

    all_results = []
    for config in configs:
        try:
            result = run_experiment(
                sample_dataset, train_ds, val_ds, test_ds,
                config, device=DEVICE,
            )
        except Exception as e:
            # Record the failure but keep the ablation sweep going.
            print(f"  FAILED: {e}")
            all_results.append({"config": config["name"], "error": str(e)})
            continue
        all_results.append(result)
        # _fmt_metric avoids crashing on a missing metric key (formatting
        # the "N/A" fallback with :.4f would raise and mis-record a
        # successful run as FAILED).
        print(f"  ROC-AUC: {_fmt_metric(result.get('roc_auc'))}, "
              f"PR-AUC: {_fmt_metric(result.get('pr_auc'))}")

    # ---- Print Results Table ----
    print("\n" + "=" * 60)
    print("ABLATION RESULTS")
    print("=" * 60)
    print(f"{'Configuration':<25} {'ROC-AUC':>10} {'PR-AUC':>10}")
    print("-" * 45)
    for r in all_results:
        if "error" in r:
            print(f"{r['config']:<25} {'ERROR':>10} {'ERROR':>10}")
        else:
            roc = _fmt_metric(r.get("roc_auc", 0.0))
            pr = _fmt_metric(r.get("pr_auc", 0.0))
            print(f"{r['config']:<25} {roc:>10} {pr:>10}")
    print("=" * 60)
"""DuETT: Dual Event Time Transformer for Electronic Health Records.

Author: Shubham Srivastava (ss253@illinois.edu)
Paper: DuETT: Dual Event Time Transformer for Electronic Health Records.
Paper Link: https://proceedings.mlr.press/v219/labach23a.html

Description:
    This module implements the DuETT model which treats EHR data as a
    two-dimensional event-type x time matrix and applies alternating
    Transformer attention over each axis. The event-axis attention captures
    inter-variable relationships at each timestep, while the time-axis
    attention captures temporal dynamics per variable. The model accepts
    pre-binned tensors where irregular observations have been aggregated
    into fixed time windows, with observation counts retained per cell to
    distinguish true zeros from missing entries.

    Reference: Labach, A.; Pokhrel, A.; Huang, X. S.; Zuberi, S.;
    Yi, S. E.; Volkovs, M.; Poutanen, T.; and Krishnan, R. G. 2023.
    DuETT: Dual Event Time Transformer for Electronic Health Records.
    In Proceedings of the 4th Machine Learning for Health Symposium,
    volume 219 of Proceedings of Machine Learning Research, 295-315.
"""

import torch
import torch.nn as nn

# Pooling strategies accepted by DuETTLayer / DuETT.
_FUSION_METHODS = ("rep_token", "averaging", "masked_embed")


class DuETTLayer(nn.Module):
    """Core DuETT encoder with dual-axis attention.

    Applies alternating Transformer attention over the event dimension
    (cross-variable relationships) and the time dimension (temporal
    dynamics). Accepts pre-binned event-by-time tensors with separate
    value and observation count inputs.

    Args:
        d_time_series: Number of event-type variables (V).
        d_static: Dimension of static patient features. When 0, the
            static pathway is disabled entirely.
        d_embedding: Hidden dimension for Transformer layers.
            Default is 128.
        n_event_layers: Number of event-axis Transformer encoder
            layers. Default is 1.
        n_time_layers: Number of time-axis Transformer encoder
            layers. Default is 1.
        n_heads: Number of attention heads. Default is 4.
        dropout: Dropout rate. Default is 0.3.
        fusion_method: Method for pooling the final representation.
            One of "rep_token", "averaging", or "masked_embed".
            Default is "rep_token".

    Raises:
        ValueError: If ``fusion_method`` is not one of "rep_token",
            "averaging", or "masked_embed".

    Examples:
        >>> layer = DuETTLayer(d_time_series=10, d_static=2)
        >>> x_values = torch.randn(4, 24, 10)  # (B, T, V)
        >>> x_counts = torch.ones(4, 24, 10)   # (B, T, V)
        >>> static = torch.randn(4, 2)         # (B, S)
        >>> times = torch.linspace(0, 1, 24).unsqueeze(0).expand(4, -1)
        >>> emb = layer(x_values, x_counts, static, times)
        >>> emb.shape
        torch.Size([4, 128])
    """

    def __init__(
        self,
        d_time_series: int,
        d_static: int,
        d_embedding: int = 128,
        n_event_layers: int = 1,
        n_time_layers: int = 1,
        n_heads: int = 4,
        dropout: float = 0.3,
        fusion_method: str = "rep_token",
    ):
        super().__init__()

        # Fail fast on a bad pooling strategy. Previously this was only
        # detected at the end of forward(), i.e. after a full dual-
        # transformer pass on the first batch.
        if fusion_method not in _FUSION_METHODS:
            raise ValueError(
                f"Unknown fusion method: {fusion_method}"
            )

        self.d_time_series = d_time_series
        self.d_static = d_static
        self.d_embedding = d_embedding
        self.n_event_layers = n_event_layers
        self.n_time_layers = n_time_layers
        self.fusion_method = fusion_method

        # Per-variable value embeddings: project each scalar to d_embedding.
        # A ModuleList of Linear(1, D) keeps one independent projection per
        # variable, which is integral to the DuETT design.
        self.value_embeddings = nn.ModuleList(
            [nn.Linear(1, d_embedding) for _ in range(d_time_series)]
        )

        # Per-variable observation-count embeddings (lets the model tell
        # true zeros apart from missing cells).
        self.count_embeddings = nn.ModuleList(
            [nn.Linear(1, d_embedding) for _ in range(d_time_series)]
        )

        # Static feature encoder (None when there are no static features).
        if d_static > 0:
            self.static_encoder = nn.Sequential(
                nn.Linear(d_static, d_embedding),
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(d_embedding, d_embedding),
            )
        else:
            self.static_encoder = None

        # Time projection: scalar bin-endpoint times -> d_embedding,
        # used as a learned positional encoding over real time.
        self.time_proj = nn.Linear(1, d_embedding)

        # Event-axis Transformer (attends across variables per timestep).
        event_encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_embedding,
            nhead=n_heads,
            dim_feedforward=d_embedding * 4,
            dropout=dropout,
            batch_first=True,
            norm_first=True,
        )
        self.event_transformer = nn.TransformerEncoder(
            event_encoder_layer, num_layers=n_event_layers
        )

        # Time-axis Transformer (attends across timesteps per variable).
        time_encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_embedding,
            nhead=n_heads,
            dim_feedforward=d_embedding * 4,
            dropout=dropout,
            batch_first=True,
            norm_first=True,
        )
        self.time_transformer = nn.TransformerEncoder(
            time_encoder_layer, num_layers=n_time_layers
        )

        # Learnable representation token, only needed for "rep_token"
        # pooling; small init keeps it near the residual stream's scale.
        if fusion_method == "rep_token":
            self.rep_token = nn.Parameter(
                torch.randn(1, 1, d_embedding) * 0.02
            )

        # Layer norm applied to the pooled patient embedding.
        self.output_norm = nn.LayerNorm(d_embedding)

    def forward(
        self,
        x_values: torch.Tensor,
        x_counts: torch.Tensor,
        static: torch.Tensor,
        times: torch.Tensor,
    ) -> torch.Tensor:
        """Forward pass of the DuETT encoder.

        Args:
            x_values: Binned time-series values of shape (B, T, V).
            x_counts: Observation counts of shape (B, T, V).
            static: Static patient features of shape (B, S); ignored
                when the layer was built with ``d_static == 0``.
            times: Bin endpoint times of shape (B, T).

        Returns:
            Patient embedding tensor of shape (B, d_embedding).
        """
        B, T, V = x_values.shape

        # Per-variable embedding: project each variable independently,
        # summing value and count embeddings per cell.
        var_embeddings = []
        for v in range(V):
            val_emb = self.value_embeddings[v](
                x_values[:, :, v : v + 1]
            )  # (B, T, D)
            cnt_emb = self.count_embeddings[v](
                x_counts[:, :, v : v + 1]
            )  # (B, T, D)
            var_embeddings.append(val_emb + cnt_emb)

        # Stack: (B, T, V, D)
        x = torch.stack(var_embeddings, dim=2)

        # Add time-based positional encoding using actual bin times.
        time_emb = self.time_proj(times.unsqueeze(-1))  # (B, T, D)
        x = x + time_emb.unsqueeze(2)  # broadcast across V

        # Fuse static features (broadcast across T and V).
        if self.static_encoder is not None:
            static_emb = self.static_encoder(static)  # (B, D)
            x = x + static_emb.unsqueeze(1).unsqueeze(2)

        # Dual-axis attention: event axis first, then time axis.
        # Event-axis attention: attend across variables at each timestep.
        # Reshape (B, T, V, D) -> (B*T, V, D).
        x = x.reshape(B * T, V, -1)

        if self.fusion_method == "rep_token":
            # Prepend the rep token along the variable dimension.
            rep = self.rep_token.expand(B * T, -1, -1)  # (B*T, 1, D)
            x = torch.cat([rep, x], dim=1)  # (B*T, V+1, D)

        x = self.event_transformer(x)  # (B*T, V(+1), D)

        if self.fusion_method == "rep_token":
            # Split rep-token output from the per-variable embeddings.
            rep_out = x[:, 0, :]  # (B*T, D)
            x = x[:, 1:, :]  # (B*T, V, D)

        # Reshape back: (B, T, V, D).
        x = x.reshape(B, T, V, -1)

        # Time-axis attention: attend across timesteps for each variable.
        # Reshape (B, T, V, D) -> (B*V, T, D).
        x = x.permute(0, 2, 1, 3).reshape(B * V, T, -1)
        x = self.time_transformer(x)  # (B*V, T, D)

        # Reshape back: (B, V, T, D).
        x = x.reshape(B, V, T, -1)

        # Pool to a single (B, D) patient embedding.
        if self.fusion_method == "rep_token":
            # Use rep-token outputs, averaged over time.
            rep_out = rep_out.reshape(B, T, -1)  # (B, T, D)
            patient_emb = rep_out.mean(dim=1)  # (B, D)
        elif self.fusion_method == "averaging":
            # Plain average over both V and T.
            patient_emb = x.mean(dim=(1, 2))  # (B, D)
        elif self.fusion_method == "masked_embed":
            # Weight timesteps by how many observations they hold.
            count_weights = x_counts.sum(dim=2)  # (B, T)
            count_weights = count_weights / (
                count_weights.sum(dim=1, keepdim=True) + 1e-8
            )
            # Average over variables, then count-weighted sum over time.
            x_var_avg = x.mean(dim=1)  # (B, T, D)
            patient_emb = (
                x_var_avg * count_weights.unsqueeze(-1)
            ).sum(dim=1)  # (B, D)
        else:
            # Defensive: unreachable unless fusion_method was mutated
            # after construction (it is validated in __init__).
            raise ValueError(
                f"Unknown fusion method: {self.fusion_method}"
            )

        patient_emb = self.output_norm(patient_emb)
        return patient_emb
from typing import Dict

import torch
import torch.nn as nn

from pyhealth.datasets import SampleDataset
from pyhealth.models import BaseModel


class DuETT(BaseModel):
    """DuETT model for clinical prediction from EHR time series.

    DuETT (Dual Event Time Transformer) represents a hospital stay along
    two explicit axes -- event type and time -- and alternates attention
    over each. Inputs are pre-binned event-by-time tensors in which
    irregular observations have been aggregated into fixed windows.

    EmbeddingModel is deliberately not used here: DuETT's per-variable
    linear projections are integral to its architecture.

    Args:
        dataset: The SampleDataset used to train the model.
        ts_values_key: Key for binned time-series values in the sample
            dict. Default is "ts_values".
        ts_counts_key: Key for observation counts in the sample dict.
            Default is "ts_counts".
        static_key: Key for static patient features. Default is "static".
        times_key: Key for bin endpoint times. Default is "times".
        d_embedding: Hidden dimension for Transformer layers.
            Default is 128.
        n_event_layers: Number of event-axis Transformer layers.
            Default is 1.
        n_time_layers: Number of time-axis Transformer layers.
            Default is 1.
        n_heads: Number of attention heads. Default is 4.
        dropout: Dropout rate. Default is 0.3.
        fusion_method: Pooling method. One of "rep_token", "averaging",
            or "masked_embed". Default is "rep_token".

    Examples:
        >>> from pyhealth.datasets import create_sample_dataset
        >>> samples = [
        ...     {
        ...         "patient_id": "p0",
        ...         "ts_values": [[0.5, 0.3], [0.1, 0.0]],
        ...         "ts_counts": [[1.0, 1.0], [1.0, 0.0]],
        ...         "static": [0.65, 1.0],
        ...         "times": [0.5, 1.0],
        ...         "mortality": 0,
        ...     },
        ...     {
        ...         "patient_id": "p1",
        ...         "ts_values": [[0.8, 0.2], [0.4, 0.6]],
        ...         "ts_counts": [[1.0, 1.0], [1.0, 1.0]],
        ...         "static": [0.45, 0.0],
        ...         "times": [0.5, 1.0],
        ...         "mortality": 1,
        ...     },
        ... ]
        >>> dataset = create_sample_dataset(
        ...     samples=samples,
        ...     input_schema={
        ...         "ts_values": "tensor",
        ...         "ts_counts": "tensor",
        ...         "static": "tensor",
        ...         "times": "tensor",
        ...     },
        ...     output_schema={"mortality": "binary"},
        ...     dataset_name="test",
        ... )
        >>> model = DuETT(dataset=dataset, d_embedding=64)
        >>> from pyhealth.datasets import get_dataloader
        >>> loader = get_dataloader(dataset, batch_size=2)
        >>> batch = next(iter(loader))
        >>> output = model(**batch)
        >>> output["y_prob"].shape
        torch.Size([2, 1])

    Note:
        Paper: Labach et al. 2023. DuETT: Dual Event Time Transformer
        for Electronic Health Records. ML4H 2023, PMLR 219:295-315.
    """

    def __init__(
        self,
        dataset: SampleDataset,
        ts_values_key: str = "ts_values",
        ts_counts_key: str = "ts_counts",
        static_key: str = "static",
        times_key: str = "times",
        d_embedding: int = 128,
        n_event_layers: int = 1,
        n_time_layers: int = 1,
        n_heads: int = 4,
        dropout: float = 0.3,
        fusion_method: str = "rep_token",
    ):
        super().__init__(dataset=dataset)

        # Record the configuration verbatim.
        self.ts_values_key = ts_values_key
        self.ts_counts_key = ts_counts_key
        self.static_key = static_key
        self.times_key = times_key
        self.d_embedding = d_embedding
        self.n_event_layers = n_event_layers
        self.n_time_layers = n_time_layers
        self.n_heads = n_heads
        self.dropout = dropout
        self.fusion_method = fusion_method

        assert (
            len(self.label_keys) == 1
        ), "DuETT supports a single label key"
        self.label_key = self.label_keys[0]

        # Infer tensor dimensions by probing the first sample.
        probe = dataset[0]

        ts_probe = probe[ts_values_key]
        if isinstance(ts_probe, torch.Tensor):
            n_vars = ts_probe.shape[-1]
        elif ts_probe:
            # Nested list: variables live along the inner dimension.
            n_vars = len(ts_probe[0])
        else:
            n_vars = 1

        static_probe = probe[static_key]
        if isinstance(static_probe, torch.Tensor):
            n_static = static_probe.shape[-1]
        elif static_probe:
            n_static = len(static_probe)
        else:
            n_static = 0

        self.d_time_series = n_vars
        self.d_static = n_static

        # Core DuETT encoder.
        self.duett_layer = DuETTLayer(
            d_time_series=n_vars,
            d_static=n_static,
            d_embedding=d_embedding,
            n_event_layers=n_event_layers,
            n_time_layers=n_time_layers,
            n_heads=n_heads,
            dropout=dropout,
            fusion_method=fusion_method,
        )

        # Classification head on the pooled patient embedding.
        output_size = self.get_output_size()
        self.fc = nn.Sequential(
            nn.Linear(d_embedding, d_embedding),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_embedding, output_size),
        )

    def forward(
        self, **kwargs: torch.Tensor
    ) -> Dict[str, torch.Tensor]:
        """Forward pass of the DuETT model.

        Args:
            **kwargs: Keyword arguments containing input tensors keyed
                by ts_values_key, ts_counts_key, static_key, times_key,
                and the label key.

        Returns:
            Dict with keys "loss", "y_prob", "y_true", "logit", and
            optionally "embed".
        """

        def _fetch(key: str) -> torch.Tensor:
            # Cast to float and move onto the model's device in one step.
            return kwargs[key].float().to(self.device)

        values = _fetch(self.ts_values_key)
        counts = _fetch(self.ts_counts_key)
        statics = _fetch(self.static_key)
        bin_times = _fetch(self.times_key)

        # Promote (B, T) inputs to (B, T, 1) for the single-variable case.
        if values.dim() == 2:
            values = values.unsqueeze(-1)
            counts = counts.unsqueeze(-1)

        # Encode the stay into a single patient embedding.
        embedding = self.duett_layer(values, counts, statics, bin_times)

        # Classification head.
        logits = self.fc(embedding)

        # Loss computation depends on the task mode.
        targets = kwargs[self.label_key].to(self.device)
        criterion = self.get_loss_function()

        if self.mode == "multiclass":
            # cross_entropy expects (N,) long targets.
            loss = criterion(logits, targets.long())
        else:
            # binary/multilabel losses expect matching shapes.
            if targets.dim() == 1 and logits.dim() == 2:
                targets = targets.unsqueeze(-1).float()
            loss = criterion(logits, targets)

        output = {
            "loss": loss,
            "y_prob": self.prepare_y_prob(logits),
            "y_true": targets,
            "logit": logits,
        }
        if kwargs.get("embed", False):
            output["embed"] = embedding

        return output
"""ICU Mortality Prediction Task for DuETT using MIMIC-IV.

Author: Shubham Srivastava (ss253@illinois.edu)

Description:
    This task converts irregular MIMIC-IV lab events into the fixed
    event-by-time tensor format required by DuETT. Observations are
    binned into uniform time windows within a configurable observation
    period after admission. Each cell stores the mean lab value and
    the observation count, allowing the model to distinguish true zeros
    from missing entries.

    The task produces patient-level samples with ICU mortality labels
    derived from the hospital_expire_flag field in MIMIC-IV admissions.
"""

from datetime import datetime, timedelta
from typing import Any, ClassVar, Dict, List, Tuple

import polars as pl

from .base_task import BaseTask


class ICUMortalityDuETTMIMIC4(BaseTask):
    """Task for ICU mortality prediction using DuETT format on MIMIC-IV.

    Converts irregular lab events into a fixed event-by-time tensor by
    binning into uniform time windows. Produces two tensors per sample:
    binned mean values and observation counts, plus static features
    (age, sex) and bin endpoint times.

    Args:
        n_time_bins: Number of uniform time bins within the observation
            window. Default is 24.
        input_window_hours: Hours after admission to observe.
            Default is 48.

    Raises:
        ValueError: If ``n_time_bins`` is less than 1 or
            ``input_window_hours`` is not positive (either would
            otherwise corrupt the bin-width computation).

    Attributes:
        task_name (str): Name of the task.
        input_schema (Dict[str, str]): Schema for input data.
        output_schema (Dict[str, str]): Schema for output data.

    Examples:
        >>> from pyhealth.datasets import MIMIC4EHRDataset
        >>> from pyhealth.tasks import ICUMortalityDuETTMIMIC4
        >>> dataset = MIMIC4EHRDataset(
        ...     root="/path/to/mimic-iv/2.2",
        ...     tables=["patients", "admissions", "labevents"],
        ... )
        >>> task = ICUMortalityDuETTMIMIC4(n_time_bins=24)
        >>> samples = dataset.set_task(task)
    """

    task_name: str = "ICUMortalityDuETTMIMIC4"

    input_schema: ClassVar[Dict[str, str]] = {
        "ts_values": "tensor",
        "ts_counts": "tensor",
        "static": "tensor",
        "times": "tensor",
    }
    output_schema: ClassVar[Dict[str, str]] = {"mortality": "binary"}

    # 10 lab categories matching existing MIMIC-IV mortality tasks.
    LAB_CATEGORIES: ClassVar[Dict[str, List[str]]] = {
        "Sodium": ["50824", "52455", "50983", "52623"],
        "Potassium": ["50822", "52452", "50971", "52610"],
        "Chloride": ["50806", "52434", "50902", "52535"],
        "Bicarbonate": ["50803", "50804"],
        "Glucose": ["50809", "52027", "50931", "52569"],
        "Calcium": ["50808", "51624"],
        "Magnesium": ["50960"],
        "Anion Gap": ["50868", "52500"],
        "Osmolality": ["52031", "50964", "51701"],
        "Phosphate": ["50970"],
    }

    # Fixed variable ordering: index in this list == column index in the
    # produced (n_time_bins, D_VARS) tensors.
    LAB_CATEGORY_NAMES: ClassVar[List[str]] = [
        "Sodium",
        "Potassium",
        "Chloride",
        "Bicarbonate",
        "Glucose",
        "Calcium",
        "Magnesium",
        "Anion Gap",
        "Osmolality",
        "Phosphate",
    ]

    # Flat list of all itemids of interest (outermost iterable of a class
    # body comprehension is evaluated in class scope, so this is legal).
    LABITEMS: ClassVar[List[str]] = [
        item
        for itemids in LAB_CATEGORIES.values()
        for item in itemids
    ]

    # Map each itemid to its category index for O(1) lookup.
    # NOTE: built with an explicit loop rather than a dict comprehension
    # because comprehension bodies execute in their own scope and cannot
    # see class-level names (LAB_CATEGORIES would raise NameError there).
    _ITEMID_TO_CAT_IDX: ClassVar[Dict[str, int]] = {}
    for _idx, _cat in enumerate(LAB_CATEGORY_NAMES):
        for _itemid in LAB_CATEGORIES[_cat]:
            _ITEMID_TO_CAT_IDX[_itemid] = _idx
    del _idx, _cat, _itemid  # keep loop temporaries off the class

    D_VARS: ClassVar[int] = len(LAB_CATEGORY_NAMES)  # 10

    def __init__(
        self,
        n_time_bins: int = 24,
        input_window_hours: int = 48,
    ):
        """Initialize the task.

        Args:
            n_time_bins: Number of uniform time bins. Default is 24.
            input_window_hours: Observation window in hours after
                admission. Default is 48.

        Raises:
            ValueError: If either argument is not positive
                (n_time_bins=0 would later cause a ZeroDivisionError
                when computing the bin width; a non-positive window
                would silently yield no samples).
        """
        if n_time_bins < 1:
            raise ValueError(
                f"n_time_bins must be a positive integer, got {n_time_bins}"
            )
        if input_window_hours <= 0:
            raise ValueError(
                f"input_window_hours must be positive, got {input_window_hours}"
            )
        self.n_time_bins = n_time_bins
        self.input_window_hours = input_window_hours

    def __call__(self, patient: Any) -> List[Dict[str, Any]]:
        """Process a patient to create DuETT-format mortality samples.

        Creates one sample per qualifying admission (adult patient,
        admission at least as long as the observation window, at least
        one relevant lab event in the window). Bins lab events into a
        fixed event-by-time tensor with observation counts.

        Args:
            patient: Patient object with get_events method.

        Returns:
            List of sample dicts with ts_values, ts_counts, static,
            times, and mortality label.
        """
        # Filter by age >= 18.
        demographics = patient.get_events(event_type="patients")
        if not demographics:
            return []

        demographics = demographics[0]
        try:
            anchor_age = int(demographics.anchor_age)
            if anchor_age < 18:
                return []
        except (ValueError, TypeError, AttributeError):
            # Unparseable age: exclude the patient rather than guess.
            return []

        # Binary sex encoding for the static feature vector.
        try:
            gender = demographics.gender
            gender_val = 1.0 if gender == "M" else 0.0
        except AttributeError:
            gender_val = 0.0

        admissions = patient.get_events(event_type="admissions")
        if not admissions:
            return []

        samples = []
        window_td = timedelta(hours=self.input_window_hours)

        for admission in admissions:
            try:
                # NOTE(review): admission.timestamp is assumed to be the
                # admission time (admittime) -- confirm against the
                # MIMIC4Dataset event schema.
                admission_time = admission.timestamp
                dischtime = datetime.strptime(
                    admission.dischtime, "%Y-%m-%d %H:%M:%S"
                )
            except (ValueError, AttributeError):
                continue

            if dischtime < admission_time:
                continue

            # Require the admission to span the whole observation window
            # (avoids label leakage from short stays).
            duration_hours = (
                dischtime - admission_time
            ).total_seconds() / 3600.0
            if duration_hours < self.input_window_hours:
                continue

            # Mortality label; default to 0 when the flag is missing.
            try:
                mortality = int(admission.hospital_expire_flag)
            except (ValueError, TypeError, AttributeError):
                mortality = 0

            # Lab events within the observation window.
            window_end = admission_time + window_td
            labevents_df = patient.get_events(
                event_type="labevents",
                start=admission_time,
                end=window_end,
                return_df=True,
            )

            # Keep only the lab items mapped to our 10 categories.
            labevents_df = labevents_df.filter(
                pl.col("labevents/itemid").is_in(self.LABITEMS)
            )

            if labevents_df.height == 0:
                continue

            # Only use results *stored* within the window (storetime is
            # when the value became available, preventing leakage of
            # results finalized after the observation period).
            labevents_df = labevents_df.with_columns(
                pl.col("labevents/storetime").str.strptime(
                    pl.Datetime, "%Y-%m-%d %H:%M:%S"
                )
            )
            labevents_df = labevents_df.filter(
                pl.col("labevents/storetime") <= window_end
            )

            if labevents_df.height == 0:
                continue

            # Build the event-by-time tensor via binning.
            ts_values, ts_counts = self._bin_observations(
                labevents_df, admission_time
            )

            # Bin endpoint times in fractional days (n_time_bins >= 1 is
            # guaranteed by __init__, so this never divides by zero).
            bin_duration_days = (
                self.input_window_hours / self.n_time_bins / 24.0
            )
            times = [
                (b + 1) * bin_duration_days
                for b in range(self.n_time_bins)
            ]

            # Static features: normalized age + binary sex.
            static = [anchor_age / 100.0, gender_val]

            samples.append(
                {
                    "patient_id": patient.patient_id,
                    "ts_values": ts_values,
                    "ts_counts": ts_counts,
                    "static": static,
                    "times": times,
                    "mortality": mortality,
                }
            )

        return samples

    def _bin_observations(
        self,
        labevents_df: pl.DataFrame,
        admission_time: datetime,
    ) -> Tuple[List[List[float]], List[List[float]]]:
        """Bin lab events into a fixed event-by-time tensor.

        Args:
            labevents_df: Filtered lab events DataFrame with itemid and
                valuenum columns.
            admission_time: Admission timestamp used as the reference
                point for time-bin offset calculation.

        Returns:
            A tuple ``(ts_values, ts_counts)``:

            - ``ts_values``: Nested list of shape ``(n_time_bins, D_VARS)``
              containing the mean lab value within each (bin, variable)
              cell. Cells with zero observations contain ``0.0``.
            - ``ts_counts``: Nested list of shape ``(n_time_bins, D_VARS)``
              containing the number of observations that fell into each
              (bin, variable) cell, preserving the distinction between
              truly zero values and missing measurements.
        """
        n_bins = self.n_time_bins
        d_vars = self.D_VARS
        window_seconds = self.input_window_hours * 3600.0

        # Accumulators: running sum and count per (bin, variable) cell.
        sums = [[0.0] * d_vars for _ in range(n_bins)]
        counts = [[0.0] * d_vars for _ in range(n_bins)]

        for row in labevents_df.iter_rows(named=True):
            itemid = row["labevents/itemid"]
            valuenum = row.get("labevents/valuenum")

            if itemid not in self._ITEMID_TO_CAT_IDX:
                continue
            if valuenum is None:
                continue

            try:
                value = float(valuenum)
            except (ValueError, TypeError):
                continue

            # Time offset from admission; drop anything outside the
            # half-open observation window [0, window_seconds).
            event_time = row["timestamp"]
            offset_seconds = (
                event_time - admission_time
            ).total_seconds()
            if offset_seconds < 0 or offset_seconds >= window_seconds:
                continue

            # Bin index, clamped to guard against float rounding at the
            # window boundary.
            bin_idx = int(offset_seconds / window_seconds * n_bins)
            bin_idx = min(bin_idx, n_bins - 1)

            cat_idx = self._ITEMID_TO_CAT_IDX[itemid]

            sums[bin_idx][cat_idx] += value
            counts[bin_idx][cat_idx] += 1.0

        # Convert sums to means; empty cells stay 0.0 (the count tensor
        # lets the model distinguish these from true zeros).
        ts_values = []
        ts_counts = []
        for b in range(n_bins):
            row_vals = []
            row_cnts = []
            for v in range(d_vars):
                cnt = counts[b][v]
                if cnt > 0:
                    row_vals.append(sums[b][v] / cnt)
                else:
                    row_vals.append(0.0)
                row_cnts.append(cnt)
            ts_values.append(row_vals)
            ts_counts.append(row_cnts)

        return ts_values, ts_counts
"""Test cases for the DuETT model.

Author: Shubham Srivastava (ss253@illinois.edu)

Description:
    Unit tests for the DuETT model implementation. Tests cover model
    initialization, forward pass, backward pass, embedding extraction,
    various fusion methods, and custom hyperparameters. All tests use
    synthetic data and complete in milliseconds.
"""

import unittest

import torch

from pyhealth.datasets import create_sample_dataset, get_dataloader
from pyhealth.models.duett import DuETT, DuETTLayer


class TestDuETT(unittest.TestCase):
    """Test cases for the DuETT model."""

    def setUp(self):
        """Set up test data and model."""
        # Synthetic samples: T=4 time bins, V=5 variables, S=2 static
        # features. A 0.0 entry in ts_counts marks a (bin, variable)
        # cell with no observations; the paired ts_values entry is 0.0.
        self.samples = [
            {
                "patient_id": "patient-0",
                "ts_values": [
                    [0.5, 0.3, 0.0, 0.8, 0.1],
                    [0.2, 0.0, 0.4, 0.7, 0.3],
                    [0.0, 0.6, 0.1, 0.0, 0.5],
                    [0.9, 0.2, 0.3, 0.4, 0.0],
                ],
                "ts_counts": [
                    [1.0, 1.0, 0.0, 2.0, 1.0],
                    [1.0, 0.0, 1.0, 1.0, 1.0],
                    [0.0, 2.0, 1.0, 0.0, 1.0],
                    [3.0, 1.0, 1.0, 1.0, 0.0],
                ],
                "static": [0.65, 1.0],
                "times": [0.25, 0.5, 0.75, 1.0],
                "mortality": 1,
            },
            {
                "patient_id": "patient-1",
                "ts_values": [
                    [0.1, 0.7, 0.2, 0.0, 0.4],
                    [0.3, 0.5, 0.0, 0.6, 0.2],
                    [0.8, 0.0, 0.5, 0.3, 0.1],
                    [0.4, 0.1, 0.7, 0.9, 0.6],
                ],
                "ts_counts": [
                    [1.0, 2.0, 1.0, 0.0, 1.0],
                    [1.0, 1.0, 0.0, 1.0, 1.0],
                    [2.0, 0.0, 1.0, 1.0, 1.0],
                    [1.0, 1.0, 2.0, 1.0, 1.0],
                ],
                "static": [0.45, 0.0],
                "times": [0.25, 0.5, 0.75, 1.0],
                "mortality": 0,
            },
            {
                "patient_id": "patient-2",
                "ts_values": [
                    [0.3, 0.4, 0.1, 0.5, 0.2],
                    [0.6, 0.2, 0.3, 0.1, 0.7],
                    [0.1, 0.8, 0.0, 0.4, 0.3],
                    [0.7, 0.3, 0.5, 0.2, 0.1],
                ],
                "ts_counts": [
                    [1.0, 1.0, 1.0, 1.0, 1.0],
                    [2.0, 1.0, 1.0, 1.0, 2.0],
                    [1.0, 1.0, 0.0, 1.0, 1.0],
                    [1.0, 1.0, 1.0, 1.0, 1.0],
                ],
                "static": [0.72, 1.0],
                "times": [0.25, 0.5, 0.75, 1.0],
                "mortality": 1,
            },
            {
                "patient_id": "patient-3",
                "ts_values": [
                    [0.2, 0.1, 0.6, 0.3, 0.8],
                    [0.5, 0.4, 0.2, 0.7, 0.1],
                    [0.4, 0.3, 0.8, 0.1, 0.5],
                    [0.1, 0.6, 0.4, 0.5, 0.3],
                ],
                "ts_counts": [
                    [1.0, 1.0, 2.0, 1.0, 1.0],
                    [1.0, 1.0, 1.0, 2.0, 1.0],
                    [1.0, 1.0, 1.0, 1.0, 1.0],
                    [1.0, 1.0, 1.0, 1.0, 1.0],
                ],
                "static": [0.55, 0.0],
                "times": [0.25, 0.5, 0.75, 1.0],
                "mortality": 0,
            },
        ]

        # All four inputs are dense tensors; the target is binary.
        self.input_schema = {
            "ts_values": "tensor",
            "ts_counts": "tensor",
            "static": "tensor",
            "times": "tensor",
        }
        self.output_schema = {"mortality": "binary"}

        self.dataset = create_sample_dataset(
            samples=self.samples,
            input_schema=self.input_schema,
            output_schema=self.output_schema,
            dataset_name="test_duett",
        )

        # Deliberately tiny configuration so each test completes in
        # milliseconds while still exercising both transformer stages.
        self.model = DuETT(
            dataset=self.dataset,
            d_embedding=16,
            n_event_layers=1,
            n_time_layers=1,
            n_heads=2,
            dropout=0.1,
        )

    def test_model_initialization(self):
        """Test that the DuETT model initializes correctly."""
        self.assertIsInstance(self.model, DuETT)
        self.assertEqual(self.model.d_embedding, 16)
        # d_time_series / d_static are inferred from the sample shapes
        # (5 variables, 2 static features) rather than passed in.
        self.assertEqual(self.model.d_time_series, 5)
        self.assertEqual(self.model.d_static, 2)
        self.assertEqual(self.model.n_event_layers, 1)
        self.assertEqual(self.model.n_time_layers, 1)
        self.assertEqual(self.model.label_key, "mortality")
        self.assertEqual(self.model.fusion_method, "rep_token")

    def test_model_forward(self):
        """Test that the forward pass produces correct output keys."""
        loader = get_dataloader(self.dataset, batch_size=2, shuffle=False)
        batch = next(iter(loader))

        with torch.no_grad():
            ret = self.model(**batch)

        self.assertIn("loss", ret)
        self.assertIn("y_prob", ret)
        self.assertIn("y_true", ret)
        self.assertIn("logit", ret)

        # Batch dimension is preserved; loss is a scalar tensor.
        self.assertEqual(ret["y_prob"].shape[0], 2)
        self.assertEqual(ret["y_true"].shape[0], 2)
        self.assertEqual(ret["logit"].shape[0], 2)
        self.assertEqual(ret["loss"].dim(), 0)

    def test_model_backward(self):
        """Test that gradients flow correctly through the model."""
        loader = get_dataloader(self.dataset, batch_size=2, shuffle=False)
        batch = next(iter(loader))

        # No torch.no_grad() here: this test needs the autograd graph.
        ret = self.model(**batch)
        ret["loss"].backward()

        has_gradient = any(
            p.requires_grad and p.grad is not None
            for p in self.model.parameters()
        )
        self.assertTrue(
            has_gradient,
            "No parameters have gradients after backward pass",
        )
self.model(**batch) + ret["loss"].backward() + + has_gradient = any( + p.requires_grad and p.grad is not None + for p in self.model.parameters() + ) + self.assertTrue( + has_gradient, + "No parameters have gradients after backward pass", + ) + + def test_model_with_embedding(self): + """Test that embed=True returns patient embeddings.""" + loader = get_dataloader(self.dataset, batch_size=2, shuffle=False) + batch = next(iter(loader)) + batch["embed"] = True + + with torch.no_grad(): + ret = self.model(**batch) + + self.assertIn("embed", ret) + self.assertEqual(ret["embed"].shape[0], 2) + self.assertEqual(ret["embed"].shape[1], 16) + + def test_custom_hyperparameters(self): + """Test DuETT with different hyperparameter configurations.""" + model = DuETT( + dataset=self.dataset, + d_embedding=32, + n_event_layers=2, + n_time_layers=2, + n_heads=4, + dropout=0.5, + ) + + self.assertEqual(model.d_embedding, 32) + self.assertEqual(model.n_event_layers, 2) + self.assertEqual(model.n_time_layers, 2) + + loader = get_dataloader(self.dataset, batch_size=2, shuffle=False) + batch = next(iter(loader)) + + with torch.no_grad(): + ret = model(**batch) + + self.assertIn("loss", ret) + self.assertIn("y_prob", ret) + + def test_fusion_rep_token(self): + """Test DuETT with rep_token fusion method.""" + model = DuETT( + dataset=self.dataset, + d_embedding=16, + n_heads=2, + fusion_method="rep_token", + ) + loader = get_dataloader(self.dataset, batch_size=2, shuffle=False) + batch = next(iter(loader)) + + with torch.no_grad(): + ret = model(**batch) + + self.assertIn("loss", ret) + self.assertEqual(ret["y_prob"].shape[0], 2) + + def test_fusion_averaging(self): + """Test DuETT with averaging fusion method.""" + model = DuETT( + dataset=self.dataset, + d_embedding=16, + n_heads=2, + fusion_method="averaging", + ) + loader = get_dataloader(self.dataset, batch_size=2, shuffle=False) + batch = next(iter(loader)) + + with torch.no_grad(): + ret = model(**batch) + + self.assertIn("loss", 
ret) + self.assertEqual(ret["y_prob"].shape[0], 2) + + def test_fusion_masked_embed(self): + """Test DuETT with masked_embed fusion method.""" + model = DuETT( + dataset=self.dataset, + d_embedding=16, + n_heads=2, + fusion_method="masked_embed", + ) + loader = get_dataloader(self.dataset, batch_size=2, shuffle=False) + batch = next(iter(loader)) + + with torch.no_grad(): + ret = model(**batch) + + self.assertIn("loss", ret) + self.assertEqual(ret["y_prob"].shape[0], 2) + + def test_duett_layer_standalone(self): + """Test DuETTLayer independently from the BaseModel wrapper.""" + layer = DuETTLayer( + d_time_series=5, + d_static=2, + d_embedding=16, + n_event_layers=1, + n_time_layers=1, + n_heads=2, + dropout=0.0, + ) + + x_values = torch.randn(2, 4, 5) + x_counts = torch.ones(2, 4, 5) + static = torch.randn(2, 2) + times = torch.linspace(0, 1, 4).unsqueeze(0).expand(2, -1) + + emb = layer(x_values, x_counts, static, times) + + self.assertEqual(emb.shape, (2, 16)) + self.assertFalse(torch.isnan(emb).any()) + + def test_multiclass_classification(self): + """Test DuETT with multiclass classification.""" + samples = [ + { + "patient_id": f"patient-{i}", + "ts_values": [[0.5, 0.3], [0.1, 0.4]], + "ts_counts": [[1.0, 1.0], [1.0, 1.0]], + "static": [0.5, 1.0], + "times": [0.5, 1.0], + "label": i % 3, + } + for i in range(4) + ] + dataset = create_sample_dataset( + samples=samples, + input_schema={ + "ts_values": "tensor", + "ts_counts": "tensor", + "static": "tensor", + "times": "tensor", + }, + output_schema={"label": "multiclass"}, + dataset_name="test_multiclass", + ) + + model = DuETT( + dataset=dataset, + d_embedding=16, + n_heads=2, + ) + loader = get_dataloader(dataset, batch_size=2, shuffle=False) + batch = next(iter(loader)) + + with torch.no_grad(): + ret = model(**batch) + + self.assertEqual(ret["y_prob"].shape[1], 3) + + def test_loss_finite(self): + """Test that loss is finite and not NaN.""" + loader = get_dataloader(self.dataset, batch_size=4, 
if __name__ == "__main__":
    unittest.main()


"""Test cases for the ICUMortalityDuETTMIMIC4 task.

Author: Shubham Srivastava (ss253@illinois.edu)

Description:
    Unit tests for the DuETT mortality prediction task. Tests cover
    task instantiation, schema validation, and binning logic.
"""

import unittest

from pyhealth.tasks.icu_mortality_duett_mimic4 import (
    ICUMortalityDuETTMIMIC4,
)


class TestICUMortalityDuETTMIMIC4(unittest.TestCase):
    """Test cases for the ICUMortalityDuETTMIMIC4 task."""

    def test_task_instantiation_defaults(self):
        """Test task initializes with default parameters."""
        task = ICUMortalityDuETTMIMIC4()

        for attr, want in (
            ("task_name", "ICUMortalityDuETTMIMIC4"),
            ("n_time_bins", 24),
            ("input_window_hours", 48),
        ):
            self.assertEqual(getattr(task, attr), want)

    def test_task_instantiation_custom(self):
        """Test task initializes with custom parameters."""
        task = ICUMortalityDuETTMIMIC4(
            n_time_bins=12, input_window_hours=24
        )

        self.assertEqual(task.n_time_bins, 12)
        self.assertEqual(task.input_window_hours, 24)

    def test_input_schema(self):
        """Test that input schema has the correct keys and types."""
        schema = ICUMortalityDuETTMIMIC4().input_schema

        # All four DuETT inputs must be present and declared as tensors.
        for key in ("ts_values", "ts_counts", "static", "times"):
            self.assertIn(key, schema)
        for dtype in schema.values():
            self.assertEqual(dtype, "tensor")

    def test_output_schema(self):
        """Test that output schema has the correct key and type."""
        schema = ICUMortalityDuETTMIMIC4().output_schema

        self.assertIn("mortality", schema)
        self.assertEqual(schema["mortality"], "binary")
key and type.""" + task = ICUMortalityDuETTMIMIC4() + + self.assertIn("mortality", task.output_schema) + self.assertEqual(task.output_schema["mortality"], "binary") + + def test_lab_categories(self): + """Test that lab categories are properly defined.""" + task = ICUMortalityDuETTMIMIC4() + + self.assertEqual(len(task.LAB_CATEGORY_NAMES), 10) + self.assertEqual(task.D_VARS, 10) + self.assertIn("Sodium", task.LAB_CATEGORY_NAMES) + self.assertIn("Glucose", task.LAB_CATEGORY_NAMES) + + # All itemids should map to a category index + for itemid, idx in task._ITEMID_TO_CAT_IDX.items(): + self.assertGreaterEqual(idx, 0) + self.assertLess(idx, 10) + + def test_binning_output_shape(self): + """Test that _bin_observations produces correct shapes.""" + import polars as pl + from datetime import datetime + + task = ICUMortalityDuETTMIMIC4( + n_time_bins=4, input_window_hours=8 + ) + + admission_time = datetime(2023, 1, 1, 0, 0, 0) + + # Create a minimal labevents DataFrame + df = pl.DataFrame( + { + "timestamp": [ + datetime(2023, 1, 1, 1, 0, 0), + datetime(2023, 1, 1, 3, 0, 0), + datetime(2023, 1, 1, 5, 0, 0), + ], + "labevents/itemid": ["50983", "50971", "50902"], + "labevents/valuenum": [140.0, 4.2, 102.0], + "labevents/storetime": [ + "2023-01-01 01:00:00", + "2023-01-01 03:00:00", + "2023-01-01 05:00:00", + ], + } + ) + + ts_values, ts_counts = task._bin_observations( + df, admission_time + ) + + # Should be (n_time_bins, D_VARS) = (4, 10) + self.assertEqual(len(ts_values), 4) + self.assertEqual(len(ts_values[0]), 10) + self.assertEqual(len(ts_counts), 4) + self.assertEqual(len(ts_counts[0]), 10) + + def test_binning_values_correct(self): + """Test that binning produces correct values.""" + import polars as pl + from datetime import datetime + + task = ICUMortalityDuETTMIMIC4( + n_time_bins=2, input_window_hours=4 + ) + + admission_time = datetime(2023, 1, 1, 0, 0, 0) + + # Sodium (idx 0): two events in first bin + # 50983 is Sodium itemid + df = pl.DataFrame( + { + 
"timestamp": [ + datetime(2023, 1, 1, 0, 30, 0), + datetime(2023, 1, 1, 1, 30, 0), + ], + "labevents/itemid": ["50983", "50983"], + "labevents/valuenum": [140.0, 142.0], + "labevents/storetime": [ + "2023-01-01 00:30:00", + "2023-01-01 01:30:00", + ], + } + ) + + ts_values, ts_counts = task._bin_observations( + df, admission_time + ) + + # Sodium is category index 0 + # Both events are in bin 0 (first half of 4-hour window) + self.assertAlmostEqual(ts_values[0][0], 141.0) # mean + self.assertEqual(ts_counts[0][0], 2.0) + + # Bin 1 should be zero-imputed + self.assertEqual(ts_values[1][0], 0.0) + self.assertEqual(ts_counts[1][0], 0.0) + + def test_empty_patient_returns_empty(self): + """Test that a patient with no demographics returns empty.""" + task = ICUMortalityDuETTMIMIC4() + + class MockPatient: + patient_id = "test" + + def get_events(self, **kwargs): + return [] + + result = task(MockPatient()) + self.assertEqual(result, []) + + +if __name__ == "__main__": + unittest.main()