From 9301acc1fdd9c85a208e2c9ba86d0d1ab6b65e73 Mon Sep 17 00:00:00 2001
From: Kevin <kevintrancs@gmail.com>
Date: Sun, 19 Apr 2026 15:57:30 -0700
Subject: [PATCH 1/2] init after rebase and squashing previous commits on
 branch

---
 docs/api/tasks.rst                            |   1 +
 ...alth.tasks.sofa_lab_forecasting_mimic3.rst |   7 +
 .../mimic3_sofa_lab_forecasting_linear.py     | 225 ++++++++++++++
 pyhealth/tasks/__init__.py                    |   1 +
 pyhealth/tasks/sofa_lab_forecasting_mimic3.py | 229 +++++++++++++++
 tests/core/test_mimic3_sofa_lab.py            | 274 ++++++++++++++++++
 6 files changed, 737 insertions(+)
 create mode 100644 docs/api/tasks/pyhealth.tasks.sofa_lab_forecasting_mimic3.rst
 create mode 100644 examples/clinical_tasks/mimic3_sofa_lab_forecasting_linear.py
 create mode 100644 pyhealth/tasks/sofa_lab_forecasting_mimic3.py
 create mode 100644 tests/core/test_mimic3_sofa_lab.py

diff --git a/docs/api/tasks.rst b/docs/api/tasks.rst
index 23a4e06e5..d457153f4 100644
--- a/docs/api/tasks.rst
+++ b/docs/api/tasks.rst
@@ -218,6 +218,7 @@ Available Tasks
     Mortality Prediction (StageNet MIMIC-IV) <tasks/pyhealth.tasks.mortality_prediction_stagenet_mimic4>
     Patient Linkage (MIMIC-III) <tasks/pyhealth.tasks.patient_linkage_mimic3_fn>
     Readmission Prediction <tasks/pyhealth.tasks.readmission_prediction>
+    SOFA Lab Forecasting (MIMIC-III) <tasks/pyhealth.tasks.sofa_lab_forecasting_mimic3>
     Sleep Staging <tasks/pyhealth.tasks.sleep_staging>
     Sleep Staging (SleepEDF) <tasks/pyhealth.tasks.SleepStagingSleepEDF>
     Temple University EEG Tasks <tasks/pyhealth.tasks.temple_university_EEG_tasks>
diff --git a/docs/api/tasks/pyhealth.tasks.sofa_lab_forecasting_mimic3.rst b/docs/api/tasks/pyhealth.tasks.sofa_lab_forecasting_mimic3.rst
new file mode 100644
index 000000000..39a927002
--- /dev/null
+++ b/docs/api/tasks/pyhealth.tasks.sofa_lab_forecasting_mimic3.rst
@@ -0,0 +1,7 @@
+pyhealth.tasks.sofa_lab_forecasting_mimic3
+==========================================
+
+.. autoclass:: pyhealth.tasks.sofa_lab_forecasting_mimic3.SofaLabForecastingMIMIC3
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/examples/clinical_tasks/mimic3_sofa_lab_forecasting_linear.py b/examples/clinical_tasks/mimic3_sofa_lab_forecasting_linear.py
new file mode 100644
index 000000000..76fd5f0ce
--- /dev/null
+++ b/examples/clinical_tasks/mimic3_sofa_lab_forecasting_linear.py
@@ -0,0 +1,225 @@
+"""Example/Ablation for SofaLabForecastingMIMIC3 on synthetic MIMIC-III data. 
+
+Runs a 12h vs 24h lookback ablation over the forecasting task with a
+scikit-learn linear regressor, mirroring the paper's `Linear` baseline:
+NOTE: The task is intended for PyHealth models, but because of issues vectorizing
+its unusual inputs and because it is a forecasting task, scikit-learn was the most
+reasonable option to demonstrate the task.
+
+    Staniek et al. (2024), "Early Prediction of Causes (not Effects) in
+    Healthcare by Long-Term Clinical Time Series Forecasting."
+    https://arxiv.org/abs/2408.03816
+
+This example follows the pattern of Section 5.2 and Appendix B of the paper showing 24h lookback.
+MIMIC-III demo data may not be used here, so synthetic patients are generated and the script runs without MIMIC-III access.
+The test cases for the task are implemented the same way.
+Since the example already uses synthetic data, the same data is reused for the ablation.
+
+This example demonstrates:
+1. Building synthetic patients with lab trajectories that mirror the paper's deterioration patterns
+2. Applying the SofaLabForecastingMIMIC3 task to collect samples
+3. Converting samples into arrays for modeling
+4. Running a 12h vs 24h lookback ablation with a linear regression baseline
+5. Computing both the paper's masked MSE and a SOFA proxy MSE for evaluation    
+
+"""
+
+from __future__ import annotations
+
+from datetime import datetime, timedelta
+from typing import Iterable, List, Sequence, Tuple
+
+import numpy as np
+import polars as pl
+from sklearn.linear_model import LinearRegression
+from sklearn.model_selection import train_test_split
+
+from pyhealth.data import Patient
+from pyhealth.tasks import SofaLabForecastingMIMIC3
+
+# Constants for synthetic data generation and task processing.
+BILI = "50885"
+CREAT = "50912"
+PLT = "51265"
+T0 = datetime(2023, 1, 1, 0, 0, 0)
+
+NUM_PATIENTS = 2000
+LOOKBACK_SHORT = 12
+LOOKBACK_LONG = 24
+PREDICTION_HOURS = 24
+STAY_HOURS = LOOKBACK_LONG + PREDICTION_HOURS + 1
+
+# Baseline values from the paper's Appendix B, used only to make the synthetic data more realistic.
+BASELINE_INTERCEPTS = {BILI: 0.8, CREAT: 0.9, PLT: 190.0}
+BASELINE_SLOPES = {BILI: 0.05, CREAT: 0.03, PLT: -2.5}
+DETERIORATION_PATTERNS = [
+    {},
+    {BILI: 2.5},
+    {BILI: 6.5},
+    {CREAT: 3.8, PLT: 45.0},
+]
+
+def make_patient(
+    patient_id: str,
+    icu_intime: datetime,
+    icu_outtime: datetime,
+    lab_events: Sequence[Tuple[datetime, str, float]],
+    icustay_id: str,
+) -> Patient:
+    """Assemble a synthetic Patient holding one ICU stay plus its lab events."""
+    stay_row = {
+        "event_type": "icustays",
+        "timestamp": icu_intime,
+        "icustays/icustay_id": icustay_id,
+        "icustays/outtime": icu_outtime.strftime("%Y-%m-%d %H:%M:%S"),
+    }
+    lab_rows = [{
+        "event_type": "labevents",
+        "timestamp": when,
+        "labevents/itemid": str(code),
+        "labevents/valuenum": float(reading),
+    } for when, code, reading in lab_events]
+    frame = pl.DataFrame([stay_row, *lab_rows])
+    frame = frame.with_columns(pl.col("timestamp").cast(pl.Datetime))
+    return Patient(patient_id=patient_id, data_source=frame)
+
+
+def build_synthetic_patients() -> List[Patient]:
+    """Create NUM_PATIENTS synthetic patients with four events per lab.
+
+    Each lab gets one event in hours [0, 12), one in [12, 24) and two in
+    [24, 48), so a 12h lookback sees only the first event and a 24h one both.
+    NOTE(review): `base` grows with `idx`, so late patients get extreme values
+    (platelets go negative) - confirm this is intended.
+    """
+    rng = np.random.default_rng(42)
+    patients: List[Patient] = []
+    pred_lo, pred_hi = LOOKBACK_LONG, LOOKBACK_LONG + PREDICTION_HOURS
+
+    for idx in range(NUM_PATIENTS):
+        stay_start = T0 + timedelta(days=idx)
+        stay_end = stay_start + timedelta(hours=STAY_HOURS)
+        pattern = DETERIORATION_PATTERNS[idx % len(DETERIORATION_PATTERNS)]
+
+        lab_events: List[Tuple[datetime, str, float]] = []
+        for lab in (BILI, CREAT, PLT):
+            base = BASELINE_INTERCEPTS[lab] + BASELINE_SLOPES[lab] * idx
+            future = pattern.get(lab, base)
+            hours = (
+                int(rng.integers(0, LOOKBACK_SHORT)),
+                int(rng.integers(LOOKBACK_SHORT, LOOKBACK_LONG)),
+                int(rng.integers(pred_lo, pred_hi)),
+                int(rng.integers(pred_lo, pred_hi)),
+            )
+            values = (base, base + 0.1, future, future)
+            for hour, value in zip(hours, values):
+                lab_events.append((stay_start + timedelta(hours=hour), lab, value))
+
+        patients.append(make_patient(
+            patient_id=f"patient-{idx}",
+            icu_intime=stay_start,
+            icu_outtime=stay_end,
+            lab_events=lab_events,
+            icustay_id=f"{100000 + idx}",
+        ))
+    return patients
+
+
+def samples_to_arrays(
+    samples: Sequence[dict],
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """Stack task samples into float32 feature, target and target-mask arrays."""
+    feature_rows = [
+        np.concatenate([item["observation_values"], item["observation_masks"]])
+        for item in samples
+    ]
+    targets = np.asarray([item["target_values"] for item in samples], dtype=np.float32)
+    target_masks = np.asarray([item["target_masks"] for item in samples], dtype=np.float32)
+    return np.asarray(feature_rows, dtype=np.float32), targets, target_masks
+
+
+def paper_masked_mse(
+    y_true: np.ndarray,
+    y_pred: np.ndarray,
+    masks: np.ndarray,
+    prediction_hours: int = PREDICTION_HOURS,
+    num_labs: int = SofaLabForecastingMIMIC3.NUM_LABS,
+) -> float:
+    """Masked MSE from the paper's Eq. 5, averaged over patients and timesteps."""
+    grid = (-1, prediction_hours, num_labs)
+    residual = y_true.reshape(grid) - y_pred.reshape(grid)
+    masked_sq = (residual * masks.reshape(grid)) ** 2
+    return float(masked_sq.sum() / (y_true.shape[0] * prediction_hours))
+
+
+def future_lab_sofa(values: np.ndarray, masks: np.ndarray) -> np.ndarray:
+    """Score each patient's lab-only SOFA proxy from standardized lab values.
+
+    Undoes the standardization, takes the worst masked value per lab (max bilirubin,
+    max creatinine, min platelets) and sums the task's Appendix B subscores for them.
+    """
+    task = SofaLabForecastingMIMIC3()
+    num_labs = SofaLabForecastingMIMIC3.NUM_LABS
+    stats = np.asarray(
+        [task.LAB_NORMALIZATION_STATS[lab] for lab in (BILI, CREAT, PLT)],
+        dtype=np.float32,
+    )
+    mean, std = stats[:, 0], stats[:, 1]
+
+    v = values.reshape(values.shape[0], -1, num_labs) * std + mean
+    m = masks.reshape(masks.shape[0], -1, num_labs) > 0
+
+    scorers = (task._sofa_bilirubin, task._sofa_creatinine, task._sofa_platelets)
+    aggregators = (np.max, np.max, np.min)
+    scores = np.zeros(values.shape[0], dtype=np.float32)
+    for patient_idx in range(values.shape[0]):
+        for lab_idx, (aggregate, scorer) in enumerate(zip(aggregators, scorers)):
+            observed = v[patient_idx, :, lab_idx][m[patient_idx, :, lab_idx]]
+            if observed.size:
+                scores[patient_idx] += scorer(float(aggregate(observed)))
+    return scores
+
+def run_ablation(lookback_hours: int, patients: Sequence[Patient]) -> dict:
+    """Fit a linear regression baseline for one lookback length and return its metrics."""
+    task = SofaLabForecastingMIMIC3(
+        lookback_hours=lookback_hours,
+        prediction_hours=PREDICTION_HOURS,
+    )
+    samples = [sample for patient in patients for sample in task(patient)]
+    x, y, masks = samples_to_arrays(samples)
+    x_tr, x_te, y_tr, y_te, _, m_te = train_test_split(
+        x, y, masks, test_size=0.25, random_state=42,
+    )
+
+    model = LinearRegression().fit(x_tr, y_tr)
+    y_pred = model.predict(x_te)
+
+    mse_sofa = float(np.mean(
+        (future_lab_sofa(y_te, m_te) - future_lab_sofa(y_pred, m_te)) ** 2
+    ))
+    return {
+        "lookback_hours": lookback_hours,
+        "num_samples": len(samples),
+        "masked_mse": paper_masked_mse(y_te, y_pred, m_te),
+        "mse_sofa": mse_sofa,
+    }
+
+
+def print_results(results: Sequence[dict]) -> None:
+    """Print ablation metrics as a table, styled like mp_stagenet_mimic4_interpret.py."""
+    heavy_rule, light_rule = "=" * 60, "-" * 60
+    print("\nSOFA Lab Forecasting Ablation")
+    print(heavy_rule)
+    print(f"{'lookback':>10} | {'samples':>7} | {'masked_mse':>12} | {'mse_sofa':>10}")
+    print(light_rule)
+    for row in results:
+        print(f"{row['lookback_hours']:>10} | {row['num_samples']:>7} | "
+              f"{row['masked_mse']:>12.4f} | {row['mse_sofa']:>10.4f}")
+    print(heavy_rule)
+
+if __name__ == "__main__":  # guard so importing this example does not run the ablation
+    patients = build_synthetic_patients()
+    print_results([
+        run_ablation(LOOKBACK_SHORT, patients),
+        run_ablation(LOOKBACK_LONG, patients),
+    ])
diff --git a/pyhealth/tasks/__init__.py b/pyhealth/tasks/__init__.py
index a32618f9c..32db1f6f4 100644
--- a/pyhealth/tasks/__init__.py
+++ b/pyhealth/tasks/__init__.py
@@ -58,6 +58,7 @@
     sleep_staging_sleepedf_fn,
 )
 from .sleep_staging_v2 import SleepStagingSleepEDF
+from .sofa_lab_forecasting_mimic3 import SofaLabForecastingMIMIC3
 from .temple_university_EEG_tasks import (
     EEGEventsTUEV,
     EEGAbnormalTUAB
diff --git a/pyhealth/tasks/sofa_lab_forecasting_mimic3.py b/pyhealth/tasks/sofa_lab_forecasting_mimic3.py
new file mode 100644
index 000000000..b88ad0fa2
--- /dev/null
+++ b/pyhealth/tasks/sofa_lab_forecasting_mimic3.py
@@ -0,0 +1,229 @@
+from __future__ import annotations
+
+from datetime import datetime, timedelta
+from typing import Any, ClassVar, Dict, List, Optional, Tuple
+
+import polars as pl
+
+from pyhealth.data import Patient
+
+from .base_task import BaseTask
+
+
+class SofaLabForecastingMIMIC3(BaseTask):
+    """Forecast future SOFA-related lab values from early ICU measurements.
+
+    Task-only adaptation of Staniek et al. (2024), "Early Prediction of Causes
+    (not Effects) in Healthcare by Long-Term Clinical Time Series Forecasting."
+
+    Inputs are hourly binned lab values from an observation window; outputs are
+    hourly binned lab values from a future prediction window plus a derived
+    binary SOFA-deterioration label. Three SOFA-related labs are modeled:
+    bilirubin (50885), creatinine (50912), platelets (51265).
+
+    Note:
+        Unlike most PyHealth tasks, samples are emitted per ICU stay rather
+        than per patient, so patients with multiple stays produce multiple
+        samples. This matches the paper's per-stay forecasting setup.
+        Also unlike the paper, no static features are included; this is a
+        very simple adaptation covering three SOFA-related labs from MIMIC-III
+        with a generalized standardization approach.
+
+    Args:
+        lookback_hours: Length of the observation window in hours.
+        prediction_hours: Length of the future target window in hours.
+
+    Examples:
+        >>> from pyhealth.datasets import MIMIC3Dataset
+        >>> from pyhealth.tasks import SofaLabForecastingMIMIC3
+        >>> dataset = MIMIC3Dataset(root="/path/to/mimic3",
+        ...                         tables=["labevents", "icustays"])
+        >>> samples = dataset.set_task(SofaLabForecastingMIMIC3())
+    """
+
+    task_name: str = "SofaLabForecastingMIMIC3"
+
+    input_schema: Dict[str, str] = {
+        "observation_values": "tensor",
+        "observation_masks": "tensor",
+    }
+    output_schema: Dict[str, str] = {
+        "target_values": "tensor",
+        "target_masks": "tensor",
+        "sofa_label": "binary",
+    }
+
+    BILIRUBIN_ITEMID: str = "50885"
+    CREATININE_ITEMID: str = "50912"
+    PLATELETS_ITEMID: str = "51265"
+
+    LAB_ITEMIDS: List[str] = [
+        BILIRUBIN_ITEMID,
+        CREATININE_ITEMID,
+        PLATELETS_ITEMID,
+    ]
+    NUM_LABS: int = 3
+
+    # Approximate MIMIC-III (mean, std) per lab from MIMIC-Extract
+    # (Wang et al., 2020). Used only to standardize forecasting tensors;
+    # raw values drive SOFA scoring.
+    # These generic statistics are used so standardization follows one consistent pattern.
+    LAB_NORMALIZATION_STATS: ClassVar[Dict[str, Tuple[float, float]]] = {
+        BILIRUBIN_ITEMID: (2.6, 5.4),
+        CREATININE_ITEMID: (1.4, 1.5),
+        PLATELETS_ITEMID: (205.2, 113.3),
+    }
+
+    def __init__(self, lookback_hours: int = 24, prediction_hours: int = 24) -> None:
+        """Store the window lengths in hours; raises ValueError unless both are positive."""
+        if lookback_hours <= 0 or prediction_hours <= 0:
+            raise ValueError("lookback_hours and prediction_hours must be positive")
+        super().__init__()
+        self.lookback_hours = lookback_hours
+        self.prediction_hours = prediction_hours
+
+    def __call__(self, patient: Patient) -> List[Dict[str, Any]]:
+        """Return one sample per ICU stay that covers both windows and has labs in each."""
+        samples: List[Dict[str, Any]] = []
+        required = timedelta(hours=self.lookback_hours + self.prediction_hours)
+
+        for stay in patient.get_events(event_type="icustays"):
+            icu_in = stay.timestamp
+            icu_out = self._parse_outtime(getattr(stay, "outtime", None))
+            if icu_in is None or icu_out is None or (icu_out - icu_in) < required:
+                continue
+            # Half-open [start, end) windows, so SOFA scoring sees the same events as _bin_hourly.
+            obs_end = icu_in + timedelta(hours=self.lookback_hours)
+            pred_end = obs_end + timedelta(hours=self.prediction_hours)
+            obs_df = patient.get_events(
+                event_type="labevents", start=icu_in, end=obs_end, return_df=True
+            ).filter(pl.col("timestamp") < obs_end)
+            pred_df = patient.get_events(
+                event_type="labevents", start=obs_end, end=pred_end, return_df=True
+            ).filter(pl.col("timestamp") < pred_end)
+
+            current_sofa = self._compute_lab_sofa(obs_df)
+            future_sofa = self._compute_lab_sofa(pred_df)
+            if current_sofa is None or future_sofa is None:
+                continue
+
+            obs_values, obs_masks = self._bin_hourly(obs_df, icu_in, self.lookback_hours)
+            tgt_values, tgt_masks = self._bin_hourly(
+                pred_df, obs_end, self.prediction_hours
+            )
+
+            samples.append({
+                "patient_id": patient.patient_id,
+                "visit_id": str(getattr(stay, "icustay_id", "")),
+                "observation_values": obs_values,
+                "observation_masks": obs_masks,
+                "target_values": tgt_values,
+                "target_masks": tgt_masks,
+                "sofa_label": 1 if (future_sofa - current_sofa) >= 2 else 0,
+            })
+
+        return samples
+
+    @staticmethod
+    def _parse_outtime(value: Any) -> Optional[datetime]:
+        """Return `value` as a datetime, or None if missing or not '%Y-%m-%d %H:%M:%S'."""
+        if value is None or isinstance(value, datetime):
+            return value
+        try:
+            parsed = datetime.strptime(str(value), "%Y-%m-%d %H:%M:%S")
+        except ValueError:
+            return None
+        return parsed
+
+    def _bin_hourly(
+        self,
+        labs_df: pl.DataFrame,
+        window_start: datetime,
+        num_hours: int,
+    ) -> Tuple[List[float], List[float]]:
+        """Bin all lab events into fixed hourly value and mask tensors.
+
+        Follows Section 3.1 of Staniek et al. (2024): keep the first observed
+        value per (hour, lab) slot, standardize it with LAB_NORMALIZATION_STATS,
+        and zero-impute (mask 0.0) every slot without an observation.
+        """
+        size = num_hours * self.NUM_LABS
+        values = [0.0] * size
+        masks = [0.0] * size
+        if labs_df.height == 0:
+            return values, masks
+
+        filtered = (
+            labs_df.filter(
+                pl.col("labevents/itemid").cast(pl.Utf8).is_in(self.LAB_ITEMIDS)
+            )
+            .filter(pl.col("labevents/valuenum").is_not_null())
+            .sort("timestamp")
+        )
+
+        filled = set()
+        for row in filtered.iter_rows(named=True):
+            hour = int((row["timestamp"] - window_start).total_seconds() // 3600)
+            if not 0 <= hour < num_hours:
+                continue
+            itemid = str(row["labevents/itemid"])
+            lab_idx = self.LAB_ITEMIDS.index(itemid)
+            key = (hour, lab_idx)
+            if key in filled:
+                continue
+            filled.add(key)
+            flat = hour * self.NUM_LABS + lab_idx
+            mean, std = self.LAB_NORMALIZATION_STATS[itemid]
+            values[flat] = (float(row["labevents/valuenum"]) - mean) / std if std > 0 else 0.0
+            masks[flat] = 1.0
+
+        return values, masks
+
+    def _get_lab_values(self, labs_df: pl.DataFrame, itemid: str) -> List[float]:
+        """Collect every non-null numeric value recorded for one lab item."""
+        if labs_df.height == 0:
+            return []
+        is_item = pl.col("labevents/itemid").cast(pl.Utf8) == itemid
+        has_value = pl.col("labevents/valuenum").is_not_null()
+        matches = labs_df.filter(is_item).filter(has_value)
+        if matches.height == 0:
+            return []
+        return matches["labevents/valuenum"].cast(pl.Float64).to_list()
+
+    def _sofa_bilirubin(self, value: float) -> int:
+        """Map a bilirubin value to its SOFA subscore (paper Appendix B, Table 6)."""
+        cutoffs = ((12.0, 4), (6.0, 3), (2.0, 2), (1.2, 1))
+        # Cutoffs are ordered worst-first, so the first one reached wins.
+        hits = (points for floor, points in cutoffs if value >= floor)
+        return next(hits, 0)
+
+    def _sofa_creatinine(self, value: float) -> int:
+        """Map a creatinine value to its SOFA subscore (paper Appendix B, Table 6)."""
+        cutoffs = ((5.0, 4), (3.5, 3), (2.0, 2), (1.2, 1))
+        # Cutoffs are ordered worst-first, so the first one reached wins.
+        hits = (points for floor, points in cutoffs if value >= floor)
+        return next(hits, 0)
+
+    def _sofa_platelets(self, value: float) -> int:
+        """Map a platelet count to its SOFA subscore (paper Appendix B, Table 6; lower is worse)."""
+        cutoffs = ((20.0, 4), (50.0, 3), (100.0, 2), (150.0, 1))
+        # Cutoffs are ordered worst-first; the comparison is strict, so a value on a cutoff skips it.
+        hits = (points for ceiling, points in cutoffs if value < ceiling)
+        return next(hits, 0)
+
+    def _compute_lab_sofa(self, labs_df: pl.DataFrame) -> Optional[int]:
+        """Sum lab SOFA subscores from each lab's worst value; None if no labs at all."""
+        bili = self._get_lab_values(labs_df, self.BILIRUBIN_ITEMID)
+        creat = self._get_lab_values(labs_df, self.CREATININE_ITEMID)
+        platelets = self._get_lab_values(labs_df, self.PLATELETS_ITEMID)
+        if not bili and not creat and not platelets:
+            return None
+
+        # Worst value is the maximum for bilirubin/creatinine, the minimum for platelets.
+        subscores = [
+            self._sofa_bilirubin(max(bili)) if bili else 0,
+            self._sofa_creatinine(max(creat)) if creat else 0,
+            self._sofa_platelets(min(platelets)) if platelets else 0,
+        ]
+        # Labs with no measurement contribute nothing to the proxy score.
+        return sum(subscores)
diff --git a/tests/core/test_mimic3_sofa_lab.py b/tests/core/test_mimic3_sofa_lab.py
new file mode 100644
index 000000000..bd3388575
--- /dev/null
+++ b/tests/core/test_mimic3_sofa_lab.py
@@ -0,0 +1,274 @@
+from __future__ import annotations
+
+import unittest
+from datetime import datetime, timedelta
+from typing import List, Sequence, Tuple
+
+import polars as pl
+
+from pyhealth.data import Patient
+from pyhealth.tasks import SofaLabForecastingMIMIC3
+
+# MIMIC-III item IDs from the paper's Appendix A, Table 4.
+BILI = "50885"
+CREAT = "50912"
+PLT = "51265"
+T0 = datetime(2026, 1, 1, 0, 0, 0)
+
+Stay = Tuple[datetime, datetime, Sequence[Tuple[datetime, str, float]]]
+
+def normalized(itemid: str, value: float) -> float:
+    mean, std = SofaLabForecastingMIMIC3.LAB_NORMALIZATION_STATS[itemid]
+    return (value - mean) / std
+
+
+def _lab_row(ts: datetime, itemid: str, valuenum: float) -> dict:
+    return {
+        "event_type": "labevents",
+        "timestamp": ts,
+        "labevents/itemid": str(itemid),
+        "labevents/valuenum": float(valuenum),
+    }
+
+
+def _icu_row(intime: datetime, outtime: datetime, icustay_id: str) -> dict:
+    return {
+        "event_type": "icustays",
+        "timestamp": intime,
+        "icustays/icustay_id": icustay_id,
+        "icustays/outtime": outtime.strftime("%Y-%m-%d %H:%M:%S"),
+    }
+
+
+def make_patient(patient_id: str, stays: Sequence[Stay]) -> Patient:
+    """Build a synthetic Patient from one or more ICU stays."""
+    rows: List[dict] = []
+    for idx, (intime, outtime, lab_events) in enumerate(stays):
+        rows.append(_icu_row(intime, outtime, f"{100000 + idx}"))
+        for ts, itemid, valuenum in lab_events:
+            rows.append(_lab_row(ts, itemid, valuenum))
+    df = pl.DataFrame(rows).with_columns(pl.col("timestamp").cast(pl.Datetime))
+    return Patient(patient_id=patient_id, data_source=df)
+
+
+def single_stay_patient(
+    patient_id: str,
+    lab_events: Sequence[Tuple[datetime, str, float]],
+    hours: int = 49,
+) -> Patient:
+    return make_patient(patient_id, [(T0, T0 + timedelta(hours=hours), lab_events)])
+
+
+class TestTaskSchemaAndInit(unittest.TestCase):
+    """Test the schema and init simple behavior."""
+
+    def test_task_contract_and_windows(self) -> None:
+        default = SofaLabForecastingMIMIC3()
+        custom = SofaLabForecastingMIMIC3(lookback_hours=12, prediction_hours=6)
+        self.assertEqual(
+            SofaLabForecastingMIMIC3.task_name, "SofaLabForecastingMIMIC3"
+        )
+        self.assertEqual(
+            SofaLabForecastingMIMIC3.input_schema,
+            {"observation_values": "tensor", "observation_masks": "tensor"},
+        )
+        self.assertEqual(
+            SofaLabForecastingMIMIC3.output_schema,
+            {
+                "target_values": "tensor",
+                "target_masks": "tensor",
+                "sofa_label": "binary",
+            },
+        )
+        self.assertEqual((default.lookback_hours, default.prediction_hours), (24, 24))
+        self.assertEqual((custom.lookback_hours, custom.prediction_hours), (12, 6))
+
+
+class TestSofaScoring(unittest.TestCase):
+    """Test direct SOFA subscore helpers follow the paper's Appendix B thresholds."""
+
+    def setUp(self) -> None:
+        self.task = SofaLabForecastingMIMIC3()
+
+    def test_threshold_boundaries(self) -> None:
+        """Pin each cutoff: >= for bilirubin/creatinine, strict < for platelets."""
+        self.assertEqual(
+            [self.task._sofa_bilirubin(v) for v in (1.1, 1.2, 2.0, 6.0, 12.0)],
+            [0, 1, 2, 3, 4],
+        )
+        self.assertEqual(
+            [self.task._sofa_creatinine(v) for v in (1.1, 1.2, 2.0, 3.5, 5.0)],
+            [0, 1, 2, 3, 4],
+        )
+        # Platelets use strict "<": a value equal to a cutoff keeps the lower score.
+        plt_cases = {150.0: 0, 149.9: 1, 100.0: 1, 99.9: 2, 50.0: 2, 49.9: 3, 20.0: 3, 19.9: 4}
+        self.assertEqual({v: self.task._sofa_platelets(v) for v in plt_cases}, plt_cases)
+
+    def test_compute_lab_sofa_worst_values(self) -> None:
+        patient = single_stay_patient(
+            "abnormal",
+            [
+                (T0 + timedelta(hours=1), BILI, 6.0),
+                (T0 + timedelta(hours=2), CREAT, 2.2),
+                (T0 + timedelta(hours=3), PLT, 140.0),
+                (T0 + timedelta(hours=4), PLT, 40.0),
+            ],
+        )
+        labs_df = patient.get_events(event_type="labevents", return_df=True)
+        self.assertEqual(self.task._compute_lab_sofa(labs_df), 8)
+
+
+class TestFeatureExtraction(unittest.TestCase):
+    """Tests the hourly binning behavior for the forecasting."""
+
+    def setUp(self) -> None:
+        self.task = SofaLabForecastingMIMIC3()
+
+    def _labs_df(self, patient: Patient) -> pl.DataFrame:
+        return patient.get_events(
+            event_type="labevents",
+            start=T0,
+            end=T0 + timedelta(hours=24),
+            return_df=True,
+        )
+
+    def test_hourly_binning_and_masks(self) -> None:
+        patient = single_stay_patient(
+            "binning",
+            [
+                (T0 + timedelta(hours=2, minutes=30), BILI, 1.5),
+                (T0 + timedelta(hours=0, minutes=10), PLT, 180.0),
+            ],
+        )
+        values, masks = self.task._bin_hourly(self._labs_df(patient), T0, 24)
+        self.assertAlmostEqual(values[6], normalized(BILI, 1.5), places=6)
+        self.assertEqual(masks[6], 1.0)
+        self.assertAlmostEqual(values[2], normalized(PLT, 180.0), places=6)
+        self.assertEqual(masks[2], 1.0)
+        self.assertEqual(values[5], 0.0)
+        self.assertEqual(masks[5], 0.0)
+
+
+class TestSampleGeneration(unittest.TestCase):
+    """Sample-level contract and SOFA label derivation."""
+
+    def setUp(self) -> None:
+        self.task = SofaLabForecastingMIMIC3()
+
+    def _baseline_events(self) -> List[Tuple[datetime, str, float]]:
+        return [
+            (T0 + timedelta(hours=1), BILI, 1.0),
+            (T0 + timedelta(hours=2), CREAT, 1.0),
+            (T0 + timedelta(hours=3), PLT, 180.0),
+        ]
+
+    def _make_forecasting_patient(
+        self,
+        patient_id: str,
+        future_bili: float = 1.0,
+        future_creat: float = 1.0,
+        future_plt: float = 180.0,
+    ) -> Patient:
+        return single_stay_patient(
+            patient_id,
+            self._baseline_events() + [
+                (T0 + timedelta(hours=25), BILI, future_bili),
+                (T0 + timedelta(hours=26), CREAT, future_creat),
+                (T0 + timedelta(hours=27), PLT, future_plt),
+            ],
+        )
+
+    def test_sample_shapes(self) -> None:
+        patient = self._make_forecasting_patient("shape")
+        sample = self.task(patient)[0]
+        self.assertEqual(len(sample["observation_values"]), 72)
+        self.assertEqual(len(sample["observation_masks"]), 72)
+        self.assertEqual(len(sample["target_values"]), 72)
+        self.assertEqual(len(sample["target_masks"]), 72)
+        self.assertIn("sofa_label", sample)
+
+    def test_custom_window_shapes(self) -> None:
+        task = SofaLabForecastingMIMIC3(lookback_hours=12, prediction_hours=12)
+        patient = single_stay_patient(
+            "custom",
+            [
+                (T0 + timedelta(hours=1), BILI, 1.0),
+                (T0 + timedelta(hours=13), BILI, 2.0),
+            ],
+            hours=25,
+        )
+        sample = task(patient)[0]
+        for key in ("observation_values", "observation_masks",
+                    "target_values", "target_masks"):
+            self.assertEqual(len(sample[key]), 36)
+
+    def test_sofa_labels(self) -> None:
+        """Negative, borderline (delta=1), boundary (delta=2), and spike cases."""
+        cases = [
+            # (future_bili, expected_label)
+            (1.0, 0),  # no change
+            (1.5, 0),  # delta 1
+            (2.0, 1),  # delta 2 -> positive at boundary
+            (6.0, 1),  # severe spike
+        ]
+        for future_bili, expected in cases:
+            patient = self._make_forecasting_patient(
+                f"label-{future_bili}",
+                future_bili=future_bili,
+            )
+            sample = self.task(patient)[0]
+            self.assertEqual(
+                sample["sofa_label"], expected,
+                f"future_bili={future_bili}",
+            )
+
+
+class TestEdgeCases(unittest.TestCase):
+    """Stays that cannot produce a valid forecasting sample should be skipped."""
+
+    def setUp(self) -> None:
+        self.task = SofaLabForecastingMIMIC3()
+
+    def test_invalid_stays_are_skipped(self) -> None:
+        patients = [
+            single_stay_patient("no-labs", []),
+            single_stay_patient(
+                "short",
+                [(T0 + timedelta(hours=1), BILI, 1.0)],
+                hours=40,
+            ),
+            single_stay_patient(
+                "obs-only",
+                [
+                    (T0 + timedelta(hours=1), BILI, 1.0),
+                    (T0 + timedelta(hours=2), CREAT, 1.0),
+                    (T0 + timedelta(hours=3), PLT, 180.0),
+                ],
+            ),
+            single_stay_patient(
+                "pred-only",
+                [
+                    (T0 + timedelta(hours=25), BILI, 6.0),
+                    (T0 + timedelta(hours=26), CREAT, 2.0),
+                    (T0 + timedelta(hours=27), PLT, 80.0),
+                ],
+            ),
+        ]
+        for patient in patients:
+            self.assertEqual(self.task(patient), [])
+
+    def test_multiple_stays_each_yield_a_sample(self) -> None:
+        """Every qualifying ICU stay of one patient produces its own sample."""
+        def stay(start: datetime, itemid: str) -> Stay:
+            return (
+                start, start + timedelta(hours=49),
+                [(start + timedelta(hours=1), itemid, 1.0),
+                 (start + timedelta(hours=25), itemid, 5.0)],
+            )
+
+        patient = make_patient("multi", [stay(T0, BILI), stay(T0 + timedelta(days=4), CREAT)])
+        self.assertEqual(len(self.task(patient)), 2)
+
+
+if __name__ == "__main__":
+    unittest.main()

From c913c2bd7bf79ee5048f98246c5fba6e1e715c15 Mon Sep 17 00:00:00 2001
From: Kevin <kevintrancs@gmail.com>
Date: Sun, 19 Apr 2026 16:09:56 -0700
Subject: [PATCH 2/2] updated placement of example file based on rubric for
 opt3

---
 .../{clinical_tasks => }/mimic3_sofa_lab_forecasting_linear.py    | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename examples/{clinical_tasks => }/mimic3_sofa_lab_forecasting_linear.py (100%)

diff --git a/examples/clinical_tasks/mimic3_sofa_lab_forecasting_linear.py b/examples/mimic3_sofa_lab_forecasting_linear.py
similarity index 100%
rename from examples/clinical_tasks/mimic3_sofa_lab_forecasting_linear.py
rename to examples/mimic3_sofa_lab_forecasting_linear.py