Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
5bdaa76
Create ehr_foundation_model task
will-pang Feb 11, 2026
5a49feb
Add example for testing
will-pang Feb 11, 2026
9777311
Update ehr_foundational_model_mimic4.py
will-pang Feb 11, 2026
6ecf289
Merge remote-tracking branch 'upstream/master' into FoundationalEHR/w…
will-pang Feb 12, 2026
60641c0
Update ehr_foundational_model_mimic4.py
will-pang Feb 12, 2026
e04f54e
Update ehr_foundational_model_mimic4.py
will-pang Feb 12, 2026
13e46f0
Add handling of missing notes
will-pang Feb 12, 2026
f456e53
Update ehr_foundational_model_mimic4.py
will-pang Feb 12, 2026
9eea8fe
update comments
will-pang Feb 12, 2026
eafd929
update comments
will-pang Feb 12, 2026
fe53e89
Update tuple_time_text_processor.py
will-pang Feb 12, 2026
0e1df77
Update multimodal_task.py
will-pang Feb 12, 2026
3d30dbf
Update tuple_time_text_processor.py
will-pang Feb 15, 2026
20306e3
Update ehr_foundational_model_mimic4.py
will-pang Feb 15, 2026
24d1e7b
Update tuple_time_text_processor.py
will-pang Feb 15, 2026
4fab928
Update tuple_time_text_processor.py
will-pang Feb 15, 2026
bdf00e0
Update comments
will-pang Feb 16, 2026
06cdd1e
Update ehr_foundational_model_mimic4.py
will-pang Feb 16, 2026
a1b2756
Update tuple_time_text_processor.py
will-pang Feb 16, 2026
adf3a53
Minor update in docs
will-pang Feb 16, 2026
aa1b0ed
Create test_ehr_foundational_model_mimic4.py
will-pang Feb 16, 2026
3edf07e
Add unit test
will-pang Feb 16, 2026
2413fd5
Update naming
will-pang Feb 18, 2026
a353f60
Update ehr_foundational_model_mimic4.py
will-pang Feb 18, 2026
12e968d
Remove comments
will-pang Feb 18, 2026
b110a2e
Update ehr_foundational_model_mimic4.py
will-pang Feb 19, 2026
f886cdd
Delete test_ehr_foundational_model_mimic4.py
will-pang Feb 19, 2026
f1b74f2
Renaming updates
will-pang Feb 19, 2026
6fc7bd0
Update ehr_foundational_model_mimic4.py
will-pang Feb 19, 2026
628daa5
Merge branch 'sunlabuiuc:master' into FoundationalEHR/wp-create-multi…
will-pang Feb 19, 2026
0584779
Merge branch 'sunlabuiuc:master' into FoundationalEHR/wp-multimodal-t…
will-pang Feb 19, 2026
045e173
Merge branch 'FoundationalEHR/wp-multimodal-task-lab-events-icd-codes…
will-pang Feb 19, 2026
fe764ac
Update ehr_foundational_model_mimic4.py
will-pang Feb 19, 2026
53761d6
Update ehr_foundational_model_mimic4.py
will-pang Feb 19, 2026
08c0240
Merge pull request #1 from will-pang/FoundationalEHR/wp-create-multim…
will-pang Feb 19, 2026
2957aa9
Add lab events and icd 10 codes
will-pang Feb 19, 2026
b8221aa
Update ehr_foundational_model_mimic4.py
will-pang Feb 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions examples/foundation_ehr/multimodal_task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import os

# PyHealth Packages
from pyhealth.datasets import MIMIC4Dataset
from pyhealth.tasks.ehr_foundational_model_mimic4 import EHRFoundationalModelMIMIC4

# Root of the local checkout that mirrors the MIMIC-IV data layout.
# Override with the PYHEALTH_REPO_ROOT environment variable instead of editing
# this file -- the previous hard-coded user path broke on any other machine.
# (see: https://github.com/sunlabuiuc/PyHealth/blob/master/examples/mortality_prediction/multimodal_mimic4_minimal.py)
PYHEALTH_REPO_ROOT = os.environ.get("PYHEALTH_REPO_ROOT", "/Users/wpang/Desktop/PyHealth")

EHR_ROOT = os.path.join(PYHEALTH_REPO_ROOT, "srv/local/data/physionet.org/files/mimiciv/2.2")
NOTE_ROOT = os.path.join(PYHEALTH_REPO_ROOT, "srv/local/data/physionet.org/files/mimic-iv-note/2.2")
CXR_ROOT = os.path.join(PYHEALTH_REPO_ROOT, "srv/local/data/physionet.org/files/mimic-cxr-jpg/2.0.0")
CACHE_DIR = os.path.join(PYHEALTH_REPO_ROOT, "srv/local/data/wp/pyhealth_cache")

if __name__ == "__main__":

    # Build the multimodal MIMIC-IV dataset (structured EHR tables + notes).
    # dev=True limits the cohort to a small subset for quick local iteration.
    dataset = MIMIC4Dataset(
        ehr_root=EHR_ROOT,
        note_root=NOTE_ROOT,
        ehr_tables=["diagnoses_icd", "procedures_icd", "prescriptions", "labevents"],
        note_tables=["discharge", "radiology"],
        cache_dir=CACHE_DIR,
        num_workers=8,
        dev=True,
    )

    # Apply multimodal task
    task = EHRFoundationalModelMIMIC4()
    samples = dataset.set_task(task)

    # Get and print one sample to sanity-check the pipeline end to end
    sample = samples[0]
    print(sample)
75 changes: 39 additions & 36 deletions pyhealth/processors/tuple_time_text_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,99 +8,102 @@
Input: Tuple[List[str], List[float]]
- List[str]: Clinical text entries (e.g., discharge notes, progress notes)
- List[float]: Time differences between entries (in any time unit)
Output: Tuple[List[str], torch.Tensor, str]
- List[str]: Same text entries (unmodified)
- torch.Tensor: 1D float tensor of time differences

Output: Tuple[torch.Tensor, torch.Tensor, str]
- torch.Tensor: Text Token IDs from tokenizer [shape: (num_texts, max_seq_len)]
- torch.Tensor: 1D float tensor of time differences [shape: (N,)]
- str: Type tag for automatic modality routing (default: "note")

Use Case:
This processor enables automatic modality bucketing in multimodal pipelines.
The type_tag allows downstream models to automatically route different feature
types to appropriate encoders without hardcoding feature names:

- type_tag="note" routes to text encoder
- type_tag="image" routes to vision encoder
- type_tag="ehr" routes to EHR encoder

This design eliminates the need to manually map task schema feature_keys to
specific model components.

Example:
>>> from pyhealth.processors import TupleTimeTextProcessor
>>> processor = TupleTimeTextProcessor(type_tag="note")
>>>
>>> processor = TupleTimeTextProcessor(type_tag="note", tokenizer_name="dmis-lab/biobert-base-cased-v1.1")
>>>
>>> # Clinical notes with time differences
>>> texts = [
... "Patient admitted with chest pain.",
... "Follow-up: symptoms improved.",
... "Discharge: stable condition."
... ]
>>> time_diffs = [0.0, 2.5, 5.0] # hours since admission
>>>
>>>
>>> result = processor.process((texts, time_diffs))
>>> texts_out, time_tensor, tag = result
>>> print(f"Texts: {texts_out}")
>>> token_ids, time_tensor, tag = result
>>> print(f"Text Token IDs shape: {token_ids.shape}")
>>> print(f"Time tensor: {time_tensor}")
>>> print(f"Type tag: {tag}")

Args:
type_tag (str): Modality identifier for automatic routing in multimodal
models. Common values: "note", "image", "ehr", "signal".
Default: "note"
tokenizer_name (str): HuggingFace model name for the tokenizer.
Default: "dmis-lab/biobert-base-cased-v1.1"
"""

from typing import Any, Dict, List, Tuple
import torch
from transformers import AutoTokenizer
from .base_processor import FeatureProcessor
from . import register_processor


@register_processor("tuple_time_text")
class TupleTimeTextProcessor(FeatureProcessor):
    """Processes (text, time_diff) tuples for multimodal temporal fusion.

    Tokenizes text entries using a HuggingFace tokenizer and converts the
    paired temporal data into tensors for downstream model consumption. The
    ``type_tag`` lets multimodal models route the output to the right encoder
    without hardcoding feature names.
    """

    def __init__(self, type_tag: str = "note", tokenizer_name: str = "dmis-lab/biobert-base-cased-v1.1"):
        """Initialize the processor.

        Args:
            type_tag: Modality identifier for automatic routing. Default: "note"
            tokenizer_name: HuggingFace model name for the tokenizer.
                Default: "dmis-lab/biobert-base-cased-v1.1"
        """
        super().__init__()
        self.type_tag = type_tag
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

    def process(self, value: Tuple[List[str], List[float]]) -> Tuple[torch.Tensor, torch.Tensor, str]:
        """Process a tuple of texts and time differences.

        Tokenizes the text entries using the HuggingFace tokenizer and
        converts time differences to a float tensor.

        Args:
            value: Tuple containing:
                - List[str]: Text entries (clinical notes, observations, etc.)
                - List[float]: Time differences corresponding to each text entry

        Returns:
            Tuple containing:
                - torch.Tensor: Text Token IDs [shape: (T: num_texts, L: max_token_len)]
                - torch.Tensor: 1D float tensor of time differences [shape: (N,)]
                - str: Type tag for modality routing

        Example:
            >>> processor = TupleTimeTextProcessor(type_tag="clinical_note")
            >>> texts = ["Note 1", "Note 2"]
            >>> times = [0.0, 24.0]  # hours
            >>> result = processor.process((texts, times))
            >>> print(result[1])  # tensor([0., 24.])
        """
        texts, time_diffs = value
        time_tensor = torch.tensor(time_diffs, dtype=torch.float32)
        if not texts:
            # HF tokenizers cannot build a "pt" batch from an empty list;
            # return an empty (0, 0) id tensor so callers can handle
            # patients with no notes instead of crashing.
            return torch.empty((0, 0), dtype=torch.long), time_tensor, self.type_tag
        text_token_ids = self.tokenizer(
            texts, padding=True, truncation=True, return_tensors="pt"
        )["input_ids"]
        return text_token_ids, time_tensor, self.type_tag

    def size(self):
        """Return the vocabulary size of the tokenizer."""
        return self.tokenizer.vocab_size

    def __repr__(self):
        return f"TupleTimeTextProcessor(type_tag='{self.type_tag}', tokenizer='{self.tokenizer.name_or_path}')"
Loading