diff --git a/docs/api/models.rst b/docs/api/models.rst index 7c3ac7c4b..0cd3e1b62 100644 --- a/docs/api/models.rst +++ b/docs/api/models.rst @@ -206,3 +206,4 @@ API Reference models/pyhealth.models.BIOT models/pyhealth.models.unified_multimodal_embedding_docs models/pyhealth.models.califorest + models/pyhealth.models.word_basis_linear_model \ No newline at end of file diff --git a/docs/api/models/pyhealth.models.word_basis_linear_model.rst b/docs/api/models/pyhealth.models.word_basis_linear_model.rst new file mode 100644 index 000000000..af2d269e1 --- /dev/null +++ b/docs/api/models/pyhealth.models.word_basis_linear_model.rst @@ -0,0 +1,7 @@ +WordBasisLinearModel +==================== + +.. automodule:: pyhealth.models.word_basis_linear_model + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/examples/sample_binary_word_basis_linear_model.py b/examples/sample_binary_word_basis_linear_model.py new file mode 100644 index 000000000..e616194b7 --- /dev/null +++ b/examples/sample_binary_word_basis_linear_model.py @@ -0,0 +1,229 @@ +"""Example ablation for WordBasisLinearModel. + +This script demonstrates a minimal, runnable ablation for the paper-inspired +WordBasisLinearModel using synthetic/demo data. + +Ablation: + Vary weight decay during training and compare: + - validation accuracy + - validation loss + - cosine similarity between the learned classifier weights and the + word-basis reconstruction + +Why this ablation: + Weight decay is the simplest PyTorch/PyHealth-friendly analogue to the + regularization used in the paper's linear classifier, and sweeping it is + a concrete, reproducible model ablation.
+ +Experimental setup: + - Binary classification on synthetic embedding inputs + - Bias-free linear classifier + - Fixed word-embedding matrix for explanation + - Three weight decay settings: 0.0, 1e-4, 1e-2 + +Example findings: + In a representative run, lower weight decay produced the best validation + accuracy, while a larger weight decay slightly improved cosine similarity + between the learned classifier weights and the word-basis reconstruction. + Exact numbers may vary slightly across environments. + +This script is intentionally small and deterministic so it is easy to run +locally and easy for reviewers to inspect. +""" + +from __future__ import annotations + +import random +from typing import List, Tuple + +import numpy as np +import torch +from pyhealth.datasets import create_sample_dataset +from pyhealth.models import WordBasisLinearModel + + +INPUT_DIM = 8 +FEATURE_KEY = "embedding" +LABEL_KEY = "label" + + +def set_seed(seed: int = 42) -> None: + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + +def build_dataset(): + """Builds a tiny PyHealth sample dataset required by the model constructor.""" + samples = [ + { + "patient_id": "patient-0", + "visit_id": "visit-0", + "embedding": [0.0] * INPUT_DIM, + "label": 0, + }, + { + "patient_id": "patient-1", + "visit_id": "visit-1", + "embedding": [0.1] * INPUT_DIM, + "label": 1, + }, + ] + + dataset = create_sample_dataset( + samples=samples, + input_schema={FEATURE_KEY: "tensor"}, + output_schema={LABEL_KEY: "binary"}, + dataset_name="word_basis_linear_model_example", + ) + return dataset + + +def make_synthetic_split( + n_train: int = 64, + n_val: int = 32, + input_dim: int = INPUT_DIM, +) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """Creates a simple synthetic binary classification problem in embedding space.""" + true_beta = torch.tensor( + [1.2, -0.8, 0.6, 1.0, -1.1, 0.4, 0.7, -0.5], + dtype=torch.float32, + ) + + x_train = torch.randn(n_train, input_dim) + 
x_val = torch.randn(n_val, input_dim) + + train_noise = 0.35 * torch.randn(n_train) + val_noise = 0.35 * torch.randn(n_val) + + train_logits = x_train @ true_beta + train_noise + val_logits = x_val @ true_beta + val_noise + + y_train = (torch.sigmoid(train_logits) > 0.5).float() + y_val = (torch.sigmoid(val_logits) > 0.5).float() + + return x_train, y_train, x_val, y_val + + +def make_word_embeddings() -> Tuple[torch.Tensor, List[str]]: + """Creates a fixed word basis in the same embedding space as the classifier.""" + word_list = [ + "dark", + "light", + "round", + "pointed", + "large", + "small", + ] + + word_embeddings = torch.tensor( + [ + [1.0, 0.2, 0.1, 0.0, 0.0, 0.1, 0.0, 0.0], + [0.1, 1.0, 0.0, 0.2, 0.1, 0.0, 0.1, 0.0], + [0.0, 0.1, 1.0, 0.2, 0.0, 0.1, 0.0, 0.2], + [0.2, 0.0, 0.1, 1.0, 0.1, 0.0, 0.2, 0.0], + [0.0, 0.1, 0.0, 0.1, 1.0, 0.2, 0.1, 0.0], + [0.1, 0.0, 0.2, 0.0, 0.2, 1.0, 0.0, 0.1], + ], + dtype=torch.float32, + ) + + return word_embeddings, word_list + + +def accuracy_from_probs(y_prob: torch.Tensor, y_true: torch.Tensor) -> float: + y_pred = (y_prob.squeeze(1) >= 0.5).float() + return (y_pred == y_true).float().mean().item() + + +def train_and_evaluate( + weight_decay: float, + epochs: int = 200, + lr: float = 0.05, +) -> dict: + dataset = build_dataset() + model = WordBasisLinearModel( + dataset=dataset, + input_dim=INPUT_DIM, + feature_key=FEATURE_KEY, + ridge_lambda=1e-4, + ) + + optimizer = torch.optim.Adam( + model.parameters(), + lr=lr, + weight_decay=weight_decay, + ) + + x_train, y_train, x_val, y_val = make_synthetic_split() + word_embeddings, word_list = make_word_embeddings() + + model.train() + for _ in range(epochs): + optimizer.zero_grad() + output = model(**{FEATURE_KEY: x_train, LABEL_KEY: y_train}) + output["loss"].backward() + optimizer.step() + + model.eval() + with torch.no_grad(): + train_output = model(**{FEATURE_KEY: x_train, LABEL_KEY: y_train}) + val_output = model(**{FEATURE_KEY: x_val, LABEL_KEY: y_val}) + + coeffs = 
model.fit_word_basis(word_embeddings) + cosine = model.compute_word_basis_cosine_similarity( + word_embeddings=word_embeddings, + word_coeffs=coeffs, + ).item() + + top_words = model.explain_words( + word_embeddings=word_embeddings, + word_list=word_list, + )[:3] + + return { + "weight_decay": weight_decay, + "train_loss": train_output["loss"].item(), + "val_loss": val_output["loss"].item(), + "train_acc": accuracy_from_probs(train_output["y_prob"], y_train), + "val_acc": accuracy_from_probs(val_output["y_prob"], y_val), + "cosine_similarity": cosine, + "top_words": top_words, + } + + +def main() -> None: + set_seed(42) + + # Hyperparameter ablation required by the rubric for model contributions. + weight_decays = [0.0, 1e-4, 1e-2] + results = [train_and_evaluate(weight_decay=wd) for wd in weight_decays] + + print("\nWordBasisLinearModel ablation: weight decay sweep") + print("-" * 95) + print( + f"{'weight_decay':>12} | {'train_acc':>9} | {'val_acc':>7} | " + f"{'train_loss':>10} | {'val_loss':>8} | {'cosine_sim':>10}" + ) + print("-" * 95) + + for row in results: + print( + f"{row['weight_decay']:>12.4g} | " + f"{row['train_acc']:>9.3f} | " + f"{row['val_acc']:>7.3f} | " + f"{row['train_loss']:>10.4f} | " + f"{row['val_loss']:>8.4f} | " + f"{row['cosine_similarity']:>10.4f}" + ) + + print("\nTop 3 explanatory words by configuration:") + for row in results: + print(f"\nweight_decay={row['weight_decay']}") + for word, coeff in row["top_words"]: + print(f" {word:>10}: {coeff:+.4f}") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/pyhealth/models/__init__.py b/pyhealth/models/__init__.py index 4c168d3e3..76c688f22 100644 --- a/pyhealth/models/__init__.py +++ b/pyhealth/models/__init__.py @@ -45,4 +45,5 @@ from .sdoh import SdohClassifier from .medlink import MedLink from .unified_embedding import UnifiedMultimodalEmbeddingModel, SinusoidalTimeEmbedding -from .califorest import CaliForest \ No newline at end of file +from .califorest 
import CaliForest +from .word_basis_linear_model import WordBasisLinearModel \ No newline at end of file diff --git a/pyhealth/models/word_basis_linear_model.py b/pyhealth/models/word_basis_linear_model.py new file mode 100644 index 000000000..12e53e6d6 --- /dev/null +++ b/pyhealth/models/word_basis_linear_model.py @@ -0,0 +1,285 @@ +from __future__ import annotations + +from typing import Any, List, Optional, Sequence, Tuple + +import torch +import torch.nn as nn + +from pyhealth.datasets import SampleDataset +from pyhealth.models import BaseModel + + +class WordBasisLinearModel(BaseModel): + """Linear classifier on frozen embeddings with word-basis explanations. + + This model reproduces the core two-step idea from + "Representing visual classification as a linear combination of words": + + 1. Learn a linear classifier over precomputed frozen embeddings. + 2. Approximate the learned classifier weight vector as a linear + combination of fixed word embeddings. + + Args: + dataset: PyHealth SampleDataset. + input_dim: Dimension of the precomputed embedding vector. + feature_key: Optional feature field name. If None, the model expects + exactly one input feature in the dataset schema and uses it. + ridge_lambda: Default ridge penalty used when solving for word-basis + coefficients. + + Example: + >>> from pyhealth.datasets import create_sample_dataset + >>> from pyhealth.models import WordBasisLinearModel + >>> samples = [ + ... { + ... "patient_id": "p0", + ... "visit_id": "v0", + ... "embedding": [0.1] * 8, + ... "label": 1, + ... }, + ... { + ... "patient_id": "p1", + ... "visit_id": "v1", + ... "embedding": [0.0] * 8, + ... "label": 0, + ... }, + ... ] + >>> dataset = create_sample_dataset( + ... samples=samples, + ... input_schema={"embedding": "tensor"}, + ... output_schema={"label": "binary"}, + ... dataset_name="word_basis_linear_model_example", + ... ) + >>> model = WordBasisLinearModel( + ... dataset=dataset, + ... input_dim=8, + ... 
feature_key="embedding", + ... ) + """ + + def __init__( + self, + dataset: SampleDataset, + input_dim: int, + feature_key: Optional[str] = None, + ridge_lambda: float = 0.0, + ) -> None: + super().__init__(dataset=dataset) + self.mode = "binary" + if len(self.label_keys) != 1: + raise ValueError( + "WordBasisLinearModel currently supports exactly one label key." + ) + self.label_key = self.label_keys[0] + + if feature_key is None: + if len(self.feature_keys) != 1: + raise ValueError( + "feature_key was not provided, but the dataset has " + f"{len(self.feature_keys)} feature keys. Please pass feature_key " + "explicitly." + ) + self.feature_key = self.feature_keys[0] + else: + if feature_key not in self.feature_keys: + raise ValueError( + f"feature_key '{feature_key}' not found in dataset feature keys: " + f"{self.feature_keys}" + ) + self.feature_key = feature_key + + if self.get_output_size() != 1: + raise ValueError( + "WordBasisLinearModel currently supports binary classification only." + ) + + self.input_dim = input_dim + self.ridge_lambda = ridge_lambda + self.classifier = nn.Linear(input_dim, 1, bias=False) + + def _get_input_tensor(self, feature: Any) -> torch.Tensor: + """Extracts the dense tensor from a PyHealth feature payload.""" + if isinstance(feature, torch.Tensor): + x = feature + elif isinstance(feature, (tuple, list)): + if len(feature) == 0 or not isinstance(feature[0], torch.Tensor): + raise TypeError( + "Expected feature payload to contain a tensor as the first item." + ) + x = feature[0] + else: + raise TypeError( + f"Unsupported feature type for {self.feature_key}: {type(feature)}" + ) + + x = x.to(self.device).float() + + if x.ndim != 2: + raise ValueError( + f"Expected feature tensor to have shape (batch_size, input_dim), " + f"but got shape {tuple(x.shape)}." + ) + if x.shape[1] != self.input_dim: + raise ValueError( + f"Expected input_dim={self.input_dim}, but got tensor with shape " + f"{tuple(x.shape)}." 
+ ) + return x + + def _prepare_labels(self, y_true: torch.Tensor) -> torch.Tensor: + """Normalizes labels to shape (batch_size, 1).""" + y_true = y_true.to(self.device).float() + if y_true.ndim == 1: + y_true = y_true.unsqueeze(1) + elif y_true.ndim == 2 and y_true.shape[1] == 1: + pass + else: + raise ValueError( + f"Expected labels of shape (batch_size,) or (batch_size, 1), " + f"but got {tuple(y_true.shape)}." + ) + return y_true + + def forward(self, **kwargs) -> dict[str, torch.Tensor]: + """Forward pass for binary classification on dense embeddings.""" + if self.feature_key not in kwargs: + raise KeyError(f"Missing required feature key: {self.feature_key}") + + x = self._get_input_tensor(kwargs[self.feature_key]) + logit = self.classifier(x) + y_prob = self.prepare_y_prob(logit) + + result = { + "logit": logit, + "y_prob": y_prob, + } + + if self.label_key in kwargs: + y_true = self._prepare_labels(kwargs[self.label_key]) + loss = self.get_loss_function()(logit, y_true) + result["loss"] = loss + result["y_true"] = y_true + + return result + + def forward_from_embedding( + self, + feature_embeddings: torch.Tensor, + y: Optional[torch.Tensor] = None, + ) -> dict[str, torch.Tensor]: + """Forward pass that bypasses feature processing. + + This is useful for interpretability-style workflows where the input is + already in embedding space. + """ + kwargs = {self.feature_key: feature_embeddings} + if y is not None: + kwargs[self.label_key] = y + return self.forward(**kwargs) + + def get_classifier_weight(self) -> torch.Tensor: + """Returns the learned classifier weight vector of shape (input_dim,).""" + return self.classifier.weight.squeeze(0) + + def fit_word_basis( + self, + word_embeddings: torch.Tensor, + ridge_lambda: Optional[float] = None, + ) -> torch.Tensor: + """Solves for word coefficients that reconstruct the classifier weight. + + Args: + word_embeddings: Tensor of shape (num_words, input_dim). + ridge_lambda: Optional ridge penalty. 
If None, uses self.ridge_lambda. + + Returns: + Tensor of shape (num_words,) containing the word coefficients. + """ + if ridge_lambda is None: + ridge_lambda = self.ridge_lambda + + word_embeddings = word_embeddings.to(self.device).float() + + if word_embeddings.ndim != 2: + raise ValueError( + "word_embeddings must have shape (num_words, input_dim)." + ) + if word_embeddings.shape[1] != self.input_dim: + raise ValueError( + f"Expected word_embeddings.shape[1] == {self.input_dim}, but got " + f"{word_embeddings.shape[1]}." + ) + + beta = self.get_classifier_weight() # (input_dim,) + num_words = word_embeddings.shape[0] + + if ridge_lambda > 0: + gram = word_embeddings @ word_embeddings.T + rhs = word_embeddings @ beta + eye = torch.eye(num_words, device=self.device, dtype=word_embeddings.dtype) + coeffs = torch.linalg.solve(gram + ridge_lambda * eye, rhs) + else: + # Solve W^T c ≈ beta in least-squares sense. + coeffs = torch.linalg.lstsq(word_embeddings.T, beta).solution + + return coeffs + + def reconstruct_from_word_basis( + self, + word_embeddings: torch.Tensor, + word_coeffs: torch.Tensor, + ) -> torch.Tensor: + """Reconstructs classifier weights from word embeddings and coefficients.""" + word_embeddings = word_embeddings.to(self.device).float() + word_coeffs = word_coeffs.to(self.device).float() + + if word_embeddings.ndim != 2: + raise ValueError( + "word_embeddings must have shape (num_words, input_dim)." + ) + if word_coeffs.ndim != 1: + raise ValueError("word_coeffs must have shape (num_words,).") + if word_embeddings.shape[0] != word_coeffs.shape[0]: + raise ValueError( + "word_embeddings and word_coeffs must agree on num_words." 
+ ) + + return word_coeffs @ word_embeddings + + def compute_word_basis_cosine_similarity( + self, + word_embeddings: torch.Tensor, + word_coeffs: torch.Tensor, + ) -> torch.Tensor: + """Computes cosine similarity between true and reconstructed weights.""" + beta = self.get_classifier_weight() + beta_hat = self.reconstruct_from_word_basis(word_embeddings, word_coeffs) + + cosine = nn.CosineSimilarity(dim=0) + return cosine(beta, beta_hat) + + def explain_words( + self, + word_embeddings: torch.Tensor, + word_list: Sequence[str], + ridge_lambda: Optional[float] = None, + sort_by_abs: bool = True, + ) -> List[Tuple[str, float]]: + """Returns (word, coefficient) pairs for interpretation.""" + coeffs = self.fit_word_basis( + word_embeddings=word_embeddings, + ridge_lambda=ridge_lambda, + ) + + if len(word_list) != coeffs.shape[0]: + raise ValueError( + f"word_list has length {len(word_list)}, but coeffs has length " + f"{coeffs.shape[0]}." + ) + + pairs = list(zip(word_list, coeffs.detach().cpu().tolist())) + if sort_by_abs: + pairs.sort(key=lambda x: abs(x[1]), reverse=True) + else: + pairs.sort(key=lambda x: x[1], reverse=True) + return pairs \ No newline at end of file diff --git a/tests/models/test_word_basis_linear_model.py b/tests/models/test_word_basis_linear_model.py new file mode 100644 index 000000000..c4c2213d5 --- /dev/null +++ b/tests/models/test_word_basis_linear_model.py @@ -0,0 +1,178 @@ +import torch + +from pyhealth.datasets import create_sample_dataset +from pyhealth.models import WordBasisLinearModel + + +INPUT_DIM = 8 +FEATURE_KEY = "embedding" +LABEL_KEY = "label" + + +def make_test_dataset(): + samples = [ + { + "patient_id": "patient-0", + "visit_id": "visit-0", + "embedding": [0.1, 0.2, 0.3, 0.4, 0.0, 0.1, 0.2, 0.3], + "label": 1, + }, + { + "patient_id": "patient-1", + "visit_id": "visit-1", + "embedding": [0.3, 0.1, 0.0, 0.5, 0.2, 0.1, 0.4, 0.2], + "label": 0, + }, + { + "patient_id": "patient-2", + "visit_id": "visit-2", + "embedding": [0.0, 
import torch

from pyhealth.datasets import create_sample_dataset
from pyhealth.models import WordBasisLinearModel


INPUT_DIM = 8
FEATURE_KEY = "embedding"
LABEL_KEY = "label"

# Shared fixture rows: (8-dim embedding, binary label), used for both the
# SampleDataset and the raw tensor batch so the two always agree.
_SAMPLE_ROWS = [
    ([0.1, 0.2, 0.3, 0.4, 0.0, 0.1, 0.2, 0.3], 1),
    ([0.3, 0.1, 0.0, 0.5, 0.2, 0.1, 0.4, 0.2], 0),
    ([0.0, 0.4, 0.2, 0.1, 0.6, 0.3, 0.2, 0.1], 1),
]


def make_test_dataset():
    """Builds the minimal SampleDataset the model constructor requires."""
    samples = [
        {
            "patient_id": f"patient-{idx}",
            "visit_id": f"visit-{idx}",
            "embedding": embedding,
            "label": label,
        }
        for idx, (embedding, label) in enumerate(_SAMPLE_ROWS)
    ]
    return create_sample_dataset(
        samples=samples,
        input_schema={FEATURE_KEY: "tensor"},
        output_schema={LABEL_KEY: "binary"},
        dataset_name="word_basis_linear_model_test",
    )


def make_model():
    """Constructs a fresh model over the fixture dataset."""
    return WordBasisLinearModel(
        dataset=make_test_dataset(),
        input_dim=INPUT_DIM,
        feature_key=FEATURE_KEY,
        ridge_lambda=1e-4,
    )


def make_batch():
    """Returns the fixture rows as (x, y) float tensors."""
    x = torch.tensor([embedding for embedding, _ in _SAMPLE_ROWS], dtype=torch.float32)
    y = torch.tensor([float(label) for _, label in _SAMPLE_ROWS], dtype=torch.float32)
    return x, y


def make_word_embeddings():
    """Six words embedded in the same 8-dim space as the classifier weights."""
    # First four words are axis-aligned unit vectors; last two are dense mixes.
    axis_words = torch.eye(4, 8, dtype=torch.float32)
    mixed_words = torch.tensor(
        [
            [0.2, 0.1, 0.3, 0.4, 0.5, 0.0, 0.1, 0.2],
            [0.1, 0.3, 0.2, 0.0, 0.4, 0.5, 0.2, 0.1],
        ],
        dtype=torch.float32,
    )
    return torch.cat([axis_words, mixed_words], dim=0)


def _run_labeled_forward(model):
    """Runs one labeled forward pass over the fixture batch."""
    x, y = make_batch()
    return model(**{FEATURE_KEY: x, LABEL_KEY: y}), x, y


def test_model_instantiation():
    model = make_model()
    assert isinstance(model, WordBasisLinearModel)
    assert model.input_dim == INPUT_DIM
    assert model.feature_key == FEATURE_KEY
    assert model.label_key == LABEL_KEY
    assert model.classifier.bias is None


def test_forward_returns_expected_keys_and_shapes():
    output, _, _ = _run_labeled_forward(make_model())

    for key in ("loss", "y_prob", "y_true", "logit"):
        assert key in output

    assert output["logit"].shape == (3, 1)
    assert output["y_prob"].shape == (3, 1)
    assert output["y_true"].shape == (3, 1)
    assert output["loss"].ndim == 0


def test_backward_computes_gradients():
    model = make_model()
    output, _, _ = _run_labeled_forward(model)
    output["loss"].backward()

    grad = model.classifier.weight.grad
    assert grad is not None
    assert grad.shape == (1, INPUT_DIM)


def test_forward_from_embedding_runs():
    model = make_model()
    x, y = make_batch()

    output = model.forward_from_embedding(feature_embeddings=x, y=y)

    assert "loss" in output
    assert "logit" in output
    assert output["logit"].shape == (3, 1)
    assert output["y_prob"].shape == (3, 1)


def test_get_classifier_weight_shape():
    beta = make_model().get_classifier_weight()
    assert beta.shape == (INPUT_DIM,)


def test_fit_word_basis_and_reconstruct_shapes():
    model = make_model()
    _run_labeled_forward(model)

    words = make_word_embeddings()
    coeffs = model.fit_word_basis(words)

    assert coeffs.ndim == 1
    assert coeffs.shape[0] == words.shape[0]

    beta_hat = model.reconstruct_from_word_basis(words, coeffs)
    assert beta_hat.shape == (INPUT_DIM,)


def test_compute_word_basis_cosine_similarity_runs():
    model = make_model()
    _run_labeled_forward(model)

    words = make_word_embeddings()
    coeffs = model.fit_word_basis(words)
    cosine = model.compute_word_basis_cosine_similarity(words, coeffs)

    assert cosine.ndim == 0
    assert torch.isfinite(cosine)


def test_explain_words_returns_pairs():
    model = make_model()
    _run_labeled_forward(model)

    words = make_word_embeddings()
    word_list = ["word_a", "word_b", "word_c", "word_d", "word_e", "word_f"]

    pairs = model.explain_words(words, word_list)

    assert isinstance(pairs, list)
    assert len(pairs) == len(word_list)
    assert isinstance(pairs[0][0], str)
    assert isinstance(pairs[0][1], float)