PolicyEngine · MaxGhenis · Apr 18, 2026 · Apr 18, 2026 · Apr 19, 2026 · Apr 19, 2026
diff --git a/changelog.d/pre-launch-cleanup.removed.md b/changelog.d/pre-launch-cleanup.removed.md
@@ -0,0 +1,6 @@
+Pre-launch cleanup — remove dead code and drop `plotly` from the core dependency set:
+
+- Delete the `policyengine.tax_benefit_models.us` and `policyengine.tax_benefit_models.uk` module shims. Python resolves the package directory first, so the `.py` shims were always shadowed; worse, both attempted to re-export `general_policy_reform_analysis`, which is not defined anywhere, making `from policyengine.tax_benefit_models.us import general_policy_reform_analysis` raise `ImportError` at runtime.
+- Delete `_create_entity_output_model` plus the `PersonOutput` / `BenunitOutput` / `HouseholdEntityOutput` factory products in `policyengine.tax_benefit_models.uk.analysis` — built via `pydantic.create_model` but never referenced anywhere in the codebase.
+- Delete `policyengine.core.DatasetVersion` (its only consumers were an `Optional` field on `Dataset` that was never set and the `policyengine.core` re-export).
+- Move `plotly>=5.0.0` from the base install to a new `policyengine[plotting]` extra. Only `policyengine.utils.plotting` uses it, and that module is itself only used by the `examples/` scripts. The package now imports cleanly without `plotly`.
diff --git a/changelog.d/v4-drop-filter-fields.removed.md b/changelog.d/v4-drop-filter-fields.removed.md
@@ -0,0 +1,13 @@
+**BREAKING (v4):** Remove the legacy `filter_field` / `filter_value`
+fields from `Simulation` and `Region`, the `_auto_construct_strategy`
+model validator that rewrote them into a `RowFilterStrategy`, and the
+`_filter_dataset_by_household_variable` methods they fed on both
+country models. All scoping now flows through `scoping_strategy:
+Optional[ScopingStrategy]`. `Region.requires_filter` becomes a derived
+property (`True` iff `scoping_strategy is not None`). The sub-national
+region factories (`countries/us/regions.py`, `countries/uk/regions.py`)
+construct `scoping_strategy=RowFilterStrategy(...)` /
+`WeightReplacementStrategy(...)` directly. Callers that previously
+passed `filter_field="place_fips", filter_value="44000"` now pass
+`scoping_strategy=RowFilterStrategy(variable_name="place_fips",
+variable_value="44000")`.
diff --git a/changelog.d/v4-facade.added.md b/changelog.d/v4-facade.added.md
@@ -0,0 +1,47 @@
+**BREAKING (v4):** Collapse the household-calculator surface into a
+single agent-friendly entry point, ``pe.us.calculate_household`` /
+``pe.uk.calculate_household``.
+
+New public API:
+
+- ``policyengine/__init__.py`` populated with canonical accessors:
+  ``pe.us``, ``pe.uk``, ``pe.Simulation`` (replacing the empty top-level
+  module). A single ``import policyengine as pe`` now exposes everything
+  a new coding session needs.
+- ``pe.us.calculate_household(**kwargs)`` and ``pe.uk.calculate_household``
+  take flat keyword arguments (``people``, per-entity overrides,
+  ``year``, ``reform``, ``extra_variables``) instead of a pydantic
+  input wrapper.
+- ``reform=`` accepts a plain dict: ``{parameter_path: value}`` or
+  ``{parameter_path: {effective_date: value}}``; the library compiles it internally.
+- Returns :class:`HouseholdResult` (new) with dot-access:
+  ``result.tax_unit.income_tax``, ``result.household.household_net_income``,
+  ``result.person[0].age``. Singleton entities are
+  :class:`EntityResult`; ``person`` is a list of them. ``to_dict()``
+  and ``write(path)`` serialize to JSON.
+- ``extra_variables=[...]`` is now a flat list; the library dispatches
+  each name to its entity by looking it up on the model.
+- Unknown variable names (in ``people``, entity overrides, or
+  ``extra_variables``) raise ``ValueError`` with a ``difflib`` close-match
+  suggestion and a paste-able fix hint.
+- Unknown dot-access on a result raises ``AttributeError`` with the
+  list of available variables plus the ``extra_variables=[...]`` call
+  that would surface the requested one.
+
+Removed (v4 breaking):
+
+- ``USHouseholdInput`` / ``UKHouseholdInput`` / ``USHouseholdOutput`` /
+  ``UKHouseholdOutput`` pydantic wrappers.
+- ``calculate_household_impact`` — the name was misleading (it
+  returned levels, not an impact vs. baseline). Reserved for a future
+  delta function.
+- The bare ``us_model`` / ``uk_model`` label-only singletons; each
+  country module now exposes ``.model`` pointing at the real
+  ``TaxBenefitModelVersion`` (the ``us_latest`` / ``uk_latest`` aliases
+  are kept for compatibility with any in-flight downstream code).
+
+New internal module:
+
+- ``policyengine.tax_benefit_models.common`` — ``compile_reform``,
+  ``dispatch_extra_variables``, ``EntityResult``, ``HouseholdResult``
+  shared by both country implementations.
diff --git a/pyproject.toml b/pyproject.toml
@@ -24,7 +24,6 @@ dependencies = [
     "pydantic>=2.0.0",
     "pandas>=2.0.0",
     "microdf_python>=1.2.1",
-    "plotly>=5.0.0",
     "requests>=2.31.0",
     "psutil>=5.9.0",
     "packaging>=23.0",
@@ -34,6 +33,9 @@ dependencies = [
 policyengine = "policyengine.cli:main"
 
 [project.optional-dependencies]
+plotting = [
+    "plotly>=5.0.0",
+]
 uk = [
     "policyengine_core>=3.25.0",
     "policyengine-uk==2.88.0",
@@ -51,6 +53,7 @@ dev = [
     "itables",
     "build",
     "jsonschema>=4.0.0",
+    "plotly>=5.0.0",
     "pytest-asyncio>=0.26.0",
     "ruff>=0.9.0",
     "policyengine_core>=3.25.0",

diff --git a/src/policyengine/__init__.py b/src/policyengine/__init__.py
@@ -0,0 +1,45 @@
+"""PolicyEngine — one Python API for tax and benefit policy.
+
+Canonical entry points for a fresh coding session:
+
+.. code-block:: python
+
+    import policyengine as pe
+
+    # Single-household calculator (US).
+    result = pe.us.calculate_household(
+        people=[{"age": 35, "employment_income": 60000}],
+        tax_unit={"filing_status": "SINGLE"},
+        year=2026,
+        reform={"gov.irs.credits.ctc.amount.adult_dependent": 1000},
+    )
+    print(result.tax_unit.income_tax, result.household.household_net_income)
+
+    # UK:
+    uk_result = pe.uk.calculate_household(
+        people=[{"age": 30, "employment_income": 50000}],
+        year=2026,
+    )
+
+    # Lower-level microsimulation building blocks.
+    from policyengine import Simulation  # or: pe.Simulation
+
+Each country module exposes ``calculate_household``, ``model``
+(the pinned ``TaxBenefitModelVersion``), and the microsim helpers.
+"""
+
+from importlib.util import find_spec
+
+from policyengine.core import Simulation as Simulation
+
+if find_spec("policyengine_us") is not None:
+    from policyengine.tax_benefit_models import us as us
+else:  # pragma: no cover
+    us = None  # type: ignore[assignment]
+
+if find_spec("policyengine_uk") is not None:
+    from policyengine.tax_benefit_models import uk as uk
+else:  # pragma: no cover
+    uk = None  # type: ignore[assignment]
+
+__all__ = ["Simulation", "uk", "us"]
diff --git a/src/policyengine/core/__init__.py b/src/policyengine/core/__init__.py
@@ -1,7 +1,6 @@
 from .dataset import Dataset
 from .dataset import YearData as YearData
 from .dataset import map_to_entity as map_to_entity
-from .dataset_version import DatasetVersion as DatasetVersion
 from .dynamic import Dynamic as Dynamic
 from .output import Output as Output
 from .output import OutputCollection as OutputCollection

diff --git a/src/policyengine/core/dataset.py b/src/policyengine/core/dataset.py
@@ -6,7 +6,6 @@
 from microdf import MicroDataFrame
 from pydantic import BaseModel, ConfigDict, Field
 
-from .dataset_version import DatasetVersion
 from .tax_benefit_model import TaxBenefitModel
 
 
@@ -85,7 +84,6 @@ class MyDataset(Dataset):
     id: str = Field(default_factory=lambda: str(uuid4()))
     name: str
     description: str
-    dataset_version: Optional[DatasetVersion] = None
     filepath: str
     is_output_dataset: bool = False
     tax_benefit_model: Optional[TaxBenefitModel] = None

diff --git a/src/policyengine/core/dataset_version.py b/src/policyengine/core/dataset_version.py
diff --git a/src/policyengine/core/region.py b/src/policyengine/core/region.py
@@ -3,7 +3,8 @@
 This module provides the Region and RegionRegistry classes for defining
 geographic regions that a tax-benefit model supports. Regions can have:
 1. A dedicated dataset (e.g., US states, congressional districts)
-2. Filter from a parent region's dataset (e.g., US places/cities, UK countries)
+2. A scoping strategy that derives the region from a parent dataset
+   (row filter or weight replacement)
 """
 
 from typing import Literal, Optional, Union
@@ -22,8 +23,9 @@ class Region(BaseModel):
     """Geographic region for tax-benefit simulations.
 
     Regions can either have:
-    1. A dedicated dataset (dataset_path is set, requires_filter is False)
-    2. Filter from a parent region's dataset (requires_filter is True)
+    1. A dedicated dataset (``dataset_path`` is set).
+    2. A scoping strategy that derives the region from a parent dataset
+       (``scoping_strategy`` is set).
 
     The unique identifier is the code field, which uses a prefixed format:
     - National: "us", "uk"
@@ -57,25 +59,16 @@ class Region(BaseModel):
         description="GCS path to dedicated dataset (e.g., 'gs://policyengine-us-data/states/CA.h5')",
     )
 
-    # Scoping strategy (preferred over legacy filter fields)
+    # Scoping strategy for regions that derive from a parent dataset
     scoping_strategy: Optional[ScopingStrategy] = Field(
         default=None,
         description="Strategy for scoping dataset to this region (row filtering or weight replacement)",
     )
 
-    # Legacy filtering configuration (kept for backward compatibility)
-    requires_filter: bool = Field(
-        default=False,
-        description="True if this region filters from a parent dataset rather than having its own",
-    )
-    filter_field: Optional[str] = Field(
-        default=None,
-        description="Dataset field to filter on (e.g., 'place_fips', 'country')",
-    )
-    filter_value: Optional[str] = Field(
-        default=None,
-        description="Value to match when filtering (defaults to code suffix if not set)",
-    )
+    @property
+    def requires_filter(self) -> bool:
+        """Whether this region needs a parent dataset + a scoping strategy."""
+        return self.scoping_strategy is not None
 
     # Metadata (primarily for US congressional districts)
     state_code: Optional[str] = Field(
@@ -180,24 +173,12 @@ def get_children(self, parent_code: str) -> list[Region]:
         return [r for r in self.regions if r.parent_code == parent_code]
 
     def get_dataset_regions(self) -> list[Region]:
-        """Get all regions that have dedicated datasets.
-
-        Returns:
-            List of regions with dataset_path set and requires_filter False
-        """
-        return [
-            r
-            for r in self.regions
-            if r.dataset_path is not None and not r.requires_filter
-        ]
+        """Get all regions that have a dedicated dataset on disk."""
+        return [r for r in self.regions if r.dataset_path is not None]
 
     def get_filter_regions(self) -> list[Region]:
-        """Get all regions that require filtering from parent datasets.
-
-        Returns:
-            List of regions with requires_filter True
-        """
-        return [r for r in self.regions if r.requires_filter]
+        """Get all regions that derive from a parent dataset via a scoping strategy."""
+        return [r for r in self.regions if r.scoping_strategy is not None]
 
     def __len__(self) -> int:
         """Return the number of regions in the registry."""

diff --git a/src/policyengine/core/simulation.py b/src/policyengine/core/simulation.py
@@ -3,13 +3,13 @@
 from typing import Optional
 from uuid import uuid4
 
-from pydantic import BaseModel, Field, model_validator
+from pydantic import BaseModel, Field
 
 from .cache import LRUCache
 from .dataset import Dataset
 from .dynamic import Dynamic
 from .policy import Policy
-from .scoping_strategy import RowFilterStrategy, ScopingStrategy
+from .scoping_strategy import ScopingStrategy
 from .tax_benefit_model_version import TaxBenefitModelVersion
 
 logger = logging.getLogger(__name__)
@@ -26,42 +26,22 @@ class Simulation(BaseModel):
     dynamic: Optional[Dynamic] = None
     dataset: Dataset = None
 
-    # Scoping strategy (preferred over legacy filter fields)
     scoping_strategy: Optional[ScopingStrategy] = Field(
         default=None,
         description="Strategy for scoping dataset to a sub-national region",
     )
 
-    # Legacy regional filtering parameters (kept for backward compatibility)
-    filter_field: Optional[str] = Field(
-        default=None,
-        description="Household-level variable to filter dataset by (e.g., 'place_fips', 'country')",
-    )
-    filter_value: Optional[str] = Field(
-        default=None,
-        description="Value to match when filtering (e.g., '44000', 'ENGLAND')",
+    extra_variables: dict[str, list[str]] = Field(
+        default_factory=dict,
+        description=(
+            "Additional variables to calculate beyond the model version's "
+            "default entity_variables, keyed by entity name. Use when a "
+            "caller needs variables that are not in the bundled default set."
+        ),
     )
 
     tax_benefit_model_version: TaxBenefitModelVersion = None
 
-    @model_validator(mode="after")
-    def _auto_construct_strategy(self) -> "Simulation":
-        """Auto-construct a RowFilterStrategy from legacy filter fields.
-
-        If filter_field and filter_value are set but scoping_strategy is not,
-        create a RowFilterStrategy for backward compatibility.
-        """
-        if (
-            self.scoping_strategy is None
-            and self.filter_field is not None
-            and self.filter_value is not None
-        ):
-            self.scoping_strategy = RowFilterStrategy(
-                variable_name=self.filter_field,
-                variable_value=self.filter_value,
-            )
-        return self
-
     output_dataset: Optional[Dataset] = None
 
     def run(self):

diff --git a/src/policyengine/countries/uk/regions.py b/src/policyengine/countries/uk/regions.py
@@ -140,9 +140,6 @@ def build_uk_region_registry(
                 label=name,
                 region_type="country",
                 parent_code="uk",
-                requires_filter=True,
-                filter_field="country",
-                filter_value=code.upper(),
                 scoping_strategy=RowFilterStrategy(
                     variable_name="country",
                     variable_value=code.upper(),
@@ -161,9 +158,6 @@ def build_uk_region_registry(
                     label=const["name"],
                     region_type="constituency",
                     parent_code="uk",
-                    requires_filter=True,
-                    filter_field="household_weight",
-                    filter_value=const["code"],
                     scoping_strategy=WeightReplacementStrategy(
                         weight_matrix_bucket="policyengine-uk-data-private",
                         weight_matrix_key="parliamentary_constituency_weights.h5",
@@ -185,9 +179,6 @@ def build_uk_region_registry(
                     label=la["name"],
                     region_type="local_authority",
                     parent_code="uk",
-                    requires_filter=True,
-                    filter_field="household_weight",
-                    filter_value=la["code"],
                     scoping_strategy=WeightReplacementStrategy(
                         weight_matrix_bucket="policyengine-uk-data-private",
                         weight_matrix_key="local_authority_weights.h5",

diff --git a/src/policyengine/countries/us/regions.py b/src/policyengine/countries/us/regions.py
@@ -101,9 +101,6 @@ def build_us_region_registry() -> RegionRegistry:
                 label=place["name"],
                 region_type="place",
                 parent_code=f"state/{state_abbrev.lower()}",
-                requires_filter=True,
-                filter_field="place_fips",
-                filter_value=fips,
                 state_code=state_abbrev,
                 state_name=place["state_name"],
                 scoping_strategy=RowFilterStrategy(

diff --git a/src/policyengine/tax_benefit_models/common/__init__.py b/src/policyengine/tax_benefit_models/common/__init__.py
@@ -0,0 +1,11 @@
+"""Country-agnostic helpers for household calculation and reform analysis.
+
+The country modules (:mod:`policyengine.tax_benefit_models.us`,
+:mod:`policyengine.tax_benefit_models.uk`) thread these helpers through
+their public ``calculate_household`` / ``analyze_reform`` entry points.
+"""
+
+from .extra_variables import dispatch_extra_variables as dispatch_extra_variables
+from .reform import compile_reform as compile_reform
+from .result import EntityResult as EntityResult
+from .result import HouseholdResult as HouseholdResult