From d629f335ff8236b57d4145633eaa05780035bcca Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sat, 18 Apr 2026 22:14:50 -0400 Subject: [PATCH] Unify ProgramStatistics / ProgrammeStatistics into one class Two otherwise-identical classes differed only by American/British spelling (program_name vs programme_name). Collapsed into a single policyengine.outputs.ProgramStatistics; both country analysis helpers import it from there now. Saves ~106 LOC of duplication and removes an API-surface footgun for cross-country code. Changes: - Add policyengine/outputs/program_statistics.py with the unified class. - Re-export from policyengine/outputs/__init__.py. - Delete tax_benefit_models/us/outputs.py and tax_benefit_models/uk/outputs.py. - us/__init__.py and uk/__init__.py re-export from policyengine.outputs. - uk/analysis.py: rename programme_name -> program_name, programme_statistics -> program_statistics, programmes -> programs, programme_df/collection -> program_df/collection. Field on PolicyReformAnalysis also changes. Migration for callers: - from policyengine.tax_benefit_models.uk import ProgrammeStatistics -> from policyengine.outputs import ProgramStatistics - stats.programme_name -> stats.program_name - UK PolicyReformAnalysis.programme_statistics -> PolicyReformAnalysis.program_statistics - UK program dataframe column "programme_name" -> "program_name" 205 tests pass locally. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/policyengine/outputs/__init__.py | 2 + .../program_statistics.py} | 2 +- .../tax_benefit_models/uk/__init__.py | 6 +- .../tax_benefit_models/uk/analysis.py | 33 +++--- .../tax_benefit_models/uk/outputs.py | 105 ------------------ .../tax_benefit_models/us/__init__.py | 2 +- .../tax_benefit_models/us/analysis.py | 3 +- 7 files changed, 24 insertions(+), 129 deletions(-) rename src/policyengine/{tax_benefit_models/us/outputs.py => outputs/program_statistics.py} (98%) delete mode 100644 src/policyengine/tax_benefit_models/uk/outputs.py diff --git a/src/policyengine/outputs/__init__.py b/src/policyengine/outputs/__init__.py index 61311f46..13ff2a26 100644 --- a/src/policyengine/outputs/__init__.py +++ b/src/policyengine/outputs/__init__.py @@ -49,6 +49,7 @@ calculate_us_poverty_by_race, calculate_us_poverty_rates, ) +from policyengine.outputs.program_statistics import ProgramStatistics __all__ = [ "Output", @@ -59,6 +60,7 @@ "ChangeAggregateType", "DecileImpact", "calculate_decile_impacts", + "ProgramStatistics", "IntraDecileImpact", "compute_intra_decile_impacts", "Poverty", diff --git a/src/policyengine/tax_benefit_models/us/outputs.py b/src/policyengine/outputs/program_statistics.py similarity index 98% rename from src/policyengine/tax_benefit_models/us/outputs.py rename to src/policyengine/outputs/program_statistics.py index 1dd6f001..a48ff8a8 100644 --- a/src/policyengine/tax_benefit_models/us/outputs.py +++ b/src/policyengine/outputs/program_statistics.py @@ -1,4 +1,4 @@ -"""US-specific output templates.""" +"""Shared `ProgramStatistics` for reform-impact tables (US + UK).""" from typing import Optional diff --git a/src/policyengine/tax_benefit_models/uk/__init__.py b/src/policyengine/tax_benefit_models/uk/__init__.py index b8d65593..3ab098e2 100644 --- a/src/policyengine/tax_benefit_models/uk/__init__.py +++ b/src/policyengine/tax_benefit_models/uk/__init__.py @@ -15,6 +15,7 @@ if find_spec("policyengine_uk") 
is not None: from policyengine.core import Dataset + from policyengine.outputs import ProgramStatistics from .analysis import economic_impact_analysis from .datasets import ( @@ -31,7 +32,6 @@ managed_microsimulation, uk_latest, ) - from .outputs import ProgrammeStatistics model = uk_latest """The pinned UK ``TaxBenefitModelVersion`` for this policyengine release.""" @@ -40,7 +40,7 @@ UKYearData.model_rebuild() PolicyEngineUKDataset.model_rebuild() PolicyEngineUKLatest.model_rebuild() - ProgrammeStatistics.model_rebuild() + ProgramStatistics.model_rebuild() __all__ = [ "UKYearData", @@ -55,7 +55,7 @@ "uk_latest", "calculate_household", "economic_impact_analysis", - "ProgrammeStatistics", + "ProgramStatistics", ] else: __all__ = [] diff --git a/src/policyengine/tax_benefit_models/uk/analysis.py b/src/policyengine/tax_benefit_models/uk/analysis.py index 07d325e8..f37d18be 100644 --- a/src/policyengine/tax_benefit_models/uk/analysis.py +++ b/src/policyengine/tax_benefit_models/uk/analysis.py @@ -10,6 +10,7 @@ from pydantic import BaseModel from policyengine.core import OutputCollection, Simulation +from policyengine.outputs import ProgramStatistics from policyengine.outputs.decile_impact import ( DecileImpact, calculate_decile_impacts, @@ -23,14 +24,12 @@ calculate_uk_poverty_rates, ) -from .outputs import ProgrammeStatistics - class PolicyReformAnalysis(BaseModel): """Complete policy reform analysis result.""" decile_impacts: OutputCollection[DecileImpact] - programme_statistics: OutputCollection[ProgrammeStatistics] + program_statistics: OutputCollection[ProgramStatistics] baseline_poverty: OutputCollection[Poverty] reform_poverty: OutputCollection[Poverty] baseline_inequality: Inequality @@ -57,7 +56,7 @@ def economic_impact_analysis( reform_simulation=reform_simulation, ) - programmes = { + programs = { "income_tax": {"is_tax": True}, "national_insurance": {"is_tax": True}, "vat": {"is_tax": True}, @@ -70,27 +69,27 @@ def economic_impact_analysis( 
"child_tax_credit": {"is_tax": False}, } - programme_statistics = [] - for programme_name, programme_info in programmes.items(): + program_statistics = [] + for program_name, program_info in programs.items(): entity = baseline_simulation.tax_benefit_model_version.get_variable( - programme_name + program_name ).entity - stats = ProgrammeStatistics( + stats = ProgramStatistics( baseline_simulation=baseline_simulation, reform_simulation=reform_simulation, - programme_name=programme_name, + program_name=program_name, entity=entity, - is_tax=programme_info["is_tax"], + is_tax=program_info["is_tax"], ) stats.run() - programme_statistics.append(stats) + program_statistics.append(stats) - programme_df = pd.DataFrame( + program_df = pd.DataFrame( [ { "baseline_simulation_id": p.baseline_simulation.id, "reform_simulation_id": p.reform_simulation.id, - "programme_name": p.programme_name, + "program_name": p.program_name, "entity": p.entity, "is_tax": p.is_tax, "baseline_total": p.baseline_total, @@ -101,11 +100,11 @@ def economic_impact_analysis( "winners": p.winners, "losers": p.losers, } - for p in programme_statistics + for p in program_statistics ] ) - programme_collection = OutputCollection( - outputs=programme_statistics, dataframe=programme_df + program_collection = OutputCollection( + outputs=program_statistics, dataframe=program_df ) baseline_poverty = calculate_uk_poverty_rates(baseline_simulation) @@ -115,7 +114,7 @@ def economic_impact_analysis( return PolicyReformAnalysis( decile_impacts=decile_impacts, - programme_statistics=programme_collection, + program_statistics=program_collection, baseline_poverty=baseline_poverty, reform_poverty=reform_poverty, baseline_inequality=baseline_inequality, diff --git a/src/policyengine/tax_benefit_models/uk/outputs.py b/src/policyengine/tax_benefit_models/uk/outputs.py deleted file mode 100644 index 97032a9c..00000000 --- a/src/policyengine/tax_benefit_models/uk/outputs.py +++ /dev/null @@ -1,105 +0,0 @@ -"""UK-specific output 
templates.""" - -from typing import Optional - -from pydantic import ConfigDict - -from policyengine.core import Output, Simulation -from policyengine.outputs.aggregate import Aggregate, AggregateType -from policyengine.outputs.change_aggregate import ( - ChangeAggregate, - ChangeAggregateType, -) - - -class ProgrammeStatistics(Output): - """Single programme's statistics from a policy reform - represents one database row.""" - - model_config = ConfigDict(arbitrary_types_allowed=True) - - baseline_simulation: Simulation - reform_simulation: Simulation - programme_name: str - entity: str - is_tax: bool = False - - # Results populated by run() - baseline_total: Optional[float] = None - reform_total: Optional[float] = None - change: Optional[float] = None - baseline_count: Optional[float] = None - reform_count: Optional[float] = None - winners: Optional[float] = None - losers: Optional[float] = None - - def run(self): - """Calculate statistics for this programme.""" - # Baseline totals - baseline_total = Aggregate( - simulation=self.baseline_simulation, - variable=self.programme_name, - aggregate_type=AggregateType.SUM, - entity=self.entity, - ) - baseline_total.run() - - # Reform totals - reform_total = Aggregate( - simulation=self.reform_simulation, - variable=self.programme_name, - aggregate_type=AggregateType.SUM, - entity=self.entity, - ) - reform_total.run() - - # Count of recipients/payers (baseline) - baseline_count = Aggregate( - simulation=self.baseline_simulation, - variable=self.programme_name, - aggregate_type=AggregateType.COUNT, - entity=self.entity, - filter_variable=self.programme_name, - filter_variable_geq=0.01, - ) - baseline_count.run() - - # Count of recipients/payers (reform) - reform_count = Aggregate( - simulation=self.reform_simulation, - variable=self.programme_name, - aggregate_type=AggregateType.COUNT, - entity=self.entity, - filter_variable=self.programme_name, - filter_variable_geq=0.01, - ) - reform_count.run() - - # Winners and losers - 
winners = ChangeAggregate( - baseline_simulation=self.baseline_simulation, - reform_simulation=self.reform_simulation, - variable=self.programme_name, - aggregate_type=ChangeAggregateType.COUNT, - entity=self.entity, - change_geq=0.01 if not self.is_tax else -0.01, - ) - winners.run() - - losers = ChangeAggregate( - baseline_simulation=self.baseline_simulation, - reform_simulation=self.reform_simulation, - variable=self.programme_name, - aggregate_type=ChangeAggregateType.COUNT, - entity=self.entity, - change_leq=-0.01 if not self.is_tax else 0.01, - ) - losers.run() - - # Populate results - self.baseline_total = float(baseline_total.result) - self.reform_total = float(reform_total.result) - self.change = float(reform_total.result - baseline_total.result) - self.baseline_count = float(baseline_count.result) - self.reform_count = float(reform_count.result) - self.winners = float(winners.result) - self.losers = float(losers.result) diff --git a/src/policyengine/tax_benefit_models/us/__init__.py b/src/policyengine/tax_benefit_models/us/__init__.py index b6af56b0..d49d46d4 100644 --- a/src/policyengine/tax_benefit_models/us/__init__.py +++ b/src/policyengine/tax_benefit_models/us/__init__.py @@ -28,6 +28,7 @@ if find_spec("policyengine_us") is not None: from policyengine.core import Dataset + from policyengine.outputs import ProgramStatistics from .analysis import economic_impact_analysis from .datasets import ( @@ -44,7 +45,6 @@ managed_microsimulation, us_latest, ) - from .outputs import ProgramStatistics model = us_latest """The pinned US ``TaxBenefitModelVersion`` for this policyengine release.""" diff --git a/src/policyengine/tax_benefit_models/us/analysis.py b/src/policyengine/tax_benefit_models/us/analysis.py index b27ef4bb..8b3eefc8 100644 --- a/src/policyengine/tax_benefit_models/us/analysis.py +++ b/src/policyengine/tax_benefit_models/us/analysis.py @@ -12,6 +12,7 @@ from pydantic import BaseModel from policyengine.core import OutputCollection, Simulation 
+from policyengine.outputs import ProgramStatistics from policyengine.outputs.decile_impact import ( DecileImpact, calculate_decile_impacts, @@ -26,8 +27,6 @@ calculate_us_poverty_rates, ) -from .outputs import ProgramStatistics - class PolicyReformAnalysis(BaseModel): """Complete policy reform analysis result."""