diff --git a/us/states/sc/data_exploration.ipynb b/us/states/sc/data_exploration.ipynb new file mode 100644 index 0000000..e6978a6 --- /dev/null +++ b/us/states/sc/data_exploration.ipynb @@ -0,0 +1,479 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# SC Dataset Exploration\n", + "\n", + "This notebook explores the South Carolina (SC) dataset to understand household counts, income distribution, and demographic characteristics." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "SC_DATASET = \"hf://policyengine/policyengine-us-data/states/SC.h5\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Load SC dataset\n", + "sim = Microsimulation(dataset=SC_DATASET)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of households in dataset: 35,324\n", + "Household count (weighted): 1,887,388\n", + "Person count (weighted): 5,451,832\n" + ] + } + ], + "source": [ + "# Check dataset size - use .values to get raw arrays (avoid MicroSeries auto-weighting)\n", + "household_weight = sim.calculate(\"household_weight\", period=2025).values\n", + "household_count = sim.calculate(\"household_count\", period=2025, map_to=\"household\").values\n", + "person_count = sim.calculate(\"person_count\", period=2025, map_to=\"household\").values\n", + "\n", + "# Weighted sums using raw arrays\n", + "weighted_household_count = (household_count * household_weight).sum()\n", + "weighted_person_count = (person_count * household_weight).sum()\n", + "\n", + "print(f\"Number of households in dataset: {len(household_weight):,}\")\n", + "print(f\"Household count (weighted): {weighted_household_count:,.0f}\")\n", + "print(f\"Person count (weighted): {weighted_person_count:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "============================================================\n", + "INCOME DISTRIBUTION SUMMARY\n", + "============================================================\n", + "\n", + "Household AGI:\n", + " Unweighted median: $41,884\n", + " Weighted median: $43,222\n", + " Weighted average: $103,858\n", + "\n", + "Person AGI:\n", + " Unweighted median: $40,216\n", + " Weighted median: $38,962\n", + " Weighted average: $93,926\n", + "\n", + "Average household size: 2.9\n", + "\n", + "Weighted household AGI percentiles:\n", + " 25th percentile: $9,425\n", + " 50th percentile: $43,222\n", + " 75th percentile: $91,877\n", + " 90th percentile: $167,068\n", + " 95th percentile: $268,311\n", + " Max AGI: $6,430,892\n" + ] + } + ], + "source": [ + "# Check income distribution (weighted vs unweighted, household and person level)\n", + "# Use .values to get raw numpy arrays (avoid MicroSeries auto-weighting)\n", + "agi_hh_array = sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"household\").values\n", + "hh_weights = sim.calculate(\"household_weight\", period=2025).values\n", + "\n", + "agi_person_array = sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"person\").values\n", + "person_weights = sim.calculate(\"person_weight\", period=2025).values\n", + "\n", + "# Weighted percentile calculation\n", + "def weighted_percentile(values, weights, percentile):\n", + " sorted_indices = np.argsort(values)\n", + " sorted_values = values[sorted_indices]\n", + " sorted_weights = weights[sorted_indices]\n", + " cumulative_weight = np.cumsum(sorted_weights)\n", + " idx = np.searchsorted(cumulative_weight, cumulative_weight[-1] * percentile / 100)\n", + " return sorted_values[min(idx, len(sorted_values)-1)]\n", + "\n", + "# Unweighted medians\n", + "unweighted_median_hh = np.median(agi_hh_array)\n", + "unweighted_median_person = np.median(agi_person_array)\n", + "\n", + "# Weighted medians\n", + "weighted_median_hh = weighted_percentile(agi_hh_array, hh_weights, 50)\n", + "weighted_median_person = weighted_percentile(agi_person_array, person_weights, 50)\n", + "\n", + "# Weighted averages\n", + "weighted_avg_hh = np.average(agi_hh_array, weights=hh_weights)\n", + "weighted_avg_person = np.average(agi_person_array, weights=person_weights)\n", + "\n", + "# Average household size\n", + "total_persons = person_weights.sum()\n", + "total_households = hh_weights.sum()\n", + "avg_hh_size = total_persons / total_households\n", + "\n", + "print(\"=\" * 60)\n", + "print(\"INCOME DISTRIBUTION SUMMARY\")\n", + "print(\"=\" * 60)\n", + "print(f\"\\nHousehold AGI:\")\n", + "print(f\" Unweighted median: ${unweighted_median_hh:,.0f}\")\n", + "print(f\" Weighted median: ${weighted_median_hh:,.0f}\")\n", + "print(f\" Weighted average: ${weighted_avg_hh:,.0f}\")\n", + "\n", + "print(f\"\\nPerson AGI:\")\n", + "print(f\" Unweighted median: ${unweighted_median_person:,.0f}\")\n", + "print(f\" Weighted median: ${weighted_median_person:,.0f}\")\n", + "print(f\" Weighted average: ${weighted_avg_person:,.0f}\")\n", + "\n", + "print(f\"\\nAverage household size: {avg_hh_size:.1f}\")\n", + "\n", + "print(f\"\\nWeighted household AGI percentiles:\")\n", + "print(f\" 25th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 25):,.0f}\")\n", + "print(f\" 50th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 50):,.0f}\")\n", + "print(f\" 75th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 75):,.0f}\")\n", + "print(f\" 90th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 90):,.0f}\")\n", + "print(f\" 95th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 95):,.0f}\")\n", + "print(f\" Max AGI: ${agi_hh_array.max():,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Households with children (weighted):\n", + " Total households with children: 598,564\n", + " Households with 1 child: 247,956\n", + " Households with 2 children: 190,545\n", + " Households with 3+ children: 160,063\n" + ] + } + ], + "source": [ + "# Check households with children - use .values for raw arrays\n", + "is_child = sim.calculate(\"is_child\", period=2025, map_to=\"person\").values\n", + "household_id = sim.calculate(\"household_id\", period=2025, map_to=\"person\").values\n", + "household_weight_person = sim.calculate(\"household_weight\", period=2025, map_to=\"person\").values\n", + "\n", + "# Create DataFrame\n", + "df_households = pd.DataFrame({\n", + " 'household_id': household_id,\n", + " 'is_child': is_child,\n", + " 'household_weight': household_weight_person\n", + "})\n", + "\n", + "# Count children per household\n", + "children_per_household = df_households.groupby('household_id').agg({\n", + " 'is_child': 'sum',\n", + " 'household_weight': 'first'\n", + "}).reset_index()\n", + "\n", + "# Calculate weighted household counts\n", + "total_households_with_children = children_per_household[children_per_household['is_child'] > 0]['household_weight'].sum()\n", + "households_with_1_child = children_per_household[children_per_household['is_child'] == 1]['household_weight'].sum()\n", + "households_with_2_children = children_per_household[children_per_household['is_child'] == 2]['household_weight'].sum()\n", + "households_with_3plus_children = children_per_household[children_per_household['is_child'] >= 3]['household_weight'].sum()\n", + "\n", + "print(f\"\\nHouseholds with children (weighted):\")\n", + "print(f\" Total households with children: {total_households_with_children:,.0f}\")\n", + "print(f\" Households with 1 child: {households_with_1_child:,.0f}\")\n", + "print(f\" Households with 2 children: {households_with_2_children:,.0f}\")\n", + "print(f\" Households with 3+ children: {households_with_3plus_children:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Children by age:\n", + " Total children under 18: 1,198,147\n", + " Children under 6: 349,101\n", + " Children under 3: 169,412\n" + ] + } + ], + "source": [ + "# Check children by age groups - use .values for raw arrays\n", + "df = pd.DataFrame({\n", + " \"household_id\": sim.calculate(\"household_id\", map_to=\"person\").values,\n", + " \"tax_unit_id\": sim.calculate(\"tax_unit_id\", map_to=\"person\").values,\n", + " \"person_id\": sim.calculate(\"person_id\", map_to=\"person\").values,\n", + " \"age\": sim.calculate(\"age\", map_to=\"person\").values,\n", + " \"person_weight\": sim.calculate(\"person_weight\", map_to=\"person\").values\n", + "})\n", + "\n", + "# Filter for children and apply weights\n", + "children_under_18_df = df[df['age'] < 18]\n", + "children_under_6_df = df[df['age'] < 6]\n", + "children_under_3_df = df[df['age'] < 3]\n", + "\n", + "# Calculate weighted totals\n", + "total_children = children_under_18_df['person_weight'].sum()\n", + "children_under_6 = children_under_6_df['person_weight'].sum()\n", + "children_under_3 = children_under_3_df['person_weight'].sum()\n", + "\n", + "print(f\"\\nChildren by age:\")\n", + "print(f\" Total children under 18: {total_children:,.0f}\")\n", + "print(f\" Children under 6: {children_under_6:,.0f}\")\n", + "print(f\" Children under 3: {children_under_3:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "=================================================================\n", + "SC DATASET SUMMARY - WEIGHTED (Population Estimates)\n", + "=================================================================\n", + " Metric Value\n", + " Household count (weighted) 1,887,388\n", + " Person count (weighted) 5,451,832\n", + " Average household size 2.9\n", + " Weighted median household AGI $43,222\n", + " Weighted average household AGI $103,858\n", + " Weighted median person AGI $38,962\n", + " Weighted average person AGI $93,926\n", + "Unweighted median household AGI $41,884\n", + " Unweighted median person AGI $40,216\n", + " 25th percentile household AGI $9,425\n", + " 75th percentile household AGI $91,877\n", + " 90th percentile household AGI $167,068\n", + " 95th percentile household AGI $268,311\n", + " Max household AGI $6,430,892\n", + " Total households with children 598,564\n", + " Households with 1 child 247,956\n", + " Households with 2 children 190,545\n", + " Households with 3+ children 160,063\n", + " Total children under 18 1,198,147\n", + " Children under 6 349,101\n", + " Children under 3 169,412\n", + "=================================================================\n", + "\n", + "Summary saved to: sc_dataset_summary_weighted.csv\n" + ] + } + ], + "source": [ + "# Create comprehensive summary table\n", + "summary_data = {\n", + " 'Metric': [\n", + " 'Household count (weighted)',\n", + " 'Person count (weighted)',\n", + " 'Average household size',\n", + " 'Weighted median household AGI',\n", + " 'Weighted average household AGI',\n", + " 'Weighted median person AGI',\n", + " 'Weighted average person AGI',\n", + " 'Unweighted median household AGI',\n", + " 'Unweighted median person AGI',\n", + " '25th percentile household AGI',\n", + " '75th percentile household AGI',\n", + " '90th percentile household AGI',\n", + " '95th percentile household AGI',\n", + " 'Max household AGI',\n", + " 'Total households with children',\n", + " 'Households with 1 child',\n", + " 'Households with 2 children',\n", + " 'Households with 3+ children',\n", + " 'Total children under 18',\n", + " 'Children under 6',\n", + " 'Children under 3'\n", + " ],\n", + " 'Value': [\n", + " f\"{weighted_household_count:,.0f}\",\n", + " f\"{weighted_person_count:,.0f}\",\n", + " f\"{avg_hh_size:.1f}\",\n", + " f\"${weighted_median_hh:,.0f}\",\n", + " f\"${weighted_avg_hh:,.0f}\",\n", + " f\"${weighted_median_person:,.0f}\",\n", + " f\"${weighted_avg_person:,.0f}\",\n", + " f\"${unweighted_median_hh:,.0f}\",\n", + " f\"${unweighted_median_person:,.0f}\",\n", + " f\"${weighted_percentile(agi_hh_array, hh_weights, 25):,.0f}\",\n", + " f\"${weighted_percentile(agi_hh_array, hh_weights, 75):,.0f}\",\n", + " f\"${weighted_percentile(agi_hh_array, hh_weights, 90):,.0f}\",\n", + " f\"${weighted_percentile(agi_hh_array, hh_weights, 95):,.0f}\",\n", + " f\"${agi_hh_array.max():,.0f}\",\n", + " f\"{total_households_with_children:,.0f}\",\n", + " f\"{households_with_1_child:,.0f}\",\n", + " f\"{households_with_2_children:,.0f}\",\n", + " f\"{households_with_3plus_children:,.0f}\",\n", + " f\"{total_children:,.0f}\",\n", + " f\"{children_under_6:,.0f}\",\n", + " f\"{children_under_3:,.0f}\"\n", + " ]\n", + "}\n", + "\n", + "summary_df = pd.DataFrame(summary_data)\n", + "\n", + "print(\"\\n\" + \"=\"*65)\n", + "print(\"SC DATASET SUMMARY - WEIGHTED (Population Estimates)\")\n", + "print(\"=\"*65)\n", + "print(summary_df.to_string(index=False))\n", + "print(\"=\"*65)\n", + "\n", + "# Save table\n", + "summary_df.to_csv('sc_dataset_summary_weighted.csv', index=False)\n", + "print(\"\\nSummary saved to: sc_dataset_summary_weighted.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "======================================================================\n", + "HOUSEHOLDS WITH $0 INCOME\n", + "======================================================================\n", + "Household count: 179,119\n", + "Percentage of all households: 9.49%\n", + "======================================================================\n" + ] + } + ], + "source": [ + "# Households with $0 income - using raw arrays\n", + "agi_hh = sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"household\").values\n", + "weights = sim.calculate(\"household_weight\", period=2025).values\n", + "\n", + "zero_income_mask = agi_hh == 0\n", + "zero_income_count = weights[zero_income_mask].sum()\n", + "total_households = weights.sum()\n", + "\n", + "print(\"\\n\" + \"=\"*70)\n", + "print(\"HOUSEHOLDS WITH $0 INCOME\")\n", + "print(\"=\"*70)\n", + "print(f\"Household count: {zero_income_count:,.0f}\")\n", + "print(f\"Percentage of all households: {zero_income_count / total_households * 100:.2f}%\")\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "======================================================================\n", + "HOUSEHOLD COUNTS BY INCOME BRACKET\n", + "======================================================================\n", + "Income Bracket Households % of All Households\n", + " $0-$10k 434,505 23.02%\n", + " $10k-$20k 155,370 8.23%\n", + " $20k-$30k 149,595 7.93%\n", + " $30k-$40k 115,365 6.11%\n", + " $40k-$50k 127,566 6.76%\n", + " $50k-$60k 110,405 5.85%\n", + "======================================================================\n", + "\n", + "Total households in $0-$60k range: 1,092,805\n", + "Percentage of all households in $0-$60k range: 57.90%\n" + ] + } + ], + "source": [ + "# Household counts by income brackets\n", + "income_brackets = [\n", + " (0, 10000, \"$0-$10k\"),\n", + " (10000, 20000, \"$10k-$20k\"),\n", + " (20000, 30000, \"$20k-$30k\"),\n", + " (30000, 40000, \"$30k-$40k\"),\n", + " (40000, 50000, \"$40k-$50k\"),\n", + " (50000, 60000, \"$50k-$60k\")\n", + "]\n", + "\n", + "bracket_data = []\n", + "for lower, upper, label in income_brackets:\n", + " mask = (agi_hh >= lower) & (agi_hh < upper)\n", + " count = weights[mask].sum()\n", + " pct_of_total = (count / total_households) * 100\n", + " \n", + " bracket_data.append({\n", + " \"Income Bracket\": label,\n", + " \"Households\": f\"{count:,.0f}\",\n", + " \"% of All Households\": f\"{pct_of_total:.2f}%\"\n", + " })\n", + "\n", + "income_df = pd.DataFrame(bracket_data)\n", + "\n", + "print(\"\\n\" + \"=\"*70)\n", + "print(\"HOUSEHOLD COUNTS BY INCOME BRACKET\")\n", + "print(\"=\"*70)\n", + "print(income_df.to_string(index=False))\n", + "print(\"=\"*70)\n", + "\n", + "# Total in $0-$60k range\n", + "total_in_range = sum([weights[(agi_hh >= lower) & (agi_hh < upper)].sum() for lower, upper, _ in income_brackets])\n", + "print(f\"\\nTotal households in $0-$60k range: {total_in_range:,.0f}\")\n", + "print(f\"Percentage of all households in $0-$60k range: {total_in_range / total_households * 100:.2f}%\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/us/states/sc/data_exploration_test.ipynb b/us/states/sc/data_exploration_test.ipynb new file mode 100644 index 0000000..459ab3e --- /dev/null +++ b/us/states/sc/data_exploration_test.ipynb @@ -0,0 +1,500 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cell-0", + "metadata": {}, + "source": [ + "# SC Dataset Exploration (Test - March 2025)\n", + "\n", + "This notebook explores the South Carolina (SC) **test** dataset to understand household counts, income distribution, and demographic characteristics." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "cell-1", + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "SC_DATASET = \"hf://policyengine/test/mar/SC.h5\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "cell-2", + "metadata": {}, + "outputs": [], + "source": [ + "# Load SC test dataset\n", + "sim = Microsimulation(dataset=SC_DATASET)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "cell-3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of households in dataset: 37,545\n", + "Household count (weighted): 1,921,642\n", + "Person count (weighted): 5,446,194\n" + ] + } + ], + "source": [ + "# Check dataset size - use .values to get raw arrays (avoid MicroSeries auto-weighting)\n", + "household_weight = sim.calculate(\"household_weight\", period=2025).values\n", + "household_count = sim.calculate(\"household_count\", period=2025, map_to=\"household\").values\n", + "person_count = sim.calculate(\"person_count\", period=2025, map_to=\"household\").values\n", + "\n", + "# Weighted sums using raw arrays\n", + "weighted_household_count = (household_count * household_weight).sum()\n", + "weighted_person_count = (person_count * household_weight).sum()\n", + "\n", + "print(f\"Number of households in dataset: {len(household_weight):,}\")\n", + "print(f\"Household count (weighted): {weighted_household_count:,.0f}\")\n", + "print(f\"Person count (weighted): {weighted_person_count:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cell-4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "============================================================\n", + "INCOME DISTRIBUTION SUMMARY\n", + "============================================================\n", + "\n", + "Household AGI:\n", + " Unweighted median: $55,711\n", + " Weighted median: $68,193\n", + " Weighted average: $148,865\n", + "\n", + "Person AGI:\n", + " Unweighted median: $52,622\n", + " Weighted median: $52,025\n", + " Weighted average: $121,624\n", + "\n", + "Average household size: 2.8\n", + "\n", + "Weighted household AGI percentiles:\n", + " 25th percentile: $22,381\n", + " 50th percentile: $68,193\n", + " 75th percentile: $138,373\n", + " 90th percentile: $244,734\n", + " 95th percentile: $371,597\n", + " Max AGI: $2,379,085,312\n" + ] + } + ], + "source": [ + "# Check income distribution (weighted vs unweighted, household and person level)\n", + "# Use .values to get raw numpy arrays (avoid MicroSeries auto-weighting)\n", + "agi_hh_array = sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"household\").values\n", + "hh_weights = sim.calculate(\"household_weight\", period=2025).values\n", + "\n", + "agi_person_array = sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"person\").values\n", + "person_weights = sim.calculate(\"person_weight\", period=2025).values\n", + "\n", + "# Weighted percentile calculation\n", + "def weighted_percentile(values, weights, percentile):\n", + " sorted_indices = np.argsort(values)\n", + " sorted_values = values[sorted_indices]\n", + " sorted_weights = weights[sorted_indices]\n", + " cumulative_weight = np.cumsum(sorted_weights)\n", + " idx = np.searchsorted(cumulative_weight, cumulative_weight[-1] * percentile / 100)\n", + " return sorted_values[min(idx, len(sorted_values)-1)]\n", + "\n", + "# Unweighted medians\n", + "unweighted_median_hh = np.median(agi_hh_array)\n", + "unweighted_median_person = np.median(agi_person_array)\n", + "\n", + "# Weighted medians\n", + "weighted_median_hh = weighted_percentile(agi_hh_array, hh_weights, 50)\n", + "weighted_median_person = weighted_percentile(agi_person_array, person_weights, 50)\n", + "\n", + "# Weighted averages\n", + "weighted_avg_hh = np.average(agi_hh_array, weights=hh_weights)\n", + "weighted_avg_person = np.average(agi_person_array, weights=person_weights)\n", + "\n", + "# Average household size\n", + "total_persons = person_weights.sum()\n", + "total_households = hh_weights.sum()\n", + "avg_hh_size = total_persons / total_households\n", + "\n", + "print(\"=\" * 60)\n", + "print(\"INCOME DISTRIBUTION SUMMARY\")\n", + "print(\"=\" * 60)\n", + "print(f\"\\nHousehold AGI:\")\n", + "print(f\" Unweighted median: ${unweighted_median_hh:,.0f}\")\n", + "print(f\" Weighted median: ${weighted_median_hh:,.0f}\")\n", + "print(f\" Weighted average: ${weighted_avg_hh:,.0f}\")\n", + "\n", + "print(f\"\\nPerson AGI:\")\n", + "print(f\" Unweighted median: ${unweighted_median_person:,.0f}\")\n", + "print(f\" Weighted median: ${weighted_median_person:,.0f}\")\n", + "print(f\" Weighted average: ${weighted_avg_person:,.0f}\")\n", + "\n", + "print(f\"\\nAverage household size: {avg_hh_size:.1f}\")\n", + "\n", + "print(f\"\\nWeighted household AGI percentiles:\")\n", + "print(f\" 25th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 25):,.0f}\")\n", + "print(f\" 50th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 50):,.0f}\")\n", + "print(f\" 75th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 75):,.0f}\")\n", + "print(f\" 90th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 90):,.0f}\")\n", + "print(f\" 95th percentile: ${weighted_percentile(agi_hh_array, hh_weights, 95):,.0f}\")\n", + "print(f\" Max AGI: ${agi_hh_array.max():,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "cell-5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Households with children (weighted):\n", + " Total households with children: 641,478\n", + " Households with 1 child: 266,099\n", + " Households with 2 children: 241,486\n", + " Households with 3+ children: 133,893\n" + ] + } + ], + "source": [ + "# Check households with children - use .values for raw arrays\n", + "is_child = sim.calculate(\"is_child\", period=2025, map_to=\"person\").values\n", + "household_id = sim.calculate(\"household_id\", period=2025, map_to=\"person\").values\n", + "household_weight_person = sim.calculate(\"household_weight\", period=2025, map_to=\"person\").values\n", + "\n", + "# Create DataFrame\n", + "df_households = pd.DataFrame({\n", + " 'household_id': household_id,\n", + " 'is_child': is_child,\n", + " 'household_weight': household_weight_person\n", + "})\n", + "\n", + "# Count children per household\n", + "children_per_household = df_households.groupby('household_id').agg({\n", + " 'is_child': 'sum',\n", + " 'household_weight': 'first'\n", + "}).reset_index()\n", + "\n", + "# Calculate weighted household counts\n", + "total_households_with_children = children_per_household[children_per_household['is_child'] > 0]['household_weight'].sum()\n", + "households_with_1_child = children_per_household[children_per_household['is_child'] == 1]['household_weight'].sum()\n", + "households_with_2_children = children_per_household[children_per_household['is_child'] == 2]['household_weight'].sum()\n", + "households_with_3plus_children = children_per_household[children_per_household['is_child'] >= 3]['household_weight'].sum()\n", + "\n", + "print(f\"\\nHouseholds with children (weighted):\")\n", + "print(f\" Total households with children: {total_households_with_children:,.0f}\")\n", + "print(f\" Households with 1 child: {households_with_1_child:,.0f}\")\n", + "print(f\" Households with 2 children: {households_with_2_children:,.0f}\")\n", + "print(f\" Households with 3+ children: {households_with_3plus_children:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "cell-6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Children by age:\n", + " Total children under 18: 1,197,559\n", + " Children under 6: 359,192\n", + " Children under 3: 159,871\n" + ] + } + ], + "source": [ + "# Check children by age groups - use .values for raw arrays\n", + "df = pd.DataFrame({\n", + " \"household_id\": sim.calculate(\"household_id\", map_to=\"person\").values,\n", + " \"tax_unit_id\": sim.calculate(\"tax_unit_id\", map_to=\"person\").values,\n", + " \"person_id\": sim.calculate(\"person_id\", map_to=\"person\").values,\n", + " \"age\": sim.calculate(\"age\", map_to=\"person\").values,\n", + " \"person_weight\": sim.calculate(\"person_weight\", map_to=\"person\").values\n", + "})\n", + "\n", + "# Filter for children and apply weights\n", + "children_under_18_df = df[df['age'] < 18]\n", + "children_under_6_df = df[df['age'] < 6]\n", + "children_under_3_df = df[df['age'] < 3]\n", + "\n", + "# Calculate weighted totals\n", + "total_children = children_under_18_df['person_weight'].sum()\n", + "children_under_6 = children_under_6_df['person_weight'].sum()\n", + "children_under_3 = children_under_3_df['person_weight'].sum()\n", + "\n", + "print(f\"\\nChildren by age:\")\n", + "print(f\" Total children under 18: {total_children:,.0f}\")\n", + "print(f\" Children under 6: {children_under_6:,.0f}\")\n", + "print(f\" Children under 3: {children_under_3:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "cell-7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "=================================================================\n", + "SC TEST DATASET SUMMARY - WEIGHTED (Population Estimates)\n", + "=================================================================\n", + " Metric Value\n", + " Household count (weighted) 1,921,642\n", + " Person count (weighted) 5,446,194\n", + " Average household size 2.8\n", + " Weighted median household AGI $68,193\n", + " Weighted average household AGI $148,865\n", + " Weighted median person AGI $52,025\n", + " Weighted average person AGI $121,624\n", + "Unweighted median household AGI $55,711\n", + " Unweighted median person AGI $52,622\n", + " 25th percentile household AGI $22,381\n", + " 75th percentile household AGI $138,373\n", + " 90th percentile household AGI $244,734\n", + " 95th percentile household AGI $371,597\n", + " Max household AGI $2,379,085,312\n", + " Total households with children 641,478\n", + " Households with 1 child 266,099\n", + " Households with 2 children 241,486\n", + " Households with 3+ children 133,893\n", + " Total children under 18 1,197,559\n", + " Children under 6 359,192\n", + " Children under 3 159,871\n", + "=================================================================\n", + "\n", + "Summary saved to: sc_test_dataset_summary_weighted.csv\n" + ] + } + ], + "source": [ + "# Create comprehensive summary table\n", + "summary_data = {\n", + " 'Metric': [\n", + " 'Household count (weighted)',\n", + " 'Person count (weighted)',\n", + " 'Average household size',\n", + " 'Weighted median household AGI',\n", + " 'Weighted average household AGI',\n", + " 'Weighted median person AGI',\n", + " 'Weighted average person AGI',\n", + " 'Unweighted median household AGI',\n", + " 'Unweighted median person AGI',\n", + " '25th percentile household AGI',\n", + " '75th percentile household AGI',\n", + " '90th percentile household AGI',\n", + " '95th percentile household AGI',\n", + " 'Max household AGI',\n", + " 'Total households with children',\n", + " 'Households with 1 child',\n", + " 'Households with 2 children',\n", + " 'Households with 3+ children',\n", + " 'Total children under 18',\n", + " 'Children under 6',\n", + " 'Children under 3'\n", + " ],\n", + " 'Value': [\n", + " f\"{weighted_household_count:,.0f}\",\n", + " f\"{weighted_person_count:,.0f}\",\n", + " f\"{avg_hh_size:.1f}\",\n", + " f\"${weighted_median_hh:,.0f}\",\n", + " f\"${weighted_avg_hh:,.0f}\",\n", + " f\"${weighted_median_person:,.0f}\",\n", + " f\"${weighted_avg_person:,.0f}\",\n", + " f\"${unweighted_median_hh:,.0f}\",\n", + " f\"${unweighted_median_person:,.0f}\",\n", + " f\"${weighted_percentile(agi_hh_array, hh_weights, 25):,.0f}\",\n", + " f\"${weighted_percentile(agi_hh_array, hh_weights, 75):,.0f}\",\n", + " f\"${weighted_percentile(agi_hh_array, hh_weights, 90):,.0f}\",\n", + " f\"${weighted_percentile(agi_hh_array, hh_weights, 95):,.0f}\",\n", + " f\"${agi_hh_array.max():,.0f}\",\n", + " f\"{total_households_with_children:,.0f}\",\n", + " f\"{households_with_1_child:,.0f}\",\n", + " f\"{households_with_2_children:,.0f}\",\n", + " f\"{households_with_3plus_children:,.0f}\",\n", + " f\"{total_children:,.0f}\",\n", + " f\"{children_under_6:,.0f}\",\n", + " f\"{children_under_3:,.0f}\"\n", + " ]\n", + "}\n", + "\n", + "summary_df = pd.DataFrame(summary_data)\n", + "\n", + "print(\"\\n\" + \"=\"*65)\n", + "print(\"SC TEST DATASET SUMMARY - WEIGHTED (Population Estimates)\")\n", + "print(\"=\"*65)\n", + "print(summary_df.to_string(index=False))\n", + "print(\"=\"*65)\n", + "\n", + "# Save table\n", + "summary_df.to_csv('sc_test_dataset_summary_weighted.csv', index=False)\n", + "print(\"\\nSummary saved to: sc_test_dataset_summary_weighted.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "cell-8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "======================================================================\n", + "HOUSEHOLDS WITH $0 INCOME\n", + "======================================================================\n", + "Household count: 100,886\n", + "Percentage of all households: 5.25%\n", + "======================================================================\n" + ] + } + ], + "source": [ + "# Households with $0 income - using raw arrays\n", + "agi_hh = sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"household\").values\n", + "weights = sim.calculate(\"household_weight\", period=2025).values\n", + "\n", + "zero_income_mask = agi_hh == 0\n", + "zero_income_count = weights[zero_income_mask].sum()\n", + "total_households = weights.sum()\n", + "\n", + "print(\"\\n\" + \"=\"*70)\n", + "print(\"HOUSEHOLDS WITH $0 INCOME\")\n", + "print(\"=\"*70)\n", + "print(f\"Household count: {zero_income_count:,.0f}\")\n", + "print(f\"Percentage of all households: {zero_income_count / total_households * 100:.2f}%\")\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "======================================================================\n", + "HOUSEHOLD COUNTS BY INCOME BRACKET\n", + "======================================================================\n", + "Income Bracket Households % of All Households\n", + " $0-$10k 299,332 15.58%\n", + " $10k-$20k 132,377 6.89%\n", + " $20k-$30k 130,407 6.79%\n", + " $30k-$40k 114,824 5.98%\n", + " $40k-$50k 93,211 4.85%\n", + " $50k-$60k 94,847 4.94%\n", + "======================================================================\n", + "\n", + "Total households in $0-$60k range: 864,999\n", + "Percentage of all households in $0-$60k range: 45.01%\n" + ] + }, + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n", + "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n", + "\u001b[1;31mClick here for more info. \n", + "\u001b[1;31mView Jupyter log for further details." + ] + } + ], + "source": [ + "# Household counts by income brackets\n", + "income_brackets = [\n", + " (0, 10000, \"$0-$10k\"),\n", + " (10000, 20000, \"$10k-$20k\"),\n", + " (20000, 30000, \"$20k-$30k\"),\n", + " (30000, 40000, \"$30k-$40k\"),\n", + " (40000, 50000, \"$40k-$50k\"),\n", + " (50000, 60000, \"$50k-$60k\")\n", + "]\n", + "\n", + "bracket_data = []\n", + "for lower, upper, label in income_brackets:\n", + " mask = (agi_hh >= lower) & (agi_hh < upper)\n", + " count = weights[mask].sum()\n", + " pct_of_total = (count / total_households) * 100\n", + " \n", + " bracket_data.append({\n", + " \"Income Bracket\": label,\n", + " \"Households\": f\"{count:,.0f}\",\n", + " \"% of All Households\": f\"{pct_of_total:.2f}%\"\n", + " })\n", + "\n", + "income_df = pd.DataFrame(bracket_data)\n", + "\n", + "print(\"\\n\" + \"=\"*70)\n", + "print(\"HOUSEHOLD COUNTS BY INCOME BRACKET\")\n", + "print(\"=\"*70)\n", + "print(income_df.to_string(index=False))\n", + "print(\"=\"*70)\n", + "\n", + "# Total in $0-$60k range\n", + "total_in_range = sum([weights[(agi_hh >= lower) & (agi_hh < upper)].sum() for lower, upper, _ in income_brackets])\n", + "print(f\"\\nTotal households in $0-$60k range: {total_in_range:,.0f}\")\n", + "print(f\"Percentage of all households in $0-$60k range: {total_in_range / total_households * 100:.2f}%\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/us/states/sc/h4216_analysis/5.21_rate/rfa_h4216_5.21_analysis.csv b/us/states/sc/h4216_analysis/5.21_rate/rfa_h4216_5.21_analysis.csv new file mode 100644 index 0000000..a150bea --- /dev/null +++ b/us/states/sc/h4216_analysis/5.21_rate/rfa_h4216_5.21_analysis.csv @@ -0,0 +1,16 @@ +Federal AGI Range,Est # Returns,Est % Returns,Old Avg Tax Liability,New Avg Tax Liability,Returns with Tax Change,% Returns in Range with Change,Old Avg Tax (Changed),New Avg Tax (Changed),Avg Tax Change,Total Dollar Change,Tax Decrease # Returns,Tax Decrease % in Range,Total Decrease Amount,Avg Decrease Amount,Tax Increase # Returns,Tax Increase % in Range,Total Increase Amount,Avg Increase Amount,No Tax Change # Returns,No Change % Returns,Zero Tax # Returns,Zero Tax % Returns +$0*,78854,2.9%,$50,$42,1080,1.4%,$3683,$3062,$-622,$-671000,576,0.7%,$-704000,$-1222,504,0.6%,$34000,$68,77774,98.6%,77824,98.7% +$1 to $10000,286253,10.4%,$3,$9,43699,15.3%,$20,$58,$38,$1653000,834,0.3%,$-78000,$-94,42865,15.0%,$1731000,$40,242554,84.7%,243249,85.0% +$10001 to $20000,310122,11.2%,$16,$26,75652,24.4%,$67,$105,$38,$2867000,5591,1.8%,$-363000,$-65,70060,22.6%,$3230000,$46,234471,75.6%,235107,75.8% +$20001 to $30000,275560,10.0%,$107,$110,140713,51.1%,$210,$216,$5,$762000,51551,18.7%,$-2682000,$-52,89162,32.4%,$3444000,$39,134847,48.9%,134332,48.7% +$30001 to $40000,269566,9.8%,$288,$216,160474,59.5%,$483,$362,$-121,$-19416000,131752,48.9%,$-21120000,$-160,28722,10.7%,$1704000,$59,109091,40.5%,110638,41.0% +$40001 to $50000,234386,8.5%,$569,$388,174125,74.3%,$767,$522,$-244,$-42568000,127554,54.4%,$-46871000,$-367,46572,19.9%,$4303000,$92,60260,25.7%,61891,26.4% +$50001 to $75000,407593,14.8%,$1192,$971,351754,86.3%,$1381,$1125,$-256,$-89935000,287674,70.6%,$-101116000,$-351,64080,15.7%,$11181000,$174,55839,13.7%,61960,15.2% +$75001 to $100000,250437,9.1%,$2020,$1826,225194,89.9%,$2246,$2030,$-216,$-48624000,177430,70.8%,$-61900000,$-349,47764,19.1%,$13276000,$278,25243,10.1%,27729,11.1% +$100001 to $150000,298343,10.8%,$3258,$3171,289948,97.2%,$3352,$3262,$-90,$-26092000,199040,66.7%,$-58517000,$-294,90908,30.5%,$32425000,$357,8395,2.8%,9188,3.1% +$150001 to $200000,143398,5.2%,$5518,$5684,141433,98.6%,$5595,$5763,$168,$23766000,61936,43.2%,$-14937000,$-241,79497,55.4%,$38703000,$487,1965,1.4%,1459,1.0% +$200001 to $300000,109340,4.0%,$8741,$8777,108016,98.8%,$8848,$8885,$37,$3955000,63636,58.2%,$-27603000,$-434,44380,40.6%,$31558000,$711,1324,1.2%,945,0.9% +$300001 to $500000,56123,2.0%,$14926,$14355,55090,98.2%,$15206,$14624,$-582,$-32054000,42933,76.5%,$-47609000,$-1109,12157,21.7%,$15555000,$1280,1032,1.8%,762,1.4% +$500001 to $1000000,25664,0.9%,$25969,$24512,24758,96.5%,$26919,$25410,$-1510,$-37381000,19803,77.2%,$-51185000,$-2585,4955,19.3%,$13804000,$2786,906,3.5%,684,2.7% +Over $1000000,11936,0.4%,$78228,$74458,11159,93.5%,$83671,$79639,$-4031,$-44989000,8693,72.8%,$-87454000,$-10060,2466,20.7%,$42465000,$17221,776,6.5%,703,5.9% +Total,2757573,100.0%,$2321,$2209,1803095,65.4%,$3549,$3378,$-171,$-308700000,1179002,42.8%,$-522100000,$-443,624092,22.6%,$213400000,$342,954478,34.6%,966471,35.0% diff --git a/us/states/sc/h4216_analysis/5.21_rate/state/pe_h4216_5.21_state_analysis.csv b/us/states/sc/h4216_analysis/5.21_rate/state/pe_h4216_5.21_state_analysis.csv new file mode 100644 index 0000000..5d7b774 --- /dev/null +++ b/us/states/sc/h4216_analysis/5.21_rate/state/pe_h4216_5.21_state_analysis.csv @@ -0,0 +1,16 @@ +Federal AGI Range,Est # Returns,Est % Returns,Old Avg Tax Liability,New Avg Tax Liability,Returns with Tax Change,% Returns in Range with Change,Old Avg Tax (Changed),New Avg Tax (Changed),Avg Tax Change,Total Dollar Change,Tax Decrease # Returns,Tax Decrease % in Range,Total Decrease Amount,Avg Decrease Amount,Tax Increase # Returns,Tax Increase % in Range,Total Increase Amount,Avg Increase Amount,No Tax Change # Returns,No Change % Returns,Zero Tax # Returns,Zero Tax % Returns +$0*,619010,21.1%,$0,$0,0,0.0%,$0,$0,$0,$0,0,0.0%,$0,$0,0,0.0%,$0,$0,619010,100.0%,619010,100.0% +$1 to $10000,502276,17.1%,$0,$0,0,0.0%,$0,$0,$0,$0,0,0.0%,$0,$0,0,0.0%,$0,$0,502276,100.0%,502276,100.0% +$10001 to $20000,279412,9.5%,$0,$10,53961,19.3%,$0,$50,$50,$2672942,0,0.0%,$0,$0,53961,19.3%,$2672922,$50,225451,80.7%,225413,80.7% +$20001 to $30000,252863,8.6%,$64,$101,136052,53.8%,$119,$188,$68,$9294693,5029,2.0%,$-40734,$-8,131023,51.8%,$9335378,$71,116811,46.2%,116751,46.2% +$30001 to $40000,215980,7.4%,$225,$200,135926,62.9%,$356,$316,$-40,$-5431497,88710,41.1%,$-8472465,$-96,47216,21.9%,$3040994,$64,80055,37.1%,79265,36.7% +$40001 to $50000,197525,6.7%,$547,$404,152733,77.3%,$706,$522,$-184,$-28145982,99989,50.6%,$-34226980,$-342,52744,26.7%,$6080948,$115,44792,22.7%,44131,22.3% +$50001 to $75000,300857,10.2%,$822,$722,254734,84.7%,$971,$853,$-118,$-30064724,164685,54.7%,$-45636192,$-277,90049,29.9%,$15571469,$173,46123,15.3%,46125,15.3% +$75001 to $100000,177284,6.0%,$1781,$1631,168284,94.9%,$1876,$1718,$-157,$-26475178,128443,72.5%,$-39583444,$-308,39841,22.5%,$13108268,$329,9000,5.1%,9124,5.1% +$100001 to $150000,187946,6.4%,$3292,$3387,186839,99.4%,$3311,$3407,$96,$17889888,111928,59.6%,$-22415936,$-200,74911,39.9%,$40305824,$538,1107,0.6%,1105,0.6% +$150001 to $200000,73396,2.5%,$6049,$6413,73395,100.0%,$6049,$6412,$363,$26678432,14400,19.6%,$-3249580,$-226,58996,80.4%,$29928012,$507,1,0.0%,0,0.0% +$200001 to $300000,52882,1.8%,$9164,$9358,52878,100.0%,$9164,$9358,$194,$10258680,21154,40.0%,$-5374373,$-254,31724,60.0%,$15633049,$493,4,0.0%,0,0.0% +$300001 to $500000,36977,1.3%,$17163,$16717,36977,100.0%,$17163,$16717,$-447,$-16518335,28313,76.6%,$-27952982,$-987,8664,23.4%,$11434646,$1320,0,0.0%,0,0.0% +$500001 to $1000000,16526,0.6%,$26140,$24911,16526,100.0%,$26140,$24911,$-1229,$-20314260,14769,89.4%,$-25823908,$-1749,1757,10.6%,$5509648,$3136,0,0.0%,0,0.0% +Over $1000000,22686,0.8%,$139623,$124950,22686,100.0%,$139623,$124950,$-14672,$-332860608,22658,99.9%,$-333138432,$-14703,29,0.1%,$277836,$9684,0,0.0%,0,0.0% +Total,2935621,100.0%,$2220,$2086,1290992,44.0%,$5048,$4744,$-304,$-393015936,700078,23.8%,$-545915008,$-780,590915,20.1%,$152898992,$259,1644629,56.0%,1643201,56.0% diff --git a/us/states/sc/h4216_analysis/5.21_rate/state/sc_h4216_5.21_state_analysis.ipynb b/us/states/sc/h4216_analysis/5.21_rate/state/sc_h4216_5.21_state_analysis.ipynb new file mode 100644 index 0000000..b58e742 --- /dev/null +++ b/us/states/sc/h4216_analysis/5.21_rate/state/sc_h4216_5.21_state_analysis.ipynb @@ -0,0 +1,550 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cell-0", + "metadata": {}, + "source": [ + "# SC H.4216 Tax Reform Analysis - 5.21% Top Rate (State Dataset)\n", + "\n", + "This notebook produces analysis in the same format as the RFA fiscal note for direct comparison.\n", + "\n", + "**Dataset:** `hf://policyengine/policyengine-us-data/states/SC.h5` (Production)\n", + "\n", + "**Reform:** H.4216 with 5.21% top rate (bill default)\n", + "\n", + "**RFA Estimate:** -$308,700,000" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "cell-1", + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "from policyengine_us.reforms.states.sc.h4216.sc_h4216 import create_sc_h4216\n", + "from policyengine_core.reforms import Reform\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "SC_DATASET = \"hf://policyengine/policyengine-us-data/states/SC.h5\"\n", + "TAX_YEAR = 2026\n", + "TOP_RATE = 0.0521 # 5.21% top rate\n", + "RFA_ESTIMATE = -308700000" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "cell-2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading simulations...\n", + "Done!\n" + ] + } + ], + "source": [ + "def create_h4216_reform(top_rate=0.0521):\n", + " \"\"\"\n", + " SC H.4216 Reform:\n", + " - 1.99% up to $30k\n", + " - top_rate over $30k (default 5.21% for bill version)\n", + " \"\"\"\n", + " param_reform = Reform.from_dict(\n", + " {\n", + " \"gov.contrib.states.sc.h4216.in_effect\": {\n", + " \"2026-01-01.2100-12-31\": True\n", + " },\n", + " \"gov.contrib.states.sc.h4216.rates[1].rate\": {\n", + " \"2026-01-01.2100-12-31\": top_rate\n", + " }\n", + " },\n", + " country_id=\"us\",\n", + " )\n", + " base_reform = create_sc_h4216()\n", + " return (base_reform, param_reform)\n", + "\n", + "print(\"Loading simulations...\")\n", + "baseline = Microsimulation(dataset=SC_DATASET)\n", + "reform_sim = Microsimulation(dataset=SC_DATASET, reform=create_h4216_reform(TOP_RATE))\n", + "print(\"Done!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "cell-3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total tax units: 49,486\n", + "Weighted tax units: 2,935,621\n" + ] + } + ], + "source": [ + "# Get data - use .values to avoid double-weighting\n", + "baseline_tax = baseline.calculate(\"sc_income_tax\", period=TAX_YEAR, map_to=\"tax_unit\").values\n", + "reform_tax = reform_sim.calculate(\"sc_income_tax\", period=TAX_YEAR, map_to=\"tax_unit\").values\n", + "agi = baseline.calculate(\"adjusted_gross_income\", period=TAX_YEAR, map_to=\"tax_unit\").values\n", + "weight = baseline.calculate(\"tax_unit_weight\", period=TAX_YEAR).values\n", + "\n", + "tax_change = reform_tax - baseline_tax\n", + "\n", + "print(f\"Total tax units: {len(baseline_tax):,}\")\n", + "print(f\"Weighted tax units: {weight.sum():,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cell-4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bracket analysis complete!\n" + ] + } + ], + "source": [ + "# Define income brackets matching RFA exactly\n", + "income_brackets = [\n", + " (float('-inf'), 0, \"$0*\"),\n", + " (0, 10000, \"$1 to $10000\"),\n", + " (10000, 20000, \"$10001 to $20000\"),\n", + " (20000, 30000, \"$20001 to $30000\"),\n", + " (30000, 40000, \"$30001 to $40000\"),\n", + " (40000, 50000, \"$40001 to $50000\"),\n", + " (50000, 75000, \"$50001 to $75000\"),\n", + " (75000, 100000, \"$75001 to $100000\"),\n", + " (100000, 150000, \"$100001 to $150000\"),\n", + " (150000, 200000, \"$150001 to $200000\"),\n", + " (200000, 300000, \"$200001 to $300000\"),\n", + " (300000, 500000, \"$300001 to $500000\"),\n", + " (500000, 1000000, \"$500001 to $1000000\"),\n", + " (1000000, float('inf'), \"Over $1000000\")\n", + "]\n", + "\n", + "total_weight = weight.sum()\n", + "results = []\n", + "\n", + "for lower, upper, label in income_brackets:\n", + " if lower == float('-inf'):\n", + " mask = agi <= upper\n", + " elif upper == float('inf'):\n", + " mask = agi > lower\n", + " else:\n", + " mask = (agi > lower) & (agi <= upper)\n", + " \n", + " if mask.sum() == 0:\n", + " continue\n", + " \n", + " # Basic stats\n", + " est_returns = weight[mask].sum()\n", + " pct_returns = est_returns / total_weight * 100\n", + " \n", + " old_avg_tax = np.average(baseline_tax[mask], weights=weight[mask]) if est_returns > 0 else 0\n", + " new_avg_tax = np.average(reform_tax[mask], weights=weight[mask]) if est_returns > 0 else 0\n", + " \n", + " # Returns with tax change (threshold $1)\n", + " change_mask = mask & (np.abs(tax_change) > 1)\n", + " returns_with_change = weight[change_mask].sum()\n", + " pct_with_change = returns_with_change / est_returns * 100 if est_returns > 0 else 0\n", + " \n", + " if returns_with_change > 0:\n", + " old_avg_changed = np.average(baseline_tax[change_mask], weights=weight[change_mask])\n", + " new_avg_changed = np.average(reform_tax[change_mask], weights=weight[change_mask])\n", + " avg_change = np.average(tax_change[change_mask], weights=weight[change_mask])\n", + " else:\n", + " old_avg_changed = 0\n", + " new_avg_changed = 0\n", + " avg_change = 0\n", + " \n", + " total_change = (tax_change[mask] * weight[mask]).sum()\n", + " \n", + " # Tax decrease\n", + " decrease_mask = mask & (tax_change < -1)\n", + " decrease_returns = weight[decrease_mask].sum()\n", + " decrease_pct = decrease_returns / est_returns * 100 if est_returns > 0 else 0\n", + " total_decrease = (tax_change[decrease_mask] * weight[decrease_mask]).sum() if decrease_returns > 0 else 0\n", + " avg_decrease = np.average(tax_change[decrease_mask], weights=weight[decrease_mask]) if decrease_returns > 0 else 0\n", + " \n", + " # Tax increase\n", + " increase_mask = mask & (tax_change > 1)\n", + " increase_returns = weight[increase_mask].sum()\n", + " increase_pct = increase_returns / est_returns * 100 if est_returns > 0 else 0\n", + " total_increase = (tax_change[increase_mask] * weight[increase_mask]).sum() if increase_returns > 0 else 0\n", + " avg_increase = np.average(tax_change[increase_mask], weights=weight[increase_mask]) if increase_returns > 0 else 0\n", + " \n", + " # No change\n", + " no_change_mask = mask & (np.abs(tax_change) <= 1)\n", + " no_change_returns = weight[no_change_mask].sum()\n", + " no_change_pct = no_change_returns / est_returns * 100 if est_returns > 0 else 0\n", + " \n", + " # Zero tax\n", + " zero_tax_mask = mask & (reform_tax <= 0)\n", + " zero_tax_returns = weight[zero_tax_mask].sum()\n", + " zero_tax_pct = zero_tax_returns / est_returns * 100 if est_returns > 0 else 0\n", + " \n", + " results.append({\n", + " \"Federal AGI Range\": label,\n", + " \"Est # Returns\": int(round(est_returns)),\n", + " \"Est % Returns\": f\"{pct_returns:.1f}%\",\n", + " \"Old Avg Tax Liability\": f\"${int(round(old_avg_tax))}\",\n", + " \"New Avg Tax Liability\": f\"${int(round(new_avg_tax))}\",\n", + " \"Returns with Tax Change\": int(round(returns_with_change)),\n", + " \"% Returns in Range with Change\": f\"{pct_with_change:.1f}%\",\n", + " \"Old Avg Tax (Changed)\": f\"${int(round(old_avg_changed))}\",\n", + " \"New Avg Tax (Changed)\": f\"${int(round(new_avg_changed))}\",\n", + " \"Avg Tax Change\": f\"${int(round(avg_change))}\",\n", + " \"Total Dollar Change\": f\"${int(round(total_change))}\",\n", + " \"Tax Decrease # Returns\": int(round(decrease_returns)),\n", + " \"Tax Decrease % in Range\": f\"{decrease_pct:.1f}%\",\n", + " \"Total Decrease Amount\": f\"${int(round(total_decrease))}\",\n", + " \"Avg Decrease Amount\": f\"${int(round(avg_decrease))}\",\n", + " \"Tax Increase # Returns\": int(round(increase_returns)),\n", + " \"Tax Increase % in Range\": f\"{increase_pct:.1f}%\",\n", + " \"Total Increase Amount\": f\"${int(round(total_increase))}\",\n", + " \"Avg Increase Amount\": f\"${int(round(avg_increase))}\",\n", + " \"No Tax Change # Returns\": int(round(no_change_returns)),\n", + " \"No Change % Returns\": f\"{no_change_pct:.1f}%\",\n", + " \"Zero Tax # Returns\": int(round(zero_tax_returns)),\n", + " \"Zero Tax % Returns\": f\"{zero_tax_pct:.1f}%\"\n", + " })\n", + "\n", + "print(\"Bracket analysis complete!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "cell-5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Totals calculated!\n" + ] + } + ], + "source": [ + "# Calculate totals\n", + "change_mask_all = np.abs(tax_change) > 1\n", + "decrease_mask_all = tax_change < -1\n", + "increase_mask_all = tax_change > 1\n", + "no_change_mask_all = np.abs(tax_change) <= 1\n", + "zero_tax_mask_all = reform_tax <= 0\n", + "\n", + "total_old_avg = np.average(baseline_tax, weights=weight)\n", + "total_new_avg = np.average(reform_tax, weights=weight)\n", + "total_change_amount = (tax_change * weight).sum()\n", + "\n", + "returns_with_change_all = weight[change_mask_all].sum()\n", + "old_avg_changed_all = np.average(baseline_tax[change_mask_all], weights=weight[change_mask_all]) if returns_with_change_all > 0 else 0\n", + "new_avg_changed_all = np.average(reform_tax[change_mask_all], weights=weight[change_mask_all]) if returns_with_change_all > 0 else 0\n", + "avg_change_all = np.average(tax_change[change_mask_all], weights=weight[change_mask_all]) if returns_with_change_all > 0 else 0\n", + "\n", + "decrease_returns_all = weight[decrease_mask_all].sum()\n", + "total_decrease_all = (tax_change[decrease_mask_all] * weight[decrease_mask_all]).sum()\n", + "avg_decrease_all = np.average(tax_change[decrease_mask_all], weights=weight[decrease_mask_all]) if decrease_returns_all > 0 else 0\n", + "\n", + "increase_returns_all = weight[increase_mask_all].sum()\n", + "total_increase_all = (tax_change[increase_mask_all] * weight[increase_mask_all]).sum()\n", + "avg_increase_all = np.average(tax_change[increase_mask_all], weights=weight[increase_mask_all]) if increase_returns_all > 0 else 0\n", + "\n", + "no_change_returns_all = weight[no_change_mask_all].sum()\n", + "zero_tax_returns_all = weight[zero_tax_mask_all].sum()\n", + "\n", + "results.append({\n", + " \"Federal AGI Range\": \"Total\",\n", + " \"Est # Returns\": int(round(total_weight)),\n", + " \"Est % Returns\": \"100.0%\",\n", + " \"Old Avg Tax Liability\": f\"${int(round(total_old_avg))}\",\n", + " \"New Avg Tax Liability\": f\"${int(round(total_new_avg))}\",\n", + " \"Returns with Tax Change\": int(round(returns_with_change_all)),\n", + " \"% Returns in Range with Change\": f\"{returns_with_change_all / total_weight * 100:.1f}%\",\n", + " \"Old Avg Tax (Changed)\": f\"${int(round(old_avg_changed_all))}\",\n", + " \"New Avg Tax (Changed)\": f\"${int(round(new_avg_changed_all))}\",\n", + " \"Avg Tax Change\": f\"${int(round(avg_change_all))}\",\n", + " \"Total Dollar Change\": f\"${int(round(total_change_amount))}\",\n", + " \"Tax Decrease # Returns\": int(round(decrease_returns_all)),\n", + " \"Tax Decrease % in Range\": f\"{decrease_returns_all / total_weight * 100:.1f}%\",\n", + " \"Total Decrease Amount\": f\"${int(round(total_decrease_all))}\",\n", + " \"Avg Decrease Amount\": f\"${int(round(avg_decrease_all))}\",\n", + " \"Tax Increase # Returns\": int(round(increase_returns_all)),\n", + " \"Tax Increase % in Range\": f\"{increase_returns_all / total_weight * 100:.1f}%\",\n", + " \"Total Increase Amount\": f\"${int(round(total_increase_all))}\",\n", + " \"Avg Increase Amount\": f\"${int(round(avg_increase_all))}\",\n", + " \"No Tax Change # Returns\": int(round(no_change_returns_all)),\n", + " \"No Change % Returns\": f\"{no_change_returns_all / total_weight * 100:.1f}%\",\n", + " \"Zero Tax # Returns\": int(round(zero_tax_returns_all)),\n", + " \"Zero Tax % Returns\": f\"{zero_tax_returns_all / total_weight * 100:.1f}%\"\n", + "})\n", + "\n", + "df_results = pd.DataFrame(results)\n", + "print(\"Totals calculated!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "cell-6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "====================================================================================================\n", + "H.4216 - POLICYENGINE ANALYSIS (State Dataset, 5.21% Top Rate)\n", + "====================================================================================================\n", + "\n", + "Total Returns: 2,935,621\n", + "General Fund Impact: $-393,015,936\n", + "\n", + "RFA Estimate: $-308,700,000\n", + "Difference: $-84,315,936\n", + "Accuracy: 72.7%\n", + "====================================================================================================\n" + ] + } + ], + "source": [ + "# Display summary\n", + "print(\"=\"*100)\n", + "print(f\"H.4216 - POLICYENGINE ANALYSIS (State Dataset, {TOP_RATE*100:.2f}% Top Rate)\")\n", + "print(\"=\"*100)\n", + "print(f\"\\nTotal Returns: {int(total_weight):,}\")\n", + "print(f\"General Fund Impact: ${total_change_amount:,.0f}\")\n", + "print(f\"\\nRFA Estimate: ${RFA_ESTIMATE:,}\")\n", + "print(f\"Difference: ${total_change_amount - RFA_ESTIMATE:,.0f}\")\n", + "print(f\"Accuracy: {(1 - abs(total_change_amount - RFA_ESTIMATE) / abs(RFA_ESTIMATE)) * 100:.1f}%\")\n", + "print(\"=\"*100)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "cell-7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Exported to: pe_h4216_5.21_state_analysis.csv\n" + ] + } + ], + "source": [ + "# Export to CSV in RFA format\n", + "df_results.to_csv('pe_h4216_5.21_state_analysis.csv', index=False)\n", + "print(\"Exported to: pe_h4216_5.21_state_analysis.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "cell-8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "KEY METRICS:\n", + " Federal AGI Range Est # Returns Est % Returns Old Avg Tax Liability New Avg Tax Liability Total Dollar Change\n", + " $0* 619010 21.1% $0 $0 $0\n", + " $1 to $10000 502276 17.1% $0 $0 $0\n", + " $10001 to $20000 279412 9.5% $0 $10 $2672942\n", + " $20001 to $30000 252863 8.6% $64 $101 $9294693\n", + " $30001 to $40000 215980 7.4% $225 $200 $-5431497\n", + " $40001 to $50000 197525 6.7% $547 $404 $-28145982\n", + " $50001 to $75000 300857 10.2% $822 $722 $-30064724\n", + " $75001 to $100000 177284 6.0% $1781 $1631 $-26475178\n", + " $100001 to $150000 187946 6.4% $3292 $3387 $17889888\n", + " $150001 to $200000 73396 2.5% $6049 $6413 $26678432\n", + " $200001 to $300000 52882 1.8% $9164 $9358 $10258680\n", + " $300001 to $500000 36977 1.3% $17163 $16717 $-16518335\n", + "$500001 to $1000000 16526 0.6% $26140 $24911 $-20314260\n", + " Over $1000000 22686 0.8% $139623 $124950 $-332860608\n", + " Total 2935621 100.0% $2220 $2086 $-393015936\n" + ] + } + ], + "source": [ + "# Display key columns for quick comparison\n", + "display_cols = [\n", + " \"Federal AGI Range\", \"Est # Returns\", \"Est % Returns\",\n", + " \"Old Avg Tax Liability\", \"New Avg Tax Liability\", \"Total Dollar Change\"\n", + "]\n", + "print(\"\\nKEY METRICS:\")\n", + "print(df_results[display_cols].to_string(index=False))" + ] + }, + { + "cell_type": "markdown", + "id": "cell-9", + "metadata": {}, + "source": [ + "## Side-by-Side Comparison with RFA" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "cell-10", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "====================================================================================================\n", + "POLICYENGINE (State) vs RFA COMPARISON (5.21% Rate)\n", + "====================================================================================================\n", + " AGI Range PE Returns RFA Returns PE Impact RFA Impact Diff\n", + " $0* 619,010 78,854 $0 $-671,000 $+671,000\n", + " $1 to $10000 502,276 286,253 $0 $1,653,000 $-1,653,000\n", + " $10001 to $20000 279,412 310,122 $2,672,942 $2,867,000 $-194,058\n", + " $20001 to $30000 252,863 275,560 $9,294,693 $762,000 $+8,532,693\n", + " $30001 to $40000 215,980 269,566 $-5,431,497 $-19,416,000 $+13,984,503\n", + " $40001 to $50000 197,525 234,386 $-28,145,982 $-42,568,000 $+14,422,018\n", + " $50001 to $75000 300,857 407,593 $-30,064,724 $-89,935,000 $+59,870,276\n", + " $75001 to $100000 177,284 250,437 $-26,475,178 $-48,624,000 $+22,148,822\n", + " $100001 to $150000 187,946 298,343 $17,889,888 $-26,092,000 $+43,981,888\n", + " $150001 to $200000 73,396 143,398 $26,678,432 $23,766,000 $+2,912,432\n", + " $200001 to $300000 52,882 109,340 $10,258,680 $3,955,000 $+6,303,680\n", + " $300001 to $500000 36,977 56,123 $-16,518,335 $-32,054,000 $+15,535,665\n", + "$500001 to $1000000 16,526 25,664 $-20,314,260 $-37,381,000 $+17,066,740\n", + " Over $1000000 22,686 11,936 $-332,860,608 $-44,989,000 $-287,871,608\n", + " Total 2,935,621 2,757,573 $-393,015,936 $-308,700,000 $-84,315,936\n", + "====================================================================================================\n" + ] + } + ], + "source": [ + "# Load RFA data\n", + "rfa_df = pd.read_csv('../rfa_h4216_5.21_analysis.csv')\n", + "\n", + "def parse_dollar(val):\n", + " if isinstance(val, str):\n", + " return float(val.replace('$', '').replace(',', '').replace('%', ''))\n", + " return val\n", + "\n", + "# Create comparison\n", + "comparison = []\n", + "for idx, pe_row in df_results.iterrows():\n", + " agi_range = pe_row['Federal AGI Range']\n", + " rfa_match = rfa_df[rfa_df['Federal AGI Range'] == agi_range]\n", + " \n", + " pe_returns = pe_row['Est # Returns']\n", + " pe_impact = parse_dollar(pe_row['Total Dollar Change'])\n", + " \n", + " if len(rfa_match) > 0:\n", + " rfa_returns = rfa_match['Est # Returns'].values[0]\n", + " rfa_impact = parse_dollar(rfa_match['Total Dollar Change'].values[0])\n", + " else:\n", + " rfa_returns = 0\n", + " rfa_impact = 0\n", + " \n", + " comparison.append({\n", + " 'AGI Range': agi_range,\n", + " 'PE Returns': f\"{pe_returns:,}\",\n", + " 'RFA Returns': f\"{rfa_returns:,}\" if rfa_returns else \"N/A\",\n", + " 'PE Impact': f\"${pe_impact:,.0f}\",\n", + " 'RFA Impact': f\"${rfa_impact:,.0f}\" if rfa_impact else \"N/A\",\n", + " 'Diff': f\"${pe_impact - rfa_impact:+,.0f}\" if rfa_impact else \"N/A\"\n", + " })\n", + "\n", + "comparison_df = pd.DataFrame(comparison)\n", + "print(\"\\n\" + \"=\"*100)\n", + "print(\"POLICYENGINE (State) vs RFA COMPARISON (5.21% Rate)\")\n", + "print(\"=\"*100)\n", + "print(comparison_df.to_string(index=False))\n", + "print(\"=\"*100)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "cell-11", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "========================================================================================================================\n", + "FULL POLICYENGINE ANALYSIS (RFA Format)\n", + "========================================================================================================================\n", + " Federal AGI Range Est # Returns Est % Returns Old Avg Tax Liability New Avg Tax Liability Returns with Tax Change % Returns in Range with Change Old Avg Tax (Changed) New Avg Tax (Changed) Avg Tax Change Total Dollar Change Tax Decrease # Returns Tax Decrease % in Range Total Decrease Amount Avg Decrease Amount Tax Increase # Returns Tax Increase % in Range Total Increase Amount Avg Increase Amount No Tax Change # Returns No Change % Returns Zero Tax # Returns Zero Tax % Returns\n", + " $0* 619010 21.1% $0 $0 0 0.0% $0 $0 $0 $0 0 0.0% $0 $0 0 0.0% $0 $0 619010 100.0% 619010 100.0%\n", + " $1 to $10000 502276 17.1% $0 $0 0 0.0% $0 $0 $0 $0 0 0.0% $0 $0 0 0.0% $0 $0 502276 100.0% 502276 100.0%\n", + " $10001 to $20000 279412 9.5% $0 $10 53961 19.3% $0 $50 $50 $2672942 0 0.0% $0 $0 53961 19.3% $2672922 $50 225451 80.7% 225413 80.7%\n", + " $20001 to $30000 252863 8.6% $64 $101 136052 53.8% $119 $188 $68 $9294693 5029 2.0% $-40734 $-8 131023 51.8% $9335378 $71 116811 46.2% 116751 46.2%\n", + " $30001 to $40000 215980 7.4% $225 $200 135926 62.9% $356 $316 $-40 $-5431497 88710 41.1% $-8472465 $-96 47216 21.9% $3040994 $64 80055 37.1% 79265 36.7%\n", + " $40001 to $50000 197525 6.7% $547 $404 152733 77.3% $706 $522 $-184 $-28145982 99989 50.6% $-34226980 $-342 52744 26.7% $6080948 $115 44792 22.7% 44131 22.3%\n", + " $50001 to $75000 300857 10.2% $822 $722 254734 84.7% $971 $853 $-118 $-30064724 164685 54.7% $-45636192 $-277 90049 29.9% $15571469 $173 46123 15.3% 46125 15.3%\n", + " $75001 to $100000 177284 6.0% $1781 $1631 168284 94.9% $1876 $1718 $-157 $-26475178 128443 72.5% $-39583444 $-308 39841 22.5% $13108268 $329 9000 5.1% 9124 5.1%\n", + " $100001 to $150000 187946 6.4% $3292 $3387 186839 99.4% $3311 $3407 $96 $17889888 111928 59.6% $-22415936 $-200 74911 39.9% $40305824 $538 1107 0.6% 1105 0.6%\n", + " $150001 to $200000 73396 2.5% $6049 $6413 73395 100.0% $6049 $6412 $363 $26678432 14400 19.6% $-3249580 $-226 58996 80.4% $29928012 $507 1 0.0% 0 0.0%\n", + " $200001 to $300000 52882 1.8% $9164 $9358 52878 100.0% $9164 $9358 $194 $10258680 21154 40.0% $-5374373 $-254 31724 60.0% $15633049 $493 4 0.0% 0 0.0%\n", + " $300001 to $500000 36977 1.3% $17163 $16717 36977 100.0% $17163 $16717 $-447 $-16518335 28313 76.6% $-27952982 $-987 8664 23.4% $11434646 $1320 0 0.0% 0 0.0%\n", + "$500001 to $1000000 16526 0.6% $26140 $24911 16526 100.0% $26140 $24911 $-1229 $-20314260 14769 89.4% $-25823908 $-1749 1757 10.6% $5509648 $3136 0 0.0% 0 0.0%\n", + " Over $1000000 22686 0.8% $139623 $124950 22686 100.0% $139623 $124950 $-14672 $-332860608 22658 99.9% $-333138432 $-14703 29 0.1% $277836 $9684 0 0.0% 0 0.0%\n", + " Total 2935621 100.0% $2220 $2086 1290992 44.0% $5048 $4744 $-304 $-393015936 700078 23.8% $-545915008 $-780 590915 20.1% $152898992 $259 1644629 56.0% 1643201 56.0%\n" + ] + } + ], + "source": [ + "# Full results table\n", + "print(\"\\n\" + \"=\"*120)\n", + "print(\"FULL POLICYENGINE ANALYSIS (RFA Format)\")\n", + "print(\"=\"*120)\n", + "pd.set_option('display.max_columns', None)\n", + "pd.set_option('display.width', None)\n", + "print(df_results.to_string(index=False))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/us/states/sc/h4216_analysis/5.21_rate/test/pe_h4216_5.21_analysis.csv b/us/states/sc/h4216_analysis/5.21_rate/test/pe_h4216_5.21_analysis.csv new file mode 100644 index 0000000..f0b6ff8 --- /dev/null +++ b/us/states/sc/h4216_analysis/5.21_rate/test/pe_h4216_5.21_analysis.csv @@ -0,0 +1,16 @@ +Federal AGI Range,Est # Returns,Est % Returns,Old Avg Tax Liability,New Avg Tax Liability,Returns with Tax Change,% Returns in Range with Change,Old Avg Tax (Changed),New Avg Tax (Changed),Avg Tax Change,Total Dollar Change,Tax Decrease # Returns,Tax Decrease % in Range,Total Decrease Amount,Avg Decrease Amount,Tax Increase # Returns,Tax Increase % in Range,Total Increase Amount,Avg Increase Amount,No Tax Change # Returns,No Change % Returns,Zero Tax # Returns,Zero Tax % Returns +$0*,373647,12.8%,$0,$0,0,0.0%,$0,$0,$0,$0,0,0.0%,$0,$0,0,0.0%,$0,$0,373647,100.0%,373647,100.0% +$1 to $10000,373813,12.8%,$0,$0,0,0.0%,$0,$0,$0,$0,0,0.0%,$0,$0,0,0.0%,$0,$0,373813,100.0%,373813,100.0% +$10001 to $20000,241968,8.3%,$0,$9,47764,19.7%,$0,$47,$47,$2240890,0,0.0%,$0,$0,47764,19.7%,$2240890,$47,194204,80.3%,194204,80.3% +$20001 to $30000,255054,8.8%,$73,$100,123142,48.3%,$146,$201,$55,$6800610,9861,3.9%,$-42960,$-4,113281,44.4%,$6842208,$60,131912,51.7%,129254,50.7% +$30001 to $40000,227861,7.8%,$270,$231,149355,65.5%,$410,$350,$-60,$-9016051,106139,46.6%,$-12090502,$-114,43216,19.0%,$3074185,$71,78506,34.5%,76627,33.6% +$40001 to $50000,179336,6.2%,$560,$401,127890,71.3%,$785,$562,$-223,$-28534652,94456,52.7%,$-31997250,$-339,33434,18.6%,$3462593,$104,51446,28.7%,51358,28.6% +$50001 to $75000,378636,13.0%,$1134,$992,336620,88.9%,$1275,$1116,$-159,$-53582640,247822,65.5%,$-73675784,$-297,88798,23.5%,$20093184,$226,42015,11.1%,41503,11.0% +$75001 to $100000,228594,7.8%,$2112,$1973,218544,95.6%,$2200,$2054,$-145,$-31782714,165436,72.4%,$-49094312,$-297,53108,23.2%,$17311328,$326,10050,4.4%,9209,4.0% +$100001 to $150000,274752,9.4%,$3559,$3614,273870,99.7%,$3567,$3621,$55,$15006415,168279,61.2%,$-37706040,$-224,105591,38.4%,$52712572,$499,883,0.3%,655,0.2% +$150001 to $200000,141758,4.9%,$6562,$6767,141756,100.0%,$6562,$6767,$205,$29099144,41012,28.9%,$-13193018,$-322,100743,71.1%,$42292160,$420,2,0.0%,2,0.0% +$200001 to $300000,117052,4.0%,$10291,$10295,116634,99.6%,$10295,$10299,$4,$482173,63054,53.9%,$-28775124,$-456,53580,45.8%,$29257386,$546,417,0.4%,0,0.0% +$300001 to $500000,69902,2.4%,$18869,$17682,69807,99.9%,$18895,$17706,$-1189,$-83014912,67040,95.9%,$-85382400,$-1274,2767,4.0%,$2367489,$856,95,0.1%,95,0.1% +$500001 to $1000000,31613,1.1%,$33653,$30882,31612,100.0%,$33653,$30883,$-2771,$-87592584,30227,95.6%,$-88131376,$-2916,1385,4.4%,$538793,$389,0,0.0%,0,0.0% +Over $1000000,20591,0.7%,$223984,$217667,20544,99.8%,$224495,$218163,$-6332,$-130075544,14264,69.3%,$-346570912,$-24296,6280,30.5%,$216495344,$34475,47,0.2%,47,0.2% +Total,2914575,100.0%,$3843,$3716,1657539,56.9%,$6752,$6529,$-223,$-369969856,1007591,34.6%,$-766659584,$-761,649948,22.3%,$396688128,$610,1257036,43.1%,1250414,42.9% diff --git a/us/states/sc/h4216_analysis/5.21_rate/test/sc_h4216_5.21_analysis.ipynb b/us/states/sc/h4216_analysis/5.21_rate/test/sc_h4216_5.21_analysis.ipynb new file mode 100644 index 0000000..d52ae84 --- /dev/null +++ b/us/states/sc/h4216_analysis/5.21_rate/test/sc_h4216_5.21_analysis.ipynb @@ -0,0 +1,586 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cell-0", + "metadata": {}, + "source": [ + "# SC H.4216 Tax Reform Analysis - 5.21% Top Rate\n", + "\n", + "This notebook produces analysis in the same format as the RFA fiscal note for direct comparison.\n", + "\n", + "**Dataset:** `hf://policyengine/test/mar/SC.h5`\n", + "\n", + "**Reform:** H.4216 with 5.21% top rate (bill default)\n", + "\n", + "**RFA Estimate:** -$308,700,000" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "cell-1", + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "from policyengine_us.reforms.states.sc.h4216.sc_h4216 import create_sc_h4216\n", + "from policyengine_core.reforms import Reform\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "SC_DATASET = \"hf://policyengine/test/mar/SC.h5\"\n", + "TAX_YEAR = 2026\n", + "TOP_RATE = 0.0521 # 5.21% top rate\n", + "RFA_ESTIMATE = -308700000" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "cell-2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading simulations...\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2ea24db599374f01bda5ba7e6cd6f6dc", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "SC.h5: 0%| | 0.00/57.1M [00:00 lower\n", + " else:\n", + " mask = (agi > lower) & (agi <= upper)\n", + " \n", + " if mask.sum() == 0:\n", + " continue\n", + " \n", + " # Basic stats\n", + " est_returns = weight[mask].sum()\n", + " pct_returns = est_returns / total_weight * 100\n", + " \n", + " old_avg_tax = np.average(baseline_tax[mask], weights=weight[mask]) if est_returns > 0 else 0\n", + " new_avg_tax = np.average(reform_tax[mask], weights=weight[mask]) if est_returns > 0 else 0\n", + " \n", + " # Returns with tax change (threshold $1)\n", + " change_mask = mask & (np.abs(tax_change) > 1)\n", + " returns_with_change = weight[change_mask].sum()\n", + " pct_with_change = returns_with_change / est_returns * 100 if est_returns > 0 else 0\n", + " \n", + " if returns_with_change > 0:\n", + " old_avg_changed = np.average(baseline_tax[change_mask], weights=weight[change_mask])\n", + " new_avg_changed = np.average(reform_tax[change_mask], weights=weight[change_mask])\n", + " avg_change = np.average(tax_change[change_mask], weights=weight[change_mask])\n", + " else:\n", + " old_avg_changed = 0\n", + " new_avg_changed = 0\n", + " avg_change = 0\n", + " \n", + " total_change = (tax_change[mask] * weight[mask]).sum()\n", + " \n", + " # Tax decrease\n", + " decrease_mask = mask & (tax_change < -1)\n", + " decrease_returns = weight[decrease_mask].sum()\n", + " decrease_pct = decrease_returns / est_returns * 100 if est_returns > 0 else 0\n", + " total_decrease = (tax_change[decrease_mask] * weight[decrease_mask]).sum() if decrease_returns > 0 else 0\n", + " avg_decrease = np.average(tax_change[decrease_mask], weights=weight[decrease_mask]) if decrease_returns > 0 else 0\n", + " \n", + " # Tax increase\n", + " increase_mask = mask & (tax_change > 1)\n", + " increase_returns = weight[increase_mask].sum()\n", + " increase_pct = increase_returns / est_returns * 100 if est_returns > 0 else 0\n", + " total_increase = (tax_change[increase_mask] * weight[increase_mask]).sum() if increase_returns > 0 else 0\n", + " avg_increase = np.average(tax_change[increase_mask], weights=weight[increase_mask]) if increase_returns > 0 else 0\n", + " \n", + " # No change\n", + " no_change_mask = mask & (np.abs(tax_change) <= 1)\n", + " no_change_returns = weight[no_change_mask].sum()\n", + " no_change_pct = no_change_returns / est_returns * 100 if est_returns > 0 else 0\n", + " \n", + " # Zero tax\n", + " zero_tax_mask = mask & (reform_tax <= 0)\n", + " zero_tax_returns = weight[zero_tax_mask].sum()\n", + " zero_tax_pct = zero_tax_returns / est_returns * 100 if est_returns > 0 else 0\n", + " \n", + " results.append({\n", + " \"Federal AGI Range\": label,\n", + " \"Est # Returns\": int(round(est_returns)),\n", + " \"Est % Returns\": f\"{pct_returns:.1f}%\",\n", + " \"Old Avg Tax Liability\": f\"${int(round(old_avg_tax))}\",\n", + " \"New Avg Tax Liability\": f\"${int(round(new_avg_tax))}\",\n", + " \"Returns with Tax Change\": int(round(returns_with_change)),\n", + " \"% Returns in Range with Change\": f\"{pct_with_change:.1f}%\",\n", + " \"Old Avg Tax (Changed)\": f\"${int(round(old_avg_changed))}\",\n", + " \"New Avg Tax (Changed)\": f\"${int(round(new_avg_changed))}\",\n", + " \"Avg Tax Change\": f\"${int(round(avg_change))}\",\n", + " \"Total Dollar Change\": f\"${int(round(total_change))}\",\n", + " \"Tax Decrease # Returns\": int(round(decrease_returns)),\n", + " \"Tax Decrease % in Range\": f\"{decrease_pct:.1f}%\",\n", + " \"Total Decrease Amount\": f\"${int(round(total_decrease))}\",\n", + " \"Avg Decrease Amount\": f\"${int(round(avg_decrease))}\",\n", + " \"Tax Increase # Returns\": int(round(increase_returns)),\n", + " \"Tax Increase % in Range\": f\"{increase_pct:.1f}%\",\n", + " \"Total Increase Amount\": f\"${int(round(total_increase))}\",\n", + " \"Avg Increase Amount\": f\"${int(round(avg_increase))}\",\n", + " \"No Tax Change # Returns\": int(round(no_change_returns)),\n", + " \"No Change % Returns\": f\"{no_change_pct:.1f}%\",\n", + " \"Zero Tax # Returns\": int(round(zero_tax_returns)),\n", + " \"Zero Tax % Returns\": f\"{zero_tax_pct:.1f}%\"\n", + " })\n", + "\n", + "print(\"Bracket analysis complete!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "cell-5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Totals calculated!\n" + ] + } + ], + "source": [ + "# Calculate totals\n", + "change_mask_all = np.abs(tax_change) > 1\n", + "decrease_mask_all = tax_change < -1\n", + "increase_mask_all = tax_change > 1\n", + "no_change_mask_all = np.abs(tax_change) <= 1\n", + "zero_tax_mask_all = reform_tax <= 0\n", + "\n", + "total_old_avg = np.average(baseline_tax, weights=weight)\n", + "total_new_avg = np.average(reform_tax, weights=weight)\n", + "total_change_amount = (tax_change * weight).sum()\n", + "\n", + "returns_with_change_all = weight[change_mask_all].sum()\n", + "old_avg_changed_all = np.average(baseline_tax[change_mask_all], weights=weight[change_mask_all]) if returns_with_change_all > 0 else 0\n", + "new_avg_changed_all = np.average(reform_tax[change_mask_all], weights=weight[change_mask_all]) if returns_with_change_all > 0 else 0\n", + "avg_change_all = np.average(tax_change[change_mask_all], weights=weight[change_mask_all]) if returns_with_change_all > 0 else 0\n", + "\n", + "decrease_returns_all = weight[decrease_mask_all].sum()\n", + "total_decrease_all = (tax_change[decrease_mask_all] * weight[decrease_mask_all]).sum()\n", + "avg_decrease_all = np.average(tax_change[decrease_mask_all], weights=weight[decrease_mask_all]) if decrease_returns_all > 0 else 0\n", + "\n", + "increase_returns_all = weight[increase_mask_all].sum()\n", + "total_increase_all = (tax_change[increase_mask_all] * weight[increase_mask_all]).sum()\n", + "avg_increase_all = np.average(tax_change[increase_mask_all], weights=weight[increase_mask_all]) if increase_returns_all > 0 else 0\n", + "\n", + "no_change_returns_all = weight[no_change_mask_all].sum()\n", + "zero_tax_returns_all = weight[zero_tax_mask_all].sum()\n", + "\n", + "results.append({\n", + " \"Federal AGI Range\": \"Total\",\n", + " \"Est # Returns\": int(round(total_weight)),\n", + " \"Est % Returns\": \"100.0%\",\n", + " \"Old Avg Tax Liability\": f\"${int(round(total_old_avg))}\",\n", + " \"New Avg Tax Liability\": f\"${int(round(total_new_avg))}\",\n", + " \"Returns with Tax Change\": int(round(returns_with_change_all)),\n", + " \"% Returns in Range with Change\": f\"{returns_with_change_all / total_weight * 100:.1f}%\",\n", + " \"Old Avg Tax (Changed)\": f\"${int(round(old_avg_changed_all))}\",\n", + " \"New Avg Tax (Changed)\": f\"${int(round(new_avg_changed_all))}\",\n", + " \"Avg Tax Change\": f\"${int(round(avg_change_all))}\",\n", + " \"Total Dollar Change\": f\"${int(round(total_change_amount))}\",\n", + " \"Tax Decrease # Returns\": int(round(decrease_returns_all)),\n", + " \"Tax Decrease % in Range\": f\"{decrease_returns_all / total_weight * 100:.1f}%\",\n", + " \"Total Decrease Amount\": f\"${int(round(total_decrease_all))}\",\n", + " \"Avg Decrease Amount\": f\"${int(round(avg_decrease_all))}\",\n", + " \"Tax Increase # Returns\": int(round(increase_returns_all)),\n", + " \"Tax Increase % in Range\": f\"{increase_returns_all / total_weight * 100:.1f}%\",\n", + " \"Total Increase Amount\": f\"${int(round(total_increase_all))}\",\n", + " \"Avg Increase Amount\": f\"${int(round(avg_increase_all))}\",\n", + " \"No Tax Change # Returns\": int(round(no_change_returns_all)),\n", + " \"No Change % Returns\": f\"{no_change_returns_all / total_weight * 100:.1f}%\",\n", + " \"Zero Tax # Returns\": int(round(zero_tax_returns_all)),\n", + " \"Zero Tax % Returns\": f\"{zero_tax_returns_all / total_weight * 100:.1f}%\"\n", + "})\n", + "\n", + "df_results = pd.DataFrame(results)\n", + "print(\"Totals calculated!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "cell-6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "====================================================================================================\n", + "H.4216 - POLICYENGINE ANALYSIS (Test Dataset, 5.21% Top Rate)\n", + "====================================================================================================\n", + "\n", + "Total Returns: 2,914,574\n", + "General Fund Impact: $-369,969,856\n", + "\n", + "RFA Estimate: $-308,700,000\n", + "Difference: $-61,269,856\n", + "Accuracy: 80.2%\n", + "====================================================================================================\n" + ] + } + ], + "source": [ + "# Display summary\n", + "print(\"=\"*100)\n", + "print(f\"H.4216 - POLICYENGINE ANALYSIS (Test Dataset, {TOP_RATE*100:.2f}% Top Rate)\")\n", + "print(\"=\"*100)\n", + "print(f\"\\nTotal Returns: {int(total_weight):,}\")\n", + "print(f\"General Fund Impact: ${total_change_amount:,.0f}\")\n", + "print(f\"\\nRFA Estimate: ${RFA_ESTIMATE:,}\")\n", + "print(f\"Difference: ${total_change_amount - RFA_ESTIMATE:,.0f}\")\n", + "print(f\"Accuracy: {(1 - abs(total_change_amount - RFA_ESTIMATE) / abs(RFA_ESTIMATE)) * 100:.1f}%\")\n", + "print(\"=\"*100)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "cell-7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Exported to: pe_h4216_5.21_analysis.csv\n" + ] + } + ], + "source": [ + "# Export to CSV in RFA format\n", + "df_results.to_csv('pe_h4216_5.21_analysis.csv', index=False)\n", + "print(\"Exported to: pe_h4216_5.21_analysis.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "cell-8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "KEY METRICS:\n", + " Federal AGI Range Est # Returns Est % Returns Old Avg Tax Liability New Avg Tax Liability Total Dollar Change\n", + " $0* 373647 12.8% $0 $0 $0\n", + " $1 to $10000 373813 12.8% $0 $0 $0\n", + " $10001 to $20000 241968 8.3% $0 $9 $2240890\n", + " $20001 to $30000 255054 8.8% $73 $100 $6800610\n", + " $30001 to $40000 227861 7.8% $270 $231 $-9016051\n", + " $40001 to $50000 179336 6.2% $560 $401 $-28534652\n", + " $50001 to $75000 378636 13.0% $1134 $992 $-53582640\n", + " $75001 to $100000 228594 7.8% $2112 $1973 $-31782714\n", + " $100001 to $150000 274752 9.4% $3559 $3614 $15006415\n", + " $150001 to $200000 141758 4.9% $6562 $6767 $29099144\n", + " $200001 to $300000 117052 4.0% $10291 $10295 $482173\n", + " $300001 to $500000 69902 2.4% $18869 $17682 $-83014912\n", + "$500001 to $1000000 31613 1.1% $33653 $30882 $-87592584\n", + " Over $1000000 20591 0.7% $223984 $217667 $-130075544\n", + " Total 2914575 100.0% $3843 $3716 $-369969856\n" + ] + } + ], + "source": [ + "# Display key columns for quick comparison\n", + "display_cols = [\n", + " \"Federal AGI Range\", \"Est # Returns\", \"Est % Returns\",\n", + " \"Old Avg Tax Liability\", \"New Avg Tax Liability\", \"Total Dollar Change\"\n", + "]\n", + "print(\"\\nKEY METRICS:\")\n", + "print(df_results[display_cols].to_string(index=False))" + ] + }, + { + "cell_type": "markdown", + "id": "cell-9", + "metadata": {}, + "source": [ + "## Side-by-Side Comparison with RFA" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "cell-10", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "====================================================================================================\n", + "POLICYENGINE (Test) vs RFA COMPARISON (5.21% Rate)\n", + "====================================================================================================\n", + " AGI Range PE Returns RFA Returns PE Impact RFA Impact Diff\n", + " $0* 373,647 78,854 $0 $-671,000 $+671,000\n", + " $1 to $10000 373,813 286,253 $0 $1,653,000 $-1,653,000\n", + " $10001 to $20000 241,968 310,122 $2,240,890 $2,867,000 $-626,110\n", + " $20001 to $30000 255,054 275,560 $6,800,610 $762,000 $+6,038,610\n", + " $30001 to $40000 227,861 269,566 $-9,016,051 $-19,416,000 $+10,399,949\n", + " $40001 to $50000 179,336 234,386 $-28,534,652 $-42,568,000 $+14,033,348\n", + " $50001 to $75000 378,636 407,593 $-53,582,640 $-89,935,000 $+36,352,360\n", + " $75001 to $100000 228,594 250,437 $-31,782,714 $-48,624,000 $+16,841,286\n", + " $100001 to $150000 274,752 298,343 $15,006,415 $-26,092,000 $+41,098,415\n", + " $150001 to $200000 141,758 143,398 $29,099,144 $23,766,000 $+5,333,144\n", + " $200001 to $300000 117,052 109,340 $482,173 $3,955,000 $-3,472,827\n", + " $300001 to $500000 69,902 56,123 $-83,014,912 $-32,054,000 $-50,960,912\n", + "$500001 to $1000000 31,613 25,664 $-87,592,584 $-37,381,000 $-50,211,584\n", + " Over $1000000 20,591 11,936 $-130,075,544 $-44,989,000 $-85,086,544\n", + " Total 2,914,575 2,757,573 $-369,969,856 $-308,700,000 $-61,269,856\n", + "====================================================================================================\n" + ] + } + ], + "source": [ + "# Load RFA data\n", + "rfa_df = pd.read_csv('../rfa_h4216_5.21_analysis.csv')\n", + "\n", + "def parse_dollar(val):\n", + " if isinstance(val, str):\n", + " return float(val.replace('$', '').replace(',', '').replace('%', ''))\n", + " return val\n", + "\n", + "def parse_pct(val):\n", + " if isinstance(val, str):\n", + " return float(val.replace('%', ''))\n", + " return val\n", + "\n", + "# Create comparison\n", + "comparison = []\n", + "for idx, pe_row in df_results.iterrows():\n", + " agi_range = pe_row['Federal AGI Range']\n", + " rfa_match = rfa_df[rfa_df['Federal AGI Range'] == agi_range]\n", + " \n", + " pe_returns = pe_row['Est # Returns']\n", + " pe_impact = parse_dollar(pe_row['Total Dollar Change'])\n", + " \n", + " if len(rfa_match) > 0:\n", + " rfa_returns = rfa_match['Est # Returns'].values[0]\n", + " rfa_impact = parse_dollar(rfa_match['Total Dollar Change'].values[0])\n", + " else:\n", + " rfa_returns = 0\n", + " rfa_impact = 0\n", + " \n", + " comparison.append({\n", + " 'AGI Range': agi_range,\n", + " 'PE Returns': f\"{pe_returns:,}\",\n", + " 'RFA Returns': f\"{rfa_returns:,}\" if rfa_returns else \"N/A\",\n", + " 'PE Impact': f\"${pe_impact:,.0f}\",\n", + " 'RFA Impact': f\"${rfa_impact:,.0f}\" if rfa_impact else \"N/A\",\n", + " 'Diff': f\"${pe_impact - rfa_impact:+,.0f}\" if rfa_impact else \"N/A\"\n", + " })\n", + "\n", + "comparison_df = pd.DataFrame(comparison)\n", + "print(\"\\n\" + \"=\"*100)\n", + "print(\"POLICYENGINE (Test) vs RFA COMPARISON (5.21% Rate)\")\n", + "print(\"=\"*100)\n", + "print(comparison_df.to_string(index=False))\n", + "print(\"=\"*100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-11", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "========================================================================================================================\n", + "FULL POLICYENGINE ANALYSIS (RFA Format)\n", + "========================================================================================================================\n", + " Federal AGI Range Est # Returns Est % Returns Old Avg Tax Liability New Avg Tax Liability Returns with Tax Change % Returns in Range with Change Old Avg Tax (Changed) New Avg Tax (Changed) Avg Tax Change Total Dollar Change Tax Decrease # Returns Tax Decrease % in Range Total Decrease Amount Avg Decrease Amount Tax Increase # Returns Tax Increase % in Range Total Increase Amount Avg Increase Amount No Tax Change # Returns No Change % Returns Zero Tax # Returns Zero Tax % Returns\n", + " $0* 373647 12.8% $0 $0 0 0.0% $0 $0 $0 $0 0 0.0% $0 $0 0 0.0% $0 $0 373647 100.0% 373647 100.0%\n", + " $1 to $10000 373813 12.8% $0 $0 0 0.0% $0 $0 $0 $0 0 0.0% $0 $0 0 0.0% $0 $0 373813 100.0% 373813 100.0%\n", + " $10001 to $20000 241968 8.3% $0 $9 47764 19.7% $0 $47 $47 $2240890 0 0.0% $0 $0 47764 19.7% $2240890 $47 194204 80.3% 194204 80.3%\n", + " $20001 to $30000 255054 8.8% $73 $100 123142 48.3% $146 $201 $55 $6800610 9861 3.9% $-42960 $-4 113281 44.4% $6842208 $60 131912 51.7% 129254 50.7%\n", + " $30001 to $40000 227861 7.8% $270 $231 149355 65.5% $410 $350 $-60 $-9016051 106139 46.6% $-12090502 $-114 43216 19.0% $3074185 $71 78506 34.5% 76627 33.6%\n", + " $40001 to $50000 179336 6.2% $560 $401 127890 71.3% $785 $562 $-223 $-28534652 94456 52.7% $-31997250 $-339 33434 18.6% $3462593 $104 51446 28.7% 51358 28.6%\n", + " $50001 to $75000 378636 13.0% $1134 $992 336620 88.9% $1275 $1116 $-159 $-53582640 247822 65.5% $-73675784 $-297 88798 23.5% $20093184 $226 42015 11.1% 41503 11.0%\n", + " $75001 to $100000 228594 7.8% $2112 $1973 218544 95.6% $2200 $2054 $-145 $-31782714 165436 72.4% $-49094312 $-297 53108 23.2% $17311328 $326 10050 4.4% 9209 4.0%\n", + " $100001 to $150000 274752 9.4% $3559 $3614 273870 99.7% $3567 $3621 $55 $15006415 168279 61.2% $-37706040 $-224 105591 38.4% $52712572 $499 883 0.3% 655 0.2%\n", + " $150001 to $200000 141758 4.9% $6562 $6767 141756 100.0% $6562 $6767 $205 $29099144 41012 28.9% $-13193018 $-322 100743 71.1% $42292160 $420 2 0.0% 2 0.0%\n", + " $200001 to $300000 117052 4.0% $10291 $10295 116634 99.6% $10295 $10299 $4 $482173 63054 53.9% $-28775124 $-456 53580 45.8% $29257386 $546 417 0.4% 0 0.0%\n", + " $300001 to $500000 69902 2.4% $18869 $17682 69807 99.9% $18895 $17706 $-1189 $-83014912 67040 95.9% $-85382400 $-1274 2767 4.0% $2367489 $856 95 0.1% 95 0.1%\n", + "$500001 to $1000000 31613 1.1% $33653 $30882 31612 100.0% $33653 $30883 $-2771 $-87592584 30227 95.6% $-88131376 $-2916 1385 4.4% $538793 $389 0 0.0% 0 0.0%\n", + " Over $1000000 20591 0.7% $223984 $217667 20544 99.8% $224495 $218163 $-6332 $-130075544 14264 69.3% $-346570912 $-24296 6280 30.5% $216495344 $34475 47 0.2% 47 0.2%\n", + " Total 2914575 100.0% $3843 $3716 1657539 56.9% $6752 $6529 $-223 $-369969856 1007591 34.6% $-766659584 $-761 649948 22.3% $396688128 $610 1257036 43.1% 1250414 42.9%\n" + ] + }, + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n", + "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n", + "\u001b[1;31mClick here for more info. \n", + "\u001b[1;31mView Jupyter log for further details." + ] + } + ], + "source": [ + "# Full results table\n", + "print(\"\\n\" + \"=\"*120)\n", + "print(\"FULL POLICYENGINE ANALYSIS (RFA Format)\")\n", + "print(\"=\"*120)\n", + "pd.set_option('display.max_columns', None)\n", + "pd.set_option('display.width', None)\n", + "print(df_results.to_string(index=False))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/us/states/sc/h4216_analysis/h4216_analysis_comparison.md b/us/states/sc/h4216_analysis/h4216_analysis_comparison.md new file mode 100644 index 0000000..1f834f8 --- /dev/null +++ b/us/states/sc/h4216_analysis/h4216_analysis_comparison.md @@ -0,0 +1,220 @@ +# SC H.4216 Analysis: PolicyEngine vs RFA Comparison + +## Executive Summary + +This analysis compares PolicyEngine estimates against the SC Revenue and Fiscal Affairs (RFA) fiscal note for H.4216, a tax reform bill that restructures South Carolina's income tax system. + +### Budget Impact Summary (5.21% Top Rate) + +| Source | Budget Impact | vs RFA | Accuracy | +|--------|---------------|--------|----------| +| **RFA** | **-$308.7M** | - | - | +| Test Dataset | -$370.0M | -$61.3M | 80% (20% over) | + +**Key Finding:** The policy encoding is correct. The Test dataset now closely matches RFA with 80% accuracy, slightly overestimating the tax cut. + +--- + +## H.4216 Reform Structure + +### Current SC Tax System (Baseline) +- 0% on income up to $3,640 +- 3% on income $3,640 to $18,230 +- 6% on income over $18,230 +- Taxable income = Federal Taxable Income + SC Additions - SC Subtractions + +### H.4216 Reform (5.21% Top Rate) +- 1.99% on income up to $30,000 +- 5.21% on income over $30,000 +- Taxable income = AGI - SC Subtractions - SCIAD (new deduction) +- No federal standard/itemized deductions in base + +### SCIAD (SC Individual Adjustment Deduction) + +| Filing Status | Deduction | Phase-out Start | Phase-out End | +|---------------|-----------|-----------------|---------------| +| Single | $15,000 | $40,000 AGI | $95,000 AGI | +| MFJ | $30,000 | $80,000 AGI | $190,000 AGI | +| HoH | $22,500 | $60,000 AGI | $142,500 AGI | + +--- + +## Dataset Comparison + +### Overview + +| Metric | RFA | Test | +|--------|-----|------| +| **Total Returns** | 2,757,573 | 2,914,575 (+5.7%) | +| **Millionaire Returns** | 11,936 | 20,591 (+73%) | +| **Median HH AGI** | N/A | $68,193 | +| **Avg HH AGI** | N/A | $148,865 | +| **Max AGI** | N/A | $2.38B | + +### Dataset Path +- **Test:** `hf://policyengine/test/mar/SC.h5` + +--- + +## Budget Impact by Income Bracket (5.21% Rate) + +| AGI Range | RFA | Test | Test vs RFA | +|-----------|-----|------|-------------| +| $0* | -$671K | $0 | +$671K | +| $1-$10K | +$1.7M | $0 | -$1.7M | +| $10K-$20K | +$2.9M | +$2.2M | -$0.6M | +| $20K-$30K | +$0.8M | +$6.8M | +$6.0M | +| $30K-$40K | -$19.4M | -$9.0M | +$10.4M | +| $40K-$50K | -$42.6M | -$28.5M | +$14.0M | +| $50K-$75K | -$89.9M | -$53.6M | +$36.4M | +| $75K-$100K | -$48.6M | -$31.8M | +$16.8M | +| $100K-$150K | -$26.1M | +$15.0M | +$41.1M | +| $150K-$200K | +$23.8M | +$29.1M | +$5.3M | +| $200K-$300K | +$4.0M | +$0.5M | -$3.5M | +| $300K-$500K | -$32.1M | -$83.0M | -$51.0M | +| $500K-$1M | -$37.4M | -$87.6M | -$50.2M | +| **Over $1M** | **-$45.0M** | **-$130.1M** | **-$85.1M** | +| **TOTAL** | **-$308.7M** | **-$370.0M** | **-$61.3M** | + +### Winner/Loser Distribution + +| Metric | RFA | Test | +|--------|-----|------| +| **Tax Decrease** | 42.8% | 34.6% | +| **Tax Increase** | 22.6% | 22.3% | +| **No Change** | 34.6% | 43.1% | +| **Total Decrease $** | -$522.1M | -$766.7M | +| **Total Increase $** | +$213.4M | +$396.7M | + +--- + +## Root Cause Analysis + +### 1. Millionaire Distribution (Primary Driver) + +The millionaire bracket (>$1M AGI) is the dominant driver of discrepancies: + +| Metric | RFA | Test | +|--------|-----|------| +| **Millionaire Count** | 11,936 | 20,591 (+73%) | +| **Budget Impact** | -$45.0M | -$130.1M | +| **Avg Tax Change** | -$4,031 | -$6,332 | + +**Test Dataset:** Has 73% more millionaires than RFA, contributing to larger overall tax cut estimate. + +### 2. High-Income Brackets ($300K+) + +Test dataset shows significantly larger tax cuts in high-income brackets: + +| Bracket Range | RFA Impact | Test Impact | Difference | +|---------------|------------|-------------|------------| +| $300K-$500K | -$32.1M | -$83.0M | -$51.0M | +| $500K-$1M | -$37.4M | -$87.6M | -$50.2M | +| Over $1M | -$45.0M | -$130.1M | -$85.1M | +| **Total $300K+** | **-$114.5M** | **-$300.7M** | **-$186.2M** | + +The $300K+ brackets account for most of the overestimate. + +### 3. Middle-Income Brackets ($30K-$100K) + +| Bracket Range | RFA Impact | Test Impact | Difference | +|---------------|------------|-------------|------------| +| $30K-$100K combined | -$200.5M | -$122.9M | +$77.6M | + +Test dataset underweights middle-income tax cuts, partially offsetting the high-income overestimate. + +### 4. Upper-Middle Income ($100K-$300K) + +| Bracket Range | RFA Impact | Test Impact | Difference | +|---------------|------------|-------------|------------| +| $100K-$300K | -$22.1M | +$44.6M | +$66.7M | + +Test shows tax increases in this range where RFA shows cuts, suggesting different SCIAD phase-out distributions. + +--- + +## Summary of Dataset Characteristics + +### Test Dataset +- **Slightly overestimates** tax cuts (80% accuracy) +- Has 73% more millionaires than RFA (20,591 vs 11,936) +- Higher average incomes ($149K avg HH AGI) +- Return count close to RFA (+5.7%) +- High-income brackets drive the overestimate + +### Key Differences from RFA +- More returns in $300K+ brackets +- Larger average tax cuts for high earners +- Fewer middle-income filers seeing tax cuts + +--- + +## Recommendations + +### For Data Team +1. Investigate millionaire overcount (20,591 vs 11,936 RFA) +2. Validate $300K-$1M bracket weighting +3. Compare income distributions within brackets to RFA + +### For Reporting + +| Estimate Type | Value | Source | +|---------------|-------|--------| +| Central | -$309M | RFA | +| PE Estimate | -$370M | Test Dataset (80% accuracy) | + +--- + +## File Structure + +``` +sc/ +├── data_exploration.ipynb # State dataset exploration +├── data_exploration_test.ipynb # Test dataset exploration +├── sc_dataset_summary_weighted.csv # State dataset summary stats +├── sc_test_dataset_summary_weighted.csv # Test dataset summary stats +└── h4216_analysis/ + ├── h4216_analysis_comparison.md # This file + └── 5.21_rate/ + ├── rfa_h4216_5.21_analysis.csv # RFA fiscal note data + └── test/ + ├── pe_h4216_5.21_analysis.csv + └── sc_h4216_5.21_analysis.ipynb +``` + +--- + +## Technical Notes + +### PR #7514 Fix (February 2025) + +Fixed bug where `sc_additions` (QBI and SALT addbacks) were incorrectly applied under H.4216. Since H.4216 starts from AGI (before federal deductions), addbacks are inappropriate. + +- **Before fix:** +$39.8M (wrong direction - showed revenue increase) +- **After fix:** -$370M (Test dataset) + +### Policy Parameters Location +``` +policyengine-us/policyengine_us/parameters/gov/contrib/states/sc/h4216/ +``` + +### Microsimulation Usage +```python +from policyengine_us import Microsimulation +from policyengine_us.reforms.states.sc.h4216.sc_h4216 import create_sc_h4216 +from policyengine_core.reforms import Reform + +# Create reform with 5.21% top rate +param_reform = Reform.from_dict({ + "gov.contrib.states.sc.h4216.in_effect": {"2026-01-01.2100-12-31": True}, + "gov.contrib.states.sc.h4216.rates[1].rate": {"2026-01-01.2100-12-31": 0.0521} +}, country_id="us") + +base_reform = create_sc_h4216() +reform = (base_reform, param_reform) + +sim = Microsimulation(dataset="hf://policyengine/test/mar/SC.h5", reform=reform) +``` + +### RFA Fiscal Note +- [H.4216 Fiscal Note (5.21% Rate)](https://www.scstatehouse.gov/sess126_2025-2026/fiscalimpact/H4216.pdf) diff --git a/us/states/sc/sc_dataset_summary_weighted.csv b/us/states/sc/sc_dataset_summary_weighted.csv new file mode 100644 index 0000000..6ff9465 --- /dev/null +++ b/us/states/sc/sc_dataset_summary_weighted.csv @@ -0,0 +1,22 @@ +Metric,Value +Household count (weighted),"1,887,388" +Person count (weighted),"5,451,832" +Average household size,2.9 +Weighted median household AGI,"$43,222" +Weighted average household AGI,"$103,858" +Weighted median person AGI,"$38,962" +Weighted average person AGI,"$93,926" +Unweighted median household AGI,"$41,884" +Unweighted median person AGI,"$40,216" +25th percentile household AGI,"$9,425" +75th percentile household AGI,"$91,877" +90th percentile household AGI,"$167,068" +95th percentile household AGI,"$268,311" +Max household AGI,"$6,430,892" +Total households with children,"598,564" +Households with 1 child,"247,956" +Households with 2 children,"190,545" +Households with 3+ children,"160,063" +Total children under 18,"1,198,147" +Children under 6,"349,101" +Children under 3,"169,412" diff --git a/us/states/sc/sc_test_dataset_summary_weighted.csv b/us/states/sc/sc_test_dataset_summary_weighted.csv new file mode 100644 index 0000000..02944d4 --- /dev/null +++ b/us/states/sc/sc_test_dataset_summary_weighted.csv @@ -0,0 +1,22 @@ +Metric,Value +Household count (weighted),"1,921,642" +Person count (weighted),"5,446,194" +Average household size,2.8 +Weighted median household AGI,"$68,193" +Weighted average household AGI,"$148,865" +Weighted median person AGI,"$52,025" +Weighted average person AGI,"$121,624" +Unweighted median household AGI,"$55,711" +Unweighted median person AGI,"$52,622" +25th percentile household AGI,"$22,381" +75th percentile household AGI,"$138,373" +90th percentile household AGI,"$244,734" +95th percentile household AGI,"$371,597" +Max household AGI,"$2,379,085,312" +Total households with children,"641,478" +Households with 1 child,"266,099" +Households with 2 children,"241,486" +Households with 3+ children,"133,893" +Total children under 18,"1,197,559" +Children under 6,"359,192" +Children under 3,"159,871"