diff --git a/changelog.d/370.md b/changelog.d/370.md new file mode 100644 index 000000000..dd421dc13 --- /dev/null +++ b/changelog.d/370.md @@ -0,0 +1 @@ +Add LA-level main residence value calibration targets for all 360 UK local authorities and wire them into the LA reweighter alongside the existing tenure and rent targets. LAs with every input available (in practice the English LAs covered by the English Housing Survey) use directly observed indicators (HMLR UK HPI average price × English Housing Survey ownership share × Census household count); LAs missing any input (Wales, Scotland and Northern Ireland) fall back to their household share of the national total. diff --git a/policyengine_uk_data/datasets/local_areas/local_authorities/loss.py b/policyengine_uk_data/datasets/local_areas/local_authorities/loss.py index fd5ed9440..a58b66bd2 100644 --- a/policyengine_uk_data/datasets/local_areas/local_authorities/loss.py +++ b/policyengine_uk_data/datasets/local_areas/local_authorities/loss.py @@ -11,6 +11,7 @@ - ONS income: ONS small area income estimates - Tenure: English Housing Survey - Private rent: VOA/ONS private rental market statistics +- Main residence value: HMLR UK HPI × ownership share × household count """ from policyengine_uk import Microsimulation @@ -38,6 +39,7 @@ load_tenure_data, load_private_rents, ) +from policyengine_uk_data.targets.sources.la_land import load_la_avg_prices def create_local_authority_target_matrix( @@ -252,6 +254,44 @@ def create_local_authority_target_matrix( national_rent * la_household_share, ) + # ── Main residence value (HMLR × ownership share × households) ─ + # Mirrors the private-rent target pattern: directly observed + # LA-level housing indicators multiplied together, with a + # national-share fallback for LAs missing any input. 
+ la_prices = load_la_avg_prices() + tenure_merged = tenure_merged.merge( + la_prices[["code", "avg_house_price"]], on="code", how="left" + ) + + matrix["housing/main_residence_value"] = sim.calculate( + "main_residence_value" + ).values + + ownership_share_la = ( + tenure_merged["owned_outright_pct"].fillna(0) + + tenure_merged["owned_mortgage_pct"].fillna(0) + ) / 100 + tenure_merged["main_residence_value_target"] = ( + tenure_merged["avg_house_price"] + * ownership_share_la + * tenure_merged["households"] + ) + + has_property = ( + tenure_merged["avg_house_price"].notna() + & tenure_merged["owned_outright_pct"].notna() + & tenure_merged["households"].notna() + ).values + national_property = ( + original_weights * matrix["housing/main_residence_value"].values + ).sum() + + y["housing/main_residence_value"] = np.where( + has_property, + tenure_merged["main_residence_value_target"].values, + national_property * la_household_share, + ) + # ── Country mask ─────────────────────────────────────────────── country_mask = create_country_mask( household_countries=sim.calculate("country").values, diff --git a/policyengine_uk_data/storage/la_land_values.csv b/policyengine_uk_data/storage/la_land_values.csv new file mode 100644 index 000000000..ef0023a32 --- /dev/null +++ b/policyengine_uk_data/storage/la_land_values.csv @@ -0,0 +1,361 @@ +code,name,households,avg_house_price +E06000001,Hartlepool,42687,132463 +E06000002,Middlesbrough,62873,141270 +E06000003,Redcar and Cleveland,64597,146735 +E06000004,Stockton-on-Tees,88187,170575 +E06000005,Darlington,50525,159542 +E06000006,Halton,58738,189414 +E06000007,Warrington,96224,251903 +E06000008,Blackburn with Darwen,61298,162893 +E06000009,Blackpool,69328,136485 +E06000010,"Kingston upon Hull, City of",119535,131323 +E06000011,East Riding of Yorkshire,160848,221028 +E06000012,North East Lincolnshire,72731,148043 +E06000013,North Lincolnshire,77080,180202 +E06000014,York,90598,306571 +E06000015,Derby,110196,205530 
+E06000016,Leicester,134204,226491 +E06000017,Rutland,18295,318903 +E06000018,Nottingham,131657,193888 +E06000019,"Herefordshire, County of",86625,287302 +E06000020,Telford and Wrekin,79145,215752 +E06000021,Stoke-on-Trent,115950,147831 +E06000022,Bath and North East Somerset,84855,407049 +E06000023,"Bristol, City of",205211,353265 +E06000024,North Somerset,102261,312318 +E06000025,South Gloucestershire,129657,332736 +E06000026,Plymouth,126166,218085 +E06000027,Torbay,66499,232041 +E06000030,Swindon,100125,260905 +E06000031,Peterborough,88023,231757 +E06000032,Luton,84690,281149 +E06000033,Southend-on-Sea,80611,333356 +E06000034,Thurrock,70478,325936 +E06000035,Medway,118085,298520 +E06000036,Bracknell Forest,52392,394751 +E06000037,West Berkshire,70169,400144 +E06000038,Reading,70322,354805 +E06000039,Slough,54550,337206 +E06000040,Windsor and Maidenhead,64836,572852 +E06000041,Wokingham,72272,503052 +E06000042,Milton Keynes,119789,328697 +E06000043,Brighton and Hove,126374,410203 +E06000044,Portsmouth,93797,249460 +E06000045,Southampton,107827,233920 +E06000046,Isle of Wight,71945,247936 +E06000047,County Durham,253106,143291 +E06000049,Cheshire East,185025,306039 +E06000050,Cheshire West and Chester,162209,265955 +E06000051,Shropshire,146609,281161 +E06000052,Cornwall,265515,277318 +E06000053,Isles of Scilly,1115,308582 +E06000054,Wiltshire,230273,332483 +E06000055,Bedford,82624,331140 +E06000056,Central Bedfordshire,129846,358488 +E06000057,Northumberland,154547,214989 +E06000058,"Bournemouth, Christchurch and Poole",180181,309673 +E06000059,Dorset,180376,332378 +E06000060,Buckinghamshire,237680,487653 +E06000061,North Northamptonshire,164829,258515 +E06000062,West Northamptonshire,180295,294010 +E06000063,Cumberland,91136,174281 +E06000064,Westmorland and Furness,94053,227777 +E06000065,North Yorkshire,127165,272111 +E06000066,Somerset,107037,278440 +E07000008,Cambridge,55946,485985 +E07000009,East Cambridgeshire,39423,357866 +E07000010,Fenland,46560,234696 
+E07000011,Huntingdonshire,81045,310990 +E07000012,South Cambridgeshire,71683,433729 +E07000032,Amber Valley,59550,234786 +E07000033,Bolsover,37016,185199 +E07000034,Chesterfield,49895,200389 +E07000035,Derbyshire Dales,34180,344946 +E07000036,Erewash,53328,219887 +E07000037,High Peak,42696,262663 +E07000038,North East Derbyshire,48876,249829 +E07000039,South Derbyshire,47323,257691 +E07000040,East Devon,69365,343715 +E07000041,Exeter,54901,283194 +E07000042,Mid Devon,37599,299716 +E07000043,North Devon,44797,286805 +E07000044,South Hams,41310,368805 +E07000045,Teignbridge,62939,297853 +E07000046,Torridge,31384,269713 +E07000047,West Devon,25342,308836 +E07000061,Eastbourne,48596,251894 +E07000062,Hastings,42726,240579 +E07000063,Lewes,45592,354787 +E07000064,Rother,44092,340936 +E07000065,Wealden,69502,399752 +E07000066,Basildon,80242,362898 +E07000067,Braintree,69538,332140 +E07000068,Brentwood,33376,520013 +E07000069,Castle Point,39330,365893 +E07000070,Chelmsford,77893,383278 +E07000071,Colchester,84604,300310 +E07000072,Epping Forest,57631,549606 +E07000073,Harlow,40148,314356 +E07000074,Maldon,29084,395716 +E07000075,Rochford,37856,407373 +E07000076,Tendring,73646,268088 +E07000077,Uttlesford,38874,490567 +E07000078,Cheltenham,55163,336877 +E07000079,Cotswold,42623,415906 +E07000080,Forest of Dean,39023,299003 +E07000081,Gloucester,57916,236963 +E07000082,Stroud,54475,333064 +E07000083,Tewkesbury,43553,331405 +E07000084,Basingstoke and Deane,82589,373985 +E07000085,East Hampshire,55752,440091 +E07000086,Eastleigh,60138,311946 +E07000087,Fareham,52074,332945 +E07000088,Gosport,37662,230613 +E07000089,Hart,43057,495632 +E07000090,Havant,55546,316941 +E07000091,New Forest,88338,392680 +E07000092,Rushmoor,42222,332762 +E07000093,Test Valley,58786,370524 +E07000094,Winchester,56541,465183 +E07000095,Broxbourne,41490,407544 +E07000096,Dacorum,67995,467070 +E07000098,Hertsmere,46391,552787 +E07000099,North Hertfordshire,59481,419688 +E07000102,Three 
Rivers,38141,585189 +E07000103,Watford,42014,392605 +E07000105,Ashford,55344,347180 +E07000106,Canterbury,67957,335342 +E07000107,Dartford,47304,356253 +E07000108,Dover,52754,293719 +E07000109,Gravesham,44362,347503 +E07000110,Maidstone,76476,358161 +E07000111,Sevenoaks,52383,535114 +E07000112,Folkestone and Hythe,50991,310400 +E07000113,Swale,63290,289925 +E07000114,Thanet,65339,263646 +E07000115,Tonbridge and Malling,56505,406667 +E07000116,Tunbridge Wells,51447,465399 +E07000117,Burnley,41847,131476 +E07000118,Chorley,52608,208587 +E07000119,Fylde,39584,227834 +E07000120,Hyndburn,37173,135525 +E07000121,Lancaster,63938,200827 +E07000122,Pendle,39879,146756 +E07000123,Preston,63040,189448 +E07000124,Ribble Valley,28002,280677 +E07000125,Rossendale,31819,197028 +E07000126,South Ribble,51099,207382 +E07000127,West Lancashire,51070,229049 +E07000128,Wyre,53177,193044 +E07000129,Blaby,45683,295289 +E07000130,Charnwood,75849,275442 +E07000131,Harborough,42531,344277 +E07000132,Hinckley and Bosworth,52155,261427 +E07000133,Melton,23700,290884 +E07000134,North West Leicestershire,46330,287928 +E07000135,Oadby and Wigston,23560,271073 +E07000136,Boston,30365,192397 +E07000137,East Lindsey,67863,218674 +E07000138,Lincoln,44186,186704 +E07000139,North Kesteven,53366,244502 +E07000140,South Holland,42889,227339 +E07000141,South Kesteven,66371,255822 +E07000142,West Lindsey,44831,212770 +E07000143,Breckland,63896,277510 +E07000144,Broadland,59286,315312 +E07000145,Great Yarmouth,46015,208770 +E07000146,King's Lynn and West Norfolk,72550,268435 +E07000147,North Norfolk,51309,288661 +E07000148,Norwich,66263,222723 +E07000149,South Norfolk,64294,312743 +E07000170,Ashfield,56709,190280 +E07000171,Bassetlaw,54577,207390 +E07000172,Broxtowe,51864,256619 +E07000173,Gedling,55718,249519 +E07000174,Mansfield,50295,190540 +E07000175,Newark and Sherwood,58096,240202 +E07000176,Rushcliffe,53065,333758 +E07000177,Cherwell,71480,355186 +E07000178,Oxford,58982,480531 +E07000179,South 
Oxfordshire,64446,462111 +E07000180,Vale of White Horse,62792,411832 +E07000181,West Oxfordshire,51488,422389 +E07000192,Cannock Chase,45724,234415 +E07000193,East Staffordshire,54623,223975 +E07000194,Lichfield,50572,326691 +E07000195,Newcastle-under-Lyme,57040,202139 +E07000196,South Staffordshire,50237,295861 +E07000197,Stafford,66115,265685 +E07000198,Staffordshire Moorlands,45583,216108 +E07000199,Tamworth,34257,233451 +E07000200,Babergh,42926,331953 +E07000202,Ipswich,64321,221494 +E07000203,Mid Suffolk,49895,319139 +E07000207,Elmbridge,61225,743009 +E07000208,Epsom and Ewell,33649,545227 +E07000209,Guildford,60529,523409 +E07000210,Mole Valley,40275,557926 +E07000211,Reigate and Banstead,64563,466628 +E07000212,Runnymede,36674,480402 +E07000213,Spelthorne,46050,440782 +E07000214,Surrey Heath,38463,462511 +E07000215,Tandridge,37661,495400 +E07000216,Waverley,55267,555732 +E07000217,Woking,43207,433269 +E07000218,North Warwickshire,29186,269540 +E07000219,Nuneaton and Bedworth,59777,234032 +E07000220,Rugby,49396,275985 +E07000221,Stratford-on-Avon,62587,390081 +E07000222,Warwick,66686,366063 +E07000223,Adur,29062,370708 +E07000224,Arun,76206,325042 +E07000225,Chichester,57159,441151 +E07000226,Crawley,47401,336354 +E07000227,Horsham,64939,441285 +E07000228,Mid Sussex,66723,436743 +E07000229,Worthing,53504,308125 +E07000234,Bromsgrove,43061,333847 +E07000235,Malvern Hills,36187,338287 +E07000236,Redditch,38123,248001 +E07000237,Worcester,47138,249218 +E07000238,Wychavon,61914,333519 +E07000239,Wyre Forest,47963,234081 +E07000240,St Albans,61308,609646 +E07000241,Welwyn Hatfield,48609,443795 +E07000242,East Hertfordshire,64958,460270 +E07000243,Stevenage,38761,323392 +E07000244,East Suffolk,118154,283660 +E07000245,West Suffolk,81715,295650 +E08000001,Bolton,123006,200491 +E08000002,Bury,83538,237721 +E08000003,Manchester,230104,257630 +E08000004,Oldham,96814,213245 +E08000005,Rochdale,93421,209799 +E08000006,Salford,120347,226427 
+E08000007,Stockport,134807,306235 +E08000008,Tameside,106293,211680 +E08000009,Trafford,99920,378514 +E08000010,Wigan,150180,191180 +E08000011,Knowsley,68335,190405 +E08000012,Liverpool,215816,184804 +E08000013,St. Helens,86307,179209 +E08000014,Sefton,130413,220702 +E08000015,Wirral,154379,218516 +E08000016,Barnsley,111728,172458 +E08000017,Doncaster,140435,173424 +E08000018,Rotherham,120506,191161 +E08000019,Sheffield,245451,220445 +E08000021,Newcastle upon Tyne,127262,207936 +E08000022,North Tyneside,100515,202840 +E08000023,South Tyneside,71355,165647 +E08000024,Sunderland,129173,146527 +E08000025,Birmingham,443632,232844 +E08000026,Coventry,141539,226361 +E08000027,Dudley,146542,227378 +E08000028,Sandwell,135966,200069 +E08000029,Solihull,93737,328744 +E08000030,Walsall,121137,215676 +E08000031,Wolverhampton,106933,213273 +E08000032,Bradford,218386,189396 +E08000033,Calderdale,95154,186573 +E08000034,Kirklees,185181,205944 +E08000035,Leeds,378060,246293 +E08000036,Wakefield,159785,199323 +E08000037,Gateshead,93642,151480 +E09000001,City of London,5133,740433 +E09000002,Barking and Dagenham,76891,353512 +E09000003,Barnet,156752,594093 +E09000004,Bexley,102241,410346 +E09000005,Brent,130863,568171 +E09000006,Bromley,142895,535306 +E09000007,Camden,94816,783812 +E09000008,Croydon,163059,402126 +E09000009,Ealing,145231,575503 +E09000010,Enfield,125214,471381 +E09000011,Greenwich,119526,474935 +E09000012,Hackney,114015,614552 +E09000013,Hammersmith and Fulham,83515,713773 +E09000014,Haringey,106436,626807 +E09000015,Harrow,92681,530409 +E09000016,Havering,108202,452231 +E09000017,Hillingdon,116295,477979 +E09000018,Hounslow,113871,519639 +E09000019,Islington,98568,699726 +E09000020,Kensington and Chelsea,70165,1178497 +E09000021,Kingston upon Thames,70208,573027 +E09000022,Lambeth,138311,538500 +E09000023,Lewisham,127327,493356 +E09000024,Merton,83414,601814 +E09000025,Newham,122280,405619 +E09000026,Redbridge,112886,495269 +E09000027,Richmond upon 
Thames,84981,777164 +E09000028,Southwark,134900,589636 +E09000029,Sutton,86112,453058 +E09000030,Tower Hamlets,123601,463527 +E09000031,Waltham Forest,109286,525738 +E09000032,Wandsworth,141843,689285 +E09000033,Westminster,100112,880389 +N09000001,Antrim and Newtownabbey,83744,197918 +N09000002,"Armagh City, Banbridge and Craigavon",87066,179907 +N09000003,Belfast,87441,178459 +N09000004,Causeway Coast and Glens,85707,213957 +N09000005,Derry City and Strabane,86420,177589 +N09000006,Fermanagh and Omagh,87392,194970 +N09000007,Lisburn and Castlereagh,89622,231628 +N09000008,Mid and East Antrim,84384,173261 +N09000009,Mid Ulster,85900,189185 +N09000010,"Newry, Mourne and Down",88089,218595 +S12000005,Clackmannanshire,85677,171785 +S12000006,Dumfries and Galloway,85940,163620 +S12000008,East Ayrshire,82669,131065 +S12000010,East Lothian,82119,280390 +S12000011,East Renfrewshire,84440,297395 +S12000013,Na h-Eileanan Siar,86183,139148 +S12000014,Falkirk,86048,171236 +S12000017,Highland,90706,216711 +S12000018,Inverclyde,83811,113267 +S12000019,Midlothian,83766,286803 +S12000020,Moray,84248,197451 +S12000021,North Ayrshire,84155,134830 +S12000023,Orkney Islands,83272,229610 +S12000026,Scottish Borders,82091,182102 +S12000027,Shetland Islands,83113,201503 +S12000028,South Ayrshire,84785,173377 +S12000029,South Lanarkshire,95010,186880 +S12000030,Stirling,84552,228054 +S12000033,Aberdeen City,86478,133119 +S12000034,Aberdeenshire,90347,202362 +S12000035,Argyll and Bute,85250,186309 +S12000036,City of Edinburgh,112532,293243 +S12000038,Renfrewshire,83979,160277 +S12000039,West Dunbartonshire,84070,131097 +S12000040,West Lothian,85547,219317 +S12000041,Angus,84966,174680 +S12000042,Dundee City,95901,141246 +S12000045,East Dunbartonshire,84536,262223 +S12000047,Fife,100135,177750 +S12000048,Perth and Kinross,84587,228534 +S12000049,Glasgow City,131728,189093 +S12000050,North Lanarkshire,92029,158859 +W06000001,Isle of Anglesey,39750,242141 +W06000002,Gwynedd,64757,196260 
+W06000003,Conwy,64876,211833 +W06000004,Denbighshire,51535,195430 +W06000005,Flintshire,81654,213990 +W06000006,Wrexham,70846,206799 +W06000008,Ceredigion,39495,233722 +W06000009,Pembrokeshire,68590,213226 +W06000010,Carmarthenshire,98615,196607 +W06000011,Swansea,125018,208872 +W06000012,Neath Port Talbot,76913,160856 +W06000013,Bridgend,75306,208808 +W06000014,Vale of Glamorgan,72758,299757 +W06000015,Cardiff,162839,271273 +W06000016,Rhondda Cynon Taf,117872,162675 +W06000018,Caerphilly,93171,196048 +W06000019,Blaenau Gwent,38230,142090 +W06000020,Torfaen,50955,189702 +W06000021,Monmouthshire,52772,335746 +W06000022,Newport,80610,226573 +W06000023,Powys,76021,229762 +W06000024,Merthyr Tydfil,32578,143596 diff --git a/policyengine_uk_data/targets/sources/la_land.py b/policyengine_uk_data/targets/sources/la_land.py new file mode 100644 index 000000000..193f8dedb --- /dev/null +++ b/policyengine_uk_data/targets/sources/la_land.py @@ -0,0 +1,116 @@ +"""LA-level main residence value targets. + +Each local authority's target is built from directly observed LA-level +housing indicators, mirroring the existing private-rent calibration: + + target_la = avg_house_price_la × ownership_share_la × n_households_la + +This is the symmetric counterpart of the rent target for the +owner-occupier side. No national-total apportionment. + +Data sources: +- Average house price by LA: HM Land Registry UK HPI (Dec 2025). + For LAs whose ONS code changed between releases, the CSV matches on + LA name. For Northern Ireland LGDs missing from a specific month, + the NI country-level HPI price is used as a fallback. +- Ownership share by LA: English Housing Survey, via load_tenure_data + (owned_outright_pct + owned_mortgage_pct). +- Households by LA: Census 2021, via load_household_counts. 
+""" + +import pandas as pd + +from policyengine_uk_data.targets.schema import ( + GeographicLevel, + Target, + Unit, +) +from policyengine_uk_data.targets.sources._common import STORAGE + + +_REF_URL_HMLR = ( + "https://www.gov.uk/government/statistical-data-sets/" + "uk-house-price-index-data-downloads-december-2025" +) + + +def load_la_avg_prices() -> pd.DataFrame: + """Load HMLR average house price by LA. + + Returns DataFrame with columns: code, name, avg_house_price. + """ + csv_path = STORAGE / "la_land_values.csv" + if not csv_path.exists(): + return pd.DataFrame(columns=["code", "name", "avg_house_price"]) + df = pd.read_csv(csv_path) + return df[["code", "name", "avg_house_price"]] + + +def _compute_la_targets() -> dict[str, float]: + """Per-LA main residence value target. + + target_la = avg_house_price_la × ownership_share_la × n_households_la + + Returns a dict ``code -> £``. LAs missing any input drop out and + are handled in loss.py by the national-share fallback (same + pattern as the tenure and rent targets). 
+ """ + from policyengine_uk_data.targets.sources.local_la_extras import ( + load_household_counts, + load_tenure_data, + ) + + prices = load_la_avg_prices() + tenure = load_tenure_data() + households = load_household_counts() + + if prices.empty or tenure.empty or households.empty: + return {} + + merged = prices.merge(tenure, left_on="code", right_on="la_code", how="left").merge( + households, on="la_code", how="left" + ) + + ownership_share = ( + merged["owned_outright_pct"].fillna(0) + merged["owned_mortgage_pct"].fillna(0) + ) / 100 + targets = merged["avg_house_price"] * ownership_share * merged["households"] + + return { + code: float(value) + for code, value in zip(merged["code"], targets) + if pd.notna(value) and value > 0 + } + + +def get_targets() -> list[Target]: + prices = load_la_avg_prices() + if prices.empty: + return [] + + la_targets = _compute_la_targets() + + targets: list[Target] = [] + for _, row in prices.iterrows(): + code = row["code"] + target_value = la_targets.get(code) + if target_value is None: + continue + # HMLR Dec 2025 snapshot; same value across calibration years + # until a year-varying HMLR series is wired in. + values = {year: target_value for year in (2024, 2025, 2026)} + targets.append( + Target( + name=f"housing/main_residence_value/{code}", + variable="main_residence_value", + source="hmlr", + unit=Unit.GBP, + geographic_level=GeographicLevel.LOCAL_AUTHORITY, + geo_code=code, + geo_name=row["name"], + values=values, + reference_url=_REF_URL_HMLR, + ) + ) + + return targets diff --git a/policyengine_uk_data/tests/test_la_land_value_targets.py b/policyengine_uk_data/tests/test_la_land_value_targets.py new file mode 100644 index 000000000..3942cef4d --- /dev/null +++ b/policyengine_uk_data/tests/test_la_land_value_targets.py @@ -0,0 +1,216 @@ +"""Tests for LA-level main residence value calibration targets. 
+ +Targets are built from directly observed LA-level housing indicators +(HMLR avg house price × English Housing Survey ownership share × Census +household count), mirroring the existing private-rent target. No +national-total apportionment. +""" + +import pandas as pd + +from policyengine_uk_data.targets.schema import GeographicLevel +from policyengine_uk_data.targets.sources._common import STORAGE +from policyengine_uk_data.targets.sources.la_land import ( + _compute_la_targets, + get_targets, + load_la_avg_prices, +) +from policyengine_uk_data.targets.sources.local_la_extras import ( + load_household_counts, + load_tenure_data, +) + + +LA_PRICES = load_la_avg_prices() +LA_TARGETS = _compute_la_targets() + + +# ── CSV data quality ───────────────────────────────────────────────── + + +def test_csv_row_count_matches_la_code_list(): + """la_land_values.csv should have the same 360 LAs as local_authorities_2021.csv.""" + la_codes = pd.read_csv(STORAGE / "local_authorities_2021.csv") + raw = pd.read_csv(STORAGE / "la_land_values.csv") + assert len(raw) == len(la_codes) + assert set(raw["code"]) == set(la_codes["code"]) + + +def test_csv_columns_match_schema(): + """CSV should have exactly the columns code, name, households, avg_house_price.""" + raw = pd.read_csv(STORAGE / "la_land_values.csv") + assert list(raw.columns) == ["code", "name", "households", "avg_house_price"] + + +def test_csv_no_missing_values(): + """No LA should have NaN in any column.""" + raw = pd.read_csv(STORAGE / "la_land_values.csv") + assert not raw.isna().any().any() + + +def test_csv_covers_all_four_countries(): + """All four UK countries (E/W/S/NI) should appear.""" + prefixes = LA_PRICES["code"].str[0].unique() + assert set(prefixes) == {"E", "W", "S", "N"} + + +def test_house_prices_within_plausible_range(): + """Avg house prices should be between £50k and £2m per LA.""" + for _, row in LA_PRICES.iterrows(): + assert 50_000 <= row["avg_house_price"] <= 2_000_000, ( + f"{row['name']}: 
avg_house_price £{row['avg_house_price']:,} " + "outside plausible range" + ) + + +def test_csv_households_within_plausible_range(): + """Smallest UK billing authority (Isles of Scilly) has ~1,100 + households; largest (Birmingham) has ~450,000. The CSV `households` + column is retained as a regression fixture for the IoS fallback leak + even though the calibration target uses Census counts. + """ + raw = pd.read_csv(STORAGE / "la_land_values.csv") + out_of_range = raw[~raw["households"].between(500, 500_000)] + assert out_of_range.empty, ( + "CSV households out of plausible [500, 500_000] range: " + f"{out_of_range[['code', 'name', 'households']].to_dict('records')}" + ) + + +def test_isles_of_scilly_households_are_thousands_not_millions(): + """Explicit regression for the IoS fallback leak (was 2,492,115).""" + raw = pd.read_csv(STORAGE / "la_land_values.csv") + ios = raw[raw["code"] == "E06000053"] + assert len(ios) == 1 + hh = int(ios["households"].iloc[0]) + assert 500 <= hh <= 5_000, ( + f"Isles of Scilly households = {hh:,}; ONS mid-2023 estimate is ~1,115" + ) + + +# ── Target value constraints ───────────────────────────────────────── + + +def test_targets_match_observed_product(): + """Every target equals avg_price × ownership_share × n_households exactly. + + No national-total apportionment, no rescaling: just the directly + observed product, identical in shape to the rent target. 
+ """ + prices = LA_PRICES.set_index("code")["avg_house_price"] + tenure = load_tenure_data().set_index("la_code") + households = load_household_counts().set_index("la_code")["households"] + + for code, target in LA_TARGETS.items(): + if code not in tenure.index or code not in households.index: + continue + ownership = ( + tenure.loc[code, "owned_outright_pct"] + + tenure.loc[code, "owned_mortgage_pct"] + ) / 100 + expected = prices.loc[code] * ownership * households.loc[code] + assert abs(target - expected) < 1e-3, ( + f"{code}: target {target:,.2f} != expected {expected:,.2f}" + ) + + +def test_all_targets_positive(): + """Every per-LA target should be positive.""" + assert all(value > 0 for value in LA_TARGETS.values()) + + +def test_explicit_targets_cover_english_las(): + """Direct-formula targets are produced for LAs with EHS tenure data + (England). Wales, Scotland and Northern Ireland LAs are handled by + the national-share fallback in loss.py — same as the existing + tenure target, by construction.""" + prefixes = {code[0] for code in LA_TARGETS} + assert prefixes == {"E"}, ( + f"Expected English-only targets from EHS data, got {sorted(prefixes)}" + ) + + +def test_kensington_and_chelsea_above_blackpool(): + """K&C aggregate main-residence-value target should exceed Blackpool's.""" + name_to_code = dict(zip(LA_PRICES["name"], LA_PRICES["code"])) + kc = LA_TARGETS[name_to_code["Kensington and Chelsea"]] + bp = LA_TARGETS[name_to_code["Blackpool"]] + assert kc > bp, ( + f"K&C target (£{kc / 1e9:.1f}bn) should exceed Blackpool (£{bp / 1e9:.1f}bn)" + ) + + +def test_london_total_exceeds_north_east(): + """Sum of London LA targets should exceed sum of North-East LA targets.""" + london_codes = [c for c in LA_TARGETS if c.startswith("E09")] + ne_prefixes = { + "E06000001", + "E06000002", + "E06000003", + "E06000004", + "E06000005", + "E06000047", + "E08000021", + "E08000022", + "E08000023", + "E08000024", + "E08000037", + "E06000057", + } + ne_codes = [c for c in 
LA_TARGETS if c in ne_prefixes] + london_total = sum(LA_TARGETS[c] for c in london_codes) + ne_total = sum(LA_TARGETS[c] for c in ne_codes) + assert london_total > ne_total * 3, ( + f"London total (£{london_total / 1e9:.0f}bn) should exceed " + f"NE total (£{ne_total / 1e9:.0f}bn) by at least 3x" + ) + + +# ── Target registry integration ────────────────────────────────────── + + +def test_get_targets_returns_targets_for_covered_las(): + """get_targets() returns one Target per LA with all inputs available.""" + targets = get_targets() + assert len(targets) == len(LA_TARGETS) + assert {t.geo_code for t in targets} == set(LA_TARGETS) + + +def test_target_names_follow_code_pattern(): + """Target names should follow the housing/main_residence_value/{code} pattern.""" + for t in get_targets(): + assert t.name.startswith("housing/main_residence_value/") + assert t.name.removeprefix("housing/main_residence_value/") == t.geo_code + + +def test_targets_declare_local_authority_geographic_level(): + """All LA targets should be tagged with GeographicLevel.LOCAL_AUTHORITY.""" + for t in get_targets(): + assert t.geographic_level == GeographicLevel.LOCAL_AUTHORITY + + +def test_targets_declare_hmlr_source(): + """LA property-value targets are sourced from HMLR UK HPI.""" + for t in get_targets(): + assert t.source == "hmlr" + + +def test_targets_have_calibration_year_values(): + """LA targets should carry values for the supported calibration years.""" + for t in get_targets(): + assert {2024, 2025, 2026} <= set(t.values) + + +def test_target_registry_includes_la_targets(): + """LA property-value targets should appear in the global registry.""" + from policyengine_uk_data.targets import get_all_targets + + targets = get_all_targets( + year=2024, geographic_level=GeographicLevel.LOCAL_AUTHORITY + ) + la_property = [ + t for t in targets if t.name.startswith("housing/main_residence_value/") + ] + assert len(la_property) == len(LA_TARGETS), ( + f"Expected {len(LA_TARGETS)} LA 
property-value targets, got {len(la_property)}" + ) diff --git a/policyengine_uk_data/tests/test_la_loss_land_value.py b/policyengine_uk_data/tests/test_la_loss_land_value.py new file mode 100644 index 000000000..48bcfcb68 --- /dev/null +++ b/policyengine_uk_data/tests/test_la_loss_land_value.py @@ -0,0 +1,254 @@ +"""Tests for the LA-level main-residence-value column wired into the +local-authority calibration loss matrix. + +Two layers: + +1. Light-weight checks against the per-LA target dict from la_land.py — + these run without a Microsimulation and exercise the ordering / + shape properties the loss-matrix code relies on. +2. Full ``create_local_authority_target_matrix`` build, gated on the + enhanced FRS fixture so CI environments without the dataset skip + gracefully. +""" + +import numpy as np +import pandas as pd + +from policyengine_uk_data.storage import STORAGE_FOLDER +from policyengine_uk_data.targets.sources.la_land import ( + _compute_la_targets, + load_la_avg_prices, +) +from policyengine_uk_data.targets.sources.local_la_extras import ( + load_household_counts, + load_tenure_data, +) + + +LA_CODES = pd.read_csv(STORAGE_FOLDER / "local_authorities_2021.csv") +LA_TARGETS = _compute_la_targets() + + +# ── Layer 1: per-LA targets line up with the LA code ordering ──────── + + +def test_explicit_targets_cover_english_las(): + """Direct-formula targets are produced for LAs with EHS tenure data + (England). 
Other UK countries fall through to the national-share + fallback in loss.py — same as the existing tenure target.""" + prefixes = {code[0] for code in LA_TARGETS} + assert prefixes == {"E"} + + +def test_target_vector_in_la_codes_order_is_finite_positive_where_present(): + """Reindexing by la_codes order yields a clean float vector for + LAs with a target; LAs missing inputs become NaN (later filled by + the national-share fallback inside loss.py).""" + vec = LA_CODES["code"].map(LA_TARGETS).values + finite = vec[~np.isnan(vec.astype(float))] + assert len(vec) == 360 + assert (finite > 0).all() + + +def test_targets_match_observed_product_inline(): + """Per-LA target equals avg_price × ownership_share × n_households — + the same shape as private rent's ``median_rent × renter_pct × n_hh``. + """ + prices = load_la_avg_prices().set_index("code")["avg_house_price"] + tenure = load_tenure_data().set_index("la_code") + households = load_household_counts().set_index("la_code")["households"] + + for code, target in LA_TARGETS.items(): + if code not in tenure.index or code not in households.index: + continue + ownership = ( + tenure.loc[code, "owned_outright_pct"] + + tenure.loc[code, "owned_mortgage_pct"] + ) / 100 + expected = prices.loc[code] * ownership * households.loc[code] + assert abs(target - expected) < 1e-3 + + +# ── Layer 2: full LA loss matrix build ─────────────────────────────── + + +def test_la_loss_matrix_includes_main_residence_value(enhanced_frs): + """The LA target matrix must expose housing/main_residence_value in + both matrix (per-household) and y (per-LA) so the calibrator can + train on it.""" + from policyengine_uk_data.datasets.local_areas.local_authorities.loss import ( + create_local_authority_target_matrix, + ) + + matrix, y, _ = create_local_authority_target_matrix( + enhanced_frs, time_period=enhanced_frs.time_period + ) + + assert "housing/main_residence_value" in matrix.columns + assert "housing/main_residence_value" in y.columns + + +def 
test_la_loss_y_vector_length_360(enhanced_frs): + """y has one entry per LA and matches local_authorities_2021.csv ordering + by length.""" + from policyengine_uk_data.datasets.local_areas.local_authorities.loss import ( + create_local_authority_target_matrix, + ) + + _, y, _ = create_local_authority_target_matrix( + enhanced_frs, time_period=enhanced_frs.time_period + ) + + assert len(y) == 360 + assert len(y["housing/main_residence_value"]) == 360 + + +def test_la_loss_y_matches_observed_product_for_covered_las(enhanced_frs): + """For LAs with all inputs present, y equals avg_price × ownership × n_households. + + LAs missing inputs use the national-share fallback (covered in + test_la_loss_y_all_positive).""" + from policyengine_uk_data.datasets.local_areas.local_authorities.loss import ( + create_local_authority_target_matrix, + ) + + _, y, _ = create_local_authority_target_matrix( + enhanced_frs, time_period=enhanced_frs.time_period + ) + + expected_by_code = LA_TARGETS + for i, code in enumerate(LA_CODES["code"].values): + if code not in expected_by_code: + continue # fallback path + actual = y["housing/main_residence_value"].iloc[i] + expected = expected_by_code[code] + assert abs(actual - expected) < 1e-3, ( + f"{code}: y {actual:,.2f} != expected {expected:,.2f}" + ) + + +def test_la_loss_y_all_positive(enhanced_frs): + """No LA should have a non-positive main-residence-value target.""" + from policyengine_uk_data.datasets.local_areas.local_authorities.loss import ( + create_local_authority_target_matrix, + ) + + _, y, _ = create_local_authority_target_matrix( + enhanced_frs, time_period=enhanced_frs.time_period + ) + + assert (y["housing/main_residence_value"] > 0).all() + + +def test_la_loss_matrix_column_matches_main_residence_value(enhanced_frs): + """matrix['housing/main_residence_value'] should equal the per-household + main_residence_value pulled from policyengine-uk for the calibration year.""" + from policyengine_uk import Microsimulation + from 
policyengine_uk_data.datasets.local_areas.local_authorities.loss import ( + create_local_authority_target_matrix, + ) + + matrix, _, _ = create_local_authority_target_matrix( + enhanced_frs, time_period=enhanced_frs.time_period + ) + + sim = Microsimulation(dataset=enhanced_frs) + sim.default_calculation_period = enhanced_frs.time_period + expected = sim.calculate("main_residence_value").values + + np.testing.assert_array_equal( + matrix["housing/main_residence_value"].values, expected + ) + + +# ── Layer 2b: calibration well-formedness ───────────────────────────── + + +def test_la_loss_y_has_no_nan(enhanced_frs): + """Every LA must have a numeric target. NaN entries would propagate + through the optimiser and fail calibration silently.""" + from policyengine_uk_data.datasets.local_areas.local_authorities.loss import ( + create_local_authority_target_matrix, + ) + + _, y, _ = create_local_authority_target_matrix( + enhanced_frs, time_period=enhanced_frs.time_period + ) + + assert not y["housing/main_residence_value"].isna().any() + + +def test_la_loss_fallback_applied_to_non_english_las(enhanced_frs): + """Wales / Scotland / NI LAs use the national-share fallback because + EHS only covers England. 
Their y entry must still be positive + (so the optimiser has a target to fit) and must NOT equal the + direct-formula value (which is undefined when ownership share + is missing).""" + from policyengine_uk_data.datasets.local_areas.local_authorities.loss import ( + create_local_authority_target_matrix, + ) + + _, y, _ = create_local_authority_target_matrix( + enhanced_frs, time_period=enhanced_frs.time_period + ) + + fallback_codes = [c for c in LA_CODES["code"] if not c.startswith("E")] + fallback_indices = [ + i for i, c in enumerate(LA_CODES["code"].values) if c in fallback_codes + ] + fallback_values = y["housing/main_residence_value"].iloc[fallback_indices] + + assert (fallback_values > 0).all() + assert fallback_values.notna().all() + + +def test_la_loss_matrix_column_carries_calibration_signal(enhanced_frs): + """matrix['housing/main_residence_value'] must vary across households — + a constant column gives the optimiser no signal to differentiate + LAs and the new target would be inert.""" + from policyengine_uk_data.datasets.local_areas.local_authorities.loss import ( + create_local_authority_target_matrix, + ) + + matrix, _, _ = create_local_authority_target_matrix( + enhanced_frs, time_period=enhanced_frs.time_period + ) + + column = matrix["housing/main_residence_value"].values + assert column.var() > 0 + assert (column > 0).any(), "no households with positive main_residence_value" + + +def test_la_loss_english_target_total_within_reach_of_initial_weights(enhanced_frs): + """Sum of English LA targets should be in the same order of magnitude + as the implied initial English main-residence-value — so the + optimiser has a chance of hitting them via reweighting rather than + requiring weights to inflate by 100x.""" + from policyengine_uk import Microsimulation + from policyengine_uk_data.datasets.local_areas.local_authorities.loss import ( + create_local_authority_target_matrix, + ) + + _, y, _ = create_local_authority_target_matrix( + enhanced_frs, 
time_period=enhanced_frs.time_period + ) + + sim = Microsimulation(dataset=enhanced_frs) + original_weights = sim.calculate("household_weight", 2025).values + main_res = sim.calculate("main_residence_value", enhanced_frs.time_period).values + country = sim.calculate("country", enhanced_frs.time_period).values + + england_mask = country == "ENGLAND" + england_initial = (original_weights[england_mask] * main_res[england_mask]).sum() + + english_indices = [ + i for i, c in enumerate(LA_CODES["code"].values) if c.startswith("E") + ] + english_targets = y["housing/main_residence_value"].iloc[english_indices].sum() + + ratio = english_targets / england_initial + assert 0.5 < ratio < 3.0, ( + f"English LA target sum (£{english_targets / 1e9:.0f}bn) / " + f"initial English main-residence-value (£{england_initial / 1e9:.0f}bn) " + f"= {ratio:.2f}; calibration target may be hard to reach" + )