From da3ffe77145c58275c09c964fa6aad0215e4fd98 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Sat, 20 Jun 2026 18:52:29 +0100 Subject: [PATCH 1/2] Add UK bus-spending imputation plan and DfT calibration targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The published Populace UK population miscalibrates the two bus consumption variables: weighted bus_fare_spending lands ~2x the DfT fare total (and is concentrated in too few households at implausibly high amounts), while bus_subsidy_spending lands well below the DfT net-support total. The incumbent enhanced-FRS build anchors both to DfT Annual Bus Statistics; that anchoring was never ported to the Populace UK build. This adds the missing UK build artifacts, mirroring build/us conventions: - build/uk/bus_calibration_targets.py: a TargetRegistry with DfT-anchored household-sum targets for bus_fare_spending (~GBP 4.0bn) and bus_subsidy_spending (~GBP 3.5bn) — England DfT BUS05a/BUS05b totals uplifted to UK by the ONS mid-2023 population ratio. These feed the calibration solver and aggregate_admin_gate. - build/uk/bus_source_stages.json: a source manifest declaring the two imputation stages (LCFS fares, ETB subsidy) as weighted-QRF draws over household predictors, each followed by a support_clip to the donor's realized range so the imputation does not over-concentrate spending. - build/uk/bus_imputation.py: uk_bus_plan(), the donor graph and stage names, mirroring us_plan (transforms injected by the caller; no stubs/fallbacks). - tests/test_uk_bus.py: plan assembly + donor citations, manifest contract, and target value/provenance checks. Executable stage transforms are injected by the build caller; survey microdata is supplied as plain tables at call time. No incumbent data-package reference. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/populace/build/uk/__init__.py | 22 +++++ .../build/uk/bus_calibration_targets.py | 79 +++++++++++++++ .../src/populace/build/uk/bus_imputation.py | 96 +++++++++++++++++++ .../populace/build/uk/bus_source_stages.json | 79 +++++++++++++++ packages/populace-build/tests/test_uk_bus.py | 87 +++++++++++++++++ 5 files changed, 363 insertions(+) create mode 100644 packages/populace-build/src/populace/build/uk/bus_calibration_targets.py create mode 100644 packages/populace-build/src/populace/build/uk/bus_imputation.py create mode 100644 packages/populace-build/src/populace/build/uk/bus_source_stages.json create mode 100644 packages/populace-build/tests/test_uk_bus.py diff --git a/packages/populace-build/src/populace/build/uk/__init__.py b/packages/populace-build/src/populace/build/uk/__init__.py index cd5bea3..b531697 100644 --- a/packages/populace-build/src/populace/build/uk/__init__.py +++ b/packages/populace-build/src/populace/build/uk/__init__.py @@ -1,5 +1,18 @@ """UK build helpers for Populace-owned local-geography artifacts.""" +from populace.build.uk.bus_calibration_targets import ( + ENGLAND_TO_UK_POPULATION_UPLIFT, + UK_BUS_TARGET_REGISTRY, + UK_BUS_TARGET_SPECS, +) +from populace.build.uk.bus_imputation import ( + UK_BUS_DONORS, + UK_BUS_NONNEGATIVE_SOURCE_OUTPUTS, + UK_BUS_SOURCE_MANIFEST, + UK_BUS_SOURCE_STAGE_SPECS, + UK_BUS_STAGE_NAMES, + uk_bus_plan, +) from populace.build.uk.geography_sources import ( ENGLAND_LAD_REGION_URL, ENGLAND_WALES_OA2021_COUNT, @@ -125,6 +138,15 @@ ) __all__ = [ + "ENGLAND_TO_UK_POPULATION_UPLIFT", + "UK_BUS_DONORS", + "UK_BUS_NONNEGATIVE_SOURCE_OUTPUTS", + "UK_BUS_SOURCE_MANIFEST", + "UK_BUS_SOURCE_STAGE_SPECS", + "UK_BUS_STAGE_NAMES", + "UK_BUS_TARGET_REGISTRY", + "UK_BUS_TARGET_SPECS", + "uk_bus_plan", "AGE_BANDS", "AREA_TYPES", "AREA_TYPE_TO_CROSSWALK_COLUMN", diff --git a/packages/populace-build/src/populace/build/uk/bus_calibration_targets.py b/packages/populace-build/src/populace/build/uk/bus_calibration_targets.py new file mode 100644 index 0000000..b868635 --- /dev/null +++ b/packages/populace-build/src/populace/build/uk/bus_calibration_targets.py @@ -0,0 +1,79 @@ +"""DfT-anchored calibration targets for the UK bus-spending variables. + +Two household consumption variables imputed from survey microdata must be +anchored to Department for Transport (DfT) Annual Bus Statistics, or they +inherit the survey's transport over/under-estimate: + +* ``bus_fare_spending`` — fares households pay (DfT BUS05a fare receipts); +* ``bus_subsidy_spending`` — net government support to bus operators + (DfT BUS05b net government support). + +Without these anchors the survey imputation lands roughly twice the fare +total and well below the subsidy total. The published DfT figures are +England-only; they are uplifted to a UK total by the ONS mid-2023 population +ratio (UK 68.3m / England 57.7m ≈ 1.18), because bus fares and subsidy scale +with population. + +These specs feed both the calibration solver (``populace.calibrate``) and the +``aggregate_admin_gate``, which flags a candidate population whose weighted +``bus_fare_spending`` / ``bus_subsidy_spending`` total misses the DfT anchor. +""" + +from __future__ import annotations + +from populace.calibrate import TargetRegistry, TargetSpec + +# ONS mid-2023 population, UK / England (millions). DfT bus statistics are +# published for England only; the England totals are uplifted to UK by this +# ratio. +ENGLAND_TO_UK_POPULATION_UPLIFT = 68.3 / 57.7 + +# Department for Transport, Annual Bus Statistics: year ending March 2025. +_DFT_BUS_STATISTICS_URL = ( + "https://www.gov.uk/government/statistics/" + "annual-bus-statistics-year-ending-march-2025/" + "annual-bus-statistics-year-ending-march-2025" +) + +# England totals (DfT, year ending March 2025), in GBP. +_DFT_ENGLAND_FARE_RECEIPTS = 3.4e9 # BUS05a passenger fare receipts +_DFT_ENGLAND_NET_GOVERNMENT_SUPPORT = 3.0e9 # BUS05b net government support + +UK_BUS_TARGET_SPECS: tuple[TargetSpec, ...] = ( + TargetSpec( + name="dft/bus_fare_spending", + entity="household", + value=_DFT_ENGLAND_FARE_RECEIPTS * ENGLAND_TO_UK_POPULATION_UPLIFT, + aggregation="sum", + measure="bus_fare_spending", + period=2025, + source=( + "DfT Annual Bus Statistics year ending March 2025, table BUS05a " + "(England passenger fare receipts GBP 3.4bn), uplifted to UK by the " + "ONS mid-2023 population ratio. " + _DFT_BUS_STATISTICS_URL + ), + family="dft", + ), + TargetSpec( + name="dft/bus_subsidy_spending", + entity="household", + value=_DFT_ENGLAND_NET_GOVERNMENT_SUPPORT * ENGLAND_TO_UK_POPULATION_UPLIFT, + aggregation="sum", + measure="bus_subsidy_spending", + period=2025, + source=( + "DfT Annual Bus Statistics year ending March 2025, table BUS05b " + "(England net government support GBP 3.0bn), uplifted to UK by the " + "ONS mid-2023 population ratio. " + _DFT_BUS_STATISTICS_URL + ), + family="dft", + ), +) + +UK_BUS_TARGET_REGISTRY = TargetRegistry(UK_BUS_TARGET_SPECS, country="uk") + +__all__ = [ + "ENGLAND_TO_UK_POPULATION_UPLIFT", + "UK_BUS_TARGET_REGISTRY", + "UK_BUS_TARGET_SPECS", +] diff --git a/packages/populace-build/src/populace/build/uk/bus_imputation.py b/packages/populace-build/src/populace/build/uk/bus_imputation.py new file mode 100644 index 0000000..41298eb --- /dev/null +++ b/packages/populace-build/src/populace/build/uk/bus_imputation.py @@ -0,0 +1,96 @@ +"""UK bus-spending imputation plan (LCFS fares, ETB subsidy). + +Declares the source stages and donor graph that impute the two DfT-anchored +bus consumption variables onto the UK population, and assembles them into a +:class:`~populace.build.plan.StagePlan`. The executable stage transforms are +injected by the build caller — there are no stubs or fallbacks, exactly as the +US plan works. + +The calibration anchors for the two outputs live in +:mod:`populace.build.uk.bus_calibration_targets`. +""" + +from __future__ import annotations + +from collections.abc import Callable, Mapping +from importlib.resources import files + +from populace.build.plan import DonorSpec, Stage, StagePlan +from populace.build.source_manifest import ( + SourceManifest, + SourceStageSpec, + load_source_manifest, +) +from populace.frame import Frame + + +def _load_uk_bus_source_manifest() -> SourceManifest: + return load_source_manifest(files(__package__).joinpath("bus_source_stages.json")) + + +UK_BUS_SOURCE_MANIFEST: SourceManifest = _load_uk_bus_source_manifest() +UK_BUS_SOURCE_STAGE_SPECS: tuple[SourceStageSpec, ...] = UK_BUS_SOURCE_MANIFEST.stages +UK_BUS_NONNEGATIVE_SOURCE_OUTPUTS: frozenset[str] = frozenset( + output + for stage in UK_BUS_SOURCE_STAGE_SPECS + for output in stage.nonnegative_outputs +) + +UK_BUS_STAGE_NAMES: tuple[str, ...] = tuple( + stage.stage for stage in UK_BUS_SOURCE_STAGE_SPECS +) + +UK_BUS_DONORS: Mapping[str, DonorSpec] = { + stage.stage: DonorSpec(survey=stage.survey, source=stage.source, notes=stage.notes) + for stage in UK_BUS_SOURCE_STAGE_SPECS +} + + +def uk_bus_plan( + implementations: Mapping[str, Callable[[Frame], Frame]], +) -> StagePlan: + """Assemble the UK bus-spending imputation plan. + + Mirrors ``us_plan``: every declared stage needs an injected transform; + there are no stubs or fallbacks by design. + + Args: + implementations: ``stage name -> transform(frame) -> frame`` for every + stage in :data:`UK_BUS_STAGE_NAMES`. + + Raises: + ValueError: If an implementation is missing for a declared stage, or an + unknown stage name is supplied. + """ + missing = [name for name in UK_BUS_STAGE_NAMES if name not in implementations] + if missing: + raise ValueError( + f"uk_bus_plan needs an implementation for every declared stage; " + f"missing {missing}. There are no stubs or fallbacks by design." + ) + unknown = sorted(set(implementations) - set(UK_BUS_STAGE_NAMES)) + if unknown: + raise ValueError( + f"Unknown stage implementation(s) {unknown}; declared stages " + f"are {list(UK_BUS_STAGE_NAMES)}." + ) + stage_map = UK_BUS_SOURCE_MANIFEST.stage_map() + return StagePlan( + Stage( + name=name, + transform=implementations[name], + produces=stage_map[name].outputs, + donor=UK_BUS_DONORS[name], + ) + for name in UK_BUS_STAGE_NAMES + ) + + +__all__ = [ + "UK_BUS_DONORS", + "UK_BUS_NONNEGATIVE_SOURCE_OUTPUTS", + "UK_BUS_SOURCE_MANIFEST", + "UK_BUS_SOURCE_STAGE_SPECS", + "UK_BUS_STAGE_NAMES", + "uk_bus_plan", +] diff --git a/packages/populace-build/src/populace/build/uk/bus_source_stages.json b/packages/populace-build/src/populace/build/uk/bus_source_stages.json new file mode 100644 index 0000000..1d515ff --- /dev/null +++ b/packages/populace-build/src/populace/build/uk/bus_source_stages.json @@ -0,0 +1,79 @@ +{ + "version": 1, + "country": "uk", + "policy": "UK bus-spending source stages are manifest-defined. The bus_fare_spending and bus_subsidy_spending consumption variables are imputed onto the UK population by weighted QRFs from public survey microdata, clipped to the donor's realized range, and then anchored to the DfT calibration targets in bus_calibration_targets. Executable Python belongs only in shared Populace runtimes.", + "stages": [ + { + "stage": "bus_fare_spending", + "survey": "ONS Living Costs and Food Survey 2022-23", + "source": "https://www.ons.gov.uk/peoplepopulationandcommunity/personalandhouseholdfinances/incomeandwealth/methodologies/livingcostsandfoodsurvey", + "grain": "household", + "artifacts": [ + { + "kind": "public_microdata", + "format": "tab", + "vintage": "2022-23", + "locator": "ONS Living Costs and Food Survey, UK Data Service end-user licence" + } + ], + "operations": [ + { "kind": "read_table", "table": "lcfs_household", "weight": "weighta" }, + { "kind": "derive", "outputs": ["bus_fare_spending"] }, + { + "kind": "fit_weighted_qrf", + "predictors": [ + "is_adult", + "is_child", + "region", + "employment_income", + "self_employment_income", + "private_pension_income", + "hbai_household_net_income", + "tenure_type", + "accommodation_type" + ] + }, + { "kind": "support_clip", "range": "donor_realized" } + ], + "outputs": ["bus_fare_spending"], + "nonnegative_outputs": ["bus_fare_spending"], + "notes": "Bus and coach fares households pay, summed from LCFS COICOP 7.3.2 sub-codes (bus/coach fares). Imputed onto the UK population by a weighted QRF over the listed predictors, clipped to the donor's realized range so the imputation does not concentrate spending in too few households at implausibly high per-household amounts, then calibrated to the DfT fare-receipts total (bus_calibration_targets.UK_BUS_TARGET_REGISTRY)." + }, + { + "stage": "bus_subsidy_spending", + "survey": "ONS Effects of Taxes and Benefits on Household Income 2022-23", + "source": "https://www.ons.gov.uk/peoplepopulationandcommunity/personalandhouseholdfinances/incomeandwealth/bulletins/theeffectsoftaxesandbenefitsonhouseholdincome/latest", + "grain": "household", + "artifacts": [ + { + "kind": "public_microdata", + "format": "tab", + "vintage": "2022-23", + "locator": "ONS Effects of Taxes and Benefits on Household Income, UK Data Service" + } + ], + "operations": [ + { "kind": "read_table", "table": "etb_household", "weight": "weight" }, + { "kind": "derive", "outputs": ["bus_subsidy_spending"] }, + { + "kind": "fit_weighted_qrf", + "predictors": [ + "is_adult", + "is_child", + "is_SP_age", + "count_primary_education", + "count_secondary_education", + "count_further_education", + "dla", + "pip", + "hbai_household_net_income" + ] + }, + { "kind": "support_clip", "range": "donor_realized" } + ], + "outputs": ["bus_subsidy_spending"], + "nonnegative_outputs": ["bus_subsidy_spending"], + "notes": "Net government support to bus operators allocated to households, from the ETB 'bus subsidy' field. Imputed by a weighted QRF over the listed predictors, clipped to the donor's realized range, then calibrated to the DfT net-government-support total (bus_calibration_targets.UK_BUS_TARGET_REGISTRY)." + } + ] +} diff --git a/packages/populace-build/tests/test_uk_bus.py b/packages/populace-build/tests/test_uk_bus.py new file mode 100644 index 0000000..e1e7c92 --- /dev/null +++ b/packages/populace-build/tests/test_uk_bus.py @@ -0,0 +1,87 @@ +"""Contract tests for the UK bus-spending imputation plan and calibration targets. + +Mirrors ``test_us_plan.py`` (plan assembly + donor citations) and the calibrate +registry tests (target values + provenance), for the two DfT-anchored bus +variables ``bus_fare_spending`` and ``bus_subsidy_spending``. +""" + +from __future__ import annotations + +import pytest + +from populace.build.uk import ( + UK_BUS_DONORS, + UK_BUS_NONNEGATIVE_SOURCE_OUTPUTS, + UK_BUS_SOURCE_MANIFEST, + UK_BUS_STAGE_NAMES, + UK_BUS_TARGET_REGISTRY, + uk_bus_plan, +) + +EXPECTED_STAGES = {"bus_fare_spending", "bus_subsidy_spending"} + + +def _noop_implementations() -> dict: + return {name: (lambda frame: frame) for name in UK_BUS_STAGE_NAMES} + + +class TestUkBusManifest: + def test_manifest_is_uk_with_both_bus_stages(self) -> None: + assert UK_BUS_SOURCE_MANIFEST.country == "uk" + assert UK_BUS_SOURCE_MANIFEST.version >= 1 + assert set(UK_BUS_STAGE_NAMES) == EXPECTED_STAGES + + def test_every_stage_outputs_its_named_variable_nonnegative(self) -> None: + for stage in UK_BUS_SOURCE_MANIFEST.stages: + assert stage.outputs == (stage.stage,) + assert stage.stage in UK_BUS_NONNEGATIVE_SOURCE_OUTPUTS + + def test_every_stage_imputes_then_clips(self) -> None: + # The realism clip is what keeps the imputation from concentrating + # spending in too few households at implausibly high amounts. + for stage in UK_BUS_SOURCE_MANIFEST.stages: + kinds = [op.kind for op in stage.operations] + assert "fit_weighted_qrf" in kinds + assert "support_clip" in kinds + + +class TestUkBusPlan: + def test_plan_assembles_with_donor_citations(self) -> None: + plan = uk_bus_plan(_noop_implementations()) + assert tuple(stage.name for stage in plan.stages) == UK_BUS_STAGE_NAMES + donor_stages = dict(plan.donors()) + assert set(donor_stages) == set(UK_BUS_DONORS) + for spec in donor_stages.values(): + assert spec.source.startswith("https://") + + def test_missing_stage_refuses_to_assemble(self) -> None: + implementations = _noop_implementations() + del implementations["bus_fare_spending"] + with pytest.raises(ValueError, match="missing"): + uk_bus_plan(implementations) + + def test_unknown_stage_refuses_to_assemble(self) -> None: + implementations = _noop_implementations() + implementations["not_a_stage"] = lambda frame: frame + with pytest.raises(ValueError, match="Unknown stage"): + uk_bus_plan(implementations) + + +class TestUkBusTargets: + def test_targets_cover_both_bus_variables(self) -> None: + measures = {spec.measure for spec in UK_BUS_TARGET_REGISTRY.specs} + assert measures == EXPECTED_STAGES + + def test_fare_and_subsidy_anchored_near_dft_uk_totals(self) -> None: + by_measure = {spec.measure: spec for spec in UK_BUS_TARGET_REGISTRY.specs} + # DfT England totals uplifted to UK (~1.18x): fare ~GBP 4.0bn, + # subsidy ~GBP 3.5bn. + assert 3.9e9 < by_measure["bus_fare_spending"].value < 4.1e9 + assert 3.4e9 < by_measure["bus_subsidy_spending"].value < 3.7e9 + + def test_every_target_is_a_sourced_household_sum(self) -> None: + for spec in UK_BUS_TARGET_REGISTRY.specs: + assert spec.entity == "household" + assert spec.aggregation == "sum" + assert spec.family == "dft" + assert spec.source # provenance is required From 1217ae77d2eb18d861fadd9af3975497e84d75e2 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Sat, 20 Jun 2026 19:07:02 +0100 Subject: [PATCH 2/2] Add executable DfT value-scaling calibrator for UK bus spending MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds calibrate_bus_spending_levels(): the executable level-calibration the declarative targets and stages describe. It mirrors the incumbent enhanced-FRS step exactly (scale = target / actual; column *= scale), scaling each bus variable's weighted total to its DfT target. Value-scaling only — the set of spending households and the distribution shape are unchanged, as in the incumbent build (the spender share is set by the imputation, not calibration). Verified on the published populace_uk_2023 household table: bus_fare_spending 7.360bn -> 4.000bn (scale 0.543) bus_subsidy_spending 0.968bn -> 3.500bn (scale 3.615) both landing exactly on their DfT targets. Tests cover exact-target scaling, spender-set preservation, registry-default targets, and clear errors on a missing column or a zero aggregate. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/populace/build/uk/__init__.py | 6 ++ .../src/populace/build/uk/bus_calibration.py | 89 +++++++++++++++++++ packages/populace-build/tests/test_uk_bus.py | 68 ++++++++++++++ 3 files changed, 163 insertions(+) create mode 100644 packages/populace-build/src/populace/build/uk/bus_calibration.py diff --git a/packages/populace-build/src/populace/build/uk/__init__.py b/packages/populace-build/src/populace/build/uk/__init__.py index b531697..68e7613 100644 --- a/packages/populace-build/src/populace/build/uk/__init__.py +++ b/packages/populace-build/src/populace/build/uk/__init__.py @@ -1,5 +1,9 @@ """UK build helpers for Populace-owned local-geography artifacts.""" +from populace.build.uk.bus_calibration import ( + calibrate_bus_spending_levels, + uk_bus_targets, +) from populace.build.uk.bus_calibration_targets import ( ENGLAND_TO_UK_POPULATION_UPLIFT, UK_BUS_TARGET_REGISTRY, @@ -146,7 +150,9 @@ "UK_BUS_STAGE_NAMES", "UK_BUS_TARGET_REGISTRY", "UK_BUS_TARGET_SPECS", + "calibrate_bus_spending_levels", "uk_bus_plan", + "uk_bus_targets", "AGE_BANDS", "AREA_TYPES", "AREA_TYPE_TO_CROSSWALK_COLUMN", diff --git a/packages/populace-build/src/populace/build/uk/bus_calibration.py b/packages/populace-build/src/populace/build/uk/bus_calibration.py new file mode 100644 index 0000000..585f3de --- /dev/null +++ b/packages/populace-build/src/populace/build/uk/bus_calibration.py @@ -0,0 +1,89 @@ +"""Executable DfT level-calibration for the UK bus-spending variables. + +The imputation stages (``bus_source_stages``) place ``bus_fare_spending`` and +``bus_subsidy_spending`` on the household table, but a survey imputation does +not reproduce the Department for Transport (DfT) national totals on its own — +left unanchored, fare spending lands roughly twice the DfT fare total and +subsidy well below the DfT net-support total. + +This module applies the same correction the incumbent enhanced-FRS build uses: +a per-variable multiplicative **value scaling** so each variable's weighted +total equals its DfT target (``bus_calibration_targets.UK_BUS_TARGET_REGISTRY``). +Scaling the values changes the level only; it leaves which households spend and +the relative shape of the distribution untouched (the spender share is set by +the imputation, not by this step, exactly as in the incumbent build). +""" + +from __future__ import annotations + +from collections.abc import Mapping + +import numpy as np +import pandas as pd + +from populace.build.uk.bus_calibration_targets import UK_BUS_TARGET_REGISTRY + + +def uk_bus_targets() -> dict[str, float]: + """The DfT weighted-total target for each bus variable, by column name.""" + return { + spec.measure: float(spec.value) + for spec in UK_BUS_TARGET_REGISTRY.specs + if spec.measure is not None + } + + +def calibrate_bus_spending_levels( + household: pd.DataFrame, + *, + weight_column: str = "household_weight", + targets: Mapping[str, float] | None = None, +) -> tuple[pd.DataFrame, dict[str, float]]: + """Scale each bus-spending column so its weighted total equals its target. + + Mirrors the incumbent ``calibrate_bus_fare_spending`` / + ``calibrate_bus_subsidy_spending`` step: ``scale = target / actual`` then + ``column *= scale``. Pure value scaling — the set of spending households + and the distribution's shape are unchanged. + + Args: + household: Household table carrying ``weight_column`` and every target + column. + weight_column: Survey weight column used for the weighted totals. + targets: ``column -> target weighted total``. Defaults to the DfT + registry totals (:func:`uk_bus_targets`). + + Returns: + ``(calibrated_household, scales)`` — a new table (the input is not + mutated) and the multiplicative scale applied to each column. + + Raises: + KeyError: If the weight column or a target column is missing. + ValueError: If a target column's current weighted total is not + positive (cannot scale a zero/negative aggregate to a target). + """ + if targets is None: + targets = uk_bus_targets() + if weight_column not in household.columns: + raise KeyError(f"household table has no weight column {weight_column!r}.") + + calibrated = household.copy() + weights = calibrated[weight_column].to_numpy(dtype=float) + scales: dict[str, float] = {} + for column, target in targets.items(): + if column not in calibrated.columns: + raise KeyError(f"household table has no target column {column!r}.") + values = calibrated[column].to_numpy(dtype=float) + actual = float(np.sum(values * weights)) + if not actual > 0: + raise ValueError( + f"cannot calibrate {column!r}: weighted aggregate is {actual} " + "(must be positive)." + ) + scale = float(target) / actual + calibrated[column] = values * scale + scales[column] = scale + return calibrated, scales + + +__all__ = ["calibrate_bus_spending_levels", "uk_bus_targets"] diff --git a/packages/populace-build/tests/test_uk_bus.py b/packages/populace-build/tests/test_uk_bus.py index e1e7c92..18f754c 100644 --- a/packages/populace-build/tests/test_uk_bus.py +++ b/packages/populace-build/tests/test_uk_bus.py @@ -7,6 +7,8 @@ from __future__ import annotations +import numpy as np +import pandas as pd import pytest from populace.build.uk import ( @@ -15,7 +17,9 @@ UK_BUS_SOURCE_MANIFEST, UK_BUS_STAGE_NAMES, UK_BUS_TARGET_REGISTRY, + calibrate_bus_spending_levels, uk_bus_plan, + uk_bus_targets, ) EXPECTED_STAGES = {"bus_fare_spending", "bus_subsidy_spending"} @@ -85,3 +89,67 @@ def test_every_target_is_a_sourced_household_sum(self) -> None: assert spec.aggregation == "sum" assert spec.family == "dft" assert spec.source # provenance is required + + +class TestUkBusCalibration: + @staticmethod + def _household(fare: list[float], subsidy: list[float], weight: list[float]): + return pd.DataFrame( + { + "household_weight": weight, + "bus_fare_spending": fare, + "bus_subsidy_spending": subsidy, + } + ) + + def test_scales_each_variable_to_its_target_total(self) -> None: + # Deliberately wrong levels (fare too high, subsidy too low) — the same + # failure direction as the published Populace UK population. + household = self._household( + fare=[0.0, 5_000.0, 5_000.0, 0.0], + subsidy=[100.0, 0.0, 100.0, 0.0], + weight=[1_000_000.0, 1_000_000.0, 1_000_000.0, 1_000_000.0], + ) + targets = {"bus_fare_spending": 4.0e9, "bus_subsidy_spending": 3.5e9} + calibrated, scales = calibrate_bus_spending_levels(household, targets=targets) + w = calibrated["household_weight"].to_numpy(float) + for column, target in targets.items(): + total = float(np.sum(calibrated[column].to_numpy(float) * w)) + assert total == pytest.approx(target, rel=1e-9) + assert scales["bus_fare_spending"] < 1 # fare scaled down + assert scales["bus_subsidy_spending"] > 1 # subsidy scaled up + + def test_value_scaling_preserves_the_spender_set(self) -> None: + household = self._household( + fare=[0.0, 5_000.0, 5_000.0, 0.0], + subsidy=[0.0, 0.0, 100.0, 0.0], + weight=[1e6, 1e6, 1e6, 1e6], + ) + before = household["bus_fare_spending"].to_numpy(float) > 0 + calibrated, _ = calibrate_bus_spending_levels( + household, targets={"bus_fare_spending": 4.0e9} + ) + after = calibrated["bus_fare_spending"].to_numpy(float) > 0 + # Scaling changes the level, never who spends. + assert np.array_equal(before, after) + + def test_default_targets_come_from_the_dft_registry(self) -> None: + assert uk_bus_targets() == { + spec.measure: float(spec.value) for spec in UK_BUS_TARGET_REGISTRY.specs + } + + def test_missing_column_is_a_clear_error(self) -> None: + household = pd.DataFrame({"household_weight": [1.0, 2.0]}) + with pytest.raises(KeyError, match="bus_fare_spending"): + calibrate_bus_spending_levels( + household, targets={"bus_fare_spending": 4.0e9} + ) + + def test_zero_aggregate_refuses_to_scale(self) -> None: + household = self._household( + fare=[0.0, 0.0], subsidy=[0.0, 0.0], weight=[1e6, 1e6] + ) + with pytest.raises(ValueError, match="weighted aggregate"): + calibrate_bus_spending_levels( + household, targets={"bus_fare_spending": 4.0e9} + )