Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions packages/populace-build/src/populace/build/uk/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,22 @@
"""UK build helpers for Populace-owned local-geography artifacts."""

from populace.build.uk.bus_calibration import (
calibrate_bus_spending_levels,
uk_bus_targets,
)
from populace.build.uk.bus_calibration_targets import (
ENGLAND_TO_UK_POPULATION_UPLIFT,
UK_BUS_TARGET_REGISTRY,
UK_BUS_TARGET_SPECS,
)
from populace.build.uk.bus_imputation import (
UK_BUS_DONORS,
UK_BUS_NONNEGATIVE_SOURCE_OUTPUTS,
UK_BUS_SOURCE_MANIFEST,
UK_BUS_SOURCE_STAGE_SPECS,
UK_BUS_STAGE_NAMES,
uk_bus_plan,
)
from populace.build.uk.geography_sources import (
ENGLAND_LAD_REGION_URL,
ENGLAND_WALES_OA2021_COUNT,
Expand Down Expand Up @@ -125,6 +142,17 @@
)

__all__ = [
"ENGLAND_TO_UK_POPULATION_UPLIFT",
"UK_BUS_DONORS",
"UK_BUS_NONNEGATIVE_SOURCE_OUTPUTS",
"UK_BUS_SOURCE_MANIFEST",
"UK_BUS_SOURCE_STAGE_SPECS",
"UK_BUS_STAGE_NAMES",
"UK_BUS_TARGET_REGISTRY",
"UK_BUS_TARGET_SPECS",
"calibrate_bus_spending_levels",
"uk_bus_plan",
"uk_bus_targets",
"AGE_BANDS",
"AREA_TYPES",
"AREA_TYPE_TO_CROSSWALK_COLUMN",
Expand Down
89 changes: 89 additions & 0 deletions packages/populace-build/src/populace/build/uk/bus_calibration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
"""Executable DfT level-calibration for the UK bus-spending variables.

The imputation stages (``bus_source_stages``) place ``bus_fare_spending`` and
``bus_subsidy_spending`` on the household table, but a survey imputation does
not reproduce the Department for Transport (DfT) national totals on its own —
left unanchored, fare spending lands roughly twice the DfT fare total and
subsidy well below the DfT net-support total.

This module applies the same correction the incumbent enhanced-FRS build uses:
a per-variable multiplicative **value scaling** so each variable's weighted
total equals its DfT target (``bus_calibration_targets.UK_BUS_TARGET_REGISTRY``).
Scaling the values changes the level only; it leaves which households spend and
the relative shape of the distribution untouched (the spender share is set by
the imputation, not by this step, exactly as in the incumbent build).
"""

from __future__ import annotations

from collections.abc import Mapping

import numpy as np
import pandas as pd

from populace.build.uk.bus_calibration_targets import UK_BUS_TARGET_REGISTRY


def uk_bus_targets() -> dict[str, float]:
    """Return the DfT weighted-total target of each bus variable.

    Walks ``UK_BUS_TARGET_REGISTRY.specs`` and maps every spec's ``measure``
    (the household column name) to its ``value`` coerced to ``float``. Specs
    whose ``measure`` is ``None`` are left out.
    """
    totals_by_column: dict[str, float] = {}
    for target_spec in UK_BUS_TARGET_REGISTRY.specs:
        # A spec without a measure has no column to key the total by.
        if target_spec.measure is None:
            continue
        totals_by_column[target_spec.measure] = float(target_spec.value)
    return totals_by_column


def calibrate_bus_spending_levels(
household: pd.DataFrame,
*,
weight_column: str = "household_weight",
targets: Mapping[str, float] | None = None,
) -> tuple[pd.DataFrame, dict[str, float]]:
"""Scale each bus-spending column so its weighted total equals its target.

Mirrors the incumbent ``calibrate_bus_fare_spending`` /
``calibrate_bus_subsidy_spending`` step: ``scale = target / actual`` then
``column *= scale``. Pure value scaling — the set of spending households
and the distribution's shape are unchanged.

Args:
household: Household table carrying ``weight_column`` and every target
column.
weight_column: Survey weight column used for the weighted totals.
targets: ``column -> target weighted total``. Defaults to the DfT
registry totals (:func:`uk_bus_targets`).

Returns:
``(calibrated_household, scales)`` — a new table (the input is not
mutated) and the multiplicative scale applied to each column.

Raises:
KeyError: If the weight column or a target column is missing.
ValueError: If a target column's current weighted total is not
positive (cannot scale a zero/negative aggregate to a target).
"""
if targets is None:
targets = uk_bus_targets()
if weight_column not in household.columns:
raise KeyError(f"household table has no weight column {weight_column!r}.")

calibrated = household.copy()
weights = calibrated[weight_column].to_numpy(dtype=float)
scales: dict[str, float] = {}
for column, target in targets.items():
if column not in calibrated.columns:
raise KeyError(f"household table has no target column {column!r}.")
values = calibrated[column].to_numpy(dtype=float)
actual = float(np.sum(values * weights))
if not actual > 0:
raise ValueError(
f"cannot calibrate {column!r}: weighted aggregate is {actual} "
"(must be positive)."
)
scale = float(target) / actual
calibrated[column] = values * scale
scales[column] = scale
return calibrated, scales


# Public names of this module: the calibration entry point and the helper that
# supplies its default targets.
__all__ = ["calibrate_bus_spending_levels", "uk_bus_targets"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
"""DfT-anchored calibration targets for the UK bus-spending variables.

Two household consumption variables imputed from survey microdata must be
anchored to Department for Transport (DfT) Annual Bus Statistics, or they
inherit the survey's transport over/under-estimate:

* ``bus_fare_spending`` — fares households pay (DfT BUS05a fare receipts);
* ``bus_subsidy_spending`` — net government support to bus operators
(DfT BUS05b net government support).

Without these anchors the survey imputation lands roughly twice the fare
total and well below the subsidy total. The published DfT figures are
England-only; they are uplifted to a UK total by the ONS mid-2023 population
ratio (UK 68.3m / England 57.7m ≈ 1.18), because bus fares and subsidy scale
with population.

These specs feed both the calibration solver (``populace.calibrate``) and the
``aggregate_admin_gate``, which flags a candidate population whose weighted
``bus_fare_spending`` / ``bus_subsidy_spending`` total misses the DfT anchor.
"""

from __future__ import annotations

from populace.calibrate import TargetRegistry, TargetSpec

# ONS mid-2023 population, UK / England (millions). DfT bus statistics are
# published for England only; the England totals are uplifted to UK by this
# ratio.
# The quotient is roughly 1.18.
ENGLAND_TO_UK_POPULATION_UPLIFT = 68.3 / 57.7

# Department for Transport, Annual Bus Statistics: year ending March 2025.
# Appended to each spec's ``source`` string below so the citation travels with
# the target.
_DFT_BUS_STATISTICS_URL = (
    "https://www.gov.uk/government/statistics/"
    "annual-bus-statistics-year-ending-march-2025/"
    "annual-bus-statistics-year-ending-march-2025"
)

# England totals (DfT, year ending March 2025), in GBP.
_DFT_ENGLAND_FARE_RECEIPTS = 3.4e9  # BUS05a passenger fare receipts
_DFT_ENGLAND_NET_GOVERNMENT_SUPPORT = 3.0e9  # BUS05b net government support

# One household-level ``sum`` target per bus variable. Each ``value`` is the
# England total above multiplied by the England-to-UK population uplift, and
# ``measure`` names the household column the target applies to.
UK_BUS_TARGET_SPECS: tuple[TargetSpec, ...] = (
    TargetSpec(
        name="dft/bus_fare_spending",
        entity="household",
        value=_DFT_ENGLAND_FARE_RECEIPTS * ENGLAND_TO_UK_POPULATION_UPLIFT,
        aggregation="sum",
        measure="bus_fare_spending",
        period=2025,
        source=(
            "DfT Annual Bus Statistics year ending March 2025, table BUS05a "
            "(England passenger fare receipts GBP 3.4bn), uplifted to UK by the "
            "ONS mid-2023 population ratio. " + _DFT_BUS_STATISTICS_URL
        ),
        family="dft",
    ),
    TargetSpec(
        name="dft/bus_subsidy_spending",
        entity="household",
        value=_DFT_ENGLAND_NET_GOVERNMENT_SUPPORT * ENGLAND_TO_UK_POPULATION_UPLIFT,
        aggregation="sum",
        measure="bus_subsidy_spending",
        period=2025,
        source=(
            "DfT Annual Bus Statistics year ending March 2025, table BUS05b "
            "(England net government support GBP 3.0bn), uplifted to UK by the "
            "ONS mid-2023 population ratio. " + _DFT_BUS_STATISTICS_URL
        ),
        family="dft",
    ),
)

# Registry built from the two specs above, tagged with country "uk".
UK_BUS_TARGET_REGISTRY = TargetRegistry(UK_BUS_TARGET_SPECS, country="uk")

# Public names of this module.
__all__ = [
    "ENGLAND_TO_UK_POPULATION_UPLIFT",
    "UK_BUS_TARGET_REGISTRY",
    "UK_BUS_TARGET_SPECS",
]
96 changes: 96 additions & 0 deletions packages/populace-build/src/populace/build/uk/bus_imputation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""UK bus-spending imputation plan (LCFS fares, ETB subsidy).

Declares the source stages and donor graph that impute the two DfT-anchored
bus consumption variables onto the UK population, and assembles them into a
:class:`~populace.build.plan.StagePlan`. The executable stage transforms are
injected by the build caller — there are no stubs or fallbacks, exactly as the
US plan works.

The calibration anchors for the two outputs live in
:mod:`populace.build.uk.bus_calibration_targets`.
"""

from __future__ import annotations

from collections.abc import Callable, Mapping
from importlib.resources import files

from populace.build.plan import DonorSpec, Stage, StagePlan
from populace.build.source_manifest import (
SourceManifest,
SourceStageSpec,
load_source_manifest,
)
from populace.frame import Frame


def _load_uk_bus_source_manifest() -> SourceManifest:
    """Load ``bus_source_stages.json`` from this package as a source manifest."""
    manifest_resource = files(__package__).joinpath("bus_source_stages.json")
    return load_source_manifest(manifest_resource)


# Manifest loaded once, at import time, from the packaged
# ``bus_source_stages.json``.
UK_BUS_SOURCE_MANIFEST: SourceManifest = _load_uk_bus_source_manifest()
# The manifest's stage specs; every constant below is derived from this tuple.
UK_BUS_SOURCE_STAGE_SPECS: tuple[SourceStageSpec, ...] = UK_BUS_SOURCE_MANIFEST.stages
# Union of the ``nonnegative_outputs`` declared by all stages.
UK_BUS_NONNEGATIVE_SOURCE_OUTPUTS: frozenset[str] = frozenset(
    output
    for stage in UK_BUS_SOURCE_STAGE_SPECS
    for output in stage.nonnegative_outputs
)

# Stage names, in the same order as ``UK_BUS_SOURCE_STAGE_SPECS``.
UK_BUS_STAGE_NAMES: tuple[str, ...] = tuple(
    stage.stage for stage in UK_BUS_SOURCE_STAGE_SPECS
)

# Donor description (survey, source, notes) of each stage, keyed by stage name.
UK_BUS_DONORS: Mapping[str, DonorSpec] = {
    stage.stage: DonorSpec(survey=stage.survey, source=stage.source, notes=stage.notes)
    for stage in UK_BUS_SOURCE_STAGE_SPECS
}


def uk_bus_plan(
    implementations: Mapping[str, Callable[[Frame], Frame]],
) -> StagePlan:
    """Build the UK bus-spending imputation plan from injected transforms.

    Follows the same contract as ``us_plan``: each declared stage must come
    with a caller-supplied transform, and nothing is stubbed or defaulted.

    Args:
        implementations: ``stage name -> transform(frame) -> frame``, with
            exactly one entry per name in :data:`UK_BUS_STAGE_NAMES`.

    Returns:
        A :class:`StagePlan` holding one :class:`Stage` per declared stage, in
        :data:`UK_BUS_STAGE_NAMES` order, each carrying its transform, the
        manifest outputs it produces, and its donor spec.

    Raises:
        ValueError: If a declared stage has no implementation, or if an
            implementation is supplied for a stage that is not declared.
    """
    declared = UK_BUS_STAGE_NAMES

    # Every declared stage must be covered (reported in declaration order).
    absent = [stage_name for stage_name in declared if stage_name not in implementations]
    if absent:
        raise ValueError(
            f"uk_bus_plan needs an implementation for every declared stage; "
            f"missing {absent}. There are no stubs or fallbacks by design."
        )

    # No extra stages are accepted either (reported sorted by name).
    unexpected = sorted(set(implementations).difference(declared))
    if unexpected:
        raise ValueError(
            f"Unknown stage implementation(s) {unexpected}; declared stages "
            f"are {list(declared)}."
        )

    specs_by_name = UK_BUS_SOURCE_MANIFEST.stage_map()
    stages = (
        Stage(
            name=stage_name,
            transform=implementations[stage_name],
            produces=specs_by_name[stage_name].outputs,
            donor=UK_BUS_DONORS[stage_name],
        )
        for stage_name in declared
    )
    return StagePlan(stages)


# Public names of this module: the manifest-derived constants and the plan
# builder. The private manifest loader is intentionally not exported.
__all__ = [
    "UK_BUS_DONORS",
    "UK_BUS_NONNEGATIVE_SOURCE_OUTPUTS",
    "UK_BUS_SOURCE_MANIFEST",
    "UK_BUS_SOURCE_STAGE_SPECS",
    "UK_BUS_STAGE_NAMES",
    "uk_bus_plan",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
{
"version": 1,
"country": "uk",
"policy": "UK bus-spending source stages are manifest-defined. The bus_fare_spending and bus_subsidy_spending consumption variables are imputed onto the UK population by weighted QRFs from public survey microdata, clipped to the donor's realized range, and then anchored to the DfT calibration targets in bus_calibration_targets. Executable Python belongs only in shared Populace runtimes.",
"stages": [
{
"stage": "bus_fare_spending",
"survey": "ONS Living Costs and Food Survey 2022-23",
"source": "https://www.ons.gov.uk/peoplepopulationandcommunity/personalandhouseholdfinances/incomeandwealth/methodologies/livingcostsandfoodsurvey",
"grain": "household",
"artifacts": [
{
"kind": "public_microdata",
"format": "tab",
"vintage": "2022-23",
"locator": "ONS Living Costs and Food Survey, UK Data Service end-user licence"
}
],
"operations": [
{ "kind": "read_table", "table": "lcfs_household", "weight": "weighta" },
{ "kind": "derive", "outputs": ["bus_fare_spending"] },
{
"kind": "fit_weighted_qrf",
"predictors": [
"is_adult",
"is_child",
"region",
"employment_income",
"self_employment_income",
"private_pension_income",
"hbai_household_net_income",
"tenure_type",
"accommodation_type"
]
},
{ "kind": "support_clip", "range": "donor_realized" }
],
"outputs": ["bus_fare_spending"],
"nonnegative_outputs": ["bus_fare_spending"],
"notes": "Bus and coach fares households pay, summed from LCFS COICOP 7.3.2 sub-codes (bus/coach fares). Imputed onto the UK population by a weighted QRF over the listed predictors, clipped to the donor's realized range so the imputation does not concentrate spending in too few households at implausibly high per-household amounts, then calibrated to the DfT fare-receipts total (bus_calibration_targets.UK_BUS_TARGET_REGISTRY)."
},
{
"stage": "bus_subsidy_spending",
"survey": "ONS Effects of Taxes and Benefits on Household Income 2022-23",
"source": "https://www.ons.gov.uk/peoplepopulationandcommunity/personalandhouseholdfinances/incomeandwealth/bulletins/theeffectsoftaxesandbenefitsonhouseholdincome/latest",
"grain": "household",
"artifacts": [
{
"kind": "public_microdata",
"format": "tab",
"vintage": "2022-23",
"locator": "ONS Effects of Taxes and Benefits on Household Income, UK Data Service"
}
],
"operations": [
{ "kind": "read_table", "table": "etb_household", "weight": "weight" },
{ "kind": "derive", "outputs": ["bus_subsidy_spending"] },
{
"kind": "fit_weighted_qrf",
"predictors": [
"is_adult",
"is_child",
"is_SP_age",
"count_primary_education",
"count_secondary_education",
"count_further_education",
"dla",
"pip",
"hbai_household_net_income"
]
},
{ "kind": "support_clip", "range": "donor_realized" }
],
"outputs": ["bus_subsidy_spending"],
"nonnegative_outputs": ["bus_subsidy_spending"],
"notes": "Net government support to bus operators allocated to households, from the ETB 'bus subsidy' field. Imputed by a weighted QRF over the listed predictors, clipped to the donor's realized range, then calibrated to the DfT net-government-support total (bus_calibration_targets.UK_BUS_TARGET_REGISTRY)."
}
]
}
Loading
Loading