From 5196aa203c8b8b5aa265fdd98a111127a203e048 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Sat, 20 Jun 2026 18:11:51 -0400
Subject: [PATCH 1/3] Add UK raw-source and local-geography parity

---
 packages/populace-build/README.md             |  45 +-
 .../src/populace/build/gates.py               |  10 +-
 .../src/populace/build/source_manifest.py     |   1 +
 .../src/populace/build/uk/__init__.py         | 197 +++-
 .../src/populace/build/uk/local_geography.py  | 293 +++++-
 .../src/populace/build/uk/local_runner.py     | 237 ++++-
 .../src/populace/build/uk/local_solver.py     | 103 +-
 .../src/populace/build/uk/source_stages.json  | 990 ++++++++++++++++++
 .../src/populace/build/uk/spi_support.py      |  14 +-
 .../tests/test_uk_local_geography.py          | 108 ++
 .../tests/test_uk_local_runner.py             | 159 ++-
 .../tests/test_uk_local_solver.py             |  63 ++
 .../tests/test_uk_source_manifest.py          | 318 ++++++
 .../tests/test_uk_spi_support.py              |   2 +-
 14 files changed, 2447 insertions(+), 93 deletions(-)
 create mode 100644 packages/populace-build/src/populace/build/uk/source_stages.json
 create mode 100644 packages/populace-build/tests/test_uk_source_manifest.py

diff --git a/packages/populace-build/README.md b/packages/populace-build/README.md
index 267056b..75a7269 100644
--- a/packages/populace-build/README.md
+++ b/packages/populace-build/README.md
@@ -15,11 +15,9 @@ names its donor survey and fails loudly — no silent fallbacks), and the
   short-term capital gains to −$3.9T);
 - **export surface** — every replacement artifact can prove that its
   exported variables match a reference surface, with only documented
-  structural extras or reviewed exclusions (for UK, this is the eFRS
-  compatibility check);
+  structural extras or reviewed exclusions;
 - **target surface** — the calibration target set covers the reference
-  target surface and may only be wider, not narrower (for UK, Populace must
-  calibrate to at least the eFRS target surface);
+  target surface and may only be wider, not narrower;
 - **per-family fit** — the calibration's within-10% share is reported per
   source family, while only broad family-level misses block publication so
   one family cannot hide inside the global average;
@@ -37,11 +35,12 @@ JSON manifests and executed by shared Populace runtimes.
 ## UK local-geography path
 
 `populace.build.uk.local_geography` holds the Populace-owned replacement shape
-for UK constituency and local-authority geography. It uses the same stacked
-local-area layout as the US local ECPS flow:
+for UK constituency and local-authority geography. The production local path is
+row-wise assigned, matching the longwise direction of the US local ECPS flow:
 
 ```text
-column = area_index * n_households + household_index
+column = household_index
+target rows only see households assigned to that area code
 ```
 
 The solved weights export to a long sidecar with `(area_type, area_code,
@@ -50,12 +49,13 @@ the format PolicyEngine can group by directly for constituency and local
 authority outputs, and it avoids preserving the legacy dense
 `areas x households` matrix artifact.
 
-The module does not import the incumbent UK data package. Engine runners and
+The module does not import an incumbent UK data package. Engine runners and
 target providers pass household metric tables and aligned target tables into
-`build_stacked_local_matrix`; this keeps Populace clean while the target source
-files move over. The helper `sort_households_by_id` also codifies the 2024-25
-FRS fix: household attributes and weights must be sorted by the same stable
-household ID before any positional assignment.
+`build_assigned_local_matrix` / `build_local_candidate`; this keeps Populace as
+the owner of the build surface while historical incumbent comparisons remain
+external migration benchmarks. The helper `sort_households_by_id` also
+codifies the 2024-25 FRS fix: household attributes and weights must be sorted
+by the same stable household ID before any positional assignment.
 
 `populace.build.uk.local_targets` declares the constituency and local-authority
 metric surface used by the local build: HMRC employment/self-employment amount
@@ -63,9 +63,9 @@ and count rows, ONS age bands, Universal Credit household rows, constituency
 UC-by-children rows, and the LA income/tenure/rent rows. It accepts a
 PolicyEngine-UK-like simulation object and returns household-indexed metric
 tables; it still takes target values as explicit input tables. `local_solver`
-wraps the Populace calibrator's log-weight optimizer for stacked local weights
+wraps the Populace calibrator's log-weight optimizer for assigned local weights
 and records per-area/per-metric diagnostics before the solved weights are
-exported with `stacked_weights_to_long`.
+exported with `assigned_weights_to_long`.
 
 `populace.build.uk.local_runner` is the Populace-owned candidate build path. It
 loads explicit area and target tables, aligns a sorted household frame with
@@ -95,6 +95,23 @@ postcode sources. It writes the cloned row-wise H5, a geography coverage CSV,
 and `rowwise_build_manifest.json` with input/output hashes, row counts, target
 coverage, weight preservation, and weakest local-support diagnostics.
 
+As in the US plan, UK migration comparisons against earlier production datasets
+belong in release/benchmark harnesses outside this package. The build code here
+must not import or depend on the incumbent UK data package; `source_manifest.py`
+rejects incumbent country data-package references in declarative source specs.
+
+`populace.build.uk` also exposes `UK_SOURCE_MANIFEST`,
+`UK_SOURCE_STAGE_SPECS`, `UK_SOURCE_OUTPUTS`, `UK_NONNEGATIVE_SOURCE_OUTPUTS`,
+`UK_DONORS`, `UK_STAGE_NAMES`, and `uk_plan(implementations)`. The packaged
+`uk/source_stages.json` is the Populace-owned raw-input parity contract for the
+UK build: FRS base tables, WAS wealth/debt/vehicles, LCFS consumption and fuel,
+ETB VAT and public services, NHS usage, SPI high-income income/reliefs,
+FRS-only pension/savings/reported-benefit fill, Advani-Summers capital gains,
+salary sacrifice, SLC student-loan plan assignment, and row-wise OA/LA/
+constituency geography. Stage implementations are injected and the plan refuses
+to assemble with any missing or unknown stage, matching the US complete-or-fail
+source-plan behavior.
+
 ## US plan status
 
 `populace.build.us` declares the US build: stage order, donor graph with
diff --git a/packages/populace-build/src/populace/build/gates.py b/packages/populace-build/src/populace/build/gates.py
index b8d950c..f58e322 100644
--- a/packages/populace-build/src/populace/build/gates.py
+++ b/packages/populace-build/src/populace/build/gates.py
@@ -24,7 +24,8 @@
   member names, not raw source-system codes.
 - :func:`export_surface_gate` and :func:`target_surface_gate` — replacement
   builds can prove they cover a reference artifact's export variables and
-  calibration targets, e.g. UK Populace against eFRS.
+  calibration targets. Reference artifacts are comparison surfaces, not build
+  inputs.
 
 Scoring uses :func:`relative_error_loss` — the calibrator's own objective —
 so there is no calibrator-vs-scorer objective mismatch: what the solver
@@ -750,10 +751,9 @@ def export_surface_gate(
     This is stricter than :func:`parity_gate`: parity checks whether populated
     reference layers are also populated, while this gate checks the exported
     variable *surface* itself. It is intended for live release blocking where a
-    country has a known incumbent-compatible artifact, such as UK Populace
-    matching eFRS exported variables. Extra columns are refused unless the
-    build declares them as structural/compatibility additions; missing
-    reference columns require a named reviewed exclusion.
+    country has a known reference export surface. Extra columns are refused
+    unless the build declares them as structural/compatibility additions;
+    missing reference columns require a named reviewed exclusion.
     """
     candidate = {str(name) for name in candidate_columns}
     reference = {str(name) for name in reference_columns}
diff --git a/packages/populace-build/src/populace/build/source_manifest.py b/packages/populace-build/src/populace/build/source_manifest.py
index 808809d..8580c96 100644
--- a/packages/populace-build/src/populace/build/source_manifest.py
+++ b/packages/populace-build/src/populace/build/source_manifest.py
@@ -40,6 +40,7 @@
         "assign_by_plan_type",
         "assign_binary_from_rate",
         "calibrate_binary_assignment",
+        "calibrate_weights",
         "convert_interest_to_structural_mortgage_inputs",
         "compute_ratio",
         "derive",
diff --git a/packages/populace-build/src/populace/build/uk/__init__.py b/packages/populace-build/src/populace/build/uk/__init__.py
index cd5bea3..e125e23 100644
--- a/packages/populace-build/src/populace/build/uk/__init__.py
+++ b/packages/populace-build/src/populace/build/uk/__init__.py
@@ -1,4 +1,16 @@
-"""UK build helpers for Populace-owned local-geography artifacts."""
+"""UK build helpers for Populace-owned raw-source and local artifacts."""
+
+from __future__ import annotations
+
+from collections.abc import Callable, Mapping
+from importlib.resources import files
+
+from populace.build.plan import DonorSpec, Stage, StagePlan
+from populace.build.source_manifest import (
+    SourceManifest,
+    SourceStageSpec,
+    load_source_manifest,
+)
 
 from populace.build.uk.geography_sources import (
     ENGLAND_LAD_REGION_URL,
@@ -42,11 +54,15 @@
     write_geography_crosswalk,
 )
 from populace.build.uk.local_geography import (
+    AREA_TYPE_TO_ROWWISE_HOUSEHOLD_COLUMN,
     LONG_GEOGRAPHY_COLUMNS,
     StackedLocalMatrix,
     align_area_targets,
     area_support_summary,
+    assigned_weights_to_long,
+    build_assigned_local_matrix,
     build_stacked_local_matrix,
+    rowwise_assignment_column,
     sort_households_by_id,
     stacked_design_weights,
     stacked_weights_to_long,
@@ -68,6 +84,7 @@
 )
 from populace.build.uk.local_solver import (
     StackedLocalSolveResult,
+    solve_assigned_local_weights,
     solve_stacked_local_weights,
 )
 from populace.build.uk.local_targets import (
@@ -123,11 +140,175 @@
     support_clone_index_column,
     support_source_id_column,
 )
+from populace.frame import Frame
+
+UK_DONORS: Mapping[str, DonorSpec] = {  # keyed by stage name; uk_plan reads it
+    "was_wealth": DonorSpec(
+        survey="Wealth and Assets Survey",
+        source="https://www.ons.gov.uk/peoplepopulationandcommunity/personalandhouseholdfinances/debt/methodologies/wealthandassetssurveyqmi",
+        notes="Household wealth, debts, vehicles, and student-loan balances.",
+    ),
+    "regional_property_uprating": DonorSpec(
+        survey="UK House Price Index and regional land-value tables",
+        source="https://www.gov.uk/government/collections/uk-house-price-index-reports",
+        notes="Regional property-value uprating after WAS wealth imputation.",
+    ),
+    "lcfs_consumption": DonorSpec(
+        survey="Living Costs and Food Survey",
+        source="https://www.ons.gov.uk/peoplepopulationandcommunity/personalandhouseholdfinances/incomeandwealth/methodologies/livingcostsandfoodsurveyqmi",
+        notes="COICOP consumption, fuel spending, and domestic energy use.",
+    ),
+    "road_fuel_energy_calibration": DonorSpec(
+        survey="Road fuel and household energy administrative totals",
+        source="https://www.gov.uk/government/collections/road-transport-consumption-at-regional-and-local-level",
+        notes="Fuel and energy calibration targets for LCFS-imputed amounts.",
+    ),
+    "etb_vat": DonorSpec(
+        survey="Effects of Taxes and Benefits",
+        source="https://www.ons.gov.uk/peoplepopulationandcommunity/personalandhouseholdfinances/incomeandwealth/datasets/theeffectsoftaxesandbenefitsonhouseholdincomehistoricaldatasets",
+        notes="Full-rate VAT expenditure-rate imputation.",
+    ),
+    "nhs_usage": DonorSpec(
+        survey="NHS activity and unit-cost tables",
+        source="https://www.england.nhs.uk/statistics/statistical-work-areas/hospital-activity/monthly-hospital-activity/",
+        notes="A&E, inpatient, outpatient visit and spending inputs.",
+    ),
+    "etb_public_services": DonorSpec(
+        survey="Effects of Taxes and Benefits public-service tables",
+        source="https://www.ons.gov.uk/peoplepopulationandcommunity/personalandhouseholdfinances/incomeandwealth/datasets/theeffectsoftaxesandbenefitsonhouseholdincomehistoricaldatasets",
+        notes="Education, rail, and bus public-service benefit inputs.",
+    ),
+    "rail_public_service_calibration": DonorSpec(
+        survey="Rail public-service administrative totals",
+        source="https://www.gov.uk/government/collections/rail-statistics",
+        notes="Post-weight rail subsidy and usage scaling.",
+    ),
+    "spi_income": DonorSpec(
+        survey="Survey of Personal Incomes",
+        source="https://www.gov.uk/government/collections/personal-incomes-statistics",
+        notes="High-income components, Gift Aid, and investment-gift reliefs.",
+    ),
+    "frs_only_spi_fill": DonorSpec(
+        survey="Family Resources Survey 2023-24",
+        source="https://www.gov.uk/government/collections/family-resources-survey--2",
+        notes=(
+            "Second-stage pension, savings, and reported-benefit behavior for "
+            "SPI support rows."
+        ),
+    ),
+    "advani_summers_capital_gains": DonorSpec(
+        survey="Advani-Summers capital gains distribution",
+        source="https://ideas.repec.org/p/hal/wpaper/halshs-03022609.html",
+        notes="Capital gains assignment and clone flag.",
+    ),
+    "frs_salary_sacrifice": DonorSpec(
+        survey="Family Resources Survey salary-sacrifice subsample",
+        source="https://www.gov.uk/government/collections/family-resources-survey--2",
+        notes="Salary-sacrifice pension contributions and employee adjustment.",
+    ),
+    "slc_student_loan_plan": DonorSpec(
+        survey="Student Loans Company repayment-plan statistics",
+        source="https://www.gov.uk/government/collections/student-loans-for-higher-and-further-education",
+        notes="Student-loan repayment plan assignment by cohort and balance.",
+    ),
+}
+
+UK_STAGE_NAMES: tuple[str, ...] = (  # uk_plan builds stages in this order
+    "frs_base",
+    "was_wealth",
+    "regional_property_uprating",
+    "lcfs_consumption",
+    "etb_vat",
+    "nhs_usage",
+    "etb_public_services",
+    UK_SPI_SUPPORT_STAGE_NAME,
+    "spi_income",
+    "frs_only_spi_fill",
+    "advani_summers_capital_gains",
+    "frs_salary_sacrifice",
+    "slc_student_loan_plan",
+    "rowwise_oa_geography",
+    "national_calibration",
+    "local_geography_weights",
+    "rail_public_service_calibration",
+    "road_fuel_energy_calibration",
+    "export",
+)
+
+UK_STRUCTURAL_SOURCE_STAGES: tuple[str, ...] = (  # subset of UK_STAGE_NAMES
+    "frs_base",
+    UK_SPI_SUPPORT_STAGE_NAME,
+    "rowwise_oa_geography",
+    "national_calibration",
+    "local_geography_weights",
+)
+
+
+def _load_uk_source_manifest() -> SourceManifest:  # reads packaged source_stages.json
+    return load_source_manifest(files(__package__).joinpath("source_stages.json"))
+
+
+UK_SOURCE_MANIFEST = _load_uk_source_manifest()  # loaded once at import time
+_UK_SOURCE_STAGE_MAP = UK_SOURCE_MANIFEST.stage_map()
+_UNKNOWN_UK_SOURCE_STAGES = sorted(set(_UK_SOURCE_STAGE_MAP) - set(UK_STAGE_NAMES))
+if _UNKNOWN_UK_SOURCE_STAGES:  # manifest stages must all be declared stages
+    raise ValueError(
+        "UK source manifest stage(s) are not declared in UK_STAGE_NAMES: "
+        f"{_UNKNOWN_UK_SOURCE_STAGES}."
+    )
+UK_SOURCE_STAGE_SPECS: tuple[SourceStageSpec, ...] = tuple(  # UK_STAGE_NAMES order
+    _UK_SOURCE_STAGE_MAP[name] for name in UK_STAGE_NAMES if name in _UK_SOURCE_STAGE_MAP
+)
+UK_SOURCE_OUTPUTS: frozenset[str] = frozenset(
+    output for stage in UK_SOURCE_STAGE_SPECS for output in stage.outputs
+)
+UK_SOURCE_OUTPUT_STAGES: Mapping[str, tuple[str, ...]] = {
+    output: tuple(
+        stage.stage for stage in UK_SOURCE_STAGE_SPECS if output in stage.outputs
+    )
+    for output in sorted(UK_SOURCE_OUTPUTS)
+}
+UK_REWRITTEN_SOURCE_OUTPUT_STAGES: Mapping[str, tuple[str, ...]] = {
+    output: stages
+    for output, stages in UK_SOURCE_OUTPUT_STAGES.items()
+    if len(stages) > 1  # outputs listed by more than one stage
+}
+UK_NONNEGATIVE_SOURCE_OUTPUTS: frozenset[str] = frozenset(
+    output for stage in UK_SOURCE_STAGE_SPECS for output in stage.nonnegative_outputs
+)
+
+
+def uk_plan(
+    implementations: Mapping[str, Callable[[Frame], Frame]],
+) -> StagePlan:
+    """Assemble the UK StagePlan; raises ValueError on missing or unknown stages."""
+
+    missing = [name for name in UK_STAGE_NAMES if name not in implementations]
+    if missing:  # every declared stage must be supplied
+        raise ValueError(
+            f"uk_plan needs an implementation for every declared stage; "
+            f"missing {missing}. There are no stubs or fallbacks by design."
+        )
+    unknown = sorted(set(implementations) - set(UK_STAGE_NAMES))
+    if unknown:  # and nothing outside UK_STAGE_NAMES is accepted
+        raise ValueError(
+            f"Unknown stage implementation(s) {unknown}; declared stages "
+            f"are {list(UK_STAGE_NAMES)}."
+        )
+    return StagePlan(
+        Stage(
+            name=name,
+            transform=implementations[name],
+            donor=UK_DONORS.get(name),  # None for stages without a donor entry
+        )
+        for name in UK_STAGE_NAMES  # stages are yielded in declared order
+    )
 
 __all__ = [
     "AGE_BANDS",
     "AREA_TYPES",
     "AREA_TYPE_TO_CROSSWALK_COLUMN",
+    "AREA_TYPE_TO_ROWWISE_HOUSEHOLD_COLUMN",
     "BASE_FRS_SUPPORT_CHANNEL",
     "BENUNIT_ID_COLUMNS",
     "COUNTRY_TO_REGION",
@@ -167,17 +348,28 @@
     "StackedLocalSolveResult",
     "UK_POSTCODE_OA_MAY25_ZIP_URL",
     "UK_POSTCODE_PCON_MAY24_ZIP_URL",
+    "UK_DONORS",
     "UKLocalCandidateResult",
+    "UK_NONNEGATIVE_SOURCE_OUTPUTS",
     "UKRowwiseDatasetResult",
     "UKSPISupportResult",
+    "UK_SOURCE_MANIFEST",
+    "UK_SOURCE_OUTPUTS",
+    "UK_SOURCE_OUTPUT_STAGES",
+    "UK_SOURCE_STAGE_SPECS",
+    "UK_REWRITTEN_SOURCE_OUTPUT_STAGES",
     "UK_SINGLE_YEAR_TABLES",
     "UK_SPI_SUPPORT_STAGE_NAME",
+    "UK_STAGE_NAMES",
+    "UK_STRUCTURAL_SOURCE_STAGES",
     "align_area_targets",
+    "assigned_weights_to_long",
     "area_support_summary",
     "area_groups_from_codes",
     "assign_household_geography",
     "build_local_candidate",
     "build_local_candidate_from_dataset",
+    "build_assigned_local_matrix",
     "build_complete_uk_geography_crosswalk",
     "build_england_wales_crosswalk",
     "build_great_britain_crosswalk",
@@ -216,7 +408,9 @@
     "prepare_geography_crosswalk",
     "prepare_household_frame",
     "read_local_table",
+    "rowwise_assignment_column",
     "set_simulation_area_group",
+    "solve_assigned_local_weights",
     "solve_stacked_local_weights",
     "sort_households_by_id",
     "stacked_design_weights",
@@ -225,6 +419,7 @@
     "support_channel_column",
     "support_clone_index_column",
     "support_source_id_column",
+    "uk_plan",
     "update_england_wales_lad_codes",
     "validate_uk_rowwise_dataset_tables",
     "validate_geography_coverage",
diff --git a/packages/populace-build/src/populace/build/uk/local_geography.py b/packages/populace-build/src/populace/build/uk/local_geography.py
index f87c164..b6cc367 100644
--- a/packages/populace-build/src/populace/build/uk/local_geography.py
+++ b/packages/populace-build/src/populace/build/uk/local_geography.py
@@ -1,10 +1,12 @@
 """Longwise UK local-geography build primitives.
 
-This module owns the representation that lets Populace replace the legacy
+This module owns the representations that let Populace replace the legacy
 UK incumbent ``areas x households`` matrix artifacts:
 
 * a stacked sparse matrix whose columns are
   ``area_index * n_households + household_index``; and
+* an assigned sparse matrix with one column per household, whose target rows
+  only see households assigned to that local area; and
 * a longweight sidecar with one row per non-zero
   ``(area, household, weight)`` assignment.
 
@@ -39,6 +41,12 @@
     "weight_source",
 )
 
+AREA_TYPE_TO_ROWWISE_HOUSEHOLD_COLUMN = {
+    "constituency": "constituency_code_oa",
+    "la": "la_code_oa",
+    "local_authority": "la_code_oa",  # alias: same column as "la"
+}
+
 _AREA_METADATA_COLUMNS = frozenset(
     {
         "area_code",
@@ -54,7 +62,7 @@
 
 @dataclass(frozen=True)
 class StackedLocalMatrix:
-    """Sparse stacked local-area calibration matrix and aligned targets."""
+    """Sparse local-area calibration matrix and aligned targets."""
 
     matrix: sp.csr_matrix
     targets: np.ndarray
@@ -226,8 +234,10 @@ def build_stacked_local_matrix(
             )
             cache_key = (group, metric_index)
             if cache_key not in nonzero_cache:
-                column = metric_tables[group].iloc[:, metric_index].to_numpy(
-                    dtype=np.float64
+                column = (
+                    metric_tables[group]
+                    .iloc[:, metric_index]
+                    .to_numpy(dtype=np.float64)
                 )
                 if not np.isfinite(column).all():
                     raise ValueError(
@@ -267,6 +277,162 @@ def build_stacked_local_matrix(
     )
 
 
+def rowwise_assignment_column(
+    area_type: str,
+    *,
+    assignment_column: str | None = None,
+) -> str:
+    """Return the household column carrying rowwise local geography codes."""
+
+    if assignment_column is not None:  # explicit override beats the area_type default
+        column = str(assignment_column).strip()  # surrounding whitespace is ignored
+        if column == "":
+            raise ValueError("assignment_column must not be blank.")
+        return column
+    if area_type not in AREA_TYPE_TO_ROWWISE_HOUSEHOLD_COLUMN:
+        raise ValueError(
+            f"No default rowwise assignment column is defined for {area_type!r}."
+        )
+    return AREA_TYPE_TO_ROWWISE_HOUSEHOLD_COLUMN[area_type]
+
+
+def build_assigned_local_matrix(
+    metrics: pd.DataFrame | Mapping[str, pd.DataFrame],
+    targets: pd.DataFrame,
+    *,
+    household_frame: pd.DataFrame,
+    area_codes: Sequence[str] | None = None,
+    area_groups: Mapping[str, str] | None = None,
+    household_ids: Sequence[Any] | None = None,
+    area_type: str = "constituency",
+    code_column: str = "code",
+    assignment_column: str | None = None,
+) -> StackedLocalMatrix:
+    """Build a rowwise-assigned sparse matrix for local-area calibration.
+
+    Unlike :func:`build_stacked_local_matrix`, each household has a single
+    column. A household contributes only to the target rows for the local area
+    stored in its rowwise geography assignment column, such as
+    ``constituency_code_oa`` or ``la_code_oa``.
+    """
+
+    if area_codes is None:
+        if code_column not in targets.columns:
+            raise ValueError(
+                "area_codes must be supplied when targets has no "
+                f"{code_column!r} column."
+            )
+        area_codes = targets[code_column].astype(str).tolist()  # default: from targets
+    codes = _area_code_tuple(area_codes)
+    if household_ids is None:
+        if "household_id" not in household_frame.columns:
+            raise ValueError("household_frame must include 'household_id'.")
+        household_ids = household_frame["household_id"].to_numpy()
+    hh_ids = np.asarray(household_ids)
+    aligned_households = _align_household_frame(household_frame, hh_ids)
+    assert aligned_households is not None  # NOTE(review): Optional narrowing? confirm
+    assignment_name = rowwise_assignment_column(
+        area_type,
+        assignment_column=assignment_column,
+    )
+    if assignment_name not in aligned_households.columns:
+        raise ValueError(
+            f"household_frame is missing rowwise assignment column {assignment_name!r}."
+        )
+    assignments = _normalise_area_assignments(aligned_households[assignment_name])
+
+    metric_tables, groups = _normalise_metric_tables(
+        metrics,
+        area_codes=codes,
+        area_groups=area_groups,
+        household_ids=hh_ids,
+    )
+    first = next(iter(metric_tables.values()))
+    metric_names = tuple(str(col) for col in first.columns)
+    target_values = align_area_targets(
+        targets,
+        codes,
+        metric_names=metric_names,
+        code_column=code_column,
+    )
+
+    n_households = len(first)
+    n_areas = len(codes)
+    n_metrics = len(metric_names)
+    n_targets = n_areas * n_metrics  # one target row per (area, metric) pair
+    rows: list[np.ndarray] = []
+    cols: list[np.ndarray] = []
+    data: list[np.ndarray] = []
+    target_rows: list[dict[str, Any]] = []
+    assignment_indices = {
+        area_code: np.flatnonzero(assignments == area_code) for area_code in codes
+    }
+    metric_cache: dict[tuple[str, int], np.ndarray] = {}
+
+    for area_index, area_code in enumerate(codes):
+        group = groups[area_code]
+        household_positions = assignment_indices[area_code]
+        for metric_index, metric_name in enumerate(metric_names):
+            target_index = area_index * n_metrics + metric_index  # area-major rows
+            target_rows.append(
+                {
+                    "target_index": target_index,
+                    "area_type": area_type,
+                    "area_code": area_code,
+                    "area_index": area_index,
+                    "area_group": group,
+                    "metric": metric_name,
+                    "metric_index": metric_index,
+                    "value": float(target_values.loc[area_code, metric_name]),
+                }
+            )
+            if len(household_positions) == 0:
+                continue  # target row is kept; its matrix row stays empty
+            cache_key = (group, metric_index)  # column cached per (group, metric)
+            if cache_key not in metric_cache:
+                column = (
+                    metric_tables[group]
+                    .iloc[:, metric_index]
+                    .to_numpy(dtype=np.float64)
+                )
+                if not np.isfinite(column).all():
+                    raise ValueError(
+                        f"metric {metric_name!r} for group {group!r} "
+                        "contains non-finite values."
+                    )
+                metric_cache[cache_key] = column
+            values = metric_cache[cache_key][household_positions]
+            nz = np.flatnonzero(values)  # only non-zero entries are stored
+            if len(nz) == 0:
+                continue
+            rows.append(np.full(len(nz), target_index, dtype=np.int64))
+            cols.append(household_positions[nz].astype(np.int64))
+            data.append(values[nz].astype(np.float64, copy=False))
+
+    if rows:
+        row_array = np.concatenate(rows)
+        col_array = np.concatenate(cols)
+        data_array = np.concatenate(data)
+    else:
+        row_array = np.array([], dtype=np.int64)
+        col_array = np.array([], dtype=np.int64)
+        data_array = np.array([], dtype=np.float64)
+    matrix = sp.csr_matrix(
+        (data_array, (row_array, col_array)),
+        shape=(n_targets, n_households),
+        dtype=np.float64,
+    )
+    target_frame = pd.DataFrame(target_rows)
+    return StackedLocalMatrix(
+        matrix=matrix,
+        targets=target_frame["value"].to_numpy(dtype=np.float64),
+        target_frame=target_frame,
+        area_codes=codes,
+        metric_names=metric_names,
+        n_households=n_households,
+    )
+
+
 def stacked_design_weights(
     base_weights: Sequence[float],
     n_areas: int,
@@ -362,6 +528,114 @@ def stacked_weights_to_long(
     return out.loc[:, LONG_GEOGRAPHY_COLUMNS]
 
 
+def assigned_weights_to_long(
+    weights: Sequence[float],
+    area_codes: Sequence[str],
+    household_ids: Sequence[Any],
+    *,
+    area_type: str,
+    household_frame: pd.DataFrame,
+    assignment_column: str | None = None,
+    base_weights: Sequence[float] | None = None,
+    drop_weight_atol: float = 0.0,
+    source_year: int | None = None,
+    weight_source: str = "populace_local_assigned",
+    drop_zero: bool = True,
+) -> pd.DataFrame:
+    """Convert assigned household weights to the local-geography sidecar."""
+
+    codes = _area_code_tuple(area_codes)
+    hh_ids = np.asarray(household_ids)
+    n_households = len(hh_ids)
+    w = np.asarray(weights, dtype=np.float64).reshape(-1)  # flatten to 1-D
+    if len(w) != n_households:
+        raise ValueError(
+            f"weights length must equal household count ({n_households}), got {len(w)}."
+        )
+    if not np.isfinite(w).all() or (w < 0).any():
+        raise ValueError("weights must be finite and non-negative.")
+    base = None if base_weights is None else np.asarray(base_weights, dtype=np.float64)
+    if base is not None:
+        if base.shape != w.shape:
+            raise ValueError(
+                f"base_weights must align with weights, got {base.shape} vs {w.shape}."
+            )
+        if not np.isfinite(base).all() or (base < 0).any():
+            raise ValueError("base_weights must be finite and non-negative.")
+    if not np.isfinite(drop_weight_atol) or drop_weight_atol < 0:
+        raise ValueError("drop_weight_atol must be finite and non-negative.")
+
+    household_frame = _align_household_frame(household_frame, hh_ids)
+    assert household_frame is not None  # NOTE(review): Optional narrowing? confirm
+    assignment_name = rowwise_assignment_column(
+        area_type,
+        assignment_column=assignment_column,
+    )
+    if assignment_name not in household_frame.columns:
+        raise ValueError(
+            f"household_frame is missing rowwise assignment column {assignment_name!r}."
+        )
+    assignments = _normalise_area_assignments(household_frame[assignment_name])
+    area_index_by_code = {area_code: idx for idx, area_code in enumerate(codes)}
+    in_requested_area = np.fromiter(  # True where the code is in area_codes
+        (area_code in area_index_by_code for area_code in assignments),
+        dtype=bool,
+        count=n_households,
+    )
+    if drop_zero:
+        if base is None:
+            in_requested_area &= w > drop_weight_atol  # keep w above the tolerance
+        else:  # with base: drop exact zeros, and w <= atol where base == 0
+            zero_base_floor = (base == 0) & (w <= drop_weight_atol)
+            in_requested_area &= (w != 0) & ~zero_base_floor
+    selected = np.flatnonzero(in_requested_area)
+
+    source_year_values = _metadata_values(
+        household_frame,
+        "source_year",
+        default=source_year,
+        length=n_households,
+    )
+    source_household_ids = _metadata_values(
+        household_frame,
+        "source_household_id",
+        default=hh_ids,
+        length=n_households,
+    )
+    source_keys = _metadata_values(
+        household_frame,
+        "source_household_key",
+        default=_source_keys(source_year_values, source_household_ids),
+        length=n_households,
+    )
+    clone_index = _metadata_values(
+        household_frame,
+        "clone_index",
+        default=0,
+        length=n_households,
+    )
+
+    selected_area_codes = assignments[selected]
+    out = pd.DataFrame(
+        {
+            "area_type": area_type,
+            "area_code": selected_area_codes,
+            "area_index": [
+                area_index_by_code[area_code] for area_code in selected_area_codes
+            ],
+            "household_index": selected.astype(np.int64),  # position in household_ids
+            "household_id": hh_ids[selected],
+            "source_year": source_year_values[selected],
+            "source_household_id": source_household_ids[selected],
+            "source_household_key": source_keys[selected],
+            "clone_index": clone_index[selected],
+            "weight": w[selected],
+            "weight_source": weight_source,
+        }
+    )
+    return out.loc[:, LONG_GEOGRAPHY_COLUMNS]  # select and order sidecar columns
+
+
 def area_support_summary(
     long_weights: pd.DataFrame,
     *,
@@ -477,8 +751,7 @@ def _normalise_metric_tables(
     for group, frame in tables.items():
         if len(frame) != len(first):
             raise ValueError(
-                f"metric table {group!r} has {len(frame)} rows; expected "
-                f"{len(first)}."
+                f"metric table {group!r} has {len(frame)} rows; expected {len(first)}."
             )
         if not frame.index.equals(first.index):
             raise ValueError(
@@ -580,6 +853,14 @@ def _align_household_frame(
     return aligned.reset_index(drop=True)
 
 
+def _normalise_area_assignments(values: Sequence[Any]) -> np.ndarray:
+    series = pd.Series(values)
+    missing = series.isna()
+    strings = series.astype(str).str.strip()
+    strings = strings.mask(missing | (strings == ""), None)
+    return strings.to_numpy(dtype=object)
+
+
 def _source_keys(
     source_year: Sequence[Any],
     source_household_id: Sequence[Any],
diff --git a/packages/populace-build/src/populace/build/uk/local_runner.py b/packages/populace-build/src/populace/build/uk/local_runner.py
index 04b2e1a..2e6ea6e 100644
--- a/packages/populace-build/src/populace/build/uk/local_runner.py
+++ b/packages/populace-build/src/populace/build/uk/local_runner.py
@@ -20,13 +20,17 @@
 from populace.build.uk.local_geography import (
     StackedLocalMatrix,
     area_support_summary,
+    assigned_weights_to_long,
+    build_assigned_local_matrix,
     build_stacked_local_matrix,
+    rowwise_assignment_column,
     sort_households_by_id,
     stacked_weights_to_long,
     write_long_geography_weights,
 )
 from populace.build.uk.local_solver import (
     StackedLocalSolveResult,
+    solve_assigned_local_weights,
     solve_stacked_local_weights,
 )
 from populace.build.uk.local_targets import (
@@ -44,6 +48,7 @@ class UKLocalCandidateResult:
     solve_result: StackedLocalSolveResult
     long_weights: pd.DataFrame
     support_summary: pd.DataFrame
+    support_mode: str
 
 
 def read_local_table(path: str | Path) -> pd.DataFrame:
@@ -260,6 +265,8 @@ def build_local_candidate(
     max_areas: int | None = None,
     source_year: int | None = None,
     weight_source: str = "populace_uk_local",
+    support_mode: str = "auto",
+    assignment_column: str | None = None,
     solver_options: Mapping[str, Any] | None = None,
 ) -> UKLocalCandidateResult:
     """Build, solve, and export a UK local candidate in longwise form."""
@@ -281,32 +288,81 @@ def build_local_candidate(
         code_column=code_column,
         group_column=group_column,
     )
-    household_ids = households["household_id"].to_numpy()
-    base_weights = households["household_weight"].to_numpy(dtype=np.float64)
-    target_frame = _as_frame(targets)
-    problem = build_stacked_local_matrix(
-        metrics,
-        target_frame,
-        area_codes=area_codes,
-        area_groups=area_groups,
-        household_ids=household_ids,
-        area_type=area_type,
-        code_column=code_column,
-    )
-    solve_result = solve_stacked_local_weights(
-        problem,
-        base_weights,
-        **dict(solver_options or {}),
-    )
-    long_weights = stacked_weights_to_long(
-        solve_result.weights,
-        area_codes,
-        household_ids,
+    resolved_support_mode = _resolve_support_mode(
+        support_mode,
         area_type=area_type,
         household_frame=households,
-        source_year=source_year,
-        weight_source=weight_source,
+        assignment_column=assignment_column,
     )
+    if resolved_support_mode == "assigned":
+        households = _filter_assigned_households_to_areas(
+            households,
+            area_codes=area_codes,
+            area_type=area_type,
+            assignment_column=assignment_column,
+        )
+        metrics = _subset_metric_tables_to_households(
+            metrics,
+            households["household_id"].to_numpy(),
+        )
+    household_ids = households["household_id"].to_numpy()
+    base_weights = households["household_weight"].to_numpy(dtype=np.float64)
+    target_frame = _as_frame(targets)
+    solver_config = dict(solver_options or {})
+    if resolved_support_mode == "assigned":
+        problem = build_assigned_local_matrix(
+            metrics,
+            target_frame,
+            household_frame=households,
+            area_codes=area_codes,
+            area_groups=area_groups,
+            household_ids=household_ids,
+            area_type=area_type,
+            code_column=code_column,
+            assignment_column=assignment_column,
+        )
+        solve_result = solve_assigned_local_weights(
+            problem,
+            base_weights,
+            **solver_config,
+        )
+        min_initial_weight = float(solver_config.get("min_initial_weight", 1e-4))
+        long_weights = assigned_weights_to_long(
+            solve_result.weights,
+            area_codes,
+            household_ids,
+            area_type=area_type,
+            household_frame=households,
+            assignment_column=assignment_column,
+            base_weights=base_weights,
+            drop_weight_atol=min_initial_weight,
+            source_year=source_year,
+            weight_source=weight_source,
+        )
+    else:
+        problem = build_stacked_local_matrix(
+            metrics,
+            target_frame,
+            area_codes=area_codes,
+            area_groups=area_groups,
+            household_ids=household_ids,
+            area_type=area_type,
+            code_column=code_column,
+        )
+        solve_result = solve_stacked_local_weights(
+            problem,
+            base_weights,
+            **solver_config,
+        )
+        long_weights = stacked_weights_to_long(
+            solve_result.weights,
+            area_codes,
+            household_ids,
+            area_type=area_type,
+            household_frame=households,
+            source_year=source_year,
+            weight_source=weight_source,
+        )
     return UKLocalCandidateResult(
         problem=problem,
         solve_result=solve_result,
@@ -316,6 +372,7 @@ def build_local_candidate(
             area_codes=area_codes,
             area_type=area_type,
         ),
+        support_mode=resolved_support_mode,
     )
 
 
@@ -333,12 +390,16 @@ def build_local_candidate_from_dataset(
     max_areas: int | None = None,
     source_year: int | None = None,
     weight_source: str = "populace_uk_local",
+    support_mode: str = "auto",
+    assignment_column: str | None = None,
     simulation_factory: Callable[[Any], Any] | None = None,
     solver_options: Mapping[str, Any] | None = None,
 ) -> UKLocalCandidateResult:
     """Build a UK local candidate from a Populace UK H5 or dataset object."""
 
-    dataset_obj = load_uk_dataset(dataset) if isinstance(dataset, str | Path) else dataset
+    dataset_obj = (
+        load_uk_dataset(dataset) if isinstance(dataset, str | Path) else dataset
+    )
     areas = prepare_area_frame(
         area_frame,
         code_column=code_column,
@@ -375,6 +436,8 @@ def build_local_candidate_from_dataset(
         sort_areas_by_code=False,
         source_year=source_year,
         weight_source=weight_source,
+        support_mode=support_mode,
+        assignment_column=assignment_column,
         solver_options=solver_options,
     )
 
@@ -394,6 +457,7 @@ def summarize_local_candidate(result: UKLocalCandidateResult) -> dict[str, Any]:
         "n_targets": int(len(result.problem.targets)),
         "n_long_rows": int(len(result.long_weights)),
         "n_nonzero": int(result.solve_result.n_nonzero),
+        "support_mode": result.support_mode,
         "initial_loss": float(result.solve_result.initial_loss),
         "final_loss": float(result.solve_result.final_loss),
         "weight_sum": float(result.long_weights["weight"].sum()),
@@ -418,14 +482,10 @@ def summarize_local_candidate(result: UKLocalCandidateResult) -> dict[str, Any]:
             0 if support.empty else int(support["nonzero_source_households"].max())
         ),
         "min_area_effective_sample_size": (
-            0.0
-            if support.empty
-            else float(support["effective_sample_size"].min())
+            0.0 if support.empty else float(support["effective_sample_size"].min())
         ),
         "median_area_effective_sample_size": (
-            0.0
-            if support.empty
-            else float(support["effective_sample_size"].median())
+            0.0 if support.empty else float(support["effective_sample_size"].median())
         ),
     }
 
@@ -465,6 +525,109 @@ def _normalise_nonblank_strings(values: pd.Series, *, column: str) -> pd.Series:
     return strings
 
 
+def _resolve_support_mode(
+    support_mode: str,
+    *,
+    area_type: str,
+    household_frame: pd.DataFrame,
+    assignment_column: str | None,
+) -> str:
+    mode = str(support_mode).strip().lower()
+    valid_modes = {"auto", "assigned", "stacked"}
+    if mode not in valid_modes:
+        raise ValueError(f"support_mode must be one of {sorted(valid_modes)}.")
+    if mode == "stacked":
+        return mode
+    try:
+        column = rowwise_assignment_column(
+            area_type,
+            assignment_column=assignment_column,
+        )
+    except ValueError:
+        if mode == "auto":
+            return "stacked"
+        raise
+    if mode == "assigned":
+        return mode
+    return "assigned" if column in household_frame.columns else "stacked"
+
+
+def _filter_assigned_households_to_areas(
+    households: pd.DataFrame,
+    *,
+    area_codes: Sequence[str],
+    area_type: str,
+    assignment_column: str | None,
+) -> pd.DataFrame:
+    column = rowwise_assignment_column(area_type, assignment_column=assignment_column)
+    if column not in households.columns:
+        raise ValueError(
+            f"household_frame is missing rowwise assignment column {column!r}."
+        )
+    assignments = _normalise_optional_strings(households[column])
+    mask = assignments.isin(set(map(str, area_codes)))
+    filtered = households.loc[mask].reset_index(drop=True)
+    if filtered.empty:
+        raise ValueError(
+            "no households are assigned to the requested local area codes."
+        )
+    return filtered
+
+
+def _subset_metric_tables_to_households(
+    metrics: pd.DataFrame | Mapping[str, pd.DataFrame],
+    household_ids: Sequence[Any],
+) -> pd.DataFrame | dict[str, pd.DataFrame]:
+    if isinstance(metrics, pd.DataFrame):
+        return _subset_metric_table_to_households(
+            metrics,
+            household_ids,
+            group="__all__",
+        )
+    return {
+        str(group): _subset_metric_table_to_households(
+            frame,
+            household_ids,
+            group=str(group),
+        )
+        for group, frame in metrics.items()
+    }
+
+
+def _subset_metric_table_to_households(
+    table: pd.DataFrame,
+    household_ids: Sequence[Any],
+    *,
+    group: str,
+) -> pd.DataFrame:
+    expected = pd.Index(household_ids)
+    if expected.has_duplicates:
+        duplicates = expected[expected.duplicated()].unique()
+        raise ValueError(
+            "assigned household IDs must be unique before metric subsetting; "
+            f"duplicate value(s): {list(map(str, duplicates[:5]))}."
+        )
+    if table.index.has_duplicates:
+        duplicates = table.index[table.index.duplicated()].unique()
+        raise ValueError(
+            f"metric table {group!r} household index must be unique; "
+            f"duplicate value(s): {list(map(str, duplicates[:5]))}."
+        )
+    missing = expected.difference(table.index)
+    if len(missing):
+        raise ValueError(
+            f"metric table {group!r} is missing household_id value(s): "
+            f"{list(map(str, missing[:5]))}."
+        )
+    return table.reindex(expected)
+
+
+def _normalise_optional_strings(values: pd.Series) -> pd.Series:
+    missing = values.isna()
+    strings = values.astype(str).str.strip()
+    return strings.mask(missing | (strings == ""), None)
+
+
 def _source_household_keys(
     household_frame: pd.DataFrame,
     *,
@@ -494,9 +657,7 @@ def _metric_table_from_frame(
     group: str,
 ) -> pd.DataFrame:
     if household_id_column not in frame.columns:
-        raise ValueError(
-            f"metric table {group!r} is missing {household_id_column!r}."
-        )
+        raise ValueError(f"metric table {group!r} is missing {household_id_column!r}.")
     table = frame.copy()
     if table[household_id_column].isna().any():
         raise ValueError(
@@ -552,12 +713,6 @@ def _align_metric_table_to_households(
             f"metric table {group!r} is missing household_id value(s): "
             f"{list(map(str, missing[:5]))}."
         )
-    extra = table.index.difference(expected)
-    if len(extra):
-        raise ValueError(
-            f"metric table {group!r} has unexpected household_id value(s): "
-            f"{list(map(str, extra[:5]))}."
-        )
     return table.reindex(expected)
 
 
@@ -568,9 +723,7 @@ def _infer_period(dataset: Any, period: int | str | None) -> int | str:
         value = getattr(dataset, attr, None)
         if value is not None:
             return value
-    raise ValueError(
-        "period is required when it cannot be inferred from the dataset."
-    )
+    raise ValueError("period is required when it cannot be inferred from the dataset.")
 
 
 def _default_uk_simulation_factory(dataset: Any) -> Any:
diff --git a/packages/populace-build/src/populace/build/uk/local_solver.py b/packages/populace-build/src/populace/build/uk/local_solver.py
index 5560f2b..d2b9705 100644
--- a/packages/populace-build/src/populace/build/uk/local_solver.py
+++ b/packages/populace-build/src/populace/build/uk/local_solver.py
@@ -1,4 +1,4 @@
-"""Solver wrapper for UK stacked local-geography weights."""
+"""Solver wrappers for UK local-geography weights."""
 
 from __future__ import annotations
 
@@ -72,6 +72,100 @@ def solve_stacked_local_weights(
         problem.n_areas,
         min_weight=min_initial_weight,
     )
+    if len(initial_weights) != problem.matrix.shape[1]:
+        raise ValueError(
+            "base_weights expanded to the wrong stacked length: "
+            f"{len(initial_weights)} vs {problem.matrix.shape[1]}."
+        )
+    return _solve_local_weights(
+        problem,
+        initial_weights,
+        epochs=epochs,
+        learning_rate=learning_rate,
+        max_weight_ratio=max_weight_ratio,
+        conserve_mass=conserve_mass,
+        target_records=target_records,
+        l0_lambda=l0_lambda,
+        target_loss_weights=target_loss_weights,
+        target_loss_scales=target_loss_scales,
+        target_loss_cap=target_loss_cap,
+        budget_iters=budget_iters,
+        seed=seed,
+    )
+
+
+def solve_assigned_local_weights(
+    problem: StackedLocalMatrix,
+    base_weights: Sequence[float],
+    *,
+    epochs: int = 512,
+    learning_rate: float = 0.15,
+    max_weight_ratio: float | None = None,
+    conserve_mass: bool = False,
+    target_records: int | None = None,
+    l0_lambda: float = 0.0,
+    min_initial_weight: float = 1e-4,
+    target_loss_weights: Sequence[float] | None = None,
+    target_loss_scales: Sequence[float] | None = None,
+    target_loss_cap: float = 10.0,
+    budget_iters: int = 10,
+    seed: int = 0,
+) -> StackedLocalSolveResult:
+    """Solve rowwise-assigned local weights for a Populace UK local build.
+
+    ``base_weights`` align one-to-one with the household columns in ``problem``.
+    ``min_initial_weight`` mirrors the stacked solver's floor and must be positive if
+    any base weight is zero: the torch log-weight optimizer needs positive weights.
+    The assigned path defaults to no ``max_weight_ratio`` cap so zero-weight support
+    rows, such as synthetic SPI rows, can be upweighted from the optimizer floor.
+    """
+
+    weights = np.asarray(base_weights, dtype=np.float64)
+    if weights.ndim != 1:
+        raise ValueError("base_weights must be one-dimensional.")
+    if not np.isfinite(weights).all() or (weights < 0).any():
+        raise ValueError("base_weights must be finite and non-negative.")
+    if not np.isfinite(min_initial_weight) or min_initial_weight < 0:
+        raise ValueError("min_initial_weight must be finite and non-negative.")
+    initial_weights = np.maximum(weights, min_initial_weight)
+    if len(initial_weights) != problem.matrix.shape[1]:
+        raise ValueError(
+            "base_weights must align with the assigned local matrix columns: "
+            f"{len(initial_weights)} vs {problem.matrix.shape[1]}."
+        )
+    return _solve_local_weights(
+        problem,
+        initial_weights,
+        epochs=epochs,
+        learning_rate=learning_rate,
+        max_weight_ratio=max_weight_ratio,
+        conserve_mass=conserve_mass,
+        target_records=target_records,
+        l0_lambda=l0_lambda,
+        target_loss_weights=target_loss_weights,
+        target_loss_scales=target_loss_scales,
+        target_loss_cap=target_loss_cap,
+        budget_iters=budget_iters,
+        seed=seed,
+    )
+
+
+def _solve_local_weights(
+    problem: StackedLocalMatrix,
+    initial_weights: np.ndarray,
+    *,
+    epochs: int,
+    learning_rate: float,
+    max_weight_ratio: float | None,
+    conserve_mass: bool,
+    target_records: int | None,
+    l0_lambda: float,
+    target_loss_weights: Sequence[float] | None,
+    target_loss_scales: Sequence[float] | None,
+    target_loss_cap: float,
+    budget_iters: int,
+    seed: int,
+) -> StackedLocalSolveResult:
     targets = np.asarray(problem.targets, dtype=np.float64)
     scales = (
         default_target_loss_scales(targets)
@@ -93,14 +187,9 @@ def solve_stacked_local_weights(
             "target_loss_weights must align with targets, got "
             f"{loss_weights.shape} vs {targets.shape}."
         )
-    if len(initial_weights) != problem.matrix.shape[1]:
-        raise ValueError(
-            "base_weights expanded to the wrong stacked length: "
-            f"{len(initial_weights)} vs {problem.matrix.shape[1]}."
-        )
     if (initial_weights <= 0).any():
         raise ValueError(
-            "all expanded initial weights must be strictly positive for the "
+            "all initial weights must be strictly positive for the "
             "log-weight optimizer; use a positive min_initial_weight or remove "
             "zero-weight records before solving."
         )
diff --git a/packages/populace-build/src/populace/build/uk/source_stages.json b/packages/populace-build/src/populace/build/uk/source_stages.json
new file mode 100644
index 0000000..91ab42e
--- /dev/null
+++ b/packages/populace-build/src/populace/build/uk/source_stages.json
@@ -0,0 +1,990 @@
+{
+  "version": 1,
+  "country": "uk",
+  "policy": "UK source stages are manifest-defined. Country/source content may declare primary artifacts, columns, sentinel handling, derivations, imputation recipes, outputs, and validation requirements here; executable Python belongs only in shared Populace runtimes.",
+  "stages": [
+    {
+      "stage": "frs_base",
+      "survey": "Family Resources Survey 2023-24",
+      "source": "https://www.gov.uk/government/collections/family-resources-survey--2",
+      "grain": "household_person_benunit",
+      "artifacts": [
+        {
+          "kind": "survey_microdata",
+          "format": "tabular_release",
+          "vintage": "2023-24",
+          "locator": "DWP Family Resources Survey household, benefit unit, and person tables"
+        }
+      ],
+      "operations": [
+        {
+          "kind": "read_tables",
+          "tables": [
+            "frs_household",
+            "frs_benunit",
+            "frs_person"
+          ],
+          "weight": "household_weight"
+        },
+        {
+          "kind": "replace_sentinels",
+          "scope": "frs_missing_or_not_applicable_fields"
+        },
+        {
+          "kind": "derive",
+          "outputs": [
+            "entity_ids",
+            "source_household_lineage",
+            "household_income_predictors",
+            "education_counts",
+            "housing_predictors"
+          ]
+        }
+      ],
+      "outputs": [
+        "household_id",
+        "benunit_id",
+        "person_id",
+        "person_household_id",
+        "person_benunit_id",
+        "household_weight",
+        "region",
+        "age",
+        "gender",
+        "employment_income",
+        "self_employment_income",
+        "private_pension_income",
+        "capital_income",
+        "household_net_income",
+        "hbai_household_net_income",
+        "tenure_type",
+        "accommodation_type",
+        "num_adults",
+        "num_children",
+        "num_bedrooms",
+        "council_tax"
+      ],
+      "nonnegative_outputs": [
+        "household_weight",
+        "age",
+        "employment_income",
+        "self_employment_income",
+        "private_pension_income",
+        "household_net_income",
+        "hbai_household_net_income",
+        "num_adults",
+        "num_children",
+        "num_bedrooms",
+        "council_tax"
+      ],
+      "notes": "This stage owns the raw FRS base entities and lineage. The compact UK artifact remains the fast national input; local variants can pool years or clone from this base before row-wise geography assignment."
+    },
+    {
+      "stage": "was_wealth",
+      "survey": "Wealth and Assets Survey",
+      "source": "https://www.ons.gov.uk/peoplepopulationandcommunity/personalandhouseholdfinances/debt/methodologies/wealthandassetssurveyqmi",
+      "grain": "household",
+      "artifacts": [
+        {
+          "kind": "survey_microdata",
+          "format": "secure_or_licensed_extract",
+          "vintage": "latest_available",
+          "locator": "ONS Wealth and Assets Survey household/person extract"
+        }
+      ],
+      "operations": [
+        {
+          "kind": "read_table",
+          "table": "was_household",
+          "weight": "weight"
+        },
+        {
+          "kind": "replace_sentinels",
+          "scope": "was_missing_or_not_applicable_fields"
+        },
+        {
+          "kind": "fit_weighted_qrf",
+          "predictors": [
+            "household_net_income",
+            "num_adults",
+            "num_children",
+            "private_pension_income",
+            "employment_income",
+            "self_employment_income",
+            "capital_income",
+            "num_bedrooms",
+            "council_tax",
+            "is_renting",
+            "region"
+          ]
+        },
+        {
+          "kind": "support_clip",
+          "range": "donor_realized"
+        }
+      ],
+      "outputs": [
+        "owned_land",
+        "property_wealth",
+        "corporate_wealth",
+        "gross_financial_wealth",
+        "net_financial_wealth",
+        "main_residence_value",
+        "other_residential_property_value",
+        "non_residential_property_value",
+        "savings",
+        "num_vehicles",
+        "student_loan_balance",
+        "mortgage_debt",
+        "consumer_debt"
+      ],
+      "nonnegative_outputs": [
+        "owned_land",
+        "property_wealth",
+        "corporate_wealth",
+        "gross_financial_wealth",
+        "main_residence_value",
+        "other_residential_property_value",
+        "non_residential_property_value",
+        "savings",
+        "num_vehicles",
+        "student_loan_balance",
+        "mortgage_debt",
+        "consumer_debt"
+      ],
+      "notes": "Northern Ireland households can borrow donor support from the Wales region when the donor survey does not identify Northern Ireland in enough detail."
+    },
+    {
+      "stage": "regional_property_uprating",
+      "survey": "UK House Price Index and regional land-value tables",
+      "source": "https://www.gov.uk/government/collections/uk-house-price-index-reports",
+      "grain": "household",
+      "artifacts": [
+        {
+          "kind": "administrative_table",
+          "format": "published_table",
+          "vintage": "build_year",
+          "locator": "UK House Price Index regional series and land-value adjustment tables"
+        }
+      ],
+      "operations": [
+        {
+          "kind": "read_table",
+          "table": "regional_property_uprating"
+        },
+        {
+          "kind": "uprate",
+          "from_year": "wealth_survey_vintage",
+          "to_year_from_build_config": true,
+          "by": [
+            "region",
+            "property_type"
+          ]
+        }
+      ],
+      "outputs": [
+        "property_wealth",
+        "main_residence_value"
+      ],
+      "nonnegative_outputs": [
+        "property_wealth",
+        "main_residence_value"
+      ]
+    },
+    {
+      "stage": "lcfs_consumption",
+      "survey": "Living Costs and Food Survey",
+      "source": "https://www.ons.gov.uk/peoplepopulationandcommunity/personalandhouseholdfinances/incomeandwealth/methodologies/livingcostsandfoodsurveyqmi",
+      "grain": "household",
+      "artifacts": [
+        {
+          "kind": "survey_microdata",
+          "format": "licensed_extract",
+          "vintage": "latest_available",
+          "locator": "ONS Living Costs and Food Survey household/person extract"
+        }
+      ],
+      "operations": [
+        {
+          "kind": "read_tables",
+          "tables": [
+            "lcfs_household",
+            "lcfs_person"
+          ],
+          "weight": "weight"
+        },
+        {
+          "kind": "derive",
+          "outputs": [
+            "lcfs_coicop_consumption",
+            "lcfs_fuel_spending",
+            "has_fuel_consumption"
+          ]
+        },
+        {
+          "kind": "fit_weighted_qrf",
+          "predictors": [
+            "is_adult",
+            "is_child",
+            "region",
+            "employment_income",
+            "self_employment_income",
+            "private_pension_income",
+            "hbai_household_net_income",
+            "tenure_type",
+            "accommodation_type",
+            "has_fuel_consumption"
+          ]
+        },
+        {
+          "kind": "support_clip",
+          "range": "donor_realized"
+        }
+      ],
+      "outputs": [
+        "has_fuel_consumption",
+        "food_and_non_alcoholic_beverages_consumption",
+        "alcohol_and_tobacco_consumption",
+        "clothing_and_footwear_consumption",
+        "housing_water_and_electricity_consumption",
+        "household_furnishings_consumption",
+        "health_consumption",
+        "transport_consumption",
+        "communication_consumption",
+        "recreation_consumption",
+        "education_consumption",
+        "restaurants_and_hotels_consumption",
+        "miscellaneous_consumption",
+        "petrol_spending",
+        "diesel_spending",
+        "domestic_energy_consumption",
+        "electricity_consumption",
+        "gas_consumption"
+      ],
+      "nonnegative_outputs": [
+        "food_and_non_alcoholic_beverages_consumption",
+        "alcohol_and_tobacco_consumption",
+        "clothing_and_footwear_consumption",
+        "housing_water_and_electricity_consumption",
+        "household_furnishings_consumption",
+        "health_consumption",
+        "transport_consumption",
+        "communication_consumption",
+        "recreation_consumption",
+        "education_consumption",
+        "restaurants_and_hotels_consumption",
+        "miscellaneous_consumption",
+        "petrol_spending",
+        "diesel_spending",
+        "domestic_energy_consumption",
+        "electricity_consumption",
+        "gas_consumption"
+      ],
+      "notes": "The fuel-consumption bridge uses the WAS vehicle signal on recipient households and LCFS fuel purchases on donors."
+    },
+    {
+      "stage": "road_fuel_energy_calibration",
+      "survey": "Road fuel and household energy administrative totals",
+      "source": "https://www.gov.uk/government/collections/road-transport-consumption-at-regional-and-local-level",
+      "grain": "household",
+      "artifacts": [
+        {
+          "kind": "administrative_table",
+          "format": "published_table",
+          "vintage": "build_year",
+          "locator": "DfT road-fuel consumption totals and DESNZ household energy tables"
+        }
+      ],
+      "operations": [
+        {
+          "kind": "read_tables",
+          "tables": [
+            "road_fuel_consumption",
+            "domestic_energy_targets"
+          ]
+        },
+        {
+          "kind": "uprate",
+          "variables": [
+            "petrol_spending",
+            "diesel_spending",
+            "electricity_consumption",
+            "gas_consumption"
+          ],
+          "targets": [
+            "road_fuel_consumption",
+            "domestic_energy_targets"
+          ],
+          "weight": "household_weight"
+        },
+        {
+          "kind": "derive",
+          "outputs": [
+            "domestic_energy_consumption"
+          ],
+          "formula": "electricity_consumption + gas_consumption"
+        }
+      ],
+      "outputs": [
+        "petrol_spending",
+        "diesel_spending",
+        "domestic_energy_consumption",
+        "electricity_consumption",
+        "gas_consumption"
+      ],
+      "nonnegative_outputs": [
+        "petrol_spending",
+        "diesel_spending",
+        "domestic_energy_consumption",
+        "electricity_consumption",
+        "gas_consumption"
+      ]
+    },
+    {
+      "stage": "etb_vat",
+      "survey": "Effects of Taxes and Benefits",
+      "source": "https://www.ons.gov.uk/peoplepopulationandcommunity/personalandhouseholdfinances/incomeandwealth/datasets/theeffectsoftaxesandbenefitsonhouseholdincomehistoricaldatasets",
+      "grain": "household",
+      "artifacts": [
+        {
+          "kind": "published_microdata_table",
+          "format": "spreadsheet",
+          "vintage": "build_year",
+          "locator": "ONS Effects of Taxes and Benefits indirect-tax tables"
+        }
+      ],
+      "operations": [
+        {
+          "kind": "read_table",
+          "table": "etb_household"
+        },
+        {
+          "kind": "derive",
+          "outputs": [
+            "full_rate_vat_expenditure_rate"
+          ]
+        },
+        {
+          "kind": "fit_weighted_qrf",
+          "predictors": [
+            "is_adult",
+            "is_child",
+            "is_SP_age",
+            "household_net_income"
+          ]
+        },
+        {
+          "kind": "support_clip",
+          "range": "donor_realized"
+        }
+      ],
+      "outputs": [
+        "full_rate_vat_expenditure_rate"
+      ],
+      "nonnegative_outputs": [
+        "full_rate_vat_expenditure_rate"
+      ]
+    },
+    {
+      "stage": "nhs_usage",
+      "survey": "NHS activity and unit-cost tables",
+      "source": "https://www.england.nhs.uk/statistics/statistical-work-areas/hospital-activity/monthly-hospital-activity/",
+      "grain": "person",
+      "artifacts": [
+        {
+          "kind": "administrative_table",
+          "format": "published_table",
+          "vintage": "build_year",
+          "locator": "NHS activity counts and service-cost totals by age/sex where available"
+        }
+      ],
+      "operations": [
+        {
+          "kind": "read_tables",
+          "tables": [
+            "nhs_activity",
+            "nhs_unit_costs"
+          ]
+        },
+        {
+          "kind": "fit_weighted_imputer",
+          "predictors": [
+            "age",
+            "gender",
+            "region",
+            "disability_benefit_indicators",
+            "hbai_household_net_income"
+          ]
+        },
+        {
+          "kind": "derive",
+          "outputs": [
+            "nhs_visits",
+            "nhs_spending"
+          ]
+        },
+        {
+          "kind": "support_clip",
+          "range": "administrative_nonnegative"
+        }
+      ],
+      "outputs": [
+        "a_and_e_visits",
+        "admitted_patient_visits",
+        "outpatient_visits",
+        "nhs_a_and_e_spending",
+        "nhs_admitted_patient_spending",
+        "nhs_outpatient_spending",
+        "nhs_visits",
+        "nhs_spending"
+      ],
+      "nonnegative_outputs": [
+        "a_and_e_visits",
+        "admitted_patient_visits",
+        "outpatient_visits",
+        "nhs_a_and_e_spending",
+        "nhs_admitted_patient_spending",
+        "nhs_outpatient_spending",
+        "nhs_visits",
+        "nhs_spending"
+      ]
+    },
+    {
+      "stage": "etb_public_services",
+      "survey": "Effects of Taxes and Benefits public-service tables",
+      "source": "https://www.ons.gov.uk/peoplepopulationandcommunity/personalandhouseholdfinances/incomeandwealth/datasets/theeffectsoftaxesandbenefitsonhouseholdincomehistoricaldatasets",
+      "grain": "household",
+      "artifacts": [
+        {
+          "kind": "published_microdata_table",
+          "format": "spreadsheet",
+          "vintage": "build_year",
+          "locator": "ONS Effects of Taxes and Benefits benefits-in-kind tables"
+        }
+      ],
+      "operations": [
+        {
+          "kind": "read_table",
+          "table": "etb_household_services"
+        },
+        {
+          "kind": "derive",
+          "outputs": [
+            "rail_usage"
+          ]
+        },
+        {
+          "kind": "fit_weighted_qrf",
+          "predictors": [
+            "is_adult",
+            "is_child",
+            "is_SP_age",
+            "count_primary_education",
+            "count_secondary_education",
+            "count_further_education",
+            "dla",
+            "pip",
+            "hbai_household_net_income"
+          ]
+        },
+        {
+          "kind": "support_clip",
+          "range": "donor_realized"
+        }
+      ],
+      "outputs": [
+        "dfe_education_spending",
+        "rail_subsidy_spending",
+        "bus_subsidy_spending",
+        "rail_usage"
+      ],
+      "nonnegative_outputs": [
+        "dfe_education_spending",
+        "rail_subsidy_spending",
+        "bus_subsidy_spending",
+        "rail_usage"
+      ]
+    },
+    {
+      "stage": "rail_public_service_calibration",
+      "survey": "Rail public-service administrative totals",
+      "source": "https://www.gov.uk/government/collections/rail-statistics",
+      "grain": "household",
+      "artifacts": [
+        {
+          "kind": "administrative_table",
+          "format": "published_table",
+          "vintage": "build_year",
+          "locator": "DfT rail passenger and subsidy totals"
+        }
+      ],
+      "operations": [
+        {
+          "kind": "read_table",
+          "table": "rail_public_service_targets"
+        },
+        {
+          "kind": "uprate",
+          "variables": [
+            "rail_subsidy_spending",
+            "rail_usage"
+          ],
+          "targets": [
+            "rail_public_service_targets"
+          ],
+          "weight": "household_weight"
+        }
+      ],
+      "outputs": [
+        "rail_subsidy_spending",
+        "rail_usage"
+      ],
+      "nonnegative_outputs": [
+        "rail_subsidy_spending",
+        "rail_usage"
+      ],
+      "notes": "This post-weight stage scales rail usage and subsidy after the local/final household weights are available."
+    },
+    {
+      "stage": "spi_support_channel",
+      "survey": "Family Resources Survey 2023-24 support copy",
+      "source": "https://www.gov.uk/government/collections/family-resources-survey--2",
+      "grain": "household_person_benunit",
+      "artifacts": [
+        {
+          "kind": "derived_support_frame",
+          "format": "in_memory_tables",
+          "vintage": "2023-24",
+          "locator": "zero-weight FRS support channel for high-income SPI imputation"
+        }
+      ],
+      "operations": [
+        {
+          "kind": "derive",
+          "outputs": [
+            "zero_weight_spi_support_copy",
+            "source_household_lineage"
+          ]
+        }
+      ],
+      "outputs": [
+        "household_is_spi_synthetic",
+        "household_support_channel",
+        "person_support_channel",
+        "benunit_support_channel",
+        "household_support_clone_index",
+        "person_support_clone_index",
+        "benunit_support_clone_index",
+        "household_source_id",
+        "person_source_id",
+        "benunit_source_id",
+        "source_household_id",
+        "source_household_key"
+      ],
+      "notes": "The support copy has zero initial household weight and source-household lineage so local-geography support accounting does not count it as independent FRS sample."
+    },
+    {
+      "stage": "spi_income",
+      "survey": "Survey of Personal Incomes",
+      "source": "https://www.gov.uk/government/collections/personal-incomes-statistics",
+      "grain": "person",
+      "artifacts": [
+        {
+          "kind": "administrative_microdata_or_tabulation",
+          "format": "hmrc_spi_extract",
+          "vintage": "latest_available",
+          "locator": "HMRC Survey of Personal Incomes person-level income and relief records"
+        }
+      ],
+      "operations": [
+        {
+          "kind": "read_table",
+          "table": "spi_person",
+          "weight": "spi_weight"
+        },
+        {
+          "kind": "fit_weighted_qrf",
+          "predictors": [
+            "age",
+            "gender",
+            "region"
+          ]
+        },
+        {
+          "kind": "support_clip",
+          "range": "donor_realized"
+        }
+      ],
+      "outputs": [
+        "employment_income",
+        "self_employment_income",
+        "savings_interest_income",
+        "dividend_income",
+        "private_pension_income",
+        "property_income",
+        "gift_aid",
+        "charitable_investment_gifts"
+      ],
+      "nonnegative_outputs": [
+        "employment_income",
+        "self_employment_income",
+        "savings_interest_income",
+        "dividend_income",
+        "private_pension_income",
+        "property_income",
+        "gift_aid",
+        "charitable_investment_gifts"
+      ],
+      "notes": "The SPI-trained first stage fills the SPI support channel jointly for income components, Gift Aid, and qualifying investment gifts."
+    },
+    {
+      "stage": "frs_only_spi_fill",
+      "survey": "Family Resources Survey 2023-24",
+      "source": "https://www.gov.uk/government/collections/family-resources-survey--2",
+      "grain": "person",
+      "artifacts": [
+        {
+          "kind": "survey_microdata",
+          "format": "tabular_release",
+          "vintage": "2023-24",
+          "locator": "DWP Family Resources Survey person-level pension, savings, and reported-benefit fields"
+        }
+      ],
+      "operations": [
+        {
+          "kind": "read_table",
+          "table": "frs_person"
+        },
+        {
+          "kind": "fit_weighted_qrf",
+          "predictors": [
+            "age",
+            "gender",
+            "region",
+            "employment_income",
+            "self_employment_income",
+            "savings_interest_income",
+            "dividend_income",
+            "private_pension_income",
+            "property_income"
+          ]
+        },
+        {
+          "kind": "support_clip",
+          "range": "donor_realized"
+        }
+      ],
+      "outputs": [
+        "employee_pension_contributions",
+        "employer_pension_contributions",
+        "personal_pension_contributions",
+        "pension_contributions_via_salary_sacrifice",
+        "tax_free_savings_income",
+        "universal_credit_reported",
+        "pension_credit_reported",
+        "child_benefit_reported",
+        "housing_benefit_reported",
+        "income_support_reported",
+        "working_tax_credit_reported",
+        "child_tax_credit_reported",
+        "attendance_allowance_reported",
+        "state_pension_reported",
+        "dla_sc_reported",
+        "dla_m_reported",
+        "pip_m_reported",
+        "pip_dl_reported",
+        "sda_reported",
+        "carers_allowance_reported",
+        "iidb_reported",
+        "afcs_reported",
+        "bsp_reported",
+        "incapacity_benefit_reported",
+        "maternity_allowance_reported",
+        "winter_fuel_allowance_reported",
+        "council_tax_benefit_reported",
+        "jsa_contrib_reported",
+        "jsa_income_reported",
+        "esa_contrib_reported",
+        "esa_income_reported"
+      ],
+      "nonnegative_outputs": [
+        "employee_pension_contributions",
+        "employer_pension_contributions",
+        "personal_pension_contributions",
+        "pension_contributions_via_salary_sacrifice",
+        "tax_free_savings_income",
+        "universal_credit_reported",
+        "pension_credit_reported",
+        "child_benefit_reported",
+        "housing_benefit_reported",
+        "income_support_reported",
+        "working_tax_credit_reported",
+        "child_tax_credit_reported",
+        "attendance_allowance_reported",
+        "state_pension_reported",
+        "dla_sc_reported",
+        "dla_m_reported",
+        "pip_m_reported",
+        "pip_dl_reported",
+        "sda_reported",
+        "carers_allowance_reported",
+        "iidb_reported",
+        "afcs_reported",
+        "bsp_reported",
+        "incapacity_benefit_reported",
+        "maternity_allowance_reported",
+        "winter_fuel_allowance_reported",
+        "council_tax_benefit_reported",
+        "jsa_contrib_reported",
+        "jsa_income_reported",
+        "esa_contrib_reported",
+        "esa_income_reported"
+      ],
+      "notes": "This stage replaces benefit receipt and pension/savings behavior on SPI support rows with draws conditional on the SPI-imputed income surface."
+    },
+    {
+      "stage": "advani_summers_capital_gains",
+      "survey": "Advani-Summers capital gains distribution",
+      "source": "https://ideas.repec.org/p/hal/wpaper/halshs-03022609.html",
+      "grain": "household",
+      "artifacts": [
+        {
+          "kind": "research_table",
+          "format": "csv",
+          "vintage": "latest_available",
+          "locator": "capital gains distribution by income/rank cell"
+        }
+      ],
+      "operations": [
+        {
+          "kind": "read_table",
+          "table": "capital_gains_distribution"
+        },
+        {
+          "kind": "calibrate_binary_assignment",
+          "variable": "household_is_capital_gains_clone",
+          "weight": "household_weight"
+        },
+        {
+          "kind": "fit_weighted_imputer",
+          "predictors": [
+            "household_net_income",
+            "employment_income",
+            "self_employment_income",
+            "dividend_income",
+            "property_income"
+          ]
+        },
+        {
+          "kind": "support_clip",
+          "range": "donor_realized"
+        }
+      ],
+      "outputs": [
+        "capital_gains",
+        "household_is_capital_gains_clone"
+      ],
+      "nonnegative_outputs": [
+        "capital_gains"
+      ]
+    },
+    {
+      "stage": "frs_salary_sacrifice",
+      "survey": "Family Resources Survey salary-sacrifice subsample",
+      "source": "https://www.gov.uk/government/collections/family-resources-survey--2",
+      "grain": "person",
+      "artifacts": [
+        {
+          "kind": "survey_microdata",
+          "format": "tabular_release",
+          "vintage": "2023-24",
+          "locator": "FRS person-level salary-sacrifice fields with OBR/ASHE aggregate target"
+        }
+      ],
+      "operations": [
+        {
+          "kind": "read_table",
+          "table": "frs_person_salary_sacrifice"
+        },
+        {
+          "kind": "fit_weighted_qrf",
+          "predictors": [
+            "age",
+            "employment_income"
+          ]
+        },
+        {
+          "kind": "fold_into",
+          "target": "employee_pension_contributions",
+          "amount": "pension_contributions_via_salary_sacrifice"
+        },
+        {
+          "kind": "support_clip",
+          "range": "donor_realized"
+        }
+      ],
+      "outputs": [
+        "pension_contributions_via_salary_sacrifice",
+        "employee_pension_contributions"
+      ],
+      "nonnegative_outputs": [
+        "pension_contributions_via_salary_sacrifice",
+        "employee_pension_contributions"
+      ]
+    },
+    {
+      "stage": "slc_student_loan_plan",
+      "survey": "Student Loans Company repayment-plan statistics",
+      "source": "https://www.gov.uk/government/collections/student-loans-for-higher-and-further-education",
+      "grain": "person",
+      "artifacts": [
+        {
+          "kind": "administrative_table",
+          "format": "published_table",
+          "vintage": "build_year",
+          "locator": "SLC borrower plan snapshot by cohort and geography"
+        }
+      ],
+      "operations": [
+        {
+          "kind": "read_table",
+          "table": "slc_student_loan_snapshot"
+        },
+        {
+          "kind": "assign_by_plan_type",
+          "predictors": [
+            "age",
+            "student_loan_balance",
+            "education_status",
+            "region"
+          ],
+          "output": "student_loan_plan"
+        }
+      ],
+      "outputs": [
+        "student_loan_plan"
+      ]
+    },
+    {
+      "stage": "rowwise_oa_geography",
+      "survey": "UK official small-area geography crosswalks",
+      "source": "https://geoportal.statistics.gov.uk/",
+      "grain": "household",
+      "artifacts": [
+        {
+          "kind": "public_geography",
+          "format": "csv_or_geojson",
+          "vintage": "build_year",
+          "locator": "ONS, NRS, NISRA, and postcode-directory OA/DZ to LA/constituency crosswalks"
+        }
+      ],
+      "operations": [
+        {
+          "kind": "read_tables",
+          "tables": [
+            "uk_official_geography_crosswalk",
+            "household_region_frame"
+          ]
+        },
+        {
+          "kind": "derive",
+          "outputs": [
+            "rowwise_household_clones",
+            "finest_available_geography_assignment"
+          ]
+        },
+        {
+          "kind": "join",
+          "on": [
+            "household_id",
+            "source_household_id"
+          ]
+        }
+      ],
+      "outputs": [
+        "oa_code",
+        "lsoa_code",
+        "msoa_code",
+        "la_code_oa",
+        "constituency_code_oa",
+        "region_code_oa"
+      ],
+      "notes": "This is the long-format local geography path: each household row receives one finest-area assignment, and later local weights export as area-household rows rather than a dense area-by-household matrix."
+    },
+    {
+      "stage": "national_calibration",
+      "survey": "UK national calibration target registry",
+      "source": "https://github.com/PolicyEngine/populace/tree/main/packages/populace-calibrate",
+      "grain": "household",
+      "artifacts": [
+        {
+          "kind": "target_registry",
+          "format": "json_or_yaml",
+          "vintage": "build_year",
+          "locator": "Populace national calibration target registry and supplied administrative target tables"
+        }
+      ],
+      "operations": [
+        {
+          "kind": "read_tables",
+          "tables": [
+            "national_calibration_targets",
+            "household_metric_tables"
+          ]
+        },
+        {
+          "kind": "calibrate_weights",
+          "weight": "household_weight",
+          "outputs": [
+            "household_weight"
+          ]
+        }
+      ],
+      "outputs": [
+        "household_weight"
+      ],
+      "nonnegative_outputs": [
+        "household_weight"
+      ],
+      "notes": "This stage rewrites design weights to national target weights before local and post-weight amount scaling stages."
+    },
+    {
+      "stage": "local_geography_weights",
+      "survey": "UK local calibration target tables",
+      "source": "https://github.com/PolicyEngine/populace/tree/main/packages/populace-build",
+      "grain": "household_area",
+      "artifacts": [
+        {
+          "kind": "target_tables",
+          "format": "csv_or_parquet",
+          "vintage": "build_year",
+          "locator": "Explicit constituency and local-authority target tables supplied to the UK local runner"
+        }
+      ],
+      "operations": [
+        {
+          "kind": "read_tables",
+          "tables": [
+            "local_area_targets",
+            "household_metric_tables"
+          ]
+        },
+        {
+          "kind": "calibrate_weights",
+          "weight": "household_weight",
+          "outputs": [
+            "local_geography_weight"
+          ]
+        }
+      ],
+      "outputs": [
+        "local_geography_weight",
+        "local_solve_diagnostic"
+      ],
+      "nonnegative_outputs": [
+        "local_geography_weight"
+      ],
+      "notes": "Assigned long local weights are solved after row-wise geography assignment and before post-weight rail/fuel amount scaling."
+    }
+  ]
+}
diff --git a/packages/populace-build/src/populace/build/uk/spi_support.py b/packages/populace-build/src/populace/build/uk/spi_support.py
index 630fda0..9aff3b5 100644
--- a/packages/populace-build/src/populace/build/uk/spi_support.py
+++ b/packages/populace-build/src/populace/build/uk/spi_support.py
@@ -33,9 +33,9 @@
     "property_income",
 )
 
-# Mirrors the eFRS SPI-trained first-stage QRF output surface. Gift Aid and
-# qualifying investment gifts are relief variables, not income components, but
-# they need to be drawn jointly with high-income SPI rows.
+# UK SPI-trained first-stage QRF output surface. Gift Aid and qualifying
+# investment gifts are relief variables, not income components, but they need
+# to be drawn jointly with high-income SPI rows.
 SPI_INCOME_IMPUTATION_COLUMNS = SPI_INCOME_COMPONENT_COLUMNS + (
     "gift_aid",
     "charitable_investment_gifts",
@@ -48,9 +48,9 @@
     *SPI_INCOME_COMPONENT_COLUMNS,
 )
 
-# Mirrors the eFRS second-stage FRS-only QRF output surface. These fields are
-# replaced on SPI support rows so high-income synthetic rows do not retain a
-# random middle-income FRS donor's benefit receipt or pension behavior.
+# UK second-stage FRS-only QRF output surface. These fields are replaced on SPI
+# support rows so high-income synthetic rows do not retain a random
+# middle-income FRS donor's benefit receipt or pension behavior.
 FRS_ONLY_SPI_FILL_PERSON_COLUMNS = (
     "employee_pension_contributions",
     "employer_pension_contributions",
@@ -245,7 +245,7 @@ def fill_support_channel_from_source(
     QRF prediction frame keyed by original ``person_id``. Rows outside
     ``channel`` are left unchanged. Missing target columns are initialized to
     ``fill_missing_columns_with`` before the channel-specific update, matching
-    the eFRS treatment of SPI-only variables such as charitable-giving fields.
+    the UK SPI treatment of SPI-only variables such as charitable-giving fields.
     """
 
     entity = _require_entity(entity)
diff --git a/packages/populace-build/tests/test_uk_local_geography.py b/packages/populace-build/tests/test_uk_local_geography.py
index 9ba714f..1363f61 100644
--- a/packages/populace-build/tests/test_uk_local_geography.py
+++ b/packages/populace-build/tests/test_uk_local_geography.py
@@ -7,6 +7,8 @@
 from populace.build.uk import (
     LONG_GEOGRAPHY_COLUMNS,
     area_support_summary,
+    assigned_weights_to_long,
+    build_assigned_local_matrix,
     build_stacked_local_matrix,
     sort_households_by_id,
     stacked_design_weights,
@@ -82,6 +84,61 @@ def test_build_stacked_local_matrix_uses_area_blocks_and_group_metrics() -> None
     np.testing.assert_allclose(dense[3], [0.0, 0.0, 0.0, 0.0, 2.0, 2.0])
 
 
+def test_build_assigned_local_matrix_uses_rowwise_area_assignments() -> None:
+    metrics = {
+        "England": pd.DataFrame(
+            {
+                "population": [1.0, 2.0, 3.0],
+                "earnings": [10.0, 20.0, 30.0],
+            },
+            index=[101, 102, 103],
+        ),
+        "Scotland": pd.DataFrame(
+            {
+                "population": [100.0, 200.0, 300.0],
+                "earnings": [1000.0, 2000.0, 3000.0],
+            },
+            index=[101, 102, 103],
+        ),
+    }
+    targets = pd.DataFrame(
+        {
+            "code": ["S001", "E001"],
+            "population": [300.0, 1.0],
+            "earnings": [3000.0, 10.0],
+        }
+    )
+    households = pd.DataFrame(
+        {
+            "household_id": [103, 101, 102],
+            "constituency_code_oa": ["S001", "E001", "E999"],
+        }
+    )
+
+    assigned = build_assigned_local_matrix(
+        metrics,
+        targets,
+        household_frame=households,
+        area_codes=["E001", "S001"],
+        area_groups={"E001": "England", "S001": "Scotland"},
+        household_ids=[101, 102, 103],
+    )
+
+    assert assigned.matrix.shape == (4, 3)
+    assert assigned.targets.tolist() == [1.0, 10.0, 300.0, 3000.0]
+    assert assigned.target_frame["area_code"].tolist() == [
+        "E001",
+        "E001",
+        "S001",
+        "S001",
+    ]
+    dense = assigned.matrix.toarray()
+    np.testing.assert_allclose(dense[0], [1.0, 0.0, 0.0])
+    np.testing.assert_allclose(dense[1], [10.0, 0.0, 0.0])
+    np.testing.assert_allclose(dense[2], [0.0, 0.0, 300.0])
+    np.testing.assert_allclose(dense[3], [0.0, 0.0, 3000.0])
+
+
 def test_build_stacked_local_matrix_rejects_drifted_household_index() -> None:
     metrics = {
         "England": pd.DataFrame({"population": [1.0, 2.0]}, index=[101, 102]),
@@ -156,6 +213,57 @@ def test_stacked_weights_to_long_preserves_source_metadata() -> None:
     assert long["clone_index"].tolist() == [0, 0, 3]
 
 
+def test_assigned_weights_to_long_preserves_metadata_and_filters_area_codes() -> None:
+    household_frame = pd.DataFrame(
+        {
+            "household_id": [102, 103, 101],
+            "constituency_code_oa": ["S001", "E999", "E001"],
+            "source_year": [2022, 2021, 2023],
+            "source_household_id": ["b", "c", "a"],
+            "source_household_key": ["2022:b", "2021:c", "2023:a"],
+            "clone_index": [3, 2, 0],
+        }
+    )
+
+    long = assigned_weights_to_long(
+        [1.5, 2.5, 3.5],
+        ["E001", "S001"],
+        [101, 102, 103],
+        area_type="constituency",
+        household_frame=household_frame,
+    )
+
+    assert tuple(long.columns) == LONG_GEOGRAPHY_COLUMNS
+    assert long["weight"].tolist() == [1.5, 2.5]
+    assert long["area_code"].tolist() == ["E001", "S001"]
+    assert long["area_index"].tolist() == [0, 1]
+    assert long["household_id"].tolist() == [101, 102]
+    assert long["source_household_key"].tolist() == ["2023:a", "2022:b"]
+    assert long["clone_index"].tolist() == [0, 3]
+
+
+def test_assigned_weights_to_long_drops_unused_zero_base_floor_weights() -> None:
+    household_frame = pd.DataFrame(
+        {
+            "household_id": [101, 102, 103],
+            "constituency_code_oa": ["E001", "E001", "E001"],
+        }
+    )
+
+    long = assigned_weights_to_long(
+        [1.0, 1e-4, 0.5],
+        ["E001"],
+        [101, 102, 103],
+        area_type="constituency",
+        household_frame=household_frame,
+        base_weights=[1.0, 0.0, 0.0],
+        drop_weight_atol=1e-4,
+    )
+
+    assert long["household_id"].tolist() == [101, 103]
+    assert long["weight"].tolist() == [1.0, 0.5]
+
+
 def test_stacked_weights_to_long_rejects_missing_household_metadata() -> None:
     household_frame = pd.DataFrame({"household_id": [101], "source_year": [2023]})
 
diff --git a/packages/populace-build/tests/test_uk_local_runner.py b/packages/populace-build/tests/test_uk_local_runner.py
index 0ae45b7..046f971 100644
--- a/packages/populace-build/tests/test_uk_local_runner.py
+++ b/packages/populace-build/tests/test_uk_local_runner.py
@@ -45,9 +45,7 @@ def calculate(self, variable, **_kwargs):
 
 
 def test_prepare_area_frame_sorts_and_validates_codes() -> None:
-    areas = pd.DataFrame(
-        {"code": ["S001", "E001"], "country": ["Scotland", "England"]}
-    )
+    areas = pd.DataFrame({"code": ["S001", "E001"], "country": ["Scotland", "England"]})
 
     prepared = prepare_area_frame(areas)
 
@@ -129,10 +127,7 @@ def fake_compute(sim, area_type, *, period=None, household_ids=None):
 
     assert set(tables) == {"England", "Scotland"}
     assert tables["England"].index.tolist() == [101, 102]
-    regions = [
-        sim.inputs[("region", 2023)][0]
-        for sim in calls
-    ]
+    regions = [sim.inputs[("region", 2023)][0] for sim in calls]
     assert regions == ["SOUTH_EAST", "SCOTLAND"]
 
 
@@ -165,10 +160,37 @@ def fake_compute(sim, area_type, *, period=None, household_ids=None):
     assert tables["England"]["population"].tolist() == [10.0, 20.0]
 
 
-def test_build_local_candidate_solves_and_exports_long_weights() -> None:
-    areas = pd.DataFrame(
-        {"code": ["S001", "E001"], "country": ["Scotland", "England"]}
+def test_build_metric_tables_from_dataset_allows_selected_households(
+    monkeypatch,
+) -> None:
+    class ExtraHouseholdSimulation(FakeSimulation):
+        def calculate(self, variable, **_kwargs):
+            assert variable == "household_id"
+            return Result([103, 102, 101])
+
+    def fake_compute(sim, area_type, *, period=None, household_ids=None):
+        assert household_ids is None
+        return pd.DataFrame(
+            {"population": [30.0, 20.0, 10.0]},
+            index=pd.Index([103, 102, 101]),
+        )
+
+    monkeypatch.setattr(local_runner, "compute_household_metrics", fake_compute)
+
+    tables = build_metric_tables_from_dataset(
+        dataset=type("Dataset", (), {"time_period": 2023})(),
+        area_groups={"E001": "England"},
+        area_type="constituency",
+        household_ids=[101, 102],
+        simulation_factory=ExtraHouseholdSimulation,
     )
+
+    assert tables["England"].index.tolist() == [101, 102]
+    assert tables["England"]["population"].tolist() == [10.0, 20.0]
+
+
+def test_build_local_candidate_solves_and_exports_long_weights() -> None:
+    areas = pd.DataFrame({"code": ["S001", "E001"], "country": ["Scotland", "England"]})
     targets = pd.DataFrame(
         {
             "code": ["E001", "S001"],
@@ -210,6 +232,87 @@ def test_build_local_candidate_solves_and_exports_long_weights() -> None:
     assert "effective_sample_size" in result.support_summary.columns
 
 
+def test_build_local_candidate_uses_assigned_support_when_available() -> None:
+    areas = pd.DataFrame({"code": ["S001", "E001"], "country": ["Scotland", "England"]})
+    targets = pd.DataFrame(
+        {
+            "code": ["E001", "S001"],
+            "population": [1.5, 0.5],
+        }
+    )
+    metrics = {
+        "England": pd.DataFrame(
+            {"population": [1.0, 1.0, 10.0]},
+            index=[101, 102, 103],
+        ),
+        "Scotland": pd.DataFrame(
+            {"population": [1.0, 1.0, 10.0]},
+            index=[101, 102, 103],
+        ),
+    }
+    households = pd.DataFrame(
+        {
+            "household_id": [102, 103, 101],
+            "household_weight": [1.0, 100.0, 1.0],
+            "constituency_code_oa": ["S001", "E999", "E001"],
+        }
+    )
+
+    result = build_local_candidate(
+        area_type="constituency",
+        area_frame=areas,
+        targets=targets,
+        metrics=metrics,
+        household_frame=households,
+        solver_options={"epochs": 80, "learning_rate": 0.2, "seed": 1},
+    )
+
+    assert result.support_mode == "assigned"
+    assert result.problem.matrix.shape == (2, 2)
+    assert result.problem.n_households == 2
+    assert result.solve_result.weights.shape == (2,)
+    assert result.solve_result.final_loss < result.solve_result.initial_loss
+    assert result.long_weights["area_code"].tolist() == ["E001", "S001"]
+    assert result.support_summary["nonzero_households"].tolist() == [1, 1]
+
+
+def test_build_local_candidate_uses_la_assigned_support_and_zero_area() -> None:
+    areas = pd.DataFrame({"code": ["E06000002", "E06000001"]})
+    targets = pd.DataFrame(
+        {
+            "code": ["E06000001", "E06000002"],
+            "population": [1.0, 0.0],
+        }
+    )
+    metrics = pd.DataFrame({"population": [1.0, 10.0]}, index=[101, 102])
+    households = pd.DataFrame(
+        {
+            "household_id": [102, 101],
+            "household_weight": [100.0, 1.0],
+            "la_code_oa": ["E99999999", "E06000001"],
+        }
+    )
+
+    result = build_local_candidate(
+        area_type="la",
+        area_frame=areas,
+        targets=targets,
+        metrics=metrics,
+        household_frame=households,
+        solver_options={"epochs": 5, "learning_rate": 0.2, "seed": 1},
+    )
+
+    assert result.support_mode == "assigned"
+    assert result.problem.area_codes == ("E06000001", "E06000002")
+    assert result.problem.matrix.shape == (2, 1)
+    assert result.long_weights["area_code"].tolist() == ["E06000001"]
+    assert result.support_summary["area_code"].tolist() == [
+        "E06000001",
+        "E06000002",
+    ]
+    assert result.support_summary["nonzero_households"].tolist() == [1, 0]
+
+
 def test_build_local_candidate_can_limit_pilot_areas() -> None:
     areas = pd.DataFrame(
         {
@@ -271,6 +374,42 @@ def fake_compute(sim, area_type, *, period=None, household_ids=None):
     assert result.support_summary["nonzero_households"].tolist() == [1]
 
 
+def test_build_local_candidate_from_dataset_auto_uses_assigned_support(
+    monkeypatch,
+) -> None:
+    areas = pd.DataFrame({"code": ["E001"], "country": ["England"]})
+    targets = pd.DataFrame({"code": ["E001"], "population": [1.0]})
+    households = pd.DataFrame(
+        {
+            "household_id": [101],
+            "household_weight": [1.0],
+            "constituency_code_oa": ["E001"],
+        }
+    )
+
+    def fake_compute(sim, area_type, *, period=None, household_ids=None):
+        assert area_type == "constituency"
+        assert period == 2023
+        assert household_ids is None
+        return pd.DataFrame({"population": [1.0]}, index=pd.Index([101]))
+
+    monkeypatch.setattr(local_runner, "compute_household_metrics", fake_compute)
+
+    result = build_local_candidate_from_dataset(
+        dataset=type("Dataset", (), {"time_period": 2023})(),
+        area_type="constituency",
+        area_frame=areas,
+        targets=targets,
+        household_frame=households,
+        simulation_factory=SingleHouseholdSimulation,
+        solver_options={"epochs": 2},
+    )
+
+    assert result.support_mode == "assigned"
+    assert result.problem.matrix.shape == (1, 1)
+    assert result.long_weights["area_code"].tolist() == ["E001"]
+
+
 def test_write_local_candidate_outputs(tmp_path: Path) -> None:
     areas = pd.DataFrame({"code": ["E001"], "country": ["England"]})
     targets = pd.DataFrame({"code": ["E001"], "population": [1.0]})
diff --git a/packages/populace-build/tests/test_uk_local_solver.py b/packages/populace-build/tests/test_uk_local_solver.py
index 52ca571..b73b8b5 100644
--- a/packages/populace-build/tests/test_uk_local_solver.py
+++ b/packages/populace-build/tests/test_uk_local_solver.py
@@ -7,7 +7,9 @@
 
 import populace.build.uk.local_solver as local_solver
 from populace.build.uk import (
+    build_assigned_local_matrix,
     build_stacked_local_matrix,
+    solve_assigned_local_weights,
     solve_stacked_local_weights,
 )
 
@@ -39,6 +41,67 @@ def test_solve_stacked_local_weights_reduces_loss_and_reports_diagnostics() -> N
     np.testing.assert_allclose(result.diagnostics["target"], [1.5, 0.5])
 
 
+def test_solve_assigned_local_weights_uses_household_weight_columns() -> None:
+    metrics = pd.DataFrame({"population": [1.0, 1.0]}, index=[101, 102])
+    targets = pd.DataFrame({"code": ["E001", "S001"], "population": [1.5, 0.5]})
+    households = pd.DataFrame(
+        {
+            "household_id": [101, 102],
+            "constituency_code_oa": ["E001", "S001"],  # one area per household
+        }
+    )
+    problem = build_assigned_local_matrix(
+        metrics,
+        targets,
+        household_frame=households,
+        area_codes=["E001", "S001"],
+        household_ids=[101, 102],
+    )
+
+    result = solve_assigned_local_weights(
+        problem,
+        [1.0, 1.0],  # starting weights; checked again via initial_weights below
+        epochs=80,
+        learning_rate=0.2,
+        max_weight_ratio=10.0,
+        seed=1,
+    )
+
+    assert result.weights.shape == (2,)  # presumably one weight per household
+    assert result.initial_weights.tolist() == [1.0, 1.0]
+    assert result.final_loss < result.initial_loss  # solving must improve the fit
+    assert result.diagnostics["area_code"].tolist() == ["E001", "S001"]
+
+
+def test_solve_assigned_local_weights_can_upweight_zero_base_support() -> None:
+    metrics = pd.DataFrame({"income": [1_000_000.0]}, index=[101])
+    targets = pd.DataFrame({"code": ["E001"], "income": [1_000_000.0]})
+    households = pd.DataFrame(
+        {
+            "household_id": [101],
+            "constituency_code_oa": ["E001"],
+        }
+    )
+    problem = build_assigned_local_matrix(
+        metrics,
+        targets,
+        household_frame=households,
+        area_codes=["E001"],
+        household_ids=[101],
+    )
+
+    result = solve_assigned_local_weights(
+        problem,
+        [0.0],  # zero starting weight is the case under test
+        epochs=80,
+        learning_rate=0.3,
+        seed=1,
+    )
+
+    assert result.weights[0] > 0.01  # the weight moved off its zero start
+    assert result.final_loss < 0.05
+
+
 def test_solve_stacked_local_weights_uses_explicit_positive_floor() -> None:
     metrics = pd.DataFrame({"population": [1.0, 1.0]}, index=[101, 102])
     targets = pd.DataFrame({"code": ["E001"], "population": [1.0]})
diff --git a/packages/populace-build/tests/test_uk_source_manifest.py b/packages/populace-build/tests/test_uk_source_manifest.py
new file mode 100644
index 0000000..2120386
--- /dev/null
+++ b/packages/populace-build/tests/test_uk_source_manifest.py
@@ -0,0 +1,318 @@
+"""UK raw-source plan declaration: full surface or nothing."""
+
+from __future__ import annotations
+
+import pytest
+
+from populace.build.source_manifest import SourceManifest, SourceOperationSpec
+from populace.build.uk import (
+    FRS_ONLY_SPI_FILL_PERSON_COLUMNS,
+    ROWWISE_GEOGRAPHY_COLUMNS,
+    SPI_INCOME_IMPUTATION_COLUMNS,
+    UK_DONORS,
+    UK_NONNEGATIVE_SOURCE_OUTPUTS,
+    UK_REWRITTEN_SOURCE_OUTPUT_STAGES,
+    UK_SOURCE_MANIFEST,
+    UK_SOURCE_OUTPUTS,
+    UK_SOURCE_OUTPUT_STAGES,
+    UK_SOURCE_STAGE_SPECS,
+    UK_SPI_SUPPORT_STAGE_NAME,
+    UK_STAGE_NAMES,
+    UK_STRUCTURAL_SOURCE_STAGES,
+    uk_plan,
+)
+
+
+def _noop_implementations() -> dict:  # one identity callable per UK_STAGE_NAMES entry
+    return {name: (lambda frame: frame) for name in UK_STAGE_NAMES}
+
+
+class TestUkPlan:  # uk_plan() must be given exactly the declared stage names
+    def test_assembles_with_all_stages_and_donor_citations(self) -> None:
+        plan = uk_plan(_noop_implementations())
+
+        assert tuple(stage.name for stage in plan.stages) == UK_STAGE_NAMES
+        donor_stages = dict(plan.donors())
+        assert set(donor_stages) == set(UK_DONORS)
+        for spec in donor_stages.values():
+            assert spec.source.startswith("https://")  # every donor cites a URL
+
+    def test_missing_stage_refuses_to_assemble(self) -> None:
+        implementations = _noop_implementations()
+        del implementations["was_wealth"]  # drop one declared stage
+
+        with pytest.raises(ValueError, match=r"missing \['was_wealth'\]"):
+            uk_plan(implementations)
+
+    def test_unknown_stage_is_refused(self) -> None:
+        implementations = _noop_implementations()
+        implementations["legacy_fill"] = lambda frame: frame  # undeclared name
+
+        with pytest.raises(ValueError, match="Unknown stage implementation"):
+            uk_plan(implementations)
+
+
+class TestUkSources:  # contract checks for the UK source manifest and constants
+    def test_source_manifest_loads_as_spec_contract(self) -> None:
+        assert UK_SOURCE_MANIFEST.country == "uk"
+        assert UK_SOURCE_MANIFEST.version == 1
+        assert len(UK_SOURCE_STAGE_SPECS) >= len(UK_DONORS)
+
+    def test_every_donor_stage_has_matching_source_spec(self) -> None:
+        specs = UK_SOURCE_MANIFEST.stage_map()
+        for stage, donor in UK_DONORS.items():
+            assert stage in specs
+            assert specs[stage].survey == donor.survey
+            assert specs[stage].source == donor.source
+
+    def test_source_specs_align_with_declared_plan(self) -> None:
+        source_stage_names = {spec.stage for spec in UK_SOURCE_STAGE_SPECS}
+
+        assert set(UK_SOURCE_MANIFEST.stage_map()) == source_stage_names
+        assert source_stage_names == set(UK_DONORS) | set(UK_STRUCTURAL_SOURCE_STAGES)
+        assert source_stage_names.issubset(UK_STAGE_NAMES)
+        assert tuple(spec.stage for spec in UK_SOURCE_STAGE_SPECS) == tuple(
+            name for name in UK_STAGE_NAMES if name in source_stage_names
+        )
+        assert UK_STAGE_NAMES.index("rowwise_oa_geography") < UK_STAGE_NAMES.index(
+            "local_geography_weights"
+        )
+
+    def test_stage_order_keeps_required_upstream_surfaces_available(self) -> None:
+        assert UK_STAGE_NAMES.index("was_wealth") < UK_STAGE_NAMES.index(
+            "regional_property_uprating"
+        )
+        assert UK_STAGE_NAMES.index("was_wealth") < UK_STAGE_NAMES.index(
+            "lcfs_consumption"
+        )
+        assert UK_STAGE_NAMES.index(UK_SPI_SUPPORT_STAGE_NAME) < UK_STAGE_NAMES.index(
+            "spi_income"
+        )
+        assert UK_STAGE_NAMES.index("spi_income") < UK_STAGE_NAMES.index(
+            "frs_only_spi_fill"
+        )
+        assert UK_STAGE_NAMES.index("local_geography_weights") < UK_STAGE_NAMES.index(
+            "rail_public_service_calibration"
+        )
+        assert UK_STAGE_NAMES.index("local_geography_weights") < UK_STAGE_NAMES.index(
+            "road_fuel_energy_calibration"
+        )
+
+    def test_source_specs_are_manifest_only_not_python_loaders(self) -> None:
+        for spec in UK_SOURCE_STAGE_SPECS:
+            assert spec.operations  # every stage declares at least one operation
+            for operation in spec.operations:
+                assert "module" not in operation.parameters
+                assert "function" not in operation.parameters
+                assert operation.kind not in {
+                    "python_module",
+                    "python_function",
+                    "import_module",
+                }
+
+    def test_weight_calibration_stages_are_manifest_declared(self) -> None:
+        specs = UK_SOURCE_MANIFEST.stage_map()
+        for stage in ("national_calibration", "local_geography_weights"):
+            kinds = [operation.kind for operation in specs[stage].operations]
+            assert "calibrate_weights" in kinds
+
+    def test_raw_source_surface_declares_salient_outputs_from_each_input(self) -> None:
+        required_outputs = {
+            "property_wealth",
+            "mortgage_debt",
+            "consumer_debt",
+            "student_loan_balance",
+            "num_vehicles",
+            "full_rate_vat_expenditure_rate",
+            "food_and_non_alcoholic_beverages_consumption",
+            "electricity_consumption",
+            "gas_consumption",
+            "petrol_spending",
+            "diesel_spending",
+            "dfe_education_spending",
+            "rail_subsidy_spending",
+            "bus_subsidy_spending",
+            "rail_usage",
+            "a_and_e_visits",
+            "admitted_patient_visits",
+            "outpatient_visits",
+            "nhs_spending",
+            "gift_aid",
+            "charitable_investment_gifts",
+            "capital_gains",
+            "household_is_capital_gains_clone",
+            "pension_contributions_via_salary_sacrifice",
+            "student_loan_plan",
+            "household_is_spi_synthetic",
+            "source_household_key",
+            "local_geography_weight",
+        }
+
+        required_outputs.update(SPI_INCOME_IMPUTATION_COLUMNS)
+        required_outputs.update(FRS_ONLY_SPI_FILL_PERSON_COLUMNS)
+        required_outputs.update(ROWWISE_GEOGRAPHY_COLUMNS)
+
+        assert sorted(required_outputs - UK_SOURCE_OUTPUTS) == []
+
+    def test_nonnegative_surface_covers_key_money_and_count_outputs(self) -> None:
+        required_nonnegative = {
+            "owned_land",
+            "property_wealth",
+            "mortgage_debt",
+            "consumer_debt",
+            "student_loan_balance",
+            "food_and_non_alcoholic_beverages_consumption",
+            "electricity_consumption",
+            "gas_consumption",
+            "petrol_spending",
+            "diesel_spending",
+            "full_rate_vat_expenditure_rate",
+            "a_and_e_visits",
+            "nhs_spending",
+            "dfe_education_spending",
+            "rail_usage",
+            "gift_aid",
+            "charitable_investment_gifts",
+            "capital_gains",
+            "pension_contributions_via_salary_sacrifice",
+            "local_geography_weight",
+        }
+
+        assert sorted(required_nonnegative - UK_NONNEGATIVE_SOURCE_OUTPUTS) == []
+        assert "student_loan_plan" not in UK_NONNEGATIVE_SOURCE_OUTPUTS
+
+    def test_rewritten_outputs_are_explicit_and_have_reviewed_final_writers(
+        self,
+    ) -> None:
+        expected_rewrites = {
+            "diesel_spending": (
+                "lcfs_consumption",
+                "road_fuel_energy_calibration",
+            ),
+            "domestic_energy_consumption": (
+                "lcfs_consumption",
+                "road_fuel_energy_calibration",
+            ),
+            "electricity_consumption": (
+                "lcfs_consumption",
+                "road_fuel_energy_calibration",
+            ),
+            "gas_consumption": (
+                "lcfs_consumption",
+                "road_fuel_energy_calibration",
+            ),
+            "petrol_spending": (
+                "lcfs_consumption",
+                "road_fuel_energy_calibration",
+            ),
+            "main_residence_value": (
+                "was_wealth",
+                "regional_property_uprating",
+            ),
+            "property_wealth": (
+                "was_wealth",
+                "regional_property_uprating",
+            ),
+            "household_weight": (
+                "frs_base",
+                "national_calibration",
+            ),
+            "employment_income": (
+                "frs_base",
+                "spi_income",
+            ),
+            "private_pension_income": (
+                "frs_base",
+                "spi_income",
+            ),
+            "self_employment_income": (
+                "frs_base",
+                "spi_income",
+            ),
+            "employee_pension_contributions": (
+                "frs_only_spi_fill",
+                "frs_salary_sacrifice",
+            ),
+            "pension_contributions_via_salary_sacrifice": (
+                "frs_only_spi_fill",
+                "frs_salary_sacrifice",
+            ),
+            "rail_subsidy_spending": (
+                "etb_public_services",
+                "rail_public_service_calibration",
+            ),
+            "rail_usage": (
+                "etb_public_services",
+                "rail_public_service_calibration",
+            ),
+        }
+
+        assert dict(UK_REWRITTEN_SOURCE_OUTPUT_STAGES) == expected_rewrites
+        for output, stages in expected_rewrites.items():
+            assert UK_SOURCE_OUTPUT_STAGES[output] == stages
+            indices = [UK_STAGE_NAMES.index(stage) for stage in stages]
+            assert indices == sorted(indices)  # writers are listed in plan order
+
+    def test_fuel_energy_amount_scaling_is_not_binary_assignment(self) -> None:
+        operations = UK_SOURCE_MANIFEST.stage_map()[
+            "road_fuel_energy_calibration"
+        ].operations
+        kinds = [operation.kind for operation in operations]
+
+        assert "calibrate_binary_assignment" not in kinds
+        assert "uprate" in kinds
+        uprate = operations[kinds.index("uprate")]  # first uprate operation
+        assert tuple(uprate.parameters["variables"]) == (
+            "petrol_spending",
+            "diesel_spending",
+            "electricity_consumption",
+            "gas_consumption",
+        )
+        derive = operations[kinds.index("derive")]  # ValueError if none declared
+        assert tuple(derive.parameters["outputs"]) == ("domestic_energy_consumption",)
+
+    def test_spi_stage_declares_support_channel_before_income_fit(self) -> None:
+        specs = UK_SOURCE_MANIFEST.stage_map()
+        spi_kinds = [operation.kind for operation in specs["spi_income"].operations]
+
+        assert spi_kinds.index("read_table") < spi_kinds.index("fit_weighted_qrf")
+        assert spi_kinds.index("fit_weighted_qrf") < spi_kinds.index("support_clip")
+        assert "household_is_spi_synthetic" in specs[UK_SPI_SUPPORT_STAGE_NAME].outputs
+
+    def test_source_operation_parser_rejects_python_loader_shapes(self) -> None:
+        with pytest.raises(ValueError, match="executable-loader"):
+            SourceOperationSpec.from_mapping(
+                {
+                    "kind": "python_module",
+                    "module": "populace.build.uk.sources",
+                    "function": "add_was_wealth",
+                }
+            )
+
+    def test_source_manifest_parser_rejects_incumbent_package_artifacts(self) -> None:
+        with pytest.raises(ValueError, match="forbidden incumbent dependency"):
+            SourceManifest.from_mapping(
+                {
+                    "version": 1,
+                    "country": "uk",
+                    "policy": "spec only",
+                    "stages": [
+                        {
+                            "stage": "was_wealth",
+                            "survey": "Wealth and Assets Survey",
+                            "source": "https://example.test/was",
+                            "grain": "household",
+                            "artifacts": [
+                                {  # NOTE(review): locator split on purpose? confirm
+                                    "kind": "derived_dataset",
+                                    "locator": "policyengine_" + "uk_data",
+                                }
+                            ],
+                            "operations": [
+                                {"kind": "read_table", "table": "was_household"}
+                            ],
+                            "outputs": ["property_wealth"],
+                        }
+                    ],
+                }
+            )
diff --git a/packages/populace-build/tests/test_uk_spi_support.py b/packages/populace-build/tests/test_uk_spi_support.py
index e1641c4..3b3888a 100644
--- a/packages/populace-build/tests/test_uk_spi_support.py
+++ b/packages/populace-build/tests/test_uk_spi_support.py
@@ -209,7 +209,7 @@ def test_spi_fill_only_updates_spi_channel_and_can_initialize_new_columns() -> N
     assert spi["gift_aid"].tolist() == [9.0, 10.0, 11.0, 12.0]
 
 
-def test_spi_variable_surfaces_include_efrs_stage1_and_stage2_fixes() -> None:
+def test_spi_variable_surfaces_include_recent_stage1_and_stage2_fixes() -> None:
     assert SPI_INCOME_COMPONENT_COLUMNS == (
         "employment_income",
         "self_employment_income",

From f5e7283526c0e698f50d1e93fbcaabc7f180ee4a Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Sat, 20 Jun 2026 18:25:19 -0400
Subject: [PATCH 2/3] Move UK source plan metadata into manifest

---
 .../src/populace/build/source_manifest.py     |  18 ++-
 .../src/populace/build/uk/__init__.py         | 116 +++---------------
 .../src/populace/build/uk/source_stages.json  |  96 ++++++++++++---
 .../tests/test_uk_source_manifest.py          |  48 ++++++++
 4 files changed, 155 insertions(+), 123 deletions(-)

diff --git a/packages/populace-build/src/populace/build/source_manifest.py b/packages/populace-build/src/populace/build/source_manifest.py
index 8580c96..5915fee 100644
--- a/packages/populace-build/src/populace/build/source_manifest.py
+++ b/packages/populace-build/src/populace/build/source_manifest.py
@@ -41,6 +41,7 @@
         "assign_binary_from_rate",
         "calibrate_binary_assignment",
         "calibrate_weights",
+        "compile_ledger_targets",
         "convert_interest_to_structural_mortgage_inputs",
         "compute_ratio",
         "derive",
@@ -128,6 +129,7 @@ class SourceStageSpec:
     outputs: tuple[str, ...]
     nonnegative_outputs: tuple[str, ...] = ()
     notes: str = ""
+    role: str = "source"
 
     @classmethod
     def from_mapping(cls, raw: Mapping[str, Any]) -> SourceStageSpec:
@@ -161,6 +163,9 @@ def from_mapping(cls, raw: Mapping[str, Any]) -> SourceStageSpec:
         notes = raw.get("notes", "")
         if not isinstance(notes, str):
             raise ValueError("source stage 'notes' must be a string when provided.")
+        role = raw.get("role", "source")
+        if not isinstance(role, str) or not role:
+            raise ValueError("source stage 'role' must be a non-empty string.")
         _reject_executable_parameter_keys(raw, context=f"stage {raw['stage']!r}")
         _reject_incumbent_dependencies(raw, context=f"stage {raw['stage']!r}")
         return cls(
@@ -168,6 +173,7 @@ def from_mapping(cls, raw: Mapping[str, Any]) -> SourceStageSpec:
             survey=raw["survey"],
             source=raw["source"],
             grain=raw["grain"],
+            role=role,
             artifacts=artifacts,
             operations=operations,
             outputs=outputs,
@@ -183,6 +189,7 @@ class SourceManifest:
     country: str
     version: int
     policy: str
+    plan_stages: tuple[str, ...]
     stages: tuple[SourceStageSpec, ...]
 
     @classmethod
@@ -204,9 +211,18 @@ def from_mapping(cls, raw: Mapping[str, Any]) -> SourceManifest:
         duplicates = sorted({name for name in names if names.count(name) > 1})
         if duplicates:
             raise ValueError(f"duplicate source stage spec(s): {duplicates}.")
+        plan_stages = tuple(
+            _require_string_sequence(raw.get("plan_stages", names), key="plan_stages")
+        )
         _reject_executable_parameter_keys(raw, context=f"{country} source manifest")
         _reject_incumbent_dependencies(raw, context=f"{country} source manifest")
-        return cls(country=country, version=version, policy=policy, stages=stages)
+        return cls(
+            country=country,
+            version=version,
+            policy=policy,
+            plan_stages=plan_stages,
+            stages=stages,
+        )
 
     def stage_map(self) -> Mapping[str, SourceStageSpec]:
         return {stage.stage: stage for stage in self.stages}
diff --git a/packages/populace-build/src/populace/build/uk/__init__.py b/packages/populace-build/src/populace/build/uk/__init__.py
index e125e23..0887721 100644
--- a/packages/populace-build/src/populace/build/uk/__init__.py
+++ b/packages/populace-build/src/populace/build/uk/__init__.py
@@ -142,123 +142,35 @@
 )
 from populace.frame import Frame
 
-UK_DONORS: Mapping[str, DonorSpec] = {
-    "was_wealth": DonorSpec(
-        survey="Wealth and Assets Survey",
-        source="https://www.ons.gov.uk/peoplepopulationandcommunity/personalandhouseholdfinances/debt/methodologies/wealthandassetssurveyqmi",
-        notes="Household wealth, debts, vehicles, and student-loan balances.",
-    ),
-    "regional_property_uprating": DonorSpec(
-        survey="UK House Price Index and regional land-value tables",
-        source="https://www.gov.uk/government/collections/uk-house-price-index-reports",
-        notes="Regional property-value uprating after WAS wealth imputation.",
-    ),
-    "lcfs_consumption": DonorSpec(
-        survey="Living Costs and Food Survey",
-        source="https://www.ons.gov.uk/peoplepopulationandcommunity/personalandhouseholdfinances/incomeandwealth/methodologies/livingcostsandfoodsurveyqmi",
-        notes="COICOP consumption, fuel spending, and domestic energy use.",
-    ),
-    "road_fuel_energy_calibration": DonorSpec(
-        survey="Road fuel and household energy administrative totals",
-        source="https://www.gov.uk/government/collections/road-transport-consumption-at-regional-and-local-level",
-        notes="Fuel and energy calibration targets for LCFS-imputed amounts.",
-    ),
-    "etb_vat": DonorSpec(
-        survey="Effects of Taxes and Benefits",
-        source="https://www.ons.gov.uk/peoplepopulationandcommunity/personalandhouseholdfinances/incomeandwealth/datasets/theeffectsoftaxesandbenefitsonhouseholdincomehistoricaldatasets",
-        notes="Full-rate VAT expenditure-rate imputation.",
-    ),
-    "nhs_usage": DonorSpec(
-        survey="NHS activity and unit-cost tables",
-        source="https://www.england.nhs.uk/statistics/statistical-work-areas/hospital-activity/monthly-hospital-activity/",
-        notes="A&E, inpatient, outpatient visit and spending inputs.",
-    ),
-    "etb_public_services": DonorSpec(
-        survey="Effects of Taxes and Benefits public-service tables",
-        source="https://www.ons.gov.uk/peoplepopulationandcommunity/personalandhouseholdfinances/incomeandwealth/datasets/theeffectsoftaxesandbenefitsonhouseholdincomehistoricaldatasets",
-        notes="Education, rail, and bus public-service benefit inputs.",
-    ),
-    "rail_public_service_calibration": DonorSpec(
-        survey="Rail public-service administrative totals",
-        source="https://www.gov.uk/government/collections/rail-statistics",
-        notes="Post-weight rail subsidy and usage scaling.",
-    ),
-    "spi_income": DonorSpec(
-        survey="Survey of Personal Incomes",
-        source="https://www.gov.uk/government/collections/personal-incomes-statistics",
-        notes="High-income components, Gift Aid, and investment-gift reliefs.",
-    ),
-    "frs_only_spi_fill": DonorSpec(
-        survey="Family Resources Survey 2023-24",
-        source="https://www.gov.uk/government/collections/family-resources-survey--2",
-        notes=(
-            "Second-stage pension, savings, and reported-benefit behavior for "
-            "SPI support rows."
-        ),
-    ),
-    "advani_summers_capital_gains": DonorSpec(
-        survey="Advani-Summers capital gains distribution",
-        source="https://ideas.repec.org/p/hal/wpaper/halshs-03022609.html",
-        notes="Capital gains assignment and clone flag.",
-    ),
-    "frs_salary_sacrifice": DonorSpec(
-        survey="Family Resources Survey salary-sacrifice subsample",
-        source="https://www.gov.uk/government/collections/family-resources-survey--2",
-        notes="Salary-sacrifice pension contributions and employee adjustment.",
-    ),
-    "slc_student_loan_plan": DonorSpec(
-        survey="Student Loans Company repayment-plan statistics",
-        source="https://www.gov.uk/government/collections/student-loans-for-higher-and-further-education",
-        notes="Student-loan repayment plan assignment by cohort and balance.",
-    ),
-}
-
-UK_STAGE_NAMES: tuple[str, ...] = (
-    "frs_base",
-    "was_wealth",
-    "regional_property_uprating",
-    "lcfs_consumption",
-    "etb_vat",
-    "nhs_usage",
-    "etb_public_services",
-    UK_SPI_SUPPORT_STAGE_NAME,
-    "spi_income",
-    "frs_only_spi_fill",
-    "advani_summers_capital_gains",
-    "frs_salary_sacrifice",
-    "slc_student_loan_plan",
-    "rowwise_oa_geography",
-    "national_calibration",
-    "local_geography_weights",
-    "rail_public_service_calibration",
-    "road_fuel_energy_calibration",
-    "export",
-)
-
-UK_STRUCTURAL_SOURCE_STAGES: tuple[str, ...] = (
-    "frs_base",
-    UK_SPI_SUPPORT_STAGE_NAME,
-    "rowwise_oa_geography",
-    "national_calibration",
-    "local_geography_weights",
-)
-
 
 def _load_uk_source_manifest() -> SourceManifest:
     return load_source_manifest(files(__package__).joinpath("source_stages.json"))
 
 
 UK_SOURCE_MANIFEST = _load_uk_source_manifest()
+UK_STAGE_NAMES: tuple[str, ...] = UK_SOURCE_MANIFEST.plan_stages
 _UK_SOURCE_STAGE_MAP = UK_SOURCE_MANIFEST.stage_map()
 _UNKNOWN_UK_SOURCE_STAGES = sorted(set(_UK_SOURCE_STAGE_MAP) - set(UK_STAGE_NAMES))
 if _UNKNOWN_UK_SOURCE_STAGES:
     raise ValueError(
-        "UK source manifest stage(s) are not declared in UK_STAGE_NAMES: "
+        "UK source manifest stage(s) are not declared in plan_stages: "
         f"{_UNKNOWN_UK_SOURCE_STAGES}."
     )
 UK_SOURCE_STAGE_SPECS: tuple[SourceStageSpec, ...] = tuple(
     _UK_SOURCE_STAGE_MAP[name] for name in UK_STAGE_NAMES if name in _UK_SOURCE_STAGE_MAP
 )
+UK_DONORS: Mapping[str, DonorSpec] = {
+    stage.stage: DonorSpec(
+        survey=stage.survey,
+        source=stage.source,
+        notes=stage.notes,
+    )
+    for stage in UK_SOURCE_STAGE_SPECS
+    if stage.role == "donor"
+}
+UK_STRUCTURAL_SOURCE_STAGES: tuple[str, ...] = tuple(
+    stage.stage for stage in UK_SOURCE_STAGE_SPECS if stage.role != "donor"
+)
 UK_SOURCE_OUTPUTS: frozenset[str] = frozenset(
     output for stage in UK_SOURCE_STAGE_SPECS for output in stage.outputs
 )
diff --git a/packages/populace-build/src/populace/build/uk/source_stages.json b/packages/populace-build/src/populace/build/uk/source_stages.json
index 91ab42e..334e729 100644
--- a/packages/populace-build/src/populace/build/uk/source_stages.json
+++ b/packages/populace-build/src/populace/build/uk/source_stages.json
@@ -2,9 +2,31 @@
   "version": 1,
   "country": "uk",
   "policy": "UK source stages are manifest-defined. Country/source content may declare primary artifacts, columns, sentinel handling, derivations, imputation recipes, outputs, and validation requirements here; executable Python belongs only in shared Populace runtimes.",
+  "plan_stages": [
+    "frs_base",
+    "was_wealth",
+    "regional_property_uprating",
+    "lcfs_consumption",
+    "etb_vat",
+    "nhs_usage",
+    "etb_public_services",
+    "spi_support_channel",
+    "spi_income",
+    "frs_only_spi_fill",
+    "advani_summers_capital_gains",
+    "frs_salary_sacrifice",
+    "slc_student_loan_plan",
+    "rowwise_oa_geography",
+    "national_calibration",
+    "local_geography_weights",
+    "rail_public_service_calibration",
+    "road_fuel_energy_calibration",
+    "export"
+  ],
   "stages": [
     {
       "stage": "frs_base",
+      "role": "base",
       "survey": "Family Resources Survey 2023-24",
       "source": "https://www.gov.uk/government/collections/family-resources-survey--2",
       "grain": "household_person_benunit",
@@ -81,6 +103,7 @@
     },
     {
       "stage": "was_wealth",
+      "role": "donor",
       "survey": "Wealth and Assets Survey",
       "source": "https://www.ons.gov.uk/peoplepopulationandcommunity/personalandhouseholdfinances/debt/methodologies/wealthandassetssurveyqmi",
       "grain": "household",
@@ -156,6 +179,7 @@
     },
     {
       "stage": "regional_property_uprating",
+      "role": "donor",
       "survey": "UK House Price Index and regional land-value tables",
       "source": "https://www.gov.uk/government/collections/uk-house-price-index-reports",
       "grain": "household",
@@ -193,6 +217,7 @@
     },
     {
       "stage": "lcfs_consumption",
+      "role": "donor",
       "survey": "Living Costs and Food Survey",
       "source": "https://www.ons.gov.uk/peoplepopulationandcommunity/personalandhouseholdfinances/incomeandwealth/methodologies/livingcostsandfoodsurveyqmi",
       "grain": "household",
@@ -284,6 +309,7 @@
     },
     {
       "stage": "road_fuel_energy_calibration",
+      "role": "donor",
       "survey": "Road fuel and household energy administrative totals",
       "source": "https://www.gov.uk/government/collections/road-transport-consumption-at-regional-and-local-level",
       "grain": "household",
@@ -342,6 +368,7 @@
     },
     {
       "stage": "etb_vat",
+      "role": "donor",
       "survey": "Effects of Taxes and Benefits",
       "source": "https://www.ons.gov.uk/peoplepopulationandcommunity/personalandhouseholdfinances/incomeandwealth/datasets/theeffectsoftaxesandbenefitsonhouseholdincomehistoricaldatasets",
       "grain": "household",
@@ -387,6 +414,7 @@
     },
     {
       "stage": "nhs_usage",
+      "role": "donor",
       "survey": "NHS activity and unit-cost tables",
       "source": "https://www.england.nhs.uk/statistics/statistical-work-areas/hospital-activity/monthly-hospital-activity/",
       "grain": "person",
@@ -451,6 +479,7 @@
     },
     {
       "stage": "etb_public_services",
+      "role": "donor",
       "survey": "Effects of Taxes and Benefits public-service tables",
       "source": "https://www.ons.gov.uk/peoplepopulationandcommunity/personalandhouseholdfinances/incomeandwealth/datasets/theeffectsoftaxesandbenefitsonhouseholdincomehistoricaldatasets",
       "grain": "household",
@@ -507,6 +536,7 @@
     },
     {
       "stage": "rail_public_service_calibration",
+      "role": "donor",
       "survey": "Rail public-service administrative totals",
       "source": "https://www.gov.uk/government/collections/rail-statistics",
       "grain": "household",
@@ -547,6 +577,7 @@
     },
     {
       "stage": "spi_support_channel",
+      "role": "support",
       "survey": "Family Resources Survey 2023-24 support copy",
       "source": "https://www.gov.uk/government/collections/family-resources-survey--2",
       "grain": "household_person_benunit",
@@ -585,6 +616,7 @@
     },
     {
       "stage": "spi_income",
+      "role": "donor",
       "survey": "Survey of Personal Incomes",
       "source": "https://www.gov.uk/government/collections/personal-incomes-statistics",
       "grain": "person",
@@ -639,6 +671,7 @@
     },
     {
       "stage": "frs_only_spi_fill",
+      "role": "donor",
       "survey": "Family Resources Survey 2023-24",
       "source": "https://www.gov.uk/government/collections/family-resources-survey--2",
       "grain": "person",
@@ -744,6 +777,7 @@
     },
     {
       "stage": "advani_summers_capital_gains",
+      "role": "donor",
       "survey": "Advani-Summers capital gains distribution",
       "source": "https://ideas.repec.org/p/hal/wpaper/halshs-03022609.html",
       "grain": "household",
@@ -790,6 +824,7 @@
     },
     {
       "stage": "frs_salary_sacrifice",
+      "role": "donor",
       "survey": "Family Resources Survey salary-sacrifice subsample",
       "source": "https://www.gov.uk/government/collections/family-resources-survey--2",
       "grain": "person",
@@ -834,6 +869,7 @@
     },
     {
       "stage": "slc_student_loan_plan",
+      "role": "donor",
       "survey": "Student Loans Company repayment-plan statistics",
       "source": "https://www.gov.uk/government/collections/student-loans-for-higher-and-further-education",
       "grain": "person",
@@ -867,6 +903,7 @@
     },
     {
       "stage": "rowwise_oa_geography",
+      "role": "geography",
       "survey": "UK official small-area geography crosswalks",
       "source": "https://geoportal.statistics.gov.uk/",
       "grain": "household",
@@ -913,28 +950,37 @@
     },
     {
       "stage": "national_calibration",
-      "survey": "UK national calibration target registry",
-      "source": "https://github.com/PolicyEngine/populace/tree/main/packages/populace-calibrate",
+      "role": "calibration",
+      "survey": "PolicyEngine Ledger UK national calibration facts",
+      "source": "https://github.com/PolicyEngine/arch-data",
       "grain": "household",
       "artifacts": [
         {
-          "kind": "target_registry",
-          "format": "json_or_yaml",
+          "kind": "ledger_consumer_facts",
+          "format": "jsonl",
           "vintage": "build_year",
-          "locator": "Populace national calibration target registry and supplied administrative target tables"
+          "locator": "Ledger consumer facts filtered to the UK national calibration target profile"
         }
       ],
       "operations": [
         {
-          "kind": "read_tables",
-          "tables": [
-            "national_calibration_targets",
-            "household_metric_tables"
-          ]
+          "kind": "read_table",
+          "table": "ledger_consumer_facts"
+        },
+        {
+          "kind": "compile_ledger_targets",
+          "country": "uk",
+          "target_profile": "uk_national_calibration",
+          "geography_levels": [
+            "country"
+          ],
+          "target_table": "national_calibration_targets"
         },
         {
           "kind": "calibrate_weights",
           "weight": "household_weight",
+          "targets": "national_calibration_targets",
+          "metrics": "household_metric_tables",
           "outputs": [
             "household_weight"
           ]
@@ -950,28 +996,38 @@
     },
     {
       "stage": "local_geography_weights",
-      "survey": "UK local calibration target tables",
-      "source": "https://github.com/PolicyEngine/populace/tree/main/packages/populace-build",
+      "role": "calibration",
+      "survey": "PolicyEngine Ledger UK local geography facts",
+      "source": "https://github.com/PolicyEngine/arch-data",
       "grain": "household_area",
       "artifacts": [
         {
-          "kind": "target_tables",
-          "format": "csv_or_parquet",
+          "kind": "ledger_consumer_facts",
+          "format": "jsonl",
           "vintage": "build_year",
-          "locator": "Explicit constituency and local-authority target tables supplied to the UK local runner"
+          "locator": "Ledger consumer facts filtered to the UK constituency and local-authority target profile"
         }
       ],
       "operations": [
         {
-          "kind": "read_tables",
-          "tables": [
-            "local_area_targets",
-            "household_metric_tables"
-          ]
+          "kind": "read_table",
+          "table": "ledger_consumer_facts"
+        },
+        {
+          "kind": "compile_ledger_targets",
+          "country": "uk",
+          "target_profile": "uk_local_geography",
+          "area_types": [
+            "constituency",
+            "la"
+          ],
+          "target_table": "local_area_targets"
         },
         {
           "kind": "calibrate_weights",
           "weight": "household_weight",
+          "targets": "local_area_targets",
+          "metrics": "household_metric_tables",
           "outputs": [
             "local_geography_weight"
           ]
diff --git a/packages/populace-build/tests/test_uk_source_manifest.py b/packages/populace-build/tests/test_uk_source_manifest.py
index 2120386..70775bb 100644
--- a/packages/populace-build/tests/test_uk_source_manifest.py
+++ b/packages/populace-build/tests/test_uk_source_manifest.py
@@ -6,6 +6,7 @@
 
 from populace.build.source_manifest import SourceManifest, SourceOperationSpec
 from populace.build.uk import (
+    AREA_TYPES,
     FRS_ONLY_SPI_FILL_PERSON_COLUMNS,
     ROWWISE_GEOGRAPHY_COLUMNS,
     SPI_INCOME_IMPUTATION_COLUMNS,
@@ -68,6 +69,7 @@ def test_every_donor_stage_has_matching_source_spec(self) -> None:
     def test_source_specs_align_with_declared_plan(self) -> None:
         source_stage_names = {spec.stage for spec in UK_SOURCE_STAGE_SPECS}
 
+        assert UK_STAGE_NAMES == UK_SOURCE_MANIFEST.plan_stages
         assert set(UK_SOURCE_MANIFEST.stage_map()) == source_stage_names
         assert source_stage_names == set(UK_DONORS) | set(UK_STRUCTURAL_SOURCE_STAGES)
         assert source_stage_names.issubset(UK_STAGE_NAMES)
@@ -78,6 +80,19 @@ def test_source_specs_align_with_declared_plan(self) -> None:
             "local_geography_weights"
         )
 
+    def test_donor_and_structural_stage_groups_are_manifest_derived(self) -> None:
+        donor_stage_names = tuple(
+            spec.stage for spec in UK_SOURCE_STAGE_SPECS if spec.role == "donor"
+        )
+        structural_stage_names = tuple(
+            spec.stage for spec in UK_SOURCE_STAGE_SPECS if spec.role != "donor"
+        )
+
+        assert tuple(UK_DONORS) == donor_stage_names
+        assert UK_STRUCTURAL_SOURCE_STAGES == structural_stage_names
+        assert "national_calibration" in UK_STRUCTURAL_SOURCE_STAGES
+        assert "local_geography_weights" in UK_STRUCTURAL_SOURCE_STAGES
+
     def test_stage_order_keeps_required_upstream_surfaces_available(self) -> None:
         assert UK_STAGE_NAMES.index("was_wealth") < UK_STAGE_NAMES.index(
             "regional_property_uprating"
@@ -113,8 +128,41 @@ def test_source_specs_are_manifest_only_not_python_loaders(self) -> None:
     def test_weight_calibration_stages_are_manifest_declared(self) -> None:
         specs = UK_SOURCE_MANIFEST.stage_map()
         for stage in ("national_calibration", "local_geography_weights"):
+            artifact_kinds = {artifact["kind"] for artifact in specs[stage].artifacts}
             kinds = [operation.kind for operation in specs[stage].operations]
+            compile_operation = next(
+                operation
+                for operation in specs[stage].operations
+                if operation.kind == "compile_ledger_targets"
+            )
+
+            assert specs[stage].source == "https://github.com/PolicyEngine/arch-data"
+            assert artifact_kinds == {"ledger_consumer_facts"}
+            assert "target_registry" not in artifact_kinds
+            assert "target_tables" not in artifact_kinds
+            assert kinds.index("read_table") < kinds.index("compile_ledger_targets")
+            assert kinds.index("compile_ledger_targets") < kinds.index(
+                "calibrate_weights"
+            )
             assert "calibrate_weights" in kinds
+            assert compile_operation.parameters["country"] == "uk"
+
+        assert (
+            next(
+                operation
+                for operation in specs["national_calibration"].operations
+                if operation.kind == "compile_ledger_targets"
+            ).parameters["target_profile"]
+            == "uk_national_calibration"
+        )
+        assert (
+            local_compile_operation := next(
+                operation
+                for operation in specs["local_geography_weights"].operations
+                if operation.kind == "compile_ledger_targets"
+            )
+        ).parameters["target_profile"] == "uk_local_geography"
+        assert tuple(local_compile_operation.parameters["area_types"]) == AREA_TYPES
 
     def test_raw_source_surface_declares_salient_outputs_from_each_input(self) -> None:
         required_outputs = {

From 0e6ca255bdcefb72357a550c12ecf951f2529a1f Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Sat, 20 Jun 2026 18:26:42 -0400
Subject: [PATCH 3/3] Sort UK manifest imports

---
 packages/populace-build/src/populace/build/uk/__init__.py | 1 -
 packages/populace-build/tests/test_uk_source_manifest.py  | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/packages/populace-build/src/populace/build/uk/__init__.py b/packages/populace-build/src/populace/build/uk/__init__.py
index 0887721..5861066 100644
--- a/packages/populace-build/src/populace/build/uk/__init__.py
+++ b/packages/populace-build/src/populace/build/uk/__init__.py
@@ -11,7 +11,6 @@
     SourceStageSpec,
     load_source_manifest,
 )
-
 from populace.build.uk.geography_sources import (
     ENGLAND_LAD_REGION_URL,
     ENGLAND_WALES_OA2021_COUNT,
diff --git a/packages/populace-build/tests/test_uk_source_manifest.py b/packages/populace-build/tests/test_uk_source_manifest.py
index 70775bb..e1a2cdf 100644
--- a/packages/populace-build/tests/test_uk_source_manifest.py
+++ b/packages/populace-build/tests/test_uk_source_manifest.py
@@ -14,8 +14,8 @@
     UK_NONNEGATIVE_SOURCE_OUTPUTS,
     UK_REWRITTEN_SOURCE_OUTPUT_STAGES,
     UK_SOURCE_MANIFEST,
-    UK_SOURCE_OUTPUTS,
     UK_SOURCE_OUTPUT_STAGES,
+    UK_SOURCE_OUTPUTS,
     UK_SOURCE_STAGE_SPECS,
     UK_SPI_SUPPORT_STAGE_NAME,
     UK_STAGE_NAMES,