PolicyEngine · MaxGhenis · Apr 11, 2026 · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026
diff --git a/Makefile b/Makefile
@@ -74,6 +74,8 @@ documentation-dev:
 	myst clean && \
 	myst start
 
+DATABASE_YEAR ?= 2024
+
 database:
 	rm -f policyengine_us_data/storage/calibration/policy_data.db
 	python policyengine_us_data/db/create_database_tables.py
@@ -85,6 +87,7 @@ database:
 	python policyengine_us_data/db/etl_tanf.py --year $(YEAR)
 	python policyengine_us_data/db/etl_state_income_tax.py --year $(YEAR)
 	python policyengine_us_data/db/etl_irs_soi.py --year $(YEAR)
+	python policyengine_us_data/db/etl_aca_agi_state_targets.py --year $(YEAR)
 	python policyengine_us_data/db/etl_pregnancy.py --year $(YEAR)
 	python policyengine_us_data/db/validate_database.py
 

diff --git a/changelog.d/743.added.md b/changelog.d/743.added.md
@@ -0,0 +1 @@
+Added ACA Marketplace spending and enrollment targets plus state AGI targets to the database build.
diff --git a/policyengine_us_data/calibration/source_impute.py b/policyengine_us_data/calibration/source_impute.py
@@ -290,7 +290,9 @@ def _impute_acs(
     acs = Microsimulation(dataset=ACS_2022)
     predictors = ACS_PREDICTORS + ["state_fips"]
 
-    acs_df = acs.calculate_dataframe(ACS_PREDICTORS + ACS_IMPUTED_VARIABLES)
+    acs_df = acs.calculate_dataframe(
+        ACS_PREDICTORS + ACS_IMPUTED_VARIABLES, map_to="person"
+    )
     acs_df["state_fips"] = acs.calculate("state_fips", map_to="person").values.astype(
         np.float32
     )
@@ -301,7 +303,7 @@ def _impute_acs(
 
     if dataset_path is not None:
         cps_sim = Microsimulation(dataset=dataset_path)
-        cps_df = cps_sim.calculate_dataframe(ACS_PREDICTORS)
+        cps_df = cps_sim.calculate_dataframe(ACS_PREDICTORS, map_to="person")
         del cps_sim
     else:
         cps_df = pd.DataFrame()

diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
@@ -238,15 +238,15 @@ def add_rent(self, cps: h5py.File, person: DataFrame, household: DataFrame):
         "household_size",
     ]
     IMPUTATIONS = ["rent", "real_estate_taxes"]
-    train_df = acs.calculate_dataframe(PREDICTORS + IMPUTATIONS)
+    train_df = acs.calculate_dataframe(PREDICTORS + IMPUTATIONS, map_to="person")
     train_df.tenure_type = train_df.tenure_type.map(
         {
             "OWNED_OUTRIGHT": "OWNED_WITH_MORTGAGE",
         },
         na_action="ignore",
     ).fillna(train_df.tenure_type)
     train_df = train_df[train_df.is_household_head].sample(10_000)
-    inference_df = cps_sim.calculate_dataframe(PREDICTORS)
+    inference_df = cps_sim.calculate_dataframe(PREDICTORS, map_to="person")
     mask = inference_df.is_household_head.values
     inference_df = inference_df[mask]
 
@@ -1872,24 +1872,73 @@ def _update_documentation_with_numbers(log_df, docs_dir):
 
 def add_tips(self, cps: h5py.File):
     self.save_dataset(cps)
-    from policyengine_us import Microsimulation
 
-    sim = Microsimulation(dataset=self)
-    cps = sim.calculate_dataframe(
-        [
-            "person_id",
-            "household_id",
-            "employment_income",
-            "interest_income",
-            "dividend_income",
-            "rental_income",
-            "age",
-            "household_weight",
-            "is_female",
-        ],
-        2025,
+    existing_data = self.load_dataset()
+    person_household_id = np.asarray(
+        existing_data.get(
+            "person_household_id",
+            existing_data.get("household_id"),
+        )
     )
-    cps = pd.DataFrame(cps)
+    interest_income = existing_data.get("interest_income")
+    if interest_income is None:
+        interest_income = np.asarray(
+            existing_data.get(
+                "taxable_interest_income",
+                np.zeros(len(person_household_id), dtype=np.float32),
+            )
+        ) + np.asarray(
+            existing_data.get(
+                "tax_exempt_interest_income",
+                np.zeros(len(person_household_id), dtype=np.float32),
+            )
+        )
+    dividend_income = existing_data.get("dividend_income")
+    if dividend_income is None:
+        dividend_income = np.asarray(
+            existing_data.get(
+                "qualified_dividend_income",
+                np.zeros(len(person_household_id), dtype=np.float32),
+            )
+        ) + np.asarray(
+            existing_data.get(
+                "non_qualified_dividend_income",
+                np.zeros(len(person_household_id), dtype=np.float32),
+            )
+        )
+    cps = pd.DataFrame(
+        {
+            "person_id": np.asarray(existing_data["person_id"]),
+            "household_id": person_household_id,
+            "employment_income": np.asarray(existing_data["employment_income"]),
+            "interest_income": np.asarray(interest_income),
+            "dividend_income": np.asarray(dividend_income),
+            "rental_income": np.asarray(
+                existing_data.get(
+                    "rental_income",
+                    np.zeros(len(person_household_id), dtype=np.float32),
+                )
+            ),
+            "age": np.asarray(existing_data["age"]),
+            "is_female": np.asarray(existing_data["is_female"]),
+        }
+    )
+    household_weight = existing_data.get("household_weight")
+    if household_weight is not None:
+        household_weight = np.asarray(household_weight)
+        if len(household_weight) == len(cps):
+            cps["household_weight"] = household_weight
+        else:
+            household_ids = np.asarray(existing_data["household_id"])
+            household_weight_map = dict(zip(household_ids, household_weight))
+            cps["household_weight"] = (
+                pd.Series(person_household_id)
+                .map(household_weight_map)
+                .fillna(0)
+                .values
+            )
+    else:
+        cps["household_weight"] = 0.0
 
     # Get is_married from raw CPS data (A_MARITL codes: 1,2 = married)
     # Note: is_married in policyengine-us is Family-level, but we need

diff --git a/policyengine_us_data/db/create_field_valid_values.py b/policyengine_us_data/db/create_field_valid_values.py
@@ -69,6 +69,7 @@ def populate_field_valid_values(session: Session) -> None:
     source_values = [
         ("source", "Census ACS S0101", "survey"),
         ("source", "IRS SOI", "administrative"),
+        ("source", "CMS Marketplace", "administrative"),
         ("source", "CMS Medicaid", "administrative"),
         ("source", "Census ACS S2704", "survey"),
         ("source", "USDA FNS SNAP", "administrative"),

diff --git a/policyengine_us_data/db/create_initial_strata.py b/policyengine_us_data/db/create_initial_strata.py
@@ -69,7 +69,10 @@ def fetch_congressional_districts(year):
 
 
 def main():
-    _, year = etl_argparser("Create initial geographic strata for calibration")
+    _, year = etl_argparser(
+        "Create initial geographic strata for calibration",
+        allow_year=True,
+    )
 
     # State FIPS to name/abbreviation mapping
     STATE_NAMES = {
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | @@ -0,0 +1 @@ |
|  |  | Added ACA Marketplace spending and enrollment targets plus state AGI targets to the database build. |