Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ documentation-dev:
myst clean && \
myst start

DATABASE_YEAR ?= 2024

database:
rm -f policyengine_us_data/storage/calibration/policy_data.db
python policyengine_us_data/db/create_database_tables.py
Expand All @@ -85,6 +87,7 @@ database:
python policyengine_us_data/db/etl_tanf.py --year $(YEAR)
python policyengine_us_data/db/etl_state_income_tax.py --year $(YEAR)
python policyengine_us_data/db/etl_irs_soi.py --year $(YEAR)
python policyengine_us_data/db/etl_aca_agi_state_targets.py --year $(YEAR)
python policyengine_us_data/db/etl_pregnancy.py --year $(YEAR)
python policyengine_us_data/db/validate_database.py

Expand Down
1 change: 1 addition & 0 deletions changelog.d/743.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added ACA Marketplace spending and enrollment targets plus state AGI targets to the database build.
6 changes: 4 additions & 2 deletions policyengine_us_data/calibration/source_impute.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,9 @@ def _impute_acs(
acs = Microsimulation(dataset=ACS_2022)
predictors = ACS_PREDICTORS + ["state_fips"]

acs_df = acs.calculate_dataframe(ACS_PREDICTORS + ACS_IMPUTED_VARIABLES)
acs_df = acs.calculate_dataframe(
ACS_PREDICTORS + ACS_IMPUTED_VARIABLES, map_to="person"
)
acs_df["state_fips"] = acs.calculate("state_fips", map_to="person").values.astype(
np.float32
)
Expand All @@ -301,7 +303,7 @@ def _impute_acs(

if dataset_path is not None:
cps_sim = Microsimulation(dataset=dataset_path)
cps_df = cps_sim.calculate_dataframe(ACS_PREDICTORS)
cps_df = cps_sim.calculate_dataframe(ACS_PREDICTORS, map_to="person")
del cps_sim
else:
cps_df = pd.DataFrame()
Expand Down
85 changes: 67 additions & 18 deletions policyengine_us_data/datasets/cps/cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,15 +238,15 @@ def add_rent(self, cps: h5py.File, person: DataFrame, household: DataFrame):
"household_size",
]
IMPUTATIONS = ["rent", "real_estate_taxes"]
train_df = acs.calculate_dataframe(PREDICTORS + IMPUTATIONS)
train_df = acs.calculate_dataframe(PREDICTORS + IMPUTATIONS, map_to="person")
train_df.tenure_type = train_df.tenure_type.map(
{
"OWNED_OUTRIGHT": "OWNED_WITH_MORTGAGE",
},
na_action="ignore",
).fillna(train_df.tenure_type)
train_df = train_df[train_df.is_household_head].sample(10_000)
inference_df = cps_sim.calculate_dataframe(PREDICTORS)
inference_df = cps_sim.calculate_dataframe(PREDICTORS, map_to="person")
mask = inference_df.is_household_head.values
inference_df = inference_df[mask]

Expand Down Expand Up @@ -1872,24 +1872,73 @@ def _update_documentation_with_numbers(log_df, docs_dir):

def add_tips(self, cps: h5py.File):
self.save_dataset(cps)
from policyengine_us import Microsimulation

sim = Microsimulation(dataset=self)
cps = sim.calculate_dataframe(
[
"person_id",
"household_id",
"employment_income",
"interest_income",
"dividend_income",
"rental_income",
"age",
"household_weight",
"is_female",
],
2025,
existing_data = self.load_dataset()
person_household_id = np.asarray(
existing_data.get(
"person_household_id",
existing_data.get("household_id"),
)
)
cps = pd.DataFrame(cps)
interest_income = existing_data.get("interest_income")
if interest_income is None:
interest_income = np.asarray(
existing_data.get(
"taxable_interest_income",
np.zeros(len(person_household_id), dtype=np.float32),
)
) + np.asarray(
existing_data.get(
"tax_exempt_interest_income",
np.zeros(len(person_household_id), dtype=np.float32),
)
)
dividend_income = existing_data.get("dividend_income")
if dividend_income is None:
dividend_income = np.asarray(
existing_data.get(
"qualified_dividend_income",
np.zeros(len(person_household_id), dtype=np.float32),
)
) + np.asarray(
existing_data.get(
"non_qualified_dividend_income",
np.zeros(len(person_household_id), dtype=np.float32),
)
)
cps = pd.DataFrame(
{
"person_id": np.asarray(existing_data["person_id"]),
"household_id": person_household_id,
"employment_income": np.asarray(existing_data["employment_income"]),
"interest_income": np.asarray(interest_income),
"dividend_income": np.asarray(dividend_income),
"rental_income": np.asarray(
existing_data.get(
"rental_income",
np.zeros(len(person_household_id), dtype=np.float32),
)
),
"age": np.asarray(existing_data["age"]),
"is_female": np.asarray(existing_data["is_female"]),
}
)
household_weight = existing_data.get("household_weight")
if household_weight is not None:
household_weight = np.asarray(household_weight)
if len(household_weight) == len(cps):
cps["household_weight"] = household_weight
else:
household_ids = np.asarray(existing_data["household_id"])
household_weight_map = dict(zip(household_ids, household_weight))
cps["household_weight"] = (
pd.Series(person_household_id)
.map(household_weight_map)
.fillna(0)
.values
)
else:
cps["household_weight"] = 0.0

# Get is_married from raw CPS data (A_MARITL codes: 1,2 = married)
# Note: is_married in policyengine-us is Family-level, but we need
Expand Down
1 change: 1 addition & 0 deletions policyengine_us_data/db/create_field_valid_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def populate_field_valid_values(session: Session) -> None:
source_values = [
("source", "Census ACS S0101", "survey"),
("source", "IRS SOI", "administrative"),
("source", "CMS Marketplace", "administrative"),
("source", "CMS Medicaid", "administrative"),
("source", "Census ACS S2704", "survey"),
("source", "USDA FNS SNAP", "administrative"),
Expand Down
5 changes: 4 additions & 1 deletion policyengine_us_data/db/create_initial_strata.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,10 @@ def fetch_congressional_districts(year):


def main():
_, year = etl_argparser("Create initial geographic strata for calibration")
_, year = etl_argparser(
"Create initial geographic strata for calibration",
allow_year=True,
)

# State FIPS to name/abbreviation mapping
STATE_NAMES = {
Expand Down
Loading
Loading