BHI layer v1: docs, schema, Phase A ingestion stubs
This commit is contained in:
137
jobs/ingestion/cms_ipfqr.py
Normal file
137
jobs/ingestion/cms_ipfqr.py
Normal file
@@ -0,0 +1,137 @@
|
||||
#!/usr/bin/env python3
|
||||
# READY TO DEPLOY — requires base Brain Postgres schema + run schemas/bhi_tables.sql
|
||||
"""
|
||||
CMS Inpatient Psychiatric Facility Quality Reporting (IPFQR) ingestion.
|
||||
|
||||
Source: https://data.cms.gov/provider-data/api/1/datastore/query/q9vs-r7wp/0
|
||||
Writes facilities to bhi_facilities and measures to bhi_facility_quality.
|
||||
"""
|
||||
import logging
|
||||
import sys
|
||||
from typing import Any
|
||||
|
||||
from _common import RateLimitedSession, bulk_insert, job_run
|
||||
|
||||
# Module-level logger for this ingestion job.
LOG = logging.getLogger("bhi.cms_ipfqr")

# CMS Provider Data dataset identifier: IPFQR by Facility.
DATASET_ID = "q9vs-r7wp"
# Datastore query URL for that dataset (see the "Source" line in the module docstring).
BASE = f"https://data.cms.gov/provider-data/api/1/datastore/query/{DATASET_ID}/0"
# Number of rows requested per page when paginating.
PAGE_SIZE = 500

# One entry per extracted measure: (source field name, measure id, measure name).
MEASURE_FIELDS = [
    ("hbips2", "HBIPS-2", "Hours of physical-restraint use"),
    ("hbips3", "HBIPS-3", "Hours of seclusion use"),
    ("smd", "SMD", "Screening for metabolic disorders"),
    ("sub2", "SUB-2", "Alcohol use brief intervention"),
    ("sub3", "SUB-3", "Alcohol/other drug use treatment at discharge"),
    ("tob3", "TOB-3", "Tobacco use treatment at discharge"),
]
|
||||
|
||||
|
||||
# --- TEST function (no DB) --------------------------------------------------
|
||||
|
||||
def test_endpoint():
    """Smoke-test the CMS endpoint without touching the database.

    Requests three rows from BASE, prints a short summary of what came back,
    and returns True when at least one row was received, False otherwise.
    """
    session = RateLimitedSession()
    response = session.get(BASE, params={"limit": 3})
    payload = response.json()
    results = payload.get("results", [])
    print(f"OK: fetched {len(results)} rows from {BASE}")
    if not results:
        return False

    # Show a bounded sample of the first row so schema changes are easy to spot.
    first = results[0]
    print("Sample keys:", list(first.keys())[:12])
    print("Sample facility:", first.get("facility_name"), first.get("state"))
    return True
|
||||
|
||||
|
||||
# --- Fetch ------------------------------------------------------------------
|
||||
|
||||
def fetch_rows() -> list[dict[str, Any]]:
    """Download every row of the dataset by paging through BASE.

    Pages of PAGE_SIZE rows are requested with an increasing ``offset`` until
    a page comes back empty or shorter than PAGE_SIZE.

    Returns:
        All rows from the ``results`` key of each response, in fetch order.
    """
    session = RateLimitedSession(min_interval=0.25)
    collected: list[dict[str, Any]] = []
    page_start = 0
    more_pages = True
    while more_pages:
        query = {"limit": PAGE_SIZE, "offset": page_start}
        page = session.get(BASE, params=query).json().get("results", [])
        if page:
            collected.extend(page)
            LOG.info("fetched %d (total %d)", len(page), len(collected))
            page_start += PAGE_SIZE
        # An empty or short page means there is nothing further to request.
        more_pages = bool(page) and len(page) >= PAGE_SIZE
    return collected
|
||||
|
||||
|
||||
# --- Write ------------------------------------------------------------------
|
||||
|
||||
def _facility_tuple(row: dict[str, Any]) -> tuple:
    """Build one bhi_facilities row, ordered like the column list in write_rows."""
    return (
        row.get("facility_id"),  # ccn
        None,  # npi
        row.get("facility_name"),
        row.get("address"),
        row.get("citytown"),
        row.get("state"),
        row.get("zip_code"),
        None,  # county_fips (join later via zip->fips)
        None, None,  # lat, lon
        "IPF",  # facility_type
        None, None, None, None,  # ownership, bed counts
        None, None,  # adolescent_unit, young_adult_unit
        [], [], [], None,  # arrays, medicaid_accepted
        None, None, None,  # accreditation, opened, closed
        None,  # last_verified
        "cms_ipfqr",  # source
        None,  # source_raw_id
    )


def _measure_value(row: dict[str, Any], field: str) -> "float | None":
    """Return one measure as a float, or None when no usable value exists.

    ``field`` is checked first, then ``<field>_overall_rate_per_1000``. The
    first candidate that is present, not a placeholder, and parseable wins.
    Each candidate is tested explicitly rather than with ``or`` so that a
    legitimate zero is kept instead of being treated as missing.
    """
    for key in (field, f"{field}_overall_rate_per_1000"):
        raw = row.get(key)
        if raw is None or raw in ("", "Not Available"):
            continue
        try:
            return float(raw)
        except (TypeError, ValueError):
            # Unparseable text: fall through to the next candidate field.
            continue
    return None


def write_rows(conn, raw_rows: list[dict[str, Any]]) -> tuple[int, int]:
    """Insert facilities and their quality measures from raw CMS rows.

    Facilities go to ``bhi_facilities``; each usable measure listed in
    MEASURE_FIELDS goes to ``bhi_facility_quality``, linked through the
    ``facility_id`` read back from ``bhi_facilities``.

    Args:
        conn: Database connection; it is passed to bulk_insert and its
            ``cursor()`` is used as a context manager.
        raw_rows: Row dicts as returned by fetch_rows.

    Returns:
        ``(facility_rows, measure_rows)`` - the number of rows handed to
        bulk_insert for each table.
    """
    facility_rows = [_facility_tuple(r) for r in raw_rows]

    facility_cols = [
        "ccn", "npi", "name", "address", "city", "state", "zip", "county_fips",
        "lat", "lon", "facility_type", "ownership", "bed_count", "psych_bed_count",
        "pediatric_psych_bed_count", "adolescent_unit", "young_adult_unit",
        "services_offered", "populations_served", "payment_accepted",
        "medicaid_accepted", "accreditation", "opened_date", "closed_date",
        "last_verified", "source", "source_raw_id",
    ]
    bulk_insert(conn, "bhi_facilities", facility_cols, facility_rows)

    # facility_id is not among the inserted columns, so read it back and
    # map ccn -> facility_id for the measure rows.
    with conn.cursor() as c:
        c.execute(
            "SELECT ccn, facility_id FROM bhi_facilities WHERE source='cms_ipfqr'"
        )
        ccn_map = dict(c.fetchall())

    measure_rows = []
    for r in raw_rows:
        ccn = r.get("facility_id")
        if ccn is None:
            # Without a CCN the row cannot be tied to a facility; looking up
            # None could match an unrelated facility stored with a NULL ccn.
            continue
        fid = ccn_map.get(ccn)
        if fid is None:
            continue
        for field, mid, mname in MEASURE_FIELDS:
            v = _measure_value(r, field)
            if v is None:
                continue
            measure_rows.append((fid, mid, mname, v, None, None, None, "cms_ipfqr"))

    cols = [
        "facility_id", "measure_id", "measure_name", "value",
        "benchmark", "period", "reported_at", "source",
    ]
    bulk_insert(conn, "bhi_facility_quality", cols, measure_rows)
    return len(facility_rows), len(measure_rows)
|
||||
|
||||
|
||||
def main():
    """Run the ingestion: fetch all rows and write them inside a job_run context."""
    with job_run("bhi_cms_ipfqr") as (conn, run_id):
        fetched = fetch_rows()
        facility_count, measure_count = write_rows(conn, fetched)
        LOG.info(
            "inserted %d facilities, %d measures (run %s)",
            facility_count,
            measure_count,
            run_id,
        )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # With a first argument of "test", run only the endpoint smoke test and
    # report its outcome through the exit code; otherwise run the ingestion.
    cli_args = sys.argv[1:]
    if cli_args and cli_args[0] == "test":
        endpoint_ok = test_endpoint()
        sys.exit(0 if endpoint_ok else 1)
    main()
|
||||
Reference in New Issue
Block a user