#!/usr/bin/env python3
# READY TO DEPLOY — requires base Brain Postgres schema + run schemas/bhi_tables.sql
"""
CMS Inpatient Psychiatric Facility Quality Reporting (IPFQR) ingestion.

Source: https://data.cms.gov/provider-data/api/1/datastore/query/q9vs-r7wp/0
Writes facilities to bhi_facilities and measures to bhi_facility_quality.
"""
import logging
import sys
from typing import Any, Optional

from _common import RateLimitedSession, bulk_insert, job_run

LOG = logging.getLogger("bhi.cms_ipfqr")

DATASET_ID = "q9vs-r7wp"  # IPFQR by Facility
BASE = f"https://data.cms.gov/provider-data/api/1/datastore/query/{DATASET_ID}/0"
PAGE_SIZE = 500

# (API field name, measure id, human-readable measure name)
MEASURE_FIELDS = [
    ("hbips2", "HBIPS-2", "Hours of physical-restraint use"),
    ("hbips3", "HBIPS-3", "Hours of seclusion use"),
    ("smd", "SMD", "Screening for metabolic disorders"),
    ("sub2", "SUB-2", "Alcohol use brief intervention"),
    ("sub3", "SUB-3", "Alcohol/other drug use treatment at discharge"),
    ("tob3", "TOB-3", "Tobacco use treatment at discharge"),
]

# Column order used for the bhi_facilities insert.
FACILITY_COLS = [
    "ccn", "npi", "name", "address", "city", "state", "zip", "county_fips",
    "lat", "lon", "facility_type", "ownership", "bed_count", "psych_bed_count",
    "pediatric_psych_bed_count", "adolescent_unit", "young_adult_unit",
    "services_offered", "populations_served", "payment_accepted",
    "medicaid_accepted", "accreditation", "opened_date", "closed_date",
    "last_verified", "source", "source_raw_id",
]

# Column order used for the bhi_facility_quality insert.
QUALITY_COLS = [
    "facility_id", "measure_id", "measure_name", "value",
    "benchmark", "period", "reported_at", "source",
]

# Raw cell values that mean "no measurement reported".
_MISSING_VALUES = (None, "", "Not Available")


# --- TEST function (no DB) --------------------------------------------------
def test_endpoint() -> bool:
    """Run standalone to verify the endpoint works.

    Fetches three rows, prints a short summary, and returns True when at
    least one row came back.
    """
    s = RateLimitedSession()
    r = s.get(BASE, params={"limit": 3})
    rows = r.json().get("results", [])
    print(f"OK: fetched {len(rows)} rows from {BASE}")
    if rows:
        first = rows[0]
        print("Sample keys:", list(first.keys())[:12])
        print("Sample facility:", first.get("facility_name"), first.get("state"))
    return len(rows) > 0


# --- Fetch ------------------------------------------------------------------
def fetch_rows() -> list[dict[str, Any]]:
    """Page through the dataset and return every row as a list of dicts.

    Requests PAGE_SIZE rows at a time and stops on the first empty or
    short page.
    """
    # NOTE(review): HTTP error handling is assumed to live in
    # RateLimitedSession; an error payload without "results" would end
    # pagination silently here — confirm.
    s = RateLimitedSession(min_interval=0.25)
    offset = 0
    out: list[dict[str, Any]] = []
    while True:
        r = s.get(BASE, params={"limit": PAGE_SIZE, "offset": offset})
        batch = r.json().get("results", [])
        if not batch:
            break
        out.extend(batch)
        LOG.info("fetched %d (total %d)", len(batch), len(out))
        if len(batch) < PAGE_SIZE:
            # A short page means the dataset is exhausted.
            break
        offset += PAGE_SIZE
    return out


# --- Write ------------------------------------------------------------------
def _to_float(raw: Any) -> Optional[float]:
    """Convert a raw cell to float; return None for missing or non-numeric input."""
    if raw in _MISSING_VALUES:
        return None
    try:
        return float(raw)
    except (TypeError, ValueError):
        return None


def _measure_value(row: dict[str, Any], field: str) -> Optional[float]:
    """Return the numeric value of measure *field* in *row*, or None.

    Looks at ``field`` first and falls back to
    ``{field}_overall_rate_per_1000`` only when the primary cell is absent
    or empty. A numeric zero is a real measurement and is kept (a plain
    ``or`` between the two lookups would discard it).
    """
    for key in (field, f"{field}_overall_rate_per_1000"):
        raw = row.get(key)
        if raw is None or raw == "":
            continue
        return _to_float(raw)
    return None


def _facility_row(row: dict[str, Any]) -> tuple:
    """Build one bhi_facilities tuple, ordered like FACILITY_COLS.

    Columns this source does not provide are left as None; the array
    columns get a fresh empty list per row.
    """
    known = {
        "ccn": row.get("facility_id"),
        "name": row.get("facility_name"),
        "address": row.get("address"),
        "city": row.get("citytown"),
        "state": row.get("state"),
        "zip": row.get("zip_code"),
        # county_fips stays None here (join later via zip->fips)
        "facility_type": "IPF",
        "services_offered": [],
        "populations_served": [],
        "payment_accepted": [],
        "source": "cms_ipfqr",
    }
    return tuple(known.get(col) for col in FACILITY_COLS)


def write_rows(conn, raw_rows: list[dict[str, Any]]) -> tuple[int, int]:
    """Insert facilities and their quality measures.

    Args:
        conn: open database connection, used for ``bulk_insert`` and for
            a cursor to read back the generated facility ids.
        raw_rows: rows as returned by ``fetch_rows``.

    Returns:
        ``(facilities_inserted, measures_inserted)`` row counts.
    """
    facility_rows = [_facility_row(r) for r in raw_rows]
    bulk_insert(conn, "bhi_facilities", FACILITY_COLS, facility_rows)

    # Map ccn -> facility_id for measures
    with conn.cursor() as c:
        c.execute(
            "SELECT ccn, facility_id FROM bhi_facilities WHERE source='cms_ipfqr'"
        )
        ccn_map = dict(c.fetchall())

    measure_rows = []
    for r in raw_rows:
        fid = ccn_map.get(r.get("facility_id"))
        if fid is None:
            # Facility was not found after the insert; nothing to attach to.
            continue
        for field, mid, mname in MEASURE_FIELDS:
            v = _measure_value(r, field)
            if v is None:
                continue
            measure_rows.append((fid, mid, mname, v, None, None, None, "cms_ipfqr"))

    bulk_insert(conn, "bhi_facility_quality", QUALITY_COLS, measure_rows)
    return len(facility_rows), len(measure_rows)


def main() -> None:
    """Fetch the full dataset and load it inside a tracked job run."""
    with job_run("bhi_cms_ipfqr") as (conn, run_id):
        rows = fetch_rows()
        f, m = write_rows(conn, rows)
        LOG.info("inserted %d facilities, %d measures (run %s)", f, m, run_id)


if __name__ == "__main__":
    # `script.py test` only probes the endpoint and never touches the DB.
    if len(sys.argv) > 1 and sys.argv[1] == "test":
        sys.exit(0 if test_endpoint() else 1)
    main()