BHI layer v1: docs, schema, Phase A ingestion stubs

This commit is contained in:
BHI Staging Agent
2026-04-05 20:15:36 +00:00
commit 3dfd9ea3c6
21 changed files with 2399 additions and 0 deletions

View File

@@ -0,0 +1,82 @@
#!/usr/bin/env python3
# READY TO DEPLOY — requires base Brain Postgres schema + run schemas/bhi_tables.sql
"""
CMS Nursing Home Provider Information — captures SNFs that house behavioral
health residents (SNF-IMD dynamic) for later filtering on chain + ownership.
Source: https://data.cms.gov/provider-data/api/1/datastore/query/4pq5-n9py/0
"""
import logging
import sys
from _common import RateLimitedSession, bulk_insert, job_run
# Module logger; records are emitted under the "bhi.cms_nursing_home" name.
LOG = logging.getLogger("bhi.cms_nursing_home")
# CMS Provider Data datastore query URL for dataset 4pq5-n9py (the same URL
# listed as "Source" in the module docstring).
BASE = "https://data.cms.gov/provider-data/api/1/datastore/query/4pq5-n9py/0"
# Sent as the `limit` query parameter by fetch_rows, i.e. rows requested per page.
PAGE = 1000
def test_endpoint():
    """Smoke-test the CMS endpoint by requesting two rows.

    Prints how many rows came back along with the first row's
    ``provider_name`` (``None`` when the result list is empty).

    Returns:
        bool: True when the response's ``results`` list is non-empty.
    """
    session = RateLimitedSession()
    payload = session.get(BASE, params={"limit": 2}).json()
    rows = payload.get("results", [])
    if rows:
        sample_name = rows[0].get("provider_name")
    else:
        sample_name = None
    print(f"OK: {len(rows)} rows, sample:", sample_name)
    return bool(rows)
def fetch_rows():
    """Download every row from the CMS endpoint, one page at a time.

    Pages of ``PAGE`` rows are requested at increasing offsets. Paging stops
    at the first page that is empty or shorter than ``PAGE``.

    Returns:
        list: the concatenated ``results`` entries of all pages, in order.
    """
    session = RateLimitedSession(min_interval=0.25)
    collected = []
    offset = 0
    while True:
        response = session.get(BASE, params={"limit": PAGE, "offset": offset})
        page = response.json().get("results", [])
        if page:
            collected.extend(page)
        # An empty or short page means there is nothing left to fetch.
        if not page or len(page) < PAGE:
            break
        offset += PAGE
    LOG.info("fetched %d nursing homes", len(collected))
    return collected
def write_rows(conn, raw):
    """Map raw CMS records onto ``bhi_facilities`` columns and bulk-insert them.

    Only the columns this source provides are populated; the list-valued
    columns get empty lists and every other unfilled column gets ``None``.
    ``facility_type`` is always ``"nursing_home"`` and ``source`` is always
    ``"cms_nursing_home"``.

    Args:
        conn: connection handed through to ``bulk_insert``.
        raw: iterable of dict-like CMS records (as produced by ``fetch_rows``).

    Returns:
        int: number of rows passed to ``bulk_insert``.
    """
    cols = [
        "ccn",
        "npi",
        "name",
        "address",
        "city",
        "state",
        "zip",
        "county_fips",
        "lat",
        "lon",
        "facility_type",
        "ownership",
        "bed_count",
        "psych_bed_count",
        "pediatric_psych_bed_count",
        "adolescent_unit",
        "young_adult_unit",
        "services_offered",
        "populations_served",
        "payment_accepted",
        "medicaid_accepted",
        "accreditation",
        "opened_date",
        "closed_date",
        "last_verified",
        "source",
        "source_raw_id",
    ]

    def parse_beds(value):
        # Missing, zero and unparseable counts are all stored as None.
        try:
            return int(value or 0) or None
        except (TypeError, ValueError):
            return None

    def to_row(record):
        first_approved = record.get(
            "date_first_approved_to_provide_medicare_and_medicaid_services"
        )
        # Columns absent from this mapping fall back to None via .get() below.
        known = {
            "ccn": record.get("cms_certification_number_ccn"),
            "name": record.get("provider_name"),
            "address": record.get("provider_address"),
            "city": record.get("citytown"),
            "state": record.get("state"),
            "zip": record.get("zip_code"),
            "facility_type": "nursing_home",
            "ownership": record.get("ownership_type"),
            "bed_count": parse_beds(record.get("number_of_certified_beds")),
            "services_offered": [],
            "populations_served": [],
            "payment_accepted": [],
            "opened_date": first_approved or None,
            "source": "cms_nursing_home",
        }
        return tuple(known.get(column) for column in cols)

    rows = [to_row(record) for record in raw]
    bulk_insert(conn, "bhi_facilities", cols, rows)
    return len(rows)
def main():
    """Run the full ingestion: fetch all rows, insert them, log the count.

    The fetch and the insert both happen inside the ``job_run`` context for
    the ``bhi_cms_nursing_home`` job.
    """
    with job_run("bhi_cms_nursing_home") as (conn, _unused):
        fetched = fetch_rows()
        inserted = write_rows(conn, fetched)
        LOG.info("inserted %d", inserted)
if __name__ == "__main__":
    # `test` as the first CLI argument only probes the endpoint; the exit
    # status is 0 when rows came back and 1 otherwise.
    if sys.argv[1:2] == ["test"]:
        sys.exit(0 if test_endpoint() else 1)
    main()