BHI layer v1: docs, schema, Phase A ingestion stubs
This commit is contained in:
82
jobs/ingestion/cms_nursing_home.py
Normal file
82
jobs/ingestion/cms_nursing_home.py
Normal file
@@ -0,0 +1,82 @@
|
||||
#!/usr/bin/env python3
|
||||
# READY TO DEPLOY — requires base Brain Postgres schema + run schemas/bhi_tables.sql
|
||||
"""
|
||||
CMS Nursing Home Provider Information — captures skilled nursing facilities
(SNFs) that house behavioral health residents (SNF-IMD dynamic) for later
filtering on chain + ownership.

Source: https://data.cms.gov/provider-data/api/1/datastore/query/4pq5-n9py/0
|
||||
"""
|
||||
import logging
|
||||
import sys
|
||||
from _common import RateLimitedSession, bulk_insert, job_run
|
||||
|
||||
# Module logger, named under the "bhi" logger hierarchy.
LOG = logging.getLogger("bhi.cms_nursing_home")

# CMS Provider Data datastore query endpoint that every request in this job hits.
BASE = "https://data.cms.gov/provider-data/api/1/datastore/query/4pq5-n9py/0"

# Rows requested per page; fetch_rows also advances its offset by this amount.
PAGE = 1000
|
||||
|
||||
|
||||
def test_endpoint():
    """Smoke-test the CMS endpoint by requesting two rows.

    Prints the number of rows returned together with the ``provider_name``
    of the first row (or ``None`` when nothing came back).

    Returns:
        True when the response's ``results`` list is non-empty, else False.
    """
    session = RateLimitedSession()
    response = session.get(BASE, params={"limit": 2})
    payload = response.json()
    results = payload.get("results", [])

    if results:
        sample = results[0].get("provider_name")
    else:
        sample = None

    print(f"OK: {len(results)} rows, sample:", sample)
    return bool(results)
|
||||
|
||||
|
||||
def fetch_rows():
    """Download every nursing-home record from the CMS endpoint.

    Requests pages of ``PAGE`` rows at increasing offsets and stops at the
    first page that is empty or shorter than ``PAGE``.

    Returns:
        A list holding the ``results`` entries of all fetched pages, in
        the order they were received.
    """
    session = RateLimitedSession(min_interval=0.25)
    collected = []
    offset = 0

    while True:
        query = {"limit": PAGE, "offset": offset}
        page = session.get(BASE, params=query).json().get("results", [])

        if page:
            collected.extend(page)

        # An empty or short page means the dataset is exhausted.
        if not page or len(page) < PAGE:
            break

        offset += PAGE

    LOG.info("fetched %d nursing homes", len(collected))
    return collected
|
||||
|
||||
|
||||
def write_rows(conn, raw):
    """Map raw CMS records onto ``bhi_facilities`` columns and insert them.

    Args:
        conn: Connection object handed straight to ``bulk_insert``.
        raw: Iterable of mapping-like records exposing ``.get``
            (presumably the dicts returned by the CMS API — confirm
            against the caller).

    Returns:
        The number of row tuples built and passed to ``bulk_insert``.
    """
    columns = [
        "ccn", "npi", "name", "address", "city", "state", "zip",
        "county_fips", "lat", "lon", "facility_type", "ownership",
        "bed_count", "psych_bed_count", "pediatric_psych_bed_count",
        "adolescent_unit", "young_adult_unit", "services_offered",
        "populations_served", "payment_accepted", "medicaid_accepted",
        "accreditation", "opened_date", "closed_date", "last_verified",
        "source", "source_raw_id",
    ]

    records = []
    for item in raw:
        # Missing, zero, or unparseable bed counts are all stored as NULL.
        try:
            certified = int(item.get("number_of_certified_beds") or 0)
        except (TypeError, ValueError):
            certified = 0
        bed_count = certified if certified else None

        first_approved = item.get(
            "date_first_approved_to_provide_medicare_and_medicaid_services"
        )

        # Start with every column as None, then fill in what this source provides.
        record = dict.fromkeys(columns)
        record["ccn"] = item.get("cms_certification_number_ccn")
        record["name"] = item.get("provider_name")
        record["address"] = item.get("provider_address")
        record["city"] = item.get("citytown")
        record["state"] = item.get("state")
        record["zip"] = item.get("zip_code")
        record["facility_type"] = "nursing_home"
        record["ownership"] = item.get("ownership_type")
        record["bed_count"] = bed_count
        # Fresh empty lists per row for the three list-valued columns.
        record["services_offered"] = []
        record["populations_served"] = []
        record["payment_accepted"] = []
        record["opened_date"] = first_approved or None
        record["source"] = "cms_nursing_home"

        records.append(tuple(record[name] for name in columns))

    bulk_insert(conn, "bhi_facilities", columns, records)
    return len(records)
|
||||
|
||||
|
||||
def main():
    """Run the full ingestion: fetch all rows, insert them, log the count.

    The work happens inside the ``job_run("bhi_cms_nursing_home")`` context,
    which yields a pair whose first element is used as the connection.
    """
    with job_run("bhi_cms_nursing_home") as (conn, _):
        fetched = fetch_rows()
        inserted = write_rows(conn, fetched)
        LOG.info("inserted %d", inserted)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # `<script> test` only probes the endpoint and exits 0 on success, 1 on failure.
    if sys.argv[1:2] == ["test"]:
        reachable = test_endpoint()
        sys.exit(0 if reachable else 1)
    main()
|
||||
Reference in New Issue
Block a user