BHI layer v1: docs, schema, Phase A ingestion stubs
This commit is contained in:
114
jobs/ingestion/nppes.py
Normal file
114
jobs/ingestion/nppes.py
Normal file
@@ -0,0 +1,114 @@
|
||||
#!/usr/bin/env python3
|
||||
# READY TO DEPLOY — requires base Brain Postgres schema + run schemas/bhi_tables.sql
|
||||
"""
|
||||
CMS NPPES (National Plan & Provider Enumeration System) — behavioral health
|
||||
providers by taxonomy + state.
|
||||
|
||||
API: https://npiregistry.cms.hhs.gov/api/?version=2.1
|
||||
Filter: taxonomy codes for psychiatry, psychology, counseling, clinical social work, substance use disorder (SUD) treatment, and psychiatric facilities.
|
||||
"""
|
||||
import logging
|
||||
import sys
|
||||
from _common import RateLimitedSession, bulk_insert, job_run
|
||||
|
||||
# Module logger for this ingestion job.
LOG = logging.getLogger("bhi.nppes")

# NPI Registry endpoint; the API version is passed as a query parameter.
BASE = "https://npiregistry.cms.hhs.gov/api/"

# Behavioral-health taxonomy codes queried per state.
# Order is significant: it determines the order rows are fetched in.
BH_TAXONOMY_CODES = [
    # --- psychiatry ---
    "2084P0800X",  # Psychiatry
    "2084P0802X",  # Addiction Psychiatry
    "2084P0804X",  # Child & Adolescent Psychiatry
    # --- psychology ---
    "103T00000X",  # Psychologist
    "103TC2200X",  # Clinical Child & Adolescent Psychologist
    # --- counseling / social work ---
    "101YM0800X",  # Mental Health Counselor
    "1041C0700X",  # Clinical Social Worker
    # --- facilities ---
    "324500000X",  # Substance Abuse Rehabilitation Facility
    "283Q00000X",  # Psychiatric Hospital
    "323P00000X",  # Psychiatric Residential Treatment Facility
]

# Two-letter postal abbreviations: the 50 states followed by DC.
STATES = (
    "AL AK AZ AR CA CO CT DE FL GA HI ID IL IN "
    "IA KS KY LA ME MD MA MI MN MS MO MT NE NV "
    "NH NJ NM NY NC ND OH OK OR PA RI SC SD TN "
    "TX UT VT VA WA WV WI WY DC"
).split()
|
||||
|
||||
|
||||
def test_endpoint():
    """Smoke-test the NPPES endpoint with a two-record query for NY.

    Prints the reported ``result_count`` and returns True when it is
    greater than zero.
    """
    query = {
        "version": "2.1", "taxonomy_description": "psychiatric",
        "state": "NY", "limit": 2,
    }
    session = RateLimitedSession()
    payload = session.get(BASE, params=query).json()
    print(f"OK: result_count={payload.get('result_count')}")
    found = payload.get("result_count", 0)
    return found > 0
|
||||
|
||||
|
||||
def fetch_rows():
    """Fetch raw NPPES results for every (state, taxonomy code) pair.

    Pages through the registry API 200 records at a time. Each result dict
    is tagged in place with ``_state`` and ``_taxonomy`` (the query values
    that produced it) before being collected.

    Returns:
        list: all raw result dicts, in query order. The same provider may
        appear more than once if it matches several queries.
    """
    page_size = 200  # value sent as `limit` on every request
    max_skip = 1000  # NPPES documents 1000 as the largest accepted `skip`

    s = RateLimitedSession(min_interval=0.1)
    all_rows = []
    for state in STATES:
        for taxonomy in BH_TAXONOMY_CODES:
            # NOTE(review): the value sent as `taxonomy_description` is a
            # taxonomy *code*; confirm the API matches codes in this field.
            for skip in range(0, max_skip + 1, page_size):
                r = s.get(BASE, params={
                    "version": "2.1",
                    "taxonomy_description": taxonomy,
                    "state": state,
                    "limit": page_size,
                    "skip": skip,
                }).json()
                # Error payloads carry no usable "results"; treat as empty.
                results = r.get("results") or []
                if not results:
                    break
                for row in results:
                    row["_state"] = state
                    row["_taxonomy"] = taxonomy
                all_rows.extend(results)
                if len(results) < page_size:
                    # Short page: nothing further to fetch for this pair.
                    break
            else:
                # Every allowed page came back full, so the API's paging cap
                # was reached and further matches may exist.
                LOG.warning(
                    "state=%s tax=%s hit NPPES paging cap; results may be truncated",
                    state, taxonomy,
                )
            LOG.info("state=%s tax=%s total=%d", state, taxonomy, len(all_rows))
    return all_rows
|
||||
|
||||
|
||||
def _facility_row(record):
    """Map one raw NPPES result dict onto a ``bhi_facilities`` row tuple."""
    addresses = record.get("addresses") or []
    # Prefer the practice-location address; otherwise fall back to the first
    # address listed, or to an empty dict when there are none.
    location = next(
        (a for a in addresses if a.get("address_purpose") == "LOCATION"),
        addresses[0] if addresses else {},
    )
    basic = record.get("basic") or {}
    org_name = basic.get("organization_name")
    name = org_name or " ".join(
        filter(None, [basic.get("first_name"), basic.get("last_name")])
    )
    # NPPES marks organisations with enumeration_type "NPI-2" and gives them
    # an organization_name. name_prefix ("Dr.", "Mr.") only exists on
    # individuals, so its absence does not identify a provider.
    is_org = record.get("enumeration_type") == "NPI-2" or bool(org_name)
    return (
        None, str(record.get("number", "")),
        name,
        location.get("address_1"), location.get("city"),
        location.get("state"), location.get("postal_code"), None,
        None, None,
        "org" if is_org else "provider",
        None, None, None, None, None, None,
        [record.get("_taxonomy", "")], [], [], None, None, None, None, None,
        "nppes", None,
    )


def write_rows(conn, raw):
    """Insert raw NPPES results into ``bhi_facilities``.

    Args:
        conn: database connection handed through to ``bulk_insert``.
        raw: iterable of NPPES result dicts as produced by ``fetch_rows``
            (each optionally carrying a ``_taxonomy`` tag).

    Returns:
        int: number of rows passed to ``bulk_insert``.
    """
    cols = [
        "ccn", "npi", "name", "address", "city", "state", "zip", "county_fips",
        "lat", "lon", "facility_type", "ownership", "bed_count", "psych_bed_count",
        "pediatric_psych_bed_count", "adolescent_unit", "young_adult_unit",
        "services_offered", "populations_served", "payment_accepted",
        "medicaid_accepted", "accreditation", "opened_date", "closed_date",
        "last_verified", "source", "source_raw_id",
    ]
    # Tuple positions in each row line up one-to-one with `cols`.
    rows = [_facility_row(r) for r in raw]
    bulk_insert(conn, "bhi_facilities", cols, rows)
    return len(rows)
|
||||
|
||||
|
||||
def main():
    """Run the ingestion job: fetch NPPES rows, insert them, log the count.

    All work happens inside the ``job_run("bhi_nppes")`` context, which
    supplies the database connection.
    """
    with job_run("bhi_nppes") as (conn, _):
        fetched = fetch_rows()
        inserted = write_rows(conn, fetched)
        LOG.info("inserted %d", inserted)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # With a first argument of "test", run only the endpoint smoke test and
    # exit 0 on success, 1 on failure. Otherwise run the full ingestion job.
    cli_args = sys.argv[1:]
    if cli_args and cli_args[0] == "test":
        exit_code = 0 if test_endpoint() else 1
        sys.exit(exit_code)
    main()
|
||||
Reference in New Issue
Block a user