#!/usr/bin/env python3
# READY TO DEPLOY — requires base Brain Postgres schema + run schemas/bhi_tables.sql
"""
CMS NPPES (National Plan & Provider Enumeration System) — behavioral health
providers by taxonomy + state.

API: https://npiregistry.cms.hhs.gov/api/?version=2.1
Filter: taxonomy codes for psychiatry, psychology, counseling, SUD.

Usage:
    <script>          fetch every state x taxonomy combination and insert the
                      rows into ``bhi_facilities``
    <script> test     issue one small query and exit 0 if it returned results
"""
import logging
import sys

from _common import RateLimitedSession, bulk_insert, job_run

LOG = logging.getLogger("bhi.nppes")

BASE = "https://npiregistry.cms.hhs.gov/api/"
API_VERSION = "2.1"

# Paging limits of the NPPES API: at most 200 records per request and a
# ``skip`` offset of at most 1000, i.e. at most 1,200 records per query.
PAGE_SIZE = 200
MAX_SKIP = 1000

BH_TAXONOMY_CODES = [
    "2084P0800X",  # Psychiatry
    "2084P0802X",  # Addiction Psychiatry
    "2084P0804X",  # Child & Adolescent Psychiatry
    "103T00000X",  # Psychologist
    "103TC2200X",  # Clinical Child & Adolescent Psychologist
    "101YM0800X",  # Mental Health Counselor
    "1041C0700X",  # Clinical Social Worker
    "324500000X",  # Substance Abuse Rehabilitation Facility
    "283Q00000X",  # Psychiatric Hospital
    "323P00000X",  # Psychiatric Residential Treatment Facility
]

STATES = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA", "HI", "ID", "IL", "IN",
          "IA", "KS", "KY", "LA", "ME", "MD", "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV",
          "NH", "NJ", "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", "SD", "TN",
          "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY", "DC"]

# Column order of the rows handed to bulk_insert for ``bhi_facilities``.
FACILITY_COLUMNS = (
    "ccn", "npi", "name", "address", "city", "state", "zip", "county_fips",
    "lat", "lon", "facility_type", "ownership", "bed_count", "psych_bed_count",
    "pediatric_psych_bed_count", "adolescent_unit", "young_adult_unit",
    "services_offered", "populations_served", "payment_accepted",
    "medicaid_accepted", "accreditation", "opened_date", "closed_date",
    "last_verified", "source", "source_raw_id",
)


def test_endpoint():
    """Smoke-test the API with a tiny query.

    Prints the reported ``result_count`` and returns True when it is
    greater than zero.
    """
    s = RateLimitedSession()
    r = s.get(BASE, params={
        "version": API_VERSION,
        "taxonomy_description": "psychiatric",
        "state": "NY",
        "limit": 2,
    }).json()
    print(f"OK: result_count={r.get('result_count')}")
    # ``or 0`` also covers a present-but-null result_count.
    return (r.get("result_count") or 0) > 0


def fetch_rows():
    """Download provider records for every state x taxonomy combination.

    Pages through each query ``PAGE_SIZE`` records at a time until a short
    or empty page is returned or the API's ``skip`` ceiling is reached.

    Returns:
        A list of the raw result dicts from the API, each annotated in place
        with ``_state`` and ``_taxonomy`` (the query that produced it).
    """
    s = RateLimitedSession(min_interval=0.1)
    all_rows = []
    for state in STATES:
        for taxonomy in BH_TAXONOMY_CODES:
            skip = 0
            # Never request skip > MAX_SKIP: such a request cannot return
            # data, so issuing it only wastes a rate-limited call.
            while skip <= MAX_SKIP:
                # NOTE(review): the taxonomy *code* is sent through the
                # ``taxonomy_description`` parameter — confirm that the API
                # matches codes (not only description text) via this field.
                r = s.get(BASE, params={
                    "version": API_VERSION,
                    "taxonomy_description": taxonomy,
                    "state": state,
                    "limit": PAGE_SIZE,
                    "skip": skip,
                }).json()
                # A response without results (including an error payload)
                # ends paging for this query.
                results = r.get("results") or []
                if not results:
                    break
                for row in results:
                    row["_state"] = state
                    row["_taxonomy"] = taxonomy
                all_rows.extend(results)
                if len(results) < PAGE_SIZE:
                    break
                skip += PAGE_SIZE
            else:
                # Loop ended because the skip ceiling was hit while pages
                # were still full: there may be more records than the API
                # lets us page through.
                LOG.warning(
                    "state=%s tax=%s hit the paging cap; results may be truncated",
                    state, taxonomy,
                )
            LOG.info("state=%s tax=%s total=%d", state, taxonomy, len(all_rows))
    return all_rows


def _facility_row(record):
    """Map one raw API result dict to a tuple ordered like FACILITY_COLUMNS."""
    addresses = record.get("addresses") or []
    # Prefer the practice location; otherwise fall back to the first address.
    location = next(
        (a for a in addresses if a.get("address_purpose") == "LOCATION"),
        addresses[0] if addresses else {},
    )
    basic = record.get("basic") or {}
    org_name = basic.get("organization_name")
    name = org_name or " ".join(
        filter(None, [basic.get("first_name"), basic.get("last_name")])
    )
    values = {
        "npi": str(record.get("number", "")),
        "name": name,
        "address": location.get("address_1"),
        "city": location.get("city"),
        "state": location.get("state"),
        "zip": location.get("postal_code"),
        # Organization records carry ``organization_name``; individuals carry
        # first/last name instead.  (``name_prefix`` is an optional field of
        # individuals and therefore cannot distinguish the two.)
        "facility_type": "org" if org_name else "provider",
        "services_offered": [record.get("_taxonomy", "")],
        "populations_served": [],
        "payment_accepted": [],
        "source": "nppes",
    }
    # Columns this source cannot populate are inserted as NULL.
    return tuple(values.get(col) for col in FACILITY_COLUMNS)


def write_rows(conn, raw):
    """Insert raw API records into ``bhi_facilities``.

    Args:
        conn: database connection, passed through to ``bulk_insert``.
        raw: iterable of result dicts as produced by ``fetch_rows``.

    Returns:
        The number of rows handed to ``bulk_insert``.
    """
    # NOTE(review): a provider matching several taxonomy codes appears once
    # per code in ``raw``; whether duplicates are collapsed depends on
    # bulk_insert / the table constraints — confirm.
    rows = [_facility_row(record) for record in raw]
    bulk_insert(conn, "bhi_facilities", list(FACILITY_COLUMNS), rows)
    return len(rows)


def main():
    """Run the full fetch-and-insert job inside a ``bhi_nppes`` job run."""
    with job_run("bhi_nppes") as (conn, _):
        n = write_rows(conn, fetch_rows())
        LOG.info("inserted %d", n)


if __name__ == "__main__":
    if len(sys.argv) > 1 and sys.argv[1] == "test":
        sys.exit(0 if test_endpoint() else 1)
    main()