#!/usr/bin/env python3
# READY TO DEPLOY — requires base Brain Postgres schema + run schemas/bhi_tables.sql
"""
HRSA Mental Health HPSA (Health Professional Shortage Areas) bulk CSV.

Source: https://data.hrsa.gov/DataDownload/DD_Files/BCD_HPSA_FCT_DET_MH.csv
Confirmed: ~23 MB CSV, all active + historical MH HPSAs.
"""
import csv
import io
import logging
import sys
from datetime import datetime

from _common import RateLimitedSession, bulk_insert, job_run

LOG = logging.getLogger("bhi.hrsa_hpsa")
URL = "https://data.hrsa.gov/DataDownload/DD_Files/BCD_HPSA_FCT_DET_MH.csv"

# Date layouts tried, in order, by _parse_date.
_DATE_FORMATS = ("%Y-%m-%d", "%m/%d/%Y")


def test_endpoint():
    """Smoke-test the download URL by reading only its first line.

    Prints the ``content-length`` response header and the first 200
    characters of the first line of the body.

    Returns:
        True when a first line could be read, False when the body was empty.
    """
    s = RateLimitedSession()
    # stream=True so only the first line is pulled, not the whole file.
    r = s.get(URL, stream=True)
    # A default keeps an empty body from surfacing as a bare StopIteration.
    first = next(r.iter_lines(), None)
    if first is None:
        print("FAIL: empty response body")
        return False
    print(f"OK: content-length={r.headers.get('content-length')}")
    print("header:", first.decode("utf-8", errors="replace")[:200])
    return True


def fetch_rows():
    """Download the full CSV and parse it into a list of dicts.

    The whole body is decoded as UTF-8 and held in memory before parsing.

    Returns:
        A list with one dict per CSV data row, keyed by the header names.
    """
    s = RateLimitedSession(min_interval=0.5)
    # NOTE(review): no HTTP status check here — confirm RateLimitedSession
    # raises on error responses, otherwise an error page would be parsed as CSV.
    r = s.get(URL)
    r.encoding = "utf-8"
    reader = csv.DictReader(io.StringIO(r.text))
    rows = list(reader)
    LOG.info("fetched %d HPSA rows", len(rows))
    return rows


def _parse_date(s):
    """Parse *s* as a date, trying each layout in ``_DATE_FORMATS``.

    Surrounding whitespace is ignored.

    Returns:
        A ``datetime.date``, or None when *s* is empty/None or matches none
        of the known layouts.
    """
    if not s:
        return None
    # strptime rejects padded input, so strip before trying the layouts.
    text = s.strip()
    for fmt in _DATE_FORMATS:
        try:
            return datetime.strptime(text, fmt).date()
        except ValueError:
            continue
    return None


def _parse_int(s):
    """Convert *s* to an int, going through float so ``"12.0"`` is accepted.

    Fractional values are truncated toward zero by ``int()``.

    Returns:
        The integer value, or None when *s* is None, empty, or not a finite
        number.
    """
    if s in (None, ""):
        return None
    try:
        return int(float(s))
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")) and overflowing literals like "1e999".
        return None


def write_rows(conn, raw):
    """Map raw CSV dicts to ``bhi_shortages`` tuples and pass them to bulk_insert.

    Args:
        conn: Connection object forwarded unchanged to ``bulk_insert``.
        raw: Iterable of dicts keyed by the CSV header names.

    Returns:
        The number of row tuples handed to ``bulk_insert``.
    """
    cols = [
        "hpsa_id",
        "state",
        "county_fips",
        "score",
        "population_served",
        "designated_date",
        "withdrawn_date",
        "source",
    ]
    # Tuple order must match ``cols`` above.
    rows = [
        (
            r.get("HPSA ID"),
            r.get("Primary State Abbreviation"),
            # Fall back to the geography identifier when no county FIPS is given.
            r.get("Common County FIPS Code")
            or r.get("HPSA Geography Identification Number"),
            _parse_int(r.get("HPSA Score")),
            _parse_int(r.get("HPSA Designation Population")),
            _parse_date(r.get("HPSA Designation Date")),
            _parse_date(r.get("Withdrawn Date")),
            "hrsa_hpsa_mh",
        )
        for r in raw
    ]
    bulk_insert(conn, "bhi_shortages", cols, rows)
    return len(rows)


def main():
    """Fetch the CSV and load it inside a ``bhi_hrsa_hpsa`` job run."""
    # job_run yields a pair; only its first element (the connection) is used.
    with job_run("bhi_hrsa_hpsa") as (conn, _):
        n = write_rows(conn, fetch_rows())
        LOG.info("inserted %d", n)


if __name__ == "__main__":
    # "test" as the first argument runs the endpoint smoke test only.
    if len(sys.argv) > 1 and sys.argv[1] == "test":
        sys.exit(0 if test_endpoint() else 1)
    main()