Files
2026-04-05 20:15:36 +00:00

86 lines
2.4 KiB
Python

#!/usr/bin/env python3
# READY TO DEPLOY — requires base Brain Postgres schema + run schemas/bhi_tables.sql
"""
HRSA Mental Health HPSA (Health Professional Shortage Areas) bulk CSV.
Source: https://data.hrsa.gov/DataDownload/DD_Files/BCD_HPSA_FCT_DET_MH.csv
Confirmed: ~23 MB CSV, all active + historical MH HPSAs.
"""
import csv
import io
import logging
import sys
from datetime import datetime
from _common import RateLimitedSession, bulk_insert, job_run
# Module logger; records are emitted under the "bhi.hrsa_hpsa" name.
LOG = logging.getLogger("bhi.hrsa_hpsa")
# Bulk CSV download location for the HRSA Mental Health HPSA detail file.
URL = "https://data.hrsa.gov/DataDownload/DD_Files/BCD_HPSA_FCT_DET_MH.csv"
def test_endpoint():
    """Smoke-test the HRSA download URL without reading the whole file.

    Opens a streaming GET against ``URL``, then prints the reported
    ``content-length`` header and the first 200 characters of the first line.

    NOTE(review): assumes ``RateLimitedSession.get`` returns a
    ``requests``-style response (``ok``, ``status_code``, ``iter_lines``,
    ``headers``, ``close``) -- confirm against ``_common``.

    Returns:
        True when the endpoint answered with a non-error status and produced
        at least one line; False otherwise, so the CLI can exit non-zero.
    """
    session = RateLimitedSession()
    response = session.get(URL, stream=True)
    try:
        if not response.ok:
            print(f"FAIL: HTTP {response.status_code}")
            return False
        # A default avoids a bare StopIteration when the body is empty.
        first = next(response.iter_lines(), None)
        if first is None:
            print("FAIL: empty response body")
            return False
        print(f"OK: content-length={response.headers.get('content-length')}")
        print("header:", first.decode("utf-8", errors="replace")[:200])
        return True
    finally:
        # With stream=True the connection stays checked out until closed.
        response.close()
def fetch_rows():
    """Download the HPSA CSV from ``URL`` and parse it into dict rows.

    NOTE(review): assumes ``RateLimitedSession.get`` returns a
    ``requests``-style response (``raise_for_status``, ``encoding``,
    ``text``) -- confirm against ``_common``.

    Returns:
        A list with one dict per CSV record, keyed by the header names.

    Raises:
        Whatever ``raise_for_status`` raises on a 4xx/5xx answer.
    """
    session = RateLimitedSession(min_interval=0.5)
    response = session.get(URL)
    # Fail loudly on an error status; otherwise an HTML error page would be
    # parsed as CSV and handed to the insert step.
    response.raise_for_status()
    # "utf-8-sig" decodes plain UTF-8 as well, but also strips a leading BOM
    # that would otherwise be glued onto the first header name.
    response.encoding = "utf-8-sig"
    # newline="" leaves line endings untouched, as the csv module recommends.
    reader = csv.DictReader(io.StringIO(response.text, newline=""))
    rows = list(reader)
    LOG.info("fetched %d HPSA rows", len(rows))
    return rows
def _parse_date(s):
    """Parse a date cell in ``YYYY-MM-DD`` or ``MM/DD/YYYY`` form.

    Args:
        s: Raw cell text, or None.

    Returns:
        A ``datetime.date``, or None when the cell is missing, blank, or
        matches neither format.
    """
    if not s:
        return None
    # Surrounding whitespace makes strptime reject an otherwise valid date,
    # so strip it before trying the formats.
    text = s.strip()
    if not text:
        return None
    for fmt in ("%Y-%m-%d", "%m/%d/%Y"):
        try:
            return datetime.strptime(text, fmt).date()
        except ValueError:
            continue
    return None
def _parse_int(s):
    """Convert a numeric cell to ``int``, truncating any fractional part.

    Args:
        s: Raw cell value (typically a string), or None.

    Returns:
        The integer value, or None when the cell is missing, empty,
        non-numeric, NaN, or infinite.
    """
    if s in (None, ""):
        return None
    try:
        try:
            # Direct conversion keeps full precision for large integer text.
            return int(s)
        except ValueError:
            # Fall back for decimal / exponent notation such as "12.0".
            return int(float(s))
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers "inf"; ValueError covers "nan" and junk text.
        return None
def write_rows(conn, raw):
    """Reshape raw CSV records into tuples and hand them to ``bulk_insert``.

    Args:
        conn: Connection object passed straight through to ``bulk_insert``.
        raw: Iterable of dict-like CSV records keyed by HRSA header names.

    Returns:
        The number of tuples passed to ``bulk_insert``.
    """
    columns = [
        "hpsa_id", "state", "county_fips", "score", "population_served",
        "designated_date", "withdrawn_date", "source",
    ]

    def to_record(rec):
        # Prefer the county FIPS code; fall back to the geography identifier
        # when the FIPS cell is missing or empty.
        county = rec.get("Common County FIPS Code")
        if not county:
            county = rec.get("HPSA Geography Identification Number")
        return (
            rec.get("HPSA ID"),
            rec.get("Primary State Abbreviation"),
            county,
            _parse_int(rec.get("HPSA Score")),
            _parse_int(rec.get("HPSA Designation Population")),
            _parse_date(rec.get("HPSA Designation Date")),
            _parse_date(rec.get("Withdrawn Date")),
            "hrsa_hpsa_mh",
        )

    records = [to_record(rec) for rec in raw]
    bulk_insert(conn, "bhi_shortages", columns, records)
    return len(records)
def main():
    """Run the job: fetch the CSV rows, insert them, and log the count."""
    with job_run("bhi_hrsa_hpsa") as (conn, _unused):
        fetched = fetch_rows()
        inserted = write_rows(conn, fetched)
        LOG.info("inserted %d", inserted)
if __name__ == "__main__":
    # `test` as the first argument runs only the endpoint smoke test and
    # exits with its result; any other invocation runs the full job.
    cli_args = sys.argv[1:]
    if cli_args and cli_args[0] == "test":
        sys.exit(0 if test_endpoint() else 1)
    main()