Files
economic-brain-bhi/jobs/ingestion/cdc_brfss.py
2026-04-05 20:15:36 +00:00

93 lines
2.5 KiB
Python

#!/usr/bin/env python3
# READY TO DEPLOY — requires base Brain Postgres schema + run schemas/bhi_tables.sql
"""
CDC BRFSS Prevalence Data (Socrata).
Source: https://data.cdc.gov/resource/dttw-5yxu.json
Pulls depression + mental-health-not-good items by state, with
young-adult (18-24) breakouts where available.
"""
import logging
import sys
from _common import RateLimitedSession, bulk_insert, job_run
# Module-level logger shared by the functions in this job.
LOG = logging.getLogger("bhi.cdc_brfss")
# Socrata JSON endpoint queried by test_endpoint() and fetch_rows().
BASE = "https://data.cdc.gov/resource/dttw-5yxu.json"
# BRFSS topics of interest for BHI
# fetch_rows() runs one paged query per entry, comparing each string for
# equality against the dataset's `topic` field, so spelling and case matter.
TOPICS = [
"Depression",
"Mental Health Status",
"Poor Mental Health",
]
def test_endpoint():
    """Smoke-test the BRFSS endpoint with a two-row request.

    Prints how many rows came back and, when there is at least one, the
    ``topic`` field of the first row.

    Returns:
        bool: True when the response is non-empty, False otherwise.
    """
    session = RateLimitedSession()
    response = session.get(BASE, params={"$limit": 2})
    sample = response.json()
    print(f"OK: returned {len(sample)} rows")
    # Guard clause: nothing to preview when the endpoint returned no rows.
    if not sample:
        return False
    first_row = sample[0]
    print("sample topic:", first_row.get("topic"))
    return True
def fetch_rows():
    """Download every BRFSS record for each topic listed in ``TOPICS``.

    Queries the Socrata endpoint ``BASE`` one topic at a time, paging through
    the results with ``$limit``/``$offset`` until a short or empty page is
    returned, and concatenates all pages into a single list.

    Returns:
        list: The raw JSON records (dicts) for all topics, in ``TOPICS`` order.

    Raises:
        RuntimeError: If a page is not a JSON array (for example when the
            API answers with an error object instead of rows).
    """
    page_size = 5000
    s = RateLimitedSession(min_interval=0.2)
    out = []
    for topic in TOPICS:
        offset = 0
        while True:
            batch = s.get(BASE, params={
                "$where": f"topic='{topic}'",
                # Offset paging is only reliable with an explicit sort:
                # without $order the row order is not guaranteed to be
                # stable, so pages could repeat or miss records.
                "$order": ":id",
                "$limit": page_size,
                "$offset": offset,
            }).json()
            # Fail here with a clear message rather than letting a non-list
            # payload be merged into `out` and break later during parsing.
            if not isinstance(batch, list):
                raise RuntimeError(
                    f"unexpected BRFSS response for topic {topic!r} "
                    f"at offset {offset}: {batch!r}"
                )
            if not batch:
                break
            out.extend(batch)
            # A short page means this topic is exhausted.
            if len(batch) < page_size:
                break
            offset += page_size
        # `total` is the running count across all topics fetched so far.
        LOG.info("topic=%s total=%d", topic, len(out))
    return out
def write_rows(conn, raw):
    """Normalize raw BRFSS records and insert them into ``bhi_demand_indicators``.

    Each usable record becomes one tuple matching the column order
    ``geo_type, geo_code, measure, age_bracket, period, value, source``.
    Records whose ``data_value`` is missing, blank or not numeric are skipped
    instead of being stored as a made-up ``0.0`` observation.

    Args:
        conn: Database connection, passed through to ``bulk_insert``.
        raw: Iterable of record dicts; the keys read are ``data_value``,
            ``break_out``, ``locationabbr``, ``question``, ``topic`` and
            ``year``.

    Returns:
        int: Number of rows handed to ``bulk_insert``.
    """
    cols = ["geo_type", "geo_code", "measure", "age_bracket", "period", "value", "source"]
    rows = []
    skipped = 0
    for r in raw:
        # float(None) raises TypeError and float("") raises ValueError, so
        # absent and blank values are skipped here; a real 0 is still kept.
        try:
            val = float(r.get("data_value"))
        except (TypeError, ValueError):
            skipped += 1
            continue
        breakout = (r.get("break_out") or "Overall").lower()
        if "18" in breakout and "24" in breakout:
            # NOTE(review): the 18-24 breakout is stored under the label
            # "18-25" -- presumably to line up with other sources; confirm.
            bracket = "18-25"
        elif "overall" in breakout:
            bracket = "all"
        else:
            # Any other breakout is stored verbatim (lower-cased).
            bracket = breakout
        rows.append((
            "state",
            r.get("locationabbr"),
            # Prefer the question text, fall back to the topic; cap at 120 chars.
            (r.get("question") or r.get("topic") or "").strip()[:120],
            bracket,
            str(r.get("year") or ""),
            val,
            "cdc_brfss",
        ))
    if skipped:
        LOG.info("skipped %d rows without a numeric data_value", skipped)
    bulk_insert(conn, "bhi_demand_indicators", cols, rows)
    return len(rows)
def main():
    """Run the ingestion: fetch all BRFSS rows and insert them.

    The work happens inside the ``job_run("bhi_cdc_brfss")`` context, which
    yields a pair whose first element is used as the database connection;
    the second element is not used here.
    """
    with job_run("bhi_cdc_brfss") as (conn, _):
        fetched = fetch_rows()
        inserted = write_rows(conn, fetched)
        LOG.info("inserted %d", inserted)
if __name__ == "__main__":
    # `test` as the first CLI argument runs only the endpoint smoke test and
    # exits with 0 on success, 1 on failure; anything else runs the full job.
    cli_args = sys.argv[1:]
    if cli_args and cli_args[0] == "test":
        endpoint_ok = test_endpoint()
        sys.exit(0 if endpoint_ok else 1)
    main()