#!/usr/bin/env python3
# READY TO DEPLOY — requires base Brain Postgres schema + run schemas/bhi_tables.sql
"""
CDC BRFSS Prevalence Data (Socrata).

Source: https://data.cdc.gov/resource/dttw-5yxu.json

Pulls depression + mental-health-not-good items by state, with young-adult
(18-24) breakouts where available.
"""
import logging
import sys

from _common import RateLimitedSession, bulk_insert, job_run

LOG = logging.getLogger("bhi.cdc_brfss")

BASE = "https://data.cdc.gov/resource/dttw-5yxu.json"

# BRFSS topics of interest for BHI
TOPICS = [
    "Depression",
    "Mental Health Status",
    "Poor Mental Health",
]

# Number of rows requested per page; a page shorter than this is the last one.
PAGE_SIZE = 5000


def test_endpoint() -> bool:
    """Smoke-test the endpoint by fetching two rows and printing a summary.

    Returns:
        True when the endpoint returned at least one row, False otherwise
        (including when the payload is not a list of rows).
    """
    s = RateLimitedSession()
    r = s.get(BASE, params={"$limit": 2}).json()
    if not isinstance(r, list):
        # A non-list payload (e.g. a JSON error object) is not row data, and
        # indexing it with r[0] below would raise instead of reporting failure.
        print(f"ERROR: unexpected response: {r!r}")
        return False
    print(f"OK: returned {len(r)} rows")
    if r:
        print("sample topic:", r[0].get("topic"))
    return bool(r)


def fetch_rows() -> list:
    """Download every row for each topic in TOPICS, paging PAGE_SIZE at a time.

    Returns:
        A single list holding the raw row dicts for all topics, in the order
        the topics are listed and the pages were returned.

    Raises:
        RuntimeError: when a page comes back as something other than a list
            of rows (for example a JSON error object).
    """
    s = RateLimitedSession(min_interval=0.2)
    out = []
    for topic in TOPICS:
        offset = 0
        while True:
            batch = s.get(BASE, params={
                # TOPICS holds trusted constants, so plain interpolation is
                # safe here; do not pass external input through this filter.
                "$where": f"topic='{topic}'",
                # Offset paging is only stable with an explicit ordering;
                # without it pages may repeat or omit rows.
                "$order": ":id",
                "$limit": PAGE_SIZE,
                "$offset": offset,
            }).json()
            if not batch:
                break
            if not isinstance(batch, list):
                # Extending `out` with a dict would add its keys as strings
                # and fail much later; stop here with a readable message.
                raise RuntimeError(
                    f"unexpected response for topic {topic!r} "
                    f"at offset {offset}: {batch!r}"
                )
            out.extend(batch)
            if len(batch) < PAGE_SIZE:
                break
            offset += PAGE_SIZE
        # `total` is the running count across all topics fetched so far.
        LOG.info("topic=%s total=%d", topic, len(out))
    return out


def _age_bracket(breakout) -> str:
    """Map a raw `break_out` label to the stored age-bracket label."""
    label = (breakout or "Overall").lower()
    if "18" in label and "24" in label:
        # NOTE(review): the 18-24 breakout is stored under the "18-25" label,
        # presumably to match bracket names used elsewhere — confirm.
        return "18-25"
    if "overall" in label:
        return "all"
    return label


def _to_row(r):
    """Convert one raw row dict to an insert tuple, or None if it has no value."""
    raw_val = r.get("data_value")
    if raw_val is None or raw_val == "":
        # A missing value is not a measured zero; storing 0.0 would record a
        # prevalence that was never reported.
        return None
    try:
        val = float(raw_val)
    except (TypeError, ValueError):
        return None
    return (
        "state",
        r.get("locationabbr"),
        (r.get("question") or r.get("topic") or "").strip()[:120],
        _age_bracket(r.get("break_out")),
        str(r.get("year") or ""),
        val,
        "cdc_brfss",
    )


def write_rows(conn, raw) -> int:
    """Insert the usable rows from `raw` into `bhi_demand_indicators`.

    Args:
        conn: connection object handed to `bulk_insert`.
        raw: iterable of raw row dicts as returned by `fetch_rows`.

    Returns:
        The number of rows passed to `bulk_insert`. Rows whose `data_value`
        is missing or not numeric are skipped and not counted.
    """
    cols = ["geo_type", "geo_code", "measure", "age_bracket", "period", "value", "source"]
    rows = []
    skipped = 0
    for r in raw:
        row = _to_row(r)
        if row is None:
            skipped += 1
            continue
        rows.append(row)
    if skipped:
        LOG.info("skipped %d rows without a numeric data_value", skipped)
    bulk_insert(conn, "bhi_demand_indicators", cols, rows)
    return len(rows)


def main() -> None:
    """Run the full job: fetch all topics and write them inside a job run."""
    with job_run("bhi_cdc_brfss") as (conn, _):
        n = write_rows(conn, fetch_rows())
        LOG.info("inserted %d", n)


if __name__ == "__main__":
    # `test` as the first argument runs only the endpoint smoke test.
    if len(sys.argv) > 1 and sys.argv[1] == "test":
        sys.exit(0 if test_endpoint() else 1)
    main()