#!/usr/bin/env python3
# READY TO DEPLOY — requires base Brain Postgres schema + run schemas/bhi_tables.sql
"""
CDC YRBSS — Youth Risk Behavior Survey (high and middle school).

Sources (Socrata):
  - High school:   https://data.cdc.gov/resource/3qty-g4aq.json
  - Middle school: https://data.cdc.gov/resource/uqmk-4y2w.json

Key items: "considered suicide", "attempted suicide", "persistent sadness",
substance use — all adolescent (13-17) bracket.

Run with the single argument ``test`` to probe the endpoints instead of
loading data; the exit code is 0 when every endpoint answers HTTP 200.
"""
import logging
import sys

from _common import RateLimitedSession, bulk_insert, job_run

LOG = logging.getLogger("bhi.cdc_yrbss")

# Short dataset key -> Socrata JSON endpoint. The key is stored on every
# fetched row (as "_dataset") and ends up in the "source" column.
DATASETS = {
    "hs": "https://data.cdc.gov/resource/3qty-g4aq.json",
    "ms": "https://data.cdc.gov/resource/uqmk-4y2w.json",
}

# Case-insensitive substrings that mark a survey question as relevant.
KEYWORDS = ["suicide", "sad", "hopeless", "mental health", "electronic"]

# Number of rows requested per page; a shorter page means the dataset is done.
PAGE_SIZE = 5000

# Row fields that may identify or describe the survey question, in the order
# used to pick the stored measure label.
QUESTION_FIELDS = ("questioncode", "shortquestiontext", "question")


def test_endpoint() -> bool:
    """Probe every dataset with a one-row request and print the result.

    Returns:
        True if every dataset answered with HTTP 200, otherwise False.
    """
    s = RateLimitedSession()
    ok = True
    for k, url in DATASETS.items():
        r = s.get(url, params={"$limit": 1})
        status_ok = r.status_code == 200
        # Only parse the body on success: an error response is not guaranteed
        # to be JSON, and a probe should report the failure rather than crash.
        rows = len(r.json()) if status_ok else 0
        print(f"{k}: status={r.status_code}, rows={rows}")
        ok = ok and status_ok
    return ok


def fetch_rows() -> list:
    """Download every row of every dataset in DATASETS, page by page.

    Each returned row dict gets an extra "_dataset" entry holding the
    DATASETS key it came from.

    Returns:
        A single list with the rows of all datasets, in DATASETS order.

    Raises:
        RuntimeError: if a page request does not answer HTTP 200 or its
            payload is not a JSON array.
    """
    s = RateLimitedSession(min_interval=0.2)
    out = []
    for key, url in DATASETS.items():
        offset = 0
        while True:
            resp = s.get(
                url,
                params={
                    "$limit": PAGE_SIZE,
                    "$offset": offset,
                    # Without an explicit order Socrata does not guarantee a
                    # stable row order between requests, so offset paging
                    # could repeat or skip rows.
                    "$order": ":id",
                },
            )
            if resp.status_code != 200:
                raise RuntimeError(
                    f"yrbss {key}: HTTP {resp.status_code} at offset {offset}"
                )
            batch = resp.json()
            if not isinstance(batch, list):
                # A non-array payload cannot be tagged and stored row by row.
                raise RuntimeError(
                    f"yrbss {key}: unexpected payload at offset {offset}: {batch!r}"
                )
            if not batch:
                break
            for row in batch:
                row["_dataset"] = key
            out.extend(batch)
            if len(batch) < PAGE_SIZE:
                break
            offset += PAGE_SIZE
        # NOTE: the logged count is the running total across datasets.
        LOG.info("yrbss %s -> %d", key, len(out))
    return out


def _question_is_relevant(q: str) -> bool:
    """Return True if *q* contains any KEYWORDS entry, ignoring case.

    None and the empty string are treated as not relevant.
    """
    ql = (q or "").lower()
    return any(k in ql for k in KEYWORDS)


def write_rows(conn, raw) -> int:
    """Filter raw survey rows and insert them into bhi_demand_indicators.

    A row is kept when any of its QUESTION_FIELDS matches KEYWORDS and it
    carries a non-zero numeric value.

    Args:
        conn: database connection handed through to ``bulk_insert``.
        raw: iterable of row dicts as produced by ``fetch_rows``.

    Returns:
        The number of rows passed to ``bulk_insert``.
    """
    cols = ["geo_type", "geo_code", "measure", "age_bracket", "period", "value", "source"]
    rows = []
    for r in raw:
        texts = [r.get(field) or "" for field in QUESTION_FIELDS]
        # Measure label: first non-empty identifying field (code preferred).
        question = next((t for t in texts if t), "")
        # Check every field, not just the label: a question code by itself
        # would not contain any keyword even when the question text does.
        if not any(_question_is_relevant(t) for t in texts):
            continue
        try:
            val = float(r.get("data_value") or r.get("greater_risk_data_value") or 0)
        except (TypeError, ValueError):
            continue
        # A missing value falls back to 0 above, so zero rows are skipped.
        if val == 0:
            continue
        rows.append((
            "state" if r.get("locationdesc") else "district",
            r.get("locationabbr") or r.get("sitecode"),
            question[:120],
            "13-17",
            str(r.get("year") or ""),
            val,
            f"cdc_yrbss_{r.get('_dataset','hs')}",
        ))
    bulk_insert(conn, "bhi_demand_indicators", cols, rows)
    return len(rows)


def main() -> None:
    """Fetch all YRBSS rows and store the relevant ones inside a job run."""
    with job_run("bhi_cdc_yrbss") as (conn, _):
        n = write_rows(conn, fetch_rows())
        LOG.info("inserted %d", n)


if __name__ == "__main__":
    # "test" as first argument: probe the endpoints only and exit 0/1.
    if len(sys.argv) > 1 and sys.argv[1] == "test":
        sys.exit(0 if test_endpoint() else 1)
    main()