93 lines
2.5 KiB
Python
93 lines
2.5 KiB
Python
#!/usr/bin/env python3
|
|
# READY TO DEPLOY — requires base Brain Postgres schema + run schemas/bhi_tables.sql
|
|
"""
|
|
CDC BRFSS Prevalence Data (Socrata).
|
|
|
|
Source: https://data.cdc.gov/resource/dttw-5yxu.json
|
|
Pulls depression + mental-health-not-good items by state, with
|
|
young-adult (18-24) breakouts where available.
|
|
"""
|
|
import logging
|
|
import sys
|
|
from _common import RateLimitedSession, bulk_insert, job_run
|
|
|
|
# Module-level logger, registered under the "bhi.cdc_brfss" name.
LOG = logging.getLogger("bhi.cdc_brfss")

# Socrata JSON endpoint that every request in this module is sent to.
BASE = "https://data.cdc.gov/resource/dttw-5yxu.json"

# BRFSS `topic` values of interest for BHI.
TOPICS = ["Depression", "Mental Health Status", "Poor Mental Health"]
|
|
|
|
|
|
def test_endpoint():
    """Smoke-test the BRFSS endpoint by requesting two rows.

    Prints how many rows came back and, when there is at least one, the
    ``topic`` field of the first row.

    Returns:
        True if the decoded response was non-empty, otherwise False.
    """
    session = RateLimitedSession()
    sample = session.get(BASE, params={"$limit": 2}).json()
    print(f"OK: returned {len(sample)} rows")
    if not sample:
        return False
    print("sample topic:", sample[0].get("topic"))
    return True
|
|
|
|
|
|
def fetch_rows():
    """Download every BRFSS row for each topic listed in ``TOPICS``.

    Each topic is fetched page by page with Socrata ``$limit``/``$offset``
    paging. The query also sends ``$order=:id``: without an explicit order
    Socrata does not promise the same row order from one request to the
    next, so offset paging could skip or repeat rows.

    Returns:
        A list of the raw row dicts decoded from the JSON responses, with
        all topics concatenated in ``TOPICS`` order.

    Raises:
        RuntimeError: if a response decodes to anything other than a JSON
            array (for example an error payload), so that it is reported
            here instead of being mixed into the result list.
    """
    page_size = 5000
    session = RateLimitedSession(min_interval=0.2)
    out = []
    for topic in TOPICS:
        offset = 0
        while True:
            batch = session.get(BASE, params={
                "$where": f"topic='{topic}'",
                "$order": ":id",  # stable ordering is required for paging
                "$limit": page_size,
                "$offset": offset,
            }).json()
            if not isinstance(batch, list):
                # Extending with a dict would silently add its keys as rows.
                raise RuntimeError(
                    f"unexpected BRFSS response for topic {topic!r} "
                    f"at offset {offset}: {batch!r}"
                )
            out.extend(batch)
            # A short (or empty) page means this topic is exhausted.
            if len(batch) < page_size:
                break
            offset += page_size
        # `total` is the running count across all topics fetched so far.
        LOG.info("topic=%s total=%d", topic, len(out))
    return out
|
|
|
|
|
|
def write_rows(conn, raw):
    """Convert raw BRFSS rows into indicator tuples and bulk-insert them.

    Rows whose ``data_value`` is missing, empty, or not parseable as a
    float are skipped, so an absent value is never stored as ``0.0``.
    A genuine zero (``0`` or ``"0"``) is kept.

    Args:
        conn: Connection object passed through to ``bulk_insert``.
        raw: Iterable of row dicts as decoded from the BRFSS JSON.

    Returns:
        The number of rows handed to ``bulk_insert``.
    """
    cols = ["geo_type", "geo_code", "measure", "age_bracket", "period", "value", "source"]
    rows = []
    for r in raw:
        raw_value = r.get("data_value")
        if raw_value is None or raw_value == "":
            # No reported value: skip instead of recording a fake 0.
            continue
        try:
            val = float(raw_value)
        except (TypeError, ValueError):
            continue

        breakout = (r.get("break_out") or "Overall").lower()
        if "18" in breakout and "24" in breakout:
            # The 18-24 breakout is stored under the "18-25" label.
            # NOTE(review): presumably this matches the bracket naming used
            # elsewhere in bhi_demand_indicators - confirm.
            bracket = "18-25"
        elif "overall" in breakout:
            bracket = "all"
        else:
            bracket = breakout

        rows.append((
            "state",
            r.get("locationabbr"),
            # Prefer the question text, fall back to the topic; cap at 120 chars.
            (r.get("question") or r.get("topic") or "").strip()[:120],
            bracket,
            str(r.get("year") or ""),
            val,
            "cdc_brfss",
        ))
    bulk_insert(conn, "bhi_demand_indicators", cols, rows)
    return len(rows)
|
|
|
|
|
|
def main():
    """Run the BRFSS load inside a ``bhi_cdc_brfss`` job run.

    Fetches all rows, writes them through the connection yielded by
    ``job_run``, and logs how many rows were inserted.
    """
    with job_run("bhi_cdc_brfss") as (conn, _unused):
        fetched = fetch_rows()
        inserted = write_rows(conn, fetched)
        LOG.info("inserted %d", inserted)
|
|
|
|
|
|
if __name__ == "__main__":
    # With `test` as the first CLI argument, run only the endpoint smoke
    # test and exit 0 if it returned rows, 1 otherwise.
    smoke_test_requested = sys.argv[1:2] == ["test"]
    if smoke_test_requested:
        exit_code = 0 if test_endpoint() else 1
        sys.exit(exit_code)
    main()
|