#!/usr/bin/env python3
# READY TO DEPLOY — requires base Brain Postgres schema + run schemas/bhi_tables.sql
"""
IDEA Part B child count — specifically "Emotional Disturbance" (ED)
classification by state and local education agency (LEA).

Static CSVs hosted by US Department of Education / OSEP. No API.
This job pulls the most recent static tables. Update MANIFEST when new year drops.
"""
import csv
import io
import logging
import sys

from _common import RateLimitedSession, bulk_insert, job_run

LOG = logging.getLogger("bhi.idea_part_b")

# Static CSV links — placeholder pattern. The user confirmed landing at
# https://www2.ed.gov/programs/osepidea/618-data/static-tables/index.html
MANIFEST = [
    # (year, scope, url)
    ("2022-23", "state", "https://www2.ed.gov/programs/osepidea/618-data/static-tables/part-b/child-count-and-educational-environment/bchildcountandedenvironments2022-23.csv"),
]


def test_endpoint():
    """Probe every MANIFEST URL with a HEAD request and report reachability.

    Prints one line per MANIFEST entry.

    Returns:
        True only if every URL answered with status 200 or 302. A request
        that raises counts as a failure for that entry instead of aborting
        the remaining probes.
    """
    s = RateLimitedSession()
    ok = True
    for year, scope, url in MANIFEST:
        try:
            r = s.head(url, allow_redirects=True)
        except Exception as e:
            # Diagnostic command boundary: report the failure and keep probing
            # so one dead link does not hide the state of the others.
            print(f"{year} {scope}: ERROR {e}")
            ok = False
            continue
        print(f"{year} {scope}: {r.status_code}")
        ok = ok and r.status_code in (200, 302)
    return ok


def _download_rows(session, url):
    """Download one CSV and return its records as a list of dicts.

    Raises:
        RuntimeError: if the response status is not 2xx, so that an error
            page is never parsed as if it were CSV data.
    """
    r = session.get(url)
    if not 200 <= r.status_code < 300:
        raise RuntimeError(f"HTTP {r.status_code}")
    r.encoding = "utf-8"
    # A leading byte-order mark, if present, would otherwise become part of
    # the first header name and break lookups of that column.
    text = r.text.lstrip("\ufeff")
    return list(csv.DictReader(io.StringIO(text)))


def fetch_rows():
    """Download every MANIFEST CSV and return all records in one list.

    Each record is the ``csv.DictReader`` row with two extra keys added:
    ``_year`` and ``_scope``, copied from the MANIFEST entry it came from.

    A URL that fails (request error, non-2xx status, unparseable CSV) is
    logged as a warning and contributes no rows; the remaining URLs are still
    processed. This is deliberate best-effort behaviour.

    Returns:
        list of dict rows (possibly empty).
    """
    s = RateLimitedSession(min_interval=0.5)
    out = []
    for year, scope, url in MANIFEST:
        try:
            parsed = _download_rows(s, url)
        except Exception as e:
            LOG.warning("failed %s: %s", url, e)
            continue
        # Tag only after the whole file parsed, so a file that breaks midway
        # never leaves a partial set of rows in the result.
        for row in parsed:
            row["_year"] = year
            row["_scope"] = scope
        out.extend(parsed)
    LOG.info("IDEA rows: %d", len(out))
    return out


def _int(v):
    """Parse a CSV count cell into an int.

    Thousands separators (``,``) are removed before parsing.

    Returns:
        The parsed int, or None when the cell is None, empty, ``"-"``, or
        anything else that does not parse as an integer.
    """
    if v in (None, "", "-"):
        return None
    try:
        return int(str(v).replace(",", ""))
    except (TypeError, ValueError):
        return None


def write_rows(conn, raw):
    """Insert the Emotional Disturbance counts from *raw* into bhi_demand_indicators.

    Args:
        conn: connection object handed through to ``bulk_insert``.
        raw: iterable of dict rows as produced by ``fetch_rows`` (each must
            carry ``_year``; ``_scope`` is optional and defaults to state).

    A row is kept only when its disability category contains "emotional",
    its count parses as an integer, it has a state value, and it is
    state-scope.

    Returns:
        Number of rows handed to ``bulk_insert``.
    """
    cols = ["geo_type", "geo_code", "measure", "age_bracket", "period", "value", "source"]
    rows = []
    skipped_scope = 0
    for r in raw:
        disability = (r.get("Disability Category") or r.get("SEA Disability Category") or "").lower()
        if "emotional" not in disability:
            continue
        val = _int(r.get("Students Served") or r.get("Total") or r.get("ED"))
        if val is None:
            continue
        # The geo mapping below only knows the state columns, so rows tagged
        # with any other scope would be mislabelled as state data.
        if (r.get("_scope") or "state") != "state":
            skipped_scope += 1
            continue
        geo_code = (r.get("State") or r.get("SEA State") or "").strip()
        if not geo_code:
            # No geography to attach the count to.
            continue
        rows.append((
            "state",
            geo_code,
            "idea_emotional_disturbance_count",
            "13-17",  # ED classification predominantly school-age; approximate
            r["_year"],
            float(val),
            "idea_part_b",
        ))
    if skipped_scope:
        LOG.warning("skipped %d non-state rows (no geo mapping for that scope)", skipped_scope)
    if rows:
        # Nothing to insert when every row was filtered out.
        bulk_insert(conn, "bhi_demand_indicators", cols, rows)
    return len(rows)


def main():
    """Run the job: fetch the CSV rows and write them inside a ``job_run`` context."""
    with job_run("bhi_idea_part_b") as (conn, _):
        n = write_rows(conn, fetch_rows())
        LOG.info("inserted %d", n)


if __name__ == "__main__":
    # `<script> test` only probes the endpoints; the exit code reflects reachability.
    if len(sys.argv) > 1 and sys.argv[1] == "test":
        sys.exit(0 if test_endpoint() else 1)
    main()