BHI layer v1: docs, schema, Phase A ingestion stubs
This commit is contained in:
93
jobs/ingestion/idea_part_b.py
Normal file
93
jobs/ingestion/idea_part_b.py
Normal file
@@ -0,0 +1,93 @@
|
||||
#!/usr/bin/env python3
|
||||
# READY TO DEPLOY — requires base Brain Postgres schema + run schemas/bhi_tables.sql
|
||||
"""
|
||||
IDEA Part B child count — specifically "Emotional Disturbance" (ED)
|
||||
classification by state and local education agency (LEA).
|
||||
|
||||
Static CSVs are published by the US Department of Education / OSEP; there is
no API. This job downloads the tables listed in MANIFEST. Add an entry to
MANIFEST when a new year's tables are published.
|
||||
"""
|
||||
import csv
|
||||
import io
|
||||
import logging
|
||||
import sys
|
||||
from _common import RateLimitedSession, bulk_insert, job_run
|
||||
|
||||
# Module-level logger for this ingestion job.
LOG = logging.getLogger("bhi.idea_part_b")

# Static CSV links (placeholder URL pattern). The landing page for these
# tables was confirmed at
# https://www2.ed.gov/programs/osepidea/618-data/static-tables/index.html
#
# Each entry is a (year, scope, url) tuple.
MANIFEST = [
    (
        "2022-23",
        "state",
        "https://www2.ed.gov/programs/osepidea/618-data/static-tables/part-b/"
        "child-count-and-educational-environment/"
        "bchildcountandedenvironments2022-23.csv",
    ),
]
|
||||
|
||||
|
||||
def test_endpoint():
    """Probe every MANIFEST URL with a HEAD request and report the result.

    Prints one line per manifest entry. Every entry is probed even if an
    earlier one fails, so the output always lists all URLs.

    Returns:
        True if every URL answered with status 200 or 302, False otherwise
        (including when a request could not be completed at all).
    """
    s = RateLimitedSession()
    ok = True
    for year, scope, url in MANIFEST:
        try:
            r = s.head(url, allow_redirects=True)
        except Exception as e:
            # Treat a connection-level failure as a failed probe rather than
            # aborting the remaining checks with a traceback; fetch_rows uses
            # the same best-effort handling for its requests.
            print(f"{year} {scope}: request failed ({e})")
            ok = False
            continue
        print(f"{year} {scope}: {r.status_code}")
        ok = ok and r.status_code in (200, 302)
    return ok
|
||||
|
||||
|
||||
def fetch_rows():
    """Download every CSV listed in MANIFEST and return its rows as dicts.

    Each returned row is the csv.DictReader mapping for one CSV record, with
    two extra keys, "_year" and "_scope", copied from the MANIFEST entry the
    row came from. A URL that cannot be fetched or parsed is logged and
    skipped so that one bad entry does not lose the others.

    Returns:
        A list of row dicts from all manifest entries that were fetched and
        parsed successfully (possibly empty).
    """
    s = RateLimitedSession(min_interval=0.5)
    out = []
    for year, scope, url in MANIFEST:
        try:
            r = s.get(url)
            # An error page would otherwise be parsed as if it were CSV and
            # yield garbage rows, so only a 200 response is accepted.
            if r.status_code != 200:
                LOG.warning("failed %s: HTTP %s", url, r.status_code)
                continue
            # "utf-8-sig" decodes plain UTF-8 identically but also drops a
            # leading byte-order mark, which would otherwise become part of
            # the first column name.
            r.encoding = "utf-8-sig"
            reader = csv.DictReader(io.StringIO(r.text))
            # Collect per file and merge only on success, so a parse error
            # half-way through does not leave a partial file in the output.
            file_rows = []
            for row in reader:
                row["_year"] = year
                row["_scope"] = scope
                file_rows.append(row)
        except Exception as e:
            LOG.warning("failed %s: %s", url, e)
        else:
            out.extend(file_rows)
    LOG.info("IDEA rows: %d", len(out))
    return out
|
||||
|
||||
|
||||
def _int(v):
|
||||
try:
|
||||
return int(str(v).replace(",", "")) if v not in (None, "", "-") else None
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def write_rows(conn, raw):
    """Insert Emotional Disturbance counts into bhi_demand_indicators.

    Keeps only rows whose disability category contains "emotional", that
    carry a parseable count and that name a state, and writes them with the
    measure "idea_emotional_disturbance_count" and source "idea_part_b".

    Args:
        conn: database connection passed through to bulk_insert.
        raw: iterable of row dicts as produced by fetch_rows; each must
            contain a "_year" key.

    Returns:
        The number of rows handed to bulk_insert.
    """
    # NOTE(review): the CSV column names looked up below ("Disability
    # Category", "Students Served", "State", ...) are candidates; confirm
    # them against the published file's header row.
    cols = ["geo_type", "geo_code", "measure", "age_bracket", "period", "value", "source"]
    rows = []
    for r in raw:
        disability = (r.get("Disability Category") or r.get("SEA Disability Category") or "").lower()
        if "emotional" not in disability:
            continue
        val = _int(r.get("Students Served") or r.get("Total") or r.get("ED"))
        if val is None:
            continue
        # A row with no state value cannot be attributed to a geography, so
        # skip it rather than insert a NULL or blank geo_code.
        geo_code = str(r.get("State") or r.get("SEA State") or "").strip()
        if not geo_code:
            continue
        rows.append((
            # NOTE(review): geo_type is fixed to "state"; the "_scope" value
            # that fetch_rows attaches to each row is not consulted here.
            "state",
            geo_code,
            "idea_emotional_disturbance_count",
            "13-17",  # ED classification predominantly school-age; approximate
            r["_year"],
            float(val),
            "idea_part_b",
        ))
    # Nothing to write: avoid issuing an insert with an empty value list.
    if rows:
        bulk_insert(conn, "bhi_demand_indicators", cols, rows)
    return len(rows)
|
||||
|
||||
|
||||
def main():
    """Run the job: fetch the IDEA rows and store them within a job_run context."""
    with job_run("bhi_idea_part_b") as (conn, _):
        fetched = fetch_rows()
        inserted = write_rows(conn, fetched)
        LOG.info("inserted %d", inserted)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # "test" as the first argument only probes the source URLs and exits with
    # status 0 when all are reachable, 1 otherwise; any other invocation runs
    # the ingestion job.
    if sys.argv[1:2] == ["test"]:
        reachable = test_endpoint()
        sys.exit(0 if reachable else 1)
    main()
|
||||
Reference in New Issue
Block a user