x cd /pub/data/sf12000/Tools; %let pgm=aggsf1_schls; filename pgm "&pgm..sas"; /*----------------------------------------------------------------------------------------------------------------- This code aggregates sf12000 data to Missouri School Districts, complete and within-county. Creates the SAS datasets: sf1.moschlsph: Complete school districts, unified and elementary - P and H tables. sf1.moschlcosph: School districts within counties, - P and H tables. Coded by John Blodgett, OSEDA, U. of Missouri, blodgettj@umsystem.edu Under contract with the Missouri Census Data Center Revision History: 7.13.01: Coding begins. 7.14.01: Problem with accessing the sf1.moph view. Replacing with our own merge code. 7.28.01: Rerun to fix problem with pct set aggregation - forgot to specify facvar option. ------------------------------------------------------------------------------------------------------*/ libname user '/tmp/storage/john'; *<===; %macro atoI(list,x); %*--utility macro to take a list of variables containing a wild card character ("x", by defaut) and replacing it with the letters a, b, c, d, e, f, g, h, and I in order. To generate the lists used in sf1 for the race/hispanic tables.--; %if &x eq %then %let x=x; %*--default wild card character is "x"--; %sysfunc(translate(&list,a,&x)) %sysfunc(translate(&list,b,&x)) %sysfunc(translate(&list,c,&x)) %sysfunc(translate(&list,d,&x)) %sysfunc(translate(&list,e,&x)) %sysfunc(translate(&list,f,&x)) %sysfunc(translate(&list,g,&x)) %sysfunc(translate(&list,h,&x)) %sysfunc(translate(&list,I,&x)) %mend atoI; %let geos80=county cousubfp placefp tract; /*<=========================Comment out===================================== data agginsf1(compress=yes); length sumlev $3 county $5 ncescode $5; *set sf1.moph; *--sas fails accessing the view, so we try doing our own merge (made no difference! - now try using compress=yes) ---; merge sf1.mogeos(where=(sumlev='101') in=keep) sf1.mophng; by geo_id; if keep; *where sumlev='101'; *<---blocks---; *---We make up sum lev codes here: se2 for elementary school district within county and su2 for unified-; keep sumlev ncescode county _numeric_; *--These sumlev codes will be edited in post-agg steps. We'll be creating both complete district and distict/county summaries here.---; if sdelm ne ' ' then do; sumlev='se2'; ncescode=sdelm; output; end; if sduni ne ' ' then do; sumlev='su2'; ncescode=sduni; output; end; run; *<======Sort failed, apparently too big for sas: proc sort; * by ncescode county; * run; proc datasets library=user; modify agginsf1; index create school=(ncescode county); quit; options compress=yes; *<---to get the aggregated output set to be compressed--; %agg(aggin=agginsf1,aggout=aggoutsf1, aggby=ncescode county, idvars=sumlev, aggvars=p1i1--h16Ii19 IntPtLon IntPtLat AreaSQMI, naggvars=3063, means= IntPtLon IntPtLat p17i1 p33i1 p17ai1 p17bi1 p17ci1 p17di1 p17ei1 p17fi1 p17gi1 p17hi1 p17Ii1 p33ai1 p33bi1 p33ci1 p33di1 p33ei1 p33fi1 p33gi1 p33hi1 p33Ii1 h12i1 h12i2 h12i3 %atoI(h12xi1 h12xi2 h12xi3) , mweights=p1i1 p1i1 p15i1 p31i1 p15ai1 p15bi1 p15ci1 p15di1 p15ei1 p15fi1 p15gi1 p15hi1 p15Ii1 p31ai1 p31bi1 p31ci1 p31di1 p31ei1 p31fi1 p31gi1 p31hi1 p31Ii1 h4i1 h4i2 h4i3 %atoI(h15xi1 h15xi2 h15xi10) , debug=1, agglvls=2,fillby=9) run; data sf1.moschlsph(label='Missouri School Districts P & H Tables' drop=county cnty partflag compress=yes) sf1.moschlcosph(label='Missouri School Districts within county, P & H tables' compress=yes); length sumlev $3 geocode $11 desedist $6 ncescode $5 areaname $40 county $5 cnty $3 partflag $1; set aggoutsf1; by ncescode _lvl_ county; if ncescode ne '99999' then areaname=put(ncescode,$school.); else select(sumlev); when('se2') areaname='Not in an Elementary District'; when('su2') areaname='Not in a Unified District'; end; if _lvl_=1 then do; *--this is a district within county summary--; geocode=ncescode||'-'||county; if not (first._lvl_ and last._lvl_) then do; partflag='y'; areaname=trim(areaname)||'/'||put(cnty,$mocnty.); end; else partflag='n'; end; else do; *--this is a complete district (_lvl_=2) summary--; if sumlev='su2' then sumlev='sdu'; else sumlev='sde'; geocode=ncescode; county=' '; end; drop _lvl_ _nag_; cnty=substr(county,3,3); label ncescode='NCES School District Code' desedist='Mo Dept of Elem & Sec Educ. District Code' partflag='District Spans Counties?'; *--See if it is split across counties--; desedist=put(ncescode,$schcnvt.); *--add dese codes and the dist name-; format intptlon 11.6 intptlat 10.6; *---Calculate all the age medians from the corresponding distributions. Note that we do not have explicit distrib tables for total age, but have to create the totals by summing male and female counts in each case.--; array _p12t {23} _temporary_; array _p12w {23} _temporary_ (5, 5, 5, 3, 2,1,1,3,5,5,5,5,5,5,5,2,3,2,3,5,5,5,5); option NOmprint; %macro domedian(r); %*--r is the race code for the table. We do the same processing on corresponding p13 and p12 tables depending on the race code. When r is blank we are working with the unqualified total-pop tables--; array _p12&r.m p12&r.i3-p12&r.i25; array _p12&r.f p12&r.i27-p12&r.i49; do _i_ = 1 to 23; _p12t{_i_}=_p12&r.m{_i_} + _p12&r.f{_i_}; end; %*--Example: when r=a then the first macro invocation says to calculate the est median variable p13ai1 by using the data distribution values in array _p12t (data from table p12a, summed over sex) and using the array _p12w of constant "interval widths" data. The second median invocation repeats this process but assigns result to p13ai2 and uses the _p12am data distribution array which is the set of male counts. And the 3rd invocation creates p13ai3 as the female median by working with the array of females counts in _p12af.---; %median(mdn=p13&r.i1,intvals=_p12t,intsize=_p12w,samestep=1) ; %*--medians for total pop-; %median(mdn=p13&r.i2,intvals=_p12&r.m,intsize=_p12w,samestep=1) ; %*--medians for males-; %median(mdn=p13&r.i3,intvals=_p12&r.f,intsize=_p12w,samestep=1) ; %*--medians for females-; %mend domedian; %domedian() ;*--Assigns p13i1, p13i2 and p13i3 using data in distribution array p12-; %domedian(a) ;*--Assigns p13ai1, p13ai2 and p13ai3 using data in distribution array p12a-; %domedian(b) ;*--etc.--; %domedian(c) %domedian(d) %domedian(e) %domedian(f) %domedian(g) %domedian(h) %domedian(I) if _lvl_=1 then output sf1.moschlcosph; else output sf1.moschlsph; run; *---create macro variable lists of counties that contain elementary districts and the mcds within those--; proc sql noprint; select distinct '"'||cousubfp||'"' into :emcds separated by ',' from sf1.mogeos where sumlev='101' and sdelm ne '99999'; quit; %put emcds= &emcds; *---begin by generating a split tract (sumlev 080) to school district corr. list---*; data blks1(compress=no) ; set sf1.mogeos(keep=sumlev county cousubfp placefp tract pop100 sduni sdelm); where sumlev='101' and pop100; sumlev='su2'; ncescode=sduni; output; if cousubfp in (&emcds) then do; *--we only output obs for mcds in which there are elementary districts--; sumlev='se2'; ncescode=sdelm; output; end; drop sduni sdelm; run; proc sort; by sumlev &geos80 ncescode; run; *--This corrwt utility macro turns this into a correlation list relating sumlev/level 080 groups to districts. There are really sort of 2 corr lists here because of the use of sumlev. A typical output obs created here will have values for all the g1 and g2 codes (each unique combination), pop100 (containing the total population of the intersection) and afact, the "allocation factor", with a value between 0 and 1 indicating the decimal portion of the sumlev 080 area that falls within the ncescode (School District) area. Example: ---; %corrwt(setin=blks1,setout=corrlsts, g1=sumlev &geos80, g2=ncescode, weight=pop100,keepwt=1) run; proc sort; by &geos80 ncescode sumlev; run; *---merge the correlation list with the 080 level summaries in the pct sf1 data set--; data merged; merge corrlsts (in=inclist rename=(sumlev=sumlevC) drop=pop100) sf1.mopct(in=insf1 where=(sumlev='080' and pop100) keep=sumlev &geos80 pop100 hu100 pct1i1 -- pct17Ii75); by &geos80; if inclist; sumlev=sumlevC; drop sumlevC; *--the sumlev on the sf1 data set overrides the one on clist, so we fix it-; if not insf1 then do; _nbad+1; if _nbad le 20 then do; put '***No sf1 data for ' &geos80 ; delete; end; else stop; drop _nbad; end; run; proc sort data=merged; by ncescode county; run; *----Aggregate the data by the districts, total and within county.----; %agg(aggin=merged,aggout=aggoutpct, aggby=ncescode county, idvars=sumlev, aggvars=pop100 hu100 pct1i1 -- pct17Ii75, facvar=afact, agglvls=2, fillby=9) run; ===========================End commented out================================ */ data sf1.moschlspct(label='Missouri School Districts PCT Tables (est)' drop=county cnty partflag compress=yes) sf1.moschlcospct(label='Missouri School Districts within county PCT Tables (est)' compress=yes); length sumlev $3 geocode $11 desedist $6 ncescode $5 areaname $40 county $5 cnty $3 partflag $1; set aggoutpct; by ncescode _lvl_ county; if ncescode ne '99999' then areaname=put(ncescode,$school.); else select(sumlev); when('se2') areaname='Not in an Elementary District'; when('su2') areaname='Not in a Unified District'; end; desedist=put(ncescode,$schcnvt.); *--add dese codes and the dist name-; if _lvl_=1 then do; *--this is a district within county summary--; geocode=ncescode||'-'||county; if not (first._lvl_ and last._lvl_) then do; partflag='y'; areaname=trim(areaname)||'/'||put(cnty,$mocnty.); end; else partflag='n'; end; else do; *--this is a complete district (_lvl_=2) summary--; if sumlev='su2' then sumlev='sdu'; else sumlev='sde'; geocode=ncescode; county=' '; end; drop _lvl_ _nag_ afact; cnty=substr(county,3,3); if _lvl_=1 then output sf1.moschlcospct; else output sf1.moschlspct; format pop100 hu100 6.; *<--so they print as integers--; label ncescode='NCES School District Code' desedist='Mo Dept of Elem & Sec Educ. District Code' partflag='District Spans Counties?'; *--See if it is split across counties--; %include sascode(notify);