x cd /pub/data/sf32000/Tools;
%let pgm=aggsf3_MIslds;
filename pgm "&pgm..sas";
/*-----------------------------------------------------------------------------------------------------------------
 This code aggregates sf32000 data to Michigan State Legislative Districts (Senate , House and CD108).

 Creates the SAS datasets:
   sf3.misldsph, sf3.misldsphct and sf3.misldsphctr: complete House, Senate and cd108 as well as
     split-by-county datasets for the 3 sets of tables.  These are the "real" datasets created,
     the rest are views of these.
   sf3.misenateph:    Complete Senate districts - P and H tables.
   sf3.misenatephct:  Complete Senate districts - PCT and HCT tables.
   sf3.misenatephctr: Complete Senate districts - PCT and HCT tables.
   sf3.mihouseph:     Complete House districts- P and H tables.
   sf3.mihousephct:   Complete House districts- PCT and HCT tables.
   sf3.mihousephctr:  Complete House districts- PCT and HCT tables.
   sf3.micd108ph, sf3.micd108phct, sf3.micd108phctr: the corresponding views selecting the
     SumLev 500 (108th congressional district) rows.

 We also create the misbg2k_sldl02, misbg2k_sldu02 & misbg2k_cd108 correlations lists and the
 corresponding mistr --Michigan split-tract-- lists in corrlst directory based on the Michigan
 block to slds correlation list provided by the Mi SDC, 10-02.

 Coded by John Blodgett, OSEDA, U. of Missouri, blodgettj@umsystem.edu
 Under contract with the Michigan State Data Center

 Revision History:
   10.30.02: Coding begins.
------------------------------------------------------------------------------------------------------*/

libname sf32000 '/pub/data/sf32000';
libname sf3 (sf32000);    *<--alternate name--;
libname mable2k '/pub/data/mable2k' access=readonly;
libname corrlst '/pub/data/corrlst';
/* The temp_ and aggin datasets below use one-level names; the PROC DATASETS steps */
/* look for them in this USER library.                                             */
libname user '/tmp/scratch/user';
options msglevel=i;

title "&pgm: Aggregate 2000 sf3 Datasets to 2002 Legislative Districts";
title2 'For MICHIGAN';


*---------------------Part I: Create the 6 Correlation Lists (for split-bg and split-tract for cd108, sldl02 and sldu02)----------*;

%let sbg_geos_mable=county cousubfp placefp tract ur bg;  *<---geocodes comprising the "split block group" -SumLev 090-;
*---We have 2 versions of the sbg_geos list because we called the urban-rural var on mable2k ur and called it urbanrur on the sfs;
%let sbg_geos=county cousubfp placefp tract urbanrur bg;  *<---geocodes comprising the "split block group" -SumLev 090-;
%let str_geos=county cousubfp placefp tract urbanrur;     *<---geocodes comprising the "split tract" -SumLev 085-;

*---Merge the correlation list provided by the Michigan SDC with additional geocodes from the mable2k database---;
data miblocks;
  merge corrlst.mibl2slds02(in=in1 rename=(sldl=sldl02 sldu=sldu02))
        mable2k.miv(keep=&sbg_geos_mable block pop2k rename=(ur=urbanrur) in=in2);
  by county tract block;
  format tract ;
  if not (in1 and in2) then do;
    /* Zero-population blocks absent from the SDC list are dropped without a message. */
    if not in1 and pop2k=0 then delete;
    put '***Block missing from one of the sets**** ' in1= in2= county= tract= block=;
    /* Count the mismatches and abort the job once more than 10 have been reported. */
    _nbad+1;
    drop _nbad;
    if _nbad gt 10 then abort abend;
    delete;
  end;
run;

/*---create corrlst.misbg2k_sldl02--- state House districts 2002 */
%let clist=misbg2k_sldl02;
proc sort data=miblocks(keep=&sbg_geos block pop2k sldl02 sldu02 cd108) out=blkcodes;
  by &sbg_geos sldl02;
run;
*--We use the corrwt utility macro to create the correlation list by collapsing--;
%corrwt(setin=blkcodes, setout=corrlst.&clist, geocds1=&sbg_geos, geocds2=sldl02, weight=pop2k, keepwt=1);
proc print data=corrlst.&clist(obs=50);
  title3 "Sample Obs from corrlst.&clist";
  by county;
run;

/*---create corrlst.misbg2k_sldu02--- Senate districts 2002 */
%let clist=misbg2k_sldu02;
proc sort data=blkcodes out=blkcodesU;
  by &sbg_geos sldu02;
run;
*--We use the corrwt utility macro to create the correlation list by collapsing--;
%corrwt(setin=blkcodesU, setout=corrlst.&clist, geocds1=&sbg_geos, geocds2=sldu02, weight=pop2k, keepwt=1);
proc print data=corrlst.&clist(obs=50);
  title3 "Sample Obs from corrlst.&clist";
  by county;
run;

/*---create corrlst.misbg2k_cd108--- 108th congressional districts */
%let clist=misbg2k_cd108;
proc sort data=blkcodes out=blkcodesC;
  by &sbg_geos cd108;
run;
*--We use the corrwt utility macro to create the correlation list by collapsing--;
%corrwt(setin=blkcodesC, setout=corrlst.&clist, geocds1=&sbg_geos, geocds2=cd108, weight=pop2k, keepwt=1);
proc print data=corrlst.&clist(obs=50);
  title3 "Sample Obs from corrlst.&clist";
  by county;
run;

*--------and now create the corresponding split-tract correlation lists needed to work with the "ct" tables
         that do not have 090 level summaries but stop at 085 (split census tract)---;
proc sort data=blkcodes;
  by &str_geos sldl02;
run;
%let clist=mistr2k_sldl02;
%corrwt(setin=blkcodes, setout=corrlst.&clist, geocds1=&str_geos, geocds2=sldl02, weight=pop2k, keepwt=1);
proc print data=corrlst.&clist(obs=50);
  title3 "Sample Obs from corrlst.&clist";
  by county;
run;

proc sort data=blkcodesU;
  by &str_geos sldu02;
run;
%let clist=mistr2k_sldu02;
%corrwt(setin=blkcodesU, setout=corrlst.&clist, geocds1=&str_geos, geocds2=sldu02, weight=pop2k, keepwt=1);
proc print data=corrlst.&clist(obs=100);
  title3 "Sample Obs from corrlst.&clist";
  by county;
run;

/*---create corrlst.mistr2k_cd108--- 108th congressional districts */
%let clist=mistr2k_cd108;
proc sort data=blkcodes out=blkcodesC;
  by &str_geos cd108;
run;
%corrwt(setin=blkcodesC, setout=corrlst.&clist, geocds1=&str_geos, geocds2=cd108, weight=pop2k, keepwt=1);
proc print data=corrlst.&clist(obs=50);
  title3 "Sample Obs from corrlst.&clist";
  by county;
run;


*=========================================Part 2=====================================================*;
*------------- Create the data sets to be aggregated by merging with the corr. lists to attach the legislative
               district and the afact (allocation factor) var.  Then invoke the agg macro to aggregate each
               of the 3 sets.---------------------------------;

/* House districts: split-block-group (SumLev 090) rows tagged with SumLev 620. */
data temp_ph1;
  length sldcode $3;   /* declared explicitly, as in every other temp_ step */
  merge sf3.miph(where=(sumlev='090') in=insf3)
        corrlst.misbg2k_sldl02(in=inclist);
  by &sbg_geos;
  if inclist;
  if not insf3 then do;
    put '**Geography on the corr. list not found on sf3: ' &sbg_geos;
    delete;
  end;
  sldcode=sldl02;
  SumLev='620';
  output;
run;

/* Senate districts: tagged with SumLev 610. */
data temp_ph2;
  length sldcode $3;
  merge sf3.miph(where=(sumlev='090') in=insf3)
        corrlst.misbg2k_sldu02(in=inclistu);
  by &sbg_geos;
  if inclistu;
  if not insf3 then do;
    put '**Geography on the corr. list not found on sf3: ' &sbg_geos;
    delete;
  end;
  sldcode=sldu02;
  SumLev='610';
  output;
run;

data temp_ph3;   *<--new for Michigan processing. cd108 aggregation--;
  length sldcode $3;
  merge sf3.miph(where=(sumlev='090') in=insf3)
        corrlst.misbg2k_cd108(in=inclistu);
  by &sbg_geos;
  if inclistu;
  if not insf3 then do;
    put '**Geography on the corr. list not found on sf3: ' &sbg_geos;
    delete;
  end;
  sldcode=cd108;
  SumLev='500';
  output;
run;

/* Index all three inputs once, after the last of them exists.  An earlier step that */
/* indexed temp_ph1 and temp_ph2 only to have them rebuilt here has been removed.    */
proc datasets library=user;
  modify temp_ph1;
    index delete ph;
    index create ph=(Sumlev sldcode county);
  modify temp_ph2;
    index delete ph;
    index create ph=(Sumlev sldcode county);
  modify temp_ph3;
    index delete ph;
    index create ph=(Sumlev sldcode county);
quit;

/*----We replace the pre step (generated by the aggsf3 macro) with our own here.  Then we use the  */
/*    steps parm to tell aggsf3 to skip the prestep.  Of course this only works if we create       */
/*    something called aggin followed by the settype value (agginph, agginphct, agginphctr).   ----*/

*----------------------------------(1 of 3): Aggregate the ph dataset--------------------------------------------*;
data agginph/view=agginph;
  set temp_ph3 temp_ph2 temp_ph1;
  by SumLev sldcode county;   *--this code allows us to skip the pre step generated by the aggsf3 macro--;
  if H4i1 then H3i2=100*(H2i2/H4i1);  *--100% count of occ units is occ units sampled divided by pct occ units sampled-;
  if H4i2 then H3i3=100*(H2i3/H4i2);  *--100% count of vac units is vac units sampled divided by pct vac units sampled-;
  label H3i2='100% Count of Occupied hus (derived)'
        H3i3='100% Count of Vacant units (derived)';
  keep SumLev sldcode county _numeric_;  *<----specify id vars and then keep all the table cells with _numeric_;
  drop PCT;
run;
*--the indices should be used to allow this sorted concatenation--;

%macro preexit;
  %*--macro invoked by aggsf3--;
%mend preexit;

*---We need to define our custom code for the post step using an "exit macro" prior to invoking aggsf3---;
*<=============This macro needs to be define and accessed for EACH OF THE 3 AGG steps that follow.  So if commenting out
               earlier steps, make sure NOT TO COMMENT OUT THIS CODE============================================;
%macro postexit;
  %*--invoked during post-aggregation processing--;
  by sldcode notsorted _lvl_ notsorted;

  /* Build the area name and fill the district-type-specific code from the summary level. */
  select(sumlev);
    when('610') do;
      areaname='State Senate District '||sldcode;
      sldu02=sldcode;
    end;
    when('620') do;
      areaname='State House District '||sldcode;
      sldl02=sldcode;
    end;
    when('500') do;
      areaname='Congressional District '||sldcode;
      cd108=sldcode;
    end;
  end;  /*--select--*/

  if _lvl_=1 then do;  /*--this is a district within county summary--*/
    substr(sumlev,3,1)='c';  /*<---We make up the codes, 61c and 62c for Senate/county and House/county, resp. --*/
    if sumlev='50c' then sumlev='510';  /*--cd within county has a recognized sumlev code so we override--*/
    geocode=trim(sldcode)||'-'||county;
    cnty=substr(county,3,3);
    /* More than one county row for this district means the district spans counties. */
    if not (first._lvl_ and last._lvl_) then do;
      partflag='y';
      /* NOTE(review): the format name mocnty looks like a Missouri county-name format   */
      /* carried over from another state; confirm it yields Michigan county names here. */
      areaname=trim(areaname)||'/'||put(cnty,$mocnty.);
    end;
    else partflag='n';
  end;
  else do;  /*--this is a complete district (_lvl_=2) summary--*/
    geocode=sldcode;
    county=' ';
  end;

  drop _lvl_ _nag_;
  label sldcode='Legislative District Code'
        partflag='District Spans Counties?';
  format intptlon 11.6 intptlat 10.6;
%mend postexit;

%aggsf3(settypes=ph,
        setinlib=user, setin=temp,
        setoutlib=sf3, setout=mislds,
        templib=user,
        aggby=sumlev sldcode county,
        idlens=%str(SumLev $3 geocode $9 sldcode $3 county $5 cd108 $2 sldl02 sldu02 $3 AreaName $40 cnty $3 partflag $1),
        agglvl=2,
        steps=agg post,
        afact=afact,
        report=0)
run;


*----------------------------------(2 of 3): Aggregate the phct dataset--------------------------------------------*;
/* The ct tables stop at the split-tract level (SumLev 085), so the mistr2k lists are used. */
data temp_phct1;
  length sldcode $3;
  merge sf3.miphct(where=(sumlev='085') in=insf3)
        corrlst.mistr2k_sldl02(in=inclist);
  by &str_geos;
  if inclist;
  if not insf3 then do;
    put '**Geography on the corr. list not found on sf3: ' &str_geos;
    delete;
  end;
  sldcode=sldl02;
  SumLev='620';
  output;
run;

data temp_phct2;
  length sldcode $3;
  merge sf3.miphct(where=(sumlev='085') in=insf3)
        corrlst.mistr2k_sldu02(in=inclistU);
  by &str_geos;
  if inclistU;
  if not insf3 then do;
    put '**Geography on the corr. list not found on sf3: ' &str_geos;
    delete;
  end;
  sldcode=sldu02;
  SumLev='610';
  output;
run;

data temp_phct3;
  length sldcode $3;
  merge sf3.miphct(where=(sumlev='085') in=insf3)
        corrlst.mistr2k_cd108(in=inclistU);
  by &str_geos;
  if inclistU;
  if not insf3 then do;
    put '**Geography on the corr. list not found on sf3: ' &str_geos;
    delete;
  end;
  sldcode=cd108;
  SumLev='500';
  output;
run;

proc datasets library=user;
  modify temp_phct1;
    index delete phct;
    index create phct=(Sumlev sldcode county);
  modify temp_phct2;
    index delete phct;
    index create phct=(Sumlev sldcode county);
  modify temp_phct3;
    index delete phct;
    index create phct=(Sumlev sldcode county);
quit;

/*----We replace the pre step (generated by the aggsf3 macro) with our own here.  Then we use the  */
/*    steps parm to tell aggsf3 to skip the prestep.  Of course this only works if we create       */
/*    something called aggin followed by the settype value.                                    ----*/
data agginphct/view=agginphct;
  set temp_phct3 temp_phct2 temp_phct1;
  by SumLev sldcode county;   *--this code allows us to skip the pre step generated by the aggsf3 macro--;
  keep SumLev sldcode county _numeric_;  *<----specify id vars and then keep all the table cells with _numeric_;
  drop PCT;
run;
*--the indices should be used to allow this sorted concatenation--;

%macro preexit;
  %*--macro invoked by aggsf3--;
%mend preexit;

******************Be sure that the postexit macro defn (above) is NOT COMMENTED OUT if doing a partial run.  We could
                  replicate it here, but will not do that until we have debugged it**********************************************;

%aggsf3(settypes=phct,
        setinlib=user, setin=temp,
        setoutlib=sf3, setout=mislds,
        templib=user,
        aggby=sumlev sldcode county,
        idlens=%str(SumLev $3 geocode $11 sldcode $3 county $5 cd108 $2 sldl02 sldu02 $3 AreaName $40 cnty $3 partflag $1),
        agglvl=2,
        steps=agg post,
        afact=afact,
        report=0)
run;


*----------------------------------(3 of 3): Aggregate the phctr dataset--------------------------------------------*;
data temp_phctr1;
  length sldcode $3;
  merge sf3.miphctr(where=(sumlev='085') in=insf3)
        corrlst.mistr2k_sldl02(in=inclist);
  by &str_geos;
  if inclist;
  if not insf3 then do;
    put '**Geography on the corr. list not found on sf3: ' &str_geos;
    delete;
  end;
  sldcode=sldl02;
  SumLev='620';
  output;
run;

data temp_phctr2;
  length sldcode $3;
  merge sf3.miphctr(where=(sumlev='085') in=insf3)
        corrlst.mistr2k_sldu02(in=inclistu);
  by &str_geos;
  if inclistu;
  if not insf3 then do;
    put '**Geography on the corr. list not found on sf3: ' &str_geos;
    delete;
  end;
  sldcode=sldu02;
  SumLev='610';
  output;
run;

data temp_phctr3;
  length sldcode $3;
  merge sf3.miphctr(where=(sumlev='085') in=insf3)
        corrlst.mistr2k_cd108(in=inclistu);
  by &str_geos;
  if inclistu;
  if not insf3 then do;
    put '**Geography on the corr. list not found on sf3: ' &str_geos;
    delete;
  end;
  sldcode=cd108;
  SumLev='500';
  output;
run;

proc datasets library=user;
  modify temp_phctr1;
    index delete phctr;
    index create phctr=(Sumlev sldcode county);
  modify temp_phctr2;
    index delete phctr;
    index create phctr=(Sumlev sldcode county);
  modify temp_phctr3;
    index delete phctr;
    index create phctr=(Sumlev sldcode county);
quit;

/*----We replace the pre step (generated by the aggsf3 macro) with our own here.  Then we use the  */
/*    steps parm to tell aggsf3 to skip the prestep.  Of course this only works if we create       */
/*    something called aggin followed by the settype value.                                    ----*/
/* The H3i2/H3i3 label statement from the ph view is intentionally not repeated here: */
/* those derived cells are only computed in the ph view (agginph).                    */
data agginphctr/view=agginphctr;
  set temp_phctr3 temp_phctr2 temp_phctr1;
  by SumLev sldcode county;   *--this code allows us to skip the pre step generated by the aggsf3 macro--;
  keep SumLev sldcode county _numeric_;  *<----specify id vars and then keep all the table cells with _numeric_;
  drop PCT;
run;
*--the indices should be used to allow this sorted concatenation--;

%macro preexit;
  %*--macro invoked by aggsf3--;
%mend preexit;

******************Be sure that the postexit macro defn (above) is NOT COMMENTED OUT if doing a partial run.
                  This is the same postexit routine run for the other aggregations**********************************************;

%aggsf3(settypes=phctr,
        setinlib=user, setin=temp,
        setoutlib=sf3, setout=mislds,
        templib=user,
        aggby=sumlev sldcode county,
        idlens=%str(SumLev $3 geocode $11 sldcode $3 county $5 cd108 $2 sldl02 sldu02 $3 AreaName $40 cnty $3 partflag $1),
        agglvl=2,
        steps=agg post,
        afact=afact,
        report=0)
run;


/* Views that subset the three aggregated datasets by district type.  All nine CREATE VIEW */
/* statements must sit inside the same PROC SQL step: a QUIT placed before the cd108 views */
/* would leave them in open code, where they fail and the views are never created.         */
proc sql;
  create view sf3.misenateph    as select * from misldsph    where SumLev='610';
  create view sf3.misenatephct  as select * from misldsphct  where SumLev='610';
  create view sf3.misenatephctr as select * from misldsphctr where SumLev='610';

  create view sf3.mihouseph     as select * from misldsph    where SumLev='620';
  create view sf3.mihousephct   as select * from misldsphct  where SumLev='620';
  create view sf3.mihousephctr  as select * from misldsphctr where SumLev='620';

  create view sf3.micd108ph     as select * from misldsph    where SumLev='500';
  create view sf3.micd108phct   as select * from misldsphct  where SumLev='500';
  create view sf3.micd108phctr  as select * from misldsphctr where SumLev='500';
quit;

proc freq data=sf3.misldsph(keep=SumLev Geocode);
  table SumLev;
  title2 'Frequency Report on the sf3.misldsph Dataset';
run;

/* obs=100 makes the listing match its title. */
proc print data=sf3.misldsph(keep=_char_ p1i1 obs=100);
  title2 'First 100 Obs, ID Variables Only, from sf3.misldsph Dataset';
run;

proc print data=sf3.misenateph(keep=_char_ p1i1);
  title2 'sf3.misenateph - ID Variables only';
run;

proc print data=sf3.mihousephct(keep=_char_ pct1i1 hct1i1);
  title2 'sf3.mihousephct - ID Variables only';
run;

%include sascode(notify);