x cd /pub/data/sf32000/Tools;
%let pgm=usgeos;
filename pgm "&pgm..sas";

/*--Create sf3.usgeos by reading the geography-only file for the US
    (NO LONGER concatenating other XXgeos datasets.)
    jgb, march, 2009. (after the recovery).
    Edited (to drop more variables) 4-20-09 .
    Code added to assign State for ZCTAs at 860 sumlev. 5-5-09.
    Modified 9/09 to add data for the PUMA level, taken from our custom
    sf3.uspumasph data set.

    Outputs:
      sf32000.usgeos        - rows with geocomp='00', plus the appended PUMA rows
      sf32000.usgeocompgeos - rows for all other geographic components
*/

libname sf32000 "/pub/data/sf32000";
*--we read from the rawdata directory but write to sf32000 so we cannot use &inpath to define this libref now.----;
libname sf3 (sf32000);   *---alias name--;

title "Create sf3.usgeos";

/* =======================Begin comment================
Decided NOT TO DO IT THIS WAY. See below.
data sf32000.usgeos (compress=yes label="2000 Summary File 3 (SF3) Geo Headers data for US"
   keep=geocode--AreaSQMI );
 set sf3.usstcntygeos sf3.usregdivgeos sf3.usuasgeos sf3.usplacesgeos sf3.uszipsgeos
     sf3.uspumasph(keep=sumlev geocode areaname state in=inpuma) open=defer;
 if geocomp gt '00' then delete;
run;
proc sort; by sumlev logrecno; run;
======================End comment================== */

/* The raw geographic header file is read through a pipe from unzip. */
filename geos pipe "unzip -a -p /pub/data/sf32000/rawdata/usgeo_uf3";

data sf32000.usgeos (compress=yes sortedby=LogRecNo
                     label="2000 Summary File 3 (SF3) Geo Headers data for US (all levels)")
     sf32000.usgeocompgeos(compress=yes sortedby=LogRecNo
                     label="2000 Summary File 3 (SF3) Geo Headers data for geographic components");

  infile geos missover lrecl=1024 end=last;

  retain _first 1;
  drop _first;

  /* The declarations below fix the variable order in the output data sets, */
  /* so their sequence matters and should not be rearranged.                */
  length GeoCode $44 SumLev $3 GeoComp $2 AreaName $90;  *--establish variable order -- these go first--;
  length geoid $34;
  /* Explicit lengths so that State and Stab do not depend on the width of  */
  /* the blank literals in the RETAIN statement that follows.               */
  length State Stab $2;
  retain State " " Stab " ";
  length County $5 ;   *--and then these... -;
  format county $county.;

  /* Sanity check on the file identifier in the leading columns.            */
  /* NOTE(review): _first is never reset to 0, so this check (and the read  */
  /* of FileId and stusab) runs for every record, not just the first one.   */
  if _first then do;
    input FileId $char6. stusab $2. @;
    retain FileId stusab;
    drop fileid stusab;
    if substr(FileId,2,3) ne 'SF3' then do;
      file log;
      put '******Problem with input geographic headers file. Did not find "SF3" in cols. 2-4 ' FileId=
        / '***Conversion will not run***';
      list;
      stop;
    end;
  end;

  /* Fixed-column read of the geographic header record. */
  input
    @9   SumLev $3. geocomp $2.
    @19  LogRecNo 7.
    @26  Region $1. Division $1. StateCe $2. State $2. Cnty $3. CntySC $2.
    @37  CouSubFP $5. CouSubCC $2. CouSubSC $2.
         PlaceFP $5. PlaceCC $2. PlaceDC $1. PlaceSC $2.
    @56  _TractIn $6. BG $1. Block $4.
    @69  ConCit $5.
    @78  aianhh $char4. aianhhfp $char5. aianhhcc $char2. aihhtli $1.
         aitsce $char3. aits $char5. aitscc $char2. anrc $char5. anrccc $char2.
    @107 MSACMSA $4. MASC $2. CMSA2 $2. MACCI $1. PMSA $4. NECMA $4.
    @128 UA $5. UASC $2. UAType $1.
    @136 UrbanRur $1. cd106 $2. cd108 $2. +4 (sldu sldl)($char3.) vtd $char6. vtdi $1.
    @158 ZCTA3 $3. ZCTA5 $5.
    /* Was @168, which is two columns late: ZCTA5 ends in column 165 and    */
    /* AreaLand starts in 173, so the 5-byte SubMCD field begins at 166.    */
    /* SubMCD is dropped below, so the output data sets are unaffected.     */
    @166 SubMCD $5.
    @173 (AreaLand AreaWatr)(14.) AreaName $90. FuncStat $1. gcuni $1.
    @293 Pop100 9. HU100 9. IntPtLat 9.6 IntPtLon 10.6 LSADC $2.
    @332 PartFlag $1. (SDElm SDSec SDUni)($5.) TAZ $6. UGA $5. PUMA5 $5. PUMA1 $5.
    @384 MACC $char5. UACP $char5.
  ;

  /* County is the state code followed by the 3-character county code. */
  if cnty ne ' ' then County=State||cnty;

  /* Tract is stored as 4 characters, a period, then 2 characters. */
  if _tractin ne ' ' then tract=substr(_tractin,1,4)||'.'||substr(_tractin,5,2);

  %include '/pub/data/sf32000/Tools/assign_geocode.sas'; *---Select statement to assign value to geocode variable-;

  /* Records that carry a ZCTA5 but no state code get State and Stab from   */
  /* the ZIP code. ZIPFIPS and STFIPS return numeric values, so they are    */
  /* converted explicitly with Z2. to produce a zero-padded 2-character     */
  /* code. The previous implicit numeric-to-character assignment did not    */
  /* zero-pad codes below 10. State is left blank when no code is returned. */
  /* NOTE(review): Stab is retained and is only assigned here in the        */
  /* visible code. Confirm whether assign_geocode.sas sets it for other     */
  /* records, otherwise they carry the value from an earlier record.        */
  if zcta5 ne ' ' and state=' ' then do;
    stab=put(zcta5,$zipstab.);
    if stab=' ' then do;   *---Mostly PR ZIPs--;
      _stfips=zipfips(zcta5);
      if _stfips ne . then state=put(_stfips,z2.);
      stab=put(state,$fipstab2.);
    end;
    else do;
      _stfips=stfips(stab);
      if _stfips ne . then state=put(_stfips,z2.);
    end;
  end;

  *---Create land and total area values in square miles, as we do with sf32000 full table data sets--;
  /* 2589988 is the number of square METERS in a square mile */
  /* (1609.344 squared).                                     */
  LandSQMI=AreaLand/2589988;
  AreaSQMI=LandSQMI + (AreaWatr/2589988);
  attrib LandSQMI format=9.2 label='Land Area in Sq Miles'
         AreaSQMI format=9.2 label='Total Area incl Water in Sq Miles';

  if geocode ne ' ' then geoid=sumlev||geocomp||'US'||compress(geocode,'-');

  /* Track the longest geocode seen, reported to the log at end of input. */
  _lgeocode=length(geocode);
  retain _maxl;
  _maxl=max(_maxl,_lgeocode);

  drop cd108 sldu sldl vtd puma1 puma5 sdelm sdsec sduni taz submcd ; *---these are never defined on this file---;

  /* Route the record: geocomp 00 rows to usgeos, all others to usgeocompgeos. */
  if geocomp='00' then output sf32000.usgeos;
  else output sf32000.usgeocompgeos;

  if last then do;
    put 'Maximum length of geocode on the input file: ' _maxl;

    *----Read the uspumasph data set and add these new rows---;
    /* Blank every character variable from geocode through tract so values  */
    /* from the last raw record do not leak into the PUMA rows.             */
    array cfields(*) geocode-character-tract;
    do _i=1 to dim( cfields);
      cfields{_i}=' ';
    end;

    do until(nomorepumas);
      set sf3.uspumasph(keep=geocode SumLev Areaname State intptlat intptlon puma5
                             landsqmi areasqmi pop100 HU100)
          end=nomorepumas;
      /* NOTE(review): unlike the geoid built above, this one does not strip */
      /* dashes from geocode. Confirm the geocode format in uspumasph.       */
      geoid='79500US'||geocode;
      geocomp='00';
      stab=put(state,$fipstab2.);

      *---Assign new LogRecNo values in the 500,000 series. Note that we specify sortedby=logrecno on these datasets-;
      retain _logrecnopuma 500000;

      *--We already have the area variables in square miles so we reverse the process and calculate the square meter variables--;
      AreaLand=LandSqmi* 2589988;
      AreaWatr=2589988*(Areasqmi-LandSqmi);

      _logrecnopuma+1;
      logrecno=_logrecnopuma;
      output sf32000.usgeos;
    end;
  end;

  drop _: ;
run;

proc print data=sf3.usgeos(obs=150);
  *by sumlev;
  id logrecno;
run;

proc freq data=sf32000.usgeos(keep=state sumlev);
  table state sumlev;
run;

proc datasets library=sf32000 nolist;
  modify usgeos;
  index create slgeo=(sumlev geocode);
  index create geoid;
  index create logrecno;
quit;

%include sascode(notify); *<==========Missouri only, delete everywhere else!!!=====;