/*****************************************************************************
 Macro CALQNTIL calculates a quantile (such as the 95th percentile,
 the first quartile, or by default the median) from grouped data
 (a frequency distribution) by using linear interpolation to estimate
 the exact location of the quantile within the target class of the
 frequency distribution.  Modification by Roy Williams at MISER (Mass.
 State Data Ctr.) of code originally produced by the California SDC.

 The macro expands to DATA step statements, so it must be invoked inside
 a DATA step in which the LOWBOUND and FREQNCY arrays are already defined.
*****************************************************************************/
%MACRO CALQNTIL(QNTILE, N, LOWBOUND, FREQNCY, QDESIRED=0.50);
/*****************************************************************************
 Parameters:
 (first four parameters must be provided and in the given order)
   QNTILE   (output) Estimated location of desired quantile (the name of the
                     variable the result should be returned to)
   N        (input)  Number of classes in the frequency distribution
   LOWBOUND (input)  Array of size N, which contains the lower bounds of
                     the N classes
   FREQNCY  (input)  Array of size N which contains the frequencies
   QDESIRED (input)  Quantile desired; e.g., 0.25, 0.5 (median), 0.75, etc.
                     The median is the default.  See sample calls below.

 The result is set to missing (.) when
   - the total frequency is zero, negative, or missing;
   - the target frequency is zero or negative (e.g. QDESIRED=0);
   - the quantile falls in the first or the last class.

 Local variables (just SAS vars with unlikely names, not %LOCAL Macro vars):
   _TOTFRQ_ is the total of the frequency distribution
   _RUNSUM_ is the running sum while searching for the target class
   _TRGFRQ_ is the frequency below the exact quantile desired
   _SHARE_  is the estimated proportion of the target class above _TRGFRQ_
   _WIDTH_  is the width of the target class
   i        is the loop index.  NOTE: this is an ordinary name; any DATA step
            variable called i is overwritten by the macro.
*****************************************************************************/
  _TOTFRQ_ = SUM (OF &FREQNCY[*]) ;

  /* SUM returns missing when every frequency is missing, so test for a    */
  /* strictly positive total rather than for equality with zero            */
  if not (_TOTFRQ_ > 0) then &QNTILE = . ;  /* empty frequency distribution */
  else do ;
    _TRGFRQ_ = _TOTFRQ_ * &QDESIRED ;

    /* Compute cumulative distribution until target exceeded */
    _RUNSUM_ = 0 ;
    do i = 1 to &N while (_RUNSUM_ < _TRGFRQ_) ;
      _RUNSUM_ = _RUNSUM_ + &FREQNCY[i] ;
    end ;
    i = i - 1 ;            /* since loop goes one past the target class */

    /* linearity assumption rarely reasonable in top and bottom classes.   */
    /* i = 0 means the loop body never ran (target frequency not positive) */
    /* and class 0 would be an out-of-range array subscript.               */
    if i <= 1 or i >= &N then &QNTILE = . ;
    else do ;
      /* fraction of the target class lying above the target frequency */
      _SHARE_ = (_RUNSUM_ - _TRGFRQ_) / &FREQNCY[i] ;
      _WIDTH_ = &LOWBOUND[i+1] - &LOWBOUND[i] ;
      /* step back from the upper bound of the target class */
      &QNTILE = &LOWBOUND[i+1] - _WIDTH_ * _SHARE_ ;
    end ;
  end ;
%MEND CALQNTIL ;

/*****************************************************************************
 Sample calls:  (Macro definition must be placed BEFORE Macro call).

   * Median Ages of Hispanic Persons from Table P15 in STF3 ;
   array agecat [31] _temporary_ (0,1,3,5,6,7,10,12,14,15,16,17,18,19,
         20,21,22,25,30,35,40,45,50,55,60,62,65,70,75,80,85) ;
   array agehispm [31] P015_01 - P015_31 ;
   array agehispf [31] P015_32 - P015_62 ;
   %CALQNTIL(medhm, 31, agecat, agehispm)
   %CALQNTIL(medhf, 31, agecat, agehispf)

   * Median Incomes (needed when generating STF3 data for custom areas) ;
   array inccat [25] _temporary_ (0,5000,10000,12500,15000,17500,20000,
         22500,25000,27500,30000,32500,35000,37500,40000,42500,45000,
         47500,50000,55000,60000,75000,100000,125000,150000) ;
   array hhinc [25] P080_01 - P080_25 ;
   array fminc [25] P107_01 - P107_25 ;
   array uiinc [25] P110_01 - P110_25 ;
   IF CUSTOM THEN DO ; * if 'custom' is true (non-0) for this observation ;
     %CALQNTIL(P080A, 25, inccat, hhinc)
     %CALQNTIL(P107A, 25, inccat, fminc)
     %CALQNTIL(P110A, 25, inccat, uiinc)
   END ;
*****************************************************************************/


/*****************************************************************************
 Macro CalcMedian calculates a single median from a frequency distribution (a
 one-dimensional SAS ARRAY of "grouped data") by using linear interpolation to
 estimate the exact location of the median within the target class of the
 frequency distribution.  Modification by Roy Williams at MISER (Mass. State
 Data Ctr.) of code originally produced by the California SDC.

 The macro expands to DATA step statements, so it must be invoked inside
 a DATA step in which the LowBound and Frequency arrays are already defined.
*****************************************************************************/
%MACRO CalcMedian (Median, N, LowBound, Frequency, TotFreq) ;
/*****************************************************************************
 Parameters must be provided in the given order:
   Median    (output) Estimated location of median (the name of the
                      variable the result should be returned to)
   N         (input)  Number of classes in the frequency distribution
   LowBound  (input)  Array of size N, which contains the lower bounds of
                      the N classes
   Frequency (input)  Array of size N which contains the frequencies
   TotFreq   (input)  The total of the frequency distribution

 The result is set to missing (.) when
   - TotFreq is zero, negative, or missing;
   - the median falls in the first or the last class.

 Local variables (just SAS vars with unlikely names, not %LOCAL Macro vars):
   _RunningSum_ is the running sum while searching for the target class
   _TargetFreq_ is the frequency below the median (1/2 of the total frequency)
   _Share_      is the estimated proportion of the target class above
                _TargetFreq_
   _Width_      is the width of the target class
   i            is the loop index.  NOTE: this is an ordinary name; any DATA
                step variable called i is overwritten by the macro.
*****************************************************************************/
  /* a missing or negative total must not reach the interpolation code */
  IF NOT (&TotFreq > 0)
    THEN &Median = . ;                 /* empty frequency distribution */
  ELSE DO ;
    _TargetFreq_ = &TotFreq * 0.5 ;

    /* Compute cumulative distribution until target exceeded */
    _RunningSum_ = 0 ;
    DO i = 1 TO &N while (_RunningSum_ < _TargetFreq_) ;
      _RunningSum_ = _RunningSum_ + &Frequency[i] ;
    END ;
    i = i - 1 ;            /* since loop goes one past the target class */

    /* linearity assumption rarely reasonable in top and bottom classes.   */
    /* i <= 1 also rejects i = 0, which would be an out-of-range subscript */
    IF i <= 1 OR i >= &N
      THEN &Median = . ;
    ELSE DO ;
      /* fraction of the target class lying above the target frequency */
      _Share_ = (_RunningSum_ - _TargetFreq_) / &Frequency[i] ;
      _Width_ = &LowBound[i+1] - &LowBound[i] ;
      /* step back from the upper bound of the target class */
      &Median = &LowBound[i+1] - _Width_ * _Share_ ;
    END ;
  END ;
%MEND CalcMedian ;

/*****************************************************************************
 Sample call (Median Age of Latinos, both sexes, male & female,
 used in Census2000 SF1 profiles Age-2, AgeRace-H, and Race-H):
 (Macro definition must be placed BEFORE Macro call).

   ARRAY PCT12LowBounds [103] _TEMPORARY_ (0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
     16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
     41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,
     66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,
     91,92,93,94,95,96,97,98,99,100,105,110) ;
   ARRAY P12LowBounds [23] _TEMPORARY_ (0, 5,10, 15,18, 20,21,22, 25,30,35,40,
     45,50,55, 60,62, 65,67, 70,75,80,85) ;
   ARRAY P12Hmale [23] P12Hi3 -P12Hi25 ;
   ARRAY P12Hfeml [23] P12Hi27-P12Hi49 ;
   ARRAY P12HBoth [23] ;  * if not already defined ;
   DO age = 1 TO 23 ;
     P12HBoth[age] = P12Hmale[age] + P12Hfeml[age] ;
   END ;

   IF Custom THEN DO ; * if 'Custom' is true (non-0) for this observation ;
     %CalcMedian (P13Hi1, 23, P12LowBounds, P12HBoth, P12Hi1)
     %CalcMedian (P13Hi2, 23, P12LowBounds, P12Hmale, P12Hi2)
     %CalcMedian (P13Hi3, 23, P12LowBounds, P12Hfeml, P12Hi26)
   END ;
*****************************************************************************/