/***
File: Conf_Int_Exp.sas
Purpose: Replicate Confidence Intervals in Exposure Report
for 2013-2014 Total Blood Mercury 95th percentile
estimates for 6-11, 12-19, and 20+ age groups,
males and females, and various racial groups;
Date: 22 APR 05
Date Revised: 26 APR 05
05 MAY 05
27 OCT 08
23 JUN 11
02 FEB 17
31 JAN 25
Note: This dataset is compliant with Executive Order 14168, titled "Defending Women from Gender Ideology Extremism and Restoring Biological Truth to the Federal Government".
Input Datasets: PbCd_h and demo_h
Programmer: Lisa Mirel / Sam Caudill / Wellington Onyenwe
****/
options nomprint nosymbolgen nomlogic nonotes nosource nosource2;
***bring in datasets;
LIBNAME l06 XPORT 'PATH_TO_PbCd_h.xpt';
Libname demo xport 'PATH_TO_demo_h.xpt';
***Note: default method for CI of percentile in SUDAAN uses the Logit;
data lab6;
set l06.PbCd_h;
run;
data demo;
set demo.demo_h;
run;
proc sort data=lab6;
by seqn;
proc sort data=demo;
by seqn;
data l6dem;
merge lab6 (in=a) demo (in=b);
by seqn;
if a=1;
*Define Age Groups;
if ridageyr ge 6 and ridageyr le 11 then age_grp = 1;
if ridageyr ge 12 and ridageyr le 19 then age_grp = 2;
if ridageyr ge 20 then age_grp = 3;
if ridageyr ge 6 then age_group = 1;
if riagendr eq 1 then sex = 1;
if riagendr eq 2 then sex = 2;
if ridreth3 eq 1 then race = 1; *'MA';
*if ridreth3 eq 2 then race = 2; *'OH'; *An estimate for this category by itself is not calculated. Instead OH is combined with MA to create AH;
if ridreth3 eq 3 then race = 2; *'NHW';
if ridreth3 eq 4 then race = 3; *'NHB';
if ridreth3 eq 6 then race = 4; *'NHA';
if ridreth3 eq 7 then race = 5; *Non-Hispanic Multi-racial;
racial = 2;
if ridreth3 eq 1 or ridreth3 eq 2 then racial = 1; *'AH';
run;
*
Macro for calculating totals -- following steps in
Appendix A Fourth Exposure Report:
Confidence Interval Estimation for Percentiles
*;
%macro pcntci (var1, var2, var3, var4, var5, var6, var7);
data L6dem2;
set L6dem;
mvar=1;
anal_var_orig = &var6;
wt_orig = &var7;
wt_orig_rnd = round(wt_orig,1);
run;
***Step 1a;
proc univariate data = l6dem2;
where &var1=&var2 and sddsrvyr=&var3;
var anal_var_orig;
output out=xpercenta pctlpre=P_ pctlpts=&var4 n=wtn;
freq wt_orig_rnd;
run;
data xpercent1a;
set xpercenta;
mvar=1;
RUN;
proc sort data=l6dem2;
by mvar;
proc sort data=xpercent1a;
by mvar;
data xchperc1a;
merge l6dem2 (in=a) xpercent1a (in=b);
by mvar;
run;
***Step 1b;
proc sort data=l6dem2;
by anal_var_orig;
proc means noprint data=l6dem2;
by anal_var_orig;
where &var1=&var2 and sddsrvyr=&var3;
var wt_orig;
output out=xwt_mean mean=wt_mean; *wt_mean is now the mean weight of all
subjects in the same domain/subsample
with the same measured result;
run;
data xl6dem2;
merge xwt_mean l6dem2;
by anal_var_orig;
wt_mean_rnd = round(wt_mean,1);
run;
data xxl6dem2;
set xl6dem2;
by anal_var_orig;
if first.anal_var_orig then do;
num = 1;
end;
else do;
num + 1;
end;
anal_var_incr = anal_var_orig + num/1000000000;
run;
proc univariate data = xxl6dem2;
where &var1=&var2 and sddsrvyr=&var3;
var anal_var_incr;
output out=xpercentb pctlpre=P_ pctlpts=&var4 n=wtn;
freq wt_mean_rnd;
run;
data xpercent1b;
set xpercentb;
mvar=1;
RUN;
proc sort data=xxl6dem2;
by mvar;
proc sort data=xpercent1b;
by mvar;
data xchperc1b;
merge xxl6dem2 (in=a) xpercent1b (in=b);
by mvar;
run;
***Step 2a;
data xchperc2a;
set xchperc1a;
if &var1=&var2 and sddsrvyr=&var3 and 0<=anal_var_orig=P_&var4 then ind2=0;
run;
proc sort data=xchperc2a;
by sdmvstra sdmvpsu;
proc descript data=xchperc2a design=wr atlevel1=1 atlevel2=2 ;
SUBPOPN &var1=&var2 and sddsrvyr=&var3;
nest sdmvstra sdmvpsu /missunit;
weight wt_orig; *BE SURE TO USE THE PROPER WEIGHT;
class ind2;
var ind2;
print nsum="samsize" mean semean geomean segeomean
deffmean="deff #4"/style=nchs nsumfmt=f8.0 meanfmt=f9.4 semeanfmt=f9.4
geomeanfmt=f5.3 segeomeanfmt=f5.3;
output atlev1 atlev2 nsum mean semean geomean segeomean deffmean/filename=xest1 replace
nsumfmt=f8.0 meanfmt=f9.4 semeanfmt=f9.4
geomeanfmt=f5.3 segeomeanfmt=f5.3;
run;
DATA xpest2a;
SET xest1;
if _N_=1;
mvar = 1;
semean_orig = semean;
deffmean = max(1,deffmean);
deffmean_orig = deffmean;
drop nsum mean semean geomean segeomean deffmean atlev2 atlev1;
run;
proc sort data=xpest2a;
by mvar;
proc sort data=xPERCENT1a;
by mvar;
run;
***Step 2b;
data xchperc2b;
set xchperc1b;
if &var1=&var2 and sddsrvyr=&var3 and 0<=anal_var_incr=P_&var4 then ind2=0;
run;
proc sort data=xchperc2b;
by sdmvstra sdmvpsu;
proc descript data=xchperc2b design=wr atlevel1=1 atlevel2=2 ;
SUBPOPN &var1=&var2 and sddsrvyr=&var3;
nest sdmvstra sdmvpsu /missunit;
weight wt_mean; *BE SURE TO USE THE PROPER WEIGHT;
class ind2;
var ind2;
print nsum="samsize" mean semean geomean segeomean
deffmean="deff #4"/style=nchs nsumfmt=f8.0 meanfmt=f9.4 semeanfmt=f9.4
geomeanfmt=f5.3 segeomeanfmt=f5.3;
output atlev1 atlev2 nsum mean semean geomean segeomean deffmean/filename=xest2 replace
nsumfmt=f8.0 meanfmt=f9.4 semeanfmt=f9.4
geomeanfmt=f5.3 segeomeanfmt=f5.3;
run;
DATA xpest2b;
SET xest2;
if _N_=1;
mvar = 1;
ddf=atlev2-atlev1;
run;
proc sort data=xpest2b;
by mvar;
proc sort data=xPERCENT1b;
by mvar;
run;
***Step 3;
*;
*The forumlas of Korn et al are used to estimate the proportion of
subjects below the selected percentile-- from Sam Caudill code
*;
data xtest;
merge xpest2a xpest2b xPERCENT1b;
by mvar;
N_ACT =NSUM; *ACTUAL SAMPLE SIZE;
PT = MEAN; *SUDAAN WEIGHTED MEAN PROPORTION;
CALL SYMPUT('N_ACT',NSUM);
T_NUM = TINV(0.975,NSUM-1);
T_DEN = TINV(0.975,ddf);
N1 = ((T_NUM/T_DEN)**2)*N_ACT/DEFFMEAN_orig; *EFFECTIVE SAMPLE SIZE - SAM METHOD;
N=((T_NUM/T_DEN)**2)*MEAN*(1 - MEAN)/(SEMEAN_orig**2); *EFFECTIVE SAMPLE SIZE DUE TO;
*COMPLEX STRATIFIED SAMPLING - KORN METHOD;
IF N eq . then N = N1;
IF N GT NSUM THEN N = NSUM;
IF MEAN EQ 0.0 THEN N = NSUM;
NA=MEAN*N; *EFFECTIVE NUMBER OF SUBJECTS;
OUTPUT;
RUN;
DATA xCYTO;
SET xtest;
V1 = 2*NA;
V2 = 2*(N - NA + 1);
V3 = 2*(NA + 1);
V4 = 2*(N - NA);
PL = V1*FINV(0.025,V1,V2)/(V2 + V1*FINV(0.025,V1,V2));
PU = V3*FINV(0.975,V3,V4)/(V4 + V3*FINV(0.975,V3,V4));
PT = PT*100;
N_EFF = N;
RUN;
DATA xxCYTO;
SET xCYTO;
L95 = PL*100;
U95 = PU*100;
IF L95 EQ . THEN L95 = 0.0;
IF U95 EQ . THEN U95 = 100.0;
%IF &var4 EQ 10 %THEN %DO; PT = 10.0 ; %END;
%IF &var4 EQ 25 %THEN %DO; PT = 25.0 ; %END;
%IF &var4 EQ 50 %THEN %DO; PT = 50.0 ; %END;
%IF &var4 EQ 75 %THEN %DO; PT = 75.0 ; %END;
%IF &var4 EQ 90 %THEN %DO; PT = 90.0 ; %END;
%IF &var4 EQ 95 %THEN %DO; PT = 95.0 ; %END;
if L95 gt PT then L95 = PT;
if U95 lt PT then U95 = PT;
title3 "PERCENTILE (WITH CIs)";
RUN;
***Step 4;
data xXtest;
set xxcyto;
CALL SYMPUT('L95',LEFT(PUT(L95,8.1)));
CALL SYMPUT('U95',LEFT(PUT(U95,8.1)));
CALL SYMPUT('MEAN',LEFT(PUT(PT,8.1)));
mvar=1;
run;
proc sort data=XXtest;
by mvar;
proc sort data=xxl6dem2;
by mvar;
data xcomp;
merge XXtest (in=a) xxl6dem2 (in=b);
by mvar;
if b=1;
run;
proc univariate data = xcomp;
var anal_var_incr;
where &var1=&var2 and sddsrvyr=&var3;
output out=xxEST pctlpre=P_ pctlpts=&L95 &U95 &MEAN
PCTLPRE = A
PCTLNAME = L95 U95 MEAN ;
freq wt_mean_rnd;
run;
proc print data=xxEST;
run;
*creates multiple final dataset to be put in a final large dataset with all
datapoints;
data fin&var5;
set xxest;
if "&var1" eq 'age_grp' and &var2 eq 1 then Population_Group = "Age: 6-11";
if "&var1" eq 'age_grp' and &var2 eq 2 then Population_Group = "Age: 12-19";
if "&var1" eq 'age_grp' and &var2 eq 3 then Population_Group = "Age: 20+ ";
if "&var1" eq 'age_group' and &var2 eq 1 then Population_Group = "Age: ALL";
if "&var1" eq 'sex' and &var2 eq 1 then Population_Group = "MALE ";
if "&var1" eq 'sex' and &var2 eq 2 then Population_Group = "FEMALE";
if "&var1" eq 'race' and &var2 eq 1 then Population_Group = "MA ";
if "&var1" eq 'race' and &var2 eq 2 then Population_Group = "NHW ";
if "&var1" eq 'race' and &var2 eq 3 then Population_Group = "NHB ";
if "&var1" eq 'race' and &var2 eq 4 then Population_Group = "NHA ";
if "&var1" eq 'racial' and &var2 eq 1 then Population_Group = "AH ";
yr=&var3;
per=&var4;
anly="&var6";
P_MEAN = round(P_MEAN,0.01);
P_L95 = round(P_L95,0.01);
P_U95 = round(P_U95,0.01);
perci=compress (P_MEAN||"("||P_L95||"-"||P_U95||")");
N_ACT = &N_ACT;
keep anly Population_Group yr per perci N_ACT;
run;
proc print data=fin&var5 noobs;
var Population_Group yr per perci N_ACT;
run;
%mend;
** Variable definitions used in Macro
VAR1 = SubPopulation Variable Name
VAR2 = SubPopulation Value to Select
VAR3 = SURVEY YEAR
VAR4 = PERCENTILE
VAR5 = INDICATES DATASET NUMBER for Concatination of All Data Sets
VAR6 = ANALYTE OF INTEREST
VAR7 = WEIGHT VARIABLE;
***NOTE: CAN ADD MORE MACRO VARIABLES IN CODE TO SELECT ON SEX AND RACE/ETHNICTY;
%pcntci (age_grp, 1, 8, 95, 1, lbxthg, wtmec2yr);
%pcntci (age_grp, 2, 8, 95, 2, lbxthg, wtmec2yr);
%pcntci (age_grp, 3, 8, 95, 3, lbxthg, wtmec2yr);
%pcntci (age_group, 1, 8, 95, 4, lbxthg, wtmec2yr);
%pcntci (sex, 1, 8, 95, 5, lbxthg, wtmec2yr);
%pcntci (sex, 2, 8, 95, 6, lbxthg, wtmec2yr);
%pcntci (race, 1, 8, 95, 7, lbxthg, wtmec2yr);
%pcntci (race, 2, 8, 95, 8, lbxthg, wtmec2yr);
%pcntci (race, 3, 8, 95, 9, lbxthg, wtmec2yr);
%pcntci (race, 4, 8, 95, 10, lbxthg, wtmec2yr);
%pcntci (racial, 1, 8, 95, 11, lbxthg, wtmec2yr);
***CREATE ONE LARGE DATASET USING ALL DATASETS CREATED IN THE MACRO;
options nocenter;
data allperc;
set fin1-fin11;
run;
proc print data=allperc;
run;
Quit;