################################################################################################# # Provides Code for Creating Different Confounding Situations and to Generate Data # # Created by: Andrea Cook, Robert Wellman, and Tracey Marsh for FDA Sentinel PRISM Project # # Date: November 30, 2010 # ################################################################################################# ################################################################################################# #fun.scena: One Binary Confounders (sex: Z1~binary(0.5) # # and Multiple Sites determined by length of Site_Dist # # N: Number of Observations # # Site_Dist: vector of the proportion of observations per site # #output: list with Z (matrix of one confounder) and S (matrix of number of sites-1 # # indicator variables # ################################################################################################# fun.scena<-function(N,Site_Dist=c(1/3,1/3,1/3)) { #confounders z1<-rbinom(N,1,0.5) Z<-cbind(z1) #site distribution tmp<-site.gen(N,site.p=Site_Dist) S<-tmp$S return(list(Z=Z, S=S)) } ################################################################################################# #fun.scenb: One Continous Confounder (10yr Age: z1~(unif(35,65)-50)/10)) # # and Multiple Sites determined by length of Site_Dist # # N: Number of Observations # # Site_Dist: vector of the proportion of observations per site # #output: list with Z (matrix of one confounder) and S (matrix of number of sites-1 # # indicator variables # ################################################################################################# fun.scenb<-function(N,Site_Dist=c(1/3,1/3,1/3)) { #confounders Z1_0<-runif(N,35,65) z1<-(Z1_0-50)/10 Z<-z1 #site distribution tmp<-site.gen(N,site.p=Site_Dist) S<-tmp$S return(list(Z=Z, S=S)) } ################################################################################################# #fun.scenc: Two Confounders (sex: Z1~binary(0.5); 10yr Age: Z2~(unif(35,65)-50)/10) # # and Multiple Sites determined by length of Site_Dist # # N: Number of Observations # # Site_Dist: vector of the proportion of observations per site # #output: list with Z (matrix of two confounders) and S (matrix of number of sites-1 # # indicator variables # ################################################################################################# fun.scenc<-function(N,Site_Dist=c(1/3,1/3,1/3)) { z1<-rbinom(N,1,0.5) Z2_0<-runif(N,35,65) z2<-(Z2_0-50)/10 Z<-cbind(z1,z2) #site distribution tmp<-site.gen(N,site.p=Site_Dist) S<-tmp$S return(list(Z=Z, S=S)) } ################################################################################################# #fun.scend: Three Confounders (sex: z1~binary(0.5); # # 10yr Age: z2~(unif(35,65)-50)/10; Obesity: z3~multinomial(0.45, 0.35, 0.20) # # and Multiple Sites determined by length of Site_Dist # # N: Number of Observations # # Site_Dist: vector of the proportion of observations per site # #output: list with Z (matrix of two confounders) and S (matrix of number of sites-1 # # indicator variables # ################################################################################################# fun.scend<-function(N,Site_Dist=c(1/3,1/3,1/3)) { z1<-rbinom(N,1,0.5) Z2_0<-runif(N,35,65) z2<-(Z2_0-50)/10 z3<-model.matrix(~factor(rep(c(0,1,2), times = rmultinom(1,N,c(0.45,0.35,0.20)))))[,2:3] z3.vars<-paste(rep('z3_',dim(z3)[[2]]),as.character(1:dim(z3)[[2]]),sep="") colnames(z3)<-z3.vars Z<-cbind(z1,z2,z3) #site distribution tmp<-site.gen(N,site.p=Site_Dist) S<-tmp$S return(list(Z=Z, S=S)) } ###Create site indicators site.gen<-function(N,site.p=c(1/3,1/3,1/3)) { s0<-sample(0:(length(site.p)-1),size=N,replace=T,prob=site.p) # Creates indicator matrix of sites # s.mat<-model.matrix(~factor(s0))[,-1] colnames(s.mat)<-paste(rep('s',dim(s.mat)[[2]]),as.character(1:dim(s.mat)[[2]]),sep="") S<-cbind(s.mat) return(list(S=S)) } ################################################################################################# ## simrun : Function to run a simulation evaluation of a single scenario # #input: Nobs: number of total observations # # LTime: a vector of times of each look (length 1 if no sequential monitoring) # # confound.fun: name of the function specifying confounder distribution (examples: # # fun.scena, fun.scenb, fun.scenc, fun.scend # # Beta_y: a vector of the log(OR) denoting the strength of confounding between # # z (confounders) and s (sites as indicator variables) on outcome # # Beta_x: a vector of the log(OR) denoting the strength of confounding between # # z (confounders) and s (sites as indicator variables) on x # # Site_Dist: a vector of the proportion of observations per site (e.g. c(1/3,1/3,1/3))# # py0: marginal probability of outcome, y, if x=0 P(Y|X=0) # # py1: marginal probability of outcome, y, if x=1 P(Y|X=1) # # px: marginal probability of x across all sites # # Nsim: number of simulations to use in the permutation test (larger is better) # # sumoutput: If True then provides summary output such as power, # # mean time to signal (sigtime), and mean time to study end (studyendtime). # # If false then provides all simulations results for each row if they # # signaled or not (signal) and minimum of end of study or time of signal # # (endtime) # #output: sumoutput=T a list that returns py0, py1, RD (risk difference), px, Beta_y, Beta_x, # # Site_Dist, outputIPW (power, sigtime, studyendtime) for unstratified IPW, # # outputIPW_s (power, sigtime, studyendtime) for stratified IPW estimator. # # sumoutput=F a list that returns py0, py1, RD (risk difference), px, Beta_y, Beta_x, # # Site_Dist, outputIPWall (signal, endtime) for unstratified IPW, # # outputIPW_sall (signal,endtime) for stratified IPW estimator. # ################################################################################################# simrun<-function(Nobs,LTime,confound.fun=fun.scenc,Beta_y,Beta_x,Site_Dist,py0,py1,px,Nsim=1500,sumoutput=T) { Beta_y<-as.matrix(Beta_y) Beta_x<-as.matrix(Beta_x) params<-calc_params(Nsim=1000, Nset=Nobs,confound.gen=confound.fun,Beta_y=Beta_y,Beta_x=Beta_x,Site_Dist=Site_Dist, tgt_p0=py0,tgt_p1=py1,tgt_px=px) outputIPWtemp<-NULL outputIPWtemp_s<-NULL for(i in 1:Nsim) { #Start date is uniform across study start<-floor(runif(Nobs,1,max(LTime)+1)) #generate counfounders and sort according to start day confounders<-confound.fun(N=Nobs, Site_Dist=Site_Dist) z<-confounders$Z[order(start),] S<-confounders$S[order(start),] zs<-cbind(z,S) start<-start[order(start)] #generate individual exposures and outcomes for desired scenario# #DOI flag x<-rbinom(Nobs,1,1/(1+exp(- params$x0 - zs%*%Beta_x))) #Outcome y<-rbinom(Nobs,1,1/(1+exp(- params$y0 - params$y1*x - zs%*%Beta_y))) s<-as.numeric(apply(S,1,sum)==0) for(i in 1:ncol(S)) { s[S[,i]==1]<-i+1 } res<-IPWseq(y,x,z,s,start,LTime,alpha=0.05,delta=0.5,nperm=1500) outputIPWtemp<-rbind(outputIPWtemp,c(signal=res$signal,endtime=res$endtime)) outputIPWtemp_s<-rbind(outputIPWtemp_s,c(signal=res$signal_s,endtime=res$endtime_s)) } #Summary if(sumoutput) { outputIPW<-cbind(power=mean(outputIPWtemp[,1]),sigtime=mean(outputIPWtemp[outputIPWtemp[,1]==1,2]), studyendtime=mean(outputIPWtemp[,2])) outputIPW_s<-cbind(power=mean(outputIPWtemp_s[,1]),sigtime=mean(outputIPWtemp_s[outputIPWtemp_s[,1]==1,2]), studyendtime=mean(outputIPWtemp_s[,2])) return(list(py0=py0,py1=py1,RD=py1-py0,px=px,Beta_y=Beta_y,Beta_x=Beta_x,Site_Dist=Site_Dist, outputIPW=outputIPW,outputIPW_s=outputIPW_s)) } else { return(list(py0=py0,py1=py1,RD=py1-py0,px=px,Beta_y=Beta_y,Beta_x=Beta_x,Site_Dist=Site_Dist,outputIPWall=outputIPWtemp, outputIPW_sall=outputIPWtemp_s)) } } ################################################################################################# ## datagen : Function to a single dataset for a given scenario # #input: Nobs: number of total observations # # LTime: a vector of times of each look (length 1 if no sequential monitoring) # # confound.fun: name of the function specifying confounder distribution (examples: # # fun.scena, fun.scenb, fun.scenc, fun.scend # # Beta_y: a vector of the log(OR) denoting the strength of confounding between # # z (confounders) and s (sites as indicator variables) on outcome # # Beta_x: a vector of the log(OR) denoting the strength of confounding between # # z (confounders) and s (sites as indicator variables) on x # # Site_Dist: a vector of the proportion of observations per site (e.g. c(1/3,1/3,1/3))# # py0: marginal probability of outcome, y, if x=0 P(Y|X=0) # # py1: marginal probability of outcome, y, if x=1 P(Y|X=1) # # px: marginal probability of x across all sites # # studylength: length of time of study (e.g. 720 for a two year study) # # Nsim: number of simulations to use to find parameters to get correct marginal probs # #output: a list with elements y (outcome), x (predictor of interest), start (time subject # # enters study based on uniform distribution), z (matrix of confounders, and # # s (vector of unique site indicators) # ################################################################################################# datagen<-function(Nobs,confound.fun=fun.scenc,Beta_y,Beta_x,Site_Dist,py0,py1,px,studylength=720,Nsim=1000) { Beta_y<-as.matrix(Beta_y) Beta_x<-as.matrix(Beta_x) params<-calc_params(Nsim=Nsim, Nset=Nobs,confound.gen=confound.fun,Beta_y=Beta_y,Beta_x=Beta_x,Site_Dist=Site_Dist, tgt_p0=py0,tgt_p1=py1,tgt_px=px) #single time use = one day exposure - no need to define end day of exposure yet start<-floor(runif(Nobs,1,studylength+1)) #generate counfounders and sort according to start day confounders<-confound.fun(N=Nobs, Site_Dist=Site_Dist) z<-confounders$Z[order(start),] S<-confounders$S[order(start),] zs<-cbind(z,S) start<-start[order(start)] #generate individual exposures and outcomes for desired scenario# #DOI flag x<-rbinom(Nobs,1,1/(1+exp(- params$x0 - zs%*%Beta_x))) #Outcome y<-rbinom(Nobs,1,1/(1+exp(- params$y0 - params$y1*x - zs%*%Beta_y))) s<-as.numeric(apply(S,1,sum)==0) for(i in 1:ncol(S)) { s[S[,i]==1]<-i+1 } return(list(y=y,x=x,start=start,z=z,s=s)) }