################################################################################################# # Provides Code for Running IPW sequential method with permutation distribution # # Created by: Andrea Cook, Robert Wellman, and Tracey Marsh for FDA Sentinel PRISM Project # # Date: November 30, 2010 # ################################################################################################# library(geepack) library(compiler) ## support functions ## # support functions for exact distribution of test statistics under the null (RD=0) # calculated via permutation method (of exposure variable holding outcome and confounders fixed) flat_scale <- function(St, propinfo, delta) { stat <- (propinfo)^(1-2*delta)*St return(stat) } undo_flat_scale <- function(St, propinfo, delta) { stat <- (propinfo)^(2*delta-1)*St return(stat) } perm.x<-function(x,nperm,xperm) { ob<-vector("list", nperm) for(i in 1:nperm) { ob[[i]]<-c(xperm[[i]],sample(x,length(x))) } return(ob) } perm.x<-cmpfun(perm.x) perm.x_s<-function(x,s,nperm,xperm) { ob<-vector("list", nperm) for(i in 1:nperm) { tmp<-NULL suniq<-unique(s) for (si in suniq) { tmp[s==si]<-sample(x[s==si],sum(s==si)) } ob[[i]]<-c(xperm[[i]],tmp) } ob } perm.x_s<-cmpfun(perm.x_s) ########################### # # RD Estimators in Simulation Framework # # ########################### ################################################################################################# #RD.IPW: Calculate Inverse Probability Estimator and Variance # #input: y: a vector of observed outcomes # # x: a vector of the current indicator of exposure data # # z: a matrix of all confounders (excluding intercept) # # p: a vector of propensity scores estimating P(x|z) # #output: # # matrix with the first column the risk difference and the second column the variance # # of the risk difference # ################################################################################################# RD.IPW<-function(y,x,z,p) { n<-length(y) J<-as.matrix(rep(1,n)) # Estimated P(y|x=1) mu1<-t(x/p)%*%(y)/(t(x/p)%*%(J)) # Estimated P(y|x=0) mu0<-t((1-x)/(1-p))%*%(y)/(t((1-x)/(1-p))%*%(J)) # Risk Difference delta<-mu1-mu0 Z<-as.matrix(cbind(1,z)) H.hat<-comp.Hhat(x,y,Z,p,mu1,mu0) E.hat<-comp.EhatInv(n,p,Z=Z) # Variance of Risk Differnce var<-t(J)%*%(x*(y-mu1)/p- (1-x)*(y-mu0)/(1-p)- t(t(x-p)*(H.hat%*%E.hat%*%t(Z))))^2/n^2 return(cbind(rd=delta,var.rd=t(var))) } # support functions for calculation of variance of IPW estimators comp.EhatInv<-function(n,p,Z) { E<-matrix(-1,nrow=ncol(Z),ncol=ncol(Z)) for(i in 1:ncol(Z)) { for(j in i:ncol(Z)) { E[i,j]<-t(p*(1-p))%*%(Z[,i]*Z[,j]) E[j,i]<-t(p*(1-p))%*%(Z[,j]*Z[,i]) } } E.hat<-(1/n)*E return(E.hat) } comp.EhatInv<-cmpfun(comp.EhatInv) comp.Hhat<-function(x,y,Z,p,mu1,mu0) { n<-length(y) B<-(x*(1-p)/p)*(y-mu1)+ ((1-x)*p/(1-p))*(y-mu0) H.hat<-t(B)%*%Z/n return(H.hat) } ################################################################################################# #RD.IPWperm: Calculates IPW when P(X|Z)=P(X) (Case of Permuted Distribution) # #input: y: a vector of observed outcomes # # x: a vector or matrix of the current indicator of exposure data (permuted) # # p: a vector of propensity scores estimating P(x|z) # #output: # # matrix with the first column the risk difference and the second column the variance # # of the risk difference # ################################################################################################# RD.IPWperm<-function(x,y,p) { n<-length(y) J<-as.matrix(rep(1,n)) mu1<-t(x/p)%*%(y)/(t(x/p)%*%(J)) mu0<-t((1-x)/(1-p))%*%(y)/(t((1-x)/(1-p))%*%(J)) delta<-mu1-mu0 var<-t(J)%*%(x*(y-mu1)/p - (1-x)*(y-mu0)/(1-p))^2/n^2 return(cbind(delta,t(var))) } RD.IPWperm2<-function(x,y,p) { n<-length(y) J<-as.matrix(rep(1,n)) mu1<-t(x/p)%*%(y)/(t(x/p)%*%(J)) mu0<-t((1-x)/(1-p))%*%(y)/(t((1-x)/(1-p))%*%(J)) delta<-mu1-mu0 var<-t(J)%*%(x*(y-matrix(mu1,byrow=T,nrow=nrow(x),ncol=ncol(x)))/p - (1-x)*(y-matrix(mu0,byrow=T,nrow=nrow(x),ncol=ncol(x)))/(1-p))^2/n^2 return(cbind(delta,t(var))) } ################################################################################################# #RD.IPW_s: Calculates Site-Specific Inverse Probability Estimator and Variance # #input: y: a vector of observed outcomes # # x: a vector of the current indicator of exposure data # # z: a matrix of all confounders (excluding intercept) # # s: a vector of unique indicators of site # # p_s: a vector of site specific propensity scores estimating P(x|z) # #output: # # matrix with the first column the risk difference and the second column the variance # # of the risk difference # ################################################################################################# RD.IPW_s<-function(y,x,z,s,p_s) { x<-as.matrix(x) Z<-as.matrix(z) estimates<-NULL varest<-NULL samples<-NULL suniq<-unique(s) for (i in suniq) { temp<-RD.IPW(x=x[s==i,],y=y[s==i],z=Z[s==i,],p=p_s[s==i]) estimates<-rbind(estimates,temp[,1]) varest<-rbind(varest,temp[,2]) samples<-c(samples,sum(s==i)) } estRD.IPW_s<-apply(estimates*samples,2,sum)/sum(samples) varRD.IPW_s<-apply(varest*samples^2,2,sum)/sum(samples)^2 return(cbind(estRD=estRD.IPW_s,varRD=varRD.IPW_s)) } ################################################################################################# #RD.IPWperm: Calculates site specific IPW when P(X|Z)=P(X) (Case of Permuted Distribution) # #input: y: a vector of observed outcomes # # x: a vector of the current indicator of exposure data # # s: a vector of unique indicators of site # # p_s: a vector of site specific propensity scores estimating P(x|z) # #output: # # matrix with the first column the risk difference and the second column the variance # # of the risk difference # ################################################################################################# RD.IPW_s_perm<-function(x,y,s,p_s) { estimates<-NULL varest<-NULL samples<-NULL suniq<-unique(s) for (i in suniq) { temp<-RD.IPWperm(x=x[s==i],y=y[s==i],p=p_s[s==i]) estimates<-rbind(estimates,temp[,1]) varest<-rbind(varest,temp[,2]) samples<-c(samples,sum(s==i)) } estRD.IPW_s<-apply(estimates*samples,2,sum)/sum(samples) varRD.IPW_s<-apply(varest*samples^2,2,sum)/sum(samples)^2 return(cbind(estRD.IPW_s,varRD.IPW_s)) } ################################################################################ #calc_params: Calculates parameters for generating exposure and outcome data # # with desired marginal probabilitites of exposure, outcome in # # exposed and unexposed and causal risk difference. # #input: Nsim: number of simulations for estimating parameters # # Nset: number of observations in each simulated data set # # confound.gen: type of confounder distribution (see data gen code) # # Beta_y,Beta_x: coefficents for outcome and exposure model # # Site_Dist: vector given the proportionate sample size from each # # site # # tgt_p0: marginal probability of outcome in unexposed # # tgt_p1: marginal probability of outcome in exposed # # tgt_px: marginal probability of exposure # # # #output: vector of paremeters: intercept & exposure effect for outcome # # model, intercept for exposure model # ################################################################################ calc_params<-function(Nsim,Nset,confound.gen,Beta_y,Beta_x,Site_Dist,tgt_p0=0.01,tgt_p1=0.02,tgt_px=0.5){ y0<-NULL y1<-NULL x0<-NULL for (i in 1:Nsim){ #generate counfounders confounders<-confound.gen(Nset,Site_Dist) Z_only<-confounders$Z S_only<-confounders$S Z<-cbind(Z_only,S_only) #solve for counterfactual unexposed odds risk param y0res<-bisection2(ftn=parm.by0,x.l=-10,x.r=10,by0=1,byx=0,x=0,beta=Beta_y,z=Z,tgt=tgt_p0) #y0res #solve for counterfactual exposed odds risk param #NOTE: parm.by1 uses y0res, Beta, Z, and tgt_p1 from global environment# y1res<-bisection2(ftn=parm.by1,x.l=-10,x.r=10,by0=y0res,byx=0,x=1,beta=Beta_y,z=Z,tgt=tgt_p1) #y1res #solve for baseline odds of exposure param x0res<-bisection2(ftn=parm.by0,x.l=-10,x.r=10,by0=0,byx=0,x=0,beta=Beta_x,z=Z,tgt=tgt_px) #x0res y0<-cbind(y0,y0res) y1<-cbind(y1,y1res) x0<-cbind(x0,x0res) } return(list(y0=mean(y0),y1=mean(y1),x0=mean(x0))) } ################################################################################ #ind.risks: Computes person-specific probability of outcome using logit model # #input: by0: intercept in logit model # # byx: log odds ratio for exposure # # x: vector of exposures # # beta: vector of confounder coefficients (log scale) # # z: matrix of confounders not including intercept # # # #output: vector of probabilities of outcome # ################################################################################ ind.risks<-function(by0,byx,x,beta,z) { return(1/(1+exp(-by0-byx*x-z%*%beta))) } ################################################################################ #parm.by0: Function for use in bisection algorithm to solve for intecept in # # logit model to yield target risk in unexposed population given the # # assumed distribution of covariates. # #input: c: input value to be varied in order to rind function root # # beta: coefficients for confounders # # z: vector of confounders not including intercept # # tgt: desired marginal probability of outcome in unexposed # #output: evaluated value of the function (scalar) # ################################################################################ parm.by0<-function(c,by0,byx,x,beta,z,tgt) { risk<-ind.risks(by0=c,byx=0,x=0,beta=beta,z=z) avg.risk<-mean(risk) return(avg.risk-tgt) } ################################################################################ #parm.by1: Function for use in bisection algorithm to solve for exposure # # coefficieint in logit model to yield target risk in exposed # # population given the assumed distribution of covariates. # #input: c: input value to be varied in order to rind function root # # by0: given value of intercept term, obtained from function above # # beta: coefficients for confounders # # z: vector of confounders not including intercept # # tgt: desired marginal probability of outcome in exposed # #output: evaluated value of the function (scalar) # ################################################################################ parm.by1<-function(c,by0,byx,x,beta,z,tgt) { risk<-ind.risks(by0=by0,byx=c,x=1,beta=beta, z=z) avg.risk<-mean(risk) return(avg.risk-tgt) } ####################################################################################### #bisection2: Root finding algorithm to solve for data generation model parameters. # # Function adapted from example give in Introduction to Scientific Programming and # # Simulation Using R, by Jones, Maillardet and Robinson # #input: ftn: function for which root is desired # # x.l: starting left endpoint # # x.r: starting rigjht endpoint # # tol: precision of the approximate root # # by0: parmater to solve, intercept for logit model # # byx: parameter to solve or fixed parameter if byo is constant # # x: exposure variable, either 0 or 1 for unexposed or exposed # # z: a matrix of all confounders (excluding intercept) # # tgt: desired prevalence # # beta: covariate parameters # # # #output: approximate root (scalar) # ####################################################################################### bisection2<-function(ftn, x.l, x.r, tol = 1e-9,by0,byx,x,beta,z,tgt) { # applies the bisection algorithm to find x such that ftn(x) == 0 # # x.l and x.r must bracket the fixed point, that is # x.l < x.r and ftn(x.l) * ftn(x.r) < 0 # # the algorithm iteratively refines x.l and x.r and terminates when # x.r - x.l <= tol # check inputs and attempt to correct if (x.l >= x.r) { cat("error: x.l >= x.r \n") return(NULL) x.l<- x.r - 2*abs(x.r) } f.l <- ftn(c=x.l,by0=by0,byx=byx,x=x,beta=beta,z=z,tgt=tgt) f.r <- ftn(c=x.r,by0=by0,byx=byx,x=x,beta=beta,z=z,tgt=tgt) if (f.l == 0) { return(x.l) } else if (f.r == 0) { return(x.r) } else if (f.l * f.r > 0) { attempts<-1 while(f.l*f.r > 0 & attempts < 10){ delta<-x.r-x.l x.r<-x.r + 2*delta x.l<-x.l - 2*delta f.l <- ftn(c=x.l,by0=by0,byx=byx,x=x,beta=beta,z=z,tgt=tgt) f.r <- ftn(c=x.r,by0=by0,byx=byx,x=x,beta=beta,z=z,tgt=tgt) attempts<-attempts + 1 } if (f.l * f.r > 0){ cat("error:unable to correct ftn(c=x.l) * ftn(c=x.r) > 0 \n") return(NULL) } } # successively refine x.l and x.r n <- 0 while ((x.r - x.l) > tol) { x.m <- (x.l + x.r)/2 f.m <- ftn(c=x.m,by0=by0,byx=byx,x=x,beta=beta,z=z,tgt=tgt) if (f.m == 0) { return(x.m) } else if (f.l * f.m < 0) { x.r <- x.m f.r <- f.m } else { x.l <- x.m f.l <- f.m } n <- n + 1 } # return (approximate) root return((x.l + x.r)/2) } ################################################################################################# ## bdIPWseq : Function to calculated permuted boundaries for IPW and IPW_s # #input: nperm: number of permutations to use when finding boundary # # y: a vector of observed outcomes # # x: a vector of the current indicator of exposure data # # z: a matrix of all confounders (excluding intercept) # # s: a matrix of indicator variables of site # # alpha: the critical level for signaling # # delta: shape parameter for the boundary (delta=.5 is Pocock) (delta=0 is Fleming) # # LTime: a vector of times of each look (length 1 if no sequential monitoring) # # Ncum: a vector of number of people observed up to each corresponding LTime # #output: a list with first element a vector of critical values at each look (bd_ipw) for # # unstratified IPW and second element a vector of critical values at each look # # (bd_ipw_s) for stratified IPW # ################################################################################################# bdIPWseq<-function(nperm,y,x,s,alpha=0.05, delta=0.5, LTime, Ncum) { #propinfo=Ncum/Ncum[length(Ncum)] propinfo=LTime/max(LTime) #initalize Ncumstart<-c(1,Ncum+1) xperm<-NULL statIPW_permdist<-NULL xperm_s<-NULL statIPW_s_permdist<-NULL #for every look: # permute exposures (within entry look) # calculate IPW statistic # for site aggregate and stratified for (T in 1:length(Ncum)) { #aggregated across sites xperm<-perm.x(x[Ncumstart[T]:Ncum[T]],nperm,xperm) estIPW<-t(sapply(xperm,RD.IPWperm,y=y[1:Ncum[T]],p=rep(mean(x[1:Ncum[T]]),Ncum[T]) ) ) statIPW_permdist<-cbind(statIPW_permdist,estIPW[,1]/sqrt(estIPW[,2])) #stratified by site xperm_s<-perm.x_s(x[Ncumstart[T]:Ncum[T]],s[Ncumstart[T]:Ncum[T]],nperm,xperm_s) p_s_perm<-rep(NULL,Ncum[T]) xT<-x[1:Ncum[T]] sT<-s[1:Ncum[T]] suniq<-unique(sT) for(i in suniq) { p_s_perm[sT==i]<-mean(xT[sT==i]) } estIPW_s<-t(sapply(xperm_s,RD.IPW_s_perm,y=y[1:Ncum[T]],s=sT,p_s=p_s_perm)) statIPW_s_permdist<-cbind(statIPW_s_permdist,estIPW_s[,1]/sqrt(estIPW_s[,2])) } #put standardized statistics on flat scale flat_stats_ipw<-t(matrix(apply(statIPW_permdist,1,flat_scale,propinfo=propinfo,delta=delta),nrow=length(LTime), ncol=nperm)) flat_stats_ipw_s<-t(matrix(apply(statIPW_s_permdist,1,flat_scale,propinfo=propinfo,delta=delta),nrow=length(LTime), ncol=nperm)) #max over all looks bd_dist_ipw<-apply(flat_stats_ipw,1,max) bd_dist_ipw_s<-apply(flat_stats_ipw_s,1,max) #boundary is rank corresponding to critical level, transformed back to standardized statistic scale bd_ipw<-undo_flat_scale(St=quantile(bd_dist_ipw,1-alpha,na.rm=T,type=9),propinfo=propinfo,delta=delta) bd_ipw_s<-undo_flat_scale(St=quantile(bd_dist_ipw_s,1-alpha,na.rm=T,type=9),propinfo=propinfo,delta=delta) return(rbind(bd_ipw=bd_ipw, bd_ipw_s=bd_ipw_s)) } bdIPWseq<-cmpfun(bdIPWseq) ################################################################################################# ## IPWseq : Function to calculate Inverse Probability Method if data is combined across sites # #input: y: a vector of observed outcomes # # x: a vector of the current indicator of exposure data # # z: a matrix of all confounders (excluding intercept) # # s: a vector of unique indicators of site # # start: a vector of the start times observed up to time t # # LTime: a vector of times of each look (length 1 if no sequential monitoring) # # delta: shape parameter for the boundary (delta=.5 is Pocock) (delta=0 is Fleming) # # nperm: Number of simulations to calculate the boundary # #output: a list that returns alpha, delta, signal (T or F) for unstratified IPW, # # endtime (time of signal or end of study) for unstratified IPW, # # output a result dataset for unstratfied IPW(LTime, RD, ObsStat, Boundary, signal), # # signal_s (T or F) for stratified IPW, # # endtime_s (time of signal or end of study) for stratified IPW, and # # output_s a result dataset for stratfied IPW (LTime, RD, ObsStat, Boundary, signal) # ################################################################################################# IPWseq<-function(y,x,z,s,start,LTime,alpha=0.05,delta=0.5,nperm=1500) { # Order Data by time of enrollemnt in study # y<-y[order(start)] x<-x[order(start)] z<-as.matrix(z[order(start),]) s<-s[order(start)] start<-start[order(start)] # Create matrix of Indicator of Site: S# suniq<-unique(s)[order(unique(s))] S<-NULL for(i in suniq[-1]) { S<-cbind(S,as.numeric(s==i)) } # Number of sites snum<-length(suniq) # Total Number of Planned looks at the data nlooks<-length(LTime) ################################################# # Calculate Boundaries Using Permutation Approach # Collect Data on Number of Observations, Events and Exposed at each look Ncum<-NULL Nevent<-NULL Nexp<-NULL for(T in 1:length(LTime)){ Ncum<-c(Ncum,sum(start<=LTime[T])) Nevent<-c(Nevent,sum(y[1:Ncum[T]])) Nexp<-c(Nexp,sum(x[1:Ncum[T]])) } #Skip looks with no events firstlook<-min(c(1:nlooks)[Nevent>=1]) bd_ipw<-bdIPWseq(nperm=nperm,y=y,x=x, s=s,alpha=alpha,delta=delta, LTime[firstlook:nlooks],Ncum[firstlook:nlooks]) crit.ipw<-bd_ipw[1,] crit.ipw_s<-bd_ipw[2,] ############################################################# # Calculates Estimates and Determines Signalling at each look # Initialize parameters sigIPW<-0 endtimeIPW<-max(LTime) RD<-rep(NA,nlooks) statIPW<-rep(NA,nlooks) sigIPW_s<-0 endtimeIPW_s<-max(LTime) RD_s<-rep(NA,nlooks) statIPW_s<-rep(NA,nlooks) # UNSTRATIFIED ESTIMATE T<-firstlook[1] while(T<=nlooks) { #data for this look xT<-x[1:Ncum[T]] yT<-y[1:Ncum[T]] ST<-S[1:Ncum[T],] zT<-z[1:Ncum[T],] #propensity scores - aggregate model - site is confounder# p_T<-glm(xT ~ zT + ST,family=binomial)$fitted.values obsIPW<-RD.IPW(x=xT,y=yT,z=cbind(zT,ST),p=p_T) RD[T]<-obsIPW[,1] statIPW[T]<-obsIPW[,1]/sqrt(obsIPW[,2]) if(!is.finite(statIPW[T])) statIPW[T]<-(-999) if(crit.ipw[T]<=statIPW[T]) { sigIPW<-1 endtimeIPW<-LTime[T] endstat<-statIPW[T] T<-nlooks+1 } # Last look then the final test statistic is the last one observed # if(T==nlooks) endstat<-statIPW[T] T<-T+1 } ## SITE STRATIFIED ESTIMATE T<-firstlook[1] while(T<=nlooks) #looks left and at least one unsignalled test { #data for this look xT<-x[1:Ncum[T]] yT<-y[1:Ncum[T]] sT<-s[1:Ncum[T]] zT<-z[1:Ncum[T],] #propensity scores - stratified - site specific models# pT_s<-rep(-99,Ncum[T]) suniqT<-unique(sT) for (i in suniq) { #site-specific propensity scores pT_s[sT==i]<-glm(xT[sT==i]~zT[sT==i,],family=binomial)$fitted.values } if(min(pT_s)< 0) { print('ERROR - Site Specific Propensity Score Failure') } obsIPW_s<-RD.IPW_s(y=yT,x=xT,z=zT,s=sT,p_s=pT_s) RD_s[T]<-obsIPW_s[,1] statIPW_s[T]<-obsIPW_s[,1]/sqrt(obsIPW_s[,2]) if(!is.finite(statIPW_s[T])) statIPW_s[T]<-(-999) if(crit.ipw_s[T]<=statIPW_s[T]){ sigIPW_s<-1 endtimeIPW_s<-LTime[T] endstat_s<-statIPW_s[T] T<-nlooks+1 } # Last look then the final test statistic is the last one observed # if(T==nlooks) endstat_s<-statIPW_s[T] T<-T+1 } #calculate statistic value at end times names(statIPW)<-NULL names(statIPW_s)<-NULL #all summary data for one simulation within one scenario return(list(alpha=alpha,delta=delta,signal=(sigIPW==1),endtime=endtimeIPW, output=cbind(LTime=LTime,RD=RD,ObsStat=statIPW,Boundary=crit.ipw,signal=sigIPW), signal_s=(sigIPW_s==1),endtime_s=endtimeIPW_s, output_s=cbind(LTime=LTime,RD=RD_s,ObsStat=statIPW_s,Boundary=crit.ipw_s,signal=sigIPW_s))) }