####################################################################### # Regression estimates with weighted residual corrections ####################################################################### samp <- read.table("Diet0001.dat", row.names="ID", col.names=c("ID", "DISTR.1", "BLACK", "NBHISP", "GRADE", "SLFHLTH", "SLFWGHT", "WORKHARD", "GOODQUAL", "PHYSFIT", "PROUD", "LIKESLF", "ACCEPTED", "FEELLOVD", "DIET", "Y.0", "Y.1", "DISTR.2", "PI.TRUE")) t <- samp$DIET n <- nrow(samp) y <- samp$DISTR.2 samp0 <- samp[ t==0, ] samp1 <- samp[ t==1, ] # regression model for Y.0 tmp0 <- lm( DISTR.2 ~ DISTR.1 + BLACK + NBHISP + GRADE + SLFHLTH + SLFWGHT + WORKHARD + GOODQUAL + PHYSFIT + PROUD + LIKESLF + ACCEPTED + FEELLOVD, data=samp0) yhat0 <- predict(tmp0, newdata=samp) # regression model for Y.1 tmp1 <- lm( DISTR.2 ~ DISTR.1 + BLACK + NBHISP + GRADE + SLFHLTH + SLFWGHT + WORKHARD + GOODQUAL + PHYSFIT + PROUD + LIKESLF + ACCEPTED + FEELLOVD, data=samp1) yhat1 <- predict(tmp1, newdata=samp) # propensity model pihat <- glm( DIET ~ DISTR.1 + BLACK + NBHISP + GRADE + SLFHLTH + SLFWGHT + WORKHARD + GOODQUAL + PHYSFIT + PROUD + LIKESLF + ACCEPTED, family=binomial, data=samp)$fitted # predictors for the imputation models x <- cbind( const=1, samp$DISTR.1, samp$BLACK, samp$NBHISP, samp$GRADE, samp$SLFHLTH, samp$SLFWGHT, samp$WORKHARD, samp$GOODQUAL, samp$PHYSFIT, samp$PROUD, samp$LIKESLF, samp$ACCEPTED, samp$FEELLOVD) p <- ncol(x) # predictors for the logit model z <- cbind( const=1, samp$DISTR.1, samp$BLACK, samp$NBHISP, samp$GRADE, samp$SLFHLTH, samp$SLFWGHT, samp$WORKHARD, samp$GOODQUAL, samp$PHYSFIT, samp$PROUD, samp$LIKESLF, samp$ACCEPTED, samp$FEELLOVD) q <- ncol(z) # ACE and its standard error mu0 <- sum(yhat0)/n mu1 <- sum(yhat1)/n xi1 <- sum(t*(1/pihat)*(y-yhat1)) / sum(t/pihat) xi0 <- sum((1-t)*(1/(1-pihat))*(y-yhat0)) / sum((1-t)/(1-pihat)) score <- matrix(NA,n,q+2*p+4) score[,1:q] <- (t-pihat)*z score[,(q+1):(q+p)] <- (1-t)*(y-yhat0)*x score[,(q+p+1):(q+2*p)] <- t*(y-yhat1)*x score[,q+2*p+1] <- (yhat0-mu0) score[,q+2*p+2] <- (yhat1-mu1) score[,q+2*p+3] <- (1-t)*(1/(1-pihat))*(y-yhat0-xi0) score[,q+2*p+4] <- t*(1/pihat)*(y-yhat1-xi1) A <- matrix(0,q+2*p+4,q+2*p+4) A[1:q,1:q] <- t(pihat*(1-pihat)*z)%*%z A[(q+1):(q+p),(q+1):(q+p)] <- t((1-t)*x)%*%x A[(q+p+1):(q+2*p),(q+p+1):(q+2*p)] <- t(t*x)%*%x A[q+2*p+1,(q+1):(q+p)] <- -apply(x,2,sum) A[q+2*p+1,q+2*p+1] <- n A[q+2*p+2,(q+p+1):(q+2*p)] <- -apply(x,2,sum) A[q+2*p+2,q+2*p+2] <- n A[q+2*p+3,1:q] <- apply( -(1-t)*pihat*(1/(1-pihat))*(y-yhat0-xi0)*z, 2, sum) A[q+2*p+3,(q+1):(q+p)] <- apply((1-t)*(1/(1-pihat))*x, 2, sum) A[q+2*p+3,q+2*p+3] <- sum((1-t)/(1-pihat)) A[q+2*p+4,1:q] <- apply( t*(1/pihat)*(1-pihat)*(y-yhat1-xi1)*z, 2, sum) A[q+2*p+4,(q+p+1):(q+2*p)] <- apply(t*(1/pihat)*x, 2, sum) A[q+2*p+4,q+2*p+4] <- sum(t/pihat) a <- c( rep(0,q+2*p), -1, 1, -1, 1 ) Ainv <- solve(A) B <- t(score)%*%score ACE <- mu1 + xi1 - mu0 - xi0 SE <- sqrt( t(a) %*% Ainv %*% B %*% t(Ainv) %*% a ) # > print(ACE) # [1] 0.005597254 # > print(SE) # [,1] # [1,] 0.02212808 # ACE.1 and its standard error mu1 <- sum(t*y)/sum(t) mu0 <- sum(t*yhat0)/sum(t) xi0 <- sum( (1-t)*(1/(1-pihat))*pihat*(y-yhat0) ) / sum( (1-t)*(1/(1-pihat))*pihat ) score <- matrix(NA,n,q+p+3) score[,1:q] <- (t-pihat)*z score[,(q+1):(q+p)] <- (1-t)*(y-yhat0)*x score[,q+p+1] <- t*(yhat0-mu0) score[,q+p+2] <- t*(y-mu1) score[,q+p+3] <- (1-t)*(1/(1-pihat))*pihat*(y-yhat0-xi0) A <- matrix(0,q+p+3,q+p+3) A[1:q,1:q] <- t(pihat*(1-pihat)*z)%*%z A[(q+1):(q+p),(q+1):(q+p)] <- t((1-t)*x)%*%x A[q+p+1,(q+1):(q+p)] <- apply(-t*x,2,sum) A[q+p+1,q+p+1] <- sum(t) A[q+p+2,q+p+2] <- sum(t) A[q+p+3,1:q] <- apply( -(1-t)*pihat*(1/(1-pihat))*(y-yhat0-xi0)*z, 2, sum) A[q+p+3,(q+1):(q+p)] <- apply( (1-t)*pihat*(1/(1-pihat))*x, 2, sum) A[q+p+3,q+p+3] <- sum( (1-t)*pihat*(1/(1-pihat)) ) a <- c( rep(0,q+p), -1, 1, -1) Ainv <- solve(A) B <- t(score)%*%score ACE.1 <- mu1 - mu0 - xi0 SE.1 <- sqrt( t(a) %*% Ainv %*% B %*% t(Ainv) %*% a ) # > print(ACE.1) # [1] -0.02048411 # > print(SE.1) # [,1] # [1,] 0.01489583