####################################################################### # Mahalanobis-metric matching within calipers # defined by the estimated logit-propensity score ####################################################################### samp <- read.table("Diet0001.dat", row.names="ID", col.names=c("ID", "DISTR.1", "BLACK", "NBHISP", "GRADE", "SLFHLTH", "SLFWGHT", "WORKHARD", "GOODQUAL", "PHYSFIT", "PROUD", "LIKESLF", "ACCEPTED", "FEELLOVD", "DIET", "Y.0", "Y.1", "DISTR.2", "PI.TRUE")) # compute estimated logit-propensities eta <- glm( DIET ~ DISTR.1 + BLACK + NBHISP + GRADE + SLFHLTH + SLFWGHT + WORKHARD + GOODQUAL + PHYSFIT + PROUD + LIKESLF + ACCEPTED + FEELLOVD, family=binomial, data=samp)$linear.predictors # matching algorithm samp$eta <- eta dieters <- samp[ samp$DIET==1, ] y.dieters <- samp$DISTR.2[ samp$DIET==1] nondieters <- samp[ samp$DIET==0, ] y.nondieters <- samp$DISTR.2[ samp$DIET==0] n0 <- nrow(nondieters) n1 <- nrow(dieters) caliper <- sqrt( ( (n1-1)*var( dieters$eta ) + (n0-1)*var( nondieters$eta ) ) / (n0 + n1 - 2) ) / 4 taken <- rep( F, nrow(nondieters) ) matched <- rep( F, nrow(dieters) ) x0 <- nondieters[, c("DISTR.1","BLACK","NBHISP","GRADE", "SLFHLTH","SLFWGHT","WORKHARD","GOODQUAL","PHYSFIT","PROUD", "LIKESLF","ACCEPTED","FEELLOVD")] x1 <- dieters[, c("DISTR.1","BLACK","NBHISP","GRADE", "SLFHLTH","SLFWGHT","WORKHARD","GOODQUAL","PHYSFIT","PROUD", "LIKESLF","ACCEPTED","FEELLOVD")] x0 <- as.matrix(x0) x1 <- as.matrix(x1) Cinv <- solve( var(x0) ) for( i in 1:nrow(dieters) ){ lower <- dieters$eta[i] - caliper upper <- dieters$eta[i] + caliper w <- (!taken) & (nondieters$eta>=lower) & (nondieters$eta<=upper) if( sum(w) > 0 ){ posn <- (1:nrow(x0))[w] tmp <- x0[w,] - matrix( x1[i,], sum(w), ncol(x0), byrow=T) mahal <- diag( tmp %*% Cinv %*% t(tmp) ) select <- posn[ mahal==min(mahal) ] select <- select[1] # in case of a tie taken[select] <- T matched[i] <- T} } y1 <- y.dieters[matched] y0 <- y.nondieters[taken] # matched sample: newsamp <- data.frame( rbind( x1[matched,], x0[taken,] ) ) newsamp$DISTR.2 <- c(y1, y0) newsamp$DIET <- c( rep(1,length(y1)), rep(0,length(y0)) ) # ordinary mean comparison: fit <- lm( DISTR.2 ~ DIET, data=newsamp) # > print(summary(fit)) # # Call: # lm(formula = DISTR.2 ~ DIET, data = newsamp) # # Residuals: # Min 1Q Median 3Q Max # -0.6966 -0.3766 -0.0866 0.2825 1.8334 # # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) 0.687462 0.013532 50.803 <2e-16 *** # DIET 0.009137 0.019137 0.477 0.633 # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # Residual standard error: 0.4676 on 2386 degrees of freedom # Multiple R-Squared: 9.554e-05, Adjusted R-squared: -0.0003235 # F-statistic: 0.228 on 1 and 2386 DF, p-value: 0.6331 # ANCOVA adjustment: fit <- lm( DISTR.2 ~ DISTR.1 + BLACK + NBHISP + GRADE + SLFHLTH + SLFWGHT + WORKHARD + GOODQUAL + PHYSFIT + PROUD + LIKESLF + ACCEPTED + FEELLOVD + DIET, data=newsamp) # > print(summary(fit)) # # Call: # lm(formula = DISTR.2 ~ DISTR.1 + BLACK + NBHISP + GRADE + SLFHLTH + # SLFWGHT + WORKHARD + GOODQUAL + PHYSFIT + PROUD + LIKESLF + # ACCEPTED + FEELLOVD + DIET, data = newsamp) # # Residuals: # Min 1Q Median 3Q Max # -1.69319 -0.24286 -0.05227 0.19815 1.81120 # # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) -0.003787 0.078018 -0.049 0.961290 # DISTR.1 0.507311 0.019893 25.502 < 2e-16 *** # BLACK 0.087825 0.021859 4.018 6.06e-05 *** # NBHISP 0.027836 0.022555 1.234 0.217279 # GRADE 0.001861 0.006069 0.307 0.759155 # SLFHLTH 0.009339 0.010080 0.927 0.354270 # SLFWGHT 0.011170 0.012232 0.913 0.361242 # WORKHARD 0.006367 0.010047 0.634 0.526320 # GOODQUAL 0.018829 0.016858 1.117 0.264151 # PHYSFIT -0.005209 0.011472 -0.454 0.649829 # PROUD 0.017466 0.017509 0.998 0.318609 # LIKESLF 0.039497 0.010277 3.843 0.000125 *** # ACCEPTED 0.006587 0.011070 0.595 0.551890 # FEELLOVD 0.041043 0.014355 2.859 0.004283 ** # DIET -0.018058 0.015793 -1.143 0.252977 # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # Residual standard error: 0.3848 on 2373 degrees of freedom # Multiple R-Squared: 0.3265, Adjusted R-squared: 0.3226 # F-statistic: 82.18 on 14 and 2373 DF, p-value: < 2.2e-16