####Figure 1: etavec<-seq(-3, 3, length=250) #Make a vector of eta values. (In the ER scenario eta = -3 corresponds to a binomial probability for each edge of b = 0.047. Likewise, eta = 3 corresponds to b = 0.953, so this selection for the eta-vector only excludes the tails in the range of possible eta values in this context.) naiveapprox <- function(etavec, eta0, n, gobs, rounddig=Inf) { g <- 0:n p <- exp(eta0)/(1+exp(eta0)) if (rounddig==Inf) { logp <- dbinom(g, n, p, log=TRUE) } else { logp <- log(round(dbinom(g, n, p), rounddig)) } tmp <- sweep(outer(g,etavec-eta0), 1, logp, "+") shifts <- apply(tmp, 2, max) # Note: One shift for each etavec value tmp <- sweep(tmp, 2, shifts, "-") (etavec-eta0)*gobs - log(colSums(exp(tmp)))-shifts } lognormapprox<-function(eta, eta0, n, gobs, rounddig=Inf) { g <- 0:n p <- exp(eta0)/(1+exp(eta0)) if (rounddig==Inf) { logp <- dbinom(g, n, p, log=TRUE) } else { logp <- log(round(dbinom(g, n, p), rounddig)) } cenetavec <- eta - eta0 mu <- sum(exp(logp)*g) sigma <- sum(exp(logp)* (g-mu)^2) cenetavec*gobs - (cenetavec*mu) - .5*cenetavec*sigma*cenetavec } #### Figure 1(a): Plot with \(\eta_0 = MLE = 0.625\): par(mar=c(5,5,4,2)+.1) plot(etavec, naiveapprox(etavec, eta0=log(272/508), n=780, gobs=272), type="l", lwd=3, lty=1, xlab=expression(eta), ylab=expression(paste("\u2113(",eta,") - \u2113(", eta[0],")")), cex.axis=1.5 ,cex.lab=2) lines(etavec, lognormapprox(etavec, eta0=log(272/508), n=780, gobs=272, rounddig=3), type="l", lwd=3, lty=3) for(i in c(3,5,10,15)) lines(etavec, naiveapprox(etavec, eta0=log(272/508), n=780, gobs=272, rounddig=i), type="l", lwd=3, lty=2) #### Figure 1(b): Plot with \(\eta_0 = 1.099\): par(mar=c(5,5,4,2)+.1) plot(etavec, naiveapprox(etavec, eta0=1.099, n=780, gobs=272), ylim=c(-800,400),type="l", lwd=3, lty=1, xlab=expression(eta), ylab=expression(paste("\u2113(",eta,") - \u2113(", eta[0],")")), cex.axis=1.5 ,cex.lab=2) lines(etavec, lognormapprox(etavec, eta0=1.099, n=780, gobs=272, rounddig=3), type="l", lwd=3, lty=3) for(i in c(3,5,10,15)) lines(etavec, naiveapprox(etavec, eta0=1.099, n=780, gobs=272, rounddig=i), type="l", lwd=3, lty=2) ####Code for step-halving in binomial example: ####Read in these functions: logit<-function(p) log(p/(1-p)) ilogit<-function(eta) exp(eta)/(1+exp(eta)) approx <- function(eta, eta0, s, obs) # Note: This is the negative of the naive #approx loglik -(eta-eta0)*obs + log(mean(exp((eta-eta0)*s))) binomialstephalving<-function(){ eta<-1.099 xihatvec<-NULL etavec<-eta s0<-273 # make sure we go through loop at least once while (272max(s0)) { b<-ilogit(eta) s0 <- rbinom(100, 780, b) xibar <- mean(s0) p<-1 xihat <- 272 while (xihat <= min(s0) | xihat >= max(s0)) { p<-p/2; xihat <- (1-p)*xibar + p*272 } eta <- optimize(approx, eta0=eta, s=s0, obs=xihat, lower=-2, upper=2)$min etavec<-c(etavec,eta) xihatvec<-c(xihatvec,xihat) } #last sample: b <- ilogit(eta) lastsample <- c(s0, rbinom(1e+5, 780, b)) eta <- optimize(approx, eta0=eta, s=lastsample, obs=xihat, lower=-2, upper=2)$min #etavec<-c(etavec,eta) #xihatvec<-c(xihatvec,xihat) return(list("xihat_i"=xihatvec,"eta_i"=etavec, "final_eta"=eta)) } ####We can now run the function with the specified seed to duplicate the results #in the paper: set.seed(123) binomialstephalving() ####Figure 2: ####The true log-likelihood ratio function: trueLLReq<-function(eta,eta0,gyobs,n){ gyobs*(eta-eta0)+choose(n,2)*(log(1+exp(eta0))-log(1+exp(eta))) } ####The lognormal approximation to the log-likelihood ratio: lognormapprox<-function(eta, eta0, s, obs) { cenetavec <- eta - eta0 mu<-mean(s) sigma<-var(s) cenetavec*obs - (cenetavec*mu) - .5*cenetavec*sigma*cenetavec } ####The naive approximation to the log-likelihood ratio: ShiftedNaive <- function(etavec, eta0, s, xi) { m<-length(s) logU <- outer(s,etavec-eta0) shifts <- apply(logU, 2, max) # Note: One shift for each etavec value tmp <- sweep(logU, 2, shifts, "-") shiftedusum <- colSums(exp(tmp)) hatr <- (etavec-eta0)*xi - log(shiftedusum/m) - shifts return(hatr) } ####Fig. 2(a): set.seed(123) gyobs<-272 etavec<-seq(-2, 3, length=250) eta0 <- 1.099 beta0 <- exp(eta0)/(1+exp(eta0)) s <- rbinom(10000,780,beta0) l<-log(min(s)/(780-min(s))) u<-log(max(s)/(780-max(s))) topbox<-max(trueLLReq(eta=l,eta0=eta0,gyobs=gyobs,n=40), trueLLReq(eta=u,eta0=eta0,gyobs=gyobs,n=40))+100 bottombox<-min(trueLLReq(eta=l,eta0=eta0,gyobs=gyobs,n=40), trueLLReq(eta=u,eta0=eta0,gyobs=gyobs,n=40))-100 par(mar=c(5,5,4,2)+.1) plot(etavec,ShiftedNaive(etavec,eta0=eta0, s=s, xi=gyobs), type = "n", xlab=expression(paste(eta)), ylab=expression(paste("\u2113(",eta,") - \u2113(", eta[0],")")), cex.lab=2, cex.axis=1.5) lines(etavec,ShiftedNaive(etavec=etavec, eta0=eta0, s=s, xi=gyobs),col=1,lwd=3,lty=2) #approx llr lines(etavec,lognormapprox(eta=etavec, eta0=eta0, s=s, obs=gyobs),col=1,lwd=3,lty=3) #lognorm llr lines(etavec, trueLLReq(eta=etavec,eta0=eta0,gyobs=gyobs,n=40),col=1,lwd=3) #true llr axis(1,at=round(c(l,u),2), pos=-500, cex.axis=1.5) ####Fig 2(b): set.seed(123) gyobs<-272 etavec<-seq(-2, 3, length=250) eta0 <- -.571 beta0 <- exp(eta0)/(1+exp(eta0)) s <- rbinom(10000,780,beta0) l<-log(min(s)/(780-min(s))) u<-log(max(s)/(780-max(s))) topbox<-max(trueLLReq(eta=l,eta0=eta0,gyobs=gyobs,n=40), trueLLReq(eta=u,eta0=eta0,gyobs=gyobs,n=40))+100 bottombox<-min(trueLLReq(eta=l,eta0=eta0,gyobs=gyobs,n=40), trueLLReq(eta=u,eta0=eta0,gyobs=gyobs,n=40))-100 par(mar=c(5,5,4,2)+.1) plot(etavec,ShiftedNaive(etavec,eta0=eta0, s=s, xi=gyobs), xlim=c(-2,2), type = "n", xlab=expression(paste(eta)), ylab=expression(paste("\u2113(",eta,") - \u2113(", eta[0],")")), cex.lab=2, cex.axis=1.5) lines(etavec,ShiftedNaive(etavec=etavec, eta0=eta0, s=s, xi=gyobs),col=1,lwd=3,lty=2) #approx llr lines(etavec,lognormapprox(eta=etavec, eta0=eta0, s=s, obs=gyobs),col=1,lwd=3,lty=3) #lognorm llr lines(etavec, trueLLReq(eta=etavec,eta0=eta0,gyobs=gyobs,n=40),col=1,lwd=3) #true llr axis(1,at=round(c(l,u),2), pos=-150, cex.axis=1.5) ####Code for comparing naive and lognormal approximations in binomial example ilogit <- function(eta) exp(eta)/(1+exp(eta)) negtrueLLReq<-function(eta,eta0,gyobs,n){ -gyobs*(eta-eta0)-choose(n,2)*(log(1+exp(eta0))-log(1+exp(eta))) } trueLLReq<-function(eta,eta0,gyobs,n){ gyobs*(eta-eta0)+choose(n,2)*(log(1+exp(eta0))-log(1+exp(eta))) } approx <- function(eta, eta0, s, obs) # Note: This is the negative of the naive approx loglik -(eta-eta0)*obs + log(mean(exp((eta-eta0)*s))) neglognormapprox<-function(eta, eta0, s, obs) { #this is the negative of the lognormal approx to llr cenetavec <- eta - eta0 mu<-mean(s) sigma<-var(s) -cenetavec*obs + (cenetavec*mu) + .5*cenetavec*sigma*cenetavec } varofmax.fixedxi<-function(n,eta0,xihat1){ xibar0<-rep(0,n) s0var<-rep(0,n) eta1naive<-rep(0,n) eta1lognorm<-rep(0,n) notCH<-rep(0,n) for (i in 1:n){ b0<-ilogit(eta0) s0 <- rbinom(100, 780, b0) xibar0[i] <- mean(s0) s0var[i] <- var(s0) eta1naive[i] <- optimize(approx, eta0=eta0, s=s0, obs=xihat1, lower=-5, upper=2)$min eta1lognorm[i] <- optimize(neglognormapprox, eta0=eta0, s=s0, obs=xihat1, lower=-5, upper=2)$min if(xihat1max(s0)){notCH[i]<-1} } ppp<-matrix(c(eta1naive, eta1lognorm, notCH),nrow=n,ncol=3) return(ppp) } ############ ##For Figure 3a (left side of Figure 3): eta0 <- -.53 ####and find the value of the true MLE for this \(\hat{\xi}_1 = 564.93\): trueeta1<- optimize(negtrueLLReq, eta0=1.099, gyobs=564.93, n=40, lower=-2, upper=2)$min ####or the value of the true MLE for \(g)y^{obs}) = 272\) and a stepped version of #\(\eta_0\), \(\eta_1 = -.55\): trueetaO <- optimize(negtrueLLReq, eta0=eta0, gyobs=272, n=40, lower=-2, upper=2)$min #set.seed(123) #ppp<-varofmax.fixedxi(n=10000,eta0=1.099,xihat1=564.93) #####To cut the cases where \(\hat{\xi}_1\) is not in the CH: #sss <- ppp[ppp[,3]==0,] set.seed(123) ### Running this function will now calculate the estimated \eta's for this step, ### according to each approximation: ppp <- varofmax.fixedxi(n=10000,eta0=eta0,xihat1=272) sss <- ppp[ppp[,3]==0,] ll<-min(min(sss[,1]),min(sss[,2])) uu<-max(max(sss[,1]),max(sss[,2])) bbb<-seq(ll,uu, length.out=100) ### Code for plot: op <- par(mfrow = c(2,1), mgp = c(2,.8,0), mar = .1+c(3,3,3,1)) hist(sss[,1], breaks=bbb, xlab="", ylab="", main=expression(paste("Naive approximation for ", eta[0]==-0.53)), cex.axis=1.5 ,cex.lab=2, cex.main=1.5) abline(v=trueetaO, lty=1, lwd=5); abline(v=range(sss[,1]), lty=2, lwd=4) legend("topleft", legend=c(expression(paste("True ", hat(eta))), "Maximum and Minimum"), lty=1:2, lwd=3, inset=.1, cex=1.5) hist(sss[,2], breaks=bbb, xlab="", ylab="", main=expression(paste("Lognormal approximation for ", eta[0]==-0.53)), cex.axis=1.5 ,cex.lab=2, cex.main=1.5) abline(v=trueetaO, lty=1, lwd=5); abline(v=range(sss[,2]), lty=2, lwd=4) ############ ############ ##For Figure 3b (right side of Figure 3): eta0 <- -.55 #### and find the value of the true MLE for this \hat{\xi}_1 = 564.93: trueeta1 <- optimize(negtrueLLReq, eta0=1.099, gyobs=564.93, n=40, lower=-2, upper=2)$min #### or the value of the true MLE for g(y^{obs}) = 272 and a stepped version of #### \eta_0, \eta_1 = -.55: trueetaO <- optimize(negtrueLLReq, eta0=eta0, gyobs=272, n=40, lower=-2, upper=2)$min set.seed(123) #### Running this function will now calculate the estimated \eta's for this step, #### according to each approximation: ppp <- varofmax.fixedxi(n=10000,eta0=eta0,xihat1=272) sss <- ppp[ppp[,3]==0,] ll<-min(min(sss[,1]),min(sss[,2])) uu<-max(max(sss[,1]),max(sss[,2])) bbb<-seq(ll,uu, length.out=100) ####Code for plot: op <- par(mfrow = c(2,1), mgp = c(2,.8,0), mar = .1+c(3,3,3,1)) hist(sss[,1], breaks=bbb, xlab="", ylab="", main=expression(paste("Naive approximation for ", eta[0]==-0.55)), cex.axis=1.5 ,cex.lab=2, cex.main=1.5) abline(v=trueetaO, lty=1, lwd=5); abline(v=range(sss[,1]), lty=2, lwd=4) legend("topleft", legend=c(expression(paste("True ", hat(eta))), "Maximum and Minimum"), lty=1:2, lwd=3, inset=.1, cex=1.5) hist(sss[,2], breaks=bbb, xlab="", ylab="", main=expression(paste("Lognormal approximation for ", eta[0]==-0.55)), cex.axis=1.5 ,cex.lab=2, cex.main=1.5) abline(v=trueetaO, lty=1, lwd=5); abline(v=range(sss[,2]), lty=2, lwd=4) ############ ####code for Table 1 (sort of): #### To generate the information in Table 1, we can print the means and #### variances for each approximation for this step: mean(sss[,1]) #mean of eta for naive approx mean(sss[,2]) #mean of eta for lognormal approx var(sss[,1]) #variance of eta for naive approx var(sss[,2]) #variance of eta for lognormal approx dim(sss) #Determine if hatxi is in the CH of all 10,000 samples (or how many fewer) ###########Code for biological example############ ####In this example we make use of the updated ergm package in R: library(ergm) library(Rglpk) ####Load the data (provided in the package): data(ecoli) ## Code for Figure 4: par(mfrow=c(1,1)) sides <- rep(8,423) self <- ecoli1 %v% "self" sides[self] <- 3 plot.network(ecoli1, vertex.col=0, vertex.sides=sides, arrowhead.cex=.75) ## Code for Figure 5: form0 <- ecoli2 ~ edges + degree(2:5) + kstar(2) + gwdegree(3.75, fixed = T) m0 <- ergm(formula=form0, MPLEonly=TRUE) s <- simulate(m0, nsim=25, seed=123, burnin=0, interval=50000, statsonly=T) par(mar=c(5,4.5,4,2)+.1) plot(50*(0:24),s[,1], xlab="MCMC steps x 1000", ylab="edges in network", ylim=c(0, 87153), cex.axis=1.5, cex.lab=1.8, type="b", col=1, lwd=2, pch=20) abline(h=c(519,87153), lty=2, lwd=2, col=1) text(500, 84000, "Max possible (87153)", cex=1.5) text(500, 2500, "Observed (519)", cex=1.5) ####Specify the model: form.old <- ecoli2 ~ edges + degree(2:5) + kstar(2) + gwdegree(3.75, fixed = T) form <- ecoli2 ~ edges + degree(2:5) + gwdegree(0.25, fixed = T) ####code for Table 2 ####For MPLE fit: m.old <- ergm(form.old, MPLEonly=TRUE) round(m.old$coef, 2) m1 <- ergm(formula=form, MPLEonly=TRUE) round(m1$coef, 2) summary(m1) ####For lognormal approximation MLE fit (For Figure 6): m2<-ergm(formula=form, theta0=m1$coef, seed=12345, control=control.ergm(style="Stepping", stepMCMCsize=300, gridsize=10000, metric="lognormal"), maxit=1, MCMCsamplesize=2000, burnin=1e+5, interval=20000, verb=TRUE) summary(m2) #### For Table 3 (left column): round(m2$coef, 2) round(sqrt(diag(m2$covar)),3) round(m2$mc.se,3) ####For naive approximation MLE fit (not included in JCGS article): m3<-ergm(formula=form, theta0=m1$coef, seed=12345, control=control.ergm(style="Stepping", stepMCMCsize=300, gridsize=10000, metric="naive"), maxit=1, MCMCsamplesize=2000, burnin=1e+5, interval=20000, verb=TRUE) summary(m3) ### For additional model incorporating self-edges: form2 <- ecoli2 ~ edges + degree(2:5) + gwdegree(0.25, fixed = T) + nodemix("self", base=1) m4<-ergm(formula=form2, seed=12345, control=control.ergm(style="Stepping", stepMCMCsize=300, gridsize=10000, metric="lognormal"), maxit=1, MCMCsamplesize=2000, burnin=1e+5, interval=20000, verb=TRUE) summary(m4) #### For Table 3 (right column): round(m4$coef, 2) round(sqrt(diag(m4$covar)),3) round(m4$mc.se,3) ## Kapferer dataset data(kapferer) kformula <- kapferer ~ edges + gwdegree(0.25, fixed=TRUE) + gwesp(0.25, fixed=TRUE) + gwdsp(0.25, fixed=TRUE) kap1 <- ergm(kformula, seed=123, control=control.ergm(stepMCMCsize=1000, style="Stepping")) summary(kap1) kformula2 <- kapferer ~ edges + gwesp(0.25, fixed=TRUE) + gwdsp(0.25, fixed=TRUE) kap2 <- ergm(kformula2, maxit=1, seed=123, control=control.ergm(stepMCMCsize=1000, style="Stepping", metric="lognormal")) summary(kap2) # For Section 7.3: # These two methods produce reasonable parameter estimates: kap2.eq6 <- ergm(kformula2, maxit=10, seed=123, control=control.ergm(metric="naive")) kap2.eq7 <- ergm(kformula2, maxit=10, seed=123, control=control.ergm(metric="lognormal")) # Naive MCMC MLE (doesn't work at all): m2.eq6 <- ergm(formula = form, seed=12345, maxit=10, control=control.ergm(metric="naive")) # MCMC MLE with lognormal approximation: m2.eq7a <- ergm(formula = form, seed = 1234, maxit = 15) s1 <- simulate(form, interval=5e+4, nsim=500, seed=123, statsonly=TRUE, theta0=m2.eq7a$coef) m2.eq7b <- ergm(formula = form, seed = 1234, maxit = 20) s2 <- simulate(form, interval=5e+4, nsim=500, seed=123, statsonly=TRUE, theta0=m2.eq7b$coef) # Figure 7 obs <- summary(form) # 7a: plot(rbind(s1,obs)[-1,c(1,6)], xlab="edges", ylab="gwdegree", cex.lab=1.4, cex.axis=1.4, type="n") points(s1[-1,c(1,6)], pch=46, cex=2) points(obs[1], obs[6], pch=13, cex=1.5) # 7b: plot(rbind(s2,obs)[-1,c(1,6)], xlab="edges", ylab="gwdegree", cex.lab=1.4, cex.axis=1.4, type="n") points(s2[-1,c(1,6)], pch=46, cex=2) points(obs[1], obs[6], pch=13, cex=1.5)