Skip to content

Commit ec6d329

Browse files
authored
Merge pull request stan-dev#68 from ericnovik/arm-ch12-model-updates
Thanks @ericnovik
2 parents 67e4ba7 + 9ceef12 commit ec6d329

12 files changed

+610
-513
lines changed
Lines changed: 75 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,68 +1,75 @@
1-
library(rstan)
2-
library(ggplot2)
3-
4-
srrs2 <- read.table ("srrs2.dat", header=T, sep=",")
5-
mn <- srrs2$state=="MN"
6-
radon <- srrs2$activity[mn]
7-
log.radon <- log (ifelse (radon==0, .1, radon))
8-
floor <- srrs2$floor[mn] # 0 for basement, 1 for first floor
9-
n <- length(radon)
10-
y <- log.radon
11-
x <- floor
12-
13-
# get county index variable
14-
county.name <- as.vector(srrs2$county[mn])
15-
uniq <- unique(county.name)
16-
J <- length(uniq)
17-
county <- rep (NA, J)
18-
for (i in 1:J){
19-
county[county.name==uniq[i]] <- i
20-
}
21-
22-
# no predictors
23-
ybarbar = mean(y)
24-
25-
sample.size <- as.vector (table (county))
26-
sample.size.jittered <- sample.size*exp (runif (J, -.1, .1))
27-
cty.mns = tapply(y,county,mean)
28-
cty.vars = tapply(y,county,var)
29-
cty.sds = mean(sqrt(cty.vars[!is.na(cty.vars)]))/sqrt(sample.size)
30-
cty.sds.sep = sqrt(tapply(y,county,var)/sample.size)
31-
32-
# varying-intercept model, no predictors
33-
dataList.1 <- list(N=length(y), y=y, county=county)
34-
radon_intercept.sf1 <- stan(file='radon_intercept.stan', data=dataList.1,
35-
iter=1000, chains=4)
36-
print(radon_intercept.sf1)
37-
38-
post <- extract(radon_intercept.sf1)
39-
mean.a <- rep (NA, 85)
40-
sd.a <- rep (NA, 85)
41-
for (n in 1:85) {
42-
mean.a[n] <- mean(post$a[,n])
43-
sd.a[n] <- sd(post$a[,n])
44-
}
45-
## Figure 12.1 (a)
46-
frame1 = data.frame(x1=sample.size.jittered,y1=cty.mns,x2=sample.size.jittered[36],y2=cty.mns[36])
47-
limits <- aes(ymax=cty.mns + cty.sds,ymin=cty.mns - cty.sds)
48-
p1 <- ggplot(frame1,aes(x=x1,y=y1)) +
49-
geom_point(aes(x=x2,y=y2),shape=1,size=30) +
50-
scale_y_continuous("Avg. Log Radon in County j") +
51-
scale_x_log10("Sample Size in County j") +
52-
theme_bw() +
53-
geom_pointrange(limits) +
54-
labs(title="No Pooling")
55-
print(p1)
56-
57-
## Figure 12.1 (b)
58-
dev.new()
59-
frame2 = data.frame(x1=sample.size.jittered,y1=mean.a,x2=sample.size.jittered[36],y2=mean.a[36])
60-
limits <- aes(ymax=mean.a+sd.a, ymin=mean.a-sd.a)
61-
p2 <- ggplot(frame2,aes(x=x1,y=y1)) +
62-
geom_point(aes(x=x2,y=y2),shape=1,size=30) +
63-
scale_y_continuous("Avg. Log Radon in County j") +
64-
scale_x_log10("Sample Size in County j") +
65-
theme_bw() +
66-
geom_pointrange(limits) +
67-
labs(title="Multilevel Model")
68-
print(p2)
1+
library(rstan)
2+
library(lme4)
3+
library(ggplot2)
4+
5+
srrs2 <- read.table ("ARM/Ch.12/srrs2.dat", header = TRUE, sep = ",")
6+
mn <- srrs2$state=="MN"
7+
radon <- srrs2$activity[mn]
8+
log.radon <- log (ifelse (radon==0, .1, radon))
9+
floor <- srrs2$floor[mn] # 0 for basement, 1 for first floor
10+
n <- length(radon)
11+
y <- log.radon
12+
x <- floor
13+
14+
# get county index variable
15+
county.name <- as.vector(srrs2$county[mn])
16+
uniq <- unique(county.name)
17+
J <- length(uniq)
18+
county <- rep (NA, J)
19+
for (i in 1:J){
20+
county[county.name == uniq[i]] <- i
21+
}
22+
23+
# no predictors
24+
ybarbar <- mean(y)
25+
26+
sample.size <- as.vector (table (county))
27+
sample.size.jittered <- sample.size * exp(runif(J, -.1, .1))
28+
cty.mns <- tapply(y, county, mean)
29+
cty.vars <- tapply(y, county, var)
30+
cty.sds <- mean(sqrt(cty.vars[!is.na(cty.vars)])) / sqrt(sample.size)
31+
cty.sds.sep <- sqrt(tapply(y, county, var) / sample.size)
32+
33+
# lme4 varying-intercept model, no predictors
34+
M0 <- lmer(y ~ 1 + (1 | county))
35+
summary(M0)
36+
M0_coef <- coef(M0)$county[, 1]
37+
38+
dataList.1 <- list(N = length(y), J = J, y = y, county = county)
39+
radon_intercept.sf1 <- stan(file = 'ARM/Ch.12/radon_intercept.stan', data = dataList.1,
40+
iter = 200, chains = 4, control = list(stepsize = 0.05))
41+
print(radon_intercept.sf1)
42+
post <- extract(radon_intercept.sf1)
43+
mean.a <- rep (NA, J)
44+
sd.a <- rep (NA, J)
45+
for (n in 1:J) {
46+
mean.a[n] <- mean(post$a[ ,n])
47+
sd.a[n] <- sd(post$a[ ,n])
48+
}
49+
50+
# comparing lme4 and stan posterior mean estimates
51+
sqrt(mean(M0_coef - mean.a)^2)
52+
53+
## Figure 12.1 (a)
54+
frame1 = data.frame(x1=sample.size.jittered,y1=cty.mns,x2=sample.size.jittered[36],y2=cty.mns[36])
55+
limits <- aes(ymax=cty.mns + cty.sds,ymin=cty.mns - cty.sds)
56+
p1 <- ggplot(frame1,aes(x=x1,y=y1)) +
57+
geom_point(aes(x=x2,y=y2),shape=1,size=30) +
58+
scale_y_continuous("Avg. Log Radon in County j") +
59+
scale_x_log10("Sample Size in County j") +
60+
theme_bw() +
61+
geom_pointrange(limits) +
62+
labs(title="No Pooling")
63+
print(p1)
64+
65+
## Figure 12.1 (b)
66+
frame2 = data.frame(x1=sample.size.jittered,y1=mean.a,x2=sample.size.jittered[36],y2=mean.a[36])
67+
limits <- aes(ymax=mean.a+sd.a, ymin=mean.a-sd.a)
68+
p2 <- ggplot(frame2,aes(x=x1,y=y1)) +
69+
geom_point(aes(x=x2,y=y2),shape=1,size=30) +
70+
scale_y_continuous("Avg. Log Radon in County j") +
71+
scale_x_log10("Sample Size in County j") +
72+
theme_bw() +
73+
geom_pointrange(limits) +
74+
labs(title="Multilevel Model")
75+
print(p2)
Lines changed: 93 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -1,91 +1,93 @@
1-
library(rstan)
2-
library(ggplot2)
3-
4-
srrs2 <- read.table ("srrs2.dat", header=T, sep=",")
5-
mn <- srrs2$state=="MN"
6-
radon <- srrs2$activity[mn]
7-
log.radon <- log (ifelse (radon==0, .1, radon))
8-
floor <- srrs2$floor[mn] # 0 for basement, 1 for first floor
9-
n <- length(radon)
10-
y <- log.radon
11-
x <- floor
12-
13-
# get county index variable
14-
county.name <- as.vector(srrs2$county[mn])
15-
uniq <- unique(county.name)
16-
J <- length(uniq)
17-
county <- rep (NA, J)
18-
for (i in 1:J){
19-
county[county.name==uniq[i]] <- i
20-
}
21-
22-
# no predictors
23-
ybarbar = mean(y)
24-
25-
sample.size <- as.vector (table (county))
26-
sample.size.jittered <- sample.size*exp (runif (J, -.1, .1))
27-
cty.mns = tapply(y,county,mean)
28-
cty.vars = tapply(y,county,var)
29-
cty.sds = mean(sqrt(cty.vars[!is.na(cty.vars)]))/sqrt(sample.size)
30-
cty.sds.sep = sqrt(tapply(y,county,var)/sample.size)
31-
32-
## Complete pooling regression
33-
dataList.1 <- list(N=length(y), y=y,x=x)
34-
radon_complete_pool.sf1 <- stan(file='radon_complete_pool.stan',
35-
data=dataList.1,
36-
iter=1000, chains=4)
37-
print(radon_complete_pool.sf1)
38-
post.pooled <- extract(radon_complete_pool.sf1)
39-
pooled <- colMeans(post.pooled$beta)
40-
41-
## No pooling regression
42-
43-
dataList.2 <- list(N=length(y), y=y,x=x,county=county)
44-
radon_no_pool.sf1 <- stan(file='radon_no_pool.stan', data=dataList.2,
45-
iter=1000, chains=4)
46-
print(radon_no_pool.sf1)
47-
post.unpooled <- extract(radon_no_pool.sf1)
48-
unpooled <- colMeans(post.unpooled$a)
49-
sd.unpooled <- rep(NA,85)
50-
for (n in 1:85) {
51-
sd.unpooled[n] <- sd(post.unpooled$a[,n])
52-
}
53-
54-
## Comparing-complete pooling & no-pooling (Figure 12.2)
55-
x.jitter <- x + runif(n,-.05,.05)
56-
display8 <- c (36, 1, 35, 21, 14, 71, 61, 70) # counties to be displayed
57-
y.range <- range (y[!is.na(match(county,display8))])
58-
59-
radon.data <- data.frame(y, x.jitter, county)
60-
radon8.data <- subset(radon.data, county %in% display8)
61-
radon8.data$county.name <- radon8.data$county
62-
radon8.data$county.name <- factor(radon8.data$county.name,levels=c("36","1","35","21","14","71","61","70"),
63-
labels=c("LAC QUI PARLE", "AITKIN", "KOOCHICHING",
64-
"DOUGLAS", "CLAY", "STEARNS", "RAMSEY",
65-
"ST LOUIS"))
66-
radon8.data$pooled.int <- pooled[1]
67-
radon8.data$pooled.slope <- pooled[2]
68-
radon8.data$unpooled.int <- unpooled[radon8.data$county]
69-
radon8.data$unpooled.slope <- mean(post.unpooled$beta)
70-
71-
p1 <- ggplot(radon8.data, aes(x.jitter, y)) +
72-
geom_jitter(position = position_jitter(width = .05, height = 0)) +
73-
scale_x_continuous(breaks=c(0,1), labels=c("0", "1")) +
74-
geom_abline(aes(intercept = pooled.int, slope = pooled.slope), linetype = "dashed") +
75-
geom_abline(aes(intercept = unpooled.int, slope = unpooled.slope), size = 0.25) +
76-
facet_wrap(~ county.name, ncol = 4)
77-
print(p1)
78-
79-
## No-pooling ests vs. sample size (plot on the left on figure 12.3)
80-
sample.size <- as.vector (table (county))
81-
sample.size.jittered <- sample.size*exp (runif (J, -.1, .1))
82-
dev.new()
83-
frame1 = data.frame(x1=sample.size.jittered,y1=unpooled)
84-
limits <- aes(ymax=unpooled+sd.unpooled, ymin=unpooled-sd.unpooled)
85-
p2 <- ggplot(frame1,aes(x=x1,y=y1)) +
86-
geom_point() +
87-
scale_y_continuous("estimated intercept alpha (no pooling)") +
88-
scale_x_log10("Sample Size in County j") +
89-
theme_bw() +
90-
geom_pointrange(limits)
91-
print(p2)
1+
library(rstan)
2+
library(ggplot2)
3+
4+
srrs2 <- read.table ("ARM/Ch.12/srrs2.dat", header=T, sep=",")
5+
mn <- srrs2$state=="MN"
6+
radon <- srrs2$activity[mn]
7+
log.radon <- log (ifelse (radon==0, .1, radon))
8+
floor <- srrs2$floor[mn] # 0 for basement, 1 for first floor
9+
n <- length(radon)
10+
y <- log.radon
11+
x <- floor
12+
13+
# get county index variable
14+
county.name <- as.vector(srrs2$county[mn])
15+
uniq <- unique(county.name)
16+
J <- length(uniq)
17+
county <- rep (NA, J)
18+
for (i in 1:J){
19+
county[county.name==uniq[i]] <- i
20+
}
21+
22+
# no predictors
23+
ybarbar = mean(y)
24+
25+
sample.size <- as.vector (table (county))
26+
sample.size.jittered <- sample.size*exp (runif (J, -.1, .1))
27+
cty.mns = tapply(y,county,mean)
28+
cty.vars = tapply(y,county,var)
29+
cty.sds = mean(sqrt(cty.vars[!is.na(cty.vars)]))/sqrt(sample.size)
30+
cty.sds.sep = sqrt(tapply(y,county,var)/sample.size)
31+
32+
## Complete pooling regression
33+
dataList.1 <- list(N = length(y), y = y, x = x)
34+
radon_complete_pool.sf1 <- stan(file = 'ARM/Ch.12/radon_complete_pool.stan',
35+
data = dataList.1,
36+
iter = 500, chains = 4,
37+
control = list(stepsize = 0.05))
38+
print(radon_complete_pool.sf1)
39+
post.pooled <- extract(radon_complete_pool.sf1)
40+
pooled <- colMeans(post.pooled$beta)
41+
42+
## No pooling regression
43+
n_counties <- max(county)
44+
dataList.2 <- list(N = length(y), J = n_counties, y = y, x = x, county = county)
45+
radon_no_pool.sf1 <- stan(file = 'ARM/Ch.12/radon_no_pool.stan',
46+
data = dataList.2,
47+
iter = 500, chains = 4,
48+
control = list(stepsize = 0.05))
49+
print(radon_no_pool.sf1)
50+
post.unpooled <- extract(radon_no_pool.sf1)
51+
unpooled <- colMeans(post.unpooled$a)
52+
sd.unpooled <- rep(NA, n_counties)
53+
for (n in 1:n_counties) {
54+
sd.unpooled[n] <- sd(post.unpooled$a[,n])
55+
}
56+
57+
## Comparing-complete pooling & no-pooling (Figure 12.2)
58+
x.jitter <- x + runif(length(radon), -.05, .05)
59+
display8 <- c (36, 1, 35, 21, 14, 71, 61, 70) # counties to be displayed
60+
y.range <- range (y[!is.na(match(county,display8))])
61+
62+
radon.data <- data.frame(y, x.jitter, county)
63+
radon8.data <- subset(radon.data, county %in% display8)
64+
radon8.data$county.name <- radon8.data$county
65+
radon8.data$county.name <- factor(radon8.data$county.name,levels=c("36","1","35","21","14","71","61","70"),
66+
labels=c("LAC QUI PARLE", "AITKIN", "KOOCHICHING",
67+
"DOUGLAS", "CLAY", "STEARNS", "RAMSEY",
68+
"ST LOUIS"))
69+
radon8.data$pooled.int <- pooled[1]
70+
radon8.data$pooled.slope <- pooled[2]
71+
radon8.data$unpooled.int <- unpooled[radon8.data$county]
72+
radon8.data$unpooled.slope <- mean(post.unpooled$beta)
73+
74+
p1 <- ggplot(radon8.data, aes(x.jitter, y)) +
75+
geom_jitter(position = position_jitter(width = .05, height = 0)) +
76+
scale_x_continuous(breaks=c(0,1), labels=c("0", "1")) +
77+
geom_abline(aes(intercept = pooled.int, slope = pooled.slope), linetype = "dashed") +
78+
geom_abline(aes(intercept = unpooled.int, slope = unpooled.slope), size = 0.25) +
79+
facet_wrap(~ county.name, ncol = 4)
80+
print(p1)
81+
82+
## No-pooling ests vs. sample size (plot on the left on figure 12.3)
83+
sample.size <- as.vector (table (county))
84+
sample.size.jittered <- sample.size*exp (runif (J, -.1, .1))
85+
frame1 = data.frame(x1=sample.size.jittered,y1=unpooled)
86+
limits <- aes(ymax=unpooled+sd.unpooled, ymin=unpooled-sd.unpooled)
87+
p2 <- ggplot(frame1,aes(x=x1,y=y1)) +
88+
geom_point() +
89+
scale_y_continuous("estimated intercept alpha (no pooling)") +
90+
scale_x_log10("Sample Size in County j") +
91+
theme_bw() +
92+
geom_pointrange(limits)
93+
print(p2)

0 commit comments

Comments
 (0)