|
1 | | -library(rstan) |
2 | | -library(ggplot2) |
3 | | - |
4 | | -srrs2 <- read.table ("srrs2.dat", header=T, sep=",") |
5 | | -mn <- srrs2$state=="MN" |
6 | | -radon <- srrs2$activity[mn] |
7 | | -log.radon <- log (ifelse (radon==0, .1, radon)) |
8 | | -floor <- srrs2$floor[mn] # 0 for basement, 1 for first floor |
9 | | -n <- length(radon) |
10 | | -y <- log.radon |
11 | | -x <- floor |
12 | | - |
13 | | -# get county index variable |
14 | | -county.name <- as.vector(srrs2$county[mn]) |
15 | | -uniq <- unique(county.name) |
16 | | -J <- length(uniq) |
17 | | -county <- rep (NA, J) |
18 | | -for (i in 1:J){ |
19 | | - county[county.name==uniq[i]] <- i |
20 | | -} |
21 | | - |
22 | | - # no predictors |
23 | | -ybarbar = mean(y) |
24 | | - |
25 | | -sample.size <- as.vector (table (county)) |
26 | | -sample.size.jittered <- sample.size*exp (runif (J, -.1, .1)) |
27 | | -cty.mns = tapply(y,county,mean) |
28 | | -cty.vars = tapply(y,county,var) |
29 | | -cty.sds = mean(sqrt(cty.vars[!is.na(cty.vars)]))/sqrt(sample.size) |
30 | | -cty.sds.sep = sqrt(tapply(y,county,var)/sample.size) |
31 | | - |
32 | | -## Complete pooling regression |
33 | | -dataList.1 <- list(N=length(y), y=y,x=x) |
34 | | -radon_complete_pool.sf1 <- stan(file='radon_complete_pool.stan', |
35 | | - data=dataList.1, |
36 | | - iter=1000, chains=4) |
37 | | -print(radon_complete_pool.sf1) |
38 | | -post.pooled <- extract(radon_complete_pool.sf1) |
39 | | -pooled <- colMeans(post.pooled$beta) |
40 | | - |
41 | | -## No pooling regression |
42 | | - |
43 | | -dataList.2 <- list(N=length(y), y=y,x=x,county=county) |
44 | | -radon_no_pool.sf1 <- stan(file='radon_no_pool.stan', data=dataList.2, |
45 | | - iter=1000, chains=4) |
46 | | -print(radon_no_pool.sf1) |
47 | | -post.unpooled <- extract(radon_no_pool.sf1) |
48 | | -unpooled <- colMeans(post.unpooled$a) |
49 | | -sd.unpooled <- rep(NA,85) |
50 | | -for (n in 1:85) { |
51 | | - sd.unpooled[n] <- sd(post.unpooled$a[,n]) |
52 | | -} |
53 | | - |
54 | | -## Comparing-complete pooling & no-pooling (Figure 12.2) |
55 | | -x.jitter <- x + runif(n,-.05,.05) |
56 | | -display8 <- c (36, 1, 35, 21, 14, 71, 61, 70) # counties to be displayed |
57 | | -y.range <- range (y[!is.na(match(county,display8))]) |
58 | | - |
59 | | -radon.data <- data.frame(y, x.jitter, county) |
60 | | -radon8.data <- subset(radon.data, county %in% display8) |
61 | | -radon8.data$county.name <- radon8.data$county |
62 | | -radon8.data$county.name <- factor(radon8.data$county.name,levels=c("36","1","35","21","14","71","61","70"), |
63 | | - labels=c("LAC QUI PARLE", "AITKIN", "KOOCHICHING", |
64 | | - "DOUGLAS", "CLAY", "STEARNS", "RAMSEY", |
65 | | - "ST LOUIS")) |
66 | | -radon8.data$pooled.int <- pooled[1] |
67 | | -radon8.data$pooled.slope <- pooled[2] |
68 | | -radon8.data$unpooled.int <- unpooled[radon8.data$county] |
69 | | -radon8.data$unpooled.slope <- mean(post.unpooled$beta) |
70 | | - |
71 | | -p1 <- ggplot(radon8.data, aes(x.jitter, y)) + |
72 | | - geom_jitter(position = position_jitter(width = .05, height = 0)) + |
73 | | - scale_x_continuous(breaks=c(0,1), labels=c("0", "1")) + |
74 | | - geom_abline(aes(intercept = pooled.int, slope = pooled.slope), linetype = "dashed") + |
75 | | - geom_abline(aes(intercept = unpooled.int, slope = unpooled.slope), size = 0.25) + |
76 | | - facet_wrap(~ county.name, ncol = 4) |
77 | | -print(p1) |
78 | | - |
79 | | -## No-pooling ests vs. sample size (plot on the left on figure 12.3) |
80 | | -sample.size <- as.vector (table (county)) |
81 | | -sample.size.jittered <- sample.size*exp (runif (J, -.1, .1)) |
82 | | -dev.new() |
83 | | -frame1 = data.frame(x1=sample.size.jittered,y1=unpooled) |
84 | | -limits <- aes(ymax=unpooled+sd.unpooled, ymin=unpooled-sd.unpooled) |
85 | | -p2 <- ggplot(frame1,aes(x=x1,y=y1)) + |
86 | | - geom_point() + |
87 | | - scale_y_continuous("estimated intercept alpha (no pooling)") + |
88 | | - scale_x_log10("Sample Size in County j") + |
89 | | - theme_bw() + |
90 | | - geom_pointrange(limits) |
91 | | -print(p2) |
| 1 | +library(rstan) |
| 2 | +library(ggplot2) |
| 3 | + |
| 4 | +srrs2 <- read.table ("ARM/Ch.12/srrs2.dat", header=T, sep=",") |
| 5 | +mn <- srrs2$state=="MN" |
| 6 | +radon <- srrs2$activity[mn] |
| 7 | +log.radon <- log (ifelse (radon==0, .1, radon)) |
| 8 | +floor <- srrs2$floor[mn] # 0 for basement, 1 for first floor |
| 9 | +n <- length(radon) |
| 10 | +y <- log.radon |
| 11 | +x <- floor |
| 12 | + |
| 13 | +# get county index variable |
| 14 | +county.name <- as.vector(srrs2$county[mn]) |
| 15 | +uniq <- unique(county.name) |
| 16 | +J <- length(uniq) |
| 17 | +county <- rep (NA, J) |
| 18 | +for (i in 1:J){ |
| 19 | + county[county.name==uniq[i]] <- i |
| 20 | +} |
| 21 | + |
| 22 | + # no predictors |
| 23 | +ybarbar = mean(y) |
| 24 | + |
| 25 | +sample.size <- as.vector (table (county)) |
| 26 | +sample.size.jittered <- sample.size*exp (runif (J, -.1, .1)) |
| 27 | +cty.mns = tapply(y,county,mean) |
| 28 | +cty.vars = tapply(y,county,var) |
| 29 | +cty.sds = mean(sqrt(cty.vars[!is.na(cty.vars)]))/sqrt(sample.size) |
| 30 | +cty.sds.sep = sqrt(tapply(y,county,var)/sample.size) |
| 31 | + |
| 32 | +## Complete pooling regression |
| 33 | +dataList.1 <- list(N = length(y), y = y, x = x) |
| 34 | +radon_complete_pool.sf1 <- stan(file = 'ARM/Ch.12/radon_complete_pool.stan', |
| 35 | + data = dataList.1, |
| 36 | + iter = 500, chains = 4, |
| 37 | + control = list(stepsize = 0.05)) |
| 38 | +print(radon_complete_pool.sf1) |
| 39 | +post.pooled <- extract(radon_complete_pool.sf1) |
| 40 | +pooled <- colMeans(post.pooled$beta) |
| 41 | + |
| 42 | +## No pooling regression |
| 43 | +n_counties <- max(county) |
| 44 | +dataList.2 <- list(N = length(y), J = n_counties, y = y, x = x, county = county) |
| 45 | +radon_no_pool.sf1 <- stan(file = 'ARM/Ch.12/radon_no_pool.stan', |
| 46 | + data = dataList.2, |
| 47 | + iter = 500, chains = 4, |
| 48 | + control = list(stepsize = 0.05)) |
| 49 | +print(radon_no_pool.sf1) |
| 50 | +post.unpooled <- extract(radon_no_pool.sf1) |
| 51 | +unpooled <- colMeans(post.unpooled$a) |
| 52 | +sd.unpooled <- rep(NA, n_counties) |
| 53 | +for (n in 1:n_counties) { |
| 54 | + sd.unpooled[n] <- sd(post.unpooled$a[,n]) |
| 55 | +} |
| 56 | + |
| 57 | +## Comparing-complete pooling & no-pooling (Figure 12.2) |
| 58 | +x.jitter <- x + runif(length(radon), -.05, .05) |
| 59 | +display8 <- c (36, 1, 35, 21, 14, 71, 61, 70) # counties to be displayed |
| 60 | +y.range <- range (y[!is.na(match(county,display8))]) |
| 61 | + |
| 62 | +radon.data <- data.frame(y, x.jitter, county) |
| 63 | +radon8.data <- subset(radon.data, county %in% display8) |
| 64 | +radon8.data$county.name <- radon8.data$county |
| 65 | +radon8.data$county.name <- factor(radon8.data$county.name,levels=c("36","1","35","21","14","71","61","70"), |
| 66 | + labels=c("LAC QUI PARLE", "AITKIN", "KOOCHICHING", |
| 67 | + "DOUGLAS", "CLAY", "STEARNS", "RAMSEY", |
| 68 | + "ST LOUIS")) |
| 69 | +radon8.data$pooled.int <- pooled[1] |
| 70 | +radon8.data$pooled.slope <- pooled[2] |
| 71 | +radon8.data$unpooled.int <- unpooled[radon8.data$county] |
| 72 | +radon8.data$unpooled.slope <- mean(post.unpooled$beta) |
| 73 | + |
| 74 | +p1 <- ggplot(radon8.data, aes(x.jitter, y)) + |
| 75 | + geom_jitter(position = position_jitter(width = .05, height = 0)) + |
| 76 | + scale_x_continuous(breaks=c(0,1), labels=c("0", "1")) + |
| 77 | + geom_abline(aes(intercept = pooled.int, slope = pooled.slope), linetype = "dashed") + |
| 78 | + geom_abline(aes(intercept = unpooled.int, slope = unpooled.slope), size = 0.25) + |
| 79 | + facet_wrap(~ county.name, ncol = 4) |
| 80 | +print(p1) |
| 81 | + |
| 82 | +## No-pooling ests vs. sample size (plot on the left on figure 12.3) |
| 83 | +sample.size <- as.vector (table (county)) |
| 84 | +sample.size.jittered <- sample.size*exp (runif (J, -.1, .1)) |
| 85 | +frame1 = data.frame(x1=sample.size.jittered,y1=unpooled) |
| 86 | +limits <- aes(ymax=unpooled+sd.unpooled, ymin=unpooled-sd.unpooled) |
| 87 | +p2 <- ggplot(frame1,aes(x=x1,y=y1)) + |
| 88 | + geom_point() + |
| 89 | + scale_y_continuous("estimated intercept alpha (no pooling)") + |
| 90 | + scale_x_log10("Sample Size in County j") + |
| 91 | + theme_bw() + |
| 92 | + geom_pointrange(limits) |
| 93 | +print(p2) |
0 commit comments