When I made a plot using geom_point() and geom_line(), a few values in the middle didn't appear and I can't figure out why.
I have a dataframe with 3 columns (K_value, CV_error, and Run), where K_value is categorical.
dput() output:
structure(list(K_value = structure(c(10L, 11L, 12L, 13L, 14L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L), levels = c("1", "2", "3",
"4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14"), class = "factor"),
CV_error = c(0.3496, 0.48953, 0.22838, 0.3241, 0.48187, 0.81215,
0.64932, 0.48208, 0.34502, 0.29175, 0.38106, 0.34349, 0.29372,
0.31848, 0.28904, 0.36266, 0.35706, 0.40682, 0.22942, 0.81252,
0.66357, 0.48312, 0.34643, 0.29845, 0.33101, 0.44156, 0.32816,
0.26834, 0.38874, 0.32601, 0.33054, 0.5124, 0.4978, 0.81195,
0.62714, 0.49569, 0.34549, 0.29434, 0.21027, 0.35551, 0.23482,
0.36595, 0.33906, 0.30915, 0.38615, 0.42463, 0.38548, 0.81222,
0.64116, 0.48115, 0.34543, 0.31653, 0.39421, 0.23617, 0.26476,
0.30773, 0.29044, 0.23667, 0.40504, 0.24453, 0.38279, 0.81107,
0.62831, 0.48073, 0.34307, 0.25076, 0.18189, 0.24538, 0.30349,
0.31099, 0.26404, 0.26664, 0.37712, 0.38249, 0.27946, 0.81362,
0.66236, 0.48343, 0.34475, 0.29682, 0.20412, 0.20799, 0.25753,
0.28842, 0.25157, 0.41521, 0.34065, 0.24796, 0.30641, 0.81291,
0.65986, 0.4821, 0.34447, 0.24829, 0.20115, 0.22076, 0.31345,
0.39544, 0.40846, 0.26986, 0.27907, 0.33826, 0.37872, 0.81762,
0.65032, 0.48309, 0.34895, 0.31037, 0.39639, 0.222, 0.33737,
0.23645, 0.35719, 0.42435, 0.2783, 0.41588, 0.43157, 0.81294,
0.6575, 0.47089, 0.34488, 0.24524, 0.29636, 0.22649, 0.23698,
0.30698, 0.40407, 0.3819, 0.31701, 0.47138, 0.34162, 0.81551,
0.66211, 0.49685, 0.34662, 0.23958, 0.32928, 0.19703, 0.25929,
0.29533), Run = c("1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "3", "3", "3", "3",
"3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "4", "4",
"4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4",
"5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5",
"5", "5", "6", "6", "6", "6", "6", "6", "6", "6", "6", "6",
"6", "6", "6", "6", "7", "7", "7", "7", "7", "7", "7", "7",
"7", "7", "7", "7", "7", "7", "8", "8", "8", "8", "8", "8",
"8", "8", "8", "8", "8", "8", "8", "8", "9", "9", "9", "9",
"9", "9", "9", "9", "9", "9", "9", "9", "9", "9", "10", "10",
"10", "10", "10", "10", "10", "10", "10", "10", "10", "10",
"10", "10")), class = "data.frame", row.names = c(NA, -140L
))
I wanted to plot the mean rate of change across K_value, like so:
df %>%
mutate(rate = CV_error - lag(CV_error)) %>%
mutate(sd = sd(rate),
.by=K_value) %>%
mutate(meanrate = mean(rate),
.by=K_value) %>%
ggplot(aes(x=K_value, y=meanrate)) +
geom_point() +
geom_errorbar(aes(ymin=meanrate-sd, ymax=meanrate+sd), width=0.2) +
geom_line(aes(group=1))
This yields:
Any idea why the data for K=10 is not appearing? The data for K=10 is there in the original df, and when I plot other things (e.g., the mean of CV_error) it shows up fine. I'm wondering if it has something to do with lag(). Am I using it incorrectly?


mutates and look at it. If it all looks good to you, please rundput()on it to create copy/pasteable R code to reproduce the data and add it to your question so that it it reproducible.dput()output, apologies.