0

I'm creating a forest plot for my logistic regression model in R. I am not happy with the forest plot created by some packages, especially because the names of the predictors and the levels of the factor in the model are very long. This is my data.frame:

> constructive::construct(tt)
data.frame(
  ind_vars = rep(1:14, c(3L, 7L, 6L, 4L, 4L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 4L, 4L)),
  data_classes = rep(c("factor", "numeric", "factor"), c(24L, 1L, 38L)),
  reflevel = rep(
    c(
      "female", "employed", "committed to a stable relationship", "no", "[35,50]",
      "0", "never", "not at all willing", "never", "always", "not at all",
      "no, I have never been vaccinated against either seasonal flu or covid",
      "no, I was not vaccinated against either seasonal flu or covid last year"
    ),
    c(3L, 7L, 6L, 4L, 4L, 1L, 10L, 5L, 5L, 5L, 5L, 4L, 4L)
  ),
  vars = factor(
    rep(
      c(
        "Gender", "Employment status", "Marital Status", "Living with cohabitants",
        "Age", "Recently searched local news related to publich health",
        "During the Covid-19 pandemic, did you increase your\nuse of social media platforms to discuss health\nissues or to stay informed about the evolution of the pandemic?",
        "In the event of an outbreak of a respiratory infection similar\nto the Covid-19 pandemic, would you prefer to shop online\n(e.g., masks, medications, food, or other products) to avoid leaving your home?",
        "How willing would you be to get vaccinated against an emerging\npathogen if safe and effective vaccines were approved and\nmade available on the market?",
        "If infections were to spread, would you consider wearing masks useful?",
        "If infections were to spread, do you think your family members and friends\nwould adopt individual protective measures (e.g., wearing masks, social distancing, lockdowns)?",
        "If infections were to spread, would adopting individual protective behaviors\n (e.g., wearing masks, social distancing, lockdowns, etc.) require a high economic cost?",
        "Have you ever been vaccinated against seasonal influenza and/or Covid?",
        "In the past year (or last winter season), have you been vaccinated against seasonal influenza and/or Covid?"
      ),
      c(3L, 7L, 6L, 4L, 4L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 4L, 4L)
    ),
    levels = c(
      "Gender", "Employment status", "Marital Status", "Living with cohabitants",
      "Age", "Recently searched local news related to publich health",
      "During the Covid-19 pandemic, did you increase your\nuse of social media platforms to discuss health\nissues or to stay informed about the evolution of the pandemic?",
      "In the event of an outbreak of a respiratory infection similar\nto the Covid-19 pandemic, would you prefer to shop online\n(e.g., masks, medications, food, or other products) to avoid leaving your home?",
      "How willing would you be to get vaccinated against an emerging\npathogen if safe and effective vaccines were approved and\nmade available on the market?",
      "If infections were to spread, would you consider wearing masks useful?",
      "If infections were to spread, do you think your family members and friends\nwould adopt individual protective measures (e.g., wearing masks, social distancing, lockdowns)?",
      "If infections were to spread, would adopting individual protective behaviors\n (e.g., wearing masks, social distancing, lockdowns, etc.) require a high economic cost?",
      "Have you ever been vaccinated against seasonal influenza and/or Covid?",
      "In the past year (or last winter season), have you been vaccinated against seasonal influenza and/or Covid?"
    )
  ),
  coef = c(
    "female ", "other ", "male *", "employed ", "self-employed ",
    "prefer not to answer ", "student ", "inactive **",
    "employed with on-call, seasonal, casual work ", "unemployed **",
    "committed to a stable relationship ", "widowed ",
    "never married or civilly united ", "married or civilly united .",
    "separated or divorced or dissolved civil union .",
    "prefer not to answer ***", "no ", "yes both types ", "yes familiar ",
    "yes not familiar **", "[35,50] ", "(50,65] *", "(65,75] ***", "(75,100] .",
    "d3 ***", "never ", "always ", "sometimes ", "rarely ", "often *", "never ",
    "rarely ", "sometimes **", "always ***", "often ***", "not at all willing ",
    "quite willing .", "little willing ", "very willing ***",
    "extremely willing ***", "never ", "always ***", "often ***", "rarely ***",
    "sometimes ***", "always ", "often *", "sometimes **", "rarely **",
    "never ***", "not at all ", "quite *", "slightly *", "very ***",
    "extremely **",
    "no, I have never been vaccinated against either seasonal flu or covid ",
    "yes, I have been vaccinated against seasonal flu **",
    "yes, I have been vaccinated against covid ***",
    "yes, I have been vaccinated against both seasonal flu and covid ***",
    "no, I was not vaccinated against either seasonal flu or covid last year ",
    "yes, I was vaccinated against seasonal flu last year ***",
    "yes, I was vaccinated against covid last year ***",
    "yes, I was vaccinated against both seasonal flu and covid last year ***"
  ),
  estimate = c(
    1, 1.1594381176560349, 1.1938990313409903, 1, 0.9345113103023006,
    1.182961198511645, 1.1986525531956205, 1.3885987619435227, 1.4249393997680262,
    1.6608221007597275, 1, 1.2306190558844832, 1.2511698137826779,
    1.3025146544308737, 1.3921678095031182, 2.5765770390418052, 1,
    1.0501974244025936, 0.9173415285717724, 1.6630854660369543, 1,
    0.800201285826906, 0.619147977085642, 0.5916851874362801, 1.3446738044826476,
    1, 0.9821138738140281, 1.115752845992493, 1.151676302402397,
    1.3922179488382054, 1, 0.7963755128809387, 0.6371712438181103,
    0.5359168828200498, 0.52285129136739, 1, 1.3006766155072604,
    0.7505100003548196, 1.7776842754118605, 2.703051479564682, 1,
    4.741038392845822, 5.934362782762892, 6.036773899188224, 8.825434764755212, 1,
    1.2592273055270102, 1.5557681273924433, 1.8486058288997373,
    3.8802172100549277, 1, 1.535155861618323, 1.561145156620264,
    1.9720490757147962, 2.1060302234145145, 1, 1.822390024254432,
    2.5834083197529223, 3.19131783617297, 1, 1.8573631891630529,
    11.749226988364809, 22.39402505515249
  ),
  se = c(
    0, 0.7957345407506708, 0.07569629175474867, 0, 0.12934240102667208,
    0.3581432018092095, 0.7186617050966417, 0.11453425505512978,
    0.24970014024395928, 0.17541003295888669, 0, 0.21787717379030114,
    0.16561962733872138, 0.14055065342933543, 0.17758880314032413,
    0.2673745275652827, 0, 0.21907120018625223, 0.10567040412382916,
    0.19404722520361742, 0, 0.08931527483025398, 0.13566079829196406,
    0.28889507837780726, 0.04027571944271817, 0, 0.20402191086067092,
    0.1121123274188254, 0.11464110133052731, 0.12973172877640954, 0,
    0.17244861947164766, 0.16244297378932024, 0.18264891069682213,
    0.1683475894323182, 0, 0.15516969255754776, 0.1784961281145401,
    0.16653435112184062, 0.16939006691926656, 0, 0.41716301464407385,
    0.4195492072923107, 0.4219772930530366, 0.4172887856538571, 0,
    0.1049755192658886, 0.13883787906399103, 0.19818533001974975,
    0.33943935080446835, 0, 0.17562649853946533, 0.1770368138991044,
    0.19409880094417853, 0.22703298633448182, 0, 0.22044384043316081,
    0.17267511404056463, 0.18558845913735647, 0, 0.15106861356248374,
    0.11820785166827097, 0.1351064300228206
  ),
  z = c(
    0, 0.1859106257938456, 2.3412566708408757, 0, -0.5236608302452392,
    0.46914414228773427, 0.2521326129922885, 2.8663490550709376,
    1.4182182116188318, 2.8921533884970017, 0, 0.9524510375713973,
    1.3529734869317107, 1.8804376865993249, 1.8630797752989627,
    3.5398352925174055, 0, 0.2235719240785752, -0.8164578870445477,
    2.6213958537286572, 0, -2.4955639010459687, -3.5338947036046258,
    -1.8165091855083595, 7.353101650063636, 0, -0.08846116655031708,
    0.9769610335418417, 1.2318316350105765, 2.5506337209733743, 0,
    -1.3203031443446245, -2.7746157339042767, -3.4151651763027124,
    -3.851900673274625, 0, 1.69417492683233, -1.6078909167715072,
    3.454611883758754, 5.870363773637503, 0, 3.730570849534812, 4.244459589819272,
    4.260584102982726, 5.2185391546570425, 0, 2.195733680377346,
    3.1833488039876507, 3.1002887495513214, 3.9945019068287726, 0,
    2.4405879406729816, 2.515971773931635, 3.498595245475999, 3.2806015404762188,
    0, 2.722456833250876, 5.496504731156791, 6.252726875744174, 0,
    4.098520712454235, 20.84284094017656, 23.009964693357368
  ),
  p_value = c(
    1, 0.852514849292188, 0.019218949341118965, 1, 0.6005144639826616,
    0.6389666085886305, 0.8009385625517982, 0.004152361260663706,
    0.15612706651143315, 0.003826110982753214, 1, 0.34086828611885434,
    0.1760641006276458, 0.06004845140810552, 0.062451043246119525,
    0.0004003768235061839, 1, 0.8230904120221726, 0.41423830024367947,
    0.00875705139523374, 1, 0.012575710232363623, 0.00040948417655822014,
    0.06929230019089422, 1.936595465432012e-13, 1, 0.9295101479009097,
    0.3285884438638566, 0.21801198338904584, 0.010752726571772354, 1,
    0.18673382619559387, 0.005526696589432396, 0.0006374334411249112,
    0.00011720456520099901, 1, 0.0902320478216673, 0.10785907154033761,
    0.0005510855081592766, 4.348399555275052e-09, 1, 0.00019104640780832482,
    2.19120848940901e-05, 2.03893337885495e-05, 1.8033985782047306e-07, 1,
    0.028111010978579744, 0.0014558212298114914, 0.0019333206855010002,
    6.483039974388384e-05, 1, 0.01466337531542233, 0.01187046890443521,
    0.00046771600441410024, 0.0010358597091038562, 1, 0.006479849826805965,
    3.8739270628393594e-08, 4.033471760062014e-10, 1, 4.157990352063954e-05,
    1.7701583701819876e-96, 3.704764437784754e-117
  ),
  lwr = c(
    1, 0.24367715600341078, 1.0292599381972212, 1, 0.7252228585004926,
    0.586235908033007, 0.29300496659814207, 1.1093544153322326,
    0.8734119959888871, 1.1775823198514948, 1, 0.8028570811372586,
    0.9043140657189745, 0.9888436249589735, 0.9828899536894536,
    1.5255243781518248, 1, 0.6835480436331928, 0.7457111902735307,
    1.1368844512616407, 1, 0.6716800729903878, 0.4745722490287588,
    0.33585021021936473, 1.2425933146287218, 1, 0.6583727615149036,
    0.8956192214729547, 0.919883887061643, 1.0795995736797042, 1,
    0.5679462015981974, 0.4634080525899224, 0.37463032186735795,
    0.37588830767731246, 1, 0.9595524180683677, 0.5289289755252778,
    1.2825636496959223, 1.9393109796811518, 1, 2.0928111774206113,
    2.60734937349293, 2.639750883971089, 3.894804027068178, 1, 1.0250270217941126,
    1.1850809204433688, 1.2534966671910905, 1.99471615545096, 1,
    1.0880186823389362, 1.1033835873462692, 1.347955543470295, 1.3495363508098424,
    1, 1.1829621666049654, 1.841575522237812, 2.2180586223282983, 1,
    1.38129862008169, 9.319130500231545, 17.183514383836002
  ),
  upr = c(
    1, 5.516712238117854, 1.384873581627568, 1, 1.2041972737713005,
    2.3870888459894837, 4.903561738094752, 1.7381339047482132, 2.324735980655225,
    2.342367071815249, 1, 1.8862924626146595, 1.7310644191698927,
    1.7156852531436066, 1.971869996779993, 4.351781809059186, 1,
    1.6135144273980102, 1.128473718804895, 2.4328358649588253, 1,
    0.9533141202004918, 0.8077678758371987, 1.0424032809234791,
    1.4551403256198105, 1, 1.4650479447518308, 1.389992960728278,
    1.4418757890759486, 1.7953608581567542, 1, 1.1166796357325808,
    0.8760900715464749, 0.7666408417235587, 0.7272731481694007, 1,
    1.7630716428530586, 1.06491662717705, 2.463941172662909, 3.7675686765709426,
    1, 10.740312019042985, 13.506690739439877, 13.805332666504496,
    19.998002016440463, 1, 1.5469381521371337, 2.042404383073395,
    2.7262485813383694, 7.54798398562256, 1, 2.16605059978827, 2.2088186084954544,
    2.885093336992002, 3.2865830544496077, 1, 2.8074485340756543,
    3.6240699694241325, 4.591632262985475, 1, 2.497503411864645,
    14.813005872242064, 29.184504809012516
  ),
  sign_stars = c(
    "", "", "*", "", "", "", "", "**", "", "**", "", "", "", ".", ".", "***", "",
    "", "", "**", "", "*", "***", ".", "***", "", "", "", "", "*", "", "", "**",
    "***", "***", "", ".", "", "***", "***", "", "***", "***", "***", "***", "",
    "*", "**", "**", "***", "", "*", "*", "***", "**", "", "**", "***", "***", "",
    "***", "***", "***"
  ),
  row.names = 2:64
)

and this is the ggplot2 code I use

point_shape = 1

point_size = 2

outcome <- "Covid vaccination willingness or uptake:\nYes ref. no"

p <- ggplot(tt) + 
  geom_point(aes(x = estimate, y = coef),
             shape = point_shape,
             size = point_size) + 
  geom_vline(xintercept = 1, col = "black", linewidth = .2, linetype = 1) + 
  geom_errorbar(aes(x = estimate, y = coef, xmin = lwr, xmax = upr),
                linewidth = .5,
                width = 0) + 
  facet_grid(rows = vars(vars),
             scales = "free_y",
             space = "free_y",
             switch = "y") + 
  theme_minimal() +
  labs(title = paste0("Outcome: ", outcome),
       caption = "p-value: <0.001 ***; <0.01 **; <0.05 *; < 0.1 .") + 
  xlab(paste0("Estimate (", level*100, "% CI)")) + ylab("") +
  theme(
    # Pannelli delle strip
    strip.background = element_rect(fill = "white", color = "white"),
    strip.text = element_text(face = "bold", size = 9),
    strip.text.y.left = element_text(angle = 0, hjust = 0.5, vjust = 0.5),
    strip.placement = "outside",
    # Sfondo
    panel.background = element_rect(fill = "white", color = NA),
    plot.background = element_rect(fill = "white", color = NA),
    # Margini
    plot.margin = margin(1, 1, 1, 1))

What I would like to do is to put the name of the variables, which are the bold black text on the left of the picture, just right above the coefficients associated with them. The idea is to save horizontal space.

I tried to play with the options for faceting but couldn't make it myself. Thank you in advance!

2
  • 2
    I wonder if it would be better to re-think the design of the plot. That's an awful lot of text to include in a figure. Some of the questions run to three lines, and you will really struggle for vertical space even if you manage to implement your plan. Making the left labels a bit less wide might help (tt$vars <- stringr::str_wrap(gsub("\n", " ", tt$vars), 60) works better) Commented Jun 23 at 14:26
  • Your plot would look a lot better if the x-axis were on a log scale. Commented Jun 24 at 4:10

1 Answer 1

0

My first thought was the same as was already commented: this is never going to be easy to plot. I would thoroughly consider shortening all labels, for example by moving some of it into a footnote or similar.

That said, you can gain some flexibility by manually creating the lattice from individual plots. Here's one way to insert the group labels as subtitles into each panel:

  1. wrap your existing plot into a function, to be called for each group. I made the text and margins a bit smaller, removed the faceting, and note the direct xlim specification (you could make this more data-driven).
plt <- \(data, xlim) {
  ggplot(data) + 
    geom_point(aes(x = estimate, y = coef),
               shape = point_shape,
               size = point_size) + 
    geom_vline(xintercept = 1, col = "black", linewidth = .2, linetype = 1) + 
    geom_errorbar(aes(x = estimate, y = coef, xmin = lwr, xmax = upr),
                  linewidth = .5,
                  width = 0) + 
    xlim(xlim) +
    theme_minimal() +
    theme(
      axis.title.y=element_blank(),
      plot.title=element_text(size=10),
      plot.subtitle=element_text(size=8),
      axis.text.y=element_text(size=7),
      # Sfondo
      panel.background = element_rect(fill = "white", color = NA),
      plot.background = element_rect(fill = "white", color = NA),
      # Margini
      plot.margin = margin(0, 0, 1, 0))
}
  1. Create one of these plots for each group. The first and last get special treatment for titles/captions.
## You forgot to specify this?
level <- 0.95

## Remove line breaks from the group labels
tt$vars2 <- gsub("\\n", "", tt$vars)

## Get unique groups, and the number of coefficients (for relative plot heights)
groups <- unique(tt$vars2)
ncoef <- vapply(groups, \(g) length(unique(subset(tt, vars2==g)$coef)), numeric(1))

## Plot each group
plts <- list()

for (i in seq_along(groups)) {
  plts[[i]] <- plt(subset(tt, vars2==groups[i]), xlim=c(0, 30)) +
    labs(subtitle=groups[i])
  if (i == 1) { ## Add main title in first plot
    plts[[i]] <- plts[[i]] + labs(title=paste0("Outcome: ", outcome))
  }
  if (i < length(groups)) {
    plts[[i]] <- plts[[i]] + theme(axis.title.x=element_blank(), axis.text.x=element_blank())
  } else { ## Add x-axis and caption in last plot
    plts[[i]] <- plts[[i]] +
      labs(caption="p-value: <0.001 ***; <0.01 **; <0.05 *; < 0.1 .") +
      xlab(paste0("Estimate (", level*100, "% CI)"))
  }
}
  1. Stitch all of it together.
patchwork::wrap_plots(plts, ncol=1, heights=ncoef)

stitched forest plots

Again: I don't think this is a good plot, but if you really want that much text in there this is at least a bit better. I'll leave it as an exercise to put the group levels into a sensible order (e.g. ascending age or frequency).

Sign up to request clarification or add additional context in comments.

1 Comment

Thank you very much. That is very good and you were very kind.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.