0

This is a follow up to my earlier question, elegantly answered by the community.

I am using the same dataset as in the earlier question, given below:

topclones<- structure(list(CTaa_beta = c("CASSEGTSGGASTQYF", "CASSEGTSGGASTQYF", 
"CSVEDPSSGSYEQYF", "CASSVAGPNTEAFF", "CSARDPETYEQYF", "CASSVAGPNTEAFF", 
"CASSLGGLGTSTDTQYF", "CASSLRQGPSYEQYF", "CASSLGEYYGYTF", "CASSFGYTGELFF", 
"CASSVGNRGGTDTQYF", "CASSELAGGQETQYF", "CSVPAGYTDTQYF", "CASSPGTAVQGYTF", 
"CASSNWGGRGHTDTQYF", "CASRDSDGLAYQPQHF", "CSVPGTSTSGEQFF", "CASKPGTTSNQPQHF", 
"CASSYPTSGANVLTF", "CSASTGADYNEQFF", "CSARVPTSGDYNEQFF", "CASSYPTSGANVLTF", 
"CASRPEQGGPYEQYF", "CSARGGKENSPLHF", "CSVAGTGVYNEQFF", "CSVVPGGQGGYEQYF", 
"CASSLEGRERYEQFF", "CASSVGLFSTDTQYF", "CASSLRGGPYNEQFF", "CASSLLAGGNNEQFF", 
"CASSPLQGPSQPQHF", "CATSGRGDEVGELFF", "CSARAGESGRAMEQFF", "CASSLRQGPSYEQYF", 
"CASTPAVRDGNYEQYF", "CASSPSTGYNEQFF", "CASSSGGLDEQYF", "CASSQDRGTGANVLTF", 
"CASSFGTENTGELFF", "CATSGRGDEVGELFF", "CAWSVQSGGHEQYF", "CASSPGTAVQGYTF", 
"CSARGGKENSPLHF", "CASSDSGGAYNEQFF", "CASSQDSGSGANVLTF", "CASSAGLAGGYEQYF", 
"CASSSPGTTNEKLFF", "CASSLRGGPYNEQFF", "CASSQAKGGGETQYF"), Group = structure(c(6L, 
7L, 3L, 4L, 1L, 3L, 1L, 4L, 1L, 1L, 4L, 2L, 1L, 2L, 2L, 6L, 2L, 
4L, 7L, 2L, 7L, 6L, 7L, 7L, 4L, 4L, 4L, 7L, 6L, 4L, 7L, 6L, 1L, 
3L, 4L, 2L, 4L, 1L, 7L, 7L, 2L, 3L, 6L, 2L, 6L, 4L, 4L, 7L, 7L
), levels = c("HC PBMC", "axSpA PBMC", "axSpA SFMC", "InEx", 
"PD-1+ TIGIT+", "ReA PBMC", "ReA SFMC"), class = "factor"), n = c(441L, 
292L, 345L, 303L, 268L, 264L, 242L, 200L, 218L, 211L, 163L, 242L, 
166L, 225L, 223L, 59L, 209L, 125L, 53L, 177L, 48L, 44L, 46L, 
45L, 99L, 96L, 94L, 41L, 39L, 89L, 40L, 38L, 97L, 97L, 85L, 128L, 
80L, 88L, 36L, 35L, 117L, 84L, 30L, 106L, 29L, 67L, 66L, 29L, 
29L), Total_n_per_group = c(2770L, 2899L, 7226L, 6400L, 7081L, 
7226L, 7081L, 6400L, 7081L, 7081L, 6400L, 9999L, 7081L, 9999L, 
9999L, 2770L, 9999L, 6400L, 2899L, 9999L, 2899L, 2770L, 2899L, 
2899L, 6400L, 6400L, 6400L, 2899L, 2770L, 6400L, 2899L, 2770L, 
7081L, 7226L, 6400L, 9999L, 6400L, 7081L, 2899L, 2899L, 9999L, 
7226L, 2770L, 9999L, 2770L, 6400L, 6400L, 2899L, 2899L), Percent = c(15.92, 
10.07, 4.77, 4.73, 3.78, 3.65, 3.42, 3.12, 3.08, 2.98, 2.55, 
2.42, 2.34, 2.25, 2.23, 2.13, 2.09, 1.95, 1.83, 1.77, 1.66, 1.59, 
1.59, 1.55, 1.55, 1.5, 1.47, 1.41, 1.41, 1.39, 1.38, 1.37, 1.37, 
1.34, 1.33, 1.28, 1.25, 1.24, 1.24, 1.21, 1.17, 1.16, 1.08, 1.06, 
1.05, 1.05, 1.03, 1, 1)), row.names = c(NA, -49L), class = c("tbl_df", 
"tbl", "data.frame"))

However, I tweaked the codes a little bit since the set.seed and automatically defining of the point shapes aspects were giving me a bit of trouble:

library(dplyr)
library(scales)
library(colorspace)
library(ggplot2)
library(patchwork)



# Remove unused factor level (PD1+TIGIT+ in Group)
topclones$Group <- droplevels(topclones$Group)

# Create tibble defining unique point shapes for each duplicated value in CTaa_beta
styles <- topclones %>%
  filter(n() > 1, .by = CTaa_beta) %>%
  select(CTaa_beta, Group) %>%
  mutate(point_shape = setNames(sample(0:20, length(unique(CTaa_beta))), unique(CTaa_beta))[CTaa_beta])


# Sort topclones, join styles to topclones while matching Group to Group and CTaa_beta to CTaa_beta
topclones <- topclones |>
  arrange(Group, CTaa_beta) |>
  left_join(styles, by = join_by(Group, CTaa_beta))


# Generate min and max colour hues for each Group based on hue_pal()
colour1 <- darken(hue_pal()(length(unique(topclones$Group))), 0.5)
colour2 <- lighten(hue_pal()(length(unique(topclones$Group))), 0.5)


# Initialise colours column, loop through groups, assign colourramps
topclones$colours <- NA
ct <- 0
for (i in unique(topclones$Group)) {
  
  ct <- ct + 1
  n <- length(unique(topclones$CTaa_beta[topclones$Group == i]))
  groups <- unique(topclones$CTaa_beta[topclones$Group == i])
  group_colours <- setNames(colorRampPalette(c(colour1[ct], colour2[ct]))(n), groups)
  topclones$colours[topclones$Group == i] <- group_colours[topclones$CTaa_beta[topclones$Group == i]]
  
}

# Generate list of plots for each Group
p_tmp <- lapply(unique(topclones$Group), \(x) {
  
  tmp <- topclones |> filter(Group == x)
  
  ggplot(tmp) +
    geom_col(aes(CTaa_beta, Percent, fill = CTaa_beta),
             width = 1) +
    geom_point(aes(CTaa_beta, 0.25, shape = CTaa_beta),
               size = 2,
               stroke = 1,
               colour = "white") +
    labs(x = x) +
    scale_fill_manual(name = "", values = tmp$colours) +
    scale_shape_manual(name = "", values = tmp$point_shape)
  
})


# Get rounded up max summed Percent for y-axis (in case of > 1 CTaa_beta value per Group)
max_y <- ceiling(max(aggregate(Percent ~ Group + CTaa_beta,
                               data = topclones,
                               FUN = sum)$Percent))

# Get vector of Group lengths for plot widths
pw <- topclones |>
  summarise(n = n_distinct(CTaa_beta), .by = Group) |>
  pull(n)

# Plot
p <- wrap_plots(p_tmp,
                nrow = 1,
                axes = "collect_y") +
  plot_layout(widths = unit(pw, rep("cm", length(pw))),
              heights = unit(rep(16, length(pw)), rep("cm", length(pw)))) &
  geom_text(aes(CTaa_beta, Percent, label = Percent),
            vjust = -.5,
            position = position_dodge(1)) &
  scale_y_continuous(limits = c(0, max_y),
                     breaks = seq(0, max_y, length.out = 5)) &
  guides(fill = guide_legend(nrow = 10),
         shape = guide_legend(override.aes = list(size = 1, stroke = 0.75))) &
  ylab("Frequency of CTaa_beta (% total cells per group)") &
  coord_cartesian(expand = FALSE,
                  clip = "off") &
  theme_classic() &
  theme(axis.text = element_text(size = 18),
        axis.title = element_text(size = 18),
        axis.title.x = element_text(vjust = -1),
        axis.ticks.x = element_blank(),
        legend.text = element_text(size = 10),
        legend.position = "bottom",
        legend.justification = c(0.5, 0.5),
        legend.key.spacing.y = unit(0, "mm"),
        axis.text.x = element_blank())


suppressWarnings(
  ggsave("/Users/zoyaqaiyum/Downloads/gradient_colours_by_group_no_thatching.jpg",
         p,
         width = 30, 
         height = 12,
         dpi = 150)
  )

This produces the graph below, where you can see the legend is misaligned.

enter image description here

How do I align the legend, along with the symbols under the plot as was produced as an answer to my earlier question?

1 Answer 1

6

The issue is the setup of your shape (and color) palettes which prevent that the fill and shape legends get merged. In the code below I fix this by using named vectors of colors and shapes. Additionally I slightly refactored your plotting code:

library(dplyr)
library(scales)
library(colorspace)
library(ggplot2)
library(patchwork)

p_tmp <- topclones |>
  split(~Group) |>
  purrr::imap(\(x, y) {
    # Use named color and shape palettes
    pal_fill <- x |> 
      select(CTaa_beta, colours) |> 
      tibble::deframe()
    
    pal_shape <- x |> 
      select(CTaa_beta, point_shape) |> 
      tibble::deframe()
    
    ggplot(x) +
      geom_col(aes(CTaa_beta, Percent, fill = CTaa_beta),
        width = 1
      ) +
      geom_point(aes(CTaa_beta, 0.25, shape = CTaa_beta),
        size = 2,
        stroke = 1,
        colour = "white"
      ) +
      labs(x = y) +
      scale_fill_manual(name = NULL, values = pal_fill) +
      scale_shape_manual(name = NULL, values = pal_shape)
  })

enter image description here

Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.