This is a follow up to my earlier question, elegantly answered by the community.
I am using the same dataset as in the earlier question, given below:
topclones<- structure(list(CTaa_beta = c("CASSEGTSGGASTQYF", "CASSEGTSGGASTQYF",
"CSVEDPSSGSYEQYF", "CASSVAGPNTEAFF", "CSARDPETYEQYF", "CASSVAGPNTEAFF",
"CASSLGGLGTSTDTQYF", "CASSLRQGPSYEQYF", "CASSLGEYYGYTF", "CASSFGYTGELFF",
"CASSVGNRGGTDTQYF", "CASSELAGGQETQYF", "CSVPAGYTDTQYF", "CASSPGTAVQGYTF",
"CASSNWGGRGHTDTQYF", "CASRDSDGLAYQPQHF", "CSVPGTSTSGEQFF", "CASKPGTTSNQPQHF",
"CASSYPTSGANVLTF", "CSASTGADYNEQFF", "CSARVPTSGDYNEQFF", "CASSYPTSGANVLTF",
"CASRPEQGGPYEQYF", "CSARGGKENSPLHF", "CSVAGTGVYNEQFF", "CSVVPGGQGGYEQYF",
"CASSLEGRERYEQFF", "CASSVGLFSTDTQYF", "CASSLRGGPYNEQFF", "CASSLLAGGNNEQFF",
"CASSPLQGPSQPQHF", "CATSGRGDEVGELFF", "CSARAGESGRAMEQFF", "CASSLRQGPSYEQYF",
"CASTPAVRDGNYEQYF", "CASSPSTGYNEQFF", "CASSSGGLDEQYF", "CASSQDRGTGANVLTF",
"CASSFGTENTGELFF", "CATSGRGDEVGELFF", "CAWSVQSGGHEQYF", "CASSPGTAVQGYTF",
"CSARGGKENSPLHF", "CASSDSGGAYNEQFF", "CASSQDSGSGANVLTF", "CASSAGLAGGYEQYF",
"CASSSPGTTNEKLFF", "CASSLRGGPYNEQFF", "CASSQAKGGGETQYF"), Group = structure(c(6L,
7L, 3L, 4L, 1L, 3L, 1L, 4L, 1L, 1L, 4L, 2L, 1L, 2L, 2L, 6L, 2L,
4L, 7L, 2L, 7L, 6L, 7L, 7L, 4L, 4L, 4L, 7L, 6L, 4L, 7L, 6L, 1L,
3L, 4L, 2L, 4L, 1L, 7L, 7L, 2L, 3L, 6L, 2L, 6L, 4L, 4L, 7L, 7L
), levels = c("HC PBMC", "axSpA PBMC", "axSpA SFMC", "InEx",
"PD-1+ TIGIT+", "ReA PBMC", "ReA SFMC"), class = "factor"), n = c(441L,
292L, 345L, 303L, 268L, 264L, 242L, 200L, 218L, 211L, 163L, 242L,
166L, 225L, 223L, 59L, 209L, 125L, 53L, 177L, 48L, 44L, 46L,
45L, 99L, 96L, 94L, 41L, 39L, 89L, 40L, 38L, 97L, 97L, 85L, 128L,
80L, 88L, 36L, 35L, 117L, 84L, 30L, 106L, 29L, 67L, 66L, 29L,
29L), Total_n_per_group = c(2770L, 2899L, 7226L, 6400L, 7081L,
7226L, 7081L, 6400L, 7081L, 7081L, 6400L, 9999L, 7081L, 9999L,
9999L, 2770L, 9999L, 6400L, 2899L, 9999L, 2899L, 2770L, 2899L,
2899L, 6400L, 6400L, 6400L, 2899L, 2770L, 6400L, 2899L, 2770L,
7081L, 7226L, 6400L, 9999L, 6400L, 7081L, 2899L, 2899L, 9999L,
7226L, 2770L, 9999L, 2770L, 6400L, 6400L, 2899L, 2899L), Percent = c(15.92,
10.07, 4.77, 4.73, 3.78, 3.65, 3.42, 3.12, 3.08, 2.98, 2.55,
2.42, 2.34, 2.25, 2.23, 2.13, 2.09, 1.95, 1.83, 1.77, 1.66, 1.59,
1.59, 1.55, 1.55, 1.5, 1.47, 1.41, 1.41, 1.39, 1.38, 1.37, 1.37,
1.34, 1.33, 1.28, 1.25, 1.24, 1.24, 1.21, 1.17, 1.16, 1.08, 1.06,
1.05, 1.05, 1.03, 1, 1)), row.names = c(NA, -49L), class = c("tbl_df",
"tbl", "data.frame"))
However, I tweaked the codes a little bit since the set.seed and automatically defining of the point shapes aspects were giving me a bit of trouble:
library(dplyr)
library(scales)
library(colorspace)
library(ggplot2)
library(patchwork)
# Remove unused factor level (PD1+TIGIT+ in Group)
topclones$Group <- droplevels(topclones$Group)
# Create tibble defining unique point shapes for each duplicated value in CTaa_beta
styles <- topclones %>%
filter(n() > 1, .by = CTaa_beta) %>%
select(CTaa_beta, Group) %>%
mutate(point_shape = setNames(sample(0:20, length(unique(CTaa_beta))), unique(CTaa_beta))[CTaa_beta])
# Sort topclones, join styles to topclones while matching Group to Group and CTaa_beta to CTaa_beta
topclones <- topclones |>
arrange(Group, CTaa_beta) |>
left_join(styles, by = join_by(Group, CTaa_beta))
# Generate min and max colour hues for each Group based on hue_pal()
colour1 <- darken(hue_pal()(length(unique(topclones$Group))), 0.5)
colour2 <- lighten(hue_pal()(length(unique(topclones$Group))), 0.5)
# Initialise colours column, loop through groups, assign colourramps
topclones$colours <- NA
ct <- 0
for (i in unique(topclones$Group)) {
ct <- ct + 1
n <- length(unique(topclones$CTaa_beta[topclones$Group == i]))
groups <- unique(topclones$CTaa_beta[topclones$Group == i])
group_colours <- setNames(colorRampPalette(c(colour1[ct], colour2[ct]))(n), groups)
topclones$colours[topclones$Group == i] <- group_colours[topclones$CTaa_beta[topclones$Group == i]]
}
# Generate list of plots for each Group
p_tmp <- lapply(unique(topclones$Group), \(x) {
tmp <- topclones |> filter(Group == x)
ggplot(tmp) +
geom_col(aes(CTaa_beta, Percent, fill = CTaa_beta),
width = 1) +
geom_point(aes(CTaa_beta, 0.25, shape = CTaa_beta),
size = 2,
stroke = 1,
colour = "white") +
labs(x = x) +
scale_fill_manual(name = "", values = tmp$colours) +
scale_shape_manual(name = "", values = tmp$point_shape)
})
# Get rounded up max summed Percent for y-axis (in case of > 1 CTaa_beta value per Group)
max_y <- ceiling(max(aggregate(Percent ~ Group + CTaa_beta,
data = topclones,
FUN = sum)$Percent))
# Get vector of Group lengths for plot widths
pw <- topclones |>
summarise(n = n_distinct(CTaa_beta), .by = Group) |>
pull(n)
# Plot
p <- wrap_plots(p_tmp,
nrow = 1,
axes = "collect_y") +
plot_layout(widths = unit(pw, rep("cm", length(pw))),
heights = unit(rep(16, length(pw)), rep("cm", length(pw)))) &
geom_text(aes(CTaa_beta, Percent, label = Percent),
vjust = -.5,
position = position_dodge(1)) &
scale_y_continuous(limits = c(0, max_y),
breaks = seq(0, max_y, length.out = 5)) &
guides(fill = guide_legend(nrow = 10),
shape = guide_legend(override.aes = list(size = 1, stroke = 0.75))) &
ylab("Frequency of CTaa_beta (% total cells per group)") &
coord_cartesian(expand = FALSE,
clip = "off") &
theme_classic() &
theme(axis.text = element_text(size = 18),
axis.title = element_text(size = 18),
axis.title.x = element_text(vjust = -1),
axis.ticks.x = element_blank(),
legend.text = element_text(size = 10),
legend.position = "bottom",
legend.justification = c(0.5, 0.5),
legend.key.spacing.y = unit(0, "mm"),
axis.text.x = element_blank())
suppressWarnings(
ggsave("/Users/zoyaqaiyum/Downloads/gradient_colours_by_group_no_thatching.jpg",
p,
width = 30,
height = 12,
dpi = 150)
)
This produces the graph below, where you can see the legend is misaligned.
How do I align the legend, along with the symbols under the plot as was produced as an answer to my earlier question?

