7. Analyse 18S data (genus-level Bray–Curtis)
Repeat the workflow using 18S data to demonstrate genus-level community analyses and PCoA.
Load 18S datasets
Set the path to the downloaded 18S data and load it:
data_path <- "/srv/course-data/18S"
# or, for data downloaded in Option B:
# data_path <- "local/path/to/course-data/18S"
list.files(data_path)
# this should list "KTH-2013-Baltic-18S.zip" and "PRJEB55296-18S.zip"
loaded <- load_data(data_path)Merge datasets and convert to data frames
merged <- merge_data(loaded)
merged_df <- convert_to_df(merged, convert_counts = TRUE, max_cells = 1e9)Aggregate taxa and convert to data frames
cladecounts <- sum_by_clade(merged$counts, merged$asvs)
cladecounts_df <- convert_to_df(cladecounts)Plot data on map
We can plot the datasets on the map using the plot_map function defined in 3:
# Identify datasets
DS_2013 <- grep("KTH-2013-Baltic", rownames(merged_df$events))
DS_2019_2020 <- grep("PRJEB55296", rownames(merged_df$events))
# Extract season/salinity again (adjust column names if needed)
lat <- merged_df$events$decimalLatitude
lon <- merged_df$events$decimalLongitude
month <- month(merged_df$events$eventDate)
yday <- yday(merged_df$events$eventDate)
salinity <- rep(NA, nrow(merged_df$emof))
salinity[DS_2013] <- as.numeric(merged_df$emof$`salinity (psu)`[DS_2013])
salinity[DS_2019_2020] <- as.numeric(merged_df$emof$`salinity_average (psu)`[DS_2019_2020])
# Set plotting symbols & colour scale
pch <- rep(NA, nrow(merged_df$events))
pch[DS_2013] <- 21
pch[DS_2019_2020] <- 22
color_yday <- colorRampPalette(
c("#2c7fb8", "#addd8e", "#edf8b1", "#fa9fb5", "#2c7fb8")
)(366)
plot_map(DS_2013)
plot_map(DS_2019_2020)Check primers
unique(merged_df$events$pcr_primer_name_forward[DS_2013])
unique(merged_df$events$pcr_primer_name_forward[DS_2019_2020])
unique(merged_df$events$pcr_primer_name_reverse[DS_2013])
unique(merged_df$events$pcr_primer_name_reverse[DS_2019_2020])Different primers were used in the two datasets!
Bray–Curtis distances on genus composition
Since different primers were used, ASVs will likely not be shared and calculating Bray-Curtis based on ASVs will not give meaningful information (distances will be 1 between all cross-dataset sample pairs). Instead we calculate it based on genus-level counts.
bray_dist <- as.matrix(vegdist(t(cladecounts_df$norm$genus), method = "bray"))(Note that cladecounts$norm already provides normalised counts so we don’t need to do normalisation.)
Run PCoA
pcoa <- pcoa(bray_dist, correction = "cailliez")Plot PCoA (genus level)
This plot uses the same encoding as in the 16S PCoA section.
layout(matrix(c(1,1,1,1,2,2,2,2,3,4), 5, 2, byrow = TRUE))
par(mar = c(5, 5, 1, 1), xpd = TRUE, cex.axis = 1)
xlab <- paste("PC1 ", round(pcoa$values$Eigenvalues[1]), "%", sep = "")
ylab <- paste("PC2 ", round(pcoa$values$Eigenvalues[2]), "%", sep = "")
plot(pcoa$vectors[,1], pcoa$vectors[,2],
pch = pch, col = "white", bg = "white",
xlab = xlab, ylab = ylab, main = "2019–2020 (18S, genus)")
points(pcoa$vectors[DS_2019_2020,1], pcoa$vectors[DS_2019_2020,2],
pch = pch[DS_2019_2020], col = "black",
bg = color_yday[yday[DS_2019_2020]],
cex = 1.0 + as.numeric(salinity[DS_2019_2020]) / 10)
plot(pcoa$vectors[,1], pcoa$vectors[,2],
pch = pch, col = "white", bg = "white",
xlab = xlab, ylab = ylab, main = "2013 (18S, genus)")
points(pcoa$vectors[DS_2013,1], pcoa$vectors[DS_2013,2],
pch = pch[DS_2013], col = "black",
bg = color_yday[yday[DS_2013]],
cex = 1.0 + as.numeric(salinity[DS_2013]) / 10)
plot(1:365, rep(1, 365), col = color_yday, pch = "|", cex = 3, axes = FALSE, ylim = c(0.9, 1.3))
axis(1, at = c(1, 182, 365), labels = c("12", "182", "365"), cex = 3)
text(182, 1.2, "Day of year", cex = 2)
plot(c(0, 10, 20), rep(1, 3),
col = "black", pch = 1, cex = c(1.6, 2.4, 3.2),
xlim = c(-5, 25), ylim = c(0.9, 1.3), axes = FALSE, ylab = "", xlab = "")
axis(1, at = c(0, 10, 20), labels = c("2", "18", "34"), cex = 3)
text(10, 1.2, "Salinity", cex = 2)Make barplot
We can reuse the plot_barplot function defined in 5 to make taxonomic barplots. E.g.:
plot_barplots(rank = 4, top_x = 8)