7. Analyse 18S data (genus-level Bray–Curtis)

Repeat the workflow using 18S data to demonstrate genus-level community analyses and PCoA.

Load 18S datasets

Set the path to the downloaded 18S data and load it:

data_path <- "/srv/course-data/18S"  
# or, for data downloaded in Option B:
# data_path <- "local/path/to/course-data/18S"

list.files(data_path)
# this should list "KTH-2013-Baltic-18S.zip" and "PRJEB55296-18S.zip"

loaded <- load_data(data_path)

Merge datasets and convert to data frames

merged <- merge_data(loaded)
merged_df <- convert_to_df(merged, convert_counts = TRUE, max_cells = 1e9)

Aggregate taxa and convert to data frames

cladecounts <- sum_by_clade(merged$counts, merged$asvs)
cladecounts_df <- convert_to_df(cladecounts)

Plot data on map

We can plot the datasets on the map using the plot_map function defined in 3:

# Identify datasets
DS_2013 <- grep("KTH-2013-Baltic", rownames(merged_df$events))
DS_2019_2020 <- grep("PRJEB55296", rownames(merged_df$events))

# Extract season/salinity again (adjust column names if needed)
lat <- merged_df$events$decimalLatitude
lon <- merged_df$events$decimalLongitude
month <- month(merged_df$events$eventDate)
yday <- yday(merged_df$events$eventDate)

salinity <- rep(NA, nrow(merged_df$emof))
salinity[DS_2013] <- as.numeric(merged_df$emof$`salinity (psu)`[DS_2013])
salinity[DS_2019_2020] <- as.numeric(merged_df$emof$`salinity_average (psu)`[DS_2019_2020])

# Set plotting symbols & colour scale
pch <- rep(NA, nrow(merged_df$events))
pch[DS_2013] <- 21
pch[DS_2019_2020] <- 22

color_yday <- colorRampPalette(
  c("#2c7fb8", "#addd8e", "#edf8b1", "#fa9fb5", "#2c7fb8")
)(366)

plot_map(DS_2013)
plot_map(DS_2019_2020)

Check primers

unique(merged_df$events$pcr_primer_name_forward[DS_2013])
unique(merged_df$events$pcr_primer_name_forward[DS_2019_2020])

unique(merged_df$events$pcr_primer_name_reverse[DS_2013])
unique(merged_df$events$pcr_primer_name_reverse[DS_2019_2020])

Different primers were used in the two datasets!


Bray–Curtis distances on genus composition

Since different primers were used, ASVs will likely not be shared and calculating Bray-Curtis based on ASVs will not give meaningful information (distances will be 1 between all cross-dataset sample pairs). Instead we calculate it based on genus-level counts.

bray_dist <- as.matrix(vegdist(t(cladecounts_df$norm$genus), method = "bray"))

(Note that cladecounts$norm already provides normalised counts so we don’t need to do normalisation.)

Run PCoA

pcoa <- pcoa(bray_dist, correction = "cailliez")

Plot PCoA (genus level)

This plot uses the same encoding as in the 16S PCoA section.


layout(matrix(c(1,1,1,1,2,2,2,2,3,4), 5, 2, byrow = TRUE))
par(mar = c(5, 5, 1, 1), xpd = TRUE, cex.axis = 1)

xlab <- paste("PC1 ", round(pcoa$values$Eigenvalues[1]), "%", sep = "")
ylab <- paste("PC2 ", round(pcoa$values$Eigenvalues[2]), "%", sep = "")

plot(pcoa$vectors[,1], pcoa$vectors[,2],
     pch = pch, col = "white", bg = "white",
     xlab = xlab, ylab = ylab, main = "2019–2020 (18S, genus)")
points(pcoa$vectors[DS_2019_2020,1], pcoa$vectors[DS_2019_2020,2],
       pch = pch[DS_2019_2020], col = "black",
       bg = color_yday[yday[DS_2019_2020]],
       cex = 1.0 + as.numeric(salinity[DS_2019_2020]) / 10)

plot(pcoa$vectors[,1], pcoa$vectors[,2],
     pch = pch, col = "white", bg = "white",
     xlab = xlab, ylab = ylab, main = "2013 (18S, genus)")
points(pcoa$vectors[DS_2013,1], pcoa$vectors[DS_2013,2],
       pch = pch[DS_2013], col = "black",
       bg = color_yday[yday[DS_2013]],
       cex = 1.0 + as.numeric(salinity[DS_2013]) / 10)

plot(1:365, rep(1, 365), col = color_yday, pch = "|", cex = 3, axes = FALSE, ylim = c(0.9, 1.3))
axis(1, at = c(1, 182, 365), labels = c("12", "182", "365"), cex = 3)
text(182, 1.2, "Day of year", cex = 2)

plot(c(0, 10, 20), rep(1, 3),
     col = "black", pch = 1, cex = c(1.6, 2.4, 3.2),
     xlim = c(-5, 25), ylim = c(0.9, 1.3), axes = FALSE, ylab = "", xlab = "")
axis(1, at = c(0, 10, 20), labels = c("2", "18", "34"), cex = 3)
text(10, 1.2, "Salinity", cex = 2)

Make barplot

We can reuse the plot_barplot function defined in 5 to make taxonomic barplots. E.g.:

plot_barplots(rank = 4, top_x = 8)

← Previous · Overview