1 Example with fish data from SERS
In this example we are interested in exploring data from a specific data resource – the Swedish Electrofishing Registry - SERS (Department of Aquatic Resources, SLU Aqua). This database has 2.8 M observations starting in the 1950’s.
SBDI is a collection of many biodiversity databases. We start by searching for the data resource we are interested in by using the function pick_filter()
. This is an interactive query guiding you through the many resources available to filtering your query (data resources, spatial layers, and curated species lists).
library(SBDI4R)
<- pick_filter("resource")
fq_str # follow the instructions
Follow the instructions. Your choices here would have been “in3” :arrow_right: “dr10” (data resource 10 = SERS). Your variable fq_str
will now contain a string “data_resource_uid:dr10.”
But we are not interested in the complete database, we only want to look at the data from the last 10 years. For this we concatenate (add to a vector) another filter string. Both filter strings (for data resource and for time period) will be treated as AND factors.
<- 2008
y1 <- 2012
y2 <- c(fq_str, paste0("year:[", y1, " TO ", y2,"]"))
fq_str # Note the square brackets are hard limits
For references on how to use the filters see the SBDI APIs documentation.
Using the function occurrences()
we can now query for the observations fulfilling our filter. If you haven’t specified your email and the download reason in the sbdi_config()
before, you need to pass this here.
<- occurrences(fq = fq_str,
xf email = "sbdi4r-test@biodiversitydata.se",
download_reason_id = 10)
# Remove what is not a species
$data <- xf$data[xf$data$rank == "species",]
xf
# Simply summarise all records by data source
table(xf$data$dataResourceName)
##
## SLU Aqua Institute of Freshwater Research Swedish Electrofishing Registry - SERS
## 93200
1.1 Plotting data on a map
You can quickly plot all the observations as a PDF file with the function ocurrence_plot()
, one page per species:
occurrences_plot(xf, "obsPlot.pdf",
grouped = FALSE,
taxon_level = "species",
pch='.')
Note that the plot is saved to a .pdf file in the current working directory. You can find that with getwd()
.
There are many other ways of producing spatial plots in R. The leaflet package provides a simple method of producing browser-based maps with panning, zooming, and background layers:
library(leaflet)
# drop any records with missing lat/lon values
<- xf$data[!is.na(xf$data$longitude) | !is.na(xf$data$latitude),]
xfl <- rep("#d95f02", nrow(xfl))
marker_colour # blank map, with imagery background
leaflet(width = "100%") %>%
addProviderTiles("Esri.WorldImagery") %>%
# add markers
addCircleMarkers(xfl$longitude, xfl$latitude,
radius = 1,
fillOpacity = .5,
opacity = 1,
col = marker_colour,
clusterOptions = markerClusterOptions())
1.2 Temporal summary
A quick summary over the years reveals a drop in number of records over time.
table(xf$data$year)
##
## 2008 2009 2010 2011 2012
## 17757 19300 19643 16853 19647
hist(xf$data$year,
breaks = seq(y1, y2),
xlab = "Year",
main = "")
1.3 Species summary
In the same way we can summarise the number of observations for each species, by common or scientific name.
<- table(xf$data$commonName)
sppTab <- as.data.frame(sppTab)
sppDF colnames(sppDF)[1] <- "species"
head(sppDF)
## species Freq
## 1 61
## 2 Alpine bullhead 4615
## 3 American burbot 7081
## 4 Aral asp 6
## 5 Arctic char 46
## 6 aurora trout 856
<- table(xf$data$scientificName)
sppTab <- as.data.frame(sppTab)
sppDF colnames(sppDF)[1] <- "species"
head(sppDF)
## species Freq
## 1 Abramis brama (Linnaeus, 1758) 61
## 2 Alburnus alburnus (Linnaeus, 1758) 660
## 3 Anguilla anguilla (Linnaeus, 1758) 2140
## 4 Astacus astacus (Linnaeus, 1758) 618
## 5 Barbatula barbatula (Linnaeus, 1758) 620
## 6 Blicca bjoerkna (Linnaeus, 1758) 74
Perhaps, you want to send this table as a .CSV file to a colleague. Save the table:
write.csv(sppDF, "SERS_species_summary.csv")
# NOTE: again this will be saved on your working directory
1.4 Spatial biodiversity analysis
Let’s now ask: How does the species richness vary across Sweden?
For this we want to summarise occurrences species-wise over a defined grid instead of plotting every observation point. First we need to overlay the observations with a grid. Here we are using the standard Swedish grids with grid square size of 50, 25, 10 or 5 km provided as data in the SBDI4R package (with Coordinate Reference System = WGS84, EPSG:4326).
library(sf) # the function coordinates() and proj4string() are in sp
# load some shapes over Sweden's political borders
data("swe_wgs84", package = "SBDI4R", envir = environment())
# a standard 50 km grid
data("Sweden_Grid_50km_Wgs84", package = "SBDI4R", envir = environment())
<- Sweden_Grid_50km_Wgs84
grid
# make the observations spatial
# NOTE: make sure there are no NAs in the columns defining the coordinates
# xf$data[!is.na(xf$data$longitude) | !is.na(xf$data$latitude),]
<- st_as_sf(as.data.frame(xf$data),
obs coords = c("longitude","latitude"),
crs = st_crs(4326))
# overlay the occurrence data with the grid
<- st_intersects(grid, obs)
ObsInGridListID <- lapply(ObsInGridListID, function(x) st_drop_geometry(obs[x,]))
ObsInGridList <- unname( which( unlist(lapply(ObsInGridList, nrow)) != 0) ) wNonEmpty
The result ObsInGridList
is a list
object with a subset of the data for each grid cell. Now summarise occurrences within grid cells:
# check n the total number of observations
sum(unlist(lapply(ObsInGridList, nrow)))
## [1] 93200
# apply a summary over the grid cells
<- length(ObsInGridList)
nCells <- data.frame("nObs" = as.numeric(rep(NA,nCells)),
res "nYears" = as.numeric(rep(NA,nCells)),
"nSpp" = as.numeric(rep(NA,nCells)),
row.names = row.names(grid),
stringsAsFactors = FALSE)
<- c("scientificName", "year")
cols2use <- lapply(ObsInGridList[wNonEmpty],
dataRes function(x){
<- x[,cols2use]
x colnames(x) <- c("scientificName", "year")
return(c("nObs" = length(x[,"scientificName"]),
"nYears" = length(unique(x[,"year"])),
"nSpp" = length(unique(x[,"scientificName"]))
)
)
}
)<- as.data.frame(dplyr::bind_rows(dataRes, .id = "gridID"))
dataRes <- dataRes[,-1]
res[wNonEmpty,] <- st_as_sf(data.frame(res, st_geometry(grid))) resSf
And finally plot the grid summary as a map:
<- leaflet::colorNumeric(c("white", "navyblue"),
palBW c(0, max(resSf$nSpp, na.rm = TRUE)),
na.color = "transparent")
<- par()
oldpar par(mar = c(1,1,0,0))
plot(resSf$geometry, col = palBW(resSf$nSpp), border = NA)
plot(swe_wgs84$Border$geometry, border = 1, lwd = 1, add = T)
legend("bottomleft",
legend = round(seq(0, max(resSf$nSpp, na.rm = TRUE), length.out = 5)),
col = palBW(seq(0, max(resSf$nSpp, na.rm = TRUE), length.out = 5)),
title = "Number of \nspecies", pch = 15, bty="n")
par(oldpar)
We may now ask whether species richness varies across latitude. So we go further by arranging the observations by latitude:
library(dplyr)
library(tidyr)
<- xf$data %>%
xgridded mutate(longitude = round(longitude * 4)/4,
latitude = round(latitude * 4)/4) %>%
group_by(longitude,latitude) %>%
## subset to vars of interest
select(longitude, latitude, species) %>%
## take one row per cell per species (presence)
distinct() %>%
## calculate species richness
mutate(richness = n()) %>%
## convert to wide format (sites by species)
mutate(present = 1) %>%
do(tidyr::pivot_wider(data = .,
names_from = species,
values_from = present,
values_fill = 0)) %>%
ungroup()
## where a species was not present, it will have NA: convert these to 0
<- setdiff(names(xgridded),
sppcols c("longitude", "latitude", "richness"))
<- xgridded %>%
xgridded mutate_at(sppcols, function(z) ifelse(is.na(z), 0, z))
And plot it accordingly:
library(ggplot2)
ggplot(xgridded, aes(latitude, richness)) +
labs(x = "Latitude (Âş)",
y = "Species richness") +
lims(y = c(0,20)) +
geom_point() +
theme_bw()