In this section we apply ordinary kriging to predict the spatial distribution of zinc in Lake Rangsdorf based on sediment samples taken during a field survey in 2017.
We start with our data set lake_data_sf, an object of
class sf.
library(sf)
load(url("https://userpage.fu-berlin.de/soga/data/r-data/Lake_Rangsdorf.RData"))
str(lake_data_sf)
## Classes 'sf' and 'data.frame': 32 obs. of 23 variables:
## $ ID : chr "10a" "11a" "12a" "13b" ...
## $ depth_m : num 0.68 0.66 1.93 3.98 4.95 0.93 3.92 8.4 5.15 4 ...
## $ visual_depth_cm: int 34 28 15 15 18 20 18 18 15 15 ...
## $ elec_cond_muS : int 305 274 680 825 903 416 1060 1278 795 910 ...
## $ pH : num 7.19 7.33 7.25 7.22 7.05 7.31 6.82 7.02 6.99 7.01 ...
## $ LOI550 : num 1.04 1.27 27.32 36.93 38.33 ...
## $ LOI990 : num 0.2 0.19 17.46 18.25 15.5 ...
## $ LOI : num 98.5 98.3 33 21.6 26.4 ...
## $ C : num 0.85 0.93 19 24.4 23.7 1.79 24.7 23.1 22.8 25.4 ...
## $ N : num NA NA 1.18 1.85 1.89 NA 1.94 1.82 1.75 2.09 ...
## $ Ca_mg_g : num 9.83 3.78 172.12 182.74 193.42 ...
## $ Fe_mg_g : num 0.52 0.42 4.73 8.58 10.12 ...
## $ K_mg_g : num 0.22 0.13 0.34 0.43 0.5 0.14 0.55 0.64 0.41 0.52 ...
## $ Pb_mug_g : num 1.37 1.81 18.49 34.19 31 ...
## $ Mg_mg_g : num 0.15 0.13 1.41 1.47 1.42 0.2 1.46 1.45 1.35 1.44 ...
## $ Mn_mug_g : num 15.4 12 481.6 454.4 546.7 ...
## $ Na_mg_g : num 0.26 0.22 0.54 0.7 0.88 0.32 0.74 0.72 0.63 0.82 ...
## $ PO4_mg_g : num 0.3 0.37 4.83 4.93 5.31 1.18 5.34 5.97 4.83 5.51 ...
## $ S_mg_g : num 0.77 0.43 8.24 17.18 20.03 ...
## $ Sr_mug_g : num 7.87 6.77 238.28 261.05 245.53 ...
## $ Zi_mug_g : num 11.5 17.4 84.2 94.9 98.5 ...
## $ geometry :sfc_POINT of length 32; first list element: 'XY' num 391829 5794052
## $ dist : num 27.54 6.85 10.09 68.47 93.63 ...
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:22] "ID" "depth_m" "visual_depth_cm" "elec_cond_muS" ...
Here, we are only interested in the Zi_mug_g variable.
Hence we subset the data set. Note that we do not have to specify the
geometry column to be included. After subsetting to one
variable the object is still of class sf.
lake_zinc <- lake_data_sf[, "Zi_mug_g"]
str(lake_zinc)
## Classes 'sf' and 'data.frame': 32 obs. of 2 variables:
## $ Zi_mug_g: num 11.5 17.4 84.2 94.9 98.5 ...
## $ geometry:sfc_POINT of length 32; first list element: 'XY' num 391829 5794052
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA
## ..- attr(*, "names")= chr "Zi_mug_g"
For future usage we cast the sf object into a spatial
object of class Spatial.
library(sp)
lake_zinc_sp <- as(lake_zinc, "Spatial")
For the sake of reproducibility we first define a
formula object, that defines the dependent variable as a
linear model of independent variable.
# formula object
f <- formula(Zi_mug_g ~ 1)
Now, we build the sample variogram using the variogram()
function.
library(gstat)
ev <- variogram(f, lake_zinc_sp)
plot(ev)
We build a variogram model using the a self written best fit
algorithm function using the gstat package, and plot
it.
models <- c("Sph", "Exp", "Gau", "Ste")
fits <- lapply(models, function(m) {
try(fit.variogram(ev, vgm(psill = 100, model = m, range = 1000, nugget = 10)), silent = TRUE)
})
fits <- Filter(function(x) !inherits(x, "try-error"), fits)
rss <- sapply(fits, attr, "SSErr")
vm_best <- fits[[which.min(rss)]]
plot(ev, model = vm_best)
We evaluate the variogram model by applying leave-one-out
cross-validation (LOOCV), using the krige.cv()
command.
OK_LOOCV <- krige.cv(
formula = f,
locations = lake_zinc_sp,
model = vm_best
)
We plot the model residuals using the bubble() function.
We add the lake shore line to the plot. Note that we use the
:: notation to make sure we apply the layer()
function from the correct package namespace.
install.packages('latticeExtra')
## The following package(s) will be installed:
## - latticeExtra [0.6-30]
## These packages will be installed into "~/UNI/Tutorium/GitHub/soga/Soga-R/300/30900_geostatistics/renv/library/R-4.3/x86_64-w64-mingw32".
##
## # Installing packages --------------------------------------------------------
## - Installing latticeExtra ... OK [linked from cache]
## Successfully installed 1 package in 38 milliseconds.
library(latticeExtra)
## Lade nötiges Paket: lattice
library(sp)
bubble(OK_LOOCV,
"residual",
main = "Ordinary Kriging"
)+
latticeExtra::layer(sp.polygons(as(lake, "Spatial")))
Finally, we may use our model to predict the zinc concentration in
Lake Rangsdorf. Note that we could apply the krige()
function, a wrapper method around the gstat() and
predict() functions. This time however, we use another
handy function, the interpolate function from the
raster package.
First we build a gstat object using the
gstat() function.
g <- gstat(
formula = f,
data = lake_zinc_sp,
model = vm_best
)
Then, we use the predict function, which accepts a
SpatialPixelsDataFrame object and a model object.
library(gstat)
library(terra)
# Kriging-prediction
zinc_pred <- predict(g, newdata = lake_interpolation)
## [using ordinary kriging]
zinc<- rast(zinc_pred)
The resulting RasterLayer can be processed like any
other raster data set. Hence we mask out the area outside the lake
shoreline. Then we plot it using the generic plot()
function.
# masking
zinc <- mask(zinc, vect(lake))
plot(zinc, main = "Zinc concentration [µg/g]")
plot(vect(shoreline), add = TRUE)
plot(vect(lake_zinc), add = TRUE, pch = 16, cex = 0.75)
Exercise: Now it is your turn. Model the concentration of lead (
Pb_mug_g) in Lake Rangsdorf and visualize the residuals and results.
## Your code here...
# subset the data
lake_lead <- NULL
str(lake_lead)
# sp and formula abject
lake_lead_sp <- NULL
f <- NULL
# sample variogram
lead_ev <- NULL
plot(lead_ev)
# variogram model
lead_vm <- NULL
plot(lead_vm)
# model evaluation
lead_OK_LOOCV <- NULL
# interpolation
g <- NULL
lead <- NULL
# masking
lead <- NULL
# plotting
plot(lead,
main = expression(paste("Lead concentration Lake Rangsdorf [", mu, "g/g]"))
)
plot(shoreline$geometry, add = TRUE)
plot(lake_lead$geometry, add = TRUE, pch = 16, cex = 0.75)
install.packages('latticeExtra')
library(lattice)
library(sf)
library(sp)
library(gstat)
library(raster)
library(latticeExtra)
# 1. Subset the data
lake_lead <- lake_data_sf[, "Pb_mug_g"]
str(lake_lead)
# 2. sp object and formula
lake_lead_sp <- as(lake_lead, "Spatial")
f <- formula(Pb_mug_g ~ 1)
# 3. calculationg variogram
lead_ev <- variogram(f, lake_lead_sp)
plot(lead_ev)
#starting values
nugget_guess <- min(lead_ev$gamma)
psill_guess <- max(lead_ev$gamma) - nugget_guess
range_guess <- lead_ev$dist[which.max(lead_ev$gamma >= (max(lead_ev$gamma)*0.95))]
# 4. function for fitting models and calculationg residuals
fit_vgm_model <- function(model_name, variogram_data) {
start_model <- vgm(psill = psill_guess, model = model_name, range = range_guess, nugget = nugget_guess)
fit <- fit.variogram(variogram_data, model = start_model)
ssq <- attr(fit, "SSErr")
list(fit_model = fit, error = ssq)
}
# 5. testing various models (w/o Matern)
model_names <- c("Sph", "Exp", "Gau")
fits <- lapply(model_names, fit_vgm_model, variogram_data = lead_ev)
errors <- sapply(fits, function(x) x$error)
best_idx <- which.min(errors)
best_fit <- fits[[best_idx]]$fit_model
best_model_name <- model_names[best_idx]
cat("Bestes Modell ohne Matern:", best_model_name, "\n")
plot(lead_ev, best_fit)
# 6. Matern-model with kappa-optimisation
kappas <- seq(0.1, 2, by=0.1)
errors_kappa <- numeric(length(kappas))
fits_kappa <- vector("list", length(kappas))
for(i in seq_along(kappas)) {
mod <- vgm(psill = psill_guess, model = "Mat", range = range_guess, nugget = nugget_guess, kappa = kappas[i])
fit <- fit.variogram(lead_ev, model = mod)
errors_kappa[i] <- attr(fit, "SSErr")
fits_kappa[[i]] <- fit
}
best_kappa_idx <- which.min(errors_kappa)
best_matern_fit <- fits_kappa[[best_kappa_idx]]
best_kappa <- kappas[best_kappa_idx]
cat("best Matern kappa:", best_kappa, "\n")
plot(lead_ev, best_matern_fit)
# 7. compare Matern-model with best fit
if(attr(best_matern_fit, "SSErr") < attr(best_fit, "SSErr")) {
lead_vm <- best_matern_fit
cat("Matern-model ist being used.\n")
} else {
lead_vm <- best_fit
cat("No better model found, remain with previous model.\n")
}
# 8. Modell-Evaluation mit krige.cv()
lead_OK_LOOCV <- krige.cv(
formula = f,
locations = lake_lead_sp,
model = lead_vm
)
bubble(lead_OK_LOOCV,
"residual",
main = "Ordinary Kriging"
)+
latticeExtra::layer(sp.polygons(as(lake, "Spatial")))
# 9. preparing interpolation
g <- gstat(
formula = f,
data = lake_lead_sp,
model = lead_vm
)
lead <- interpolate(
object = raster(lake_interpolation),
model = g
)
# 10. Masking & Plot
lead <- mask(lead, as(lake, "Spatial"))
plot(lead,
main = expression(paste("Lead concentration Lake Rangsdorf [", mu, "g/g]"))
)
plot(shoreline$geometry, add = TRUE)
plot(lake_lead$geometry, add = TRUE, pch = 16, cex = 0.75)
Citation
The E-Learning project SOGA-R was developed at the Department of Earth Sciences by Kai Hartmann, Joachim Krois and Annette Rudolph. You can reach us via mail by soga[at]zedat.fu-berlin.de.
Please cite as follow: Hartmann, K., Krois, J., Rudolph, A. (2023): Statistics and Geodata Analysis using R (SOGA-R). Department of Earth Sciences, Freie Universitaet Berlin.