In this section we apply ordinary kriging to predict the spatial distribution of zinc in Lake Rangsdorf based on sediment samples taken during a field survey in 2017.
We start with our data set lake_data_sf
, an object of
class sf
.
library(sf)
load(url("https://userpage.fu-berlin.de/soga/data/r-data/Lake_Rangsdorf.RData"))
str(lake_data_sf)
## Classes 'sf' and 'data.frame': 32 obs. of 23 variables:
## $ ID : chr "10a" "11a" "12a" "13b" ...
## $ depth_m : num 0.68 0.66 1.93 3.98 4.95 0.93 3.92 8.4 5.15 4 ...
## $ visual_depth_cm: int 34 28 15 15 18 20 18 18 15 15 ...
## $ elec_cond_muS : int 305 274 680 825 903 416 1060 1278 795 910 ...
## $ pH : num 7.19 7.33 7.25 7.22 7.05 7.31 6.82 7.02 6.99 7.01 ...
## $ LOI550 : num 1.04 1.27 27.32 36.93 38.33 ...
## $ LOI990 : num 0.2 0.19 17.46 18.25 15.5 ...
## $ LOI : num 98.5 98.3 33 21.6 26.4 ...
## $ C : num 0.85 0.93 19 24.4 23.7 1.79 24.7 23.1 22.8 25.4 ...
## $ N : num NA NA 1.18 1.85 1.89 NA 1.94 1.82 1.75 2.09 ...
## $ Ca_mg_g : num 9.83 3.78 172.12 182.74 193.42 ...
## $ Fe_mg_g : num 0.52 0.42 4.73 8.58 10.12 ...
## $ K_mg_g : num 0.22 0.13 0.34 0.43 0.5 0.14 0.55 0.64 0.41 0.52 ...
## $ Pb_mug_g : num 1.37 1.81 18.49 34.19 31 ...
## $ Mg_mg_g : num 0.15 0.13 1.41 1.47 1.42 0.2 1.46 1.45 1.35 1.44 ...
## $ Mn_mug_g : num 15.4 12 481.6 454.4 546.7 ...
## $ Na_mg_g : num 0.26 0.22 0.54 0.7 0.88 0.32 0.74 0.72 0.63 0.82 ...
## $ PO4_mg_g : num 0.3 0.37 4.83 4.93 5.31 1.18 5.34 5.97 4.83 5.51 ...
## $ S_mg_g : num 0.77 0.43 8.24 17.18 20.03 ...
## $ Sr_mug_g : num 7.87 6.77 238.28 261.05 245.53 ...
## $ Zi_mug_g : num 11.5 17.4 84.2 94.9 98.5 ...
## $ geometry :sfc_POINT of length 32; first list element: 'XY' num 391829 5794052
## $ dist : num 27.54 6.85 10.09 68.47 93.63 ...
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:22] "ID" "depth_m" "visual_depth_cm" "elec_cond_muS" ...
Here, we are only interested in the Zi_mug_g
variable.
Hence we subset the data set. Note that we do not have to specify the
geometry
column to be included. After subsetting to one
variable the object is still of class sf
.
lake_zinc <- lake_data_sf[, "Zi_mug_g"]
str(lake_zinc)
## Classes 'sf' and 'data.frame': 32 obs. of 2 variables:
## $ Zi_mug_g: num 11.5 17.4 84.2 94.9 98.5 ...
## $ geometry:sfc_POINT of length 32; first list element: 'XY' num 391829 5794052
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA
## ..- attr(*, "names")= chr "Zi_mug_g"
For future usage we cast the sf
object into a spatial
object of class sp
.
library(sp)
lake_zinc_sp <- as(lake_zinc, "Spatial")
For the sake of reproducibility we first define a
formula
object, that defines the dependent variable as a
linear model of independent variable.
# formula object
f <- formula(Zi_mug_g ~ 1)
Now, we build the sample variogram using the variogram()
function.
library(gstat)
ev <- variogram(f, lake_zinc_sp)
plot(ev)
We build a variogram model using the autofitVariogram()
function from the automap
package, and plot it.
library(automap)
vm <- autofitVariogram(f, lake_zinc_sp)
plot(vm)
We evaluate the variogram model by applying leave-one-out
cross-validation (LOOCV), using the krige.cv()
command.
OK_LOOCV <- krige.cv(
formula = f,
locations = lake_zinc_sp,
model = vm$var_model
)
We plot the model residuals using the bubble()
function.
We add the lake shore line to the plot. Note that we use the
::
notation to make sure we apply the layer()
function from the correct package namespace.
bubble(OK_LOOCV,
"residual",
main = "Ordinary Kriging"
) +
latticeExtra::layer(sp.polygons(as(lake, "Spatial")))
Finally, we may use our model to predict the zinc concentration in
Lake Rangsdorf. Note that we could apply the krige()
function, a wrapper method around the gstat()
and
predict()
functions. This time however, we use another
handy function, the interpolate
function from the
raster
package.
First we build a gstat
object using the
gstat()
function.
g <- gstat(
formula = f,
data = lake_zinc_sp,
model = vm$var_model
)
Then, we use the interpolate
function, which accepts a
Raster
object and a model object. Note that we first cast
the the variable lake.interpolation
, an object of class
SpatialPixelsDataFrame
to a RasterLayer
object
by applying the raster
function.
library(raster)
zinc <- interpolate(
object = raster(lake_interpolation),
model = g
)
## [using ordinary kriging]
The resulting RasterLayer
can be processed like any
other raster data set. Hence we mask out the area outside the lake
shoreline. Then we plot it using the generic plot()
function. However, please note that we could as well have used all of
the functionality provided by the raster
and
rasterVis
package for advanced visualizations.
# masking
zinc <- mask(zinc, as(lake, "Spatial"))
# plotting
plot(zinc,
main = expression(paste("Zinc concentration Lake Rangsdorf [", mu, "g/g]"))
)
plot(shoreline$geometry, add = TRUE)
plot(lake_zinc$geometry, add = TRUE, pch = 16, cex = 0.75)
Exercise: Now it is your turn. Model the concentration of lead (
Pb_mug_g
) in Lake Rangsdorf and visualize the residuals and results.
## Your code here...
# subset the data
lake_lead <- NULL
str(lake_lead)
# sp and formula abject
lake_lead_sp <- NULL
f <- NULL
# sample variogram
lead_ev <- NULL
plot(lead_ev)
# variogram model
lead_vm <- NULL
plot(lead_vm)
# model evaluation
lead_OK_LOOCV <- NULL
# interpolation
g <- NULL
lead <- NULL
# masking
lead <- NULL
# plotting
plot(lead,
main = expression(paste("Lead concentration Lake Rangsdorf [", mu, "g/g]"))
)
plot(shoreline$geometry, add = TRUE)
plot(lake_lead$geometry, add = TRUE, pch = 16, cex = 0.75)
# subset the data
lake_lead <- lake_data_sf[, "Pb_mug_g"]
str(lake_lead)
# sp and formula abject
lake_lead_sp <- as(lake_lead, "Spatial")
f <- formula(Pb_mug_g ~ 1)
# sample variogram
lead_ev <- variogram(f, lake_lead_sp)
plot(lead_ev)
# variogram model
lead_vm <- autofitVariogram(f, lake_lead_sp)
plot(lead_vm)
# model evaluation
lead_OK_LOOCV <- krige.cv(
formula = f,
locations = lake_lead_sp,
model = lead_vm$var_model
)
bubble(lead_OK_LOOCV,
"residual",
main = "Ordinary Kriging"
) +
latticeExtra::layer(sp.polygons(as(lake, "Spatial")))
# interpolation
g <- gstat(
formula = f,
data = lake_lead_sp,
model = lead_vm$var_model
)
lead <- interpolate(
object = raster(lake_interpolation),
model = g
)
# masking
lead <- mask(lead, as(lake, "Spatial"))
# plotting
plot(lead,
main = expression(paste("Lead concentration Lake Rangsdorf [", mu, "g/g]"))
)
plot(shoreline$geometry, add = TRUE)
plot(lake_lead$geometry, add = TRUE, pch = 16, cex = 0.75)
Citation
The E-Learning project SOGA-R was developed at the Department of Earth Sciences by Kai Hartmann, Joachim Krois and Annette Rudolph. You can reach us via mail by soga[at]zedat.fu-berlin.de.
Please cite as follow: Hartmann, K., Krois, J., Rudolph, A. (2023): Statistics and Geodata Analysis using R (SOGA-R). Department of Earth Sciences, Freie Universitaet Berlin.