# Import modules
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
91 Simple kriging
Simple kriging assumes that the mean of the random variable is known and constant across the entire study area. This is a strong assumption, that may not be valid for complex spatial datasets. The predictions made by simple kriging are based on take into account the difference between observed values and the global mean, weighting these differences based on the spatial correlation using a semivariogram, to then predict values at unsampled locations.
The main assumption of simple kriging is that the random variable (detrended or not) is stationary.
def spherical_model(h,n,s,r):
"""Function that implements a spehrical semivariogram model."""
# Calculate the semivariance (g is for gamma)
= n + (s-n)*( (3*h)/(2*r) - 1/2*(h/r)**3)
g >r] = s
g[h= np.maximum(g, 0)
g return g
= 0
nugget = 0.78
sill = 4141 range_
92 Example for a single point
Before we do any spatial interpolation, let’s learn about the inner workings of simple kriging. More advanced kriging methods stem from the same principles, albeits some minor modifications to capture local variability (ordinary kriging) or systematic trends (regression kriging)
Define observation dataset
# Example coordinates
= np.array([880, 2700, 3700, 2300, 950, 500])
x_obs = np.array([3700, 4300, 5000, 5650, 5100, 4950])
y_obs
# Example of porosity observations for x,y coordinates
= np.array([13.84, 12.15, 12.87, 12.68, 14.41, 14.59])
z_obs
# Population mean
= np.mean(z_obs) mu
Define coordinates of prediction points
This are the x and y coordinates of points with unknon variable value where we want to make predictions.
# Point coordinates where we want to estimate the variable
= np.array([2000]) # meters
x_pred = np.array([4700]) # meters y_pred
# Convert vectors into pairwise arrays
# so that we meet the requirements of spatial function libraries
= np.vstack((x_obs, y_obs)).T
obs_points = np.vstack((x_pred, y_pred)).T
pred_points
print(obs_points)
[[ 880 3700]
[2700 4300]
[3700 5000]
[2300 5650]
[ 950 5100]
[ 500 4950]]
Compute lag distances between all observed points
# Compute lag distance among all observed points
= np.round(cdist(obs_points, obs_points))
h_obs h_obs
array([[ 0., 1916., 3105., 2412., 1402., 1306.],
[1916., 0., 1221., 1408., 1924., 2294.],
[3105., 1221., 0., 1544., 2752., 3200.],
[2412., 1408., 1544., 0., 1458., 1931.],
[1402., 1924., 2752., 1458., 0., 474.],
[1306., 2294., 3200., 1931., 474., 0.]])
### Compute lag distances between all observed points and prediction points
# Compute lag distance among all observed points
= np.round(cdist(obs_points, pred_points))
h_pred = np.round(sill - spherical_model(h_pred,n=nugget,s=sill,r=range_),2)
k k
array([[0.37],
[0.56],
[0.32],
[0.5 ],
[0.47],
[0.37]])
Compute covariance matrix
# Covariance matrix
= np.round(sill - spherical_model(h_obs, n=nugget, s=sill, r=range_), 2)
K K
array([[0.78, 0.28, 0.07, 0.18, 0.4 , 0.42],
[0.28, 0.78, 0.45, 0.4 , 0.28, 0.2 ],
[0.07, 0.45, 0.78, 0.36, 0.12, 0.06],
[0.18, 0.4 , 0.36, 0.78, 0.39, 0.27],
[0.4 , 0.28, 0.12, 0.39, 0.78, 0.65],
[0.42, 0.2 , 0.06, 0.27, 0.65, 0.78]])
Compute kriging weights
# kriging weights
= np.dot(np.linalg.inv(K),k)
lmd lmd
array([[ 0.12542978],
[ 0.46726265],
[-0.03056315],
[ 0.26383161],
[ 0.25613684],
[-0.01541376]])
Compute residuals
# Residuals
= z_obs - mu
res_obs res_obs
array([ 0.41666667, -1.27333333, -0.55333333, -0.74333333, 0.98666667,
1.16666667])
# Residual at prediction point (deviation from the global or local mean)
= np.dot(lmd.T, res_obs)
res_pred res_pred
array([-0.48718296])
Estimate value and variance at prediction point
# Mean value at prediction point
= mu + res_pred
z_pred z_pred
array([12.93615037])
# Compute variance at prediction (unsampled) location
# We need to get the diagonal (which is the variance at each point)
= sill - np.dot(lmd.T,k)
v_pred 0], dtype=bool)] v_pred[np.identity(v_pred.shape[
array([0.23510707])
# Standard deviation
np.sqrt(v_pred)
array([[0.48487841]])
Create function
def simple_kriging(x_obs,y_obs,z_obs,x_pred,y_pred,semivariogram,par,mu=None):
"""
Function that interpolates values at known observation
points to unknown points using simple kriging.
All inputs: x_obs, y_obs, z_obs, x_pred, and y_pred are vectors.
Returns the predicted value and the variance.
"""
# Get semivariogram parameters
= par[0]
nugget = par[1]
sill = par[2]
range_
# Compute global mean if not provided
if mu is None:
= np.nanmean(z_obs)
mu
# Create point arrays in the format required by the cdist function
= np.vstack((x_obs, y_obs)).T
obs_points = np.vstack((x_pred, y_pred)).T
pred_points
# Compute distance between each pair of observed.
= cdist(obs_points, obs_points)
h_obs
# Covariance matrix for all observed points
= sill - semivariogram(h_obs, n=nugget, s=sill, r=range_)
K
# Residuals of the observed values relative tot he global mean
= z_obs - mu
res_obs
# Distance from prediction points to all observed points
= cdist(obs_points, pred_points)
h_pred
# Covariance at prediction point
= sill - semivariogram(h_pred, n=nugget, s=sill, r=range_)
k
# kriging weights
= np.dot(np.linalg.inv(K), k)
lmd
# Residual at prediction point (deviation from the global or local mean)
= np.dot(lmd.T, res_obs)
res_pred
# Mean value at prediction point
= mu + res_pred
z_pred
# Compute variance at prediction (unsampled) location
= sill - np.dot(lmd.T, k)
v_pred
# Get diagonal of covariance matrix
= v_pred[np.identity(v_pred.shape[0], dtype=bool)]
v_pred
return z_pred, v_pred
# Call the function with same inpit data
=mu) simple_kriging(x_obs,y_obs,z_obs,x_pred,y_pred, spherical_model,[nugget,sill,range_], mu
(array([12.94343464]), array([0.23315372]))
Example for multiple points using real dataset
# Import additional modules
import pandas as pd
import geopandas as gpd
from scipy.optimize import curve_fit
# Define coordiante reference systems
= 32614 # UTM Zone 14
crs_utm = 4326 # WGS84 crs_wgs
# Read dataset
= pd.read_csv('../datasets/spatial/soil_moisture_surveys/kona_15_jul_2019.csv')
df
# Inspect a few rows
3) df.head(
TimeStamp | Record | Zone | Latitude | Longitude | Moisture | Period | Attenuation | Permittivity | Probe Model | Sensor | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 7/15/2019 7:15 | 320 | ZONE 00001 | 39.11060 | -96.61089 | 38.88 | 1.6988 | 1.8181 | 23.8419 | CD659 12cm rods | 3543 |
1 | 7/15/2019 7:17 | 321 | ZONE 00001, ZONE 00011 | 39.11058 | -96.61116 | 41.71 | 1.7474 | 1.8310 | 26.7794 | CD659 12cm rods | 3543 |
2 | 7/15/2019 7:17 | 322 | ZONE 00001, ZONE 00011 | 39.11055 | -96.61146 | 40.59 | 1.7271 | 1.7911 | 25.5712 | CD659 12cm rods | 3543 |
# Drop lines with NaN in 'Moisture' column
='Moisture', inplace=True)
df.dropna(subset=True, inplace=True) df.reset_index(drop
# Convert Dataframe to GeoDataframe
= gpd.GeoDataFrame(df) gdf
# Add Point geometry from lat and long values
'points'] = gpd.points_from_xy(gdf['Longitude'], gdf['Latitude'])
gdf['points', drop=True, inplace=True, crs=crs_wgs) gdf.set_geometry(
# Convert point coordinates to UTM, so that we can work in meters
# to compute distances within the field
=crs_utm, inplace=True) gdf.to_crs(crs
# Get values of longitude (x) and latitude (y) in meters
= gdf['geometry'].x.values
x_obs = gdf['geometry'].y.values
y_obs = gdf['Moisture'].values z_obs
# Generate grid
= np.linspace(x_obs.min(), x_obs.max(), num=50)
x_vec = np.linspace(y_obs.min(), y_obs.max(), num=50)
y_vec
# Create grid
= np.meshgrid(x_vec, y_vec) X_pred, Y_pred
# Define simple linear model for removing the main trend in a dataset
= lambda data,b0,b1,b2: b0 + b1*data[0] + b2*data[1]
detrend_model
# Fit simple detrend model to data
= curve_fit(detrend_model,[x_obs,y_obs],z_obs)
popt, pcov print(popt)
# Create trend surface using observed x and y coordinates
= detrend_model([x_obs,y_obs], *popt)
z_trend
# Remove trend from data (resulting in residuals)
= z_obs - z_trend z_res
[-4.67047351e+04 2.96492268e-02 5.95374753e-03]
# Inspect data and fitted trend
# Compute moisture values for each grid point
= detrend_model([X_pred,Y_pred], *popt)
Z_trend
# Plot
= plt.figure(figsize=(5,5))
fig = plt.axes(projection='3d')
ax =2, marker='.', facecolor='None', edgecolor='k')
ax.scatter3D(x_obs, y_obs, z_obs, s=10, cstride=10)
ax.plot_wireframe(X_pred, Y_pred, Z_trend, rstride=20, azim=50)
ax.view_init(elev=None, zoom=0.8)
ax.set_box_aspect(aspect
fig.tight_layout()'Easting')
ax.set_xlabel('Northing')
ax.set_ylabel('Moisture (%)')
ax.set_zlabel( plt.show()
= 2.23
nugget = 27.7
sill = 15.0
range_
# Define semivariogram model
= lambda h,n,s,r: n + (s-n) * (1-np.exp(-h/r)) exp_semivar
# Estiamte moisture values at each grid point
= simple_kriging(x_obs,y_obs,z_res,X_pred.flatten(),Y_pred.flatten(),
Z_res, V_pred
exp_semivar,[nugget,sill,range_])
# Restore shape of arrays
= np.reshape(Z_res, X_pred.shape)
Z_res = np.reshape(V_pred, X_pred.shape)
V_pred
# Add trend model back to the results
= Z_res + Z_trend Z_pred
=(10,4))
plt.figure(figsize
1,2,1)
plt.subplot(=plt.pcolormesh(X_pred, Y_pred, Z_pred,
p1=z_obs.min(), vmax=z_obs.max(),
vmin='Spectral')
cmap='+', color='k', linewidth=0.5)
plt.scatter(x_obs, y_obs, marker
plt.colorbar(p1)
1,2,2)
plt.subplot(=plt.pcolormesh(X_pred, Y_pred, V_pred,
p2=V_pred.min(), vmax=V_pred.max(),
vmin='Spectral')
cmap='+', color='k', linewidth=0.5)
plt.scatter(x_obs, y_obs, marker
plt.colorbar(p2)
plt.show()
Note on simple versus ordinary kriging
In situations where we have a good trend model that accurately captures the underlying trend in the spatial dataset, the residuals should ideally contain no trend and only reflect spatial autocorrelation. Using simple kriging on these residuals can provide excellent results because the assumptions are well-satisfied.
If the trend model fails to capture some aspects of the underlying trend, then the residuals will still contain some trend component. In this scenario, simple kriging might not be the best choice since its fundamental assumption (constant mean) is violated. In this scenario, ordinary kriging can be more robust method since it estimates a local mean for each prediction.
In summary, an accurate trend model coupled with simple kriging can provide excellent interpolation results. Ordinary kriging offers a more flexibility and robust method especially in datasets that exhibit complex spatial variability and where the trend model may not fully capture the underlying trend.