84  Management zones yield monitor

Author

Andres Patrignani

Published

January 22, 2024

Yield monitor data collected from sensors onboard of modern combines offer a wealth of spatial information crucial for precision agriculture. By analyzing these data, farmers and agronomists can create and delineate field management zones based on varying crop yield levels. This process involves importing, cleaning, and aggregating yield observations across a field to identify areas that exhibit similar yield levels.

These management zones can then be used to apply specific agronomic practices, such as variable rate seeding, fertilization, or irrigation, which are adjusted to the specific needs of each zone. By segmenting fields based on yield data, this approach allows for more efficient use of resources, optimizing crop production, and more sustainable farming practices by reducing unnecessary inputs in lower-yielding areas.

In this exercise we will use a yield monitor data to implement a series of geospatial operations to remove outliers, clip the observations to the field boundaries, and cluster the resulting yield map.

Go to https://geojson.io/ and export a field boundary for the field. Just in case, the boundary is also available in the “datasets/shapefiles/Mortimers” folder

# Import modules
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import box
from scipy.interpolate import griddata
# Read the file
gdf = gpd.read_file("../datasets/spatial/Mortimers/Soybeans.shp")
gdf.head(3)
GrowerName FieldName CrpZnName Load Variety LoadVar MachineID TimeStamp Date Time Yield Flow Moisture Duration Distance Width Area geometry
0 Knopf Farms Mortimer's Soybeans 17/09/30-18:37:10 NaN 17/09/30-18:37:10 222486 2017-09-30 18:38:56 2017-09-30 18:38:56 61.45 2.7965 13.0 1.0 21.8504 480.0 0.001672 POINT (-97.43268 38.71256)
1 Knopf Farms Mortimer's Soybeans 17/09/30-18:37:10 NaN 17/09/30-18:37:10 222486 2017-09-30 18:38:57 2017-09-30 18:38:57 49.74 2.1531 13.0 1.0 20.7874 480.0 0.001591 POINT (-97.43269 38.71255)
2 Knopf Farms Mortimer's Soybeans 17/09/30-18:37:10 NaN 17/09/30-18:37:10 222486 2017-09-30 18:38:58 2017-09-30 18:38:58 42.12 1.8202 13.0 1.0 20.7480 480.0 0.001588 POINT (-97.43269 38.71255)
# Add a speed column
gdf['Speed'] = gdf['Distance']/gdf['Duration']/12*0.68 # in/s to mph
# Inspect data in tabular format
gdf[['Yield','Moisture','Speed']].describe()
Yield Moisture Speed
count 36426.000000 36426.000000 36426.000000
mean 46.005093 10.472791 3.613725
std 38.166806 2.189875 0.785171
min 0.000000 4.700000 0.267716
25% 34.080000 9.600000 3.043045
50% 44.690000 9.900000 3.725720
75% 56.467500 10.100000 4.232150
max 4150.990000 18.100000 6.893698
# Visualize data
fig, ax = plt.subplots(nrows=1, ncols=1)
gdf.plot(ax=ax,
        column='Yield',
        k=8,
        scheme='quantiles',
        cmap='RdYlGn',
        legend=True,
        marker='.',
        markersize=10,
        legend_kwds={'loc':'upper left', 'bbox_to_anchor':(1.05,1), 'title':'Grain yield (lbs/harvested area)'});

ax.ticklabel_format(useOffset=False)

# Read boundaries of the file 
bnd = gpd.read_file('../datasets/spatial/Mortimers/mortimer_bnd.geojson')
bnd.head()
geometry
0 POLYGON ((-97.43696 38.71800, -97.43692 38.714...
# Clip the yield monitor data to field boundaries
gdf = gpd.clip(gdf, bnd['geometry'].iloc[0])
gdf.reset_index(drop=True, inplace=True)
gdf.head()
GrowerName FieldName CrpZnName Load Variety LoadVar MachineID TimeStamp Date Time Yield Flow Moisture Duration Distance Width Area geometry Speed
0 Knopf Farms Mortimer's Soybeans 11 NaN 11 222486 2017-10-02 11:44:21 2017-10-02 11:44:21 5.21 0.7464 13.0 1.0 68.8189 480.0 0.005266 POINT (-97.43575 38.71092) 3.899738
1 Knopf Farms Mortimer's Soybeans 11 NaN 11 222486 2017-10-02 11:44:20 2017-10-02 11:44:20 6.94 0.9996 13.0 1.0 69.1732 480.0 0.005293 POINT (-97.43573 38.71092) 3.919815
2 Knopf Farms Mortimer's Soybeans 11 NaN 11 222486 2017-10-02 11:44:23 2017-10-02 11:44:23 6.07 0.8866 13.0 1.0 70.0787 480.0 0.005363 POINT (-97.43579 38.71092) 3.971126
3 Knopf Farms Mortimer's Soybeans 11 NaN 11 222486 2017-10-02 11:44:22 2017-10-02 11:44:22 4.22 0.6124 13.0 1.0 69.7244 480.0 0.005336 POINT (-97.43577 38.71092) 3.951049
4 Knopf Farms Mortimer's Soybeans 11 NaN 11 222486 2017-10-02 11:44:19 2017-10-02 11:44:19 7.09 1.0478 13.0 1.0 70.9449 480.0 0.005429 POINT (-97.43571 38.71092) 4.020211
# Visualize data
fig, ax = plt.subplots(nrows=1, ncols=1)
gdf.plot(ax=ax,
        column='Yield',
        k=8,
        scheme='quantiles',
        cmap='RdYlGn',
        legend=True,
        marker='.',
        markersize=10,
        legend_kwds={'loc':'upper left', 'bbox_to_anchor':(1.05,1), 'title':'Grain yield (lbs/harvested area)'});

bnd.plot(ax=ax, linewidth=2, facecolor='None', edgecolor='k')

ax.ticklabel_format(useOffset=False)

# Create histograms of yield and speed
plt.figure(figsize=(8,3))

plt.subplot(1,2,1)
plt.hist(gdf['Yield'], bins='scott')
plt.xlabel('Yield (lbs/harvested acre)')

plt.subplot(1,2,2)
plt.hist(gdf['Speed'], bins='scott', edgecolor='k', color='skyblue')
plt.xlabel('Speed (mph)')

plt.show()

# Create a function to apply the IQR method for detecting outliers

def iqr_fn(y):
    '''Function to compute the IQR'''
    quantiles = [0.25, 0.75]
    q1,q3 = y.quantile(quantiles)
    iqr = q3 - q1
    idx_outliers = (y < q1-1.5*iqr) | (y > q3+1.5*iqr)
    return idx_outliers
# Use our function to find outliers
idx_yield = iqr_fn(gdf['Yield'])
idx_speed = iqr_fn(gdf['Speed'])

idx_all_outliers = idx_yield | idx_speed
gdf.loc[idx_all_outliers, 'Yield'] = np.nan
# Visualize data
fig, ax = plt.subplots(nrows=1, ncols=1)
gdf.plot(ax=ax,
        column='Yield',
        k=8,
        scheme='quantiles',
        cmap='RdYlGn',
        legend=True,
        marker='.',
        markersize=10,
        legend_kwds={'loc':'upper left', 'bbox_to_anchor':(1.05,1), 'title':'Grain yield (lbs/harvested area)'});

bnd.plot(ax=ax, linewidth=2, facecolor='None', edgecolor='k')

ax.ticklabel_format(useOffset=False)

# Learn what the box function does
box(0,0, 10, 10)

# Specify coordinate reference systems
epsg_utm = 32614 # UTM Zone 14
epsg_wgs = 4326 # WGS84
# Define boxes of the field
xmin, ymin, xmax, ymax = bnd.to_crs(epsg=epsg_utm).total_bounds

# Define box size
xdelta = 20 # meters
ydelta = 20 # meters

# Create an empty numpy array
grid = np.array([])

# Iterate to create each box
for x in np.arange(xmin,xmax,xdelta):
    for y in np.arange(ymin,ymax,ydelta):
        cell = box(x, y, x+xdelta, y+ydelta)
        grid = np.append(grid, cell)
        
# Conver the grid to a GeoDataFrame
gdf_grid = gpd.GeoDataFrame(grid, columns=['geometry'], crs=epsg_utm)

# Get centroids of each cell
gdf_grid['centroids'] = gdf_grid['geometry'].centroid

gdf_grid.head()
geometry centroids
0 POLYGON ((635905.410 4285847.629, 635905.410 4... POINT (635895.410 4285857.629)
1 POLYGON ((635905.410 4285867.629, 635905.410 4... POINT (635895.410 4285877.629)
2 POLYGON ((635905.410 4285887.629, 635905.410 4... POINT (635895.410 4285897.629)
3 POLYGON ((635905.410 4285907.629, 635905.410 4... POINT (635895.410 4285917.629)
4 POLYGON ((635905.410 4285927.629, 635905.410 4... POINT (635895.410 4285937.629)
# Change CRS
gdf_grid['geometry'] = gdf_grid['geometry'].to_crs(epsg=epsg_wgs)
gdf_grid['centroids'] = gdf_grid['centroids'].to_crs(epsg=epsg_wgs)
# Create plot of field boundary and grid
fig, ax = plt.subplots(1,1)
bnd.plot(ax=ax, facecolor='w', edgecolor='r')
gdf_grid.plot(ax=ax, facecolor='None', edgecolor='k')
ax.ticklabel_format(useOffset=False)
plt.show()

# Clip cells to field boundary
gdf_grid = gpd.clip(gdf_grid, bnd['geometry'].iloc[0])
gdf_grid.reset_index(drop=True, inplace=True)
# Create plot of field boundary and grid
fig, ax = plt.subplots(1,1)
bnd.plot(ax=ax, facecolor='w', edgecolor='r')
gdf_grid.plot(ax=ax, facecolor='None', edgecolor='k')
ax.ticklabel_format(useOffset=False)
plt.show()

# Iterate over each cell to compute the median yield
grid_yield = []
for k,row in gdf_grid.iterrows():
    idx_within_cell = gdf['geometry'].within(row['geometry'])
    yield_value = gdf.loc[idx_within_cell, 'Yield'].median()
    grid_yield.append(yield_value)
    
# Append new column to the gdf_grid 
gdf_grid['yield'] = grid_yield
gdf_grid.head()
geometry centroids yield
0 POLYGON ((-97.43229 38.71095, -97.43229 38.710... POINT (-97.43217 38.71086) NaN
1 POLYGON ((-97.43252 38.71095, -97.43229 38.710... POINT (-97.43240 38.71086) NaN
2 POLYGON ((-97.43229 38.71095, -97.43229 38.710... POINT (-97.43217 38.71104) NaN
3 POLYGON ((-97.43229 38.71095, -97.43252 38.710... POINT (-97.43240 38.71104) 36.565
4 POLYGON ((-97.43251 38.71113, -97.43251 38.711... POINT (-97.43240 38.71122) 53.040
# Create plot of field boundary and grid
fig, ax = plt.subplots(1,1)
bnd.plot(ax=ax, facecolor='w', edgecolor='k')
gdf_grid.plot(ax=ax, column='yield', edgecolor='k', cmap='RdYlGn')
ax.ticklabel_format(useOffset=False)
plt.show()

# CLustering using K-Means
from sklearn.cluster import KMeans

gdf_grid.dropna(inplace=True)
kmeans_info = KMeans(n_clusters=3, n_init='auto').fit(gdf_grid['yield'].values.reshape(-1,1))
gdf_grid['zone'] = kmeans_info.labels_.flatten()
gdf_grid.head()
geometry centroids yield zone
3 POLYGON ((-97.43229 38.71095, -97.43252 38.710... POINT (-97.43240 38.71104) 36.565 2
4 POLYGON ((-97.43251 38.71113, -97.43251 38.711... POINT (-97.43240 38.71122) 53.040 0
5 POLYGON ((-97.43251 38.71131, -97.43250 38.711... POINT (-97.43239 38.71140) 46.680 2
7 POLYGON ((-97.43275 38.71096, -97.43252 38.710... POINT (-97.43263 38.71087) 47.360 2
8 POLYGON ((-97.43298 38.71096, -97.43275 38.710... POINT (-97.43286 38.71087) 75.450 1
# Visualize the management zones
# Create plot of field boundary and grid
fig, ax = plt.subplots(1,3, figsize=(14,12))

gdf.plot(ax=ax[0],
        column='Yield',
        k=8,
        scheme='quantiles',
        cmap='RdYlGn',
        legend=True,
        marker='.',
        markersize=10,
        legend_kwds={'title':'Grain yield (lbs/harvested area)'});
bnd.plot(ax=ax[0], linewidth=2, facecolor='None', edgecolor='k')

bnd.plot(ax=ax[1], facecolor='w', edgecolor='k')
gdf_grid.plot(ax=ax[1], column='yield', edgecolor='k', cmap='RdYlGn')

bnd.plot(ax=ax[2], facecolor='w', edgecolor='k')
gdf_grid.plot(ax=ax[2], column='zone', edgecolor='k', cmap='Set3')

ax[0].ticklabel_format(useOffset=False)
ax[1].ticklabel_format(useOffset=False)
ax[2].ticklabel_format(useOffset=False)

plt.show()

# Create interactive map
m = gdf.explore(column='Yield', cmap='RdYlGn', tooltip='Yield',popup=True,
           tiles="CartoDB positron",
           style_kwds=dict(color="None", fillOpacity=1.0), # use black outline
           tooltip_kwds=dict(aliases=['Yield (%)'])
           )
m
Make this Notebook Trusted to load map: File -> Trust Notebook