84 Management zones yield monitor

Author

Andres Patrignani

Published

January 22, 2024

Keywords

yield, yield monitor, management zones

Yield monitor data collected from sensors onboard of modern combines offer a wealth of spatial information crucial for precision agriculture. By analyzing these data, farmers and agronomists can create and delineate field management zones based on varying crop yield levels. This process involves importing, cleaning, and aggregating yield observations across a field to identify areas that exhibit similar yield levels.

These management zones can then be used to apply specific agronomic practices, such as variable rate seeding, fertilization, or irrigation, which are adjusted to the specific needs of each zone. By segmenting fields based on yield data, this approach allows for more efficient use of resources, optimizing crop production, and more sustainable farming practices by reducing unnecessary inputs in lower-yielding areas.

In this exercise we will use a yield monitor data to implement a series of geospatial operations to remove outliers, clip the observations to the field boundaries, and cluster the resulting yield map.

Go to https://geojson.io/ and export a field boundary for the field. Just in case, the boundary is also available in the “datasets/shapefiles/Mortimers” folder

# Import modules
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import box
from scipy.interpolate import griddata

# Read the file
gdf = gpd.read_file("../datasets/spatial/Mortimers/Soybeans.shp")
gdf.head(3)

	GrowerName	FieldName	CrpZnName	Load	Variety	LoadVar	MachineID	TimeStamp	Date	Time	Yield	Flow	Moisture	Duration	Distance	Width	Area	geometry
0	Knopf Farms	Mortimer's	Soybeans	17/09/30-18:37:10	NaN	17/09/30-18:37:10	222486	2017-09-30 18:38:56	2017-09-30	18:38:56	61.45	2.7965	13.0	1.0	21.8504	480.0	0.001672	POINT (-97.43268 38.71256)
1	Knopf Farms	Mortimer's	Soybeans	17/09/30-18:37:10	NaN	17/09/30-18:37:10	222486	2017-09-30 18:38:57	2017-09-30	18:38:57	49.74	2.1531	13.0	1.0	20.7874	480.0	0.001591	POINT (-97.43269 38.71255)
2	Knopf Farms	Mortimer's	Soybeans	17/09/30-18:37:10	NaN	17/09/30-18:37:10	222486	2017-09-30 18:38:58	2017-09-30	18:38:58	42.12	1.8202	13.0	1.0	20.7480	480.0	0.001588	POINT (-97.43269 38.71255)

# Add a speed column
gdf['Speed'] = gdf['Distance']/gdf['Duration']/12*0.68 # in/s to mph

# Inspect data in tabular format
gdf[['Yield','Moisture','Speed']].describe()

	Yield	Moisture	Speed
count	36426.000000	36426.000000	36426.000000
mean	46.005093	10.472791	3.613725
std	38.166806	2.189875	0.785171
min	0.000000	4.700000	0.267716
25%	34.080000	9.600000	3.043045
50%	44.690000	9.900000	3.725720
75%	56.467500	10.100000	4.232150
max	4150.990000	18.100000	6.893698

# Visualize data
fig, ax = plt.subplots(nrows=1, ncols=1)
gdf.plot(ax=ax,
        column='Yield',
        k=8,
        scheme='quantiles',
        cmap='RdYlGn',
        legend=True,
        marker='.',
        markersize=10,
        legend_kwds={'loc':'upper left', 'bbox_to_anchor':(1.05,1), 'title':'Grain yield (lbs/harvested area)'});

ax.ticklabel_format(useOffset=False)

# Read boundaries of the file 
bnd = gpd.read_file('../datasets/spatial/Mortimers/mortimer_bnd.geojson')
bnd.head()

	geometry
0	POLYGON ((-97.43696 38.71800, -97.43692 38.714...

# Clip the yield monitor data to field boundaries
gdf = gpd.clip(gdf, bnd['geometry'].iloc[0])
gdf.reset_index(drop=True, inplace=True)
gdf.head()

	GrowerName	FieldName	CrpZnName	Load	Variety	LoadVar	MachineID	TimeStamp	Date	Time	Yield	Flow	Moisture	Duration	Distance	Width	Area	geometry	Speed
0	Knopf Farms	Mortimer's	Soybeans	11	NaN	11	222486	2017-10-02 11:44:21	2017-10-02	11:44:21	5.21	0.7464	13.0	1.0	68.8189	480.0	0.005266	POINT (-97.43575 38.71092)	3.899738
1	Knopf Farms	Mortimer's	Soybeans	11	NaN	11	222486	2017-10-02 11:44:20	2017-10-02	11:44:20	6.94	0.9996	13.0	1.0	69.1732	480.0	0.005293	POINT (-97.43573 38.71092)	3.919815
2	Knopf Farms	Mortimer's	Soybeans	11	NaN	11	222486	2017-10-02 11:44:23	2017-10-02	11:44:23	6.07	0.8866	13.0	1.0	70.0787	480.0	0.005363	POINT (-97.43579 38.71092)	3.971126
3	Knopf Farms	Mortimer's	Soybeans	11	NaN	11	222486	2017-10-02 11:44:22	2017-10-02	11:44:22	4.22	0.6124	13.0	1.0	69.7244	480.0	0.005336	POINT (-97.43577 38.71092)	3.951049
4	Knopf Farms	Mortimer's	Soybeans	11	NaN	11	222486	2017-10-02 11:44:19	2017-10-02	11:44:19	7.09	1.0478	13.0	1.0	70.9449	480.0	0.005429	POINT (-97.43571 38.71092)	4.020211

# Visualize data
fig, ax = plt.subplots(nrows=1, ncols=1)
gdf.plot(ax=ax,
        column='Yield',
        k=8,
        scheme='quantiles',
        cmap='RdYlGn',
        legend=True,
        marker='.',
        markersize=10,
        legend_kwds={'loc':'upper left', 'bbox_to_anchor':(1.05,1), 'title':'Grain yield (lbs/harvested area)'});

bnd.plot(ax=ax, linewidth=2, facecolor='None', edgecolor='k')

ax.ticklabel_format(useOffset=False)

# Create histograms of yield and speed
plt.figure(figsize=(8,3))

plt.subplot(1,2,1)
plt.hist(gdf['Yield'], bins='scott')
plt.xlabel('Yield (lbs/harvested acre)')

plt.subplot(1,2,2)
plt.hist(gdf['Speed'], bins='scott', edgecolor='k', color='skyblue')
plt.xlabel('Speed (mph)')

plt.show()

# Create a function to apply the IQR method for detecting outliers

def iqr_fn(y):
    '''Function to compute the IQR'''
    quantiles = [0.25, 0.75]
    q1,q3 = y.quantile(quantiles)
    iqr = q3 - q1
    idx_outliers = (y < q1-1.5*iqr) | (y > q3+1.5*iqr)
    return idx_outliers

# Use our function to find outliers
idx_yield = iqr_fn(gdf['Yield'])
idx_speed = iqr_fn(gdf['Speed'])

idx_all_outliers = idx_yield | idx_speed
gdf.loc[idx_all_outliers, 'Yield'] = np.nan

# Visualize data
fig, ax = plt.subplots(nrows=1, ncols=1)
gdf.plot(ax=ax,
        column='Yield',
        k=8,
        scheme='quantiles',
        cmap='RdYlGn',
        legend=True,
        marker='.',
        markersize=10,
        legend_kwds={'loc':'upper left', 'bbox_to_anchor':(1.05,1), 'title':'Grain yield (lbs/harvested area)'});

bnd.plot(ax=ax, linewidth=2, facecolor='None', edgecolor='k')

ax.ticklabel_format(useOffset=False)

# Learn what the box function does
box(0,0, 10, 10)

# Specify coordinate reference systems
epsg_utm = 32614 # UTM Zone 14
epsg_wgs = 4326 # WGS84

# Define boxes of the field
xmin, ymin, xmax, ymax = bnd.to_crs(epsg=epsg_utm).total_bounds

# Define box size
xdelta = 20 # meters
ydelta = 20 # meters

# Create an empty numpy array
grid = np.array([])

# Iterate to create each box
for x in np.arange(xmin,xmax,xdelta):
    for y in np.arange(ymin,ymax,ydelta):
        cell = box(x, y, x+xdelta, y+ydelta)
        grid = np.append(grid, cell)
        
# Conver the grid to a GeoDataFrame
gdf_grid = gpd.GeoDataFrame(grid, columns=['geometry'], crs=epsg_utm)

# Get centroids of each cell
gdf_grid['centroids'] = gdf_grid['geometry'].centroid

gdf_grid.head()

	geometry	centroids
0	POLYGON ((635905.410 4285847.629, 635905.410 4...	POINT (635895.410 4285857.629)
1	POLYGON ((635905.410 4285867.629, 635905.410 4...	POINT (635895.410 4285877.629)
2	POLYGON ((635905.410 4285887.629, 635905.410 4...	POINT (635895.410 4285897.629)
3	POLYGON ((635905.410 4285907.629, 635905.410 4...	POINT (635895.410 4285917.629)
4	POLYGON ((635905.410 4285927.629, 635905.410 4...	POINT (635895.410 4285937.629)

# Change CRS
gdf_grid['geometry'] = gdf_grid['geometry'].to_crs(epsg=epsg_wgs)
gdf_grid['centroids'] = gdf_grid['centroids'].to_crs(epsg=epsg_wgs)

# Create plot of field boundary and grid
fig, ax = plt.subplots(1,1)
bnd.plot(ax=ax, facecolor='w', edgecolor='r')
gdf_grid.plot(ax=ax, facecolor='None', edgecolor='k')
ax.ticklabel_format(useOffset=False)
plt.show()

# Clip cells to field boundary
gdf_grid = gpd.clip(gdf_grid, bnd['geometry'].iloc[0])
gdf_grid.reset_index(drop=True, inplace=True)

# Create plot of field boundary and grid
fig, ax = plt.subplots(1,1)
bnd.plot(ax=ax, facecolor='w', edgecolor='r')
gdf_grid.plot(ax=ax, facecolor='None', edgecolor='k')
ax.ticklabel_format(useOffset=False)
plt.show()

# Iterate over each cell to compute the median yield
grid_yield = []
for k,row in gdf_grid.iterrows():
    idx_within_cell = gdf['geometry'].within(row['geometry'])
    yield_value = gdf.loc[idx_within_cell, 'Yield'].median()
    grid_yield.append(yield_value)
    
# Append new column to the gdf_grid 
gdf_grid['yield'] = grid_yield
gdf_grid.head()

	geometry	centroids	yield
0	POLYGON ((-97.43229 38.71095, -97.43229 38.710...	POINT (-97.43217 38.71086)	NaN
1	POLYGON ((-97.43252 38.71095, -97.43229 38.710...	POINT (-97.43240 38.71086)	NaN
2	POLYGON ((-97.43229 38.71095, -97.43229 38.710...	POINT (-97.43217 38.71104)	NaN
3	POLYGON ((-97.43229 38.71095, -97.43252 38.710...	POINT (-97.43240 38.71104)	36.565
4	POLYGON ((-97.43251 38.71113, -97.43251 38.711...	POINT (-97.43240 38.71122)	53.040

# Create plot of field boundary and grid
fig, ax = plt.subplots(1,1)
bnd.plot(ax=ax, facecolor='w', edgecolor='k')
gdf_grid.plot(ax=ax, column='yield', edgecolor='k', cmap='RdYlGn')
ax.ticklabel_format(useOffset=False)
plt.show()

# CLustering using K-Means
from sklearn.cluster import KMeans

gdf_grid.dropna(inplace=True)
kmeans_info = KMeans(n_clusters=3, n_init='auto').fit(gdf_grid['yield'].values.reshape(-1,1))
gdf_grid['zone'] = kmeans_info.labels_.flatten()
gdf_grid.head()

	geometry	centroids	yield	zone
3	POLYGON ((-97.43229 38.71095, -97.43252 38.710...	POINT (-97.43240 38.71104)	36.565	2
4	POLYGON ((-97.43251 38.71113, -97.43251 38.711...	POINT (-97.43240 38.71122)	53.040	0
5	POLYGON ((-97.43251 38.71131, -97.43250 38.711...	POINT (-97.43239 38.71140)	46.680	2
7	POLYGON ((-97.43275 38.71096, -97.43252 38.710...	POINT (-97.43263 38.71087)	47.360	2
8	POLYGON ((-97.43298 38.71096, -97.43275 38.710...	POINT (-97.43286 38.71087)	75.450	1

# Visualize the management zones
# Create plot of field boundary and grid
fig, ax = plt.subplots(1,3, figsize=(14,12))

gdf.plot(ax=ax[0],
        column='Yield',
        k=8,
        scheme='quantiles',
        cmap='RdYlGn',
        legend=True,
        marker='.',
        markersize=10,
        legend_kwds={'title':'Grain yield (lbs/harvested area)'});
bnd.plot(ax=ax[0], linewidth=2, facecolor='None', edgecolor='k')

bnd.plot(ax=ax[1], facecolor='w', edgecolor='k')
gdf_grid.plot(ax=ax[1], column='yield', edgecolor='k', cmap='RdYlGn')

bnd.plot(ax=ax[2], facecolor='w', edgecolor='k')
gdf_grid.plot(ax=ax[2], column='zone', edgecolor='k', cmap='Set3')

ax[0].ticklabel_format(useOffset=False)
ax[1].ticklabel_format(useOffset=False)
ax[2].ticklabel_format(useOffset=False)

plt.show()

# Create interactive map
m = gdf.explore(column='Yield', cmap='RdYlGn', tooltip='Yield',popup=True,
           tiles="CartoDB positron",
           style_kwds=dict(color="None", fillOpacity=1.0), # use black outline
           tooltip_kwds=dict(aliases=['Yield (%)'])
           )
m

Make this Notebook Trusted to load map: File -> Trust Notebook