# Import modules and initialize
import ee
import json
import folium
import numpy as np
from pprint import pprint
from matplotlib import colormaps, colors
import requests
import xarray as xr
13 Unsupervised classification
Unsupervised clustering offers a powerful method for identifying patterns and categorizing data within satellite imagery, without prior labeling. Google Earth Engine (GEE) provides a robust platform for implementing these unsupervised clustering algorithms, enabling researchers to analyze and segment images based on natural groupings of spectral or spatial characteristics.
This tutorial demonstrates how to use unsupervised K-Means clustering to better understand landscapes and its inherent patterns. We will cover tutorials at the watershed level and state level using soil, climate, and landform datasets to generate regions of similar characteristics.
# Authenticate
#ee.Authenticate()
# Initialize the library.
ee.Initialize()
# Create helper functions
def get_cmap(name,n=10):
"""
Function to get list of colors from user-defined colormap in hex form.
Example: get_cmap('viridis', 3)
"""
= colormaps[name]
c = np.linspace(0,c.N-1,n).astype(int)
color_range = [colors.rgb2hex(c(k)) for k in color_range]
cmap return cmap
# Get Color for different polygons
def get_polygon_color(polygon,cmap):
= cmap[polygon['properties']['label']]
color return {'fillColor':color, 'fillOpacity':0.8}
Example 1: Washed clustering
Load region from local file
In this tutorial we will use the Kings Creek watershed, which is fully contained withing the Konza Prairie Biological station. The watershed is dominated by a tallgrass vegetation and riparian areas surrounding the Kings Creek.
# Open geoJSON file and store it in a variable
with open("../datasets/kings_creek.geojson") as file:
= json.load(file)
kings_creek
# Define GEE Geometry using local file
= ee.Geometry(kings_creek['features'][0]['geometry']) geom
# Test GEE Geometry import printing the area in km^2
/1000000 geom.area().getInfo()
11.353858144557279
Prepare clustering dataset
This step consists of reading images from different products, and then merging them into an image of multiple bands, where each band is one of the selected features for clustering.
# Import individual layers
= ee.Image("WORLDCLIM/V1/BIO").select(['bio01', 'bio12']).resample('bicubic')
weather = ee.Image("projects/soilgrids-isric/sand_mean").select('sand_0-5cm_mean').resample('bicubic')
soil = ee.Image('CSP/ERGo/1_0/Global/ALOS_landforms').select('constant').resample('bicubic')
landforms
# Merge layers as bands
= weather.addBands(soil).addBands(landforms) dataset
Generate clusters
# Select random points across the image in the defined region for training
= dataset.sample(region=geom, scale=20, numPixels=1000) sample_points
# Define number of cluster to classify
= 5
cluster_number = ee.Clusterer.wekaKMeans(cluster_number).train(sample_points) classificator
Reduce image to vector
# Generate the clusters for the whole region
= dataset.cluster(classificator).reduceToVectors(scale=20, geometry=geom).getInfo() clusters
Interactive plot
# Create Folium Map
= folium.Map(location=[39.08648, -96.582], zoom_start=13)
m
= get_cmap('Set1',n=cluster_number)
cmap = folium.GeoJson(clusters, name="Kings Creek",
cluster_layer =lambda x:get_polygon_color(x,cmap))
style_function
cluster_layer.add_to(m)
m
Export clusters as geoJSON
# Inspect resulting clusters
# pprint(clusters)
# Save result to a geoJSON file
with open('outputs/output.json', 'w') as file:
file)
json.dump(clusters,
Export clusters as GeoTIFF
# Create mask
= ee.Image.constant(1).clip(geom).mask() mask
# Create image URL
= dataset.cluster(classificator).mask(mask).getDownloadUrl({
image_url 'region': geom,
'scale':30,
'crs': 'EPSG:4326',
'format': 'GEO_TIFF'
})
# Request data using URL and save data as a new GeoTiff file
= requests.get(image_url)
response = '../outputs/kings_creek_clusters.tif'
filename with open(filename, 'wb') as f:
f.write(response.content)
# Read GeoTiff file using the Xarray package
= xr.open_dataarray(filename).squeeze()
raster
#fig, ax = plt.subplots(1,1,figsize=(6,5))
='Set2', figsize=(4,4), add_colorbar=False); raster.plot(cmap
Example 2: State-level clustering
In this tutorial we will identify macroregions across Kansas determined by climate and soil variables.
Load region
# Read US states
= ee.FeatureCollection("TIGER/2018/States")
US_states
# Select Kansas
= US_states.filter(ee.Filter.eq('NAME','Kansas')) kansas
Prepare clustering dataset
# Import image layers
= ee.ImageCollection('OREGONSTATE/PRISM/Norm91m').select('ppt').sum().resample('bicubic')
ppt = ee.ImageCollection('OREGONSTATE/PRISM/Norm91m').select('tmean').mean().resample('bicubic')
tmean = ee.ImageCollection('OREGONSTATE/PRISM/Norm91m').select('vpdmax').mean().resample('bicubic')
vpdmax = ee.Image("projects/soilgrids-isric/sand_mean").select('sand_0-5cm_mean').resample('bicubic')
soil
# Merge layers into a single dataset
= ppt.addBands(soil).addBands(tmean).addBands(vpdmax) dataset
Generate clusters
# Select random points across the image in the defined region for training
= dataset.sample(region=kansas, scale=1000, numPixels=1000) sample_points
# Define number of cluster to classify
= 9
cluster_number = ee.Clusterer.wekaKMeans(cluster_number).train(sample_points) classificator
Reduce image to vectors
# Generate the clusters for the whole region
= dataset.cluster(classificator).reduceToVectors(scale=1000, geometry=kansas).getInfo() clusters
Interactive plot
# Create Folium Map
= folium.Map(location=[38.5, -98.5], zoom_start=7)
m
= get_cmap('Set1',n=cluster_number)
cmap = folium.GeoJson(clusters, name="Kansas",
cluster_layer =lambda x:get_polygon_color(x,cmap))
style_function
cluster_layer.add_to(m)
m
Export clusters as GeoTIFF
# Create mask
= ee.Image.constant(1).clip(kansas).mask() mask
# Create image URL
= dataset.cluster(classificator).mask(mask).getDownloadUrl({
image_url 'region': kansas.geometry(),
'scale':1000,
'crs': 'EPSG:4326',
'format': 'GEO_TIFF'
})
# Request data using URL and save data as a new GeoTiff file
= requests.get(image_url)
response = '../outputs/kansas_clusters.tif'
filename with open(filename, 'wb') as f:
f.write(response.content)
# Read GeoTiff file using the Xarray package
= xr.open_dataarray(filename).squeeze()
raster
#fig, ax = plt.subplots(1,1,figsize=(6,5))
='Set2', figsize=(8,3)); raster.plot(cmap