# Import modules
import ee
import json
import requests
import matplotlib.pyplot as plt
import xarray as xr
import io
from pprint import pprint
9 Use my vector data
What if we want to use Google Earth Engine for our own region of interest? This could be our own field boundary, watershed, or perhaps some experimental plot. The nice part, is the we can turn any vector map into a GEE Geometry using a GeoJSON file. Let’s dive into an example using a boundary layer for the Ogallala Aquifer.
You can download the file from the Github repository
# Authenticate
#ee.Authenticate()
# Initialize the library.
ee.Initialize()
Load vector file from local drive
In this case we will load the boundary of the Ogallala Aquifer, spanning multiple states in the U.S. Great Plains. The map is in the GeoJSON
format. You can also use the GeoPandas
module to read shapefiles, and then convert them into GeJSON siles.
# Import boundary for Ogallala Aquifer
# From local drive
= '../datasets/ogallala_aquifer_bnd.geojson'
filename_bnd
# Read the file
with open(filename_bnd) as file:
= json.load(file)
roi_json
Convert local geojson file into GEEO geometry
# Define the ee.Geometry so that GEE can use it
= ee.Geometry(roi_json['features'][0]['geometry'])
roi_geom
# Create mask
= ee.Image.constant(1).clip(roi_geom).mask() mask
Learn how to unpack coordinates
For plotting purposes we will need to access the list of geographic coordinates from the GeoJSON
structure. The coordinates are stored in [lon, lat]
pairs. Here is an example:
[[-105.08669315719459, 41.81747353490203],
[-105.12647954793552, 41.819996817840824],
[-105.14733301403585, 41.821369666176125]]
The catch is that for plotting we need latitude and longitude in separate lists. Here is where Python shines. We typically use the zip
function to create the above structure from individual columns. We can use the same zip
function with the *
operator to do the inverse. Sometimes we call this operation unpacking
, because we turn a list packed with other lists, into separate lists.
First we need to learn how to access the raw data that we need. The code line belows shows how to access the nested coordinates, so that then we can use this line with the zip
function. Remember to go step by step when breaking down the problem.
# Visualize the first 10 coordinate pairs inside of the geojson file (full output is long!)
'features'][0]['geometry']['coordinates'][0][0:10]) pprint(roi_json[
[[-105.08669315719459, 41.81747353490203],
[-105.12647954793552, 41.819996817840824],
[-105.14733301403585, 41.821369666176125],
[-105.18923395737376, 41.807798034351976],
[-105.24321303184018, 41.771571878769855],
[-105.26291012100836, 41.78519856399359],
[-105.28731840106028, 41.77917046505193],
[-105.29891733873183, 41.79071058205539],
[-105.30166992435787, 41.811543641132005],
[-105.30989321696639, 41.83043187567255]]
# Apply the zip function (note the use of *
# Recall that lon is first and lat second in the geojson file
# this is because coordinates are stored as x,y pairs (lon is x, and lat is y)
= zip(*roi_json['features'][0]['geometry']['coordinates'][0]) lon,lat
# Display vector map to ensure everything looks good.
plt.figure()='k', linewidth=1)
plt.plot(lon, lat, color'Ogallala Aquifer')
plt.title('Longitude')
plt.xlabel('Latitude')
plt.ylabel('equal')
plt.axis( plt.show()
Load GEE datasets
For this tutorial we will load the mean saturated hydraulic conductivity layer from the USDA-NRCS Soil Survey GeoDatabase (SSURGO, see Walkinshaw et al. 2020). Since the saturated hydraulic conductivity represents the permeablity of the soil under saturated conditions, this layer will reveal places where the potential for aquifer recharge is greatest. Note the word “potential”, since soils in this region are rarely saturated due to the low annual precipitation regime.
# Load GEE layers
= ee.Image('projects/earthengine-legacy/assets/projects/sat-io/open-datasets/CSRL_soil_properties/physical/ksat_mean')
ksat
# Clip image to region of interest
= 1/(10e4) * 86400 # To convert micrometers/second to cm/day
factor = ksat.select('b1').multiply(factor).clip(roi_geom).mask(mask) ksat
# Note that this cell will take a few seconds, the area we are requesting
# is large and has a complex shape
# Get map from url
= ksat.getDownloadUrl({
image_url 'region': roi_geom,
'scale':800,
'crs': 'EPSG:4326',
'format': 'GEO_TIFF'})
# Request data using URL and save data as a new GeoTiff file
= requests.get(image_url)
response
# Check if the request was successful
if response.status_code == 200:
# Read image data into a BytesIO object
= io.BytesIO(response.content)
image_data
# Use Xarray to open the raster image directly from memory
= xr.open_dataarray(image_data, engine='rasterio').squeeze()
raster
=(5,5) , cmap='OrRd', add_colorbar=True,
raster.plot.imshow(figsize={'label':'Ksat mean (cm/day)'})
cbar_kwargs='k', linewidth=1)
plt.plot(lon, lat, color
'Ogallala Aquifer')
plt.title('Longitude')
plt.xlabel('Latitude')
plt.ylabel('equal')
plt.axis(
plt.tight_layout()=':')
plt.grid(linestyle
plt.show()
Add state boundaries to map
This part requires using the GeoPandas library to clip and overlay the map of U.S. states on the aquifer map.
import geopandas as gpd
roi_geom.bounds().getInfo()
{'geodesic': False,
'type': 'Polygon',
'coordinates': [[[-105.92127815341775, 31.74341518646195],
[-96.25860479316862, 31.74341518646195],
[-96.25860479316862, 43.66377283318694],
[-105.92127815341775, 43.66377283318694],
[-105.92127815341775, 31.74341518646195]]]}
# Read US states that overlap the region of interest
# Note that filterBounds() is not the same as clip()
= ee.FeatureCollection("TIGER/2018/States").filterBounds(roi_geom)
states
## Get the bounding box of the region of interest
= roi_geom.bounds()
bbox
## Create a rectangle geometry from the bounding box
= ee.Geometry(bbox)
rectangle
# Define function to intersect and simplify each feature (to download less data)
= lambda feature: feature.intersection(rectangle, maxError=100).simplify(maxError=100)
clipping_fun
# Apply function to all features of the FeatureCollection
# This line can take several seconds
= states.map(clipping_fun).getInfo() states_clipped
Plotting the clipped layer of U.S. states can be accomplished using two methods: 1) using a regular for loop that iterates over each feature or 2) simply using the GeoPandas module to automatically convert all the features into a GeoDataFrame. The latter option makes things easier since GeoPandas will handle all the heavy lifting.
Iterating over each figure using a for loop would require extracting the coordinates of each geometry. However, because we clipped the features using the bounding box of the region, a complicating factor is that not all the resulting clipped state geometries consist of a single polygon. After clipping, a few states may consist of a Polygon and one or more LineStrings, which requires grouping and nesting using a GeometryCollection, and we would need to handle this during the loop.
Let’s look at this problem quickly and then we will load the GeoPandas module to avoid handling this manually.
# Find out how many states we have
print('There are', len(states_clipped['features']), 'features in this layer')
print('') # Add an empty line for clarity
for f in states_clipped['features']:
print(f['geometry']['type'])
# Remove comment from line below to examine one of the GeometryCollections
#print(states_clipped['features'][2]['geometry'])
There are 8 features in this layer
Polygon
Polygon
GeometryCollection
GeometryCollection
GeometryCollection
Polygon
Polygon
Polygon
# Convert region of interest from json to GeoDataframe
= gpd.GeoDataFrame.from_features(states_clipped) states_gdf
# Check if the request was successful
if response.status_code == 200:
# Read image data into a BytesIO object
= io.BytesIO(response.content)
image_data
# Use Xarray to open the raster image directly from memory
= xr.open_dataarray(image_data, engine='rasterio').squeeze()
raster
=(5,5))
plt.figure(figsize='OrRd', add_colorbar=True,
raster.plot.imshow(cmap={'label':'Ksat mean (cm/day)'})
cbar_kwargs='k', linewidth=1)
plt.plot(lon, lat, color
# Mute this line to avoid
=plt.gca(),
states_gdf.plot(ax='None',
facecolor='k',
edgecolor=0.5, linestyle='--')
linewidth
'Ogallala Aquifer')
plt.title('Longitude')
plt.xlabel('Latitude')
plt.ylabel('equal')
plt.axis(
plt.tight_layout()
plt.show()
References
- Walkinshaw, M., O’Geen, A., & Beaudette, D. (2021). Soil properties. California Soil Resource Lab.