# Import modules
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
94 Machine learning supervised classification
Machine learning classification aims at categorizing data points into distinct classes based on their features. Perceptron neural networks and random forest classifiers are two popular approaches for this task.
Logistic regression is a statistical model used for binary classification. It estimates the probability that a given input belongs to a certain class using a logistic function.
Stochastic gradient descent is commonly used to train machine learning models like perceptron neural networks and logistic regression. It iteratively updates the parameters of the model by computing the gradient of the loss function on a small subset of the training data (a mini-batch) and adjusting the parameters in the opposite direction of the gradient to minimize the loss.
Random forest classifiers are ensemble learning methods that combine multiple decision trees to improve classification accuracy. Each tree is trained on a random subset of the data and features, and the final classification is determined by a majority vote or averaging of the individual tree predictions.
In this example we will use the red, green, and blue values of labeled pixels to classify green canopy cover. The data were collected using the pixlabel app.
Read dataset of RGB data and labels
# Read training dataset (one labeled pixel per row)
df = pd.read_csv('../datasets/pixlabel.csv')

# Preview the first three rows
df.head(3)
 | RECORD | FILENAME | LABEL | COL | ROW | TOTALCOLS | TOTALROWS | TIMESTAMP | R1 | G1 | B1 |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | example.jpg | canopy | 858 | 208 | 917 | 687 | 2024-03-29T04:59:28.723Z | 237 | 2 | 2 |
1 | 2 | example.jpg | canopy | 874 | 223 | 917 | 687 | 2024-03-29T04:59:36.440Z | 110 | 159 | 129 |
2 | 3 | example.jpg | canopy | 777 | 213 | 917 | 687 | 2024-03-29T04:59:41.010Z | 135 | 184 | 145 |
# Create a boolean index (one entry per row of df) for each label
idx_canopy = df['LABEL'] == 'canopy'
idx_background = df['LABEL'] == 'background'
# Create figure to inspect data: red vs. green band for each labeled pixel
plt.figure(figsize=(5,4))

# Canopy pixels drawn as white-filled markers
plt.scatter(df.loc[idx_canopy,'R1'], df.loc[idx_canopy,'G1'],
            facecolor='w', edgecolor='k', label='Green canopy')

# Background pixels drawn as black-filled markers
plt.scatter(df.loc[idx_background,'R1'], df.loc[idx_background,'G1'],
            facecolor='k', edgecolor='k',label='Background')

plt.xlabel('Red band')
plt.ylabel('Green band')
plt.legend()
plt.show()
Load image to classify
# Read image
RGB = plt.imread('../datasets/images/grassland.jpg')

# Split the image into its red, green, and blue bands
R = RGB[:,:,0]
G = RGB[:,:,1]
B = RGB[:,:,2]

# Arrange the image as one row per pixel and one column per band.
# The bands are divided by 255 so the image features are on the same scale
# as the training inputs (which are built as `values/255`). Predicting on
# raw 0-255 values with a model fitted on 0-1 values is a scale mismatch.
# NOTE(review): assumes imread returns 8-bit (0-255) values for this JPEG;
# confirm. The canopy cover percentages recorded in this document were
# produced without this scaling and will change.
X_img = np.column_stack( (R.flatten(), G.flatten(), B.flatten()) ) / 255
# Create function to compute green canopy cover
def compute_gcc(I):
    """Return the mean of a classified image as a percentage.

    The sum of all elements is divided by the number of elements and
    multiplied by 100, then rounded to one decimal place. For a binary mask
    in which canopy pixels are 1 and all other pixels are 0, this is the
    percent green canopy cover.

    Parameters
    ----------
    I : array-like
        Classified image (any shape). Converted with ``np.asarray`` so that
        nested lists are accepted as well as arrays.

    Returns
    -------
    float
        Percentage rounded to one decimal place.
    """
    mask = np.asarray(I)
    return round(np.sum(mask)/mask.size*100, 1)
Define inputs and outputs
# Gather inputs in float data type (band values divided by 255)
X = df[['R1','G1','B1']].values/255

# Define output as a binary response.
# factorize(sort=True) assigns integer codes in sorted label order, so with
# the two labels 'background' and 'canopy' the codes are 0 and 1 respectively.
# unique_labels holds the label name for each code.
y,unique_labels = df['LABEL'].factorize(sort=True)
Train Logistic Regression
# Fit Logistic Regression model (fixed random_state for repeatable fits)
LR = LogisticRegression(random_state=0).fit(X, y)

# Compute mean accuracy on the training dataset
LR.score(X, y)
0.902
# Classify image: predict a class for every pixel row of X_img
BW_LR = LR.predict(X_img)

# Restore the 2D image layout (same rows and columns as the red band)
BW_LR = np.reshape(BW_LR, R.shape)
# Create figure of classified image using Logistic Regression
plt.figure()

# Left panel: original RGB image
plt.subplot(1,2,1)
plt.imshow(RGB)
plt.axis('off')

# Right panel: classified image
plt.subplot(1,2,2)
plt.imshow(BW_LR, cmap='binary_r')
plt.axis('off')
plt.show()
# Report the percent green canopy cover of the Logistic Regression result
gcc_lr = compute_gcc(BW_LR)
print('Canopy cover using Logistic Regression:', gcc_lr, '%')
Canopy cover using Logistic Regression: 37.7 %
Train Stochastic Gradient Descent classifier
# Define a linear classifier trained with stochastic gradient descent
# (hinge loss, regularization strength alpha, at most 200 passes over the data)
SGD = SGDClassifier(loss="hinge", alpha=0.0001, max_iter=200)

# Fit the classifier to the labeled pixels
SGD.fit(X, y)
SGDClassifier(max_iter=200)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
SGDClassifier(max_iter=200)
# Classify image: predict a class for every pixel row of X_img
BW_SGD = SGD.predict(X_img)

# Restore the 2D image layout (same rows and columns as the red band)
BW_SGD = np.reshape(BW_SGD, R.shape)
# Create figure of classified image using Stochastic Gradient Descent
plt.figure()

# Left panel: original RGB image
plt.subplot(1,2,1)
plt.imshow(RGB)
plt.axis('off')

# Right panel: classified image
plt.subplot(1,2,2)
plt.imshow(BW_SGD, cmap='binary_r')
plt.axis('off')
plt.show()
# Report the percent green canopy cover of the Stochastic Gradient Descent result
gcc_sgd = compute_gcc(BW_SGD)
print('Canopy cover using Stochastic Gradient Descent:', gcc_sgd, '%')
Canopy cover using Stochastic Gradient Descent: 45.6 %
Train Random Forest classifier
# Define and fit Random Forest model (ensemble of 20 trees)
RF = RandomForestClassifier(n_estimators=20).fit(X, y)

# Compute mean accuracy on the training dataset
RF.score(X,y)
1.0
# Classify image: predict a class for every pixel row of X_img
BW_RF = RF.predict(X_img)

# Restore the 2D image layout (same rows and columns as the red band)
BW_RF = np.reshape(BW_RF, R.shape)
# Create figure of classified image using Random Forest
plt.figure()

# Left panel: original RGB image
plt.subplot(1,2,1)
plt.imshow(RGB)
plt.axis('off')

# Right panel: classified image
plt.subplot(1,2,2)
plt.imshow(BW_RF, cmap='binary_r')
plt.axis('off')
plt.show()
# Report the percent green canopy cover of the Random Forest result
gcc_rf = compute_gcc(BW_RF)
print('Canopy cover using Random Forest:', gcc_rf, '%')
Canopy cover using Random Forest: 30.8 %