# Import modules
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import circmean
50 Air temperature model
The daily mean air temperature exhibits a sinusoidal pattern due to the Earth’s axial tilt and its orbit around the Sun. During the year, as the Earth orbits the Sun, the angle at which sunlight hits the Earth changes, leading to warmer temperatures when the sun is more directly overhead and cooler temperatures when it is at a lower angle. The smooth transition between these temperature extremes follows a wave-like, or sinusoidal, pattern, peaking during summer and troughing in winter, with transitions through spring and fall.
As a result, a sinusoidal model can be used to represent this pattern as a function of the calendar day of the year, incorporating the annual mean temperature, mean annual thermal amplitude, and a phase constant corresponding to the timing of the minimum temperature. Mathematically, this model can be expressed as:
T(d) = T_{avg} + A \; cos \Bigg[2 \pi \Bigg(\frac{d-d_{min}}{365} \Bigg) + \phi \Bigg]
where: - T(d) is the predicted temperature on day of the year ( d ), - T_{avg} is the annual mean temperature, - A is the mean annual thermal amplitude (half the difference between the maximum and minimum air temperatures), - d is the day of the year, - d_{min} is an offset day of the year corresponding to the minimum temperature, and - \phi is the phase shift needed to align the model with the observed data, in this case \pi .
This sinusoidal model captures the regular, predictable changes in temperature due to the Earth’s position relative to the Sun.
Basics of waves
= np.arange(1, 366)
doy = lambda x,a,b,c: a + b * np.cos(2*np.pi*(x-c)/365 + np.pi) wave_model
= np.arange(1,366,1)
doy
=(12,3))
plt.figure(figsize
1,3,1)
plt.subplot('Change mean value')
plt.title(=10, b=20, c=1), '-k')
plt.plot(doy, wave_model(doy, a=25, b=20, c=1), '--r')
plt.plot(doy, wave_model(doy, a'Day of the year')
plt.xlabel(
1,3,2)
plt.subplot('Change amplitude value')
plt.title(=25, b=20, c=1), '-k')
plt.plot(doy, wave_model(doy, a=25, b=5, c=20), '--r')
plt.plot(doy, wave_model(doy, a'Day of the year')
plt.xlabel(
1,3,3)
plt.subplot('Change offset value')
plt.title(=25, b=5, c=0), '-k')
plt.plot(doy, wave_model(doy, a=25, b=5, c=50), '--r')
plt.plot(doy, wave_model(doy, a'Day of the year')
plt.xlabel(
plt.show()
Load and inspect data
# Read dataset
= pd.read_csv('../datasets/KS_Manhattan_6_SSW.csv',
df =['LST_DATE'], date_format='%Y%m%d', na_values=[-9999,-99])
parse_dates
# Retain columns of interest
= df[['LST_DATE','T_DAILY_AVG']]
df
# Add year and day of the year columns
'DOY'] = df['LST_DATE'].dt.dayofyear
df['YEAR'] = df['LST_DATE'].dt.year
df[
# Inspect a few rows
3) df.head(
LST_DATE | T_DAILY_AVG | DOY | YEAR | |
---|---|---|---|---|
0 | 2003-10-01 | NaN | 274 | 2003 |
1 | 2003-10-02 | 11.7 | 275 | 2003 |
2 | 2003-10-03 | 14.8 | 276 | 2003 |
# Inspect complete air temperature data to see seasonal pattern
=(8,4))
plt.figure(figsize'LST_DATE'], df['T_DAILY_AVG'], linewidth=0.5)
plt.plot(df['Air temperature (Celsius)')
plt.ylabel( plt.show()
Split dataset into training and testing sets
In this tutorial we are using the train set to estimate the model parameters. We then make predictions of average air temperature using these parameters in both the train and test sets.
# Split set into training and test
= round(df.shape[0]*0.75)
N = df[:N]
df_train = df[N+1:].reset_index(drop=True) df_test
Define model for air temperature
# Daily air temperature model
= lambda doy,a,b,c: a + b * np.cos(2*np.pi*( (doy-c)/365) + np.pi) model
Estimate model parameters from data
# Annual mean temperature
= df['T_DAILY_AVG'].mean()
T_avg print(f'Mean annual temperature: {T_avg:.2f}')
Mean annual temperature: 13.09
# Annual mean thermal amplitude
= df_train.groupby(by='DOY')["T_DAILY_AVG"].mean().quantile([0.05,0.95])
T_min,T_max = (T_max - T_min)/2
A print(f'Mean annual thermal amplitude: {A:.2f}')
Mean annual thermal amplitude: 14.36
# Timing of minimum air temperature
= df_train.groupby(by='YEAR')['T_DAILY_AVG'].idxmin()
idx_min = np.round(df_train.loc[idx_min,'DOY'].apply(circmean).mean())
doy_T_min print(f'DOY of minimum temperature (phase constant): {doy_T_min}')
DOY of minimum temperature (phase constant): 3.0
Predict air temperature for training set
# Predict daily T with model
= model(df_train['DOY'], T_avg, A, doy_T_min) T_pred_train
# Compute mean absolute error on train set
= np.mean(np.abs(df_train['T_DAILY_AVG'] - T_pred_train))
mae_train
print(f'MAE on train set: {mae_train:.2f} Celsius')
MAE on train set: 4.53 Celsius
# Create figure
=(8,4))
plt.figure(figsize"LST_DATE"], df_train["T_DAILY_AVG"], s=5, color='gray', label="Observed")
plt.scatter(df_train["LST_DATE"],T_pred_train, label="Predicted", color='tomato', linewidth=1)
plt.plot(df_train["Air temperature (Celsius)")
plt.ylabel(
plt.grid() plt.show()
Predict daily average air temperature for test set
In this step we use the same parameters determined using the training set.
# Predict daily T with model
= model(df_test['DOY'], T_avg, A, doy_T_min) T_pred_test
# Compute mean absolute error on test set
= np.mean(np.abs(df_test['T_DAILY_AVG'] - T_pred_test))
mae_test
print(f'MAE on test set: {mae_test:.2f} Celsius')
MAE on test set: 4.54 Celsius
# Create figure
=(8,4))
plt.figure(figsize"LST_DATE"], df_test["T_DAILY_AVG"], s=5, color='gray', label="Observed")
plt.scatter(df_test["LST_DATE"],T_pred_test, label="Predicted", color='tomato', linewidth=1)
plt.plot(df_test["Air temperature (Celsius)")
plt.ylabel(
plt.grid()
plt.legend() plt.show()
Practice
Use the
curve_fit()
function from SciPy to fit the model and compare parameters values with this exercise.Read a dataset of hourly observations of mean air temperature and fit a model with an additional harmonic to simulate diurnal thermal osciallations in addition to the seasonal oscillations.