# Import modules
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
42 First and last frost
The concept of first and last frost dates is essential in the context of gardening and agriculture, as it helps in determining the suitable planting and harvesting times. A frost date typically refers to when temperatures are expected to fall to the threshold level that can cause damage to plants. This is generally at or below 0°C. It’s important to note that these frost dates are not absolute but are probabilistic estimates based on historical data.
In addition to the first and last frost dates, the concept of the “frost-free window” or “growing season” refers to the period between the last spring frost and the first fall frost, during which temperatures are generally above the freezing point.
The length of the frost-free window varies depending on the geographical location and local climatic conditions. For instance, in warmer regions, this period may be quite long, allowing for an extended growing season suitable for a wide variety of crops. In contrast, colder regions may have a shorter frost-free window, limiting the types of plants that can be grown.
In this exercise we will use a long-term dataset from 1980 to 2020 to estimate the:
- typical date for first and last frost
- frost-free period
- probability density and cumulative density functions
# Read data
= pd.read_csv('../datasets/Johnson_Kansas.csv', parse_dates=['timestamp'])
df 3) df.head(
id | longitude | latitude | timestamp | doy | pr | rmax | rmin | sph | srad | ... | tmmn | tmmx | vs | erc | eto | bi | fm100 | fm1000 | etr | vpd | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 19800101 | -94.822258 | 38.883761 | 1980-01-01 | 1 | 0.0 | 77.683876 | 43.286247 | 0.002620 | 8.263647 | ... | -3.557654 | 7.847253 | 2.192723 | 40.656410 | 1.040586 | 29.367460 | 13.726248 | 13.835570 | 1.627161 | 0.353715 |
1 | 19800102 | -94.822258 | 38.883761 | 1980-01-02 | 2 | 0.0 | 78.571579 | 35.635567 | 0.002420 | 7.208553 | ... | -3.577643 | 5.967126 | 3.296802 | 41.918404 | 1.219669 | 35.399471 | 13.522950 | 13.820871 | 1.931061 | 0.320570 |
2 | 19800103 | -94.822258 | 38.883761 | 1980-01-03 | 3 | 0.0 | 82.316978 | 39.000980 | 0.001985 | 5.930851 | ... | -7.448096 | 0.307275 | 3.042193 | 40.172096 | 0.770442 | 32.923172 | 13.797589 | 13.864967 | 1.172392 | 0.174911 |
3 rows × 21 columns
# Add year column. This will help us grouping analyses by year.
'year'] = df['timestamp'].dt.year df[
Find date of last frost
# Define variables for last frost
= 1
lf_start_doy = 183 # Around July 15 (to be on the safe side)
lf_end_doy = 0 # Celsius freeze_temp
# Get unique years
= df['year'].unique()
unique_years = []
lf_doy
for year in unique_years:
= df['year'] == year
idx_year = (df['doy'] > lf_start_doy) & (df['doy'] <= lf_end_doy)
idx_lf_period = df['tmmn'] < freeze_temp
idx_frost = idx_year & idx_lf_period & idx_frost
idx
# Select all DOY for current year that meet all conditions.
# Sort in ASCENDING order. The last value was the last freezing DOY
= df.loc[idx, 'doy'].sort_values()
all_doy_current_year -1])
lf_doy.append(all_doy_current_year.iloc[
Find date of first frost
# Define variables for first frost
= 183 # Around July 15 (to be on the safe side)
ff_start_doy = 365 ff_end_doy
# Get unique years
= []
ff_doy
for year in unique_years:
= df['year'] == year
idx_year = (df['doy'] > ff_start_doy) & (df['doy'] <= ff_end_doy)
idx_ff_period = df['tmmn'] < freeze_temp
idx_frost = idx_year & idx_ff_period & idx_frost
idx
# Select all DOY for current year that meet all conditions.
# Sort in DESCENDING order. The last value was the last freezing DOY
= df.loc[idx, 'doy'].sort_values(ascending=False)
all_doy_current_year -1])
ff_doy.append(all_doy_current_year.iloc[
Find median date for first and last frost
# Create dataframe with the first and last frost for each year
# The easiest is to create a dictionary with the variables we already have
= pd.DataFrame({'year':unique_years,
df_frost 'first_frost_doy':ff_doy,
'last_frost_doy':lf_doy})
3) df_frost.head(
year | first_frost_doy | last_frost_doy | |
---|---|---|---|
0 | 1980 | 299 | 104 |
1 | 1981 | 296 | 79 |
2 | 1982 | 294 | 100 |
# Print median days of the year
'first_frost_doy','last_frost_doy']].median() df_frost[[
first_frost_doy 298.0
last_frost_doy 96.0
dtype: float64
# Compute median DOY and calculate date for first frost
= df_frost['first_frost_doy'].median()
first_frost_median_doy = pd.to_datetime('2000-01-01') + pd.Timedelta(first_frost_median_doy, 'days')
first_frost_median_date print(f"Median date first frost: {first_frost_median_date.strftime('%d-%B')}")
= df_frost['first_frost_doy'].min() # Min value for earliest first frost
first_frost_earliest_doy = pd.to_datetime('2000-01-01') + pd.Timedelta(first_frost_earliest_doy, 'days')
first_frost_earliest_date print(f"Earliest date first frost on record: {first_frost_earliest_date.strftime('%d-%B')}")
# Compute median DOY and calculate date for first frost
= df_frost['last_frost_doy'].median()
last_frost_median_doy = pd.to_datetime('2000-01-01') + pd.Timedelta(last_frost_median_doy, 'days')
last_frost_median_date print(f"Median date last frost: {last_frost_median_date.strftime('%d-%B')}")
= df_frost['last_frost_doy'].max() # Max value for latest last frost
last_frost_latest_doy = pd.to_datetime('2000-01-01') + pd.Timedelta(last_frost_latest_doy, 'days')
last_frost_latest_date print(f"Latest date last frost on record: {last_frost_latest_date.strftime('%d-%B')}")
Median date first frost: 25-October
Earliest date first frost on record: 23-September
Median date last frost: 06-April
Latest date last frost on record: 30-April
Compute frost-free period
# Period without any risk of frost
= first_frost_earliest_doy - last_frost_latest_doy
frost_free print(f'Frost-free period: {frost_free} days')
Frost-free period: 146 days
Probability density functions
Let’s first examine if a normal distribution fits the observations using probability plots, which compare the distribution of our data against the quantiles of a specified theoretical distribution (normal distribution in this case, similar to qq-plots). If the agreement is good, then this provides some support for using the selected distribution.
# Check distribution of data
=(8,3))
plt.figure(figsize1,2,1)
plt.subplot('first_frost_doy'], dist="norm", rvalue=True, plot=plt)
stats.probplot(df_frost['First frost')
plt.title(1,2,2)
plt.subplot('last_frost_doy'], dist="norm", rvalue=True, plot=plt)
stats.probplot(df_frost['Last frost')
plt.title('')
plt.ylabel( plt.show()
# Fit normal distributions
= stats.fit(stats.norm, df_frost['first_frost_doy'], bounds=((180,365),(1,25)))
fitted_pdf_ff print(fitted_pdf_ff.params)
= stats.fit(stats.norm, df_frost['last_frost_doy'], bounds=((1,180),(1,25)))
fitted_pdf_lf print(fitted_pdf_lf.params)
FitParams(loc=296.12195397938365, scale=13.816206118531046)
FitParams(loc=95.39034225034837, scale=11.17897667307393)
# Create vector for the normal pdf of first frost
= np.linspace(df_frost['first_frost_doy'].min(),
x_ff 'first_frost_doy'].max(),
df_frost[=1000)
num
# Create vector for the normal pdf of last frost
= np.linspace(df_frost['last_frost_doy'].min(),
x_lf 'last_frost_doy'].max(),
df_frost[=1000) num
# Figure of free-frost period
=(6,3))
plt.figure(figsize'Johnson County, KS')
plt.title(
# Add histograms for first and last frost
'first_frost_doy'], bins='scott', density=True,
plt.hist(df_frost[='Median first frost', facecolor=(0,0.5,1,0.25), edgecolor='navy')
label'last_frost_doy'], bins='scott', density=True,
plt.hist(df_frost[='Median last frost', facecolor=(1,0.2,0,0.25), edgecolor='tomato')
label
# Add median lines
='--', color='tomato')
plt.axvline(last_frost_median_doy, linestyle='--', color='navy')
plt.axvline(first_frost_median_doy, linestyle
# Overlay fitted distributions to each histogram
*fitted_pdf_ff.params),
plt.plot(x_ff, stats.norm.pdf(x_ff, ='navy', label='First frost pdf')
color*fitted_pdf_lf.params),
plt.plot(x_lf, stats.norm.pdf(x_lf, ='tomato', label='Last frost pdf')
color
# Add filled area to show the frost-free period
0,0.05), last_frost_latest_doy, first_frost_earliest_doy,
plt.fill_betweenx(np.linspace(=(0, 0.5, 0.5, 0.2), edgecolor='k')
facecolor
# Add some annotations
145, 0.04, "Frost-free period", size=14)
plt.text(165, 0.035, f"{frost_free} days", size=14)
plt.text(
0, 0.05])
plt.ylim(['Day of the year')
plt.xlabel('Density')
plt.ylabel(
plt.minorticks_on()
plt.legend() plt.show()
# Cumulative distributions
# Create vector from 0 to x_max to plot the lognorm pdf
= np.linspace(df_frost['first_frost_doy'].min(),
x 'first_frost_doy'].max(),
df_frost[=1000)
num
=(10,4))
plt.figure(figsize'Johnson County, KS 1980-2020')
plt.suptitle(
# First frost
1,2,1)
plt.subplot('first_frost_doy'], label="Empirical CDF")
plt.ecdf(df_frost[*fitted_pdf_ff.params), label='Theoretical CDF')
plt.plot(x_ff, stats.norm.cdf(x_ff, 'First frost')
plt.title('Day of the year')
plt.xlabel('Cumulative density')
plt.ylabel(
plt.legend()
# Last frost (note the use of the complementary CDF)
1,2,2)
plt.subplot('last_frost_doy'], complementary=True, label="Empirical CDF")
plt.ecdf(df_frost[1-stats.norm.cdf(x_lf, *fitted_pdf_lf.params), label='Theoretical CDF')
plt.plot(x_lf, 'Last frost')
plt.title('Day of the year')
plt.xlabel('Cumulative density')
plt.ylabel(
plt.legend()
plt.show()
# Determine the probability of a first frost occurying on or before:
= 245 # September 1
doy *fitted_pdf_ff.params)
stats.norm.cdf(doy,
# As expected, if you change the DOY for a value closer to July 1,
# the chances of frost in the north hemisphere are going to decrease to nearly zero.
0.00010773852588002489
# Determine the probability of a frost occurying on or after:
= 122 # May 1
doy *fitted_pdf_lf.params)
stats.norm.sf(doy,
# As expected, if you change the DOY for a value closer to January 1,
# the chances of frost in the north hemisphere are going to increase to 1 (or 100%).
0.008648561213750895
Why did we use the complementary of the CDF for determining the probability of last frost? We used the complementary CDF (or sometimes known as the survival function, hence the syntax .sf()
, because in most cases we are interested in knowing the probability of a frost “on or after” a given date. For instance, a farmer that is risk averse will prefer to plant corn when the chances of a late frost that can kill the entire crop overnight is very low.