import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import random


# Download the temperature anomaly table (JSON) from the URL below into a
# DataFrame.
t_global = pd.read_json(
    "http://userpage.fu-berlin.de/soga/soga-py/300/307000_time_series/t_global.json"
)
# Parse the "Date" column as YYYY-MM-DD; errors="coerce" turns entries that do
# not match the format into NaT instead of raising.
t_global["Date"] = pd.to_datetime(t_global["Date"], format="%Y-%m-%d", errors="coerce")
# Keep only the "Monthly Anomaly_global" column, as a Series indexed by date.
t_global = t_global.set_index("Date")["Monthly Anomaly_global"]

# Notebook display of the resulting series.
t_global

Date
1750-01-01   -0.993
1750-02-01   -1.679
1750-03-01   -0.192
1750-04-01   -0.531
1750-05-01   -1.881
              ...  
2022-05-01    1.023
2022-06-01    1.315
2022-07-01    1.289
2022-08-01    1.231
2022-09-01    1.090
Name: Monthly Anomaly_global, Length: 3273, dtype: float64


## Your code here ...


# Collapse the monthly values to one mean per calendar year. Grouping by
# index.to_period("Y") makes the result indexed by yearly periods.
# NOTE(review): the recorded output shows the data ending in 2022-09, so the
# mean for the last year would cover only part of that year - confirm this is
# acceptable for the test set.
temp_global_year = t_global.groupby(t_global.index.to_period("Y")).agg("mean")


## Your code here...


# Split the yearly series into a training period (1850-2000) and a test period
# (2001 onwards). Label-based slicing is inclusive at both ends, so the test
# slice must start at 2001: starting it at 2000 would place the year 2000 in
# both the training and the test set.
temp_global_training = temp_global_year["1850-01-01":"2000-01-01"]
temp_global_test = temp_global_year["2001-01-01":]


# Plot the training and the test series in one figure.
# The legend labels are built from the first and last year of each series so
# that they always match the plotted data; the test slice is open-ended, so a
# hard-coded year range goes stale as soon as the data set is extended.
plt.figure(figsize=(18, 6))
plt.title("Earth Surface Temperature Anomalies", fontsize=14)
temp_global_training.plot(
    label=(
        f"training set {temp_global_training.index[0].year}"
        f"-{temp_global_training.index[-1].year}"
    ),
    fontsize=14,
)
temp_global_test.plot(
    label=(
        f"test set {temp_global_test.index[0].year}"
        f"-{temp_global_test.index[-1].year}"
    ),
    fontsize=14,
)

plt.legend()
plt.show()


# Notebook display: show the monthly series again.
t_global

Date
1750-01-01   -0.993
1750-02-01   -1.679
1750-03-01   -0.192
1750-04-01   -0.531
1750-05-01   -1.881
              ...  
2022-05-01    1.023
2022-06-01    1.315
2022-07-01    1.289
2022-08-01    1.231
2022-09-01    1.090
Name: Monthly Anomaly_global, Length: 3273, dtype: float64


from scipy.stats import boxcox

# Box-Cox only accepts strictly positive input, so the anomalies are shifted by
# a constant before the transform (assumes every value is greater than -10).
# Called without a lambda, boxcox() returns the transformed values together
# with the lambda it fitted (stored in `boxcox_lamba`, spelling as in the code).
boxcox_transformed_data, boxcox_lamba = boxcox(temp_global_training + 10)
# boxcox() returns a plain array; wrap it in a Series to restore the time index.
boxcox_transformed_data = pd.Series(
    boxcox_transformed_data, index=temp_global_training.index
)


# Two stacked panels: the original training series on top, the Box-Cox
# transformed series below, for a visual comparison.
fig, ax = plt.subplots(2, 1, figsize=(16, 8))
temp_global_training.plot(ax=ax[0], color="black", fontsize=14)
ax[0].set_title("Original time series", fontsize=14)


boxcox_transformed_data.plot(
    ax=ax[1],
    color="grey",
)
ax[1].set_title("Box-Cox transformed time series", fontsize=14)

# Grid lines on both panels.
ax[0].grid()
ax[1].grid()

plt.tight_layout()
plt.show()


# KPSS test
from statsmodels.tsa.stattools import kpss


def kpss_test(series, **kw):
    """Run the KPSS test on ``series`` and print a short report.

    Parameters
    ----------
    series
        Data handed to ``kpss`` unchanged.
    **kw
        Extra keyword arguments forwarded to ``kpss``.

    Returns
    -------
    None
        The statistic, p-value, number of lags, critical values and a
        one-line verdict are printed; nothing is returned. A p-value below
        0.05 is reported as "not stationary".
    """
    result = kpss(series, **kw)
    statistic, p_value, n_lags, critical_values = result

    # Headline numbers, one per line.
    print(f"KPSS Statistic: {statistic}")
    print(f"p-value: {p_value}")
    print(f"num lags: {n_lags}")

    # Critical values, in the order the mapping provides them.
    print("Critial Values:")
    for level in critical_values:
        print(f"   {level} : {critical_values[level]}")

    # Verdict at the 5% significance level.
    negation = "not " if p_value < 0.05 else ""
    print(f"Result: The series is {negation}stationary")


# KPSS test on the undifferenced yearly training series.
kpss_test(temp_global_training)

KPSS Statistic: 1.613241516211716
p-value: 0.01
num lags: 8
Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
Result: The series is not stationary

C:\Users\mceck\miniconda3\envs\rasterdata\lib\site-packages\statsmodels\tsa\stattools.py:2018: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is smaller than the p-value returned.

  warnings.warn(


## Your code here...


# First-order differencing (value minus the previous value); the first element
# has no predecessor and becomes NaN.
temp_global_training_diff1 = temp_global_training.diff()


# Repeat the KPSS test on the differenced series.
kpss_test(temp_global_training_diff1.dropna())  # drop the leading NaN produced by diff() before testing

KPSS Statistic: 0.0976339172178381
p-value: 0.1
num lags: 24
Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
Result: The series is stationary

C:\Users\mceck\miniconda3\envs\rasterdata\lib\site-packages\statsmodels\tsa\stattools.py:2022: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

  warnings.warn(


# Plot the first-differenced training series.
# The title previously ended in a stray backtick (markdown residue); removed.
plt.figure(figsize=(18, 6))
plt.title("Differenced data set: temp_global_training_diff1")
temp_global_training_diff1.plot(color="black")

plt.grid()
plt.show()


from statsmodels.graphics.tsaplots import plot_pacf, plot_acf

# ACF of the differenced training series.
# plot_acf() opens a figure of its own unless it is given an Axes, which left
# the figure created here empty and ignored the requested size. Passing the
# current axes of this figure makes the ACF draw into it.
plt.figure(figsize=(18, 6))


plot_acf(temp_global_training_diff1.dropna(), ax=plt.gca())
plt.title("ACF for Differenced Series")

plt.show()

<Figure size 1800x600 with 0 Axes>


# ACF (left) and PACF (right) of the differenced series side by side, used to
# choose candidate AR and MA orders.
fig, ax = plt.subplots(1, 2, figsize=(13, 5))

plot_acf(temp_global_training_diff1.dropna(), ax=ax[0])
ax[0].set_title("ACF")


plot_pacf(
    temp_global_training_diff1.dropna(), method="ywm", ax=ax[1]
)  # "ywm" selects the estimation method explicitly (Yule-Walker without adjustment, per the statsmodels docs)

ax[1].set_title("PACF")
plt.show()


from statsmodels.tsa.arima.model import ARIMA

# Fit an ARIMA model with order (p, d, q) = (3, 1, 0) to the training series:
# three autoregressive terms, one difference, no moving-average terms.
model = ARIMA(temp_global_training, order=(3, 1, 0))
model_fit = model.fit()
# Print the full estimation summary (coefficients, information criteria,
# residual diagnostics).
print(model_fit.summary())

                                 SARIMAX Results                                  
==================================================================================
Dep. Variable:     Monthly Anomaly_global   No. Observations:                  151
Model:                     ARIMA(3, 1, 0)   Log Likelihood                  32.139
Date:                    Mon, 03 Apr 2023   AIC                            -56.278
Time:                            14:43:37   BIC                            -44.235
Sample:                        12-31-1850   HQIC                           -51.385
                             - 12-31-2000                                         
Covariance Type:                      opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.5353      0.074     -7.225      0.000      -0.681      -0.390
ar.L2         -0.3974      0.078     -5.079      0.000      -0.551      -0.244
ar.L3         -0.3856      0.068     -5.699      0.000      -0.518      -0.253
sigma2         0.0380      0.005      7.476      0.000       0.028       0.048
===================================================================================
Ljung-Box (L1) (Q):                   0.17   Jarque-Bera (JB):                 1.45
Prob(Q):                              0.68   Prob(JB):                         0.48
Heteroskedasticity (H):               0.94   Skew:                            -0.13
Prob(H) (two-sided):                  0.83   Kurtosis:                         2.60
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).


## extract arima parameters:
# The fitted parameter is named "sigma2" (it appears under that name in the
# model summary), so it is printed with that label; labelling it "sigma"
# misreports what the number is.

print(
    "AR 1 =",
    round(model_fit.params["ar.L1"], 4),
    "AR 2 =",
    round(model_fit.params["ar.L2"], 4),
    "AR 3 =",
    round(model_fit.params["ar.L3"], 4),
    "sigma2 =",
    round(model_fit.params["sigma2"], 4),
)

# Notebook display: the AICc of the fitted model.
model_fit.aicc

AR 1 = -0.5353 AR 2 = -0.3974 AR 3 = -0.3856 sigma = 0.038

-56.0018772466296


## Your code here...


# Fit each candidate order in turn and print its AICc for comparison.
# `model` and `model_fit` end up holding the last candidate.
for candidate_order in [(3, 1, 0), (3, 1, 1), (3, 1, 2), (2, 1, 2)]:
    model = ARIMA(temp_global_training, order=candidate_order)
    model_fit = model.fit()
    print(
        f"ARIMA({','.join(map(str, candidate_order))})"
        f" - AICc: {round(model_fit.aicc, 2)}"
    )

ARIMA(3,1,0) - AICc: -56.0
ARIMA(3,1,1) - AICc: -55.48
ARIMA(3,1,2) - AICc: -55.84
ARIMA(2,1,2) - AICc: -53.28


from pmdarima.arima import auto_arima

# Let pmdarima search for a model order automatically, using the library's
# default search settings.
auto_model = auto_arima(temp_global_training)
# Notebook display: summary of the selected model.
auto_model.summary()


## Your code here...


# Residuals of the automatically selected model, wrapped in a DataFrame for
# plotting.
residuals = pd.DataFrame(auto_model.resid())


# Residual diagnostics: residuals over time (left) and their ACF (right).
fig, ax = plt.subplots(1, 2, figsize=(13, 5))

residuals.plot(ax=ax[0], legend=False)
ax[0].grid()
# The title previously had an unbalanced closing parenthesis.
# NOTE(review): the order is hard-coded; it matches the recorded auto_arima
# summary - confirm it still does if the data or search settings change.
ax[0].set_title("Residuals of ARIMA(2, 1, 3)")


plot_acf(residuals, ax=ax[1])

ax[1].set_title("ACF of Residuals")
plt.show()


from statsmodels.stats.diagnostic import acorr_ljungbox

# Perform the Ljung-Box test on the model residuals at lag 10.
# model_df is the number of degrees of freedom consumed by the fitted model,
# i.e. p + q for an ARMA-type model. Per the statsmodels docs the test then
# uses (lags - model_df) degrees of freedom.
# NOTE(review): 5 assumes the selected order is (2, 1, 3) as in the recorded
# summary (p + q = 2 + 3) - confirm if auto_arima picks a different model.
Btest = acorr_ljungbox(
    auto_model.resid(), lags=[10], return_df=True, model_df=5
)  # test degrees of freedom = lags - model_df = 10 - 5


# Notebook display of the test result table.
Btest


# Refit the order chosen by auto_arima, (2, 1, 3), with statsmodels so that the
# statsmodels forecasting API can be used.
# NOTE(review): the recorded auto_arima summary lists an intercept term, while
# no trend argument is passed here - confirm whether the refit is meant to
# include that term, otherwise the two models are not the same.
model = ARIMA(temp_global_training, order=(2, 1, 3))
fitted = model.fit()


# Point forecasts for the 40 periods after the end of the training data.
# NOTE(review): alpha presumably has no effect on point forecasts; the
# intervals are obtained separately via get_forecast().conf_int() - confirm
# and consider dropping the argument.
forecast_series = fitted.forecast(40, alpha=0.05)


# Overlay the model's in-sample fitted values (blue) on the training series
# (black) to judge the fit visually.
plt.figure(figsize=(13, 4))
plt.title("Earth Surface Temperature Anomalies")
temp_global_training.plot(color="black", label="training set 1850-2000")
plt.plot(fitted.fittedvalues, color="blue", label="fitted values")

plt.legend()
plt.show()


# Forecast object for the next 40 periods; it provides the interval bounds
# used for the shaded bands in the forecast plot.
forecast = fitted.get_forecast(40)
conf_int_95 = forecast.conf_int(alpha=0.05)  # 95% interval (alpha = 1 - 0.95)
conf_int_80 = forecast.conf_int(alpha=0.2)  # 80% interval (alpha = 1 - 0.80)


# Plot training data, test data and the forecast with its 95% and 80% bands.
# The legend labels are built from the first and last year of each series so
# that they always match the plotted data; the test slice is open-ended, so a
# hard-coded year range goes stale as soon as the data set is extended.
plt.figure(figsize=(13, 4))
plt.title("Earth Surface Temperature Anomalies")
temp_global_training.plot(
    color="black",
    label=(
        f"training set {temp_global_training.index[0].year}"
        f"-{temp_global_training.index[-1].year}"
    ),
)
temp_global_test.plot(
    color="red",
    label=(
        f"test set {temp_global_test.index[0].year}"
        f"-{temp_global_test.index[-1].year}"
    ),
)
plt.plot(forecast_series, label="forecast", color="blue")


# Wider, lighter band: 95% interval.
plt.fill_between(
    conf_int_95.index,
    conf_int_95["lower Monthly Anomaly_global"],
    conf_int_95["upper Monthly Anomaly_global"],
    color="b",
    alpha=0.1,
)

# Narrower, darker band: 80% interval, drawn on top of the 95% band.
plt.fill_between(
    conf_int_80.index,
    conf_int_80["lower Monthly Anomaly_global"],
    conf_int_80["upper Monthly Anomaly_global"],
    color="b",
    alpha=0.2,
)


plt.legend()
plt.show()

	coef	std err	z	P>\|z\|	[0.025	0.975]
intercept	0.0193	0.006	3.013	0.003	0.007	0.032
ar.L1	-0.6014	0.114	-5.289	0.000	-0.824	-0.379
ar.L2	-0.6555	0.110	-5.974	0.000	-0.871	-0.440
ma.L1	-0.0103	0.105	-0.098	0.922	-0.216	0.196
ma.L2	0.1291	0.093	1.390	0.165	-0.053	0.311
ma.L3	-0.7448	0.084	-8.882	0.000	-0.909	-0.580
sigma2	0.0336	0.004	7.524	0.000	0.025	0.042

The data

Data transformation

Stationary check

Examine the ACF/PACF

Model selection

Check the residuals

Calculate forecasts

Dep. Variable:	y	No. Observations:	151
Model:	SARIMAX(2, 1, 3)	Log Likelihood	40.811
Date:	Mon, 03 Apr 2023	AIC	-67.622
Time:	14:43:43	BIC	-46.547
Sample:	12-31-1850	HQIC	-59.060
	- 12-31-2000
Covariance Type:	opg

Ljung-Box (L1) (Q):	0.03	Jarque-Bera (JB):	0.58
Prob(Q):	0.87	Prob(JB):	0.75
Heteroskedasticity (H):	0.92	Skew:	-0.02
Prob(H) (two-sided):	0.76	Kurtosis:	2.70