Let us reload the working data set, the subset of the Keeling Curve from 1990 to 2021, from the previous section.
# First, let's import the needed libraries.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
df_co2_1990_2021 = pd.read_json(
"http://userpage.fu-berlin.de/soga/soga-py/300/307000_time_series/KeelingCurve_1990-2021.json"
)
df_co2_1990_2021 = df_co2_1990_2021.set_index("Date") # set datetimeindex
We apply least squares estimation and fit four different models, a linear $(p=1)$, a quadratic $(p=2)$, a cubic $(p=3)$ and a quartic $(p=4)$ model, using the `numpy.polyfit()` function.
A simple linear regression can be computed directly in NumPy. The `numpy.polyfit()` function is an easy solution if we want to do linear regression in NumPy without `sklearn` or other more sophisticated packages. `numpy.polyfit()` returns the slope ($m$) and the intercept ($n$) of our regression line. Adding the argument `full=True` yields additional information, such as the sum of the squares of the fit errors. The argument `deg` sets the degree of the fitting polynomial (e.g. linear = 1, quadratic = 2, cubic = 3, ...).
Exercise: Fit four polynomial models with $(p=1)$, $(p=2)$, $(p=3)$ and $(p=4)$.
## ignore the dates; use an integer time index of the same length instead
t = np.arange(0, len(df_co2_1990_2021))
## linear fit
z = np.polyfit(t, df_co2_1990_2021.values.flatten(), 1)
sum_err = np.polyfit(t, df_co2_1990_2021.values.flatten(), 1, full=True)[1]  # sum of squared fit errors
p = np.poly1d(z)
fit1 = p(t)
## quadratic fit
z = np.polyfit(t, df_co2_1990_2021.values.flatten(), 2)
p = np.poly1d(z)
fit2 = p(t)
## cubic fit
z = np.polyfit(t, df_co2_1990_2021.values.flatten(), 3)
p = np.poly1d(z)
fit3 = p(t)
## quartic fit
z = np.polyfit(t, df_co2_1990_2021.values.flatten(), 4)
p = np.poly1d(z)
fit4 = p(t)
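To compare the four fits numerically, one option is the sum of squared errors (SSE) that `full=True` returns. Here is a sketch with a synthetic cubic-trend-plus-noise series standing in for the CO2 data (the real series would be used the same way):

```python
import numpy as np

rng = np.random.default_rng(42)
t = np.arange(264)  # same length as the monthly 1990-2021 series
# Synthetic stand-in for the CO2 values: cubic trend plus noise
y = 350 + 0.15 * t + 1e-4 * t**2 + 5e-7 * t**3 + rng.normal(0, 0.5, t.size)

# The second return value of polyfit(..., full=True) is the SSE
for deg in (1, 2, 3, 4):
    sse = np.polyfit(t, y, deg, full=True)[1][0]
    print(f"degree {deg}: SSE = {sse:.2f}")
```

Note that the SSE can only decrease as the degree grows, so it is a descriptive aid only; the residual plots below guard against overfitting.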
Since subtracting the fitted values from the data yields a detrended time series, we evaluate the goodness of fit of the models by plotting the residuals.
fig, axs = plt.subplots(2, 2, figsize=(10, 8))
## residuals = observed values minus fitted values
axs[0, 0].plot(t, df_co2_1990_2021.values.flatten() - fit1)
axs[0, 0].set_title("CO2 residuals from a linear fit")
axs[0, 1].plot(t, df_co2_1990_2021.values.flatten() - fit2)
axs[0, 1].set_title("CO2 residuals from a quadratic fit")
axs[1, 0].plot(t, df_co2_1990_2021.values.flatten() - fit3)
axs[1, 0].set_title("CO2 residuals from a cubic fit")
axs[1, 1].plot(t, df_co2_1990_2021.values.flatten() - fit4)
axs[1, 1].set_title("CO2 residuals from a quartic fit")
plt.show()
The plots of the residuals indicate that the linear fit (upper left) did not remove the entire trend in the data: there appears to be some non-linearity in the trend. The residual plot of the quadratic fit (upper right) still shows some structure. The cubic model (lower left) and the quartic model (lower right) seem to fit the data well.
Following Ockham's razor, we pick the cubic model for further analysis. A summary of the cubic model is provided by calling `summary()`.
import statsmodels.formula.api as smf
results = smf.ols(formula="t ~ fit3", data=df_co2_1990_2021).fit()
results.summary()
| Dep. Variable: | t | R-squared: | 0.998 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.998 |
| Method: | Least Squares | F-statistic: | 1.100e+05 |
| Date: | Mon, 03 Apr 2023 | Prob (F-statistic): | 0.00 |
| Time: | 14:40:35 | Log-Likelihood: | -721.04 |
| No. Observations: | 264 | AIC: | 1446. |
| Df Residuals: | 262 | BIC: | 1453. |
| Df Model: | 1 | | |
| Covariance Type: | nonrobust | | |

| | coef | std err | t | P>\|t\| | [0.025 | 0.975] |
|---|---|---|---|---|---|---|
| Intercept | -1990.9915 | 6.403 | -310.934 | 0.000 | -2003.600 | -1978.383 |
| fit3 | 5.4154 | 0.016 | 331.684 | 0.000 | 5.383 | 5.448 |

| Omnibus: | 41.087 | Durbin-Watson: | 0.001 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 25.535 |
| Skew: | -0.628 | Prob(JB): | 2.85e-06 |
| Kurtosis: | 2.137 | Cond. No. | 1.09e+04 |
The p-values indicate that the estimated coefficients of the cubic fit are statistically significant.
As an alternative to least squares estimation we may apply differencing. For the purpose of demonstration we apply first order $(\Delta y_t)$, second order $(\Delta^2 y_t)$, third order $(\Delta^3 y_t)$ and fourth order $(\Delta^4 y_t)$ differencing.
To difference a series, $\Delta y_t = y_t - y_{t-1}$, in Python we apply the `diff()` function. Use `y.diff()` for $(\Delta y_t)$, `y.diff().diff()` for $(\Delta^2 y_t)$, `y.diff().diff().diff()` for $(\Delta^3 y_t)$, and so on for higher order differencing.
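A tiny illustration with a made-up quadratic series: the first difference of a quadratic trend is linear, and the second difference is constant. Note that each `diff()` call introduces one leading `NaN`:

```python
import pandas as pd

y = pd.Series([1.0, 4.0, 9.0, 16.0, 25.0])  # squares, i.e. a quadratic trend

d1 = y.diff()         # first differences:  NaN, 3, 5, 7, 9  (linear)
d2 = y.diff().diff()  # second differences: NaN, NaN, 2, 2, 2  (constant)

print(d1.tolist())
print(d2.tolist())
```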
Exercise: Apply $(\Delta y_t)$, $(\Delta^2 y_t)$, $(\Delta^3 y_t)$ and $(\Delta^4 y_t)$ differencing to the `df_co2_1990_2021` time series.
## Your code here ...
diff1 = df_co2_1990_2021.diff()
diff2 = diff1.diff()
diff3 = diff2.diff()
diff4 = diff3.diff()
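As a sanity check on arbitrary made-up numbers, chained `diff()` calls agree with the expanded second difference $\Delta^2 y_t = y_t - 2y_{t-1} + y_{t-2}$:

```python
import pandas as pd

y = pd.Series([2.0, 3.0, 5.0, 8.0, 13.0])

# Second difference via two chained diff() calls
d2_chained = y.diff().diff()

# Second difference via the expanded formula y_t - 2*y_{t-1} + y_{t-2}
d2_formula = y - 2 * y.shift(1) + y.shift(2)

print(d2_chained.dropna().tolist())  # both give [1.0, 1.0, 2.0]
print(d2_formula.dropna().tolist())
```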
Let us plot the results:
fig, axs = plt.subplots(2, 2, figsize=(18, 8))
axs[0, 0].plot(diff1)
axs[0, 0].set_title(r"Differencing: $\Delta^1 y_t$")
axs[0, 1].plot(diff2)
axs[0, 1].set_title(r"Differencing: $\Delta^2 y_t$")
axs[1, 0].plot(diff3)
axs[1, 0].set_title(r"Differencing: $\Delta^3 y_t$")
axs[1, 1].plot(diff4)
axs[1, 1].set_title(r"Differencing: $\Delta^4 y_t$")
plt.show()
The plots indicate that first order differencing already did quite a good job of removing the linear trend from the data. However, in order to base our decision on a statistical quantity, we make use of the Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test.
The null hypothesis in a KPSS test is that a time series is stationary around a deterministic trend (trend-stationary). The alternative hypothesis is that a unit root is present, which means that the process is non-stationary. In the presence of a shock, trend-stationary processes are mean-reverting. This means that the time series will converge again towards the mean, while shocks have a permanent impact on the mean of unit-root processes (i.e. no convergence over time).
Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test
$H_0: \quad \text{The time series is trend-stationary}$
$H_A: \quad \text{The time series is not trend-stationary}$
We use the `kpss()` function from the `statsmodels.tsa.stattools` module and write our own function called `kpss_test()` to generate the desired output. Please note that a warning message is generated whenever the test statistic falls outside the range of the p-value look-up table (i.e. the reported p-value is clipped at 0.01 or 0.1). We apply the KPSS test on both the original and the differenced data sets.
from statsmodels.tsa.stattools import kpss
def kpss_test(series, **kw):
    statistic, p_value, n_lags, critical_values = kpss(series, **kw)
    # Format output
    print(f"KPSS Statistic: {statistic}")
    print(f"p-value: {p_value}")
    print(f"num lags: {n_lags}")
    print("Critical Values:")
    for key, value in critical_values.items():
        print(f"   {key} : {value}")
    print(f'Result: The series is {"not " if p_value < 0.05 else ""}stationary')
kpss_test(df_co2_1990_2021)
KPSS Statistic: 2.483341384260723
p-value: 0.01
num lags: 10
Critical Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
Result: The series is not stationary
InterpolationWarning: The test statistic is outside of the range of p-values available in the look-up table. The actual p-value is smaller than the p-value returned.
The p-value of the KPSS test on the original data set (`df_co2_1990_2021`) is $p<0.01$. Hence we reject $H_0$ in favor of $H_A$: the original time series is not trend-stationary.
kpss_test(diff1.dropna()) ## ignore NA values
KPSS Statistic: 0.007266415372046855
p-value: 0.1
num lags: 3
Critical Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
Result: The series is stationary
InterpolationWarning: The test statistic is outside of the range of p-values available in the look-up table. The actual p-value is greater than the p-value returned.
The p-value of the KPSS test on the differenced data set (`diff1`) is $p>0.1$. Hence we do not reject $H_0$: the differenced time series is trend-stationary.
Citation
The E-Learning project SOGA-Py was developed at the Department of Earth Sciences by Annette Rudolph, Joachim Krois and Kai Hartmann. You can reach us via mail by soga[at]zedat.fu-berlin.de.
Please cite as follows: Rudolph, A., Krois, J., Hartmann, K. (2023): Statistics and Geodata Analysis using Python (SOGA-Py). Department of Earth Sciences, Freie Universitaet Berlin.