# First, let's import the needed libraries.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime


df_co2 = pd.read_json(
    "http://userpage.fu-berlin.de/soga/soga-py/300/307000_time_series/KeelingCurve.json"
)
df_co2 = df_co2.set_index("Date")  # set datetimeindex


from statsmodels.tsa.seasonal import seasonal_decompose

# Time Series Decomposition
stl_co2 = seasonal_decompose(
    df_co2["interpolated"], model="additive", extrapolate_trend="freq"
)


print(stl_co2.trend)
print(stl_co2.seasonal)
print(stl_co2.resid)
print(stl_co2.observed)

Date
1958-03-01    315.017963
1958-04-01    315.078159
1958-05-01    315.138354
1958-06-01    315.198549
1958-07-01    315.258744
                 ...    
2022-05-01    418.189994
2022-06-01    418.356774
2022-07-01    418.523554
2022-08-01    418.690335
2022-09-01    418.857115
Name: trend, Length: 775, dtype: float64
Date
1958-03-01    1.426096
1958-04-01    2.578268
1958-05-01    3.023456
1958-06-01    2.324759
1958-07-01    0.661465
                ...   
2022-05-01    3.023456
2022-06-01    2.324759
2022-07-01    0.661465
2022-08-01   -1.482930
2022-09-01   -3.167891
Name: seasonal, Length: 775, dtype: float64
Date
1958-03-01   -0.744059
1958-04-01   -0.206427
1958-05-01   -0.651809
1958-06-01   -0.283308
1958-07-01   -0.060210
                ...   
2022-05-01   -0.223449
2022-06-01    0.308467
2022-07-01   -0.285020
2022-08-01   -0.017404
2022-09-01    0.260776
Name: resid, Length: 775, dtype: float64
Date
1958-03-01    315.70
1958-04-01    317.45
1958-05-01    317.51
1958-06-01    317.24
1958-07-01    315.86
               ...  
2022-05-01    420.99
2022-06-01    420.99
2022-07-01    418.90
2022-08-01    417.19
2022-09-01    415.95
Name: interpolated, Length: 775, dtype: float64


## with seasonal_decompose().plot change the figure size after creating:

fig = stl_co2.plot()
fig.set_size_inches((18, 8))  ##
plt.show()


df_co2_2000_2020 = df_co2["2000-01-01":"2021-12-31"]


# Time Series Decomposition
stl_co2_21 = seasonal_decompose(
    df_co2_2000_2020["interpolated"], model="additive", extrapolate_trend="freq"
)


fig = stl_co2_21.plot()

fig.set_size_inches((18, 8))  ##
plt.show()


## Your code here...


plt.figure(figsize=(18, 6))


plt.plot(stl_co2_21.observed, color="grey", linewidth=1)
plt.plot(stl_co2_21.trend, color="red", linewidth=1, linestyle="dashed")

plt.ylabel("$CO_2$ (ppm)")
plt.xlabel("Time")

plt.show()


detrended = df_co2_2000_2020["interpolated"] - stl_co2_21.trend


plt.figure(figsize=(18, 6))

detrended.plot()
plt.title("Seasonal variability of CO2 in the atmosphere")
plt.ylabel("CO2")

Text(0, 0.5, 'CO2')


plt.figure(figsize=(18, 6))

plt.plot(stl_co2_21.resid)
plt.title("Residbual/Remainder")
plt.ylabel("CO2")

Text(0, 0.5, 'CO2')


## Your code here...


import scipy.stats as stats

plt.figure(figsize=(18, 6))

plt.figure()
stats.probplot(stl_co2_21.resid, dist="norm", plot=plt)
plt.title("QQ-Plot")
plt.show()

<Figure size 1800x600 with 0 Axes>


# histogram plot
plt.figure(figsize=(9, 3))

plt.hist(stl_co2_21.resid)
plt.title("Residuals")
plt.show()


## Your code here...


# Shapiro-Wilk Test
from scipy.stats import shapiro

stat, p = shapiro(stl_co2_21.resid)

print("Statistics=%.3f, p=%.3f" % (stat, p))
# interpret
alpha = 0.05
if p > alpha:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")

Statistics=0.987, p=0.017
Sample does not look Gaussian (reject H0)