# First, let's import the needed libraries.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime


# Load the three FUB sample time series from JSON and parse each one's date
# column into pandas datetimes.
# NOTE(review): errors="coerce" replaces every value that fails to parse with
# NaT instead of raising, and the same date-only format is also applied to the
# hourly file -- confirm the hourly timestamps survive parsing on the pandas
# version in use.
ts_FUB_monthly = pd.read_json("../data/ts_FUB_monthly.json").assign(
    Date=lambda frame: pd.to_datetime(
        frame["Date"], format="%Y-%m-%d", errors="coerce"
    )
)

ts_FUB_daily = pd.read_json("../data/ts_FUB_daily.json").assign(
    MESS_DATUM=lambda frame: pd.to_datetime(
        frame["MESS_DATUM"], format="%Y-%m-%d", errors="coerce"
    )
)

ts_FUB_hourly = pd.read_json("../data/ts_FUB_hourly.json").assign(
    MESS_DATUM=lambda frame: pd.to_datetime(
        frame["MESS_DATUM"], format="%Y-%m-%d", errors="coerce"
    )
)


# Show the container type, then the frame itself (print() applies str() on its own).
print(type(ts_FUB_monthly), ts_FUB_monthly, sep="\n")

<class 'pandas.core.frame.DataFrame'>
           Date  rainfall
0    1719-01-01      2.80
1    1719-02-01      1.10
2    1719-03-01      5.20
3    1719-04-01      9.00
4    1719-05-01     15.10
...         ...       ...
3631 2021-08-01     17.43
3632 2021-09-01     15.55
3633 2021-10-01     10.49
3634 2021-11-01      6.28
3635 2021-12-01      2.19

[3636 rows x 2 columns]


# Show the container type, then the frame itself (print() applies str() on its own).
print(type(ts_FUB_daily), ts_FUB_daily, sep="\n")

<class 'pandas.core.frame.DataFrame'>
      MESS_DATUM  Temp  Rain
0     1950-01-01  -3.2   2.2
1     1950-01-02   1.0  12.6
2     1950-01-03   2.8   0.5
3     1950-01-04  -0.1   0.5
4     1950-01-05  -2.8  10.3
...          ...   ...   ...
26293 2021-12-27  -3.7   0.0
26294 2021-12-28  -0.5   1.5
26295 2021-12-29   4.0   0.3
26296 2021-12-30   9.0   3.2
26297 2021-12-31  12.8   5.5

[26298 rows x 3 columns]


# Show the container type, then the frame itself (print() applies str() on its own).
print(type(ts_FUB_hourly), ts_FUB_hourly, sep="\n")

<class 'pandas.core.frame.DataFrame'>
                MESS_DATUM  rainfall
0      2002-01-28 11:00:00       0.0
1      2002-01-28 13:00:00       0.0
2      2002-01-28 15:00:00       1.7
3      2002-01-28 18:00:00       1.1
4      2002-01-28 21:00:00       0.0
...                    ...       ...
174018 2021-12-31 19:00:00       0.7
174019 2021-12-31 20:00:00       0.7
174020 2021-12-31 21:00:00       0.1
174021 2021-12-31 22:00:00       0.1
174022 2021-12-31 23:00:00       0.0

[174023 rows x 2 columns]


# Line plot of the full monthly series against its date column.
monthly_fig = plt.figure(figsize=(18, 6))
monthly_ax = monthly_fig.add_subplot(111)
monthly_ax.plot(ts_FUB_monthly["Date"], ts_FUB_monthly["rainfall"])
plt.show()


## Your code here...


# Two stacked panels for the daily data: "Temp" on top, "Rain" below.
fig, ax = plt.subplots(2, 1, figsize=(18, 8))

# Plot against the MESS_DATUM column so the x-axis shows calendar dates rather
# than the integer row index, consistent with the monthly and hourly plots.
ax[0].plot(ts_FUB_daily["MESS_DATUM"], ts_FUB_daily["Temp"])
ax[0].set_title("Temp")

ax[1].plot(ts_FUB_daily["MESS_DATUM"], ts_FUB_daily["Rain"], color="orange")
ax[1].set_title("Rain")
plt.show()


## Your code here...


# Line plot of the full hourly series against its timestamp column.
hourly_fig = plt.figure(figsize=(18, 6))
hourly_ax = hourly_fig.add_subplot(111)
hourly_ax.plot(ts_FUB_hourly["MESS_DATUM"], ts_FUB_hourly["rainfall"])
plt.show()


### 10-year period from 2000 to 2009 ###
# Each frame is re-indexed by its date column so that rows can be selected with
# a date-string label slice (both end points are included).
decade = slice("2000-01-01", "2009-12-31")

# Daily data
daily_2000_2009 = ts_FUB_daily.set_index("MESS_DATUM").loc[decade]

# Monthly data
monthly_2000_2009 = ts_FUB_monthly.set_index("Date").loc[decade]


## Plotting ##

import matplotlib.gridspec as gridspec

# Three-panel overview of the 2000-2009 subsets: the two daily series stacked in
# the left column, the monthly series spanning the whole right column.
plt.figure(figsize=(18, 8))

# 2x2 layout. wspace=0.5 is the horizontal spacing that ends up in effect.
# Optional: width_ratios=[2, 1.5], height_ratios=[1, 1]
gs = gridspec.GridSpec(2, 2, wspace=0.5, hspace=0.2)

# Left column, top row: daily temperature.
ax1 = plt.subplot(gs[0, 0])
ax1.plot(daily_2000_2009["Temp"])
ax1.set_title("Daily temperature at Berlin-Dahlem")

# Left column, bottom row: daily rainfall.
ax2 = plt.subplot(gs[1, 0])
ax2.plot(daily_2000_2009["Rain"], color="orange")
ax2.set_title("Daily rainfall at Berlin-Dahlem")

# Right column, both rows: monthly series.
ax3 = plt.subplot(gs[:, 1])
ax3.plot(monthly_2000_2009, color="black")
ax3.set_title("Mean monthly temperature \nat Berlin-Dahlem")

plt.show()


# Summary statistics (count, mean, std, min, quartiles, max) of the 2000-2009
# monthly subset.
monthly_2000_2009.describe()


## Your code here...


# Summary statistics (count, mean, std, min, quartiles, max) for each column of
# the 2000-2009 daily subset.
daily_2000_2009.describe()


import statsmodels.api as sm

# Seasonal sub-series plot of the 2000-2009 monthly data, one segment per
# calendar month.
# month_plot opens a figure of its own unless it is handed an Axes, which would
# leave a separately created figsize=(25, 4) figure empty. The wide figure is
# therefore created first and its Axes passed in.
fig, month_ax = plt.subplots(figsize=(25, 4))
sm.graphics.tsa.month_plot(monthly_2000_2009, ax=month_ax)

month_ax.set_xlabel("Month")
month_ax.set_ylabel("Temperature")

plt.show()

---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In [17], line 1
----> 1 import statsmodels.api as sm
      3 plt.figure(figsize=(25, 4))
      4 fig = sm.graphics.tsa.month_plot(monthly_2000_2009)

ModuleNotFoundError: No module named 'statsmodels'


## Month number (1-12) of every timestamp in the DatetimeIndex -- a key that
## can be used to group the values by calendar month
monthly_2000_2009.index.month


import seaborn as sns

# Box plot of the 2000-2009 monthly values, one box per calendar month.
plt.figure(figsize=(18, 7))

month_numbers = monthly_2000_2009.index.month
# Flatten the single-column frame into a 1-D array of values.
monthly_values = monthly_2000_2009.values.flatten()

sns.boxplot(x=month_numbers, y=monthly_values)
plt.xlabel("Month", fontsize=18)
plt.ylabel("Temperature", fontsize=18)

plt.show()

	rainfall
count	120.000000
mean	9.937917
std	6.808907
min	-3.590000
25%	4.092500
50%	9.780000
75%	15.677500
max	23.200000

	Temp	Rain
count	3653.000000	3653.000000
mean	9.975938	1.684369
std	7.528174	4.020982
min	-15.100000	0.000000
25%	4.000000	0.000000
50%	10.300000	0.000000
75%	16.000000	1.500000
max	27.200000	63.200000

Loading the sample data

Plotting

Summary statistics