In this section we download and preprocess data on monthly Earth surface temperature anomalies, provided by the Berkeley Earth Surface Temperature Study. This study combines 1.6 billion temperature reports from 16 pre-existing archives to provide data on global and regional temperature trends. The data are compiled by Berkeley Earth, and data access is granted here.
We download two data sets: first, the Estimated Global Land-Surface TAVG based on the Complete Berkeley Dataset (here), and second, the Berkeley Earth analysis of mean temperature for Germany (here). After downloading the files, we preprocess each data set into a pd.DataFrame object for further analysis.
If the data sets are not available, you may download a copy of the global data set here and a copy of the data set for Germany here (both downloaded on 2022-07-25).
# First, let's import the needed libraries.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
current_time = datetime.now().strftime("%Y-%m-%d")
print("Date of download: ", current_time)
Date of download: 2023-04-03
### GLOBAL DATA SET ###
import requests
from io import StringIO
url_global = "http://berkeleyearth.lbl.gov/auto/Global/Complete_TAVG_complete.txt"
s = requests.get(url_global).text
temp_global = pd.read_csv(
StringIO(s), sep=" ", skiprows=33, skipinitialspace=True
) ## skiprows to skip the intro text
header = [
"Year_global",
"Month_global",
"Monthly Anomaly_global",
"Monthly Unc_global",
"Annual Anomaly_global",
"Annual Unc_global",
"Five-year Anomaly_global",
"Five-year Unc_global",
"Ten-year Anomaly_global",
"Ten-year Unc_global",
"Twenty-year Anomaly_global",
"Twenty-year Unc_global",
"noname",
]
temp_global.columns = header
temp_global = temp_global.drop(temp_global.index[[0]])  ## drop the first row, which does not contain data
### GERMANY DATA SET ###
url_ger = "http://berkeleyearth.lbl.gov/auto/Regional/TAVG/Text/germany-TAVG-Trend.txt"
s = requests.get(url_ger).text
temp_germany = pd.read_csv(
StringIO(s), sep=" ", skiprows=69, skipinitialspace=True
) ## skiprows to skip the intro text
header = [
"Year_germany",
"Month_germany",
"Monthly Anomaly_germany",
"Monthly Unc_germany",
"Annual Anomaly_germany",
"Annual Unc_germany",
"Five-year Anomaly_germany",
"Five-year Unc_germany",
"Ten-year Anomaly_germany",
"Ten-year Unc_germany",
"Twenty-year Anomaly_germany",
"Twenty-year Unc_germany",
"noname",
]
temp_germany.columns = header
temp_germany = temp_germany.drop(temp_germany.index[[0]])  ## drop the first row, which does not contain data
temp_germany.sample(4)
| | Year_germany | Month_germany | Monthly Anomaly_germany | Monthly Unc_germany | Annual Anomaly_germany | Annual Unc_germany | Five-year Anomaly_germany | Five-year Unc_germany | Ten-year Anomaly_germany | Ten-year Unc_germany | Twenty-year Anomaly_germany | Twenty-year Unc_germany | noname |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2279 | 1939.0 | 11.0 | 1.246 | 0.187 | -1.209 | 0.121 | -0.693 | 0.088 | -0.134 | 0.072 | 0.088 | 0.074 | NaN |
| 3144 | 2011.0 | 12.0 | 3.034 | 0.190 | 1.147 | 0.085 | 0.907 | 0.074 | 1.214 | 0.068 | NaN | NaN | NaN |
| 2969 | 1997.0 | 5.0 | 0.816 | 0.170 | 0.412 | 0.043 | 0.608 | 0.035 | 0.886 | 0.032 | 0.987 | 0.022 | NaN |
| 2236 | 1936.0 | 4.0 | -1.033 | 0.207 | 0.203 | 0.070 | 0.389 | 0.081 | -0.037 | 0.091 | -0.055 | 0.086 | NaN |
Temperatures in the data sets are in Celsius and reported as anomalies relative to the January 1951 to December 1980 average. Uncertainties represent the 95% confidence interval for statistical noise and spatial undersampling effects.
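Since each uncertainty column gives the half-width of a 95 % confidence interval, the interval bounds can be recovered directly from the two columns. A short sketch using the anomaly values from the sample rows above (column names shortened for readability):

```python
import pandas as pd

# Anomalies and uncertainties (in °C) taken from the sample rows above
df = pd.DataFrame(
    {
        "Monthly Anomaly": [1.246, 3.034, 0.816],
        "Monthly Unc": [0.187, 0.190, 0.170],
    }
)

# The uncertainty column is the half-width of the 95 % confidence
# interval, so the bounds follow as anomaly ± uncertainty
df["CI lower"] = df["Monthly Anomaly"] - df["Monthly Unc"]
df["CI upper"] = df["Monthly Anomaly"] + df["Monthly Unc"]

print(df.round(3))
```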
Let us store the temperature anomaly variable and the corresponding 95% confidence interval in two pd.DataFrame objects, one for each data set (global and Germany).
### GLOBAL DATA SET ###
temp_global["Date"] = (
temp_global["Year_global"].astype(int).astype(str) ## convert float to integer
+ "-"
+ temp_global["Month_global"].astype(int).astype(str)
)
temp_global["Date"] = pd.to_datetime(temp_global["Date"], format="%Y-%m")
t_global = pd.concat(
[temp_global["Monthly Anomaly_global"], temp_global["Monthly Unc_global"]], axis=1
).set_index(temp_global["Date"])
### GERMANY DATA SET ###
temp_germany["Date"] = (
temp_germany["Year_germany"].astype(int).astype(str) ## convert float to integer
+ "-"
+ temp_germany["Month_germany"].astype(int).astype(str)
)
temp_germany["Date"] = pd.to_datetime(temp_germany["Date"], format="%Y-%m")
t_germany = pd.concat(
[temp_germany["Monthly Anomaly_germany"], temp_germany["Monthly Unc_germany"]],
axis=1,
).set_index(temp_germany["Date"])
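As a side note (not part of the original workflow): since both frames now share a monthly DatetimeIndex, they could also be joined into a single DataFrame for direct comparison. A sketch with synthetic stand-in values; the frame `t_combined` is illustrative only:

```python
import pandas as pd

# Toy stand-ins for t_global and t_germany (synthetic anomaly values,
# overlapping only in February and March 2020)
t_global = pd.DataFrame(
    {"Monthly Anomaly_global": [1.1, 0.9, 1.3]},
    index=pd.date_range("2020-01-01", periods=3, freq="MS"),
)
t_germany = pd.DataFrame(
    {"Monthly Anomaly_germany": [2.0, 1.5, 2.2]},
    index=pd.date_range("2020-02-01", periods=3, freq="MS"),
)

# An inner join on the shared DatetimeIndex keeps only the months
# present in both data sets
t_combined = t_global.join(t_germany, how="inner")
print(t_combined)
```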
Let us take a look at the data sets:
plt.figure(figsize=(18, 6))
t_global["Monthly Anomaly_global"].plot()
plt.title("Monthly global temperature anomaly", fontsize=15)
plt.ylabel("°C", fontsize=15)
plt.xlabel("Date", fontsize=15)
plt.show()
plt.figure(figsize=(18, 6))
t_germany["Monthly Anomaly_germany"].plot()
plt.title("Monthly temperature anomaly for Germany", fontsize=15)
plt.ylabel("°C", fontsize=15)
plt.xlabel("Date", fontsize=15)
plt.show()
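The Monthly Unc columns could also be drawn as a shaded 95 % confidence band around the anomaly curve. A sketch with synthetic data; the variable names are illustrative and not part of the workflow above:

```python
import numpy as np
import pandas as pd
import matplotlib

matplotlib.use("Agg")  # non-interactive backend, so the sketch also runs headless
import matplotlib.pyplot as plt

# Synthetic monthly anomaly series with a constant 0.2 °C uncertainty
idx = pd.date_range("2000-01-01", periods=24, freq="MS")
anom = pd.Series(np.linspace(-0.5, 1.0, 24), index=idx)
unc = pd.Series(0.2, index=idx)

fig, ax = plt.subplots(figsize=(18, 6))
anom.plot(ax=ax, label="Monthly anomaly")
# Shade the band between anomaly - unc and anomaly + unc
ax.fill_between(idx, anom - unc, anom + unc, alpha=0.3, label="95 % CI")
ax.set_ylabel("°C", fontsize=15)
ax.set_xlabel("Date", fontsize=15)
ax.legend()
plt.show()
```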
Finally, we store both data sets in .json files for further processing.
# reset datetimeindex
t_global = t_global.reset_index()
t_germany = t_germany.reset_index()
t_global.to_json("../data/t_global.json", date_format="iso")
t_germany.to_json("../data/t_germany.json", date_format="iso")
To read the .json files, use the following command and date conversion.
t_global = pd.read_json("../data/t_global.json")
t_global["Date"] = pd.to_datetime(t_global["Date"], format="%Y-%m-%d", errors="coerce")
t_global
| | Date | Monthly Anomaly_global | Monthly Unc_global |
|---|---|---|---|
| 0 | 1750-01-01 | -0.927 | 3.253 |
| 1 | 1750-02-01 | -1.645 | 3.612 |
| 2 | 1750-03-01 | -0.150 | 3.448 |
| 3 | 1750-04-01 | -0.427 | 1.731 |
| 4 | 1750-05-01 | -1.750 | 1.331 |
| ... | ... | ... | ... |
| 3273 | 2022-10-01 | 1.581 | 0.059 |
| 3274 | 2022-11-01 | 0.731 | 0.074 |
| 3275 | 2022-12-01 | 0.971 | 0.073 |
| 3276 | 2023-01-01 | 1.209 | 0.097 |
| 3277 | 2023-02-01 | 1.360 | 0.059 |

3278 rows × 3 columns
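Because the DatetimeIndex was reset before saving, it may be convenient to restore it after reading the file. A short sketch on a toy frame with the same column layout:

```python
import pandas as pd

# Toy frame mimicking the structure after reading the JSON file
t_global = pd.DataFrame(
    {
        "Date": pd.to_datetime(["1750-01-01", "1750-02-01"]),
        "Monthly Anomaly_global": [-0.927, -1.645],
        "Monthly Unc_global": [3.253, 3.612],
    }
)

# Restore the DatetimeIndex that was reset before saving
t_global = t_global.set_index("Date")
print(t_global.index.dtype)
```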
Citation
The E-Learning project SOGA-Py was developed at the Department of Earth Sciences by Annette Rudolph, Joachim Krois and Kai Hartmann. You can reach us via e-mail at soga[at]zedat.fu-berlin.de.
Please cite as follows: Rudolph, A., Krois, J., Hartmann, K. (2023): Statistics and Geodata Analysis using Python (SOGA-Py). Department of Earth Sciences, Freie Universitaet Berlin.