import pandas as pd
import numpy as np

# Download the Dahlem station weather table and keep only the rows that have
# no missing values, so the correlation functions below receive complete pairs.
dahlem_weather = pd.read_csv(
    "https://userpage.fu-berlin.de/soga/data/raw-data/dahlem_station_weather_data.csv"
)
dahlem_weather = dahlem_weather.dropna()


import matplotlib.pyplot as plt
import seaborn as sns

# Scatter plot of humidity against temperature, one colour per season.
plt.figure(figsize=(12, 7))

axis_labels = {
    "xlabel": 'Temperature in 2 m above ground (°C)',
    "ylabel": 'Humidity (g/kg)',
}
scatter_ax = sns.scatterplot(
    x="temperature",
    y="humidity",
    hue="season",
    data=dahlem_weather,
)
scatter_ax.set(**axis_labels)

[Text(0.5, 0, 'Temperature in 2 m above ground (°C)'),
 Text(0, 0.5, 'Humidity (g/kg)')]


### your solution


from scipy.stats import pearsonr

# Pearson correlation between temperature and humidity, computed separately
# for the summer and the winter subset and printed rounded to 4 decimals.
summer = dahlem_weather[dahlem_weather["season"] == "summer"]
r_summer, _ = pearsonr(summer["temperature"], summer["humidity"])
print(f"r summer season: {round(r_summer, 4)}")


winter = dahlem_weather[dahlem_weather["season"] == "winter"]
r_winter, _ = pearsonr(winter["temperature"], winter["humidity"])
print(f"r winter season: {round(r_winter, 4)}")


from scipy.stats import pearsonr, spearmanr
def reg_coef(x, y , label=None, color=None, **kwargs):
    """Write the Pearson correlation of x and y into the current axes.

    Used in this file as the upper-triangle mapping function of a seaborn
    PairGrid. ``label``, ``color`` and any extra keyword arguments are
    accepted but not used.
    """
    axes = plt.gca()
    coefficient, _ = pearsonr(x, y)
    # Centre the text in the panel and hide ticks/spines so only the number shows.
    axes.annotate(
        f"r = {coefficient:.2f}",
        xy=(0.5, 0.5),
        xycoords="axes fraction",
        ha="center",
        size=20,
    )
    axes.set_axis_off()

def reg_spear_coef(x, y , label=None, color=None, **kwargs):
    """Write the Spearman rank correlation of x and y into the current axes.

    Used in this file as the upper-triangle mapping function of a seaborn
    PairGrid. ``label``, ``color`` and any extra keyword arguments are
    accepted but not used.
    """
    axes = plt.gca()
    coefficient, _ = spearmanr(x, y)
    # Centre the text in the panel and hide ticks/spines so only the number shows.
    axes.annotate(
        f"r = {coefficient:.2f}",
        xy=(0.5, 0.5),
        xycoords="axes fraction",
        ha="center",
        size=20,
    )
    axes.set_axis_off()


### your solution


# Pair plot of the four numeric weather variables: histograms on the diagonal,
# scatter plots below it and the Pearson coefficient above it.
# NOTE: no plt.figure(...) call here. PairGrid is figure-level and always
# creates its own figure, so a preceding plt.figure(figsize=...) only leaves an
# empty extra figure behind. The grid size is set with PairGrid's
# height/aspect arguments instead.
g = sns.PairGrid(dahlem_weather[["humidity", "temperature", "cloudiness", "sunlight"]])
g.map_diag(sns.histplot)
g.map_lower(sns.scatterplot)
g.map_upper(reg_coef)


### your solution


# Pair plot of the four numeric weather variables: histograms on the diagonal,
# scatter plots below it and the Spearman rank coefficient above it.
# NOTE: no plt.figure(...) call here. PairGrid is figure-level and always
# creates its own figure, so a preceding plt.figure(figsize=...) only leaves an
# empty extra figure behind. The grid size is set with PairGrid's
# height/aspect arguments instead.
g = sns.PairGrid(dahlem_weather[["humidity", "temperature", "cloudiness", "sunlight"]])
g.map_diag(sns.histplot)
g.map_lower(sns.scatterplot)
g.map_upper(reg_spear_coef)


### your solution


from scipy.stats import spearmanr

def perform_and_print_cor_test(x, y, season, alpha = 0.05):
    """Run a Spearman rank correlation test on x and y and print the verdict.

    Parameters
    ----------
    x, y : array-like
        Paired samples passed straight to ``scipy.stats.spearmanr``. The
        printed text refers to them as temperature and humidity.
    season : str
        Label inserted into the printed sentence ("... for the <season>").
    alpha : float, default 0.05
        Significance level as a fraction (0.05 means 5 %).

    Returns
    -------
    None
        The conclusion is printed, not returned.
    """
    test_result = spearmanr(x, y)

    # Format the level with ':g' instead of int(): int() truncates, so float
    # noise (0.29 * 100 == 28.999999999999996) printed "28 %" and levels below
    # 1 % printed "0 %".
    alpha_percent = "{:g}".format(alpha * 100)

    # Spearman's test is about a monotonic association, not specifically a
    # linear one, so the messages say "monotonic".
    if test_result.pvalue < alpha:
        print(
            f"At a significance level of {alpha_percent} %, the data provide very strong evidence "
            f"to conclude a monotonic relation between the temperature and the humidity for the {season}. "
            f"The results are statistically significant with a p-value of {test_result.pvalue}"
        )
    else:
        print(
            f"At a significance level of {alpha_percent} %, the data provide no evidence for rejecting H0. "
            "Hence we cannot conclude a monotonic relation between "
            f"the temperature and the humidity for the {season}. The p-value is {test_result.pvalue}"
        )

# Run the correlation test for each season at the 1 % level. Each call is
# labelled with the season its data comes from (the winter call previously
# printed "summer").
perform_and_print_cor_test(summer["temperature"], summer["humidity"], "summer", alpha = 0.01)
print("\n")
perform_and_print_cor_test(winter["temperature"], winter["humidity"], "winter", alpha = 0.01)

Column	Meaning
`humidity`	mean absolute humidity in g/kg.
`temperature`	mean daily temperature at 2 m above ground in °C.
`cloudiness`	mean daily cloud coverage in 1/8.
`sunlight`	mean daily sunlight hours.
`season`	winter = DJF, spring = MAM, summer = JJA, autumn = SON.