As noted in the introductory part, doubly constrained feature scales are closely related to compositional constraints.
Let us imagine a variable $\mathbf{y}$ in an open interval, i.e. $\mathbf{y}=(y_i\in\ ]a,b[_\mathbb R,\ i=1,\cdots,m)$, which lies within a constraining interval $[l,u]\subset \mathbb R$ with $l<a$ and $b<u$. The absolute value is obviously of minor interest compared with the relative position between $l$ and $u$. A first step should therefore be a linear min-max transformation with min $=l$ and max $=u$:
$$ y \to y' =\frac{y-l}{u-l} \in\ ]0,1[\ \subset \ {\mathbb R}_+ $$
Now, the transformed variable $y'$ divides the unit interval into two parts, $y'$ and $1-y'$. Both sum up to 1 and therefore form a 2-dimensional composition.
Applying an alr-transformation to our variable, we obtain a symmetric feature scale: $$ y'\to y''=\log\left(\frac{y'}{1-y'}\right)\in \mathbb R$$
This so-called logistic transformation enables the application of algebraic operations over the base field $\mathbb R$.
The back transformations are: $$\begin{align} y''\to y'=\frac{e^{y''}}{1+e^{y''}}&& \text{and} && y'\to y=y'\cdot(u-l)+l \end{align}$$
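A minimal numerical sketch of this transformation chain (the limits l and u below are purely illustrative values chosen for demonstration):

import numpy as np

# illustrative limits (hypothetical values, only for demonstration)
l, u = 0.0, 10.0
y = np.array([1.5, 5.0, 8.2])          # observations inside ]l, u[

# min-max scaling to ]0, 1[
y_prime = (y - l) / (u - l)

# logistic (alr-type) transformation to the real line
y_double_prime = np.log(y_prime / (1 - y_prime))

# back transformations
y_prime_back = np.exp(y_double_prime) / (1 + np.exp(y_double_prime))
y_back = y_prime_back * (u - l) + l

print(np.allclose(y, y_back))          # True: the chain is invertible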
The National Snow and Ice Data Center (NSIDC) at the University of Colorado in Boulder hosts and provides monthly sea ice data for both hemispheres from November 1978 to the present. Against the background of a warming Earth system, we will estimate the changes of the September ice cover for the northern hemisphere and project a possible trend until the end of this century.
Loading the data and plotting the time series
# First, let's import the needed libraries.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# open the data set as csv file using pandas
df = pd.read_csv('ftp://sidads.colorado.edu/DATASETS/NOAA/G02135//north/monthly/data/N_09_extent_v3.0.csv')
df.head()
| | year | mo | data-type | region | extent | area |
|---|------|----|-----------|--------|--------|------|
| 0 | 1979 | 9 | Goddard | N | 7.05 | 4.58 |
| 1 | 1980 | 9 | Goddard | N | 7.67 | 4.87 |
| 2 | 1981 | 9 | Goddard | N | 7.14 | 4.44 |
| 3 | 1982 | 9 | Goddard | N | 7.30 | 4.43 |
| 4 | 1983 | 9 | Goddard | N | 7.39 | 4.70 |
# be aware of the spaces in the keys:
df.keys()
Index(['year', ' mo', ' data-type', ' region', ' extent', ' area'], dtype='object')
# use seaborn for plotting
import seaborn as sns
sns.set()
sns.scatterplot(data = df, x='year', y = ' extent')
plt.title("Ice cover extent september")
plt.ylabel('million sqrkm')
Now we may estimate the linear trend using seaborn.regplot. If we set truncate = True, the regression line is bounded by the data limits; if we set truncate = False, it extends to the x-axis limits.
plt.xlim(1975,2100)
plt.ylim(0,10)
sns.regplot(data = df, x='year', y = ' extent', truncate = False, ci = 95)
plt.title("Prediction of the ice cover extent september")
plt.ylabel('million sqrkm')
Extrapolating the linear model of the observed September ice cover extent of the northern hemisphere (1979 onward) into the future, we have to expect, within the 95% confidence band, a complete disappearance of the Arctic sea ice in late summer in the second half of this century!
In 2035 we may expect a September ice cover extent (ICE), at the 95% confidence level, between 2.07 and 4.41 million sqrkm, i.e. between 51.83% and 110.3% of last September's extent.
In 2050 we may expect a September ICE, at the 95% confidence level, between 0.8 and 3.31 million sqrkm, i.e. between 20.09% and 82.77% of last September's extent.
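The interval estimates above are read off the seaborn plot. One way to reproduce analogous values is to fit the same linear model with statsmodels and query the 95% confidence interval of the mean prediction for selected years; the following is a minimal sketch (the exact numbers depend on the data version downloaded):

import pandas as pd
import statsmodels.api as sm

# linear model extent ~ year on the untransformed data
X = sm.add_constant(df['year'])
ols = sm.OLS(df[' extent'], X).fit()

# 95% confidence intervals of the mean prediction for 2035 and 2050
years = pd.DataFrame({'const': 1.0, 'year': [2035, 2050]})
pred = ols.get_prediction(years)
print(pred.summary_frame(alpha=0.05)[['mean', 'mean_ci_lower', 'mean_ci_upper']])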
The feature scale of the Arctic ice extent in million square kilometers is clearly framed by a lower and an upper limit: the extent can definitely neither be negative nor pass the equator. Thus, the lower limit is trivial, but the upper limit is unknown.
For the sake of simplicity, we initially use the maximum observed extent as the upper limit (the maximum observation itself has to be treated separately below) and start a logistic approach:
Logistic Transformation
We start by scaling the data to the standard simplex $]0,1[\ \subset {\mathbb R}_+$ by dividing the data by the maximum as an estimate of the upper limit. Afterwards we map the data to the real line by the logistic transformation and plot the transformed time series.
# estimate the upper limit by the maximum observed extent
max_extent = df[' extent'].max()
df[' extent lrt'] = df[' extent']/max_extent
# if we hit the maximum, the outcome is 1. For this value we cannot calculate the
# logarithm of y'/(1-y'). Therefore, we approximate it by, e.g., 0.999999:
df.loc[df[' extent lrt'] == 1, ' extent lrt'] = 0.999999
# logistic transformation to the real line
df[' extent lrt'] = np.log(df[' extent lrt']/(1-df[' extent lrt']))
## Plot the data:
sns.scatterplot(data = df, x='year', y = ' extent lrt')
plt.title("Ice cover extent after logistic transformation")
plt.ylim(0,5)
plt.ylabel('ICE logistic space')
Now, we can fit a linear model in the log-space:
plt.xlim(1977,2100)
plt.ylim(-6,5)
plot_des = sns.regplot(data = df, x='year', y = ' extent lrt', truncate = False)
plt.axhline(0)
plt.title("Prediction of ICE decrease for september log-space")
plt.ylabel('ICE logistic space')
Interpreting the zero line of the plot above, half of the former ice cover extent (expressed by the maximum of the observations) has disappeared since roughly 2010.
Next, let us transform the model back to the original space by $$ ICE = \frac{ e^{ax+b} }{1+e^{ax+b}}\cdot ICE_{max}$$ where $a$ is the slope, $b$ the intercept and $ICE_{max}$ the maximum observed extent used as the upper limit.
# Since sns.regplot does not provide the coefficients of the regression line, we calculate the
# coefficients with statsmodels. Both approaches estimate the parameters by ordinary least squares, so the results agree.
import statsmodels.api as sm
y = df.iloc[:,6]
x = df.iloc[:,0]
x = sm.add_constant(x)
alpha = 0.05 # 95% confidence interval
model = sm.OLS(y, x)
lr = model.fit()
#lr = sm.OLS(y, sm.add_constant(x)).fit()
#print(lr.params)
print(lr.summary())
# Slope: a
a = lr.params[1]
# Intercept: b
b = lr.params[0]
# Confidence intervals:
lr_ci = lr.conf_int(alpha = 0.05)
# extend the prediction range until the year 2100:
x_1979_2100 = np.arange(1979, 2101)
x_1979_2100 = sm.add_constant(x_1979_2100)
y_1979_2100 = lr.predict(x_1979_2100)
                            OLS Regression Results
==============================================================================
Dep. Variable:             extent lrt   R-squared:                       0.381
Model:                            OLS   Adj. R-squared:                  0.366
Method:                 Least Squares   F-statistic:                     26.42
Date:                Tue, 12 Dec 2023   Prob (F-statistic):           6.40e-06
Time:                        13:34:36   Log-Likelihood:                -86.781
No. Observations:                  45   AIC:                             177.6
Df Residuals:                      43   BIC:                             181.2
Df Model:                           1
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        202.7756     39.110      5.185      0.000     123.902     281.649
year          -0.1005      0.020     -5.140      0.000      -0.140      -0.061
==============================================================================
Omnibus:                       81.579   Durbin-Watson:                   2.337
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1377.022
Skew:                           4.699   Prob(JB):                    9.63e-300
Kurtosis:                      28.418   Cond. No.                     3.08e+05
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 3.08e+05. This might indicate that there are strong multicollinearity or other numerical problems.
df[' extent back trafo'] = (np.exp(df[' extent lrt']))/(1+(np.exp(df[' extent lrt'])))*df[' extent'].max()
# predict until 2100:
lr_back = ((np.exp(a*x_1979_2100[:,1]+b))/(1+(np.exp(a*x_1979_2100[:,1]+b))))*df[' extent'].max()
Let us have a look at the scatter plot of the back-transformed data:
sns.scatterplot(data = df, x='year', y = ' extent back trafo')
plt.title("ICE decrease for september log-spac")
plt.ylabel('Sea ice extend in mio. sqrkm')
# In order to plot the logistic regression line (orange line below), we create a new data set
# with the back-transformed data of the linear regression line
d_1979_2100 = {'year': np.array(x_1979_2100)[:,1], 'extent' : lr_back }
df_1979_2100 = pd.DataFrame(data = d_1979_2100)
plt.xlim(1977,2100)
plot_des = sns.regplot(data = df, x='year', y = ' extent back trafo', truncate = False)
plt.axhline(0)
plt.axhline(1)
plt.title("Prediction of ICE decrease for september log-space")
plt.ylabel('ICE logistic space')
sns.lineplot(data = df_1979_2100 , x='year', y = 'extent')
As an exercise, you can back-transform the confidence intervals of the regression line from the log-space to the original space, and you will get the following result:
With 95% confidence, the Arctic ice extent may pass the 1 million sqrkm marker between 2030 and 2070 and disappear completely in the second half of our century. This simple empirical estimation confirms the sophisticated physical modeling approach of Peng et al. (2020).
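A possible sketch for this exercise, assuming the fitted model lr, the prediction matrix x_1979_2100 and the column ' extent' from the cells above (exact years depend on the data version downloaded):

# 95% confidence band of the regression line in logit space
pred_band = lr.get_prediction(x_1979_2100).summary_frame(alpha=0.05)

# back-transform both band limits to million square kilometres
upper_limit = df[' extent'].max()
band_lower = np.exp(pred_band['mean_ci_lower']) / (1 + np.exp(pred_band['mean_ci_lower'])) * upper_limit
band_upper = np.exp(pred_band['mean_ci_upper']) / (1 + np.exp(pred_band['mean_ci_upper'])) * upper_limit

# first year in which each band limit drops below the 1 million sqrkm marker
years = x_1979_2100[:, 1]
print('lower band limit passes 1 mio sqrkm in:', int(years[(band_lower < 1).to_numpy()].min()))
print('upper band limit passes 1 mio sqrkm in:', int(years[(band_upper < 1).to_numpy()].min()))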
As a last exercise, check the residuals of the transformed and the untransformed data, apply the Shapiro-Wilk test for normality to the different distributions, and interpret the results.
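A minimal sketch for this last exercise, assuming the logit-space model lr from above and using scipy.stats.shapiro (the model name ols_raw for the untransformed fit is introduced here only for illustration):

from scipy import stats
import statsmodels.api as sm

# residuals of a linear model on the untransformed extent
ols_raw = sm.OLS(df[' extent'], sm.add_constant(df['year'])).fit()

# Shapiro-Wilk test for both residual distributions
for name, resid in [('untransformed', ols_raw.resid), ('logit space', lr.resid)]:
    stat, p = stats.shapiro(resid)
    print(f'{name}: W = {stat:.3f}, p = {p:.4f}')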
Citation
The E-Learning project SOGA-Py was developed at the Department of Earth Sciences by Annette Rudolph, Joachim Krois and Kai Hartmann. You can reach us via e-mail at soga[at]zedat.fu-berlin.de.
Please cite as follows: Rudolph, A., Krois, J., Hartmann, K. (2023): Statistics and Geodata Analysis using Python (SOGA-Py). Department of Earth Sciences, Freie Universitaet Berlin.