# Pakete laden ####
library(report) # Einfaches Erstellen von statistischen Berichten
library(marginaleffects) # Vorhersagen aus Regressionsmodellen
library(modelsummary) # Darstellung von Regressionsmodellen
library(parameters) # Darstellung von Regressionsmodellen
## 
## Attaching package: 'parameters'
## The following object is masked from 'package:modelsummary':
## 
##     supported_models
library(performance) # Prüfen der Voraussetzungen
library(tidyverse) # Datenmanagement und Visualisierung: https://www.tidyverse.org/

# Datensatz lesen und relevante Variablen auswählen und umbenennen ####
d <- haven::read_stata(here::here("data/Vanerkel_Vanaelst_2021.dta")) |>
  rename(
    Political_knowledge = PK,
    Personalized_news = personalized_news,
    Radio = News_channels_w4_1,
    Television = News_channels_w4_2,
    Newspapers = News_channels_w4_3,
    Online_news_sites = News_channels_w4_4,
    Twitter = News_channels_w4_5,
    Facebook = News_channels_w4_6
  ) |>
  mutate(
    Gender = as_factor(Gender),
    Education = as_factor(Education),
    trad = factor(trad, labels = c(
      "traditional news diet: no",
      "traditional news diet: yes"
    ))
  )

# Modelle in Tabellen 5 & 6 ####
# PNE
# we presented respondents **who use social
# media** for news consumption three statements about the extent to which they encounter
# news on SNSs in the first wave: 1) On social media I often encounter news from media that
# I normally don’t use, 2) On social media I often encounter news that I am not interested in
# and 3) On social media I rarely encounter political news with which I disagree. For each
# statement they could indicate whether they agree or not on a five-point scale, running
# from fully disagree (0) to fully agree (4). [...]
# Based on the three items an additive scale of personalized news environment was created,
# in which all items were coded in the same direction so that a higher score indicates a more
# personalized SNSs environment

# IO
# We selected four
# items from their extensive information overload scale where citizens had to indicate on
# a five-point scale whether they fully disagreed (0) to fully agreed (4) (wave 16). These
# four statements are: 1) I regularly feel overwhelmed by too much information these
# days, 2) There is so much information available on topics of interest to me that
# I sometimes have difficulties to determine what is important and what is not, 3) I am
# being confronted by an avalanche of personal messages (via e-mail, text messages, chat,
# social media, etc.), 4) When I search for information on a topic of interest to me,
# I usually get too much rather than too little information. [...]
# Moreover, the additive information overload scale, which
# runs from 0 (no overload) to 16 (full overload),

# Übersicht
d |> 
  select(Personalized_news, Information_overload) |> 
  haven::zap_labels() |> 
  report_table()
## Variable             | n_Obs | Mean |   SD | Median |  MAD | Min | Max
## ----------------------------------------------------------------------
## Personalized_news    |   993 | 5.16 | 1.92 |      5 | 1.48 |   0 |  12
## Information_overload |   993 | 8.46 | 3.16 |      8 | 2.97 |   0 |  16
## 
## Variable             | Skewness | Kurtosis | percentage_Missing
## ---------------------------------------------------------------
## Personalized_news    |    -0.02 |     0.35 |              21.55
## Information_overload |    -0.22 |     0.09 |               0.00
# Erklärung von Personalized News Environment (Table 5)
# av: Personalized_news
# Alle anderen Variablennamen finden Sie im Modell M4 oben
# Achten Sie auch darauf, welche Variablen NICHT im Modell sind
m_pne <- lm(Personalized_news ~  Radio + Television + Newspapers + Online_news_sites + Twitter + 
              Facebook + Gender + Age + Education, data = d)

# Erklärung von Information overload (Table 6)
# Alle Variablennamen finden Sie im Modell M4 in der Vorlesung
m_io <- lm(Information_overload ~ Radio + Television + Newspapers + Online_news_sites + Twitter + 
             Facebook + Gender + Age + Education + Political_interest, data = d)

# Betrachten Sie die Ergebnisse der Modelle indem Sie aus der Vorlesung entsprechende
# Code-Zeilen kopieren und das Modell austauschen.
# z.B. modelsummary(), report_table(), parameters() |> plot(), predictions()

# Erklärung von Personalized News Environment (Table 5)
m_pne |> 
  report_table(metrics = "R2_adj")
## Parameter          | Coefficient |         95% CI | t(768) |      p
## -------------------------------------------------------------------
## (Intercept)        |        6.79 | [ 5.98,  7.59] |  16.56 | < .001
## Radio              |        0.07 | [-0.01,  0.16] |   1.65 | 0.099 
## Television         |       -0.07 | [-0.19,  0.04] |  -1.24 | 0.215 
## Newspapers         |       -0.08 | [-0.17,  0.01] |  -1.73 | 0.084 
## Online news sites  |        0.02 | [-0.07,  0.11] |   0.45 | 0.655 
## Twitter            |        0.05 | [-0.08,  0.18] |   0.78 | 0.434 
## Facebook           |       -0.22 | [-0.29, -0.14] |  -5.48 | < .001
## Gender [female]    |       -0.01 | [-0.29,  0.26] |  -0.09 | 0.926 
## Age                |       -0.01 | [-0.02,  0.00] |  -2.14 | 0.033 
## Education [Middle] |       -0.35 | [-0.76,  0.06] |  -1.66 | 0.098 
## Education [High]   |       -0.09 | [-0.51,  0.33] |  -0.44 | 0.663 
##                    |             |                |        |       
## R2 (adj.)          |             |                |        |       
## 
## Parameter          | Std. Coef. | Std. Coef. 95% CI |  Fit
## ----------------------------------------------------------
## (Intercept)        |       0.10 |    [-0.10,  0.29] |     
## Radio              |       0.07 |    [-0.01,  0.14] |     
## Television         |      -0.05 |    [-0.13,  0.03] |     
## Newspapers         |      -0.07 |    [-0.15,  0.01] |     
## Online news sites  |       0.02 |    [-0.06,  0.10] |     
## Twitter            |       0.03 |    [-0.04,  0.10] |     
## Facebook           |      -0.22 |    [-0.30, -0.14] |     
## Gender [female]    |  -6.82e-03 |    [-0.15,  0.14] |     
## Age                |      -0.08 |    [-0.16, -0.01] |     
## Education [Middle] |      -0.18 |    [-0.40,  0.03] |     
## Education [High]   |      -0.05 |    [-0.27,  0.17] |     
##                    |            |                   |     
## R2 (adj.)          |            |                   | 0.05
m_pne |> 
  parameters() |>
  plot()

m_pne |> 
  parameters(standardize = "refit") |> 
  plot()

# Facebook in den folgenden Darstellungen nur ein Beispiel. Gerne andere 
# Variablen einfügen. 
m_pne |> 
  plot_predictions(by = "Facebook", 
                   newdata = datagrid(Facebook = 1:6, 
                                      grid_type = "mean_or_mode", # Werte der anderen Prädiktoren
                                      FUN_integer = mean)) # Notwendig, da sonst für Integer Prädiktoren (keine Nachkommastellen) der Modus ausgewählt wird.

m_pne |> 
  predictions(newdata = datagrid(Facebook = 1:6, 
                                 grid_type = "mean_or_mode", # Werte der anderen Prädiktoren
                                 FUN_integer = mean)) # Notwendig, da sonst für Integer Prädiktoren (keine Nachkommastellen) der Modus ausgewählt wird.
## 
##  Facebook Estimate Std. Error    z Pr(>|z|)     S 2.5 % 97.5 %
##         1     5.71      0.142 40.3   <0.001   Inf  5.43   5.98
##         2     5.49      0.124 44.4   <0.001   Inf  5.25   5.73
##         3     5.27      0.117 45.0   <0.001   Inf  5.04   5.50
##         4     5.06      0.123 41.0   <0.001   Inf  4.82   5.30
##         5     4.84      0.141 34.3   <0.001 855.4  4.56   5.12
##         6     4.62      0.166 27.8   <0.001 562.7  4.30   4.95
## 
## Type: response
m_pne |> 
  avg_comparisons(variables = list(Facebook = "2sd", Newspapers = "2sd",
                                   Online_news_sites = "2sd", Twitter = "2sd",
                                   Gender = "reference", 
                                   Education = "pairwise"))
## 
##               Term            Contrast Estimate Std. Error       z Pr(>|z|)
##  Education         High - Lower         -0.0931      0.213 -0.4362   0.6627
##  Education         High - Middle         0.2545      0.149  1.7122   0.0869
##  Education         Middle - Lower       -0.3476      0.210 -1.6555   0.0978
##  Facebook          (x + sd) - (x - sd)  -0.8510      0.155 -5.4768   <0.001
##  Gender            female - male        -0.0131      0.140 -0.0935   0.9255
##  Newspapers        (x + sd) - (x - sd)  -0.2705      0.156 -1.7298   0.0837
##  Online_news_sites (x + sd) - (x - sd)   0.0696      0.156  0.4474   0.6546
##  Twitter           (x + sd) - (x - sd)   0.1130      0.144  0.7832   0.4335
##     S   2.5 %  97.5 %
##   0.6 -0.5112  0.3251
##   3.5 -0.0368  0.5459
##   3.4 -0.7590  0.0639
##  24.5 -1.1555 -0.5464
##   0.1 -0.2873  0.2611
##   3.6 -0.5770  0.0360
##   0.6 -0.2353  0.3744
##   1.2 -0.1698  0.3958
## 
## Type: response
# Erklärung von Information overload (Table 6)
m_io |> 
  report_table(metrics = "R2_adj")
## Parameter          | Coefficient |        95% CI | t(981) |      p | Std. Coef.
## -------------------------------------------------------------------------------
## (Intercept)        |        6.05 | [ 4.80, 7.30] |   9.50 | < .001 |      -0.03
## Radio              |        0.04 | [-0.09, 0.16] |   0.58 | 0.565  |       0.02
## Television         |        0.12 | [-0.06, 0.29] |   1.32 | 0.186  |       0.05
## Newspapers         |       -0.02 | [-0.15, 0.12] |  -0.25 | 0.801  |  -9.21e-03
## Online news sites  |        0.03 | [-0.10, 0.16] |   0.43 | 0.665  |       0.02
## Twitter            |        0.22 | [ 0.01, 0.44] |   2.06 | 0.040  |       0.07
## Facebook           |        0.16 | [ 0.04, 0.27] |   2.67 | 0.008  |       0.10
## Gender [female]    |        0.55 | [ 0.14, 0.96] |   2.62 | 0.009  |       0.17
## Age                |        0.02 | [ 0.01, 0.04] |   2.79 | 0.005  |       0.10
## Education [Middle] |       -0.19 | [-0.81, 0.43] |  -0.61 | 0.540  |      -0.06
## Education [High]   |       -0.19 | [-0.83, 0.44] |  -0.60 | 0.547  |      -0.06
## Political interest |       -0.05 | [-0.14, 0.03] |  -1.23 | 0.218  |      -0.04
##                    |             |               |        |        |           
## R2 (adj.)          |             |               |        |        |           
## 
## Parameter          | Std. Coef. 95% CI |  Fit
## ---------------------------------------------
## (Intercept)        |     [-0.21, 0.15] |     
## Radio              |     [-0.05, 0.09] |     
## Television         |     [-0.02, 0.12] |     
## Newspapers         |     [-0.08, 0.06] |     
## Online news sites  |     [-0.06, 0.09] |     
## Twitter            |     [ 0.00, 0.14] |     
## Facebook           |     [ 0.03, 0.17] |     
## Gender [female]    |     [ 0.04, 0.30] |     
## Age                |     [ 0.03, 0.17] |     
## Education [Middle] |     [-0.26, 0.13] |     
## Education [High]   |     [-0.26, 0.14] |     
## Political interest |     [-0.11, 0.03] |     
##                    |                   |     
## R2 (adj.)          |                   | 0.03
m_io |> 
  parameters() |>
  plot()

m_io |> 
  parameters(standardize = "refit") |> 
  plot()

# Facebook in den folgenden Darstellungen nur ein Beispiel. Gerne andere 
# Variablen einfügen. 
m_io |> 
  plot_predictions(by = "Facebook", 
                   newdata = datagrid(Facebook = 1:6, 
                                      grid_type = "mean_or_mode", # Werte der anderen Prädiktoren
                                      FUN_integer = mean)) # Notwendig, da sonst für Integer Prädiktoren (keine Nachkommastellen) der Modus ausgewählt wird.

m_io |> 
  predictions(newdata = datagrid(Facebook = 1:6, 
                                 grid_type = "mean_or_mode", # Werte der anderen Prädiktoren
                                 FUN_integer = mean)) # Notwendig, da sonst für Integer Prädiktoren (keine Nachkommastellen) der Modus ausgewählt wird.
## 
##  Facebook Estimate Std. Error    z Pr(>|z|)     S 2.5 % 97.5 %
##         1     7.91      0.198 39.9   <0.001   Inf  7.52   8.29
##         2     8.06      0.185 43.5   <0.001   Inf  7.70   8.43
##         3     8.22      0.191 43.1   <0.001   Inf  7.85   8.60
##         4     8.38      0.213 39.3   <0.001   Inf  7.96   8.80
##         5     8.54      0.248 34.4   <0.001 861.4  8.05   9.02
##         6     8.70      0.291 29.9   <0.001 650.4  8.13   9.27
## 
## Type: response
m_io |> 
  avg_comparisons(variables = list(Facebook = "2sd", Newspapers = "2sd",
                                   Online_news_sites = "2sd", Twitter = "2sd",
                                   Gender = "reference", 
                                   Education = "pairwise"))
## 
##               Term            Contrast Estimate Std. Error        z Pr(>|z|)
##  Education         High - Lower        -0.19442      0.323 -0.60202  0.54716
##  Education         High - Middle       -0.00147      0.219 -0.00671  0.99465
##  Education         Middle - Lower      -0.19295      0.315 -0.61242  0.54026
##  Facebook          (x + sd) - (x - sd)  0.61588      0.231  2.66691  0.00766
##  Gender            female - male        0.54953      0.210  2.61601  0.00890
##  Newspapers        (x + sd) - (x - sd) -0.05820      0.231 -0.25201  0.80103
##  Online_news_sites (x + sd) - (x - sd)  0.10041      0.232  0.43310  0.66494
##  Twitter           (x + sd) - (x - sd)  0.44362      0.216  2.05817  0.03957
##    S   2.5 % 97.5 %
##  0.9 -0.8274  0.439
##  0.0 -0.4317  0.429
##  0.9 -0.8104  0.425
##  7.0  0.1633  1.069
##  6.8  0.1378  0.961
##  0.3 -0.5108  0.394
##  0.6 -0.3540  0.555
##  4.7  0.0212  0.866
## 
## Type: response
# Bonus: Prüfen Sie die statistischen Annahmen beider Modelle
m_pne |> 
  check_model(check = c("linearity", "normality", "homogeneity", "outliers", "vif"))

m_pne |> 
  check_residuals() |>
  plot()

m_pne |> 
  check_outliers() |>
  plot()

correlation::correlation(m_pne$model) |>
  summary(redundant = TRUE)
## # Correlation Matrix (pearson-method)
## 
## Parameter         | Personalized_news |   Radio | Television | Newspapers
## -------------------------------------------------------------------------
## Personalized_news |                   |   -0.03 |    -0.12** |     -0.10*
## Radio             |             -0.03 |         |    0.40*** |    0.31***
## Television        |           -0.12** | 0.40*** |            |    0.34***
## Newspapers        |            -0.10* | 0.31*** |    0.34*** |           
## Online_news_sites |             -0.06 | 0.23*** |    0.30*** |    0.35***
## Twitter           |             -0.02 |   0.10* |       0.05 |      0.12*
## Facebook          |          -0.20*** | 0.24*** |    0.25*** |      0.11*
## Age               |             -0.08 |    0.04 |    0.24*** |    0.22***
## 
## Parameter         | Online_news_sites | Twitter | Facebook |      Age
## ---------------------------------------------------------------------
## Personalized_news |             -0.06 |   -0.02 | -0.20*** |    -0.08
## Radio             |           0.23*** |   0.10* |  0.24*** |     0.04
## Television        |           0.30*** |    0.05 |  0.25*** |  0.24***
## Newspapers        |           0.35*** |   0.12* |    0.11* |  0.22***
## Online_news_sites |                   | 0.26*** |  0.32*** |    -0.06
## Twitter           |           0.26*** |         |  0.29*** |  -0.12**
## Facebook          |           0.32*** | 0.29*** |          | -0.17***
## Age               |             -0.06 | -0.12** | -0.17*** |         
## 
## p-value adjustment method: Holm (1979)
m_io |> 
  check_model(check = c("linearity", "normality", "homogeneity", "outliers", "vif"))

m_io |> 
  check_residuals() |>
  plot()

m_io |> 
  check_outliers() |>
  plot()

correlation::correlation(m_io$model) |>
  summary(redundant = TRUE)
## # Correlation Matrix (pearson-method)
## 
## Parameter            | Information_overload |   Radio | Television | Newspapers
## -------------------------------------------------------------------------------
## Information_overload |                      |    0.05 |       0.08 |       0.02
## Radio                |                 0.05 |         |    0.39*** |    0.28***
## Television           |                 0.08 | 0.39*** |            |    0.31***
## Newspapers           |                 0.02 | 0.28*** |    0.31*** |           
## Online_news_sites    |                 0.04 | 0.22*** |    0.28*** |    0.33***
## Twitter              |                 0.09 |    0.09 |       0.04 |       0.09
## Facebook             |               0.12** | 0.18*** |    0.19*** |       0.06
## Age                  |                 0.05 |    0.05 |    0.24*** |    0.21***
## Political_interest   |                -0.02 | 0.20*** |    0.30*** |    0.33***
## 
## Parameter            | Online_news_sites |  Twitter | Facebook |      Age | Political_interest
## ----------------------------------------------------------------------------------------------
## Information_overload |              0.04 |     0.09 |   0.12** |     0.05 |              -0.02
## Radio                |           0.22*** |     0.09 |  0.18*** |     0.05 |            0.20***
## Television           |           0.28*** |     0.04 |  0.19*** |  0.24*** |            0.30***
## Newspapers           |           0.33*** |     0.09 |     0.06 |  0.21*** |            0.33***
## Online_news_sites    |                   |  0.23*** |  0.27*** |    -0.08 |            0.32***
## Twitter              |           0.23*** |          |  0.33*** | -0.15*** |               0.05
## Facebook             |           0.27*** |  0.33*** |          | -0.25*** |               0.06
## Age                  |             -0.08 | -0.15*** | -0.25*** |          |            0.14***
## Political_interest   |           0.32*** |     0.05 |     0.06 |  0.14*** |                   
## 
## p-value adjustment method: Holm (1979)