In this section we download and preprocess a composite ice core atmospheric carbon dioxide (CO2) data set provided by the World Data Center for Paleoclimatology, National Oceanic and Atmospheric Administration (NOAA) that extends back 800,000 years. The data may be directly accessed here or if the data set is not available you may download a copy here (downloaded on 2017-06-09).

The EPICA (European Project for Ice Coring in Antarctica) Dome C and Antarctic composite ice core atmospheric (CO2) data is a new version of (CO2) composite replaces the old version of Luthi et al. (2008). For details see Bereiter et al. (2015).

Age unit is in years before present (yr BP) where present refers to 1950 AD. The composite is built from the following:

In order to get a better intuition about the different data sources used, we plot a map of Antarctica, including the geographical locations of the drilling sites, using the plotting machinery in R.

# data dervied from
# http://cdiac.ornl.gov/trends/co2/ice_core_co2.html and
# https://doi.pangaea.de/10.1594/PANGAEA.602127
antarctica <- data.frame('Site'=c('EDML', 'Law Dome', 
                                  'Dome C', 'Taylor Dome',
                                  'Vostok', 'Dome A', 
                                  'South Pole station', 
                                  'Siple Station'),
                        'Elevation masl' =c(2892, 1390, 3233, 2365,
                                            3500, 4084, 2810, 1054),
                        'Lat' = c(-75.003, -66.7333, -75.1, -77.8,
                                  -78.467, -80.367,-90, -75.917),
                        'Lon' = c(0.068,112.083, 123.4, 158.717,
                                  106.867, 77.367, 0.0, 276.083))
antarctica 
##                 Site Elevation.masl      Lat     Lon
## 1               EDML           2892 -75.0030   0.068
## 2           Law Dome           1390 -66.7333 112.083
## 3             Dome C           3233 -75.1000 123.400
## 4        Taylor Dome           2365 -77.8000 158.717
## 5             Vostok           3500 -78.4670 106.867
## 6             Dome A           4084 -80.3670  77.367
## 7 South Pole station           2810 -90.0000   0.000
## 8      Siple Station           1054 -75.9170 276.083
library(maps)
library(ggplot2)
library(mapproj)
world <- map_data("world") # World map
antmap <- ggplot(world, aes(x = long, y = lat), fill='light blue') +
  # plot shoreline
  geom_path(aes(group=group), size=0.5) +
  # add point data
  geom_point(data = antarctica, aes(x = Lon, y = Lat),size=2) + 
  # define projection
  coord_map("ortho", orientation = c(-90, 0, 0)) +
  # graphical layout
  theme_bw() +
  # axes layout
  scale_y_continuous(limits = c(-90, -60), breaks = seq(-90, -60, 5))+
  scale_x_continuous(limits = c(-180, 360), breaks = seq(0, 360, 22.5))+
  theme(axis.title = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        plot.title = element_text(hjust = 0.5),
        panel.border = element_rect(colour = 'black', fill = NA)) +
  # annotation
  geom_text(data = antarctica, aes(x = Lon,y = Lat, label = Site),
            hjust = -0.1, vjust = 1.1, size=3) + 
  # add title
  ggtitle("Antarctic composite ice core data sampling sites")
# show map
antmap

Now we are ready to download the data. If the data sets are not available you may download a copy of the global data set here (downloaded on 2017-06-09).

paste('Date of download: ', Sys.time())
## [1] "Date of download:  2019-01-20 18:37:06"
url <- "ftp://ftp.ncdc.noaa.gov/pub/data/paleo/icecore/antarctica/antarctica2015co2composite.txt"
co2.EPICA.raw <- read.table(url, #blank.lines.skip = T, 
                        header = T,
                        skip = 1,
                        #fill=TRUE, 
                        sep = "\t")
head(co2.EPICA.raw, 3)
##   age_gas_calBP co2_ppm co2_1s_ppm
## 1        -51.03  368.02       0.06
## 2        -48.00  361.78       0.37
## 3        -46.28  359.65       0.10
tail(co2.EPICA.raw, 3)
##      age_gas_calBP co2_ppm co2_1s_ppm
## 1899      804522.7  204.86       1.64
## 1900      805132.4  202.23       0.69
## 1901      805668.9  207.29       2.20

The data set consist of 1901 observations (rows) and 3 features (columns). The first column, age_gas_calBP, contains the age of the sample in calibrated years (calendar years) before present (BP). For detailed information on the Antarctic ice core chronology 2012 (AICC2012) refer to Bazin et al. (2013). The second column, co2_ppm, is our variable of interest, the concentration of carbon dioxide (CO2) in the atmosphere, given in parts per million (ppm). The third column, co2_1s_ppm gives the standard deviations for the CO2 measurements.

For the sake of simplicity we round the age BP, given in age_gas_calBP, to an integer number and divide that by 1000.

co2.EPICA <- co2.EPICA.raw[, c('age_gas_calBP','co2_ppm')]
co2.EPICA$age_gas_calBP <- round(co2.EPICA$age_gas_calBP)/1000
colnames(co2.EPICA)  <- c('ky_BP_AICC2012','co2_ppm')
head(co2.EPICA, 10)
##    ky_BP_AICC2012 co2_ppm
## 1          -0.051  368.02
## 2          -0.048  361.78
## 3          -0.046  359.65
## 4          -0.044  357.11
## 5          -0.043  353.95
## 6          -0.042  353.72
## 7          -0.041  352.42
## 8          -0.040  350.81
## 9          -0.039  349.80
## 10         -0.039  349.28

In order to avoid ambiguity of dates we perform an aggregation step using the dplyr package in R and take the mean of ambiguous ages.

library(dplyr)
co2.EPICA.df <- co2.EPICA  %>%
  group_by(ky_BP_AICC2012) %>% 
  summarize(co2_ppm = mean(co2_ppm, na.rm = F))

head(co2.EPICA.df, 10)
## # A tibble: 10 x 2
##    ky_BP_AICC2012 co2_ppm
##             <dbl>   <dbl>
##  1         -0.051    368.
##  2         -0.048    362.
##  3         -0.046    360.
##  4         -0.044    357.
##  5         -0.043    354.
##  6         -0.042    354.
##  7         -0.041    352.
##  8         -0.04     351.
##  9         -0.039    350.
## 10         -0.038    348.

Finally we transform the data frame into a zoo object and plot the data. Similar to the xts() function, the zoo() function need a vector defining the data, but as well a vector with dates.

library(zoo)
co2.EPICA.ts <- zoo(x = co2.EPICA.df$co2_ppm, 
                    order.by = co2.EPICA.df$ky_BP_AICC2012)

Now we plot our data set.

plot(co2.EPICA.ts, 
     xlab="kyr BP", 
     ylab=expression("CO"[2]*" in ppm"), 
     xlim=c(rev(range(index(co2.EPICA.ts)))), # reverse x axis
     col='blue',
     main=expression('Antarctic Ice Cores Revised 800 kyr CO'[2]* ' Data'))

Finally, we store the time series data set in a .RData file for further processing.

save(file = "Antartica_Ice_Core.Rdata", co2.EPICA.ts)