0.1 Dates, Times, Time Series objects

http://www.statmethods.net/advstats/timeseries.html

http://a-little-book-of-r-for-time-series.readthedocs.org/en/latest/src/timeseries.html#arima-models

http://www.noamross.net/blog/2014/2/10/using-times-and-dates-in-r---presentation-code.html

0.2 Time series analysis in R

https://cran.r-project.org/web/views/TimeSeries.html

0.2.1 Date

Date objects have only dates, but no times

# return "Date" object
Sys.Date()+2:4
[1] "2015-10-01" "2015-10-02" "2015-10-03"

# find weekends in xts time series object
date <- as.Date(index(xts))
date[ weekdays(date) %in% c("Saturday", "Sunday") ] <- NA
weekends <- index(xts)[which(is.na(date))]

# convert to character
x <- as.Date("2010-11-04 00:00:00")
as.character(x)                         # "2010-11-04" 
format(x, format="%Y-%m-%d %H:%M:%S")   # "2010-11-04 00:00:00"

0.2.2 POSIXct, POSIXlt

POSIXct : “ct” stand for calender time

POSIXlt : “lt” stands for local time. POXIXlt objects are lists, it enables easy extraction of specific componants of a time

date format: https://jeffkayser.com/projects/date-format-string-composer/index.html

# read time stamp with format, return "POSIXlt" "POSIXt" object
time <- strptime("21/05/2015 10:09", format="%d/%m/%Y %H:%M")

# POSIXct objects are a measure of seconds from an origin
time <- Sys.time()
# add 3 hours
time + 3*60*60

0.2.3 ts

ts1 <- ts(c(2,3,2,3,4,5))

ts1 <- ts(1:10, frequency=4, start=c(1959, 2)) # 2nd Quarter of 1959

frequency(t1) # get frequency
frequency(t1) <- 2 # assign frequency

# extract data 
window(t2, start=as.Date('2000-01-01'), end=as.Date('2000-31-12') )

# merge 
merge(ts1, ts2)

0.2.4 xts, zoo

# ------------- xts or zoo for data with time stamp

# xts object is a subclass of zoo, which means zoo methods are called if an xts method doesn't exist for a generic function (e.g. $.zoo and $<-.zoo). Both zoo and xts objects are a matrix with an ordered index attribute. xts requires that the index be time-based.

# convert a data frame to xts
prices <- c(1.1, 2.2, 3.3)
timestamps <-  c('2011-01-05 11:00', '2011-01-05 12:00', '2011-01-05 13:00')
stockprices <- data.frame(prices, timestamps)

# prices(numeric)    timestamps(character)
# 1.1         2011-01-05 11:00
# 2.2         2011-01-05 12:00
# 3.3         2011-01-05 13:00

require(xts)
xts <- xts(stockprices$prices, order.by=as.POSIXct(stockprices$timestamps))

# convert a xts to a data frame
data.frame(date=index(xts), coredata(xts))

# select a time range
t1 <- xts["2011-01-04T05:00/2011-01-05"]

# merge operation on xts objects by time (index). 
merge(x,y)
merge(x,y, join='inner') # join=inner, left, or right

# zero-width objects (only index values) can be used
xi <- xts( , index(x))
merge(y, xi)

# average in time series based on time and date
aggregate(list(temperature = temp$temperature)
          , list(hourofday = cut(temp$date_time, "1 hour"))
          , mean)

# or

# endpoints : Extract index values of a given xts object corresponding to the last observations given a period specified by on
ep <- endpoints(xts,'hours') 
# period.apply: apply a specified function to data over a given interval, where the interval is taken to be the data from INDEX[k] to INDEX[k+1]
period.apply(xts, ep, mean) 


# sliding window
require(zoo)
TS <- zoo(c(4, 5, 7, 3, 9, 8))
rollapply(TS, width = 3, by = 2, FUN = mean, align = "left")
       1        3 
5.333333 6.333333

Deal with inregular time series, time internvals are not constant

tMinHour <- trunc(min(time(ts)), units ='hour') # get minimum at an hour granularity
tMin <- tMinHour - (30 * 60) # back off 30 minute of the current hour
tMax <- max(time(ts)) + (30 * 60)  # get max  
cSeq <- seq(tMin, tMax, by = '60 min') # create sequence for 'cut'
cCut <- cut(time(ts), cSeq) # converted to a factor by cutting, cCut is a factor
newValue <- tapply(ts, cCut, mean)  # compute means , newValue is a array

newTs <- xts(newValue, order.by=seq(tMinHour, tMax, by='60 min') )
plot(newTs)

# ------- fix NA value ------------

# assign 0 to NA
ts[is.na(ts)] <- 0 
# na.locf (Last Observation Carried Forward) from package zoo
ts <- na.locf(ts) 

Identify and replace outliers and missing values in a time series

tsclean(x, replace.missing = TRUE, lambda = NULL)

Subset XTS weekdays

library(xts)
dates <- seq(as.Date("2019-05-24"), length = 50, by = "days") # Create dates as a Date class object starting from 2019-05-24
smith <- xts(x = rnorm(50), order.by = dates) # Use xts() to create smith


smith[.indexwday(smith) %in% 1:5] # uses .indexwday 

smith[!weekdays(index(smith)) %in% c("Saturday", "Sunday")]  # uses weekdays

0.3 Moving Averages

http://rss.acs.unt.edu/Rdoc/library/TTR/html/MovingAverages.html

0.4 ACF

http://a-little-book-of-r-for-time-series.readthedocs.org/en/latest/src/timeseries.html

kings <- c(60, 43, 67, 50, 56, 42, 50, 65, 68, 43, 65, 34, 47, 34, 49, 41, 13, 35, 53, 56, 16, 43, 69, 59, 48, 59, 86, 55, 68, 51, 33, 49, 67, 77, 81, 67, 71, 81, 68, 70, 77, 56)
kingsts = ts(kings)
kingsts
## Time Series:
## Start = 1 
## End = 42 
## Frequency = 1 
##  [1] 60 43 67 50 56 42 50 65 68 43 65 34 47 34 49 41 13 35 53 56 16 43 69
## [24] 59 48 59 86 55 68 51 33 49 67 77 81 67 71 81 68 70 77 56
acf(kingsts, lag.max = 20)

kings1diff = diff(kingsts, differences = 1)
acf(kings1diff, lag.max = 20)

0.5 Decomposition

STL: Seasonal-Trend decomposition based on the Loess

sales<-c(39,  73,  41,  76,  75,  47,   4,  53,  40,  47,  31,  33,
         58,  85,  61,  98,  90,  59,  34,  74,  78,  74,  56,  55,
         91, 125,  96, 135, 131, 103,  86, 116, 117, 128, 113, 123)

time.series <- ts(data=sales, frequency = 12, start=c(2000, 1), end=c(2002, 12))
plot(time.series, xlab="Time", ylab="Sales (USD)")

decomposed <- stl(time.series, s.window="periodic")

# The two main parameters to be chosen when using STL are the trend window (t.window) and seasonal window (s.window). These control how rapidly the trend and seasonal components can change. Small values allow more rapid change. Setting the seasonal window to be infinite is equivalent to forcing the seasonal component to be periodic (i.e., identical across years).


# origional_value - seasonal = trend + remainder

seasonal <- decomposed$time.series[,"seasonal"]   # or decomposed$time.series[,1]
trend <- decomposed$time.series[,"trend"]         # or decomposed$time.series[,2]
remainder <- decomposed$time.series[,"remainder"] # or decomposed$time.series[,3]

plot(decomposed)

decomposed <- decompose(time.series)
plot(decomposed)

0.6 Seasonal Subseries Plot

For each season (or other category), a time series is plotted.

require(graphics)

## The CO2 data
fit <- stl(log(co2), s.window = 20, t.window = 20)
plot(fit)

op <- par(mfrow = c(2,2), mar=c(2,2,2,2), oma=c(0,0,0,0))
monthplot(co2, ylab = "data", cex.axis = 0.8)
monthplot(fit, choice = "seasonal", cex.axis = 0.8)
monthplot(fit, choice = "trend", cex.axis = 0.8)
monthplot(fit, choice = "remainder", type = "h", cex.axis = 0.8)

par(op)

0.7 time series cluster

0.8 DTW

library(dtw)
## Loading required package: proxy
## 
## Attaching package: 'proxy'
## The following objects are masked from 'package:stats':
## 
##     as.dist, dist
## The following object is masked from 'package:base':
## 
##     as.matrix
## Loaded dtw v1.20-1. See ?dtw for help, citation("dtw") for use in publication.
idx <- seq(0,2*pi, len=100)
a <- sin(idx)+ runif(100)/10
b <- cos(idx)
align <- dtw(a,b, step=asymmetricP1, keep=T)
dtwPlotTwoWay(align)

0.9 Finding the period

http://robjhyndman.com/hyndsight/tscharacteristics/

if you really have no idea what the periodicity is, probably the best approach is to find the frequency corresponding to the maximum of the spectral density.

find.freq <- function(x)
{
  n <- length(x)
  spec <- spec.ar(c(na.contiguous(x)),plot=FALSE)
  if(max(spec$spec)>10) # Arbitrary threshold chosen by trial and error.
  {
    period <- round(1/spec$freq[which.max(spec$spec)])
    if(period==Inf) # Find next local maximum
    {
      j <- which(diff(spec$spec)>0)
      if(length(j)>0)
      {
        nextmax <- j[1] + which.max(spec$spec[j[1]:500])
        if(nextmax <= length(spec$freq))
          period <- round(1/spec$freq[nextmax])
        else
          period <- 1
      }
      else
        period <- 1
    }
  }
  else
    period <- 1
  
  return(period)
}


temp <- sales
temp <- c(temp, temp)

freq <- print (find.freq(temp))
## [1] 33
spec <- spec.ar(c(na.contiguous(temp)),plot=T)

decomposed  <- stl(ts(temp, frequency=freq), s.window="periodic")
plot(decomposed)

Home Page