1 PCA

prcomp or princomp

prcomp is preferred according to: https://stats.stackexchange.com/questions/20101/what-is-the-difference-between-r-functions-prcomp-and-princomp

#------------ generate data ----------
N <- 500
factor1 <- rnorm(N)
factor2 <- rnorm(N) 
x1 <- rnorm(N) + factor1
x2 <- rnorm(N) + factor1
x3 <- rnorm(N) + factor2 
x4 <- rnorm(N) + factor2
mydat <- data.frame(x1,x2,x3,x4)

# ---------- use prcomp ------------
pca <- prcomp(mydat)
names(pca)
## [1] "sdev"     "rotation" "center"   "scale"    "x"
plot(pca) # plot the eigenvalues

biplot(pca) # A two dimensional plot

# --------- use princomp ------------

pca2 <- princomp(mydat)
pca2 <- princomp(~ x1 + x2 + x3 + x4, data = mydat) # princomp with a formula syntax
biplot(pca2)

2 SVD

# sequence 1 to 240 with gap 2
dat <- seq(1,120, 2)
# dat

X <- matrix(dat,ncol=6)
X
##       [,1] [,2] [,3] [,4] [,5] [,6]
##  [1,]    1   21   41   61   81  101
##  [2,]    3   23   43   63   83  103
##  [3,]    5   25   45   65   85  105
##  [4,]    7   27   47   67   87  107
##  [5,]    9   29   49   69   89  109
##  [6,]   11   31   51   71   91  111
##  [7,]   13   33   53   73   93  113
##  [8,]   15   35   55   75   95  115
##  [9,]   17   37   57   77   97  117
## [10,]   19   39   59   79   99  119
s <- svd(X)

# diag() : constructs a diagonal matrix
# contract a diagonal matrix from a vector s$d
D <- diag(s$d)

# X = U D V'
# where U and V are orthogonal, V' means V transposed, and D is a diagonal matrix with the singular values D[i,i]. 
s$u %*% D %*% t(s$v) 
##       [,1] [,2] [,3] [,4] [,5] [,6]
##  [1,]    1   21   41   61   81  101
##  [2,]    3   23   43   63   83  103
##  [3,]    5   25   45   65   85  105
##  [4,]    7   27   47   67   87  107
##  [5,]    9   29   49   69   89  109
##  [6,]   11   31   51   71   91  111
##  [7,]   13   33   53   73   93  113
##  [8,]   15   35   55   75   95  115
##  [9,]   17   37   57   77   97  117
## [10,]   19   39   59   79   99  119

2.1 Golf example

http://datascientistinsights.com/2013/02/17/single-value-decomposition-a-golfers-tutotial/

##        Phil Tiger Vijay
## Hole 1    4     4     5
## Hole 2    4     5     5
## Hole 3    3     3     2
## Hole 4    4     5     4
## Hole 5    4     4     4
## Hole 6    3     5     4
## Hole 7    4     4     3
## Hole 8    2     4     4
## Hole 9    5     5     5
## $d
## [1] 21.116733  2.014004  1.423864
## 
## $u
##             [,1]        [,2]        [,3]        [,4]        [,5]
##  [1,] -0.3548528  0.08912626  0.63510622  0.02415090 -0.39368790
##  [2,] -0.3842357  0.18894413  0.10273509 -0.22710064  0.04782861
##  [3,] -0.2180618 -0.39604843 -0.28094945 -0.44166040 -0.14576123
##  [4,] -0.3567627 -0.07556623 -0.32999583  0.82355844 -0.09298384
##  [5,] -0.3273797 -0.17538409  0.20237530  0.01945606  0.87590065
##  [6,] -0.3317737  0.33260802 -0.48022986 -0.22352269 -0.01309690
##  [7,] -0.2999067 -0.43989445 -0.23035562 -0.13422444 -0.14184593
##  [8,] -0.2774018  0.64096440 -0.09809276 -0.07470621  0.03567452
##  [9,] -0.4092247 -0.21923012  0.25296912  0.02432008 -0.15512419
##              [,6]        [,7]        [,8]        [,9]
##  [1,]  0.23659893 -0.08987515  0.03120815 -0.49210988
##  [2,] -0.57153066  0.10537224 -0.64103145  0.05978576
##  [3,] -0.25498511 -0.59189539  0.22758935 -0.18220154
##  [4,] -0.15868783 -0.17094933 -0.05747638 -0.11622980
##  [5,]  0.12940988 -0.05947292  0.09580829 -0.15512419
##  [6,]  0.63944123 -0.08321240 -0.28716904 -0.01637113
##  [7,]  0.01083064  0.75618197  0.14826422 -0.17730742
##  [8,] -0.27433199  0.11600095  0.63642295  0.04459315
##  [9,]  0.16176235 -0.07434115  0.11976036  0.80609476
## 
## $v
##            [,1]       [,2]       [,3]
## [1,] -0.5276850 -0.8220644  0.2139128
## [2,] -0.6204716  0.2010335 -0.7580241
## [3,] -0.5801409  0.5327248  0.6161500
##           [,1]     [,2]     [,3]
##  [1,] 3.954119 4.649399 4.347188
##  [2,] 4.281532 5.034384 4.707149
##  [3,] 2.429859 2.857118 2.671405
##  [4,] 3.975401 4.674423 4.370586
##  [5,] 3.647987 4.289438 4.010625
##  [6,] 3.696949 4.347010 4.064454
##  [7,] 3.341855 3.929477 3.674061
##  [8,] 3.091084 3.634611 3.398361
##  [9,] 4.559984 5.361798 5.013281
##           [,1]     [,2]     [,3]
##  [1,] 3.806558 4.685485 4.442813
##  [2,] 3.968709 5.110884 4.909869
##  [3,] 3.085572 2.696765 2.246481
##  [4,] 4.100511 4.643828 4.289510
##  [5,] 3.938360 4.218428 3.822453
##  [6,] 3.146270 4.481677 4.421312
##  [7,] 4.070162 3.751372 3.202094
##  [8,] 2.029877 3.894126 4.086058
##  [9,] 4.922950 5.273035 4.778067

3 MDS

Multidimensional Scaling

#eurodist (dist object) that gives the road distances (in km) between 21 cities in Europe. 
euromat = as.matrix(eurodist) # convert eurodist to matrix
euromat[1:5, 1:5] # inspect first five elements
##           Athens Barcelona Brussels Calais Cherbourg
## Athens         0      3313     2963   3175      3339
## Barcelona   3313         0     1318   1326      1294
## Brussels    2963      1318        0    204       583
## Calais      3175      1326      204      0       460
## Cherbourg   3339      1294      583    460         0
# as.dist(myMatrix) # convert to dist object from co matrix

# MDS 'cmdscale'
mds1 = cmdscale(eurodist, k = 2)

# plot
plot(mds1[,1], mds1[,2], type = "n", xlab = "", ylab = "", axes = FALSE,
     main = "cmdscale (stats)")
text(mds1[,1], mds1[,2], labels(eurodist), cex = 0.9, xpd = TRUE)

#------------------ 

data(iris)

irisDist <- dist(iris[,1:4]) # calculate the distance matrix for first 4 columns

irisMds = cmdscale(irisDist, k = 2)

# plot with color based on the 5th column 
plot(irisMds)
text(irisMds[,1], (irisMds[,2]+max(irisMds[,2])*0.05), labels=1:nrow(irisMds), cex = 0.6, xpd = TRUE)
points(irisMds, col=c("red", "green", "blue")[as.numeric(iris$Species)])

Home Page