prcomp or princomp
prcomp is preferred according to: https://stats.stackexchange.com/questions/20101/what-is-the-difference-between-r-functions-prcomp-and-princomp
#------------ generate data ----------
N <- 500
factor1 <- rnorm(N)
factor2 <- rnorm(N)
x1 <- rnorm(N) + factor1
x2 <- rnorm(N) + factor1
x3 <- rnorm(N) + factor2
x4 <- rnorm(N) + factor2
mydat <- data.frame(x1,x2,x3,x4)
# ---------- use prcomp ------------
pca <- prcomp(mydat)
names(pca)
## [1] "sdev" "rotation" "center" "scale" "x"
plot(pca) # plot the eigenvalues
biplot(pca) # A two dimensional plot
# --------- use princomp ------------
pca2 <- princomp(mydat)
pca2 <- princomp(~ x1 + x2 + x3 + x4, data = mydat) # princomp with a formula syntax
biplot(pca2)
# sequence 1 to 240 with gap 2
dat <- seq(1,120, 2)
# dat
X <- matrix(dat,ncol=6)
X
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 1 21 41 61 81 101
## [2,] 3 23 43 63 83 103
## [3,] 5 25 45 65 85 105
## [4,] 7 27 47 67 87 107
## [5,] 9 29 49 69 89 109
## [6,] 11 31 51 71 91 111
## [7,] 13 33 53 73 93 113
## [8,] 15 35 55 75 95 115
## [9,] 17 37 57 77 97 117
## [10,] 19 39 59 79 99 119
s <- svd(X)
# diag() : constructs a diagonal matrix
# contract a diagonal matrix from a vector s$d
D <- diag(s$d)
# X = U D V'
# where U and V are orthogonal, V' means V transposed, and D is a diagonal matrix with the singular values D[i,i].
s$u %*% D %*% t(s$v)
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 1 21 41 61 81 101
## [2,] 3 23 43 63 83 103
## [3,] 5 25 45 65 85 105
## [4,] 7 27 47 67 87 107
## [5,] 9 29 49 69 89 109
## [6,] 11 31 51 71 91 111
## [7,] 13 33 53 73 93 113
## [8,] 15 35 55 75 95 115
## [9,] 17 37 57 77 97 117
## [10,] 19 39 59 79 99 119
http://datascientistinsights.com/2013/02/17/single-value-decomposition-a-golfers-tutotial/
## Phil Tiger Vijay
## Hole 1 4 4 5
## Hole 2 4 5 5
## Hole 3 3 3 2
## Hole 4 4 5 4
## Hole 5 4 4 4
## Hole 6 3 5 4
## Hole 7 4 4 3
## Hole 8 2 4 4
## Hole 9 5 5 5
## $d
## [1] 21.116733 2.014004 1.423864
##
## $u
## [,1] [,2] [,3] [,4] [,5]
## [1,] -0.3548528 0.08912626 0.63510622 0.02415090 -0.39368790
## [2,] -0.3842357 0.18894413 0.10273509 -0.22710064 0.04782861
## [3,] -0.2180618 -0.39604843 -0.28094945 -0.44166040 -0.14576123
## [4,] -0.3567627 -0.07556623 -0.32999583 0.82355844 -0.09298384
## [5,] -0.3273797 -0.17538409 0.20237530 0.01945606 0.87590065
## [6,] -0.3317737 0.33260802 -0.48022986 -0.22352269 -0.01309690
## [7,] -0.2999067 -0.43989445 -0.23035562 -0.13422444 -0.14184593
## [8,] -0.2774018 0.64096440 -0.09809276 -0.07470621 0.03567452
## [9,] -0.4092247 -0.21923012 0.25296912 0.02432008 -0.15512419
## [,6] [,7] [,8] [,9]
## [1,] 0.23659893 -0.08987515 0.03120815 -0.49210988
## [2,] -0.57153066 0.10537224 -0.64103145 0.05978576
## [3,] -0.25498511 -0.59189539 0.22758935 -0.18220154
## [4,] -0.15868783 -0.17094933 -0.05747638 -0.11622980
## [5,] 0.12940988 -0.05947292 0.09580829 -0.15512419
## [6,] 0.63944123 -0.08321240 -0.28716904 -0.01637113
## [7,] 0.01083064 0.75618197 0.14826422 -0.17730742
## [8,] -0.27433199 0.11600095 0.63642295 0.04459315
## [9,] 0.16176235 -0.07434115 0.11976036 0.80609476
##
## $v
## [,1] [,2] [,3]
## [1,] -0.5276850 -0.8220644 0.2139128
## [2,] -0.6204716 0.2010335 -0.7580241
## [3,] -0.5801409 0.5327248 0.6161500
## [,1] [,2] [,3]
## [1,] 3.954119 4.649399 4.347188
## [2,] 4.281532 5.034384 4.707149
## [3,] 2.429859 2.857118 2.671405
## [4,] 3.975401 4.674423 4.370586
## [5,] 3.647987 4.289438 4.010625
## [6,] 3.696949 4.347010 4.064454
## [7,] 3.341855 3.929477 3.674061
## [8,] 3.091084 3.634611 3.398361
## [9,] 4.559984 5.361798 5.013281
## [,1] [,2] [,3]
## [1,] 3.806558 4.685485 4.442813
## [2,] 3.968709 5.110884 4.909869
## [3,] 3.085572 2.696765 2.246481
## [4,] 4.100511 4.643828 4.289510
## [5,] 3.938360 4.218428 3.822453
## [6,] 3.146270 4.481677 4.421312
## [7,] 4.070162 3.751372 3.202094
## [8,] 2.029877 3.894126 4.086058
## [9,] 4.922950 5.273035 4.778067
Multidimensional Scaling
#eurodist (dist object) that gives the road distances (in km) between 21 cities in Europe.
euromat = as.matrix(eurodist) # convert eurodist to matrix
euromat[1:5, 1:5] # inspect first five elements
## Athens Barcelona Brussels Calais Cherbourg
## Athens 0 3313 2963 3175 3339
## Barcelona 3313 0 1318 1326 1294
## Brussels 2963 1318 0 204 583
## Calais 3175 1326 204 0 460
## Cherbourg 3339 1294 583 460 0
# as.dist(myMatrix) # convert to dist object from co matrix
# MDS 'cmdscale'
mds1 = cmdscale(eurodist, k = 2)
# plot
plot(mds1[,1], mds1[,2], type = "n", xlab = "", ylab = "", axes = FALSE,
main = "cmdscale (stats)")
text(mds1[,1], mds1[,2], labels(eurodist), cex = 0.9, xpd = TRUE)
#------------------
data(iris)
irisDist <- dist(iris[,1:4]) # calculate the distance matrix for first 4 columns
irisMds = cmdscale(irisDist, k = 2)
# plot with color based on the 5th column
plot(irisMds)
text(irisMds[,1], (irisMds[,2]+max(irisMds[,2])*0.05), labels=1:nrow(irisMds), cex = 0.6, xpd = TRUE)
points(irisMds, col=c("red", "green", "blue")[as.numeric(iris$Species)])