iris[1:3,]
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
model <- kmeans(iris[,1:4], 3)
# plot the first two attributes: Sepal.Length and Sepal.Width
plot(iris[,c(1,2)], col=model$cluster)
# mark the cluster centers on the same two attributes
points(model$centers[, c(1,2)], col=1:3, pch=8, cex=2)
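# A common heuristic for choosing k is the elbow method: plot the total
# within-cluster sum of squares for a range of k and look for the bend
# where the curve flattens. (This step is an illustrative addition; the
# k range 1:10 and nstart = 10 are arbitrary choices.)
wss <- sapply(1:10, function(k) kmeans(iris[,1:4], centers=k, nstart=10)$tot.withinss)
plot(1:10, wss, type="b", xlab="number of clusters k", ylab="total within-cluster SS")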
set.seed(1984)
x1 <- sample(seq(0.0, 0.5, by=0.01), 50, replace = TRUE)
x2 <- sample(seq(0.3, 0.4, by=0.01), 300, replace = TRUE)
x3 <- sample(seq(0.8, 1.0, by=0.01), 300, replace = TRUE)
xx <- c(x1, x2, x3)
yy <- runif(length(xx))
zz <- cbind(xx, yy)
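# A quick look at the raw data (an added sanity check): xx mixes a sparse
# band over 0.0-0.5 with two dense bands over 0.3-0.4 and 0.8-1.0, while
# yy is uniform noise.
plot(zz, pch = 19, col = "grey40")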
k <- 2
# k-means
model <- kmeans(zz, centers = k)
plot(zz, col = model$cluster, pch = 19)
points(model$centers[, c(1,2)], col=1:k, pch=8, cex=2)
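# kmeans starts from random centers and can converge to a poor local
# optimum; the nstart argument re-runs the initialization and keeps the
# best solution. An illustrative comparison (nstart = 25 is arbitrary):
c(single    = kmeans(zz, centers = k)$tot.withinss,
  restarted = kmeans(zz, centers = k, nstart = 25)$tot.withinss)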
# k-means++
library(LICORS) # kmeanspp
model <- kmeanspp(zz, k = k, start = "random")
plot(zz, col = model$cluster, pch = 19)
points(model$centers[, c(1,2)], col=1:k, pch=8, cex=2)
# k-means++ implemented by hand
# the result may differ from kmeanspp's, since this version has no nstart parameter
kmpp <- function(data, k) {
  n <- nrow(data)
  center_ids <- integer(k)
  center_ids[1] <- sample.int(n, 1)
  for (i in 2:k) {
    # squared Euclidean distance from every point to each chosen center
    dists <- apply(data[center_ids[1:(i - 1)], , drop = FALSE], 1,
                   function(center) colSums((t(data) - center)^2))
    # sampling weight: squared distance to the nearest chosen center
    probs <- apply(dists, 1, min)
    probs[center_ids[1:(i - 1)]] <- 0
    center_ids[i] <- sample.int(n, 1, prob = probs)
  }
  kmeans(data, centers = data[center_ids, ])
}
model <- kmpp(zz, k)
plot(zz, col = model$cluster, pch = 19)
points(model$centers[, c(1,2)], col=1:k, pch=8, cex=2)
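# Rough sanity check (illustrative): the k-means++ seeding usually matches
# or beats purely random starts on total within-cluster SS.
replicate(5, kmeans(zz, centers = k)$tot.withinss)
replicate(5, kmpp(zz, k)$tot.withinss)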
# hierarchical clustering (average linkage) on a random sample of 30 rows
sampleiris <- iris[sample(1:150, 30),]
distance <- dist(sampleiris[,-5], method="euclidean")
model <- hclust(distance, method="average")
plot(model, hang=-1, labels=sampleiris$Species)
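# cutree() flattens the dendrogram into cluster labels; cross-tabulating
# them against Species shows how well the tree recovers the groups.
groups <- cutree(model, k=3)
table(groups, sampleiris$Species)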
# fuzzy c-means (fuzzifier m = 2, at most 100 iterations)
library(e1071)
model <- cmeans(iris[,1:4], 3, 100, m=2, method="cmeans")
plot(iris[,c(1,2)], col=model$cluster)
points(model$centers[,c(1,2)], col=1:3, pch=8, cex=2)
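# Unlike kmeans, cmeans is fuzzy: each point gets a degree of membership
# in every cluster rather than a hard label. Inspect the first few rows:
head(round(model$membership, 3))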
# Method for determining the optimal eps value
# The function kNNdistplot() can be used to draw the k-distance plot:
# install.packages("dbscan")
# dbscan::kNNdistplot(iris[,1:4], k = 5)
# abline(h = 0.15, lty = 2)
library(fpc)
# eps is the neighborhood radius; MinPts is the minimum number of points within eps needed to form a dense region
model <- dbscan(iris[,1:4], eps=0.6, MinPts=4)
# plot(model, iris[,1:4])
# points in cluster 0 are unassigned outliers (noise); shift the colors by
# one so they are not drawn in col 0, the invisible background color
plot(iris[,c(1,2)], col=model$cluster+1)
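# cluster sizes; the count under "0" is the number of noise points
table(model$cluster)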
# model-based clustering: Gaussian mixture with G = 3 components
library(mclust)
model <- Mclust(iris[,1:4], G=3)
# plot(iris[,c(1,2)], col=model$classification)
plot(model, what="classification", dimens=c(1,2))
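# If G is left unspecified, Mclust selects the number of components itself
# by BIC over a default range; a quick sketch (model_auto is just an
# illustrative name):
model_auto <- Mclust(iris[,1:4])
summary(model_auto)
plot(model_auto, what="BIC")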
References:
http://horicky.blogspot.ie/2012/04/machine-learning-in-r-clustering.html