# 1 Different cluster techniques

# set plot layout 
# par(mfrow=c(3,2),
#      mar=c(2,2,2,1), oma=c(0,0,0,0) # margin: bottom, left, top, right
#      )

# Training data: the four numeric iris measurements (Species is left out).
trainingData <- iris[, 1:4]
# Every scatter plot below shows two of those attributes: Sepal.Length and
# Petal.Length (columns 1 and 3 of iris).
plotData <- iris[, c("Sepal.Length", "Petal.Length")]


#  -------- K-Means

# Partition the four measurements into three clusters.
model <- kmeans(x = trainingData, centers = 3)
# Colour every observation by the cluster it was assigned to.
plot(plotData, main = "K-Means", col = model$cluster)
# Overlay the cluster centres, restricted to the two plotted attributes.
points(
  model$centers[, names(plotData)],
  pch = 8,
  cex = 2,
  col = 1:3
)

#  -------- Fuzzy C-Means

library(e1071)
# Three fuzzy clusters, at most 100 iterations, fuzziness exponent m = 2.
model <- cmeans(
  x = trainingData,
  centers = 3,
  iter.max = 100,
  method = "cmeans",
  m = 2
)
# Colour each observation by the cluster stored in model$cluster.
plot(plotData, main = "Fuzzy C-Means", col = model$cluster)
# Mark the cluster centres for the two plotted attributes (columns 1 and 3).
points(model$centers[, c(1, 3)], pch = 8, cex = 2, col = 1:3)

#  -------- Expectation-Maximization

library(mclust)
# Fit a mixture model with three components (G = 3).
model <- Mclust(data = trainingData, G = 3)
# Colour each observation by its assigned component.
plot(
  plotData,
  main = "Expectation-Maximization",
  col = model$classification
)


#  -------- Density-based

library(fpc)

# Density-based clustering with neighbourhood radius 0.6 and at least
# 4 points per neighbourhood.
model <- dbscan(trainingData, MinPts = 4, eps = 0.6)
# Colours are 4 minus the cluster label, so label 0 is drawn with colour 4.
plot(plotData, main = "Density-based", col = 4 - model$cluster)
mtext(
  "Noise/outlier observations are coded as 0, and plotted in blue",
  cex = 0.5
)

# --------- Hierarchical

# Pairwise Euclidean distances between observations.
distance <- dist(x = trainingData, method = "euclidean")
# Average-linkage agglomerative tree built on those distances.
hc <- hclust(d = distance, method = "average")
# Cut the tree into three groups; one label per observation.
model <- cutree(tree = hc, k = 3)
plot(plotData, main = "Hierarchical", col = model)

# ---------- Self-Organising Maps

library(kohonen)
# Train a 5 x 30 hexagonal map on the standardised measurements.
# NOTE: the fitted map is stored in `som`, the same name as kohonen::som().
# The name is kept so that any other code referring to `som` keeps working.
som <- som(as.matrix(scale(trainingData)),
           grid = somgrid(xdim = 5, ydim = 30, topo = "hexagonal"))

# Codebook vectors are a plain matrix in kohonen 2.x and a list of matrices
# (one per data layer) in kohonen >= 3.0; unwrap the single layer if needed.
som_codes <- som$codes
if (is.list(som_codes)) {
  som_codes <- som_codes[[1]]
}

## use hierarchical clustering to cluster the codebook vectors
som_unit_cluster <- cutree(hclust(dist(som_codes)), k = 3)

# cutree() returns one label per map unit, not per observation. Look up each
# observation's winning unit so every point is coloured by the cluster of the
# unit it was mapped to.
model <- som_unit_cluster[som$unit.classif]
plot(plotData, col = model, main = "Self-Organising Maps")

# ---------- Spectral

library(kernlab)
# The data frame is converted with as.matrix() because the matrix form of
# specc() is the one used here.
model <- specc(
  as.matrix(trainingData),
  centers = 3
)
# The specc result is passed directly as the colour vector.
plot(plotData, main = "Spectral", col = model)

# 2 plot with cluster package

library(cluster)
# Bivariate cluster plot of the two plotted attributes (iris columns 1 and 3),
# grouped by whatever clustering `model` currently holds; each section above
# overwrites `model`, so this shows the most recently run technique.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
clusplot(x = iris[, c(1, 3)], clus = model, color = TRUE, shade = FALSE,
         labels = 4, lines = 0, plotchar = TRUE)

# 3 cluster evaluation

#----------- cluster evaluation function ------

library("fpc")

# Compare a clustering against known class labels.
#
# Prints the cross-tabulation of `truth` against `cluster_result`, then the
# corrected Rand index reported by fpc::cluster.stats().
#
# Args:
#   cluster_result: cluster labels, one per observation in `data`.
#   truth: reference class labels; defaults to iris$Species.
#   data:  observations used to build the distance matrix; defaults to the
#          script-level `trainingData`.
#
# Returns:
#   The corrected Rand index, invisibly.
clustering_evaluation <- function(cluster_result,
                                  truth = iris$Species,
                                  data = trainingData) {
  stopifnot(
    length(truth) == length(cluster_result),
    length(truth) == NROW(data)
  )

  # cross-tabulation
  print(table(truth, cluster_result))

  # External clustering validation: cluster.stats() wants integer labels, so
  # turn the reference classes into their factor codes.
  truth_codes <- as.integer(as.factor(truth))
  # Compute cluster stats using fpc library
  # d = a distance matrix among objects
  clust_stats <- cluster.stats(
    d = dist(data),
    clustering = truth_codes,
    alt.clustering = cluster_result
  )

  # Corrected Rand index. cat() is used because a second positional argument
  # to print() is taken as `digits`, so the value itself would never be shown.
  rand_index <- clust_stats$corrected.rand
  cat("Rand index:", rand_index, "\n")
  invisible(rand_index)
}

# 4 3D plot

library(plotly) # 3D plotting
# 3D marker plot of three iris measurements; `model` supplies the colours.
plot_ly(
  data = iris,
  x = iris$Sepal.Length,
  y = iris$Sepal.Width,
  z = iris$Petal.Length,
  color = model,
  type = "scatter3d",
  mode = "markers",
  marker = list(size = 3)
)
# ^Home Page^