Decision Tree

0.1 CART Modeling

rpart

# Classification tree with rpart (CART)
library(rpart)

# Preview the first three rows of the kyphosis data
head(kyphosis, 3)

##   Kyphosis Age Number Start
## 1   absent  71      3     5
## 2   absent 158      3    14
## 3  present 128      4     5

#------------ build tree --------------

# rpart() cross-validates internally using randomly assigned folds, so the
# xerror/xstd columns of the CP table (and therefore the pruning choice
# below) change from run to run unless the seed is fixed.
set.seed(1)

# grow tree
fit <- rpart(Kyphosis ~ Age + Number + Start, method = "class", data = kyphosis) # classification tree

# prune the tree at the complexity parameter with the lowest
# cross-validated error (which.min() takes the first row on ties)
best_cp <- fit$cptable[which.min(fit$cptable[, "xerror"]), "CP"]
pfit <- prune(fit, cp = best_cp)

#----- model diagnostic -----------

# Print the complexity-parameter (CP) table of the unpruned tree
printcp(fit)

## 
## Classification tree:
## rpart(formula = Kyphosis ~ Age + Number + Start, data = kyphosis, 
##     method = "class")
## 
## Variables actually used in tree construction:
## [1] Age   Start
## 
## Root node error: 17/81 = 0.20988
## 
## n= 81 
## 
##         CP nsplit rel error  xerror    xstd
## 1 0.176471      0   1.00000 1.00000 0.21559
## 2 0.019608      1   0.82353 0.82353 0.20018
## 3 0.010000      4   0.76471 0.82353 0.20018

# Plot the cross-validation results from the CP table
plotcp(fit)

# summary(fit) # detailed summary of splits

# ---------- plot tree -----------

# Base-graphics rendering: draw the tree first, then add the node labels
plot(fit, uniform = TRUE)
text(fit, use.n = TRUE, all = TRUE, cex = 0.8)

# Alternative rendering with the rpart.plot package
library(rpart.plot)

## Warning: package 'rpart.plot' was built under R version 3.3.3

# Unpruned tree
rpart.plot(fit, under = TRUE)

# Pruned tree
rpart.plot(pfit, under = TRUE)

# create attractive postscript plot of tree
# post(fit, file = "")

0.2 Random forest tree

randomForest

library(randomForest)

## Classification:
## data(iris)

# Show five randomly chosen rows of iris. The draw happens before the seed
# is set below, so the rows printed differ from run to run.
iris[sample.int(nrow(iris), 5), ]
#         Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# 49           5.3         3.7          1.5         0.2     setosa
# 133          6.4         2.8          5.6         2.2  virginica
# 1            5.1         3.5          1.4         0.2     setosa
# 99           5.1         2.5          3.0         1.1 versicolor
# 69           6.2         2.2          4.5         1.5 versicolor


#----------------------------------------------------------

# Fit the forest with a fixed seed; proximity = TRUE keeps the proximity
# matrix on the fitted object.
set.seed(1)
iris.rf <- randomForest(
  Species ~ .,
  data = iris,
  proximity = TRUE,
  keep.forest = TRUE
)

# Other randomForest() arguments worth knowing:
# ntree: Number of trees to grow.
# importance: Should importance of predictors be assessed?
# sampsize: for example, sampsize = c(20, 30, 20) gives stratified sampling:
#           draw 20, 30, and 20 of the species to grow each tree.

# --------- Plot of feature importance

imp <- importance(iris.rf) # imp is a matrix
plot(imp)                  # one point per predictor
lines(imp)                 # connect the points
text(imp, labels = rownames(imp), cex = 1) # label each point with its predictor


# --------- more detailed variable importance information

# Refit with importance = TRUE and print the fitted forest
iris.rf <- randomForest(
  Species ~ .,
  data = iris,
  importance = TRUE,
  proximity = TRUE
)
print(iris.rf)

# Variable importance, rounded to two decimal places
round(importance(iris.rf), digits = 2)


# --------- Multi-dimensional Scaling Plot of Proximity matrix from randomForest  -------------

# The forest above was trained with Species as the response, so the points
# are coloured by the known species.

# MDSplot(rf, fac, ...)
# rf = randomForest object. fac = a factor that was used as response to train rf.

# Build the palette once and hand the same colours to both MDSplot() and
# legend(), so the legend always matches the plot. brewer.pal is called with
# its namespace because RColorBrewer is not attached anywhere in this script.
species_levels <- levels(iris$Species)
species_cols <- RColorBrewer::brewer.pal(length(species_levels), "Set1")

x <- MDSplot(iris.rf, iris$Species, palette = species_cols)

# add legend
legend("topleft", legend = species_levels, fill = species_cols)


str(x)

# need to identify points?
# text(x$points, labels=attr(x$points,"dimnames")[[1]], cex=0.5)


##--------------------   ------------

## Do MDS on 1 - proximity:
iris.mds <- cmdscale(1 - iris.rf$proximity, eig = TRUE)

# One colour per observation, keyed by species (factor levels 1, 2, 3)
point_cols <- c("red", "green", "blue")[as.integer(iris$Species)]

# Scatter of the two MDS coordinates. The points are drawn here; an empty
# type = "n" frame would show nothing but the title.
plot(iris.mds$points[, 1], iris.mds$points[, 2],
     col = point_cols, pch = 20, xlab = "", ylab = "", axes = FALSE,
     main = "cmdscale (stats)")


# Pairs plot of the four predictors together with the MDS coordinates,
# drawn with a square plotting region; par() is restored afterwards.
op <- par(pty = "s")
x <- cbind(iris[, 1:4], iris.mds$points)
pairs(x, cex = 0.6, gap = 0,
      col = point_cols,
      main = "Iris Data: Predictors and MDS of Proximity Based on RandomForest")
par(op)


#  "GOF" component of the result ("goodness of fit")
print(iris.mds$GOF)

0.3 Conditional inference trees

party

The party package provides nonparametric regression trees for nominal, ordinal, numeric, censored, and multivariate responses.

library(rpart) # for cu.summary dataset

library(party)

# Drop rows with missing values, then fit and plot a conditional inference
# tree for Mileage.
cars_complete <- na.omit(cu.summary)
fit <- ctree(
  Mileage ~ Price + Country + Reliability + Type,
  data = cars_complete
)
plot(fit, main = "Conditional Inference Tree")

^Home Page^

Decision Tree

Updated: 28 June, 2017

0.1 CART Modeling

0.2 Random forest tree

0.3 Conditional inference trees