from sklearn import datasets
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# ------ Load the iris dataset -----------
iris = datasets.load_iris()
X, y = iris.data, iris.target

# ----- Train and evaluate a decision tree classifier ------------
# Hold out 30% of the samples for testing; the fixed seed keeps the split
# the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=100,
)

# Entropy is the split criterion and the tree depth is capped at 5.
# fit() returns the fitted estimator itself, so it can be bound directly.
dt = tree.DecisionTreeClassifier(criterion='entropy', max_depth=5).fit(X_train, y_train)

# Score the held-out samples and report the accuracy as a percentage.
y_pred = dt.predict(X_test)
accuracy_percent = accuracy_score(y_test, y_pred) * 100
print("Accuracy is ", accuracy_percent)
# ---- plot decision tree -----------
# Write the fitted tree to a Graphviz .dot file. The context manager makes
# sure the file is flushed and closed even if the export raises, so later
# steps never read a half-written dt.dot through a leaked handle.
with open("dt.dot", 'w') as dotfile:
    tree.export_graphviz(
        dt,
        out_file=dotfile,
        feature_names=iris.feature_names,
        class_names=iris.target_names,
        filled=True,
    )
# set graphviz path
import os

# Machine-specific Graphviz install directory. PATH is only extended when the
# directory actually exists; on any other machine PATH is left untouched and
# whatever `dot` executable is already reachable gets used.
GRAPHVIZ_BIN_DIR = 'C:/Program Files (x86)/Graphviz2.38/bin/'
if os.path.isdir(GRAPHVIZ_BIN_DIR):
    os.environ["PATH"] += os.pathsep + GRAPHVIZ_BIN_DIR

# convert the .dot to .png
from subprocess import check_call

# check_call raises CalledProcessError if `dot` exits with a non-zero status,
# so a failed conversion stops the script instead of passing silently.
check_call(['dot', '-Tpng', 'dt.dot', '-o', 'dt.png'])
# convert the .dot to .pdf file
import graphviz

# Read the .dot text inside a context manager so the file handle is closed
# immediately instead of lingering until garbage collection.
with open('dt.dot') as dot_source:
    g = graphviz.Source(dot_source.read())

# view() renders the source and then opens the result in the default viewer
# (it is shorthand for render(view=True)), so a separate render() call
# beforehand would only repeat the same work. No filename is passed, so the
# output goes to the library's default file name rather than to dt.pdf.
g.view()
# Reference: https://www.kdnuggets.com/2017/05/simplifying-decision-tree-interpretation-decision-rules-python.html