# 决策树的一个小例子:
# -*- coding:utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn import metrics
from sklearn.ensemble import ExtraTreesClassifier
# Species name -> numeric class label used as the classification target.
_IRIS_LABELS = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}


def iris_type(s):
    """Convert an iris species name into its numeric class label.

    Intended as a ``converters`` callback for ``np.loadtxt``. Depending on
    the Python/NumPy version and the ``encoding`` argument, ``loadtxt`` may
    hand the raw field to the converter as ``bytes`` instead of text, so both
    forms are accepted.

    Args:
        s: Species name, as text or bytes (e.g. ``'Iris-setosa'``).

    Returns:
        0, 1 or 2 for setosa, versicolor and virginica respectively.

    Raises:
        KeyError: If ``s`` is not one of the three known species names.
    """
    if isinstance(s, bytes):
        # latin-1 maps every byte to a character, so decoding never fails and
        # an unknown label still surfaces as KeyError from the lookup below.
        s = s.decode('latin-1')
    return _IRIS_LABELS[s]
if __name__ == "__main__":
    # Load the data: columns 0-3 are parsed as floats, column 4 holds the
    # species name and is mapped to a numeric class label by iris_type.
    path = u"4.iris.data"
    data = np.loadtxt(path, dtype=float, delimiter=',', converters={4: iris_type})
    # Only the first two features are used for the classifier so that the
    # decision regions can be drawn on a 2-D plot.
    X = data[:, :2]
    y = data[:, 4]

    # Tree-based ensembles expose per-feature importances; fit one on all
    # four feature columns to see how much each contributes.
    import_test_data = data[:, :4]
    model = ExtraTreesClassifier()
    model.fit(import_test_data, y)
    print(model.feature_importances_)

    # Train the decision tree classifier.
    clf = DecisionTreeClassifier(criterion='entropy', max_depth=20)
    dt_clf = clf.fit(X, y)

    # Export the fitted tree in Graphviz format; the context manager makes
    # sure the file is flushed and closed even if the export fails.
    with open("iris_tree.dot", 'w') as f:
        tree.export_graphviz(dt_clf, out_file=f)

    # Visualisation: predict on an M x N grid spanning the observed range of
    # the two features and colour the grid by predicted class.
    M, N = 500, 500
    x1_min, x1_max = np.min(X[:, 0]), np.max(X[:, 0])
    x2_min, x2_max = np.min(X[:, 1]), np.max(X[:, 1])
    t1 = np.linspace(x1_min, x1_max, M)
    t2 = np.linspace(x2_min, x2_max, N)
    x1, x2 = np.meshgrid(t1, t2)  # grid of sampling points
    # One row per grid point: (feature 0, feature 1).
    x_test = np.stack((x1.ravel(), x2.ravel()), axis=1)
    y_hat = clf.predict(x_test)
    y_hat = y_hat.reshape(x1.shape)  # back to grid shape for pcolormesh

    # Predicted regions; other colormaps worth trying:
    # Paired/Spectral/coolwarm/summer/spring/OrRd/Oranges
    plt.pcolormesh(x1, x2, y_hat, cmap=plt.cm.summer, alpha=0.3)
    # Training samples, coloured by their true class.
    plt.scatter(X[:, 0], X[:, 1], c=y, s=50, edgecolors='k', cmap=plt.cm.prism)
    plt.xlim(x1_min, x1_max)
    plt.ylim(x2_min, x2_max)
    plt.grid()
    plt.show()

    # Evaluation.
    # NOTE: predictions are made on the same samples the tree was fitted on,
    # so the numbers below are training accuracy, not held-out accuracy.
    predict = clf.predict(X)
    accuracy = metrics.accuracy_score(y, predict)
    print("Accuracy: %.2f%%" % (100 * accuracy))
    report = metrics.classification_report(y, predict)
    print(report)

    # Same accuracy computed by hand from the element-wise comparison.
    result = (predict == y)  # True where the prediction is correct
    print(result)
    c = np.count_nonzero(result)  # number of correct predictions
    print(c)
    print('Accuracy: %.2f%%' % (100 * float(c) / float(len(result))))