1.知识点
2.实现
2.1 导包
import graphviz
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pandas import Series, DataFrame
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier

%matplotlib inline
2.2 获取特征向量X和标签y
# Load the lenses data set; header=None because the file has no header row.
lenses = pd.read_table('./lenses.txt', header=None)
# Every column except the last is a feature; the last column is the label.
# Copies are taken so later edits to X / y never write back into `lenses`.
X, y = lenses.iloc[:, :-1].copy(), lenses.iloc[:, -1].copy()
VSM 结构转换 (二维表格、数值类型)
因为数据列中不存在有序关系,所以可以使用哑编码
# One-hot (dummy) encode each feature column on its own, then place the
# resulting indicator frames side by side to form the training matrix.
# Encoding column by column keeps the raw category values as column names
# (no "<column>_" prefix is added).
dummy_features = [pd.get_dummies(X[col_name]) for col_name in X.columns]
train = pd.concat(dummy_features, axis=1)
2.3 建模
# Build a decision tree with default hyperparameters and train it on the
# one-hot encoded features (fit() returns the estimator itself).
dt = DecisionTreeClassifier().fit(train, y)
# Accuracy on the training data itself: the share of samples predicted
# correctly. The original author recorded 1.0 here.
dt.score(train, y)
2.4 贝叶斯建模
from sklearn.naive_bayes import MultinomialNB, BernoulliNB

# Multinomial naive Bayes, trained and scored on the same training data.
# The original author recorded 0.9583333333333334 for this score.
mnb = MultinomialNB().fit(train, y)
mnb.score(train, y)

# Bernoulli naive Bayes, trained and scored on the same training data.
# The original author recorded 0.875 for this score.
bnb = BernoulliNB().fit(train, y)
bnb.score(train, y)
2.5 绘制决策树
# Export the fitted decision tree to Graphviz DOT source and render it.
#   filled=True    -> color each node box.
#   feature_names  -> the one-hot column names (e.g. "reduced", "no").
#   class_names    -> one label per class (e.g. "soft"), in the classifier's
#                     own class order (dt.classes_). Passing y.values (one
#                     entry per sample) would mislabel the nodes, because
#                     export_graphviz looks names up by class index.
dot = tree.export_graphviz(
    dt,
    out_file=None,  # return the DOT text as a string instead of writing a file
    filled=True,
    feature_names=[str(name) for name in train.columns],
    class_names=[str(label) for label in dt.classes_],
)
graphviz.Source(dot)