# 预测隐形眼镜的类型
# 分析 lenses.txt 文件，最后一列为隐形眼镜的类型。
# 分别用决策树和随机森林模型进行训练，并查看哪些特征最重要。
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from pandas import Series, DataFrame
from sklearn.model_selection import train_test_split
# Load the lenses dataset. The file has no header row, so pandas labels the
# columns with integers starting at 0.
lenses = pd.read_table('../data/lenses.txt', header=None)
lenses

# Feature matrix: every column except the last one, taken as an independent
# copy so that re-encoding it later does not touch `lenses`.
data = lenses.loc[:, lenses.columns[:-1]].copy()
data

# Class labels: the column labelled 4, as a plain array.
target = lenses.loc[:, 4].values
target
# Convert the string-valued feature columns to integer codes.
# pd.factorize numbers the distinct values of a column in order of first
# appearance, which is the same numbering the position in `unique()` gives,
# but does it in one vectorised pass instead of one array scan per cell.
# Missing values are coded as -1 (the pandas sentinel) rather than raising.
for col in data.columns:
    codes, _ = pd.factorize(data[col])
    data[col] = codes
# Hold out 20% of the rows for evaluation. Fixing random_state puts the same
# rows in the train and test sets on every run, so results can be compared.
X_train, X_test, y_train, y_test = train_test_split(
    data, target, test_size=0.2, random_state=42
)
# Decision tree
from sklearn.tree import DecisionTreeClassifier

# The tree shuffles the candidate features at every split, so equally good
# splits can be resolved differently from run to run; a fixed random_state
# makes the fitted tree repeatable for the same training data.
tree = DecisionTreeClassifier(max_depth=3, random_state=42)
tree.fit(X_train, y_train)
# Accuracy on the training rows.
tree.score(X_train, y_train)
# Predicted labels for the held-out rows.
tree.predict(X_test)
# Accuracy on the held-out rows.
tree.score(X_test, y_test)
# Random forest
from sklearn.ensemble import RandomForestClassifier

# Each of the 10 trees is grown on a random bootstrap sample with random
# feature subsets; a fixed random_state makes the scores and the feature
# importances repeatable for the same training data.
rfc = RandomForestClassifier(n_estimators=10, max_depth=3, random_state=42)
rfc.fit(X_train, y_train)
# Accuracy on the training rows.
rfc.score(X_train, y_train)
# Predicted labels for the held-out rows.
rfc.predict(X_test)
# Accuracy on the held-out rows.
rfc.score(X_test, y_test)

# Feature importances: one value per feature column, in column order.
rfc.feature_importances_