import pandas as pd
import numpy as np
from sklearn import neighbors#KNN分类
from sklearn.ensemble import RandomForestClassifier#随机森林分类
from sklearn.linear_model import Perceptron#感知机分类
from sklearn.svm import SVC#支持向量机SVC分类
from sklearn.tree import DecisionTreeClassifier#决策树分类
import matplotlib.pyplot as plt
# Toy training set: six movies described by how many fight scenes and kiss
# scenes they contain, each labelled 'Romance' or 'Action'.
data = pd.DataFrame({
    'name': ['北京遇上西雅图', '喜欢你', '疯狂动物城', '战狼2', '力王', '敢死队'],
    'fight': [3, 2, 1, 101, 99, 98],
    'kiss': [104, 100, 81, 10, 5, 2],
    'type': ['Romance', 'Romance', 'Romance', 'Action', 'Action', 'Action'],
})


def _scatter_by_type(frame):
    """Scatter 'fight' against 'kiss': Romance rows in red, then Action rows in blue."""
    for label, colour in (('Romance', 'r'), ('Action', 'b')):
        rows = frame[frame['type'] == label]
        plt.scatter(rows['fight'], rows['kiss'], c=colour)


# Plot the labelled training movies.
_scatter_by_type(data)

# 300 random (fight, kiss) query points: standard-normal draws scaled by 50.
test = pd.DataFrame(np.random.randn(300, 2) * 50, columns=['fight', 'kiss'])

# Fit a k-nearest-neighbours classifier (library defaults) on the two numeric
# features and label every random point with its predicted movie type.
knn = neighbors.KNeighborsClassifier()
knn.fit(data[['fight', 'kiss']], data['type'])
predicted_types = knn.predict(test)
test['type'] = predicted_types

# Plot the predictions using the same colour scheme as the training data.
_scatter_by_type(test)
# Iris example
from sklearn import datasets

iris = datasets.load_iris()
print(iris['data'][:5])
print(iris.feature_names)
print(iris.target_names)

# One row per flower: the measurements plus the numeric class code.
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['target'] = iris.target

# Lookup table from numeric class code to class name; the codes are derived
# from the number of names instead of being hard-coded.
ty = pd.DataFrame({'target': list(range(len(iris.target_names))),
                   'target_names': iris.target_names})
df = pd.merge(df, ty, on='target')

# A single flower to classify (same feature order as iris.feature_names).
example = np.array([4.2, 3.5, 1.6, 0.2])

knn = neighbors.KNeighborsClassifier()
# Take features AND labels from the same merged frame so each row stays paired
# with its own label regardless of how the merge orders its output. Plain
# arrays are used so predicting on a NumPy array below matches the fit input.
knn.fit(df[iris.feature_names].values, df['target_names'].values)

# predict() expects a 2-D array: one row per sample.
ex = example.reshape(1, -1)
result = knn.predict(ex)
# 1. Understanding the data: the last column is the outcome. Sex, education
# level and marital status are categorical, so they must be turned into numbers
# before prediction. Because a model would treat plain numeric codes as having
# an order/magnitude, these columns are one-hot (dummy) encoded instead.
data = pd.read_csv('/Users/liyili2/Downloads/datas/credit_card_train.csv', header=0)
data = pd.get_dummies(data, columns=['SEX', 'EDUCATION', 'MARRIAGE'])

# get_dummies appends the new indicator columns at the end, so move the label
# column DEFAULT back to the last position.
columns = list(data.columns)
columns.remove('DEFAULT')
columns.append('DEFAULT')
data = data.reindex(columns=columns)

# Number of leading rows used for training; every remaining row is used for
# testing. (Previously the test slice started at 15001, silently dropping
# row 15000 from both sets.)
TRAIN_ROWS = 15000

# Features are every column between the first one and the trailing label.
# A fixed upper bound (1:23) depends on how many dummy columns were generated
# and can drop some of them or, on a narrower frame, include the label itself.
# NOTE(review): column 0 is assumed to be a row identifier that must not be
# used as a feature (the original slice also skipped it) - confirm against the CSV.
all_features = data.iloc[:, 1:-1].values
all_targets = data['DEFAULT'].values

train_features = all_features[:TRAIN_ROWS]
train_target = all_targets[:TRAIN_ROWS]
test_feature = all_features[TRAIN_ROWS:]
test_target = all_targets[TRAIN_ROWS:]

# K-nearest neighbours
knn = neighbors.KNeighborsClassifier()
model = knn.fit(train_features, train_target)
result = model.predict(test_feature)
score1 = model.score(test_feature, test_target)
print("KNN预测分类准确性:", score1)

# Random forest
model2 = RandomForestClassifier()
model2.fit(train_features, train_target)
result2 = model2.predict(test_feature)
score2 = model2.score(test_feature, test_target)
print("随机森林预测分类准确性:", score2)

# Perceptron
model3 = Perceptron(max_iter=10000, tol=1e-3)
model3.fit(train_features, train_target)
result3 = model3.predict(test_feature)
score3 = model3.score(test_feature, test_target)
print("感知机分类预测准确性:", score3)

# Support vector classifier
model4 = SVC(gamma='scale')
model4.fit(train_features, train_target)
result4 = model4.predict(test_feature)
score4 = model4.score(test_feature, test_target)
print("支持向量机SVC预测分类准确性:", score4)
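# Results (console output recorded from a run of this script):
#   KNN预测分类准确性: 0.7489581596932              (KNN accuracy)
#   随机森林预测分类准确性: 0.7654609101516919       (random forest accuracy)
#   感知机分类预测准确性: 0.6196032672112018         (perceptron accuracy)
#   支持向量机SVC预测分类准确性: 0.76379396566094    (SVC accuracy)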