#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__="rochuan"
import csv

from sklearn import preprocessing
from sklearn import tree
from sklearn.externals.six import StringIO
from sklearn.feature_extraction import DictVectorizer
def importCsv():
    """Write the watermelon sample data set to ``csv_test.csv``.

    The file is created (or overwritten) in the current working directory.
    It holds one header row followed by 17 samples; each sample is a row
    number, six categorical attributes and a final label column.

    Raises:
        OSError: if the file cannot be opened for writing.
    """
    header = ['编号', '色泽', '根蒂', '敲声', '纹理', '脐部', '触感', '好瓜']
    data = [
        ('1', '青绿', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', '是'),
        ('2', '乌黑', '蜷缩', '沉闷', '清晰', '凹陷', '硬滑', '是'),
        ('3', '乌黑', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', '是'),
        ('4', '青绿', '蜷缩', '沉闷', '清晰', '凹陷', '硬滑', '是'),
        ('5', '浅白', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', '是'),
        ('6', '青绿', '稍蜷', '浊响', '清晰', '稍凹', '软粘', '是'),
        ('7', '乌黑', '稍蜷', '浊响', '稍糊', '稍凹', '软粘', '是'),
        ('8', '乌黑', '稍蜷', '浊响', '清晰', '稍凹', '硬滑', '是'),
        ('9', '乌黑', '稍蜷', '沉闷', '稍糊', '稍凹', '硬滑', '否'),
        ('10', '青绿', '硬挺', '清脆', '清晰', '平坦', '软粘', '否'),
        ('11', '浅白', '硬挺', '清脆', '模糊', '平坦', '硬滑', '否'),
        ('12', '浅白', '蜷缩', '浊响', '模糊', '平坦', '软粘', '否'),
        ('13', '青绿', '稍蜷', '浊响', '稍糊', '凹陷', '硬滑', '否'),
        ('14', '浅白', '稍蜷', '沉闷', '稍糊', '凹陷', '硬滑', '否'),
        ('15', '乌黑', '稍蜷', '浊响', '清晰', '稍凹', '软粘', '否'),
        ('16', '浅白', '蜷缩', '浊响', '模糊', '平坦', '硬滑', '否'),
        ('17', '青绿', '蜷缩', '沉闷', '稍糊', '稍凹', '硬滑', '否'),
    ]
    # The csv module wants a text stream opened with newline='' so that it
    # controls the line endings itself.  The encoding is explicit because the
    # cells are not ASCII and the locale default may not be UTF-8.  The
    # with-block closes the file even when a write fails.
    with open('csv_test.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)
        writer.writerows(data)
def main():
    """Train an entropy-based decision tree on ``csv_test.csv`` and export it.

    Reads the CSV from the current working directory, one-hot encodes the
    attribute columns (every column except the first and the last), binarizes
    the last column as the class label, fits a ``DecisionTreeClassifier`` and
    writes its Graphviz description to ``doctione-tree.dot``.  The
    intermediate values are printed to stdout.

    Raises:
        OSError: if the CSV cannot be read or the dot file cannot be written.
        StopIteration: if the CSV is empty and therefore has no header row.
    """
    featureList = []
    labelList = []
    # newline='' lets the csv module handle line endings itself; the
    # with-block closes the file even if parsing a row fails.
    with open('csv_test.csv', 'r', newline='', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile)
        headers = next(reader)
        # Convert every data row into a {column name: value} dict.
        for row in reader:
            if not row:
                # Blank lines come back as empty lists and carry no sample.
                continue
            labelList.append(row[-1])
            # Skip the leading row number and the trailing label; zip stops
            # at the shorter side, so only attribute columns are paired.
            featureList.append(dict(zip(headers[1:], row[1:-1])))
    print(featureList)

    vec = DictVectorizer()
    dummyX = vec.fit_transform(featureList).toarray()
    print("dummyX:", dummyX)

    lb = preprocessing.LabelBinarizer()
    dummyY = lb.fit_transform(labelList)
    print("dummyY:", dummyY)

    # Create the classifier; criterion="entropy" makes it choose splits by
    # information gain, the measure the ID3 algorithm uses.
    clf = tree.DecisionTreeClassifier(criterion="entropy")
    clf = clf.fit(dummyX, dummyY)
    print("clf:" + str(clf))

    # Recent scikit-learn releases expose get_feature_names_out() in place of
    # get_feature_names(); use whichever this installation provides.
    get_names = getattr(vec, "get_feature_names_out", None) or vec.get_feature_names
    # UTF-8 is explicit because the feature names contain non-ASCII text.
    with open("doctione-tree.dot", "w", encoding="utf-8") as f:
        tree.export_graphviz(clf, feature_names=list(get_names()), out_file=f)

    # Slicing a row yields a view; copy it so that editing the new sample
    # does not write through into the encoded training matrix.
    newRow = dummyX[0, :].copy()
    newRow[0] = 1
    newRow[2] = 0
    print("newRow:" + str(newRow))
# Script entry point: main() runs unconditionally, including on import,
# because there is no __main__ guard.  importCsv() is not called anywhere in
# the code shown here, so csv_test.csv must already exist when this runs.
main()