决策树实战

#!/usr/bin/env python

# -*- coding: utf-8 -*-

__author__="rochuan"

import csv

from sklearn import preprocessing
from sklearn import tree
from sklearn.feature_extraction import DictVectorizer

try:
    from sklearn.externals.six import StringIO
except ImportError:
    # sklearn.externals.six was removed in scikit-learn 0.23; keep the
    # StringIO name available through the standard library instead.
    from io import StringIO

def importCsv():
    """Write the sample watermelon data set to ``csv_test.csv``.

    Creates (or overwrites) ``csv_test.csv`` in the current working
    directory.  The file holds one header row followed by 17 samples; each
    sample is an id, six categorical attributes and a final yes/no label
    column.

    The file is written as UTF-8 text with ``newline=''`` so that the
    ``csv`` module controls line endings itself, and it is opened through a
    context manager so the handle is closed even if a write fails.
    """
    header = ['编号', '色泽', '根蒂', '敲声', '纹理', '脐部', '触感', '好瓜']
    data = [
        ('1', '青绿', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', '是'),
        ('2', '乌黑', '蜷缩', '沉闷', '清晰', '凹陷', '硬滑', '是'),
        ('3', '乌黑', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', '是'),
        ('4', '青绿', '蜷缩', '沉闷', '清晰', '凹陷', '硬滑', '是'),
        ('5', '浅白', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', '是'),
        ('6', '青绿', '稍蜷', '浊响', '清晰', '稍凹', '软粘', '是'),
        ('7', '乌黑', '稍蜷', '浊响', '稍糊', '稍凹', '软粘', '是'),
        ('8', '乌黑', '稍蜷', '浊响', '清晰', '稍凹', '硬滑', '是'),
        ('9', '乌黑', '稍蜷', '沉闷', '稍糊', '稍凹', '硬滑', '否'),
        ('10', '青绿', '硬挺', '清脆', '清晰', '平坦', '软粘', '否'),
        ('11', '浅白', '硬挺', '清脆', '模糊', '平坦', '硬滑', '否'),
        ('12', '浅白', '蜷缩', '浊响', '模糊', '平坦', '软粘', '否'),
        ('13', '青绿', '稍蜷', '浊响', '稍糊', '凹陷', '硬滑', '否'),
        ('14', '浅白', '稍蜷', '沉闷', '稍糊', '凹陷', '硬滑', '否'),
        ('15', '乌黑', '稍蜷', '浊响', '清晰', '稍凹', '软粘', '否'),
        ('16', '浅白', '蜷缩', '浊响', '模糊', '平坦', '硬滑', '否'),
        ('17', '青绿', '蜷缩', '沉闷', '稍糊', '稍凹', '硬滑', '否'),
    ]

    # Text mode + explicit UTF-8: csv.writer needs a text stream on
    # Python 3, and the cell values are non-ASCII.
    with open('csv_test.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)
        writer.writerows(data)

def main():
    """Fit an entropy-based decision tree on ``csv_test.csv`` and export it.

    Steps:

    1. Read ``csv_test.csv`` from the current working directory (the file
       ``importCsv`` writes; it must already exist).  The first column is
       treated as an id and skipped, the last column is the label, and the
       columns in between are categorical features.
    2. One-hot encode the features with ``DictVectorizer`` and binarize the
       labels with ``LabelBinarizer``.
    3. Fit a ``DecisionTreeClassifier`` with ``criterion="entropy"``.
    4. Write the fitted tree in Graphviz format to ``doctione-tree.dot``.
    5. Build a modified copy of the first encoded sample and print it.

    Intermediate results are printed to stdout.
    """
    feature_rows = []
    labels = []

    # Context manager closes the CSV even if parsing fails part-way.
    with open('csv_test.csv', 'r', newline='', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile)
        headers = next(reader)
        attribute_names = headers[1:-1]  # drop the id and label columns

        # Convert every data row into an {attribute name: value} dict.
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; nothing to learn from.
                continue
            labels.append(row[-1])
            feature_rows.append(dict(zip(attribute_names, row[1:-1])))

    print(feature_rows)

    vec = DictVectorizer()
    dummyX = vec.fit_transform(feature_rows).toarray()
    print("dummyX:", dummyX)

    lb = preprocessing.LabelBinarizer()
    dummyY = lb.fit_transform(labels)
    print("dummyY:", dummyY)

    # criterion="entropy" makes the tree split on information gain (the
    # measure ID3 uses); the tree implementation itself is scikit-learn's.
    clf = tree.DecisionTreeClassifier(criterion="entropy")
    clf = clf.fit(dummyX, dummyY)
    print("clf:" + str(clf))

    # get_feature_names() was removed in scikit-learn 1.2; prefer the
    # replacement when it exists and fall back for older releases.
    if hasattr(vec, "get_feature_names_out"):
        encoded_names = list(vec.get_feature_names_out())
    else:
        encoded_names = vec.get_feature_names()

    # The feature names are non-ASCII, so pin the output encoding instead of
    # relying on the platform default.
    with open("doctione-tree.dot", "w", encoding="utf-8") as f:
        tree.export_graphviz(clf, feature_names=encoded_names, out_file=f)

    # Copy the first sample: slicing alone returns a view, and editing a
    # view would silently modify the training matrix.
    newRow = dummyX[0, :].copy()
    newRow[0] = 1
    newRow[2] = 0
    print("newRow:" + str(newRow))

# Script entry point: run the training/export pipeline as soon as the file
# is executed (this also happens on import, since the call is unguarded).
main()

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。