资源简介
ID3决策树python代码,有注释,有数据读入和处理,正确率统计等功能,非常实用,欢迎下载
代码片段和文件信息
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 31 20:53:34 2017
@author: Administrator
"""
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 31 14:30:17 2017
@author: Administrator
"""
from sklearn.feature_extraction import DictVectorizer
##涉及到对csv文件的读取,故导入csv接口
import csv
from sklearn import preprocessing
from sklearn import tree
from sklearn.externals.six import StringIO
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
# ---------------------------------------------------------------------------
# Train and evaluate an entropy-based decision tree on a CSV data set.
#
# The CSV's first row is treated as the header; in every following row the
# last column is the class label and all preceding columns are features.
# ---------------------------------------------------------------------------

# Number of leading rows used for training; the remaining rows are the test
# set. NOTE(review): this is tied to the specific atrain.csv file -- confirm
# it still matches the data before reusing the script.
train_size = 30164

featureList = []
labelList = []

# Read the CSV inside a context manager so the file handle is always closed.
with open(r'D:\Spyderworkspcce\data\atrain.csv') as traindata:
    # csv.reader yields the file one row (a list of strings) at a time.
    reader = csv.reader(traindata)
    # The first row holds the column titles.
    headers = next(reader)
    for row in reader:
        if not row:
            # Blank lines come back as empty lists and carry no sample.
            continue
        # Last column is the label; everything before it is a feature.
        labelList.append(row[-1])
        # One dict per sample, mapping column title -> raw string value.
        rowDict = dict(zip(headers, row[:-1]))
        featureList.append(rowDict)

# Each dict in featureList corresponds to one data row of the CSV;
# DictVectorizer turns the list of dicts into a numeric feature matrix.
vec = DictVectorizer()
train_x = vec.fit_transform(featureList).toarray()

# Split the feature matrix into training and test parts.
train_xx = train_x[:train_size]
test_xx = train_x[train_size:]

# Encode the string labels numerically.
lb = preprocessing.LabelBinarizer()
train_y = lb.fit_transform(labelList)

# Split the encoded labels the same way as the features.
train_yy = train_y[:train_size]
test_yy = train_y[train_size:]

# criterion='entropy' makes the tree choose splits by information gain.
model = DecisionTreeClassifier(criterion='entropy', min_samples_leaf=3)
model = model.fit(train_xx, train_yy)

y_train_pred = model.predict(train_xx)
y_test_pred = model.predict(test_xx)

# Predicted class labels for the test set.
print('测试集预测类标 ' + str(y_test_pred))

acc_train = accuracy_score(train_yy, y_train_pred)
acc_test = accuracy_score(test_yy, y_test_pred)

# Report accuracy on both splits as percentages.
print('\t训练集准确率: %.4f%%' % (100*acc_train))
print('\t测试集准确率: %.4f%%\n' % (100*acc_test))
predict
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 2118 2018-01-03 20:06 ID3.py
----------- --------- ---------- ----- ----
2118 1
- 上一篇:django入门-增删改
- 下一篇:python实现决策树分类算法
评论
共有 条评论