资源简介
决策树算法.rar
代码片段和文件信息
from math import log
import operator
import pickle
import xlrd
import xlwt
def creatData():
    """Load the hospital spreadsheet and return ``(rows, feature_names)``.

    Reads the first sheet of ``医院数据.xls`` (skipping the header row), drops
    the unused columns, and discretises the remaining features in place:

    * column 1 (age):            (0, 30] -> 0, (30, 50] -> 1,
                                 (50, 60] -> 2, (60, 100] -> 3
    * column 2 (illness years):  (0, 5] -> 0, (5, 10] -> 1, > 10 -> 2
    * column 3 (family history): 'NULL' -> 1
    * column 6 (class label):    'NULL' -> '有'

    Values outside the listed ranges are left untouched.

    Returns:
        A tuple ``(datalist, label)`` where ``datalist`` is a list of rows
        (each a list whose last element is the class label) and ``label`` is
        the list of feature names for the leading columns.
    """
    data = xlrd.open_workbook('医院数据.xls')
    table = data.sheets()[0]
    # Row 0 holds the column headers, so real data starts at row 1.
    datalist = [list(table.row_values(i)) for i in range(1, table.nrows)]

    # Indices of the columns that are not used as features.
    # NOTE(review): the separators in this list were lost in the copy this
    # was restored from ("0348912"); 0, 3, 4, 8, 9, 12 is the only ascending
    # reading that leaves the 7 columns the rest of the function indexes
    # (6 features + label) -- confirm against the spreadsheet layout.
    removelist = [0, 3, 4, 8, 9, 12]

    for each in datalist:
        # Delete from the right so earlier indices stay valid.
        for y in reversed(removelist):
            del each[y]

        # Age -> category 0 / 1 / 2 / 3.
        if 0 < each[1] <= 30:
            each[1] = 0
        elif 30 < each[1] <= 50:
            each[1] = 1
        elif 50 < each[1] <= 60:
            each[1] = 2
        elif 60 < each[1] <= 100:
            each[1] = 3

        # Years of illness -> category 0 / 1 / 2.
        if 0 < each[2] <= 5:
            each[2] = 0
        elif 5 < each[2] <= 10:
            each[2] = 1
        elif each[2] > 10:
            each[2] = 2

        # Family history is 0 / 1; a missing value ('NULL') is treated as 1.
        if each[3] == 'NULL':
            each[3] = 1

        # The class label is the discharge outcome; missing means '有'.
        if each[6] == 'NULL':
            each[6] = '有'

    label = ['性别', '年龄', '病程', '家族史', '侧别', '即可面抽']
    return (datalist, label)
def dataformat(nianling, bingcheng):
    """Discretise a raw age and illness duration into category codes.

    Uses the same buckets that ``creatData`` applies to the training data:

    * ``nianling`` (age):            (0, 30] -> 0, (30, 50] -> 1,
                                     (50, 60] -> 2, (60, 100] -> 3
    * ``bingcheng`` (illness years): (0, 5] -> 0, (5, 10] -> 1, > 10 -> 2

    Values that fall outside every bucket (e.g. 0, negatives, age > 100)
    are returned unchanged. Gender encoding ('男' -> 1, '女' -> 0) was
    disabled in the original code and is not performed here.

    Args:
        nianling: Age in years.
        bingcheng: Duration of illness in years.

    Returns:
        A tuple ``(nianling, bingcheng)`` of the encoded values.
    """
    # Age bucket.
    if 0 < nianling <= 30:
        nianling = 0
    elif 30 < nianling <= 50:
        nianling = 1
    elif 50 < nianling <= 60:
        nianling = 2
    elif 60 < nianling <= 100:
        nianling = 3

    # Illness-duration bucket.
    if 0 < bingcheng <= 5:
        bingcheng = 0
    elif 5 < bingcheng <= 10:
        bingcheng = 1
    elif bingcheng > 10:
        bingcheng = 2

    return (nianling, bingcheng)
#信息熵
def inforEnt(data):
    """Return the Shannon entropy (base 2) of the class labels in *data*.

    Args:
        data: A sequence of rows; the last element of each row is taken as
            its class label.

    Returns:
        The entropy in bits as a float. An empty *data* yields ``0.0``.
    """
    datanum = len(data)
    # Count how many rows carry each class label.
    labelsdict = {}
    for each in data:
        label = each[-1]
        labelsdict[label] = labelsdict.get(label, 0) + 1
    Ent = 0.0
    for count in labelsdict.values():
        P = count / datanum
        Ent -= P * log(P, 2)
    return Ent
#数据划分
def splitData(data, axis, value):
    """Return the rows of *data* whose column *axis* equals *value*.

    The matching column is removed from every returned row, so the result
    has one column fewer than *data*. The input rows are not modified.

    Args:
        data: A sequence of rows (lists).
        axis: Index of the column to test and drop.
        value: The value the column must equal for a row to be kept.

    Returns:
        A new list of new row lists.
    """
    return [
        each[:axis] + each[axis + 1:]
        for each in data
        if each[axis] == value
    ]
#最佳属性
def bestfeature(data):
bestfea = -1
bestinforgain = 0.0
numfea = len(data[0])-1
oriEnt = inforEnt(data)
for i in range(numfea):
feavalue = [each[i] for each in data]
uniquefea = set(feavalue)
newent=0.0
for each in uniquefea:
spliteddata = splitData(dataieach)
P = len(spliteddata)/len(data)
newent += P*inforEnt(spliteddata)
inforgain = oriEnt -newent
if (i
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 39424 2017-10-18 19:15 决策树算法\医院数据.xls
文件 6274 2017-11-21 18:00 决策树算法\医院问题决策树分类器.py
文件 5339 2017-11-21 16:19 决策树算法\天气预报决策树分类器.py
目录 0 2018-03-13 14:51 决策树算法
----------- --------- ---------- ----- ----
51037 4
相关资源
- 锁相环的改进及仿真_李尧.pdf
- 新建压缩(zipped)文件夹.zip
- 9-Axis.zip
- 接口.txt
- 支付存管系统(PDS)与商户接口规范
- 2019校园宿舍系统.zip
- zed-examples-master.zip
- bk.cmhaaoso.top.zip
- 2019_全国大学生电子设计大赛_C题_线路
- 37724122try_fxlms.rar
- UCI数据集data格式.rar
- baiduyuanpan.txt
- 分形高斯噪声完整版.zip
- CenterLib.rar
- reptilercnaxxo.zip
- 463.txt
- 小程序入门到实战.txt
- checkcode.exe
- C600磁盘阵列驱动.zip
- BaiduPanKey.rar
- ssm框架视频.txt
- ISE14.7.txt
- 世界地图shp.zip
- 1111.txt
- 现代操作系统第四版答案.zip
- 全功能codewarrior破解教程.rar
- 积分兑换小程序源码.rar
- 1bbaa9493bf64fa6b626d351c0c84423.zip
- qp198906011234_1095678.zip
- 论文.rar
评论
共有 条评论