资源简介
个人课设时实现的基于决策树和朴素贝叶斯算法对Adult数据集进行分类的源码!
代码片段和文件信息
# -*- coding: utf-8 -*-
"""
Created on 12.13.2017
@author: zouxi
"""
import numpy as np
import copy
import sys
import importlib
importlib.reload(sys)
# return the majority of the label
def majority(data, attributes, target):
    """Return the most frequent value in the target column of ``data``.

    :param data: 2-D numpy array of samples; one row per sample.
    :param attributes: list of column names, positionally aligned with the
        columns of ``data``.
    :param target: name of the label column; must be present in
        ``attributes`` (otherwise ``list.index`` raises ``ValueError``).
    :return: the label with the highest count. Ties go to the label that
        appears first in ``data``. An empty string is returned when ``data``
        has no rows.
    """
    target_index = attributes.index(target)  # column holding the label

    # Number of samples seen for each label, in first-seen order.
    val_freq = {}
    for label in data[:, target_index]:
        val_freq[label] = val_freq.get(label, 0) + 1

    if not val_freq:
        return ""
    # max() returns the first key with the maximal count, which preserves the
    # "first label wins on ties" behaviour of a strict ``>`` scan.
    return max(val_freq, key=val_freq.get)
# 计算信息熵
def get_entropy_data(data, attributes, target, rows):
    """Return the weighted entropy of the target column over selected rows.

    The entropy (natural logarithm) of the label distribution is computed
    over the rows whose mask entry equals 1, then multiplied by
    ``sum(rows) / len(rows)`` so that the values for the subsets of a
    partition can simply be added together.

    :param data: 2-D numpy array of samples; one row per sample.
    :param attributes: list of column names, positionally aligned with the
        columns of ``data``.
    :param target: name of the label column; must be present in
        ``attributes``.
    :param rows: 0/1 mask with one entry per row of ``data``; entries equal
        to 1 mark the rows that take part in the computation.
    :return: the weighted entropy; 0.0 when ``rows`` is empty, and zero when
        no row is selected.
    """
    total_rows = len(rows)
    if total_rows == 0:
        # Nothing to weight by; avoid dividing by zero below.
        return 0.0

    target_index = attributes.index(target)

    # Count each label among the selected rows in a single pass.
    label_counts = {}
    for i in range(data.shape[0]):
        if rows[i] == 1:
            label = data[i, target_index]
            label_counts[label] = label_counts.get(label, 0) + 1

    selected = sum(label_counts.values())
    entropy_data = 0.0
    for count in label_counts.values():
        probability = count / selected
        entropy_data -= probability * np.log(probability)

    # Weight by the share of rows selected by the mask.
    return entropy_data * np.sum(rows) / total_rows
# 计算按某个属性划分后的信息熵
def get_excepted_entropy_data(data, attributes, attri, target):
    """Return the entropy of ``target`` after splitting ``data`` on ``attri``.

    For every distinct value of the chosen attribute a 0/1 row mask is built
    and handed to ``get_entropy_data``, which returns the subset's entropy
    already weighted by the share of rows it covers. The weighted values are
    summed.

    :param data: 2-D numpy array of samples; one row per sample.
    :param attributes: list of column names, positionally aligned with the
        columns of ``data``.
    :param attri: name of the attribute to split on; must be present in
        ``attributes``.
    :param target: name of the label column; must be present in
        ``attributes``.
    :return: the summed weighted entropy of the partition (0.0 when ``data``
        has no rows).
    """
    attri_index = attributes.index(attri)
    attri_column = data[:, attri_index]

    sum_excepted_entropy = 0.0
    # dict.fromkeys keeps first-seen order, so the floating-point summation
    # order is the same on every run (iteration over a set of strings is not).
    for attri_value in dict.fromkeys(attri_column):
        # 1.0 where the row carries this attribute value, 0.0 elsewhere.
        attri_selected_rows = (attri_column == attri_value).astype(float)
        sum_excepted_entropy += get_entropy_data(
            data, attributes, target, attri_selected_rows
        )
    return sum_excepted_entropy
# 信息增益
def infoGain(data, attributes, attri, target):
    """Return the information gain obtained by splitting ``data`` on ``attri``.

    The gain is the entropy of the target column over all rows minus the
    entropy that remains after partitioning the rows by the values of
    ``attri``.

    :param data: 2-D numpy array of samples; one row per sample.
    :param attributes: list of column names, positionally aligned with the
        columns of ``data``.
    :param attri: name of the attribute to split on.
    :param target: name of the label column.
    :return: entropy before the split minus entropy after the split.
    """
    # A mask of ones selects every row, giving the entropy of the whole set.
    all_rows = np.ones(data.shape[0])
    entropy_data = get_entropy_data(data, attributes, target, rows=all_rows)
    excepted_entropy_data = get_excepted_entropy_data(data, attributes, attri, target)
    return entropy_data - excepted_entropy_data
# ID3算法
def best_split(data attributes target):
max_info = 0.000001
best_attri = ““
print(“include attriburtes:“)
print(attributes)
print(“data_len:“ data.shape[0])
for
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 481 2017-12-14 20:35 adult\.idea\adult.iml
文件 88 2017-12-13 21:56 adult\.idea\dictionaries\zxhohai.xm
文件 200 2017-12-13 21:54 adult\.idea\encodings.xm
文件 410 2017-12-13 22:09 adult\.idea\inspectionProfiles\Project_Default.xm
文件 229 2017-12-13 21:56 adult\.idea\misc.xm
文件 262 2017-12-13 21:54 adult\.idea\modules.xm
文件 27481 2018-01-17 19:04 adult\.idea\workspace.xm
文件 3974303 2015-10-25 17:22 adult\adult.data
文件 5337 2017-12-12 21:47 adult\adult.names
文件 1986849 2015-10-25 17:22 adult\adult.test
文件 5989 2017-12-14 20:55 adult\DecisionTrees.py
文件 5175 2018-01-16 21:37 adult\main.py
文件 18600 2017-12-19 22:47 adult\NB.py
文件 4423 2017-12-14 20:55 adult\__pycache__\DecisionTrees.cpython-36.pyc
目录 0 2017-12-13 21:56 adult\.idea\dictionaries
目录 0 2017-12-13 22:09 adult\.idea\inspectionProfiles
目录 0 2018-01-17 19:04 adult\.idea
目录 0 2017-12-14 20:55 adult\__pycache__
目录 0 2018-01-17 10:50 adult
----------- --------- ---------- ----- ----
6029827 19
相关资源
- 基于最小错误率的贝叶斯手写数字分
- decisiontree决策树在adult数据集上的实现
- Data Analysis A Bayesian Tutorial
- Bayesian Networks With Examples in R280858
- Bayesian Networks in R贝叶斯网络的R语言实
- Applied Bayesian Statistics---With R and OpenB
- super-decision 使用说明
- Introduction of decision trees_J.R. Quinlan
- Learning Bayesian Networks - Neapolitan R. E..
- crystaldecisions.crystalreports.engine.dll
- 贝叶斯网络软件BayesiaLab教程
- 水晶报表CrystalDecisions组件全部8个组件
- zw_DecisionTree.zip
- A First Course in Bayesian Statistical Methods
- GeNie 贝叶斯网络工具
- super_decision操作方法中文版、英文版、
- 稀疏信号与压缩感知系列讲座PPT李廉
- Bayesian Estimation of DSGE Models
- bayesian modeling using winbugs125504
- Bayesian Computation with R 带详细目录.pd
- Bayesian Programming
- Bayesian Networks and Decision Graphs第二版
- CrystalDecisions.Windows.Forms
- 基于BayesShrink软阈值的Bandelet域SAR图像
- Bayesian Reasoning and Machine Learning--配书源
- bayesian networks with examples in r
- 论文笔记—Recasting gradient-based me
- Bayesian Networks in R
- 贝叶斯网络 Bayesian Networks经典合集
- 动态贝叶斯网络
评论
共有 条评论