// NOTE(review): this file was recovered from a whitespace-collapsed paste in
// which #include targets, template arguments, commas and some operators were
// stripped. The header list and the template arguments below are reconstructed
// from how the names are used - confirm against the original if it is available.
#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>

using namespace std;

#define MAXLEN 6  // number of columns per instance (one per entry of attrName)

// Decision-tree node.
struct Node {
    string attribute;       // attribute value
    string arrived_value;   // attribute value that was followed to arrive here
    vector<Node*> childs;   // all branch nodes (element type lost in SOURCE; Node* assumed - TODO confirm)

    // Both strings start out empty.
    Node() : attribute(""), arrived_value("") {}
};

Node* root;  // namespace-scope pointer, so it is zero-initialised

vector<vector<string> > state;   // the whole instance set
vector<string> item(MAXLEN);     // one row of the instance set (scratch buffer for DataInit)
vector<string> attribute_row;    // the attribute (header) row
string attrName[MAXLEN] = {"RID", "model", "perfume", "price", "income", "buyaction"};
map<string, vector<string> > map_attribute_values;  // attribute name -> distinct values seen for it
int tree_size = 0;

// Reads the instance set from "data2.txt" into `state` and fills
// `attribute_row` from `attrName`.
//
// Tokens are whitespace separated, MAXLEN per row. Reading stops at end of
// input or when the first token of a row is the literal "end".
void DataInit() {
    ifstream fin("data2.txt");
    string first;
    // compare() returns 0 on equality, so the loop ends on the "end" sentinel.
    while (fin >> first && first.compare("end") != 0) {
        item[0] = first;  // the first field of a row is the RID
        bool complete = true;
        for (int col = 1; col < MAXLEN; col++) {
            if (!(fin >> item[col])) {
                complete = false;
                break;
            }
        }
        // A short read would leave stale fields from the previous row in
        // `item`; drop the partial row rather than storing it.
        if (!complete) {
            break;
        }
        state.push_back(item);
    }
    for (int col = 0; col < MAXLEN; col++) {
        attribute_row.push_back(attrName[col]);
    }
    fin.close();
}

// Base-2 logarithm via change of base.
double lg2(double n) {
    return log(n) / log(2.0);
}

// Collects, for every attribute column except the first (RID) and the last
// (buyaction), the distinct values found in `state`, in order of first
// appearance, and stores them in `map_attribute_values` under the column name.
void MapAttributeValue() {
    for (unsigned int col = 1; col < MAXLEN - 1; col++) {
        vector<string> values;
        for (unsigned int row = 0; row < state.size(); row++) {
            const string& cell = state[row][col];
            if (std::find(values.begin(), values.end(), cell) == values.end()) {
                values.push_back(cell);
            }
        }
        map_attribute_values[attrName[col]] = values;
    }
}

// Entropy of the "yes"/other split in the last column of `remain_state`.
//
// attribute  column name, looked up in attribute_row[1..MAXLEN-1].
// value      only rows whose `attribute` column equals `value` are counted,
//            unless `ifparent` is true.
// ifparent   true to count every row (entropy before splitting).
//
// Returns 0 when the attribute is not found or when the counted rows all fall
// on one side; otherwise the two-class entropy in bits.
//
// Every row is indexed at `column` and at MAXLEN - 1 without a bounds check.
//
// NOTE(review): counting starts at row 1, i.e. remain_state[0] is never
// counted. That matches a caller that passes a header row first, but DataInit
// stores no header in `state` - confirm what callers pass.
double Entropy(const vector<vector<string> >& remain_state, const string& attribute,
               const string& value, bool ifparent) {
    // Locate the column; column 0 (RID) is never a candidate.
    size_t column = 0;
    bool found = false;
    for (size_t j = 1; j < MAXLEN && j < attribute_row.size(); j++) {
        if (attribute_row[j] == attribute) {
            column = j;
            found = true;
            break;
        }
    }
    if (!found) {
        return 0;
    }

    int yes_count = 0;
    int no_count = 0;
    for (size_t i = 1; i < remain_state.size(); i++) {
        const vector<string>& row = remain_state[i];
        if (ifparent || row[column] == value) {
            if (row[MAXLEN - 1] == "yes") {
                yes_count++;
            } else {
                no_count++;
            }
        }
    }

    // All positive or all negative (or nothing counted): no uncertainty.
    if (yes_count == 0 || no_count == 0) {
        return 0;
    }

    const double sum = yes_count + no_count;
    const double p_yes = yes_count / sum;
    const double p_no = no_count / sum;
    return -p_yes * lg2(p_yes) - p_no * lg2(p_no);
}

// NOTE(review): SOURCE is cut off inside Gain(). The visible prefix is kept
// below, commented out, until the rest of the definition is recovered.
//
// // Information gain of splitting the remaining instances on `attribute`.
// double Gain(vector<vector<string> > remain_state, string attribute) {
//     unsigned int j, k, m;
//     // First the entropy without splitting, i.e. the root's entropy.
//     double parent_entropy = Entropy(remain_state, attribute, "", true);
//     double children_entropy = 0;
//     // Then the entropy of each value after splitting.
//     vector<string> values = map_attribute_values[attribute];
//     vector<
共有 条评论
评论
共有 条评论