#include#include#include#include#includeusing namespace std;typedef struct Item//只有一个词的频繁项{ string sItem; int iSupport;}ITEM;typedef vector VEC_STR;typedef vector VEC_VEC_STR;typedef struct MultiItem//高层的频繁项{ VEC_STR vsItem; int iSupport;}MULTIITEM;typedef vector VEC_ITEM;//只有一个词的频繁项集合typedef vector VEC_MULTIITEM;//高层的频繁项集合typedef map MAP_STR_INT;//存储词语及其出现频率void readFile(ifstream & const string & VEC_STR &);void countWord(VEC_STR * MAP_STR_INT & const char separator=‘\\‘);void generateLevel1Set(MAP_STR_INT * VEC_ITEM &);void generateLevel2(VEC_ITEM * VEC_MULTIITEM &);void cycGenerator(VEC_MULTIITEM * VEC_STR & ofstream &);void generateHighLevelSet(VEC_MULTIITEM * VEC_MULTIITEM & VEC_STR &);void generateInitialHigh(VEC_MULTIITEM * VEC_VEC_STR &);void pruning(VEC_VEC_STR * VEC_MULTIITEM * VEC_MULTIITEM &);bool find(VEC_MULTIITEM * VEC_STR *);void countSupport(VEC_STR * VEC_MULTIITEM &);void generateFrequentSet(VEC_MULTIITEM * VEC_MULTIITEM &);void printFrequentSet(VEC_ITEM * ostream &os=cout);void printFrequentSet(VEC_MULTIITEM * ostream &os=cout);const int MINSUPPORT = 2;//最小支持度int main(){ //从源文件读取数据 ifstream infile; VEC_STR vs_word; readFile(infile“in.txt“vs_word); infile.close(); //计算所有词语的出现频率 MAP_STR_INT word_count; countWord(&vs_word word_count); //生成单个词语的频繁项集合 VEC_ITEM level1Set; generateLevel1Set(&word_count level1Set); //生成具有两个词语的频繁项集合 VEC_MULTIITEM level2 level2Set; generateLevel2(&level1Set level2); countSupport(&vs_word level2); generateFrequentSet(&level2 level2Set); //生成具有三个词语的频繁项集合 VEC_MULTIITEM level3Set; generateHighLevelSet(&level2Set level3Set vs_word); //输出单个词的频繁项到文件 ofstream outfile; outfile.open(“out.txt“); if(!outfile) cout<<“Unable to open this file!“< printFrequentSet(&level1Set outfile); //循环产生高层的频繁项集合并输出到文件 cycGenerator(&level2Set vs_word outfile); cout<<“OK!“< return 0;}/**从源文件读取词语*每一行作为一个字符串存入向量中*/void readFile(ifstream &infile const string &filename VEC_STR &vs_word){ infile.close(); infile.clear(); infile.open(filename.c_str()); if(!infile) cout<<“Unable to open this file!“< string word; while(getline(infile word)) vs_word.push_back(word); }/**计算每个词语的支持度*从字符串中提取出所有词语,与其支持度一道存入map中*/void countWord(VEC_STR *vs_word MAP_STR_INT &word_count const char separator){ string sentenceword; for(unsigned int i=0; isize(); ++i) { sentence = (*vs_word)[i]; while(sentence.find(separator)!=-1) { word = sentence.substr(0sentence.find(separator)); ++word_count[word]; sentence = sentence.substr(sentence.find(separator)+1 sentence.size()-1); } ++word_count[sentence]; }}/**找出频繁1项集的集合*/void generateLevel1Set(MAP_STR_INT *pWord_Co
共有 条评论
评论
共有 条评论