自己写的Apriori算法c++实现

大小: 3KB

文件类型: .rar

金币: 2

下载: 0 次

发布日期: 2021-05-16
语言: C/C++
标签: 数据挖掘 Apriori c++

高速下载

资源简介

Apriori 是数据挖掘中的经典关联规则算法。网上能找到的很多实现要么无法运行，要么只能处理英文字母或数字，于是我花了几天时间自己写了一个 C++ 程序，用于挖掘由字符串组成的频繁项集。该程序在 C-Free 4.0 和 Visual Studio 2008 上均可运行。

资源截图

小图大图

代码片段和文件信息

#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>

using namespace std;

/** A frequent item made of a single word, together with its support count. */
typedef struct Item
{
	string sItem;   // the word itself
	int iSupport;   // support count recorded for the word
}ITEM;

typedef vector<string> VEC_STR;       // list of strings (input lines, or the words of one itemset)
typedef vector<VEC_STR> VEC_VEC_STR;  // list of string lists

/** A higher-level frequent item (several words), together with its support count. */
typedef struct MultiItem
{
	VEC_STR vsItem;  // the words that form this itemset
	int iSupport;    // support count recorded for the itemset
}MULTIITEM;

typedef vector<ITEM> VEC_ITEM;            // collection of single-word frequent items
typedef vector<MULTIITEM> VEC_MULTIITEM;  // collection of higher-level frequent items
typedef map<string, int> MAP_STR_INT;     // maps a word to its occurrence count

// Forward declarations of the functions used by main().
void readFile(ifstream &, const string &, VEC_STR &);
void countWord(VEC_STR *, MAP_STR_INT &, const char separator='\\');
void generateLevel1Set(MAP_STR_INT *, VEC_ITEM &);
void generateLevel2(VEC_ITEM *, VEC_MULTIITEM &);
void cycGenerator(VEC_MULTIITEM *, VEC_STR &, ofstream &);
void generateHighLevelSet(VEC_MULTIITEM *, VEC_MULTIITEM &, VEC_STR &);
void generateInitialHigh(VEC_MULTIITEM *, VEC_VEC_STR &);
void pruning(VEC_VEC_STR *, VEC_MULTIITEM *, VEC_MULTIITEM &);
bool find(VEC_MULTIITEM *, VEC_STR *);
void countSupport(VEC_STR *, VEC_MULTIITEM &);
void generateFrequentSet(VEC_MULTIITEM *, VEC_MULTIITEM &);
void printFrequentSet(VEC_ITEM *, ostream &os=cout);
void printFrequentSet(VEC_MULTIITEM *, ostream &os=cout);

const int MINSUPPORT = 2;  // minimum support threshold

int main（）
{
	//从源文件读取数据
	ifstream infile;
	VEC_STR vs_word;
	readFile（infile“in.txt“vs_word）;
	infile.close（）;
	
	//计算所有词语的出现频率
	MAP_STR_INT word_count;
	countWord（&vs_word word_count）;
	
	//生成单个词语的频繁项集合
	VEC_ITEM level1Set;
	generateLevel1Set（&word_count level1Set）;
	
	//生成具有两个词语的频繁项集合
	VEC_MULTIITEM level2 level2Set;
	generateLevel2（&level1Set level2）;
	countSupport（&vs_word level2）;
	generateFrequentSet（&level2 level2Set）;

	//生成具有三个词语的频繁项集合
	VEC_MULTIITEM level3Set;
	generateHighLevelSet（&level2Set level3Set vs_word）;
	
	//输出单个词的频繁项到文件
	ofstream outfile;
	outfile.open（“out.txt“）;
	if（!outfile）
		cout<<“Unable to open this file!“<	printFrequentSet（&level1Set outfile）;
	
	//循环产生高层的频繁项集合并输出到文件
	cycGenerator（&level2Set vs_word outfile）;

	cout<<“OK!“<	return 0;
}

/**从源文件读取词语
*每一行作为一个字符串存入向量中
*/
void readFile（ifstream &infile const string &filename VEC_STR &vs_word）
{
	infile.close（）;
	infile.clear（）;
	
	infile.open（filename.c_str（））; 
	if（!infile）
		cout<<“Unable to open this file!“<
	string word;
	while（getline（infile word））
		vs_word.push_back（word）;		
}

/**计算每个词语的支持度
*从字符串中提取出所有词语，与其支持度一道存入map中
*/
void countWord（VEC_STR *vs_word MAP_STR_INT &word_count const char separator）
{
	string sentenceword;
	for（unsigned int i=0; isize（）; ++i）
	{
		sentence = （*vs_word）[i];
		while（sentence.find（separator）!=-1）
		{
			word = sentence.substr（0sentence.find（separator））;
			++word_count[word];
			sentence = sentence.substr（sentence.find（separator）+1 sentence.size（）-1）;
		}
		++word_count[sentence];
	}
}

/**找出频繁1项集的集合
*/
void generateLevel1Set（MAP_STR_INT *pWord_Co

属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----

     文件       8080  2008-11-06 23:13  Apriori.cpp

     文件        295  2008-11-05 16:19  in.txt

----------- ---------  ---------- -----  ----

                 8375                    2

共有条评论

自己写的Apriori算法c++实现

资源简介

资源截图

代码片段和文件信息

评论

相关资源