资源简介
包括两个文件:K-means 聚类算法的 cpp 源文件,以及用于测试的鸢尾花数据集 txt 文件。代码带有详细注释,简洁明了,下载后即可直接进行测试。
代码片段和文件信息
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <sstream>
#include <vector>
using namespace std;

#define K 3           // number of clusters
#define DIM_NUM 5     // number of attribute slots per record (indices 1..DIM_NUM)
#define DATA_NUM 140  // NOTE(review): presumably the number of records in the data set; not used by the code visible here

// One data record. Slot 0 holds the record number and slots 1..DIM_NUM hold
// the attribute values, so a full record has DIM_NUM + 1 elements.
// The element type is double because centroids are computed by averaging.
typedef vector<double> Tuple;
// Returns the Euclidean distance between two records.
//
// Only slots 1..DIM_NUM take part in the computation; slot 0 is skipped.
// Both tuples must therefore hold at least DIM_NUM + 1 elements, since the
// accesses are unchecked.
double getDistXY(const Tuple& t1, const Tuple& t2)
{
    double sum = 0;
    for (int i = 1; i <= DIM_NUM; ++i)
    {
        const double diff = t1[i] - t2[i];
        sum += diff * diff;
    }
    return sqrt(sum);
}
//输入k个质心和1个元组,根据质心,决定当前元组属于哪个簇
int clusterOfTuple(const Tuple means[] const Tuple& tuple){
double dist = getDistXY(means[0] tuple);
double tmp;
int label = 0; //标示属于哪一个簇
for (int i = 1; i {
tmp = getDistXY(means[i] tuple);
if (tmp {
dist = tmp;
label = i;
}
}
return label;
}
//输入簇集和质心,获得给定簇集的平方误差
double getVar(vector clusters[] Tuple means[])
{
double var = 0;
for (int i = 0; i < K; i++) //遍历质心
{
vector t = clusters[i];
for (unsigned int j = 0; j< t.size(); j++)
{
var += getDistXY(t[j] means[i]); //簇集中所有元组到质心的距离之和
}
}
//cout<<“sum:“< return var;
}
// Computes the centroid (component-wise mean) of a cluster.
//
// cluster: the records of one cluster.
// Returns a tuple of DIM_NUM + 1 elements. Slot 0 (the record-number slot)
// is left at 0; slots 1..DIM_NUM hold the average of the corresponding
// attribute over all records. For an empty cluster the all-zero tuple is
// returned, which avoids a division by zero that would yield NaN centroids.
Tuple getMeans(const vector<Tuple>& cluster)
{
    Tuple t(DIM_NUM + 1, 0);  // DIM_NUM + 1 zero-initialised slots
    if (cluster.empty())
    {
        return t;
    }

    for (unsigned int i = 0; i < cluster.size(); i++)
    {
        for (int j = 1; j <= DIM_NUM; ++j)
        {
            t[j] += cluster[i][j];
        }
    }

    const double num = static_cast<double>(cluster.size());
    for (int j = 1; j <= DIM_NUM; ++j)
    {
        t[j] /= num;
    }
    return t;
}
void print_Means(const vector clusters[])
{
for (int lable = 0; lable < K; lable++)
{
cout << clusters[lable].size() << “ “ ;
Tuple temp = getMeans(clusters[lable]);
cout << “(“;
for (int j = 0; j <= DIM_NUM; ++j)
{
cout << temp[j] << “\t“;
}
cout << “)\n“;
}
}
void print(const vector clusters[])
{
for (int lable = 0; lable {
cout << “第“ << lable + 1 << “个簇:“ << endl;
vector t = clusters[lable];
for (unsigned int i = 0; i {
cout << i + 1 << “.(“;
for (int j = 0; j <= DIM_NUM; ++j)
{
cout << t[i][j] << “ “;
}
cout << “)\n“;
}
}
}
vector* KMeans(vector& tuples)
{
vector clusters[K]; //k个簇
Tuple means[K]; //k个质心
int i = 0;
//一开始随机选取k条记录的值作为k个簇的质心(均值)
srand((unsigned int)time(NULL)); //随机数发生器的初始化
for (i = 0; i {
int iToSelect = rand() % tuples.size();
if (means[iToSelect].size() == 0)
{
for (int j = 0; j <= DIM_NUM; ++j)
{
means[i].push_back(tuples[iToSelect][j]); //初始化个质心
}
++i;
}
}
int lable = 0;
//根据默认的质心,将输入的tuples分配给各个簇
for (i = 0; i != tuples.size(); ++i)
{
lable = clusterOfTuple(means tuples[i]);
clusters[lable].push_back(tuples[i]);
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 2660 2016-06-16 22:44 iris_train.txt
文件 4872 2017-08-23 11:20 Source.cpp
----------- --------- ---------- ----- ----
7532 2
- 上一篇:Qt实现的转灰度图源代码
- 下一篇:Games-Chant算法
评论
共有 条评论