资源简介
此算法是对k-means算法的改进
代码片段和文件信息
#include “Utils.h“
#include
#include
#include
#include
/////////////////////
// DATA GENERATION //
/////////////////////
/**
* A data generator.
**/
class AbstractGen {
public:
virtual vector generateData(ostream &out) const = 0;
};
/**
* Generates a collection of points according to the following scheme:
* - Choose numCenters centers uniformly at random in a hypercube.
* - Choose numPoints/numCenters points offset by a normal distribution from each center.
**/
class SeparateNormalGen: public AbstractGen {
private:
int myNumPoints myNumCenters myNumDims;
Scalar myCenterRange myClusterStdDev;
public:
SeparateNormalGen(int numPoints int numCenters int numDims Scalar centerRange
Scalar clusterStdDev) {
myNumPoints = numPoints; myNumCenters = numCenters; myNumDims = numDims;
myCenterRange = centerRange; myClusterStdDev = clusterStdDev;
}
vector generateData(ostream &out) const {
// Generate the data
vector data;
vector realCenters;
for (int i = 0; i < myNumCenters; i++) {
Point normCenter = getRandomPoint(myNumDims) * myCenterRange;
realCenters.push_back(normCenter);
for (int j = 0; j < myNumPoints/myNumCenters; j++)
data.push_back(getByNormDist(normCenter myClusterStdDev));
}
// Report general info about the data set
out << “SEPARATE NORMAL DISTRIBUTIONS“ << endl;
out << “=============================“ << endl << endl;
out << “Number of points: “ << myNumPoints << endl;
out << “Number of centers: “ << myNumCenters << endl;
out << “Number of dimensions: “ << myNumDims << endl;
out << “Cluster center range in each dimension: “ << myCenterRange << endl;
out << “Intra-cluster standard deviation: “ << myClusterStdDev << endl;
out << “\“Real\“ potential: “ << getKMeansPotential(data realCenters) << endl << endl;
// Return the data set
return data;
}
};
/**
* Generates a collection of points uniformly at random in a hypercube.
**/
class UniformGen: public AbstractGen {
private:
int myNumPoints myNumDims;
Scalar myRange;
public:
UniformGen(int numPoints int numDims Scalar range) {
myNumPoints = numPoints; myNumDims = numDims;
myRange = range;
}
vector generateData(ostream &out) const {
// Generate the data
vector data;
for (int i = 0; i < myNumPoints; i++)
data.push_back(getRandomPoint(myNumDims) * myRange);
// Report general info about the data set
out << “UNIFORMLY RANDOM DATA“ << endl;
out << “=====================“ << endl << endl;
out << “Number of points: “ << myNumPoints << endl;
out << “Number of dimensions: “ << myNumDims << endl;
out << “Range in
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 15457 2006-07-01 13:42 cloud(10).txt
文件 15447 2006-07-01 13:43 cloud(25).txt
文件 15388 2006-07-01 13:43 cloud(50).txt
文件 104652 2006-05-19 13:05 cloud_input.txt
文件 16685 2006-07-01 15:55 intrusion(10).txt
文件 16730 2006-07-01 15:55 intrusion(25).txt
文件 15636 2006-07-01 15:55 intrusion(50).txt
文件 61223560 2006-07-01 15:54 intrusion_input.txt
文件 15275 2006-06-18 09:49 norm10(10).txt
文件 15147 2006-06-18 09:49 norm10(25).txt
文件 15186 2006-06-18 09:46 norm10(50).txt
文件 15017 2006-06-18 09:43 norm25(10).txt
文件 15046 2006-06-18 09:42 norm25(25).txt
文件 15133 2006-06-18 09:35 norm25(50).txt
文件 15211 2006-07-01 15:55 spam(10).txt
文件 15143 2006-07-01 15:55 spam(25).txt
文件 15183 2006-07-01 15:56 spam(50).txt
文件 703084 2006-07-01 13:45 spam_input.txt
文件 18074 2006-07-01 15:48 Main.cpp
文件 11037 2006-06-18 09:41 Utils.cpp
文件 2096 2006-06-17 09:19 Utils.h
评论
共有 条评论