资源简介
使用 Python 实现 DP-means 与 k-means 两种聚类算法并进行比较,压缩包内附带测试数据集。
代码片段和文件信息
#!/usr/bin/env python
from pprint import pprint
import sys
import random
import math
import timer
class kmeans(object):
def __init__(self _X _k _xVal = 0 _stop=False):
# X is sample size lists of dim length
#
# _xVal is the number of records to hold out cross-validation.
# To use this you must randomize input data!
#
# Setting _stop=True causes iteration to stop when out of cross-validate
# error starts to rise.
#
self.nFeatures = len(_X[0])
self.xValSize = _xVal
self.allSize = len(_X)
self.size = self.allSize - self.xValSize
self.X = _X
self.k = _k
self.stop = _stop
# Initialize group memebership
self.dataClusterId = [-1 for i in range(0 self.allSize)] # index of group for each data pair
self.clusters = {}
idx = 0
# initialize to k random data points
# don‘t assign x-val as a strat center
for i in random.sample(range(0 self.size) self.k):
self.clusters[idx] = self.X[i]
idx += 1
# output records
self.record = []
self.errorRecord = []
def dSquared(self, x, y):
    """Return the squared Euclidean distance between sequences x and y.

    Coordinates are paired with zip, so if the sequences differ in length
    the extra trailing coordinates of the longer one are ignored.
    """
    dist2 = 0.0
    for j, k in zip(x, y):
        dist2 += (j - k) ** 2
    return dist2
def error(self):
    """Return (training error, cross-validation error) for the current assignment.

    The training error is the mean squared distance between each of the
    first self.size rows and the center of its assigned cluster. The
    cross-validation error is the same mean over the remaining held-out
    rows, or 0.0 when their summed squared distance is not positive
    (which includes the case of no held-out rows).

    Every row must already have a valid cluster id in self.dataClusterId;
    an unassigned id raises KeyError on the cluster lookup.
    """
    res = 0.0
    for i in range(0, self.size):
        res += self.dSquared(self.X[i], self.clusters[self.dataClusterId[i]])
    # error on non training data
    res1 = 0.0
    err1 = 0.0
    for i in range(self.size, self.allSize):
        res1 += self.dSquared(self.X[i], self.clusters[self.dataClusterId[i]])
    # Guarding on res1 also avoids dividing by xValSize == 0.
    if res1 > 0.0:
        err1 = res1 / self.xValSize
    return res / self.size, err1
def nearestCluster(self, x):
    """Return (cluster id, distance) of the center closest to sample x.

    The distance is the Euclidean distance (square root of dSquared).
    On a tie the cluster met first while iterating self.clusters wins,
    because only a strictly smaller distance replaces the current best.
    When no center is closer than the sentinel (e.g. there are no
    clusters) the result is (-sentinel, sentinel).
    """
    # sys.maxint only exists on Python 2; fall back to sys.maxsize so the
    # method also runs on Python 3 while keeping the Python 2 sentinel.
    sentinel = getattr(sys, "maxint", sys.maxsize)
    cmin = sentinel
    cidx = -sentinel
    for j in self.clusters:
        dist = math.sqrt(self.dSquared(x, self.clusters[j]))
        if dist < cmin:  # record closest centroid
            cmin = dist
            cidx = j
    return cidx, cmin
def assign(self):
    """Assign every row, training and held-out alike, to its nearest cluster.

    Overwrites self.dataClusterId in place with the cluster id returned
    by nearestCluster for each row of self.X.
    """
    for i in range(0, self.allSize):
        # nearestCluster also returns the distance, which is not needed here.
        self.dataClusterId[i], _ = self.nearestCluster(self.X[i])
def updateClusters(self):
    """Move each cluster center to the mean of its assigned training rows.

    Only the first self.size rows (the training set) contribute; rows
    held out for cross-validation are ignored. A cluster with no training
    members keeps its previous center.
    """
    nFeatures = self.nFeatures
    # Per cluster: nFeatures running sums followed by one member counter.
    ctemp = {}
    for j in range(0, self.k):
        ctemp[j] = [0.0] * nFeatures + [0]
    # only calculate clusters on training not cross-validation set
    for i in range(0, self.size):
        sums = ctemp[self.dataClusterId[i]]
        for j in range(0, nFeatures):
            sums[j] += self.X[i][j]
        sums[nFeatures] += 1  # count
    for c in self.clusters:
        count = ctemp[c][nFeatures]
        # `!=` replaces the Python-2-only `<>` operator.
        if count != 0:
            # The sums are floats, so this is true division on Python 2 too.
            self.clusters[c] = [ctemp[c][k] / count for k in range(0, nFeatures)]
def run(self nmax = 100 eps = 1e-7):
prev = 0.0
prevXVal = float(sys.maxint)
for iter in range(0nmax):
# update assignments
self.assign()
# calculate error
err errXVal = self.error()
#
if self.stop and errXVal - prevXVal >= 0.0:
sys.stderr.write(“Cross-validation error increasing at step %d\n“%iter)
break
prevXVal = errXVal
#
if abs(err-prev) < eps:
sys.stderr.write(“Tolerance reached a
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2014-12-01 17:25 Python-DP-Means-Clustering-master\
文件 295 2014-12-01 17:25 Python-DP-Means-Clustering-master\.gitignore
文件 1748 2014-12-01 17:25 Python-DP-Means-Clustering-master\DPopt.py
文件 1317 2014-12-01 17:25 Python-DP-Means-Clustering-master\LICENSE
文件 6058 2014-12-01 17:25 Python-DP-Means-Clustering-master\README
文件 6124 2014-12-01 17:25 Python-DP-Means-Clustering-master\cluster.py
文件 354 2014-12-01 17:25 Python-DP-Means-Clustering-master\costTest.bash
文件 1232 2014-12-01 17:25 Python-DP-Means-Clustering-master\createTestData.py
目录 0 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\
文件 18195 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\2d-sample-data.png
文件 96576 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\3d-sample-data.png
文件 20568 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\error.png
文件 241453 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\iters.png
文件 18088 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\opt_error.png
文件 174533 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\opt_iters.png
文件 20973 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_errors.png
文件 22520 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_errors_20.png
文件 30375 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_errors_20_annotated.png
文件 24465 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error.png
文件 26844 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_10.png
文件 27245 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_12.png
文件 27581 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_15.png
文件 26937 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_18.png
文件 24941 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_2.png
文件 26406 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_20.png
文件 27115 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_3.png
文件 26713 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_4.png
文件 27006 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_5.png
文件 26386 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_8.png
文件 21904 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times.png
目录 0 2014-12-01 17:25 Python-DP-Means-Clustering-master\input\
............此处省略14个文件信息
相关资源
- 航空公司客户价值大数据分析源代码
- 基于phash图像特征的图像聚类-kmeans-
- Kmeans.docx K均值聚类算法实验报告
- K-means聚类分析与python实现
- Python聚类分析
- Python实现K-means聚类算法
- 聚类分析OPTICS算法python实现
- Python聚类分析应用干货(基于Python的
- 总年薪预测聚类分析.py
- Python聚类算法之基本K均值详解
- 基于python的微博热点话题舆情聚类分
- 专门处理不平衡数据集的算法,使用
- python实现k-means聚类
- 层次聚类(AGNES)算法(Python)
- kmeans.py yolov3计算anchors
- 算法的python实现代码、测试数据集及
- [python] Kmeans文本聚类算法+PAC降维+Ma
- 西电数据挖掘作业——k中心聚类pyt
- 美国各州一年电力负荷数据海量用户
- Python图像聚类
- Python-利用Python实现中文文本关键词抽
- k均值聚类python实现
- 聚类 k-means k-medoids代码实现
- python实现谱聚类代码并进行可视化
- python内置K-means聚类算法对鸢尾花数据
- FaceClustering.zip
- 模糊聚类python可执行完整代码
- 基于sklearn模块的KMeans聚类算法实现“
- 四种聚类算法实现对控制图时间序列
- python数据挖掘分类聚类回归关联算法
评论
共有 条评论