• 大小: 1.19MB
    文件类型: .zip
    金币: 1
    下载: 0 次
    发布日期: 2023-08-29
  • 语言: Python
  • 标签: 聚类  

资源简介

使用 Python 实现 DP-means 与 k-means 两种聚类算法并对二者进行比较,压缩包内附带测试数据集。

资源截图

代码片段和文件信息

#!/usr/bin/env python
from pprint import pprint
import sys
import random
import math
import timer

class kmeans(object):

def __init__(self, _X, _k, _xVal=0, _stop=False):
    """Set up a k-means run over the samples in ``_X``.

    _X     -- sequence of samples; every sample is a sequence of feature
              values with the same length as ``_X[0]``.
    _k     -- number of clusters.
    _xVal  -- number of records held out for cross-validation.  The
              held-out records are the LAST ``_xVal`` rows of ``_X``, so
              the input data must be shuffled by the caller for this to
              be meaningful.
    _stop  -- when True, iteration stops once the cross-validation error
              starts to rise.

    Raises ValueError (from ``random.sample``) when ``_k`` exceeds the
    number of training records.
    """
    self.nFeatures = len(_X[0])
    self.xValSize = _xVal
    self.allSize = len(_X)
    # Training records are rows [0, size); held-out rows are [size, allSize).
    self.size = self.allSize - self.xValSize
    self.X = _X
    self.k = _k
    self.stop = _stop
    # Initialize group membership: index of the cluster for each record,
    # -1 meaning "not assigned yet".
    self.dataClusterId = [-1 for i in range(0, self.allSize)]
    self.clusters = {}
    # Initialize the centers to k random data points.  Only training rows
    # are sampled: a cross-validation record must not become a start center.
    for idx, i in enumerate(random.sample(range(0, self.size), self.k)):
        self.clusters[idx] = self.X[i]
    # output records
    self.record = []
    self.errorRecord = []

def dSquared(self, x, y):
    """Return the squared Euclidean distance between points ``x`` and ``y``.

    The coordinates are paired with ``zip``, so if the two sequences differ
    in length the extra trailing coordinates of the longer one are ignored.
    The result is always a float (0.0 for empty inputs).
    """
    dist2 = 0.0
    for j, k in zip(x, y):
        dist2 += (j - k) ** 2
    return dist2

def error(self):
    """Return ``(training_error, cross_validation_error)``.

    Each value is the mean squared distance between the records of that
    subset and the center of the cluster they are currently assigned to.
    The training subset is rows ``[0, size)`` and the held-out subset is
    rows ``[size, allSize)``.  The cross-validation error is 0.0 when there
    are no held-out rows or their total squared distance is zero.

    Every record must already have a valid cluster id (see ``assign``).
    Raises ZeroDivisionError when there are no training records.
    """
    res = 0.0
    for i in range(0, self.size):
        res += self.dSquared(self.X[i], self.clusters[self.dataClusterId[i]])
    # error on non-training (held-out) data
    res1 = 0.0
    err1 = 0.0
    for i in range(self.size, self.allSize):
        res1 += self.dSquared(self.X[i], self.clusters[self.dataClusterId[i]])
    # Guard keeps xValSize == 0 from causing a division by zero.
    if res1 > 0.0:
        err1 = res1 / self.xValSize
    return res / self.size, err1

def nearestCluster(self, x):
    """Return ``(cluster_id, distance)`` for the center closest to ``x``.

    ``distance`` is the Euclidean distance (square root of ``dSquared``).
    On a tie the cluster met first while iterating ``self.clusters`` wins,
    because only a strictly smaller distance replaces the current best.
    If ``self.clusters`` is empty the sentinel pair ``(-maxint, maxint)``
    is returned.
    """
    # sys.maxint exists only on Python 2; fall back to sys.maxsize so the
    # same sentinel logic also runs on Python 3.
    maxint = getattr(sys, "maxint", sys.maxsize)
    cmin = maxint
    cidx = -maxint
    for j in self.clusters:
        dist = math.sqrt(self.dSquared(x, self.clusters[j]))
        if dist < cmin:  # record closest centroid
            cmin = dist
            cidx = j
    return cidx, cmin

def assign(self):
    """Assign every record to its nearest cluster center.

    Covers all ``allSize`` rows, i.e. the training rows and the held-out
    cross-validation rows alike, and stores the chosen cluster id in
    ``self.dataClusterId``.  Returns None.
    """
    for i in range(0, self.allSize):
        # Only the cluster id is kept; the distance is not needed here.
        self.dataClusterId[i], _ = self.nearestCluster(self.X[i])

def updateClusters(self):
    """Move each cluster center to the mean of its training members.

    Only the training rows ``[0, size)`` contribute; held-out
    cross-validation rows are ignored.  A cluster with no training members
    keeps its previous center.  Every training row must already carry a
    cluster id in ``range(k)`` (see ``assign``).  Returns None.
    """
    # Per cluster: one running sum per feature, followed by a member count
    # stored in the extra last slot (index nFeatures).
    ctemp = {}
    for j in range(0, self.k):
        ctemp[j] = [0.0] * self.nFeatures  # init sums
        ctemp[j].append(0)  # init counter
    # only calculate clusters on training, not the cross-validation set
    for i in range(0, self.size):
        sums = ctemp[self.dataClusterId[i]]
        for j in range(0, self.nFeatures):
            sums[j] += self.X[i][j]
        sums[self.nFeatures] += 1  # count
    for c in self.clusters:
        count = ctemp[c][self.nFeatures]
        # `!=` replaces the Python 2-only `<>` operator.
        if count != 0:
            self.clusters[c] = [ctemp[c][k] / count
                                for k in range(0, self.nFeatures)]
        # else: no members in this cluster -- leave its center unchanged
    return

def run(self nmax = 100 eps = 1e-7):
prev = 0.0
prevXVal = float(sys.maxint)
for iter in range(0nmax):
# update assignments
self.assign()
# calculate error
err errXVal = self.error()
#
if self.stop and errXVal - prevXVal >= 0.0:
sys.stderr.write(“Cross-validation error increasing at step %d\n“%iter)
break
prevXVal = errXVal
#
if abs(err-prev) < eps:
sys.stderr.write(“Tolerance reached a

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     目录           0  2014-12-01 17:25  Python-DP-Means-Clustering-master\
     文件         295  2014-12-01 17:25  Python-DP-Means-Clustering-master\.gitignore
     文件        1748  2014-12-01 17:25  Python-DP-Means-Clustering-master\DPopt.py
     文件        1317  2014-12-01 17:25  Python-DP-Means-Clustering-master\LICENSE
     文件        6058  2014-12-01 17:25  Python-DP-Means-Clustering-master\README
     文件        6124  2014-12-01 17:25  Python-DP-Means-Clustering-master\cluster.py
     文件         354  2014-12-01 17:25  Python-DP-Means-Clustering-master\costTest.bash
     文件        1232  2014-12-01 17:25  Python-DP-Means-Clustering-master\createTestData.py
     目录           0  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\
     文件       18195  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\2d-sample-data.png
     文件       96576  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\3d-sample-data.png
     文件       20568  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\error.png
     文件      241453  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\iters.png
     文件       18088  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\opt_error.png
     文件      174533  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\opt_iters.png
     文件       20973  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_errors.png
     文件       22520  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_errors_20.png
     文件       30375  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_errors_20_annotated.png
     文件       24465  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times-error.png
     文件       26844  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times-error_10.png
     文件       27245  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times-error_12.png
     文件       27581  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times-error_15.png
     文件       26937  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times-error_18.png
     文件       24941  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times-error_2.png
     文件       26406  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times-error_20.png
     文件       27115  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times-error_3.png
     文件       26713  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times-error_4.png
     文件       27006  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times-error_5.png
     文件       26386  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times-error_8.png
     文件       21904  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times.png
     目录           0  2014-12-01 17:25  Python-DP-Means-Clustering-master\input\
............此处省略14个文件信息

评论

共有 条评论