资源简介
K-medoids聚类算法是对K-means算法的改进:K-means每次迭代都会计算一个新的点(簇内均值)作为聚类中心,而K-medoids则遍历簇内原有的数据点,选择到其余各点距离之和最小的点作为中心点,因此中心点始终是真实样本。适用于分类数据。
代码片段和文件信息
import random as rand
import math as math
from point import Point
#import pkg_resources
#pkg_resources.require(“matplotlib“)
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
class clustering:
# constructor: stores the input locations and the requested number of clusters
def __init__(self geo_locs_ k_):
self.geo_locations = geo_locs_
self.k = k_
self.clusters = [] #clusters of nodes
self.means = [] #means of clusters
self.debug = False #debug flag
# returns the next cluster seed: the point farthest from the seeds already chosen (deterministic, despite the name)
def next_random(self, index, points, clusters):
    """Return the candidate point farthest from the current cluster seeds.

    Despite the name, nothing here is random: for every candidate the
    Euclidean distances (over ``latit``/``longit``) to the first member of
    each cluster are summed, and the candidate with the largest sum wins.

    Args:
        index: accepted for interface compatibility; not used.
        points: iterable of hashable candidates exposing ``latit`` and
            ``longit`` attributes.
        clusters: dict whose values are non-empty sequences; only the
            first element of each sequence is used as that cluster's seed.

    Returns:
        The candidate with the largest summed distance. On ties, the one
        encountered first while iterating the accumulated distances.

    Raises:
        ValueError: if no distance could be computed because ``points``
            or ``clusters`` is empty.
    """
    # Summed distance per candidate; a candidate listed twice accumulates twice.
    dist = {}
    for point_1 in points:
        if self.debug:
            print('point_1: %f %f' % (point_1.latit, point_1.longit))
        # Distance from this candidate to the seed of every existing cluster.
        for cluster in clusters.values():
            point_2 = cluster[0]
            if self.debug:
                print('point_2: %f %f' % (point_2.latit, point_2.longit))
            # Points only carry two coordinates, so a planar distance suffices.
            gap = math.hypot(point_1.latit - point_2.latit,
                             point_1.longit - point_2.longit)
            dist[point_1] = dist.get(point_1, 0.0) + gap
    if self.debug:
        for key, value in dist.items():
            print("(%f %f) ==> %f" % (key.latit, key.longit, value))
    if not dist:
        # Previously this fell through to an unbound local variable.
        raise ValueError("next_random needs at least one point and one cluster")
    # max() keeps the first maximal entry, matching a strict '>' scan.
    return max(dist, key=dist.get)
#this method computes the initial means
def initial_means(self points):
#pick the first node at random
point_ = rand.choice(points)#choose one point in random
if self.debug:
print ‘point#0: %f %f‘ % (point_.latit point_.longit)
clusters = dict() #creat an empty dict
clusters.setdefault(0 []).append(point_)#setdefault():if key doesnt in dictcreat the key and set to default
points.remove(point_)#remove the center of clustering
#now let‘s pick k-1 more random points
for i in range(1 self.k):
point_ = self.next_random(i points clusters)#call child function i:the index of clustering center points:clustering sample clusters:clustering cent
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 7894 2018-01-15 11:49 k-medoids-master\clustering.py
文件 1057 2018-01-15 10:01 k-medoids-master\main.py
文件 177 2014-08-07 16:11 k-medoids-master\point.py
文件 1022 2014-08-07 16:11 k-medoids-master\README.md
评论
共有 条评论