资源简介
用python写的一个k-means聚类算法的实现,测试数据在压缩包的data.txt中,结果通过图示的方法进行直观展示。
代码片段和文件信息
from numpy import *
import time
import matplotlib.pyplot as plt
# calculate Euclidean distance
def euclDistance(vector1, vector2):
    """Return the Euclidean (L2) distance between two points.

    Args:
        vector1: First point -- a 1-D array, a 1xN matrix row, or a plain
            sequence of numbers.
        vector2: Second point, with a shape compatible with ``vector1``.

    Returns:
        The non-negative scalar distance between the two points.
    """
    # Coerce to float ndarrays first: this accepts plain lists/tuples and
    # keeps unsigned or narrow integer inputs from wrapping around in the
    # subtraction or overflowing when squared.
    diff = asarray(vector2, dtype=float) - asarray(vector1, dtype=float)
    return sqrt(sum(power(diff, 2)))
# init centroids with random samples
def initCentroids(dataSet, k):
    """Pick ``k`` rows of ``dataSet`` at random to use as starting centroids.

    Args:
        dataSet: 2-D array or matrix with one sample per row.
        k: Number of centroids to draw.

    Returns:
        A new float ndarray of shape (k, dim). Every row is a copy of one
        sample, so later updates to the centroids never modify ``dataSet``.

    Raises:
        IndexError: if centroids are requested from a data set with no rows.
    """
    numSamples, dim = dataSet.shape
    if k > 0 and numSamples == 0:
        raise IndexError("cannot draw centroids from an empty dataSet")

    # Draw distinct rows whenever there are enough samples: seeding two
    # centroids with the same sample leaves one of them with an empty
    # cluster on the first assignment pass.  Only fall back to sampling
    # with replacement when k exceeds the number of samples.
    indices = random.choice(numSamples, size=k, replace=k > numSamples)

    centroids = zeros((k, dim))
    centroids[:, :] = dataSet[indices, :]
    return centroids
# k-means cluster
def kmeans(dataSet, k):
    """Cluster the rows of ``dataSet`` into ``k`` groups with k-means.

    The loop alternates between assigning every sample to its nearest
    centroid and moving every centroid to the mean of its samples, and
    stops once a full pass changes no assignment.

    Args:
        dataSet: 2-D array or matrix with one sample per row.
        k: Number of clusters.

    Returns:
        A tuple ``(centroids, clusterAssment)``. ``centroids`` holds one
        cluster centre per row. ``clusterAssment`` is a (numSamples, 2)
        matrix: column 0 is the index of the cluster each sample belongs
        to, column 1 is the squared distance from the sample to that
        cluster's centroid.
    """
    numSamples = dataSet.shape[0]
    # first column stores which cluster this sample belongs to
    # second column stores the error between this sample and its centroid
    # (asmatrix rather than mat: the mat alias is gone in NumPy 2.0)
    clusterAssment = asmatrix(zeros((numSamples, 2)))
    clusterChanged = True

    ## step 1: init centroids
    centroids = initCentroids(dataSet, k)

    while clusterChanged:
        clusterChanged = False

        ## for each sample
        for i in range(numSamples):
            # Start from +inf instead of a fixed cap so that samples lying
            # farther than the cap from every centroid are still assigned
            # to the genuinely closest one.
            minDist = float('inf')
            minIndex = 0

            ## step 2: find the centroid who is closest
            for j in range(k):
                distance = euclDistance(centroids[j, :], dataSet[i, :])
                if distance < minDist:
                    minDist = distance
                    minIndex = j

            ## step 3: update its cluster
            if clusterAssment[i, 0] != minIndex:
                clusterChanged = True
            # Always store the current error: the centroid may have moved
            # even though the sample kept its cluster.
            clusterAssment[i, :] = minIndex, minDist ** 2

        ## step 4: update centroids
        for j in range(k):
            pointsInCluster = dataSet[nonzero(clusterAssment[:, 0].A == j)[0]]
            # An empty cluster has no mean (it would become NaN); leave its
            # centroid where it is.
            if pointsInCluster.shape[0] > 0:
                centroids[j, :] = mean(pointsInCluster, axis=0)

    print('Congratulations cluster complete!')
    return centroids, clusterAssment
# show your cluster; only available with 2-D data
def showCluster(dataSet, k, centroids, clusterAssment):
    """Plot a clustering result for two-dimensional data.

    Every sample is drawn with the marker of the cluster it is assigned to,
    then each centroid is drawn on top with a larger marker, and the figure
    is shown.

    Args:
        dataSet: 2-D array or matrix with one sample per row; it must have
            exactly two columns to be drawable.
        k: Number of clusters; must not exceed the number of marker styles.
        centroids: Array with one centroid per row.
        clusterAssment: Per-sample assignment table; column 0 holds the
            cluster index of each sample.

    Returns:
        1 when the plot is refused (the reason is printed); None after the
        figure has been shown.
    """
    numSamples, dim = dataSet.shape
    if dim != 2:
        print("Sorry! I can not draw because the dimension of your data is not 2!")
        return 1

    # NOTE(review): only the first eight entries of each marker list are
    # visible in the published listing; the last two ('<', 'p') follow the
    # visible colour pattern -- confirm against the original kmeans.py.
    sampleMarks = ['or', 'ob', 'og', 'ok', '^r', '+r', 'sr', 'dr', '<r', 'pr']
    centroidMarks = ['Dr', 'Db', 'Dg', 'Dk', '^b', '+b', 'sb', 'db', '<b', 'pb']

    if k > len(sampleMarks):
        print("Sorry! Your k is too large!")
        return 1

    # draw all samples
    for i in range(numSamples):
        markIndex = int(clusterAssment[i, 0])
        plt.plot(dataSet[i, 0], dataSet[i, 1], sampleMarks[markIndex])

    # draw the centroids
    for i in range(k):
        plt.plot(centroids[i, 0], centroids[i, 1], centroidMarks[i], markersize=12)

    plt.show()
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 80 2014-05-26 14:26 kmeans\data.txt
文件 80 2014-05-26 14:25 kmeans\data.txt~
文件 2749 2014-05-26 14:29 kmeans\kmeans.py
文件 2649 2014-05-26 14:29 kmeans\kmeans.pyc
文件 2749 2014-05-26 14:29 kmeans\kmeans.py~
文件 599 2014-05-26 14:28 kmeans\test.py
文件 599 2014-05-26 14:28 kmeans\test.py~
目录 0 2014-05-26 21:32 kmeans\
相关资源
- python实现SGBM图像匹配算法
- python实现灰度直方图均衡化
- scrapy_qunar_one
- Python学习全系列教程永久可用
- python简明教程.chm
- 抽奖大转盘python的图形化界面
- 双边滤波器实验报告及代码python
- python +MYSQL+HTML实现21蛋糕网上商城
- Python-直播答题助手自动检测出题搜索
- OpenCV入门教程+OpenCV官方教程中文版
- Python 串口工具源码+.exe文件
- Python开发的全栈股票系统.zip
- Python操作Excel表格并将其中部分数据写
- python书籍 PDF
- 利用python绘制散点图
- python+labview+No1.vi
- 老男孩python项目实战
- python源码制作whl文件.rar
- python3.5可用的scipy
- PYTHON3 经典50案例.pptx
- 计算机科学导论-python.pdf
- python模拟鼠标点击屏幕
- windows鼠标自动点击py脚本
- 鱼c小甲鱼零基础学python全套课后题和
- Python 练习题100道
- Practical Programming 2nd Edition
- wxPython Application Development Cookbook
- python 3.6
- Python 3.5.2 中文文档 互联网唯一CHM版本
- python3.5.2.chm官方文档
评论
共有 条评论