资源简介
K-means 聚类算法的 Python 实现程序
Ch10
├── Portland.png
├── kMeans.py
├── kMeans.pyc
├── places.txt
├── portlandClubs.txt
├── testSet.txt
└── testSet2.txt
0 directories, 7 files
代码片段和文件信息
'''
Created on Feb 16, 2011
k-Means Clustering for Ch10 of Machine Learning in Action
@author: Peter Harrington
'''
from numpy import *
def loadDataSet(fileName):
    """Parse a tab-delimited text file of floats into a list of rows.

    Args:
        fileName: Path of the text file to read. Every non-blank line must
            consist of tab-separated fields that ``float()`` can parse.

    Returns:
        A list with one entry per non-blank line; each entry is a list of
        floats in column order.

    Raises:
        IOError/OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to float.
    """
    dataMat = []
    # The context manager closes the file even when a line fails to parse.
    with open(fileName) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no data.
                continue
            # list(...) materialises the row: on Python 3 a bare map() is a
            # one-shot iterator, not a list of floats.
            dataMat.append(list(map(float, stripped.split('\t'))))
    return dataMat
def distEclud(vecA, vecB):
    """Return the Euclidean distance between two vectors.

    Args:
        vecA: First vector (sequence, ndarray, or 1 x n matrix).
        vecB: Second vector, with a shape compatible with ``vecA``.

    Returns:
        The scalar ``sqrt(sum((vecA - vecB) ** 2))``.
    """
    # asarray lets plain sequences be subtracted element-wise and leaves
    # existing arrays/matrices uncopied.
    diff = asarray(vecA, dtype=float) - asarray(vecB, dtype=float)
    # Use the .sum() method rather than a bare sum(): the result then does
    # not depend on whether the builtin or numpy's sum is in scope.
    return sqrt(power(diff, 2).sum())
def randCent(dataSet, k):
    """Create k random centroids inside the bounding box of the data.

    Args:
        dataSet: 2-D data, one row per point and one column per dimension
            (matrix, ndarray, or nested sequence).
        k: Number of centroids to generate.

    Returns:
        A k x n matrix whose column j is drawn uniformly from
        [min, max) of column j of ``dataSet`` (exactly min when the column
        is constant).
    """
    data = asmatrix(dataSet)  # no copy when dataSet is already a matrix
    n = shape(data)[1]
    centroids = asmatrix(zeros((k, n)))
    for j in range(n):
        column = data[:, j]
        # The .min()/.max() methods return plain scalars; the builtin
        # min()/max() would iterate the column as 1x1 matrices instead.
        minJ = column.min()
        rangeJ = float(column.max() - minJ)
        # random.rand yields values in [0, 1); scale them into the column's
        # range so every centroid starts within the data bounds.
        centroids[:, j] = asmatrix(minJ + rangeJ * random.rand(k, 1))
    return centroids
def kMeans(dataSet, k, distMeas=distEclud, createCent=randCent):
    """Cluster the rows of dataSet into k groups with k-means.

    Args:
        dataSet: 2-D numpy matrix/array, one row per data point (it is
            indexed as ``dataSet[i, :]``).
        k: Number of clusters.
        distMeas: Callable ``(centroid_row, point_row) -> distance``.
        createCent: Callable ``(dataSet, k) -> k x n matrix`` producing the
            initial centroids.

    Returns:
        A tuple ``(centroids, clusterAssment)``. ``centroids`` is whatever
        ``createCent`` returned, updated in place. ``clusterAssment`` is an
        m x 2 matrix: column 0 holds the index of the assigned centroid and
        column 1 the squared distance to it.
    """
    m = shape(dataSet)[0]
    # Column 0: assigned centroid index; column 1: squared error of the point.
    clusterAssment = asmatrix(zeros((m, 2)))
    centroids = createCent(dataSet, k)
    clusterChanged = True
    while clusterChanged:
        clusterChanged = False
        # Assignment step: attach every point to its closest centroid.
        for i in range(m):
            minDist = inf
            minIndex = -1
            for j in range(k):
                distJI = distMeas(centroids[j, :], dataSet[i, :])
                if distJI < minDist:
                    minDist = distJI
                    minIndex = j
            if clusterAssment[i, 0] != minIndex:
                # Any reassignment forces another full pass.
                clusterChanged = True
            clusterAssment[i, :] = minIndex, minDist ** 2
        print(centroids)
        # Update step: move each centroid to the mean of its members.
        for cent in range(k):
            ptsInClust = dataSet[nonzero(clusterAssment[:, 0].A == cent)[0]]
            if len(ptsInClust) > 0:
                centroids[cent, :] = mean(ptsInClust, axis=0)
            # An empty cluster keeps its previous centroid: the mean of an
            # empty selection is NaN and would poison later distances.
    return centroids, clusterAssment
def biKmeans(dataSet k distMeas=distEclud):
m = shape(dataSet)[0]
clusterAssment = mat(zeros((m2)))
centroid0 = mean(dataSet axis=0).tolist()[0]
centList =[centroid0] #create a list with one centroid
for j in range(m):#calc initial Error
clusterAssment[j1] = distMeas(mat(centroid0) dataSet[j:])**2
while (len(centList) < k):
lowestSSE = inf
for i in range(len(centList)):
ptsInCurrCluster = dataSet[nonzero(clusterAssment[:0].A==i)[0]:]#get the data points currently in cluster i
centroidMat splitClustAss = kMeans(ptsInCurrCluster 2 distMeas)
sseSplit = sum(splitClustAss[:1])#compare the SSE to the currrent minimum
sseNotSplit = sum(clusterAssment[nonzero(clusterAssment[:0].A!=i)[0]1])
print “sseSplit and notSplit: “sseSplitsseNotSplit
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2011-12-28 15:30 Ch10\
文件 3105 2011-03-17 21:17 Ch10\portlandClubs.txt
目录 0 2020-11-16 01:04 __MACOSX\
目录 0 2020-11-16 01:04 __MACOSX\Ch10\
文件 222 2011-03-17 21:17 __MACOSX\Ch10\._portlandClubs.txt
文件 459112 2011-03-18 12:00 Ch10\Portland.png
文件 222 2011-03-18 12:00 __MACOSX\Ch10\._Portland.png
文件 6397 2011-12-28 15:30 Ch10\kMeans.pyc
文件 222 2011-12-28 15:30 __MACOSX\Ch10\._kMeans.pyc
文件 1600 2011-03-15 13:03 Ch10\testSet.txt
文件 222 2011-03-15 13:03 __MACOSX\Ch10\._testSet.txt
文件 6419 2011-12-28 15:52 Ch10\kMeans.py
文件 222 2011-12-28 15:52 __MACOSX\Ch10\._kMeans.py
文件 1194 2011-03-16 10:16 Ch10\testSet2.txt
文件 222 2011-03-16 10:16 __MACOSX\Ch10\._testSet2.txt
文件 4693 2011-03-18 09:56 Ch10\places.txt
文件 222 2011-03-18 09:56 __MACOSX\Ch10\._places.txt
文件 222 2011-12-28 15:30 __MACOSX\._Ch10
- 上一篇:Python各种树木制作代码
- 下一篇:Python-霍兰德人格分析图绘制
相关资源
- 《机器学习实战》源代码Python3
- python新浪微博爬虫,爬取微博和用户
- 《机器学习实战》Python3代码
- 机器学习实战 Python实现
- 机器学习实战python实现
- 人工智能-python机器学习实战高清完整
- 《机器学习实战》pdf及所和数据集文
- 机器学习实战Python 开发 高清 非扫描
- python 机器学习实战 pdf 中文文字版
- 《Python 3数据分析与机器学习实战》自
- 《Python 3数据分析与机器学习实战》随
- 《机器学习实战》python3完美运行代码
- Python——机器学习实战——Apriori算法
- 老唐的1——python数据分析与机器学习
- 机器学习实战python2SVM 训练数据
- 机器学习实战:基于 Scikit-Learn 和 T
- 《机器学习实战》源代码
- 机器学习实战python2SVM与核函数 训练数
- Python——机器学习实战——AdaBoost分类
- 《机器学习实战》中决策树python2.7代
评论
共有 条评论