• 大小: 8KB
    文件类型: .zip
    金币: 1
    下载: 0 次
    发布日期: 2023-07-19
  • 语言: Python
  • 标签: kmeans  python  

资源简介

K-means 算法的 Python 3.5 实现,附带数据,可以直接运行。

资源截图

代码片段和文件信息

from numpy import *
from matplotlib import pyplot as plt

#classify data
def loadDataSet(path):
    """Load a labelled, space-separated data file.

    Every non-blank line holds the feature values followed by a label in
    the last column. The number of columns is taken from the first
    non-blank line.

    Args:
        path: Path of the text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple: ``dataMat`` is a list of rows of
        floats (all columns but the last), ``labelMat`` is the list of the
        last-column values kept as strings.

    Raises:
        ValueError: If a feature field cannot be parsed as a float.
        IndexError: If a line has fewer columns than the first line.
    """
    # Read once inside a context manager so the handle is always closed.
    with open(path) as fr:
        lines = [line.strip() for line in fr]
    # Blank lines (typically a trailing newline) carry no sample.
    lines = [line for line in lines if line]

    dataMat = []
    labelMat = []
    if not lines:
        return dataMat, labelMat

    numFeat = len(lines[0].split(' '))
    for line in lines:
        curline = line.split(' ')
        dataMat.append([float(curline[i]) for i in range(numFeat - 1)])
        labelMat.append(curline[-1])
    return dataMat, labelMat

#cluster data
def loadData(path):
    """Load a tab-separated file of 2-D points for clustering.

    Only the first two columns of each non-blank line are used.

    Args:
        path: Path of the text file to read.

    Returns:
        A numpy matrix with one row per sample and two columns.

    Raises:
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a non-blank line has fewer than two columns.
    """
    dataset = []
    # Context manager guarantees the file handle is released.
    with open(path) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                # Skip blank lines instead of failing on float('').
                continue
            lineArr = stripped.split('\t')
            dataset.append([float(lineArr[0]), float(lineArr[1])])
    # asmatrix is the portable spelling; the `mat` alias is gone in NumPy 2.
    return asmatrix(dataset)

def caeuclDistance(vect1, vect2):
    """Return the Euclidean distance between two vectors.

    Args:
        vect1: First vector (numpy array or matrix row).
        vect2: Second vector, broadcast-compatible with ``vect1``.

    Returns:
        The square root of the sum of squared element-wise differences.
    """
    # `sum`, `sqrt` and `power` are the numpy versions from the star import,
    # so the sum collapses every axis to a single scalar.
    return sqrt(sum(power(vect1 - vect2, 2)))

def initCentroids(dataSet, k):
    """Pick ``k`` rows of ``dataSet`` at random as the starting centroids.

    Rows are drawn without replacement whenever ``k`` does not exceed the
    number of samples, so the same row is never used for two centroids.
    Sampling falls back to with-replacement only when ``k`` is larger than
    the number of samples.

    Args:
        dataSet: 2-D numpy array or matrix, one sample per row.
        k: Number of centroids to pick.

    Returns:
        A float ndarray of shape ``(k, dim)`` holding the chosen rows.
    """
    numSamples, dim = dataSet.shape
    # Duplicates are only unavoidable when more centroids than samples
    # are requested.
    indices = random.choice(numSamples, size=k, replace=k > numSamples)
    # Index a plain float ndarray; fancy indexing returns a fresh copy, so
    # later centroid updates never touch the input data.
    centroids = asarray(dataSet, dtype=float)[indices, :]
    return centroids.reshape(k, dim)

def kmeans(dataset, k):
    """Cluster the rows of ``dataset`` into ``k`` groups.

    Alternates between assigning every sample to its nearest centroid and
    moving each centroid to the mean of its samples, until no sample changes
    cluster.

    Args:
        dataset: numpy matrix, one sample per row.
        k: Number of clusters.

    Returns:
        A ``(centroids, clusterAssment)`` tuple. ``centroids`` is the
        ``(k, dim)`` array of cluster centres. ``clusterAssment`` is a
        matrix with one row per sample: column 0 is the cluster index,
        column 1 the squared distance to that cluster's centroid.
    """
    numSamples = dataset.shape[0]
    clusterAssment = asmatrix(zeros((numSamples, 2)))
    # Start from an impossible label so first-pass assignments to cluster 0
    # are still seen as changes.
    clusterAssment[:, 0] = -1
    centroids = initCentroids(dataset, k)

    clusterChange = True
    while clusterChange:
        clusterChange = False

        # Assignment step: attach every sample to its closest centroid.
        for i in range(numSamples):
            minDist = inf
            clusterlabel = 0
            for j in range(k):
                distance = caeuclDistance(centroids[j, :], dataset[i, :])
                if distance < minDist:
                    minDist = distance
                    clusterlabel = j
            if clusterAssment[i, 0] != clusterlabel:
                clusterChange = True
            # Always refresh the row: centroids move between passes, so the
            # stored distance would go stale if only written on a change.
            clusterAssment[i, :] = clusterlabel, minDist ** 2

        # Update step: move each centroid to the mean of its members.
        for j in range(k):
            members = nonzero(clusterAssment[:, 0].A == j)[0]
            if members.size:
                centroids[j, :] = mean(dataset[members], axis=0)
            # An empty cluster keeps its previous centroid; averaging zero
            # rows would turn it into NaN.

    print('cluster complete')
    return centroids, clusterAssment

def plot(dataSet, k, centroids, clusterAssment):
    """Draw the clustered 2-D samples and their centroids with matplotlib.

    Args:
        dataSet: Matrix of samples, one per row; must have two columns.
        k: Number of clusters.
        centroids: ``(k, 2)`` array of cluster centres.
        clusterAssment: Per-sample matrix whose column 0 is the cluster
            index.

    Returns:
        ``1`` when nothing is drawn (data is not 2-D, or ``k`` exceeds the
        number of available marker styles); ``None`` after showing the
        figure.
    """
    numSamples, dim = dataSet.shape
    if dim != 2:
        print('Sorry! I can not draw the pictrue bescause the dimension of your data is not 2!')
        return 1

    # One matplotlib format string per cluster for the samples.
    # NOTE(review): the entries after 'dr' were cut off in the extracted
    # listing; '<r' and 'pr' are a reconstruction -- confirm against the
    # original file.
    mark = ['or', 'ob', 'og', 'ok', '^r', '+r', 'sr', 'dr', '<r', 'pr']
    if k > len(mark):
        print('Sorry! Your k is too large!')
        return 1

    for i in range(numSamples):
        markindex = int(clusterAssment[i, 0])
        plt.plot(dataSet[i, 0], dataSet[i, 1], mark[markindex])

    # Separate, larger markers for the centroids.
    # NOTE(review): entries after 'db' reconstructed the same way -- confirm.
    markc = ['Dr', 'Db', 'Dg', 'Dk', '^b', '+b', 'sb', 'db', '<b', 'pb']
    for i in range(k):
        # Use the centroid markers; indexing `mark` here left `markc` unused
        # and made centroids look like ordinary samples.
        plt.plot(centroids[i, 0], centroids[i, 1], markc[i], markersize=12)
    plt.show()

def main():
    """Run the demo: load the sample data, cluster it into 4 groups, plot."""
    import os

    print('load data....')
    # Same working-directory-relative location as '..\data\data.txt', but
    # built portably and without the invalid '\d' escape sequence.
    filepath = os.path.join(os.pardir, 'data', 'data.txt')
    dataset = loadData(filepath)
    k = 4
    print('cluster data')
    centroids, clusterAssment = kmeans(dataset, k)
    print('show the result!')
    plot(dataset, k, centroids, clusterAssment)

# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     目录           0  2017-08-06 20:42  kmeans\
     目录           0  2017-08-01 22:10  kmeans\.idea\
     文件           6  2017-08-01 22:10  kmeans\.idea\.name
     文件         164  2017-08-01 22:10  kmeans\.idea\encodings.xml
     文件         284  2017-08-01 22:10  kmeans\.idea\kmeans.iml
     文件        1137  2017-08-01 22:10  kmeans\.idea\misc.xml
     文件         264  2017-08-01 22:10  kmeans\.idea\modules.xml
     目录           0  2017-08-01 22:10  kmeans\.idea\scopes\
     文件         143  2017-08-01 22:10  kmeans\.idea\scopes\scope_settings.xml
     文件         164  2017-08-01 22:10  kmeans\.idea\vcs.xml
     文件       25780  2017-08-10 15:14  kmeans\.idea\workspace.xml
     目录           0  2017-08-06 20:54  kmeans\data\
     文件        1598  2017-08-06 20:55  kmeans\data\data.txt
     目录           0  2017-08-10 15:18  kmeans\src\
     文件        2975  2017-08-10 15:17  kmeans\src\kmeans.py
     文件          30  2017-08-06 20:42  kmeans\src\__init__.py

评论

共有 条评论