资源简介
使用的是python3版本,自己编写的,能够完美运行,只需要运行主程序就行,数据啥的都准备好了
代码片段和文件信息
import random
import numpy as np
import pandas as pd
def getdata(data):
    """Load a CSV file and return its feature columns as a 2-D array.

    The file is read with ``pandas.read_csv`` and the last column, which
    holds the class label, is removed so that only feature values remain.
    No sampling is performed here.

    Args:
        data: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        ``numpy.ndarray`` containing every parsed row with the last column
        dropped.
    """
    # NOTE(review): read_csv is called with its defaults, so the first line
    # of the file is consumed as a header -- confirm the data file has one.
    values = pd.read_csv(data).values
    # The label column is discarded; nothing downstream uses it.
    return np.delete(values, -1, axis=1)
def sampledata(filename, k_sample, getdata=getdata):
    """Return ``k_sample`` randomly chosen rows of a data file as an array.

    Args:
        filename: Passed unchanged to ``getdata``.
        k_sample: Number of distinct rows to draw.
        getdata: Loader called with ``filename``; its result must support
            ``len()`` and integer indexing.

    Returns:
        ``numpy.ndarray`` built from the selected rows, in the order drawn.

    Raises:
        ValueError: Raised by ``random.sample`` when ``k_sample`` is
            negative or larger than the number of loaded rows.
    """
    data = getdata(filename)
    # Sampling row indices (without replacement) avoids copying the data
    # just to pick a subset of it.
    chosen_rows = random.sample(range(len(data)), k_sample)
    return np.array([data[i] for i in chosen_rows])
def dist(A, B):
    """Return the Euclidean distance between two vectors.

    Args:
        A: First vector (NumPy array or any array-like of numbers).
        B: Second vector, broadcast-compatible with ``A``.

    Returns:
        Square root of the summed squared element-wise differences.
    """
    # Converting to float lets plain sequences be passed in and prevents
    # wrap-around when subtracting unsigned integer arrays.
    diff = np.asarray(A, dtype=float) - np.asarray(B, dtype=float)
    return np.sqrt(np.sum(np.power(diff, 2)))
def randcent(data, k):
    """Pick ``k`` distinct random row indices to use as initial medoids.

    Args:
        data: Sized collection of samples; only its length is used.
        k: Number of medoids to choose.

    Returns:
        List of ``k`` distinct indices in ``range(len(data))``.

    Raises:
        ValueError: Raised by ``random.sample`` when ``k`` is negative or
            exceeds ``len(data)``.
    """
    # The count must follow k; a fixed count would ignore the number of
    # clusters the caller asked for.
    return random.sample(range(len(data)), k)
# Memo of sample-to-medoid distances keyed by (medoid index, sample index),
# so repeated cost evaluations do not recompute the same distance.
# The keys are row indices only: the cache is valid for a single data set
# and must be cleared before clustering different data.
distances_cache = {}


def totalcost(data, medoids_idx):
    """Assign every sample to its nearest medoid and sum the distances.

    Args:
        data: Indexable collection of sample vectors.
        medoids_idx: Iterable of row indices currently acting as medoids.

    Returns:
        Tuple ``(total_cost, medoids)`` where ``total_cost`` is the sum of
        each sample's distance to its nearest medoid and ``medoids`` maps
        each medoid index to the list of sample indices assigned to it.
        Ties go to the medoid that appears first in ``medoids_idx``.
    """
    size = len(data)
    total_cost = 0.0
    medoids = {idx: [] for idx in medoids_idx}
    for i in range(size):
        choice = None
        min_cost = np.inf
        for m in medoids:
            tmp = distances_cache.get((m, i))
            # Identity test: the cached value is a number, so comparing it
            # to None with == is the wrong tool.
            if tmp is None:
                tmp = dist(data[m], data[i])
                distances_cache[(m, i)] = tmp
            if tmp < min_cost:
                choice = m
                min_cost = tmp
        medoids[choice].append(i)
        total_cost += min_cost
    return total_cost, medoids
def PAM(data, k):
    """Cluster ``data`` into ``k`` groups with the k-medoids (PAM) method.

    Starting from random medoids, every pass tries replacing each medoid
    with each member of its own cluster and keeps the single swap that
    lowers the total cost the most. The search stops when a full pass finds
    no improving swap.

    Args:
        data: Indexable collection of sample vectors.
        k: Number of clusters.

    Returns:
        Tuple ``(current_cost, best_choice, best_res)``: the final total
        cost, the list of medoid indices, and a dict mapping each medoid
        index to the sample indices assigned to it.
    """
    medoids_idx = randcent(data, k)
    current_cost, medoids = totalcost(data, medoids_idx)
    # Seed the search with the starting configuration. Starting from an
    # infinite cost instead would accept a swap that is worse than the
    # start, and the loop would then never reach its stop condition.
    best_choice = list(medoids_idx)
    best_res = medoids
    iter_count = 0
    while True:
        for m in medoids:
            # Each trial swap is undone before the next one, so the slot
            # holding m stays the same for the whole inner loop.
            idx = medoids_idx.index(m)
            for item in medoids[m]:
                if item == m:
                    continue
                medoids_idx[idx] = item
                tmp, medoids_ = totalcost(data, medoids_idx)
                if tmp < current_cost:
                    best_choice = list(medoids_idx)
                    best_res = dict(medoids_)
                    current_cost = tmp
                medoids_idx[idx] = m
        iter_count += 1
        print('while循环次数: ', iter_count)
        if best_choice == medoids_idx:
            break
        # Continue from the best configuration found. Copy the list so the
        # trial swaps of the next pass cannot modify best_choice in place.
        medoids = best_res
        medoids_idx = list(best_choice)
    return current_cost, best_choice, best_res
def main():
n= int(input(‘对原数据的采样数目n=: ‘))
k = int(input(‘聚类数目K= :‘))
data = sampledata(‘waveform+noise.data‘n)
totalcostclasslabelclasses =a bc =PAM(datak)
print(‘总距离是: ‘totalcost)
print(‘
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 3404 2017-10-25 15:58 K中心data\PAM.py
文件 1077986 2017-10-24 21:33 K中心data\waveform+noise.data
目录 0 2018-03-13 14:51 K中心data
----------- --------- ---------- ----- ----
1081390 3
- 上一篇:python实现SVM
- 下一篇:基于Python的计算机网络实验设计
评论
共有 条评论