资源简介
Rodriguez A, Laio A. Clustering by fast search and find of density peaks[J]. Science, 2014, 344(6191): 1492-1496. 基于这篇文章实现的最基本的密度聚类算法——密度峰值聚类(Density Peak Clustering)的 Python 代码。
代码片段和文件信息
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import sys
import math
import logging
import numpy as np
# Module-level logger used by the functions in this file for progress messages.
logger = logging.getLogger("dpc_cluster")
def load_paperdata(distance_f):
    """
    Load pairwise distances from a text file.

    Every non-empty line holds three whitespace-separated fields: the index
    of the first point, the index of the second point and the distance
    between them. Each pair is stored under both key orders so the returned
    dict can be queried symmetrically.

    Args:
        distance_f : path of the distance file; the format is
            column1-index 1, column2-index 2, column3-distance
    Returns:
        tuple (distances, max_dis, min_dis, max_id): the distance dict keyed
        by (index1, index2), the largest distance read, the smallest distance
        read and the largest index read
    Raises:
        ValueError : if a non-empty line does not hold exactly three fields
            or a field cannot be parsed as a number
    """
    logger.info("PROGRESS: load data")
    distances = {}
    min_dis, max_dis = sys.float_info.max, 0.0
    max_id = 0
    with open(distance_f, 'r') as fp:
        for line in fp:
            fields = line.split()
            if not fields:
                # tolerate blank lines (e.g. an empty line at end of file)
                continue
            x1, x2, d = fields
            x1, x2 = int(x1), int(x2)
            max_id = max(max_id, x1, x2)
            dis = float(d)
            min_dis, max_dis = min(min_dis, dis), max(max_dis, dis)
            # store both orders so lookups do not depend on argument order
            distances[(x1, x2)] = dis
            distances[(x2, x1)] = dis
    # range() works on Python 2 and 3; xrange() exists on Python 2 only.
    # NOTE(review): this adds the keys (0, 0) .. (max_id - 1, max_id - 1).
    # If the ids in the file are 1-based, (max_id, max_id) is never added
    # and (0, 0) is spurious - confirm against the callers before changing.
    for i in range(max_id):
        distances[(i, i)] = 0.0
    logger.info("PROGRESS: load end")
    return distances, max_dis, min_dis, max_id
def select_dc(max_id, max_dis, min_dis, distances, auto=False):
    """
    Select the local density threshold dc.

    By default dc is read from the sorted list of all stored distances at a
    fixed 2 percent position; with auto=True the choice is delegated to
    autoselect_dc.

    Args:
        max_id : max continues id
        max_dis : max distance for all points
        min_dis : min distance for all points
        distances : distance dict
        auto : use auto dc select or not
    Returns:
        dc that local density threshold
    Raises:
        IndexError : if distances holds fewer values than the computed
            position requires
    """
    logger.info("PROGRESS: select dc")
    if auto:
        return autoselect_dc(max_id, max_dis, min_dis, distances)
    percent = 2.0
    position = int(max_id * (max_id + 1) / 2 * percent / 100)
    # position is doubled and shifted by max_id before indexing the sorted
    # values. NOTE(review): this presumably accounts for every pair being
    # stored under both key orders plus max_id zero self-distances sorting
    # first - confirm against the code that builds `distances`.
    dc = sorted(distances.values())[position * 2 + max_id]
    logger.info("PROGRESS: dc - " + str(dc))
    return dc
def autoselect_dc(max_id, max_dis, min_dis, distances):
    """
    Auto select the local density threshold that let average neighbor is
    1-2 percent of all nodes.

    A bisection over [min_dis, max_dis] is run until the fraction of stored
    distances below dc (relative to max_id ** 2) falls inside [0.01, 0.02],
    or until the search interval becomes narrower than 0.0001.

    Args:
        max_id : max continues id
        max_dis : max distance for all points
        min_dis : min distance for all points
        distances : distance dict
    Returns:
        dc that local density threshold
    Raises:
        ZeroDivisionError : if max_id is 0
    """
    dc = (max_dis + min_dis) / 2.0
    # Force float division: with two ints Python 2 would floor the fraction
    # to 0 and the 1-2 percent band could never be reached.
    total = float(max_id ** 2)
    while True:
        nneighs = sum(1 for v in distances.values() if v < dc) / total
        if 0.01 <= nneighs <= 0.02:
            break
        # binary search
        if nneighs < 0.01:
            min_dis = dc
        else:
            max_dis = dc
        dc = (max_dis + min_dis) / 2.0
        # stop once the interval is too narrow to move dc meaningfully
        if max_dis - min_dis < 0.0001:
            break
    return dc
def local_density(max_id distances dc guass=True cutoff=False):
‘‘‘
Compute all points‘ local density
Args:
max_id : max continues id
distances : distance dict
gauss : use guass func or not(can‘t use together with cutoff)
cutoff
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2017-12-08 04:35 DensityPeakCluster-master\
文件 552 2017-12-08 04:35 DensityPeakCluster-master\.gitignore
文件 1072 2017-12-08 04:35 DensityPeakCluster-master\LICENSE
文件 1053 2017-12-08 04:35 DensityPeakCluster-master\README.md
文件 7765 2017-12-08 04:35 DensityPeakCluster-master\cluster.py
目录 0 2017-12-08 04:35 DensityPeakCluster-master\data\
目录 0 2017-12-08 04:35 DensityPeakCluster-master\data\data_in_paper\
文件 4525 2017-12-08 04:35 DensityPeakCluster-master\data\data_in_paper\cluster_dp.m
文件 29771107 2017-12-08 04:35 DensityPeakCluster-master\data\data_in_paper\example_distances.dat
目录 0 2017-12-08 04:35 DensityPeakCluster-master\data\data_iris_flower\
文件 2399 2017-12-08 04:35 DensityPeakCluster-master\data\data_iris_flower\iris.data
文件 174202 2017-12-08 04:35 DensityPeakCluster-master\data\data_iris_flower\iris.forcluster
文件 2151 2017-12-08 04:35 DensityPeakCluster-master\data\data_iris_flower\iris.label
目录 0 2017-12-08 04:35 DensityPeakCluster-master\distance\
文件 130 2017-12-08 04:35 DensityPeakCluster-master\distance\__init__.py
文件 2946 2017-12-08 04:35 DensityPeakCluster-master\distance\distance.py
文件 1358 2017-12-08 04:35 DensityPeakCluster-master\distance\distance_builder.py
文件 444 2017-12-08 04:35 DensityPeakCluster-master\distance\distance_builder_data_iris_flower.py
文件 319 2017-12-08 04:35 DensityPeakCluster-master\distance\error_wrongvec.py
文件 2598 2017-12-08 04:35 DensityPeakCluster-master\plot.py
文件 1712 2017-12-08 04:35 DensityPeakCluster-master\plot_utils.py
文件 706 2017-12-08 04:35 DensityPeakCluster-master\step1_choose_center.py
文件 933 2017-12-08 04:35 DensityPeakCluster-master\step2_cluster.py
评论
共有 条评论