资源简介
《集体智慧编程》(Programming Collective Intelligence)一书的官方配套源代码(原版文件,非手工录入)。
代码片段和文件信息
import random
import math
from math import sqrt
from PIL import ImageImageDrawImageFont
def pearson(v1, v2):
    """Return the Pearson *distance* ``1 - r`` between two numeric vectors.

    ``r`` is the Pearson correlation coefficient of ``v1`` and ``v2``, so
    the result is 0.0 for perfectly correlated vectors and 2.0 for perfectly
    anti-correlated ones (smaller means "closer").

    Args:
        v1: Sequence of numbers.
        v2: Sequence of numbers, indexed over ``range(len(v1))``.

    Returns:
        ``1.0 - r`` as a float, or 0 when the correlation is undefined
        (empty input, or either vector has zero variance).
    """
    count = len(v1)
    if count == 0:
        # No data points: treat like the zero-variance case below.
        return 0
    # Force float arithmetic so integer inputs are not truncated by
    # Python 2 style integer division.
    n = float(count)

    # Simple sums
    sum1 = sum(v1)
    sum2 = sum(v2)

    # Sums of the squares
    sum1_sq = sum(v * v for v in v1)
    sum2_sq = sum(v * v for v in v2)

    # Sum of the products
    p_sum = sum(v1[i] * v2[i] for i in range(count))

    # Calculate r (Pearson score)
    num = p_sum - (sum1 * sum2 / n)
    spread = (sum1_sq - sum1 * sum1 / n) * (sum2_sq - sum2 * sum2 / n)
    # Mathematically spread >= 0; rounding can push it slightly negative
    # when one vector is (nearly) constant, which would make sqrt() raise.
    if spread <= 0:
        return 0
    den = sqrt(spread)
    if den == 0:
        return 0

    return 1.0 - (num / den)
class bicluster(object):
    """Node of a binary cluster tree.

    Attributes are plain public fields set from the constructor arguments:
    ``vec`` (the node's vector), ``left`` / ``right`` (child nodes or None),
    ``distance`` (number stored with the node, default 0.0) and ``id``.
    """

    def __init__(self, vec, left=None, right=None, distance=0.0, id=None):
        # NOTE: the parameter name `id` shadows the builtin but is part of
        # the public keyword interface, so it is kept as is.
        self.left = left
        self.right = right
        self.vec = vec
        self.id = id
        self.distance = distance
def euclidean(v1, v2):
    """Return the Euclidean distance between two numeric vectors.

    Args:
        v1: Sequence of numbers.
        v2: Sequence of numbers, indexed over ``range(len(v1))``.

    Returns:
        The square root of the sum of squared element-wise differences
        (0.0 for empty input).
    """
    sqsum = sum(math.pow(v1[i] - v2[i], 2) for i in range(len(v1)))
    return math.sqrt(sqsum)
def printclust(clust, labels=None, n=0):
    """Print a cluster tree to stdout, one node per line, indented by depth.

    Args:
        clust: Tree node exposing ``id``, ``left`` and ``right`` attributes.
        labels: Optional sequence indexed by node id; when given, nodes with
            a non-negative id print ``labels[id]`` instead of the id itself.
        n: Current depth; each level adds two spaces of indentation.
    """
    # Nodes with a negative id are printed as '-'; the others print their
    # label (or the raw id when no labels were supplied).
    if clust.id < 0:
        text = '-'
    elif labels is None:
        text = str(clust.id)
    else:
        text = str(labels[clust.id])

    # Indentation and text go out in a single call so they stay on one line.
    print('  ' * n + text)

    # Now print the left and right branches, one level deeper.
    if clust.left is not None:
        printclust(clust.left, labels=labels, n=n + 1)
    if clust.right is not None:
        printclust(clust.right, labels=labels, n=n + 1)
def hcluster(vecs, distance=pearson):
    """Build a binary cluster tree by repeatedly merging the closest pair.

    Every input vector starts as its own ``bicluster`` whose id is its index
    in ``vecs``. On each pass the two clusters with the smallest ``distance``
    are replaced by a new cluster whose vector is their element-wise average;
    merged clusters get ids -1, -2, ... in creation order.

    Args:
        vecs: Sequence of equal-length numeric vectors.
        distance: Callable ``distance(vec_a, vec_b)`` returning a number;
            smaller means closer.

    Returns:
        The root ``bicluster`` of the resulting tree.

    Raises:
        IndexError: If ``vecs`` is empty.
    """
    # Cache of pairwise distances keyed by (cluster id, cluster id); ids are
    # never reused, so entries stay valid across merges.
    distances = {}
    currentclustid = -1

    # Clusters are initially just the input vectors.
    clust = [bicluster(vecs[i], id=i) for i in range(len(vecs))]

    while len(clust) > 1:
        lowestpair = None
        closest = None

        # Loop through every pair looking for the smallest distance. The
        # first pair visited is (0, 1); later pairs only win on a strictly
        # smaller distance, so ties keep the earliest pair.
        for i in range(len(clust)):
            for j in range(i + 1, len(clust)):
                key = (clust[i].id, clust[j].id)
                if key not in distances:
                    distances[key] = distance(clust[i].vec, clust[j].vec)
                d = distances[key]

                if lowestpair is None or d < closest:
                    closest = d
                    lowestpair = (i, j)

        first = clust[lowestpair[0]]
        second = clust[lowestpair[1]]

        # The merged vector is the element-wise average of the two clusters.
        mergevec = [(first.vec[k] + second.vec[k]) / 2.0
                    for k in range(len(clust[0].vec))]

        newcluster = bicluster(mergevec, left=first, right=second,
                               distance=closest, id=currentclustid)

        # Cluster ids that weren't in the original set are negative.
        currentclustid -= 1
        # Delete the higher index first so the lower index stays valid.
        del clust[lowestpair[1]]
        del clust[lowestpair[0]]
        clust.append(newcluster)

    return clust[0]
def kcluster(vecsdistance=pearsonk=4):
ranges=[(min([vec[i] for vec in vecs])max([vec[i] for vec in vecs])) for i in range(len(vecs[0]))]
clusters=[[random.random()*(ranges[i][1]-ranges[i][0])+ranges[i][0] for i in range(len(vecs[0]))] for j in range(k)]
lastmatches=None
for t in range(100):
print ‘Iteration %d‘ % t
bestmatches=[[] for i in range(k)]
for j in range(len(vecs)):
vec=vecs[j]
bestmatch=0
for i in range(k):
d=distance(clusters[i]vec)
if d bestmatches[bestmatch].append(j)
if bestmatches==lastmatches: break
lastmatches=bestmatches
for i in range(k):
avgs=[0.0]*len(vecs[0])
if len(bestmatches[i])>0:
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
....... 6304 2017-06-27 12:14 Programming Collective Intelligence\logo.png
文件 24856 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter10\articles.txt
文件 6802 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter10\clusters.py
文件 6041 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter10\docclass.py
文件 2086 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter10\features.txt
文件 4200 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter10\newsfeatures.py
文件 1036 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter10\nnmf.py
文件 3916 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter10\stockfeatures.txt
文件 1296 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter10\stockvolume.py
文件 5120 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter10\Thumbs.db
文件 7030 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter11\gp.py
文件 9029 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter11\gp.pyc
文件 908 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter2\deliciousrec.py
文件 28268 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter2\pydelicious.py
文件 5856 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter2\recommendations.py
文件 147123 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter3\blogdata.txt
文件 8569 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter3\clusters.py
文件 1275 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter3\downloadzebodata.py
文件 3888 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter3\feedlist.txt
文件 1539 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter3\generatefeedvector.py
文件 12288 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter3\Thumbs.db
文件 37692 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter3\zebo.txt
文件 5977 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter4\nn.py
文件 10854 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter4\searchengine.py
文件 1559 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter5\dorm.py
文件 2701 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter5\kayak.py
文件 5810 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter5\optimization.py
文件 2943 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter5\schedule.txt
文件 2222 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter5\socialnetwork.py
文件 5990 2017-06-27 12:14 Programming Collective Intelligence\PCI_Code Folder\chapter6\docclass.py
............此处省略39个文件信息
- 上一篇:视日轨迹太阳跟踪装置控制系统设计
- 下一篇:一万句中英平行语料库,不用预处理
相关资源
- Logistic回归总结非常好的机器学习总结
- Convex Analysis and Optimization (Bertsekas
- 机器学习个人笔记完整版v5.2-A4打印版
- JUNIOR:粒子物理学中无监督机器学习
- 语料库.zip
- 中国科学技术大学 研究生课程 机器学
- 遗传算法越野小车unity5.5
- 吴恩达机器学习编程题
- shape_predictor_68_face_landmarks.dat.bz2 68个标
- 机器学习实战高清pdf,中文版+英文版
- 李宏毅-机器学习(视频2017完整)
- 机器学习深度学习 PPT
- 麻省理工:深度学习介绍PPT-1
- Wikipedia机器学习迷你电子书之四《D
- Learning From Data Yaser S. Abu-Mostafa
- 北大林宙辰:机器学习一阶算法的优
- 李宏毅深度学习ppt
- 机器学习方法R实现-用决策树、神经网
- 数字金融反欺诈白皮书
- 机器学习班PPT原件全邹博
- 机器学习实战(源码和数据样本)
- 计算广告含有目录 刘鹏版
- 数据挖掘导论完整版PPT及课后习题答
- kaggle信用卡欺诈数据
- 机器学习技法原始讲义和课程笔记
- 机器学习数学 陈希孺《 概率论与数理
- 概率论与数理统计陈希孺
- 哈尔滨工业大学深圳 机器学习 2017 考
- [概率论与数理统计]陈希孺带目录
- 刘鹏计算广告完整超清晰带目录版
评论
共有 条评论