资源简介
基于用户的协同过滤算法Python实现
代码片段和文件信息
import random
import math
class UserbasedCF:
def __init__(self, datafile=None):
    """
    Store the data file path, load the data and split it into
    train and test sets.

    datafile: path of the data file that readData() will open.
    """
    self.datafile = datafile
    self.readData()
    # splitData needs both k and seed. k=3, seed=47 is the only reading
    # of the fused "347" for which k lies inside splitData's bucket
    # range 0..M (M defaults to 8); k=34 would leave the test set empty.
    self.splitData(3, 47)
def readData(self, datafile=None):
    """
    Read the data set from the data file into self.data.

    datafile: optional path. A non-empty value replaces self.datafile;
        otherwise the path already stored on the instance is used.

    Every non-blank line must contain exactly four tab-separated fields
    (userid, itemid, record, mtime). The record field is converted to
    int and the fourth field is discarded. self.data becomes a list of
    (userid, itemid, record) tuples in file order.

    Raises ValueError if a line does not have four fields or its record
    field is not an integer.
    """
    self.datafile = datafile or self.datafile
    self.data = []
    # The context manager closes the file even if a line fails to parse.
    with open(self.datafile) as source:
        for line in source:
            if not line.strip():
                # Tolerate blank lines instead of failing on the unpack.
                continue
            userid, itemid, record, _mtime = line.split("\t")
            self.data.append((userid, itemid, int(record)))
def splitData(self, k, seed, data=None, M=8):
    """
    Split the data set into self.testdata and self.traindata.

    k: bucket number whose records go to the test set.
    seed: value passed to random.seed() so the split is repeatable.
    data: iterable of (user, item, record) triples; when omitted or
        empty, self.data is used.
    M: upper bound of the bucket draw.

    For every record a bucket is drawn with random.randint(0, M), whose
    bounds are both inclusive, so there are M + 1 buckets. Records that
    draw bucket k land in self.testdata, all others in self.traindata.
    Both results are dicts of the form {user: {item: record}}.
    """
    self.testdata = {}
    self.traindata = {}
    data = data or self.data
    random.seed(seed)
    # Iterate the resolved data set; looping over self.data here would
    # silently ignore a caller-supplied ``data`` argument.
    for user, item, record in data:
        if random.randint(0, M) == k:
            target = self.testdata
        else:
            target = self.traindata
        target.setdefault(user, {})[item] = record
def userSimilarity(self, train=None):
    """
    One method of getting the user similarity matrix.

    train: dict of the form {user: {item: record}}; when omitted or
        empty, self.traindata is used.

    For every ordered pair of distinct users (u, v) the similarity is
        |items(u) & items(v)| / sqrt(|items(u)| * |items(v)|)
    and is stored in self.userSim[u][v]. A pair in which either user
    has no items gets 0.0 instead of raising ZeroDivisionError.
    """
    train = train or self.traindata
    # Build each user's item set once rather than once per pair.
    item_sets = {user: set(items) for user, items in train.items()}
    self.userSim = dict()
    for u, items_u in item_sets.items():
        for v, items_v in item_sets.items():
            if u == v:
                continue
            norm = math.sqrt(len(items_u) * len(items_v))
            shared = len(items_u & items_v)
            self.userSim.setdefault(u, {})[v] = shared / norm if norm else 0.0
def userSimilarityBest(selftrain = None):
“““
the other method of getting user similarity which is better than above
you can get the method on page 46
In this experiment we use this method
“““
train = train or self.traindata
self.userSimBest = dict()
item_users = dict()
#build inverse table for item_users
for uitem in train.items():
for i in item.keys():
item_users.setdefault(iset())
item_users[i].add(u)
#calculate co-rated items between users
user_item_count = dict()
count = dict()
for itemusers in item_users.items():
for u in users:
user_item_count.setdefault(u0)
user_item_count[u] += 1
for v in users:
if u == v:
continue
count.setdefault(u{})
count[u].setdefault(v0)
count[u][v] += 1
#calculate finial similarity matrix W
for urelated_users in count.items():
self.userSimBest.setdefault(udict())
for vcuv in related_users.items():
self.userSimBest[u][v] = cuv / math.sqrt(us
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 6527 2015-03-06 14:59 Userba
文件 266 2015-03-06 15:50 Userba
文件 1979173 2000-07-19 16:09 Userba
目录 0 2015-03-06 15:47 Userba
目录 0 2015-03-06 15:47 Userba
----------- --------- ---------- ----- ----
1985966 5
- 上一篇:决策树回归算法
- 下一篇:crf--python编码
相关资源
- python实现SGBM图像匹配算法
- python实现灰度直方图均衡化
- scrapy_qunar_one
- Python学习全系列教程永久可用
- python简明教程.chm
- 抽奖大转盘python的图形化界面
- 双边滤波器实验报告及代码python
- python +MYSQL+HTML实现21蛋糕网上商城
- Python-直播答题助手自动检测出题搜索
- OpenCV入门教程+OpenCV官方教程中文版
- Python 串口工具源码+.exe文件
- Python开发的全栈股票系统.zip
- Python操作Excel表格并将其中部分数据写
- python书籍 PDF
- 利用python绘制散点图
- python+labview+No1.vi
- 老男孩python项目实战
- python源码制作whl文件.rar
- python3.5可用的scipy
- PYTHON3 经典50案例.pptx
- 计算机科学导论-python.pdf
- python模拟鼠标点击屏幕
- windows鼠标自动点击py脚本
- 鱼c小甲鱼零基础学python全套课后题和
- Python 练习题100道
- Practical Programming 2nd Edition
- wxPython Application Development Cookbook
- python 3.6
- Python 3.5.2 中文文档 互联网唯一CHM版本
- python3.5.2.chm官方文档
评论
共有 条评论