资源简介

代码片段和文件信息
#-*- coding: utf-8 -*-
'''
Created on 2015-06-22
@author: Lockvictor
'''
import sys
import random
import math
import os
from operator import itemgetter
random.seed(0)
class ItembasedCF(object):
‘‘‘ TopN recommendation - Item based Collaborative Filtering ‘‘‘
def __init__(self):
self.trainset = {}
self.testset = {}
self.n_sim_movie = 20
self.n_rec_movie = 10
self.movie_sim_mat = {}
self.movie_popular = {}
self.movie_count = 0
print(‘Similar movie number = %d‘ % self.n_sim_movie file=sys.stderr)
print(‘Recommended movie number = %d‘ %
self.n_rec_movie file=sys.stderr)
@staticmethod
def loadfile(filename):
‘‘‘ load a file return a generator. ‘‘‘
fp = open(filename ‘r‘)
for i line in enumerate(fp):
yield line.strip(‘\r\n‘)
if i % 100000 == 0:
print (‘loading %s(%s)‘ % (filename i) file=sys.stderr)
fp.close()
print (‘load %s succ‘ % filename file=sys.stderr)
def generate_dataset(self filename pivot=0.7):
‘‘‘ load rating data and split it to training set and test set ‘‘‘
trainset_len = 0
testset_len = 0
for line in self.loadfile(filename):
user movie rating _ = line.split(‘::‘)
# split the data by pivot
if random.random() < pivot:
self.trainset.setdefault(user {})
self.trainset[user][movie] = int(rating)
trainset_len += 1
else:
self.testset.setdefault(user {})
self.testset[user][movie] = int(rating)
testset_len += 1
print (‘split training set and test set succ‘ file=sys.stderr)
print (‘train set = %s‘ % trainset_len file=sys.stderr)
print (‘test set = %s‘ % testset_len file=sys.stderr)
def calc_movie_sim(self):
‘‘‘ calculate movie similarity matrix ‘‘‘
print(‘counting movies number and popularity...‘ file=sys.stderr)
for user movies in self.trainset.items():
for movie in movies:
# count item popularity
if movie not in self.movie_popular:
self.movie_popular[movie] = 0
self.movie_popular[movie] += 1
print(‘count movies number and popularity succ‘ file=sys.stderr)
# save the total number of movies
self.movie_count = len(self.movie_popular)
print(‘total movie number = %d‘ % self.movie_count file=sys.stderr)
# count co-rated users between items
itemsim_mat = self.movie_sim_mat
print(‘building co-rated users matrix...‘ file=sys.stderr)
for user movies in self.trainset.items():
for m1 in movies:
for m2 in movies:
if m1 == m2:
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2017-06-10 02:27 MovieLens-RecSys-master\
文件 774 2017-06-10 02:27 MovieLens-RecSys-master\.gitignore
文件 2451 2017-06-10 02:27 MovieLens-RecSys-master\README.md
文件 6452 2017-06-10 02:27 MovieLens-RecSys-master\itemcf.py
文件 6837 2017-06-10 02:27 MovieLens-RecSys-master\usercf.py
- 上一篇:边缘检测和SnakeModel结合的轮廓识别
- 下一篇:ADIsimPLL3.0教程
相关资源
- MySQL Notes For Professionals
- MoNyog8.5+破解补丁
- 全国4级地址库,京东数据
- 协同过滤算法源码
- php程序实现数据库的增删改查
- 302 Found
- Navicat 完整版 (Mac 破解版,亲试成功
- 购物网站商品推荐算法论文
- 数据库大量数据导出Excel
- 基于MVC的网上书城系统
- springmvc_2020.rar
- 学生成绩管理系统带数据库
- 网页版聊天程序--网络程序设计课程大
- Geeklink极客设备运维系统
- 滴滴打车系统数据库实现
- 数据库实验报告.docx
- 基于深度神经网络的用户会话推荐算
- 基于SSH框架的电影票订票系统
- ODBC windows64位驱动
- Navicat_Keygen_Patch_v4.9支持最新版Navica
- sequelpro-1.1.2
- 美团推荐算法实践
- 仓库管理系统 数据库课程设计
- 简易版图书信息管理系统
- 阿里移动推荐算法大赛冠军答辩PPT
- Navicat12全系列激活注册机
- 网上商城(ShoppingMallSystem(UML))设
- 数据库原理课程设计---通讯录系统
- 论坛代码毕业设计
- 基于web的ssh在线音乐系统
评论
共有 条评论