资源简介
推荐算法(MovieLens-RecSys-master)
代码片段和文件信息
#-*- coding: utf-8 -*-
'''
Created on 2015-06-22
@author: Lockvictor
'''
import sys
import random
import math
import os
from operator import itemgetter
random.seed(0)
class ItembasedCF(object):
‘‘‘ TopN recommendation - Item based Collaborative Filtering ‘‘‘
def __init__(self):
    """Create an empty recommender and report its settings on stderr.

    No data is loaded here; the containers start empty and are filled by
    ``generate_dataset`` and ``calc_movie_sim``.
    """
    # user -> {movie: rating}; populated by generate_dataset().
    self.trainset = {}
    self.testset = {}

    # Tuning knobs, reported below as "Similar movie number" and
    # "Recommended movie number".
    # NOTE(review): the code consuming them is outside the visible part of
    # the file -- confirm their exact use there.
    self.n_sim_movie = 20
    self.n_rec_movie = 10

    # Item-to-item matrix built by calc_movie_sim().
    self.movie_sim_mat = {}
    # movie -> number of training-set users who rated it.
    self.movie_popular = {}
    # Number of distinct movies seen in the training set.
    self.movie_count = 0

    print('Similar movie number = %d' % self.n_sim_movie, file=sys.stderr)
    print('Recommended movie number = %d' %
          self.n_rec_movie, file=sys.stderr)
@staticmethod
def loadfile(filename):
    """Lazily yield the lines of a text file.

    Each yielded line has any leading/trailing ``\\r`` and ``\\n``
    characters stripped. Progress is reported on stderr for line 0 and
    every 100000th line after it, and a final message is printed once the
    whole file has been read.

    Args:
        filename: Path of the file to read.

    Yields:
        str: One line of the file at a time.
    """
    # ``with`` guarantees the handle is released even if the consumer stops
    # iterating early or an exception propagates through the generator.
    with open(filename, 'r') as fp:
        for i, line in enumerate(fp):
            yield line.strip('\r\n')
            if i % 100000 == 0:
                print('loading %s(%s)' % (filename, i), file=sys.stderr)
    print('load %s succ' % filename, file=sys.stderr)
def generate_dataset(self, filename, pivot=0.7):
    """Load rating data and randomly split it into training and test sets.

    Every non-empty line is expected to hold four ``::``-separated fields:
    user, movie, rating and a fourth field that is ignored. A record goes
    into ``self.trainset`` when ``random.random() < pivot`` and into
    ``self.testset`` otherwise; both are ``user -> {movie: rating}``
    mappings with the rating stored as an ``int``.

    Args:
        filename: Path of the ratings file, read through ``self.loadfile``.
        pivot: Probability of assigning a record to the training set.

    Raises:
        ValueError: If a non-empty line does not have exactly four fields
            or its rating is not an integer.
    """
    trainset_len = 0
    testset_len = 0

    for line in self.loadfile(filename):
        if not line:
            # Tolerate blank lines (e.g. a trailing empty line) instead of
            # failing on the 4-way unpack below.
            continue
        user, movie, rating, _ = line.split('::')
        # split the data by pivot
        if random.random() < pivot:
            self.trainset.setdefault(user, {})[movie] = int(rating)
            trainset_len += 1
        else:
            self.testset.setdefault(user, {})[movie] = int(rating)
            testset_len += 1

    print('split training set and test set succ', file=sys.stderr)
    print('train set = %s' % trainset_len, file=sys.stderr)
    print('test set = %s' % testset_len, file=sys.stderr)
def calc_movie_sim(self):
‘‘‘ calculate movie similarity matrix ‘‘‘
print(‘counting movies number and popularity...‘ file=sys.stderr)
for user movies in self.trainset.items():
for movie in movies:
# count item popularity
if movie not in self.movie_popular:
self.movie_popular[movie] = 0
self.movie_popular[movie] += 1
print(‘count movies number and popularity succ‘ file=sys.stderr)
# save the total number of movies
self.movie_count = len(self.movie_popular)
print(‘total movie number = %d‘ % self.movie_count file=sys.stderr)
# count co-rated users between items
itemsim_mat = self.movie_sim_mat
print(‘building co-rated users matrix...‘ file=sys.stderr)
for user movies in self.trainset.items():
for m1 in movies:
for m2 in movies:
if m1 == m2:
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2017-06-10 02:27 MovieLens-RecSys-master\
文件 774 2017-06-10 02:27 MovieLens-RecSys-master\.gitignore
文件 2451 2017-06-10 02:27 MovieLens-RecSys-master\README.md
文件 6452 2017-06-10 02:27 MovieLens-RecSys-master\itemcf.py
文件 6837 2017-06-10 02:27 MovieLens-RecSys-master\usercf.py
- 上一篇:边缘检测和SnakeModel结合的轮廓识别
- 下一篇:ADIsimPLL3.0教程
相关资源
- PerconaXtraBackup-8.0.11.pdf.zip
- 京东商城数据模型.rar
- Navicat Premium 15 mac版
- 单位点餐系统
- 旅行管理系统—数据库实验二
- Spring-Boot Rest学习
- Spring Boot整合Spring Batch,实现批处理
- 数据库准备信息
- ssm框架实现数据库的增删改查完整代
- Navicat附带密钥
- djangol实现学生管理网站
- 车辆管理系统.zip
- Amazon推荐算法,标题要长,汗
- 简单的数据库查询系统窗口实现
- Item based collaborative filtering recommen
- 数据结构课程设计学生信息管理系统
- msvcr120.dll 官方
- 学籍数据库
- 与backupdbE.bat配合使用
- 今日头条推荐系统ppt
- 聚类做预处理,基于关联规则推荐算
- 基于RFID的门禁系统
- SSM完整框架
- vue仿拼多多教程
- 空闲自习室管理系统
- shopping online 购物系统论文演示ppt
- 大学生就业信息网源码
- navicat premium 11.2.13 英文版 破解 100% 破
- 基于增量更新的协同过滤推荐算法
- 1z0-882_testquestion
评论
共有 条评论