资源简介
推荐系统SVD实现代码,python实现
代码片段和文件信息
#Ver1.0
#Zero @2012.5.2
#
import math
import random
import cPickle as pickle
#calculate the overall average
def Average(fileName):
    """Return the mean of the ratings stored in a ratings file.

    Every non-blank line is split on whitespace and its third field is
    parsed as an integer rating; the first two fields are ignored here.

    Args:
        fileName: path of the ratings file to read.

    Returns:
        The arithmetic mean of all ratings, as a float.

    Raises:
        ZeroDivisionError: if the file contains no rating lines.
        IndexError: if a non-blank line has fewer than three fields.
        ValueError: if the third field is not an integer.
    """
    total = 0.0
    cnt = 0
    # The context manager closes the file even when a line fails to parse.
    with open(fileName, 'r') as fi:
        for line in fi:
            arr = line.split()
            if not arr:
                # Tolerate blank lines instead of failing on arr[2].
                continue
            # split() already removed surrounding whitespace from the field.
            total += int(arr[2])
            cnt += 1
    return total / cnt
def InerProduct(v1, v2):
    """Return the inner (dot) product of two numeric sequences.

    The sum runs over the indices of v1, so v2 must be at least as long
    as v1; any extra trailing elements of v2 are ignored.

    Args:
        v1: first sequence of numbers; its length drives the iteration.
        v2: second sequence of numbers, indexed with v1's indices.

    Returns:
        The sum of v1[i] * v2[i]; 0 when v1 is empty.

    Raises:
        IndexError: if v2 is shorter than v1.
    """
    return sum(v1[i] * v2[i] for i in range(len(v1)))
def PredictScore(av, bu, bi, pu, qi):
    """Predict a rating and clamp it to the range [1, 5].

    The raw prediction is av + bu + bi + <pu, qi>, where <pu, qi> is the
    inner product computed by InerProduct.

    Args:
        av: global average score.
        bu: bias term of the user.
        bi: bias term of the item.
        pu: factor vector of the user.
        qi: factor vector of the item.

    Returns:
        The raw prediction when it lies within [1, 5]; otherwise 1 for
        predictions below 1 and 5 for predictions above 5.
    """
    pScore = av + bu + bi + InerProduct(pu, qi)
    # Clamp with explicit comparisons so an in-range value is returned as-is.
    if pScore < 1:
        return 1
    if pScore > 5:
        return 5
    return pScore
def SVD(configureFile, testDataFile, trainDataFile, modelSaveFile):
    """Train the biased factor model with SGD and pickle the parameters.

    The first line of the configuration file must hold six
    whitespace-separated fields, in this order: average score, number of
    users, number of items, number of factors, learning rate and
    regularization weight.

    Each non-blank line of the training file is read as
    "<user id> <item id> <rating>". Ids are converted to list indices by
    subtracting 1, and the rating is parsed as an integer.

    Training runs for at most 100 passes over the training file. After
    each pass the RMSE on the test file is printed, and training stops as
    soon as that RMSE fails to improve on the previous pass.

    NOTE: the parameters written to the model file are those of the last
    pass that was run, which includes the pass whose RMSE got worse.

    Args:
        configureFile: path of the configuration file.
        testDataFile: path of the ratings file passed to Validate.
        trainDataFile: path of the ratings file used for the updates.
        modelSaveFile: path the pickled bu, bi, qi, pu lists (in that
            order) are written to.
    """
    # get the configuration
    with open(configureFile, 'r') as fi:
        arr = fi.readline().split()
    averageScore = float(arr[0])
    userNum = int(arr[1])
    itemNum = int(arr[2])
    factorNum = int(arr[3])
    learnRate = float(arr[4])
    regularization = float(arr[5])

    # Biases start at zero; factors start as small random values scaled by
    # 1 / sqrt(factorNum). qi is drawn before pu, as in the original order.
    bi = [0.0] * itemNum
    bu = [0.0] * userNum
    scale = math.sqrt(factorNum)
    qi = [[0.1 * random.random() / scale for j in range(factorNum)]
          for i in range(itemNum)]
    pu = [[0.1 * random.random() / scale for j in range(factorNum)]
          for i in range(userNum)]
    print("initialization end\nstart training\n")

    # train model
    preRmse = 1000000.0
    for step in range(100):
        # The context manager closes the file even if a line fails to parse.
        with open(trainDataFile, 'r') as fi:
            for line in fi:
                arr = line.split()
                if not arr:
                    # Tolerate blank lines instead of failing on arr[0].
                    continue
                uid = int(arr[0]) - 1
                iid = int(arr[1]) - 1
                score = int(arr[2])
                prediction = PredictScore(averageScore, bu[uid], bi[iid],
                                          pu[uid], qi[iid])
                eui = score - prediction

                # update parameters
                bu[uid] += learnRate * (eui - regularization * bu[uid])
                bi[iid] += learnRate * (eui - regularization * bi[iid])
                puRow = pu[uid]
                qiRow = qi[iid]
                for k in range(factorNum):
                    # Save the user factor first: the item update must use
                    # the value from before this step's user update.
                    oldPu = puRow[k]
                    puRow[k] += learnRate * (eui * qiRow[k]
                                             - regularization * oldPu)
                    qiRow[k] += learnRate * (eui * oldPu
                                             - regularization * qiRow[k])

        curRmse = Validate(testDataFile, averageScore, bu, bi, pu, qi)
        print("test_RMSE in step %d: %f" % (step, curRmse))
        if curRmse >= preRmse:
            break
        preRmse = curRmse

    # write the model to files
    # open() replaces the Python 2-only file() builtin. Protocol 1 is the
    # value the original passed as True.
    with open(modelSaveFile, 'wb') as fo:
        pickle.dump(bu, fo, 1)
        pickle.dump(bi, fo, 1)
        pickle.dump(qi, fo, 1)
        pickle.dump(pu, fo, 1)
    print("model generation over")
#validate the model
def Validate(testDataFile, av, bu, bi, pu, qi):
    """Return the RMSE of the model's predictions on a ratings file.

    Each non-blank line is read as "<user id> <item id> <rating>". Ids are
    converted to list indices by subtracting 1, the rating is parsed as an
    integer, and the prediction comes from PredictScore.

    Args:
        testDataFile: path of the ratings file to evaluate on.
        av: global average score.
        bu: list of user biases, indexed by user id - 1.
        bi: list of item biases, indexed by item id - 1.
        pu: list of user factor vectors, indexed by user id - 1.
        qi: list of item factor vectors, indexed by item id - 1.

    Returns:
        The square root of the mean squared prediction error.

    Raises:
        ZeroDivisionError: if the file contains no rating lines.
    """
    cnt = 0
    squaredError = 0.0
    # The context manager closes the file even when a line fails to parse.
    with open(testDataFile, 'r') as fi:
        for line in fi:
            arr = line.split()
            if not arr:
                # Tolerate blank lines instead of failing on arr[0].
                continue
            uid = int(arr[0]) - 1
            iid = int(arr[1]) - 1
            tScore = int(arr[2])
            pScore = PredictScore(av, bu[uid], bi[iid], pu[uid], qi[iid])
            diff = tScore - pScore
            squaredError += diff * diff
            cnt += 1
    return math.sqrt(squaredError / cnt)
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 235016 2011-04-01 18:24 SVD\ml_data\test.txt
文件 12839563 2011-04-01 18:24 SVD\ml_data\training.txt
文件 97 2012-05-03 14:55 SVD\svd.conf
文件 4153 2012-05-06 20:29 SVD\SVD.py
目录 0 2012-05-06 20:31 SVD\ml_data
目录 0 2012-05-06 20:31 SVD
----------- --------- ---------- ----- ----
13078829 6
- 上一篇:python爬取维基百科程序语言消息盒(源码及截图)
- 下一篇:PythonTank
相关资源
- PythonTank
- python爬取维基百科程序语言消息盒(
- easygui-0.96
- python坦克大战分步骤源码及素材
- django+mysql家具购物网站,包含部署教
- python新浪微博爬虫
- Python3.5.2的IDLE汉化版计算机等级考试
- 12306爬虫实现
- Python求解数独并输出求解过程
- python程序设计基础课件
- 西电python网络处理上机题答案
- VMD变分模态分解算法
- Python网络编程 3版 高清扫描版 完整中
- 使用python编写的打飞机游戏源码
- 计算24点python
- 扑克小游戏python代码
- Python黑客攻防入门.pdf
- 《Python语言程序设计基础第二版PDF+课
- Python-PySimpleGUI一个建立在tkinter之上简
- python3.5 百度ai人脸识别
- python编程从入门到实践的案例和动手
- 编译原理词法分析器、语法分析器p
- python3实现的国密SM2+SM3
- 50G金融资料包python源码包
- Python爬虫相关书籍.zip
- Windows64下通过python调用海康SDK实现登
- Python-使用DeepFakes实现YouTube视频自动换
- python_web实战-源码
- Python-100-Days-master.zip
- python与量化投资-从基础到实战 配套资
评论
共有 条评论