资源简介
recommendation_engines.py Recommenders.py 推荐系统.ipynb
代码片段和文件信息
# coding: utf-8
# # Load Necessary Dependencies
# In[1]:
import pandas as pd
import numpy as np
import time
import sqlite3
# Directory that holds the dataset files. File names are appended to it with
# plain string concatenation (e.g. data_home + 'train_triplets.txt'), so the
# value must end with a path separator.
data_home = './'
# # Load and Process the Datasets
# ### Get more information about the Millionsong project from https://labrosa.ee.columbia.edu/millionsong/
#
# #### Refer to Chapter 10, section 'The Million Song Dataset Taste Profile', for more details
# ## Load Triplets data [user song play_count]
# #### Get the data from http://labrosa.ee.columbia.edu/millionsong/sites/default/files/challenge/train_triplets.txt.zip
# In[2]:
# NOTE(review): the cells below are disabled by wrapping them in a bare string
# literal, so none of this code runs. They make a full pass over
# train_triplets.txt and write user_playcount_df.csv / song_playcount_df.csv,
# which the code right after this block reads back -- presumably the CSVs were
# generated once and this pass was switched off; confirm before re-enabling.
"""
triplet_dataset = pd.read_csv(filepath_or_buffer=data_home + 'train_triplets.txt',
                              nrows=10000, sep='\t', header=None,
                              names=['user', 'song', 'play_count'])
# In[3]:
triplet_dataset.head(n=10)
# ## Get User and total play counts
# In[5]:
# Accumulate the total play count per user while streaming the file line by
# line, so the whole triplets file never has to be held in memory.
output_dict = {}
with open(data_home + 'train_triplets.txt') as f:
    for line in f:
        fields = line.split('\t')
        user = fields[0]
        play_count = int(fields[2])
        output_dict[user] = output_dict.get(user, 0) + play_count
output_list = [{'user': k, 'play_count': v} for k, v in output_dict.items()]
play_count_df = pd.DataFrame(output_list)
play_count_df = play_count_df.sort_values(by='play_count', ascending=False)
# In[ ]:
play_count_df.to_csv(path_or_buf='user_playcount_df.csv', index=False)
# ## Get Song and total play counts
# In[7]:
# Same streaming aggregation as above, keyed by song (second column).
output_dict = {}
with open(data_home + 'train_triplets.txt') as f:
    for line in f:
        fields = line.split('\t')
        song = fields[1]
        play_count = int(fields[2])
        output_dict[song] = output_dict.get(song, 0) + play_count
output_list = [{'song': k, 'play_count': v} for k, v in output_dict.items()]
song_count_df = pd.DataFrame(output_list)
song_count_df = song_count_df.sort_values(by='play_count', ascending=False)
# In[ ]:
song_count_df.to_csv(path_or_buf='song_playcount_df.csv', index=False)
# ## View top users and songs
# In[14]:
"""
# Load the per-user and per-song play-count totals from CSV files in the
# current working directory. Each file is read for a 'play_count' column plus
# a 'user' or 'song' column respectively.
# NOTE(review): the head(n=...) subsetting below only selects the *top*
# users/songs if the CSV rows are already sorted by play_count descending
# (the disabled aggregation cell writes them that way) -- confirm for any
# externally supplied CSVs.
play_count_df = pd.read_csv(filepath_or_buffer='user_playcount_df.csv')
# Notebook display expression; has no visible effect when run as a script.
play_count_df.head(n=10)
# In[15]:
song_count_df = pd.read_csv(filepath_or_buffer='song_playcount_df.csv')
song_count_df.head(10)
# ## Subsetting the data
# In[15]:
# Vectorised Series.sum() instead of the builtin sum(), which would iterate
# the column element by element in Python.
total_play_count = song_count_df.play_count.sum()
# Percentage of all plays contributed by the first 100,000 users
# (notebook display expression; the value is not stored).
(float(play_count_df.head(n=100000).play_count.sum())/total_play_count)*100
play_count_subset = play_count_df.head(n=100000)
# In[17]:
# Percentage of all plays contributed by the first 30,000 songs
# (notebook display expression; the value is not stored).
(float(song_count_df.head(n=30000).play_count.sum())/total_play_count)*100
# In[18]:
song_count_subset = song_count_df.head(n=30000)
# In[19]:
# Plain Python lists of the retained user and song identifiers.
user_subset = list(play_count_subset.user)
song_subset = list(song_count_subset.song)
# In[20]:
triplet_dataset = pd.read_csv(filepath_or_buffer=data_h
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 13987 2018-02-27 11:42 recommendation_engines.py
文件 353123 2018-03-01 15:32 推荐系统.ipynb
文件 9456 2018-02-27 11:42 Recommenders.py
- 上一篇:Demo Voltage Read.vi
- 下一篇:labvIEW英文文献
评论
共有 条评论