Resource Overview
A ready-made wrapper around a factorization machine (FM) algorithm, including simple model parameter tuning, intended mainly for binary classification and regression problems.
Code Snippet and File Information
# -*- coding:utf-8 -*-
from sklearn.ensemble import GradientBoostingClassifier
'''
Author: pany
Create: 2017-12-14
Update: 2017-12-18
Description: factorization machine library
'''
from pyfm import pylibfm
from sklearn.feature_extraction import DictVectorizer
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, mean_squared_error, log_loss, accuracy_score
# train = [
#     {"user": "1", "item": "5", "age": 19},
#     {"user": "2", "item": "43", "age": 33},
#     {"user": "3", "item": "20", "age": 55},
#     {"user": "4", "item": "10", "age": 20},
# ]
# print(train)
# v = DictVectorizer()
# X = v.fit_transform(train)
#
# print(X.toarray())
#
# y = np.array([1, 0, 0, 1])
#
# fm = pylibfm.FM()
# fm.fit(X, y)
# fm.predict(v.transform({"user": "1", "item": "10", "age": 24}))
# print(fm.predict(v.transform({"user": "1", "item": "10", "age": 24})))
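# Note added for illustration (not part of the original file): the "simple
# model tuning" mentioned in the overview corresponds to pylibfm.FM's
# constructor arguments. Keyword names follow the pyFM README; the values
# below are arbitrary examples, not the wrapper's actual choices:
#
#     fm = pylibfm.FM(num_factors=10, num_iter=100, task='classification',
#                     initial_learning_rate=0.001,
#                     learning_rate_schedule='optimal', verbose=True)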
class FM:
    """Thin wrapper around pylibfm.FM for classification/regression tasks."""

    def __init__(self, X_train, Y_train, task):  # data type is DataFrame
        self.task = task  # task: classification, regression
        self.X_train = X_train
        self.Y_train = Y_train
        self.feat_num = len(X_train.columns)

    def __str__(self):
        return 'task: %s, feat_num: %s' % (self.task, self.feat_num)

    def get_model(self):
        fm = pylibfm.FM()
        # Pick the number of latent factors from the feature count.
        num_factors = 0
        if self.feat_num < 100:
            num_factors = self.feat_num // 5
        elif self.feat_num in range(101, 1001):
            num_factors = self.feat_num // 10
        else:
            num_factors = 100
        if self.task == 'classification':
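The listing is cut off inside get_model, just after the branch on self.task; the truncated remainder presumably constructs a pylibfm.FM with the chosen num_factors for the given task. As a rough sketch of how the wrapper above is meant to be called (the toy DataFrame columns and the assumption that get_model() returns a configured pylibfm.FM instance are illustrative assumptions, not taken from the original file):

import pandas as pd

# Hypothetical usage of the FM wrapper defined above (toy data only).
X_train = pd.DataFrame({'user': [1, 2, 3, 4],
                        'item': [5, 43, 20, 10],
                        'age': [19, 33, 55, 20]})
Y_train = pd.Series([1, 0, 0, 1])

clf = FM(X_train, Y_train, 'classification')
print(clf)               # -> task: classification, feat_num: 3
model = clf.get_model()  # assumed to return a pylibfm.FM set up for the task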