资源简介
资源包括某超市四个月内各大类、中类、小类商品的销售记录。代码利用星期特征并去除噪声,使用 XGBoost 对销量进行预测;代码中还包含一些基础的分析方法,可供新手参考。

代码片段和文件信息
# coding=utf-8 ##以utf-8编码储存中文字符
from sklearn.grid_search import GridSearchCV #Perforing grid search
import pandas as pd
from sklearn.metrics import mean_squared_error
import numpy as np
from scipy.stats import mode
import warnings
from sklearn.cross_validation import train_test_split
import xgboost as xgb
from xgboost.sklearn import XGBClassifier
import operator
import matplotlib.pyplot as plt
import matplotlib
# Forecast future sales from the historical daily sales. An earlier rough
# tally showed that the monthly totals stay essentially constant.
# (Original author's aside, a song lyric: "So many once adored your youthful
# looks, but who can withstand the merciless passage of time.")
# In practice the result is about the same as simply taking the mean.

# Silence every warning for the rest of the script.
warnings.filterwarnings('ignore')
def mode_function(df):
    """Return the most frequent value of ``df`` after casting it to int.

    Args:
        df: Array-like of numbers (for example a pandas Series, a NumPy
            array or a list). Values are truncated to integers before the
            mode is computed.

    Returns:
        For 1-D input, the single most frequent integer. For input with more
        dimensions, an array holding the mode along the first axis.
        Ties are resolved by ``scipy.stats.mode``.
    """
    values = np.asarray(df).astype(int)
    # Older SciPy returns the mode with the reduced axis kept (length 1),
    # newer SciPy drops it; normalise both layouts before indexing.
    modes = np.asarray(mode(values)[0])
    if values.ndim <= 1:
        return modes.reshape(-1)[0]
    return modes.reshape(values.shape[1:])
def score(y_test, y_pred):
    """Print both inputs and return ``1 / (1 + RMSE)`` for the predictions.

    Args:
        y_test: Ground-truth target values.
        y_pred: Predicted values, aligned with ``y_test``.

    Returns:
        A float in ``(0, 1]``; ``1.0`` means the predictions match exactly
        and the value shrinks as the root-mean-squared error grows.
    """
    print(y_test)
    print(y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    return 1.0 / (1.0 + rmse)
# Load the historical sales records and report their size and columns.
train = pd.read_csv('input/train.csv')
print('训练集总数', train.shape)
print('字段', train.columns)

# Load the second input table (presumably the rows to predict -- confirm).
sub = pd.read_csv('input/result.csv')

# Derive month (SaleM) and day (SaleD) from the date column.
# NOTE(review): only the single character at index 5 is used for the month,
# so this presumably expects yyyymmdd dates with months 1-9; months 10-12
# would be misread -- confirm against the data.
sub['SaleM'] = sub['日期'].map(lambda x: int(str(x)[5:6]))
sub['SaleD'] = sub['日期'].map(lambda x: int(str(x)[6:]))
sub['SaleD'] = sub['SaleD'].astype(int)
sub['SaleM'] = sub['SaleM'].astype(int)
result = sub.copy()

# Take an explicit copy: sub_train gets new columns and an in-place drop
# later on, which must not operate on a view of ``sub``.
sub_train = sub[['编码', 'SaleM', 'SaleD', '销量', '日期']].copy()

# Same month/day extraction for the training data.
train['SaleM'] = train['销售日期'].map(lambda x: int(str(x)[5:6]))
train['SaleD'] = train['销售日期'].map(lambda x: int(str(x)[6:]))
train['SaleD'] = train['SaleD'].astype(int)
train['SaleM'] = train['SaleM'].astype(int)

# Remove the columns that are not used further on.
train.drop(
    [
        '大类名称', '中类名称', '小类名称', '销售月份', '商品编码',
        '小类编码', '单位', '销售日期', '规格型号', 'custid',
        '商品类型', '商品单价', '销售金额', '销售数量', '是否促销',
    ],
    axis=1,
    inplace=True,
)
# Per-month offset added to the day of month before taking it modulo 7.
# NOTE(review): these offsets match the 2015 calendar with 0 = Sunday;
# presumably that is the year of the data -- confirm.
_WEEKDAY_OFFSET_BY_MONTH = {1: 3, 2: 6, 3: 6, 4: 2, 5: 4}


def cal_w(row):
    """Return the day-of-week index (0-6) for a row's month and day.

    Args:
        row: Object exposing ``SaleM`` (month) and ``SaleD`` (day of month),
            e.g. a DataFrame row passed by ``DataFrame.apply(..., axis=1)``.

    Returns:
        ``(SaleD + offset) % 7`` for months 1-5, or ``None`` for any other
        month (no offset is defined for it).
    """
    offset = _WEEKDAY_OFFSET_BY_MONTH.get(row.SaleM)
    if offset is None:
        return None
    return (row.SaleD + offset) % 7
# Day-of-week feature for every training row, stored as an integer column.
train['SaleW'] = train.apply(cal_w, axis=1)
train['SaleW'] = train['SaleW'].astype(int)
# Per-month (day shift, first week number) pairs used to number the weeks
# continuously across months.
_WEEK_NUMBER_PARAMS_BY_MONTH = {
    1: (2, 1),
    2: (5, 5),
    3: (5, 9),
    4: (1, 14),
    5: (3, 18),
}


def cal_wn(row):
    """Return the running week number for a row's month and day.

    Args:
        row: Object exposing ``SaleM`` (month) and ``SaleD`` (day of month),
            e.g. a DataFrame row passed by ``DataFrame.apply(..., axis=1)``.

    Returns:
        ``(SaleD + shift) // 7 + base`` for months 1-5, or ``None`` for any
        other month (no parameters are defined for it).
    """
    params = _WEEK_NUMBER_PARAMS_BY_MONTH.get(row.SaleM)
    if params is None:
        return None
    shift, base = params
    # Floor division keeps the week number integral on Python 3 as well;
    # plain "/" would produce fractional weeks there.
    return (row.SaleD + shift) // 7 + base
# Week-number feature for the training rows.
train['SaleWn'] = train.apply(cal_wn, axis=1)
train['SaleWn'] = train['SaleWn'].astype(int)

# Same day-of-week and week-number features for the prediction rows.
sub_train['SaleW'] = sub_train.apply(cal_w, axis=1)
sub_train['SaleW'] = sub_train['SaleW'].astype(int)
sub_train['SaleWn'] = sub_train.apply(cal_wn, axis=1)
sub_train['SaleWn'] = sub_train['SaleWn'].astype(int)

# Only the raw date column is dropped; SaleM and SaleD stay in sub_train.
sub_train.drop(['日期'], axis=1, inplace=True)
train[‘is_buy‘
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 74662 2017-10-23 19:05 input\result.csv
文件 5040446 2017-10-23 18:54 input\train.csv
文件 8059 2017-11-16 20:22 xgboost_predict.py
目录 0 2017-10-29 20:47 input\
相关资源
- 二级考试python试题12套(包括选择题和
- pywin32_python3.6_64位
- python+ selenium教程
- PycURL(Windows7/Win32)Python2.7安装包 P
- 英文原版-Scientific Computing with Python
- 7.图像风格迁移 基于深度学习 pyt
- 基于Python的学生管理系统
- A Byte of Python(简明Python教程)(第
- Python实例174946
- Python 人脸识别
- Python 人事管理系统
- 基于python-flask的个人博客系统
- 计算机视觉应用开发流程
- python 调用sftp断点续传文件
- python socket游戏
- 基于Python爬虫爬取天气预报信息
- python函数编程和讲解
- Python开发的个人博客
- 基于python的三层神经网络模型搭建
- python实现自动操作windows应用
- python人脸识别(opencv)
- python 绘图(方形、线条、圆形)
- python疫情卡UN管控
- python 连连看小游戏源码
- 基于PyQt5的视频播放器设计
- 一个简单的python爬虫
- csv文件行列转换python实现代码
- Python操作Mysql教程手册
- Python Machine Learning Case Studies
- python获取硬件信息
评论
共有 条评论