资源简介
资源包括某超市四个月内各大、中、小类商品的销售记录。代码利用星期特征，去除噪声，并用 XGBoost 进行预测；代码中还包含一些基础的分析方法，可供新手参考。
代码片段和文件信息
# coding=utf-8 ##以utf-8编码储存中文字符
from sklearn.grid_search import GridSearchCV #Perforing grid search
import pandas as pd
from sklearn.metrics import mean_squared_error
import numpy as np
from scipy.stats import mode
import warnings
from sklearn.cross_validation import train_test_split
import xgboost as xgb
from xgboost.sklearn import XGBClassifier
import operator
import matplotlib.pyplot as plt
import matplotlib
# Predict future sales from the historical daily sales. A quick look at the
# data showed that the monthly totals stay roughly constant.
# (Song lyric left by the original author: "So many once adored your youthful
# looks, yet who can withstand the merciless passage of time.")
# In practice the result is about the same as simply taking the mean.
# Silence all warnings for the rest of the script.
warnings.filterwarnings('ignore')
def mode_function(df):
    """Return the most frequent value of ``df`` after casting it to int.

    Args:
        df: Array-like object exposing ``astype`` (for example a pandas
            Series or a NumPy array).

    Returns:
        The modal integer value as reported by ``scipy.stats.mode``.
    """
    values = np.asarray(df.astype(int))
    # Older SciPy returns the mode as a length-1 array, while newer releases
    # return a scalar for 1-D input; normalise both shapes before indexing.
    return np.atleast_1d(mode(values)[0]).ravel()[0]
def score(y_test, y_pred):
    """Return ``1 / (1 + RMSE)`` between the true and predicted values.

    Both inputs are printed before the score is computed.

    Args:
        y_test: Ground-truth values.
        y_pred: Predicted values, aligned with ``y_test``.

    Returns:
        ``1.0 / (1.0 + sqrt(mean_squared_error(y_test, y_pred)))``; equals
        1.0 for a perfect prediction and shrinks as the error grows.
    """
    print(y_test)
    print(y_pred)
    return 1.0 / (1.0 + np.sqrt(mean_squared_error(y_test, y_pred)))
# Load the historical sales records and report their size and columns.
train = pd.read_csv('input/train.csv')
print('训练集总数', train.shape)
print('字段', train.columns)

# Load the submission template and derive month/day features from its date.
sub = pd.read_csv('input/result.csv')
# NOTE(review): the slices take character 5 as the month and everything from
# character 6 on as the day; this presumably expects dates like 20150101 and
# only single-digit months -- confirm against the input files.
sub['SaleM'] = sub['日期'].map(lambda x: int(str(x)[5:6]))
sub['SaleD'] = sub['日期'].map(lambda x: int(str(x)[6:]))
sub['SaleD'] = sub['SaleD'].astype(int)
sub['SaleM'] = sub['SaleM'].astype(int)
result = sub.copy()
# Explicit copy: sub_train gets new columns and an in-place drop later on, so
# it must not be treated as a view of ``sub``.
sub_train = sub[['编码', 'SaleM', 'SaleD', '销量', '日期']].copy()

# Same month/day features for the training data.
train['SaleM'] = train['销售日期'].map(lambda x: int(str(x)[5:6]))
train['SaleD'] = train['销售日期'].map(lambda x: int(str(x)[6:]))
train['SaleD'] = train['SaleD'].astype(int)
train['SaleM'] = train['SaleM'].astype(int)
# Drop the descriptive and per-transaction columns.
train.drop(
    [
        '大类名称', '中类名称', '小类名称', '销售月份', '商品编码', '小类编码',
        '单位', '销售日期', '规格型号', 'custid', '商品类型', '商品单价',
        '销售金额', '销售数量', '是否促销',
    ],
    axis=1,
    inplace=True,
)
def cal_w(row):
    """Return the day-of-week index (0-6) for a row's month and day.

    Args:
        row: Object exposing ``SaleM`` (month number) and ``SaleD`` (day of
            the month), e.g. a DataFrame row passed by ``apply(axis=1)``.

    Returns:
        ``(SaleD + offset) % 7`` using the offset of the row's month, or
        ``None`` when the month is not one of 1-5.
    """
    # Per-month offset added to the day before taking it modulo 7.
    # NOTE(review): the offsets look consistent with calendar year 2015 and
    # 0 = Sunday -- confirm against the data.
    month_offsets = {1: 3, 2: 6, 3: 6, 4: 2, 5: 4}
    offset = month_offsets.get(row.SaleM)
    if offset is None:
        return None
    return (row.SaleD + offset) % 7
# Day-of-week feature for every training row.
train['SaleW'] = train.apply(cal_w, axis=1)
train['SaleW'] = train['SaleW'].astype(int)
def cal_wn(row):
    """Return the running week number for a row's month and day.

    Args:
        row: Object exposing ``SaleM`` (month number) and ``SaleD`` (day of
            the month), e.g. a DataFrame row passed by ``apply(axis=1)``.

    Returns:
        ``(SaleD + shift) // 7 + base`` using the shift and base week of the
        row's month, or ``None`` when the month is not one of 1-5.
    """
    # month -> (day shift, week number assigned to the start of the month).
    week_params = {1: (2, 1), 2: (5, 5), 3: (5, 9), 4: (1, 14), 5: (3, 18)}
    params = week_params.get(row.SaleM)
    if params is None:
        return None
    day_shift, base_week = params
    # Floor division keeps the week number integral on Python 3 as well,
    # where ``/`` would produce a fractional float.
    return (row.SaleD + day_shift) // 7 + base_week
# Week-number feature for the training rows.
train['SaleWn'] = train.apply(cal_wn, axis=1)
train['SaleWn'] = train['SaleWn'].astype(int)

# Same day-of-week and week-number features for the rows to be predicted.
sub_train['SaleW'] = sub_train.apply(cal_w, axis=1)
sub_train['SaleW'] = sub_train['SaleW'].astype(int)
sub_train['SaleWn'] = sub_train.apply(cal_wn, axis=1)
sub_train['SaleWn'] = sub_train['SaleWn'].astype(int)
# The date column is dropped once SaleW and SaleWn have been added.
sub_train.drop(['日期'], axis=1, inplace=True)
train[‘is_buy‘
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 74662 2017-10-23 19:05 input\result.csv
文件 5040446 2017-10-23 18:54 input\train.csv
文件 8059 2017-11-16 20:22 xgboost_predict.py
目录 0 2017-10-29 20:47 input\
相关资源
- python实现SGBM图像匹配算法
- python实现灰度直方图均衡化
- scrapy_qunar_one
- Python学习全系列教程永久可用
- python简明教程.chm
- 抽奖大转盘python的图形化界面
- 双边滤波器实验报告及代码python
- python +MYSQL+HTML实现21蛋糕网上商城
- Python-直播答题助手自动检测出题搜索
- OpenCV入门教程+OpenCV官方教程中文版
- Python 串口工具源码+.exe文件
- Python开发的全栈股票系统.zip
- Python操作Excel表格并将其中部分数据写
- python书籍 PDF
- 利用python绘制散点图
- python+labview+No1.vi
- 老男孩python项目实战
- python源码制作whl文件.rar
- python3.5可用的scipy
- PYTHON3 经典50案例.pptx
- 计算机科学导论-python.pdf
- python模拟鼠标点击屏幕
- windows鼠标自动点击py脚本
- 鱼c小甲鱼零基础学python全套课后题和
- Python 练习题100道
- Practical Programming 2nd Edition
- wxPython Application Development Cookbook
- python 3.6
- Python 3.5.2 中文文档 互联网唯一CHM版本
- python3.5.2.chm官方文档
评论
共有 条评论