资源简介
机器学习算法 XGBoost、LightGBM、CatBoost 的代码框架,可满足基本的数据分析需求,涵盖回归、二分类和多分类任务。
代码片段和文件信息
import pandas as pd
import numpy as np
import scipy as sp
# Read a CSV file; ``f`` is the file path / file name.
def red_csv_file(f, logging=False):
    """Load a CSV file into a pandas DataFrame.

    Args:
        f: Path or file-like object passed straight to ``pd.read_csv``.
        logging: When true, print the first five rows, the column names,
            the ``describe()`` summary and the ``info()`` report.

    Returns:
        The DataFrame returned by ``pd.read_csv``.
    """
    print("=================读 取 文 件===================")
    data = pd.read_csv(f)
    if logging:
        print(data.head(5))
        print(data.columns.values)
        print(data.describe())
        # DataFrame.info() prints its report itself and returns None, so it
        # is called directly instead of being wrapped in print().
        data.info()
    return data
# Generic LogisticRegression framework
import pandas as pd
import numpy as np
from scipy import sparse
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# 1. Load the data.
# The two frames are placeholders: an empty DataFrame has no 'label' column,
# so substitute the real train / test data before running.
df_train = pd.DataFrame()
df_test = pd.DataFrame()
y_train = df_train['label'].values

# 2. Preprocessing
ss = StandardScaler()

# 3. Feature processing / re-encoding
# 3.1 Categorical variables: one-hot encode each column and stack the blocks.
# handle_unknown='ignore' keeps transform() from raising when the test set
# contains a category that never appeared in the training set.
enc = OneHotEncoder(handle_unknown='ignore')
feats = ["creativeID", "adID", "campaignID"]
for i, feat in enumerate(feats):
    # Fit on the training column only; the test column reuses the fitted
    # categories so both matrices end up with the same columns.
    x_train = enc.fit_transform(df_train[feat].values.reshape(-1, 1))
    x_test = enc.transform(df_test[feat].values.reshape(-1, 1))
    if i == 0:
        X_train, X_test = x_train, x_test
    else:
        X_train = sparse.hstack((X_train, x_train))
        X_test = sparse.hstack((X_test, x_test))

# 3.2 Numeric variables
# StandardScaler expects 2-D input; selecting a list of columns already
# yields that, otherwise reshape(-1, len(feats)) is required.
feats = ["price", "age"]
x_train = ss.fit_transform(df_train[feats].values)
# Scale the test set with the statistics learned from the training set.
x_test = ss.transform(df_test[feats].values)
X_train = sparse.hstack((X_train, x_train))
X_test = sparse.hstack((X_test, x_test))

# Model training
lr = LogisticRegression()
lr.fit(X_train, y_train)
# Column 1 of predict_proba holds the probability of the positive class.
proba_test = lr.predict_proba(X_test)[:, 1]
# LightGBM binary classification
import lightgbm as lgb
import pandas as pd
import numpy as np
import pickle
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

print("Loading Data ... ")

# Load the data. load_data() is not defined in this snippet and has to be
# supplied by the user; it must return (train_x, train_y, test_x).
train_x, train_y, test_x = load_data()

# Split the training data with sklearn.model_selection.train_test_split.
# test_size=0.05 keeps 95% for training and 5% for validation; adjust the
# ratio as needed.
X, val_X, y, val_y = train_test_split(
    train_x,
    train_y,
    test_size=0.05,
    random_state=1,
    stratify=train_y,  # keep the class ratio of y the same in both splits
)
X_train = X
y_train = y
X_test = val_X
y_test = val_y

# Build the LightGBM datasets
lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

# specify your configurations as a dict
params = {
    'boosting_type': 'gbdt',
    'objective': 'binary',
    # A list rather than a set, so the metric order is the same on every run.
    'metric': ['binary_logloss', 'auc'],
    'num_leaves': 5,
    'max_depth': 6,
    'min_data_in_leaf': 450,
    'learning_rate': 0.1,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.95,
    'bagging_freq': 5,
    'lambda_l1': 1,
    'lambda_l2': 0.001,  # larger values mean stronger L2 regularisation
    'min_gain_to_split': 0.2,
    'verbose': 5,
    'is_unbalance': True,
}

# train
print('Start training...')
# NOTE(review): this excerpt is cut off mid-call. The arguments of
# lgb.train(...) below have no separating commas, and the last argument has
# no value and no closing parenthesis in the visible text. Restore the call
# from the full source before running; the early-stopping value cannot be
# determined from this snippet.
gbm = lgb.train(params
lgb_train
num_boost_round=10000
valid_sets=lgb_eval
early_stopping_round
相关资源
- python实现SGBM图像匹配算法
- python实现灰度直方图均衡化
- scrapy_qunar_one
- Python学习全系列教程永久可用
- python简明教程.chm
- 抽奖大转盘python的图形化界面
- 双边滤波器实验报告及代码python
- python +MYSQL+HTML实现21蛋糕网上商城
- Python-直播答题助手自动检测出题搜索
- OpenCV入门教程+OpenCV官方教程中文版
- Python 串口工具源码+.exe文件
- Python开发的全栈股票系统.zip
- Python操作Excel表格并将其中部分数据写
- python书籍 PDF
- 利用python绘制散点图
- python+labview+No1.vi
- 老男孩python项目实战
- python源码制作whl文件.rar
- python3.5可用的scipy
- PYTHON3 经典50案例.pptx
- 计算机科学导论-python.pdf
- python模拟鼠标点击屏幕
- windows鼠标自动点击py脚本
- 鱼c小甲鱼零基础学python全套课后题和
- Python 练习题100道
- Practical Programming 2nd Edition
- wxPython Application Development Cookbook
- python 3.6
- Python 3.5.2 中文文档 互联网唯一CHM版本
- python3.5.2.chm官方文档
评论
共有 条评论