• 大小: 17.67MB
    文件类型: .7z
    金币: 2
    下载: 1 次
    发布日期: 2023-07-03
  • 语言: Python
  • 标签: 机器学习  

资源简介

本人亲测可直接安装使用的 LightGBM 包，希望能帮助不想安装 VS 的小伙伴们安装这个 Python 包。

资源截图

代码片段和文件信息

# coding: utf-8
# pylint: disable = invalid-name, C0111
import lightgbm as lgb
import pandas as pd
import numpy as np

# load or create your dataset
# Both data files are read as headerless, tab-separated tables; the weight
# files are read as headerless tables and only their column 0 is kept.
print('Load data...')
df_train = pd.read_csv('../binary_classification/binary.train', header=None, sep='\t')
df_test = pd.read_csv('../binary_classification/binary.test', header=None, sep='\t')
W_train = pd.read_csv('../binary_classification/binary.train.weight', header=None)[0]
W_test = pd.read_csv('../binary_classification/binary.test.weight', header=None)[0]

# column 0 is used as the target; every remaining column becomes a feature
y_train = df_train[0].values
y_test = df_test[0].values
X_train = df_train.drop(0, axis=1).values
X_test = df_test.drop(0, axis=1).values

# rows x columns of the training feature matrix
num_train, num_feature = X_train.shape

# create dataset for lightgbm
# if you want to re-use data remember to set free_raw_data=False
# (lgb_train is passed to several lgb.train calls later in this script)
lgb_train = lgb.Dataset(X_train, y_train,
                        weight=W_train, free_raw_data=False)
# the evaluation set points at the training set through `reference`
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train,
                       weight=W_test, free_raw_data=False)

# specify your configurations as a dict
# These key/value pairs are handed unchanged to every lgb.train call below.
params = {
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': 'binary_logloss',
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 0,
}

# generate a feature name
# one name per feature column: 'feature_0', 'feature_1', ...
feature_name = [f'feature_{col}' for col in range(num_feature)]

print('Start training...')
# feature_name and categorical_feature
# NOTE(review): passing feature_name / categorical_feature straight to
# lgb.train appears to be version-dependent - confirm against the installed
# LightGBM release.
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                valid_sets=lgb_train,  # eval training data
                feature_name=feature_name,
                categorical_feature=[21])

# check feature name
print('Finish first 10 rounds...')
# index 6 is the 7th entry of the dataset's feature name list
print('7th feature name is:', repr(lgb_train.feature_name[6]))

# save model to file
# written as 'model.txt' relative to the current working directory
gbm.save_model('model.txt')

# continue training
# init_model accepts:
# 1. model file name
# 2. Booster()
# Here the file-name form is used; from this call on, evaluation runs on
# lgb_eval instead of the training set.
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                init_model='model.txt',
                valid_sets=lgb_eval)

print('Finish 10 - 20 rounds with model file...')

# decay learning rates
# learning_rates accepts:
# 1. list/tuple with length = num_boost_round
# 2. function(curr_iter)
# Here the function form is used: 0.05 scaled by 0.99 ** curr_iter.
# The lambda parameter is named curr_iter so it does not shadow the builtin
# `iter`.
# NOTE(review): this rename assumes LightGBM calls the function positionally,
# and the `learning_rates` keyword itself appears to be version-dependent -
# confirm both against the installed LightGBM release.
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                init_model=gbm,  # continue from the in-memory Booster
                learning_rates=lambda curr_iter: 0.05 * (0.99 ** curr_iter),
                valid_sets=lgb_eval)

print('Finish 20 - 30 rounds with decay learning rates...')

# change other parameters during training
# The list has 10 entries, the same count as num_boost_round:
# five times 0.7 followed by five times 0.6.
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                init_model=gbm,
                valid_sets=lgb_eval,
                callbacks=[lgb.reset_parameter(bagging_fraction=[0.7] * 5 + [0.6] * 5)])

print(‘Finish 30 - 40 rounds with changing bagging_frac

评论

共有 条评论