资源简介

Iterative Bagging 和 MultiBoosting 算法的实现，并比较了各模型的 bias 和 variance。

资源截图

代码片段和文件信息

#housing data
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
import numpy as np
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import AdaBoostRegressor
import random as rd

# Load the housing table and split it column-wise: the first 13 columns are
# used as features and the last column as the regression target.
data0 = np.loadtxt("./ML_work1/housing.data")
data = data0[:, :13]
labels = data0[:, -1]

# Hold out 20% of the rows for evaluation.
train_X, test_X, train_y, test_y = train_test_split(data, labels, test_size=0.2)

def r_square(y_test, y_test_result):
    """Return the coefficient of determination (R^2) of a prediction.

    Args:
        y_test: NumPy array of observed target values.
        y_test_result: NumPy array of predicted values, same shape as
            ``y_test``.

    Returns:
        ``1 - SS_res / SS_tot``, where ``SS_tot`` is the squared deviation of
        ``y_test`` from its mean and ``SS_res`` is the squared prediction
        error.
    """
    sstot = np.sum((y_test - y_test.mean())**2)
    ssres = np.sum((y_test - y_test_result)**2)
    return 1 - ssres / sstot


#multiboosting
# Bagging ensemble whose members are AdaBoost committees of 20 decision trees.
# The base learner is passed positionally because its keyword was renamed
# from ``base_estimator`` to ``estimator`` in scikit-learn 1.2.
MBO = BaggingRegressor(AdaBoostRegressor(DecisionTreeRegressor(), n_estimators=20))
MBO.fit(train_X, train_y)
MBO_pre = MBO.predict(test_X)
MBO_R = r_square(test_y, MBO_pre)
print('MBO mse: %f' % np.mean((test_y - MBO_pre)**2))
print('MBO R^2: %f' % MBO_R)

#AdaBoost
# AdaBoost over 20 decision trees. The base learner is passed positionally
# because its keyword was renamed from ``base_estimator`` to ``estimator`` in
# scikit-learn 1.2.
ADB = AdaBoostRegressor(DecisionTreeRegressor(), n_estimators=20)
ADB.fit(train_X, train_y)
ADB_pre = ADB.predict(test_X)
print('ADB mse:%f' % np.mean((test_y - ADB_pre)**2))
print('ADB R^2:%f' % r_square(test_y, ADB_pre))

#Bagging
# Bagging over 20 decision trees. The base learner is passed positionally
# because its keyword was renamed from ``base_estimator`` to ``estimator`` in
# scikit-learn 1.2.
BAG = BaggingRegressor(DecisionTreeRegressor(), n_estimators=20)
BAG.fit(train_X, train_y)
BAG_pre = BAG.predict(test_X)
print('BAG mse:%f' % np.mean((test_y - BAG_pre)**2))
print('BAG R^2:%f' % r_square(test_y, BAG_pre))

#Iterative Bagging
def boostrap(X, Y):
    """Draw one bootstrap replicate of ``(X, Y)`` and its out-of-bag rows.

    Row indices are sampled uniformly with replacement, as many as there are
    rows in ``X``. Rows that were never drawn form the out-of-bag set.

    Args:
        X: NumPy array whose first axis indexes samples.
        Y: NumPy array of targets aligned with ``X`` along the first axis.

    Returns:
        A tuple ``(train_X, train_y, test_X, test_y, B_idx)``: the resampled
        rows and targets, the out-of-bag rows and targets, and ``B_idx``, the
        list of out-of-bag row indices in ascending order.
    """
    n_samples = X.shape[0]
    idx = np.random.randint(low=0, high=n_samples, size=n_samples)
    train_X, train_y = X[idx], Y[idx]
    # setdiff1d keeps an integer dtype even when nothing is left out of the
    # bag, so indexing still works (an empty Python list would turn into a
    # float array and raise IndexError).
    oob = np.setdiff1d(np.arange(n_samples), idx)
    test_X, test_y = X[oob], Y[oob]
    return train_X, train_y, test_X, test_y, oob.tolist()


# iterative bagging
def ITB_fit(train_Xtrain_y):
    K = 20
    iter_est = []
    threshold = 1e10
    Y = np.zeros(train_y.shape)
    for i in range(train_y.shape[0]):
        Y[i] = train_y[i]
    n = 0
    while 1:
        y = np.zeros(train_y.shape)
        y_count = np.zeros(train_y.shape)
        y_new = np.zeros(train_y.shape)
        # bagging
        for i in range(K):
            X_A y_A X_B y_B B_idx = boostrap(train_X Y)
            clf = DecisionTreeRegressor()
            clf.fit(X_A y_A)
            iter_est.append(clf)
            # y_res=Y-clf.p

评论

共有 条评论