资源简介

PyCaret 数据挖掘实践

资源截图

代码片段和文件信息

from pycaret.classification import *
import pandas as pd
import time

# Path of the training CSV that get_data() reads.
train_input = 'hotel_bookings_train.csv'

# Integer codes substituted for the textual values of the `hotel` column.
hotel_code = {'City Hotel': 1, 'Resort Hotel': 2}

# English month name -> calendar month number; used to encode
# the `arrival_date_month` column.
months = {
    'January': 1,
    'February': 2,
    'March': 3,
    'April': 4,
    'May': 5,
    'June': 6,
    'July': 7,
    'August': 8,
    'September': 9,
    'October': 10,
    'November': 11,
    'December': 12,
}

def get_data(name='train_x'):
    """Load the training CSV and return the reduced training frame.

    The file at ``train_input`` is read with its own header row replaced by
    the fixed column names below.  Two encoded columns are added
    (``hotel_code`` from ``hotel`` via ``hotel_code``, ``month_code`` from
    ``arrival_date_month`` via ``months``), the columns listed in
    ``dropped_columns`` are removed, and remaining missing values are
    replaced with 0.

    Args:
        name: Currently unused.  Kept so existing callers that pass
            ``'train_x'`` / ``'train_y'`` keep working; the function always
            returns the full frame, which still contains ``is_canceled``.

    Returns:
        pandas.DataFrame: the processed training data.
    """
    column_names = [
        'id', 'hotel', 'is_canceled', 'lead_time', 'arrival_date_year',
        'arrival_date_month', 'arrival_date_week_number',
        'arrival_date_day_of_month', 'stays_in_weekend_nights',
        'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
        'country', 'market_segment', 'distribution_channel',
        'is_repeated_guest', 'previous_cancellations',
        'previous_bookings_not_canceled', 'reserved_room_type',
        'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
        'company', 'days_in_waiting_list', 'customer_type', 'adr',
        'required_car_parking_spaces', 'total_of_special_requests',
        'reservation_status_date',
    ]
    dropped_columns = [
        'children', 'agent', 'hotel', 'arrival_date_month', 'meal',
        'country', 'market_segment', 'distribution_channel',
        'is_repeated_guest', 'reserved_room_type', 'assigned_room_type',
        'deposit_type', 'company', 'customer_type', 'adr',
        'required_car_parking_spaces', 'total_of_special_requests',
        'reservation_status_date',
    ]

    # header=0 together with names= discards the file's header row and
    # applies the fixed column names instead.
    data_train = pd.read_csv(train_input, header=0, names=column_names)

    # Values missing from the lookup dicts map to NaN here and are zeroed
    # by the fillna below.
    data_train['hotel_code'] = data_train['hotel'].map(hotel_code)
    data_train['month_code'] = data_train['arrival_date_month'].map(months)

    data_train = data_train.drop(dropped_columns, axis=1)

    # fillna returns a new frame; the result must be rebound, otherwise the
    # NaNs silently survive in the returned data.
    data_train = data_train.fillna(0)

    return data_train

def get_testdata(name):
    data_t=pd.read_csv(test_input header=0 names=[‘id‘‘hotel‘‘lead_time‘‘arrival_date_year‘‘arrival_date_month‘‘arrival_date_week_number‘‘arrival_date_day_of_month‘‘stays_in_weekend_nights‘‘stays_in_week_nights‘‘adults‘‘children‘‘babies‘‘meal‘‘country‘‘m

评论

共有 条评论