资源简介
强化学习中 Q-learning 的简单实现:环境为一维世界,智能体通过 Q-learning 自主学习,得到最优的 Q 表。
代码片段和文件信息
import numpy as np
import pandas as pd
import time
# Hyper-parameters and environment settings for the 1-D Q-learning demo.
N_STATES = 6  # width of the one-dimensional world (number of states)
ACTIONS = ['left', 'right']  # actions available to the agent
EPSILON = 0.9  # greediness: a uniform draw above this value triggers a random action
ALPHA = 0.1  # learning rate
GAMMA = 0.9  # reward discount (decay) factor
MAX_ROUND = 13  # maximum number of rounds (episodes)
FRESH_TIME = 0.01  # pause between moves (presumably seconds -- confirm at the call site)
def build_q_table(n_states, actions):
    """Create a zero-initialised Q-table.

    Args:
        n_states: Number of rows (one per state) in the table.
        actions: Sequence of action names; each becomes one column.

    Returns:
        A ``pandas.DataFrame`` of shape ``(n_states, len(actions))`` filled
        with zeros, whose column labels are the entries of ``actions``.
    """
    # NOTE: the class is ``pd.DataFrame`` (capital F); ``pd.Dataframe`` does
    # not exist and raises AttributeError.
    table = pd.DataFrame(
        np.zeros((n_states, len(actions))),
        columns=actions,
    )
    return table
def choose_action(state, q_table):
    """Pick an action for ``state`` using an epsilon-greedy rule.

    Args:
        state: Integer row position of the current state in ``q_table``.
        q_table: DataFrame of action values; rows are addressed by position
            and the column labels are the action names.

    Returns:
        The chosen action name: a random element of ``ACTIONS`` when
        exploring, otherwise the column label with the largest value in the
        row for ``state``.
    """
    # Select the single row for this state (row ``state``, all columns).
    state_actions = q_table.iloc[state, :]
    # Explore when the random draw exceeds EPSILON, or when every action value
    # for this state is still zero (nothing learned yet, so a greedy pick
    # would be arbitrary). ``(state_actions == 0).all()`` tests "all zero";
    # ``state_actions.all() == 0`` would instead fire when ANY value is zero.
    if (np.random.uniform() > EPSILON) or (state_actions == 0).all():
        action_name = np.random.choice(ACTIONS)
    else:
        # idxmax returns the column label (the action name); argmax would
        # return an integer position on current pandas.
        action_name = state_actions.idxmax()
    return action_name
def get_env_feedback(SA):
if A == ‘right‘:
if S == N_STATES -2:
S_ = ‘terminal‘
R = 1
else:
S_ =
评论
共有 条评论