Python-60DaysRLChallenge中文版强化学习60天

大小: 1.52MB

文件类型: .zip

金币: 2

下载: 0 次

发布日期: 2023-08-30
语言: Python
标签:

高速下载

资源简介

我为你和我自己设计了这个挑战：在 60 天里深入学习“深度强化学习”。

资源截图

小图大图

代码片段和文件信息

import gym
import random
from collections import namedtuple
import collections
import numpy as np
import matplotlib.pyplot as plt


def select_eps_greedy_action(table, obs, n_actions):
    """
    Select an action using an ε-greedy policy.

    With probability ``epsilon`` (a module-level global) a uniformly random
    action in ``[0, n_actions - 1]`` is returned; otherwise the greedy action
    for ``obs`` according to ``best_action_value`` is returned.

    Args:
        table: mapping from ``(state, action)`` to the current Q estimate.
        obs: the current observation/state.
        n_actions: number of actions available for random exploration.

    Returns:
        The index of the chosen action.
    """
    # The greedy lookup is done first, exactly as before, so the table is
    # queried for this state even when a random action ends up being taken.
    _, action = best_action_value(table, obs)

    if random.random() < epsilon:
        return random.randint(0, n_actions - 1)
    else:
        return action


def select_greedy_action(table, obs, n_actions):
    """
    Select an action using a greedy policy.

    Args:
        table: mapping from ``(state, action)`` to the current Q estimate.
        obs: the current observation/state.
        n_actions: accepted for signature parity with
            ``select_eps_greedy_action``; not used here.

    Returns:
        The action reported as best for ``obs`` by ``best_action_value``.
    """
    _, action = best_action_value(table, obs)
    return action


def best_action_value(table, state, num_actions=None):
    """
    Scan the table and return the action that maximizes Q(state, action).

    Args:
        table: mapping from ``(state, action)`` to the current Q estimate.
        state: the state whose actions are compared.
        num_actions: how many actions (``0 .. num_actions - 1``) to inspect.
            When omitted, the module-level ``n_actions`` is used.

    Returns:
        A ``(max_value, best_action)`` tuple. Ties are resolved in favor of
        the lowest action index.
    """
    if num_actions is None:
        num_actions = n_actions

    # Seed the running maximum with action 0's value rather than a literal 0:
    # with a 0 seed, a state whose Q-values are all negative would wrongly
    # report (0, 0) instead of its real best value and action.
    best_action = 0
    max_value = table[(state, 0)]
    for action in range(1, num_actions):
        candidate = table[(state, action)]
        if candidate > max_value:
            best_action = action
            max_value = candidate

    return max_value, best_action


def Q_learning(table, obs0, obs1, reward, action):
    """
    Q-learning update of Q(obs0, action) in place.

    The entry is moved towards ``reward + GAMMA * max_a Q(obs1, a)`` by a
    step of size ``LEARNING_RATE`` (both are module-level globals).

    Args:
        table: mapping from ``(state, action)`` to the current Q estimate;
            modified in place.
        obs0: the state in which ``action`` was taken.
        obs1: the state reached after taking ``action``.
        reward: the reward just obtained for the transition.
        action: the action taken in ``obs0``.
    """
    # Take the best value reachable from the state obs1
    best_value, _ = best_action_value(table, obs1)

    # Calculate Q-target value
    Q_target = reward + GAMMA * best_value

    # Calculate the Q-error between the target and the previous value
    Q_error = Q_target - table[(obs0, action)]

    # Update Q(obs0, action)
    table[(obs0, action)] += LEARNING_RATE * Q_error


def test_game(env, table, n_actions):
    """
    Evaluate the current table by playing TEST_EPISODES greedy games.

    Args:
        env: the environment; ``reset()`` must return an observation and
            ``step(action)`` a 4-tuple ``(next_obs, reward, done, info)``.
        table: mapping from ``(state, action)`` to the current Q estimate.
        n_actions: number of actions, forwarded to ``select_greedy_action``.

    Returns:
        The mean of the total reward collected per game.
    """
    reward_games = []
    for _ in range(TEST_EPISODES):
        obs = env.reset()
        rewards = 0
        while True:
            # Act greedily
            next_obs, reward, done, _ = env.step(
                select_greedy_action(table, obs, n_actions)
            )
            obs = next_obs
            rewards += reward

            if done:
                reward_games.append(rewards)
                break

    return np.mean(reward_games)


# Some hyperparameters..
GAMMA = 0.95

# NB: the decay rate regulates the exploration/exploitation trade-off:
#     epsilon starts at 1 and is multiplied by EPS_DECAY_RATE as training
#     proceeds, decaying towards 0
epsilon = 1.0
EPS_DECAY_RATE = 0.9993

LEARNING_RATE = 0.8

# .. and constants
TEST_EPISODES = 100
MAX_GAMES = 15001

# Create the environment
# env = gym.make('Taxi-v2')
env = gym.make("FrozenLake-v0")
obs = env.reset()

obs_length = env.observation_space.n
n_actions = env.action_space.n

reward_count = 0
games_count = 0

# Create the table; missing (state, action) entries default to 0.0
table = collections.defaultdict(float)

test_rewards_list = []

while games_count < MAX_GAMES:
	
	# Select the action following an ε-greedy policy
	action = select_eps_greedy_action（table obs n_actions）
	next_obs reward done _ = env.step（action）
	
	# Update the Q-table
	Q_learning（table obs next_obs reward action）
	
	reward_count += reward
	obs = next_obs
	
	if done:
		epsilon *= EPS_DECAY_RATE
		
		# Test the new ta

属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     目录           0  2018-09-15 09:48  60_Days_RL_Challenge-master\
     文件        1203  2018-09-15 09:48  60_Days_RL_Challenge-master\.gitignore
     文件        1075  2018-09-15 09:48  60_Days_RL_Challenge-master\LICENSE
     文件        7868  2018-09-15 09:48  60_Days_RL_Challenge-master\README.md
     目录           0  2018-09-15 09:48  60_Days_RL_Challenge-master\Week2\
     文件       47757  2018-09-15 09:48  60_Days_RL_Challenge-master\Week2\frozenlake_Qlearning.ipynb
     文件        3465  2018-09-15 09:48  60_Days_RL_Challenge-master\Week2\frozenlake_Qlearning.py
     目录           0  2018-09-15 09:48  60_Days_RL_Challenge-master\Week2\img\
     文件       60162  2018-09-15 09:48  60_Days_RL_Challenge-master\Week2\img\Q_function.png
     文件       88665  2018-09-15 09:48  60_Days_RL_Challenge-master\Week2\img\frozenlake_v0.png
     文件       19080  2018-09-15 09:48  60_Days_RL_Challenge-master\Week2\img\short_diag.jpg
     目录           0  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\
     文件        5205  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\README.md
     文件        4258  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\agent.py
     文件        5665  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\atari_wrappers.py
     文件        1860  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\buffers.py
     文件        3522  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\central_control.py
     目录           0  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\imgs\
     文件      106449  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\imgs\DQN_variations.png
     文件       21564  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\imgs\Dueling_img.png
     文件        8790  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\imgs\double_Qlearning_formula.png
     文件       14527  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\imgs\multistep_formula.png
     文件        8952  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\imgs\noisenet_formula.png
     文件      469317  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\imgs\pong_gif.gif
     文件        2184  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\main.py
     文件        4661  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\neural_net.py
     文件         395  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\utils.py
     目录           0  2018-09-15 09:48  60_Days_RL_Challenge-master\images\
     文件      716068  2018-09-15 09:48  60_Days_RL_Challenge-master\images\logo5.png
     文件       61360  2018-09-15 09:48  60_Days_RL_Challenge-master\images\logo6.png

上一篇：Python-python3实现互信息和左右熵的新词发现
下一篇：Python飞机大战完整素材包字体音乐图片

共有条评论

Python-60DaysRLChallenge中文版强化学习60天

资源简介

资源截图

代码片段和文件信息

评论

相关资源