资源简介
Playing Flappy Bird Using Deep Reinforcement Learning (Based on Deep Q Learning DQN)
代码片段和文件信息
# -----------------------------
# File: Deep Q-Learning Algorithm
# Author: Flood Sung
# Date: 2016.3.21
# -----------------------------
import tensorflow as tf
import numpy as np
import random
from collections import deque
# Hyper Parameters:
frame_PER_ACTION = 1 # presumably frames between agent decisions (usage not visible in this chunk) — TODO confirm
GAMMA = 0.99 # discount factor applied to future rewards in the Q-learning target
OBSERVE = 100. # timesteps spent only observing (filling replay memory) before training begins
EXPLORE = 200000. # frames over which epsilon is annealed from INITIAL_EPSILON to FINAL_EPSILON
FINAL_EPSILON = 0#0.001 # final value of epsilon (0.001 commented out; 0 = fully greedy, i.e. evaluation mode)
INITIAL_EPSILON = 0#0.01 # starting value of epsilon (0.01 commented out; 0 = no random exploration)
REPLAY_MEMORY = 50000 # maximum number of previous transitions to keep in replay memory
BATCH_SIZE = 32 # size of minibatch sampled from replay memory per training step
UPDATE_TIME = 100 # presumably steps between target-network weight copies (usage not visible here) — TODO confirm
# Compatibility shim: tf.mul was removed in newer TensorFlow releases in
# favor of tf.multiply.  Probe for the old name and alias it so the legacy
# code in this file keeps working on both old and new TensorFlow.
try:
    tf.mul
except AttributeError:
    # Only an AttributeError means "tf.mul is gone"; a bare `except:` here
    # would also hide unrelated failures (e.g. a broken tf import).
    tf.mul = tf.multiply
class BrainDQN:
def __init__(selfactions):
# init replay memory
self.replayMemory = deque()
# init some parameters
self.timeStep = 0
self.epsilon = INITIAL_EPSILON
self.actions = actions
# init Q network
self.stateInputself.QValueself.W_conv1self.b_conv1self.W_conv2self.b_conv2self.W_conv3self.b_conv3self.W_fc1self.b_fc1self.W_fc2self.b_fc2 = self.createQNetwork()
# init Target Q Network
self.stateInputTself.QValueTself.W_conv1Tself.b_conv1Tself.W_conv2Tself.b_conv2Tself.W_conv3Tself.b_conv3Tself.W_fc1Tself.b_fc1Tself.W_fc2Tself.b_fc2T = self.createQNetwork()
self.copyTargetQNetworkOperation = [self.W_conv1T.assign(self.W_conv1)self.b_conv1T.assign(self.b_conv1)self.W_conv2T.assign(self.W_conv2)self.b_conv2T.assign(self.b_conv2)self.W_conv3T.assign(self.W_conv3)self.b_conv3T.assign(self.b_conv3)self.W_fc1T.assign(self.W_fc1)self.b_fc1T.assign(self.b_fc1)self.W_fc2T.assign(self.W_fc2)self.b_fc2T.assign(self.b_fc2)]
self.createTrainingMethod()
# saving and loading networks
self.saver = tf.train.Saver()
self.session = tf.InteractiveSession()
self.session.run(tf.initialize_all_variables())
checkpoint = tf.train.get_checkpoint_state(“saved_networks“)
if checkpoint and checkpoint.model_checkpoint_path:
self.saver.restore(self.session checkpoint.model_checkpoint_path)
print (“Successfully loaded:“ checkpoint.model_checkpoint_path)
else:
print (“Could not find old network weights“)
def createQNetwork(self):
# network weights
W_conv1 = self.weight_variable([88432])
b_conv1 = self.bias_variable([32])
W_conv2 = self.weight_variable([443264])
b_conv2 = self.bias_variable([64])
W_conv3 = self.weight_variable([336464])
b_conv3 = self.bias_variable([64])
W_fc1 = self.weight_variable([1600512])
b_fc1 = self.bias_variable([512])
W_fc2 = self.weight_variable([512self.actions])
b_fc2 = self.bias_variable([self.actions])
# input layer
stateInput = tf.placeholder(“float“[None80804])
# hidden layers
h_conv1 = tf.nn.relu(self.conv2d(stateInputW_conv14) + b_conv1)
h_pool1 = self.max_pool_2x2(h_conv1)
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2018-06-14 11:41 DRL-FlappyBird-master\
目录 0 2018-06-14 11:41 DRL-FlappyBird-master\assets\
目录 0 2018-06-14 11:41 DRL-FlappyBird-master\assets\audio\
文件 17483 2017-12-04 03:06 DRL-FlappyBird-master\assets\audio\die.ogg
文件 194894 2017-12-04 03:06 DRL-FlappyBird-master\assets\audio\die.wav
文件 15670 2017-12-04 03:06 DRL-FlappyBird-master\assets\audio\hit.ogg
文件 96590 2017-12-04 03:06 DRL-FlappyBird-master\assets\audio\hit.wav
文件 13235 2017-12-04 03:06 DRL-FlappyBird-master\assets\audio\point.ogg
文件 177486 2017-12-04 03:06 DRL-FlappyBird-master\assets\audio\point.wav
文件 13697 2017-12-04 03:06 DRL-FlappyBird-master\assets\audio\swoosh.ogg
文件 354638 2017-12-04 03:06 DRL-FlappyBird-master\assets\audio\swoosh.wav
文件 7728 2017-12-04 03:06 DRL-FlappyBird-master\assets\audio\wing.ogg
文件 29902 2017-12-04 03:06 DRL-FlappyBird-master\assets\audio\wing.wav
目录 0 2018-06-14 11:41 DRL-FlappyBird-master\assets\sprites\
文件 2879 2017-12-04 03:06 DRL-FlappyBird-master\assets\sprites\0.png
文件 2868 2017-12-04 03:06 DRL-FlappyBird-master\assets\sprites\1.png
文件 2888 2017-12-04 03:06 DRL-FlappyBird-master\assets\sprites\2.png
文件 2877 2017-12-04 03:06 DRL-FlappyBird-master\assets\sprites\3.png
文件 2898 2017-12-04 03:06 DRL-FlappyBird-master\assets\sprites\4.png
文件 2888 2017-12-04 03:06 DRL-FlappyBird-master\assets\sprites\5.png
文件 2885 2017-12-04 03:06 DRL-FlappyBird-master\assets\sprites\6.png
文件 2896 2017-12-04 03:06 DRL-FlappyBird-master\assets\sprites\7.png
文件 2878 2017-12-04 03:06 DRL-FlappyBird-master\assets\sprites\8.png
文件 2892 2017-12-04 03:06 DRL-FlappyBird-master\assets\sprites\9.png
文件 4030 2017-12-04 03:06 DRL-FlappyBird-master\assets\sprites\background-black.png
文件 664 2017-12-04 03:06 DRL-FlappyBird-master\assets\sprites\ba
文件 5042 2017-12-04 03:06 DRL-FlappyBird-master\assets\sprites\pipe-green.png
文件 2948 2017-12-04 03:06 DRL-FlappyBird-master\assets\sprites\redbird-downflap.png
文件 2949 2017-12-04 03:06 DRL-FlappyBird-master\assets\sprites\redbird-midflap.png
文件 2944 2017-12-04 03:06 DRL-FlappyBird-master\assets\sprites\redbird-upflap.png
文件 6883 2017-12-04 03:06 DRL-FlappyBird-master\BrainDQN_Nature.py
............此处省略15个文件信息
- 上一篇:python2环境的opencv库
- 下一篇:Sublime Text238239
相关资源
- Hands-On Reinforcement Learning with Python 20
- Hands-On Reinforcement Learning - Sudharsan Ra
- 《Python强化学习实战》随书代码
- Reinforcement Learning - With Open AI TensorFl
- Python强化学习实战:应用OpenAI Gym和
- Hands-On Reinforcement Learning with Python /S
- 18. 强化学习Q Learning python代码实现
- 强化学习Q-learning算法
- 强化学习控制gym下的倒立摆
- 深度强化学习PPO算法(python)
- python版flappybird源码
- dqn_agent-master
- RL-Stock-master 使用强化学习完成股票预
- reinforcement-learning-an-introduction-master
- python编写的Flappybird附图片和声音资源
- 莫烦全部代码Reinforcement-learning-with-
- 强化学习与仿真的结合
- python强化学习(基于matplotlib)
- python pygame flappybird 卷轴类小游戏
- python 飞翔的小鸟 小游戏(flappybird)
- python 像素小鸟小游戏源码(flappybir
- 深度学习之三:深度强化学习DQN-Dee
- 阿里强化学习
- Qlearning简单实现
- Python-DQNchainerPython用Chainer实现的Deep
- Q_Learning_maze.rar
评论
共有 0 条评论