Resource Description

A deep reinforcement learning (DQN) agent implemented with TensorFlow, including a training script for Atari Breakout via OpenAI Gym (dqn_breakout.py) and a replay-memory module (memory.py); see the file listing below.
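As a rough orientation, here is a minimal sketch of how the QNet class from the snippet below might be instantiated. The parameter keys (framesize, frames, actionsize) come from the code itself; the concrete values and the network names are illustrative assumptions, not taken from the original script.

import tensorflow as tf

# Hypothetical hyperparameters; the keys match those read by QNet,
# the values are typical DQN choices and are assumptions.
params = {
    'framesize': 84,   # input frames resized to 84x84 (assumption)
    'frames': 4,       # stacked frames per state; matches conv1's input depth of 4
    'actionsize': 4,   # size of the environment's action space (assumption)
}

sess = tf.Session()
online_net = QNet(sess, 'online', params)               # trained network
target_net = QNet(sess, 'target', params, train=False)  # periodically synced copy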

Code Snippet and File Information

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 20 20:36:49 2017

@author: daniel
"""

from __future__ import print_function

import gym
import numpy as np
import tensorflow as tf
import time
from collections import deque  
import datetime
import cv2
import os
import sys
from gym import wrappers
import argparse
from memory import ReplayMemory as RPM
from tensorflow.python.client import timeline

class QNet(object):
    def __init__(self, sess, name, params, train=True):
        self.params = params
        self.sess = sess
        self.name = name
        # NHWC: batch x height x width x stacked frames  # TODO: add to hyperparameters
        self.input_shape = [None, params['framesize'], params['framesize'], params['frames']]
        self.images_placeholder = tf.placeholder(tf.float32, shape=self.input_shape)
        self.target_placeholder = tf.placeholder(tf.int32, shape=[None, params['actionsize']])
        self.reward_placeholder = tf.placeholder(tf.float32, shape=[None])
        self.action_placeholder = tf.placeholder(tf.int32, shape=[None])
        self.done_placeholder = tf.placeholder(tf.float32, shape=[None, params['actionsize']])
        self.train = train
        self.buildNet()
        
    def buildNet(self):
        input_layer = self.images_placeholder

        with tf.name_scope(self.name):
            with tf.name_scope('conv1'):
                # 8x8 conv, 4 input channels, 32 outputs, stride 4
                self.W_conv1 = self._weight_variable([8, 8, 4, 32], "W_conv1")
                self.b_conv1 = self._bias_variable([32], "b_conv1")
                h_conv1 = tf.nn.relu(self._conv2d(input_layer, self.W_conv1, 4) + self.b_conv1)
    
            with tf.name_scope('conv2'):
                # 4x4 conv, 32 inputs, 64 outputs, stride 2
                self.W_conv2 = self._weight_variable([4, 4, 32, 64], "W_conv2")
                self.b_conv2 = self._bias_variable([64], "b_conv2")
                h_conv2 = tf.nn.relu(self._conv2d(h_conv1, self.W_conv2, 2) + self.b_conv2)
                
            with tf.name_scope('conv3'):
                # 3x3 conv, 64 inputs, 64 outputs, stride 1
                self.W_conv3 = self._weight_variable([3, 3, 64, 64], "W_conv3")
                self.b_conv3 = self._bias_variable([64], "b_conv3")
                h_conv3 = tf.nn.relu(self._conv2d(h_conv2, self.W_conv3, 1) + self.b_conv3)
            
            dim=h_conv3.get_shape()
            dims=np.array([d.value for d in dim])
            reshaped_dim = np.prod(dims[1:])
            with tf.name_scope('dense1'):
                self.W_fc1 = self._weight_variable([reshaped_dim, 512], "W_fc1")
                self.b_fc1 = self._bias_variable([512], "b_fc1")

                h_conv3_flat = tf.reshape(h_conv3, [-1, reshaped_dim])
                h_fc1 = tf.nn.relu(tf.matmul(h_conv3_flat, self.W_fc1) + self.b_fc1)

            with tf.name_scope('output'):
                # Linear output layer: one Q-value per action
                self.W_fc2 = self._weight_variable([512, self.params['actionsize']], "W_fc2")
                self.b_fc2 = self._bias_variable([self.params['actionsize']], "b_fc2")
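The excerpt breaks off at this point; presumably the truncated portion computes the Q-value output (e.g. tf.matmul(h_fc1, self.W_fc2) + self.b_fc2) and the training ops. The helper methods it calls (_weight_variable, _bias_variable, _conv2d) are also not shown. Below is a minimal sketch of what they plausibly look like, inferred from their call sites above; the initializers and the 'VALID' padding are assumptions, not the original code.

    # --- Sketch: helpers assumed from their call sites (not original code) ---
    def _weight_variable(self, shape, name):
        # Small truncated-normal init is a common choice for DQN conv nets (assumption).
        return tf.Variable(tf.truncated_normal(shape, stddev=0.01), name=name)

    def _bias_variable(self, shape, name):
        # Small positive constant bias (assumption).
        return tf.Variable(tf.constant(0.01, shape=shape), name=name)

    def _conv2d(self, x, W, stride):
        # 'VALID' padding matches the classic DQN architecture (assumption).
        return tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding='VALID')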

 Attribute        Size     Date       Time   Name
----------- ---------  ---------- -----  ----
  directory           0  2017-03-24 13:19  dqn_agent-master\
       file       35141  2017-03-24 13:19  dqn_agent-master\LICENSE
       file          63  2017-03-24 13:19  dqn_agent-master\README.md
       file       23671  2017-03-24 13:19  dqn_agent-master\dqn_breakout.py
       file        2016  2017-03-24 13:19  dqn_agent-master\memory.py
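dqn_breakout.py imports ReplayMemory from memory.py (about 2 KB per the listing above), but the excerpt does not show its implementation. Below is a minimal deque-based sketch of the interface such a buffer typically exposes; the method names, signatures, and transition format are assumptions.

import random
from collections import deque

class ReplayMemory(object):
    """Sketch of a DQN replay buffer; interface and details are assumptions."""

    def __init__(self, capacity):
        # A deque with maxlen evicts the oldest transition once capacity is reached.
        self.buffer = deque(maxlen=capacity)

    def add(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        # Uniform random sampling decorrelates consecutive transitions.
        return random.sample(list(self.buffer), batch_size)

    def __len__(self):
        return len(self.buffer)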
