#!/usr/bin/env python
from __future__ import print_function
import tensorflow as tf
import cv2
import sys
import wrapped_flappy_bird as game
import random
import numpy as np
from collections import deque
# Hyperparameters and run configuration for the Deep Q-Network agent.
GAME = 'bird'  # the name of the game being played for log files
ACTIONS = 2  # number of valid actions
GAMMA = 0.99  # decay rate of past observations
OBSERVE = 100000.  # timesteps to observe before training
EXPLORE = 2000000.  # frames over which to anneal epsilon
FINAL_EPSILON = 0.0001  # final value of epsilon
# NOTE(review): identical to FINAL_EPSILON, so any annealing between the two
# has zero range as configured -- confirm this is intended.
INITIAL_EPSILON = 0.0001  # starting value of epsilon
REPLAY_MEMORY = 50000  # number of previous transitions to remember
BATCH = 32  # size of minibatch
# presumably the number of frames between action selections -- confirm
# against the training loop. Name kept as-is because other code may use it.
frame_PER_ACTION = 1
def weight_variable(shape):
    """Create a weight variable of the given shape.

    The initial values are drawn from a truncated normal distribution with
    a standard deviation of 0.01.

    Args:
        shape: Shape forwarded to ``tf.truncated_normal``.

    Returns:
        A ``tf.Variable`` holding the randomly initialised weights.
    """
    initial = tf.truncated_normal(shape, stddev=0.01)
    return tf.Variable(initial)
def bias_variable(shape):
    """Create a bias variable of the given shape, filled with 0.01.

    Args:
        shape: Shape forwarded to ``tf.constant``.

    Returns:
        A ``tf.Variable`` whose every element starts at 0.01.
    """
    initial = tf.constant(0.01, shape=shape)
    return tf.Variable(initial)
def conv2d(x, W, stride):
    """Apply a 2-D convolution with ``SAME`` padding.

    Args:
        x: Input tensor passed to ``tf.nn.conv2d``.
        W: Convolution filter tensor.
        stride: Step used for both middle entries of the ``strides`` list;
            the first and last entries are fixed at 1.

    Returns:
        The result of ``tf.nn.conv2d``.
    """
    return tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding="SAME")
def max_pool_2x2(x):
    """Apply max pooling with a 2x2 window, stride 2 and ``SAME`` padding.

    Args:
        x: Input tensor passed to ``tf.nn.max_pool``.

    Returns:
        The result of ``tf.nn.max_pool``.
    """
    return tf.nn.max_pool(
        x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME"
    )
def createNetwork():
    """Build the convolutional Q-network graph.

    The network is three convolutional layers (with one max-pool after the
    first), a 512-unit fully connected layer, and a linear readout with one
    output per action.

    Returns:
        A tuple ``(s, readout, h_fc1)`` where ``s`` is the input placeholder
        of shape ``[None, 80, 80, 4]``, ``readout`` is the output of the final
        linear layer (``ACTIONS`` values per input), and ``h_fc1`` is the
        activation of the fully connected hidden layer.
    """
    # network weights
    W_conv1 = weight_variable([8, 8, 4, 32])
    b_conv1 = bias_variable([32])

    W_conv2 = weight_variable([4, 4, 32, 64])
    b_conv2 = bias_variable([64])

    W_conv3 = weight_variable([3, 3, 64, 64])
    b_conv3 = bias_variable([64])

    W_fc1 = weight_variable([1600, 512])
    b_fc1 = bias_variable([512])

    W_fc2 = weight_variable([512, ACTIONS])
    b_fc2 = bias_variable([ACTIONS])

    # input layer
    s = tf.placeholder("float", [None, 80, 80, 4])

    # hidden layers
    # With SAME padding each layer divides the spatial size by its stride
    # (assuming the default NHWC layout): 80 -> 20 (conv, stride 4)
    # -> 10 (2x2 pool) -> 5 (conv, stride 2) -> 5 (conv, stride 1).
    h_conv1 = tf.nn.relu(conv2d(s, W_conv1, 4) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2, 2) + b_conv2)

    h_conv3 = tf.nn.relu(conv2d(h_conv2, W_conv3, 1) + b_conv3)

    # 5 * 5 * 64 = 1600 features per example, matching W_fc1's first dimension.
    h_conv3_flat = tf.reshape(h_conv3, [-1, 1600])

    h_fc1 = tf.nn.relu(tf.matmul(h_conv3_flat, W_fc1) + b_fc1)

    # readout layer
    readout = tf.matmul(h_fc1, W_fc2) + b_fc2

    return s, readout, h_fc1
def trainNetwork(s readout h_fc1 sess):
# define the cost function
a = tf.placeholder(“float“ [None ACTIONS])
y = tf.placeholder(“float“ [None])
readout_action = tf.reduce_sum(tf.multiply(readout a) reduction_indices=1)
cost = tf.reduce_mean(tf.square(y - readout_action))
train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)
# open up a game state to communicate with emulator
game_state = game.GameState()
# store the previous observations in replay memory
D = deque()
# printing
a_file = open(“logs_“ + GAME + “/readout.txt“ ‘w‘)
h_file = open(“logs_“ + GAME + “/hidden.txt“ ‘w‘)
# get the first state by doing nothing and preprocess the image to 80x80x4
