- Size: 780 KB
- File type: .zip
- Coins: 1
- Downloads: 0
- Published: 2021-05-13
- Language: Python
- Tags: q-learning
Resource description
A Q-learning example implemented in Python, provided for learning and reference.
Code snippet and file information
#!/usr/bin/env python2
import random
import sys

class Agent:
    def __init__(self, MDP):
        self.MDP = MDP
        self.state = None

    def executeAction(self, a):
        s2, r = self.MDP.executeAction(a, self.state)
        return s2, r

    def selectRandomAction(self):
        return random.choice(self.MDP.A_s[self.state])

    def selectBestActionFromQTable(self, s, Q):
        # discover what is the best possible value considering
        # all possible actions for the state
        # FIXME: use the V table
        maxValue = 0.0
        for a in self.MDP.A_s[self.state]:
            maxValue = max(maxValue, Q[s][a])
        # obtain all the actions whose value equals the maximum
        A = []
        for a in self.MDP.A_s[self.state]:
            # FIXME: make it a parameter
            delta = 1e-10
            if abs(Q[s][a] - maxValue) <= delta:
                A.append(a)
        # obtain a random action from all the possible ones
        if len(A) > 0:
            a = random.choice(A)
        else:
            a = '---'
        return a

    def selectBestActionFromProbPolicy(self, s, Pi):
        P = []
        acum = 0.0
        # FIXME: avoid recomputing the cumulative sum on every call
        #
        # Do this check when the policy is read, building a list of
        # ordered pairs that already carries the cumulative probability.
        # Draw a random number and scan the list only until the chosen
        # action is found.
        #
        # PROBLEM: the policy may change! Alternative: change how the
        # policy is loaded so that a Pi[s] = [(action, cumsum)]
        # already arrives here.
        for a in Pi[s].iterkeys():
            if Pi[s][a] > 0.0:
                p = []
                p.append(a)
                acum = acum + Pi[s][a]
                p.append(acum)
                P.append(p)
        # draw a random number in the interval [0, 1]
        x = random.random()
        for p in P:
            if x <= p[1]:
                a = p[0]
                break
        return a

    def selectBestAction(self, s, source=None, Q=None, Pi=None):
        if source == 'Q-Table':
            a = self.selectBestActionFromQTable(s, Q)
        elif source == 'Probabilistic Policy':
            a = self.selectBestActionFromProbPolicy(s, Pi)
        else:
            print 'ERROR: wrong source (' + str(source) + ')'
            sys.exit(1)
        return a

    def setInitialState(self):
        if self.MDP.P is None:
            self.state = random.choice(self.MDP.S)
        else:
            self.state = self.setInitialStateByProb()

    def setInitialStateByProb(self):
        x = random.random()
        for p in self.MDP.P:
            if x <= p[1]:
                s = p[0]
                break
        return s
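For context on how this Agent class might be driven, below is a minimal, hypothetical training-loop sketch. It is not the QLearning.py shipped in the archive; it assumes the class above is saved as src/Agent.py (as in the archive), an mdp object exposing S, A_s, P and executeAction(a, s) exactly as Agent uses them, and a Q-table stored as a dict of dicts. The names run_episode, alpha, gamma, epsilon and max_steps are illustrative only.

#!/usr/bin/env python2
# Hypothetical driver sketch, not the QLearning.py from the archive.
import random
from Agent import Agent

def run_episode(mdp, Q, alpha=0.1, gamma=0.9, epsilon=0.1, max_steps=100):
    agent = Agent(mdp)
    agent.setInitialState()
    for _ in range(max_steps):
        s = agent.state
        # epsilon-greedy: explore with probability epsilon,
        # otherwise exploit the current Q-table
        if random.random() < epsilon:
            a = agent.selectRandomAction()
        else:
            a = agent.selectBestAction(s, source='Q-Table', Q=Q)
        s2, r = agent.executeAction(a)
        # standard tabular Q-learning update:
        # Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
        best_next = max(Q[s2][a2] for a2 in mdp.A_s[s2]) if mdp.A_s[s2] else 0.0
        Q[s][a] += alpha * (r + gamma * best_next - Q[s][a])
        agent.state = s2
    return Q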
Type       Size      Date       Time  Name
---------- --------- ---------- ----- ----
directory          0 2012-09-20 23:06 Q-Learning-in-Python-master\
file             118 2012-09-20 23:06 Q-Learning-in-Python-master\.gitignore
file               0 2012-09-20 23:06 Q-Learning-in-Python-master\README
directory          0 2012-09-20 23:06 Q-Learning-in-Python-master\src\
file            2812 2012-09-20 23:06 Q-Learning-in-Python-master\src\Agent.py
file            4312 2012-09-20 23:06 Q-Learning-in-Python-master\src\MDP.py
file             929 2012-09-20 23:06 Q-Learning-in-Python-master\src\PRQL-interval.sh
file           11108 2012-09-20 23:06 Q-Learning-in-Python-master\src\PRQLearning.py
file             866 2012-09-20 23:06 Q-Learning-in-Python-master\src\QL-interval.sh
file            4539 2012-09-20 23:06 Q-Learning-in-Python-master\src\QLearning.py
file            2157 2012-09-20 23:06 Q-Learning-in-Python-master\src\QabLearning.py
file            6588 2012-09-20 23:06 Q-Learning-in-Python-master\src\RL-PRQL.py
file            1072 2012-09-20 23:06 Q-Learning-in-Python-master\src\RL-PRQL.sh
file            4277 2012-09-20 23:06 Q-Learning-in-Python-master\src\RL-QL.py
file             893 2012-09-20 23:06 Q-Learning-in-Python-master\src\RL-QL.sh
file            1072 2012-09-20 23:06 Q-Learning-in-Python-master\src\RL.sh
file            1138 2012-09-20 23:06 Q-Learning-in-Python-master\src\meanError.py
file             302 2012-09-20 23:06 Q-Learning-in-Python-master\src\prepareFolders.py
directory          0 2012-09-20 23:06 Q-Learning-in-Python-master\src\tools\
directory          0 2012-09-20 23:06 Q-Learning-in-Python-master\src\tools\Danny\
directory          0 2012-09-20 23:06 Q-Learning-in-Python-master\src\tools\Danny\OOo\
file            9985 2012-09-20 23:06 Q-Learning-in-Python-master\src\tools\Danny\OOo\OOoLib.py
file               0 2012-09-20 23:06 Q-Learning-in-Python-master\src\tools\Danny\OOo\__init__.py
file               0 2012-09-20 23:06 Q-Learning-in-Python-master\src\tools\Danny\__init__.py
file            1695 2012-09-20 23:06 Q-Learning-in-Python-master\src\tools\apagaCelulasEmBranco.py
file              77 2012-09-20 23:06 Q-Learning-in-Python-master\src\tools\changeExtension.sh
file             220 2012-09-20 23:06 Q-Learning-in-Python-master\src\tools\compactaArquivos.sh
file             221 2012-09-20 23:06 Q-Learning-in-Python-master\src\tools\compactaArquivosSecundarios.sh
file              92 2012-09-20 23:06 Q-Learning-in-Python-master\src\tools\delBackupFiles.sh
file             188 2012-09-20 23:06 Q-Learning-in-Python-master\src\tools\delOutputFiles.sh
file             414 2012-09-20 23:06 Q-Learning-in-Python-master\src\tools\descompacta.sh
............ 63 more file entries omitted here