sutton强化学习随书MATLAB代码

大小: 164KB

文件类型: .rar

金币: 2

下载: 1 次

发布日期: 2024-02-04
语言: Matlab
标签: sutton 强化学习 MATLAB 代码

高速下载

资源简介

sutton强化学习随书MATLAB代码，内附使用说明，亲测可运行，对理解书中内容很有帮助。

资源截图

小图大图

代码片段和文件信息

%function [] = binary_bandit_exps(nB,nP,p_win)
% 
% Duplicates the binary bandit experiments.
% 
% Inputs: 
%   nB: the number of bandits
%   nP: the number of plays (times we will pull an arm)
%   p_win: p_win(i) is the probability we win when we pull arm i.
% 
% NOTE: the function declaration above and the default-argument block below
% are commented out, so this file runs as a script: nB, nP and p_win must
% already exist in the workspace before it is executed.
% 
% Written by:
% -- 
% John L. Weatherwax                2007-11-13
% 
% email: wax@alum.mit.edu
% 
% Please send comments and especially bug reports to the
% above email address.
% 
%-----

%close all; 
%clc; 
%clear; 

% if( nargin<1 ) % the number of bandits: 
%   nB = 2000;  
% end
% if( nargin<2 ) % the number of plays (times we will pull an arm):
%   nP = 2000; 
% end
% if( nargin<3 )
%   p_win = [ 0.1 0.2 ]; 
%   p_win = [ 0.8 0.9 ]; 
% end

% the number of arms: 
nA = 2; 
        
% bestArm is the index of the arm with the largest win probability
% (the maximum value itself, dum, is not used):
[dum,bestArm] = max( p_win ); 

%randn('seed',0); 

if( 1 ) 
  % run the SUPERVISED experiment for two epsilon:
  % 0   => fully greedy
  % 0.1 => in between 
  % 1.0 => explore on each trial
  epsArray = [ 0 0.1 ];
  
  % perOptAction(ei,pi) = fraction of bandits on which the best arm was
  % chosen at play pi, for the ei-th epsilon:
  perOptAction = zeros(length(epsArray),nP); 
  for ei=1:length(epsArray) 
    tEps = epsArray(ei); 
    
    % pickedMaxAction(bi,pi) is 1 when bandit bi played the best arm on play pi:
    pickedMaxAction = zeros(nB,nP); 
    for bi=1:nB % pick a bandit
      % pick an arm to play initially ... 
      % (the second output of histc is the index of the bin of
      %  linspace(0,1+eps,nA+1) that the uniform sample falls into)
      %arm = 1; 
      [dum,arm] = histc(rand(1),linspace(0,1+eps,nA+1)); clear dum; 
      % NOTE: the loop index below is named "pi" and therefore shadows
      % MATLAB's built-in constant pi for the remainder of the script.
      for pi=1:nP % make a play
        % determine if this move is exploratory or greedy: 
        if( rand(1) <= tEps ) % pick a RANDOM arm: 
          [dum,arm] = histc(rand(1),linspace(0,1+eps,nA+1)); clear dum; 
        end
        if( arm==1 ) otherArm=2; else otherArm=1; end
        % determine if the arm selected is the best possible: 
        if( arm==bestArm ) pickedMaxAction(bi,pi)=1; end
        % get the reward from drawing on that arm: 
        prob = p_win(arm);
        if( rand(1) <= prob )                % this arm gave SUCCESS keep playing it ...
          % do nothing ... 
        else                                 % this arm gave FAILURE switch to the other ...
          arm = otherArm; 
        end
      end
    end
    
    % average over bandits (dimension 1) and store as a row for this epsilon:
    percentOptAction   = mean(pickedMaxAction,1);
    perOptAction(ei,:) = percentOptAction(:).';    
  end
end


%------------------------------------------------------------------------
% Learning with the L_{R-P} (linear reward penalty) algorithm: 
%------------------------------------------------------------------------
alpha = 0.1; 
if( 1 ) 
  perOptActionRP = zeros(1,nP); 
    
  %qT = zeros( nB, nA );  % initialize to zero the probability this arm gives a success (no knowledge)
  qT = 0.5*ones( nB, nA );  % initialize to uniform the probability this arm gives a success (no knowledge)
    
  pickedMaxAction = zeros(nB,nP); 
  for bi=1:nB % pick a bandit
    for pi=1:nP % make a play
      % pick an arm based on the distribution in qT: 
      if( rand(1) < qT(bi,1) )
        arm = 1; 
      else
        arm = 2; 
      end
      if( arm==1 ) otherArm=2; else otherArm=1; end
      % determine if the arm selected is the best possible: 
      if(

属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----

     文件      41472  2010-09-28 20:38  suntton强化学习书籍所有代码\suntton非matlab代码说明.doc

    ..A..H.       162  2010-09-28 17:08  suntton强化学习书籍所有代码\~$内容说明.doc

     文件     121344  2010-09-28 20:42  suntton强化学习书籍所有代码\内容说明.doc

     文件       2091  2008-03-27 08:32  suntton强化学习书籍所有代码\Chapter 9 （Planning and Learning）\blocking_mz_script.m.m

     文件       2636  2008-03-27 08:32  suntton强化学习书籍所有代码\Chapter 9 （Planning and Learning）\do_ex_9_1_exps.m.m

     文件       7859  2008-03-27 08:32  suntton强化学习书籍所有代码\Chapter 9 （Planning and Learning）\dynaQplus_maze.m.m

     文件       2487  2008-03-27 08:32  suntton强化学习书籍所有代码\Chapter 9 （Planning and Learning）\dynaQplus_maze_script.m.m

     文件       7419  2008-03-27 08:32  suntton强化学习书籍所有代码\Chapter 9 （Planning and Learning）\dynaQ_maze.m.m

     文件       2066  2008-03-27 08:32  suntton强化学习书籍所有代码\Chapter 9 （Planning and Learning）\dynaQ_maze_script.m.m

     文件       8114  2008-03-27 08:32  suntton强化学习书籍所有代码\Chapter 9 （Planning and Learning）\ex_9_4_dynaQplus.m.m

     文件       4322  2008-03-27 08:32  suntton强化学习书籍所有代码\Chapter 9 （Planning and Learning）\ex_9_4_dynaQplus_script.m.m

     文件        516  2008-03-27 08:32  suntton强化学习书籍所有代码\Chapter 9 （Planning and Learning）\mk_ex_9_1_mz.m.m

     文件        565  2008-03-27 08:32  suntton强化学习书籍所有代码\Chapter 9 （Planning and Learning）\mk_ex_9_2_mz.m.m

     文件        563  2008-03-27 08:32  suntton强化学习书籍所有代码\Chapter 9 （Planning and Learning）\mk_ex_9_3_mz.m.m

     文件       1809  2008-03-27 08:32  suntton强化学习书籍所有代码\Chapter 9 （Planning and Learning）\plot_mz_policy.m.m

     文件       1848  2010-09-28 17:22  suntton强化学习书籍所有代码\Chapter 8 （Generailzation and Function Approximation）\do_mnt_car_Exps.m

     文件       2774  2010-09-28 17:22  suntton强化学习书籍所有代码\Chapter 8 （Generailzation and Function Approximation）\GetTiles_Mex.C

     文件        809  2010-09-28 17:22  suntton强化学习书籍所有代码\Chapter 8 （Generailzation and Function Approximation）\GetTiles_Mex_script.m

     文件        939  2010-09-28 17:23  suntton强化学习书籍所有代码\Chapter 8 （Generailzation and Function Approximation）\get_ctg.m

     文件        684  2010-09-28 17:22  suntton强化学习书籍所有代码\Chapter 8 （Generailzation and Function Approximation）\linAppFn.m

     文件       5579  2010-09-28 17:22  suntton强化学习书籍所有代码\Chapter 8 （Generailzation and Function Approximation）\mnt_car_learn.m

     文件        909  2010-09-28 17:22  suntton强化学习书籍所有代码\Chapter 8 （Generailzation and Function Approximation）\next_state.m

     文件       1034  2010-09-28 17:22  suntton强化学习书籍所有代码\Chapter 8 （Generailzation and Function Approximation）\ret_q_in_st.m

     文件       2087  2010-09-28 17:22  suntton强化学习书籍所有代码\Chapter 8 （Generailzation and Function Approximation）\stp_fn_approx_script.m

     文件        348  2010-09-28 17:22  suntton强化学习书籍所有代码\Chapter 8 （Generailzation and Function Approximation）\targetF.m

     文件       4184  2010-09-28 17:22  suntton强化学习书籍所有代码\Chapter 8 （Generailzation and Function Approximation）\tiles.C

     文件        340  2010-09-28 17:22  suntton强化学习书籍所有代码\Chapter 8 （Generailzation and Function Approximation）\tiles.h

     文件       1285  2010-09-28 17:21  suntton强化学习书籍所有代码\Chapter 7 （Eligibility Traces）\eg_7_5_episode.m

     文件       1783  2010-09-28 17:21  suntton强化学习书籍所有代码\Chapter 7 （Eligibility Traces）\eg_7_5_learn_at.m

     文件       1777  2010-09-28 17:21  suntton强化学习书籍所有代码\Chapter 7 （Eligibility Traces）\eg_7_5_learn_rt.m

............此处省略95个文件信息

上一篇：ROC曲线 matlab实现
下一篇：MATLAB多聚焦图像融合将两张到六张不同焦点的图片合成成一张

共有条评论

sutton强化学习随书MATLAB代码

资源简介

资源截图

代码片段和文件信息

评论

相关资源