Resource Description
Sharing this with everyone: this is the 2016 edition of the code. The variable names are not particularly friendly, but it is useful as a reference for study. I hope it helps.
Code Snippet and File Information
%function [] = binary_bandit_exps(nB, nP, p_win)
%
% Duplicates the binary bandit experiments.
%
% Inputs:
% nB: the number of bandits
% nP: the number of plays (times we will pull an arm)
% p_win: p_win(i) is the probability we win when we pull arm i.
%
% Written by:
% --
% John L. Weatherwax 2007-11-13
%
% email: wax@alum.mit.edu
%
% Please send comments and especially bug reports to the
% above email address.
%
%-----
%close all;
%clc;
%clear;
% if( nargin<1 ) % the number of bandits:
% nB = 2000;
% end
% if( nargin<2 ) % the number of plays (times we will pull an arm):
% nP = 2000;
% end
% if( nargin<3 )
% p_win = [ 0.1 0.2 ];
% p_win = [ 0.8 0.9 ];
% end
% the number of arms:
nA = 2;
[dum, bestArm] = max( p_win );  % index of the arm with the largest win probability
%randn('seed', 0);
if( 1 )
  % run the SUPERVISED experiment for two epsilon values:
  %   0   => fully greedy
  %   0.1 => in between
  %   1.0 => explore on each trial
  epsArray = [ 0 0.1 ];
  perOptAction = zeros(length(epsArray), nP);
  for ei=1:length(epsArray)
    tEps = epsArray(ei);
    pickedMaxAction = zeros(nB, nP);
    for bi=1:nB % pick a bandit
      % pick an arm to play initially ...
      %arm = 1;
      [dum, arm] = histc(rand(1), linspace(0, 1+eps, nA+1)); clear dum;
      for pi=1:nP % make a play
        % determine if this move is exploratory or greedy:
        if( rand(1) <= tEps ) % pick a RANDOM arm:
          [dum, arm] = histc(rand(1), linspace(0, 1+eps, nA+1)); clear dum;
        end
        if( arm==1 ) otherArm=2; else otherArm=1; end
        % determine if the arm selected is the best possible:
        if( arm==bestArm ) pickedMaxAction(bi,pi)=1; end
        % get the reward from drawing on that arm:
        prob = p_win(arm);
        if( rand(1) <= prob ) % this arm gave SUCCESS, keep playing it ...
          % do nothing ...
        else % this arm gave FAILURE, switch to the other arm ...
          arm = otherArm;
        end
      end
    end
    percentOptAction = mean(pickedMaxAction, 1);
    perOptAction(ei,:) = percentOptAction(:).';
  end
end
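The excerpt above only fills the perOptAction matrix; the plotting code from the original script is not included in this snippet. Below is a minimal sketch of how the percent-optimal-action curves could be visualized, assuming the variables computed above (this plotting code is not part of the original file):

figure; hold on;
for ei = 1:length(epsArray)
  % one curve per epsilon value: fraction of plays on which the best arm was chosen
  plot( 1:nP, perOptAction(ei,:), 'DisplayName', sprintf('eps = %.2f', epsArray(ei)) );
end
xlabel('play number'); ylabel('fraction of optimal actions');
legend('show'); hold off;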
%------------------------------------------------------------------------
% Learning with the L_{R-P} (linear reward penalty) algorithm:
%------------------------------------------------------------------------
alpha = 0.1;
if( 1 )
  perOptActionRP = zeros(1, nP);
  %qT = zeros( nB, nA );     % initialize to zero the probability this arm gives a success (no knowledge)
  qT = 0.5*ones( nB, nA );   % initialize to uniform the probability this arm gives a success (no knowledge)
  pickedMaxAction = zeros(nB, nP);
  for bi=1:nB % pick a bandit
    for pi=1:nP % make a play
      % pick an arm based on the distribution in qT:
      if( rand(1) < qT(bi,1) )
        arm = 1;
      else
        arm = 2;
      end
      if( arm==1 ) otherArm=2; else otherArm=1; end
      % determine if the arm selected is the best possible:
      if(
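The snippet is cut off here, mid-statement. Judging from the parallel epsilon-greedy block above, the truncated line most likely continues as `if( arm==bestArm ) pickedMaxAction(bi,pi)=1; end`, followed by drawing a reward and applying the reward-penalty update. For reference, here is a minimal sketch of the standard L_{R-P} (linear reward-penalty) update for the two-armed case, written against the variables defined above (alpha, qT, arm, otherArm, p_win); it is a reconstruction for illustration, not the contents of the original file:

      % (sketch, not original code) record whether the chosen arm was optimal:
      if( arm==bestArm ) pickedMaxAction(bi,pi)=1; end
      % draw a Bernoulli reward and apply the linear reward-penalty update:
      if( rand(1) <= p_win(arm) )   % success: move probability mass toward the played arm
        qT(bi,arm)      = qT(bi,arm) + alpha*( 1 - qT(bi,arm) );
        qT(bi,otherArm) = 1 - qT(bi,arm);
      else                          % failure (penalty): move probability mass toward the other arm
        qT(bi,arm)      = (1-alpha)*qT(bi,arm);
        qT(bi,otherArm) = 1 - qT(bi,arm);
      end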
Attribute    Size   Date        Time   Name
-----------  -----  ----------  -----  ----
File          5861  2010-09-28  17:14  suntton强化学习书籍代码\Chapter 2 (Evaluative Feedback)\binary_bandit_exps.m
File           791  2010-09-28  17:14  suntton强化学习书籍代码\Chapter 2 (Evaluative Feedback)\binary_bandit_exps_sc
File          5069  2010-09-28  17:14  suntton强化学习书籍代码\Chapter 2 (Evaluative Feedback)\exercise_2_11.m
File           590  2010-09-28  17:14  suntton强化学习书籍代码\Chapter 2 (Evaluative Feedback)\exercise_2_11_sc
File          4401  2010-09-28  17:14  suntton强化学习书籍代码\Chapter 2 (Evaluative Feedback)\exercise_2_5.m
File          4056  2010-09-28  17:14  suntton强化学习书籍代码\Chapter 2 (Evaluative Feedback)\exercise_2_7.m
File          1159  2010-09-28  17:14  suntton强化学习书籍代码\Chapter 2 (Evaluative Feedback)\exercise_2_7_sc
File          4603  2010-09-28  17:14  suntton强化学习书籍代码\Chapter 2 (Evaluative Feedback)\n_armed_testbed.m
File          5122  2010-09-28  17:14  suntton强化学习书籍代码\Chapter 2 (Evaluative Feedback)\n_armed_testbed_softmax.m
File          3739  2010-09-28  17:14  suntton强化学习书籍代码\Chapter 2 (Evaluative Feedback)\opt_initial_values.m
File           777  2010-09-28  17:14  suntton强化学习书籍代码\Chapter 2 (Evaluative Feedback)\opt_initial_values_sc
File          5405  2010-09-28  17:14  suntton强化学习书籍代码\Chapter 2 (Evaluative Feedback)\persuit_method.m
File           592  2010-09-28  17:14  suntton强化学习书籍代码\Chapter 2 (Evaluative Feedback)\persuit_method_sc
File          4663  2010-09-28  17:14  suntton强化学习书籍代码\Chapter 2 (Evaluative Feedback)\reinforcement_comparison_methods.m
File           887  2010-09-28  17:14  suntton强化学习书籍代码\Chapter 2 (Evaluative Feedback)\reinforcement_comparison_methods_sc
File           962  2010-09-28  17:14  suntton强化学习书籍代码\Chapter 2 (Evaluative Feedback)\sample_discrete.m
File          2712  2010-09-28  17:15  suntton强化学习书籍代码\Chapter 3 (The Reinforcement Learning Problem)\rr_action_bellman.m
File          2083  2010-09-28  17:15  suntton强化学习书籍代码\Chapter 3 (The Reinforcement Learning Problem)\rr_state_bellman.m
File          1732  2010-09-28  17:16  suntton强化学习书籍代码\Chapter 4 (Dynamic Programming)\cmpt_P_and_R.m
File           301  2010-09-28  17:16  suntton强化学习书籍代码\Chapter 4 (Dynamic Programming)\ex_4_2_sys_solv.m
File          2771  2010-09-28  17:16  suntton强化学习书籍代码\Chapter 4 (Dynamic Programming)\ex_4_5_policy_evaluation.m
File          3893  2010-09-28  17:16  suntton强化学习书籍代码\Chapter 4 (Dynamic Programming)\ex_4_5_policy_improvement.m
File          1422  2010-09-28  17:17  suntton强化学习书籍代码\Chapter 4 (Dynamic Programming)\ex_4_5_rhs_state_value_bellman.m
File          3162  2010-09-28  17:16  suntton强化学习书籍代码\Chapter 4 (Dynamic Programming)\ex_4_5_sc
File          1031  2010-09-28  17:16  suntton强化学习书籍代码\Chapter 4 (Dynamic Programming)\gam_rhs_state_bellman.m
File          2785  2010-09-28  17:16  suntton强化学习书籍代码\Chapter 4 (Dynamic Programming)\gam_sc
File          4302  2010-09-28  17:16  suntton强化学习书籍代码\Chapter 4 (Dynamic Programming)\iter_poly_gw_inplace.m
File          4500  2010-09-28  17:16  suntton强化学习书籍代码\Chapter 4 (Dynamic Programming)\iter_poly_gw_not_inplace.m
File          2290  2010-09-28  17:16  suntton强化学习书籍代码\Chapter 4 (Dynamic Programming)\jcr_example.m
File          2679  2010-09-28  17:16  suntton强化学习书籍代码\Chapter 4 (Dynamic Programming)\jcr_policy_evaluation.m
............ (94 more file entries omitted)