%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This is a set of MATLAB files adapted from the C program posted at
% ftp://ftp.cs.umass.edu/pub/anw/pub/sutton/pole.c
% There are three files in total:
%   main.m    - the main program
%   get_box.m - maps a state to its discrete box index, 1 to 162
%   plant.m   - the cart-pole system equations
% The reference for this work is:
% Barto, Sutton, and Anderson,
% "Neuronlike Adaptive Elements That Can Solve Difficult Learning Control Problems,"
% IEEE Trans. Systems, Man, and Cybernetics,
% Vol. SMC-13, pp. 834-846, Sept.-Oct. 1983.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% What you need:
% 1) Save these files: main.m, get_box.m, and plant.m
% 2) Run main after starting MATLAB. No MATLAB toolboxes are required.
% 3) If the pole is not balanced the first time you run the program,
% run it again. I am sure the pole will be balanced within the first few runs.
%
% Derong Liu, January 2, 2001.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This is the main.m
%
% Start from an empty workspace.
clear
% Build a different seed on every run from the current wall-clock time.
% clock returns [year month day hour minute seconds]; the year is zeroed and
% the day, hour and seconds are weighted by 2, 5 and 60 respectively.
nc = clock .* [0 1 2 5 1 60];
rn = fix(516*sum(nc) + 27295);
% Scale the seed down if it ever exceeds 1.5e9.
if rn > 1.5e9
    rn = rn/5e6;
end
% Seed the legacy uniform generator, then read the seed back.
rand('seed', rn);
randseed = rand('seed')   % no semicolon on purpose: echoes the seed in use
% ---- Learning parameters ----
N_BOXES = 162;    % number of disjoint boxes of state space
ALPHA   = 1000;   % learning rate for action weights w
BETA    = 0.5;    % learning rate for critic weights v
GAMMA   = 0.95;   % discount factor for critic
DELTA   = 0.9;    % decay rate for w eligibility trace e
LAMBDA  = 0.8;    % decay rate for v eligibility trace xbar

% ---- Termination criteria ----
MAX_FAILURES = 1000;    % to try the program, set to 20; change back to 1000
MAX_STEPS    = 100000;  % to try the program, set to 100; change back to 100000

% ---- Weights and eligibility traces: one entry per box, all zero ----
w    = zeros(1, N_BOXES);   % action weights
v    = zeros(1, N_BOXES);   % critic weights
xbar = zeros(1, N_BOXES);   % eligibility trace for v
e    = zeros(1, N_BOXES);   % eligibility trace for w

% ---- Counters ----
steps    = 1;
failures = 0;

% ---- Initial state: everything at zero ----
x         = 0;
x_dot     = 0;
theta     = 0;
theta_dot = 0;

% Find the box that contains the initial state.
box = get_box(x, x_dot, theta, theta_dot);

% Buffers used by the final display (4 rows, one column per step).
xstar = zeros(4, MAX_STEPS);
F     = zeros(4, MAX_STEPS);
% Learning through critic-action loops; stops on success.
% NOTE(review): the loop header below is not a valid MATLAB condition ('=' is
% assignment, not a comparison), and no learning update appears between the
% header and the success message. Part of the original text appears to have
% been lost here (possibly everything between a '<' and a later '>').
% TODO: restore the loop condition and loop body from the original main.m;
% do not reconstruct them by guesswork.
while steps=MAX_STEPS,
% Report how many steps the pole stayed balanced and in which trial.
fprintf('The pole has been finally balanced for %d steps in trial %d.\n',...
steps,failures+1);
end
% Save the whole workspace to all-<trial>.mat, where <trial> is failures+1.
% The functional form of save takes the file name as a string, so there is
% no need to build a command string and run it through eval.
save(['all-' num2str(failures+1)]);

% Figure 1: row 1 of xstar over the recorded steps, scaled by 180/pi and
% labelled as theta in degrees.
figure(1)
plot(1:steps-1, xstar(1,1:steps-1)*180/pi)
title('\theta')
ylabel('degrees')

% Figure 2: row 3 of xstar over the recorded steps, labelled as the position
% in meters.
figure(2)
plot(1:steps-1, xstar(3,1:steps-1))
title('position')
ylabel('meters')

% Figure 3: the first steps-1 entries of F (linear indexing), labelled as the
% force. The x-axis limit is taken from the full buffer length, not from steps.
figure(3)
plot(1:steps-1, F(1:steps-1), '-o')
axis([0 1.1*length(F) -0.2 1.2])
title('force')
ylabel('unit')