function out = ae( arg, arg2)
% ae: agent-environment dynamics
%   to be used with
%   Agent: ctd.m, mlq.m, (rrl.m)
%   Environment: pend.m, stick.m, (cartpole.m, acro.m)
%
% Usage (function or command syntax):
%   ae                  sample run: 'new' followed by 'try'
%   ae new              create figures, default environment and agent
%   ae newenv [fun]     create a new environment (default 'stick')
%   ae newage [fun]     create a new agent (default 'mlq')
%   ae try              'rand' followed by 'run'
%   ae init | rand      start a new run (default / randomized state)
%   ae run              run one trial with the agent as ODE OutputFcn
%   ae wild             run the environment alone
%   ae( 'talloc', n)    extend the trace buffers by n rows
%   ae newfig           create the four figure windows and buttons
%   ae stop             set E.stop (callback of the Stop button)
%   ae anim [skip]      record a movie from the trace (default skip 5)
%   ae play             replay the recorded movie
%   ae qtsave file      write the movie with qtwrite
%   ae wave|ewave|awave plot environment and/or agent traces
%   ae save [file]      save the workspace (default 'ae.mat')
%   ae load [file]      list and load a saved file (default 'ae.mat')
%   ae N cmd            repeat command cmd N times
%
% out: no command produces a result; [] is returned when an output is
%   requested so that  x = ae(...)  does not fail.

global E    % Environment
% E.fun: ode file for dynamics
%   [tspan,x0,odeopt] = E.fun( [], [], 'init'); default
%   x0 = E.fun( [], [], 'reset'): random initial state
%   xdot = E.fun( t, x, [], u); x:state, u:input
%   NOTE(review): the 'rand' command below calls E.fun( 'rand'), and
%   'newenv' calls E.fun( 'new'); confirm against the environment files.
%
% E.vis: OutputFcn for visualization
%   E.vis( tspan, x0, 'init'); initialization
%   status = E.vis( t, x); plot, maybe multiple rows/columns
%   E.vis( [], [], 'done'); final touch
%
% E.ni: input dimension
% E.ns: state space dimension
% E.input(ni): input vector
% E.state(ns): state vector
% E.ilabel, E.slabel: input / state names (not read in this file)
% E.label: label rows read by 'ewave'; rows 1..ns are used for the
%   state plots, rows ns+1..ns+ni for the input plots
% E.imin, E.imax: range of input vectors
% E.smin, E.smax: range of state vectors
% E.dt: default iteration step
% E.tt: default trial length
% E.t: current time
% E.data: history table
% E.fig: figure handles (1: animation, 2: env state), set by 'newfig'
% E.stop: stop flag; cleared by 'newfig'/'run'/'wild', set by 'stop'

global A    % Agent
% A.fun: OutputFcn for dynamics
%   A.fun( tspan, x0, 'init'); initialization
%   status = A.fun( t, x); plot, maybe multiple rows/columns
%       call A.vis(t,x) and E.vis(t,x)
%   A.fun( [], [], 'done'); final touch
%
% A.vis: OutputFcn for visualization
%   vis( tspan, x0, 'init'); initialization
%   status = vis( t, x); plot, maybe multiple rows/columns
%   vis( [], [], 'done'); final touch
%
% A.ni: input dimension
% A.ns: state space dimension
% A.input(ni): input vector
% A.state(ns): state vector: value, dval, reward, td
% A.ilabel, A.slabel: input / state names (not read in this file)
% A.imin, A.imax: range of input vectors
% A.smin, A.smax: range of state vectors
% A.param: parameter vector
% A.trace: eligibility trace
% A.rate: learning rates
% A.noise: noise level
% A.teval: discount time scale
% A.telig: eligibility time scale
% A.data: history table
% A.fig: figure handles (1: agent state, 2: agent param), set by 'newfig'

global T    % Trace
% T.i: index
% T.t: time
% T.stat: state
% T.act: action
% T.dot: derivative
% T.rew: reward
% T.col: three columns per sample, passed to E.vis by 'anim'
%   (presumably a color -- confirm against the *_vis files)

global Mov  % movie frames recorded by 'anim'

% Assign the output only when the caller asks for it, so that
% command-syntax calls such as  ae new  never echo "ans".
if nargout > 0
    out = [];
end

if nargin == 0  % sample run
    ae( 'new');
    ae( 'try');
    return
end

switch( arg)
    %
    % Initialization
    %
    case 'new'  % initialize agent/environment
        ae( 'newfig');
        ae( 'newenv');
        ae( 'newage');

    case 'newenv'   % create a new environment
        if nargin >= 2
            E.fun = arg2;       % select environment
        else
            E.fun = 'stick';    % default
        end
        feval( E.fun, 'new');   % initialize
        % E.vis is used right below while the assignment here is
        % disabled, so the 'new' call above is expected to set it.
        % E.vis = [ E.fun, '_vis'];     % visualization function
        feval( E.vis, [0,E.tt], E.state, 'init');
        feval( E.vis, E.t, E.state);
        % start with empty trace buffers; 'run' grows them via 'talloc'
        T.t=[]; T.stat=[]; T.dot=[]; T.act=[]; T.rew=[]; T.col=[];

    case 'newage'   % create a new agent
        if nargin >= 2
            A.fun = arg2;       % select agent
        else
            A.fun = 'mlq';      % default
        end
        feval( A.fun, 'new');   % initialize
        feval( A.fun, 'setup');
        % A.vis = [ A.fun, '_vis'];     % visualization
        feval( A.vis, 'init');

    %
    % Run a trial
    %
    case 'try'
        ae( 'rand');
        ae( 'run');

    case {'init','rand'}    % a new run
        % environment
        feval( E.fun, [], [], 'init');  % reset E.t, E.state
        if isequal(arg,'rand')
            E.state = feval( E.fun, 'rand');    % randomize
        end
        feval( E.vis, [0,E.tt], E.state, 'init');
        feval( E.vis, E.t, E.state);
        % agent
        feval( A.fun, [], [], 'init');
        feval( A.vis, [], [], 'init');
        T.i = 0;
        feval( A.fun, E.t, E.state);    % store trace

    case 'run'
        ts = E.t + (0:E.dt:E.tt);   % time
        if length(T.t) < T.i+length(ts)-1   % extend trace buffer
            ae( 'talloc', T.i+length(ts)-1-length(T.t));
        end
        opts = odeset( 'OutputFcn',A.fun, 'MaxStep',E.dt, 'RelTol',1e-6);
        % A.fun will also call A.vis and E.vis
        E.stop = 0; % clear stop button
        % let ode do the job: A.fun will be called each cycle
        [time,traj] = ode15s( E.fun, ts, E.state, opts);
        % E.t = time(end,:)';   % new time
        E.state = traj(end,:)'; % new state
        ae( 'ewave');
        ae( 'awave');
        % feval( A.vis, 'param');

    case 'wild' % environment alone
        ts = E.t + (0:E.dt:E.tt);
        E.stop = 0; % stop button
        opts = odeset( 'OutputFcn', E.vis, 'MaxStep', E.dt);
        [T.t,T.stat] = ode15s( E.fun, ts, E.state, opts);
        % NOTE(review): T.i is not updated here, yet 'ewave' plots
        % rows 1:T.i of the trace -- confirm this is intended.
        ae( 'ewave');

    case 'talloc'   % allocate trace buffer
        s = arg2;   % increment
        T.t = [ T.t; zeros(s,1)];
        T.stat = [ T.stat; zeros(s,E.ns)];
        T.act = [ T.act; zeros(s,E.ni)];
        T.dot = [ T.dot; zeros(s,E.ns)];
        T.rew = [ T.rew; zeros(s,1)];
        T.col = [ T.col; zeros(s,3)];
        feval( A.fun, 'talloc', arg2);  % agent specific traces

    %
    % Figures
    %
    case 'newfig'
        % lay out four windows side by side in the upper screen half
        Fig.ss = get(0,'ScreenSize');
        Fig.dx = (Fig.ss(3) - 60)/4;    % x grid
        Fig.dy = (Fig.ss(4) - 60)/2;    % y grid
        Fig.win = [ 10, 60, Fig.dx-30, Fig.dy-45];  % shift and size
        E.fig(1) = 1; figure(1); clf;
        set(1,'Position',[0,Fig.dy,0,0]+Fig.win,'Name','Animation');
        E.fig(2) = 2; figure(2); clf;
        set(2,'Position',[Fig.dx,Fig.dy,0,0]+Fig.win,'Name','Env State');
        A.fig(1) = 3; figure(3); clf;
        set(3,'Position',[2*Fig.dx,Fig.dy,0,0]+Fig.win,'Name','Agent State');
        A.fig(2) = 4; figure(4); clf;
        set(4,'Position',[3*Fig.dx,Fig.dy,0,0]+Fig.win,'Name','Agent Param');
        % stop button
        E.stop = 0;
        % control buttons on the animation window; each one calls back
        % into this function using command syntax
        uicontrol(E.fig(1),'Style','pushbutton','Position',[40,0,40,20],...
            'Callback','ae init','String','Init','FontSize',12);
        uicontrol(E.fig(1),'Style','pushbutton','Position',[80,0,40,20],...
            'Callback','ae rand','String','Rand','FontSize',12);
        uicontrol(E.fig(1),'Style','pushbutton','Position',[120,0,40,20],...
            'Callback','ae run','String','Run','FontSize',12);
        uicontrol(E.fig(1),'Style','pushbutton','Position',[160,0,40,20],...
            'Callback','ae stop','String','Stop','FontSize',12);

    case 'stop' % to be called back with stop button
        E.stop = 1;

    %
    % Standard visualization
    %
    case 'anim'
        feval( E.vis, [], [], 'init');
        if nargin > 1, skip = arg2; else skip = 5; end
        % command syntax ("ae anim 3") delivers the step as text;
        % without conversion the character code would be used as step
        if ischar( skip), skip = str2double( skip); end
        s = 1:skip:T.i;     % trace rows to render
        frames = length(s);
        Mov = moviein( frames);
        % set(gca,'nextplot','replacechildren');
        for f = 1:frames
            feval( E.vis, T.t(s(f)), T.stat(s(f),:)', '', T.col(s(f),:));
            Mov(:,f) = getframe;
            drawnow;
        end

    case 'play'
        movie( Mov);

    case 'qtsave'
        qtwrite( Mov, jet, arg2)

    case 'wave'
        ae( 'ewave');
        feval( A.vis, 'wave');

    case 'ewave'
        % one subplot row per state, per input, plus one for the reward
        figure( E.fig(2)); clf;
        rows = E.ns+E.ni+1;
        for r = 1:E.ns
            subplot( rows, 1, r);
            plot( T.t(1:T.i), T.stat(1:T.i,r), 'b', 'LineWidth', 2);
            grid( 'on');
            % five ticks across the declared range, rounded to 0.1
            set( gca, 'YLim', [E.smin(r),E.smax(r)], ...
                'YTick', round(linspace(E.smin(r),E.smax(r),5)*10)/10);
            ylabel( E.label(r,:));
        end
        for r = 1:E.ni
            subplot( rows, 1, E.ns+r);
            plot( T.t(1:T.i), T.act(1:T.i,r), 'g', 'LineWidth', 2);
            grid( 'on');
            set( gca, 'YLim', [E.imin(r),E.imax(r)], ...
                'YTick', round(linspace(E.imin(r),E.imax(r),5)*10)/10);
            ylabel( E.label(E.ns+r,:));
        end
        subplot( rows, 1, rows);
        plot( T.t(1:T.i), T.rew(1:T.i), 'r', 'LineWidth', 2);
        grid( 'on');
        ylabel( 'Reward');

    case 'awave'
        feval( A.vis, 'wave');

    %
    % Miscellaneous
    %
    case 'save'
        if nargin < 2, arg2 = 'ae.mat'; end
        save( arg2);

    case 'load'
        if nargin < 2, arg2 = 'ae.mat'; end     % same default as 'save'
        whos( '-file', arg2);
        load( arg2);

    otherwise
        % "ae N cmd": repeat command cmd N times.  N may be a number or,
        % with command syntax, its text form.
        if isnumeric(arg)
            n = arg;
        elseif ischar(arg)
            % str2double never evaluates its input (str2num would run
            % an unknown command string through eval); it yields NaN
            % for anything that is not a plain number
            n = str2double(arg);
        else
            n = 0;
        end
        if isscalar(n) && n>0 && nargin>=2
            for i = 1:n, ae( arg2); end
        else
            % make a numeric count printable before building the message
            if isnumeric(arg) || islogical(arg), arg = num2str(arg); end
            error( [ ' invalid command ', arg]);
        end
end