function [ a, b, c] = stick( t, x, flag, u) % stick: two link stick figure simulator % x = [ th1, th2, om1, om2]': th=0 upright % u = [ torq]: at joint2 % % To be called by ode % [tspan,x(0),opt] = stick( [], [], 'init') % xdot = stick( t, x) % xdot = stick( t, x, '', u) % status = stick( [], [], 'done') % % [Ns,Na,Nm] = stick( 'new'): deafault setup % [xp,sp] = stick( 'pivot'): suggested pivot points for mlqc % [A,B,c] = stick( 'linear', x): local linear model % xdot = A*x + B*u + c % r = stick( t, x, 'reward', u): reward function % [Q,q1,q0] = stick( 'quadra', x): local quadratic reward model % r = q0 + q1'*x - x'*Q*x/2 global E % environment if ischar( t) flag = t; % use first arg as flag else if nargin<3, flag = ''; end % ode standard end switch( flag) case 'new' % Physical parameters % standard parameter E.ns = 4; % state dimension E.ni = 1; % input dimension E.label = char( 'Th1', 'Th2', 'Om1', 'Om2', 'Tq'); % names of variables E.torus = [ 0; 0; 0; 0]; % 1 if the variable is cyclic E.smax = [ pi/2; pi; 6*pi; 6*pi]; % state variable range E.smin = -E.smax; E.rmin = [ -0.5*pi; 0; 0; 0]; % random initial state range E.rmax = [ -0.5*pi; 0.75*pi; 0; 0]; E.imax = 20; % control input range E.imin = -E.imax; %E.tick = repmat( round([-10:10]*pi*10)/10, E.ns, 1); E.dt = 0.01; % time step for interaction E.vt = 0.05; % time step for visualization E.tt = 10.0; % trial length % specific paramteres E.goal = 0.22; % desired com height E.L = [ 0.3, 0.4]; % link lengths E.D = [0.15, 0.2]; % joint to center of mass E.M = [ 4, 1]; % mass E.I = E.M.*E.L.^2/12; % moment E.gc = 9.8; % gravity E.mu = 0.01; % joint viscosity E.kf = 10000; % floor stiffness E.bf = 100; % floor damping % dependent parameters E.I0 = [ E.I(1)+E.M(1)*E.D(1)^2+E.M(2)*E.L(1)^2, 0; % inertia: constant E.I(2)+E.M(2)*E.D(2)^2, E.I(2)+E.M(2)*E.D(2)^2]; E.I1 = [ 1, 1; 1, 0]*E.M(2)*E.L(1)*E.D(2); % inertia: coef of cos2 E.J1 = [ -1, -1, -2; 1, 0, 0]*E.M(2)*E.L(1)*E.D(2); % colioli: coef of sin2 E.G1 = E.gc*[ E.M(1)*E.D(1)+E.M(2)*E.L(1); E.M(2)*E.D(2)]; % gravity: coef of sin1, sin12 E.E1 = E.gc*(E.M(1)*E.D(1)+E.M(2)*E.L(1)+E.M(2)*E.D(2)); % max/min potential energy stick( [], [], 'init'); case 'init' % variables E.state = zeros(E.ns,1); E.input = zeros(E.ni,1); E.t = 0; a = 0:E.dt:E.tt; % default tspan b = E.state; c = []; % ode parameter case 'rand' % random initial state E.state = E.rmin + (E.rmax-E.rmin).*rand(E.ns,1); a = E.state; case 'pivot' % pivot points p1 = stick( 'itip', [ 0.6; 0]); p2 = stick( 'icom', [ 0; 0.2]); a = [ [-pi/2;pi/2], -p1(:,1), -p2(:,1)]; a = [ a, [0;0], -a(:,end:-1:1); zeros(2,7)]; b = repmat( [1/pi;1/pi;0;0], 1, size(a,2)); % prior sharpness case '' % Floor contact y1 = E.L(1)*cos(x(1)); y2 = y1 + E.L(2)*cos(x(1)+x(2)); dy1 = -E.L(1)*sin(x(1))*x(3); dy2 = dy1 - E.L(2)*sin(x(1)+x(2))*(x(3)+x(4)); f1 = (y1<0)*(-E.kf*y1-E.bf*dy1); f2 = (y2<0)*(-E.kf*y2-E.bf*dy2); % Dynamics if nargin<4, u = E.input; end Torq = [ -u-E.L(1)*(f1*sin(x(1))+f2*cos(x(1)+x(2))*sin(x(2))); u-E.L(2)*f2*sin(x(1)+x(2))]; Visc = E.mu*[ x(3)-x(4); x(4)]; Colio = E.J1*sin(x(2))*[ x(3)^2; x(4)^2; x(3)*x(4)]; Grav = E.G1.*[ sin(x(1)); sin(x(1)+x(2))]; Iinv = (E.I0 + E.I1*cos(x(2)))^-1; Aacc = Iinv*( Torq - Visc - Colio + Grav); a = [ x(3:4); Aacc]; % xdot case 'reward' % height of the com com = stick( 'com', x); a = -abs( com(2)-E.goal); case 'linear' % local linear dynamic model % xdot = a*x + b*u + c a = zeros( E.ns); dx = 1e-2; for i=1:E.ns x1 = x; x1(i) = x1(1) - dx; xdot1 = stick( 0, x1, '', 0); x2 = x; x2(i) = x2(1) + dx; xdot2 = stick( 0, x2, '', 0); a(:,i) = (xdot2 - xdot1)/(2*dx); end % input gain: assume input affine b = zeros( E.ns, E.ni); du = 1e-2; for i=1:E.ni xdot1 = stick( 0, x, '', -du); xdot2 = stick( 0, x, '', du); b(:,i) = (xdot2 - xdot1)/(2*du); end % bias c = stick( 0, x, '', 0) - a*x; % C: bias case 'quadra' % local quadratic reward model % r = c + b'*x - x'*a*x/2 d = 1e-2; dx = [ 0, -d, d, 0, 0, -d, d; 0, 0, 0, -d, d, -d, d; zeros( 2, 7)]; for i=1:7 r(i) = stick( 0, x+dx(:,i), 'reward', 0); end b1 = (r(3)-r(2))/d; b2 = (r(5)-r(4))/d; a11 = (r(3)+r(2)-2*r(1))/d^2; a22 = (r(5)+r(4)-2*r(1))/d^2; a12 = ((r(7)+r(6)-2*r(1))/d^2 - (a11+a22))/2; a = -[ a11, a12, 0, 0; a12, a22, 0, 0; zeros(2,4)]; b = [ b1; b2; 0; 0]; c = stick( [], x, 'reward', 0) + 0.5*x'*a*x - b'*x; % bias case 'tip' % tip xy position from joint angles a = [ E.L(1)*sin(x(1,:)) + E.L(2)*sin(x(1,:)+x(2,:)); E.L(1)*cos(x(1,:)) + E.L(2)*cos(x(1,:)+x(2,:))]; case 'itip' % tip xy position to joint angles tht = atan2( x(1), x(2)); th2 = acos( (x(1)^2+x(2)^2-E.L(1)^2-E.L(2)^2)/(2*E.L(1)*E.L(2))); %th0 = acos( (-(x(1)^2+x(2)^2)-E.L(1)^2+E.L(2)^2); th0 = asin( E.L(2)*sin(th2)/sqrt(x(1)^2+x(2)^2)); a = [ tht-th0, tht+th0; th2, -th2]; case 'com' % center of mass xy position from joint angels a = [ ((E.M(1)*E.D(1)+E.M(2)*E.L(1))*sin(x(1)) + E.M(2)*E.D(2)*sin(x(1)+x(2))); ((E.M(1)*E.D(1)+E.M(2)*E.L(1))*cos(x(1)) + E.M(2)*E.D(2)*cos(x(1)+x(2)))]/(E.M(1)+E.M(2)); case 'icom' % center of mass xy position to joint angels md1 = (E.M(1)*E.D(1)+E.M(2)*E.L(1))/(E.M(1)+E.M(2)); md2 = E.M(2)*E.D(2)/(E.M(1)+E.M(2)); thm = atan2( x(1), x(2)); th2 = acos( (x(1)^2+x(2)^2-md1^2-md2^2)/(2*md1*md2)); th0 = asin( md2*sin(th2)/sqrt(x(1)^2+x(2)^2)); %a = [ thm-th0; th2]; a = [ thm-th0, thm+th0; th2, -th2]; otherwise error( [ ' invalid option: ', flag]); end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%