No action applied to my step function in DDPG
I define a dynamic environment with rlFunctionEnv. It uses myResetFunction and myStepFunction.
When I set agentOpts.NoiseOptions.Variance = 0, the action argument passed to myStepFunction is always zero.
This means only the exploration noise affects training and no action from the actor is applied to the step function.
I call disp(action) inside the step function and it only shows zeros; when I set agentOpts.NoiseOptions.Variance = 0.6, the action is non-zero.
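As a quick sanity check (my own test, not part of the training script) I also query the untrained actor directly; with zero noise variance, the action passed to myStepFunction should equal this raw actor output:
% Raw output of the (untrained) actor for a placeholder observation.
% With agentOpts.NoiseOptions.Variance = 0 no exploration noise is added,
% so the agent's action should match this value.
obsSample = rand(6,1);                               % placeholder observation
actorOut = cell2mat(getAction(actor,{obsSample}));   % deterministic actor output
disp(actorOut);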
On the other hand, I set 'SaveExperienceBufferWithAgent',true and 'ResetExperienceBufferBeforeTraining',false,
but I cannot access the data stored in the ExperienceBuffer.
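What I am trying to do is roughly the following (only a sketch; the MAT-file name is arbitrary, and the direct ExperienceBuffer property access at the end is my own guess and may not be publicly accessible in my release):
% Save the trained agent; the replay buffer should be included because
% SaveExperienceBufferWithAgent is true.
save('ddpgAgentWithBuffer.mat','agent');
% Reload it later.
S = load('ddpgAgentWithBuffer.mat');
reloadedAgent = S.agent;
% Resuming training should reuse the stored experiences, since
% ResetExperienceBufferBeforeTraining is false:
% trainingStats2 = train(reloadedAgent,env,trainOpts);
% This is the part I cannot get to work: reading the buffer contents directly.
% The property name below is a guess and may not be accessible:
% buffer = reloadedAgent.ExperienceBuffer;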
My code is:
Ts = 0.05;Tf = 40;
obsInfo = rlNumericSpec([6 1]);
obsInfo.Name = 'Robot States';
obsInfo.Description = 'Theta';
obsInfo.UpperLimit =theta_max;
obsInfo.LowerLimit =theta_min;
actInfo = rlNumericSpec([6 1]);
actInfo.Name = 'Robot Action';
actInfo.Description ='Delta Theta';
actInfo.UpperLimit =Ts*thetaDot_max;
actInfo.LowerLimit =Ts*thetaDot_min;
numObs=obsInfo.Dimension(1) ;
numAcs=actInfo.Dimension(1) ;
ResetHandle = @()myResetFunction();
StepHandle = @(DeltaTheta,LoggedSignals) myStepFunction(DeltaTheta,LoggedSignals);
env = rlFunctionEnv(obsInfo,actInfo,StepHandle,ResetHandle);
rng(0)
statePath = [
featureInputLayer(numObs,'Normalization','none','Name','observation')
fullyConnectedLayer(400,'Name','CriticStateFC1')
reluLayer('Name', 'CriticRelu1')
fullyConnectedLayer(300,'Name','CriticStateFC2')];
actionPath = [
featureInputLayer(numAcs,'Normalization','none','Name','action')
fullyConnectedLayer(300,'Name','CriticActionFC1','BiasLearnRateFactor',0)];
commonPath = [
additionLayer(2,'Name','add')
reluLayer('Name','CriticCommonRelu')
fullyConnectedLayer(1,'Name','CriticOutput')];
criticOpts = rlRepresentationOptions('LearnRate',1e-03,'GradientThreshold',1,'Optimizer',"adam",...
'L2RegularizationFactor',0.0001,'UseDevice',"gpu");
criticNetwork = layerGraph();
criticNetwork = addLayers(criticNetwork,statePath);
criticNetwork = addLayers(criticNetwork,actionPath);
criticNetwork = addLayers(criticNetwork,commonPath);
criticNetwork = connectLayers(criticNetwork,'CriticStateFC2','add/in1');
criticNetwork = connectLayers(criticNetwork,'CriticActionFC1','add/in2');
figure
plot(criticNetwork)
critic = rlQValueRepresentation(criticNetwork,obsInfo,actInfo,'Observation',{'observation'},'Action',{'action'},criticOpts);
actorNetwork = [
featureInputLayer(numObs,'Normalization','none','Name','observation')
fullyConnectedLayer(400,'Name','ActorFC1')
reluLayer('Name','ActorRelu1')
fullyConnectedLayer(300,'Name','ActorFC2')
reluLayer('Name','ActorRelu2')
fullyConnectedLayer(200,'Name','ActorFC3')
reluLayer('Name','ActorRelu3')
fullyConnectedLayer(100,'Name','ActorFC4')
reluLayer('Name','ActorRelu4')
fullyConnectedLayer(6,'Name','ActorFC5')
tanhLayer('Name','ActorTanh')
scalingLayer('Name','ActorScaling','Scale',actInfo.UpperLimit)];
actorOpts = rlRepresentationOptions('LearnRate',1e-04,'GradientThreshold',1,'Optimizer',"adam",...
'L2RegularizationFactor',0.0001,'UseDevice',"gpu");
actor = rlDeterministicActorRepresentation(actorNetwork,obsInfo,actInfo,'Observation',{'observation'},'Action',{'ActorScaling'},actorOpts);
% act = cell2mat(getAction(actor,{rand(6,1)}))
agentOpts = rlDDPGAgentOptions(...
'SampleTime',Ts,...
'TargetSmoothFactor',1e-3,...
'ExperienceBufferLength',1e6,...
'DiscountFactor',0.99,...
'SaveExperienceBufferWithAgent',true,...
'ResetExperienceBufferBeforeTraining',false,...
'MiniBatchSize',128);
agentOpts.NoiseOptions.Variance = 0.6;
agentOpts.NoiseOptions.VarianceDecayRate = 1e-5;
agent = rlDDPGAgent(actor,critic,agentOpts);
maxEpisode =5000;
maxStepsPerEpisode = ceil(Tf/Ts);
trainOpts = rlTrainingOptions(...
'MaxEpisodes',maxEpisode,...
'MaxStepsPerEpisode',maxStepsPerEpisode,...
'ScoreAveragingWindowLength',5,...
'Verbose',true,...
'Plots','training-progress',...
'StopTrainingCriteria','EpisodeReward',...
'StopTrainingValue',-100,...
'SaveAgentCriteria','EpisodeCount',...
'SaveAgentValue',3,...
'SaveAgentDirectory','C:\Users\Asus\Desktop\ROBOT CODE\Robot Agent');
% Train the agent.
trainingStats = train(agent,env,trainOpts);
function [InitialObservation, LoggedSignal] = myResetFunction()
% Reset function to place the robot environment into its home
% (initial) configuration.
global robot
Theta = homeConfiguration(robot);
LoggedSignal.State = Theta ;
InitialObservation = LoggedSignal.State;
end
function [NextObs,Reward,IsDone,LoggedSignals] = myStepFunction(DeltaTheta,LoggedSignals)
global robot
global T60d_StartPoint T60d_EndPoint
global worldCollisionArray
global theta_max theta_min thetaDot_max thetaDot_min
global a1 a2 a3 a4 a5 a6 d1 d2 d3 d4 d5 d6
x_tool=0.0395;
y_tool=0;
z_tool= 0.4333;
beta=pi/4;
Reward=0;
Ts = 0.05; % Sample time
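% Clip the commanded joint increments to the rate limits Ts*thetaDot_min .. Ts*thetaDot_max.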
for i=1:6
    if DeltaTheta(i)>Ts*thetaDot_max(i)
        DeltaTheta(i)=Ts*thetaDot_max(i);
    elseif DeltaTheta(i)<Ts*thetaDot_min(i)
        DeltaTheta(i)=Ts*thetaDot_min(i);
    end
end
Theta=LoggedSignals.State(1:6);
Previous_config=Theta;
Theta_nextStep=Theta+Ts*DeltaTheta;
for i=1:6
    if Theta_nextStep(i)>theta_max(i)
        Theta_nextStep(i)=theta_max(i);
    elseif Theta_nextStep(i)<theta_min(i)
        Theta_nextStep(i)=theta_min(i);
    end
end
LoggedSignals.State=Theta_nextStep;
NextObs = LoggedSignals.State;
configuration = double(Theta_nextStep);
inCollision = checkCollision(robot,configuration,worldCollisionArray,'IgnoreSelfCollision','on');
if inCollision
    Reward = -10;
end
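% Closed-form forward kinematics: position of the tool tip in the robot base frame.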
theta1=Theta(1);theta2=Theta(2);theta3=Theta(3);
theta4=Theta(4);theta5=Theta(5);theta6=Theta(6);
c1=cos(theta1);s1=sin(theta1);c2=cos(theta2);s2=sin(theta2);c3=cos(theta3);s3=sin(theta3);
c23=cos(theta2+theta3);s23=sin(theta2+theta3);c4=cos(theta4);s4=sin(theta4);
c5=cos(theta5); s5=sin(theta5);c6=cos(theta6); s6=sin(theta6);
EndeffectorTipFrame_Base.X =a1*c1 - d2*s1 - d6*(s5*(s1*s4 + c1*c4*c23) + c1*c5*s23) - z_tool*(s5*(s1*s4 + c1*c4*c23) + c1*c5*s23) + x_tool*(c6*(c5*(s1*s4 + c1*c4*c23) - c1*s5*s23) + s6*(c4*s1 - c1*c23*s4)) - y_tool*(s6*(c5*(s1*s4 + c1*c4*c23) - c1*s5*s23) - c6*(c4*s1 - c1*c23*s4)) + a2*c1*c2 - c1*d4*s23 - a3*c1*s2*s3 + a3*c1*c2*c3;
EndeffectorTipFrame_Base.Y =y_tool*(s6*(c5*(c1*s4 - c4*c23*s1) + s1*s5*s23) - c6*(c1*c4 + c23*s1*s4)) - x_tool*(c6*(c5*(c1*s4 - c4*c23*s1) + s1*s5*s23) + s6*(c1*c4 + c23*s1*s4)) + c1*d2 + a1*s1 + d6*(s5*(c1*s4 - c4*c23*s1) - c5*s1*s23) + z_tool*(s5*(c1*s4 - c4*c23*s1) - c5*s1*s23) + a2*c2*s1 - d4*s1*s23 - a3*s1*s2*s3 + a3*c2*c3*s1;
EndeffectorTipFrame_Base.Z =d1 - z_tool*(c5*c23 - c4*s5*s23) - c23*d4 - a2*s2 - a3*s23 - x_tool*(c6*(c23*s5 + c4*c5*s23) - s4*s6*s23) + y_tool*(s6*(c23*s5 + c4*c5*s23) + c6*s4*s23) - d6*(c5*c23 - c4*s5*s23);
goal.X=T60d_EndPoint(1,4);
goal.Y=T60d_EndPoint(2,4);
goal.Z=T60d_EndPoint(3,4);
distanceVector=[goal.X-EndeffectorTipFrame_Base.X ; goal.Y-EndeffectorTipFrame_Base.Y ;goal.Z-EndeffectorTipFrame_Base.Z ];
error=norm(distanceVector);
% disp(error);
% disp(stepcounter);
disp(DeltaTheta);
hold on
% Endconfig=InversKinematic(T60d_EndPoint ,Previous_config);
% configuration=wrapToPi(configuration);
% Endconfig=wrapToPi(Endconfig);
% error=abs(configuration-Endconfig);
Q=eye(6);
R=(10^-3)*eye(6);
Reward=Reward-error;
% Check terminal condition.
IsDone = 0;
if error < 10^-1
    disp('yes')
    IsDone = 1;
end
end