Commit

Adding scale as a multiplicative factor
manila95 committed Mar 27, 2024
1 parent e31d469 commit b15bcdb
Showing 1 changed file with 8 additions and 5 deletions.
13 changes: 8 additions & 5 deletions cleanrl/ppo_continuous_action_wandb.py
@@ -206,13 +206,15 @@ def __init__(self, envs, risk_size=2, linear_size=64, risk_enc_size=12, risk_act
         self.actor_fc1 = layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), linear_size))
         self.actor_fc2 = layer_init(nn.Linear(linear_size+risk_enc_size, linear_size))
         self.actor_fc3 = layer_init(nn.Linear(linear_size, np.prod(envs.single_action_space.shape)), std=0.01)
+        self.scale_fc3 = layer_init(nn.Linear(linear_size, np.prod(envs.single_action_space.shape)), std=0.01)
         ## Critic
         self.critic_fc1 = layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), linear_size))
         self.critic_fc2 = layer_init(nn.Linear(linear_size+risk_enc_size, linear_size))
         self.critic_fc3 = layer_init(nn.Linear(linear_size, 1), std=0.01)

         self.actor_logstd = nn.Parameter(torch.zeros(1, np.prod(envs.single_action_space.shape)))
         self.tanh = nn.Tanh()
+        self.sigmoid = nn.Sigmoid()

         self.risk_encoder_actor = nn.Sequential(
             layer_init(nn.Linear(risk_size, 12)),
@@ -223,14 +225,14 @@ def __init__(self, envs, risk_size=2, linear_size=64, risk_enc_size=12, risk_act
             nn.Tanh())



     def forward_actor(self, x, risk):
         risk = self.risk_encoder_actor(risk)
         x = self.tanh(self.actor_fc1(x))
         x = self.tanh(self.actor_fc2(torch.cat([x, risk], axis=1)))
+        scale = self.sigmoid(self.scale_fc3(x))
         x = self.tanh(self.actor_fc3(x))

-        return x
+        return x, scale


     def get_value(self, x, risk):
@@ -242,10 +244,11 @@ def get_value(self, x, risk):
         return value

     def get_action_and_value(self, x, risk, action=None):
-        action_mean = self.forward_actor(x, risk)
+        action_mean, scale = self.forward_actor(x, risk)
+        # print(scale)
         action_logstd = self.actor_logstd.expand_as(action_mean)
         action_std = torch.exp(action_logstd)
-        probs = Normal(action_mean, action_std)
+        probs = Normal(action_mean*scale, action_std*scale)
         if action is None:
             action = probs.sample()
         return action, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.get_value(x, risk)
@@ -810,7 +813,7 @@ def train(cfg):
             writer.add_scalar("Results/Avg_Return", avg_mean_score, global_step)
             torch.save(agent.state_dict(), os.path.join(wandb.run.dir, "policy.pt"))
             wandb.save("policy.pt")
-            print(f"cummulative_cost={cum_cost}, global_step={global_step}, episodic_return={avg_mean_score}, episode_cost={ep_cost}")
+            print(f"cummulative_cost={cum_cost}, global_step={global_step}, episodic_return={avg_mean_score}, episode_cost={ep_cost}, Total Goals={goal_met}")
             if cfg.use_risk:
                 ep_risk = torch.sum(all_risks.squeeze()[last_step:global_step, 0]).item()
                 cum_risk += ep_risk
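For readers skimming the diff, the following is a minimal, self-contained sketch of the idea this commit introduces, not the repository's code: a second linear head produces a sigmoid-valued scale per action dimension, and that scale multiplies both the mean and the standard deviation of the Gaussian policy. The layer sizes, the random observation, and all variable names below are illustrative placeholders.

# Illustrative sketch only -- assumes a PyTorch Gaussian policy head in the
# style of ppo_continuous_action_wandb.py; sizes and names are placeholders.
import torch
import torch.nn as nn
from torch.distributions.normal import Normal

obs_dim, act_dim, hidden = 8, 2, 64
trunk = nn.Linear(obs_dim, hidden)
mean_head = nn.Linear(hidden, act_dim)           # analogous to actor_fc3
scale_head = nn.Linear(hidden, act_dim)          # analogous to the new scale_fc3
log_std = nn.Parameter(torch.zeros(1, act_dim))  # analogous to actor_logstd

x = torch.tanh(trunk(torch.randn(1, obs_dim)))
mean = torch.tanh(mean_head(x))             # raw action mean in (-1, 1)
scale = torch.sigmoid(scale_head(x))        # multiplicative factor in (0, 1)

# The same factor shrinks both the mean and the std, so where the network
# outputs a small scale the sampled action is pulled toward zero and the
# exploration noise shrinks with it.
dist = Normal(mean * scale, torch.exp(log_std.expand_as(mean)) * scale)
action = dist.sample()
print(action, dist.log_prob(action).sum(1), dist.entropy().sum(1))

One consequence worth noting: a Gaussian's entropy grows with its standard deviation, so a small scale also lowers the entropy bonus PPO computes in those states.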

0 comments on commit b15bcdb
