From cef05e4bad918d27afc5fdb3b5da4719b63a376f Mon Sep 17 00:00:00 2001 From: Noname Date: Sun, 23 Apr 2023 11:35:33 -0400 Subject: [PATCH 1/2] added ppo-ue --- rl_games/algos_torch/models.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rl_games/algos_torch/models.py b/rl_games/algos_torch/models.py index e6772fc0..287826cf 100644 --- a/rl_games/algos_torch/models.py +++ b/rl_games/algos_torch/models.py @@ -276,6 +276,9 @@ def forward(self, input_dict): return result else: selected_action = distr.sample() + choice = torch.rand_like(selected_action) > 0.02 + choice = choice.float() + selected_action = selected_action * choice + mu * (1 - choice) neglogp = self.neglogp(selected_action, mu, sigma, logstd) result = { 'neglogpacs' : torch.squeeze(neglogp), From 2880bc62172399eac2c1be2a6dbe86ef6fb35b04 Mon Sep 17 00:00:00 2001 From: Noname Date: Mon, 1 May 2023 19:54:43 -0400 Subject: [PATCH 2/2] updated const --- rl_games/algos_torch/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rl_games/algos_torch/models.py b/rl_games/algos_torch/models.py index 287826cf..ac234d53 100644 --- a/rl_games/algos_torch/models.py +++ b/rl_games/algos_torch/models.py @@ -276,7 +276,7 @@ def forward(self, input_dict): return result else: selected_action = distr.sample() - choice = torch.rand_like(selected_action) > 0.02 + choice = torch.rand_like(selected_action) > 0.1 choice = choice.float() selected_action = selected_action * choice + mu * (1 - choice) neglogp = self.neglogp(selected_action, mu, sigma, logstd)