From cef05e4bad918d27afc5fdb3b5da4719b63a376f Mon Sep 17 00:00:00 2001
From: Noname <nepalimsa@>
Date: Sun, 23 Apr 2023 11:35:33 -0400
Subject: [PATCH 1/2] added ppo-ue

---
 rl_games/algos_torch/models.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/rl_games/algos_torch/models.py b/rl_games/algos_torch/models.py
index e6772fc0..287826cf 100644
--- a/rl_games/algos_torch/models.py
+++ b/rl_games/algos_torch/models.py
@@ -276,6 +276,9 @@ def forward(self, input_dict):
                 return result
             else:
                 selected_action = distr.sample()
+                choice = torch.rand_like(selected_action) > 0.02
+                choice = choice.float()
+                selected_action = selected_action * choice + mu * (1 - choice)
                 neglogp = self.neglogp(selected_action, mu, sigma, logstd)
                 result = {
                     'neglogpacs' : torch.squeeze(neglogp),

From 2880bc62172399eac2c1be2a6dbe86ef6fb35b04 Mon Sep 17 00:00:00 2001
From: Noname <nepalimsa@>
Date: Mon, 1 May 2023 19:54:43 -0400
Subject: [PATCH 2/2] updated const

---
 rl_games/algos_torch/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rl_games/algos_torch/models.py b/rl_games/algos_torch/models.py
index 287826cf..ac234d53 100644
--- a/rl_games/algos_torch/models.py
+++ b/rl_games/algos_torch/models.py
@@ -276,7 +276,7 @@ def forward(self, input_dict):
                 return result
             else:
                 selected_action = distr.sample()
-                choice = torch.rand_like(selected_action) > 0.02
+                choice = torch.rand_like(selected_action) > 0.1
                 choice = choice.float()
                 selected_action = selected_action * choice + mu * (1 - choice)
                 neglogp = self.neglogp(selected_action, mu, sigma, logstd)