From 42b700b708f41befe95786784fa18f026e09f3a2 Mon Sep 17 00:00:00 2001
From: Bidipta Sarkar
Date: Sun, 5 Nov 2023 21:59:15 -0800
Subject: [PATCH] Modify ADAP for sb3_2 compatibility
---
.../pantheonrl.algos.adap.adap_learn.ADAP.rst | 3 +-
...ntheonrl.algos.adap.policies.MultModel.rst | 4 +-
...ntheonrl.algos.adap.util.get_L2_sphere.rst | 4 +-
.../pantheonrl.algos.adap.util.rst | 4 +-
.../pantheonrl.algos.bc.BCShell.rst | 10 +-
...pantheonrl.algos.bc.ConstantLRSchedule.rst | 3 +-
...os.bc.EpochOrBatchIteratorWithProgress.rst | 3 +-
...l.algos.modular.policies.ModularPolicy.rst | 3 +-
...nrl.common.multiagentenv.MultiAgentEnv.rst | 3 +-
...l.common.multiagentenv.SimultaneousEnv.rst | 3 +-
...onrl.common.multiagentenv.TurnBasedEnv.rst | 3 +-
...common.wrappers.SimultaneousFrameStack.rst | 3 +-
...l.common.wrappers.SimultaneousRecorder.rst | 3 +-
...rl.common.wrappers.TurnBasedFrameStack.rst | 3 +-
...onrl.common.wrappers.TurnBasedRecorder.rst | 3 +-
...envs.blockworldgym.blockworld.BlockEnv.rst | 3 +-
...rldgym.simpleblockworld.SimpleBlockEnv.rst | 3 +-
.../pantheonrl.envs.liargym.liar.LiarEnv.rst | 3 +-
...l.envs.pettingzoo.PettingZooAECWrapper.rst | 3 +-
.../pantheonrl.envs.rpsgym.rps.RPSEnv.rst | 3 +-
...pantheonrl.algos.adap.adap_learn.ADAP.html | 177 ++----
.../pantheonrl.algos.adap.adap_learn.html | 3 +-
...pantheonrl.algos.adap.agent.AdapAgent.html | 17 +-
.../pantheonrl.algos.adap.agent.html | 1 +
.../_autosummary/pantheonrl.algos.adap.html | 12 +-
...heonrl.algos.adap.policies.AdapPolicy.html | 53 +-
...rl.algos.adap.policies.AdapPolicyMult.html | 49 +-
...theonrl.algos.adap.policies.MultModel.html | 107 +++-
.../pantheonrl.algos.adap.policies.html | 7 +-
...theonrl.algos.adap.util.get_L2_sphere.html | 46 +-
...eonrl.algos.adap.util.get_categorical.html | 9 +-
...l.algos.adap.util.get_context_kl_loss.html | 3 +-
...rl.algos.adap.util.get_natural_number.html | 2 +-
...l.algos.adap.util.get_positive_square.html | 5 +-
...eonrl.algos.adap.util.get_unit_square.html | 5 +-
.../pantheonrl.algos.adap.util.html | 21 +-
.../_autosummary/pantheonrl.algos.bc.BC.html | 8 +-
.../pantheonrl.algos.bc.BCShell.html | 32 ++
...antheonrl.algos.bc.ConstantLRSchedule.html | 9 +
...s.bc.EpochOrBatchIteratorWithProgress.html | 9 +
.../_autosummary/pantheonrl.algos.bc.html | 7 +-
.../html/_autosummary/pantheonrl.algos.html | 7 +-
.../pantheonrl.algos.modular.html | 8 +-
....algos.modular.learn.ModularAlgorithm.html | 22 +-
.../pantheonrl.algos.modular.learn.html | 1 +
....algos.modular.policies.ModularPolicy.html | 172 ++++--
.../pantheonrl.algos.modular.policies.html | 3 +-
.../html/_autosummary/pantheonrl.common.html | 3 +-
...rl.common.multiagentenv.MultiAgentEnv.html | 32 +-
....common.multiagentenv.SimultaneousEnv.html | 32 +-
...nrl.common.multiagentenv.TurnBasedEnv.html | 32 +-
...ommon.wrappers.SimultaneousFrameStack.html | 32 +-
....common.wrappers.SimultaneousRecorder.html | 34 +-
...l.common.wrappers.TurnBasedFrameStack.html | 32 +-
...nrl.common.wrappers.TurnBasedRecorder.html | 34 +-
...nvs.blockworldgym.blockworld.BlockEnv.html | 32 +-
.../pantheonrl.envs.blockworldgym.html | 4 +-
...ldgym.simpleblockworld.SimpleBlockEnv.html | 32 +-
.../html/_autosummary/pantheonrl.envs.html | 6 +-
.../_autosummary/pantheonrl.envs.liargym.html | 3 +-
.../pantheonrl.envs.liargym.liar.LiarEnv.html | 32 +-
....envs.pettingzoo.PettingZooAECWrapper.html | 32 +-
.../_autosummary/pantheonrl.envs.rpsgym.html | 3 +-
.../pantheonrl.envs.rpsgym.rps.RPSEnv.html | 32 +-
.../build/html/_autosummary/pantheonrl.html | 19 +-
.../pantheonrl/algos/adap/adap_learn.html | 539 +++++++++---------
.../_modules/pantheonrl/algos/adap/agent.html | 146 ++---
.../pantheonrl/algos/adap/policies.html | 355 +++++++-----
.../_modules/pantheonrl/algos/adap/util.html | 124 ++--
.../html/_modules/pantheonrl/algos/bc.html | 207 ++++---
.../pantheonrl/algos/modular/learn.html | 343 +++++++----
.../pantheonrl/algos/modular/policies.html | 497 ++++++++++------
.../pantheonrl/common/multiagentenv.html | 10 +-
.../_modules/pantheonrl/common/trajsaver.html | 2 +-
...theonrl.algos.adap.adap_learn.ADAP.rst.txt | 3 +-
...onrl.algos.adap.policies.MultModel.rst.txt | 4 +-
...onrl.algos.adap.util.get_L2_sphere.rst.txt | 4 +-
.../pantheonrl.algos.adap.util.rst.txt | 4 +-
.../pantheonrl.algos.bc.BCShell.rst.txt | 10 +-
...heonrl.algos.bc.ConstantLRSchedule.rst.txt | 3 +-
...c.EpochOrBatchIteratorWithProgress.rst.txt | 3 +-
...gos.modular.policies.ModularPolicy.rst.txt | 3 +-
...common.multiagentenv.MultiAgentEnv.rst.txt | 3 +-
...mmon.multiagentenv.SimultaneousEnv.rst.txt | 3 +-
....common.multiagentenv.TurnBasedEnv.rst.txt | 3 +-
...on.wrappers.SimultaneousFrameStack.rst.txt | 3 +-
...mmon.wrappers.SimultaneousRecorder.rst.txt | 3 +-
...ommon.wrappers.TurnBasedFrameStack.rst.txt | 3 +-
....common.wrappers.TurnBasedRecorder.rst.txt | 3 +-
....blockworldgym.blockworld.BlockEnv.rst.txt | 3 +-
...ym.simpleblockworld.SimpleBlockEnv.rst.txt | 3 +-
...ntheonrl.envs.liargym.liar.LiarEnv.rst.txt | 3 +-
...vs.pettingzoo.PettingZooAECWrapper.rst.txt | 3 +-
.../pantheonrl.envs.rpsgym.rps.RPSEnv.rst.txt | 3 +-
docs_build/build/html/genindex.html | 80 ++-
docs_build/build/html/objects.inv | Bin 7666 -> 7877 bytes
docs_build/build/html/searchindex.js | 2 +-
src/pantheonrl/algos/__init__.py | 3 +
src/pantheonrl/algos/adap/__init__.py | 5 +
src/pantheonrl/algos/adap/adap_learn.py | 400 ++++++-------
src/pantheonrl/algos/adap/agent.py | 178 +-----
src/pantheonrl/algos/adap/policies.py | 262 +++++----
src/pantheonrl/algos/adap/util.py | 16 +-
src/pantheonrl/algos/bc.py | 3 +-
src/pantheonrl/algos/modular/__init__.py | 5 +
src/pantheonrl/common/__init__.py | 3 +
src/pantheonrl/envs/blockworldgym/__init__.py | 5 +
src/pantheonrl/envs/liargym/__init__.py | 3 +
src/pantheonrl/envs/rpsgym/__init__.py | 3 +
tests/README.org | 22 +
tests/test_adap.py | 30 +-
111 files changed, 2613 insertions(+), 1991 deletions(-)
diff --git a/docs_build/_autosummary/pantheonrl.algos.adap.adap_learn.ADAP.rst b/docs_build/_autosummary/pantheonrl.algos.adap.adap_learn.ADAP.rst
index 85d422e..1296b39 100644
--- a/docs_build/_autosummary/pantheonrl.algos.adap.adap_learn.ADAP.rst
+++ b/docs_build/_autosummary/pantheonrl.algos.adap.adap_learn.ADAP.rst
@@ -1,4 +1,4 @@
-pantheonrl.algos.adap.adap\_learn.ADAP
+pantheonrl.algos.adap.adap\_learn.ADAP
======================================
.. currentmodule:: pantheonrl.algos.adap.adap_learn
@@ -40,7 +40,6 @@ pantheonrl.algos.adap.adap\_learn.ADAP
~ADAP.logger
~ADAP.policy_aliases
- ~ADAP.full_obs_shape
~ADAP.rollout_buffer
~ADAP.policy
~ADAP.observation_space
diff --git a/docs_build/_autosummary/pantheonrl.algos.adap.policies.MultModel.rst b/docs_build/_autosummary/pantheonrl.algos.adap.policies.MultModel.rst
index 1fa8ce4..f619fe7 100644
--- a/docs_build/_autosummary/pantheonrl.algos.adap.policies.MultModel.rst
+++ b/docs_build/_autosummary/pantheonrl.algos.adap.policies.MultModel.rst
@@ -1,4 +1,4 @@
-pantheonrl.algos.adap.policies.MultModel
+pantheonrl.algos.adap.policies.MultModel
========================================
.. currentmodule:: pantheonrl.algos.adap.policies
@@ -32,8 +32,6 @@ pantheonrl.algos.adap.policies.MultModel
~MultModel.forward_critic
~MultModel.get_buffer
~MultModel.get_extra_state
- ~MultModel.get_input_size_excluding_ctx
- ~MultModel.get_input_size_inluding_ctx
~MultModel.get_parameter
~MultModel.get_submodule
~MultModel.half
diff --git a/docs_build/_autosummary/pantheonrl.algos.adap.util.get_L2_sphere.rst b/docs_build/_autosummary/pantheonrl.algos.adap.util.get_L2_sphere.rst
index db95f21..778f650 100644
--- a/docs_build/_autosummary/pantheonrl.algos.adap.util.get_L2_sphere.rst
+++ b/docs_build/_autosummary/pantheonrl.algos.adap.util.get_L2_sphere.rst
@@ -1,6 +1,6 @@
-pantheonrl.algos.adap.util.get\_L2\_sphere
+pantheonrl.algos.adap.util.get\_l2\_sphere
==========================================
.. currentmodule:: pantheonrl.algos.adap.util
-.. autofunction:: get_L2_sphere
\ No newline at end of file
+.. autofunction:: get_l2_sphere
\ No newline at end of file
diff --git a/docs_build/_autosummary/pantheonrl.algos.adap.util.rst b/docs_build/_autosummary/pantheonrl.algos.adap.util.rst
index 7f1ccf1..2d6ad59 100644
--- a/docs_build/_autosummary/pantheonrl.algos.adap.util.rst
+++ b/docs_build/_autosummary/pantheonrl.algos.adap.util.rst
@@ -1,4 +1,4 @@
-pantheonrl.algos.adap.util
+pantheonrl.algos.adap.util
==========================
@@ -19,9 +19,9 @@ pantheonrl.algos.adap.util
:toctree:
:nosignatures:
- get_L2_sphere
get_categorical
get_context_kl_loss
+ get_l2_sphere
get_natural_number
get_positive_square
get_unit_square
diff --git a/docs_build/_autosummary/pantheonrl.algos.bc.BCShell.rst b/docs_build/_autosummary/pantheonrl.algos.bc.BCShell.rst
index 440e2e0..5af1b7c 100644
--- a/docs_build/_autosummary/pantheonrl.algos.bc.BCShell.rst
+++ b/docs_build/_autosummary/pantheonrl.algos.bc.BCShell.rst
@@ -1,4 +1,4 @@
-pantheonrl.algos.bc.BCShell
+pantheonrl.algos.bc.BCShell
===========================
.. currentmodule:: pantheonrl.algos.bc
@@ -16,9 +16,17 @@ pantheonrl.algos.bc.BCShell
.. autosummary::
:nosignatures:
+ ~BCShell.get_policy
+ ~BCShell.set_policy
+ .. rubric:: Attributes
+
+ .. autosummary::
+
+ ~BCShell.policy
+
\ No newline at end of file
diff --git a/docs_build/_autosummary/pantheonrl.algos.bc.ConstantLRSchedule.rst b/docs_build/_autosummary/pantheonrl.algos.bc.ConstantLRSchedule.rst
index 7a3363e..cfe941b 100644
--- a/docs_build/_autosummary/pantheonrl.algos.bc.ConstantLRSchedule.rst
+++ b/docs_build/_autosummary/pantheonrl.algos.bc.ConstantLRSchedule.rst
@@ -1,4 +1,4 @@
-pantheonrl.algos.bc.ConstantLRSchedule
+pantheonrl.algos.bc.ConstantLRSchedule
======================================
.. currentmodule:: pantheonrl.algos.bc
@@ -16,6 +16,7 @@ pantheonrl.algos.bc.ConstantLRSchedule
.. autosummary::
:nosignatures:
+ ~ConstantLRSchedule.set_lr
diff --git a/docs_build/_autosummary/pantheonrl.algos.bc.EpochOrBatchIteratorWithProgress.rst b/docs_build/_autosummary/pantheonrl.algos.bc.EpochOrBatchIteratorWithProgress.rst
index 771fb84..31294a3 100644
--- a/docs_build/_autosummary/pantheonrl.algos.bc.EpochOrBatchIteratorWithProgress.rst
+++ b/docs_build/_autosummary/pantheonrl.algos.bc.EpochOrBatchIteratorWithProgress.rst
@@ -1,4 +1,4 @@
-pantheonrl.algos.bc.EpochOrBatchIteratorWithProgress
+pantheonrl.algos.bc.EpochOrBatchIteratorWithProgress
====================================================
.. currentmodule:: pantheonrl.algos.bc
@@ -16,6 +16,7 @@ pantheonrl.algos.bc.EpochOrBatchIteratorWithProgress
.. autosummary::
:nosignatures:
+ ~EpochOrBatchIteratorWithProgress.set_data_loader
diff --git a/docs_build/_autosummary/pantheonrl.algos.modular.policies.ModularPolicy.rst b/docs_build/_autosummary/pantheonrl.algos.modular.policies.ModularPolicy.rst
index a923d24..265a0fb 100644
--- a/docs_build/_autosummary/pantheonrl.algos.modular.policies.ModularPolicy.rst
+++ b/docs_build/_autosummary/pantheonrl.algos.modular.policies.ModularPolicy.rst
@@ -1,4 +1,4 @@
-pantheonrl.algos.modular.policies.ModularPolicy
+pantheonrl.algos.modular.policies.ModularPolicy
===============================================
.. currentmodule:: pantheonrl.algos.modular.policies
@@ -67,7 +67,6 @@ pantheonrl.algos.modular.policies.ModularPolicy
~ModularPolicy.register_parameter
~ModularPolicy.register_state_dict_pre_hook
~ModularPolicy.requires_grad_
- ~ModularPolicy.reset_noise
~ModularPolicy.save
~ModularPolicy.scale_action
~ModularPolicy.set_extra_state
diff --git a/docs_build/_autosummary/pantheonrl.common.multiagentenv.MultiAgentEnv.rst b/docs_build/_autosummary/pantheonrl.common.multiagentenv.MultiAgentEnv.rst
index 98e1e45..8a5fb62 100644
--- a/docs_build/_autosummary/pantheonrl.common.multiagentenv.MultiAgentEnv.rst
+++ b/docs_build/_autosummary/pantheonrl.common.multiagentenv.MultiAgentEnv.rst
@@ -1,4 +1,4 @@
-pantheonrl.common.multiagentenv.MultiAgentEnv
+pantheonrl.common.multiagentenv.MultiAgentEnv
=============================================
.. currentmodule:: pantheonrl.common.multiagentenv
@@ -25,6 +25,7 @@ pantheonrl.common.multiagentenv.MultiAgentEnv
~MultiAgentEnv.n_reset
~MultiAgentEnv.n_step
~MultiAgentEnv.render
+ ~MultiAgentEnv.resample_null
~MultiAgentEnv.resample_random
~MultiAgentEnv.resample_round_robin
~MultiAgentEnv.reset
diff --git a/docs_build/_autosummary/pantheonrl.common.multiagentenv.SimultaneousEnv.rst b/docs_build/_autosummary/pantheonrl.common.multiagentenv.SimultaneousEnv.rst
index 4b4718a..d3edfc2 100644
--- a/docs_build/_autosummary/pantheonrl.common.multiagentenv.SimultaneousEnv.rst
+++ b/docs_build/_autosummary/pantheonrl.common.multiagentenv.SimultaneousEnv.rst
@@ -1,4 +1,4 @@
-pantheonrl.common.multiagentenv.SimultaneousEnv
+pantheonrl.common.multiagentenv.SimultaneousEnv
===============================================
.. currentmodule:: pantheonrl.common.multiagentenv
@@ -27,6 +27,7 @@ pantheonrl.common.multiagentenv.SimultaneousEnv
~SimultaneousEnv.n_reset
~SimultaneousEnv.n_step
~SimultaneousEnv.render
+ ~SimultaneousEnv.resample_null
~SimultaneousEnv.resample_random
~SimultaneousEnv.resample_round_robin
~SimultaneousEnv.reset
diff --git a/docs_build/_autosummary/pantheonrl.common.multiagentenv.TurnBasedEnv.rst b/docs_build/_autosummary/pantheonrl.common.multiagentenv.TurnBasedEnv.rst
index 73d80b8..154abad 100644
--- a/docs_build/_autosummary/pantheonrl.common.multiagentenv.TurnBasedEnv.rst
+++ b/docs_build/_autosummary/pantheonrl.common.multiagentenv.TurnBasedEnv.rst
@@ -1,4 +1,4 @@
-pantheonrl.common.multiagentenv.TurnBasedEnv
+pantheonrl.common.multiagentenv.TurnBasedEnv
============================================
.. currentmodule:: pantheonrl.common.multiagentenv
@@ -28,6 +28,7 @@ pantheonrl.common.multiagentenv.TurnBasedEnv
~TurnBasedEnv.n_reset
~TurnBasedEnv.n_step
~TurnBasedEnv.render
+ ~TurnBasedEnv.resample_null
~TurnBasedEnv.resample_random
~TurnBasedEnv.resample_round_robin
~TurnBasedEnv.reset
diff --git a/docs_build/_autosummary/pantheonrl.common.wrappers.SimultaneousFrameStack.rst b/docs_build/_autosummary/pantheonrl.common.wrappers.SimultaneousFrameStack.rst
index 675e405..a7a1827 100644
--- a/docs_build/_autosummary/pantheonrl.common.wrappers.SimultaneousFrameStack.rst
+++ b/docs_build/_autosummary/pantheonrl.common.wrappers.SimultaneousFrameStack.rst
@@ -1,4 +1,4 @@
-pantheonrl.common.wrappers.SimultaneousFrameStack
+pantheonrl.common.wrappers.SimultaneousFrameStack
=================================================
.. currentmodule:: pantheonrl.common.wrappers
@@ -27,6 +27,7 @@ pantheonrl.common.wrappers.SimultaneousFrameStack
~SimultaneousFrameStack.n_reset
~SimultaneousFrameStack.n_step
~SimultaneousFrameStack.render
+ ~SimultaneousFrameStack.resample_null
~SimultaneousFrameStack.resample_random
~SimultaneousFrameStack.resample_round_robin
~SimultaneousFrameStack.reset
diff --git a/docs_build/_autosummary/pantheonrl.common.wrappers.SimultaneousRecorder.rst b/docs_build/_autosummary/pantheonrl.common.wrappers.SimultaneousRecorder.rst
index bcf183a..97a6ba4 100644
--- a/docs_build/_autosummary/pantheonrl.common.wrappers.SimultaneousRecorder.rst
+++ b/docs_build/_autosummary/pantheonrl.common.wrappers.SimultaneousRecorder.rst
@@ -1,4 +1,4 @@
-pantheonrl.common.wrappers.SimultaneousRecorder
+pantheonrl.common.wrappers.SimultaneousRecorder
===============================================
.. currentmodule:: pantheonrl.common.wrappers
@@ -28,6 +28,7 @@ pantheonrl.common.wrappers.SimultaneousRecorder
~SimultaneousRecorder.n_reset
~SimultaneousRecorder.n_step
~SimultaneousRecorder.render
+ ~SimultaneousRecorder.resample_null
~SimultaneousRecorder.resample_random
~SimultaneousRecorder.resample_round_robin
~SimultaneousRecorder.reset
diff --git a/docs_build/_autosummary/pantheonrl.common.wrappers.TurnBasedFrameStack.rst b/docs_build/_autosummary/pantheonrl.common.wrappers.TurnBasedFrameStack.rst
index de3281d..872318a 100644
--- a/docs_build/_autosummary/pantheonrl.common.wrappers.TurnBasedFrameStack.rst
+++ b/docs_build/_autosummary/pantheonrl.common.wrappers.TurnBasedFrameStack.rst
@@ -1,4 +1,4 @@
-pantheonrl.common.wrappers.TurnBasedFrameStack
+pantheonrl.common.wrappers.TurnBasedFrameStack
==============================================
.. currentmodule:: pantheonrl.common.wrappers
@@ -28,6 +28,7 @@ pantheonrl.common.wrappers.TurnBasedFrameStack
~TurnBasedFrameStack.n_reset
~TurnBasedFrameStack.n_step
~TurnBasedFrameStack.render
+ ~TurnBasedFrameStack.resample_null
~TurnBasedFrameStack.resample_random
~TurnBasedFrameStack.resample_round_robin
~TurnBasedFrameStack.reset
diff --git a/docs_build/_autosummary/pantheonrl.common.wrappers.TurnBasedRecorder.rst b/docs_build/_autosummary/pantheonrl.common.wrappers.TurnBasedRecorder.rst
index 04ce8b1..2e45f86 100644
--- a/docs_build/_autosummary/pantheonrl.common.wrappers.TurnBasedRecorder.rst
+++ b/docs_build/_autosummary/pantheonrl.common.wrappers.TurnBasedRecorder.rst
@@ -1,4 +1,4 @@
-pantheonrl.common.wrappers.TurnBasedRecorder
+pantheonrl.common.wrappers.TurnBasedRecorder
============================================
.. currentmodule:: pantheonrl.common.wrappers
@@ -29,6 +29,7 @@ pantheonrl.common.wrappers.TurnBasedRecorder
~TurnBasedRecorder.n_reset
~TurnBasedRecorder.n_step
~TurnBasedRecorder.render
+ ~TurnBasedRecorder.resample_null
~TurnBasedRecorder.resample_random
~TurnBasedRecorder.resample_round_robin
~TurnBasedRecorder.reset
diff --git a/docs_build/_autosummary/pantheonrl.envs.blockworldgym.blockworld.BlockEnv.rst b/docs_build/_autosummary/pantheonrl.envs.blockworldgym.blockworld.BlockEnv.rst
index b6adb9f..db99c20 100644
--- a/docs_build/_autosummary/pantheonrl.envs.blockworldgym.blockworld.BlockEnv.rst
+++ b/docs_build/_autosummary/pantheonrl.envs.blockworldgym.blockworld.BlockEnv.rst
@@ -1,4 +1,4 @@
-pantheonrl.envs.blockworldgym.blockworld.BlockEnv
+pantheonrl.envs.blockworldgym.blockworld.BlockEnv
=================================================
.. currentmodule:: pantheonrl.envs.blockworldgym.blockworld
@@ -28,6 +28,7 @@ pantheonrl.envs.blockworldgym.blockworld.BlockEnv
~BlockEnv.n_reset
~BlockEnv.n_step
~BlockEnv.render
+ ~BlockEnv.resample_null
~BlockEnv.resample_random
~BlockEnv.resample_round_robin
~BlockEnv.reset
diff --git a/docs_build/_autosummary/pantheonrl.envs.blockworldgym.simpleblockworld.SimpleBlockEnv.rst b/docs_build/_autosummary/pantheonrl.envs.blockworldgym.simpleblockworld.SimpleBlockEnv.rst
index 9792f9e..36e107e 100644
--- a/docs_build/_autosummary/pantheonrl.envs.blockworldgym.simpleblockworld.SimpleBlockEnv.rst
+++ b/docs_build/_autosummary/pantheonrl.envs.blockworldgym.simpleblockworld.SimpleBlockEnv.rst
@@ -1,4 +1,4 @@
-pantheonrl.envs.blockworldgym.simpleblockworld.SimpleBlockEnv
+pantheonrl.envs.blockworldgym.simpleblockworld.SimpleBlockEnv
=============================================================
.. currentmodule:: pantheonrl.envs.blockworldgym.simpleblockworld
@@ -28,6 +28,7 @@ pantheonrl.envs.blockworldgym.simpleblockworld.SimpleBlockEnv
~SimpleBlockEnv.n_reset
~SimpleBlockEnv.n_step
~SimpleBlockEnv.render
+ ~SimpleBlockEnv.resample_null
~SimpleBlockEnv.resample_random
~SimpleBlockEnv.resample_round_robin
~SimpleBlockEnv.reset
diff --git a/docs_build/_autosummary/pantheonrl.envs.liargym.liar.LiarEnv.rst b/docs_build/_autosummary/pantheonrl.envs.liargym.liar.LiarEnv.rst
index acea78f..55816de 100644
--- a/docs_build/_autosummary/pantheonrl.envs.liargym.liar.LiarEnv.rst
+++ b/docs_build/_autosummary/pantheonrl.envs.liargym.liar.LiarEnv.rst
@@ -1,4 +1,4 @@
-pantheonrl.envs.liargym.liar.LiarEnv
+pantheonrl.envs.liargym.liar.LiarEnv
====================================
.. currentmodule:: pantheonrl.envs.liargym.liar
@@ -28,6 +28,7 @@ pantheonrl.envs.liargym.liar.LiarEnv
~LiarEnv.n_reset
~LiarEnv.n_step
~LiarEnv.render
+ ~LiarEnv.resample_null
~LiarEnv.resample_random
~LiarEnv.resample_round_robin
~LiarEnv.reset
diff --git a/docs_build/_autosummary/pantheonrl.envs.pettingzoo.PettingZooAECWrapper.rst b/docs_build/_autosummary/pantheonrl.envs.pettingzoo.PettingZooAECWrapper.rst
index a3a5f3a..51331b7 100644
--- a/docs_build/_autosummary/pantheonrl.envs.pettingzoo.PettingZooAECWrapper.rst
+++ b/docs_build/_autosummary/pantheonrl.envs.pettingzoo.PettingZooAECWrapper.rst
@@ -1,4 +1,4 @@
-pantheonrl.envs.pettingzoo.PettingZooAECWrapper
+pantheonrl.envs.pettingzoo.PettingZooAECWrapper
===============================================
.. currentmodule:: pantheonrl.envs.pettingzoo
@@ -25,6 +25,7 @@ pantheonrl.envs.pettingzoo.PettingZooAECWrapper
~PettingZooAECWrapper.n_reset
~PettingZooAECWrapper.n_step
~PettingZooAECWrapper.render
+ ~PettingZooAECWrapper.resample_null
~PettingZooAECWrapper.resample_random
~PettingZooAECWrapper.resample_round_robin
~PettingZooAECWrapper.reset
diff --git a/docs_build/_autosummary/pantheonrl.envs.rpsgym.rps.RPSEnv.rst b/docs_build/_autosummary/pantheonrl.envs.rpsgym.rps.RPSEnv.rst
index 50d2996..cd0b1bc 100644
--- a/docs_build/_autosummary/pantheonrl.envs.rpsgym.rps.RPSEnv.rst
+++ b/docs_build/_autosummary/pantheonrl.envs.rpsgym.rps.RPSEnv.rst
@@ -1,4 +1,4 @@
-pantheonrl.envs.rpsgym.rps.RPSEnv
+pantheonrl.envs.rpsgym.rps.RPSEnv
=================================
.. currentmodule:: pantheonrl.envs.rpsgym.rps
@@ -27,6 +27,7 @@ pantheonrl.envs.rpsgym.rps.RPSEnv
~RPSEnv.n_reset
~RPSEnv.n_step
~RPSEnv.render
+ ~RPSEnv.resample_null
~RPSEnv.resample_random
~RPSEnv.resample_round_robin
~RPSEnv.reset
diff --git a/docs_build/build/html/_autosummary/pantheonrl.algos.adap.adap_learn.ADAP.html b/docs_build/build/html/_autosummary/pantheonrl.algos.adap.adap_learn.ADAP.html
index cf933a4..2a48cb4 100644
--- a/docs_build/build/html/_autosummary/pantheonrl.algos.adap.adap_learn.ADAP.html
+++ b/docs_build/build/html/_autosummary/pantheonrl.algos.adap.adap_learn.ADAP.html
@@ -99,84 +99,14 @@
pantheonrl.algos.adap.adap_learn.ADAP
-
-class ADAP(policy, env, learning_rate=0.0003, n_steps=2048, batch_size=64, n_epochs=10, gamma=0.99, gae_lambda=0.95, clip_range=0.2, clip_range_vf=None, ent_coef=0.0, vf_coef=0.5, max_grad_norm=0.5, use_sde=False, sde_sample_freq=-1, target_kl=None, tensorboard_log=None, create_eval_env=False, policy_kwargs=None, verbose=0, seed=None, device='auto', _init_setup_model=True, context_loss_coeff=0.1, context_size=3, num_context_samples=5, context_sampler='l2', num_state_samples=32)[source]
+class ADAP(policy, env, learning_rate=0.0003, n_steps=2048, batch_size=64, n_epochs=10, gamma=0.99, gae_lambda=0.95, clip_range=0.2, clip_range_vf=None, normalize_advantage=True, ent_coef=0.0, vf_coef=0.5, max_grad_norm=0.5, use_sde=False, sde_sample_freq=-1, target_kl=None, stats_window_size=100, tensorboard_log=None, policy_kwargs=None, verbose=0, seed=None, device='auto', _init_setup_model=True, context_loss_coeff=0.1, context_size=3, num_context_samples=5, context_sampler='l2', num_state_samples=32)[source]
Bases: OnPolicyAlgorithm
-Borrows from Proximal Policy Optimization algorithm (PPO) (clip version)
-Paper: https://arxiv.org/abs/1707.06347
-Code: This implementation borrows code from OpenAI Spinning Up
-(https://github.com/openai/spinningup/)
-https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail and
-and Stable Baselines (PPO2 from https://github.com/hill-a/stable-baselines)
-Introduction to PPO:
-https://spinningup.openai.com/en/latest/algorithms/ppo.html
-:param policy: The policy model to use (MlpPolicy, CnnPolicy, …)
-:param env: The environment to learn from
-
-(if registered in Gym, can be str)
-
-
-- Parameters:
-
-learning_rate (float | Callable[[float], float]) – The learning rate, it can be a function
-of the current progress remaining (from 1 to 0)
-n_steps (int) – The number of steps to run for each environment per update
-(i.e. rollout buffer size is n_steps * n_envs where n_envs is number of
-environment copies running in parallel)
-NOTE: n_steps * n_envs must be greater than 1 (because of the advantage
-normalization) See https://github.com/pytorch/pytorch/issues/29372
-batch_size (int) – Minibatch size
-n_epochs (int) – Number of epoch when optimizing the surrogate loss
-gamma (float) – Discount factor
-gae_lambda (float) – Factor for trade-off of bias vs variance for Generalized
-Advantage Estimator
-clip_range (float | Callable[[float], float]) – Clipping parameter, it can be a function of the current
-progress remaining (from 1 to 0).
-clip_range_vf (None | float | Callable[[float], float]) – Clipping parameter for the value function,
-it can be a function of the current progress remaining (from 1 to 0).
-This is a parameter specific to the OpenAI implementation. If None is
-passed (default), no clipping will be done on the value function.
-IMPORTANT: this clipping depends on the reward scaling.
-ent_coef (float) – Entropy coefficient for the loss calculation
-vf_coef (float) – Value function coefficient for the loss calculation
-max_grad_norm (float) – The maximum value for the gradient clipping
-use_sde (bool) – Whether to use generalized State Dependent Exploration
-(gSDE) instead of action noise exploration (default: False)
-sde_sample_freq (int) – Sample a new noise matrix every n steps when using
-gSDE
-Default: -1 (only sample at the beginning of the rollout)
-target_kl (float | None) – Limit the KL divergence between updates,
-because the clipping is not enough to prevent large update
-see issue #213
-(cf https://github.com/hill-a/stable-baselines/issues/213)
-By default, there is no limit on the kl div.
-tensorboard_log (str | None) – the log location for tensorboard
-(if None, no logging)
-create_eval_env (bool) – Whether to create a second environment that will be
-used for evaluating the agent periodically. (Only available when
-passing string for the environment)
-policy_kwargs (Dict[str, Any] | None) – additional arguments to be passed to the policy on
-creation
-verbose (int) – the verbosity level: 0 no output, 1 info, 2 debug
-seed (int | None) – Seed for the pseudo random generators
-device (device | str) – Device (cpu, cuda, …) on which the code should be run.
-Setting it to auto, the code will be run on the GPU if possible.
-_init_setup_model (bool) – Whether or not to build the network at the
-creation of the instance
-policy (ActorCriticPolicy) –
-env (Env | VecEnv | str) –
-context_loss_coeff (float) –
-context_size (int) –
-num_context_samples (int) –
-context_sampler (str) –
-num_state_samples (int) –
-
-
-
+Borrows from Proximal Policy Optimization algorithm (PPO) (clip version)
Methods
collect_rollouts
|
-Nearly identical to OnPolicyAlgorithm's collect_rollouts, but it also resamples the context every episode. |
+Collect rollouts using the current policy and fill a RolloutBuffer. |
get_env
|
Returns the current environment (can be None if not defined). |
@@ -200,7 +130,7 @@ pantheonrl.algos.adap.adap_learn.ADAPset_env
|
-Checks the validity of the environment, and if it is coherent, set it as the current environment. |
+Set the env to use |
set_logger
|
Setter for for logger object. |
@@ -225,51 +155,75 @@ pantheonrl.algos.adap.adap_learn.ADAPpolicy_aliases
|
|
-full_obs_shape
|
- |
-
-rollout_buffer
|
+
rollout_buffer
|
|
-policy
|
+
policy
|
|
-observation_space
|
+
observation_space
|
|
-action_space
|
+
action_space
|
|
-n_envs
|
+
n_envs
|
|
-lr_schedule
|
+
lr_schedule
|
|
+
+- Parameters:
+
+policy (ActorCriticPolicy) –
+env (Env | VecEnv | str) –
+learning_rate (float | Callable[[float], float]) –
+n_steps (int) –
+batch_size (int) –
+n_epochs (int) –
+gamma (float) –
+gae_lambda (float) –
+clip_range (float | Callable[[float], float]) –
+clip_range_vf (None | float | Callable[[float], float]) –
+normalize_advantage (bool) –
+ent_coef (float) –
+vf_coef (float) –
+max_grad_norm (float) –
+use_sde (bool) –
+sde_sample_freq (int) –
+target_kl (float | None) –
+stats_window_size (int) –
+tensorboard_log (str | None) –
+policy_kwargs (Dict[str, Any] | None) –
+verbose (int) –
+seed (int | None) –
+device (device | str) –
+_init_setup_model (bool) –
+context_loss_coeff (float) –
+context_size (int) –
+num_context_samples (int) –
+context_sampler (str) –
+num_state_samples (int) –
+
+
+
-
collect_rollouts(env, callback, rollout_buffer, n_rollout_steps)[source]
-Nearly identical to OnPolicyAlgorithm’s collect_rollouts, but it also
-resamples the context every episode.
-Collect experiences using the current policy and fill a
-RolloutBuffer
.
+
Collect rollouts using the current policy and fill a RolloutBuffer.
The term rollout here refers to the model-free notion and should not
-be used with the concept of rollout used in model-based RL or planning.
-:param env: The training environment
-:param callback: Callback that will be called at each step
-
-(and at the beginning and end of the rollout)
-
+be used with the concept of rollout used in model-based RL or planning.
- Parameters:
+env (VecEnv) – The training environment
+callback (BaseCallback) – Callback that will be called at each step
+(and at the beginning and end of the rollout)
rollout_buffer (RolloutBuffer) – Buffer to fill with rollouts
-n_steps – Number of experiences to collect per environment
-env (VecEnv) –
-callback (BaseCallback) –
-n_rollout_steps (int) –
+n_rollout_steps (int) – Number of experiences to collect per env
- Returns:
@@ -328,7 +282,7 @@ pantheonrl.algos.adap.adap_learn.ADAP[source]
Return a trained model.
- Parameters:
@@ -338,19 +292,12 @@ pantheonrl.algos.adap.adap_learn.ADAP
@@ -443,22 +390,8 @@ pantheonrl.algos.adap.adap_learn.ADAP