From 42b700b708f41befe95786784fa18f026e09f3a2 Mon Sep 17 00:00:00 2001 From: Bidipta Sarkar Date: Sun, 5 Nov 2023 21:59:15 -0800 Subject: [PATCH] Modify ADAP for sb3_2 compatibility --- .../pantheonrl.algos.adap.adap_learn.ADAP.rst | 3 +- ...ntheonrl.algos.adap.policies.MultModel.rst | 4 +- ...ntheonrl.algos.adap.util.get_L2_sphere.rst | 4 +- .../pantheonrl.algos.adap.util.rst | 4 +- .../pantheonrl.algos.bc.BCShell.rst | 10 +- ...pantheonrl.algos.bc.ConstantLRSchedule.rst | 3 +- ...os.bc.EpochOrBatchIteratorWithProgress.rst | 3 +- ...l.algos.modular.policies.ModularPolicy.rst | 3 +- ...nrl.common.multiagentenv.MultiAgentEnv.rst | 3 +- ...l.common.multiagentenv.SimultaneousEnv.rst | 3 +- ...onrl.common.multiagentenv.TurnBasedEnv.rst | 3 +- ...common.wrappers.SimultaneousFrameStack.rst | 3 +- ...l.common.wrappers.SimultaneousRecorder.rst | 3 +- ...rl.common.wrappers.TurnBasedFrameStack.rst | 3 +- ...onrl.common.wrappers.TurnBasedRecorder.rst | 3 +- ...envs.blockworldgym.blockworld.BlockEnv.rst | 3 +- ...rldgym.simpleblockworld.SimpleBlockEnv.rst | 3 +- .../pantheonrl.envs.liargym.liar.LiarEnv.rst | 3 +- ...l.envs.pettingzoo.PettingZooAECWrapper.rst | 3 +- .../pantheonrl.envs.rpsgym.rps.RPSEnv.rst | 3 +- ...pantheonrl.algos.adap.adap_learn.ADAP.html | 177 ++---- .../pantheonrl.algos.adap.adap_learn.html | 3 +- ...pantheonrl.algos.adap.agent.AdapAgent.html | 17 +- .../pantheonrl.algos.adap.agent.html | 1 + .../_autosummary/pantheonrl.algos.adap.html | 12 +- ...heonrl.algos.adap.policies.AdapPolicy.html | 53 +- ...rl.algos.adap.policies.AdapPolicyMult.html | 49 +- ...theonrl.algos.adap.policies.MultModel.html | 107 +++- .../pantheonrl.algos.adap.policies.html | 7 +- ...theonrl.algos.adap.util.get_L2_sphere.html | 46 +- ...eonrl.algos.adap.util.get_categorical.html | 9 +- ...l.algos.adap.util.get_context_kl_loss.html | 3 +- ...rl.algos.adap.util.get_natural_number.html | 2 +- ...l.algos.adap.util.get_positive_square.html | 5 +- ...eonrl.algos.adap.util.get_unit_square.html | 5 +- .../pantheonrl.algos.adap.util.html | 21 +- .../_autosummary/pantheonrl.algos.bc.BC.html | 8 +- .../pantheonrl.algos.bc.BCShell.html | 32 ++ ...antheonrl.algos.bc.ConstantLRSchedule.html | 9 + ...s.bc.EpochOrBatchIteratorWithProgress.html | 9 + .../_autosummary/pantheonrl.algos.bc.html | 7 +- .../html/_autosummary/pantheonrl.algos.html | 7 +- .../pantheonrl.algos.modular.html | 8 +- ....algos.modular.learn.ModularAlgorithm.html | 22 +- .../pantheonrl.algos.modular.learn.html | 1 + ....algos.modular.policies.ModularPolicy.html | 172 ++++-- .../pantheonrl.algos.modular.policies.html | 3 +- .../html/_autosummary/pantheonrl.common.html | 3 +- ...rl.common.multiagentenv.MultiAgentEnv.html | 32 +- ....common.multiagentenv.SimultaneousEnv.html | 32 +- ...nrl.common.multiagentenv.TurnBasedEnv.html | 32 +- ...ommon.wrappers.SimultaneousFrameStack.html | 32 +- ....common.wrappers.SimultaneousRecorder.html | 34 +- ...l.common.wrappers.TurnBasedFrameStack.html | 32 +- ...nrl.common.wrappers.TurnBasedRecorder.html | 34 +- ...nvs.blockworldgym.blockworld.BlockEnv.html | 32 +- .../pantheonrl.envs.blockworldgym.html | 4 +- ...ldgym.simpleblockworld.SimpleBlockEnv.html | 32 +- .../html/_autosummary/pantheonrl.envs.html | 6 +- .../_autosummary/pantheonrl.envs.liargym.html | 3 +- .../pantheonrl.envs.liargym.liar.LiarEnv.html | 32 +- ....envs.pettingzoo.PettingZooAECWrapper.html | 32 +- .../_autosummary/pantheonrl.envs.rpsgym.html | 3 +- .../pantheonrl.envs.rpsgym.rps.RPSEnv.html | 32 +- .../build/html/_autosummary/pantheonrl.html | 19 +- 
.../pantheonrl/algos/adap/adap_learn.html | 539 +++++++++--------- .../_modules/pantheonrl/algos/adap/agent.html | 146 ++--- .../pantheonrl/algos/adap/policies.html | 355 +++++++----- .../_modules/pantheonrl/algos/adap/util.html | 124 ++-- .../html/_modules/pantheonrl/algos/bc.html | 207 ++++--- .../pantheonrl/algos/modular/learn.html | 343 +++++++---- .../pantheonrl/algos/modular/policies.html | 497 ++++++++++------ .../pantheonrl/common/multiagentenv.html | 10 +- .../_modules/pantheonrl/common/trajsaver.html | 2 +- ...theonrl.algos.adap.adap_learn.ADAP.rst.txt | 3 +- ...onrl.algos.adap.policies.MultModel.rst.txt | 4 +- ...onrl.algos.adap.util.get_L2_sphere.rst.txt | 4 +- .../pantheonrl.algos.adap.util.rst.txt | 4 +- .../pantheonrl.algos.bc.BCShell.rst.txt | 10 +- ...heonrl.algos.bc.ConstantLRSchedule.rst.txt | 3 +- ...c.EpochOrBatchIteratorWithProgress.rst.txt | 3 +- ...gos.modular.policies.ModularPolicy.rst.txt | 3 +- ...common.multiagentenv.MultiAgentEnv.rst.txt | 3 +- ...mmon.multiagentenv.SimultaneousEnv.rst.txt | 3 +- ....common.multiagentenv.TurnBasedEnv.rst.txt | 3 +- ...on.wrappers.SimultaneousFrameStack.rst.txt | 3 +- ...mmon.wrappers.SimultaneousRecorder.rst.txt | 3 +- ...ommon.wrappers.TurnBasedFrameStack.rst.txt | 3 +- ....common.wrappers.TurnBasedRecorder.rst.txt | 3 +- ....blockworldgym.blockworld.BlockEnv.rst.txt | 3 +- ...ym.simpleblockworld.SimpleBlockEnv.rst.txt | 3 +- ...ntheonrl.envs.liargym.liar.LiarEnv.rst.txt | 3 +- ...vs.pettingzoo.PettingZooAECWrapper.rst.txt | 3 +- .../pantheonrl.envs.rpsgym.rps.RPSEnv.rst.txt | 3 +- docs_build/build/html/genindex.html | 80 ++- docs_build/build/html/objects.inv | Bin 7666 -> 7877 bytes docs_build/build/html/searchindex.js | 2 +- src/pantheonrl/algos/__init__.py | 3 + src/pantheonrl/algos/adap/__init__.py | 5 + src/pantheonrl/algos/adap/adap_learn.py | 400 ++++++------- src/pantheonrl/algos/adap/agent.py | 178 +----- src/pantheonrl/algos/adap/policies.py | 262 +++++---- src/pantheonrl/algos/adap/util.py | 16 +- src/pantheonrl/algos/bc.py | 3 +- src/pantheonrl/algos/modular/__init__.py | 5 + src/pantheonrl/common/__init__.py | 3 + src/pantheonrl/envs/blockworldgym/__init__.py | 5 + src/pantheonrl/envs/liargym/__init__.py | 3 + src/pantheonrl/envs/rpsgym/__init__.py | 3 + tests/README.org | 22 + tests/test_adap.py | 30 +- 111 files changed, 2613 insertions(+), 1991 deletions(-) diff --git a/docs_build/_autosummary/pantheonrl.algos.adap.adap_learn.ADAP.rst b/docs_build/_autosummary/pantheonrl.algos.adap.adap_learn.ADAP.rst index 85d422e..1296b39 100644 --- a/docs_build/_autosummary/pantheonrl.algos.adap.adap_learn.ADAP.rst +++ b/docs_build/_autosummary/pantheonrl.algos.adap.adap_learn.ADAP.rst @@ -1,4 +1,4 @@ -pantheonrl.algos.adap.adap\_learn.ADAP +pantheonrl.algos.adap.adap\_learn.ADAP ====================================== .. currentmodule:: pantheonrl.algos.adap.adap_learn @@ -40,7 +40,6 @@ pantheonrl.algos.adap.adap\_learn.ADAP ~ADAP.logger ~ADAP.policy_aliases - ~ADAP.full_obs_shape ~ADAP.rollout_buffer ~ADAP.policy ~ADAP.observation_space diff --git a/docs_build/_autosummary/pantheonrl.algos.adap.policies.MultModel.rst b/docs_build/_autosummary/pantheonrl.algos.adap.policies.MultModel.rst index 1fa8ce4..f619fe7 100644 --- a/docs_build/_autosummary/pantheonrl.algos.adap.policies.MultModel.rst +++ b/docs_build/_autosummary/pantheonrl.algos.adap.policies.MultModel.rst @@ -1,4 +1,4 @@ -pantheonrl.algos.adap.policies.MultModel +pantheonrl.algos.adap.policies.MultModel ======================================== .. 
currentmodule:: pantheonrl.algos.adap.policies @@ -32,8 +32,6 @@ pantheonrl.algos.adap.policies.MultModel ~MultModel.forward_critic ~MultModel.get_buffer ~MultModel.get_extra_state - ~MultModel.get_input_size_excluding_ctx - ~MultModel.get_input_size_inluding_ctx ~MultModel.get_parameter ~MultModel.get_submodule ~MultModel.half diff --git a/docs_build/_autosummary/pantheonrl.algos.adap.util.get_L2_sphere.rst b/docs_build/_autosummary/pantheonrl.algos.adap.util.get_L2_sphere.rst index db95f21..778f650 100644 --- a/docs_build/_autosummary/pantheonrl.algos.adap.util.get_L2_sphere.rst +++ b/docs_build/_autosummary/pantheonrl.algos.adap.util.get_L2_sphere.rst @@ -1,6 +1,6 @@ -pantheonrl.algos.adap.util.get\_L2\_sphere +pantheonrl.algos.adap.util.get\_l2\_sphere ========================================== .. currentmodule:: pantheonrl.algos.adap.util -.. autofunction:: get_L2_sphere \ No newline at end of file +.. autofunction:: get_l2_sphere \ No newline at end of file diff --git a/docs_build/_autosummary/pantheonrl.algos.adap.util.rst b/docs_build/_autosummary/pantheonrl.algos.adap.util.rst index 7f1ccf1..2d6ad59 100644 --- a/docs_build/_autosummary/pantheonrl.algos.adap.util.rst +++ b/docs_build/_autosummary/pantheonrl.algos.adap.util.rst @@ -1,4 +1,4 @@ -pantheonrl.algos.adap.util +pantheonrl.algos.adap.util ========================== @@ -19,9 +19,9 @@ pantheonrl.algos.adap.util :toctree: :nosignatures: - get_L2_sphere get_categorical get_context_kl_loss + get_l2_sphere get_natural_number get_positive_square get_unit_square diff --git a/docs_build/_autosummary/pantheonrl.algos.bc.BCShell.rst b/docs_build/_autosummary/pantheonrl.algos.bc.BCShell.rst index 440e2e0..5af1b7c 100644 --- a/docs_build/_autosummary/pantheonrl.algos.bc.BCShell.rst +++ b/docs_build/_autosummary/pantheonrl.algos.bc.BCShell.rst @@ -1,4 +1,4 @@ -pantheonrl.algos.bc.BCShell +pantheonrl.algos.bc.BCShell =========================== .. currentmodule:: pantheonrl.algos.bc @@ -16,9 +16,17 @@ pantheonrl.algos.bc.BCShell .. autosummary:: :nosignatures: + ~BCShell.get_policy + ~BCShell.set_policy + .. rubric:: Attributes + + .. autosummary:: + + ~BCShell.policy + \ No newline at end of file diff --git a/docs_build/_autosummary/pantheonrl.algos.bc.ConstantLRSchedule.rst b/docs_build/_autosummary/pantheonrl.algos.bc.ConstantLRSchedule.rst index 7a3363e..cfe941b 100644 --- a/docs_build/_autosummary/pantheonrl.algos.bc.ConstantLRSchedule.rst +++ b/docs_build/_autosummary/pantheonrl.algos.bc.ConstantLRSchedule.rst @@ -1,4 +1,4 @@ -pantheonrl.algos.bc.ConstantLRSchedule +pantheonrl.algos.bc.ConstantLRSchedule ====================================== .. currentmodule:: pantheonrl.algos.bc @@ -16,6 +16,7 @@ pantheonrl.algos.bc.ConstantLRSchedule .. autosummary:: :nosignatures: + ~ConstantLRSchedule.set_lr diff --git a/docs_build/_autosummary/pantheonrl.algos.bc.EpochOrBatchIteratorWithProgress.rst b/docs_build/_autosummary/pantheonrl.algos.bc.EpochOrBatchIteratorWithProgress.rst index 771fb84..31294a3 100644 --- a/docs_build/_autosummary/pantheonrl.algos.bc.EpochOrBatchIteratorWithProgress.rst +++ b/docs_build/_autosummary/pantheonrl.algos.bc.EpochOrBatchIteratorWithProgress.rst @@ -1,4 +1,4 @@ -pantheonrl.algos.bc.EpochOrBatchIteratorWithProgress +pantheonrl.algos.bc.EpochOrBatchIteratorWithProgress ==================================================== .. currentmodule:: pantheonrl.algos.bc @@ -16,6 +16,7 @@ pantheonrl.algos.bc.EpochOrBatchIteratorWithProgress .. 
autosummary:: :nosignatures: + ~EpochOrBatchIteratorWithProgress.set_data_loader diff --git a/docs_build/_autosummary/pantheonrl.algos.modular.policies.ModularPolicy.rst b/docs_build/_autosummary/pantheonrl.algos.modular.policies.ModularPolicy.rst index a923d24..265a0fb 100644 --- a/docs_build/_autosummary/pantheonrl.algos.modular.policies.ModularPolicy.rst +++ b/docs_build/_autosummary/pantheonrl.algos.modular.policies.ModularPolicy.rst @@ -1,4 +1,4 @@ -pantheonrl.algos.modular.policies.ModularPolicy +pantheonrl.algos.modular.policies.ModularPolicy =============================================== .. currentmodule:: pantheonrl.algos.modular.policies @@ -67,7 +67,6 @@ pantheonrl.algos.modular.policies.ModularPolicy ~ModularPolicy.register_parameter ~ModularPolicy.register_state_dict_pre_hook ~ModularPolicy.requires_grad_ - ~ModularPolicy.reset_noise ~ModularPolicy.save ~ModularPolicy.scale_action ~ModularPolicy.set_extra_state diff --git a/docs_build/_autosummary/pantheonrl.common.multiagentenv.MultiAgentEnv.rst b/docs_build/_autosummary/pantheonrl.common.multiagentenv.MultiAgentEnv.rst index 98e1e45..8a5fb62 100644 --- a/docs_build/_autosummary/pantheonrl.common.multiagentenv.MultiAgentEnv.rst +++ b/docs_build/_autosummary/pantheonrl.common.multiagentenv.MultiAgentEnv.rst @@ -1,4 +1,4 @@ -pantheonrl.common.multiagentenv.MultiAgentEnv +pantheonrl.common.multiagentenv.MultiAgentEnv ============================================= .. currentmodule:: pantheonrl.common.multiagentenv @@ -25,6 +25,7 @@ pantheonrl.common.multiagentenv.MultiAgentEnv ~MultiAgentEnv.n_reset ~MultiAgentEnv.n_step ~MultiAgentEnv.render + ~MultiAgentEnv.resample_null ~MultiAgentEnv.resample_random ~MultiAgentEnv.resample_round_robin ~MultiAgentEnv.reset diff --git a/docs_build/_autosummary/pantheonrl.common.multiagentenv.SimultaneousEnv.rst b/docs_build/_autosummary/pantheonrl.common.multiagentenv.SimultaneousEnv.rst index 4b4718a..d3edfc2 100644 --- a/docs_build/_autosummary/pantheonrl.common.multiagentenv.SimultaneousEnv.rst +++ b/docs_build/_autosummary/pantheonrl.common.multiagentenv.SimultaneousEnv.rst @@ -1,4 +1,4 @@ -pantheonrl.common.multiagentenv.SimultaneousEnv +pantheonrl.common.multiagentenv.SimultaneousEnv =============================================== .. currentmodule:: pantheonrl.common.multiagentenv @@ -27,6 +27,7 @@ pantheonrl.common.multiagentenv.SimultaneousEnv ~SimultaneousEnv.n_reset ~SimultaneousEnv.n_step ~SimultaneousEnv.render + ~SimultaneousEnv.resample_null ~SimultaneousEnv.resample_random ~SimultaneousEnv.resample_round_robin ~SimultaneousEnv.reset diff --git a/docs_build/_autosummary/pantheonrl.common.multiagentenv.TurnBasedEnv.rst b/docs_build/_autosummary/pantheonrl.common.multiagentenv.TurnBasedEnv.rst index 73d80b8..154abad 100644 --- a/docs_build/_autosummary/pantheonrl.common.multiagentenv.TurnBasedEnv.rst +++ b/docs_build/_autosummary/pantheonrl.common.multiagentenv.TurnBasedEnv.rst @@ -1,4 +1,4 @@ -pantheonrl.common.multiagentenv.TurnBasedEnv +pantheonrl.common.multiagentenv.TurnBasedEnv ============================================ .. 
currentmodule:: pantheonrl.common.multiagentenv @@ -28,6 +28,7 @@ pantheonrl.common.multiagentenv.TurnBasedEnv ~TurnBasedEnv.n_reset ~TurnBasedEnv.n_step ~TurnBasedEnv.render + ~TurnBasedEnv.resample_null ~TurnBasedEnv.resample_random ~TurnBasedEnv.resample_round_robin ~TurnBasedEnv.reset diff --git a/docs_build/_autosummary/pantheonrl.common.wrappers.SimultaneousFrameStack.rst b/docs_build/_autosummary/pantheonrl.common.wrappers.SimultaneousFrameStack.rst index 675e405..a7a1827 100644 --- a/docs_build/_autosummary/pantheonrl.common.wrappers.SimultaneousFrameStack.rst +++ b/docs_build/_autosummary/pantheonrl.common.wrappers.SimultaneousFrameStack.rst @@ -1,4 +1,4 @@ -pantheonrl.common.wrappers.SimultaneousFrameStack +pantheonrl.common.wrappers.SimultaneousFrameStack ================================================= .. currentmodule:: pantheonrl.common.wrappers @@ -27,6 +27,7 @@ pantheonrl.common.wrappers.SimultaneousFrameStack ~SimultaneousFrameStack.n_reset ~SimultaneousFrameStack.n_step ~SimultaneousFrameStack.render + ~SimultaneousFrameStack.resample_null ~SimultaneousFrameStack.resample_random ~SimultaneousFrameStack.resample_round_robin ~SimultaneousFrameStack.reset diff --git a/docs_build/_autosummary/pantheonrl.common.wrappers.SimultaneousRecorder.rst b/docs_build/_autosummary/pantheonrl.common.wrappers.SimultaneousRecorder.rst index bcf183a..97a6ba4 100644 --- a/docs_build/_autosummary/pantheonrl.common.wrappers.SimultaneousRecorder.rst +++ b/docs_build/_autosummary/pantheonrl.common.wrappers.SimultaneousRecorder.rst @@ -1,4 +1,4 @@ -pantheonrl.common.wrappers.SimultaneousRecorder +pantheonrl.common.wrappers.SimultaneousRecorder =============================================== .. currentmodule:: pantheonrl.common.wrappers @@ -28,6 +28,7 @@ pantheonrl.common.wrappers.SimultaneousRecorder ~SimultaneousRecorder.n_reset ~SimultaneousRecorder.n_step ~SimultaneousRecorder.render + ~SimultaneousRecorder.resample_null ~SimultaneousRecorder.resample_random ~SimultaneousRecorder.resample_round_robin ~SimultaneousRecorder.reset diff --git a/docs_build/_autosummary/pantheonrl.common.wrappers.TurnBasedFrameStack.rst b/docs_build/_autosummary/pantheonrl.common.wrappers.TurnBasedFrameStack.rst index de3281d..872318a 100644 --- a/docs_build/_autosummary/pantheonrl.common.wrappers.TurnBasedFrameStack.rst +++ b/docs_build/_autosummary/pantheonrl.common.wrappers.TurnBasedFrameStack.rst @@ -1,4 +1,4 @@ -pantheonrl.common.wrappers.TurnBasedFrameStack +pantheonrl.common.wrappers.TurnBasedFrameStack ============================================== .. currentmodule:: pantheonrl.common.wrappers @@ -28,6 +28,7 @@ pantheonrl.common.wrappers.TurnBasedFrameStack ~TurnBasedFrameStack.n_reset ~TurnBasedFrameStack.n_step ~TurnBasedFrameStack.render + ~TurnBasedFrameStack.resample_null ~TurnBasedFrameStack.resample_random ~TurnBasedFrameStack.resample_round_robin ~TurnBasedFrameStack.reset diff --git a/docs_build/_autosummary/pantheonrl.common.wrappers.TurnBasedRecorder.rst b/docs_build/_autosummary/pantheonrl.common.wrappers.TurnBasedRecorder.rst index 04ce8b1..2e45f86 100644 --- a/docs_build/_autosummary/pantheonrl.common.wrappers.TurnBasedRecorder.rst +++ b/docs_build/_autosummary/pantheonrl.common.wrappers.TurnBasedRecorder.rst @@ -1,4 +1,4 @@ -pantheonrl.common.wrappers.TurnBasedRecorder +pantheonrl.common.wrappers.TurnBasedRecorder ============================================ .. 
currentmodule:: pantheonrl.common.wrappers @@ -29,6 +29,7 @@ pantheonrl.common.wrappers.TurnBasedRecorder ~TurnBasedRecorder.n_reset ~TurnBasedRecorder.n_step ~TurnBasedRecorder.render + ~TurnBasedRecorder.resample_null ~TurnBasedRecorder.resample_random ~TurnBasedRecorder.resample_round_robin ~TurnBasedRecorder.reset diff --git a/docs_build/_autosummary/pantheonrl.envs.blockworldgym.blockworld.BlockEnv.rst b/docs_build/_autosummary/pantheonrl.envs.blockworldgym.blockworld.BlockEnv.rst index b6adb9f..db99c20 100644 --- a/docs_build/_autosummary/pantheonrl.envs.blockworldgym.blockworld.BlockEnv.rst +++ b/docs_build/_autosummary/pantheonrl.envs.blockworldgym.blockworld.BlockEnv.rst @@ -1,4 +1,4 @@ -pantheonrl.envs.blockworldgym.blockworld.BlockEnv +pantheonrl.envs.blockworldgym.blockworld.BlockEnv ================================================= .. currentmodule:: pantheonrl.envs.blockworldgym.blockworld @@ -28,6 +28,7 @@ pantheonrl.envs.blockworldgym.blockworld.BlockEnv ~BlockEnv.n_reset ~BlockEnv.n_step ~BlockEnv.render + ~BlockEnv.resample_null ~BlockEnv.resample_random ~BlockEnv.resample_round_robin ~BlockEnv.reset diff --git a/docs_build/_autosummary/pantheonrl.envs.blockworldgym.simpleblockworld.SimpleBlockEnv.rst b/docs_build/_autosummary/pantheonrl.envs.blockworldgym.simpleblockworld.SimpleBlockEnv.rst index 9792f9e..36e107e 100644 --- a/docs_build/_autosummary/pantheonrl.envs.blockworldgym.simpleblockworld.SimpleBlockEnv.rst +++ b/docs_build/_autosummary/pantheonrl.envs.blockworldgym.simpleblockworld.SimpleBlockEnv.rst @@ -1,4 +1,4 @@ -pantheonrl.envs.blockworldgym.simpleblockworld.SimpleBlockEnv +pantheonrl.envs.blockworldgym.simpleblockworld.SimpleBlockEnv ============================================================= .. currentmodule:: pantheonrl.envs.blockworldgym.simpleblockworld @@ -28,6 +28,7 @@ pantheonrl.envs.blockworldgym.simpleblockworld.SimpleBlockEnv ~SimpleBlockEnv.n_reset ~SimpleBlockEnv.n_step ~SimpleBlockEnv.render + ~SimpleBlockEnv.resample_null ~SimpleBlockEnv.resample_random ~SimpleBlockEnv.resample_round_robin ~SimpleBlockEnv.reset diff --git a/docs_build/_autosummary/pantheonrl.envs.liargym.liar.LiarEnv.rst b/docs_build/_autosummary/pantheonrl.envs.liargym.liar.LiarEnv.rst index acea78f..55816de 100644 --- a/docs_build/_autosummary/pantheonrl.envs.liargym.liar.LiarEnv.rst +++ b/docs_build/_autosummary/pantheonrl.envs.liargym.liar.LiarEnv.rst @@ -1,4 +1,4 @@ -pantheonrl.envs.liargym.liar.LiarEnv +pantheonrl.envs.liargym.liar.LiarEnv ==================================== .. currentmodule:: pantheonrl.envs.liargym.liar @@ -28,6 +28,7 @@ pantheonrl.envs.liargym.liar.LiarEnv ~LiarEnv.n_reset ~LiarEnv.n_step ~LiarEnv.render + ~LiarEnv.resample_null ~LiarEnv.resample_random ~LiarEnv.resample_round_robin ~LiarEnv.reset diff --git a/docs_build/_autosummary/pantheonrl.envs.pettingzoo.PettingZooAECWrapper.rst b/docs_build/_autosummary/pantheonrl.envs.pettingzoo.PettingZooAECWrapper.rst index a3a5f3a..51331b7 100644 --- a/docs_build/_autosummary/pantheonrl.envs.pettingzoo.PettingZooAECWrapper.rst +++ b/docs_build/_autosummary/pantheonrl.envs.pettingzoo.PettingZooAECWrapper.rst @@ -1,4 +1,4 @@ -pantheonrl.envs.pettingzoo.PettingZooAECWrapper +pantheonrl.envs.pettingzoo.PettingZooAECWrapper =============================================== .. 
currentmodule:: pantheonrl.envs.pettingzoo @@ -25,6 +25,7 @@ pantheonrl.envs.pettingzoo.PettingZooAECWrapper ~PettingZooAECWrapper.n_reset ~PettingZooAECWrapper.n_step ~PettingZooAECWrapper.render + ~PettingZooAECWrapper.resample_null ~PettingZooAECWrapper.resample_random ~PettingZooAECWrapper.resample_round_robin ~PettingZooAECWrapper.reset diff --git a/docs_build/_autosummary/pantheonrl.envs.rpsgym.rps.RPSEnv.rst b/docs_build/_autosummary/pantheonrl.envs.rpsgym.rps.RPSEnv.rst index 50d2996..cd0b1bc 100644 --- a/docs_build/_autosummary/pantheonrl.envs.rpsgym.rps.RPSEnv.rst +++ b/docs_build/_autosummary/pantheonrl.envs.rpsgym.rps.RPSEnv.rst @@ -1,4 +1,4 @@ -pantheonrl.envs.rpsgym.rps.RPSEnv +pantheonrl.envs.rpsgym.rps.RPSEnv ================================= .. currentmodule:: pantheonrl.envs.rpsgym.rps @@ -27,6 +27,7 @@ pantheonrl.envs.rpsgym.rps.RPSEnv ~RPSEnv.n_reset ~RPSEnv.n_step ~RPSEnv.render + ~RPSEnv.resample_null ~RPSEnv.resample_random ~RPSEnv.resample_round_robin ~RPSEnv.reset diff --git a/docs_build/build/html/_autosummary/pantheonrl.algos.adap.adap_learn.ADAP.html b/docs_build/build/html/_autosummary/pantheonrl.algos.adap.adap_learn.ADAP.html index cf933a4..2a48cb4 100644 --- a/docs_build/build/html/_autosummary/pantheonrl.algos.adap.adap_learn.ADAP.html +++ b/docs_build/build/html/_autosummary/pantheonrl.algos.adap.adap_learn.ADAP.html @@ -99,84 +99,14 @@

pantheonrl.algos.adap.adap_learn.ADAP

-class ADAP(policy, env, learning_rate=0.0003, n_steps=2048, batch_size=64, n_epochs=10, gamma=0.99, gae_lambda=0.95, clip_range=0.2, clip_range_vf=None, ent_coef=0.0, vf_coef=0.5, max_grad_norm=0.5, use_sde=False, sde_sample_freq=-1, target_kl=None, tensorboard_log=None, create_eval_env=False, policy_kwargs=None, verbose=0, seed=None, device='auto', _init_setup_model=True, context_loss_coeff=0.1, context_size=3, num_context_samples=5, context_sampler='l2', num_state_samples=32)[source]
+class ADAP(policy, env, learning_rate=0.0003, n_steps=2048, batch_size=64, n_epochs=10, gamma=0.99, gae_lambda=0.95, clip_range=0.2, clip_range_vf=None, normalize_advantage=True, ent_coef=0.0, vf_coef=0.5, max_grad_norm=0.5, use_sde=False, sde_sample_freq=-1, target_kl=None, stats_window_size=100, tensorboard_log=None, policy_kwargs=None, verbose=0, seed=None, device='auto', _init_setup_model=True, context_loss_coeff=0.1, context_size=3, num_context_samples=5, context_sampler='l2', num_state_samples=32)[source]

Bases: OnPolicyAlgorithm

-

Borrows from Proximal Policy Optimization algorithm (PPO) (clip version) -Paper: https://arxiv.org/abs/1707.06347 -Code: This implementation borrows code from OpenAI Spinning Up -(https://github.com/openai/spinningup/) -https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail and -and Stable Baselines (PPO2 from https://github.com/hill-a/stable-baselines) -Introduction to PPO: -https://spinningup.openai.com/en/latest/algorithms/ppo.html -:param policy: The policy model to use (MlpPolicy, CnnPolicy, …) -:param env: The environment to learn from

-
-

(if registered in Gym, can be str)

-
-
-
Parameters:
-
    -
  • learning_rate (float | Callable[[float], float]) – The learning rate, it can be a function -of the current progress remaining (from 1 to 0)

  • -
  • n_steps (int) – The number of steps to run for each environment per update -(i.e. rollout buffer size is n_steps * n_envs where n_envs is number of -environment copies running in parallel) -NOTE: n_steps * n_envs must be greater than 1 (because of the advantage -normalization) See https://github.com/pytorch/pytorch/issues/29372

  • -
  • batch_size (int) – Minibatch size

  • -
  • n_epochs (int) – Number of epoch when optimizing the surrogate loss

  • -
  • gamma (float) – Discount factor

  • -
  • gae_lambda (float) – Factor for trade-off of bias vs variance for Generalized -Advantage Estimator

  • -
  • clip_range (float | Callable[[float], float]) – Clipping parameter, it can be a function of the current -progress remaining (from 1 to 0).

  • -
  • clip_range_vf (None | float | Callable[[float], float]) – Clipping parameter for the value function, -it can be a function of the current progress remaining (from 1 to 0). -This is a parameter specific to the OpenAI implementation. If None is -passed (default), no clipping will be done on the value function. -IMPORTANT: this clipping depends on the reward scaling.

  • -
  • ent_coef (float) – Entropy coefficient for the loss calculation

  • -
  • vf_coef (float) – Value function coefficient for the loss calculation

  • -
  • max_grad_norm (float) – The maximum value for the gradient clipping

  • -
  • use_sde (bool) – Whether to use generalized State Dependent Exploration -(gSDE) instead of action noise exploration (default: False)

  • -
  • sde_sample_freq (int) – Sample a new noise matrix every n steps when using -gSDE -Default: -1 (only sample at the beginning of the rollout)

  • -
  • target_kl (float | None) – Limit the KL divergence between updates, -because the clipping is not enough to prevent large update -see issue #213 -(cf https://github.com/hill-a/stable-baselines/issues/213) -By default, there is no limit on the kl div.

  • -
  • tensorboard_log (str | None) – the log location for tensorboard -(if None, no logging)

  • -
  • create_eval_env (bool) – Whether to create a second environment that will be -used for evaluating the agent periodically. (Only available when -passing string for the environment)

  • -
  • policy_kwargs (Dict[str, Any] | None) – additional arguments to be passed to the policy on -creation

  • -
  • verbose (int) – the verbosity level: 0 no output, 1 info, 2 debug

  • -
  • seed (int | None) – Seed for the pseudo random generators

  • -
  • device (device | str) – Device (cpu, cuda, …) on which the code should be run. -Setting it to auto, the code will be run on the GPU if possible.

  • -
  • _init_setup_model (bool) – Whether or not to build the network at the -creation of the instance

  • -
  • policy (ActorCriticPolicy) –

  • -
  • env (Env | VecEnv | str) –

  • -
  • context_loss_coeff (float) –

  • -
  • context_size (int) –

  • -
  • num_context_samples (int) –

  • -
  • context_sampler (str) –

  • -
  • num_state_samples (int) –

  • -
-
-
+

Borrows from Proximal Policy Optimization algorithm (PPO) (clip version)
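For orientation, a minimal training sketch against the new signature above (assumptions: the import paths are the module paths documented on this page, a Gymnasium environment stands in for a PantheonRL dummy env, and passing the AdapPolicy class directly as the policy argument is accepted):

    import gymnasium as gym
    from pantheonrl.algos.adap.adap_learn import ADAP
    from pantheonrl.algos.adap.policies import AdapPolicy

    env = gym.make("CartPole-v1")            # placeholder env, not taken from this patch
    model = ADAP(
        AdapPolicy,                          # assumed: the policy class itself is accepted
        env,
        n_steps=2048,
        batch_size=64,
        normalize_advantage=True,            # keyword added in the sb3 2.x-style signature
        stats_window_size=100,               # keyword added in the sb3 2.x-style signature
        context_size=3,
        context_sampler="l2",
        verbose=1,
    )
    model.learn(total_timesteps=10_000, progress_bar=False)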

Methods

- + @@ -200,7 +130,7 @@

pantheonrl.algos.adap.adap_learn.ADAP

- + @@ -225,51 +155,75 @@

pantheonrl.algos.adap.adap_learn.ADAP

- - - - + - + - + - + - + - +

collect_rollouts

Nearly identical to OnPolicyAlgorithm's collect_rollouts, but it also resamples the context every episode.

Collect rollouts using the current policy and fill a RolloutBuffer.

get_env

Returns the current environment (can be None if not defined).

set_env

Checks the validity of the environment, and if it is coherent, set it as the current environment.

Set the env to use

set_logger

Setter for the logger object.

policy_aliases

full_obs_shape

rollout_buffer

rollout_buffer

policy

policy

observation_space

observation_space

action_space

action_space

n_envs

n_envs

lr_schedule

lr_schedule

+
+
Parameters:
+
    +
  • policy (ActorCriticPolicy) –

  • +
  • env (Env | VecEnv | str) –

  • +
  • learning_rate (float | Callable[[float], float]) –

  • +
  • n_steps (int) –

  • +
  • batch_size (int) –

  • +
  • n_epochs (int) –

  • +
  • gamma (float) –

  • +
  • gae_lambda (float) –

  • +
  • clip_range (float | Callable[[float], float]) –

  • +
  • clip_range_vf (None | float | Callable[[float], float]) –

  • +
  • normalize_advantage (bool) –

  • +
  • ent_coef (float) –

  • +
  • vf_coef (float) –

  • +
  • max_grad_norm (float) –

  • +
  • use_sde (bool) –

  • +
  • sde_sample_freq (int) –

  • +
  • target_kl (float | None) –

  • +
  • stats_window_size (int) –

  • +
  • tensorboard_log (str | None) –

  • +
  • policy_kwargs (Dict[str, Any] | None) –

  • +
  • verbose (int) –

  • +
  • seed (int | None) –

  • +
  • device (device | str) –

  • +
  • _init_setup_model (bool) –

  • +
  • context_loss_coeff (float) –

  • +
  • context_size (int) –

  • +
  • num_context_samples (int) –

  • +
  • context_sampler (str) –

  • +
  • num_state_samples (int) –

  • +
+
+
collect_rollouts(env, callback, rollout_buffer, n_rollout_steps)[source]
-

Nearly identical to OnPolicyAlgorithm’s collect_rollouts, but it also -resamples the context every episode.

-

Collect experiences using the current policy and fill a -RolloutBuffer. +

Collect rollouts using the current policy and fill a RolloutBuffer. The term rollout here refers to the model-free notion and should not -be used with the concept of rollout used in model-based RL or planning. -:param env: The training environment -:param callback: Callback that will be called at each step

-
-

(and at the beginning and end of the rollout)

-
+be used with the concept of rollout used in model-based RL or planning.

Parameters:
    +
  • env (VecEnv) – The training environment

  • +
  • callback (BaseCallback) – Callback that will be called at each step +(and at the beginning and end of the rollout)

  • rollout_buffer (RolloutBuffer) – Buffer to fill with rollouts

  • -
  • n_steps – Number of experiences to collect per environment

  • -
  • env (VecEnv) –

  • -
  • callback (BaseCallback) –

  • -
  • n_rollout_steps (int) –

  • +
  • n_rollout_steps (int) – Number of experiences to collect per env

Returns:
@@ -328,7 +282,7 @@

pantheonrl.algos.adap.adap_learn.ADAP
-learn(total_timesteps, callback=None, log_interval=1, eval_env=None, eval_freq=-1, n_eval_episodes=5, tb_log_name='ADAP', eval_log_path=None, reset_num_timesteps=True)[source]
+learn(total_timesteps, callback=None, log_interval=1, tb_log_name='ADAP', reset_num_timesteps=True, progress_bar=False)[source]

Return a trained model.

Parameters:
@@ -338,19 +292,12 @@

pantheonrl.algos.adap.adap_learn.ADAPReturns:

the trained model

-
Return type:
-

ADAP

-

@@ -443,22 +390,8 @@

pantheonrl.algos.adap.adap_learn.ADAP
-set_env(env)[source]
-

Checks the validity of the environment, and if it is coherent, set it as the current environment. -Furthermore wrap any non vectorized env into a vectorized -checked parameters: -- observation_space -- action_space

-
-
Parameters:
-
-
-
+set_env(env, force_reset=True)[source] +

Set the env to use
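Continuing the earlier sketch, the updated set_env signature can be exercised as follows (new_env is a hypothetical replacement environment; continued training is assumed to behave as in stock Stable Baselines3):

    model.set_env(new_env, force_reset=True)      # new_env: any compatible Env/VecEnv
    model.learn(total_timesteps=2_048, reset_num_timesteps=False)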

diff --git a/docs_build/build/html/_autosummary/pantheonrl.algos.adap.adap_learn.html b/docs_build/build/html/_autosummary/pantheonrl.algos.adap.adap_learn.html index 10e83b3..1bec3ed 100644 --- a/docs_build/build/html/_autosummary/pantheonrl.algos.adap.adap_learn.html +++ b/docs_build/build/html/_autosummary/pantheonrl.algos.adap.adap_learn.html @@ -96,11 +96,12 @@

pantheonrl.algos.adap.adap_learn

+

Modified implementation of PPO to support ADAP

Classes

- +

ADAP

Borrows from Proximal Policy Optimization algorithm (PPO) (clip version) Paper: https://arxiv.org/abs/1707.06347 Code: This implementation borrows code from OpenAI Spinning Up (https://github.com/openai/spinningup/) https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail and and Stable Baselines (PPO2 from https://github.com/hill-a/stable-baselines) Introduction to PPO: https://spinningup.openai.com/en/latest/algorithms/ppo.html :param policy: The policy model to use (MlpPolicy, CnnPolicy, ...) :param env: The environment to learn from (if registered in Gym, can be str) :param learning_rate: The learning rate, it can be a function of the current progress remaining (from 1 to 0) :param n_steps: The number of steps to run for each environment per update (i.e. rollout buffer size is n_steps * n_envs where n_envs is number of environment copies running in parallel) NOTE: n_steps * n_envs must be greater than 1 (because of the advantage normalization) See https://github.com/pytorch/pytorch/issues/29372 :param batch_size: Minibatch size :param n_epochs: Number of epoch when optimizing the surrogate loss :param gamma: Discount factor :param gae_lambda: Factor for trade-off of bias vs variance for Generalized Advantage Estimator :param clip_range: Clipping parameter, it can be a function of the current progress remaining (from 1 to 0). :param clip_range_vf: Clipping parameter for the value function, it can be a function of the current progress remaining (from 1 to 0). This is a parameter specific to the OpenAI implementation. If None is passed (default), no clipping will be done on the value function. IMPORTANT: this clipping depends on the reward scaling. :param ent_coef: Entropy coefficient for the loss calculation :param vf_coef: Value function coefficient for the loss calculation :param max_grad_norm: The maximum value for the gradient clipping :param use_sde: Whether to use generalized State Dependent Exploration (gSDE) instead of action noise exploration (default: False) :param sde_sample_freq: Sample a new noise matrix every n steps when using gSDE Default: -1 (only sample at the beginning of the rollout) :param target_kl: Limit the KL divergence between updates, because the clipping is not enough to prevent large update see issue #213 (cf https://github.com/hill-a/stable-baselines/issues/213) By default, there is no limit on the kl div. :param tensorboard_log: the log location for tensorboard (if None, no logging) :param create_eval_env: Whether to create a second environment that will be used for evaluating the agent periodically. (Only available when passing string for the environment) :param policy_kwargs: additional arguments to be passed to the policy on creation :param verbose: the verbosity level: 0 no output, 1 info, 2 debug :param seed: Seed for the pseudo random generators :param device: Device (cpu, cuda, ...) on which the code should be run. Setting it to auto, the code will be run on the GPU if possible. :param _init_setup_model: Whether or not to build the network at the creation of the instance.

Borrows from Proximal Policy Optimization algorithm (PPO) (clip version)

diff --git a/docs_build/build/html/_autosummary/pantheonrl.algos.adap.agent.AdapAgent.html b/docs_build/build/html/_autosummary/pantheonrl.algos.adap.agent.AdapAgent.html index 2da9015..88444c6 100644 --- a/docs_build/build/html/_autosummary/pantheonrl.algos.adap.agent.AdapAgent.html +++ b/docs_build/build/html/_autosummary/pantheonrl.algos.adap.agent.AdapAgent.html @@ -99,7 +99,7 @@

pantheonrl.algos.adap.agent.AdapAgent

-class AdapAgent(model, log_interval=None, tensorboard_log=None, tb_log_name='AdapAgent', latent_syncer=None)[source]
+class AdapAgent(model, log_interval=None, working_timesteps=1000, callback=None, tb_log_name='AdapAgent', latent_syncer=None)[source]

Bases: OnPolicyAgent

Agent representing an ADAP learning algorithm.

The get_action and update functions are based on the learn function @@ -108,6 +108,10 @@
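As a rough illustration of how this agent plugs into a PantheonRL environment, see the sketch below; the RPSEnv constructor arguments and the getDummyEnv/add_partner_agent helpers follow the repository's usual partner-agent pattern and are assumed unchanged by this patch. The parameters it sets are described in the list that follows:

    from pantheonrl.algos.adap.adap_learn import ADAP
    from pantheonrl.algos.adap.agent import AdapAgent
    from pantheonrl.algos.adap.policies import AdapPolicy
    from pantheonrl.envs.rpsgym.rps import RPSEnv

    env = RPSEnv()                                   # assumed default constructor
    partner_model = ADAP(AdapPolicy, env.getDummyEnv(1), verbose=0)
    partner = AdapAgent(
        partner_model,
        working_timesteps=10_000,                    # rough estimate of total partner training steps
        tb_log_name="AdapPartner",
    )
    env.add_partner_agent(partner)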

pantheonrl.algos.adap.agent.AdapAgentParameters:
  • model (ADAP) – Model representing the agent’s learning algorithm

  • +
  • log_interval – Optional log interval for policy logging

  • +
  • working_timesteps – Estimate of the number of timesteps to train for.

  • +
  • callback – Optional callback fed into the OnPolicyAlgorithm

  • +
  • tb_log_name – Name for tensorboard log

  • latent_syncer (AdapPolicy | None) –

@@ -128,16 +132,13 @@

pantheonrl.algos.adap.agent.AdapAgent
-get_action(obs, record=True)[source]
+get_action(obs)[source]

Return an action given an observation.

-

When record is True, the agent saves the last transition into its -buffer. It also updates the model if the buffer is full.

+

The agent saves the last transition into its buffer. It also updates +the model if the buffer is full.

Parameters:
-
    -
  • obs (Observation) – The observation to use

  • -
  • record (bool) – Whether to record the obs, action (True when training)

  • -
+

obs (Observation) – The observation to use

Returns:

The action to take

diff --git a/docs_build/build/html/_autosummary/pantheonrl.algos.adap.agent.html b/docs_build/build/html/_autosummary/pantheonrl.algos.adap.agent.html index 512fe2b..c162d83 100644 --- a/docs_build/build/html/_autosummary/pantheonrl.algos.adap.agent.html +++ b/docs_build/build/html/_autosummary/pantheonrl.algos.adap.agent.html @@ -96,6 +96,7 @@

pantheonrl.algos.adap.agent

+

Module defining the ADAP partner agent.

Classes

diff --git a/docs_build/build/html/_autosummary/pantheonrl.algos.adap.html b/docs_build/build/html/_autosummary/pantheonrl.algos.adap.html index c0cdf73..beba36d 100644 --- a/docs_build/build/html/_autosummary/pantheonrl.algos.adap.html +++ b/docs_build/build/html/_autosummary/pantheonrl.algos.adap.html @@ -98,20 +98,22 @@

pantheonrl.algos.adap

- + - + - + - +

pantheonrl.algos.adap.adap_learn

Modified implementation of PPO to support ADAP

pantheonrl.algos.adap.agent

Module defining the ADAP partner agent.

pantheonrl.algos.adap.policies

Module defining the Policy for ADAP

pantheonrl.algos.adap.util

Collection of helper functions for ADAP

-
+

Implementation of the ADAP algorithm (Derek 2021).

+

Paper: https://arxiv.org/abs/2107.07506

+

diff --git a/docs_build/build/html/_autosummary/pantheonrl.algos.adap.policies.AdapPolicy.html b/docs_build/build/html/_autosummary/pantheonrl.algos.adap.policies.AdapPolicy.html index 24e976f..fad1eb8 100644 --- a/docs_build/build/html/_autosummary/pantheonrl.algos.adap.policies.AdapPolicy.html +++ b/docs_build/build/html/_autosummary/pantheonrl.algos.adap.policies.AdapPolicy.html @@ -99,8 +99,9 @@

pantheonrl.algos.adap.policies.AdapPolicy

-class AdapPolicy(observation_space, action_space, lr_schedule, net_arch=None, activation_fn=<class 'torch.nn.modules.activation.Tanh'>, ortho_init=True, use_sde=False, log_std_init=0.0, full_std=True, sde_net_arch=None, use_expln=False, squash_output=False, features_extractor_class=<class 'stable_baselines3.common.torch_layers.FlattenExtractor'>, features_extractor_kwargs=None, normalize_images=True, optimizer_class=<class 'torch.optim.adam.Adam'>, optimizer_kwargs=None, context_size=3)[source]
+class AdapPolicy(observation_space, action_space, lr_schedule, net_arch=None, activation_fn=<class 'torch.nn.modules.activation.Tanh'>, ortho_init=True, use_sde=False, log_std_init=0.0, full_std=True, use_expln=False, squash_output=False, features_extractor_class=<class 'stable_baselines3.common.torch_layers.FlattenExtractor'>, features_extractor_kwargs=None, share_features_extractor=True, normalize_images=True, optimizer_class=<class 'torch.optim.adam.Adam'>, optimizer_kwargs=None, context_size=3)[source]

Bases: ActorCriticPolicy

+

Base Policy for the ADAP Actor-critic policy

Initializes internal Module state, shared by both nn.Module and ScriptModule.
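A small sketch of the context interface documented below (policy stands for an existing AdapPolicy instance; the context is sampled with plain torch rather than the util helpers, whose exact signatures are not shown on this page):

    import torch as th

    ctx = th.nn.functional.normalize(th.randn(1, 3), dim=1)  # one point on the L2 sphere, context_size=3
    policy.set_context(ctx)                                   # attach the sampled latent context
    assert th.equal(policy.get_context(), ctx)                # read it back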

Methods

@@ -133,7 +134,7 @@

pantheonrl.algos.adap.policies.AdapPolicy

- + @@ -150,8 +151,8 @@

pantheonrl.algos.adap.policies.AdapPolicy

- - + + @@ -261,8 +262,8 @@

pantheonrl.algos.adap.policies.AdapPolicy

- - + + @@ -334,17 +335,17 @@

pantheonrl.algos.adap.policies.AdapPolicy evaluate_actions(obs, actions)[source]

Evaluate actions according to the current policy, -given the observations. -:param obs: -:param actions: -:return: estimated value, log likelihood of taking those actions

-
-

and entropy of the action distribution.

-
+given the observations.

Parameters:
    -
  • obs (Tensor) –

  • -
  • actions (Tensor) –

  • +
  • obs (Tensor) – Observation

  • +
  • actions (Tensor) – Actions

-
Return type:
-

Tuple[Tensor, Tensor, Tensor]

+
Returns:
+

estimated value, log likelihood of taking those actions +and entropy of the action distribution.

+
+
Return type:
+

Tuple[Tensor, Tensor, Tensor | None]

@@ -694,7 +693,7 @@

pantheonrl.algos.adap.policies.AdapPolicy
-forward(obs, deterministic=False)
+forward(obs, deterministic=False)[source]

Forward pass in all the networks (actor and critic)

Parameters:
@@ -746,6 +745,12 @@

pantheonrl.algos.adap.policies.AdapPolicy +
+get_context()[source]
+

Get the current context

+

+
get_distribution(obs)
@@ -1320,7 +1325,7 @@

pantheonrl.algos.adap.policies.AdapPolicy
-predict_values(obs)
+predict_values(obs)[source]

Get the estimated values according to the current policy given the observations.

Parameters:
@@ -1804,6 +1809,12 @@

pantheonrl.algos.adap.policies.AdapPolicy +
+set_context(ctxt)[source]
+

Set the context

+

+
set_extra_state(state)
diff --git a/docs_build/build/html/_autosummary/pantheonrl.algos.adap.policies.AdapPolicyMult.html b/docs_build/build/html/_autosummary/pantheonrl.algos.adap.policies.AdapPolicyMult.html index ee2e397..3dd623e 100644 --- a/docs_build/build/html/_autosummary/pantheonrl.algos.adap.policies.AdapPolicyMult.html +++ b/docs_build/build/html/_autosummary/pantheonrl.algos.adap.policies.AdapPolicyMult.html @@ -99,8 +99,9 @@

pantheonrl.algos.adap.policies.AdapPolicyMult

-class AdapPolicyMult(observation_space, action_space, lr_schedule, net_arch=None, activation_fn=<class 'torch.nn.modules.activation.Tanh'>, ortho_init=True, use_sde=False, log_std_init=0.0, full_std=True, sde_net_arch=None, use_expln=False, squash_output=False, features_extractor_class=<class 'stable_baselines3.common.torch_layers.FlattenExtractor'>, features_extractor_kwargs=None, normalize_images=True, optimizer_class=<class 'torch.optim.adam.Adam'>, optimizer_kwargs=None, context_size=3)[source]
+class AdapPolicyMult(observation_space, action_space, lr_schedule, net_arch=None, activation_fn=<class 'torch.nn.modules.activation.Tanh'>, ortho_init=True, use_sde=False, log_std_init=0.0, full_std=True, use_expln=False, squash_output=False, features_extractor_class=<class 'stable_baselines3.common.torch_layers.FlattenExtractor'>, features_extractor_kwargs=None, share_features_extractor=True, normalize_images=True, optimizer_class=<class 'torch.optim.adam.Adam'>, optimizer_kwargs=None, context_size=3)[source]

Bases: AdapPolicy

+

Multiplicative Policy for the ADAP Actor-critic policy

Initializes internal Module state, shared by both nn.Module and ScriptModule.

Methods

evaluate_actions

Evaluate actions according to the current policy, given the observations. :param obs: :param actions: :return: estimated value, log likelihood of taking those actions and entropy of the action distribution.

Evaluate actions according to the current policy, given the observations.

extra_repr

Set the extra representation of the module

get_buffer

Returns the buffer given by target if it exists, otherwise throws an error.

get_context

get_context

Get the current context

get_distribution

Get the current policy distribution given the observations.

scale_action

Rescale the action from [low, high] to [-1, 1] (no need for symmetric action space)

set_context

set_context

Set the context

set_extra_state

This function is called from load_state_dict() to handle any extra state found within the state_dict.

@@ -133,7 +134,7 @@

pantheonrl.algos.adap.policies.AdapPolicyMult

- + @@ -150,8 +151,8 @@

pantheonrl.algos.adap.policies.AdapPolicyMult

- - + + @@ -261,8 +262,8 @@

pantheonrl.algos.adap.policies.AdapPolicyMult

- - + + @@ -334,17 +335,17 @@

pantheonrl.algos.adap.policies.AdapPolicyMult evaluate_actions(obs, actions)

Evaluate actions according to the current policy, -given the observations. -:param obs: -:param actions: -:return: estimated value, log likelihood of taking those actions

-
-

and entropy of the action distribution.

-
+given the observations.

Parameters:
    -
  • obs (Tensor) –

  • -
  • actions (Tensor) –

  • +
  • obs (Tensor) – Observation

  • +
  • actions (Tensor) – Actions

-
Return type:
-

Tuple[Tensor, Tensor, Tensor]

+
Returns:
+

estimated value, log likelihood of taking those actions +and entropy of the action distribution.

+
+
Return type:
+

Tuple[Tensor, Tensor, Tensor | None]

@@ -746,6 +745,12 @@

pantheonrl.algos.adap.policies.AdapPolicyMult +
+get_context()
+

Get the current context

+
+
get_distribution(obs)
@@ -1804,6 +1809,12 @@

pantheonrl.algos.adap.policies.AdapPolicyMult +
+set_context(ctxt)
+

Set the context

+

+
set_extra_state(state)
diff --git a/docs_build/build/html/_autosummary/pantheonrl.algos.adap.policies.MultModel.html b/docs_build/build/html/_autosummary/pantheonrl.algos.adap.policies.MultModel.html index 98a86b1..487b9c6 100644 --- a/docs_build/build/html/_autosummary/pantheonrl.algos.adap.policies.MultModel.html +++ b/docs_build/build/html/_autosummary/pantheonrl.algos.adap.policies.MultModel.html @@ -100,7 +100,8 @@

pantheonrl.algos.adap.policies.MultModel
class MultModel(feature_dim, net_arch, activation_fn, device, context_size)[source]
-

Bases: MlpExtractor

+

Bases: Module

+

Neural Network representing multiplicative layers

Initializes internal Module state, shared by both nn.Module and ScriptModule.
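To convey the multiplicative-layer idea in isolation, a generic PyTorch fragment (an illustration of the conditioning scheme only, not the MultModel implementation; all sizes are invented):

    import torch as th

    obs_features = th.randn(4, 8)     # batch of observation features
    contexts = th.randn(4, 3)         # batch of latent contexts
    obs_proj = th.nn.Linear(8, 32)
    ctx_proj = th.nn.Linear(3, 32)
    hidden = obs_proj(obs_features) * ctx_proj(contexts)  # context enters multiplicatively, not by concatenation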

Methods

evaluate_actions

Evaluate actions according to the current policy, given the observations. :param obs: :param actions: :return: estimated value, log likelihood of taking those actions and entropy of the action distribution.

Evaluate actions according to the current policy, given the observations.

extra_repr

Set the extra representation of the module

get_buffer

Returns the buffer given by target if it exists, otherwise throws an error.

get_context

get_context

Get the current context

get_distribution

Get the current policy distribution given the observations.

scale_action

Rescale the action from [low, high] to [-1, 1] (no need for symmetric action space)

set_context

set_context

Set the context

set_extra_state

This function is called from load_state_dict() to handle any extra state found within the state_dict.

@@ -139,18 +140,13 @@

pantheonrl.algos.adap.policies.MultModelfloat datatype.

- + - - + + - - + + @@ -158,12 +154,6 @@

pantheonrl.algos.adap.policies.MultModel

- - - - - - @@ -197,8 +187,8 @@

pantheonrl.algos.adap.policies.MultModel

- - + + @@ -254,8 +244,8 @@

pantheonrl.algos.adap.policies.MultModel

- - + + @@ -572,16 +562,41 @@

pantheonrl.algos.adap.policies.MultModel
forward(features)[source]
-
-
Returns:
-

latent_policy, latent_value of the specified network. -If all layers are shared, then latent_policy == latent_value

+

Returns the action logits and values

+
+
Parameters:
+

features (Tensor) –

-
Parameters:
-

features (Tensor) –

+
Return type:
+

Tuple[Tensor, Tensor]

-
Return type:
-

Tuple[Tensor, Tensor]

+
+
+ +
+
+forward_actor(features)[source]
+

Returns the action logits and values

+
+
Parameters:
+

features (Tensor) –

+
+
Return type:
+

Tensor

+
+
+
+ +
+
+forward_critic(features)[source]
+

Returns the action logits and values

+
+
Parameters:
+

features (Tensor) –

+
+
Return type:
+

Tensor

@@ -1035,6 +1050,23 @@

pantheonrl.algos.adap.policies.MultModel +
+policies(observations, contexts)[source]
+

Returns the logits from the policy function

+
+
Parameters:
+
    +
  • observations (Tensor) –

  • +
  • contexts (Tensor) –

  • +
+
+
Return type:
+

Tensor

+
+
+
+
register_backward_hook(hook)
@@ -1721,6 +1753,23 @@

pantheonrl.algos.adap.policies.MultModel +
+values(observations, contexts)[source]
+

Returns the response from the value function

+
+
Parameters:
+
    +
  • observations (Tensor) –

  • +
  • contexts (Tensor) –

  • +
+
+
Return type:
+

Tensor

+
+
+

+
xpu(device=None)
diff --git a/docs_build/build/html/_autosummary/pantheonrl.algos.adap.policies.html b/docs_build/build/html/_autosummary/pantheonrl.algos.adap.policies.html index 0d83424..3a16261 100644 --- a/docs_build/build/html/_autosummary/pantheonrl.algos.adap.policies.html +++ b/docs_build/build/html/_autosummary/pantheonrl.algos.adap.policies.html @@ -96,17 +96,18 @@

pantheonrl.algos.adap.policies

+

Module defining the Policy for ADAP

Classes

forward

-
return:
-

latent_policy, latent_value of the specified network.

-
-
-

Returns the action logits and values

forward_actor

forward_actor

Returns the action logits and values

forward_critic

forward_critic

Returns the action logits and values

get_buffer

Returns the buffer given by target if it exists, otherwise throws an error.

get_extra_state

Returns any extra state to include in the module's state_dict.

get_input_size_excluding_ctx

get_input_size_inluding_ctx

get_parameter

Returns the parameter given by target if it exists, otherwise throws an error.

parameters

Returns an iterator over module parameters.

policies

policies

Returns the logits from the policy function

register_backward_hook

Registers a backward hook on the module.

type

Casts all parameters and buffers to dst_type.

values

values

Returns the response from the value function

xpu

Moves all model parameters and buffers to the XPU.

- + - + - +

AdapPolicy

Initializes internal Module state, shared by both nn.Module and ScriptModule.

Base Policy for the ADAP Actor-critic policy

AdapPolicyMult

Initializes internal Module state, shared by both nn.Module and ScriptModule.

Multiplicative Policy for the ADAP Actor-critic policy

MultModel

Initializes internal Module state, shared by both nn.Module and ScriptModule.

Neural Network representing multiplicative layers

diff --git a/docs_build/build/html/_autosummary/pantheonrl.algos.adap.util.get_L2_sphere.html b/docs_build/build/html/_autosummary/pantheonrl.algos.adap.util.get_L2_sphere.html index b2348c3..9e28816 100644 --- a/docs_build/build/html/_autosummary/pantheonrl.algos.adap.util.get_L2_sphere.html +++ b/docs_build/build/html/_autosummary/pantheonrl.algos.adap.util.get_L2_sphere.html @@ -4,7 +4,7 @@ - pantheonrl.algos.adap.util.get_L2_sphere — PantheonRL 0.1 documentation + pantheonrl.algos.adap.util.get_l2_sphere — PantheonRL 0.1 documentation