
Add SRSAC #121

Merged: 1 commit, merged Apr 8, 2024
4 changes: 4 additions & 0 deletions .github/workflows/build.yml
@@ -142,6 +142,10 @@ jobs:
     - name: HyAR reproductions test
       run: |
         ./bin/test_reproductions --gpu_id -1 --base_env hybrid_env --env FakeHybridNNablaRL-v1
+    - name: DeepMind control suite reproductions test
+      run: |
+        pip install dm_control
+        ./bin/test_reproductions --gpu_id -1 --base_env dm_control --env FakeDMControlNNablaRL-v1
   copyright:
     runs-on: ubuntu-latest
     timeout-minutes: 3
23 changes: 22 additions & 1 deletion bin/evaluate_algorithm
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2021,2022,2023 Sony Group Corporation.
+# Copyright 2021,2022,2023,2024 Sony Group Corporation.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -110,6 +110,24 @@ HYBRID_ENV_LIST=(
"Platform-v0"
)

DM_CONTROL_LIST=(
"acrobot-swingup"
"cheetah-run"
"finger-turn_hard"
"fish-swim"
"hopper-hop"
"hopper-stand"
"humanoid-run"
"humanoid-stand"
"humanoid-walk"
"pendulum-swingup"
"quadruped-run"
"quadruped-walk"
"reacher-hard"
"swimmer-swimmer6"
"walker-run"
)

GPU_ID=0
ALGO_NAME="dqn"
BASE_ENV_NAME="atari"
@@ -193,6 +211,9 @@ do
if [ $BASE_ENV_NAME = "hybrid_env" ]; then
ENV_NAME=${HYBRID_ENV_LIST[$INDEX]}
fi
if [ $BASE_ENV_NAME = "dm_control" ]; then
ENV_NAME=${DM_CONTROL_LIST[$INDEX]}
fi
echo "Start running training for: " ${ENV_NAME}
if [ -n "$BATCH_SIZE" ]; then
${ROOT_DIR}/bin/train_with_seeds "${REPRODUCTION_CODE_DIR}/${ALGO_NAME}_reproduction.py" $GPU_ID $ENV_NAME $SAVE_DIR $NUM_SEEDS $BATCH_SIZE &
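Note for readers: the new DM_CONTROL_LIST entries use dm_control's "domain-task" naming. Below is a minimal sketch (not part of this diff) of how such an entry maps onto dm_control's suite loader, assuming dm_control is installed as in the CI step above:

```python
# Minimal sketch: resolve a "domain-task" entry from DM_CONTROL_LIST with the
# dm_control suite loader. maxsplit=1 keeps task names like "turn_hard" intact.
from dm_control import suite

domain, task = "finger-turn_hard".split("-", 1)
env = suite.load(domain_name=domain, task_name=task)
timestep = env.reset()
print(list(timestep.observation.keys()))
```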
22 changes: 21 additions & 1 deletion docs/source/nnablarl_api/algorithms.rst
@@ -331,14 +331,34 @@ SAC (ICML 2018 version)

 SAC-D
 ====
-.. autoclass:: nnabla_rl.algorithms.sac.SACDConfig
+.. autoclass:: nnabla_rl.algorithms.sacd.SACDConfig
     :members:
     :show-inheritance:
 
 .. autoclass:: nnabla_rl.algorithms.sacd.SACD
     :members:
     :show-inheritance:
 
+SRSAC
+====
+.. autoclass:: nnabla_rl.algorithms.srsac.SRSACConfig
+    :members:
+    :show-inheritance:
+
+.. autoclass:: nnabla_rl.algorithms.srsac.SRSAC
+    :members:
+    :show-inheritance:
+
+SRSAC(Computationally efficient ver.)
+====
+.. autoclass:: nnabla_rl.algorithms.srsac.EfficientSRSACConfig
+    :members:
+    :show-inheritance:
+
+.. autoclass:: nnabla_rl.algorithms.srsac.EfficientSRSAC
+    :members:
+    :show-inheritance:
+
 TD3
 ====
 .. autoclass:: nnabla_rl.algorithms.td3.TD3Config
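A hedged usage sketch for the newly documented classes (not from this PR): it assumes SRSAC follows the same env_or_env_info constructor pattern as SAC, shown later in this diff, and that SRSACConfig inherits the common gpu_id field.

```python
# Hedged sketch: instantiate the newly documented SRSAC algorithm.
# The constructor shape (env, config=...) is assumed to mirror SAC's.
import gym

import nnabla_rl.algorithms as A

env = gym.make("Pendulum-v1")
config = A.SRSACConfig(gpu_id=-1)  # -1 selects CPU
srsac = A.SRSAC(env, config=config)
```

EfficientSRSAC would presumably be constructed the same way with EfficientSRSACConfig.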
1 change: 1 addition & 0 deletions nnabla_rl/algorithms/README.md
@@ -41,6 +41,7 @@ nnabla-rl offers various (deep) reinforcement learning and optimal control algor
 |[SAC](https://arxiv.org/abs/1812.05905)|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|
 |[SAC (ICML 2018 version)](https://arxiv.org/abs/1801.01290)|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|
 |[SAC-D](https://arxiv.org/abs/2206.13901)|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|
+|[SRSAC](https://openreview.net/pdf?id=OpC-9aBBVJe)|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|
 |[TD3](https://arxiv.org/abs/1802.09477)|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|
 |[TRPO](https://arxiv.org/abs/1502.05477)|:heavy_check_mark:|:x:|:heavy_check_mark:|(We will support discrete action in the future)|:x:|:x:|
 |[TRPO (ICML 2015 version)](https://arxiv.org/abs/1502.05477)|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|
5 changes: 4 additions & 1 deletion nnabla_rl/algorithms/__init__.py
@@ -1,5 +1,5 @@
 # Copyright 2020,2021 Sony Corporation.
-# Copyright 2021,2022,2023 Sony Group Corporation.
+# Copyright 2021,2022,2023,2024 Sony Group Corporation.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -49,6 +49,7 @@
 from nnabla_rl.algorithms.reinforce import REINFORCE, REINFORCEConfig
 from nnabla_rl.algorithms.sac import SAC, SACConfig
 from nnabla_rl.algorithms.sacd import SACD, SACDConfig
+from nnabla_rl.algorithms.srsac import SRSAC, EfficientSRSAC, EfficientSRSACConfig, SRSACConfig
 from nnabla_rl.algorithms.td3 import TD3, TD3Config
 from nnabla_rl.algorithms.trpo import TRPO, TRPOConfig
 from nnabla_rl.algorithms.xql import XQL, XQLConfig
@@ -112,6 +113,8 @@ def get_class_of(name):
 register_algorithm(REINFORCE, REINFORCEConfig)
 register_algorithm(SAC, SACConfig)
 register_algorithm(SACD, SACDConfig)
+register_algorithm(SRSAC, SRSACConfig)
+register_algorithm(EfficientSRSAC, EfficientSRSACConfig)
 register_algorithm(TD3, TD3Config)
 register_algorithm(ICML2015TRPO, ICML2015TRPOConfig)
 register_algorithm(TRPO, TRPOConfig)
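Since this hunk also touches get_class_of's module, here is a small sketch of resolving the new registrations by name. It assumes the registry keys on the class name; the exact return shape of get_class_of is not shown in this diff, so the result is only printed:

```python
# Hedged sketch: look up the algorithms registered above by name.
import nnabla_rl.algorithms as A

print(A.get_class_of("SRSAC"))
print(A.get_class_of("EfficientSRSAC"))
```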
26 changes: 16 additions & 10 deletions nnabla_rl/algorithms/sac.py
@@ -1,5 +1,5 @@
 # Copyright 2020,2021 Sony Corporation.
-# Copyright 2021,2022,2023 Sony Group Corporation.
+# Copyright 2021,2022,2023,2024 Sony Group Corporation.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -244,20 +244,16 @@ def __init__(self, env_or_env_info: Union[gym.Env, EnvironmentInfo],
         self._pi = policy_builder(scope_name="pi", env_info=self._env_info, algorithm_config=self._config)
         self._pi_solver = policy_solver_builder(self._env_info, self._config)
 
-        self._temperature = MT.policy_trainers.soft_policy_trainer.AdjustableTemperature(
-            scope_name='temperature',
-            initial_value=self._config.initial_temperature)
+        self._temperature = self._setup_temperature_model()
         if not self._config.fix_temperature:
             self._temperature_solver = temperature_solver_builder(self._env_info, self._config)
         else:
             self._temperature_solver = None
 
         self._replay_buffer = replay_buffer_builder(self._env_info, self._config)
 
-        self._evaluation_actor = _StochasticPolicyActionSelector(
-            self._env_info, self._pi.shallowcopy(), deterministic=True)
-        self._exploration_actor = _StochasticPolicyActionSelector(
-            self._env_info, self._pi.shallowcopy(), deterministic=False)
+        self._evaluation_actor = self._setup_evaluation_actor()
+        self._exploration_actor = self._setup_exploration_actor()
 
     @eval_api
     def compute_eval_action(self, state, *, begin_of_episode=False, extra_info={}):
@@ -270,12 +266,22 @@ def _before_training_start(self, env_or_buffer):
         context.set_nnabla_context(self._config.gpu_id)
         self._environment_explorer = self._setup_environment_explorer(env_or_buffer)
         self._policy_trainer = self._setup_policy_training(env_or_buffer)
-        self._q_function_trainer = self._setup_q_function_training(
-            env_or_buffer)
+        self._q_function_trainer = self._setup_q_function_training(env_or_buffer)
 
+    def _setup_evaluation_actor(self):
+        return _StochasticPolicyActionSelector(self._env_info, self._pi.shallowcopy(), deterministic=True)
+
+    def _setup_exploration_actor(self):
+        return _StochasticPolicyActionSelector(self._env_info, self._pi.shallowcopy(), deterministic=False)
+
     def _setup_environment_explorer(self, env_or_buffer):
         return None if self._is_buffer(env_or_buffer) else self._explorer_builder(self._env_info, self._config, self)
 
+    def _setup_temperature_model(self):
+        return MT.policy_trainers.soft_policy_trainer.AdjustableTemperature(
+            scope_name='temperature',
+            initial_value=self._config.initial_temperature)
+
     def _setup_policy_training(self, env_or_buffer):
         policy_trainer_config = MT.policy_trainers.SoftPolicyTrainerConfig(
             fixed_temperature=self._config.fix_temperature,
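The refactoring above turns inline construction in __init__ into overridable _setup_* factory methods. The payoff, sketched here with a hypothetical subclass (SRSAC itself lives in nnabla_rl/algorithms/srsac.py and is not shown in this hunk), is that a derived algorithm can swap a single component without rewriting __init__:

```python
# Hypothetical sketch (not from this PR): a SAC subclass overriding one of the
# new _setup_* hooks. The initial_value below is illustrative only.
import nnabla_rl.model_trainers as MT
from nnabla_rl.algorithms.sac import SAC


class WarmStartTemperatureSAC(SAC):
    def _setup_temperature_model(self):
        # Same AdjustableTemperature as the base class, different initialization.
        return MT.policy_trainers.soft_policy_trainer.AdjustableTemperature(
            scope_name='temperature', initial_value=0.5)
```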