go1-rlgames (#2)
* first commit

* working example

---------

Co-authored-by: nexus <[email protected]>
gonzalesMK and nexus-labrom authored Oct 5, 2024
1 parent 3ec3955 commit 667ec4f
Showing 10 changed files with 1,170 additions and 0 deletions.
27 changes: 27 additions & 0 deletions README.md
@@ -1,3 +1,30 @@
# How to contribute to this EESC FORK

1. Create a branch with this name pattern: `<firstname>.<surname>/<creation date>/<feature name>`
2. Do not change any file from the original code, because this fork is constantly synced with the upstream repository.
3. Put your experiments in a folder under `source/experiments/`.
4. Create a README for your experiment so people know how to run the code.
5. When making a Pull Request to main, add a changelog entry here explaining your feature.


# Experiments

### `source/experiments/go1_velocity_flat`

I created this experiment with two purposes:
1. To make it clear what the configuration classes for the task actually are (the original ones are spread across many levels of inheritance); see the sketch below.
2. To run the Go1 environments with rl_games as well.
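
As a rough illustration of what "flattening" means here (a hypothetical sketch, not the experiment's actual file; `Go1FlatEnvCfg` and its field values are assumptions, while `configclass` and `ManagerBasedRLEnvCfg` are real Isaac Lab names):

```python
# Hypothetical sketch: one explicit config class instead of a chain of
# inherited ones. Only a couple of real ManagerBasedRLEnvCfg fields are shown.
from omni.isaac.lab.envs import ManagerBasedRLEnvCfg
from omni.isaac.lab.utils import configclass


@configclass
class Go1FlatEnvCfg(ManagerBasedRLEnvCfg):
    # stepping settings written out directly, not overridden up the hierarchy
    decimation = 4
    episode_length_s = 20.0
    # scene, observations, actions, rewards, etc. would likewise be spelled
    # out explicitly in this single file
```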


To run it:

```bash
python source/experiments/go1_velocity_flat/rl_train.py --headless --num_envs 4096
python source/experiments/go1_velocity_flat/skrl_train.py --headless --num_envs 4096
```

Results: the skrl version runs faster.

---


![Isaac Lab](docs/source/_static/isaaclab.jpg)

---
Empty file added source/experiments/__init__.py
4 changes: 4 additions & 0 deletions source/experiments/go1_velocity_flat/agents/__init__.py
@@ -0,0 +1,4 @@
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
74 changes: 74 additions & 0 deletions source/experiments/go1_velocity_flat/agents/rl_games_ppo_cfg.yaml
@@ -0,0 +1,74 @@
params:
  seed: 43

  # environment wrapper clipping
  env:
    clip_actions: 1.0

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: False
    space:
      continuous:
        mu_activation: None
        sigma_activation: None
        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True
    mlp:
      units: [128, 128, 128]
      activation: elu
      d2rl: False

      initializer:
        name: default
      regularizer:
        name: None

  load_checkpoint: False  # flag which sets whether to load the checkpoint
  load_path: ''  # path to the checkpoint to load

  config:
    name: go1_flat
    env_name: rlgpu
    device: 'cuda:0'
    device_name: 'cuda:0'
    multi_gpu: False
    ppo: True
    mixed_precision: False
    normalize_input: True
    normalize_value: True
    value_bootstrap: True
    num_actors: -1  # configured from the script (based on num_envs)
    reward_shaper:
      scale_value: 1.0
    normalize_advantage: True
    gamma: 0.99
    tau: 0.95
    learning_rate: 1e-3
    lr_schedule: adaptive
    kl_threshold: 0.008
    score_to_win: 20000
    max_epochs: 36000
    save_best_after: 100
    save_frequency: 50
    grad_norm: 1.0
    entropy_coef: 0.01
    truncate_grads: True
    e_clip: 0.2
    horizon_length: 24
    minibatch_size: 12288  # (4096 envs * 24 horizon steps) / 8 minibatches
    mini_epochs: 5
    critic_coef: 4
    clip_value: False  # not needed if the value is normalized
    # seq_length: 21
    bounds_loss_coef: 0.0001
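
For context, `num_actors: -1` above is a placeholder that the launcher is expected to overwrite before training starts. A minimal sketch of how a script like `rl_train.py` might do this with rl_games (an assumption, not the repository's actual code; creating and registering the `rlgpu` vectorized environment is omitted):

```python
# Minimal sketch, not the repository's actual rl_train.py: load the YAML,
# patch num_actors from --num_envs, and hand the config to rl_games' Runner.
# Env creation/registration for "rlgpu" is omitted, so this alone won't train.
import argparse

import yaml
from rl_games.torch_runner import Runner

parser = argparse.ArgumentParser()
parser.add_argument("--num_envs", type=int, default=4096)
parser.add_argument("--headless", action="store_true")
args = parser.parse_args()

with open("source/experiments/go1_velocity_flat/agents/rl_games_ppo_cfg.yaml") as f:
    agent_cfg = yaml.safe_load(f)

# replace the -1 placeholder so batch sizes match the actual env count
agent_cfg["params"]["config"]["num_actors"] = args.num_envs

runner = Runner()
runner.load(agent_cfg)
runner.run({"train": True, "play": False})
```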
80 changes: 80 additions & 0 deletions source/experiments/go1_velocity_flat/agents/skrl_flat_ppo_cfg.yaml
@@ -0,0 +1,80 @@
seed: 42


# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
  separate: False
  policy:  # see gaussian_model parameters
    class: GaussianMixin
    clip_actions: False
    clip_log_std: True
    min_log_std: -20.0
    max_log_std: 2.0
    initial_log_std: 0.0
    network:
      - name: net
        input: STATES
        layers: [128, 128, 128]
        activations: elu
    output: ACTIONS
  value:  # see deterministic_model parameters
    class: DeterministicMixin
    clip_actions: False
    network:
      - name: net
        input: STATES
        layers: [128, 128, 128]
        activations: elu
    output: ONE


# Rollout memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
  class: RandomMemory
  memory_size: -1  # automatically determined (same as agent:rollouts)


# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
  class: PPO
  rollouts: 24
  learning_epochs: 5
  mini_batches: 4
  discount_factor: 0.99
  lambda: 0.95
  learning_rate: 1.0e-03
  learning_rate_scheduler: KLAdaptiveLR
  learning_rate_scheduler_kwargs:
    kl_threshold: 0.01
  state_preprocessor: null
  state_preprocessor_kwargs: null
  value_preprocessor: null
  value_preprocessor_kwargs: null
  random_timesteps: 0
  learning_starts: 0
  grad_norm_clip: 1.0
  ratio_clip: 0.2
  value_clip: 0.2
  clip_predicted_values: True
  entropy_loss_scale: 0.01
  value_loss_scale: 1.0
  kl_threshold: 0.0
  rewards_shaper_scale: 1.0
  time_limit_bootstrap: False
  # logging and checkpoint
  experiment:
    directory: "unitree_go1_flat"
    experiment_name: ""
    write_interval: 100
    checkpoint_interval: 5000


# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
  class: SequentialTrainer
  timesteps: 720000
  environment_info: log
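
A minimal sketch of how a script like `skrl_train.py` might consume this config with skrl's `Runner` utility (an assumption about the launcher, not the repository's actual code; the task ID below is the standard Isaac Lab Go1 flat-velocity name):

```python
# Minimal sketch, not the repository's actual skrl_train.py: load an Isaac Lab
# environment, wrap it for skrl, and train with the YAML config above.
import yaml

from skrl.envs.loaders.torch import load_isaaclab_env
from skrl.envs.wrappers.torch import wrap_env
from skrl.utils.runner.torch import Runner

# assumed task ID; num_envs/headless are passed directly as kwargs
env = load_isaaclab_env(task_name="Isaac-Velocity-Flat-Unitree-Go1-v0",
                        num_envs=4096, headless=True)
env = wrap_env(env)

with open("source/experiments/go1_velocity_flat/agents/skrl_flat_ppo_cfg.yaml") as f:
    cfg = yaml.safe_load(f)

runner = Runner(env, cfg)  # instantiates models, memory, PPO agent, trainer
runner.run("train")
```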