Juliano.negri/2024 10 10/improve basic example (#3)

* Included PD delayed motor
* Delayed motor and diverse terrain
* Included material back
* Removed backpack from commit
EESC-LabRoM · Oct 14, 2024 · 9a9cbe4 · 9a9cbe4
1 parent 667ec4f
commit 9a9cbe4
Show file tree

Hide file tree

Showing 5 changed files with 187 additions and 16 deletions.
diff --git a/source/experiments/go1_velocity_flat/agents/skrl_flat_ppo_cfg.yaml b/source/experiments/go1_velocity_flat/agents/skrl_flat_ppo_cfg.yaml
@@ -15,7 +15,7 @@ models:
     network:
       - name: net
         input: STATES
-        layers: [128, 128, 128]
+        layers: [512, 258, 128]
         activations: elu
     output: ACTIONS
   value:  # see deterministic_model parameters
@@ -24,7 +24,7 @@ models:
     network:
       - name: net
         input: STATES
-        layers: [128, 128, 128]
+        layers: [512, 258, 128]
         activations: elu
     output: ONE
 

diff --git a/source/experiments/go1_velocity_flat/go1_velocity_flat.py b/source/experiments/go1_velocity_flat/go1_velocity_flat.py
@@ -7,7 +7,7 @@
 
 import omni.isaac.lab.sim as sim_utils
 import omni.isaac.lab_tasks.manager_based.locomotion.velocity.mdp as mdp
-from omni.isaac.lab.actuators import ActuatorNetMLPCfg
+from omni.isaac.lab.actuators import DelayedActuatorNetMLPCfg
 from omni.isaac.lab.assets import ArticulationCfg, AssetBaseCfg
 from omni.isaac.lab.envs import ManagerBasedRLEnvCfg
 from omni.isaac.lab.managers import EventTermCfg as EventTerm
@@ -22,13 +22,18 @@
 from omni.isaac.lab.utils import configclass
 from omni.isaac.lab.utils.assets import ISAAC_NUCLEUS_DIR, ISAACLAB_NUCLEUS_DIR
 from omni.isaac.lab.utils.noise import AdditiveUniformNoiseCfg as Unoise
+import omni.isaac.lab.terrains as terrain_gen
 
 ##
 # Configuration - Actuators.
 ##
 
-GO1_ACTUATOR_CFG = ActuatorNetMLPCfg(
-    joint_names_expr=[".*_hip_joint", ".*_thigh_joint", ".*_calf_joint"],
+
+GO1_ACTUATOR_CFG = DelayedActuatorNetMLPCfg(
+    joint_names_expr=[
+        ".*_hip_joint",
+        ".*_thigh_joint",
+    ],
     network_file=f"{ISAACLAB_NUCLEUS_DIR}/ActuatorNets/Unitree/unitree_go1.pt",
     pos_scale=-1.0,
     vel_scale=1.0,
@@ -38,7 +43,25 @@
     effort_limit=23.7,  # taken from spec sheet
     velocity_limit=30.0,  # taken from spec sheet
     saturation_effort=23.7,  # same as effort limit
+    min_delay=4,
+    max_delay=5,
 )
+
+# Delayed MLP actuator configuration applied only to the calf (knee) joints.
+# It points at the same network file and uses the same scales, input layout and
+# delay bounds as GO1_ACTUATOR_CFG; only the joint selection and the
+# effort/saturation limits differ.
+GO1_ACTUATOR_CFG_KNEE = DelayedActuatorNetMLPCfg(
+    joint_names_expr=[".*_calf_joint"],
+    network_file=f"{ISAACLAB_NUCLEUS_DIR}/ActuatorNets/Unitree/unitree_go1.pt",
+    pos_scale=-1.0,
+    vel_scale=1.0,
+    torque_scale=1.0,
+    input_order="pos_vel",
+    input_idx=[0, 1, 2],
+    # NOTE(review): 35.55 is 1.5 x the 23.7 used for the hip/thigh joints; the
+    # origin of this value is not cited here - confirm against the spec sheet.
+    effort_limit=35.55,
+    velocity_limit=30.0,  # taken from spec sheet
+    saturation_effort=35.55,  # same as effort limit
+    # delay bounds are counted in simulation steps
+    min_delay=4,
+    max_delay=5,  # number of dT (author's estimate: ~20ms - TODO confirm against the sim dt)
+)
+
 """Configuration of Go1 actuators using MLP model.
 
 Actuator specifications: https://shop.unitree.com/products/go1-motor
@@ -81,10 +104,33 @@
     soft_joint_pos_limit_factor=0.9,
     actuators={
         "base_legs": GO1_ACTUATOR_CFG,
+        "knee": GO1_ACTUATOR_CFG_KNEE,
     },
 )
 """Configuration of Unitree Go1 using MLP-based actuator model."""
 
+# Terrain generator configuration handed to ``MySceneCfg.terrain`` in place of the
+# flat plane. It contains a single height-field stepping-stones sub-terrain.
+# The play configuration shrinks ``num_rows``, ``num_cols`` and ``size`` of this
+# object in its ``__post_init__``.
+COBBLESTONE_ROAD_CFG = terrain_gen.TerrainGeneratorCfg(
+    # presumably the (x, y) extent of one sub-terrain tile - TODO confirm units
+    size=(10.0, 10.0),
+    border_width=20.0,
+    num_rows=50,
+    num_cols=50,
+    horizontal_scale=0.25,
+    vertical_scale=0.01,
+    slope_threshold=0.75,
+    difficulty_range=(0.0, 1.0),
+    use_cache=False,
+    sub_terrains={
+        # NOTE(review): the key says "random_rough" but the entry is a
+        # stepping-stones terrain; the key is kept as-is because it is runtime data.
+        "random_rough": terrain_gen.HfSteppingStonesTerrainCfg(
+            # ``proportion`` is intentionally not set here (a value of 0.2 was tried and disabled)
+            stone_height_max=0.1,
+            stone_width_range=(0.25, 2),
+            stone_distance_range=(0.25, 2),
+            holes_depth=-.05,
+            platform_width=3,
+        ),
+    },
+)
+
 
 @configclass
 class MySceneCfg(InteractiveSceneCfg):
@@ -93,8 +139,8 @@ class MySceneCfg(InteractiveSceneCfg):
     # ground terrain
     terrain = TerrainImporterCfg(
         prim_path="/World/ground",
-        terrain_type="plane",
-        terrain_generator=None,
+        terrain_type="generator",
+        terrain_generator=COBBLESTONE_ROAD_CFG,
         collision_group=-1,
         physics_material=sim_utils.RigidBodyMaterialCfg(
             friction_combine_mode="multiply",
@@ -122,8 +168,7 @@ class MySceneCfg(InteractiveSceneCfg):
             intensity=750.0,
             texture_file=f"{ISAAC_NUCLEUS_DIR}/Materials/Textures/Skies/PolyHaven/kloofendal_43d_clear_puresky_4k.hdr",
         ),
-    )
-
+    ) 
 
 ##
 # MDP settings
@@ -141,8 +186,8 @@ class CommandsCfg:
         heading_control_stiffness=0.5,
         debug_vis=True,
         ranges=mdp.UniformVelocityCommandCfg.Ranges(
-            lin_vel_x=(-1.0, 1.0),
-            lin_vel_y=(-1.0, 1.0),
+            lin_vel_x=(-.50, 1.5),
+            lin_vel_y=(-.10, .10),
             ang_vel_z=(-1.0, 1.0),
             heading=(-math.pi, math.pi),
         ),
@@ -187,7 +232,7 @@ class PolicyCfg(ObsGroup):
         actions = ObsTerm(func=mdp.last_action)
 
         def __post_init__(self):
-            self.enable_corruption = True
+            self.enable_corruption = False
             self.concatenate_terms = True
 
     # observation groups
@@ -325,7 +370,7 @@ class UnitreeGo1FlatEnvCfg(ManagerBasedRLEnvCfg):
     """Configuration for the locomotion velocity-tracking environment."""
 
     # Scene settings
-    scene: MySceneCfg = MySceneCfg(num_envs=4096, env_spacing=25)
+    scene: MySceneCfg = MySceneCfg(num_envs=4096, env_spacing=10)
     # Basic settings
     observations: ObservationsCfg = ObservationsCfg()
     actions: ActionsCfg = ActionsCfg()
@@ -361,6 +406,10 @@ def __post_init__(self) -> None:
         # make a smaller scene for play
         self.scene.num_envs = 50
         self.scene.env_spacing = 2.5
+        self.scene.terrain.terrain_generator.num_cols = 8
+        self.scene.terrain.terrain_generator.num_rows = 8
+        self.scene.terrain.terrain_generator.size = (2, 2)
+
         # disable randomization for play
         self.observations.policy.enable_corruption = False
         # remove random pushing event

diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/actuators/__init__.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/actuators/__init__.py
@@ -32,6 +32,7 @@
     IdealPDActuatorCfg,
     ImplicitActuatorCfg,
     RemotizedPDActuatorCfg,
+    DelayedActuatorNetMLPCfg
 )
 from .actuator_net import ActuatorNetLSTM, ActuatorNetMLP
 from .actuator_pd import DCMotor, DelayedPDActuator, IdealPDActuator, ImplicitActuator, RemotizedPDActuator
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/actuators/actuator_cfg.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/actuators/actuator_cfg.py
@@ -155,6 +155,49 @@ class ActuatorNetMLPCfg(DCMotorCfg):
     time-step in the past. The allocated history length is `max(input_idx) + 1`.
     """
 
+
+@configclass
+class DelayedActuatorNetMLPCfg(ActuatorNetMLPCfg):
+    """Configuration for an MLP-based actuator whose commands are applied with a delay.
+
+    The network settings (``network_file``, ``pos_scale``, ``vel_scale``, ``torque_scale``,
+    ``input_order`` and ``input_idx``) are inherited from :class:`ActuatorNetMLPCfg` and are
+    deliberately not re-declared here, so that their defaults and documentation live in one
+    place. This class only selects the delayed actuator implementation and adds the bounds
+    of the command delay.
+    """
+
+    class_type: type = actuator_net.DelayedActuatorNetMLP
+    # we don't use stiffness and damping for actuator net
+    stiffness = None
+    damping = None
+
+    min_delay: int = 0
+    """Minimum number of physics time-steps with which the actuator command may be delayed. Defaults to 0."""
+
+    max_delay: int = 0
+    """Maximum number of physics time-steps with which the actuator command may be delayed. Defaults to 0."""
+
 
 @configclass
 class DelayedPDActuatorCfg(IdealPDActuatorCfg):

diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/actuators/actuator_net.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/actuators/actuator_net.py
@@ -14,6 +14,8 @@
 
 from __future__ import annotations
 
+from omni.isaac.lab.utils.buffers.delay_buffer import DelayBuffer
+
 import torch
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
@@ -25,7 +27,7 @@
 from .actuator_pd import DCMotor
 
 if TYPE_CHECKING:
-    from .actuator_cfg import ActuatorNetLSTMCfg, ActuatorNetMLPCfg
+    from .actuator_cfg import ActuatorNetLSTMCfg, ActuatorNetMLPCfg, DelayedActuatorNetMLPCfg
 
 
 class ActuatorNetLSTM(DCMotor):
@@ -178,12 +180,88 @@ def compute(
         # run network inference
         torques = self.network(network_input).view(self._num_envs, self.num_joints)
         self.computed_effort = torques.view(self._num_envs, self.num_joints) * self.cfg.torque_scale
-
+        # print(f"Self computed effort: {self.computed_effort}")
         # clip the computed effort based on the motor limits
         self.applied_effort = self._clip_effort(self.computed_effort)
-
+        # print(f"Self applied effort: {self.applied_effort}")
         # return torques
         control_action.joint_efforts = self.applied_effort
         control_action.joint_positions = None
         control_action.joint_velocities = None
         return control_action
+
+    def _clip_effort(self, effort: torch.Tensor) -> torch.Tensor:
+        """Clamp the computed effort to the symmetric saturation band.
+
+        The result is bounded by ``+/- self._saturation_effort`` only. The
+        velocity-dependent scaling of the bounds and the additional clamp to
+        ``effort_limit`` are intentionally not applied by this implementation.
+
+        NOTE(review): ``compute`` previously relied on an inherited ``_clip_effort``;
+        defining the method here changes the clipping for every user of this
+        actuator class - confirm that this is intended.
+
+        Args:
+            effort: The efforts produced by the actuator network.
+
+        Returns:
+            The efforts limited to ``[-saturation_effort, +saturation_effort]``.
+        """
+        bound = self._saturation_effort
+        # symmetric clamp; no dependence on the current joint velocity
+        return torch.clamp(effort, min=-bound, max=bound)
+
+
+class DelayedActuatorNetMLP(ActuatorNetMLP):
+    """MLP-based actuator model whose command set-points are applied with a time lag.
+
+    This class extends :class:`ActuatorNetMLP` by routing the commanded joint positions,
+    velocities and efforts through one :class:`DelayBuffer` each before the network-based
+    actuator model is evaluated. The most recent command is pushed into the buffers on every
+    call to :meth:`compute`, and the lagged value is what the parent model receives.
+
+    The time lag is drawn per environment on every :meth:`reset`, uniformly from the integers
+    in ``[cfg.min_delay, cfg.max_delay]`` (both inclusive). The same lag is used for the
+    position, velocity and effort buffers of an environment.
+    """
+
+    cfg: DelayedActuatorNetMLPCfg
+    """The configuration for the actuator model."""
+
+    def __init__(self, cfg: DelayedActuatorNetMLPCfg, *args, **kwargs):
+        """Initialize the parent actuator model and allocate the delay buffers.
+
+        Args:
+            cfg: The configuration of the delayed actuator model.
+            *args: Positional arguments forwarded to :class:`ActuatorNetMLP`.
+            **kwargs: Keyword arguments forwarded to :class:`ActuatorNetMLP`.
+
+        Raises:
+            ValueError: If the delay bounds do not satisfy ``0 <= min_delay <= max_delay``.
+        """
+        # fail early with a readable message; otherwise an invalid range only surfaces as an
+        # error from ``torch.randint`` during the first reset
+        if not 0 <= cfg.min_delay <= cfg.max_delay:
+            raise ValueError(
+                "Invalid actuator delay range: expected 0 <= min_delay <= max_delay, got"
+                f" min_delay={cfg.min_delay} and max_delay={cfg.max_delay}."
+            )
+        super().__init__(cfg, *args, **kwargs)
+        # instantiate the delay buffers (history sized by the largest admissible lag)
+        self.positions_delay_buffer = DelayBuffer(cfg.max_delay, self._num_envs, device=self._device)
+        self.velocities_delay_buffer = DelayBuffer(cfg.max_delay, self._num_envs, device=self._device)
+        self.efforts_delay_buffer = DelayBuffer(cfg.max_delay, self._num_envs, device=self._device)
+        # all of the envs (not referenced by the methods of this class; kept as a public-ish attribute)
+        self._ALL_INDICES = torch.arange(self._num_envs, dtype=torch.long, device=self._device)
+
+    def reset(self, env_ids: Sequence[int] | slice | None):
+        """Reset the actuator and draw a new time lag for the given environments.
+
+        Args:
+            env_ids: The environments to reset. ``None`` and ``slice(None)`` both select
+                all environments.
+        """
+        super().reset(env_ids)
+        # number of environments (since env_ids can be a slice); the isinstance check keeps
+        # tensors and lists away from the comparison against ``slice(None)``
+        if env_ids is None or (isinstance(env_ids, slice) and env_ids == slice(None)):
+            num_envs = self._num_envs
+        else:
+            num_envs = len(env_ids)
+        # set a new random delay for environments in env_ids (upper bound of randint is exclusive)
+        time_lags = torch.randint(
+            low=self.cfg.min_delay,
+            high=self.cfg.max_delay + 1,
+            size=(num_envs,),
+            dtype=torch.int,
+            device=self._device,
+        )
+        # set delays: the three buffers share the same lag per environment
+        self.positions_delay_buffer.set_time_lag(time_lags, env_ids)
+        self.velocities_delay_buffer.set_time_lag(time_lags, env_ids)
+        self.efforts_delay_buffer.set_time_lag(time_lags, env_ids)
+
+        # reset buffers
+        self.positions_delay_buffer.reset(env_ids)
+        self.velocities_delay_buffer.reset(env_ids)
+        self.efforts_delay_buffer.reset(env_ids)
+
+    def compute(
+        self, control_action: ArticulationActions, joint_pos: torch.Tensor, joint_vel: torch.Tensor
+    ) -> ArticulationActions:
+        """Delay the commanded set-points and evaluate the MLP actuator model on them.
+
+        Args:
+            control_action: The commanded joint positions, velocities and efforts. The object
+                is modified in place: its set-points are replaced by their delayed values.
+            joint_pos: The current joint positions.
+            joint_vel: The current joint velocities.
+
+        Returns:
+            The control action returned by :meth:`ActuatorNetMLP.compute`.
+        """
+        # push the newest set-points into the buffers and read back the lagged ones
+        control_action.joint_positions = self.positions_delay_buffer.compute(control_action.joint_positions)
+        control_action.joint_velocities = self.velocities_delay_buffer.compute(control_action.joint_velocities)
+        control_action.joint_efforts = self.efforts_delay_buffer.compute(control_action.joint_efforts)
+
+        # compute actuator model
+        return super().compute(control_action, joint_pos, joint_vel)
+