Resolving some issues with the modification of the PPO algorithm. Trying to find the source of the crash when training.
isaac-racine committed Sep 20, 2024
1 parent 6ad1f1e commit 6fa321d
Showing 6 changed files with 107 additions and 170 deletions.
@@ -258,6 +258,10 @@ def compute_group(self, group_name: str) -> torch.Tensor | dict[str, torch.Tensor]:
# add value to list
group_obs[name] = obs

+ for key, tensor in group_obs.items():
+     if torch.isnan(tensor).any():
+         print(f"NaN values found in {key}")

# concatenate all observations in the group together
if self._group_obs_concatenate[group_name]:
return torch.cat(list(group_obs.values()), dim=-1)
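The added loop only reports NaNs after all terms are computed, and only by name. A stricter variant (a sketch, not repo code) would fail fast and also catch infinities, which makes it easier to pin down the first offending term and environments when hunting a training crash:

```python
import torch

def assert_group_finite(group_obs: dict[str, torch.Tensor], group_name: str) -> None:
    """Hypothetical helper: raise with diagnostics instead of printing."""
    for name, tensor in group_obs.items():
        bad = ~torch.isfinite(tensor)  # catches NaN and +/-inf, not just NaN
        if bad.any():
            # assume tensors are batched as (num_envs, ...) per observation term
            env_ids = bad.reshape(bad.shape[0], -1).any(dim=-1).nonzero(as_tuple=False).flatten()
            raise ValueError(
                f"Non-finite values in term '{name}' of group '{group_name}' "
                f"for env ids {env_ids.tolist()[:10]}"
            )
```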
@@ -267,6 +267,7 @@ def compute(self, dt: float) -> tuple[torch.Tensor, dict[str, torch.Tensor]]:
value = term_cfg.func(self._env, **term_cfg.params) * term_cfg.weight * dt
# update total reward
self._reward_buf += value
+ print(self._reward_buf)
# update episodic sum
self._episode_sums[name] += value

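Printing the full reward buffer on every term of every step will flood stdout and slow training noticeably. A gated variant (a sketch assuming `self._reward_buf` is a per-env tensor and `name` is the term being accumulated in the loop above) only speaks up when something is actually wrong:

```python
# sketch: only print when the running total stops being finite
if not torch.isfinite(self._reward_buf).all():
    bad = (~torch.isfinite(self._reward_buf)).nonzero(as_tuple=False).flatten()
    print(f"[reward] non-finite total after term '{name}': envs {bad.tolist()[:10]}")
```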
@@ -8,47 +8,17 @@

from omni.isaac.lab_tasks.utils.wrappers.rsl_rl_unified_policy import *

- #@configclass
- #class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
- #    num_steps_per_env = 24
- #    max_iterations = 50000
- #    save_interval = 100
- #    experiment_name = "locomanip"
- #    empirical_normalization = False
- #    policy = RslRlPpoActorCriticCfg(
- #        init_noise_std=1.0,
- #        actor_hidden_dims=[512, 256, 128],
- #        critic_hidden_dims=[512, 256, 128],
- #        activation="elu",
- #        #actor_w_zero=True,
- #        #static_noise=True,
- #    )
- #    algorithm = RslRlPpoAlgorithmCfg(
- #        value_loss_coef=1.0,
- #        use_clipped_value_loss=True,
- #        clip_param=0.2,
- #        entropy_coef=0.0,  # 0.0 prevents the noise from increasing above init value
- #        num_learning_epochs=5,
- #        num_mini_batches=6,
- #        learning_rate=1e-3,
- #        schedule="exponential",
- #        schedule_gamma=0.99,
- #        gamma=0.99,
- #        lam=0.95,
- #        desired_kl=0.01,
- #        max_grad_norm=1.0,
- #    )

@configclass
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
- num_steps_per_env = 24
+ num_steps_per_env = 40
max_iterations = 10000
- save_interval = 100
+ save_interval = 200
experiment_name = "locomanip"
empirical_normalization = False
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
- actor_hidden_dims=[512, 256, 128],
+ actor_hidden_dims=[512, 256, 128],  # not like the paper (supposed to be a 128 trunk that then separates into two heads of 128: one for the manip and one for the loco)
critic_hidden_dims=[512, 256, 128],
activation="elu",
)
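The comment on `actor_hidden_dims` refers to a trunk-plus-two-heads layout from the paper. A minimal sketch of that idea (class name and dimensions are assumptions, not code from this repo):

```python
import torch
import torch.nn as nn

class TwoHeadActor(nn.Module):
    """Hypothetical sketch: a shared 128-wide trunk that splits into
    separate locomotion and manipulation heads."""

    def __init__(self, obs_dim: int, loco_dim: int, manip_dim: int, hidden: int = 128):
        super().__init__()
        self.trunk = nn.Sequential(nn.Linear(obs_dim, hidden), nn.ELU())
        self.loco_head = nn.Sequential(nn.Linear(hidden, hidden), nn.ELU(), nn.Linear(hidden, loco_dim))
        self.manip_head = nn.Sequential(nn.Linear(hidden, hidden), nn.ELU(), nn.Linear(hidden, manip_dim))

    def forward(self, obs: torch.Tensor) -> torch.Tensor:
        z = self.trunk(obs)
        # concatenate per-head action means into one action vector
        return torch.cat([self.loco_head(z), self.manip_head(z)], dim=-1)
```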
@@ -59,10 +29,10 @@ class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
entropy_coef=0.01,
num_learning_epochs=5,
num_mini_batches=4,
- learning_rate=1.0e-3,
+ learning_rate=2.0e-4,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
- max_grad_norm=1.0,
+ max_grad_norm=1.0,  # ??
)
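Note the switch from `schedule="exponential"` in the removed block to `schedule="adaptive"` with `desired_kl=0.01`. In rsl_rl the adaptive schedule rescales the learning rate from the measured KL divergence at each update, roughly as below; the exact constants are quoted from memory and should be checked against the installed rsl_rl version:

```python
# sketch of rsl_rl's adaptive learning-rate rule (constants are assumptions)
def adapt_learning_rate(learning_rate: float, kl_mean: float, desired_kl: float = 0.01) -> float:
    if kl_mean > desired_kl * 2.0:
        return max(1e-5, learning_rate / 1.5)   # KL too large -> slow down
    if 0.0 < kl_mean < desired_kl / 2.0:
        return min(1e-2, learning_rate * 1.5)   # KL too small -> speed up
    return learning_rate
```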
@@ -71,60 +71,6 @@
# Scene definition
##

- # flat_terrain = TerrainImporterCfg(
- #     prim_path="/World/ground",
- #     terrain_type="plane",
- #     collision_group=-1,
- #     physics_material=sim_utils.RigidBodyMaterialCfg(
- #         friction_combine_mode="multiply",
- #         restitution_combine_mode="multiply",
- #         static_friction=1.0,
- #         dynamic_friction=1.0,
- #     ),
- #     visual_material=sim_utils.MdlFileCfg(
- #         mdl_path="{NVIDIA_NUCLEUS_DIR}/Materials/Base/Architecture/Shingles_01.mdl",
- #         project_uvw=True,
- #     ),
- #     debug_vis=DEBUG_VIS,
- # )
-
- # terrain = TerrainImporterCfg(
- #     prim_path="/World/ground",
- #     terrain_type="usd",
- #     usd_path="omniverse://localhost/Projects/random_terrain.usd",
- #     collision_group=-1,
- #     physics_material=sim_utils.RigidBodyMaterialCfg(
- #         friction_combine_mode="multiply",
- #         restitution_combine_mode="multiply",
- #         static_friction=1.0,
- #         dynamic_friction=1.0,
- #     ),
- #     visual_material=sim_utils.MdlFileCfg(
- #         mdl_path="{NVIDIA_NUCLEUS_DIR}/Materials/Base/Architecture/Shingles_01.mdl",
- #         project_uvw=True,
- #     ),
- #     debug_vis=DEBUG_VIS,
- # )
-
- # curriculum_terrain = TerrainImporterCfg(
- #     prim_path="/World/ground",
- #     terrain_type="generator",
- #     terrain_generator=CUSTOM_TERRAIN_CFG.replace(size=(TERRAIN_SZ,TERRAIN_SZ)),
- #     max_init_terrain_level=0,
- #     collision_group=-1,
- #     physics_material=sim_utils.RigidBodyMaterialCfg(
- #         friction_combine_mode="multiply",
- #         restitution_combine_mode="multiply",
- #         static_friction=1.0,
- #         dynamic_friction=1.0,
- #     ),
- #     visual_material=sim_utils.MdlFileCfg(
- #         mdl_path="{NVIDIA_NUCLEUS_DIR}/Materials/Base/Architecture/Shingles_01.mdl",
- #         project_uvw=True,
- #     ),
- #     debug_vis=DEBUG_VIS,
- # )

@configclass
class MySceneCfg(InteractiveSceneCfg):
"""Configuration for the terrain scene with a legged robot."""
@@ -289,10 +235,6 @@ class PolicyCfg(ObservationGroupCfg):
base_ang_vel = ObservationTermCfg(func=mdp.base_ang_vel, noise=Unoise(n_min=-0.2, n_max=0.2))

#base_lin_vel = ObservationTermCfg(func=mdp.base_lin_vel, noise=Unoise(n_min=-0.1, n_max=0.1))
- # projected_gravity = ObservationTermCfg(
- #     func=mdp.projected_gravity,
- #     noise=Unoise(n_min=-0.05, n_max=0.05),
- # )

#Arm State R12 (joint pose and velocity) (maybe R14)
#Leg State R28 (joint pos, joint vel, foot contact)
@@ -312,7 +254,10 @@ class PolicyCfg(ObservationGroupCfg):

#Environment extrinsics R20 (...) for sim-to-real transfer


+ projected_gravity = ObservationTermCfg(
+     func=mdp.projected_gravity,
+     noise=Unoise(n_min=-0.05, n_max=0.05),
+ )
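For reference, `projected_gravity` conventionally returns the world gravity direction expressed in the robot's base frame, so the policy can sense tilt without an explicit orientation estimate. A self-contained sketch under the assumption of a (w, x, y, z) quaternion convention; Isaac Lab's own `mdp.projected_gravity` may differ in details:

```python
import torch

def projected_gravity_sketch(base_quat_wxyz: torch.Tensor) -> torch.Tensor:
    """Rotate the world gravity direction (0, 0, -1) into the base frame."""
    w, x, y, z = base_quat_wxyz.unbind(-1)
    # g_body = R(q)^T @ (0, 0, -1), expanded in closed form
    gx = -2.0 * (x * z - w * y)
    gy = -2.0 * (y * z + w * x)
    gz = -(1.0 - 2.0 * (x * x + y * y))
    return torch.stack((gx, gy, gz), dim=-1)
```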

#OTHER
#
@@ -344,7 +289,7 @@ class EventCfg:
"asset_cfg": SceneEntityCfg("robot", body_names=".*"),
"static_friction_range": (0.8, 0.8),
"dynamic_friction_range": (0.6, 0.6),
"restitution_range": (0.0, 0.5),
"restitution_range": (0.0, 0.0),
"num_buckets": 64,
},
)
@@ -357,37 +302,38 @@ class EventCfg:
"operation": "add"
},
)
# reset
reset_base = EventTermCfg(
func=mdp.reset_root_state_uniform,
mode="reset",
params={
"pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)},
"velocity_range": {
"x": (0.0, 0.0),
"y": (0.0, 0.0),
"z": (0.0, 0.0),
"roll": (0.0, 0.0),
"pitch": (0.0, 0.0),
"yaw": (0.0, 0.0),
}
"x": (-0.5, 0.5),
"y": (-0.5, 0.5),
"z": (-0.5, 0.5),
"roll": (-0.5, 0.5),
"pitch": (-0.5, 0.5),
"yaw": (-0.5, 0.5),
},
},
)

reset_robot_joints = EventTermCfg(
func=mdp.reset_joints_by_scale,
mode="reset",
params={
"position_range": (1.0, 1.0),
"position_range": (0.5, 1.5),
"velocity_range": (0.0, 0.0),
},
)

# interval
- # push_robot = EventTermCfg(
- #     func=mdp.push_by_setting_velocity,
- #     mode="interval",
- #     interval_range_s=(4.0, _EPISODE_LENGTH),
- #     params={"velocity_range": {"x": (-MAX_PUSHSPEED, MAX_PUSHSPEED), "y": (-MAX_PUSHSPEED, MAX_PUSHSPEED)}}
- # )
+ push_robot = EventTermCfg(
+     func=mdp.push_by_setting_velocity,
+     mode="interval",
+     interval_range_s=(10.0, 15.0),
+     params={"velocity_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5)}},
+ )
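The push event is re-enabled with a fixed interval and a smaller speed range than the commented-out version. As a rough sketch of what the event amounts to, assuming `push_by_setting_velocity` samples a planar root velocity uniformly from `velocity_range` (the actual Isaac Lab implementation may set or add it differently):

```python
import torch

def sample_push_velocity(num_envs: int,
                         velocity_range: dict[str, tuple[float, float]]) -> torch.Tensor:
    """Hypothetical helper: draw the x/y push velocity applied to each env."""
    vel = torch.zeros(num_envs, 2)
    for i, axis in enumerate(("x", "y")):
        lo, hi = velocity_range.get(axis, (0.0, 0.0))
        vel[:, i] = torch.empty(num_envs).uniform_(lo, hi)
    return vel

# e.g. sample_push_velocity(4096, {"x": (-0.5, 0.5), "y": (-0.5, 0.5)})
```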


@configclass
@@ -413,6 +359,10 @@ class RewardsCfg:
r_manip_energy = RewardTermCfg(func=mdp.r_joint_arm_power, weight=0.004)
# r_manip_alive = 0

+ # -- optional penalties
+ # flat_orientation_l2 = RewardTermCfg(func=mdp.flat_orientation_l2, weight=0.0)
+ # dof_pos_limits = RewardTermCfg(func=mdp.joint_pos_limits, weight=0.0)


@configclass
class TerminationsCfg: