diff --git a/docs/CNAME b/docs/CNAME deleted file mode 100644 index 41de4016c..000000000 --- a/docs/CNAME +++ /dev/null @@ -1 +0,0 @@ -metaworld.farama.org \ No newline at end of file diff --git a/metaworld/envs/mujoco/sawyer_xyz/sawyer_xyz_env.py b/metaworld/envs/mujoco/sawyer_xyz/sawyer_xyz_env.py index 211770656..8fd91f3e5 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/sawyer_xyz_env.py +++ b/metaworld/envs/mujoco/sawyer_xyz/sawyer_xyz_env.py @@ -172,13 +172,8 @@ def __init__( np.array([+1, +1, +1, +1]), dtype=np.float64, ) - - # Technically these observation lengths are different between v1 and v2, - # but we handle that elsewhere and just stick with v2 numbers here self._obs_obj_max_len = 14 - self._set_task_called = False - self.hand_init_pos = None # OVERRIDE ME self._target_pos = None # OVERRIDE ME self._random_reset_space = None # OVERRIDE ME @@ -189,6 +184,8 @@ def __init__( # doesn't seem to matter (it will only effect frame-stacking for the # very first observation) + self.init_qpos = np.copy(self.data.qpos) + self.init_qvel = np.copy(self.data.qvel) self._prev_obs = self._get_curr_obs_combined_no_goal() EzPickle.__init__( @@ -538,10 +535,15 @@ def evaluate_state(self, obs, action): # V1 environments don't have to implement it raise NotImplementedError + def reset_model(self): + qpos = self.init_qpos + qvel = self.init_qvel + self.set_state(qpos, qvel) + def reset(self, seed=None, options=None): self.curr_path_length = 0 + self.reset_model() obs, info = super().reset() - mujoco.mj_forward(self.model, self.data) self._prev_obs = obs[:18].copy() obs[18:36] = self._prev_obs obs = np.float64(obs) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_basketball_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_basketball_v2.py index 05684e186..0abac532e 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_basketball_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_basketball_v2.py @@ -105,6 +105,7 @@ def reset_model(self): mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_SITE, "goal") ] self._set_obj_xyz(self.obj_init_pos) + self.model.site("goal").pos = self._target_pos return self._get_obs() def compute_reward(self, action, obs): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_box_close_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_box_close_v2.py index 3d653bd65..1ba6d48d2 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_box_close_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_box_close_v2.py @@ -107,7 +107,7 @@ def reset_model(self): mujoco.mj_step(self.model, self.data) self._set_obj_xyz(self.obj_init_pos) - + self.model.site("goal").pos = self._target_pos return self._get_obs() @staticmethod diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_coffee_pull_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_coffee_pull_v2.py index 8586fccf1..1fa06c7b6 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_coffee_pull_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_coffee_pull_v2.py @@ -109,6 +109,7 @@ def reset_model(self): ] = pos_machine self._target_pos = pos_mug_goal + self.model.site("mug_goal").pos = self._target_pos return self._get_obs() def compute_reward(self, action, obs): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_coffee_push_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_coffee_push_v2.py index 6bd0c40c0..583c62d5a 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_coffee_push_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_coffee_push_v2.py @@ -110,6 +110,7 @@ def reset_model(self): ] = pos_machine self._target_pos = pos_mug_goal + self.model.site("coffee_goal").pos = self._target_pos return self._get_obs() def compute_reward(self, action, obs): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_dial_turn_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_dial_turn_v2.py index 5dfa86d37..98eaac321 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_dial_turn_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_dial_turn_v2.py @@ -101,6 +101,7 @@ def reset_model(self): mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "dial") ] = self.obj_init_pos self.dial_push_position = self._get_pos_objects() + np.array([0.05, 0.02, 0.09]) + self.model.site("goal").pos = self._target_pos mujoco.mj_forward(self.model, self.data) return self._get_obs() diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_close_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_close_v2.py index 656329d73..2f1511767 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_close_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_close_v2.py @@ -80,7 +80,7 @@ def reset_model(self): # keep the door open after resetting initial positions self._set_obj_xyz(-1.5708) - + self.model.site("goal").pos = self._target_pos return self._get_obs() @_assert_task_is_set diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_v2.py index 5901361f0..8eb85103e 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_v2.py @@ -108,7 +108,7 @@ def reset_model(self): self.data.geom("handle").xpos[:-1] - self._target_pos[:-1] ) self.target_reward = 1000 * self.maxPullDist + 1000 * 2 - + self.model.site("goal").pos = self._target_pos return self._get_obs() @staticmethod diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_drawer_close_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_drawer_close_v2.py index 6fdd3ee3c..1e08e95a3 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_drawer_close_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_drawer_close_v2.py @@ -108,7 +108,7 @@ def reset_model(self): # Pull drawer out all the way and mark its starting position self._set_obj_xyz(-self.maxDist) self.obj_init_pos = self._get_pos_objects() - + self.model.site("goal").pos = self._target_pos return self._get_obs() def compute_reward(self, action, obs): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_drawer_open_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_drawer_open_v2.py index 67daebd50..0a5b8906b 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_drawer_open_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_drawer_open_v2.py @@ -103,8 +103,7 @@ def reset_model(self): self._target_pos = self.obj_init_pos + np.array( [0.0, -0.16 - self.maxDist, 0.09] ) - mujoco.mj_forward(self.model, self.data) - + self.model.site("goal").pos = self._target_pos return self._get_obs() def compute_reward(self, action, obs): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_faucet_close_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_faucet_close_v2.py index 6a14b03e2..a247de154 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_faucet_close_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_faucet_close_v2.py @@ -98,6 +98,7 @@ def reset_model(self): [-self._handle_length, 0.0, 0.125] ) mujoco.mj_forward(self.model, self.data) + self.model.site("goal_close").pos = self._target_pos return self._get_obs() def _reset_hand(self): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_faucet_open_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_faucet_open_v2.py index 400e0270a..074881840 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_faucet_open_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_faucet_open_v2.py @@ -97,7 +97,7 @@ def reset_model(self): self._target_pos = self.obj_init_pos + np.array( [+self._handle_length, 0.0, 0.125] ) - mujoco.mj_forward(self.model, self.data) + self.model.site("goal_open").pos = self._target_pos return self._get_obs() def _reset_hand(self): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_hammer_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_hammer_v2.py index 620d66175..281fdd131 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_hammer_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_hammer_v2.py @@ -104,7 +104,6 @@ def reset_model(self): self.nail_init_pos = self._get_site_pos("nailHead") self.obj_init_pos = self.hammer_init_pos.copy() self._set_hammer_xyz(self.hammer_init_pos) - return self._get_obs() @staticmethod diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_hand_insert_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_hand_insert_v2.py index 1a64fee97..01fe020c3 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_hand_insert_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_hand_insert_v2.py @@ -101,6 +101,7 @@ def reset_model(self): self._target_pos = goal_pos[-3:] self._set_obj_xyz(self.obj_init_pos) + self.model.site("goal").pos = self._target_pos return self._get_obs() def compute_reward(self, action, obs): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_handle_press_side_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_handle_press_side_v2.py index 2d689a333..968fa5bf4 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_handle_press_side_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_handle_press_side_v2.py @@ -74,7 +74,6 @@ def evaluate_state(self, obs, action): object_grasped, in_place, ) = self.compute_reward(action, obs) - info = { "success": float(target_to_obj <= self.TARGET_RADIUS), "near_object": float(tcp_to_obj <= 0.05), diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_lever_pull_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_lever_pull_v2.py index b4c385e81..78087cb69 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_lever_pull_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_lever_pull_v2.py @@ -108,7 +108,7 @@ def reset_model(self): self._target_pos = self.obj_init_pos + np.array( [0.12, 0.0, 0.25 + self.LEVER_RADIUS] ) - mujoco.mj_forward(self.model, self.data) + self.model.site("goal").pos = self._target_pos return self._get_obs() def compute_reward(self, action, obs): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_peg_insertion_side_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_peg_insertion_side_v2.py index 4bf4a41da..07125cb3c 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_peg_insertion_side_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_peg_insertion_side_v2.py @@ -127,6 +127,7 @@ def reset_model(self): mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "box") ] = pos_box self._target_pos = pos_box + np.array([0.03, 0.0, 0.13]) + self.model.site("goal").pos = self._target_pos return self._get_obs() def compute_reward(self, action, obs): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_peg_unplug_side_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_peg_unplug_side_v2.py index 23cea6a83..82aa43a03 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_peg_unplug_side_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_peg_unplug_side_v2.py @@ -100,7 +100,7 @@ def reset_model(self): self.obj_init_pos = self._get_site_pos("pegEnd") self._target_pos = pos_plug + np.array([0.15, 0.0, 0.0]) - + self.model.site("goal").pos = self._target_pos return self._get_obs() def compute_reward(self, action, obs): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_out_of_hole_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_out_of_hole_v2.py index 209c9e77b..81a49cfe0 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_out_of_hole_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_out_of_hole_v2.py @@ -103,7 +103,7 @@ def reset_model(self): self.obj_init_pos = pos_obj self._set_obj_xyz(self.obj_init_pos) self._target_pos = pos_goal - + self.model.site("goal").pos = self._target_pos return self._get_obs() def compute_reward(self, action, obs): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_place_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_place_v2.py index 304082791..3eb81d2da 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_place_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_place_v2.py @@ -138,7 +138,7 @@ def reset_model(self): self.init_right_pad = self.get_body_com("rightpad") self._set_obj_xyz(self.obj_init_pos) - + self.model.site("goal").pos = self._target_pos return self._get_obs() def _gripper_caging_reward(self, action, obj_position): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_place_wall_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_place_wall_v2.py index 654fee547..d22ae35da 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_place_wall_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_place_wall_v2.py @@ -130,7 +130,7 @@ def reset_model(self): self.obj_init_pos = goal_pos[:3] self._set_obj_xyz(self.obj_init_pos) - + self.model.site("goal").pos = self._target_pos return self._get_obs() def compute_reward(self, action, obs): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_back_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_back_v2.py index 12635247e..eda822c1a 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_back_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_back_v2.py @@ -114,7 +114,7 @@ def reset_model(self): self.obj_init_pos = np.concatenate((goal_pos[:2], [self.obj_init_pos[-1]])) self._set_obj_xyz(self.obj_init_pos) - + self.model.site("goal").pos = self._target_pos return self._get_obs() def _gripper_caging_reward(self, action, obj_position, obj_radius): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_v2.py index 0e08b1243..213f559f4 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_v2.py @@ -135,7 +135,7 @@ def reset_model(self): self.obj_init_pos = np.concatenate((goal_pos[:2], [self.obj_init_pos[-1]])) self._set_obj_xyz(self.obj_init_pos) - + self.model.site("goal").pos = self._target_pos return self._get_obs() def compute_reward(self, action, obs): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_wall_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_wall_v2.py index 99b26856e..dc1929fa7 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_wall_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_wall_v2.py @@ -128,6 +128,7 @@ def reset_model(self): self.obj_init_pos = np.concatenate((goal_pos[:2], [self.obj_init_pos[-1]])) self._set_obj_xyz(self.obj_init_pos) + self.model.site("goal").pos = self._target_pos return self._get_obs() def compute_reward(self, action, obs): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_reach_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_reach_v2.py index 3882a77c8..03d29792e 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_reach_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_reach_v2.py @@ -1,4 +1,3 @@ -import mujoco import numpy as np from gymnasium.spaces import Box from scipy.spatial.transform import Rotation @@ -113,7 +112,8 @@ def reset_model(self): self._target_pos = goal_pos[-3:] self.obj_init_pos = goal_pos[:3] self._set_obj_xyz(self.obj_init_pos) - mujoco.mj_forward(self.model, self.data) + + self._set_pos_site("goal", self._target_pos) return self._get_obs() def compute_reward(self, actions, obs): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_reach_wall_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_reach_wall_v2.py index d4638b21b..6b16604e9 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_reach_wall_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_reach_wall_v2.py @@ -104,7 +104,7 @@ def reset_model(self): self.obj_init_pos = goal_pos[:3] self._set_obj_xyz(self.obj_init_pos) - + self._set_pos_site("goal", self._target_pos) return self._get_obs() def compute_reward(self, actions, obs): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_shelf_place_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_shelf_place_v2.py index 19c8ae681..4fe4cb5fc 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_shelf_place_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_shelf_place_v2.py @@ -125,7 +125,7 @@ def reset_model(self): ) self._set_obj_xyz(self.obj_init_pos) - + self._set_pos_site("goal", self._target_pos) return self._get_obs() def compute_reward(self, action, obs): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_soccer_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_soccer_v2.py index 51ec9babb..8b01c5aa8 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_soccer_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_soccer_v2.py @@ -108,7 +108,7 @@ def reset_model(self): self.maxPushDist = np.linalg.norm( self.obj_init_pos[:2] - np.array(self._target_pos)[:2] ) - + self._set_pos_site("goal", self._target_pos) return self._get_obs() def _gripper_caging_reward(self, action, obj_position, obj_radius): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_stick_pull_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_stick_pull_v2.py index 3b899a072..2680a6017 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_stick_pull_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_stick_pull_v2.py @@ -146,7 +146,7 @@ def reset_model(self): self._set_stick_xyz(self.stick_init_pos) self._set_obj_xyz(self.obj_init_qpos) self.obj_init_pos = self.get_body_com("object").copy() - + self._set_pos_site("goal", self._target_pos) return self._get_obs() def _stick_is_inserted(self, handle, end_of_stick): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_stick_push_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_stick_push_v2.py index 47d39b044..701cfc066 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_stick_push_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_stick_push_v2.py @@ -143,7 +143,7 @@ def reset_model(self): self._set_stick_xyz(self.stick_init_pos) self._set_obj_xyz(self.obj_init_qpos) self.obj_init_pos = self.get_body_com("object").copy() - + self._set_pos_site("goal", self._target_pos) return self._get_obs() def _gripper_caging_reward( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_sweep_into_goal_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_sweep_into_goal_v2.py index 10f275c2c..2430846b5 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_sweep_into_goal_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_sweep_into_goal_v2.py @@ -98,7 +98,7 @@ def reset_model(self): self.maxPushDist = np.linalg.norm( self.obj_init_pos[:2] - np.array(self._target_pos)[:2] ) - + self._set_pos_site("goal", self._target_pos) return self._get_obs() def _gripper_caging_reward(self, action, obj_position, obj_radius): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_sweep_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_sweep_v2.py index 8d44d1ceb..8fe9ac0fd 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_sweep_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_sweep_v2.py @@ -97,7 +97,7 @@ def reset_model(self): self.get_body_com("obj")[:-1] - self._target_pos[:-1] ) self.target_reward = 1000 * self.maxPushDist + 1000 * 2 - + self._set_pos_site("goal", self._target_pos) return self._get_obs() def _gripper_caging_reward(self, action, obj_position, obj_radius): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_close_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_close_v2.py index 41308be65..f063be139 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_close_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_close_v2.py @@ -112,7 +112,7 @@ def reset_model(self): [0.2, 0.0, 0.0] ) self.data.joint("window_slide").qpos = 0.2 - mujoco.mj_forward(self.model, self.data) + self._set_pos_site("goal", self._target_pos) return self._get_obs() def _reset_hand(self): diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_open_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_open_v2.py index 1d84ef514..7c66c1f72 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_open_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_open_v2.py @@ -111,7 +111,7 @@ def reset_model(self): self.window_handle_pos_init = self._get_pos_objects() self.data.joint("window_slide").qpos = 0.0 - mujoco.mj_forward(self.model, self.data) + self._set_pos_site("goal", self._target_pos) return self._get_obs() def compute_reward(self, actions, obs):