Set default sensor (camera) offset to avoid the camera being inside the objects #658

Merged (2 commits) on Feb 23, 2024
31 changes: 24 additions & 7 deletions metadrive/component/sensors/base_camera.py
@@ -1,7 +1,9 @@
import numpy as np
from typing import Union
from panda3d.core import NodePath

import cv2
import numpy as np
from panda3d.core import NodePath

from metadrive.component.sensors.base_sensor import BaseSensor
from metadrive.utils.cuda import check_cudart_err

@@ -17,6 +19,7 @@
from panda3d.core import Vec3

from metadrive.engine.core.image_buffer import ImageBuffer
from metadrive import constants


class BaseCamera(ImageBuffer, BaseSensor):
@@ -118,7 +121,7 @@ def perceive(
self, to_float=True, new_parent_node: Union[NodePath, None] = None, position=None, hpr=None
) -> np.ndarray:
"""
When clip is set to False, the image will be represented by uint8 with component values in [0, 255].
When to_float is set to False, the image will be represented by uint8 with component values in [0, 255].
Otherwise, it will be float type with component values in [0., 1.]. By default, the remaining parameters are
all None. In this case, the camera will render the result with the pose and position set by the track() function.

@@ -127,15 +130,29 @@
camera to capture a new image and return the camera to the owner. This usually happens when using one camera to
render multiple times from different positions and poses.

new_parent_node should be a NodePath like object.origin and vehicle.origin or self.engine.origin, which
means the world origin. When new_parent_node is set, both position and hpr have to be set as well. The position
new_parent_node should be a NodePath like object.origin or vehicle.origin or self.engine.origin, which
means the world origin. When new_parent_node is set, both position and hpr have to be set as well. The position
and hpr are both 3-dim vectors representing:
1) the position relative to the new parent node
2) the heading/pitch/roll of the sensor
"""

Args:
to_float: When set to False, the image will be represented by uint8 with component values in [0, 255].
Otherwise, it will be float type with component values in [0., 1.].
new_parent_node: A NodePath such as object.origin, vehicle.origin or self.engine.origin (the world
origin). When new_parent_node is set but position or hpr is omitted, the missing value falls back to
constants.DEFAULT_SENSOR_OFFSET or constants.DEFAULT_SENSOR_HPR. Both are 3-dim vectors representing:
position: the position relative to the new parent node
hpr: the heading/pitch/roll of the sensor

Return:
Array representing the image.
"""
if new_parent_node:
assert position and hpr, "When new_parent_node is set, both position and hpr should be set as well"
if position is None:
position = constants.DEFAULT_SENSOR_OFFSET
if hpr is None:
hpr = constants.DEFAULT_SENSOR_HPR

# return camera to original state
original_object = self.cam.getParent()
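A minimal usage sketch of the updated perceive() API, assuming an environment configured with an RGB camera registered as "rgb_camera" (the sensor name, resolution and config keys below are illustrative, not taken from this PR):

from metadrive.component.sensors.rgb_camera import RGBCamera
from metadrive.envs.metadrive_env import MetaDriveEnv

# Hypothetical config: register an 84x84 RGB camera and enable image observations.
env = MetaDriveEnv(dict(
    image_observation=True,
    sensors=dict(rgb_camera=(RGBCamera, 84, 84)),
))
obs, info = env.reset()

camera = env.engine.get_sensor("rgb_camera")
vehicle = env.agent

# With this change, position/hpr may be omitted when re-parenting the camera:
# they fall back to constants.DEFAULT_SENSOR_OFFSET / DEFAULT_SENSOR_HPR,
# which keeps the camera above and ahead of the vehicle instead of inside it.
img = camera.perceive(to_float=True, new_parent_node=vehicle.origin)
print(img.shape, img.dtype)  # float image with values in [0., 1.]

env.close()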
4 changes: 4 additions & 0 deletions metadrive/constants.py
@@ -542,3 +542,7 @@ class CameraTagStateKey:
RGB = "rgb"
Depth = "depth"
Semantic = "semantic"


DEFAULT_SENSOR_OFFSET = (0., 0.8, 1.5)
DEFAULT_SENSOR_HPR = (0., 0.0, 0.0)
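Reading these defaults in the parent node's local frame (an interpretation based on Panda3D conventions, not stated in the diff): the offset places the sensor roughly 0.8 m ahead of and 1.5 m above whatever node it is attached to, with a level heading/pitch/roll, which is what keeps it out of the object's geometry. A short sketch of how they would be consumed elsewhere:

from metadrive import constants

# (x, y, z) in the parent NodePath's frame: 0 m lateral, 0.8 m forward, 1.5 m up.
print(constants.DEFAULT_SENSOR_OFFSET)  # (0., 0.8, 1.5)
# (heading, pitch, roll): level, facing the parent's forward axis.
print(constants.DEFAULT_SENSOR_HPR)     # (0., 0.0, 0.0)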
22 changes: 19 additions & 3 deletions metadrive/engine/core/main_camera.py
@@ -23,6 +23,7 @@
except ImportError:
_cuda_enable = False
from metadrive.component.sensors.base_sensor import BaseSensor
from metadrive import constants


class MainCamera(BaseSensor):
@@ -451,7 +452,7 @@ def perceive(
self, to_float=True, new_parent_node: Union[NodePath, None] = None, position=None, hpr=None
) -> np.ndarray:
"""
When clip is set to False, the image will be represented by uint8 with component values in [0, 255].
When to_float is set to False, the image will be represented by uint8 with component values in [0, 255].
Otherwise, it will be float type with component values in [0., 1.]. By default, the remaining parameters are
all None. In this case, the camera will render the result with the pose and position set by the track() function.

@@ -460,15 +461,30 @@
camera to capture a new image and return the camera to the owner. This usually happens when using one camera to
render multiple times from different positions and poses.

new_parent_node should be a NodePath like object.origin and vehicle.origin or self.engine.origin, which
new_parent_node should be a NodePath like object.origin or vehicle.origin or self.engine.origin, which
means the world origin. When new_parent_node is set, both position and hpr have to be set as well. The position
and hpr are both 3-dim vectors representing:
1) the position relative to the new parent node
2) the heading/pitch/roll of the sensor

Args:
to_float: When set to False, the image will be represented by uint8 with component values in [0, 255].
Otherwise, it will be float type with component values in [0., 1.].
new_parent_node: A NodePath such as object.origin, vehicle.origin or self.engine.origin (the world
origin). When new_parent_node is set but position or hpr is omitted, the missing value falls back to
constants.DEFAULT_SENSOR_OFFSET or constants.DEFAULT_SENSOR_HPR. Both are 3-dim vectors representing:
position: the position relative to the new parent node
hpr: the heading/pitch/roll of the sensor

Return:
Array representing the image.
"""

if new_parent_node:
assert position and hpr, "When new_parent_node is set, both position and hpr should be set as well"
if position is None:
position = constants.DEFAULT_SENSOR_OFFSET
if hpr is None:
hpr = constants.DEFAULT_SENSOR_HPR

# return camera to original state
original_object = self.camera.getParent()
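Explicit values still override the defaults. Continuing the hypothetical setup above, a sketch that re-parents the camera to the world origin for a top-down shot (the pitch sign convention follows Panda3D):

# Render once from a fixed pose in the world frame instead of from the vehicle.
bird_view = camera.perceive(
    to_float=False,                     # uint8 image with values in [0, 255]
    new_parent_node=env.engine.origin,  # the world origin NodePath (see docstring above)
    position=(0., 0., 50.),             # 50 m above the world origin
    hpr=(0., -90., 0.),                 # pitch down to look at the ground
)
print(bird_view.shape, bird_view.dtype)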
12 changes: 7 additions & 5 deletions metadrive/envs/base_env.py
@@ -1,6 +1,4 @@
import logging
from metadrive.obs.image_obs import ImageStateObservation
from metadrive.obs.state_obs import LidarStateObservation
import time
from collections import defaultdict
from typing import Union, Dict, AnyStr, Optional, Tuple, Callable
@@ -9,6 +7,7 @@
import numpy as np
from panda3d.core import PNMImage

from metadrive import constants
from metadrive.component.sensors.base_camera import BaseCamera
from metadrive.component.sensors.dashboard import DashBoard
from metadrive.component.sensors.distance_detector import LaneLineDetector, SideDetector
@@ -22,8 +21,10 @@
from metadrive.manager.agent_manager import VehicleAgentManager
from metadrive.manager.record_manager import RecordManager
from metadrive.manager.replay_manager import ReplayManager
from metadrive.obs.observation_base import DummyObservation
from metadrive.obs.image_obs import ImageStateObservation
from metadrive.obs.observation_base import BaseObservation
from metadrive.obs.observation_base import DummyObservation
from metadrive.obs.state_obs import LidarStateObservation
from metadrive.policy.env_input_policy import EnvInputPolicy
from metadrive.scenario.utils import convert_recorded_scenario_exported
from metadrive.utils import Config, merge_dicts, get_np_random, concat_step_infos
@@ -434,7 +435,7 @@ def step(self, actions: Union[Union[np.ndarray, list], Dict[AnyStr, Union[list,
return self._get_step_return(actions, engine_info=engine_info) # collect observation, reward, termination

def _preprocess_actions(self, actions: Union[np.ndarray, Dict[AnyStr, np.ndarray], int]) \
-> Union[np.ndarray, Dict[AnyStr, np.ndarray], int]:
-> Union[np.ndarray, Dict[AnyStr, np.ndarray], int]:
if not self.is_multi_agent:
actions = {v_id: actions for v_id in self.agents.keys()}
else:
@@ -887,7 +888,8 @@ def switch_to_third_person_view(self):
self.main_camera.track(current_track_agent)
for name, sensor in self.engine.sensors.items():
if hasattr(sensor, "track") and name != "main_camera":
sensor.track(current_track_agent.origin, [0., 0.8, 1.5], [0, 0.59681, 0])
camera_video_posture = [0, 0.59681, 0]
sensor.track(current_track_agent.origin, constants.DEFAULT_SENSOR_OFFSET, camera_video_posture)
return

def next_seed_reset(self):
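The same re-tracking can be done from user code. A sketch (reusing the hypothetical env above) that points every tracking-capable sensor at the current agent with the new default offset, mirroring what switch_to_third_person_view() does with its hard-coded video posture:

from metadrive import constants

camera_video_posture = [0, 0.59681, 0]  # heading/pitch/roll used by the env code above
for name, sensor in env.engine.sensors.items():
    if hasattr(sensor, "track") and name != "main_camera":
        sensor.track(env.agent.origin, constants.DEFAULT_SENSOR_OFFSET, camera_video_posture)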