Skip to content

Commit

Permalink
fix(pu): fix log_buffer_memory_usage in DDP setting
Browse files Browse the repository at this point in the history
  • Loading branch information
dyyoungg committed Nov 13, 2024
1 parent aeb997c commit 9f4fba9
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 7 deletions.
2 changes: 1 addition & 1 deletion lzero/entry/train_unizero_multitask_segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ def train_unizero_multitask_segment(
# torch.cuda.set_device(device)
# print(f"set device后的 GPU 设备编号: {device}")

log_buffer_memory_usage(learner.train_iter, replay_buffer, tb_logger)
log_buffer_memory_usage(learner.train_iter, replay_buffer, tb_logger, cfg.policy.task_id)

collect_kwargs = {
'temperature': visit_count_temperature(
Expand Down
12 changes: 6 additions & 6 deletions lzero/entry/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def random_collect(
collector.reset_policy(policy.collect_mode)


def log_buffer_memory_usage(train_iter: int, buffer: "GameBuffer", writer: SummaryWriter) -> None:
def log_buffer_memory_usage(train_iter: int, buffer: "GameBuffer", writer: SummaryWriter, task_id=0) -> None:
"""
Overview:
Log the memory usage of the buffer and the current process to TensorBoard.
Expand All @@ -74,9 +74,9 @@ def log_buffer_memory_usage(train_iter: int, buffer: "GameBuffer", writer: Summa
"""
# "writer is None" means we are in a slave process in the DDP setup.
if writer is not None:
writer.add_scalar('Buffer/num_of_all_collected_episodes', buffer.num_of_collected_episodes, train_iter)
writer.add_scalar('Buffer/num_of_game_segments', len(buffer.game_segment_buffer), train_iter)
writer.add_scalar('Buffer/num_of_transitions', len(buffer.game_segment_game_pos_look_up), train_iter)
writer.add_scalar(f'Buffer/num_of_all_collected_episodes_{task_id}', buffer.num_of_collected_episodes, train_iter)
writer.add_scalar(f'Buffer/num_of_game_segments_{task_id}', len(buffer.game_segment_buffer), train_iter)
writer.add_scalar(f'Buffer/num_of_transitions_{task_id}', len(buffer.game_segment_game_pos_look_up), train_iter)

game_segment_buffer = buffer.game_segment_buffer

Expand All @@ -87,7 +87,7 @@ def log_buffer_memory_usage(train_iter: int, buffer: "GameBuffer", writer: Summa
buffer_memory_usage_mb = buffer_memory_usage / (1024 * 1024)

# Record the memory usage of self.game_segment_buffer to TensorBoard.
writer.add_scalar('Buffer/memory_usage/game_segment_buffer', buffer_memory_usage_mb, train_iter)
writer.add_scalar(f'Buffer/memory_usage/game_segment_buffer_{task_id}', buffer_memory_usage_mb, train_iter)

# Get the amount of memory currently used by the process (in bytes).
process = psutil.Process(os.getpid())
Expand All @@ -97,7 +97,7 @@ def log_buffer_memory_usage(train_iter: int, buffer: "GameBuffer", writer: Summa
process_memory_usage_mb = process_memory_usage / (1024 * 1024)

# Record the memory usage of the process to TensorBoard.
writer.add_scalar('Buffer/memory_usage/process', process_memory_usage_mb, train_iter)
writer.add_scalar(f'Buffer/memory_usage/process_{task_id}', process_memory_usage_mb, train_iter)


def log_buffer_run_time(train_iter: int, buffer: "GameBuffer", writer: SummaryWriter) -> None:
Expand Down

0 comments on commit 9f4fba9

Please sign in to comment.