From d8fe041abdd3b729728bdbf92712640147190d82 Mon Sep 17 00:00:00 2001 From: Mikhail Burnaev Date: Sun, 29 Dec 2024 03:57:22 +0300 Subject: [PATCH] feat: distributed mode (#55) --- resemble_enhance/enhancer/__main__.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/resemble_enhance/enhancer/__main__.py b/resemble_enhance/enhancer/__main__.py index 0c1ad5c..8d619a6 100644 --- a/resemble_enhance/enhancer/__main__.py +++ b/resemble_enhance/enhancer/__main__.py @@ -4,11 +4,12 @@ from pathlib import Path import torch +import torch.distributed as dist import torchaudio from tqdm import tqdm from .inference import denoise, enhance - +from ..utils.distributed import local_rank, fix_unset_envs @torch.inference_mode() def main(): @@ -68,6 +69,11 @@ def main(): action="store_true", help="Shuffle the audio paths and skip the existing ones, enabling multiple jobs to run in parallel", ) + parser.add_argument( + "--distributed_mode", + action="store_true", + help="Enable distributed inference across multiple GPUs", + ) args = parser.parse_args() @@ -86,6 +92,14 @@ def main(): if args.parallel_mode: random.shuffle(paths) + if args.distributed_mode: + fix_unset_envs() + dist.init_process_group(backend='nccl' if device == "cuda" else "gloo") + torch.cuda.set_device(local_rank()) + num_processed = dist.get_world_size() + rank = dist.get_rank() + paths = paths[rank::num_processed] + if len(paths) == 0: print(f"No {args.suffix} files found in the following path: {args.in_dir}") return