Skip to content

Commit

Permalink
Add enable_cudf_spill to LocalCudaCluster (#268)
Browse files Browse the repository at this point in the history
  • Loading branch information
praateekmahajan authored Sep 30, 2024
2 parents 97e8f15 + 7c476ac commit 9af8da1
Showing 1 changed file with 2 additions and 17 deletions.
19 changes: 2 additions & 17 deletions nemo_curator/utils/distributed_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
import psutil
from dask.distributed import Client, LocalCluster, get_worker, performance_report

from nemo_curator.utils.gpu_utils import GPU_INSTALL_STRING, is_cudf_type
from nemo_curator.utils.gpu_utils import is_cudf_type
from nemo_curator.utils.import_utils import gpu_only_import, gpu_only_import_from

cudf = gpu_only_import("cudf")
Expand Down Expand Up @@ -70,14 +70,11 @@ def start_dask_gpu_local_cluster(
rmm_pool_size=rmm_pool_size,
protocol=protocol,
rmm_async=True,
enable_cudf_spill=enable_spilling,
**extra_kwargs,
)
client = Client(cluster)

if enable_spilling:
_enable_spilling()
client.run(_enable_spilling)

if set_torch_to_use_rmm:
_set_torch_to_use_rmm()
client.run(_set_torch_to_use_rmm)
Expand Down Expand Up @@ -193,18 +190,6 @@ def _set_torch_to_use_rmm():
torch.cuda.memory.change_current_allocator(rmm_torch_allocator)


def _enable_spilling():
    """
    Enables automatic spilling (and "unspilling") of buffers from device
    memory to host memory, allowing out-of-memory computation, i.e.,
    computing on objects that occupy more memory than is available on the GPU.

    Note: this sets the cuDF "spill" option at runtime via ``cudf.set_option``;
    it does not read or write an environment variable. It must run in each
    worker process (e.g. via ``client.run``) to take effect cluster-wide.
    """
    # Local import so this helper can be shipped to Dask workers and only
    # requires cudf where it actually executes.
    import cudf

    cudf.set_option("spill", True)


def read_single_partition(
files,
backend="cudf",
Expand Down

0 comments on commit 9af8da1

Please sign in to comment.