Commit

Update device to cuda
Signed-off-by: Angel Luu <[email protected]>
aluu317 committed Sep 11, 2024
1 parent cc9f2a1 commit 6e9e7f2
Showing 1 changed file with 1 addition and 1 deletion.
scripts/run_inference.py (1 addition, 1 deletion)
@@ -198,7 +198,7 @@ def load(
         attn_implementation="flash_attention_2"
         if use_flash_attn
         else None,
-        device_map="auto",
+        device_map="cuda",
         torch_dtype=torch.float16
         if use_flash_attn
         else None,  # since we are using exllama kernel, we have to use float16
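The change swaps `device_map="auto"` (which lets accelerate decide placement, possibly sharding across GPUs and CPU) for `device_map="cuda"` (which puts the whole model on the default CUDA device). The sketch below shows the resulting keyword arguments as a standalone helper; this helper and the string placeholder for `torch.float16` are illustrative assumptions, not code from the repository, so the sketch runs without torch or a GPU installed.

```python
def build_load_kwargs(use_flash_attn: bool) -> dict:
    """Hypothetical helper mirroring the from_pretrained kwargs after this commit.

    "torch.float16" is represented as a string placeholder so this sketch
    does not require torch; the real call passes the torch dtype object.
    """
    return {
        # Flash Attention 2 is only requested when the flag is set.
        "attn_implementation": "flash_attention_2" if use_flash_attn else None,
        # Changed from "auto": load the entire model onto the CUDA device
        # rather than letting accelerate pick a device layout.
        "device_map": "cuda",
        # Per the in-code comment: the exllama kernel requires float16.
        "torch_dtype": "torch.float16" if use_flash_attn else None,
    }


print(build_load_kwargs(True)["device_map"])   # → cuda
print(build_load_kwargs(False)["torch_dtype"]) # → None
```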
