diff --git a/llama.cpp b/llama.cpp
index 6bc005da105a8e..208491450d370e 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1671,95 +1671,6 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_cpu(bool host_buffer
     GGML_UNUSED(host_buffer);
 }
 
-static ggml_backend_buffer_type_t llama_default_buffer_type_offload(int gpu) {
-    ggml_backend_buffer_type_t buft = nullptr;
-
-#ifdef GGML_USE_METAL
-    buft = ggml_backend_metal_buffer_type();
-#elif defined(GGML_USE_CUDA)
-    buft = ggml_backend_cuda_buffer_type(gpu);
-#elif defined(GGML_USE_VULKAN)
-    buft = ggml_backend_vk_buffer_type(gpu);
-#elif defined(GGML_USE_SYCL)
-    buft = ggml_backend_sycl_buffer_type(gpu);
-#elif defined(GGML_USE_CLBLAST)
-    buft = ggml_backend_opencl_buffer_type();
-#elif defined(GGML_USE_KOMPUTE)
-    buft = ggml_backend_kompute_buffer_type(gpu);
-    if (buft == nullptr) {
-        LLAMA_LOG_WARN("%s: cannot use GPU %d, check `vulkaninfo --summary`\n", __func__, gpu);
-    }
-#elif defined(GGML_USE_QNN)
-    buft = ggml_backend_qnn_buffer_type(gpu);
-#endif
-
-    if (buft == nullptr) {
-        buft = llama_default_buffer_type_cpu(true);
-    }
-    return buft;
-
-    GGML_UNUSED(gpu);
-}
-
-static ggml_backend_buffer_type_t llama_default_buffer_type_split(int fallback_gpu, const float * tensor_split) {
-    ggml_backend_buffer_type_t buft = nullptr;
-
-#ifdef GGML_USE_CUDA
-    if (ggml_backend_cuda_get_device_count() > 1) {
-        buft = ggml_backend_cuda_split_buffer_type(tensor_split);
-    }
-#endif
-
-#ifdef GGML_USE_SYCL
-    if (ggml_backend_sycl_get_device_count() > 1) {
-        buft = ggml_backend_sycl_split_buffer_type(tensor_split);
-    }
-#endif
-
-    if (buft == nullptr) {
-        buft = llama_default_buffer_type_offload(fallback_gpu);
-    }
-    return buft;
-
-    GGML_UNUSED(tensor_split);
-}
-
-static size_t llama_get_device_count() {
-#if defined(GGML_USE_CUDA)
-    return ggml_backend_cuda_get_device_count();
-#elif defined(GGML_USE_SYCL)
-    return ggml_backend_sycl_get_device_count();
-#elif defined(GGML_USE_VULKAN)
-    return ggml_backend_vk_get_device_count();
-#elif defined(GGML_USE_QNN)
-    return ggml_backend_qnn_get_device_count();
-#else
-    return 1;
-#endif
-}
-
-static size_t llama_get_device_memory(int device) {
-#if defined(GGML_USE_CUDA)
-    size_t total;
-    size_t free;
-    ggml_backend_cuda_get_device_memory(device, &free, &total);
-    return free;
-#elif defined(GGML_USE_SYCL)
-    size_t total;
-    size_t free;
-    ggml_backend_sycl_get_device_memory(device, &free, &total);
-    return free;
-#elif defined(GGML_USE_VULKAN)
-    size_t total;
-    size_t free;
-    ggml_backend_vk_get_device_memory(device, &free, &total);
-    return free;
-#else
-    return 1;
-    GGML_UNUSED(device);
-#endif
-}
-
 //
 // globals
 //