From 6eaea63e36d5856ca70da45c07316716211b9c2b Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Mon, 23 Dec 2024 13:28:56 +0200
Subject: [PATCH] minor

---
 include/llama.h     | 2 +-
 src/llama-adapter.h | 4 ++--
 src/llama-model.cpp | 3 +++
 src/llama-model.h   | 4 ++--
 4 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/include/llama.h b/include/llama.h
index 05c0130e46c21..c344288953954 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -543,7 +543,7 @@ extern "C" {
     // to an n_embd x n_layers buffer starting from layer 1.
     // il_start and il_end are the layer range the vector should apply to (both inclusive)
     // See llama_control_vector_load in common to load a control vector.
-    // TODO: rename to llama_adapter_vec_apply
+    // TODO: rename to llama_adapter_cvec_apply
     LLAMA_API int32_t llama_control_vector_apply(
             struct llama_context * lctx,
                      const float * data,
diff --git a/src/llama-adapter.h b/src/llama-adapter.h
index 7b8ce47a82463..24f067db7b187 100644
--- a/src/llama-adapter.h
+++ b/src/llama-adapter.h
@@ -9,10 +9,10 @@
 #include <vector>
 
 //
-// llama_adapter_vec
+// llama_adapter_cvec
 //
 
-// TODO: rename to llama_adapter_vec
+// TODO: rename to llama_adapter_cvec
 struct llama_control_vector {
     std::vector<ggml_context_ptr> ctxs;
     std::vector<ggml_backend_buffer_ptr> bufs;
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index ba9a59e396313..70e6306336a3f 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -134,6 +134,7 @@ static bool buft_supported(ggml_backend_buffer_type_t buft, ggml_backend_dev_t d
         /*.mem_buffer =*/ NULL,
         /*.no_alloc   =*/ true,
     };
+
     ggml_context_ptr ctx { ggml_init(params) };
     if (!ctx) {
         throw std::runtime_error(format("failed to create ggml context"));
@@ -147,6 +148,7 @@ static bool buft_supported(ggml_backend_buffer_type_t buft, ggml_backend_dev_t d
             op_tensor->src[i]->buffer = buf.get();
         }
     }
+
     bool op_supported = ggml_backend_dev_supports_op(dev, op_tensor);
 
     return op_supported;
@@ -161,6 +163,7 @@ static ggml_backend_buffer_type_t select_buft(const llama_model::buft_list_t & b
             return cur_buft;
         }
     }
+
     throw std::runtime_error(format("no suitable buffer type found"));
 }
 
diff --git a/src/llama-model.h b/src/llama-model.h
index aa3ff9b0d8567..5123ac9a02be5 100644
--- a/src/llama-model.h
+++ b/src/llama-model.h
@@ -334,6 +334,7 @@ struct llama_model {
         ggml_backend_dev_t dev;
         buft_list_t * buft_list;
     };
+
     layer_dev dev_input  = {};
     layer_dev dev_output = {};
     std::vector<layer_dev> dev_layer;
@@ -348,7 +349,6 @@ struct llama_model {
     llama_mmaps mappings;
 
     // objects representing data potentially being locked in memory
-    // TODO: should these be part of llama_context instead?
    llama_mlocks mlock_bufs;
     llama_mlocks mlock_mmaps;
 
@@ -371,7 +371,7 @@ std::string llama_model_arch_name (const llama_model & model);
 std::string llama_model_type_name (const llama_model & model);
 std::string llama_model_ftype_name(const llama_model & model);
 
-// used by llama_adapter_vec
+// used by llama_adapter_cvec
 ggml_backend_buffer_type_t llama_model_select_buft(const llama_model & model, int il);
 
 // used by llama_adapter_lora