diff --git a/ggml-qnn.cpp b/ggml-qnn.cpp
index 92b6cff31f148a..931b421d358d85 100644
--- a/ggml-qnn.cpp
+++ b/ggml-qnn.cpp
@@ -3363,7 +3363,6 @@ GGML_CALL static bool ggml_backend_qnn_supports_op(ggml_backend_t backend, const
 //note: this function be used with proposal/refined ggml backend subsystem in this PR:
 // https://github.com/ggerganov/llama.cpp/pull/7641
-//
 // new ggml backend(only using system memory: ggml_backend_xxx_buffer_is_host return true)
 // can following this style for mixed inference between CPU&GPU / CPU&NPU very easily
GGML_CALL static bool ggml_backend_qnn_offload_op(ggml_backend_t backend, const ggml_tensor * tensor) {
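
The hunk cuts off at the function's opening brace, so for context here is a minimal sketch of what an offload_op predicate commonly looks like in ggml backends. The min_batch_size threshold and the GGML_OP_GET_ROWS exclusion are assumptions borrowed from the heuristic other backends (e.g. the CUDA backend) use, not the body from this PR:

// sketch only -- assumes the ggml headers from the same era as this PR
#include "ggml.h"
#include "ggml-backend.h"

GGML_CALL static bool ggml_backend_qnn_offload_op(ggml_backend_t backend, const ggml_tensor * tensor) {
    GGML_UNUSED(backend);
    // assumed heuristic (mirrors other ggml backends, not necessarily this PR's body):
    // only offload ops whose batch dimension is large enough to amortize the
    // host<->NPU transfer, and keep cheap ops such as GGML_OP_GET_ROWS on the CPU
    const int min_batch_size = 32; // illustrative threshold, not taken from this PR
    return tensor->ne[1] >= min_batch_size && tensor->op != GGML_OP_GET_ROWS;
}

Because the QNN backend's buffers live in host memory (ggml_backend_qnn_buffer_is_host returns true), the scheduler can hand individual ops to the NPU based on this predicate while leaving the rest on the CPU, which is the mixed CPU&GPU / CPU&NPU inference style the comment refers to.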