forked from ggerganov/llama.cpp
Showing 9 changed files with 889 additions and 845 deletions. Large diffs are not rendered by default; only the two new files below are shown.
@@ -0,0 +1,17 @@
#pragma once

#include "ggml.h"
#include "backend.hpp"

namespace qnn {

// Signature shared by every QNN op implementation: consume up to two source
// tensors and write the result into dst.
typedef void (*ggml_qnn_op_t)(ggml_backend_qnn_context* ctx,
                              const ggml_tensor* src0,
                              const ggml_tensor* src1,
                              ggml_tensor* dst);

// Reference to a fixed-size dispatch table indexed by ggml_op.
typedef const ggml_qnn_op_t (&ggml_qnn_op_array_t)[GGML_OP_COUNT];

ggml_qnn_op_array_t ggml_qnn_op_array();

} // namespace qnn
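A minimal usage sketch for this dispatch table (the call site, its name, and the assumption that unsupported ops map to a null entry are illustrative, not part of this commit):

    // Hypothetical call site: look up the QNN kernel registered for a node's
    // op and invoke it.
    static bool run_qnn_node(ggml_backend_qnn_context* ctx, ggml_tensor* dst) {
        qnn::ggml_qnn_op_array_t table = qnn::ggml_qnn_op_array();
        qnn::ggml_qnn_op_t fn = table[dst->op];
        if (!fn) {
            return false; // op not implemented by the QNN backend
        }
        fn(ctx, dst->src[0], dst->src[1], dst);
        return true;
    }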
@@ -0,0 +1,126 @@
#include "utils.hpp"

#include "ggml-qnn.h"
#include "qnn-types.hpp"

namespace qnn {

// TODO: map more ggml data types to QNN data types
// ref: explanation of k-quants, https://github.com/ggerganov/llama.cpp/pull/1684
Qnn_DataType_t datatype_from_ggml_datatype(enum ggml_type ggmltype) {
    switch (ggmltype) {
        case GGML_TYPE_F16:
            return QNN_DATATYPE_FLOAT_16;
        case GGML_TYPE_F32:
            return QNN_DATATYPE_FLOAT_32;
        case GGML_TYPE_I8:
            return QNN_DATATYPE_INT_8;
        case GGML_TYPE_Q8_0:
            return QNN_DATATYPE_SFIXED_POINT_8;
        case GGML_TYPE_Q4_0:
            return QNN_DATATYPE_SFIXED_POINT_4;
        default:
            break;
    }
    return QNN_DATATYPE_UNDEFINED;
}
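// Usage sketch (illustrative; the caller shown is not part of this commit):
// the QNN_DATATYPE_UNDEFINED sentinel lets a call site reject unsupported
// tensor types early, e.g.
//     if (datatype_from_ggml_datatype(t->type) == QNN_DATATYPE_UNDEFINED) {
//         return false; // fall back to the CPU implementation
//     }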

// Effective rank: the number of dimensions whose extent is greater than 1
// (extents of 0 or 1 are treated as absent).
uint32_t get_ggml_tensor_rank(const ggml_tensor* tensor) {
    uint32_t rank = 0;
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
        if ((0 != tensor->ne[i]) && (1 != tensor->ne[i])) {
            rank++;
        }
    }
    return rank;
}
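// Example (illustrative): ne = {32, 1, 4, 1} has two non-unit extents, so the
// effective rank is 2; a 1x1x1x1 scalar yields 0 under this counting scheme.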

const char* get_backend_name(int n_backend_type) {
    switch (n_backend_type) {
        case QNN_BACKEND_CPU:
            return "QNN-CPU";
        case QNN_BACKEND_GPU:
            return "QNN-GPU";
        case QNN_BACKEND_NPU:
            return "QNN-NPU";
        case QNN_BACKEND_GGML:
            // "fake" QNN backend, used to compare performance between the QNN
            // backends and the original ggml implementation
            return "ggml";
        default:
            return "unknown";
    }
}

const char* get_chipset_desc(uint32_t chipset_id) {
    switch (chipset_id) {
        case SM8450:
            return "SM8450";
        case SM8475:
            return "SM8475";
        case SM8550:
            return "SM8550";
        case SM8650:
            return "SM8650";
        default:
            return "unknown";
    }
}

const char* get_htparch_desc(size_t htp_arch) {
    switch (htp_arch) {
        case V68:
            return "QCOM_HTP_V68";
        case V69:
            return "QCOM_HTP_V69";
        case V73:
            return "QCOM_HTP_V73";
        case V75:
            return "QCOM_HTP_V75";
        default:
            return "unknown";
    }
}

// Round offset up to the next multiple of alignment (no-op when already
// aligned; alignment must be non-zero).
intptr_t align_to(size_t alignment, intptr_t offset) {
    return offset % alignment == 0
               ? offset
               : offset + (static_cast<intptr_t>(alignment) -
                           offset % static_cast<intptr_t>(alignment));
}
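// Worked example (illustrative): align_to(8, 13) computes 13 % 8 == 5 and
// returns 13 + (8 - 5) == 16; align_to(8, 16) returns 16 unchanged.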

uint32_t get_ggml_tensor_data_size(const ggml_tensor* tensor) {
    /*
    size_t data_size = ggml_row_size(tensor->type, tensor->ne[0]);
    size_t n_dims = get_ggml_tensor_rank(tensor);
    for (int i = 1; i < n_dims; i++) {
        data_size *= tensor->ne[i];
    }
    return data_size;
    */
    // ggml_nbytes() already accounts for the row size and all higher
    // dimensions, so delegate to it directly.
    return ggml_nbytes(tensor);
}

// =================================================================================================
//
// QNN backend internal helper functions
//
// =================================================================================================
// TODO: currently only GGML_OP_ADD, GGML_OP_MUL and GGML_OP_MUL_MAT are supported
const char* opname_from_ggmlop(enum ggml_op ggmlop) {
    switch (ggmlop) {
        case GGML_OP_ADD:
            return QNN_OP_ELEMENT_WISE_ADD;
        case GGML_OP_MUL:
            return QNN_OP_ELEMENT_WISE_MULTIPLY;
        case GGML_OP_MUL_MAT:
            return QNN_OP_MAT_MUL;
        default:
            break;
    }
    return nullptr;
}

} // namespace qnn
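A hedged sketch of how the nullptr sentinel from opname_from_ggmlop could serve as a capability check when deciding whether to offload a graph node (the helper below is hypothetical and not part of this commit):

    // Hypothetical helper: a node can be offloaded to QNN only if its ggml op
    // has a registered QNN op name.
    static bool ggml_qnn_can_offload(const ggml_tensor* node) {
        return qnn::opname_from_ggmlop(node->op) != nullptr;
    }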