forked from ggerganov/llama.cpp
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ggml: add Qualcomm QNN (Qualcomm Neural Network, aka Qualcomm AI Engine Direct) backend
- Loading branch information
Showing
8 changed files
with
4,390 additions
and
1 deletion.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
// Public C API of the ggml QNN (Qualcomm Neural Network, aka Qualcomm AI Engine Direct) backend. | ||
#pragma once | ||
|
||
#include "ggml.h" | ||
#include "ggml-backend.h" | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
|
||
// NOTE(review): presumably the number of real QNN devices (CPU/GPU/NPU, device numbers 0..2) - confirm | ||
#define GGML_QNN_MAX_DEVICES 3 | ||
|
||
// The QNN cDSP and HTA backends are not used at present; the focus is on the QNN CPU/GPU/NPU (aka HTP/DSP) backends. | ||
enum QNNBackend { | ||
QNN_BACKEND_CPU, | ||
QNN_BACKEND_GPU, | ||
QNN_BACKEND_NPU, | ||
QNN_BACKEND_GGML, // "fake" QNN backend, used only to compare performance between QNN and the original GGML | ||
}; | ||
|
||
// NOTE(review): by its name this registers the QNN devices; the meaning of the int result is not visible here - confirm | ||
GGML_API int ggml_backend_qnn_reg_devices(void); | ||
|
||
/** | ||
* NOTE(review): by its name this initializes a QNN backend for one device - confirm against the implementation. | ||
* | ||
* @param dev_num 0: QNN_BACKEND_CPU 1: QNN_BACKEND_GPU 2: QNN_BACKEND_NPU(aka HTP/DSP) | ||
* @param qnn_lib_path QNN library path, such as "/data/local/tmp/" on Android, or a path specified in the JNI layer | ||
* @return a ggml_backend_t; the value returned on failure is not specified in this header - TODO confirm | ||
*/ | ||
GGML_API ggml_backend_t ggml_backend_qnn_init(size_t dev_num, const char * qnn_lib_path); | ||
|
||
// NOTE(review): by its name, reports whether `backend` is a QNN backend - confirm | ||
GGML_API bool ggml_backend_is_qnn(ggml_backend_t backend); | ||
|
||
// NOTE(review): by its name, sets the thread count used by `backend` to `thread_counts` - confirm | ||
GGML_API void ggml_backend_qnn_set_n_threads(ggml_backend_t backend, int thread_counts); | ||
|
||
// NOTE(review): relationship between the returned count and GGML_QNN_MAX_DEVICES is not visible here - confirm | ||
GGML_API int ggml_backend_qnn_get_device_count(void); | ||
|
||
// Takes a caller-provided buffer `description` together with its size `description_size`. | ||
// NOTE(review): presumably filled with a description of device `dev_num`; truncation behavior unknown - confirm | ||
GGML_API void ggml_backend_qnn_get_device_description(size_t dev_num, char * description, size_t description_size); | ||
|
||
// NOTE(review): the only declaration here marked GGML_CALL; presumably returns the buffer type for device `dev_num` - confirm | ||
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_qnn_buffer_type(size_t dev_num); | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# build tree created by the build script (cmake -B ./out/arm64-v8a) | ||
out | ||
# Android NDK r26c directory and the downloaded android-ndk-r26c-linux.zip archive | ||
android-ndk-r26c* | ||
# test executable copied out of the build tree by the build script | ||
ggml-qnn-test* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# Stand-alone Android (arm64) build of the ggml QNN backend operator test.
#
# Variables expected on the cmake command line (-D<name>=<value>):
#   QNN_SDK_PATH              root directory of the Qualcomm QNN SDK (required)
#   TARGET_NAME               name of the executable (defaults to the project name)
#   TARGET_SNAPDRAGON_8_GEN3  ON to tune for Snapdragon 8 Gen 3 (defaults to OFF)
cmake_minimum_required(VERSION 3.22.1)
project(ggml-qnn-test)

set(CMAKE_VERBOSE_MAKEFILE ON)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# Set to ON if the target Android phone is based on Qualcomm Snapdragon 8 Gen 3.
# Declared as an option so it can be switched with -DTARGET_SNAPDRAGON_8_GEN3=ON
# instead of editing this file.
option(TARGET_SNAPDRAGON_8_GEN3 "Tune the build for Qualcomm Snapdragon 8 Gen 3" OFF)

# Fail early with a clear message rather than compiling against "/include/QNN".
if(NOT QNN_SDK_PATH)
    message(FATAL_ERROR "QNN_SDK_PATH is not set; pass -DQNN_SDK_PATH=<path to the QNN SDK>")
endif()

# add_executable() needs a non-empty name; fall back to the project name.
if(NOT TARGET_NAME)
    set(TARGET_NAME ${PROJECT_NAME})
endif()

set(QNN_INC_PATH "${QNN_SDK_PATH}/include/QNN")
set(QNN_LIB_PATH "${QNN_SDK_PATH}/lib/aarch64-android")

# Directory two levels up holds ggml.h and the ggml sources.
set(GGML_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../..")

message("QNN_SDK_PATH : ${QNN_SDK_PATH}")
message("QNN_INC_PATH : ${QNN_INC_PATH}")
message("QNN_LIB_PATH : ${QNN_LIB_PATH}")

add_executable(${TARGET_NAME}
    "${GGML_ROOT}/ggml.c"
    "${GGML_ROOT}/ggml-alloc.c"
    "${GGML_ROOT}/ggml-backend.c"
    "${GGML_ROOT}/ggml-quants.c"
    "${GGML_ROOT}/ggml-qnn.cpp"
    test-qnn-ops.cpp
)

target_include_directories(${TARGET_NAME} PRIVATE
    "${QNN_INC_PATH}"
    "${GGML_ROOT}" # ggml.h
)

# Preprocessor definitions only; NDEBUG is added for Release builds.
target_compile_definitions(${TARGET_NAME} PRIVATE
    __ARM_NEON
    GGML_USE_QNN
    $<$<CONFIG:Release>:NDEBUG>
)

# Compiler flags go through target_compile_options rather than add_definitions.
target_compile_options(${TARGET_NAME} PRIVATE
    -Wall
    -Wno-sign-compare
    $<$<CONFIG:Release>:-O3>
)

if(TARGET_SNAPDRAGON_8_GEN3)
    # These flags have only been verified on the Qualcomm SM8650-AB Snapdragon 8 Gen 3.
    target_compile_options(${TARGET_NAME} PRIVATE
        -march=armv8.7-a
        -mcpu=cortex-x1
        -mtune=cortex-x1
    )
else()
    # This flag is expected to work on all mainstream Android phones based on a Qualcomm mobile SoC.
    target_compile_options(${TARGET_NAME} PRIVATE -mcpu=cortex-a72)
endif()

# Android system libraries: liblog (located explicitly) and libandroid.
find_library(LOG_LIB log REQUIRED)
target_link_libraries(${TARGET_NAME} PRIVATE ${LOG_LIB} android)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
#!/bin/bash
#
# Builds the ggml QNN backend test program for Android arm64.

# Abort on the first failing command.
set -e

# Root of the Qualcomm QNN SDK. References:
#   https://qpm.qualcomm.com/#/main/tools/details/qualcomm_ai_engine_direct
#   https://developer.qualcomm.com/software/hexagon-dsp-sdk/tools
# A value already present in the environment takes precedence over the default.
QNN_SDK_PATH=${QNN_SDK_PATH:-/opt/qcom/aistack/qnn/2.20.0.240223/}

# The Android NDK lives in (or is downloaded to) the current working directory.
ANDROID_NDK="$(pwd)/android-ndk-r26c"
ANDROID_PLATFORM=android-34
TARGET=ggml-qnn-test
|
||
|
||
# Print the Android NDK and QNN SDK locations used by this build.
function dump_vars()
{
    # printf with %s prints the paths verbatim; `echo -e` would interpret any
    # backslash sequences contained in them.
    printf 'ANDROID_NDK: %s\n' "${ANDROID_NDK}"
    printf 'QNN_SDK_PATH: %s\n' "${QNN_SDK_PATH}"
}
|
||
|
||
# Print the current working directory followed by an empty line.
function show_pwd()
{
    # %s keeps the path verbatim; `echo -e` would interpret backslash
    # sequences that happen to occur in the directory name.
    printf 'current working path:%s\n\n' "$(pwd)"
}
|
||
|
||
# Exit with status 1 unless QNN_SDK_PATH names an existing directory.
function check_qnn_sdk()
{
    # The expansion must be quoted: unquoted, an empty value collapses the test
    # to `[ ! -d ]` (which is false) and a path containing spaces makes `[`
    # fail with "too many arguments"; in both cases the check would be skipped.
    if [ ! -d "${QNN_SDK_PATH}" ]; then
        printf 'QNN_SDK_PATH %s not exist, pls check...\n\n' "${QNN_SDK_PATH}"
        exit 1
    fi
}
|
||
|
||
# Make sure the Android NDK is available at ANDROID_NDK.
#
# If the NDK's CMake toolchain file is missing, android-ndk-r26c-linux.zip is
# downloaded into the current directory (unless it is already there) and
# unpacked. Exits with status 1 when the download or the unpacking fails.
function check_and_download_ndk()
{
    local ndk_archive=android-ndk-r26c-linux.zip
    local ndk_url=https://dl.google.com/android/repository/android-ndk-r26c-linux.zip

    # The toolchain file can only exist inside the NDK directory, so this
    # single test also covers "the directory itself is missing".
    if [ -f "${ANDROID_NDK}/build/cmake/android.toolchain.cmake" ]; then
        printf 'android ndk already exist:%s \n\n' "${ANDROID_NDK}"
        return 0
    fi

    if [ ! -f "${ndk_archive}" ]; then
        # Failures are tested directly in the `if`: with `set -e` active, a
        # separate `$?` check after the command would never be reached.
        if ! wget --no-config --quiet --show-progress -O "${ndk_archive}" "${ndk_url}"; then
            # wget -O leaves a partial file behind on failure; remove it so
            # the next run downloads again instead of unpacking a broken zip.
            rm -f "${ndk_archive}"
            printf 'failed to download android ndk to %s \n' "${ANDROID_NDK}"
            exit 1
        fi
    fi

    if ! unzip "${ndk_archive}"; then
        printf 'failed to unpack %s \n' "${ndk_archive}"
        exit 1
    fi

    printf 'android ndk saved to %s \n\n' "${ANDROID_NDK}"
}
|
||
|
||
# Configure and build the arm64-v8a test executable in ./out/arm64-v8a, list
# it, and copy it into the current directory.
function build_arm64
{
    local build_dir=./out/arm64-v8a

    # -S is the documented spelling of the legacy -H option. All expansions are
    # quoted because ANDROID_NDK is derived from the working directory, which
    # may contain spaces.
    cmake -S . -B "${build_dir}" \
        -DTARGET_NAME="${TARGET}" \
        -DANDROID_ABI=arm64-v8a \
        -DANDROID_PLATFORM="${ANDROID_PLATFORM}" \
        -DANDROID_NDK="${ANDROID_NDK}" \
        -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
        -DQNN_SDK_PATH="${QNN_SDK_PATH}"

    # Drive whichever generator was configured instead of assuming make; this
    # also avoids changing the working directory and back.
    cmake --build "${build_dir}"

    ls -lah "${build_dir}/${TARGET}"
    /bin/cp "${build_dir}/${TARGET}" ./
}
|
||
|
||
# Delete the "out" build directory from the current working directory, if present.
function remove_temp_dir()
{
    # Nothing to clean up when no previous build tree exists.
    [ -d out ] || return 0

    echo "remove out directory in $(pwd)"
    rm -rf out
}
|
||
|
||
# Run the build steps in order; `set -e` stops the script at the first failing step.
for build_step in \
    show_pwd \
    check_and_download_ndk \
    check_qnn_sdk \
    dump_vars \
    remove_temp_dir \
    build_arm64
do
    "${build_step}"
done
Oops, something went wrong.