Skip to content

Commit

Permalink
ggml: add Qualcomm QNN (Qualcomm Neural Network, aka Qualcomm AI Engine…
Browse files Browse the repository at this point in the history
… Direct) backend
  • Loading branch information
zhouwg committed Jun 4, 2024
1 parent bde7cd3 commit 95651be
Show file tree
Hide file tree
Showing 8 changed files with 4,390 additions and 1 deletion.
3,587 changes: 3,587 additions & 0 deletions ggml-qnn.cpp

Large diffs are not rendered by default.

43 changes: 43 additions & 0 deletions ggml-qnn.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#pragma once

#include "ggml.h"
#include "ggml-backend.h"

#ifdef __cplusplus
extern "C" {
#endif


// Upper bound on the number of QNN devices exposed to llama.cpp
// (matches the three usable entries of enum QNNBackend below).
#define GGML_QNN_MAX_DEVICES 3

// QNN cDSP and HTA backend would not be used currently, just focus on
// QNN CPU/GPU/NPU (aka HTP/DSP) backend currently.
enum QNNBackend {
    QNN_BACKEND_CPU,
    QNN_BACKEND_GPU,
    QNN_BACKEND_NPU,
    QNN_BACKEND_GGML, // "fake" QNN backend just for comparing performance between QNN and original GGML
};

/**
 * Register the available QNN devices with the ggml backend registry.
 *
 * @return presumably the number of devices registered — confirm in ggml-qnn.cpp
 */
GGML_API int ggml_backend_qnn_reg_devices(void);

/**
 * Create and initialize a QNN backend instance for one device.
 *
 * @param dev_num      0: QNN_BACKEND_CPU, 1: QNN_BACKEND_GPU, 2: QNN_BACKEND_NPU (aka HTP/DSP)
 * @param qnn_lib_path QNN library path, such as "/data/local/tmp/" on Android,
 *                     or specified in the JNI layer
 * @return             backend handle, or NULL on failure
 */
GGML_API ggml_backend_t ggml_backend_qnn_init(size_t dev_num, const char * qnn_lib_path);

/** @return true if the given backend is a QNN backend */
GGML_API bool ggml_backend_is_qnn(ggml_backend_t backend);

/** Set the number of threads used by the given QNN backend. */
GGML_API void ggml_backend_qnn_set_n_threads(ggml_backend_t backend, int thread_counts);

/** @return number of available QNN devices (at most GGML_QNN_MAX_DEVICES) */
GGML_API int ggml_backend_qnn_get_device_count(void);

/**
 * Write a human-readable description of device dev_num into description,
 * bounded by description_size.
 */
GGML_API void ggml_backend_qnn_get_device_description(size_t dev_num, char * description, size_t description_size);

/** @return the ggml buffer type used to allocate tensors on QNN device dev_num */
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_qnn_buffer_type(size_t dev_num);

#ifdef __cplusplus
}
#endif
45 changes: 44 additions & 1 deletion llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
# include "ggml-sycl.h"
#elif defined(GGML_USE_KOMPUTE)
# include "ggml-kompute.h"
#elif defined(GGML_USE_QNN)
# include "ggml-qnn.h"
#endif

#ifdef GGML_USE_METAL
Expand Down Expand Up @@ -1746,6 +1748,29 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_cpu(bool host_buffer
GGML_UNUSED(host_buffer);
}

// Choose a buffer type that splits tensors across devices according to
// tensor_split; when no multi-device split backend is compiled in (or only
// one device is present), fall back to a single-device buffer on fallback_gpu.
static ggml_backend_buffer_type_t llama_default_buffer_type_split(int fallback_gpu, const float * tensor_split) {
    ggml_backend_buffer_type_t result = nullptr;

#ifdef GGML_USE_CUDA
    if (ggml_backend_cuda_get_device_count() > 1) {
        result = ggml_backend_cuda_split_buffer_type(tensor_split);
    }
#endif

#ifdef GGML_USE_SYCL
    if (ggml_backend_sycl_get_device_count() > 1) {
        result = ggml_backend_sycl_split_buffer_type(tensor_split);
    }
#endif

    return result != nullptr ? result : llama_default_buffer_type_offload(fallback_gpu);

    GGML_UNUSED(tensor_split);
}

//
// globals
//
Expand Down Expand Up @@ -2379,10 +2404,13 @@ static size_t llama_get_device_count(const llama_model & model) {
count = ggml_backend_sycl_get_device_count();
#elif defined(GGML_USE_VULKAN)
count = ggml_backend_vk_get_device_count();
#elif defined(GGML_USE_QNN)
count = ggml_backend_qnn_get_device_count();
#endif
#if defined(GGML_USE_RPC)
count += model.rpc_servers.size();
#endif

return count;
GGML_UNUSED(model);
}
Expand Down Expand Up @@ -2413,6 +2441,8 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_offload(const llama_
if (buft == nullptr) {
LLAMA_LOG_WARN("%s: cannot use GPU %d, check `vulkaninfo --summary`\n", __func__, gpu);
}
#elif defined(GGML_USE_QNN)
buft = ggml_backend_qnn_buffer_type(gpu);
#endif

if (buft == nullptr) {
Expand Down Expand Up @@ -16100,6 +16130,8 @@ size_t llama_max_devices(void) {
return GGML_SYCL_MAX_DEVICES;
#elif defined(GGML_USE_VULKAN)
return GGML_VK_MAX_DEVICES;
#elif defined(GGML_USE_QNN)
return GGML_QNN_MAX_DEVICES;
#else
return 1;
#endif
Expand All @@ -16115,7 +16147,7 @@ bool llama_supports_mlock(void) {

bool llama_supports_gpu_offload(void) {
#if defined(GGML_USE_CUDA) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL) || defined(GGML_USE_VULKAN) || \
defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_RPC)
defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_RPC) | defined(GGML_USE_QNN)
// Defined when llama.cpp is compiled with support for offloading model layers to GPU.
return true;
#else
Expand Down Expand Up @@ -16426,6 +16458,17 @@ struct llama_context * llama_new_context_with_model(
}
ctx->backends.push_back(backend);
}
#elif defined(GGML_USE_QNN)
if (model->n_gpu_layers > 0) {
//the second param is package name of Andorid app, can be got by JNI from Java layer
ggml_backend_t backend = ggml_backend_qnn_init(QNN_CPU, "/data/data/com.ggml.llamacpp/");
if (nullptr == backend) {
LLAMA_LOG_ERROR("%s: failed to initialize QNN backend\n", __func__);
llama_free(ctx);
return nullptr;
}
ctx->backends.push_back(backend);
}
#endif
#if defined(GGML_USE_RPC)
if (model->n_gpu_layers > 0) {
Expand Down
3 changes: 3 additions & 0 deletions tests/ggml-qnn/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
out
android-ndk-r26c*
ggml-qnn-test*
60 changes: 60 additions & 0 deletions tests/ggml-qnn/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
cmake_minimum_required(VERSION 3.22.1)
project(ggml-qnn-test)

set(CMAKE_VERBOSE_MAKEFILE on)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# set to ON if the target Android phone is based on Qualcomm Snapdragon 8 Gen 3
set(TARGET_SNAPDRAGON_8_GEN3 OFF)

set(QNN_INC_PATH ${QNN_SDK_PATH}/include/QNN)
set(QNN_LIB_PATH ${QNN_SDK_PATH}/lib/aarch64-android)

include_directories(${QNN_INC_PATH})
include_directories(../../) # ggml.h

set(SOURCE_FILES
        ../../ggml.c
        ../../ggml-alloc.c
        ../../ggml-backend.c
        ../../ggml-quants.c
        ../../ggml-qnn.cpp
        test-qnn-ops.cpp
)


message("QNN_SDK_PATH : ${QNN_SDK_PATH}")
message("QNN_INC_PATH : ${QNN_INC_PATH}")
message("QNN_LIB_PATH : ${QNN_LIB_PATH}")

# add_definitions() is reserved for preprocessor -D flags; compiler flags
# such as -O3/-march/-mcpu go through add_compile_options() below
add_definitions(-D__ARM_NEON)
add_definitions(-DGGML_USE_QNN)

if(CMAKE_BUILD_TYPE STREQUAL "Release")
    # NDEBUG and -O3 are already implied by the Release build type, but they
    # are kept explicit here to preserve the original build behavior
    add_definitions(-DNDEBUG)
    add_compile_options(-O3)
endif()

if (TARGET_SNAPDRAGON_8_GEN3)
    # the below build optimization only verified and works well on Qualcomm SM8650-AB Snapdragon 8 Gen 3
    add_compile_options(-march=armv8.7-a)
    add_compile_options(-mcpu=cortex-x1)
    add_compile_options(-mtune=cortex-x1)
else()
    # the below build optimization should work on all mainstream Android phones based on a Qualcomm mobile SoC
    add_compile_options(-mcpu=cortex-a72)
endif()

add_compile_options("-Wall" "-Wno-sign-compare")

find_library(LOG_LIB log)

link_libraries(${LOG_LIB} android)

add_executable(${TARGET_NAME}
        ${SOURCE_FILES}
)
95 changes: 95 additions & 0 deletions tests/ggml-qnn/build-ggml-qnn.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#!/bin/bash

# Build script: cross-compiles the ggml-qnn test binary for arm64-v8a Android
# using the Android NDK and the Qualcomm QNN (AI Engine Direct) SDK.

set -e

# QNN SDK download / install instructions:
#https://qpm.qualcomm.com/#/main/tools/details/qualcomm_ai_engine_direct
#https://developer.qualcomm.com/software/hexagon-dsp-sdk/tools
QNN_SDK_PATH=/opt/qcom/aistack/qnn/2.20.0.240223/

# NDK is downloaded next to this script if not already present (see check_and_download_ndk)
ANDROID_NDK=`pwd`/android-ndk-r26c
ANDROID_PLATFORM=android-34
TARGET=ggml-qnn-test


function dump_vars()
{
    # Print the toolchain locations this build will use, for easier debugging.
    local var
    for var in ANDROID_NDK QNN_SDK_PATH; do
        echo -e "${var}: ${!var}"
    done
}


function show_pwd()
{
    # Report where the build is running from.
    printf 'current working path:%s\n\n' "$(pwd)"
}


function check_qnn_sdk()
{
    # Abort early when the QNN SDK is not installed at the expected location.
    # Quote the path: an unquoted empty/space-containing QNN_SDK_PATH would
    # make the test silently succeed or error out.
    if [ ! -d "${QNN_SDK_PATH}" ]; then
        echo -e "QNN_SDK_PATH ${QNN_SDK_PATH} not exist, pls check...\n"
        exit 1
    fi
}


function check_and_download_ndk()
{
    # Ensure a usable Android NDK r26c exists at ${ANDROID_NDK};
    # download and unpack it next to this script otherwise.
    is_android_ndk_exist=1

    if [ ! -d "${ANDROID_NDK}" ]; then
        is_android_ndk_exist=0
    fi

    # the directory may exist but be incomplete — verify the toolchain file too
    if [ ! -f "${ANDROID_NDK}/build/cmake/android.toolchain.cmake" ]; then
        is_android_ndk_exist=0
    fi

    if [ ${is_android_ndk_exist} -eq 0 ]; then

        if [ ! -f android-ndk-r26c-linux.zip ]; then
            wget --no-config --quiet --show-progress -O android-ndk-r26c-linux.zip https://dl.google.com/android/repository/android-ndk-r26c-linux.zip
        fi

        # NOTE: with `set -e` a failing bare `unzip` would abort the script
        # before any later `$? -ne 0` check could run (the original check was
        # dead code); test the command directly instead
        if ! unzip android-ndk-r26c-linux.zip; then
            printf "failed to unpack android ndk to %s \n" "${ANDROID_NDK}"
            exit 1
        fi

        # pass the path as a printf argument, not inside the format string
        printf "android ndk saved to %s \n\n" "${ANDROID_NDK}"
    else
        printf "android ndk already exist:%s \n\n" "${ANDROID_NDK}"
    fi
}


function build_arm64
{
    # Configure and build the arm64-v8a test binary with the NDK toolchain,
    # then copy the resulting executable next to this script.
    # All path-valued variables are quoted so paths with spaces survive.
    cmake -H. -B./out/arm64-v8a \
        -DTARGET_NAME="${TARGET}" \
        -DANDROID_ABI=arm64-v8a \
        -DANDROID_PLATFORM="${ANDROID_PLATFORM}" \
        -DANDROID_NDK="${ANDROID_NDK}" \
        -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
        -DQNN_SDK_PATH="${QNN_SDK_PATH}"

    cd ./out/arm64-v8a
    make

    ls -lah "${TARGET}"
    /bin/cp "${TARGET}" ../../
    cd -
}


function remove_temp_dir()
{
    # Start from a clean slate: drop any previous build output directory.
    if [ ! -d out ]; then
        return 0
    fi

    echo "remove out directory in `pwd`"
    rm -rf out
}


# main flow: validate toolchain + SDK, then do a clean from-scratch build
show_pwd
check_and_download_ndk
check_qnn_sdk
dump_vars
remove_temp_dir
build_arm64
Loading

0 comments on commit 95651be

Please sign in to comment.