From eff966968131ece680d0c1f2646d5b425f1bdbfb Mon Sep 17 00:00:00 2001 From: "zhou.weiguo" Date: Thu, 25 Apr 2024 23:39:31 +0800 Subject: [PATCH] adapt to test-backend-ops.cpp --- README-qnn.md | 6 ++- ggml-backend.c | 5 ++ ggml-qnn.cpp | 70 ++++++++++++------------- ggml-qnn.h | 6 +-- tests/ggml-qnn/.gitignore | 3 ++ tests/ggml-qnn/CMakeLists.txt | 80 ++++++++++++++++++++++++++++ tests/ggml-qnn/build-ggml-qnn.sh | 89 ++++++++++++++++++++++++++++++++ tests/ggml-qnn/run-ggml-qnn.sh | 22 ++++++++ tests/test-backend-ops.cpp | 4 ++ 9 files changed, 242 insertions(+), 43 deletions(-) create mode 100644 tests/ggml-qnn/.gitignore create mode 100644 tests/ggml-qnn/CMakeLists.txt create mode 100755 tests/ggml-qnn/build-ggml-qnn.sh create mode 100755 tests/ggml-qnn/run-ggml-qnn.sh diff --git a/README-qnn.md b/README-qnn.md index ce01d6b78fdf14..781a1f3ef8c44f 100644 --- a/README-qnn.md +++ b/README-qnn.md @@ -93,12 +93,14 @@ Any **mainstream** Android phone based on Qualcomm's mobile SoC should be suppor ### II. Build llama.cpp + QNN backend -Please refer to [project kantv](https://github.com/zhouwg/kantv) firstly. +Please refer to [project kantv](https://github.com/zhouwg/kantv) -A small and standalone Android example(or re-use [the existing Android example in llama.cpp](https://github.com/ggerganov/llama.cpp/tree/master/examples/llama.android)) for purpose of facilitate community developers to participate in develop/verify QNN backend. +or +use [test-backend-ops.cpp](tests/ggml-qnn) to verify the QNN backend on a Qualcomm mobile SoC based Android phone + ### III. 
Run the inference on Qualcomm mobile SoC based Android phone diff --git a/ggml-backend.c b/ggml-backend.c index e91d97cd9dcfce..24dbafd3c3d434 100644 --- a/ggml-backend.c +++ b/ggml-backend.c @@ -445,6 +445,11 @@ GGML_CALL static void ggml_backend_registry_init(void) { extern GGML_CALL void ggml_backend_kompute_reg_devices(void); ggml_backend_kompute_reg_devices(); #endif + +#ifdef GGML_USE_QNN + extern GGML_CALL int ggml_backend_qnn_reg_devices(void); + ggml_backend_qnn_reg_devices(); +#endif } GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) { diff --git a/ggml-qnn.cpp b/ggml-qnn.cpp index 7da19980a70a6e..1f3405620aed8e 100644 --- a/ggml-qnn.cpp +++ b/ggml-qnn.cpp @@ -1176,7 +1176,6 @@ static void qnn_buf_buffer_put(qnn_buf_t * fifo, buf_element_t * element) { fifo->qnn_buf_size++; fifo->qnn_buf_data_size += element->size; - LOGJ("put:index %d, fifo->size is %d, self->buffer_pool_num_free %d\n", element->id, fifo->qnn_buf_size, fifo->buffer_pool_num_free); pthread_cond_signal (&fifo->not_empty); pthread_mutex_unlock (&fifo->mutex); @@ -1426,9 +1425,12 @@ static void ggml_qnn_log_internal(ggml_log_level level, const char * file, const int len = vsnprintf(s_ggml_qnn_log_internal_buf + len_prefix, GGML_QNN_LOGBUF_LEN - len_prefix, format, args); if (len < (GGML_QNN_LOGBUF_LEN - len_prefix)) { #if (defined __ANDROID__) || (defined ANDROID) - __android_log_print(level, "ggml-qnn", "%s", s_ggml_qnn_log_internal_buf); + //for Android APP + __android_log_print(level, "ggml-qnn", "%s\n", s_ggml_qnn_log_internal_buf); + //for Android terminal + printf("%s\n", s_ggml_qnn_log_internal_buf); #else - printf("%s", buffer); //Qualcomm's QNN could running on Windows over ARM(aka WoA) + printf("%s\n", s_ggml_qnn_log_internal_buf); #endif } va_end(args); @@ -2125,9 +2127,9 @@ int qnn_instance::load_system() { _qnn_interface.qnn_system_context_create(&_qnn_system_handle); if 
(nullptr == _qnn_system_handle) { - LOGW("can not create QNN system contenxt\n"); + QNN_LOG_WARN("can not create QNN system contenxt\n"); } else { - QNN_LOG_DEBUG("initialize qnn system successfully\n"); + QNN_LOG_INFO("initialize qnn system successfully\n"); } return 0; @@ -2494,24 +2496,23 @@ static bool ggml_qnn_can_handle_op(const struct ggml_tensor * src0, const struct if (dst->op == GGML_OP_ADD) { return (src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16) && (src1->type == GGML_TYPE_F32 || src1->type == GGML_TYPE_F16) && - (dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16) && ((ne00 > 1 && ne01 > 1 && ne10 > 1 && ne11 > 1)) && - (src0->rank == src1->rank); + (dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16) && ((ne00 > 1 && ne01 > 1 && ne10 > 1 && ne11 > 1)); } if (dst->op == GGML_OP_MUL_MAT) { #if 1 // log output have significant effect to performance but useful during development stage QNN_LOG_DEBUG("GGML_OP_MUL_MAT"); - QNN_LOG_INFO("%15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", - src0->name, src0->rank, + QNN_LOG_INFO("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", + src0->name, src0->type, ggml_type_name(src0->type), src0->ne[0], src0->ne[1], src0->ne[2], src0->nb[0], src0->nb[1], src0->nb[2]); - QNN_LOG_INFO("%15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", - src1->name, src1->rank, + QNN_LOG_INFO("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", + src1->name, src1->type, ggml_type_name(src1->type), src1->ne[0], src1->ne[1], src1->ne[2], src1->nb[0], src1->nb[1], src1->nb[2]); - QNN_LOG_INFO("%15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", - dst->name, dst->rank, + QNN_LOG_INFO("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" 
PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", + dst->name, dst->type, ggml_type_name(dst->type), dst->ne[0], dst->ne[1], dst->ne[2], dst->nb[0], dst->nb[1], dst->nb[2]); #endif @@ -2576,18 +2577,18 @@ static void ggml_qnn_add(const ggml_tensor * src0, const ggml_tensor * src1, ggm QNN_INTERFACE_VER_TYPE qnn_raw_interface = ctx->raw_interface; n_begin_time = ggml_time_us(); -#if 0 //it works fine with whisper.cpp and llama.cpp. comment them because focus on mulmat in llama.cpp inference since 04-23-2024 +#if 0 QNN_LOG_DEBUG("call %s\n", __func__); - QNN_LOG_INFO("%15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", - src0->name, src0->rank, + QNN_LOG_INFO("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", + src0->name, src0->type, ggml_type_name(src0->type), src0->ne[0], src0->ne[1], src0->ne[2], src0->nb[0], src0->nb[1], src0->nb[2]); - QNN_LOG_INFO("%15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", - src1->name, src1->rank, + QNN_LOG_INFO("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", + src1->name, src1->type, ggml_type_name(src1->type), src1->ne[0], src1->ne[1], src1->ne[2], src1->nb[0], src1->nb[1], src1->nb[2]); - QNN_LOG_INFO("%15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", - dst->name, dst->rank, + QNN_LOG_INFO("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", + dst->name, dst->type, ggml_type_name(dst->type), dst->ne[0], dst->ne[1], dst->ne[2], dst->nb[0], dst->nb[1], dst->nb[2]); QNN_LOG_DEBUG("%d, %d, %d, %d", src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3]); @@ -2793,16 +2794,16 @@ static void ggml_qnn_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1, n_begin_time = ggml_time_us(); QNN_LOG_DEBUG("call %s\n", __func__); - 
QNN_LOG_INFO("%15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", - src0->name, src0->rank, + QNN_LOG_INFO("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", + src0->name, src0->type, ggml_type_name(src0->type), src0->ne[0], src0->ne[1], src0->ne[2], src0->nb[0], src0->nb[1], src0->nb[2]); - QNN_LOG_INFO("%15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", - src1->name, src1->rank, + QNN_LOG_INFO("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", + src1->name, src1->type, ggml_type_name(src1->type), src1->ne[0], src1->ne[1], src1->ne[2], src1->nb[0], src1->nb[1], src1->nb[2]); - QNN_LOG_INFO("%15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", - dst->name, dst->rank, + QNN_LOG_INFO("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", + dst->name, dst->type, ggml_type_name(dst->type), dst->ne[0], dst->ne[1], dst->ne[2], dst->nb[0], dst->nb[1], dst->nb[2]); QNN_LOG_DEBUG("%d, %d, %d, %d", src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3]); @@ -3000,16 +3001,16 @@ static void ggml_qnn_hanlde_op(const enum ggml_op ggmlop, const ggml_tensor * sr n_begin_time = ggml_time_us(); QNN_LOG_DEBUG("call %s\n", __func__); - QNN_LOG_INFO("%15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", - src0->name, src0->rank, + QNN_LOG_INFO("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", + src0->name, src0->type, ggml_type_name(src0->type), src0->ne[0], src0->ne[1], src0->ne[2], src0->nb[0], src0->nb[1], src0->nb[2]); - QNN_LOG_INFO("%15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", - src1->name, src1->rank, + 
QNN_LOG_INFO("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", + src1->name, src1->type, ggml_type_name(src1->type), src1->ne[0], src1->ne[1], src1->ne[2], src1->nb[0], src1->nb[1], src1->nb[2]); - QNN_LOG_INFO("%15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", - dst->name, dst->rank, + QNN_LOG_INFO("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n", + dst->name, dst->type, ggml_type_name(dst->type), dst->ne[0], dst->ne[1], dst->ne[2], dst->nb[0], dst->nb[1], dst->nb[2]); QNN_LOG_DEBUG("%d, %d, %d, %d", src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3]); @@ -4396,7 +4397,6 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads, int n_cur_ } -#if 0 //replaced with ggml_status ggml_backend_qnn_graph_compute_multithread static void * ggml_graph_compute_thread(void * data) { struct ggml_compute_state * state = (struct ggml_compute_state *) data; @@ -4531,7 +4531,6 @@ static void * ggml_graph_compute_thread(void * data) { return 0; } -#endif static ggml_status ggml_backend_qnn_graph_compute_multithread(ggml_backend_t backend, ggml_cgraph * cgraph) { @@ -4830,8 +4829,7 @@ ggml_backend_t ggml_backend_qnn_init(size_t device, const char * qnn_lib_path) { } -extern "C" int ggml_backend_qnn_reg_devices(); - +extern "C" int ggml_backend_qnn_reg_devices(void); int ggml_backend_qnn_reg_devices() { for (size_t idx = 0; idx < GGML_QNN_MAX_DEVICES; idx++) { diff --git a/ggml-qnn.h b/ggml-qnn.h index 6220a8a3fed0d0..cd15a7e3f91070 100644 --- a/ggml-qnn.h +++ b/ggml-qnn.h @@ -18,7 +18,7 @@ enum QNNBackend { QNN_HTP, }; -GGML_API int ggml_backend_qnn_reg_devices(); +GGML_API int ggml_backend_qnn_reg_devices(void); /** * @@ -39,10 +39,6 @@ GGML_API void ggml_backend_qnn_get_device_description(int device, char GGML_API ggml_backend_buffer_type_t ggml_backend_qnn_buffer_type(size_t dev_num); -// TODO: this is a 
temporary API, should be removed in the future -GGML_API bool ggml_qnn_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor); - - #ifdef __cplusplus } #endif diff --git a/tests/ggml-qnn/.gitignore b/tests/ggml-qnn/.gitignore new file mode 100644 index 00000000000000..a1cde9fb0a21e9 --- /dev/null +++ b/tests/ggml-qnn/.gitignore @@ -0,0 +1,3 @@ +out +android-ndk-r26c* +test-qnn* diff --git a/tests/ggml-qnn/CMakeLists.txt b/tests/ggml-qnn/CMakeLists.txt new file mode 100644 index 00000000000000..1f775676992490 --- /dev/null +++ b/tests/ggml-qnn/CMakeLists.txt @@ -0,0 +1,80 @@ +cmake_minimum_required(VERSION 3.22.1) +project(ggml-qnn) + +set(CMAKE_VERBOSE_MAKEFILE on) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +set(TARGET_SNAPDRAGON_8_GEN3 OFF) + +set(LLAMACPP_SRC_PATH ${PROJECT_ROOT_PATH}) +set(QNN_INC_PATH ${QNN_SDK_PATH}/include/QNN) +set(QNN_LIB_PATH ${QNN_SDK_PATH}/lib/aarch64-android) + +include_directories(${QNN_INC_PATH}) +include_directories(${LLAMACPP_SRC_PATH}) +include_directories(${LLAMACPP_SRC_PATH}/common) + +set(SOURCE_FILES + ${LLAMACPP_SRC_PATH}/ggml.c + ${LLAMACPP_SRC_PATH}/ggml-alloc.c + ${LLAMACPP_SRC_PATH}/ggml-backend.c + ${LLAMACPP_SRC_PATH}/ggml-quants.c + ${LLAMACPP_SRC_PATH}/ggml-qnn.cpp + ${LLAMACPP_SRC_PATH}/tests/test-backend-ops.cpp +) + + +message("PROJECT_ROOT_PATH : ${PROJECT_ROOT_PATH}") +message("LLAMACPP_SRC_PATH : ${LLAMACPP_SRC_PATH}") +message("QNN_SDK_PATH : ${QNN_SDK_PATH}") +message("QNN_INC_PATH : ${QNN_INC_PATH}") +message("QNN_LIB_PATH : ${QNN_LIB_PATH}") +message("target name : ${TARGET_NAME}") + + +add_definitions(-DTARGET_ANDROID) +add_definitions(-D__ARM_NEON) +add_definitions(-DGGML_USE_QNN) + +add_definitions(-DNDEBUG) +add_definitions(-O3) + +if (TARGET_SNAPDRAGON_8_GEN3) +add_definitions(-march=armv8.7-a) +add_definitions(-mcpu=cortex-x1) +add_definitions(-mtune=cortex-x1) + +else() + +# the below build optimization might
work well on all mainstream Android phones +add_definitions(-mcpu=cortex-a72) + +endif() + +add_compile_options("-Wall" "-Wno-sign-compare") + +if (GGML_JNI_QNN) + file(GLOB allPrebuiltQNNLibs "${QNN_LIB_PATH}/libQnn*.so") + + #file(COPY ${allPrebuiltQNNLibs} DESTINATION ${PREBUILT_LIB_PATH}/ ) + +endif() + +find_library(LOG_LIB log) + +add_library(QNNCpu + SHARED + IMPORTED) + +set_target_properties(QNNCpu + PROPERTIES + IMPORTED_LOCATION + ${PREBUILT_LIB_PATH}/libQnnCpu.so) + +link_libraries(${LOG_LIB} android) + +add_executable(${TARGET_NAME} + ${SOURCE_FILES} +) diff --git a/tests/ggml-qnn/build-ggml-qnn.sh b/tests/ggml-qnn/build-ggml-qnn.sh new file mode 100755 index 00000000000000..980b7c6b889ae4 --- /dev/null +++ b/tests/ggml-qnn/build-ggml-qnn.sh @@ -0,0 +1,89 @@ +#!/bin/bash + +set -e + +#modify following lines to adapt to local dev envs +PROJECT_ROOT_PATH=~/github/llama.cpp/ +#https://qpm.qualcomm.com/#/main/tools/details/qualcomm_ai_engine_direct +#https://developer.qualcomm.com/software/hexagon-dsp-sdk/tools +QNN_SDK_PATH=/opt/qcom/aistack/qnn/2.20.0.240223/ + + +ANDROID_NDK=`pwd`/android-ndk-r26c +TARGET=ggml-qnn-test + + +function dump_vars() +{ + echo -e "PROJECT_ROOT_PATH: ${PROJECT_ROOT_PATH}" + echo -e "ANDROID_NDK: ${ANDROID_NDK}" + echo -e "QNN_SDK_PATH: ${QNN_SDK_PATH}" +} + + +function show_pwd() +{ + echo -e "current working path:$(pwd)\n" +} + + +function check_and_download_ndk() +{ + is_android_ndk_exist=1 + + if [ ! -d ${ANDROID_NDK} ]; then + is_android_ndk_exist=0 + fi + + if [ ! -f ${ANDROID_NDK}/build/cmake/android.toolchain.cmake ]; then + is_android_ndk_exist=0 + fi + + if [ ${is_android_ndk_exist} -eq 0 ]; then + + if [ ! -f android-ndk-r26c-linux.zip ]; then + wget --no-config --quiet --show-progress -O android-ndk-r26c-linux.zip https://dl.google.com/android/repository/android-ndk-r26c-linux.zip + fi + + unzip android-ndk-r26c-linux.zip + + if [ $?
-ne 0 ]; then + printf "failed to download android ndk to %s \n" "${ANDROID_NDK}" + exit 1 + fi + + printf "android ndk saved to ${ANDROID_NDK} \n\n" + else + printf "android ndk already exist:${ANDROID_NDK} \n\n" + fi +} + + +function build_arm64 +{ + cmake -H. -B./out/arm64-v8a -DPROJECT_ROOT_PATH=${PROJECT_ROOT_PATH} -DTARGET_NAME=${TARGET} -DCMAKE_BUILD_TYPE=${PROJECT_BUILD_TYPE} -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=${ANDROID_PLATFORM} -DANDROID_NDK=${ANDROID_NDK} -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake -DQNN_SDK_PATH=${QNN_SDK_PATH} + + cd ./out/arm64-v8a + make + + ls -lah ${TARGET} + /bin/cp ${TARGET} ../../ + cd - +} + + +function remove_temp_dir() +{ + if [ -d out ]; then + echo "remove out directory in `pwd`" + rm -rf out + fi +} + + + +show_pwd +check_and_download_ndk +dump_vars +remove_temp_dir +build_arm64 diff --git a/tests/ggml-qnn/run-ggml-qnn.sh b/tests/ggml-qnn/run-ggml-qnn.sh new file mode 100755 index 00000000000000..ea68aecb4f9424 --- /dev/null +++ b/tests/ggml-qnn/run-ggml-qnn.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +#modify following lines to adapt to local dev envs +QNN_SDK_PATH=/opt/qcom/aistack/qnn/2.20.0.240223/ + +GGML_QNN_TEST=ggml-qnn-test +REMOTE_PATH=/data/local/tmp/ + +adb push ${GGML_QNN_TEST} ${REMOTE_PATH} +adb push ${QNN_SDK_PATH}/lib/aarch64-android/libQnnSystem.so ${REMOTE_PATH}/ +adb push ${QNN_SDK_PATH}/lib/aarch64-android/libQnnCpu.so ${REMOTE_PATH}/ +adb push ${QNN_SDK_PATH}/lib/aarch64-android/libQnnGpu.so ${REMOTE_PATH}/ + +#the QNN HTP (aka DSP) backend has only been verified successfully on Xiaomi 14 (Qualcomm SM8650-AB Snapdragon 8 Gen 3) +adb push ${QNN_SDK_PATH}/lib/aarch64-android/libQnnHtp.so ${REMOTE_PATH}/ +adb push ${QNN_SDK_PATH}/lib/aarch64-android/libQnnHtpNetRunExtensions.so ${REMOTE_PATH}/ +adb push ${QNN_SDK_PATH}/lib/aarch64-android/libQnnHtpPrepare.so ${REMOTE_PATH}/ +adb push ${QNN_SDK_PATH}/lib/aarch64-android/libQnnHtpV75Stub.so ${REMOTE_PATH}/ +adb push
${QNN_SDK_PATH}/lib/hexagon-v75/unsigned/libQnnHtpV75Skel.so ${REMOTE_PATH}/ + +adb shell chmod +x /data/local/tmp/${GGML_QNN_TEST} +adb shell /data/local/tmp/${GGML_QNN_TEST} diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 02daad24b030ab..9f4afa35954670 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -2225,7 +2225,11 @@ int main(int argc, char ** argv) { continue; } +#ifdef GGML_USE_QNN + ggml_backend_t backend = ggml_backend_reg_init_backend(i, "/data/local/tmp/"); +#else ggml_backend_t backend = ggml_backend_reg_init_backend(i, NULL); +#endif GGML_ASSERT(backend != NULL); if (backend_filter == NULL && ggml_backend_is_cpu(backend)) {