
Commit e7a3c68

Author: xiaying (committed)
MNN:Sync: Sync Internal 3.3.0
1 parent a8fb0de commit e7a3c68

240 files changed: +35815, -21576 lines


CMakeLists.txt

Lines changed: 34 additions & 23 deletions
@@ -82,15 +82,15 @@ option(MNN_BUILD_AUDIO "Build audio api in MNN." OFF)
 option(MNN_SME2 "Use Arm sme2 instructions" ON)

 if (MNN_BUILD_MINI)
-    set(MNN_SKIPBUILD_GEOMETRY ON)
-    set(MNN_REDUCE_SIZE ON)
+    set(MNN_SKIPBUILD_GEOMETRY ON CACHE BOOL "<docstring>" FORCE)
+    set(MNN_REDUCE_SIZE ON CACHE BOOL "<docstring>" FORCE)
 endif()

 if (MNN_REDUCE_SIZE)
-    set(MNN_SUPPORT_DEPRECATED_OP OFF)
-    set(MNN_SUPPORT_DEPRECATED_OPV2 OFF)
-    set(MNN_SUPPORT_QUANT_EXTEND OFF)
-    set(MNN_USE_SPARSE_COMPUTE OFF)
+    set(MNN_SUPPORT_DEPRECATED_OP OFF CACHE BOOL "<docstring>" FORCE)
+    set(MNN_SUPPORT_DEPRECATED_OPV2 OFF CACHE BOOL "<docstring>" FORCE)
+    set(MNN_SUPPORT_QUANT_EXTEND OFF CACHE BOOL "<docstring>" FORCE)
+    set(MNN_USE_SPARSE_COMPUTE OFF CACHE BOOL "<docstring>" FORCE)
 endif()

 IF (OHOS AND MNN_INTERNAL)
@@ -106,18 +106,19 @@ IF (NOT DEFINED MNN_USE_SPARSE_COMPUTE)
 ENDIF()

 IF (MNN_BUILD_LLM)
-    set(MNN_LOW_MEMORY ON)
-    set(MNN_SUPPORT_TRANSFORMER_FUSE ON)
+    set(MNN_LOW_MEMORY ON CACHE BOOL "<docstring>" FORCE)
+    set(MNN_SUPPORT_TRANSFORMER_FUSE ON CACHE BOOL "<docstring>" FORCE)
     IF (MNN_BUILD_LLM_OMNI)
-        set(MNN_BUILD_OPENCV ON)
-        set(MNN_BUILD_AUDIO ON)
+        set(MNN_BUILD_OPENCV ON CACHE BOOL "<docstring>" FORCE)
+        set(MNN_BUILD_AUDIO ON CACHE BOOL "<docstring>" FORCE)
+        set(MNN_IMGCODECS ON CACHE BOOL "<docstring>" FORCE)
     ENDIF()
 ENDIF()

 IF (MNN_BUILD_DIFFUSION)
-    set(MNN_LOW_MEMORY ON)
-    set(MNN_SUPPORT_TRANSFORMER_FUSE ON)
-    set(MNN_BUILD_OPENCV ON)
+    set(MNN_LOW_MEMORY ON CACHE BOOL "<docstring>" FORCE)
+    set(MNN_SUPPORT_TRANSFORMER_FUSE ON CACHE BOOL "<docstring>" FORCE)
+    set(MNN_BUILD_OPENCV ON CACHE BOOL "<docstring>" FORCE)
 ENDIF()

 IF(NOT MNN_BUILD_SHARED_LIBS AND MNN_SEP_BUILD)
@@ -656,6 +657,15 @@ IF(MNN_QNN)
     list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_QNN>)
 ENDIF()

+# NEUROPILOT
+IF(MNN_NEUROPILOT)
+    target_compile_options(MNNCore PRIVATE -DMNN_NEUROPILOT=1)
+    add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/neuropilot)
+    list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_NEUROPILOT>)
+ENDIF()
+
+# Vulkan
+
 # Vulkan
 IF(MNN_VULKAN)
     add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/vulkan/)
@@ -807,6 +817,16 @@ ELSE()
     ENDIF()
 ENDIF()

+add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/tools/audio)
+IF(MNN_BUILD_AUDIO)
+    IF(MNN_SEP_BUILD)
+        list(APPEND MNN_DEPS MNNAudio)
+    ELSE()
+        list(APPEND MNN_TARGETS MNNAudio)
+        list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNAudio>)
+    ENDIF()
+ENDIF()
+
 IF(MNN_BUILD_LLM)
     include(${CMAKE_CURRENT_LIST_DIR}/transformers/llm/engine/CMakeLists.txt)
     IF(NOT MNN_SEP_BUILD)
@@ -887,21 +907,12 @@ IF(WIN32 AND MNN_BUILD_CONVERTER AND MNN_BUILD_SHARED_LIBS)
 ENDIF()
 # Merge MNN/MNNExpress/MNNOpenCV and other backends into one .lib/.dll on Windows

-add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/tools/audio)
-IF(MNN_BUILD_AUDIO AND NOT MNN_SEP_BUILD)
-    IF(MSVC)
-        target_compile_definitions(MNNAudio PRIVATE "-DBUILDING_MNN_DLL" INTERFACE "-DUSING_MNN_DLL")
-    ENDIF()
-    target_sources(MNN PRIVATE $<TARGET_OBJECTS:MNNAudio>)
-ENDIF()
-
-
 if(CMAKE_SYSTEM_NAME MATCHES "^Linux")
     # Using -pthread, needed by thread-safe implemention of glibc, is better than only using -lpthread
     # https://stackoverflow.com/questions/23250863/difference-between-pthread-and-lpthread-while-compiling
     target_link_libraries(MNN PUBLIC -pthread dl)
 elseif(CMAKE_SYSTEM_NAME MATCHES "^Android")
-    target_link_libraries(MNN PUBLIC log m)
+    target_link_libraries(MNN PUBLIC log m android)
 else()
 endif()
 if (NOT MNN_BUILD_SHARED_LIBS)

backupcode/cpubackend/compute/DeconvolutionWithStride.cpp

Lines changed: 3 additions & 3 deletions
@@ -70,7 +70,7 @@ static void _winograd(const DeconvolutionWithStride::ComputeUnit& unit, int thre
     el[2] = 0;
     el[3] = 0;
     size_t parameters[6];
-    parameters[0] = eP * sizeof(float);
+    parameters[0] = eP * lP * sizeof(float);
     parameters[1] = ROUND_UP(ic, lP);
     parameters[2] = oc;
     parameters[3] = eP * 4 * sizeof(float);
@@ -129,7 +129,7 @@ static void _gemmAndIm2col(const DeconvolutionWithStride::ComputeUnit& unit, int
     el[2] = 0;
     el[3] = 0;
     size_t parameters[6];
-    parameters[0] = eP * sizeof(float);
+    parameters[0] = eP * lP * sizeof(float);
     parameters[1] = ROUND_UP(ic, lP);
     parameters[2] = oc;
     parameters[3] = eP * 4 * sizeof(float);
@@ -272,7 +272,7 @@ void DeconvolutionWithStride::_extract(const Op* convOp) {
     std::shared_ptr<ConvolutionCommon::Int8Common> quanCommon;
     ConvolutionCommon::getConvParameters(&quanCommon, backend(), convOp, &tempWeight, &tempWeightSize);
     srcCount = tempWeightSize / kx / ky / outputCount;
-
+
     std::shared_ptr<Tensor> weightWrap(
         Tensor::create<float>(std::vector<int>{srcCount, outputCount, ky * kx}, (void*)tempWeight));

demo/exec/pictureRecognition_module.cpp

Lines changed: 2 additions & 2 deletions
@@ -217,7 +217,7 @@ int main(int argc, const char* argv[]) {

     // Create Input
     int batchSize = argc - 3;
-    auto input = MNN::Express::_Input({batchSize, 3, width, height}, MNN::Express::NC4HW4);
+    auto input = MNN::Express::_Input({batchSize, 3, height, width}, MNN::Express::NC4HW4);
     for (int batch = 0; batch < batchSize; ++batch) {
         int size_w = width;
         int size_h = height;
@@ -257,4 +257,4 @@ int main(int argc, const char* argv[]) {
     rtmgr->updateCache();

     return 0;
-}
+}
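
For context, the swap above matters because `_Input` declares an NC4HW4 input with logical NCHW dimensions; passing width before height only goes unnoticed when the two are equal. A minimal sketch (editor's illustration, not part of the commit; the 224x224 size is a placeholder):

```
// Sketch: the NCHW dimension order the fix above restores.
// Assumes an MNN build with the Express API enabled.
#include <MNN/expr/ExprCreator.hpp>

int main() {
    const int batchSize = 1, channel = 3, height = 224, width = 224;
    // NC4HW4 inputs are declared with logical NCHW dims: {N, C, H, W}.
    auto input = MNN::Express::_Input({batchSize, channel, height, width}, MNN::Express::NC4HW4);
    // ... fill input->writeMap<float>() and run the module as in the demo above ...
    return 0;
}
```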

docs/compile/cmake.md

Lines changed: 1 addition & 0 deletions
@@ -59,6 +59,7 @@ MNN uses CMake to build the project; the CMake macros are listed below:
 | MNN_QNN | Whether to build the `QNN` backend; defaults to `OFF` |
 | MNN_QNN_ONLINE_FINALIZE | With `MNN_QNN` enabled, whether to build the QNN backend in online-compilation mode; defaults to `ON` |
 | MNN_QNN_CONVERT_MODE | With `MNN_QNN` enabled, whether to build the QNN backend in Convert mode; defaults to `OFF` |
+| MNN_NEUROPILOT | Whether to build the MLA `NPU` offline-conversion backend or execution plugin; defaults to `OFF` |
 | MNN_NPU | Whether to build HIAI's `NPU` backend; defaults to `OFF` |
 | MNN_USE_SPARSE_COMPUTE | Whether to use sparse computation; defaults to `ON` |
 | MNN_BUILD_BENCHMARK | Whether to build MNN's benchmark tests; defaults to `OFF` |

docs/compile/other.md

Lines changed: 2 additions & 0 deletions
@@ -63,6 +63,7 @@
 - `llm_demo` large language model inference demo program
 - `diffusion_demo` diffusion model demo program
 - `llm_bench` large language model benchmarking tool
+- `quantize_llm` large language model feature-map quantization tool
 ## Test tools
 - Related build options
   - `MNN_BUILD_TOOLS` whether to build the test tools
@@ -93,6 +94,7 @@
 - `fuseTest` tests custom GPU operators; currently only the Vulkan Buffer mode is supported
 - `GpuInterTest.out` tests GPU memory input; currently only OpenCL Buffer mode and OpenGL texture mode are supported; MNN_OPENCL and MNN_OPENGL must be enabled at build time
 - `LoRA` merges LoRA weights into the model weights
+- `compilefornpu` converts the parts to be run on the NPU into Plugin operators
 ## Benchmark tools
 - Related build options
   - `MNN_BUILD_BENCHMARK` whether to build the benchmark tools

docs/inference/npu.md

Lines changed: 108 additions & 42 deletions
@@ -6,63 +6,129 @@
 - NNAPI
 - HIAI

-Currently none of the NPU-related backends support dynamic models (variable shapes, control flow, etc.), and they support fewer operators than CPU/GPU; it is recommended to adjust the model structure iteratively based on whether it runs through on the NPU.
+## QNN

-Also, because QNN, CoreML and NNAPI share the same Backend Type in MNN, at most one of the corresponding build macros MNN_QNN, MNN_COREML and MNN_NNAPI may be enabled at build time.
+### Overview of the QNN backend

-## QNN
-Applies to devices using Qualcomm chips equipped with the Qualcomm Hexagon Tensor Processor (HTP); see the [device support list on the Qualcomm website](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/overview.html#supported-snapdragon-devices).
+- MNN builds the MNN-QNN backend on the C++ API of the QNN SDK, aiming at inference acceleration on devices with a usable Qualcomm NPU.
+- Two run modes are supported:
+  - Online graph-building mode: the QNN compute graph is compiled and serialized online.
+    - Supports inference of regular models with static shapes.
+  - Offline graph-building mode: MNN offline tools first cache the serialized QNN compute graph, which is then loaded directly at runtime, saving initialization time.
+    - Supports inference of regular models with static shapes or a limited set of shape combinations.
+    - Can accelerate inference of some llm models.
+
+### Preparation
+
+#### Development environment
+- Host
+  - Online graph-building mode: no requirements.
+  - Offline graph-building mode: an x86_64 Linux machine (some QNN tools in the pipeline must run in this environment).
+- Device
+  - A device with a usable Qualcomm NPU; for ease of presentation, the text below assumes it runs Android.
+
+#### Identify the hardware architecture
+
+Some steps of using the QNN backend (such as generating offline artifacts and determining the QNN NPU library dependencies) require the SOC ID and HEXAGON ARCH of the device's hardware architecture. Some common architectures are listed below for reference:
+
+| Hardware | SOC ID | HEXAGON ARCH |
+| :------ | :----- | :----------- |
+| 8 Gen 1 | 36 | 69 |
+| 8 Gen 2 | 43 | 73 |
+| 8 Gen 3 | 57 | 75 |
+| 8 Elite | 69 | 79 |
+
+For other hardware architectures, refer to the device support list on the Qualcomm website.
+
+#### Getting the QNN dependencies

-### Getting the QNN dependencies
-The QNN backend depends on `/include/QNN` and `lib` from the QNN SDK; first, obtain these dependencies.
+The MNN-QNN backend depends on `include/QNN` and `lib` from the QNN SDK, which can be obtained as follows:
 - [Register a Qualcomm account](https://myaccount.qualcomm.com/signup).
-- Visit the Qualcomm AI Engine Direct SDK (i.e. QNN SDK) [website](https://www.qualcomm.com/developer/software/qualcomm-ai-engine-direct-sdk) and download the SDK.
-- Using the commands below, copy `/include/QNN` and `lib` from the downloaded SDK into the corresponding locations in the MNN source tree.
+- Visit the Qualcomm AI Engine Direct SDK (i.e. QNN SDK) website, download the SDK, and unpack it, e.g. to `/home/xiaying/third/qnn/qairt/2.38.0.250901`.
+- Edit `~/.bashrc` to add the SDK path to the environment variables, then run `source ~/.bashrc` or restart the terminal, e.g.:
+
 ```
-QNN_SDK_ROOT="/YOUR/QNN/SDK/PATH" # modify this variable according to your environment
-MNN_ROOT="/YOUR/MNN/PATH" # modify this variable according to your environment
-INCLUDE_SRC="${QNN_SDK_ROOT}/include/QNN"
-LIB_SRC="${QNN_SDK_ROOT}/lib"
-INCLUDE_DEST="${MNN_ROOT}/source/backend/qnn/3rdParty/include"
-LIB_DEST="${MNN_ROOT}/source/backend/qnn/3rdParty/lib"
-mkdir "${MNN_ROOT}/source/backend/qnn/3rdParty"
-cp -r ${INCLUDE_SRC} ${INCLUDE_DEST}
-cp -r ${LIB_SRC} ${LIB_DEST}
+export QNN_SDK_ROOT=/home/xiaying/third/qnn/qairt/2.38.0.250901
+export QNN_ROOT=/home/xiaying/third/qnn/qairt/2.38.0.250901
+export HEXAGON_SDK_ROOT=/home/xiaying/third/qnn/qairt/2.38.0.250901
 ```

-### Building the QNN backend
-- When building MNN, enable the build macro `MNN_QNN`, i.e. `-DMNN_QNN=ON`.
-- To run offline-compiled QNN models (offline compilation: use the MNN2QNNModel tool), the `MNN_WITH_PLUGIN` macro must be enabled. To reduce library size, `MNN_QNN_ONLINE_FINALIZE` can be turned off.
+### Online graph-building mode: inference of regular models
+Using the online graph-building mode is largely the same as using other backends and consists of the following three parts.
+
+#### Host: cross-compile the device-side MNN libraries and the AI application
+- Follow ["Building the main library"](../compile/engine.md#主库编译) to set up the Android build environment and CMake variables.
+- Add the extra CMake variables and build: `-DMNN_QNN=ON`, `-DMNN_QNN_CONVERT_MODE=OFF`, `-DMNN_WITH_PLUGIN=OFF`.

+#### Push resources to the device

-### Running the QNN backend
-- Set the Backend Type to `MNN_FORWARD_NN`, i.e. 5.
-- Besides the MNN libraries, the QNN backend depends on four QNN libraries at runtime; the commands below copy them to the device. The variable `HEXAGON_ARCH` must match your target device; see the [device support list on the Qualcomm website](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/overview.html#supported-snapdragon-devices), e.g. for an 8 Gen 3 device set `HEXAGON_ARCH="75"`.
+Following the commands below, push the following resources to the device:
+- The AI application.
+- The cross-compiled device-side MNN libraries.
+- The QNN libraries (`libQnnHtp.so`, `libQnnHtpV${HEXAGON_ARCH}Stub.so`, `libQnnHtpV${HEXAGON_ARCH}Skel.so`, `libQnnHtpPrepare.so`).
+- The MNN model.
 ```
 HEXAGON_ARCH="75" # modify this variable according to your environment
-MNN_ROOT="/YOUR/MNN/PATH" # modify this variable according to your environment
-ANDROID_PATH="/data/local/tmp"
-adb push ${MNN_ROOT}/source/backend/qnn/3rdParty/lib/aarch64-android/libQnnHtp.so ${ANDROID_PATH}/libQnnHtp.so
-
-/*
-Of the two libraries libQnnHtpPrepare.so and libQnnSystem.so below, pick one depending on your case:
-- if the QNN graph model is generated online, libQnnHtpPrepare.so is needed at runtime
-- if the QNN graph model is generated offline, libQnnSystem.so is needed at runtime
-*/
-adb push ${MNN_ROOT}/source/backend/qnn/3rdParty/lib/aarch64-android/libQnnHtpPrepare.so ${ANDROID_PATH}/libQnnHtpPrepare.so
-adb push ${MNN_ROOT}/source/backend/qnn/3rdParty/lib/aarch64-android/libQnnSystem.so ${ANDROID_PATH}/libQnnSystem.so
-
-adb push ${MNN_ROOT}/source/backend/qnn/3rdParty/lib/aarch64-android/libQnnHtpV${HEXAGON_ARCH}Stub.so ${ANDROID_PATH}/libQnnHtpV${HEXAGON_ARCH}Stub.so
-adb push ${MNN_ROOT}/source/backend/qnn/3rdParty/lib/hexagon-v${HEXAGON_ARCH}/unsigned/libQnnHtpV${HEXAGON_ARCH}Skel.so ${ANDROID_PATH}/libQnnHtpV${HEXAGON_ARCH}Skel.so
+MNN_ROOT_PATH="/YOUR/MNN/ROOT/PATH" # modify this variable according to your environment
+BUILD_ANDROID_PATH="/your/build/andorid/path" # modify this variable according to your environment
+ANDROID_WORKING_DIR="/data/local/tmp" # modify this variable according to your environment
+
+# push mnn libs
+cd ${BUILD_ANDROID_PATH}
+find . -name "*.so" | while read solib; do
+adb push $solib ${ANDROID_WORKING_DIR}
+done
+cd -
+
+# push your AI exe
+adb push /your/AI/exe ${ANDROID_WORKING_DIR}
+
+# push QNN libs
+adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtp.so ${ANDROID_WORKING_DIR}
+adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpV${HEXAGON_ARCH}Stub.so ${ANDROID_WORKING_DIR}
+adb push ${QNN_SDK_ROOT}/lib/hexagon-v${HEXAGON_ARCH}/unsigned/libQnnHtpV${HEXAGON_ARCH}Skel.so ${ANDROID_WORKING_DIR}
+# The following lib is only needed in the online case.
+adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpPrepare.so ${ANDROID_WORKING_DIR}
+
+# push MNN models
+adb push model.mnn ${ANDROID_WORKING_DIR}
 ```
-- To dynamically link against the QNN HTP libraries, the directory containing them must be added to the `ADSP_LIBRARY_PATH` environment variable (works on some devices). If linking still fails, push the executable to the directory containing the QNN HTP libraries (e.g. `/data/local/tmp`), cd into that directory, and run the executable, e.g.:
+
+#### Device: link and run
+- Linking the QNN libraries
+  - To dynamically link against the QNN HTP libraries, the directory containing them must be added to the `ADSP_LIBRARY_PATH` environment variable (works on some devices). If linking still fails, push the executable and the QNN HTP libraries to the same directory, cd into that directory, and run the executable, e.g.:
 ```
-adb shell "cd /data/local/tmp && LD_LIBRARY_PATH=/data/local/tmp ADSP_LIBRARY_PATH=/data/local/tmp ./MyExe.out"
+adb shell "cd ${ANDROID_WORKING_DIR} && export LD_LIBRARY_PATH=.:${ANDROID_LD_LIBRARY_PATH} && export ADSP_LIBRARY_PATH=.:${ANDROID_ADSP_LIBRARY_PATH} && ./your/mnn/qnn/ai/exe"
 ```
+- Configuring MNN
+  - Set the Backend Type to `MNN_FORWARD_NN`, i.e. 5.
+  - When running inference with the Module API, set the `shapeMutable` field of `Module::Config` to `false`.
+
+### Offline graph-building mode: inference of regular models
+Compared with the online mode, the offline graph-building mode involves one extra build (of the MNN libraries needed to generate the offline artifacts) and one extra model-conversion step (converting the original MNN model into QNN artifacts), as follows.
+
+#### Host: build the MNN libraries and MNN offline tools needed to generate the offline-mode artifacts
+- Add the extra CMake variables and build: `-DMNN_QNN=ON`, `-DMNN_QNN_CONVERT_MODE=ON`, `-DMNN_WITH_PLUGIN=OFF`, `-DMNN_BUILD_TOOLS=ON`.
+
+#### Host: generate the QNN offline graph artifacts
+Run the `MNN2QNNModel` tool for the device's hardware architecture to generate the QNN offline artifact (`model_${SOC_ID}_${HEXAGON_ARCH}.bin`) and the replacement model (`model_${SOC_ID}_${HEXAGON_ARCH}.mnn`); see [the tool's usage](../tools/convert.md#mnn2qnnmodel).
+
+#### Host: cross-compile the device-side MNN libraries and the AI application
+- Follow ["Building the main library"](../compile/engine.md#主库编译) to set up the Android build environment and CMake variables.
+- Add the extra CMake variables and build: `-DMNN_QNN=ON`, `-DMNN_QNN_CONVERT_MODE=OFF`, `-DMNN_WITH_PLUGIN=ON`.
+
+#### Push resources to the device
+Similar to [the online graph-building case](#推送资源至device), with the following two differences:
+- The required QNN libraries become `libQnnHtp.so`, `libQnnHtpV${HEXAGON_ARCH}Stub.so`, `libQnnHtpV${HEXAGON_ARCH}Skel.so`, `libQnnSystem.so` (`libQnnHtpPrepare.so` is no longer needed; `libQnnSystem.so` is needed instead).
+- The original MNN model is no longer used; instead, the QNN offline artifact (`model_${SOC_ID}_${HEXAGON_ARCH}.bin`) and the replacement model (`model_${SOC_ID}_${HEXAGON_ARCH}.mnn`) are required.
+
+#### Device: link and run
+- Configuring MNN
+  - Set the backend type to 0 (CPU). Loading and running the QNN offline artifact is encapsulated in a Plugin operator registered on the CPU backend, so the backend type must be CPU here.
+  - On the device, if the offline artifacts are not in your application's working directory, set the directory containing them via the `Executor::RuntimeManager::setExternalPath` interface in your program.
+- Linking the QNN libraries
+  - The linking requirements of the offline mode are the same as those of the online mode.

-### Notes on QNN quantization
-- Weight-only quantization (activations in float): only int8, channel-wise symmetric quantization of Linear weights is supported.
-- Quantizing both activations and weights: per-tensor symmetric quantization of activations, with int8/int4 channel-wise symmetric quantization of weights.

 ## CoreML
 Applies to Mac / iOS / iPad
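
For readers of the updated npu.md, a minimal sketch (editor's illustration, not part of the commit) of the online-mode configuration it describes: backend type `MNN_FORWARD_NN` plus `shapeMutable = false` on `Module::Config`. The model path and tensor names are placeholders, and error handling is omitted.

```
// Sketch: Module-API setup for the online QNN mode documented above.
// "model.mnn", "input" and "output" are placeholder names.
#include <memory>
#include <MNN/Interpreter.hpp>      // MNN::ScheduleConfig, MNN_FORWARD_NN
#include <MNN/expr/Executor.hpp>    // MNN::Express::Executor::RuntimeManager
#include <MNN/expr/Module.hpp>      // MNN::Express::Module

int main() {
    MNN::ScheduleConfig sConfig;
    sConfig.type = MNN_FORWARD_NN;  // QNN shares the NN backend type (5)
    std::shared_ptr<MNN::Express::Executor::RuntimeManager> rtmgr(
        MNN::Express::Executor::RuntimeManager::createRuntimeManager(sConfig));

    MNN::Express::Module::Config mConfig;
    mConfig.shapeMutable = false;   // required for the QNN backend, per the doc above
    std::shared_ptr<MNN::Express::Module> module(
        MNN::Express::Module::load({"input"}, {"output"}, "model.mnn", rtmgr, &mConfig));

    // Offline mode differs: use backend type 0 (CPU) and, if the artifacts are not in
    // the working directory, point the RuntimeManager at them via setExternalPath.
    return 0;
}
```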
