microsoft · nmetulev · Dec 13, 2024 · Nov 26, 2024 · Nov 26, 2024 · Nov 26, 2024
diff --git a/AIDevGallery.SourceGenerator/Models/HardwareAccelerator.cs b/AIDevGallery.SourceGenerator/Models/HardwareAccelerator.cs
@@ -9,6 +9,7 @@ namespace AIDevGallery.SourceGenerator.Models
     internal enum HardwareAccelerator
     {
         CPU,
-        DML
+        DML, // DirectML; samples register it via AppendExecutionProvider_DML
+        QNN // QNN execution provider; samples register it via AppendExecutionProvider("QNN", ...)
     }
 }
diff --git a/AIDevGallery/AIDevGallery.csproj b/AIDevGallery/AIDevGallery.csproj
@@ -57,7 +57,6 @@
     <PackageReference Include="CommunityToolkit.WinUI.Media" />
     <PackageReference Include="CommunityToolkit.WinUI.UI.Controls.Markdown" />
     <PackageReference Include="MathNet.Numerics" />
-    <PackageReference Include="Microsoft.AI.DirectML" />
     <PackageReference Include="Microsoft.Build" />
     <PackageReference Include="Microsoft.Extensions.AI.Abstractions" />
     <PackageReference Include="Microsoft.ML.OnnxRuntime.Extensions" />
@@ -83,7 +82,21 @@
     <Manifest Include="$(ApplicationManifest)" />
   </ItemGroup>
 
-  <ItemGroup>
+  <!-- ARM64 builds: QNN execution provider. The GenAI package is referenced with all assets excluded and only used (via its generated PKG path property) to copy the win-arm64 native DLL to the output; the managed API comes from the separate .Managed package. -->
+  <ItemGroup Condition="'$(Platform)' == 'ARM64'">
+    <PackageReference Include="Microsoft.ML.OnnxRuntime.Qnn" />
+    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI" GeneratePathProperty="true" ExcludeAssets="all" />
+    <None Include="$(PKGMicrosoft_ML_OnnxRuntimeGenAI)\runtimes\win-arm64\native\onnxruntime-genai.dll">
+      <Link>onnxruntime-genai.dll</Link>
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+      <Visible>false</Visible>
+    </None>
+    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Managed" />
+  </ItemGroup>
+
+  <!-- x64 builds: DirectML-based ONNX Runtime packages. -->
+  <ItemGroup Condition="'$(Platform)' == 'x64'">
+    <PackageReference Include="Microsoft.AI.DirectML" />
     <PackageReference Include="Microsoft.ML.OnnxRuntime.DirectML" />
     <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.DirectML" />
   </ItemGroup>

diff --git a/AIDevGallery/Models/ModelCompatibility.cs b/AIDevGallery/Models/ModelCompatibility.cs
@@ -23,7 +23,8 @@ public static ModelCompatibility GetModelCompatibility(ModelDetails modelDetails
 
         ModelCompatibilityState compatibility;
         if (modelDetails.HardwareAccelerators.Contains(HardwareAccelerator.CPU) ||
-            (modelDetails.HardwareAccelerators.Contains(HardwareAccelerator.DML) && modelDetails.SupportedOnQualcomm == true))
+            (modelDetails.HardwareAccelerators.Contains(HardwareAccelerator.DML) && modelDetails.SupportedOnQualcomm == true) ||
+            (modelDetails.HardwareAccelerators.Contains(HardwareAccelerator.QNN) && DeviceUtils.IsArm64()))
         {
             compatibility = ModelCompatibilityState.Compatible;
         }

diff --git a/AIDevGallery/Models/Samples.cs b/AIDevGallery/Models/Samples.cs
@@ -152,7 +152,8 @@ internal class Scenario
     internal enum HardwareAccelerator
     {
         CPU,
-        DML
+        DML, // DirectML; samples register it via AppendExecutionProvider_DML
+        QNN // QNN execution provider; samples register it via AppendExecutionProvider("QNN", ...)
     }
 }
 

diff --git a/AIDevGallery/ProjectGenerator/Template/HardwareAccelerator.cs b/AIDevGallery/ProjectGenerator/Template/HardwareAccelerator.cs
@@ -3,5 +3,6 @@ namespace $safeprojectname$.SharedCode;
 internal enum HardwareAccelerator
 {
     CPU,
-    DML
+    DML, // DirectML; samples register it via AppendExecutionProvider_DML
+    QNN // QNN execution provider; samples register it via AppendExecutionProvider("QNN", ...)
 }
diff --git a/AIDevGallery/Samples/ModelsDefinitions/embeddings.modelgroup.json b/AIDevGallery/Samples/ModelsDefinitions/embeddings.modelgroup.json
@@ -18,7 +18,8 @@
             "Description": "This is an embedding model executed on the CPU or GPU",
             "HardwareAccelerator": [
               "CPU",
-              "DML"
+              "DML",
+              "QNN"
             ],
             "SupportedOnQualcomm": true,
             "Size": 90636722,
@@ -44,7 +45,8 @@
             "Description": "This is an embedding model executed on the CPU or GPU",
             "HardwareAccelerator": [
               "CPU",
-              "DML"
+              "DML",
+              "QNN"
             ],
             "SupportedOnQualcomm": true,
             "Size": 133358075,

diff --git a/AIDevGallery/Samples/ModelsDefinitions/imagemodels.modelgroup.json b/AIDevGallery/Samples/ModelsDefinitions/imagemodels.modelgroup.json
@@ -132,7 +132,8 @@
             "Description": "HRNet Pose",
             "HardwareAccelerator": [
               "CPU",
-              "DML"
+              "DML",
+              "QNN"
             ],
             "SupportedOnQualcomm": true,
             "Size": 114232171,

diff --git a/AIDevGallery/Samples/Open Source Models/Image Models/HRNetPose/PoseDetection.xaml.cs b/AIDevGallery/Samples/Open Source Models/Image Models/HRNetPose/PoseDetection.xaml.cs
@@ -76,6 +76,14 @@ private Task InitModel(string modelPath, HardwareAccelerator hardwareAccelerator
             {
                 sessionOptions.AppendExecutionProvider_DML(DeviceUtils.GetBestDeviceId());
             }
+            else if (hardwareAccelerator == HardwareAccelerator.QNN)
+            {
+                Dictionary<string, string> options = new()
+                {
+                    { "backend_path", "QnnHtp.dll" }
+                };
+                sessionOptions.AppendExecutionProvider("QNN", options);
+            }
 
             _inferenceSession = new InferenceSession(modelPath, sessionOptions);
         });

diff --git a/AIDevGallery/Samples/SharedCode/EmbeddingGenerator.cs b/AIDevGallery/Samples/SharedCode/EmbeddingGenerator.cs
@@ -32,6 +32,7 @@ internal partial class EmbeddingGenerator : IDisposable, IEmbeddingGenerator<str
     private readonly SessionOptions _sessionOptions;
     private readonly InferenceSession _inferenceSession;
     private readonly BertTokenizer _tokenizer;
+    private readonly int _chunkSize = 128;
 
     public EmbeddingGeneratorMetadata Metadata { get; }
     public EmbeddingGenerator(string modelPath, HardwareAccelerator hardwareAccelerator)
@@ -44,6 +45,15 @@ public EmbeddingGenerator(string modelPath, HardwareAccelerator hardwareAccelera
         {
             _sessionOptions.AppendExecutionProvider_DML(DeviceUtils.GetBestDeviceId());
         }
+        else if (hardwareAccelerator == HardwareAccelerator.QNN)
+        {
+            Dictionary<string, string> options = new()
+            {
+                { "backend_path", "QnnHtp.dll" }
+            };
+            _sessionOptions.AppendExecutionProvider("QNN", options);
+            _chunkSize = 8;
+        }
 
         _inferenceSession = new InferenceSession(Path.Join(modelPath, "onnx", "model.onnx"), _sessionOptions);
         _tokenizer = BertTokenizer.Create(Path.Join(modelPath, "vocab.txt"));
@@ -103,9 +113,7 @@ public async IAsyncEnumerable<Embedding<float>> GenerateStreamingAsync(
         EmbeddingGenerationOptions? options = null,
         [EnumeratorCancellation] CancellationToken cancellationToken = default)
     {
-        int chunkSize = 128;
-
-        var chunks = values.Chunk(chunkSize);
+        var chunks = values.Chunk(_chunkSize);
 
         using var runOptions = new RunOptions();
 

diff --git a/AIDevGallery/Utils/AppUtils.cs b/AIDevGallery/Utils/AppUtils.cs
@@ -78,11 +78,10 @@ public static string GetHardwareAcceleratorString(HardwareAccelerator hardwareAc
     {
         switch (hardwareAccelerator)
         {
-            default:
-            case HardwareAccelerator.CPU:
-                return "CPU";
             case HardwareAccelerator.DML:
                 return "GPU";
+            default:
+                return hardwareAccelerator.ToString();
         }
     }
 

diff --git a/Directory.Packages.props b/Directory.Packages.props
@@ -18,8 +18,11 @@
     <PackageVersion Include="CommunityToolkit.WinUI.Converters" Version="8.1.240916" />
     <PackageVersion Include="CommunityToolkit.WinUI.UI.Controls.Markdown" Version="7.1.2" />
     <PackageVersion Include="Microsoft.Build" Version="17.11.4" />
-    <PackageVersion Include="Microsoft.ML.OnnxRuntime.DirectML" Version="1.20.0" />
+    <PackageVersion Include="Microsoft.ML.OnnxRuntime.DirectML" Version="1.20.1" />
+    <PackageVersion Include="Microsoft.ML.OnnxRuntime.Qnn" Version="1.20.1" />
     <PackageVersion Include="Microsoft.ML.OnnxRuntime.Extensions" Version="0.13.0" />
+    <PackageVersion Include="Microsoft.ML.OnnxRuntimeGenAI" Version="0.5.2" />
+    <PackageVersion Include="Microsoft.ML.OnnxRuntimeGenAI.Managed" Version="0.5.2" />
     <PackageVersion Include="Microsoft.ML.OnnxRuntimeGenAI.DirectML" Version="0.5.0" />
     <PackageVersion Include="Microsoft.WindowsAppSDK" Version="1.6.241106002" />
     <PackageVersion Include="CommunityToolkit.WinUI.Animations" Version="8.1.240916" />
-Original file line number
+Diff line change
@@ Expand Up / @@ -9,6 +9,7 @@ namespace AIDevGallery.SourceGenerator.Models @@
         internal enum HardwareAccelerator
         {
             CPU,
-            DML
+            DML,
+            QNN
         }
     }