Merge pull request #127 from HamletTanyavong/dev

Add support for complex numbers in methods that use OpenCL
HamletTanyavong · Aug 22, 2024 · 816c99a · 816c99a
2 parents d9aac30 + 71b74b0
commit 816c99a
Show file tree

Hide file tree

Showing 9 changed files with 206 additions and 29 deletions.
diff --git a/.editorconfig b/.editorconfig
@@ -20,8 +20,8 @@ insert_final_newline = true
 # Other
 trim_trailing_whitespace = true
 
-### OpenCL Files ###
-[*.cl]
+### C and OpenCL Files ###
+[*.{c,cl}]
 
 # Indentation and spacing
 indent_size = 4

diff --git a/src/Mathematics.NET/Core/Rational.cs b/src/Mathematics.NET/Core/Rational.cs
@@ -581,13 +581,9 @@ private static T GCD(T p, T q)
         while (p != T.Zero && q != T.Zero)
         {
             if (p > q)
-            {
                 p %= q;
-            }
             else
-            {
                 q %= p;
-            }
         }
         return p | q;
     }
@@ -616,13 +612,9 @@ private static T LCM(T p, T q)
         while (p != T.Zero && q != T.Zero)
         {
             if (p > q)
-            {
                 p %= q;
-            }
             else
-            {
                 q %= p;
-            }
         }
         return holdP / (p | q) * holdQ;
     }

diff --git a/src/Mathematics.NET/GPU/OpenCL/IComputeService.cs b/src/Mathematics.NET/GPU/OpenCL/IComputeService.cs
@@ -43,9 +43,19 @@ public interface IComputeService : IDisposable
     KernelWorkGroupInformation GetKernelWorkGroupInfo(Device device, Kernel kernel);
 
     //
-    // Methods.
+    // Methods
     //
 
+    /// <summary>Multiply a vector by a scalar.</summary>
+    /// <remarks>Padded vectors should have zeros added to their ends.</remarks>
+    /// <param name="device">The device to use.</param>
+    /// <param name="globalWorkSize">The global work size.</param>
+    /// <param name="localWorkSize">The local work size.</param>
+    /// <param name="vector">A vector.</param>
+    /// <param name="scalar">A scalar.</param>
+    /// <returns>A new vector.</returns>
+    ReadOnlySpan<Complex> CompVecMulScalar(Device device, nuint globalWorkSize, nuint localWorkSize, ReadOnlySpan<Complex> vector, Complex scalar);
+
     /// <summary>Multipy two matrices.</summary>
     /// <remarks>Padded matrices should have zeros added to their right and bottom.</remarks>
     /// <param name="device">The device to use.</param>
@@ -54,15 +64,11 @@ public interface IComputeService : IDisposable
     /// <param name="left">The left matrix.</param>
     /// <param name="right">The right matrix.</param>
     /// <returns>A new matrix.</returns>
+    ReadOnlySpan2D<Complex> CompMatMul(Device device, WorkSize2D globalWorkSize, WorkSize2D localWorkSize, ReadOnlySpan2D<Complex> left, ReadOnlySpan2D<Complex> right);
+
+    /// <inheritdoc cref="ComplexMatMul(Device, WorkSize2D, WorkSize2D, ReadOnlySpan2D{Complex}, ReadOnlySpan2D{Complex})"/>
     ReadOnlySpan2D<Real> MatMul(Device device, WorkSize2D globalWorkSize, WorkSize2D localWorkSize, ReadOnlySpan2D<Real> left, ReadOnlySpan2D<Real> right);
 
-    /// <summary>Multiply a vector by a scalar.</summary>
-    /// <remarks>Padded vectors should have zeros added to their ends.</remarks>
-    /// <param name="device">The device to use.</param>
-    /// <param name="globalWorkSize">The global work size.</param>
-    /// <param name="localWorkSize">The local work size.</param>
-    /// <param name="vector">A vector.</param>
-    /// <param name="scalar">A scalar.</param>
-    /// <returns>A new vector.</returns>
+    /// <inheritdoc cref="CompVecMulScalar(Device, nuint, nuint, ReadOnlySpan{Complex}, Complex)"/>
     ReadOnlySpan<Real> VecMulScalar(Device device, nuint globalWorkSize, nuint localWorkSize, ReadOnlySpan<Real> vector, Real scalar);
 }
diff --git a/src/Mathematics.NET/GPU/OpenCL/Kernels/comp_mat_mul.cl b/src/Mathematics.NET/GPU/OpenCL/Kernels/comp_mat_mul.cl
@@ -0,0 +1,25 @@
+__kernel void comp_mat_mul(__global const complex* matA,
+    __global const complex* matB,
+    int const k,
+    __global complex* result)
+{
+    int row = get_global_id(0);
+    int col = get_global_id(1);
+    int width = get_local_size(1) * get_num_groups(1);
+
+    int aIndex = row * k;
+    int bIndex = col;
+
+    complex sum;
+    sum.re = 0;
+    sum.im = 0;
+
+    for (int i = 0; i < k; i++)
+    {
+        sum = comp_add(sum, comp_mul(matA[aIndex], matB[bIndex]));
+        aIndex++;
+        bIndex += width;
+    }
+
+    result[row * width + col] = sum;
+}
diff --git a/src/Mathematics.NET/GPU/OpenCL/Kernels/comp_vec_mul_scalar.cl b/src/Mathematics.NET/GPU/OpenCL/Kernels/comp_vec_mul_scalar.cl
@@ -0,0 +1,7 @@
+__kernel void comp_vec_mul_scalar(__global const complex* vector,
+                                     const complex scalar,
+                                     __global complex* result)
+{
+    int i = get_global_id(0);
+    result[i] = comp_mul(vector[i], scalar);
+}
diff --git a/src/Mathematics.NET/GPU/OpenCL/Kernels/complex.c b/src/Mathematics.NET/GPU/OpenCL/Kernels/complex.c
@@ -0,0 +1,44 @@
+typedef struct __attribute__((packed)) {
+    double re;
+    double im;
+} complex;
+
+inline complex comp_add(const complex z, const complex w) {
+    complex result;
+    result.re = z.re + w.re;
+    result.im = z.im + w.im;
+    return result;
+}
+
+inline complex comp_div(const complex z, const complex w) {
+    double a = z.re;
+    double b = z.im;
+    double c = w.re;
+    double d = w.im;
+
+    complex result;
+    if (fabs(d) < fabs(c)) {
+        double u = d / c;
+        result.re = (a + b * u) / (c + d * u);
+        result.im = (b - a * u) / (c + d * u);
+    } else {
+        double u = c / d;
+        result.re = (b + a * u) / (d + c * u);
+        result.im = (b * u - a) / (d + c * u);
+    }
+    return result;
+}
+
+inline complex comp_mul(const complex z, const complex w) {
+    complex result;
+    result.re = z.re * w.re - z.im * w.im;
+    result.im = z.re * w.im + w.re * z.im;
+    return result;
+}
+
+inline complex comp_sub(const complex z, const complex w) {
+    complex result;
+    result.re = z.re - w.re;
+    result.im = z.im - w.im;
+    return result;
+}
diff --git a/src/Mathematics.NET/GPU/OpenCL/OpenCLService.cs b/src/Mathematics.NET/GPU/OpenCL/OpenCLService.cs
@@ -146,9 +146,108 @@ public void Dispose()
 
     public KernelWorkGroupInformation GetKernelWorkGroupInfo(Device device, Kernel kernel) => new(_logger, _cl, device, kernel);
 
-    //
-    // Interface implementations.
-    //
+    #region Interface Implementations
+
+    // TODO: Consider putting this in another file.
+
+    public unsafe ReadOnlySpan<Complex> CompVecMulScalar(Device device, nuint globalWorkSize, nuint localWorkSize, ReadOnlySpan<Complex> vector, Complex scalar)
+    {
+        var length = vector.Length;
+        var result = new Complex[length];
+
+        fixed (void* pVector = vector)
+        {
+            // Create buffers.
+            nint vectorBuffer = _cl.CreateBuffer(_context.Handle, MemFlags.UseHostPtr | MemFlags.ReadOnly | MemFlags.HostNoAccess, (nuint)(sizeof(Complex) * length), pVector, null);
+
+            fixed (void* pResult = result)
+            {
+                nint resultBuffer = _cl.CreateBuffer(_context.Handle, MemFlags.UseHostPtr | MemFlags.WriteOnly | MemFlags.HostReadOnly, (nuint)(sizeof(Complex) * length), pResult, null);
+
+                // Set kernel arguments.
+                var kernel = _program.Kernels["comp_vec_mul_scalar"].Handle;
+                var error = _cl.SetKernelArg(kernel, 0, (nuint)sizeof(nint), &vectorBuffer);
+                error |= _cl.SetKernelArg(kernel, 1, (nuint)sizeof(Complex), &scalar);
+                error |= _cl.SetKernelArg(kernel, 2, (nuint)sizeof(nint), &resultBuffer);
+#if DEBUG
+                if (error != (int)ErrorCodes.Success)
+                    _logger.LogDebug(s_setKernelArgError, _program.Kernels["comp_vec_mul_scalar"].Name);
+#endif
+
+                // Enqueue NDRange kernel.
+                using var commandQueue = _context.CreateCommandQueue(device, CommandQueueProperties.None);
+                error = _cl.EnqueueNdrangeKernel(commandQueue.Handle, kernel, 1, null, &globalWorkSize, &localWorkSize, 0, null, null);
+#if DEBUG
+                if (error != (int)ErrorCodes.Success)
+                    _logger.LogDebug(s_enqueueNDRangeKernelError);
+#endif
+                _cl.Finish(commandQueue.Handle);
+
+                // Enqueue read buffer.
+                _cl.EnqueueReadBuffer(commandQueue.Handle, resultBuffer, true, 0, (nuint)(sizeof(Complex) * length), pResult, 0, null, null);
+
+                // Release mem objects.
+                _cl.ReleaseMemObject(resultBuffer);
+            }
+            _cl.ReleaseMemObject(vectorBuffer);
+        }
+
+        return result;
+    }
+
+    public unsafe ReadOnlySpan2D<Complex> CompMatMul(Device device, WorkSize2D globalWorkSize, WorkSize2D localWorkSize, ReadOnlySpan2D<Complex> left, ReadOnlySpan2D<Complex> right)
+    {
+        var k = left.Width;
+        if (k != right.Height)
+            throw new Exception("Cannot multiply two matrices with incompatible dimensions.");
+        Span2D<Complex> result = new Complex[left.Height, right.Width];
+
+        fixed (void* pLeft = left)
+        {
+            // Create buffers.
+            nint leftBuffer = _cl.CreateBuffer(_context.Handle, MemFlags.UseHostPtr | MemFlags.ReadOnly | MemFlags.HostNoAccess, (nuint)(sizeof(Complex) * left.Length), pLeft, null);
+
+            fixed (void* pRight = right)
+            {
+                nint rightBuffer = _cl.CreateBuffer(_context.Handle, MemFlags.UseHostPtr | MemFlags.ReadOnly | MemFlags.HostNoAccess, (nuint)(sizeof(Complex) * right.Length), pRight, null);
+
+                fixed (void* pResult = result)
+                {
+                    nint resultBuffer = _cl.CreateBuffer(_context.Handle, MemFlags.UseHostPtr | MemFlags.WriteOnly | MemFlags.HostReadOnly, (nuint)(sizeof(Complex) * result.Length), pResult, null);
+
+                    // Set kernel arguments.
+                    var kernel = _program.Kernels["comp_mat_mul"].Handle;
+                    var error = _cl.SetKernelArg(kernel, 0, (nuint)sizeof(nint), &leftBuffer);
+                    error |= _cl.SetKernelArg(kernel, 1, (nuint)sizeof(nint), &rightBuffer);
+                    error |= _cl.SetKernelArg(kernel, 2, sizeof(int), &k);
+                    error |= _cl.SetKernelArg(kernel, 3, (nuint)sizeof(nint), &resultBuffer);
+#if DEBUG
+                    if (error != (int)ErrorCodes.Success)
+                        _logger.LogDebug(s_setKernelArgError, _program.Kernels["comp_mat_mul"].Name);
+#endif
+
+                    // Enqueue NDRange kernel.
+                    using var commandQueue = _context.CreateCommandQueue(device, CommandQueueProperties.None);
+                    error = _cl.EnqueueNdrangeKernel(commandQueue.Handle, kernel, 2, null, (nuint*)&globalWorkSize, (nuint*)&localWorkSize, 0, null, null);
+#if DEBUG
+                    if (error != (int)ErrorCodes.Success)
+                        _logger.LogDebug(s_enqueueNDRangeKernelError);
+#endif
+                    _cl.Finish(commandQueue.Handle);
+
+                    // Enqueue read buffer.
+                    _cl.EnqueueReadBuffer(commandQueue.Handle, resultBuffer, true, 0, (nuint)(sizeof(Complex) * result.Length), pResult, 0, null, null);
+
+                    // Release mem objects.
+                    _cl.ReleaseMemObject(resultBuffer);
+                }
+                _cl.ReleaseMemObject(rightBuffer);
+            }
+            _cl.ReleaseMemObject(leftBuffer);
+        }
+
+        return result;
+    }
 
     public unsafe ReadOnlySpan2D<Real> MatMul(Device device, WorkSize2D globalWorkSize, WorkSize2D localWorkSize, ReadOnlySpan2D<Real> left, ReadOnlySpan2D<Real> right)
     {
@@ -248,4 +347,6 @@ public unsafe ReadOnlySpan<Real> VecMulScalar(Device device, nuint globalWorkSiz
 
         return result;
     }
+
+    #endregion
 }
diff --git a/src/Mathematics.NET/GPU/OpenCL/Program.cs b/src/Mathematics.NET/GPU/OpenCL/Program.cs
@@ -25,8 +25,6 @@
 // SOFTWARE.
 // </copyright>
 
-// TODO: Create a source generator that finds kernels in the Kernels folder and adds them automatically.
-
 #pragma warning disable IDE0058
 
 using System.Reflection;
@@ -48,7 +46,7 @@ public unsafe Program(ILogger<OpenCLService> logger, CL cl, Context context, Rea
         _logger = logger;
 
         var assembly = Assembly.GetExecutingAssembly();
-        var resourceNames = assembly.GetManifestResourceNames().Where(x => x.EndsWith(".cl")).ToArray();
+        var resourceNames = assembly.GetManifestResourceNames().Where(x => x.EndsWith(".cl") || x.EndsWith(".c")).ToArray();
 
         var kernels = new string[resourceNames.Length];
         var kernelLengths = new nuint[resourceNames.Length];
@@ -91,7 +89,8 @@ public unsafe Program(ILogger<OpenCLService> logger, CL cl, Context context, Rea
             //                                    ^    ^
             // Index:                             35   ^3
 
-            CreateKernel(resource[35..^3]);
+            if (resource.EndsWith(".cl"))
+                CreateKernel(resource[35..^3]);
         }
     }
 

diff --git a/src/Mathematics.NET/Mathematics.NET.csproj b/src/Mathematics.NET/Mathematics.NET.csproj
@@ -17,15 +17,18 @@
   </PropertyGroup>
 
   <ItemGroup>
+    <None Remove="GPU\OpenCL\Kernels\complex.c" />
+    <None Remove="GPU\OpenCL\Kernels\comp_mat_mul.cl" />
+    <None Remove="GPU\OpenCL\Kernels\comp_vec_mul_scalar.cl" />
     <None Remove="GPU\OpenCL\Kernels\mat_mul.cl" />
     <None Remove="GPU\OpenCL\Kernels\vec_mul_scalar.cl" />
   </ItemGroup>
 
   <ItemGroup>
+    <EmbeddedResource Include="GPU\OpenCL\Kernels\complex.c" />
+    <EmbeddedResource Include="GPU\OpenCL\Kernels\comp_mat_mul.cl" />
+    <EmbeddedResource Include="GPU\OpenCL\Kernels\comp_vec_mul_scalar.cl" />
     <EmbeddedResource Include="GPU\OpenCL\Kernels\mat_mul.cl" />
-  </ItemGroup>
-
-  <ItemGroup>
     <EmbeddedResource Include="GPU\OpenCL\Kernels\vec_mul_scalar.cl" />
   </ItemGroup>