diff --git a/.editorconfig b/.editorconfig index 0df83d60..ab0618f3 100644 --- a/.editorconfig +++ b/.editorconfig @@ -20,8 +20,8 @@ insert_final_newline = true # Other trim_trailing_whitespace = true -### OpenCL Files ### -[*.cl] +### C and OpenCL Files ### +[*.{c,cl}] # Indentation and spacing indent_size = 4 diff --git a/src/Mathematics.NET/Core/Rational.cs b/src/Mathematics.NET/Core/Rational.cs index 504e40d9..202e2feb 100644 --- a/src/Mathematics.NET/Core/Rational.cs +++ b/src/Mathematics.NET/Core/Rational.cs @@ -581,13 +581,9 @@ private static T GCD(T p, T q) while (p != T.Zero && q != T.Zero) { if (p > q) - { p %= q; - } else - { q %= p; - } } return p | q; } @@ -616,13 +612,9 @@ private static T LCM(T p, T q) while (p != T.Zero && q != T.Zero) { if (p > q) - { p %= q; - } else - { q %= p; - } } return holdP / (p | q) * holdQ; } diff --git a/src/Mathematics.NET/GPU/OpenCL/IComputeService.cs b/src/Mathematics.NET/GPU/OpenCL/IComputeService.cs index 656522ff..5acea77b 100644 --- a/src/Mathematics.NET/GPU/OpenCL/IComputeService.cs +++ b/src/Mathematics.NET/GPU/OpenCL/IComputeService.cs @@ -43,9 +43,19 @@ public interface IComputeService : IDisposable KernelWorkGroupInformation GetKernelWorkGroupInfo(Device device, Kernel kernel); // - // Methods. + // Methods // + /// Multiply a vector by a scalar. + /// Padded vectors should have zeros added to their ends. + /// The device to use. + /// The global work size. + /// The local work size. + /// A vector. + /// A scalar. + /// A new vector. + ReadOnlySpan CompVecMulScalar(Device device, nuint globalWorkSize, nuint localWorkSize, ReadOnlySpan vector, Complex scalar); + /// Multipy two matrices. /// Padded matrices should have zeros added to their right and bottom. /// The device to use. @@ -54,15 +64,11 @@ public interface IComputeService : IDisposable /// The left matrix. /// The right matrix. /// A new matrix. + ReadOnlySpan2D CompMatMul(Device device, WorkSize2D globalWorkSize, WorkSize2D localWorkSize, ReadOnlySpan2D left, ReadOnlySpan2D right); + + /// ReadOnlySpan2D MatMul(Device device, WorkSize2D globalWorkSize, WorkSize2D localWorkSize, ReadOnlySpan2D left, ReadOnlySpan2D right); - /// Multiply a vector by a scalar. - /// Padded vectors should have zeros added to their ends. - /// The device to use. - /// The global work size. - /// The local work size. - /// A vector. - /// A scalar. - /// A new vector. + /// ReadOnlySpan VecMulScalar(Device device, nuint globalWorkSize, nuint localWorkSize, ReadOnlySpan vector, Real scalar); } diff --git a/src/Mathematics.NET/GPU/OpenCL/Kernels/comp_mat_mul.cl b/src/Mathematics.NET/GPU/OpenCL/Kernels/comp_mat_mul.cl new file mode 100644 index 00000000..6ec6580d --- /dev/null +++ b/src/Mathematics.NET/GPU/OpenCL/Kernels/comp_mat_mul.cl @@ -0,0 +1,25 @@ +__kernel void comp_mat_mul(__global const complex* matA, + __global const complex* matB, + int const k, + __global complex* result) +{ + int row = get_global_id(0); + int col = get_global_id(1); + int width = get_local_size(1) * get_num_groups(1); + + int aIndex = row * k; + int bIndex = col; + + complex sum; + sum.re = 0; + sum.im = 0; + + for (int i = 0; i < k; i++) + { + sum = comp_add(sum, comp_mul(matA[aIndex], matB[bIndex])); + aIndex++; + bIndex += width; + } + + result[row * width + col] = sum; +} \ No newline at end of file diff --git a/src/Mathematics.NET/GPU/OpenCL/Kernels/comp_vec_mul_scalar.cl b/src/Mathematics.NET/GPU/OpenCL/Kernels/comp_vec_mul_scalar.cl new file mode 100644 index 00000000..76a5cf2d --- /dev/null +++ b/src/Mathematics.NET/GPU/OpenCL/Kernels/comp_vec_mul_scalar.cl @@ -0,0 +1,7 @@ +__kernel void comp_vec_mul_scalar(__global const complex* vector, + const complex scalar, + __global complex* result) +{ + int i = get_global_id(0); + result[i] = comp_mul(vector[i], scalar); +} diff --git a/src/Mathematics.NET/GPU/OpenCL/Kernels/complex.c b/src/Mathematics.NET/GPU/OpenCL/Kernels/complex.c new file mode 100644 index 00000000..a9e4c755 --- /dev/null +++ b/src/Mathematics.NET/GPU/OpenCL/Kernels/complex.c @@ -0,0 +1,44 @@ +typedef struct __attribute__((packed)) { + double re; + double im; +} complex; + +inline complex comp_add(const complex z, const complex w) { + complex result; + result.re = z.re + w.re; + result.im = z.im + w.im; + return result; +} + +inline complex comp_div(const complex z, const complex w) { + double a = z.re; + double b = z.im; + double c = w.re; + double d = w.im; + + complex result; + if (fabs(d) < fabs(c)) { + double u = d / c; + result.re = (a + b * u) / (c + d * u); + result.im = (b - a * u) / (c + d * u); + } else { + double u = c / d; + result.re = (b + a * u) / (d + c * u); + result.im = (b * u - a) / (d + c * u); + } + return result; +} + +inline complex comp_mul(const complex z, const complex w) { + complex result; + result.re = z.re * w.re - z.im * w.im; + result.im = z.re * w.im + w.re * z.im; + return result; +} + +inline complex comp_sub(const complex z, const complex w) { + complex result; + result.re = z.re - w.re; + result.im = z.im - w.im; + return result; +} diff --git a/src/Mathematics.NET/GPU/OpenCL/OpenCLService.cs b/src/Mathematics.NET/GPU/OpenCL/OpenCLService.cs index cb2287d8..f4e3c4fe 100644 --- a/src/Mathematics.NET/GPU/OpenCL/OpenCLService.cs +++ b/src/Mathematics.NET/GPU/OpenCL/OpenCLService.cs @@ -146,9 +146,108 @@ public void Dispose() public KernelWorkGroupInformation GetKernelWorkGroupInfo(Device device, Kernel kernel) => new(_logger, _cl, device, kernel); - // - // Interface implementations. - // + #region Interface Implementations + + // TODO: Consider putting this in another file. + + public unsafe ReadOnlySpan CompVecMulScalar(Device device, nuint globalWorkSize, nuint localWorkSize, ReadOnlySpan vector, Complex scalar) + { + var length = vector.Length; + var result = new Complex[length]; + + fixed (void* pVector = vector) + { + // Create buffers. + nint vectorBuffer = _cl.CreateBuffer(_context.Handle, MemFlags.UseHostPtr | MemFlags.ReadOnly | MemFlags.HostNoAccess, (nuint)(sizeof(Complex) * length), pVector, null); + + fixed (void* pResult = result) + { + nint resultBuffer = _cl.CreateBuffer(_context.Handle, MemFlags.UseHostPtr | MemFlags.WriteOnly | MemFlags.HostReadOnly, (nuint)(sizeof(Complex) * length), pResult, null); + + // Set kernel arguments. + var kernel = _program.Kernels["comp_vec_mul_scalar"].Handle; + var error = _cl.SetKernelArg(kernel, 0, (nuint)sizeof(nint), &vectorBuffer); + error |= _cl.SetKernelArg(kernel, 1, (nuint)sizeof(Complex), &scalar); + error |= _cl.SetKernelArg(kernel, 2, (nuint)sizeof(nint), &resultBuffer); +#if DEBUG + if (error != (int)ErrorCodes.Success) + _logger.LogDebug(s_setKernelArgError, _program.Kernels["comp_vec_mul_scalar"].Name); +#endif + + // Enqueue NDRange kernel. + using var commandQueue = _context.CreateCommandQueue(device, CommandQueueProperties.None); + error = _cl.EnqueueNdrangeKernel(commandQueue.Handle, kernel, 1, null, &globalWorkSize, &localWorkSize, 0, null, null); +#if DEBUG + if (error != (int)ErrorCodes.Success) + _logger.LogDebug(s_enqueueNDRangeKernelError); +#endif + _cl.Finish(commandQueue.Handle); + + // Enqueue read buffer. + _cl.EnqueueReadBuffer(commandQueue.Handle, resultBuffer, true, 0, (nuint)(sizeof(Complex) * length), pResult, 0, null, null); + + // Release mem objects. + _cl.ReleaseMemObject(resultBuffer); + } + _cl.ReleaseMemObject(vectorBuffer); + } + + return result; + } + + public unsafe ReadOnlySpan2D CompMatMul(Device device, WorkSize2D globalWorkSize, WorkSize2D localWorkSize, ReadOnlySpan2D left, ReadOnlySpan2D right) + { + var k = left.Width; + if (k != right.Height) + throw new Exception("Cannot multiply two matrices with incompatible dimensions."); + Span2D result = new Complex[left.Height, right.Width]; + + fixed (void* pLeft = left) + { + // Create buffers. + nint leftBuffer = _cl.CreateBuffer(_context.Handle, MemFlags.UseHostPtr | MemFlags.ReadOnly | MemFlags.HostNoAccess, (nuint)(sizeof(Complex) * left.Length), pLeft, null); + + fixed (void* pRight = right) + { + nint rightBuffer = _cl.CreateBuffer(_context.Handle, MemFlags.UseHostPtr | MemFlags.ReadOnly | MemFlags.HostNoAccess, (nuint)(sizeof(Complex) * right.Length), pRight, null); + + fixed (void* pResult = result) + { + nint resultBuffer = _cl.CreateBuffer(_context.Handle, MemFlags.UseHostPtr | MemFlags.WriteOnly | MemFlags.HostReadOnly, (nuint)(sizeof(Complex) * result.Length), pResult, null); + + // Set kernel arguments. + var kernel = _program.Kernels["comp_mat_mul"].Handle; + var error = _cl.SetKernelArg(kernel, 0, (nuint)sizeof(nint), &leftBuffer); + error |= _cl.SetKernelArg(kernel, 1, (nuint)sizeof(nint), &rightBuffer); + error |= _cl.SetKernelArg(kernel, 2, sizeof(int), &k); + error |= _cl.SetKernelArg(kernel, 3, (nuint)sizeof(nint), &resultBuffer); +#if DEBUG + if (error != (int)ErrorCodes.Success) + _logger.LogDebug(s_setKernelArgError, _program.Kernels["comp_mat_mul"].Name); +#endif + + // Enqueue NDRange kernel. + using var commandQueue = _context.CreateCommandQueue(device, CommandQueueProperties.None); + error = _cl.EnqueueNdrangeKernel(commandQueue.Handle, kernel, 2, null, (nuint*)&globalWorkSize, (nuint*)&localWorkSize, 0, null, null); +#if DEBUG + if (error != (int)ErrorCodes.Success) + _logger.LogDebug(s_enqueueNDRangeKernelError); +#endif + _cl.Finish(commandQueue.Handle); + + // Enqueue read buffer. + _cl.EnqueueReadBuffer(commandQueue.Handle, resultBuffer, true, 0, (nuint)(sizeof(Complex) * result.Length), pResult, 0, null, null); + + // Release mem objects. + _cl.ReleaseMemObject(resultBuffer); + } + _cl.ReleaseMemObject(rightBuffer); + } + _cl.ReleaseMemObject(leftBuffer); + } + + return result; + } public unsafe ReadOnlySpan2D MatMul(Device device, WorkSize2D globalWorkSize, WorkSize2D localWorkSize, ReadOnlySpan2D left, ReadOnlySpan2D right) { @@ -248,4 +347,6 @@ public unsafe ReadOnlySpan VecMulScalar(Device device, nuint globalWorkSiz return result; } + + #endregion } diff --git a/src/Mathematics.NET/GPU/OpenCL/Program.cs b/src/Mathematics.NET/GPU/OpenCL/Program.cs index deb7b2c7..a37cb3e5 100644 --- a/src/Mathematics.NET/GPU/OpenCL/Program.cs +++ b/src/Mathematics.NET/GPU/OpenCL/Program.cs @@ -25,8 +25,6 @@ // SOFTWARE. // -// TODO: Create a source generator that finds kernels in the Kernels folder and adds them automatically. - #pragma warning disable IDE0058 using System.Reflection; @@ -48,7 +46,7 @@ public unsafe Program(ILogger logger, CL cl, Context context, Rea _logger = logger; var assembly = Assembly.GetExecutingAssembly(); - var resourceNames = assembly.GetManifestResourceNames().Where(x => x.EndsWith(".cl")).ToArray(); + var resourceNames = assembly.GetManifestResourceNames().Where(x => x.EndsWith(".cl") || x.EndsWith(".c")).ToArray(); var kernels = new string[resourceNames.Length]; var kernelLengths = new nuint[resourceNames.Length]; @@ -91,7 +89,8 @@ public unsafe Program(ILogger logger, CL cl, Context context, Rea // ^ ^ // Index: 35 ^3 - CreateKernel(resource[35..^3]); + if (resource.EndsWith(".cl")) + CreateKernel(resource[35..^3]); } } diff --git a/src/Mathematics.NET/Mathematics.NET.csproj b/src/Mathematics.NET/Mathematics.NET.csproj index 8a2c66c9..ce80c006 100644 --- a/src/Mathematics.NET/Mathematics.NET.csproj +++ b/src/Mathematics.NET/Mathematics.NET.csproj @@ -17,15 +17,18 @@ + + + + + + - - -