diff --git a/.editorconfig b/.editorconfig
index 0df83d60..ab0618f3 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -20,8 +20,8 @@ insert_final_newline = true
# Other
trim_trailing_whitespace = true
-### OpenCL Files ###
-[*.cl]
+### C and OpenCL Files ###
+[*.{c,cl}]
# Indentation and spacing
indent_size = 4
diff --git a/src/Mathematics.NET/Core/Rational.cs b/src/Mathematics.NET/Core/Rational.cs
index 504e40d9..202e2feb 100644
--- a/src/Mathematics.NET/Core/Rational.cs
+++ b/src/Mathematics.NET/Core/Rational.cs
@@ -581,13 +581,9 @@ private static T GCD(T p, T q)
while (p != T.Zero && q != T.Zero)
{
if (p > q)
- {
p %= q;
- }
else
- {
q %= p;
- }
}
return p | q;
}
@@ -616,13 +612,9 @@ private static T LCM(T p, T q)
while (p != T.Zero && q != T.Zero)
{
if (p > q)
- {
p %= q;
- }
else
- {
q %= p;
- }
}
return holdP / (p | q) * holdQ;
}
diff --git a/src/Mathematics.NET/GPU/OpenCL/IComputeService.cs b/src/Mathematics.NET/GPU/OpenCL/IComputeService.cs
index 656522ff..5acea77b 100644
--- a/src/Mathematics.NET/GPU/OpenCL/IComputeService.cs
+++ b/src/Mathematics.NET/GPU/OpenCL/IComputeService.cs
@@ -43,9 +43,19 @@ public interface IComputeService : IDisposable
KernelWorkGroupInformation GetKernelWorkGroupInfo(Device device, Kernel kernel);
//
- // Methods.
+ // Methods
//
+ /// Multiply a vector by a scalar.
+ /// Padded vectors should have zeros added to their ends.
+ /// The device to use.
+ /// The global work size.
+ /// The local work size.
+ /// A vector.
+ /// A scalar.
+ /// A new vector.
+ ReadOnlySpan CompVecMulScalar(Device device, nuint globalWorkSize, nuint localWorkSize, ReadOnlySpan vector, Complex scalar);
+
/// Multipy two matrices.
/// Padded matrices should have zeros added to their right and bottom.
/// The device to use.
@@ -54,15 +64,11 @@ public interface IComputeService : IDisposable
/// The left matrix.
/// The right matrix.
/// A new matrix.
+ ReadOnlySpan2D CompMatMul(Device device, WorkSize2D globalWorkSize, WorkSize2D localWorkSize, ReadOnlySpan2D left, ReadOnlySpan2D right);
+
+ ///
ReadOnlySpan2D MatMul(Device device, WorkSize2D globalWorkSize, WorkSize2D localWorkSize, ReadOnlySpan2D left, ReadOnlySpan2D right);
- /// Multiply a vector by a scalar.
- /// Padded vectors should have zeros added to their ends.
- /// The device to use.
- /// The global work size.
- /// The local work size.
- /// A vector.
- /// A scalar.
- /// A new vector.
+ ///
ReadOnlySpan VecMulScalar(Device device, nuint globalWorkSize, nuint localWorkSize, ReadOnlySpan vector, Real scalar);
}
diff --git a/src/Mathematics.NET/GPU/OpenCL/Kernels/comp_mat_mul.cl b/src/Mathematics.NET/GPU/OpenCL/Kernels/comp_mat_mul.cl
new file mode 100644
index 00000000..6ec6580d
--- /dev/null
+++ b/src/Mathematics.NET/GPU/OpenCL/Kernels/comp_mat_mul.cl
@@ -0,0 +1,25 @@
+__kernel void comp_mat_mul(__global const complex* matA,
+ __global const complex* matB,
+ int const k,
+ __global complex* result)
+{
+ int row = get_global_id(0);
+ int col = get_global_id(1);
+ int width = get_local_size(1) * get_num_groups(1);
+
+ int aIndex = row * k;
+ int bIndex = col;
+
+ complex sum;
+ sum.re = 0;
+ sum.im = 0;
+
+ for (int i = 0; i < k; i++)
+ {
+ sum = comp_add(sum, comp_mul(matA[aIndex], matB[bIndex]));
+ aIndex++;
+ bIndex += width;
+ }
+
+ result[row * width + col] = sum;
+}
\ No newline at end of file
diff --git a/src/Mathematics.NET/GPU/OpenCL/Kernels/comp_vec_mul_scalar.cl b/src/Mathematics.NET/GPU/OpenCL/Kernels/comp_vec_mul_scalar.cl
new file mode 100644
index 00000000..76a5cf2d
--- /dev/null
+++ b/src/Mathematics.NET/GPU/OpenCL/Kernels/comp_vec_mul_scalar.cl
@@ -0,0 +1,7 @@
+__kernel void comp_vec_mul_scalar(__global const complex* vector,
+ const complex scalar,
+ __global complex* result)
+{
+ int i = get_global_id(0);
+ result[i] = comp_mul(vector[i], scalar);
+}
diff --git a/src/Mathematics.NET/GPU/OpenCL/Kernels/complex.c b/src/Mathematics.NET/GPU/OpenCL/Kernels/complex.c
new file mode 100644
index 00000000..a9e4c755
--- /dev/null
+++ b/src/Mathematics.NET/GPU/OpenCL/Kernels/complex.c
@@ -0,0 +1,44 @@
+typedef struct __attribute__((packed)) {
+ double re;
+ double im;
+} complex;
+
+inline complex comp_add(const complex z, const complex w) {
+ complex result;
+ result.re = z.re + w.re;
+ result.im = z.im + w.im;
+ return result;
+}
+
+inline complex comp_div(const complex z, const complex w) {
+ double a = z.re;
+ double b = z.im;
+ double c = w.re;
+ double d = w.im;
+
+ complex result;
+ if (fabs(d) < fabs(c)) {
+ double u = d / c;
+ result.re = (a + b * u) / (c + d * u);
+ result.im = (b - a * u) / (c + d * u);
+ } else {
+ double u = c / d;
+ result.re = (b + a * u) / (d + c * u);
+ result.im = (b * u - a) / (d + c * u);
+ }
+ return result;
+}
+
+inline complex comp_mul(const complex z, const complex w) {
+ complex result;
+ result.re = z.re * w.re - z.im * w.im;
+ result.im = z.re * w.im + w.re * z.im;
+ return result;
+}
+
+inline complex comp_sub(const complex z, const complex w) {
+ complex result;
+ result.re = z.re - w.re;
+ result.im = z.im - w.im;
+ return result;
+}
diff --git a/src/Mathematics.NET/GPU/OpenCL/OpenCLService.cs b/src/Mathematics.NET/GPU/OpenCL/OpenCLService.cs
index cb2287d8..f4e3c4fe 100644
--- a/src/Mathematics.NET/GPU/OpenCL/OpenCLService.cs
+++ b/src/Mathematics.NET/GPU/OpenCL/OpenCLService.cs
@@ -146,9 +146,108 @@ public void Dispose()
public KernelWorkGroupInformation GetKernelWorkGroupInfo(Device device, Kernel kernel) => new(_logger, _cl, device, kernel);
- //
- // Interface implementations.
- //
+ #region Interface Implementations
+
+ // TODO: Consider putting this in another file.
+
+ public unsafe ReadOnlySpan CompVecMulScalar(Device device, nuint globalWorkSize, nuint localWorkSize, ReadOnlySpan vector, Complex scalar)
+ {
+ var length = vector.Length;
+ var result = new Complex[length];
+
+ fixed (void* pVector = vector)
+ {
+ // Create buffers.
+ nint vectorBuffer = _cl.CreateBuffer(_context.Handle, MemFlags.UseHostPtr | MemFlags.ReadOnly | MemFlags.HostNoAccess, (nuint)(sizeof(Complex) * length), pVector, null);
+
+ fixed (void* pResult = result)
+ {
+ nint resultBuffer = _cl.CreateBuffer(_context.Handle, MemFlags.UseHostPtr | MemFlags.WriteOnly | MemFlags.HostReadOnly, (nuint)(sizeof(Complex) * length), pResult, null);
+
+ // Set kernel arguments.
+ var kernel = _program.Kernels["comp_vec_mul_scalar"].Handle;
+ var error = _cl.SetKernelArg(kernel, 0, (nuint)sizeof(nint), &vectorBuffer);
+ error |= _cl.SetKernelArg(kernel, 1, (nuint)sizeof(Complex), &scalar);
+ error |= _cl.SetKernelArg(kernel, 2, (nuint)sizeof(nint), &resultBuffer);
+#if DEBUG
+ if (error != (int)ErrorCodes.Success)
+ _logger.LogDebug(s_setKernelArgError, _program.Kernels["comp_vec_mul_scalar"].Name);
+#endif
+
+ // Enqueue NDRange kernel.
+ using var commandQueue = _context.CreateCommandQueue(device, CommandQueueProperties.None);
+ error = _cl.EnqueueNdrangeKernel(commandQueue.Handle, kernel, 1, null, &globalWorkSize, &localWorkSize, 0, null, null);
+#if DEBUG
+ if (error != (int)ErrorCodes.Success)
+ _logger.LogDebug(s_enqueueNDRangeKernelError);
+#endif
+ _cl.Finish(commandQueue.Handle);
+
+ // Enqueue read buffer.
+ _cl.EnqueueReadBuffer(commandQueue.Handle, resultBuffer, true, 0, (nuint)(sizeof(Complex) * length), pResult, 0, null, null);
+
+ // Release mem objects.
+ _cl.ReleaseMemObject(resultBuffer);
+ }
+ _cl.ReleaseMemObject(vectorBuffer);
+ }
+
+ return result;
+ }
+
+ public unsafe ReadOnlySpan2D CompMatMul(Device device, WorkSize2D globalWorkSize, WorkSize2D localWorkSize, ReadOnlySpan2D left, ReadOnlySpan2D right)
+ {
+ var k = left.Width;
+ if (k != right.Height)
+ throw new Exception("Cannot multiply two matrices with incompatible dimensions.");
+ Span2D result = new Complex[left.Height, right.Width];
+
+ fixed (void* pLeft = left)
+ {
+ // Create buffers.
+ nint leftBuffer = _cl.CreateBuffer(_context.Handle, MemFlags.UseHostPtr | MemFlags.ReadOnly | MemFlags.HostNoAccess, (nuint)(sizeof(Complex) * left.Length), pLeft, null);
+
+ fixed (void* pRight = right)
+ {
+ nint rightBuffer = _cl.CreateBuffer(_context.Handle, MemFlags.UseHostPtr | MemFlags.ReadOnly | MemFlags.HostNoAccess, (nuint)(sizeof(Complex) * right.Length), pRight, null);
+
+ fixed (void* pResult = result)
+ {
+ nint resultBuffer = _cl.CreateBuffer(_context.Handle, MemFlags.UseHostPtr | MemFlags.WriteOnly | MemFlags.HostReadOnly, (nuint)(sizeof(Complex) * result.Length), pResult, null);
+
+ // Set kernel arguments.
+ var kernel = _program.Kernels["comp_mat_mul"].Handle;
+ var error = _cl.SetKernelArg(kernel, 0, (nuint)sizeof(nint), &leftBuffer);
+ error |= _cl.SetKernelArg(kernel, 1, (nuint)sizeof(nint), &rightBuffer);
+ error |= _cl.SetKernelArg(kernel, 2, sizeof(int), &k);
+ error |= _cl.SetKernelArg(kernel, 3, (nuint)sizeof(nint), &resultBuffer);
+#if DEBUG
+ if (error != (int)ErrorCodes.Success)
+ _logger.LogDebug(s_setKernelArgError, _program.Kernels["comp_mat_mul"].Name);
+#endif
+
+ // Enqueue NDRange kernel.
+ using var commandQueue = _context.CreateCommandQueue(device, CommandQueueProperties.None);
+ error = _cl.EnqueueNdrangeKernel(commandQueue.Handle, kernel, 2, null, (nuint*)&globalWorkSize, (nuint*)&localWorkSize, 0, null, null);
+#if DEBUG
+ if (error != (int)ErrorCodes.Success)
+ _logger.LogDebug(s_enqueueNDRangeKernelError);
+#endif
+ _cl.Finish(commandQueue.Handle);
+
+ // Enqueue read buffer.
+ _cl.EnqueueReadBuffer(commandQueue.Handle, resultBuffer, true, 0, (nuint)(sizeof(Complex) * result.Length), pResult, 0, null, null);
+
+ // Release mem objects.
+ _cl.ReleaseMemObject(resultBuffer);
+ }
+ _cl.ReleaseMemObject(rightBuffer);
+ }
+ _cl.ReleaseMemObject(leftBuffer);
+ }
+
+ return result;
+ }
public unsafe ReadOnlySpan2D MatMul(Device device, WorkSize2D globalWorkSize, WorkSize2D localWorkSize, ReadOnlySpan2D left, ReadOnlySpan2D right)
{
@@ -248,4 +347,6 @@ public unsafe ReadOnlySpan VecMulScalar(Device device, nuint globalWorkSiz
return result;
}
+
+ #endregion
}
diff --git a/src/Mathematics.NET/GPU/OpenCL/Program.cs b/src/Mathematics.NET/GPU/OpenCL/Program.cs
index deb7b2c7..a37cb3e5 100644
--- a/src/Mathematics.NET/GPU/OpenCL/Program.cs
+++ b/src/Mathematics.NET/GPU/OpenCL/Program.cs
@@ -25,8 +25,6 @@
// SOFTWARE.
//
-// TODO: Create a source generator that finds kernels in the Kernels folder and adds them automatically.
-
#pragma warning disable IDE0058
using System.Reflection;
@@ -48,7 +46,7 @@ public unsafe Program(ILogger logger, CL cl, Context context, Rea
_logger = logger;
var assembly = Assembly.GetExecutingAssembly();
- var resourceNames = assembly.GetManifestResourceNames().Where(x => x.EndsWith(".cl")).ToArray();
+ var resourceNames = assembly.GetManifestResourceNames().Where(x => x.EndsWith(".cl") || x.EndsWith(".c")).ToArray();
var kernels = new string[resourceNames.Length];
var kernelLengths = new nuint[resourceNames.Length];
@@ -91,7 +89,8 @@ public unsafe Program(ILogger logger, CL cl, Context context, Rea
// ^ ^
// Index: 35 ^3
- CreateKernel(resource[35..^3]);
+ if (resource.EndsWith(".cl"))
+ CreateKernel(resource[35..^3]);
}
}
diff --git a/src/Mathematics.NET/Mathematics.NET.csproj b/src/Mathematics.NET/Mathematics.NET.csproj
index 8a2c66c9..ce80c006 100644
--- a/src/Mathematics.NET/Mathematics.NET.csproj
+++ b/src/Mathematics.NET/Mathematics.NET.csproj
@@ -17,15 +17,18 @@
+
+
+
+
+
+
-
-
-