Skip to content

Commit

Permalink
Merge pull request #127 from HamletTanyavong/dev
Browse files Browse the repository at this point in the history
Add support for complex numbers in methods that use OpenCL
  • Loading branch information
HamletTanyavong authored Aug 22, 2024
2 parents d9aac30 + 71b74b0 commit 816c99a
Show file tree
Hide file tree
Showing 9 changed files with 206 additions and 29 deletions.
4 changes: 2 additions & 2 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ insert_final_newline = true
# Other
trim_trailing_whitespace = true

### OpenCL Files ###
[*.cl]
### C and OpenCL Files ###
[*.{c,cl}]

# Indentation and spacing
indent_size = 4
Expand Down
8 changes: 0 additions & 8 deletions src/Mathematics.NET/Core/Rational.cs
Original file line number Diff line number Diff line change
Expand Up @@ -581,13 +581,9 @@ private static T GCD(T p, T q)
while (p != T.Zero && q != T.Zero)
{
if (p > q)
{
p %= q;
}
else
{
q %= p;
}
}
return p | q;
}
Expand Down Expand Up @@ -616,13 +612,9 @@ private static T LCM(T p, T q)
while (p != T.Zero && q != T.Zero)
{
if (p > q)
{
p %= q;
}
else
{
q %= p;
}
}
return holdP / (p | q) * holdQ;
}
Expand Down
24 changes: 15 additions & 9 deletions src/Mathematics.NET/GPU/OpenCL/IComputeService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,19 @@ public interface IComputeService : IDisposable
KernelWorkGroupInformation GetKernelWorkGroupInfo(Device device, Kernel kernel);

//
// Methods.
// Methods
//

/// <summary>Multiply a vector by a scalar.</summary>
/// <remarks>Padded vectors should have zeros added to their ends.</remarks>
/// <param name="device">The device to use.</param>
/// <param name="globalWorkSize">The global work size.</param>
/// <param name="localWorkSize">The local work size.</param>
/// <param name="vector">A vector.</param>
/// <param name="scalar">A scalar.</param>
/// <returns>A new vector.</returns>
ReadOnlySpan<Complex> CompVecMulScalar(Device device, nuint globalWorkSize, nuint localWorkSize, ReadOnlySpan<Complex> vector, Complex scalar);

/// <summary>Multipy two matrices.</summary>
/// <remarks>Padded matrices should have zeros added to their right and bottom.</remarks>
/// <param name="device">The device to use.</param>
Expand All @@ -54,15 +64,11 @@ public interface IComputeService : IDisposable
/// <param name="left">The left matrix.</param>
/// <param name="right">The right matrix.</param>
/// <returns>A new matrix.</returns>
ReadOnlySpan2D<Complex> CompMatMul(Device device, WorkSize2D globalWorkSize, WorkSize2D localWorkSize, ReadOnlySpan2D<Complex> left, ReadOnlySpan2D<Complex> right);

/// <inheritdoc cref="ComplexMatMul(Device, WorkSize2D, WorkSize2D, ReadOnlySpan2D{Complex}, ReadOnlySpan2D{Complex})"/>
ReadOnlySpan2D<Real> MatMul(Device device, WorkSize2D globalWorkSize, WorkSize2D localWorkSize, ReadOnlySpan2D<Real> left, ReadOnlySpan2D<Real> right);

/// <summary>Multiply a vector by a scalar.</summary>
/// <remarks>Padded vectors should have zeros added to their ends.</remarks>
/// <param name="device">The device to use.</param>
/// <param name="globalWorkSize">The global work size.</param>
/// <param name="localWorkSize">The local work size.</param>
/// <param name="vector">A vector.</param>
/// <param name="scalar">A scalar.</param>
/// <returns>A new vector.</returns>
/// <inheritdoc cref="CompVecMulScalar(Device, nuint, nuint, ReadOnlySpan{Complex}, Complex)"/>
ReadOnlySpan<Real> VecMulScalar(Device device, nuint globalWorkSize, nuint localWorkSize, ReadOnlySpan<Real> vector, Real scalar);
}
25 changes: 25 additions & 0 deletions src/Mathematics.NET/GPU/OpenCL/Kernels/comp_mat_mul.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
__kernel void comp_mat_mul(__global const complex* matA,
__global const complex* matB,
int const k,
__global complex* result)
{
int row = get_global_id(0);
int col = get_global_id(1);
int width = get_local_size(1) * get_num_groups(1);

int aIndex = row * k;
int bIndex = col;

complex sum;
sum.re = 0;
sum.im = 0;

for (int i = 0; i < k; i++)
{
sum = comp_add(sum, comp_mul(matA[aIndex], matB[bIndex]));
aIndex++;
bIndex += width;
}

result[row * width + col] = sum;
}
7 changes: 7 additions & 0 deletions src/Mathematics.NET/GPU/OpenCL/Kernels/comp_vec_mul_scalar.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
__kernel void comp_vec_mul_scalar(__global const complex* vector,
const complex scalar,
__global complex* result)
{
int i = get_global_id(0);
result[i] = comp_mul(vector[i], scalar);
}
44 changes: 44 additions & 0 deletions src/Mathematics.NET/GPU/OpenCL/Kernels/complex.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
typedef struct __attribute__((packed)) {
double re;
double im;
} complex;

inline complex comp_add(const complex z, const complex w) {
complex result;
result.re = z.re + w.re;
result.im = z.im + w.im;
return result;
}

inline complex comp_div(const complex z, const complex w) {
double a = z.re;
double b = z.im;
double c = w.re;
double d = w.im;

complex result;
if (fabs(d) < fabs(c)) {
double u = d / c;
result.re = (a + b * u) / (c + d * u);
result.im = (b - a * u) / (c + d * u);
} else {
double u = c / d;
result.re = (b + a * u) / (d + c * u);
result.im = (b * u - a) / (d + c * u);
}
return result;
}

inline complex comp_mul(const complex z, const complex w) {
complex result;
result.re = z.re * w.re - z.im * w.im;
result.im = z.re * w.im + w.re * z.im;
return result;
}

inline complex comp_sub(const complex z, const complex w) {
complex result;
result.re = z.re - w.re;
result.im = z.im - w.im;
return result;
}
107 changes: 104 additions & 3 deletions src/Mathematics.NET/GPU/OpenCL/OpenCLService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,108 @@ public void Dispose()

public KernelWorkGroupInformation GetKernelWorkGroupInfo(Device device, Kernel kernel) => new(_logger, _cl, device, kernel);

//
// Interface implementations.
//
#region Interface Implementations

// TODO: Consider putting this in another file.

public unsafe ReadOnlySpan<Complex> CompVecMulScalar(Device device, nuint globalWorkSize, nuint localWorkSize, ReadOnlySpan<Complex> vector, Complex scalar)
{
var length = vector.Length;
var result = new Complex[length];

fixed (void* pVector = vector)
{
// Create buffers.
nint vectorBuffer = _cl.CreateBuffer(_context.Handle, MemFlags.UseHostPtr | MemFlags.ReadOnly | MemFlags.HostNoAccess, (nuint)(sizeof(Complex) * length), pVector, null);

fixed (void* pResult = result)
{
nint resultBuffer = _cl.CreateBuffer(_context.Handle, MemFlags.UseHostPtr | MemFlags.WriteOnly | MemFlags.HostReadOnly, (nuint)(sizeof(Complex) * length), pResult, null);

// Set kernel arguments.
var kernel = _program.Kernels["comp_vec_mul_scalar"].Handle;
var error = _cl.SetKernelArg(kernel, 0, (nuint)sizeof(nint), &vectorBuffer);
error |= _cl.SetKernelArg(kernel, 1, (nuint)sizeof(Complex), &scalar);
error |= _cl.SetKernelArg(kernel, 2, (nuint)sizeof(nint), &resultBuffer);
#if DEBUG
if (error != (int)ErrorCodes.Success)
_logger.LogDebug(s_setKernelArgError, _program.Kernels["comp_vec_mul_scalar"].Name);
#endif

// Enqueue NDRange kernel.
using var commandQueue = _context.CreateCommandQueue(device, CommandQueueProperties.None);
error = _cl.EnqueueNdrangeKernel(commandQueue.Handle, kernel, 1, null, &globalWorkSize, &localWorkSize, 0, null, null);
#if DEBUG
if (error != (int)ErrorCodes.Success)
_logger.LogDebug(s_enqueueNDRangeKernelError);
#endif
_cl.Finish(commandQueue.Handle);

// Enqueue read buffer.
_cl.EnqueueReadBuffer(commandQueue.Handle, resultBuffer, true, 0, (nuint)(sizeof(Complex) * length), pResult, 0, null, null);

// Release mem objects.
_cl.ReleaseMemObject(resultBuffer);
}
_cl.ReleaseMemObject(vectorBuffer);
}

return result;
}

public unsafe ReadOnlySpan2D<Complex> CompMatMul(Device device, WorkSize2D globalWorkSize, WorkSize2D localWorkSize, ReadOnlySpan2D<Complex> left, ReadOnlySpan2D<Complex> right)
{
var k = left.Width;
if (k != right.Height)
throw new Exception("Cannot multiply two matrices with incompatible dimensions.");
Span2D<Complex> result = new Complex[left.Height, right.Width];

fixed (void* pLeft = left)
{
// Create buffers.
nint leftBuffer = _cl.CreateBuffer(_context.Handle, MemFlags.UseHostPtr | MemFlags.ReadOnly | MemFlags.HostNoAccess, (nuint)(sizeof(Complex) * left.Length), pLeft, null);

fixed (void* pRight = right)
{
nint rightBuffer = _cl.CreateBuffer(_context.Handle, MemFlags.UseHostPtr | MemFlags.ReadOnly | MemFlags.HostNoAccess, (nuint)(sizeof(Complex) * right.Length), pRight, null);

fixed (void* pResult = result)
{
nint resultBuffer = _cl.CreateBuffer(_context.Handle, MemFlags.UseHostPtr | MemFlags.WriteOnly | MemFlags.HostReadOnly, (nuint)(sizeof(Complex) * result.Length), pResult, null);

// Set kernel arguments.
var kernel = _program.Kernels["comp_mat_mul"].Handle;
var error = _cl.SetKernelArg(kernel, 0, (nuint)sizeof(nint), &leftBuffer);
error |= _cl.SetKernelArg(kernel, 1, (nuint)sizeof(nint), &rightBuffer);
error |= _cl.SetKernelArg(kernel, 2, sizeof(int), &k);
error |= _cl.SetKernelArg(kernel, 3, (nuint)sizeof(nint), &resultBuffer);
#if DEBUG
if (error != (int)ErrorCodes.Success)
_logger.LogDebug(s_setKernelArgError, _program.Kernels["comp_mat_mul"].Name);
#endif

// Enqueue NDRange kernel.
using var commandQueue = _context.CreateCommandQueue(device, CommandQueueProperties.None);
error = _cl.EnqueueNdrangeKernel(commandQueue.Handle, kernel, 2, null, (nuint*)&globalWorkSize, (nuint*)&localWorkSize, 0, null, null);
#if DEBUG
if (error != (int)ErrorCodes.Success)
_logger.LogDebug(s_enqueueNDRangeKernelError);
#endif
_cl.Finish(commandQueue.Handle);

// Enqueue read buffer.
_cl.EnqueueReadBuffer(commandQueue.Handle, resultBuffer, true, 0, (nuint)(sizeof(Complex) * result.Length), pResult, 0, null, null);

// Release mem objects.
_cl.ReleaseMemObject(resultBuffer);
}
_cl.ReleaseMemObject(rightBuffer);
}
_cl.ReleaseMemObject(leftBuffer);
}

return result;
}

public unsafe ReadOnlySpan2D<Real> MatMul(Device device, WorkSize2D globalWorkSize, WorkSize2D localWorkSize, ReadOnlySpan2D<Real> left, ReadOnlySpan2D<Real> right)
{
Expand Down Expand Up @@ -248,4 +347,6 @@ public unsafe ReadOnlySpan<Real> VecMulScalar(Device device, nuint globalWorkSiz

return result;
}

#endregion
}
7 changes: 3 additions & 4 deletions src/Mathematics.NET/GPU/OpenCL/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@
// SOFTWARE.
// </copyright>

// TODO: Create a source generator that finds kernels in the Kernels folder and adds them automatically.

#pragma warning disable IDE0058

using System.Reflection;
Expand All @@ -48,7 +46,7 @@ public unsafe Program(ILogger<OpenCLService> logger, CL cl, Context context, Rea
_logger = logger;

var assembly = Assembly.GetExecutingAssembly();
var resourceNames = assembly.GetManifestResourceNames().Where(x => x.EndsWith(".cl")).ToArray();
var resourceNames = assembly.GetManifestResourceNames().Where(x => x.EndsWith(".cl") || x.EndsWith(".c")).ToArray();

var kernels = new string[resourceNames.Length];
var kernelLengths = new nuint[resourceNames.Length];
Expand Down Expand Up @@ -91,7 +89,8 @@ public unsafe Program(ILogger<OpenCLService> logger, CL cl, Context context, Rea
// ^ ^
// Index: 35 ^3

CreateKernel(resource[35..^3]);
if (resource.EndsWith(".cl"))
CreateKernel(resource[35..^3]);
}
}

Expand Down
9 changes: 6 additions & 3 deletions src/Mathematics.NET/Mathematics.NET.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,18 @@
</PropertyGroup>

<ItemGroup>
<None Remove="GPU\OpenCL\Kernels\complex.c" />
<None Remove="GPU\OpenCL\Kernels\comp_mat_mul.cl" />
<None Remove="GPU\OpenCL\Kernels\comp_vec_mul_scalar.cl" />
<None Remove="GPU\OpenCL\Kernels\mat_mul.cl" />
<None Remove="GPU\OpenCL\Kernels\vec_mul_scalar.cl" />
</ItemGroup>

<ItemGroup>
<EmbeddedResource Include="GPU\OpenCL\Kernels\complex.c" />
<EmbeddedResource Include="GPU\OpenCL\Kernels\comp_mat_mul.cl" />
<EmbeddedResource Include="GPU\OpenCL\Kernels\comp_vec_mul_scalar.cl" />
<EmbeddedResource Include="GPU\OpenCL\Kernels\mat_mul.cl" />
</ItemGroup>

<ItemGroup>
<EmbeddedResource Include="GPU\OpenCL\Kernels\vec_mul_scalar.cl" />
</ItemGroup>

Expand Down

0 comments on commit 816c99a

Please sign in to comment.