Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

@mtlprintf #418

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions docs/src/api/kernel.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,13 @@ MtlThreadGroupArray
MemoryFlags
threadgroup_barrier
simdgroup_barrier
```

## Printing

```@docs
@mtlprintf
@mtlprint
@mtlprintln
@mtlshow
```
28 changes: 28 additions & 0 deletions docs/src/usage/kernel.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,34 @@ Additional notes:
- Kernels must always return nothing
- Kernels are asynchronous. To synchronize, use the `Metal.@sync` macro.

## Printing

When debugging, it's not uncommon to want to print some values. This is achieved with `@mtlprintf`:

```julia
function gpu_add2_print!(y, x)
index = thread_position_in_grid_1d()
@mtlprintf("thread %d", index)
@inbounds y[i] += x[i]
return nothing
end

A = Metal.ones(Float32, 8);
B = Metal.rand(Float32, 8);

@metal threads=length(A) gpu_add2_print!(A, B)
```

`@mtlprintf` is supported on macOS 15 and later. `@mtlprintf` support most of the format specifiers that `printf`
supports in C with the following exceptions:
- `%n` and `%s` conversion specifiers are not supported
- Default argument promotion applies to arguments of half type which promote to the `double` type
- The format string must be a string literal

Metal places output from `@mtlprintf` into a log buffer. The system only removes the messages from the log buffer when the command buffer completes. When the log buffer becomes full, the system drops all subsequent messages.

See also: `@mtlprint`, `@mtlprintln` and `@mtlshow`
christiangnrd marked this conversation as resolved.
Show resolved Hide resolved

## Other Helpful Links

[Metal Shading Language Specification](https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf)
1 change: 1 addition & 0 deletions lib/mtl/MTL.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ include("events.jl")
include("fences.jl")
include("heap.jl")
include("buffer.jl")
include("log_state.jl")
include("command_queue.jl")
include("command_buf.jl")
include("compute_pipeline.jl")
Expand Down
1 change: 1 addition & 0 deletions lib/mtl/command_buf.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export MTLCommandBufferDescriptor
@objcproperties MTLCommandBufferDescriptor begin
@autoproperty retainedReferences::Bool setter=setRetainedReferences
@autoproperty errorOptions::MTLCommandBufferErrorOption setter=setErrorOptions
@autoproperty logState::id{MTLLogState} setter=setLogState
end

function MTLCommandBufferDescriptor()
Expand Down
25 changes: 25 additions & 0 deletions lib/mtl/command_queue.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,21 @@
export MTLCommandQueueDescriptor

@objcwrapper immutable=false MTLCommandQueueDescriptor <: NSObject

@objcproperties MTLCommandQueueDescriptor begin
@autoproperty maxCommandBufferCount::NSUInteger
@autoproperty logState::id{MTLLogState} setter=setLogState
end

function MTLCommandQueueDescriptor()
handle = @objc [MTLCommandQueueDescriptor alloc]::id{MTLCommandQueueDescriptor}
obj = MTLCommandQueueDescriptor(handle)
finalizer(release, obj)
@objc [obj::id{MTLCommandQueueDescriptor} init]::id{MTLCommandQueueDescriptor}
return obj
end
christiangnrd marked this conversation as resolved.
Show resolved Hide resolved


export MTLCommandQueue

@objcwrapper immutable=false MTLCommandQueue <: NSObject
Expand All @@ -13,3 +31,10 @@ function MTLCommandQueue(dev::MTLDevice)
finalizer(release, obj)
return obj
end

function MTLCommandQueue(dev::MTLDevice, descriptor::MTLCommandQueueDescriptor)
handle = @objc [dev::id{MTLDevice} newCommandQueueWithDescriptor:descriptor::id{MTLCommandQueueDescriptor}]::id{MTLCommandQueue}
obj = MTLCommandQueue(handle)
finalizer(release, obj)
return obj
end
40 changes: 40 additions & 0 deletions lib/mtl/log_state.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
export MTLLogLevel

@cenum MTLLogLevel::NSInteger begin
MTLLogLevelUndefined = 0
MTLLogLevelDebug = 1
MTLLogLevelInfo = 2
MTLLogLevelNotice = 3
MTLLogLevelError = 4
MTLLogLevelFault = 5
end

export MTLLogStateDescriptor

@objcwrapper immutable=false MTLLogStateDescriptor <: NSObject

@objcproperties MTLLogStateDescriptor begin
@autoproperty level::MTLLogLevel setter=setLevel
@autoproperty bufferSize::NSInteger setter=setBufferSize
end

function MTLLogStateDescriptor()
handle = @objc [MTLLogStateDescriptor alloc]::id{MTLLogStateDescriptor}
obj = MTLLogStateDescriptor(handle)
finalizer(release, obj)
@objc [obj::id{MTLLogStateDescriptor} init]::id{MTLLogStateDescriptor}
return obj
end


export MTLLogState

@objcwrapper MTLLogState <: NSObject

function MTLLogState(dev::MTLDevice, descriptor::MTLLogStateDescriptor)
err = Ref{id{NSError}}(nil)
handle = @objc [dev::id{MTLDevice} newLogStateWithDescriptor:descriptor::id{MTLLogStateDescriptor}
error:err::Ptr{id{NSError}}]::id{MTLLogState}
err[] == nil || throw(NSError(err[]))
MTLLogState(handle)
end
1 change: 1 addition & 0 deletions src/Metal.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ include("device/intrinsics/memory.jl")
include("device/intrinsics/simd.jl")
include("device/intrinsics/version.jl")
include("device/intrinsics/atomics.jl")
include("device/intrinsics/output.jl")
include("device/quirks.jl")

# array essentials
Expand Down
8 changes: 4 additions & 4 deletions src/compiler/compilation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,9 @@ function compile(@nospecialize(job::CompilerJob))

@signpost_interval log=log_compiler() "Generate LLVM IR" begin
# TODO: on 1.9, this actually creates a context. cache those.
ir, entry = JuliaContext() do ctx
ir, entry, loggingEnabled = JuliaContext() do ctx
mod, meta = GPUCompiler.compile(:llvm, job)
string(mod), LLVM.name(meta.entry)
string(mod), LLVM.name(meta.entry), haskey(functions(mod), "air.os_log")
end
end

Expand Down Expand Up @@ -172,7 +172,7 @@ function compile(@nospecialize(job::CompilerJob))
end
end

return (; ir, air, metallib, entry)
return (; ir, air, metallib, entry, loggingEnabled)
end

# link into an executable kernel
Expand Down Expand Up @@ -213,5 +213,5 @@ end

# most of the time, we don't need the function object,
# so don't keep it alive unconditionally in GPUCompiler's caches
pipeline_state, return_function ? fun : nothing
pipeline_state, return_function ? fun : nothing, compiled.loggingEnabled
end
35 changes: 32 additions & 3 deletions src/compiler/execution.jl
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ mtlconvert(arg, cce=nothing) = adapt(Adaptor(cce), arg)
struct HostKernel{F,TT}
f::F
pipeline::MTLComputePipelineState
loggingEnabled::Bool
end

const mtlfunction_lock = ReentrantLock()
Expand All @@ -186,15 +187,15 @@ function mtlfunction(f::F, tt::TT=Tuple{}; name=nothing, kwargs...) where {F,TT}
cache = compiler_cache(dev)
source = methodinstance(F, tt)
config = compiler_config(dev; name, kwargs...)::MetalCompilerConfig
pipeline, _ = GPUCompiler.cached_compilation(cache, source, config, compile, link)
pipeline, _, loggingEnabled = GPUCompiler.cached_compilation(cache, source, config, compile, link)

# create a callable object that captures the function instance. we don't need to think
# about world age here, as GPUCompiler already does and will return a different object
h = hash(pipeline, hash(f, hash(tt)))
kernel = get(_kernel_instances, h, nothing)
if kernel === nothing
# create the kernel state object
kernel = HostKernel{F,tt}(f, pipeline)
kernel = HostKernel{F,tt}(f, pipeline, loggingEnabled)
_kernel_instances[h] = kernel
end
return kernel::HostKernel{F,tt}
Expand Down Expand Up @@ -275,7 +276,35 @@ end
(threads.width * threads.height * threads.depth) > kernel.pipeline.maxTotalThreadsPerThreadgroup &&
throw(ArgumentError("Number of threads in group ($(threads.width * threads.height * threads.depth)) should not exceed $(kernel.pipeline.maxTotalThreadsPerThreadgroup)"))

cmdbuf = MTLCommandBuffer(queue)
cmdbuf = if kernel.loggingEnabled
# TODO: make this a dynamic error, i.e., from the kernel (JuliaGPU/Metal.jl#433)
if macos_version() < v"15"
error("Logging is only supported on macOS 15 or higher")
end

if MTLCaptureManager().isCapturing
error("Logging is not supported while GPU frame capturing")
end

log_state_descriptor = MTLLogStateDescriptor()
log_state_descriptor.level = MTL.MTLLogLevelDebug
log_state = MTLLogState(queue.device, log_state_descriptor)

function log_handler(subSystem, category, logLevel, message)
print(String(NSString(message)))
return nothing
end

block = @objcblock(log_handler, Nothing, (id{NSString}, id{NSString}, NSInteger, id{NSString}))
@objc [log_state::id{MTLLogState} addLogHandler:block::id{NSBlock}]::Nothing

cmdbuf_descriptor = MTLCommandBufferDescriptor()
cmdbuf_descriptor.logState = log_state
MTLCommandBuffer(queue, cmdbuf_descriptor)
else
MTLCommandBuffer(queue)
end

cmdbuf.label = "MTLCommandBuffer($(nameof(kernel.f)))"
cce = MTLComputeCommandEncoder(cmdbuf)
argument_buffers = try
Expand Down
Loading
Loading