Skip to content

Commit

Permalink
Rework cached compilation (#445)
Browse files Browse the repository at this point in the history
By moving certain checks to run time, we can avoid relying on a hacky generator
that (incorrectly) exposes the codegen world age. To further simplify things, we
now expect the user to pass in a method instance, and use that directly to key
the compilation cache.
  • Loading branch information
maleadt authored May 15, 2023
1 parent 8385e56 commit 3de799a
Show file tree
Hide file tree
Showing 14 changed files with 256 additions and 332 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "GPUCompiler"
uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
authors = ["Tim Besard <[email protected]>"]
version = "0.19.4"
version = "0.20.0"

[deps]
ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
Expand Down
4 changes: 3 additions & 1 deletion src/GPUCompiler.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ using Libdl

using Scratch: @get_scratch!

const CC = Core.Compiler
using Core: MethodInstance, CodeInstance, CodeInfo

include("utils.jl")

# compiler interface and implementations
Expand All @@ -36,7 +39,6 @@ include("debug.jl")
include("driver.jl")

# other reusable functionality
include("cache.jl")
include("execution.jl")
include("reflection.jl")

Expand Down
65 changes: 0 additions & 65 deletions src/cache.jl

This file was deleted.

78 changes: 78 additions & 0 deletions src/execution.jl
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,81 @@ function assign_args!(code, _args)

return vars, var_exprs
end


## cached compilation

# global lock taken by `cached_compilation` around its look-ups and insertions in the
# user-provided compilation cache
const cache_lock = ReentrantLock()

"""
    cached_compilation(cache::AbstractDict, src::MethodInstance, cfg::CompilerConfig,
                       compiler::Function, linker::Function)

Compile a method instance `src` with configuration `cfg`, by invoking `compiler` and
`linker` and storing the result in `cache`.

The `cache` argument should be a dictionary that can be indexed using any value and store
whatever the `linker` function returns. The `compiler` function should take a `CompilerJob`
and return data that can be cached across sessions (e.g., LLVM IR). This data is then
forwarded, along with the `CompilerJob`, to the `linker` function which is allowed to create
session-dependent objects (e.g., a `CuModule`).
"""
function cached_compilation(cache::AbstractDict{<:Any,V},
                            src::MethodInstance, cfg::CompilerConfig,
                            compiler::Function, linker::Function) where {V}
    # NOTE: one and the same cache is indexed in two ways: with (mi, world, cfg) keys
    #       for the fast look-up below, and with CodeInstance keys for the slower
    #       look-up done by `actual_compilation`. both are needed for performance, yet
    #       the ci->obj mapping cannot live in a separate private cache (e.g. next to
    #       the entries of the CodeCache) because some clients expect to be able to
    #       wipe the cache (e.g. CUDA.jl's `device_reset!`)

    # fast path: index the cache directly for the *current* world + compiler config.
    #
    # NOTE: the MethodInstance's objectid is stored instead of the instance itself to
    #       avoid an expensive allocation. Base does this with a multi-level lookup,
    #       first keyed on the mi, then a linear scan over the (typically few) entries.
    age = tls_world_age()
    fast_key = (objectid(src), age, cfg)

    # NOTE: plain lock/unlock instead of lock(::Function)/@lock/get!, to avoid
    #       try/catch and closure overhead
    lock(cache_lock)
    hit = get(cache, fast_key, nothing)
    unlock(cache_lock)

    # a cached object can be returned as-is, unless a compile hook is installed
    if hit !== nothing && compile_hook[] === nothing
        return hit::V
    end

    # miss (or hook installed): take the slow path and remember its result
    compiled = actual_compilation(cache, src, age, cfg, compiler, linker)::V
    lock(cache_lock)
    cache[fast_key] = compiled
    unlock(cache_lock)
    return compiled
end

# Slow path of `cached_compilation`: build a `CompilerJob` for `src`/`cfg`/`world`, try to
# reuse an object previously stored in `cache` under the matching `CodeInstance`, and
# otherwise run `compiler` and `linker` and store the linked object under that key.
#
# Returns the cached or freshly linked object. When a compile hook is installed and a
# cached object exists, `compiler` is still invoked, but the cached object is returned
# without linking again.
#
# `cache` is the same dictionary that `cached_compilation` accesses under `cache_lock`,
# so every access here holds that lock as well. The lock is never held while
# `compiler` or `linker` run.
@noinline function actual_compilation(cache::AbstractDict, src::MethodInstance, world::UInt,
                                      cfg::CompilerConfig, compiler::Function,
                                      linker::Function)
    job = CompilerJob(src, cfg, world)
    obj = nothing

    # fast path: find an applicable CodeInstance and see if we have compiled it before
    ci = ci_cache_lookup(ci_cache(job), src, world, world)::Union{Nothing,CodeInstance}
    if ci !== nothing
        # try/finally is fine here: this function is already off the hot path
        lock(cache_lock)
        try
            obj = get(cache, ci, nothing)
        finally
            unlock(cache_lock)
        end
    end

    # slow path: compile and link
    if obj === nothing || compile_hook[] !== nothing
        # TODO: consider loading the assembly from an on-disk cache here
        asm = compiler(job)

        if obj !== nothing
            # we got here because of a *compile* hook; don't bother linking
            return obj
        end

        obj = linker(job, asm)

        # compilation must have populated the code cache, hence the stricter assertion
        ci = ci_cache_lookup(ci_cache(job), src, world, world)::CodeInstance
        lock(cache_lock)
        try
            cache[ci] = obj
        finally
            unlock(cache_lock)
        end
    end

    return obj
end
14 changes: 11 additions & 3 deletions src/interface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,14 @@ struct CompilerJob{T,P}
new{T,P}(src, cfg, world)
end

# Hash a `CompilerJob` by mixing its `source`, `config` and `world` fields, in that
# order, into the seed `h`.
function Base.hash(job::CompilerJob, h::UInt)
    return hash(job.world, hash(job.config, hash(job.source, h)))
end


## contexts

Expand Down Expand Up @@ -257,7 +265,7 @@ valid_function_pointer(@nospecialize(job::CompilerJob), ptr::Ptr{Cvoid}) = false
# the codeinfo cache to use
function ci_cache(@nospecialize(job::CompilerJob))
lock(GLOBAL_CI_CACHES_LOCK) do
cache = get!(GLOBAL_CI_CACHES, (typeof(job.config.target), inference_params(job), optimization_params(job))) do
cache = get!(GLOBAL_CI_CACHES, job.config) do
CodeCache()
end
return cache
Expand All @@ -269,7 +277,7 @@ method_table(@nospecialize(job::CompilerJob)) = GLOBAL_METHOD_TABLE

# the inference parameters to use when constructing the GPUInterpreter
function inference_params(@nospecialize(job::CompilerJob))
return InferenceParams(;unoptimize_throw_blocks=false)
return CC.InferenceParams(; unoptimize_throw_blocks=false)
end

# the optimization parameters to use when constructing the GPUInterpreter
Expand All @@ -284,7 +292,7 @@ function optimization_params(@nospecialize(job::CompilerJob))
kwargs = (kwargs..., inline_cost_threshold=typemax(Int))
end

return OptimizationParams(;kwargs...)
return CC.OptimizationParams(;kwargs...)
end

# how much debuginfo to emit
Expand Down
Loading

2 comments on commit 3de799a

@maleadt
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/83621

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.20.0 -m "<description of version>" 3de799afb1e145a72aaa30dc7c29222d32802693
git push origin v0.20.0

Please sign in to comment.