Skip to content

Commit

Permalink
consistency
Browse files Browse the repository at this point in the history
  • Loading branch information
anicusan committed Jan 4, 2025
1 parent 69462f3 commit 3d1cfb7
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 9 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "AcceleratedKernels"
uuid = "6a4ca0a5-0e36-4168-a932-d9be78d558f1"
authors = ["Andrei-Leonard Nicusan <[email protected]> and contributors"]
-version = "0.2.2"
+version = "0.2.3"

[deps]
ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197"
Expand Down
10 changes: 4 additions & 6 deletions src/accumulate/accumulate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -174,28 +174,26 @@ function _accumulate_impl!(
)
if backend isa GPU
if isnothing(dims)
-accumulate_1d!(
+return accumulate_1d!(
op, v, backend, alg,
init=init, inclusive=inclusive,
block_size=block_size, temp=temp, temp_flags=temp_flags,
)
-return v
else
-accumulate_nd!(
+return accumulate_nd!(
op, v, backend,
init=init, dims=dims, inclusive=inclusive,
block_size=block_size,
)
end
else
if isnothing(dims)
-accumulate_1d!(
+return accumulate_1d!(
op, v,
init=init, inclusive=inclusive,
)
-return v
else
-accumulate_nd!(
+return accumulate_nd!(
op, v,
init=init, dims=dims, inclusive=inclusive,
)
Expand Down
5 changes: 3 additions & 2 deletions src/accumulate/accumulate_nd.jl
Original file line number Diff line number Diff line change
Expand Up @@ -218,18 +218,19 @@ end
end
end

+_running_prefix = running_prefix[0x1]
@synchronize()

if block_offset + ai < length_dims
-total = op(running_prefix[0x1], temp[ai + bank_offset_a + 0x1])
+total = op(_running_prefix, temp[ai + bank_offset_a + 0x1])
v[
input_base_idx +
(block_offset + ai) * vstrides[dims] +
0x1
] = total
end
if block_offset + bi < length_dims
-total = op(running_prefix[0x1], temp[bi + bank_offset_b + 0x1])
+total = op(_running_prefix, temp[bi + bank_offset_b + 0x1])
v[
input_base_idx +
(block_offset + bi) * vstrides[dims] +
Expand Down

0 comments on commit 3d1cfb7

Please sign in to comment.