Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support XFB in MoltenVK #2169

Draft
wants to merge 61 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
10154f5
MSL: Add a mechanism to fix up shader outputs.
cdavis5e Aug 13, 2022
2127a3b
Merged in msl-shader-output-fixup (pull request #2)
cdavis5e Aug 23, 2022
4a8543e
MSL: Deduplicate function constants.
cdavis5e Feb 16, 2023
f195855
Merged in msl-duplicate-spec-id (pull request #8)
cdavis5e Feb 17, 2023
343ff6e
Checkpoint for transform feedback work.
cdavis5e May 9, 2023
048ac2d
Merge remote-tracking branch 'origin/master'
gpx1000 May 10, 2023
179c6e0
Get things building.
gpx1000 May 10, 2023
f1c0ad2
Checkpoint: Beginnings of writing XFB data.
cdavis5e May 11, 2023
117eaa3
get it building.
gpx1000 May 13, 2023
1e8cbe4
Merge branch 'master' into xfb
gpx1000 May 15, 2023
f1913aa
get xfb decorations shader to work.
gpx1000 May 16, 2023
f8a27d9
check in for direction adjustment.
gpx1000 May 19, 2023
cebb964
Dynamic is an undefined primitive type. xfb_primitive_type needs to …
gpx1000 May 19, 2023
37c0972
Working together with Chip
gpx1000 Jun 19, 2023
aab161a
fix warnings from CI
gpx1000 Jun 20, 2023
9d2329a
Merge branch 'master' into origin-xfb
gpx1000 Jun 20, 2023
111cebb
Merge remote-tracking branch 'origin/main' into xfb
cdavis5e Jul 11, 2023
e3cf900
Merge remote-tracking branch 'origin/main' into xfb
cdavis5e Jul 24, 2023
562b959
Make sure vertex functions that use transform feedback become Metal k…
cdavis5e Aug 10, 2023
d62fe77
Merge remote-tracking branch 'gpx1000/xfb' into xfb
cdavis5e Aug 17, 2023
36d39df
Add the transform feedback buffer parameters to the vertex shader.
cdavis5e Aug 18, 2023
35858fb
Make sure all used outputs, including builtins, get XFB buffers.
cdavis5e Aug 22, 2023
3c427de
Make sure builtins have the correct names in XFB buffers.
cdavis5e Aug 23, 2023
c352f94
Add command line parameter to set the primitive type assumed for tran…
cdavis5e Sep 7, 2023
001ff7d
Add a variable for the XFB counter buffer.
cdavis5e Sep 8, 2023
28babde
Really crappy checkpoint for XFB work.
cdavis5e Sep 13, 2023
fb520f4
Getting closer...
cdavis5e Sep 15, 2023
556c9fa
Fix indices of triangle strips to account for winding.
cdavis5e Sep 15, 2023
b6279e5
Write out triangle fans correctly.
cdavis5e Sep 16, 2023
579635a
Fix build.
cdavis5e Sep 16, 2023
b86f512
Correct instance term in index.
cdavis5e Sep 16, 2023
66ca6c4
Update counter after filling transform feedback buffers.
cdavis5e Sep 16, 2023
948651b
Add missing breaks.
cdavis5e Sep 16, 2023
e74800f
Add missing commas.
cdavis5e Sep 16, 2023
b352521
Add missing scale for triangle fan case.
cdavis5e Sep 16, 2023
2798c48
Hoist common index expression out.
cdavis5e Sep 16, 2023
b931900
Make sure the local copy of the output is declared.
cdavis5e Sep 16, 2023
ab2b37b
Don't add captured outputs to the regular output struct.
cdavis5e Sep 18, 2023
a1d92e7
Add offsets and padding to transform feedback structs.
cdavis5e Sep 18, 2023
2959f3a
Use mark_as_packable() to mark the buffer structs as needing repacking.
cdavis5e Sep 18, 2023
16dd1f1
Fix broken constant generated MSL.
cdavis5e Sep 18, 2023
742f725
Only create a per-patch output block for tessellation control shaders.
cdavis5e Sep 18, 2023
8f66f30
Make sure the local variable for an output block gets created.
cdavis5e Sep 18, 2023
2e14c91
Simplify the code to add members to the XFB buffer blocks.
cdavis5e Sep 18, 2023
8dbf250
Make sure captured outputs passed as implicit arguments have correct …
cdavis5e Sep 18, 2023
b020270
Only use qualified name for builtins in the entry point().
cdavis5e Sep 18, 2023
3521814
Remove extraneous right parentheses.
cdavis5e Sep 18, 2023
bf4f823
Clang-format the changes.
cdavis5e Sep 18, 2023
109959e
Add tests for transform feedback in MSL.
cdavis5e Sep 18, 2023
3bd855f
Merge branch 'main' into xfb
cdavis5e Sep 19, 2023
1154932
Merge remote-tracking branch 'origin' into xfb
cdavis5e Sep 19, 2023
a547b52
Merge remote-tracking branch 'steve/xfb' into xfb
cdavis5e Sep 19, 2023
64fa0b6
Attempt to fix MSVC build.
cdavis5e Sep 19, 2023
dada588
Try again to work around MSVC brokenness.
cdavis5e Sep 19, 2023
adb3a7b
Attempt to work around weird brokenness that only happens...
cdavis5e Sep 19, 2023
fec7607
Try again to get the stupid compiler on the builder to see that std::…
cdavis5e Sep 19, 2023
0393302
Testing hypothesis that C++11 doesn't support enum as a key for an un…
gpx1000 Sep 19, 2023
739a140
hypothesis was correct for unordered_map stands to reason unordered_s…
gpx1000 Sep 19, 2023
15a8b70
Merge remote-tracking branch 'origin' into xfb
cdavis5e Nov 17, 2023
575e75d
Merge remote-tracking branch 'origin' into xfb
cdavis5e Nov 29, 2023
8bcfd32
Unfinished support for XFB+tessellation.
cdavis5e Dec 17, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,7 @@ struct CLIArguments
bool msl_readwrite_texture_fences = true;
bool msl_agx_manual_cube_grad_fixup = false;
const char *msl_combined_sampler_suffix = nullptr;
CompilerMSL::Options::PrimitiveType msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::Dynamic;
bool glsl_emit_push_constant_as_ubo = false;
bool glsl_emit_ubo_as_plain_uniforms = false;
bool glsl_force_flattened_io_blocks = false;
Expand Down Expand Up @@ -968,7 +969,11 @@ static void print_help_msl()
"\t\tAll released Apple Silicon GPUs to date ignore one of the three partial derivatives\n"
"\t\tbased on the selected major axis, and expect the remaining derivatives to be\n"
"\t\tpartially transformed. This fixup gives correct results on Apple Silicon.\n"
"\t[--msl-combined-sampler-suffix <suffix>]:\n\t\tUses a custom suffix for combined samplers.\n");
"\t[--msl-combined-sampler-suffix <suffix>]:\n\t\tUses a custom suffix for combined samplers.\n"
"\t[--msl-xfb-primitive-type <type>]:\n\t\tGenerates code in a vertex shader to capture primitives of the\n\t\t"
"specified type for transform feedback. <type> may be one of dynamic,\n\t\t"
"point-list, line-list, line-strip, triangle-list, triangle-strip, or\n\t\t"
"triangle-fan. The default is \"dynamic\".\n");
// clang-format on
}

Expand Down Expand Up @@ -1243,6 +1248,7 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
msl_opts.manual_helper_invocation_updates = args.msl_manual_helper_invocation_updates;
msl_opts.check_discarded_frag_stores = args.msl_check_discarded_frag_stores;
msl_opts.sample_dref_lod_array_as_grad = args.msl_sample_dref_lod_array_as_grad;
msl_opts.xfb_primitive_type = args.msl_xfb_primitive_type;
msl_opts.ios_support_base_vertex_instance = true;
msl_opts.runtime_array_rich_descriptor = args.msl_runtime_array_rich_descriptor;
msl_opts.replace_recursive_inputs = args.msl_replace_recursive_inputs;
Expand Down Expand Up @@ -1811,6 +1817,25 @@ static int main_inner(int argc, char *argv[])
[&args](CLIParser &) { args.msl_runtime_array_rich_descriptor = true; });
cbs.add("--msl-replace-recursive-inputs",
[&args](CLIParser &) { args.msl_replace_recursive_inputs = true; });
// --msl-xfb-primitive-type <type>: picks the primitive type assumed for transform feedback capture.
// The value is fetched with next_value_string("dynamic") (presumably yielding "dynamic" when no value
// follows -- confirm in CLIParser). A name that matches no table entry leaves the current setting as is.
cbs.add("--msl-xfb-primitive-type", [&args](CLIParser &parser) {
	using PrimitiveType = CompilerMSL::Options::PrimitiveType;

	// Accepted spellings, checked in the order listed.
	const struct
	{
		const char *name;
		PrimitiveType value;
	} known_types[] = {
		{ "dynamic", PrimitiveType::Dynamic },
		{ "point-list", PrimitiveType::PointList },
		{ "line-list", PrimitiveType::LineList },
		{ "line-strip", PrimitiveType::LineStrip },
		{ "triangle-list", PrimitiveType::TriangleList },
		{ "triangle-strip", PrimitiveType::TriangleStrip },
		{ "triangle-fan", PrimitiveType::TriangleFan },
	};

	const char *requested = parser.next_value_string("dynamic");
	for (const auto &entry : known_types)
	{
		if (strcmp(requested, entry.name) == 0)
		{
			args.msl_xfb_primitive_type = entry.value;
			break;
		}
	}
});
cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); });
cbs.add("--rename-entry-point", [&args](CLIParser &parser) {
auto old_name = parser.next_string();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

// Struct with a single float4 member. main0 assigns vBar on a local instance and then copies
// that value into spvXfbOutput3.vBar.
struct VertOut
{
float4 vBar;
};

// Record type of the device buffer that main0 receives as spvXfb1 ([[buffer(13)]]).
struct spvXfbBuffer1
{
// 4 bytes of padding ahead of gl_Position.
// NOTE(review): presumably mirrors an xfb_offset in the source shader -- confirm.
char _m0_pad[4];
packed_float4 gl_Position;
};

// Record type of the device buffer that main0 receives as spvXfb2 ([[buffer(14)]]).
struct spvXfbBuffer2
{
// 16 bytes of padding ahead of vFoo.
// NOTE(review): presumably mirrors an xfb_offset in the source shader -- confirm.
char _m0_pad[16];
float4 vFoo;
};

// Record type of the device buffer that main0 receives as spvXfb3 ([[buffer(15)]]).
// vBar is the only member; there is no leading padding.
struct spvXfbBuffer3
{
float4 vBar;
};

// Kernel that captures three outputs into the device buffers spvXfb1..3. Each thread fills local
// records, stores them at a slot derived from its grid position, and thread (0, 0) finally advances
// the three byte counters spvXfbCounter1..3.
// NOTE(review): gl_GlobalInvocationID.x / .y look like vertex and instance indices -- confirm with the dispatch code.
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device atomic_uint* spvXfbCounter1 [[buffer(17)]], device spvXfbBuffer1* spvXfb1 [[buffer(13)]], device atomic_uint* spvXfbCounter2 [[buffer(18)]], device spvXfbBuffer2* spvXfb2 [[buffer(14)]], device atomic_uint* spvXfbCounter3 [[buffer(19)]], device spvXfbBuffer3* spvXfb3 [[buffer(15)]])
{
// Zero-initialised local copies of the records that are stored to the buffers below.
spvXfbBuffer1 spvXfbOutput1 = {};
spvXfbBuffer2 spvXfbOutput2 = {};
spvXfbBuffer3 spvXfbOutput3 = {};
VertOut _20 = {};
// Threads whose position is outside the grid size exit here.
// NOTE(review): this return precedes the threadgroup_barrier further down, so exiting threads never reach it.
if (any(gl_GlobalInvocationID >= spvStageInputSize))
return;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This breaks threadgroup_barrier.

spvXfbOutput1.gl_Position = float4(1.0);
spvXfbOutput2.vFoo = float4(3.0);
_20.vBar = float4(5.0);
spvXfbOutput3.vBar = _20.vBar;
// Slot of this thread's record: every y row occupies the x size rounded down to an even count.
uint spvXfbIndex = gl_GlobalInvocationID.y * (spvStageInputSize.x & ~1u) + gl_GlobalInvocationID.x;
// Buffer 1: load the current byte offset from the counter and rebase the buffer pointer by it.
uint spvInitOffset1 = atomic_load_explicit(spvXfbCounter1, memory_order_relaxed);
spvXfb1 = reinterpret_cast<device spvXfbBuffer1*>(reinterpret_cast<device char*>(spvXfb1) + spvInitOffset1);
// The store is skipped only for the last x position when that position is even (odd-sized row).
if ((gl_GlobalInvocationID.x & 1) || gl_GlobalInvocationID.x < spvStageInputSize.x - 1u)
spvXfb1[spvXfbIndex] = spvXfbOutput1;
// Buffer 2: same steps as buffer 1.
uint spvInitOffset2 = atomic_load_explicit(spvXfbCounter2, memory_order_relaxed);
spvXfb2 = reinterpret_cast<device spvXfbBuffer2*>(reinterpret_cast<device char*>(spvXfb2) + spvInitOffset2);
if ((gl_GlobalInvocationID.x & 1) || gl_GlobalInvocationID.x < spvStageInputSize.x - 1u)
spvXfb2[spvXfbIndex] = spvXfbOutput2;
// Buffer 3: same steps as buffer 1.
uint spvInitOffset3 = atomic_load_explicit(spvXfbCounter3, memory_order_relaxed);
spvXfb3 = reinterpret_cast<device spvXfbBuffer3*>(reinterpret_cast<device char*>(spvXfb3) + spvInitOffset3);
if ((gl_GlobalInvocationID.x & 1) || gl_GlobalInvocationID.x < spvStageInputSize.x - 1u)
spvXfb3[spvXfbIndex] = spvXfbOutput3;
// NOTE(review): threadgroup_barrier presumably covers only one threadgroup -- confirm that is enough for the dispatch used.
threadgroup_barrier(mem_flags::mem_device);
// Only the thread with x == 0 and y == 0 updates the counters.
if (all(gl_GlobalInvocationID.xy == 0))
{
// Record count for the whole grid; each counter grows by record size * record count bytes.
uint spvWritten = (spvStageInputSize.x & ~1u) * spvStageInputSize.y;
atomic_store_explicit(spvXfbCounter1, spvInitOffset1 + sizeof(*spvXfb1) * spvWritten, memory_order_relaxed);
atomic_store_explicit(spvXfbCounter2, spvInitOffset2 + sizeof(*spvXfb2) * spvWritten, memory_order_relaxed);
atomic_store_explicit(spvXfbCounter3, spvInitOffset3 + sizeof(*spvXfb3) * spvWritten, memory_order_relaxed);
}
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

// Struct with a single float4 member. main0 assigns vBar on a local instance and then copies
// that value into spvXfbOutput3.vBar.
struct VertOut
{
float4 vBar;
};

// Record type of the device buffer that main0 receives as spvXfb1 ([[buffer(13)]]).
struct spvXfbBuffer1
{
// 4 bytes of padding ahead of gl_Position.
// NOTE(review): presumably mirrors an xfb_offset in the source shader -- confirm.
char _m0_pad[4];
packed_float4 gl_Position;
};

// Record type of the device buffer that main0 receives as spvXfb2 ([[buffer(14)]]).
struct spvXfbBuffer2
{
// 16 bytes of padding ahead of vFoo.
// NOTE(review): presumably mirrors an xfb_offset in the source shader -- confirm.
char _m0_pad[16];
float4 vFoo;
};

// Record type of the device buffer that main0 receives as spvXfb3 ([[buffer(15)]]).
// vBar is the only member; there is no leading padding.
struct spvXfbBuffer3
{
float4 vBar;
};

// Kernel that captures three outputs into the device buffers spvXfb1..3. Within a y row, slots 2*i and
// 2*i + 1 receive the records of threads x == i and x == i + 1, so every interior thread is stored twice.
// Thread (0, 0) finally advances the three byte counters spvXfbCounter1..3.
// NOTE(review): gl_GlobalInvocationID.x / .y look like vertex and instance indices -- confirm with the dispatch code.
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device atomic_uint* spvXfbCounter1 [[buffer(17)]], device spvXfbBuffer1* spvXfb1 [[buffer(13)]], device atomic_uint* spvXfbCounter2 [[buffer(18)]], device spvXfbBuffer2* spvXfb2 [[buffer(14)]], device atomic_uint* spvXfbCounter3 [[buffer(19)]], device spvXfbBuffer3* spvXfb3 [[buffer(15)]])
{
// Zero-initialised local copies of the records that are stored to the buffers below.
spvXfbBuffer1 spvXfbOutput1 = {};
spvXfbBuffer2 spvXfbOutput2 = {};
spvXfbBuffer3 spvXfbOutput3 = {};
VertOut _25 = {};
// Threads whose position is outside the grid size exit here.
// NOTE(review): this return precedes the threadgroup_barrier further down, so exiting threads never reach it.
if (any(gl_GlobalInvocationID >= spvStageInputSize))
return;
spvXfbOutput1.gl_Position = float4(1.0);
spvXfbOutput2.vFoo = float4(3.0);
_25.vBar = float4(5.0);
spvXfbOutput3.vBar = _25.vBar;
// First slot owned by this thread: a y row holds 2 * (x size - 1) records.
uint spvXfbIndex = 2 * gl_GlobalInvocationID.y * (spvStageInputSize.x - 1u) + 2 * gl_GlobalInvocationID.x;
// Buffer 1: load the current byte offset from the counter and rebase the buffer pointer by it.
uint spvInitOffset1 = atomic_load_explicit(spvXfbCounter1, memory_order_relaxed);
spvXfb1 = reinterpret_cast<device spvXfbBuffer1*>(reinterpret_cast<device char*>(spvXfb1) + spvInitOffset1);
// Every thread except the last in the row stores at its own slot...
if (gl_GlobalInvocationID.x != spvStageInputSize.x - 1u)
spvXfb1[spvXfbIndex] = spvXfbOutput1;
// ...and every thread except the first also stores at the slot just before it.
if (gl_GlobalInvocationID.x != 0)
spvXfb1[spvXfbIndex - 1u] = spvXfbOutput1;
// Buffer 2: same steps as buffer 1.
uint spvInitOffset2 = atomic_load_explicit(spvXfbCounter2, memory_order_relaxed);
spvXfb2 = reinterpret_cast<device spvXfbBuffer2*>(reinterpret_cast<device char*>(spvXfb2) + spvInitOffset2);
if (gl_GlobalInvocationID.x != spvStageInputSize.x - 1u)
spvXfb2[spvXfbIndex] = spvXfbOutput2;
if (gl_GlobalInvocationID.x != 0)
spvXfb2[spvXfbIndex - 1u] = spvXfbOutput2;
// Buffer 3: same steps as buffer 1.
uint spvInitOffset3 = atomic_load_explicit(spvXfbCounter3, memory_order_relaxed);
spvXfb3 = reinterpret_cast<device spvXfbBuffer3*>(reinterpret_cast<device char*>(spvXfb3) + spvInitOffset3);
if (gl_GlobalInvocationID.x != spvStageInputSize.x - 1u)
spvXfb3[spvXfbIndex] = spvXfbOutput3;
if (gl_GlobalInvocationID.x != 0)
spvXfb3[spvXfbIndex - 1u] = spvXfbOutput3;
// NOTE(review): threadgroup_barrier presumably covers only one threadgroup -- confirm that is enough for the dispatch used.
threadgroup_barrier(mem_flags::mem_device);
// Only the thread with x == 0 and y == 0 updates the counters.
if (all(gl_GlobalInvocationID.xy == 0))
{
// Record count for the whole grid; each counter grows by record size * record count bytes.
uint spvWritten = 2 * (spvStageInputSize.x - 1u) * spvStageInputSize.y;
atomic_store_explicit(spvXfbCounter1, spvInitOffset1 + sizeof(*spvXfb1) * spvWritten, memory_order_relaxed);
atomic_store_explicit(spvXfbCounter2, spvInitOffset2 + sizeof(*spvXfb2) * spvWritten, memory_order_relaxed);
atomic_store_explicit(spvXfbCounter3, spvInitOffset3 + sizeof(*spvXfb3) * spvWritten, memory_order_relaxed);
}
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

// Struct with a single float4 member. main0 assigns vBar on a local instance and then copies
// that value into spvXfbOutput3.vBar.
struct VertOut
{
float4 vBar;
};

// Record type of the device buffer that main0 receives as spvXfb1 ([[buffer(13)]]).
struct spvXfbBuffer1
{
// 4 bytes of padding ahead of gl_Position.
// NOTE(review): presumably mirrors an xfb_offset in the source shader -- confirm.
char _m0_pad[4];
packed_float4 gl_Position;
};

// Record type of the device buffer that main0 receives as spvXfb2 ([[buffer(14)]]).
struct spvXfbBuffer2
{
// 16 bytes of padding ahead of vFoo.
// NOTE(review): presumably mirrors an xfb_offset in the source shader -- confirm.
char _m0_pad[16];
float4 vFoo;
};

// Record type of the device buffer that main0 receives as spvXfb3 ([[buffer(15)]]).
// vBar is the only member; there is no leading padding.
struct spvXfbBuffer3
{
float4 vBar;
};

// Kernel that captures three outputs into the device buffers spvXfb1..3. Every thread stores exactly one
// record per buffer at the slot y * (x size) + x, and thread (0, 0) finally advances the three byte
// counters spvXfbCounter1..3.
// NOTE(review): gl_GlobalInvocationID.x / .y look like vertex and instance indices -- confirm with the dispatch code.
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device atomic_uint* spvXfbCounter1 [[buffer(17)]], device spvXfbBuffer1* spvXfb1 [[buffer(13)]], device atomic_uint* spvXfbCounter2 [[buffer(18)]], device spvXfbBuffer2* spvXfb2 [[buffer(14)]], device atomic_uint* spvXfbCounter3 [[buffer(19)]], device spvXfbBuffer3* spvXfb3 [[buffer(15)]])
{
// Zero-initialised local copies of the records that are stored to the buffers below.
spvXfbBuffer1 spvXfbOutput1 = {};
spvXfbBuffer2 spvXfbOutput2 = {};
spvXfbBuffer3 spvXfbOutput3 = {};
VertOut _25 = {};
// Threads whose position is outside the grid size exit here.
// NOTE(review): this return precedes the threadgroup_barrier further down, so exiting threads never reach it.
if (any(gl_GlobalInvocationID >= spvStageInputSize))
return;
spvXfbOutput1.gl_Position = float4(1.0);
spvXfbOutput2.vFoo = float4(3.0);
_25.vBar = float4(5.0);
spvXfbOutput3.vBar = _25.vBar;
// Slot of this thread's record: row-major over the x/y grid.
uint spvXfbIndex = gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x;
// Buffer 1: load the current byte offset from the counter, rebase the buffer pointer by it, then store.
uint spvInitOffset1 = atomic_load_explicit(spvXfbCounter1, memory_order_relaxed);
spvXfb1 = reinterpret_cast<device spvXfbBuffer1*>(reinterpret_cast<device char*>(spvXfb1) + spvInitOffset1);
spvXfb1[spvXfbIndex] = spvXfbOutput1;
// Buffer 2: same steps as buffer 1.
uint spvInitOffset2 = atomic_load_explicit(spvXfbCounter2, memory_order_relaxed);
spvXfb2 = reinterpret_cast<device spvXfbBuffer2*>(reinterpret_cast<device char*>(spvXfb2) + spvInitOffset2);
spvXfb2[spvXfbIndex] = spvXfbOutput2;
// Buffer 3: same steps as buffer 1.
uint spvInitOffset3 = atomic_load_explicit(spvXfbCounter3, memory_order_relaxed);
spvXfb3 = reinterpret_cast<device spvXfbBuffer3*>(reinterpret_cast<device char*>(spvXfb3) + spvInitOffset3);
spvXfb3[spvXfbIndex] = spvXfbOutput3;
// NOTE(review): threadgroup_barrier presumably covers only one threadgroup -- confirm that is enough for the dispatch used.
threadgroup_barrier(mem_flags::mem_device);
// Only the thread with x == 0 and y == 0 updates the counters.
if (all(gl_GlobalInvocationID.xy == 0))
{
// Record count for the whole grid; each counter grows by record size * record count bytes.
uint spvWritten = spvStageInputSize.x * spvStageInputSize.y;
atomic_store_explicit(spvXfbCounter1, spvInitOffset1 + sizeof(*spvXfb1) * spvWritten, memory_order_relaxed);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is XFB ordering maintained here? XFB data must be emitted in-order with input primitives.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The actual XFB buffers are indexed by the global invocation ID.

atomic_store_explicit(spvXfbCounter2, spvInitOffset2 + sizeof(*spvXfb2) * spvWritten, memory_order_relaxed);
atomic_store_explicit(spvXfbCounter3, spvInitOffset3 + sizeof(*spvXfb3) * spvWritten, memory_order_relaxed);
}
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

// Struct with a single float4 member. main0 assigns vBar on a local instance and then copies
// that value into spvXfbOutput3.vBar.
struct VertOut
{
float4 vBar;
};

// Record type of the device buffer that main0 receives as spvXfb1 ([[buffer(13)]]).
struct spvXfbBuffer1
{
// 4 bytes of padding ahead of gl_Position.
// NOTE(review): presumably mirrors an xfb_offset in the source shader -- confirm.
char _m0_pad[4];
packed_float4 gl_Position;
};

// Record type of the device buffer that main0 receives as spvXfb2 ([[buffer(14)]]).
struct spvXfbBuffer2
{
// 16 bytes of padding ahead of vFoo.
// NOTE(review): presumably mirrors an xfb_offset in the source shader -- confirm.
char _m0_pad[16];
float4 vFoo;
};

// Record type of the device buffer that main0 receives as spvXfb3 ([[buffer(15)]]).
// vBar is the only member; there is no leading padding.
struct spvXfbBuffer3
{
float4 vBar;
};

// Kernel that captures three outputs into the device buffers spvXfb1..3. Within a y row the records are
// laid out in groups of three: slot 3*i gets thread x == 0, slot 3*i + 1 gets thread x == i + 1 and
// slot 3*i + 2 gets thread x == i + 2. Thread (0, 0) finally advances the byte counters spvXfbCounter1..3.
// NOTE(review): gl_GlobalInvocationID.x / .y look like vertex and instance indices -- confirm with the dispatch code.
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device atomic_uint* spvXfbCounter1 [[buffer(17)]], device spvXfbBuffer1* spvXfb1 [[buffer(13)]], device atomic_uint* spvXfbCounter2 [[buffer(18)]], device spvXfbBuffer2* spvXfb2 [[buffer(14)]], device atomic_uint* spvXfbCounter3 [[buffer(19)]], device spvXfbBuffer3* spvXfb3 [[buffer(15)]])
{
// Zero-initialised local copies of the records that are stored to the buffers below.
spvXfbBuffer1 spvXfbOutput1 = {};
spvXfbBuffer2 spvXfbOutput2 = {};
spvXfbBuffer3 spvXfbOutput3 = {};
VertOut _25 = {};
// Threads whose position is outside the grid size exit here.
// NOTE(review): this return precedes the threadgroup_barrier further down, so exiting threads never reach it.
if (any(gl_GlobalInvocationID >= spvStageInputSize))
return;
spvXfbOutput1.gl_Position = float4(1.0);
spvXfbOutput2.vFoo = float4(3.0);
_25.vBar = float4(5.0);
spvXfbOutput3.vBar = _25.vBar;
// First slot of this y row: a row holds 3 * subsat(x size, 2) records.
uint spvXfbBaseIndex = 3 * gl_GlobalInvocationID.y * subsat(spvStageInputSize.x, 2u);
// For x >= 1 this equals base + 3 * (x - 1) + 1; it is only used in the else branches below.
uint spvXfbIndex = spvXfbBaseIndex + 3 * gl_GlobalInvocationID.x - 2u;
// Buffer 1: load the current byte offset from the counter and rebase the buffer pointer by it.
uint spvInitOffset1 = atomic_load_explicit(spvXfbCounter1, memory_order_relaxed);
spvXfb1 = reinterpret_cast<device spvXfbBuffer1*>(reinterpret_cast<device char*>(spvXfb1) + spvInitOffset1);
if (gl_GlobalInvocationID.x == 0)
{
// Thread x == 0 writes its record into the first slot of every group of three in the row.
for (uint i = 0; i < subsat(spvStageInputSize.x, 2u); ++i)
spvXfb1[spvXfbBaseIndex + 3 * i] = spvXfbOutput1;
}
else
{
// Second slot of group x - 1, skipped for the last thread in the row.
if (gl_GlobalInvocationID.x != spvStageInputSize.x - 1u)
spvXfb1[spvXfbIndex] = spvXfbOutput1;
// Third slot of group x - 2, skipped for x == 1.
if (gl_GlobalInvocationID.x != 1)
spvXfb1[spvXfbIndex - 2u] = spvXfbOutput1;
}
// Buffer 2: same steps as buffer 1.
uint spvInitOffset2 = atomic_load_explicit(spvXfbCounter2, memory_order_relaxed);
spvXfb2 = reinterpret_cast<device spvXfbBuffer2*>(reinterpret_cast<device char*>(spvXfb2) + spvInitOffset2);
if (gl_GlobalInvocationID.x == 0)
{
for (uint i = 0; i < subsat(spvStageInputSize.x, 2u); ++i)
spvXfb2[spvXfbBaseIndex + 3 * i] = spvXfbOutput2;
}
else
{
if (gl_GlobalInvocationID.x != spvStageInputSize.x - 1u)
spvXfb2[spvXfbIndex] = spvXfbOutput2;
if (gl_GlobalInvocationID.x != 1)
spvXfb2[spvXfbIndex - 2u] = spvXfbOutput2;
}
// Buffer 3: same steps as buffer 1.
uint spvInitOffset3 = atomic_load_explicit(spvXfbCounter3, memory_order_relaxed);
spvXfb3 = reinterpret_cast<device spvXfbBuffer3*>(reinterpret_cast<device char*>(spvXfb3) + spvInitOffset3);
if (gl_GlobalInvocationID.x == 0)
{
for (uint i = 0; i < subsat(spvStageInputSize.x, 2u); ++i)
spvXfb3[spvXfbBaseIndex + 3 * i] = spvXfbOutput3;
}
else
{
if (gl_GlobalInvocationID.x != spvStageInputSize.x - 1u)
spvXfb3[spvXfbIndex] = spvXfbOutput3;
if (gl_GlobalInvocationID.x != 1)
spvXfb3[spvXfbIndex - 2u] = spvXfbOutput3;
}
// NOTE(review): threadgroup_barrier presumably covers only one threadgroup -- confirm that is enough for the dispatch used.
threadgroup_barrier(mem_flags::mem_device);
// Only the thread with x == 0 and y == 0 updates the counters.
if (all(gl_GlobalInvocationID.xy == 0))
{
// Record count for the whole grid; each counter grows by record size * record count bytes.
uint spvWritten = 3 * subsat(spvStageInputSize.x, 2u) * spvStageInputSize.y;
atomic_store_explicit(spvXfbCounter1, spvInitOffset1 + sizeof(*spvXfb1) * spvWritten, memory_order_relaxed);
atomic_store_explicit(spvXfbCounter2, spvInitOffset2 + sizeof(*spvXfb2) * spvWritten, memory_order_relaxed);
atomic_store_explicit(spvXfbCounter3, spvInitOffset3 + sizeof(*spvXfb3) * spvWritten, memory_order_relaxed);
}
}

Loading
Loading