From 10154f5c5e02b0c789cbb06b0476caaac7caa813 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Sat, 13 Aug 2022 16:55:34 -0700 Subject: [PATCH 01/48] MSL: Add a mechanism to fix up shader outputs. This is analogous to the existing support for fixing up shader inputs. It is intended to be used with tessellation to add implicit builtins that are read from a later stage, despite not being written in an earlier stage. (Believe it or not, this is in fact legal in Vulkan.) Helps fix 8 CTS tests under `dEQP-VK.pipeline.*.no_position`. (Eight other tests work solely by accident without this change.) --- main.cpp | 39 ++++++++-- spirv_cross_c.cpp | 52 +++++++++++-- spirv_cross_c.h | 39 ++++++---- spirv_msl.cpp | 195 ++++++++++++++++++++++++++++++++++++++++++++-- spirv_msl.hpp | 67 +++++++++++----- 5 files changed, 339 insertions(+), 53 deletions(-) diff --git a/main.cpp b/main.cpp index 81db89ce0..5db9134da 100644 --- a/main.cpp +++ b/main.cpp @@ -681,7 +681,8 @@ struct CLIArguments SmallVector msl_device_argument_buffers; SmallVector> msl_dynamic_buffers; SmallVector> msl_inline_uniform_blocks; - SmallVector msl_shader_inputs; + SmallVector msl_shader_inputs; + SmallVector msl_shader_outputs; SmallVector pls_in; SmallVector pls_out; SmallVector remaps; @@ -874,6 +875,10 @@ static void print_help_msl() "\t\t can be 'any32', 'any16', 'u16', 'u8', or 'other', to indicate a 32-bit opaque value, 16-bit opaque value, 16-bit unsigned integer, 8-bit unsigned integer, " "or other-typed variable. is the vector length of the variable, which must be greater than or equal to that declared in the shader.\n" "\t\tUseful if shader stage interfaces don't match up, as pipeline creation might otherwise fail.\n" + "\t[--msl-shader-output ]:\n\t\tSpecify the format of the shader output at .\n" + "\t\t can be 'any32', 'any16', 'u16', 'u8', or 'other', to indicate a 32-bit opaque value, 16-bit opaque value, 16-bit unsigned integer, 8-bit unsigned integer, " + "or other-typed variable. is the vector length of the variable, which must be greater than or equal to that declared in the shader.\n" + "\t\tUseful if shader stage interfaces don't match up, as pipeline creation might otherwise fail.\n" "\t[--msl-multi-patch-workgroup]:\n\t\tUse the new style of tessellation control processing, where multiple patches are processed per workgroup.\n" "\t\tThis should increase throughput by ensuring all the GPU's SIMD lanes are occupied, but it is not compatible with the old style.\n" "\t\tIn addition, this style also passes input variables in buffers directly instead of using vertex attribute processing.\n" @@ -1178,6 +1183,8 @@ static string compile_iteration(const CLIArguments &args, std::vector msl_comp->add_inline_uniform_block(v.first, v.second); for (auto &v : args.msl_shader_inputs) msl_comp->add_msl_shader_input(v); + for (auto &v : args.msl_shader_outputs) + msl_comp->add_msl_shader_output(v); if (args.msl_combined_sampler_suffix) msl_comp->set_combined_sampler_suffix(args.msl_combined_sampler_suffix); } @@ -1581,23 +1588,41 @@ static int main_inner(int argc, char *argv[]) cbs.add("--msl-no-clip-distance-user-varying", [&args](CLIParser &) { args.msl_enable_clip_distance_user_varying = false; }); cbs.add("--msl-shader-input", [&args](CLIParser &parser) { - MSLShaderInput input; + MSLShaderInterfaceVariable input; // Make sure next_uint() is called in-order. input.location = parser.next_uint(); const char *format = parser.next_value_string("other"); if (strcmp(format, "any32") == 0) - input.format = MSL_SHADER_INPUT_FORMAT_ANY32; + input.format = MSL_SHADER_VARIABLE_FORMAT_ANY32; else if (strcmp(format, "any16") == 0) - input.format = MSL_SHADER_INPUT_FORMAT_ANY16; + input.format = MSL_SHADER_VARIABLE_FORMAT_ANY16; else if (strcmp(format, "u16") == 0) - input.format = MSL_SHADER_INPUT_FORMAT_UINT16; + input.format = MSL_SHADER_VARIABLE_FORMAT_UINT16; else if (strcmp(format, "u8") == 0) - input.format = MSL_SHADER_INPUT_FORMAT_UINT8; + input.format = MSL_SHADER_VARIABLE_FORMAT_UINT8; else - input.format = MSL_SHADER_INPUT_FORMAT_OTHER; + input.format = MSL_SHADER_VARIABLE_FORMAT_OTHER; input.vecsize = parser.next_uint(); args.msl_shader_inputs.push_back(input); }); + cbs.add("--msl-shader-output", [&args](CLIParser &parser) { + MSLShaderInterfaceVariable output; + // Make sure next_uint() is called in-order. + output.location = parser.next_uint(); + const char *format = parser.next_value_string("other"); + if (strcmp(format, "any32") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_ANY32; + else if (strcmp(format, "any16") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_ANY16; + else if (strcmp(format, "u16") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_UINT16; + else if (strcmp(format, "u8") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_UINT8; + else + output.format = MSL_SHADER_VARIABLE_FORMAT_OTHER; + output.vecsize = parser.next_uint(); + args.msl_shader_outputs.push_back(input); + }); cbs.add("--msl-multi-patch-workgroup", [&args](CLIParser &) { args.msl_multi_patch_workgroup = true; }); cbs.add("--msl-vertex-for-tessellation", [&args](CLIParser &) { args.msl_vertex_for_tessellation = true; }); cbs.add("--msl-additional-fixed-sample-mask", diff --git a/spirv_cross_c.cpp b/spirv_cross_c.cpp index 4a62b635c..60e9125e3 100644 --- a/spirv_cross_c.cpp +++ b/spirv_cross_c.cpp @@ -1136,9 +1136,9 @@ spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler compiler, const } auto &msl = *static_cast(compiler->compiler.get()); - MSLShaderInput attr; + MSLShaderInterfaceVariable attr; attr.location = va->location; - attr.format = static_cast(va->format); + attr.format = static_cast(va->format); attr.builtin = static_cast(va->builtin); msl.add_msl_shader_input(attr); return SPVC_SUCCESS; @@ -1149,7 +1149,7 @@ spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler compiler, const #endif } -spvc_result spvc_compiler_msl_add_shader_input(spvc_compiler compiler, const spvc_msl_shader_input *si) +spvc_result spvc_compiler_msl_add_shader_input(spvc_compiler compiler, const spvc_msl_shader_interface_var *si) { #if SPIRV_CROSS_C_API_MSL if (compiler->backend != SPVC_BACKEND_MSL) @@ -1159,9 +1159,9 @@ spvc_result spvc_compiler_msl_add_shader_input(spvc_compiler compiler, const spv } auto &msl = *static_cast(compiler->compiler.get()); - MSLShaderInput input; + MSLShaderInterfaceVariable input; input.location = si->location; - input.format = static_cast(si->format); + input.format = static_cast(si->format); input.builtin = static_cast(si->builtin); input.vecsize = si->vecsize; msl.add_msl_shader_input(input); @@ -1173,6 +1173,30 @@ spvc_result spvc_compiler_msl_add_shader_input(spvc_compiler compiler, const spv #endif } +spvc_result spvc_compiler_msl_add_shader_output(spvc_compiler compiler, const spvc_msl_shader_interface_var *so) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + MSLShaderInterfaceVariable output; + output.location = so->location; + output.format = static_cast(so->format); + output.builtin = static_cast(so->builtin); + output.vecsize = so->vecsize; + msl.add_msl_shader_output(output); + return SPVC_SUCCESS; +#else + (void)so; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + spvc_result spvc_compiler_msl_add_resource_binding(spvc_compiler compiler, const spvc_msl_resource_binding *binding) { @@ -1298,6 +1322,24 @@ spvc_bool spvc_compiler_msl_is_shader_input_used(spvc_compiler compiler, unsigne #endif } +spvc_bool spvc_compiler_msl_is_shader_output_used(spvc_compiler compiler, unsigned location) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; + } + + auto &msl = *static_cast(compiler->compiler.get()); + return msl.is_msl_shader_output_used(location) ? SPVC_TRUE : SPVC_FALSE; +#else + (void)location; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; +#endif +} + spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, unsigned location) { return spvc_compiler_msl_is_shader_input_used(compiler, location); diff --git a/spirv_cross_c.h b/spirv_cross_c.h index a35a5d651..d77815109 100644 --- a/spirv_cross_c.h +++ b/spirv_cross_c.h @@ -290,21 +290,27 @@ typedef enum spvc_msl_index_type } spvc_msl_index_type; /* Maps to C++ API. */ -typedef enum spvc_msl_shader_input_format +typedef enum spvc_msl_shader_variable_format { - SPVC_MSL_SHADER_INPUT_FORMAT_OTHER = 0, - SPVC_MSL_SHADER_INPUT_FORMAT_UINT8 = 1, - SPVC_MSL_SHADER_INPUT_FORMAT_UINT16 = 2, - SPVC_MSL_SHADER_INPUT_FORMAT_ANY16 = 3, - SPVC_MSL_SHADER_INPUT_FORMAT_ANY32 = 4, + SPVC_MSL_SHADER_VARIABLE_FORMAT_OTHER = 0, + SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT8 = 1, + SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT16 = 2, + SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY16 = 3, + SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY32 = 4, /* Deprecated names. */ - SPVC_MSL_VERTEX_FORMAT_OTHER = SPVC_MSL_SHADER_INPUT_FORMAT_OTHER, - SPVC_MSL_VERTEX_FORMAT_UINT8 = SPVC_MSL_SHADER_INPUT_FORMAT_UINT8, - SPVC_MSL_VERTEX_FORMAT_UINT16 = SPVC_MSL_SHADER_INPUT_FORMAT_UINT16, + SPVC_MSL_VERTEX_FORMAT_OTHER = SPVC_MSL_SHADER_VARIABLE_FORMAT_OTHER, + SPVC_MSL_VERTEX_FORMAT_UINT8 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT8, + SPVC_MSL_VERTEX_FORMAT_UINT16 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT16, + SPVC_MSL_SHADER_INPUT_FORMAT_OTHER = SPVC_MSL_SHADER_VARIABLE_FORMAT_OTHER, + SPVC_MSL_SHADER_INPUT_FORMAT_UINT8 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT8, + SPVC_MSL_SHADER_INPUT_FORMAT_UINT16 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT16, + SPVC_MSL_SHADER_INPUT_FORMAT_ANY16 = SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY16, + SPVC_MSL_SHADER_INPUT_FORMAT_ANY32 = SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY32, + SPVC_MSL_SHADER_INPUT_FORMAT_INT_MAX = 0x7fffffff -} spvc_msl_shader_input_format, spvc_msl_vertex_format; +} spvc_msl_shader_variable_format, spvc_msl_shader_input_format, spvc_msl_vertex_format; /* Maps to C++ API. Deprecated; use spvc_msl_shader_input. */ typedef struct spvc_msl_vertex_attribute @@ -330,17 +336,21 @@ typedef struct spvc_msl_vertex_attribute SPVC_PUBLIC_API void spvc_msl_vertex_attribute_init(spvc_msl_vertex_attribute *attr); /* Maps to C++ API. */ -typedef struct spvc_msl_shader_input +typedef struct spvc_msl_shader_interface_var { unsigned location; spvc_msl_vertex_format format; SpvBuiltIn builtin; unsigned vecsize; -} spvc_msl_shader_input; +} spvc_msl_shader_interface_var, spvc_msl_shader_input; /* * Initializes the shader input struct. */ +SPVC_PUBLIC_API void spvc_msl_shader_interface_var_init(spvc_msl_shader_interface_var *var); +/* + * Deprecated. Use spvc_msl_shader_interface_var_init(). + */ SPVC_PUBLIC_API void spvc_msl_shader_input_init(spvc_msl_shader_input *input); /* Maps to C++ API. */ @@ -786,13 +796,16 @@ SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_resource_binding(spvc_compiler compiler, const spvc_msl_resource_binding *binding); SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_shader_input(spvc_compiler compiler, - const spvc_msl_shader_input *input); + const spvc_msl_shader_interface_var *input); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_shader_output(spvc_compiler compiler, + const spvc_msl_shader_interface_var *output); SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler, unsigned desc_set); SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_argument_buffer_device_address_space(spvc_compiler compiler, unsigned desc_set, spvc_bool device_address); /* Obsolete, use is_shader_input_used. */ SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, unsigned location); SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_shader_input_used(spvc_compiler compiler, unsigned location); +SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_shader_output_used(spvc_compiler compiler, unsigned location); SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_resource_used(spvc_compiler compiler, SpvExecutionModel model, diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 583a92323..b1605e463 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -56,13 +56,20 @@ CompilerMSL::CompilerMSL(ParsedIR &&ir_) { } -void CompilerMSL::add_msl_shader_input(const MSLShaderInput &si) +void CompilerMSL::add_msl_shader_input(const MSLShaderInterfaceVariable &si) { inputs_by_location[{si.location, si.component}] = si; if (si.builtin != BuiltInMax && !inputs_by_builtin.count(si.builtin)) inputs_by_builtin[si.builtin] = si; } +void CompilerMSL::add_msl_shader_output(const MSLShaderInterfaceVariable &so) +{ + outputs_by_location[{so.location, so.component}] = so; + if (so.builtin != BuiltInMax && !outputs_by_builtin.count(so.builtin)) + outputs_by_builtin[so.builtin] = so; +} + void CompilerMSL::add_msl_resource_binding(const MSLResourceBinding &binding) { StageSetBinding tuple = { binding.stage, binding.desc_set, binding.binding }; @@ -150,6 +157,13 @@ bool CompilerMSL::is_msl_shader_input_used(uint32_t location) location_inputs_in_use_fallback.count(location) == 0; } +bool CompilerMSL::is_msl_shader_output_used(uint32_t location) +{ + // Don't report internal location allocations to app. + return location_outputs_in_use.count(location) != 0 && + location_outputs_in_use_fallback.count(location) == 0; +} + uint32_t CompilerMSL::get_automatic_builtin_input_location(spv::BuiltIn builtin) const { auto itr = builtin_to_automatic_input_location.find(builtin); @@ -159,6 +173,15 @@ uint32_t CompilerMSL::get_automatic_builtin_input_location(spv::BuiltIn builtin) return itr->second; } +uint32_t CompilerMSL::get_automatic_builtin_output_location(spv::BuiltIn builtin) const +{ + auto itr = builtin_to_automatic_output_location.find(builtin); + if (itr == builtin_to_automatic_output_location.end()) + return k_unknown_location; + else + return itr->second; +} + bool CompilerMSL::is_msl_resource_binding_used(ExecutionModel model, uint32_t desc_set, uint32_t binding) const { StageSetBinding tuple = { model, desc_set, binding }; @@ -2051,15 +2074,27 @@ void CompilerMSL::mark_as_workgroup_struct(SPIRType &type) void CompilerMSL::mark_location_as_used_by_shader(uint32_t location, const SPIRType &type, StorageClass storage, bool fallback) { - if (storage != StorageClassInput) - return; - uint32_t count = type_to_location_count(type); - for (uint32_t i = 0; i < count; i++) + switch (storage) { - location_inputs_in_use.insert(location + i); - if (fallback) - location_inputs_in_use_fallback.insert(location + i); + case StorageClassInput: + for (uint32_t i = 0; i < count; i++) + { + location_inputs_in_use.insert(location + i); + if (fallback) + location_inputs_in_use_fallback.insert(location + i); + } + break; + case StorageClassOutput: + for (uint32_t i = 0; i < count; i++) + { + location_outputs_in_use.insert(location + i); + if (fallback) + location_outputs_in_use_fallback.insert(location + i); + } + break; + default: + return; } } @@ -2360,6 +2395,12 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); mark_location_as_used_by_shader(locn, type, storage); } + else if (is_builtin && capture_output_to_buffer && storage == StorageClassOutput && outputs_by_builtin.count(builtin)) + { + uint32_t locn = outputs_by_builtin[builtin].location; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, type, storage); + } if (get_decoration_bitset(var.self).get(DecorationComponent)) { @@ -2525,6 +2566,12 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); mark_location_as_used_by_shader(locn, *usable_type, storage); } + else if (is_builtin && capture_output_to_buffer && storage == StorageClassOutput && outputs_by_builtin.count(builtin)) + { + uint32_t locn = outputs_by_builtin[builtin].location + i; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, *usable_type, storage); + } else if (is_builtin && (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance)) { // Declare the Clip/CullDistance as [[user(clip/cullN)]]. @@ -2739,6 +2786,13 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass mark_location_as_used_by_shader(location, *usable_type, storage); location++; } + else if (is_builtin && capture_output_to_buffer && storage == StorageClassOutput && outputs_by_builtin.count(builtin)) + { + location = outputs_by_builtin[builtin].location + i; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, *usable_type, storage); + location++; + } else if (is_builtin && (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance)) { // Declare the Clip/CullDistance as [[user(clip/cullN)]]. @@ -2933,6 +2987,13 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor mark_location_as_used_by_shader(location, get(mbr_type_id), storage); location++; } + else if (is_builtin && capture_output_to_buffer && storage == StorageClassOutput && outputs_by_builtin.count(builtin)) + { + location = outputs_by_builtin[builtin].location; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, get(mbr_type_id), storage); + location++; + } // Copy the component location, if present. if (has_member_decoration(var_type.self, mbr_idx, DecorationComponent)) @@ -3862,6 +3923,67 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) } } + if (capture_output_to_buffer && storage == StorageClassOutput) + { + // For captured output, add all inputs from the next stage to ensure + // the struct containing them is the correct size and layout. This is + // necessary for certain implicit builtins that may nonetheless be read, + // even when they aren't written. + for (auto &output : outputs_by_location) + { + if (location_outputs_in_use.count(output.first.location) != 0) + continue; + + // Create a fake variable to put at the location. + uint32_t offset = ir.increase_bound_by(4); + uint32_t type_id = offset; + uint32_t array_type_id = offset + 1; + uint32_t ptr_type_id = offset + 2; + uint32_t var_id = offset + 3; + + SPIRType type; + switch (output.second.format) + { + case MSL_SHADER_INPUT_FORMAT_UINT16: + case MSL_SHADER_INPUT_FORMAT_ANY16: + type.basetype = SPIRType::UShort; + type.width = 16; + break; + case MSL_SHADER_INPUT_FORMAT_ANY32: + default: + type.basetype = SPIRType::UInt; + type.width = 32; + break; + } + type.vecsize = output.second.vecsize; + set(type_id, type); + + if (get_execution_model() == ExecutionModelTessellationControl) + { + type.array.push_back(0); + type.array_size_literal.push_back(true); + type.parent_type = type_id; + set(array_type_id, type); + } + + type.pointer = true; + type.pointer_depth++; + type.parent_type = get_execution_model() == ExecutionModelTessellationControl ? array_type_id : type_id; + type.storage = storage; + auto &ptr_type = set(ptr_type_id, type); + ptr_type.self = type.parent_type; + + auto &fake_var = set(var_id, ptr_type_id, storage); + set_decoration(var_id, DecorationLocation, output.first.location); + if (output.first.component) + set_decoration(var_id, DecorationComponent, output.first.component); + + meta.strip_array = true; + meta.allow_local_declaration = false; + add_variable_to_interface_block(storage, ib_var_ref, ib_type, fake_var, meta); + } + } + // When multiple variables need to access same location, // unroll locations one by one and we will flatten output or input as necessary. for (auto &loc : meta.location_meta) @@ -11241,6 +11363,16 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in return join(" [[", loc_qual, "]]"); } + if (execution.model == ExecutionModelVertex && msl_options.vertex_for_tessellation && type.storage == StorageClassOutput) + { + // For this type of shader, we always arrange for it to capture its + // output to a buffer. For this reason, qualifiers are irrelevant here. + if (is_builtin) + // We still have to assign a location so the output struct will sort correctly. + get_or_allocate_builtin_output_member_location(builtin, type.self, index); + return ""; + } + // Tessellation control function inputs if (execution.model == ExecutionModelTessellationControl && type.storage == StorageClassInput) { @@ -11283,6 +11415,9 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in { // For this type of shader, we always arrange for it to capture its // output to a buffer. For this reason, qualifiers are irrelevant here. + if (is_builtin) + // We still have to assign a location so the output struct will sort correctly. + get_or_allocate_builtin_output_member_location(builtin, type.self, index); return ""; } @@ -11573,6 +11708,50 @@ uint32_t CompilerMSL::get_or_allocate_builtin_input_member_location(spv::BuiltIn return loc; } +uint32_t CompilerMSL::get_or_allocate_builtin_output_member_location(spv::BuiltIn builtin, + uint32_t type_id, uint32_t index, + uint32_t *comp) +{ + uint32_t loc = get_member_location(type_id, index, comp); + if (loc != k_unknown_location) + return loc; + loc = 0; + + if (comp) + *comp = k_unknown_component; + + // Late allocation. Find a location which is unused by the application. + // This can happen for built-in outputs in tessellation which are mixed and matched with user inputs. + auto &mbr_type = get(get(type_id).member_types[index]); + uint32_t count = type_to_location_count(mbr_type); + + const auto location_range_in_use = [this](uint32_t location, uint32_t location_count) -> bool { + for (uint32_t i = 0; i < location_count; i++) + if (location_outputs_in_use.count(location + i) != 0) + return true; + return false; + }; + + while (location_range_in_use(loc, count)) + loc++; + + set_member_decoration(type_id, index, DecorationLocation, loc); + + // Triangle tess level inputs are shared in one packed float4; + // mark both builtins as sharing one location. + if (get_execution_mode_bitset().get(ExecutionModeTriangles) && + (builtin == BuiltInTessLevelInner || builtin == BuiltInTessLevelOuter)) + { + builtin_to_automatic_output_location[BuiltInTessLevelInner] = loc; + builtin_to_automatic_output_location[BuiltInTessLevelOuter] = loc; + } + else + builtin_to_automatic_output_location[builtin] = loc; + + mark_location_as_used_by_shader(loc, mbr_type, StorageClassOutput, true); + return loc; +} + // Returns the type declaration for a function, including the // entry type if the current function is the entry point function string CompilerMSL::func_type_decl(SPIRType &type) diff --git a/spirv_msl.hpp b/spirv_msl.hpp index c15159cf4..4b9d88da1 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -34,34 +34,39 @@ namespace SPIRV_CROSS_NAMESPACE { -// Indicates the format of a shader input. Currently limited to specifying +// Indicates the format of a shader interface variable. Currently limited to specifying // if the input is an 8-bit unsigned integer, 16-bit unsigned integer, or // some other format. -enum MSLShaderInputFormat +enum MSLShaderVariableFormat { - MSL_SHADER_INPUT_FORMAT_OTHER = 0, - MSL_SHADER_INPUT_FORMAT_UINT8 = 1, - MSL_SHADER_INPUT_FORMAT_UINT16 = 2, - MSL_SHADER_INPUT_FORMAT_ANY16 = 3, - MSL_SHADER_INPUT_FORMAT_ANY32 = 4, + MSL_SHADER_VARIABLE_FORMAT_OTHER = 0, + MSL_SHADER_VARIABLE_FORMAT_UINT8 = 1, + MSL_SHADER_VARIABLE_FORMAT_UINT16 = 2, + MSL_SHADER_VARIABLE_FORMAT_ANY16 = 3, + MSL_SHADER_VARIABLE_FORMAT_ANY32 = 4, // Deprecated aliases. - MSL_VERTEX_FORMAT_OTHER = MSL_SHADER_INPUT_FORMAT_OTHER, - MSL_VERTEX_FORMAT_UINT8 = MSL_SHADER_INPUT_FORMAT_UINT8, - MSL_VERTEX_FORMAT_UINT16 = MSL_SHADER_INPUT_FORMAT_UINT16, - - MSL_SHADER_INPUT_FORMAT_INT_MAX = 0x7fffffff + MSL_VERTEX_FORMAT_OTHER = MSL_SHADER_VARIABLE_FORMAT_OTHER, + MSL_VERTEX_FORMAT_UINT8 = MSL_SHADER_VARIABLE_FORMAT_UINT8, + MSL_VERTEX_FORMAT_UINT16 = MSL_SHADER_VARIABLE_FORMAT_UINT16, + MSL_SHADER_INPUT_FORMAT_OTHER = MSL_SHADER_VARIABLE_FORMAT_OTHER, + MSL_SHADER_INPUT_FORMAT_UINT8 = MSL_SHADER_VARIABLE_FORMAT_UINT8, + MSL_SHADER_INPUT_FORMAT_UINT16 = MSL_SHADER_VARIABLE_FORMAT_UINT16, + MSL_SHADER_INPUT_FORMAT_ANY16 = MSL_SHADER_VARIABLE_FORMAT_ANY16, + MSL_SHADER_INPUT_FORMAT_ANY32 = MSL_SHADER_VARIABLE_FORMAT_ANY32, + + MSL_SHADER_VARIABLE_FORMAT_INT_MAX = 0x7fffffff }; -// Defines MSL characteristics of an input variable at a particular location. +// Defines MSL characteristics of a shader interface variable at a particular location. // After compilation, it is possible to query whether or not this location was used. // If vecsize is nonzero, it must be greater than or equal to the vecsize declared in the shader, // or behavior is undefined. -struct MSLShaderInput +struct MSLShaderInterfaceVariable { uint32_t location = 0; uint32_t component = 0; - MSLShaderInputFormat format = MSL_SHADER_INPUT_FORMAT_OTHER; + MSLShaderVariableFormat format = MSL_SHADER_VARIABLE_FORMAT_OTHER; spv::BuiltIn builtin = spv::BuiltInMax; uint32_t vecsize = 0; }; @@ -539,10 +544,15 @@ class CompilerMSL : public CompilerGLSL explicit CompilerMSL(const ParsedIR &ir); explicit CompilerMSL(ParsedIR &&ir); - // input is a shader input description used to fix up shader input variables. + // input is a shader interface variable description used to fix up shader input variables. // If shader inputs are provided, is_msl_shader_input_used() will return true after - // calling ::compile() if the location was used by the MSL code. - void add_msl_shader_input(const MSLShaderInput &input); + // calling ::compile() if the location were used by the MSL code. + void add_msl_shader_input(const MSLShaderInterfaceVariable &input); + + // output is a shader interface variable description used to fix up shader output variables. + // If shader outputs are provided, is_msl_shader_output_used() will return true after + // calling ::compile() if the location were used by the MSL code. + void add_msl_shader_output(const MSLShaderInterfaceVariable &output); // resource is a resource binding to indicate the MSL buffer, // texture or sampler index to use for a particular SPIR-V description set @@ -577,6 +587,9 @@ class CompilerMSL : public CompilerGLSL // Query after compilation is done. This allows you to check if an input location was used by the shader. bool is_msl_shader_input_used(uint32_t location); + // Query after compilation is done. This allows you to check if an output location were used by the shader. + bool is_msl_shader_output_used(uint32_t location); + // If not using add_msl_shader_input, it's possible // that certain builtin attributes need to be automatically assigned locations. // This is typical for tessellation builtin inputs such as tess levels, gl_Position, etc. @@ -584,6 +597,13 @@ class CompilerMSL : public CompilerGLSL // add_msl_shader_input or the builtin is not used, otherwise returns N in [[attribute(N)]]. uint32_t get_automatic_builtin_input_location(spv::BuiltIn builtin) const; + // If not using add_msl_shader_output, it's possible + // that certain builtin attributes need to be automatically assigned locations. + // This is typical for tessellation builtin outputs such as tess levels, gl_Position, etc. + // This returns k_unknown_location if the location were explicitly assigned with + // add_msl_shader_output or the builtin were not used, otherwise returns N in [[attribute(N)]]. + uint32_t get_automatic_builtin_output_location(spv::BuiltIn builtin) const; + // NOTE: Only resources which are remapped using add_msl_resource_binding will be reported here. // Constexpr samplers are always assumed to be emitted. // No specific MSLResourceBinding remapping is required for constexpr samplers as long as they are remapped @@ -894,6 +914,8 @@ class CompilerMSL : public CompilerGLSL uint32_t get_member_location(uint32_t type_id, uint32_t index, uint32_t *comp = nullptr) const; uint32_t get_or_allocate_builtin_input_member_location(spv::BuiltIn builtin, uint32_t type_id, uint32_t index, uint32_t *comp = nullptr); + uint32_t get_or_allocate_builtin_output_member_location(spv::BuiltIn builtin, + uint32_t type_id, uint32_t index, uint32_t *comp = nullptr); uint32_t get_physical_tess_level_array_size(spv::BuiltIn builtin) const; @@ -1004,12 +1026,17 @@ class CompilerMSL : public CompilerGLSL Options msl_options; std::set spv_function_implementations; // Must be ordered to ensure declarations are in a specific order. - std::map inputs_by_location; - std::unordered_map inputs_by_builtin; + std::map inputs_by_location; + std::unordered_map inputs_by_builtin; + std::map outputs_by_location; + std::unordered_map outputs_by_builtin; std::unordered_set location_inputs_in_use; std::unordered_set location_inputs_in_use_fallback; + std::unordered_set location_outputs_in_use; + std::unordered_set location_outputs_in_use_fallback; std::unordered_map fragment_output_components; std::unordered_map builtin_to_automatic_input_location; + std::unordered_map builtin_to_automatic_output_location; std::set pragma_lines; std::set typedef_lines; SmallVector vars_needing_early_declaration; From 4a8543ec53e635f6064d17448cfaf003a5b1c101 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Thu, 16 Feb 2023 14:56:38 -0800 Subject: [PATCH 02/48] MSL: Deduplicate function constants. It is possible in SPIR-V to declare multiple specialization constants with the same constant ID. The most common cause of this in GLSL is defining a spec constant, then declaring the workgroup size to use that spec constant by its ID. But, MSL forbids defining multiple function constants with the same function constant ID. So, we must only emit one definition of the actual function constant (with the `[[function_constant(id)]]` attribute); but we can point the other variables at this one definition. Fixes three tests in the Vulkan CTS under `dEQP-VK.compute.basic.max_local_size_*`. --- .../comp/local-size-duplicate-spec-id.comp | 23 ++++++++++++++++ .../comp/local-size-duplicate-spec-id.comp | 26 +++++++++++++++++++ .../comp/local-size-duplicate-spec-id.comp | 15 +++++++++++ spirv_msl.cpp | 23 +++++++++++++--- 4 files changed, 83 insertions(+), 4 deletions(-) create mode 100644 reference/opt/shaders-msl/comp/local-size-duplicate-spec-id.comp create mode 100644 reference/shaders-msl/comp/local-size-duplicate-spec-id.comp create mode 100644 shaders-msl/comp/local-size-duplicate-spec-id.comp diff --git a/reference/opt/shaders-msl/comp/local-size-duplicate-spec-id.comp b/reference/opt/shaders-msl/comp/local-size-duplicate-spec-id.comp new file mode 100644 index 000000000..19a56fc9f --- /dev/null +++ b/reference/opt/shaders-msl/comp/local-size-duplicate-spec-id.comp @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct StorageBuffer +{ + uint values[1]; +}; + +constant uint _22_tmp [[function_constant(0)]]; +constant uint _22 = is_function_constant_defined(_22_tmp) ? _22_tmp : 1u; +constant uint _23_tmp [[function_constant(1)]]; +constant uint _23 = is_function_constant_defined(_23_tmp) ? _23_tmp : 1u; +constant uint _24_tmp [[function_constant(2)]]; +constant uint _24 = is_function_constant_defined(_24_tmp) ? _24_tmp : 1u; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(_22, _23, _24); + +kernel void main0(device StorageBuffer& ssbo [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) +{ + ssbo.values[gl_LocalInvocationIndex] = 1u; +} + diff --git a/reference/shaders-msl/comp/local-size-duplicate-spec-id.comp b/reference/shaders-msl/comp/local-size-duplicate-spec-id.comp new file mode 100644 index 000000000..84b9efd20 --- /dev/null +++ b/reference/shaders-msl/comp/local-size-duplicate-spec-id.comp @@ -0,0 +1,26 @@ +#include +#include + +using namespace metal; + +struct StorageBuffer +{ + uint values[1]; +}; + +constant int local_size_x_val_tmp [[function_constant(0)]]; +constant int local_size_x_val = is_function_constant_defined(local_size_x_val_tmp) ? local_size_x_val_tmp : 1; +constant int local_size_y_val_tmp [[function_constant(1)]]; +constant int local_size_y_val = is_function_constant_defined(local_size_y_val_tmp) ? local_size_y_val_tmp : 1; +constant int local_size_z_val_tmp [[function_constant(2)]]; +constant int local_size_z_val = is_function_constant_defined(local_size_z_val_tmp) ? local_size_z_val_tmp : 1; +constant uint _22 = is_function_constant_defined(local_size_x_val_tmp) ? local_size_x_val_tmp : 1u; +constant uint _23 = is_function_constant_defined(local_size_y_val_tmp) ? local_size_y_val_tmp : 1u; +constant uint _24 = is_function_constant_defined(local_size_z_val_tmp) ? local_size_z_val_tmp : 1u; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(_22, _23, _24); + +kernel void main0(device StorageBuffer& ssbo [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) +{ + ssbo.values[gl_LocalInvocationIndex] = 1u; +} + diff --git a/shaders-msl/comp/local-size-duplicate-spec-id.comp b/shaders-msl/comp/local-size-duplicate-spec-id.comp new file mode 100644 index 000000000..060858b97 --- /dev/null +++ b/shaders-msl/comp/local-size-duplicate-spec-id.comp @@ -0,0 +1,15 @@ +#version 450 + +layout(constant_id=0) const int local_size_x_val = 1; +layout(constant_id=1) const int local_size_y_val = 1; +layout(constant_id=2) const int local_size_z_val = 1; + +layout(local_size_x_id=0, local_size_y_id=1, local_size_z_id=2) in; + +layout(set=0, binding=0) buffer StorageBuffer { + uint values[]; +} ssbo; + +void main() { + ssbo.values[gl_LocalInvocationIndex] = 1u; +} diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 5f4c0adde..b1d52e833 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -7344,6 +7344,20 @@ void CompilerMSL::emit_specialization_constants_and_structs() emitted = false; declared_structs.clear(); + // It is possible to have multiple spec constants that use the same spec constant ID. + // The most common cause of this is defining spec constants in GLSL while also declaring + // the workgroup size to use those spec constants. But, Metal forbids declaring more than + // one variable with the same function constant ID. + // In this case, we must only declare one variable with the [[function_constant(id)]] + // attribute, and use its initializer to initialize all the spec constants with + // that ID. + std::unordered_map unique_func_constants; + for (const auto &spec_constant : get_specialization_constants()) + { + if (!unique_func_constants.count(spec_constant.constant_id)) + unique_func_constants.insert(make_pair(spec_constant.constant_id, spec_constant.id)); + } + for (auto &id_ : ir.ids_for_constant_undef_or_type) { auto &id = ir.ids[id_]; @@ -7367,7 +7381,8 @@ void CompilerMSL::emit_specialization_constants_and_structs() string sc_type_name = type_to_glsl(type); add_resource_name(c.self); string sc_name = to_name(c.self); - string sc_tmp_name = sc_name + "_tmp"; + uint32_t constant_id = get_decoration(c.self, DecorationSpecId); + string sc_tmp_name = to_name(unique_func_constants[constant_id]) + "_tmp"; // Function constants are only supported in MSL 1.2 and later. // If we don't support it just declare the "default" directly. @@ -7377,10 +7392,10 @@ void CompilerMSL::emit_specialization_constants_and_structs() if (msl_options.supports_msl_version(1, 2) && has_decoration(c.self, DecorationSpecId) && !c.is_used_as_array_length) { - uint32_t constant_id = get_decoration(c.self, DecorationSpecId); // Only scalar, non-composite values can be function constants. - statement("constant ", sc_type_name, " ", sc_tmp_name, " [[function_constant(", constant_id, - ")]];"); + if (unique_func_constants[constant_id] == c.self) + statement("constant ", sc_type_name, " ", sc_tmp_name, " [[function_constant(", constant_id, + ")]];"); statement("constant ", sc_type_name, " ", sc_name, " = is_function_constant_defined(", sc_tmp_name, ") ? ", sc_tmp_name, " : ", constant_expression(c), ";"); } From 343ff6eb2606b0b7bcf0efc487e6034cd71caea3 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Tue, 9 May 2023 13:30:37 -0700 Subject: [PATCH 03/48] Checkpoint for transform feedback work. Does analysis of outputs and sorts them into buffers. Nothing else yet. --- spirv_msl.cpp | 143 ++++++++++++++++++++++++++++++++++++++++++++++++++ spirv_msl.hpp | 38 ++++++++++++++ 2 files changed, 181 insertions(+) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index f2b74d5a1..8eadcf551 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -17569,6 +17569,149 @@ void CompilerMSL::activate_argument_buffer_resources() }); } +void CompilerMSL::analyze_xfb_buffers() +{ + // Gather all used outputs and sort them out into transform feedback buffers. + + struct XfbOutput + { + SPIRVariable *var; + string name; + uint32_t member_index; + uint32_t offset; + bool block; + }; + SmallVector xfb_outputs[kMaxXfbBuffers]; + + for (uint32_t i = 0; i < kMaxXfbBuffers; ++i) + { + xfb_buffers[i] = 0; + xfb_locals[i] = 0; + xfb_strides[i] = 0; + } + + ir.for_each_typed_id([&](uint32_t self, SPIRVariable &var) { + auto &type = get_variable_data_type(var); + if (var.storage != StorageClassOutput || is_hidden_variable(var) || + (!has_decoration(self, DecorationXfbBuffer) && !has_decoration(type.self, DecorationBlock))) + return; + + uint32_t xfb_buffer_num, xfb_stride; + if (has_decoration(self, DecorationXfbBuffer)) + { + xfb_buffer_num = get_decoration(self, DecorationXfbBuffer); + xfb_stride = get_decoration(self, DecorationXfbStride); + + if (xfb_buffer_num >= kMaxXfbBuffers) + SPIRV_CROSS_THROW("Shader uses more than 4 transform feedback buffers."); + + // According to the spec, individual outputs or blocks are decorated with + // XfbStride to indicate the stride between two successive vertices in the buffer, + // but all XfbStrides for a given XfbBuffer must agree. + xfb_strides[xfb_buffer_num] = xfb_stride; + } + + if (type.basetype == SPIRType::Struct) + { + for (uint32_t i = 0; i < type.member_types.size(); ++i) + { + // According to Vulkan VUID 04716: + // "Only variables or block members in the output interface + // decorated with Offset can be captured for transform + // feedback..." + if (!has_member_decoration(type.self, i, DecorationOffset)) + continue; + uint32_t xfb_offset = get_member_decoration(type.self, i, DecorationOffset); + uint32_t mbr_xfb_buffer_num = has_member_decoration(type.self, i, DecorationXfbBuffer) ? get_member_decoration(type.self, i, DecorationXfbBuffer) : xfb_buffer_num; + xfb_outputs[mbr_xfb_buffer_num].emplace_back({&var, to_member_name(type, i), i, xfb_offset, true}); + if (has_member_decoration(type.self, i, DecorationXfbStride)) + xfb_strides[mbr_xfb_buffer_num] = get_member_decoration(type.self, i, DecorationXfbStride); + } + } + else + { + if (!has_decoration(type.self, DecorationOffset)) + return; + uint32_t xfb_offset = get_decoration(self, DecorationOffset); + xfb_outputs[xfb_buffer_num].emplace_back({&var, to_name(self), 0, xfb_offset, false}); + } + }); + + for (uint32_t xfb_buffer = 0; xfb_buffer < kMaxXfbBuffers; xfb_buffer++) + { + auto &outputs = xfb_outputs[xfb_buffer]; + if (outputs.empty()) + continue; + + uint32_t next_id = ir.increase_bound_by(5); + uint32_t local_var_id = next_id + 1; + uint32_t type_id = next_id + 2; + uint32_t ptr_type_id = next_id + 3; + uint32_t local_ptr_type_id = next_id + 4; + xfb_buffers[xfb_buffer] = next_id; + xfb_locals[xfb_buffer] = local_var_id; + + auto &buffer_type = set(type_id); + buffer_type.basetype = SPIRType::Struct; + buffer_type.storage = StorageClassStorageBuffer; + // Need to mark the type as a Block to enable this. + set_decoration(type_id, DecorationBlock); + set_name(type_id, join("spvXfbBuffer", xfb_buffer)); + + auto &ptr_type = set(ptr_type_id); + ptr_type = buffer_type; + ptr_type.pointer = true; + ptr_type.pointer_depth++; + ptr_type.parent_type = type_id; + + auto &local_ptr_type = set(local_ptr_type_id); + local_ptr_type = ptr_type; + local_ptr_type.storage = StorageClassFunction; + + set(local_var_id, local_ptr_type_id, StorageClassFunction); + set_name(local_var_id, join("spvXfbOutput", xfb_buffer)); + + uint32_t buffer_variable_id = next_id; + set(buffer_variable_id, ptr_type_id, StorageClassUniform); + set_name(buffer_variable_id, join("spvXfb", xfb_buffer)); + + // Members must be emitted in Offset order. + stable_sort(begin(outputs), end(outputs), [&](const XfbOutput &lhs, const XfbOutput &rhs) -> bool { + return lhs.offset < rhs.offset; + }); + + uint32_t member_index = 0; + for (auto &output : outputs) + { + auto &var = *output.var; + auto &type = get_variable_data_type(var); + + string mbr_name = ensure_valid_name(output.name, "m"); + set_member_name(buffer_type.self, member_index, mbr_name); + + if (!output.is_block) + { + // Drop pointer information when we emit the outputs into a struct. + buffer_type.member_types.push_back(get_variable_data_type_id(var)); + set_qualified_name(var.self, join(to_name(local_var_id), ".", mbr_name)); + } + else + { + // FIXME: Implement this! + } + + set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationInterfaceOrigID, + var.self); + member_index++; + + // FIXME: Still to do: + // - Add locals to entry point + // - Add buffer arguments to entry point + // - Make sure Xfb-captured outputs aren't in "normal" capture_output_to_buffer + } + } +} + bool CompilerMSL::using_builtin_array() const { return msl_options.force_native_arrays || is_using_builtin_array; diff --git a/spirv_msl.hpp b/spirv_msl.hpp index 2bc17b122..154fdad7a 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -287,6 +287,8 @@ static const uint32_t kArgumentBufferBinding = ~(3u); static const uint32_t kMaxArgumentBuffers = 8; +static const uint32_t kMaxXfbBuffers = 4; + // The arbitrary maximum for the nesting of array of array copies. static const uint32_t kArrayCopyMultidimMax = 6; @@ -496,6 +498,27 @@ class CompilerMSL : public CompilerGLSL // so it can be enabled only when the bug is present. bool sample_dref_lod_array_as_grad = false; + // Known primitive types. Largely uses the same values as VkPrimitiveTopology. + enum class PrimitiveType + { + Dynamic = -1, + PointList, + LineList, + LineStrip, + TriangleList, + TriangleStrip, + TriangleFan, + LineListWithAdjacency, + LineStripWithAdjacency, + TriangleListWithAdjacency, + TriangleStripWithAdjacency, + // 10 reserved for patch list + }; + + // Indicates the kind of input primitive. Only needed for vertex shaders that have the + // Xfb execution mode set; used to control storage of transformed vertices. + PrimitiveType xfb_primitive_type = PrimitiveType::Dynamic; + bool is_ios() const { return platform == iOS; @@ -546,6 +569,15 @@ class CompilerMSL : public CompilerGLSL get_entry_point().model == spv::ExecutionModelTessellationEvaluation); } + // Provide feedback to calling API to allow runtime to bind buffers + // for transform feedback if a vertex pipeline shader requires it. + bool needs_transform_feedback() const + { + auto &execution = get_entry_point(); + return execution.flags.get(spv::ExecutionModeXfb) && (execution.model == spv::ExecutionModelVertex || + execution.model == spv::ExecutionModelTessellationEvaluation); + } + // Provide feedback to calling API to allow it to pass an auxiliary // swizzle buffer if the shader needs it. bool needs_swizzle_buffer() const @@ -1183,6 +1215,10 @@ class CompilerMSL : public CompilerGLSL std::unordered_set atomic_image_vars; // Emulate texture2D atomic operations std::unordered_set pull_model_inputs; + VariableID xfb_buffers[kMaxXfbBuffers]; + VariableID xfb_locals[kMaxXfbBuffers]; + uint32_t xfb_strides[kMaxXfbBuffers]; + // Must be ordered since array is in a specific order. std::map> buffers_requiring_dynamic_offset; @@ -1202,6 +1238,8 @@ class CompilerMSL : public CompilerGLSL void add_argument_buffer_padding_sampler_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind); void add_argument_buffer_padding_type(uint32_t mbr_type_id, SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, uint32_t count); + void analyze_xfb_buffers(); + uint32_t get_target_components_for_fragment_location(uint32_t location) const; uint32_t build_extended_vector_type(uint32_t type_id, uint32_t components, SPIRType::BaseType basetype = SPIRType::Unknown); From 179c6e064f45d47323b33ddad0c7c61fc135cdce Mon Sep 17 00:00:00 2001 From: gpx1000 Date: Wed, 10 May 2023 16:14:56 -0700 Subject: [PATCH 04/48] Get things building. --- spirv_msl.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 8eadcf551..651f005dd 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -17623,7 +17623,7 @@ void CompilerMSL::analyze_xfb_buffers() continue; uint32_t xfb_offset = get_member_decoration(type.self, i, DecorationOffset); uint32_t mbr_xfb_buffer_num = has_member_decoration(type.self, i, DecorationXfbBuffer) ? get_member_decoration(type.self, i, DecorationXfbBuffer) : xfb_buffer_num; - xfb_outputs[mbr_xfb_buffer_num].emplace_back({&var, to_member_name(type, i), i, xfb_offset, true}); + xfb_outputs[mbr_xfb_buffer_num].emplace_back({&var, to_member_name(type, i), i, xfb_offset, true}); if (has_member_decoration(type.self, i, DecorationXfbStride)) xfb_strides[mbr_xfb_buffer_num] = get_member_decoration(type.self, i, DecorationXfbStride); } @@ -17633,7 +17633,7 @@ void CompilerMSL::analyze_xfb_buffers() if (!has_decoration(type.self, DecorationOffset)) return; uint32_t xfb_offset = get_decoration(self, DecorationOffset); - xfb_outputs[xfb_buffer_num].emplace_back({&var, to_name(self), 0, xfb_offset, false}); + xfb_outputs[xfb_buffer_num].emplace_back({&var, to_name(self), 0, xfb_offset, false}); } }); @@ -17689,7 +17689,7 @@ void CompilerMSL::analyze_xfb_buffers() string mbr_name = ensure_valid_name(output.name, "m"); set_member_name(buffer_type.self, member_index, mbr_name); - if (!output.is_block) + if (!output.block) { // Drop pointer information when we emit the outputs into a struct. buffer_type.member_types.push_back(get_variable_data_type_id(var)); From f1c0ad2d54eccd250bab8c78404706606bd3afd6 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Wed, 10 May 2023 22:34:09 -0700 Subject: [PATCH 05/48] Checkpoint: Beginnings of writing XFB data. This only does the bare minimum needed to write XFB data (and not even that actually). It still needs to calculate the offset in the buffer where the data need to be written, and primitive types other than points need to be implemented. --- spirv_msl.cpp | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 651f005dd..9b7e02e28 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -1476,7 +1476,7 @@ string CompilerMSL::compile() backend.support_pointer_to_pointer = true; backend.implicit_c_integer_promotion_rules = true; - capture_output_to_buffer = msl_options.capture_output_to_buffer; + capture_output_to_buffer = msl_options.capture_output_to_buffer || needs_transform_feedback(); is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer; // Initialize array here rather than constructor, MSVC 2013 workaround. @@ -13712,6 +13712,26 @@ void CompilerMSL::fix_up_shader_inputs_outputs() }); } }); + + // Transform feedback + if (needs_transform_feedback()) + { + entry_point.fixup_hooks_out.push_back([=]() { + for (uint32_t i = 0; i < kMaxXfbBuffers; ++i) + { + if (xfb_buffers[i] == 0) continue; + // First, update the amount of data written to the buffer. (TODO) + // Now, write the data out. + switch (msl_options.xfb_primitive_type) + { + case PrimitiveType::PointList: + statement(to_name(xfb_buffers[i]), "[FIXME] = ", to_expression(xfb_locals[i]), ";"); + default: + SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); + } + } + }); + } } // Returns the Metal index of the resource of the specified type as used by the specified variable. From 117eaa3ead1b0139fe703dae8518f32a317172e0 Mon Sep 17 00:00:00 2001 From: swinston Date: Sat, 13 May 2023 13:33:07 -0700 Subject: [PATCH 06/48] get it building. --- spirv_msl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 9b7e02e28..36a5afa38 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -13716,7 +13716,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // Transform feedback if (needs_transform_feedback()) { - entry_point.fixup_hooks_out.push_back([=]() { + entry_func.fixup_hooks_out.push_back([=]() { for (uint32_t i = 0; i < kMaxXfbBuffers; ++i) { if (xfb_buffers[i] == 0) continue; @@ -13724,7 +13724,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // Now, write the data out. switch (msl_options.xfb_primitive_type) { - case PrimitiveType::PointList: + case Options::PrimitiveType::PointList: statement(to_name(xfb_buffers[i]), "[FIXME] = ", to_expression(xfb_locals[i]), ";"); default: SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); From f1913aaccfc95e0c21d339b26aa4b5d8b423394a Mon Sep 17 00:00:00 2001 From: gpx1000 Date: Tue, 16 May 2023 12:13:55 -0700 Subject: [PATCH 07/48] get xfb decorations shader to work. --- spirv_msl.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 72d20618f..56dba863e 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -1476,6 +1476,7 @@ string CompilerMSL::compile() backend.support_pointer_to_pointer = true; backend.implicit_c_integer_promotion_rules = true; + analyze_xfb_buffers(); capture_output_to_buffer = msl_options.capture_output_to_buffer || needs_transform_feedback(); is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer; @@ -13731,6 +13732,10 @@ void CompilerMSL::fix_up_shader_inputs_outputs() { case Options::PrimitiveType::PointList: statement(to_name(xfb_buffers[i]), "[FIXME] = ", to_expression(xfb_locals[i]), ";"); + break; + case Options::PrimitiveType::Dynamic: + statement(to_name(xfb_buffers[i]), "[FIXME] = ", to_expression(xfb_locals[i]), ";"); + break; default: SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); } From f8a27d98193110220744bd8142e2c43475ac020d Mon Sep 17 00:00:00 2001 From: gpx1000 Date: Fri, 19 May 2023 15:07:17 -0700 Subject: [PATCH 08/48] check in for direction adjustment. --- spirv_msl.cpp | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 56dba863e..401929c64 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -13723,19 +13723,52 @@ void CompilerMSL::fix_up_shader_inputs_outputs() if (needs_transform_feedback()) { entry_func.fixup_hooks_out.push_back([=]() { + size_t size_data_write_points = 0; + size_t size_data_write_lines = 0; + size_t size_data_write_triangles = 0; + + for (uint32_t i = 0; i < kMaxXfbBuffers; ++i) { + if (xfb_buffers[i] == 0) continue; + // First, update the amount of data written to the buffer. + switch (msl_options.xfb_primitive_type) + { + case Options::PrimitiveType::PointList: + size_data_write_points += 1; + break; + case Options::PrimitiveType::Dynamic: + break; + case Options::PrimitiveType::LineList: + case Options::PrimitiveType::LineStrip: + size_data_write_lines += 2; + break; + case Options::PrimitiveType::TriangleList: + case Options::PrimitiveType::TriangleStrip: + case Options::PrimitiveType::TriangleFan: + size_data_write_triangles += 3; + break; + default: + SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); + break; + } + + } for (uint32_t i = 0; i < kMaxXfbBuffers; ++i) { if (xfb_buffers[i] == 0) continue; - // First, update the amount of data written to the buffer. (TODO) // Now, write the data out. switch (msl_options.xfb_primitive_type) { case Options::PrimitiveType::PointList: + { statement(to_name(xfb_buffers[i]), "[FIXME] = ", to_expression(xfb_locals[i]), ";"); break; + } case Options::PrimitiveType::Dynamic: statement(to_name(xfb_buffers[i]), "[FIXME] = ", to_expression(xfb_locals[i]), ";"); break; + case Options::PrimitiveType::LineList: + statement(to_name(xfb_buffers[i]), "[FIXME] = " , to_expression(xfb_locals[i]), ";"); + statement(to_name(xfb_buffers[i]), "[FIXME + 1] = " , to_expression(xfb_locals[i]), ";"); default: SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); } From cebb96488ea3af2b0e0d5b7ea60cf046910bfd2d Mon Sep 17 00:00:00 2001 From: gpx1000 Date: Fri, 19 May 2023 15:23:39 -0700 Subject: [PATCH 09/48] Dynamic is an undefined primitive type. xfb_primitive_type needs to be updated to be used. --- spirv_msl.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 401929c64..6b81ea7ac 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -13735,8 +13735,6 @@ void CompilerMSL::fix_up_shader_inputs_outputs() case Options::PrimitiveType::PointList: size_data_write_points += 1; break; - case Options::PrimitiveType::Dynamic: - break; case Options::PrimitiveType::LineList: case Options::PrimitiveType::LineStrip: size_data_write_lines += 2; @@ -13746,6 +13744,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() case Options::PrimitiveType::TriangleFan: size_data_write_triangles += 3; break; + case Options::PrimitiveType::Dynamic: default: SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); break; @@ -13763,12 +13762,10 @@ void CompilerMSL::fix_up_shader_inputs_outputs() statement(to_name(xfb_buffers[i]), "[FIXME] = ", to_expression(xfb_locals[i]), ";"); break; } - case Options::PrimitiveType::Dynamic: - statement(to_name(xfb_buffers[i]), "[FIXME] = ", to_expression(xfb_locals[i]), ";"); - break; case Options::PrimitiveType::LineList: statement(to_name(xfb_buffers[i]), "[FIXME] = " , to_expression(xfb_locals[i]), ";"); statement(to_name(xfb_buffers[i]), "[FIXME + 1] = " , to_expression(xfb_locals[i]), ";"); + case Options::PrimitiveType::Dynamic: default: SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); } From 37c0972677b1ea384084ceaf0e34df9d2e2375b2 Mon Sep 17 00:00:00 2001 From: gpx1000 Date: Mon, 19 Jun 2023 15:27:21 -0700 Subject: [PATCH 10/48] Working together with Chip --- spirv_msl.cpp | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 6b81ea7ac..f1f9c8cfd 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -1476,7 +1476,6 @@ string CompilerMSL::compile() backend.support_pointer_to_pointer = true; backend.implicit_c_integer_promotion_rules = true; - analyze_xfb_buffers(); capture_output_to_buffer = msl_options.capture_output_to_buffer || needs_transform_feedback(); is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer; @@ -1512,6 +1511,7 @@ string CompilerMSL::compile() } fixup_image_load_store_access(); + analyze_xfb_buffers(); set_enabled_interface_variables(get_active_interface_variables()); if (msl_options.force_active_argument_buffer_resources) @@ -13745,6 +13745,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() size_data_write_triangles += 3; break; case Options::PrimitiveType::Dynamic: + break; default: SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); break; @@ -13759,13 +13760,19 @@ void CompilerMSL::fix_up_shader_inputs_outputs() { case Options::PrimitiveType::PointList: { - statement(to_name(xfb_buffers[i]), "[FIXME] = ", to_expression(xfb_locals[i]), ";"); + statement(to_name(xfb_buffers[i]), "[", to_string(size_data_write_points), "] = ", to_expression(xfb_locals[i]), ";"); break; } case Options::PrimitiveType::LineList: - statement(to_name(xfb_buffers[i]), "[FIXME] = " , to_expression(xfb_locals[i]), ";"); - statement(to_name(xfb_buffers[i]), "[FIXME + 1] = " , to_expression(xfb_locals[i]), ";"); + case Options::PrimitiveType::LineStrip: + statement(to_name(xfb_buffers[i]), "[", to_string(size_data_write_lines) , "] = " , to_expression(xfb_locals[i]), ";"); + case Options::PrimitiveType::TriangleList: + case Options::PrimitiveType::TriangleStrip: + case Options::PrimitiveType::TriangleFan: + statement(to_name(xfb_buffers[i]), "[", to_string(size_data_write_triangles), "] = ", to_expression(xfb_locals[i]), ";"); + break; case Options::PrimitiveType::Dynamic: + break; default: SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); } @@ -17652,8 +17659,11 @@ void CompilerMSL::analyze_xfb_buffers() ir.for_each_typed_id([&](uint32_t self, SPIRVariable &var) { auto &type = get_variable_data_type(var); - if (var.storage != StorageClassOutput || is_hidden_variable(var) || - (!has_decoration(self, DecorationXfbBuffer) && !has_decoration(type.self, DecorationBlock))) + if(var.storage != StorageClassOutput) + return; + if(is_hidden_variable(var)) + return; + if (!has_decoration(self, DecorationXfbBuffer)) return; uint32_t xfb_buffer_num, xfb_stride; @@ -17686,6 +17696,11 @@ void CompilerMSL::analyze_xfb_buffers() xfb_outputs[mbr_xfb_buffer_num].emplace_back({&var, to_member_name(type, i), i, xfb_offset, true}); if (has_member_decoration(type.self, i, DecorationXfbStride)) xfb_strides[mbr_xfb_buffer_num] = get_member_decoration(type.self, i, DecorationXfbStride); + else + { + bool test = has_member_decoration(type.parent_type, i, DecorationXfbStride); + break; + } } } else @@ -17757,7 +17772,7 @@ void CompilerMSL::analyze_xfb_buffers() } else { - // FIXME: Implement this! + buffer_type.member_types.push_back(get_variable_data_type_id(var)); } set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationInterfaceOrigID, From aab161a7763a5c588007acdcdc01ddc1503fd959 Mon Sep 17 00:00:00 2001 From: gpx1000 Date: Mon, 19 Jun 2023 17:16:46 -0700 Subject: [PATCH 11/48] fix warnings from CI --- spirv_msl.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index f1f9c8cfd..b51eb6531 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -13766,6 +13766,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() case Options::PrimitiveType::LineList: case Options::PrimitiveType::LineStrip: statement(to_name(xfb_buffers[i]), "[", to_string(size_data_write_lines) , "] = " , to_expression(xfb_locals[i]), ";"); + break; case Options::PrimitiveType::TriangleList: case Options::PrimitiveType::TriangleStrip: case Options::PrimitiveType::TriangleFan: @@ -17666,7 +17667,7 @@ void CompilerMSL::analyze_xfb_buffers() if (!has_decoration(self, DecorationXfbBuffer)) return; - uint32_t xfb_buffer_num, xfb_stride; + uint32_t xfb_buffer_num = 0, xfb_stride; if (has_decoration(self, DecorationXfbBuffer)) { xfb_buffer_num = get_decoration(self, DecorationXfbBuffer); @@ -17698,7 +17699,11 @@ void CompilerMSL::analyze_xfb_buffers() xfb_strides[mbr_xfb_buffer_num] = get_member_decoration(type.self, i, DecorationXfbStride); else { - bool test = has_member_decoration(type.parent_type, i, DecorationXfbStride); + bool hasTransformFeedback = has_member_decoration(type.parent_type, i, DecorationXfbStride); + if(hasTransformFeedback) { + auto &execution = get_entry_point(); + execution.flags.set(spv::ExecutionModeXfb); + } break; } } @@ -17759,7 +17764,7 @@ void CompilerMSL::analyze_xfb_buffers() for (auto &output : outputs) { auto &var = *output.var; - auto &type = get_variable_data_type(var); +// auto &type = get_variable_data_type(var); string mbr_name = ensure_valid_name(output.name, "m"); set_member_name(buffer_type.self, member_index, mbr_name); From 562b959290990b85095f823b038f7f422f5e0b1c Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Thu, 10 Aug 2023 15:49:03 -0700 Subject: [PATCH 12/48] Make sure vertex functions that use transform feedback become Metal kernels. --- spirv_msl.cpp | 38 ++++++++++++++++++-------------------- spirv_msl.hpp | 6 ++++++ 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 66add3115..36f3d8d03 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -232,8 +232,8 @@ bool CompilerMSL::builtin_translates_to_nonarray(spv::BuiltIn builtin) const void CompilerMSL::build_implicit_builtins() { bool need_sample_pos = active_input_builtins.get(BuiltInSamplePosition); - bool need_vertex_params = capture_output_to_buffer && get_execution_model() == ExecutionModelVertex && - !msl_options.vertex_for_tessellation; + bool need_vertex_params = + capture_output_to_buffer && get_execution_model() == ExecutionModelVertex && !vertex_shader_is_kernel(); bool need_tesc_params = is_tesc_shader(); bool need_tese_params = is_tese_shader() && msl_options.raw_buffer_tese_input; bool need_subgroup_mask = @@ -248,7 +248,7 @@ void CompilerMSL::build_implicit_builtins() bool need_dispatch_base = msl_options.dispatch_base && get_execution_model() == ExecutionModelGLCompute && (active_input_builtins.get(BuiltInWorkgroupId) || active_input_builtins.get(BuiltInGlobalInvocationId)); - bool need_grid_params = get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation; + bool need_grid_params = vertex_shader_is_kernel(); bool need_vertex_base_params = need_grid_params && (active_input_builtins.get(BuiltInVertexId) || active_input_builtins.get(BuiltInVertexIndex) || @@ -1622,7 +1622,7 @@ void CompilerMSL::preprocess_op_codes() // Tessellation control shaders are run as compute functions in Metal, and so // must capture their output to a buffer. - if (is_tesc_shader() || (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation)) + if (is_tesc_shader() || vertex_shader_is_kernel()) { is_rasterization_disabled = true; capture_output_to_buffer = true; @@ -4004,7 +4004,7 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) // The first member of the indirect buffer is always the number of vertices // to draw. // We zero-base the InstanceID & VertexID variables for HLSL emulation elsewhere, so don't do it twice - if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation) + if (vertex_shader_is_kernel()) { statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, " = ", output_buffer_var_name, "[", to_expression(builtin_invocation_id_id), @@ -11823,7 +11823,7 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in case BuiltInInstanceId: case BuiltInInstanceIndex: case BuiltInBaseInstance: - if (msl_options.vertex_for_tessellation) + if (vertex_shader_is_kernel()) return ""; return string(" [[") + builtin_qualifier(builtin) + "]]"; @@ -11846,7 +11846,7 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in } // Vertex and tessellation evaluation function outputs - if (((execution.model == ExecutionModelVertex && !msl_options.vertex_for_tessellation) || is_tese_shader()) && + if (((execution.model == ExecutionModelVertex && !vertex_shader_is_kernel()) || is_tese_shader()) && type.storage == StorageClassOutput) { if (is_builtin) @@ -11889,7 +11889,7 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in return join(" [[", loc_qual, "]]"); } - if (execution.model == ExecutionModelVertex && msl_options.vertex_for_tessellation && type.storage == StorageClassOutput) + if (vertex_shader_is_kernel() && type.storage == StorageClassOutput) { // For this type of shader, we always arrange for it to capture its // output to a buffer. For this reason, qualifiers are irrelevant here. @@ -12303,7 +12303,7 @@ string CompilerMSL::func_type_decl(SPIRType &type) case ExecutionModelVertex: if (msl_options.vertex_for_tessellation && !msl_options.supports_msl_version(1, 2)) SPIRV_CROSS_THROW("Tessellation requires Metal 1.2."); - entry_type = msl_options.vertex_for_tessellation ? "kernel" : "vertex"; + entry_type = vertex_shader_is_kernel() ? "kernel" : "vertex"; break; case ExecutionModelTessellationEvaluation: if (!msl_options.supports_msl_version(1, 2)) @@ -12533,7 +12533,7 @@ bool CompilerMSL::is_direct_input_builtin(BuiltIn bi_type) case BuiltInInstanceId: case BuiltInInstanceIndex: case BuiltInBaseInstance: - return get_execution_model() != ExecutionModelVertex || !msl_options.vertex_for_tessellation; + return !vertex_shader_is_kernel(); // Tess. control function in case BuiltInPosition: case BuiltInPointSize: @@ -12705,8 +12705,7 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) ep_args += join("constant uint* spvIndirectParams [[buffer(", msl_options.indirect_params_buffer_index, ")]]"); } - else if (stage_out_var_id && - !(get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation)) + else if (stage_out_var_id && !vertex_shader_is_kernel()) { if (!ep_args.empty()) ep_args += ", "; @@ -12714,7 +12713,7 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) join("device uint* spvIndirectParams [[buffer(", msl_options.indirect_params_buffer_index, ")]]"); } - if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation && + if (vertex_shader_is_kernel() && (active_input_builtins.get(BuiltInVertexIndex) || active_input_builtins.get(BuiltInVertexId)) && msl_options.vertex_index_type != Options::IndexType::None) { @@ -13215,7 +13214,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // Vertex shaders shouldn't have the problems with barriers in non-uniform control flow that // tessellation control shaders do, so early returns should be OK. We may need to revisit this // if it ever becomes possible to use barriers from a vertex shader. - if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation) + if (vertex_shader_is_kernel()) { entry_func.fixup_hooks_in.push_back([this]() { statement("if (any(", to_expression(builtin_invocation_id_id), @@ -13699,7 +13698,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() case BuiltInVertexId: case BuiltInVertexIndex: // This is direct-mapped normally. - if (!msl_options.vertex_for_tessellation) + if (!vertex_shader_is_kernel()) break; entry_func.fixup_hooks_in.push_back([=]() { @@ -13723,7 +13722,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() break; case BuiltInBaseVertex: // This is direct-mapped normally. - if (!msl_options.vertex_for_tessellation) + if (!vertex_shader_is_kernel()) break; entry_func.fixup_hooks_in.push_back([=]() { @@ -13734,7 +13733,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() case BuiltInInstanceId: case BuiltInInstanceIndex: // This is direct-mapped normally. - if (!msl_options.vertex_for_tessellation) + if (!vertex_shader_is_kernel()) break; entry_func.fixup_hooks_in.push_back([=]() { @@ -13747,7 +13746,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() break; case BuiltInBaseInstance: // This is direct-mapped normally. - if (!msl_options.vertex_for_tessellation) + if (!vertex_shader_is_kernel()) break; entry_func.fixup_hooks_in.push_back([=]() { @@ -15929,8 +15928,7 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) return "thread_index_in_simdgroup"; } else if (execution.model == ExecutionModelKernel || execution.model == ExecutionModelGLCompute || - execution.model == ExecutionModelTessellationControl || - (execution.model == ExecutionModelVertex && msl_options.vertex_for_tessellation)) + execution.model == ExecutionModelTessellationControl || vertex_shader_is_kernel()) { // We are generating a Metal kernel function. if (!msl_options.supports_msl_version(2)) diff --git a/spirv_msl.hpp b/spirv_msl.hpp index caecef6fe..6d0f6afc5 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -578,6 +578,12 @@ class CompilerMSL : public CompilerGLSL execution.model == spv::ExecutionModelTessellationEvaluation); } + bool vertex_shader_is_kernel() const + { + return get_execution_model() == spv::ExecutionModelVertex && + (msl_options.vertex_for_tessellation || needs_transform_feedback()); + } + // Provide feedback to calling API to allow it to pass an auxiliary // swizzle buffer if the shader needs it. bool needs_swizzle_buffer() const From 36d39df2cf4c94d61a28aa910ddf53411d126a95 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Fri, 18 Aug 2023 14:03:24 -0700 Subject: [PATCH 13/48] Add the transform feedback buffer parameters to the vertex shader. --- spirv_cross_c.cpp | 8 ++++++++ spirv_cross_c.h | 2 ++ spirv_msl.cpp | 16 +++++++++++++++- spirv_msl.hpp | 2 ++ 4 files changed, 27 insertions(+), 1 deletion(-) diff --git a/spirv_cross_c.cpp b/spirv_cross_c.cpp index c21fdeb6e..9fab62b91 100644 --- a/spirv_cross_c.cpp +++ b/spirv_cross_c.cpp @@ -742,6 +742,14 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c case SPVC_COMPILER_OPTION_MSL_SAMPLE_DREF_LOD_ARRAY_AS_GRAD: options->msl.sample_dref_lod_array_as_grad = value != 0; break; + + case SPVC_COMPILER_OPTION_MSL_XFB_COUNTER_BUFFER_INDEX_BASE: + options->msl.xfb_counter_buffer_index_base = value; + break; + + case SPVC_COMPILER_OPTION_MSL_XFB_OUTPUT_BUFFER_INDEX_BASE: + options->msl.xfb_output_buffer_index_base = value; + break; #endif default: diff --git a/spirv_cross_c.h b/spirv_cross_c.h index 0d8e6e10a..17adbc7e3 100644 --- a/spirv_cross_c.h +++ b/spirv_cross_c.h @@ -725,6 +725,8 @@ typedef enum spvc_compiler_option SPVC_COMPILER_OPTION_MSL_ARGUMENT_BUFFERS_TIER = 84 | SPVC_COMPILER_OPTION_MSL_BIT, SPVC_COMPILER_OPTION_MSL_SAMPLE_DREF_LOD_ARRAY_AS_GRAD = 85 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_XFB_COUNTER_BUFFER_INDEX_BASE = 86 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_XFB_OUTPUT_BUFFER_INDEX_BASE = 87 | SPVC_COMPILER_OPTION_MSL_BIT, SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff } spvc_compiler_option; diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 5929b1d96..d73621bad 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -12735,6 +12735,21 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) } } + // Shaders with transform feedback get two additional buffers for + // each transform feedback buffer declared: a counter of data written, + // and the transform feedback data buffer proper. + if (needs_transform_feedback()) + { + for (uint32_t xfb_buffer = 0; xfb_buffer < kMaxXfbBuffers; xfb_buffer++) + { + if (!xfb_buffers[xfb_buffer]) + continue; + if (!ep_args.empty()) + ep_args += ", "; + ep_args += join("device uint& spvXfbCounter", xfb_buffer, " [[buffer(", msl_options.xfb_counter_buffer_index_base + xfb_buffer, ")]], "); + ep_args += join(variable_decl(get_type_from_variable(xfb_buffers[xfb_buffer]), to_name(xfb_buffers[xfb_buffer])), " [[buffer(", msl_options.xfb_output_buffer_index_base + xfb_buffer, ")]]"); + } + } // Tessellation control shaders get three additional parameters: // a buffer to hold the per-patch data, a buffer to hold the per-patch // tessellation levels, and a block of workgroup memory to hold the @@ -17904,7 +17919,6 @@ void CompilerMSL::analyze_xfb_buffers() // FIXME: Still to do: // - Add locals to entry point - // - Add buffer arguments to entry point // - Make sure Xfb-captured outputs aren't in "normal" capture_output_to_buffer } } diff --git a/spirv_msl.hpp b/spirv_msl.hpp index 6d0f6afc5..9345b3153 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -321,6 +321,8 @@ class CompilerMSL : public CompilerGLSL uint32_t shader_input_buffer_index = 22; uint32_t shader_index_buffer_index = 21; uint32_t shader_patch_input_buffer_index = 20; + uint32_t xfb_counter_buffer_index_base = 16; + uint32_t xfb_output_buffer_index_base = 12; uint32_t shader_input_wg_index = 0; uint32_t device_index = 0; uint32_t enable_frag_output_mask = 0xffffffff; From 35858fb576fcf5829fc7258bd79dcbd8469bafaf Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Tue, 22 Aug 2023 01:40:54 -0700 Subject: [PATCH 14/48] Make sure all used outputs, including builtins, get XFB buffers. --- spirv_msl.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index d73621bad..459ec1848 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -17795,9 +17795,7 @@ void CompilerMSL::analyze_xfb_buffers() auto &type = get_variable_data_type(var); if(var.storage != StorageClassOutput) return; - if(is_hidden_variable(var)) - return; - if (!has_decoration(self, DecorationXfbBuffer)) + if(is_hidden_variable(var, true)) return; uint32_t xfb_buffer_num = 0, xfb_stride; @@ -17829,11 +17827,16 @@ void CompilerMSL::analyze_xfb_buffers() uint32_t mbr_xfb_buffer_num = has_member_decoration(type.self, i, DecorationXfbBuffer) ? get_member_decoration(type.self, i, DecorationXfbBuffer) : xfb_buffer_num; xfb_outputs[mbr_xfb_buffer_num].emplace_back({&var, to_member_name(type, i), i, xfb_offset, true}); if (has_member_decoration(type.self, i, DecorationXfbStride)) + { xfb_strides[mbr_xfb_buffer_num] = get_member_decoration(type.self, i, DecorationXfbStride); + } else { + // XXX What's this for??? The validation rules for SPIR-V require + // this to be set if any of the transform feedback decorations are used! bool hasTransformFeedback = has_member_decoration(type.parent_type, i, DecorationXfbStride); - if(hasTransformFeedback) { + if (hasTransformFeedback) + { auto &execution = get_entry_point(); execution.flags.set(spv::ExecutionModeXfb); } @@ -17843,7 +17846,7 @@ void CompilerMSL::analyze_xfb_buffers() } else { - if (!has_decoration(type.self, DecorationOffset)) + if (!has_decoration(self, DecorationOffset)) return; uint32_t xfb_offset = get_decoration(self, DecorationOffset); xfb_outputs[xfb_buffer_num].emplace_back({&var, to_name(self), 0, xfb_offset, false}); @@ -17897,7 +17900,7 @@ void CompilerMSL::analyze_xfb_buffers() for (auto &output : outputs) { auto &var = *output.var; -// auto &type = get_variable_data_type(var); + auto &type = get_variable_data_type(var); string mbr_name = ensure_valid_name(output.name, "m"); set_member_name(buffer_type.self, member_index, mbr_name); @@ -17910,7 +17913,7 @@ void CompilerMSL::analyze_xfb_buffers() } else { - buffer_type.member_types.push_back(get_variable_data_type_id(var)); + buffer_type.member_types.push_back(type.member_types[member_index]); } set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationInterfaceOrigID, From 3c427def60271bcb756a892fa1e4b888a6014ee1 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Tue, 22 Aug 2023 17:57:36 -0700 Subject: [PATCH 15/48] Make sure builtins have the correct names in XFB buffers. --- spirv_msl.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 459ec1848..36c23a988 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -17825,7 +17825,13 @@ void CompilerMSL::analyze_xfb_buffers() continue; uint32_t xfb_offset = get_member_decoration(type.self, i, DecorationOffset); uint32_t mbr_xfb_buffer_num = has_member_decoration(type.self, i, DecorationXfbBuffer) ? get_member_decoration(type.self, i, DecorationXfbBuffer) : xfb_buffer_num; - xfb_outputs[mbr_xfb_buffer_num].emplace_back({&var, to_member_name(type, i), i, xfb_offset, true}); + string name; + if (has_member_decoration(type.self, i, DecorationBuiltIn)) + // Force this to have the proper name. + name = builtin_to_glsl(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn)), StorageClassOutput); + else + name = to_member_name(type, i); + xfb_outputs[mbr_xfb_buffer_num].emplace_back({&var, name, i, xfb_offset, true}); if (has_member_decoration(type.self, i, DecorationXfbStride)) { xfb_strides[mbr_xfb_buffer_num] = get_member_decoration(type.self, i, DecorationXfbStride); @@ -17849,7 +17855,13 @@ void CompilerMSL::analyze_xfb_buffers() if (!has_decoration(self, DecorationOffset)) return; uint32_t xfb_offset = get_decoration(self, DecorationOffset); - xfb_outputs[xfb_buffer_num].emplace_back({&var, to_name(self), 0, xfb_offset, false}); + string name; + if (has_decoration(self, DecorationBuiltIn)) + // Force this to have the proper name. + name = builtin_to_glsl(BuiltIn(get_decoration(self, DecorationBuiltIn)), StorageClassOutput); + else + name = to_name(self); + xfb_outputs[xfb_buffer_num].emplace_back({&var, name, 0, xfb_offset, false}); } }); From c352f948eb12dfbc504be9498eea876b33fdbe39 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Thu, 7 Sep 2023 13:39:30 -0700 Subject: [PATCH 16/48] Add command line parameter to set the primitive type assumed for transform feedback. --- main.cpp | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/main.cpp b/main.cpp index 5e9b0168c..2b2f4ba1c 100644 --- a/main.cpp +++ b/main.cpp @@ -677,6 +677,8 @@ struct CLIArguments bool msl_check_discarded_frag_stores = false; bool msl_sample_dref_lod_array_as_grad = false; const char *msl_combined_sampler_suffix = nullptr; + CompilerMSL::Options::PrimitiveType msl_xfb_primitive_type = + CompilerMSL::Options::PrimitiveType::Dynamic; bool glsl_emit_push_constant_as_ubo = false; bool glsl_emit_ubo_as_plain_uniforms = false; bool glsl_force_flattened_io_blocks = false; @@ -954,7 +956,11 @@ static void print_help_msl() "\t\tSome Metal devices have a bug where the level() argument to\n" "\t\tdepth2d_array::sample_compare() in a fragment shader is biased by some\n" "\t\tunknown amount. This prevents the bias from being added.\n" - "\t[--msl-combined-sampler-suffix ]:\n\t\tUses a custom suffix for combined samplers.\n"); + "\t[--msl-combined-sampler-suffix ]:\n\t\tUses a custom suffix for combined samplers.\n" + "\t[--msl-xfb-primitive-type ]:\n\t\tGenerates code in a vertex shader to capture primitives of the\n\t\t" + "specified type for transform feedback. may be one of dynamic,\n\t\t" + "point-list, line-list, line-strip, triangle-list, triangle-strip, or\n\t\t" + "triangle-fan. The default is \"dynamic\".\n"); // clang-format on } @@ -1229,6 +1235,7 @@ static string compile_iteration(const CLIArguments &args, std::vector msl_opts.manual_helper_invocation_updates = args.msl_manual_helper_invocation_updates; msl_opts.check_discarded_frag_stores = args.msl_check_discarded_frag_stores; msl_opts.sample_dref_lod_array_as_grad = args.msl_sample_dref_lod_array_as_grad; + msl_opts.xfb_primitive_type = args.msl_xfb_primitive_type; msl_opts.ios_support_base_vertex_instance = true; msl_comp->set_msl_options(msl_opts); for (auto &v : args.msl_discrete_descriptor_sets) @@ -1789,6 +1796,23 @@ static int main_inner(int argc, char *argv[]) cbs.add("--msl-combined-sampler-suffix", [&args](CLIParser &parser) { args.msl_combined_sampler_suffix = parser.next_string(); }); + cbs.add("--msl-xfb-primitive-type", [&args](CLIParser &parser) { + const char *type = parser.next_value_string("dynamic"); + if (strcmp(type, "dynamic") == 0) + args.msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::Dynamic; + else if (strcmp(type, "point-list") == 0) + args.msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::PointList; + else if (strcmp(type, "line-list") == 0) + args.msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::LineList; + else if (strcmp(type, "line-strip") == 0) + args.msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::LineStrip; + else if (strcmp(type, "triangle-list") == 0) + args.msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::TriangleList; + else if (strcmp(type, "triangle-strip") == 0) + args.msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::TriangleStrip; + else if (strcmp(type, "triangle-fan") == 0) + args.msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::TriangleFan; + }); cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); }); cbs.add("--rename-entry-point", [&args](CLIParser &parser) { auto old_name = parser.next_string(); From 001ff7d4bebda7f69064867f684b36854307a028 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Fri, 8 Sep 2023 13:06:29 -0700 Subject: [PATCH 17/48] Add a variable for the XFB counter buffer. This lets us reference it later. --- spirv_msl.cpp | 30 ++++++++++++++++++++++++------ spirv_msl.hpp | 1 + 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 36c23a988..781b23594 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -12746,7 +12746,7 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) continue; if (!ep_args.empty()) ep_args += ", "; - ep_args += join("device uint& spvXfbCounter", xfb_buffer, " [[buffer(", msl_options.xfb_counter_buffer_index_base + xfb_buffer, ")]], "); + ep_args += join(variable_decl(get_type_from_variable(xfb_counters[xfb_buffer]), to_name(xfb_counters[xfb_buffer])), " [[buffer(", msl_options.xfb_counter_buffer_index_base + xfb_buffer, ")]], "); ep_args += join(variable_decl(get_type_from_variable(xfb_buffers[xfb_buffer]), to_name(xfb_buffers[xfb_buffer])), " [[buffer(", msl_options.xfb_output_buffer_index_base + xfb_buffer, ")]]"); } } @@ -17786,6 +17786,7 @@ void CompilerMSL::analyze_xfb_buffers() for (uint32_t i = 0; i < kMaxXfbBuffers; ++i) { + xfb_counters[i] = 0; xfb_buffers[i] = 0; xfb_locals[i] = 0; xfb_strides[i] = 0; @@ -17871,14 +17872,32 @@ void CompilerMSL::analyze_xfb_buffers() if (outputs.empty()) continue; - uint32_t next_id = ir.increase_bound_by(5); + uint32_t next_id = ir.increase_bound_by(8); + uint32_t buffer_var_id = next_id; uint32_t local_var_id = next_id + 1; uint32_t type_id = next_id + 2; uint32_t ptr_type_id = next_id + 3; uint32_t local_ptr_type_id = next_id + 4; - xfb_buffers[xfb_buffer] = next_id; + uint32_t counter_var_id = next_id + 5; + uint32_t counter_type_id = next_id + 6; + uint32_t counter_ptr_type_id = next_id + 7; + xfb_counters[xfb_buffer] = counter_var_id; + xfb_buffers[xfb_buffer] = buffer_var_id; xfb_locals[xfb_buffer] = local_var_id; + auto &counter_type = set(counter_type_id); + counter_type.basetype = SPIRType::AtomicCounter; + counter_type.storage = StorageClassStorageBuffer; + + auto &counter_ptr_type = set(counter_ptr_type_id); + counter_ptr_type = counter_type; + counter_ptr_type.pointer = true; + counter_ptr_type.pointer_depth++; + counter_ptr_type.parent_type = counter_type_id; + + set(counter_var_id, counter_ptr_type_id, StorageClassUniform); + set_name(counter_var_id, join("spvXfbCounter", xfb_buffer)); + auto &buffer_type = set(type_id); buffer_type.basetype = SPIRType::Struct; buffer_type.storage = StorageClassStorageBuffer; @@ -17899,9 +17918,8 @@ void CompilerMSL::analyze_xfb_buffers() set(local_var_id, local_ptr_type_id, StorageClassFunction); set_name(local_var_id, join("spvXfbOutput", xfb_buffer)); - uint32_t buffer_variable_id = next_id; - set(buffer_variable_id, ptr_type_id, StorageClassUniform); - set_name(buffer_variable_id, join("spvXfb", xfb_buffer)); + set(buffer_var_id, ptr_type_id, StorageClassUniform); + set_name(buffer_var_id, join("spvXfb", xfb_buffer)); // Members must be emitted in Offset order. stable_sort(begin(outputs), end(outputs), [&](const XfbOutput &lhs, const XfbOutput &rhs) -> bool { diff --git a/spirv_msl.hpp b/spirv_msl.hpp index 9345b3153..3a5185328 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -1224,6 +1224,7 @@ class CompilerMSL : public CompilerGLSL std::unordered_set atomic_image_vars; // Emulate texture2D atomic operations std::unordered_set pull_model_inputs; + VariableID xfb_counters[kMaxXfbBuffers]; VariableID xfb_buffers[kMaxXfbBuffers]; VariableID xfb_locals[kMaxXfbBuffers]; uint32_t xfb_strides[kMaxXfbBuffers]; From 28babded8a47c04b54203991f64feffe5ac2a3b8 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Wed, 13 Sep 2023 13:56:57 -0700 Subject: [PATCH 18/48] Really crappy checkpoint for XFB work. I don't expect this to build, let alone work. (Really, all these changes ought to be squashed when merged to SPIRV-Cross.) --- spirv_msl.cpp | 159 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 138 insertions(+), 21 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 781b23594..1c7f246c5 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -13791,60 +13791,177 @@ void CompilerMSL::fix_up_shader_inputs_outputs() if (needs_transform_feedback()) { entry_func.fixup_hooks_out.push_back([=]() { - size_t size_data_write_points = 0; - size_t size_data_write_lines = 0; - size_t size_data_write_triangles = 0; - - for (uint32_t i = 0; i < kMaxXfbBuffers; ++i) { + string index_expr; + switch (msl_options.xfb_primitive_type) + { + case Options::PrimitiveType::PointList: + case Options::PrimitiveType::LineList: + case Options::PrimitiveType::TriangleList: + index_expr = join(to_expression(builtin_invocation_id_id), ".y * ", + to_expression(builtin_stage_input_size_id), ".x + ", + to_expression(builtin_invocation_id_id), ".x"); + break; + case Options::PrimitiveType::LineStrip: + // Calculation of the index expression is also complicated a bit because of this. + // Some worked examples: + // Vertex ordinal XFB indices + // 0 0 + // 1 1, 2 + // 2 3, 4 + // 3 5, 6 + // 4 7 + // FIXME: This doesn't account for primitive restart! + // 0 0 + // 1 1, 2 + // 2 3, 4 + // 3 5 + // 4 + // 5 n/a + // 6 + // 7 6 + // 8 7, 8 + // 9 9, 10 + // 10 11 + index_expr = join("2 * (", to_expression(builtin_invocation_id_id), ".y * ", + to_expression(builtin_stage_input_size_id), ".x + ", + to_expression(builtin_invocation_id_id), ".x)"); + break; + case Options::PrimitiveType::TriangleStrip: + // Vertex ordinal XFB indices + // 0 0 + // 1 1, 3 + // 2 2, 4, 6 + // 3 5, 7, 9 + // 4 8, 10 + // 5 11 + // FIXME: This doesn't account for primitive restart! + // 0 0 + // 1 1, 3 + // 2 2, 4, 6 + // 3 5, 7, 9 + // 4 8, 10 + // 5 11 + // 6 + // 7 12 + // 8 13, 15 + // 9 14, 16, 18 + // 10 17, 19, 21 + // 11 20, 22 + // 12 23 + // ---- + // 0 0 + // 1 1 + // 2 2 + // 3 + // 4 3 + // 5 4, 6 + // 6 5, 7 + // 7 8 + // ---- + // 0 0 + // 1 1, 3 + // 2 2, 4, 6 + // 3 5, 7 + // 4 8 + // 5 + // 6 n/a + // 7 n/a + // 8 + // 9 9 + // 10 10, 12 + // 11 11, 13, 15 + // 12 14, 16 + // 13 17 + index_expr = join("3 * (", to_expression(builtin_invocation_id_id), ".y * ", + to_expression(builtin_stage_input_size_id), ".x + ", + to_expression(builtin_invocation_id_id), ".x)"); + case Options::PrimitiveType::TriangleFan: + // FIXME: Primitive restart + index_expr = join("3 * (", to_expression(builtin_invocation_id_id), ".y * ", + to_expression(builtin_stage_input_size_id), ".x + ", + to_expression(builtin_invocation_id_id), ".x) - 2"); + case Options::PrimitiveType::Dynamic: + SPIRV_CROSS_THROW("Dynamic primitive type is not yet supported."); + } + // First, write the data out. + for (uint32_t i = 0; i < kMaxXfbBuffers; ++i) + { if (xfb_buffers[i] == 0) continue; - // First, update the amount of data written to the buffer. + statement("uint spvInitOffset", i, " = atomic_load_explicit(", to_name(xfb_counters[i]), ", memory_order_relaxed);"); + statement(to_name(xfb_buffers[i]), " = reinterpret_cast<", type_to_glsl(get_type_from_variable(xfb_buffers[i])), ">(reinterpret_cast(", to_name(xfb_buffers[i]), ") + spvInitOffset", i, ");"); switch (msl_options.xfb_primitive_type) { case Options::PrimitiveType::PointList: - size_data_write_points += 1; + // This is straightforward enough. Just make sure we don't overstep the data buffer (FIXME). + statement(to_name(xfb_buffers[i]), "[", index_expr, "] = ", to_expression(xfb_locals[i]), ";"); break; case Options::PrimitiveType::LineList: + // This is a little trickier, because we don't want to write an incomplete primitive. + // Therefore, we must write only if we're an odd vertex, or we're not the last one. + // FIXME: Bounds check the buffer, too. + statement("if ((", to_expression(builtin_invocation_id_id), ".x & 1) || ", to_expression(builtin_invocation_id_id) ".x < ", to_expression(builtin_stage_input_size), ".x - 1)"); + statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = " , to_expression(xfb_locals[i]), ";"); + case Options::PrimitiveType::TriangleList: + // This is similar to the previous case, except here the boundary condition is + // if global_id.x % 3 == 2 or we're not one of the last two. + // FIXME: Bounds check the buffer, too. + statement("if ((", to_expression(builtin_invocation_id_id), ".x % 3 == 2) || ", to_expression(builtin_invocation_id_id) ".x + 2 < ", to_expression(builtin_stage_input_size), ".x)"); + statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = ", to_expression(xfb_locals[i]), ";"); case Options::PrimitiveType::LineStrip: - size_data_write_lines += 2; + // This is more complicated. We have to write out each individual line segment. + // So if we're not the first or the last, we have to write twice. + // On top of that, we also have to handle primitive restart. (FIXME) + // FIXME: Bounds check the buffer, too. + statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = " , to_expression(xfb_locals[i]), ";"); + statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 1] = " , to_expression(xfb_locals[i]), ";"); break; - case Options::PrimitiveType::TriangleList: case Options::PrimitiveType::TriangleStrip: + // This is even worse. We still have to write twice if we're not first or last, + // but now if there's fewer than two vertices in this strip, we can't write at all. + // Again, primitive restart is a factor here. (FIXME) + // FIXME: Bounds check the buffer, too. + statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = ", to_expression(xfb_locals[i]), ";"); + statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 1] = ", to_expression(xfb_locals[i]), ";"); + statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 2] = ", to_expression(xfb_locals[i]), ";"); + break; case Options::PrimitiveType::TriangleFan: - size_data_write_triangles += 3; + // This is the worst case of all. It's similar to the strip case, except now + // we have to write the fan base vertex for *every* triangle. (FIXME) + // Again, primitive restart is a factor here. (FIXME) + // FIXME: Bounds check the buffer, too. + // TODO TODO TODO + statement(to_name(xfb_buffers[i]), "[", index_expr, "] = ", to_expression(xfb_locals[i]), ";"); break; case Options::PrimitiveType::Dynamic: - break; default: SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); - break; } - } - for (uint32_t i = 0; i < kMaxXfbBuffers; ++i) - { + statement("threadgroup_barrier(mem_device);"); + // Now update the amount of data written to the buffer. + for (uint32_t i = 0; i < kMaxXfbBuffers; ++i) { if (xfb_buffers[i] == 0) continue; - // Now, write the data out. switch (msl_options.xfb_primitive_type) { case Options::PrimitiveType::PointList: - { - statement(to_name(xfb_buffers[i]), "[", to_string(size_data_write_points), "] = ", to_expression(xfb_locals[i]), ";"); + size_data_write_points += 1; break; - } case Options::PrimitiveType::LineList: case Options::PrimitiveType::LineStrip: - statement(to_name(xfb_buffers[i]), "[", to_string(size_data_write_lines) , "] = " , to_expression(xfb_locals[i]), ";"); + size_data_write_lines += 2; break; case Options::PrimitiveType::TriangleList: case Options::PrimitiveType::TriangleStrip: case Options::PrimitiveType::TriangleFan: - statement(to_name(xfb_buffers[i]), "[", to_string(size_data_write_triangles), "] = ", to_expression(xfb_locals[i]), ";"); + size_data_write_triangles += 3; break; case Options::PrimitiveType::Dynamic: break; default: SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); + break; } + } }); } From fb520f408738fb16e5846c2d863039add3378257 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Thu, 14 Sep 2023 17:31:11 -0700 Subject: [PATCH 19/48] Getting closer... --- spirv_msl.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 1c7f246c5..5e7af2b1e 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -13877,6 +13877,8 @@ void CompilerMSL::fix_up_shader_inputs_outputs() to_expression(builtin_invocation_id_id), ".x)"); case Options::PrimitiveType::TriangleFan: // FIXME: Primitive restart + // FIXME: This is wrong. The index expression here is different + // for the fan base vs. the others. index_expr = join("3 * (", to_expression(builtin_invocation_id_id), ".y * ", to_expression(builtin_stage_input_size_id), ".x + ", to_expression(builtin_invocation_id_id), ".x) - 2"); @@ -13912,16 +13914,21 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // So if we're not the first or the last, we have to write twice. // On top of that, we also have to handle primitive restart. (FIXME) // FIXME: Bounds check the buffer, too. + statement("if (", to_expression(builtin_invocation_id_id), ".x != ", to_expression(builtin_stage_input_size), ".x - 1)"); statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = " , to_expression(xfb_locals[i]), ";"); + statement("if (", to_expression(builtin_invocation_id_id), ".x != 0)"); statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 1] = " , to_expression(xfb_locals[i]), ";"); break; case Options::PrimitiveType::TriangleStrip: - // This is even worse. We still have to write twice if we're not first or last, - // but now if there's fewer than two vertices in this strip, we can't write at all. + // This is even worse. We have to write three times if we're not first or last, + // and now if there's fewer than two vertices in this strip, we can't write at all. // Again, primitive restart is a factor here. (FIXME) // FIXME: Bounds check the buffer, too. + statement("if (", to_expression(builtin_invocation_id_id), ".x + 2 < ", to_expression(builtin_stage_input_size), ".x)"); statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = ", to_expression(xfb_locals[i]), ";"); + statement("if (", to_expression(builtin_invocation_id_id), ".x != 0 && ", to_expression(builtin_invocation_id_id), ".x != ", to_expression(builtin_stage_input_size), ".x - 1)"); statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 1] = ", to_expression(xfb_locals[i]), ";"); + statement("if (", to_expression(builtin_invocation_id_id), ".x > 1)"); statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 2] = ", to_expression(xfb_locals[i]), ";"); break; case Options::PrimitiveType::TriangleFan: From 556c9fa67fd387b079fe196f0107807295c8e24f Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Fri, 15 Sep 2023 13:17:51 -0700 Subject: [PATCH 20/48] Fix indices of triangle strips to account for winding. Work out how indexing works for triangle fans. A little bit closer... --- spirv_msl.cpp | 90 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 67 insertions(+), 23 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 5e7af2b1e..36ca76594 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -13830,24 +13830,24 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // Vertex ordinal XFB indices // 0 0 // 1 1, 3 - // 2 2, 4, 6 - // 3 5, 7, 9 - // 4 8, 10 - // 5 11 + // 2 2, 5, 6 + // 3 4, 7, 9 + // 4 8, 11 + // 5 10 // FIXME: This doesn't account for primitive restart! // 0 0 // 1 1, 3 - // 2 2, 4, 6 - // 3 5, 7, 9 - // 4 8, 10 - // 5 11 + // 2 2, 5, 6 + // 3 4, 7, 9 + // 4 8, 11 + // 5 10 // 6 // 7 12 // 8 13, 15 - // 9 14, 16, 18 - // 10 17, 19, 21 - // 11 20, 22 - // 12 23 + // 9 14, 17, 18 + // 10 16, 19, 21 + // 11 20, 23 + // 12 22 // ---- // 0 0 // 1 1 @@ -13855,13 +13855,13 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // 3 // 4 3 // 5 4, 6 - // 6 5, 7 - // 7 8 + // 6 5, 8 + // 7 7 // ---- // 0 0 // 1 1, 3 - // 2 2, 4, 6 - // 3 5, 7 + // 2 2, 5, 6 + // 3 4, 7 // 4 8 // 5 // 6 n/a @@ -13869,16 +13869,60 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // 8 // 9 9 // 10 10, 12 - // 11 11, 13, 15 - // 12 14, 16 + // 11 11, 14, 15 + // 12 13, 16 // 13 17 index_expr = join("3 * (", to_expression(builtin_invocation_id_id), ".y * ", to_expression(builtin_stage_input_size_id), ".x + ", to_expression(builtin_invocation_id_id), ".x)"); case Options::PrimitiveType::TriangleFan: - // FIXME: Primitive restart - // FIXME: This is wrong. The index expression here is different - // for the fan base vs. the others. + // The index expression in this case is different for the fan base. + // This is for the other vertices. It is very similar to the line strip case. + // Vertex ordinal XFB indices + // 0 0, 3, 6, 9 + // 1 1 + // 2 2, 4 + // 3 5, 7 + // 4 8, 10 + // 5 11 + // FIXME: This doesn't account for primitive restart! + // 0 0, 3, 6, 9 + // 1 1 + // 2 2, 4 + // 3 5, 7 + // 4 8, 10 + // 5 11 + // 6 + // 7 12, 15, 18, 21 + // 8 13 + // 9 14, 16 + // 10 17, 19 + // 11 20, 22 + // 12 23 + // ---- + // 0 0 + // 1 1 + // 2 2 + // 3 + // 4 3, 6 + // 5 4 + // 6 5, 7 + // 7 8 + // ---- + // 0 0, 3, 6 + // 1 1 + // 2 2, 4 + // 3 5, 7 + // 4 8 + // 5 + // 6 n/a + // 7 n/a + // 8 + // 9 9, 12, 15 + // 10 10 + // 11 11, 13 + // 12 14, 16 + // 13 17 index_expr = join("3 * (", to_expression(builtin_invocation_id_id), ".y * ", to_expression(builtin_stage_input_size_id), ".x + ", to_expression(builtin_invocation_id_id), ".x) - 2"); @@ -13927,9 +13971,9 @@ void CompilerMSL::fix_up_shader_inputs_outputs() statement("if (", to_expression(builtin_invocation_id_id), ".x + 2 < ", to_expression(builtin_stage_input_size), ".x)"); statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = ", to_expression(xfb_locals[i]), ";"); statement("if (", to_expression(builtin_invocation_id_id), ".x != 0 && ", to_expression(builtin_invocation_id_id), ".x != ", to_expression(builtin_stage_input_size), ".x - 1)"); - statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 1] = ", to_expression(xfb_locals[i]), ";"); + statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 1 - (", to_expression(builtin_invocation_id_id), ".x & 1)] = ", to_expression(xfb_locals[i]), ";"); statement("if (", to_expression(builtin_invocation_id_id), ".x > 1)"); - statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 2] = ", to_expression(xfb_locals[i]), ";"); + statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 4 - (", to_expression(builtin_invocation_id_id), ".x & 1)] = ", to_expression(xfb_locals[i]), ";"); break; case Options::PrimitiveType::TriangleFan: // This is the worst case of all. It's similar to the strip case, except now From b6279e50cac64e038a2d4d6165fc9087e33cd46e Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Sat, 16 Sep 2023 09:20:34 -0700 Subject: [PATCH 21/48] Write out triangle fans correctly. --- spirv_msl.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 36ca76594..59aeda8eb 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -13977,11 +13977,21 @@ void CompilerMSL::fix_up_shader_inputs_outputs() break; case Options::PrimitiveType::TriangleFan: // This is the worst case of all. It's similar to the strip case, except now - // we have to write the fan base vertex for *every* triangle. (FIXME) + // we have to write the fan base vertex for *every* triangle. // Again, primitive restart is a factor here. (FIXME) // FIXME: Bounds check the buffer, too. - // TODO TODO TODO - statement(to_name(xfb_buffers[i]), "[", index_expr, "] = ", to_expression(xfb_locals[i]), ";"); + statement("if (", to_expression(builtin_invocation_id_id), ".x == 0)"); + begin_scope(); + statement("for (uint i = 0; i < ", to_expression(builtin_stage_input_size), ".x - 2; ++i)"); + statement(" ", to_name(xfb_buffers[i]), "[i] = ", to_name(xfb_locals[i]), ";"); + end_scope(); + statement("else"); + begin_scope(); + statement("if (", to_expression(builtin_invocation_id_id), ".x != ", to_expression(builtin_stage_input_size), ".x - 1)"); + statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = " , to_expression(xfb_locals[i]), ";"); + statement("if (", to_expression(builtin_invocation_id_id), ".x != 1)"); + statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 2] = " , to_expression(xfb_locals[i]), ";"); + end_scope(); break; case Options::PrimitiveType::Dynamic: default: From 579635a9b6309477411b2cc92079bca3ce184c0e Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Sat, 16 Sep 2023 11:08:01 -0700 Subject: [PATCH 22/48] Fix build. --- spirv_msl.cpp | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 59aeda8eb..3ad90014e 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -13927,7 +13927,8 @@ void CompilerMSL::fix_up_shader_inputs_outputs() to_expression(builtin_stage_input_size_id), ".x + ", to_expression(builtin_invocation_id_id), ".x) - 2"); case Options::PrimitiveType::Dynamic: - SPIRV_CROSS_THROW("Dynamic primitive type is not yet supported."); + default: + SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); } // First, write the data out. for (uint32_t i = 0; i < kMaxXfbBuffers; ++i) @@ -13945,20 +13946,20 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // This is a little trickier, because we don't want to write an incomplete primitive. // Therefore, we must write only if we're an odd vertex, or we're not the last one. // FIXME: Bounds check the buffer, too. - statement("if ((", to_expression(builtin_invocation_id_id), ".x & 1) || ", to_expression(builtin_invocation_id_id) ".x < ", to_expression(builtin_stage_input_size), ".x - 1)"); + statement("if ((", to_expression(builtin_invocation_id_id), ".x & 1) || ", to_expression(builtin_invocation_id_id), ".x < ", to_expression(builtin_stage_input_size_id), ".x - 1)"); statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = " , to_expression(xfb_locals[i]), ";"); case Options::PrimitiveType::TriangleList: // This is similar to the previous case, except here the boundary condition is // if global_id.x % 3 == 2 or we're not one of the last two. // FIXME: Bounds check the buffer, too. - statement("if ((", to_expression(builtin_invocation_id_id), ".x % 3 == 2) || ", to_expression(builtin_invocation_id_id) ".x + 2 < ", to_expression(builtin_stage_input_size), ".x)"); + statement("if ((", to_expression(builtin_invocation_id_id), ".x % 3 == 2) || ", to_expression(builtin_invocation_id_id), ".x + 2 < ", to_expression(builtin_stage_input_size_id), ".x)"); statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = ", to_expression(xfb_locals[i]), ";"); case Options::PrimitiveType::LineStrip: // This is more complicated. We have to write out each individual line segment. // So if we're not the first or the last, we have to write twice. // On top of that, we also have to handle primitive restart. (FIXME) // FIXME: Bounds check the buffer, too. - statement("if (", to_expression(builtin_invocation_id_id), ".x != ", to_expression(builtin_stage_input_size), ".x - 1)"); + statement("if (", to_expression(builtin_invocation_id_id), ".x != ", to_expression(builtin_stage_input_size_id), ".x - 1)"); statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = " , to_expression(xfb_locals[i]), ";"); statement("if (", to_expression(builtin_invocation_id_id), ".x != 0)"); statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 1] = " , to_expression(xfb_locals[i]), ";"); @@ -13968,9 +13969,9 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // and now if there's fewer than two vertices in this strip, we can't write at all. // Again, primitive restart is a factor here. (FIXME) // FIXME: Bounds check the buffer, too. - statement("if (", to_expression(builtin_invocation_id_id), ".x + 2 < ", to_expression(builtin_stage_input_size), ".x)"); + statement("if (", to_expression(builtin_invocation_id_id), ".x + 2 < ", to_expression(builtin_stage_input_size_id), ".x)"); statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = ", to_expression(xfb_locals[i]), ";"); - statement("if (", to_expression(builtin_invocation_id_id), ".x != 0 && ", to_expression(builtin_invocation_id_id), ".x != ", to_expression(builtin_stage_input_size), ".x - 1)"); + statement("if (", to_expression(builtin_invocation_id_id), ".x != 0 && ", to_expression(builtin_invocation_id_id), ".x != ", to_expression(builtin_stage_input_size_id), ".x - 1)"); statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 1 - (", to_expression(builtin_invocation_id_id), ".x & 1)] = ", to_expression(xfb_locals[i]), ";"); statement("if (", to_expression(builtin_invocation_id_id), ".x > 1)"); statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 4 - (", to_expression(builtin_invocation_id_id), ".x & 1)] = ", to_expression(xfb_locals[i]), ";"); @@ -13982,12 +13983,12 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // FIXME: Bounds check the buffer, too. statement("if (", to_expression(builtin_invocation_id_id), ".x == 0)"); begin_scope(); - statement("for (uint i = 0; i < ", to_expression(builtin_stage_input_size), ".x - 2; ++i)"); + statement("for (uint i = 0; i < ", to_expression(builtin_stage_input_size_id), ".x - 2; ++i)"); statement(" ", to_name(xfb_buffers[i]), "[i] = ", to_name(xfb_locals[i]), ";"); end_scope(); statement("else"); begin_scope(); - statement("if (", to_expression(builtin_invocation_id_id), ".x != ", to_expression(builtin_stage_input_size), ".x - 1)"); + statement("if (", to_expression(builtin_invocation_id_id), ".x != ", to_expression(builtin_stage_input_size_id), ".x - 1)"); statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = " , to_expression(xfb_locals[i]), ";"); statement("if (", to_expression(builtin_invocation_id_id), ".x != 1)"); statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 2] = " , to_expression(xfb_locals[i]), ";"); @@ -14005,16 +14006,16 @@ void CompilerMSL::fix_up_shader_inputs_outputs() switch (msl_options.xfb_primitive_type) { case Options::PrimitiveType::PointList: - size_data_write_points += 1; + //size_data_write_points += 1; break; case Options::PrimitiveType::LineList: case Options::PrimitiveType::LineStrip: - size_data_write_lines += 2; + //size_data_write_lines += 2; break; case Options::PrimitiveType::TriangleList: case Options::PrimitiveType::TriangleStrip: case Options::PrimitiveType::TriangleFan: - size_data_write_triangles += 3; + //size_data_write_triangles += 3; break; case Options::PrimitiveType::Dynamic: break; From b86f51239774018f70e289d65f913e90f38e4e67 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Sat, 16 Sep 2023 11:51:10 -0700 Subject: [PATCH 23/48] Correct instance term in index. It should be based on the number of primitives written. Add missing instance term to the triangle fan base index. --- spirv_msl.cpp | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 3ad90014e..c0bbcf7e5 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -13795,12 +13795,20 @@ void CompilerMSL::fix_up_shader_inputs_outputs() switch (msl_options.xfb_primitive_type) { case Options::PrimitiveType::PointList: - case Options::PrimitiveType::LineList: - case Options::PrimitiveType::TriangleList: index_expr = join(to_expression(builtin_invocation_id_id), ".y * ", to_expression(builtin_stage_input_size_id), ".x + ", to_expression(builtin_invocation_id_id), ".x"); break; + case Options::PrimitiveType::LineList: + index_expr = join(to_expression(builtin_invocation_id_id), ".y * (", + to_expression(builtin_stage_input_size_id), ".x & ~1) + ", + to_expression(builtin_invocation_id_id), ".x"); + break; + case Options::PrimitiveType::TriangleList: + index_expr = join(to_expression(builtin_invocation_id_id), ".y * (", + to_expression(builtin_stage_input_size_id), ".x - ", to_expression(builtin_stage_input_size_id), ".x % 3) + ", + to_expression(builtin_invocation_id_id), ".x"); + break; case Options::PrimitiveType::LineStrip: // Calculation of the index expression is also complicated a bit because of this. // Some worked examples: @@ -13822,9 +13830,9 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // 8 7, 8 // 9 9, 10 // 10 11 - index_expr = join("2 * (", to_expression(builtin_invocation_id_id), ".y * ", - to_expression(builtin_stage_input_size_id), ".x + ", - to_expression(builtin_invocation_id_id), ".x)"); + index_expr = join("2 * ", to_expression(builtin_invocation_id_id), ".y * (", + to_expression(builtin_stage_input_size_id), ".x - 1) + 2 * ", + to_expression(builtin_invocation_id_id), ".x"); break; case Options::PrimitiveType::TriangleStrip: // Vertex ordinal XFB indices @@ -13872,8 +13880,8 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // 11 11, 14, 15 // 12 13, 16 // 13 17 - index_expr = join("3 * (", to_expression(builtin_invocation_id_id), ".y * ", - to_expression(builtin_stage_input_size_id), ".x + ", + index_expr = join("3 * ", to_expression(builtin_invocation_id_id), ".y * subsat(", + to_expression(builtin_stage_input_size_id), ".x, 2) + 3 * ", to_expression(builtin_invocation_id_id), ".x)"); case Options::PrimitiveType::TriangleFan: // The index expression in this case is different for the fan base. @@ -13923,8 +13931,8 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // 11 11, 13 // 12 14, 16 // 13 17 - index_expr = join("3 * (", to_expression(builtin_invocation_id_id), ".y * ", - to_expression(builtin_stage_input_size_id), ".x + ", + index_expr = join("3 * ", to_expression(builtin_invocation_id_id), ".y * subsat(", + to_expression(builtin_stage_input_size_id), ".x, 2) + 3 * ", to_expression(builtin_invocation_id_id), ".x) - 2"); case Options::PrimitiveType::Dynamic: default: @@ -13983,8 +13991,9 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // FIXME: Bounds check the buffer, too. statement("if (", to_expression(builtin_invocation_id_id), ".x == 0)"); begin_scope(); + statement("uint spvBaseIdx = 3 * ", to_expression(builtin_invocation_id_id), ".y * subsat(", to_expression(builtin_stage_input_size_id), ".x, 2);"); statement("for (uint i = 0; i < ", to_expression(builtin_stage_input_size_id), ".x - 2; ++i)"); - statement(" ", to_name(xfb_buffers[i]), "[i] = ", to_name(xfb_locals[i]), ";"); + statement(" ", to_name(xfb_buffers[i]), "[spvBaseIdx + i] = ", to_name(xfb_locals[i]), ";"); end_scope(); statement("else"); begin_scope(); From 66ca6c4154225688e6c7554f73ab9ba8502135e2 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Sat, 16 Sep 2023 12:30:18 -0700 Subject: [PATCH 24/48] Update counter after filling transform feedback buffers. --- spirv_msl.cpp | 47 +++++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index c0bbcf7e5..9d3350ac5 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -13801,12 +13801,12 @@ void CompilerMSL::fix_up_shader_inputs_outputs() break; case Options::PrimitiveType::LineList: index_expr = join(to_expression(builtin_invocation_id_id), ".y * (", - to_expression(builtin_stage_input_size_id), ".x & ~1) + ", + to_expression(builtin_stage_input_size_id), ".x & ~1u) + ", to_expression(builtin_invocation_id_id), ".x"); break; case Options::PrimitiveType::TriangleList: index_expr = join(to_expression(builtin_invocation_id_id), ".y * (", - to_expression(builtin_stage_input_size_id), ".x - ", to_expression(builtin_stage_input_size_id), ".x % 3) + ", + to_expression(builtin_stage_input_size_id), ".x - ", to_expression(builtin_stage_input_size_id), ".x % 3u) + ", to_expression(builtin_invocation_id_id), ".x"); break; case Options::PrimitiveType::LineStrip: @@ -13831,7 +13831,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // 9 9, 10 // 10 11 index_expr = join("2 * ", to_expression(builtin_invocation_id_id), ".y * (", - to_expression(builtin_stage_input_size_id), ".x - 1) + 2 * ", + to_expression(builtin_stage_input_size_id), ".x - 1u) + 2 * ", to_expression(builtin_invocation_id_id), ".x"); break; case Options::PrimitiveType::TriangleStrip: @@ -13881,7 +13881,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // 12 13, 16 // 13 17 index_expr = join("3 * ", to_expression(builtin_invocation_id_id), ".y * subsat(", - to_expression(builtin_stage_input_size_id), ".x, 2) + 3 * ", + to_expression(builtin_stage_input_size_id), ".x, 2u) + 3 * ", to_expression(builtin_invocation_id_id), ".x)"); case Options::PrimitiveType::TriangleFan: // The index expression in this case is different for the fan base. @@ -13932,8 +13932,8 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // 12 14, 16 // 13 17 index_expr = join("3 * ", to_expression(builtin_invocation_id_id), ".y * subsat(", - to_expression(builtin_stage_input_size_id), ".x, 2) + 3 * ", - to_expression(builtin_invocation_id_id), ".x) - 2"); + to_expression(builtin_stage_input_size_id), ".x, 2u) + 3 * ", + to_expression(builtin_invocation_id_id), ".x) - 2u"); case Options::PrimitiveType::Dynamic: default: SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); @@ -13954,23 +13954,23 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // This is a little trickier, because we don't want to write an incomplete primitive. // Therefore, we must write only if we're an odd vertex, or we're not the last one. // FIXME: Bounds check the buffer, too. - statement("if ((", to_expression(builtin_invocation_id_id), ".x & 1) || ", to_expression(builtin_invocation_id_id), ".x < ", to_expression(builtin_stage_input_size_id), ".x - 1)"); + statement("if ((", to_expression(builtin_invocation_id_id), ".x & 1) || ", to_expression(builtin_invocation_id_id), ".x < ", to_expression(builtin_stage_input_size_id), ".x - 1u)"); statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = " , to_expression(xfb_locals[i]), ";"); case Options::PrimitiveType::TriangleList: // This is similar to the previous case, except here the boundary condition is // if global_id.x % 3 == 2 or we're not one of the last two. // FIXME: Bounds check the buffer, too. - statement("if ((", to_expression(builtin_invocation_id_id), ".x % 3 == 2) || ", to_expression(builtin_invocation_id_id), ".x + 2 < ", to_expression(builtin_stage_input_size_id), ".x)"); + statement("if ((", to_expression(builtin_invocation_id_id), ".x % 3u == 2) || ", to_expression(builtin_invocation_id_id), ".x + 2 < ", to_expression(builtin_stage_input_size_id), ".x)"); statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = ", to_expression(xfb_locals[i]), ";"); case Options::PrimitiveType::LineStrip: // This is more complicated. We have to write out each individual line segment. // So if we're not the first or the last, we have to write twice. // On top of that, we also have to handle primitive restart. (FIXME) // FIXME: Bounds check the buffer, too. - statement("if (", to_expression(builtin_invocation_id_id), ".x != ", to_expression(builtin_stage_input_size_id), ".x - 1)"); + statement("if (", to_expression(builtin_invocation_id_id), ".x != ", to_expression(builtin_stage_input_size_id), ".x - 1u)"); statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = " , to_expression(xfb_locals[i]), ";"); statement("if (", to_expression(builtin_invocation_id_id), ".x != 0)"); - statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 1] = " , to_expression(xfb_locals[i]), ";"); + statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 1u] = " , to_expression(xfb_locals[i]), ";"); break; case Options::PrimitiveType::TriangleStrip: // This is even worse. We have to write three times if we're not first or last, @@ -13979,10 +13979,10 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // FIXME: Bounds check the buffer, too. statement("if (", to_expression(builtin_invocation_id_id), ".x + 2 < ", to_expression(builtin_stage_input_size_id), ".x)"); statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = ", to_expression(xfb_locals[i]), ";"); - statement("if (", to_expression(builtin_invocation_id_id), ".x != 0 && ", to_expression(builtin_invocation_id_id), ".x != ", to_expression(builtin_stage_input_size_id), ".x - 1)"); - statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 1 - (", to_expression(builtin_invocation_id_id), ".x & 1)] = ", to_expression(xfb_locals[i]), ";"); + statement("if (", to_expression(builtin_invocation_id_id), ".x != 0 && ", to_expression(builtin_invocation_id_id), ".x != ", to_expression(builtin_stage_input_size_id), ".x - 1u)"); + statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 1u - (", to_expression(builtin_invocation_id_id), ".x & 1u)] = ", to_expression(xfb_locals[i]), ";"); statement("if (", to_expression(builtin_invocation_id_id), ".x > 1)"); - statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 4 - (", to_expression(builtin_invocation_id_id), ".x & 1)] = ", to_expression(xfb_locals[i]), ";"); + statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 4u - (", to_expression(builtin_invocation_id_id), ".x & 1u)] = ", to_expression(xfb_locals[i]), ";"); break; case Options::PrimitiveType::TriangleFan: // This is the worst case of all. It's similar to the strip case, except now @@ -13991,16 +13991,16 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // FIXME: Bounds check the buffer, too. statement("if (", to_expression(builtin_invocation_id_id), ".x == 0)"); begin_scope(); - statement("uint spvBaseIdx = 3 * ", to_expression(builtin_invocation_id_id), ".y * subsat(", to_expression(builtin_stage_input_size_id), ".x, 2);"); - statement("for (uint i = 0; i < ", to_expression(builtin_stage_input_size_id), ".x - 2; ++i)"); + statement("uint spvBaseIdx = 3 * ", to_expression(builtin_invocation_id_id), ".y * subsat(", to_expression(builtin_stage_input_size_id), ".x, 2u);"); + statement("for (uint i = 0; i < subsat(", to_expression(builtin_stage_input_size_id), ".x, 2u); ++i)"); statement(" ", to_name(xfb_buffers[i]), "[spvBaseIdx + i] = ", to_name(xfb_locals[i]), ";"); end_scope(); statement("else"); begin_scope(); - statement("if (", to_expression(builtin_invocation_id_id), ".x != ", to_expression(builtin_stage_input_size_id), ".x - 1)"); + statement("if (", to_expression(builtin_invocation_id_id), ".x != ", to_expression(builtin_stage_input_size_id), ".x - 1u)"); statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = " , to_expression(xfb_locals[i]), ";"); statement("if (", to_expression(builtin_invocation_id_id), ".x != 1)"); - statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 2] = " , to_expression(xfb_locals[i]), ";"); + statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 2u] = " , to_expression(xfb_locals[i]), ";"); end_scope(); break; case Options::PrimitiveType::Dynamic: @@ -14010,21 +14010,27 @@ void CompilerMSL::fix_up_shader_inputs_outputs() } statement("threadgroup_barrier(mem_device);"); // Now update the amount of data written to the buffer. + statement("if (", to_expression(builtin_invocation_id_id), ".xy == 0)"); + begin_scope(); for (uint32_t i = 0; i < kMaxXfbBuffers; ++i) { if (xfb_buffers[i] == 0) continue; switch (msl_options.xfb_primitive_type) { case Options::PrimitiveType::PointList: - //size_data_write_points += 1; + statement("atomic_store_explicit(", to_name(xfb_counters[i]), "spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * ", to_expression(builtin_stage_input_size_id), ".x * ", to_expression(builtin_stage_input_size_id), ".y, memory_order_relaxed);"); break; case Options::PrimitiveType::LineList: + statement("atomic_store_explicit(", to_name(xfb_counters[i]), "spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * (", to_expression(builtin_stage_input_size_id), ".x & ~1u) * ", to_expression(builtin_stage_input_size_id), ".y, memory_order_relaxed);"); + break; case Options::PrimitiveType::LineStrip: - //size_data_write_lines += 2; + statement("atomic_store_explicit(", to_name(xfb_counters[i]), "spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * 2 * (", to_expression(builtin_stage_input_size_id), ".x - 1u) * ", to_expression(builtin_stage_input_size_id), ".y, memory_order_relaxed);"); break; case Options::PrimitiveType::TriangleList: + statement("atomic_store_explicit(", to_name(xfb_counters[i]), "spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * (", to_expression(builtin_stage_input_size_id), ".x - ", to_expression(builtin_stage_input_size_id), ".x % 3u) * ", to_expression(builtin_stage_input_size_id), ".y, memory_order_relaxed);"); + break; case Options::PrimitiveType::TriangleStrip: case Options::PrimitiveType::TriangleFan: - //size_data_write_triangles += 3; + statement("atomic_store_explicit(", to_name(xfb_counters[i]), "spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * 3 * subsat(", to_expression(builtin_stage_input_size_id), ".x, 2u) * ", to_expression(builtin_stage_input_size_id), ".y, memory_order_relaxed);"); break; case Options::PrimitiveType::Dynamic: break; @@ -14034,6 +14040,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() } } + end_scope(); }); } } From 948651bdbf3a89ca59c983bb7c7f17ae277e48c0 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Sat, 16 Sep 2023 14:38:26 -0700 Subject: [PATCH 25/48] Add missing breaks. --- spirv_msl.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 9d3350ac5..d146a70c6 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -13883,6 +13883,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() index_expr = join("3 * ", to_expression(builtin_invocation_id_id), ".y * subsat(", to_expression(builtin_stage_input_size_id), ".x, 2u) + 3 * ", to_expression(builtin_invocation_id_id), ".x)"); + break; case Options::PrimitiveType::TriangleFan: // The index expression in this case is different for the fan base. // This is for the other vertices. It is very similar to the line strip case. @@ -13934,6 +13935,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() index_expr = join("3 * ", to_expression(builtin_invocation_id_id), ".y * subsat(", to_expression(builtin_stage_input_size_id), ".x, 2u) + 3 * ", to_expression(builtin_invocation_id_id), ".x) - 2u"); + break; case Options::PrimitiveType::Dynamic: default: SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); @@ -13956,12 +13958,14 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // FIXME: Bounds check the buffer, too. statement("if ((", to_expression(builtin_invocation_id_id), ".x & 1) || ", to_expression(builtin_invocation_id_id), ".x < ", to_expression(builtin_stage_input_size_id), ".x - 1u)"); statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = " , to_expression(xfb_locals[i]), ";"); + break; case Options::PrimitiveType::TriangleList: // This is similar to the previous case, except here the boundary condition is // if global_id.x % 3 == 2 or we're not one of the last two. // FIXME: Bounds check the buffer, too. statement("if ((", to_expression(builtin_invocation_id_id), ".x % 3u == 2) || ", to_expression(builtin_invocation_id_id), ".x + 2 < ", to_expression(builtin_stage_input_size_id), ".x)"); statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = ", to_expression(xfb_locals[i]), ";"); + break; case Options::PrimitiveType::LineStrip: // This is more complicated. We have to write out each individual line segment. // So if we're not the first or the last, we have to write twice. @@ -14033,7 +14037,6 @@ void CompilerMSL::fix_up_shader_inputs_outputs() statement("atomic_store_explicit(", to_name(xfb_counters[i]), "spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * 3 * subsat(", to_expression(builtin_stage_input_size_id), ".x, 2u) * ", to_expression(builtin_stage_input_size_id), ".y, memory_order_relaxed);"); break; case Options::PrimitiveType::Dynamic: - break; default: SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); break; From e74800fe83a944fca61fb71e478a0a67744c27bf Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Sat, 16 Sep 2023 14:44:07 -0700 Subject: [PATCH 26/48] Add missing commas. --- spirv_msl.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index d146a70c6..2551b1404 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -14021,20 +14021,20 @@ void CompilerMSL::fix_up_shader_inputs_outputs() switch (msl_options.xfb_primitive_type) { case Options::PrimitiveType::PointList: - statement("atomic_store_explicit(", to_name(xfb_counters[i]), "spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * ", to_expression(builtin_stage_input_size_id), ".x * ", to_expression(builtin_stage_input_size_id), ".y, memory_order_relaxed);"); + statement("atomic_store_explicit(", to_name(xfb_counters[i]), ", spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * ", to_expression(builtin_stage_input_size_id), ".x * ", to_expression(builtin_stage_input_size_id), ".y, memory_order_relaxed);"); break; case Options::PrimitiveType::LineList: - statement("atomic_store_explicit(", to_name(xfb_counters[i]), "spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * (", to_expression(builtin_stage_input_size_id), ".x & ~1u) * ", to_expression(builtin_stage_input_size_id), ".y, memory_order_relaxed);"); + statement("atomic_store_explicit(", to_name(xfb_counters[i]), ", spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * (", to_expression(builtin_stage_input_size_id), ".x & ~1u) * ", to_expression(builtin_stage_input_size_id), ".y, memory_order_relaxed);"); break; case Options::PrimitiveType::LineStrip: - statement("atomic_store_explicit(", to_name(xfb_counters[i]), "spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * 2 * (", to_expression(builtin_stage_input_size_id), ".x - 1u) * ", to_expression(builtin_stage_input_size_id), ".y, memory_order_relaxed);"); + statement("atomic_store_explicit(", to_name(xfb_counters[i]), ", spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * 2 * (", to_expression(builtin_stage_input_size_id), ".x - 1u) * ", to_expression(builtin_stage_input_size_id), ".y, memory_order_relaxed);"); break; case Options::PrimitiveType::TriangleList: - statement("atomic_store_explicit(", to_name(xfb_counters[i]), "spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * (", to_expression(builtin_stage_input_size_id), ".x - ", to_expression(builtin_stage_input_size_id), ".x % 3u) * ", to_expression(builtin_stage_input_size_id), ".y, memory_order_relaxed);"); + statement("atomic_store_explicit(", to_name(xfb_counters[i]), ", spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * (", to_expression(builtin_stage_input_size_id), ".x - ", to_expression(builtin_stage_input_size_id), ".x % 3u) * ", to_expression(builtin_stage_input_size_id), ".y, memory_order_relaxed);"); break; case Options::PrimitiveType::TriangleStrip: case Options::PrimitiveType::TriangleFan: - statement("atomic_store_explicit(", to_name(xfb_counters[i]), "spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * 3 * subsat(", to_expression(builtin_stage_input_size_id), ".x, 2u) * ", to_expression(builtin_stage_input_size_id), ".y, memory_order_relaxed);"); + statement("atomic_store_explicit(", to_name(xfb_counters[i]), ", spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * 3 * subsat(", to_expression(builtin_stage_input_size_id), ".x, 2u) * ", to_expression(builtin_stage_input_size_id), ".y, memory_order_relaxed);"); break; case Options::PrimitiveType::Dynamic: default: From b352521b601b3bcceb88265afe87cd1a9c575e9e Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Sat, 16 Sep 2023 14:45:51 -0700 Subject: [PATCH 27/48] Add missing scale for triangle fan case. --- spirv_msl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 2551b1404..180a86515 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -13997,7 +13997,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() begin_scope(); statement("uint spvBaseIdx = 3 * ", to_expression(builtin_invocation_id_id), ".y * subsat(", to_expression(builtin_stage_input_size_id), ".x, 2u);"); statement("for (uint i = 0; i < subsat(", to_expression(builtin_stage_input_size_id), ".x, 2u); ++i)"); - statement(" ", to_name(xfb_buffers[i]), "[spvBaseIdx + i] = ", to_name(xfb_locals[i]), ";"); + statement(" ", to_name(xfb_buffers[i]), "[spvBaseIdx + 3 * i] = ", to_name(xfb_locals[i]), ";"); end_scope(); statement("else"); begin_scope(); From 2798c48a6100318fe4a977e90537f40d76298e96 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Sat, 16 Sep 2023 15:06:58 -0700 Subject: [PATCH 28/48] Hoist common index expression out. --- spirv_msl.cpp | 78 +++++++++++++++++++++++++-------------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 180a86515..7a2a60a94 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -13932,14 +13932,15 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // 11 11, 13 // 12 14, 16 // 13 17 - index_expr = join("3 * ", to_expression(builtin_invocation_id_id), ".y * subsat(", - to_expression(builtin_stage_input_size_id), ".x, 2u) + 3 * ", - to_expression(builtin_invocation_id_id), ".x) - 2u"); + statement("uint spvXfbBaseIndex = 3 * ", to_expression(builtin_invocation_id_id), ".y * subsat(", + to_expression(builtin_stage_input_size_id), ".x, 2u);"); + index_expr = join("spvXfbBaseIndex + 3 * ", to_expression(builtin_invocation_id_id), ".x) - 2u"); break; case Options::PrimitiveType::Dynamic: default: SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); } + statement("uint spvXfbIndex = ", index_expr, ";"); // First, write the data out. for (uint32_t i = 0; i < kMaxXfbBuffers; ++i) { @@ -13950,21 +13951,21 @@ void CompilerMSL::fix_up_shader_inputs_outputs() { case Options::PrimitiveType::PointList: // This is straightforward enough. Just make sure we don't overstep the data buffer (FIXME). - statement(to_name(xfb_buffers[i]), "[", index_expr, "] = ", to_expression(xfb_locals[i]), ";"); + statement(to_name(xfb_buffers[i]), "[spvXfbIndex] = ", to_expression(xfb_locals[i]), ";"); break; case Options::PrimitiveType::LineList: // This is a little trickier, because we don't want to write an incomplete primitive. // Therefore, we must write only if we're an odd vertex, or we're not the last one. // FIXME: Bounds check the buffer, too. statement("if ((", to_expression(builtin_invocation_id_id), ".x & 1) || ", to_expression(builtin_invocation_id_id), ".x < ", to_expression(builtin_stage_input_size_id), ".x - 1u)"); - statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = " , to_expression(xfb_locals[i]), ";"); + statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex] = " , to_expression(xfb_locals[i]), ";"); break; case Options::PrimitiveType::TriangleList: // This is similar to the previous case, except here the boundary condition is // if global_id.x % 3 == 2 or we're not one of the last two. // FIXME: Bounds check the buffer, too. statement("if ((", to_expression(builtin_invocation_id_id), ".x % 3u == 2) || ", to_expression(builtin_invocation_id_id), ".x + 2 < ", to_expression(builtin_stage_input_size_id), ".x)"); - statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = ", to_expression(xfb_locals[i]), ";"); + statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex] = ", to_expression(xfb_locals[i]), ";"); break; case Options::PrimitiveType::LineStrip: // This is more complicated. We have to write out each individual line segment. @@ -13972,9 +13973,9 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // On top of that, we also have to handle primitive restart. (FIXME) // FIXME: Bounds check the buffer, too. statement("if (", to_expression(builtin_invocation_id_id), ".x != ", to_expression(builtin_stage_input_size_id), ".x - 1u)"); - statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = " , to_expression(xfb_locals[i]), ";"); + statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex] = " , to_expression(xfb_locals[i]), ";"); statement("if (", to_expression(builtin_invocation_id_id), ".x != 0)"); - statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 1u] = " , to_expression(xfb_locals[i]), ";"); + statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex - 1u] = " , to_expression(xfb_locals[i]), ";"); break; case Options::PrimitiveType::TriangleStrip: // This is even worse. We have to write three times if we're not first or last, @@ -13982,11 +13983,11 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // Again, primitive restart is a factor here. (FIXME) // FIXME: Bounds check the buffer, too. statement("if (", to_expression(builtin_invocation_id_id), ".x + 2 < ", to_expression(builtin_stage_input_size_id), ".x)"); - statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = ", to_expression(xfb_locals[i]), ";"); + statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex] = ", to_expression(xfb_locals[i]), ";"); statement("if (", to_expression(builtin_invocation_id_id), ".x != 0 && ", to_expression(builtin_invocation_id_id), ".x != ", to_expression(builtin_stage_input_size_id), ".x - 1u)"); - statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 1u - (", to_expression(builtin_invocation_id_id), ".x & 1u)] = ", to_expression(xfb_locals[i]), ";"); + statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex - 1u - (", to_expression(builtin_invocation_id_id), ".x & 1u)] = ", to_expression(xfb_locals[i]), ";"); statement("if (", to_expression(builtin_invocation_id_id), ".x > 1)"); - statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 4u - (", to_expression(builtin_invocation_id_id), ".x & 1u)] = ", to_expression(xfb_locals[i]), ";"); + statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex - 4u - (", to_expression(builtin_invocation_id_id), ".x & 1u)] = ", to_expression(xfb_locals[i]), ";"); break; case Options::PrimitiveType::TriangleFan: // This is the worst case of all. It's similar to the strip case, except now @@ -13995,16 +13996,15 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // FIXME: Bounds check the buffer, too. statement("if (", to_expression(builtin_invocation_id_id), ".x == 0)"); begin_scope(); - statement("uint spvBaseIdx = 3 * ", to_expression(builtin_invocation_id_id), ".y * subsat(", to_expression(builtin_stage_input_size_id), ".x, 2u);"); statement("for (uint i = 0; i < subsat(", to_expression(builtin_stage_input_size_id), ".x, 2u); ++i)"); - statement(" ", to_name(xfb_buffers[i]), "[spvBaseIdx + 3 * i] = ", to_name(xfb_locals[i]), ";"); + statement(" ", to_name(xfb_buffers[i]), "[spvXfbBaseIndex + 3 * i] = ", to_name(xfb_locals[i]), ";"); end_scope(); statement("else"); begin_scope(); statement("if (", to_expression(builtin_invocation_id_id), ".x != ", to_expression(builtin_stage_input_size_id), ".x - 1u)"); - statement(" ", to_name(xfb_buffers[i]), "[", index_expr, "] = " , to_expression(xfb_locals[i]), ";"); + statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex] = " , to_expression(xfb_locals[i]), ";"); statement("if (", to_expression(builtin_invocation_id_id), ".x != 1)"); - statement(" ", to_name(xfb_buffers[i]), "[", index_expr, " - 2u] = " , to_expression(xfb_locals[i]), ";"); + statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex - 2u] = " , to_expression(xfb_locals[i]), ";"); end_scope(); break; case Options::PrimitiveType::Dynamic: @@ -14016,32 +14016,32 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // Now update the amount of data written to the buffer. statement("if (", to_expression(builtin_invocation_id_id), ".xy == 0)"); begin_scope(); + switch (msl_options.xfb_primitive_type) + { + case Options::PrimitiveType::PointList: + statement("uint spvWritten = ", to_expression(builtin_stage_input_size_id), ".x * ", to_expression(builtin_stage_input_size_id), ".y;"); + break; + case Options::PrimitiveType::LineList: + statement("uint spvWritten = (", to_expression(builtin_stage_input_size_id), ".x & ~1u) * ", to_expression(builtin_stage_input_size_id), ".y;"); + break; + case Options::PrimitiveType::LineStrip: + statement("uint spvWritten = 2 * (", to_expression(builtin_stage_input_size_id), ".x - 1u) * ", to_expression(builtin_stage_input_size_id), ".y;"); + break; + case Options::PrimitiveType::TriangleList: + statement("uint spvWritten = (", to_expression(builtin_stage_input_size_id), ".x - ", to_expression(builtin_stage_input_size_id), ".x % 3u) * ", to_expression(builtin_stage_input_size_id), ".y;"); + break; + case Options::PrimitiveType::TriangleStrip: + case Options::PrimitiveType::TriangleFan: + statement("uint spvWritten = 3 * subsat(", to_expression(builtin_stage_input_size_id), ".x, 2u) * ", to_expression(builtin_stage_input_size_id), ".y;"); + break; + case Options::PrimitiveType::Dynamic: + default: + SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); + break; + } for (uint32_t i = 0; i < kMaxXfbBuffers; ++i) { if (xfb_buffers[i] == 0) continue; - switch (msl_options.xfb_primitive_type) - { - case Options::PrimitiveType::PointList: - statement("atomic_store_explicit(", to_name(xfb_counters[i]), ", spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * ", to_expression(builtin_stage_input_size_id), ".x * ", to_expression(builtin_stage_input_size_id), ".y, memory_order_relaxed);"); - break; - case Options::PrimitiveType::LineList: - statement("atomic_store_explicit(", to_name(xfb_counters[i]), ", spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * (", to_expression(builtin_stage_input_size_id), ".x & ~1u) * ", to_expression(builtin_stage_input_size_id), ".y, memory_order_relaxed);"); - break; - case Options::PrimitiveType::LineStrip: - statement("atomic_store_explicit(", to_name(xfb_counters[i]), ", spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * 2 * (", to_expression(builtin_stage_input_size_id), ".x - 1u) * ", to_expression(builtin_stage_input_size_id), ".y, memory_order_relaxed);"); - break; - case Options::PrimitiveType::TriangleList: - statement("atomic_store_explicit(", to_name(xfb_counters[i]), ", spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * (", to_expression(builtin_stage_input_size_id), ".x - ", to_expression(builtin_stage_input_size_id), ".x % 3u) * ", to_expression(builtin_stage_input_size_id), ".y, memory_order_relaxed);"); - break; - case Options::PrimitiveType::TriangleStrip: - case Options::PrimitiveType::TriangleFan: - statement("atomic_store_explicit(", to_name(xfb_counters[i]), ", spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * 3 * subsat(", to_expression(builtin_stage_input_size_id), ".x, 2u) * ", to_expression(builtin_stage_input_size_id), ".y, memory_order_relaxed);"); - break; - case Options::PrimitiveType::Dynamic: - default: - SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); - break; - } - + statement("atomic_store_explicit(", to_name(xfb_counters[i]), ", spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * spvWritten, memory_order_relaxed);"); } end_scope(); }); From b93190072f0a94730b0453d098b41d6fcfa7a204 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Sat, 16 Sep 2023 15:39:59 -0700 Subject: [PATCH 29/48] Make sure the local copy of the output is declared. We need to use a local copy because the vertex may need to be written more than once. --- spirv_msl.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 7a2a60a94..d5111432f 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -18115,6 +18115,8 @@ void CompilerMSL::analyze_xfb_buffers() set(local_var_id, local_ptr_type_id, StorageClassFunction); set_name(local_var_id, join("spvXfbOutput", xfb_buffer)); + get(ir.default_entry_point).add_local_variable(local_var_id); + vars_needing_early_declaration.push_back(local_var_id); set(buffer_var_id, ptr_type_id, StorageClassUniform); set_name(buffer_var_id, join("spvXfb", xfb_buffer)); @@ -18149,7 +18151,6 @@ void CompilerMSL::analyze_xfb_buffers() member_index++; // FIXME: Still to do: - // - Add locals to entry point // - Make sure Xfb-captured outputs aren't in "normal" capture_output_to_buffer } } From ab2b37ba0e6c0e4ed8f485413c3767270f91b73c Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Sun, 17 Sep 2023 21:40:02 -0700 Subject: [PATCH 30/48] Don't add captured outputs to the regular output struct. Instead, go through the local variables we declared earlier. Almost done. --- spirv_msl.cpp | 60 +++++++++++++++++++++++++++++++++++++++++++++------ spirv_msl.hpp | 1 + 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index d5111432f..2f305ba3a 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -3792,6 +3792,26 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) if (bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance) hidden = false; + // Don't add captured transform feedback outputs. + if (needs_transform_feedback() && storage == StorageClassOutput) + { + if (is_block) + { + uint32_t mbr_cnt = uint32_t(type.member_types.size()); + bool all_captured = true; + for (uint32_t i = 0; i < mbr_cnt; i++) + { + bool active = !is_builtin || has_active_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn)), storage); + all_captured = all_captured && (has_member_decoration(type.self, i, DecorationOffset) || !active); + } + hidden = hidden || all_captured; + } + else if (has_decoration(var_id, DecorationOffset)) + { + hidden = true; + } + } + // It's not enough to simply avoid marking fragment outputs if the pipeline won't // accept them. We can't put them in the struct at all, or otherwise the compiler // complains that the outputs weren't explicitly marked. @@ -15898,6 +15918,8 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) case BuiltInLayer: if (is_tesc_shader()) break; + if (needs_transform_feedback() && xfb_captured_builtins.count(builtin)) + return join(to_name(xfb_locals[xfb_captured_builtins[builtin]]), ".", CompilerGLSL::builtin_to_glsl(builtin, storage)); if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point) && !is_stage_output_builtin_masked(builtin)) return stage_out_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage); @@ -17971,6 +17993,7 @@ void CompilerMSL::activate_argument_buffer_resources() void CompilerMSL::analyze_xfb_buffers() { // Gather all used outputs and sort them out into transform feedback buffers. + auto &entry_func = get(ir.default_entry_point); struct XfbOutput { @@ -18012,7 +18035,7 @@ void CompilerMSL::analyze_xfb_buffers() xfb_strides[xfb_buffer_num] = xfb_stride; } - if (type.basetype == SPIRType::Struct) + if (type.basetype == SPIRType::Struct && has_decoration(type.self, DecorationBlock)) { for (uint32_t i = 0; i < type.member_types.size(); ++i) { @@ -18026,10 +18049,17 @@ void CompilerMSL::analyze_xfb_buffers() uint32_t mbr_xfb_buffer_num = has_member_decoration(type.self, i, DecorationXfbBuffer) ? get_member_decoration(type.self, i, DecorationXfbBuffer) : xfb_buffer_num; string name; if (has_member_decoration(type.self, i, DecorationBuiltIn)) + { // Force this to have the proper name. - name = builtin_to_glsl(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn)), StorageClassOutput); + BuiltIn bi_type = BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn)); + name = builtin_to_glsl(bi_type, StorageClassOutput); + // Make sure it's referenced properly. + xfb_captured_builtins.insert(make_pair(bi_type, mbr_xfb_buffer_num)); + } else + { name = to_member_name(type, i); + } xfb_outputs[mbr_xfb_buffer_num].emplace_back({&var, name, i, xfb_offset, true}); if (has_member_decoration(type.self, i, DecorationXfbStride)) { @@ -18056,10 +18086,17 @@ void CompilerMSL::analyze_xfb_buffers() uint32_t xfb_offset = get_decoration(self, DecorationOffset); string name; if (has_decoration(self, DecorationBuiltIn)) + { // Force this to have the proper name. - name = builtin_to_glsl(BuiltIn(get_decoration(self, DecorationBuiltIn)), StorageClassOutput); + BuiltIn bi_type = BuiltIn(get_decoration(self, DecorationBuiltIn)); + name = builtin_to_glsl(bi_type, StorageClassOutput); + // Make sure it's referenced properly. + xfb_captured_builtins.insert(make_pair(bi_type, xfb_buffer_num)); + } else + { name = to_name(self); + } xfb_outputs[xfb_buffer_num].emplace_back({&var, name, 0, xfb_offset, false}); } }); @@ -18115,7 +18152,7 @@ void CompilerMSL::analyze_xfb_buffers() set(local_var_id, local_ptr_type_id, StorageClassFunction); set_name(local_var_id, join("spvXfbOutput", xfb_buffer)); - get(ir.default_entry_point).add_local_variable(local_var_id); + entry_func.add_local_variable(local_var_id); vars_needing_early_declaration.push_back(local_var_id); set(buffer_var_id, ptr_type_id, StorageClassUniform); @@ -18144,14 +18181,23 @@ void CompilerMSL::analyze_xfb_buffers() else { buffer_type.member_types.push_back(type.member_types[member_index]); + string qual_var_name = join(to_name(local_var_id), ".", mbr_name); + if (is_member_builtin(type, member_index, nullptr)) + { + set_member_qualified_name(type.self, member_index, qual_var_name); + } + else + { + // n.b. Must come BEFORE the big one that writes out the XFB buffers! + entry_func.fixup_hooks_out.push_back([=]() { + statement(qual_var_name, " = ", to_name(var.self), ".", to_member_name(type, member_index), ";"); + }); + } } set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationInterfaceOrigID, var.self); member_index++; - - // FIXME: Still to do: - // - Make sure Xfb-captured outputs aren't in "normal" capture_output_to_buffer } } } diff --git a/spirv_msl.hpp b/spirv_msl.hpp index 3a5185328..28e42550b 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -1228,6 +1228,7 @@ class CompilerMSL : public CompilerGLSL VariableID xfb_buffers[kMaxXfbBuffers]; VariableID xfb_locals[kMaxXfbBuffers]; uint32_t xfb_strides[kMaxXfbBuffers]; + std::unordered_map xfb_captured_builtins; // Must be ordered since array is in a specific order. std::map> buffers_requiring_dynamic_offset; From a1d92e715cc125baf3790dabf12c6f16c88fb940 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Sun, 17 Sep 2023 23:24:40 -0700 Subject: [PATCH 31/48] Add offsets and padding to transform feedback structs. This gives them the correct layout in memory. --- spirv_msl.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 2f305ba3a..e13e59266 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -18176,11 +18176,13 @@ void CompilerMSL::analyze_xfb_buffers() { // Drop pointer information when we emit the outputs into a struct. buffer_type.member_types.push_back(get_variable_data_type_id(var)); + set_member_decoration(type_id, buffer_type.member_types.size() - 1, DecorationOffset, get_decoration(var.self, DecorationOffset)); set_qualified_name(var.self, join(to_name(local_var_id), ".", mbr_name)); } else { buffer_type.member_types.push_back(type.member_types[member_index]); + set_member_decoration(type_id, buffer_type.member_types.size() - 1, DecorationOffset, get_member_decoration(type.self, member_index, DecorationOffset)); string qual_var_name = join(to_name(local_var_id), ".", mbr_name); if (is_member_builtin(type, member_index, nullptr)) { @@ -18199,6 +18201,17 @@ void CompilerMSL::analyze_xfb_buffers() var.self); member_index++; } + + // Because we have custom offsets and stride, the buffer struct needs repacking. + set_extended_decoration(type_id, SPIRVCrossDecorationBufferBlockRepacked); + // If the declared stride is not a multiple of the struct's natural alignment, + // then the struct needs to be packed. + bool packed_buffer = xfb_strides[xfb_buffer] % get_declared_type_alignment_msl(buffer_type, false, false) != 0; + if (packed_buffer) + mark_struct_members_packed(buffer_type); + // Make sure struct is padded to declared stride, so indexing works properly. + if (xfb_strides[xfb_buffer] > get_declared_struct_size_msl(buffer_type, packed_buffer)) + set_extended_decoration(type_id, SPIRVCrossDecorationPaddingTarget, xfb_strides[xfb_buffer]); } } From 2959f3a9f711bf04b7137395c653c0cf27f619c2 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Sun, 17 Sep 2023 23:28:55 -0700 Subject: [PATCH 32/48] Use mark_as_packable() to mark the buffer structs as needing repacking. That way, any nested structs get repacked as well. --- spirv_msl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index e13e59266..b77bd48c1 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -18203,7 +18203,7 @@ void CompilerMSL::analyze_xfb_buffers() } // Because we have custom offsets and stride, the buffer struct needs repacking. - set_extended_decoration(type_id, SPIRVCrossDecorationBufferBlockRepacked); + mark_as_packable(buffer_type); // If the declared stride is not a multiple of the struct's natural alignment, // then the struct needs to be packed. bool packed_buffer = xfb_strides[xfb_buffer] % get_declared_type_alignment_msl(buffer_type, false, false) != 0; From 16dd1f1172b4b99d2f0b840c3830442effd91c06 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Sun, 17 Sep 2023 23:41:24 -0700 Subject: [PATCH 33/48] Fix broken constant generated MSL. --- spirv_msl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index b77bd48c1..c4844a87b 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -14032,9 +14032,9 @@ void CompilerMSL::fix_up_shader_inputs_outputs() SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); } } - statement("threadgroup_barrier(mem_device);"); + statement("threadgroup_barrier(mem_flags::mem_device);"); // Now update the amount of data written to the buffer. - statement("if (", to_expression(builtin_invocation_id_id), ".xy == 0)"); + statement("if (all(", to_expression(builtin_invocation_id_id), ".xy == 0))"); begin_scope(); switch (msl_options.xfb_primitive_type) { From 742f7259106d48de6aae621b0e6033068a756832 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Mon, 18 Sep 2023 00:18:24 -0700 Subject: [PATCH 34/48] Only create a per-patch output block for tessellation control shaders. This is the only type of shader that can even have such outputs. Not only does this save some work in most cases, it also fixes a problem with the next patch. --- spirv_msl.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index c4844a87b..c26c9eadb 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -1537,7 +1537,8 @@ string CompilerMSL::compile() // Do output first to ensure out. is declared at top of entry function. qual_pos_var_name = ""; stage_out_var_id = add_interface_block(StorageClassOutput); - patch_stage_out_var_id = add_interface_block(StorageClassOutput, true); + if (is_tesc_shader()) + patch_stage_out_var_id = add_interface_block(StorageClassOutput, true); stage_in_var_id = add_interface_block(StorageClassInput); if (is_tese_shader()) patch_stage_in_var_id = add_interface_block(StorageClassInput, true); From 8f66f305e7542690e776fe19efd450f49843616d Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Mon, 18 Sep 2023 00:20:00 -0700 Subject: [PATCH 35/48] Make sure the local variable for an output block gets created. We still rely on it to pass around and collect the output. To avoid duplicates, only do this if we would not do this normally. --- spirv_msl.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index c26c9eadb..e4bf8e375 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -3806,6 +3806,12 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) all_captured = all_captured && (has_member_decoration(type.self, i, DecorationOffset) || !active); } hidden = hidden || all_captured; + // We still rely on the block being declared as a variable. Make sure that happens. + if (all_captured && !is_builtin) + { + get(ir.default_entry_point).add_local_variable(var_id); + vars_needing_early_declaration.push_back(var_id); + } } else if (has_decoration(var_id, DecorationOffset)) { From 2e14c9101cca7beda3e63b7272772b6d47ddc0e1 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Mon, 18 Sep 2023 02:16:22 -0700 Subject: [PATCH 36/48] Simplify the code to add members to the XFB buffer blocks. Use the offset from the `XfbOutput` struct instead of querying it again from the ID. Use the `member_index` local instead of using `size() - 1` when setting member decorations. Don't set the qualified name for builtin block variabless--we handle those a different way. Use the member index from the `XfbOutput` when inspecing the original block type instead of the `member_index` local. This one was a real bug; honestly, I don't know how it even worked before. --- spirv_msl.cpp | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index e4bf8e375..6bb45f9c4 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -18165,11 +18165,6 @@ void CompilerMSL::analyze_xfb_buffers() set(buffer_var_id, ptr_type_id, StorageClassUniform); set_name(buffer_var_id, join("spvXfb", xfb_buffer)); - // Members must be emitted in Offset order. - stable_sort(begin(outputs), end(outputs), [&](const XfbOutput &lhs, const XfbOutput &rhs) -> bool { - return lhs.offset < rhs.offset; - }); - uint32_t member_index = 0; for (auto &output : outputs) { @@ -18183,19 +18178,15 @@ void CompilerMSL::analyze_xfb_buffers() { // Drop pointer information when we emit the outputs into a struct. buffer_type.member_types.push_back(get_variable_data_type_id(var)); - set_member_decoration(type_id, buffer_type.member_types.size() - 1, DecorationOffset, get_decoration(var.self, DecorationOffset)); + set_member_decoration(type_id, member_index, DecorationOffset, output.offset); set_qualified_name(var.self, join(to_name(local_var_id), ".", mbr_name)); } else { - buffer_type.member_types.push_back(type.member_types[member_index]); - set_member_decoration(type_id, buffer_type.member_types.size() - 1, DecorationOffset, get_member_decoration(type.self, member_index, DecorationOffset)); + buffer_type.member_types.push_back(type.member_types[output.member_index]); + set_member_decoration(type_id, member_index, DecorationOffset, output.offset); string qual_var_name = join(to_name(local_var_id), ".", mbr_name); - if (is_member_builtin(type, member_index, nullptr)) - { - set_member_qualified_name(type.self, member_index, qual_var_name); - } - else + if (!is_member_builtin(type, member_index, nullptr)) { // n.b. Must come BEFORE the big one that writes out the XFB buffers! entry_func.fixup_hooks_out.push_back([=]() { From 8dbf250cabd05ee9cd5591f953861ca07ca6a18f Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Mon, 18 Sep 2023 04:01:57 -0700 Subject: [PATCH 37/48] Make sure captured outputs passed as implicit arguments have correct types. Make sure they use the `thread` AS and that they have the `packed_` prefix, if necessary. --- spirv_msl.cpp | 50 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 6bb45f9c4..79cf303d3 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -2134,7 +2134,11 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: func.add_parameter(mbr_type_id, var_id, true); set(var_id, ptr_type_id, StorageClassFunction); + if (xfb_captured_outputs.count(arg_id)) + xfb_captured_outputs.insert(var_id); ir.meta[var_id].decoration = ir.meta[type_id].members[mbr_idx]; + if (xfb_packed_builtins.count(builtin)) + set_extended_decoration(var_id, SPIRVCrossDecorationPhysicalTypePacked); } mbr_idx++; } @@ -2144,9 +2148,13 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: uint32_t next_id = ir.increase_bound_by(1); func.add_parameter(type_id, next_id, true); set(next_id, type_id, StorageClassFunction, 0, arg_id); + if (xfb_captured_outputs.count(arg_id)) + xfb_captured_outputs.insert(next_id); // Ensure the new variable has all the same meta info ir.meta[next_id] = ir.meta[arg_id]; + if (xfb_packed_outputs.count(arg_id)) + set_extended_decoration(next_id, SPIRVCrossDecorationPhysicalTypePacked); } } } @@ -12383,6 +12391,11 @@ bool CompilerMSL::uses_explicit_early_fragment_test() // In MSL, address space qualifiers are required for all pointer or reference variables string CompilerMSL::get_argument_address_space(const SPIRVariable &argument) { + // If this is for a captured transform feedback output, then use the thread address space. + // This is a terrible kluge, but since we reused the original pointer type in constructing + // fake parameters for globals, we have to do this here. + if (xfb_captured_outputs.count(argument.self)) + return "thread"; const auto &type = get(argument.basetype); return get_type_address_space(type, argument.self, true); } @@ -14272,6 +14285,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) // For opaque types we handle const later due to descriptor address spaces. const char *cv_qualifier = (constref && !type_is_image) ? "const " : ""; + const char *pack_pfx = has_extended_decoration(var.self, SPIRVCrossDecorationPhysicalTypePacked) ? "packed_" : ""; string decl; // If this is a combined image-sampler for a 2D image with floating-point type, @@ -14315,9 +14329,9 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) is_using_builtin_array = true; if (is_using_builtin_array) - decl = join(cv_qualifier, builtin_type_decl(builtin_type, arg.id)); + decl = join(cv_qualifier, pack_pfx, builtin_type_decl(builtin_type, arg.id)); else - decl = join(cv_qualifier, type_to_glsl(type, arg.id)); + decl = join(cv_qualifier, pack_pfx, type_to_glsl(type, arg.id)); } else if ((type_storage == StorageClassUniform || type_storage == StorageClassStorageBuffer) && is_array(type)) { @@ -14340,7 +14354,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) decl += join(" ", cv_qualifier); } else - decl = join(cv_qualifier, type_to_glsl(type, arg.id)); + decl = join(cv_qualifier, pack_pfx, type_to_glsl(type, arg.id)); } if (!builtin && !is_pointer && @@ -18052,6 +18066,7 @@ void CompilerMSL::analyze_xfb_buffers() // feedback..." if (!has_member_decoration(type.self, i, DecorationOffset)) continue; + xfb_captured_outputs.insert(self); uint32_t xfb_offset = get_member_decoration(type.self, i, DecorationOffset); uint32_t mbr_xfb_buffer_num = has_member_decoration(type.self, i, DecorationXfbBuffer) ? get_member_decoration(type.self, i, DecorationXfbBuffer) : xfb_buffer_num; string name; @@ -18090,6 +18105,7 @@ void CompilerMSL::analyze_xfb_buffers() { if (!has_decoration(self, DecorationOffset)) return; + xfb_captured_outputs.insert(self); uint32_t xfb_offset = get_decoration(self, DecorationOffset); string name; if (has_decoration(self, DecorationBuiltIn)) @@ -18177,7 +18193,8 @@ void CompilerMSL::analyze_xfb_buffers() if (!output.block) { // Drop pointer information when we emit the outputs into a struct. - buffer_type.member_types.push_back(get_variable_data_type_id(var)); + const auto &var_type = get_variable_data_type(var); + buffer_type.member_types.push_back(var_type.self); set_member_decoration(type_id, member_index, DecorationOffset, output.offset); set_qualified_name(var.self, join(to_name(local_var_id), ".", mbr_name)); } @@ -18186,7 +18203,7 @@ void CompilerMSL::analyze_xfb_buffers() buffer_type.member_types.push_back(type.member_types[output.member_index]); set_member_decoration(type_id, member_index, DecorationOffset, output.offset); string qual_var_name = join(to_name(local_var_id), ".", mbr_name); - if (!is_member_builtin(type, member_index, nullptr)) + if (!is_member_builtin(type, output.member_index, nullptr)) { // n.b. Must come BEFORE the big one that writes out the XFB buffers! entry_func.fixup_hooks_out.push_back([=]() { @@ -18207,6 +18224,29 @@ void CompilerMSL::analyze_xfb_buffers() bool packed_buffer = xfb_strides[xfb_buffer] % get_declared_type_alignment_msl(buffer_type, false, false) != 0; if (packed_buffer) mark_struct_members_packed(buffer_type); + // If the block or any members are packed, we have to make sure that this is + // propagated to implicit parameters as well. + for (uint32_t i = 0; i < buffer_type.member_types.size(); ++i) + { + const auto &member_type = get(buffer_type.member_types[i]); + uint32_t var_id = get_extended_member_decoration(type_id, i, SPIRVCrossDecorationInterfaceOrigID); + const auto &var = get(var_id); + if (member_type.basetype != SPIRType::Struct && (packed_buffer || has_extended_member_decoration(type_id, i, SPIRVCrossDecorationPhysicalTypePacked))) + { + if (is_builtin_variable(var)) + { + if (outputs[i].block) + xfb_packed_builtins.insert(BuiltIn(get_member_decoration(get_variable_data_type_id(var), outputs[i].member_index, DecorationBuiltIn))); + else + xfb_packed_builtins.insert(BuiltIn(get_decoration(var_id, DecorationBuiltIn))); + + } + else + { + xfb_packed_outputs.insert(var_id); + } + } + } // Make sure struct is padded to declared stride, so indexing works properly. if (xfb_strides[xfb_buffer] > get_declared_struct_size_msl(buffer_type, packed_buffer)) set_extended_decoration(type_id, SPIRVCrossDecorationPaddingTarget, xfb_strides[xfb_buffer]); From b020270374c8a3e118428d1caee8dd45dcbd559d Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Mon, 18 Sep 2023 04:04:02 -0700 Subject: [PATCH 38/48] Only use qualified name for builtins in the entry point(). Add missing changes from previous patch. --- spirv_msl.cpp | 2 +- spirv_msl.hpp | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 79cf303d3..1b40d3f0e 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -15939,7 +15939,7 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) case BuiltInLayer: if (is_tesc_shader()) break; - if (needs_transform_feedback() && xfb_captured_builtins.count(builtin)) + if (needs_transform_feedback() && xfb_captured_builtins.count(builtin) && current_function && (current_function->self == ir.default_entry_point)) return join(to_name(xfb_locals[xfb_captured_builtins[builtin]]), ".", CompilerGLSL::builtin_to_glsl(builtin, storage)); if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point) && !is_stage_output_builtin_masked(builtin)) diff --git a/spirv_msl.hpp b/spirv_msl.hpp index 28e42550b..e98459532 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -1229,6 +1229,9 @@ class CompilerMSL : public CompilerGLSL VariableID xfb_locals[kMaxXfbBuffers]; uint32_t xfb_strides[kMaxXfbBuffers]; std::unordered_map xfb_captured_builtins; + std::unordered_set xfb_captured_outputs; + std::unordered_set xfb_packed_outputs; + std::unordered_set xfb_packed_builtins; // Must be ordered since array is in a specific order. std::map> buffers_requiring_dynamic_offset; From 3521814d1d1e6f64e9e4f92854d1fc1f0445f7d8 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Mon, 18 Sep 2023 04:15:45 -0700 Subject: [PATCH 39/48] Remove extraneous right parentheses. --- spirv_msl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 1b40d3f0e..4c854311a 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -13922,7 +13922,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // 13 17 index_expr = join("3 * ", to_expression(builtin_invocation_id_id), ".y * subsat(", to_expression(builtin_stage_input_size_id), ".x, 2u) + 3 * ", - to_expression(builtin_invocation_id_id), ".x)"); + to_expression(builtin_invocation_id_id), ".x"); break; case Options::PrimitiveType::TriangleFan: // The index expression in this case is different for the fan base. @@ -13974,7 +13974,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // 13 17 statement("uint spvXfbBaseIndex = 3 * ", to_expression(builtin_invocation_id_id), ".y * subsat(", to_expression(builtin_stage_input_size_id), ".x, 2u);"); - index_expr = join("spvXfbBaseIndex + 3 * ", to_expression(builtin_invocation_id_id), ".x) - 2u"); + index_expr = join("spvXfbBaseIndex + 3 * ", to_expression(builtin_invocation_id_id), ".x - 2u"); break; case Options::PrimitiveType::Dynamic: default: From bf4f823e82a65561a02188cea1bb353e393fb3e8 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Mon, 18 Sep 2023 04:17:58 -0700 Subject: [PATCH 40/48] Clang-format the changes. --- main.cpp | 39 +-- spirv_msl.cpp | 755 +++++++++++++++++++++++++++----------------------- spirv_msl.hpp | 5 +- 3 files changed, 427 insertions(+), 372 deletions(-) diff --git a/main.cpp b/main.cpp index 2b2f4ba1c..35905a00c 100644 --- a/main.cpp +++ b/main.cpp @@ -677,8 +677,7 @@ struct CLIArguments bool msl_check_discarded_frag_stores = false; bool msl_sample_dref_lod_array_as_grad = false; const char *msl_combined_sampler_suffix = nullptr; - CompilerMSL::Options::PrimitiveType msl_xfb_primitive_type = - CompilerMSL::Options::PrimitiveType::Dynamic; + CompilerMSL::Options::PrimitiveType msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::Dynamic; bool glsl_emit_push_constant_as_ubo = false; bool glsl_emit_ubo_as_plain_uniforms = false; bool glsl_force_flattened_io_blocks = false; @@ -1796,23 +1795,25 @@ static int main_inner(int argc, char *argv[]) cbs.add("--msl-combined-sampler-suffix", [&args](CLIParser &parser) { args.msl_combined_sampler_suffix = parser.next_string(); }); - cbs.add("--msl-xfb-primitive-type", [&args](CLIParser &parser) { - const char *type = parser.next_value_string("dynamic"); - if (strcmp(type, "dynamic") == 0) - args.msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::Dynamic; - else if (strcmp(type, "point-list") == 0) - args.msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::PointList; - else if (strcmp(type, "line-list") == 0) - args.msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::LineList; - else if (strcmp(type, "line-strip") == 0) - args.msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::LineStrip; - else if (strcmp(type, "triangle-list") == 0) - args.msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::TriangleList; - else if (strcmp(type, "triangle-strip") == 0) - args.msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::TriangleStrip; - else if (strcmp(type, "triangle-fan") == 0) - args.msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::TriangleFan; - }); + cbs.add("--msl-xfb-primitive-type", + [&args](CLIParser &parser) + { + const char *type = parser.next_value_string("dynamic"); + if (strcmp(type, "dynamic") == 0) + args.msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::Dynamic; + else if (strcmp(type, "point-list") == 0) + args.msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::PointList; + else if (strcmp(type, "line-list") == 0) + args.msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::LineList; + else if (strcmp(type, "line-strip") == 0) + args.msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::LineStrip; + else if (strcmp(type, "triangle-list") == 0) + args.msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::TriangleList; + else if (strcmp(type, "triangle-strip") == 0) + args.msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::TriangleStrip; + else if (strcmp(type, "triangle-fan") == 0) + args.msl_xfb_primitive_type = CompilerMSL::Options::PrimitiveType::TriangleFan; + }); cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); }); cbs.add("--rename-entry-point", [&args](CLIParser &parser) { auto old_name = parser.next_string(); diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 4c854311a..a055f399b 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -3810,7 +3810,9 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) bool all_captured = true; for (uint32_t i = 0; i < mbr_cnt; i++) { - bool active = !is_builtin || has_active_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn)), storage); + bool active = + !is_builtin || + has_active_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn)), storage); all_captured = all_captured && (has_member_decoration(type.self, i, DecorationOffset) || !active); } hidden = hidden || all_captured; @@ -12786,8 +12788,12 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) continue; if (!ep_args.empty()) ep_args += ", "; - ep_args += join(variable_decl(get_type_from_variable(xfb_counters[xfb_buffer]), to_name(xfb_counters[xfb_buffer])), " [[buffer(", msl_options.xfb_counter_buffer_index_base + xfb_buffer, ")]], "); - ep_args += join(variable_decl(get_type_from_variable(xfb_buffers[xfb_buffer]), to_name(xfb_buffers[xfb_buffer])), " [[buffer(", msl_options.xfb_output_buffer_index_base + xfb_buffer, ")]]"); + ep_args += join( + variable_decl(get_type_from_variable(xfb_counters[xfb_buffer]), to_name(xfb_counters[xfb_buffer])), + " [[buffer(", msl_options.xfb_counter_buffer_index_base + xfb_buffer, ")]], "); + ep_args += join( + variable_decl(get_type_from_variable(xfb_buffers[xfb_buffer]), to_name(xfb_buffers[xfb_buffer])), + " [[buffer(", msl_options.xfb_output_buffer_index_base + xfb_buffer, ")]]"); } } // Tessellation control shaders get three additional parameters: @@ -13830,261 +13836,299 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // Transform feedback if (needs_transform_feedback()) { - entry_func.fixup_hooks_out.push_back([=]() { - string index_expr; - switch (msl_options.xfb_primitive_type) - { - case Options::PrimitiveType::PointList: - index_expr = join(to_expression(builtin_invocation_id_id), ".y * ", - to_expression(builtin_stage_input_size_id), ".x + ", - to_expression(builtin_invocation_id_id), ".x"); - break; - case Options::PrimitiveType::LineList: - index_expr = join(to_expression(builtin_invocation_id_id), ".y * (", - to_expression(builtin_stage_input_size_id), ".x & ~1u) + ", - to_expression(builtin_invocation_id_id), ".x"); - break; - case Options::PrimitiveType::TriangleList: - index_expr = join(to_expression(builtin_invocation_id_id), ".y * (", - to_expression(builtin_stage_input_size_id), ".x - ", to_expression(builtin_stage_input_size_id), ".x % 3u) + ", - to_expression(builtin_invocation_id_id), ".x"); - break; - case Options::PrimitiveType::LineStrip: - // Calculation of the index expression is also complicated a bit because of this. - // Some worked examples: - // Vertex ordinal XFB indices - // 0 0 - // 1 1, 2 - // 2 3, 4 - // 3 5, 6 - // 4 7 - // FIXME: This doesn't account for primitive restart! - // 0 0 - // 1 1, 2 - // 2 3, 4 - // 3 5 - // 4 - // 5 n/a - // 6 - // 7 6 - // 8 7, 8 - // 9 9, 10 - // 10 11 - index_expr = join("2 * ", to_expression(builtin_invocation_id_id), ".y * (", - to_expression(builtin_stage_input_size_id), ".x - 1u) + 2 * ", - to_expression(builtin_invocation_id_id), ".x"); - break; - case Options::PrimitiveType::TriangleStrip: - // Vertex ordinal XFB indices - // 0 0 - // 1 1, 3 - // 2 2, 5, 6 - // 3 4, 7, 9 - // 4 8, 11 - // 5 10 - // FIXME: This doesn't account for primitive restart! - // 0 0 - // 1 1, 3 - // 2 2, 5, 6 - // 3 4, 7, 9 - // 4 8, 11 - // 5 10 - // 6 - // 7 12 - // 8 13, 15 - // 9 14, 17, 18 - // 10 16, 19, 21 - // 11 20, 23 - // 12 22 - // ---- - // 0 0 - // 1 1 - // 2 2 - // 3 - // 4 3 - // 5 4, 6 - // 6 5, 8 - // 7 7 - // ---- - // 0 0 - // 1 1, 3 - // 2 2, 5, 6 - // 3 4, 7 - // 4 8 - // 5 - // 6 n/a - // 7 n/a - // 8 - // 9 9 - // 10 10, 12 - // 11 11, 14, 15 - // 12 13, 16 - // 13 17 - index_expr = join("3 * ", to_expression(builtin_invocation_id_id), ".y * subsat(", - to_expression(builtin_stage_input_size_id), ".x, 2u) + 3 * ", - to_expression(builtin_invocation_id_id), ".x"); - break; - case Options::PrimitiveType::TriangleFan: - // The index expression in this case is different for the fan base. - // This is for the other vertices. It is very similar to the line strip case. - // Vertex ordinal XFB indices - // 0 0, 3, 6, 9 - // 1 1 - // 2 2, 4 - // 3 5, 7 - // 4 8, 10 - // 5 11 - // FIXME: This doesn't account for primitive restart! - // 0 0, 3, 6, 9 - // 1 1 - // 2 2, 4 - // 3 5, 7 - // 4 8, 10 - // 5 11 - // 6 - // 7 12, 15, 18, 21 - // 8 13 - // 9 14, 16 - // 10 17, 19 - // 11 20, 22 - // 12 23 - // ---- - // 0 0 - // 1 1 - // 2 2 - // 3 - // 4 3, 6 - // 5 4 - // 6 5, 7 - // 7 8 - // ---- - // 0 0, 3, 6 - // 1 1 - // 2 2, 4 - // 3 5, 7 - // 4 8 - // 5 - // 6 n/a - // 7 n/a - // 8 - // 9 9, 12, 15 - // 10 10 - // 11 11, 13 - // 12 14, 16 - // 13 17 - statement("uint spvXfbBaseIndex = 3 * ", to_expression(builtin_invocation_id_id), ".y * subsat(", - to_expression(builtin_stage_input_size_id), ".x, 2u);"); - index_expr = join("spvXfbBaseIndex + 3 * ", to_expression(builtin_invocation_id_id), ".x - 2u"); - break; - case Options::PrimitiveType::Dynamic: - default: - SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); - } - statement("uint spvXfbIndex = ", index_expr, ";"); - // First, write the data out. - for (uint32_t i = 0; i < kMaxXfbBuffers; ++i) - { - if (xfb_buffers[i] == 0) continue; - statement("uint spvInitOffset", i, " = atomic_load_explicit(", to_name(xfb_counters[i]), ", memory_order_relaxed);"); - statement(to_name(xfb_buffers[i]), " = reinterpret_cast<", type_to_glsl(get_type_from_variable(xfb_buffers[i])), ">(reinterpret_cast(", to_name(xfb_buffers[i]), ") + spvInitOffset", i, ");"); - switch (msl_options.xfb_primitive_type) - { - case Options::PrimitiveType::PointList: - // This is straightforward enough. Just make sure we don't overstep the data buffer (FIXME). - statement(to_name(xfb_buffers[i]), "[spvXfbIndex] = ", to_expression(xfb_locals[i]), ";"); - break; - case Options::PrimitiveType::LineList: - // This is a little trickier, because we don't want to write an incomplete primitive. - // Therefore, we must write only if we're an odd vertex, or we're not the last one. - // FIXME: Bounds check the buffer, too. - statement("if ((", to_expression(builtin_invocation_id_id), ".x & 1) || ", to_expression(builtin_invocation_id_id), ".x < ", to_expression(builtin_stage_input_size_id), ".x - 1u)"); - statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex] = " , to_expression(xfb_locals[i]), ";"); - break; - case Options::PrimitiveType::TriangleList: - // This is similar to the previous case, except here the boundary condition is - // if global_id.x % 3 == 2 or we're not one of the last two. - // FIXME: Bounds check the buffer, too. - statement("if ((", to_expression(builtin_invocation_id_id), ".x % 3u == 2) || ", to_expression(builtin_invocation_id_id), ".x + 2 < ", to_expression(builtin_stage_input_size_id), ".x)"); - statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex] = ", to_expression(xfb_locals[i]), ";"); - break; - case Options::PrimitiveType::LineStrip: - // This is more complicated. We have to write out each individual line segment. - // So if we're not the first or the last, we have to write twice. - // On top of that, we also have to handle primitive restart. (FIXME) - // FIXME: Bounds check the buffer, too. - statement("if (", to_expression(builtin_invocation_id_id), ".x != ", to_expression(builtin_stage_input_size_id), ".x - 1u)"); - statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex] = " , to_expression(xfb_locals[i]), ";"); - statement("if (", to_expression(builtin_invocation_id_id), ".x != 0)"); - statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex - 1u] = " , to_expression(xfb_locals[i]), ";"); - break; - case Options::PrimitiveType::TriangleStrip: - // This is even worse. We have to write three times if we're not first or last, - // and now if there's fewer than two vertices in this strip, we can't write at all. - // Again, primitive restart is a factor here. (FIXME) - // FIXME: Bounds check the buffer, too. - statement("if (", to_expression(builtin_invocation_id_id), ".x + 2 < ", to_expression(builtin_stage_input_size_id), ".x)"); - statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex] = ", to_expression(xfb_locals[i]), ";"); - statement("if (", to_expression(builtin_invocation_id_id), ".x != 0 && ", to_expression(builtin_invocation_id_id), ".x != ", to_expression(builtin_stage_input_size_id), ".x - 1u)"); - statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex - 1u - (", to_expression(builtin_invocation_id_id), ".x & 1u)] = ", to_expression(xfb_locals[i]), ";"); - statement("if (", to_expression(builtin_invocation_id_id), ".x > 1)"); - statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex - 4u - (", to_expression(builtin_invocation_id_id), ".x & 1u)] = ", to_expression(xfb_locals[i]), ";"); - break; - case Options::PrimitiveType::TriangleFan: - // This is the worst case of all. It's similar to the strip case, except now - // we have to write the fan base vertex for *every* triangle. - // Again, primitive restart is a factor here. (FIXME) - // FIXME: Bounds check the buffer, too. - statement("if (", to_expression(builtin_invocation_id_id), ".x == 0)"); - begin_scope(); - statement("for (uint i = 0; i < subsat(", to_expression(builtin_stage_input_size_id), ".x, 2u); ++i)"); - statement(" ", to_name(xfb_buffers[i]), "[spvXfbBaseIndex + 3 * i] = ", to_name(xfb_locals[i]), ";"); - end_scope(); - statement("else"); - begin_scope(); - statement("if (", to_expression(builtin_invocation_id_id), ".x != ", to_expression(builtin_stage_input_size_id), ".x - 1u)"); - statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex] = " , to_expression(xfb_locals[i]), ";"); - statement("if (", to_expression(builtin_invocation_id_id), ".x != 1)"); - statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex - 2u] = " , to_expression(xfb_locals[i]), ";"); - end_scope(); - break; - case Options::PrimitiveType::Dynamic: - default: - SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); - } - } - statement("threadgroup_barrier(mem_flags::mem_device);"); - // Now update the amount of data written to the buffer. - statement("if (all(", to_expression(builtin_invocation_id_id), ".xy == 0))"); - begin_scope(); - switch (msl_options.xfb_primitive_type) - { - case Options::PrimitiveType::PointList: - statement("uint spvWritten = ", to_expression(builtin_stage_input_size_id), ".x * ", to_expression(builtin_stage_input_size_id), ".y;"); - break; - case Options::PrimitiveType::LineList: - statement("uint spvWritten = (", to_expression(builtin_stage_input_size_id), ".x & ~1u) * ", to_expression(builtin_stage_input_size_id), ".y;"); - break; - case Options::PrimitiveType::LineStrip: - statement("uint spvWritten = 2 * (", to_expression(builtin_stage_input_size_id), ".x - 1u) * ", to_expression(builtin_stage_input_size_id), ".y;"); - break; - case Options::PrimitiveType::TriangleList: - statement("uint spvWritten = (", to_expression(builtin_stage_input_size_id), ".x - ", to_expression(builtin_stage_input_size_id), ".x % 3u) * ", to_expression(builtin_stage_input_size_id), ".y;"); - break; - case Options::PrimitiveType::TriangleStrip: - case Options::PrimitiveType::TriangleFan: - statement("uint spvWritten = 3 * subsat(", to_expression(builtin_stage_input_size_id), ".x, 2u) * ", to_expression(builtin_stage_input_size_id), ".y;"); - break; - case Options::PrimitiveType::Dynamic: - default: - SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); - break; - } - for (uint32_t i = 0; i < kMaxXfbBuffers; ++i) { - if (xfb_buffers[i] == 0) continue; - statement("atomic_store_explicit(", to_name(xfb_counters[i]), ", spvInitOffset", i, " + sizeof(*", to_name(xfb_buffers[i]), ") * spvWritten, memory_order_relaxed);"); - } - end_scope(); - }); + entry_func.fixup_hooks_out.push_back( + [=]() + { + string index_expr; + switch (msl_options.xfb_primitive_type) + { + case Options::PrimitiveType::PointList: + index_expr = join(to_expression(builtin_invocation_id_id), ".y * ", + to_expression(builtin_stage_input_size_id), ".x + ", + to_expression(builtin_invocation_id_id), ".x"); + break; + case Options::PrimitiveType::LineList: + index_expr = join(to_expression(builtin_invocation_id_id), ".y * (", + to_expression(builtin_stage_input_size_id), ".x & ~1u) + ", + to_expression(builtin_invocation_id_id), ".x"); + break; + case Options::PrimitiveType::TriangleList: + index_expr = join(to_expression(builtin_invocation_id_id), ".y * (", + to_expression(builtin_stage_input_size_id), ".x - ", + to_expression(builtin_stage_input_size_id), ".x % 3u) + ", + to_expression(builtin_invocation_id_id), ".x"); + break; + case Options::PrimitiveType::LineStrip: + // Calculation of the index expression is also complicated a bit because of this. + // Some worked examples: + // Vertex ordinal XFB indices + // 0 0 + // 1 1, 2 + // 2 3, 4 + // 3 5, 6 + // 4 7 + // FIXME: This doesn't account for primitive restart! + // 0 0 + // 1 1, 2 + // 2 3, 4 + // 3 5 + // 4 + // 5 n/a + // 6 + // 7 6 + // 8 7, 8 + // 9 9, 10 + // 10 11 + index_expr = join("2 * ", to_expression(builtin_invocation_id_id), ".y * (", + to_expression(builtin_stage_input_size_id), ".x - 1u) + 2 * ", + to_expression(builtin_invocation_id_id), ".x"); + break; + case Options::PrimitiveType::TriangleStrip: + // Vertex ordinal XFB indices + // 0 0 + // 1 1, 3 + // 2 2, 5, 6 + // 3 4, 7, 9 + // 4 8, 11 + // 5 10 + // FIXME: This doesn't account for primitive restart! + // 0 0 + // 1 1, 3 + // 2 2, 5, 6 + // 3 4, 7, 9 + // 4 8, 11 + // 5 10 + // 6 + // 7 12 + // 8 13, 15 + // 9 14, 17, 18 + // 10 16, 19, 21 + // 11 20, 23 + // 12 22 + // ---- + // 0 0 + // 1 1 + // 2 2 + // 3 + // 4 3 + // 5 4, 6 + // 6 5, 8 + // 7 7 + // ---- + // 0 0 + // 1 1, 3 + // 2 2, 5, 6 + // 3 4, 7 + // 4 8 + // 5 + // 6 n/a + // 7 n/a + // 8 + // 9 9 + // 10 10, 12 + // 11 11, 14, 15 + // 12 13, 16 + // 13 17 + index_expr = join("3 * ", to_expression(builtin_invocation_id_id), ".y * subsat(", + to_expression(builtin_stage_input_size_id), ".x, 2u) + 3 * ", + to_expression(builtin_invocation_id_id), ".x"); + break; + case Options::PrimitiveType::TriangleFan: + // The index expression in this case is different for the fan base. + // This is for the other vertices. It is very similar to the line strip case. + // Vertex ordinal XFB indices + // 0 0, 3, 6, 9 + // 1 1 + // 2 2, 4 + // 3 5, 7 + // 4 8, 10 + // 5 11 + // FIXME: This doesn't account for primitive restart! + // 0 0, 3, 6, 9 + // 1 1 + // 2 2, 4 + // 3 5, 7 + // 4 8, 10 + // 5 11 + // 6 + // 7 12, 15, 18, 21 + // 8 13 + // 9 14, 16 + // 10 17, 19 + // 11 20, 22 + // 12 23 + // ---- + // 0 0 + // 1 1 + // 2 2 + // 3 + // 4 3, 6 + // 5 4 + // 6 5, 7 + // 7 8 + // ---- + // 0 0, 3, 6 + // 1 1 + // 2 2, 4 + // 3 5, 7 + // 4 8 + // 5 + // 6 n/a + // 7 n/a + // 8 + // 9 9, 12, 15 + // 10 10 + // 11 11, 13 + // 12 14, 16 + // 13 17 + statement("uint spvXfbBaseIndex = 3 * ", to_expression(builtin_invocation_id_id), ".y * subsat(", + to_expression(builtin_stage_input_size_id), ".x, 2u);"); + index_expr = join("spvXfbBaseIndex + 3 * ", to_expression(builtin_invocation_id_id), ".x - 2u"); + break; + case Options::PrimitiveType::Dynamic: + default: + SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); + } + statement("uint spvXfbIndex = ", index_expr, ";"); + // First, write the data out. + for (uint32_t i = 0; i < kMaxXfbBuffers; ++i) + { + if (xfb_buffers[i] == 0) + continue; + statement("uint spvInitOffset", i, " = atomic_load_explicit(", to_name(xfb_counters[i]), + ", memory_order_relaxed);"); + statement(to_name(xfb_buffers[i]), " = reinterpret_cast<", + type_to_glsl(get_type_from_variable(xfb_buffers[i])), ">(reinterpret_cast(", + to_name(xfb_buffers[i]), ") + spvInitOffset", i, ");"); + switch (msl_options.xfb_primitive_type) + { + case Options::PrimitiveType::PointList: + // This is straightforward enough. Just make sure we don't overstep the data buffer (FIXME). + statement(to_name(xfb_buffers[i]), "[spvXfbIndex] = ", to_expression(xfb_locals[i]), ";"); + break; + case Options::PrimitiveType::LineList: + // This is a little trickier, because we don't want to write an incomplete primitive. + // Therefore, we must write only if we're an odd vertex, or we're not the last one. + // FIXME: Bounds check the buffer, too. + statement("if ((", to_expression(builtin_invocation_id_id), ".x & 1) || ", + to_expression(builtin_invocation_id_id), ".x < ", + to_expression(builtin_stage_input_size_id), ".x - 1u)"); + statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex] = ", to_expression(xfb_locals[i]), + ";"); + break; + case Options::PrimitiveType::TriangleList: + // This is similar to the previous case, except here the boundary condition is + // if global_id.x % 3 == 2 or we're not one of the last two. + // FIXME: Bounds check the buffer, too. + statement("if ((", to_expression(builtin_invocation_id_id), ".x % 3u == 2) || ", + to_expression(builtin_invocation_id_id), ".x + 2 < ", + to_expression(builtin_stage_input_size_id), ".x)"); + statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex] = ", to_expression(xfb_locals[i]), + ";"); + break; + case Options::PrimitiveType::LineStrip: + // This is more complicated. We have to write out each individual line segment. + // So if we're not the first or the last, we have to write twice. + // On top of that, we also have to handle primitive restart. (FIXME) + // FIXME: Bounds check the buffer, too. + statement("if (", to_expression(builtin_invocation_id_id), + ".x != ", to_expression(builtin_stage_input_size_id), ".x - 1u)"); + statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex] = ", to_expression(xfb_locals[i]), + ";"); + statement("if (", to_expression(builtin_invocation_id_id), ".x != 0)"); + statement(" ", to_name(xfb_buffers[i]), + "[spvXfbIndex - 1u] = ", to_expression(xfb_locals[i]), ";"); + break; + case Options::PrimitiveType::TriangleStrip: + // This is even worse. We have to write three times if we're not first or last, + // and now if there's fewer than two vertices in this strip, we can't write at all. + // Again, primitive restart is a factor here. (FIXME) + // FIXME: Bounds check the buffer, too. + statement("if (", to_expression(builtin_invocation_id_id), ".x + 2 < ", + to_expression(builtin_stage_input_size_id), ".x)"); + statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex] = ", to_expression(xfb_locals[i]), + ";"); + statement("if (", to_expression(builtin_invocation_id_id), ".x != 0 && ", + to_expression(builtin_invocation_id_id), + ".x != ", to_expression(builtin_stage_input_size_id), ".x - 1u)"); + statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex - 1u - (", + to_expression(builtin_invocation_id_id), ".x & 1u)] = ", to_expression(xfb_locals[i]), + ";"); + statement("if (", to_expression(builtin_invocation_id_id), ".x > 1)"); + statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex - 4u - (", + to_expression(builtin_invocation_id_id), ".x & 1u)] = ", to_expression(xfb_locals[i]), + ";"); + break; + case Options::PrimitiveType::TriangleFan: + // This is the worst case of all. It's similar to the strip case, except now + // we have to write the fan base vertex for *every* triangle. + // Again, primitive restart is a factor here. (FIXME) + // FIXME: Bounds check the buffer, too. + statement("if (", to_expression(builtin_invocation_id_id), ".x == 0)"); + begin_scope(); + statement("for (uint i = 0; i < subsat(", to_expression(builtin_stage_input_size_id), + ".x, 2u); ++i)"); + statement(" ", to_name(xfb_buffers[i]), + "[spvXfbBaseIndex + 3 * i] = ", to_name(xfb_locals[i]), ";"); + end_scope(); + statement("else"); + begin_scope(); + statement("if (", to_expression(builtin_invocation_id_id), + ".x != ", to_expression(builtin_stage_input_size_id), ".x - 1u)"); + statement(" ", to_name(xfb_buffers[i]), "[spvXfbIndex] = ", to_expression(xfb_locals[i]), + ";"); + statement("if (", to_expression(builtin_invocation_id_id), ".x != 1)"); + statement(" ", to_name(xfb_buffers[i]), + "[spvXfbIndex - 2u] = ", to_expression(xfb_locals[i]), ";"); + end_scope(); + break; + case Options::PrimitiveType::Dynamic: + default: + SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); + } + } + statement("threadgroup_barrier(mem_flags::mem_device);"); + // Now update the amount of data written to the buffer. + statement("if (all(", to_expression(builtin_invocation_id_id), ".xy == 0))"); + begin_scope(); + switch (msl_options.xfb_primitive_type) + { + case Options::PrimitiveType::PointList: + statement("uint spvWritten = ", to_expression(builtin_stage_input_size_id), ".x * ", + to_expression(builtin_stage_input_size_id), ".y;"); + break; + case Options::PrimitiveType::LineList: + statement("uint spvWritten = (", to_expression(builtin_stage_input_size_id), ".x & ~1u) * ", + to_expression(builtin_stage_input_size_id), ".y;"); + break; + case Options::PrimitiveType::LineStrip: + statement("uint spvWritten = 2 * (", to_expression(builtin_stage_input_size_id), ".x - 1u) * ", + to_expression(builtin_stage_input_size_id), ".y;"); + break; + case Options::PrimitiveType::TriangleList: + statement("uint spvWritten = (", to_expression(builtin_stage_input_size_id), ".x - ", + to_expression(builtin_stage_input_size_id), ".x % 3u) * ", + to_expression(builtin_stage_input_size_id), ".y;"); + break; + case Options::PrimitiveType::TriangleStrip: + case Options::PrimitiveType::TriangleFan: + statement("uint spvWritten = 3 * subsat(", to_expression(builtin_stage_input_size_id), ".x, 2u) * ", + to_expression(builtin_stage_input_size_id), ".y;"); + break; + case Options::PrimitiveType::Dynamic: + default: + SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); + break; + } + for (uint32_t i = 0; i < kMaxXfbBuffers; ++i) + { + if (xfb_buffers[i] == 0) + continue; + statement("atomic_store_explicit(", to_name(xfb_counters[i]), ", spvInitOffset", i, " + sizeof(*", + to_name(xfb_buffers[i]), ") * spvWritten, memory_order_relaxed);"); + } + end_scope(); + }); } } @@ -15939,8 +15983,10 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) case BuiltInLayer: if (is_tesc_shader()) break; - if (needs_transform_feedback() && xfb_captured_builtins.count(builtin) && current_function && (current_function->self == ir.default_entry_point)) - return join(to_name(xfb_locals[xfb_captured_builtins[builtin]]), ".", CompilerGLSL::builtin_to_glsl(builtin, storage)); + if (needs_transform_feedback() && xfb_captured_builtins.count(builtin) && current_function && + (current_function->self == ir.default_entry_point)) + return join(to_name(xfb_locals[xfb_captured_builtins[builtin]]), ".", + CompilerGLSL::builtin_to_glsl(builtin, storage)); if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point) && !is_stage_output_builtin_masked(builtin)) return stage_out_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage); @@ -18034,95 +18080,99 @@ void CompilerMSL::analyze_xfb_buffers() xfb_strides[i] = 0; } - ir.for_each_typed_id([&](uint32_t self, SPIRVariable &var) { - auto &type = get_variable_data_type(var); - if(var.storage != StorageClassOutput) - return; - if(is_hidden_variable(var, true)) - return; + ir.for_each_typed_id( + [&](uint32_t self, SPIRVariable &var) + { + auto &type = get_variable_data_type(var); + if (var.storage != StorageClassOutput) + return; + if (is_hidden_variable(var, true)) + return; - uint32_t xfb_buffer_num = 0, xfb_stride; - if (has_decoration(self, DecorationXfbBuffer)) - { - xfb_buffer_num = get_decoration(self, DecorationXfbBuffer); - xfb_stride = get_decoration(self, DecorationXfbStride); + uint32_t xfb_buffer_num = 0, xfb_stride; + if (has_decoration(self, DecorationXfbBuffer)) + { + xfb_buffer_num = get_decoration(self, DecorationXfbBuffer); + xfb_stride = get_decoration(self, DecorationXfbStride); - if (xfb_buffer_num >= kMaxXfbBuffers) - SPIRV_CROSS_THROW("Shader uses more than 4 transform feedback buffers."); + if (xfb_buffer_num >= kMaxXfbBuffers) + SPIRV_CROSS_THROW("Shader uses more than 4 transform feedback buffers."); - // According to the spec, individual outputs or blocks are decorated with - // XfbStride to indicate the stride between two successive vertices in the buffer, - // but all XfbStrides for a given XfbBuffer must agree. - xfb_strides[xfb_buffer_num] = xfb_stride; - } + // According to the spec, individual outputs or blocks are decorated with + // XfbStride to indicate the stride between two successive vertices in the buffer, + // but all XfbStrides for a given XfbBuffer must agree. + xfb_strides[xfb_buffer_num] = xfb_stride; + } - if (type.basetype == SPIRType::Struct && has_decoration(type.self, DecorationBlock)) - { - for (uint32_t i = 0; i < type.member_types.size(); ++i) - { - // According to Vulkan VUID 04716: - // "Only variables or block members in the output interface - // decorated with Offset can be captured for transform - // feedback..." - if (!has_member_decoration(type.self, i, DecorationOffset)) - continue; - xfb_captured_outputs.insert(self); - uint32_t xfb_offset = get_member_decoration(type.self, i, DecorationOffset); - uint32_t mbr_xfb_buffer_num = has_member_decoration(type.self, i, DecorationXfbBuffer) ? get_member_decoration(type.self, i, DecorationXfbBuffer) : xfb_buffer_num; - string name; - if (has_member_decoration(type.self, i, DecorationBuiltIn)) - { - // Force this to have the proper name. - BuiltIn bi_type = BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn)); - name = builtin_to_glsl(bi_type, StorageClassOutput); - // Make sure it's referenced properly. - xfb_captured_builtins.insert(make_pair(bi_type, mbr_xfb_buffer_num)); - } - else - { - name = to_member_name(type, i); - } - xfb_outputs[mbr_xfb_buffer_num].emplace_back({&var, name, i, xfb_offset, true}); - if (has_member_decoration(type.self, i, DecorationXfbStride)) - { - xfb_strides[mbr_xfb_buffer_num] = get_member_decoration(type.self, i, DecorationXfbStride); - } - else - { - // XXX What's this for??? The validation rules for SPIR-V require - // this to be set if any of the transform feedback decorations are used! - bool hasTransformFeedback = has_member_decoration(type.parent_type, i, DecorationXfbStride); - if (hasTransformFeedback) - { - auto &execution = get_entry_point(); - execution.flags.set(spv::ExecutionModeXfb); - } - break; - } - } - } - else - { - if (!has_decoration(self, DecorationOffset)) - return; - xfb_captured_outputs.insert(self); - uint32_t xfb_offset = get_decoration(self, DecorationOffset); - string name; - if (has_decoration(self, DecorationBuiltIn)) - { - // Force this to have the proper name. - BuiltIn bi_type = BuiltIn(get_decoration(self, DecorationBuiltIn)); - name = builtin_to_glsl(bi_type, StorageClassOutput); - // Make sure it's referenced properly. - xfb_captured_builtins.insert(make_pair(bi_type, xfb_buffer_num)); - } - else - { - name = to_name(self); - } - xfb_outputs[xfb_buffer_num].emplace_back({&var, name, 0, xfb_offset, false}); - } - }); + if (type.basetype == SPIRType::Struct && has_decoration(type.self, DecorationBlock)) + { + for (uint32_t i = 0; i < type.member_types.size(); ++i) + { + // According to Vulkan VUID 04716: + // "Only variables or block members in the output interface + // decorated with Offset can be captured for transform + // feedback..." + if (!has_member_decoration(type.self, i, DecorationOffset)) + continue; + xfb_captured_outputs.insert(self); + uint32_t xfb_offset = get_member_decoration(type.self, i, DecorationOffset); + uint32_t mbr_xfb_buffer_num = has_member_decoration(type.self, i, DecorationXfbBuffer) ? + get_member_decoration(type.self, i, DecorationXfbBuffer) : + xfb_buffer_num; + string name; + if (has_member_decoration(type.self, i, DecorationBuiltIn)) + { + // Force this to have the proper name. + BuiltIn bi_type = BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn)); + name = builtin_to_glsl(bi_type, StorageClassOutput); + // Make sure it's referenced properly. + xfb_captured_builtins.insert(make_pair(bi_type, mbr_xfb_buffer_num)); + } + else + { + name = to_member_name(type, i); + } + xfb_outputs[mbr_xfb_buffer_num].emplace_back({ &var, name, i, xfb_offset, true }); + if (has_member_decoration(type.self, i, DecorationXfbStride)) + { + xfb_strides[mbr_xfb_buffer_num] = get_member_decoration(type.self, i, DecorationXfbStride); + } + else + { + // XXX What's this for??? The validation rules for SPIR-V require + // this to be set if any of the transform feedback decorations are used! + bool hasTransformFeedback = has_member_decoration(type.parent_type, i, DecorationXfbStride); + if (hasTransformFeedback) + { + auto &execution = get_entry_point(); + execution.flags.set(spv::ExecutionModeXfb); + } + break; + } + } + } + else + { + if (!has_decoration(self, DecorationOffset)) + return; + xfb_captured_outputs.insert(self); + uint32_t xfb_offset = get_decoration(self, DecorationOffset); + string name; + if (has_decoration(self, DecorationBuiltIn)) + { + // Force this to have the proper name. + BuiltIn bi_type = BuiltIn(get_decoration(self, DecorationBuiltIn)); + name = builtin_to_glsl(bi_type, StorageClassOutput); + // Make sure it's referenced properly. + xfb_captured_builtins.insert(make_pair(bi_type, xfb_buffer_num)); + } + else + { + name = to_name(self); + } + xfb_outputs[xfb_buffer_num].emplace_back({ &var, name, 0, xfb_offset, false }); + } + }); for (uint32_t xfb_buffer = 0; xfb_buffer < kMaxXfbBuffers; xfb_buffer++) { @@ -18206,9 +18256,11 @@ void CompilerMSL::analyze_xfb_buffers() if (!is_member_builtin(type, output.member_index, nullptr)) { // n.b. Must come BEFORE the big one that writes out the XFB buffers! - entry_func.fixup_hooks_out.push_back([=]() { - statement(qual_var_name, " = ", to_name(var.self), ".", to_member_name(type, member_index), ";"); - }); + entry_func.fixup_hooks_out.push_back( + [=]() { + statement(qual_var_name, " = ", to_name(var.self), ".", to_member_name(type, member_index), + ";"); + }); } } @@ -18231,15 +18283,16 @@ void CompilerMSL::analyze_xfb_buffers() const auto &member_type = get(buffer_type.member_types[i]); uint32_t var_id = get_extended_member_decoration(type_id, i, SPIRVCrossDecorationInterfaceOrigID); const auto &var = get(var_id); - if (member_type.basetype != SPIRType::Struct && (packed_buffer || has_extended_member_decoration(type_id, i, SPIRVCrossDecorationPhysicalTypePacked))) + if (member_type.basetype != SPIRType::Struct && + (packed_buffer || has_extended_member_decoration(type_id, i, SPIRVCrossDecorationPhysicalTypePacked))) { if (is_builtin_variable(var)) { if (outputs[i].block) - xfb_packed_builtins.insert(BuiltIn(get_member_decoration(get_variable_data_type_id(var), outputs[i].member_index, DecorationBuiltIn))); + xfb_packed_builtins.insert(BuiltIn(get_member_decoration( + get_variable_data_type_id(var), outputs[i].member_index, DecorationBuiltIn))); else xfb_packed_builtins.insert(BuiltIn(get_decoration(var_id, DecorationBuiltIn))); - } else { diff --git a/spirv_msl.hpp b/spirv_msl.hpp index e98459532..b3e33f38e 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -576,8 +576,9 @@ class CompilerMSL : public CompilerGLSL bool needs_transform_feedback() const { auto &execution = get_entry_point(); - return execution.flags.get(spv::ExecutionModeXfb) && (execution.model == spv::ExecutionModelVertex || - execution.model == spv::ExecutionModelTessellationEvaluation); + return execution.flags.get(spv::ExecutionModeXfb) && + (execution.model == spv::ExecutionModelVertex || + execution.model == spv::ExecutionModelTessellationEvaluation); } bool vertex_shader_is_kernel() const From 109959eb119e4e7714461eafd65bf95d54bc66c6 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Mon, 18 Sep 2023 04:18:21 -0700 Subject: [PATCH 41/48] Add tests for transform feedback in MSL. --- ...rm-feedback-decorations.xfb-line-list.vert | 62 +++++++++++ ...m-feedback-decorations.xfb-line-strip.vert | 68 ++++++++++++ ...m-feedback-decorations.xfb-point-list.vert | 59 ++++++++++ ...feedback-decorations.xfb-triangle-fan.vert | 93 ++++++++++++++++ ...eedback-decorations.xfb-triangle-list.vert | 62 +++++++++++ ...edback-decorations.xfb-triangle-strip.vert | 74 +++++++++++++ ...rm-feedback-decorations.xfb-line-list.vert | 82 ++++++++++++++ ...m-feedback-decorations.xfb-line-strip.vert | 76 +++++++++++++ ...m-feedback-decorations.xfb-point-list.vert | 67 ++++++++++++ ...feedback-decorations.xfb-triangle-fan.vert | 101 ++++++++++++++++++ ...eedback-decorations.xfb-triangle-list.vert | 70 ++++++++++++ ...edback-decorations.xfb-triangle-strip.vert | 82 ++++++++++++++ ...rm-feedback-decorations.xfb-line-list.vert | 35 ++++++ ...m-feedback-decorations.xfb-line-strip.vert | 25 +++++ ...m-feedback-decorations.xfb-point-list.vert | 25 +++++ ...feedback-decorations.xfb-triangle-fan.vert | 25 +++++ ...eedback-decorations.xfb-triangle-list.vert | 25 +++++ ...edback-decorations.xfb-triangle-strip.vert | 25 +++++ test_shaders.py | 18 ++++ 19 files changed, 1074 insertions(+) create mode 100644 reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-line-list.vert create mode 100644 reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-line-strip.vert create mode 100644 reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-point-list.vert create mode 100644 reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-fan.vert create mode 100644 reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-list.vert create mode 100644 reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-strip.vert create mode 100644 reference/shaders-msl/vert/transform-feedback-decorations.xfb-line-list.vert create mode 100644 reference/shaders-msl/vert/transform-feedback-decorations.xfb-line-strip.vert create mode 100644 reference/shaders-msl/vert/transform-feedback-decorations.xfb-point-list.vert create mode 100644 reference/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-fan.vert create mode 100644 reference/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-list.vert create mode 100644 reference/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-strip.vert create mode 100644 shaders-msl/vert/transform-feedback-decorations.xfb-line-list.vert create mode 100644 shaders-msl/vert/transform-feedback-decorations.xfb-line-strip.vert create mode 100644 shaders-msl/vert/transform-feedback-decorations.xfb-point-list.vert create mode 100644 shaders-msl/vert/transform-feedback-decorations.xfb-triangle-fan.vert create mode 100644 shaders-msl/vert/transform-feedback-decorations.xfb-triangle-list.vert create mode 100644 shaders-msl/vert/transform-feedback-decorations.xfb-triangle-strip.vert diff --git a/reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-line-list.vert b/reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-line-list.vert new file mode 100644 index 000000000..50da6cdc9 --- /dev/null +++ b/reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-line-list.vert @@ -0,0 +1,62 @@ +#include +#include + +using namespace metal; + +struct VertOut +{ + float4 vBar; +}; + +struct spvXfbBuffer1 +{ + char _m0_pad[4]; + packed_float4 gl_Position; +}; + +struct spvXfbBuffer2 +{ + char _m0_pad[16]; + float4 vFoo; +}; + +struct spvXfbBuffer3 +{ + float4 vBar; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device atomic_uint* spvXfbCounter1 [[buffer(17)]], device spvXfbBuffer1* spvXfb1 [[buffer(13)]], device atomic_uint* spvXfbCounter2 [[buffer(18)]], device spvXfbBuffer2* spvXfb2 [[buffer(14)]], device atomic_uint* spvXfbCounter3 [[buffer(19)]], device spvXfbBuffer3* spvXfb3 [[buffer(15)]]) +{ + spvXfbBuffer1 spvXfbOutput1 = {}; + spvXfbBuffer2 spvXfbOutput2 = {}; + spvXfbBuffer3 spvXfbOutput3 = {}; + VertOut _20 = {}; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + spvXfbOutput1.gl_Position = float4(1.0); + spvXfbOutput2.vFoo = float4(3.0); + _20.vBar = float4(5.0); + spvXfbOutput3.vBar = _20.vBar; + uint spvXfbIndex = gl_GlobalInvocationID.y * (spvStageInputSize.x & ~1u) + gl_GlobalInvocationID.x; + uint spvInitOffset1 = atomic_load_explicit(spvXfbCounter1, memory_order_relaxed); + spvXfb1 = reinterpret_cast(reinterpret_cast(spvXfb1) + spvInitOffset1); + if ((gl_GlobalInvocationID.x & 1) || gl_GlobalInvocationID.x < spvStageInputSize.x - 1u) + spvXfb1[spvXfbIndex] = spvXfbOutput1; + uint spvInitOffset2 = atomic_load_explicit(spvXfbCounter2, memory_order_relaxed); + spvXfb2 = reinterpret_cast(reinterpret_cast(spvXfb2) + spvInitOffset2); + if ((gl_GlobalInvocationID.x & 1) || gl_GlobalInvocationID.x < spvStageInputSize.x - 1u) + spvXfb2[spvXfbIndex] = spvXfbOutput2; + uint spvInitOffset3 = atomic_load_explicit(spvXfbCounter3, memory_order_relaxed); + spvXfb3 = reinterpret_cast(reinterpret_cast(spvXfb3) + spvInitOffset3); + if ((gl_GlobalInvocationID.x & 1) || gl_GlobalInvocationID.x < spvStageInputSize.x - 1u) + spvXfb3[spvXfbIndex] = spvXfbOutput3; + threadgroup_barrier(mem_flags::mem_device); + if (all(gl_GlobalInvocationID.xy == 0)) + { + uint spvWritten = (spvStageInputSize.x & ~1u) * spvStageInputSize.y; + atomic_store_explicit(spvXfbCounter1, spvInitOffset1 + sizeof(*spvXfb1) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter2, spvInitOffset2 + sizeof(*spvXfb2) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter3, spvInitOffset3 + sizeof(*spvXfb3) * spvWritten, memory_order_relaxed); + } +} + diff --git a/reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-line-strip.vert b/reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-line-strip.vert new file mode 100644 index 000000000..be04ecff8 --- /dev/null +++ b/reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-line-strip.vert @@ -0,0 +1,68 @@ +#include +#include + +using namespace metal; + +struct VertOut +{ + float4 vBar; +}; + +struct spvXfbBuffer1 +{ + char _m0_pad[4]; + packed_float4 gl_Position; +}; + +struct spvXfbBuffer2 +{ + char _m0_pad[16]; + float4 vFoo; +}; + +struct spvXfbBuffer3 +{ + float4 vBar; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device atomic_uint* spvXfbCounter1 [[buffer(17)]], device spvXfbBuffer1* spvXfb1 [[buffer(13)]], device atomic_uint* spvXfbCounter2 [[buffer(18)]], device spvXfbBuffer2* spvXfb2 [[buffer(14)]], device atomic_uint* spvXfbCounter3 [[buffer(19)]], device spvXfbBuffer3* spvXfb3 [[buffer(15)]]) +{ + spvXfbBuffer1 spvXfbOutput1 = {}; + spvXfbBuffer2 spvXfbOutput2 = {}; + spvXfbBuffer3 spvXfbOutput3 = {}; + VertOut _25 = {}; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + spvXfbOutput1.gl_Position = float4(1.0); + spvXfbOutput2.vFoo = float4(3.0); + _25.vBar = float4(5.0); + spvXfbOutput3.vBar = _25.vBar; + uint spvXfbIndex = 2 * gl_GlobalInvocationID.y * (spvStageInputSize.x - 1u) + 2 * gl_GlobalInvocationID.x; + uint spvInitOffset1 = atomic_load_explicit(spvXfbCounter1, memory_order_relaxed); + spvXfb1 = reinterpret_cast(reinterpret_cast(spvXfb1) + spvInitOffset1); + if (gl_GlobalInvocationID.x != spvStageInputSize.x - 1u) + spvXfb1[spvXfbIndex] = spvXfbOutput1; + if (gl_GlobalInvocationID.x != 0) + spvXfb1[spvXfbIndex - 1u] = spvXfbOutput1; + uint spvInitOffset2 = atomic_load_explicit(spvXfbCounter2, memory_order_relaxed); + spvXfb2 = reinterpret_cast(reinterpret_cast(spvXfb2) + spvInitOffset2); + if (gl_GlobalInvocationID.x != spvStageInputSize.x - 1u) + spvXfb2[spvXfbIndex] = spvXfbOutput2; + if (gl_GlobalInvocationID.x != 0) + spvXfb2[spvXfbIndex - 1u] = spvXfbOutput2; + uint spvInitOffset3 = atomic_load_explicit(spvXfbCounter3, memory_order_relaxed); + spvXfb3 = reinterpret_cast(reinterpret_cast(spvXfb3) + spvInitOffset3); + if (gl_GlobalInvocationID.x != spvStageInputSize.x - 1u) + spvXfb3[spvXfbIndex] = spvXfbOutput3; + if (gl_GlobalInvocationID.x != 0) + spvXfb3[spvXfbIndex - 1u] = spvXfbOutput3; + threadgroup_barrier(mem_flags::mem_device); + if (all(gl_GlobalInvocationID.xy == 0)) + { + uint spvWritten = 2 * (spvStageInputSize.x - 1u) * spvStageInputSize.y; + atomic_store_explicit(spvXfbCounter1, spvInitOffset1 + sizeof(*spvXfb1) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter2, spvInitOffset2 + sizeof(*spvXfb2) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter3, spvInitOffset3 + sizeof(*spvXfb3) * spvWritten, memory_order_relaxed); + } +} + diff --git a/reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-point-list.vert b/reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-point-list.vert new file mode 100644 index 000000000..02ece7a19 --- /dev/null +++ b/reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-point-list.vert @@ -0,0 +1,59 @@ +#include +#include + +using namespace metal; + +struct VertOut +{ + float4 vBar; +}; + +struct spvXfbBuffer1 +{ + char _m0_pad[4]; + packed_float4 gl_Position; +}; + +struct spvXfbBuffer2 +{ + char _m0_pad[16]; + float4 vFoo; +}; + +struct spvXfbBuffer3 +{ + float4 vBar; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device atomic_uint* spvXfbCounter1 [[buffer(17)]], device spvXfbBuffer1* spvXfb1 [[buffer(13)]], device atomic_uint* spvXfbCounter2 [[buffer(18)]], device spvXfbBuffer2* spvXfb2 [[buffer(14)]], device atomic_uint* spvXfbCounter3 [[buffer(19)]], device spvXfbBuffer3* spvXfb3 [[buffer(15)]]) +{ + spvXfbBuffer1 spvXfbOutput1 = {}; + spvXfbBuffer2 spvXfbOutput2 = {}; + spvXfbBuffer3 spvXfbOutput3 = {}; + VertOut _25 = {}; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + spvXfbOutput1.gl_Position = float4(1.0); + spvXfbOutput2.vFoo = float4(3.0); + _25.vBar = float4(5.0); + spvXfbOutput3.vBar = _25.vBar; + uint spvXfbIndex = gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x; + uint spvInitOffset1 = atomic_load_explicit(spvXfbCounter1, memory_order_relaxed); + spvXfb1 = reinterpret_cast(reinterpret_cast(spvXfb1) + spvInitOffset1); + spvXfb1[spvXfbIndex] = spvXfbOutput1; + uint spvInitOffset2 = atomic_load_explicit(spvXfbCounter2, memory_order_relaxed); + spvXfb2 = reinterpret_cast(reinterpret_cast(spvXfb2) + spvInitOffset2); + spvXfb2[spvXfbIndex] = spvXfbOutput2; + uint spvInitOffset3 = atomic_load_explicit(spvXfbCounter3, memory_order_relaxed); + spvXfb3 = reinterpret_cast(reinterpret_cast(spvXfb3) + spvInitOffset3); + spvXfb3[spvXfbIndex] = spvXfbOutput3; + threadgroup_barrier(mem_flags::mem_device); + if (all(gl_GlobalInvocationID.xy == 0)) + { + uint spvWritten = spvStageInputSize.x * spvStageInputSize.y; + atomic_store_explicit(spvXfbCounter1, spvInitOffset1 + sizeof(*spvXfb1) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter2, spvInitOffset2 + sizeof(*spvXfb2) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter3, spvInitOffset3 + sizeof(*spvXfb3) * spvWritten, memory_order_relaxed); + } +} + diff --git a/reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-fan.vert b/reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-fan.vert new file mode 100644 index 000000000..561c5c390 --- /dev/null +++ b/reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-fan.vert @@ -0,0 +1,93 @@ +#include +#include + +using namespace metal; + +struct VertOut +{ + float4 vBar; +}; + +struct spvXfbBuffer1 +{ + char _m0_pad[4]; + packed_float4 gl_Position; +}; + +struct spvXfbBuffer2 +{ + char _m0_pad[16]; + float4 vFoo; +}; + +struct spvXfbBuffer3 +{ + float4 vBar; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device atomic_uint* spvXfbCounter1 [[buffer(17)]], device spvXfbBuffer1* spvXfb1 [[buffer(13)]], device atomic_uint* spvXfbCounter2 [[buffer(18)]], device spvXfbBuffer2* spvXfb2 [[buffer(14)]], device atomic_uint* spvXfbCounter3 [[buffer(19)]], device spvXfbBuffer3* spvXfb3 [[buffer(15)]]) +{ + spvXfbBuffer1 spvXfbOutput1 = {}; + spvXfbBuffer2 spvXfbOutput2 = {}; + spvXfbBuffer3 spvXfbOutput3 = {}; + VertOut _25 = {}; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + spvXfbOutput1.gl_Position = float4(1.0); + spvXfbOutput2.vFoo = float4(3.0); + _25.vBar = float4(5.0); + spvXfbOutput3.vBar = _25.vBar; + uint spvXfbBaseIndex = 3 * gl_GlobalInvocationID.y * subsat(spvStageInputSize.x, 2u); + uint spvXfbIndex = spvXfbBaseIndex + 3 * gl_GlobalInvocationID.x - 2u; + uint spvInitOffset1 = atomic_load_explicit(spvXfbCounter1, memory_order_relaxed); + spvXfb1 = reinterpret_cast(reinterpret_cast(spvXfb1) + spvInitOffset1); + if (gl_GlobalInvocationID.x == 0) + { + for (uint i = 0; i < subsat(spvStageInputSize.x, 2u); ++i) + spvXfb1[spvXfbBaseIndex + 3 * i] = spvXfbOutput1; + } + else + { + if (gl_GlobalInvocationID.x != spvStageInputSize.x - 1u) + spvXfb1[spvXfbIndex] = spvXfbOutput1; + if (gl_GlobalInvocationID.x != 1) + spvXfb1[spvXfbIndex - 2u] = spvXfbOutput1; + } + uint spvInitOffset2 = atomic_load_explicit(spvXfbCounter2, memory_order_relaxed); + spvXfb2 = reinterpret_cast(reinterpret_cast(spvXfb2) + spvInitOffset2); + if (gl_GlobalInvocationID.x == 0) + { + for (uint i = 0; i < subsat(spvStageInputSize.x, 2u); ++i) + spvXfb2[spvXfbBaseIndex + 3 * i] = spvXfbOutput2; + } + else + { + if (gl_GlobalInvocationID.x != spvStageInputSize.x - 1u) + spvXfb2[spvXfbIndex] = spvXfbOutput2; + if (gl_GlobalInvocationID.x != 1) + spvXfb2[spvXfbIndex - 2u] = spvXfbOutput2; + } + uint spvInitOffset3 = atomic_load_explicit(spvXfbCounter3, memory_order_relaxed); + spvXfb3 = reinterpret_cast(reinterpret_cast(spvXfb3) + spvInitOffset3); + if (gl_GlobalInvocationID.x == 0) + { + for (uint i = 0; i < subsat(spvStageInputSize.x, 2u); ++i) + spvXfb3[spvXfbBaseIndex + 3 * i] = spvXfbOutput3; + } + else + { + if (gl_GlobalInvocationID.x != spvStageInputSize.x - 1u) + spvXfb3[spvXfbIndex] = spvXfbOutput3; + if (gl_GlobalInvocationID.x != 1) + spvXfb3[spvXfbIndex - 2u] = spvXfbOutput3; + } + threadgroup_barrier(mem_flags::mem_device); + if (all(gl_GlobalInvocationID.xy == 0)) + { + uint spvWritten = 3 * subsat(spvStageInputSize.x, 2u) * spvStageInputSize.y; + atomic_store_explicit(spvXfbCounter1, spvInitOffset1 + sizeof(*spvXfb1) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter2, spvInitOffset2 + sizeof(*spvXfb2) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter3, spvInitOffset3 + sizeof(*spvXfb3) * spvWritten, memory_order_relaxed); + } +} + diff --git a/reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-list.vert b/reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-list.vert new file mode 100644 index 000000000..fcce0b3e6 --- /dev/null +++ b/reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-list.vert @@ -0,0 +1,62 @@ +#include +#include + +using namespace metal; + +struct VertOut +{ + float4 vBar; +}; + +struct spvXfbBuffer1 +{ + char _m0_pad[4]; + packed_float4 gl_Position; +}; + +struct spvXfbBuffer2 +{ + char _m0_pad[16]; + float4 vFoo; +}; + +struct spvXfbBuffer3 +{ + float4 vBar; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device atomic_uint* spvXfbCounter1 [[buffer(17)]], device spvXfbBuffer1* spvXfb1 [[buffer(13)]], device atomic_uint* spvXfbCounter2 [[buffer(18)]], device spvXfbBuffer2* spvXfb2 [[buffer(14)]], device atomic_uint* spvXfbCounter3 [[buffer(19)]], device spvXfbBuffer3* spvXfb3 [[buffer(15)]]) +{ + spvXfbBuffer1 spvXfbOutput1 = {}; + spvXfbBuffer2 spvXfbOutput2 = {}; + spvXfbBuffer3 spvXfbOutput3 = {}; + VertOut _25 = {}; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + spvXfbOutput1.gl_Position = float4(1.0); + spvXfbOutput2.vFoo = float4(3.0); + _25.vBar = float4(5.0); + spvXfbOutput3.vBar = _25.vBar; + uint spvXfbIndex = gl_GlobalInvocationID.y * (spvStageInputSize.x - spvStageInputSize.x % 3u) + gl_GlobalInvocationID.x; + uint spvInitOffset1 = atomic_load_explicit(spvXfbCounter1, memory_order_relaxed); + spvXfb1 = reinterpret_cast(reinterpret_cast(spvXfb1) + spvInitOffset1); + if ((gl_GlobalInvocationID.x % 3u == 2) || gl_GlobalInvocationID.x + 2 < spvStageInputSize.x) + spvXfb1[spvXfbIndex] = spvXfbOutput1; + uint spvInitOffset2 = atomic_load_explicit(spvXfbCounter2, memory_order_relaxed); + spvXfb2 = reinterpret_cast(reinterpret_cast(spvXfb2) + spvInitOffset2); + if ((gl_GlobalInvocationID.x % 3u == 2) || gl_GlobalInvocationID.x + 2 < spvStageInputSize.x) + spvXfb2[spvXfbIndex] = spvXfbOutput2; + uint spvInitOffset3 = atomic_load_explicit(spvXfbCounter3, memory_order_relaxed); + spvXfb3 = reinterpret_cast(reinterpret_cast(spvXfb3) + spvInitOffset3); + if ((gl_GlobalInvocationID.x % 3u == 2) || gl_GlobalInvocationID.x + 2 < spvStageInputSize.x) + spvXfb3[spvXfbIndex] = spvXfbOutput3; + threadgroup_barrier(mem_flags::mem_device); + if (all(gl_GlobalInvocationID.xy == 0)) + { + uint spvWritten = (spvStageInputSize.x - spvStageInputSize.x % 3u) * spvStageInputSize.y; + atomic_store_explicit(spvXfbCounter1, spvInitOffset1 + sizeof(*spvXfb1) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter2, spvInitOffset2 + sizeof(*spvXfb2) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter3, spvInitOffset3 + sizeof(*spvXfb3) * spvWritten, memory_order_relaxed); + } +} + diff --git a/reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-strip.vert b/reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-strip.vert new file mode 100644 index 000000000..46d752b21 --- /dev/null +++ b/reference/opt/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-strip.vert @@ -0,0 +1,74 @@ +#include +#include + +using namespace metal; + +struct VertOut +{ + float4 vBar; +}; + +struct spvXfbBuffer1 +{ + char _m0_pad[4]; + packed_float4 gl_Position; +}; + +struct spvXfbBuffer2 +{ + char _m0_pad[16]; + float4 vFoo; +}; + +struct spvXfbBuffer3 +{ + float4 vBar; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device atomic_uint* spvXfbCounter1 [[buffer(17)]], device spvXfbBuffer1* spvXfb1 [[buffer(13)]], device atomic_uint* spvXfbCounter2 [[buffer(18)]], device spvXfbBuffer2* spvXfb2 [[buffer(14)]], device atomic_uint* spvXfbCounter3 [[buffer(19)]], device spvXfbBuffer3* spvXfb3 [[buffer(15)]]) +{ + spvXfbBuffer1 spvXfbOutput1 = {}; + spvXfbBuffer2 spvXfbOutput2 = {}; + spvXfbBuffer3 spvXfbOutput3 = {}; + VertOut _25 = {}; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + spvXfbOutput1.gl_Position = float4(1.0); + spvXfbOutput2.vFoo = float4(3.0); + _25.vBar = float4(5.0); + spvXfbOutput3.vBar = _25.vBar; + uint spvXfbIndex = 3 * gl_GlobalInvocationID.y * subsat(spvStageInputSize.x, 2u) + 3 * gl_GlobalInvocationID.x; + uint spvInitOffset1 = atomic_load_explicit(spvXfbCounter1, memory_order_relaxed); + spvXfb1 = reinterpret_cast(reinterpret_cast(spvXfb1) + spvInitOffset1); + if (gl_GlobalInvocationID.x + 2 < spvStageInputSize.x) + spvXfb1[spvXfbIndex] = spvXfbOutput1; + if (gl_GlobalInvocationID.x != 0 && gl_GlobalInvocationID.x != spvStageInputSize.x - 1u) + spvXfb1[spvXfbIndex - 1u - (gl_GlobalInvocationID.x & 1u)] = spvXfbOutput1; + if (gl_GlobalInvocationID.x > 1) + spvXfb1[spvXfbIndex - 4u - (gl_GlobalInvocationID.x & 1u)] = spvXfbOutput1; + uint spvInitOffset2 = atomic_load_explicit(spvXfbCounter2, memory_order_relaxed); + spvXfb2 = reinterpret_cast(reinterpret_cast(spvXfb2) + spvInitOffset2); + if (gl_GlobalInvocationID.x + 2 < spvStageInputSize.x) + spvXfb2[spvXfbIndex] = spvXfbOutput2; + if (gl_GlobalInvocationID.x != 0 && gl_GlobalInvocationID.x != spvStageInputSize.x - 1u) + spvXfb2[spvXfbIndex - 1u - (gl_GlobalInvocationID.x & 1u)] = spvXfbOutput2; + if (gl_GlobalInvocationID.x > 1) + spvXfb2[spvXfbIndex - 4u - (gl_GlobalInvocationID.x & 1u)] = spvXfbOutput2; + uint spvInitOffset3 = atomic_load_explicit(spvXfbCounter3, memory_order_relaxed); + spvXfb3 = reinterpret_cast(reinterpret_cast(spvXfb3) + spvInitOffset3); + if (gl_GlobalInvocationID.x + 2 < spvStageInputSize.x) + spvXfb3[spvXfbIndex] = spvXfbOutput3; + if (gl_GlobalInvocationID.x != 0 && gl_GlobalInvocationID.x != spvStageInputSize.x - 1u) + spvXfb3[spvXfbIndex - 1u - (gl_GlobalInvocationID.x & 1u)] = spvXfbOutput3; + if (gl_GlobalInvocationID.x > 1) + spvXfb3[spvXfbIndex - 4u - (gl_GlobalInvocationID.x & 1u)] = spvXfbOutput3; + threadgroup_barrier(mem_flags::mem_device); + if (all(gl_GlobalInvocationID.xy == 0)) + { + uint spvWritten = 3 * subsat(spvStageInputSize.x, 2u) * spvStageInputSize.y; + atomic_store_explicit(spvXfbCounter1, spvInitOffset1 + sizeof(*spvXfb1) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter2, spvInitOffset2 + sizeof(*spvXfb2) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter3, spvInitOffset3 + sizeof(*spvXfb3) * spvWritten, memory_order_relaxed); + } +} + diff --git a/reference/shaders-msl/vert/transform-feedback-decorations.xfb-line-list.vert b/reference/shaders-msl/vert/transform-feedback-decorations.xfb-line-list.vert new file mode 100644 index 000000000..2c692f196 --- /dev/null +++ b/reference/shaders-msl/vert/transform-feedback-decorations.xfb-line-list.vert @@ -0,0 +1,82 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct VertOut +{ + float4 vBar; +}; + +struct spvXfbBuffer1 +{ + char _m0_pad[4]; + packed_float4 gl_Position; +}; + +struct spvXfbBuffer2 +{ + char _m0_pad[16]; + float4 vFoo; +}; + +struct spvXfbBuffer3 +{ + float4 vBar; +}; + +static inline __attribute__((always_inline)) +void baz(thread packed_float4& gl_Position) +{ + gl_Position = float4(1.0); +} + +static inline __attribute__((always_inline)) +void foo(thread float4& vFoo) +{ + vFoo = float4(3.0); +} + +static inline __attribute__((always_inline)) +void bar(thread VertOut& _20) +{ + _20.vBar = float4(5.0); +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device atomic_uint* spvXfbCounter1 [[buffer(17)]], device spvXfbBuffer1* spvXfb1 [[buffer(13)]], device atomic_uint* spvXfbCounter2 [[buffer(18)]], device spvXfbBuffer2* spvXfb2 [[buffer(14)]], device atomic_uint* spvXfbCounter3 [[buffer(19)]], device spvXfbBuffer3* spvXfb3 [[buffer(15)]]) +{ + spvXfbBuffer1 spvXfbOutput1 = {}; + spvXfbBuffer2 spvXfbOutput2 = {}; + spvXfbBuffer3 spvXfbOutput3 = {}; + VertOut _20 = {}; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + baz(spvXfbOutput1.gl_Position); + foo(spvXfbOutput2.vFoo); + bar(_20); + spvXfbOutput3.vBar = _20.vBar; + uint spvXfbIndex = gl_GlobalInvocationID.y * (spvStageInputSize.x & ~1u) + gl_GlobalInvocationID.x; + uint spvInitOffset1 = atomic_load_explicit(spvXfbCounter1, memory_order_relaxed); + spvXfb1 = reinterpret_cast(reinterpret_cast(spvXfb1) + spvInitOffset1); + if ((gl_GlobalInvocationID.x & 1) || gl_GlobalInvocationID.x < spvStageInputSize.x - 1u) + spvXfb1[spvXfbIndex] = spvXfbOutput1; + uint spvInitOffset2 = atomic_load_explicit(spvXfbCounter2, memory_order_relaxed); + spvXfb2 = reinterpret_cast(reinterpret_cast(spvXfb2) + spvInitOffset2); + if ((gl_GlobalInvocationID.x & 1) || gl_GlobalInvocationID.x < spvStageInputSize.x - 1u) + spvXfb2[spvXfbIndex] = spvXfbOutput2; + uint spvInitOffset3 = atomic_load_explicit(spvXfbCounter3, memory_order_relaxed); + spvXfb3 = reinterpret_cast(reinterpret_cast(spvXfb3) + spvInitOffset3); + if ((gl_GlobalInvocationID.x & 1) || gl_GlobalInvocationID.x < spvStageInputSize.x - 1u) + spvXfb3[spvXfbIndex] = spvXfbOutput3; + threadgroup_barrier(mem_flags::mem_device); + if (all(gl_GlobalInvocationID.xy == 0)) + { + uint spvWritten = (spvStageInputSize.x & ~1u) * spvStageInputSize.y; + atomic_store_explicit(spvXfbCounter1, spvInitOffset1 + sizeof(*spvXfb1) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter2, spvInitOffset2 + sizeof(*spvXfb2) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter3, spvInitOffset3 + sizeof(*spvXfb3) * spvWritten, memory_order_relaxed); + } +} + diff --git a/reference/shaders-msl/vert/transform-feedback-decorations.xfb-line-strip.vert b/reference/shaders-msl/vert/transform-feedback-decorations.xfb-line-strip.vert new file mode 100644 index 000000000..3605e9038 --- /dev/null +++ b/reference/shaders-msl/vert/transform-feedback-decorations.xfb-line-strip.vert @@ -0,0 +1,76 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct VertOut +{ + float4 vBar; +}; + +struct spvXfbBuffer1 +{ + char _m0_pad[4]; + packed_float4 gl_Position; +}; + +struct spvXfbBuffer2 +{ + char _m0_pad[16]; + float4 vFoo; +}; + +struct spvXfbBuffer3 +{ + float4 vBar; +}; + +static inline __attribute__((always_inline)) +void foo(thread float4& vFoo) +{ + vFoo = float4(3.0); +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device atomic_uint* spvXfbCounter1 [[buffer(17)]], device spvXfbBuffer1* spvXfb1 [[buffer(13)]], device atomic_uint* spvXfbCounter2 [[buffer(18)]], device spvXfbBuffer2* spvXfb2 [[buffer(14)]], device atomic_uint* spvXfbCounter3 [[buffer(19)]], device spvXfbBuffer3* spvXfb3 [[buffer(15)]]) +{ + spvXfbBuffer1 spvXfbOutput1 = {}; + spvXfbBuffer2 spvXfbOutput2 = {}; + spvXfbBuffer3 spvXfbOutput3 = {}; + VertOut _25 = {}; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + spvXfbOutput1.gl_Position = float4(1.0); + foo(spvXfbOutput2.vFoo); + _25.vBar = float4(5.0); + spvXfbOutput3.vBar = _25.vBar; + uint spvXfbIndex = 2 * gl_GlobalInvocationID.y * (spvStageInputSize.x - 1u) + 2 * gl_GlobalInvocationID.x; + uint spvInitOffset1 = atomic_load_explicit(spvXfbCounter1, memory_order_relaxed); + spvXfb1 = reinterpret_cast(reinterpret_cast(spvXfb1) + spvInitOffset1); + if (gl_GlobalInvocationID.x != spvStageInputSize.x - 1u) + spvXfb1[spvXfbIndex] = spvXfbOutput1; + if (gl_GlobalInvocationID.x != 0) + spvXfb1[spvXfbIndex - 1u] = spvXfbOutput1; + uint spvInitOffset2 = atomic_load_explicit(spvXfbCounter2, memory_order_relaxed); + spvXfb2 = reinterpret_cast(reinterpret_cast(spvXfb2) + spvInitOffset2); + if (gl_GlobalInvocationID.x != spvStageInputSize.x - 1u) + spvXfb2[spvXfbIndex] = spvXfbOutput2; + if (gl_GlobalInvocationID.x != 0) + spvXfb2[spvXfbIndex - 1u] = spvXfbOutput2; + uint spvInitOffset3 = atomic_load_explicit(spvXfbCounter3, memory_order_relaxed); + spvXfb3 = reinterpret_cast(reinterpret_cast(spvXfb3) + spvInitOffset3); + if (gl_GlobalInvocationID.x != spvStageInputSize.x - 1u) + spvXfb3[spvXfbIndex] = spvXfbOutput3; + if (gl_GlobalInvocationID.x != 0) + spvXfb3[spvXfbIndex - 1u] = spvXfbOutput3; + threadgroup_barrier(mem_flags::mem_device); + if (all(gl_GlobalInvocationID.xy == 0)) + { + uint spvWritten = 2 * (spvStageInputSize.x - 1u) * spvStageInputSize.y; + atomic_store_explicit(spvXfbCounter1, spvInitOffset1 + sizeof(*spvXfb1) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter2, spvInitOffset2 + sizeof(*spvXfb2) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter3, spvInitOffset3 + sizeof(*spvXfb3) * spvWritten, memory_order_relaxed); + } +} + diff --git a/reference/shaders-msl/vert/transform-feedback-decorations.xfb-point-list.vert b/reference/shaders-msl/vert/transform-feedback-decorations.xfb-point-list.vert new file mode 100644 index 000000000..c5514715a --- /dev/null +++ b/reference/shaders-msl/vert/transform-feedback-decorations.xfb-point-list.vert @@ -0,0 +1,67 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct VertOut +{ + float4 vBar; +}; + +struct spvXfbBuffer1 +{ + char _m0_pad[4]; + packed_float4 gl_Position; +}; + +struct spvXfbBuffer2 +{ + char _m0_pad[16]; + float4 vFoo; +}; + +struct spvXfbBuffer3 +{ + float4 vBar; +}; + +static inline __attribute__((always_inline)) +void foo(thread float4& vFoo) +{ + vFoo = float4(3.0); +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device atomic_uint* spvXfbCounter1 [[buffer(17)]], device spvXfbBuffer1* spvXfb1 [[buffer(13)]], device atomic_uint* spvXfbCounter2 [[buffer(18)]], device spvXfbBuffer2* spvXfb2 [[buffer(14)]], device atomic_uint* spvXfbCounter3 [[buffer(19)]], device spvXfbBuffer3* spvXfb3 [[buffer(15)]]) +{ + spvXfbBuffer1 spvXfbOutput1 = {}; + spvXfbBuffer2 spvXfbOutput2 = {}; + spvXfbBuffer3 spvXfbOutput3 = {}; + VertOut _25 = {}; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + spvXfbOutput1.gl_Position = float4(1.0); + foo(spvXfbOutput2.vFoo); + _25.vBar = float4(5.0); + spvXfbOutput3.vBar = _25.vBar; + uint spvXfbIndex = gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x; + uint spvInitOffset1 = atomic_load_explicit(spvXfbCounter1, memory_order_relaxed); + spvXfb1 = reinterpret_cast(reinterpret_cast(spvXfb1) + spvInitOffset1); + spvXfb1[spvXfbIndex] = spvXfbOutput1; + uint spvInitOffset2 = atomic_load_explicit(spvXfbCounter2, memory_order_relaxed); + spvXfb2 = reinterpret_cast(reinterpret_cast(spvXfb2) + spvInitOffset2); + spvXfb2[spvXfbIndex] = spvXfbOutput2; + uint spvInitOffset3 = atomic_load_explicit(spvXfbCounter3, memory_order_relaxed); + spvXfb3 = reinterpret_cast(reinterpret_cast(spvXfb3) + spvInitOffset3); + spvXfb3[spvXfbIndex] = spvXfbOutput3; + threadgroup_barrier(mem_flags::mem_device); + if (all(gl_GlobalInvocationID.xy == 0)) + { + uint spvWritten = spvStageInputSize.x * spvStageInputSize.y; + atomic_store_explicit(spvXfbCounter1, spvInitOffset1 + sizeof(*spvXfb1) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter2, spvInitOffset2 + sizeof(*spvXfb2) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter3, spvInitOffset3 + sizeof(*spvXfb3) * spvWritten, memory_order_relaxed); + } +} + diff --git a/reference/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-fan.vert b/reference/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-fan.vert new file mode 100644 index 000000000..5832f8b81 --- /dev/null +++ b/reference/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-fan.vert @@ -0,0 +1,101 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct VertOut +{ + float4 vBar; +}; + +struct spvXfbBuffer1 +{ + char _m0_pad[4]; + packed_float4 gl_Position; +}; + +struct spvXfbBuffer2 +{ + char _m0_pad[16]; + float4 vFoo; +}; + +struct spvXfbBuffer3 +{ + float4 vBar; +}; + +static inline __attribute__((always_inline)) +void foo(thread float4& vFoo) +{ + vFoo = float4(3.0); +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device atomic_uint* spvXfbCounter1 [[buffer(17)]], device spvXfbBuffer1* spvXfb1 [[buffer(13)]], device atomic_uint* spvXfbCounter2 [[buffer(18)]], device spvXfbBuffer2* spvXfb2 [[buffer(14)]], device atomic_uint* spvXfbCounter3 [[buffer(19)]], device spvXfbBuffer3* spvXfb3 [[buffer(15)]]) +{ + spvXfbBuffer1 spvXfbOutput1 = {}; + spvXfbBuffer2 spvXfbOutput2 = {}; + spvXfbBuffer3 spvXfbOutput3 = {}; + VertOut _25 = {}; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + spvXfbOutput1.gl_Position = float4(1.0); + foo(spvXfbOutput2.vFoo); + _25.vBar = float4(5.0); + spvXfbOutput3.vBar = _25.vBar; + uint spvXfbBaseIndex = 3 * gl_GlobalInvocationID.y * subsat(spvStageInputSize.x, 2u); + uint spvXfbIndex = spvXfbBaseIndex + 3 * gl_GlobalInvocationID.x - 2u; + uint spvInitOffset1 = atomic_load_explicit(spvXfbCounter1, memory_order_relaxed); + spvXfb1 = reinterpret_cast(reinterpret_cast(spvXfb1) + spvInitOffset1); + if (gl_GlobalInvocationID.x == 0) + { + for (uint i = 0; i < subsat(spvStageInputSize.x, 2u); ++i) + spvXfb1[spvXfbBaseIndex + 3 * i] = spvXfbOutput1; + } + else + { + if (gl_GlobalInvocationID.x != spvStageInputSize.x - 1u) + spvXfb1[spvXfbIndex] = spvXfbOutput1; + if (gl_GlobalInvocationID.x != 1) + spvXfb1[spvXfbIndex - 2u] = spvXfbOutput1; + } + uint spvInitOffset2 = atomic_load_explicit(spvXfbCounter2, memory_order_relaxed); + spvXfb2 = reinterpret_cast(reinterpret_cast(spvXfb2) + spvInitOffset2); + if (gl_GlobalInvocationID.x == 0) + { + for (uint i = 0; i < subsat(spvStageInputSize.x, 2u); ++i) + spvXfb2[spvXfbBaseIndex + 3 * i] = spvXfbOutput2; + } + else + { + if (gl_GlobalInvocationID.x != spvStageInputSize.x - 1u) + spvXfb2[spvXfbIndex] = spvXfbOutput2; + if (gl_GlobalInvocationID.x != 1) + spvXfb2[spvXfbIndex - 2u] = spvXfbOutput2; + } + uint spvInitOffset3 = atomic_load_explicit(spvXfbCounter3, memory_order_relaxed); + spvXfb3 = reinterpret_cast(reinterpret_cast(spvXfb3) + spvInitOffset3); + if (gl_GlobalInvocationID.x == 0) + { + for (uint i = 0; i < subsat(spvStageInputSize.x, 2u); ++i) + spvXfb3[spvXfbBaseIndex + 3 * i] = spvXfbOutput3; + } + else + { + if (gl_GlobalInvocationID.x != spvStageInputSize.x - 1u) + spvXfb3[spvXfbIndex] = spvXfbOutput3; + if (gl_GlobalInvocationID.x != 1) + spvXfb3[spvXfbIndex - 2u] = spvXfbOutput3; + } + threadgroup_barrier(mem_flags::mem_device); + if (all(gl_GlobalInvocationID.xy == 0)) + { + uint spvWritten = 3 * subsat(spvStageInputSize.x, 2u) * spvStageInputSize.y; + atomic_store_explicit(spvXfbCounter1, spvInitOffset1 + sizeof(*spvXfb1) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter2, spvInitOffset2 + sizeof(*spvXfb2) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter3, spvInitOffset3 + sizeof(*spvXfb3) * spvWritten, memory_order_relaxed); + } +} + diff --git a/reference/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-list.vert b/reference/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-list.vert new file mode 100644 index 000000000..3457d6614 --- /dev/null +++ b/reference/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-list.vert @@ -0,0 +1,70 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct VertOut +{ + float4 vBar; +}; + +struct spvXfbBuffer1 +{ + char _m0_pad[4]; + packed_float4 gl_Position; +}; + +struct spvXfbBuffer2 +{ + char _m0_pad[16]; + float4 vFoo; +}; + +struct spvXfbBuffer3 +{ + float4 vBar; +}; + +static inline __attribute__((always_inline)) +void foo(thread float4& vFoo) +{ + vFoo = float4(3.0); +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device atomic_uint* spvXfbCounter1 [[buffer(17)]], device spvXfbBuffer1* spvXfb1 [[buffer(13)]], device atomic_uint* spvXfbCounter2 [[buffer(18)]], device spvXfbBuffer2* spvXfb2 [[buffer(14)]], device atomic_uint* spvXfbCounter3 [[buffer(19)]], device spvXfbBuffer3* spvXfb3 [[buffer(15)]]) +{ + spvXfbBuffer1 spvXfbOutput1 = {}; + spvXfbBuffer2 spvXfbOutput2 = {}; + spvXfbBuffer3 spvXfbOutput3 = {}; + VertOut _25 = {}; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + spvXfbOutput1.gl_Position = float4(1.0); + foo(spvXfbOutput2.vFoo); + _25.vBar = float4(5.0); + spvXfbOutput3.vBar = _25.vBar; + uint spvXfbIndex = gl_GlobalInvocationID.y * (spvStageInputSize.x - spvStageInputSize.x % 3u) + gl_GlobalInvocationID.x; + uint spvInitOffset1 = atomic_load_explicit(spvXfbCounter1, memory_order_relaxed); + spvXfb1 = reinterpret_cast(reinterpret_cast(spvXfb1) + spvInitOffset1); + if ((gl_GlobalInvocationID.x % 3u == 2) || gl_GlobalInvocationID.x + 2 < spvStageInputSize.x) + spvXfb1[spvXfbIndex] = spvXfbOutput1; + uint spvInitOffset2 = atomic_load_explicit(spvXfbCounter2, memory_order_relaxed); + spvXfb2 = reinterpret_cast(reinterpret_cast(spvXfb2) + spvInitOffset2); + if ((gl_GlobalInvocationID.x % 3u == 2) || gl_GlobalInvocationID.x + 2 < spvStageInputSize.x) + spvXfb2[spvXfbIndex] = spvXfbOutput2; + uint spvInitOffset3 = atomic_load_explicit(spvXfbCounter3, memory_order_relaxed); + spvXfb3 = reinterpret_cast(reinterpret_cast(spvXfb3) + spvInitOffset3); + if ((gl_GlobalInvocationID.x % 3u == 2) || gl_GlobalInvocationID.x + 2 < spvStageInputSize.x) + spvXfb3[spvXfbIndex] = spvXfbOutput3; + threadgroup_barrier(mem_flags::mem_device); + if (all(gl_GlobalInvocationID.xy == 0)) + { + uint spvWritten = (spvStageInputSize.x - spvStageInputSize.x % 3u) * spvStageInputSize.y; + atomic_store_explicit(spvXfbCounter1, spvInitOffset1 + sizeof(*spvXfb1) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter2, spvInitOffset2 + sizeof(*spvXfb2) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter3, spvInitOffset3 + sizeof(*spvXfb3) * spvWritten, memory_order_relaxed); + } +} + diff --git a/reference/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-strip.vert b/reference/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-strip.vert new file mode 100644 index 000000000..7fe9a90bd --- /dev/null +++ b/reference/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-strip.vert @@ -0,0 +1,82 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct VertOut +{ + float4 vBar; +}; + +struct spvXfbBuffer1 +{ + char _m0_pad[4]; + packed_float4 gl_Position; +}; + +struct spvXfbBuffer2 +{ + char _m0_pad[16]; + float4 vFoo; +}; + +struct spvXfbBuffer3 +{ + float4 vBar; +}; + +static inline __attribute__((always_inline)) +void foo(thread float4& vFoo) +{ + vFoo = float4(3.0); +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device atomic_uint* spvXfbCounter1 [[buffer(17)]], device spvXfbBuffer1* spvXfb1 [[buffer(13)]], device atomic_uint* spvXfbCounter2 [[buffer(18)]], device spvXfbBuffer2* spvXfb2 [[buffer(14)]], device atomic_uint* spvXfbCounter3 [[buffer(19)]], device spvXfbBuffer3* spvXfb3 [[buffer(15)]]) +{ + spvXfbBuffer1 spvXfbOutput1 = {}; + spvXfbBuffer2 spvXfbOutput2 = {}; + spvXfbBuffer3 spvXfbOutput3 = {}; + VertOut _25 = {}; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + spvXfbOutput1.gl_Position = float4(1.0); + foo(spvXfbOutput2.vFoo); + _25.vBar = float4(5.0); + spvXfbOutput3.vBar = _25.vBar; + uint spvXfbIndex = 3 * gl_GlobalInvocationID.y * subsat(spvStageInputSize.x, 2u) + 3 * gl_GlobalInvocationID.x; + uint spvInitOffset1 = atomic_load_explicit(spvXfbCounter1, memory_order_relaxed); + spvXfb1 = reinterpret_cast(reinterpret_cast(spvXfb1) + spvInitOffset1); + if (gl_GlobalInvocationID.x + 2 < spvStageInputSize.x) + spvXfb1[spvXfbIndex] = spvXfbOutput1; + if (gl_GlobalInvocationID.x != 0 && gl_GlobalInvocationID.x != spvStageInputSize.x - 1u) + spvXfb1[spvXfbIndex - 1u - (gl_GlobalInvocationID.x & 1u)] = spvXfbOutput1; + if (gl_GlobalInvocationID.x > 1) + spvXfb1[spvXfbIndex - 4u - (gl_GlobalInvocationID.x & 1u)] = spvXfbOutput1; + uint spvInitOffset2 = atomic_load_explicit(spvXfbCounter2, memory_order_relaxed); + spvXfb2 = reinterpret_cast(reinterpret_cast(spvXfb2) + spvInitOffset2); + if (gl_GlobalInvocationID.x + 2 < spvStageInputSize.x) + spvXfb2[spvXfbIndex] = spvXfbOutput2; + if (gl_GlobalInvocationID.x != 0 && gl_GlobalInvocationID.x != spvStageInputSize.x - 1u) + spvXfb2[spvXfbIndex - 1u - (gl_GlobalInvocationID.x & 1u)] = spvXfbOutput2; + if (gl_GlobalInvocationID.x > 1) + spvXfb2[spvXfbIndex - 4u - (gl_GlobalInvocationID.x & 1u)] = spvXfbOutput2; + uint spvInitOffset3 = atomic_load_explicit(spvXfbCounter3, memory_order_relaxed); + spvXfb3 = reinterpret_cast(reinterpret_cast(spvXfb3) + spvInitOffset3); + if (gl_GlobalInvocationID.x + 2 < spvStageInputSize.x) + spvXfb3[spvXfbIndex] = spvXfbOutput3; + if (gl_GlobalInvocationID.x != 0 && gl_GlobalInvocationID.x != spvStageInputSize.x - 1u) + spvXfb3[spvXfbIndex - 1u - (gl_GlobalInvocationID.x & 1u)] = spvXfbOutput3; + if (gl_GlobalInvocationID.x > 1) + spvXfb3[spvXfbIndex - 4u - (gl_GlobalInvocationID.x & 1u)] = spvXfbOutput3; + threadgroup_barrier(mem_flags::mem_device); + if (all(gl_GlobalInvocationID.xy == 0)) + { + uint spvWritten = 3 * subsat(spvStageInputSize.x, 2u) * spvStageInputSize.y; + atomic_store_explicit(spvXfbCounter1, spvInitOffset1 + sizeof(*spvXfb1) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter2, spvInitOffset2 + sizeof(*spvXfb2) * spvWritten, memory_order_relaxed); + atomic_store_explicit(spvXfbCounter3, spvInitOffset3 + sizeof(*spvXfb3) * spvWritten, memory_order_relaxed); + } +} + diff --git a/shaders-msl/vert/transform-feedback-decorations.xfb-line-list.vert b/shaders-msl/vert/transform-feedback-decorations.xfb-line-list.vert new file mode 100644 index 000000000..4803baa18 --- /dev/null +++ b/shaders-msl/vert/transform-feedback-decorations.xfb-line-list.vert @@ -0,0 +1,35 @@ +#version 450 +layout(xfb_stride = 32, xfb_offset = 16, xfb_buffer = 2, location = 0) out vec4 vFoo; + +layout(xfb_buffer = 1, xfb_stride = 20) out gl_PerVertex +{ + layout(xfb_offset = 4) vec4 gl_Position; + float gl_PointSize; +}; + +layout(xfb_buffer = 3) out VertOut +{ + layout(xfb_stride = 16, xfb_offset = 0, location = 1) vec4 vBar; +}; + +void foo() +{ + vFoo = vec4(3.0); +} + +void bar() +{ + vBar = vec4(5.0); +} + +void baz() +{ + gl_Position = vec4(1.0); +} + +void main() +{ + baz(); + foo(); + bar(); +} diff --git a/shaders-msl/vert/transform-feedback-decorations.xfb-line-strip.vert b/shaders-msl/vert/transform-feedback-decorations.xfb-line-strip.vert new file mode 100644 index 000000000..88f77fcbd --- /dev/null +++ b/shaders-msl/vert/transform-feedback-decorations.xfb-line-strip.vert @@ -0,0 +1,25 @@ +#version 450 +layout(xfb_stride = 32, xfb_offset = 16, xfb_buffer = 2, location = 0) out vec4 vFoo; + +layout(xfb_buffer = 1, xfb_stride = 20) out gl_PerVertex +{ + layout(xfb_offset = 4) vec4 gl_Position; + float gl_PointSize; +}; + +layout(xfb_buffer = 3) out VertOut +{ + layout(xfb_stride = 16, xfb_offset = 0, location = 1) vec4 vBar; +}; + +void foo() +{ + vFoo = vec4(3.0); +} + +void main() +{ + gl_Position = vec4(1.0); + foo(); + vBar = vec4(5.0); +} diff --git a/shaders-msl/vert/transform-feedback-decorations.xfb-point-list.vert b/shaders-msl/vert/transform-feedback-decorations.xfb-point-list.vert new file mode 100644 index 000000000..88f77fcbd --- /dev/null +++ b/shaders-msl/vert/transform-feedback-decorations.xfb-point-list.vert @@ -0,0 +1,25 @@ +#version 450 +layout(xfb_stride = 32, xfb_offset = 16, xfb_buffer = 2, location = 0) out vec4 vFoo; + +layout(xfb_buffer = 1, xfb_stride = 20) out gl_PerVertex +{ + layout(xfb_offset = 4) vec4 gl_Position; + float gl_PointSize; +}; + +layout(xfb_buffer = 3) out VertOut +{ + layout(xfb_stride = 16, xfb_offset = 0, location = 1) vec4 vBar; +}; + +void foo() +{ + vFoo = vec4(3.0); +} + +void main() +{ + gl_Position = vec4(1.0); + foo(); + vBar = vec4(5.0); +} diff --git a/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-fan.vert b/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-fan.vert new file mode 100644 index 000000000..88f77fcbd --- /dev/null +++ b/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-fan.vert @@ -0,0 +1,25 @@ +#version 450 +layout(xfb_stride = 32, xfb_offset = 16, xfb_buffer = 2, location = 0) out vec4 vFoo; + +layout(xfb_buffer = 1, xfb_stride = 20) out gl_PerVertex +{ + layout(xfb_offset = 4) vec4 gl_Position; + float gl_PointSize; +}; + +layout(xfb_buffer = 3) out VertOut +{ + layout(xfb_stride = 16, xfb_offset = 0, location = 1) vec4 vBar; +}; + +void foo() +{ + vFoo = vec4(3.0); +} + +void main() +{ + gl_Position = vec4(1.0); + foo(); + vBar = vec4(5.0); +} diff --git a/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-list.vert b/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-list.vert new file mode 100644 index 000000000..88f77fcbd --- /dev/null +++ b/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-list.vert @@ -0,0 +1,25 @@ +#version 450 +layout(xfb_stride = 32, xfb_offset = 16, xfb_buffer = 2, location = 0) out vec4 vFoo; + +layout(xfb_buffer = 1, xfb_stride = 20) out gl_PerVertex +{ + layout(xfb_offset = 4) vec4 gl_Position; + float gl_PointSize; +}; + +layout(xfb_buffer = 3) out VertOut +{ + layout(xfb_stride = 16, xfb_offset = 0, location = 1) vec4 vBar; +}; + +void foo() +{ + vFoo = vec4(3.0); +} + +void main() +{ + gl_Position = vec4(1.0); + foo(); + vBar = vec4(5.0); +} diff --git a/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-strip.vert b/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-strip.vert new file mode 100644 index 000000000..88f77fcbd --- /dev/null +++ b/shaders-msl/vert/transform-feedback-decorations.xfb-triangle-strip.vert @@ -0,0 +1,25 @@ +#version 450 +layout(xfb_stride = 32, xfb_offset = 16, xfb_buffer = 2, location = 0) out vec4 vFoo; + +layout(xfb_buffer = 1, xfb_stride = 20) out gl_PerVertex +{ + layout(xfb_offset = 4) vec4 gl_Position; + float gl_PointSize; +}; + +layout(xfb_buffer = 3) out VertOut +{ + layout(xfb_stride = 16, xfb_offset = 0, location = 1) vec4 vBar; +}; + +void foo() +{ + vFoo = vec4(3.0); +} + +void main() +{ + gl_Position = vec4(1.0); + foo(); + vBar = vec4(5.0); +} diff --git a/test_shaders.py b/test_shaders.py index 048d0f6fc..b6994f6d3 100755 --- a/test_shaders.py +++ b/test_shaders.py @@ -373,6 +373,24 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths): msl_args.append('ClipDistance') if '.relax-nan.' in shader: msl_args.append('--relax-nan-checks') + if '.xfb-point-list.' in shader: + msl_args.append('--msl-xfb-primitive-type') + msl_args.append('point-list') + if '.xfb-line-list.' in shader: + msl_args.append('--msl-xfb-primitive-type') + msl_args.append('line-list') + if '.xfb-line-strip.' in shader: + msl_args.append('--msl-xfb-primitive-type') + msl_args.append('line-strip') + if '.xfb-triangle-list.' in shader: + msl_args.append('--msl-xfb-primitive-type') + msl_args.append('triangle-list') + if '.xfb-triangle-strip.' in shader: + msl_args.append('--msl-xfb-primitive-type') + msl_args.append('triangle-strip') + if '.xfb-triangle-fan.' in shader: + msl_args.append('--msl-xfb-primitive-type') + msl_args.append('triangle-fan') subprocess.check_call(msl_args) From 64fa0b6d1bafb3edaf9ab3067934da36eac57730 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Mon, 18 Sep 2023 22:29:43 -0700 Subject: [PATCH 42/48] Attempt to fix MSVC build. --- spirv_msl.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 4c52fd872..8ab46adc2 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -3850,7 +3850,8 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) // We still rely on the block being declared as a variable. Make sure that happens. if (all_captured && !is_builtin) { - get(ir.default_entry_point).add_local_variable(var_id); + auto &entry_point = get(ir.default_entry_point); + entry_point.add_local_variable(var_id); vars_needing_early_declaration.push_back(var_id); } } From dada58818d8f4db6d5160a201156035312d25c05 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Tue, 19 Sep 2023 01:08:19 -0700 Subject: [PATCH 43/48] Try again to work around MSVC brokenness. --- spirv_msl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 8ab46adc2..91186c974 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -3762,6 +3762,7 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) SmallVector vars; bool incl_builtins = storage == StorageClassOutput || is_tessellation_shader(); bool has_seen_barycentric = false; + auto &entry_point = get(ir.default_entry_point); InterfaceBlockMeta meta; @@ -3850,7 +3851,6 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) // We still rely on the block being declared as a variable. Make sure that happens. if (all_captured && !is_builtin) { - auto &entry_point = get(ir.default_entry_point); entry_point.add_local_variable(var_id); vars_needing_early_declaration.push_back(var_id); } From adb3a7bd5b9d2b9ae40fd5efaf8bccc2fb106449 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Tue, 19 Sep 2023 03:05:15 -0700 Subject: [PATCH 44/48] Attempt to work around weird brokenness that only happens... ...on the builder and not for me. --- spirv_msl.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/spirv_msl.hpp b/spirv_msl.hpp index ce4b477d5..0173b43ab 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include From fec7607c66c73bd30e73021e2686501dbe4e875e Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Tue, 19 Sep 2023 12:48:46 -0700 Subject: [PATCH 45/48] Try again to get the stupid compiler on the builder to see that std::hash is supposed to just work. --- spirv_cross_error_handling.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/spirv_cross_error_handling.hpp b/spirv_cross_error_handling.hpp index e96ebb9a7..0e0a4c0fc 100644 --- a/spirv_cross_error_handling.hpp +++ b/spirv_cross_error_handling.hpp @@ -26,6 +26,7 @@ #include #include +#include #include #ifndef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS #include From 039330249325ef6cff08e495597d657403568aef Mon Sep 17 00:00:00 2001 From: gpx1000 Date: Tue, 19 Sep 2023 14:34:54 -0700 Subject: [PATCH 46/48] Testing hypothesis that C++11 doesn't support enum as a key for an unordered_map. --- spirv_msl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spirv_msl.hpp b/spirv_msl.hpp index 0173b43ab..3bf7b3c4e 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -1241,7 +1241,7 @@ class CompilerMSL : public CompilerGLSL VariableID xfb_buffers[kMaxXfbBuffers]; VariableID xfb_locals[kMaxXfbBuffers]; uint32_t xfb_strides[kMaxXfbBuffers]; - std::unordered_map xfb_captured_builtins; + std::unordered_map xfb_captured_builtins; std::unordered_set xfb_captured_outputs; std::unordered_set xfb_packed_outputs; std::unordered_set xfb_packed_builtins; From 739a140eb5364c84ba64b641592034b26a2f2413 Mon Sep 17 00:00:00 2001 From: gpx1000 Date: Tue, 19 Sep 2023 16:30:08 -0700 Subject: [PATCH 47/48] hypothesis was correct for unordered_map stands to reason unordered_set also doesn't support enum in C++11 for hash key --- spirv_msl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spirv_msl.hpp b/spirv_msl.hpp index 3bf7b3c4e..04daf97fe 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -1244,7 +1244,7 @@ class CompilerMSL : public CompilerGLSL std::unordered_map xfb_captured_builtins; std::unordered_set xfb_captured_outputs; std::unordered_set xfb_packed_outputs; - std::unordered_set xfb_packed_builtins; + std::unordered_set xfb_packed_builtins; SmallVector entry_point_bindings; From 8bcfd326e822bc01b9c02984f4c3f56afa121de2 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Sat, 16 Dec 2023 18:38:03 -0800 Subject: [PATCH 48/48] Unfinished support for XFB+tessellation. Only gets the base of the primitive so far. --- spirv_msl.cpp | 33 ++++++++++++++++++++++++++++++++- spirv_msl.hpp | 5 ++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 98698f039..8b2e88f49 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -938,6 +938,18 @@ void CompilerMSL::build_implicit_builtins() dynamic_offsets_buffer_id = var_id; } + if (is_tese_shader() && needs_transform_feedback()) + { + uint32_t var_id = build_constant_uint_array_pointer(); + set_name(var_id, "spvPatchVertexCounts"); + // This should never match anything. + set_decoration(var_id, DecorationDescriptorSet, ~(6u)); + set_decoration(var_id, DecorationBinding, msl_options.tese_patch_vertex_counts_buffer_index); + set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, + msl_options.tese_patch_vertex_counts_buffer_index); + patch_vertex_counts_buffer_id = var_id; + } + // If we're returning a struct from a vertex-like entry point, we must return a position attribute. bool need_position = (get_execution_model() == ExecutionModelVertex || is_tese_shader()) && !capture_output_to_buffer && !get_is_rasterization_disabled() && @@ -1561,6 +1573,8 @@ string CompilerMSL::compile() add_active_interface_variable(view_mask_buffer_id); if (dynamic_offsets_buffer_id) add_active_interface_variable(dynamic_offsets_buffer_id); + if (patch_vertex_counts_buffer_id) + add_active_interface_variable(patch_vertex_counts_buffer_id); if (builtin_layer_id) add_active_interface_variable(builtin_layer_id); if (builtin_dispatch_base_id && !msl_options.supports_msl_version(1, 2)) @@ -14152,7 +14166,8 @@ void CompilerMSL::fix_up_shader_inputs_outputs() [=]() { string index_expr; - switch (msl_options.xfb_primitive_type) + auto prim_type = is_tese_shader() ? Options::PrimitiveType::PatchList : msl_options.xfb_primitive_type; + switch (prim_type) { case Options::PrimitiveType::PointList: index_expr = join(to_expression(builtin_invocation_id_id), ".y * ", @@ -14297,6 +14312,22 @@ void CompilerMSL::fix_up_shader_inputs_outputs() to_expression(builtin_stage_input_size_id), ".x, 2u);"); index_expr = join("spvXfbBaseIndex + 3 * ", to_expression(builtin_invocation_id_id), ".x - 2u"); break; + case Options::PrimitiveType::PatchList: + // This is particularly nasty, because a variable number of vertices may be generated + // from each patch. Therefore, we must maintain a count of vertices per patch and + // sum the entire array up to our patch to figure out the base. Yes, this will slow + // down the later patches. + // But wait, there's more! We also need to figure out which vertex and triangle in the + // patch this is, to identify where to write the transform feedback data. The only + // identifying information we have is the tessellation coordinates (barycentric for + // triangles, normalized for quads). Therefore, we have to perform some sort of + // mathematical transformation on the tessellation coordinate to derive an index, and + // what's more, we have to do it in a way that the resulting triangles' vertices are emitted + // in the buffer in winding order, and that each vertex is emitted once *per triangle*, as the + // spec requires. + statement("uint spvXfbBaseIndex = 0;"); + statement("for (uint i = 0; i < ", to_expression(builtin_primitive_id_id), "; ++i)"); + statement(" spvXfbBaseIndex += ", to_name(patch_vertex_counts_buffer_id), "[i];"); case Options::PrimitiveType::Dynamic: default: SPIRV_CROSS_THROW("Primitive type not yet supported for transform feedback."); diff --git a/spirv_msl.hpp b/spirv_msl.hpp index b5659f24b..6a6aab168 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -314,6 +314,7 @@ class CompilerMSL : public CompilerGLSL uint32_t swizzle_buffer_index = 30; uint32_t indirect_params_buffer_index = 29; uint32_t shader_output_buffer_index = 28; + uint32_t tese_patch_vertex_counts_buffer_index = 27; uint32_t shader_patch_output_buffer_index = 27; uint32_t shader_tess_factor_buffer_index = 26; uint32_t buffer_size_buffer_index = 25; @@ -538,7 +539,7 @@ class CompilerMSL : public CompilerGLSL LineStripWithAdjacency, TriangleListWithAdjacency, TriangleStripWithAdjacency, - // 10 reserved for patch list + PatchList, }; // Indicates the kind of input primitive. Only needed for vertex shaders that have the @@ -1129,10 +1130,12 @@ class CompilerMSL : public CompilerGLSL uint32_t builtin_stage_input_size_id = 0; uint32_t builtin_local_invocation_index_id = 0; uint32_t builtin_workgroup_size_id = 0; + uint32_t builtin_tess_coord_id = 0; uint32_t swizzle_buffer_id = 0; uint32_t buffer_size_buffer_id = 0; uint32_t view_mask_buffer_id = 0; uint32_t dynamic_offsets_buffer_id = 0; + uint32_t patch_vertex_counts_buffer_id = 0; uint32_t uint_type_id = 0; uint32_t argument_buffer_padding_buffer_type_id = 0; uint32_t argument_buffer_padding_image_type_id = 0;