diff --git a/docs/user-guide/a3-02-reference-capability-atoms.md b/docs/user-guide/a3-02-reference-capability-atoms.md index e7a9b1bb41..a70a9f88c8 100644 --- a/docs/user-guide/a3-02-reference-capability-atoms.md +++ b/docs/user-guide/a3-02-reference-capability-atoms.md @@ -363,6 +363,12 @@ Extensions `SPV_EXT_demote_to_helper_invocation` > Represents the SPIR-V extension for demoting to helper invocation. +`SPV_KHR_maximal_reconvergence` +> Represents the SPIR-V extension for maximal reconvergence. + +`SPV_KHR_quad_control` +> Represents the SPIR-V extension for quad group control. + `SPV_KHR_fragment_shader_barycentric` > Represents the SPIR-V extension for fragment shader barycentric. @@ -503,6 +509,12 @@ Extensions `spvDemoteToHelperInvocation` > Represents the SPIR-V capability for demoting to helper invocation. +`spvMaximalReconvergenceKHR` +> Represents the SPIR-V capability for maximal reconvergence. + +`spvQuadControlKHR` +> Represents the SPIR-V capability for quad group control. + `GL_EXT_buffer_reference` > Represents the GL_EXT_buffer_reference extension. @@ -515,6 +527,12 @@ Extensions `GL_EXT_demote_to_helper_invocation` > Represents the GL_EXT_demote_to_helper_invocation extension. +`GL_EXT_maximal_reconvergence` +> Represents the GL_EXT_maximal_reconvergence extension. + +`GL_EXT_shader_quad_control` +> Represents the GL_EXT_shader_quad_control extension. + `GL_EXT_fragment_shader_barycentric` > Represents the GL_EXT_fragment_shader_barycentric extension. 
@@ -1078,6 +1096,9 @@ Compound Capabilities `helper_lane` > Capabilities required to enable helper-lane demotion +`quad_control` +> Capabilities required to enable quad group control + `breakpoint` > Capabilities required to enable shader breakpoints diff --git a/external/slang-rhi b/external/slang-rhi index 655d955274..37ecd14caf 160000 --- a/external/slang-rhi +++ b/external/slang-rhi @@ -1 +1 @@ -Subproject commit 655d955274abd24906d84543b49460e6094b6652 +Subproject commit 37ecd14cafe915898497c38ccf2a91bb9ee41504 diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang index adb7470ddc..bf53da88bc 100644 --- a/source/slang/core.meta.slang +++ b/source/slang/core.meta.slang @@ -3226,6 +3226,12 @@ __Addr __getLegalizedSPIRVGlobalParamAddr(T val); __intrinsic_op($(kIROp_RequireComputeDerivative)) void __requireComputeDerivative(); +__intrinsic_op($(kIROp_RequireMaximallyReconverges)) +void __requireMaximallyReconverges(); + +__intrinsic_op($(kIROp_RequireQuadDerivatives)) +void __requireQuadDerivatives(); + //@ public: /// @category misc_types enum MemoryOrder @@ -3959,3 +3965,18 @@ attribute_syntax [DerivativeGroupQuad] : DerivativeGroupQuadAttribute; /// effect on other targets. __attributeTarget(FuncDecl) attribute_syntax [DerivativeGroupLinear] : DerivativeGroupLinearAttribute; + +/// Emits `MaximallyReconvergesKHR` execution mode when producing SPIR-V. +/// This attribute has no effect on other targets. +__attributeTarget(FuncDecl) +attribute_syntax [MaximallyReconverges] : MaximallyReconvergesAttribute; + +/// Emits `QuadDerivativesKHR` execution mode when producing SPIR-V. +/// This attribute has no effect on other targets. +__attributeTarget(FuncDecl) +attribute_syntax [QuadDerivatives] : QuadDerivativesAttribute; + +/// Emits `RequireFullQuadsKHR` execution mode when producing SPIR-V. +/// This attribute has no effect on other targets. 
+__attributeTarget(FuncDecl) +attribute_syntax [RequireFullQuads] : RequireFullQuadsAttribute; diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index d5b59427f3..58ab10e241 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -15466,6 +15466,63 @@ bool IsHelperLane() } } +// +// Quad Control intrinsics +// +// For SPIRV and GLSL targets, the behavior is taken from Vulkan's `VK_KHR_shader_quad_control` spec. +// On SPIRV, QuadAny/QuadAll map to OpGroupNonUniformQuadAnyKHR/OpGroupNonUniformQuadAllKHR, and using either of these functions will +// result in the QuadDerivativesKHR execution mode being used. If MaximallyReconvergesKHR is not already +// specified by other means, it will be added when using either of QuadAny/QuadAll. +// + +//@public: +/// Returns true if `expr` is true in any lane of the current quad. +__glsl_extension(GL_KHR_shader_subgroup_vote) +__glsl_extension(GL_EXT_maximal_reconvergence) +__glsl_extension(GL_EXT_shader_quad_control) +[ForceInline] +[require(glsl_hlsl_metal_spirv, quad_control)] +bool QuadAny(bool expr) +{ + __requireMaximallyReconverges(); + __requireQuadDerivatives(); + __target_switch + { + case hlsl: __intrinsic_asm "QuadAny"; + case glsl: __intrinsic_asm "subgroupQuadAny"; + case metal: __intrinsic_asm "quad_any"; + case spirv: + return spirv_asm + { + result:$$bool = OpGroupNonUniformQuadAnyKHR $expr; + }; + } +} + +//@public: +/// Returns true if `expr` is true in all lanes of the current quad.
+__glsl_extension(GL_KHR_shader_subgroup_vote) +__glsl_extension(GL_EXT_maximal_reconvergence) +__glsl_extension(GL_EXT_shader_quad_control) +[ForceInline] +[require(glsl_hlsl_metal_spirv, quad_control)] +bool QuadAll(bool expr) +{ + __requireMaximallyReconverges(); + __requireQuadDerivatives(); + __target_switch + { + case hlsl: __intrinsic_asm "QuadAll"; + case glsl: __intrinsic_asm "subgroupQuadAll"; + case metal: __intrinsic_asm "quad_all"; + case spirv: + return spirv_asm + { + result:$$bool = OpGroupNonUniformQuadAllKHR $expr; + }; + } +} + // `typedef`s to help with the fact that HLSL has been sorta-kinda case insensitive at various points //@hidden: typedef Texture2D texture2D; diff --git a/source/slang/slang-ast-modifier.h b/source/slang/slang-ast-modifier.h index f5dd86df15..0ff5785366 100644 --- a/source/slang/slang-ast-modifier.h +++ b/source/slang/slang-ast-modifier.h @@ -1656,6 +1656,21 @@ class DerivativeGroupLinearAttribute : public Attribute SLANG_AST_CLASS(DerivativeGroupLinearAttribute) }; +class MaximallyReconvergesAttribute : public Attribute +{ + SLANG_AST_CLASS(MaximallyReconvergesAttribute) +}; + +class QuadDerivativesAttribute : public Attribute +{ + SLANG_AST_CLASS(QuadDerivativesAttribute) +}; + +class RequireFullQuadsAttribute : public Attribute +{ + SLANG_AST_CLASS(RequireFullQuadsAttribute) +}; + /// A `[payload]` attribute indicates that a `struct` type will be used as /// a ray payload for `TraceRay()` calls, and thus also as input/output /// for shaders in the ray tracing pipeline that might be invoked for diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index c8a66448d3..4f6357779c 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -466,6 +466,14 @@ def SPV_EXT_mesh_shader : _spirv_1_4; /// [EXT] def SPV_EXT_demote_to_helper_invocation : _spirv_1_4; +/// Represents the SPIR-V extension for maximal reconvergence. 
+/// [EXT] +def SPV_KHR_maximal_reconvergence : _spirv_1_0; + +/// Represents the SPIR-V extension for quad group control. +/// [EXT] +def SPV_KHR_quad_control : _spirv_1_3; + /// Represents the SPIR-V extension for fragment shader barycentric. /// [EXT] def SPV_KHR_fragment_shader_barycentric : _spirv_1_0; @@ -654,6 +662,14 @@ def spvDemoteToHelperInvocationEXT : SPV_EXT_demote_to_helper_invocation; /// [EXT] def spvDemoteToHelperInvocation : spvDemoteToHelperInvocationEXT; +/// Represents the SPIR-V capability for maximal reconvergence. +/// [EXT] +def spvMaximalReconvergenceKHR : SPV_KHR_maximal_reconvergence; + +/// Represents the SPIR-V capability for quad group control. +/// [EXT] +def spvQuadControlKHR : SPV_KHR_quad_control; + // The following capabilities all pertain to how ray tracing shaders are translated // to GLSL, where there are two different extensions that can provide the core // functionality of `TraceRay` and the related operations. @@ -691,6 +707,8 @@ def _GL_EXT_shader_image_load_store : _GLSL_130; def _GL_EXT_shader_realtime_clock : glsl; def _GL_EXT_texture_query_lod : glsl; def _GL_EXT_texture_shadow_lod : _GLSL_130; +def _GL_EXT_maximal_reconvergence : _GLSL_140; +def _GL_EXT_shader_quad_control : _GLSL_140; def _GL_ARB_derivative_control : _GLSL_400; def _GL_ARB_fragment_shader_interlock : _GLSL_450; @@ -746,6 +764,14 @@ alias GL_EXT_debug_printf = _GL_EXT_debug_printf | SPV_KHR_non_semantic_info; /// [EXT] alias GL_EXT_demote_to_helper_invocation = _GL_EXT_demote_to_helper_invocation | spvDemoteToHelperInvocationEXT; +/// Represents the GL_EXT_maximal_reconvergence extension. +/// [EXT] +alias GL_EXT_maximal_reconvergence = _GL_EXT_maximal_reconvergence | spvMaximalReconvergenceKHR; + +/// Represents the GL_EXT_shader_quad_control extension. +/// [EXT] +alias GL_EXT_shader_quad_control = _GL_EXT_shader_quad_control | spvQuadControlKHR; + /// Represents the GL_EXT_fragment_shader_barycentric extension. 
/// [EXT] alias GL_EXT_fragment_shader_barycentric = _GL_EXT_fragment_shader_barycentric | spvFragmentBarycentricKHR; @@ -1925,6 +1951,13 @@ alias helper_lane = _sm_6_0 + fragment | metal + fragment ; +/// Capabilities required to enable quad group control +/// [Compound] +alias quad_control = _sm_6_7 + | GL_EXT_shader_quad_control + GL_EXT_maximal_reconvergence + GL_KHR_shader_subgroup_vote + | metal + ; + /// Capabilities required to enable shader breakpoints /// [Compound] alias breakpoint = GL_EXT_debug_printf | hlsl | _cuda_sm_8_0 | cpp; diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp index 64cc9969c4..d4b3cac275 100644 --- a/source/slang/slang-emit-c-like.cpp +++ b/source/slang/slang-emit-c-like.cpp @@ -129,14 +129,6 @@ void CLikeSourceEmitter::emitPreModuleImpl() m_writer->emit("\n"); } } -void CLikeSourceEmitter::emitPostModuleImpl() -{ - if (m_requiredAfter.requireComputeDerivatives.getLength() > 0) - { - m_writer->emit(m_requiredAfter.requireComputeDerivatives); - m_writer->emit("\n"); - } -} // // Types diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h index dd8e276740..a60b2bec05 100644 --- a/source/slang/slang-emit-c-like.h +++ b/source/slang/slang-emit-c-like.h @@ -470,7 +470,6 @@ class CLikeSourceEmitter : public SourceEmitterBase void emitFrontMatter(TargetRequest* targetReq) { emitFrontMatterImpl(targetReq); } void emitPreModule() { emitPreModuleImpl(); } - void emitPostModule() { emitPostModuleImpl(); } void emitModule(IRModule* module, DiagnosticSink* sink) { m_irModule = module; @@ -541,7 +540,6 @@ class CLikeSourceEmitter : public SourceEmitterBase /// For example on targets that don't have built in vector/matrix support, this is where /// the appropriate generated declarations occur. 
virtual void emitPreModuleImpl(); - virtual void emitPostModuleImpl(); virtual void emitSimpleTypeAndDeclaratorImpl(IRType* type, DeclaratorInfo* declarator); void emitSimpleTypeAndDeclarator(IRType* type, DeclaratorInfo* declarator) @@ -722,10 +720,6 @@ class CLikeSourceEmitter : public SourceEmitterBase Dictionary m_mapInstToName; OrderedHashSet m_requiredPreludes; - struct RequiredAfter - { - String requireComputeDerivatives; - } m_requiredAfter; Dictionary m_builtinPreludes; }; diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp index 23fff37acb..326eef8b47 100644 --- a/source/slang/slang-emit-glsl.cpp +++ b/source/slang/slang-emit-glsl.cpp @@ -25,9 +25,78 @@ GLSLSourceEmitter::GLSLSourceEmitter(const Desc& desc) SLANG_ASSERT(m_glslExtensionTracker); } +void GLSLSourceEmitter::_beforeComputeEmitProcessInstruction( + IRInst* parentFunc, + IRInst* inst, + IRBuilder& builder) +{ + if (auto requireGLSLExt = as(inst)) + { + _requireGLSLExtension(requireGLSLExt->getExtensionName()); + return; + } + + // Early exit on instructions we are not interested in. + if (!as(inst) && !as(inst) && + !(as(inst) && (m_entryPointStage == Stage::Compute))) + { + return; + } + + // Check for entry point specific decorations. + // + // Handle cases where "require" IR operations exist in the function body and are required + // as entry point decorations. 
+ auto entryPoints = getReferencingEntryPoints(m_referencingEntryPoints, parentFunc); + if (entryPoints == nullptr) + return; + + for (auto entryPoint : *entryPoints) + { + if (as(inst)) + { + builder.addDecoration(entryPoint, kIROp_MaximallyReconvergesDecoration); + } + else if (as(inst)) + { + builder.addDecoration(entryPoint, kIROp_QuadDerivativesDecoration); + } + else + { + const auto requireComputeDerivative = as(inst); + + SLANG_ASSERT(requireComputeDerivative); + SLANG_ASSERT(m_entryPointStage == Stage::Compute); + + // Compute derivatives are quad by default, add the decoration if entry point + // does not have an explicit linear decoration. + bool isQuad = !entryPoint->findDecoration(); + if (isQuad) + { + builder.addDecoration(entryPoint, kIROp_DerivativeGroupQuadDecoration); + } + } + } +} + void GLSLSourceEmitter::beforeComputeEmitActions(IRModule* module) { buildEntryPointReferenceGraph(this->m_referencingEntryPoints, module); + + IRBuilder builder(module); + for (auto globalInst : module->getGlobalInsts()) + { + if (auto func = as(globalInst)) + { + for (auto block : func->getBlocks()) + { + for (auto inst = block->getFirstInst(); inst; inst = inst->next) + { + _beforeComputeEmitProcessInstruction(func, inst, builder); + } + } + } + } } SlangResult GLSLSourceEmitter::init() @@ -78,8 +147,8 @@ void GLSLSourceEmitter::_requireRayQuery() { m_glslExtensionTracker->requireExtension(UnownedStringSlice::fromLiteral("GL_EXT_ray_query")); m_glslExtensionTracker->requireSPIRVVersion( - SemanticVersion(1, 4)); // required due to glslang bug which enables `SPV_KHR_ray_tracing` - // regardless of context + SemanticVersion(1, 4)); // required due to glslang bug which enables + // `SPV_KHR_ray_tracing` regardless of context m_glslExtensionTracker->requireVersion(ProfileVersion::GLSL_460); } @@ -226,8 +295,8 @@ void GLSLSourceEmitter::_emitGLSLStructuredBuffer( m_writer->emit(") "); /* - If the output type is a buffer, and we can determine it is only readonly we can
prefix before - buffer with 'readonly' + If the output type is a buffer, and we can determine it is only readonly we can prefix + before buffer with 'readonly' The actual structuredBufferType could be @@ -349,8 +418,8 @@ void GLSLSourceEmitter::emitSSBOHeader(IRGlobalParam* varDecl, IRType* bufferTyp _emitMemoryQualifierDecorations(varDecl); /* - If the output type is a buffer, and we can determine it is only readonly we can prefix before - buffer with 'readonly' + If the output type is a buffer, and we can determine it is only readonly we can prefix + before buffer with 'readonly' HLSLByteAddressBufferType - This is unambiguously read only HLSLRWByteAddressBufferType - Read write @@ -437,11 +506,11 @@ void GLSLSourceEmitter::_emitGLSLParameterGroup( } /* - With resources backed by 'buffer' on glsl, we want to output 'readonly' if that is a good match - for the underlying type. If uniform it's implicit it's readonly + With resources backed by 'buffer' on glsl, we want to output 'readonly' if that is a good + match for the underlying type. If uniform it's implicit it's readonly - Here this only happens with isShaderRecord which is a 'constant buffer' (ie implicitly readonly) - or IRGLSLShaderStorageBufferType which is read write. + Here this only happens with isShaderRecord which is a 'constant buffer' (ie implicitly + readonly) or IRGLSLShaderStorageBufferType which is read write. */ { @@ -653,20 +722,21 @@ void GLSLSourceEmitter::_emitGLSLImageFormatModifier(IRInst* var, IRTextureType* // default to rgba // // The SPIR-V spec - // (https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.pdf) section 3.11 - // on Image Formats it does not list rgbf32. + // (https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.pdf) + // section 3.11 on Image Formats it does not list rgbf32. // // It seems SPIR-V can support having an image with an unknown-at-compile-time - // format, so long as the underlying API supports it. 
Ideally this would mean that - // we can just drop all these qualifiers when emitting GLSL for Vulkan targets. + // format, so long as the underlying API supports it. Ideally this would mean + // that we can just drop all these qualifiers when emitting GLSL for Vulkan + // targets. // - // This raises the question of what to do more long term. For Vulkan hopefully we - // can just drop the layout. For OpenGL targets it would seem reasonable to have - // well-defined rules for inferring the format (and just document that 3-component - // formats map to 4-component formats, but that shouldn't matter because the API - // wouldn't let the user allocate those 3-component formats anyway), and add an - // attribute for specifying the format manually if you really want to override our - // inference (e.g., to specify r11fg11fb10f). + // This raises the question of what to do more long term. For Vulkan hopefully + // we can just drop the layout. For OpenGL targets it would seem reasonable to + // have well-defined rules for inferring the format (and just document that + // 3-component formats map to 4-component formats, but that shouldn't matter + // because the API wouldn't let the user allocate those 3-component formats + // anyway), and add an attribute for specifying the format manually if you + // really want to override our inference (e.g., to specify r11fg11fb10f). 
m_writer->emit("rgba"); // Emit("rgb"); @@ -1332,11 +1402,11 @@ void GLSLSourceEmitter::emitEntryPointAttributesImpl( auto profile = entryPointDecor->getProfile(); auto stage = profile.getStage(); + IRNumThreadsDecoration* numThreadsDecor = nullptr; auto emitLocalSizeLayout = [&]() { Int sizeAlongAxis[kThreadGroupAxisCount]; - getComputeThreadGroupSize(irFunc, sizeAlongAxis); - + numThreadsDecor = getComputeThreadGroupSize(irFunc, sizeAlongAxis); m_writer->emit("layout("); char const* axes[] = {"x", "y", "z"}; for (int ii = 0; ii < kThreadGroupAxisCount; ++ii) @@ -1354,13 +1424,51 @@ void GLSLSourceEmitter::emitEntryPointAttributesImpl( switch (stage) { case Stage::Compute: + case Stage::Mesh: + case Stage::Amplification: + emitLocalSizeLayout(); + default: + break; + } + + /// Structure to track (some) entry point attributes, to allow ordering when emitting and to + /// ensure decorations are only emitted once. + /// + /// These entry point attributes may be implicitly added by built-in functions and the same + /// function may be called multiple times, hence the need to ensure they are only emitted + /// once. + struct GLSLEntryPointAttributes + { + bool quadDerivatives; + bool requireFullQuads; + bool maximallyReconverges; + String computeDerivatives; + } attributes{}; + + const auto requireQuadControlExtensions = [&]() + { + _requireGLSLExtension(UnownedStringSlice("GL_KHR_shader_subgroup_vote")); + _requireGLSLExtension(UnownedStringSlice("GL_EXT_shader_quad_control")); + }; + + for (auto decoration : irFunc->getDecorations()) + { + // Stage agnostic decorations.
+ if (as(decoration)) { - emitLocalSizeLayout(); + _requireGLSLExtension(UnownedStringSlice("GL_EXT_maximal_reconvergence")); + attributes.maximallyReconverges = true; } - break; - case Stage::Geometry: + else if (as(decoration)) { - if (auto decor = irFunc->findDecoration()) + requireQuadControlExtensions(); + attributes.quadDerivatives = true; + } + + switch (stage) + { + case Stage::Geometry: + if (auto decor = as(decoration)) { auto count = getIntVal(decor->getCount()); m_writer->emit("layout(max_vertices = "); @@ -1368,7 +1476,7 @@ void GLSLSourceEmitter::emitEntryPointAttributesImpl( m_writer->emit(") out;\n"); } - if (auto decor = irFunc->findDecoration()) + if (auto decor = as(decoration)) { auto count = getIntVal(decor->getCount()); m_writer->emit("layout(invocations = "); @@ -1379,7 +1487,7 @@ void GLSLSourceEmitter::emitEntryPointAttributesImpl( // These decorations were moved from the parameters to the entry point by // ir-glsl-legalize. The actual parameters have become potentially multiple global // parameters. 
- if (auto decor = irFunc->findDecoration()) + if (auto decor = as(decoration)) { switch (decor->getOp()) { @@ -1405,7 +1513,7 @@ void GLSLSourceEmitter::emitEntryPointAttributesImpl( } } - if (auto decor = irFunc->findDecoration()) + if (auto decor = as(decoration)) { IRType* type = decor->getStreamType(); @@ -1424,33 +1532,57 @@ void GLSLSourceEmitter::emitEntryPointAttributesImpl( SLANG_ASSERT(!"Unknown stream out type"); } } - } - break; - case Stage::Pixel: - { - if (irFunc->findDecoration()) + break; + case Stage::Pixel: + if (as(decoration)) { // https://www.khronos.org/opengl/wiki/Early_Fragment_Test m_writer->emit("layout(early_fragment_tests) in;\n"); } + else if (as(decoration)) + { + requireQuadControlExtensions(); + attributes.requireFullQuads = true; + } break; - } - case Stage::Mesh: - { - emitLocalSizeLayout(); - if (auto decor = irFunc->findDecoration()) + case Stage::Compute: + if (as(decoration)) + { + _requireGLSLExtension(UnownedStringSlice("GL_NV_compute_shader_derivatives")); + verifyComputeDerivativeGroupModifiers( + getSink(), + decoration->sourceLoc, + true, + false, + numThreadsDecor); + attributes.computeDerivatives = "layout(derivative_group_quadsNV) in;\n"; + } + else if (as(decoration)) + { + _requireGLSLExtension(UnownedStringSlice("GL_NV_compute_shader_derivatives")); + verifyComputeDerivativeGroupModifiers( + getSink(), + decoration->sourceLoc, + false, + true, + numThreadsDecor); + attributes.computeDerivatives = "layout(derivative_group_linearNV) in;\n"; + } + break; + case Stage::Mesh: + if (auto decor = as(decoration)) { m_writer->emit("layout(max_vertices = "); m_writer->emit(decor->getMaxSize()->getValue()); m_writer->emit(") out;\n"); } - if (auto decor = irFunc->findDecoration()) + if (auto decor = as(decoration)) { m_writer->emit("layout(max_primitives = "); m_writer->emit(decor->getMaxSize()->getValue()); m_writer->emit(") out;\n"); } - if (auto decor = irFunc->findDecoration()) + if (auto decor = as(decoration)) { // 
TODO: Ellie validate here/elsewhere, what's allowed here is // different from the tesselator @@ -1459,16 +1591,32 @@ void GLSLSourceEmitter::emitEntryPointAttributesImpl( m_writer->emit(decor->getTopology()->getStringSlice()); m_writer->emit("s) out;\n"); } + break; + default: + break; } - break; - case Stage::Amplification: - { - emitLocalSizeLayout(); - } - break; - // TODO: There are other stages that will need this kind of handling. - default: - break; + } + + if (attributes.quadDerivatives) + { + m_writer->emit("layout(quad_derivatives) in;\n"); + } + if (attributes.requireFullQuads) + { + m_writer->emit("layout(full_quads) in;\n"); + } + + // This must be emitted after local size when using glslang. + if (attributes.computeDerivatives.getLength() > 0) + { + m_writer->emit(attributes.computeDerivatives); + } + + // This must be emitted last because GLSL's `[[..]]` attribute syntax must come right + // before the entry point function declaration. + if (attributes.maximallyReconverges) + { + m_writer->emit("[[maximally_reconverges]]\n"); } } @@ -2745,63 +2893,6 @@ void GLSLSourceEmitter::handleRequiredCapabilitiesImpl(IRInst* inst) } } } - - // The function may have various requirment declaring functions its body. We also need to look - // for them. - auto func = as(inst); - if (!func) - return; - auto block = func->getFirstBlock(); - if (!block) - return; - for (auto childInst : block->getChildren()) - { - if (auto requireGLSLExt = as(childInst)) - { - _requireGLSLExtension(requireGLSLExt->getExtensionName()); - } - else if (const auto requireComputeDerivative = as(childInst)) - { - // only allowed 1 of derivative_group_quadsNV or derivative_group_linearNV - if (m_entryPointStage != Stage::Compute || - m_requiredAfter.requireComputeDerivatives.getLength() > 0) - return; - - _requireGLSLExtension(UnownedStringSlice("GL_NV_compute_shader_derivatives")); - - // This will only run once per program. 
- HashSet* entryPointsUsingInst = - getReferencingEntryPoints(m_referencingEntryPoints, func); - - for (auto entryPoint : *entryPointsUsingInst) - { - bool isQuad = !entryPoint->findDecoration(); - auto numThreadsDecor = entryPoint->findDecoration(); - if (isQuad) - { - verifyComputeDerivativeGroupModifiers( - getSink(), - inst->sourceLoc, - true, - false, - numThreadsDecor); - m_requiredAfter.requireComputeDerivatives = - "layout(derivative_group_quadsNV) in;"; - } - else - { - verifyComputeDerivativeGroupModifiers( - getSink(), - inst->sourceLoc, - false, - true, - numThreadsDecor); - m_requiredAfter.requireComputeDerivatives = - "layout(derivative_group_linearNV) in;"; - } - } - } - } } static Index _getGLSLVersion(ProfileVersion profile) diff --git a/source/slang/slang-emit-glsl.h b/source/slang/slang-emit-glsl.h index 49a7884c26..b07b410ca8 100644 --- a/source/slang/slang-emit-glsl.h +++ b/source/slang/slang-emit-glsl.h @@ -176,6 +176,8 @@ class GLSLSourceEmitter : public CLikeSourceEmitter void emitAtomicImageCoord(IRImageSubscript* operand); + void _beforeComputeEmitProcessInstruction(IRInst* parentFunc, IRInst* inst, IRBuilder& builder); + Dictionary> m_referencingEntryPoints; RefPtr m_glslExtensionTracker; diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index d8c479cd12..f5599289a2 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -3522,6 +3522,37 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex break; } + + case kIROp_RequireMaximallyReconverges: + if (auto entryPointsUsingInst = + getReferencingEntryPoints(m_referencingEntryPoints, getParentFunc(inst))) + { + ensureExtensionDeclaration(UnownedStringSlice("SPV_KHR_maximal_reconvergence")); + for (IRFunc* entryPoint : *entryPointsUsingInst) + { + requireSPIRVExecutionMode( + nullptr, + getIRInstSpvID(entryPoint), + SpvExecutionModeMaximallyReconvergesKHR); + } + } + break; + case 
kIROp_RequireQuadDerivatives: + if (auto entryPointsUsingInst = + getReferencingEntryPoints(m_referencingEntryPoints, getParentFunc(inst))) + { + ensureExtensionDeclaration(UnownedStringSlice("SPV_KHR_quad_control")); + requireSPIRVCapability(SpvCapabilityQuadControlKHR); + for (IRFunc* entryPoint : *entryPointsUsingInst) + { + requireSPIRVExecutionMode( + nullptr, + getIRInstSpvID(entryPoint), + SpvExecutionModeQuadDerivativesKHR); + } + } + break; + case kIROp_Return: if (as(inst)->getVal()->getOp() == kIROp_VoidLit) result = emitOpReturn(parent, inst); @@ -4437,6 +4468,20 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex } } break; + case kIROp_MaximallyReconvergesDecoration: + ensureExtensionDeclaration(UnownedStringSlice("SPV_KHR_maximal_reconvergence")); + requireSPIRVExecutionMode(nullptr, dstID, SpvExecutionModeMaximallyReconvergesKHR); + break; + case kIROp_QuadDerivativesDecoration: + ensureExtensionDeclaration(UnownedStringSlice("SPV_KHR_quad_control")); + requireSPIRVCapability(SpvCapabilityQuadControlKHR); + requireSPIRVExecutionMode(nullptr, dstID, SpvExecutionModeQuadDerivativesKHR); + break; + case kIROp_RequireFullQuadsDecoration: + ensureExtensionDeclaration(UnownedStringSlice("SPV_KHR_quad_control")); + requireSPIRVCapability(SpvCapabilityQuadControlKHR); + requireSPIRVExecutionMode(nullptr, dstID, SpvExecutionModeRequireFullQuadsKHR); + break; case kIROp_SPIRVBufferBlockDecoration: { emitOpDecorate( diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index b9217de416..45dd683421 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -1891,8 +1891,6 @@ SlangResult CodeGenContext::emitEntryPointsSourceFromIR(ComPtr& outAr // Append the modules output code finalResult.append(code); - // Append all content that should be at the end of a module - sourceEmitter->emitPostModule(); finalResult.append(sourceWriter.getContentAndClear()); // Write out the result diff --git
a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h index 88a9ac5e33..27003f6a79 100644 --- a/source/slang/slang-ir-inst-defs.h +++ b/source/slang/slang-ir-inst-defs.h @@ -656,6 +656,10 @@ INST(RequireComputeDerivative, RequireComputeDerivative, 0, 0) INST(StaticAssert, StaticAssert, 2, 0) INST(Printf, Printf, 1, 0) +// Quad control execution modes. +INST(RequireMaximallyReconverges, RequireMaximallyReconverges, 0, 0) +INST(RequireQuadDerivatives, RequireQuadDerivatives, 0, 0) + // TODO: We should consider splitting the basic arithmetic/comparison // ops into cases for signed integers, unsigned integers, and floating-point // values, to better match downstream targets that want to treat them @@ -953,6 +957,10 @@ INST_RANGE(BindingQuery, GetRegisterIndex, GetRegisterSpace) INST(DerivativeGroupQuadDecoration, DerivativeGroupQuad, 0, 0) INST(DerivativeGroupLinearDecoration, DerivativeGroupLinear, 0, 0) + INST(MaximallyReconvergesDecoration, MaximallyReconverges, 0, 0) + INST(QuadDerivativesDecoration, QuadDerivatives, 0, 0) + INST(RequireFullQuadsDecoration, RequireFullQuads, 0, 0) + // Marks a type to be non copyable, causing SSA pass to skip turning variables of the the type into SSA values. 
INST(NonCopyableTypeDecoration, nonCopyable, 0, 0) diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h index 53adce87a8..9b9cbd5b16 100644 --- a/source/slang/slang-ir-insts.h +++ b/source/slang/slang-ir-insts.h @@ -453,6 +453,9 @@ IR_SIMPLE_DECORATION(GlobalInputDecoration) IR_SIMPLE_DECORATION(GlobalOutputDecoration) IR_SIMPLE_DECORATION(DownstreamModuleExportDecoration) IR_SIMPLE_DECORATION(DownstreamModuleImportDecoration) +IR_SIMPLE_DECORATION(MaximallyReconvergesDecoration) +IR_SIMPLE_DECORATION(QuadDerivativesDecoration) +IR_SIMPLE_DECORATION(RequireFullQuadsDecoration) struct IRAvailableInDownstreamIRDecoration : IRDecoration { @@ -3421,6 +3424,16 @@ struct IRRequireComputeDerivative : IRInst IR_LEAF_ISA(RequireComputeDerivative) }; +struct IRRequireMaximallyReconverges : IRInst +{ + IR_LEAF_ISA(RequireMaximallyReconverges) +}; + +struct IRRequireQuadDerivatives : IRInst +{ + IR_LEAF_ISA(RequireQuadDerivatives) +}; + struct IRStaticAssert : IRInst { IR_LEAF_ISA(StaticAssert) diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp index 011ea6bc76..fbd05f2ab0 100644 --- a/source/slang/slang-lower-to-ir.cpp +++ b/source/slang/slang-lower-to-ir.cpp @@ -10261,6 +10261,18 @@ struct DeclLoweringVisitor : DeclVisitor derivativeGroupLinearDecor = getBuilder()->addSimpleDecoration(irFunc); } + else if (as(modifier)) + { + getBuilder()->addSimpleDecoration(irFunc); + } + else if (as(modifier)) + { + getBuilder()->addSimpleDecoration(irFunc); + } + else if (as(modifier)) + { + getBuilder()->addSimpleDecoration(irFunc); + } else if (as(modifier)) { getBuilder()->addSimpleDecoration(irFunc); diff --git a/tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang b/tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang new file mode 100644 index 0000000000..23db25d7fd --- /dev/null +++ b/tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang @@ -0,0 +1,40 @@ 
+//TEST(compute):COMPARE_COMPUTE_EX:-vk -compute -shaderobj +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -profile cs_6_7 -dx12 -use-dxil -shaderobj -render-feature hardware-device +//TEST(compute):COMPARE_COMPUTE_EX:-metal -compute -shaderobj + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; +// Each block of four consecutive thread indices stores the result of one QuadAny/QuadAll call, converted to uint, in outputBuffer[index]. +[numthreads(16, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + uint index = dispatchThreadID.x; + + if (index < 4) + { + // Quad 1 (index 0-3). + // QuadAny should return true: the expr is true for index 0 and false for indices 1-3. + outputBuffer[index] = uint(QuadAny((index % 4) == 0)); + } + else if (index < 8) + { + // Quad 2 (index 4-7). + // QuadAny should return false: the expr is false for every index. + bool falseCondition = (5 == 4); + outputBuffer[index] = uint(QuadAny(falseCondition)); + } + else if (index < 12) + { + // Quad 3 (index 8-11). + // QuadAll should return false: the expr is true for index 8 only and false for indices 9-11. + outputBuffer[index] = uint(QuadAll((index % 4) == 0)); + } + else + { + // Quad 4 (index 12-15). + // QuadAll should return true: the expr is true for every index. 
+ bool trueCondition = (5 == 5); + outputBuffer[index] = uint(QuadAll(trueCondition)); + } +} + diff --git a/tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang.expected.txt b/tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang.expected.txt new file mode 100644 index 0000000000..945f08f2c0 --- /dev/null +++ b/tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang.expected.txt @@ -0,0 +1,16 @@ +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 diff --git a/tests/hlsl-intrinsic/quad-control/quad-control-frag-many-entry-points.slang b/tests/hlsl-intrinsic/quad-control/quad-control-frag-many-entry-points.slang new file mode 100644 index 0000000000..2312f0c957 --- /dev/null +++ b/tests/hlsl-intrinsic/quad-control/quad-control-frag-many-entry-points.slang @@ -0,0 +1,96 @@ +//TEST:SIMPLE(filecheck=CHECK_SPIRV): -target spirv -fvk-use-entrypoint-name +//TEST:SIMPLE(filecheck=CHECK_GLSL): -target glsl -fvk-use-entrypoint-name + +// +// Check that SPIR-V quad control execution modes and GLSL layout/attribute decorations are only +// set on entry points that use quad control functions (directly or via a callee) and/or carry quad control attributes. 
+// +Texture2D colorTexture1; +SamplerState samplerState; + +struct FragmentInput { + float2 uv : TEXCOORD0; +}; + +float4 getFragColor(float2 uv) { // helper containing a QuadAny call; fragmentMain4 reaches it indirectly + float4 fragColor = float4(1.0, 1.0, 1.0, 1.0); + bool nonUniformCondition = uv.x > 0.5; + + if (QuadAny(nonUniformCondition)) { + float4 color = colorTexture1.Sample(samplerState, uv); + if (nonUniformCondition) { + fragColor = color; + } + } + + return fragColor; +} + +// CHECK_SPIRV: OpExecutionMode %fragmentMain1 MaximallyReconvergesKHR +// CHECK_SPIRV: OpExecutionMode %fragmentMain1 QuadDerivativesKHR +// CHECK_GLSL: layout(quad_derivatives) in +// CHECK_GLSL: [maximally_reconverges] +[shader("fragment")] +float4 fragmentMain1(FragmentInput input) : SV_Target // calls QuadAny directly; no quad control attribute +{ + bool nonUniformCondition = input.uv.x > 0.5; + + float4 fragColor = float4(1.0, 1.0, 1.0, 1.0); + + if (QuadAny(nonUniformCondition)) { + float4 color = colorTexture1.Sample(samplerState, input.uv); + if (nonUniformCondition) { + fragColor = color; + } + } + + return fragColor; +} + +// CHECK_SPIRV-NOT: OpExecutionMode %fragmentMain2 QuadDerivativesKHR +// CHECK_SPIRV: OpExecutionMode %fragmentMain2 MaximallyReconvergesKHR +// CHECK_GLSL-NOT: layout(quad_derivatives) in +// CHECK_GLSL: [maximally_reconverges] +[MaximallyReconverges] +[shader("fragment")] +float4 fragmentMain2(FragmentInput input) : SV_Target // only the [MaximallyReconverges] attribute; no quad control calls +{ + return float4(1.0, 1.0, 1.0, 1.0); +} + + +// CHECK_SPIRV-NOT: OpExecutionMode %fragmentMain3 MaximallyReconvergesKHR +// CHECK_SPIRV-NOT: OpExecutionMode %fragmentMain3 QuadDerivativesKHR +// CHECK_SPIRV: OpExecutionMode %fragmentMain3 RequireFullQuadsKHR +// CHECK_GLSL-NOT: layout(quad_derivatives) in +// CHECK_GLSL: layout(full_quads) in +// CHECK_GLSL-NOT: [maximally_reconverges] +[RequireFullQuads] +[shader("fragment")] +float4 fragmentMain3(FragmentInput input) : SV_Target // only the [RequireFullQuads] attribute; no quad control calls +{ + return float4(1.0, 1.0, 1.0, 1.0); +} + +// CHECK_SPIRV: OpExecutionMode %fragmentMain4 MaximallyReconvergesKHR +// CHECK_SPIRV: OpExecutionMode %fragmentMain4 
QuadDerivativesKHR +// CHECK_GLSL: layout(quad_derivatives) in +// CHECK_GLSL: [maximally_reconverges] +[shader("fragment")] +float4 fragmentMain4(FragmentInput input) : SV_Target // reaches QuadAny only through getFragColor +{ + return getFragColor(input.uv); +} + +// CHECK_SPIRV-NOT: OpExecutionMode %fragmentMain5 MaximallyReconvergesKHR +// CHECK_SPIRV-NOT: OpExecutionMode %fragmentMain5 QuadDerivativesKHR +// CHECK_SPIRV-NOT: OpExecutionMode %fragmentMain5 RequireFullQuadsKHR +// CHECK_GLSL-NOT: layout(quad_derivatives) in +// CHECK_GLSL-NOT: layout(full_quads) in +// CHECK_GLSL-NOT: [maximally_reconverges] +[shader("fragment")] +float4 fragmentMain5(FragmentInput input) : SV_Target // control case: no quad control calls or attributes +{ + return float4(1.0, 1.0, 1.0, 1.0); +} diff --git a/tests/hlsl-intrinsic/quad-control/quad-control-frag.slang b/tests/hlsl-intrinsic/quad-control/quad-control-frag.slang new file mode 100644 index 0000000000..29a9546a02 --- /dev/null +++ b/tests/hlsl-intrinsic/quad-control/quad-control-frag.slang @@ -0,0 +1,53 @@ +//TEST:SIMPLE(filecheck=CHECK_SPIRV): -entry fragmentMain -stage fragment -target spirv +//TEST:SIMPLE(filecheck=CHECK_GLSL): -entry fragmentMain -stage fragment -target glsl +//TEST:SIMPLE(filecheck=CHECK_HLSL): -entry fragmentMain -stage fragment -target hlsl +//TEST:SIMPLE(filecheck=CHECK_METAL): -entry fragmentMain -stage fragment -target metal + +Texture2D colorTexture1; +Texture2D colorTexture2; +SamplerState samplerState; + +struct FragmentInput { + float2 uv : TEXCOORD0; +}; + +// CHECK_SPIRV: OpExecutionMode %fragmentMain MaximallyReconvergesKHR +// CHECK_SPIRV: OpExecutionMode %fragmentMain QuadDerivativesKHR +// CHECK_SPIRV: OpExecutionMode %fragmentMain RequireFullQuadsKHR +// CHECK_GLSL: layout(quad_derivatives) in +// CHECK_GLSL: layout(full_quads) in +// CHECK_GLSL: [maximally_reconverges] +[QuadDerivatives] +[RequireFullQuads] +float4 fragmentMain(FragmentInput input) : SV_Target // explicit [QuadDerivatives] and [RequireFullQuads], plus one QuadAny and one QuadAll call +{ + bool nonUniformCondition1 = input.uv.x > 0.5; + bool nonUniformCondition2 = input.uv.y > 0.8; + + float4 fragColor 
= float4(1.0, 1.0, 1.0, 1.0); + + // CHECK_SPIRV: OpGroupNonUniformQuadAnyKHR + // CHECK_GLSL: subgroupQuadAny + // CHECK_HLSL: QuadAny + // CHECK_METAL: quad_any + if (QuadAny(nonUniformCondition1)) { // sample when QuadAny is true; the texel is used only if this invocation's own condition holds + float4 color = colorTexture1.Sample(samplerState, input.uv); + if (nonUniformCondition1) { + fragColor = color; + } + } + + // CHECK_SPIRV: OpGroupNonUniformQuadAllKHR + // CHECK_GLSL: subgroupQuadAll + // CHECK_HLSL: QuadAll + // CHECK_METAL: quad_all + if (QuadAll(nonUniformCondition2)) { // sample only when QuadAll is true; the inner test repeats this invocation's own condition + float4 color = colorTexture2.Sample(samplerState, input.uv); + if (nonUniformCondition2) { + fragColor += color * 0.5; + } + } + + return fragColor; +} +