Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hoist resource variables using a global initializer function correctly #4894

Closed
9 changes: 9 additions & 0 deletions source/slang/slang-emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
#include "slang-ir-restructure.h"
#include "slang-ir-restructure-scoping.h"
#include "slang-ir-sccp.h"
#include "slang-ir-simplify-global-vars.h"
#include "slang-ir-specialize.h"
#include "slang-ir-specialize-arrays.h"
#include "slang-ir-specialize-buffer-load-arg.h"
Expand Down Expand Up @@ -1216,10 +1217,18 @@ Result linkAndOptimizeIR(
break;
}

// Sometimes we will have a GlobalVar with a 'init' that aliases another
// concrete value.
// These sort of GlobalVar's should be removed in-favor of directly
// using the known value since this may be an indirect use of a resource
// variable (which is illegal for almost targets).
simplifyGlobalVars(irModule);

switch( target )
{
default:
break;
case CodeGenTarget::HLSL:
case CodeGenTarget::GLSL:
moveGlobalVarInitializationToEntryPoints(irModule);
break;
Expand Down
45 changes: 21 additions & 24 deletions source/slang/slang-ir-legalize-types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,6 @@ struct LegalCallBuilder
// result type of the function, so we know that
// the legalization funciton/call will use a `void`
// result type.
//
_emitCall(m_context->builder->getVoidType());
return resultVal;
}
Expand All @@ -364,41 +363,39 @@ struct LegalCallBuilder
{
case LegalType::Flavor::simple:
{
// In the leaf case we have a simple type, and
// we just want to declare a local variable based on it.
//
// Here we emit a local/global var which will be assigned as an out-parameter argument, then we 'Load' this var
// and use that 'Load' as the result of simplification.

auto simpleType = resultType.getSimple();
auto builder = m_context->builder;

// Recall that a local variable in our IR represents a *pointer*
// to storage of the appropriate type.
//
auto varPtr = builder->emitVar(simpleType);
// If we were going to emit an IRVar in global scope we have 1 important case to resolve: when IRVar is a resource
// and is assigned something in global scope. This is important since this means we need to make an IRGlobalVar
// instead of IRVar.
// A later pass will handle any out-parameter hoisting into IRGlobalVar init block logic

IRInst* varPtr = nullptr;
if (!builder->getInsertLoc().getParent() || builder->getInsertLoc().getParent()->getOp() == kIROp_Module)
varPtr = builder->createGlobalVar(simpleType);
else
varPtr = builder->emitVar(simpleType);
// We need to pass that pointer as an argument to our new
// `call` instruction, so that it can receive the value
// written by the callee.
//
m_args.add(varPtr);

// Note: Because `varPtr` is a pointer to the value we want,
// we have the small problem of needing to return a `LegalVal`
// that has dereferenced the value after the call.
//
// We solve this problem by inserting as `load` from our
// new variable immediately after the call, before going
// and resetting the insertion point to continue inserting
// stuff before the call (which is where we wnat the local
// variable declarations to go).
//
// TODO: Confirm that this logic can't go awry if (somehow)
// there is no instruction after `m_call`. That should not
// be possible inside of a function body, but it could in
// theory be a problem if we ever have top-level module-scope
// code representing initialization of constants and/or globals.
//
// but we want a value, we need to 'Load' the 'varPtr' result
// after our m_call.
//
// Since we may have more than 1 parameter we 'setInsertBefore(m_call);'
builder->setInsertBefore(m_call->getNextInst());
auto val = builder->emitLoad(simpleType, varPtr);
IRInst* val = nullptr;
if (as<IRGlobalVar>(varPtr))
val = varPtr;
else
val = builder->emitLoad(simpleType, varPtr);
builder->setInsertBefore(m_call);

return LegalVal::simple(val);
Expand Down
108 changes: 108 additions & 0 deletions source/slang/slang-ir-simplify-global-vars.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// slang-ir-explicit-global-init.cpp

#include "slang-ir-simplify-global-vars.h"
#include "slang-ir-insts.h"
#include "slang-ir.h"
#include "slang-ir-util.h"

namespace Slang
{
void tryToSimplifyUseOfGlobalVar(IRInst* user, List<IRInst*>& toDestroy)
{
switch (user->getOp())
{
case kIROp_Store:
{
auto store = as<IRStore>(user);
if (store->getParent() && store->getParent()->getOp() == kIROp_Module)
{
// Store in Global scope into a GlobalVar is equal to 'insertReturn(global->GetFirstBlock(), Store->GetVal())'
if (auto globalVal = as<IRGlobalValueWithCode>(store->getPtr()))
{
IRBuilder builder(store);
IRBlock* block = globalVal->getFirstBlock();
if (!block)
{
builder.setInsertInto(globalVal);
block = builder.emitBlock();
}
builder.setInsertInto(block);
builder.emitReturn(store->getVal());
toDestroy.add(user);
}
}
break;
}
default:
break;
}
}

void simplifyGlobalVars(IRModule* module)
{
IRBuilder builder(module);
for (auto inst : module->getGlobalInsts())
{
auto globalVar = as<IRGlobalVar>(inst);
if (!globalVar)
continue;

// Only simplify resource types since otherwise we need additional logic
// not yet implemented
auto globalVarType = globalVar->getDataType();
auto globalPtrType = as<IRPtrTypeBase>(globalVarType);
if (!globalPtrType
|| !isResourceType(globalPtrType->getValueType()))
continue;

// Find any trivial use which is supposed-to-be part of the
// init block, hoist these.
// Do some extra auixillary checks.
List<IRInst*> toDestroy;
bool onlyGlobalSideEffects = true;
traverseUses(globalVar, [&](IRUse* use)
{
auto user = use->getUser();
tryToSimplifyUseOfGlobalVar(user, toDestroy);
if (user->getParent()->getOp() != kIROp_Module
&& user->mightHaveSideEffects())
onlyGlobalSideEffects = false;
});
for (auto i : toDestroy)
i->removeAndDeallocate();

// Check if our IRGlobalVar is an alias for another variable, if so, simplify
if (!onlyGlobalSideEffects)
continue;

auto firstBlock = globalVar->getFirstBlock();
if (!firstBlock)
continue;

// For an alias we must encounter a return op first
bool fail = false;
IRReturn* firstReturn = nullptr;

for (auto localInst : firstBlock->getChildren())
{
if (localInst->getOp() != kIROp_Return)
{
fail = true;
break;
}
else if (localInst->getOp() == kIROp_Return)
{
firstReturn = as<IRReturn>(localInst);
break;
}
}

if (fail || !firstReturn)
continue;

globalVar->replaceUsesWith(firstReturn->getVal());
firstBlock->removeAndDeallocateAllDecorationsAndChildren();
firstBlock->removeAndDeallocate();
}
}
}
11 changes: 11 additions & 0 deletions source/slang/slang-ir-simplify-global-vars.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// slang-ir-explicit-global-init.h
#pragma once

namespace Slang
{
struct IRModule;

/// Simplify IRGlobalVars and possibley remove if an alias.
void simplifyGlobalVars(IRModule* module);

}
4 changes: 3 additions & 1 deletion source/slang/slang-ir-specialize-resources.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1202,7 +1202,9 @@ bool specializeResourceOutputs(
HashSet<IRFunc*>& unspecializableFuncs)
{
auto targetRequest = codeGenContext->getTargetReq();
if(isD3DTarget(targetRequest) || isKhronosTarget(targetRequest))
// D3D & SPIRV/GLSL cannot easily use resources as parameters
// Metal cannot assign globals to resource-outputting functions (in global scope)
if(isD3DTarget(targetRequest) || isKhronosTarget(targetRequest) || isMetalTarget(targetRequest))
{}
else
{
Expand Down
49 changes: 49 additions & 0 deletions tests/compute/type-legalize-global-with-init-function.slang
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// type-legalize-global-with-init.slang
//
// Confirm that type legalization can handle a global constant
// with a resource type or a type that recursively contains
// resources.
//
//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF): -shaderobj
//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF): -vk -shaderobj
//
//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer
RWStructuredBuffer<uint> outputBuffer;

//TEST_INPUT:ubuffer(data=[1 2 3 4 5 6 7 8], stride=4):name=inputBuffer
RWStructuredBuffer<uint> inputBuffer;

static const RWStructuredBuffer<uint> gBuffer = inputBuffer;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general I think we will have issues when there are global resource variables that is being written to dynamically, so I am not sure if this is something we want to allow...

static const currently is overloaded to also mean compile time constants, maybe we should disallow static consts of resource type as well. Is this test coming from some existing user code?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general I think we will have issues when there are global resource variables that is being written to dynamically, so I am not sure if this is something we want to allow...

We will have an issue with dynamic resource variable accesses.
Slang is lacking a compiler pass to manage non-compile-time known resource accesses such as:

RWStructuredBuffer<uint> outputBuffer;
RWStructuredBuffer<uint> inputBuffer1;
RWStructuredBuffer<uint> inputBuffer2;
RWStructuredBuffer<uint> inputBuffer3;

[numthreads(4,4,4)]
void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
{
    RWStructuredBuffer<uint> resource;
    if(dispatchThreadID.x == 1)
    {
        resource = inputBuffer1;
    }
    else if(dispatchThreadID.x == 2)
    {
        resource = inputBuffer2;
    }
    else if(dispatchThreadID.x == 3)
    {
        resource = inputBuffer3;
    }

    // This is illegal since resource is not known at compile time.
    outputBuffer[0] = resource[1];
}

From what I understand to add this would just piggy-back on our dynamic dispatch system logic:

  1. Diagnose all non-constexpr resources, assign all resources of equal type a different ID (like with dynamic dispatch).
  2. Any assignments to a resource assigns an ID (not the actual resource)
  3. All accesses to a non-compile-time resource specializes all texture intrinsic uses to pick the correct texture using our ID (if/else binary tree)

Copy link
Contributor Author

@ArielG-NV ArielG-NV Aug 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

static const currently is overloaded to also mean compile time constants, maybe we should disallow static consts of resource type as well. Is this test coming from some existing user code?

No test currently uses static const Stuff gStuff = Stuff( inputBuffer, inputBuffer );.
The problem is that some tests use static const Stuff gStuff = { inputBuffer, inputBuffer }, which once #4854 is merged will be an issue since we start emitting what is effectively static const Stuff gStuff = Stuff( inputBuffer, inputBuffer );.


struct Stuff
{
__init(RWStructuredBuffer<uint> inA, RWStructuredBuffer<uint> inB)
{
a = inA;
b = inB;
}
RWStructuredBuffer<uint> a;
RWStructuredBuffer<uint> b;
}

static const Stuff gStuff = Stuff( inputBuffer, inputBuffer );

uint test(uint x)
{
return gBuffer[x]
+ gStuff.a[x + 1] * 16
+ gStuff.b[x + 2] * 256;
}

[numthreads(4, 1, 1)]
void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
{
let tid = dispatchThreadID.x;
let inVal = tid;
let outVal = test(inVal);
outputBuffer[tid] = outVal;
}
//BUF: 321
//BUF: 432
//BUF: 543
//BUF: 654
8 changes: 7 additions & 1 deletion tests/compute/type-legalize-global-with-init.slang
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
// with a resource type or a type that recursively contains
// resources.
//
//TEST(compute):COMPARE_COMPUTE: -shaderobj
//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF): -shaderobj
//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF): -vk -shaderobj
//
//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer
RWStructuredBuffer<uint> outputBuffer;
Expand Down Expand Up @@ -37,3 +38,8 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
let outVal = test(inVal);
outputBuffer[tid] = outVal;
}
//BUF: 321
//BUF: 432
//BUF: 543
//BUF: 654

This file was deleted.

Loading