Skip to content

Commit

Permalink
fix(batched): codegen bug for compref with varying index (AcademySoft…
Browse files Browse the repository at this point in the history
…wareFoundation#1776)

 Fix bug in batched codegen of compref when the index is varying.
The loadvalue was left to defaulted to uniform load, which we can't store into a varying/wide result.
Fixed similar bug in getmessage, and added check for spline knotcount to be uniform (although varying support could be added).

This bug exposed a missed optimization in BatchedAnalyser where a early out (return) needs to change the conditional to be varying to support masking.  However no check was being done to see if the loop control existed higher in the callstack.  So now the execution scope tracks loop nesting depth to see if the current function is really in a loop or not.  This allows the loop control flow to remain uniform (avoiding masking and more complex control flow).

Added nestloop-reg to testsuite to reproduce bug and verify its fixed.

---------

Signed-off-by: Alex M. Wells <[email protected]>
  • Loading branch information
AlexMWells authored Mar 2, 2024
1 parent 58aec3d commit baa9a09
Show file tree
Hide file tree
Showing 6 changed files with 140 additions and 16 deletions.
1 change: 1 addition & 0 deletions src/cmake/testing.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ macro (osl_add_all_tests)
metadata-braces min-reg miscmath missing-shader
mix-reg
named-components
nestedloop-reg
noise noise-cell
noise-gabor noise-gabor2d-filter noise-gabor3d-filter
noise-gabor-reg
Expand Down
72 changes: 61 additions & 11 deletions src/liboslexec/batched_analysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -838,11 +838,12 @@ class FunctionTreeTracker {
std::vector<Position> m_function_stack;
std::vector<Position> m_before_if_block_stack;
std::vector<Position> m_after_if_block_stack;
std::vector<int> m_loop_depth_stack;

public:
OSL_FORCEINLINE FunctionTreeTracker() : m_top_of_stack(end_pos())
{
push_function_call();
push_function_call(0);
}

FunctionTreeTracker(const FunctionTreeTracker&) = delete;
Expand Down Expand Up @@ -977,11 +978,12 @@ class FunctionTreeTracker {

OSL_FORCEINLINE Iterator end() const { return Iterator(*this, end_pos()); }

OSL_FORCEINLINE void push_function_call()
OSL_FORCEINLINE void push_function_call(int loop_depth_at_function)
{
OSL_DEV_ONLY(std::cout << "DependencyTreeTracker push_function_call"
<< std::endl);
m_function_stack.push_back(m_top_of_stack);
m_loop_depth_stack.push_back(loop_depth_at_function);
}

OSL_FORCEINLINE void push_if_block()
Expand Down Expand Up @@ -1062,7 +1064,16 @@ class FunctionTreeTracker {

OSL_FORCEINLINE Position top_pos() const { return m_top_of_stack; }

OSL_FORCEINLINE void pop_function_call()
OSL_FORCEINLINE int loop_depth_at_start_of_call()
{
if (m_loop_depth_stack.empty()) {
return 0;
} else {
return m_loop_depth_stack.back();
}
}

OSL_FORCEINLINE void pop_function_call(int loop_depth_at_function)
{
OSL_DEV_ONLY(std::cout << "DependencyTreeTracker pop_function_call"
<< std::endl);
Expand Down Expand Up @@ -1090,6 +1101,9 @@ class FunctionTreeTracker {
process_exit(early_out.dtt_pos);
}
}

OSL_ASSERT(m_loop_depth_stack.back() == loop_depth_at_function);
m_loop_depth_stack.pop_back();
}
};

Expand Down Expand Up @@ -1214,8 +1228,11 @@ class LoopStack {

std::vector<LoopInfo> m_loop_info_by_depth;

bool is_inside_loop() const { return depth() != 0; }

public:
bool is_inside_loop() const { return !m_loop_info_by_depth.empty(); }
int depth() const { return m_loop_info_by_depth.size(); }


int current_loop_op_index() const
{
Expand Down Expand Up @@ -1603,7 +1620,7 @@ struct Analyzer {

OSL_NOINLINE void make_loops_control_flow_depend_on_early_out_conditions()
{
// Need change the loop control flow which is dependent upon
// Need to change the loop control flow which is dependent upon
// a conditional. By making a circular dependency between the this
// [return|exit|break|continue] operation and the conditionals value,
// any varying values in the conditional controlling
Expand Down Expand Up @@ -1986,9 +2003,11 @@ struct Analyzer {
// as the current block, there was no conditionals involved
OSL_DEV_ONLY(std::cout << " FUNCTION CALL BLOCK BEGIN"
<< std::endl);
m_execution_scope_stack.push_function_call();
m_execution_scope_stack.push_function_call(
m_loop_stack.depth());
discover_symbols_between(op_index + 1, opcode.jump(0));
m_execution_scope_stack.pop_function_call();
m_execution_scope_stack.pop_function_call(
m_loop_stack.depth());
OSL_DEV_ONLY(std::cout << " FUNCTION CALL BLOCK END"
<< std::endl);

Expand Down Expand Up @@ -2020,12 +2039,16 @@ struct Analyzer {
m_execution_scope_stack.process_return(
m_conditional_symbol_stack.top_pos());

// The return will need change the loop control flow which is dependent upon
// IF AND ONLY IF, the current loop control exists inside
// the current function, because the return should only early
// out of those loops, not any higher in the call stack.
// The return will need to change the loop control flow which is dependent upon
// a conditional. By making a circular dependency between the return operation
// and the conditionals value, any varying values in the conditional controlling
// the return should flow back to the loop control variable, which might need to
// be varying so allow lanes to terminate the loop independently
if (m_loop_stack.is_inside_loop()) {
if (m_loop_stack.depth()
> m_execution_scope_stack.loop_depth_at_start_of_call()) {
make_loops_control_flow_depend_on_early_out_conditions();
}
}
Expand All @@ -2037,12 +2060,18 @@ struct Analyzer {
m_execution_scope_stack.process_exit(
m_conditional_symbol_stack.top_pos());

// The exit will need change the loop control flow which is dependent upon
// IF AND ONLY IF, the current loop control exists inside
// the current function, because the return should only early
// out of those loops, not any higher in the call stack.
// A different mechanism will look for early outs when
// unwinding up the callstack.
// The exit will need to change the loop control flow which is dependent upon
// a conditional. By making a circular dependency between the exit operation
// and the conditionals value, any varying values in the conditional controlling
// the exit should flow back to the loop control variable, which might need to
// be varying so allow lanes to terminate the loop independently
if (m_loop_stack.is_inside_loop()) {
if (m_loop_stack.depth()
> m_execution_scope_stack.loop_depth_at_start_of_call()) {
make_loops_control_flow_depend_on_early_out_conditions();
}
}
Expand Down Expand Up @@ -2095,6 +2124,10 @@ struct Analyzer {
|| symbol_to_be_varying->typespec().is_closure()
|| symbol_to_be_varying->connected()
|| symbol_to_be_varying->connected_down());
OSL_DEV_ONLY(std::cout << "<<<< make_varying symbol: "
<< symbol_to_be_varying->unmangled().c_str()
<< " force=" << force << std::endl);

symbol_to_be_varying->make_varying();
auto range = m_symbols_dependent_upon.equal_range(
symbol_to_be_varying);
Expand All @@ -2107,6 +2140,9 @@ struct Analyzer {
recursively_mark_varying(dependent_symbol);
}
};
OSL_DEV_ONLY(std::cout << ">>>> end make_varying symbol: "
<< symbol_to_be_varying->unmangled().c_str()
<< std::endl);
}
};

Expand Down Expand Up @@ -2639,6 +2675,8 @@ struct Analyzer {

OSL_NOINLINE void push_varying_of_shader_globals()
{
OSL_DEV_ONLY(std::cout << "push_varying_of_shader_globals begin"
<< std::endl);
for (auto&& s : inst()->symbols()) {
if (s.symtype() == SymTypeGlobal) {
// TODO: now that symbol has is_uniform()
Expand All @@ -2659,10 +2697,14 @@ struct Analyzer {
// so force their dependents to get marked
recursively_mark_varying(sym_ptr, true /*force*/);
}
OSL_DEV_ONLY(std::cout << "push_varying_of_shader_globals end"
<< std::endl);
}

OSL_NOINLINE void make_renderer_outputs_varying()
{
OSL_DEV_ONLY(std::cout << "make_renderer_outputs_varying begin"
<< std::endl);
// Mark all output parameters as varying to catch
// output parameters written to by uniform variables,
// as nothing would have made them varying, however as
Expand All @@ -2679,10 +2721,14 @@ struct Analyzer {
}
}
}
OSL_DEV_ONLY(std::cout << "make_renderer_outputs_varying end"
<< std::endl);
}

OSL_NOINLINE void make_interpolated_parameters_varying()
{
OSL_DEV_ONLY(std::cout << "make_interpolated_parameters_varying begin"
<< std::endl);
// Mark all interpolated parameters as varying,
// As we expect interpolated data, get_userdata will
// only provide varying values, so we must mark
Expand All @@ -2694,10 +2740,13 @@ struct Analyzer {
recursively_mark_varying(&s);
}
}
OSL_DEV_ONLY(std::cout << "make_interpolated_parameters_varying end"
<< std::endl);
}

OSL_NOINLINE void make_closures_varying()
{
OSL_DEV_ONLY(std::cout << "make_closures_varying begin" << std::endl);
// We assume that closures are always stored as varying pointers
FOREACH_SYM(Symbol & s, inst())
{
Expand All @@ -2707,6 +2756,7 @@ struct Analyzer {
recursively_mark_varying(&s);
}
}
OSL_DEV_ONLY(std::cout << "make_closures_varying end" << std::endl);
}

OSL_NOINLINE void push_varying_of_upstream_connections()
Expand Down
27 changes: 22 additions & 5 deletions src/liboslexec/batched_llvm_gen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2298,7 +2298,8 @@ LLVMGEN(llvm_gen_compref)

bool op_is_uniform = Result.is_uniform();

llvm::Value* c = rop.llvm_load_value(Index);
llvm::Value* c = rop.llvm_load_value(Index, /*deriv=*/0, /*component=*/0,
TypeDesc::UNKNOWN, Index.is_uniform());

if (Index.is_uniform()) {
if (rop.inst()->master()->range_checking()) {
Expand Down Expand Up @@ -6791,13 +6792,18 @@ LLVMGEN(llvm_gen_getmessage)
rop.ll.call_function(rop.build_name(FuncSpec("getmessage").mask()),
args);
} else {
llvm::Value* nameVal = rop.llvm_load_value(Name);
llvm::Value* nameVal
= rop.llvm_load_value(Name, /*deriv=*/0, /*component=*/0,
TypeDesc::UNKNOWN, nameVal_is_uniform);
if (nameVal_is_uniform) {
args[nameArgumentIndex] = nameVal;
}

llvm::Value* sourceVal = has_source ? rop.llvm_load_value(Source)
: rop.ll.constant(ustring());
llvm::Value* sourceVal
= has_source
? rop.llvm_load_value(Source, /*deriv=*/0, /*component=*/0,
TypeDesc::UNKNOWN, sourceVal_is_uniform)
: rop.ll.constant(ustring());
if (sourceVal_is_uniform) {
args[sourceArgumentIndex] = sourceVal;
}
Expand Down Expand Up @@ -7035,6 +7041,16 @@ LLVMGEN(llvm_gen_spline)
&& (!has_knot_count
|| (has_knot_count && Knot_count.typespec().is_int())));

if (has_knot_count && !Knot_count.is_uniform()) {
// TODO: varying knot count support could be added below as we already
// have a loop to handle varying spline basis. Just need to add tests
// to exercise and verify functions properly, until then...
rop.shadingcontext()->errorfmt(
"spline nknots parameter is varying, batched code gen requires a constant or uniform nknots, called from ({}:{})",
op.sourcefile(), op.sourceline());
return false;
}

bool op_is_uniform = Spline.is_uniform() && Value.is_uniform()
&& Knots.is_uniform();

Expand Down Expand Up @@ -7094,7 +7110,8 @@ LLVMGEN(llvm_gen_spline)
args.push_back(rop.llvm_void_ptr(Value)); // make things easy
args.push_back(rop.llvm_void_ptr(Knots));
if (has_knot_count)
args.push_back(rop.llvm_load_value(Knot_count));
args.push_back(rop.llvm_load_value(
Knot_count)); // TODO: add support for varying Knot_count
else
args.push_back(rop.ll.constant((int)Knots.typespec().arraylength()));
args.push_back(rop.ll.constant((int)Knots.typespec().arraylength()));
Expand Down
Empty file.
17 changes: 17 additions & 0 deletions testsuite/nestedloop-reg/run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env python

# Copyright Contributors to the Open Shading Language project.
# SPDX-License-Identifier: BSD-3-Clause
# https://github.com/AcademySoftwareFoundation/OpenShadingLanguage


###############################
#
###############################
command += testshade("-t 1 -g 32 32 -od uint8 test -o cout out.tif ")
outputs.append ("out.tif")

# expect a few LSB failures
failthresh = 0.008
failpercent = 3

39 changes: 39 additions & 0 deletions testsuite/nestedloop-reg/test.osl
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Copyright Contributors to the Open Shading Language project.
// SPDX-License-Identifier: BSD-3-Clause
// https://github.com/AcademySoftwareFoundation/OpenShadingLanguage

float limit_early_return(float channel) {
if (channel > 1.0) {
//exit();
return 1.0;
}
return channel;
}

float limit_single_return(float channel) {
float result = channel;
if (channel > 1.0) {
result = 1.0;
}
return result;
}

color soft_clip(color in_color)
{
color result;
for (int i=0; i < 3; ++i)
{
#if 1
result[i] = limit_early_return(in_color[i]);
#else
result[i] = limit_single_return(in_color[i]);
#endif
}
return result;
}

shader test (output color cout = 0)
{
color pixel = 2*P;
cout = soft_clip(pixel);
}

0 comments on commit baa9a09

Please sign in to comment.