Skip to content

Commit

Permalink
Removes mutex lock from most VPM access, small improvements and fixes
Browse files Browse the repository at this point in the history
* dumps layout of used VPM per kernel
* rewrites Emulator to handle VPM configuration per QPU
* fixes bug in elimination of bit operations
* fixes bug mapping IR operations to machine code
* fixes bug mapping volatile parameters to read-only parameters
* Emulator now tracks TMU read per TMU

See #113
  • Loading branch information
doe300 committed Nov 2, 2018
1 parent 7589854 commit 47cdfa0
Show file tree
Hide file tree
Showing 9 changed files with 295 additions and 138 deletions.
2 changes: 1 addition & 1 deletion src/Locals.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ namespace vc4c
* Parameter points to volatile memory, accesses to this parameter cannot be reordered/eliminated/duplicated or
* combined. Only valid for pointers.
*/
VOLATILE = 0x30
VOLATILE = 0x40
};

/*
Expand Down
15 changes: 8 additions & 7 deletions src/analysis/DependencyGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,12 +303,13 @@ static void createMutexDependencies(DependencyGraph& graph, DependencyNode& node
const intermediate::IntermediateInstruction* lastSemaphoreAccess,
const intermediate::IntermediateInstruction* lastMemFence)
{
if((node.key->hasValueType(ValueType::REGISTER) && node.key->getOutput()->reg().isVertexPipelineMemory()) ||
std::any_of(node.key->getArguments().begin(), node.key->getArguments().end(),
[](const Value& arg) -> bool { return arg.hasRegister() && arg.reg().isVertexPipelineMemory(); }) ||
node.key->writesRegister(REG_MUTEX))
if(((node.key->hasValueType(ValueType::REGISTER) && node.key->getOutput()->reg().isVertexPipelineMemory()) ||
std::any_of(node.key->getArguments().begin(), node.key->getArguments().end(),
[](const Value& arg) -> bool { return arg.hasRegister() && arg.reg().isVertexPipelineMemory(); }) ||
node.key->writesRegister(REG_MUTEX)) &&
lastMutexLock != nullptr)
{
// any VPM operation or mutex unlock must be ordered after the corresponding mutex lock
// any VPM operation or mutex unlock must be ordered after a previous mutex lock, if any
auto& otherNode = graph.assertNode(lastMutexLock);
addDependency(otherNode.getOrCreateEdge(&node).data, DependencyType::MUTEX_LOCK);
}
Expand Down Expand Up @@ -497,13 +498,13 @@ static void createVPMIODependencies(DependencyGraph& graph, DependencyNode& node
(node.key->writesRegister(REG_VPM_IO) || node.key->writesRegister(REG_VPM_DMA_STORE_ADDR) ||
node.key->writesRegister(REG_MUTEX)))
{
// any other VPM write, VPM write address setup or unlocking mutex must be executed aftre the VPM write
// any other VPM write, VPM write address setup or unlocking mutex must be executed after the VPM write
auto& otherNode = graph.assertNode(lastVPMWrite);
addDependency(otherNode.getOrCreateEdge(&node).data, DependencyType::PERIPHERY_ORDER);
}
if(lastVPMRead != nullptr && (node.key->readsRegister(REG_VPM_IO) || node.key->writesRegister(REG_MUTEX)))
{
// any other VPM read or unlocking mutex must be executed aftre the VPM read
// any other VPM read or unlocking mutex must be executed after the VPM read
auto& otherNode = graph.assertNode(lastVPMRead);
addDependency(otherNode.getOrCreateEdge(&node).data, DependencyType::PERIPHERY_ORDER);
}
Expand Down
2 changes: 2 additions & 0 deletions src/intermediate/Operations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,8 @@ qpu_asm::Instruction* Operation::convertToAsm(const FastMap<const Local*, Regist
// need to set the register-file A here too if possible (which it is for registers located on both
// physical files)
input1.first.file = RegisterFile::PHYSICAL_A;
else if(input0.first.file == RegisterFile::PHYSICAL_B)
input1.first.file = RegisterFile::PHYSICAL_A;
else
input1.first.file = RegisterFile::PHYSICAL_B;
}
Expand Down
13 changes: 8 additions & 5 deletions src/optimization/Eliminator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -648,12 +648,15 @@ bool optimizations::eliminateRedundantBitOp(const Module& module, Method& method

const auto& arg0 = op->assertArgument(0);
const auto& arg1 = op->assertArgument(1);
auto out = op->getOutput().value().local();
if(op->getOutput() && op->getOutput()->hasLocal())
{
auto out = op->getOutput().value().local();

if(arg0.hasLocal())
foundAnd(out, arg0.local(), it);
if(arg1.hasLocal())
foundAnd(out, arg1.local(), it);
if(arg0.hasLocal())
foundAnd(out, arg0.local(), it);
if(arg1.hasLocal())
foundAnd(out, arg1.local(), it);
}
};

if(op && op->op == OP_OR && !op->hasUnpackMode() && !op->hasPackMode())
Expand Down
2 changes: 0 additions & 2 deletions src/periphery/SFU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ using namespace vc4c::operators;

InstructionWalker periphery::insertSFUCall(const Register sfuReg, InstructionWalker it, const Value& arg)
{
// TODO need to synchronize SFU ?? (per slice!)
// Also need to include the reading of r4. And if this is enclosed in mutex, the NOPs are no longer replaced?
// 1. move argument to SFU register
assign(it, Value(sfuReg, TYPE_FLOAT)) = arg;
// 2. wait 2 instructions / don't touch r4
Expand Down
Loading

0 comments on commit 47cdfa0

Please sign in to comment.