Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cpu-o3: Transform the lsqunit #214

Open
wants to merge 10 commits into
base: xs-dev
Choose a base branch
from
6 changes: 5 additions & 1 deletion configs/example/xiangshan.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,8 +337,11 @@ def setKmhV3IdealParams(args, system):
cpu.mmu.itb.size = 96

cpu.BankConflictCheck = False # real bank conflict 0.2 score
cpu.EnableLdMissReplay = False
cpu.EnablePipeNukeCheck = False
cpu.StoreWbStage = 2 # store writeback at s2

cpu.scheduler = IdealScheduler()
cpu.scheduler = IdealScheduler()
# use centralized load/store issue queue, for hmmer

# ideal decoupled frontend
Expand All @@ -362,6 +365,7 @@ def setKmhV3IdealParams(args, system):
if args.caches:
cpu.icache.size = '128kB'
cpu.dcache.size = '128kB'
cpu.dcache.hint_wakeup_ahead_cycles = 0;
cpu.icache.enable_wayprediction = False
cpu.dcache.enable_wayprediction = False
cpu.dcache.tag_load_read_ports = 100 # 3->100
Expand Down
2 changes: 1 addition & 1 deletion src/arch/riscv/isa/decoder.isa
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,7 @@ decode QUADRANT default Unknown::unknown() {
0x03: decode FUNCT3 {
format FenceOp {
0x0: fence({{
}}, uint64_t, IsReadBarrier, IsWriteBarrier, No_OpClass);
}}, uint64_t, IsReadBarrier, IsWriteBarrier, MemReadOp);
0x1: fence_i({{
}}, uint64_t, IsNonSpeculative, IsSerializeAfter, No_OpClass);
}
Expand Down
34 changes: 32 additions & 2 deletions src/arch/riscv/isa/formats/amo.isa
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,36 @@ def template LRSCMacroConstructor {{
}
}};

// Strictly order-preserving LRSC
// Macro-op constructor that brackets the LR/SC micro-op with two full
// memory fences (read + write barriers), so the reservation access is
// ordered against both earlier and later memory operations.
def template LRSCStrictMacroConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst machInst):
%(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
{
%(constructor)s;

// Micro-op sequence: leading fence -> LR/SC -> trailing fence.
StaticInstPtr rel_fence;
StaticInstPtr lrsc;
StaticInstPtr acq_fence;

// Leading fence: full read/write barrier ordering all prior accesses.
rel_fence = new MemFenceMicro(machInst, No_OpClass);
rel_fence->setFlag(IsFirstMicroop);
rel_fence->setFlag(IsReadBarrier);
rel_fence->setFlag(IsWriteBarrier);
// Not the last micro-op: commit is delayed until the macro finishes.
rel_fence->setFlag(IsDelayedCommit);

// set up atomic rmw op
lrsc = new %(class_name)sMicro(machInst, this);
lrsc->setFlag(IsDelayedCommit);

// Trailing fence: full read/write barrier ordering later accesses;
// marks the end of the macro-op.
acq_fence = new MemFenceMicro(machInst, No_OpClass);
acq_fence->setFlag(IsLastMicroop);
acq_fence->setFlag(IsReadBarrier);
acq_fence->setFlag(IsWriteBarrier);

microops = {rel_fence, lrsc, acq_fence};
}
}};

def template LRSCMicroConstructor {{
%(class_name)s::%(class_name)sMicro::%(class_name)sMicro(
ExtMachInst machInst, %(class_name)s *_p)
Expand Down Expand Up @@ -435,7 +465,7 @@ def format LoadReserved(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}},
macro_iop = InstObjParams(name, Name, 'LoadReserved', macro_ea_code,
macro_inst_flags)
header_output = LRSCDeclare.subst(macro_iop)
decoder_output = LRSCMacroConstructor.subst(macro_iop)
decoder_output = LRSCStrictMacroConstructor.subst(macro_iop)
decode_block = BasicDecode.subst(macro_iop)

exec_output = ''
Expand Down Expand Up @@ -463,7 +493,7 @@ def format StoreCond(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}},
macro_iop = InstObjParams(name, Name, 'StoreCond', macro_ea_code,
macro_inst_flags)
header_output = LRSCDeclare.subst(macro_iop)
decoder_output = LRSCMacroConstructor.subst(macro_iop)
decoder_output = LRSCStrictMacroConstructor.subst(macro_iop)
decode_block = BasicDecode.subst(macro_iop)

exec_output = ''
Expand Down
4 changes: 4 additions & 0 deletions src/cpu/o3/BaseO3CPU.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ def support_take_over(cls):
SbufferEvictThreshold = Param.Unsigned(7, "store buffer eviction threshold")
storeBufferInactiveThreshold = Param.Unsigned(800, "store buffer writeback timeout threshold")

StoreWbStage = Param.Unsigned(4, "Which PipeLine Stage store instruction writeback, 4 means S4")

LSQDepCheckShift = Param.Unsigned(0,
"Number of places to shift addr before check")
LSQCheckLoads = Param.Bool(True,
Expand All @@ -188,6 +190,8 @@ def support_take_over(cls):
LFSTEntrySize = Param.Unsigned(4,"The number of store table inst in every entry of LFST can contain")
SSITSize = Param.Unsigned(8192, "Store set ID table size")
BankConflictCheck = Param.Bool(True, "open Bank conflict check")
EnableLdMissReplay = Param.Bool(True, "Replay Cache missed load instrution from ReplayQ if True")
EnablePipeNukeCheck = Param.Bool(True, "Replay load if Raw violation is detected in loadPipe if True")


numRobs = Param.Unsigned(1, "Number of Reorder Buffers");
Expand Down
13 changes: 13 additions & 0 deletions src/cpu/o3/dyn_inst.hh
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ class DynInst : public ExecContext, public RefCounted
NotAnInst,
TranslationStarted,
TranslationCompleted,
WaitingCacheRefill,
PossibleLoadViolation,
HitExternalSnoop,
EffAddrValid,
Expand Down Expand Up @@ -462,6 +463,14 @@ class DynInst : public ExecContext, public RefCounted
}
void translationCompleted(bool f) { instFlags[TranslationCompleted] = f; }

/** True if inst is waiting for Dcache refill. */
bool
waitingCacheRefill() const
{
return instFlags[WaitingCacheRefill];
}
/** Set/clear the flag marking this inst as waiting for a Dcache refill. */
void waitingCacheRefill(bool f) { instFlags[WaitingCacheRefill] = f; }

/** True if this address was found to match a previous load and they issued
* out of order. If that happened, then it's only a problem if an incoming
* snoop invalidate modifies the line, in which case we need to squash.
Expand Down Expand Up @@ -1395,6 +1404,10 @@ class DynInst : public ExecContext, public RefCounted
return squashVer.getVersion();
}

/** Returns this instruction's index in the load queue. */
ssize_t getLqIdx()
{
return lqIdx;
}

Addr getPC()
{
Expand Down
211 changes: 95 additions & 116 deletions src/cpu/o3/iew.cc
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,12 @@ IEW::blockMemInst(const DynInstPtr& inst)
instQueue.blockMemInst(inst);
}

/** Hand a load that missed in the Dcache back to the instruction queue
 *  so it can be replayed later (presumably once the refill arrives —
 *  see EnableLdMissReplay / ReplayQ; confirm in inst_queue.cc). */
void
IEW::cacheMissLdReplay(const DynInstPtr& inst)
{
// Delegate to the IQ, which tracks and re-schedules blocked mem insts.
instQueue.cacheMissLdReplay(inst);
}

void
IEW::cacheUnblocked()
{
Expand Down Expand Up @@ -1326,6 +1332,84 @@ IEW::printAvailableInsts()
std::cout << "\n";
}

/** Post-execute squash check: inspects a just-executed instruction for a
 *  branch misprediction or a load/store ordering violation, and if one is
 *  found raises a fetch redirect and squashes accordingly.  Factored out of
 *  executeInsts() so the LSQ pipelines can invoke it for loads/stores after
 *  they execute (see the call-site comment in executeInsts()).  Updates
 *  iewStats counters and notifies the ppMispredict probe point as side
 *  effects. */
void
IEW::SquashCheckAfterExe(DynInstPtr inst)
{
ThreadID tid = inst->threadNumber;

// Only handle this inst if no redirect is already pending for the
// thread, or if this inst is older than the pending squash point.
if (!fetchRedirect[tid] ||
!execWB->squash[tid] ||
execWB->squashedSeqNum[tid] > inst->seqNum) {

// Prevent testing for misprediction on load instructions,
// that have not been executed.
bool loadNotExecuted = !inst->isExecuted() && inst->isLoad();

if (inst->mispredicted() && !loadNotExecuted) {
fetchRedirect[tid] = true;

DPRINTF(IEW, "[tid:%i] [sn:%llu] Execute: "
"Branch mispredict detected.\n",
tid, inst->seqNum);
DPRINTF(IEW, "[tid:%i] [sn:%llu] "
"Predicted target was PC: %s\n",
tid, inst->seqNum, inst->readPredTarg());
DPRINTF(IEW, "[tid:%i] [sn:%llu] Execute: "
"Redirecting fetch to PC: %s\n",
tid, inst->seqNum, inst->pcState());
// If incorrect, then signal the ROB that it must be squashed.
squashDueToBranch(inst, tid);

ppMispredict->notify(inst);

if (inst->readPredTaken()) {
iewStats.predictedTakenIncorrect++;
} else {
iewStats.predictedNotTakenIncorrect++;
}
} else if (ldstQueue.violation(tid)) {
assert(inst->isMemRef());
// If there was an ordering violation, then get the
// DynInst that caused the violation. Note that this
// clears the violation signal.
DynInstPtr violator;
violator = ldstQueue.getMemDepViolator(tid);

DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: %s "
"[sn:%lli], inst PC: %s [sn:%lli]. Addr is: %#x.\n",
violator->pcState(), violator->seqNum,
inst->pcState(), inst->seqNum, inst->physEffAddr);

fetchRedirect[tid] = true;

// Tell the instruction queue that a violation has occurred.
instQueue.violation(inst, violator);

// Squash from the violator onward.
squashDueToMemOrder(violator, tid);

++iewStats.memOrderViolationEvents;
}
} else {
// Reset any state associated with redirects that will not
// be used.  A violation is still consumed (and counted) so the
// signal does not linger, but no new squash is issued.
if (ldstQueue.violation(tid)) {
assert(inst->isMemRef());

DynInstPtr violator = ldstQueue.getMemDepViolator(tid);

DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: "
"%s, inst PC: %s. Addr is: %#x.\n",
violator->pcState(), inst->pcState(),
inst->physEffAddr);
DPRINTF(IEW, "Violation will not be handled because "
"already squashing\n");

++iewStats.memOrderViolationEvents;
}
}
}

void
IEW::executeInsts()
{
Expand Down Expand Up @@ -1393,61 +1477,23 @@ IEW::executeInsts()
// Tell the LDSTQ to execute this instruction (if it is a load).
if (inst->isAtomic()) {
// AMOs are treated like store requests
fault = ldstQueue.executeStore(inst);
fault = ldstQueue.executeAmo(inst);

if (inst->isTranslationDelayed() &&
fault == NoFault) {
// A hw page table walk is currently going on; the
// instruction must be deferred.
DPRINTF(IEW, "Execute: Delayed translation, deferring "
"store.\n");
instQueue.deferMemInst(inst);
deferMemInst(inst);
continue;
}
} else if (inst->isLoad()) {
// Loads will mark themselves as executed, and their writeback
// event adds the instruction to the queue to commit
fault = ldstQueue.executeLoad(inst);

if (inst->isTranslationDelayed() &&
fault == NoFault) {
// A hw page table walk is currently going on; the
// instruction must be deferred.
DPRINTF(IEW, "Execute: Delayed translation, deferring "
"load.\n");
instQueue.deferMemInst(inst);
continue;
}

if (inst->isDataPrefetch() || inst->isInstPrefetch()) {
inst->fault = NoFault;
}
// add this load inst to loadpipe S0.
ldstQueue.issueToLoadPipe(inst);
} else if (inst->isStore()) {
fault = ldstQueue.executeStore(inst);

if (inst->isTranslationDelayed() &&
fault == NoFault) {
// A hw page table walk is currently going on; the
// instruction must be deferred.
DPRINTF(IEW, "Execute: Delayed translation, deferring "
"store.\n");
instQueue.deferMemInst(inst);
continue;
}

// If the store had a fault then it may not have a mem req
if (fault != NoFault || !inst->readPredicate() ||
!inst->isStoreConditional()) {
// If the instruction faulted, then we need to send it
// along to commit without the instruction completing.
// Send this instruction to commit, also make sure iew
// stage realizes there is activity.
inst->setExecuted();
instToCommit(inst);
activityThisCycle();
}

instQueue.notifyExecuted(inst);
// add this store inst to storepipe S0.
ldstQueue.issueToStorePipe(inst);

// Store conditionals will mark themselves as
// executed, and their writeback event will add the
Expand Down Expand Up @@ -1486,81 +1532,14 @@ IEW::executeInsts()
// This probably needs to prioritize the redirects if a different
// scheduler is used. Currently the scheduler schedules the oldest
// instruction first, so the branch resolution order will be correct.
ThreadID tid = inst->threadNumber;

if (!fetchRedirect[tid] ||
!execWB->squash[tid] ||
execWB->squashedSeqNum[tid] > inst->seqNum) {

// Prevent testing for misprediction on load instructions,
// that have not been executed.
bool loadNotExecuted = !inst->isExecuted() && inst->isLoad();

if (inst->mispredicted() && !loadNotExecuted) {
fetchRedirect[tid] = true;

DPRINTF(IEW, "[tid:%i] [sn:%llu] Execute: "
"Branch mispredict detected.\n",
tid, inst->seqNum);
DPRINTF(IEW, "[tid:%i] [sn:%llu] "
"Predicted target was PC: %s\n",
tid, inst->seqNum, inst->readPredTarg());
DPRINTF(IEW, "[tid:%i] [sn:%llu] Execute: "
"Redirecting fetch to PC: %s\n",
tid, inst->seqNum, inst->pcState());
// If incorrect, then signal the ROB that it must be squashed.
squashDueToBranch(inst, tid);

ppMispredict->notify(inst);

if (inst->readPredTaken()) {
iewStats.predictedTakenIncorrect++;
} else {
iewStats.predictedNotTakenIncorrect++;
}
} else if (ldstQueue.violation(tid)) {
assert(inst->isMemRef());
// If there was an ordering violation, then get the
// DynInst that caused the violation. Note that this
// clears the violation signal.
DynInstPtr violator;
violator = ldstQueue.getMemDepViolator(tid);

DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: %s "
"[sn:%lli], inst PC: %s [sn:%lli]. Addr is: %#x.\n",
violator->pcState(), violator->seqNum,
inst->pcState(), inst->seqNum, inst->physEffAddr);

fetchRedirect[tid] = true;

// Tell the instruction queue that a violation has occured.
instQueue.violation(inst, violator);

// Squash.
squashDueToMemOrder(violator, tid);

++iewStats.memOrderViolationEvents;
}
} else {
// Reset any state associated with redirects that will not
// be used.
if (ldstQueue.violation(tid)) {
assert(inst->isMemRef());

DynInstPtr violator = ldstQueue.getMemDepViolator(tid);

DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: "
"%s, inst PC: %s. Addr is: %#x.\n",
violator->pcState(), inst->pcState(),
inst->physEffAddr);
DPRINTF(IEW, "Violation will not be handled because "
"already squashing\n");

++iewStats.memOrderViolationEvents;
}
if (!(inst->isLoad() || inst->isStore())) {
// Load/Store will call this in `lsq_unit.cc` after execution
SquashCheckAfterExe(inst);
}
}

ldstQueue.executePipeSx();

// Update and record activity if we processed any instructions.
if (inst_num) {
if (exeStatus == Idle) {
Expand Down
Loading
Loading