diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc index 5334914d15..1de4b7293b 100644 --- a/src/cpu/o3/iew.cc +++ b/src/cpu/o3/iew.cc @@ -1468,7 +1468,7 @@ IEW::executeInsts() // Tell the LDSTQ to execute this instruction (if it is a load). if (inst->isAtomic()) { // AMOs are treated like store requests - fault = ldstQueue.executeStore(inst); + fault = ldstQueue.executeAmo(inst); if (inst->isTranslationDelayed() && fault == NoFault) { diff --git a/src/cpu/o3/lsq.cc b/src/cpu/o3/lsq.cc index ce644ec7e7..bd79d5e01f 100644 --- a/src/cpu/o3/lsq.cc +++ b/src/cpu/o3/lsq.cc @@ -43,6 +43,7 @@ #include #include +#include #include #include @@ -51,6 +52,7 @@ #include "base/trace.hh" #include "cpu/o3/cpu.hh" #include "cpu/o3/dyn_inst.hh" +#include "cpu/o3/dyn_inst_ptr.hh" #include "cpu/o3/iew.hh" #include "cpu/o3/limits.hh" #include "debug/Drain.hh" @@ -319,19 +321,11 @@ LSQ::executePipeSx() } Fault -LSQ::executeLoad(const DynInstPtr &inst) +LSQ::executeAmo(const DynInstPtr &inst) { ThreadID tid = inst->threadNumber; - return thread[tid].executeLoad(inst); -} - -Fault -LSQ::executeStore(const DynInstPtr &inst) -{ - ThreadID tid = inst->threadNumber; - - return thread[tid].executeStore(inst); + return thread[tid].executeAmo(inst); } void @@ -561,8 +555,12 @@ LSQ::recvFunctionalCustomSignal(PacketPtr pkt, int sig) LSQRequest *request = dynamic_cast(pkt->getPrimarySenderState()); panic_if(!request, "Got packet back with unknown sender state\n"); if (sig == DcacheRespType::Miss) { - // notify cache miss - iewStage->loadCancel(request->instruction()); + if (request->instruction()->isLoad()) { + // notify cache miss + iewStage->loadCancel(request->instruction()); + // set cache miss flag in pipeline + thread[request->_port.lsqID].setFlagInPipeLine(request->instruction(), LdStFlags::CacheMiss); + } } else { panic("unsupported sig %d in recvFunctionalCustomSignal\n", sig); } @@ -954,6 +952,14 @@ LSQ::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data, request->initiateTranslation(); } + if (!isLoad && !isAtomic) { + // store inst temporally saves its data in memData + inst->memData = new uint8_t[size]; + memcpy(inst->memData, data, size); + } + + inst->effSize = size; + if (!isLoad && !inst->isVector() && size > 1 && addr % size != 0) { warn( "Store misaligned: size: %u, Addr: %#lx, code: %d\n", size, addr, RiscvISA::ExceptionCode::STORE_ADDR_MISALIGNED); @@ -963,7 +969,7 @@ LSQ::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data, } /* This is the place were instructions get the effAddr. */ - if (request->isTranslationComplete()) { + if (inst->isAtomic() && request->isTranslationComplete()) { if (request->isMemAccessRequired()) { inst->effAddr = request->getVaddr(); inst->effSize = size; diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index 19d9558340..122c994b89 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -76,6 +76,18 @@ class IEW; class LSQUnit; class StoreBufferEntry; +/** The Flag of Load/Store inst in Pipeline. */ +enum LdStFlags +{ + Valid = 0, + Replayed, + CacheMiss, + Squashed, + Num_Flags +}; + +constexpr uint64_t LdStFlagNum = LdStFlags::Num_Flags; + class LSQ { public: @@ -741,11 +753,8 @@ class LSQ /** Inserts a store into the LSQ. */ void insertStore(const DynInstPtr &store_inst); - /** Executes a load. */ - Fault executeLoad(const DynInstPtr &inst); - - /** Executes a store. */ - Fault executeStore(const DynInstPtr &inst); + /** Executes an amo inst. */ + Fault executeAmo(const DynInstPtr &inst); /** Iq issues a load to load pipeline. */ void issueToLoadPipe(const DynInstPtr &inst); diff --git a/src/cpu/o3/lsq_unit.cc b/src/cpu/o3/lsq_unit.cc index c5f318b27b..2588b32cd9 100644 --- a/src/cpu/o3/lsq_unit.cc +++ b/src/cpu/o3/lsq_unit.cc @@ -908,12 +908,48 @@ LSQUnit::checkViolations(typename LoadQueue::iterator& loadIt, return NoFault; } +void +LSQUnit::setFlagInPipeLine(DynInstPtr inst, LdStFlags f) +{ + bool found = false; + if (inst->isLoad()) { + for (int i = (loadPipeSx.size() - 1); i >= 0; i--) { + for (int j = 0; j < loadPipeSx[i]->size; j++) { + if (inst == loadPipeSx[i]->insts[j]) { + found = true; + (loadPipeSx[i]->flags[j])[f] = true; + break; + } + } + } + } else { + for (int i = (storePipeSx.size() - 1); i >= 0; i--) { + for (int j = 0; j < storePipeSx[i]->size; j++) { + if (inst == storePipeSx[i]->insts[j]) { + found = true; + (storePipeSx[i]->flags[j])[f] = true; + break; + } + } + } + } + + if (!found) { + panic("[sn:%ld] Can not found corresponding inst in PipeLine, isLoad: %d\n", inst->seqNum, inst->isLoad()); + } +} + void LSQUnit::issueToLoadPipe(const DynInstPtr &inst) { // push to loadPipeS0 assert(loadPipeSx[0]->size < MaxWidth); - loadPipeSx[0]->insts[loadPipeSx[0]->size++] = inst; + int idx = loadPipeSx[0]->size; + + loadPipeSx[0]->insts[idx] = inst; + loadPipeSx[0]->flags[idx][LdStFlags::Valid] = true; + loadPipeSx[0]->size++; + DPRINTF(LSQUnit, "issueToLoadPipe: [sn:%lli]\n", inst->seqNum); dumpLoadPipe(); } @@ -923,143 +959,78 @@ LSQUnit::issueToStorePipe(const DynInstPtr &inst) { // push to storePipeS0 assert(storePipeSx[0]->size < MaxWidth); - storePipeSx[0]->insts[storePipeSx[0]->size++] = inst; + int idx = storePipeSx[0]->size; + + storePipeSx[0]->insts[idx] = inst; + storePipeSx[0]->flags[idx][LdStFlags::Valid] = true; + storePipeSx[0]->size++; + DPRINTF(LSQUnit, "issueToStorePipe: [sn:%lli]\n", inst->seqNum); dumpStorePipe(); } -void -LSQUnit::executeLoadPipeSx() -{ - // TODO: execute operations in each load pipelines - Fault fault = NoFault; - for (int i = 0; i < loadPipeSx.size(); i++) { - auto& stage = loadPipeSx[i]; - switch (i) { - case 0: - break; - case 1: - for (int j = 0; j < stage->size; j++) { - auto& inst = stage->insts[j]; - if (!inst->isSquashed()) { - // Loads will mark themselves as executed, and their writeback - // event adds the instruction to the queue to commit - fault = executeLoad(inst); - if (inst->isTranslationDelayed() && - fault == NoFault) { - // A hw page table walk is currently going on; the - // instruction must be deferred. - DPRINTF(LSQUnit, "Execute: Delayed translation, deferring " - "load.\n"); - iewStage->deferMemInst(inst); - continue; - } - if (inst->isDataPrefetch() || inst->isInstPrefetch()) { - inst->fault = NoFault; - } - iewStage->SquashCheckAfterExe(inst); - } else { - DPRINTF(LSQUnit, "Execute: Instruction was squashed. PC: %s, [tid:%i]" - " [sn:%llu]\n", inst->pcState(), inst->threadNumber, - inst->seqNum); - inst->setExecuted(); - inst->setCanCommit(); - } - } - break; - case 2: - break; - case 3: - break; - default: - panic("unsupported loadpipe length"); - } - } -} - -void -LSQUnit::executeStorePipeSx() +Fault +LSQUnit::loadPipeS0(const DynInstPtr &inst, const std::bitset &flag) { - // TODO: execute operations in each store pipelines - Fault fault = NoFault; - for (int i = 0; i < storePipeSx.size(); i++) { - auto& stage = storePipeSx[i]; - switch (i) { - case 0: - break; - case 1: - for (int j = 0; j < stage->size; j++) { - auto& inst = stage->insts[j]; - if (!inst->isSquashed()) { - fault = executeStore(inst); - if (inst->isTranslationDelayed() && - fault == NoFault) { - // A hw page table walk is currently going on; the - // instruction must be deferred. - DPRINTF(LSQUnit, "Execute: Delayed translation, deferring " - "store.\n"); - iewStage->deferMemInst(inst); - continue; - } + DPRINTF(LSQUnit, "LoadPipeS0: Executing load PC %s, [sn:%lli] " + "flags: valid[%d], replayed[%d], cachemiss[%d], squashed[%d]\n", + inst->pcState(), inst->seqNum, + flag[LdStFlags::Valid], flag[LdStFlags::Replayed], + flag[LdStFlags::CacheMiss], flag[LdStFlags::Squashed]); + assert(!inst->isSquashed()); - // If the store had a fault then it may not have a mem req - if (fault != NoFault || !inst->readPredicate() || - !inst->isStoreConditional()) { - // If the instruction faulted, then we need to send it - // along to commit without the instruction completing. - // Send this instruction to commit, also make sure iew - // stage realizes there is activity. - inst->setExecuted(); - iewStage->instToCommit(inst); - iewStage->activityThisCycle(); - } - iewStage->notifyExecuted(inst); - iewStage->SquashCheckAfterExe(inst); - } else { - DPRINTF(LSQUnit, "Execute: Instruction was squashed. PC: %s, [tid:%i]" - " [sn:%llu]\n", inst->pcState(), inst->threadNumber, - inst->seqNum); - inst->setExecuted(); - inst->setCanCommit(); - } - } - break; - case 2: - break; - case 3: - break; - case 4: - break; - default: - panic("unsupported storepipe length"); - } - } -} + Fault load_fault = NoFault; + // Now initiateAcc only does TLB access + load_fault = inst->initiateAcc(); -void -LSQUnit::executePipeSx() -{ - executeLoadPipeSx(); - executeStorePipeSx(); + return load_fault; } Fault -LSQUnit::executeLoad(const DynInstPtr &inst) +LSQUnit::loadPipeS1(const DynInstPtr &inst, const std::bitset &flag) { - // Execute a specific load. - Fault load_fault = NoFault; - - DPRINTF(LSQUnit, "Executing load PC %s, [sn:%lli]\n", - inst->pcState(), inst->seqNum); - + DPRINTF(LSQUnit, "LoadPipeS1: Executing load PC %s, [sn:%lli] " + "flags: valid[%d], replayed[%d], cachemiss[%d], squashed[%d]\n", + inst->pcState(), inst->seqNum, + flag[LdStFlags::Valid], flag[LdStFlags::Replayed], + flag[LdStFlags::CacheMiss], flag[LdStFlags::Squashed]); assert(!inst->isSquashed()); - load_fault = inst->initiateAcc(); + Fault load_fault = inst->getFault(); + LSQRequest* request = inst->savedRequest; + + // Cache access + if (request && request->isTranslationComplete()) { + if (request->isMemAccessRequired()) { + inst->effAddr = request->getVaddr(); + inst->effAddrValid(true); + + // if (cpu->checker) { + // inst->reqToVerify = std::make_shared(*request->req()); + // } + Fault fault; + fault = read(request, inst->lqIdx); + // inst->getFault() may have the first-fault of a + // multi-access split request at this point. + // Overwrite that only if we got another type of fault + // (e.g. re-exec). + if (fault != NoFault) { + inst->getFault() = fault; + load_fault = fault; + } + } else { + inst->setMemAccPredicate(false); + // Commit will have to clean up whatever happened. Set this + // instruction as executed. + inst->setExecuted(); + } + } if (!inst->translationCompleted()) { + // TLB miss iewStage->loadCancel(inst); } else { - DPRINTF(LSQUnit, "load tlb hit [sn:%lli]\n", + DPRINTF(LSQUnit, "LoadPipeS1: load tlb hit [sn:%lli]\n", inst->seqNum); } @@ -1096,7 +1067,7 @@ LSQUnit::executeLoad(const DynInstPtr &inst) // commit. if (!inst->readPredicate()) inst->forwardOldRegs(); - DPRINTF(LSQUnit, "Load [sn:%lli] not executed from %s\n", + DPRINTF(LSQUnit, "LoadPipeS1: Load [sn:%lli] not executed from %s\n", inst->seqNum, (load_fault != NoFault ? "fault" : "predication")); if (!(inst->hasRequest() && inst->strictlyOrdered()) || @@ -1111,87 +1082,175 @@ LSQUnit::executeLoad(const DynInstPtr &inst) ++it; if (checkLoads) - return checkViolations(it, inst); + load_fault = checkViolations(it, inst); } } return load_fault; } -bool -LSQUnit::triggerStorePFTrain(int sq_idx) +Fault +LSQUnit::loadPipeS2(const DynInstPtr &inst, const std::bitset &flag) +{ + Fault fault = inst->getFault(); + DPRINTF(LSQUnit, "LoadPipeS2: Executing load PC %s, [sn:%lli] " + "flags: valid[%d], replayed[%d], cachemiss[%d], squashed[%d]\n", + inst->pcState(), inst->seqNum, + flag[LdStFlags::Valid], flag[LdStFlags::Replayed], + flag[LdStFlags::CacheMiss], flag[LdStFlags::Squashed]); + assert(!inst->isSquashed()); + return fault; +} + +Fault +LSQUnit::loadPipeS3(const DynInstPtr &inst, const std::bitset &flag) +{ + Fault fault = inst->getFault(); + DPRINTF(LSQUnit, "LoadPipeS3: Executing load PC %s, [sn:%lli] " + "flags: valid[%d], replayed[%d], cachemiss[%d], squashed[%d]\n", + inst->pcState(), inst->seqNum, + flag[LdStFlags::Valid], flag[LdStFlags::Replayed], + flag[LdStFlags::CacheMiss], flag[LdStFlags::Squashed]); + assert(!inst->isSquashed()); + return fault; +} + +void +LSQUnit::executeLoadPipeSx() { - auto inst = storeQueue[sq_idx].instruction(); - assert(inst->translationCompleted()); - Addr vaddr = inst->effAddr; - Addr pc = inst->pcState().instAddr(); - // create request - RequestPtr req = - std::make_shared(vaddr, 1, Request::STORE_PF_TRAIN, inst->requestorId(), pc, inst->contextId()); - req->setPaddr(inst->physEffAddr); + // TODO: execute operations in each load pipelines + Fault fault = NoFault; + for (int i = 0; i < loadPipeSx.size(); i++) { + auto& stage = loadPipeSx[i]; + for (int j = 0; j < stage->size; j++) { + auto& inst = stage->insts[j]; + auto& flag = stage->flags[j]; + if (!inst->isSquashed()) { + switch (i) { + case 0: + fault = loadPipeS0(inst, flag); + break; + case 1: + // Loads will mark themselves as executed, and their writeback + // event adds the instruction to the queue to commit + fault = loadPipeS1(inst, flag); - // create packet - PacketPtr pkt = Packet::createPFtrain(req); + if (inst->isTranslationDelayed() && fault == NoFault) { + // A hw page table walk is currently going on; the + // instruction must be deferred. + DPRINTF(LSQUnit, "Execute: Delayed translation, deferring " + "load.\n"); + iewStage->deferMemInst(inst); + flag[LdStFlags::Replayed] = true; + } - // send packet - bool success = dcachePort->sendTimingReq(pkt); - assert(success); // must be true + if (inst->isDataPrefetch() || inst->isInstPrefetch()) { + inst->fault = NoFault; + } - return true; + iewStage->SquashCheckAfterExe(inst); + break; + case 2: + fault = loadPipeS2(inst, flag); + break; + case 3: + fault = loadPipeS3(inst, flag); + break; + default: + panic("unsupported loadpipe length"); + } + } else { + DPRINTF(LSQUnit, "Execute: Instruction was squashed. PC: %s, [tid:%i]" + " [sn:%llu]\n", inst->pcState(), inst->threadNumber, + inst->seqNum); + inst->setExecuted(); + inst->setCanCommit(); + flag[LdStFlags::Squashed] = true; + } + } + } } Fault -LSQUnit::executeStore(const DynInstPtr &store_inst) +LSQUnit::storePipeS0(const DynInstPtr &inst, const std::bitset &flag) { // Make sure that a store exists. assert(storeQueue.size() != 0); + assert(!inst->isSquashed()); + + DPRINTF(LSQUnit, "StorePipeS0: Executing store PC %s [sn:%lli] " + "flags: valid[%d], replayed[%d], squashed[%d]\n", + inst->pcState(), inst->seqNum, + flag[LdStFlags::Valid], flag[LdStFlags::Replayed], flag[LdStFlags::Squashed]); - ssize_t store_idx = store_inst->sqIdx; + // Now initiateAcc only does TLB access + Fault store_fault = inst->initiateAcc(); - DPRINTF(LSQUnit, "Executing store PC %s [sn:%lli]\n", - store_inst->pcState(), store_inst->seqNum); + return store_fault; +} - assert(!store_inst->isSquashed()); +Fault +LSQUnit::storePipeS1(const DynInstPtr &inst, const std::bitset &flag) +{ + // Make sure that a store exists. + assert(storeQueue.size() != 0); + + ssize_t store_idx = inst->sqIdx; + LSQRequest* request = inst->savedRequest; + + DPRINTF(LSQUnit, "StorePipeS1: Executing store PC %s [sn:%lli] " + "flags: valid[%d], replayed[%d], squashed[%d]\n", + inst->pcState(), inst->seqNum, + flag[LdStFlags::Valid], flag[LdStFlags::Replayed], flag[LdStFlags::Squashed]); // Check the recently completed loads to see if any match this store's // address. If so, then we have a memory ordering violation. - typename LoadQueue::iterator loadIt = store_inst->lqIt; + typename LoadQueue::iterator loadIt = inst->lqIt; - Fault store_fault = store_inst->initiateAcc(); + /* This is the place were instructions get the effAddr. */ + if (request && request->isTranslationComplete()) { + if (request->isMemAccessRequired() && (inst->getFault() == NoFault)) { + inst->effAddr = request->getVaddr(); + inst->effAddrValid(true); - if (store_inst->isTranslationDelayed() && + if (cpu->checker) { + inst->reqToVerify = std::make_shared(*request->req()); + } + Fault fault; + fault = write(request, inst->memData, inst->sqIdx); + // release temporal data + delete [] inst->memData; + inst->memData = nullptr; + + if (fault != NoFault) + inst->getFault() = fault; + } + } + + Fault store_fault = inst->getFault(); + + if (inst->isTranslationDelayed() && store_fault == NoFault) return store_fault; - if (!store_inst->readPredicate()) { - DPRINTF(LSQUnit, "Store [sn:%lli] not executed from predication\n", - store_inst->seqNum); - store_inst->forwardOldRegs(); + if (!inst->readPredicate()) { + DPRINTF(LSQUnit, "StorePipeS1: Store [sn:%lli] not executed from predication\n", + inst->seqNum); + inst->forwardOldRegs(); return store_fault; } if (storeQueue[store_idx].size() == 0) { - DPRINTF(LSQUnit,"Fault on Store PC %s, [sn:%lli], Size = 0\n", - store_inst->pcState(), store_inst->seqNum); - - if (store_inst->isAtomic()) { - // If the instruction faulted, then we need to send it along - // to commit without the instruction completing. - if (!(store_inst->hasRequest() && store_inst->strictlyOrdered()) || - store_inst->isAtCommit()) { - store_inst->setExecuted(); - } - iewStage->instToCommit(store_inst); - iewStage->activityThisCycle(); - } + DPRINTF(LSQUnit, "StorePipeS1: Fault on Store PC %s, [sn:%lli], Size = 0\n", + inst->pcState(), inst->seqNum); return store_fault; } assert(store_fault == NoFault); - if (store_inst->isStoreConditional() || store_inst->isAtomic()) { - // Store conditionals and Atomics need to set themselves as able to + if (inst->isStoreConditional()) { + // Store conditionals need to set themselves as able to // writeback if we haven't had a fault by here. storeQueue[store_idx].canWB() = true; @@ -1202,8 +1261,193 @@ LSQUnit::executeStore(const DynInstPtr &store_inst) } } - return checkViolations(loadIt, store_inst); + return checkViolations(loadIt, inst); +} + +Fault +LSQUnit::storePipeS2(const DynInstPtr &inst, const std::bitset &flag) +{ + Fault fault = inst->getFault(); + assert(!inst->isSquashed()); + + DPRINTF(LSQUnit, "StorePipeS2: Executing store PC %s [sn:%lli] " + "flags: valid[%d], replayed[%d], squashed[%d]\n", + inst->pcState(), inst->seqNum, + flag[LdStFlags::Valid], flag[LdStFlags::Replayed], flag[LdStFlags::Squashed]); + return fault; +} + +Fault +LSQUnit::storePipeS3(const DynInstPtr &inst, const std::bitset &flag) +{ + Fault fault = inst->getFault(); + assert(!inst->isSquashed()); + + DPRINTF(LSQUnit, "StorePipeS3: Executing store PC %s [sn:%lli] " + "flags: valid[%d], replayed[%d], squashed[%d]\n", + inst->pcState(), inst->seqNum, + flag[LdStFlags::Valid], flag[LdStFlags::Replayed], flag[LdStFlags::Squashed]); + return fault; +} + +Fault +LSQUnit::storePipeS4(const DynInstPtr &inst, const std::bitset &flag) +{ + Fault fault = inst->getFault(); + assert(!inst->isSquashed()); + + DPRINTF(LSQUnit, "StorePipeS4: Executing store PC %s [sn:%lli]\n", + inst->pcState(), inst->seqNum); + return fault; +} + +void +LSQUnit::executeStorePipeSx() +{ + // TODO: execute operations in each store pipelines + Fault fault = NoFault; + for (int i = 0; i < storePipeSx.size(); i++) { + auto& stage = storePipeSx[i]; + for (int j = 0; j < stage->size; j++) { + auto& inst = stage->insts[j]; + auto& flag = stage->flags[j]; + if (!inst->isSquashed()) { + switch (i) { + case 0: + fault = storePipeS0(inst, flag); + break; + case 1: + fault = storePipeS1(inst, flag); + if (inst->isTranslationDelayed() && fault == NoFault) { + // A hw page table walk is currently going on; the + // instruction must be deferred. + DPRINTF(LSQUnit, "Execute: Delayed translation, deferring " + "store.\n"); + iewStage->deferMemInst(inst); + flag[LdStFlags::Replayed] = true; + continue; + } + + iewStage->notifyExecuted(inst); + iewStage->SquashCheckAfterExe(inst); + break; + case 2: + fault = storePipeS2(inst, flag); + break; + case 3: + fault = storePipeS3(inst, flag); + // If the store had a fault then it may not have a mem req + if (fault != NoFault || !inst->readPredicate() || !inst->isStoreConditional()) { + // If the instruction faulted, then we need to send it + // along to commit without the instruction completing. + // Send this instruction to commit, also make sure iew + // stage realizes there is activity. + if (!flag[LdStFlags::Replayed]) { + inst->setExecuted(); + iewStage->instToCommit(inst); + iewStage->activityThisCycle(); + } + } + break; + case 4: + fault = storePipeS4(inst, flag); + break; + default: + panic("unsupported storepipe length"); + } + } else { + DPRINTF(LSQUnit, "Execute: Instruction was squashed. PC: %s, [tid:%i]" + " [sn:%llu]\n", inst->pcState(), inst->threadNumber, + inst->seqNum); + inst->setExecuted(); + inst->setCanCommit(); + flag[LdStFlags::Squashed] = true; + } + } + } +} + +void +LSQUnit::executePipeSx() +{ + executeLoadPipeSx(); + executeStorePipeSx(); +} + +bool +LSQUnit::triggerStorePFTrain(int sq_idx) +{ + auto inst = storeQueue[sq_idx].instruction(); + assert(inst->translationCompleted()); + Addr vaddr = inst->effAddr; + Addr pc = inst->pcState().instAddr(); + // create request + RequestPtr req = + std::make_shared(vaddr, 1, Request::STORE_PF_TRAIN, inst->requestorId(), pc, inst->contextId()); + req->setPaddr(inst->physEffAddr); + + // create packet + PacketPtr pkt = Packet::createPFtrain(req); + + // send packet + bool success = dcachePort->sendTimingReq(pkt); + assert(success); // must be true + + return true; +} + +Fault +LSQUnit::executeAmo(const DynInstPtr &amo_inst) +{ + // Make sure that a store exists. + assert(storeQueue.size() != 0); + + ssize_t amo_idx = amo_inst->sqIdx; + + DPRINTF(LSQUnit, "Executing AMO PC %s [sn:%lli]\n", + amo_inst->pcState(), amo_inst->seqNum); + + assert(!amo_inst->isSquashed()); + + // Check the recently completed loads to see if any match this amo's + // address. If so, then we have a memory ordering violation. + typename LoadQueue::iterator loadIt = amo_inst->lqIt; + + Fault amo_fault = amo_inst->initiateAcc(); + + if (amo_inst->isTranslationDelayed() && amo_fault == NoFault) + return amo_fault; + + if (!amo_inst->readPredicate()) { + DPRINTF(LSQUnit, "AMO [sn:%lli] not executed from predication\n", + amo_inst->seqNum); + amo_inst->forwardOldRegs(); + return amo_fault; + } + + if (storeQueue[amo_idx].size() == 0) { + DPRINTF(LSQUnit,"Fault on AMO PC %s, [sn:%lli], Size = 0\n", + amo_inst->pcState(), amo_inst->seqNum); + + // If the amo instruction faulted, then we need to send it along + // to commit without the instruction completing. + if (!(amo_inst->hasRequest() && amo_inst->strictlyOrdered()) || + amo_inst->isAtCommit()) { + amo_inst->setExecuted(); + } + iewStage->instToCommit(amo_inst); + iewStage->activityThisCycle(); + + return amo_fault; + } + + assert(amo_fault == NoFault); + + // Atomics need to set themselves as able to writeback if we haven't had a fault by here. + storeQueue[amo_idx].canWB() = true; + ++storesToWB; + return checkViolations(loadIt, amo_inst); } void @@ -2114,10 +2358,15 @@ LSQUnit::dumpLoadPipe() for (int i = 0; i < loadPipeSx.size(); i++) { DPRINTF(LSQUnit, "Load S%d:, size: %d\n", i, loadPipeSx[i]->size); for (int j = 0; j < loadPipeSx[i]->size; j++) { - DPRINTF(LSQUnit, " PC: %s, [tid:%i] [sn:%lli]\n", + DPRINTF(LSQUnit, " PC: %s, [tid:%i] [sn:%lli] " + "flags: valid[%d], replayed[%d], cachemiss[%d], squashed[%d]\n", loadPipeSx[i]->insts[j]->pcState(), loadPipeSx[i]->insts[j]->threadNumber, - loadPipeSx[i]->insts[j]->seqNum + loadPipeSx[i]->insts[j]->seqNum, + (loadPipeSx[i]->flags[j])[LdStFlags::Valid], + (loadPipeSx[i]->flags[j])[LdStFlags::Replayed], + (loadPipeSx[i]->flags[j])[LdStFlags::CacheMiss], + (loadPipeSx[i]->flags[j])[LdStFlags::Squashed] ); } } @@ -2130,10 +2379,14 @@ LSQUnit::dumpStorePipe() for (int i = 0; i < storePipeSx.size(); i++) { DPRINTF(LSQUnit, "Store S%d:, size: %d\n", i, storePipeSx[i]->size); for (int j = 0; j < storePipeSx[i]->size; j++) { - DPRINTF(LSQUnit, " PC: %s, [tid:%i] [sn:%lli]\n", + DPRINTF(LSQUnit, " PC: %s, [tid:%i] [sn:%lli] " + "flags: valid[%d], replayed[%d], squashed[%d]\n", storePipeSx[i]->insts[j]->pcState(), storePipeSx[i]->insts[j]->threadNumber, - storePipeSx[i]->insts[j]->seqNum + storePipeSx[i]->insts[j]->seqNum, + (storePipeSx[i]->flags[j])[LdStFlags::Valid], + (storePipeSx[i]->flags[j])[LdStFlags::Replayed], + (storePipeSx[i]->flags[j])[LdStFlags::Squashed] ); } } @@ -2507,6 +2760,7 @@ LSQUnit::read(LSQRequest *request, ssize_t load_idx) } if (!request->isSent()) { iewStage->blockMemInst(load_inst); + setFlagInPipeLine(load_inst, LdStFlags::Replayed); } return NoFault; @@ -2517,9 +2771,9 @@ LSQUnit::write(LSQRequest *request, uint8_t *data, ssize_t store_idx) { assert(storeQueue[store_idx].valid()); - DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x | storeHead:%i " + DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x | storeHead:%i, size: %d" "[sn:%llu]\n", - store_idx - 1, request->req()->getPaddr(), storeQueue.head() - 1, + store_idx - 1, request->req()->getPaddr(), storeQueue.head() - 1, request->_size, storeQueue[store_idx].instruction()->seqNum); storeQueue[store_idx].setRequest(request); diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index d498b66a20..25962806d8 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -43,6 +43,7 @@ #define __CPU_O3_LSQ_UNIT_HH__ #include +#include #include #include #include @@ -62,6 +63,7 @@ #include "cpu/o3/comm.hh" #include "cpu/o3/cpu.hh" #include "cpu/o3/dyn_inst_ptr.hh" +#include "cpu/o3/limits.hh" #include "cpu/o3/lsq.hh" #include "cpu/timebuf.hh" #include "debug/HtmCpu.hh" @@ -340,18 +342,13 @@ class LSQUnit */ void checkSnoop(PacketPtr pkt); - /** Executes a load instruction. */ - Fault executeLoad(const DynInstPtr &inst); - /** Iq issues a load to load pipeline. */ void issueToLoadPipe(const DynInstPtr &inst); - Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; } - bool triggerStorePFTrain(int sq_idx); - /** Executes a store instruction. */ - Fault executeStore(const DynInstPtr& inst); + /** Executes an amo instruction. */ + Fault executeAmo(const DynInstPtr& inst); /** Iq issues a store to store pipeline. */ void issueToStorePipe(const DynInstPtr &inst); @@ -511,9 +508,20 @@ class LSQUnit /** Process instructions in each load pipeline stages. */ void executeLoadPipeSx(); + Fault loadPipeS0(const DynInstPtr &inst, const std::bitset &flag); + Fault loadPipeS1(const DynInstPtr &inst, const std::bitset &flag); + Fault loadPipeS2(const DynInstPtr &inst, const std::bitset &flag); + Fault loadPipeS3(const DynInstPtr &inst, const std::bitset &flag); + /** Process instructions in each store pipeline stages. */ void executeStorePipeSx(); + Fault storePipeS0(const DynInstPtr &inst, const std::bitset &flag); + Fault storePipeS1(const DynInstPtr &inst, const std::bitset &flag); + Fault storePipeS2(const DynInstPtr &inst, const std::bitset &flag); + Fault storePipeS3(const DynInstPtr &inst, const std::bitset &flag); + Fault storePipeS4(const DynInstPtr &inst, const std::bitset &flag); + /** Wrap function. */ void executePipeSx(); @@ -619,6 +627,7 @@ class LSQUnit int size; DynInstPtr insts[MaxWidth]; + std::bitset flags[MaxWidth]; }; /** The load pipeline TimeBuffer. */ TimeBuffer loadPipe; @@ -631,12 +640,16 @@ class LSQUnit int size; DynInstPtr insts[MaxWidth]; + std::bitset flags[MaxWidth]; }; /** The store pipeline TimeBuffer. */ TimeBuffer storePipe; /** Each stage in store pipeline. storePipeSx[0] means store pipe S0 */ std::vector::wire> storePipeSx; + /** Find inst in Load/Store Pipeline, set corresponding flag to true */ + void setFlagInPipeLine(DynInstPtr inst, LdStFlags f); + private: /** The number of places to shift addresses in the LSQ before checking * for dependency violations