Skip to content

Commit

Permalink
cpu-o3: add mem stall topdown
Browse files Browse the repository at this point in the history
Change-Id: Ice57ee1cc06d16cfd0510af2c32eeda811f23ebc
  • Loading branch information
tastynoob committed Jan 6, 2025
1 parent 0ded7a0 commit 77eea17
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 22 deletions.
2 changes: 1 addition & 1 deletion src/cpu/o3/inst_queue.cc
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ InstructionQueue::InstructionQueue(CPU *cpu_ptr, IEW *iew_ptr,
memDepUnit[tid].setIQ(this);
}

scheduler->setCPU(cpu_ptr);
scheduler->setCPU(cpu_ptr, &iew_ptr->ldstQueue);
scheduler->resetDepGraph(numPhysRegs);
scheduler->setMemDepUnit(memDepUnit);

Expand Down
25 changes: 23 additions & 2 deletions src/cpu/o3/issue_queue.cc
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,16 @@ Scheduler::SpecWakeupCompletion::description() const
return "Spec wakeup completion";
}

/**
 * Registers the scheduler's top-down stall counters under @p parent.
 *
 * Each counter gets a human-readable description so that the statistics
 * output is self-explanatory; the wording mirrors the conditions under
 * which Scheduler::issueAndSelect() increments the counter.
 *
 * @param parent Statistics group that owns this group.
 */
Scheduler::SchedulerStats::SchedulerStats(statistics::Group* parent)
    : statistics::Group(parent),
      ADD_STAT(exec_stall_cycle,
               "Cycles in which fewer than 4 instructions were issued to FUs"),
      ADD_STAT(memstall_any_load,
               "Cycles with no instruction issued to FUs while any in-flight "
               "load was still outstanding"),
      ADD_STAT(memstall_l1miss,
               "Cycles with no instruction issued to FUs while an outstanding "
               "in-flight load had request depth >= 1"),
      ADD_STAT(memstall_l2miss,
               "Cycles with no instruction issued to FUs while an outstanding "
               "in-flight load had request depth >= 2"),
      ADD_STAT(memstall_l3miss,
               "Cycles with no instruction issued to FUs while an outstanding "
               "in-flight load had request depth >= 3")
{
}

bool
Scheduler::disp_policy::operator()(IssueQue* a, IssueQue* b) const
{
Expand All @@ -632,7 +642,7 @@ Scheduler::disp_policy::operator()(IssueQue* a, IssueQue* b) const
return p0 < p1;
}

Scheduler::Scheduler(const SchedulerParams& params) : SimObject(params), issueQues(params.IQs)
Scheduler::Scheduler(const SchedulerParams& params) : SimObject(params), stats(this), issueQues(params.IQs)
{
dispTable.resize(enums::OpClass::Num_OpClass);
opExecTimeTable.resize(enums::OpClass::Num_OpClass, 1);
Expand Down Expand Up @@ -719,9 +729,10 @@ Scheduler::Scheduler(const SchedulerParams& params) : SimObject(params), issueQu
}

void
Scheduler::setCPU(CPU* cpu)
Scheduler::setCPU(CPU* cpu, LSQ* lsq)
{
this->cpu = cpu;
this->lsq = lsq;
for (auto it : issueQues) {
it->setCPU(cpu);
}
Expand Down Expand Up @@ -764,6 +775,16 @@ Scheduler::issueAndSelect()
for (auto it : issueQues) {
it->issueToFu();
}
if (instsToFu.size() < 4) {
stats.exec_stall_cycle++;
}
if (instsToFu.size() == 0) {
if (lsq->anyInflightLoadsNotComplete()) stats.memstall_any_load++;
if (lsq->anyInflightLoadsNotComplete(1)) stats.memstall_l1miss++;
if (lsq->anyInflightLoadsNotComplete(2)) stats.memstall_l2miss++;
if (lsq->anyInflightLoadsNotComplete(3)) stats.memstall_l3miss++;
}

// must wait until all insts have been issued
for (auto it : issueQues) {
it->selectInst();
Expand Down
13 changes: 12 additions & 1 deletion src/cpu/o3/issue_queue.hh
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,17 @@ class Scheduler : public SimObject

CPU* cpu;
MemDepUnit* memDepUnit;
/** Load/store queue supplied through setCPU(); queried by
 *  issueAndSelect() when counting memory-stall cycles. */
LSQ* lsq;

/**
 * Top-down stall counters kept by the scheduler. All of them are
 * incremented from Scheduler::issueAndSelect().
 */
struct SchedulerStats : public statistics::Group
{
/** Registers every counter below with the given parent group. */
SchedulerStats(statistics::Group* parent);
/** Incremented when fewer than 4 instructions are in instsToFu. */
statistics::Scalar exec_stall_cycle;
/** Incremented when instsToFu is empty and
 *  lsq->anyInflightLoadsNotComplete() is true. */
statistics::Scalar memstall_any_load;
/** Incremented when instsToFu is empty and
 *  lsq->anyInflightLoadsNotComplete(1) is true. */
statistics::Scalar memstall_l1miss;
/** Incremented when instsToFu is empty and
 *  lsq->anyInflightLoadsNotComplete(2) is true. */
statistics::Scalar memstall_l2miss;
/** Incremented when instsToFu is empty and
 *  lsq->anyInflightLoadsNotComplete(3) is true. */
statistics::Scalar memstall_l3miss;
} stats;

struct disp_policy
{
Expand Down Expand Up @@ -246,7 +257,7 @@ class Scheduler : public SimObject

public:
Scheduler(const SchedulerParams& params);
void setCPU(CPU* cpu);
void setCPU(CPU* cpu, LSQ* lsq);
void resetDepGraph(uint64_t numPhysRegs);
void setMemDepUnit(MemDepUnit* memDepUnit) { this->memDepUnit = memDepUnit; }

Expand Down
21 changes: 21 additions & 0 deletions src/cpu/o3/lsq.cc
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,16 @@ int LSQ::getCount(ThreadID tid) { return thread.at(tid).getCount(); }

int LSQ::numLoads(ThreadID tid) { return thread.at(tid).numLoads(); }

/**
 * Reports whether some in-flight load is still waiting on memory.
 *
 * A tracked load matches when it still has an outstanding request and the
 * depth recorded on its main request is at least @p miss_level.
 *
 * @param miss_level Minimum request depth a load must have to be counted.
 * @return true if at least one tracked in-flight load matches.
 */
bool
LSQ::anyInflightLoadsNotComplete(int miss_level)
{
    // Look at every thread's LSQ unit instead of only thread 0, so loads
    // issued by other hardware threads are not silently ignored.
    // NOTE(review): assumes `thread` is a range-iterable container of LSQ
    // units (as in upstream gem5) -- confirm against lsq.hh.
    for (auto &unit : thread) {
        for (auto *load_req : unit.inflightLoads) {
            if (load_req->isAnyOutstandingRequest() &&
                (load_req->mainReq()->depth >= miss_level)) {
                return true;
            }
        }
    }
    return false;
}

int LSQ::numStores(ThreadID tid) { return thread.at(tid).numStores(); }

int
Expand Down Expand Up @@ -1352,6 +1362,12 @@ LSQ::SingleDataRequest::recvTimingResp(PacketPtr pkt)
// Dump inst num, request addr, and packet addr
DPRINTF(LSQ, "Single Req::recvTimingResp: inst: %llu, pkt: %#lx\n", pkt->req->getReqInstSeqNum(),
pkt->getAddr());
if (isLoad()) {
auto it = std::find(lsqUnit()->inflightLoads.begin(), lsqUnit()->inflightLoads.end(), this);
if (it != lsqUnit()->inflightLoads.end()) {
lsqUnit()->inflightLoads.erase(it);
}
}
assert(_numOutstandingPackets == 1);
flags.set(Flag::Complete);
assert(pkt == _packets.front());
Expand Down Expand Up @@ -1520,6 +1536,11 @@ LSQ::SingleDataRequest::sendPacketToCache()
bool tag_read_fail = false;
bool success = lsqUnit()->trySendPacket(isLoad(), _packets.at(0), bank_conflict, tag_read_fail);
if (success) {
if (isLoad()) {
assert(lsqUnit()->inflightLoads.size() < lsqUnit()->numLoads());
lsqUnit()->inflightLoads.emplace_back(this);
}

if (!bank_conflict) {
_numOutstandingPackets = 1;
}
Expand Down
38 changes: 20 additions & 18 deletions src/cpu/o3/lsq.hh
Original file line number Diff line number Diff line change
Expand Up @@ -283,24 +283,6 @@ class LSQ
uint64_t* res=nullptr, AtomicOpFunctorPtr amo_op=nullptr,
bool stale_translation=false);

/** @return true when Flag::IsLoad is set in this object's flags. */
bool isLoad() const { return flags.isSet(Flag::IsLoad); }

/** @return true when Flag::IsHInst is set in this object's flags. */
bool isHInst() const { return flags.isSet(Flag::IsHInst); }

/** @return true when Flag::IsAtomic is set in this object's flags. */
bool isAtomic() const { return flags.isSet(Flag::IsAtomic); }

/** Install the request in the LQ/SQ. */
void install();

Expand Down Expand Up @@ -344,6 +326,24 @@ class LSQ

public:

/** @return true when Flag::IsLoad is set in this object's flags. */
bool isLoad() const { return flags.isSet(Flag::IsLoad); }

/** @return true when Flag::IsHInst is set in this object's flags. */
bool isHInst() const { return flags.isSet(Flag::IsHInst); }

/** @return true when Flag::IsAtomic is set in this object's flags. */
bool isAtomic() const { return flags.isSet(Flag::IsAtomic); }

void forward();

/** Convenience getters/setters. */
Expand Down Expand Up @@ -803,6 +803,8 @@ class LSQ
/** Returns the total number of loads for a single thread. */
int numLoads(ThreadID tid);

/** Returns true if a tracked in-flight load still has an outstanding
 *  request whose main request depth is at least miss_level. */
bool anyInflightLoadsNotComplete(int miss_level = -1);

/** Returns the total number of stores in the store queue. */
int numStores();
/** Returns the total number of stores for a single thread. */
Expand Down
6 changes: 6 additions & 0 deletions src/cpu/o3/lsq_unit.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1520,6 +1520,12 @@ LSQUnit::squash(const InstSeqNum &squashed_num)
DPRINTF(HtmCpu, ">> htmStarts (%d) : htmStops-- (%d)\n",
htmStarts, htmStops);
}
auto request = loadQueue.back().request();
auto it = std::find(inflightLoads.begin(), inflightLoads.end(), request);
if (it != inflightLoads.end()) {
inflightLoads.erase(it);
}

// Clear the smart pointer to make sure it is decremented.
loadQueue.back().instruction()->setSquashed();
loadQueue.back().clear();
Expand Down
2 changes: 2 additions & 0 deletions src/cpu/o3/lsq_unit.hh
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,8 @@ class LSQUnit
using LoadQueue = CircularQueue<LQEntry>;
using StoreQueue = CircularQueue<SQEntry>;

/** Non-owning list of load requests currently tracked as in flight.
 *  In the code visible with this change, a single-data load request is
 *  appended after a successful send to the cache and removed when its
 *  timing response arrives or when its load-queue entry is squashed. */
std::vector<LSQRequest*> inflightLoads;

public:
/** Constructs an LSQ unit. init() must be called prior to use. */
LSQUnit(uint32_t lqEntries, uint32_t sqEntries, uint32_t sbufferEntries,
Expand Down

0 comments on commit 77eea17

Please sign in to comment.