Skip to content

Commit

Permalink
mem: prefetcher config align with kmh (#191)
Browse files Browse the repository at this point in the history
Change-Id: I41060526a1e317202b9c38a84cc9bb26841a177c
  • Loading branch information
tastynoob authored Oct 29, 2024
1 parent 1237bc4 commit 5cda952
Show file tree
Hide file tree
Showing 11 changed files with 88 additions and 21 deletions.
17 changes: 13 additions & 4 deletions configs/common/CacheConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def _get_cache_opts(level, options):
opts['assoc'] = getattr(options, assoc_attr)

prefetcher_attr = '{}_hwp_type'.format(level)
if hasattr(options, prefetcher_attr):
if hasattr(options, prefetcher_attr) and (not options.no_pf):
opts['prefetcher'] = _get_hwp(getattr(options, prefetcher_attr))

return opts
Expand Down Expand Up @@ -131,6 +131,11 @@ def config_cache(options, system):
# system.tol2bus_list.append(L2XBar(clk_domain = system.cpu_clk_domain, width=256))
system.l2_caches[i].cpu_side = system.tol2bus_list[i].mem_side_ports
system.tol2bus_list[i].snoop_filter.max_capacity = "16MB"
if options.kmh_align:
assert options.l2_hwp_type == 'L2CompositeWithWorkerPrefetcher'
system.l2_caches[i].prefetcher.enable_cmc = True
system.l2_caches[i].prefetcher.enable_bop = True
system.l2_caches[i].prefetcher.enable_cdp = False

if options.ideal_cache:
assert not options.l3cache, \
Expand Down Expand Up @@ -197,12 +202,16 @@ def config_cache(options, system):
dcache.prefetcher.enable_cplx = True
dcache.prefetcher.pht_pf_level = options.pht_pf_level
dcache.prefetcher.short_stride_thres = options.short_stride_thres
dcache.prefetcher.enable_temporal = not options.kmh_align
dcache.prefetcher.fuzzy_stride_matching = False
dcache.prefetcher.stream_pf_ahead = True

dcache.prefetcher.enable_bop = not options.kmh_align
dcache.prefetcher.bop_large.delay_queue_enable = True
dcache.prefetcher.bop_large.bad_score = 10
dcache.prefetcher.bop_small.delay_queue_enable = True
dcache.prefetcher.bop_small.bad_score = 5

dcache.prefetcher.queue_size = 128
dcache.prefetcher.max_prefetch_requests_with_pending_translation = 128
dcache.prefetcher.region_size = 64*16 # 64B * blocks per region
Expand All @@ -214,22 +223,22 @@ def config_cache(options, system):
system.cpu[i].add_pf_downstream(dcache.prefetcher)
if options.ideal_cache:
dcache.prefetcher.stream_pf_ahead = False
if options.l1d_use_xsstride:
if options.kmh_align:
dcache.prefetcher.enable_berti = False
dcache.prefetcher.enable_sstride = True

if options.ideal_cache:
icache.response_latency = 0
dcache.response_latency = 0

if options.l1_to_l2_pf_hint:
if (not options.no_pf) and options.l1_to_l2_pf_hint:
assert dcache.prefetcher != NULL and \
system.l2_caches[i].prefetcher != NULL
dcache.prefetcher.add_pf_downstream(system.l2_caches[i].prefetcher)
system.l2_caches[i].prefetcher.queue_size = 64
system.l2_caches[i].prefetcher.max_prefetch_requests_with_pending_translation = 128

if options.l3cache and options.l2_to_l3_pf_hint:
if (not options.no_pf) and options.l3cache and options.l2_to_l3_pf_hint:
assert system.l2_caches[i].prefetcher != NULL and \
system.l3.prefetcher != NULL
system.l2_caches[i].prefetcher.add_pf_downstream(system.l3.prefetcher)
Expand Down
8 changes: 5 additions & 3 deletions configs/common/Options.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,8 @@ def addNoISAOptions(parser, configure_xiangshan=False):
parser.add_argument("--cacheline_size", type=int, default=64)
parser.add_argument("--ideal-cache", action="store_true")

parser.add_argument("--no-pf", default=False,
action="store_true", help="L1 icache hardware prefetcher")
parser.add_argument("--l1i-hwp-type", default=None,
choices=ObjectList.hwp_list.get_names(), help="L1 icache hardware prefetcher")
parser.add_argument("--l1d-hwp-type", default='XSCompositePrefetcher',
Expand Down Expand Up @@ -280,12 +282,12 @@ def addCommonOptions(parser, configure_xiangshan=False):
action=ListRP, nargs=0,
help="List available replacement policy types")

parser.add_argument("--kmh-align", action="store_true", default=False,
help="""
Use kmu config""")
parser.add_argument("--list-hwp-types",
action=ListHWP, nargs=0,
help="List available hardware prefetcher types")
parser.add_argument("--l1d-use-xsstride", action="store_true", default=False,
help="""
Enable SPP component for L1 data prefetcher""")
parser.add_argument("--l1d-enable-spp", action="store_true", default=False,
help="""
Enable SPP component for L1 data prefetcher""")
Expand Down
9 changes: 6 additions & 3 deletions configs/example/kmh.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,13 @@
args.enable_difftest = True
args.enable_riscv_vector = True

args.l2_hwp_type = "WorkerPrefetcher"
# L1 cache prefetcher uses the stream and stride components
# L2 cache prefetcher uses PHT, BOP, and CMC
# Store-triggered prefetch training is disabled for the L1 prefetcher
# Berti is disabled in L1 and CDP is disabled in L2
args.l2_hwp_type = "L2CompositeWithWorkerPrefetcher"
args.pht_pf_level = 2
args.l1d_use_xsstride = True

args.kmh_align = True

assert not args.external_memory_system

Expand Down
3 changes: 3 additions & 0 deletions configs/example/xiangshan.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ def build_test_system(np, args):
if args.mem_type == 'DRAMsim3':
assert args.dramsim3_ini is not None

for cpu in test_sys.cpu:
cpu.store_prefetch_train = not args.kmh_align
# Ruby will overwrite store_prefetch_train
if ruby:
test_sys._dma_ports = []
bootmem = getattr(test_sys, '_bootmem', None)
Expand Down
9 changes: 9 additions & 0 deletions src/mem/cache/prefetch/Prefetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -995,6 +995,7 @@ class XSCompositePrefetcher(QueuedPrefetcher):
enable_spp = Param.Bool(False, "Enable SPP component")
enable_temporal = Param.Bool(False, "Enable temporal component")
enable_berti = Param.Bool(True,"Enable berti component")
enable_bop = Param.Bool(True, "Enable BOP")

enable_sstride = Param.Bool(False,"Enable sms stride component")
enable_opt = Param.Bool(False,"Enable opt component")
Expand Down Expand Up @@ -1025,6 +1026,14 @@ class L2CompositeWithWorkerPrefetcher(CompositeWithWorkerPrefetcher):
cxx_header = "mem/cache/prefetch/l2_composite_with_worker.hh"

cdp = Param.CDP(CDP(is_sub_prefetcher=True), "")
cmc = Param.CMCPrefetcher(CMCPrefetcher(is_sub_prefetcher=True), "")
bop_large = Param.BOPPrefetcher(BOPPrefetcher(is_sub_prefetcher=True),
"Large BOP used in composite prefetcher ")
bop_small = Param.BOPPrefetcher(SmallBOPPrefetcher(is_sub_prefetcher=True),
"Small BOP used in composite prefetcher ")
enable_bop = Param.Bool(False, "Enable BOP")
enable_cdp = Param.Bool(False, "Enable CDP")
enable_cmc = Param.Bool(False, "Enable CMC")

class L3CompositeWithWorkerPrefetcher(CompositeWithWorkerPrefetcher):
type = 'L3CompositeWithWorkerPrefetcher'
Expand Down
4 changes: 2 additions & 2 deletions src/mem/cache/prefetch/cmc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,15 @@ void
CMCPrefetcher::doPrefetch(const PrefetchInfo &pfi, std::vector<AddrPriority> &addresses, bool late,
PrefetchSourceType pf_source, bool is_first_shot)
{
bool can_prefetch = !pfi.isWrite() && pfi.hasPC();
bool can_prefetch = cache->level() == 1 ? (!pfi.isWrite() && pfi.hasPC()) : true;
if (!can_prefetch) {
return;
}
Addr pc = pfi.hasPC() ? pfi.getPC() : 0;

Addr vaddr = pfi.getAddr();
Addr block_addr = blockAddress(vaddr);
bool is_secure = pfi.isSecure();
Addr pc = pfi.getPC();
int prefetchSource = pf_source;

// if (enableDB) {
Expand Down
29 changes: 26 additions & 3 deletions src/mem/cache/prefetch/l2_composite_with_worker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,36 @@ namespace prefetch
{

L2CompositeWithWorkerPrefetcher::L2CompositeWithWorkerPrefetcher(const L2CompositeWithWorkerPrefetcherParams &p)
: CompositeWithWorkerPrefetcher(p), cdp(p.cdp)
: CompositeWithWorkerPrefetcher(p),
cdp(p.cdp),
largeBOP(p.bop_large),
smallBOP(p.bop_small),
cmc(p.cmc),
enableBOP(p.enable_bop),
enableCDP(p.enable_cdp),
enableCMC(p.enable_cmc)
{
cdp->pfLRUFilter = &pfLRUFilter;
largeBOP->filter = &pfLRUFilter;
smallBOP->filter = &pfLRUFilter;
cmc->filter = &pfLRUFilter;
cdp->parentRid = p.sys->getRequestorId(this);
}

void
L2CompositeWithWorkerPrefetcher::calculatePrefetch(const PrefetchInfo &pfi, std::vector<AddrPriority> &addresses)
L2CompositeWithWorkerPrefetcher::calculatePrefetch(const PrefetchInfo &pfi, std::vector<AddrPriority> &addresses,
bool late, PrefetchSourceType pf_source, bool miss_repeat)
{
cdp->calculatePrefetch(pfi, addresses);
if (enableCMC) {
cmc->doPrefetch(pfi, addresses, late, pf_source, false);
}
if (enableCDP) {
cdp->calculatePrefetch(pfi, addresses);
}
if (enableBOP) {
largeBOP->calculatePrefetch(pfi, addresses, late && pf_source == PrefetchSourceType::HWP_BOP);
smallBOP->calculatePrefetch(pfi, addresses, late && pf_source == PrefetchSourceType::HWP_BOP);
}
}

void
Expand Down Expand Up @@ -64,6 +84,9 @@ L2CompositeWithWorkerPrefetcher::setParentInfo(System *sys, ProbeManager *pm, Ca
{
cdp->setParentInfo(sys, pm, _cache, blk_size);
cdp->setStatsPtr(&prefetchStats);
largeBOP->setParentInfo(sys, pm, _cache, blk_size);
smallBOP->setParentInfo(sys, pm, _cache, blk_size);
cmc->setParentInfo(sys, pm, _cache, blk_size);
CompositeWithWorkerPrefetcher::setParentInfo(sys, pm, _cache, blk_size);
}

Expand Down
16 changes: 14 additions & 2 deletions src/mem/cache/prefetch/l2_composite_with_worker.hh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@

#include <vector>

#include "mem/cache/prefetch/bop.hh"
#include "mem/cache/prefetch/cdp.hh"
#include "mem/cache/prefetch/cmc.hh"
#include "mem/cache/prefetch/composite_with_worker.hh"
#include "params/L2CompositeWithWorkerPrefetcher.hh"

Expand All @@ -19,7 +21,10 @@ class L2CompositeWithWorkerPrefetcher : public CompositeWithWorkerPrefetcher
public:
L2CompositeWithWorkerPrefetcher(const L2CompositeWithWorkerPrefetcherParams &p);

void calculatePrefetch(const PrefetchInfo &pfi, std::vector<AddrPriority> &addresses) override;
void calculatePrefetch(const PrefetchInfo &pfi, std::vector<AddrPriority> &addresses) override {}

void calculatePrefetch(const PrefetchInfo &pfi, std::vector<AddrPriority> &addresses, bool late,
PrefetchSourceType source, bool miss_repeat) override;

void addHintDownStream(Base *down_stream) override
{
Expand All @@ -38,6 +43,13 @@ class L2CompositeWithWorkerPrefetcher : public CompositeWithWorkerPrefetcher

private:
CDP *cdp;
BOP* largeBOP;
BOP* smallBOP;
CMCPrefetcher* cmc;

const bool enableBOP;
const bool enableCDP;
const bool enableCMC;

bool offloadLowAccuracy = true;
};
Expand All @@ -46,4 +58,4 @@ class L2CompositeWithWorkerPrefetcher : public CompositeWithWorkerPrefetcher
} // namespace gem5


#endif // __MEM_CACHE_PREFETCH_COMPOITE_WITH_WORKER_L2_HH__
#endif // __MEM_CACHE_PREFETCH_COMPOITE_WITH_WORKER_L2_HH__
8 changes: 5 additions & 3 deletions src/mem/cache/prefetch/sms.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ XSCompositePrefetcher::XSCompositePrefetcher(const XSCompositePrefetcherParams &
enableTemporal(p.enable_temporal),
enableSstride(p.enable_sstride),
enableBerti(p.enable_berti),
enableBOP(p.enable_bop),
enableOpt(p.enable_opt),
enableXsstream(p.enable_xsstream),
phtEarlyUpdate(p.pht_early_update),
Expand Down Expand Up @@ -174,9 +175,10 @@ XSCompositePrefetcher::calculatePrefetch(const PrefetchInfo &pfi, std::vector<Ad


if (pf_source != PrefetchSourceType::SStream && !is_active_page) {
bool use_bop = (pfi.isPfFirstHit() &&
(pf_source == PrefetchSourceType::HWP_BOP || pf_source == PrefetchSourceType::IPCP_CPLX || pf_source == PrefetchSourceType::Berti)) ||
pfi.isCacheMiss();
bool use_bop = enableBOP && ((pfi.isPfFirstHit() && (pf_source == PrefetchSourceType::HWP_BOP ||
pf_source == PrefetchSourceType::IPCP_CPLX ||
pf_source == PrefetchSourceType::Berti)) ||
pfi.isCacheMiss());
use_bop &= !miss_repeat && is_first_shot; // miss repeat should not be handled by stride
if (use_bop) {
DPRINTF(XSCompositePrefetcher, "Do BOP traing/prefetching...\n");
Expand Down
1 change: 1 addition & 0 deletions src/mem/cache/prefetch/sms.hh
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ class XSCompositePrefetcher : public Queued
const bool enableTemporal;
const bool enableSstride;
const bool enableBerti;
const bool enableBOP;
const bool enableOpt;
const bool enableXsstream;
const bool phtEarlyUpdate;
Expand Down
5 changes: 4 additions & 1 deletion src/mem/cache/prefetch/worker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
namespace prefetch
{

WorkerPrefetcher::WorkerPrefetcher(const WorkerPrefetcherParams &p) : Queued(p), workerStats(this), pfLRUFilter(128)
WorkerPrefetcher::WorkerPrefetcher(const WorkerPrefetcherParams &p)
: Queued(p),
workerStats(this),
pfLRUFilter(256)
{
//Event *event = new EventFunctionWrapper([this]{ enableFunctionTrace(); }, name(), true);
transferEvent = new EventFunctionWrapper([this](){
Expand Down

0 comments on commit 5cda952

Please sign in to comment.