diff --git a/.gitmodules b/.gitmodules index f1908446..53b7abf1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "src/simdjson"] path = src/simdjson url = https://github.com/simdjson/simdjson.git +[submodule "perfmon"] + path = perfmon + url = https://github.com/intel/perfmon diff --git a/cmake/CPM.cmake b/cmake/CPM.cmake new file mode 100644 index 00000000..baf2d8c3 --- /dev/null +++ b/cmake/CPM.cmake @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: MIT +# +# SPDX-FileCopyrightText: Copyright (c) 2019-2023 Lars Melchior and contributors + +# Bootstrap: download the pinned CPM.cmake release, check it against CPM_HASH_SUM, then include it. +set(CPM_DOWNLOAD_VERSION 0.40.2) +set(CPM_HASH_SUM "c8cdc32c03816538ce22781ed72964dc864b2a34a310d3b7104812a5ca2d835d") + +if(CPM_SOURCE_CACHE) + set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") +elseif(DEFINED ENV{CPM_SOURCE_CACHE}) + set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") +else() + set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake") +endif() + +# Expand relative path. 
This is important if the provided path contains a tilde (~) +# The path is quoted wherever it is expanded so that a ';' in it cannot split it into several arguments. +get_filename_component(CPM_DOWNLOAD_LOCATION "${CPM_DOWNLOAD_LOCATION}" ABSOLUTE) + +file(DOWNLOAD + https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake + "${CPM_DOWNLOAD_LOCATION}" EXPECTED_HASH SHA256=${CPM_HASH_SUM} +) + +include("${CPM_DOWNLOAD_LOCATION}") diff --git a/perfmon b/perfmon new file mode 160000 index 00000000..f8c51ca9 --- /dev/null +++ b/perfmon @@ -0,0 +1 @@ +Subproject commit f8c51ca9f611356a3deb0e1ab6c1404d7393d126 diff --git a/src/PMURegisterDeclarations/GenuineIntel-6-AD-0.json b/src/PMURegisterDeclarations/GenuineIntel-6-AD-0.json new file mode 100644 index 00000000..d68375b7 --- /dev/null +++ b/src/PMURegisterDeclarations/GenuineIntel-6-AD-0.json @@ -0,0 +1,158 @@ +{ + "core" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "User": {"Config": 0, "Position": 16, "Width": 1, "DefaultValue": 1}, + "OS": {"Config": 0, "Position": 17, "Width": 1, "DefaultValue": 1}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1}, + "PinControl": {"Config": 0, "Position": 19, "Width": 1, "DefaultValue": 0}, + "APICInt": {"Config": 0, "Position": 20, "Width": 1, "DefaultValue": 0}, + "Enable": {"Config": 0, "Position": 22, "Width": 1, "DefaultValue": 1}, + "Invert": {"Config": 0, "Position": 23, "Width": 1}, + "CounterMask": {"Config": 0, "Position": 24, "Width": 8}, + "InTX": {"Config": 0, "Position": 32, "Width": 1, "DefaultValue": 0}, + "InTXCheckpointed": {"Config": 0, "Position": 33, "Width": 1, "DefaultValue": 0}, + "MSRIndex": { + "0x1a6" : {"Config": 1, "Position": 0, "Width": 64}, + "0x1a7" : {"Config": 2, "Position": 0, "Width": 64}, + "0x3f6" : {"Config": 3, "Position": 0, "Width": 64}, + "0x3f7" : {"Config": 4, "Position": 0, "Width": 64} + } + }, + "fixed0" : { + 
"OS": {"Config": 0, "Position": 0, "Width": 1, "DefaultValue": 1}, + "User": {"Config": 0, 
"Position": 1, "Width": 1, "DefaultValue": 1}, + "EnablePMI": {"Config": 0, "Position": 3, "Width": 1, "DefaultValue": 0}, + "EventCode": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "UMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "EdgeDetect": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "Invert": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "CounterMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"} + }, + "fixed1" : { + "OS": {"Config": 0, "Position": 4, "Width": 1, "DefaultValue": 1}, + "User": {"Config": 0, "Position": 5, "Width": 1, "DefaultValue": 1}, + "EnablePMI": {"Config": 0, "Position": 7, "Width": 1, "DefaultValue": 0}, + "EventCode": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "UMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "EdgeDetect": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "Invert": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "CounterMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"} + }, + "fixed2" : { + "OS": {"Config": 0, "Position": 8, "Width": 1, "DefaultValue": 1}, + "User": {"Config": 0, "Position": 9, "Width": 1, "DefaultValue": 1}, + "EnablePMI": {"Config": 0, "Position": 11, "Width": 1, "DefaultValue": 0}, + "EventCode": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "UMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "EdgeDetect": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "Invert": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "CounterMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"} + }, + 
"fixed3" : { + "OS": {"Config": 0, "Position": 12, "Width": 1, "DefaultValue": 1}, + "User": {"Config": 0, "Position": 13, "Width": 1, "DefaultValue": 1}, + "EnablePMI": {"Config": 0, "Position": 15, "Width": 1, "DefaultValue": 0}, + "EventCode": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "UMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "EdgeDetect": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "Invert": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "CounterMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "PerfMetrics": {"Config": 2, "Position": 0, "Width": 1, "DefaultValue": 0, "__comment": "fake field to tell the collector to also print the L1 top-down metrics, not just raw slots count"} + } + }, + "cha" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "TIDEnable": {"Config": 0, "Position": 16, "Width": 1, "DefaultValue": 0}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}, + "UMaskExt": {"Config": 0, "Position": 32, "Width": 26}, + "TID": {"Config": 1, "Position": 0, "Width": 10, "DefaultValue": 0} + } + }, + "imc" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "xpi" : { + "__comment" : "this is for UPI LL and QPI LL uncore PMUs", + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + 
"Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}, + "UMaskExt": {"Config": 0, "Position": 32, "Width": 24} + } + }, + "m2m" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}, + "UMaskExt": {"Config": 0, "Position": 32, "Width": 8} + } + }, + "m3upi" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "mdf" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "irp" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "pcu" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0} + } + }, + "pciex8" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0} + } + }, + "pciex16" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": 
{"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0} + } + }, + "iio" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 12, "DefaultValue": 0}, + "PortMask": {"Config": 0, "Position": 36, "Width": 12}, + "FCMask": {"Config": 0, "Position": 48, "Width": 3} + } + } +} diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 50399d51..7f3c16e4 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -711,6 +711,7 @@ void PCM::initCStateSupportTables() case ADL: case RPL: case MTL: + case LNL: case SNOWRIDGE: PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x3F8, 0, 0x3F9, 0, 0x3FA, 0, 0, 0, 0 }) ); case NEHALEM_EP: @@ -736,6 +737,7 @@ void PCM::initCStateSupportTables() case ICX: case SPR: case EMR: + case GNR: case SRF: PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x60D, 0, 0, 0, 0x3F9, 0, 0, 0, 0}) ); case HASWELL_ULT: @@ -785,10 +787,12 @@ void PCM::initCStateSupportTables() case ADL: case RPL: case MTL: + case LNL: case SNOWRIDGE: case ICX: case SPR: case EMR: + case GNR: case SRF: PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({0, 0, 0, 0x3FC, 0, 0, 0x3FD, 0x3FE, 0, 0, 0}) ); case KNL: @@ -1637,10 +1641,12 @@ bool PCM::detectNominalFrequency() || cpu_model == ADL || cpu_model == RPL || cpu_model == MTL + || cpu_model == LNL || cpu_model == SKX || cpu_model == ICX || cpu_model == SPR || cpu_model == EMR + || cpu_model == GNR || cpu_model == SRF ) ? 
(100000000ULL) : (133333333ULL); @@ -1898,6 +1904,7 @@ void PCM::initUncoreObjects() case ADL: // TGLClientBW works fine for ADL case RPL: // TGLClientBW works fine for RPL case MTL: // TGLClientBW works fine for MTL + case LNL: // TGLClientBW works fine for LNL clientBW = std::make_shared(); break; /* Disabled since ADLClientBW requires 2x multiplier for BW on top @@ -1937,6 +1944,7 @@ void PCM::initUncoreObjects() case SNOWRIDGE: case SPR: case EMR: + case GNR: case SRF: { bool failed = false; @@ -2122,6 +2130,7 @@ void PCM::initUncorePMUsDirect() } break; case SRF: + case GNR: uncorePMUs[s].resize(1); { std::vector > CounterControlRegs{ @@ -2247,6 +2256,7 @@ void PCM::initUncorePMUsDirect() break; case SPR: case EMR: + case GNR: case SRF: uncorePMUs[s].resize(1); addPMUsFromDiscoveryRef(uncorePMUs[s][0][PCU_PMU_ID], SPR_PCU_BOX_TYPE, 0xE); @@ -2273,6 +2283,7 @@ void PCM::initUncorePMUsDirect() case EMR: addMDFPMUs(SPR_MDF_BOX_TYPE); break; + case GNR: case SRF: addMDFPMUs(BHS_MDF_BOX_TYPE); break; @@ -2319,6 +2330,7 @@ void PCM::initUncorePMUsDirect() switch (cpu_model) { + case GNR: case SRF: uncorePMUs[s].resize(1); if (safe_getenv("PCM_NO_PCIE_GEN5_DISCOVERY") == std::string("1")) @@ -2423,6 +2435,7 @@ void PCM::initUncorePMUsDirect() } } break; + case PCM::GNR: case PCM::SRF: for (uint32 s = 0; s < (uint32)num_sockets; ++s) { @@ -2632,6 +2645,7 @@ void PCM::initUncorePMUsDirect() IRP_CTR_REG_OFFSET = SPR_IRP_CTR_REG_OFFSET; IRP_UNIT_CTL = SPR_IRP_UNIT_CTL; break; + case GNR: case SRF: irpStacks = BHS_M2IOSF_NUM; IRP_CTL_REG_OFFSET = BHS_IRP_CTL_REG_OFFSET; @@ -2766,6 +2780,7 @@ void PCM::initUncorePMUsDirect() { case PCM::SPR: case PCM::EMR: + case PCM::GNR: case PCM::SRF: { const auto n_units = (std::min)(uncorePMUDiscovery->getNumBoxes(SPR_CXLCM_BOX_TYPE, s), @@ -3237,10 +3252,12 @@ bool PCM::isCPUModelSupported(const int model_) || model_ == ADL || model_ == RPL || model_ == MTL + || model_ == LNL || model_ == SKX || model_ == ICX || model_ == SPR || 
model_ == EMR + || model_ == GNR || model_ == SRF ); } @@ -3282,6 +3299,9 @@ bool PCM::checkModel() case RPL_3: cpu_model = RPL; break; + case GNR_D: + cpu_model = GNR; + break; } if(!isCPUModelSupported((int)cpu_model)) @@ -3408,7 +3428,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter canUsePerf = false; if (!silent) std::cerr << "Installed Linux kernel perf does not support hardware top-down level-1 counters. Using direct PMU programming instead.\n"; } - if (canUsePerf && (cpu_model == ADL || cpu_model == RPL || cpu_model == MTL)) + if (canUsePerf && (cpu_model == ADL || cpu_model == RPL || cpu_model == MTL || cpu_model == LNL)) { canUsePerf = false; if (!silent) std::cerr << "Linux kernel perf rejects an architectural event on your platform. Using direct PMU programming instead.\n"; @@ -3495,6 +3515,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter case ADL: case RPL: case MTL: + case LNL: LLCArchEventInit(hybridAtomEventDesc); hybridAtomEventDesc[2].event_number = SKL_MEM_LOAD_RETIRED_L2_MISS_EVTNR; hybridAtomEventDesc[2].umask_value = SKL_MEM_LOAD_RETIRED_L2_MISS_UMASK; @@ -3549,6 +3570,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter case ICX: case SPR: case EMR: + case GNR: assert(useSkylakeEvents()); coreEventDesc[0].event_number = SKL_MEM_LOAD_RETIRED_L3_MISS_EVTNR; coreEventDesc[0].umask_value = SKL_MEM_LOAD_RETIRED_L3_MISS_UMASK; @@ -4844,6 +4866,8 @@ const char * PCM::getUArchCodename(const int32 cpu_model_param) const return "Raptor Lake"; case MTL: return "Meteor Lake"; + case LNL: + return "Lunar Lake"; case SKX: if (cpu_model_param >= 0) { @@ -4865,6 +4889,8 @@ const char * PCM::getUArchCodename(const int32 cpu_model_param) const return "Sapphire Rapids-SP"; case EMR: return "Emerald Rapids-SP"; + case GNR: + return "Granite Rapids-SP"; case SRF: return "Sierra Forest"; } @@ -5599,6 +5625,8 @@ PCM::ErrorCode 
PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof case SPR: case EMR: case SRF: + case GNR: + case GNR_D: PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(1); // clock ticks break; default: @@ -5618,6 +5646,8 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof case SPR: case EMR: case SRF: + case GNR: + case GNR_D: PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x35); // POWER_STATE_OCCUPANCY.C0 PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x36); // POWER_STATE_OCCUPANCY.C3 PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x37); // POWER_STATE_OCCUPANCY.C6 @@ -5636,12 +5666,12 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof case 3: PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x04); // Thermal frequency limit cycles: FREQ_MAX_LIMIT_THERMAL_CYCLES PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x05); // Power frequency limit cycles: FREQ_MAX_POWER_CYCLES - PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF) + PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF,GNR) break; case 4: // not supported on SKX, ICX, SNOWRIDGE, SPR, EMR PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x06); // OS frequency limit cycles: FREQ_MAX_OS_CYCLES PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x05); // Power frequency limit cycles: FREQ_MAX_POWER_CYCLES - PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF) + PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF,GNR) break; case 5: if(JAKETOWN == cpu_model) @@ -5662,6 +5692,8 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof || SPR == cpu_model || EMR == cpu_model || SRF == cpu_model 
+ || GNR == cpu_model + || GNR_D == cpu_model ) { PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x74) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions @@ -5691,10 +5723,12 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof || SPR == cpu_model || EMR == cpu_model || SRF == cpu_model + || GNR == cpu_model + || GNR_D == cpu_model ) { - PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0x4E) ; // PC1e residenicies (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF) - PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x4E) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC1 transitions (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF) + PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0x4E) ; // PC1e residenicies (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF,GNR) + PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x4E) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC1 transitions (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF,GNR) PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x2B) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC2e transitions PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x2D) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC6 transitions } else @@ -7535,6 +7569,7 @@ void ServerUncorePMUs::initRegisterLocations(const PCM * pcm) } else if ( cpu_model == PCM::SRF + || cpu_model == PCM::GNR ) { PCM_PCICFG_QPI_INIT(0, BHS); @@ -7762,6 +7797,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) case PCM::SNOWRIDGE: case PCM::SPR: case PCM::EMR: + case PCM::GNR: // B2CMI PMUs case PCM::SRF: m2mPMUs.push_back( UncorePMU( @@ -7946,6 +7982,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) switch (cpu_model) { + case PCM::GNR: case PCM::SRF: initBHSiMCPMUs(12); break; @@ -8032,6 +8069,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) ) ); break; + case PCM::GNR: case PCM::SRF: m3upiPMUs.push_back( UncorePMU( @@ -8197,6 +8235,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) break; case PCM::SPR: case PCM::EMR: + case PCM::GNR: case PCM::SRF: 
xpiPMUs.push_back( UncorePMU( @@ -8731,6 +8770,7 @@ void ServerUncorePMUs::programServerUncoreMemoryMetrics(const ServerUncoreMemory return; } break; + case PCM::GNR: case PCM::SRF: if (metrics == PmemMemoryMode) { @@ -8825,6 +8865,7 @@ void ServerUncorePMUs::program() EDCCntConfig[EventPosition::READ] = MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xcf); // monitor reads on counter 0: CAS_COUNT.RD EDCCntConfig[EventPosition::WRITE] = MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xf0); // monitor writes on counter 1: CAS_COUNT.WR break; + case PCM::GNR: case PCM::SRF: MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xcf); // monitor reads on counter 0: CAS_COUNT_SCH0.RD MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xf0); // monitor writes on counter 1: CAS_COUNT_SCH0.WR @@ -8956,6 +8997,7 @@ uint64 ServerUncorePMUs::getImcReadsForChannels(uint32 beginChannel, uint32 endC result += getMCCounter(i, EventPosition::READ); switch (cpu_model) { + case PCM::GNR: case PCM::SRF: result += getMCCounter(i, EventPosition::READ2); break; @@ -8972,6 +9014,7 @@ uint64 ServerUncorePMUs::getImcWrites() result += getMCCounter(i, EventPosition::WRITE); switch (cpu_model) { + case PCM::GNR: case PCM::SRF: result += getMCCounter(i, EventPosition::WRITE2); break; @@ -9095,6 +9138,8 @@ void ServerUncorePMUs::program_power_metrics(int mc_profile) case PCM::SPR: case PCM::EMR: case PCM::SRF: + case PCM::GNR: + case PCM::GNR_D: UNC_M_POWER_CKE_CYCLES = 0x47; break; } @@ -9102,6 +9147,8 @@ void ServerUncorePMUs::program_power_metrics(int mc_profile) switch (cpu_model) { case PCM::SRF: + case PCM::GNR: + case PCM::GNR_D: UNC_M_POWER_CHANNEL_PPD_CYCLES = 0x88; break; } @@ -9109,6 +9156,8 @@ void ServerUncorePMUs::program_power_metrics(int mc_profile) switch (cpu_model) { case PCM::SRF: + case PCM::GNR: + 
case PCM::GNR_D: UNC_M_SELF_REFRESH_ENTER_SUCCESS_CYCLES_UMASK = 0x01; break; } @@ -9210,6 +9259,7 @@ void ServerUncorePMUs::programM2M() cfg[EventPosition::PMM_READ] = M2M_PCI_PMON_CTL_EVENT(0x37) + M2M_PCI_PMON_CTL_UMASK(0x20) + UNC_PMON_CTL_UMASK_EXT(0x07); // UNC_M2M_IMC_READS.TO_PMM cfg[EventPosition::PMM_WRITE] = M2M_PCI_PMON_CTL_EVENT(0x38) + M2M_PCI_PMON_CTL_UMASK(0x80) + UNC_PMON_CTL_UMASK_EXT(0x1C); // UNC_M2M_IMC_WRITES.TO_PMM break; + case PCM::GNR: case PCM::SRF: cfg[EventPosition::NM_HIT] = M2M_PCI_PMON_CTL_EVENT(0x1F) + M2M_PCI_PMON_CTL_UMASK(0x0F); // UNC_B2CMI_TAG_HIT.ALL cfg[EventPosition::M2M_CLOCKTICKS] = 0; // CLOCKTICKS @@ -9561,6 +9611,7 @@ uint64 ServerUncorePMUs::computeQPISpeed(const uint32 core_nr, const int cpumode std::pair regBits{}; switch (cpumodel) { + case PCM::GNR: case PCM::SRF: UPISpeedMap = { { 0, 2500}, @@ -9684,6 +9735,7 @@ uint64 PCM::CX_MSR_PMON_CTRY(uint32 Cbo, uint32 Ctr) const case SPR: case EMR: + case GNR: case SRF: return SPR_CHA0_MSR_PMON_CTR0 + SPR_CHA_MSR_STEP * Cbo + Ctr; @@ -9713,6 +9765,7 @@ uint64 PCM::CX_MSR_PMON_BOX_FILTER(uint32 Cbo) const case SPR: case EMR: + case GNR: case SRF: return SPR_CHA0_MSR_PMON_BOX_FILTER + SPR_CHA_MSR_STEP * Cbo; @@ -9756,6 +9809,7 @@ uint64 PCM::CX_MSR_PMON_CTLY(uint32 Cbo, uint32 Ctl) const case SPR: case EMR: + case GNR: case SRF: return SPR_CHA0_MSR_PMON_CTL0 + SPR_CHA_MSR_STEP * Cbo + Ctl; @@ -9785,6 +9839,7 @@ uint64 PCM::CX_MSR_PMON_BOX_CTL(uint32 Cbo) const case SPR: case EMR: + case GNR: case SRF: return SPR_CHA0_MSR_PMON_BOX_CTRL + SPR_CHA_MSR_STEP * Cbo; @@ -9860,6 +9915,7 @@ uint32 PCM::getMaxNumOfCBoxesInternal() const uint64 val = 0; switch (cpu_model) { + case GNR: case SRF: { const auto MSR_PMON_NUMBER_CBOS = 0x3fed; @@ -9974,6 +10030,7 @@ void PCM::programIIOCounters(uint64 rawEvents[4], int IIOStack) int stacks_count; switch (getCPUModel()) { + case PCM::GNR: case PCM::SRF: stacks_count = BHS_M2IOSF_NUM; break; @@ -10068,6 +10125,7 @@ void 
PCM::programPCIeEventGroup(eventGroup_t &eventGroup) switch (cpu_model) { + case PCM::GNR: case PCM::SRF: case PCM::SPR: case PCM::EMR: @@ -10119,6 +10177,7 @@ void PCM::programCbo(const uint64 * events, const uint32 opCode, const uint32 nc && SNOWRIDGE != cpu_model && SPR != cpu_model && EMR != cpu_model + && GNR != cpu_model && SRF != cpu_model ) { @@ -10382,7 +10441,9 @@ bool PCM::supportIDXAccelDev() const { case PCM::SPR: case PCM::EMR: + case PCM::GNR: case PCM::SRF: + case PCM::GNR_D: retval = true; break; @@ -10617,6 +10678,7 @@ void UncorePMU::freeze(const uint32 extra) { case PCM::SPR: case PCM::EMR: + case PCM::GNR: case PCM::SRF: *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ; break; @@ -10631,6 +10693,7 @@ void UncorePMU::unfreeze(const uint32 extra) { case PCM::SPR: case PCM::EMR: + case PCM::GNR: case PCM::SRF: *unitControl = 0; break; @@ -10650,6 +10713,7 @@ bool UncorePMU::initFreeze(const uint32 extra, const char* xPICheckMsg) { case PCM::SPR: case PCM::EMR: + case PCM::GNR: case PCM::SRF: *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ; // freeze *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ + SPR_UNC_PMON_UNIT_CTL_RST_CONTROL; // freeze and reset control registers @@ -10688,6 +10752,7 @@ void UncorePMU::resetUnfreeze(const uint32 extra) { case PCM::SPR: case PCM::EMR: + case PCM::GNR: case PCM::SRF: *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ + SPR_UNC_PMON_UNIT_CTL_RST_COUNTERS; // freeze and reset counter registers *unitControl = 0; // unfreeze diff --git a/src/cpucounters.h b/src/cpucounters.h index 876b2e5f..8a02e69f 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -1248,6 +1248,7 @@ class PCM_API PCM { case SPR: case EMR: + case GNR: case SRF: *ctrl = *curEvent; break; @@ -1304,6 +1305,8 @@ class PCM_API PCM ( SPR == cpu_model || EMR == cpu_model + || GNR == cpu_model + || GNR_D == cpu_model ); } @@ -1633,6 +1636,7 @@ class PCM_API PCM case ADL: case RPL: case MTL: + case LNL: if (topology[coreID].core_type == TopologyEntry::Atom) { return 
std::make_pair(OFFCORE_RESPONSE_0_EVTNR, event + 1); @@ -1648,6 +1652,7 @@ class PCM_API PCM case ADL: // ADL big core (GLC) case RPL: case MTL: + case LNL: useGLCOCREvent = true; break; } @@ -1873,6 +1878,7 @@ class PCM_API PCM RPL_2 = 0xbf, RPL_3 = 0xbe, MTL = 0xAA, + LNL = 0xBD, BDX = 79, KNL = 87, SKL = 94, @@ -1881,7 +1887,9 @@ class PCM_API PCM ICX = 106, SPR = 143, EMR = 207, + GNR = 173, SRF = 175, + GNR_D = 174, END_OF_MODEL_LIST = 0x0ffff }; @@ -1975,6 +1983,7 @@ class PCM_API PCM case ICX: case SPR: case EMR: + case GNR: case SRF: return (serverUncorePMUs.size() && serverUncorePMUs[0].get()) ? (serverUncorePMUs[0]->getNumQPIPorts()) : 0; } @@ -2001,6 +2010,7 @@ class PCM_API PCM case ICX: case SPR: case EMR: + case GNR: case SRF: case BDX: case KNL: @@ -2029,6 +2039,7 @@ class PCM_API PCM case ICX: case SPR: case EMR: + case GNR: case SRF: case BDX: case KNL: @@ -2060,6 +2071,7 @@ class PCM_API PCM case ICX: case SPR: case EMR: + case GNR: case SRF: case BDX: case KNL: @@ -2093,6 +2105,8 @@ class PCM_API PCM case RPL: case MTL: return 6; + case LNL: + return 12; case SNOWRIDGE: return 4; case DENVERTON: @@ -2120,6 +2134,7 @@ class PCM_API PCM return 5; case SPR: case EMR: + case GNR: case SRF: return 6; } @@ -2172,6 +2187,7 @@ class PCM_API PCM case SNOWRIDGE: case SPR: case EMR: + case GNR: case SRF: case KNL: return true; @@ -2432,8 +2448,10 @@ class PCM_API PCM || cpu_model == PCM::ADL || cpu_model == PCM::RPL || cpu_model == PCM::MTL + || cpu_model == PCM::LNL || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::GNR || cpu_model == PCM::SRF ); } @@ -2451,6 +2469,7 @@ class PCM_API PCM || cpu_model == PCM::ICX || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::GNR || cpu_model == PCM::SRF ); } @@ -2474,6 +2493,7 @@ class PCM_API PCM || cpu_model == PCM::ICX || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::GNR || cpu_model == PCM::SRF ); } @@ -2490,6 +2510,7 @@ class PCM_API PCM || 
cpu_model == PCM::ICX || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::GNR || cpu_model == PCM::SRF ); } @@ -2503,6 +2524,7 @@ class PCM_API PCM || cpu_model == PCM::SPR || cpu_model == PCM::EMR || cpu_model == PCM::SRF + || cpu_model == PCM::GNR ; } @@ -2515,6 +2537,7 @@ class PCM_API PCM { return ( cpu_model == PCM::SRF + || cpu_model == PCM::GNR ); } @@ -2551,10 +2574,11 @@ class PCM_API PCM return ( cpu_model == PCM::SKX || cpu_model == PCM::ICX - || cpu_model == PCM::SNOWRIDGE + || cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::SPR || cpu_model == PCM::EMR || cpu_model == PCM::SRF + || cpu_model == PCM::GNR ); } @@ -2563,6 +2587,7 @@ class PCM_API PCM return MSR.empty() == false && getMaxNumOfUncorePMUs(UBOX_PMU_ID) > 0ULL && getNumCores() == getNumOnlineCores() + && PCM::GNR != cpu_model && PCM::SRF != cpu_model ; } @@ -2655,6 +2680,7 @@ class PCM_API PCM || cpu_model == PCM::ICX || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::GNR || cpu_model == PCM::SRF || cpu_model == PCM::BDX || cpu_model == PCM::KNL @@ -2675,6 +2701,7 @@ class PCM_API PCM || cpu_model_ == PCM::ICX || cpu_model_ == PCM::SPR || cpu_model_ == PCM::EMR + || cpu_model_ == PCM::GNR || cpu_model_ == PCM::SRF ); } @@ -2699,6 +2726,7 @@ class PCM_API PCM || cpu_model == PCM::ICX || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::GNR || cpu_model == PCM::SRF ); } @@ -2714,6 +2742,7 @@ class PCM_API PCM || PCM::ICX == cpu_model || PCM::SPR == cpu_model || PCM::EMR == cpu_model + || PCM::GNR == cpu_model ; } @@ -2726,6 +2755,7 @@ class PCM_API PCM || cpu_model == ADL || cpu_model == RPL || cpu_model == MTL + || cpu_model == LNL || useSKLPath() ; } @@ -3382,6 +3412,7 @@ double getDRAMConsumedJoules(const CounterStateType & before, const CounterState || PCM::BDX == cpu_model || PCM::SKX == cpu_model || PCM::ICX == cpu_model + || PCM::GNR == cpu_model || PCM::SRF == cpu_model || PCM::KNL == cpu_model ) { @@ -4229,6 +4260,7 @@ 
uint64 getL2CacheMisses(const CounterStateType & before, const CounterStateType || cpu_model == PCM::ADL || cpu_model == PCM::RPL || cpu_model == PCM::MTL + || cpu_model == PCM::LNL ) { return after.Event[BasicCounterState::SKLL2MissPos] - before.Event[BasicCounterState::SKLL2MissPos]; } @@ -4340,6 +4372,7 @@ uint64 getL3CacheHitsSnoop(const CounterStateType & before, const CounterStateTy || cpu_model == PCM::ADL || cpu_model == PCM::RPL || cpu_model == PCM::MTL + || cpu_model == PCM::LNL ) { const int64 misses = getL3CacheMisses(before, after); diff --git a/src/lspci.h b/src/lspci.h index 0017aac6..c0148f37 100644 --- a/src/lspci.h +++ b/src/lspci.h @@ -19,6 +19,7 @@ #else // different distributions put it in different places #define PCI_IDS_PATH "/usr/share/hwdata/pci.ids" +#define PCI_IDS_PATH2 "/usr/share/misc/pci.ids" #define PCI_IDS_NOT_FOUND "/usr/share/hwdata/pci.ids file is not available." \ " Ensure that the \"hwdata\" package is properly installed or download" \ " https://raw.githubusercontent.com/pciutils/pciids/master/pci.ids and" \ diff --git a/src/mmio.cpp b/src/mmio.cpp index 2d4973c6..9e4c406c 100644 --- a/src/mmio.cpp +++ b/src/mmio.cpp @@ -47,7 +47,7 @@ class PCMPmem : public WinPmem { _tcscat_s(driver_filename, MAX_PATH, TEXT("\\winpmem_x64.sys")); if (GetFileAttributes(driver_filename) == INVALID_FILE_ATTRIBUTES) { - std::cerr << "ERROR: winpmem_x64.sys not found in current directory. Download it from https://github.com/Velocidex/WinPmem/blob/master/kernel/binaries/winpmem_x64.sys .\n"; + std::cerr << "ERROR: winpmem_x64.sys not found in current directory. 
Download it from https://github.com/Velocidex/WinPmem/blob/f044f340dd05658d026b0f293cdfa92876159872/kernel/binaries/winpmem_x64.sys .\n"; std::cerr << "ERROR: Memory bandwidth statistics will not be available.\n"; } break; @@ -55,7 +55,7 @@ class PCMPmem : public WinPmem { _tcscat_s(driver_filename, MAX_PATH, TEXT("\\winpmem_x86.sys")); if (GetFileAttributes(driver_filename) == INVALID_FILE_ATTRIBUTES) { - std::cerr << "ERROR: winpmem_x86.sys not found in current directory. Download it from https://github.com/Velocidex/WinPmem/blob/master/kernel/binaries/winpmem_x86.sys .\n"; + std::cerr << "ERROR: winpmem_x86.sys not found in current directory. Download it from https://github.com/Velocidex/WinPmem/blob/f044f340dd05658d026b0f293cdfa92876159872/kernel/binaries/winpmem_x86.sys .\n"; std::cerr << "ERROR: Memory bandwidth statistics will not be available.\n"; } break; diff --git a/src/opCode-173.txt b/src/opCode-173.txt new file mode 100644 index 00000000..c3ccfbc9 --- /dev/null +++ b/src/opCode-173.txt @@ -0,0 +1,45 @@ +#Clockticks +#ctr=0,ev_sel=0x1,umask=0x0,en=1,ch_mask=0,fc_mask=0x0,multiplier=1,divider=1,hname=Clockticks,vname=Total +# Inbound (PCIe device DMA into system) payload events +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part0 +ctr=1,ev_sel=0x83,umask=0x1,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part1 +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part2 +ctr=1,ev_sel=0x83,umask=0x1,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part3 +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part4 +ctr=1,ev_sel=0x83,umask=0x1,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part5 +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part6 
+ctr=1,ev_sel=0x83,umask=0x1,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part7 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part0 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part1 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part2 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part3 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part4 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part5 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part6 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part7 +# Outbound (CPU MMIO to the PCIe device) payload events +ctr=2,ev_sel=0xc0,umask=0x4,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part0 +ctr=3,ev_sel=0xc0,umask=0x4,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part1 +ctr=2,ev_sel=0xc0,umask=0x4,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part2 +ctr=3,ev_sel=0xc0,umask=0x4,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part3 +ctr=2,ev_sel=0xc0,umask=0x4,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part4 +ctr=3,ev_sel=0xc0,umask=0x4,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part5 +ctr=2,ev_sel=0xc0,umask=0x4,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part6 +ctr=3,ev_sel=0xc0,umask=0x4,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part7 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part0 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part1 
+ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part2 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part3 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part4 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part5 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part6 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part7 +# IOMMU events +ctr=0,ev_sel=0x40,umask=0x01,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOTLB Lookup,vname=Total +ctr=1,ev_sel=0x40,umask=0x20,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOTLB Miss,vname=Total +ctr=2,ev_sel=0x40,umask=0x80,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=Ctxt Cache Hit,vname=Total +ctr=3,ev_sel=0x41,umask=0x10,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=256T Cache Hit,vname=Total +ctr=0,ev_sel=0x41,umask=0x08,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=512G Cache Hit,vname=Total +ctr=1,ev_sel=0x41,umask=0x04,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=1G Cache Hit,vname=Total +ctr=2,ev_sel=0x41,umask=0x02,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=2M Cache Hit,vname=Total +ctr=3,ev_sel=0x41,umask=0xc0,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOMMU Mem Access,vname=Total diff --git a/src/pcm-iio.cpp b/src/pcm-iio.cpp index ee97015c..d36f5f17 100644 --- a/src/pcm-iio.cpp +++ b/src/pcm-iio.cpp @@ -1700,6 +1700,7 @@ std::unique_ptr<IPlatformMapping> IPlatformMapping::getPlatformMapping(int cpu_m case PCM::EMR: return std::unique_ptr<IPlatformMapping>{new EagleStreamPlatformMapping(cpu_model, sockets_count)}; case PCM::SRF: + case PCM::GNR: return std::unique_ptr<IPlatformMapping>{new BirchStreamPlatform(cpu_model, sockets_count)}; default: return nullptr; @@ -1717,6 +1718,7 @@ ccr* get_ccr(PCM* m, uint64_t& ccr) case
PCM::SPR: case PCM::EMR: case PCM::SRF: + case PCM::GNR: return new icx_ccr(ccr); default: cerr << m->getCPUFamilyModelString() << " is not supported! Program aborted" << endl; diff --git a/src/pcm-memory.cpp b/src/pcm-memory.cpp index ba08e513..c2e9418f 100644 --- a/src/pcm-memory.cpp +++ b/src/pcm-memory.cpp @@ -355,21 +355,21 @@ float PMM_MM_Ratio(const memdata_t *md, const uint32 skt) void printSocketBWFooter(PCM *m, uint32 no_columns, uint32 skt, const memdata_t *md) { for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " Mem Read (MB/s) :" << setw(9) << md->iMC_Rd_socket[i] << " --|"; + cout << "|-- SKT " << setw(2) << i << " Mem Read (MB/s) :" << setw(9) << md->iMC_Rd_socket[i] << " --|"; } cout << "\n"; for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " Mem Write(MB/s) :" << setw(9) << md->iMC_Wr_socket[i] << " --|"; + cout << "|-- SKT " << setw(2) << i << " Mem Write(MB/s) :" << setw(9) << md->iMC_Wr_socket[i] << " --|"; } cout << "\n"; if (anyPmem(md->metrics)) { for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " PMM Read (MB/s): " << setw(8) << md->iMC_PMM_Rd_socket[i] << " --|"; + cout << "|-- SKT " << setw(2) << i << " PMM Read (MB/s): " << setw(8) << md->iMC_PMM_Rd_socket[i] << " --|"; } cout << "\n"; for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " PMM Write(MB/s): " << setw(8) << md->iMC_PMM_Wr_socket[i] << " --|"; + cout << "|-- SKT " << setw(2) << i << " PMM Write(MB/s): " << setw(8) << md->iMC_PMM_Wr_socket[i] << " --|"; } cout << "\n"; } @@ -377,17 +377,17 @@ void printSocketBWFooter(PCM *m, uint32 no_columns, uint32 skt, const memdata_t { for (uint32 i = skt; i < (skt + no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " PMM AD Bw(MB/s): " << setw(8) << AD_BW(md, i) << " --|"; + cout << "|-- SKT " << setw(2) << i << " PMM AD Bw(MB/s): " << setw(8) << AD_BW(md, i) << " --|"; } cout << "\n"; 
for (uint32 i = skt; i < (skt + no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " PMM MM Bw(MB/s): " << setw(8) << md->MemoryMode_Miss_socket[i] << " --|"; + cout << "|-- SKT " << setw(2) << i << " PMM MM Bw(MB/s): " << setw(8) << md->MemoryMode_Miss_socket[i] << " --|"; } cout << "\n"; for (uint32 i = skt; i < (skt + no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " PMM MM Bw/DRAM Bw:" << setw(8) << PMM_MM_Ratio(md, i) << " --|"; + cout << "|-- SKT " << setw(2) << i << " PMM MM Bw/DRAM Bw:" << setw(8) << PMM_MM_Ratio(md, i) << " --|"; } cout << "\n"; } @@ -396,7 +396,7 @@ void printSocketBWFooter(PCM *m, uint32 no_columns, uint32 skt, const memdata_t for (uint32 ctrl = 0; ctrl < max_imc_controllers; ++ctrl) { for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << "." << ctrl << " NM read hit rate :" << setw(6) << md->M2M_NM_read_hit_rate[i][ctrl] << " --|"; + cout << "|-- SKT " << setw(2) << i << "." << ctrl << " NM read hit rate :" << setw(6) << md->M2M_NM_read_hit_rate[i][ctrl] << " --|"; } cout << "\n"; } @@ -404,36 +404,37 @@ void printSocketBWFooter(PCM *m, uint32 no_columns, uint32 skt, const memdata_t if ((md->metrics == PmemMemoryMode && md->NM_hit_rate_supported) || md->BHS_NM == true) { for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " NM hit rate: " << setw(6) << md->NM_hit_rate[i] << " --|"; + cout << "|-- SKT " << setw(2) << i << " NM hit rate: " << setw(6) << md->NM_hit_rate[i] << " --|"; } cout << "\n"; for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " NM hits (M/s): " << setw(7) << (md->MemoryMode_Hit_socket[i])/1000000. << " --|"; + cout << "|-- SKT " << setw(2) << i << " NM hits (M/s): " << setw(7) << (md->MemoryMode_Hit_socket[i])/1000000. 
<< " --|"; } cout << "\n"; for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " NM misses (M/s): " << setw(7) << (md->MemoryMode_Miss_socket[i])/1000000. << " --|"; + cout << "|-- SKT " << setw(2) << i << " NM misses (M/s): " << setw(7) << (md->MemoryMode_Miss_socket[i])/1000000. << " --|"; } cout << "\n"; } if (md->BHS_NM == true) { for (uint32 i = skt; i < (skt + no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " NM miss Bw(MB/s):" << setw(9) << (md->MemoryMode_Miss_socket[i] * 64. * 2.) / 1000000. << " --|"; + cout << "|-- SKT " << setw(2) << i << " NM miss Bw(MB/s):" << setw(9) << (md->MemoryMode_Miss_socket[i] * 64. * 2.) / 1000000. << " --|"; } cout << "\n"; } if ( md->metrics == PartialWrites && m->getCPUModel() != PCM::SRF + && m->getCPUModel() != PCM::GNR ) { for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " P. Write (T/s): " << dec << setw(10) << md->partial_write[i] << " --|"; + cout << "|-- SKT " << setw(2) << i << " P. 
Write (T/s): " << dec << setw(10) << md->partial_write[i] << " --|"; } cout << "\n"; } for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " Memory (MB/s): " << setw(11) << right << (md->iMC_Rd_socket[i]+md->iMC_Wr_socket[i]+ + cout << "|-- SKT " << setw(2) << i << " Memory (MB/s): " << setw(11) << right << (md->iMC_Rd_socket[i]+md->iMC_Wr_socket[i]+ md->iMC_PMM_Rd_socket[i]+md->iMC_PMM_Wr_socket[i]) << " --|"; } cout << "\n"; @@ -732,6 +733,7 @@ void display_bandwidth_csv(PCM *m, memdata_t *md, uint64 /*elapsedTime*/, const if (m->HBMmemoryTrafficMetricsAvailable() == false) { if ( md->metrics == PartialWrites + && m->getCPUModel() != PCM::GNR && m->getCPUModel() != PCM::SRF ) { @@ -991,6 +993,7 @@ void calculate_bandwidth(PCM *m, writes = getMCCounter(channel, ServerUncorePMUs::EventPosition::WRITE, uncState1[skt], uncState2[skt]); switch (cpu_model) { + case PCM::GNR: case PCM::SRF: reads += getMCCounter(channel, ServerUncorePMUs::EventPosition::READ2, uncState1[skt], uncState2[skt]); writes += getMCCounter(channel, ServerUncorePMUs::EventPosition::WRITE2, uncState1[skt], uncState2[skt]); @@ -1054,7 +1057,8 @@ void calculate_bandwidth(PCM *m, md.MemoryMode_Hit_socket[skt] += toRate(memoryModeHits); } else if ( - cpu_model != PCM::SRF + cpu_model != PCM::GNR + && cpu_model != PCM::SRF ) { md.partial_write[skt] += (uint64)(getMCCounter(channel, ServerUncorePMUs::EventPosition::PARTIAL, uncState1[skt], uncState2[skt]) / (elapsedTime / 1000.0)); diff --git a/src/pcm-pcie.cpp b/src/pcm-pcie.cpp index 37ab8557..da55b93e 100644 --- a/src/pcm-pcie.cpp +++ b/src/pcm-pcie.cpp @@ -96,6 +96,7 @@ void print_usage(const string & progname) IPlatform *IPlatform::getPlatform(PCM *m, bool csv, bool print_bandwidth, bool print_additional_info, uint32 delay) { switch (m->getCPUModel()) { + case PCM::GNR: case PCM::SRF: return new BirchStreamPlatform(m, csv, print_bandwidth, print_additional_info, delay); case PCM::SPR: diff --git 
a/src/pcm-power.cpp b/src/pcm-power.cpp index 86923c26..7982375a 100644 --- a/src/pcm-power.cpp +++ b/src/pcm-power.cpp @@ -481,6 +481,8 @@ int mainThrows(int argc, char * argv[]) && cpu_model != PCM::SPR && cpu_model != PCM::EMR && cpu_model != PCM::SRF + && cpu_model != PCM::GNR + && cpu_model != PCM::GNR_D ) cout << "; Clipped freq limit cycles:" << getNormalizedPCUCounter(u, 3, BeforeState[socket], AfterState[socket]) * 100. << " %"; cout << "\n"; @@ -493,6 +495,8 @@ int mainThrows(int argc, char * argv[]) || cpu_model == PCM::SPR || cpu_model == PCM::EMR || cpu_model == PCM::SRF + || cpu_model == PCM::GNR + || cpu_model == PCM::GNR_D ) { cout << "This PCU profile is not supported on your processor\n"; @@ -531,6 +535,8 @@ int mainThrows(int argc, char * argv[]) case PCM::SPR: case PCM::EMR: case PCM::SRF: + case PCM::GNR: + case PCM::GNR_D: cout << "; PC2 residency: " << getPackageCStateResidency(2, BeforeState[socket], AfterState[socket]) * 100. << " %"; cout << "; PC2 transitions: " << getUncoreCounter(PCM::PCU_PMU_ID, u, 2, BeforeState[socket], AfterState[socket]) << " "; cout << "; PC3 residency: " << getPackageCStateResidency(3, BeforeState[socket], AfterState[socket]) * 100. << " %"; diff --git a/src/pcm-raw.cpp b/src/pcm-raw.cpp index 593b35eb..400e5139 100644 --- a/src/pcm-raw.cpp +++ b/src/pcm-raw.cpp @@ -222,7 +222,9 @@ bool initPMUEventMap() if (!in.is_open()) { cerr << "ERROR: File " << mapfilePath << " can't be open. \n"; - cerr << " Download it from https://raw.githubusercontent.com/intel/perfmon/main/" << mapfile << " \n"; + cerr << " Use -ep <pcm_source_directory>/perfmon option if you cloned PCM source repository recursively with submodules,\n"; + cerr << " or run 'git clone https://github.com/intel/perfmon' to download the perfmon event repository and use -ep <perfmon_directory> option\n"; + cerr << " or download the file from https://raw.githubusercontent.com/intel/perfmon/main/" << mapfile << " \n"; return false; } int32 FMSPos = -1;