From a3e671a84e7fb91ac4396da2d0726fec0fcc5849 Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Mon, 12 Aug 2024 12:00:52 +0200 Subject: [PATCH 01/12] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7fdb4c83..1a7ebf5b 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ Building PCM Tools Clone PCM repository with submodules: ``` -git clone --recursive https://github.com/intel/pcm.git +git clone --recursive https://github.com/intel/pcm ``` or clone the repository first, and then update submodules with: From 68f3d928f1bf49be498e6303c453dc9b5ab28c68 Mon Sep 17 00:00:00 2001 From: "Bruggeman, Otto G" Date: Tue, 13 Aug 2024 11:42:48 +0200 Subject: [PATCH 02/12] Fix public github #810 --- src/lspci.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/lspci.h b/src/lspci.h index 0cf09134..af81886b 100644 --- a/src/lspci.h +++ b/src/lspci.h @@ -17,7 +17,9 @@ " https://raw.githubusercontent.com/pciutils/pciids/master/pci.ids and" \ " copy it to the current directory." #else +// different distributions put it in different places #define PCI_IDS_PATH "/usr/share/hwdata/pci.ids" +#define PCI_IDS_PATH2 "/usr/share/misc/pci.ids" #define PCI_IDS_NOT_FOUND "/usr/share/hwdata/pci.ids file is not available." 
\ " Ensure that the \"hwdata\" package is properly installed or download" \ " https://raw.githubusercontent.com/pciutils/pciids/master/pci.ids and" \ @@ -448,6 +450,12 @@ void load_PCIDB(PCIDB & pciDB) if (!in.is_open()) { #ifndef _MSC_VER + // On Unix, try PCI_IDS_PATH2 + in.open(PCI_IDS_PATH2); + } + + if (!in.is_open()) + { // On Unix, try the current directory if the default path failed in.open("pci.ids"); } From e3960c01f045cd5ad331bf70173bf881eaeca657 Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Mon, 1 Jul 2024 19:16:10 +0200 Subject: [PATCH 03/12] pcm-power for SRF --- src/cpucounters.cpp | 59 ++++++++++++++++++++++++++++++++++++--------- src/pcm-power.cpp | 18 ++++++++++++-- 2 files changed, 64 insertions(+), 13 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 68abab4b..50399d51 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -5598,6 +5598,7 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof { case SPR: case EMR: + case SRF: PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(1); // clock ticks break; default: @@ -5616,6 +5617,7 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof { case SPR: case EMR: + case SRF: PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x35); // POWER_STATE_OCCUPANCY.C0 PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x36); // POWER_STATE_OCCUPANCY.C3 PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x37); // POWER_STATE_OCCUPANCY.C6 @@ -5634,12 +5636,12 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof case 3: PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x04); // Thermal frequency limit cycles: FREQ_MAX_LIMIT_THERMAL_CYCLES PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x05); // Power frequency limit cycles: FREQ_MAX_POWER_CYCLES - PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR) + PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped 
frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF) break; case 4: // not supported on SKX, ICX, SNOWRIDGE, SPR, EMR PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x06); // OS frequency limit cycles: FREQ_MAX_OS_CYCLES PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x05); // Power frequency limit cycles: FREQ_MAX_POWER_CYCLES - PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX and ICX and SNOWRIDGE) + PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF) break; case 5: if(JAKETOWN == cpu_model) @@ -5650,8 +5652,17 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof { PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x60) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x60) ; // cycles spent changing frequency: FREQ_TRANS_CYCLES - } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model - || ICX == cpu_model || SNOWRIDGE == cpu_model || SPR == cpu_model || EMR == cpu_model) + } else if ( + HASWELLX == cpu_model + || BDX_DE == cpu_model + || BDX == cpu_model + || SKX == cpu_model + || ICX == cpu_model + || SNOWRIDGE == cpu_model + || SPR == cpu_model + || EMR == cpu_model + || SRF == cpu_model + ) { PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x74) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x74) ; // cycles spent changing frequency: FREQ_TRANS_CYCLES @@ -5670,11 +5681,21 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof { PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x2B) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC2 transitions PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x2D) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC6 transitions - } else if (HASWELLX == cpu_model || BDX_DE == 
cpu_model || BDX == cpu_model || SKX == cpu_model || ICX == cpu_model || SNOWRIDGE == cpu_model || SPR == cpu_model || EMR == cpu_model) + } else if ( + HASWELLX == cpu_model + || BDX_DE == cpu_model + || BDX == cpu_model + || SKX == cpu_model + || ICX == cpu_model + || SNOWRIDGE == cpu_model + || SPR == cpu_model + || EMR == cpu_model + || SRF == cpu_model + ) { - PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0x4E) ; // PC1e residenicies (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR) - PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x4E) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC1 transitions (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR) - PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x2B) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC2 transitions + PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0x4E) ; // PC1e residenicies (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF) + PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x4E) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC1 transitions (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF) + PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x2B) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC2e transitions PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x2D) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC6 transitions } else { @@ -9073,9 +9094,25 @@ void ServerUncorePMUs::program_power_metrics(int mc_profile) case PCM::SNOWRIDGE: case PCM::SPR: case PCM::EMR: + case PCM::SRF: UNC_M_POWER_CKE_CYCLES = 0x47; break; } + unsigned int UNC_M_POWER_CHANNEL_PPD_CYCLES = 0x85; + switch (cpu_model) + { + case PCM::SRF: + UNC_M_POWER_CHANNEL_PPD_CYCLES = 0x88; + break; + } + unsigned int UNC_M_SELF_REFRESH_ENTER_SUCCESS_CYCLES_UMASK = 0; + switch (cpu_model) + { + case PCM::SRF: + UNC_M_SELF_REFRESH_ENTER_SUCCESS_CYCLES_UMASK = 0x01; + break; + } + switch(mc_profile) { case 0: // POWER_CKE_CYCLES.RANK0 and POWER_CKE_CYCLES.RANK1 @@ -9103,9 +9140,9 @@ void ServerUncorePMUs::program_power_metrics(int mc_profile) MCCntConfig[3] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(0x80) + MC_CH_PCI_PMON_CTL_THRESH(1) + 
MC_CH_PCI_PMON_CTL_EDGE_DET; break; case 4: // POWER_SELF_REFRESH - MCCntConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(0x43); - MCCntConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(0x43) + MC_CH_PCI_PMON_CTL_THRESH(1) + MC_CH_PCI_PMON_CTL_EDGE_DET; - MCCntConfig[2] = MC_CH_PCI_PMON_CTL_EVENT(0x85); + MCCntConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(0x43) + MC_CH_PCI_PMON_CTL_UMASK(UNC_M_SELF_REFRESH_ENTER_SUCCESS_CYCLES_UMASK); + MCCntConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(0x43) + MC_CH_PCI_PMON_CTL_UMASK(UNC_M_SELF_REFRESH_ENTER_SUCCESS_CYCLES_UMASK) + MC_CH_PCI_PMON_CTL_THRESH(1) + MC_CH_PCI_PMON_CTL_EDGE_DET; + MCCntConfig[2] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CHANNEL_PPD_CYCLES); break; } diff --git a/src/pcm-power.cpp b/src/pcm-power.cpp index 08a015d8..86923c26 100644 --- a/src/pcm-power.cpp +++ b/src/pcm-power.cpp @@ -474,13 +474,26 @@ int mainThrows(int argc, char * argv[]) printHeader(true); cout << "; Thermal freq limit cycles: " << getNormalizedPCUCounter(u, 1, BeforeState[socket], AfterState[socket]) * 100. << " %" << "; Power freq limit cycles:" << getNormalizedPCUCounter(u, 2, BeforeState[socket], AfterState[socket]) * 100. << " %"; - if(cpu_model != PCM::SKX && cpu_model != PCM::ICX && cpu_model != PCM::SNOWRIDGE && cpu_model != PCM::SPR && cpu_model != PCM::EMR) + if( + cpu_model != PCM::SKX + && cpu_model != PCM::ICX + && cpu_model != PCM::SNOWRIDGE + && cpu_model != PCM::SPR + && cpu_model != PCM::EMR + && cpu_model != PCM::SRF + ) cout << "; Clipped freq limit cycles:" << getNormalizedPCUCounter(u, 3, BeforeState[socket], AfterState[socket]) * 100. 
<< " %"; cout << "\n"; break; case 4: - if (cpu_model == PCM::SKX || cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::SPR || cpu_model == PCM::EMR) + if ( cpu_model == PCM::SKX + || cpu_model == PCM::ICX + || cpu_model == PCM::SNOWRIDGE + || cpu_model == PCM::SPR + || cpu_model == PCM::EMR + || cpu_model == PCM::SRF + ) { cout << "This PCU profile is not supported on your processor\n"; break; @@ -517,6 +530,7 @@ int mainThrows(int argc, char * argv[]) case PCM::SNOWRIDGE: case PCM::SPR: case PCM::EMR: + case PCM::SRF: cout << "; PC2 residency: " << getPackageCStateResidency(2, BeforeState[socket], AfterState[socket]) * 100. << " %"; cout << "; PC2 transitions: " << getUncoreCounter(PCM::PCU_PMU_ID, u, 2, BeforeState[socket], AfterState[socket]) << " "; cout << "; PC3 residency: " << getPackageCStateResidency(3, BeforeState[socket], AfterState[socket]) * 100. << " %"; From 9be2dafba2080a759fff4f8303c63bdc70f6aa76 Mon Sep 17 00:00:00 2001 From: Thomas Willhalm Date: Thu, 22 Aug 2024 18:59:53 +0200 Subject: [PATCH 04/12] Added license to Dockerfile --- Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Dockerfile b/Dockerfile index 076eafda..906d37a6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,7 @@ FROM fedora:40@sha256:5ce8497aeea599bf6b54ab3979133923d82aaa4f6ca5ced1812611b197c79eb0 as builder +# Dockerfile for Intel PCM sensor server +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2020-2024 Intel Corporation RUN dnf -y install gcc-c++ git findutils make cmake openssl openssl-devel libasan libasan-static COPY . 
/tmp/pcm From 4f1eaf7f10bc9fe85931c568860437b6c9482d7d Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 10 Sep 2024 10:59:41 +0200 Subject: [PATCH 05/12] pcm-memory: replace NODE with SKT to avoid confusion with NUMA NODEs Change-Id: Ib474082f54f84a62b0d28b8cb07229502ca9fbf7 --- src/pcm-memory.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/pcm-memory.cpp b/src/pcm-memory.cpp index ba08e513..a283f5d2 100644 --- a/src/pcm-memory.cpp +++ b/src/pcm-memory.cpp @@ -355,21 +355,21 @@ float PMM_MM_Ratio(const memdata_t *md, const uint32 skt) void printSocketBWFooter(PCM *m, uint32 no_columns, uint32 skt, const memdata_t *md) { for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " Mem Read (MB/s) :" << setw(9) << md->iMC_Rd_socket[i] << " --|"; + cout << "|-- SKT " << setw(2) << i << " Mem Read (MB/s) :" << setw(9) << md->iMC_Rd_socket[i] << " --|"; } cout << "\n"; for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " Mem Write(MB/s) :" << setw(9) << md->iMC_Wr_socket[i] << " --|"; + cout << "|-- SKT " << setw(2) << i << " Mem Write(MB/s) :" << setw(9) << md->iMC_Wr_socket[i] << " --|"; } cout << "\n"; if (anyPmem(md->metrics)) { for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " PMM Read (MB/s): " << setw(8) << md->iMC_PMM_Rd_socket[i] << " --|"; + cout << "|-- SKT " << setw(2) << i << " PMM Read (MB/s): " << setw(8) << md->iMC_PMM_Rd_socket[i] << " --|"; } cout << "\n"; for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " PMM Write(MB/s): " << setw(8) << md->iMC_PMM_Wr_socket[i] << " --|"; + cout << "|-- SKT " << setw(2) << i << " PMM Write(MB/s): " << setw(8) << md->iMC_PMM_Wr_socket[i] << " --|"; } cout << "\n"; } @@ -377,17 +377,17 @@ void printSocketBWFooter(PCM *m, uint32 no_columns, uint32 skt, const memdata_t { for (uint32 i = skt; i < (skt + no_columns); 
++i) { - cout << "|-- NODE" << setw(2) << i << " PMM AD Bw(MB/s): " << setw(8) << AD_BW(md, i) << " --|"; + cout << "|-- SKT " << setw(2) << i << " PMM AD Bw(MB/s): " << setw(8) << AD_BW(md, i) << " --|"; } cout << "\n"; for (uint32 i = skt; i < (skt + no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " PMM MM Bw(MB/s): " << setw(8) << md->MemoryMode_Miss_socket[i] << " --|"; + cout << "|-- SKT " << setw(2) << i << " PMM MM Bw(MB/s): " << setw(8) << md->MemoryMode_Miss_socket[i] << " --|"; } cout << "\n"; for (uint32 i = skt; i < (skt + no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " PMM MM Bw/DRAM Bw:" << setw(8) << PMM_MM_Ratio(md, i) << " --|"; + cout << "|-- SKT " << setw(2) << i << " PMM MM Bw/DRAM Bw:" << setw(8) << PMM_MM_Ratio(md, i) << " --|"; } cout << "\n"; } @@ -396,7 +396,7 @@ void printSocketBWFooter(PCM *m, uint32 no_columns, uint32 skt, const memdata_t for (uint32 ctrl = 0; ctrl < max_imc_controllers; ++ctrl) { for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << "." << ctrl << " NM read hit rate :" << setw(6) << md->M2M_NM_read_hit_rate[i][ctrl] << " --|"; + cout << "|-- SKT " << setw(2) << i << "." << ctrl << " NM read hit rate :" << setw(6) << md->M2M_NM_read_hit_rate[i][ctrl] << " --|"; } cout << "\n"; } @@ -404,22 +404,22 @@ void printSocketBWFooter(PCM *m, uint32 no_columns, uint32 skt, const memdata_t if ((md->metrics == PmemMemoryMode && md->NM_hit_rate_supported) || md->BHS_NM == true) { for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " NM hit rate: " << setw(6) << md->NM_hit_rate[i] << " --|"; + cout << "|-- SKT " << setw(2) << i << " NM hit rate: " << setw(6) << md->NM_hit_rate[i] << " --|"; } cout << "\n"; for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " NM hits (M/s): " << setw(7) << (md->MemoryMode_Hit_socket[i])/1000000. 
<< " --|"; + cout << "|-- SKT " << setw(2) << i << " NM hits (M/s): " << setw(7) << (md->MemoryMode_Hit_socket[i])/1000000. << " --|"; } cout << "\n"; for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " NM misses (M/s): " << setw(7) << (md->MemoryMode_Miss_socket[i])/1000000. << " --|"; + cout << "|-- SKT " << setw(2) << i << " NM misses (M/s): " << setw(7) << (md->MemoryMode_Miss_socket[i])/1000000. << " --|"; } cout << "\n"; } if (md->BHS_NM == true) { for (uint32 i = skt; i < (skt + no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " NM miss Bw(MB/s):" << setw(9) << (md->MemoryMode_Miss_socket[i] * 64. * 2.) / 1000000. << " --|"; + cout << "|-- SKT " << setw(2) << i << " NM miss Bw(MB/s):" << setw(9) << (md->MemoryMode_Miss_socket[i] * 64. * 2.) / 1000000. << " --|"; } cout << "\n"; } @@ -428,12 +428,12 @@ void printSocketBWFooter(PCM *m, uint32 no_columns, uint32 skt, const memdata_t ) { for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " P. Write (T/s): " << dec << setw(10) << md->partial_write[i] << " --|"; + cout << "|-- SKT " << setw(2) << i << " P. 
Write (T/s): " << dec << setw(10) << md->partial_write[i] << " --|"; } cout << "\n"; } for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " Memory (MB/s): " << setw(11) << right << (md->iMC_Rd_socket[i]+md->iMC_Wr_socket[i]+ + cout << "|-- SKT " << setw(2) << i << " Memory (MB/s): " << setw(11) << right << (md->iMC_Rd_socket[i]+md->iMC_Wr_socket[i]+ md->iMC_PMM_Rd_socket[i]+md->iMC_PMM_Wr_socket[i]) << " --|"; } cout << "\n"; From 992bd904c8affd80d3619141a352a80ab78a699e Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Thu, 8 Aug 2024 10:25:39 +0200 Subject: [PATCH 06/12] add perfmon event repository as submodule for convenience Change-Id: Icaa4159fb57da0597a8ff1692850645e54e46a88 --- .gitmodules | 3 +++ perfmon | 1 + 2 files changed, 4 insertions(+) create mode 160000 perfmon diff --git a/.gitmodules b/.gitmodules index f1908446..53b7abf1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "src/simdjson"] path = src/simdjson url = https://github.com/simdjson/simdjson.git +[submodule "perfmon"] + path = perfmon + url = https://github.com/intel/perfmon diff --git a/perfmon b/perfmon new file mode 160000 index 00000000..f8c51ca9 --- /dev/null +++ b/perfmon @@ -0,0 +1 @@ +Subproject commit f8c51ca9f611356a3deb0e1ab6c1404d7393d126 From 49c76a86548f61ad21dcd45713f355d062d6c28c Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Thu, 8 Aug 2024 11:10:44 +0200 Subject: [PATCH 07/12] pcm-raw: update instructions for perfmon event map/files Change-Id: Ib40edae09a6399cc764687afab476da6ce88c78d --- src/pcm-raw.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pcm-raw.cpp b/src/pcm-raw.cpp index 593b35eb..400e5139 100644 --- a/src/pcm-raw.cpp +++ b/src/pcm-raw.cpp @@ -222,7 +222,9 @@ bool initPMUEventMap() if (!in.is_open()) { cerr << "ERROR: File " << mapfilePath << " can't be open. 
\n"; - cerr << " Download it from https://raw.githubusercontent.com/intel/perfmon/main/" << mapfile << " \n"; + cerr << " Use -ep /perfmon option if you cloned PCM source repository recursively with submodules,\n"; + cerr << " or run 'git clone https://github.com/intel/perfmon' to download the perfmon event repository and use -ep option\n"; + cerr << " or download the file from https://raw.githubusercontent.com/intel/perfmon/main/" << mapfile << " \n"; return false; } int32 FMSPos = -1; From 471de17e0b8aaf800fff5c5a75d4d579856b20fe Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 11 Sep 2024 14:50:28 +0200 Subject: [PATCH 08/12] add CPM.cmake Change-Id: Ie48ae106573b46482f91238bafc3c23f30f2373f --- cmake/CPM.cmake | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 cmake/CPM.cmake diff --git a/cmake/CPM.cmake b/cmake/CPM.cmake new file mode 100644 index 00000000..baf2d8c3 --- /dev/null +++ b/cmake/CPM.cmake @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: MIT +# +# SPDX-FileCopyrightText: Copyright (c) 2019-2023 Lars Melchior and contributors + +set(CPM_DOWNLOAD_VERSION 0.40.2) +set(CPM_HASH_SUM "c8cdc32c03816538ce22781ed72964dc864b2a34a310d3b7104812a5ca2d835d") + +if(CPM_SOURCE_CACHE) + set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") +elseif(DEFINED ENV{CPM_SOURCE_CACHE}) + set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") +else() + set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake") +endif() + +# Expand relative path. 
This is important if the provided path contains a tilde (~) +get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE) + +file(DOWNLOAD + https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake + ${CPM_DOWNLOAD_LOCATION} EXPECTED_HASH SHA256=${CPM_HASH_SUM} +) + +include(${CPM_DOWNLOAD_LOCATION}) From 475f22e2d39daf7479009f70e29eda0d35965819 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 11 Sep 2024 17:43:25 +0200 Subject: [PATCH 09/12] specify a stable URL for WinPMem Change-Id: I8a5e543487673228bed6fcb9486022d8828e693e --- src/mmio.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mmio.cpp b/src/mmio.cpp index 2d4973c6..9e4c406c 100644 --- a/src/mmio.cpp +++ b/src/mmio.cpp @@ -47,7 +47,7 @@ class PCMPmem : public WinPmem { _tcscat_s(driver_filename, MAX_PATH, TEXT("\\winpmem_x64.sys")); if (GetFileAttributes(driver_filename) == INVALID_FILE_ATTRIBUTES) { - std::cerr << "ERROR: winpmem_x64.sys not found in current directory. Download it from https://github.com/Velocidex/WinPmem/blob/master/kernel/binaries/winpmem_x64.sys .\n"; + std::cerr << "ERROR: winpmem_x64.sys not found in current directory. Download it from https://github.com/Velocidex/WinPmem/blob/f044f340dd05658d026b0f293cdfa92876159872/kernel/binaries/winpmem_x64.sys .\n"; std::cerr << "ERROR: Memory bandwidth statistics will not be available.\n"; } break; @@ -55,7 +55,7 @@ class PCMPmem : public WinPmem { _tcscat_s(driver_filename, MAX_PATH, TEXT("\\winpmem_x86.sys")); if (GetFileAttributes(driver_filename) == INVALID_FILE_ATTRIBUTES) { - std::cerr << "ERROR: winpmem_x86.sys not found in current directory. Download it from https://github.com/Velocidex/WinPmem/blob/master/kernel/binaries/winpmem_x86.sys .\n"; + std::cerr << "ERROR: winpmem_x86.sys not found in current directory. 
Download it from https://github.com/Velocidex/WinPmem/blob/f044f340dd05658d026b0f293cdfa92876159872/kernel/binaries/winpmem_x86.sys .\n"; std::cerr << "ERROR: Memory bandwidth statistics will not be available.\n"; } break; From f00fc7b4db9b34b5f5a629993b117c79a27b993f Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 11 Sep 2024 14:28:29 +0200 Subject: [PATCH 10/12] add initial support for LNL (Lunar Lake) Change-Id: I86f53f2e8fde44dd3431ad5c9df5bd91be534420 --- src/cpucounters.cpp | 9 ++++++++- src/cpucounters.h | 8 ++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 50399d51..b2f0bf35 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -711,6 +711,7 @@ void PCM::initCStateSupportTables() case ADL: case RPL: case MTL: + case LNL: case SNOWRIDGE: PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x3F8, 0, 0x3F9, 0, 0x3FA, 0, 0, 0, 0 }) ); case NEHALEM_EP: @@ -785,6 +786,7 @@ void PCM::initCStateSupportTables() case ADL: case RPL: case MTL: + case LNL: case SNOWRIDGE: case ICX: case SPR: @@ -1637,6 +1639,7 @@ bool PCM::detectNominalFrequency() || cpu_model == ADL || cpu_model == RPL || cpu_model == MTL + || cpu_model == LNL || cpu_model == SKX || cpu_model == ICX || cpu_model == SPR @@ -3237,6 +3240,7 @@ bool PCM::isCPUModelSupported(const int model_) || model_ == ADL || model_ == RPL || model_ == MTL + || model_ == LNL || model_ == SKX || model_ == ICX || model_ == SPR @@ -3408,7 +3412,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter canUsePerf = false; if (!silent) std::cerr << "Installed Linux kernel perf does not support hardware top-down level-1 counters. 
Using direct PMU programming instead.\n"; } - if (canUsePerf && (cpu_model == ADL || cpu_model == RPL || cpu_model == MTL)) + if (canUsePerf && (cpu_model == ADL || cpu_model == RPL || cpu_model == MTL || cpu_model == LNL)) { canUsePerf = false; if (!silent) std::cerr << "Linux kernel perf rejects an architectural event on your platform. Using direct PMU programming instead.\n"; @@ -3495,6 +3499,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter case ADL: case RPL: case MTL: + case LNL: LLCArchEventInit(hybridAtomEventDesc); hybridAtomEventDesc[2].event_number = SKL_MEM_LOAD_RETIRED_L2_MISS_EVTNR; hybridAtomEventDesc[2].umask_value = SKL_MEM_LOAD_RETIRED_L2_MISS_UMASK; @@ -4844,6 +4849,8 @@ const char * PCM::getUArchCodename(const int32 cpu_model_param) const return "Raptor Lake"; case MTL: return "Meteor Lake"; + case LNL: + return "Lunar Lake"; case SKX: if (cpu_model_param >= 0) { diff --git a/src/cpucounters.h b/src/cpucounters.h index 876b2e5f..2f3aef3c 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -1633,6 +1633,7 @@ class PCM_API PCM case ADL: case RPL: case MTL: + case LNL: if (topology[coreID].core_type == TopologyEntry::Atom) { return std::make_pair(OFFCORE_RESPONSE_0_EVTNR, event + 1); @@ -1648,6 +1649,7 @@ class PCM_API PCM case ADL: // ADL big core (GLC) case RPL: case MTL: + case LNL: useGLCOCREvent = true; break; } @@ -1873,6 +1875,7 @@ class PCM_API PCM RPL_2 = 0xbf, RPL_3 = 0xbe, MTL = 0xAA, + LNL = 0xBD, BDX = 79, KNL = 87, SKL = 94, @@ -2093,6 +2096,8 @@ class PCM_API PCM case RPL: case MTL: return 6; + case LNL: + return 12; case SNOWRIDGE: return 4; case DENVERTON: @@ -2432,6 +2437,7 @@ class PCM_API PCM || cpu_model == PCM::ADL || cpu_model == PCM::RPL || cpu_model == PCM::MTL + || cpu_model == PCM::LNL || cpu_model == PCM::SPR || cpu_model == PCM::EMR || cpu_model == PCM::SRF @@ -4229,6 +4235,7 @@ uint64 getL2CacheMisses(const CounterStateType & before, const CounterStateType || cpu_model == 
PCM::ADL || cpu_model == PCM::RPL || cpu_model == PCM::MTL + || cpu_model == PCM::LNL ) { return after.Event[BasicCounterState::SKLL2MissPos] - before.Event[BasicCounterState::SKLL2MissPos]; } @@ -4340,6 +4347,7 @@ uint64 getL3CacheHitsSnoop(const CounterStateType & before, const CounterStateTy || cpu_model == PCM::ADL || cpu_model == PCM::RPL || cpu_model == PCM::MTL + || cpu_model == PCM::LNL ) { const int64 misses = getL3CacheMisses(before, after); From 1cbf5580db25392874f43cbf24ecaca618a73761 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 11 Sep 2024 15:14:39 +0200 Subject: [PATCH 11/12] support free-running BW counters on LNL Change-Id: Ieef26942c585e61ba16048dfe1c7b0752dcc6c26 --- src/cpucounters.cpp | 1 + src/cpucounters.h | 1 + 2 files changed, 2 insertions(+) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index b2f0bf35..dbf902cf 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -1901,6 +1901,7 @@ void PCM::initUncoreObjects() case ADL: // TGLClientBW works fine for ADL case RPL: // TGLClientBW works fine for RPL case MTL: // TGLClientBW works fine for MTL + case LNL: // TGLClientBW works fine for LNL clientBW = std::make_shared(); break; /* Disabled since ADLClientBW requires 2x multiplier for BW on top diff --git a/src/cpucounters.h b/src/cpucounters.h index 2f3aef3c..e3d3e82d 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -2732,6 +2732,7 @@ class PCM_API PCM || cpu_model == ADL || cpu_model == RPL || cpu_model == MTL + || cpu_model == LNL || useSKLPath() ; } From da08f11b1b8e3f5971ad234edb9786d5a60140c9 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Thu, 12 Sep 2024 14:18:37 +0200 Subject: [PATCH 12/12] add GNR support Co-authored-by: Alexander Antonov Co-authored-by: Gang Deng Co-authored-by: White Hu Co-authored-by: Pavithran Pandiyan Co-authored-by: Roman Dementiev Change-Id: I4aabb179568ec5652e1814f58cc98a554430407f --- .../GenuineIntel-6-AD-0.json | 158 ++++++++++++++++++ src/cpucounters.cpp | 
65 ++++++- src/cpucounters.h | 26 ++- src/opCode-173.txt | 45 +++++ src/pcm-iio.cpp | 2 + src/pcm-memory.cpp | 6 +- src/pcm-pcie.cpp | 1 + src/pcm-power.cpp | 6 + 8 files changed, 303 insertions(+), 6 deletions(-) create mode 100644 src/PMURegisterDeclarations/GenuineIntel-6-AD-0.json create mode 100644 src/opCode-173.txt diff --git a/src/PMURegisterDeclarations/GenuineIntel-6-AD-0.json b/src/PMURegisterDeclarations/GenuineIntel-6-AD-0.json new file mode 100644 index 00000000..d68375b7 --- /dev/null +++ b/src/PMURegisterDeclarations/GenuineIntel-6-AD-0.json @@ -0,0 +1,158 @@ +{ + "core" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "User": {"Config": 0, "Position": 16, "Width": 1, "DefaultValue": 1}, + "OS": {"Config": 0, "Position": 17, "Width": 1, "DefaultValue": 1}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1}, + "PinControl": {"Config": 0, "Position": 19, "Width": 1, "DefaultValue": 0}, + "APICInt": {"Config": 0, "Position": 20, "Width": 1, "DefaultValue": 0}, + "Enable": {"Config": 0, "Position": 22, "Width": 1, "DefaultValue": 1}, + "Invert": {"Config": 0, "Position": 23, "Width": 1}, + "CounterMask": {"Config": 0, "Position": 24, "Width": 8}, + "InTX": {"Config": 0, "Position": 32, "Width": 1, "DefaultValue": 0}, + "InTXCheckpointed": {"Config": 0, "Position": 33, "Width": 1, "DefaultValue": 0}, + "MSRIndex": { + "0x1a6" : {"Config": 1, "Position": 0, "Width": 64}, + "0x1a7" : {"Config": 2, "Position": 0, "Width": 64}, + "0x3f6" : {"Config": 3, "Position": 0, "Width": 64}, + "0x3f7" : {"Config": 4, "Position": 0, "Width": 64} + } + }, + "fixed0" : { + "OS": {"Config": 0, "Position": 0, "Width": 1, "DefaultValue": 1}, + "User": {"Config": 0, "Position": 1, "Width": 1, "DefaultValue": 1}, + "EnablePMI": {"Config": 0, "Position": 3, "Width": 1, "DefaultValue": 0}, + "EventCode": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + 
"UMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "EdgeDetect": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "Invert": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "CounterMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"} + }, + "fixed1" : { + "OS": {"Config": 0, "Position": 4, "Width": 1, "DefaultValue": 1}, + "User": {"Config": 0, "Position": 5, "Width": 1, "DefaultValue": 1}, + "EnablePMI": {"Config": 0, "Position": 7, "Width": 1, "DefaultValue": 0}, + "EventCode": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "UMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "EdgeDetect": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "Invert": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "CounterMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"} + }, + "fixed2" : { + "OS": {"Config": 0, "Position": 8, "Width": 1, "DefaultValue": 1}, + "User": {"Config": 0, "Position": 9, "Width": 1, "DefaultValue": 1}, + "EnablePMI": {"Config": 0, "Position": 11, "Width": 1, "DefaultValue": 0}, + "EventCode": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "UMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "EdgeDetect": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "Invert": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "CounterMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"} + }, + "fixed3" : { + "OS": {"Config": 0, "Position": 12, "Width": 1, "DefaultValue": 1}, + "User": {"Config": 0, "Position": 13, "Width": 1, "DefaultValue": 1}, + "EnablePMI": {"Config": 0, "Position": 15, "Width": 1, 
"DefaultValue": 0}, + "EventCode": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "UMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "EdgeDetect": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "Invert": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "CounterMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "PerfMetrics": {"Config": 2, "Position": 0, "Width": 1, "DefaultValue": 0, "__comment": "fake field to tell the collector to also print the L1 top-down metrics, not just raw slots count"} + } + }, + "cha" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "TIDEnable": {"Config": 0, "Position": 16, "Width": 1, "DefaultValue": 0}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}, + "UMaskExt": {"Config": 0, "Position": 32, "Width": 26}, + "TID": {"Config": 1, "Position": 0, "Width": 10, "DefaultValue": 0} + } + }, + "imc" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "xpi" : { + "__comment" : "this is for UPI LL and QPI LL uncore PMUs", + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}, + "UMaskExt": {"Config": 0, "Position": 32, "Width": 24} + } + }, + "m2m" : { + "programmable" : { + "EventCode": {"Config": 0, 
"Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}, + "UMaskExt": {"Config": 0, "Position": 32, "Width": 8} + } + }, + "m3upi" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "mdf" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "irp" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "pcu" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0} + } + }, + "pciex8" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0} + } + }, + "pciex16" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0} + } + }, + "iio" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, 
"Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 12, "DefaultValue": 0}, + "PortMask": {"Config": 0, "Position": 36, "Width": 12}, + "FCMask": {"Config": 0, "Position": 48, "Width": 3} + } + } +} diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index dbf902cf..7f3c16e4 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -737,6 +737,7 @@ void PCM::initCStateSupportTables() case ICX: case SPR: case EMR: + case GNR: case SRF: PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x60D, 0, 0, 0, 0x3F9, 0, 0, 0, 0}) ); case HASWELL_ULT: @@ -791,6 +792,7 @@ void PCM::initCStateSupportTables() case ICX: case SPR: case EMR: + case GNR: case SRF: PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({0, 0, 0, 0x3FC, 0, 0, 0x3FD, 0x3FE, 0, 0, 0}) ); case KNL: @@ -1644,6 +1646,7 @@ bool PCM::detectNominalFrequency() || cpu_model == ICX || cpu_model == SPR || cpu_model == EMR + || cpu_model == GNR || cpu_model == SRF ) ? 
(100000000ULL) : (133333333ULL); @@ -1941,6 +1944,7 @@ void PCM::initUncoreObjects() case SNOWRIDGE: case SPR: case EMR: + case GNR: case SRF: { bool failed = false; @@ -2126,6 +2130,7 @@ void PCM::initUncorePMUsDirect() } break; case SRF: + case GNR: uncorePMUs[s].resize(1); { std::vector > CounterControlRegs{ @@ -2251,6 +2256,7 @@ void PCM::initUncorePMUsDirect() break; case SPR: case EMR: + case GNR: case SRF: uncorePMUs[s].resize(1); addPMUsFromDiscoveryRef(uncorePMUs[s][0][PCU_PMU_ID], SPR_PCU_BOX_TYPE, 0xE); @@ -2277,6 +2283,7 @@ void PCM::initUncorePMUsDirect() case EMR: addMDFPMUs(SPR_MDF_BOX_TYPE); break; + case GNR: case SRF: addMDFPMUs(BHS_MDF_BOX_TYPE); break; @@ -2323,6 +2330,7 @@ void PCM::initUncorePMUsDirect() switch (cpu_model) { + case GNR: case SRF: uncorePMUs[s].resize(1); if (safe_getenv("PCM_NO_PCIE_GEN5_DISCOVERY") == std::string("1")) @@ -2427,6 +2435,7 @@ void PCM::initUncorePMUsDirect() } } break; + case PCM::GNR: case PCM::SRF: for (uint32 s = 0; s < (uint32)num_sockets; ++s) { @@ -2636,6 +2645,7 @@ void PCM::initUncorePMUsDirect() IRP_CTR_REG_OFFSET = SPR_IRP_CTR_REG_OFFSET; IRP_UNIT_CTL = SPR_IRP_UNIT_CTL; break; + case GNR: case SRF: irpStacks = BHS_M2IOSF_NUM; IRP_CTL_REG_OFFSET = BHS_IRP_CTL_REG_OFFSET; @@ -2770,6 +2780,7 @@ void PCM::initUncorePMUsDirect() { case PCM::SPR: case PCM::EMR: + case PCM::GNR: case PCM::SRF: { const auto n_units = (std::min)(uncorePMUDiscovery->getNumBoxes(SPR_CXLCM_BOX_TYPE, s), @@ -3246,6 +3257,7 @@ bool PCM::isCPUModelSupported(const int model_) || model_ == ICX || model_ == SPR || model_ == EMR + || model_ == GNR || model_ == SRF ); } @@ -3287,6 +3299,9 @@ bool PCM::checkModel() case RPL_3: cpu_model = RPL; break; + case GNR_D: + cpu_model = GNR; + break; } if(!isCPUModelSupported((int)cpu_model)) @@ -3555,6 +3570,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter case ICX: case SPR: case EMR: + case GNR: assert(useSkylakeEvents()); coreEventDesc[0].event_number = 
SKL_MEM_LOAD_RETIRED_L3_MISS_EVTNR; coreEventDesc[0].umask_value = SKL_MEM_LOAD_RETIRED_L3_MISS_UMASK; @@ -4873,6 +4889,8 @@ const char * PCM::getUArchCodename(const int32 cpu_model_param) const return "Sapphire Rapids-SP"; case EMR: return "Emerald Rapids-SP"; + case GNR: + return "Granite Rapids-SP"; case SRF: return "Sierra Forest"; } @@ -5607,6 +5625,8 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof case SPR: case EMR: case SRF: + case GNR: + case GNR_D: PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(1); // clock ticks break; default: @@ -5626,6 +5646,8 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof case SPR: case EMR: case SRF: + case GNR: + case GNR_D: PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x35); // POWER_STATE_OCCUPANCY.C0 PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x36); // POWER_STATE_OCCUPANCY.C3 PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x37); // POWER_STATE_OCCUPANCY.C6 @@ -5644,12 +5666,12 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof case 3: PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x04); // Thermal frequency limit cycles: FREQ_MAX_LIMIT_THERMAL_CYCLES PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x05); // Power frequency limit cycles: FREQ_MAX_POWER_CYCLES - PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF) + PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF,GNR) break; case 4: // not supported on SKX, ICX, SNOWRIDGE, SPR, EMR PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x06); // OS frequency limit cycles: FREQ_MAX_OS_CYCLES PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x05); // Power frequency limit cycles: FREQ_MAX_POWER_CYCLES - PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on 
SKX,ICX,SNOWRIDGE,SPR,EMR,SRF) + PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF,GNR) break; case 5: if(JAKETOWN == cpu_model) @@ -5670,6 +5692,8 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof || SPR == cpu_model || EMR == cpu_model || SRF == cpu_model + || GNR == cpu_model + || GNR_D == cpu_model ) { PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x74) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions @@ -5699,10 +5723,12 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof || SPR == cpu_model || EMR == cpu_model || SRF == cpu_model + || GNR == cpu_model + || GNR_D == cpu_model ) { - PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0x4E) ; // PC1e residenicies (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF) - PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x4E) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC1 transitions (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF) + PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0x4E) ; // PC1e residenicies (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF,GNR) + PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x4E) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC1 transitions (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF,GNR) PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x2B) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC2e transitions PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x2D) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC6 transitions } else @@ -7543,6 +7569,7 @@ void ServerUncorePMUs::initRegisterLocations(const PCM * pcm) } else if ( cpu_model == PCM::SRF + || cpu_model == PCM::GNR ) { PCM_PCICFG_QPI_INIT(0, BHS); @@ -7770,6 +7797,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) case PCM::SNOWRIDGE: case PCM::SPR: case PCM::EMR: + case PCM::GNR: // B2CMI PMUs case PCM::SRF: m2mPMUs.push_back( UncorePMU( @@ -7954,6 +7982,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) switch 
(cpu_model) { + case PCM::GNR: case PCM::SRF: initBHSiMCPMUs(12); break; @@ -8040,6 +8069,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) ) ); break; + case PCM::GNR: case PCM::SRF: m3upiPMUs.push_back( UncorePMU( @@ -8205,6 +8235,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) break; case PCM::SPR: case PCM::EMR: + case PCM::GNR: case PCM::SRF: xpiPMUs.push_back( UncorePMU( @@ -8739,6 +8770,7 @@ void ServerUncorePMUs::programServerUncoreMemoryMetrics(const ServerUncoreMemory return; } break; + case PCM::GNR: case PCM::SRF: if (metrics == PmemMemoryMode) { @@ -8833,6 +8865,7 @@ void ServerUncorePMUs::program() EDCCntConfig[EventPosition::READ] = MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xcf); // monitor reads on counter 0: CAS_COUNT.RD EDCCntConfig[EventPosition::WRITE] = MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xf0); // monitor writes on counter 1: CAS_COUNT.WR break; + case PCM::GNR: case PCM::SRF: MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xcf); // monitor reads on counter 0: CAS_COUNT_SCH0.RD MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xf0); // monitor writes on counter 1: CAS_COUNT_SCH0.WR @@ -8964,6 +8997,7 @@ uint64 ServerUncorePMUs::getImcReadsForChannels(uint32 beginChannel, uint32 endC result += getMCCounter(i, EventPosition::READ); switch (cpu_model) { + case PCM::GNR: case PCM::SRF: result += getMCCounter(i, EventPosition::READ2); break; @@ -8980,6 +9014,7 @@ uint64 ServerUncorePMUs::getImcWrites() result += getMCCounter(i, EventPosition::WRITE); switch (cpu_model) { + case PCM::GNR: case PCM::SRF: result += getMCCounter(i, EventPosition::WRITE2); break; @@ -9103,6 +9138,8 @@ void ServerUncorePMUs::program_power_metrics(int mc_profile) case PCM::SPR: case PCM::EMR: case PCM::SRF: + case PCM::GNR: + 
case PCM::GNR_D: UNC_M_POWER_CKE_CYCLES = 0x47; break; } @@ -9110,6 +9147,8 @@ void ServerUncorePMUs::program_power_metrics(int mc_profile) switch (cpu_model) { case PCM::SRF: + case PCM::GNR: + case PCM::GNR_D: UNC_M_POWER_CHANNEL_PPD_CYCLES = 0x88; break; } @@ -9117,6 +9156,8 @@ void ServerUncorePMUs::program_power_metrics(int mc_profile) switch (cpu_model) { case PCM::SRF: + case PCM::GNR: + case PCM::GNR_D: UNC_M_SELF_REFRESH_ENTER_SUCCESS_CYCLES_UMASK = 0x01; break; } @@ -9218,6 +9259,7 @@ void ServerUncorePMUs::programM2M() cfg[EventPosition::PMM_READ] = M2M_PCI_PMON_CTL_EVENT(0x37) + M2M_PCI_PMON_CTL_UMASK(0x20) + UNC_PMON_CTL_UMASK_EXT(0x07); // UNC_M2M_IMC_READS.TO_PMM cfg[EventPosition::PMM_WRITE] = M2M_PCI_PMON_CTL_EVENT(0x38) + M2M_PCI_PMON_CTL_UMASK(0x80) + UNC_PMON_CTL_UMASK_EXT(0x1C); // UNC_M2M_IMC_WRITES.TO_PMM break; + case PCM::GNR: case PCM::SRF: cfg[EventPosition::NM_HIT] = M2M_PCI_PMON_CTL_EVENT(0x1F) + M2M_PCI_PMON_CTL_UMASK(0x0F); // UNC_B2CMI_TAG_HIT.ALL cfg[EventPosition::M2M_CLOCKTICKS] = 0; // CLOCKTICKS @@ -9569,6 +9611,7 @@ uint64 ServerUncorePMUs::computeQPISpeed(const uint32 core_nr, const int cpumode std::pair regBits{}; switch (cpumodel) { + case PCM::GNR: case PCM::SRF: UPISpeedMap = { { 0, 2500}, @@ -9692,6 +9735,7 @@ uint64 PCM::CX_MSR_PMON_CTRY(uint32 Cbo, uint32 Ctr) const case SPR: case EMR: + case GNR: case SRF: return SPR_CHA0_MSR_PMON_CTR0 + SPR_CHA_MSR_STEP * Cbo + Ctr; @@ -9721,6 +9765,7 @@ uint64 PCM::CX_MSR_PMON_BOX_FILTER(uint32 Cbo) const case SPR: case EMR: + case GNR: case SRF: return SPR_CHA0_MSR_PMON_BOX_FILTER + SPR_CHA_MSR_STEP * Cbo; @@ -9764,6 +9809,7 @@ uint64 PCM::CX_MSR_PMON_CTLY(uint32 Cbo, uint32 Ctl) const case SPR: case EMR: + case GNR: case SRF: return SPR_CHA0_MSR_PMON_CTL0 + SPR_CHA_MSR_STEP * Cbo + Ctl; @@ -9793,6 +9839,7 @@ uint64 PCM::CX_MSR_PMON_BOX_CTL(uint32 Cbo) const case SPR: case EMR: + case GNR: case SRF: return SPR_CHA0_MSR_PMON_BOX_CTRL + SPR_CHA_MSR_STEP * Cbo; @@ -9868,6 +9915,7 @@ 
uint32 PCM::getMaxNumOfCBoxesInternal() const uint64 val = 0; switch (cpu_model) { + case GNR: case SRF: { const auto MSR_PMON_NUMBER_CBOS = 0x3fed; @@ -9982,6 +10030,7 @@ void PCM::programIIOCounters(uint64 rawEvents[4], int IIOStack) int stacks_count; switch (getCPUModel()) { + case PCM::GNR: case PCM::SRF: stacks_count = BHS_M2IOSF_NUM; break; @@ -10076,6 +10125,7 @@ void PCM::programPCIeEventGroup(eventGroup_t &eventGroup) switch (cpu_model) { + case PCM::GNR: case PCM::SRF: case PCM::SPR: case PCM::EMR: @@ -10127,6 +10177,7 @@ void PCM::programCbo(const uint64 * events, const uint32 opCode, const uint32 nc && SNOWRIDGE != cpu_model && SPR != cpu_model && EMR != cpu_model + && GNR != cpu_model && SRF != cpu_model ) { @@ -10390,7 +10441,9 @@ bool PCM::supportIDXAccelDev() const { case PCM::SPR: case PCM::EMR: + case PCM::GNR: case PCM::SRF: + case PCM::GNR_D: retval = true; break; @@ -10625,6 +10678,7 @@ void UncorePMU::freeze(const uint32 extra) { case PCM::SPR: case PCM::EMR: + case PCM::GNR: case PCM::SRF: *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ; break; @@ -10639,6 +10693,7 @@ void UncorePMU::unfreeze(const uint32 extra) { case PCM::SPR: case PCM::EMR: + case PCM::GNR: case PCM::SRF: *unitControl = 0; break; @@ -10658,6 +10713,7 @@ bool UncorePMU::initFreeze(const uint32 extra, const char* xPICheckMsg) { case PCM::SPR: case PCM::EMR: + case PCM::GNR: case PCM::SRF: *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ; // freeze *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ + SPR_UNC_PMON_UNIT_CTL_RST_CONTROL; // freeze and reset control registers @@ -10696,6 +10752,7 @@ void UncorePMU::resetUnfreeze(const uint32 extra) { case PCM::SPR: case PCM::EMR: + case PCM::GNR: case PCM::SRF: *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ + SPR_UNC_PMON_UNIT_CTL_RST_COUNTERS; // freeze and reset counter registers *unitControl = 0; // unfreeze diff --git a/src/cpucounters.h b/src/cpucounters.h index e3d3e82d..8a02e69f 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -1248,6 +1248,7 
@@ class PCM_API PCM { case SPR: case EMR: + case GNR: case SRF: *ctrl = *curEvent; break; @@ -1304,6 +1305,8 @@ class PCM_API PCM ( SPR == cpu_model || EMR == cpu_model + || GNR == cpu_model + || GNR_D == cpu_model ); } @@ -1884,7 +1887,9 @@ class PCM_API PCM ICX = 106, SPR = 143, EMR = 207, + GNR = 173, SRF = 175, + GNR_D = 174, END_OF_MODEL_LIST = 0x0ffff }; @@ -1978,6 +1983,7 @@ class PCM_API PCM case ICX: case SPR: case EMR: + case GNR: case SRF: return (serverUncorePMUs.size() && serverUncorePMUs[0].get()) ? (serverUncorePMUs[0]->getNumQPIPorts()) : 0; } @@ -2004,6 +2010,7 @@ class PCM_API PCM case ICX: case SPR: case EMR: + case GNR: case SRF: case BDX: case KNL: @@ -2032,6 +2039,7 @@ class PCM_API PCM case ICX: case SPR: case EMR: + case GNR: case SRF: case BDX: case KNL: @@ -2063,6 +2071,7 @@ class PCM_API PCM case ICX: case SPR: case EMR: + case GNR: case SRF: case BDX: case KNL: @@ -2125,6 +2134,7 @@ class PCM_API PCM return 5; case SPR: case EMR: + case GNR: case SRF: return 6; } @@ -2177,6 +2187,7 @@ class PCM_API PCM case SNOWRIDGE: case SPR: case EMR: + case GNR: case SRF: case KNL: return true; @@ -2440,6 +2451,7 @@ class PCM_API PCM || cpu_model == PCM::LNL || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::GNR || cpu_model == PCM::SRF ); } @@ -2457,6 +2469,7 @@ class PCM_API PCM || cpu_model == PCM::ICX || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::GNR || cpu_model == PCM::SRF ); } @@ -2480,6 +2493,7 @@ class PCM_API PCM || cpu_model == PCM::ICX || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::GNR || cpu_model == PCM::SRF ); } @@ -2496,6 +2510,7 @@ class PCM_API PCM || cpu_model == PCM::ICX || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::GNR || cpu_model == PCM::SRF ); } @@ -2509,6 +2524,7 @@ class PCM_API PCM || cpu_model == PCM::SPR || cpu_model == PCM::EMR || cpu_model == PCM::SRF + || cpu_model == PCM::GNR ; } @@ -2521,6 +2537,7 @@ class PCM_API 
PCM { return ( cpu_model == PCM::SRF + || cpu_model == PCM::GNR ); } @@ -2557,10 +2574,11 @@ class PCM_API PCM return ( cpu_model == PCM::SKX || cpu_model == PCM::ICX - || cpu_model == PCM::SNOWRIDGE + || cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::SPR || cpu_model == PCM::EMR || cpu_model == PCM::SRF + || cpu_model == PCM::GNR ); } @@ -2569,6 +2587,7 @@ class PCM_API PCM return MSR.empty() == false && getMaxNumOfUncorePMUs(UBOX_PMU_ID) > 0ULL && getNumCores() == getNumOnlineCores() + && PCM::GNR != cpu_model && PCM::SRF != cpu_model ; } @@ -2661,6 +2680,7 @@ class PCM_API PCM || cpu_model == PCM::ICX || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::GNR || cpu_model == PCM::SRF || cpu_model == PCM::BDX || cpu_model == PCM::KNL @@ -2681,6 +2701,7 @@ class PCM_API PCM || cpu_model_ == PCM::ICX || cpu_model_ == PCM::SPR || cpu_model_ == PCM::EMR + || cpu_model_ == PCM::GNR || cpu_model_ == PCM::SRF ); } @@ -2705,6 +2726,7 @@ class PCM_API PCM || cpu_model == PCM::ICX || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::GNR || cpu_model == PCM::SRF ); } @@ -2720,6 +2742,7 @@ class PCM_API PCM || PCM::ICX == cpu_model || PCM::SPR == cpu_model || PCM::EMR == cpu_model + || PCM::GNR == cpu_model ; } @@ -3389,6 +3412,7 @@ double getDRAMConsumedJoules(const CounterStateType & before, const CounterState || PCM::BDX == cpu_model || PCM::SKX == cpu_model || PCM::ICX == cpu_model + || PCM::GNR == cpu_model || PCM::SRF == cpu_model || PCM::KNL == cpu_model ) { diff --git a/src/opCode-173.txt b/src/opCode-173.txt new file mode 100644 index 00000000..c3ccfbc9 --- /dev/null +++ b/src/opCode-173.txt @@ -0,0 +1,45 @@ +#Clockticks +#ctr=0,ev_sel=0x1,umask=0x0,en=1,ch_mask=0,fc_mask=0x0,multiplier=1,divider=1,hname=Clockticks,vname=Total +# Inbound (PCIe device DMA into system) payload events +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part0 
+ctr=1,ev_sel=0x83,umask=0x1,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part1 +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part2 +ctr=1,ev_sel=0x83,umask=0x1,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part3 +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part4 +ctr=1,ev_sel=0x83,umask=0x1,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part5 +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part6 +ctr=1,ev_sel=0x83,umask=0x1,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part7 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part0 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part1 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part2 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part3 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part4 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part5 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part6 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part7 +# Outbound (CPU MMIO to the PCIe device) payload events +ctr=2,ev_sel=0xc0,umask=0x4,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part0 +ctr=3,ev_sel=0xc0,umask=0x4,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part1 +ctr=2,ev_sel=0xc0,umask=0x4,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part2 +ctr=3,ev_sel=0xc0,umask=0x4,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part3 
+ctr=2,ev_sel=0xc0,umask=0x4,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part4 +ctr=3,ev_sel=0xc0,umask=0x4,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part5 +ctr=2,ev_sel=0xc0,umask=0x4,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part6 +ctr=3,ev_sel=0xc0,umask=0x4,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part7 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part0 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part1 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part2 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part3 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part4 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part5 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part6 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part7 +# IOMMU events +ctr=0,ev_sel=0x40,umask=0x01,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOTLB Lookup,vname=Total +ctr=1,ev_sel=0x40,umask=0x20,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOTLB Miss,vname=Total +ctr=2,ev_sel=0x40,umask=0x80,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=Ctxt Cache Hit,vname=Total +ctr=3,ev_sel=0x41,umask=0x10,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=256T Cache Hit,vname=Total +ctr=0,ev_sel=0x41,umask=0x08,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=512G Cache Hit,vname=Total +ctr=1,ev_sel=0x41,umask=0x04,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=1G Cache Hit,vname=Total +ctr=2,ev_sel=0x41,umask=0x02,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=2M Cache Hit,vname=Total 
+ctr=3,ev_sel=0x41,umask=0xc0,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOMMU Mem Access,vname=Total diff --git a/src/pcm-iio.cpp b/src/pcm-iio.cpp index ee97015c..d36f5f17 100644 --- a/src/pcm-iio.cpp +++ b/src/pcm-iio.cpp @@ -1700,6 +1700,7 @@ std::unique_ptr IPlatformMapping::getPlatformMapping(int cpu_m case PCM::EMR: return std::unique_ptr{new EagleStreamPlatformMapping(cpu_model, sockets_count)}; case PCM::SRF: + case PCM::GNR: return std::unique_ptr{new BirchStreamPlatform(cpu_model, sockets_count)}; default: return nullptr; @@ -1717,6 +1718,7 @@ ccr* get_ccr(PCM* m, uint64_t& ccr) case PCM::SPR: case PCM::EMR: case PCM::SRF: + case PCM::GNR: return new icx_ccr(ccr); default: cerr << m->getCPUFamilyModelString() << " is not supported! Program aborted" << endl; diff --git a/src/pcm-memory.cpp b/src/pcm-memory.cpp index a283f5d2..c2e9418f 100644 --- a/src/pcm-memory.cpp +++ b/src/pcm-memory.cpp @@ -425,6 +425,7 @@ void printSocketBWFooter(PCM *m, uint32 no_columns, uint32 skt, const memdata_t } if ( md->metrics == PartialWrites && m->getCPUModel() != PCM::SRF + && m->getCPUModel() != PCM::GNR ) { for (uint32 i=skt; i<(skt+no_columns); ++i) { @@ -732,6 +733,7 @@ void display_bandwidth_csv(PCM *m, memdata_t *md, uint64 /*elapsedTime*/, const if (m->HBMmemoryTrafficMetricsAvailable() == false) { if ( md->metrics == PartialWrites + && m->getCPUModel() != PCM::GNR && m->getCPUModel() != PCM::SRF ) { @@ -991,6 +993,7 @@ void calculate_bandwidth(PCM *m, writes = getMCCounter(channel, ServerUncorePMUs::EventPosition::WRITE, uncState1[skt], uncState2[skt]); switch (cpu_model) { + case PCM::GNR: case PCM::SRF: reads += getMCCounter(channel, ServerUncorePMUs::EventPosition::READ2, uncState1[skt], uncState2[skt]); writes += getMCCounter(channel, ServerUncorePMUs::EventPosition::WRITE2, uncState1[skt], uncState2[skt]); @@ -1054,7 +1057,8 @@ void calculate_bandwidth(PCM *m, md.MemoryMode_Hit_socket[skt] += toRate(memoryModeHits); } else if ( - cpu_model != 
PCM::SRF + cpu_model != PCM::GNR + && cpu_model != PCM::SRF ) { md.partial_write[skt] += (uint64)(getMCCounter(channel, ServerUncorePMUs::EventPosition::PARTIAL, uncState1[skt], uncState2[skt]) / (elapsedTime / 1000.0)); diff --git a/src/pcm-pcie.cpp b/src/pcm-pcie.cpp index 37ab8557..da55b93e 100644 --- a/src/pcm-pcie.cpp +++ b/src/pcm-pcie.cpp @@ -96,6 +96,7 @@ void print_usage(const string & progname) IPlatform *IPlatform::getPlatform(PCM *m, bool csv, bool print_bandwidth, bool print_additional_info, uint32 delay) { switch (m->getCPUModel()) { + case PCM::GNR: case PCM::SRF: return new BirchStreamPlatform(m, csv, print_bandwidth, print_additional_info, delay); case PCM::SPR: diff --git a/src/pcm-power.cpp b/src/pcm-power.cpp index 86923c26..7982375a 100644 --- a/src/pcm-power.cpp +++ b/src/pcm-power.cpp @@ -481,6 +481,8 @@ int mainThrows(int argc, char * argv[]) && cpu_model != PCM::SPR && cpu_model != PCM::EMR && cpu_model != PCM::SRF + && cpu_model != PCM::GNR + && cpu_model != PCM::GNR_D ) cout << "; Clipped freq limit cycles:" << getNormalizedPCUCounter(u, 3, BeforeState[socket], AfterState[socket]) * 100. << " %"; cout << "\n"; @@ -493,6 +495,8 @@ int mainThrows(int argc, char * argv[]) || cpu_model == PCM::SPR || cpu_model == PCM::EMR || cpu_model == PCM::SRF + || cpu_model == PCM::GNR + || cpu_model == PCM::GNR_D ) { cout << "This PCU profile is not supported on your processor\n"; @@ -531,6 +535,8 @@ int mainThrows(int argc, char * argv[]) case PCM::SPR: case PCM::EMR: case PCM::SRF: + case PCM::GNR: + case PCM::GNR_D: cout << "; PC2 residency: " << getPackageCStateResidency(2, BeforeState[socket], AfterState[socket]) * 100. << " %"; cout << "; PC2 transitions: " << getUncoreCounter(PCM::PCU_PMU_ID, u, 2, BeforeState[socket], AfterState[socket]) << " "; cout << "; PC3 residency: " << getPackageCStateResidency(3, BeforeState[socket], AfterState[socket]) * 100. << " %";