From 9550c4aa4128d8c0affe3f72ad7e6d8389c6b575 Mon Sep 17 00:00:00 2001 From: "CCR\\huqiuhua" Date: Fri, 4 Aug 2023 13:59:03 +0800 Subject: [PATCH 01/43] pcm-accel: return value 0 to avoid the confuse to user if overflow happen. --- src/cpucounters.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/cpucounters.h b/src/cpucounters.h index 0da43604..2c0c6cb1 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -4356,6 +4356,12 @@ inline double getLocalMemoryRequestRatio(const CounterStateType & before, const template inline uint64 getNumberOfEvents(const CounterType & before, const CounterType & after) { + // prevent overflows due to counter dissynchronisation + if (after.data < before.data) + { + return 0; + } + return after.data - before.data; } //! \brief Returns average last level cache read+prefetch miss latency in ns From ff011f78b7fc13e5acf95649f3f64053f93382c4 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Fri, 4 Aug 2023 11:45:53 +0200 Subject: [PATCH 02/43] add UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES test Change-Id: Iad2bc336a296f801f72e6848befe27a9c70857db --- tests/test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test.sh b/tests/test.sh index 60f22ec6..e5124075 100755 --- a/tests/test.sh +++ b/tests/test.sh @@ -319,6 +319,7 @@ UNC_UPI_TxL0P_POWER_CYCLES UNC_UPI_RxL0P_POWER_CYCLES UNC_UPI_RxL_FLITS.ALL_DATA UNC_UPI_RxL_FLITS.NON_DATA +UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES MSR_EVENT:msr=0x10:type=FREERUN:scope=thread MSR_EVENT:msr=0x10:type=static:scope=thread pcicfg/config=0x2021,config1=4,config2=0,width=32 From 6f04f38bfbba355e4c1017fc1e44616145a04bc2 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Fri, 4 Aug 2023 12:53:38 +0200 Subject: [PATCH 03/43] add pcu in GenuineIntel-6-55-4.json Change-Id: I29b1e15ca76deb1d45c7502d89ad514aaaad03dd --- src/PMURegisterDeclarations/GenuineIntel-6-55-4.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/PMURegisterDeclarations/GenuineIntel-6-55-4.json 
b/src/PMURegisterDeclarations/GenuineIntel-6-55-4.json index 6a8626e2..2d39fd49 100644 --- a/src/PMURegisterDeclarations/GenuineIntel-6-55-4.json +++ b/src/PMURegisterDeclarations/GenuineIntel-6-55-4.json @@ -100,6 +100,13 @@ "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} } }, + "pcu" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0} + } + }, "irp" : { "programmable" : { "EventCode": {"Config": 0, "Position": 0, "Width": 8}, From c3effcf5ad34e512242cb91c9b1168d1c2105fda Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Fri, 4 Aug 2023 13:43:43 +0200 Subject: [PATCH 04/43] add pcu in GenuineIntel-6-55-{7,B}.json Change-Id: I42f2c398678770aae4b579e00ca41cf6f614c782 --- src/PMURegisterDeclarations/GenuineIntel-6-55-7.json | 7 +++++++ src/PMURegisterDeclarations/GenuineIntel-6-55-B.json | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/src/PMURegisterDeclarations/GenuineIntel-6-55-7.json b/src/PMURegisterDeclarations/GenuineIntel-6-55-7.json index 6a8626e2..2d39fd49 100644 --- a/src/PMURegisterDeclarations/GenuineIntel-6-55-7.json +++ b/src/PMURegisterDeclarations/GenuineIntel-6-55-7.json @@ -100,6 +100,13 @@ "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} } }, + "pcu" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0} + } + }, "irp" : { "programmable" : { "EventCode": {"Config": 0, "Position": 0, "Width": 8}, diff --git a/src/PMURegisterDeclarations/GenuineIntel-6-55-B.json b/src/PMURegisterDeclarations/GenuineIntel-6-55-B.json index 6a8626e2..2d39fd49 100644 --- a/src/PMURegisterDeclarations/GenuineIntel-6-55-B.json +++ b/src/PMURegisterDeclarations/GenuineIntel-6-55-B.json @@ -100,6 
+100,13 @@ "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} } }, + "pcu" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0} + } + }, "irp" : { "programmable" : { "EventCode": {"Config": 0, "Position": 0, "Width": 8}, From 7b1a9c1e4db48b57e0478633778ee7e0209e6bb7 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 12 Sep 2023 16:01:19 +0200 Subject: [PATCH 05/43] add MMIOEventPosition Change-Id: If426b41f04c97e490115f594146d5e5aea8ebff5 --- src/cpucounters.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/cpucounters.h b/src/cpucounters.h index 2c0c6cb1..0b9573c6 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -1323,6 +1323,18 @@ class PCM_API PCM && a[PCICFGEventPosition::width] == b[PCICFGEventPosition::width]; } }; + struct MMIOEventPosition + { + enum constants + { + deviceID = PCICFGEventPosition::deviceID, + offset = PCICFGEventPosition::offset, + type = PCICFGEventPosition::type, + membar_bits1 = 3, + membar_bits2 = 4, + width = PCICFGEventPosition::width + }; + }; private: std::unordered_map, PCICFGRegisterEncodingHash, PCICFGRegisterEncodingCmp> PCICFGRegisterLocations{}; public: From 9e6ab5527e0322015928b6265a7b64b595e7b1ce Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 13 Sep 2023 11:48:23 +0200 Subject: [PATCH 06/43] add more MMIO reg functions Change-Id: Ia03f53b21dbb9a26912e92bcd77e3d257d275adf --- src/cpucounters.h | 46 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/src/cpucounters.h b/src/cpucounters.h index 0b9573c6..77d86c57 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -1015,6 +1015,7 @@ class PCM_API PCM void readMSRs(std::shared_ptr msr, const RawPMUConfig & msrConfig, CounterStateType & result); void readQPICounters(SystemCounterState & 
counterState); void readPCICFGRegisters(SystemCounterState& result); + void readMMIORegisters(SystemCounterState& result); void reportQPISpeed() const; void readCoreCounterConfig(const bool complainAboutMSR = false); void readCPUMicrocodeLevel(); @@ -1335,8 +1336,28 @@ class PCM_API PCM width = PCICFGEventPosition::width }; }; + typedef std::pair, uint32> MMIORegisterEncoding; // MMIORange shared ptr, offset + struct MMIORegisterEncodingHash : public PCICFGRegisterEncodingHash + { + std::size_t operator()(const RawEventEncoding& e) const + { + std::size_t h4 = std::hash{}(e[MMIOEventPosition::membar_bits1]); + std::size_t h5 = std::hash{}(e[MMIOEventPosition::membar_bits2]); + return PCICFGRegisterEncodingHash::operator()(e) ^ (h4 << 3ULL) ^ (h5 << 4ULL); + } + }; + struct MMIORegisterEncodingCmp : public PCICFGRegisterEncodingCmp + { + bool operator ()(const RawEventEncoding& a, const RawEventEncoding& b) const + { + return PCICFGRegisterEncodingCmp::operator()(a,b) + && a[MMIOEventPosition::membar_bits1] == b[MMIOEventPosition::membar_bits1] + && a[MMIOEventPosition::membar_bits2] == b[MMIOEventPosition::membar_bits2]; + } + }; private: std::unordered_map, PCICFGRegisterEncodingHash, PCICFGRegisterEncodingCmp> PCICFGRegisterLocations{}; + std::unordered_map, MMIORegisterEncodingHash, MMIORegisterEncodingCmp> MMIORegisterLocations{}; public: TopologyEntry::CoreType getCoreType(const unsigned coreID) const @@ -1601,7 +1622,7 @@ class PCM_API PCM } return false; } - RawPMUConfig threadMSRConfig{}, packageMSRConfig{}, pcicfgConfig{}; + RawPMUConfig threadMSRConfig{}, packageMSRConfig{}, pcicfgConfig{}, mmioConfig{}; public: //! 
\brief Reads CPU model id @@ -3310,12 +3331,14 @@ class SystemCounterState : public SocketCounterState { friend class PCM; friend std::vector getPCICFGEvent(const PCM::RawEventEncoding& eventEnc, const SystemCounterState& before, const SystemCounterState& after); + friend std::vector getMMIOEvent(const PCM::RawEventEncoding& eventEnc, const SystemCounterState& before, const SystemCounterState& after); std::vector > incomingQPIPackets; // each 64 byte std::vector > outgoingQPIFlits; // idle or data/non-data flits depending on the architecture std::vector > TxL0Cycles; uint64 uncoreTSC; std::unordered_map , PCM::PCICFGRegisterEncodingHash, PCM::PCICFGRegisterEncodingCmp> PCICFGValues{}; + std::unordered_map, PCM::MMIORegisterEncodingHash, PCM::MMIORegisterEncodingCmp> MMIOValues{}; protected: void readAndAggregate(std::shared_ptr handle) @@ -4452,13 +4475,14 @@ inline double getRetiring(const CounterStateType & before, const CounterStateTyp return 0.; } -inline std::vector getPCICFGEvent(const PCM::RawEventEncoding & eventEnc, const SystemCounterState& before, const SystemCounterState& after) +template +inline std::vector getRegisterEvent(const PCM::RawEventEncoding& eventEnc, const ValuesType& beforeValues, const ValuesType& afterValues) { std::vector result{}; - auto beforeIter = before.PCICFGValues.find(eventEnc); - auto afterIter = after.PCICFGValues.find(eventEnc); - if (beforeIter != before.PCICFGValues.end() && - afterIter != after.PCICFGValues.end()) + auto beforeIter = beforeValues.find(eventEnc); + auto afterIter = afterValues.find(eventEnc); + if (beforeIter != beforeValues.end() && + afterIter != afterValues.end()) { const auto& beforeValues = beforeIter->second; const auto& afterValues = afterIter->second; @@ -4480,6 +4504,16 @@ inline std::vector getPCICFGEvent(const PCM::RawEventEncoding & eventEnc return result; } +inline std::vector getPCICFGEvent(const PCM::RawEventEncoding & eventEnc, const SystemCounterState& before, const SystemCounterState& 
after) +{ + return getRegisterEvent(eventEnc, before.PCICFGValues, after.PCICFGValues); +} + +inline std::vector getMMIOEvent(const PCM::RawEventEncoding& eventEnc, const SystemCounterState& before, const SystemCounterState& after) +{ + return getRegisterEvent(eventEnc, before.MMIOValues, after.MMIOValues); +} + template uint64 getMSREvent(const uint64& index, const PCM::MSRType& type, const CounterStateType& before, const CounterStateType& after) { From 2c7adc4a6eef7c442326508b50b805e5e275bc18 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 13 Sep 2023 12:31:25 +0200 Subject: [PATCH 07/43] populate MMIORegisterLocations Change-Id: I5fa846f3c1be33504e02630679f37854f1fed215 --- src/cpucounters.cpp | 49 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 850ef297..49e15387 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -5435,6 +5435,7 @@ PCM::ErrorCode PCM::program(const RawPMUConfigs& curPMUConfigs_, const bool sile threadMSRConfig = RawPMUConfig{}; packageMSRConfig = RawPMUConfig{}; pcicfgConfig = RawPMUConfig{}; + mmioConfig = RawPMUConfig{}; RawPMUConfigs curPMUConfigs = curPMUConfigs_; constexpr auto globalRegPos = 0ULL; PCM::ExtendedCustomCoreEventDescription conf; @@ -5649,6 +5650,54 @@ PCM::ErrorCode PCM::program(const RawPMUConfigs& curPMUConfigs_, const bool sile addLocations(pcicfgConfig.programmable); addLocations(pcicfgConfig.fixed); } + else if (type == "mmio") + { + mmioConfig = pmuConfig.second; + auto addLocations = [this](const std::vector& configs) { + for (const auto& c : configs) + { + if (MMIORegisterLocations.find(c.first) == MMIORegisterLocations.end()) + { + // add locations + std::vector locations; + const auto deviceID = c.first[MMIOEventPosition::deviceID]; + forAllIntelDevices([&locations, &deviceID, &c](const uint32 group, const uint32 bus, const uint32 device, const uint32 function, const uint32 device_id) + { + if 
(deviceID == device_id && PciHandleType::exists(group, bus, device, function)) + { + PciHandleType pciHandle(group, bus, device, function); + auto computeBarOffset = [&pciHandle](uint64 membarBits) -> size_t + { + if (membarBits) + { + const auto destPos = extract_bits(membarBits, 32, 39); + const auto numBits = extract_bits(membarBits, 24, 31); + const auto srcPos = extract_bits(membarBits, 16, 23); + const auto pcicfgOffset = extract_bits(membarBits, 0, 15); + uint32 memBarOffset = 0; + pciHandle.read32(pcicfgOffset, &memBarOffset); + return size_t(extract_bits_ui(memBarOffset, srcPos, srcPos + numBits - 1)) << destPos; + } + return 0; + }; + + size_t memBar = computeBarOffset(c.first[MMIOEventPosition::membar_bits1]) + | computeBarOffset(c.first[MMIOEventPosition::membar_bits2]); + + assert(memBar); + + const size_t addr = memBar + c.first[MMIOEventPosition::offset]; + // MMIORange shared ptr (handle), offset + locations.push_back(MMIORegisterEncoding{ std::make_shared(addr & ~4095ULL, 4096), (uint32) (addr & 4095ULL) }); + } + }); + MMIORegisterLocations[c.first] = locations; + } + } + }; + addLocations(mmioConfig.programmable); + addLocations(mmioConfig.fixed); + } else if (type == "cxlcm") { programCXLCM(events64); From b1ed56286ae484ba4d0a2ea33cb82365d9a08df2 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 13 Sep 2023 13:39:11 +0200 Subject: [PATCH 08/43] add readMMIORegisters Change-Id: Id979370acc64d083e060e6f4846479f681301e15 --- src/cpucounters.cpp | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 49e15387..bf4ec833 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -6119,6 +6119,50 @@ void PCM::readPCICFGRegisters(SystemCounterState& systemState) } } +void PCM::readMMIORegisters(SystemCounterState& systemState) +{ + auto read = [this, &systemState](const RawEventConfig& cfg) { + const RawEventEncoding& reEnc = cfg.first; + 
systemState.MMIOValues[reEnc].clear(); + for (auto& reg : MMIORegisterLocations[reEnc]) + { + const auto width = reEnc[MMIOEventPosition::width]; + auto& h = reg.first; + const auto& offset = reg.second; + if (h.get()) + { + uint64 value = ~0ULL; + uint32 value32 = 0; + switch (width) + { + case 16: + value32 = h->read32(offset); + value = (uint64)extract_bits_ui(value32, 0, 15); + break; + case 32: + value32 = h->read32(offset); + value = (uint64)value32; + break; + case 64: + value = h->read64(offset); + break; + default: + std::cerr << "ERROR: Unsupported width " << width << " for mmio register " << cfg.second << "\n"; + } + systemState.MMIOValues[reEnc].push_back(value); + } + } + }; + for (const auto& cfg : mmioConfig.programmable) + { + read(cfg); + } + for (const auto& cfg : mmioConfig.fixed) + { + read(cfg); + } +} + void PCM::readQPICounters(SystemCounterState & result) { // read QPI counters @@ -6323,6 +6367,7 @@ void PCM::getAllCounterStates(SystemCounterState & systemState, std::vector Date: Wed, 13 Sep 2023 14:09:47 +0200 Subject: [PATCH 09/43] pcm-raw: add support of mmio register reads Change-Id: I9eeeb5a418106b9635d5b1877312cd39a98cb2b9 --- src/pcm-raw.cpp | 108 +++++++++++++++++++++++++++++++----------------- 1 file changed, 70 insertions(+), 38 deletions(-) diff --git a/src/pcm-raw.cpp b/src/pcm-raw.cpp index d8ba2493..dbe7275d 100644 --- a/src/pcm-raw.cpp +++ b/src/pcm-raw.cpp @@ -1179,6 +1179,22 @@ std::string getPCICFGEventString(const PCM::RawEventEncoding & eventEnc, const s return c.str(); } +std::string getMMIOEventString(const PCM::RawEventEncoding& eventEnc, const std::string& type) +{ + std::stringstream c; + c << type << ":0x" << std::hex << + eventEnc[PCM::MMIOEventPosition::deviceID] << + ":0x" << eventEnc[PCM::MMIOEventPosition::offset] << + ":0x" << eventEnc[PCM::MMIOEventPosition::membar_bits1] << + ":0x" << eventEnc[PCM::MMIOEventPosition::membar_bits2] << + ":0x" << eventEnc[PCM::MMIOEventPosition::width] << + ":" << 
getTypeString(eventEnc[PCM::MMIOEventPosition::type]); + return c.str(); +} + +typedef std::string(*getEventStringFunc)(const PCM::RawEventEncoding& eventEnc, const std::string& type); +typedef std::vector(getEventFunc)(const PCM::RawEventEncoding& eventEnc, const SystemCounterState& before, const SystemCounterState& after); + enum MSRScope { Thread, @@ -1462,28 +1478,12 @@ void printTransposed(const PCM::RawPMUConfigs& curPMUConfigs, is_header_printed = true; } }; - if (type == "core") - { - printCores(pcm::TopologyEntry::Core); - } - else if (type == "atom") - { - printCores(pcm::TopologyEntry::Atom); - } - else if (type == "thread_msr") - { - printMSRRows(MSRScope::Thread); - } - else if (type == "package_msr") - { - printMSRRows(MSRScope::Package); - } - else if (type == "pcicfg") + auto printRegisterRows = [&](getEventStringFunc getEventString, getEventFunc getEvent) { auto printRegister = [&](const PCM::RawEventConfig& event) -> bool { - const std::string name = (event.second.empty()) ? getPCICFGEventString(event.first, type) : event.second; - const auto values = getPCICFGEvent(event.first, SysBeforeState, SysAfterState); + const std::string name = (event.second.empty()) ? 
getEventString(event.first, type) : event.second; + const auto values = getEvent(event.first, SysBeforeState, SysAfterState); if (is_header && is_header_printed) return false; @@ -1541,6 +1541,30 @@ void printTransposed(const PCM::RawPMUConfigs& curPMUConfigs, break; } } + }; + if (type == "core") + { + printCores(pcm::TopologyEntry::Core); + } + else if (type == "atom") + { + printCores(pcm::TopologyEntry::Atom); + } + else if (type == "thread_msr") + { + printMSRRows(MSRScope::Thread); + } + else if (type == "package_msr") + { + printMSRRows(MSRScope::Package); + } + else if (type == "pcicfg") + { + printRegisterRows(getPCICFGEventString, getPCICFGEvent); + } + else if (type == "mmio") + { + printRegisterRows(getMMIOEventString, getMMIOEvent); } else if (type == "m3upi") { @@ -1738,6 +1762,28 @@ void print(const PCM::RawPMUConfigs& curPMUConfigs, } } }; + auto printRegisters = [&](getEventStringFunc getEventString, getEventFunc getEvent) + { + auto printOneRegister = [&](const PCM::RawEventConfig& event) + { + const auto values = getEvent(event.first, SysBeforeState, SysAfterState); + for (size_t r = 0; r < values.size(); ++r) + { + choose(outputType, + [&r]() { cout << "SYSTEM_" << r << separator; }, + [&]() { if (event.second.empty()) cout << getEventString(event.first, type) << separator; else cout << event.second << separator; }, + [&]() { cout << values[r] << separator; }); + } + }; + for (const auto& event : events) + { + printOneRegister(event); + } + for (const auto& event : fixedEvents) + { + printOneRegister(event); + } + }; if (type == "core") { printCores(pcm::TopologyEntry::Core); @@ -1888,25 +1934,11 @@ void print(const PCM::RawPMUConfigs& curPMUConfigs, } else if (type == "pcicfg") { - auto printPCICFG = [&](const PCM::RawEventConfig& event) - { - const auto values = getPCICFGEvent(event.first, SysBeforeState, SysAfterState); - for (size_t r = 0; r < values.size(); ++r) - { - choose(outputType, - [&r]() { cout << "SYSTEM_" << r << separator; }, - 
[&]() { if (event.second.empty()) cout << getPCICFGEventString(event.first, type) << separator; else cout << event.second << separator; }, - [&]() { cout << values[r] << separator; }); - } - }; - for (const auto& event : events) - { - printPCICFG(event); - } - for (const auto& event : fixedEvents) - { - printPCICFG(event); - } + printRegisters(getPCICFGEventString, getPCICFGEvent); + } + else if (type == "mmio") + { + printRegisters(getMMIOEventString, getMMIOEvent); } else if (type == "ubox") { From 878b6b1ae2d9e5147502e3d3c6a09d824657f55f Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Thu, 14 Sep 2023 10:52:04 +0200 Subject: [PATCH 10/43] document MMIO register access --- doc/PCM_RAW_README.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/doc/PCM_RAW_README.md b/doc/PCM_RAW_README.md index 95ce5947..4fae6275 100644 --- a/doc/PCM_RAW_README.md +++ b/doc/PCM_RAW_README.md @@ -77,6 +77,35 @@ pcicfg32/config=0xe20,config1=0x180,config2=0x0,name=CHANERR_INT ``` From: https://www.intel.la/content/dam/www/public/us/en/documents/datasheets/xeon-e7-v2-datasheet-vol-2.pdf +MMIO Registers: + +``` +mmio/config=<dev_id>,config1=<offset>,config2=<static_or_freerun>,config3=<membar_bits1>[,config4=<membar_bits2>],width=<width>[,name=<name>] +``` + +The MEMBAR is computed by logically ORing the result of membar_bits1 and membar_bits2 computation described below (PCICFG read + bit extraction and shift). The final MMIO register address = MEMBAR + offset. + +* width: register width in bits (16,32,64) +* dev_id: Intel PCI device id where the membar address registers are located +* membar_bits1: mmioBase register bits to compute membar (base address) + - bits 0-15 : PCICFG register offset to read membar1 bits + - bits 16-23: source position of membar bits in the PCICFG register + - bits 24-31: number of bits + - bits 32-39: destination bit position in the membar +* membar_bits2: mmioBase register bits to compute membar (base address), can be zero if only membar_bits1 is sufficient for locating the register. 
+ - bits 0-15 : PCICFG register offset to read membar2 bits + - bits 16-23: source position of membar bits in the PCICFG register + - bits 24-31: number of bits + - bits 32-39: destination bit position in the membar +* offset: offset of the MMIO register relative to the membar +* static_or_freerun: same syntax as for MSR registers + +Example (Icelake server iMC PMON MMIO register read): + +``` +mmio/config=0x3451,config1=0x22808,config2=1,config3=0x171D0000D0,config4=0x0c0b0000d8,width=64 +``` + -------------------------------------------------------------------------------- Collecting Events By Names From Event Lists (https://github.com/intel/perfmon/) -------------------------------------------------------------------------------- From 76eb36d4b04db1c03517809a3b29a5828f534bef Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Mon, 18 Sep 2023 10:35:53 +0200 Subject: [PATCH 11/43] pcm-raw: add support of opc field for BDX --- src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json | 3 ++- src/pcm-raw.cpp | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json b/src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json index e660ba7a..541399c8 100644 --- a/src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json +++ b/src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json @@ -64,7 +64,8 @@ "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}, "Filter0": {"Config": 1, "Position": 0, "Width": 64, "DefaultValue": 0}, "TID": {"Config": 1, "Position": 0, "Width": 6, "DefaultValue": 0}, - "Filter1": {"Config": 2, "Position": 0, "Width": 64, "DefaultValue": 59} + "Filter1": {"Config": 2, "Position": 0, "Width": 64, "DefaultValue": 59}, + "OPC": {"Config": 2, "Position": 20, "Width": 9, "DefaultValue": 0} } }, "imc" : { diff --git a/src/pcm-raw.cpp b/src/pcm-raw.cpp index dbe7275d..323ed340 100644 --- a/src/pcm-raw.cpp +++ b/src/pcm-raw.cpp @@ -822,6 +822,10 @@ AddEventStatus 
addEventFromDB(PCM::RawPMUConfigs& curPMUConfigs, string fullEven { setField("Filter1", read_number(assignment[1].c_str())); } + else if (assignment.size() == 2 && assignment[0] == "opc") + { + setField("OPC", read_number(assignment[1].c_str())); + } else if (assignment.size() == 2 && assignment[0] == "t") { setField("Threshold", read_number(assignment[1].c_str())); From 4ef546571cfad81827f237e147d1f00cfdb4156b Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Mon, 18 Sep 2023 10:57:45 +0200 Subject: [PATCH 12/43] pcm-raw: add support of state field for BDX --- src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json | 1 + src/pcm-raw.cpp | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json b/src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json index 541399c8..44be66fb 100644 --- a/src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json +++ b/src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json @@ -64,6 +64,7 @@ "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}, "Filter0": {"Config": 1, "Position": 0, "Width": 64, "DefaultValue": 0}, "TID": {"Config": 1, "Position": 0, "Width": 6, "DefaultValue": 0}, + "State": {"Config": 1, "Position": 17, "Width": 7, "DefaultValue": 0}, "Filter1": {"Config": 2, "Position": 0, "Width": 64, "DefaultValue": 59}, "OPC": {"Config": 2, "Position": 20, "Width": 9, "DefaultValue": 0} } diff --git a/src/pcm-raw.cpp b/src/pcm-raw.cpp index 323ed340..49097756 100644 --- a/src/pcm-raw.cpp +++ b/src/pcm-raw.cpp @@ -826,6 +826,10 @@ AddEventStatus addEventFromDB(PCM::RawPMUConfigs& curPMUConfigs, string fullEven { setField("OPC", read_number(assignment[1].c_str())); } + else if (assignment.size() == 2 && assignment[0] == "state") + { + setField("State", read_number(assignment[1].c_str())); + } else if (assignment.size() == 2 && assignment[0] == "t") { setField("Threshold", read_number(assignment[1].c_str())); From 0aa31fd9ea5863b60efccee18db74d96baad79aa Mon Sep 17 
00:00:00 2001 From: Roman Dementiev Date: Mon, 18 Sep 2023 11:39:18 +0200 Subject: [PATCH 13/43] pcm-raw: add support of nc and isoc field for BDX Change-Id: I69eecd47bf1e54ced67f27fd7d6d0d101adc30f9 --- src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json | 4 +++- src/pcm-raw.cpp | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json b/src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json index 44be66fb..7e696668 100644 --- a/src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json +++ b/src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json @@ -66,7 +66,9 @@ "TID": {"Config": 1, "Position": 0, "Width": 6, "DefaultValue": 0}, "State": {"Config": 1, "Position": 17, "Width": 7, "DefaultValue": 0}, "Filter1": {"Config": 2, "Position": 0, "Width": 64, "DefaultValue": 59}, - "OPC": {"Config": 2, "Position": 20, "Width": 9, "DefaultValue": 0} + "OPC": {"Config": 2, "Position": 20, "Width": 9, "DefaultValue": 0}, + "NC": {"Config": 2, "Position": 30, "Width": 1, "DefaultValue": 0}, + "ISOC": {"Config": 2, "Position": 31, "Width": 1, "DefaultValue": 0} } }, "imc" : { diff --git a/src/pcm-raw.cpp b/src/pcm-raw.cpp index 49097756..c5aa31d6 100644 --- a/src/pcm-raw.cpp +++ b/src/pcm-raw.cpp @@ -826,6 +826,14 @@ AddEventStatus addEventFromDB(PCM::RawPMUConfigs& curPMUConfigs, string fullEven { setField("OPC", read_number(assignment[1].c_str())); } + else if (assignment.size() == 2 && assignment[0] == "nc") + { + setField("NC", read_number(assignment[1].c_str())); + } + else if (assignment.size() == 2 && assignment[0] == "isoc") + { + setField("ISOC", read_number(assignment[1].c_str())); + } else if (assignment.size() == 2 && assignment[0] == "state") { setField("State", read_number(assignment[1].c_str())); From 018fc001721eb06d18f098d737990341cba8d733 Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Mon, 18 Sep 2023 11:53:22 +0200 Subject: [PATCH 14/43] pcm-raw: added invert and anythread modifiers --- 
src/pcm-raw.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/pcm-raw.cpp b/src/pcm-raw.cpp index c5aa31d6..012f085f 100644 --- a/src/pcm-raw.cpp +++ b/src/pcm-raw.cpp @@ -756,6 +756,8 @@ AddEventStatus addEventFromDB(PCM::RawPMUConfigs& curPMUConfigs, string fullEven std::regex CounterMaskRegex("c(0x[0-9a-fA-F]+|[[:digit:]]+)"); std::regex UmaskRegex("u(0x[0-9a-fA-F]+|[[:digit:]]+)"); std::regex EdgeDetectRegex("e(0x[0-9a-fA-F]+|[[:digit:]]+)"); + std::regex AnyThreadRegex("amt(0x[0-9a-fA-F]+|[[:digit:]]+)"); + std::regex InvertRegex("i(0x[0-9a-fA-F]+|[[:digit:]]+)"); while (mod != EventTokens.end()) { const auto assignment = split(*mod, '='); @@ -798,6 +800,18 @@ AddEventStatus addEventFromDB(PCM::RawPMUConfigs& curPMUConfigs, string fullEven const std::string Str{ mod->begin() + 1, mod->end() }; setField("EdgeDetect", read_number(Str.c_str())); } + else if (std::regex_match(mod->c_str(), AnyThreadRegex)) + { + // AnyThread modifier + const std::string Str{ mod->begin() + 1, mod->end() }; + setField("AnyThread", read_number(Str.c_str())); + } + else if (std::regex_match(mod->c_str(), InvertRegex)) + { + // Invert modifier + const std::string Str{ mod->begin() + 1, mod->end() }; + setField("Invert", read_number(Str.c_str())); + } else if (std::regex_match(mod->c_str(), UmaskRegex)) { // UMask modifier From 8e3c66d4a29455ea3bfd35d15d24accabb4e495e Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Mon, 18 Sep 2023 12:31:52 +0200 Subject: [PATCH 15/43] pcm-raw: support ha pmu Change-Id: I9227028d12838425e5466e66ca68b1be4e0c8c88 --- .../GenuineIntel-6-4F-1.json | 8 ++++++ src/cpucounters.cpp | 15 +++++++++++ src/cpucounters.h | 19 ++++++++++++++ src/pcm-raw.cpp | 26 +++++++++++++++++++ 4 files changed, 68 insertions(+) diff --git a/src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json b/src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json index 7e696668..c4b4ae66 100644 --- a/src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json +++ 
b/src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json @@ -79,6 +79,14 @@ "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} } }, + "ha" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, "xpi" : { "__comment" : "this is for UPI LL and QPI LL uncore PMUs", "programmable" : { diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index bf4ec833..627eebfd 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -5574,6 +5574,13 @@ PCM::ErrorCode PCM::program(const RawPMUConfigs& curPMUConfigs_, const bool sile uncore->programIMC(events32); } } + else if (type == "ha") + { + for (auto& uncore : serverUncorePMUs) + { + uncore->programHA(events32); + } + } else if (type == "m2m") { for (auto& uncore : serverUncorePMUs) @@ -6532,6 +6539,9 @@ ServerUncoreCounterState PCM::getServerUncoreCounterState(uint32 socket) assert(controller < result.M2MCounter.size()); for (uint32 cnt = 0; cnt < ServerUncoreCounterState::maxCounters; ++cnt) result.M2MCounter[controller][cnt] = serverUncorePMUs[socket]->getM2MCounter(controller, cnt); + assert(controller < result.HACounter.size()); + for (uint32 cnt = 0; cnt < ServerUncoreCounterState::maxCounters; ++cnt) + result.HACounter[controller][cnt] = serverUncorePMUs[socket]->getHACounter(controller, cnt); } serverUncorePMUs[socket]->unfreezeCounters(); } @@ -8583,6 +8593,11 @@ uint64 ServerUncorePMUs::getPMUCounter(std::vector & pmu, const uint3 return result; } +uint64 ServerUncorePMUs::getHACounter(uint32 id, uint32 counter) +{ + return getPMUCounter(haPMUs, id, counter); +} + uint64 ServerUncorePMUs::getMCCounter(uint32 channel, uint32 counter) { return getPMUCounter(imcPMUs, channel, counter); diff --git a/src/cpucounters.h b/src/cpucounters.h index 
77d86c57..9d89a356 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -510,6 +510,10 @@ class ServerUncorePMUs //! \param box box ID/number //! \param counter counter number uint64 getM2MCounter(uint32 box, uint32 counter); + //! \brief Direct read of HA counter + //! \param box box ID/number + //! \param counter counter number + uint64 getHACounter(uint32 box, uint32 counter); //! \brief Freezes event counting void freezeCounters(); @@ -2877,6 +2881,17 @@ uint64 getM2MCounter(uint32 controller, uint32 counter, const CounterStateType & return after.M2MCounter[controller][counter] - before.M2MCounter[controller][counter]; } +/*! \brief Direct read of HA controller PMU counter (counter meaning depends on the programming: power/performance/etc) + \param counter counter number + \param controller controller number + \param before CPU counter state before the experiment + \param after CPU counter state after the experiment +*/ +template +uint64 getHACounter(uint32 controller, uint32 counter, const CounterStateType & before, const CounterStateType & after) +{ + return after.HACounter[controller][counter] - before.HACounter[controller][counter]; +} /*! 
\brief Direct read of embedded DRAM memory controller counter (counter meaning depends on the programming: power/performance/etc) \param counter counter number @@ -3168,6 +3183,7 @@ class ServerUncoreCounterState : public UncoreCounterState std::array HBMClocks; std::array, maxChannels> MCCounter; // channel X counter std::array, maxControllers> M2MCounter; // M2M/iMC boxes x counter + std::array, maxControllers> HACounter; // HA boxes x counter std::array, maxChannels> EDCCounter; // EDC controller X counter std::array PCUCounter; std::unordered_map freeRunningCounter; @@ -3201,6 +3217,8 @@ class ServerUncoreCounterState : public UncoreCounterState template friend uint64 getM2MCounter(uint32 controller, uint32 counter, const CounterStateType & before, const CounterStateType & after); template + friend uint64 getHACounter(uint32 controller, uint32 counter, const CounterStateType & before, const CounterStateType & after); + template friend uint64 getEDCCounter(uint32 channel, uint32 counter, const CounterStateType & before, const CounterStateType & after); template friend uint64 getPCUCounter(uint32 counter, const CounterStateType & before, const CounterStateType & after); @@ -3232,6 +3250,7 @@ class ServerUncoreCounterState : public UncoreCounterState HBMClocks{{}}, MCCounter{{}}, M2MCounter{{}}, + HACounter{{}}, EDCCounter{{}}, PCUCounter{{}}, PackageThermalHeadroom(0), diff --git a/src/pcm-raw.cpp b/src/pcm-raw.cpp index 012f085f..87505906 100644 --- a/src/pcm-raw.cpp +++ b/src/pcm-raw.cpp @@ -1629,6 +1629,14 @@ void printTransposed(const PCM::RawPMUConfigs& curPMUConfigs, [&]() { printUncoreRows([](const uint32 u, const uint32 i, const ServerUncoreCounterState& before, const ServerUncoreCounterState& after) { return getM2MCounter(u, i, before, after); }, (uint32)m->getMCPerSocket(), "MC"); }); } + else if (type == "ha") + { + choose(outputType, + [&]() { printUncoreRows(nullptr, (uint32) m->getMCPerSocket(), "HA"); }, + [&]() { printUncoreRows(nullptr, (uint32) 
m->getMCPerSocket(), type); }, + [&]() { printUncoreRows([](const uint32 u, const uint32 i, const ServerUncoreCounterState& before, const ServerUncoreCounterState& after) { return getHACounter(u, i, before, after); }, (uint32)m->getMCPerSocket(), "HA"); + }); + } else if (type == "pcu") { choose(outputType, @@ -1901,6 +1909,24 @@ void print(const PCM::RawPMUConfigs& curPMUConfigs, } } } + else if (type == "ha") + { + for (uint32 s = 0; s < m->getNumSockets(); ++s) + { + for (uint32 mc = 0; mc < m->getMCPerSocket(); ++mc) + { + int i = 0; + for (auto& event : events) + { + choose(outputType, + [s, mc]() { cout << "SKT" << s << "HA" << mc << separator; }, + [&event, &i]() { if (event.second.empty()) cout << "HAEvent" << i << separator; else cout << event.second << separator; }, + [&]() { cout << getHACounter(mc, i, BeforeUncoreState[s], AfterUncoreState[s]) << separator; }); + ++i; + } + } + } + } else if (type == "pcu") { for (uint32 s = 0; s < m->getNumSockets(); ++s) From 00c6490f679f8277827bae307d7f257d8d572476 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Mon, 18 Sep 2023 14:17:00 +0200 Subject: [PATCH 16/43] pcm-raw: allow imc fixed event name change Change-Id: Ia502c28295e74864bc79456545abe08713b27aaa --- src/pcm-raw.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pcm-raw.cpp b/src/pcm-raw.cpp index 87505906..9df95418 100644 --- a/src/pcm-raw.cpp +++ b/src/pcm-raw.cpp @@ -1614,11 +1614,12 @@ void printTransposed(const PCM::RawPMUConfigs& curPMUConfigs, } else if (type == "imc") { + const std::string fixedEventName = (fixedEvents.empty() == false && fixedEvents[0].second.empty() == false) ? 
fixedEvents[0].second : "DRAMClocks"; choose(outputType, [&]() { printUncoreRows(nullptr, (uint32) m->getMCChannelsPerSocket(), "CHAN"); }, [&]() { printUncoreRows(nullptr, (uint32) m->getMCChannelsPerSocket(), type); }, [&]() { printUncoreRows([](const uint32 u, const uint32 i, const ServerUncoreCounterState& before, const ServerUncoreCounterState& after) { return getMCCounter(u, i, before, after); }, (uint32)m->getMCChannelsPerSocket(), - "DRAMClocks", [](const uint32 u, const ServerUncoreCounterState& before, const ServerUncoreCounterState& after) { return getDRAMClocks(u, before, after); }); + fixedEventName, [](const uint32 u, const ServerUncoreCounterState& before, const ServerUncoreCounterState& after) { return getDRAMClocks(u, before, after); }); }); } else if (type == "m2m") From 38d50446cf7f5721dbac43416a52efb55eb9601b Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Tue, 19 Sep 2023 09:16:56 +0200 Subject: [PATCH 17/43] pcm-raw: remove the max number of event restrictions for register events --- src/cpucounters.cpp | 4 ++-- src/pcm-raw.cpp | 4 ++++ src/utils.cpp | 12 ++++++++++++ src/utils.h | 2 +- 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 627eebfd..282c031d 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -5541,9 +5541,9 @@ PCM::ErrorCode PCM::program(const RawPMUConfigs& curPMUConfigs_, const bool sile { continue; } - if (events.programmable.size() > ServerUncoreCounterState::maxCounters) + if (events.programmable.size() > ServerUncoreCounterState::maxCounters && isRegisterEvent(type) == false) { - std::cerr << "ERROR: trying to program " << events.programmable.size() << " core PMU counters, which exceeds the max num possible (" << ServerUncoreCounterState::maxCounters << ")."; + std::cerr << "ERROR: trying to program " << events.programmable.size() << " uncore PMU counters, which exceeds the max num possible (" << ServerUncoreCounterState::maxCounters << ")."; return 
PCM::UnknownError; } uint32 events32[ServerUncoreCounterState::maxCounters] = { 0,0,0,0,0,0,0,0 }; diff --git a/src/pcm-raw.cpp b/src/pcm-raw.cpp index 9df95418..e47fe39a 100644 --- a/src/pcm-raw.cpp +++ b/src/pcm-raw.cpp @@ -132,6 +132,10 @@ enum AddEventStatus bool tooManyEvents(const std::string & pmuName, const int event_pos, const std::string& fullEventStr) { + if (isRegisterEvent(pmuName)) + { + return false; + } PCM* m = PCM::getInstance(); assert(m); const int maxCounters = (pmuName == "core" || pmuName == "atom") ? m->getMaxCustomCoreEvents() : ServerUncoreCounterState::maxCounters; diff --git a/src/utils.cpp b/src/utils.cpp index b8418c18..5bfe3318 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -838,6 +838,18 @@ std::string dos2unix(std::string in) return in; } +bool isRegisterEvent(const std::string & pmu) +{ + if (pmu == "mmio" + || pmu == "pcicfg" + || pmu == "package_msr" + || pmu == "thread_msr") + { + return true; + } + return false; +} + std::string a_title(const std::string &init, const std::string &name) { char begin = init[0]; std::string row = init; diff --git a/src/utils.h b/src/utils.h index 5c810dbc..229f1304 100644 --- a/src/utils.h +++ b/src/utils.h @@ -574,7 +574,7 @@ typedef enum{ }evt_cb_type; std::string dos2unix(std::string in); - +bool isRegisterEvent(const std::string & pmu); std::string a_title (const std::string &init, const std::string &name); std::string a_data (std::string init, struct data d); std::string a_header_footer(std::string init, std::string name); From 1b9d4111a712dad03b08cd818cbcfd2c5377a45f Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 19 Sep 2023 11:04:09 +0200 Subject: [PATCH 18/43] implement PCM_NO_MAIN_EXCEPTION_HANDLER variable Change-Id: I81a6c19e651480bb00cc3278c86c4470e783e370 --- src/utils.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/utils.h b/src/utils.h index 229f1304..e4779467 100644 --- a/src/utils.h +++ b/src/utils.h @@ -29,10 +29,16 @@ #include 
#include + +namespace pcm { + std::string safe_getenv(const char* env); +} + #define PCM_MAIN_NOTHROW \ int mainThrows(int argc, char * argv[]); \ int main(int argc, char * argv[]) \ { \ + if (pcm::safe_getenv("PCM_NO_MAIN_EXCEPTION_HANDLER") == std::string("1")) return mainThrows(argc, argv); \ try { \ return mainThrows(argc, argv); \ } catch(const std::runtime_error & e) \ @@ -521,8 +527,6 @@ inline uint64 extract_bits(uint64 myin, uint32 beg, uint32 end) return myll; } -std::string safe_getenv(const char* env); - #ifdef _MSC_VER inline HANDLE openMSRDriver() { From 594ee51d5ac961cf8ad6e57bdefb5dcd09447565 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 19 Sep 2023 11:11:22 +0200 Subject: [PATCH 19/43] document PCM_NO_MAIN_EXCEPTION_HANDLER Change-Id: I8e6ca96acf31bee1609d8be263b99764c1da308a --- doc/ENVVAR_README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/ENVVAR_README.md b/doc/ENVVAR_README.md index af285553..58acfa7a 100644 --- a/doc/ENVVAR_README.md +++ b/doc/ENVVAR_README.md @@ -9,3 +9,5 @@ `PCM_PRINT_TOPOLOGY=1` : print detailed CPU topology `PCM_KEEP_NMI_WATCHDOG=1` : don't disable NMI watchdog (reducing the core metrics set) + +`PCM_NO_MAIN_EXCEPTION_HANDLER=1` : don't catch exceptions in the main function of pcm tools (a debugging option) From 6613d578c64e90cf2bcfbb464b5a50214a9648be Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 20 Sep 2023 12:07:08 +0200 Subject: [PATCH 20/43] add PCU PMU declaration for BDX Change-Id: I4e130c2a054ce258c57a480e1ebcd3cb8ccc4108 --- src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json b/src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json index c4b4ae66..6901131a 100644 --- a/src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json +++ b/src/PMURegisterDeclarations/GenuineIntel-6-4F-1.json @@ -87,6 +87,13 @@ "Threshold": {"Config": 0, "Position": 24, "Width": 8, 
"DefaultValue": 0} } }, + "pcu" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0} + } + }, "xpi" : { "__comment" : "this is for UPI LL and QPI LL uncore PMUs", "programmable" : { From c583d3319a4bdca1866865fbc2e12e8882c2793f Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Wed, 20 Sep 2023 13:07:10 +0200 Subject: [PATCH 21/43] throw exception with a message in pcihandle class --- src/pci.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/pci.cpp b/src/pci.cpp index b65fac1f..3afcbbfd 100644 --- a/src/pci.cpp +++ b/src/pci.cpp @@ -580,8 +580,10 @@ void PciHandleMM::readMCFG() if (read_bytes == 0) { ::close(mcfg_handle); - std::cerr << "PCM Error: Cannot read MCFG-table\n"; - throw std::exception(); + const auto msg = "PCM Error: Cannot read MCFG-table"; + std::cerr << msg; + std::cerr << "\n"; + throw std::runtime_error(msg); } const unsigned segments = mcfgHeader.nrecords(); @@ -597,8 +599,10 @@ void PciHandleMM::readMCFG() if (read_bytes == 0) { ::close(mcfg_handle); - std::cerr << "PCM Error: Cannot read MCFG-table (2)\n"; - throw std::exception(); + const auto msg = "PCM Error: Cannot read MCFG-table (2)"; + std::cerr << msg; + std::cerr << "\n"; + throw std::runtime_error(msg); } #ifdef PCM_DEBUG std::cout << "PCM Debug: segment " << std::dec << i << " "; From 2aa27842e2059b3f1a861310bbcb1a2d9463b6a4 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 20 Sep 2023 13:16:41 +0200 Subject: [PATCH 22/43] catch exception in getMaxNumOfCBoxes to allow the fall-back perf API method to be called Change-Id: I9ae1a831c16576ebae9606991ebb1650b5e39cd8 --- src/cpucounters.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 282c031d..0999998d 100644 --- a/src/cpucounters.cpp +++ 
b/src/cpucounters.cpp @@ -9016,7 +9016,7 @@ uint32 PCM::getMaxNumOfCBoxes() const switch (cpu_model) { case SPR: - { + try { PciHandleType * h = getDeviceHandle(PCM_INTEL_PCI_VENDOR_ID, 0x325b); if (h) { @@ -9028,6 +9028,10 @@ uint32 PCM::getMaxNumOfCBoxes() const delete h; } } + catch (std::exception& e) + { + std::cerr << "Warning: reading the number of CHA from PCICFG register has failed: " << e.what() << "\n"; + } break; case KNL: case SKX: From 73bb96ebfe29b16ff788dbfe3e7e8219df39eb65 Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Wed, 27 Sep 2023 11:01:26 +0200 Subject: [PATCH 23/43] Enable pcm-iio for SPR MCC --- src/pcm-iio.cpp | 216 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 168 insertions(+), 48 deletions(-) diff --git a/src/pcm-iio.cpp b/src/pcm-iio.cpp index 04f99d3a..0e5dcd8d 100644 --- a/src/pcm-iio.cpp +++ b/src/pcm-iio.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include // std::length_error #include #include @@ -143,34 +144,65 @@ static const std::map snr_sad_to_pmu_id_mapping = { #define IAX_DID 0x0cfe // In-Memory Database Analytics Accelerator (IAX) #define QATV2_DID 0x4940 // QuickAssist (CPM) v2 -#define SPR_DMI_PART_ID 7 +#define SPR_XCC_DMI_PART_ID 7 +#define SPR_MCC_DMI_PART_ID 2 +#define SPR_XCC_HQM_PART_ID 5 +#define SPR_MCC_HQM_PART_ID 4 +#define SPR_XCC_QAT_PART_ID 4 +#define SPR_MCC_QAT_PART_ID 5 #define SPR_SAD_CONTROL_CFG_OFFSET SNR_ICX_SAD_CONTROL_CFG_OFFSET -#define SPR_DMI_PMON_ID 1 -#define SPR_PCIE_GEN5_0_PMON_ID 2 -#define SPR_PCIE_GEN5_1_PMON_ID 4 -#define SPR_PCIE_GEN5_2_PMON_ID 6 -#define SPR_PCIE_GEN5_3_PMON_ID 7 -#define SPR_PCIE_GEN5_4_PMON_ID 9 -#define SPR_IDX0_PMON_ID 0 -#define SPR_IDX1_PMON_ID 3 -#define SPR_IDX2_PMON_ID 5 -#define SPR_IDX3_PMON_ID 8 - -const std::map spr_sad_to_pmu_id_mapping = { - { 0, SPR_DMI_PMON_ID }, - { 1, SPR_PCIE_GEN5_0_PMON_ID }, - { 2, SPR_PCIE_GEN5_1_PMON_ID }, - { 3, SPR_PCIE_GEN5_2_PMON_ID }, - { 4, SPR_PCIE_GEN5_3_PMON_ID }, - { 5, 
SPR_PCIE_GEN5_4_PMON_ID }, - { 8, SPR_IDX0_PMON_ID }, - { 9, SPR_IDX1_PMON_ID }, - { 10, SPR_IDX2_PMON_ID }, - { 11, SPR_IDX3_PMON_ID } +#define SPR_PCU_CR3_DID 0x325b +#define SPR_PCU_CR3_REG_DEVICE 0x1e +#define SPR_PCU_CR3_REG_FUNCTION 0x03 +#define SPR_CAPID4_OFFSET 0x94 +#define SPR_CAPID4_GET_PHYSICAL_CHOP(capid4) ((capid4 >> 6) & 3) +#define SPR_PHYSICAL_CHOP_XCC 0b11 +#define SPR_PHYSICAL_CHOP_MCC 0b01 + +#define SPR_XCC_DMI_PMON_ID 1 +#define SPR_XCC_PCIE_GEN5_0_PMON_ID 2 +#define SPR_XCC_PCIE_GEN5_1_PMON_ID 4 +#define SPR_XCC_PCIE_GEN5_2_PMON_ID 6 +#define SPR_XCC_PCIE_GEN5_3_PMON_ID 7 +#define SPR_XCC_PCIE_GEN5_4_PMON_ID 9 +#define SPR_XCC_IDX0_PMON_ID 0 +#define SPR_XCC_IDX1_PMON_ID 3 +#define SPR_XCC_IDX2_PMON_ID 5 +#define SPR_XCC_IDX3_PMON_ID 8 + +const std::map spr_xcc_sad_to_pmu_id_mapping = { + { 0, SPR_XCC_DMI_PMON_ID }, + { 1, SPR_XCC_PCIE_GEN5_0_PMON_ID }, + { 2, SPR_XCC_PCIE_GEN5_1_PMON_ID }, + { 3, SPR_XCC_PCIE_GEN5_2_PMON_ID }, + { 4, SPR_XCC_PCIE_GEN5_3_PMON_ID }, + { 5, SPR_XCC_PCIE_GEN5_4_PMON_ID }, + { 8, SPR_XCC_IDX0_PMON_ID }, + { 9, SPR_XCC_IDX1_PMON_ID }, + { 10, SPR_XCC_IDX2_PMON_ID }, + { 11, SPR_XCC_IDX3_PMON_ID } }; -static const std::string spr_iio_stack_names[] = { +#define SPR_MCC_DMI_PMON_ID 10 +#define SPR_MCC_PCIE_GEN5_0_PMON_ID 0 // assumption +#define SPR_MCC_PCIE_GEN5_1_PMON_ID 1 +#define SPR_MCC_PCIE_GEN5_2_PMON_ID 2 +#define SPR_MCC_PCIE_GEN5_3_PMON_ID 4 // assumption +#define SPR_MCC_PCIE_GEN5_4_PMON_ID 5 +#define SPR_MCC_IDX0_PMON_ID 3 + +const std::map spr_mcc_sad_to_pmu_id_mapping = { + { 0, SPR_MCC_PCIE_GEN5_0_PMON_ID }, + { 1, SPR_MCC_PCIE_GEN5_1_PMON_ID }, + { 2, SPR_MCC_PCIE_GEN5_2_PMON_ID }, + { 3, SPR_MCC_DMI_PMON_ID }, + { 4, SPR_MCC_PCIE_GEN5_3_PMON_ID }, + { 5, SPR_MCC_PCIE_GEN5_4_PMON_ID }, + { 8, SPR_MCC_IDX0_PMON_ID }, +}; + +static const std::string spr_xcc_iio_stack_names[] = { "IIO Stack 0 - IDX0 ", "IIO Stack 1 - DMI ", "IIO Stack 2 - PCIe0 ", @@ -185,6 +217,24 @@ static const std::string 
spr_iio_stack_names[] = { "IIO Stack 11 - NONE ", }; +/* + * SPR MCC has 7 I/O stacks but PMON block for DMI has ID number 10. + * And just to follow such enumeration keep Stack 10 for DMI. + */ +static const std::string spr_mcc_iio_stack_names[] = { + "IIO Stack 0 - PCIe0 ", + "IIO Stack 1 - PCIe1 ", + "IIO Stack 2 - PCIe2 ", + "IIO Stack 3 - IDX0 ", + "IIO Stack 4 - PCIe3 ", + "IIO Stack 5 - PCIe4 ", + "IIO Stack 6 - NONE ", + "IIO Stack 7 - NONE ", + "IIO Stack 8 - NONE ", + "IIO Stack 9 - NONE ", + "IIO Stack 10 - DMI ", +}; + struct iio_counter : public counter { std::vector data; }; @@ -882,17 +932,56 @@ class EagleStreamPlatformMapping: public IPlatformMapping { private: bool getRootBuses(std::map> &root_buses); - bool stackProbe(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket); + bool stackProbeXcc(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket); + bool stackProbeMcc(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket); bool eagleStreamDmiStackProbe(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket); bool eagleStreamPciStackProbe(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket); bool eagleStreamAcceleratorStackProbe(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket); + bool setChopValue(); + bool isXccPlatform() const { return m_chop == kXccChop; } + + const std::uint32_t kXccChop = 0b11; + const std::uint32_t kMccChop = 0b01; + + std::uint32_t m_chop; public: - EagleStreamPlatformMapping(uint32_t sockets_count) : IPlatformMapping(sockets_count) {} + EagleStreamPlatformMapping(uint32_t sockets_count) : IPlatformMapping(sockets_count), m_chop(0) {} ~EagleStreamPlatformMapping() = default; bool pciTreeDiscover(std::vector& iios) override; }; +bool EagleStreamPlatformMapping::setChopValue() +{ + for (uint16_t b = 0; b < 256; b++) { + struct pci pci_dev(0, b, 
SPR_PCU_CR3_REG_DEVICE, SPR_PCU_CR3_REG_FUNCTION); + if (!probe_pci(&pci_dev)) { + continue; + } + if (!((pci_dev.vendor_id == PCM_INTEL_PCI_VENDOR_ID) && (pci_dev.device_id == SPR_PCU_CR3_DID))) { + continue; + } + std::uint32_t capid4; + PciHandleType h(0, b, SPR_PCU_CR3_REG_DEVICE, SPR_PCU_CR3_REG_FUNCTION); + h.read32(SPR_CAPID4_OFFSET, &capid4); + if (capid4 == (std::numeric_limits::max)()) { + std::cerr << "Cannot read PCU RC3 register" << std::endl; + return false; + } + capid4 = SPR_CAPID4_GET_PHYSICAL_CHOP(capid4); + if (capid4 == kXccChop || capid4 == kMccChop) { + m_chop = capid4; + } + else { + std::cerr << "Unknown chop value " << capid4 << std::endl; + return false; + } + return true; + } + std::cerr << "Cannot find PCU RC3 registers on the system. Device ID is " << std::hex << SPR_PCU_CR3_DID << std::dec << std::endl; + return false; +} + bool EagleStreamPlatformMapping::getRootBuses(std::map> &root_buses) { bool mapped = true; @@ -924,12 +1013,13 @@ bool EagleStreamPlatformMapping::getRootBuses(std::map> ((cpuBusId % 4) * 8)) & 0xff; root_buses[package_id][pmuId] = bdf(domain, rootBus, 0, 0); cout << "Mapped CPU bus #" << cpuBusId << " (domain " << domain << " bus " << std::hex << rootBus << std::dec << ") to IO PMU #" @@ -947,12 +1037,13 @@ bool EagleStreamPlatformMapping::eagleStreamDmiStackProbe(int unit, const struct { struct iio_stack stack; stack.iio_unit_id = unit; - stack.stack_name = spr_iio_stack_names[unit]; + stack.stack_name = isXccPlatform() ? spr_xcc_iio_stack_names[unit] : spr_mcc_iio_stack_names[unit]; stack.busno = address.busno; stack.domain = address.domainno; struct iio_bifurcated_part pch_part; struct pci *pci = &pch_part.root_pci_dev; - pch_part.part_id = SPR_DMI_PART_ID; + auto dmi_part_id = isXccPlatform() ? 
SPR_XCC_DMI_PART_ID : SPR_MCC_DMI_PART_ID; + pch_part.part_id = dmi_part_id; pci->bdf = address; if (!probe_pci(pci)) { cerr << "Failed to probe DMI Stack: address: " << std::setw(4) << std::setfill('0') << std::hex << address.domainno << @@ -965,7 +1056,7 @@ bool EagleStreamPlatformMapping::eagleStreamDmiStackProbe(int unit, const struct if (!iio_on_socket.socket_id) probeDeviceRange(pch_part.child_pci_devs, pci->bdf.domainno, pci->secondary_bus_number, pci->subordinate_bus_number); - pci->parts_no.push_back(SPR_DMI_PART_ID); + pci->parts_no.push_back(dmi_part_id); stack.parts.push_back(pch_part); iio_on_socket.stacks.push_back(stack); @@ -981,7 +1072,7 @@ bool EagleStreamPlatformMapping::eagleStreamPciStackProbe(int unit, const struct stack.domain = address.domainno; stack.busno = address.busno; stack.iio_unit_id = unit; - stack.stack_name = spr_iio_stack_names[unit]; + stack.stack_name = isXccPlatform() ? spr_xcc_iio_stack_names[unit] : spr_mcc_iio_stack_names[unit]; for (int slot = 1; slot < 9; ++slot) { // Check if port is enabled @@ -1004,6 +1095,7 @@ bool EagleStreamPlatformMapping::eagleStreamPciStackProbe(int unit, const struct } } } + stack.parts.push_back(part); } } iio_on_socket.stacks.push_back(stack); @@ -1020,7 +1112,7 @@ bool EagleStreamPlatformMapping::eagleStreamAcceleratorStackProbe(int unit, cons // Channel mappings are checked on B0 stepping auto rb = address.busno; const std::vector acceleratorBuses{ rb, rb + 1, rb + 2, rb + 3 }; - stack.stack_name = spr_iio_stack_names[unit]; + stack.stack_name = isXccPlatform() ? spr_xcc_iio_stack_names[unit] : spr_mcc_iio_stack_names[unit]; for (auto& b : acceleratorBuses) { for (auto d = 0; d < 32; ++d) { for (auto f = 0; f < 8; ++f) { @@ -1041,10 +1133,10 @@ bool EagleStreamPlatformMapping::eagleStreamAcceleratorStackProbe(int unit, cons pci_dev.parts_no.push_back(2); break; case HQMV2_DID: - pci_dev.parts_no.push_back(5); + pci_dev.parts_no.push_back(isXccPlatform() ? 
SPR_XCC_HQM_PART_ID : SPR_MCC_HQM_PART_ID); break; case QATV2_DID: - pci_dev.parts_no.push_back(4); + pci_dev.parts_no.push_back(isXccPlatform() ? SPR_XCC_QAT_PART_ID : SPR_MCC_QAT_PART_ID); break; default: continue; @@ -1061,22 +1153,41 @@ bool EagleStreamPlatformMapping::eagleStreamAcceleratorStackProbe(int unit, cons return true; } -bool EagleStreamPlatformMapping::stackProbe(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket) +bool EagleStreamPlatformMapping::stackProbeXcc(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket) { switch (unit) { - case SPR_DMI_PMON_ID: + case SPR_XCC_DMI_PMON_ID: return eagleStreamDmiStackProbe(unit, address, iio_on_socket); - case SPR_PCIE_GEN5_0_PMON_ID: - case SPR_PCIE_GEN5_1_PMON_ID: - case SPR_PCIE_GEN5_2_PMON_ID: - case SPR_PCIE_GEN5_3_PMON_ID: - case SPR_PCIE_GEN5_4_PMON_ID: + case SPR_XCC_PCIE_GEN5_0_PMON_ID: + case SPR_XCC_PCIE_GEN5_1_PMON_ID: + case SPR_XCC_PCIE_GEN5_2_PMON_ID: + case SPR_XCC_PCIE_GEN5_3_PMON_ID: + case SPR_XCC_PCIE_GEN5_4_PMON_ID: return eagleStreamPciStackProbe(unit, address, iio_on_socket); - case SPR_IDX0_PMON_ID: - case SPR_IDX1_PMON_ID: - case SPR_IDX2_PMON_ID: - case SPR_IDX3_PMON_ID: + case SPR_XCC_IDX0_PMON_ID: + case SPR_XCC_IDX1_PMON_ID: + case SPR_XCC_IDX2_PMON_ID: + case SPR_XCC_IDX3_PMON_ID: + return eagleStreamAcceleratorStackProbe(unit, address, iio_on_socket); + default: + return false; + } +} + +bool EagleStreamPlatformMapping::stackProbeMcc(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket) +{ + switch (unit) + { + case SPR_MCC_DMI_PMON_ID: + return eagleStreamDmiStackProbe(unit, address, iio_on_socket); + case SPR_MCC_PCIE_GEN5_0_PMON_ID: + case SPR_MCC_PCIE_GEN5_1_PMON_ID: + case SPR_MCC_PCIE_GEN5_2_PMON_ID: + case SPR_MCC_PCIE_GEN5_3_PMON_ID: + case SPR_MCC_PCIE_GEN5_4_PMON_ID: + return eagleStreamPciStackProbe(unit, address, iio_on_socket); + case SPR_MCC_IDX0_PMON_ID: return 
eagleStreamAcceleratorStackProbe(unit, address, iio_on_socket); default: return false; @@ -1085,6 +1196,8 @@ bool EagleStreamPlatformMapping::stackProbe(int unit, const struct bdf &address, bool EagleStreamPlatformMapping::pciTreeDiscover(std::vector& iios) { + if (!setChopValue()) return false; + std::map> root_buses; if (!getRootBuses(root_buses)) { @@ -1096,8 +1209,15 @@ bool EagleStreamPlatformMapping::pciTreeDiscover(std::vectorfirst; for (auto rb = rbs_on_socket.cbegin(); rb != rbs_on_socket.cend(); ++rb) { - if (!stackProbe(rb->first, rb->second, iio_on_socket)) { - return false; + if (isXccPlatform()) { + if (!stackProbeXcc(rb->first, rb->second, iio_on_socket)) { + return false; + } + } + else { + if (!stackProbeMcc(rb->first, rb->second, iio_on_socket)) { + return false; + } } } std::sort(iio_on_socket.stacks.begin(), iio_on_socket.stacks.end()); From efc21e045b42a4c360ea144c9b2bc823b36f2e86 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 26 Sep 2023 12:51:12 +0200 Subject: [PATCH 24/43] extend max pcu number Change-Id: I3e2a23d5223b9b89e4eae5df213aa5044e9169ce --- src/cpucounters.cpp | 53 ++++++++---- src/cpucounters.h | 21 +++-- src/pcm-power.cpp | 195 +++++++++++++++++++++++--------------------- src/pcm-raw.cpp | 23 +++--- 4 files changed, 165 insertions(+), 127 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 0999998d..866b0df0 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -2026,6 +2026,7 @@ void PCM::globalFreezeUncoreCountersInternal(const unsigned long long int freeze void PCM::initUncorePMUsDirect() { + pcuPMUs.resize(num_sockets); for (uint32 s = 0; s < (uint32)num_sockets; ++s) { auto & handle = MSR[socketRefCore[s]]; @@ -2121,7 +2122,7 @@ void PCM::initUncorePMUsDirect() { case IVYTOWN: case JAKETOWN: - pcuPMUs.push_back( + pcuPMUs[s].push_back( UncorePMU( std::make_shared(handle, JKTIVT_PCU_MSR_PMON_BOX_CTL_ADDR), std::make_shared(handle, JKTIVT_PCU_MSR_PMON_CTL0_ADDR), @@ -2144,7 +2145,7 
@@ void PCM::initUncorePMUsDirect() case HASWELLX: case SKX: case ICX: - pcuPMUs.push_back( + pcuPMUs[s].push_back( UncorePMU( std::make_shared(handle, HSX_PCU_MSR_PMON_BOX_CTL_ADDR), std::make_shared(handle, HSX_PCU_MSR_PMON_CTL0_ADDR), @@ -2162,13 +2163,14 @@ void PCM::initUncorePMUsDirect() ); break; case SPR: - addPMUsFromDiscovery(pcuPMUs, SPR_PCU_BOX_TYPE, 0xE); - if (pcuPMUs.empty()) + addPMUsFromDiscovery(pcuPMUs[s], SPR_PCU_BOX_TYPE, 0xE); + if (pcuPMUs[s].empty()) { std::cerr << "ERROR: PCU PMU not found\n"; } break; } + assert(pcuPMUs[s].size() <= ServerUncoreCounterState::maxPUnits); // add MDF PMUs switch (cpu_model) @@ -2562,9 +2564,10 @@ void PCM::initUncorePMUsPerf() irpPMUs.resize(num_sockets); cboPMUs.resize(num_sockets); mdfPMUs.resize(num_sockets); + pcuPMUs.resize(num_sockets); for (uint32 s = 0; s < (uint32)num_sockets; ++s) { - populatePerfPMUs(s, enumeratePerfPMUs("pcu", 100), pcuPMUs, false, true); + populatePerfPMUs(s, enumeratePerfPMUs("pcu", 100), pcuPMUs[s], false, true); populatePerfPMUs(s, enumeratePerfPMUs("ubox", 100), uboxPMUs, true); populatePerfPMUs(s, enumeratePerfPMUs("cbox", 100), cboPMUs[s], false, true, true); populatePerfPMUs(s, enumeratePerfPMUs("cha", 200), cboPMUs[s], false, true, true); @@ -4673,9 +4676,12 @@ void PCM::cleanupUncorePMUs(const bool silent) pmu.cleanup(); } } - for (auto & pmu : pcuPMUs) + for (auto& spcuPMUs : pcuPMUs) { - pmu.cleanup(); + for (auto& pmu : spcuPMUs) + { + pmu.cleanup(); + } } for (auto& sPMUs : cxlPMUs) { @@ -5418,14 +5424,17 @@ void PCM::programPCU(uint32* PCUCntConf, const uint64 filter) uint32 refCore = socketRefCore[i]; TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux - pcuPMUs[i].initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN); - - if (pcuPMUs[i].filter[0].get()) + for (auto& pmu : pcuPMUs[i]) { - *pcuPMUs[i].filter[0] = filter; - } + pmu.initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN); - program(pcuPMUs[i], &PCUCntConf[0], &PCUCntConf[4], UNC_PMON_UNIT_CTL_FRZ_EN); + if 
(pmu.filter[0].get()) + { + *pmu.filter[0] = filter; + } + + program(pmu, &PCUCntConf[0], &PCUCntConf[4], UNC_PMON_UNIT_CTL_FRZ_EN); + } } } @@ -5731,7 +5740,10 @@ void PCM::freezeServerUncoreCounters() const auto refCore = socketRefCore[i]; TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux - pcuPMUs[i].freeze(UNC_PMON_UNIT_CTL_FRZ_EN); + for (auto& pmu : pcuPMUs[i]) + { + pmu.freeze(UNC_PMON_UNIT_CTL_FRZ_EN); + } if (IIOEventsAvailable()) { @@ -5783,7 +5795,10 @@ void PCM::unfreezeServerUncoreCounters() const auto refCore = socketRefCore[i]; TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux - pcuPMUs[i].unfreeze(UNC_PMON_UNIT_CTL_FRZ_EN); + for (auto& pmu : pcuPMUs[i]) + { + pmu.unfreeze(UNC_PMON_UNIT_CTL_FRZ_EN); + } if (IIOEventsAvailable()) { @@ -6588,9 +6603,13 @@ ServerUncoreCounterState PCM::getServerUncoreCounterState(uint32 socket) result.UBOXCounter[i] = *(uboxPMUs[socket].counterValue[i]); result.UncClocks = getUncoreClocks(socket); } - for (int i = 0; i < ServerUncoreCounterState::maxCounters && socket < pcuPMUs.size() && size_t(i) < pcuPMUs[socket].size(); ++i) + for (size_t u = 0; socket < pcuPMUs.size() && u < pcuPMUs[socket].size(); ++u) { - result.PCUCounter[i] = *pcuPMUs[socket].counterValue[i]; + for (int i = 0; i < ServerUncoreCounterState::maxCounters && size_t(i) < pcuPMUs[socket][u].size(); ++i) + { + assert(u < ServerUncoreCounterState::maxPUnits); + result.PCUCounter[u][i] = *pcuPMUs[socket][u].counterValue[i]; + } } for (size_t p = 0; p < getNumCXLPorts(socket); ++p) { diff --git a/src/cpucounters.h b/src/cpucounters.h index 9d89a356..3c3bf346 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -633,7 +633,7 @@ class PCM_API PCM bool programmed_core_pmu{false}; std::vector > MSR; std::vector > serverUncorePMUs; - std::vector pcuPMUs; + std::vector > pcuPMUs; std::vector > iioPMUs; std::vector > irpPMUs; std::vector uboxPMUs; @@ -1695,6 +1695,12 @@ class PCM_API PCM } return 0; 
} + //! \brief Returns the number of PUnits per socket + //! \return number of PUnits per socket + uint64 getPUnitsPerSocket() const + { + return (pcuPMUs.empty() == false) ? pcuPMUs[0].size() : 0; + } //! \brief Returns the number of detected integrated memory controllers per socket uint32 getMCPerSocket() const @@ -2913,9 +2919,9 @@ uint64 getEDCCounter(uint32 channel, uint32 counter, const CounterStateType & be \param after CPU counter state after the experiment */ template -uint64 getPCUCounter(uint32 counter, const CounterStateType & before, const CounterStateType & after) +uint64 getPCUCounter(uint32 unit, uint32 counter, const CounterStateType & before, const CounterStateType & after) { - return after.PCUCounter[counter] - before.PCUCounter[counter]; + return after.PCUCounter[unit][counter] - before.PCUCounter[unit][counter]; } /*! \brief Returns clock ticks of power control unit @@ -2923,9 +2929,9 @@ uint64 getPCUCounter(uint32 counter, const CounterStateType & before, const Coun \param after CPU counter state after the experiment */ template -uint64 getPCUClocks(const CounterStateType & before, const CounterStateType & after) +uint64 getPCUClocks(uint32 unit, const CounterStateType & before, const CounterStateType & after) { - return getPCUCounter(0, before, after); + return getPCUCounter(unit, 0, before, after); } /*! 
\brief Returns energy consumed by processor, excluding DRAM (measured in internal units) @@ -3154,6 +3160,7 @@ class ServerUncoreCounterState : public UncoreCounterState maxMDFs = 128, maxIIOStacks = 16, maxCXLPorts = 6, + maxPUnits = 5, maxCounters = 8 }; enum EventPosition @@ -3185,7 +3192,7 @@ class ServerUncoreCounterState : public UncoreCounterState std::array, maxControllers> M2MCounter; // M2M/iMC boxes x counter std::array, maxControllers> HACounter; // HA boxes x counter std::array, maxChannels> EDCCounter; // EDC controller X counter - std::array PCUCounter; + std::array, maxPUnits> PCUCounter; std::unordered_map freeRunningCounter; int32 PackageThermalHeadroom; uint64 InvariantTSC; // invariant time stamp counter @@ -3221,7 +3228,7 @@ class ServerUncoreCounterState : public UncoreCounterState template friend uint64 getEDCCounter(uint32 channel, uint32 counter, const CounterStateType & before, const CounterStateType & after); template - friend uint64 getPCUCounter(uint32 counter, const CounterStateType & before, const CounterStateType & after); + friend uint64 getPCUCounter(uint32 unit, uint32 counter, const CounterStateType & before, const CounterStateType & after); template friend uint64 getConsumedEnergy(const CounterStateType & before, const CounterStateType & after); template diff --git a/src/pcm-power.cpp b/src/pcm-power.cpp index 72b32ba4..581e486d 100644 --- a/src/pcm-power.cpp +++ b/src/pcm-power.cpp @@ -74,16 +74,16 @@ uint64 getPPDCycles(uint32 channel, const ServerUncoreCounterState & before, con return getMCCounter(channel, 2, before, after); } -double getNormalizedPCUCounter(uint32 counter, const ServerUncoreCounterState & before, const ServerUncoreCounterState & after) +double getNormalizedPCUCounter(uint32 unit, uint32 counter, const ServerUncoreCounterState & before, const ServerUncoreCounterState & after) { - return double(getPCUCounter(counter, before, after)) / double(getPCUClocks(before, after)); + return double(getPCUCounter(unit, 
counter, before, after)) / double(getPCUClocks(unit, before, after)); } -double getNormalizedPCUCounter(uint32 counter, const ServerUncoreCounterState & before, const ServerUncoreCounterState & after, PCM * m) +double getNormalizedPCUCounter(uint32 unit, uint32 counter, const ServerUncoreCounterState & before, const ServerUncoreCounterState & after, PCM * m) { const uint64 PCUClocks = (m->getPCUFrequency() * getInvariantTSC(before, after)) / m->getNominalFrequency(); // cout << "PCM Debug: PCU clocks " << PCUClocks << " PCU frequency: " << m->getPCUFrequency() << "\n"; - return double(getPCUCounter(counter, before, after)) / double(PCUClocks); + return double(getPCUCounter(unit, counter, before, after)) / double(PCUClocks); } int default_freq_band[3] = { 12, 20, 40 }; @@ -421,81 +421,89 @@ int mainThrows(int argc, char * argv[]) << "\n"; } } - switch (pcu_profile) + + for (uint32 u = 0; u < m->getPUnitsPerSocket(); ++u) { - case 0: - if (cpu_model == PCM::HASWELLX || cpu_model == PCM::BDX_DE || cpu_model == PCM::SKX) + auto printHeader = [&socket,&m,&u, &BeforeState, &AfterState] (const bool printPCUClocks) + { + cout << "S" << socket; + if (m->getPUnitsPerSocket() > 1) + { + cout << "U" << u; + } + if (printPCUClocks) + { + cout << "; PCUClocks: " << getPCUClocks(u, BeforeState[socket], AfterState[socket]); + } + }; + switch (pcu_profile) + { + case 0: + if (cpu_model == PCM::HASWELLX || cpu_model == PCM::BDX_DE || cpu_model == PCM::SKX) + break; + printHeader(true); + cout << "; Freq band 0/1/2 cycles: " << 100. * getNormalizedPCUCounter(u, 1, BeforeState[socket], AfterState[socket]) << "%" + << "; " << 100. * getNormalizedPCUCounter(u, 2, BeforeState[socket], AfterState[socket]) << "%" + << "; " << 100. * getNormalizedPCUCounter(u, 3, BeforeState[socket], AfterState[socket]) << "%" + << "\n"; break; - cout << "S" << socket - << "; PCUClocks: " << getPCUClocks(BeforeState[socket], AfterState[socket]) - << "; Freq band 0/1/2 cycles: " << 100. 
* getNormalizedPCUCounter(1, BeforeState[socket], AfterState[socket]) << "%" - << "; " << 100. * getNormalizedPCUCounter(2, BeforeState[socket], AfterState[socket]) << "%" - << "; " << 100. * getNormalizedPCUCounter(3, BeforeState[socket], AfterState[socket]) << "%" - << "\n"; - break; - case 1: - cout << "S" << socket - << "; PCUClocks: " << getPCUClocks(BeforeState[socket], AfterState[socket]) - << ((cpu_model == PCM::SKX)?"; core C0_1/C3/C6_7-state residency: ":"; core C0/C3/C6-state residency: ") - << getNormalizedPCUCounter(1, BeforeState[socket], AfterState[socket]) - << "; " << getNormalizedPCUCounter(2, BeforeState[socket], AfterState[socket]) - << "; " << getNormalizedPCUCounter(3, BeforeState[socket], AfterState[socket]) - << "\n"; - break; - - case 2: - cout << "S" << socket - << "; PCUClocks: " << getPCUClocks(BeforeState[socket], AfterState[socket]) - << "; Internal prochot cycles: " << getNormalizedPCUCounter(1, BeforeState[socket], AfterState[socket]) * 100. << " %" - << "; External prochot cycles:" << getNormalizedPCUCounter(2, BeforeState[socket], AfterState[socket]) * 100. << " %" - << "; Thermal freq limit cycles:" << getNormalizedPCUCounter(3, BeforeState[socket], AfterState[socket]) * 100. << " %" - << "\n"; - break; - - case 3: - cout << "S" << socket - << "; PCUClocks: " << getPCUClocks(BeforeState[socket], AfterState[socket]) - << "; Thermal freq limit cycles: " << getNormalizedPCUCounter(1, BeforeState[socket], AfterState[socket]) * 100. << " %" - << "; Power freq limit cycles:" << getNormalizedPCUCounter(2, BeforeState[socket], AfterState[socket]) * 100. << " %"; - if(cpu_model != PCM::SKX && cpu_model != PCM::ICX && cpu_model != PCM::SNOWRIDGE && cpu_model != PCM::SPR) - cout << "; Clipped freq limit cycles:" << getNormalizedPCUCounter(3, BeforeState[socket], AfterState[socket]) * 100. << " %"; - cout << "\n"; - break; + case 1: + printHeader(true); + cout << ((cpu_model == PCM::SKX) ? 
"; core C0_1/C3/C6_7-state residency: " : "; core C0/C3/C6-state residency: ") + << getNormalizedPCUCounter(u, 1, BeforeState[socket], AfterState[socket]) + << "; " << getNormalizedPCUCounter(u, 2, BeforeState[socket], AfterState[socket]) + << "; " << getNormalizedPCUCounter(u, 3, BeforeState[socket], AfterState[socket]) + << "\n"; + break; - case 4: - if (cpu_model == PCM::SKX || cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::SPR) - { - cout << "This PCU profile is not supported on your processor\n"; + case 2: + printHeader(true); + cout << "; Internal prochot cycles: " << getNormalizedPCUCounter(u, 1, BeforeState[socket], AfterState[socket]) * 100. << " %" + << "; External prochot cycles:" << getNormalizedPCUCounter(u, 2, BeforeState[socket], AfterState[socket]) * 100. << " %" + << "; Thermal freq limit cycles:" << getNormalizedPCUCounter(u, 3, BeforeState[socket], AfterState[socket]) * 100. << " %" + << "\n"; break; - } - cout << "S" << socket - << "; PCUClocks: " << getPCUClocks(BeforeState[socket], AfterState[socket]) - << "; OS freq limit cycles: " << getNormalizedPCUCounter(1, BeforeState[socket], AfterState[socket]) * 100. << " %" - << "; Power freq limit cycles:" << getNormalizedPCUCounter(2, BeforeState[socket], AfterState[socket]) * 100. << " %" - << "; Clipped freq limit cycles:" << getNormalizedPCUCounter(3, BeforeState[socket], AfterState[socket]) * 100. << " %" - << "\n"; - break; - case 5: - cout << "S" << socket - << "; PCUClocks: " << getPCUClocks(BeforeState[socket], AfterState[socket]) - << "; Frequency transition count: " << getPCUCounter(1, BeforeState[socket], AfterState[socket]) << " " - << "; Cycles spent changing frequency: " << getNormalizedPCUCounter(2, BeforeState[socket], AfterState[socket], m) * 100. 
<< " %"; - if (PCM::HASWELLX == cpu_model) { - cout << "; UFS transition count: " << getPCUCounter(3, BeforeState[socket], AfterState[socket]) << " "; - cout << "; UFS transition cycles: " << getNormalizedPCUCounter(0, BeforeState[socket], AfterState[socket], m) * 100. << " %"; - } - cout << "\n"; - break; - case 6: - cout << "S" << socket; - if (cpu_model == PCM::HASWELLX || PCM::BDX_DE == cpu_model) - cout << "; PC1e+ residency: " << getNormalizedPCUCounter(0, BeforeState[socket], AfterState[socket], m) * 100. << " %" - "; PC1e+ transition count: " << getPCUCounter(1, BeforeState[socket], AfterState[socket]) << " "; + case 3: + printHeader(true); + cout << "; Thermal freq limit cycles: " << getNormalizedPCUCounter(u, 1, BeforeState[socket], AfterState[socket]) * 100. << " %" + << "; Power freq limit cycles:" << getNormalizedPCUCounter(u, 2, BeforeState[socket], AfterState[socket]) * 100. << " %"; + if (cpu_model != PCM::SKX && cpu_model != PCM::ICX && cpu_model != PCM::SNOWRIDGE && cpu_model != PCM::SPR) + cout << "; Clipped freq limit cycles:" << getNormalizedPCUCounter(u, 3, BeforeState[socket], AfterState[socket]) * 100. << " %"; + cout << "\n"; + break; - switch (cpu_model) - { + case 4: + if (cpu_model == PCM::SKX || cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::SPR) + { + cout << "This PCU profile is not supported on your processor\n"; + break; + } + printHeader(true); + cout << "; OS freq limit cycles: " << getNormalizedPCUCounter(u, 1, BeforeState[socket], AfterState[socket]) * 100. << " %" + << "; Power freq limit cycles:" << getNormalizedPCUCounter(u, 2, BeforeState[socket], AfterState[socket]) * 100. << " %" + << "; Clipped freq limit cycles:" << getNormalizedPCUCounter(u, 3, BeforeState[socket], AfterState[socket]) * 100. 
<< " %" + << "\n"; + break; + case 5: + printHeader(true); + cout << "; Frequency transition count: " << getPCUCounter(u, 1, BeforeState[socket], AfterState[socket]) << " " + << "; Cycles spent changing frequency: " << getNormalizedPCUCounter(u, 2, BeforeState[socket], AfterState[socket], m) * 100. << " %"; + if (PCM::HASWELLX == cpu_model) { + cout << "; UFS transition count: " << getPCUCounter(u, 3, BeforeState[socket], AfterState[socket]) << " "; + cout << "; UFS transition cycles: " << getNormalizedPCUCounter(u, 0, BeforeState[socket], AfterState[socket], m) * 100. << " %"; + } + cout << "\n"; + break; + case 6: + printHeader(false); + if (cpu_model == PCM::HASWELLX || PCM::BDX_DE == cpu_model) + cout << "; PC1e+ residency: " << getNormalizedPCUCounter(u, 0, BeforeState[socket], AfterState[socket], m) * 100. << " %" + "; PC1e+ transition count: " << getPCUCounter(u, 1, BeforeState[socket], AfterState[socket]) << " "; + + switch (cpu_model) + { case PCM::IVYTOWN: case PCM::HASWELLX: case PCM::BDX_DE: @@ -504,32 +512,33 @@ int mainThrows(int argc, char * argv[]) case PCM::SNOWRIDGE: case PCM::SPR: cout << "; PC2 residency: " << getPackageCStateResidency(2, BeforeState[socket], AfterState[socket]) * 100. << " %"; - cout << "; PC2 transitions: " << getPCUCounter(2, BeforeState[socket], AfterState[socket]) << " "; + cout << "; PC2 transitions: " << getPCUCounter(u, 2, BeforeState[socket], AfterState[socket]) << " "; cout << "; PC3 residency: " << getPackageCStateResidency(3, BeforeState[socket], AfterState[socket]) * 100. << " %"; cout << "; PC6 residency: " << getPackageCStateResidency(6, BeforeState[socket], AfterState[socket]) * 100. 
<< " %"; - cout << "; PC6 transitions: " << getPCUCounter(3, BeforeState[socket], AfterState[socket]) << " "; + cout << "; PC6 transitions: " << getPCUCounter(u, 3, BeforeState[socket], AfterState[socket]) << " "; break; - } + } - cout << "\n"; - break; - case 7: - if (PCM::HASWELLX == cpu_model || PCM::BDX_DE == cpu_model || PCM::BDX == cpu_model) { - cout << "S" << socket - << "; UFS_TRANSITIONS_PERF_P_LIMIT: " << getNormalizedPCUCounter(0, BeforeState[socket], AfterState[socket], m) * 100. << " %" - << "; UFS_TRANSITIONS_IO_P_LIMIT: " << getNormalizedPCUCounter(1, BeforeState[socket], AfterState[socket], m) * 100. << " %" - << "; UFS_TRANSITIONS_UP_RING_TRAFFIC: " << getNormalizedPCUCounter(2, BeforeState[socket], AfterState[socket], m) * 100. << " %" - << "; UFS_TRANSITIONS_UP_STALL_CYCLES: " << getNormalizedPCUCounter(3, BeforeState[socket], AfterState[socket], m) * 100. << " %" - << "\n"; - } - break; - case 8: - if (PCM::HASWELLX == cpu_model || PCM::BDX_DE == cpu_model || PCM::BDX == cpu_model) { - cout << "S" << socket - << "; UFS_TRANSITIONS_DOWN: " << getNormalizedPCUCounter(0, BeforeState[socket], AfterState[socket], m) * 100. << " %" - << "\n"; + cout << "\n"; + break; + case 7: + if (PCM::HASWELLX == cpu_model || PCM::BDX_DE == cpu_model || PCM::BDX == cpu_model) { + printHeader(false); + cout << "; UFS_TRANSITIONS_PERF_P_LIMIT: " << getNormalizedPCUCounter(u, 0, BeforeState[socket], AfterState[socket], m) * 100. << " %" + << "; UFS_TRANSITIONS_IO_P_LIMIT: " << getNormalizedPCUCounter(u, 1, BeforeState[socket], AfterState[socket], m) * 100. << " %" + << "; UFS_TRANSITIONS_UP_RING_TRAFFIC: " << getNormalizedPCUCounter(u, 2, BeforeState[socket], AfterState[socket], m) * 100. << " %" + << "; UFS_TRANSITIONS_UP_STALL_CYCLES: " << getNormalizedPCUCounter(u, 3, BeforeState[socket], AfterState[socket], m) * 100. 
<< " %" + << "\n"; + } + break; + case 8: + if (PCM::HASWELLX == cpu_model || PCM::BDX_DE == cpu_model || PCM::BDX == cpu_model) { + printHeader(false); + cout << "; UFS_TRANSITIONS_DOWN: " << getNormalizedPCUCounter(u, 0, BeforeState[socket], AfterState[socket], m) * 100. << " %" + << "\n"; + } + break; } - break; } cout << "S" << socket diff --git a/src/pcm-raw.cpp b/src/pcm-raw.cpp index e47fe39a..501d4af8 100644 --- a/src/pcm-raw.cpp +++ b/src/pcm-raw.cpp @@ -1645,9 +1645,9 @@ void printTransposed(const PCM::RawPMUConfigs& curPMUConfigs, else if (type == "pcu") { choose(outputType, - [&]() { printUncoreRows(nullptr, 1U, ""); }, - [&]() { printUncoreRows(nullptr, 1U, type); }, - [&]() { printUncoreRows([](const uint32, const uint32 i, const ServerUncoreCounterState& before, const ServerUncoreCounterState& after) { return getPCUCounter(i, before, after); }, 1U, ""); + [&]() { printUncoreRows(nullptr, (uint32) m->getPUnitsPerSocket(), "P"); }, + [&]() { printUncoreRows(nullptr, (uint32) m->getPUnitsPerSocket(), type); }, + [&]() { printUncoreRows([](const uint32 u, const uint32 i, const ServerUncoreCounterState& before, const ServerUncoreCounterState& after) { return getPCUCounter(u, i, before, after); }, 1U, ""); }); } else if (type == "ubox") @@ -1936,14 +1936,17 @@ void print(const PCM::RawPMUConfigs& curPMUConfigs, { for (uint32 s = 0; s < m->getNumSockets(); ++s) { - int i = 0; - for (auto& event : events) + for (uint32 u = 0; u < m->getPUnitsPerSocket(); ++u) { - choose(outputType, - [s]() { cout << "SKT" << s << separator; }, - [&event, &i]() { if (event.second.empty()) cout << "PCUEvent" << i << separator; else cout << event.second << separator; }, - [&]() { cout << getPCUCounter(i, BeforeUncoreState[s], AfterUncoreState[s]) << separator; }); - ++i; + int i = 0; + for (auto& event : events) + { + choose(outputType, + [s, u]() { cout << "SKT" << s << "P" << u << separator; }, + [&event, &i]() { if (event.second.empty()) cout << "PCUEvent" << i << separator; 
else cout << event.second << separator; }, + [&]() { cout << getPCUCounter(u, i, BeforeUncoreState[s], AfterUncoreState[s]) << separator; }); + ++i; + } } } } From cd07494abca59b54a27c48184ab9a7e0fa16edb1 Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Wed, 27 Sep 2023 09:47:58 +0200 Subject: [PATCH 25/43] print additional uncore PMU unit information --- src/cpucounters.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 866b0df0..7a9d2fbb 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -1984,6 +1984,19 @@ void PCM::initUncoreObjects() { initUncorePMUsDirect(); } + + std::cerr << "Info: " << uboxPMUs.size() << " UBOX units detected.\n"; + for (uint32 s = 0; s < (uint32)num_sockets; ++s) + { + std::cerr << "Socket " << s << ":" << + " " << ((s < pcuPMUs.size()) ? pcuPMUs[s].size() : 0) << " PCU units detected." + " " << ((s < iioPMUs.size()) ? iioPMUs[s].size() : 0) << " IIO units detected." + " " << ((s < irpPMUs.size()) ? irpPMUs[s].size() : 0) << " IRP units detected." + " " << ((s < cboPMUs.size()) ? cboPMUs[s].size() : 0) << " CHA/CBO units detected." + " " << ((s < mdfPMUs.size()) ? mdfPMUs[s].size() : 0) << " MDF units detected." + " " << ((s < cxlPMUs.size()) ? cxlPMUs[s].size() : 0) << " CXL units detected." 
+ "\n"; + } } void PCM::globalFreezeUncoreCounters() From 0819793e2e4101ae5616a490d1a0d086213b5a6c Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Wed, 27 Sep 2023 10:30:00 +0200 Subject: [PATCH 26/43] refactor IIO PMU init --- src/cpucounters.cpp | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 7a9d2fbb..10b7a6ca 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -2200,9 +2200,10 @@ void PCM::initUncorePMUsDirect() } // init IIO addresses - if (getCPUModel() == PCM::SKX) + iioPMUs.resize(num_sockets); + switch (getCPUModel()) { - iioPMUs.resize(num_sockets); + case PCM::SKX: for (uint32 s = 0; s < (uint32)num_sockets; ++s) { auto & handle = MSR[socketRefCore[s]]; @@ -2221,10 +2222,8 @@ void PCM::initUncorePMUsDirect() ); } } - } - else if (getCPUModel() == PCM::ICX) - { - iioPMUs.resize(num_sockets); + break; + case PCM::ICX: for (uint32 s = 0; s < (uint32)num_sockets; ++s) { auto & handle = MSR[socketRefCore[s]]; @@ -2243,10 +2242,8 @@ void PCM::initUncorePMUsDirect() ); } } - } - else if (getCPUModel() == PCM::SNOWRIDGE) - { - iioPMUs.resize(num_sockets); + break; + case PCM::SNOWRIDGE: for (uint32 s = 0; s < (uint32)num_sockets; ++s) { auto & handle = MSR[socketRefCore[s]]; @@ -2265,11 +2262,8 @@ void PCM::initUncorePMUsDirect() ); } } - } - - if (getCPUModel() == PCM::SPR) - { - iioPMUs.resize(num_sockets); + break; + case PCM::SPR: for (uint32 s = 0; s < (uint32)num_sockets; ++s) { auto & handle = MSR[socketRefCore[s]]; @@ -2288,9 +2282,9 @@ void PCM::initUncorePMUsDirect() ); } } + break; } - //init the IDX accelerator auto createIDXPMU = [](const size_t addr, const size_t mapSize, const size_t numaNode, const size_t socketId) -> IDX_PMU { From 56a4ba6cd88ddd418493eb06e66e4ed64670b7ae Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 4 Oct 2023 08:44:24 +0200 Subject: [PATCH 27/43] factor out bit insert function Change-Id: 
I9293c424d48166620c0b42ec10920d692cbeef8e --- src/pcm-raw.cpp | 4 +--- src/utils.h | 7 +++++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/pcm-raw.cpp b/src/pcm-raw.cpp index 501d4af8..888592dc 100644 --- a/src/pcm-raw.cpp +++ b/src/pcm-raw.cpp @@ -648,9 +648,7 @@ AddEventStatus addEventFromDB(PCM::RawPMUConfigs& curPMUConfigs, string fullEven if (cfg >= config.first.size()) throw std::runtime_error("Config field value is out of bounds"); const auto width = uint64_t(fieldDescriptionObj["Width"]); assert(width <= 64); - const uint64 mask = (width == 64) ? (~0ULL) : ((1ULL << width) - 1ULL); // 1 -> 1b, 2 -> 11b, 3 -> 111b - config.first[cfg] &= ~(mask << position); // clear - config.first[cfg] |= (value & mask) << position; + config.first[cfg] = insertBits(config.first[cfg], value, position, width); }; auto PMUObj = (*PMURegisterDeclarations)[pmuName]; if (PMUObj.error() == NO_SUCH_FIELD) diff --git a/src/utils.h b/src/utils.h index e4779467..eadc11df 100644 --- a/src/utils.h +++ b/src/utils.h @@ -605,5 +605,12 @@ int readMaxFromSysFS(const char * path); bool readMapFromSysFS(const char * path, std::unordered_map &result, bool silent = false); #endif +inline uint64 insertBits(uint64 input, const uint64 value, const int64_t position, const uint64 width) +{ + const uint64 mask = (width == 64) ? 
(~0ULL) : ((1ULL << width) - 1ULL); // 1 -> 1b, 2 -> 11b, 3 -> 111b + input &= ~(mask << position); // clear + input |= (value & mask) << position; + return input; +} } // namespace pcm From 389f46f86a1b4772d87a263938a880c46e482020 Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Wed, 4 Oct 2023 14:33:10 +0200 Subject: [PATCH 28/43] refactor uncore pmu discovery --- src/uncore_pmu_discovery.cpp | 74 ++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 36 deletions(-) diff --git a/src/uncore_pmu_discovery.cpp b/src/uncore_pmu_discovery.cpp index 49f13623..99630225 100644 --- a/src/uncore_pmu_discovery.cpp +++ b/src/uncore_pmu_discovery.cpp @@ -76,56 +76,58 @@ UncorePMUDiscovery::UncorePMUDiscovery() // std::cout << "Intel device scan. found "<< std::hex << group << ":" << bus << ":" << device << ":" << function << " " << device_id << " with capability list\n" << std::dec; union { struct { - uint32 id:16; - uint32 version:4; - uint32 next:12; + uint64 cap_id:16; + uint64 cap_version:4; + uint64 cap_next:12; + uint64 vsec_id:16; + uint64 vsec_version:4; + uint64 vsec_length:12; + uint64 entryID:16; + uint64 NumEntries:8; + uint64 EntrySize:8; + uint64 tBIR:3; + uint64 Address:29; } fields; - uint32 value; + uint64 raw_value64[2]; + uint32 raw_value32[4]; } header; + uint64 offset = 0x100; do { - if (offset == 0 || h.read32(offset, &header.value) != sizeof(uint32) || header.value == 0) + if (offset == 0 || h.read32(offset, &header.raw_value32[0]) != sizeof(uint32) || header.raw_value32[0] == 0) + { + return; + } + if (h.read64(offset, &header.raw_value64[0]) != sizeof(uint64) || h.read64(offset + sizeof(uint64), &header.raw_value64[1]) != sizeof(uint64)) { return; } - // std::cout << "offset " << offset << "\n"; - if (header.fields.id == 0x23) // UNCORE_EXT_CAP_ID_DISCOVERY + // std::cout << "offset 0x" << std::hex << offset << " header.fields.cap_id: 0x" << header.fields.cap_id << std::dec << "\n"; + if (header.fields.cap_id == 0xb) // Vendor 
Specific Information { - // std::cout << "found UNCORE_EXT_CAP_ID_DISCOVERY\n"; - uint32 entryID = 0; - constexpr auto UNCORE_DISCOVERY_DVSEC_OFFSET = 8; - if (h.read32(offset + UNCORE_DISCOVERY_DVSEC_OFFSET, &entryID) == sizeof(uint32)) // read at UNCORE_DISCOVERY_DVSEC_OFFSET + // std::cout << ".. found Vendor Specific Information ID 0x" << std::hex << header.fields.vsec_id << " " << std::dec << " len:" << header.fields.vsec_length << "\n"; + } + else if (header.fields.cap_id == 0x23) // UNCORE_EXT_CAP_ID_DISCOVERY + { + // std::cout << ".. found UNCORE_EXT_CAP_ID_DISCOVERY entryID: 0x" << std::hex << header.fields.entryID << std::dec << "\n"; + if (header.fields.entryID == 1) // UNCORE_DISCOVERY_DVSEC_ID_PMON { - entryID &= 0xffff; // apply UNCORE_DISCOVERY_DVSEC_ID_MASK - if (entryID == 1) // UNCORE_DISCOVERY_DVSEC_ID_PMON + // std::cout << ".... found UNCORE_DISCOVERY_DVSEC_ID_PMON\n"; + auto barOffset = 0x10 + header.fields.tBIR * 4; + uint32 bar = 0; + if (h.read32(barOffset, &bar) == sizeof(uint32) && bar != 0) // read bar + { + bar &= ~4095; + processTables(bar); + } + else { - // std::cout << "found UNCORE_DISCOVERY_DVSEC_ID_PMON\n"; - uint32 bir = 0; - if (h.read32(offset + UNCORE_DISCOVERY_DVSEC_OFFSET + 4, &bir) == sizeof(uint32)) // read "bir" value (2:0) - { - bir &= 7; - auto barOffset = 0x10 + bir * 4; - uint32 bar = 0; - if (h.read32(barOffset, &bar) == sizeof(uint32) && bar != 0) // read bar - { - bar &= ~4095; - processTables(bar); - return; - } - else - { - std::cerr << "Error: can't read bar from offset " << barOffset << " \n"; - } - } - else - { - std::cerr << "Error: can't read bir\n"; - } + std::cerr << "Error: can't read bar from offset " << barOffset << " \n"; } } } - offset = header.fields.next & ~3; + offset = header.fields.cap_next & ~3; } while (1); } }); From 4d9249ff6b4ea38714d430f4e2bb49c01e45ab34 Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Wed, 4 Oct 2023 15:35:15 +0200 Subject: [PATCH 29/43] refactor DVSEC processing --- 
src/pci.h | 64 +++++++++++++++++++++++++++++++++ src/uncore_pmu_discovery.cpp | 68 +++--------------------------------- 2 files changed, 68 insertions(+), 64 deletions(-) diff --git a/src/pci.h b/src/pci.h index 76f7852b..f200466c 100644 --- a/src/pci.h +++ b/src/pci.h @@ -233,6 +233,70 @@ inline void forAllIntelDevices(F f, int requestedDevice = -1, int requestedFunct } } +union VSEC { + struct { + uint64 cap_id:16; + uint64 cap_version:4; + uint64 cap_next:12; + uint64 vsec_id:16; + uint64 vsec_version:4; + uint64 vsec_length:12; + uint64 entryID:16; + uint64 NumEntries:8; + uint64 EntrySize:8; + uint64 tBIR:3; + uint64 Address:29; + } fields; + uint64 raw_value64[2]; + uint32 raw_value32[4]; +}; + +template <class MatchFunc, class ProcessFunc> +void processDVSEC(MatchFunc matchFunc, ProcessFunc processFunc) +{ + forAllIntelDevices([&](const uint32 group, const uint32 bus, const uint32 device, const uint32 function, const uint32 /* device_id */) + { + uint32 status{0}; + PciHandleType h(group, bus, device, function); + h.read32(6, &status); // read status + if (status & 0x10) // has capability list + { + // std::cout << "Intel device scan. found "<< std::hex << group << ":" << bus << ":" << device << ":" << function << " " << device_id << " with capability list\n" << std::dec; + VSEC header; + uint64 offset = 0x100; + do + { + if (offset == 0 || h.read32(offset, &header.raw_value32[0]) != sizeof(uint32) || header.raw_value32[0] == 0) + { + return; + } + if (h.read64(offset, &header.raw_value64[0]) != sizeof(uint64) || h.read64(offset + sizeof(uint64), &header.raw_value64[1]) != sizeof(uint64)) + { + return; + } + // std::cout << "offset 0x" << std::hex << offset << " header.fields.cap_id: 0x" << header.fields.cap_id << std::dec << "\n"; + // std::cout << ".. found entryID: 0x" << std::hex << header.fields.entryID << std::dec << "\n"; + if (matchFunc(header)) // UNCORE_DISCOVERY_DVSEC_ID_PMON + { + // std::cout << ".... 
found UNCORE_DISCOVERY_DVSEC_ID_PMON\n"; + auto barOffset = 0x10 + header.fields.tBIR * 4; + uint32 bar = 0; + if (h.read32(barOffset, &bar) == sizeof(uint32) && bar != 0) // read bar + { + bar &= ~4095; + processFunc(bar); + } + else + { + std::cerr << "Error: can't read bar from offset " << barOffset << " \n"; + } + } + offset = header.fields.cap_next & ~3; + } while (1); + } + }); +} + } // namespace pcm #endif diff --git a/src/uncore_pmu_discovery.cpp b/src/uncore_pmu_discovery.cpp index 99630225..2e319b30 100644 --- a/src/uncore_pmu_discovery.cpp +++ b/src/uncore_pmu_discovery.cpp @@ -65,72 +65,12 @@ UncorePMUDiscovery::UncorePMUDiscovery() ++socket; }; try { - forAllIntelDevices( - [&processTables](const uint32 group, const uint32 bus, const uint32 device, const uint32 function, const uint32 /* device_id */) + processDVSEC([](const VSEC & vsec) { - uint32 status{0}; - PciHandleType h(group, bus, device, function); - h.read32(6, &status); // read status - if (status & 0x10) // has capability list - { - // std::cout << "Intel device scan. 
found "<< std::hex << group << ":" << bus << ":" << device << ":" << function << " " << device_id << " with capability list\n" << std::dec; - union { - struct { - uint64 cap_id:16; - uint64 cap_version:4; - uint64 cap_next:12; - uint64 vsec_id:16; - uint64 vsec_version:4; - uint64 vsec_length:12; - uint64 entryID:16; - uint64 NumEntries:8; - uint64 EntrySize:8; - uint64 tBIR:3; - uint64 Address:29; - } fields; - uint64 raw_value64[2]; - uint32 raw_value32[4]; - } header; + return vsec.fields.cap_id == 0x23 // UNCORE_EXT_CAP_ID_DISCOVERY + && vsec.fields.entryID == 1; // UNCORE_DISCOVERY_DVSEC_ID_PMON + }, processTables); - uint64 offset = 0x100; - do - { - if (offset == 0 || h.read32(offset, &header.raw_value32[0]) != sizeof(uint32) || header.raw_value32[0] == 0) - { - return; - } - if (h.read64(offset, &header.raw_value64[0]) != sizeof(uint64) || h.read64(offset + sizeof(uint64), &header.raw_value64[1]) != sizeof(uint64)) - { - return; - } - // std::cout << "offset 0x" << std::hex << offset << " header.fields.cap_id: 0x" << header.fields.cap_id << std::dec << "\n"; - if (header.fields.cap_id == 0xb) // Vendor Specific Information - { - // std::cout << ".. found Vendor Specific Information ID 0x" << std::hex << header.fields.vsec_id << " " << std::dec << " len:" << header.fields.vsec_length << "\n"; - } - else if (header.fields.cap_id == 0x23) // UNCORE_EXT_CAP_ID_DISCOVERY - { - // std::cout << ".. found UNCORE_EXT_CAP_ID_DISCOVERY entryID: 0x" << std::hex << header.fields.entryID << std::dec << "\n"; - if (header.fields.entryID == 1) // UNCORE_DISCOVERY_DVSEC_ID_PMON - { - // std::cout << ".... 
found UNCORE_DISCOVERY_DVSEC_ID_PMON\n"; - auto barOffset = 0x10 + header.fields.tBIR * 4; - uint32 bar = 0; - if (h.read32(barOffset, &bar) == sizeof(uint32) && bar != 0) // read bar - { - bar &= ~4095; - processTables(bar); - } - else - { - std::cerr << "Error: can't read bar from offset " << barOffset << " \n"; - } - } - } - offset = header.fields.cap_next & ~3; - } while (1); - } - }); } catch (...) { std::cerr << "WARNING: enumeration of devices in UncorePMUDiscovery failed\n"; From 0bf18dde20a80266ba5b9cbf335ebe400d340ec0 Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Thu, 5 Oct 2023 09:47:07 +0200 Subject: [PATCH 30/43] implement and use mmio_memcpy --- src/mmio.cpp | 29 ++++++++++++++++++++++++++++- src/mmio.h | 2 ++ src/pci.h | 2 +- src/uncore_pmu_discovery.cpp | 21 +++------------------ src/utils.h | 14 ++++++++++++++ 5 files changed, 48 insertions(+), 20 deletions(-) diff --git a/src/mmio.cpp b/src/mmio.cpp index c3a12ba5..c729ca83 100644 --- a/src/mmio.cpp +++ b/src/mmio.cpp @@ -21,9 +21,10 @@ #ifdef _MSC_VER #include -#include "utils.h" #endif +#include "utils.h" +#include #include namespace pcm { @@ -266,4 +267,30 @@ MMIORange::~MMIORange() #endif +void mmio_memcpy(void * dest_, const uint64 src, const size_t n, const bool checkFailures) +{ + assert((src % sizeof(uint32)) == 0); + assert((n % sizeof(uint32)) == 0); + + const uint64 end = src + n; + const uint64 mapBegin = roundDownTo4K(src); + const uint64 mapSize = roundUpTo4K(end) - mapBegin; + uint32 * dest = (uint32 *)dest_; + MMIORange range(mapBegin, mapSize); + + for (uint64 i = src; i < end; i += sizeof(uint32), ++dest) + { + const auto value = range.read32(i - mapBegin); + if (checkFailures && value == ~uint32(0)) + { + // a bad read + std::ostringstream strstr; + strstr << "Failed to read memory at 0x" << std::hex << i << std::dec << "\n"; + std::cerr << strstr.str(); + throw std::runtime_error(strstr.str()); + } + *dest = value; + } +} + } // namespace pcm diff --git a/src/mmio.h 
b/src/mmio.h index a636d26f..ea5ed18b 100644 --- a/src/mmio.h +++ b/src/mmio.h @@ -158,4 +158,6 @@ class MMIORange }; #endif +void mmio_memcpy(void * dest, const uint64 src, const size_t n, const bool checkFailures); + } // namespace pcm diff --git a/src/pci.h b/src/pci.h index f200466c..da2f15e1 100644 --- a/src/pci.h +++ b/src/pci.h @@ -284,7 +284,7 @@ void processDVSEC(MatchFunc matchFunc, ProcessFunc processFunc) if (h.read32(barOffset, &bar) == sizeof(uint32) && bar != 0) // read bar { bar &= ~4095; - processFunc(bar); + processFunc(bar, header); } else { diff --git a/src/uncore_pmu_discovery.cpp b/src/uncore_pmu_discovery.cpp index 2e319b30..aa2685a0 100644 --- a/src/uncore_pmu_discovery.cpp +++ b/src/uncore_pmu_discovery.cpp @@ -18,30 +18,15 @@ UncorePMUDiscovery::UncorePMUDiscovery() return; } unsigned socket = 0; - auto processTables = [&socket,this](const uint64 bar) + auto processTables = [&socket,this](const uint64 bar, const VSEC &) { constexpr size_t UncoreDiscoverySize = 3UL; union UncoreGlobalDiscovery { GlobalPMU pmu; uint64 table[UncoreDiscoverySize]; }; - MMIORange range(bar, UNCORE_DISCOVERY_MAP_SIZE); // mmio range with UNCORE_DISCOVERY_MAP_SIZE bytes UncoreGlobalDiscovery global; - auto copyTable = [&range,&UncoreDiscoverySize,&bar](uint64 * table, const size_t offset) - { - for (size_t i = 0; i < UncoreDiscoverySize; ++i) - { - const auto pos = offset + i * sizeof(uint64); - assert(pos < UNCORE_DISCOVERY_MAP_SIZE); - table[i] = range.read64(pos); - if (table[i] == ~0ULL) - { - std::cerr << "Failed to read memory at 0x" << std::hex << bar << " + 0x" << pos << std::dec << "\n"; - throw std::exception(); - } - } - }; - copyTable(global.table, 0); + mmio_memcpy(global.table, bar, UncoreDiscoverySize * sizeof(uint64), true); globalPMUs.push_back(global.pmu); union UncoreUnitDiscovery { BoxPMU pmu; @@ -52,7 +37,7 @@ UncorePMUDiscovery::UncorePMUDiscovery() BoxPMUMap boxPMUMap; for (size_t u = 0; u < global.pmu.maxUnits; ++u) { - 
copyTable(unit.table, (u+1) * step); + mmio_memcpy(unit.table, bar + (u+1) * step, UncoreDiscoverySize * sizeof(uint64), true); if (unit.table[0] == 0 && unit.table[1] == 0) { // invalid entry diff --git a/src/utils.h b/src/utils.h index eadc11df..2cea8828 100644 --- a/src/utils.h +++ b/src/utils.h @@ -613,4 +613,18 @@ inline uint64 insertBits(uint64 input, const uint64 value, const int64_t positio return input; } +inline uint64 roundDownTo4K(uint64 number) { + return number & ~0xFFFULL; // Mask the lower 12 bits to round down to 4K +} + +inline uint64 roundUpTo4K(uint64 number) { + if (number % 4096ULL == 0ULL) { + // Already a multiple of 4K + return number; + } else { + // Round up to the next multiple of 4K + return ((number / 4096ULL) + 1ULL) * 4096ULL; + } +} + } // namespace pcm From 982fb7a85e54deedb9f5113109af38fb5aa691af Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Thu, 5 Oct 2023 13:08:26 +0200 Subject: [PATCH 31/43] implement pcm-tpmi utility Change-Id: Ic853168b08d31e84e46cd229d2ede788ef9e3c19 --- src/CMakeLists.txt | 2 +- src/pcm-tpmi.cpp | 208 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 src/pcm-tpmi.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d86a9682..65b2d630 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -3,7 +3,7 @@ # All pcm-* executables -set(PROJECT_NAMES pcm pcm-numa pcm-latency pcm-power pcm-msr pcm-memory pcm-tsx pcm-pcie pcm-core pcm-iio pcm-lspci pcm-pcicfg pcm-mmio pcm-raw pcm-accel) +set(PROJECT_NAMES pcm pcm-numa pcm-latency pcm-power pcm-msr pcm-memory pcm-tsx pcm-pcie pcm-core pcm-iio pcm-lspci pcm-pcicfg pcm-mmio pcm-tpmi pcm-raw pcm-accel) file(GLOB COMMON_SOURCES msr.cpp cpucounters.cpp pci.cpp mmio.cpp bw.cpp utils.cpp topology.cpp debug.cpp threadpool.cpp uncore_pmu_discovery.cpp) diff --git a/src/pcm-tpmi.cpp b/src/pcm-tpmi.cpp new file mode 100644 index 00000000..4176a1f7 --- /dev/null +++ b/src/pcm-tpmi.cpp @@ -0,0 
+1,208 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2023 Intel Corporation + +// written by Roman Dementiev +#include "cpucounters.h" +#ifdef _MSC_VER +#include +#include "windows/windriver.h" +#else +#include +#endif +#include +#include +#include +#include +#ifdef _MSC_VER +#include "freegetopt/getopt.h" +#endif + +using namespace pcm; + +void print_usage(const char * progname) +{ + std::cout << "Usage " << progname << " [-w value] [-d] [-b low:high] ID offset\n\n"; + std::cout << " Reads/writes TPMI (Topology Aware Register and PM Capsule Interface) register \n"; + std::cout << " ID : TPMI ID\n"; + std::cout << " offset : register offset\n"; + std::cout << " -w value : write the value before reading \n"; + std::cout << " -b low:high : read or write only low..high bits of the register\n"; + std::cout << " -d : output all numbers in dec (default is hex)\n"; + std::cout << " -v : verbose ouput\n"; + std::cout << " --version : print application version\n"; + std::cout << "\n"; +} + +PCM_MAIN_NOTHROW; + +int mainThrows(int argc, char * argv[]) +{ + if(print_version(argc, argv)) + return 0; + + std::cout << "\n Intel(r) Performance Counter Monitor " << PCM_VERSION << "\n"; + + std::cout << "\n TPMI (Topology Aware Register and PM Capsule Interface) read/write utility\n\n"; + + uint64 value = 0; + bool write = false; + bool dec = false; + bool verbose = false; + std::pair bits{-1, -1}; + + int my_opt = -1; + while ((my_opt = getopt(argc, argv, "w:dvb:")) != -1) + { + switch (my_opt) + { + case 'w': + write = true; + value = (pcm::uint32)read_number(optarg); + break; + case 'd': + dec = true; + break; + case 'v': + verbose = true; + break; + case 'b': + { + const auto bitsArray = pcm::split(std::string(optarg),':'); + assert(bitsArray.size() == 2); + bits.first = (int64)read_number(bitsArray[0].c_str()); + bits.second = (int64)read_number(bitsArray[1].c_str()); + assert(bits.first >= 0); + assert(bits.second >= 0); + assert(bits.first < 64); + 
assert(bits.second < 64); + if (bits.first > bits.second) + { + std::swap(bits.first, bits.second); + } + } + break; + default: + print_usage(argv[0]); + return -1; + } + } + + if (optind + 1 >= argc) + { + print_usage(argv[0]); + return -1; + } + + uint64 requestedID = (uint64)read_number(argv[optind]); + uint64 requestedRelativeOffset = (uint64)read_number(argv[optind + 1]); + + #ifdef _MSC_VER + // Increase the priority a bit to improve context switching delays on Windows + SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_ABOVE_NORMAL); + + // WARNING: This driver code (msr.sys) is only for testing purposes, not for production use + Driver drv = Driver(Driver::msrLocalPath()); + // drv.stop(); // restart driver (usually not needed) + if (!drv.start()) + { + tcerr << "Can not load MSR driver.\n"; + tcerr << "You must have a signed driver at " << drv.driverPath() << " and have administrator rights to run this program\n"; + return -1; + } + #endif + + processDVSEC([](const VSEC & vsec) + { + return vsec.fields.cap_id == 0xb // Vendor Specific DVSEC + && vsec.fields.vsec_id == 0x42; // TPMI PM_Features + }, [&](const uint64 bar, const VSEC & vsec) + { + struct PFS + { + uint64 TPMI_ID:8; + uint64 NumEntries:8; + uint64 EntrySize:16; + uint64 CapOffset:16; + uint64 Attribute:2; + uint64 Reserved:14; + }; + static_assert(sizeof(PFS) == sizeof(uint64), "sizeof(PFS) != sizeof(uint64)"); + assert(vsec.fields.EntrySize == 2); + std::vector<PFS> pfsArray(vsec.fields.NumEntries); + pcm::mmio_memcpy(&(pfsArray[0]), bar + vsec.fields.Address, vsec.fields.NumEntries * sizeof(PFS), true); + for (const auto & pfs : pfsArray) + { + if (verbose) + { + std::cout << "PFS" << + "\t TPMI_ID: " << pfs.TPMI_ID << + "\t NumEntries: " << pfs.NumEntries << + "\t EntrySize: " << pfs.EntrySize << + "\t CapOffset: " << pfs.CapOffset << + "\t Attribute: " << pfs.Attribute << + "\n"; + } + for (uint64 p = 0; p < pfs.NumEntries; ++p) + { + uint32 reg0 = 0; + const auto addr = bar + 
vsec.fields.Address + pfs.CapOffset * 1024ULL + p * pfs.EntrySize * sizeof(uint32); + mmio_memcpy(&reg0, addr, sizeof(uint32), false); + if (reg0 == ~0U) + { + if (verbose) + { + std::cout << "invalid entry " << p << "\n"; + } + } + else if (pfs.TPMI_ID == requestedID) + { + if (verbose) + { + std::cout << "Entry "<< p << std::hex; + for (uint64 i_offset = 0; i_offset < pfs.EntrySize * sizeof(uint32); i_offset += sizeof(uint64)) + { + uint64 reg = 0; + mmio_memcpy(&reg, addr + i_offset, sizeof(uint64), false); + std::cout << " register "<< i_offset << " = " << reg; + } + std::cout << std::dec << "\n"; + } + try { + const auto requestedAddr = addr + requestedRelativeOffset; + const auto baseAddr = roundDownTo4K(requestedAddr); + const auto baseOffset = requestedAddr - baseAddr; + MMIORange range(baseAddr, 4096ULL, !write); + if (!dec) std::cout << std::hex << std::showbase; + if (bits.first >= 0 && write) + { + // to write bits need to read the old value first + uint64 old_value = range.read64(baseOffset); + value = insertBits(old_value, value, bits.first, bits.second - bits.first + 1); + } + if (write) + { + std::cout << " Writing " << value << " to TPMI ID " << requestedID << "@" << requestedRelativeOffset << " for entry " << p << "\n"; + range.write64(baseOffset, value); + } + value = range.read64(baseOffset); + std::cout << " Read "; + if (bits.first >= 0) + { + std::cout << "bits "<< std::dec << bits.first << ":" << bits.second << " "; + if (!dec) std::cout << std::hex << std::showbase; + value = extract_bits(value, bits.first, bits.second); + } + std::cout << "value " << value << " from TPMI ID " << requestedID << "@" << requestedRelativeOffset << " for entry " << p << "\n\n"; + } + catch (std::exception& e) + { + std::cerr << "Error accessing registers: " << e.what() << "\n"; + std::cerr << "Please check if the program can access MSR/PCICFG drivers.\n"; + } + } + } + } + }); + + return 0; +} From e702516d15e731551d6774ab921a38931a475bc4 Mon Sep 17 00:00:00 2001 
From: "Dementiev, Roman" Date: Thu, 5 Oct 2023 13:57:57 +0200 Subject: [PATCH 32/43] address a clang scan warning Change-Id: Ic0b81d77f1f0632f34c762f4bd28add1ab11e504 --- src/utils.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/utils.h b/src/utils.h index 2cea8828..c35a6d49 100644 --- a/src/utils.h +++ b/src/utils.h @@ -494,6 +494,10 @@ inline uint32 extract_bits_ui(uint32 myin, uint32 beg, uint32 end) inline uint64 build_bit(uint32 beg, uint32 end) { uint64 myll = 0; + if (end > 63) + { + end = 63; + } if (end == 63) { myll = static_cast(-1); From f03b827387215006d703c668b07f1e9c4dbff18d Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Thu, 5 Oct 2023 15:47:29 +0200 Subject: [PATCH 33/43] add documentation link for tpmi Change-Id: I52b3500be1c8e62a028d068ba3bd79640e159de3 --- README.md | 2 +- src/pcm-tpmi.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0732617d..b3c3617a 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ Graphical front ends: - **pcm-sensor** : front-end for KDE KSysGuard - **pcm-service** : front-end for Windows perfmon -There are also utilities for reading/writing model specific registers (**pcm-msr**), PCI configuration registers (**pcm-pcicfg**) and memory mapped registers (**pcm-mmio**) supported on Linux, Windows, Mac OS X and FreeBSD. +There are also utilities for reading/writing model specific registers (**pcm-msr**), PCI configuration registers (**pcm-pcicfg**), memory mapped registers (**pcm-mmio**) and TPMI registers (**pcm-tpmi**) supported on Linux, Windows, Mac OS X and FreeBSD. And finally a daemon that stores core, memory and QPI counters in shared memory that can be be accessed by non-root users. 
diff --git a/src/pcm-tpmi.cpp b/src/pcm-tpmi.cpp index 4176a1f7..c4dfe0c2 100644 --- a/src/pcm-tpmi.cpp +++ b/src/pcm-tpmi.cpp @@ -43,6 +43,7 @@ int mainThrows(int argc, char * argv[]) std::cout << "\n Intel(r) Performance Counter Monitor " << PCM_VERSION << "\n"; std::cout << "\n TPMI (Topology Aware Register and PM Capsule Interface) read/write utility\n\n"; + // register documentation: https://github.com/intel/tpmi_power_management uint64 value = 0; bool write = false; From 09b1d442d2e201b6cdc9d543ee2ff8d7056e4325 Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Fri, 6 Oct 2023 15:23:15 +0200 Subject: [PATCH 34/43] add bit operations to other register tools --- src/pcm-mmio.cpp | 29 ++++++++++++++++++----------- src/pcm-msr.cpp | 27 +++++++++++++++++++-------- src/pcm-pcicfg.cpp | 21 ++++++++++++++------- src/pcm-tpmi.cpp | 32 ++++---------------------------- src/utils.cpp | 18 ++++++++++++++++++ src/utils.h | 30 ++++++++++++++++++++++++++++++ 6 files changed, 103 insertions(+), 54 deletions(-) diff --git a/src/pcm-mmio.cpp b/src/pcm-mmio.cpp index f1b4f72e..bc720d18 100644 --- a/src/pcm-mmio.cpp +++ b/src/pcm-mmio.cpp @@ -27,30 +27,33 @@ void print_usage(const char* progname) { std::cout << "Usage " << progname << " [-w value] [-q] [-d] address\n\n"; std::cout << " Reads/writes MMIO (memory mapped) register in the specified address\n"; - std::cout << " -w value : write the value before reading \n"; - std::cout << " -q : read/write 64-bit quad word (default is 32-bit double word)\n"; - std::cout << " -d : output all numbers in dec (default is hex)\n"; - std::cout << " -n size : number of bytes read from specified address(batch read mode), max bytes=" << MAX_BATCH_OPERATE_BYTES << "\n"; - std::cout << " --version : print application version\n"; + std::cout << " -w value : write the value before reading \n"; + std::cout << " -b low:high : read or write only low..high bits of the register\n"; + std::cout << " -q : read/write 64-bit quad word (default is 32-bit 
double word)\n"; + std::cout << " -d : output all numbers in dec (default is hex)\n"; + std::cout << " -n size : number of bytes read from specified address(batch read mode), max bytes=" << MAX_BATCH_OPERATE_BYTES << "\n"; + std::cout << " --version : print application version\n"; std::cout << "\n"; } template -void doOp(const uint64 address, const uint64 offset, const uint32 batch_bytes, const bool write, T value, RD readOp, WR writeOp, const bool dec) +void doOp(const std::pair & bits, const uint64 address, const uint64 offset, const uint32 batch_bytes, const bool write, T value, RD readOp, WR writeOp, const bool dec) { if (batch_bytes == 0) //single mode { if (!dec) std::cout << std::hex << std::showbase; constexpr auto bit = sizeof(T) * 8; + readOldValueHelper(bits, value, write, [&readOp, & offset](T & old_value){ old_value = readOp(offset); return true; }); if (write) - { + { std::cout << " Writing " << value << " to " << std::dec << bit; if (!dec) std::cout << std::hex << std::showbase; std::cout <<"-bit MMIO register " << address << "\n"; writeOp(offset, value); } value = readOp(offset); - std::cout << " Read value " << value << " from " << std::dec << bit; + extractBitsPrintHelper(bits, value, dec); + std::cout << " from " << std::dec << bit; if (!dec) std::cout << std::hex << std::showbase; std::cout << "-bit MMIO register " << address << "\n\n"; } @@ -91,9 +94,10 @@ int mainThrows(int argc, char * argv[]) bool dec = false; bool quad = false; uint32 batch_bytes = 0; + std::pair bits{-1, -1}; int my_opt = -1; - while ((my_opt = getopt(argc, argv, "w:dqn:")) != -1) + while ((my_opt = getopt(argc, argv, "w:dqn:b:")) != -1) { switch (my_opt) { @@ -107,6 +111,9 @@ int mainThrows(int argc, char * argv[]) case 'q': quad = true; break; + case 'b': + bits = parseBitsParameter(optarg); + break; case 'n': batch_bytes = read_number(optarg); if (batch_bytes > MAX_BATCH_OPERATE_BYTES) @@ -149,11 +156,11 @@ int mainThrows(int argc, char * argv[]) using namespace 
std::placeholders; if (quad) { - doOp(address, offset, batch_bytes, write, (uint64)value, std::bind(&MMIORange::read64, &mmio, _1), std::bind(&MMIORange::write64, &mmio, _1, _2), dec); + doOp(bits, address, offset, batch_bytes, write, (uint64)value, std::bind(&MMIORange::read64, &mmio, _1), std::bind(&MMIORange::write64, &mmio, _1, _2), dec); } else { - doOp(address, offset, batch_bytes, write, (uint32)value, std::bind(&MMIORange::read32, &mmio, _1), std::bind(&MMIORange::write32, &mmio, _1, _2), dec); + doOp(bits, address, offset, batch_bytes, write, (uint32)value, std::bind(&MMIORange::read32, &mmio, _1), std::bind(&MMIORange::write32, &mmio, _1, _2), dec); } } catch (std::exception & e) diff --git a/src/pcm-msr.cpp b/src/pcm-msr.cpp index ec82d499..86b3dccd 100644 --- a/src/pcm-msr.cpp +++ b/src/pcm-msr.cpp @@ -23,11 +23,12 @@ void print_usage(const char * progname) { std::cout << "Usage " << progname << " [-w value] [-c core] [-a] [-d] msr\n\n"; std::cout << " Reads/writes specified msr (model specific register) \n"; - std::cout << " -w value : write the value before reading \n"; - std::cout << " -c core : perform msr read/write on specified core (default is 0)\n"; - std::cout << " -d : output all numbers in dec (default is hex)\n"; - std::cout << " -a : perform msr read/write operations on all cores\n"; - std::cout << " --version : print application version\n"; + std::cout << " -w value : write the value before reading \n"; + std::cout << " -c core : perform msr read/write on specified core (default is 0)\n"; + std::cout << " -b low:high : read or write only low..high bits of the register\n"; + std::cout << " -d : output all numbers in dec (default is hex)\n"; + std::cout << " -a : perform msr read/write operations on all cores\n"; + std::cout << " --version : print application version\n"; std::cout << "\n"; } @@ -47,9 +48,10 @@ int mainThrows(int argc, char * argv[]) int core = 0; int msr = -1; bool dec = false; + std::pair bits{-1, -1}; int my_opt = -1; - 
while ((my_opt = getopt(argc, argv, "w:c:da")) != -1) + while ((my_opt = getopt(argc, argv, "w:c:dab:")) != -1) { switch (my_opt) { @@ -66,6 +68,9 @@ int mainThrows(int argc, char * argv[]) case 'a': core = -1; break; + case 'b': + bits = parseBitsParameter(optarg); + break; default: print_usage(argv[0]); return -1; @@ -94,11 +99,16 @@ int mainThrows(int argc, char * argv[]) return -1; } #endif - auto doOne = [&dec, &write, &msr](int core, uint64 value) + auto doOne = [&dec, &write, &msr, &bits](int core, uint64 value) { try { MsrHandle h(core); if (!dec) std::cout << std::hex << std::showbase; + if (!readOldValueHelper(bits, value, write, [&h, &msr](uint64 & old_value){ return h.read(msr, &old_value) == 8; })) + { + std::cout << " Read error!\n"; + return; + } if (write) { std::cout << " Writing " << value << " to MSR " << msr << " on core " << core << "\n"; @@ -110,7 +120,8 @@ int mainThrows(int argc, char * argv[]) value = 0; if (h.read(msr, &value) == 8) { - std::cout << " Read value " << value << " from MSR " << msr << " on core " << core << "\n\n"; + extractBitsPrintHelper(bits, value, dec); + std::cout << " from MSR " << msr << " on core " << core << "\n\n"; } else { diff --git a/src/pcm-pcicfg.cpp b/src/pcm-pcicfg.cpp index 75fe6d6f..a991ab74 100644 --- a/src/pcm-pcicfg.cpp +++ b/src/pcm-pcicfg.cpp @@ -23,10 +23,11 @@ void print_usage(const char * progname) { std::cout << "Usage " << progname << " [-w value] [-d] [-i ID] [group bus device function] offset\n\n"; std::cout << " Reads/writes 32-bit PCICFG register \n"; - std::cout << " -w value : write the value before reading \n"; - std::cout << " -d : output all numbers in dec (default is hex)\n"; - std::cout << " -i ID : specify Intel device ID instead of group bus device function\n"; - std::cout << " --version : print application version\n"; + std::cout << " -w value : write the value before reading \n"; + std::cout << " -b low:high : read or write only low..high bits of the register\n"; + std::cout << " 
-d : output all numbers in dec (default is hex)\n"; + std::cout << " -i ID : specify Intel device ID instead of group bus device function\n"; + std::cout << " --version : print application version\n"; std::cout << "\n"; } @@ -51,9 +52,10 @@ int mainThrows(int argc, char * argv[]) bool write = false; bool dec = false; uint32 deviceID = 0; + std::pair bits{-1, -1}; int my_opt = -1; - while ((my_opt = getopt(argc, argv, "i:w:d")) != -1) + while ((my_opt = getopt(argc, argv, "i:w:db:")) != -1) { switch (my_opt) { @@ -64,6 +66,9 @@ int mainThrows(int argc, char * argv[]) write = true; value = (pcm::uint32)read_number(optarg); break; + case 'b': + bits = parseBitsParameter(optarg); + break; case 'd': dec = true; break; @@ -100,12 +105,13 @@ int mainThrows(int argc, char * argv[]) } #endif - auto one = [&dec,&write](const uint32 & group, const uint32 & bus, const uint32 & device, const uint32 & function, const uint32 & offset, uint32 value) + auto one = [&dec,&write,&bits](const uint32 & group, const uint32 & bus, const uint32 & device, const uint32 & function, const uint32 & offset, uint32 value) { try { PciHandleType h(group, bus, device, function); if (!dec) std::cout << std::hex << std::showbase; + readOldValueHelper(bits, value, write, [&h, &offset](uint32 & old_value){ h.read32(offset, &old_value); return true; }); if (write) { std::cout << " Writing " << value << " to " << group << ":" << bus << ":" << device << ":" << function << "@" << offset << "\n"; @@ -113,7 +119,8 @@ int mainThrows(int argc, char * argv[]) } value = 0; h.read32(offset, &value); - std::cout << " Read value " << value << " from " << group << ":" << bus << ":" << device << ":" << function << "@" << offset << "\n\n"; + extractBitsPrintHelper(bits, value, dec); + std::cout << " from " << group << ":" << bus << ":" << device << ":" << function << "@" << offset << "\n\n"; } catch (std::exception& e) { diff --git a/src/pcm-tpmi.cpp b/src/pcm-tpmi.cpp index c4dfe0c2..1d39d86c 100644 --- 
a/src/pcm-tpmi.cpp +++ b/src/pcm-tpmi.cpp @@ -67,20 +67,7 @@ int mainThrows(int argc, char * argv[]) verbose = true; break; case 'b': - { - const auto bitsArray = pcm::split(std::string(optarg),':'); - assert(bitsArray.size() == 2); - bits.first = (int64)read_number(bitsArray[0].c_str()); - bits.second = (int64)read_number(bitsArray[1].c_str()); - assert(bits.first >= 0); - assert(bits.second >= 0); - assert(bits.first < 64); - assert(bits.second < 64); - if (bits.first > bits.second) - { - std::swap(bits.first, bits.second); - } - } + bits = parseBitsParameter(optarg); break; default: print_usage(argv[0]); @@ -174,26 +161,15 @@ int mainThrows(int argc, char * argv[]) const auto baseOffset = requestedAddr - baseAddr; MMIORange range(baseAddr, 4096ULL, !write); if (!dec) std::cout << std::hex << std::showbase; - if (bits.first >= 0 && write) - { - // to write bits need to read the old value first - uint64 old_value = range.read64(baseOffset); - value = insertBits(old_value, value, bits.first, bits.second - bits.first + 1); - } + readOldValueHelper(bits, value, write, [&range, &baseOffset](uint64 & old_value){ old_value = range.read64(baseOffset); return true; }); if (write) { std::cout << " Writing " << value << " to TPMI ID " << requestedID << "@" << requestedRelativeOffset << " for entry " << p << "\n"; range.write64(baseOffset, value); } value = range.read64(baseOffset); - std::cout << " Read "; - if (bits.first >= 0) - { - std::cout << "bits "<< std::dec << bits.first << ":" << bits.second << " "; - if (!dec) std::cout << std::hex << std::showbase; - value = extract_bits(value, bits.first, bits.second); - } - std::cout << "value " << value << " from TPMI ID " << requestedID << "@" << requestedRelativeOffset << " for entry " << p << "\n\n"; + extractBitsPrintHelper(bits, value, dec); + std::cout << " from TPMI ID " << requestedID << "@" << requestedRelativeOffset << " for entry " << p << "\n\n"; } catch (std::exception& e) { diff --git a/src/utils.cpp 
b/src/utils.cpp index 872c3881..410f49ef 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -1146,6 +1146,24 @@ bool get_cpu_bus(uint32 msmDomain, uint32 msmBus, uint32 msmDev, uint32 msmFunc, } } +std::pair parseBitsParameter(const char * param) +{ + std::pair bits{-1, -1}; + const auto bitsArray = pcm::split(std::string(param),':'); + assert(bitsArray.size() == 2); + bits.first = (int64)read_number(bitsArray[0].c_str()); + bits.second = (int64)read_number(bitsArray[1].c_str()); + assert(bits.first >= 0); + assert(bits.second >= 0); + assert(bits.first < 64); + assert(bits.second < 64); + if (bits.first > bits.second) + { + std::swap(bits.first, bits.second); + } + return bits; +} + #ifdef __linux__ FILE * tryOpen(const char * path, const char * mode) { diff --git a/src/utils.h b/src/utils.h index c35a6d49..e0579757 100644 --- a/src/utils.h +++ b/src/utils.h @@ -631,4 +631,34 @@ inline uint64 roundUpTo4K(uint64 number) { } } +std::pair parseBitsParameter(const char * param); +template +inline bool readOldValueHelper(const std::pair & bits, T & value, const bool & write, R readValue) +{ + if (bits.first >= 0 && write) + { + // to write bits need to read the old value first + T old_value = 0; + if (!readValue(old_value)) + { + return false; + } + value = insertBits(old_value, value, bits.first, bits.second - bits.first + 1); + } + return true; +} + +template +inline void extractBitsPrintHelper(const std::pair & bits, T & value, const bool & dec) +{ + std::cout << " Read "; + if (bits.first >= 0) + { + std::cout << "bits "<< std::dec << bits.first << ":" << bits.second << " "; + if (!dec) std::cout << std::hex << std::showbase; + value = extract_bits(value, bits.first, bits.second); + } + std::cout << "value " << value; +} + } // namespace pcm From fd4c478040086cef4c445ef686506b6235240b46 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Sun, 15 Oct 2023 12:45:40 +0200 Subject: [PATCH 35/43] add PCM_SET_DLL_DIR Change-Id: 
If3fef361eb1846538070955aa40372ee0611f7b4 --- src/utils.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/utils.h b/src/utils.h index e0579757..9ae3111a 100644 --- a/src/utils.h +++ b/src/utils.h @@ -34,10 +34,17 @@ namespace pcm { std::string safe_getenv(const char* env); } +#ifdef _MSC_VER +#define PCM_SET_DLL_DIR SetDllDirectory(_T("")); +#else +#define PCM_SET_DLL_DIR +#endif + #define PCM_MAIN_NOTHROW \ int mainThrows(int argc, char * argv[]); \ int main(int argc, char * argv[]) \ { \ + PCM_SET_DLL_DIR \ if (pcm::safe_getenv("PCM_NO_MAIN_EXCEPTION_HANDLER") == std::string("1")) return mainThrows(argc, argv); \ try { \ return mainThrows(argc, argv); \ From 15a9a4e4d23dfcf55ef9d8b3dd7b4255dfe8e274 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Sun, 15 Oct 2023 14:55:43 +0200 Subject: [PATCH 36/43] load winring0 dll from the windows system directory Change-Id: Ic4b75dc33270a5f7810d477501770c64f9d7cb59 --- src/winring0/OlsApiInit.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/winring0/OlsApiInit.h b/src/winring0/OlsApiInit.h index 1e2fadaa..fd9383b0 100644 --- a/src/winring0/OlsApiInit.h +++ b/src/winring0/OlsApiInit.h @@ -134,14 +134,18 @@ _SetOlsValue SetOlsValue = NULL; BOOL InitOpenLibSys(HMODULE *hModule) { + TCHAR dll_path[MAX_PATH]; + GetSystemDirectory(dll_path, MAX_PATH - 20); #ifdef _M_X64 - *hModule = LoadLibrary(_T("WinRing0x64.dll")); + _tcscat_s(dll_path, MAX_PATH, TEXT("\\WinRing0x64.dll")); #else - *hModule = LoadLibrary(_T("WinRing0.dll")); + _tcscat_s(dll_path, MAX_PATH, TEXT("\\WinRing0.dll")); #endif + *hModule = LoadLibrary(dll_path); if(*hModule == NULL) { + std::wcerr << "The dll could not be loaded from " << dll_path <<"\n"; return FALSE; } From 7c6371968de298fc92c03fcb6e333ba8f1e12701 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Sun, 15 Oct 2023 15:00:01 +0200 Subject: [PATCH 37/43] call PCM_SET_DLL_DIR in pcm-service Change-Id: I1fb327dadbb1e0a78f544e89cc5617c57a4f28f4 --- 
src/windows/PCMService.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/windows/PCMService.cpp b/src/windows/PCMService.cpp index 5743b8ba..c3691d4b 100644 --- a/src/windows/PCMService.cpp +++ b/src/windows/PCMService.cpp @@ -20,6 +20,7 @@ using namespace System::Reflection; //To install/uninstall the service, type: "PCM-Service.exe [-Install/-Uninstall]" int _tmain(int argc, _TCHAR* argv[]) { + PCM_SET_DLL_DIR if (argc >= 2) { if (argv[1][0] == _T('/')) From 54bb53754fce3193d034628f2da39f04ecb3977a Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Sun, 15 Oct 2023 12:44:07 +0200 Subject: [PATCH 38/43] add restrictDriverAccessNative --- src/cpucounters.cpp | 5 +++++ src/utils.cpp | 33 ++++++++++++++++++++++++++++ src/utils.h | 4 ++++ src/windows/restrictDriverAccess.cpp | 2 +- 4 files changed, 43 insertions(+), 1 deletion(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 10b7a6ca..2908f235 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -89,6 +89,11 @@ int convertUnknownToInt(size_t size, char* value); #ifdef _MSC_VER +void PCM_API restrictDriverAccess(LPCTSTR path) +{ + restrictDriverAccessNative(path); +} + HMODULE hOpenLibSys = NULL; #ifndef NO_WINRING diff --git a/src/utils.cpp b/src/utils.cpp index 410f49ef..4734f6da 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -7,6 +7,10 @@ #include #include #ifdef _MSC_VER +#include +#include +#include +#include #include #include #else @@ -1256,4 +1260,33 @@ bool readMapFromSysFS(const char * path, std::unordered_map } #endif +#ifdef _MSC_VER + +void restrictDriverAccessNative(LPCTSTR path) +{ + PSECURITY_DESCRIPTOR pSD = nullptr; + + if (!ConvertStringSecurityDescriptorToSecurityDescriptor( + _T("O:BAG:SYD:(A;;FA;;;SY)(A;;FA;;;BA)"), + SDDL_REVISION_1, + &pSD, + nullptr)) + { + _tprintf(TEXT("Error in ConvertStringSecurityDescriptorToSecurityDescriptor: %d\n"), GetLastError()); + return; + } + + if (SetFileSecurity(path, DACL_SECURITY_INFORMATION, pSD)) + { + 
_tprintf(TEXT("Successfully restricted access for %s\n"), path); + } + else + { + _tprintf(TEXT("Error in SetFileSecurity for %s. Error %d\n"), path, GetLastError()); + } + + LocalFree(pSD); +} +#endif + } // namespace pcm diff --git a/src/utils.h b/src/utils.h index 9ae3111a..4780c14d 100644 --- a/src/utils.h +++ b/src/utils.h @@ -668,4 +668,8 @@ inline void extractBitsPrintHelper(const std::pair & bits, T & valu std::cout << "value " << value; } +#ifdef _MSC_VER +void restrictDriverAccessNative(LPCTSTR path); +#endif; + } // namespace pcm diff --git a/src/windows/restrictDriverAccess.cpp b/src/windows/restrictDriverAccess.cpp index 848f5546..8b2dea4b 100644 --- a/src/windows/restrictDriverAccess.cpp +++ b/src/windows/restrictDriverAccess.cpp @@ -14,7 +14,7 @@ namespace pcm { #endif // _MSC_VER //! restrict usage of driver to system (SY) and builtin admins (BA) -void restrictDriverAccess(LPCTSTR path) +void restrictDriverAccessCls(LPCTSTR path) { try { System::Security::AccessControl::FileSecurity^ fSecurity = System::IO::File::GetAccessControl(gcnew System::String(path)); From 4b3d57035be7080ce288014bbd482eed81c5c638 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Sun, 15 Oct 2023 19:22:31 +0200 Subject: [PATCH 39/43] drop restrictDriverAccessCls Change-Id: I34a02cdfd90cb5615d9e9de65fb23d039a34093e --- src/utils.cpp | 1 + src/windows/restrictDriverAccess.cpp | 14 -------------- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/src/utils.cpp b/src/utils.cpp index 4734f6da..ba47e80c 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -1262,6 +1262,7 @@ bool readMapFromSysFS(const char * path, std::unordered_map #ifdef _MSC_VER +//! 
restrict usage of driver to system (SY) and builtin admins (BA) void restrictDriverAccessNative(LPCTSTR path) { PSECURITY_DESCRIPTOR pSD = nullptr; diff --git a/src/windows/restrictDriverAccess.cpp b/src/windows/restrictDriverAccess.cpp index 8b2dea4b..866a6dc5 100644 --- a/src/windows/restrictDriverAccess.cpp +++ b/src/windows/restrictDriverAccess.cpp @@ -13,18 +13,4 @@ namespace pcm { #endif #endif // _MSC_VER -//! restrict usage of driver to system (SY) and builtin admins (BA) -void restrictDriverAccessCls(LPCTSTR path) -{ - try { - System::Security::AccessControl::FileSecurity^ fSecurity = System::IO::File::GetAccessControl(gcnew System::String(path)); - fSecurity->SetSecurityDescriptorSddlForm("O:BAG:SYD:(A;;FA;;;SY)(A;;FA;;;BA)"); - System::IO::File::SetAccessControl(gcnew System::String(path), fSecurity); - } - catch (...) - { - tcerr << "Error in GetAccessControl/SetSecurityDescriptorSddlForm for " << path << " driver.\n"; - } -} - } // namespace pcm From 444854d2d5e93e8b69b76fe957f45a2402aa490a Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Sun, 15 Oct 2023 19:31:52 +0200 Subject: [PATCH 40/43] drop restrictDriverAccess in CMakeList Change-Id: I680916e8de4cd0cdfa4bfcb65208bca2a700488f --- src/CMakeLists.txt | 14 ++++---------- src/windows/restrictDriverAccess.cpp | 16 ---------------- 2 files changed, 4 insertions(+), 26 deletions(-) delete mode 100644 src/windows/restrictDriverAccess.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 65b2d630..072a7eb2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -54,26 +54,20 @@ if(MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /wd4251 /wd4273 /EHa /Zi") add_definitions(/W3) - # https://cmake.org/cmake/help/latest/prop_tgt/MSVC_RUNTIME_LIBRARY.html - # windows/restrictDriverAccess.cpp is building separately - add_library(restrictDriverAccess OBJECT windows/restrictDriverAccess.cpp) - set_target_properties(restrictDriverAccess PROPERTIES COMMON_LANGUAGE_RUNTIME "") - 
target_compile_definitions(restrictDriverAccess PRIVATE _CONSOLE _UNICODE UNICODE) - - # Rest of windows/* files + restrictDriverAccess.cpp -> PCM_STATIC + # windows/* files -> PCM_STATIC file(GLOB WINDOWS_SOURCES winpmem/winpmem.cpp windows/stdafx.cpp freegetopt/getopt.cpp) - add_library(PCM_STATIC STATIC $ ${COMMON_SOURCES} ${WINDOWS_SOURCES}) + add_library(PCM_STATIC STATIC ${COMMON_SOURCES} ${WINDOWS_SOURCES}) target_compile_definitions(PCM_STATIC PRIVATE UNICODE _UNICODE _CONSOLE) # Graphical perfmon front-end: pcm-lib, pcm-service # Files: COMMON_FILES() + pcm-lib.cpp winpmem\winpmem.cpp dllmain.cpp file(GLOB PCM_LIB_SOURCES winpmem/winpmem.cpp dllmain.cpp pcm-lib.cpp ) - add_library(pcm-lib SHARED $ ${COMMON_SOURCES} ${PCM_LIB_SOURCES}) + add_library(pcm-lib SHARED ${COMMON_SOURCES} ${PCM_LIB_SOURCES}) target_compile_definitions(pcm-lib PRIVATE _WINDOWS _USRDLL PCM_EXPORTS _WINDLL _UNICODE UNICODE) # Pcm-service files: PCM_SHARED + AssemblyInfo.cpp PCMInstaller.cpp PCMService.cpp file(GLOB PCM_SERVICE_SOURCES windows/PCMInstaller.cpp windows/PCMService.cpp windows/AssemblyInfo.cpp winddows/utils.cpp) - add_executable(pcm-service $ ${PCM_SERVICE_SOURCES}) + add_executable(pcm-service ${PCM_SERVICE_SOURCES}) target_compile_definitions(pcm-service PRIVATE _UNICODE UNICODE _CONSOLE) set_target_properties(pcm-service PROPERTIES LINK_FLAGS "/INCREMENTAL:NO" COMMON_LANGUAGE_RUNTIME "") set_property(TARGET pcm-service PROPERTY VS_DOTNET_REFERENCES "System;System.Configuration.Install;System.Data;System.Management;System.ServiceProcess;System.Xml") diff --git a/src/windows/restrictDriverAccess.cpp b/src/windows/restrictDriverAccess.cpp deleted file mode 100644 index 866a6dc5..00000000 --- a/src/windows/restrictDriverAccess.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// SPDX-License-Identifier: BSD-3-Clause -// Copyright (c) 2016-2022, Intel Corporation -#include -#include - -namespace pcm { - -#ifdef _MSC_VER -#ifdef UNICODE - static auto& tcerr = std::wcerr; -#else - static 
auto& tcerr = std::cerr; -#endif -#endif // _MSC_VER - -} // namespace pcm From f8c18e98b4ae57b1479bba31b5875eebe7229b7a Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Mon, 16 Oct 2023 12:18:51 +0200 Subject: [PATCH 41/43] don't compile with MSVC DLLs Change-Id: I0bf37392e96621b4e4c67cba058d46e0b1d0a512 --- src/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 072a7eb2..c2c1d304 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -58,12 +58,14 @@ if(MSVC) file(GLOB WINDOWS_SOURCES winpmem/winpmem.cpp windows/stdafx.cpp freegetopt/getopt.cpp) add_library(PCM_STATIC STATIC ${COMMON_SOURCES} ${WINDOWS_SOURCES}) target_compile_definitions(PCM_STATIC PRIVATE UNICODE _UNICODE _CONSOLE) + target_compile_options(PCM_STATIC PRIVATE "/MT$<$:d>") # Graphical perfmon front-end: pcm-lib, pcm-service # Files: COMMON_FILES() + pcm-lib.cpp winpmem\winpmem.cpp dllmain.cpp file(GLOB PCM_LIB_SOURCES winpmem/winpmem.cpp dllmain.cpp pcm-lib.cpp ) add_library(pcm-lib SHARED ${COMMON_SOURCES} ${PCM_LIB_SOURCES}) target_compile_definitions(pcm-lib PRIVATE _WINDOWS _USRDLL PCM_EXPORTS _WINDLL _UNICODE UNICODE) + target_compile_options(pcm-lib PRIVATE "/MT$<$:d>") # Pcm-service files: PCM_SHARED + AssemblyInfo.cpp PCMInstaller.cpp PCMService.cpp file(GLOB PCM_SERVICE_SOURCES windows/PCMInstaller.cpp windows/PCMService.cpp windows/AssemblyInfo.cpp winddows/utils.cpp) @@ -128,6 +130,10 @@ foreach(PROJECT_NAME ${PROJECT_NAMES}) add_executable(${PROJECT_NAME} ${PROJECT_FILE}) + if(MSVC) + target_compile_options(${PROJECT_NAME} PRIVATE "/MT$<$:d>") + endif(MSVC) + # specific file for pcm-raw project if(${PROJECT_NAME} STREQUAL pcm-raw) set(LIBS ${LIBS} PCM_SIMDJSON) From 7995daf1fd8f2d8faee2c81b718237358cf2e68d Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Mon, 16 Oct 2023 12:25:23 +0200 Subject: [PATCH 42/43] be less verbose Change-Id: I44df668e40c7d2cf14ce849b50b9bffaf54e7ee0 --- src/utils.cpp | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils.cpp b/src/utils.cpp index ba47e80c..2f68d53b 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -1279,7 +1279,7 @@ void restrictDriverAccessNative(LPCTSTR path) if (SetFileSecurity(path, DACL_SECURITY_INFORMATION, pSD)) { - _tprintf(TEXT("Successfully restricted access for %s\n"), path); + // _tprintf(TEXT("Successfully restricted access for %s\n"), path); } else { From ab09473be95dd632f671e866eb22a208e670f513 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 17 Oct 2023 11:40:53 +0200 Subject: [PATCH 43/43] update windows-related documentation Change-Id: I298b3a17f3f0e03c5017b194867d97b3a6cbb5d9 --- README.md | 4 ++-- doc/WINDOWS_HOWTO.md | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index b3c3617a..76a148d4 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ Debug is default on Windows. Specify config to build Release: ``` cmake --build . --config Release ``` -On Windows and MacOs additional drivers are required. Please find instructions here: [WINDOWS_HOWTO.md](doc/WINDOWS_HOWTO.md) and [MAC_HOWTO.txt](doc/MAC_HOWTO.txt). +On Windows and MacOs additional drivers and steps are required. Please find instructions here: [WINDOWS_HOWTO.md](doc/WINDOWS_HOWTO.md) and [MAC_HOWTO.txt](doc/MAC_HOWTO.txt). 
FreeBSD/DragonFlyBSD-specific details can be found in [FREEBSD_HOWTO.txt](doc/FREEBSD_HOWTO.txt) @@ -103,7 +103,7 @@ Downloading Pre-Compiled PCM Tools * RHEL8.5 or later: `sudo dnf install pcm` * Fedora: `sudo yum install pcm` * RPMs and DEBs with the *latest* PCM version for RHEL/SLE/Ubuntu/Debian/openSUSE/etc distributions (binary and source) are available [here](https://software.opensuse.org/download/package?package=pcm&project=home%3Aopcm) -- Windows: download PCM binaries as [appveyor build service](https://ci.appveyor.com/project/opcm/pcm/history) artifacts and required Visual C++ Redistributable from [www.microsoft.com](https://www.microsoft.com/en-us/download/details.aspx?id=48145). Additional drivers are needed, see [WINDOWS_HOWTO.md](doc/WINDOWS_HOWTO.md). +- Windows: download PCM binaries as [appveyor build service](https://ci.appveyor.com/project/opcm/pcm/history) artifacts and required Visual C++ Redistributable from [www.microsoft.com](https://www.microsoft.com/en-us/download/details.aspx?id=48145). Additional steps and drivers are required, see [WINDOWS_HOWTO.md](doc/WINDOWS_HOWTO.md). - Docker: see [instructions on how to use pcm-sensor-server pre-compiled container from docker hub](doc/DOCKER_README.md). -------------------------------------------------------------------------------- diff --git a/doc/WINDOWS_HOWTO.md b/doc/WINDOWS_HOWTO.md index 83fe9fd5..1ac274ca 100644 --- a/doc/WINDOWS_HOWTO.md +++ b/doc/WINDOWS_HOWTO.md @@ -13,11 +13,11 @@ _For support of systems with more than _**_64_**_ logical cores you need to comp ``` alternatively you can perform `cmake -B build`, open *PCM.sln* form *build* folder in and build required project in Visual Studio. .exe and .dll files will be located in *build\bin\Release* folder -3. Copy the msr.sys driver and pcm.exe into a single directory +3. As Administrator create PCM directory in Windows "Program Files" directory (e.g. `C:\Program Files (x86)\PCM\`) +4. 
As Administrator copy the msr.sys driver and pcm.exe into the PCM directory +5. Run pcm.exe utility from the PCM directory as Administrator -4. Run pcm.exe utility from this directory - -For Windows 7 and Windows Server 2008 R2 the PCM utilities need to be run as administrator: +For Windows 7+ and Windows Server 2008 R2+ the PCM utilities need to be run as administrator: Alternatively you can achieve the same using the “Properties” Windows menu of the executable (“Privilege level” setting in the “Compatibility” tab): Right mouse click -> Properties -> Compatibility -> Privilege level -> Set “Run this program as an administrator”. @@ -35,7 +35,7 @@ If you are getting the error `Starting MSR service failed with error 3 The syste 4. Build 'PCM-Service.exe' using Microsoft Visual Studio or cmake -5. Copy PCM-Service.exe, PCM-Service.exe.config, and pcm-lib.dll files into a single directory +5. Copy PCM-Service.exe, PCM-Service.exe.config, and pcm-lib.dll files into the PCM sub-directory in Windows "Program Files" directory (see above) The config file enables support for legacy security policy. Without this configuration switch, you will get an exception like this: