Skip to content

Commit

Permalink
Merge tag '2023-11-09' into push-2023-11-09
Browse files Browse the repository at this point in the history
Change-Id: I3a8e0dab5504a8de4a54f9ee266b70a824b07e14
  • Loading branch information
rdementi committed Nov 10, 2023
2 parents ba42ed9 + 8360600 commit 8fec46e
Show file tree
Hide file tree
Showing 13 changed files with 882 additions and 476 deletions.
4 changes: 3 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# All pcm-* executables
set(PROJECT_NAMES pcm pcm-numa pcm-latency pcm-power pcm-msr pcm-memory pcm-tsx pcm-pcie pcm-core pcm-iio pcm-lspci pcm-pcicfg pcm-mmio pcm-tpmi pcm-raw pcm-accel)

file(GLOB COMMON_SOURCES msr.cpp cpucounters.cpp pci.cpp mmio.cpp bw.cpp utils.cpp topology.cpp debug.cpp threadpool.cpp uncore_pmu_discovery.cpp)
file(GLOB COMMON_SOURCES pcm-accel-common.cpp msr.cpp cpucounters.cpp pci.cpp mmio.cpp bw.cpp utils.cpp topology.cpp debug.cpp threadpool.cpp uncore_pmu_discovery.cpp)

if (APPLE)
file(GLOB UNUX_SOURCES dashboard.cpp)
Expand Down Expand Up @@ -143,6 +143,8 @@ foreach(PROJECT_NAME ${PROJECT_NAMES})
file(READ pcm-sensor-server.service.in SENSOR_SERVICE_IN)
string(REPLACE "@@CMAKE_INSTALL_SBINDIR@@" "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_SBINDIR}" SENSOR_SERVICE "${SENSOR_SERVICE_IN}")
file(WRITE "${CMAKE_BINARY_DIR}/pcm-sensor-server.service" "${SENSOR_SERVICE}")
file(GLOB PROJECT_FILE ${PROJECT_NAME}.cpp pcm-accel-common.h pcm-accel-common.cpp)
target_include_directories(pcm-sensor-server PUBLIC ${CMAKE_SOURCE_DIR})
if(LINUX_SYSTEMD)
install(FILES "${CMAKE_BINARY_DIR}/pcm-sensor-server.service" DESTINATION "${LINUX_SYSTEMD_UNITDIR}")
endif(LINUX_SYSTEMD)
Expand Down
23 changes: 23 additions & 0 deletions src/cpucounters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2737,6 +2737,8 @@ PCM::PCM() :
num_phys_cores_per_socket(0),
num_online_cores(0),
num_online_sockets(0),
accel(0),
accel_counters_num_max(0),
core_gen_counter_num_max(0),
core_gen_counter_num_used(0), // 0 means no core gen counters used
core_gen_counter_width(0),
Expand Down Expand Up @@ -3502,6 +3504,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter
lastProgrammedCustomCounters.clear();
lastProgrammedCustomCounters.resize(num_cores);
core_global_ctrl_value = 0ULL;
isHWTMAL1Supported(); // init value here to prevent MT (multi-threaded) races

std::vector<std::future<void> > asyncCoreResults;
std::vector<PCM::ErrorCode> programmingStatuses(num_cores, PCM::Success);
Expand Down Expand Up @@ -6480,6 +6483,26 @@ uint32 PCM::getNumSockets() const
return (uint32)num_sockets;
}

/*! \brief Returns the accel type value currently stored in this PCM instance
    (the value last passed to setAccel()).
*/
uint32 PCM::getAccel() const
{
    const uint32 storedAccel = this->accel;
    return storedAccel;
}

/*! \brief Stores the accel type value in this PCM instance.
    \param input value that getAccel() will return afterwards
*/
void PCM::setAccel(uint32 input)
{
    this->accel = input;
}

/*! \brief Returns the stored number of accel counters
    (the value last passed to setNumberofAccelCounters()).
*/
uint32 PCM::getNumberofAccelCounters() const
{
    const uint32 counterCount = this->accel_counters_num_max;
    return counterCount;
}

/*! \brief Stores the number of accel counters in this PCM instance.
    \param input value that getNumberofAccelCounters() will return afterwards
*/
void PCM::setNumberofAccelCounters(uint32 input)
{
    this->accel_counters_num_max = input;
}

uint32 PCM::getNumOnlineSockets() const
{
return (uint32)num_online_sockets;
Expand Down
30 changes: 29 additions & 1 deletion src/cpucounters.h
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,8 @@ class PCM_API PCM
int32 num_phys_cores_per_socket;
int32 num_online_cores;
int32 num_online_sockets;
uint32 accel;
uint32 accel_counters_num_max;
uint32 core_gen_counter_num_max;
uint32 core_gen_counter_num_used;
uint32 core_gen_counter_width;
Expand Down Expand Up @@ -1506,9 +1508,29 @@ class PCM_API PCM
\return Number of sockets in the system
*/
uint32 getNumSockets() const;

/*! \brief Reads the accel type in the system
\return accel type value, as last stored by setAccel()
*/
uint32 getAccel() const;

/*! \brief Sets the accel type in the system
\param input accel type value to store; getAccel() returns it afterwards
*/
void setAccel(uint32 input);

/*! \brief Reads the number of accel counters in the system
\return Number of accel counters, as last stored by setNumberofAccelCounters()
*/
uint32 getNumberofAccelCounters() const;

/*! \brief Sets the number of accel counters in the system
\param input number of accel counters to store; getNumberofAccelCounters() returns it afterwards
*/
void setNumberofAccelCounters(uint32 input);

/*! \brief Reads number of online sockets (CPUs) in the system
\return Number of online sockets in the system
\return Number of online sockets in the system
*/
uint32 getNumOnlineSockets() const;

Expand Down Expand Up @@ -3374,6 +3396,11 @@ class SystemCounterState : public SocketCounterState
}

public:
// Component types of the ctr_data key pair.
// NOTE(review): what h_id and v_id identify is not visible in this section — confirm against their users.
typedef uint32_t h_id;
typedef uint32_t v_id;
// Maps an (h_id, v_id) pair to a 64-bit value.
typedef std::map<std::pair<h_id,v_id>,uint64_t> ctr_data;
// A sequence of ctr_data maps.
typedef std::vector<ctr_data> dev_content;
// Accel counter states; the constructor sizes this to PCM::getNumberofAccelCounters().
std::vector<SimpleCounterState> accel_counters;
std::vector<uint64> CXLWriteMem,CXLWriteCache;
friend uint64 getIncomingQPILinkBytes(uint32 socketNr, uint32 linkNr, const SystemCounterState & before, const SystemCounterState & after);
friend uint64 getIncomingQPILinkBytes(uint32 socketNr, uint32 linkNr, const SystemCounterState & now);
Expand All @@ -3385,6 +3412,7 @@ class SystemCounterState : public SocketCounterState
uncoreTSC(0)
{
PCM * m = PCM::getInstance();
accel_counters.resize(m->getNumberofAccelCounters());
CXLWriteMem.resize(m->getNumSockets(),0);
CXLWriteCache.resize(m->getNumSockets(),0);
incomingQPIPackets.resize(m->getNumSockets(),
Expand Down
36 changes: 35 additions & 1 deletion src/dashboard.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
#include <vector>
#include <memory>
#include <unistd.h>
#include "cpucounters.h"

#include "pcm-accel-common.h"
#include "dashboard.h"

namespace pcm {
Expand Down Expand Up @@ -515,6 +516,12 @@ std::string influxDBCore_Aggregate_Core_Counters(const std::string& S, const std
return influxDB_Counters(S, m, "Core Aggregate_Core Counters");
}

std::string influxDBAccel_Counters(const std::string& S, const std::string& m)
{
AcceleratorCounterState * accs = AcceleratorCounterState::getInstance();
return std::string("mean(\\\"Sockets_") + S + "_Accelerators_" +accs->getAccelCounterName()+" Counters Device_" + m + "\\\")";
}

std::string influxDBCore_Aggregate_Core_Counters(const std::string& m)
{
return influxDB_Counters(m, "Core Aggregate_Core Counters");
Expand Down Expand Up @@ -542,6 +549,7 @@ std::mutex dashboardGenMutex;
std::string getPCMDashboardJSON(const PCMDashboardType type, int ns, int nu, int nc)
{
auto pcm = PCM::getInstance();
auto accs = AcceleratorCounterState::getInstance();
std::lock_guard<std::mutex> dashboardGenGuard(dashboardGenMutex);
const size_t NumSockets = (ns < 0) ? pcm->getNumSockets() : ns;
const size_t NumUPILinksPerSocket = (nu < 0) ? pcm->getQPILinksPerSocket() : nu;
Expand Down Expand Up @@ -783,6 +791,32 @@ std::string getPCMDashboardJSON(const PCMDashboardType type, int ns, int nu, int
dashboard.push(panel);
dashboard.push(panel1);
}
if (pcm->getAccel() != ACCEL_NOCONFIG){
auto accelCounters = [&](const std::string & m)
{
auto panel = std::make_shared<GraphPanel>(0, y, width, height, accs->getAccelCounterName() + " " + m,"Byte/sec", false);
std::shared_ptr<Panel> panel1;
panel1 = std::make_shared<BarGaugePanel>(width, y, max_width - width, height, std::string("Current ") +accs->getAccelCounterName() + " (Byte/sec)");
y += height;
for (size_t s = 0; s < accs->getNumOfAccelDevs(); ++s)
{
const auto S = std::to_string(s);
const auto suffix = "/1";
auto t = createTarget("Device "+S,
"mean(\\\"Accelerators_"+accs->getAccelCounterName()+" Counters Device " + S + "_" + m + "\\\")" + suffix,
"rate(" + prometheusMetric(accs->remove_string_inside_use(m)) + "{instance=\\\"$node\\\", aggregate=\\\"system\\\", source=\\\"accel\\\" ,"+accs->getAccelCounterName()+"device=\\\"" + S + "\\\"}" + interval + ")" + suffix);
panel->push(t);
panel1->push(t);

}
dashboard.push(panel);
dashboard.push(panel1);
};
for (int j =0;j<accs->getNumberOfCounters();j++)
{
accelCounters(accs->getAccelIndexCounterName(j));
}
}
for (size_t s = 0; s < NumSockets; ++s)
{
const auto S = std::to_string(s);
Expand Down
18 changes: 13 additions & 5 deletions src/pci.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ inline void forAllIntelDevices(F f, int requestedDevice = -1, int requestedFunct

auto probe = [&f](const uint32 group, const uint32 bus, const uint32 device, const uint32 function)
{
// std::cerr << "Probing " << std::hex << group << ":" << bus << ":" << device << ":" << function << " " << std::dec << "\n";
uint32 value = 0;
try
{
Expand All @@ -191,6 +192,7 @@ inline void forAllIntelDevices(F f, int requestedDevice = -1, int requestedFunct
}
const uint32 vendor_id = value & 0xffff;
const uint32 device_id = (value >> 16) & 0xffff;
// std::cerr << "Found dev " << std::hex << vendor_id << ":" << device_id << std::dec << "\n";
if (vendor_id != PCM_INTEL_PCI_VENDOR_ID)
{
return;
Expand Down Expand Up @@ -256,12 +258,13 @@ void processDVSEC(MatchFunc matchFunc, ProcessFunc processFunc)
{
forAllIntelDevices([&](const uint32 group, const uint32 bus, const uint32 device, const uint32 function, const uint32 /* device_id */)
{
// std::cerr << "Intel device scan. found " << std::hex << group << ":" << bus << ":" << device << ":" << function << " " << device_id << std::dec;
uint32 status{0};
PciHandleType h(group, bus, device, function);
h.read32(6, &status); // read status
if (status & 0x10) // has capability list
{
// std::cout << "Intel device scan. found "<< std::hex << group << ":" << bus << ":" << device << ":" << function << " " << device_id << " with capability list\n" << std::dec;
// std::cerr << "Intel device scan. found "<< std::hex << group << ":" << bus << ":" << device << ":" << function << " " << device_id << " with capability list\n" << std::dec;
VSEC header;
uint64 offset = 0x100;
do
Expand All @@ -274,11 +277,11 @@ void processDVSEC(MatchFunc matchFunc, ProcessFunc processFunc)
{
return;
}
// std::cout << "offset 0x" << std::hex << offset << " header.fields.cap_id: 0x" << header.fields.cap_id << std::dec << "\n";
// std::cout << ".. found entryID: 0x" << std::hex << header.fields.entryID << std::dec << "\n";
if (matchFunc(header)) // UNCORE_DISCOVERY_DVSEC_ID_PMON
// std::cerr << "offset 0x" << std::hex << offset << " header.fields.cap_id: 0x" << header.fields.cap_id << std::dec << "\n";
// std::cerr << ".. found entryID: 0x" << std::hex << header.fields.entryID << std::dec << "\n";
if (matchFunc(header))
{
// std::cout << ".... found UNCORE_DISCOVERY_DVSEC_ID_PMON\n";
// std::cerr << ".... found match\n";
auto barOffset = 0x10 + header.fields.tBIR * 4;
uint32 bar = 0;
if (h.read32(barOffset, &bar) == sizeof(uint32) && bar != 0) // read bar
Expand All @@ -291,7 +294,12 @@ void processDVSEC(MatchFunc matchFunc, ProcessFunc processFunc)
std::cerr << "Error: can't read bar from offset " << barOffset << " \n";
}
}
const uint64 lastOffset = offset;
offset = header.fields.cap_next & ~3;
if (lastOffset == offset) // the offset did not change
{
return; // deadlock protection
}
} while (1);
}
});
Expand Down
Loading

0 comments on commit 8fec46e

Please sign in to comment.