Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Push 2023 12 13 #622

Merged
merged 32 commits into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
226cf9d
use right PCM_MSR_DRV_NAME to access MSR driver
rdementi Nov 6, 2023
73b2dae
pcm accel grafana support
pavithranp Oct 31, 2023
285d502
try cross-platform-actions for FreeBSD
rdementi Nov 6, 2023
a47b34b
fix race condition in isHWTMAL1Supported
rdementi Nov 7, 2023
8360600
fix deadlock in processDVSEC
rdementi Nov 8, 2023
4ce30e2
Merge remote-tracking branch 'opcm-github/master'
rdementi Nov 14, 2023
5c48cc9
add support of PP0 and PP1 energy metrics
rdementi Nov 15, 2023
1f2ae40
initial code for 1f leaf topology detection
rdementi Nov 16, 2023
495e634
catch exceptions in msr.sys
rdementi Nov 16, 2023
6b4856c
add getDomainTypeStr
rdementi Nov 30, 2023
b024e4d
refactor populateEntry
rdementi Nov 30, 2023
59fae52
add implicit SocketPackageDomain
rdementi Dec 1, 2023
009a62c
rely on cpuid leaf 0x1F topology info when available
rdementi Dec 1, 2023
3c364cc
Merge remote-tracking branch 'opcm-github/master'
rdementi Dec 7, 2023
7a9494f
Get rid of max_sockets hardcoded value in pcm-iio
Dec 11, 2023
78e9929
Print events names when PCM_DEBUG is enabled
Dec 11, 2023
16539eb
Print information about pcm-iio utility in help
Dec 11, 2023
9650e55
Add missed '-list' to help information
Dec 11, 2023
eafb4f8
Load PCIe DB, set signal handlers and create PCM instance after
Dec 11, 2023
3ef52ef
Cleanup after rebase
Dec 11, 2023
b7f0c8f
Fix pipeline error with --list
Dec 11, 2023
fc50181
Revert changes for pcm version output
Dec 11, 2023
fe268dd
Print after checking silent mode
Dec 11, 2023
aaabba1
address https://github.com/intel/pcm/issues/618
opcm Dec 12, 2023
dea5f3f
drop dead code
opcm Dec 13, 2023
40e76f7
factor out cpuid 0xb topology code and use it in OSX
opcm Dec 13, 2023
90a3339
expose thread id and tile id on OSX
opcm Dec 13, 2023
aaf7f7e
fix compile error on MSVC
opcm Dec 13, 2023
d324f09
be silent inside initCoreMasks
opcm Dec 13, 2023
7fdc77e
address a clang scan warning
opcm Dec 13, 2023
10f854c
address a clang scan warning (II)
opcm Dec 13, 2023
d4c0972
Merge tag '2023-12-13' into push-2023-12-13
rdementi Dec 14, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 0 additions & 23 deletions src/MacMSRDriver/MSRKernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,27 +13,4 @@ typedef struct {
uint32_t msr_num;
} pcm_msr_data_t;

/*
// The topologyEntry struct that is used by PCM
typedef struct{
uint32_t os_id;
uint32_t socket;
uint32_t core_id;
} topologyEntry;

// A kernel version of the topology entry structure. It has
// an extra unused int to explicitly align the struct on a 64bit
// boundary, preventing the compiler from adding extra padding.
enum {
kOpenDriver,
kCloseDriver,
kReadMSR,
kWriteMSR,
kBuildTopology,
kGetNumInstances,
kIncrementNumInstances,
kDecrementNumInstances,
kNumberOfMethods
};
*/
#endif
26 changes: 15 additions & 11 deletions src/MacMSRDriver/PcmMsr/PcmMsr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ PcmMsrDriverClassName *g_pci_driver = NULL;
asm volatile ("wrmsr" : : "c" (msr), "a" (lo), "d" (hi))
#define rdmsr(msr,lo,hi) \
asm volatile ("\trdmsr\n" : "=a" (lo), "=d" (hi) : "c" (msr))
#define cpuid(func1, func2, a, b, c, d) \
asm volatile ("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (func1), "c" (func2));

extern "C" {
extern void mp_rendezvous_no_intrs(void (*func)(void *),
Expand Down Expand Up @@ -58,14 +56,18 @@ void cpuWriteMSR(void* pIDatas){

void cpuGetTopoData(void* pTopos){
TopologyEntry* entries = (TopologyEntry*)pTopos;
int cpu = cpu_number();
int info[4];
entries[cpu].os_id = cpu;
cpuid(0xB, 1, info[0], info[1], info[2], info[3]);
entries[cpu].socket = info[3] >> info[0] & 0xF;

cpuid(0xB, 0, info[0], info[1], info[2], info[3]);
entries[cpu].core_id = info[3] >> info[0] & 0xF;
const int cpu = cpu_number();

TopologyEntry & entry = entries[cpu];
entry.os_id = cpu;

uint32 smtMaskWidth = 0;
uint32 coreMaskWidth = 0;
uint32 l2CacheMaskShift = 0;
initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift);
PCM_CPUID_INFO cpuid_args;
pcm_cpuid(0xb, 0x0, cpuid_args);
fillEntry(entry, smtMaskWidth, coreMaskWidth, l2CacheMaskShift, cpuid_args.array[3]);
}

OSDefineMetaClassAndStructors(com_intel_driver_PcmMsr, IOService)
Expand Down Expand Up @@ -188,8 +190,10 @@ IOReturn PcmMsrDriverClassName::buildTopology(TopologyEntry* odata, uint32_t inp

for(uint32_t i = 0; i < num_cores && i < input_num_cores; i++)
{
odata[i].core_id = topologies[i].core_id;
odata[i].os_id = topologies[i].os_id;
odata[i].thread_id = topologies[i].thread_id;
odata[i].core_id = topologies[i].core_id;
odata[i].tile_id = topologies[i].tile_id;
odata[i].socket = topologies[i].socket;
}

Expand Down
92 changes: 9 additions & 83 deletions src/cpucounters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -339,17 +339,6 @@ void pcm_cpuid_bsd(int leaf, PCM_CPUID_INFO& info, int core)
}
#endif

/*
 * Executes the CPUID instruction for a given leaf/subleaf pair.
 *
 * leaf    - value supplied in EAX when CPUID is issued
 * subleaf - value supplied in ECX when CPUID is issued
 * info    - receives the EAX, EBX, ECX and EDX values produced by CPUID
 *
 * Two implementations are selected at compile time: the __cpuidex intrinsic
 * when _MSC_VER is defined, GCC-style inline assembly otherwise.
 */
void pcm_cpuid(const unsigned leaf, const unsigned subleaf, PCM_CPUID_INFO & info)
{
#ifdef _MSC_VER
// MSVC: the intrinsic stores the four result registers into info.array
__cpuidex(info.array, leaf, subleaf);
#else
// Inline asm: the "a"/"c" inputs load leaf/subleaf into EAX/ECX; the
// "=a"/"=b"/"=c"/"=d" outputs are written to the matching info.reg fields
__asm__ __volatile__ ("cpuid" : \
"=a" (info.reg.eax), "=b" (info.reg.ebx), "=c" (info.reg.ecx), "=d" (info.reg.edx) : "a" (leaf), "c" (subleaf));
#endif
}

#ifdef __linux__
bool isNMIWatchdogEnabled(const bool silent);
bool keepNMIWatchdogEnabled();
Expand Down Expand Up @@ -1121,16 +1110,9 @@ bool PCM::discoverSystemTopology()
socketIdMap_type socketIdMap;

PCM_CPUID_INFO cpuid_args;
// init constants for CPU topology leaf 0xB
// adapted from Topology Enumeration Reference code for Intel 64 Architecture
// https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration
int wasCoreReported = 0, wasThreadReported = 0;
int subleaf = 0, levelType, levelShift;
//uint32 coreSelectMask = 0, smtSelectMask = 0;
uint32 smtMaskWidth = 0;
//uint32 pkgSelectMask = (-1), pkgSelectMaskShift = 0;
uint32 corePlusSMTMaskWidth = 0;
uint32 coreMaskWidth = 0;
uint32 l2CacheMaskShift = 0;

struct domain
{
Expand All @@ -1140,30 +1122,14 @@ bool PCM::discoverSystemTopology()
std::unordered_map<int, domain> topologyDomainMap;
{
TemporalThreadAffinity aff0(0);
do

if (initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift) == false)
{
pcm_cpuid(0xb, subleaf, cpuid_args);
if (cpuid_args.array[1] == 0)
{ // if EBX ==0 then this subleaf is not valid, we can exit the loop
break;
}
levelType = extract_bits_ui(cpuid_args.array[2], 8, 15);
levelShift = extract_bits_ui(cpuid_args.array[0], 0, 4);
switch (levelType)
{
case 1: //level type is SMT, so levelShift is the SMT_Mask_Width
smtMaskWidth = levelShift;
wasThreadReported = 1;
break;
case 2: //level type is Core, so levelShift is the CorePlusSMT_Mask_Width
corePlusSMTMaskWidth = levelShift;
wasCoreReported = 1;
break;
default:
break;
}
subleaf++;
} while (1);
std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n";
return false;
}

int subleaf = 0;

std::vector<domain> topologyDomains;
if (max_cpuid >= 0x1F)
Expand Down Expand Up @@ -1209,42 +1175,6 @@ bool PCM::discoverSystemTopology()
}
}

if (wasThreadReported && wasCoreReported)
{
coreMaskWidth = corePlusSMTMaskWidth - smtMaskWidth;
}
else if (!wasCoreReported && wasThreadReported)
{
coreMaskWidth = smtMaskWidth;
}
else
{
std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n";
return false;
}

(void) coreMaskWidth; // to suppress warnings on MacOS (unused vars)

uint32 l2CacheMaskShift = 0;
#ifdef PCM_DEBUG_TOPOLOGY
uint32 threadsSharingL2;
#endif
uint32 l2CacheMaskWidth;

pcm_cpuid(0x4, 2, cpuid_args); // get ID for L2 cache
l2CacheMaskWidth = 1 + extract_bits_ui(cpuid_args.array[0],14,25); // number of APIC IDs sharing L2 cache
#ifdef PCM_DEBUG_TOPOLOGY
threadsSharingL2 = l2CacheMaskWidth;
#endif
for( ; l2CacheMaskWidth > 1; l2CacheMaskWidth >>= 1)
{
l2CacheMaskShift++;
}
#ifdef PCM_DEBUG_TOPOLOGY
std::cerr << "DEBUG: Number of threads sharing L2 cache = " << threadsSharingL2
<< " [the most significant bit = " << l2CacheMaskShift << "]\n";
#endif

#ifndef __APPLE__
auto populateEntry = [&topologyDomainMap,&smtMaskWidth, &coreMaskWidth, &l2CacheMaskShift](TopologyEntry& entry)
{
Expand Down Expand Up @@ -1285,11 +1215,7 @@ bool PCM::discoverSystemTopology()
}
else
{
const int apic_id = getAPICID(0xb);
entry.thread_id = smtMaskWidth ? extract_bits_ui(apic_id, 0, smtMaskWidth - 1) : 0;
entry.core_id = (smtMaskWidth + coreMaskWidth) ? extract_bits_ui(apic_id, smtMaskWidth, smtMaskWidth + coreMaskWidth - 1) : 0;
entry.socket = extract_bits_ui(apic_id, smtMaskWidth + coreMaskWidth, 31);
entry.tile_id = extract_bits_ui(apic_id, l2CacheMaskShift, 31);
fillEntry(entry, smtMaskWidth, coreMaskWidth, l2CacheMaskShift, getAPICID(0xb));
}
};
#endif
Expand Down
34 changes: 16 additions & 18 deletions src/pcm-iio.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ using namespace pcm;
#define SKX_UNC_SOCKETID_UBOX_LNID_OFFSET 0xC0
#define SKX_UNC_SOCKETID_UBOX_GID_OFFSET 0xD4

const uint8_t max_sockets = 4;
static const std::string iio_stack_names[6] = {
"IIO Stack 0 - CBDMA/DMI ",
"IIO Stack 1 - PCIe0 ",
Expand Down Expand Up @@ -239,8 +238,7 @@ struct iio_counter : public counter {
std::vector<result_content> data;
};

//TODO: remove binding to stacks amount
result_content results(max_sockets, stack_content(12, ctr_data()));
result_content results;

typedef struct
{
Expand Down Expand Up @@ -1444,6 +1442,7 @@ void print_usage(const string& progname)
cout << " -csv-delimiter=<value> | /csv-delimiter=<value> => set custom csv delimiter\n";
cout << " -human-readable | /human-readable => use human readable format for output (for csv only)\n";
cout << " -root-port | /root-port => add root port devices to output (for csv only)\n";
cout << " -list | --list => provide platform topology info\n";
cout << " -i[=number] | /i[=number] => allow to determine number of iterations\n";
cout << " Examples:\n";
cout << " " << progname << " 1.0 -i=10 => print counters every second 10 times and exit\n";
Expand All @@ -1456,22 +1455,18 @@ PCM_MAIN_NOTHROW;

int mainThrows(int argc, char * argv[])
{
if(print_version(argc, argv))
if (print_version(argc, argv))
exit(EXIT_SUCCESS);

null_stream nullStream;
check_and_set_silent(argc, argv, nullStream);

set_signal_handlers();

std::cout << "\n Intel(r) Performance Counter Monitor " << PCM_VERSION << "\n";
std::cout << "\n This utility measures IIO information\n\n";

string program = string(argv[0]);

vector<struct iio_counter> counters;
PCIDB pciDB;
load_PCIDB(pciDB);
bool csv = false;
bool human_readable = false;
bool show_root_port = false;
Expand All @@ -1480,11 +1475,9 @@ int mainThrows(int argc, char * argv[])
double delay = PCM_DELAY_DEFAULT;
bool list = false;
MainLoop mainLoop;
PCM * m = PCM::getInstance();
iio_evt_parse_context evt_ctx;
// Map with metrics names.
map<string,std::pair<h_id,std::map<string,v_id>>> nameMap;
map<string,uint32_t> opcodeFieldMap;

while (argc > 1) {
argv++;
Expand All @@ -1511,7 +1504,7 @@ int mainThrows(int argc, char * argv[])
else if (check_argument_equals(*argv, {"-human-readable", "/human-readable"})) {
human_readable = true;
}
else if (check_argument_equals(*argv, {"--list"})) {
else if (check_argument_equals(*argv, {"-list", "--list"})) {
list = true;
}
else if (check_argument_equals(*argv, {"-root-port", "/root-port"})) {
Expand All @@ -1526,13 +1519,14 @@ int mainThrows(int argc, char * argv[])
}
}

set_signal_handlers();

print_cpu_details();

//TODO: remove binding to max sockets count.
if (m->getNumSockets() > max_sockets) {
cerr << "Only systems with up to " << max_sockets << " sockets are supported! Program aborted\n";
exit(EXIT_FAILURE);
}
PCM * m = PCM::getInstance();

PCIDB pciDB;
load_PCIDB(pciDB);

auto mapping = IPlatformMapping::getPlatformMapping(m->getCPUModel(), m->getNumSockets());
if (!mapping) {
Expand Down Expand Up @@ -1568,6 +1562,7 @@ int mainThrows(int argc, char * argv[])
exit(EXIT_FAILURE);
}

map<string,uint32_t> opcodeFieldMap;
opcodeFieldMap["opcode"] = PCM::OPCODE;
opcodeFieldMap["ev_sel"] = PCM::EVENT_SELECT;
opcodeFieldMap["umask"] = PCM::UMASK;
Expand Down Expand Up @@ -1600,8 +1595,11 @@ int mainThrows(int argc, char * argv[])
exit(EXIT_FAILURE);
}

//print_nameMap(nameMap);
//TODO: Taking from cli
#ifdef PCM_DEBUG
print_nameMap(nameMap);
#endif

results.resize(m->getNumSockets(), stack_content(m->getMaxNumOfIIOStacks(), ctr_data()));

mainLoop([&]()
{
Expand Down
Loading
Loading