Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to the latest internal version #180

Merged
merged 8 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .bazelrc
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Require c++17 for c++ files.
build --cxxopt='-xc++'
build --cxxopt='-std=c++17'
build --copt='-Wno-sign-compare'

Expand All @@ -7,5 +8,10 @@ build --copt='-Wno-sign-compare'
# using previous versions of Bazel.
common --enable_bzlmod

# For 3rd party code: Disable warnings entirely.
# They are not actionable and just create noise.
build --per_file_copt=external/.*@-w
build --host_per_file_copt=external/.*@-w

# Load user-specific configuration, if any.
try-import %workspace%/user.bazelrc
61 changes: 0 additions & 61 deletions external/zlib.BUILD

This file was deleted.

46 changes: 26 additions & 20 deletions src/perf_data_converter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -364,14 +364,15 @@ std::string DataSrcString(uint64_t mem_lvl) {
if (mem_lvl & quipper::PERF_MEM_LVL_L1) return "L1";
if (mem_lvl & quipper::PERF_MEM_LVL_LFB) return "LFB";
if (mem_lvl & quipper::PERF_MEM_LVL_L2) return "L2";
if (mem_lvl & quipper::PERF_MEM_LVL_L3) return "L3";
if (mem_lvl & quipper::PERF_MEM_LVL_L3) return "Local L3";
if (mem_lvl & quipper::PERF_MEM_LVL_LOC_RAM) return "Local DRAM";
if (mem_lvl &
(quipper::PERF_MEM_LVL_REM_RAM1 | quipper::PERF_MEM_LVL_REM_RAM2))
return "Remote DRAM";
if (mem_lvl &
(quipper::PERF_MEM_LVL_REM_CCE1 | quipper::PERF_MEM_LVL_REM_CCE2))
return "Remote Cache";
// AMD ZEN4+ (Genoa+) supports Near and Far L3 levels. On other AMD platforms,
// Local and Near L3 both refer to the Local L3.
if (mem_lvl & quipper::PERF_MEM_LVL_REM_CCE1) return "Near L3";
if (mem_lvl & quipper::PERF_MEM_LVL_REM_CCE2) return "Far L3";
if (mem_lvl & quipper::PERF_MEM_LVL_IO) return "IO Memory";
if (mem_lvl & quipper::PERF_MEM_LVL_UNC) return "Uncached Memory";
return "Unknown Level";
Expand Down Expand Up @@ -836,7 +837,7 @@ bool PerfDataConverter::Sample(const PerfDataHandler::SampleContext& sample) {
// LBR callstacks include only user call chains. If this is an LBR sample,
// we get the kernel callstack from the sample's callchain, and the user
// callstack from the sample's branch_stack.
const bool lbr_sample = !sample.branch_stack.empty();
const bool lbr_sample = !sample.branch_stack.empty() && !sample.spe.is_spe;
bool skipped_dup = false;
for (const auto& frame : sample.callchain) {
if (lbr_sample && frame.ip == quipper::PERF_CONTEXT_USER) {
Expand Down Expand Up @@ -875,23 +876,28 @@ bool PerfDataConverter::Sample(const PerfDataHandler::SampleContext& sample) {
AddOrGetLocation(event_pid, frame.ip - 1, frame.mapping, builder));
IncBuildIdStats(event_pid, frame.mapping);
}
for (const auto& frame : sample.branch_stack) {
// branch_stack entries are pairs of <from, to> locations corresponding to
// addresses of call instructions and target addresses of those calls.
// We need only the addresses of the function call instructions, stored in
// the 'from' field, to recover the call chains.
if (frame.from.mapping == nullptr) {
continue;
}
// An LBR entry includes the address of the call instruction, so we don't
// have to do any adjustments.
if (frame.from.ip < frame.from.mapping->start) {
continue;

// Only add the frames from branch_stack if this is an LBR sample.
if (lbr_sample) {
for (const auto& frame : sample.branch_stack) {
// branch_stack entries are pairs of <from, to> locations corresponding to
// addresses of call instructions and target addresses of those calls. We
// need only the addresses of the function call instructions, stored in
// the 'from' field, to recover the call chains.
if (frame.from.mapping == nullptr) {
continue;
}
// An LBR entry includes the address of the call instruction, so we don't
// have to do any adjustments.
if (frame.from.ip < frame.from.mapping->start) {
continue;
}
sample_key.stack.push_back(AddOrGetLocation(event_pid, frame.from.ip,
frame.from.mapping, builder));
IncBuildIdStats(event_pid, frame.from.mapping);
}
sample_key.stack.push_back(AddOrGetLocation(event_pid, frame.from.ip,
frame.from.mapping, builder));
IncBuildIdStats(event_pid, frame.from.mapping);
}

AddOrUpdateSample(sample, event_pid, sample_key, builder);
return true;
}
Expand Down
2 changes: 1 addition & 1 deletion src/perf_data_converter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1073,7 +1073,7 @@ TEST_F(PerfDataConverterTest, ConvertsDataSrc) {
const std::unordered_map<std::string, uint64_t> expected_counts{
{"L1", 2},
{"L2", 1},
{"L3", 1},
{"Local L3", 1},
};
EXPECT_THAT(counts_by_datasrc, UnorderedPointwise(Eq(), expected_counts));
}
Expand Down
36 changes: 16 additions & 20 deletions src/quipper/perf_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1171,7 +1171,7 @@ bool PerfReader::ReadNonHeaderEventDataWithoutHeader(

if (event->header.type == PERF_RECORD_MMAP ||
event->header.type == PERF_RECORD_MMAP2) {
if (proto_->file_attrs(0).has_attr() &&
if (proto_->file_attrs_size() > 0 && proto_->file_attrs(0).has_attr() &&
proto_->file_attrs(0).attr().exclude_kernel() &&
event->header.misc & PERF_RECORD_MISC_KERNEL && event->mmap.len == 0) {
// A buggy version of perf emits zero-length MMAP records for the kernel
Expand Down Expand Up @@ -1330,7 +1330,7 @@ bool PerfReader::ReadMetadataWithoutHeader(DataReader* data, u32 type,
case HEADER_BRANCH_STACK:
return true;
case HEADER_PMU_MAPPINGS:
return ReadPMUMappingsMetadata(data, size);
return ReadPMUMappingsMetadata(data);
case HEADER_GROUP_DESC:
return ReadGroupDescMetadata(data);
case HEADER_HYBRID_TOPOLOGY:
Expand Down Expand Up @@ -1683,26 +1683,20 @@ bool PerfReader::ReadNUMATopologyMetadata(DataReader* data) {
return true;
}

bool PerfReader::ReadPMUMappingsMetadata(DataReader* data, size_t size) {
bool PerfReader::ReadPMUMappingsMetadata(DataReader* data) {
pmu_mappings_num_mappings_type num_mappings;
auto begin_offset = data->Tell();
if (!data->ReadUint32(&num_mappings)) {
LOG(ERROR) << "Error reading the number of PMU mappings.";
return false;
}

// Check size of the data read in addition to the iteration based on the
// number of PMU mappings because the number of pmu mappings is always zero
// in piped perf.data file.
//
// The number of PMU mappings is initialized to zero and after all the
// mappings are written to the perf.data files, this value is set to the
// number of PMU mappings written. This logic doesn't work in pipe mode. So,
// the number of PMU mappings is always zero.
// The fix to write the number of PMU mappings before writing the actual PMU
// mappings landed upstream in 4.14, but the size check is still required as
// long as there are machines with older versions of perf.
for (u32 i = 0; i < num_mappings || data->Tell() - begin_offset < size; ++i) {
if (num_mappings == 0) {
LOG(ERROR) << "Found 0 PMU mappings. Expecting more. If this is a pre-4.14 "
"perf.data file in pipe mode, it's unsupported now.";
return false;
}

for (u32 i = 0; i < num_mappings; ++i) {
PerfPMUMappingsMetadata mapping;
if (!data->ReadUint32(&mapping.type) ||
!data->ReadStringWithSizeFromData(&mapping.name)) {
Expand All @@ -1712,10 +1706,6 @@ bool PerfReader::ReadPMUMappingsMetadata(DataReader* data, size_t size) {
serializer_.SerializePMUMappingsMetadata(mapping,
proto_->add_pmu_mappings());
}
if (data->Tell() - begin_offset != size) {
LOG(ERROR) << "Size from the header doesn't match the read size";
return false;
}
return true;
}

Expand Down Expand Up @@ -2360,6 +2350,12 @@ bool PerfReader::ReadAttrEventBlock(DataReader* data, size_t size) {

// attr.attr.size has been upgraded to the current size of perf_event_attr.
const size_t actual_attr_size = data->Tell() - initial_offset;
if (size < actual_attr_size) {
LOG(ERROR) << "Declared payload size " << size << " of "
<< "PERF_RECORD_HEADER_ATTR event is less than the number of "
<< "bytes read for the attr_event struct " << actual_attr_size;
return false;
}

const size_t num_ids =
(size - actual_attr_size) / sizeof(decltype(attr.ids)::value_type);
Expand Down
2 changes: 1 addition & 1 deletion src/quipper/perf_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ class PerfReader {
bool ReadUint64Metadata(DataReader* data, u32 type, size_t size);
bool ReadCPUTopologyMetadata(DataReader* data, size_t size);
bool ReadNUMATopologyMetadata(DataReader* data);
bool ReadPMUMappingsMetadata(DataReader* data, size_t size);
bool ReadPMUMappingsMetadata(DataReader* data);
bool ReadGroupDescMetadata(DataReader* data);
bool ReadEventDescMetadata(DataReader* data);
bool ReadHybridTopologyMetadata(DataReader* data, size_t size);
Expand Down
8 changes: 8 additions & 0 deletions src/quipper/perf_test_files.cc
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,14 @@ const std::vector<const char*>& GetPerfPipedDataFiles() {
* cat &> /tmp/perf.data.piped.header_feautres_group_desc-6.8
*/
"perf.data.piped.header_feautres_group_desc-6.8",

/* Perf data that contains an aligned HEADER_PMU_MAPPINGS
* PERF_RECORD_FEATURE, generated in piped mode from perf 6.12.
* Command:
* $ /tmp/perf record -e cycles -o - -- echo "Hello, World!" | \
* cat &> /tmp/perf.data.piped.header_features_aligned-6.12
*/
"perf.data.piped.header_features_aligned-6.12",
};
return *files;
}
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
/usr/lib/x86_64-linux-gnu/libc.so.6
Binary file not shown.
Loading