From d4ba7ff998ea865121d864bd627cb0b98948c8ea Mon Sep 17 00:00:00 2001 From: Protobuf Team Bot Date: Fri, 27 Dec 2024 11:05:19 -0800 Subject: [PATCH] Print the presence probability when analysis is enabled. PiperOrigin-RevId: 710098117 --- .../cpp/tools/analyze_profile_proto.cc | 19 ++++++- .../cpp/tools/analyze_profile_proto.h | 3 ++ .../cpp/tools/analyze_profile_proto_main.cc | 4 ++ .../cpp/tools/analyze_profile_proto_test.cc | 53 +++++++++++++++++-- 4 files changed, 74 insertions(+), 5 deletions(-) diff --git a/src/google/protobuf/compiler/cpp/tools/analyze_profile_proto.cc b/src/google/protobuf/compiler/cpp/tools/analyze_profile_proto.cc index 426b9ddd2699..de4910685ada 100644 --- a/src/google/protobuf/compiler/cpp/tools/analyze_profile_proto.cc +++ b/src/google/protobuf/compiler/cpp/tools/analyze_profile_proto.cc @@ -32,6 +32,7 @@ #include "absl/strings/cord.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" #include "absl/strings/str_replace.h" #include "absl/strings/string_view.h" #include "google/protobuf/compiler/cpp/cpp_access_info_parse_helper.h" @@ -54,6 +55,7 @@ struct PDProtoAnalysis { PDProtoScale usage = PDProtoScale::kDefault; uint64_t presence_count = 0; uint64_t usage_count = 0; + float presence_probability = 0.0; }; std::ostream& operator<<(std::ostream& s, PDProtoScale scale) { @@ -119,6 +121,8 @@ class PDProtoAnalyzer { return analysis; } + analysis.presence_probability = GetPresenceProbability(field); + if (IsLikelyPresent(field)) { analysis.presence = PDProtoScale::kLikely; } else if (IsRarelyPresent(field)) { @@ -182,6 +186,13 @@ class PDProtoAnalyzer { info_map_.IsCold(field, AccessInfoMap::kWrite, kColdRatio); } + float GetPresenceProbability(const FieldDescriptor* field) { + // Since message count is max(#parse, #serialization), return the max of + // access ratio of both parse and serialization. + return std::max(info_map_.AccessRatio(field, AccessInfoMap::kWrite), + info_map_.AccessRatio(field, AccessInfoMap::kRead)); + } + cpp::Options options_; AccessInfoMap info_map_; SplitMap split_map_; @@ -436,8 +447,12 @@ static absl::StatusOr AnalyzeProfileProto( stream << " " << TypeName(field) << " " << field->name() << ":"; if (options.print_analysis) { - if (analysis.presence != PDProtoScale::kDefault) { - stream << " " << analysis.presence << "_PRESENT"; + if (analysis.presence != PDProtoScale::kDefault || + options.print_analysis_all) { + stream << " " << analysis.presence << "_PRESENT" + << absl::StrFormat( + "(%.2f%%)", + analysis.presence_probability * 100); } if (analysis.usage != PDProtoScale::kDefault) { stream << " " << analysis.usage << "_USED(" diff --git a/src/google/protobuf/compiler/cpp/tools/analyze_profile_proto.h b/src/google/protobuf/compiler/cpp/tools/analyze_profile_proto.h index 289d29441535..a5f140b6a416 100644 --- a/src/google/protobuf/compiler/cpp/tools/analyze_profile_proto.h +++ b/src/google/protobuf/compiler/cpp/tools/analyze_profile_proto.h @@ -26,6 +26,9 @@ struct AnalyzeProfileProtoOptions { // true to include presence and usage info instead of only optimization info bool print_analysis = false; + // true to include presence probability info + bool print_analysis_all = false; + // Descriptor pool to use. Must not be null. const DescriptorPool* pool = nullptr; diff --git a/src/google/protobuf/compiler/cpp/tools/analyze_profile_proto_main.cc b/src/google/protobuf/compiler/cpp/tools/analyze_profile_proto_main.cc index b0756a397737..f7e4f05949f3 100644 --- a/src/google/protobuf/compiler/cpp/tools/analyze_profile_proto_main.cc +++ b/src/google/protobuf/compiler/cpp/tools/analyze_profile_proto_main.cc @@ -26,6 +26,9 @@ ABSL_FLAG(bool, all, false, "Print all fields"); ABSL_FLAG(bool, analysis, false, "Print field analysis"); +ABSL_FLAG(bool, analysis_all, false, + "Print detailed field analysis, such as field presence probability, " + "for all fields, not just hot or cold ones."); ABSL_FLAG(std::string, message_filter, "", "Regex match for message name"); ABSL_FLAG(bool, aggregate_analysis, false, "If set, will recursively find proto.profile in the given dir and " @@ -45,6 +48,7 @@ int main(int argc, char* argv[]) { if (!absl::GetFlag(FLAGS_aggregate_analysis)) { options.print_all_fields = absl::GetFlag(FLAGS_all); options.print_analysis = absl::GetFlag(FLAGS_analysis); + options.print_analysis_all = absl::GetFlag(FLAGS_analysis_all); options.message_filter = absl::GetFlag(FLAGS_message_filter); status = AnalyzeProfileProtoToText(std::cout, argv[1], options); } else { diff --git a/src/google/protobuf/compiler/cpp/tools/analyze_profile_proto_test.cc b/src/google/protobuf/compiler/cpp/tools/analyze_profile_proto_test.cc index a288702f82a5..f2d0999fb211 100644 --- a/src/google/protobuf/compiler/cpp/tools/analyze_profile_proto_test.cc +++ b/src/google/protobuf/compiler/cpp/tools/analyze_profile_proto_test.cc @@ -186,9 +186,9 @@ TEST(AnalyzeProfileProtoTest, PrintStatistics) { R"(Message google::protobuf::compiler::tools::AnalyzeThis int32 id: RARELY_USED(100) string optional_string: RARELY_USED(100) - string[] repeated_string: LIKELY_PRESENT RARELY_USED(100) - AnalyzeChild optional_child: LIKELY_PRESENT RARELY_USED(1) LAZY - AnalyzeChild[] repeated_child: LIKELY_PRESENT RARELY_USED(100) + string[] repeated_string: LIKELY_PRESENT(100.00%) RARELY_USED(100) + AnalyzeChild optional_child: LIKELY_PRESENT(100.00%) RARELY_USED(1) LAZY + AnalyzeChild[] repeated_child: LIKELY_PRESENT(100.00%) RARELY_USED(100) Nested nested: RARELY_USED(100) ======== singular_lazy_num=1 @@ -209,6 +209,53 @@ repeated_lazy_pcount/repeated_total_pcount=0 )"); } +TEST(AnalyzeProfileProtoTest, PrintStatisticsAll) { + AccessInfo info = ParseTextOrDie(R"pb( + language: "cpp" + message { + name: "google::protobuf::compiler::tools::AnalyzeThis" + count: 100 + field { name: "id" getters_count: 1 configs_count: 100 } + field { name: "optional_string" getters_count: 1 configs_count: 100 } + field { name: "optional_child" getters_count: 100 configs_count: 1 } + field { name: "repeated_string" getters_count: 100 configs_count: 100 } + field { name: "repeated_child" getters_count: 100 configs_count: 100 } + field { name: "nested" getters_count: 1 configs_count: 100 } + } + )pb"); + AnalyzeProfileProtoOptions options; + options.print_unused_threshold = false; + options.print_optimized = false; + options.print_analysis = true; + options.print_analysis_all = true; + options.pool = DescriptorPool::generated_pool(); + EXPECT_STREQ(AnalyzeToText(info, options).c_str(), + R"(Message google::protobuf::compiler::tools::AnalyzeThis + int32 id: DEFAULT_PRESENT(1.00%) RARELY_USED(100) + string optional_string: DEFAULT_PRESENT(1.00%) RARELY_USED(100) + string[] repeated_string: LIKELY_PRESENT(100.00%) RARELY_USED(100) + AnalyzeChild optional_child: LIKELY_PRESENT(100.00%) RARELY_USED(1) LAZY + AnalyzeChild[] repeated_child: LIKELY_PRESENT(100.00%) RARELY_USED(100) + Nested nested: DEFAULT_PRESENT(1.00%) RARELY_USED(100) +======== +singular_lazy_num=1 +singular_lazy_0usage_num=0 +repeated_lazy_num=0 +singular_total_pcount=101 +repeated_total_pcount=100 +singular_lazy_pcount=100 +singular_lazy_0usage_pcount=0 +repeated_lazy_pcount=0 +max_pcount=100 +max_ucount=100 +repeated_lazy_num/singular_lazy_num=0 +repeated_lazy_pcount/singular_lazy_pcount=0 +singular_lazy_pcount/singular_total_pcount=0.990099 +singular_lazy_0usage_pcount/singular_total_pcount=0 +repeated_lazy_pcount/repeated_total_pcount=0 +)"); +} + } // namespace } // namespace tools } // namespace compiler