Skip to content

Commit

Permalink
Print the presence probability when analysis is enabled.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 710098117
  • Loading branch information
protobuf-github-bot authored and copybara-github committed Dec 27, 2024
1 parent aaa08d8 commit d4ba7ff
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 5 deletions.
19 changes: 17 additions & 2 deletions src/google/protobuf/compiler/cpp/tools/analyze_profile_proto.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "absl/strings/cord.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_replace.h"
#include "absl/strings/string_view.h"
#include "google/protobuf/compiler/cpp/cpp_access_info_parse_helper.h"
Expand All @@ -54,6 +55,7 @@ struct PDProtoAnalysis {
PDProtoScale usage = PDProtoScale::kDefault;
uint64_t presence_count = 0;
uint64_t usage_count = 0;
float presence_probability = 0.0;
};

std::ostream& operator<<(std::ostream& s, PDProtoScale scale) {
Expand Down Expand Up @@ -119,6 +121,8 @@ class PDProtoAnalyzer {
return analysis;
}

analysis.presence_probability = GetPresenceProbability(field);

if (IsLikelyPresent(field)) {
analysis.presence = PDProtoScale::kLikely;
} else if (IsRarelyPresent(field)) {
Expand Down Expand Up @@ -182,6 +186,13 @@ class PDProtoAnalyzer {
info_map_.IsCold(field, AccessInfoMap::kWrite, kColdRatio);
}

// Returns the presence probability recorded for `field`, taken as the larger
// of its write and read access ratios from info_map_.
//
// The message count is max(#parse, #serialization), so the larger of the two
// ratios is the one that is reported.
float GetPresenceProbability(const FieldDescriptor* field) {
  const auto write_ratio = info_map_.AccessRatio(field, AccessInfoMap::kWrite);
  const auto read_ratio = info_map_.AccessRatio(field, AccessInfoMap::kRead);
  return std::max(write_ratio, read_ratio);
}

cpp::Options options_;
AccessInfoMap info_map_;
SplitMap split_map_;
Expand Down Expand Up @@ -436,8 +447,12 @@ static absl::StatusOr<Stats> AnalyzeProfileProto(
stream << " " << TypeName(field) << " " << field->name() << ":";

if (options.print_analysis) {
if (analysis.presence != PDProtoScale::kDefault) {
stream << " " << analysis.presence << "_PRESENT";
if (analysis.presence != PDProtoScale::kDefault ||
options.print_analysis_all) {
stream << " " << analysis.presence << "_PRESENT"
<< absl::StrFormat(
"(%.2f%%)",
analysis.presence_probability * 100);
}
if (analysis.usage != PDProtoScale::kDefault) {
stream << " " << analysis.usage << "_USED("
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ struct AnalyzeProfileProtoOptions {
// true to include presence and usage info instead of only optimization info
bool print_analysis = false;

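// true to print presence info (label and probability) for all fields, not
// just those with non-default presence; only consulted when print_analysis
// is set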
bool print_analysis_all = false;

// Descriptor pool to use. Must not be null.
const DescriptorPool* pool = nullptr;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@

// Command-line flags for the profile analysis tool. In the non-aggregate path
// of main() each of these is copied into the matching field of
// AnalyzeProfileProtoOptions.

// --all: copied into options.print_all_fields.
ABSL_FLAG(bool, all, false, "Print all fields");
// --analysis: copied into options.print_analysis.
ABSL_FLAG(bool, analysis, false, "Print field analysis");
// --analysis_all: copied into options.print_analysis_all.
// NOTE(review): the printing code appears to consult this only inside the
// print_analysis branch, so it likely has no effect without --analysis;
// confirm against the full source.
ABSL_FLAG(bool, analysis_all, false,
"Print detailed field analysis, such as field presence probability, "
"for all fields, not just hot or cold ones.");
// --message_filter: copied into options.message_filter.
ABSL_FLAG(std::string, message_filter, "", "Regex match for message name");
ABSL_FLAG(bool, aggregate_analysis, false,
"If set, will recursively find proto.profile in the given dir and "
Expand All @@ -45,6 +48,7 @@ int main(int argc, char* argv[]) {
if (!absl::GetFlag(FLAGS_aggregate_analysis)) {
options.print_all_fields = absl::GetFlag(FLAGS_all);
options.print_analysis = absl::GetFlag(FLAGS_analysis);
options.print_analysis_all = absl::GetFlag(FLAGS_analysis_all);
options.message_filter = absl::GetFlag(FLAGS_message_filter);
status = AnalyzeProfileProtoToText(std::cout, argv[1], options);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,9 @@ TEST(AnalyzeProfileProtoTest, PrintStatistics) {
R"(Message google::protobuf::compiler::tools::AnalyzeThis
int32 id: RARELY_USED(100)
string optional_string: RARELY_USED(100)
string[] repeated_string: LIKELY_PRESENT RARELY_USED(100)
AnalyzeChild optional_child: LIKELY_PRESENT RARELY_USED(1) LAZY
AnalyzeChild[] repeated_child: LIKELY_PRESENT RARELY_USED(100)
string[] repeated_string: LIKELY_PRESENT(100.00%) RARELY_USED(100)
AnalyzeChild optional_child: LIKELY_PRESENT(100.00%) RARELY_USED(1) LAZY
AnalyzeChild[] repeated_child: LIKELY_PRESENT(100.00%) RARELY_USED(100)
Nested nested: RARELY_USED(100)
========
singular_lazy_num=1
Expand All @@ -209,6 +209,53 @@ repeated_lazy_pcount/repeated_total_pcount=0
)");
}

// Verifies the text report when both print_analysis and print_analysis_all are
// enabled: every field line carries a "<scale>_PRESENT(<pct>%)" annotation,
// including fields whose presence is DEFAULT, followed by the statistics
// summary after the "========" separator.
TEST(AnalyzeProfileProtoTest, PrintStatisticsAll) {
AccessInfo info = ParseTextOrDie(R"pb(
language: "cpp"
message {
name: "google::protobuf::compiler::tools::AnalyzeThis"
count: 100
field { name: "id" getters_count: 1 configs_count: 100 }
field { name: "optional_string" getters_count: 1 configs_count: 100 }
field { name: "optional_child" getters_count: 100 configs_count: 1 }
field { name: "repeated_string" getters_count: 100 configs_count: 100 }
field { name: "repeated_child" getters_count: 100 configs_count: 100 }
field { name: "nested" getters_count: 1 configs_count: 100 }
}
)pb");
// Turn off the unused-threshold and optimized output, and turn on the
// analysis output together with the "all fields" variant under test.
AnalyzeProfileProtoOptions options;
options.print_unused_threshold = false;
options.print_optimized = false;
options.print_analysis = true;
options.print_analysis_all = true;
options.pool = DescriptorPool::generated_pool();
// The comparison is an exact string match, so the expected text below must
// match the generated report character for character.
EXPECT_STREQ(AnalyzeToText(info, options).c_str(),
R"(Message google::protobuf::compiler::tools::AnalyzeThis
int32 id: DEFAULT_PRESENT(1.00%) RARELY_USED(100)
string optional_string: DEFAULT_PRESENT(1.00%) RARELY_USED(100)
string[] repeated_string: LIKELY_PRESENT(100.00%) RARELY_USED(100)
AnalyzeChild optional_child: LIKELY_PRESENT(100.00%) RARELY_USED(1) LAZY
AnalyzeChild[] repeated_child: LIKELY_PRESENT(100.00%) RARELY_USED(100)
Nested nested: DEFAULT_PRESENT(1.00%) RARELY_USED(100)
========
singular_lazy_num=1
singular_lazy_0usage_num=0
repeated_lazy_num=0
singular_total_pcount=101
repeated_total_pcount=100
singular_lazy_pcount=100
singular_lazy_0usage_pcount=0
repeated_lazy_pcount=0
max_pcount=100
max_ucount=100
repeated_lazy_num/singular_lazy_num=0
repeated_lazy_pcount/singular_lazy_pcount=0
singular_lazy_pcount/singular_total_pcount=0.990099
singular_lazy_0usage_pcount/singular_total_pcount=0
repeated_lazy_pcount/repeated_total_pcount=0
)");
}

} // namespace
} // namespace tools
} // namespace compiler
Expand Down

0 comments on commit d4ba7ff

Please sign in to comment.