Skip to content

Commit

Permalink
aggregate error info
Browse files Browse the repository at this point in the history
  • Loading branch information
williampsmith committed Nov 1, 2024
1 parent 7d55405 commit 1764907
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 27 deletions.
7 changes: 5 additions & 2 deletions crates/sui-core/src/authority_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1296,10 +1296,13 @@ impl ValidatorService {
traffic_controller.tally(TrafficTally {
direct: client,
through_fullnode: None,
error_weight: error.clone().map(normalize).unwrap_or(Weight::zero()),
error_info: error.map(|e| {
let error_type = String::from(e.clone().as_ref());
let error_weight = normalize(e);
(error_weight, error_type)
}),
spam_weight,
timestamp: SystemTime::now(),
error_type: error.map(|e| e.to_string()),
})
}
unwrapped_response
Expand Down
6 changes: 3 additions & 3 deletions crates/sui-core/src/traffic_controller/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ pub struct TrafficControllerMetrics {
pub num_dry_run_blocked_requests: IntCounter,
pub tally_handled: IntCounter,
pub error_tally_handled: IntCounter,
pub tally_errors: IntCounterVec,
pub tally_error_types: IntCounterVec,
pub deadmans_switch_enabled: IntGauge,
pub highest_direct_spam_rate: IntGauge,
pub highest_proxied_spam_rate: IntGauge,
Expand Down Expand Up @@ -91,8 +91,8 @@ impl TrafficControllerMetrics {
registry
)
.unwrap(),
tally_errors: register_int_counter_vec_with_registry!(
"traffic_control_tally_errors",
tally_error_types: register_int_counter_vec_with_registry!(
"traffic_control_tally_error_types",
"Number of tally errors, grouped by error type",
&["error_type"],
registry
Expand Down
20 changes: 12 additions & 8 deletions crates/sui-core/src/traffic_controller/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -425,15 +425,20 @@ async fn handle_error_tally(
metrics: Arc<TrafficControllerMetrics>,
mem_drainfile_present: bool,
) -> Result<(), reqwest::Error> {
if !tally.error_weight.is_sampled() {
let error_weight = if let Some((error_weight, error_type)) = tally.clone().error_info {
metrics
.tally_error_types
.with_label_values(&[error_type.as_str()])
.inc();
error_weight
} else {
return Ok(());
};
if !error_weight.is_sampled() {
return Ok(());
}
let resp = policy.handle_tally(tally.clone());
let resp = policy.handle_tally(tally);
metrics.error_tally_handled.inc();
metrics
.tally_errors
.with_label_values(&[tally.error_type.as_deref().unwrap_or("unknown")])
.inc();
if let Some(fw_config) = fw_config {
if fw_config.delegate_error_blocking && !mem_drainfile_present {
let client = nodefw_client
Expand Down Expand Up @@ -750,9 +755,8 @@ impl TrafficSim {
client,
// TODO add proxy IP for testing
None,
None,
// TODO add weight adjustments
Weight::one(),
None,
Weight::one(),
));
} else {
Expand Down
18 changes: 6 additions & 12 deletions crates/sui-core/src/traffic_controller/policies.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,7 @@ impl TrafficSketch {
/// A single traffic observation passed to the traffic controller's `tally(...)`.
///
/// NOTE(review): the surrounding text is a rendered commit diff without +/-
/// markers, so `error_weight` / `error_type` and `error_info` appear side by
/// side here. Presumably the first two are the pre-change fields that
/// `error_info` replaces — confirm against the actual file.
pub struct TrafficTally {
/// Client address as supplied by the caller (call sites pass their `client`).
pub direct: Option<IpAddr>,
/// Optional second address; the call sites shown pass `None`.
/// Presumably the client behind a fullnode proxy — TODO confirm.
pub through_fullnode: Option<IpAddr>,
/// Weight attributed to the request's error.
/// NOTE(review): likely the pre-change field superseded by `error_info`.
pub error_weight: Weight,
/// Label describing the error, if any.
/// NOTE(review): likely the pre-change field superseded by `error_info`.
pub error_type: Option<String>,
/// `(error weight, error type label)` for a request that produced an error;
/// `None` when the request had no error.
pub error_info: Option<(Weight, String)>,
/// Weight attributed to the request for spam accounting.
pub spam_weight: Weight,
/// When the tally was recorded; constructors shown use `SystemTime::now()`.
pub timestamp: SystemTime,
}
Expand All @@ -232,15 +231,13 @@ impl TrafficTally {
pub fn new(
direct: Option<IpAddr>,
through_fullnode: Option<IpAddr>,
error_type: Option<String>,
error_weight: Weight,
error_info: Option<(Weight, String)>,
spam_weight: Weight,
) -> Self {
Self {
direct,
through_fullnode,
error_weight,
error_type,
error_info,
spam_weight,
timestamp: SystemTime::now(),
}
Expand Down Expand Up @@ -518,24 +515,21 @@ mod tests {
let alice = TrafficTally {
direct: Some(IpAddr::V4(Ipv4Addr::new(8, 7, 6, 5))),
through_fullnode: Some(IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4))),
error_weight: Weight::zero(),
error_type: None,
error_info: None,
spam_weight: Weight::one(),
timestamp: SystemTime::now(),
};
let bob = TrafficTally {
direct: Some(IpAddr::V4(Ipv4Addr::new(8, 7, 6, 5))),
through_fullnode: Some(IpAddr::V4(Ipv4Addr::new(4, 3, 2, 1))),
error_weight: Weight::zero(),
error_type: None,
error_info: None,
spam_weight: Weight::one(),
timestamp: SystemTime::now(),
};
let charlie = TrafficTally {
direct: Some(IpAddr::V4(Ipv4Addr::new(8, 7, 6, 5))),
through_fullnode: Some(IpAddr::V4(Ipv4Addr::new(5, 6, 7, 8))),
error_weight: Weight::zero(),
error_type: None,
error_info: None,
spam_weight: Weight::one(),
timestamp: SystemTime::now(),
};
Expand Down
7 changes: 5 additions & 2 deletions crates/sui-json-rpc/src/axum_router.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,11 @@ fn handle_traffic_resp(
traffic_controller.tally(TrafficTally {
direct: client,
through_fullnode: None,
error_weight: error.clone().map(normalize).unwrap_or(Weight::zero()),
error_info: error.map(|e| {
let error_type = e.to_string();
let error_weight = normalize(e);
(error_weight, error_type)
}),
// For now, count everything as spam with equal weight
// on the rpc node side, including gas-charging endpoints
// such as `sui_executeTransactionBlock`, as this can enable
Expand All @@ -262,7 +266,6 @@ fn handle_traffic_resp(
// to provide a weight distribution based on the method being called.
spam_weight: Weight::one(),
timestamp: SystemTime::now(),
error_type: error.map(|e| e.to_string()),
});
}

Expand Down

0 comments on commit 1764907

Please sign in to comment.