Skip to content

Commit

Permalink
Merge branch 'develop' into wip/scaleAllDevelop
Browse files Browse the repository at this point in the history
  • Loading branch information
lukeseawalker authored Aug 22, 2023
2 parents a48a911 + 0c77fc9 commit ac8f243
Showing 1 changed file with 7 additions and 3 deletions.
10 changes: 7 additions & 3 deletions src/slurm_plugin/cluster_event_publisher.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,14 @@
}


-NODE_LAUNCH_FAILURE_COUNT = {
+STATIC_NODE_LAUNCH_FAILURE_COUNT = {
"message": "Number of static nodes that failed to launch a backing instance after node maintenance",
"event_type": "node-launch-failure-count",
}
+DYNAMIC_NODE_LAUNCH_FAILURE_COUNT = {
+"message": "Number of dynamic nodes that failed to launch a backing instance",
+"event_type": "node-launch-failure-count",
+}
NODE_LAUNCH_FAILURE = {
"message": "After node maintenance, node failed to launch a backing instance",
"event_type": "node-launch-failure",
Expand Down Expand Up @@ -192,7 +196,7 @@ def publish_unhealthy_static_node_events(
for count, error_detail in self._generate_launch_failure_details(failed_nodes):
self.publish_event(
logging.WARNING if count else logging.DEBUG,
-**NODE_LAUNCH_FAILURE_COUNT,
+**STATIC_NODE_LAUNCH_FAILURE_COUNT,
timestamp=timestamp,
detail=error_detail,
)
Expand Down Expand Up @@ -497,7 +501,7 @@ def publish_node_launch_events(self, failed_nodes: Dict[str, List[str]]):
for count, error_detail in self._generate_launch_failure_details(failed_nodes):
self.publish_event(
logging.WARNING if count else logging.DEBUG,
-**NODE_LAUNCH_FAILURE_COUNT,
+**DYNAMIC_NODE_LAUNCH_FAILURE_COUNT,
timestamp=timestamp,
detail=error_detail,
)
Expand Down

0 comments on commit ac8f243

Please sign in to comment.