diff --git a/src/slurm_plugin/cluster_event_publisher.py b/src/slurm_plugin/cluster_event_publisher.py index bfe640296..87519cfbe 100644 --- a/src/slurm_plugin/cluster_event_publisher.py +++ b/src/slurm_plugin/cluster_event_publisher.py @@ -36,10 +36,14 @@ } -NODE_LAUNCH_FAILURE_COUNT = { +STATIC_NODE_LAUNCH_FAILURE_COUNT = { "message": "Number of static nodes that failed to launch a backing instance after node maintenance", "event_type": "node-launch-failure-count", } +DYNAMIC_NODE_LAUNCH_FAILURE_COUNT = { + "message": "Number of dynamic nodes that failed to launch a backing instance", + "event_type": "node-launch-failure-count", +} NODE_LAUNCH_FAILURE = { "message": "After node maintenance, node failed to launch a backing instance", "event_type": "node-launch-failure", @@ -192,7 +196,7 @@ def publish_unhealthy_static_node_events( for count, error_detail in self._generate_launch_failure_details(failed_nodes): self.publish_event( logging.WARNING if count else logging.DEBUG, - **NODE_LAUNCH_FAILURE_COUNT, + **STATIC_NODE_LAUNCH_FAILURE_COUNT, timestamp=timestamp, detail=error_detail, ) @@ -497,7 +501,7 @@ def publish_node_launch_events(self, failed_nodes: Dict[str, List[str]]): for count, error_detail in self._generate_launch_failure_details(failed_nodes): self.publish_event( logging.WARNING if count else logging.DEBUG, - **NODE_LAUNCH_FAILURE_COUNT, + **DYNAMIC_NODE_LAUNCH_FAILURE_COUNT, timestamp=timestamp, detail=error_detail, )