From 448d343d4f28aa2105b3df2a0788ef499a6609fa Mon Sep 17 00:00:00 2001
From: Marcos Bento <Marcos.Bento@ecmwf.int>
Date: Mon, 2 Oct 2023 11:29:11 +0100
Subject: [PATCH] ecflow_server: correct default status log optimization

These changes correct the optimisation/inhibition of unnecessarily logging the node state change, applicable to the descendants of a node with default status COMPLETE.

Without these changes, the controlling flag was update+used and, because it was passed as reference, later incorrectly applied to siblings nodes.

To avoid similar issues in the future, all Requeue_args data members not currently updated in the scope of handling requeue are made constant.

Re ECFLOW-1914
---
 ANode/src/Node.hpp          | 10 +++++-----
 ANode/src/NodeContainer.cpp | 14 ++++++--------
 Base/src/AbstractServer.hpp |  2 +-
 3 files changed, 12 insertions(+), 14 deletions(-)
diff --git a/ANode/src/Node.hpp b/ANode/src/Node.hpp
index 719eca339..fc48624c8 100644
--- a/ANode/src/Node.hpp
+++ b/ANode/src/Node.hpp
@@ -187,12 +187,12 @@ class Node : public std::enable_shared_from_this<Node> {
               reset_next_time_slot_(reset_next_time_slot),
               reset_relative_duration_(reset_relative_duration),
               log_state_changes_(log_state_changes) {}
-        Requeue_t requeue_t{FULL};
+        const Requeue_t requeue_t{FULL};
         int clear_suspended_in_child_nodes_{0};
-        bool resetRepeats_{true};
-        bool reset_next_time_slot_{true};
-        bool reset_relative_duration_{true};
-        bool log_state_changes_{true};
+        const bool resetRepeats_{true};
+        const bool reset_next_time_slot_{true};
+        const bool reset_relative_duration_{true};
+        const bool log_state_changes_{true};
     };
     virtual void requeue(Requeue_args&);
 
diff --git a/ANode/src/NodeContainer.cpp b/ANode/src/NodeContainer.cpp
index b2f76cc3e..2dfbf0f45 100644
--- a/ANode/src/NodeContainer.cpp
+++ b/ANode/src/NodeContainer.cpp
@@ -140,20 +140,18 @@ void NodeContainer::requeue(Requeue_args& args) {
     if (args.clear_suspended_in_child_nodes_ >= 0)
         args.clear_suspended_in_child_nodes_++;
 
-    // If the defstatus is complete, don't bother logging the state change in the re-queue
-    // When we have several thousands children (as in operations), we will end up
-    // changing state to queue, then again changing it back to complete.
-    // To avoid this problem we don't bother logging state change for re-queue
-    // See: ECFLOW-1239
-    if (d_st_ == DState::COMPLETE)
-        args.log_state_changes_ = false;
+    // If the node `default status` == COMPLETE, we don't log the change of state when re-queueing the descendants.
+    // Without this optimization, all children would change state to QUEUED, and eventually change back to COMPLETE.
+    // This avoids flooding the log with unnecessary messages regarding several thousand children
+    // (e.g. as it happens in operations). See ECFLOW-1239 for details.
+    bool log_state_changes_descendents = (d_st_ != DState::COMPLETE);
 
     Node::Requeue_args largs(args.requeue_t,
                              true /* reset repeats, Moot for tasks */,
                              args.clear_suspended_in_child_nodes_,
                              args.reset_next_time_slot_,
                              true /* reset relative duration */,
-                             args.log_state_changes_);
+                             log_state_changes_descendents);
 
     for (const auto& n : nodes_) {
         n->requeue(largs);
diff --git a/Base/src/AbstractServer.hpp b/Base/src/AbstractServer.hpp
index cc83cd328..321c4ac94 100644
--- a/Base/src/AbstractServer.hpp
+++ b/Base/src/AbstractServer.hpp
@@ -178,7 +178,7 @@ class AbstractServer {
 
     // Instead of immediate node tree traversal at the end of child command
     // we use 'increment_job_generation_count' to defer job generation to server
-    // The server will will check job generation count at poll time.
+    // The server will check job generation count at poll time.
     // This approach radically reduces the number of times we traverse the node tree
     // and hence improves server throughput.
     void increment_job_generation_count() { job_gen_count_++; }