From 448d343d4f28aa2105b3df2a0788ef499a6609fa Mon Sep 17 00:00:00 2001 From: Marcos Bento Date: Mon, 2 Oct 2023 11:29:11 +0100 Subject: [PATCH] ecflow_server: correct default status log optimization These changes correct the optimisation/inhibition of unnecessarily logging the node state change, applicable to the descendants of a node with default status COMPLETE. Without these changes, the controlling flag was update+used and, because it was passed as reference, later incorrectly applied to siblings nodes. To avoid similar issues in the future, all Requeue_args data members not currently updated in the scope of handling requeue are made constant. Re ECFLOW-1914 --- ANode/src/Node.hpp | 10 +++++----- ANode/src/NodeContainer.cpp | 14 ++++++-------- Base/src/AbstractServer.hpp | 2 +- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/ANode/src/Node.hpp b/ANode/src/Node.hpp index 719eca339..fc48624c8 100644 --- a/ANode/src/Node.hpp +++ b/ANode/src/Node.hpp @@ -187,12 +187,12 @@ class Node : public std::enable_shared_from_this { reset_next_time_slot_(reset_next_time_slot), reset_relative_duration_(reset_relative_duration), log_state_changes_(log_state_changes) {} - Requeue_t requeue_t{FULL}; + const Requeue_t requeue_t{FULL}; int clear_suspended_in_child_nodes_{0}; - bool resetRepeats_{true}; - bool reset_next_time_slot_{true}; - bool reset_relative_duration_{true}; - bool log_state_changes_{true}; + const bool resetRepeats_{true}; + const bool reset_next_time_slot_{true}; + const bool reset_relative_duration_{true}; + const bool log_state_changes_{true}; }; virtual void requeue(Requeue_args&); diff --git a/ANode/src/NodeContainer.cpp b/ANode/src/NodeContainer.cpp index b2f76cc3e..2dfbf0f45 100644 --- a/ANode/src/NodeContainer.cpp +++ b/ANode/src/NodeContainer.cpp @@ -140,20 +140,18 @@ void NodeContainer::requeue(Requeue_args& args) { if (args.clear_suspended_in_child_nodes_ >= 0) args.clear_suspended_in_child_nodes_++; - // If the defstatus is complete, don't bother logging the state change in the re-queue - // When we have several thousands children (as in operations), we will end up - // changing state to queue, then again changing it back to complete. - // To avoid this problem we don't bother logging state change for re-queue - // See: ECFLOW-1239 - if (d_st_ == DState::COMPLETE) - args.log_state_changes_ = false; + // If the node `default status` == COMPLETE, we don't log the change of state when re-queueing the descendants. + // Without this optimization, all children would change state to QUEUED, and eventually change back to COMPLETE. + // This avoids flooding the log with unnecessary messages regarding several thousand children + // (e.g. as it happens in operations). See ECFLOW-1239 for details. + bool log_state_changes_descendents = (d_st_ != DState::COMPLETE); Node::Requeue_args largs(args.requeue_t, true /* reset repeats, Moot for tasks */, args.clear_suspended_in_child_nodes_, args.reset_next_time_slot_, true /* reset relative duration */, - args.log_state_changes_); + log_state_changes_descendents); for (const auto& n : nodes_) { n->requeue(largs); diff --git a/Base/src/AbstractServer.hpp b/Base/src/AbstractServer.hpp index cc83cd328..321c4ac94 100644 --- a/Base/src/AbstractServer.hpp +++ b/Base/src/AbstractServer.hpp @@ -178,7 +178,7 @@ class AbstractServer { // Instead of immediate node tree traversal at the end of child command // we use 'increment_job_generation_count' to defer job generation to server - // The server will will check job generation count at poll time. + // The server will check job generation count at poll time. // This approach radically reduces the number of times we traverse the node tree // and hence improves server throughput. void increment_job_generation_count() { job_gen_count_++; }