Skip to content

Commit

Permalink
Merge pull request #24225 from vespa-engine/vekterli/remove-deprecate…
Browse files Browse the repository at this point in the history
…d-init-state-handling

Remove StateManager handling of deprecated Initializing state [run-systemtest]
  • Loading branch information
baldersheim authored Sep 26, 2022
2 parents 1403d25 + 02db592 commit 5a805b9
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 58 deletions.
6 changes: 3 additions & 3 deletions storage/src/tests/storageserver/statemanagertest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,9 @@ TEST_F(StateManagerTest, reported_node_state) {
// Add a state listener to check that we get events.
MyStateListener stateListener(*_manager);
_manager->addStateListener(stateListener);
// Test that initial state is initializing
// Test that initial state is Down
auto nodeState = _manager->getReportedNodeState();
EXPECT_EQ("s:i b:58 i:0 t:1", nodeState->toString(false));
EXPECT_EQ("s:d b:58 t:1", nodeState->toString(false));
// Test that it works to update the state
{
auto lock = _manager->grabStateChangeLock();
Expand Down Expand Up @@ -236,7 +236,7 @@ TEST_F(StateManagerTest, reported_node_state) {
_manager->setReportedNodeState(ns);
}
std::string expectedEvents =
"s:i b:58 i:0 t:1 -> s:u b:58 t:1\n"
"s:d b:58 t:1 -> s:u b:58 t:1\n"
"s:u b:58 t:1 -> s:s b:58 t:1 m:Stopping\\x20node\n";
EXPECT_EQ(expectedEvents, stateListener.ost.str());
}
Expand Down
35 changes: 5 additions & 30 deletions storage/src/vespa/storage/storageserver/statemanager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,14 @@ StateManager::StateManager(StorageComponentRegister& compReg,
_stateLock(),
_stateCond(),
_listenerLock(),
_nodeState(std::make_shared<lib::NodeState>(_component.getNodeType(), lib::State::INITIALIZING)),
_nodeState(std::make_shared<lib::NodeState>(_component.getNodeType(), lib::State::DOWN)),
_nextNodeState(),
_systemState(std::make_shared<const ClusterStateBundle>(lib::ClusterState())),
_nextSystemState(),
_reported_host_info_cluster_state_version(0),
_stateListeners(),
_queuedStateRequests(),
_threadLock(),
_lastProgressUpdateCausingSend(0),
_progressLastInitStateSend(-1),
_systemStateHistory(),
_systemStateHistorySize(50),
_hostInfo(std::move(hostInfo)),
Expand Down Expand Up @@ -238,31 +236,8 @@ StateManager::notifyStateListeners()
break; // No change
}
if (_nextNodeState) {
assert(!(_nodeState->getState() == State::UP
&& _nextNodeState->getState() == State::INITIALIZING));

if (_nodeState->getState() == State::INITIALIZING
&& _nextNodeState->getState() == State::INITIALIZING
&& ((_component.getClock().getTimeInMillis() - _lastProgressUpdateCausingSend)
< framework::MilliSecTime(1000))
&& _nextNodeState->getInitProgress() < 1
&& (_nextNodeState->getInitProgress() - _progressLastInitStateSend) < 0.01)
{
// For this special case, where we only have gotten a little
// initialization progress and we have reported recently,
// don't trigger sending get node state reply yet.
} else {
newState = _nextNodeState;
if (!_queuedStateRequests.empty()
&& _nextNodeState->getState() == State::INITIALIZING)
{
_lastProgressUpdateCausingSend = _component.getClock().getTimeInMillis();
_progressLastInitStateSend = newState->getInitProgress();
} else {
_lastProgressUpdateCausingSend = framework::MilliSecTime(0);
_progressLastInitStateSend = -1;
}
}
assert(_nextNodeState->getState() != State::INITIALIZING);
newState = _nextNodeState;
_nodeState = _nextNodeState;
_nextNodeState.reset();
}
Expand Down Expand Up @@ -414,14 +389,14 @@ StateManager::onGetNodeState(const api::GetNodeStateCommand::SP& cmd)
"%" PRId64 " milliseconds unless a node state change "
"happens before that time.",
msTimeout, msTimeout * 800 / 1000);
TimeStatePair pair(
TimeStateCmdPair pair(
_component.getClock().getTimeInMillis()
+ framework::MilliSecTime(msTimeout * 800 / 1000),
cmd);
_queuedStateRequests.emplace_back(std::move(pair));
} else {
LOG(debug, "Answered get node state request right away since it "
"thought we were in nodestate %s, while our actual "
"thought we were in node state %s, while our actual "
"node state is currently %s and we didn't just reply to "
"existing request.",
cmd->getExpectedState() == nullptr ? "unknown"
Expand Down
49 changes: 24 additions & 25 deletions storage/src/vespa/storage/storageserver/statemanager.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,36 +41,35 @@ class StateManager : public NodeStateUpdater,
private framework::Runnable,
private vespalib::JsonStreamTypes
{
StorageComponent _component;
metrics::MetricManager& _metricManager;
mutable std::mutex _stateLock;
std::condition_variable _stateCond;
std::mutex _listenerLock;
std::shared_ptr<lib::NodeState> _nodeState;
std::shared_ptr<lib::NodeState> _nextNodeState;
using ClusterStateBundle = lib::ClusterStateBundle;
using TimeStateCmdPair = std::pair<framework::MilliSecTime, api::GetNodeStateCommand::SP>;
using TimeSysStatePair = std::pair<framework::MilliSecTime, std::shared_ptr<const ClusterStateBundle>>;

StorageComponent _component;
metrics::MetricManager& _metricManager;
mutable std::mutex _stateLock;
std::condition_variable _stateCond;
std::mutex _listenerLock;
std::shared_ptr<lib::NodeState> _nodeState;
std::shared_ptr<lib::NodeState> _nextNodeState;
std::shared_ptr<const ClusterStateBundle> _systemState;
std::shared_ptr<const ClusterStateBundle> _nextSystemState;
uint32_t _reported_host_info_cluster_state_version;
std::list<StateListener*> _stateListeners;
typedef std::pair<framework::MilliSecTime, api::GetNodeStateCommand::SP> TimeStatePair;
std::list<TimeStatePair> _queuedStateRequests;
mutable std::mutex _threadLock;
std::condition_variable _threadCond;
framework::MilliSecTime _lastProgressUpdateCausingSend;
vespalib::Double _progressLastInitStateSend;
using TimeSysStatePair = std::pair<framework::MilliSecTime, std::shared_ptr<const ClusterStateBundle>>;
std::deque<TimeSysStatePair> _systemStateHistory;
uint32_t _systemStateHistorySize;
std::unique_ptr<HostInfo> _hostInfo;
framework::Thread::UP _thread;
uint32_t _reported_host_info_cluster_state_version;
std::list<StateListener*> _stateListeners;
std::list<TimeStateCmdPair> _queuedStateRequests;
mutable std::mutex _threadLock;
std::condition_variable _threadCond;
std::deque<TimeSysStatePair> _systemStateHistory;
uint32_t _systemStateHistorySize;
std::unique_ptr<HostInfo> _hostInfo;
framework::Thread::UP _thread;
// Controllers that have observed a GetNodeState response sent _after_
// immediately_send_get_node_state_replies() has been invoked.
std::unordered_set<uint16_t> _controllers_observed_explicit_node_state;
bool _noThreadTestMode;
bool _grabbedExternalLock;
std::atomic<bool> _notifyingListeners;
std::atomic<bool> _requested_almost_immediate_node_state_replies;
std::unordered_set<uint16_t> _controllers_observed_explicit_node_state;
bool _noThreadTestMode;
bool _grabbedExternalLock;
std::atomic<bool> _notifyingListeners;
std::atomic<bool> _requested_almost_immediate_node_state_replies;

public:
explicit StateManager(StorageComponentRegister&, metrics::MetricManager&,
Expand Down

0 comments on commit 5a805b9

Please sign in to comment.