This repository has been archived by the owner on Dec 16, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 88
/
guarddog_impl.h
151 lines (133 loc) · 5.42 KB
/
guarddog_impl.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#pragma once
#include <chrono>
#include <vector>
#include "envoy/api/api.h"
#include "envoy/config/bootstrap/v3/bootstrap.pb.h"
#include "envoy/event/timer.h"
#include "envoy/server/configuration.h"
#include "envoy/server/guarddog.h"
#include "envoy/server/guarddog_config.h"
#include "envoy/server/watchdog.h"
#include "envoy/stats/scope.h"
#include "envoy/stats/stats.h"
#include "common/common/lock_guard.h"
#include "common/common/logger.h"
#include "common/common/thread.h"
#include "common/event/libevent.h"
#include "absl/types/optional.h"
namespace Envoy {
namespace Server {
/**
 * Performs deadlock detection, stats collection and enforcement for watched threads.
 *
 * It launches a thread that scans at an interval equal to the minimum of the configured
 * intervals. If it finds starved threads or suspected deadlocks it will take the appropriate
 * action depending on the config parameters described below.
 *
 * Thread lifetime is tied to GuardDog object lifetime (RAII style).
 */
class GuardDogImpl : public GuardDog {
public:
  /**
   * Defines a test interlock hook to enable tests to synchronize the guard-dog
   * execution so they can probe current counter values. The default
   * implementation that runs in production has empty methods, which are
   * overridden in the implementation used during tests.
   */
  class TestInterlockHook {
  public:
    virtual ~TestInterlockHook() = default;

    /**
     * Called from GuardDogImpl to indicate that it has evaluated all watch-dogs
     * up to a particular point in time.
     */
    virtual void signalFromImpl(MonotonicTime) {}

    /**
     * Called from GuardDog tests to block until the implementation has reached
     * the desired point in time.
     */
    virtual void waitFromTest(Thread::MutexBasicLockable&, MonotonicTime) {}
  };

  /**
   * @param stats_scope Statistics scope to write watchdog_miss and
   * watchdog_mega_miss events into.
   * @param config Configuration object.
   * @param api API object.
   * @param name Name given to this guard dog instance.
   * NOTE(review): presumably used to label its stats and/or thread; confirm in the
   * implementation file.
   * @param test_interlock a hook for enabling interlock with unit tests.
   *
   * See the configuration documentation for details on the timeout settings.
   */
  GuardDogImpl(Stats::Scope& stats_scope, const Server::Configuration::Watchdog& config,
               Api::Api& api, absl::string_view name,
               std::unique_ptr<TestInterlockHook>&& test_interlock);

  /**
   * Overload that takes no test interlock hook; the remaining parameters are the same as for
   * the constructor above.
   * NOTE(review): presumably installs the default (empty) TestInterlockHook; confirm in the
   * implementation file.
   */
  GuardDogImpl(Stats::Scope& stats_scope, const Server::Configuration::Watchdog& config,
               Api::Api& api, absl::string_view name);
  ~GuardDogImpl() override;

  /**
   * Exposed for testing purposes only (but harmless to call):
   * @return the value of loop_interval_. Returned as a plain (non-const) value, since a
   * top-level const on a by-value return type only inhibits moves for the caller.
   */
  std::chrono::milliseconds loopIntervalForTest() const { return loop_interval_; }

  /**
   * Test hook to force a step() to catch up with the current simulated
   * time. This is inlined so that it does not need to be present in the
   * production binary.
   */
  void forceCheckForTest() {
    // mutex_ is held for the whole call and is also handed to the hook below.
    Thread::LockGuard guard(mutex_);
    // Capture the time first: this is the point in time the hook is asked to wait for.
    MonotonicTime now = time_source_.monotonicTime();
    // Arm the loop timer with a zero delay.
    loop_timer_->enableTimer(std::chrono::milliseconds(0));
    test_interlock_hook_->waitFromTest(mutex_, now);
  }

  // Server::GuardDog
  WatchDogSharedPtr createWatchDog(Thread::ThreadId thread_id,
                                   const std::string& thread_name) override;
  void stopWatching(WatchDogSharedPtr wd) override;

private:
  void start(Api::Api& api);
  void step();
  void stop();

  // Per the C++ standard it is OK to use these in ctor initializer as long as
  // it is after kill and multikill timeout values are initialized.
  // Each returns true only when the corresponding timeout is strictly positive.
  bool killEnabled() const { return kill_timeout_ > std::chrono::milliseconds(0); }
  bool multikillEnabled() const { return multi_kill_timeout_ > std::chrono::milliseconds(0); }

  using WatchDogAction = envoy::config::bootstrap::v3::Watchdog::WatchdogAction;

  // Helper function to invoke all the GuardDogActions registered for an Event.
  // thread_last_checkin_pairs holds (thread id, time) pairs and is taken by value.
  void invokeGuardDogActions(
      WatchDogAction::WatchdogEvent event,
      std::vector<std::pair<Thread::ThreadId, MonotonicTime>> thread_last_checkin_pairs,
      MonotonicTime now);

  // Per-watchdog bookkeeping: the watched dog itself plus its alert flags and counters.
  struct WatchedDog {
    WatchedDog(Stats::Scope& stats_scope, const std::string& thread_name,
               const WatchDogSharedPtr& watch_dog);

    const WatchDogSharedPtr dog_;
    // Empty until set. NOTE(review): presumably the time of the most recent alert for this
    // dog; confirm in the implementation file.
    absl::optional<MonotonicTime> last_alert_time_;
    // Both flags are value-initialized, i.e. start out false.
    bool miss_alerted_{};
    bool megamiss_alerted_{};
    Stats::Counter& miss_counter_;
    Stats::Counter& megamiss_counter_;
  };
  using WatchedDogPtr = std::unique_ptr<WatchedDog>;

  // NOTE: members are initialized in declaration order; do not reorder them (see the comment
  // on killEnabled()/multikillEnabled() above).
  std::unique_ptr<TestInterlockHook> test_interlock_hook_;
  Stats::Scope& stats_scope_;
  TimeSource& time_source_;
  // Timeout settings; see the configuration documentation for their meaning.
  const std::chrono::milliseconds miss_timeout_;
  const std::chrono::milliseconds megamiss_timeout_;
  const std::chrono::milliseconds kill_timeout_;
  const std::chrono::milliseconds multi_kill_timeout_;
  const double multi_kill_fraction_;
  // Value reported by loopIntervalForTest().
  const std::chrono::milliseconds loop_interval_;
  Stats::Counter& watchdog_miss_counter_;
  Stats::Counter& watchdog_megamiss_counter_;
  // Access to the list of watched dogs is guarded by wd_lock_.
  std::vector<WatchedDogPtr> watched_dogs_ ABSL_GUARDED_BY(wd_lock_);
  Thread::MutexBasicLockable wd_lock_;
  Thread::ThreadPtr thread_;
  Event::DispatcherPtr dispatcher_;
  // Timer armed by forceCheckForTest().
  Event::TimerPtr loop_timer_;
  // Maps each watchdog event to the actions to invoke for it.
  using EventToActionsMap = absl::flat_hash_map<WatchDogAction::WatchdogEvent,
                                                std::vector<Configuration::GuardDogActionPtr>>;
  EventToActionsMap events_to_actions_;
  // Guards run_thread_; also held by forceCheckForTest() and passed to the test hook.
  Thread::MutexBasicLockable mutex_;
  bool run_thread_ ABSL_GUARDED_BY(mutex_);
};
} // namespace Server
} // namespace Envoy