From d1b802bc534bee3be0901545589ad90a4ef3f58e Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 6 Sep 2022 11:22:58 -0400 Subject: [PATCH] Heartbeat interval: Make it configurable (#2243) When I was looking at a separate timer for async timeout evaluation < 1000ms fidelity it became apparent most of the cost in a heartbeat is the timeout evaluation anyway, but also: if you are chasing a lower timeout fidelity you likely want to observe a server outage ASAP as well so I think it makes more sense to actually open up the heartbeat to make it configurable. I added some docs about how this is risky but that portion and comments could use scrutiny (if we even agree with doing this) on being strong enough warnings around this. I would prefer to leave this as code-only due to potential downsides people may not appreciate otherwise. --- docs/Configuration.md | 5 +- docs/ReleaseNotes.md | 1 + .../Configuration/DefaultOptionsProvider.cs | 9 +++ .../ConfigurationOptions.cs | 20 +++++++ .../ConnectionMultiplexer.cs | 7 +-- src/StackExchange.Redis/PhysicalBridge.cs | 2 +- .../PublicAPI/PublicAPI.Shipped.txt | 3 + .../CommandTimeouts.cs | 57 +++++++++++++++++++ 8 files changed, 98 insertions(+), 6 deletions(-) create mode 100644 tests/StackExchange.Redis.Tests/CommandTimeouts.cs diff --git a/docs/Configuration.md b/docs/Configuration.md index 5d4590383..cbe20001b 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -106,11 +106,14 @@ Additional code-only options: - BeforeSocketConnect - Default: `null` - Allows modifying a `Socket` before connecting (for advanced scenarios) - SslClientAuthenticationOptions (`netcooreapp3.1`/`net5.0` and higher) - Default: `null` - - Allows specifying exact options for SSL/TLS authentication against a server (e.g. cipher suites, protocols, etc.) - overrides all other SSL configuration options. This is a `Func` which receiveces the host (or `SslHost` if set) to get the options for. If `null` is returned from the `Func`, it's the same as this property not being set at all when connecting. + - Allows specifying exact options for SSL/TLS authentication against a server (e.g. cipher suites, protocols, etc.) - overrides all other SSL configuration options. This is a `Func` which receives the host (or `SslHost` if set) to get the options for. If `null` is returned from the `Func`, it's the same as this property not being set at all when connecting. - SocketManager - Default: `SocketManager.Shared`: - The thread pool to use for scheduling work to and from the socket connected to Redis, one of... - `SocketManager.Shared`: Use a shared dedicated thread pool for _all_ multiplexers (defaults to 10 threads) - best balance for most scenarios. - `SocketManager.ThreadPool`: Use the build-in .NET thread pool for scheduling. This can perform better for very small numbers of cores or with large apps on large machines that need to use more than 10 threads (total, across all multiplexers) under load. **Important**: this option isn't the default because it's subject to thread pool growth/starvation and if for example synchronous calls are waiting on a redis command to come back to unblock other threads, stalls/hangs can result. Use with caution, especially if you have sync-over-async work in play. +- HeartbeatInterval - Default: `1000ms` + - Allows running the heartbeat more often which importantly includes timeout evaluation. For example if you have a 50ms command timeout, we're only actually checking it during the heartbeat (once per second by default), so it's possible 50-1050ms pass _before we notice it timed out_. If you want more fidelity in that check and to observe that a server failed faster, you can lower this to run the heartbeat more often to achieve that. + - **Note: heartbeats are not free and that's why the default is 1 second. There is additional overhead to running this more often simply because it does some work each time it fires.** Tokens in the configuration string are comma-separated; any without an `=` sign are assumed to be redis server endpoints. Endpoints without an explicit port will use 6379 if ssl is not enabled, and 6380 if ssl is enabled. Tokens starting with `$` are taken to represent command maps, for example: `$config=cfg`. diff --git a/docs/ReleaseNotes.md b/docs/ReleaseNotes.md index 2e100a2f7..43e589327 100644 --- a/docs/ReleaseNotes.md +++ b/docs/ReleaseNotes.md @@ -11,6 +11,7 @@ - Fix [#1968](https://github.com/StackExchange/StackExchange.Redis/issues/1968): Improved handling of EVAL scripts during server restarts and failovers, detecting and re-sending the script for a retry when needed ([#2170 by martintmk](https://github.com/StackExchange/StackExchange.Redis/pull/2170)) - Adds: `ConfigurationOptions.SslClientAuthenticationOptions` (`netcoreapp3.1`/`net5.0`+ only) to give more control over SSL/TLS authentication ([#2224 by NickCraver](https://github.com/StackExchange/StackExchange.Redis/pull/2224)) - Fix [#2240](https://github.com/StackExchange/StackExchange.Redis/pull/2241): Improve support for DNS-based IPv6 endpoints ([#2241 by NickCraver](https://github.com/StackExchange/StackExchange.Redis/pull/2241)) +- Adds: `ConfigurationOptions.HeartbeatInterval` (**Advanced Setting** - [see docs](https://stackexchange.github.io/StackExchange.Redis/Configuration#configuration-options)) To allow more finite control of the client heartbeat, which encompases how often command timeouts are actually evaluated - still defaults to 1,000 ms ([#2243 by NickCraver](https://github.com/StackExchange/StackExchange.Redis/pull/2243)) - Fix [#1879](https://github.com/StackExchange/StackExchange.Redis/issues/1879): Improve exception message when the wrong password is used ([#2246 by NickCraver](https://github.com/StackExchange/StackExchange.Redis/pull/2246)) - Fix [#2233](https://github.com/StackExchange/StackExchange.Redis/issues/2233): Repeated connection to Sentinel servers using the same ConfigurationOptions would fail ([#2242 by NickCraver](https://github.com/StackExchange/StackExchange.Redis/pull/2242)) diff --git a/src/StackExchange.Redis/Configuration/DefaultOptionsProvider.cs b/src/StackExchange.Redis/Configuration/DefaultOptionsProvider.cs index 492dd7270..72b1d3997 100644 --- a/src/StackExchange.Redis/Configuration/DefaultOptionsProvider.cs +++ b/src/StackExchange.Redis/Configuration/DefaultOptionsProvider.cs @@ -114,6 +114,15 @@ public static void AddProvider(DefaultOptionsProvider provider) /// public virtual Version DefaultVersion => RedisFeatures.v3_0_0; + /// + /// Controls how often the connection heartbeats. A heartbeat includes: + /// - Evaluating if any messages have timed out + /// - Evaluating connection status (checking for failures) + /// - Sending a server message to keep the connection alive if needed + /// + /// Be aware setting this very low incurs additional overhead of evaluating the above more often. + public virtual TimeSpan HeartbeatInterval => TimeSpan.FromSeconds(1); + /// /// Should exceptions include identifiable details? (key names, additional .Data annotations) /// diff --git a/src/StackExchange.Redis/ConfigurationOptions.cs b/src/StackExchange.Redis/ConfigurationOptions.cs index 12e561c21..061ac8f3e 100644 --- a/src/StackExchange.Redis/ConfigurationOptions.cs +++ b/src/StackExchange.Redis/ConfigurationOptions.cs @@ -145,6 +145,8 @@ public static string TryNormalize(string value) private string? tieBreaker, sslHost, configChannel; + private TimeSpan? heartbeatInterval; + private CommandMap? commandMap; private Version? defaultVersion; @@ -366,6 +368,24 @@ public Version DefaultVersion /// public EndPointCollection EndPoints { get; init; } = new EndPointCollection(); + /// + /// Controls how often the connection heartbeats. A heartbeat includes: + /// - Evaluating if any messages have timed out + /// - Evaluating connection status (checking for failures) + /// - Sending a server message to keep the connection alive if needed + /// + /// + /// This defaults to 1000 milliseconds and should not be changed for most use cases. + /// If for example you want to evaluate whether commands have violated the at a lower fidelity + /// than 1000 milliseconds, you could lower this value. + /// Be aware setting this very low incurs additional overhead of evaluating the above more often. + /// + public TimeSpan HeartbeatInterval + { + get => heartbeatInterval ?? Defaults.HeartbeatInterval; + set => heartbeatInterval = value; + } + /// /// Use ThreadPriority.AboveNormal for SocketManager reader and writer threads (true by default). /// If , will be used. diff --git a/src/StackExchange.Redis/ConnectionMultiplexer.cs b/src/StackExchange.Redis/ConnectionMultiplexer.cs index 2be70f4bb..4d64fa7de 100644 --- a/src/StackExchange.Redis/ConnectionMultiplexer.cs +++ b/src/StackExchange.Redis/ConnectionMultiplexer.cs @@ -22,8 +22,6 @@ namespace StackExchange.Redis /// public sealed partial class ConnectionMultiplexer : IInternalConnectionMultiplexer // implies : IConnectionMultiplexer and : IDisposable { - internal const int MillisecondsPerHeartbeat = 1000; - // This gets accessed for every received event; let's make sure we can process it "raw" internal readonly byte[]? ConfigurationChangedChannel; // Unique identifier used when tracing @@ -812,7 +810,7 @@ internal EndPoint[] GetEndPoints() private sealed class TimerToken { - public TimerToken(ConnectionMultiplexer muxer) + private TimerToken(ConnectionMultiplexer muxer) { _ref = new WeakReference(muxer); } @@ -840,7 +838,8 @@ public TimerToken(ConnectionMultiplexer muxer) internal static IDisposable Create(ConnectionMultiplexer connection) { var token = new TimerToken(connection); - var timer = new Timer(Heartbeat, token, MillisecondsPerHeartbeat, MillisecondsPerHeartbeat); + var heartbeatMilliseconds = (int)connection.RawConfig.HeartbeatInterval.TotalMilliseconds; + var timer = new Timer(Heartbeat, token, heartbeatMilliseconds, heartbeatMilliseconds); token.SetTimer(timer); return timer; } diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 64080d07f..cd225a529 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -20,7 +20,7 @@ internal sealed class PhysicalBridge : IDisposable private const int ProfileLogSamples = 10; - private const double ProfileLogSeconds = (ConnectionMultiplexer.MillisecondsPerHeartbeat * ProfileLogSamples) / 1000.0; + private const double ProfileLogSeconds = (1000 /* ms */ * ProfileLogSamples) / 1000.0; private static readonly Message ReusableAskingCommand = Message.Create(-1, CommandFlags.FireAndForget, RedisCommand.ASKING); diff --git a/src/StackExchange.Redis/PublicAPI/PublicAPI.Shipped.txt b/src/StackExchange.Redis/PublicAPI/PublicAPI.Shipped.txt index 8ffbccb7b..4f9d4658e 100644 --- a/src/StackExchange.Redis/PublicAPI/PublicAPI.Shipped.txt +++ b/src/StackExchange.Redis/PublicAPI/PublicAPI.Shipped.txt @@ -220,6 +220,8 @@ StackExchange.Redis.ConfigurationOptions.DefaultVersion.get -> System.Version! StackExchange.Redis.ConfigurationOptions.DefaultVersion.set -> void StackExchange.Redis.ConfigurationOptions.EndPoints.get -> StackExchange.Redis.EndPointCollection! StackExchange.Redis.ConfigurationOptions.EndPoints.init -> void +StackExchange.Redis.ConfigurationOptions.HeartbeatInterval.get -> System.TimeSpan +StackExchange.Redis.ConfigurationOptions.HeartbeatInterval.set -> void StackExchange.Redis.ConfigurationOptions.HighPrioritySocketThreads.get -> bool StackExchange.Redis.ConfigurationOptions.HighPrioritySocketThreads.set -> void StackExchange.Redis.ConfigurationOptions.IncludeDetailInExceptions.get -> bool @@ -1759,6 +1761,7 @@ virtual StackExchange.Redis.Configuration.DefaultOptionsProvider.DefaultVersion. virtual StackExchange.Redis.Configuration.DefaultOptionsProvider.GetDefaultClientName() -> string! virtual StackExchange.Redis.Configuration.DefaultOptionsProvider.GetDefaultSsl(StackExchange.Redis.EndPointCollection! endPoints) -> bool virtual StackExchange.Redis.Configuration.DefaultOptionsProvider.GetSslHostFromEndpoints(StackExchange.Redis.EndPointCollection! endPoints) -> string? +virtual StackExchange.Redis.Configuration.DefaultOptionsProvider.HeartbeatInterval.get -> System.TimeSpan virtual StackExchange.Redis.Configuration.DefaultOptionsProvider.IncludeDetailInExceptions.get -> bool virtual StackExchange.Redis.Configuration.DefaultOptionsProvider.IncludePerformanceCountersInExceptions.get -> bool virtual StackExchange.Redis.Configuration.DefaultOptionsProvider.IsMatch(System.Net.EndPoint! endpoint) -> bool diff --git a/tests/StackExchange.Redis.Tests/CommandTimeouts.cs b/tests/StackExchange.Redis.Tests/CommandTimeouts.cs new file mode 100644 index 000000000..c7530018b --- /dev/null +++ b/tests/StackExchange.Redis.Tests/CommandTimeouts.cs @@ -0,0 +1,57 @@ +using System; +using System.Threading.Tasks; +using Xunit; +using Xunit.Abstractions; + +namespace StackExchange.Redis.Tests; + +[Collection(NonParallelCollection.Name)] +public class CommandTimeouts : TestBase +{ + public CommandTimeouts(ITestOutputHelper output) : base (output) { } + + [Fact] + public async Task DefaultHeartbeatTimeout() + { + var options = ConfigurationOptions.Parse(TestConfig.Current.PrimaryServerAndPort); + options.AllowAdmin = true; + options.AsyncTimeout = 1000; + + using var pauseConn = ConnectionMultiplexer.Connect(options); + using var conn = ConnectionMultiplexer.Connect(options); + + var pauseServer = GetServer(pauseConn); + _ = pauseServer.ExecuteAsync("CLIENT", "PAUSE", 2000); + + var key = Me(); + var db = conn.GetDatabase(); + var sw = ValueStopwatch.StartNew(); + var ex = await Assert.ThrowsAsync(async () => await db.StringGetAsync(key)); + Log(ex.Message); + var duration = sw.GetElapsedTime(); + Assert.True(duration < TimeSpan.FromSeconds(2100), $"Duration ({duration.Milliseconds} ms) should be less than 2100ms"); + } + + [Fact] + public async Task DefaultHeartbeatLowTimeout() + { + var options = ConfigurationOptions.Parse(TestConfig.Current.PrimaryServerAndPort); + options.AllowAdmin = true; + options.AsyncTimeout = 50; + options.HeartbeatInterval = TimeSpan.FromMilliseconds(100); + + using var pauseConn = ConnectionMultiplexer.Connect(options); + using var conn = ConnectionMultiplexer.Connect(options); + + var pauseServer = GetServer(pauseConn); + _ = pauseServer.ExecuteAsync("CLIENT", "PAUSE", 2000); + + var key = Me(); + var db = conn.GetDatabase(); + var sw = ValueStopwatch.StartNew(); + var ex = await Assert.ThrowsAsync(async () => await db.StringGetAsync(key)); + Log(ex.Message); + var duration = sw.GetElapsedTime(); + Assert.True(duration < TimeSpan.FromSeconds(250), $"Duration ({duration.Milliseconds} ms) should be less than 250ms"); + } +}