From 9db8696bf2f8cfd29b8ad042cb5c73123ba7968a Mon Sep 17 00:00:00 2001 From: Georgy Moiseev Date: Thu, 11 Apr 2024 18:23:20 +0300 Subject: [PATCH] test: retry cluster init if unhealthy Sometimes cluster fails to bootstrap in tests. The reasons are yet unknown and likely unrelated to crud or maybe even crud tests setup. After this patch, in case cluster preparation had failed for a test, we retry to create a cluster up to three times. Part of #432 --- test/helper.lua | 61 +++++++++++++++++++++++------- test/integration/role_test.lua | 7 ++-- test/unit/not_initialized_test.lua | 2 +- 3 files changed, 51 insertions(+), 19 deletions(-) diff --git a/test/helper.lua b/test/helper.lua index d68d3af7..fd781769 100644 --- a/test/helper.lua +++ b/test/helper.lua @@ -902,30 +902,63 @@ function helpers.start_tarantool3_cluster(g, cfg) end function helpers.start_cluster(g, cartridge_cfg, vshard_cfg, tarantool3_cluster_cfg, opts) - checks('table', '?table', '?table', '?table', {wait_crud_is_ready = '?boolean'}) + checks('table', '?table', '?table', '?table', { + wait_crud_is_ready = '?boolean', + backend = '?string', + retries = '?number', + }) opts = opts or {} + if opts.wait_crud_is_ready == nil then opts.wait_crud_is_ready = true end - if g.params.backend == helpers.backend.CARTRIDGE then - helpers.skip_cartridge_unsupported() - - helpers.start_cartridge_cluster(g, cartridge_cfg) - elseif g.params.backend == helpers.backend.VSHARD then - helpers.start_vshard_cluster(g, vshard_cfg) - elseif g.params.backend == helpers.backend.CONFIG then - helpers.skip_if_tarantool3_crud_roles_unsupported() + if opts.backend == nil then + opts.backend = g.params.backend + end + assert(opts.backend ~= nil, 'Please, provide backend') - helpers.start_tarantool3_cluster(g, tarantool3_cluster_cfg) + local DEFAULT_RETRIES = 3 + if opts.retries == nil then + opts.retries = DEFAULT_RETRIES end - g.router = g.cluster:server('router') - assert(g.router ~= nil, 'router found') + local current_attempt = 0 
+ while true do + current_attempt = current_attempt + 1 + + if opts.backend == helpers.backend.CARTRIDGE then + helpers.skip_cartridge_unsupported() + + helpers.start_cartridge_cluster(g, cartridge_cfg) + elseif opts.backend == helpers.backend.VSHARD then + helpers.start_vshard_cluster(g, vshard_cfg) + elseif opts.backend == helpers.backend.CONFIG then + helpers.skip_if_tarantool3_crud_roles_unsupported() - if opts.wait_crud_is_ready then - helpers.wait_crud_is_ready_on_cluster(g) + helpers.start_tarantool3_cluster(g, tarantool3_cluster_cfg) + end + + g.router = g.cluster:server('router') + assert(g.router ~= nil, 'router found') + + local ok, err = false, nil -- luacheck: ignore + if opts.wait_crud_is_ready then + ok, err = pcall(helpers.wait_crud_is_ready_on_cluster, g, {backend = opts.backend}) + else + ok = true + end + + if ok then + break + end + + helpers.stop_cluster(g.cluster, opts.backend) + + if current_attempt == opts.retries then + error(err) + end end end diff --git a/test/integration/role_test.lua b/test/integration/role_test.lua index adec1d23..9ed0b18a 100644 --- a/test/integration/role_test.lua +++ b/test/integration/role_test.lua @@ -13,10 +13,9 @@ end) g.before_each(function(cg) -- Tests are rather dangerous and may break the cluster, -- so it's safer to restart for each case. 
- helpers.start_tarantool3_cluster(cg, cg.template_cfg) - cg.router = cg.cluster:server('router') - - helpers.wait_crud_is_ready_on_cluster(cg, {backend = helpers.backend.CONFIG}) + helpers.start_cluster(cg, nil, nil, cg.template_cfg, { + backend = helpers.backend.CONFIG, + }) end) g.after_each(function(cg) diff --git a/test/unit/not_initialized_test.lua b/test/unit/not_initialized_test.lua index ae761252..82f9b4ee 100644 --- a/test/unit/not_initialized_test.lua +++ b/test/unit/not_initialized_test.lua @@ -83,7 +83,7 @@ pgroup.before_all(function(g) cartridge_cfg_template, vshard_cfg_template, tarantool3_cluster_cfg_template, - {wait_crud_is_ready = false} + {wait_crud_is_ready = false, retries = 1} ) g.router = g.cluster:server('router')