diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml
index 8407711d21f..ce9bdc29e82 100644
--- a/ocaml/idl/datamodel_errors.ml
+++ b/ocaml/idl/datamodel_errors.ml
@@ -720,6 +720,9 @@ let _ =
        the coordinator's database and pointing to the correct coordinator? Are \
        all servers using the same pool secret?"
     () ;
+  error Api_errors.host_xapi_version_higher_than_coordinator
+    ["host_xapi_version"]
+    ~doc:"The host xapi version is higher than the one in the coordinator" () ;
   error Api_errors.host_broken []
     ~doc:
       "This server failed in the middle of an automatic failover operation and \
diff --git a/ocaml/idl/datamodel_host.ml b/ocaml/idl/datamodel_host.ml
index 3cb410d517d..672f34ea8c4 100644
--- a/ocaml/idl/datamodel_host.ml
+++ b/ocaml/idl/datamodel_host.ml
@@ -95,6 +95,7 @@ let local_assert_healthy =
       ; Api_errors.license_restriction
       ; Api_errors.license_does_not_support_pooling
       ; Api_errors.ha_should_be_fenced
+      ; Api_errors.host_xapi_version_higher_than_coordinator
       ]
     ~allowed_roles:_R_LOCAL_ROOT_ONLY
     ()
diff --git a/ocaml/util/xapi_version.ml b/ocaml/util/xapi_version.ml
index cccc294a45f..90e71077898 100644
--- a/ocaml/util/xapi_version.ml
+++ b/ocaml/util/xapi_version.ml
@@ -18,11 +18,22 @@ let hostname = "localhost"
 
 let date = Xapi_build_info.date
 
+(** [parse_xapi_version version] scans [version] with the format
+    [%d.%d.%s] and returns the three fields as [(major, minor, rest)].
+    @raise Failure if [version] does not match that format. *)
+let parse_xapi_version version =
+  try Scanf.sscanf version "%d.%d.%s" (fun maj min rest -> (maj, min, rest))
+  with Scanf.Scan_failure _ | Failure _ | End_of_file ->
+    failwith
+      (Printf.sprintf "Couldn't determine xapi version from string: '%s'"
+         version
+      )
+
 let version, xapi_version_major, xapi_version_minor, git_id =
   match Build_info.V1.version () with
   | None ->
       ("0.0.dev", 0, 0, "dev")
-  | Some v -> (
+  | Some v ->
       let str = Build_info.V1.Version.to_string v in
       let version =
         if String.starts_with ~prefix:"v" str then
@@ -30,16 +41,16 @@ let version, xapi_version_major, xapi_version_minor, git_id =
         else
           str
       in
-      try
-        let maj, min, git_id =
-          Scanf.sscanf version "%d.%d.%s" (fun maj min rest -> (maj, min, rest))
-        in
-        (version, maj, min, git_id)
-      with _ ->
-        failwith
-          (Printf.sprintf
-             "Couldn't determine xapi version - got unexpected version from \
-              dune: '%s'"
-             version
-          )
-    )
+      let maj, min, git_id = parse_xapi_version version in
+      (version, maj, min, git_id)
+
+(** [compare_version version_a version_b] orders two xapi version strings by
+    their major component, then by their minor component; whatever follows
+    the minor component is ignored. The result is negative, zero or positive
+    in the same way as [Int.compare].
+    @raise Failure if either string cannot be parsed. *)
+let compare_version version_a version_b =
+  let maj_a, min_a, _ = parse_xapi_version version_a in
+  let maj_b, min_b, _ = parse_xapi_version version_b in
+  (* [a <?> b] is [a] unless [a] is 0, in which case it is [b] *)
+  let ( <?> ) a b = if a = 0 then b else a in
+  Int.compare maj_a maj_b <?> Int.compare min_a min_b <?> 0
diff --git a/ocaml/util/xapi_version.mli b/ocaml/util/xapi_version.mli
index 136d8a1b8ce..77d6e5ef022 100644
--- a/ocaml/util/xapi_version.mli
+++ b/ocaml/util/xapi_version.mli
@@ -23,3 +23,8 @@ val git_id : string
 val xapi_version_major : int
 
 val xapi_version_minor : int
+
+(** [compare_version a b] compares the major, then the minor, component of
+    two xapi version strings. The result has the same sign convention as
+    [Int.compare]. Raises [Failure] if either string cannot be parsed. *)
+val compare_version : string -> string -> int
diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml
index 70557f54ee9..505c873ba4e 100644
--- a/ocaml/xapi-consts/api_errors.ml
+++ b/ocaml/xapi-consts/api_errors.ml
@@ -143,6 +143,9 @@ let host_master_cannot_talk_back = "HOST_MASTER_CANNOT_TALK_BACK"
 
 let host_unknown_to_master = "HOST_UNKNOWN_TO_MASTER"
 
+let host_xapi_version_higher_than_coordinator =
+  "HOST_XAPI_VERSION_HIGHER_THAN_COORDINATOR"
+
 (* should be fenced *)
 let host_broken = "HOST_BROKEN"
 
diff --git a/ocaml/xapi-consts/api_messages.ml b/ocaml/xapi-consts/api_messages.ml
index 9228713f4ea..250099d2c08 100644
--- a/ocaml/xapi-consts/api_messages.ml
+++ b/ocaml/xapi-consts/api_messages.ml
@@ -355,3 +355,6 @@ let tls_verification_emergency_disabled =
   addMessage "TLS_VERIFICATION_EMERGENCY_DISABLED" 3L
 
 let periodic_update_sync_failed = addMessage "PERIODIC_UPDATE_SYNC_FAILED" 3L
+
+let xapi_startup_blocked_as_version_higher_than_coordinator =
+  addMessage "XAPI_STARTUP_BLOCKED_AS_VERSION_HIGHER_THAN_COORDINATOR" 2L
diff --git a/ocaml/xapi/xapi.ml b/ocaml/xapi/xapi.ml
index dff9913ee00..c9221c2a739 100644
--- a/ocaml/xapi/xapi.ml
+++ b/ocaml/xapi/xapi.ml
@@ -417,14 +417,21 @@ let wait_for_management_ip_address ~__context =
   ) ;
   ip
 
-type hello_error =
+type host_status_check_error =
   | Permanent
   (* e.g. the pool secret is wrong i.e. wrong master *)
   | Temporary
 (* some glitch or other *)
 
-(** Attempt a Pool.hello, return None if ok or Some hello_error otherwise *)
-let attempt_pool_hello my_ip =
+(* True once the version-higher-than-coordinator alert has been created, so
+   that later retries do not create it again. *)
+let xapi_ver_high_alerted = ref false
+
+(** Attempt checking host status with pool coordinator:
+ * 1. Pool.hello
+ * 2. if Pool.hello ok, check xapi version
+ * Return None if ok or Some host_status_check_error otherwise *)
+let attempt_host_status_check_with_coordinator ~__context my_ip =
   let localhost_uuid = Helpers.get_localhost_uuid () in
   try
     Helpers.call_emergency_mode_functions (Pool_role.get_master_address ())
@@ -444,7 +451,46 @@ let attempt_pool_hello my_ip =
               [localhost_uuid] ;
             Some Permanent
         | `ok ->
-            None
+            let xapi_version_higher version =
+              version |> Xapi_version.compare_version Xapi_version.version
+              |> fun r -> r > 0
+            in
+            if
+              xapi_version_higher
+                (Db.Host.get_software_version ~__context
+                   ~self:(Helpers.get_master ~__context)
+                |> List.assoc "xapi_build"
+                )
+            then (
+              let name_label =
+                Db.Host.get_name_label ~__context
+                  ~self:(Helpers.get_localhost ~__context)
+              in
+              let err_msg =
+                Printf.sprintf
+                  "Xapi startup in pool member %s is blocked as its xapi \
+                   version (%s) is higher than xapi version in pool \
+                   coordinator."
+                  name_label Xapi_version.version
+              in
+              if not !xapi_ver_high_alerted then (
+                let name, priority =
+                  Api_messages
+                  .xapi_startup_blocked_as_version_higher_than_coordinator
+                in
+                ignore
+                  (Client.Client.Message.create ~rpc ~session_id ~name ~priority
+                     ~cls:`Host ~obj_uuid:localhost_uuid ~body:err_msg
+                  ) ;
+                xapi_ver_high_alerted := true
+              ) ;
+              error "%s" err_msg ;
+              Xapi_host.set_emergency_mode_error
+                Api_errors.host_xapi_version_higher_than_coordinator
+                [Xapi_version.version] ;
+              Some Permanent
+            ) else
+              None
     )
   with
   | Api_errors.Server_error (code, _)
@@ -456,13 +502,15 @@ let attempt_pool_hello my_ip =
         [localhost_uuid] ;
       Some Permanent
   | Api_errors.Server_error (code, params) as exn ->
-      debug "Caught exception: %s during Pool.hello"
-        (ExnHelper.string_of_exn exn) ;
+      debug "Caught exception: %s in %s"
+        (ExnHelper.string_of_exn exn)
+        __FUNCTION__ ;
       Xapi_host.set_emergency_mode_error code params ;
       Some Temporary
   | exn ->
-      debug "Caught exception: %s during Pool.hello"
-        (ExnHelper.string_of_exn exn) ;
+      debug "Caught exception: %s in %s"
+        (ExnHelper.string_of_exn exn)
+        __FUNCTION__ ;
       Xapi_host.set_emergency_mode_error Api_errors.internal_error
         [ExnHelper.string_of_exn exn] ;
       Some Temporary
@@ -1129,14 +1177,18 @@ let server_init () =
               Helpers.touch_file !Xapi_globs.ready_file ;
               (* Keep trying to log into master *)
               let finished = ref false in
+
               while not !finished do
                 (* Grab the management IP address (wait forever for it if necessary) *)
                 let ip = wait_for_management_ip_address ~__context in
                 debug "Start master_connection watchdog" ;
                 ignore (Master_connection.start_master_connection_watchdog ()) ;
                 debug "Attempting to communicate with master" ;
-                (* Try to say hello to the pool *)
-                match attempt_pool_hello ip with
+
+                (* Try to check host status with the pool *)
+                match
+                  attempt_host_status_check_with_coordinator ~__context ip
+                with
                 | None ->
                     finished := true
                 | Some Temporary ->
@@ -1144,8 +1196,9 @@ let server_init () =
                     Thread.delay 5.
                 | Some Permanent ->
                     error
-                      "Permanent error in Pool.hello, will retry after %.0fs \
-                       just in case"
+                      "Permanent error in \
+                       attempt_host_status_check_with_coordinator, will retry \
+                       after %.0fs just in case"
                       !Db_globs.permanent_master_failure_retry_interval ;
                     Thread.delay !Db_globs.permanent_master_failure_retry_interval
               done ;