From b5356e46b2943e266b5595d9e2593b91539da1a4 Mon Sep 17 00:00:00 2001 From: Nathan Henderson Date: Mon, 30 Sep 2024 08:00:50 -0700 Subject: [PATCH] Restrict getting the restore process start time to CRaC API There is a race condition on the existence of the criu restore process and retrieving its start time, when --restore-detached is passed to criu restore. This patch restrict retrieving the process start time to restoring via CRaC which is not affected. Issues: eclipse-openj9/openj9#20214 Signed-off-by: Nathan Henderson --- runtime/vm/CRIUHelpers.cpp | 27 +++++++++-------- .../cmdLineTests/criu/criu_nonPortable.xml | 16 ---------- .../src/org/openj9/criu/TimeChangeTest.java | 30 ------------------- 3 files changed, 14 insertions(+), 59 deletions(-) diff --git a/runtime/vm/CRIUHelpers.cpp b/runtime/vm/CRIUHelpers.cpp index 9acbd71b70a..136b8c8801b 100644 --- a/runtime/vm/CRIUHelpers.cpp +++ b/runtime/vm/CRIUHelpers.cpp @@ -1583,7 +1583,6 @@ criuCheckpointJVMImpl(JNIEnv *env, I_32 syslogBufferSize = 0; UDATA oldVMState = VM_VMHelpers::setVMState(currentThread, J9VMSTATE_CRIU_SUPPORT_CHECKPOINT_PHASE_START); UDATA notSafeToCheckpoint = 0; - UDATA criuRestorePid = 0; U_32 intGhostFileLimit = 0; IDATA criuDumpReturnCode = 0; bool restoreFailure = false; @@ -1885,19 +1884,21 @@ criuCheckpointJVMImpl(JNIEnv *env, } if (hasDumpSucceeded) { - /* Calculate restore time excluding `criu restore ...` for MXBean API. */ - criuRestorePid = j9sysinfo_get_ppid(); - systemReturnCode = j9sysinfo_get_process_start_time(criuRestorePid, &restoreNanoUTCTime); - if (0 != systemReturnCode) { - currentExceptionClass = vm->checkpointState.criuSystemRestoreExceptionClass; - nlsMsgFormat = j9nls_lookup_message( - J9NLS_DO_NOT_PRINT_MESSAGE_TAG | J9NLS_DO_NOT_APPEND_NEWLINE, - J9NLS_VM_CRIU_J9_GET_PROCESS_START_TIME_FAILURE, - NULL); - restoreFailure = true; + /* Calculate restore time for CRaC MXBean API. */ + if (J9_ARE_ALL_BITS_SET(vm->checkpointState.flags, J9VM_CRAC_IS_CHECKPOINT_ENABLED)) { + UDATA cracRestorePid = j9sysinfo_get_ppid(); + systemReturnCode = j9sysinfo_get_process_start_time(cracRestorePid, &restoreNanoUTCTime); + if (0 != systemReturnCode) { + currentExceptionClass = vm->checkpointState.criuSystemRestoreExceptionClass; + nlsMsgFormat = j9nls_lookup_message( + J9NLS_DO_NOT_PRINT_MESSAGE_TAG | J9NLS_DO_NOT_APPEND_NEWLINE, + J9NLS_VM_CRIU_J9_GET_PROCESS_START_TIME_FAILURE, + NULL); + restoreFailure = true; + } + vm->checkpointState.processRestoreStartTimeInNanoseconds = (I_64)restoreNanoUTCTime; + Trc_VM_criu_process_restore_start_after_dump(currentThread, cracRestorePid, vm->checkpointState.processRestoreStartTimeInNanoseconds); } - vm->checkpointState.processRestoreStartTimeInNanoseconds = (I_64)restoreNanoUTCTime; - Trc_VM_criu_process_restore_start_after_dump(currentThread, criuRestorePid, vm->checkpointState.processRestoreStartTimeInNanoseconds); /* Load restore arguments from restore file or env vars. */ switch (loadRestoreArguments(currentThread, optionsFileChars, envFileChars)) { diff --git a/test/functional/cmdLineTests/criu/criu_nonPortable.xml b/test/functional/cmdLineTests/criu/criu_nonPortable.xml index a3bdbbe865b..f4f13e6fcb3 100644 --- a/test/functional/cmdLineTests/criu/criu_nonPortable.xml +++ b/test/functional/cmdLineTests/criu/criu_nonPortable.xml @@ -209,22 +209,6 @@ User requested Java dump using - - bash $SCRIPPATH$ $TEST_RESROOT$ $JAVA_COMMAND$ "$JVM_OPTIONS$ $XTRACE_CRIU$ $STD_CMD_OPTS$" $MAINCLASS_TIMECHANGE$ testGetProcessRestoreStartTime 1 false false - Killed - PASSED: InternalCRIUSupport.getProcessRestoreStartTime() - CRIU is not enabled - Operation not permitted - FAILED: InternalCRIUSupport.getProcessRestoreStartTime() - - Thread pid mismatch - do not match expected - Unable to create a thread: - - Could not dump the JVM processes, err=-70 - User requested Java dump using - - bash $SCRIPPATH$ $TEST_RESROOT$ $JAVA_COMMAND$ "$JVM_OPTIONS$ -Xtrace:print={j9jcl.533,j9vm.684-696,j9vm.699,j9vm.717-743} $STD_CMD_OPTS$" -Xdump:java+system+jit:events=throw+systhrow,filter=org/eclipse/openj9/criu/JVMCheckpointException $MAINCLASS_TIMECHANGE$ testMXBeanUpTime 1 false false Killed diff --git a/test/functional/cmdLineTests/criu/src/org/openj9/criu/TimeChangeTest.java b/test/functional/cmdLineTests/criu/src/org/openj9/criu/TimeChangeTest.java index ff23ee56e81..319aeecb809 100644 --- a/test/functional/cmdLineTests/criu/src/org/openj9/criu/TimeChangeTest.java +++ b/test/functional/cmdLineTests/criu/src/org/openj9/criu/TimeChangeTest.java @@ -61,9 +61,6 @@ public static void main(String args[]) throws InterruptedException { case "testGetLastRestoreTime": tct.testGetLastRestoreTime(); break; - case "testGetProcessRestoreStartTime": - tct.testGetProcessRestoreStartTime(); - break; case "testMXBeanUpTime": tct.testMXBeanUpTime(); break; @@ -267,33 +264,6 @@ private void testGetLastRestoreTime() { } } - private void testGetProcessRestoreStartTime() { - long processRestoreStartTime = InternalCRIUSupport.getProcessRestoreStartTime(); - if (processRestoreStartTime != -1) { - System.out.println("FAILED: InternalCRIUSupport.getProcessRestoreStartTime() - " + processRestoreStartTime - + " is not -1 before restore"); - } - CRIUSupport criu = CRIUTestUtils.prepareCheckPointJVM(CRIUTestUtils.imagePath); - long beforeCheckpointTime = TimeUtilities.getCurrentTimeInNanoseconds(); - CRIUTestUtils.checkPointJVMNoSetup(criu, CRIUTestUtils.imagePath, false); - processRestoreStartTime = InternalCRIUSupport.getProcessRestoreStartTime(); - long lastRestoreTime = InternalCRIUSupport.getLastRestoreTime(); - long afterRestoreTime = TimeUtilities.getCurrentTimeInNanoseconds(); - if (beforeCheckpointTime >= processRestoreStartTime) { - System.out.println("FAILED: InternalCRIUSupport.getProcessRestoreStartTime() - " + processRestoreStartTime - + " is less than beforeCheckpointTime - " + beforeCheckpointTime); - } else if (processRestoreStartTime >= lastRestoreTime) { - System.out.println("FAILED: InternalCRIUSupport.getProcessRestoreStartTime() - " + processRestoreStartTime - + " is more than InternalCRIUSupport.getLastRestoreTime() - " + lastRestoreTime); - } else if (processRestoreStartTime >= afterRestoreTime) { - System.out.println("FAILED: InternalCRIUSupport.getProcessRestoreStartTime() - " + processRestoreStartTime - + " is more than afterRestoreTime - " + afterRestoreTime); - } else { - System.out.println("PASSED: InternalCRIUSupport.getProcessRestoreStartTime() - " + processRestoreStartTime - + " is between beforeCheckpointTime - " + beforeCheckpointTime + " and afterRestoreTime - " + afterRestoreTime); - } - } - private void testMXBeanUpTime() { CRIUSupport criu = CRIUTestUtils.prepareCheckPointJVM(CRIUTestUtils.imagePath); RuntimeMXBean mxb = ManagementFactory.getRuntimeMXBean();