diff --git a/runtime/compiler/aarch64/codegen/ARM64PrivateLinkage.cpp b/runtime/compiler/aarch64/codegen/ARM64PrivateLinkage.cpp index 7203b67b6c2..68766c49e26 100644 --- a/runtime/compiler/aarch64/codegen/ARM64PrivateLinkage.cpp +++ b/runtime/compiler/aarch64/codegen/ARM64PrivateLinkage.cpp @@ -1224,18 +1224,17 @@ void J9::ARM64::PrivateLinkage::buildVirtualDispatch(TR::Node *callNode, TR::ARM64VirtualUnresolvedSnippet(cg(), callNode, vcSnippetLabel, argSize, doneLabel, (uint8_t *)thunk); cg()->addSnippet(vcSnippet); - TR::Register *dstReg = cg()->allocateRegister(); // The following instructions are modified by _virtualUnresolvedHelper // in aarch64/runtime/PicBuilder.spp to load the vTable index in x9 - generateTrg1ImmInstruction(cg(), TR::InstOpCode::movzx, callNode, x9, 0); + + // This `b` instruction is modified to movzx x9, lower 16bit of offset + generateLabelInstruction(cg(), TR::InstOpCode::b, callNode, vcSnippetLabel); generateTrg1ImmInstruction(cg(), TR::InstOpCode::movkx, callNode, x9, TR::MOV_LSL16); generateTrg1Src1ImmInstruction(cg(), TR::InstOpCode::sbfmx, callNode, x9, x9, 0x1F); // sxtw x9, w9 tempMR = new (trHeapMemory()) TR::MemoryReference(vftReg, x9, cg()); - generateTrg1MemInstruction(cg(), TR::InstOpCode::ldroffx, callNode, dstReg, tempMR); - gcPoint = generateLabelInstruction(cg(), TR::InstOpCode::b, callNode, vcSnippetLabel); - - cg()->stopUsingRegister(dstReg); + generateTrg1MemInstruction(cg(), TR::InstOpCode::ldroffx, callNode, x9, tempMR); + gcPoint = generateRegBranchInstruction(cg(), TR::InstOpCode::blr, callNode, x9); } else { diff --git a/runtime/compiler/aarch64/codegen/CallSnippet.cpp b/runtime/compiler/aarch64/codegen/CallSnippet.cpp index f5f93dc3282..70368c9a471 100644 --- a/runtime/compiler/aarch64/codegen/CallSnippet.cpp +++ b/runtime/compiler/aarch64/codegen/CallSnippet.cpp @@ -472,7 +472,10 @@ uint8_t *TR::ARM64VirtualUnresolvedSnippet::emitSnippetBody() TR_J2IVirtualThunkPointer, cg()), __FILE__, __LINE__, callNode); - return cursor + 8; + cursor += 8; + // Lock word + *(int32_t *)cursor = 0; + return cursor + sizeof(int32_t); } void @@ -499,11 +502,23 @@ TR_Debug::print(TR::FILE *pOutFile, TR::ARM64VirtualUnresolvedSnippet * snippet) printPrefix(pOutFile, NULL, cursor, sizeof(intptr_t)); trfprintf(pOutFile, ".dword \t0x%08x\t\t; cpIndex", *(intptr_t *)cursor); + cursor += sizeof(intptr_t); + + printPrefix(pOutFile, NULL, cursor, sizeof(intptr_t)); + trfprintf(pOutFile, ".dword \t" POINTER_PRINTF_FORMAT "\t\t; Private J9Method pointer", *(intptr_t *)cursor); + cursor += sizeof(intptr_t); + + printPrefix(pOutFile, NULL, cursor, sizeof(intptr_t)); + trfprintf(pOutFile, ".dword \t" POINTER_PRINTF_FORMAT "\t\t; J2I thunk address for private", *(intptr_t *)cursor); + cursor += sizeof(intptr_t); + + printPrefix(pOutFile, NULL, cursor, 4); + trfprintf(pOutFile, ".word \t0x%08x\t\t; Lock Word For Resolution", *(int32_t *)cursor); } uint32_t TR::ARM64VirtualUnresolvedSnippet::getLength(int32_t estimatedSnippetStart) { - return 44; + return 48; } uint8_t *TR::ARM64InterfaceCallSnippet::emitSnippetBody() diff --git a/runtime/compiler/aarch64/runtime/PicBuilder.spp b/runtime/compiler/aarch64/runtime/PicBuilder.spp index 5654da38dba..5e0a925f1a5 100644 --- a/runtime/compiler/aarch64/runtime/PicBuilder.spp +++ b/runtime/compiler/aarch64/runtime/PicBuilder.spp @@ -119,6 +119,7 @@ .set J9TR_UVCSnippet_CPIndex, 16 .set J9TR_UVCSnippet_method, 24 .set J9TR_UVCSnippet_J2IThunk, 32 + .set J9TR_UVCSnippet_lockword, 40 // Unresolved data snippet @@ -630,17 +631,16 @@ const_jitResolveConstantDynamic: // // in: x30 = snippet // -// trash: x10, x11 +// trash: x10, x11, x12 // For virtual unresolved call, we generate following instructions -// movz x9, #0 +// b VirtualUnresolvedSnippet ; change this to "movz x9, #low16bits" // movk x9, #0, LSL #16 // sxtw x9, w9 -// ldr dstReg, [vftReg, x9] -// b VirtualUnresolvedSnippet ; change this to "blr dstReg" +// ldr x9, [vftReg, x9] +// blr x9 // -// We encode the resolved index value into movz and movk instructions first -// Then the b instruction is changed to "blr dstReg" +// We encode the resolved index value (signed 32 bits) into movz and movk instructions // _virtualUnresolvedHelper: stp x7, x6, [J9SP, #-64]! // save parameter regs. jitWalkResolveMethodFrame assumes that argument registers are saved in this order @@ -680,38 +680,47 @@ L_calloutPrivate: add J9SP, J9SP, #64 br x10 // Call the target, not returning here L_callVirtual: - mov x2, x0 - sub x0, x10, #20 // get the address of the movz instruction - ldr w1, [x0] // fetch the movz instruction - ubfx x3, x2, #0, #16 // lower 16 bits of the index - orr w1, w1, w3, LSL #5 // encode the index in the movz instruction - str w1, [x0] // store the movz instruction - ldr w1, [x0, #4] // fetch the movk instruction - ubfx x3, x2, #16, #16 // next 16 bits of the index - orr w1, w1, w3, LSL #5 // encode the index in the movk instruction - str w1, [x0, #4] // store the movk instruction - mov x1, #8 // 2 instruction to flush + add x1, x11, #J9TR_UVCSnippet_lockword // address of the lockword + mov w3, #1 + ldxr w2, [x1] + cbnz w2, L_spinForUpdate // already locked by another thread + stxr w2, w3, [x1] // try to lock + cbnz w2, L_spinForUpdate // failed to lock + mov x12, x0 // resolved index + sub x0, x10, #16 // get the address of the movk instruction + ldr w1, [x0] // fetch the movk instruction + ubfx x2, x12, #16, #16 // upper 16 bits of the index + orr w1, w1, w2, LSL #5 // encode the index in the movk instruction + str w1, [x0] // store the movk instruction + mov x1, #4 // 1 instruction to flush bl flushICache - sub x0, x10, #8 // get the address of the ldr instruction - ldr w2, [x0] // fetch the ldr instruction - ubfx x2, x2, #0, #5 // extract the dstReg - ldr w3, const_blr // fetch constant for the blr instruction - orr w3, w3, w2, LSL #5 // encode the dstReg in the blr instruction - sub x0, x10, #4 // get the address of the b instruction - str w3, [x0] // store instruction + ldr w1, const_movz_x9 // fetch movz template + ubfx x12, x12, #0, #16 // lower 16 bits of the index + sub x0, x10, #20 // get the address of the b instruction + orr w1, w1, w12, LSL #5 // encode the index in the movz instruction + str w1, [x0] // store the movz instruction mov x1, #4 // 1 instruction to flush bl flushICache - sub x30, x10, #20 // set the movk instruction as the destination + mov w0, #0 + dmb sy + str w0, [x11, #J9TR_UVCSnippet_lockword] // unlock +L_calloutVirtual: + sub x30, x10, #20 // set the movz instruction as the destination ldp x7, x6, [J9SP, #0] // restore other parameter regs ldp x5, x4, [J9SP, #16] ldp x3, x2, [J9SP, #32] ldp x1, x0, [J9SP, #48] add J9SP, J9SP, #64 - ret // jump back to the movk instruction + ret // jump back to the movz instruction + +L_spinForUpdate: + ldr w2, [x1] + cbnz w2, L_spinForUpdate + b L_calloutVirtual // another thread completed rewriting instructions .align 2 -const_blr: - .word 0xD63F0000 +const_movz_x9: + .word 0xD2800009 // template for "movz x9, #0" // Handles calls to interface call snippets //