diff --git a/runtime/compiler/z/codegen/J9TreeEvaluator.cpp b/runtime/compiler/z/codegen/J9TreeEvaluator.cpp
index 2af9369469f..d242668546e 100644
--- a/runtime/compiler/z/codegen/J9TreeEvaluator.cpp
+++ b/runtime/compiler/z/codegen/J9TreeEvaluator.cpp
@@ -4871,7 +4871,6 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node,
 
 #if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION)
    bool isOffHeapAllocationEnabled = TR::Compiler->om.isOffHeapAllocationEnabled();
-   TR::LabelSymbol *populateFirstDimDataAddrSlot = isOffHeapAllocationEnabled ? generateLabelSymbol(cg) : NULL;
 #endif /* defined(J9VM_GC_SPARSE_HEAP_ALLOCATION) */
 
    // oolJumpLabel is a common point that all branches will jump to. From this label, we branch to OOL code.
@@ -4889,12 +4888,14 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node,
    TR::Register *classReg = cg->evaluate(thirdChild);
 
    // In the mainline, first load the first and second dimensions' lengths into registers.
+   // LGF is needed so that the same reg can be used to write NULL to the dataAddr slot for 0 length arrays
+   TR::InstOpCode::Mnemonic loadDimLenOpCode = TR::InstOpCode::LGF;
    TR::Register *firstDimLenReg = cg->allocateRegister();
-   cursor = generateRXInstruction(cg, TR::InstOpCode::LGF, node, firstDimLenReg, generateS390MemoryReference(dimsPtrReg, 4, cg));
+   cursor = generateRXInstruction(cg, loadDimLenOpCode, node, firstDimLenReg, generateS390MemoryReference(dimsPtrReg, 4, cg));
    iComment("Load 1st dim length.");
 
    TR::Register *secondDimLenReg = cg->allocateRegister();
-   cursor = generateRXInstruction(cg, TR::InstOpCode::L, node, secondDimLenReg, generateS390MemoryReference(dimsPtrReg, 0, cg));
+   cursor = generateRXInstruction(cg, loadDimLenOpCode, node, secondDimLenReg, generateS390MemoryReference(dimsPtrReg, 0, cg));
    iComment("Load 2nd dim length.");
 
    // Check to see if second dimension is indeed 0. If yes, then proceed to handle the case here. Otherwise jump to OOL code.
@@ -4948,33 +4949,24 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node,
       iComment("Init 1st dim mustBeZero field.");
       cursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, firstDimLenReg, generateS390MemoryReference(targetReg, fej9->getOffsetOfDiscontiguousArraySizeField(), cg));
       iComment("Init 1st dim size field.");
-      }
-
 #if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION)
-   if (isOffHeapAllocationEnabled)
-      {
-      TR_ASSERT_FATAL_WITH_NODE(node,
-         (TR::Compiler->om.compressObjectReferences()
-            && (fej9->getOffsetOfDiscontiguousDataAddrField() - fej9->getOffsetOfContiguousDataAddrField()) == 8)
-         || (!TR::Compiler->om.compressObjectReferences()
-            && fej9->getOffsetOfDiscontiguousDataAddrField() == fej9->getOffsetOfContiguousDataAddrField()),
-         "Offset of dataAddr field in discontiguous array is expected to be 8 bytes more than contiguous array if using compressed refs, "
-         "or same if using full refs. But was %d bytes for discontiguous and %d bytes for contiguous array.\n",
-         fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField());
-
-      // Load dataAddr slot offset difference since 0 size arrays are treated as discontiguous.
-      generateRIInstruction(cg,
-         TR::InstOpCode::LGHI,
-         node,
-         temp1Reg,
-         static_cast<int32_t>(fej9->getOffsetOfDiscontiguousDataAddrField() - fej9->getOffsetOfContiguousDataAddrField()));
-      cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, populateFirstDimDataAddrSlot);
-      }
-   else
+      if (isOffHeapAllocationEnabled)
+         {
+         TR_ASSERT_FATAL_WITH_NODE(node,
+            TR::InstOpCode::LGF == loadDimLenOpCode,
+            "LGF must be used to load 1st dimension length into firstDimLenReg. "
+            "Array size field is 32 bits in size but dataAddr slot is 64 bits so "
+            "to use the same register to initialize dataAddr slot we must clean "
+            "out top 32 bits of firstDimLenReg. LGF sign extends 32 bit 1st dim "
+            "size to 64 bits enabling us to use firstDimLenReg to write NULL in "
+            "the dataAddr field for 0 length arrays.\n");
+         cursor = generateRXInstruction(cg, TR::InstOpCode::STG, node, firstDimLenReg, generateS390MemoryReference(targetReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg));
+         iComment("Clear 1st dim dataAddr field.");
+         }
 #endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
-      {
-      cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionDone);
       }
+
+   cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionDone);
    iComment("Init class field and jump.");
 
    // We end up in this region of the ICF if the first dimension is non-zero and the second dimension is zero.
@@ -5056,27 +5048,24 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node,
       iComment("Init 2st dim mustBeZero field.");
       cursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, secondDimLenReg, generateS390MemoryReference(temp2Reg, fej9->getOffsetOfDiscontiguousArraySizeField(), cg));
       iComment("Init 2st dim size field.");
-      }
-
-   TR::Register *temp3Reg = cg->allocateRegister();
-
 #if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION)
-   if (isOffHeapAllocationEnabled)
-      {
-      // Populate dataAddr slot for 2nd dimension zero size array.
-      generateRXInstruction(cg,
-         TR::InstOpCode::LA,
-         node,
-         temp3Reg,
-         generateS390MemoryReference(temp2Reg, TR::Compiler->om.discontiguousArrayHeaderSizeInBytes(), cg));
-      generateRXInstruction(cg,
-         TR::InstOpCode::STG,
-         node,
-         temp3Reg,
-         generateS390MemoryReference(temp2Reg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg));
-      }
+      if (isOffHeapAllocationEnabled)
+         {
+         TR_ASSERT_FATAL_WITH_NODE(node,
+            TR::InstOpCode::LGF == loadDimLenOpCode,
+            "LGF must be used to load 2nd dimension length into secondDimLenReg. "
+            "Array size field is 32 bits in size but dataAddr slot is 64 bits so "
+            "to use the same register to initialize dataAddr slot we must clean "
+            "out top 32 bits of secondDimLenReg. LGF sign extends 32 bit 2nd dim "
+            "size to 64 bits enabling us to use secondDimLenReg to write NULL in "
+            "the dataAddr field for 0 length arrays.\n");
+         cursor = generateRXInstruction(cg, TR::InstOpCode::STG, node, secondDimLenReg, generateS390MemoryReference(temp2Reg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg));
+         iComment("Clear 2nd dim dataAddr field.");
+         }
 #endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
+      }
+   TR::Register *temp3Reg = cg->allocateRegister();
 
    // Store 2nd dim element into 1st dim array slot, compress temp2 if needed
    if (comp->target().is64Bit() && comp->useCompressedPointers())
       {
@@ -5103,15 +5092,24 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node,
 #if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION)
    if (isOffHeapAllocationEnabled)
       {
-      // No offset is needed since 1st dimension array is contiguous.
-      generateRRInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, temp1Reg, temp1Reg);
-      generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, populateFirstDimDataAddrSlot);
+      /* Populate dataAddr slot of 1st dimension array. We don't need to worry
+       * about zero length array since it has already been taken care of.
+       */
+      generateRXInstruction(cg,
+         TR::InstOpCode::LA,
+         node,
+         temp3Reg,
+         generateS390MemoryReference(targetReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
+      cursor = generateRXInstruction(cg,
+         TR::InstOpCode::STG,
+         node,
+         temp3Reg,
+         generateS390MemoryReference(targetReg, fej9->getOffsetOfContiguousDataAddrField(), cg));
+      iComment("populateFirstDimDataAddrSlot.");
       }
-   else
 #endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
-      {
-      generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionDone);
-      }
+
+   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionDone);
 
    TR::RegisterDependencyConditions *dependencies = generateRegisterDependencyConditions(0,10,cg);
    dependencies->addPostCondition(dimReg, TR::RealRegister::AssignAny);
@@ -5128,28 +5126,6 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node,
    generateS390LabelInstruction(cg, TR::InstOpCode::label, node, oolJumpLabel);
    generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, oolFailLabel);
 
-#if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION)
-   if (isOffHeapAllocationEnabled)
-      {
-      /* Populate dataAddr slot of 1st dimension array. Arrays of non-zero size
-       * use contiguous header layout while zero size arrays use discontiguous header layout.
-       */
-      cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, populateFirstDimDataAddrSlot);
-      iComment("populateFirstDimDataAddrSlot.");
-
-      generateRXInstruction(cg,
-         TR::InstOpCode::LA,
-         node,
-         temp3Reg,
-         generateS390MemoryReference(targetReg, temp1Reg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
-      generateRXInstruction(cg,
-         TR::InstOpCode::STG,
-         node,
-         temp3Reg,
-         generateS390MemoryReference(targetReg, temp1Reg, fej9->getOffsetOfContiguousDataAddrField(), cg));
-      }
-#endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
-
    generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionDone, dependencies);
 
    TR::Register *targetRegisterFinal = cg->allocateCollectedReferenceRegister();
@@ -11119,81 +11095,81 @@ J9::Z::TreeEvaluator::VMnewEvaluator(TR::Node * node, TR::CodeGenerator * cg)
 #ifdef J9VM_GC_SPARSE_HEAP_ALLOCATION
    if (TR::Compiler->om.isOffHeapAllocationEnabled())
       {
-      /* Here we'll update dataAddr slot for both fixed and variable length arrays. Fixed length arrays are
-       * simple as we just need to check first child of the node for array size. For variable length arrays
-       * runtime size checks are needed to determine whether to use contiguous or discontiguous header layout.
-       *
-       * In both scenarios, arrays of non-zero size use contiguous header layout while zero size arrays use
-       * discontiguous header layout.
+      /* Here we'll update dataAddr slot for fixed and variable non-zero length arrays. DataAddr field
+       * of 0 length arrays will be NULL'ed.
        */
-      TR::Register *offsetReg = NULL;
-      TR::MemoryReference *dataAddrMR = NULL;
-      TR::MemoryReference *dataAddrSlotMR = NULL;
+      TR::Register *tmpDataAddrReg = srm->findOrCreateScratchRegister();
+      // Clear out reg so that it can be used to NULL fields in the array header
+      iCursor = generateRRInstruction(cg, TR::InstOpCode::XGR, node, tmpDataAddrReg, tmpDataAddrReg, iCursor);
 
-      if (isVariableLen && TR::Compiler->om.compressObjectReferences())
+      /* Clear out padding and dataAddr field of array header assuming it's a 0 length array
+       * so we don't have to worry about clearing it out later during initialization.
+       * Dealing with 0 length array here keeps the dataAddr field initialization sequence simple.
+       */
+      if (TR::Compiler->om.compressObjectReferences())
         {
-         /* We need to check enumReg (array size) at runtime to determine correct offset of dataAddr field.
-          * Here we deal only with compressed refs because dataAddr offset for discontiguous
-          * and contiguous arrays is the same in full refs.
-          */
-         if (comp->getOption(TR_TraceCG))
-            traceMsg(comp, "Node (%p): Dealing with compressed refs variable length array.\n", node);
         TR_ASSERT_FATAL_WITH_NODE(node,
-            (fej9->getOffsetOfDiscontiguousDataAddrField() - fej9->getOffsetOfContiguousDataAddrField()) == 8,
-            "Offset of dataAddr field in discontiguous array is expected to be 8 bytes more than contiguous array. "
-            "But was %d bytes for discontiguous and %d bytes for contiguous array.\n",
-            fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField());
+            fej9->getOffsetOfDiscontiguousArraySizeField() + 8 == fej9->getOffsetOfDiscontiguousDataAddrField(),
+            "4 byte padding is added after size field in discontiguous header layout for 8 byte alignment. "
+            "Size field is 4 bytes in size so adding 8 to size field offset should equal offset of dataAddr field. "
+            "But size field + 8 bytes was %d while dataAddr field offset was %d bytes for discontiguous array.\n",
+            static_cast<int>(fej9->getOffsetOfDiscontiguousArraySizeField() + 8), fej9->getOffsetOfDiscontiguousDataAddrField());
 
-         offsetReg = cg->allocateRegister();
-         // Invert enumReg sign. 0 and negative numbers remain unchanged.
-         iCursor = generateRREInstruction(cg, TR::InstOpCode::LNGFR, node, offsetReg, enumReg, iCursor);
-         iCursor = generateRSInstruction(cg, TR::InstOpCode::SRLG, node, dataSizeReg, offsetReg, 63, iCursor);
-         iCursor = generateRSInstruction(cg, TR::InstOpCode::SLLG, node, offsetReg, dataSizeReg, 3, iCursor);
-         // Inverting the sign bit will leave us with either -8 (if enumCopyReg > 0) or 0 (if enumCopyReg == 0).
-         iCursor = generateRREInstruction(cg, TR::InstOpCode::LNGR, node, offsetReg, offsetReg, iCursor);
-
-         dataAddrMR = generateS390MemoryReference(resReg, offsetReg, TR::Compiler->om.discontiguousArrayHeaderSizeInBytes(), cg);
-         dataAddrSlotMR = generateS390MemoryReference(resReg, offsetReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg);
-         }
-      else if (!isVariableLen && node->getFirstChild()->getOpCode().isLoadConst() && node->getFirstChild()->getInt() == 0)
-         {
         if (comp->getOption(TR_TraceCG))
-            traceMsg(comp, "Node (%p): Dealing with full/compressed refs fixed length zero size array.\n", node);
+            {
+            traceMsg(comp,
+               "Node (%p): Clean out padding added after size field and dataAddr field assuming 0 length array. "
+               "If we are not dealing with 0 length array, 0s would be written to first element so no harm done.\n",
+               node);
+            }
 
-         dataAddrMR = generateS390MemoryReference(resReg, TR::Compiler->om.discontiguousArrayHeaderSizeInBytes(), cg);
-         dataAddrSlotMR = generateS390MemoryReference(resReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg);
+         iCursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, tmpDataAddrReg, generateS390MemoryReference(resReg, fej9->getOffsetOfDiscontiguousArraySizeField() + 4, cg), iCursor);
+         iCursor = generateRXInstruction(cg, TR::InstOpCode::STG, node, tmpDataAddrReg, generateS390MemoryReference(resReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg), iCursor);
         }
      else
         {
+         TR_ASSERT_FATAL_WITH_NODE(node,
+            fej9->getOffsetOfDiscontiguousDataAddrField() == fej9->getOffsetOfContiguousDataAddrField(),
+            "dataAddr field offset is expected to be same for both contiguous and discontiguous arrays in full refs. "
+            "But was %d bytes for discontiguous and %d bytes for contiguous array.\n",
+            fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField());
+
         if (comp->getOption(TR_TraceCG))
            {
            traceMsg(comp,
-               "Node (%p): Dealing with either full/compressed refs fixed length non-zero size array or full refs variable length array.\n",
+               "Node (%p): Clean out dataAddr field assuming 0 length array. In full refs mode, "
+               "dataAddr field offset is same for both contiguous and discontiguous header layout "
+               "so no harm done if our assumption about array length turns out to be wrong.\n",
               node);
            }
 
-         if (!TR::Compiler->om.compressObjectReferences())
-            {
-            TR_ASSERT_FATAL_WITH_NODE(node,
-               fej9->getOffsetOfDiscontiguousDataAddrField() == fej9->getOffsetOfContiguousDataAddrField(),
-               "dataAddr field offset is expected to be same for both contiguous and discontiguous arrays in full refs. "
-               "But was %d bytes for discontiguous and %d bytes for contiguous array.\n",
-               fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField());
-            }
-
-         dataAddrMR = generateS390MemoryReference(resReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg);
-         dataAddrSlotMR = generateS390MemoryReference(resReg, fej9->getOffsetOfContiguousDataAddrField(), cg);
+         iCursor = generateRXInstruction(cg, TR::InstOpCode::STG, node, tmpDataAddrReg, generateS390MemoryReference(resReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg), iCursor);
          }
 
-      iCursor = generateRXInstruction(cg, TR::InstOpCode::LA, node, dataSizeReg, dataAddrMR, iCursor);
-      iCursor = generateRXInstruction(cg, TR::InstOpCode::STG, node, dataSizeReg, dataAddrSlotMR, iCursor);
+      // Initialize dataAddr field for non-zero length arrays
+      TR::MemoryReference *dataAddrMR = generateS390MemoryReference(resReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg);
+      TR::MemoryReference *dataAddrSlotMR = generateS390MemoryReference(resReg, fej9->getOffsetOfContiguousDataAddrField(), cg);
+      if (isVariableLen)
+         {
+         // We need to check enumReg (array length) at runtime to determine if dataAddr needs to be initialized or not.
+         if (comp->getOption(TR_TraceCG))
+            traceMsg(comp, "Node (%p): Dealing with compressed/full refs variable length array.\n", node);
 
-      if (offsetReg)
+         iCursor = generateRXInstruction(cg, TR::InstOpCode::LA, node, tmpDataAddrReg, dataAddrMR, iCursor); // load first element address
+         iCursor = generateRILInstruction(cg, TR::InstOpCode::CFI, node, enumReg, 0, iCursor);
+         // Write only if array length is non zero
+         iCursor = generateRSInstruction(cg, TR::InstOpCode::STOCG, node, tmpDataAddrReg, static_cast<uint8_t>(0x2), dataAddrSlotMR, iCursor);
+         }
+      else if (!isVariableLen && node->getFirstChild()->getOpCode().isLoadConst() && node->getFirstChild()->getInt() > 0)
         {
-         conditions->addPostCondition(offsetReg, TR::RealRegister::AssignAny);
-         cg->stopUsingRegister(offsetReg);
+         if (comp->getOption(TR_TraceCG))
+            traceMsg(comp, "Node (%p): Dealing with full/compressed refs fixed length non-zero length array.\n", node);
+
+         iCursor = generateRXInstruction(cg, TR::InstOpCode::LA, node, tmpDataAddrReg, dataAddrMR, iCursor); // load first element address
+         iCursor = generateRXInstruction(cg, TR::InstOpCode::STG, node, tmpDataAddrReg, dataAddrSlotMR, iCursor);
         }
+
+      srm->reclaimScratchRegister(tmpDataAddrReg);
       }
 #endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
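
Reviewer note, not part of the patch: a minimal C++ sketch of why the LGF-loaded length register can double as the NULL value written into the dataAddr slot of a 0 length array. The struct below is an illustrative stand-in for the discontiguous header layout the patch comments describe (32 bit size field, 64 bit dataAddr slot); it is not the real J9 header definition.

#include <cassert>
#include <cstdint>

// Illustrative model only: a compressed-refs discontiguous (0 length) array header.
struct DiscontiguousHeaderModel
   {
   uint32_t clazz;
   uint32_t mustBeZero;
   uint32_t size;
   uint32_t padding;   // keeps the 64-bit dataAddr slot 8-byte aligned
   uint64_t dataAddr;
   };

int main()
   {
   // LGF loads the 32-bit dimension length and sign extends it to 64 bits.
   int32_t dimLength = 0;
   int64_t dimLenReg = static_cast<int64_t>(dimLength);

   DiscontiguousHeaderModel header = {};
   header.size = static_cast<uint32_t>(dimLenReg);      // ST of the low 32 bits into the size field
   header.dataAddr = static_cast<uint64_t>(dimLenReg);  // STG of the full 64-bit register into dataAddr

   // Because the register was sign extended and the length is 0, the STG writes NULL.
   assert(header.dataAddr == 0);
   return 0;
   }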
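
Reviewer note, not part of the patch: a sketch of the offset arithmetic behind the new compressed-refs assert and the speculative clearing in VMnewEvaluator. The layouts below are hypothetical models chosen only to satisfy the relationships the patch states (4 byte size field plus 4 bytes of padding put dataAddr 8 bytes past the size field); the authoritative offsets come from the fej9->getOffsetOf...() queries, not from these structs.

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Hypothetical compressed-refs layouts; real offsets come from fej9 at JIT compile time.
struct ContiguousHeaderModel
   {
   uint32_t clazz;
   uint32_t size;
   uint64_t dataAddr;   // element data follows the header
   };

struct DiscontiguousHeaderModel
   {
   uint32_t clazz;
   uint32_t mustBeZero;
   uint32_t size;
   uint32_t padding;
   uint64_t dataAddr;
   };

// The relationship the new TR_ASSERT_FATAL_WITH_NODE encodes.
static_assert(offsetof(DiscontiguousHeaderModel, size) + 8 == offsetof(DiscontiguousHeaderModel, dataAddr),
              "discontiguous dataAddr is 8 bytes past the size field");

int main()
   {
   // Why the up-front clearing is harmless for a non-zero length array in this model: the bytes
   // cleared at (discontiguous size offset + 4) and at the discontiguous dataAddr offset overlap
   // the contiguous dataAddr slot and the first element, both of which are written again later.
   std::printf("discontiguous padding offset %zu vs contiguous dataAddr offset %zu\n",
               offsetof(DiscontiguousHeaderModel, padding), offsetof(ContiguousHeaderModel, dataAddr));
   std::printf("discontiguous dataAddr offset %zu vs contiguous header size %zu\n",
               offsetof(DiscontiguousHeaderModel, dataAddr), sizeof(ContiguousHeaderModel));
   return 0;
   }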
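
Reviewer note, not part of the patch: the variable length path now avoids a branch by clearing dataAddr first and then conditionally storing the first element address with STOCG, where mask 0x2 selects condition code 2 (the CFI compare found enumReg greater than zero). The helper below only mirrors that logic in C++; initDataAddrModel and its parameters are illustrative names.

#include <cassert>
#include <cstdint>

// Mirrors the emitted sequence for a runtime length 'len':
//   XGR   tmp,tmp                     tmp = 0
//   STG   tmp,dataAddrSlot            assume 0 length: dataAddr = NULL
//   LA    tmp,headerSize(header)      tmp = address of first element
//   CFI   len,0                       compare length with 0
//   STOCG tmp,dataAddrSlot,2          store only on CC2 (len > 0)
static uint64_t initDataAddrModel(int32_t len, uint64_t headerAddress, uint64_t headerSize)
   {
   uint64_t dataAddrSlot = 0;                           // XGR + STG
   uint64_t firstElement = headerAddress + headerSize;  // LA
   if (len > 0)                                         // CFI + STOCG with mask 0x2
      dataAddrSlot = firstElement;
   return dataAddrSlot;
   }

int main()
   {
   assert(initDataAddrModel(0, 0x1000, 16) == 0);       // 0 length: dataAddr stays NULL
   assert(initDataAddrModel(5, 0x1000, 16) == 0x1010);  // non-zero: points at the first element
   return 0;
   }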