Skip to content

Commit

Permalink
Initialize dataAddr field only for non-zero length arrays
Browse files Browse the repository at this point in the history
Update the array inline allocation sequence to initialize the dataAddr
field only for non-zero length arrays. The field should be left NULL for
zero length arrays.

Signed-off-by: Shubham Verma <[email protected]>
  • Loading branch information
VermaSh committed Dec 24, 2024
1 parent 73af0c7 commit da4a877
Showing 1 changed file with 104 additions and 128 deletions.
232 changes: 104 additions & 128 deletions runtime/compiler/z/codegen/J9TreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4871,7 +4871,6 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node,

#if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION)
bool isOffHeapAllocationEnabled = TR::Compiler->om.isOffHeapAllocationEnabled();
TR::LabelSymbol *populateFirstDimDataAddrSlot = isOffHeapAllocationEnabled ? generateLabelSymbol(cg) : NULL;
#endif /* defined(J9VM_GC_SPARSE_HEAP_ALLOCATION) */

// oolJumpLabel is a common point that all branches will jump to. From this label, we branch to OOL code.
Expand All @@ -4889,12 +4888,14 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node,
TR::Register *classReg = cg->evaluate(thirdChild);

// In the mainline, first load the first and second dimensions' lengths into registers.
// LGF is needed so that same reg can be used to write NULL to dataAddr slot for 0 length arrays
TR::InstOpCode::Mnemonic loadDimLenOpCode = TR::InstOpCode::LGF;
TR::Register *firstDimLenReg = cg->allocateRegister();
cursor = generateRXInstruction(cg, TR::InstOpCode::LGF, node, firstDimLenReg, generateS390MemoryReference(dimsPtrReg, 4, cg));
cursor = generateRXInstruction(cg, loadDimLenOpCode, node, firstDimLenReg, generateS390MemoryReference(dimsPtrReg, 4, cg));
iComment("Load 1st dim length.");

TR::Register *secondDimLenReg = cg->allocateRegister();
cursor = generateRXInstruction(cg, TR::InstOpCode::L, node, secondDimLenReg, generateS390MemoryReference(dimsPtrReg, 0, cg));
cursor = generateRXInstruction(cg, loadDimLenOpCode, node, secondDimLenReg, generateS390MemoryReference(dimsPtrReg, 0, cg));
iComment("Load 2nd dim length.");

// Check to see if second dimension is indeed 0. If yes, then proceed to handle the case here. Otherwise jump to OOL code.
Expand Down Expand Up @@ -4948,33 +4949,24 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node,
iComment("Init 1st dim mustBeZero field.");
cursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, firstDimLenReg, generateS390MemoryReference(targetReg, fej9->getOffsetOfDiscontiguousArraySizeField(), cg));
iComment("Init 1st dim size field.");
}

#if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION)
if (isOffHeapAllocationEnabled)
{
TR_ASSERT_FATAL_WITH_NODE(node,
(TR::Compiler->om.compressObjectReferences()
&& (fej9->getOffsetOfDiscontiguousDataAddrField() - fej9->getOffsetOfContiguousDataAddrField()) == 8)
|| (!TR::Compiler->om.compressObjectReferences()
&& fej9->getOffsetOfDiscontiguousDataAddrField() == fej9->getOffsetOfContiguousDataAddrField()),
"Offset of dataAddr field in discontiguous array is expected to be 8 bytes more than contiguous array if using compressed refs, "
"or same if using full refs. But was %d bytes for discontiguous and %d bytes for contiguous array.\n",
fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField());

// Load dataAddr slot offset difference since 0 size arrays are treated as discontiguous.
generateRIInstruction(cg,
TR::InstOpCode::LGHI,
node,
temp1Reg,
static_cast<int32_t>(fej9->getOffsetOfDiscontiguousDataAddrField() - fej9->getOffsetOfContiguousDataAddrField()));
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, populateFirstDimDataAddrSlot);
}
else
if (isOffHeapAllocationEnabled)
{
TR_ASSERT_FATAL_WITH_NODE(node,
TR::InstOpCode::LGF == loadDimLenOpCode,
"LGF must be used to load 1st dimension length into firstDimLenReg. "
"Array size field is 32 bits in size but dataAddr slot is 64 bits so "
"to use the same register to intialize dataAddr slot we must clean "
"out top 32 bits of firstDimLenReg. LGF sign extends 32 bit 1st dim "
"size to 64 bits enabling us to use firstDimLenReg to write NULL in "
"the dataAddr field for 0 length arrays.\n");
cursor = generateRXInstruction(cg, TR::InstOpCode::STG, node, firstDimLenReg, generateS390MemoryReference(targetReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg));
iComment("Clear 1st dim dataAddr field.");
}
#endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
{
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionDone);
}

cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionDone);
iComment("Init class field and jump.");

// We end up in this region of the ICF if the first dimension is non-zero and the second dimension is zero.
Expand Down Expand Up @@ -5056,27 +5048,24 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node,
iComment("Init 2st dim mustBeZero field.");
cursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, secondDimLenReg, generateS390MemoryReference(temp2Reg, fej9->getOffsetOfDiscontiguousArraySizeField(), cg));
iComment("Init 2st dim size field.");
}

TR::Register *temp3Reg = cg->allocateRegister();

#if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION)
if (isOffHeapAllocationEnabled)
{
// Populate dataAddr slot for 2nd dimension zero size array.
generateRXInstruction(cg,
TR::InstOpCode::LA,
node,
temp3Reg,
generateS390MemoryReference(temp2Reg, TR::Compiler->om.discontiguousArrayHeaderSizeInBytes(), cg));
generateRXInstruction(cg,
TR::InstOpCode::STG,
node,
temp3Reg,
generateS390MemoryReference(temp2Reg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg));
}
if (isOffHeapAllocationEnabled)
{
TR_ASSERT_FATAL_WITH_NODE(node,
TR::InstOpCode::LGF == loadDimLenOpCode,
"LGF must be used to load 2nd dimension length into secondDimLenReg. "
"Array size field is 32 bits in size but dataAddr slot is 64 bits so "
"to use the same register to intialize dataAddr slot we must clean "
"out top 32 bits of secondDimLenReg. LGF sign extends 32 bit 2nd dim "
"size to 64 bits enabling us to use secondDimLenReg to write NULL in "
"the dataAddr field for 0 length arrays.\n");
cursor = generateRXInstruction(cg, TR::InstOpCode::STG, node, secondDimLenReg, generateS390MemoryReference(temp2Reg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg));
iComment("Clear 2nd dim dataAddr field.");
}
#endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
}

TR::Register *temp3Reg = cg->allocateRegister();
// Store 2nd dim element into 1st dim array slot, compress temp2 if needed
if (comp->target().is64Bit() && comp->useCompressedPointers())
{
Expand All @@ -5103,15 +5092,24 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node,
#if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION)
if (isOffHeapAllocationEnabled)
{
// No offset is needed since 1st dimension array is contiguous.
generateRRInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, temp1Reg, temp1Reg);
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, populateFirstDimDataAddrSlot);
/* Populate dataAddr slot of 1st dimension array. We don't need to worry
* about zero length array since it has already been taken care of.
*/
generateRXInstruction(cg,
TR::InstOpCode::LA,
node,
temp3Reg,
generateS390MemoryReference(targetReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
cursor = generateRXInstruction(cg,
TR::InstOpCode::STG,
node,
temp3Reg,
generateS390MemoryReference(targetReg, fej9->getOffsetOfContiguousDataAddrField(), cg));
iComment("populateFirstDimDataAddrSlot.");
}
else
#endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
{
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionDone);
}

generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionDone);

TR::RegisterDependencyConditions *dependencies = generateRegisterDependencyConditions(0,10,cg);
dependencies->addPostCondition(dimReg, TR::RealRegister::AssignAny);
Expand All @@ -5128,28 +5126,6 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node,
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, oolJumpLabel);
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, oolFailLabel);

#if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION)
if (isOffHeapAllocationEnabled)
{
/* Populate dataAddr slot of 1st dimension array. Arrays of non-zero size
* use contiguous header layout while zero size arrays use discontiguous header layout.
*/
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, populateFirstDimDataAddrSlot);
iComment("populateFirstDimDataAddrSlot.");

generateRXInstruction(cg,
TR::InstOpCode::LA,
node,
temp3Reg,
generateS390MemoryReference(targetReg, temp1Reg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
generateRXInstruction(cg,
TR::InstOpCode::STG,
node,
temp3Reg,
generateS390MemoryReference(targetReg, temp1Reg, fej9->getOffsetOfContiguousDataAddrField(), cg));
}
#endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */

generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionDone, dependencies);

TR::Register *targetRegisterFinal = cg->allocateCollectedReferenceRegister();
Expand Down Expand Up @@ -11119,81 +11095,81 @@ J9::Z::TreeEvaluator::VMnewEvaluator(TR::Node * node, TR::CodeGenerator * cg)
#ifdef J9VM_GC_SPARSE_HEAP_ALLOCATION
if (TR::Compiler->om.isOffHeapAllocationEnabled())
{
/* Here we'll update dataAddr slot for both fixed and variable length arrays. Fixed length arrays are
* simple as we just need to check first child of the node for array size. For variable length arrays
* runtime size checks are needed to determine whether to use contiguous or discontiguous header layout.
*
* In both scenarios, arrays of non-zero size use contiguous header layout while zero size arrays use
* discontiguous header layout.
/* Here we'll update dataAddr slot for fixed and variable non-zero length arrays. DataAddr field
* of 0 length arrays will be NULL'ed.
*/
TR::Register *offsetReg = NULL;
TR::MemoryReference *dataAddrMR = NULL;
TR::MemoryReference *dataAddrSlotMR = NULL;
TR::Register *tmpDataAddrReg = srm->findOrCreateScratchRegister();
// Clear out reg so that it can be used to NULL fields in the array header
iCursor = generateRRInstruction(cg, TR::InstOpCode::XGR, node, tmpDataAddrReg, tmpDataAddrReg, iCursor);

if (isVariableLen && TR::Compiler->om.compressObjectReferences())
/* Clear out padding and dataAddr field of array header assuming it's a 0 length array
* so we don't have to worry about clearing it out later during initialization.
* Dealing with 0 length array here keeps the dataAddr field initialization sequence simple.
*/
if (TR::Compiler->om.compressObjectReferences())
{
/* We need to check enumReg (array size) at runtime to determine correct offset of dataAddr field.
* Here we deal only with compressed refs because dataAddr offset for discontiguous
* and contiguous arrays is the same in full refs.
*/
if (comp->getOption(TR_TraceCG))
traceMsg(comp, "Node (%p): Dealing with compressed refs variable length array.\n", node);

TR_ASSERT_FATAL_WITH_NODE(node,
(fej9->getOffsetOfDiscontiguousDataAddrField() - fej9->getOffsetOfContiguousDataAddrField()) == 8,
"Offset of dataAddr field in discontiguous array is expected to be 8 bytes more than contiguous array. "
"But was %d bytes for discontiguous and %d bytes for contiguous array.\n",
fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField());
fej9->getOffsetOfDiscontiguousArraySizeField() + 8 == fej9->getOffsetOfDiscontiguousDataAddrField(),
"4 byte padding is added after size field in discontiguous header layout for 8 byte alignment. "
"Size field is 4 bytes in size so adding 4 to size field offset should equal offset of dataAddr field. "
"But size field + 8 bytes was %d while dataAddr field offset was %d bytes for discontiguous array.\n",
static_cast<uint32_t>(fej9->getOffsetOfDiscontiguousArraySizeField() + 8), fej9->getOffsetOfDiscontiguousDataAddrField());

offsetReg = cg->allocateRegister();
// Invert enumReg sign. 0 and negative numbers remain unchanged.
iCursor = generateRREInstruction(cg, TR::InstOpCode::LNGFR, node, offsetReg, enumReg, iCursor);
iCursor = generateRSInstruction(cg, TR::InstOpCode::SRLG, node, dataSizeReg, offsetReg, 63, iCursor);
iCursor = generateRSInstruction(cg, TR::InstOpCode::SLLG, node, offsetReg, dataSizeReg, 3, iCursor);
// Inverting the sign bit will leave us with either -8 (if enumCopyReg > 0) or 0 (if enumCopyReg == 0).
iCursor = generateRREInstruction(cg, TR::InstOpCode::LNGR, node, offsetReg, offsetReg, iCursor);

dataAddrMR = generateS390MemoryReference(resReg, offsetReg, TR::Compiler->om.discontiguousArrayHeaderSizeInBytes(), cg);
dataAddrSlotMR = generateS390MemoryReference(resReg, offsetReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg);
}
else if (!isVariableLen && node->getFirstChild()->getOpCode().isLoadConst() && node->getFirstChild()->getInt() == 0)
{
if (comp->getOption(TR_TraceCG))
traceMsg(comp, "Node (%p): Dealing with full/compressed refs fixed length zero size array.\n", node);
{
traceMsg(comp,
"Node (%p): Clean out padding added after size field and dataAddr field assuming 0 length array. "
"If we are not dealing with 0 length array, 0s would be written to first element so no harm done.\n",
node);
}

dataAddrMR = generateS390MemoryReference(resReg, TR::Compiler->om.discontiguousArrayHeaderSizeInBytes(), cg);
dataAddrSlotMR = generateS390MemoryReference(resReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg);
iCursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, tmpDataAddrReg, generateS390MemoryReference(resReg, fej9->getOffsetOfDiscontiguousArraySizeField() + 4, cg), iCursor);
iCursor = generateRXInstruction(cg, TR::InstOpCode::STG, node, tmpDataAddrReg, generateS390MemoryReference(resReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg), iCursor);
}
else
{
TR_ASSERT_FATAL_WITH_NODE(node,
fej9->getOffsetOfDiscontiguousDataAddrField() == fej9->getOffsetOfContiguousDataAddrField(),
"dataAddr field offset is expected to be same for both contiguous and discontiguous arrays in full refs. "
"But was %d bytes for discontiguous and %d bytes for contiguous array.\n",
fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField());

if (comp->getOption(TR_TraceCG))
{
traceMsg(comp,
"Node (%p): Dealing with either full/compressed refs fixed length non-zero size array or full refs variable length array.\n",
"Node (%p): Clean out dataAddr field assuming 0 length array. In full refs mode, "
"dataAddr field offset is same for both contiguous and discontiguous header layout "
"so harm done if our assumption about array length turns out to be wrong.\n",
node);
}

if (!TR::Compiler->om.compressObjectReferences())
{
TR_ASSERT_FATAL_WITH_NODE(node,
fej9->getOffsetOfDiscontiguousDataAddrField() == fej9->getOffsetOfContiguousDataAddrField(),
"dataAddr field offset is expected to be same for both contiguous and discontiguous arrays in full refs. "
"But was %d bytes for discontiguous and %d bytes for contiguous array.\n",
fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField());
}

dataAddrMR = generateS390MemoryReference(resReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg);
dataAddrSlotMR = generateS390MemoryReference(resReg, fej9->getOffsetOfContiguousDataAddrField(), cg);
iCursor = generateRXInstruction(cg, TR::InstOpCode::STG, node, tmpDataAddrReg, generateS390MemoryReference(resReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg), iCursor);
}

iCursor = generateRXInstruction(cg, TR::InstOpCode::LA, node, dataSizeReg, dataAddrMR, iCursor);
iCursor = generateRXInstruction(cg, TR::InstOpCode::STG, node, dataSizeReg, dataAddrSlotMR, iCursor);
// Initialize dataAddr field for non-zero length arrays
TR::MemoryReference *dataAddrMR = generateS390MemoryReference(resReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg);
TR::MemoryReference *dataAddrSlotMR = generateS390MemoryReference(resReg, fej9->getOffsetOfContiguousDataAddrField(), cg);
if (isVariableLen)
{
// We need to check enumReg (array length) at runtime to determine if dataAddr needs to be initialized or not.
if (comp->getOption(TR_TraceCG))
traceMsg(comp, "Node (%p): Dealing with compressed/full refs variable length non-zero length array.\n", node);

if (offsetReg)
iCursor = generateRXInstruction(cg, TR::InstOpCode::LA, node, tmpDataAddrReg, dataAddrMR, iCursor); // load first element address
iCursor = generateRILInstruction(cg, TR::InstOpCode::CFI, node, enumReg, 0, iCursor);
// Write only if array length is non zero
iCursor = generateRSInstruction(cg, TR::InstOpCode::STOCG, node, tmpDataAddrReg, static_cast<uint32_t>(0x2), dataAddrSlotMR, iCursor);
}
else if (!isVariableLen && node->getFirstChild()->getOpCode().isLoadConst() && node->getFirstChild()->getInt() == 0)
{
conditions->addPostCondition(offsetReg, TR::RealRegister::AssignAny);
cg->stopUsingRegister(offsetReg);
if (comp->getOption(TR_TraceCG))
traceMsg(comp, "Node (%p): Dealing with either full/compressed refs fixed length non-zero length array.\n", node);

iCursor = generateRXInstruction(cg, TR::InstOpCode::LA, node, tmpDataAddrReg, dataAddrMR, iCursor); // load first element address
iCursor = generateRXInstruction(cg, TR::InstOpCode::STG, node, tmpDataAddrReg, dataAddrSlotMR, iCursor);
}

srm->reclaimScratchRegister(tmpDataAddrReg);
}
#endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */

Expand Down

0 comments on commit da4a877

Please sign in to comment.