Skip to content

Commit

Permalink
[AArch64] Refactor allocation of locals and stack realignment (#72028)
Browse files Browse the repository at this point in the history
Factor out some stack allocation into a separate function. This patch
splits out the generic portion of a larger refactoring done as a part of
stack clash protection support.

The patch is almost, but not quite NFC. The only difference should
be that where we have adjacent allocations of stack space
for local SVE objects and for local non-SVE (fixed-size) objects, the order
of the `sub sp, ...` and `addvl sp, ...` instructions is reversed, because now
it's done with a single call to `emitFrameOffset`, which happens to
add/subtract the fixed part before the scalable part, e.g.

    addvl sp, sp, #-2
    sub sp, sp, #16, lsl #12
    sub sp, sp, #16

becomes

    sub sp, sp, #16, lsl #12
    sub sp, sp, #16
    addvl sp, sp, #-2
  • Loading branch information
momchil-velikov authored Nov 15, 2023
1 parent 1b781ee commit dedf2c6
Show file tree
Hide file tree
Showing 11 changed files with 105 additions and 95 deletions.
114 changes: 59 additions & 55 deletions llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
static bool produceCompactUnwindFrame(MachineFunction &MF);
static bool needsWinCFI(const MachineFunction &MF);
static StackOffset getSVEStackSize(const MachineFunction &MF);
static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);

/// Returns true if a homogeneous prolog or epilog code can be emitted
/// for the size optimization. If possible, a frame helper call is injected.
Expand Down Expand Up @@ -688,6 +689,44 @@ void AArch64FrameLowering::emitCalleeSavedSVERestores(
emitCalleeSavedRestores(MBB, MBBI, true);
}

/// Allocate \p AllocSize of stack space by decrementing SP, optionally
/// realigning the resulting stack pointer.
///
/// All instructions are inserted before \p MBBI in \p MBB and are flagged
/// as MachineInstr::FrameSetup.
///
/// \param NeedsRealignment If true, the decremented value is first computed
///        into a scratch register and then masked down to the frame's
///        maximum alignment (MachineFrameInfo::getMaxAlign) when written to
///        SP. The function is also marked as having a realigned stack.
/// \param AllocSize Amount to allocate. If it is zero, nothing is emitted at
///        all -- not even the realignment.
/// \param NeedsWinCFI, HasWinCFI, EmitCFI, InitialOffset Forwarded unchanged
///        to emitFrameOffset. \p NeedsWinCFI must be false when
///        \p NeedsRealignment is true.
void AArch64FrameLowering::allocateStackSpace(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    bool NeedsRealignment, StackOffset AllocSize, bool NeedsWinCFI,
    bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset) const {

  // Nothing to allocate.
  if (!AllocSize)
    return;

  DebugLoc DL;
  MachineFunction &MF = *MBB.getParent();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // When realigning, SP must not be written with an unaligned intermediate
  // value, so the subtraction goes through a scratch register instead.
  Register TargetReg =
      NeedsRealignment ? findScratchNonCalleeSaveRegister(&MBB) : AArch64::SP;

  if (NeedsRealignment) {
    // These checks were present in the inline prologue code this function
    // replaces; keep them so a failed scratch-register lookup is caught
    // instead of silently emitting instructions on NoRegister.
    assert(TargetReg != AArch64::NoRegister &&
           "No scratch register available for stack realignment");
    assert(MFI.getMaxAlign() > Align(1) &&
           "Stack realignment requested without an alignment requirement");
    // No need for SEH instructions when realigning; if we're realigning the
    // stack, we've set a frame pointer and already finished the SEH prologue.
    assert(!NeedsWinCFI);
  }

  // SUB Xd/SP, SP, AllocSize
  emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
                  MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
                  EmitCFI, InitialOffset);

  if (NeedsRealignment) {
    const int64_t MaxAlign = MFI.getMaxAlign().value();
    // Mask that clears the low log2(MaxAlign) bits.
    const uint64_t AndMask = ~(MaxAlign - 1);
    // AND SP, Xd, 0b11111...0000
    BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
        .addReg(TargetReg, RegState::Kill)
        .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
        .setMIFlags(MachineInstr::FrameSetup);
    AFI.setStackRealigned(true);
  }
}

static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) {
switch (Reg.id()) {
default:
Expand Down Expand Up @@ -1774,7 +1813,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
}

StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {};
StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;

// Process the SVE callee-saves to determine what space needs to be
Expand All @@ -1787,67 +1826,32 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
++MBBI;
CalleeSavesEnd = MBBI;

AllocateBefore = StackOffset::getScalable(CalleeSavedSize);
AllocateAfter = SVEStackSize - AllocateBefore;
SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
}

// Allocate space for the callee saves (if any).
emitFrameOffset(
MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP, -AllocateBefore, TII,
MachineInstr::FrameSetup, false, false, nullptr,
EmitAsyncCFI && !HasFP && AllocateBefore,
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
StackOffset CFAOffset =
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
allocateStackSpace(MBB, CalleeSavesBegin, false, SVECalleeSavesSize, false,
nullptr, EmitAsyncCFI && !HasFP, CFAOffset);
CFAOffset += SVECalleeSavesSize;

if (EmitAsyncCFI)
emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);

// Finally allocate remaining SVE stack space.
emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
-AllocateAfter, TII, MachineInstr::FrameSetup, false, false,
nullptr, EmitAsyncCFI && !HasFP && AllocateAfter,
AllocateBefore + StackOffset::getFixed(
(int64_t)MFI.getStackSize() - NumBytes));

// Allocate space for the rest of the frame.
if (NumBytes) {
unsigned scratchSPReg = AArch64::SP;

if (NeedsRealignment) {
scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
assert(scratchSPReg != AArch64::NoRegister);
}

// If we're a leaf function, try using the red zone.
if (!canUseRedZone(MF)) {
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
// the correct value here, as NumBytes also includes padding bytes,
// which shouldn't be counted here.
emitFrameOffset(
MBB, MBBI, DL, scratchSPReg, AArch64::SP,
StackOffset::getFixed(-NumBytes), TII, MachineInstr::FrameSetup,
false, NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
SVEStackSize +
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
}
if (NeedsRealignment) {
assert(MFI.getMaxAlign() > Align(1));
assert(scratchSPReg != AArch64::SP);

// SUB X9, SP, NumBytes
// -- X9 is temporary register, so shouldn't contain any live data here,
// -- free to use. This is already produced by emitFrameOffset above.
// AND SP, X9, 0b11111...0000
uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);

BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
.addReg(scratchSPReg, RegState::Kill)
.addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
AFI->setStackRealigned(true);

// No need for SEH instructions here; if we're realigning the stack,
// we've set a frame pointer and already finished the SEH prologue.
assert(!NeedsWinCFI);
}
// Allocate space for the rest of the frame including SVE locals. Align the
// stack as necessary.
assert(!(canUseRedZone(MF) && NeedsRealignment) &&
"Cannot use redzone with stack realignment");
if (!canUseRedZone(MF)) {
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
// the correct value here, as NumBytes also includes padding bytes,
// which shouldn't be counted here.
allocateStackSpace(MBB, CalleeSavesEnd, NeedsRealignment,
SVELocalsSize + StackOffset::getFixed(NumBytes),
NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
CFAOffset);
}

// If we need a base pointer, set it up here. It's whatever the value of the
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AArch64/AArch64FrameLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,11 @@ class AArch64FrameLowering : public TargetFrameLowering {
MachineBasicBlock::iterator MBBI) const;
void emitCalleeSavedSVERestores(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const;
/// Emit, before \p MBBI, the instructions that allocate \p AllocSize of
/// stack space by decrementing SP. If \p NeedsRealignment is set, SP is
/// additionally aligned down to the frame's maximum alignment. Does nothing
/// when \p AllocSize is zero. \p NeedsWinCFI, \p HasWinCFI, \p EmitCFI and
/// \p InitialOffset are passed through to emitFrameOffset.
void allocateStackSpace(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
bool NeedsRealignment, StackOffset AllocSize,
bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
StackOffset InitialOffset) const;

/// Emit target zero call-used regs.
void emitZeroCallUsedRegs(BitVector RegsToZero,
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
name: hasBasepointer
# CHECK-LABEL: name: hasBasepointer
# CHECK: bb.0:
# CHECK: $sp = frame-setup ADDVL_XXI $sp, -1
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
# CHECK: $sp = frame-setup SUBXri $sp, 16, 0
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
# CHECK-NEXT: $x19 = ADDXri $sp, 0, 0
# CHECK: STRXui $x0, $x19, 0
tracksRegLiveness: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #2064
; CHECK-NEXT: addvl sp, sp, #-32
; CHECK-NEXT: addvl sp, sp, #-28
; CHECK-NEXT: sub sp, sp, #2064
; CHECK-NEXT: ldr x8, [sp, #2048]
; CHECK-NEXT: addvl sp, sp, #31
; CHECK-NEXT: addvl sp, sp, #29
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
name: LateScavengingSlot
# CHECK-LABEL: name: LateScavengingSlot
# CHECK: bb.0:
# CHECK: $sp = frame-setup ADDVL_XXI $sp, -1
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 8, 12
# CHECK: $sp = frame-setup SUBXri $sp, 8, 12
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
# CHECK: STRXui killed $[[SCRATCH:x[0-9]+]], $sp, 0
# CHECK-NEXT: $[[SCRATCH]] = ADDVL_XXI $fp, -1
# CHECK-NEXT: STRXui $x0, killed $[[SCRATCH]], 0
Expand Down
Loading

0 comments on commit dedf2c6

Please sign in to comment.