From dedf2c6bb5193652f6ad7d9ff9e676624c2485b7 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov@arm.com>
Date: Wed, 15 Nov 2023 09:27:01 +0000
Subject: [PATCH] [AArch64] Refactor allocation of locals and stack realignment
 (#72028)

Factor out some stack allocation in a separate function. This patch
splits out the generic portion of a larger refactoring done as a part of
stack clash protection support.

The patch is almost, but not quite NFC. The only difference should
be that where we have adjacent allocation of stack space
for local SVE objects and non-local SVE objects the order
of `sub sp, ...` and `addvl sp, ...` instructions is reversed, because now
it's done with a single call to `emitFrameOffset` and it happens
add/subtract the fixed part before the scalable part, e.g.

    addvl sp, sp, #-2
    sub sp, sp, #16, lsl #12
    sub sp, sp, #16

becomes

    sub sp, sp, #16, lsl #12
    sub sp, sp, #16
    addvl sp, sp, #-2
---
 .../Target/AArch64/AArch64FrameLowering.cpp   | 114 +++++++++---------
 .../lib/Target/AArch64/AArch64FrameLowering.h |   5 +
 .../AArch64/framelayout-sve-basepointer.mir   |   4 +-
 .../framelayout-sve-fixed-width-access.mir    |   2 +-
 .../framelayout-sve-scavengingslot.mir        |   4 +-
 llvm/test/CodeGen/AArch64/framelayout-sve.mir |  55 ++++-----
 ...nging-call-disable-stackslot-scavenging.ll |   2 +-
 .../AArch64/spill-stack-realignment.mir       |   2 +-
 llvm/test/CodeGen/AArch64/stack-guard-sve.ll  |   4 +-
 .../AArch64/sve-calling-convention-mixed.ll   |   4 +-
 .../CodeGen/AArch64/sve-fixed-length-fp128.ll |   4 +-
 11 files changed, 105 insertions(+), 95 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 09eeac3c5db6..b036f7582323 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -296,6 +296,7 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
 static bool produceCompactUnwindFrame(MachineFunction &MF);
 static bool needsWinCFI(const MachineFunction &MF);
 static StackOffset getSVEStackSize(const MachineFunction &MF);
+static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
 
 /// Returns true if a homogeneous prolog or epilog code can be emitted
 /// for the size optimization. If possible, a frame helper call is injected.
@@ -688,6 +689,44 @@ void AArch64FrameLowering::emitCalleeSavedSVERestores(
   emitCalleeSavedRestores(MBB, MBBI, true);
 }
 
+void AArch64FrameLowering::allocateStackSpace(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    bool NeedsRealignment, StackOffset AllocSize, bool NeedsWinCFI,
+    bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset) const {
+
+  if (!AllocSize)
+    return;
+
+  DebugLoc DL;
+  MachineFunction &MF = *MBB.getParent();
+  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+  AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  Register TargetReg =
+      NeedsRealignment ? findScratchNonCalleeSaveRegister(&MBB) : AArch64::SP;
+  // SUB Xd/SP, SP, AllocSize
+  emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
+                  MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
+                  EmitCFI, InitialOffset);
+
+  if (NeedsRealignment) {
+    const int64_t MaxAlign = MFI.getMaxAlign().value();
+    const uint64_t AndMask = ~(MaxAlign - 1);
+    // AND SP, Xd, 0b11111...0000
+    BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
+        .addReg(TargetReg, RegState::Kill)
+        .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
+        .setMIFlags(MachineInstr::FrameSetup);
+    AFI.setStackRealigned(true);
+
+    // No need for SEH instructions here; if we're realigning the stack,
+    // we've set a frame pointer and already finished the SEH prologue.
+    assert(!NeedsWinCFI);
+  }
+}
+
 static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) {
   switch (Reg.id()) {
   default:
@@ -1774,7 +1813,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
     }
   }
 
-  StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {};
+  StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
   MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;
 
   // Process the SVE callee-saves to determine what space needs to be
@@ -1787,67 +1826,32 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
       ++MBBI;
     CalleeSavesEnd = MBBI;
 
-    AllocateBefore = StackOffset::getScalable(CalleeSavedSize);
-    AllocateAfter = SVEStackSize - AllocateBefore;
+    SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
+    SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
   }
 
   // Allocate space for the callee saves (if any).
-  emitFrameOffset(
-      MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP, -AllocateBefore, TII,
-      MachineInstr::FrameSetup, false, false, nullptr,
-      EmitAsyncCFI && !HasFP && AllocateBefore,
-      StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
+  StackOffset CFAOffset =
+      StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
+  allocateStackSpace(MBB, CalleeSavesBegin, false, SVECalleeSavesSize, false,
+                     nullptr, EmitAsyncCFI && !HasFP, CFAOffset);
+  CFAOffset += SVECalleeSavesSize;
 
   if (EmitAsyncCFI)
     emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
 
-  // Finally allocate remaining SVE stack space.
-  emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
-                  -AllocateAfter, TII, MachineInstr::FrameSetup, false, false,
-                  nullptr, EmitAsyncCFI && !HasFP && AllocateAfter,
-                  AllocateBefore + StackOffset::getFixed(
-                                       (int64_t)MFI.getStackSize() - NumBytes));
-
-  // Allocate space for the rest of the frame.
-  if (NumBytes) {
-    unsigned scratchSPReg = AArch64::SP;
-
-    if (NeedsRealignment) {
-      scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
-      assert(scratchSPReg != AArch64::NoRegister);
-    }
-
-    // If we're a leaf function, try using the red zone.
-    if (!canUseRedZone(MF)) {
-      // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
-      // the correct value here, as NumBytes also includes padding bytes,
-      // which shouldn't be counted here.
-      emitFrameOffset(
-          MBB, MBBI, DL, scratchSPReg, AArch64::SP,
-          StackOffset::getFixed(-NumBytes), TII, MachineInstr::FrameSetup,
-          false, NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
-          SVEStackSize +
-              StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
-    }
-    if (NeedsRealignment) {
-      assert(MFI.getMaxAlign() > Align(1));
-      assert(scratchSPReg != AArch64::SP);
-
-      // SUB X9, SP, NumBytes
-      //   -- X9 is temporary register, so shouldn't contain any live data here,
-      //   -- free to use. This is already produced by emitFrameOffset above.
-      // AND SP, X9, 0b11111...0000
-      uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
-
-      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
-          .addReg(scratchSPReg, RegState::Kill)
-          .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
-      AFI->setStackRealigned(true);
-
-      // No need for SEH instructions here; if we're realigning the stack,
-      // we've set a frame pointer and already finished the SEH prologue.
-      assert(!NeedsWinCFI);
-    }
+  // Allocate space for the rest of the frame including SVE locals. Align the
+  // stack as necessary.
+  assert(!(canUseRedZone(MF) && NeedsRealignment) &&
+         "Cannot use redzone with stack realignment");
+  if (!canUseRedZone(MF)) {
+    // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
+    // the correct value here, as NumBytes also includes padding bytes,
+    // which shouldn't be counted here.
+    allocateStackSpace(MBB, CalleeSavesEnd, NeedsRealignment,
+                       SVELocalsSize + StackOffset::getFixed(NumBytes),
+                       NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
+                       CFAOffset);
   }
 
   // If we need a base pointer, set it up here. It's whatever the value of the
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 147b5c181be5..f3313f3b53ff 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -150,6 +150,11 @@ class AArch64FrameLowering : public TargetFrameLowering {
                                   MachineBasicBlock::iterator MBBI) const;
   void emitCalleeSavedSVERestores(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI) const;
+  void allocateStackSpace(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator MBBI,
+                          bool NeedsRealignment, StackOffset AllocSize,
+                          bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
+                          StackOffset InitialOffset) const;
 
   /// Emit target zero call-used regs.
   void emitZeroCallUsedRegs(BitVector RegsToZero,
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir
index 623c0f240be4..265c474fbc5d 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir
@@ -4,8 +4,8 @@
 name: hasBasepointer
 # CHECK-LABEL: name: hasBasepointer
 # CHECK: bb.0:
-# CHECK:      $sp = frame-setup ADDVL_XXI $sp, -1
-# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK:      $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
 # CHECK-NEXT: $x19 = ADDXri $sp, 0, 0
 # CHECK:      STRXui $x0, $x19, 0
 tracksRegLiveness: true
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir
index e367a380f8ba..35fd7ca77d5c 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir
@@ -7,9 +7,9 @@
   ; CHECK:       // %bb.0: // %entry
   ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
   ; CHECK-NEXT:    mov x29, sp
+  ; CHECK-NEXT:    sub sp, sp, #2064
   ; CHECK-NEXT:    addvl sp, sp, #-32
   ; CHECK-NEXT:    addvl sp, sp, #-28
-  ; CHECK-NEXT:    sub sp, sp, #2064
   ; CHECK-NEXT:    ldr x8, [sp, #2048]
   ; CHECK-NEXT:    addvl sp, sp, #31
   ; CHECK-NEXT:    addvl sp, sp, #29
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir
index d54f67634d02..680f9c335c25 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir
@@ -4,9 +4,9 @@
 name: LateScavengingSlot
 # CHECK-LABEL: name: LateScavengingSlot
 # CHECK: bb.0:
-# CHECK:      $sp = frame-setup ADDVL_XXI $sp, -1
-# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 8, 12
+# CHECK:      $sp = frame-setup SUBXri $sp, 8, 12
 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
 # CHECK:      STRXui killed $[[SCRATCH:x[0-9]+]], $sp, 0
 # CHECK-NEXT: $[[SCRATCH]] = ADDVL_XXI $fp, -1
 # CHECK-NEXT: STRXui $x0, killed $[[SCRATCH]], 0
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
index 7c87587c6dc4..213d7919e4a7 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
@@ -60,10 +60,10 @@
 # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22
 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
-# CHECK-NEXT: CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22
 
 # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2
 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 32
@@ -77,7 +77,7 @@
 # ASM-LABEL: test_allocate_sve:
 # ASM:       .cfi_def_cfa_offset 16
 # ASM-NEXT:  .cfi_offset w29, -16
-# ASM:       .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+# ASM:       .cfi_def_cfa_offset 32
 # ASM:       .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 16 * VG
 # ASM:       .cfi_def_cfa wsp, 32
 # ASM:       .cfi_def_cfa_offset 16
@@ -87,7 +87,7 @@
 #
 # UNWINDINFO:       DW_CFA_def_cfa_offset: +16
 # UNWINDINFO-NEXT:  DW_CFA_offset: reg29 -16
-# UNWINDINFO:       DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO:       DW_CFA_def_cfa_offset: +32 
 # UNWINDINFO:       DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
 # UNWINDINFO:       DW_CFA_def_cfa: reg31 +32
 # UNWINDINFO:       DW_CFA_def_cfa_offset: +16
@@ -125,10 +125,11 @@ body:             |
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w20, -8
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w21, -16
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -32
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22
 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 48
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22
+
 #
 # CHECK-NEXT: $x20 = IMPLICIT_DEF
 # CHECK-NEXT: $x21 = IMPLICIT_DEF
@@ -149,7 +150,7 @@ body:             |
 # ASM:       .cfi_offset w20, -8
 # ASM-NEXT:  .cfi_offset w21, -16
 # ASM-NEXT:  .cfi_offset w29, -32
-# ASM:       .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 16 * VG
+# ASM:       .cfi_def_cfa_offset 48
 # ASM:       .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 16 * VG
 #
 # ASM:       .cfi_def_cfa wsp, 48
@@ -164,7 +165,7 @@ body:             |
 # UNWINDINFO:       DW_CFA_offset: reg20 -8
 # UNWINDINFO-NEXT:  DW_CFA_offset: reg21 -16
 # UNWINDINFO-NEXT:  DW_CFA_offset: reg29 -32
-# UNWINDINFO:       DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO:       DW_CFA_def_cfa_offset: +48 
 # UNWINDINFO:       DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
 #
 # UNWINDINFO:       DW_CFA_def_cfa: reg31 +48
@@ -205,9 +206,9 @@ body:             |
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 16
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
 # CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 16, 0
-# CHECK-NEXT: $sp = ANDXri killed $[[TMP]]
+# CHECK-NEXT: $[[TMP]] = frame-setup ADDVL_XXI $[[TMP]], -2
+# CHECK-NEXT: $sp = frame-setup ANDXri killed $[[TMP]]
 # CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16
 # CHECK-NEXT: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2
@@ -267,9 +268,9 @@ body:             |
 # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22
 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22
 
 # CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16
@@ -292,7 +293,7 @@ body:             |
 # ASM-LABEL:  test_address_sve:
 # ASM:       .cfi_def_cfa_offset 16
 # ASM-NEXT:  .cfi_offset w29, -16
-# ASM:       .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+# ASM:       .cfi_def_cfa_offset 32
 # ASM:       .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 24 * VG
 #
 # ASM:       .cfi_def_cfa wsp, 32
@@ -302,7 +303,7 @@ body:             |
 #
 # UNWINDINFO:       DW_CFA_def_cfa_offset: +16
 # UNWINDINFO-NEXT:  DW_CFA_offset: reg29 -16
-# UNWINDINFO:       DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO:       DW_CFA_def_cfa_offset: +32
 # UNWINDINFO:       DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
 #
 # UNWINDINFO:       DW_CFA_def_cfa: reg31 +32
@@ -353,8 +354,8 @@ body:             |
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 16
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
 
 # CHECK-NEXT: STR_ZXI $z0, $fp, -1
 # CHECK-NEXT: STR_ZXI $z1, $fp, -2
@@ -429,9 +430,9 @@ body:             |
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
 
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22
 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22
 # CHECK:      $[[TMP:x[0-9]+]] = ADDVL_XXI $sp, 1
 # CHECK-NEXT: $x0 = LDRXui killed $[[TMP]], 4
@@ -448,7 +449,7 @@ body:             |
 # ASM-LABEL: test_stack_arg_sve:
 # ASM:       .cfi_def_cfa_offset 16
 # ASM-NEXT:  .cfi_offset w29, -16
-# ASM:       .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+# ASM:       .cfi_def_cfa_offset 32
 # ASM:       .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG
 #
 # ASM:       .cfi_def_cfa wsp, 32
@@ -458,7 +459,7 @@ body:             |
 
 # UNWINDINFO:      DW_CFA_def_cfa_offset: +16
 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
-# UNWINDINFO:      DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO:      DW_CFA_def_cfa_offset: +32
 # UNWINDINFO:      DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
 #
 # UNWINDINFO:      DW_CFA_def_cfa: reg31 +32
@@ -640,8 +641,8 @@ body:             |
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w19, -16
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -24
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -32
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
 # CHECK-NEXT: $x19 = ADDXri $sp, 0, 0
 # CHECK-NEXT: STRXui $xzr, $x19, 0
 # CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
@@ -863,9 +864,9 @@ body:             |
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22
-# CHECK:      $sp = frame-setup ADDVL_XXI $sp, -1
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22
 # CHECK:      $sp = frame-setup SUBXri $sp, 32, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22
+# CHECK:      $sp = frame-setup ADDVL_XXI $sp, -1
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22
 
 # CHECK:      $sp = frame-destroy ADDXri $sp, 32, 0
@@ -916,7 +917,7 @@ body:             |
 # ASM-NEXT:  .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 32 - 48 * VG
 # ASM-NEXT:  .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 32 - 56 * VG
 # ASM-NEXT:  .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 32 - 64 * VG
-# ASM:       .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 152 * VG
+# ASM:       .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 64 + 144 * VG 
 # ASM:       .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 64 + 152 * VG
 #
 # ASM:       .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 152 * VG
@@ -950,7 +951,7 @@ body:             |
 # UNWINDINFO-NEXT: DW_CFA_expression: reg77 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -48, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
 # UNWINDINFO-NEXT: DW_CFA_expression: reg78 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -56, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
 # UNWINDINFO-NEXT: DW_CFA_expression: reg79 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -64, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
-# UNWINDINFO:      DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO:      DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +64, DW_OP_plus, DW_OP_consts +144, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
 # UNWINDINFO:      DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +64, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
 #
 # UNWINDINFO:      DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
@@ -1031,9 +1032,9 @@ body:             |
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
 # CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 16, 0
-# CHECK-NEXT: $sp = ANDXri killed $[[TMP]]
+# CHECK-NEXT: $[[TMP]] = frame-setup ADDVL_XXI $[[TMP]], -1
+# CHECK-NEXT: $sp = frame-setup ANDXri killed $[[TMP]]
 
 # CHECK:      $sp = frame-destroy ADDVL_XXI $fp, -18
 # CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 4
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll
index e4cd4d6c05c5..45ca7844b065 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll
@@ -20,8 +20,8 @@ define void @test_no_stackslot_scavenging(float %f) #0 {
 ; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
 ; CHECK-NEXT:    str x29, [sp, #64] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x30, x24, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    str s0, [sp, #12] // 4-byte Folded Spill
 ; CHECK-NEXT:    //APP
 ; CHECK-NEXT:    //NO_APP
diff --git a/llvm/test/CodeGen/AArch64/spill-stack-realignment.mir b/llvm/test/CodeGen/AArch64/spill-stack-realignment.mir
index 1b9411d07f43..f6fc627ac2d3 100644
--- a/llvm/test/CodeGen/AArch64/spill-stack-realignment.mir
+++ b/llvm/test/CodeGen/AArch64/spill-stack-realignment.mir
@@ -21,7 +21,7 @@ stack:
   - { id: 1, size: 4, alignment: 4, local-offset: -68 }
 
 # CHECK: body:
-# CHECK:   $sp = ANDXri killed ${{x[0-9]+}}, 7865
+# CHECK:   $sp = frame-setup ANDXri killed ${{x[0-9]+}}, 7865
 # CHECK:   STRSui $s0, $sp, 0
 # CHECK:   STRSui $s0, $fp, 7
 body:             |
diff --git a/llvm/test/CodeGen/AArch64/stack-guard-sve.ll b/llvm/test/CodeGen/AArch64/stack-guard-sve.ll
index 1672a7eb8739..5acbb22bf1ab 100644
--- a/llvm/test/CodeGen/AArch64/stack-guard-sve.ll
+++ b/llvm/test/CodeGen/AArch64/stack-guard-sve.ll
@@ -148,9 +148,9 @@ entry:
 
 ; CHECK-LABEL: local_stack_alloc:
 ; CHECK: mov x29, sp
-; CHECK: addvl sp, sp, #-2
 ; CHECK: sub sp, sp, #16, lsl #12
 ; CHECK: sub sp, sp, #16
+; CHECK: addvl sp, sp, #-2
 
 ; Stack guard is placed below the SVE stack area (and above all fixed-width objects)
 ; CHECK-DAG: add [[STACK_GUARD_SPILL_PART_LOC:x[0-9]+]], sp, #8, lsl #12
@@ -198,9 +198,9 @@ entry:
 
 ; CHECK-LABEL: local_stack_alloc_strong:
 ; CHECK: mov x29, sp
-; CHECK: addvl sp, sp, #-3
 ; CHECK: sub sp, sp, #16, lsl #12
 ; CHECK: sub sp, sp, #16
+; CHECK: addvl sp, sp, #-3
 
 ; Stack guard is placed at the top of the SVE stack area
 ; CHECK-DAG: ldr [[STACK_GUARD:x[0-9]+]], [{{x[0-9]+}}, :lo12:__stack_chk_guard]
diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
index b7505625cde9..6738bddb8af3 100644
--- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
+++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
@@ -56,8 +56,8 @@ define float @foo2(ptr %x0, ptr %x1) nounwind {
 ; CHECK-LABEL: foo2:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-4
 ; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    addvl sp, sp, #-4
 ; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    fmov s0, #1.00000000
 ; CHECK-NEXT:    add x8, sp, #16
@@ -699,8 +699,8 @@ define void @verify_all_operands_are_initialised() {
 ; CHECK-LABEL: verify_all_operands_are_initialised:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
index da7e772461e2..9d9d4a64a5d1 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
@@ -9,8 +9,8 @@ define void @fcvt_v4f64_v4f128(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    add x8, sp, #48
 ; CHECK-NEXT:    mov x19, x1
@@ -59,8 +59,8 @@ define void @fcvt_v4f128_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    sub sp, sp, #128
+; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    ldr q1, [x0, #64]
 ; CHECK-NEXT:    ldr q0, [x0, #80]
 ; CHECK-NEXT:    mov x19, x1