Skip to content

Commit

Permalink
Implement Arm32 VSHLL and QADD16 instructions (#7301)
Browse files Browse the repository at this point in the history
  • Loading branch information
gdkchan authored Sep 12, 2024
1 parent ca59c3f commit 2f36a66
Show file tree
Hide file tree
Showing 6 changed files with 172 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/ARMeilleure/Decoders/OpCodeTable.cs
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,7 @@ static OpCodeTable()
SetA32("<<<<01101000xxxxxxxxxxxxxx01xxxx", InstName.Pkh, InstEmit32.Pkh, OpCode32AluRsImm.Create);
SetA32("11110101xx01xxxx1111xxxxxxxxxxxx", InstName.Pld, InstEmit32.Nop, OpCode32.Create);
SetA32("11110111xx01xxxx1111xxxxxxx0xxxx", InstName.Pld, InstEmit32.Nop, OpCode32.Create);
SetA32("<<<<01100010xxxxxxxx11110001xxxx", InstName.Qadd16, InstEmit32.Qadd16, OpCode32AluReg.Create);
SetA32("<<<<011011111111xxxx11110011xxxx", InstName.Rbit, InstEmit32.Rbit, OpCode32AluReg.Create);
SetA32("<<<<011010111111xxxx11110011xxxx", InstName.Rev, InstEmit32.Rev, OpCode32AluReg.Create);
SetA32("<<<<011010111111xxxx11111011xxxx", InstName.Rev16, InstEmit32.Rev16, OpCode32AluReg.Create);
Expand Down Expand Up @@ -1034,6 +1035,7 @@ static OpCodeTable()
SetAsimd("111100101x>>>xxxxxxx0101>xx1xxxx", InstName.Vshl, InstEmit32.Vshl, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32);
SetAsimd("1111001x0xxxxxxxxxxx0100xxx0xxxx", InstName.Vshl, InstEmit32.Vshl_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
SetAsimd("1111001x1x>>>xxxxxxx101000x1xxxx", InstName.Vshll, InstEmit32.Vshll, OpCode32SimdShImmLong.Create, OpCode32SimdShImmLong.CreateT32); // A1 encoding.
SetAsimd("111100111x11<<10xxxx001100x0xxxx", InstName.Vshll, InstEmit32.Vshll2, OpCode32SimdMovn.Create, OpCode32SimdMovn.CreateT32); // A2 encoding.
SetAsimd("1111001x1x>>>xxxxxxx0000>xx1xxxx", InstName.Vshr, InstEmit32.Vshr, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32);
SetAsimd("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn, InstEmit32.Vshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32);
SetAsimd("111100111x>>>xxxxxxx0101>xx1xxxx", InstName.Vsli, InstEmit32.Vsli_I, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32);
Expand Down
113 changes: 113 additions & 0 deletions src/ARMeilleure/Instructions/InstEmitAlu32.cs
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,16 @@ public static void Pkh(ArmEmitterContext context)
EmitAluStore(context, res);
}

/// <summary>
/// Emits code for the A32 QADD16 instruction: the two 16-bit halves of Rn and Rm are added
/// independently as signed values, each sum is saturated to the signed 16-bit range, and the
/// packed pair is written to Rd. The Q flag is left untouched (saturation is requested with
/// <c>setQ: false</c>).
/// </summary>
/// <param name="context">Emitter context whose current opcode is the instruction being translated.</param>
public static void Qadd16(ArmEmitterContext context)
{
    OpCode32AluReg op = (OpCode32AluReg)context.CurrOp;

    Operand rn = GetIntA32(context, op.Rn);
    Operand rm = GetIntA32(context, op.Rm);

    // Per-half operation: plain 32-bit add followed by a clamp to 16 signed bits.
    void SaturatingAdd(Operand d, Operand n, Operand m)
    {
        EmitSaturateRange(context, d, context.Add(n, m), 16, unsigned: false, setQ: false);
    }

    Operand packed = EmitSigned16BitPair(context, rn, rm, SaturatingAdd);

    SetIntA32(context, op.Rd, packed);
}

public static void Rbit(ArmEmitterContext context)
{
Operand m = GetAluM(context);
Expand Down Expand Up @@ -976,6 +986,94 @@ void SetD(int part, Operand value)
}
}

/// <summary>
/// Emits code that clamps a 32-bit <paramref name="value"/> to the range representable in
/// <paramref name="saturateTo"/> bits and copies the outcome into <paramref name="result"/>.
/// </summary>
/// <param name="context">Emitter context that receives the generated operations.</param>
/// <param name="result">Destination operand for the (possibly clamped) value.</param>
/// <param name="value">Value to clamp; its bit 31 is used as the sign when choosing the clamp bound.</param>
/// <param name="saturateTo">Width in bits of the target range; at most 32, and below 32 when <paramref name="unsigned"/> is set.</param>
/// <param name="unsigned">True to clamp to an unsigned range, false to clamp to a signed range.</param>
/// <param name="setQ">When true, the Q flag is set to 1 on the path where the value had to be clamped.</param>
private static void EmitSaturateRange(ArmEmitterContext context, Operand result, Operand value, uint saturateTo, bool unsigned, bool setQ = true)
{
    Debug.Assert(saturateTo <= 32);
    Debug.Assert(!unsigned || saturateTo < 32);

    int bits = (int)saturateTo;

    if (bits == 32 && !unsigned)
    {
        // A signed 32-bit range already holds every input, so the value is forwarded as is.
        context.Copy(result, value);

        return;
    }

    if (bits == 0)
    {
        // A zero-width range can only ever hold zero.
        context.Copy(result, Const(0));

        return;
    }

    // Number of high bits that fall outside of the target width.
    int spareBits = 32 - bits;

    Operand truncated;

    if (unsigned)
    {
        // Negative inputs must always saturate (to zero). The mask drops the sign bit,
        // which guarantees that the truncated value differs from a negative original.
        int lowMask = (int)(uint.MaxValue >> spareBits);

        truncated = context.BitwiseAnd(value, Const(lowMask));
    }
    else
    {
        // Sign-extend from the target width: shift up, then arithmetic shift back down.
        Operand shiftedUp = context.ShiftLeft(value, Const(spareBits));

        truncated = context.ShiftRightSI(shiftedUp, Const(spareBits));
    }

    // A zero difference means the value already fits and no saturation is needed.
    Operand lblInRange = Label();
    context.BranchIfFalse(lblInRange, context.Subtract(value, truncated));

    // Saturation path: pick the bound matching the sign of the input.
    Operand clamped;

    if (unsigned)
    {
        if (bits == 31)
        {
            // Every non-negative signed 32-bit input fits in 31 unsigned bits, so the only
            // way to reach this path is with a negative input, which clamps to zero.
            clamped = Const(0);
        }
        else
        {
            // Negative -> 0, non-negative -> all ones in the low "bits" bits.
            Operand signMask = context.ShiftRightSI(value, Const(31));
            Operand nonNegativeMask = context.BitwiseNot(signMask);

            clamped = context.ShiftRightUI(nonNegativeMask, Const(spareBits));
        }
    }
    else if (bits == 1)
    {
        // One signed bit only represents 0 and -1, which is exactly the sign mask.
        clamped = context.ShiftRightSI(value, Const(31));
    }
    else
    {
        // Largest positive value for the width; XOR with the sign mask flips it
        // into the most negative value when the input is negative.
        Operand maxPositive = Const(uint.MaxValue >> (33 - bits));
        Operand signMask = context.ShiftRightSI(value, Const(31));

        clamped = context.BitwiseExclusiveOr(maxPositive, signMask);
    }

    if (setQ)
    {
        SetFlag(context, PState.QFlag, Const(1));
    }

    context.Copy(result, clamped);

    Operand lblDone = Label();
    context.Branch(lblDone);

    // In-range path: the input is the result.
    context.MarkLabel(lblInRange);

    context.Copy(result, value);

    context.MarkLabel(lblDone);
}

private static void EmitSaturateUqadd(ArmEmitterContext context, Operand result, Operand value, uint saturateTo)
{
Debug.Assert(saturateTo <= 32);
Expand Down Expand Up @@ -1053,6 +1151,21 @@ private static void EmitSaturateUqsub(ArmEmitterContext context, Operand result,
context.MarkLabel(lblExit);
}

/// <summary>
/// Runs <paramref name="elementAction"/> first on the low and then on the high 16-bit halves of
/// <paramref name="rn"/> and <paramref name="rm"/>, with each half widened to 32 bits as a signed
/// value, and packs the low 16 bits of both outcomes into a single 32-bit operand.
/// </summary>
/// <param name="context">Emitter context that receives the generated operations.</param>
/// <param name="rn">First 32-bit source holding two 16-bit elements.</param>
/// <param name="rm">Second 32-bit source holding two 16-bit elements.</param>
/// <param name="elementAction">
/// Callback invoked as (destination, n, m); the destination local is read back after each call.
/// </param>
/// <returns>Operand with the low-half outcome in bits 0-15 and the high-half outcome in bits 16-31.</returns>
private static Operand EmitSigned16BitPair(ArmEmitterContext context, Operand rn, Operand rm, Action<Operand, Operand, Operand> elementAction)
{
    // A single local is handed to the callback as the destination for both halves.
    Operand scratch = context.AllocateLocal(OperandType.I32);

    // Low halves: sign-extend bits 0-15.
    Operand nLow = context.SignExtend16(OperandType.I32, rn);
    Operand mLow = context.SignExtend16(OperandType.I32, rm);

    elementAction(scratch, nLow, mLow);

    // Take the low 16 bits of the first outcome before the local is passed to the second call.
    Operand lowHalf = context.ZeroExtend16(OperandType.I32, scratch);

    // High halves: an arithmetic shift brings bits 16-31 down while keeping the sign.
    Operand nHigh = context.ShiftRightSI(rn, Const(16));
    Operand mHigh = context.ShiftRightSI(rm, Const(16));

    elementAction(scratch, nHigh, mHigh);

    Operand highHalf = context.ShiftLeft(scratch, Const(16));

    return context.BitwiseOr(lowHalf, highHalf);
}

private static Operand EmitUnsigned16BitPair(ArmEmitterContext context, Operand rn, Operand rm, Action<Operand, Operand, Operand> elementAction)
{
Operand tempD = context.AllocateLocal(OperandType.I32);
Expand Down
32 changes: 32 additions & 0 deletions src/ARMeilleure/Instructions/InstEmitSimdShift32.cs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,38 @@ public static void Vshll(ArmEmitterContext context)
context.Copy(GetVecA32(op.Qd), res);
}

/// <summary>
/// Emits code for the VSHLL form registered in the opcode table as the A2 encoding: every
/// source element is widened to twice its size and shifted left by the source element width
/// in bits (<c>8 &lt;&lt; Size</c>), then the assembled vector is copied to Qd.
/// </summary>
/// <param name="context">Emitter context whose current opcode is the instruction being translated.</param>
public static void Vshll2(ArmEmitterContext context)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    Operand res = context.VectorZero();

    int elemSize = op.Size;
    int elemCount = op.GetBytesCount() >> elemSize;

    // The shift amount is the width in bits of one source element.
    int shift = 8 << elemSize;

    for (int i = 0; i < elemCount; i++)
    {
        Operand elem = EmitVectorExtract32(context, op.Qm, op.Im + i, elemSize, !op.U);

        if (elemSize == 2)
        {
            // 32-bit elements are widened to 64 bits before shifting.
            elem = op.U
                ? context.ZeroExtend32(OperandType.I64, elem)
                : context.SignExtend32(OperandType.I64, elem);
        }

        elem = context.ShiftLeft(elem, Const(shift));

        // Destination elements are one size step larger than the source ones.
        res = EmitVectorInsert(context, res, elem, i, elemSize + 1);
    }

    context.Copy(GetVecA32(op.Qd), res);
}

public static void Vshr(ArmEmitterContext context)
{
OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
Expand Down
1 change: 1 addition & 0 deletions src/ARMeilleure/Instructions/InstName.cs
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,7 @@ enum InstName
Pld,
Pop,
Push,
Qadd16,
Rev,
Revsh,
Rsb,
Expand Down
1 change: 1 addition & 0 deletions src/Ryujinx.Tests/Cpu/CpuTestAlu32.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ private static uint[] UQAddSub16()
{
return new[]
{
0xe6200f10u, // QADD16 R0, R0, R0
0xe6600f10u, // UQADD16 R0, R0, R0
0xe6600f70u, // UQSUB16 R0, R0, R0
};
Expand Down
23 changes: 23 additions & 0 deletions src/Ryujinx.Tests/Cpu/CpuTestSimd32.cs
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,29 @@ public void Vmovn_V([Range(0u, 3u)] uint rd,
CompareAgainstUnicorn();
}

// Builds the VSHLL form whose shift amount equals the element width, for the given
// destination, source and size fields, and compares the result against Unicorn.
[Test, Pairwise, Description("VSHLL.<size> {<Vd>}, <Vm>, #<imm>")]
public void Vshll([Values(0u, 2u)] uint rd,
                  [Values(1u, 0u)] uint rm,
                  [Values(0u, 1u, 2u)] uint size,
                  [Random(RndCnt)] ulong z,
                  [Random(RndCnt)] ulong a,
                  [Random(RndCnt)] ulong b)
{
    uint baseOpcode = 0xf3b20300u; // VSHLL.I8 Q0, D0, #8

    // Vm: low four bits at [3:0], fifth bit (M) at bit 5.
    uint vmBits = (rm & 0xf) | ((rm & 0x10) << 1);

    // Vd: low four bits at [15:12], fifth bit (D) at bit 22.
    uint vdBits = ((rd & 0xf) << 12) | ((rd & 0x10) << 18);

    // Element size field at [19:18].
    uint sizeBits = size << 18;

    uint opcode = baseOpcode | vmBits | vdBits | sizeBits;

    SingleOpcode(
        opcode,
        v0: MakeVectorE0E1(z, z),
        v1: MakeVectorE0E1(a, z),
        v2: MakeVectorE0E1(b, z));

    CompareAgainstUnicorn();
}

[Test, Pairwise, Description("VSWP D0, D0")]
public void Vswp([Values(0u, 1u)] uint rd,
[Values(0u, 1u)] uint rm,
Expand Down

0 comments on commit 2f36a66

Please sign in to comment.