Skip to content

Commit

Permalink
[ARM64_DYNAREC] Reworked MUL/IMUL opcodes a bit
Browse files Browse the repository at this point in the history
  • Loading branch information
ptitSeb committed Dec 22, 2024
1 parent 25b4dd1 commit 1bb7d9e
Show file tree
Hide file tree
Showing 6 changed files with 162 additions and 41 deletions.
8 changes: 8 additions & 0 deletions src/dynarec/arm64/arm64_emitter.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,17 +205,25 @@ int convert_bitmask(uint64_t bitmask);

// Register-form subtract emitters, all built on ADDSUB_REG_gen.
// Pattern visible in this list: the first argument is 1 for the "x" variants, 0 for the "w"
// variants and rex.w (or a rex.is32bits test) for the "xw"/"z" variants; the third argument is 1
// for the flag-setting "S" variants, which are emitted through FEMIT instead of EMIT; the fourth
// argument is 0b00 for plain/LSL forms, 0b01 for LSR forms and 0b10 for ASR forms, followed by
// the shift amount applied to Rm.
// NOTE(review): the xw/z variants reference `rex` from the expansion site, so they presumably
// only compile where a `rex` variable is in scope -- confirm.
#define SUBx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 1, 0, 0b00, Rm, 0, Rn, Rd))
#define SUBSx_REG(Rd, Rn, Rm) FEMIT(ADDSUB_REG_gen(1, 1, 1, 0b00, Rm, 0, Rn, Rd))
#define SUBSx_REG_ASR(Rd, Rn, Rm, asr) FEMIT(ADDSUB_REG_gen(1, 1, 1, 0b10, Rm, asr, Rn, Rd))
#define SUBx_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(1, 1, 0, 0b00, Rm, lsl, Rn, Rd))
#define SUBw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 1, 0, 0b00, Rm, 0, Rn, Rd))
#define SUBw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(0, 1, 0, 0b00, Rm, lsl, Rn, Rd))
#define SUBSw_REG(Rd, Rn, Rm) FEMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, 0, Rn, Rd))
#define SUBSw_REG_LSL(Rd, Rn, Rm, lsl) FEMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, lsl, Rn, Rd))
#define SUBSw_REG_LSR(Rd, Rn, Rm, lsr) FEMIT(ADDSUB_REG_gen(0, 1, 1, 0b01, Rm, lsr, Rn, Rd))
#define SUBSw_REG_ASR(Rd, Rn, Rm, asr) FEMIT(ADDSUB_REG_gen(0, 1, 1, 0b10, Rm, asr, Rn, Rd))
#define SUBxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 1, 0, 0b00, Rm, 0, Rn, Rd))
#define SUBz_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.is32bits?0:1, 1, 0, 0b00, Rm, 0, Rn, Rd))
#define SUBSxw_REG(Rd, Rn, Rm) FEMIT(ADDSUB_REG_gen(rex.w, 1, 1, 0b00, Rm, 0, Rn, Rd))
#define SUBSxw_REG_ASR(Rd, Rn, Rm, asr) FEMIT(ADDSUB_REG_gen(rex.w, 1, 1, 0b10, Rm, asr, Rn, Rd))
// Compare forms: the matching flag-setting subtract with the zero register (xZR/wZR) as
// destination, so only the flags of "Rn - (Rm shifted)" are kept.
#define CMPSx_REG(Rn, Rm) SUBSx_REG(xZR, Rn, Rm)
#define CMPSx_REG_ASR(Rn, Rm, asr) SUBSx_REG_ASR(xZR, Rn, Rm, asr)
#define CMPSw_REG(Rn, Rm) SUBSw_REG(wZR, Rn, Rm)
#define CMPSw_REG_LSR(Rn, Rm, lsr) SUBSw_REG_LSR(wZR, Rn, Rm, lsr)
#define CMPSw_REG_ASR(Rn, Rm, asr) SUBSw_REG_ASR(wZR, Rn, Rm, asr)
#define CMPSxw_REG(Rn, Rm) SUBSxw_REG(xZR, Rn, Rm)
#define CMPSxw_REG_ASR(Rn, Rm, asr) SUBSxw_REG_ASR(xZR, Rn, Rm, asr)
// Negate forms: subtract Rm from the zero register.
// NOTE(review): these three expansions end with ';', so a call site written as `NEGx_REG(a, b);`
// yields an extra empty statement, which would break an unbraced if/else -- confirm intended.
#define NEGx_REG(Rd, Rm) SUBx_REG(Rd, xZR, Rm);
#define NEGw_REG(Rd, Rm) SUBw_REG(Rd, wZR, Rm);
#define NEGxw_REG(Rd, Rm) SUBxw_REG(Rd, xZR, Rm);
Expand Down
62 changes: 46 additions & 16 deletions src/dynarec/arm64/dynarec_arm64_00.c
Original file line number Diff line number Diff line change
Expand Up @@ -836,8 +836,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
BICw(xFlags, xFlags, x1);
}
IFX(X_CF | X_OF) {
ASRx(x4, gd, 63);
CMPSx_REG(x3, x4);
CMPSx_REG_ASR(x3, gd, 63);
CSETw(x1, cNE);
IFX(X_CF) {
BFIw(xFlags, x1, F_CF, 1);
Expand All @@ -861,8 +860,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
BICw(xFlags, xFlags, x1);
}
IFX(X_CF | X_OF) {
ASRw(x4, gd, 31);
CMPSw_REG(x3, x4);
CMPSw_REG_ASR(x3, gd, 31);
CSETw(x1, cNE);
IFX(X_CF) {
BFIw(xFlags, x1, F_CF, 1);
Expand Down Expand Up @@ -902,8 +900,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
BICw(xFlags, xFlags, x1);
}
IFX(X_CF | X_OF) {
ASRx(x4, gd, 63);
CMPSx_REG(x3, x4);
CMPSx_REG_ASR(x3, gd, 63);
CSETw(x1, cNE);
IFX(X_CF) {
BFIw(xFlags, x1, F_CF, 1);
Expand All @@ -927,8 +924,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
BICw(xFlags, xFlags, x1);
}
IFX(X_CF | X_OF) {
ASRw(x4, gd, 31);
CMPSw_REG(x3, x4);
CMPSw_REG_ASR(x3, gd, 31);
CSETw(x1, cNE);
IFX(X_CF) {
BFIw(xFlags, x1, F_CF, 1);
Expand Down Expand Up @@ -3294,23 +3290,58 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
break;
// "MUL AL, Eb": multiplies the low byte of RAX (extracted with UXTBw) by the Eb operand fetched
// into x1, and writes the low 16 bits of the product back into RAX.
// NOTE(review): this text comes from a diff view whose +/- markers were stripped, so removed and
// added lines sit side by side (the two SETFLAGS calls; presumably UFLAG_RES/UFLAG_DF belong to
// the removed deferred-flags path) -- confirm against the repository before editing.
case 4:
INST_NAME("MUL AL, Eb");
SETFLAGS(X_ALL, SF_PENDING);
SETFLAGS(X_ALL, SF_SET);
GETEB(x1, 0);
UXTBw(x2, xRAX);
MULw(x1, x2, x1);
UFLAG_RES(x1);
// Insert bits 0..15 of the product into RAX (AX <- AL * Eb).
BFIx(xRAX, x1, 0, 16);
UFLAG_DF(x1, d_mul8);
UFLAG_IF {
SET_DFNONE(x4);
IFX(X_CF|X_OF) {
// Compare zero against (product >> 8): the result is "not equal" exactly when the upper
// byte of the 16-bit product is non-zero.
CMPSw_REG_LSR(xZR, x1, 8);
CSETw(x3, cNE);
// The same 0/1 value is copied into both CF and OF.
IFX(X_CF) {
BFIw(xFlags, x3, F_CF, 1);
}
IFX(X_OF) {
BFIw(xFlags, x3, F_OF, 1);
}
}
// The IFX below has no braces: it guards only the following if statement
// (assuming IFX expands to an if(...) header -- confirm).
IFX(X_AF | X_PF | X_ZF | X_SF)
if(box64_dynarec_test) {
// to avoid noise during test
MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF));
BICw(xFlags, xFlags, x3);
}
}
break;
// "IMUL AL, Eb": multiplies the sign-extended low byte of RAX (SXTBw) by the Eb operand fetched
// with GETSEB into x1, and writes the low 16 bits of the product back into RAX.
// NOTE(review): this text comes from a diff view whose +/- markers were stripped, so removed and
// added lines sit side by side (the two SETFLAGS calls; presumably UFLAG_RES/UFLAG_DF belong to
// the removed deferred-flags path) -- confirm against the repository before editing.
case 5:
INST_NAME("IMUL AL, Eb");
SETFLAGS(X_ALL, SF_PENDING);
SETFLAGS(X_ALL, SF_SET);
GETSEB(x1, 0);
SXTBw(x2, xRAX);
MULw(x1, x2, x1);
UFLAG_RES(x1);
// Insert bits 0..15 of the product into RAX (AX <- AL * Eb).
BFIx(xRAX, x1, 0, 16);
UFLAG_DF(x1, d_imul8);
UFLAG_IF {
SET_DFNONE(x4);
IFX(X_CF|X_OF) {
// x2 = product >> 8 (arithmetic), then compared against product >> 16 (arithmetic).
// NOTE(review): x86 sets CF/OF when AX differs from the sign-extension of AL, i.e. the
// reference value depends on bit 7 of the product. `x1 asr 16` is the sign of the whole
// 16-bit product instead, so e.g. a product of 0x0080 compares 0 with 0 and reports no
// overflow although sign-extended AL (0xFF80) differs from AX. Comparing x1 against
// SXTBw(x1) looks like the intended test -- confirm against the Intel SDM.
// NOTE(review): ASRxw presumably follows rex.w like the other xw macros; with rex.w set
// the 64-bit shift would pull zero bits into the 32-bit value compared below -- confirm.
ASRxw(x2, x1, 8);
CMPSw_REG_ASR(x2, x1, 16);
CSETw(x3, cNE);
// The same 0/1 value is copied into both CF and OF.
IFX(X_CF) {
BFIw(xFlags, x3, F_CF, 1);
}
IFX(X_OF) {
BFIw(xFlags, x3, F_OF, 1);
}
}
// The IFX below has no braces: it guards only the following if statement
// (assuming IFX expands to an if(...) header -- confirm).
IFX(X_AF | X_PF | X_ZF | X_SF)
if(box64_dynarec_test) {
// to avoid noise during test
MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF));
BICw(xFlags, xFlags, x3);
}
}
break;
case 6:
INST_NAME("DIV Eb");
Expand Down Expand Up @@ -3442,8 +3473,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
UFLAG_IF {
SET_DFNONE(x4);
IFX(X_CF|X_OF) {
ASRxw(x4, xRAX, rex.w?63:31);
CMPSxw_REG(xRDX, x4);
CMPSxw_REG_ASR(xRDX, xRAX, rex.w?63:31);
CSETw(x3, cNE);
IFX(X_CF) {
BFIw(xFlags, x3, F_CF, 1);
Expand Down
6 changes: 2 additions & 4 deletions src/dynarec/arm64/dynarec_arm64_0f.c
Original file line number Diff line number Diff line change
Expand Up @@ -1910,8 +1910,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
MULx(gd, gd, ed);
SET_DFNONE(x4);
IFX(X_CF|X_OF) {
ASRx(x4, gd, 63);
CMPSx_REG(x3, x4);
CMPSx_REG_ASR(x3, gd, 63);
CSETw(x3, cNE);
IFX(X_CF) {
BFIw(xFlags, x3, F_CF, 1);
Expand All @@ -1931,8 +1930,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
MOVw_REG(gd, gd);
SET_DFNONE(x4);
IFX(X_CF|X_OF) {
ASRw(x4, gd, 31);
CMPSw_REG(x3, x4);
CMPSw_REG_ASR(x3, gd, 31);
CSETw(x3, cNE);
IFX(X_CF) {
BFIw(xFlags, x3, F_CF, 1);
Expand Down
71 changes: 62 additions & 9 deletions src/dynarec/arm64/dynarec_arm64_66.c
Original file line number Diff line number Diff line change
Expand Up @@ -436,16 +436,34 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
} else {
INST_NAME("IMUL Gw,Ew,Ib");
}
SETFLAGS(X_ALL, SF_PENDING);
SETFLAGS(X_ALL, SF_SET);
nextop = F8;
GETSEW(x1, (opcode==0x69)?2:1);
if(opcode==0x69) i32 = F16S; else i32 = F8S;
MOV32w(x2, i32);
MULw(x2, x2, x1);
UFLAG_RES(x2);
gd=x2;
GWBACK;
UFLAG_DF(x1, d_imul16);
UFLAG_IF {
SET_DFNONE(x4);
IFX(X_CF|X_OF) {
ASRxw(x1, x2, 16);
CMPSw_REG_ASR(x1, x2, 31);
CSETw(x3, cNE);
IFX(X_CF) {
BFIw(xFlags, x3, F_CF, 1);
}
IFX(X_OF) {
BFIw(xFlags, x3, F_OF, 1);
}
}
IFX(X_AF | X_PF | X_ZF | X_SF)
if(box64_dynarec_test) {
// to avoid noise during test
MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF));
BICw(xFlags, xFlags, x3);
}
}
break;
case 0x6A:
INST_NAME("PUSH Ib");
Expand Down Expand Up @@ -1342,25 +1360,60 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
break;
// "MUL AX, Ew": multiplies the low 16 bits of RAX (extracted with UXTHw) by the Ew operand
// fetched into x1; bits 0..15 of the product go to RAX and bits 16..31 go to the low 16 bits
// of RDX.
// NOTE(review): this text comes from a diff view whose +/- markers were stripped, so removed and
// added lines sit side by side (the two SETFLAGS calls; presumably UFLAG_RES/UFLAG_DF belong to
// the removed deferred-flags path) -- confirm against the repository before editing.
case 4:
INST_NAME("MUL AX, Ew");
SETFLAGS(X_ALL, SF_PENDING);
SETFLAGS(X_ALL, SF_SET);
GETEW(x1, 0);
UXTHw(x2, xRAX);
MULw(x1, x2, x1);
UFLAG_RES(x1);
// AX <- low half of the product, DX <- high half.
BFIz(xRAX, x1, 0, 16);
BFXILx(xRDX, x1, 16, 16);
UFLAG_DF(x1, d_mul16);
UFLAG_IF {
SET_DFNONE(x4);
IFX(X_CF|X_OF) {
// Compare zero against (product >> 16): the result is "not equal" exactly when the upper
// half of the 32-bit product is non-zero.
CMPSw_REG_LSR(xZR, x1, 16);
CSETw(x3, cNE);
// The same 0/1 value is copied into both CF and OF.
IFX(X_CF) {
BFIw(xFlags, x3, F_CF, 1);
}
IFX(X_OF) {
BFIw(xFlags, x3, F_OF, 1);
}
}
// The IFX below has no braces: it guards only the following if statement
// (assuming IFX expands to an if(...) header -- confirm).
IFX(X_AF | X_PF | X_ZF | X_SF)
if(box64_dynarec_test) {
// to avoid noise during test
MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF));
BICw(xFlags, xFlags, x3);
}
}
break;
// "IMUL AX, Ew": multiplies the sign-extended low 16 bits of RAX (SXTHw) by the Ew operand
// fetched with GETSEW into x1; bits 0..15 of the product go to RAX and bits 16..31 go to the
// low 16 bits of RDX.
// NOTE(review): this text comes from a diff view whose +/- markers were stripped, so removed and
// added lines sit side by side (the two SETFLAGS calls; presumably UFLAG_RES/UFLAG_DF belong to
// the removed deferred-flags path) -- confirm against the repository before editing.
case 5:
INST_NAME("IMUL AX, Ew");
SETFLAGS(X_ALL, SF_PENDING);
SETFLAGS(X_ALL, SF_SET);
GETSEW(x1, 0);
SXTHw(x2, xRAX);
MULw(x1, x2, x1);
UFLAG_RES(x1);
// AX <- low half of the product, DX <- high half.
BFIz(xRAX, x1, 0, 16);
BFXILx(xRDX, x1, 16, 16);
UFLAG_DF(x1, d_imul16);
UFLAG_IF {
SET_DFNONE(x4);
IFX(X_CF|X_OF) {
// x2 = product >> 16 (arithmetic), then compared against product >> 31 (arithmetic).
// NOTE(review): x86 sets CF/OF when DX:AX differs from the sign-extension of AX, i.e. the
// reference value depends on bit 15 of the product. `x1 asr 31` is the sign of the whole
// 32-bit product instead, so e.g. a product of 0x00008000 compares 0 with 0 and reports no
// overflow although sign-extended AX (0xFFFF8000) differs from the product. Comparing x1
// against SXTHw(x1) looks like the intended test -- confirm against the Intel SDM.
// NOTE(review): ASRxw presumably follows rex.w like the other xw macros -- confirm it is
// meant here rather than the fixed 32-bit ASRw.
ASRxw(x2, x1, 16);
CMPSw_REG_ASR(x2, x1, 31);
CSETw(x3, cNE);
// The same 0/1 value is copied into both CF and OF.
IFX(X_CF) {
BFIw(xFlags, x3, F_CF, 1);
}
IFX(X_OF) {
BFIw(xFlags, x3, F_OF, 1);
}
}
// The IFX below has no braces: it guards only the following if statement
// (assuming IFX expands to an if(...) header -- confirm).
IFX(X_AF | X_PF | X_ZF | X_SF)
if(box64_dynarec_test) {
// to avoid noise during test
MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF));
BICw(xFlags, xFlags, x3);
}
}
break;
case 6:
INST_NAME("DIV Ew");
Expand Down
24 changes: 21 additions & 3 deletions src/dynarec/arm64/dynarec_arm64_660f.c
Original file line number Diff line number Diff line change
Expand Up @@ -2444,14 +2444,32 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n

// "IMUL Gw,Ew": multiplies the Gw operand (fetched with GETSGW into x2) by the Ew operand
// (fetched with GETSEW into x1) and stores the result back through GWBACK.
// NOTE(review): this text comes from a diff view whose +/- markers were stripped, so removed and
// added lines sit side by side (the two SETFLAGS calls; presumably UFLAG_RES/UFLAG_DF belong to
// the removed deferred-flags path) -- confirm against the repository before editing.
case 0xAF:
INST_NAME("IMUL Gw,Ew");
SETFLAGS(X_ALL, SF_PENDING);
SETFLAGS(X_ALL, SF_SET);
nextop = F8;
GETSEW(x1, 0);
GETSGW(x2);
MULw(x2, x2, x1);
UFLAG_RES(x2);
GWBACK;
UFLAG_DF(x1, d_imul16);
UFLAG_IF {
SET_DFNONE(x4);
IFX(X_CF|X_OF) {
// x1 = product >> 16 (arithmetic), then compared against product >> 31 (arithmetic).
// NOTE(review): x86 sets CF/OF when the full product differs from the sign-extension of
// the truncated 16-bit result, i.e. the reference value depends on bit 15 of the product.
// `x2 asr 31` is the sign of the whole 32-bit product instead, so e.g. a product of
// 0x00008000 compares 0 with 0 and reports no overflow. Comparing x2 against SXTHw(x2)
// looks like the intended test -- confirm against the Intel SDM.
// NOTE(review): this relies on x2 still holding the full 32-bit product after GWBACK
// -- confirm GWBACK does not modify it.
ASRw(x1, x2, 16);
CMPSw_REG_ASR(x1, x2, 31);
CSETw(x3, cNE);
// The same 0/1 value is copied into both CF and OF.
IFX(X_CF) {
BFIw(xFlags, x3, F_CF, 1);
}
IFX(X_OF) {
BFIw(xFlags, x3, F_OF, 1);
}
}
// The IFX below has no braces: it guards only the following if statement
// (assuming IFX expands to an if(...) header -- confirm).
IFX(X_AF | X_PF | X_ZF | X_SF)
if(box64_dynarec_test) {
// to avoid noise during test
MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF));
BICw(xFlags, xFlags, x3);
}
}
break;

case 0xB3:
Expand Down
32 changes: 23 additions & 9 deletions src/dynarec/arm64/dynarec_arm64_67.c
Original file line number Diff line number Diff line change
Expand Up @@ -917,7 +917,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin

case 0x6B:
INST_NAME("IMUL Gd, Ed, Ib");
SETFLAGS(X_ALL, SF_PENDING);
SETFLAGS(X_ALL, SF_SET);
nextop = F8;
GETGD;
GETED32(1);
Expand All @@ -934,8 +934,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
BICw(xFlags, xFlags, x1);
}
IFX(X_CF | X_OF) {
ASRx(x4, gd, 63);
CMPSx_REG(x3, x4);
CMPSx_REG_ASR(x3, gd, 63);
CSETw(x1, cNE);
IFX(X_CF) {
BFIw(xFlags, x1, F_CF, 1);
Expand All @@ -959,8 +958,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
BICw(xFlags, xFlags, x1);
}
IFX(X_CF | X_OF) {
ASRw(x4, gd, 31);
CMPSw_REG(x3, x4);
CMPSw_REG_ASR(x3, gd, 31);
CSETw(x1, cNE);
IFX(X_CF) {
BFIw(xFlags, x1, F_CF, 1);
Expand Down Expand Up @@ -1501,7 +1499,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
break;
case 5:
INST_NAME("IMUL EAX, Ed");
SETFLAGS(X_ALL, SF_PENDING);
SETFLAGS(X_ALL, SF_SET);
GETED32(0);
if(rex.w) {
if(ed==xRDX) gd=x3; else gd=xRDX;
Expand All @@ -1513,9 +1511,25 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
MOVw_REG(xRAX, xRDX);
LSRx(xRDX, xRDX, 32);
}
UFLAG_RES(xRAX);
UFLAG_OP1(xRDX);
UFLAG_DF(x2, rex.w?d_imul64:d_imul32);
UFLAG_IF {
SET_DFNONE(x4);
IFX(X_CF|X_OF) {
CMPSxw_REG_ASR(xRDX, xRAX, rex.w?63:31);
CSETw(x3, cNE);
IFX(X_CF) {
BFIw(xFlags, x3, F_CF, 1);
}
IFX(X_OF) {
BFIw(xFlags, x3, F_OF, 1);
}
}
IFX(X_AF | X_PF | X_ZF | X_SF)
if(box64_dynarec_test) {
// to avoid noise during test
MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF));
BICw(xFlags, xFlags, x3);
}
}
break;
case 6:
INST_NAME("DIV Ed");
Expand Down

0 comments on commit 1bb7d9e

Please sign in to comment.