[LA64_DYNAREC] Optimized some 16bit shift opcodes #2192

Merged 1 commit on Dec 23, 2024
src/dynarec/la64/dynarec_la64_66.c (28 additions, 33 deletions)
@@ -650,48 +650,43 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 case 4:
                 case 6:
                     INST_NAME("SHL Ew, Ib");
-                    UFLAG_IF { MESSAGE(LOG_DUMP, "Need Optimization for flags\n"); }
-                    SETFLAGS(X_ALL, SF_PENDING);
-                    GETEW(x1, 1);
-                    u8 = F8;
-                    UFLAG_IF { MOV32w(x2, (u8 & 15)); }
-                    UFLAG_OP12(ed, x2)
-                    if (MODREG) {
-                        SLLI_D(ed, ed, 48 + (u8 & 15));
-                        SRLI_D(ed, ed, 48);
+                    if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) {
+                        SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+                        GETEW(x1, 0);
+                        u8 = (F8) & 0x1f;
+                        emit_shl16c(dyn, ninst, x1, u8, x5, x4, x6);
+                        EWBACK;
                     } else {
-                        SLLI_D(ed, ed, u8 & 15);
+                        FAKEED;
+                        F8;
                     }
-                    EWBACK;
-                    UFLAG_RES(ed);
-                    UFLAG_DF(x3, d_shl16);
                     break;
                 case 5:
                     INST_NAME("SHR Ew, Ib");
-                    UFLAG_IF { MESSAGE(LOG_DUMP, "Need Optimization for flags\n"); }
-                    SETFLAGS(X_ALL, SF_PENDING);
-                    GETEW(x1, 1);
-                    u8 = F8;
-                    UFLAG_IF { MOV32w(x2, (u8 & 15)); }
-                    UFLAG_OP12(ed, x2)
-                    SRLI_D(ed, ed, u8 & 15);
-                    EWBACK;
-                    UFLAG_RES(ed);
-                    UFLAG_DF(x3, d_shr16);
+                    if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) {
+                        SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+                        GETEW(x1, 0);
+                        u8 = (F8) & 0x1f;
+                        emit_shr16c(dyn, ninst, x1, u8, x5, x4, x6);
+                        EWBACK;
+                    } else {
+                        FAKEED;
+                        F8;
+                    }
                     break;
                 case 7:
                     INST_NAME("SAR Ew, Ib");
-                    SETFLAGS(X_ALL, SF_PENDING);
-                    UFLAG_IF { MESSAGE(LOG_DUMP, "Need Optimization for flags\n"); }
-                    GETSEW(x1, 1);
-                    u8 = F8;
-                    UFLAG_IF { MOV32w(x2, (u8 & 15)); }
-                    UFLAG_OP12(ed, x2)
-                    SRAI_D(ed, ed, u8 & 15);
-                    if (MODREG) BSTRPICK_D(ed, ed, 15, 0);
-                    EWBACK;
-                    UFLAG_RES(ed);
-                    UFLAG_DF(x3, d_sar16);
+                    if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) {
+                        SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+                        GETSEW(x1, 0);
+                        u8 = (F8) & 0x1f;
+                        emit_sar16c(dyn, ninst, x1, u8, x5, x4, x6);
+                        EWBACK;
+                    } else {
+                        FAKEED;
+                        F8;
+                    }
                     break;
                 default:
                     DEFAULT;
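
All three cases now share the same shape: geted_ib peeks at the upcoming immediate byte, and only when the count (masked to 5 bits) is non-zero does the code fetch Ew, call the new constant-count emitter and write the result back; a zero count just skips the ModRM operand (FAKEED) and consumes the immediate (F8). A plain-C sketch of why the zero-count path needs no flag handling at all (illustrative only, not part of the patch):

#include <stdint.h>

/* Illustrative model: an x86 16-bit shift by an immediate masks the count to 5 bits,
   and a masked count of 0 changes neither the destination nor any flag, which is what
   lets the dynarec emit nothing but the operand/immediate skip on that path. */
static uint16_t shl16_by_imm(uint16_t dst, uint8_t imm)
{
    unsigned count = imm & 0x1f;     /* 5-bit mask applied by the CPU */
    if (count == 0)
        return dst;                  /* value unchanged, flags unchanged */
    return (uint16_t)(dst << count); /* SHL shown; flag updates are modeled after each emitter below */
}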
src/dynarec/la64/dynarec_la64_emit_shift.c (227 additions, 0 deletions)
@@ -86,6 +86,101 @@ void emit_shl16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
}
}

// emit SHL16 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch
void emit_shl16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
{
if (!c) return;
// c != 0

IFX (X_PEND) {
MOV64x(s3, c);
ST_H(s3, xEmu, offsetof(x64emu_t, op2));
ST_H(s1, xEmu, offsetof(x64emu_t, op1));
SET_DF(s4, d_shl16);
} else IFX (X_ALL) {
SET_DFNONE();
}

if (la64_lbt) {
IFX (X_PEND) {
} else {
MOV64x(s3, c);
}
IFX (X_ALL) {
X64_SLL_H(s1, s3);
}

SLLI_D(s1, s1, c);
BSTRPICK_D(s1, s1, 15, 0);

IFX (X_PEND) {
ST_H(s1, xEmu, offsetof(x64emu_t, res));
}
return;
}

CLEAR_FLAGS(s3);
if (c < 16) {
IFX (X_CF | X_OF) {
SRLI_D(s3, s1, 16 - c);
ANDI(s5, s3, 1); // LSB == F_CF
IFX (X_CF) {
OR(xFlags, xFlags, s5);
}
}

SLLI_D(s1, s1, c + 48);
IFX (X_SF) {
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
}
SRLI_D(s1, s1, 48);

IFX (X_PEND) {
ST_H(s1, xEmu, offsetof(x64emu_t, res));
}
IFX (X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
}
IFX (X_OF) {
// OF flag is affected only on 1-bit shifts
if (c == 1) {
SRLI_D(s3, s1, 15);
XOR(s3, s3, s5);
SLLI_D(s3, s3, F_OF);
OR(xFlags, xFlags, s3);
}
}
IFX (X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
}
} else {
IFX (X_CF) {
if (c == 16) {
ANDI(s3, s1, 1);
OR(xFlags, xFlags, s3); // F_CF == 0
}
}
MV(s1, xZR);

IFX (X_PEND) {
ST_H(s1, xEmu, offsetof(x64emu_t, res));
}
// OF nop
// SF nop
// AF nop
IFX (X_PF | X_ZF) {
IFX (X_ZF) {
ORI(xFlags, xFlags, 1 << F_ZF);
}
IFX (X_PF) {
ORI(xFlags, xFlags, 1 << F_PF);
}
}
}
}
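
In the non-LBT path above, CF is taken from the last bit shifted out, OF (meaningful only for 1-bit shifts) is CF xor the new sign bit, and SF/ZF/PF come from the 16-bit result; counts of 16 and above zero the result. A plain-C reference of those rules (illustrative sketch only, not part of the patch; it assumes the architectural EFLAGS bit positions CF=0, PF=2, ZF=6, SF=7, OF=11 and ignores AF, which the instruction leaves undefined):

#include <stdint.h>

/* Plain-C model of SHL r/m16 with a non-zero constant count (already masked to 5 bits),
   mirroring the flag updates of the non-LBT path in emit_shl16c above. */
static uint16_t shl16_model(uint16_t op1, unsigned count, uint32_t* eflags)
{
    uint32_t f = *eflags & ~((1u << 0) | (1u << 2) | (1u << 6) | (1u << 7) | (1u << 11));
    uint16_t res;
    if (count < 16) {
        unsigned cf = (op1 >> (16 - count)) & 1;         /* last bit shifted out -> CF */
        res = (uint16_t)(op1 << count);
        f |= cf << 0;
        if (count == 1)
            f |= (unsigned)(cf ^ (res >> 15)) << 11;     /* OF only meaningful for 1-bit shifts */
        if (res & 0x8000) f |= 1u << 7;                  /* SF */
        if (res == 0) f |= 1u << 6;                      /* ZF */
        unsigned p = res & 0xff;                         /* PF: set on even parity of the low byte */
        p ^= p >> 4; p ^= p >> 2; p ^= p >> 1;
        if (!(p & 1)) f |= 1u << 2;
    } else {
        res = 0;
        if (count == 16) f |= op1 & 1;                   /* the only bit shifted out lands in CF */
        f |= (1u << 6) | (1u << 2);                      /* zero result: ZF and PF set */
    }
    *eflags = f;
    return res;
}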

// emit SHL32 instruction, from s1 , shift s2, store result in s1 using s3, s4 and s5 as scratch
void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
{
@@ -354,6 +449,72 @@ void emit_shr16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
}
}

// emit SHR16 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch
void emit_shr16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
{
if (!c) return;
// c != 0

IFX (X_PEND) {
MOV64x(s3, c);
ST_H(s3, xEmu, offsetof(x64emu_t, op2));
ST_H(s1, xEmu, offsetof(x64emu_t, op1));
SET_DF(s4, d_shr16);
} else IFX (X_ALL) {
SET_DFNONE();
}

if (la64_lbt) {
IFX (X_PEND) {
} else {
MOV64x(s3, c);
}
IFX (X_ALL) {
X64_SRL_H(s1, s3);
}
SRLI_D(s1, s1, c);
IFX (X_PEND) {
ST_H(s1, xEmu, offsetof(x64emu_t, res));
}
return;
}

CLEAR_FLAGS(s3);
IFX (X_CF) {
if (c > 1) {
SRAI_D(s3, s1, c - 1);
ANDI(s3, s3, 1); // LSB == F_CF
} else {
// no need to shift
ANDI(s3, s1, 1); // LSB == F_CF
}
OR(xFlags, xFlags, s3);
}
IFX (X_OF) {
// OF flag is affected only on 1-bit shifts
// OF flag is set to the most-significant bit of the original operand
if (c == 1) {
SRLI_D(s3, s1, 15);
SLLI_D(s3, s3, F_OF);
OR(xFlags, xFlags, s3);
}
}

SRLI_D(s1, s1, c);

// SF should be unset
IFX (X_PEND) {
ST_H(s1, xEmu, offsetof(x64emu_t, res));
}
IFX (X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
}
IFX (X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
}
}
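
emit_shr16c follows the same pattern: CF is the last bit shifted out, OF (1-bit shifts only) is the original sign bit, SF stays clear because zeros are shifted in, and ZF/PF come from the result. A plain-C reference (illustrative sketch only, not part of the patch; same EFLAGS bit assumptions as above):

#include <stdint.h>

/* Plain-C model of SHR r/m16 with a non-zero constant count (already masked to 5 bits),
   mirroring the flag updates of the non-LBT path in emit_shr16c above. */
static uint16_t shr16_model(uint16_t op1, unsigned count, uint32_t* eflags)
{
    uint32_t f = *eflags & ~((1u << 0) | (1u << 2) | (1u << 6) | (1u << 7) | (1u << 11));
    unsigned cf = (count <= 16) ? ((op1 >> (count - 1)) & 1) : 0;  /* last bit shifted out */
    uint16_t res = (uint16_t)((uint32_t)op1 >> count);             /* zeros shifted in, so SF stays clear */
    f |= cf << 0;
    if (count == 1)
        f |= ((op1 >> 15) & 1u) << 11;   /* OF = original MSB, only meaningful for 1-bit shifts */
    if (res == 0) f |= 1u << 6;          /* ZF */
    unsigned p = res & 0xff;             /* PF: set on even parity of the low byte */
    p ^= p >> 4; p ^= p >> 2; p ^= p >> 1;
    if (!(p & 1)) f |= 1u << 2;
    *eflags = f;
    return res;
}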

// emit SHR32 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch
void emit_shr32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4)
{
@@ -563,6 +724,72 @@ void emit_sar16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
}
}


// emit SAR16 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch
void emit_sar16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
{
if (!c) return;
// c != 0

IFX (X_PEND) {
MOV64x(s3, c);
ST_H(s3, xEmu, offsetof(x64emu_t, op2));
ST_H(s1, xEmu, offsetof(x64emu_t, op1));
SET_DF(s4, d_sar16);
} else IFX (X_ALL) {
SET_DFNONE();
}

if (la64_lbt) {
IFX (X_PEND) {
} else {
MOV64x(s3, c);
}
IFX (X_ALL) {
X64_SRA_H(s1, s3);
}
SRLI_D(s1, s1, c);
BSTRPICK_D(s1, s1, 15, 0);
IFX (X_PEND) {
ST_H(s1, xEmu, offsetof(x64emu_t, res));
}
return;
}

CLEAR_FLAGS(s3);
IFX (X_CF) {
if (c > 1) {
SRAI_D(s3, s1, c - 1);
ANDI(s3, s3, 1); // LSB == F_CF
} else {
// no need to shift
ANDI(s3, s1, 1); // LSB == F_CF
}
OR(xFlags, xFlags, s3);
}
// For the SAR instruction, the OF flag is cleared for all 1-bit shifts.
// OF nop
IFX (X_SF) {
// SF is the same as the original operand
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
}

SRLI_D(s1, s1, c);
BSTRPICK_D(s1, s1, 15, 0);

IFX (X_PEND) {
ST_H(s1, xEmu, offsetof(x64emu_t, res));
}
IFX (X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
}
IFX (X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
}
}
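
emit_sar16c works on the sign-extended operand (GETSEW), so CF can be read from it even for counts above 15, OF is simply left clear, and SF follows the original sign. A plain-C reference (illustrative sketch only, not part of the patch; same EFLAGS bit assumptions as above, plus arithmetic right shift on signed integers):

#include <stdint.h>

/* Plain-C model of SAR r/m16 with a non-zero constant count (already masked to 5 bits),
   mirroring the flag updates of the non-LBT path in emit_sar16c above. */
static uint16_t sar16_model(uint16_t op1, unsigned count, uint32_t* eflags)
{
    uint32_t f = *eflags & ~((1u << 0) | (1u << 2) | (1u << 6) | (1u << 7) | (1u << 11));
    int32_t sop = (int16_t)op1;                        /* sign-extend first, like GETSEW does */
    unsigned cf = (unsigned)(sop >> (count - 1)) & 1;  /* last bit shifted out (sign copies above bit 15) */
    uint16_t res = (uint16_t)(sop >> count);           /* arithmetic shift, truncated back to 16 bits */
    f |= cf << 0;
    /* OF is cleared for 1-bit SAR, so there is nothing to set */
    if (sop < 0) f |= 1u << 7;           /* SF keeps the original sign */
    if (res == 0) f |= 1u << 6;          /* ZF */
    unsigned p = res & 0xff;             /* PF: set on even parity of the low byte */
    p ^= p >> 4; p ^= p >> 2; p ^= p >> 1;
    if (!(p & 1)) f |= 1u << 2;
    *eflags = f;
    return res;
}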

// emit SAR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
void emit_sar32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4)
{
src/dynarec/la64/dynarec_la64_helper.h (6 additions, 0 deletions)
@@ -862,13 +862,16 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
#define emit_and32 STEPNAME(emit_and32)
#define emit_and32c STEPNAME(emit_and32c)
#define emit_shl16 STEPNAME(emit_shl16)
#define emit_shl16c STEPNAME(emit_shl16c)
#define emit_shl32 STEPNAME(emit_shl32)
#define emit_shl32c STEPNAME(emit_shl32c)
#define emit_shr8 STEPNAME(emit_shr8)
#define emit_shr16 STEPNAME(emit_shr16)
#define emit_shr16c STEPNAME(emit_shr16c)
#define emit_shr32 STEPNAME(emit_shr32)
#define emit_shr32c STEPNAME(emit_shr32c)
#define emit_sar16 STEPNAME(emit_sar16)
#define emit_sar16c STEPNAME(emit_sar16c)
#define emit_sar32c STEPNAME(emit_sar32c)
#define emit_shld32c STEPNAME(emit_shld32c)
#define emit_shrd32c STEPNAME(emit_shrd32c)
@@ -967,13 +970,16 @@ void emit_and16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
void emit_and32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
void emit_and32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
void emit_shl16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
void emit_shl16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5);
void emit_shr8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
void emit_shr16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
void emit_shr16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
void emit_shr32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
void emit_shr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
void emit_sar16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
void emit_sar16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
void emit_sar32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
void emit_shld32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
void emit_shrd32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);