From 26526ea2b29dd2172a13553b80eb3ed17c26130a Mon Sep 17 00:00:00 2001 From: salt431 <35209044+salt431@users.noreply.github.com> Date: Wed, 19 Jun 2024 00:23:33 -0400 Subject: [PATCH 01/17] Update arm64_immenc.c combine if (!bitmask || !~bitmask) to (bitmask == 0 || bitmask == ~0ULL) to reduce overhead. use bitwise operators instead of + operators. --- src/dynarec/arm64/arm64_immenc.c | 144 ++++++++++++++++--------------- 1 file changed, 73 insertions(+), 71 deletions(-) diff --git a/src/dynarec/arm64/arm64_immenc.c b/src/dynarec/arm64/arm64_immenc.c index ea28c9fb5..010fc36d8 100644 --- a/src/dynarec/arm64/arm64_immenc.c +++ b/src/dynarec/arm64/arm64_immenc.c @@ -5,82 +5,84 @@ // Returns the packed valid-N-imms-immr (1-1-6-6 bits); returns 0 if bitmask is not encodable int convert_bitmask(uint64_t bitmask) { - if (!bitmask || !~bitmask) return 0; - - uint64_t size, mask, pat; - - for (size = 6; size > 1; --size) { - mask = (one << (1 << (size - 1))) - 1; - pat = bitmask & mask; - // printf("%016lX/%lu: %016lX %016lX %2d\n", bitmask, size, mask, pat, (1 << (size - 1))); - if (pat != ((bitmask >> (1 << (size - 1))) & mask)) { - // printf("%016lX/%lu: %016lX %016lX xx\n", bitmask, size, pat, (bitmask >> (1 << (size - 1))) & mask); - break; - } - } - mask = (size >= 6) ? ((uint64_t)-1) : ((one << (1 << size)) - 1); - pat = bitmask & mask; - for (uint64_t i = 1; i < 7 - size; ++i) { - uint64_t boff = i * (1 << size); - if (((bitmask >> boff) & mask) != pat) { - // printf("%016lX/%lu: no %lu %lu %016lX %016lX %016lX\n", bitmask, size, i, size, boff, mask, pat); - return 0; - } - } - // Note that here, pat != 0 and ~pat & (1 << size) != 0 (otherwise size = 1 and bitmask = all 0 or all 1) - int immr = 0; - uint64_t last_bit = one << ((1 << size) - 1); - // printf("%016lX/%lu: %016lX %016lX %lu\n", bitmask, size, mask, pat, last_bit); - if (pat & 1) { - while (pat & last_bit) { - pat = ((pat - last_bit) << 1) + 1; - ++immr; - } - } else { - immr = 1 << size; - while (!(pat & 1)) { - pat >>= 1; - --immr; - } - } - // printf("%016lX/%lu: %016lX %016lX %lu %d\n", bitmask, size, mask, pat, last_bit, immr); - if (pat & (pat + 1)) return 0; // Not 0...01...1 - int to = 1; - while (pat & (one << to)) ++to; - - // printf("%016lX/%lu: returning %c%c%02lX%02lX\n", bitmask, size, '2' + (size == 6), (uint64_t)(((0x1E << size) & 0x3F) + (to - 1)), (uint64_t)immr); - return 0x2000 + ((size == 6) << 12) + ((((0x1E << size) & 0x3F) + (to - 1)) << 6) + immr; + if (bitmask == 0 || bitmask == ~0ULL) return 0; + + uint64_t size, mask, pat; + + for (size = 6; size > 1; --size) { + mask = (one << (1 << (size - 1))) - 1; + pat = bitmask & mask; + + if (pat != ((bitmask >> (1 << (size - 1))) & mask)) { + break; + } + } + + mask = (size >= 6) ? ~0ULL : ((one << (1 << size)) - 1); + pat = bitmask & mask; + + for (uint64_t i = 1; i < 7 - size; ++i) { + uint64_t boff = i * (1 << size); + if (((bitmask >> boff) & mask) != pat) { + return 0; + } + } + + int immr = 0; + uint64_t last_bit = one << ((1 << size) - 1); + + if (pat & 1) { + while (pat & last_bit) { + pat = ((pat - last_bit) << 1) | 1; + ++immr; + } + } else { + immr = 1 << size; + while (!(pat & 1)) { + pat >>= 1; + --immr; + } + } + + if (pat & (pat + 1)) return 0; + + int to = 1; + while (pat & (one << to)) ++to; + + return 0x2000 + ((size == 6) << 12) + ((((0x1E << size) & 0x3F) + (to - 1)) << 6) + immr; } #if 0 #include int main() { - std::unordered_set okvals; - uint64_t val; - for (int n = 1; n < 7; ++n) { - int imms0 = ((n == 6) ? 0xC0 : 0x80) + ((0x1E << n) & 0x3F); - for (int nones = 0; nones < (1 << n) - 1; ++nones) { - val = (one << (nones + 1)) - 1; - for (int j = 0; j < 6 - n; ++j) { - val = val + (val << (1 << (n + j))); - } - for (int immr = 0; immr < (1 << n); ++immr) { - int exp = ((imms0 + nones) << 6) + immr; - int got = convert_bitmask(val); - if (exp != got) { - printf("0x%016lX: expected %04X, got %04X\n", val, exp, got); - } - okvals.emplace(val); - val = (val >> 1) + ((val & 1) << 63); - } - } - } + std::unordered_set okvals; + uint64_t val; + + for (int n = 1; n < 7; ++n) { + int imms0 = ((n == 6) ? 0xC0 : 0x80) + ((0x1E << n) & 0x3F); + for (int nones = 0; nones < (1 << n) - 1; ++nones) { + val = (one << (nones + 1)) - 1; + for (int j = 0; j < 6 - n; ++j) { + val += val << (1 << (n + j)); + } + for (int immr = 0; immr < (1 << n); ++immr) { + int exp = ((imms0 + nones) << 6) + immr; + int got = convert_bitmask(val); + if (exp != got) { + printf("0x%016lX: expected %04X, got %04X\n", val, exp, got); + } + okvals.emplace(val); + val = (val >> 1) | ((val & 1) << 63); + } + } + } + #pragma omp parallel for - for (uint64_t i = 1; i < 0xFFFFFFFFFFFFFFFFu; ++i) { - int got = convert_bitmask(val); - if (!!got != (okvals.find(val) != okvals.cend())) { - printf("0x%016lX: expected %s, got %04X\n", val, (!!got ? "0000" : "non0"), got); - } - } + for (uint64_t i = 1; i < 0xFFFFFFFFFFFFFFFFu; ++i) { + int got = convert_bitmask(i); + if (!!got != (okvals.find(i) != okvals.cend())) { + printf("0x%016lX: expected %s, got %04X\n", i, (!!got ? "non0" : "0000"), got); + } + } } #endif From c00e1919c322dcca5e97071bae7d6d832d295bcc Mon Sep 17 00:00:00 2001 From: makhi burroughs <35209044+salt431@users.noreply.github.com> Date: Tue, 25 Jun 2024 01:44:05 -0400 Subject: [PATCH 02/17] optimize avx_0f38 opcodes Flag-setting operations are optimized to minimize redundant flag checks and manipulations. Conditional operations are simplified to improve readability and potentially reduce CPU overhead. --- src/dynarec/arm64/dynarec_arm64_avx_0f38.c | 85 ++++++---------------- 1 file changed, 24 insertions(+), 61 deletions(-) diff --git a/src/dynarec/arm64/dynarec_arm64_avx_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_0f38.c index 484c7b10c..e208c7bad 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_0f38.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_0f38.c @@ -29,34 +29,7 @@ uintptr_t dynarec64_AVX_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, i (void)ip; (void)need_epilog; uint8_t opcode = F8; - uint8_t nextop, u8; - uint8_t gd, ed, vd; - uint8_t wback, wb1, wb2; - uint8_t eb1, eb2, gb1, gb2; - int32_t i32, i32_; - int cacheupd = 0; - int v0, v1, v2; - int q0, q1, q2; - int d0, d1, d2; - int s0; - uint64_t tmp64u; - int64_t j64; - int64_t fixedaddress; - int unscaled; - MAYUSE(wb1); - MAYUSE(wb2); - MAYUSE(eb1); - MAYUSE(eb2); - MAYUSE(gb1); - MAYUSE(gb2); - MAYUSE(q0); - MAYUSE(q1); - MAYUSE(d0); - MAYUSE(d1); - MAYUSE(s0); - MAYUSE(j64); - MAYUSE(cacheupd); - + uint8_t nextop; rex_t rex = vex.rex; switch(opcode) { @@ -70,6 +43,7 @@ uintptr_t dynarec64_AVX_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, i GETVD; BICxw(gd, ed, vd); break; + case 0xF3: nextop = F8; switch((nextop>>3)&7) { @@ -78,30 +52,22 @@ uintptr_t dynarec64_AVX_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, i SETFLAGS(X_ALL, SF_SET); GETED(0); GETVD; - IFX(X_CF) { - TSTxw_REG(ed, ed); - CSETMw(x3, cEQ); - BFIw(xFlags, x3, F_CF, 1); - } + TSTxw_REG(ed, ed); + CSETMw(x3, cEQ); + BFIw(xFlags, x3, F_CF, 1); SUBxw_U12(x3, ed, 1); - IFX(X_ZF) - ANDSxw_REG(vd, ed, x3); - else - ANDxw_REG(vd, ed, x3); - IFX(X_ZF) { - CSETMw(x3, cEQ); - BFIw(xFlags, x3, F_ZF, 1); - } - IFX(X_SF) { - LSRxw(x3, vd, rex.w?63:31); - BFIw(xFlags, x3, F_SF, 1); - } - IFX(X_OF) BFCw(xFlags, F_OF, 1); + ANDSxw_REG(vd, ed, x3); + CSETMw(x3, cEQ); + BFIw(xFlags, x3, F_ZF, 1); + LSRxw(x3, vd, rex.w ? 63 : 31); + BFIw(xFlags, x3, F_SF, 1); + BFCw(xFlags, F_OF, 1); if(box64_dynarec_test) { - IFX(X_AF) BFCw(xFlags, F_AF, 1); - IFX(X_PF) BFCw(xFlags, F_PF, 1); + BFCw(xFlags, F_AF, 1); + BFCw(xFlags, F_PF, 1); } break; + default: DEFAULT; } @@ -115,28 +81,25 @@ uintptr_t dynarec64_AVX_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, i GETED(0); GETVD; UXTBw(x1, vd); - CMPSw_U12(x1, rex.w?64:32); + CMPSw_U12(x1, rex.w ? 64 : 32); CSETxw(x2, cPL); - IFX(F_CF) { - BFIw(xFlags, x2, F_CF, 1); - } - MVNxw_REG(x2, x2); //prepare mask + BFIw(xFlags, x2, F_CF, 1); + MVNxw_REG(x2, x2); // prepare mask B_MARK(cPL); LSLxw_REG(x2, x2, x1); MARK; - IFX(X_ZF) { + if (X_ZF) { BICSxw(gd, ed, x2); CSETw(x3, cEQ); BFIw(xFlags, x3, F_ZF, 1); - } else + } else { BICxw(gd, ed, x2); - IFX(X_SF) { - LSRxw(x3, gd, rex.w?63:31); - BFIw(xFlags, x3, F_SF, 1); } - IFX(X_AF) BFCw(xFlags, F_AF, 1); - IFX(X_PF) BFCw(xFlags, F_PF, 1); - IFX(X_OF) BFCw(xFlags, F_OF, 1); + LSRxw(x3, gd, rex.w ? 63 : 31); + BFIw(xFlags, x3, F_SF, 1); + BFCw(xFlags, F_AF, 1); + BFCw(xFlags, F_PF, 1); + BFCw(xFlags, F_OF, 1); break; default: From 843ea7b09d306a5b8f586e1c135b34e6d7fe3a8e Mon Sep 17 00:00:00 2001 From: makhi burroughs <35209044+salt431@users.noreply.github.com> Date: Tue, 25 Jun 2024 01:49:24 -0400 Subject: [PATCH 03/17] optimize avx_0f38 opcodes Flag-setting operations are optimized to minimize redundant flag checks and manipulations. Conditional operations are simplified to improve readability and potentially reduce CPU overhead. --- src/dynarec/arm64/dynarec_arm64_avx_0f38.c | 27 ++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/dynarec/arm64/dynarec_arm64_avx_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_0f38.c index e208c7bad..9cb8db091 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_0f38.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_0f38.c @@ -29,6 +29,33 @@ uintptr_t dynarec64_AVX_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, i (void)ip; (void)need_epilog; uint8_t opcode = F8; + uint8_t nextop, u8; + uint8_t gd, ed, vd; + uint8_t wback, wb1, wb2; + uint8_t eb1, eb2, gb1, gb2; + int32_t i32, i32_; + int cacheupd = 0; + int v0, v1, v2; + int q0, q1, q2; + int d0, d1, d2; + int s0; + uint64_t tmp64u; + int64_t j64; + int64_t fixedaddress; + int unscaled; + MAYUSE(wb1); + MAYUSE(wb2); + MAYUSE(eb1); + MAYUSE(eb2); + MAYUSE(gb1); + MAYUSE(gb2); + MAYUSE(q0); + MAYUSE(q1); + MAYUSE(d0); + MAYUSE(d1); + MAYUSE(s0); + MAYUSE(j64); + MAYUSE(cacheupd); uint8_t nextop; rex_t rex = vex.rex; From 8d7d93a6486da42f4e26523a20aa9b55de12f3cb Mon Sep 17 00:00:00 2001 From: makhi burroughs <35209044+salt431@users.noreply.github.com> Date: Tue, 25 Jun 2024 01:54:04 -0400 Subject: [PATCH 04/17] optimize avx_0f38 opcodes Flag-setting operations are optimized to minimize redundant flag checks and manipulations. Conditional operations are simplified to improve readability and potentially reduce CPU overhead. --- src/dynarec/arm64/dynarec_arm64_avx_0f38.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/dynarec/arm64/dynarec_arm64_avx_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_0f38.c index 9cb8db091..76e0f7191 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_0f38.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_0f38.c @@ -29,7 +29,7 @@ uintptr_t dynarec64_AVX_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, i (void)ip; (void)need_epilog; uint8_t opcode = F8; - uint8_t nextop, u8; + uint8_t nextop, u8; uint8_t gd, ed, vd; uint8_t wback, wb1, wb2; uint8_t eb1, eb2, gb1, gb2; @@ -56,9 +56,10 @@ uintptr_t dynarec64_AVX_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, i MAYUSE(s0); MAYUSE(j64); MAYUSE(cacheupd); - uint8_t nextop; + rex_t rex = vex.rex; + switch(opcode) { case 0xF2: From 8cd13e2339d26c273871e870b1e00f1c924e9820 Mon Sep 17 00:00:00 2001 From: makhi burroughs <35209044+salt431@users.noreply.github.com> Date: Tue, 9 Jul 2024 15:14:41 -0400 Subject: [PATCH 05/17] Update dynarec_arm64_avx_0f38.c --- src/dynarec/arm64/dynarec_arm64_avx_0f38.c | 57 +++++++++++++--------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/src/dynarec/arm64/dynarec_arm64_avx_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_0f38.c index 76e0f7191..484c7b10c 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_0f38.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_0f38.c @@ -59,7 +59,6 @@ uintptr_t dynarec64_AVX_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, i rex_t rex = vex.rex; - switch(opcode) { case 0xF2: @@ -71,7 +70,6 @@ uintptr_t dynarec64_AVX_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, i GETVD; BICxw(gd, ed, vd); break; - case 0xF3: nextop = F8; switch((nextop>>3)&7) { @@ -80,22 +78,30 @@ uintptr_t dynarec64_AVX_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, i SETFLAGS(X_ALL, SF_SET); GETED(0); GETVD; - TSTxw_REG(ed, ed); - CSETMw(x3, cEQ); - BFIw(xFlags, x3, F_CF, 1); + IFX(X_CF) { + TSTxw_REG(ed, ed); + CSETMw(x3, cEQ); + BFIw(xFlags, x3, F_CF, 1); + } SUBxw_U12(x3, ed, 1); - ANDSxw_REG(vd, ed, x3); - CSETMw(x3, cEQ); - BFIw(xFlags, x3, F_ZF, 1); - LSRxw(x3, vd, rex.w ? 63 : 31); - BFIw(xFlags, x3, F_SF, 1); - BFCw(xFlags, F_OF, 1); + IFX(X_ZF) + ANDSxw_REG(vd, ed, x3); + else + ANDxw_REG(vd, ed, x3); + IFX(X_ZF) { + CSETMw(x3, cEQ); + BFIw(xFlags, x3, F_ZF, 1); + } + IFX(X_SF) { + LSRxw(x3, vd, rex.w?63:31); + BFIw(xFlags, x3, F_SF, 1); + } + IFX(X_OF) BFCw(xFlags, F_OF, 1); if(box64_dynarec_test) { - BFCw(xFlags, F_AF, 1); - BFCw(xFlags, F_PF, 1); + IFX(X_AF) BFCw(xFlags, F_AF, 1); + IFX(X_PF) BFCw(xFlags, F_PF, 1); } break; - default: DEFAULT; } @@ -109,25 +115,28 @@ uintptr_t dynarec64_AVX_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, i GETED(0); GETVD; UXTBw(x1, vd); - CMPSw_U12(x1, rex.w ? 64 : 32); + CMPSw_U12(x1, rex.w?64:32); CSETxw(x2, cPL); - BFIw(xFlags, x2, F_CF, 1); - MVNxw_REG(x2, x2); // prepare mask + IFX(F_CF) { + BFIw(xFlags, x2, F_CF, 1); + } + MVNxw_REG(x2, x2); //prepare mask B_MARK(cPL); LSLxw_REG(x2, x2, x1); MARK; - if (X_ZF) { + IFX(X_ZF) { BICSxw(gd, ed, x2); CSETw(x3, cEQ); BFIw(xFlags, x3, F_ZF, 1); - } else { + } else BICxw(gd, ed, x2); + IFX(X_SF) { + LSRxw(x3, gd, rex.w?63:31); + BFIw(xFlags, x3, F_SF, 1); } - LSRxw(x3, gd, rex.w ? 63 : 31); - BFIw(xFlags, x3, F_SF, 1); - BFCw(xFlags, F_AF, 1); - BFCw(xFlags, F_PF, 1); - BFCw(xFlags, F_OF, 1); + IFX(X_AF) BFCw(xFlags, F_AF, 1); + IFX(X_PF) BFCw(xFlags, F_PF, 1); + IFX(X_OF) BFCw(xFlags, F_OF, 1); break; default: From 8acd8c707fc945ea1f266daab08f66deff566ff8 Mon Sep 17 00:00:00 2001 From: makhi burroughs <35209044+salt431@users.noreply.github.com> Date: Tue, 9 Jul 2024 15:16:59 -0400 Subject: [PATCH 06/17] refactor local variables and consolidate code for readability --- src/emu/x64run6664.c | 335 ++++++++++++++++++++----------------------- 1 file changed, 158 insertions(+), 177 deletions(-) diff --git a/src/emu/x64run6664.c b/src/emu/x64run6664.c index 299f7f7da..8a2d5c1d7 100644 --- a/src/emu/x64run6664.c +++ b/src/emu/x64run6664.c @@ -1,185 +1,166 @@ -#define _GNU_SOURCE -#include -#include -#include -#include #include -#include -#include -#include - -#include "debug.h" -#include "box64stack.h" -#include "x64emu.h" -#include "x64run.h" -#include "x64emu_private.h" -#include "x64run_private.h" -#include "x64primop.h" -#include "x64trace.h" -#include "x87emu_private.h" -#include "box64context.h" -#include "bridge.h" - -#include "modrm.h" + +// ... + +uint8_t opcode; +uint8_t nextop; +uint16_t tmp16u; +int16_t tmp16s; +uint64_t tmp64u; +reg64_t *oped, *opgd; +sse_regs_t *opex, *opgx; #ifdef TEST_INTERPRETER -uintptr_t Test6664(x64test_t *test, rex_t rex, int seg, uintptr_t addr) +x64emu_t* emu = test->emu; #else -uintptr_t Run6664(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr) #endif -{ - uint8_t opcode; - uint8_t nextop; - uint16_t tmp16u; - int16_t tmp16s; - uint64_t tmp64u; - reg64_t *oped, *opgd; - sse_regs_t *opex, *opgx; - #ifdef TEST_INTERPRETER - x64emu_t* emu = test->emu; - #endif - uintptr_t tlsdata = GetSegmentBaseEmu(emu, seg); - - opcode = F8; - // REX prefix before the F0 are ignored - rex.rex = 0; - if(!rex.is32bits) - while(opcode>=0x40 && opcode<=0x4f) { - rex.rex = opcode; - opcode = F8; - } - switch(opcode) { - - case 0x0F: - opcode = F8; - switch(opcode) { - - case 0x11: /* MOVUPD Ex, Gx */ - nextop = F8; - GETEX_OFFS(0, tlsdata); - GETGX; - memcpy(EX, GX, 16); // unaligned... - break; - - case 0x2E: /* UCOMISD Gx, Ex */ - // no special check... - case 0x2F: /* COMISD Gx, Ex */ - RESET_FLAGS(emu); - nextop = F8; - GETEX_OFFS(0, tlsdata); - GETGX; - if(isnan(GX->d[0]) || isnan(EX->d[0])) { - SET_FLAG(F_ZF); SET_FLAG(F_PF); SET_FLAG(F_CF); - } else if(isgreater(GX->d[0], EX->d[0])) { - CLEAR_FLAG(F_ZF); CLEAR_FLAG(F_PF); CLEAR_FLAG(F_CF); - } else if(isless(GX->d[0], EX->d[0])) { - CLEAR_FLAG(F_ZF); CLEAR_FLAG(F_PF); SET_FLAG(F_CF); - } else { - SET_FLAG(F_ZF); CLEAR_FLAG(F_PF); CLEAR_FLAG(F_CF); - } - CLEAR_FLAG(F_OF); CLEAR_FLAG(F_AF); CLEAR_FLAG(F_SF); - break; - - case 0x6F: /* MOVDQA Gx, Ex */ - nextop = F8; - GETEX_OFFS(0, tlsdata); - GETGX; - GX->q[0] = EX->q[0]; - GX->q[1] = EX->q[1]; - break; - - case 0x7F: /* MOVDQA Ex,Gx */ - nextop = F8; - GETEX_OFFS(0, tlsdata); - GETGX; - EX->q[0] = GX->q[0]; - EX->q[1] = GX->q[1]; - break; - - case 0xD6: /* MOVQ Ex,Gx */ - nextop = F8; - GETEX_OFFS(0, tlsdata); - GETGX; - EX->q[0] = GX->q[0]; - if(MODREG) - EX->q[1] = 0; - break; - - default: - return 0; - } - break; - - case 0x39: /* CMP FS:Ew,Gw */ - nextop = F8; - GETEW_OFFS(0, tlsdata); - GETGW; - if (rex.w) - cmp64(emu, EW->q[0], GW->q[0]); - else - cmp16(emu, EW->word[0], GW->word[0]); - break; - - case 0x83: /* GRP3 Ew,Ib */ - nextop = F8; - GETEW_OFFS((opcode==0x81)?2:1, tlsdata); - GETGW; - if(opcode==0x81) - tmp16u = F16; - else { - tmp16s = F8S; - tmp16u = (uint16_t)tmp16s; - } - switch((nextop>>3)&7) { - case 0: EW->word[0] = add16(emu, EW->word[0], tmp16u); break; - case 1: EW->word[0] = or16(emu, EW->word[0], tmp16u); break; - case 2: EW->word[0] = adc16(emu, EW->word[0], tmp16u); break; - case 3: EW->word[0] = sbb16(emu, EW->word[0], tmp16u); break; - case 4: EW->word[0] = and16(emu, EW->word[0], tmp16u); break; - case 5: EW->word[0] = sub16(emu, EW->word[0], tmp16u); break; - case 6: EW->word[0] = xor16(emu, EW->word[0], tmp16u); break; - case 7: cmp16(emu, EW->word[0], tmp16u); break; - } - break; - - case 0x89: /* MOV FS:Ew,Gw */ - nextop = F8; - GETEW_OFFS(0, tlsdata); - GETGW; - if(rex.w) - EW->q[0] = GW->q[0]; - else - EW->word[0] = GW->word[0]; - break; - case 0x8B: /* MOV Gw,FS:Ew */ - nextop = F8; - GETEW_OFFS(0, tlsdata); - GETGW; - if(rex.w) - GW->q[0] = EW->q[0]; - else - GW->word[0] = EW->word[0]; - break; - case 0x8D: /* LEA Gw,M */ - nextop = F8; - GETGW; - tmp64u = GETEA(0); - if(rex.w) - GW->q[0] = tmp64u; - else - GW->word[0] = (uint16_t)tmp64u; - break; - case 0xC7: /* MOV FS:Ew,Iw */ - nextop = F8; - GETEW_OFFS(2, tlsdata); - if(rex.w) - EW->q[0] = F16S; - else - EW->word[0] = F16; - break; - default: - return 0; +uintptr_t tlsdata = GetSegmentBaseEmu(emu, seg); + +opcode = F8; +// REX prefix before the F0 are ignored +rex.rex = 0; +if (!rex.is32bits) + while (opcode >= 0x40 && opcode <= 0x4f) { + rex.rex = opcode; + opcode = F8; } - return addr; + +switch (opcode) { + + case 0x0F: + opcode = F8; + switch (opcode) { + + case 0x11: /* MOVUPD Ex, Gx */ + nextop = F8; + GETEX_OFFS(0, tlsdata); + GETGX; + memcpy(EX, GX, 16); // unaligned... + break; + + case 0x2E: /* UCOMISD Gx, Ex */ + // no special check... + case 0x2F: /* COMISD Gx, Ex */ + RESET_FLAGS(emu); + nextop = F8; + GETEX_OFFS(0, tlsdata); + GETGX; + uint64_t gx_val = GX->d[0]; + uint64_t ex_val = EX->d[0]; + if (isnan(gx_val) || isnan(ex_val)) { + SET_FLAG(F_ZF); SET_FLAG(F_PF); SET_FLAG(F_CF); + } else if (isgreater(gx_val, ex_val)) { + CLEAR_FLAG(F_ZF); CLEAR_FLAG(F_PF); CLEAR_FLAG(F_CF); + } else if (isless(gx_val, ex_val)) { + CLEAR_FLAG(F_ZF); CLEAR_FLAG(F_PF); SET_FLAG(F_CF); + } else { + SET_FLAG(F_ZF); CLEAR_FLAG(F_PF); CLEAR_FLAG(F_CF); + } + CLEAR_FLAG(F_OF); CLEAR_FLAG(F_AF); CLEAR_FLAG(F_SF); + break; + + case 0x6F: /* MOVDQA Gx, Ex */ + nextop = F8; + GETEX_OFFS(0, tlsdata); + GETGX; + GX->q[0] = EX->q[0]; + GX->q[1] = EX->q[1]; + break; + + case 0x7F: /* MOVDQA Ex,Gx */ + nextop = F8; + GETEX_OFFS(0, tlsdata); + GETGX; + EX->q[0] = GX->q[0]; + EX->q[1] = GX->q[1]; + break; + + case 0xD6: /* MOVQ Ex,Gx */ + nextop = F8; + GETEX_OFFS(0, tlsdata); + GETGX; + EX->q[0] = GX->q[0]; + if (MODREG) + EX->q[1] = 0; + break; + + default: + return 0; + } + break; + + case 0x39: /* CMP FS:Ew,Gw */ + nextop = F8; + GETEW_OFFS(0, tlsdata); + GETGW; + if (rex.w) + cmp64(emu, EW->q[0], GW->q[0]); + else + cmp16(emu, EW->word[0], GW->word[0]); + break; + + case 0x83: /* GRP3 Ew,Ib */ + uint8_t tmp8u = F8S; + nextop = F8; + GETEW_OFFS((opcode == 0x81) ? 2 : 1, tlsdata); + GETGW; + if (opcode == 0x81) + tmp16u = tmp8u; + else + tmp16u = tmp8u; + + switch ((nextop >> 3) & 7) { + case 0: EW->word[0] = add16(emu, EW->word[0], tmp16u); break; + case 1: EW->word[0] = or16(emu, EW->word[0], tmp16u); break; + case 2: EW->word[0] = adc16(emu, EW->word[0], tmp16u); break; + case 3: EW->word[0] = sbb16(emu, EW->word[0], tmp16u); break; + case 4: EW->word[0] = and16(emu, EW->word[0], tmp16u); break; + case 5: EW->word[0] = sub16(emu, EW->word[0], tmp16u); break; + case 6: EW->word[0] = xor16(emu, EW->word[0], tmp16u); break; + case 7: cmp16(emu, EW->word[0], tmp16u); break; + } + break; + + case 0x89: /* MOV FS:Ew,GW */ + nextop = F8; + GETEW_OFFS(0, tlsdata); + GETGW; + if (rex.w) + EW->q[0] = GW->q[0]; + else + EW->word[0] = GW->word[0]; + break; + + case 0x8B: /* MOV Gw,FS:Ew */ + nextop = F8; + GETGW; + tmp64u = GETEA(0); + if (rex.w) + GW->q[0] = tmp64u; + else + GW->word[0] = (uint16_t)tmp64u; + break; + + case 0x8D: /* LEA Gw,M */ + nextop = F8; + GETGW; + tmp64u = GETEA(0); + if (rex.w) + GW->q[0] = tmp64u; + else + GW->word[0] = (uint16_t)tmp64u; + break; + + case 0xC7: /* MOV FS:Ew,Iw */ + nextop = F8; + GETEW_OFFS(2, tlsdata); + if (rex.w) + EW->q[0] = F16S; + else + EW->word[0] = F16; + break; + + default: + return 0; } From b2f139d08a42d752276ef47eb790c95a46a2afa2 Mon Sep 17 00:00:00 2001 From: makhi burroughs <35209044+salt431@users.noreply.github.com> Date: Tue, 9 Jul 2024 15:30:01 -0400 Subject: [PATCH 07/17] Update x64run6664.c --- src/emu/x64run6664.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/emu/x64run6664.c b/src/emu/x64run6664.c index 8a2d5c1d7..e209a287d 100644 --- a/src/emu/x64run6664.c +++ b/src/emu/x64run6664.c @@ -1,6 +1,26 @@ +#define _GNU_SOURCE +#include +#include +#include +#include #include - -// ... +#include +#include +#include + +#include "debug.h" +#include "box64stack.h" +#include "x64emu.h" +#include "x64run.h" +#include "x64emu_private.h" +#include "x64run_private.h" +#include "x64primop.h" +#include "x64trace.h" +#include "x87emu_private.h" +#include "box64context.h" +#include "bridge.h" + +#include "modrm.h" uint8_t opcode; uint8_t nextop; From 7a5db1b9fbce5c71a1bb8c78ce07f810a83bf660 Mon Sep 17 00:00:00 2001 From: makhi burroughs <35209044+salt431@users.noreply.github.com> Date: Tue, 9 Jul 2024 15:42:51 -0400 Subject: [PATCH 08/17] Update x64run6664.c --- src/emu/x64run6664.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/emu/x64run6664.c b/src/emu/x64run6664.c index e209a287d..f541c0b26 100644 --- a/src/emu/x64run6664.c +++ b/src/emu/x64run6664.c @@ -22,11 +22,11 @@ #include "modrm.h" -uint8_t opcode; -uint8_t nextop; -uint16_t tmp16u; -int16_t tmp16s; -uint64_t tmp64u; +uint8_t opcode = 0; // initialize with a default value +uint8_t nextop = 0; +uint16_t tmp16u = 0; +int16_t tmp16s = 0; +uint64_t tmp64u = 0; reg64_t *oped, *opgd; sse_regs_t *opex, *opgx; From 96d8e1f52088ccbff32f9ad2ab0e46c2c6f607cd Mon Sep 17 00:00:00 2001 From: makhi burroughs <35209044+salt431@users.noreply.github.com> Date: Tue, 9 Jul 2024 15:46:06 -0400 Subject: [PATCH 09/17] Update x64run6664.c --- src/emu/x64run6664.c | 311 +++++++++++++++++++++---------------------- 1 file changed, 155 insertions(+), 156 deletions(-) diff --git a/src/emu/x64run6664.c b/src/emu/x64run6664.c index f541c0b26..299f7f7da 100644 --- a/src/emu/x64run6664.c +++ b/src/emu/x64run6664.c @@ -22,165 +22,164 @@ #include "modrm.h" -uint8_t opcode = 0; // initialize with a default value -uint8_t nextop = 0; -uint16_t tmp16u = 0; -int16_t tmp16s = 0; -uint64_t tmp64u = 0; -reg64_t *oped, *opgd; -sse_regs_t *opex, *opgx; - #ifdef TEST_INTERPRETER -x64emu_t* emu = test->emu; +uintptr_t Test6664(x64test_t *test, rex_t rex, int seg, uintptr_t addr) #else +uintptr_t Run6664(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr) #endif - -uintptr_t tlsdata = GetSegmentBaseEmu(emu, seg); - -opcode = F8; -// REX prefix before the F0 are ignored -rex.rex = 0; -if (!rex.is32bits) - while (opcode >= 0x40 && opcode <= 0x4f) { - rex.rex = opcode; - opcode = F8; - } - -switch (opcode) { - - case 0x0F: - opcode = F8; - switch (opcode) { - - case 0x11: /* MOVUPD Ex, Gx */ - nextop = F8; - GETEX_OFFS(0, tlsdata); - GETGX; - memcpy(EX, GX, 16); // unaligned... - break; - - case 0x2E: /* UCOMISD Gx, Ex */ - // no special check... - case 0x2F: /* COMISD Gx, Ex */ - RESET_FLAGS(emu); - nextop = F8; - GETEX_OFFS(0, tlsdata); - GETGX; - uint64_t gx_val = GX->d[0]; - uint64_t ex_val = EX->d[0]; - if (isnan(gx_val) || isnan(ex_val)) { - SET_FLAG(F_ZF); SET_FLAG(F_PF); SET_FLAG(F_CF); - } else if (isgreater(gx_val, ex_val)) { - CLEAR_FLAG(F_ZF); CLEAR_FLAG(F_PF); CLEAR_FLAG(F_CF); - } else if (isless(gx_val, ex_val)) { - CLEAR_FLAG(F_ZF); CLEAR_FLAG(F_PF); SET_FLAG(F_CF); - } else { - SET_FLAG(F_ZF); CLEAR_FLAG(F_PF); CLEAR_FLAG(F_CF); - } - CLEAR_FLAG(F_OF); CLEAR_FLAG(F_AF); CLEAR_FLAG(F_SF); - break; - - case 0x6F: /* MOVDQA Gx, Ex */ - nextop = F8; - GETEX_OFFS(0, tlsdata); - GETGX; - GX->q[0] = EX->q[0]; - GX->q[1] = EX->q[1]; - break; - - case 0x7F: /* MOVDQA Ex,Gx */ - nextop = F8; - GETEX_OFFS(0, tlsdata); - GETGX; - EX->q[0] = GX->q[0]; - EX->q[1] = GX->q[1]; - break; - - case 0xD6: /* MOVQ Ex,Gx */ - nextop = F8; - GETEX_OFFS(0, tlsdata); - GETGX; - EX->q[0] = GX->q[0]; - if (MODREG) - EX->q[1] = 0; - break; - - default: - return 0; - } - break; - - case 0x39: /* CMP FS:Ew,Gw */ - nextop = F8; - GETEW_OFFS(0, tlsdata); - GETGW; - if (rex.w) - cmp64(emu, EW->q[0], GW->q[0]); - else - cmp16(emu, EW->word[0], GW->word[0]); - break; - - case 0x83: /* GRP3 Ew,Ib */ - uint8_t tmp8u = F8S; - nextop = F8; - GETEW_OFFS((opcode == 0x81) ? 2 : 1, tlsdata); - GETGW; - if (opcode == 0x81) - tmp16u = tmp8u; - else - tmp16u = tmp8u; - - switch ((nextop >> 3) & 7) { - case 0: EW->word[0] = add16(emu, EW->word[0], tmp16u); break; - case 1: EW->word[0] = or16(emu, EW->word[0], tmp16u); break; - case 2: EW->word[0] = adc16(emu, EW->word[0], tmp16u); break; - case 3: EW->word[0] = sbb16(emu, EW->word[0], tmp16u); break; - case 4: EW->word[0] = and16(emu, EW->word[0], tmp16u); break; - case 5: EW->word[0] = sub16(emu, EW->word[0], tmp16u); break; - case 6: EW->word[0] = xor16(emu, EW->word[0], tmp16u); break; - case 7: cmp16(emu, EW->word[0], tmp16u); break; +{ + uint8_t opcode; + uint8_t nextop; + uint16_t tmp16u; + int16_t tmp16s; + uint64_t tmp64u; + reg64_t *oped, *opgd; + sse_regs_t *opex, *opgx; + #ifdef TEST_INTERPRETER + x64emu_t* emu = test->emu; + #endif + uintptr_t tlsdata = GetSegmentBaseEmu(emu, seg); + + opcode = F8; + // REX prefix before the F0 are ignored + rex.rex = 0; + if(!rex.is32bits) + while(opcode>=0x40 && opcode<=0x4f) { + rex.rex = opcode; + opcode = F8; } - break; - case 0x89: /* MOV FS:Ew,GW */ - nextop = F8; - GETEW_OFFS(0, tlsdata); - GETGW; - if (rex.w) - EW->q[0] = GW->q[0]; - else - EW->word[0] = GW->word[0]; - break; - - case 0x8B: /* MOV Gw,FS:Ew */ - nextop = F8; - GETGW; - tmp64u = GETEA(0); - if (rex.w) - GW->q[0] = tmp64u; - else - GW->word[0] = (uint16_t)tmp64u; - break; - - case 0x8D: /* LEA Gw,M */ - nextop = F8; - GETGW; - tmp64u = GETEA(0); - if (rex.w) - GW->q[0] = tmp64u; - else - GW->word[0] = (uint16_t)tmp64u; - break; - - case 0xC7: /* MOV FS:Ew,Iw */ - nextop = F8; - GETEW_OFFS(2, tlsdata); - if (rex.w) - EW->q[0] = F16S; - else - EW->word[0] = F16; - break; - - default: - return 0; + switch(opcode) { + + case 0x0F: + opcode = F8; + switch(opcode) { + + case 0x11: /* MOVUPD Ex, Gx */ + nextop = F8; + GETEX_OFFS(0, tlsdata); + GETGX; + memcpy(EX, GX, 16); // unaligned... + break; + + case 0x2E: /* UCOMISD Gx, Ex */ + // no special check... + case 0x2F: /* COMISD Gx, Ex */ + RESET_FLAGS(emu); + nextop = F8; + GETEX_OFFS(0, tlsdata); + GETGX; + if(isnan(GX->d[0]) || isnan(EX->d[0])) { + SET_FLAG(F_ZF); SET_FLAG(F_PF); SET_FLAG(F_CF); + } else if(isgreater(GX->d[0], EX->d[0])) { + CLEAR_FLAG(F_ZF); CLEAR_FLAG(F_PF); CLEAR_FLAG(F_CF); + } else if(isless(GX->d[0], EX->d[0])) { + CLEAR_FLAG(F_ZF); CLEAR_FLAG(F_PF); SET_FLAG(F_CF); + } else { + SET_FLAG(F_ZF); CLEAR_FLAG(F_PF); CLEAR_FLAG(F_CF); + } + CLEAR_FLAG(F_OF); CLEAR_FLAG(F_AF); CLEAR_FLAG(F_SF); + break; + + case 0x6F: /* MOVDQA Gx, Ex */ + nextop = F8; + GETEX_OFFS(0, tlsdata); + GETGX; + GX->q[0] = EX->q[0]; + GX->q[1] = EX->q[1]; + break; + + case 0x7F: /* MOVDQA Ex,Gx */ + nextop = F8; + GETEX_OFFS(0, tlsdata); + GETGX; + EX->q[0] = GX->q[0]; + EX->q[1] = GX->q[1]; + break; + + case 0xD6: /* MOVQ Ex,Gx */ + nextop = F8; + GETEX_OFFS(0, tlsdata); + GETGX; + EX->q[0] = GX->q[0]; + if(MODREG) + EX->q[1] = 0; + break; + + default: + return 0; + } + break; + + case 0x39: /* CMP FS:Ew,Gw */ + nextop = F8; + GETEW_OFFS(0, tlsdata); + GETGW; + if (rex.w) + cmp64(emu, EW->q[0], GW->q[0]); + else + cmp16(emu, EW->word[0], GW->word[0]); + break; + + case 0x83: /* GRP3 Ew,Ib */ + nextop = F8; + GETEW_OFFS((opcode==0x81)?2:1, tlsdata); + GETGW; + if(opcode==0x81) + tmp16u = F16; + else { + tmp16s = F8S; + tmp16u = (uint16_t)tmp16s; + } + switch((nextop>>3)&7) { + case 0: EW->word[0] = add16(emu, EW->word[0], tmp16u); break; + case 1: EW->word[0] = or16(emu, EW->word[0], tmp16u); break; + case 2: EW->word[0] = adc16(emu, EW->word[0], tmp16u); break; + case 3: EW->word[0] = sbb16(emu, EW->word[0], tmp16u); break; + case 4: EW->word[0] = and16(emu, EW->word[0], tmp16u); break; + case 5: EW->word[0] = sub16(emu, EW->word[0], tmp16u); break; + case 6: EW->word[0] = xor16(emu, EW->word[0], tmp16u); break; + case 7: cmp16(emu, EW->word[0], tmp16u); break; + } + break; + + case 0x89: /* MOV FS:Ew,Gw */ + nextop = F8; + GETEW_OFFS(0, tlsdata); + GETGW; + if(rex.w) + EW->q[0] = GW->q[0]; + else + EW->word[0] = GW->word[0]; + break; + case 0x8B: /* MOV Gw,FS:Ew */ + nextop = F8; + GETEW_OFFS(0, tlsdata); + GETGW; + if(rex.w) + GW->q[0] = EW->q[0]; + else + GW->word[0] = EW->word[0]; + break; + case 0x8D: /* LEA Gw,M */ + nextop = F8; + GETGW; + tmp64u = GETEA(0); + if(rex.w) + GW->q[0] = tmp64u; + else + GW->word[0] = (uint16_t)tmp64u; + break; + case 0xC7: /* MOV FS:Ew,Iw */ + nextop = F8; + GETEW_OFFS(2, tlsdata); + if(rex.w) + EW->q[0] = F16S; + else + EW->word[0] = F16; + break; + default: + return 0; + } + return addr; } From bfaa3bd341f154c881c6cfe26fab4e648c2d3664 Mon Sep 17 00:00:00 2001 From: makhi burroughs <35209044+salt431@users.noreply.github.com> Date: Tue, 9 Jul 2024 15:57:18 -0400 Subject: [PATCH 10/17] Update x64runavxf20f38.c --- src/emu/x64runavxf20f38.c | 91 +++++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 41 deletions(-) diff --git a/src/emu/x64runavxf20f38.c b/src/emu/x64runavxf20f38.c index 057540926..e56f27ba1 100644 --- a/src/emu/x64runavxf20f38.c +++ b/src/emu/x64runavxf20f38.c @@ -30,29 +30,28 @@ #include "modrm.h" #ifdef TEST_INTERPRETER -uintptr_t TestAVX_F20F38(x64test_t *test, vex_t vex, uintptr_t addr, int *step) +x64emu_t *emu = test->emu; #else -uintptr_t RunAVX_F20F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) +x64emu_t *emu = NULL; #endif -{ - uint8_t opcode; - uint8_t nextop; - uint8_t tmp8u, u8; - int8_t tmp8s; - int16_t tmp16s; - uint16_t tmp16u; - int32_t tmp32s, tmp32s2; - uint32_t tmp32u, tmp32u2; - uint64_t tmp64u, tmp64u2; - int64_t tmp64s; - reg64_t *oped, *opgd, *opvd; - sse_regs_t *opex, *opgx, *opvx, eax1, eax2; - sse_regs_t *opey, *opgy, *opvy, eay1, eay2; +uint8_t opcode, nextop; +reg64_t *oped, *opgd, *opvd; +sse_regs_t *opex, *opgx, *opvx, eax1, eax2; +sse_regs_t *opey, *opgy, *opvy, eay1, eay2; + +uint8_t tmp8u, u8, tmp8s; +int16_t tmp16s; +uint16_t tmp16u, tmp32s, tmp32u, tmp64u, tmp64u2; +int64_t tmp64s; +int32_t tmp32s2, tmp32u2, tmp64u3; #ifdef TEST_INTERPRETER - x64emu_t *emu = test->emu; +uintptr_t TestAVX_F20F38(x64test_t *test, vex_t vex, uintptr_t addr, int *step) +#else +uintptr_t RunAVX_F20F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) #endif +{ opcode = F8; rex_t rex = vex.rex; @@ -65,52 +64,62 @@ uintptr_t RunAVX_F20F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GETGD; GETVD; if(rex.w) { - tmp64u = 0; - u8 = 0; - for(int i=0; i<64; ++i) - if(ED->q[0]&(1LL<q[0]>>(u8++))&1LL)<q[0] = tmp64u; - } else { - tmp32u = 0; - u8 = 0; - for(int i=0; i<32; ++i) - if(ED->dword[0]&(1<dword[0]>>(u8++))&1)<q[0] = tmp32u; + reg64_t deped = ED[0]; + reg64_t devd = VD[0]; + reg64_t gqd = GD[0]; + + if(deped&MASK64) + gqx(gqd, gqx, dex(gqd), 0) ^= depx(deped, dexp, dex(deped)); + if(devd&MASK64) + gvx(gvd, gxv, devx(devd, dexp, dex(devd))); + } else { + ED[0] = 0; + VD[0] = 0; + GD[0] = 0; } break; + case 0xF6: /* MULX Gd, Vd, Ed (,RDX) */ nextop = F8; GETED(0); GETGD; GETVD; if(rex.w) { - unsigned __int128 res = (unsigned __int128)R_RDX * ED->q[0]; - VD->q[0] = res&0xFFFFFFFFFFFFFFFFLL; - GD->q[0] = res>>64; + reg64_t ed = ED[0]; + reg64_t gvd = VD[0]; + reg64_t gqd = GD[0]; + + if(ed&MASK64) + tmp64u = (ed ^ gvx(ed, gxv, dex(ed))) | ((ed ^ gqx(ed, gxq, dex(ed)))>>63); + gvd ^= tmp64u; + gqd ^= tmp64u >> 1; } else { - tmp64u = (uint64_t)R_EDX * ED->dword[0]; - VD->q[0] = tmp64u&0xFFFFFFFF; - GD->q[0] = tmp64u>>32; + reg32_t ed = ED[0]; + reg32_t gvd = VD[0]; + reg32_t gqd = GD[0]; + + if(ed) + gvd ^= ed & R_EDX; + gqd ^= (ed&R_EDX)>>31; } break; + case 0xF7: /* SHRX Gd, Ed, Vd */ nextop = F8; GETED(0); GETGD; GETVD; if(rex.w) { - u8 = VD->q[0] & 0x3f; - GD->q[0] = ED->q[0] >> u8; + u8 = VD[0] & 0x3f; + GD[0] = ED[0] >> u8; } else { - u8 = VD->dword[0] & 0x1f; - GD->q[0] = ED->dword[0] >> u8; + u8 = VD[0] & 0x1f; + GD[0] = ED[0] >> u8; } break; default: - return 0; + return addr; } return addr; } From f704417d11f8702ba22ced516906a48fccd3eeab Mon Sep 17 00:00:00 2001 From: makhi burroughs <35209044+salt431@users.noreply.github.com> Date: Tue, 9 Jul 2024 16:12:26 -0400 Subject: [PATCH 11/17] Update x64runavxf20f38.c --- src/emu/x64runavxf20f38.c | 108 ++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 58 deletions(-) diff --git a/src/emu/x64runavxf20f38.c b/src/emu/x64runavxf20f38.c index e56f27ba1..d766298ae 100644 --- a/src/emu/x64runavxf20f38.c +++ b/src/emu/x64runavxf20f38.c @@ -58,68 +58,60 @@ uintptr_t RunAVX_F20F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) switch(opcode) { - case 0xF5: /* PDEP Gd, Ed, Vd */ - nextop = F8; - GETED(0); - GETGD; - GETVD; - if(rex.w) { - reg64_t deped = ED[0]; - reg64_t devd = VD[0]; - reg64_t gqd = GD[0]; +case 0xF5: /* PDEP Gd, Ed, Vd */ + nextop = F8; + GETED(0); + GETGD; + GETVD; - if(deped&MASK64) - gqx(gqd, gqx, dex(gqd), 0) ^= depx(deped, dexp, dex(deped)); - if(devd&MASK64) - gvx(gvd, gxv, devx(devd, dexp, dex(devd))); - } else { - ED[0] = 0; - VD[0] = 0; - GD[0] = 0; - } - break; - - case 0xF6: /* MULX Gd, Vd, Ed (,RDX) */ - nextop = F8; - GETED(0); - GETGD; - GETVD; - if(rex.w) { - reg64_t ed = ED[0]; - reg64_t gvd = VD[0]; - reg64_t gqd = GD[0]; + if(rex.w) { + if(ED->q[0]&MASK64) + VD->q[0] ^= ~ED->q[0]; + if(VD->q[0]&MASK64) + GD->q[0] ^= ~VD->q[0]; + } else { + if(ED->dword[0]) + VD->dword[0] = ED->dword[0] & ~(R_EDX-1); + if(VD->dword[0]) + GD->dword[0] = VD->dword[0] & ~(R_EDX-1); + } + break; - if(ed&MASK64) - tmp64u = (ed ^ gvx(ed, gxv, dex(ed))) | ((ed ^ gqx(ed, gxq, dex(ed)))>>63); - gvd ^= tmp64u; - gqd ^= tmp64u >> 1; - } else { - reg32_t ed = ED[0]; - reg32_t gvd = VD[0]; - reg32_t gqd = GD[0]; +case 0xF6: /* MULX Gd, Vd, Ed (,RDX) */ + nextop = F8; + GETED(0); + GETGD; + GETVD; - if(ed) - gvd ^= ed & R_EDX; - gqd ^= (ed&R_EDX)>>31; - } - break; + if(rex.w) { + if(ED->q[0]&MASK64) + tmp64u = ED->q[0] * R_RDX64LL; + else + tmp64u = ED->dword[0] * R_EDX; + VD->q[0] ^= tmp64u; + GD->q[0] ^= tmp64u >> 63; + } else { + if(ED->dword[0]) + VD->dword[0] = ED->dword[0] * R_EDX; + GD->dword[0] = VD->dword[0]; + } + break; - case 0xF7: /* SHRX Gd, Ed, Vd */ - nextop = F8; - GETED(0); - GETGD; - GETVD; - if(rex.w) { - u8 = VD[0] & 0x3f; - GD[0] = ED[0] >> u8; - } else { - u8 = VD[0] & 0x1f; - GD[0] = ED[0] >> u8; - } - break; +case 0xF7: /* SHRX Gd, Ed, Vd */ + nextop = F8; + GETED(0); + GETGD; + GETVD; - default: - return addr; + if(rex.w) { + u8 = VD->q[0] & 0x3f; + GD->q[0] = ED->q[0] >> u8; + } else { + u8 = VD->dword[0] & 0x1f; + GD->dword[0] = ED->dword[0] >> u8; } + break; + +default: return addr; -} + } From 942c216346a3fb671bbdd79f1760808eedaa9131 Mon Sep 17 00:00:00 2001 From: makhi burroughs <35209044+salt431@users.noreply.github.com> Date: Tue, 9 Jul 2024 16:24:29 -0400 Subject: [PATCH 12/17] Update x64runavxf20f38.c --- src/emu/x64runavxf20f38.c | 46 ++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/src/emu/x64runavxf20f38.c b/src/emu/x64runavxf20f38.c index d766298ae..17a4816c3 100644 --- a/src/emu/x64runavxf20f38.c +++ b/src/emu/x64runavxf20f38.c @@ -1,4 +1,5 @@ #define _GNU_SOURCE +#define MASK64 0xFFFFFFFFFFFFFFFFLL #include #include #include @@ -60,58 +61,59 @@ uintptr_t RunAVX_F20F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) case 0xF5: /* PDEP Gd, Ed, Vd */ nextop = F8; + rex_t rex = vex.rex; GETED(0); GETGD; GETVD; if(rex.w) { - if(ED->q[0]&MASK64) - VD->q[0] ^= ~ED->q[0]; - if(VD->q[0]&MASK64) - GD->q[0] ^= ~VD->q[0]; + if(ED->default_op[0]&MASK64) + gvx(GD->default_op[0], gxv, dex(GD->default_op[0]), 0) ^= depx(ED->default_op[0], dexp, dex(ED->default_op[0])); + if(VD->default_op[0]&MASK64) + gvx(GD->default_op[0], gxv, devx(VD->default_op[0], dexp, dex(VD->default_op[0]))); } else { - if(ED->dword[0]) - VD->dword[0] = ED->dword[0] & ~(R_EDX-1); - if(VD->dword[0]) - GD->dword[0] = VD->dword[0] & ~(R_EDX-1); + if(ED->default_op[0]) + VD->default_op[0] = ED->default_op[0] & ~(R_EDX-1); + if(VD->default_op[0]) + GD->default_op[0] = VD->default_op[0] & ~(R_EDX-1); } break; case 0xF6: /* MULX Gd, Vd, Ed (,RDX) */ nextop = F8; + rex_t rex = vex.rex; GETED(0); GETGD; GETVD; if(rex.w) { - if(ED->q[0]&MASK64) - tmp64u = ED->q[0] * R_RDX64LL; - else - tmp64u = ED->dword[0] * R_EDX; - VD->q[0] ^= tmp64u; - GD->q[0] ^= tmp64u >> 63; + if(ED->default_op[0]&MASK64) + tmp64u = (ED->default_op[0] ^ gvx(ED->default_op[0], gxv, dex(ED->default_op[0]))) | ((ED->default_op[0] ^ gxq(ED->default_op[0], gxq, dex(ED->default_op[0])))>>63); + VD->default_op[0] ^= tmp64u; + GD->default_op[0] ^= tmp64u >> 31; } else { - if(ED->dword[0]) - VD->dword[0] = ED->dword[0] * R_EDX; - GD->dword[0] = VD->dword[0]; + if(ED->default_op[0]) + VD->default_op[0] = ED->default_op[0] * R_EDX; + GD->default_op[0] = VD->default_op[0]; } break; case 0xF7: /* SHRX Gd, Ed, Vd */ nextop = F8; + rex_t rex = vex.rex; GETED(0); GETGD; GETVD; if(rex.w) { - u8 = VD->q[0] & 0x3f; - GD->q[0] = ED->q[0] >> u8; + u8 = VD->default_op[0] & 0x3f; + GD->default_op[0] = ED->default_op[0] >> u8; } else { - u8 = VD->dword[0] & 0x1f; - GD->dword[0] = ED->dword[0] >> u8; + u8 = VD->default_op[0] & 0x1f; + GD->default_op[0] = ED->default_op[0] >> u8; } break; default: return addr; - } +} From 756ec624de6bbab5af72f57cf8e99bc4ba9929e5 Mon Sep 17 00:00:00 2001 From: makhi burroughs <35209044+salt431@users.noreply.github.com> Date: Tue, 9 Jul 2024 16:36:43 -0400 Subject: [PATCH 13/17] Update x64runavxf20f38.c --- src/emu/x64runavxf20f38.c | 40 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/src/emu/x64runavxf20f38.c b/src/emu/x64runavxf20f38.c index 17a4816c3..652c73834 100644 --- a/src/emu/x64runavxf20f38.c +++ b/src/emu/x64runavxf20f38.c @@ -1,5 +1,4 @@ #define _GNU_SOURCE -#define MASK64 0xFFFFFFFFFFFFFFFFLL #include #include #include @@ -58,6 +57,7 @@ uintptr_t RunAVX_F20F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) rex_t rex = vex.rex; switch(opcode) { +#define MASK64 0xFFFFFFFFFFFFFFFFLL case 0xF5: /* PDEP Gd, Ed, Vd */ nextop = F8; @@ -67,15 +67,15 @@ case 0xF5: /* PDEP Gd, Ed, Vd */ GETVD; if(rex.w) { - if(ED->default_op[0]&MASK64) - gvx(GD->default_op[0], gxv, dex(GD->default_op[0]), 0) ^= depx(ED->default_op[0], dexp, dex(ED->default_op[0])); - if(VD->default_op[0]&MASK64) - gvx(GD->default_op[0], gxv, devx(VD->default_op[0], dexp, dex(VD->default_op[0]))); + if(ED->q[0]&MASK64) + gvx(GD->q[0], gxv, ED->q[0], 0) ^= depx(ED->q[0], dexp, ED->q[0]); + if(VD->q[0]&MASK64) + gvx(GD->q[0], gxv, VD->q[0], 0) ^= depx(VD->q[0], dexp, VD->q[0]); } else { - if(ED->default_op[0]) - VD->default_op[0] = ED->default_op[0] & ~(R_EDX-1); - if(VD->default_op[0]) - GD->default_op[0] = VD->default_op[0] & ~(R_EDX-1); + if(ED->dword[0]) + VD->dword[0] = ED->dword[0] & ~(R_EDX-1); + if(VD->dword[0]) + GD->dword[0] = VD->dword[0] & ~(R_EDX-1); } break; @@ -87,14 +87,14 @@ case 0xF6: /* MULX Gd, Vd, Ed (,RDX) */ GETVD; if(rex.w) { - if(ED->default_op[0]&MASK64) - tmp64u = (ED->default_op[0] ^ gvx(ED->default_op[0], gxv, dex(ED->default_op[0]))) | ((ED->default_op[0] ^ gxq(ED->default_op[0], gxq, dex(ED->default_op[0])))>>63); - VD->default_op[0] ^= tmp64u; - GD->default_op[0] ^= tmp64u >> 31; + if(ED->q[0]&MASK64) + tmp64u = (ED->q[0] ^ gvx(GD->q[0], gxv, ED->q[0], 0)) | ((ED->q[0] ^ gxq(GD->q[0], gxq, ED->q[0], 0))>>63); + VD->q[0] ^= tmp64u; + GD->q[0] ^= tmp64u >> 31; } else { - if(ED->default_op[0]) - VD->default_op[0] = ED->default_op[0] * R_EDX; - GD->default_op[0] = VD->default_op[0]; + if(ED->dword[0]) + VD->dword[0] = ED->dword[0] * R_EDX; + GD->dword[0] = VD->dword[0]; } break; @@ -106,11 +106,11 @@ case 0xF7: /* SHRX Gd, Ed, Vd */ GETVD; if(rex.w) { - u8 = VD->default_op[0] & 0x3f; - GD->default_op[0] = ED->default_op[0] >> u8; + u8 = VD->q[0] & 0x3f; + GD->q[0] = ED->q[0] >> u8; } else { - u8 = VD->default_op[0] & 0x1f; - GD->default_op[0] = ED->default_op[0] >> u8; + u8 = VD->dword[0] & 0x1f; + GD->dword[0] = ED->dword[0] >> u8; } break; From 3a46504f904c96d8baf3133ff0c7510b47174a05 Mon Sep 17 00:00:00 2001 From: makhi burroughs <35209044+salt431@users.noreply.github.com> Date: Tue, 9 Jul 2024 16:39:39 -0400 Subject: [PATCH 14/17] Update x64runavxf20f38.c --- src/emu/x64runavxf20f38.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/emu/x64runavxf20f38.c b/src/emu/x64runavxf20f38.c index 652c73834..b61ae9b01 100644 --- a/src/emu/x64runavxf20f38.c +++ b/src/emu/x64runavxf20f38.c @@ -113,7 +113,8 @@ case 0xF7: /* SHRX Gd, Ed, Vd */ GD->dword[0] = ED->dword[0] >> u8; } break; - -default: + default: + return 0; + } return addr; } From b23f6b2dcdbb19b0c81b401f47032ae8ee4e0d93 Mon Sep 17 00:00:00 2001 From: makhi burroughs <35209044+salt431@users.noreply.github.com> Date: Tue, 9 Jul 2024 17:03:08 -0400 Subject: [PATCH 15/17] Update x64runavxf20f38.c --- src/emu/x64runavxf20f38.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/emu/x64runavxf20f38.c b/src/emu/x64runavxf20f38.c index b61ae9b01..b71036d70 100644 --- a/src/emu/x64runavxf20f38.c +++ b/src/emu/x64runavxf20f38.c @@ -61,16 +61,15 @@ uintptr_t RunAVX_F20F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) case 0xF5: /* PDEP Gd, Ed, Vd */ nextop = F8; - rex_t rex = vex.rex; GETED(0); GETGD; GETVD; if(rex.w) { if(ED->q[0]&MASK64) - gvx(GD->q[0], gxv, ED->q[0], 0) ^= depx(ED->q[0], dexp, ED->q[0]); + ED->q[0] ^ depx(ED->q[0], dexp, ED->q[0]); if(VD->q[0]&MASK64) - gvx(GD->q[0], gxv, VD->q[0], 0) ^= depx(VD->q[0], dexp, VD->q[0]); + VD->q[0] ^= ED->q[0]; } else { if(ED->dword[0]) VD->dword[0] = ED->dword[0] & ~(R_EDX-1); @@ -81,16 +80,14 @@ case 0xF5: /* PDEP Gd, Ed, Vd */ case 0xF6: /* MULX Gd, Vd, Ed (,RDX) */ nextop = F8; - rex_t rex = vex.rex; GETED(0); GETGD; GETVD; if(rex.w) { - if(ED->q[0]&MASK64) - tmp64u = (ED->q[0] ^ gvx(GD->q[0], gxv, ED->q[0], 0)) | ((ED->q[0] ^ gxq(GD->q[0], gxq, ED->q[0], 0))>>63); + tmp64u = ED->q[0] * R_EDX64LL; VD->q[0] ^= tmp64u; - GD->q[0] ^= tmp64u >> 31; + GD->q[0] ^= tmp64u >> 63; } else { if(ED->dword[0]) VD->dword[0] = ED->dword[0] * R_EDX; @@ -100,7 +97,6 @@ case 0xF6: /* MULX Gd, Vd, Ed (,RDX) */ case 0xF7: /* SHRX Gd, Ed, Vd */ nextop = F8; - rex_t rex = vex.rex; GETED(0); GETGD; GETVD; From d5526bc8fb64301c8cebdf249063241ffcda1531 Mon Sep 17 00:00:00 2001 From: makhi burroughs <35209044+salt431@users.noreply.github.com> Date: Tue, 9 Jul 2024 17:07:35 -0400 Subject: [PATCH 16/17] Update x64runavxf20f38.c --- src/emu/x64runavxf20f38.c | 144 ++++++++++++++++++-------------------- 1 file changed, 68 insertions(+), 76 deletions(-) diff --git a/src/emu/x64runavxf20f38.c b/src/emu/x64runavxf20f38.c index b71036d70..c72221962 100644 --- a/src/emu/x64runavxf20f38.c +++ b/src/emu/x64runavxf20f38.c @@ -1,4 +1,4 @@ -#define _GNU_SOURCE +#define *GNU*SOURCE #include #include #include @@ -8,7 +8,6 @@ #include #include #include - #include "debug.h" #include "box64stack.h" #include "x64emu.h" @@ -26,91 +25,84 @@ #include "custommem.h" #include "../dynarec/native_lock.h" #endif - #include "modrm.h" -#ifdef TEST_INTERPRETER -x64emu_t *emu = test->emu; -#else -x64emu_t *emu = NULL; -#endif - -uint8_t opcode, nextop; -reg64_t *oped, *opgd, *opvd; -sse_regs_t *opex, *opgx, *opvx, eax1, eax2; -sse_regs_t *opey, *opgy, *opvy, eay1, eay2; - -uint8_t tmp8u, u8, tmp8s; -int16_t tmp16s; -uint16_t tmp16u, tmp32s, tmp32u, tmp64u, tmp64u2; -int64_t tmp64s; -int32_t tmp32s2, tmp32u2, tmp64u3; - #ifdef TEST_INTERPRETER uintptr_t TestAVX_F20F38(x64test_t *test, vex_t vex, uintptr_t addr, int *step) #else uintptr_t RunAVX_F20F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) #endif { - opcode = F8; - + uint8_t opcode = F8; + uint8_t nextop; + uint8_t u8; + uint32_t tmp32u; + uint64_t tmp64u; + reg64_t *oped, *opgd, *opvd; + sse_regs_t *opex, *opgx, *opvx; +#ifdef TEST_INTERPRETER + x64emu_t *emu = test->emu; +#endif rex_t rex = vex.rex; switch(opcode) { -#define MASK64 0xFFFFFFFFFFFFFFFFLL - -case 0xF5: /* PDEP Gd, Ed, Vd */ - nextop = F8; - GETED(0); - GETGD; - GETVD; - - if(rex.w) { - if(ED->q[0]&MASK64) - ED->q[0] ^ depx(ED->q[0], dexp, ED->q[0]); - if(VD->q[0]&MASK64) - VD->q[0] ^= ED->q[0]; - } else { - if(ED->dword[0]) - VD->dword[0] = ED->dword[0] & ~(R_EDX-1); - if(VD->dword[0]) - GD->dword[0] = VD->dword[0] & ~(R_EDX-1); - } - break; - -case 0xF6: /* MULX Gd, Vd, Ed (,RDX) */ - nextop = F8; - GETED(0); - GETGD; - GETVD; - - if(rex.w) { - tmp64u = ED->q[0] * R_EDX64LL; - VD->q[0] ^= tmp64u; - GD->q[0] ^= tmp64u >> 63; - } else { - if(ED->dword[0]) - VD->dword[0] = ED->dword[0] * R_EDX; - GD->dword[0] = VD->dword[0]; - } - break; - -case 0xF7: /* SHRX Gd, Ed, Vd */ - nextop = F8; - GETED(0); - GETGD; - GETVD; - - if(rex.w) { - u8 = VD->q[0] & 0x3f; - GD->q[0] = ED->q[0] >> u8; - } else { - u8 = VD->dword[0] & 0x1f; - GD->dword[0] = ED->dword[0] >> u8; - } - break; - default: - return 0; + case 0xF5: /* PDEP Gd, Ed, Vd */ + nextop = F8; + GETED(0); + GETGD; + GETVD; + if(rex.w) { + tmp64u = 0; + u8 = 0; + for(int i=0; i<64; ++i) { + if(ED->q[0] & (1ULL << i)) { + tmp64u |= ((VD->q[0] >> u8) & 1ULL) << i; + ++u8; + } + } + GD->q[0] = tmp64u; + } else { + tmp32u = 0; + u8 = 0; + for(int i=0; i<32; ++i) { + if(ED->dword[0] & (1U << i)) { + tmp32u |= ((VD->dword[0] >> u8) & 1U) << i; + ++u8; + } + } + GD->q[0] = tmp32u; + } + break; + case 0xF6: /* MULX Gd, Vd, Ed (,RDX) */ + nextop = F8; + GETED(0); + GETGD; + GETVD; + if(rex.w) { + __uint128_t res = (__uint128_t)R_RDX * ED->q[0]; + VD->q[0] = (uint64_t)res; + GD->q[0] = (uint64_t)(res >> 64); + } else { + tmp64u = (uint64_t)R_EDX * ED->dword[0]; + VD->q[0] = (uint32_t)tmp64u; + GD->q[0] = (uint32_t)(tmp64u >> 32); + } + break; + case 0xF7: /* SHRX Gd, Ed, Vd */ + nextop = F8; + GETED(0); + GETGD; + GETVD; + if(rex.w) { + u8 = VD->q[0] & 0x3f; + GD->q[0] = ED->q[0] >> u8; + } else { + u8 = VD->dword[0] & 0x1f; + GD->q[0] = ED->dword[0] >> u8; + } + break; + default: + return 0; } return addr; } From 564a6eddee7642431ec757b986c0935d562f7a9f Mon Sep 17 00:00:00 2001 From: makhi burroughs <35209044+salt431@users.noreply.github.com> Date: Tue, 9 Jul 2024 17:09:31 -0400 Subject: [PATCH 17/17] Update x64runavxf20f38.c --- src/emu/x64runavxf20f38.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emu/x64runavxf20f38.c b/src/emu/x64runavxf20f38.c index c72221962..e4fa4719e 100644 --- a/src/emu/x64runavxf20f38.c +++ b/src/emu/x64runavxf20f38.c @@ -1,4 +1,4 @@ -#define *GNU*SOURCE +#define _GNU_SOURCE #include #include #include