Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

optimize avx #1666

Open
wants to merge 19 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 22 additions & 30 deletions src/emu/x64runavxf20f38.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
#include <signal.h>
#include <sys/types.h>
#include <unistd.h>

#include "debug.h"
#include "box64stack.h"
#include "x64emu.h"
Expand All @@ -26,7 +25,6 @@
#include "custommem.h"
#include "../dynarec/native_lock.h"
#endif

#include "modrm.h"

#ifdef TEST_INTERPRETER
Expand All @@ -35,30 +33,19 @@ uintptr_t TestAVX_F20F38(x64test_t *test, vex_t vex, uintptr_t addr, int *step)
uintptr_t RunAVX_F20F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
#endif
{
uint8_t opcode;
uint8_t opcode = F8;
uint8_t nextop;
uint8_t tmp8u, u8;
int8_t tmp8s;
int16_t tmp16s;
uint16_t tmp16u;
int32_t tmp32s, tmp32s2;
uint32_t tmp32u, tmp32u2;
uint64_t tmp64u, tmp64u2;
int64_t tmp64s;
uint8_t u8;
uint32_t tmp32u;
uint64_t tmp64u;
reg64_t *oped, *opgd, *opvd;
sse_regs_t *opex, *opgx, *opvx, eax1, eax2;
sse_regs_t *opey, *opgy, *opvy, eay1, eay2;


sse_regs_t *opex, *opgx, *opvx;
#ifdef TEST_INTERPRETER
x64emu_t *emu = test->emu;
#endif
opcode = F8;

rex_t rex = vex.rex;

switch(opcode) {

case 0xF5: /* PDEP Gd, Ed, Vd */
nextop = F8;
GETED(0);
Expand All @@ -67,16 +54,22 @@ uintptr_t RunAVX_F20F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
if(rex.w) {
tmp64u = 0;
u8 = 0;
for(int i=0; i<64; ++i)
if(ED->q[0]&(1LL<<i))
tmp64u |= ((VD->q[0]>>(u8++))&1LL)<<i;
for(int i=0; i<64; ++i) {
if(ED->q[0] & (1ULL << i)) {
tmp64u |= ((VD->q[0] >> u8) & 1ULL) << i;
++u8;
}
}
GD->q[0] = tmp64u;
} else {
tmp32u = 0;
u8 = 0;
for(int i=0; i<32; ++i)
if(ED->dword[0]&(1<<i))
tmp32u |= ((VD->dword[0]>>(u8++))&1)<<i;
for(int i=0; i<32; ++i) {
if(ED->dword[0] & (1U << i)) {
tmp32u |= ((VD->dword[0] >> u8) & 1U) << i;
++u8;
}
}
GD->q[0] = tmp32u;
}
break;
Expand All @@ -86,13 +79,13 @@ uintptr_t RunAVX_F20F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
GETGD;
GETVD;
if(rex.w) {
unsigned __int128 res = (unsigned __int128)R_RDX * ED->q[0];
VD->q[0] = res&0xFFFFFFFFFFFFFFFFLL;
GD->q[0] = res>>64;
__uint128_t res = (__uint128_t)R_RDX * ED->q[0];
VD->q[0] = (uint64_t)res;
GD->q[0] = (uint64_t)(res >> 64);
} else {
tmp64u = (uint64_t)R_EDX * ED->dword[0];
VD->q[0] = tmp64u&0xFFFFFFFF;
GD->q[0] = tmp64u>>32;
VD->q[0] = (uint32_t)tmp64u;
GD->q[0] = (uint32_t)(tmp64u >> 32);
}
break;
case 0xF7: /* SHRX Gd, Ed, Vd */
Expand All @@ -108,7 +101,6 @@ uintptr_t RunAVX_F20F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
GD->q[0] = ED->dword[0] >> u8;
}
break;

default:
return 0;
}
Expand Down