Skip to content

Commit

Permalink
Riscv simd
Browse files Browse the repository at this point in the history
Signed-off-by: Laszlo Voros <[email protected]>
  • Loading branch information
vorosl committed Dec 18, 2024
1 parent 20d770b commit 9a9181f
Show file tree
Hide file tree
Showing 8 changed files with 1,474 additions and 106 deletions.
19 changes: 14 additions & 5 deletions src/jit/Backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ static void emitSelect128(sljit_compiler*, Instruction*, sljit_s32);
static void emitMove(sljit_compiler*, uint32_t type, Operand* from, Operand* to);
static ByteCodeStackOffset* emitStoreOntoStack(sljit_compiler* compiler, Operand* param, ByteCodeStackOffset* stackOffset, const ValueTypeVector& types, bool isWordOffsets);

#if (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) || (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
#if (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) || (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) || (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV && defined __riscv_vector)
#define HAS_SIMD

#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32)
Expand Down Expand Up @@ -433,7 +433,7 @@ static void simdOperandToArg(sljit_compiler* compiler, Operand* operand, JITArg&
arg.argw = 0;
}

#endif /* SLJIT_CONFIG_ARM */
#endif /* SLJIT_CONFIG_ARM || SLJIT_CONFIG_X86 || SLJIT_CONFIG_RISCV */

#include "FloatMathInl.h"

Expand All @@ -456,6 +456,8 @@ static void simdOperandToArg(sljit_compiler* compiler, Operand* operand, JITArg&
#include "SimdArm64Inl.h"
#elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32)
#include "SimdArm32Inl.h"
#elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV && defined __riscv_vector)
#include "SimdRiscvInl.h"
#endif /* SLJIT_CONFIG_ARM */

#ifdef HAS_SIMD
Expand Down Expand Up @@ -1028,6 +1030,9 @@ JITCompiler::JITCompiler(Module* module, uint32_t JITFlags)
, m_options(0)
, m_savedIntegerRegCount(0)
, m_savedFloatRegCount(0)
#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
, m_savedVectorRegCount(0)
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */
, m_stackTmpSize(0)
{
if (module->m_jitModule != nullptr) {
Expand Down Expand Up @@ -1530,9 +1535,13 @@ void JITCompiler::emitProlog()
ASSERT(m_stackTmpSize <= 16);
#endif /* SLJIT_CONFIG_ARM_32 */

sljit_emit_enter(m_compiler, options, SLJIT_ARGS1(P, P_R),
SLJIT_NUMBER_OF_SCRATCH_REGISTERS | SLJIT_ENTER_FLOAT(SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS),
(m_savedIntegerRegCount + 2) | SLJIT_ENTER_FLOAT(m_savedFloatRegCount), m_context.stackTmpStart + m_stackTmpSize);
sljit_s32 scratches = SLJIT_NUMBER_OF_SCRATCH_REGISTERS | SLJIT_ENTER_FLOAT(SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS) | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SCRATCH_VECTOR_REGISTERS);
#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
sljit_s32 saveds = (m_savedIntegerRegCount + 2) | SLJIT_ENTER_FLOAT(m_savedFloatRegCount) | SLJIT_ENTER_VECTOR(m_savedVectorRegCount);
#else /* !SLJIT_SEPARATE_VECTOR_REGISTERS */
sljit_s32 saveds = (m_savedIntegerRegCount + 2) | SLJIT_ENTER_FLOAT(m_savedFloatRegCount) | SLJIT_ENTER_VECTOR(m_savedFloatRegCount);
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */
sljit_emit_enter(m_compiler, options, SLJIT_ARGS1(P, P_R), scratches, saveds, m_context.stackTmpStart + m_stackTmpSize);

sljit_emit_op1(m_compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), kContextOffset, SLJIT_R0, 0);

Expand Down
138 changes: 78 additions & 60 deletions src/jit/ByteCodeParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,66 +143,66 @@ static bool isFloatGlobal(uint32_t globalIndex, Module* module)
// [TEMPORARY_TYPE] | S0 | S2 : A temporary register is required, which can
// be the same as the first or third source operands

#define OPERAND_TYPE_LIST \
OL2(OTOp1I32, /* SD */ I32, I32 | S0) \
OL3(OTOp2I32, /* SSD */ I32, I32, I32 | S0 | S1) \
OL2(OTOp1I64, /* SD */ I64, I64 | S0) \
OL2(OTOp1F32, /* SD */ F32, F32 | S0) \
OL2(OTOp1F64, /* SD */ F64, F64 | S0) \
OL3(OTOp2F32, /* SSD */ F32, F32, F32 | S0 | S1) \
OL3(OTOp2F64, /* SSD */ F64, F64, F64 | S0 | S1) \
OL1(OTGetI32, /* S */ I32) \
OL1(OTPutI32, /* D */ I32) \
OL1(OTPutI64, /* D */ I64) \
OL1(OTPutV128, /* D */ V128) \
OL1(OTPutPTR, /* D */ PTR) \
OL2(OTMoveF32, /* SD */ F32 | NOTMP, F32 | S0) \
OL2(OTMoveF64, /* SD */ F64 | NOTMP, F64 | S0) \
OL2(OTMoveV128, /* SD */ V128, V128 | S0) \
OL2(OTI32ReinterpretF32, /* SD */ F32, I32) \
OL2(OTI64ReinterpretF64, /* SD */ F64, I64) \
OL2(OTF32ReinterpretI32, /* SD */ I32, F32) \
OL2(OTF64ReinterpretI64, /* SD */ I64, F64) \
OL2(OTEqzI64, /* SD */ I64, I32) \
OL3(OTCompareI64, /* SSD */ I64, I64, I32) \
OL3(OTCompareF32, /* SSD */ F32, F32, I32) \
OL3(OTCompareF64, /* SSD */ F64, F64, I32) \
OL3(OTCopySignF32, /* SSD */ F32, F32, F32 | TMP | S0 | S1) \
OL3(OTCopySignF64, /* SSD */ F64, F64, F64 | TMP | S0 | S1) \
OL2(OTDemoteF64, /* SD */ F64, F32 | S0) \
OL2(OTPromoteF32, /* SD */ F32, F64 | S0) \
OL4(OTLoadI32, /* SDTT */ I32, I32 | S0, PTR, I32 | S0) \
OL4(OTLoadF32, /* SDTT */ I32, F32, PTR, I32 | S0) \
OL4(OTLoadF64, /* SDTT */ I32, F64, PTR, I32 | S0) \
OL4(OTLoadV128, /* SDTT */ I32, V128 | TMP, PTR, I32 | S0) \
OL5(OTLoadLaneV128, /* SSDTTT */ I32, V128 | NOTMP, V128 | TMP | S1, PTR, I32 | S0) \
OL5(OTStoreI32, /* SSTTT */ I32, I32, PTR, I32 | S0, I32 | S1) \
OL4(OTStoreF32, /* SSTT */ I32, F32 | NOTMP, PTR, I32 | S0) \
OL5(OTStoreI64, /* SSTTT */ I32, I64, PTR, I32 | S0, PTR | S1) \
OL4(OTStoreF64, /* SSTT */ I32, F64 | NOTMP, PTR, I32 | S0) \
OL4(OTStoreV128, /* SSTT */ I32, V128 | TMP, PTR, I32 | S0) \
OL3(OTCallback3Arg, /* SSS */ I32, I32, I32) \
OL3(OTTableGrow, /* SSD */ I32, PTR, I32 | S0 | S1) \
OL4(OTTableSet, /* SSTT */ I32, PTR, I32 | S0, PTR) \
OL3(OTTableGet, /* SDT */ I32, PTR | TMP | S0, I32) \
OL1(OTGlobalGetF32, /* D */ F32) \
OL1(OTGlobalGetF64, /* D */ F64) \
OL2(OTGlobalSetI32, /* ST */ I32, PTR) \
OL2(OTGlobalSetI64, /* ST */ I64, PTR) \
OL1(OTGlobalSetF32, /* S */ F32 | NOTMP) \
OL1(OTGlobalSetF64, /* S */ F64 | NOTMP) \
OL2(OTConvertInt32FromInt64, /* SD */ I64, I32) \
OL2(OTConvertInt64FromInt32, /* SD */ I32, I64) \
OL2(OTConvertInt32FromFloat32, /* SD */ F32 | TMP, I32 | TMP) \
OL2(OTConvertInt32FromFloat64, /* SD */ F64 | TMP, I32 | TMP) \
OL2(OTConvertInt64FromFloat32Callback, /* SD */ F32, I64) \
OL2(OTConvertInt64FromFloat64Callback, /* SD */ F64, I64) \
OL2(OTConvertFloat32FromInt32, /* SD */ I32, F32) \
OL2(OTConvertFloat64FromInt32, /* SD */ I32, F64) \
OL2(OTConvertFloat32FromInt64, /* SD */ I64, F32) \
OL2(OTConvertFloat64FromInt64, /* SD */ I64, F64) \
OL4(OTSelectI32, /* SSSD */ I32, I32, I32, I32 | S0 | S1) \
OL4(OTSelectF32, /* SSSD */ F32, F32, I32, F32 | S0 | S1) \
#define OPERAND_TYPE_LIST \
OL2(OTOp1I32, /* SD */ I32, I32 | S0) \
OL3(OTOp2I32, /* SSD */ I32, I32, I32 | S0 | S1) \
OL2(OTOp1I64, /* SD */ I64, I64 | S0) \
OL2(OTOp1F32, /* SD */ F32, F32 | S0) \
OL2(OTOp1F64, /* SD */ F64, F64 | S0) \
OL3(OTOp2F32, /* SSD */ F32, F32, F32 | S0 | S1) \
OL3(OTOp2F64, /* SSD */ F64, F64, F64 | S0 | S1) \
OL1(OTGetI32, /* S */ I32) \
OL1(OTPutI32, /* D */ I32) \
OL1(OTPutI64, /* D */ I64) \
OL1(OTPutV128, /* D */ V128) \
OL1(OTPutPTR, /* D */ PTR) \
OL2(OTMoveF32, /* SD */ F32 | NOTMP, F32 | S0) \
OL2(OTMoveF64, /* SD */ F64 | NOTMP, F64 | S0) \
OL2(OTMoveV128, /* SD */ V128, V128 | S0) \
OL2(OTI32ReinterpretF32, /* SD */ F32, I32) \
OL2(OTI64ReinterpretF64, /* SD */ F64, I64) \
OL2(OTF32ReinterpretI32, /* SD */ I32, F32) \
OL2(OTF64ReinterpretI64, /* SD */ I64, F64) \
OL2(OTEqzI64, /* SD */ I64, I32) \
OL3(OTCompareI64, /* SSD */ I64, I64, I32) \
OL3(OTCompareF32, /* SSD */ F32, F32, I32) \
OL3(OTCompareF64, /* SSD */ F64, F64, I32) \
OL3(OTCopySignF32, /* SSD */ F32, F32, F32 | TMP | S0 | S1) \
OL3(OTCopySignF64, /* SSD */ F64, F64, F64 | TMP | S0 | S1) \
OL2(OTDemoteF64, /* SD */ F64, F32 | S0) \
OL2(OTPromoteF32, /* SD */ F32, F64 | S0) \
OL4(OTLoadI32, /* SDTT */ I32, I32 | S0, PTR, I32 | S0) \
OL4(OTLoadF32, /* SDTT */ I32, F32, PTR, I32 | S0) \
OL4(OTLoadF64, /* SDTT */ I32, F64, PTR, I32 | S0) \
OL4(OTLoadV128, /* SDTT */ I32, V128 | TMP, PTR, I32 | S0) \
OL5(OTLoadLaneV128, /* SSDTT */ I32, V128 | NOTMP, V128 | TMP | S1, PTR, I32 | S0) \
OL5(OTStoreI32, /* SSTTT */ I32, I32, PTR, I32 | S0, I32 | S1) \
OL4(OTStoreF32, /* SSTT */ I32, F32 | NOTMP, PTR, I32 | S0) \
OL5(OTStoreI64, /* SSTTT */ I32, I64, PTR, I32 | S0, PTR | S1) \
OL4(OTStoreF64, /* SSTT */ I32, F64 | NOTMP, PTR, I32 | S0) \
OL4(OTStoreV128, /* SSTT */ I32, V128 | TMP, PTR, I32 | S0) \
OL3(OTCallback3Arg, /* SSS */ I32, I32, I32) \
OL3(OTTableGrow, /* SSD */ I32, PTR, I32 | S0 | S1) \
OL4(OTTableSet, /* SSTT */ I32, PTR, I32 | S0, PTR) \
OL3(OTTableGet, /* SDT */ I32, PTR | TMP | S0, I32) \
OL1(OTGlobalGetF32, /* D */ F32) \
OL1(OTGlobalGetF64, /* D */ F64) \
OL2(OTGlobalSetI32, /* ST */ I32, PTR) \
OL2(OTGlobalSetI64, /* ST */ I64, PTR) \
OL1(OTGlobalSetF32, /* S */ F32 | NOTMP) \
OL1(OTGlobalSetF64, /* S */ F64 | NOTMP) \
OL2(OTConvertInt32FromInt64, /* SD */ I64, I32) \
OL2(OTConvertInt64FromInt32, /* SD */ I32, I64) \
OL2(OTConvertInt32FromFloat32, /* SD */ F32 | TMP, I32 | TMP) \
OL2(OTConvertInt32FromFloat64, /* SD */ F64 | TMP, I32 | TMP) \
OL2(OTConvertInt64FromFloat32Callback, /* SD */ F32, I64) \
OL2(OTConvertInt64FromFloat64Callback, /* SD */ F64, I64) \
OL2(OTConvertFloat32FromInt32, /* SD */ I32, F32) \
OL2(OTConvertFloat64FromInt32, /* SD */ I32, F64) \
OL2(OTConvertFloat32FromInt64, /* SD */ I64, F32) \
OL2(OTConvertFloat64FromInt64, /* SD */ I64, F64) \
OL4(OTSelectI32, /* SSSD */ I32, I32, I32, I32 | S0 | S1) \
OL4(OTSelectF32, /* SSSD */ F32, F32, I32, F32 | S0 | S1) \
OL4(OTSelectF64, /* SSSD */ F64, F64, I32, F64 | S0 | S1)

#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE)
Expand Down Expand Up @@ -322,6 +322,24 @@ static bool isFloatGlobal(uint32_t globalIndex, Module* module)
#define OTShiftV128Tmp OTShiftV128
#define OTOp3DotAddV128 OTOp3V128

#elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV)
#define OPERAND_TYPE_LIST_SIMD_ARCH \
OL2(OTOp1V128CB, /* SD */ V128 | NOTMP, V128 | NOTMP) \
OL3(OTOp2V128, /* SSD */ V128 | TMP, V128 | TMP, V128 | TMP | S0 | S1) \
OL3(OTOp1V128Tmp, /* SDT */ V128 | NOTMP, V128 | TMP | S0, V128) \
OL3(OTSwizzleV128, /* SSD */ V128 | TMP, V128 | NOTMP, V128 | TMP | S1) \
OL3(OTShuffleV128, /* SSD */ V128 | TMP, V128 | TMP, V128 | TMP) \
OL3(OTShiftV128, /* SSD */ V128 | NOTMP, I32, V128 | TMP | S0)

// List of aliases.
#define OTOp2V128Rev OTOp2V128
#define OTOp2V128Tmp OTOp2V128
#define OTMinMaxV128 OTOp2V128
#define OTPMinMaxV128 OTOp2V128
#define OTPopcntV128 OTOp1V128Tmp
#define OTShiftV128Tmp OTShiftV128
#define OTOp3DotAddV128 OTOp3V128

#endif /* SLJIT_CONFIG_ARM */

// Constructing read-only operand descriptors.
Expand Down
3 changes: 3 additions & 0 deletions src/jit/Compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -832,6 +832,9 @@ class JITCompiler {
uint32_t m_options;
uint8_t m_savedIntegerRegCount;
uint8_t m_savedFloatRegCount;
#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
uint8_t m_savedVectorRegCount;
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */
uint8_t m_stackTmpSize;

std::vector<TryBlock> m_tryBlocks;
Expand Down
8 changes: 7 additions & 1 deletion src/jit/InstList.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -425,8 +425,14 @@ void JITCompiler::dump()
prefix = "F";
savedStart = SLJIT_FR(SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS);
savedEnd = SLJIT_FS0;
#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
if ((variable.info & Instruction::TypeMask) == Instruction::V128Operand) {
prefix = "V";
savedStart = SLJIT_VR(SLJIT_NUMBER_OF_SCRATCH_VECTOR_REGISTERS);
savedEnd = SLJIT_VS1;
}
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */
}

uint32_t reg1 = static_cast<uint32_t>(VARIABLE_GET_REF(variable.value));

#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE)
Expand Down
Loading

0 comments on commit 9a9181f

Please sign in to comment.