WIP: Support Short (8/16 bit) atomic RMW operations on RISCV
Signed-off-by: Máté Tokodi [email protected]
matetokodi committed Oct 22, 2024
1 parent 4ae176a commit 4dd002b
Showing 5 changed files with 223 additions and 28 deletions.
4 changes: 4 additions & 0 deletions src/jit/Backend.cpp
@@ -1057,6 +1057,10 @@ void JITCompiler::compileFunction(JITFunction* jitFunc, bool isExternal)
ASSERT(m_context.trapBlocksStart == 0);
m_context.trapBlocksStart = 1;
}

if (sljit_emit_atomic_load(m_compiler, SLJIT_MOV_U16 | SLJIT_ATOMIC_TEST, SLJIT_R0, SLJIT_R1) != SLJIT_ERR_UNSUPPORTED) {
m_options |= JITCompiler::kHasShortAtomic;
}
}

#ifdef WALRUS_JITPERF
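The hunk above is the feature probe: sljit_emit_atomic_load is called with the SLJIT_ATOMIC_TEST flag, which makes the emitter report whether the operation is encodable instead of generating code, and the answer is cached in the kHasShortAtomic option bit. On RISC-V the base A extension only provides word- and doubleword-sized atomics, so the probe fails there unless sub-word atomics (e.g. via the Zabha extension) are available. A minimal sketch of the same pattern, with hasShortAtomic as a hypothetical wrapper rather than code from this patch:

#include "sljitLir.h"

// Sketch only: ask sljit whether a native 16-bit atomic load exists on the
// current target. With SLJIT_ATOMIC_TEST set, the call acts as a capability
// query (as used in compileFunction above) and returns SLJIT_ERR_UNSUPPORTED
// when the backend cannot encode the operation.
static bool hasShortAtomic(struct sljit_compiler* compiler)
{
    return sljit_emit_atomic_load(compiler,
                                  SLJIT_MOV_U16 | SLJIT_ATOMIC_TEST,
                                  SLJIT_R0, SLJIT_R1)
        != SLJIT_ERR_UNSUPPORTED;
}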
40 changes: 26 additions & 14 deletions src/jit/ByteCodeParser.cpp
@@ -1907,24 +1907,27 @@ static void compileFunction(JITCompiler* compiler)
}
break;
}
case ByteCode::I32AtomicRmw8AddUOpcode:
case ByteCode::I32AtomicRmw16AddUOpcode:
case ByteCode::I32AtomicRmw8SubUOpcode:
case ByteCode::I32AtomicRmw16SubUOpcode:
case ByteCode::I32AtomicRmw8AndUOpcode:
case ByteCode::I32AtomicRmw16AndUOpcode:
case ByteCode::I32AtomicRmw8OrUOpcode:
case ByteCode::I32AtomicRmw16OrUOpcode:
case ByteCode::I32AtomicRmw8XorUOpcode:
case ByteCode::I32AtomicRmw16XorUOpcode:
case ByteCode::I32AtomicRmw8XchgUOpcode:
case ByteCode::I32AtomicRmw16XchgUOpcode: {
compiler->increaseStackTmpSize(16);
FALLTHROUGH;
}
case ByteCode::I32AtomicRmwAddOpcode:
case ByteCode::I32AtomicRmwSubOpcode:
case ByteCode::I32AtomicRmwAndOpcode:
case ByteCode::I32AtomicRmwOrOpcode:
case ByteCode::I32AtomicRmwXorOpcode:
case ByteCode::I32AtomicRmwXchgOpcode: {
info = Instruction::kIs32Bit;
requiredInit = OTAtomicRmwI32;
FALLTHROUGH;
@@ -1945,21 +1948,24 @@
}
case ByteCode::I64AtomicRmw8AddUOpcode:
case ByteCode::I64AtomicRmw16AddUOpcode:
case ByteCode::I64AtomicRmw8SubUOpcode:
case ByteCode::I64AtomicRmw16SubUOpcode:
case ByteCode::I64AtomicRmw8AndUOpcode:
case ByteCode::I64AtomicRmw16AndUOpcode:
case ByteCode::I64AtomicRmw8OrUOpcode:
case ByteCode::I64AtomicRmw16OrUOpcode:
case ByteCode::I64AtomicRmw8XorUOpcode:
case ByteCode::I64AtomicRmw16XorUOpcode:
case ByteCode::I64AtomicRmw8XchgUOpcode:
case ByteCode::I64AtomicRmw16XchgUOpcode: {
compiler->increaseStackTmpSize(16);
FALLTHROUGH;
}
case ByteCode::I64AtomicRmw32AddUOpcode:
case ByteCode::I64AtomicRmw32SubUOpcode:
case ByteCode::I64AtomicRmw32AndUOpcode:
case ByteCode::I64AtomicRmw32OrUOpcode:
case ByteCode::I64AtomicRmw32XorUOpcode:
case ByteCode::I64AtomicRmw32XchgUOpcode: {
Instruction* instr = compiler->append(byteCode, Instruction::Atomic, opcode, 2, 1);
instr->addInfo(info);
@@ -1973,9 +1979,12 @@
operands[2] = STACK_OFFSET(atomicRmw->dstOffset());
break;
}
case ByteCode::I32AtomicRmw8CmpxchgUOpcode:
case ByteCode::I32AtomicRmw16CmpxchgUOpcode: {
compiler->increaseStackTmpSize(16);
FALLTHROUGH;
}
case ByteCode::I32AtomicRmwCmpxchgOpcode: {
info = Instruction::kIs32Bit;
requiredInit = OTAtomicRmwCmpxchgI32;
FALLTHROUGH;
@@ -1990,7 +1999,10 @@
FALLTHROUGH;
}
case ByteCode::I64AtomicRmw8CmpxchgUOpcode:
case ByteCode::I64AtomicRmw16CmpxchgUOpcode: {
compiler->increaseStackTmpSize(16);
FALLTHROUGH;
}
case ByteCode::I64AtomicRmw32CmpxchgUOpcode: {
Instruction* instr = compiler->append(byteCode, Instruction::Atomic, opcode, 3, 1);
instr->addInfo(info);
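In the parser changes above, every 8- and 16-bit atomic RMW and cmpxchg opcode now reserves 16 bytes of stack scratch space via increaseStackTmpSize(16): the emulation in MemoryInl.h spills two machine words there, the shifted value mask and the bit-shift amount, because both must survive the compare-and-swap retry loop. A hypothetical helper that mirrors the emitted AND/SHL arithmetic (names are illustrative, not from the patch; the 64-bit cmpxchg path aligns to 8 bytes with ~0x7 instead):

#include <cstdint>

// An unaligned 1- or 2-byte atomic access is rewritten as a masked access
// inside the aligned 32-bit word that contains it.
struct ShortAtomicLayout {
    uintptr_t alignedAddr; // address rounded down to the containing word
    uint32_t shift;        // bit offset of the short value inside that word
    uint32_t mask;         // value mask, shifted into position
};

static ShortAtomicLayout layoutFor(uintptr_t addr, uint32_t size) // size: 1 or 2
{
    ShortAtomicLayout l;
    l.alignedAddr = addr & ~uintptr_t(0x3);
    l.shift = uint32_t(addr & 0x3) * 8; // byte offset -> bit offset
    l.mask = (0xffffffffu >> ((4 - size) * 8)) << l.shift;
    return l;
}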
1 change: 1 addition & 0 deletions src/jit/Compiler.h
@@ -718,6 +718,7 @@ class JITCompiler {
#endif

static const uint32_t kHasCondMov = 1 << 0;
static const uint32_t kHasShortAtomic = 1 << 1;

JITCompiler(Module* module, uint32_t JITFlags);

194 changes: 180 additions & 14 deletions src/jit/MemoryInl.h
@@ -1153,11 +1153,14 @@ static void emitAtomicRmwCmpxchg64(sljit_compiler* compiler, Instruction* instr)

static void emitAtomic(sljit_compiler* compiler, Instruction* instr)
{
bool noShortAtomic = !(CompileContext::get(compiler)->compiler->options() & JITCompiler::kHasShortAtomic);
sljit_s32 operationSize = SLJIT_MOV;
sljit_s32 size = 0;
sljit_s32 offset = 0;
sljit_s32 operation;
uint32_t options = MemAddress::CheckNaturalAlignment | MemAddress::AbsoluteAddress;
sljit_sw stackTmpStart = CompileContext::get(compiler)->stackTmpStart;

switch (instr->opcode()) {
case ByteCode::I64AtomicRmwCmpxchgOpcode: {
@@ -1390,21 +1393,79 @@ static void emitAtomic(sljit_compiler* compiler, Instruction* instr)
sljit_s32 baseReg = SLJIT_EXTRACT_REG(addr.memArg.arg);
sljit_s32 tmpReg = srcReg;

JITArg memValue(operands + 0);
sljit_s32 memValueReg = SLJIT_EXTRACT_REG(memValue.arg);
sljit_s32 maskReg = SLJIT_TMP_R2;
sljit_s32 tempReg = noShortAtomic ? SLJIT_TMP_R0 : SLJIT_TMP_DEST_REG;

if (SLJIT_IS_IMM(memValueReg)) {
return;
}

if (noShortAtomic && size <= 2) {
#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE)
maskReg = SLJIT_TMP_R1;
#endif /* SLJIT_32BIT_ARCHITECTURE */
operationSize = SLJIT_MOV32;

sljit_emit_op2(compiler, SLJIT_AND, maskReg, 0, baseReg, 0, SLJIT_IMM, 0x3);
sljit_emit_op2(compiler, SLJIT_SHL, maskReg, 0, maskReg, 0, SLJIT_IMM, 3); // multiply by 8
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), stackTmpStart + 8 + WORD_LOW_OFFSET, maskReg, 0);

sljit_emit_op2(compiler, SLJIT_AND, baseReg, 0, baseReg, 0, SLJIT_IMM, ~0x3);

sljit_emit_op2(compiler, SLJIT_AND, srcReg, 0, srcReg, 0, SLJIT_IMM, (0xffffffff) >> ((4 - size) * 8));
sljit_emit_op2(compiler, SLJIT_SHL, srcReg, 0, srcReg, 0, maskReg, 0);

sljit_emit_op2(compiler, SLJIT_SHL, maskReg, 0, SLJIT_IMM, (0xffffffff) >> ((4 - size) * 8), maskReg, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), stackTmpStart + WORD_LOW_OFFSET, maskReg, 0);
}
sljit_emit_atomic_load(compiler, operationSize, tempReg, baseReg);

if (noShortAtomic && size <= 2) {
sljit_emit_op1(compiler, SLJIT_MOV, memValueReg, 0, tempReg, 0);
}

if (operation != OP_XCHG) {
tmpReg = instr->requiredReg(1);
}

if (noShortAtomic && size <= 2) {
sljit_emit_op2(compiler, SLJIT_XOR, maskReg, 0, maskReg, 0, SLJIT_IMM, -1);
sljit_emit_op2(compiler, SLJIT_AND, memValueReg, 0, memValueReg, 0, maskReg, 0);
}

if (operation != OP_XCHG) {
sljit_emit_op2(compiler, operation, tmpReg, 0, tempReg, 0, srcReg, 0);
}

sljit_s32 returnReg = tempReg;
if (noShortAtomic && size <= 2) {
sljit_emit_op2(compiler, SLJIT_OR, tmpReg, 0, tmpReg, 0, memValueReg, 0);
}

#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32)
returnReg = memValueReg;
sljit_emit_op1(compiler, SLJIT_MOV, memValueReg, 0, tempReg, 0);
#endif /* SLJIT_CONFIG_ARM_32 */

sljit_emit_atomic_store(compiler, operationSize | SLJIT_SET_ATOMIC_STORED, tmpReg, baseReg, tempReg);
sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), restartOnFailure);

if (noShortAtomic && size <= 2) {
sljit_emit_op1(compiler, SLJIT_MOV, maskReg, 0, SLJIT_MEM1(SLJIT_SP), stackTmpStart + WORD_LOW_OFFSET);
sljit_emit_op2(compiler, SLJIT_AND, returnReg, 0, returnReg, 0, maskReg, 0);

sljit_emit_op1(compiler, SLJIT_MOV, maskReg, 0, SLJIT_MEM1(SLJIT_SP), stackTmpStart + 8 + WORD_LOW_OFFSET);
sljit_emit_op2(compiler, SLJIT_LSHR, returnReg, 0, returnReg, 0, maskReg, 0);
}

sljit_emit_op1(compiler, SLJIT_MOV, dst.arg, dst.argw, returnReg, 0);
#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE)
if (dstPair.arg2 != 0) {
sljit_emit_op1(compiler, SLJIT_MOV, dstPair.arg2, dstPair.arg2w, SLJIT_IMM, 0);
}
#endif /* SLJIT_32BIT_ARCHITECTURE */
return;
}
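For readers following the register-level code above: when kHasShortAtomic is absent and size <= 2, the emitted sequence amounts to a compare-and-swap loop over the containing aligned word that splices the operated-on bytes back in and yields the field's previous value. A behavioural sketch in portable C++ (using std::atomic; an illustration of the strategy, not the runtime's code):

#include <atomic>
#include <cstdint>

// Apply `op` (add, sub, and, or, xor, or exchange) to the masked field while
// preserving the word's other bytes; `shift` and `mask` are as in layoutFor().
static uint32_t shortAtomicRmw(std::atomic<uint32_t>* alignedWord,
                               uint32_t shift, uint32_t mask, uint32_t operand,
                               uint32_t (*op)(uint32_t, uint32_t))
{
    uint32_t old = alignedWord->load();
    uint32_t replacement;
    do {
        uint32_t field = (old & mask) >> shift;  // extract the old short value
        replacement = (old & ~mask)              // keep the neighbouring bytes
            | ((op(field, operand) << shift) & mask);
    } while (!alignedWord->compare_exchange_weak(old, replacement));
    return (old & mask) >> shift; // an atomic RMW returns the previous value
}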

@@ -1417,6 +1478,7 @@ static void emitAtomic(sljit_compiler* compiler, Instruction* instr)
JITArgPair dstPair, srcExpectedPair;
sljit_s32 tmpReg;
sljit_s32 srcExpectedReg;
sljit_s32 srcValueReg;

dstPair.arg2 = 0;

@@ -1428,6 +1490,7 @@
dst = JITArg(operands + 3);
tmpReg = GET_SOURCE_REG(tmp.arg, instr->requiredReg(1));
srcExpectedReg = GET_SOURCE_REG(srcExpected.arg, instr->requiredReg(2));
srcValueReg = GET_TARGET_REG(srcValue.arg, instr->requiredReg(0));
} else {
JITArgPair tmpPair(operands + 0);
JITArgPair srcValuePair(operands + 2);
@@ -1439,6 +1502,7 @@

srcValue.arg = srcValuePair.arg1;
srcValue.argw = srcValuePair.arg1w;
srcValueReg = GET_TARGET_REG(srcValuePair.arg1, instr->requiredReg(0));
dst.arg = dstPair.arg1;
dst.argw = dstPair.arg1w;
sljit_emit_op1(compiler, SLJIT_MOV, dstPair.arg2, dstPair.arg2w, SLJIT_IMM, 0);
@@ -1450,28 +1514,90 @@
struct sljit_label* restartOnFailure = sljit_emit_label(compiler);
sljit_s32 baseReg = SLJIT_EXTRACT_REG(addr.memArg.arg);

sljit_s32 memValueReg = tmpReg;
sljit_s32 maskReg = SLJIT_TMP_R1;
sljit_s32 tempReg = noShortAtomic ? SLJIT_TMP_R0 : SLJIT_TMP_DEST_REG;

if (SLJIT_IS_IMM(memValueReg)) {
return;
}

if (noShortAtomic && size <= 2) {
if (!(operationSize & SLJIT_32) && operationSize != SLJIT_MOV32) {
operationSize = SLJIT_MOV;
} else {
operationSize = SLJIT_MOV32;
}
}

if (!(operationSize & SLJIT_32) && operationSize != SLJIT_MOV32) {
compareTopFalse = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, SLJIT_IMM, 0, srcExpectedPair.arg2, srcExpectedPair.arg2w);
}
if (noShortAtomic && size <= 2) {
sljit_emit_op2(compiler, SLJIT_AND, maskReg, 0, baseReg, 0, SLJIT_IMM, 0x3);
sljit_emit_op2(compiler, SLJIT_SHL, maskReg, 0, maskReg, 0, SLJIT_IMM, 3); // multiply by 8
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), stackTmpStart + 8 + WORD_LOW_OFFSET, maskReg, 0);

sljit_emit_op2(compiler, SLJIT_AND, baseReg, 0, baseReg, 0, SLJIT_IMM, ~0x3);

sljit_emit_op2(compiler, SLJIT_AND, srcValueReg, 0, srcValueReg, 0, SLJIT_IMM, (0xffffffff) >> ((4 - size) * 8));
sljit_emit_op2(compiler, SLJIT_SHL, srcValueReg, 0, srcValueReg, 0, maskReg, 0);

sljit_emit_op2(compiler, SLJIT_SHL, maskReg, 0, SLJIT_IMM, (0xffffffff) >> ((4 - size) * 8), maskReg, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), stackTmpStart + WORD_LOW_OFFSET, maskReg, 0);
sljit_emit_op1(compiler, SLJIT_MOV, maskReg, 0, SLJIT_MEM1(SLJIT_SP), stackTmpStart + 8 + WORD_LOW_OFFSET);
}

sljit_emit_op1(compiler, SLJIT_MOV, tmpReg, 0, srcValue.arg, srcValue.argw);

sljit_emit_atomic_load(compiler, operationSize, tempReg, baseReg);

if (noShortAtomic && size <= 2) {
sljit_emit_op2(compiler, SLJIT_AND, tmpReg, 0, tempReg, 0, maskReg, 0);
}

compareFalse = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, (noShortAtomic && size <= 2 ? tmpReg : tempReg), 0, srcExpectedReg, 0);

if (noShortAtomic && size <= 2) {
sljit_emit_op2(compiler, SLJIT_AND, tmpReg, 0, tmpReg, 0, maskReg, 0);

sljit_emit_op1(compiler, SLJIT_MOV, srcExpectedReg, 0, tempReg, 0);
sljit_emit_op2(compiler, SLJIT_XOR, maskReg, 0, maskReg, 0, SLJIT_IMM, -1);
sljit_emit_op2(compiler, SLJIT_AND, srcExpectedReg, 0, srcExpectedReg, 0, maskReg, 0);
sljit_emit_op2(compiler, SLJIT_OR, tmpReg, 0, srcValueReg, 0, srcExpectedReg, 0);
}

if (noShortAtomic) {
sljit_emit_op1(compiler, operationSize, maskReg, 0, tempReg, 0);
}
sljit_emit_atomic_store(compiler, operationSize | SLJIT_SET_ATOMIC_STORED, tmpReg, baseReg, tempReg);
if (noShortAtomic) {
sljit_emit_op1(compiler, operationSize, tempReg, 0, maskReg, 0);
}
sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), restartOnFailure);
storeSuccess = sljit_emit_jump(compiler, SLJIT_ATOMIC_STORED);

if (!(operationSize & SLJIT_32) && operationSize != SLJIT_MOV32) {
sljit_set_label(compareTopFalse, sljit_emit_label(compiler));
sljit_emit_op1(compiler, operationSize, tempReg, 0, addr.memArg.arg, addr.memArg.argw);
}
sljit_set_label(storeSuccess, sljit_emit_label(compiler));

sljit_set_label(compareFalse, sljit_emit_label(compiler));

if (noShortAtomic && size <= 2) {
sljit_emit_op1(compiler, SLJIT_MOV, maskReg, 0, SLJIT_MEM1(SLJIT_SP), stackTmpStart + WORD_LOW_OFFSET);
sljit_emit_op2(compiler, SLJIT_AND, tmpReg, 0, tempReg, 0, maskReg, 0);

sljit_emit_op1(compiler, SLJIT_MOV, maskReg, 0, SLJIT_MEM1(SLJIT_SP), stackTmpStart + 8 + WORD_LOW_OFFSET);
sljit_emit_op2(compiler, SLJIT_LSHR, tmpReg, 0, tmpReg, 0, maskReg, 0);
tempReg = tmpReg;
}

if (dstPair.arg2 != 0) {
sljit_emit_op1(compiler, SLJIT_MOV, dstPair.arg2, dstPair.arg2w, SLJIT_IMM, 0);
}
sljit_emit_op1(compiler, SLJIT_MOV, dst.arg, dst.argw, tempReg, 0);
#else /* !SLJIT_32BIT_ARCHITECTURE */
sljit_s32 tmpReg;
sljit_s32 srcExpectedReg;
@@ -1481,19 +1607,59 @@ static void emitAtomic(sljit_compiler* compiler, Instruction* instr)
JITArg dst(operands + 3);
tmpReg = GET_SOURCE_REG(tmp.arg, instr->requiredReg(1));
srcExpectedReg = GET_SOURCE_REG(srcExpected.arg, instr->requiredReg(2));
sljit_s32 tempReg = SLJIT_TMP_DEST_REG;
sljit_s32 tempReg2 = SLJIT_TMP_R1;
sljit_s32 maskReg = SLJIT_TMP_R2;

struct sljit_jump* compareFalse;
struct sljit_label* restartOnFailure = sljit_emit_label(compiler);
sljit_s32 baseReg = SLJIT_EXTRACT_REG(addr.memArg.arg);

sljit_emit_op1(compiler, SLJIT_MOV, tmpReg, 0, srcValue.arg, srcValue.argw);

if (noShortAtomic && size <= 2) {
operationSize = SLJIT_MOV_P;

sljit_emit_op2(compiler, SLJIT_AND, maskReg, 0, baseReg, 0, SLJIT_IMM, 0x7);
sljit_emit_op2(compiler, SLJIT_SHL, maskReg, 0, maskReg, 0, SLJIT_IMM, 3); // multiply by 8
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), stackTmpStart + 8 + WORD_LOW_OFFSET, maskReg, 0);

sljit_emit_op2(compiler, SLJIT_AND, baseReg, 0, baseReg, 0, SLJIT_IMM, ~0x7);

sljit_emit_op2(compiler, SLJIT_AND, tmpReg, 0, tmpReg, 0, SLJIT_IMM, (0xffffffff) >> ((4 - size) * 8));
sljit_emit_op2(compiler, SLJIT_SHL, tmpReg, 0, tmpReg, 0, maskReg, 0);

sljit_emit_op2(compiler, SLJIT_SHL, maskReg, 0, SLJIT_IMM, (0xffffffff) >> ((4 - size) * 8), maskReg, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), stackTmpStart + WORD_LOW_OFFSET, maskReg, 0);
sljit_emit_op1(compiler, SLJIT_MOV, maskReg, 0, SLJIT_MEM1(SLJIT_SP), stackTmpStart + 8 + WORD_LOW_OFFSET);
}

sljit_emit_atomic_load(compiler, operationSize, tempReg, baseReg);
if (noShortAtomic && size <= 2) {
sljit_emit_op2(compiler, SLJIT_LSHR, tempReg2, 0, tempReg, 0, maskReg, 0);
sljit_emit_op2(compiler, SLJIT_AND, tempReg2, 0, tempReg2, 0, SLJIT_IMM, (0xffffffff) >> ((4 - size) * 8));
}
compareFalse = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, (noShortAtomic && size <= 2 ? tempReg2 : tempReg), 0, srcExpectedReg, 0);

if (noShortAtomic && size <= 2) {
sljit_emit_op1(compiler, SLJIT_MOV, srcExpectedReg, 0, SLJIT_IMM, (0xffffffff) >> ((4 - size) * 8));
sljit_emit_op2(compiler, SLJIT_SHL, maskReg, 0, srcExpectedReg, 0, maskReg, 0);
sljit_emit_op2(compiler, SLJIT_XOR, maskReg, 0, maskReg, 0, SLJIT_IMM, -1);
sljit_emit_op2(compiler, SLJIT_AND, tempReg2, 0, tempReg, 0, maskReg, 0);
sljit_emit_op2(compiler, SLJIT_OR, tmpReg, 0, tmpReg, 0, tempReg2, 0);
sljit_emit_op1(compiler, SLJIT_MOV, tempReg2, 0, tempReg, 0);
}

sljit_emit_atomic_store(compiler, operationSize | SLJIT_SET_ATOMIC_STORED, tmpReg, baseReg, tempReg);
sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), restartOnFailure);

sljit_set_label(compareFalse, sljit_emit_label(compiler));

if (noShortAtomic && size <= 2) {
sljit_emit_op2(compiler, SLJIT_AND, tempReg, 0, tempReg, 0, SLJIT_IMM, (0xffffffff) >> ((4 - size) * 8));
}

sljit_emit_op1(compiler, SLJIT_MOV, dst.arg, dst.argw, tempReg, 0);
#endif /* SLJIT_32BIT_ARCHITECTURE */
}
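The compare-exchange paths above follow the same word-widening pattern, with one extra rule: only the selected bytes take part in the comparison, and on a mismatch nothing is stored while the loaded field value is still returned. A behavioural sketch under the same assumptions as the RMW sketch:

#include <atomic>
#include <cstdint>

// Emulated 8/16-bit cmpxchg on top of a word-wide compare-and-swap.
static uint32_t shortAtomicCmpxchg(std::atomic<uint32_t>* alignedWord,
                                   uint32_t shift, uint32_t mask,
                                   uint32_t expected, uint32_t desired)
{
    uint32_t old = alignedWord->load();
    for (;;) {
        uint32_t field = (old & mask) >> shift;
        if (field != expected)
            return field; // comparison failed: no store, report loaded value
        uint32_t replacement = (old & ~mask) | ((desired << shift) & mask);
        if (alignedWord->compare_exchange_weak(old, replacement))
            return field; // stored: the old field equals `expected`
        // compare_exchange_weak refreshed `old`; re-derive the field and retry
    }
}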
