diff --git a/src/amdilc/amdilc.c b/src/amdilc/amdilc.c index 74dd3def..a7fc5a07 100644 --- a/src/amdilc/amdilc.c +++ b/src/amdilc/amdilc.c @@ -8,9 +8,6 @@ static HCRYPTPROV mCryptProvider = 0; -static void freeSource( - Source* src); - static void calcSha1( uint8_t* digest, const uint8_t* data, @@ -33,45 +30,13 @@ static void calcSha1( CryptDestroyHash(hash); } -static void freeDestination( - Destination* dst) -{ - if (dst->absoluteSrc != NULL) { - freeSource(dst->absoluteSrc); - } - free(dst->absoluteSrc); -} - -static void freeSource( - Source* src) -{ - for (unsigned i = 0; i < src->srcCount; i++) { - freeSource(&src->srcs[i]); - } - free(src->srcs); -} - -static void freeInstruction( - Instruction* instr) -{ - for (unsigned i = 0; i < instr->dstCount; i++) { - freeDestination(&instr->dsts[i]); - } - for (unsigned i = 0; i < instr->srcCount; i++) { - freeSource(&instr->srcs[i]); - } - free(instr->dsts); - free(instr->srcs); - free(instr->extras); -} - static void freeKernel( Kernel* kernel) { - for (unsigned i = 0; i < kernel->instrCount; i++) { - freeInstruction(&kernel->instrs[i]); - } free(kernel->instrs); + free(kernel->srcBuffer); + free(kernel->dstBuffer); + free(kernel->extrasBuffer); } static bool isShaderDumpEnabled() @@ -135,7 +100,10 @@ IlcShader ilcCompileShader( getShaderName(name, NAME_LEN, code, size); LOGV("compiling %s...\n", name); - Kernel* kernel = ilcDecodeStream((Token*)code, size / sizeof(Token)); + Kernel* kernel = calloc(1, sizeof(Kernel)); + + ilcDecodeStream(kernel, (Token*)code, size / sizeof(Token)); + bool dump = isShaderDumpEnabled(); if (dump) { @@ -159,7 +127,9 @@ void ilcDisassembleShader( const void* code, unsigned size) { - Kernel* kernel = ilcDecodeStream((Token*)code, size / sizeof(Token)); + Kernel* kernel = calloc(1, sizeof(Kernel)); + + ilcDecodeStream(kernel, (Token*)code, size / sizeof(Token)); ilcDumpKernel(file, kernel); freeKernel(kernel); diff --git a/src/amdilc/amdilc_compiler.c b/src/amdilc/amdilc_compiler.c index 
812d19a7..50f3f6b2 100644 --- a/src/amdilc/amdilc_compiler.c +++ b/src/amdilc/amdilc_compiler.c @@ -1,7 +1,8 @@ #include "amdilc_spirv.h" #include "amdilc_internal.h" -#define MAX_SRC_COUNT (8) +#define BUFFER_ALLOC_FACTOR (1.5f) + #define ZERO_LITERAL (0x00000000) #define ONE_LITERAL (0x3F800000) #define FALSE_LITERAL (0x00000000) @@ -115,6 +116,7 @@ typedef struct { IlcSpvId bool4Id; unsigned currentStrideIndex; unsigned regCount; + unsigned regSize; IlcRegister* regs; unsigned resourceCount; IlcResource* resources; @@ -388,7 +390,18 @@ static const IlcRegister* addRegister( emitName(compiler, reg->id, identifier, reg->ilNum); compiler->regCount++; - compiler->regs = realloc(compiler->regs, sizeof(IlcRegister) * compiler->regCount); + unsigned size = compiler->regCount * sizeof(IlcRegister); + if (compiler->regSize < size) { + if (compiler->regSize < sizeof(IlcRegister)) { + compiler->regSize = sizeof(IlcRegister) * 32; + } + + while (compiler->regSize < size) { + compiler->regSize *= BUFFER_ALLOC_FACTOR; + } + compiler->regs = realloc(compiler->regs, compiler->regSize); + } + compiler->regs[compiler->regCount - 1] = *reg; return &compiler->regs[compiler->regCount - 1]; @@ -611,7 +624,7 @@ static IlcSpvId loadSource( src->hasImmediate ? src->immediate : 0); if (src->srcCount > 0) { assert(src->srcCount == 1); - IlcSpvId rel4Id = loadSource(compiler, &src->srcs[0], COMP_MASK_XYZW, + IlcSpvId rel4Id = loadSource(compiler, &compiler->kernel->srcBuffer[src->srcs[0]], COMP_MASK_XYZW, compiler->int4Id); IlcSpvId relId = emitVectorTrim(compiler, rel4Id, compiler->int4Id, 0, 1); indexId = ilcSpvPutOp2(compiler->module, SpvOpIAdd, compiler->intId, indexId, relId); @@ -746,7 +759,7 @@ static void storeDestination( IlcSpvId ptrId = 0; if (dst->registerType == IL_REGTYPE_ITEMP) { - if (dst->absoluteSrc != NULL) { + if (dst->hasAbsoluteSrc) { LOGW("unhandled absolute source\n"); } @@ -757,14 +770,14 @@ static void storeDestination( dst->hasImmediate ? 
dst->immediate : 0); if (dst->relativeSrcCount > 0) { assert(dst->relativeSrcCount == 1); - IlcSpvId rel4Id = loadSource(compiler, &dst->relativeSrcs[0], COMP_MASK_XYZW, + IlcSpvId rel4Id = loadSource(compiler, &compiler->kernel->srcBuffer[dst->relativeSrcs[0]], COMP_MASK_XYZW, compiler->int4Id); IlcSpvId relId = emitVectorTrim(compiler, rel4Id, compiler->int4Id, 0, 1); indexId = ilcSpvPutOp2(compiler->module, SpvOpIAdd, compiler->intId, indexId, relId); } ptrId = ilcSpvPutAccessChain(compiler->module, ptrTypeId, reg->id, 1, &indexId); } else { - if (dst->absoluteSrc != NULL) { + if (dst->hasAbsoluteSrc) { LOGW("unhandled absolute source\n"); } if (dst->relativeSrcCount > 0) { @@ -905,10 +918,10 @@ static void emitConstBuffer( IlcSpvId ptrTypeId = ilcSpvPutPointerType(compiler->module, SpvStorageClassPrivate, typeId); for (unsigned i = 0; i < arraySize; i++) { IlcSpvId consistuentIds[] = { - ilcSpvPutConstant(compiler->module, compiler->floatId, instr->extras[4 * i + 0]), - ilcSpvPutConstant(compiler->module, compiler->floatId, instr->extras[4 * i + 1]), - ilcSpvPutConstant(compiler->module, compiler->floatId, instr->extras[4 * i + 2]), - ilcSpvPutConstant(compiler->module, compiler->floatId, instr->extras[4 * i + 3]), + ilcSpvPutConstant(compiler->module, compiler->floatId, compiler->kernel->extrasBuffer[instr->extrasStartIndex + 4 * i + 0]), + ilcSpvPutConstant(compiler->module, compiler->floatId, compiler->kernel->extrasBuffer[instr->extrasStartIndex + 4 * i + 1]), + ilcSpvPutConstant(compiler->module, compiler->floatId, compiler->kernel->extrasBuffer[instr->extrasStartIndex + 4 * i + 2]), + ilcSpvPutConstant(compiler->module, compiler->floatId, compiler->kernel->extrasBuffer[instr->extrasStartIndex + 4 * i + 3]), }; IlcSpvId compositeId = ilcSpvPutConstantComposite(compiler->module, typeId, 4, consistuentIds); @@ -937,7 +950,7 @@ static void emitIndexedTempArray( IlcCompiler* compiler, const Instruction* instr) { - const Source* src = &instr->srcs[0]; + const 
Source* src = &compiler->kernel->srcBuffer[instr->srcs[0]]; assert(src->registerType == IL_REGTYPE_ITEMP && src->hasImmediate); @@ -966,7 +979,7 @@ static void emitLiteral( IlcCompiler* compiler, const Instruction* instr) { - const Source* src = &instr->srcs[0]; + const Source* src = &compiler->kernel->srcBuffer[instr->srcs[0]]; assert(src->registerType == IL_REGTYPE_LITERAL); @@ -974,10 +987,10 @@ static void emitLiteral( IlcSpvId literalId = emitVariable(compiler, literalTypeId, SpvStorageClassPrivate); IlcSpvId consistuentIds[] = { - ilcSpvPutConstant(compiler->module, compiler->floatId, instr->extras[0]), - ilcSpvPutConstant(compiler->module, compiler->floatId, instr->extras[1]), - ilcSpvPutConstant(compiler->module, compiler->floatId, instr->extras[2]), - ilcSpvPutConstant(compiler->module, compiler->floatId, instr->extras[3]), + ilcSpvPutConstant(compiler->module, compiler->floatId, compiler->kernel->extrasBuffer[instr->extrasStartIndex + 0]), + ilcSpvPutConstant(compiler->module, compiler->floatId, compiler->kernel->extrasBuffer[instr->extrasStartIndex + 1]), + ilcSpvPutConstant(compiler->module, compiler->floatId, compiler->kernel->extrasBuffer[instr->extrasStartIndex + 2]), + ilcSpvPutConstant(compiler->module, compiler->floatId, compiler->kernel->extrasBuffer[instr->extrasStartIndex + 3]), }; IlcSpvId compositeId = ilcSpvPutConstantComposite(compiler->module, literalTypeId, 4, consistuentIds); @@ -1005,7 +1018,7 @@ static void emitOutput( { uint8_t importUsage = GET_BITS(instr->control, 0, 4); - const Destination* dst = &instr->dsts[0]; + const Destination* dst = &compiler->kernel->dstBuffer[instr->dsts[0]]; const IlcRegister* dupeReg = findRegister(compiler, dst->registerType, dst->registerNum); if (dupeReg != NULL) { // Outputs are allowed to be redeclared with different components. 
@@ -1128,7 +1141,7 @@ static void emitInput( instr->srcCount == 0 && instr->extraCount == 0); - const Destination* dst = &instr->dsts[0]; + const Destination* dst = &compiler->kernel->dstBuffer[instr->dsts[0]]; assert(!dst->clamp && dst->shiftScale == IL_SHIFT_NONE); @@ -1266,10 +1279,10 @@ static void emitResource( uint8_t id = GET_BITS(instr->control, 0, 7); uint8_t type = GET_BITS(instr->control, 8, 11); bool unnorm = GET_BIT(instr->control, 31); - uint8_t fmtx = GET_BITS(instr->extras[0], 20, 22); - uint8_t fmty = GET_BITS(instr->extras[0], 23, 25); - uint8_t fmtz = GET_BITS(instr->extras[0], 26, 28); - uint8_t fmtw = GET_BITS(instr->extras[0], 29, 31); + uint8_t fmtx = GET_BITS(compiler->kernel->extrasBuffer[instr->extrasStartIndex], 20, 22); + uint8_t fmty = GET_BITS(compiler->kernel->extrasBuffer[instr->extrasStartIndex], 23, 25); + uint8_t fmtz = GET_BITS(compiler->kernel->extrasBuffer[instr->extrasStartIndex], 26, 28); + uint8_t fmtw = GET_BITS(compiler->kernel->extrasBuffer[instr->extrasStartIndex], 29, 31); SpvDim spvDim = getSpvDimension(compiler, type, true); @@ -1343,8 +1356,8 @@ static void emitTypedUav( // IL_OP_DCL_TYPED_UAV allows 14-bit IDs assert(instr->extraCount == 1); id = GET_BITS(instr->control, 0, 13); - fmtx = GET_BITS(instr->extras[0], 0, 3); - type = GET_BITS(instr->extras[0], 4, 7); + fmtx = GET_BITS(compiler->kernel->extrasBuffer[instr->extrasStartIndex], 0, 3); + type = GET_BITS(compiler->kernel->extrasBuffer[instr->extrasStartIndex], 4, 7); } else { assert(false); } @@ -1422,7 +1435,7 @@ static void emitUav( IlcSpvId strideId = 0; if (isStructured) { - strideId = ilcSpvPutConstant(compiler->module, compiler->intId, instr->extras[0]); + strideId = ilcSpvPutConstant(compiler->module, compiler->intId, compiler->kernel->extrasBuffer[instr->extrasStartIndex]); } else { // TODO get stride from descriptor } @@ -1468,7 +1481,7 @@ static void emitSrv( IlcSpvId strideId = 0; if (isStructured) { - strideId = ilcSpvPutConstant(compiler->module, 
compiler->intId, instr->extras[0]); + strideId = ilcSpvPutConstant(compiler->module, compiler->intId, compiler->kernel->extrasBuffer[instr->extrasStartIndex]); } else { if (compiler->kernel->shaderType != IL_SHADER_VERTEX) { LOGE("unhandled raw SRVs for shader type %u\n", compiler->kernel->shaderType); @@ -1542,8 +1555,8 @@ static void emitLds( { bool isStructured = instr->opcode == IL_DCL_STRUCT_LDS; uint16_t id = GET_BITS(instr->control, 0, 13); - unsigned stride = isStructured ? instr->extras[0] : 1; - unsigned length = isStructured ? instr->extras[1] : instr->extras[0]; + unsigned stride = isStructured ? compiler->kernel->extrasBuffer[instr->extrasStartIndex + 0] : 1; + unsigned length = isStructured ? compiler->kernel->extrasBuffer[instr->extrasStartIndex + 1] : compiler->kernel->extrasBuffer[instr->extrasStartIndex + 0]; IlcSpvId lengthId = ilcSpvPutConstant(compiler->module, compiler->uintId, stride * length / 4); IlcSpvId arrayId = ilcSpvPutArrayType(compiler->module, compiler->uintId, lengthId); @@ -1627,7 +1640,7 @@ static void emitFloatOp( } for (int i = 0; i < instr->srcCount; i++) { - srcIds[i] = loadSource(compiler, &instr->srcs[i], componentMask, compiler->float4Id); + srcIds[i] = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[i]], componentMask, compiler->float4Id); } switch (instr->opcode) { @@ -1838,7 +1851,7 @@ static void emitFloatOp( break; } - storeDestination(compiler, &instr->dsts[0], resId, compiler->float4Id); + storeDestination(compiler, &compiler->kernel->dstBuffer[instr->dsts[0]], resId, compiler->float4Id); } static void emitFloatComparisonOp( @@ -1849,7 +1862,7 @@ static void emitFloatComparisonOp( SpvOp compOp = 0; for (int i = 0; i < instr->srcCount; i++) { - srcIds[i] = loadSource(compiler, &instr->srcs[i], COMP_MASK_XYZW, compiler->float4Id); + srcIds[i] = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[i]], COMP_MASK_XYZW, compiler->float4Id); } switch (instr->opcode) { @@ -1879,7 +1892,7 @@ static 
void emitFloatComparisonOp( IlcSpvId resId = ilcSpvPutSelect(compiler->module, compiler->float4Id, condId, true4Id, false4Id); - storeDestination(compiler, &instr->dsts[0], resId, compiler->float4Id); + storeDestination(compiler, &compiler->kernel->dstBuffer[instr->dsts[0]], resId, compiler->float4Id); } static void emitIntegerOp( @@ -1898,7 +1911,7 @@ static void emitIntegerOp( } for (int i = 0; i < instr->srcCount; i++) { - srcIds[i] = loadSource(compiler, &instr->srcs[i], COMP_MASK_XYZW, typeId); + srcIds[i] = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[i]], COMP_MASK_XYZW, typeId); } switch (instr->opcode) { @@ -2013,7 +2026,7 @@ static void emitIntegerOp( break; } - storeDestination(compiler, &instr->dsts[0], resId, typeId); + storeDestination(compiler, &compiler->kernel->dstBuffer[instr->dsts[0]], resId, typeId); } static void emitIntegerComparisonOp( @@ -2024,7 +2037,7 @@ static void emitIntegerComparisonOp( SpvOp compOp = 0; for (int i = 0; i < instr->srcCount; i++) { - srcIds[i] = loadSource(compiler, &instr->srcs[i], COMP_MASK_XYZW, compiler->int4Id); + srcIds[i] = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[i]], COMP_MASK_XYZW, compiler->int4Id); } switch (instr->opcode) { @@ -2064,7 +2077,7 @@ static void emitIntegerComparisonOp( IlcSpvId resId = ilcSpvPutSelect(compiler->module, compiler->float4Id, condId, trueCompositeId, falseCompositeId); - storeDestination(compiler, &instr->dsts[0], resId, compiler->float4Id); + storeDestination(compiler, &compiler->kernel->dstBuffer[instr->dsts[0]], resId, compiler->float4Id); } static void emitCmovLogical( @@ -2074,7 +2087,7 @@ static void emitCmovLogical( IlcSpvId srcIds[MAX_SRC_COUNT] = { 0 }; for (int i = 0; i < instr->srcCount; i++) { - srcIds[i] = loadSource(compiler, &instr->srcs[i], COMP_MASK_XYZW, compiler->float4Id); + srcIds[i] = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[i]], COMP_MASK_XYZW, compiler->float4Id); } // For each component, select src1 
if src0 has any bit set, otherwise select src2 @@ -2088,7 +2101,7 @@ static void emitCmovLogical( IlcSpvId resId = ilcSpvPutSelect(compiler->module, compiler->float4Id, condId, srcIds[1], srcIds[2]); - storeDestination(compiler, &instr->dsts[0], resId, compiler->float4Id); + storeDestination(compiler, &compiler->kernel->dstBuffer[instr->dsts[0]], resId, compiler->float4Id); } static void emitNumThreadPerGroup( @@ -2096,9 +2109,9 @@ static void emitNumThreadPerGroup( const Instruction* instr) { IlcSpvWord sizes[] = { - instr->extraCount >= 1 ? instr->extras[0] : 1, - instr->extraCount >= 2 ? instr->extras[1] : 1, - instr->extraCount >= 3 ? instr->extras[2] : 1, + instr->extraCount >= 1 ? compiler->kernel->extrasBuffer[instr->extrasStartIndex + 0] : 1, + instr->extraCount >= 2 ? compiler->kernel->extrasBuffer[instr->extrasStartIndex + 1] : 1, + instr->extraCount >= 3 ? compiler->kernel->extrasBuffer[instr->extrasStartIndex + 2] : 1, }; ilcSpvPutExecMode(compiler->module, compiler->entryPointId, SpvExecutionModeLocalSize, 3, sizes); @@ -2125,7 +2138,7 @@ static void emitIf( .hasElseBlock = false, }; - IlcSpvId srcId = loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, compiler->int4Id); + IlcSpvId srcId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[0]], COMP_MASK_XYZW, compiler->int4Id); IlcSpvId labelBeginId = ilcSpvAllocId(compiler->module); IlcSpvId condId = emitConditionCheck(compiler, srcId, instr->opcode == IL_OP_IF_LOGICALNZ); ilcSpvPutSelectionMerge(compiler->module, ifElseBlock.labelEndId); @@ -2235,7 +2248,7 @@ static void emitSwitch( IlcCompiler* compiler, const Instruction* instr) { - IlcSpvId srcId = loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, compiler->int4Id); + IlcSpvId srcId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[0]], COMP_MASK_XYZW, compiler->int4Id); IlcSpvId xId = emitVectorTrim(compiler, srcId, compiler->int4Id, COMP_INDEX_X, 1); const IlcSwitchCaseBlock switchCaseBlock = { @@ -2276,7 +2289,7 
@@ static void emitCase( block->switchCase.cases = realloc(block->switchCase.cases, block->switchCase.caseCount * sizeof(IlcCase)); block->switchCase.cases[block->switchCase.caseCount - 1] = (IlcCase) { - .literal = instr->extras[0], + .literal = compiler->kernel->extrasBuffer[instr->extrasStartIndex], .labelId = labelId, }; } @@ -2346,9 +2359,9 @@ static void emitBreak( if (instr->opcode == IL_OP_BREAK) { ilcSpvPutBranch(compiler->module, labelBreakId); } else if (instr->opcode == IL_OP_BREAKC) { - IlcSpvId srcAId = loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, compiler->float4Id); + IlcSpvId srcAId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[0]], COMP_MASK_XYZW, compiler->float4Id); IlcSpvId xAId = emitVectorTrim(compiler, srcAId, compiler->float4Id, COMP_INDEX_X, 1); - IlcSpvId srcBId = loadSource(compiler, &instr->srcs[1], COMP_MASK_XYZW, compiler->float4Id); + IlcSpvId srcBId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[1]], COMP_MASK_XYZW, compiler->float4Id); IlcSpvId xBId = emitVectorTrim(compiler, srcBId, compiler->float4Id, COMP_INDEX_X, 1); uint8_t relop = GET_BITS(instr->control, 0, 2); @@ -2381,7 +2394,7 @@ static void emitBreak( ilcSpvPutBranchConditional(compiler->module, condId, labelBreakId, labelId); } else if (instr->opcode == IL_OP_BREAK_LOGICALZ || instr->opcode == IL_OP_BREAK_LOGICALNZ) { - IlcSpvId srcId = loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, compiler->int4Id); + IlcSpvId srcId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[0]], COMP_MASK_XYZW, compiler->int4Id); IlcSpvId condId = emitConditionCheck(compiler, srcId, instr->opcode == IL_OP_BREAK_LOGICALNZ); ilcSpvPutBranchConditional(compiler->module, condId, labelBreakId, labelId); @@ -2408,7 +2421,7 @@ static void emitContinue( ilcSpvPutBranch(compiler->module, block->loop.labelContinueId); } else if (instr->opcode == IL_OP_CONTINUE_LOGICALZ || instr->opcode == IL_OP_CONTINUE_LOGICALNZ) { - IlcSpvId srcId = 
loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, compiler->int4Id); + IlcSpvId srcId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[0]], COMP_MASK_XYZW, compiler->int4Id); IlcSpvId condId = emitConditionCheck(compiler, srcId, instr->opcode == IL_OP_CONTINUE_LOGICALNZ); ilcSpvPutBranchConditional(compiler->module, condId, block->loop.labelContinueId, labelId); @@ -2426,7 +2439,7 @@ static void emitDiscard( IlcSpvId labelBeginId = ilcSpvAllocId(compiler->module); IlcSpvId labelEndId = ilcSpvAllocId(compiler->module); - IlcSpvId srcId = loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, compiler->int4Id); + IlcSpvId srcId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[0]], COMP_MASK_XYZW, compiler->int4Id); IlcSpvId condId = emitConditionCheck(compiler, srcId, instr->opcode == IL_OP_DISCARD_LOGICALNZ); ilcSpvPutSelectionMerge(compiler->module, labelEndId); ilcSpvPutBranchConditional(compiler->module, condId, labelBeginId, labelEndId); @@ -2483,14 +2496,14 @@ static void emitLoad( uint8_t ilResourceId = GET_BITS(instr->control, 0, 7); const IlcResource* resource = findResource(compiler, RES_TYPE_GENERIC, ilResourceId); - const Destination* dst = &instr->dsts[0]; + const Destination* dst = &compiler->kernel->dstBuffer[instr->dsts[0]]; if (resource == NULL) { LOGE("resource %d not found\n", ilResourceId); return; } - IlcSpvId srcId = loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, compiler->int4Id); + IlcSpvId srcId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[0]], COMP_MASK_XYZW, compiler->int4Id); SpvImageOperandsMask operandsMask = 0; unsigned operandIdCount = 0; @@ -2517,7 +2530,7 @@ static void emitResinfo( bool ilReturnType = GET_BIT(instr->control, 8); const IlcResource* resource = findResource(compiler, RES_TYPE_GENERIC, ilResourceId); - const Destination* dst = &instr->dsts[0]; + const Destination* dst = &compiler->kernel->dstBuffer[instr->dsts[0]]; if (resource == NULL) { LOGE("resource %d not 
found\n", ilResourceId); @@ -2529,7 +2542,7 @@ static void emitResinfo( IlcSpvId vecTypeId = dimCount == 1 ? compiler->intId : ilcSpvPutVectorType(compiler->module, compiler->intId, dimCount); IlcSpvId resourceId = ilcSpvPutLoad(compiler->module, resource->typeId, resource->id); - IlcSpvId srcId = loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, compiler->int4Id); + IlcSpvId srcId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[0]], COMP_MASK_XYZW, compiler->int4Id); IlcSpvId lodId = emitVectorTrim(compiler, srcId, compiler->int4Id, COMP_INDEX_X, 1); ilcSpvPutCapability(compiler->module, SpvCapabilityImageQuery); IlcSpvId sizesId = ilcSpvPutImageQuerySizeLod(compiler->module, vecTypeId, resourceId, lodId); @@ -2569,7 +2582,7 @@ static void emitSample( const IlcResource* resource = findResource(compiler, RES_TYPE_GENERIC, ilResourceId); const IlcSampler* sampler = findOrCreateSampler(compiler, ilSamplerId); - const Destination* dst = &instr->dsts[0]; + const Destination* dst = &compiler->kernel->dstBuffer[instr->dsts[0]]; if (resource == NULL) { LOGE("resource %d not found\n", ilResourceId); @@ -2578,7 +2591,7 @@ static void emitSample( unsigned dimCount = getResourceDimensionCount(resource->ilType); IlcSpvWord sampleOp = 0; - IlcSpvId coordinateId = loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, + IlcSpvId coordinateId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[0]], COMP_MASK_XYZW, compiler->float4Id); IlcSpvId drefId = 0; SpvImageOperandsMask operandsMask = 0; @@ -2591,18 +2604,18 @@ static void emitSample( sampleOp = SpvOpImageSampleImplicitLod; operandsMask |= SpvImageOperandsBiasMask; - IlcSpvId biasId = loadSource(compiler, &instr->srcs[1], COMP_MASK_XYZW, compiler->float4Id); + IlcSpvId biasId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[1]], COMP_MASK_XYZW, compiler->float4Id); operandIds[0] = emitVectorTrim(compiler, biasId, compiler->float4Id, COMP_INDEX_X, 1); operandIdCount++; } else if 
(instr->opcode == IL_OP_SAMPLE_G) { sampleOp = SpvOpImageSampleExplicitLod; operandsMask |= SpvImageOperandsGradMask; - IlcSpvId xGradId = loadSource(compiler, &instr->srcs[1], COMP_MASK_XYZW, + IlcSpvId xGradId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[1]], COMP_MASK_XYZW, compiler->float4Id); operandIds[0] = emitVectorTrim(compiler, xGradId, compiler->float4Id, COMP_INDEX_X, dimCount); - IlcSpvId yGradId = loadSource(compiler, &instr->srcs[2], COMP_MASK_XYZW, + IlcSpvId yGradId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[2]], COMP_MASK_XYZW, compiler->float4Id); operandIds[1] = emitVectorTrim(compiler, yGradId, compiler->float4Id, COMP_INDEX_X, dimCount); @@ -2611,12 +2624,12 @@ static void emitSample( sampleOp = SpvOpImageSampleExplicitLod; operandsMask |= SpvImageOperandsLodMask; - IlcSpvId lodId = loadSource(compiler, &instr->srcs[1], COMP_MASK_XYZW, compiler->float4Id); + IlcSpvId lodId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[1]], COMP_MASK_XYZW, compiler->float4Id); operandIds[0] = emitVectorTrim(compiler, lodId, compiler->float4Id, COMP_INDEX_X, 1); operandIdCount++; } else if (instr->opcode == IL_OP_SAMPLE_C_LZ) { sampleOp = SpvOpImageSampleDrefExplicitLod; - drefId = loadSource(compiler, &instr->srcs[1], COMP_MASK_XYZW, compiler->float4Id); + drefId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[1]], COMP_MASK_XYZW, compiler->float4Id); drefId = emitVectorTrim(compiler, drefId, compiler->float4Id, COMP_INDEX_X, 1); operandsMask |= SpvImageOperandsLodMask; @@ -2677,7 +2690,7 @@ static void emitFetch4( const IlcResource* resource = findResource(compiler, RES_TYPE_GENERIC, ilResourceId); const IlcSampler* sampler = findOrCreateSampler(compiler, ilSamplerId); - const Destination* dst = &instr->dsts[0]; + const Destination* dst = &compiler->kernel->dstBuffer[instr->dsts[0]]; if (resource == NULL) { LOGE("resource %d not found\n", ilResourceId); @@ -2686,7 +2699,7 @@ static void 
emitFetch4( unsigned dimCount = getResourceDimensionCount(resource->ilType); IlcSpvWord sampleOp = 0; - IlcSpvId coordinateId = loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, + IlcSpvId coordinateId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[0]], COMP_MASK_XYZW, compiler->float4Id); IlcSpvId argId = 0; SpvImageOperandsMask operandsMask = 0; @@ -2699,7 +2712,7 @@ static void emitFetch4( argId = ilcSpvPutConstant(compiler->module, compiler->intId, instr->primModifier); } else if (instr->opcode == IL_OP_FETCH4_C || instr->opcode == IL_OP_FETCH4_PO_C) { sampleOp = SpvOpImageDrefGather; - IlcSpvId drefId = loadSource(compiler, &instr->srcs[1], COMP_MASK_XYZW, compiler->float4Id); + IlcSpvId drefId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[1]], COMP_MASK_XYZW, compiler->float4Id); argId = emitVectorTrim(compiler, drefId, compiler->float4Id, COMP_INDEX_X, 1); } else { assert(false); @@ -2708,7 +2721,7 @@ static void emitFetch4( if (instr->opcode == IL_OP_FETCH4_PO || instr->opcode == IL_OP_FETCH4_PO_C) { // Programmable offset unsigned srcIndex = instr->opcode == IL_OP_FETCH4_PO ? 
1 : 2; - IlcSpvId offsetsId = loadSource(compiler, &instr->srcs[srcIndex], COMP_MASK_XYZW, + IlcSpvId offsetsId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[srcIndex]], COMP_MASK_XYZW, compiler->int4Id); offsetsId = emitVectorTrim(compiler, offsetsId, compiler->int4Id, COMP_INDEX_X, dimCount); operandsMask |= SpvImageOperandsOffsetMask; @@ -2755,15 +2768,15 @@ static void emitLdsLoadVec( uint8_t ilResourceId = GET_BITS(instr->control, 0, 14); const IlcResource* resource = findResource(compiler, RES_TYPE_LDS, ilResourceId); - const Destination* dst = &instr->dsts[0]; + const Destination* dst = &compiler->kernel->dstBuffer[instr->dsts[0]]; if (resource == NULL) { LOGE("resource %d not found\n", ilResourceId); return; } - IlcSpvId index4Id = loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, compiler->int4Id); - IlcSpvId offset4Id = loadSource(compiler, &instr->srcs[1], COMP_MASK_XYZW, compiler->int4Id); + IlcSpvId index4Id = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[0]], COMP_MASK_XYZW, compiler->int4Id); + IlcSpvId offset4Id = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[1]], COMP_MASK_XYZW, compiler->int4Id); IlcSpvId indexId = emitVectorTrim(compiler, index4Id, compiler->int4Id, COMP_INDEX_X, 1); IlcSpvId offsetId = emitVectorTrim(compiler, offset4Id, compiler->int4Id, COMP_INDEX_X, 1); @@ -2797,16 +2810,16 @@ static void emitLdsStoreVec( uint8_t ilResourceId = GET_BITS(instr->control, 0, 14); const IlcResource* resource = findResource(compiler, RES_TYPE_LDS, ilResourceId); - const Destination* dst = &instr->dsts[0]; + const Destination* dst = &compiler->kernel->dstBuffer[instr->dsts[0]]; if (resource == NULL) { LOGE("resource %d not found\n", ilResourceId); return; } - IlcSpvId index4Id = loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, compiler->int4Id); - IlcSpvId offset4Id = loadSource(compiler, &instr->srcs[1], COMP_MASK_XYZW, compiler->int4Id); - IlcSpvId dataId = loadSource(compiler, 
&instr->srcs[2], COMP_MASK_XYZW, compiler->uint4Id); + IlcSpvId index4Id = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[0]], COMP_MASK_XYZW, compiler->int4Id); + IlcSpvId offset4Id = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[1]], COMP_MASK_XYZW, compiler->int4Id); + IlcSpvId dataId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[2]], COMP_MASK_XYZW, compiler->uint4Id); IlcSpvId indexId = emitVectorTrim(compiler, index4Id, compiler->int4Id, COMP_INDEX_X, 1); IlcSpvId offsetId = emitVectorTrim(compiler, offset4Id, compiler->int4Id, COMP_INDEX_X, 1); @@ -2841,7 +2854,7 @@ static void emitUavLoad( uint8_t ilResourceId = GET_BITS(instr->control, 0, 14); const IlcResource* resource = findResource(compiler, RES_TYPE_GENERIC, ilResourceId); - const Destination* dst = &instr->dsts[0]; + const Destination* dst = &compiler->kernel->dstBuffer[instr->dsts[0]]; if (resource == NULL) { LOGE("resource %d not found\n", ilResourceId); @@ -2851,7 +2864,7 @@ static void emitUavLoad( // Vulkan spec: "The Result Type operand of OpImageRead must be a vector of four components." 
IlcSpvId texel4TypeId = ilcSpvPutVectorType(compiler->module, resource->texelTypeId, 4); IlcSpvId resourceId = ilcSpvPutLoad(compiler->module, resource->typeId, resource->id); - IlcSpvId addressId = loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, compiler->int4Id); + IlcSpvId addressId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[0]], COMP_MASK_XYZW, compiler->int4Id); IlcSpvId readId = ilcSpvPutImageRead(compiler->module, texel4TypeId, resourceId, addressId); storeDestination(compiler, dst, readId, texel4TypeId); } @@ -2863,14 +2876,14 @@ static void emitUavStructLoad( uint16_t ilResourceId = GET_BITS(instr->control, 0, 14); const IlcResource* resource = findResource(compiler, RES_TYPE_GENERIC, ilResourceId); - const Destination* dst = &instr->dsts[0]; + const Destination* dst = &compiler->kernel->dstBuffer[instr->dsts[0]]; if (resource == NULL) { LOGE("resource %d not found\n", ilResourceId); return; } - IlcSpvId srcId = loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, compiler->int4Id); + IlcSpvId srcId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[0]], COMP_MASK_XYZW, compiler->int4Id); IlcSpvId indexId = emitVectorTrim(compiler, srcId, compiler->int4Id, COMP_INDEX_X, 1); IlcSpvId offsetId = emitVectorTrim(compiler, srcId, compiler->int4Id, COMP_INDEX_Y, 1); IlcSpvId wordAddrId = emitWordAddress(compiler, indexId, resource->strideId, offsetId); @@ -2922,9 +2935,9 @@ static void emitUavStore( } IlcSpvId resourceId = ilcSpvPutLoad(compiler->module, resource->typeId, resource->id); - IlcSpvId addressId = loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, compiler->int4Id); + IlcSpvId addressId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[0]], COMP_MASK_XYZW, compiler->int4Id); IlcSpvId elementTypeId = ilcSpvPutVectorType(compiler->module, resource->texelTypeId, 4); - IlcSpvId elementId = loadSource(compiler, &instr->srcs[1], COMP_MASK_XYZW, elementTypeId); + IlcSpvId elementId = 
loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[1]], COMP_MASK_XYZW, elementTypeId); ilcSpvPutImageWrite(compiler->module, resourceId, addressId, elementId); } @@ -2937,15 +2950,15 @@ static void emitUavRawStructStore( uint16_t ilResourceId = GET_BITS(instr->control, 0, 14); const IlcResource* resource = findResource(compiler, RES_TYPE_GENERIC, ilResourceId); - const Destination* dst = &instr->dsts[0]; + const Destination* dst = &compiler->kernel->dstBuffer[instr->dsts[0]]; if (resource == NULL) { LOGE("resource %d not found\n", ilResourceId); return; } - IlcSpvId srcId = loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, compiler->int4Id); - IlcSpvId dataId = loadSource(compiler, &instr->srcs[1], COMP_MASK_XYZW, compiler->float4Id); + IlcSpvId srcId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[0]], COMP_MASK_XYZW, compiler->int4Id); + IlcSpvId dataId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[1]], COMP_MASK_XYZW, compiler->float4Id); IlcSpvId wordAddrId = 0; if (isRaw) { IlcSpvId addrId = emitVectorTrim(compiler, srcId, compiler->int4Id, COMP_INDEX_X, 1); @@ -2993,8 +3006,8 @@ static void emitLdsAtomicOp( return; } - IlcSpvId src0Id = loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, compiler->int4Id); - IlcSpvId src1Id = loadSource(compiler, &instr->srcs[1], COMP_MASK_XYZW, compiler->uint4Id); + IlcSpvId src0Id = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[0]], COMP_MASK_XYZW, compiler->int4Id); + IlcSpvId src1Id = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[1]], COMP_MASK_XYZW, compiler->uint4Id); IlcSpvId readId = 0; IlcSpvId pointerTypeId = ilcSpvPutPointerType(compiler->module, SpvStorageClassWorkgroup, resource->texelTypeId); @@ -3018,7 +3031,7 @@ static void emitLdsAtomicOp( if (instr->dstCount > 0) { IlcSpvId resId = emitVectorGrow(compiler, readId, resource->texelTypeId, 1); - storeDestination(compiler, &instr->dsts[0], resId, compiler->int4Id); + 
storeDestination(compiler, &compiler->kernel->dstBuffer[instr->dsts[0]], resId, compiler->int4Id); } } @@ -3038,7 +3051,7 @@ static void emitUavAtomicOp( IlcSpvId vecTypeId = ilcSpvPutVectorType(compiler->module, resource->texelTypeId, 4); IlcSpvId pointerTypeId = ilcSpvPutPointerType(compiler->module, SpvStorageClassImage, resource->texelTypeId); - IlcSpvId addressId = loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, compiler->int4Id); + IlcSpvId addressId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[0]], COMP_MASK_XYZW, compiler->int4Id); IlcSpvId trimAddressId = emitVectorTrim(compiler, addressId, compiler->int4Id, COMP_INDEX_X, getResourceDimensionCount(resource->ilType)); IlcSpvId zeroId = ilcSpvPutConstant(compiler->module, compiler->intId, ZERO_LITERAL); @@ -3050,7 +3063,7 @@ static void emitUavAtomicOp( IlcSpvId semanticsId = ilcSpvPutConstant(compiler->module, compiler->intId, SpvMemorySemanticsAcquireReleaseMask | SpvMemorySemanticsImageMemoryMask); - IlcSpvId src1Id = loadSource(compiler, &instr->srcs[1], COMP_MASK_XYZW, vecTypeId); + IlcSpvId src1Id = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[1]], COMP_MASK_XYZW, vecTypeId); IlcSpvId valueId = emitVectorTrim(compiler, src1Id, vecTypeId, COMP_INDEX_X, 1); if (instr->opcode == IL_OP_UAV_ADD || instr->opcode == IL_OP_UAV_READ_ADD) { @@ -3062,7 +3075,7 @@ static void emitUavAtomicOp( if (instr->dstCount > 0) { IlcSpvId resId = emitVectorGrow(compiler, readId, resource->texelTypeId, 1); - storeDestination(compiler, &instr->dsts[0], resId, vecTypeId); + storeDestination(compiler, &compiler->kernel->dstBuffer[instr->dsts[0]], resId, vecTypeId); } } @@ -3073,7 +3086,7 @@ static void emitAppendBufOp( uint16_t ilResourceId = GET_BITS(instr->control, 0, 14); const IlcResource* resource = findResource(compiler, RES_TYPE_ATOMIC_COUNTER, 0); - const Destination* dst = &instr->dsts[0]; + const Destination* dst = &compiler->kernel->dstBuffer[instr->dsts[0]]; if (resource == 
NULL) { // Lazily declare atomic counter buffer @@ -3168,14 +3181,14 @@ static void emitStructuredSrvLoad( } const IlcResource* resource = findResource(compiler, RES_TYPE_GENERIC, ilResourceId); - const Destination* dst = &instr->dsts[0]; + const Destination* dst = &compiler->kernel->dstBuffer[instr->dsts[0]]; if (resource == NULL) { LOGE("resource %d not found\n", ilResourceId); return; } - IlcSpvId srcId = loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, compiler->int4Id); + IlcSpvId srcId = loadSource(compiler, &compiler->kernel->srcBuffer[instr->srcs[0]], COMP_MASK_XYZW, compiler->int4Id); IlcSpvId indexId = emitVectorTrim(compiler, srcId, compiler->int4Id, COMP_INDEX_X, 1); IlcSpvId offsetId = emitVectorTrim(compiler, srcId, compiler->int4Id, COMP_INDEX_Y, 1); @@ -3583,7 +3596,7 @@ static void emitInstr( // FIXME seems to be some sort of vertex ID offset (as seen in 3DMark), store 0 for now IlcSpvId zeroId = ilcSpvPutConstant(compiler->module, compiler->intId, ZERO_LITERAL); IlcSpvId zero4Id = emitVectorGrow(compiler, zeroId, compiler->intId, 1); - storeDestination(compiler, &instr->dsts[0], zero4Id, compiler->int4Id); + storeDestination(compiler, &compiler->kernel->dstBuffer[instr->dsts[0]], zero4Id, compiler->int4Id); } break; default: LOGW("unhandled instruction %d\n", instr->opcode); @@ -3681,7 +3694,7 @@ static void emitEntryPoint( unsigned interfaceCount = compiler->regCount + compiler->resourceCount + compiler->samplerCount; - IlcSpvWord* interfaces = malloc(sizeof(IlcSpvWord) * interfaceCount); + STACK_ARRAY(IlcSpvWord, interfaces, 128, interfaceCount); unsigned interfaceIndex = 0; for (int i = 0; i < compiler->regCount; i++) { @@ -3715,7 +3728,7 @@ static void emitEntryPoint( break; } - free(interfaces); + STACK_ARRAY_FINISH(interfaces); } IlcShader ilcCompileKernel( @@ -3752,6 +3765,7 @@ IlcShader ilcCompileKernel( .bool4Id = ilcSpvPutVectorType(&module, boolId, 4), .currentStrideIndex = 0, .regCount = 0, + .regSize = 0, .regs = NULL, 
.resourceCount = 0, .resources = NULL, diff --git a/src/amdilc/amdilc_decoder.c b/src/amdilc/amdilc_decoder.c index f43aeb08..cbdebd05 100644 --- a/src/amdilc/amdilc_decoder.c +++ b/src/amdilc/amdilc_decoder.c @@ -1,5 +1,7 @@ #include "amdilc_internal.h" +#define BUFFER_ALLOC_FACTOR 1.5f /* multiplier applied to a buffer's byte capacity each time it has to grow */ + typedef struct { uint16_t opcode; uint8_t dstCount; @@ -164,8 +166,73 @@ static bool hasIndexedResourceSampler( instr->opcode == IL_OP_FETCH4_PO_C; } +/* Reserves srcCount new slots at the end of kernel->srcBuffer and writes their indices to srcIndices; the slots are not initialized here. */ static void reserveSources( + Kernel* kernel, + unsigned srcCount, + unsigned* srcIndices) +{ + unsigned size = (kernel->srcCount + srcCount) * sizeof(Source); /* bytes required once the new slots are counted */ + + /* Hand out the indices BEFORE growing the buffer: decodeSource passes src->srcs as srcIndices, which points into srcBuffer itself, so the pointer may no longer be valid once realloc has moved the buffer. */ + for (unsigned i = 0; i < srcCount; i++) { + srcIndices[i] = kernel->srcCount++; + } + + if (kernel->srcSize < size) { /* srcSize is the capacity in bytes */ + if (kernel->srcSize < sizeof(Source) * 128) { /* never start below room for 128 sources */ + kernel->srcSize = sizeof(Source) * 128; + } + while (kernel->srcSize < size) { + kernel->srcSize *= BUFFER_ALLOC_FACTOR; + } + kernel->srcBuffer = realloc(kernel->srcBuffer, kernel->srcSize); /* NOTE(review): result is not checked for NULL */ + } +} + +/* Counterpart of reserveSources for kernel->dstBuffer: grows the buffer first and writes the new indices to dstIndices afterwards. */ static void reserveDestinations( + Kernel* kernel, + unsigned dstCount, + unsigned* dstIndices) +{ + unsigned size = (kernel->dstCount + dstCount) * sizeof(Destination); /* bytes required once the new slots are counted */ + if (kernel->dstSize < size) { + if (kernel->dstSize < sizeof(Destination) * 128) { /* never start below room for 128 destinations */ + kernel->dstSize = sizeof(Destination) * 128; + } + while (kernel->dstSize < size) { + kernel->dstSize *= BUFFER_ALLOC_FACTOR; + } + kernel->dstBuffer = realloc(kernel->dstBuffer, kernel->dstSize); /* NOTE(review): result is not checked for NULL */ + } + for (unsigned i = 0; i < dstCount; i++) { /* the caller visible in this patch passes instr->dsts, which is not stored in dstBuffer */ + dstIndices[i] = kernel->dstCount++; + } +} + +/* Reserves extraCount tokens at the end of kernel->extrasBuffer. */ static unsigned reserveExtras( + Kernel* kernel, + unsigned extraCount) +{ + unsigned size = (kernel->extraCount + extraCount) * sizeof(Token); /* bytes required once the new tokens are counted */ + unsigned currentExtrasCount = kernel->extraCount; /* index of the first reserved token */ + if (kernel->extraSize < size) { + if (kernel->extraSize < sizeof(Token) * 128) { /* never start below room for 128 tokens */ + kernel->extraSize = sizeof(Token) * 128; + } + while (kernel->extraSize < size) { +
kernel->extraSize *= BUFFER_ALLOC_FACTOR; + } + kernel->extrasBuffer = realloc(kernel->extrasBuffer, kernel->extraSize); + } + + kernel->extraCount += extraCount; + + return currentExtrasCount; +} + static unsigned decodeSource( - Source* src, + Kernel* kernel, + unsigned srcIndex, const Token* token); static unsigned getSourceCount( @@ -228,6 +295,7 @@ static unsigned decodeIlVersion( } static unsigned decodeDestination( + Kernel* kernel, Destination* dst, const Token* token) { @@ -267,8 +335,9 @@ static unsigned decodeDestination( if (relativeAddress == IL_ADDR_ABSOLUTE) { if (dimension) { - dst->absoluteSrc = malloc(sizeof(Source)); - idx += decodeSource(dst->absoluteSrc, &token[idx]); + dst->hasAbsoluteSrc = true; + reserveSources(kernel, 1, &dst->absoluteSrc); + idx += decodeSource(kernel, dst->absoluteSrc, &token[idx]); } } else if (relativeAddress == IL_ADDR_RELATIVE) { // TODO @@ -276,15 +345,21 @@ static unsigned decodeDestination( assert(!dimension); } else if (relativeAddress == IL_ADDR_REG_RELATIVE) { dst->relativeSrcCount = dimension ? 
2 : 1; - dst->relativeSrcs = malloc(dst->relativeSrcCount * sizeof(Source)); - for (unsigned i = 0; i < dst->relativeSrcCount; i++) { - idx += decodeSource(&dst->relativeSrcs[i], &token[idx]); + reserveSources(kernel, dst->relativeSrcCount, dst->relativeSrcs); + idx += decodeSource(kernel, dst->relativeSrcs[0], &token[idx]); + // the immediate follows after the first addr reg + if (dst->hasImmediate) { + dst->immediate = token[idx]; + idx++; + } + if (dst->relativeSrcCount > 1) { + idx += decodeSource(kernel, dst->relativeSrcs[1], &token[idx]); } } else { assert(false); } - if (dst->hasImmediate) { + if (dst->hasImmediate && relativeAddress != IL_ADDR_REG_RELATIVE) { dst->immediate = token[idx]; idx++; } @@ -297,8 +372,10 @@ static unsigned decodeDestination( return idx; } +/* this function can cause srcBuffer reallocation, so index is being used as well as a macro to resolve the pointer */ static unsigned decodeSource( - Source* src, + Kernel* kernel, + unsigned srcIndex, const Token* token) { unsigned idx = 0; @@ -307,6 +384,7 @@ static unsigned decodeSource( bool dimension; bool extended; +#define src (&kernel->srcBuffer[srcIndex]) memset(src, 0, sizeof(*src)); src->registerNum = GET_BITS(token[idx], 0, 15); @@ -345,8 +423,8 @@ static unsigned decodeSource( if (relativeAddress == IL_ADDR_ABSOLUTE) { if (dimension) { src->srcCount = 1; - src->srcs = malloc(sizeof(Source)); - idx += decodeSource(&src->srcs[0], &token[idx]); + reserveSources(kernel, src->srcCount, src->srcs); + idx += decodeSource(kernel, src->srcs[0], &token[idx]); } } else if (relativeAddress == IL_ADDR_RELATIVE) { // TODO @@ -354,15 +432,21 @@ static unsigned decodeSource( assert(!dimension); } else if (relativeAddress == IL_ADDR_REG_RELATIVE) { src->srcCount = dimension ? 
2 : 1; - src->srcs = malloc(src->srcCount * sizeof(Source)); - for (unsigned i = 0; i < src->srcCount; i++) { - idx += decodeSource(&src->srcs[i], &token[idx]); + reserveSources(kernel, src->srcCount, src->srcs); + idx += decodeSource(kernel, src->srcs[0], &token[idx]); + // the immediate follows after the first addr reg + if (src->hasImmediate) { + src->immediate = token[idx]; + idx++; + } + if (src->srcCount > 1) { + idx += decodeSource(kernel, src->srcs[1], &token[idx]); } } else { assert(false); } - if (src->hasImmediate) { + if (src->hasImmediate && relativeAddress != IL_ADDR_REG_RELATIVE) { src->immediate = token[idx]; idx++; } @@ -371,11 +455,12 @@ static unsigned decodeSource( // TODO LOGW("unhandled extended register addressing\n"); } - +#undef src return idx; } static unsigned decodeInstruction( + Kernel* kernel, Instruction* instr, const Token* token, uint16_t prefixControl) @@ -390,7 +475,7 @@ static unsigned decodeInstruction( if (instr->opcode == IL_OP_PREFIX) { // Pass prefix info to the next instruction - return idx + decodeInstruction(instr, &token[idx], instr->control); + return idx + decodeInstruction(kernel, instr, &token[idx], instr->control); } if (instr->opcode >= IL_OP_LAST) { @@ -430,20 +515,21 @@ static unsigned decodeInstruction( } instr->dstCount = info->dstCount; - instr->dsts = malloc(sizeof(Destination) * instr->dstCount); + reserveDestinations(kernel, instr->dstCount, instr->dsts); for (int i = 0; i < instr->dstCount; i++) { - idx += decodeDestination(&instr->dsts[i], &token[idx]); + idx += decodeDestination(kernel, &kernel->dstBuffer[instr->dsts[i]], &token[idx]); } instr->srcCount = getSourceCount(instr); - instr->srcs = malloc(sizeof(Source) * instr->srcCount); + reserveSources(kernel, instr->srcCount, instr->srcs); for (int i = 0; i < instr->srcCount; i++) { - idx += decodeSource(&instr->srcs[i], &token[idx]); + idx += decodeSource(kernel, instr->srcs[i], &token[idx]); } instr->extraCount = getExtraCount(instr); - instr->extras = 
malloc(sizeof(Token) * instr->extraCount); - memcpy(instr->extras, &token[idx], sizeof(Token) * instr->extraCount); + instr->extrasStartIndex = reserveExtras(kernel, instr->extraCount); + + memcpy(&kernel->extrasBuffer[instr->extrasStartIndex], &token[idx], sizeof(Token) * instr->extraCount); idx += instr->extraCount; instr->preciseMask = GET_BITS(prefixControl, 0, 3); @@ -451,23 +537,32 @@ static unsigned decodeInstruction( return idx; } -Kernel* ilcDecodeStream( +void ilcDecodeStream( + Kernel* kernel, const Token* tokens, unsigned count) { - Kernel* kernel = malloc(sizeof(Kernel)); unsigned idx = 0; idx += decodeIlLang(kernel, &tokens[idx]); idx += decodeIlVersion(kernel, &tokens[idx]); + kernel->instrSize = 0; kernel->instrCount = 0; kernel->instrs = NULL; + while (idx < count) { kernel->instrCount++; - kernel->instrs = realloc(kernel->instrs, sizeof(Instruction) * kernel->instrCount); - idx += decodeInstruction(&kernel->instrs[kernel->instrCount - 1], &tokens[idx], 0); + unsigned size = kernel->instrCount * sizeof(Instruction); + if (kernel->instrSize < size) { + if (kernel->instrSize == 0) { + kernel->instrSize = sizeof(Instruction) * MAX(count / 64, 1); + } + while (kernel->instrSize < size) { + kernel->instrSize *= BUFFER_ALLOC_FACTOR; + } + kernel->instrs = realloc(kernel->instrs, kernel->instrSize); + } + idx += decodeInstruction(kernel, &kernel->instrs[kernel->instrCount - 1], &tokens[idx], 0); } - - return kernel; } diff --git a/src/amdilc/amdilc_dump.c b/src/amdilc/amdilc_dump.c index 06bd42c9..48e93fd0 100644 --- a/src/amdilc/amdilc_dump.c +++ b/src/amdilc/amdilc_dump.c @@ -278,6 +278,7 @@ static const char* mIlRelopNames[6] = { static void dumpSource( FILE* file, + const Kernel* kernel, const Source* src); static const char* getComponentName( @@ -309,6 +310,7 @@ static void dumpGlobalFlags( static void dumpDestination( FILE* file, + const Kernel* kernel, const Destination* dst) { fprintf(file, "%s%s %s", @@ -324,7 +326,7 @@ static void 
dumpDestination( if (dst->registerType == IL_REGTYPE_ITEMP || dst->registerType == IL_REGTYPE_OUTPUT) { - assert(dst->absoluteSrc == NULL); + assert(!dst->hasAbsoluteSrc); assert(dst->relativeSrcCount <= 1); bool indexed = dst->hasImmediate || dst->relativeSrcCount > 0; @@ -333,7 +335,7 @@ static void dumpDestination( fprintf(file, "["); } if (dst->relativeSrcCount > 0) { - dumpSource(file, &dst->relativeSrcs[0]); + dumpSource(file, kernel, &kernel->srcBuffer[dst->relativeSrcs[0]]); if (dst->hasImmediate) { fprintf(file, "+"); @@ -347,12 +349,12 @@ static void dumpDestination( } } else if (dst->registerType == IL_REGTYPE_INPUTCP) { // Attribute number - fprintf(file, "[%u]", dst->absoluteSrc->registerNum); + fprintf(file, "[%u]", kernel->srcBuffer[dst->absoluteSrc].registerNum); } else { if (dst->hasImmediate) { LOGW("unhandled immediate value\n"); } - if (dst->absoluteSrc != NULL) { + if (dst->hasAbsoluteSrc) { LOGW("unhandled absolute source\n"); } if (dst->relativeSrcCount > 0) { @@ -374,6 +376,7 @@ static void dumpDestination( static void dumpSource( FILE* file, + const Kernel* kernel, const Source* src) { fprintf(file, "%s", mIlRegTypeNames[src->registerType]); @@ -405,7 +408,7 @@ static void dumpSource( fprintf(file, "["); } if (srcCount > 0) { - dumpSource(file, &src->srcs[0]); + dumpSource(file, kernel, &kernel->srcBuffer[src->srcs[0]]); if (src->hasImmediate) { fprintf(file, "+"); @@ -428,7 +431,7 @@ static void dumpSource( if (src->registerType == IL_REGTYPE_INPUTCP) { // Last source is reserved for the attribute number - fprintf(file, "[%u]", src->srcs[srcCount].registerNum); + fprintf(file, "[%u]", kernel->srcBuffer[src->srcs[srcCount]].registerNum); } if (src->swizzle[0] != IL_COMPSEL_X_R || @@ -470,6 +473,7 @@ static void dumpSource( static void dumpInstruction( FILE* file, + const Kernel* kernel, const Instruction* instr, int* indentLevel) { @@ -572,7 +576,7 @@ static void dumpInstruction( fprintf(file, "break_logicalnz"); break; case IL_OP_CASE: - 
fprintf(file, "case %d", instr->extras[0]); + fprintf(file, "case %d", kernel->extrasBuffer[instr->extrasStartIndex]); (*indentLevel)++; break; case IL_OP_CONTINUE_LOGICALZ: @@ -635,10 +639,10 @@ static void dumpInstruction( GET_BITS(instr->control, 0, 7), mIlPixTexUsageNames[GET_BITS(instr->control, 8, 11)], GET_BIT(instr->control, 31) ? ",unnorm" : "", - mIlElementFormatNames[GET_BITS(instr->extras[0], 20, 22)], - mIlElementFormatNames[GET_BITS(instr->extras[0], 23, 25)], - mIlElementFormatNames[GET_BITS(instr->extras[0], 26, 28)], - mIlElementFormatNames[GET_BITS(instr->extras[0], 29, 31)]); + mIlElementFormatNames[GET_BITS(kernel->extrasBuffer[instr->extrasStartIndex], 20, 22)], + mIlElementFormatNames[GET_BITS(kernel->extrasBuffer[instr->extrasStartIndex], 23, 25)], + mIlElementFormatNames[GET_BITS(kernel->extrasBuffer[instr->extrasStartIndex], 26, 28)], + mIlElementFormatNames[GET_BITS(kernel->extrasBuffer[instr->extrasStartIndex], 29, 31)]); break; case IL_OP_DISCARD_LOGICALNZ: fprintf(file, "discard_logicalnz"); @@ -832,7 +836,7 @@ static void dumpInstruction( case IL_OP_DCL_NUM_THREAD_PER_GROUP: fprintf(file, "dcl_num_thread_per_group"); for (int i = 0; i < instr->extraCount; i++) { - fprintf(file, "%s %u", i != 0 ? "," : "", instr->extras[i]); + fprintf(file, "%s %u", i != 0 ? 
"," : "", kernel->extrasBuffer[instr->extrasStartIndex + i]); } break; case IL_OP_FENCE: @@ -889,7 +893,7 @@ static void dumpInstruction( break; case IL_OP_DCL_STRUCT_SRV: fprintf(file, "dcl_struct_srv_id(%u) %u", - GET_BITS(instr->control, 0, 13), instr->extras[0]); + GET_BITS(instr->control, 0, 13), kernel->extrasBuffer[instr->extrasStartIndex]); break; case IL_OP_SRV_STRUCT_LOAD: if (GET_BIT(instr->control, 12)) { @@ -907,11 +911,11 @@ static void dumpInstruction( break; case IL_DCL_LDS: fprintf(file, "dcl_lds_id(%u) %u", - GET_BITS(instr->control, 0, 13), instr->extras[0]); + GET_BITS(instr->control, 0, 13), kernel->extrasBuffer[instr->extrasStartIndex]); break; case IL_DCL_STRUCT_LDS: fprintf(file, "dcl_struct_lds_id(%u) %u, %u", - GET_BITS(instr->control, 0, 13), instr->extras[0], instr->extras[1]); + GET_BITS(instr->control, 0, 13), kernel->extrasBuffer[instr->extrasStartIndex + 0], kernel->extrasBuffer[instr->extrasStartIndex + 1]); break; case IL_OP_LDS_READ_ADD: fprintf(file, "lds_read_add_resource(%u)", GET_BITS(instr->control, 0, 13)); @@ -923,10 +927,10 @@ static void dumpInstruction( fprintf(file, "ubit_extract"); break; case IL_DCL_NUM_ICP: - fprintf(file, "dcl_num_icp%u", instr->extras[0]); + fprintf(file, "dcl_num_icp%u", kernel->extrasBuffer[instr->extrasStartIndex]); break; case IL_DCL_NUM_OCP: - fprintf(file, "dcl_num_ocp%u", instr->extras[0]); + fprintf(file, "dcl_num_ocp%u", kernel->extrasBuffer[instr->extrasStartIndex]); break; case IL_OP_HS_FORK_PHASE: fprintf(file, "hs_fork_phase %u", instr->control); @@ -947,7 +951,7 @@ static void dumpInstruction( fprintf(file, "dcl_ts_output_primitive_%s", mIlTsOutputPrimitiveNames[instr->control]); break; case IL_DCL_MAX_TESSFACTOR: - fprintf(file, "dcl_max_tessfactor %g", *((float*)&instr->extras[0])); + fprintf(file, "dcl_max_tessfactor %g", *((float*)&kernel->extrasBuffer[instr->extrasStartIndex])); break; case IL_OP_I_FIRSTBIT: fprintf(file, "ffb(%s)", mIlFfbOptionNames[instr->control]); @@ -990,21 
+994,21 @@ static void dumpInstruction( break; case IL_OP_DCL_TYPED_UAV: // FIXME guessed from IL_OP_DCL_UAV - if (GET_BITS(instr->extras[0], 8, 31)) { - LOGW("unhandled dcl_typed_uav bits 0x%X\n", instr->extras[0]); + if (GET_BITS(kernel->extrasBuffer[instr->extrasStartIndex], 8, 31)) { + LOGW("unhandled dcl_typed_uav bits 0x%X\n", kernel->extrasBuffer[instr->extrasStartIndex]); } fprintf(file, "dcl_typed_uav_id(%u)_type(%s)_fmtx(%s)", GET_BITS(instr->control, 0, 13), - mIlPixTexUsageNames[GET_BITS(instr->extras[0], 4, 7)], - mIlElementFormatNames[GET_BITS(instr->extras[0], 0, 3)]); + mIlPixTexUsageNames[GET_BITS(kernel->extrasBuffer[instr->extrasStartIndex], 4, 7)], + mIlElementFormatNames[GET_BITS(kernel->extrasBuffer[instr->extrasStartIndex], 0, 3)]); break; case IL_OP_DCL_TYPELESS_UAV: // FIXME guessed - if (GET_BITS(instr->extras[0], 8, 31) || instr->extras[1]) { - LOGW("unhandled dcl_typed_uav bits 0x%X 0x%X\n", instr->extras[0], instr->extras[1]); + if (GET_BITS(kernel->extrasBuffer[instr->extrasStartIndex], 8, 31) || kernel->extrasBuffer[instr->extrasStartIndex + 1]) { + LOGW("unhandled dcl_typed_uav bits 0x%X 0x%X\n", kernel->extrasBuffer[instr->extrasStartIndex], kernel->extrasBuffer[instr->extrasStartIndex + 1]); } fprintf(file, "dcl_typeless_uav_id(%u)_stride(%u)_length(?)_access(?)", - GET_BITS(instr->control, 0, 13), instr->extras[0]); + GET_BITS(instr->control, 0, 13), kernel->extrasBuffer[instr->extrasStartIndex]); break; case IL_UNK_660: fprintf(file, "unk_%u", instr->opcode); @@ -1035,7 +1039,7 @@ static void dumpInstruction( assert(instr->dstCount <= 1); for (int i = 0; i < instr->dstCount; i++) { - dumpDestination(file, &instr->dsts[i]); + dumpDestination(file, kernel, &kernel->dstBuffer[instr->dsts[i]]); } for (int i = 0; i < instr->srcCount; i++) { @@ -1044,12 +1048,12 @@ static void dumpInstruction( } fprintf(file, " "); - dumpSource(file, &instr->srcs[i]); + dumpSource(file, kernel, &kernel->srcBuffer[instr->srcs[i]]); } if (instr->opcode 
== IL_DCL_LITERAL) { for (int i = 0; i < instr->extraCount; i++) { - fprintf(file, ", 0x%08X", instr->extras[i]); + fprintf(file, ", 0x%08X", kernel->extrasBuffer[instr->extrasStartIndex + i]); } } else if (instr->opcode == IL_DCL_GLOBAL_FLAGS) { dumpGlobalFlags(file, instr->control); @@ -1071,6 +1075,6 @@ void ilcDumpKernel( kernel->multipass ? "_mp" : "", kernel->realtime ? "_rt" : ""); for (int i = 0; i < kernel->instrCount; i++) { - dumpInstruction(file, &kernel->instrs[i], &indentLevel); + dumpInstruction(file, kernel, &kernel->instrs[i], &indentLevel); } } diff --git a/src/amdilc/amdilc_internal.h b/src/amdilc/amdilc_internal.h index a3cd11be..c527817f 100644 --- a/src/amdilc/amdilc_internal.h +++ b/src/amdilc/amdilc_internal.h @@ -19,6 +19,19 @@ #define MAX(a, b) \ ((a) > (b) ? (a) : (b)) +#define COUNT_OF(array) \ + (sizeof(array) / sizeof((array)[0])) + +#define STACK_ARRAY(type, name, stackCount, count) \ + type _stack_##name[stackCount]; \ + type* name = (count) <= (stackCount) ? 
_stack_##name : malloc((count) * sizeof(type)) + +#define STACK_ARRAY_FINISH(name) /* frees the heap fallback taken by STACK_ARRAY; a no-op when the on-stack storage was used */ \ + do { if ((name) != _stack_##name) free(name); } while (0) /* one statement, so it is safe in an unbraced if/else */ + +#define MAX_SRC_COUNT (8) +#define MAX_DST_COUNT (1) + typedef uint32_t Token; typedef struct _Source Source; @@ -28,9 +41,10 @@ typedef struct { uint8_t component[4]; bool clamp; uint8_t shiftScale; - Source* absoluteSrc; + bool hasAbsoluteSrc; + unsigned absoluteSrc; unsigned relativeSrcCount; - Source* relativeSrcs; + unsigned relativeSrcs[2]; bool hasImmediate; Token immediate; } Destination; @@ -48,7 +62,7 @@ typedef struct _Source { uint8_t divComp; bool clamp; unsigned srcCount; - Source* srcs; + unsigned srcs[2]; bool hasImmediate; Token immediate; } Source; @@ -61,11 +75,11 @@ typedef struct { Token resourceFormat; Token addressOffset; unsigned dstCount; - Destination* dsts; + unsigned dsts[MAX_DST_COUNT]; unsigned srcCount; - Source* srcs; + unsigned srcs[MAX_SRC_COUNT]; unsigned extraCount; - Token* extras; + unsigned extrasStartIndex; uint8_t preciseMask; } Instruction; @@ -76,13 +90,24 @@ typedef struct { uint8_t shaderType; bool multipass; bool realtime; + unsigned dstCount; + unsigned dstSize; + Destination* dstBuffer; + unsigned srcCount; + unsigned srcSize; + Source* srcBuffer; + unsigned extraCount; + unsigned extraSize; + Token* extrasBuffer; + unsigned instrSize; unsigned instrCount; Instruction* instrs; } Kernel; extern const char* mIlShaderTypeNames[IL_SHADER_LAST]; -Kernel* ilcDecodeStream( +void ilcDecodeStream( + Kernel* kernel, const Token* tokens, unsigned count); diff --git a/src/amdilc/amdilc_spirv.c b/src/amdilc/amdilc_spirv.c index 1592ba35..9a66657b 100644 --- a/src/amdilc/amdilc_spirv.c +++ b/src/amdilc/amdilc_spirv.c @@ -18,7 +18,10 @@ static void putBuffer( unsigned size = (buffer->wordCount + otherBuffer->wordCount) * sizeof(IlcSpvWord); if (buffer->wordSize < size) { // Grow the buffer exponentially to minimize allocations - buffer->wordSize = sizeof(IlcSpvWord); + if (buffer->wordSize <
sizeof(IlcSpvWord)) { + buffer->wordSize = sizeof(IlcSpvWord) * 1024; + } + while (buffer->wordSize < size) { buffer->wordSize *= BUFFER_ALLOC_FACTOR; } @@ -245,12 +248,12 @@ void ilcSpvMoveWords( unsigned wordCount = buffer->wordCount - srcWordIndex; // Move the end of the buffer starting at src to dst - IlcSpvWord* tmp = malloc(wordCount * sizeof(IlcSpvWord)); + STACK_ARRAY(IlcSpvWord, tmp, 128, wordCount); memcpy(tmp, &buffer->words[srcWordIndex], wordCount * sizeof(IlcSpvWord)); memmove(&buffer->words[dstWordIndex + wordCount], &buffer->words[dstWordIndex], (srcWordIndex - dstWordIndex) * sizeof(IlcSpvWord)); memcpy(&buffer->words[dstWordIndex], tmp, wordCount * sizeof(IlcSpvWord)); - free(tmp); + STACK_ARRAY_FINISH(tmp); } uint32_t ilcSpvAllocId( @@ -477,13 +480,13 @@ IlcSpvId ilcSpvPutFunctionType( const IlcSpvId* argTypeIds) { unsigned argCount = 1 + argTypeIdCount; - IlcSpvWord* args = malloc(sizeof(args[0]) * argCount); + STACK_ARRAY(IlcSpvWord, args, 8, argCount); args[0] = returnTypeId; memcpy(&args[1], argTypeIds, sizeof(args[0]) * argTypeIdCount); IlcSpvId id = putType(module, SpvOpTypeFunction, argCount, args, false, false); - free(args); + STACK_ARRAY_FINISH(args); return id; }