diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
index 6b5882f44e8b..e28934601d88 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -2508,6 +2508,14 @@ static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn,
   SmallVector<int64_t> distTileSizes =
       getDefaultDistributedLevelTileSizes(op, DistributionHeuristicConfig{});
   TileSizesListType tileSizes = {distTileSizes};
+  SmallVector<int64_t> vecTileSizes = distTileSizes;
+
+  // Add an extra level of tiling.
+  // TODO: Limit vector tile sizes for other TilingInterface ops.
+  if (auto linalgOp = dyn_cast<linalg::LinalgOp>(*op)) {
+    limitVectorTileSizes(linalgOp, vecTileSizes);
+  }
+  tileSizes.push_back(vecTileSizes);
   return setOpConfigAndEntryPointFnTranslation(
       entryPointFn, op, tileSizes, DispatchLoweringPassPipeline::CPUDefault);
 }
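
Note on the KernelDispatch.cpp hunk above: the CPUDefault root config previously carried only distribution-level tile sizes. The new code seeds a second, vector-level size list from the distribution sizes and, for linalg ops, caps it via `limitVectorTileSizes` before attaching both levels. A minimal sketch of the capping idea; the fixed `kMaxVectorTileSize` policy below is an assumption for illustration, not IREE's actual `limitVectorTileSizes` heuristic:

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Clamp each vector-level tile size so the second tiling level never asks
// for tiles beyond a fixed budget. A size of 0 conventionally means "this
// loop is not tiled" and is left untouched.
static void clampVectorTileSizes(std::vector<int64_t> &vecTileSizes,
                                 int64_t kMaxVectorTileSize = 64) {
  for (int64_t &size : vecTileSizes)
    if (size > 0)
      size = std::min(size, kMaxVectorTileSize);
}
```
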
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
index a62b9c310f65..14e040fcd642 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
@@ -64,29 +64,20 @@ class LLVMCPULowerExecutableTargetPass
 };
 } // namespace
 
-// TODO(dcaballe): We temporarily need this utility to retrieve a valid
-// lowering config. We should be able to remove this once we have a lowering
-// config attribute per op.
-static FailureOr<IREE::Codegen::LoweringConfigAttr>
-getRootLoweringConfig(FunctionOpInterface funcOp) {
+static FailureOr<TilingConfig>
+getTilingConfigForPipeline(FunctionOpInterface funcOp) {
   SmallVector<Operation *> computeOps = getComputeOps(funcOp);
-  // Check for self first.
   FailureOr<Operation *> rootOp = getRootOperation(computeOps);
+  if (failed(rootOp) || !rootOp.value()) {
+    return failure();
+  }
   auto rootLoweringConfig =
       iree_compiler::getLoweringConfig<IREE::Codegen::LoweringConfigAttr>(
           rootOp.value());
-  if (rootLoweringConfig) {
-    return rootLoweringConfig;
+  if (!rootLoweringConfig) {
+    return failure();
   }
-
-  return failure();
-}
-
-static TilingConfig getTilingConfigForPipeline(FunctionOpInterface funcOp) {
-  auto maybeLoweringConfig = getRootLoweringConfig(funcOp);
-  assert(succeeded(maybeLoweringConfig) &&
-         "Pipeline requires a lowering config");
-  return TilingConfig(*maybeLoweringConfig);
+  return TilingConfig(rootLoweringConfig);
 }
 
 void LLVMCPULowerExecutableTargetPass::runOnOperation() {
@@ -122,42 +113,76 @@ void LLVMCPULowerExecutableTargetPass::runOnOperation() {
   // No pipleline specified, nothing to do.
   case IREE::Codegen::DispatchLoweringPassPipeline::None:
     return;
-  case IREE::Codegen::DispatchLoweringPassPipeline::CPUDefault:
-    addCPUDefaultPassPipeline(pipeline);
+  case IREE::Codegen::DispatchLoweringPassPipeline::CPUDefault: {
+    auto maybeTilingConfig = getTilingConfigForPipeline(funcOp);
+    addCPUDefaultPassPipeline(pipeline, maybeTilingConfig);
     break;
+  }
   case IREE::Codegen::DispatchLoweringPassPipeline::
       CPUBufferOpsTileAndVectorize: {
-    TilingConfig tilingConfig = getTilingConfigForPipeline(funcOp);
-    addCPUBufferOpsTileAndVectorizePipeline(pipeline, tilingConfig,
+    auto maybeTilingConfig = getTilingConfigForPipeline(funcOp);
+    if (failed(maybeTilingConfig)) {
+      funcOp.emitOpError("Tiling Config is necessary for "
+                         "CPUBufferOpsTileAndVectorize pipeline.");
+      return signalPassFailure();
+    }
+    addCPUBufferOpsTileAndVectorizePipeline(pipeline, *maybeTilingConfig,
                                             pipelineOpts);
     break;
   }
   case IREE::Codegen::DispatchLoweringPassPipeline::CPUDoubleTilingExpert: {
-    TilingConfig tilingConfig = getTilingConfigForPipeline(funcOp);
-    addMultiTilingExpertPassPipeline(pipeline, tilingConfig, pipelineOpts);
+    auto maybeTilingConfig = getTilingConfigForPipeline(funcOp);
+    if (failed(maybeTilingConfig)) {
+      funcOp.emitOpError(
+          "Tiling Config is necessary for CPUDoubleTilingExpert pipeline.");
+      return signalPassFailure();
+    }
+    addMultiTilingExpertPassPipeline(pipeline, *maybeTilingConfig,
+                                     pipelineOpts);
     break;
   }
   case IREE::Codegen::DispatchLoweringPassPipeline::
       CPUConvTileAndDecomposeExpert: {
-    TilingConfig tilingConfig = getTilingConfigForPipeline(funcOp);
-    addConvTileAndDecomposeExpertPassPipeline(pipeline, tilingConfig,
+    auto maybeTilingConfig = getTilingConfigForPipeline(funcOp);
+    if (failed(maybeTilingConfig)) {
+      funcOp.emitOpError("Tiling Config is necessary for "
+                         "CPUConvTileAndDecomposeExpert pipeline.");
+      return signalPassFailure();
+    }
+    addConvTileAndDecomposeExpertPassPipeline(pipeline, *maybeTilingConfig,
                                               pipelineOpts);
     break;
   }
   case IREE::Codegen::DispatchLoweringPassPipeline::Mmt4dTilingExpert: {
-    TilingConfig tilingConfig = getTilingConfigForPipeline(funcOp);
-    addMmt4dTilingExpertPassPipeline(pipeline, tilingConfig, pipelineOpts);
+    auto maybeTilingConfig = getTilingConfigForPipeline(funcOp);
+    if (failed(maybeTilingConfig)) {
+      funcOp.emitOpError(
+          "Tiling Config is necessary for Mmt4dTilingExpert pipeline.");
+      return signalPassFailure();
+    }
+    addMmt4dTilingExpertPassPipeline(pipeline, *maybeTilingConfig,
+                                     pipelineOpts);
     break;
   }
   case IREE::Codegen::DispatchLoweringPassPipeline::CPUDataTiling: {
-    TilingConfig tilingConfig = getTilingConfigForPipeline(funcOp);
-    addCPUDataTilingPipeline(pipeline, tilingConfig, pipelineOpts);
+    auto maybeTilingConfig = getTilingConfigForPipeline(funcOp);
+    if (failed(maybeTilingConfig)) {
+      funcOp.emitOpError(
+          "Tiling Config is necessary for CPUDataTiling pipeline.");
+      return signalPassFailure();
+    }
+    addCPUDataTilingPipeline(pipeline, *maybeTilingConfig, pipelineOpts);
     break;
   }
   case IREE::Codegen::DispatchLoweringPassPipeline::
       CPULinalgExtTileAndVectorize: {
-    TilingConfig tilingConfig = getTilingConfigForPipeline(funcOp);
-    addCPULinalgExtTileAndVectorizePipeline(pipeline, tilingConfig,
+    auto maybeTilingConfig = getTilingConfigForPipeline(funcOp);
+    if (failed(maybeTilingConfig)) {
+      funcOp.emitOpError("Tiling Config is necessary for "
+                         "CPULinalgExtTileAndVectorize pipeline.");
+      return signalPassFailure();
+    }
+    addCPULinalgExtTileAndVectorizePipeline(pipeline, *maybeTilingConfig,
                                             pipelineOpts);
     break;
   }
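
The change above replaces an assert-on-missing-config helper with a `FailureOr<TilingConfig>` lookup: pipelines that require a tiling config emit a diagnostic and signal pass failure, while CPUDefault may proceed without one. A self-contained sketch of that error-handling pattern, using a stand-in `Config` struct instead of IREE's `TilingConfig`:

```cpp
#include "mlir/Support/LogicalResult.h"

using mlir::failed;
using mlir::failure;
using mlir::FailureOr;

// Stand-in for TilingConfig.
struct Config {
  int numTilingLevels = 0;
};

// Returns failure() instead of asserting, so each caller decides whether a
// missing lowering config is fatal.
static FailureOr<Config> lookupConfig(bool hasLoweringConfigAttr) {
  if (!hasLoweringConfigAttr)
    return failure();
  return Config{/*numTilingLevels=*/3};
}

static bool lowerWithRequiredConfig(bool hasAttr) {
  FailureOr<Config> maybeConfig = lookupConfig(hasAttr);
  if (failed(maybeConfig)) {
    // In the pass this is funcOp.emitOpError(...) + signalPassFailure().
    return false;
  }
  // In the pass: the pipeline is built with *maybeConfig.
  return maybeConfig->numTilingLevels > 0;
}
```
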
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
index 9ef65e28e94f..16c47d2184cb 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
@@ -653,8 +653,14 @@ void addCPULinalgExtTileAndVectorizePipeline(
   }
 }
 
-void addCPUDefaultPassPipeline(OpPassManager &funcPassManager) {
-  addTileAndDistributePasses(funcPassManager);
+void addCPUDefaultPassPipeline(OpPassManager &funcPassManager,
+                               FailureOr<TilingConfig> &tilingConfig) {
+  if (succeeded(tilingConfig) &&
+      tilingConfig.value().getNumTilingLevels() > 1) {
+    addTileAndDistributePasses(funcPassManager);
+    funcPassManager.addPass(createLLVMCPUTileAndFusePass(
+        tilingConfig.value().getVectorCommonParallelLevel()));
+  }
   addCPUBufferizePasses(funcPassManager);
 }
 
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h
index 4696bc808118..16e1fafcf805 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h
@@ -96,10 +96,11 @@ void addCPULinalgExtTileAndVectorizePipeline(
     OpPassManager &funcPassManager, TilingConfig &tilingConfig,
     LLVMCPUPipelineOptions &pipelineOpt);
 
-/// Populates the passes to lower to scalars operations for linalg based
-/// code-generation. This pipeline does not vectorize, but instead just
-/// converts to memrefs
-void addCPUDefaultPassPipeline(OpPassManager &funcPassManager);
+/// Populates the passes to lower scalar and unknown tensor ops (i.e., linalg
+/// ops that are not specialized by any pipeline). Adds an additional level of
+/// tiling and converts to memrefs.
+void addCPUDefaultPassPipeline(OpPassManager &funcPassManager,
+                               FailureOr<TilingConfig> &tilingConfig);
 
 void addConvTileAndDecomposeExpertPassPipeline(
     OpPassManager &funcPassManager, TilingConfig &tilingConfig,
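
The Passes.cpp/Passes.h change makes the default pipeline conditional: tile-and-distribute plus a tile-and-fuse pass are added only when a tiling config with more than one level is present, and bufferization always runs. A runnable toy model of that gating; the pass names are plain strings standing in for IREE's passes:

```cpp
#include <iostream>
#include <optional>
#include <string>
#include <vector>

struct TilingCfg {
  int numTilingLevels = 0;
};
using Pass = std::string;

// Mirrors the shape of addCPUDefaultPassPipeline: extra tiling stages are
// appended only when a usable config exists; bufferization is unconditional.
void buildDefaultPipeline(std::vector<Pass> &pm,
                          const std::optional<TilingCfg> &cfg) {
  if (cfg && cfg->numTilingLevels > 1) {
    pm.push_back("tile-and-distribute");
    pm.push_back("tile-and-fuse(vector-common-parallel)");
  }
  pm.push_back("bufferize");
}

int main() {
  std::vector<Pass> pm;
  buildDefaultPipeline(pm, TilingCfg{2});
  for (const Pass &p : pm)
    std::cout << p << "\n"; // tile-and-distribute, tile-and-fuse, bufferize
}
```
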
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
index 6b1788d91202..d59b9d69c285 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
@@ -1943,3 +1943,18 @@ module {
 // CHECK:      iree_linalg_ext.custom_op
 // CHECK-SAME:   lowering_config = #[[CONFIG]]
 // CHECK-NOT:  lowering_config
+
+// -----
+
+// Test the additional level of tiling in the CPUDefault pipeline. linalg.quantized_matmul doesn't have a specialized
+// pipeline since it gets decomposed to a matmul, which does.
+#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
+func.func @test_tiling_cpu_default(%arg0: tensor<256x256xi8>, %arg1: tensor<256x256xi8>, %arg2: i32, %arg3: i32, %arg4: tensor<256x256xi32>) -> tensor<256x256xi32> attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
+  %0 = linalg.quantized_matmul ins(%arg0, %arg1, %arg2, %arg3 : tensor<256x256xi8>, tensor<256x256xi8>, i32, i32) outs(%arg4 : tensor<256x256xi32>) -> tensor<256x256xi32>
+  return %0 : tensor<256x256xi32>
+}
+// CHECK-DAG: #[[CONFIG0:.+]] = #iree_codegen.lowering_config
+// CHECK-DAG: #[[TRANSLATION_INFO:.+]] = #iree_codegen.translation_info<CPUDefault>
+// CHECK:     func @test_tiling_cpu_default(
+// CHECK-SAME:  translation_info = #[[TRANSLATION_INFO]]
+// CHECK:       linalg.quantized_matmul {lowering_config = #[[CONFIG0]]}
diff --git a/compiler/src/iree/compiler/DispatchCreation/FormDispatchRegions.cpp b/compiler/src/iree/compiler/DispatchCreation/FormDispatchRegions.cpp
index b38b1a593001..e866022eb9a9 100644
--- a/compiler/src/iree/compiler/DispatchCreation/FormDispatchRegions.cpp
+++ b/compiler/src/iree/compiler/DispatchCreation/FormDispatchRegions.cpp
@@ -547,14 +547,6 @@ isFusableWithConsumer(OpOperand &fusedOperand,
     return false;
   }
 
-  // TODO: Enable grouped convolution and depth wise pooling fusion.
-  // Rightnow, this is going through the default CPU pipeline and not through
-  // CONVTilingExpert.
-  if (isa(producer)) {
-    return false;
-  }
-
   auto producerFusionOp =
       dyn_cast<IREE::LinalgExt::LinalgFusionOpInterface>(producer);
   auto consumerFusionOp =
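
The FormDispatchRegions.cpp hunk deletes a kind-based fusion bail-out (the original `isa<...>` operand list did not survive extraction above). Per the removed TODO, grouped convolution and depthwise pooling producers went through the default CPU pipeline rather than CONVTilingExpert, so fusing them was blocked; with CPUDefault now able to tile and fuse, the special case can go. A toy before/after model of the predicate; the op kinds are placeholders, not IREE types:

```cpp
#include <cassert>

enum class ProducerKind { GenericLinalg, GroupedConv, DepthwisePooling };

// Pre-change: grouped convs and depthwise pooling never fused, leaving them
// as standalone dispatches for the (then untiled) CPUDefault pipeline.
static bool isFusableBefore(ProducerKind k) {
  return k == ProducerKind::GenericLinalg;
}

// Post-change: op kind alone no longer blocks fusion; the fused dispatch is
// handled by CPUDefault's new tile-and-fuse level.
static bool isFusableAfter(ProducerKind) { return true; }

int main() {
  assert(!isFusableBefore(ProducerKind::GroupedConv));
  assert(isFusableAfter(ProducerKind::GroupedConv));
  return 0;
}
```
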