Skip to content

Commit

Permalink
Merge branch 'main' into matmul_kernel_integer
Browse files — Browse the repository at this point in the history
  • Loading branch information
resting-dove committed May 1, 2024
2 parents b80631b + 475f42a commit f41a73e
Show file tree
Hide file tree
Showing 39 changed files with 2,350 additions and 308 deletions.
7 changes: 7 additions & 0 deletions UserConfig.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
{
"matmul_vec_size_bits": 0,
"matmul_tile": false,
"matmul_use_fixed_tile_sizes": true,
"matmul_fixed_tile_sizes": [4, 4, 4, 4, 4],
"matmul_unroll_factor": 1,
"matmul_unroll_jam_factor": 4,
"matmul_num_vec_registers": 16,
"use_cuda": false,
"use_vectorized_exec": false,
"use_obj_ref_mgnt": true,
Expand Down
Empty file modified install-ubuntu-packages.sh
100644 → 100755
Empty file.
12 changes: 10 additions & 2 deletions src/api/cli/DaphneUserConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,22 @@ class DaphneLogger;
* Container to pass around user configuration
*/
struct DaphneUserConfig {
// Remember to update UserConfig.json accordingly!

// Remember to update UserConfig.json accordingly!
bool use_cuda = false;
bool use_vectorized_exec = false;
bool use_distributed = false;
bool use_obj_ref_mgnt = true;
bool use_ipa_const_propa = true;
bool use_phy_op_selection = true;
bool use_mlir_codegen = false;
int matmul_vec_size_bits = 0;
bool matmul_tile = false;
int matmul_unroll_factor = 1;
int matmul_unroll_jam_factor=4;
int matmul_num_vec_registers=16;
bool matmul_use_fixed_tile_sizes = false;
std::vector<unsigned> matmul_fixed_tile_sizes = {4, 4};
bool matmul_invert_loops = false;
bool use_mlir_hybrid_codegen = false;
bool cuda_fuse_any = false;
bool vectorized_single_queue = false;
Expand Down Expand Up @@ -74,6 +81,7 @@ struct DaphneUserConfig {
size_t max_distributed_serialization_chunk_size = std::numeric_limits<int>::max() - 1024; // 2GB (-1KB to make up for gRPC headers etc.) - which is the maximum size allowed by gRPC / MPI. TODO: Investigate what might be the optimal.
int numberOfThreads = -1;
int minimumTaskSize = 1;

// minimum considered log level (e.g., no logging below ERROR (essentially suppressing WARN, INFO, DEBUG and TRACE)
spdlog::level::level_enum log_level_limit = spdlog::level::err;
std::vector<LogConfig> loggers;
Expand Down
50 changes: 49 additions & 1 deletion src/api/internal/daphne_internal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/

#include "runtime/local/datastructures/IAllocationDescriptor.h"
#include <vector>
#ifdef USE_MPI
#include "runtime/distributed/worker/MPIWorker.h"
#endif
Expand All @@ -31,7 +32,6 @@
#include "mlir/ExecutionEngine/ExecutionEngine.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Pass/PassManager.h"
#include "llvm/Support/CommandLine.h"

#ifdef USE_CUDA
Expand Down Expand Up @@ -260,10 +260,46 @@ int startDAPHNE(int argc, const char** argv, DaphneLibResult* daphneLibRes, int
"libdir", cat(daphneOptions),
desc("The directory containing kernel libraries")
);

static opt<bool> mlirCodegen(
"mlir-codegen", cat(daphneOptions),
desc("Enables lowering of certain DaphneIR operations on DenseMatrix to low-level MLIR operations.")
);
static opt<int> matmul_vec_size_bits(
"matmul-vec-size-bits", cat(daphneOptions),
desc("Set the vector size to be used in the lowering of the MatMul operation if possible. Value of 0 is interpreted as off switch."),
init(0)
);
static opt<bool> matmul_tile(
"matmul-tile", cat(daphneOptions),
desc("Enables loop tiling in the lowering of the MatMul operation.")
);
static opt<int> matmul_unroll_factor(
"matmul-unroll-factor", cat(daphneOptions),
desc("Factor by which to unroll the finally resulting inner most loop in the lowered MatMul if tiling is used."),
init(1)
);
static opt<int> matmul_unroll_jam_factor(
"matmul-unroll-jam-factor", cat(daphneOptions),
desc("Factor by which to unroll jam the two inner most loop in the lowered MatMul if tiling is used."),
init(4)
);
static opt<int> matmul_num_vec_registers(
"matmul-num-vec-registers", cat(daphneOptions),
desc("Number of vector registers. Used during automatic tiling in lowering of MatMulOp"),
init(16)
);
static llvm::cl::list<unsigned> matmul_fixed_tile_sizes(
"matmul-fixed-tile-sizes", cat(daphneOptions),
desc("Set fixed tile sizes to be used for the lowering of MatMul if tiling is used. This also enables tiling."),
CommaSeparated
);
static opt<bool> matmul_invert_loops(
"matmul-invert-loops", cat(daphneOptions),
desc("Enable inverting of the inner two loops in the matrix multiplication as a fallback option, if tiling is not possible or deactivated.")
);


static opt<bool> performHybridCodegen(
"mlir-hybrid-codegen", cat(daphneOptions),
desc("Enables prototypical hybrid code generation combining pre-compiled kernels and MLIR code generation.")
Expand Down Expand Up @@ -382,6 +418,18 @@ int startDAPHNE(int argc, const char** argv, DaphneLibResult* daphneLibRes, int
user_config.use_ipa_const_propa = !noIPAConstPropa;
user_config.use_phy_op_selection = !noPhyOpSelection;
user_config.use_mlir_codegen = mlirCodegen;
user_config.matmul_vec_size_bits = matmul_vec_size_bits;
user_config.matmul_tile = matmul_tile;
user_config.matmul_unroll_factor = matmul_unroll_factor;
user_config.matmul_unroll_jam_factor = matmul_unroll_jam_factor;
user_config.matmul_num_vec_registers = matmul_num_vec_registers;
user_config.matmul_invert_loops = matmul_invert_loops;
if (matmul_fixed_tile_sizes.size() > 0) {
user_config.matmul_use_fixed_tile_sizes = true;
user_config.matmul_fixed_tile_sizes = matmul_fixed_tile_sizes;
// Specifying a fixed tile size will be interpreted as wanting to use tiling.
user_config.matmul_tile = true;
}
user_config.use_mlir_hybrid_codegen = performHybridCodegen;

if(!libDir.getValue().empty())
Expand Down
29 changes: 21 additions & 8 deletions src/compiler/execution/DaphneIrExecutor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <ir/daphneir/Daphne.h>
#include <ir/daphneir/Passes.h>
#include <ir/daphneir/Passes.h.inc>
#include <mlir/Dialect/LLVMIR/LLVMDialect.h>
#include <mlir/Dialect/LLVMIR/Transforms/Passes.h>

Expand All @@ -32,6 +33,7 @@
#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h"
#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/Passes.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
Expand Down Expand Up @@ -298,25 +300,36 @@ void DaphneIrExecutor::buildCodegenPipeline(mlir::PassManager &pm) {
mlir::daphne::createPrintIRPass("IR before codegen pipeline"));

pm.addPass(mlir::daphne::createDaphneOptPass());

if (!userConfig_.use_mlir_hybrid_codegen) {
pm.addPass(mlir::daphne::createMatMulOpLoweringPass());
}

pm.addPass(mlir::daphne::createEwOpLoweringPass());
pm.addPass(mlir::daphne::createAggAllOpLoweringPass());
pm.addPass(mlir::daphne::createMapOpLoweringPass());
pm.addPass(mlir::createInlinerPass());

pm.addPass(mlir::daphne::createEwOpLoweringPass());
pm.addNestedPass<mlir::func::FuncOp>(mlir::createLoopFusionPass());

if (!userConfig_.use_mlir_hybrid_codegen) {
pm.addPass(mlir::daphne::createMatMulOpLoweringPass(
userConfig_.matmul_tile, userConfig_.matmul_vec_size_bits,
userConfig_.matmul_fixed_tile_sizes,
userConfig_.matmul_use_fixed_tile_sizes,
userConfig_.matmul_unroll_factor, userConfig_.matmul_unroll_jam_factor,
userConfig_.matmul_num_vec_registers,
userConfig_.matmul_invert_loops));
if (userConfig_.explain_mlir_codegen)
pm.addPass(
mlir::daphne::createPrintIRPass("IR directly after lowering MatMulOp."));
}

pm.addPass(mlir::createConvertMathToLLVMPass());
pm.addPass(mlir::daphne::createModOpLoweringPass());
pm.addPass(mlir::createCanonicalizerPass());
pm.addPass(mlir::createCSEPass());
pm.addNestedPass<mlir::func::FuncOp>(mlir::createLoopFusionPass());
pm.addNestedPass<mlir::func::FuncOp>(
mlir::createAffineScalarReplacementPass());
pm.addPass(mlir::createLowerAffinePass());

mlir::LowerVectorToLLVMOptions lowerVectorToLLVMOptions;
pm.addPass(mlir::createConvertVectorToLLVMPass(lowerVectorToLLVMOptions));

if (userConfig_.explain_mlir_codegen)
pm.addPass(
mlir::daphne::createPrintIRPass("IR after codegen pipeline"));
Expand Down
Loading

0 comments on commit f41a73e

Please sign in to comment.