Skip to content

Commit

Permalink
feat: map table column type (4paradigm#3723)
Browse files Browse the repository at this point in the history
* feat: create table with map type columns

* feat: insert stmt support for map data type

simple insert row codegen support from 4paradigm#3731.

* test: fix plan tests
  • Loading branch information
aceforeverd authored Mar 5, 2024
1 parent f35f26b commit 1106259
Show file tree
Hide file tree
Showing 36 changed files with 884 additions and 209 deletions.
5 changes: 0 additions & 5 deletions cases/plan/error_unsupport_sql.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,6 @@ cases:
desc: 表路径层级超过边界2
sql: |
select db.t1.level3.* from t;
- id: 9
desc: Insert 非常量
mode: request-unsupport
sql: |
insert into t1 values(1, 2, aaa);
- id: in_predicate_subquery
desc: test_expr in subquery
sql: |
Expand Down
2 changes: 1 addition & 1 deletion cases/query/udf_query.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,7 @@ cases:
map('1', 2, '1', 4, '1', 6, '7', 8, '9', 10, '11', 12)['1'] as e9,
# map("c", 99, "d", NULL)["d"] as e10,
expect:
# FIXME
# FIXME(someone): add e10 result core dump occasionally on centOS
columns: ["e1 string", "e2 int", "e3 string", "e4 int", "e5 string", "e6 timestamp", "e7 int", "e8 int", "e9 int"]
data: |
2, 100, NULL, 101, f, 2000, 10, NULL, 2
Expand Down
7 changes: 2 additions & 5 deletions hybridse/include/codec/fe_schema_codec.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,7 @@
#define HYBRIDSE_INCLUDE_CODEC_FE_SCHEMA_CODEC_H_

#include <cstring>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "vm/catalog.h"

namespace hybridse {
Expand Down Expand Up @@ -56,7 +53,7 @@ class SchemaCodec {
if (it->name().size() >= 128) {
return false;
}
uint8_t name_size = (uint8_t)(it->name().size());
uint8_t name_size = static_cast<uint8_t>(it->name().size());
memcpy(cbuffer, static_cast<const void*>(&name_size), 1);
cbuffer += 1;
memcpy(cbuffer, static_cast<const void*>(it->name().c_str()),
Expand All @@ -66,7 +63,7 @@ class SchemaCodec {
return true;
}

static bool Decode(const std::string& buf, vm::Schema* schema) {
static bool Decode(const std::string& buf, codec::Schema* schema) {
if (schema == NULL) return false;
if (buf.size() <= 0) return true;
const char* buffer = buf.c_str();
Expand Down
3 changes: 3 additions & 0 deletions hybridse/include/node/sql_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -1900,6 +1900,9 @@ class ColumnDefNode : public SqlNode {

std::string GetColumnName() const { return column_name_; }

const ColumnSchemaNode *schema() const { return schema_; }

// deprecated, use ColumnDefNode::schema instead
DataType GetColumnType() const { return schema_->type(); }

const ExprNode* GetDefaultValue() const { return schema_->default_value(); }
Expand Down
8 changes: 4 additions & 4 deletions hybridse/include/sdk/base_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,13 @@ typedef ::google::protobuf::RepeatedPtrField< ::hybridse::type::TableDef>

class SchemaImpl : public Schema {
public:
explicit SchemaImpl(const vm::Schema& schema);
explicit SchemaImpl(const codec::Schema& schema);
SchemaImpl() {}

~SchemaImpl();

const vm::Schema& GetSchema() const { return schema_; }
inline void SetSchema(const vm::Schema& schema) { schema_ = schema; }
const codec::Schema& GetSchema() const { return schema_; }
inline void SetSchema(const codec::Schema& schema) { schema_ = schema; }
int32_t GetColumnCnt() const;

const std::string& GetColumnName(uint32_t index) const;
Expand All @@ -46,7 +46,7 @@ class SchemaImpl : public Schema {
const bool IsConstant(uint32_t index) const;

private:
vm::Schema schema_;
codec::Schema schema_;
};

class TableImpl : public Table {
Expand Down
12 changes: 7 additions & 5 deletions hybridse/src/codegen/block_ir_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -290,16 +290,18 @@ bool BlockIRBuilder::BuildReturnStmt(const ::hybridse::node::FnReturnStmt *node,
}
::llvm::Value *value = value_wrapper.GetValue(&builder);
if (TypeIRBuilder::IsStructPtr(value->getType())) {
StructTypeIRBuilder *struct_builder =
StructTypeIRBuilder::CreateStructTypeIRBuilder(block->getModule(),
value->getType());
auto struct_builder = StructTypeIRBuilder::CreateStructTypeIRBuilder(block->getModule(), value->getType());
if (!struct_builder.ok()) {
status.code = kCodegenError;
status.msg = struct_builder.status().ToString();
return false;
}
NativeValue ret_value;
if (!var_ir_builder.LoadRetStruct(&ret_value, status)) {
LOG(WARNING) << "fail to load ret struct address";
return false;
}
if (!struct_builder->CopyFrom(block, value,
ret_value.GetValue(&builder))) {
if (!struct_builder.value()->CopyFrom(block, value, ret_value.GetValue(&builder))) {
return false;
}
value = builder.getInt1(true);
Expand Down
4 changes: 2 additions & 2 deletions hybridse/src/codegen/buf_ir_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ bool BufNativeIRBuilder::BuildGetStringField(uint32_t col_idx, uint32_t offset,

BufNativeEncoderIRBuilder::BufNativeEncoderIRBuilder(CodeGenContextBase* ctx,
const std::map<uint32_t, NativeValue>* outputs,
const vm::Schema* schema)
const codec::Schema* schema)
: ctx_(ctx),
outputs_(outputs),
schema_(schema),
Expand Down Expand Up @@ -530,7 +530,7 @@ absl::StatusOr<llvm::Function*> BufNativeEncoderIRBuilder::GetOrBuildAppendMapFn
auto bs = ctx_->CreateBranchNot(is_null, [&]() -> base::Status {
auto row_ptr = BuildGetPtrOffset(sub_builder, i8_ptr, str_body_offset);
CHECK_TRUE(row_ptr.ok(), common::kCodegenError, row_ptr.status().ToString());
auto sz = map_builder.Encode(ctx_, map_ptr, row_ptr.value());
auto sz = map_builder.Encode(ctx_, row_ptr.value(), map_ptr);
CHECK_TRUE(sz.ok(), common::kCodegenError, sz.status().ToString());
sub_builder->CreateStore(sz.value(), encode_sz_alloca);
return {};
Expand Down
9 changes: 2 additions & 7 deletions hybridse/src/codegen/buf_ir_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,14 @@
#include "codegen/row_ir_builder.h"
#include "codegen/scope_var.h"
#include "codegen/variable_ir_builder.h"
#include "vm/catalog.h"

namespace hybridse {
namespace codegen {

class BufNativeEncoderIRBuilder : public RowEncodeIRBuilder {
public:
BufNativeEncoderIRBuilder(CodeGenContextBase* ctx, const std::map<uint32_t, NativeValue>* outputs,
const vm::Schema* schema);
const codec::Schema* schema);

~BufNativeEncoderIRBuilder() override;

Expand All @@ -55,10 +54,6 @@ class BufNativeEncoderIRBuilder : public RowEncodeIRBuilder {
::llvm::Value* str_addr_space, ::llvm::Value* str_body_offset, uint32_t str_field_idx,
::llvm::Value** output);

// encode SQL map data type into row
base::Status AppendMapVal(const type::ColumnSchema& sc, llvm::Value* i8_ptr, uint32_t field_idx,
const NativeValue& val, llvm::Value* str_addr_space, llvm::Value* str_body_offset,
uint32_t str_field_idx, llvm::Value** next_str_body_offset);
absl::StatusOr<llvm::Function*> GetOrBuildAppendMapFn(const type::ColumnSchema& sc) const;

base::Status AppendHeader(::llvm::Value* i8_ptr, ::llvm::Value* size,
Expand All @@ -74,7 +69,7 @@ class BufNativeEncoderIRBuilder : public RowEncodeIRBuilder {
private:
CodeGenContextBase* ctx_;
const std::map<uint32_t, NativeValue>* outputs_;
const vm::Schema* schema_;
const codec::Schema* schema_;
uint32_t str_field_start_offset_;
// n = offset_vec_[i] is
// schema_[i] is base type (except string): col encode offset in row
Expand Down
149 changes: 149 additions & 0 deletions hybridse/src/codegen/insert_row_builder.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/**
* Copyright (c) 2024 OpenMLDB authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "codegen/insert_row_builder.h"

#include <map>
#include <string>
#include <utility>
#include <vector>

#include "absl/status/status.h"
#include "base/fe_status.h"
#include "codegen/buf_ir_builder.h"
#include "codegen/context.h"
#include "codegen/expr_ir_builder.h"
#include "node/node_manager.h"
#include "passes/resolve_fn_and_attrs.h"
#include "udf/default_udf_library.h"
#include "vm/engine.h"
#include "vm/jit_wrapper.h"

namespace hybridse {
namespace codegen {

InsertRowBuilder::InsertRowBuilder(const codec::Schema* schema) : schema_(schema) {}

// Prepares the JIT engine used to compile and run the generated row-encoding functions.
//
// Steps: initialize global LLVM state, create the JIT wrapper, initialize the wrapper, then
// register its symbols. On any failure jit_ is left empty and an internal error is returned.
absl::Status InsertRowBuilder::Init() {
    ::hybridse::vm::Engine::InitializeGlobalLLVM();

    jit_.reset(vm::HybridSeJitWrapper::Create());
    if (jit_ == nullptr) {
        // Create() hands back a raw pointer; never dereference it before checking for null.
        return absl::InternalError("fail to create jit");
    }
    if (!jit_->Init()) {
        jit_.reset();
        return absl::InternalError("fail to init jit");
    }
    if (!vm::HybridSeJitWrapper::InitJitSymbols(jit_.get())) {
        jit_.reset();
        return absl::InternalError("fail to init jit symbols");
    }
    return absl::OkStatus();
}

// Convenience overload: computes the encoded row from the children of an expression list node.
//
// Returns an invalid-argument error when `values` is null instead of dereferencing it;
// otherwise forwards to the span overload and returns its result.
absl::StatusOr<std::shared_ptr<int8_t>> InsertRowBuilder::ComputeRow(const node::ExprListNode* values) {
    EnsureInitialized();
    if (values == nullptr) {
        return absl::InvalidArgumentError("insert values expression list is null");
    }
    return ComputeRow(values->children_);
}

// Computes the encoded row for one list of insert value expressions.
//
// Each expression is resolved against an empty schema context and empty parameter types, a
// uniquely named function `void fn(int8_t**)` is generated for the resolved expressions, the
// module is optimized and added to the JIT, and the compiled function is executed once.
//
// Returns the row written by that function, wrapped in a shared_ptr that releases it with
// std::free, or an error status if resolving, code generation, JIT setup, symbol lookup or the
// encoding itself did not produce a row.
absl::StatusOr<std::shared_ptr<int8_t>> InsertRowBuilder::ComputeRow(absl::Span<node::ExprNode* const> values) {
    EnsureInitialized();

    std::unique_ptr<llvm::LLVMContext> llvm_ctx = llvm::make_unique<llvm::LLVMContext>();
    std::unique_ptr<llvm::Module> llvm_module = llvm::make_unique<llvm::Module>("insert_row_builder", *llvm_ctx);
    vm::SchemasContext empty_sc;
    node::NodeManager nm;
    codec::Schema empty_param_types;
    CodeGenContext dump_ctx(llvm_module.get(), &empty_sc, &empty_param_types, &nm);

    auto library = udf::DefaultUdfLibrary::get();
    node::ExprAnalysisContext expr_ctx(&nm, library, &empty_sc, &empty_param_types);
    passes::ResolveFnAndAttrs resolver(&expr_ctx);

    // Resolve every value expression before generating code for it.
    std::vector<node::ExprNode*> transformed;
    transformed.reserve(values.size());
    for (node::ExprNode* expr : values) {
        node::ExprNode* out = nullptr;
        CHECK_STATUS_TO_ABSL(resolver.VisitExpr(expr, &out));
        transformed.push_back(out);
    }

    // The counter keeps the generated function names distinct across invocations.
    const std::string fn_name = absl::StrCat("gen_insert_row_", fn_counter_++);
    auto fs = BuildFn(&dump_ctx, fn_name, transformed);
    CHECK_ABSL_STATUSOR(fs);

    if (!jit_->OptModule(llvm_module.get())) {
        return absl::InternalError("fail to optimize module");
    }

    if (!jit_->AddModule(std::move(llvm_module), std::move(llvm_ctx))) {
        return absl::InternalError("add llvm module failed");
    }

    // Look the symbol up by the name generated above rather than through the llvm::Function
    // pointer: ownership of the module holding that function was just transferred to the JIT.
    auto c_fn = jit_->FindFunction(fn_name);
    if (c_fn == nullptr) {
        return absl::InternalError(absl::StrCat("fail to find jit function ", fn_name));
    }
    auto encode = reinterpret_cast<void (*)(int8_t**)>(const_cast<int8_t*>(c_fn));

    int8_t* insert_row = nullptr;
    encode(&insert_row);
    if (insert_row == nullptr) {
        // The generated function is the only writer of insert_row; a null value means it did not
        // store a row, so report that instead of handing the caller a null buffer as success.
        return absl::InternalError(absl::StrCat("jit function ", fn_name, " produced no row"));
    }

    return std::shared_ptr<int8_t>(insert_row, std::free);
}

// Returns the function named `fn_name` from the module of `ctx`, generating it first if the
// module does not contain it yet.
//
// The generated function has the signature `void fn(int8_t**)`: it evaluates every expression
// in `values`, maps the i-th result to column i, encodes the columns with schema_ and passes the
// function's single argument to the encoder as the output location.
//
// Returns an error status if building an expression or initializing the encoder fails.
absl::StatusOr<llvm::Function*> InsertRowBuilder::BuildFn(CodeGenContext* ctx, llvm::StringRef fn_name,
                                                          absl::Span<node::ExprNode* const> values) {
    llvm::Function* existing = ctx->GetModule()->getFunction(fn_name);
    if (existing != nullptr) {
        // Already generated in this module: reuse it as-is.
        return existing;
    }

    auto ir_builder = ctx->GetBuilder();
    llvm::FunctionType* signature = llvm::FunctionType::get(ir_builder->getVoidTy(),
                                                            {
                                                                ir_builder->getInt8PtrTy()->getPointerTo(),
                                                            },
                                                            false);

    llvm::Function* created =
        llvm::Function::Create(signature, llvm::GlobalValue::ExternalLinkage, fn_name, ctx->GetModule());
    FunctionScopeGuard scope_guard(created, ctx);

    // The only argument is the int8_t** the encoded row is written through.
    llvm::Value* row_out_arg = created->arg_begin();

    ExprIRBuilder expr_builder(ctx);

    // Column index -> value computed for that column.
    std::map<uint32_t, NativeValue> column_values;
    for (uint32_t col = 0; col < values.size(); ++col) {
        NativeValue computed;
        CHECK_STATUS_TO_ABSL(expr_builder.Build(values[col], &computed));
        column_values[col] = computed;
    }

    BufNativeEncoderIRBuilder row_encoder(ctx, &column_values, schema_);
    CHECK_STATUS_TO_ABSL(row_encoder.Init());

    // NOTE(review): whatever BuildEncode returns is discarded here; if it reports failures they
    // are currently dropped silently — confirm its return type and propagate the error.
    row_encoder.BuildEncode(row_out_arg);

    ir_builder->CreateRetVoid();

    return created;
}

// Placeholder for building the function that transforms a single insert row's values into an
// encoded row. It is not implemented yet.
//
// An absl::StatusOr must never be constructed from an OK status (there would be no value to go
// with it), so the stub reports Unimplemented instead of returning absl::OkStatus().
absl::StatusOr<llvm::Function*> InsertRowBuilder::BuildEncodeFn() {
    return absl::UnimplementedError("InsertRowBuilder::BuildEncodeFn is not implemented");
}
} // namespace codegen
} // namespace hybridse
67 changes: 67 additions & 0 deletions hybridse/src/codegen/insert_row_builder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/**
* Copyright (c) 2024 OpenMLDB authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef HYBRIDSE_SRC_CODEGEN_INSERT_ROW_BUILDER_H_
#define HYBRIDSE_SRC_CODEGEN_INSERT_ROW_BUILDER_H_

#include <atomic>
#include <cassert>
#include <memory>

#include "absl/status/statusor.h"
#include "absl/types/span.h"
#include "codec/fe_row_codec.h"
#include "codegen/context.h"
#include "llvm/IR/Function.h"
#include "node/sql_node.h"
#include "vm/jit_wrapper.h"

namespace hybridse {
namespace codegen {

// Turns the value expressions of an insert statement into an encoded row by generating a small
// function per row and running it through a JIT.
//
// Usage: construct with the target schema, call Init() once, then call ComputeRow() per row.
class InsertRowBuilder {
 public:
    // `schema` is stored by pointer and used when encoding rows; it must outlive the builder.
    explicit InsertRowBuilder(const codec::Schema* schema);

    // Creates and initializes the JIT. ComputeRow asserts that this has been done.
    absl::Status Init();

    // Computes the encoded row for a single values expression list of an insert statement.
    //
    // Expressions in insert values are currently not expected to reference any external source,
    // so unsupported expressions simply fail while being resolved.
    absl::StatusOr<std::shared_ptr<int8_t>> ComputeRow(absl::Span<node::ExprNode* const> values);

    // Same as above, taking the expressions from the children of an expression list node.
    absl::StatusOr<std::shared_ptr<int8_t>> ComputeRow(const node::ExprListNode* values);

 private:
    // Debug-only guard: fires when the JIT has not been set up.
    void EnsureInitialized() { assert(jit_ && "InsertRowBuilder not initialized"); }

    // Builds the function that will output the row from a single list of insert values.
    //
    // The function is equivalent to C `void fn(int8_t**)`. Callers pass a different `fn_name` on
    // every invocation, so each invocation yields a different function.
    absl::StatusOr<llvm::Function*> BuildFn(CodeGenContext* ctx, llvm::StringRef fn_name,
                                            absl::Span<node::ExprNode* const> values);

    // Builds the function that transforms a single insert row's values into an encoded row.
    absl::StatusOr<llvm::Function*> BuildEncodeFn();

    // Schema used to encode rows; not owned.
    const codec::Schema* schema_;
    // Suffix source for generated function names.
    std::atomic<uint32_t> fn_counter_{0};

    // JIT engine; null until created by Init().
    std::unique_ptr<vm::HybridSeJitWrapper> jit_;
};
} // namespace codegen
} // namespace hybridse
#endif // HYBRIDSE_SRC_CODEGEN_INSERT_ROW_BUILDER_H_
Loading

0 comments on commit 1106259

Please sign in to comment.