Skip to content

Commit

Permalink
feat: support compress (#3572)
Browse files Browse the repository at this point in the history
  • Loading branch information
dl239 authored Nov 15, 2023
1 parent c2b7817 commit 71754ff
Show file tree
Hide file tree
Showing 43 changed files with 489 additions and 466 deletions.
37 changes: 37 additions & 0 deletions cases/plan/create.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1035,3 +1035,40 @@ cases:
+-kind: HIVE
+-path: hdfs://path
+-table_option_list: []
- id: 34
desc: Create 指定压缩
sql: |
create table t1(
column1 int,
column2 timestamp,
index(key=column1, ts=column2)) OPTIONS (compress_type="snappy");
expect:
node_tree_str: |
+-node[CREATE]
+-table: t1
+-IF NOT EXIST: 0
+-column_desc_list[list]:
| +-0:
| | +-node[kColumnDesc]
| | +-column_name: column1
| | +-column_type: int32
| | +-NOT NULL: 0
| +-1:
| | +-node[kColumnDesc]
| | +-column_name: column2
| | +-column_type: timestamp
| | +-NOT NULL: 0
| +-2:
| +-node[kColumnIndex]
| +-keys: [column1]
| +-ts_col: column2
| +-abs_ttl: -2
| +-lat_ttl: -2
| +-ttl_type: <nil>
| +-version_column: <nil>
| +-version_count: 0
+-table_option_list[list]:
+-0:
+-node[kCompressType]
+-compress_type: snappy
16 changes: 11 additions & 5 deletions docs/en/reference/sql/ddl/CREATE_TABLE_STATEMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,11 @@ StorageMode
::= 'Memory'
| 'HDD'
| 'SSD'
CompressTypeOption
::= 'COMPRESS_TYPE' '=' CompressType
CompressType
::= 'NoCompress'
| 'Snappy'
```
Expand All @@ -484,6 +489,7 @@ StorageMode
| `REPLICANUM` | It defines the number of replicas for the table. Note that the number of replicas is only configurable in Cluster version. | `OPTIONS (REPLICANUM=3)` |
| `DISTRIBUTION` | It defines the distributed node endpoint configuration. Generally, it contains a Leader node and several followers. `(leader, [follower1, follower2, ..])`. Without explicit configuration, OpenMLDB will automatically configure `DISTRIBUTION` according to the environment and nodes. | `DISTRIBUTION = [ ('127.0.0.1:6527', [ '127.0.0.1:6528','127.0.0.1:6529' ])]` |
| `STORAGE_MODE` | It defines the storage mode of the table. The supported modes are `Memory`, `HDD` and `SSD`. When not explicitly configured, it defaults to `Memory`. <br/>If you need to support a storage mode other than `Memory` mode, `tablet` requires additional configuration options. For details, please refer to [tablet configuration file **conf/tablet.flags**](../../../deploy/conf.md#the-configuration-file-for-apiserver:-conf/tablet.flags). | `OPTIONS (STORAGE_MODE='HDD')` |
| `COMPRESS_TYPE` | It defines the compression type of the table. The supported compression types are `NoCompress` and `Snappy`. When not explicitly configured, it defaults to `NoCompress`. | `OPTIONS (COMPRESS_TYPE='Snappy')` |
#### The Difference between Disk Table and Memory Table
Expand Down Expand Up @@ -515,11 +521,11 @@ DESC t1;
--- -------------------- ------ ---------- ------ ---------------
1 INDEX_0_1651143735 col1 std_time 0min kAbsoluteTime
--- -------------------- ------ ---------- ------ ---------------
--------------
storage_mode
--------------
HDD
--------------
--------------- --------------
compress_type storage_mode
--------------- --------------
NoCompress HDD
--------------- --------------
```
The following sql command create a table with specified distribution.
```sql
Expand Down
10 changes: 5 additions & 5 deletions docs/en/reference/sql/ddl/DESC_STATEMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,11 @@ desc t1;
--- -------------------- ------ ---------- ---------- ---------------
1 INDEX_0_1658136511 col1 std_time 43200min kAbsoluteTime
--- -------------------- ------ ---------- ---------- ---------------
--------------
storage_mode
--------------
Memory
--------------
--------------- --------------
compress_type storage_mode
--------------- --------------
NoCompress Memory
--------------- --------------

```

Expand Down
2 changes: 1 addition & 1 deletion docs/en/reference/sql/ddl/SHOW_CREATE_TABLE_STATEMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ show create table t1;
`c3` bigInt,
`c4` timestamp,
INDEX (KEY=`c1`, TS=`c4`, TTL_TYPE=ABSOLUTE, TTL=0m)
) OPTIONS (PARTITIONNUM=8, REPLICANUM=2, STORAGE_MODE='HDD');
) OPTIONS (PARTITIONNUM=8, REPLICANUM=2, STORAGE_MODE='HDD', COMPRESS_TYPE='NoCompress');
------- ---------------------------------------------------------------

1 rows in set
Expand Down
16 changes: 11 additions & 5 deletions docs/zh/openmldb_sql/ddl/CREATE_TABLE_STATEMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,11 @@ StorageMode
::= 'Memory'
| 'HDD'
| 'SSD'
CompressTypeOption
::= 'COMPRESS_TYPE' '=' CompressType
CompressType
::= 'NoCompress'
| 'Snappy'
```


Expand All @@ -460,6 +465,7 @@ StorageMode
| `REPLICANUM` | 配置表的副本数。请注意,副本数只有在集群版中才可以配置。 | `OPTIONS (REPLICANUM=3)` |
| `DISTRIBUTION` | 配置分布式的节点endpoint。一般包含一个Leader节点和若干Follower节点。`(leader, [follower1, follower2, ..])`。不显式配置时,OpenMLDB会自动根据环境和节点来配置`DISTRIBUTION`| `DISTRIBUTION = [ ('127.0.0.1:6527', [ '127.0.0.1:6528','127.0.0.1:6529' ])]` |
| `STORAGE_MODE` | 表的存储模式,支持的模式有`Memory`、`HDD`、`SSD`。不显式配置时,默认为`Memory`。<br/>如果需要支持非`Memory`模式的存储模式,`tablet`需要额外的配置选项,具体可参考[tablet配置文件 conf/tablet.flags](../../../deploy/conf.md)。 | `OPTIONS (STORAGE_MODE='HDD')` |
| `COMPRESS_TYPE` | 指定表的压缩类型。目前只支持 `Snappy` 压缩。不显式配置时,默认为 `NoCompress`,即不压缩。 | `OPTIONS (COMPRESS_TYPE='Snappy')` |

#### 磁盘表与内存表区别
- 磁盘表对应`STORAGE_MODE`的取值为`HDD`或`SSD`。内存表对应的`STORAGE_MODE`取值为`Memory`。
Expand Down Expand Up @@ -488,11 +494,11 @@ DESC t1;
--- -------------------- ------ ---------- ------ ---------------
1 INDEX_0_1651143735 col1 std_time 0min kAbsoluteTime
--- -------------------- ------ ---------- ------ ---------------
--------------
storage_mode
--------------
HDD
--------------
--------------- --------------
compress_type storage_mode
--------------- --------------
NoCompress HDD
--------------- --------------
```
创建一张表,指定分片的分布状态
```sql
Expand Down
10 changes: 5 additions & 5 deletions docs/zh/openmldb_sql/ddl/DESC_STATEMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,11 @@ desc t1;
--- -------------------- ------ ---------- ---------- ---------------
1 INDEX_0_1658136511 col1 std_time 43200min kAbsoluteTime
--- -------------------- ------ ---------- ---------- ---------------
--------------
storage_mode
--------------
Memory
--------------
--------------- --------------
compress_type storage_mode
--------------- --------------
NoCompress Memory
--------------- --------------

```

Expand Down
2 changes: 1 addition & 1 deletion docs/zh/openmldb_sql/ddl/SHOW_CREATE_TABLE_STATEMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ show create table t1;
`c3` bigInt,
`c4` timestamp,
INDEX (KEY=`c1`, TS=`c4`, TTL_TYPE=ABSOLUTE, TTL=0m)
) OPTIONS (PARTITIONNUM=8, REPLICANUM=2, STORAGE_MODE='HDD');
) OPTIONS (PARTITIONNUM=8, REPLICANUM=2, STORAGE_MODE='HDD', COMPRESS_TYPE='NoCompress');
------- ---------------------------------------------------------------

1 rows in set
Expand Down
6 changes: 6 additions & 0 deletions hybridse/include/node/node_enum.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ enum SqlNodeType {
kWithClauseEntry,
kAlterTableStmt,
kShowStmt,
kCompressType,
kSqlNodeTypeLast, // debug type
};

Expand Down Expand Up @@ -342,6 +343,11 @@ enum StorageMode {
kHDD = 3,
};

// Compression types that can be attached to a table through the
// COMPRESS_TYPE table option (carried in the AST by CompressTypeNode).
enum CompressType {
// No compression; this is what a default-constructed CompressTypeNode holds.
kNoCompress = 0,
// Snappy compression.
kSnappy = 1,
};

// batch plan node type
enum BatchPlanNodeType { kBatchDataset, kBatchPartition, kBatchMap };

Expand Down
2 changes: 0 additions & 2 deletions hybridse/include/node/node_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -399,8 +399,6 @@ class NodeManager {

SqlNode *MakeReplicaNumNode(int num);

SqlNode *MakeStorageModeNode(StorageMode storage_mode);

SqlNode *MakePartitionNumNode(int num);

SqlNode *MakeDistributionsNode(const NodePointVector& distribution_list);
Expand Down
33 changes: 30 additions & 3 deletions hybridse/include/node/sql_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <vector>

#include "absl/status/statusor.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "boost/algorithm/string.hpp"
Expand Down Expand Up @@ -309,17 +310,26 @@ inline const std::string StorageModeName(StorageMode mode) {
}

inline const StorageMode NameToStorageMode(const std::string& name) {
if (boost::iequals(name, "memory")) {
if (absl::EqualsIgnoreCase(name, "memory")) {
return kMemory;
} else if (boost::iequals(name, "hdd")) {
} else if (absl::EqualsIgnoreCase(name, "hdd")) {
return kHDD;
} else if (boost::iequals(name, "ssd")) {
} else if (absl::EqualsIgnoreCase(name, "ssd")) {
return kSSD;
} else {
return kUnknown;
}
}

// Maps a textual compress type onto the CompressType enum.
//
// The comparison ignores ASCII case, so "Snappy", "SNAPPY" and "snappy" all
// yield kSnappy, and any casing of "nocompress" yields kNoCompress.
// Every other input produces an InvalidArgument status whose message is
// "invalid compress type: <name>".
inline absl::StatusOr<CompressType> NameToCompressType(const std::string& name) {
    if (absl::EqualsIgnoreCase(name, "snappy")) {
        return CompressType::kSnappy;
    }
    if (absl::EqualsIgnoreCase(name, "nocompress")) {
        return CompressType::kNoCompress;
    }

    // Unrecognised name: report it back to the caller verbatim.
    const std::string error_message = absl::StrCat("invalid compress type: ", name);
    return absl::Status(absl::StatusCode::kInvalidArgument, error_message);
}

inline const std::string RoleTypeName(RoleType type) {
switch (type) {
case kLeader:
Expand Down Expand Up @@ -1884,6 +1894,23 @@ class StorageModeNode : public SqlNode {
StorageMode storage_mode_;
};

// AST node holding the compress type of a CREATE TABLE option list entry.
// It is tagged with the kCompressType node type and constructed with the
// position arguments (0, 0).
class CompressTypeNode : public SqlNode {
 public:
    // Builds a node holding kNoCompress.
    CompressTypeNode() : CompressTypeNode(kNoCompress) {}

    // Builds a node holding the given compress type.
    explicit CompressTypeNode(CompressType compress_type)
        : SqlNode(kCompressType, 0, 0), compress_type_(compress_type) {}

    ~CompressTypeNode() = default;

    // Returns the compress type stored in this node.
    CompressType GetCompressType() const { return compress_type_; }

    // Writes the node header followed by its "compress_type" value to output.
    void Print(std::ostream &output, const std::string &org_tab) const;

 private:
    CompressType compress_type_;
};

class CreateTableLikeClause {
public:
CreateTableLikeClause() = default;
Expand Down
5 changes: 0 additions & 5 deletions hybridse/src/node/node_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1031,11 +1031,6 @@ SqlNode *NodeManager::MakeReplicaNumNode(int num) {
return RegisterNode(node_ptr);
}

// Allocates a StorageModeNode for the given storage mode and passes it to
// RegisterNode, returning whatever RegisterNode returns.
SqlNode *NodeManager::MakeStorageModeNode(StorageMode storage_mode) {
    SqlNode *const storage_mode_node = new StorageModeNode(storage_mode);
    return RegisterNode(storage_mode_node);
}

SqlNode *NodeManager::MakePartitionNumNode(int num) {
SqlNode *node_ptr = new PartitionNumNode(num);
return RegisterNode(node_ptr);
Expand Down
3 changes: 2 additions & 1 deletion hybridse/src/node/plan_node_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,8 @@ TEST_F(PlanNodeTest, ExtractColumnsAndIndexsTest) {
manager_->MakeColumnDescNode("col3", node::kFloat, true),
manager_->MakeColumnDescNode("col4", node::kVarchar, true),
manager_->MakeColumnDescNode("col5", node::kTimestamp, true), index_node},
{manager_->MakeReplicaNumNode(3), manager_->MakePartitionNumNode(8), manager_->MakeStorageModeNode(kMemory)},
{manager_->MakeReplicaNumNode(3), manager_->MakePartitionNumNode(8),
manager_->MakeNode<StorageModeNode>(kMemory)},
false);
ASSERT_TRUE(nullptr != node);
std::vector<std::string> columns;
Expand Down
12 changes: 12 additions & 0 deletions hybridse/src/node/sql_node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1168,6 +1168,7 @@ static absl::flat_hash_map<SqlNodeType, absl::string_view> CreateSqlNodeTypeToNa
{kReplicaNum, "kReplicaNum"},
{kPartitionNum, "kPartitionNum"},
{kStorageMode, "kStorageMode"},
{kCompressType, "kCompressType"},
{kFn, "kFn"},
{kFnParaList, "kFnParaList"},
{kCreateSpStmt, "kCreateSpStmt"},
Expand Down Expand Up @@ -2603,6 +2604,17 @@ void StorageModeNode::Print(std::ostream &output, const std::string &org_tab) co
PrintValue(output, tab, StorageModeName(storage_mode_), "storage_mode", true);
}

// Prints the generic SqlNode header, then a single "compress_type" child
// entry on the next line. The value is "snappy" for kSnappy and "nocompress"
// for every other stored value.
void CompressTypeNode::Print(std::ostream &output, const std::string &org_tab) const {
    SqlNode::Print(output, org_tab);
    const std::string tab = org_tab + INDENT + SPACE_ED;
    output << "\n";

    // Pick the label first so the value is emitted through one PrintValue call.
    const char *const type_label = (compress_type_ == CompressType::kSnappy) ? "snappy" : "nocompress";
    PrintValue(output, tab, type_label, "compress_type", true);
}

void PartitionNumNode::Print(std::ostream &output, const std::string &org_tab) const {
SqlNode::Print(output, org_tab);
const std::string tab = org_tab + INDENT + SPACE_ED;
Expand Down
2 changes: 1 addition & 1 deletion hybridse/src/node/sql_node_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,7 @@ TEST_F(SqlNodeTest, CreateIndexNodeTest) {
node_manager_->MakeColumnDescNode("col4", node::kVarchar, true),
node_manager_->MakeColumnDescNode("col5", node::kTimestamp, true), index_node},
{node_manager_->MakeReplicaNumNode(3), node_manager_->MakePartitionNumNode(8),
node_manager_->MakeStorageModeNode(kMemory)},
node_manager_->MakeNode<StorageModeNode>(kMemory)},
false);
ASSERT_TRUE(nullptr != node);
std::vector<std::string> columns;
Expand Down
14 changes: 12 additions & 2 deletions hybridse/src/planv2/ast_node_converter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1761,8 +1761,18 @@ base::Status ConvertTableOption(const zetasql::ASTOptionsEntry* entry, node::Nod
} else if (absl::EqualsIgnoreCase("storage_mode", identifier_v)) {
std::string storage_mode;
CHECK_STATUS(AstStringLiteralToString(entry->value(), &storage_mode));
boost::to_lower(storage_mode);
*output = node_manager->MakeStorageModeNode(node::NameToStorageMode(storage_mode));
absl::AsciiStrToLower(&storage_mode);
*output = node_manager->MakeNode<node::StorageModeNode>(node::NameToStorageMode(storage_mode));
} else if (absl::EqualsIgnoreCase("compress_type", identifier_v)) {
std::string compress_type;
CHECK_STATUS(AstStringLiteralToString(entry->value(), &compress_type));
absl::AsciiStrToLower(&compress_type);
auto ret = node::NameToCompressType(compress_type);
if (ret.ok()) {
*output = node_manager->MakeNode<node::CompressTypeNode>(*ret);
} else {
return base::Status(common::kSqlAstError, ret.status().ToString());
}
} else {
return base::Status(common::kSqlAstError, absl::StrCat("invalid option ", identifier));
}
Expand Down
Loading

0 comments on commit 71754ff

Please sign in to comment.