diff --git a/.github/workflows/cicd.yaml b/.github/workflows/cicd.yaml
index 5fa5f6411c8..de76ed04a5f 100644
--- a/.github/workflows/cicd.yaml
+++ b/.github/workflows/cicd.yaml
@@ -107,9 +107,11 @@ jobs:
uses: actions/upload-artifact@v2
with:
name: linux-ut-result-cpp-${{ github.sha }}
+ # exclude xml files produced under build/_deps (third-party test reports)
path: |
build/**/*.xml
reports/*.xml
+ !build/_deps/*
- name: install
if: ${{ github.event_name == 'push' }}
diff --git a/.github/workflows/sdk.yml b/.github/workflows/sdk.yml
index 8f4dc6bd628..dc4dd94a2b6 100644
--- a/.github/workflows/sdk.yml
+++ b/.github/workflows/sdk.yml
@@ -352,6 +352,7 @@ jobs:
image: ghcr.io/4paradigm/hybridsql:latest
env:
OPENMLDB_BUILD_TARGET: "openmldb"
+ OPENMLDB_MODE: standalone
steps:
- uses: actions/checkout@v2
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 21066a3c505..703d6bf11de 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -136,6 +136,7 @@ endif()
include(FetchContent)
set(FETCHCONTENT_QUIET OFF)
include(farmhash)
+include(rapidjson)
# contrib libs
add_subdirectory(contrib EXCLUDE_FROM_ALL)
diff --git a/benchmark/pom.xml b/benchmark/pom.xml
index d1d7b99c916..572aec4d282 100644
--- a/benchmark/pom.xml
+++ b/benchmark/pom.xml
@@ -27,12 +27,12 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs
com.4paradigm.openmldb
openmldb-jdbc
- 0.7.0
+ 0.8.3
com.4paradigm.openmldb
openmldb-native
- 0.7.0-allinone
+ 0.8.3-allinone
org.slf4j
diff --git a/benchmark/src/main/java/com/_4paradigm/openmldb/benchmark/BenchmarkConfig.java b/benchmark/src/main/java/com/_4paradigm/openmldb/benchmark/BenchmarkConfig.java
index c6546cadc5d..4f9861cbda2 100644
--- a/benchmark/src/main/java/com/_4paradigm/openmldb/benchmark/BenchmarkConfig.java
+++ b/benchmark/src/main/java/com/_4paradigm/openmldb/benchmark/BenchmarkConfig.java
@@ -34,6 +34,7 @@ public class BenchmarkConfig {
public static long TS_BASE = System.currentTimeMillis();
public static String DEPLOY_NAME;
public static String CSV_PATH;
+ public static int PUT_BACH_SIZE = 1;
private static SqlExecutor executor = null;
private static SdkOption option = null;
@@ -58,6 +59,7 @@ public class BenchmarkConfig {
// if(!CSV_PATH.startsWith("/")){
// CSV_PATH=Util.getRootPath()+CSV_PATH;
// }
+ PUT_BACH_SIZE = Integer.valueOf(prop.getProperty("PUT_BACH_SIZE", "1"));
} catch (Exception e) {
e.printStackTrace();
}
diff --git a/benchmark/src/main/java/com/_4paradigm/openmldb/benchmark/OpenMLDBInsertBenchmark.java b/benchmark/src/main/java/com/_4paradigm/openmldb/benchmark/OpenMLDBInsertBenchmark.java
new file mode 100644
index 00000000000..a856d46ecfd
--- /dev/null
+++ b/benchmark/src/main/java/com/_4paradigm/openmldb/benchmark/OpenMLDBInsertBenchmark.java
@@ -0,0 +1,131 @@
+package com._4paradigm.openmldb.benchmark;
+
+import com._4paradigm.openmldb.sdk.SqlExecutor;
+import org.openjdk.jmh.annotations.*;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+import java.sql.Timestamp;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+
+@BenchmarkMode(Mode.SampleTime)
+@OutputTimeUnit(TimeUnit.MICROSECONDS)
+@State(Scope.Benchmark)
+@Threads(10)
+@Fork(value = 1, jvmArgs = {"-Xms8G", "-Xmx8G"})
+@Warmup(iterations = 2)
+@Measurement(iterations = 5, time = 60)
+
+public class OpenMLDBInsertBenchmark {
+ private SqlExecutor executor;
+ private String database = "test_put_db";
+ private String tableName = "test_put_t1";
+ private int indexNum;
+ private String placeholderSQL;
+ private Random random;
+ int stringNum = 15;
+ int doubleNum= 5;
+ int timestampNum = 5;
+ int bigintNum = 5;
+
+ public OpenMLDBInsertBenchmark() {
+ executor = BenchmarkConfig.GetSqlExecutor(false);
+ indexNum = BenchmarkConfig.WINDOW_NUM;
+ random = new Random();
+ StringBuilder builder = new StringBuilder();
+ builder.append("insert into ");
+ builder.append(tableName);
+ builder.append(" values (");
+ for (int i = 0; i < stringNum + doubleNum + timestampNum + bigintNum; i++) {
+ if (i > 0) {
+ builder.append(", ");
+ }
+ builder.append("?");
+ }
+ builder.append(");");
+ placeholderSQL = builder.toString();
+ }
+
+ @Setup
+ public void initEnv() {
+ Util.executeSQL("CREATE DATABASE IF NOT EXISTS " + database + ";", executor);
+ Util.executeSQL("USE " + database + ";", executor);
+ String ddl = Util.genDDL(tableName, indexNum);
+ Util.executeSQL(ddl, executor);
+ }
+
+ @Benchmark
+ public void executePut() {
+ java.sql.PreparedStatement pstmt = null;
+ try {
+ pstmt = executor.getInsertPreparedStmt(database, placeholderSQL);
+ for (int num = 0; num < BenchmarkConfig.PUT_BACH_SIZE; num++) {
+ int idx = 1;
+ for (int i = 0; i < stringNum; i++) {
+ if (i < indexNum) {
+ pstmt.setString(idx, String.valueOf(BenchmarkConfig.PK_BASE + random.nextInt(BenchmarkConfig.PK_NUM)));
+ } else {
+ pstmt.setString(idx, "v" + String.valueOf(100000 + random.nextInt(100000)));
+ }
+ idx++;
+ }
+ for (int i = 0; i < doubleNum; i++) {
+ pstmt.setDouble(idx, random.nextDouble());
+ idx++;
+ }
+ for (int i = 0; i < timestampNum; i++) {
+ pstmt.setTimestamp(idx, new Timestamp(System.currentTimeMillis()));
+ idx++;
+ }
+ for (int i = 0; i < bigintNum; i++) {
+ pstmt.setLong(idx, random.nextLong());
+ idx++;
+ }
+ if (BenchmarkConfig.PUT_BACH_SIZE > 1) {
+ pstmt.addBatch();
+ }
+ }
+ if (BenchmarkConfig.PUT_BACH_SIZE > 1) {
+ pstmt.executeBatch();
+ } else {
+ pstmt.execute();
+ }
+ } catch (Exception e) {
+ e.printStackTrace();
+ } finally {
+ if (pstmt != null) {
+ try {
+ pstmt.close();
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+ }
+ }
+
+ @TearDown
+ public void cleanEnv() {
+ Util.executeSQL("USE " + database + ";", executor);
+ Util.executeSQL("DROP TABLE " + tableName + ";", executor);
+ Util.executeSQL("DROP DATABASE " + database + ";", executor);
+ }
+
+ public static void main(String[] args) {
+ /* OpenMLDBPutBenchmark benchmark = new OpenMLDBPutBenchmark();
+ benchmark.initEnv();
+ benchmark.executePut();
+ benchmark.cleanEnv();*/
+
+ try {
+ Options opt = new OptionsBuilder()
+ .include(OpenMLDBInsertBenchmark.class.getSimpleName())
+ .forks(1)
+ .build();
+ new Runner(opt).run();
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+}
diff --git a/benchmark/src/main/resources/conf.properties b/benchmark/src/main/resources/conf.properties
index bf3d22a4310..bcde106ed08 100644
--- a/benchmark/src/main/resources/conf.properties
+++ b/benchmark/src/main/resources/conf.properties
@@ -1,5 +1,5 @@
-ZK_CLUSTER=172.24.4.55:30008
-ZK_PATH=/openmldb
+ZK_CLUSTER=172.24.4.55:32200
+ZK_PATH=/openmldb_test
WINDOW_NUM=2
WINDOW_SIZE=1000
@@ -12,3 +12,5 @@ PK_BASE=1000000
DATABASE=bank_perf
DEPLOY_NAME=deploy_bank
CSV_PATH=data/bank_flattenRequest.csv
+
+PUT_BACH_SIZE=100
\ No newline at end of file
diff --git a/cases/plan/create.yaml b/cases/plan/create.yaml
index 315ec30a305..f1076934391 100644
--- a/cases/plan/create.yaml
+++ b/cases/plan/create.yaml
@@ -1035,3 +1035,40 @@ cases:
+-kind: HIVE
+-path: hdfs://path
+-table_option_list: []
+
+ - id: 34
+ desc: Create 指定压缩
+ sql: |
+ create table t1(
+ column1 int,
+ column2 timestamp,
+ index(key=column1, ts=column2)) OPTIONS (compress_type="snappy");
+ expect:
+ node_tree_str: |
+ +-node[CREATE]
+ +-table: t1
+ +-IF NOT EXIST: 0
+ +-column_desc_list[list]:
+ | +-0:
+ | | +-node[kColumnDesc]
+ | | +-column_name: column1
+ | | +-column_type: int32
+ | | +-NOT NULL: 0
+ | +-1:
+ | | +-node[kColumnDesc]
+ | | +-column_name: column2
+ | | +-column_type: timestamp
+ | | +-NOT NULL: 0
+ | +-2:
+ | +-node[kColumnIndex]
+ | +-keys: [column1]
+ | +-ts_col: column2
+ | +-abs_ttl: -2
+ | +-lat_ttl: -2
+ | +-ttl_type:
+ | +-version_column:
+ | +-version_count: 0
+ +-table_option_list[list]:
+ +-0:
+ +-node[kCompressType]
+ +-compress_type: snappy
diff --git a/cases/plan/join_query.yaml b/cases/plan/join_query.yaml
index 4d2bbdc0e57..28021b54d4b 100644
--- a/cases/plan/join_query.yaml
+++ b/cases/plan/join_query.yaml
@@ -18,20 +18,83 @@ cases:
sql: SELECT t1.COL1, t1.COL2, t2.COL1, t2.COL2 FROM t1 full join t2 on t1.col1 = t2.col2;
mode: physical-plan-unsupport
- id: 2
+ mode: request-unsupport
desc: 简单SELECT LEFT JOIN
- mode: runner-unsupport
sql: SELECT t1.COL1, t1.COL2, t2.COL1, t2.COL2 FROM t1 left join t2 on t1.col1 = t2.col2;
+ expect:
+ node_tree_str: |
+ +-node[kQuery]: kQuerySelect
+ +-distinct_opt: false
+ +-where_expr: null
+ +-group_expr_list: null
+ +-having_expr: null
+ +-order_expr_list: null
+ +-limit: null
+ +-select_list[list]:
+ | +-0:
+ | | +-node[kResTarget]
+ | | +-val:
+ | | | +-expr[column ref]
+ | | | +-relation_name: t1
+ | | | +-column_name: COL1
+ | | +-name:
+ | +-1:
+ | | +-node[kResTarget]
+ | | +-val:
+ | | | +-expr[column ref]
+ | | | +-relation_name: t1
+ | | | +-column_name: COL2
+ | | +-name:
+ | +-2:
+ | | +-node[kResTarget]
+ | | +-val:
+ | | | +-expr[column ref]
+ | | | +-relation_name: t2
+ | | | +-column_name: COL1
+ | | +-name:
+ | +-3:
+ | +-node[kResTarget]
+ | +-val:
+ | | +-expr[column ref]
+ | | +-relation_name: t2
+ | | +-column_name: COL2
+ | +-name:
+ +-tableref_list[list]:
+ | +-0:
+ | +-node[kTableRef]: kJoin
+ | +-join_type: LeftJoin
+ | +-left:
+ | | +-node[kTableRef]: kTable
+ | | +-table: t1
+ | | +-alias:
+ | +-right:
+ | +-node[kTableRef]: kTable
+ | +-table: t2
+ | +-alias:
+ | +-order_expressions: null
+ | +-on:
+ | +-expr[binary]
+ | +-=[list]:
+ | +-0:
+ | | +-expr[column ref]
+ | | +-relation_name: t1
+ | | +-column_name: col1
+ | +-1:
+ | +-expr[column ref]
+ | +-relation_name: t2
+ | +-column_name: col2
+ +-window_list: []
- id: 3
desc: 简单SELECT LAST JOIN
sql: SELECT t1.COL1, t1.COL2, t2.COL1, t2.COL2 FROM t1 last join t2 order by t2.col5 on t1.col1 = t2.col2;
- id: 4
desc: 简单SELECT RIGHT JOIN
sql: SELECT t1.COL1, t1.COL2, t2.COL1, t2.COL2 FROM t1 right join t2 on t1.col1 = t2.col2;
- mode: runner-unsupport
+ mode: physical-plan-unsupport
- id: 5
desc: LeftJoin有不等式条件
sql: SELECT t1.col1 as t1_col1, t2.col2 as t2_col2 FROM t1 left join t2 on t1.col1 = t2.col2 and t2.col5 >= t1.col5;
- mode: runner-unsupport
+ mode: request-unsupport
- id: 6
desc: LastJoin有不等式条件
sql: SELECT t1.col1 as t1_col1, t2.col2 as t2_col2 FROM t1 last join t2 order by t2.col5 on t1.col1 = t2.col2 and t2.col5 >= t1.col5;
@@ -162,4 +225,4 @@ cases:
col1 as id,
sum(col2) OVER w2 as w2_col2_sum FROM t1 WINDOW
w2 AS (PARTITION BY col1 ORDER BY col5 ROWS_RANGE BETWEEN 1d OPEN PRECEDING AND CURRENT ROW)
- ) as out1 ON out0.id = out1.id;
\ No newline at end of file
+ ) as out1 ON out0.id = out1.id;
diff --git a/cases/query/fail_query.yaml b/cases/query/fail_query.yaml
index 4058525678c..415fa203127 100644
--- a/cases/query/fail_query.yaml
+++ b/cases/query/fail_query.yaml
@@ -49,3 +49,24 @@ cases:
SELECT 100 + 1s;
expect:
success: false
+ - id: 3
+ desc: unsupported join
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",20,1000]
+ - ["bb",30,1000]
+ - name: t2
+ columns: ["c2 int","c4 timestamp"]
+ indexs: ["index1:c2:c4"]
+ rows:
+ - [20,3000]
+ - [20,2000]
+ sql: |
+ select t1.c1 as id, t2.* from t1 right join t2
+ on t1.c2 = t2.c2
+ expect:
+ success: false
+ msg: unsupport join type RightJoin
diff --git a/cases/query/last_join_subquery_window.yml b/cases/query/last_join_subquery_window.yml
new file mode 100644
index 00000000000..81787f87e67
--- /dev/null
+++ b/cases/query/last_join_subquery_window.yml
@@ -0,0 +1,406 @@
+cases:
+ # ===================================================================
+ # LAST JOIN (WINDOW)
+ # ===================================================================
+ - id: 0
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",2,1590738989000]
+ - ["bb",3,1590738990000]
+ - ["cc",4,1590738991000]
+ - name: t2
+ columns: ["c1 string", "c2 int", "c4 timestamp"]
+ indexs: ["index1:c1:c4", "index2:c2:c4"]
+ rows:
+ - ["aa",1, 1590738989000]
+ - ["bb",3, 1590738990000]
+ - ["dd",4, 1590738991000]
+ sql: |
+ select t1.c1, tx.c1 as c1r, tx.c2 as c2r, agg
+ from t1 last join (
+ select c1, c2, count(c4) over w as agg
+ from t2
+ window w as (
+ partition by c1 order by c4
+ rows between 1 preceding and current row
+ )
+ ) tx
+ on t1.c2 = tx.c2
+ request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1r, tx.c2 -> c2r, agg))
+ REQUEST_JOIN(type=LastJoin, condition=, left_keys=(), right_keys=(), index_keys=(t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_REQUEST_ROW, partition_keys=(), orders=(ASC), rows=(c4, 1 PRECEDING, 0 CURRENT), index_keys=(c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ cluster_request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1r, tx.c2 -> c2r, agg))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ DATA_PROVIDER(request=t1)
+ REQUEST_JOIN(OUTPUT_RIGHT_ONLY, type=LastJoin, condition=, left_keys=(), right_keys=(), index_keys=(#5))
+ SIMPLE_PROJECT(sources=(#5 -> t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(c1, c2, agg))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ SIMPLE_PROJECT(sources=(c1, c2))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_REQUEST_ROW, partition_keys=(), orders=(ASC), rows=(c4, 1 PRECEDING, 0 CURRENT), index_keys=(c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ expect:
+ columns: ["c1 string", "c1r string", "c2r int", "agg int64"]
+ order: c1
+ data: |
+ aa, NULL, NULL, NULL
+ bb, bb, 3, 1
+ cc, dd, 4, 1
+ - id: 1
+ desc: last join window(attributes)
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",2,2000]
+ - ["bb",3,2000]
+ - ["cc",4,2000]
+ - name: t2
+ columns: ["c1 string", "c2 int", "c4 timestamp", "val int"]
+ indexs: ["index1:c1:c4", "index2:c2:c4"]
+ rows:
+ - ["aa",1, 1000, 1]
+ - ["aa",4, 2000, 2]
+ - ["bb",3, 3000, 3]
+ - ["dd",4, 8000, 4]
+ - ["dd",4, 7000, 5]
+ - ["dd",4, 9000, 6]
+ sql: |
+ select t1.c1, tx.c1 as c1r, tx.c2 as c2r, agg1, agg2
+ from t1 last join (
+ select c1, c2, c4,
+ count(c4) over w as agg1,
+ max(val) over w as agg2
+ from t2
+ window w as (
+ partition by c1 order by c4
+ rows between 2 preceding and current row
+ exclude current_row
+ )
+ ) tx
+ order by tx.c4
+ on t1.c2 = tx.c2
+ request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1r, tx.c2 -> c2r, agg1, agg2))
+ REQUEST_JOIN(type=LastJoin, right_sort=(ASC), condition=, left_keys=(), right_keys=(), index_keys=(t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_REQUEST_ROW, EXCLUDE_CURRENT_ROW, partition_keys=(), orders=(ASC), rows=(c4, 2 PRECEDING, 0 CURRENT), index_keys=(c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ cluster_request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1r, tx.c2 -> c2r, agg1, agg2))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ DATA_PROVIDER(request=t1)
+ REQUEST_JOIN(OUTPUT_RIGHT_ONLY, type=LastJoin, right_sort=(ASC), condition=, left_keys=(), right_keys=(), index_keys=(#5))
+ SIMPLE_PROJECT(sources=(#5 -> t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(c1, c2, c4, agg1, agg2))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ SIMPLE_PROJECT(sources=(c1, c2, c4))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_REQUEST_ROW, EXCLUDE_CURRENT_ROW, partition_keys=(), orders=(ASC), rows=(c4, 2 PRECEDING, 0 CURRENT), index_keys=(c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ expect:
+ columns: ["c1 string", "c1r string", "c2r int", "agg1 int64", 'agg2 int']
+ order: c1
+ data: |
+ aa, NULL, NULL, NULL, NULL
+ bb, bb, 3, 0, NULL
+ cc, dd, 4, 2, 5
+ - id: 2
+ # issue on join to (multiple windows), fix later
+ mode: batch-unsupport
+ desc: last join multiple windows
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",2,2000]
+ - ["bb",3,2000]
+ - ["cc",4,2000]
+ - name: t2
+ columns: ["c1 string", "c2 int", "c4 timestamp", "val int", "gp int"]
+ indexs: ["index1:c1:c4", "index2:c2:c4", "index3:gp:c4"]
+ rows:
+ - ["aa",1, 1000, 1, 0]
+ - ["aa",4, 2000, 2, 0]
+ - ["bb",3, 3000, 3, 1]
+ - ["dd",4, 8000, 4, 1]
+ - ["dd",4, 7000, 5, 1]
+ - ["dd",4, 9000, 6, 1]
+ sql: |
+ select t1.c1, tx.c1 as c1r, tx.c2 as c2r, agg1, agg2, agg3
+ from t1 last join (
+ select c1, c2, c4,
+ count(c4) over w1 as agg1,
+ max(val) over w1 as agg2,
+ min(val) over w2 as agg3
+ from t2
+ window w1 as (
+ partition by c1 order by c4
+ rows between 2 preceding and current row
+ exclude current_row
+ ),
+ w2 as (
+ partition by gp order by c4
+ rows_range between 3s preceding and current row
+ exclude current_time
+ )
+ ) tx
+ order by tx.c4
+ on t1.c2 = tx.c2
+ request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1r, tx.c2 -> c2r, agg1, agg2, agg3))
+ REQUEST_JOIN(type=LastJoin, right_sort=(ASC), condition=, left_keys=(), right_keys=(), index_keys=(t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(c1, c2, c4, agg1, agg2, agg3))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_REQUEST_ROW, EXCLUDE_CURRENT_ROW, partition_keys=(), orders=(ASC), rows=(c4, 2 PRECEDING, 0 CURRENT), index_keys=(c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_REQUEST_ROW, EXCLUDE_CURRENT_TIME, partition_keys=(), orders=(ASC), range=(c4, 3000 PRECEDING, 0 CURRENT), index_keys=(gp))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(type=Partition, table=t2, index=index3)
+ cluster_request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1r, tx.c2 -> c2r, agg1, agg2, agg3))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ DATA_PROVIDER(request=t1)
+ REQUEST_JOIN(OUTPUT_RIGHT_ONLY, type=LastJoin, right_sort=(ASC), condition=, left_keys=(), right_keys=(), index_keys=(#5))
+ SIMPLE_PROJECT(sources=(#5 -> t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(c1, c2, c4, agg1, agg2, agg3))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ SIMPLE_PROJECT(sources=(c1, c2, c4))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_REQUEST_ROW, EXCLUDE_CURRENT_ROW, partition_keys=(), orders=(ASC), rows=(c4, 2 PRECEDING, 0 CURRENT), index_keys=(c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_REQUEST_ROW, EXCLUDE_CURRENT_TIME, partition_keys=(), orders=(ASC), range=(c4, 3000 PRECEDING, 0 CURRENT), index_keys=(gp))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(type=Partition, table=t2, index=index3)
+ expect:
+ columns: ["c1 string", "c1r string", "c2r int", "agg1 int64", 'agg2 int', 'agg3 int']
+ order: c1
+ data: |
+ aa, NULL, NULL, NULL, NULL, NULL
+ bb, bb, 3, 0, NULL, NULL
+ cc, dd, 4, 2, 5, 4
+ - id: 3
+ desc: last join window union
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",2,2000]
+ - ["bb",3,2000]
+ - ["cc",4,2000]
+ - name: t2
+ columns: ["c1 string", "c2 int", "c4 timestamp", "val int"]
+ indexs: ["index1:c1:c4", "index2:c2:c4" ]
+ rows:
+ - ["aa",1, 1000, 1]
+ - ["aa",4, 2000, 2]
+ - ["bb",3, 3000, 3]
+ - ["dd",4, 8000, 4]
+ - ["dd",4, 9000, 6]
+ - name: t3
+ columns: ["c1 string", "c2 int", "c4 timestamp", "val int"]
+ indexs: ["index1:c1:c4", "index2:c2:c4"]
+ rows:
+ - ["aa", 2, 1000, 5]
+ - ["bb", 3, 2000, 8]
+ - ["dd", 4, 4000, 12]
+ - ["dd", 4, 7000, 10]
+ - ["dd", 4, 6000, 11]
+ - ["dd", 4, 10000, 100]
+ sql: |
+ select t1.c1, tx.c1 as c1r, tx.c2 as c2r, agg1, agg2
+ from t1 last join (
+ select c1, c2, c4,
+ count(c4) over w1 as agg1,
+ max(val) over w1 as agg2,
+ from t2
+ window w1 as (
+ union t3
+ partition by c1 order by c4
+ rows_range between 3s preceding and current row
+ instance_not_in_window exclude current_row
+ )
+ ) tx
+ order by tx.c4
+ on t1.c2 = tx.c2
+ request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1r, tx.c2 -> c2r, agg1, agg2))
+ REQUEST_JOIN(type=LastJoin, right_sort=(ASC), condition=, left_keys=(), right_keys=(), index_keys=(t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_CURRENT_ROW, INSTANCE_NOT_IN_WINDOW, partition_keys=(c1), orders=(c4 ASC), range=(c4, 3000 PRECEDING, 0 CURRENT), index_keys=)
+ +-UNION(partition_keys=(), orders=(ASC), range=(c4, 3000 PRECEDING, 0 CURRENT), index_keys=(c1))
+ RENAME(name=t2)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(table=t2)
+ cluster_request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1r, tx.c2 -> c2r, agg1, agg2))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ DATA_PROVIDER(request=t1)
+ REQUEST_JOIN(OUTPUT_RIGHT_ONLY, type=LastJoin, right_sort=(ASC), condition=, left_keys=(), right_keys=(), index_keys=(#5))
+ SIMPLE_PROJECT(sources=(#5 -> t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(c1, c2, c4, agg1, agg2))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ SIMPLE_PROJECT(sources=(c1, c2, c4))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_CURRENT_ROW, INSTANCE_NOT_IN_WINDOW, partition_keys=(c1), orders=(c4 ASC), range=(c4, 3000 PRECEDING, 0 CURRENT), index_keys=)
+ +-UNION(partition_keys=(), orders=(ASC), range=(c4, 3000 PRECEDING, 0 CURRENT), index_keys=(c1))
+ RENAME(name=t2)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(table=t2)
+ expect:
+ columns: ["c1 string", "c1r string", "c2r int", "agg1 int64", 'agg2 int']
+ order: c1
+ data: |
+ aa, NULL, NULL, NULL, NULL
+ bb, bb, 3, 1, 8
+ cc, dd, 4, 2, 11
+ - id: 4
+ desc: last join multiple window union
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",2,2000]
+ - ["bb",3,2000]
+ - ["cc",4,2000]
+ - name: t2
+ columns: ["c1 string", "c2 int", "c4 timestamp", "val int"]
+ indexs: ["index1:c1:c4", "index2:c2:c4" ]
+ rows:
+ - ["aa",1, 1000, 1]
+ - ["aa",4, 2000, 2]
+ - ["bb",3, 3000, 3]
+ - ["dd",4, 8000, 4]
+ - ["dd",4, 9000, 6]
+ - name: t3
+ columns: ["c1 string", "c2 int", "c4 timestamp", "val int"]
+ indexs: ["index1:c1:c4", "index2:c2:c4"]
+ rows:
+ - ["aa", 2, 1000, 5]
+ - ["bb", 3, 2000, 8]
+ - ["dd", 4, 4000, 12]
+ - ["dd", 4, 7000, 10]
+ - ["dd", 4, 6000, 11]
+ - ["dd", 4, 10000, 100]
+ sql: |
+ select t1.c1, tx.c1 as c1r, tx.c2 as c2r, agg1, agg2, agg3
+ from t1 last join (
+ select c1, c2, c4,
+ count(c4) over w1 as agg1,
+ max(val) over w1 as agg2,
+ min(val) over w2 as agg3
+ from t2
+ window w1 as (
+ union t3
+ partition by c1 order by c4
+ rows_range between 3s preceding and current row
+ instance_not_in_window exclude current_row
+ ),
+ w2 as (
+ union t3
+ partition by c1 order by c4
+ rows between 2 preceding and current row
+ instance_not_in_window
+ )
+ ) tx
+ order by tx.c4
+ on t1.c2 = tx.c2
+ request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1r, tx.c2 -> c2r, agg1, agg2, agg3))
+ REQUEST_JOIN(type=LastJoin, right_sort=(ASC), condition=, left_keys=(), right_keys=(), index_keys=(t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(c1, c2, c4, agg1, agg2, agg3))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_CURRENT_ROW, INSTANCE_NOT_IN_WINDOW, partition_keys=(c1), orders=(c4 ASC), range=(c4, 3000 PRECEDING, 0 CURRENT), index_keys=)
+ +-UNION(partition_keys=(), orders=(ASC), range=(c4, 3000 PRECEDING, 0 CURRENT), index_keys=(c1))
+ RENAME(name=t2)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(table=t2)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(INSTANCE_NOT_IN_WINDOW, partition_keys=(c1), orders=(c4 ASC), rows=(c4, 2 PRECEDING, 0 CURRENT), index_keys=)
+ +-UNION(partition_keys=(), orders=(ASC), rows=(c4, 2 PRECEDING, 0 CURRENT), index_keys=(c1))
+ RENAME(name=t2)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(table=t2)
+ cluster_request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1r, tx.c2 -> c2r, agg1, agg2, agg3))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ DATA_PROVIDER(request=t1)
+ REQUEST_JOIN(OUTPUT_RIGHT_ONLY, type=LastJoin, right_sort=(ASC), condition=, left_keys=(), right_keys=(), index_keys=(#5))
+ SIMPLE_PROJECT(sources=(#5 -> t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(c1, c2, c4, agg1, agg2, agg3))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ SIMPLE_PROJECT(sources=(c1, c2, c4))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_CURRENT_ROW, INSTANCE_NOT_IN_WINDOW, partition_keys=(c1), orders=(c4 ASC), range=(c4, 3000 PRECEDING, 0 CURRENT), index_keys=)
+ +-UNION(partition_keys=(), orders=(ASC), range=(c4, 3000 PRECEDING, 0 CURRENT), index_keys=(c1))
+ RENAME(name=t2)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(table=t2)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(INSTANCE_NOT_IN_WINDOW, partition_keys=(c1), orders=(c4 ASC), rows=(c4, 2 PRECEDING, 0 CURRENT), index_keys=)
+ +-UNION(partition_keys=(), orders=(ASC), rows=(c4, 2 PRECEDING, 0 CURRENT), index_keys=(c1))
+ RENAME(name=t2)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(table=t2)
+ expect:
+ columns: ["c1 string", "c1r string", "c2r int", "agg1 int64", 'agg2 int', "agg3 int"]
+ order: c1
+ data: |
+ aa, NULL, NULL, NULL, NULL, NULL
+ bb, bb, 3, 1, 8, 3
+ cc, dd, 4, 2, 11, 6
diff --git a/cases/query/left_join.yml b/cases/query/left_join.yml
new file mode 100644
index 00000000000..87e1c387ea6
--- /dev/null
+++ b/cases/query/left_join.yml
@@ -0,0 +1,575 @@
+cases:
+ - id: 0
+ desc: last join to a left join subquery
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",20,1000]
+ - ["bb",30,1000]
+ - ["cc",40,1000]
+ - ["dd",50,1000]
+ - name: t2
+ columns: ["c1 string","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",2000]
+ - ["bb",2000]
+ - ["cc",3000]
+ - name: t3
+ columns: ["c1 string","c2 int","c3 bigint","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",19,13,3000]
+ - ["aa",21,13,3000]
+ - ["bb",34,131,3000]
+ - ["bb",21,131,3000]
+ sql: |
+ select
+ t1.c1,
+ tx.c1 as c1l,
+ tx.c1r,
+ tx.c2r
+ from t1 last join
+ (
+ select t2.c1 as c1,
+ t3.c1 as c1r,
+ t3.c2 as c2r
+ from t2 left join t3
+ on t2.c1 = t3.c1
+ ) tx
+ on t1.c1 = tx.c1 and t1.c2 > tx.c2r
+ batch_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1l, tx.c1r, tx.c2r))
+ JOIN(type=LastJoin, condition=t1.c2 > tx.c2r, left_keys=(), right_keys=(), index_keys=(t1.c1))
+ DATA_PROVIDER(table=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1, t3.c1 -> c1r, t3.c2 -> c2r))
+ JOIN(type=LeftJoin, condition=, left_keys=(), right_keys=(), index_keys=(t2.c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1l, tx.c1r, tx.c2r))
+ REQUEST_JOIN(type=LastJoin, condition=t1.c2 > tx.c2r, left_keys=(), right_keys=(), index_keys=(t1.c1))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1, t3.c1 -> c1r, t3.c2 -> c2r))
+ REQUEST_JOIN(type=LeftJoin, condition=, left_keys=(), right_keys=(), index_keys=(t2.c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ expect:
+ order: c1
+ columns: ["c1 string", "c1l string", "c1r string", "c2r int"]
+ data: |
+ aa, aa, aa, 19
+ bb, bb, bb, 21
+ cc, NULL, NULL, NULL
+ dd, NULL, NULL, NULL
+ - id: 1
+ desc: last join to a left join subquery, request unsupport if left join not optimized
+ mode: request-unsupport
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",20,1000]
+ - ["bb",30,1000]
+ - ["cc",40,1000]
+ - ["dd",50,1000]
+ - name: t2
+ columns: ["c1 string","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",2000]
+ - ["bb",3000]
+ - ["cc",4000]
+ - name: t3
+ columns: ["c1 string","c2 int","c3 bigint","c4 timestamp"]
+ indexs: ["index1:c2:c4"]
+ rows:
+ - ["aa",19,13,3000]
+ - ["aa",21,13,4000]
+ - ["bb",34,131,3000]
+ - ["bb",21,131,4000]
+ sql: |
+ select
+ t1.c1,
+ tx.c1 as c1l,
+ tx.c1r,
+ tx.c2r
+ from t1 last join
+ (
+ select t2.c1 as c1,
+ t3.c1 as c1r,
+ t3.c2 as c2r
+ from t2 left join t3
+ on t2.c1 = t3.c1
+ ) tx
+ on t1.c1 = tx.c1 and t1.c2 > tx.c2r
+ batch_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1l, tx.c1r, tx.c2r))
+ JOIN(type=LastJoin, condition=t1.c2 > tx.c2r, left_keys=(), right_keys=(), index_keys=(t1.c1))
+ DATA_PROVIDER(table=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1, t3.c1 -> c1r, t3.c2 -> c2r))
+ JOIN(type=LeftJoin, condition=, left_keys=(t2.c1), right_keys=(t3.c1), index_keys=)
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ DATA_PROVIDER(table=t3)
+ expect:
+ order: c1
+ columns: ["c1 string", "c1l string", "c1r string", "c2r int"]
+ data: |
+ aa, aa, aa, 19
+ bb, bb, bb, 21
+ cc, NULL, NULL, NULL
+ dd, NULL, NULL, NULL
+ - id: 2
+ desc: last join to a left join subquery, index optimized with additional condition
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",20,1000]
+ - ["bb",30,1000]
+ - ["cc",40,1000]
+ - ["dd",50,1000]
+ - name: t2
+ columns: ["c1 string", "c2 int", "c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa", 42, 2000]
+ - ["bb", 68, 3000]
+ - ["cc", 42, 4000]
+ - name: t3
+ columns: ["c1 string","c2 int","c3 bigint","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",19,13,3000]
+ - ["aa",21,13,4000]
+ - ["bb",34,131,3000]
+ - ["bb",21,131,4000]
+ sql: |
+ select
+ t1.c1,
+ tx.c1 as c1l,
+ tx.c1r,
+ tx.c2r
+ from t1 last join
+ (
+ select t2.c1 as c1,
+ t3.c1 as c1r,
+ t3.c2 as c2r
+ from t2 left join t3
+ on t2.c1 = t3.c1 and t2.c2 = 2 * t3.c2
+ ) tx
+ on t1.c1 = tx.c1
+ request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1l, tx.c1r, tx.c2r))
+ REQUEST_JOIN(type=LastJoin, condition=, left_keys=(), right_keys=(), index_keys=(t1.c1))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1, t3.c1 -> c1r, t3.c2 -> c2r))
+ REQUEST_JOIN(type=LeftJoin, condition=, left_keys=(t2.c2), right_keys=(2 * t3.c2), index_keys=(t2.c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ cluster_request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1l, tx.c1r, tx.c2r))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ DATA_PROVIDER(request=t1)
+ REQUEST_JOIN(OUTPUT_RIGHT_ONLY, type=LastJoin, condition=, left_keys=(), right_keys=(), index_keys=(#4))
+ SIMPLE_PROJECT(sources=(#4 -> t1.c1))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1, t3.c1 -> c1r, t3.c2 -> c2r))
+ REQUEST_JOIN(type=LeftJoin, condition=, left_keys=(t2.c2), right_keys=(2 * t3.c2), index_keys=(t2.c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ expect:
+ order: c1
+ columns: ["c1 string", "c1l string", "c1r string", "c2r int"]
+ data: |
+ aa, aa, aa, 21
+ bb, bb, bb, 34
+ cc, cc, NULL, NULL
+ dd, NULL, NULL, NULL
+ - id: 3
+ desc: last join to a left join subquery 2, index optimized with additional condition
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",20,1000]
+ - ["bb",30,1000]
+ - ["cc",40,1000]
+ - ["dd",50,1000]
+ - name: t2
+ columns: ["c1 string", "c2 int", "c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa", 20, 2000]
+ - ["bb", 10, 3000]
+ - ["cc", 42, 4000]
+ - name: t3
+ columns: ["c1 string","c2 int","c3 bigint","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",19,13,3000]
+ - ["aa",21,13,4000]
+ - ["bb",34,131,3000]
+ - ["bb",21,131,4000]
+ sql: |
+ select
+ t1.c1,
+ tx.c1 as c1l,
+ tx.c1r,
+ tx.c2r
+ from t1 last join
+ (
+ select t2.c1 as c1,
+ t3.c1 as c1r,
+ t3.c2 as c2r
+ from t2 left join t3
+ on t2.c1 = t3.c1 and t2.c2 > t3.c2
+ ) tx
+ on t1.c1 = tx.c1
+ request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1l, tx.c1r, tx.c2r))
+ REQUEST_JOIN(type=LastJoin, condition=, left_keys=(), right_keys=(), index_keys=(t1.c1))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1, t3.c1 -> c1r, t3.c2 -> c2r))
+ REQUEST_JOIN(type=LeftJoin, condition=t2.c2 > t3.c2, left_keys=(), right_keys=(), index_keys=(t2.c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ cluster_request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1l, tx.c1r, tx.c2r))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ DATA_PROVIDER(request=t1)
+ REQUEST_JOIN(OUTPUT_RIGHT_ONLY, type=LastJoin, condition=, left_keys=(), right_keys=(), index_keys=(#4))
+ SIMPLE_PROJECT(sources=(#4 -> t1.c1))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1, t3.c1 -> c1r, t3.c2 -> c2r))
+ REQUEST_JOIN(type=LeftJoin, condition=t2.c2 > t3.c2, left_keys=(), right_keys=(), index_keys=(t2.c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ expect:
+ order: c1
+ columns: ["c1 string", "c1l string", "c1r string", "c2r int"]
+ data: |
+ aa, aa, aa, 19
+ bb, bb, NULL, NULL
+ cc, cc, NULL, NULL
+ dd, NULL, NULL, NULL
+ - id: 4
+ desc: last join to two left join
+ # there is no restriction for multiple left joins, including request mode,
 + # but it may not be as performant as multiple last joins
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",20,1000]
+ - ["bb",30,1000]
+ - ["cc",40,1000]
+ - ["dd",50,1000]
+ - name: t2
+ columns: ["c1 string", "c2 int", "c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa", 20, 2000]
+ - ["bb", 10, 3000]
+ - ["cc", 42, 4000]
+ - name: t3
+ columns: ["c1 string","c2 int","c3 bigint","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",19,13,3000]
+ - ["aa",21,8, 4000]
+ - ["bb",34,131,3000]
+ - ["bb",21,131,4000]
+ - ["cc",27,100,5000]
+ - name: t4
+ columns: ["c1 string","c2 int","c3 bigint","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",19,14,3000]
+ - ["aa",21,13,4000]
+ - ["bb",34,1,3000]
+ - ["bb",21,132,4000]
+ sql: |
+ select
+ t1.c1,
+ tx.c1 as c1l,
+ tx.c1r,
+ tx.c2r,
+ tx.c3x
+ from t1 last join
+ (
+ select t2.c1 as c1,
+ t3.c1 as c1r,
+ t3.c2 as c2r,
+ t4.c3 as c3x
+ from t2 left outer join t3
+ on t2.c1 = t3.c1 and t2.c2 > t3.c2
+ left join t4
+ on t2.c1 = t4.c1 and t3.c3 < t4.c3
+ ) tx
+ on t1.c1 = tx.c1
+ request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1l, tx.c1r, tx.c2r, tx.c3x))
+ REQUEST_JOIN(type=LastJoin, condition=, left_keys=(), right_keys=(), index_keys=(t1.c1))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1, t3.c1 -> c1r, t3.c2 -> c2r, t4.c3 -> c3x))
+ REQUEST_JOIN(type=LeftJoin, condition=t3.c3 < t4.c3, left_keys=(), right_keys=(), index_keys=(t2.c1))
+ REQUEST_JOIN(type=LeftJoin, condition=t2.c2 > t3.c2, left_keys=(), right_keys=(), index_keys=(t2.c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ DATA_PROVIDER(type=Partition, table=t4, index=index1)
+ cluster_request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1l, tx.c1r, tx.c2r, tx.c3x))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ DATA_PROVIDER(request=t1)
+ REQUEST_JOIN(OUTPUT_RIGHT_ONLY, type=LastJoin, condition=, left_keys=(), right_keys=(), index_keys=(#4))
+ SIMPLE_PROJECT(sources=(#4 -> t1.c1))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1, t3.c1 -> c1r, t3.c2 -> c2r, t4.c3 -> c3x))
+ REQUEST_JOIN(type=LeftJoin, condition=t3.c3 < t4.c3, left_keys=(), right_keys=(), index_keys=(t2.c1))
+ REQUEST_JOIN(type=LeftJoin, condition=t2.c2 > t3.c2, left_keys=(), right_keys=(), index_keys=(t2.c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ DATA_PROVIDER(type=Partition, table=t4, index=index1)
+ expect:
+ order: c1
+ columns: ["c1 string", "c1l string", "c1r string", "c2r int", "c3x bigint"]
+ data: |
+ aa, aa, aa, 19, 14
+ bb, bb, NULL, NULL, NULL
+ cc, cc, cc, 27, NULL
+ dd, NULL, NULL, NULL, NULL
+ - id: 5
+ desc: simple left join
+ mode: request-unsupport
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",20,1000]
+ - ["bb",30,1000]
+ - name: t2
+ columns: ["c2 int","c4 timestamp"]
+ indexs: ["index1:c2:c4"]
+ rows:
+ - [20,3000]
+ - [20,2000]
+ sql: |
+ select t1.c1 as id, t2.* from t1 left join t2
+ on t1.c2 = t2.c2
+ expect:
+ order: c1
+ columns: ["id string", "c2 int","c4 timestamp"]
+ data: |
+ aa, 20, 3000
+ aa, 20, 2000
+ bb, NULL, NULL
+ - id: 6
+ desc: lastjoin(leftjoin(filter, table))
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",20,1000]
+ - ["bb",30,1000]
+ - ["cc",40,1000]
+ - ["dd",50,1000]
+ - name: t2
+ columns: ["c1 string", "c2 int", "c4 timestamp"]
+ indexs: ["index1:c1:c4", "index2:c2:c4"]
+ rows:
+ - ["bb",20, 1000]
+ - ["aa",30, 2000]
+ - ["bb",30, 3000]
+ - ["cc",40, 4000]
+ - ["dd",50, 5000]
+ - name: t3
+ columns: ["c1 string","c2 int","c3 bigint","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",19,13,3000]
+ - ["bb",34,131,3000]
+ sql: |
+ select
+ t1.c1,
+ t1.c2,
+ tx.*
+ from t1 last join
+ (
+ select t2.c1 as tx_0_c1,
+ t2.c2 as tx_0_c2,
+ t2.c4 as tx_0_c4,
+ t3.c2 as tx_1_c2,
+ t3.c3 as tx_1_c3
+ from (select * from t2 where c1 != 'dd') t2 left join t3
+ on t2.c1 = t3.c1
+ ) tx
+ order by tx.tx_0_c4
+ on t1.c2 = tx.tx_0_c2
+ request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, t1.c2, tx.tx_0_c1, tx.tx_0_c2, tx.tx_0_c4, tx.tx_1_c2, tx.tx_1_c3))
+ REQUEST_JOIN(type=LastJoin, right_sort=(ASC), condition=, left_keys=(), right_keys=(), index_keys=(t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1 -> tx_0_c1, t2.c2 -> tx_0_c2, t2.c4 -> tx_0_c4, t3.c2 -> tx_1_c2, t3.c3 -> tx_1_c3))
+ REQUEST_JOIN(type=LeftJoin, condition=, left_keys=(), right_keys=(), index_keys=(t2.c1))
+ RENAME(name=t2)
+ FILTER_BY(condition=c1 != dd, left_keys=, right_keys=, index_keys=)
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ expect:
+ order: c1
+ columns: ["c1 string", "c2 int", "tx_0_c1 string", "tx_0_c2 int", "tx_0_c4 timestamp", "tx_1_c2 int", "tx_1_c3 int64"]
+ data: |
+ aa, 20, bb, 20, 1000, 34, 131
+ bb, 30, bb, 30, 3000, 34, 131
+ cc, 40, cc, 40, 4000, NULL, NULL
+ dd, 50, NULL, NULL, NULL, NULL, NULL
+ - id: 7
+ desc: lastjoin(leftjoin(filter, filter))
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",20,1000]
+ - ["bb",30,1000]
+ - ["cc",40,1000]
+ - ["dd",50,1000]
+ - name: t2
+ columns: ["c1 string", "c2 int", "c4 timestamp"]
+ indexs: ["index1:c1:c4", "index2:c2:c4"]
+ rows:
+ - ["bb",20, 1000]
+ - ["aa",30, 2000]
+ - ["bb",30, 3000]
+ - ["cc",40, 4000]
+ - ["dd",50, 5000]
+ - name: t3
+ columns: ["c1 string","c2 int","c3 bigint","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",19,13,3000]
+ - ["bb",34,131,3000]
+ cluster_request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, t1.c2, tx.tx_0_c1, tx.tx_0_c2, tx.tx_0_c4, tx.tx_1_c2, tx.tx_1_c3))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ DATA_PROVIDER(request=t1)
+ REQUEST_JOIN(OUTPUT_RIGHT_ONLY, type=LastJoin, right_sort=(ASC), condition=, left_keys=(#5), right_keys=(#8), index_keys=)
+ SIMPLE_PROJECT(sources=(#5 -> t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1 -> tx_0_c1, t2.c2 -> tx_0_c2, t2.c4 -> tx_0_c4, t3.c2 -> tx_1_c2, t3.c3 -> tx_1_c3))
+ REQUEST_JOIN(type=LeftJoin, condition=, left_keys=(), right_keys=(), index_keys=(t2.c1))
+ RENAME(name=t2)
+ FILTER_BY(condition=, left_keys=(), right_keys=(), index_keys=(30))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ RENAME(name=t3)
+ FILTER_BY(condition=c2 > 20, left_keys=, right_keys=, index_keys=)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ sql: |
+ select
+ t1.c1,
+ t1.c2,
+ tx.*
+ from t1 last join
+ (
+ select t2.c1 as tx_0_c1,
+ t2.c2 as tx_0_c2,
+ t2.c4 as tx_0_c4,
+ t3.c2 as tx_1_c2,
+ t3.c3 as tx_1_c3
+ from (select * from t2 where c2 = 30) t2 left join (select * from t3 where c2 > 20) t3
+ on t2.c1 = t3.c1
+ ) tx
+ order by tx.tx_0_c4
+ on t1.c2 = tx.tx_0_c2
+ request_plan: |
+ expect:
+ order: c1
+ columns: ["c1 string", "c2 int", "tx_0_c1 string", "tx_0_c2 int", "tx_0_c4 timestamp", "tx_1_c2 int", "tx_1_c3 int64"]
+ data: |
+ aa, 20, NULL, NULL, NULL, NULL, NULL
+ bb, 30, bb, 30, 3000, 34, 131
+ cc, 40, NULL, NULL, NULL, NULL, NULL
+ dd, 50, NULL, NULL, NULL, NULL, NULL
+ - id: 8
+ desc: lastjoin(leftjoin(filter, filter))
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",20,1000]
+ - ["bb",30,1000]
+ - ["cc",40,1000]
+ - name: t2
+ columns: ["c1 string", "c2 int", "c4 timestamp"]
+ indexs: ["index1:c1:c4", "index2:c2:c4"]
+ rows:
+ - ["bb",20, 1000]
+ - ["aa",20, 2000]
+ - ["bb",30, 3000]
+ - ["cc",40, 4000]
+ - name: t3
+ columns: ["c1 string","c2 int","c3 bigint","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",19,13,3000]
+ - ["bb",34,131,3000]
+ sql: |
+ select
+ t1.c1,
+ t1.c2,
+ tx.*
+ from t1 last join
+ (
+ select t2.c1 as tx_0_c1,
+ t2.c2 as tx_0_c2,
+ t2.c4 as tx_0_c4,
+ t3.c2 as tx_1_c2,
+ t3.c3 as tx_1_c3
+ from (select * from t2 where c2 = 20) t2 left join (select * from t3 where c1 = 'bb') t3
+ on t2.c1 = t3.c1
+ ) tx
+ on t1.c2 = tx.tx_0_c2 and not isnull(tx.tx_1_c2)
+ cluster_request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, t1.c2, tx.tx_0_c1, tx.tx_0_c2, tx.tx_0_c4, tx.tx_1_c2, tx.tx_1_c3))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ DATA_PROVIDER(request=t1)
+ REQUEST_JOIN(OUTPUT_RIGHT_ONLY, type=LastJoin, condition=NOT isnull(#89), left_keys=(#5), right_keys=(#8), index_keys=)
+ SIMPLE_PROJECT(sources=(#5 -> t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1 -> tx_0_c1, t2.c2 -> tx_0_c2, t2.c4 -> tx_0_c4, t3.c2 -> tx_1_c2, t3.c3 -> tx_1_c3))
+ REQUEST_JOIN(type=LeftJoin, condition=, left_keys=(t2.c1), right_keys=(t3.c1), index_keys=)
+ RENAME(name=t2)
+ FILTER_BY(condition=, left_keys=(), right_keys=(), index_keys=(20))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ RENAME(name=t3)
+ FILTER_BY(condition=, left_keys=(), right_keys=(), index_keys=(bb))
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ expect:
+ order: c1
+ columns: ["c1 string", "c2 int", "tx_0_c1 string", "tx_0_c2 int", "tx_0_c4 timestamp", "tx_1_c2 int", "tx_1_c3 int64"]
+ data: |
+ aa, 20, bb, 20, 1000, 34, 131
+ bb, 30, NULL, NULL, NULL, NULL, NULL
+ cc, 40, NULL, NULL, NULL, NULL, NULL
diff --git a/cmake/rapidjson.cmake b/cmake/rapidjson.cmake
new file mode 100644
index 00000000000..6b1ecd2a6dd
--- /dev/null
+++ b/cmake/rapidjson.cmake
@@ -0,0 +1,9 @@
+FetchContent_Declare(
+ rapidjson
+ URL https://github.com/Tencent/rapidjson/archive/refs/tags/v1.1.0.zip
+ URL_HASH MD5=ceb1cf16e693a3170c173dc040a9d2bd
+ EXCLUDE_FROM_ALL # don't build this project as part of the overall build
+)
+# don't build this project, just populate
+FetchContent_Populate(rapidjson)
+include_directories(${rapidjson_SOURCE_DIR}/include)
diff --git a/docs/en/deploy/conf.md b/docs/en/deploy/conf.md
index 11667427247..138a414fa3d 100644
--- a/docs/en/deploy/conf.md
+++ b/docs/en/deploy/conf.md
@@ -9,6 +9,8 @@
# If you are deploying the standalone version, you do not need to configure zk_cluster and zk_root_path, just comment these two configurations. Deploying the cluster version needs to configure these two items, and the two configurations of all nodes in a cluster must be consistent
#--zk_cluster=127.0.0.1:7181
#--zk_root_path=/openmldb_cluster
+# set the username and password of zookeeper if authentication is enabled
+#--zk_cert=user:passwd
# The address of the tablet needs to be specified in the standalone version, and this configuration can be ignored in the cluster version
--tablet=127.0.0.1:9921
# Configure log directory
@@ -76,6 +78,8 @@
# If you start the cluster version, you need to specify the address of zk and the node path of the cluster in zk
#--zk_cluster=127.0.0.1:7181
#--zk_root_path=/openmldb_cluster
+# set the username and password of zookeeper if authentication is enabled
+#--zk_cert=user:passwd
# Configure the thread pool size, it is recommended to be consistent with the number of CPU cores
--thread_pool_size=24
@@ -218,6 +222,8 @@
# If the deployed openmldb is a cluster version, you need to specify the zk address and the cluster zk node directory
#--zk_cluster=127.0.0.1:7181
#--zk_root_path=/openmldb_cluster
+# set the username and password of zookeeper if authentication is enabled
+#--zk_cert=user:passwd
# configure log path
--openmldb_log_dir=./logs
@@ -249,6 +255,7 @@ zookeeper.connection_timeout=5000
zookeeper.max_retries=10
zookeeper.base_sleep_time=1000
zookeeper.max_connect_waitTime=30000
+#zookeeper.cert=user:passwd
# Spark Config
spark.home=
diff --git a/docs/en/reference/sql/ddl/CREATE_TABLE_STATEMENT.md b/docs/en/reference/sql/ddl/CREATE_TABLE_STATEMENT.md
index a0d11d90657..ba62cf55231 100644
--- a/docs/en/reference/sql/ddl/CREATE_TABLE_STATEMENT.md
+++ b/docs/en/reference/sql/ddl/CREATE_TABLE_STATEMENT.md
@@ -473,6 +473,11 @@ StorageMode
::= 'Memory'
| 'HDD'
| 'SSD'
+CompressTypeOption
+ ::= 'COMPRESS_TYPE' '=' CompressType
+CompressType
+ ::= 'NoCompress'
 + | 'Snappy'
```
@@ -484,6 +489,7 @@ StorageMode
| `REPLICANUM` | It defines the number of replicas for the table. Note that the number of replicas is only configurable in Cluster version. | `OPTIONS (REPLICANUM=3)` |
| `DISTRIBUTION` | It defines the distributed node endpoint configuration. Generally, it contains a Leader node and several followers. `(leader, [follower1, follower2, ..])`. Without explicit configuration, OpenMLDB will automatically configure `DISTRIBUTION` according to the environment and nodes. | `DISTRIBUTION = [ ('127.0.0.1:6527', [ '127.0.0.1:6528','127.0.0.1:6529' ])]` |
| `STORAGE_MODE` | It defines the storage mode of the table. The supported modes are `Memory`, `HDD` and `SSD`. When not explicitly configured, it defaults to `Memory`.
If you need to support a storage mode other than `Memory` mode, `tablet` requires additional configuration options. For details, please refer to [tablet configuration file **conf/tablet.flags**](../../../deploy/conf.md#the-configuration-file-for-apiserver:-conf/tablet.flags). | `OPTIONS (STORAGE_MODE='HDD')` |
+| `COMPRESS_TYPE` | It defines the compress type of the table. The supported compress types are `NoCompress` and `Snappy`. The default value is `NoCompress`. | `OPTIONS (COMPRESS_TYPE='Snappy')` |
#### The Difference between Disk Table and Memory Table
@@ -515,11 +521,11 @@ DESC t1;
--- -------------------- ------ ---------- ------ ---------------
1 INDEX_0_1651143735 col1 std_time 0min kAbsoluteTime
--- -------------------- ------ ---------- ------ ---------------
- --------------
- storage_mode
- --------------
- HDD
- --------------
+ --------------- --------------
+ compress_type storage_mode
+ --------------- --------------
+ NoCompress HDD
+ --------------- --------------
```
The following sql command create a table with specified distribution.
```sql
diff --git a/docs/en/reference/sql/ddl/DESC_STATEMENT.md b/docs/en/reference/sql/ddl/DESC_STATEMENT.md
index 8179c952c56..a7d288064bb 100644
--- a/docs/en/reference/sql/ddl/DESC_STATEMENT.md
+++ b/docs/en/reference/sql/ddl/DESC_STATEMENT.md
@@ -56,11 +56,11 @@ desc t1;
--- -------------------- ------ ---------- ---------- ---------------
1 INDEX_0_1658136511 col1 std_time 43200min kAbsoluteTime
--- -------------------- ------ ---------- ---------- ---------------
- --------------
- storage_mode
- --------------
- Memory
- --------------
+ --------------- --------------
+ compress_type storage_mode
+ --------------- --------------
+ NoCompress Memory
+ --------------- --------------
```
diff --git a/docs/en/reference/sql/ddl/SHOW_CREATE_TABLE_STATEMENT.md b/docs/en/reference/sql/ddl/SHOW_CREATE_TABLE_STATEMENT.md
index dd411410e65..967ebce316a 100644
--- a/docs/en/reference/sql/ddl/SHOW_CREATE_TABLE_STATEMENT.md
+++ b/docs/en/reference/sql/ddl/SHOW_CREATE_TABLE_STATEMENT.md
@@ -21,7 +21,7 @@ show create table t1;
`c3` bigInt,
`c4` timestamp,
INDEX (KEY=`c1`, TS=`c4`, TTL_TYPE=ABSOLUTE, TTL=0m)
- ) OPTIONS (PARTITIONNUM=8, REPLICANUM=2, STORAGE_MODE='HDD');
+ ) OPTIONS (PARTITIONNUM=8, REPLICANUM=2, STORAGE_MODE='HDD', COMPRESS_TYPE='NoCompress');
------- ---------------------------------------------------------------
1 rows in set
diff --git a/docs/zh/deploy/conf.md b/docs/zh/deploy/conf.md
index ef05f0c8dc9..de538720e5d 100644
--- a/docs/zh/deploy/conf.md
+++ b/docs/zh/deploy/conf.md
@@ -9,6 +9,8 @@
# 如果是部署单机版不需要配置zk_cluster和zk_root_path,把这俩配置注释即可. 部署集群版需要配置这两项,一个集群中所有节点的这两个配置必须保持一致
#--zk_cluster=127.0.0.1:7181
#--zk_root_path=/openmldb_cluster
+# 配置zk认证的用户名和密码, 用冒号分割
+#--zk_cert=user:passwd
# 单机版需要指定tablet的地址, 集群版此配置可忽略
--tablet=127.0.0.1:9921
# 配置log目录
@@ -76,6 +78,8 @@
# 如果启动集群版需要指定zk的地址和集群在zk的节点路径
#--zk_cluster=127.0.0.1:7181
#--zk_root_path=/openmldb_cluster
+# 配置zk认证的用户名和密码, 用冒号分割
+#--zk_cert=user:passwd
# 配置线程池大小,建议和cpu核数一致
--thread_pool_size=24
@@ -222,6 +226,8 @@
# 如果部署的openmldb是集群版,需要指定zk地址和集群zk节点目录
#--zk_cluster=127.0.0.1:7181
#--zk_root_path=/openmldb_cluster
+# 配置zk认证的用户名和密码, 用冒号分割
+#--zk_cert=user:passwd
# 配置日志路径
--openmldb_log_dir=./logs
@@ -254,6 +260,7 @@ zookeeper.connection_timeout=5000
zookeeper.max_retries=10
zookeeper.base_sleep_time=1000
zookeeper.max_connect_waitTime=30000
+#zookeeper.cert=user:passwd
# Spark Config
spark.home=
diff --git a/docs/zh/openmldb_sql/ddl/CREATE_TABLE_STATEMENT.md b/docs/zh/openmldb_sql/ddl/CREATE_TABLE_STATEMENT.md
index 1dffc9d4cae..a44f699eed3 100644
--- a/docs/zh/openmldb_sql/ddl/CREATE_TABLE_STATEMENT.md
+++ b/docs/zh/openmldb_sql/ddl/CREATE_TABLE_STATEMENT.md
@@ -450,6 +450,11 @@ StorageMode
::= 'Memory'
| 'HDD'
| 'SSD'
+CompressTypeOption
+ ::= 'COMPRESS_TYPE' '=' CompressType
+CompressType
+ ::= 'NoCompress'
+ | 'Snappy'
```
@@ -460,6 +465,7 @@ StorageMode
| `REPLICANUM` | 配置表的副本数。请注意,副本数只有在集群版中才可以配置。 | `OPTIONS (REPLICANUM=3)` |
| `DISTRIBUTION` | 配置分布式的节点endpoint。一般包含一个Leader节点和若干Follower节点。`(leader, [follower1, follower2, ..])`。不显式配置时,OpenMLDB会自动根据环境和节点来配置`DISTRIBUTION`。 | `DISTRIBUTION = [ ('127.0.0.1:6527', [ '127.0.0.1:6528','127.0.0.1:6529' ])]` |
| `STORAGE_MODE` | 表的存储模式,支持的模式有`Memory`、`HDD`或`SSD`。不显式配置时,默认为`Memory`。
如果需要支持非`Memory`模式的存储模式,`tablet`需要额外的配置选项,具体可参考[tablet配置文件 conf/tablet.flags](../../../deploy/conf.md)。 | `OPTIONS (STORAGE_MODE='HDD')` |
+| `COMPRESS_TYPE` | 指定表的压缩类型。目前只支持Snappy压缩。默认为 `NoCompress` 即不压缩。 | `OPTIONS (COMPRESS_TYPE='Snappy')` |
#### 磁盘表与内存表区别
- 磁盘表对应`STORAGE_MODE`的取值为`HDD`或`SSD`。内存表对应的`STORAGE_MODE`取值为`Memory`。
@@ -488,11 +494,11 @@ DESC t1;
--- -------------------- ------ ---------- ------ ---------------
1 INDEX_0_1651143735 col1 std_time 0min kAbsoluteTime
--- -------------------- ------ ---------- ------ ---------------
- --------------
- storage_mode
- --------------
- HDD
- --------------
+ --------------- --------------
+ compress_type storage_mode
+ --------------- --------------
+ NoCompress HDD
+ --------------- --------------
```
创建一张表,指定分片的分布状态
```sql
diff --git a/docs/zh/openmldb_sql/ddl/DESC_STATEMENT.md b/docs/zh/openmldb_sql/ddl/DESC_STATEMENT.md
index 1088411dc03..ca0d0de87bf 100644
--- a/docs/zh/openmldb_sql/ddl/DESC_STATEMENT.md
+++ b/docs/zh/openmldb_sql/ddl/DESC_STATEMENT.md
@@ -56,11 +56,11 @@ desc t1;
--- -------------------- ------ ---------- ---------- ---------------
1 INDEX_0_1658136511 col1 std_time 43200min kAbsoluteTime
--- -------------------- ------ ---------- ---------- ---------------
- --------------
- storage_mode
- --------------
- Memory
- --------------
+ --------------- --------------
+ compress_type storage_mode
+ --------------- --------------
+ NoCompress Memory
+ --------------- --------------
```
diff --git a/docs/zh/openmldb_sql/ddl/SHOW_CREATE_TABLE_STATEMENT.md b/docs/zh/openmldb_sql/ddl/SHOW_CREATE_TABLE_STATEMENT.md
index e697f687846..22c08fb754e 100644
--- a/docs/zh/openmldb_sql/ddl/SHOW_CREATE_TABLE_STATEMENT.md
+++ b/docs/zh/openmldb_sql/ddl/SHOW_CREATE_TABLE_STATEMENT.md
@@ -21,7 +21,7 @@ show create table t1;
`c3` bigInt,
`c4` timestamp,
INDEX (KEY=`c1`, TS=`c4`, TTL_TYPE=ABSOLUTE, TTL=0m)
- ) OPTIONS (PARTITIONNUM=8, REPLICANUM=2, STORAGE_MODE='HDD');
+ ) OPTIONS (PARTITIONNUM=8, REPLICANUM=2, STORAGE_MODE='HDD', COMPRESS_TYPE='NoCompress');
------- ---------------------------------------------------------------
1 rows in set
diff --git a/docs/zh/quickstart/sdk/rest_api.md b/docs/zh/quickstart/sdk/rest_api.md
index 0526127cd29..0a225e444f6 100644
--- a/docs/zh/quickstart/sdk/rest_api.md
+++ b/docs/zh/quickstart/sdk/rest_api.md
@@ -5,6 +5,18 @@
- REST APIs 通过 APIServer 和 OpenMLDB 的服务进行交互,因此 APIServer 模块必须被正确部署才能有效使用。APISever 在安装部署时是可选模块,参照 [APIServer 部署文档](../../deploy/install_deploy.md#部署-apiserver)。
- 现阶段,APIServer 主要用来做功能测试使用,并不推荐用来测试性能,也不推荐在生产环境使用。APIServer 的默认部署目前并没有高可用机制,并且引入了额外的网络和编解码开销。生产环境推荐使用 Java SDK,功能覆盖最完善,并且在功能、性能上都经过了充分测试。
+## JSON Body
+
+与APIServer的交互中,请求体均为JSON格式,并支持一定的扩展格式。注意以下几点:
+
+- 传入超过整型或浮点数最大值的数值,将会解析失败,比如,double类型传入`1e1000`。
+- 非数值浮点数:在传入数据时,支持传入`NaN`、`Infinity`、`-Infinity`,与缩写`Inf`、`-Inf`(注意是unquoted的,并非字符串,也不支持其他变种写法)。在返回数据时,支持返回`NaN`、`Infinity`、`-Infinity`(不支持变种写法)。如果你需要将三者转换为null,可以配置 `write_nan_and_inf_null`。
+- 可以传入整型数字到浮点数,比如,`1`可被读取为double。
+- float浮点数可能有精度损失,比如,`0.3`读取后将不会严格等于`0.3`,而是`0.30000000000000004`。我们不拒绝精度损失,请从业务层面考虑是否需要对此进行处理。传入超过float max但不超过double max的值,在读取后将成为`Inf`。
+- `true/false`、`null`并不支持大写,只支持小写。
+- timestamp类型暂不支持传入年月日字符串,只支持传入数值,比如`1635247427000`。
+- date类型请传入**年月日字符串**,中间不要包含任何空格。
+
## 数据插入
请求地址:http://ip:port/dbs/{db_name}/tables/{table_name}
@@ -55,7 +67,8 @@ curl http://127.0.0.1:8080/dbs/db/tables/trans -X PUT -d '{
```JSON
{
"input": [["row0_value0", "row0_value1", "row0_value2"], ["row1_value0", "row1_value1", "row1_value2"], ...],
- "need_schema": false
+ "need_schema": false,
+ "write_nan_and_inf_null": false
}
```
@@ -73,6 +86,7 @@ curl http://127.0.0.1:8080/dbs/db/tables/trans -X PUT -d '{
- 可以支持多行,其结果与返回的 response 中的 data.data 字段的数组一一对应。
- need_schema 可以设置为 true, 返回就会有输出结果的 schema。可选参数,默认为 false。
+- write_nan_and_inf_null 可以设置为 true,可选参数,默认为false。如果设置为 true,当输出数据中有 NaN、Inf、-Inf 时,会将其转换为 null。
- input 为 array 格式/JSON 格式时候返回结果也是 array 格式/JSON 格式,一次请求的 input 只支持一种格式,请不要混合格式。
- JSON 格式的 input 数据可以有多余列。
@@ -131,7 +145,8 @@ curl http://127.0.0.1:8080/dbs/demo_db/deployments/demo_data_service -X POST -d'
"input": {
"schema": [],
"data": []
- }
+ },
+ "write_nan_and_inf_null": false
}
```
diff --git a/hybridse/examples/toydb/src/storage/table_iterator.cc b/hybridse/examples/toydb/src/storage/table_iterator.cc
index 45561cd52a1..8ea4a3e0349 100644
--- a/hybridse/examples/toydb/src/storage/table_iterator.cc
+++ b/hybridse/examples/toydb/src/storage/table_iterator.cc
@@ -62,7 +62,7 @@ WindowTableIterator::WindowTableIterator(Segment*** segments, uint32_t seg_cnt,
seg_idx_(0),
pk_it_(),
table_(table) {
- GoToStart();
+ SeekToFirst();
}
WindowTableIterator::~WindowTableIterator() {}
@@ -80,7 +80,7 @@ void WindowTableIterator::Seek(const std::string& key) {
pk_it_->Seek(pk);
}
-void WindowTableIterator::SeekToFirst() {}
+void WindowTableIterator::SeekToFirst() { GoToStart(); }
std::unique_ptr WindowTableIterator::GetValue() {
if (!pk_it_)
diff --git a/hybridse/examples/toydb/src/tablet/tablet_catalog.cc b/hybridse/examples/toydb/src/tablet/tablet_catalog.cc
index feeb750ab6f..81764df9da6 100644
--- a/hybridse/examples/toydb/src/tablet/tablet_catalog.cc
+++ b/hybridse/examples/toydb/src/tablet/tablet_catalog.cc
@@ -19,7 +19,6 @@
#include
#include
#include
-#include "codec/list_iterator_codec.h"
#include "glog/logging.h"
#include "storage/table_iterator.h"
@@ -99,13 +98,6 @@ bool TabletTableHandler::Init() {
return true;
}
-std::unique_ptr TabletTableHandler::GetIterator() {
- std::unique_ptr it(
- new storage::FullTableIterator(table_->GetSegments(),
- table_->GetSegCnt(), table_));
- return std::move(it);
-}
-
std::unique_ptr TabletTableHandler::GetWindowIterator(
const std::string& idx_name) {
auto iter = index_hint_.find(idx_name);
@@ -136,22 +128,6 @@ RowIterator* TabletTableHandler::GetRawIterator() {
return new storage::FullTableIterator(table_->GetSegments(),
table_->GetSegCnt(), table_);
}
-const uint64_t TabletTableHandler::GetCount() {
- auto iter = GetIterator();
- uint64_t cnt = 0;
- while (iter->Valid()) {
- iter->Next();
- cnt++;
- }
- return cnt;
-}
-Row TabletTableHandler::At(uint64_t pos) {
- auto iter = GetIterator();
- while (pos-- > 0 && iter->Valid()) {
- iter->Next();
- }
- return iter->Valid() ? iter->GetValue() : Row();
-}
TabletCatalog::TabletCatalog() : tables_(), db_() {}
@@ -249,22 +225,6 @@ std::unique_ptr TabletSegmentHandler::GetWindowIterator(
const std::string& idx_name) {
return std::unique_ptr();
}
-const uint64_t TabletSegmentHandler::GetCount() {
- auto iter = GetIterator();
- uint64_t cnt = 0;
- while (iter->Valid()) {
- cnt++;
- iter->Next();
- }
- return cnt;
-}
-Row TabletSegmentHandler::At(uint64_t pos) {
- auto iter = GetIterator();
- while (pos-- > 0 && iter->Valid()) {
- iter->Next();
- }
- return iter->Valid() ? iter->GetValue() : Row();
-}
const uint64_t TabletPartitionHandler::GetCount() {
auto iter = GetWindowIterator();
@@ -275,5 +235,6 @@ const uint64_t TabletPartitionHandler::GetCount() {
}
return cnt;
}
+
} // namespace tablet
} // namespace hybridse
diff --git a/hybridse/examples/toydb/src/tablet/tablet_catalog.h b/hybridse/examples/toydb/src/tablet/tablet_catalog.h
index fa41140a495..9d2e8b907e5 100644
--- a/hybridse/examples/toydb/src/tablet/tablet_catalog.h
+++ b/hybridse/examples/toydb/src/tablet/tablet_catalog.h
@@ -21,7 +21,6 @@
#include
#include
#include
-#include "base/spin_lock.h"
#include "storage/table_impl.h"
#include "vm/catalog.h"
@@ -68,8 +67,6 @@ class TabletSegmentHandler : public TableHandler {
std::unique_ptr GetIterator() override;
RowIterator* GetRawIterator() override;
std::unique_ptr GetWindowIterator(const std::string& idx_name) override;
- const uint64_t GetCount() override;
- Row At(uint64_t pos) override;
const std::string GetHandlerTypeName() override {
return "TabletSegmentHandler";
}
@@ -79,7 +76,7 @@ class TabletSegmentHandler : public TableHandler {
std::string key_;
};
-class TabletPartitionHandler
+class TabletPartitionHandler final
: public PartitionHandler,
public std::enable_shared_from_this {
public:
@@ -91,6 +88,8 @@ class TabletPartitionHandler
~TabletPartitionHandler() {}
+ RowIterator* GetRawIterator() override { return table_handler_->GetRawIterator(); }
+
const OrderType GetOrderType() const override { return OrderType::kDescOrder; }
const vm::Schema* GetSchema() override { return table_handler_->GetSchema(); }
@@ -104,6 +103,7 @@ class TabletPartitionHandler
std::unique_ptr GetWindowIterator() override {
return table_handler_->GetWindowIterator(index_name_);
}
+
const uint64_t GetCount() override;
std::shared_ptr GetSegment(const std::string& key) override {
@@ -119,7 +119,7 @@ class TabletPartitionHandler
vm::IndexHint index_hint_;
};
-class TabletTableHandler
+class TabletTableHandler final
: public vm::TableHandler,
public std::enable_shared_from_this {
public:
@@ -135,28 +135,23 @@ class TabletTableHandler
bool Init();
- inline const vm::Schema* GetSchema() { return &schema_; }
+ const vm::Schema* GetSchema() override { return &schema_; }
- inline const std::string& GetName() { return name_; }
+ const std::string& GetName() override { return name_; }
- inline const std::string& GetDatabase() { return db_; }
+ const std::string& GetDatabase() override { return db_; }
- inline const vm::Types& GetTypes() { return types_; }
+ const vm::Types& GetTypes() override { return types_; }
- inline const vm::IndexHint& GetIndex() { return index_hint_; }
+ const vm::IndexHint& GetIndex() override { return index_hint_; }
const Row Get(int32_t pos);
- inline std::shared_ptr GetTable() { return table_; }
- std::unique_ptr GetIterator();
+ std::shared_ptr GetTable() { return table_; }
RowIterator* GetRawIterator() override;
- std::unique_ptr GetWindowIterator(
- const std::string& idx_name);
- virtual const uint64_t GetCount();
- Row At(uint64_t pos) override;
+ std::unique_ptr GetWindowIterator(const std::string& idx_name) override;
- virtual std::shared_ptr GetPartition(
- const std::string& index_name) {
+ std::shared_ptr GetPartition(const std::string& index_name) override {
if (index_hint_.find(index_name) == index_hint_.cend()) {
LOG(WARNING)
<< "fail to get partition for tablet table handler, index name "
@@ -169,12 +164,12 @@ class TabletTableHandler
const std::string GetHandlerTypeName() override {
return "TabletTableHandler";
}
- virtual std::shared_ptr GetTablet(
- const std::string& index_name, const std::string& pk) {
+ std::shared_ptr GetTablet(const std::string& index_name,
+ const std::string& pk) override {
return tablet_;
}
- virtual std::shared_ptr GetTablet(
- const std::string& index_name, const std::vector& pks) {
+ std::shared_ptr GetTablet(const std::string& index_name,
+ const std::vector& pks) override {
return tablet_;
}
diff --git a/hybridse/examples/toydb/src/testing/toydb_engine_test_base.cc b/hybridse/examples/toydb/src/testing/toydb_engine_test_base.cc
index fcaa71d8373..35a595b431e 100644
--- a/hybridse/examples/toydb/src/testing/toydb_engine_test_base.cc
+++ b/hybridse/examples/toydb/src/testing/toydb_engine_test_base.cc
@@ -15,8 +15,9 @@
*/
#include "testing/toydb_engine_test_base.h"
+
+#include "absl/strings/str_join.h"
#include "gtest/gtest.h"
-#include "gtest/internal/gtest-param-util.h"
using namespace llvm; // NOLINT (build/namespaces)
using namespace llvm::orc; // NOLINT (build/namespaces)
@@ -141,18 +142,12 @@ std::shared_ptr BuildOnePkTableStorage(
}
return catalog;
}
-void BatchRequestEngineCheckWithCommonColumnIndices(
- const SqlCase& sql_case, const EngineOptions options,
- const std::set& common_column_indices) {
- std::ostringstream oss;
- for (size_t index : common_column_indices) {
- oss << index << ",";
- }
- LOG(INFO) << "BatchRequestEngineCheckWithCommonColumnIndices: "
- "common_column_indices = ["
- << oss.str() << "]";
- ToydbBatchRequestEngineTestRunner engine_test(sql_case, options,
- common_column_indices);
+// Run check with common column index info
+void BatchRequestEngineCheckWithCommonColumnIndices(const SqlCase& sql_case, const EngineOptions options,
+ const std::set& common_column_indices) {
+ LOG(INFO) << "BatchRequestEngineCheckWithCommonColumnIndices: common_column_indices = ["
+ << absl::StrJoin(common_column_indices, ",") << "]";
+ ToydbBatchRequestEngineTestRunner engine_test(sql_case, options, common_column_indices);
engine_test.RunCheck();
}
diff --git a/hybridse/include/codec/fe_row_codec.h b/hybridse/include/codec/fe_row_codec.h
index 1e0e5b1badc..0e0b153f5a5 100644
--- a/hybridse/include/codec/fe_row_codec.h
+++ b/hybridse/include/codec/fe_row_codec.h
@@ -157,6 +157,9 @@ class RowView {
const Schema* GetSchema() const { return &schema_; }
inline bool IsNULL(const int8_t* row, uint32_t idx) const {
+ if (row == nullptr) {
+ return true;
+ }
const int8_t* ptr = row + HEADER_LENGTH + (idx >> 3);
return *(reinterpret_cast(ptr)) & (1 << (idx & 0x07));
}
diff --git a/hybridse/include/codec/row.h b/hybridse/include/codec/row.h
index cd6abb0a3a1..69158d41e85 100644
--- a/hybridse/include/codec/row.h
+++ b/hybridse/include/codec/row.h
@@ -54,7 +54,7 @@ class Row {
inline int32_t size() const { return slice_.size(); }
inline int32_t size(int32_t pos) const {
- return 0 == pos ? slice_.size() : slices_[pos - 1].size();
+ return 0 == pos ? slice_.size() : slices_.at(pos - 1).size();
}
// Return true if the length of the referenced data is zero
diff --git a/hybridse/include/codec/row_iterator.h b/hybridse/include/codec/row_iterator.h
index 2075918666c..fa60d21a37e 100644
--- a/hybridse/include/codec/row_iterator.h
+++ b/hybridse/include/codec/row_iterator.h
@@ -71,7 +71,14 @@ class WindowIterator {
virtual bool Valid() = 0;
/// Return the RowIterator of current segment
/// of dataset if Valid() return `true`.
- virtual std::unique_ptr GetValue() = 0;
+ virtual std::unique_ptr GetValue() {
+ auto p = GetRawValue();
+ if (!p) {
+ return nullptr;
+ }
+
+ return std::unique_ptr(p);
+ }
/// Return the RowIterator of current segment
/// of dataset if Valid() return `true`.
virtual RowIterator *GetRawValue() = 0;
diff --git a/hybridse/include/codec/row_list.h b/hybridse/include/codec/row_list.h
index b32ad24c3eb..f601b207b9c 100644
--- a/hybridse/include/codec/row_list.h
+++ b/hybridse/include/codec/row_list.h
@@ -65,7 +65,13 @@ class ListV {
ListV() {}
virtual ~ListV() {}
/// \brief Return the const iterator
- virtual std::unique_ptr> GetIterator() = 0;
+ virtual std::unique_ptr> GetIterator() {
+ auto raw = GetRawIterator();
+ if (raw == nullptr) {
+ return {};
+ }
+ return std::unique_ptr>(raw);
+ }
/// \brief Return the const iterator raw pointer
virtual ConstIterator *GetRawIterator() = 0;
@@ -76,7 +82,7 @@ class ListV {
virtual const uint64_t GetCount() {
auto iter = GetIterator();
uint64_t cnt = 0;
- while (iter->Valid()) {
+ while (iter && iter->Valid()) {
iter->Next();
cnt++;
}
diff --git a/hybridse/include/node/node_enum.h b/hybridse/include/node/node_enum.h
index 16e18291478..fc1dde18b07 100644
--- a/hybridse/include/node/node_enum.h
+++ b/hybridse/include/node/node_enum.h
@@ -97,6 +97,7 @@ enum SqlNodeType {
kWithClauseEntry,
kAlterTableStmt,
kShowStmt,
+ kCompressType,
kSqlNodeTypeLast, // debug type
};
@@ -251,7 +252,7 @@ enum JoinType {
kJoinTypeRight,
kJoinTypeInner,
kJoinTypeConcat,
- kJoinTypeComma
+ kJoinTypeCross, // AKA comma join
};
enum UnionType { kUnionTypeDistinct, kUnionTypeAll };
@@ -342,6 +343,11 @@ enum StorageMode {
kHDD = 3,
};
+enum CompressType {
+ kNoCompress = 0,
+ kSnappy = 1,
+};
+
// batch plan node type
enum BatchPlanNodeType { kBatchDataset, kBatchPartition, kBatchMap };
diff --git a/hybridse/include/node/node_manager.h b/hybridse/include/node/node_manager.h
index ab87e588a53..e70f0a59564 100644
--- a/hybridse/include/node/node_manager.h
+++ b/hybridse/include/node/node_manager.h
@@ -399,8 +399,6 @@ class NodeManager {
SqlNode *MakeReplicaNumNode(int num);
- SqlNode *MakeStorageModeNode(StorageMode storage_mode);
-
SqlNode *MakePartitionNumNode(int num);
SqlNode *MakeDistributionsNode(const NodePointVector& distribution_list);
diff --git a/hybridse/include/node/sql_node.h b/hybridse/include/node/sql_node.h
index dcf162a96ab..30f7a6cc34a 100644
--- a/hybridse/include/node/sql_node.h
+++ b/hybridse/include/node/sql_node.h
@@ -25,6 +25,7 @@
#include
#include "absl/status/statusor.h"
+#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "boost/algorithm/string.hpp"
@@ -309,17 +310,26 @@ inline const std::string StorageModeName(StorageMode mode) {
}
inline const StorageMode NameToStorageMode(const std::string& name) {
- if (boost::iequals(name, "memory")) {
+ if (absl::EqualsIgnoreCase(name, "memory")) {
return kMemory;
- } else if (boost::iequals(name, "hdd")) {
+ } else if (absl::EqualsIgnoreCase(name, "hdd")) {
return kHDD;
- } else if (boost::iequals(name, "ssd")) {
+ } else if (absl::EqualsIgnoreCase(name, "ssd")) {
return kSSD;
} else {
return kUnknown;
}
}
+inline absl::StatusOr NameToCompressType(const std::string& name) {
+ if (absl::EqualsIgnoreCase(name, "snappy")) {
+ return CompressType::kSnappy;
+ } else if (absl::EqualsIgnoreCase(name, "nocompress")) {
+ return CompressType::kNoCompress;
+ }
+ return absl::Status(absl::StatusCode::kInvalidArgument, absl::StrCat("invalid compress type: ", name));
+}
+
inline const std::string RoleTypeName(RoleType type) {
switch (type) {
case kLeader:
@@ -1884,6 +1894,23 @@ class StorageModeNode : public SqlNode {
StorageMode storage_mode_;
};
+class CompressTypeNode : public SqlNode {
+ public:
+ CompressTypeNode() : SqlNode(kCompressType, 0, 0), compress_type_(kNoCompress) {}
+
+ explicit CompressTypeNode(CompressType compress_type)
+ : SqlNode(kCompressType, 0, 0), compress_type_(compress_type) {}
+
+ ~CompressTypeNode() {}
+
+ CompressType GetCompressType() const { return compress_type_; }
+
+ void Print(std::ostream &output, const std::string &org_tab) const;
+
+ private:
+ CompressType compress_type_;
+};
+
class CreateTableLikeClause {
public:
CreateTableLikeClause() = default;
diff --git a/hybridse/include/vm/catalog.h b/hybridse/include/vm/catalog.h
index 30e68316606..4bd007645bd 100644
--- a/hybridse/include/vm/catalog.h
+++ b/hybridse/include/vm/catalog.h
@@ -217,6 +217,7 @@ class TableHandler : public DataHandler {
virtual ~TableHandler() {}
/// Return table column Types information.
+ /// TODO: remove this method; it is never used
virtual const Types& GetTypes() = 0;
/// Return the index information
@@ -224,8 +225,7 @@ class TableHandler : public DataHandler {
/// Return WindowIterator
/// so that user can use it to iterate datasets segment by segment.
- virtual std::unique_ptr GetWindowIterator(
- const std::string& idx_name) = 0;
+ virtual std::unique_ptr GetWindowIterator(const std::string& idx_name) { return nullptr; }
/// Return the HandlerType of the dataset.
/// Return HandlerType::kTableHandler by default
@@ -254,8 +254,7 @@ class TableHandler : public DataHandler {
/// Return Tablet binding to specify index and keys.
/// Return `null` by default.
- virtual std::shared_ptr GetTablet(
- const std::string& index_name, const std::vector& pks) {
+ virtual std::shared_ptr GetTablet(const std::string& index_name, const std::vector& pks) {
return std::shared_ptr();
}
};
@@ -286,27 +285,19 @@ class ErrorTableHandler : public TableHandler {
/// Return empty column Types.
const Types& GetTypes() override { return types_; }
/// Return empty table Schema.
- inline const Schema* GetSchema() override { return schema_; }
+ const Schema* GetSchema() override { return schema_; }
/// Return empty table name
- inline const std::string& GetName() override { return table_name_; }
+ const std::string& GetName() override { return table_name_; }
/// Return empty indexn information
- inline const IndexHint& GetIndex() override { return index_hint_; }
+ const IndexHint& GetIndex() override { return index_hint_; }
/// Return name of database
- inline const std::string& GetDatabase() override { return db_; }
+ const std::string& GetDatabase() override { return db_; }
/// Return null iterator
- std::unique_ptr GetIterator() {
- return std::unique_ptr();
- }
- /// Return null iterator
- RowIterator* GetRawIterator() { return nullptr; }
- /// Return null window iterator
- std::unique_ptr GetWindowIterator(
- const std::string& idx_name) {
- return std::unique_ptr();
- }
+ RowIterator* GetRawIterator() override { return nullptr; }
+
/// Return empty row
- virtual Row At(uint64_t pos) { return Row(); }
+ Row At(uint64_t pos) override { return Row(); }
/// Return 0
const uint64_t GetCount() override { return 0; }
@@ -317,7 +308,7 @@ class ErrorTableHandler : public TableHandler {
}
/// Return status
- virtual base::Status GetStatus() { return status_; }
+ base::Status GetStatus() override { return status_; }
protected:
base::Status status_;
@@ -340,16 +331,11 @@ class PartitionHandler : public TableHandler {
PartitionHandler() : TableHandler() {}
~PartitionHandler() {}
- /// Return the iterator of row iterator.
- /// Return null by default
- virtual std::unique_ptr GetIterator() {
- return std::unique_ptr();
- }
- /// Return the iterator of row iterator
- /// Return null by default
- RowIterator* GetRawIterator() { return nullptr; }
- virtual std::unique_ptr GetWindowIterator(
- const std::string& idx_name) {
+ // Return the iterator of row iterator
+ // Return null by default
+ RowIterator* GetRawIterator() override { return nullptr; }
+
+ std::unique_ptr GetWindowIterator(const std::string& idx_name) override {
return std::unique_ptr();
}
@@ -361,18 +347,15 @@ class PartitionHandler : public TableHandler {
const HandlerType GetHandlerType() override { return kPartitionHandler; }
/// Return empty row, cause partition dataset does not support At operation.
- virtual Row At(uint64_t pos) { return Row(); }
+ // virtual Row At(uint64_t pos) { return Row(); }
/// Return Return table handler of specific segment binding to given key.
/// Return `null` by default.
- virtual std::shared_ptr GetSegment(const std::string& key) {
- return std::shared_ptr();
- }
+ virtual std::shared_ptr GetSegment(const std::string& key) = 0;
/// Return a sequence of table handles of specify segments binding to given
/// keys set.
- virtual std::vector> GetSegments(
- const std::vector& keys) {
+ virtual std::vector> GetSegments(const std::vector& keys) {
std::vector> segments;
for (auto key : keys) {
segments.push_back(GetSegment(key));
@@ -383,9 +366,6 @@ class PartitionHandler : public TableHandler {
const std::string GetHandlerTypeName() override {
return "PartitionHandler";
}
- /// Return order type of the dataset,
- /// and return kNoneOrder by default.
- const OrderType GetOrderType() const { return kNoneOrder; }
};
/// \brief A wrapper of table handler which is used as a asynchronous row
diff --git a/hybridse/include/vm/mem_catalog.h b/hybridse/include/vm/mem_catalog.h
index 2fc5df4960c..6237edd1d43 100644
--- a/hybridse/include/vm/mem_catalog.h
+++ b/hybridse/include/vm/mem_catalog.h
@@ -25,8 +25,6 @@
#include
#include
#include
-#include "base/fe_slice.h"
-#include "codec/list_iterator_codec.h"
#include "glog/logging.h"
#include "vm/catalog.h"
@@ -66,11 +64,11 @@ class MemTimeTableIterator : public RowIterator {
MemTimeTableIterator(const MemTimeTable* table, const vm::Schema* schema,
int32_t start, int32_t end);
~MemTimeTableIterator();
- void Seek(const uint64_t& ts);
- void SeekToFirst();
- const uint64_t& GetKey() const;
- void Next();
- bool Valid() const;
+ void Seek(const uint64_t& ts) override;
+ void SeekToFirst() override;
+ const uint64_t& GetKey() const override;
+ void Next() override;
+ bool Valid() const override;
const Row& GetValue() override;
bool IsSeekable() const override;
@@ -88,12 +86,12 @@ class MemTableIterator : public RowIterator {
MemTableIterator(const MemTable* table, const vm::Schema* schema,
int32_t start, int32_t end);
~MemTableIterator();
- void Seek(const uint64_t& ts);
- void SeekToFirst();
- const uint64_t& GetKey() const;
- const Row& GetValue();
- void Next();
- bool Valid() const;
+ void Seek(const uint64_t& ts) override;
+ void SeekToFirst() override;
+ const uint64_t& GetKey() const override;
+ const Row& GetValue() override;
+ void Next() override;
+ bool Valid() const override;
bool IsSeekable() const override;
private:
@@ -115,7 +113,6 @@ class MemWindowIterator : public WindowIterator {
void SeekToFirst();
void Next();
bool Valid();
- std::unique_ptr GetValue();
RowIterator* GetRawValue();
const Row GetKey();
@@ -157,24 +154,21 @@ class MemTableHandler : public TableHandler {
~MemTableHandler() override;
const Types& GetTypes() override { return types_; }
- inline const Schema* GetSchema() { return schema_; }
- inline const std::string& GetName() { return table_name_; }
- inline const IndexHint& GetIndex() { return index_hint_; }
- inline const std::string& GetDatabase() { return db_; }
+ const Schema* GetSchema() override { return schema_; }
+ const std::string& GetName() override { return table_name_; }
+ const IndexHint& GetIndex() override { return index_hint_; }
+ const std::string& GetDatabase() override { return db_; }
- std::unique_ptr GetIterator() override;
RowIterator* GetRawIterator() override;
- std::unique_ptr GetWindowIterator(
- const std::string& idx_name);
void AddRow(const Row& row);
void Reverse();
- virtual const uint64_t GetCount() { return table_.size(); }
- virtual Row At(uint64_t pos) {
+ const uint64_t GetCount() override { return table_.size(); }
+ Row At(uint64_t pos) override {
return pos < table_.size() ? table_.at(pos) : Row();
}
- const OrderType GetOrderType() const { return order_type_; }
+ const OrderType GetOrderType() const override { return order_type_; }
void SetOrderType(const OrderType order_type) { order_type_ = order_type; }
const std::string GetHandlerTypeName() override {
return "MemTableHandler";
@@ -200,14 +194,11 @@ class MemTimeTableHandler : public TableHandler {
const Schema* schema);
const Types& GetTypes() override;
~MemTimeTableHandler() override;
- inline const Schema* GetSchema() { return schema_; }
- inline const std::string& GetName() { return table_name_; }
- inline const IndexHint& GetIndex() { return index_hint_; }
- std::unique_ptr GetIterator();
- RowIterator* GetRawIterator();
- inline const std::string& GetDatabase() { return db_; }
- std::unique_ptr GetWindowIterator(
- const std::string& idx_name);
+ const Schema* GetSchema() override { return schema_; }
+ const std::string& GetName() override { return table_name_; }
+ const IndexHint& GetIndex() override { return index_hint_; }
+ RowIterator* GetRawIterator() override;
+ const std::string& GetDatabase() override { return db_; }
void AddRow(const uint64_t key, const Row& v);
void AddFrontRow(const uint64_t key, const Row& v);
void PopBackRow();
@@ -220,12 +211,12 @@ class MemTimeTableHandler : public TableHandler {
}
void Sort(const bool is_asc);
void Reverse();
- virtual const uint64_t GetCount() { return table_.size(); }
- virtual Row At(uint64_t pos) {
+ const uint64_t GetCount() override { return table_.size(); }
+ Row At(uint64_t pos) override {
return pos < table_.size() ? table_.at(pos).second : Row();
}
void SetOrderType(const OrderType order_type) { order_type_ = order_type; }
- const OrderType GetOrderType() const { return order_type_; }
+ const OrderType GetOrderType() const override { return order_type_; }
const std::string GetHandlerTypeName() override {
return "MemTimeTableHandler";
}
@@ -254,21 +245,11 @@ class Window : public MemTimeTableHandler {
return std::make_unique(&table_, schema_);
}
- RowIterator* GetRawIterator() {
- return new vm::MemTimeTableIterator(&table_, schema_);
- }
+ RowIterator* GetRawIterator() override { return new vm::MemTimeTableIterator(&table_, schema_); }
virtual bool BufferData(uint64_t key, const Row& row) = 0;
virtual void PopBackData() { PopBackRow(); }
virtual void PopFrontData() = 0;
- virtual const uint64_t GetCount() { return table_.size(); }
- virtual Row At(uint64_t pos) {
- if (pos >= table_.size()) {
- return Row();
- } else {
- return table_[pos].second;
- }
- }
const std::string GetHandlerTypeName() override { return "Window"; }
bool instance_not_in_window() const { return instance_not_in_window_; }
@@ -322,7 +303,7 @@ class WindowRange {
return WindowRange(Window::kFrameRowsMergeRowsRange, start_offset, 0,
rows_preceding, max_size);
}
- inline const WindowPositionStatus GetWindowPositionStatus(
+ const WindowPositionStatus GetWindowPositionStatus(
bool out_of_rows, bool before_window, bool exceed_window) const {
switch (frame_type_) {
case Window::WindowFrameType::kFrameRows:
@@ -531,7 +512,7 @@ class CurrentHistoryWindow : public HistoryWindow {
void PopFrontData() override { PopFrontRow(); }
- bool BufferData(uint64_t key, const Row& row) {
+ bool BufferData(uint64_t key, const Row& row) override {
if (!table_.empty() && GetFrontRow().first > key) {
DLOG(WARNING) << "Fail BufferData: buffer key less than latest key";
return false;
@@ -560,34 +541,25 @@ class MemSegmentHandler : public TableHandler {
virtual ~MemSegmentHandler() {}
- inline const vm::Schema* GetSchema() {
+ const vm::Schema* GetSchema() override {
return partition_hander_->GetSchema();
}
- inline const std::string& GetName() { return partition_hander_->GetName(); }
+ const std::string& GetName() override { return partition_hander_->GetName(); }
- inline const std::string& GetDatabase() {
+ const std::string& GetDatabase() override {
return partition_hander_->GetDatabase();
}
- inline const vm::Types& GetTypes() { return partition_hander_->GetTypes(); }
+ const vm::Types& GetTypes() override { return partition_hander_->GetTypes(); }
- inline const vm::IndexHint& GetIndex() {
+ const vm::IndexHint& GetIndex() override {
return partition_hander_->GetIndex();
}
- const OrderType GetOrderType() const {
+ const OrderType GetOrderType() const override {
return partition_hander_->GetOrderType();
}
- std::unique_ptr GetIterator() {
- auto iter = partition_hander_->GetWindowIterator();
- if (iter) {
- iter->Seek(key_);
- return iter->Valid() ? iter->GetValue()
- : std::unique_ptr();
- }
- return std::unique_ptr();
- }
RowIterator* GetRawIterator() override {
auto iter = partition_hander_->GetWindowIterator();
if (iter) {
@@ -596,12 +568,11 @@ class MemSegmentHandler : public TableHandler {
}
return nullptr;
}
- std::unique_ptr GetWindowIterator(
- const std::string& idx_name) {
+ std::unique_ptr GetWindowIterator(const std::string& idx_name) override {
LOG(WARNING) << "SegmentHandler can't support window iterator";
return std::unique_ptr();
}
- virtual const uint64_t GetCount() {
+ const uint64_t GetCount() override {
auto iter = GetIterator();
if (!iter) {
return 0;
@@ -634,9 +605,7 @@ class MemSegmentHandler : public TableHandler {
std::string key_;
};
-class MemPartitionHandler
- : public PartitionHandler,
- public std::enable_shared_from_this {
+class MemPartitionHandler : public PartitionHandler, public std::enable_shared_from_this {
public:
MemPartitionHandler();
explicit MemPartitionHandler(const Schema* schema);
@@ -649,18 +618,19 @@ class MemPartitionHandler
const Schema* GetSchema() override;
const std::string& GetName() override;
const std::string& GetDatabase() override;
- virtual std::unique_ptr GetWindowIterator();
+ RowIterator* GetRawIterator() override { return nullptr; }
+ std::unique_ptr GetWindowIterator() override;
bool AddRow(const std::string& key, uint64_t ts, const Row& row);
void Sort(const bool is_asc);
void Reverse();
void Print();
- virtual const uint64_t GetCount() { return partitions_.size(); }
- virtual std::shared_ptr GetSegment(const std::string& key) {
+ const uint64_t GetCount() override { return partitions_.size(); }
+ std::shared_ptr GetSegment(const std::string& key) override {
return std::shared_ptr(
new MemSegmentHandler(shared_from_this(), key));
}
void SetOrderType(const OrderType order_type) { order_type_ = order_type; }
- const OrderType GetOrderType() const { return order_type_; }
+ const OrderType GetOrderType() const override { return order_type_; }
const std::string GetHandlerTypeName() override {
return "MemPartitionHandler";
}
@@ -674,6 +644,7 @@ class MemPartitionHandler
IndexHint index_hint_;
OrderType order_type_;
};
+
class ConcatTableHandler : public MemTimeTableHandler {
public:
ConcatTableHandler(std::shared_ptr left, size_t left_slices,
@@ -692,19 +663,13 @@ class ConcatTableHandler : public MemTimeTableHandler {
status_ = SyncValue();
return MemTimeTableHandler::At(pos);
}
- std::unique_ptr GetIterator() {
- if (status_.isRunning()) {
- status_ = SyncValue();
- }
- return MemTimeTableHandler::GetIterator();
- }
- RowIterator* GetRawIterator() {
+ RowIterator* GetRawIterator() override {
if (status_.isRunning()) {
status_ = SyncValue();
}
return MemTimeTableHandler::GetRawIterator();
}
- virtual const uint64_t GetCount() {
+ const uint64_t GetCount() override {
if (status_.isRunning()) {
status_ = SyncValue();
}
@@ -757,11 +722,11 @@ class MemCatalog : public Catalog {
bool Init();
- std::shared_ptr GetDatabase(const std::string& db) {
+ std::shared_ptr GetDatabase(const std::string& db) override {
return dbs_[db];
}
std::shared_ptr GetTable(const std::string& db,
- const std::string& table_name) {
+ const std::string& table_name) override {
return tables_[db][table_name];
}
bool IndexSupport() override { return true; }
@@ -783,17 +748,11 @@ class RequestUnionTableHandler : public TableHandler {
: request_ts_(request_ts), request_row_(request_row), window_(window) {}
~RequestUnionTableHandler() {}
- std::unique_ptr GetIterator() override {
- return std::unique_ptr(GetRawIterator());
- }
RowIterator* GetRawIterator() override;
const Types& GetTypes() override { return window_->GetTypes(); }
const IndexHint& GetIndex() override { return window_->GetIndex(); }
- std::unique_ptr GetWindowIterator(const std::string&) {
- return nullptr;
- }
- const OrderType GetOrderType() const { return window_->GetOrderType(); }
+ const OrderType GetOrderType() const override { return window_->GetOrderType(); }
const Schema* GetSchema() override { return window_->GetSchema(); }
const std::string& GetName() override { return window_->GetName(); }
const std::string& GetDatabase() override { return window_->GetDatabase(); }
diff --git a/hybridse/include/vm/physical_op.h b/hybridse/include/vm/physical_op.h
index d2fdafb5349..dd51c73bfd1 100644
--- a/hybridse/include/vm/physical_op.h
+++ b/hybridse/include/vm/physical_op.h
@@ -731,6 +731,7 @@ class PhysicalConstProjectNode : public PhysicalOpNode {
public:
explicit PhysicalConstProjectNode(const ColumnProjects &project)
: PhysicalOpNode(kPhysicalOpConstProject, true), project_(project) {
+ output_type_ = kSchemaTypeRow;
fn_infos_.push_back(&project_.fn_info());
}
virtual ~PhysicalConstProjectNode() {}
@@ -785,7 +786,11 @@ class PhysicalAggregationNode : public PhysicalProjectNode {
public:
PhysicalAggregationNode(PhysicalOpNode *node, const ColumnProjects &project, const node::ExprNode *condition)
: PhysicalProjectNode(node, kAggregation, project, true), having_condition_(condition) {
- output_type_ = kSchemaTypeRow;
+ if (node->GetOutputType() == kSchemaTypeGroup) {
+ output_type_ = kSchemaTypeGroup;
+ } else {
+ output_type_ = kSchemaTypeRow;
+ }
fn_infos_.push_back(&having_condition_.fn_info());
}
virtual ~PhysicalAggregationNode() {}
@@ -1065,7 +1070,7 @@ class RequestWindowUnionList {
RequestWindowUnionList() : window_unions_() {}
virtual ~RequestWindowUnionList() {}
void AddWindowUnion(PhysicalOpNode *node, const RequestWindowOp &window) {
- window_unions_.push_back(std::make_pair(node, window));
+ window_unions_.emplace_back(node, window);
}
const PhysicalOpNode *GetKey(uint32_t index) {
auto iter = window_unions_.begin();
@@ -1179,23 +1184,25 @@ class PhysicalWindowAggrerationNode : public PhysicalProjectNode {
class PhysicalJoinNode : public PhysicalBinaryNode {
public:
+ static constexpr PhysicalOpType kConcreteNodeKind = kPhysicalOpJoin;
+
PhysicalJoinNode(PhysicalOpNode *left, PhysicalOpNode *right,
const node::JoinType join_type)
- : PhysicalBinaryNode(left, right, kPhysicalOpJoin, false),
+ : PhysicalBinaryNode(left, right, kConcreteNodeKind, false),
join_(join_type),
joined_schemas_ctx_(this),
output_right_only_(false) {
- output_type_ = left->GetOutputType();
+ InitOuptput();
}
PhysicalJoinNode(PhysicalOpNode *left, PhysicalOpNode *right,
const node::JoinType join_type,
const node::OrderByNode *orders,
const node::ExprNode *condition)
- : PhysicalBinaryNode(left, right, kPhysicalOpJoin, false),
+ : PhysicalBinaryNode(left, right, kConcreteNodeKind, false),
join_(join_type, orders, condition),
joined_schemas_ctx_(this),
output_right_only_(false) {
- output_type_ = left->GetOutputType();
+ InitOuptput();
RegisterFunctionInfo();
}
@@ -1204,11 +1211,11 @@ class PhysicalJoinNode : public PhysicalBinaryNode {
const node::ExprNode *condition,
const node::ExprListNode *left_keys,
const node::ExprListNode *right_keys)
- : PhysicalBinaryNode(left, right, kPhysicalOpJoin, false),
+ : PhysicalBinaryNode(left, right, kConcreteNodeKind, false),
join_(join_type, condition, left_keys, right_keys),
joined_schemas_ctx_(this),
output_right_only_(false) {
- output_type_ = left->GetOutputType();
+ InitOuptput();
RegisterFunctionInfo();
}
@@ -1218,31 +1225,31 @@ class PhysicalJoinNode : public PhysicalBinaryNode {
const node::ExprNode *condition,
const node::ExprListNode *left_keys,
const node::ExprListNode *right_keys)
- : PhysicalBinaryNode(left, right, kPhysicalOpJoin, false),
+ : PhysicalBinaryNode(left, right, kConcreteNodeKind, false),
join_(join_type, orders, condition, left_keys, right_keys),
joined_schemas_ctx_(this),
output_right_only_(false) {
- output_type_ = left->GetOutputType();
+ InitOuptput();
RegisterFunctionInfo();
}
PhysicalJoinNode(PhysicalOpNode *left, PhysicalOpNode *right,
const Join &join)
- : PhysicalBinaryNode(left, right, kPhysicalOpJoin, false),
+ : PhysicalBinaryNode(left, right, kConcreteNodeKind, false),
join_(join),
joined_schemas_ctx_(this),
output_right_only_(false) {
- output_type_ = left->GetOutputType();
+ InitOuptput();
RegisterFunctionInfo();
}
PhysicalJoinNode(PhysicalOpNode *left, PhysicalOpNode *right,
const Join &join, const bool output_right_only)
- : PhysicalBinaryNode(left, right, kPhysicalOpJoin, false),
+ : PhysicalBinaryNode(left, right, kConcreteNodeKind, false),
join_(join),
joined_schemas_ctx_(this),
output_right_only_(output_right_only) {
- output_type_ = left->GetOutputType();
+ InitOuptput();
RegisterFunctionInfo();
}
@@ -1271,37 +1278,59 @@ class PhysicalJoinNode : public PhysicalBinaryNode {
Join join_;
SchemasContext joined_schemas_ctx_;
const bool output_right_only_;
+
+ private:
+ void InitOuptput() {
+ switch (join_.join_type_) {
+ case node::kJoinTypeLast:
+ case node::kJoinTypeConcat: {
+ output_type_ = GetProducer(0)->GetOutputType();
+ break;
+ }
+ default: {
+ // standard SQL JOINs, always treat as a table output
+ if (GetProducer(0)->GetOutputType() == kSchemaTypeGroup) {
+ output_type_ = kSchemaTypeGroup;
+ } else {
+ output_type_ = kSchemaTypeTable;
+ }
+ break;
+ }
+ }
+ }
};
class PhysicalRequestJoinNode : public PhysicalBinaryNode {
public:
+ static constexpr PhysicalOpType kConcreteNodeKind = kPhysicalOpRequestJoin;
+
PhysicalRequestJoinNode(PhysicalOpNode *left, PhysicalOpNode *right,
const node::JoinType join_type)
- : PhysicalBinaryNode(left, right, kPhysicalOpRequestJoin, false),
+ : PhysicalBinaryNode(left, right, kConcreteNodeKind, false),
join_(join_type),
joined_schemas_ctx_(this),
output_right_only_(false) {
- output_type_ = left->GetOutputType();
+ InitOuptput();
RegisterFunctionInfo();
}
PhysicalRequestJoinNode(PhysicalOpNode *left, PhysicalOpNode *right,
const node::JoinType join_type,
const node::OrderByNode *orders,
const node::ExprNode *condition)
- : PhysicalBinaryNode(left, right, kPhysicalOpRequestJoin, false),
+ : PhysicalBinaryNode(left, right, kConcreteNodeKind, false),
join_(join_type, orders, condition),
joined_schemas_ctx_(this),
output_right_only_(false) {
- output_type_ = left->GetOutputType();
+ InitOuptput();
RegisterFunctionInfo();
}
PhysicalRequestJoinNode(PhysicalOpNode *left, PhysicalOpNode *right,
const Join &join, const bool output_right_only)
- : PhysicalBinaryNode(left, right, kPhysicalOpRequestJoin, false),
+ : PhysicalBinaryNode(left, right, kConcreteNodeKind, false),
join_(join),
joined_schemas_ctx_(this),
output_right_only_(output_right_only) {
- output_type_ = left->GetOutputType();
+ InitOuptput();
RegisterFunctionInfo();
}
@@ -1311,11 +1340,11 @@ class PhysicalRequestJoinNode : public PhysicalBinaryNode {
const node::ExprNode *condition,
const node::ExprListNode *left_keys,
const node::ExprListNode *right_keys)
- : PhysicalBinaryNode(left, right, kPhysicalOpRequestJoin, false),
+ : PhysicalBinaryNode(left, right, kConcreteNodeKind, false),
join_(join_type, condition, left_keys, right_keys),
joined_schemas_ctx_(this),
output_right_only_(false) {
- output_type_ = left->GetOutputType();
+ InitOuptput();
RegisterFunctionInfo();
}
PhysicalRequestJoinNode(PhysicalOpNode *left, PhysicalOpNode *right,
@@ -1324,11 +1353,11 @@ class PhysicalRequestJoinNode : public PhysicalBinaryNode {
const node::ExprNode *condition,
const node::ExprListNode *left_keys,
const node::ExprListNode *right_keys)
- : PhysicalBinaryNode(left, right, kPhysicalOpRequestJoin, false),
+ : PhysicalBinaryNode(left, right, kConcreteNodeKind, false),
join_(join_type, orders, condition, left_keys, right_keys),
joined_schemas_ctx_(this),
output_right_only_(false) {
- output_type_ = left->GetOutputType();
+ InitOuptput();
RegisterFunctionInfo();
}
@@ -1359,6 +1388,26 @@ class PhysicalRequestJoinNode : public PhysicalBinaryNode {
Join join_;
SchemasContext joined_schemas_ctx_;
const bool output_right_only_;
+
+ private:
+ void InitOuptput() {
+ switch (join_.join_type_) {
+ case node::kJoinTypeLast:
+ case node::kJoinTypeConcat: {
+ output_type_ = GetProducer(0)->GetOutputType();
+ break;
+ }
+ default: {
+ // standard SQL JOINs, always treat as a table output
+ if (GetProducer(0)->GetOutputType() == kSchemaTypeGroup) {
+ output_type_ = kSchemaTypeGroup;
+ } else {
+ output_type_ = kSchemaTypeTable;
+ }
+ break;
+ }
+ }
+ }
};
class PhysicalUnionNode : public PhysicalBinaryNode {
@@ -1415,7 +1464,7 @@ class PhysicalRequestUnionNode : public PhysicalBinaryNode {
instance_not_in_window_(false),
exclude_current_time_(false),
output_request_row_(true) {
- output_type_ = kSchemaTypeTable;
+ InitOuptput();
fn_infos_.push_back(&window_.partition_.fn_info());
fn_infos_.push_back(&window_.index_key_.fn_info());
@@ -1427,7 +1476,7 @@ class PhysicalRequestUnionNode : public PhysicalBinaryNode {
instance_not_in_window_(w_ptr->instance_not_in_window()),
exclude_current_time_(w_ptr->exclude_current_time()),
output_request_row_(true) {
- output_type_ = kSchemaTypeTable;
+ InitOuptput();
fn_infos_.push_back(&window_.partition_.fn_info());
fn_infos_.push_back(&window_.sort_.fn_info());
@@ -1443,7 +1492,7 @@ class PhysicalRequestUnionNode : public PhysicalBinaryNode {
instance_not_in_window_(instance_not_in_window),
exclude_current_time_(exclude_current_time),
output_request_row_(output_request_row) {
- output_type_ = kSchemaTypeTable;
+ InitOuptput();
fn_infos_.push_back(&window_.partition_.fn_info());
fn_infos_.push_back(&window_.sort_.fn_info());
@@ -1455,7 +1504,8 @@ class PhysicalRequestUnionNode : public PhysicalBinaryNode {
virtual void Print(std::ostream &output, const std::string &tab) const;
const bool Valid() { return true; }
static PhysicalRequestUnionNode *CastFrom(PhysicalOpNode *node);
- bool AddWindowUnion(PhysicalOpNode *node) {
+ bool AddWindowUnion(PhysicalOpNode *node) { return AddWindowUnion(node, window_); }
+ bool AddWindowUnion(PhysicalOpNode *node, const RequestWindowOp& window) {
if (nullptr == node) {
LOG(WARNING) << "Fail to add window union : table is null";
return false;
@@ -1472,9 +1522,8 @@ class PhysicalRequestUnionNode : public PhysicalBinaryNode {
<< "Union Table and window input schema aren't consistent";
return false;
}
- window_unions_.AddWindowUnion(node, window_);
- RequestWindowOp &window_union =
- window_unions_.window_unions_.back().second;
+ window_unions_.AddWindowUnion(node, window);
+ RequestWindowOp &window_union = window_unions_.window_unions_.back().second;
fn_infos_.push_back(&window_union.partition_.fn_info());
fn_infos_.push_back(&window_union.sort_.fn_info());
fn_infos_.push_back(&window_union.range_.fn_info());
@@ -1484,11 +1533,10 @@ class PhysicalRequestUnionNode : public PhysicalBinaryNode {
std::vector GetDependents() const override;
- const bool instance_not_in_window() const {
- return instance_not_in_window_;
- }
- const bool exclude_current_time() const { return exclude_current_time_; }
- const bool output_request_row() const { return output_request_row_; }
+ bool instance_not_in_window() const { return instance_not_in_window_; }
+ bool exclude_current_time() const { return exclude_current_time_; }
+ bool output_request_row() const { return output_request_row_; }
+ void set_output_request_row(bool flag) { output_request_row_ = flag; }
const RequestWindowOp &window() const { return window_; }
const RequestWindowUnionList &window_unions() const {
return window_unions_;
@@ -1506,10 +1554,20 @@ class PhysicalRequestUnionNode : public PhysicalBinaryNode {
}
RequestWindowOp window_;
- const bool instance_not_in_window_;
- const bool exclude_current_time_;
- const bool output_request_row_;
+ bool instance_not_in_window_;
+ bool exclude_current_time_;
+ bool output_request_row_;
RequestWindowUnionList window_unions_;
+
+ private:
+ void InitOuptput() {
+ auto left = GetProducer(0);
+ if (left->GetOutputType() == kSchemaTypeRow) {
+ output_type_ = kSchemaTypeTable;
+ } else {
+ output_type_ = kSchemaTypeGroup;
+ }
+ }
};
class PhysicalRequestAggUnionNode : public PhysicalOpNode {
@@ -1620,14 +1678,22 @@ class PhysicalFilterNode : public PhysicalUnaryNode {
public:
PhysicalFilterNode(PhysicalOpNode *node, const node::ExprNode *condition)
: PhysicalUnaryNode(node, kPhysicalOpFilter, true), filter_(condition) {
- output_type_ = node->GetOutputType();
+ if (node->GetOutputType() == kSchemaTypeGroup && filter_.index_key_.ValidKey()) {
+ output_type_ = kSchemaTypeTable;
+ } else {
+ output_type_ = node->GetOutputType();
+ }
fn_infos_.push_back(&filter_.condition_.fn_info());
fn_infos_.push_back(&filter_.index_key_.fn_info());
}
PhysicalFilterNode(PhysicalOpNode *node, Filter filter)
: PhysicalUnaryNode(node, kPhysicalOpFilter, true), filter_(filter) {
- output_type_ = node->GetOutputType();
+ if (node->GetOutputType() == kSchemaTypeGroup && filter_.index_key_.ValidKey()) {
+ output_type_ = kSchemaTypeTable;
+ } else {
+ output_type_ = node->GetOutputType();
+ }
fn_infos_.push_back(&filter_.condition_.fn_info());
fn_infos_.push_back(&filter_.index_key_.fn_info());
diff --git a/hybridse/include/vm/simple_catalog.h b/hybridse/include/vm/simple_catalog.h
index 1e1cd78a2f6..fd7c2f3b952 100644
--- a/hybridse/include/vm/simple_catalog.h
+++ b/hybridse/include/vm/simple_catalog.h
@@ -22,7 +22,6 @@
#include
#include
-#include "glog/logging.h"
#include "proto/fe_type.pb.h"
#include "vm/catalog.h"
#include "vm/mem_catalog.h"
diff --git a/hybridse/src/base/fe_slice.cc b/hybridse/src/base/fe_slice.cc
index 9f41c6016ca..c2ca3560741 100644
--- a/hybridse/src/base/fe_slice.cc
+++ b/hybridse/src/base/fe_slice.cc
@@ -25,7 +25,7 @@ void RefCountedSlice::Release() {
if (this->ref_cnt_ != nullptr) {
auto& cnt = *this->ref_cnt_;
cnt -= 1;
- if (cnt == 0) {
+ if (cnt == 0 && buf() != nullptr) {
// memset in case the buf is still used after free
memset(buf(), 0, size());
free(buf());
diff --git a/hybridse/src/node/node_manager.cc b/hybridse/src/node/node_manager.cc
index 8f6f80d7517..f60ba20d6b2 100644
--- a/hybridse/src/node/node_manager.cc
+++ b/hybridse/src/node/node_manager.cc
@@ -1031,11 +1031,6 @@ SqlNode *NodeManager::MakeReplicaNumNode(int num) {
return RegisterNode(node_ptr);
}
-SqlNode *NodeManager::MakeStorageModeNode(StorageMode storage_mode) {
- SqlNode *node_ptr = new StorageModeNode(storage_mode);
- return RegisterNode(node_ptr);
-}
-
SqlNode *NodeManager::MakePartitionNumNode(int num) {
SqlNode *node_ptr = new PartitionNumNode(num);
return RegisterNode(node_ptr);
diff --git a/hybridse/src/node/plan_node_test.cc b/hybridse/src/node/plan_node_test.cc
index 4f0d55d0166..5ffb76142a7 100644
--- a/hybridse/src/node/plan_node_test.cc
+++ b/hybridse/src/node/plan_node_test.cc
@@ -239,7 +239,8 @@ TEST_F(PlanNodeTest, ExtractColumnsAndIndexsTest) {
manager_->MakeColumnDescNode("col3", node::kFloat, true),
manager_->MakeColumnDescNode("col4", node::kVarchar, true),
manager_->MakeColumnDescNode("col5", node::kTimestamp, true), index_node},
- {manager_->MakeReplicaNumNode(3), manager_->MakePartitionNumNode(8), manager_->MakeStorageModeNode(kMemory)},
+ {manager_->MakeReplicaNumNode(3), manager_->MakePartitionNumNode(8),
+ manager_->MakeNode(kMemory)},
false);
ASSERT_TRUE(nullptr != node);
std::vector columns;
diff --git a/hybridse/src/node/sql_node.cc b/hybridse/src/node/sql_node.cc
index 6fa2a82d42a..3847366c148 100644
--- a/hybridse/src/node/sql_node.cc
+++ b/hybridse/src/node/sql_node.cc
@@ -1168,6 +1168,7 @@ static absl::flat_hash_map CreateSqlNodeTypeToNa
{kReplicaNum, "kReplicaNum"},
{kPartitionNum, "kPartitionNum"},
{kStorageMode, "kStorageMode"},
+ {kCompressType, "kCompressType"},
{kFn, "kFn"},
{kFnParaList, "kFnParaList"},
{kCreateSpStmt, "kCreateSpStmt"},
@@ -2603,6 +2604,17 @@ void StorageModeNode::Print(std::ostream &output, const std::string &org_tab) co
PrintValue(output, tab, StorageModeName(storage_mode_), "storage_mode", true);
}
+void CompressTypeNode::Print(std::ostream &output, const std::string &org_tab) const {
+ SqlNode::Print(output, org_tab);
+ const std::string tab = org_tab + INDENT + SPACE_ED;
+ output << "\n";
+ if (compress_type_ == CompressType::kSnappy) {
+ PrintValue(output, tab, "snappy", "compress_type", true);
+ } else {
+ PrintValue(output, tab, "nocompress", "compress_type", true);
+ }
+}
+
void PartitionNumNode::Print(std::ostream &output, const std::string &org_tab) const {
SqlNode::Print(output, org_tab);
const std::string tab = org_tab + INDENT + SPACE_ED;
diff --git a/hybridse/src/node/sql_node_test.cc b/hybridse/src/node/sql_node_test.cc
index 545d9b647fd..227cb80dcea 100644
--- a/hybridse/src/node/sql_node_test.cc
+++ b/hybridse/src/node/sql_node_test.cc
@@ -676,7 +676,7 @@ TEST_F(SqlNodeTest, CreateIndexNodeTest) {
node_manager_->MakeColumnDescNode("col4", node::kVarchar, true),
node_manager_->MakeColumnDescNode("col5", node::kTimestamp, true), index_node},
{node_manager_->MakeReplicaNumNode(3), node_manager_->MakePartitionNumNode(8),
- node_manager_->MakeStorageModeNode(kMemory)},
+ node_manager_->MakeNode(kMemory)},
false);
ASSERT_TRUE(nullptr != node);
std::vector columns;
diff --git a/hybridse/src/passes/physical/batch_request_optimize.cc b/hybridse/src/passes/physical/batch_request_optimize.cc
index 52488e6a981..86fdfee92c5 100644
--- a/hybridse/src/passes/physical/batch_request_optimize.cc
+++ b/hybridse/src/passes/physical/batch_request_optimize.cc
@@ -269,6 +269,7 @@ static Status UpdateProjectExpr(
return replacer.Replace(expr->DeepCopy(ctx->node_manager()), output);
}
+// simplify simple project, remove orphan descendant producer nodes
static Status CreateSimplifiedProject(PhysicalPlanContext* ctx,
PhysicalOpNode* input,
const ColumnProjects& projects,
@@ -279,8 +280,7 @@ static Status CreateSimplifiedProject(PhysicalPlanContext* ctx,
can_project = false;
for (size_t i = 0; i < cur_input->producers().size(); ++i) {
auto cand_input = cur_input->GetProducer(i);
- if (cand_input->GetOutputType() !=
- PhysicalSchemaType::kSchemaTypeRow) {
+ if (cand_input->GetOutputType() != PhysicalSchemaType::kSchemaTypeRow) {
continue;
}
bool is_valid = true;
@@ -949,21 +949,16 @@ Status CommonColumnOptimize::ProcessJoin(PhysicalPlanContext* ctx,
}
} else if (is_non_common_join) {
// join only depend on non-common left part
- if (left_state->non_common_op == join_op->GetProducer(0) &&
- right == join_op->GetProducer(1)) {
+ if (left_state->non_common_op == join_op->GetProducer(0) && right == join_op->GetProducer(1)) {
state->common_op = nullptr;
state->non_common_op = join_op;
} else {
PhysicalRequestJoinNode* new_join = nullptr;
- CHECK_STATUS(ctx->CreateOp(
- &new_join, left_state->non_common_op, right, join_op->join(),
- join_op->output_right_only()));
- CHECK_STATUS(ReplaceComponentExpr(
- join_op->join(), join_op->joined_schemas_ctx(),
- new_join->joined_schemas_ctx(), ctx->node_manager(),
- &new_join->join_));
- state->common_op =
- join_op->output_right_only() ? nullptr : left_state->common_op;
+ CHECK_STATUS(ctx->CreateOp(&new_join, left_state->non_common_op, right,
+ join_op->join(), join_op->output_right_only()));
+ CHECK_STATUS(ReplaceComponentExpr(join_op->join(), join_op->joined_schemas_ctx(),
+ new_join->joined_schemas_ctx(), ctx->node_manager(), &new_join->join_));
+ state->common_op = join_op->output_right_only() ? nullptr : left_state->common_op;
state->non_common_op = new_join;
if (!join_op->output_right_only()) {
for (size_t left_idx : left_state->common_column_indices) {
diff --git a/hybridse/src/passes/physical/batch_request_optimize_test.cc b/hybridse/src/passes/physical/batch_request_optimize_test.cc
index e53b7c377e2..48259b68ed4 100644
--- a/hybridse/src/passes/physical/batch_request_optimize_test.cc
+++ b/hybridse/src/passes/physical/batch_request_optimize_test.cc
@@ -54,6 +54,9 @@ INSTANTIATE_TEST_SUITE_P(
INSTANTIATE_TEST_SUITE_P(
BatchRequestLastJoinQuery, BatchRequestOptimizeTest,
testing::ValuesIn(sqlcase::InitCases("cases/query/last_join_query.yaml")));
+INSTANTIATE_TEST_SUITE_P(
+ BatchRequestLeftJoin, BatchRequestOptimizeTest,
+ testing::ValuesIn(sqlcase::InitCases("cases/query/left_join.yml")));
INSTANTIATE_TEST_SUITE_P(
BatchRequestLastJoinWindowQuery, BatchRequestOptimizeTest,
testing::ValuesIn(sqlcase::InitCases("cases/query/last_join_window_query.yaml")));
diff --git a/hybridse/src/passes/physical/group_and_sort_optimized.cc b/hybridse/src/passes/physical/group_and_sort_optimized.cc
index ae333b6af47..2d51b336167 100644
--- a/hybridse/src/passes/physical/group_and_sort_optimized.cc
+++ b/hybridse/src/passes/physical/group_and_sort_optimized.cc
@@ -25,6 +25,7 @@
#include "absl/cleanup/cleanup.h"
#include "absl/status/status.h"
#include "absl/strings/string_view.h"
+#include "node/node_enum.h"
#include "vm/physical_op.h"
namespace hybridse {
@@ -294,6 +295,7 @@ bool GroupAndSortOptimized::KeysOptimized(const SchemasContext* root_schemas_ctx
absl::Cleanup clean = [&]() {
expr_cache_.clear();
+ optimize_info_ = nullptr;
};
auto s = BuildExprCache(left_key->keys(), root_schemas_ctx);
@@ -347,6 +349,18 @@ bool GroupAndSortOptimized::KeysOptimizedImpl(const SchemasContext* root_schemas
if (DataProviderType::kProviderTypeTable == scan_op->provider_type_ ||
DataProviderType::kProviderTypePartition == scan_op->provider_type_) {
+ auto* table_node = dynamic_cast(scan_op);
+ if (optimize_info_) {
+ if (optimize_info_->left_key == left_key && optimize_info_->index_key == index_key &&
+ optimize_info_->right_key == right_key && optimize_info_->sort_key == sort) {
+ if (optimize_info_->optimized != nullptr &&
+ table_node->GetDb() == optimize_info_->optimized->GetDb() &&
+ table_node->GetName() == optimize_info_->optimized->GetName()) {
+ *new_in = optimize_info_->optimized;
+ return true;
+ }
+ }
+ }
const node::ExprListNode* right_partition =
right_key == nullptr ? left_key->keys() : right_key->keys();
@@ -453,13 +467,15 @@ bool GroupAndSortOptimized::KeysOptimizedImpl(const SchemasContext* root_schemas
dynamic_cast(node_manager_->MakeOrderByNode(node_manager_->MakeExprList(
node_manager_->MakeOrderExpression(nullptr, first_order_expression->is_asc())))));
}
+
+ optimize_info_.reset(new OptimizeInfo(left_key, index_key, right_key, sort, partition_op));
*new_in = partition_op;
return true;
}
} else if (PhysicalOpType::kPhysicalOpSimpleProject == in->GetOpType()) {
PhysicalOpNode* new_depend;
- if (!KeysOptimizedImpl(in->GetProducer(0)->schemas_ctx(), in->GetProducer(0), left_key, index_key, right_key, sort,
- &new_depend)) {
+ if (!KeysOptimizedImpl(in->GetProducer(0)->schemas_ctx(), in->GetProducer(0), left_key, index_key, right_key,
+ sort, &new_depend)) {
return false;
}
@@ -493,7 +509,8 @@ bool GroupAndSortOptimized::KeysOptimizedImpl(const SchemasContext* root_schemas
PhysicalFilterNode* filter_op = dynamic_cast(in);
PhysicalOpNode* new_depend;
- if (!KeysOptimizedImpl(root_schemas_ctx, in->producers()[0], left_key, index_key, right_key, sort, &new_depend)) {
+ if (!KeysOptimizedImpl(root_schemas_ctx, in->producers()[0], left_key, index_key, right_key, sort,
+ &new_depend)) {
return false;
}
PhysicalFilterNode* new_filter = nullptr;
@@ -515,8 +532,16 @@ bool GroupAndSortOptimized::KeysOptimizedImpl(const SchemasContext* root_schemas
&new_depend)) {
return false;
}
+ PhysicalOpNode* new_right = in->GetProducer(1);
+ if (request_join->join_.join_type_ == node::kJoinTypeConcat) {
+ // for concat join, only acceptable if the two inputs (of course same table) optimized by the same index
+ auto* rebase_sc = in->GetProducer(1)->schemas_ctx();
+ if (!KeysOptimizedImpl(rebase_sc, in->GetProducer(1), left_key, index_key, right_key, sort, &new_right)) {
+ return false;
+ }
+ }
PhysicalRequestJoinNode* new_join = nullptr;
- auto s = plan_ctx_->CreateOp(&new_join, new_depend, request_join->GetProducer(1),
+ auto s = plan_ctx_->CreateOp(&new_join, new_depend, new_right,
request_join->join(), request_join->output_right_only());
if (!s.isOK()) {
LOG(WARNING) << "Fail to create new request join op: " << s;
@@ -545,6 +570,57 @@ bool GroupAndSortOptimized::KeysOptimizedImpl(const SchemasContext* root_schemas
*new_in = new_join;
return true;
+ } else if (PhysicalOpType::kPhysicalOpProject == in->GetOpType()) {
+ auto * project = dynamic_cast(in);
+ if (project == nullptr || project->project_type_ != vm::kAggregation) {
+ return false;
+ }
+
+ auto * agg_project = dynamic_cast(in);
+
+ PhysicalOpNode* new_depend = nullptr;
+ auto* rebase_sc = in->GetProducer(0)->schemas_ctx();
+ if (!KeysOptimizedImpl(rebase_sc, in->GetProducer(0), left_key, index_key, right_key, sort,
+ &new_depend)) {
+ return false;
+ }
+
+ vm::PhysicalAggregationNode* new_agg = nullptr;
+ if (!plan_ctx_
+ ->CreateOp(&new_agg, new_depend, agg_project->project(),
+ agg_project->having_condition_.condition())
+ .isOK()) {
+ return false;
+ }
+ *new_in = new_agg;
+ return true;
+ } else if (PhysicalOpType::kPhysicalOpRequestUnion == in->GetOpType()) {
+ // JOIN (..., AGG(REQUEST_UNION(left, ...))): JOIN condition optimizing left
+ PhysicalOpNode* new_left_depend = nullptr;
+ auto* rebase_sc = in->GetProducer(0)->schemas_ctx();
+ if (!KeysOptimizedImpl(rebase_sc, in->GetProducer(0), left_key, index_key, right_key, sort,
+ &new_left_depend)) {
+ return false;
+ }
+
+ auto * request_union = dynamic_cast(in);
+
+ vm::PhysicalRequestUnionNode* new_union = nullptr;
+ if (!plan_ctx_
+ ->CreateOp(
+ &new_union, new_left_depend, in->GetProducer(1), request_union->window(),
+ request_union->instance_not_in_window(), request_union->exclude_current_time(),
+ request_union->output_request_row())
+ .isOK()) {
+ return false;
+ }
+ for (auto& pair : request_union->window_unions().window_unions_) {
+ if (!new_union->AddWindowUnion(pair.first, pair.second)) {
+ return false;
+ }
+ }
+ *new_in = new_union;
+ return true;
}
return false;
}
diff --git a/hybridse/src/passes/physical/group_and_sort_optimized.h b/hybridse/src/passes/physical/group_and_sort_optimized.h
index 1d410f2b8e8..2e50571b29d 100644
--- a/hybridse/src/passes/physical/group_and_sort_optimized.h
+++ b/hybridse/src/passes/physical/group_and_sort_optimized.h
@@ -93,6 +93,17 @@ class GroupAndSortOptimized : public TransformUpPysicalPass {
std::string db_name;
};
+ struct OptimizeInfo {
+ OptimizeInfo(const Key* left_key, const Key* index_key, const Key* right_key, const Sort* s,
+ vm::PhysicalPartitionProviderNode* optimized)
+ : left_key(left_key), index_key(index_key), right_key(right_key), sort_key(s), optimized(optimized) {}
+ const Key* left_key;
+ const Key* index_key;
+ const Key* right_key;
+ const Sort* sort_key;
+ vm::PhysicalPartitionProviderNode* optimized;
+ };
+
private:
bool Transform(PhysicalOpNode* in, PhysicalOpNode** output);
@@ -149,6 +160,8 @@ class GroupAndSortOptimized : public TransformUpPysicalPass {
// A source column name is the column name in string that refers to a physical table,
// only one table got optimized each time
std::unordered_map expr_cache_;
+
+ std::unique_ptr optimize_info_;
};
} // namespace passes
} // namespace hybridse
diff --git a/hybridse/src/passes/physical/transform_up_physical_pass.h b/hybridse/src/passes/physical/transform_up_physical_pass.h
index fed721d4c66..a9a80bd90b4 100644
--- a/hybridse/src/passes/physical/transform_up_physical_pass.h
+++ b/hybridse/src/passes/physical/transform_up_physical_pass.h
@@ -17,7 +17,6 @@
#define HYBRIDSE_SRC_PASSES_PHYSICAL_TRANSFORM_UP_PHYSICAL_PASS_H_
#include
-#include
#include
#include
diff --git a/hybridse/src/plan/planner.cc b/hybridse/src/plan/planner.cc
index 1584d76acbb..fc350d1ffb6 100644
--- a/hybridse/src/plan/planner.cc
+++ b/hybridse/src/plan/planner.cc
@@ -272,7 +272,7 @@ base::Status Planner::CreateSelectQueryPlan(const node::SelectQueryNode *root, n
auto first_window_project = dynamic_cast(project_list_vec[1]);
node::ProjectListNode *merged_project =
node_manager_->MakeProjectListPlanNode(first_window_project->GetW(), true);
- if (!is_cluster_optimized_ && !enable_batch_window_parallelization_ &&
+ if (!is_cluster_optimized_ && !enable_batch_window_parallelization_ &&
node::ProjectListNode::MergeProjectList(simple_project, first_window_project, merged_project)) {
project_list_vec[0] = nullptr;
project_list_vec[1] = merged_project;
diff --git a/hybridse/src/planv2/ast_node_converter.cc b/hybridse/src/planv2/ast_node_converter.cc
index c0c3864716b..f2fa6fad4e2 100644
--- a/hybridse/src/planv2/ast_node_converter.cc
+++ b/hybridse/src/planv2/ast_node_converter.cc
@@ -1113,13 +1113,13 @@ base::Status ConvertTableExpressionNode(const zetasql::ASTTableExpression* root,
node::TableRefNode* right = nullptr;
node::OrderByNode* order_by = nullptr;
node::ExprNode* condition = nullptr;
- node::JoinType join_type = node::JoinType::kJoinTypeInner;
CHECK_STATUS(ConvertTableExpressionNode(join->lhs(), node_manager, &left))
CHECK_STATUS(ConvertTableExpressionNode(join->rhs(), node_manager, &right))
CHECK_STATUS(ConvertOrderBy(join->order_by(), node_manager, &order_by))
if (nullptr != join->on_clause()) {
CHECK_STATUS(ConvertExprNode(join->on_clause()->expression(), node_manager, &condition))
}
+ node::JoinType join_type = node::JoinType::kJoinTypeInner;
switch (join->join_type()) {
case zetasql::ASTJoin::JoinType::FULL: {
join_type = node::JoinType::kJoinTypeFull;
@@ -1137,12 +1137,14 @@ base::Status ConvertTableExpressionNode(const zetasql::ASTTableExpression* root,
join_type = node::JoinType::kJoinTypeLast;
break;
}
- case zetasql::ASTJoin::JoinType::INNER: {
+ case zetasql::ASTJoin::JoinType::INNER:
+ case zetasql::ASTJoin::JoinType::DEFAULT_JOIN_TYPE: {
join_type = node::JoinType::kJoinTypeInner;
break;
}
- case zetasql::ASTJoin::JoinType::COMMA: {
- join_type = node::JoinType::kJoinTypeComma;
+ case zetasql::ASTJoin::JoinType::COMMA:
+ case zetasql::ASTJoin::JoinType::CROSS: {
+ join_type = node::JoinType::kJoinTypeCross;
break;
}
default: {
@@ -1290,6 +1292,7 @@ base::Status ConvertQueryExpr(const zetasql::ASTQueryExpression* query_expressio
if (nullptr != select_query->from_clause()) {
CHECK_STATUS(ConvertTableExpressionNode(select_query->from_clause()->table_expression(), node_manager,
&table_ref_node))
+ // TODO(.): don't mark table ref as a list, it never happens
if (nullptr != table_ref_node) {
tableref_list_ptr = node_manager->MakeNodeList();
tableref_list_ptr->PushBack(table_ref_node);
@@ -1761,8 +1764,18 @@ base::Status ConvertTableOption(const zetasql::ASTOptionsEntry* entry, node::Nod
} else if (absl::EqualsIgnoreCase("storage_mode", identifier_v)) {
std::string storage_mode;
CHECK_STATUS(AstStringLiteralToString(entry->value(), &storage_mode));
- boost::to_lower(storage_mode);
- *output = node_manager->MakeStorageModeNode(node::NameToStorageMode(storage_mode));
+ absl::AsciiStrToLower(&storage_mode);
+ *output = node_manager->MakeNode(node::NameToStorageMode(storage_mode));
+ } else if (absl::EqualsIgnoreCase("compress_type", identifier_v)) {
+ std::string compress_type;
+ CHECK_STATUS(AstStringLiteralToString(entry->value(), &compress_type));
+ absl::AsciiStrToLower(&compress_type);
+ auto ret = node::NameToCompressType(compress_type);
+ if (ret.ok()) {
+ *output = node_manager->MakeNode(*ret);
+ } else {
+ return base::Status(common::kSqlAstError, ret.status().ToString());
+ }
} else {
return base::Status(common::kSqlAstError, absl::StrCat("invalid option ", identifier));
}
diff --git a/hybridse/src/testing/engine_test_base.cc b/hybridse/src/testing/engine_test_base.cc
index 2c3134d1257..4992b6b5018 100644
--- a/hybridse/src/testing/engine_test_base.cc
+++ b/hybridse/src/testing/engine_test_base.cc
@@ -533,9 +533,13 @@ INSTANTIATE_TEST_SUITE_P(EngineExtreamQuery, EngineTest,
INSTANTIATE_TEST_SUITE_P(EngineLastJoinQuery, EngineTest,
testing::ValuesIn(sqlcase::InitCases("cases/query/last_join_query.yaml")));
+INSTANTIATE_TEST_SUITE_P(EngineLeftJoin, EngineTest,
+ testing::ValuesIn(sqlcase::InitCases("cases/query/left_join.yml")));
INSTANTIATE_TEST_SUITE_P(EngineLastJoinWindowQuery, EngineTest,
testing::ValuesIn(sqlcase::InitCases("cases/query/last_join_window_query.yaml")));
+INSTANTIATE_TEST_SUITE_P(EngineLastJoinSubqueryWindow, EngineTest,
+ testing::ValuesIn(sqlcase::InitCases("cases/query/last_join_subquery_window.yml")));
INSTANTIATE_TEST_SUITE_P(EngineLastJoinWhere, EngineTest,
testing::ValuesIn(sqlcase::InitCases("cases/query/last_join_where.yaml")));
INSTANTIATE_TEST_SUITE_P(EngineWindowQuery, EngineTest,
diff --git a/hybridse/src/testing/engine_test_base.h b/hybridse/src/testing/engine_test_base.h
index e759169f0fd..0805ff1b3c5 100644
--- a/hybridse/src/testing/engine_test_base.h
+++ b/hybridse/src/testing/engine_test_base.h
@@ -318,8 +318,7 @@ class BatchRequestEngineTestRunner : public EngineTestRunner {
bool has_batch_request = !sql_case_.batch_request().columns_.empty();
if (!has_batch_request) {
- LOG(WARNING) << "No batch request field in case, "
- << "try use last row from primary input";
+ LOG(WARNING) << "No batch request field in case, try use last row from primary input";
}
std::vector original_request_data;
diff --git a/hybridse/src/vm/catalog_wrapper.cc b/hybridse/src/vm/catalog_wrapper.cc
index d134a92e51b..fbdd337e869 100644
--- a/hybridse/src/vm/catalog_wrapper.cc
+++ b/hybridse/src/vm/catalog_wrapper.cc
@@ -28,7 +28,7 @@ std::shared_ptr PartitionProjectWrapper::GetSegment(
new TableProjectWrapper(segment, parameter_, fun_));
}
}
-base::ConstIterator* PartitionProjectWrapper::GetRawIterator() {
+codec::RowIterator* PartitionProjectWrapper::GetRawIterator() {
auto iter = partition_handler_->GetIterator();
if (!iter) {
return nullptr;
@@ -47,7 +47,7 @@ std::shared_ptr PartitionFilterWrapper::GetSegment(
new TableFilterWrapper(segment, parameter_, fun_));
}
}
-base::ConstIterator* PartitionFilterWrapper::GetRawIterator() {
+codec::RowIterator* PartitionFilterWrapper::GetRawIterator() {
auto iter = partition_handler_->GetIterator();
if (!iter) {
return nullptr;
@@ -76,10 +76,6 @@ std::shared_ptr TableFilterWrapper::GetPartition(
}
}
-LazyLastJoinIterator::LazyLastJoinIterator(std::unique_ptr&& left, std::shared_ptr right,
- const Row& param, std::shared_ptr join)
- : left_it_(std::move(left)), right_(right), parameter_(param), join_(join) {}
-
void LazyLastJoinIterator::Seek(const uint64_t& key) { left_it_->Seek(key); }
void LazyLastJoinIterator::SeekToFirst() { left_it_->SeekToFirst(); }
@@ -90,49 +86,36 @@ void LazyLastJoinIterator::Next() { left_it_->Next(); }
bool LazyLastJoinIterator::Valid() const { return left_it_ && left_it_->Valid(); }
-LazyLastJoinTableHandler::LazyLastJoinTableHandler(std::shared_ptr left,
- std::shared_ptr right, const Row& param,
+LazyJoinPartitionHandler::LazyJoinPartitionHandler(std::shared_ptr left,
+ std::shared_ptr right, const Row& param,
std::shared_ptr join)
: left_(left), right_(right), parameter_(param), join_(join) {}
-LazyLastJoinPartitionHandler::LazyLastJoinPartitionHandler(std::shared_ptr left,
- std::shared_ptr right, const Row& param,
- std::shared_ptr join)
- : left_(left), right_(right), parameter_(param), join_(join) {}
-
-std::shared_ptr LazyLastJoinPartitionHandler::GetSegment(const std::string& key) {
+std::shared_ptr LazyJoinPartitionHandler::GetSegment(const std::string& key) {
auto left_seg = left_->GetSegment(key);
- return std::shared_ptr(new LazyLastJoinTableHandler(left_seg, right_, parameter_, join_));
+ return std::shared_ptr(new LazyJoinTableHandler(left_seg, right_, parameter_, join_));
}
-std::shared_ptr LazyLastJoinTableHandler::GetPartition(const std::string& index_name) {
+std::shared_ptr LazyJoinTableHandler::GetPartition(const std::string& index_name) {
return std::shared_ptr(
- new LazyLastJoinPartitionHandler(left_->GetPartition(index_name), right_, parameter_, join_));
+ new LazyJoinPartitionHandler(left_->GetPartition(index_name), right_, parameter_, join_));
}
-std::unique_ptr LazyLastJoinTableHandler::GetIterator() {
- auto iter = left_->GetIterator();
- if (!iter) {
- return std::unique_ptr();
- }
-
- return std::unique_ptr(new LazyLastJoinIterator(std::move(iter), right_, parameter_, join_));
-}
-std::unique_ptr LazyLastJoinPartitionHandler::GetIterator() {
+codec::RowIterator* LazyJoinPartitionHandler::GetRawIterator() {
auto iter = left_->GetIterator();
if (!iter) {
- return std::unique_ptr();
+ return nullptr;
}
- return std::unique_ptr(new LazyLastJoinIterator(std::move(iter), right_, parameter_, join_));
+ return new LazyLastJoinIterator(std::move(iter), right_, parameter_, join_);
}
-std::unique_ptr LazyLastJoinPartitionHandler::GetWindowIterator() {
+std::unique_ptr LazyJoinPartitionHandler::GetWindowIterator() {
auto wi = left_->GetWindowIterator();
if (wi == nullptr) {
return std::unique_ptr();
}
- return std::unique_ptr(new LazyLastJoinWindowIterator(std::move(wi), right_, parameter_, join_));
+ return std::unique_ptr(new LazyJoinWindowIterator(std::move(wi), right_, parameter_, join_));
}
const Row& LazyLastJoinIterator::GetValue() {
@@ -140,29 +123,279 @@ const Row& LazyLastJoinIterator::GetValue() {
return value_;
}
-std::unique_ptr LazyLastJoinTableHandler::GetWindowIterator(const std::string& idx_name) {
- return nullptr;
+codec::RowIterator* LazyJoinTableHandler::GetRawIterator() {
+ auto iter = left_->GetIterator();
+ if (!iter) {
+ return {};
+ }
+
+ switch (join_->join_type_) {
+ case node::kJoinTypeLast:
+ return new LazyLastJoinIterator(std::move(iter), right_, parameter_, join_);
+ case node::kJoinTypeLeft:
+ return new LazyLeftJoinIterator(std::move(iter), right_, parameter_, join_);
+ default:
+ return {};
+ }
}
-LazyLastJoinWindowIterator::LazyLastJoinWindowIterator(std::unique_ptr&& iter,
- std::shared_ptr right, const Row& param,
- std::shared_ptr join)
+LazyJoinWindowIterator::LazyJoinWindowIterator(std::unique_ptr&& iter,
+ std::shared_ptr right, const Row& param,
+ std::shared_ptr join)
: left_(std::move(iter)), right_(right), parameter_(param), join_(join) {}
-std::unique_ptr LazyLastJoinWindowIterator::GetValue() {
+
+codec::RowIterator* LazyJoinWindowIterator::GetRawValue() {
auto iter = left_->GetValue();
if (!iter) {
- return std::unique_ptr();
+ return nullptr;
}
- return std::unique_ptr(new LazyLastJoinIterator(std::move(iter), right_, parameter_, join_));
+ switch (join_->join_type_) {
+ case node::kJoinTypeLast:
+ return new LazyLastJoinIterator(std::move(iter), right_, parameter_, join_);
+ case node::kJoinTypeLeft:
+ return new LazyLeftJoinIterator(std::move(iter), right_, parameter_, join_);
+ default:
+ return {};
+ }
}
-RowIterator* LazyLastJoinWindowIterator::GetRawValue() {
- auto iter = left_->GetValue();
- if (!iter) {
+
+std::shared_ptr ConcatPartitionHandler::GetSegment(const std::string& key) {
+ auto left_seg = left_->GetSegment(key);
+ auto right_seg = right_->GetSegment(key);
+ return std::shared_ptr(
+ new SimpleConcatTableHandler(left_seg, left_slices_, right_seg, right_slices_));
+}
+
+RowIterator* ConcatPartitionHandler::GetRawIterator() {
+ auto li = left_->GetIterator();
+ if (!li) {
return nullptr;
}
+ auto ri = right_->GetIterator();
+ return new ConcatIterator(std::move(li), left_slices_, std::move(ri), right_slices_);
+}
+
+std::unique_ptr LazyRequestUnionPartitionHandler::GetWindowIterator() {
+ auto w = left_->GetWindowIterator();
+ if (!w) {
+ return {};
+ }
- return new LazyLastJoinIterator(std::move(iter), right_, parameter_, join_);
+ return std::unique_ptr(new LazyRequestUnionWindowIterator(std::move(w), func_));
+}
+
+std::shared_ptr LazyRequestUnionPartitionHandler::GetSegment(const std::string& key) {
+ return nullptr;
+}
+
+const IndexHint& LazyRequestUnionPartitionHandler::GetIndex() { return left_->GetIndex(); }
+
+const Types& LazyRequestUnionPartitionHandler::GetTypes() { return left_->GetTypes(); }
+
+codec::RowIterator* LazyRequestUnionPartitionHandler::GetRawIterator() { return nullptr; }
+
+bool LazyAggIterator::Valid() const { return it_->Valid(); }
+void LazyAggIterator::Next() { it_->Next(); }
+const uint64_t& LazyAggIterator::GetKey() const { return it_->GetKey(); }
+const Row& LazyAggIterator::GetValue() {
+ if (Valid()) {
+ auto request = it_->GetValue();
+ auto window = func_(request);
+ if (window) {
+ buf_ = agg_gen_->Gen(parameter_, window);
+ return buf_;
+ }
+ }
+
+ buf_ = Row();
+ return buf_;
+}
+
+void LazyAggIterator::Seek(const uint64_t& key) { it_->Seek(key); }
+void LazyAggIterator::SeekToFirst() { it_->SeekToFirst(); }
+
+codec::RowIterator* LazyAggTableHandler::GetRawIterator() {
+ auto it = left_->GetIterator();
+ if (!it) {
+ return nullptr;
+ }
+ return new LazyAggIterator(std::move(it), func_, agg_gen_, parameter_);
+}
+
+const Types& LazyAggTableHandler::GetTypes() { return left_->GetTypes(); }
+const IndexHint& LazyAggTableHandler::GetIndex() { return left_->GetIndex(); }
+const Schema* LazyAggTableHandler::GetSchema() { return nullptr; }
+const std::string& LazyAggTableHandler::GetName() { return left_->GetName(); }
+const std::string& LazyAggTableHandler::GetDatabase() { return left_->GetDatabase(); }
+std::shared_ptr LazyAggPartitionHandler::GetSegment(const std::string& key) {
+ auto seg = input_->Left()->GetSegment(key);
+ return std::shared_ptr(new LazyAggTableHandler(seg, input_->Func(), agg_gen_, parameter_));
+}
+const std::string LazyAggPartitionHandler::GetHandlerTypeName() { return "LazyAggPartitionHandler"; }
+
+codec::RowIterator* LazyAggPartitionHandler::GetRawIterator() {
+ auto it = input_->Left()->GetIterator();
+ return new LazyAggIterator(std::move(it), input_->Func(), agg_gen_, parameter_);
+}
+
+bool ConcatIterator::Valid() const { return left_ && left_->Valid(); }
+void ConcatIterator::Next() {
+ left_->Next();
+ if (right_ && right_->Valid()) {
+ right_->Next();
+ }
+}
+const uint64_t& ConcatIterator::GetKey() const { return left_->GetKey(); }
+const Row& ConcatIterator::GetValue() {
+ if (!right_ || !right_->Valid()) {
+ buf_ = Row(left_slices_, left_->GetValue(), right_slices_, Row());
+ } else {
+ buf_ = Row(left_slices_, left_->GetValue(), right_slices_, right_->GetValue());
+ }
+ return buf_;
+}
+void ConcatIterator::Seek(const uint64_t& key) {
+ left_->Seek(key);
+ if (right_ && right_->Valid()) {
+ right_->Seek(key);
+ }
+}
+void ConcatIterator::SeekToFirst() {
+ left_->SeekToFirst();
+ if (right_) {
+ right_->SeekToFirst();
+ }
+}
+RowIterator* SimpleConcatTableHandler::GetRawIterator() {
+ auto li = left_->GetIterator();
+ if (!li) {
+ return nullptr;
+ }
+ auto ri = right_->GetIterator();
+ return new ConcatIterator(std::move(li), left_slices_, std::move(ri), right_slices_);
+}
+std::unique_ptr ConcatPartitionHandler::GetWindowIterator() { return nullptr; }
+
+std::unique_ptr LazyAggPartitionHandler::GetWindowIterator() {
+ auto w = input_->Left()->GetWindowIterator();
+ return std::unique_ptr(
+ new LazyAggWindowIterator(std::move(w), input_->Func(), agg_gen_, parameter_));
+}
+
+RowIterator* LazyAggWindowIterator::GetRawValue() {
+ auto w = left_->GetValue();
+ if (!w) {
+ return nullptr;
+ }
+
+ return new LazyAggIterator(std::move(w), func_, agg_gen_, parameter_);
+}
+void LazyRequestUnionIterator::Next() {
+ if (Valid()) {
+ cur_iter_->Next();
+ }
+ if (!Valid()) {
+ left_->Next();
+ OnNewRow();
+ }
+}
+bool LazyRequestUnionIterator::Valid() const { return cur_iter_ && cur_iter_->Valid(); }
+void LazyRequestUnionIterator::Seek(const uint64_t& key) {
+ left_->Seek(key);
+ OnNewRow(false);
+}
+void LazyRequestUnionIterator::SeekToFirst() {
+ left_->SeekToFirst();
+ OnNewRow();
+}
+void LazyRequestUnionIterator::OnNewRow(bool continue_on_empty) {
+ while (left_->Valid()) {
+ auto row = left_->GetValue();
+ auto tb = func_(row);
+ if (tb) {
+ auto it = tb->GetIterator();
+ if (it) {
+ it->SeekToFirst();
+ if (it->Valid()) {
+ cur_window_ = tb;
+ cur_iter_ = std::move(it);
+ break;
+ }
+ }
+ }
+
+ if (continue_on_empty) {
+ left_->Next();
+ } else {
+ cur_window_ = {};
+ cur_iter_ = {};
+ break;
+ }
+ }
+}
+const uint64_t& LazyRequestUnionIterator::GetKey() const { return cur_iter_->GetKey(); }
+const Row& LazyRequestUnionIterator::GetValue() { return cur_iter_->GetValue(); }
+RowIterator* LazyRequestUnionWindowIterator::GetRawValue() {
+ auto rows = left_->GetValue();
+ if (!rows) {
+ return {};
+ }
+
+ return new LazyRequestUnionIterator(std::move(rows), func_);
+}
+bool LazyRequestUnionWindowIterator::Valid() { return left_ && left_->Valid(); }
+const Row LazyRequestUnionWindowIterator::GetKey() { return left_->GetKey(); }
+void LazyRequestUnionWindowIterator::SeekToFirst() { left_->SeekToFirst(); }
+void LazyRequestUnionWindowIterator::Seek(const std::string& key) { left_->Seek(key); }
+void LazyRequestUnionWindowIterator::Next() { left_->Next(); }
+const std::string LazyJoinPartitionHandler::GetHandlerTypeName() {
+ return "LazyJoinPartitionHandler(" + node::JoinTypeName(join_->join_type_) + ")";
+}
+const std::string LazyJoinTableHandler::GetHandlerTypeName() {
+ return "LazyJoinTableHandler(" + node::JoinTypeName(join_->join_type_) + ")";
+}
+void LazyLeftJoinIterator::Next() {
+ if (right_it_ && right_it_->Valid()) {
+ right_it_->Next();
+ auto res = join_->RowJoinIterator(left_value_, right_it_, parameter_);
+ matches_right_ |= res.second;
+ if (matches_right_ && !right_it_->Valid()) {
+ // matched from right somewhere, skip the NULL match
+ left_it_->Next();
+ onNewLeftRow();
+ } else {
+ // RowJoinIterator returns NULL match by default
+ value_ = res.first;
+ }
+ } else {
+ left_it_->Next();
+ onNewLeftRow();
+ }
+}
+void LazyLeftJoinIterator::onNewLeftRow() {
+ // reset
+ right_it_ = nullptr;
+ left_value_ = Row();
+ value_ = Row();
+ matches_right_ = false;
+
+ if (!left_it_->Valid()) {
+ // end of iterator
+ return;
+ }
+
+ left_value_ = left_it_->GetValue();
+ if (right_partition_) {
+ right_it_ = join_->InitRight(left_value_, right_partition_, parameter_);
+ } else {
+ right_it_ = right_->GetIterator();
+ right_it_->SeekToFirst();
+ }
+
+ auto res = join_->RowJoinIterator(left_value_, right_it_, parameter_);
+ value_ = res.first;
+ matches_right_ |= res.second;
}
} // namespace vm
} // namespace hybridse
diff --git a/hybridse/src/vm/catalog_wrapper.h b/hybridse/src/vm/catalog_wrapper.h
index 11441b4bf54..bfd1265aa82 100644
--- a/hybridse/src/vm/catalog_wrapper.h
+++ b/hybridse/src/vm/catalog_wrapper.h
@@ -17,10 +17,13 @@
#ifndef HYBRIDSE_SRC_VM_CATALOG_WRAPPER_H_
#define HYBRIDSE_SRC_VM_CATALOG_WRAPPER_H_
+#include <map>
#include <memory>
#include <string>
#include <utility>
+#include "absl/base/attributes.h"
+#include "codec/row_iterator.h"
#include "vm/catalog.h"
#include "vm/generator.h"
@@ -142,15 +145,6 @@ class WindowIteratorProjectWrapper : public WindowIterator {
const ProjectFun* fun)
: WindowIterator(), iter_(std::move(iter)), parameter_(parameter), fun_(fun) {}
virtual ~WindowIteratorProjectWrapper() {}
- std::unique_ptr<RowIterator> GetValue() override {
- auto iter = iter_->GetValue();
- if (!iter) {
- return std::unique_ptr<RowIterator>();
- } else {
- return std::unique_ptr<RowIterator>(
- new IteratorProjectWrapper(std::move(iter), parameter_, fun_));
- }
- }
RowIterator* GetRawValue() override {
auto iter = iter_->GetValue();
if (!iter) {
@@ -176,15 +170,6 @@ class WindowIteratorFilterWrapper : public WindowIterator {
const PredicateFun* fun)
: WindowIterator(), iter_(std::move(iter)), parameter_(parameter), fun_(fun) {}
virtual ~WindowIteratorFilterWrapper() {}
- std::unique_ptr<RowIterator> GetValue() override {
- auto iter = iter_->GetValue();
- if (!iter) {
- return std::unique_ptr<RowIterator>();
- } else {
- return std::unique_ptr<RowIterator>(
- new IteratorFilterWrapper(std::move(iter), parameter_, fun_));
- }
- }
RowIterator* GetRawValue() override {
auto iter = iter_->GetValue();
if (!iter) {
@@ -240,16 +225,7 @@ class PartitionProjectWrapper : public PartitionHandler {
const std::string& GetDatabase() override {
return partition_handler_->GetDatabase();
}
- std::unique_ptr<base::ConstIterator<uint64_t, Row>> GetIterator() override {
- auto iter = partition_handler_->GetIterator();
- if (!iter) {
- return std::unique_ptr<RowIterator>();
- } else {
- return std::unique_ptr<RowIterator>(
- new IteratorProjectWrapper(std::move(iter), parameter_, fun_));
- }
- }
- base::ConstIterator<uint64_t, Row>* GetRawIterator() override;
+ codec::RowIterator* GetRawIterator() override;
Row At(uint64_t pos) override {
value_ = fun_->operator()(partition_handler_->At(pos), parameter_);
return value_;
@@ -303,16 +279,8 @@ class PartitionFilterWrapper : public PartitionHandler {
const std::string& GetDatabase() override {
return partition_handler_->GetDatabase();
}
- std::unique_ptr<base::ConstIterator<uint64_t, Row>> GetIterator() override {
- auto iter = partition_handler_->GetIterator();
- if (!iter) {
- return std::unique_ptr<base::ConstIterator<uint64_t, Row>>();
- } else {
- return std::unique_ptr<RowIterator>(
- new IteratorFilterWrapper(std::move(iter), parameter_, fun_));
- }
- }
- base::ConstIterator<uint64_t, Row>* GetRawIterator() override;
+
+ codec::RowIterator* GetRawIterator() override;
std::shared_ptr<TableHandler> GetSegment(const std::string& key) override;
@@ -334,15 +302,6 @@ class TableProjectWrapper : public TableHandler {
: TableHandler(), table_hander_(table_handler), parameter_(parameter), value_(), fun_(fun) {}
virtual ~TableProjectWrapper() {}
- std::unique_ptr<RowIterator> GetIterator() override {
- auto iter = table_hander_->GetIterator();
- if (!iter) {
- return std::unique_ptr<RowIterator>();
- } else {
- return std::unique_ptr<RowIterator>(
- new IteratorProjectWrapper(std::move(iter), parameter_, fun_));
- }
- }
const Types& GetTypes() override { return table_hander_->GetTypes(); }
const IndexHint& GetIndex() override { return table_hander_->GetIndex(); }
std::unique_ptr<WindowIterator> GetWindowIterator(
@@ -360,7 +319,7 @@ class TableProjectWrapper : public TableHandler {
const std::string& GetDatabase() override {
return table_hander_->GetDatabase();
}
- base::ConstIterator* GetRawIterator() override {
+ codec::RowIterator* GetRawIterator() override {
auto iter = table_hander_->GetIterator();
if (!iter) {
return nullptr;
@@ -389,14 +348,6 @@ class TableFilterWrapper : public TableHandler {
: TableHandler(), table_hander_(table_handler), parameter_(parameter), fun_(fun) {}
virtual ~TableFilterWrapper() {}
- std::unique_ptr<RowIterator> GetIterator() override {
- auto iter = table_hander_->GetIterator();
- if (!iter) {
- return std::unique_ptr<RowIterator>();
- } else {
- return std::make_unique<IteratorFilterWrapper>(std::move(iter), parameter_, fun_);
- }
- }
const Types& GetTypes() override { return table_hander_->GetTypes(); }
const IndexHint& GetIndex() override { return table_hander_->GetIndex(); }
@@ -412,9 +363,13 @@ class TableFilterWrapper : public TableHandler {
const Schema* GetSchema() override { return table_hander_->GetSchema(); }
const std::string& GetName() override { return table_hander_->GetName(); }
const std::string& GetDatabase() override { return table_hander_->GetDatabase(); }
- base::ConstIterator<uint64_t, Row>* GetRawIterator() override {
- return new IteratorFilterWrapper(static_cast<std::unique_ptr<RowIterator>>(table_hander_->GetRawIterator()),
- parameter_, fun_);
+ codec::RowIterator* GetRawIterator() override {
+ auto iter = table_hander_->GetIterator();
+ if (!iter) {
+ return nullptr;
+ } else {
+ return new IteratorFilterWrapper(std::move(iter), parameter_, fun_);
+ }
}
std::shared_ptr<TableHandler> GetPartition(const std::string& index_name) override;
const OrderType GetOrderType() const override { return table_hander_->GetOrderType(); }
@@ -426,29 +381,25 @@ class TableFilterWrapper : public TableHandler {
const PredicateFun* fun_;
};
-class LimitTableHandler : public TableHandler {
+class LimitTableHandler final : public TableHandler {
public:
explicit LimitTableHandler(std::shared_ptr table, int32_t limit)
: TableHandler(), table_hander_(table), limit_(limit) {}
virtual ~LimitTableHandler() {}
- std::unique_ptr<RowIterator> GetIterator() override {
- auto iter = table_hander_->GetIterator();
- if (!iter) {
- return std::unique_ptr<RowIterator>();
- } else {
- return std::make_unique<LimitIterator>(std::move(iter), limit_);
- }
- }
-
// FIXME(ace): do not use this, not implemented
std::unique_ptr<WindowIterator> GetWindowIterator(const std::string& idx_name) override {
LOG(ERROR) << "window iterator for LimitTableHandler is not implemented, don't use";
return table_hander_->GetWindowIterator(idx_name);
}
- base::ConstIterator<uint64_t, Row>* GetRawIterator() override {
- return new LimitIterator(static_cast<std::unique_ptr<RowIterator>>(table_hander_->GetRawIterator()), limit_);
+ codec::RowIterator* GetRawIterator() override {
+ auto iter = table_hander_->GetIterator();
+ if (!iter) {
+ return nullptr;
+ } else {
+ return new LimitIterator(std::move(iter), limit_);
+ }
}
const Types& GetTypes() override { return table_hander_->GetTypes(); }
@@ -562,10 +513,15 @@ class RowCombineWrapper : public RowHandler {
const ProjectFun* fun_;
};
+// Last Join iterator on demand
+// for request mode, right source must be a PartitionHandler
class LazyLastJoinIterator : public RowIterator {
public:
- LazyLastJoinIterator(std::unique_ptr&& left, std::shared_ptr right, const Row& param,
- std::shared_ptr join);
+ LazyLastJoinIterator(std::unique_ptr<RowIterator>&& left, std::shared_ptr<PartitionHandler> right, const Row& param,
+ std::shared_ptr<JoinGenerator> join) ABSL_ATTRIBUTE_NONNULL()
+ : left_it_(std::move(left)), right_(right), parameter_(param), join_(join) {
+ SeekToFirst();
+ }
~LazyLastJoinIterator() override {}
@@ -582,30 +538,82 @@ class LazyLastJoinIterator : public RowIterator {
private:
std::unique_ptr left_it_;
- std::shared_ptr<DataHandler> right_;
+ std::shared_ptr<PartitionHandler> right_;
const Row& parameter_;
std::shared_ptr join_;
Row value_;
};
+class LazyLeftJoinIterator : public RowIterator {
+ public:
+ LazyLeftJoinIterator(std::unique_ptr<RowIterator>&& left, std::shared_ptr<DataHandler> right, const Row& param,
+ std::shared_ptr<JoinGenerator> join)
+ : left_it_(std::move(left)), right_(right), parameter_(param), join_(join) {
+ if (right_->GetHandlerType() == kPartitionHandler) {
+ right_partition_ = std::dynamic_pointer_cast<PartitionHandler>(right_);
+ }
+ SeekToFirst();
+ }
+
+ ~LazyLeftJoinIterator() override {}
+
+ bool Valid() const override { return left_it_->Valid(); }
+
+ // actual compute performed here, left_it_ and right_it_ is updated to the next position of join
+ void Next() override;
+
+ const uint64_t& GetKey() const override {
+ return left_it_->GetKey();
+ }
-class LazyLastJoinPartitionHandler final : public PartitionHandler {
+ const Row& GetValue() override {
+ return value_;
+ }
+
+ bool IsSeekable() const override { return true; };
+
+ void Seek(const uint64_t& key) override {
+ left_it_->Seek(key);
+ onNewLeftRow();
+ }
+
+ void SeekToFirst() override {
+ left_it_->SeekToFirst();
+ onNewLeftRow();
+ }
+
+ private:
+ // left_value_ changed, update right_it_ based on join condition
+ void onNewLeftRow();
+
+ std::unique_ptr<RowIterator> left_it_;
+ std::shared_ptr<DataHandler> right_;
+ std::shared_ptr<PartitionHandler> right_partition_;
+ const Row parameter_;
+ std::shared_ptr<JoinGenerator> join_;
+
+ // whether current left row has any rows from right joined, left join fallback to NULL if non matches
+ bool matches_right_ = false;
+ std::unique_ptr<RowIterator> right_it_;
+ Row left_value_;
+ Row value_;
+};
+
+class LazyJoinPartitionHandler final : public PartitionHandler {
public:
- LazyLastJoinPartitionHandler(std::shared_ptr left, std::shared_ptr right,
- const Row& param, std::shared_ptr