From 88c9cf214655865ac40852c7002c2496cbcc3f5a Mon Sep 17 00:00:00 2001 From: Qingsheng Ren Date: Tue, 19 Mar 2024 17:55:33 +0800 Subject: [PATCH] [FLINK-34677][cdc] Synchronize Chinese documentations for Flink CDC (#3169) --- docs/content.zh/docs/connectors/doris.md | 19 ++- .../datastream-api-package-guidance.md | 2 +- .../legacy-flink-cdc-sources/db2-cdc.md | 2 +- .../legacy-flink-cdc-sources/mongodb-cdc.md | 2 +- .../legacy-flink-cdc-sources/oceanbase-cdc.md | 2 +- .../legacy-flink-cdc-sources/overview.md | 1 + .../tutorials}/_index.md | 4 +- .../build-real-time-data-lake-tutorial.md | 2 +- .../tutorials}/db2-tutorial.md | 2 +- .../tutorials}/mongodb-tutorial.md | 2 +- .../tutorials}/mysql-postgres-tutorial.md | 2 +- .../tutorials}/oceanbase-tutorial.md | 2 +- .../tutorials}/oracle-tutorial.md | 2 +- .../tutorials}/polardbx-tutorial.md | 2 +- .../tutorials}/sqlserver-tutorial.md | 2 +- .../tutorials}/tidb-tutorial.md | 2 +- docs/content.zh/docs/connectors/mysql.md | 26 ++-- docs/content.zh/docs/connectors/overview.md | 34 +++-- docs/content.zh/docs/connectors/starrocks.md | 22 ++- .../docs/core-concept/data-pipeline.md | 77 ++++++++++ .../content.zh/docs/core-concept/data-sink.md | 25 ++++ .../docs/core-concept/data-source.md | 26 ++++ docs/content.zh/docs/core-concept/route.md | 49 +++++++ docs/content.zh/docs/core-concept/table-id.md | 15 ++ .../content.zh/docs/core-concept/transform.md | 7 + docs/content.zh/docs/deployment/kubernetes.md | 134 ++++++++++++++++++ docs/content.zh/docs/deployment/standalone.md | 107 ++++++++++++++ docs/content.zh/docs/deployment/yarn.md | 128 +++++++++++++++++ .../contribute-to-flink-cdc.md | 4 +- .../understand-flink-cdc-api.md | 98 +++++++++++++ .../docs/get-started/introduction.md | 97 ++++++++++++- .../get-started/quickstart/mysql-to-doris.md | 2 +- 32 files changed, 833 insertions(+), 68 deletions(-) rename docs/content.zh/docs/{get-started/quickstart/using-legacy-sources => connectors/legacy-flink-cdc-sources/tutorials}/_index.md (95%) rename docs/content.zh/docs/{get-started/quickstart/using-legacy-sources => connectors/legacy-flink-cdc-sources/tutorials}/build-real-time-data-lake-tutorial.md (99%) rename docs/content.zh/docs/{get-started/quickstart/using-legacy-sources => connectors/legacy-flink-cdc-sources/tutorials}/db2-tutorial.md (98%) rename docs/content.zh/docs/{get-started/quickstart/using-legacy-sources => connectors/legacy-flink-cdc-sources/tutorials}/mongodb-tutorial.md (98%) rename docs/content.zh/docs/{get-started/quickstart/using-legacy-sources => connectors/legacy-flink-cdc-sources/tutorials}/mysql-postgres-tutorial.md (99%) rename docs/content.zh/docs/{get-started/quickstart/using-legacy-sources => connectors/legacy-flink-cdc-sources/tutorials}/oceanbase-tutorial.md (99%) rename docs/content.zh/docs/{get-started/quickstart/using-legacy-sources => connectors/legacy-flink-cdc-sources/tutorials}/oracle-tutorial.md (99%) rename docs/content.zh/docs/{get-started/quickstart/using-legacy-sources => connectors/legacy-flink-cdc-sources/tutorials}/polardbx-tutorial.md (99%) rename docs/content.zh/docs/{get-started/quickstart/using-legacy-sources => connectors/legacy-flink-cdc-sources/tutorials}/sqlserver-tutorial.md (99%) rename docs/content.zh/docs/{get-started/quickstart/using-legacy-sources => connectors/legacy-flink-cdc-sources/tutorials}/tidb-tutorial.md (99%) diff --git a/docs/content.zh/docs/connectors/doris.md b/docs/content.zh/docs/connectors/doris.md index 101cae7d60..61b88d0dfc 100644 --- a/docs/content.zh/docs/connectors/doris.md 
+++ b/docs/content.zh/docs/connectors/doris.md
@@ -24,13 +24,12 @@ specific language governing permissions and limitations under the License.
 -->

-# Doris Pipeline Connector
-
-This article introduces of Doris Pipeline Connector
+# Doris Connector
+This article introduces the Doris Connector.

## Example
-----------------
+

```yaml
source:
@@ -50,8 +49,7 @@ pipeline:
 ```

-## Pipeline options
----------------
+## Connector Options
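These options are set directly under the `sink` block of the pipeline YAML. A minimal sketch, with placeholder connection values:

```yaml
sink:
  type: doris
  name: doris-sink                             # optional, for description purpose only
  fenodes: 127.0.0.1:8030                      # Doris FE HTTP address
  username: root
  password: ""
  table.create.properties.replication_num: 1   # optional table creation property
```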
@@ -180,17 +178,16 @@ pipeline:
-## Data Type Mapping ----------------- +## Data Type Mapping
- - - + + + diff --git a/docs/content.zh/docs/connectors/legacy-flink-cdc-sources/datastream-api-package-guidance.md b/docs/content.zh/docs/connectors/legacy-flink-cdc-sources/datastream-api-package-guidance.md index 57493c721d..7cfead63a9 100644 --- a/docs/content.zh/docs/connectors/legacy-flink-cdc-sources/datastream-api-package-guidance.md +++ b/docs/content.zh/docs/connectors/legacy-flink-cdc-sources/datastream-api-package-guidance.md @@ -1,6 +1,6 @@ --- title: "DataStream API Package Guidance" -weight: 999 +weight: 998 type: docs aliases: - /connectors/legacy-flink-cdc-sources/datastream-api-package-guidance diff --git a/docs/content.zh/docs/connectors/legacy-flink-cdc-sources/db2-cdc.md b/docs/content.zh/docs/connectors/legacy-flink-cdc-sources/db2-cdc.md index 346dd14509..ef1e33a500 100644 --- a/docs/content.zh/docs/connectors/legacy-flink-cdc-sources/db2-cdc.md +++ b/docs/content.zh/docs/connectors/legacy-flink-cdc-sources/db2-cdc.md @@ -53,7 +53,7 @@ using a build automation tool (such as Maven or SBT) and SQL Client with SQL JAR Download flink-sql-connector-db2-cdc-3.0-SNAPSHOT.jar and put it under `/lib/`. -**Note:** flink-sql-connector-db2-cdc-XXX-SNAPSHOT version is the code corresponding to the development branch. Users should use the released version, such as +**Note:** flink-sql-connector-db2-cdc-XXX-SNAPSHOT version is the code corresponding to the development branch. Users should use the released version, such as [flink-sql-connector-db2-cdc-3.0.0.jar](https://mvnrepository.com/artifact/com.ververica/flink-connector-db2-cdc), the released version will be available in the Maven central warehouse. diff --git a/docs/content.zh/docs/connectors/legacy-flink-cdc-sources/mongodb-cdc.md b/docs/content.zh/docs/connectors/legacy-flink-cdc-sources/mongodb-cdc.md index e13afa7c2c..0f078644b1 100644 --- a/docs/content.zh/docs/connectors/legacy-flink-cdc-sources/mongodb-cdc.md +++ b/docs/content.zh/docs/connectors/legacy-flink-cdc-sources/mongodb-cdc.md @@ -52,7 +52,7 @@ Setup MongoDB - MongoDB version MongoDB version >= 3.6
- We use [change streams](https://docs.mongodb.com/manual/changeStreams/) feature (new in version 3.6) to capture change data. +We use [change streams](https://docs.mongodb.com/manual/changeStreams/) feature (new in version 3.6) to capture change data. - Cluster Deployment diff --git a/docs/content.zh/docs/connectors/legacy-flink-cdc-sources/oceanbase-cdc.md b/docs/content.zh/docs/connectors/legacy-flink-cdc-sources/oceanbase-cdc.md index f1767049eb..c051dd2650 100644 --- a/docs/content.zh/docs/connectors/legacy-flink-cdc-sources/oceanbase-cdc.md +++ b/docs/content.zh/docs/connectors/legacy-flink-cdc-sources/oceanbase-cdc.md @@ -154,7 +154,7 @@ Flink SQL> CREATE TABLE orders ( ); ``` -You can also try the quickstart tutorial that sync data from OceanBase to Elasticsearch, please refer [Flink CDC Tutorial](<{{ ref "docs/connectors/legacy-flink-cdc-sources/tutorials/oceanbase-tutorial" }}>) for more information. +You can also try the quickstart tutorial that sync data from OceanBase to Elasticsearch, please refer [Flink CDC Tutorial]({{< ref "docs/connectors/legacy-flink-cdc-sources/tutorials/oceanbase-tutorial" >}}) for more information. Connector Options ---------------- diff --git a/docs/content.zh/docs/connectors/legacy-flink-cdc-sources/overview.md b/docs/content.zh/docs/connectors/legacy-flink-cdc-sources/overview.md index 842d15c967..4311ed961c 100644 --- a/docs/content.zh/docs/connectors/legacy-flink-cdc-sources/overview.md +++ b/docs/content.zh/docs/connectors/legacy-flink-cdc-sources/overview.md @@ -29,6 +29,7 @@ under the License. Flink CDC sources is a set of source connectors for Apache Flink®, ingesting changes from different databases using change data capture (CDC). Some CDC sources integrate Debezium as the engine to capture data changes. So it can fully leverage the ability of Debezium. See more about what is [Debezium](https://github.com/debezium/debezium). +You can also read [tutorials]({{< ref "docs/connectors/legacy-flink-cdc-sources/tutorials/mysql-postgres-tutorial" >}}) about how to use these sources. {{< img src="/fig/cdc-flow.png" width="600px" alt="Flink CDC" >}} diff --git a/docs/content.zh/docs/get-started/quickstart/using-legacy-sources/_index.md b/docs/content.zh/docs/connectors/legacy-flink-cdc-sources/tutorials/_index.md similarity index 95% rename from docs/content.zh/docs/get-started/quickstart/using-legacy-sources/_index.md rename to docs/content.zh/docs/connectors/legacy-flink-cdc-sources/tutorials/_index.md index 973659a4a7..7c83c335c2 100644 --- a/docs/content.zh/docs/get-started/quickstart/using-legacy-sources/_index.md +++ b/docs/content.zh/docs/connectors/legacy-flink-cdc-sources/tutorials/_index.md @@ -1,7 +1,7 @@ --- -title: "Using Legacy Sources" +title: Tutorials bookCollapseSection: true -weight: 3 +weight: 999 --- -# MySQL CDC Pipeline Connector +# MySQL Connector -The MySQL CDC Pipeline Connector allows for reading snapshot data and incremental data from MySQL database and provides end-to-end full-database data synchronization capabilities. -This document describes how to setup the MySQL CDC Pipeline connector. +MySQL connector allows reading snapshot data and incremental data from MySQL database and provides end-to-end full-database data synchronization capabilities. +This document describes how to setup the MySQL connector. 
-How to create Pipeline
----------------
+## Example

-The pipeline for reading data from MySQL and sink to Doris can be defined as follows:
+An example pipeline that reads data from MySQL and sinks it to Doris can be defined as follows:

```yaml
source:
@@ -58,8 +57,7 @@ pipeline:
   parallelism: 4
 ```

-Pipeline Connector Options
----------------
+## Connector Options
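These options are set under the `source` block of the pipeline YAML. A minimal sketch, with placeholder connection values:

```yaml
source:
  type: mysql
  hostname: localhost
  port: 3306
  username: root
  password: 123456
  tables: app_db.\.*        # tables to capture; regular expressions are supported
  server-id: 5400-5404      # a server id range unique among clients of this MySQL instance
  server-time-zone: UTC
```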
-CDC type
-Doris type
-NOTE
+Flink CDC Type
+Doris Type
+Note
@@ -245,8 +243,7 @@ Pipeline Connector Options
-Startup Reading Position --------- +## Startup Reading Position The config option `scan.startup.mode` specifies the startup mode for MySQL CDC consumer. The valid enumerations are: @@ -259,16 +256,15 @@ The config option `scan.startup.mode` specifies the startup mode for MySQL CDC c - `timestamp`: Skip snapshot phase and start reading binlog events from a specific timestamp. -Data Type Mapping ----------------- +## Data Type Mapping
- - - + + + diff --git a/docs/content.zh/docs/connectors/overview.md b/docs/content.zh/docs/connectors/overview.md index 00b2bf2a34..0eee7bdfa8 100644 --- a/docs/content.zh/docs/connectors/overview.md +++ b/docs/content.zh/docs/connectors/overview.md @@ -24,21 +24,33 @@ specific language governing permissions and limitations under the License. --> -# Pipeline Connectors Of CDC Streaming ELT Framework +# Connectors + +Flink CDC provides several source and sink connectors to interact with external +systems. You can use these connectors out-of-box, by adding released JARs to +your Flink CDC environment, and specifying the connector in your YAML pipeline +definition. ## Supported Connectors -| Connector | Database | -|---------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| [doris-pipeline](doris-pipeline.md) |
  • [Doris](https://doris.apache.org/): 1.2.x, 2.x.x | -| [mysql-pipeline](mysql-pipeline.md) |
  • [MySQL](https://dev.mysql.com/doc): 5.6, 5.7, 8.0.x
  • [RDS MySQL](https://www.aliyun.com/product/rds/mysql): 5.6, 5.7, 8.0.x
  • [PolarDB MySQL](https://www.aliyun.com/product/polardb): 5.6, 5.7, 8.0.x
  • [Aurora MySQL](https://aws.amazon.com/cn/rds/aurora): 5.6, 5.7, 8.0.x
  • [MariaDB](https://mariadb.org): 10.x
  • [PolarDB X](https://github.com/ApsaraDB/galaxysql): 2.0.1 | -| [starrocks-pipeline](starrocks-pipeline.md) |
  • [StarRocks](https://www.starrocks.io/): 2.x, 3.x | +| Connector | Supported Type | External System | +|------------------------------------------------------|----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| [Apache Doris]({{< ref "docs/connectors/doris" >}}) | Sink |
  • [Apache Doris](https://doris.apache.org/): 1.2.x, 2.x.x | +| [MySQL]({{< ref "docs/connectors/mysql" >}}) | Source |
  • [MySQL](https://dev.mysql.com/doc): 5.6, 5.7, 8.0.x
  • [RDS MySQL](https://www.aliyun.com/product/rds/mysql): 5.6, 5.7, 8.0.x
  • [PolarDB MySQL](https://www.aliyun.com/product/polardb): 5.6, 5.7, 8.0.x
  • [Aurora MySQL](https://aws.amazon.com/cn/rds/aurora): 5.6, 5.7, 8.0.x
  • [MariaDB](https://mariadb.org): 10.x
  • [PolarDB X](https://github.com/ApsaraDB/galaxysql): 2.0.1 | +| [StarRocks]({{< ref "docs/connectors/starrocks" >}}) | Sink |
  • [StarRocks](https://www.starrocks.io/): 2.x, 3.x | + +## Develop Your Own Connector + +If provided connectors cannot fulfill your requirement, you can always develop +your own connector to get your external system involved in Flink CDC pipelines. +Check out [Flink CDC APIs]({{< ref "docs/developer-guide/understand-flink-cdc-api" >}}) +to learn how to develop your own connectors. -## Supported Flink Versions -The following table shows the version mapping between Flink® CDC Pipeline and Flink®: +## Legacy Flink CDC Sources -| Flink® CDC Version | Flink® Version | -|:-----------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| -| 3.0.* | 1.14.\*, 1.15.\*, 1.16.\*, 1.17.\*, 1.18.\* | +Flink CDC sources introduces before 3.0 are still available as normal Flink +connector sources. You can find more details in the +[overview page]({{< ref "docs/connectors/legacy-flink-cdc-sources/overview" >}}) +of legacy Flink CDC sources. {{< top >}} diff --git a/docs/content.zh/docs/connectors/starrocks.md b/docs/content.zh/docs/connectors/starrocks.md index 64dff6689a..84a4a99d1f 100644 --- a/docs/content.zh/docs/connectors/starrocks.md +++ b/docs/content.zh/docs/connectors/starrocks.md @@ -24,17 +24,16 @@ specific language governing permissions and limitations under the License. --> -# StarRocks Pipeline Connector +# StarRocks Connector -The StarRocks Pipeline connector can be used as the *Data Sink* of the pipeline, and write data to [StarRocks](https://github.com/StarRocks/starrocks). This document describes how to set up the StarRocks Pipeline connector. +StarRocks connector can be used as the *Data Sink* of the pipeline, and write data to [StarRocks](https://github.com/StarRocks/starrocks). This document describes how to set up the StarRocks connector. ## What can the connector do? * Create table automatically if not exist * Schema change synchronization * Data synchronization -How to create Pipeline ----------------- +## Example The pipeline for reading data from MySQL and sink to StarRocks can be defined as follows: @@ -62,8 +61,8 @@ pipeline: parallelism: 2 ``` -Pipeline Connector Options ----------------- +## Connector Options +
-MySQL type
-CDC type
-NOTE
+MySQL type
+Flink CDC type
+Note
    @@ -220,8 +219,8 @@ Pipeline Connector Options
    -Usage Notes --------- + +## Usage Notes * Only support StarRocks primary key table, so the source table must have primary keys. @@ -244,15 +243,14 @@ Usage Notes to write data to StarRocks. You can see [sink documentation](https://github.com/StarRocks/starrocks-connector-for-apache-flink/blob/main/docs/content/connector-sink.md) for how it works. -Data Type Mapping ----------------- +## Data Type Mapping
    - + - + diff --git a/docs/content.zh/docs/core-concept/data-pipeline.md b/docs/content.zh/docs/core-concept/data-pipeline.md index a1cf1986e0..3903c922b0 100644 --- a/docs/content.zh/docs/core-concept/data-pipeline.md +++ b/docs/content.zh/docs/core-concept/data-pipeline.md @@ -23,3 +23,80 @@ KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> + +# Definition +Since events in Flink CDC flow from the upstream to the downstream in a pipeline manner, the whole ETL task is referred as a **Data Pipeline**. + +# Parameters +A pipeline corresponds to a chain of operators in Flink. +To describe a Data Pipeline, the following parts are required: +- [source]({{< ref "docs/core-concept/data-source" >}}) +- [sink]({{< ref "docs/core-concept/data-sink" >}}) +- [pipeline](#pipeline-configurations) + +the following parts are optional: +- [route]({{< ref "docs/core-concept/route" >}}) +- [transform]({{< ref "docs/core-concept/transform" >}}) + +# Example +## Only required +We could use following yaml file to define a concise Data Pipeline describing synchronize all tables under MySQL app_db database to Doris : + +```yaml + source: + type: mysql + hostname: localhost + port: 3306 + username: root + password: 123456 + tables: app_db.\.* + + sink: + type: doris + fenodes: 127.0.0.1:8030 + username: root + password: "" + + pipeline: + name: Sync MySQL Database to Doris + parallelism: 2 +``` + +## With optional +We could use following yaml file to define a complicated Data Pipeline describing synchronize all tables under MySQL app_db database to Doris and give specific target database name ods_db and specific target table name prefix ods_ : + +```yaml + source: + type: mysql + hostname: localhost + port: 3306 + username: root + password: 123456 + tables: app_db.\.* + + sink: + type: doris + fenodes: 127.0.0.1:8030 + username: root + password: "" + route: + - source-table: app_db.orders + sink-table: ods_db.ods_orders + - source-table: app_db.shipments + sink-table: ods_db.ods_shipments + - source-table: app_db.products + sink-table: ods_db.ods_products + + pipeline: + name: Sync MySQL Database to Doris + parallelism: 2 +``` + +# Pipeline Configurations +The following config options of Data Pipeline level are supported: + +| parameter | meaning | optional/required | +|-----------------|-----------------------------------------------------------------------------------------|-------------------| +| name | The name of the pipeline, which will be submitted to the Flink cluster as the job name. | optional | +| parallelism | The global parallelism of the pipeline. | required | +| local-time-zone | The local time zone defines current session time zone id. | optional | \ No newline at end of file diff --git a/docs/content.zh/docs/core-concept/data-sink.md b/docs/content.zh/docs/core-concept/data-sink.md index 9c86f00f6e..2dab1dc4a7 100644 --- a/docs/content.zh/docs/core-concept/data-sink.md +++ b/docs/content.zh/docs/core-concept/data-sink.md @@ -23,3 +23,28 @@ KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> + +# Definition +**Data Sink** is used to apply schema changes and write change data to external systems. +A Data Sink can write to multiple tables simultaneously. 
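For example, in the sketch below (connection values are placeholders), the single Doris sink receives the change data of every table matched by the source, without any per-table sink configuration:

```yaml
source:
  type: mysql
  hostname: localhost
  port: 3306
  username: root
  password: 123456
  tables: app_db.\.*     # every matched table is written by the one sink below

sink:
  type: doris
  fenodes: 127.0.0.1:8030
  username: root
  password: ""
```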
+ +# Parameters +To describe a data sink, the follows are required: + +| parameter | meaning | optional/required | +|-----------------------------|-------------------------------------------------------------------------------------------------|-------------------| +| type | The type of the sink, such as doris or starrocks. | required | +| name | The name of the sink, which is user-defined (a default value provided). | optional | +| configurations of Data Sink | Configurations to build the Data Sink e.g. connection configurations and sink table properties. | optional | + +# Example +We could use this yaml file to define a doris sink: +```yaml +sink: + type: doris + name: doris-sink # Optional parameter for description purpose + fenodes: 127.0.0.1:8030 + username: root + password: "" + table.create.properties.replication_num: 1 # Optional parameter for advanced functionalities +``` \ No newline at end of file diff --git a/docs/content.zh/docs/core-concept/data-source.md b/docs/content.zh/docs/core-concept/data-source.md index d2859bd58a..5d6c33deb8 100644 --- a/docs/content.zh/docs/core-concept/data-source.md +++ b/docs/content.zh/docs/core-concept/data-source.md @@ -23,3 +23,29 @@ KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> + +# Definition +**Data Source** is used to access metadata and read the changed data from external systems. +A Data Source can read data from multiple tables simultaneously. + +# Parameters +To describe a data source, the follows are required: + +| parameter | meaning | optional/required | +|-------------------------------|-----------------------------------------------------------------------------------------------------|-------------------| +| type | The type of the source, such as mysql. | required | +| name | The name of the source, which is user-defined (a default value provided). | optional | +| configurations of Data Source | Configurations to build the Data Source e.g. connection configurations and source table properties. | optional | + +# Example +We could use yaml files to define a mysql source: +```yaml +source: + type: mysql + name: mysql-source #optional,description information + host: localhost + port: 3306 + username: admin + password: pass + tables: adb.*, bdb.user_table_[0-9]+, [app|web]_order_\.* +``` \ No newline at end of file diff --git a/docs/content.zh/docs/core-concept/route.md b/docs/content.zh/docs/core-concept/route.md index 9dbe80c03a..0a8c906fbb 100644 --- a/docs/content.zh/docs/core-concept/route.md +++ b/docs/content.zh/docs/core-concept/route.md @@ -23,3 +23,52 @@ KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> + +# Definition +**Route** specifies the rule of matching a list of source-table and mapping to sink-table. The most typical scenario is the merge of sub-databases and sub-tables, routing multiple upstream source tables to the same sink table. + +# Parameters +To describe a route, the follows are required: + +| parameter | meaning | optional/required | +|--------------|----------------------------------------------------|-------------------| +| source-table | Source table id, supports regular expressions | required | +| sink-table | Sink table id, supports regular expressions | required | +| description | Routing rule description(a default value provided) | optional | + +A route module can contain a list of source-table/sink-table rules. 
+ +# Example +## Route one Data Source table to one Data Sink table +if synchronize the table `web_order` in the database `mydb` to a Doris table `ods_web_order`, we can use this yaml file to define this route: + +```yaml +route: + source-table: mydb.web_order + sink-table: mydb.ods_web_order + description: sync table to one destination table with given prefix ods_ +``` + +## Route multiple Data Source tables to one Data Sink table +What's more, if you want to synchronize the sharding tables in the database `mydb` to a Doris table `ods_web_order`, we can use this yaml file to define this route: +```yaml +route: + source-table: mydb\.* + sink-table: mydb.ods_web_order + description: sync sharding tables to one destination table +``` + +## Complex Route via combining route rules +What's more, if you want to specify many different mapping rules, we can use this yaml file to define this route: +```yaml +route: + - source-table: mydb.orders + sink-table: ods_db.ods_orders + description: sync orders table to orders + - source-table: mydb.shipments + sink-table: ods_db.ods_shipments + description: sync shipments table to ods_shipments + - source-table: mydb.products + sink-table: ods_db.ods_products + description: sync products table to ods_products +``` \ No newline at end of file diff --git a/docs/content.zh/docs/core-concept/table-id.md b/docs/content.zh/docs/core-concept/table-id.md index 83769301cc..261c8fd09b 100644 --- a/docs/content.zh/docs/core-concept/table-id.md +++ b/docs/content.zh/docs/core-concept/table-id.md @@ -23,3 +23,18 @@ KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> + +# Definition +When connecting to external systems, it is necessary to establish a mapping relationship with the storage objects of the external system. This is what **Table Id** refers to. + +# Example +To be compatible with most external systems, the Table Id is represented by a 3-tuple : (namespace, schemaName, tableName). +Connectors should establish the mapping between Table Id and storage objects in external systems. + +The following table lists the parts in table Id of different data systems: + +| data system | parts in tableId | String example | +|-----------------------|--------------------------|---------------------| +| Oracle/PostgreSQL | database, schema, table | mydb.default.orders | +| MySQL/Doris/StarRocks | database, table | mydb.orders | +| Kafka | topic | orders | diff --git a/docs/content.zh/docs/core-concept/transform.md b/docs/content.zh/docs/core-concept/transform.md index 76015dea16..0ffa24829a 100644 --- a/docs/content.zh/docs/core-concept/transform.md +++ b/docs/content.zh/docs/core-concept/transform.md @@ -23,3 +23,10 @@ KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> + +# Definition +**Transform** module helps users delete and expand data columns based on the data columns in the table. +What's more, it also helps users filter some unnecessary data during the synchronization process. + +# Example +This feature will support soon. \ No newline at end of file diff --git a/docs/content.zh/docs/deployment/kubernetes.md b/docs/content.zh/docs/deployment/kubernetes.md index f8ca63fc48..1587572765 100644 --- a/docs/content.zh/docs/deployment/kubernetes.md +++ b/docs/content.zh/docs/deployment/kubernetes.md @@ -23,3 +23,137 @@ KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. --> + +# Introduction + +Kubernetes is a popular container-orchestration system for automating computer application deployment, scaling, and management. +Flink's native Kubernetes integration allows you to directly deploy Flink on a running Kubernetes cluster. +Moreover, Flink is able to dynamically allocate and de-allocate TaskManagers depending on the required resources because it can directly talk to Kubernetes. + +Apache Flink also provides a Kubernetes operator for managing Flink clusters on Kubernetes. It supports both standalone and native deployment mode and greatly simplifies deployment, configuration and the life cycle management of Flink resources on Kubernetes. + +For more information, please refer to the [Flink Kubernetes Operator documentation](https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-main/docs/concepts/overview/). + +## Preparation + +The doc assumes a running Kubernetes cluster fulfilling the following requirements: + +- Kubernetes >= 1.9. +- KubeConfig, which has access to list, create, delete pods and services, configurable via `~/.kube/config`. You can verify permissions by running `kubectl auth can-i pods`. +- Enabled Kubernetes DNS. +- `default` service account with [RBAC](https://nightlies.apache.org/flink/flink-docs-master/docs/deployment/resource-providers/native_kubernetes/#rbac) permissions to create, delete pods. + +If you have problems setting up a Kubernetes cluster, please take a look at [how to setup a Kubernetes cluster](https://kubernetes.io/docs/setup/). + +## Session Mode + +Flink runs on all UNIX-like environments, i.e. Linux, Mac OS X, and Cygwin (for Windows). +You can refer [overview]({{< ref "docs/connectors/overview" >}}) to check supported versions and download [the binary release](https://flink.apache.org/downloads/) of Flink, +then extract the archive: + +```bash +tar -xzf flink-*.tgz +``` + +You should set `FLINK_HOME` environment variables like: + +```bash +export FLINK_HOME=/path/flink-* +``` + +### Start a session cluster + +To start a session cluster on k8s, run the bash script that comes with Flink: + +```bash +cd /path/flink-* +./bin/kubernetes-session.sh -Dkubernetes.cluster-id=my-first-flink-cluster +``` + +After successful startup, the return information is as follows: + +``` +org.apache.flink.kubernetes.utils.KubernetesUtils [] - Kubernetes deployment requires a fixed port. Configuration blob.server.port will be set to 6124 +org.apache.flink.kubernetes.utils.KubernetesUtils [] - Kubernetes deployment requires a fixed port. Configuration taskmanager.rpc.port will be set to 6122 +org.apache.flink.kubernetes.KubernetesClusterDescriptor [] - Please note that Flink client operations(e.g. cancel, list, stop, savepoint, etc.) won't work from outside the Kubernetes cluster since 'kubernetes.rest-service.exposed.type' has been set to ClusterIP. +org.apache.flink.kubernetes.KubernetesClusterDescriptor [] - Create flink session cluster my-first-flink-cluster successfully, JobManager Web Interface: http://my-first-flink-cluster-rest.default:8081 +``` + +{{< hint info >}} +please refer to [Flink documentation](https://nightlies.apache.org/flink/flink-docs-master/docs/deployment/resource-providers/native_kubernetes/#accessing-flinks-web-ui) to expose Flink’s Web UI and REST endpoint. +You should ensure that REST endpoint can be accessed by the node of your submission. 
+{{< /hint >}} +Then, you need to add these two config to your flink-conf.yaml: + +```yaml +rest.bind-port: {{REST_PORT}} +rest.address: {{NODE_IP}} +``` + +{{REST_PORT}} and {{NODE_IP}} should be replaced by the actual values of your JobManager Web Interface. + +### Set up Flink CDC +Download the tar file of Flink CDC from [release page](https://github.com/apache/flink-cdc/releases), then extract the archive: + +```bash +tar -xzf flink-cdc-*.tar.gz +``` + +Extracted `flink-cdc` contains four directories: `bin`,`lib`,`log` and `conf`. + +Download the connector jars from [release page](https://github.com/apache/flink-cdc/releases), and move it to the `lib` directory. +Download links are available only for stable releases, SNAPSHOT dependencies need to be built based on specific branch by yourself. + +### Submit a Flink CDC Job +Here is an example file for synchronizing the entire database `mysql-to-doris.yaml`: + +```yaml +################################################################################ +# Description: Sync MySQL all tables to Doris +################################################################################ +source: + type: mysql + hostname: localhost + port: 3306 + username: root + password: 123456 + tables: app_db.\.* + server-id: 5400-5404 + server-time-zone: UTC + +sink: + type: doris + fenodes: 127.0.0.1:8030 + username: root + password: "" + +pipeline: + name: Sync MySQL Database to Doris + parallelism: 2 + +``` + +You need to modify the configuration file according to your needs, refer to connectors more information. +- [MySQL pipeline connector]({{< ref "docs/connectors/mysql.md" >}}) +- [Apache Doris pipeline connector]({{< ref "docs/connectors/doris.md" >}}) + +Finally, submit job to Flink Standalone cluster using Cli. + +```bash +cd /path/flink-cdc-* +./bin/flink-cdc.sh mysql-to-doris.yaml +``` + +After successful submission, the return information is as follows: + +```bash +Pipeline has been submitted to cluster. +Job ID: ae30f4580f1918bebf16752d4963dc54 +Job Description: Sync MySQL Database to Doris +``` + +Then you can find a job named `Sync MySQL Database to Doris` running through Flink Web UI. + +{{< hint info >}} +Please note that submitting with **native application mode** and **Flink Kubernetes operator** are not supported for now. +{{< /hint >}} \ No newline at end of file diff --git a/docs/content.zh/docs/deployment/standalone.md b/docs/content.zh/docs/deployment/standalone.md index 2304a6df31..dd052bcab2 100644 --- a/docs/content.zh/docs/deployment/standalone.md +++ b/docs/content.zh/docs/deployment/standalone.md @@ -23,3 +23,110 @@ KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> + +# Introduction +Standalone mode is Flink’s simplest deployment mode. This short guide will show you how to download the latest stable version of Flink, install, and run it. +You will also run an example Flink CDC job and view it in the web UI. + + +## Preparation + +Flink runs on all UNIX-like environments, i.e. Linux, Mac OS X, and Cygwin (for Windows). 
+You can refer [overview]({{< ref "docs/connectors/overview" >}}) to check supported versions and download [the binary release](https://flink.apache.org/downloads/) of Flink, +then extract the archive: + +```bash +tar -xzf flink-*.tgz +``` + +You should set `FLINK_HOME` environment variables like: + +```bash +export FLINK_HOME=/path/flink-* +``` + +### Start and stop a local cluster + +To start a local cluster, run the bash script that comes with Flink: + +```bash +cd /path/flink-* +./bin/start-cluster.sh +``` + +Flink is now running as a background process. You can check its status with the following command: + +```bash +ps aux | grep flink +``` + +You should be able to navigate to the web UI at [localhost:8081](http://localhost:8081) to view +the Flink dashboard and see that the cluster is up and running. + +To quickly stop the cluster and all running components, you can use the provided script: + +```bash +./bin/stop-cluster.sh +``` + +## Set up Flink CDC +Download the tar file of Flink CDC from [release page](https://github.com/apache/flink-cdc/releases), then extract the archive: + +```bash +tar -xzf flink-cdc-*.tar.gz +``` + +Extracted `flink-cdc` contains four directories: `bin`,`lib`,`log` and `conf`. + +Download the connector jars from [release page](https://github.com/apache/flink-cdc/releases), and move it to the `lib` directory. +Download links are available only for stable releases, SNAPSHOT dependencies need to be built based on specific branch by yourself. + + +## Submit a Flink CDC Job +Here is an example file for synchronizing the entire database `mysql-to-doris.yaml`: + +```yaml +################################################################################ +# Description: Sync MySQL all tables to Doris +################################################################################ +source: + type: mysql + hostname: localhost + port: 3306 + username: root + password: 123456 + tables: app_db.\.* + server-id: 5400-5404 + server-time-zone: UTC + +sink: + type: doris + fenodes: 127.0.0.1:8030 + username: root + password: "" + +pipeline: + name: Sync MySQL Database to Doris + parallelism: 2 +``` + +You need to modify the configuration file according to your needs, refer to connectors more information. +- [MySQL pipeline connector]({{< ref "docs/connectors/mysql.md" >}}) +- [Apache Doris pipeline connector]({{< ref "docs/connectors/doris.md" >}}) + +Finally, submit job to Flink Standalone cluster using Cli. + +```bash +cd /path/flink-cdc-* +./bin/flink-cdc.sh mysql-to-doris.yaml +``` + +After successful submission, the return information is as follows: + +```bash +Pipeline has been submitted to cluster. +Job ID: ae30f4580f1918bebf16752d4963dc54 +Job Description: Sync MySQL Database to Doris +``` + +Then you can find a job named `Sync MySQL Database to Doris` running through Flink Web UI. \ No newline at end of file diff --git a/docs/content.zh/docs/deployment/yarn.md b/docs/content.zh/docs/deployment/yarn.md index d0621273ae..ec9700d898 100644 --- a/docs/content.zh/docs/deployment/yarn.md +++ b/docs/content.zh/docs/deployment/yarn.md @@ -23,3 +23,131 @@ KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> + +# Introduction + +[Apache Hadoop YARN](https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/YARN.html) is a resource provider popular with many data processing frameworks. 
+Flink services are submitted to YARN's ResourceManager, which spawns containers on machines managed by YARN NodeManagers. Flink deploys its JobManager and TaskManager instances into such containers. + +Flink can dynamically allocate and de-allocate TaskManager resources depending on the number of processing slots required by the job(s) running on the JobManager. + +## Preparation + +This *Getting Started* section assumes a functional YARN environment, starting from version 2.10.2. YARN environments are provided most conveniently through services such as Amazon EMR, Google Cloud DataProc or products like Cloudera. [Manually setting up a YARN environment locally](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/SingleCluster.html) or [on a cluster](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/ClusterSetup.html) is not recommended for following through this *Getting Started* tutorial. + +- Make sure your YARN cluster is ready for accepting Flink applications by running `yarn top`. It should show no error messages. +- Download a recent Flink distribution from the [download page](https://flink.apache.org/downloads/) and unpack it. +- **Important** Make sure that the `HADOOP_CLASSPATH` environment variable is set up (it can be checked by running `echo $HADOOP_CLASSPATH`). If not, set it up using + +```bash +export HADOOP_CLASSPATH=`hadoop classpath` +``` + +## Session Mode + +Flink runs on all UNIX-like environments, i.e. Linux, Mac OS X, and Cygwin (for Windows). +You can refer [overview]({{< ref "docs/connectors/overview" >}}) to check supported versions and download [the binary release](https://flink.apache.org/downloads/) of Flink, +then extract the archive: + +```bash +tar -xzf flink-*.tgz +``` + +You should set `FLINK_HOME` environment variables like: + +```bash +export FLINK_HOME=/path/flink-* +``` + +### Starting a Flink Session on YARN + +Once you've made sure that the `HADOOP_CLASSPATH` environment variable is set, you can launch a Flink on YARN session: + +```bash +# we assume to be in the root directory of +# the unzipped Flink distribution + +# export HADOOP_CLASSPATH +export HADOOP_CLASSPATH=`hadoop classpath` + +# Start YARN session +./bin/yarn-session.sh --detached + +# Stop YARN session (replace the application id based +# on the output of the yarn-session.sh command) +echo "stop" | ./bin/yarn-session.sh -id application_XXXXX_XXX +``` + +After starting YARN session, you can now access the Flink Web UI through the URL printed in the last lines of the command output, or through the YARN ResourceManager web UI. + +Then, you need to add some configs to your flink-conf.yaml: + +```yaml +rest.bind-port: {{REST_PORT}} +rest.address: {{NODE_IP}} +execution.target: yarn-session +yarn.application.id: {{YARN_APPLICATION_ID}} +``` + +{{REST_PORT}} and {{NODE_IP}} should be replaced by the actual values of your JobManager Web Interface, and {{YARN_APPLICATION_ID}} should be replaced by the actual YARN application ID of Flink. + +### Set up Flink CDC +Download the tar file of Flink CDC from [release page](https://github.com/apache/flink-cdc/releases), then extract the archive: + +```bash +tar -xzf flink-cdc-*.tar.gz +``` + +Extracted `flink-cdc` contains four directories: `bin`,`lib`,`log` and `conf`. + +Download the connector jars from [release page](https://github.com/apache/flink-cdc/releases), and move it to the `lib` directory. 
+Download links are available only for stable releases, SNAPSHOT dependencies need to be built based on specific branch by yourself. + +### Submit a Flink CDC Job +Here is an example file for synchronizing the entire database `mysql-to-doris.yaml`: + +```yaml +################################################################################ +# Description: Sync MySQL all tables to Doris +################################################################################ +source: + type: mysql + hostname: localhost + port: 3306 + username: root + password: 123456 + tables: app_db.\.* + server-id: 5400-5404 + server-time-zone: UTC + +sink: + type: doris + fenodes: 127.0.0.1:8030 + username: root + password: "" + +pipeline: + name: Sync MySQL Database to Doris + parallelism: 2 + +``` + +You need to modify the configuration file according to your needs. +Finally, submit job to Flink Standalone cluster using Cli. + +```bash +cd /path/flink-cdc-* +./bin/flink-cdc.sh mysql-to-doris.yaml +``` + +After successful submission, the return information is as follows: + +```bash +Pipeline has been submitted to cluster. +Job ID: ae30f4580f1918bebf16752d4963dc54 +Job Description: Sync MySQL Database to Doris +``` + +You can find a job named `Sync MySQL Database to Doris` running through Flink Web UI. + +Please note that submitting to application mode cluster and per-job mode cluster are not supported for now. diff --git a/docs/content.zh/docs/developer-guide/contribute-to-flink-cdc.md b/docs/content.zh/docs/developer-guide/contribute-to-flink-cdc.md index 5bb6e0a694..39c41f6846 100644 --- a/docs/content.zh/docs/developer-guide/contribute-to-flink-cdc.md +++ b/docs/content.zh/docs/developer-guide/contribute-to-flink-cdc.md @@ -50,7 +50,7 @@ Flink CDC 项目通过众多贡献者的代码贡献来维护,改进和拓展 如果您想要为 Flink CDC 贡献代码,可以通过如下的方式。 -1. 首先在 [Flink jira](https://issues.
    apache.org/jira/projects/FLINK/issues) 的想要负责的 issue +1. 首先在 [Flink jira](https://issues.
    apache.org/jira/projects/FLINK/issues) 的想要负责的 issue 下评论(最好在评论中解释下对于这个问题的理解,和后续的设计,如果可能的话也可以提供下 POC 的代码)。 2. 在这个 issue 被分配给你后,开始进行开发实现(提交信息请遵循`[FLINK-xxx][xxx] xxxxxxx`的格式)。 3. 开发完成后可以向 [Flink CDC](https://github.com/apache/flink-cdc) 项目提交 PR(请确保 Clone 的项目 committer 有操作权限)。 @@ -65,7 +65,7 @@ Flink CDC 项目通过众多贡献者的代码贡献来维护,改进和拓展 评审时,需要检查对应的 PR 是否合理的描述了本次修改的内容,能否支持评审人较快的理解和评审代码。对于比较琐碎的修改,不需要提供太过详细的信息。 -2. 提交的 PR 代码质量是否符合标准? +2. 提交的 PR 代码质量是否符合标准? - 代码是否遵循正确的软件开发习惯? - 代码是否正确,鲁棒性如何,是否便于维护和拓展,是否是可测试的? diff --git a/docs/content.zh/docs/developer-guide/understand-flink-cdc-api.md b/docs/content.zh/docs/developer-guide/understand-flink-cdc-api.md index 8a71c80d64..f9163e87f9 100644 --- a/docs/content.zh/docs/developer-guide/understand-flink-cdc-api.md +++ b/docs/content.zh/docs/developer-guide/understand-flink-cdc-api.md @@ -5,6 +5,7 @@ type: docs aliases: - /developer-guide/understand-flink-cdc-api --- + + +# Understand Flink CDC API + +If you are planning to build your own Flink CDC connectors, or considering +contributing to Flink CDC, you might want to hava a deeper look at the APIs of +Flink CDC. This document will go through some important concepts and interfaces +in order to help you with your development. + +## Event + +An event under the context of Flink CDC is a special kind of record in Flink's +data stream. It describes the captured changes in the external system on source +side, gets processed and transformed by internal operators built by Flink CDC, +and finally passed to data sink then write or applied to the external system on +sink side. + +Each change event contains the table ID it belongs to, and the payload that the +event carries. Based on the type of payload, we categorize events into these +kinds: + +### DataChangeEvent + +DataChangeEvent describes data changes in the source. It consists of 5 fields + +- `Table ID`: table ID it belongs to +- `Before`: pre-image of the data +- `After`: post-image of the data +- `Operation type`: type of the change operation +- `Meta`: metadata of the change + +For the operation type field, we pre-define 4 operation types: + +- Insert: new data entry, with `before = null` and `after = new data` +- Delete: removal of data, with `before = removed` data and `after = null` +- Update: update of existed data, with `before = data before change` + and `after = data after change` +- Replace: + +### SchemaChangeEvent + +SchemaChangeEvent describes schema changes in the source. Compared to +DataChangeEvent, the payload of SchemaChangeEvent describes changes in the table +structure in the external system, including: + +- `AddColumnEvent`: new column in the table +- `AlterColumnTypeEvent`: type change of a column +- `CreateTableEvent`: creation of a new table. Also used to describe the schema + of + a pre-emitted DataChangeEvent +- `DropColumnEvent`: removal of a column +- `RenameColumnEvent`: name change of a column + +### Flow of Events + +As you may have noticed, data change event doesn't have its schema bound with +it. This reduces the size of data change event and the overhead of +serialization, but makes it not self-descriptive Then how does the framework +know how to interpret the data change event? + +To resolve the problem, the framework adds a requirement to the flow of events: +a `CreateTableEvent` must be emitted before any `DataChangeEvent` if a table is +new to the framework, and `SchemaChangeEvent` must be emitted before any +`DataChangeEvent` if the schema of a table is changed. 
This requirement makes +sure that the framework has been aware of the schema before processing any data +changes. + +{{< img src="/fig/flow-of-events.png" alt="Flow of Events" >}} + +## Data Source + +Data source works as a factory of `EventSource` and `MetadataAccessor`, +constructing runtime implementations of source that captures changes from +external system and provides metadata. + +`EventSource` is a Flink source that reads changes, converts them to events +, then emits to downstream Flink operators. You can refer +to [Flink documentation](https://nightlies.apache.org/flink/flink-docs-stable/docs/dev/datastream/sources/) +to learn internals and how to implement a Flink source. + +`MetadataAccessor` serves as the metadata reader of the external system, by +listing namespaces, schemas and tables, and provide the table schema (table +structure) of the given table ID. + +## Data Sink + +Symmetrical with data source, data sink consists of `EventSink` +and `MetadataApplier`, which writes data change events and apply schema +changes (metadata changes) to external system. + +`EventSink` is a Flink sink that receives change event from upstream operator, +and apply them to the external system. Currently we only support Flink's Sink V2 +API. + +`MetadataApplier` will be used to handle schema changes. When the framework +receives schema change event from source, after making some internal +synchronizations and flushes, it will apply the schema change to +external system via this applier. diff --git a/docs/content.zh/docs/get-started/introduction.md b/docs/content.zh/docs/get-started/introduction.md index c285cff5cf..d0d4af7a84 100644 --- a/docs/content.zh/docs/get-started/introduction.md +++ b/docs/content.zh/docs/get-started/introduction.md @@ -3,7 +3,7 @@ title: "Introduction" weight: 1 type: docs aliases: - - /get-started/introdution/ + - /get-started/introduction/ --- + +# Welcome to Flink CDC 🎉 + +Flink CDC is a streaming data integration tool that aims to provide users with +a more robust API. It allows users to describe their ETL pipeline logic via YAML +elegantly and help users automatically generating customized Flink operators and +submitting job. Flink CDC prioritizes optimizing the task submission process and +offers enhanced functionalities such as schema evolution, data transformation, +full database synchronization and exactly-once semantic. + +Deeply integrated with and powered by Apache Flink, Flink CDC provides: + +* ✅ End-to-end data integration framework +* ✅ API for data integration users to build jobs easily +* ✅ Multi-table support in Source / Sink +* ✅ Synchronization of entire databases +* ✅ Schema evolution capability + +## How to Use Flink CDC + +Flink CDC provides an YAML-formatted user API that more suitable for data +integration scenarios. Here's an example YAML file defining a data pipeline that +ingests real-time changes from MySQL, and synchronize them to Apache Doris: + +```yaml +source: + type: mysql + hostname: localhost + port: 3306 + username: root + password: 123456 + tables: app_db.\.* + server-id: 5400-5404 + server-time-zone: UTC + +sink: + type: doris + fenodes: 127.0.0.1:8030 + username: root + password: "" + table.create.properties.light_schema_change: true + table.create.properties.replication_num: 1 + +pipeline: + name: Sync MySQL Database to Doris + parallelism: 2 +``` + +By submitting the YAML file with `flink-cdc.sh`, a Flink job will be compiled +and deployed to a designated Flink cluster. 
Please refer to [Core Concept]({{< +ref "docs/core-concept/data-pipeline" >}}) to get full documentation of all +supported functionalities of a pipeline. + +## Write Your First Flink CDC Pipeline + +Explore Flink CDC document to get hands on your first real-time data integration +pipeline: + +### Quickstart + +Check out the quickstart guide to learn how to establish a Flink CDC pipeline: + +- [MySQL to Apache Doris]({{< ref "docs/get-started/quickstart/mysql-to-doris" >}}) +- [MySQL to StarRocks]({{< ref "docs/get-started/quickstart/mysql-to-starrocks" >}}) + +### Understand Core Concepts + +Get familiar with core concepts we introduced in Flink CDC and try to build +more complex pipelines: + +- [Data Pipeline]({{< ref "docs/core-concept/data-pipeline" >}}) +- [Data Source]({{< ref "docs/core-concept/data-source" >}}) +- [Data Sink]({{< ref "docs/core-concept/data-sink" >}}) +- [Table ID]({{< ref "docs/core-concept/table-id" >}}) +- [Transform]({{< ref "docs/core-concept/transform" >}}) +- [Route]({{< ref "docs/core-concept/route" >}}) + +### Submit Pipeline to Flink Cluster + +Learn how to submit the pipeline to Flink cluster running on different +deployment mode: + +- [standalone]({{< ref "docs/deployment/standalone" >}}) +- [Kubernetes]({{< ref "docs/deployment/kubernetes" >}}) +- [YARN]({{< ref "docs/deployment/yarn" >}}) + +## Development and Contribution + +If you want to connect Flink CDC to your customized external system, or +contributing to the framework itself, these sections could be helpful: + +- Understand [Flink CDC APIs]({{< ref "docs/developer-guide/understand-flink-cdc-api" >}}) + to develop your own Flink CDC connector +- Learn about how to [contributing to Flink CDC]({{< ref "docs/developer-guide/contribute-to-flink-cdc" >}}) +- Check out [licenses]({{< ref "docs/developer-guide/licenses" >}}) used by Flink CDC diff --git a/docs/content/docs/get-started/quickstart/mysql-to-doris.md b/docs/content/docs/get-started/quickstart/mysql-to-doris.md index 21244f9224..c48ceb785c 100644 --- a/docs/content/docs/get-started/quickstart/mysql-to-doris.md +++ b/docs/content/docs/get-started/quickstart/mysql-to-doris.md @@ -26,7 +26,7 @@ under the License. # Streaming ELT from MySQL to Doris -This tutorial is to show how to quickly build a Streaming ELT job from MySQL to StarRocks using Flink CDC, including the +This tutorial is to show how to quickly build a Streaming ELT job from MySQL to Doris using Flink CDC, including the feature of sync all table of one database, schema change evolution and sync sharding tables into one table. All exercises in this tutorial are performed in the Flink CDC CLI, and the entire process uses standard SQL syntax, without a single line of Java/Scala code or IDE installation.
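Once the pipeline YAML is written, the whole job is started with a single CLI call from the extracted Flink CDC distribution, for example:

```bash
cd /path/flink-cdc-*
./bin/flink-cdc.sh mysql-to-doris.yaml
```

After a successful submission, the CLI prints the job ID and the pipeline name.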
-CDC type
+Flink CDC type
 StarRocks type
-NOTE
+Note