tpch_clickhouse #1218
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: "tpch_clickhouse" | |
on: | |
push: | |
branches: | |
- 'release-**' | |
paths-ignore: | |
- 'docs/**' | |
pull_request: | |
branches: | |
- 'release-**' | |
paths-ignore: | |
- 'docs/**' | |
schedule: | |
- cron: '30 20 * * *' | |
workflow_dispatch: | |
jobs: | |
tpch-clickhouse: | |
strategy: | |
fail-fast: false | |
matrix: | |
type: [ 'normal', 'limitcache'] | |
# type: ['local'] | |
runs-on: ubuntu-20.04 | |
steps: | |
- name: Checkout | |
uses: actions/checkout@v3 | |
with: | |
fetch-depth: 1 | |
- name: Build | |
uses: ./.github/actions/build | |
- name: Run Redis | |
shell: bash | |
run: | | |
sudo docker run -d --name redis -v redis-data:/data \ | |
-p 6379:6379 redis redis-server --appendonly yes | |
- name: Setup minio | |
shell: bash | |
run: | | |
docker run -d -p 9010:9000 --name minio \ | |
-e "MINIO_ACCESS_KEY=minioadmin" \ | |
-e "MINIO_SECRET_KEY=minioadmin" \ | |
-v /tmp/data:/data \ | |
-v /tmp/config:/root/.minio \ | |
minio/minio server /data | |
sleep 5 | |
- name: Juicefs Format | |
shell: bash | |
run: | | |
sudo ./juicefs format --trash-days 0 --storage minio --bucket http://127.0.0.1:9010/mypics \ | |
--access-key minioadmin \ | |
--secret-key minioadmin \ | |
redis://127.0.0.1:6379/1 pics | |
- name: Juicefs Mount | |
shell: bash | |
run: | | |
if [ "${{matrix.type}}" = "limitcache" ]; then | |
echo "mount: type is limitcache" | |
sudo ./juicefs mount -d redis://127.0.0.1:6379/1 /data/jfs --cache-size 3072 --no-usage-report | |
elif [ "${{matrix.type}}" = "normal" ]; then | |
echo "mount: type is normal" | |
sudo ./juicefs mount -d redis://127.0.0.1:6379/1 /data/jfs --no-usage-report | |
elif [ "${{matrix.type}}" = "local" ]; then | |
echo "mount: type is local" | |
sudo mkdir -p /data/jfs/ | |
sudo chmod -R 777 /data/ | |
else | |
echo "<FATAL>: invalid type" | |
exit 1 | |
fi | |
- name: Install ClickHouse | |
shell: bash | |
run: | | |
sudo .github/scripts/apt_install.sh apt-transport-https ca-certificates dirmngr | |
echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee /etc/apt/sources.list.d/clickhouse.list | |
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754 | |
sudo DEBIAN_FRONTEND=noninteractive .github/scripts/apt_install.sh clickhouse-server clickhouse-client | |
- name: Change Datadir | |
shell: bash | |
run: | | |
sudo chmod 777 /etc/clickhouse-server/ | |
sudo chmod 777 /etc/clickhouse-server/config.xml | |
sudo sed -i "s?<path>/var/lib/clickhouse/</path>?<path>/data/jfs/</path>?" /etc/clickhouse-server/config.xml | |
sudo service clickhouse-server start | |
sudo service clickhouse-server status | |
ls -lah /data/jfs/ | |
- name: Generate Data | |
shell: bash | |
run: | | |
cd /data/jfs/ | |
git clone https://github.com/sanwan/chdata.git || (rm -rf chdata;git clone https://github.com/sanwan/chdata.git) | |
cd chdata | |
cat lineorder.tbl.gz* >lineorder.tbl.gz | |
gzip -dk lineorder.tbl.gz | |
git clone https://github.com/sanwan/actionfile.git || (rm -rf actionfile;git clone https://github.com/sanwan/actionfile.git) | |
mv actionfile/*.tbl . | |
rm lineorder.tbl.gz* | |
ls -lrth | |
#sudo du -sh /data/jfs/ | |
df -lh | |
- name: Create Table | |
shell: bash | |
run: | | |
clickhouse-client --query "CREATE TABLE customer | |
( | |
C_CUSTKEY UInt32, | |
C_NAME String, | |
C_ADDRESS String, | |
C_CITY LowCardinality(String), | |
C_NATION LowCardinality(String), | |
C_REGION LowCardinality(String), | |
C_PHONE String, | |
C_MKTSEGMENT LowCardinality(String) | |
) | |
ENGINE = MergeTree ORDER BY (C_CUSTKEY);" | |
clickhouse-client --query "CREATE TABLE lineorder | |
( | |
LO_ORDERKEY UInt32, | |
LO_LINENUMBER UInt8, | |
LO_CUSTKEY UInt32, | |
LO_PARTKEY UInt32, | |
LO_SUPPKEY UInt32, | |
LO_ORDERDATE Date, | |
LO_ORDERPRIORITY LowCardinality(String), | |
LO_SHIPPRIORITY UInt8, | |
LO_QUANTITY UInt8, | |
LO_EXTENDEDPRICE UInt32, | |
LO_ORDTOTALPRICE UInt32, | |
LO_DISCOUNT UInt8, | |
LO_REVENUE UInt32, | |
LO_SUPPLYCOST UInt32, | |
LO_TAX UInt8, | |
LO_COMMITDATE Date, | |
LO_SHIPMODE LowCardinality(String) | |
) | |
ENGINE = MergeTree PARTITION BY toYear(LO_ORDERDATE) ORDER BY (LO_ORDERDATE, LO_ORDERKEY);" | |
clickhouse-client --query "CREATE TABLE part | |
( | |
P_PARTKEY UInt32, | |
P_NAME String, | |
P_MFGR LowCardinality(String), | |
P_CATEGORY LowCardinality(String), | |
P_BRAND LowCardinality(String), | |
P_COLOR LowCardinality(String), | |
P_TYPE LowCardinality(String), | |
P_SIZE UInt8, | |
P_CONTAINER LowCardinality(String) | |
) | |
ENGINE = MergeTree ORDER BY P_PARTKEY;" | |
clickhouse-client --query "CREATE TABLE supplier | |
( | |
S_SUPPKEY UInt32, | |
S_NAME String, | |
S_ADDRESS String, | |
S_CITY LowCardinality(String), | |
S_NATION LowCardinality(String), | |
S_REGION LowCardinality(String), | |
S_PHONE String | |
) | |
ENGINE = MergeTree ORDER BY S_SUPPKEY;" | |
- name: Import Data | |
shell: bash | |
run: | | |
df -lh | |
cd /data/jfs/chdata/ | |
clickhouse-client --query "INSERT INTO customer FORMAT CSV" < customer.tbl | |
clickhouse-client --query "INSERT INTO part FORMAT CSV" < part.tbl | |
clickhouse-client --query "INSERT INTO supplier FORMAT CSV" < supplier.tbl | |
time clickhouse-client --query "INSERT INTO lineorder FORMAT CSV" < lineorder.tbl | |
df -lh | |
- name: ClickHouse Test | |
shell: bash | |
run: | | |
clickhouse-client --query "SET max_memory_usage = 20000000000;" | |
time clickhouse-client --query "CREATE TABLE lineorder_flat | |
ENGINE = MergeTree | |
PARTITION BY toYear(LO_ORDERDATE) | |
ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS | |
SELECT | |
l.LO_ORDERKEY AS LO_ORDERKEY, | |
l.LO_LINENUMBER AS LO_LINENUMBER, | |
l.LO_CUSTKEY AS LO_CUSTKEY, | |
l.LO_PARTKEY AS LO_PARTKEY, | |
l.LO_SUPPKEY AS LO_SUPPKEY, | |
l.LO_ORDERDATE AS LO_ORDERDATE, | |
l.LO_ORDERPRIORITY AS LO_ORDERPRIORITY, | |
l.LO_SHIPPRIORITY AS LO_SHIPPRIORITY, | |
l.LO_QUANTITY AS LO_QUANTITY, | |
l.LO_EXTENDEDPRICE AS LO_EXTENDEDPRICE, | |
l.LO_ORDTOTALPRICE AS LO_ORDTOTALPRICE, | |
l.LO_DISCOUNT AS LO_DISCOUNT, | |
l.LO_REVENUE AS LO_REVENUE, | |
l.LO_SUPPLYCOST AS LO_SUPPLYCOST, | |
l.LO_TAX AS LO_TAX, | |
l.LO_COMMITDATE AS LO_COMMITDATE, | |
l.LO_SHIPMODE AS LO_SHIPMODE, | |
c.C_NAME AS C_NAME, | |
c.C_ADDRESS AS C_ADDRESS, | |
c.C_CITY AS C_CITY, | |
c.C_NATION AS C_NATION, | |
c.C_REGION AS C_REGION, | |
c.C_PHONE AS C_PHONE, | |
c.C_MKTSEGMENT AS C_MKTSEGMENT, | |
s.S_NAME AS S_NAME, | |
s.S_ADDRESS AS S_ADDRESS, | |
s.S_CITY AS S_CITY, | |
s.S_NATION AS S_NATION, | |
s.S_REGION AS S_REGION, | |
s.S_PHONE AS S_PHONE, | |
p.P_NAME AS P_NAME, | |
p.P_MFGR AS P_MFGR, | |
p.P_CATEGORY AS P_CATEGORY, | |
p.P_BRAND AS P_BRAND, | |
p.P_COLOR AS P_COLOR, | |
p.P_TYPE AS P_TYPE, | |
p.P_SIZE AS P_SIZE, | |
p.P_CONTAINER AS P_CONTAINER | |
FROM lineorder AS l | |
INNER JOIN customer AS c ON c.C_CUSTKEY = l.LO_CUSTKEY | |
INNER JOIN supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEY | |
INNER JOIN part AS p ON p.P_PARTKEY = l.LO_PARTKEY;" | |
echo "Q1.1" | |
time clickhouse-client --query "SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue | |
FROM lineorder_flat | |
WHERE toYear(LO_ORDERDATE) = 1993 AND LO_DISCOUNT BETWEEN 1 AND 3 AND LO_QUANTITY < 25;" | |
echo "Q1.2" | |
time clickhouse-client --query "SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue | |
FROM lineorder_flat | |
WHERE toYYYYMM(LO_ORDERDATE) = 199401 AND LO_DISCOUNT BETWEEN 4 AND 6 AND LO_QUANTITY BETWEEN 26 AND 35;" | |
echo "Q2.1" | |
time clickhouse-client --query "SELECT | |
sum(LO_REVENUE), | |
toYear(LO_ORDERDATE) AS year, | |
P_BRAND | |
FROM lineorder_flat | |
WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA' | |
GROUP BY | |
year, | |
P_BRAND | |
ORDER BY | |
year, | |
P_BRAND;" | |
echo "Q3.1" | |
time clickhouse-client --query "SELECT | |
C_NATION, | |
S_NATION, | |
toYear(LO_ORDERDATE) AS year, | |
sum(LO_REVENUE) AS revenue | |
FROM lineorder_flat | |
WHERE C_REGION = 'ASIA' AND S_REGION = 'ASIA' AND year >= 1992 AND year <= 1997 | |
GROUP BY | |
C_NATION, | |
S_NATION, | |
year | |
ORDER BY | |
year ASC, | |
revenue DESC;" | |
echo "Q4.1" | |
time clickhouse-client --query "SELECT | |
toYear(LO_ORDERDATE) AS year, | |
C_NATION, | |
sum(LO_REVENUE - LO_SUPPLYCOST) AS profit | |
FROM lineorder_flat | |
WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2') | |
GROUP BY | |
year, | |
C_NATION | |
ORDER BY | |
year ASC, | |
C_NATION ASC;" | |
df -lh | |
- name: Log | |
shell: bash | |
if: always() | |
run: | | |
if [ -f /var/log/juicefs.log ]; then | |
echo "juicefs log" | |
sudo tail -n 1000 /var/log/juicefs.log | |
grep "<FATAL>:" /var/log/juicefs.log && exit 1 || true | |
fi | |
- name: Send Slack Notification | |
if: failure() | |
uses: juicedata/slack-notify-action@main | |
with: | |
channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}" | |
slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}" |