-
Notifications
You must be signed in to change notification settings - Fork 987
303 lines (285 loc) · 14.8 KB
/
tpch_clickhouse.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
name: "tpch_clickhouse"
on:
push:
branches:
- 'release-**'
paths-ignore:
- 'docs/**'
pull_request:
branches:
- 'release-**'
paths-ignore:
- 'docs/**'
schedule:
- cron: '30 20 * * *'
workflow_dispatch:
jobs:
tpch-clickhouse:
strategy:
fail-fast: false
matrix:
type: [ 'normal', 'limitcache']
# type: ['local']
runs-on: ubuntu-20.04
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 1
- name: Build
uses: ./.github/actions/build
- name: Run Redis
shell: bash
run: |
sudo docker run -d --name redis -v redis-data:/data \
-p 6379:6379 redis redis-server --appendonly yes
- name: Setup minio
shell: bash
run: |
docker run -d -p 9010:9000 --name minio \
-e "MINIO_ACCESS_KEY=minioadmin" \
-e "MINIO_SECRET_KEY=minioadmin" \
-v /tmp/data:/data \
-v /tmp/config:/root/.minio \
minio/minio server /data
sleep 5
- name: Juicefs Format
shell: bash
run: |
sudo ./juicefs format --trash-days 0 --storage minio --bucket http://127.0.0.1:9010/mypics \
--access-key minioadmin \
--secret-key minioadmin \
redis://127.0.0.1:6379/1 pics
- name: Juicefs Mount
shell: bash
run: |
if [ "${{matrix.type}}" = "limitcache" ]; then
echo "mount: type is limitcache"
sudo ./juicefs mount -d redis://127.0.0.1:6379/1 /data/jfs --cache-size 3072 --no-usage-report
elif [ "${{matrix.type}}" = "normal" ]; then
echo "mount: type is normal"
sudo ./juicefs mount -d redis://127.0.0.1:6379/1 /data/jfs --no-usage-report
elif [ "${{matrix.type}}" = "local" ]; then
echo "mount: type is local"
sudo mkdir -p /data/jfs/
sudo chmod -R 777 /data/
else
echo "<FATAL>: invalid type"
exit 1
fi
- name: Install ClickHouse
shell: bash
run: |
sudo .github/scripts/apt_install.sh apt-transport-https ca-certificates dirmngr
echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee /etc/apt/sources.list.d/clickhouse.list
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754
sudo DEBIAN_FRONTEND=noninteractive .github/scripts/apt_install.sh clickhouse-server clickhouse-client
- name: Change Datadir
shell: bash
run: |
sudo chmod 777 /etc/clickhouse-server/
sudo chmod 777 /etc/clickhouse-server/config.xml
sudo sed -i "s?<path>/var/lib/clickhouse/</path>?<path>/data/jfs/</path>?" /etc/clickhouse-server/config.xml
sudo service clickhouse-server start
sudo service clickhouse-server status
ls -lah /data/jfs/
- name: Generate Data
shell: bash
run: |
cd /data/jfs/
git clone https://github.com/sanwan/chdata.git || (rm -rf chdata;git clone https://github.com/sanwan/chdata.git)
cd chdata
cat lineorder.tbl.gz* >lineorder.tbl.gz
gzip -dk lineorder.tbl.gz
git clone https://github.com/sanwan/actionfile.git || (rm -rf actionfile;git clone https://github.com/sanwan/actionfile.git)
mv actionfile/*.tbl .
rm lineorder.tbl.gz*
ls -lrth
#sudo du -sh /data/jfs/
df -lh
- name: Create Table
shell: bash
run: |
clickhouse-client --query "CREATE TABLE customer
(
C_CUSTKEY UInt32,
C_NAME String,
C_ADDRESS String,
C_CITY LowCardinality(String),
C_NATION LowCardinality(String),
C_REGION LowCardinality(String),
C_PHONE String,
C_MKTSEGMENT LowCardinality(String)
)
ENGINE = MergeTree ORDER BY (C_CUSTKEY);"
clickhouse-client --query "CREATE TABLE lineorder
(
LO_ORDERKEY UInt32,
LO_LINENUMBER UInt8,
LO_CUSTKEY UInt32,
LO_PARTKEY UInt32,
LO_SUPPKEY UInt32,
LO_ORDERDATE Date,
LO_ORDERPRIORITY LowCardinality(String),
LO_SHIPPRIORITY UInt8,
LO_QUANTITY UInt8,
LO_EXTENDEDPRICE UInt32,
LO_ORDTOTALPRICE UInt32,
LO_DISCOUNT UInt8,
LO_REVENUE UInt32,
LO_SUPPLYCOST UInt32,
LO_TAX UInt8,
LO_COMMITDATE Date,
LO_SHIPMODE LowCardinality(String)
)
ENGINE = MergeTree PARTITION BY toYear(LO_ORDERDATE) ORDER BY (LO_ORDERDATE, LO_ORDERKEY);"
clickhouse-client --query "CREATE TABLE part
(
P_PARTKEY UInt32,
P_NAME String,
P_MFGR LowCardinality(String),
P_CATEGORY LowCardinality(String),
P_BRAND LowCardinality(String),
P_COLOR LowCardinality(String),
P_TYPE LowCardinality(String),
P_SIZE UInt8,
P_CONTAINER LowCardinality(String)
)
ENGINE = MergeTree ORDER BY P_PARTKEY;"
clickhouse-client --query "CREATE TABLE supplier
(
S_SUPPKEY UInt32,
S_NAME String,
S_ADDRESS String,
S_CITY LowCardinality(String),
S_NATION LowCardinality(String),
S_REGION LowCardinality(String),
S_PHONE String
)
ENGINE = MergeTree ORDER BY S_SUPPKEY;"
- name: Import Data
shell: bash
run: |
df -lh
cd /data/jfs/chdata/
clickhouse-client --query "INSERT INTO customer FORMAT CSV" < customer.tbl
clickhouse-client --query "INSERT INTO part FORMAT CSV" < part.tbl
clickhouse-client --query "INSERT INTO supplier FORMAT CSV" < supplier.tbl
time clickhouse-client --query "INSERT INTO lineorder FORMAT CSV" < lineorder.tbl
df -lh
- name: ClickHouse Test
shell: bash
run: |
clickhouse-client --query "SET max_memory_usage = 20000000000;"
time clickhouse-client --query "CREATE TABLE lineorder_flat
ENGINE = MergeTree
PARTITION BY toYear(LO_ORDERDATE)
ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS
SELECT
l.LO_ORDERKEY AS LO_ORDERKEY,
l.LO_LINENUMBER AS LO_LINENUMBER,
l.LO_CUSTKEY AS LO_CUSTKEY,
l.LO_PARTKEY AS LO_PARTKEY,
l.LO_SUPPKEY AS LO_SUPPKEY,
l.LO_ORDERDATE AS LO_ORDERDATE,
l.LO_ORDERPRIORITY AS LO_ORDERPRIORITY,
l.LO_SHIPPRIORITY AS LO_SHIPPRIORITY,
l.LO_QUANTITY AS LO_QUANTITY,
l.LO_EXTENDEDPRICE AS LO_EXTENDEDPRICE,
l.LO_ORDTOTALPRICE AS LO_ORDTOTALPRICE,
l.LO_DISCOUNT AS LO_DISCOUNT,
l.LO_REVENUE AS LO_REVENUE,
l.LO_SUPPLYCOST AS LO_SUPPLYCOST,
l.LO_TAX AS LO_TAX,
l.LO_COMMITDATE AS LO_COMMITDATE,
l.LO_SHIPMODE AS LO_SHIPMODE,
c.C_NAME AS C_NAME,
c.C_ADDRESS AS C_ADDRESS,
c.C_CITY AS C_CITY,
c.C_NATION AS C_NATION,
c.C_REGION AS C_REGION,
c.C_PHONE AS C_PHONE,
c.C_MKTSEGMENT AS C_MKTSEGMENT,
s.S_NAME AS S_NAME,
s.S_ADDRESS AS S_ADDRESS,
s.S_CITY AS S_CITY,
s.S_NATION AS S_NATION,
s.S_REGION AS S_REGION,
s.S_PHONE AS S_PHONE,
p.P_NAME AS P_NAME,
p.P_MFGR AS P_MFGR,
p.P_CATEGORY AS P_CATEGORY,
p.P_BRAND AS P_BRAND,
p.P_COLOR AS P_COLOR,
p.P_TYPE AS P_TYPE,
p.P_SIZE AS P_SIZE,
p.P_CONTAINER AS P_CONTAINER
FROM lineorder AS l
INNER JOIN customer AS c ON c.C_CUSTKEY = l.LO_CUSTKEY
INNER JOIN supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEY
INNER JOIN part AS p ON p.P_PARTKEY = l.LO_PARTKEY;"
echo "Q1.1"
time clickhouse-client --query "SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM lineorder_flat
WHERE toYear(LO_ORDERDATE) = 1993 AND LO_DISCOUNT BETWEEN 1 AND 3 AND LO_QUANTITY < 25;"
echo "Q1.2"
time clickhouse-client --query "SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM lineorder_flat
WHERE toYYYYMM(LO_ORDERDATE) = 199401 AND LO_DISCOUNT BETWEEN 4 AND 6 AND LO_QUANTITY BETWEEN 26 AND 35;"
echo "Q2.1"
time clickhouse-client --query "SELECT
sum(LO_REVENUE),
toYear(LO_ORDERDATE) AS year,
P_BRAND
FROM lineorder_flat
WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA'
GROUP BY
year,
P_BRAND
ORDER BY
year,
P_BRAND;"
echo "Q3.1"
time clickhouse-client --query "SELECT
C_NATION,
S_NATION,
toYear(LO_ORDERDATE) AS year,
sum(LO_REVENUE) AS revenue
FROM lineorder_flat
WHERE C_REGION = 'ASIA' AND S_REGION = 'ASIA' AND year >= 1992 AND year <= 1997
GROUP BY
C_NATION,
S_NATION,
year
ORDER BY
year ASC,
revenue DESC;"
echo "Q4.1"
time clickhouse-client --query "SELECT
toYear(LO_ORDERDATE) AS year,
C_NATION,
sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM lineorder_flat
WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2')
GROUP BY
year,
C_NATION
ORDER BY
year ASC,
C_NATION ASC;"
df -lh
- name: Log
shell: bash
if: always()
run: |
if [ -f /var/log/juicefs.log ]; then
echo "juicefs log"
sudo tail -n 1000 /var/log/juicefs.log
grep "<FATAL>:" /var/log/juicefs.log && exit 1 || true
fi
- name: Send Slack Notification
if: failure()
uses: juicedata/slack-notify-action@main
with:
channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"