-
Notifications
You must be signed in to change notification settings - Fork 2
/
glue.yml
88 lines (88 loc) · 3.24 KB
/
glue.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# Deployment bucket; its name is built from the service name and the stage.
bucketDeploy: ${self:service}-${sls:stage}-glue-bucket-deploy
# NOTE(review): presumably makes the deployment create the bucket - confirm
# against the plugin documentation.
createBucket: true
# Empty key prefix (the job arguments in this file reference objects directly
# under the bucket root).
s3Prefix: ''
# Glue job definitions.
jobs:
# Spark job that runs jobs/pyspark_hello_world.py.
- name: pyspark_hello_world
  type: spark
  scriptPath: jobs/pyspark_hello_world.py
  glueVersion: python3-4.0
  role: !Sub arn:aws:iam::${AWS::AccountId}:role/GlueJobRole/GlueJobRole
  # Capacity and retry limits.
  WorkerType: Standard
  NumberOfWorkers: 1
  MaxConcurrentRuns: 3
  MaxRetries: 1
  Timeout: 3000
  # GlueConnection is referenced by logical ID; it is not defined in this file.
  Connections:
  - !Ref GlueConnection
  DefaultArguments:
    usePostgresDriver: true
    enableS3ParquetOptimizedCommitter: true
    enableGlueDatacatalog: true
    enableMetrics: true
    # Continuous logging goes to a per-service, per-stage log group.
    enableContinuousCloudwatchLog: true
    enableContinuousLogFilter: true
    continuousLogLogGroup: ${self:service}-${sls:stage}-logs
    # Spark UI event logs are stored under this job's own prefix.
    enableSparkUi: true
    sparkEventLogsPath: s3://${self:service}-${sls:stage}-glue-bucket-deploy/pyspark_hello_world/spark-events-logs/
    tempDir: s3://${self:service}-${sls:stage}-glue-bucket-deploy/tmp/
    # Comma-separated list with no spaces; both archives are listed under
    # SupportFiles below.
    extraPyFiles: s3://${self:service}-${sls:stage}-glue-bucket-deploy/libs.zip,s3://${self:service}-${sls:stage}-glue-bucket-deploy/jobs.zip
    customArguments:
      # Extra Python packages to install for the job.
      additional-python-modules: 'tldextract==3.3.0'
      # Extra argument handed to the pyspark_hello_world script.
      CUSTOM_ARGUMENT: 'CUSTOM_VALUE'
  # Local archives to upload to the deployment bucket.
  SupportFiles:
  - local_path: libs.zip
    s3_bucket: ${self:service}-${sls:stage}-glue-bucket-deploy
    s3_prefix: ''
    execute_upload: true
  - local_path: jobs.zip
    s3_bucket: ${self:service}-${sls:stage}-glue-bucket-deploy
    s3_prefix: ''
    execute_upload: true
# Python shell job that runs jobs/pythonshell_hello_world.py.
- name: pythonshell_hello_world
  type: pythonshell
  scriptPath: jobs/pythonshell_hello_world.py
  glueVersion: python3-4.0
  role: !Sub arn:aws:iam::${AWS::AccountId}:role/GlueJobRole/GlueJobRole
  # NOTE(review): Glue Python shell jobs are normally sized with MaxCapacity
  # rather than WorkerType - confirm this setting is accepted on deploy.
  WorkerType: Standard
  MaxConcurrentRuns: 3
  MaxRetries: 1
  Timeout: 3000
  # GlueConnection is referenced by logical ID; it is not defined in this file.
  Connections:
  - !Ref GlueConnection
  DefaultArguments:
    usePostgresDriver: true
    enableS3ParquetOptimizedCommitter: true
    enableGlueDatacatalog: true
    enableMetrics: true
    # Continuous logging goes to a per-service, per-stage log group.
    enableContinuousCloudwatchLog: true
    enableContinuousLogFilter: true
    continuousLogLogGroup: ${self:service}-${sls:stage}-logs
    tempDir: s3://${self:service}-${sls:stage}-glue-bucket-deploy/tmp/
    # Comma-separated list with no spaces; both archives are listed under
    # SupportFiles below.
    extraPyFiles: s3://${self:service}-${sls:stage}-glue-bucket-deploy/libs.zip,s3://${self:service}-${sls:stage}-glue-bucket-deploy/jobs.zip
    customArguments:
      # Extra Python packages to install for the job.
      additional-python-modules: 'tldextract==3.3.0'
      # Extra argument handed to the pythonshell_hello_world script.
      CUSTOM_ARGUMENT: 'CUSTOM_VALUE'
  # Local archives to upload to the deployment bucket.
  SupportFiles:
  - local_path: libs.zip
    s3_bucket: ${self:service}-${sls:stage}-glue-bucket-deploy
    s3_prefix: ''
    execute_upload: true
  - local_path: jobs.zip
    s3_bucket: ${self:service}-${sls:stage}-glue-bucket-deploy
    s3_prefix: ''
    execute_upload: true
# Glue triggers. Every action `name` has to be the `name` of a job declared
# under `jobs` in this file.
triggers:
# Starts the Spark job.
- name: trigger1
  Tags:
    trigger_tag_pyspark: pyspark
  actions:
  # Full job name; the short form `pyspark` matches no declared job.
  - name: pyspark_hello_world
# Starts the Python shell job.
- name: trigger2
  Tags:
    trigger_tag_pythonshell: pythonshell_hello_world
  actions:
  - name: pythonshell_hello_world