spotty.yaml.tmpl
# You must delete disks manually on GCP :\
# https://console.cloud.google.com/compute/disks?project=hear2021-evaluation
project:
  name: spotty-heareval
  syncFilters:
    - exclude:
        - .idea/*
        - .git/*
        - '*/__pycache__/*'
        - _workdir/*
        - tasks/*
        - embeddings/*
        - .mypy_cache/*
        - lightning_logs/*
        - heareval.egg-info/*
        - pretrained/*
        - wandb/*
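
# Sketch (assuming the standard spotty CLI): after editing files locally, re-upload
# the project to a running instance with:
#   spotty sync
# Anything matching the exclude patterns above is never uploaded.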
containers:
  - projectDir: /workspace/project
    #file: docker/Dockerfile-cuda11.2
    image: turian/heareval
    #image: turian/heareval:cuda11.2
    # ports:
    #   # TensorBoard
    #   - containerPort: 6006
    #     hostPort: 6006
    #   # Jupyter
    #   - containerPort: 8888
    #     hostPort: 8888
    volumeMounts:
      - name: workspace
        mountPath: /workspace
    runtimeParameters: ['--shm-size', '20G']
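
# Sketch (assuming the standard spotty CLI): once the instance is up, open a
# shell inside this container with:
#   spotty sh
# The project is synced to projectDir above, so work from /workspace/project.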
instances:
  - name: spotty-heareval-i1-USERNAME
    provider: gcp
    parameters:
      # https://cloud.google.com/compute/docs/gpus/gpu-regions-zones
      zone: europe-west4-a
      # V100
      machineType: n1-standard-4
      # One CPU seems to exhaust memory and crash
      #machineType: n1-standard-1
      ## A100. only west3 or maybe west4? :\
      #machineType: a2-highgpu-1g
      preemptibleInstance: True
      gpu:
        type: nvidia-tesla-v100
        #type: nvidia-tesla-a100
        count: 1
      # https://console.cloud.google.com/compute/images?project=hear2021-evaluation
      # https://github.com/spotty-cloud/spotty/issues/102
      #imageUri: projects/ml-images/global/images/family/common-dl-gpu-debian-10
      imageUri: projects/ml-images/global/images/c0-deeplearning-common-cu110-v20210818-debian-10
      # spotInstance: True
      # ports: [6006, 8888]
      # dockerDataRoot: /docker
      volumes:
        - name: workspace
          parameters:
            size: 250
            # Not implemented for GCP, all volumes will be retained
            # deletionPolicy: retain
            mountDir: /workspace
        # - name: docker
        #   parameters:
        #     size: 200
        #     mountDir: /docker
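
# Sketch (assuming the standard spotty CLI): launch and tear down this instance with:
#   spotty start
#   spotty stop
# The instance is preemptible, so GCP can reclaim it at any time; per the note at
# the top of this file, the workspace disk must be deleted manually in the console.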
# Pick a sample rate (SR): download all tasks, or only the 48000 Hz versions:
# gsutil -m cp gs://hear2021/open-tasks/hear2021-task-*.tar . && for f in hear2021-task-*.tar; do tar xf "$f"; done
# gsutil -m cp gs://hear2021/open-tasks/hear2021-task-*sr48000.tar . && for f in hear2021-task-*sr48000.tar; do tar xf "$f"; done
# wget https://github.com/hearbenchmark/hear-baseline/raw/main/saved_models/naive_baseline.pt
# python3 -m heareval.embeddings.runner hearbaseline --model naive_baseline.pt
# python3 -m heareval.predictions.runner hearbaseline --model ./naive_baseline.pt
# python3 -m heareval.evaluation.runner
#
scripts:
  get: |
    gsutil -m cp gs://hear2021/open-tasks/hear2021-task-*.tar . && for f in hear2021-task-*.tar; do tar xf "$f"; done
  # setup: |
  #   bash setup.sh
  # train: |
  #   bash train.sh
  # tensorboard: |
  #   tensorboard --bind_all --port 6006 --logdir /workspace/project/logs
  # jupyter: |
  #   jupyter notebook --allow-root --ip 0.0.0.0 --notebook-dir=/workspace/project
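
# Sketch (assuming the standard spotty CLI): execute the `get` script above on the
# instance to download and unpack the task tarballs:
#   spotty run get
# then run the heareval embedding/prediction/evaluation commands listed above from
# a `spotty sh` session.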