-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain.py
99 lines (75 loc) · 2.67 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
'''
train.py
MODIFIED FROM:
Pedro Sarmento, Adarsh Kumar, C J Carr, Zack Zukowski, Mathieu
Barthet, and Yi-Hsuan Yang. Dadagp: A dataset of tokenized guitarpro
songs for sequence models, 2021.
Sara Adkins 2022 Modifications
* Configured so model can run in parallel on multiple GPUs
'''
import os
import json
import yaml
import pickle
import datetime
import numpy as np
from model_ead import TransformerXL
import torch.distributed as dist
import torch.multiprocessing as mp
def setup(rank, world_size):
os.environ['MASTER_ADDR'] = 'localhost'
os.environ['MASTER_PORT'] = '12355'
# initialize the process group
dist.init_process_group("gloo", rank=rank, world_size=world_size)
def cleanup():
    """Tear down the distributed process group created by setup()."""
    dist.destroy_process_group()
def main(rank, world_size):
    """Per-process training entry point (target of mp.spawn).

    Sets up the distributed group, loads vocab and data, builds the
    TransformerXL model on this rank's device, and runs training.

    Args:
        rank: rank of this spawned process (also used as device index).
        world_size: total number of processes.
    """
    setup(rank, world_size)
    modelConfig, trainConfig = get_configs(rank)
    # Wait here so rank 0 has finished creating the experiment dir and
    # writing the config copy before any other rank proceeds.
    dist.barrier()

    # load dictionary — use context managers so the handles are closed
    # (the originals leaked the open file objects)
    with open(trainConfig['vocab_data_path'], 'rb') as f:
        event2word = pickle.load(f)
    with open(trainConfig['rev_vocab_data_path'], 'rb') as f:
        word2event = pickle.load(f)

    # load train / validation data
    training_data = np.load(trainConfig['data_path'])
    validation_data = np.load(trainConfig['val_path'])

    # checkpoint path (or falsy) to resume training from
    resume = trainConfig['resume_training_model']

    # declare model
    model = TransformerXL(
        modelConfig,
        rank,
        event2word=event2word,
        word2event=word2event,
        is_training=True)

    # train
    model.train(training_data,
                validation_data,
                trainConfig,
                resume)
    cleanup()
def get_configs(rank):
    """Load MODEL/TRAIN configs from the experiment yaml file.

    On rank 0 only: creates a timestamped experiment directory, records it
    in trainConfig['experiment_dir'], writes a copy of the config there,
    and prints both config sections.

    Args:
        rank: process rank; directory creation happens only on rank 0.

    Returns:
        (modelConfig, trainConfig) dicts. Note that non-zero ranks do NOT
        get the 'experiment_dir' key added to trainConfig.
    """
    # context manager so the config file handle is closed (original leaked it)
    with open("full-data-config_5_lat1024.yml", 'r') as f:
        cfg = yaml.full_load(f)
    modelConfig = cfg['MODEL']
    trainConfig = cfg['TRAIN']

    if rank == 0:
        cur_date = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
        experiment_Dir = os.path.join(trainConfig['output_dir'], "5_lat1024" + cur_date)
        if not os.path.exists(experiment_Dir):
            print('Creating experiment_dir:', experiment_Dir)
        # exist_ok avoids the exists()/makedirs() race of the original
        os.makedirs(experiment_Dir, exist_ok=True)
        print('Experiment: ', experiment_Dir)
        trainConfig.update({'experiment_dir': experiment_Dir})
        # snapshot the config alongside the experiment outputs
        with open(os.path.join(experiment_Dir, 'full-data-config.yml'), 'w') as f:
            yaml.dump(cfg, f)
        print('='*5, 'Model configs', '='*5)
        print(json.dumps(modelConfig, indent=1, sort_keys=True))
        # banner width fixed to match the 'Model configs' banner ('='*2 was a typo)
        print('='*5, 'Training configs', '='*5)
        print(json.dumps(trainConfig, indent=1, sort_keys=True))
    return modelConfig, trainConfig
if __name__ == '__main__':
    # Spawn one training process per configured rank; main receives
    # (rank, world_size) — rank is supplied by mp.spawn itself.
    n_procs = 1
    mp.spawn(main, args=(n_procs,), nprocs=n_procs, join=True)