forked from MaximumEntropy/Seq2Seq-PyTorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
wmt14_data.py
35 lines (32 loc) · 847 Bytes
/
wmt14_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Data-loading hyperparameters for the WMT14 training split
# (source side: train.en, target side: train.fr).
# NOTE(review): the key layout (paired source/target sub-dicts) looks like a
# paired-text dataset config -- confirm against the loader that consumes it.
training_data_hparams = {
    "shuffle": True,
    "num_epochs": 1,
    "batch_size": 80,
    # NOTE(review): presumably makes the loader skip a trailing batch that has
    # fewer than `batch_size` examples -- verify in the loader.
    "allow_smaller_final_batch": False,
    "source_dataset": {
        "files": ["data/wmt14/train.en"],
        "vocab_file": "data/wmt14/vocab.en",
        # Same length limit on both sides; units (tokens vs. characters) are
        # defined by the loader -- TODO confirm.
        "max_seq_length": 50,
    },
    "target_dataset": {
        "files": ["data/wmt14/train.fr"],
        "vocab_file": "data/wmt14/vocab.fr",
        "max_seq_length": 50,
    },
}
# Data-loading hyperparameters for the WMT14 test split
# (source side: test.en, target side: test.fr).
# Unlike the training config, shuffling is off and neither side sets
# "max_seq_length".
test_data_hparams = {
    "shuffle": False,
    "num_epochs": 1,
    "batch_size": 80,
    # NOTE(review): if the loader drops a short final batch when this is
    # False, some test examples may never be evaluated -- verify.
    "allow_smaller_final_batch": False,
    "source_dataset": {
        "files": ["data/wmt14/test.en"],
        "vocab_file": "data/wmt14/vocab.en",
    },
    "target_dataset": {
        "files": ["data/wmt14/test.fr"],
        "vocab_file": "data/wmt14/vocab.fr",
    },
}

# Validation reuses the test configuration. This binds a second name to the
# very same dict object (no copy is made), so an in-place change made through
# either name is visible through the other.
valid_data_hparams = test_data_hparams

# Codec name published as a module-level setting.
# NOTE(review): presumably the encoding used to read the data files; cp1252
# is a single-byte Windows code page -- confirm the corpus is not UTF-8.
encoding = "cp1252"