forked from coqui-ai/TTS
-
Notifications
You must be signed in to change notification settings - Fork 1
/
train-hifigan.py
91 lines (83 loc) · 2.28 KB
/
train-hifigan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os
from trainer import Trainer, TrainerArgs
from TTS.utils.audio import AudioProcessor
from TTS.vocoder.configs import HifiganConfig
from TTS.vocoder.datasets.preprocess import load_wav_data
from TTS.vocoder.models.gan import GAN
def main():
    """Train a HiFi-GAN vocoder with Coqui TTS on 16 kHz wav data.

    Builds a ``HifiganConfig``, loads eval/train wav samples from
    ``config.data_path``, and fits a GAN vocoder, writing checkpoints
    and logs to ``output_path``.
    """
    output_path = "/content/drive/MyDrive/output"

    config = HifiganConfig(
        batch_size=64,
        eval_batch_size=32,
        num_loader_workers=8,
        num_eval_loader_workers=8,
        run_eval=True,
        test_delay_epochs=5,
        epochs=1000,
        seq_len=8192,
        pad_short=2000,
        use_noise_augment=False,
        eval_split_size=10,
        print_step=25,
        print_eval=True,
        mixed_precision=True,
        save_n_checkpoints=1,
        lr_gen=1e-4,
        lr_disc=1e-4,
        data_path="/content/sample_data/be",
        output_path=output_path,
        # Audio processing parameters; sample_rate/fft settings here must
        # match the l1_spec_loss_params below (they do: 16 kHz, 1024 FFT,
        # 256 hop, 80 mels, fmin 50, fmax 8000).
        audio={
            "fft_size": 1024,
            "win_length": 1024,
            "hop_length": 256,
            "frame_shift_ms": None,
            "frame_length_ms": None,
            "stft_pad_mode": "reflect",
            "sample_rate": 16000,
            "resample": False,
            "preemphasis": 0.0,
            "ref_level_db": 20,
            "do_sound_norm": True,
            "log_func": "np.log10",
            "do_trim_silence": True,
            "trim_db": 45,
            "do_rms_norm": False,
            "db_level": None,
            "power": 1.5,
            "griffin_lim_iters": 60,
            "num_mels": 80,
            "mel_fmin": 50,
            "mel_fmax": 8000,
            "spec_gain": 20,
            "do_amp_to_db_linear": True,
            "do_amp_to_db_mel": True,
            "pitch_fmax": 640.0,
            "pitch_fmin": 0.0,
            "signal_norm": True,
            "min_level_db": -100,
            "symmetric_norm": True,
            "max_norm": 4.0,
            "clip_norm": True,
            "stats_path": None,
        },
        l1_spec_loss_params={
            "use_mel": True,
            "sample_rate": 16000,
            "n_fft": 1024,
            "hop_length": 256,
            "win_length": 1024,
            "n_mels": 80,
            "mel_fmin": 50.0,
            "mel_fmax": 8000,
        },
    )

    # init audio processor from the config's audio section
    ap = AudioProcessor.init_from_config(config)

    # load training samples; load_wav_data returns (eval, train) in that order
    eval_samples, train_samples = load_wav_data(config.data_path, config.eval_split_size)

    # init model
    model = GAN(config, ap)

    # init the trainer and 🚀
    trainer = Trainer(
        TrainerArgs(),
        config,
        output_path,
        model=model,
        train_samples=train_samples,
        eval_samples=eval_samples,
    )
    trainer.fit()


# Entry-point guard is required: num_loader_workers=8 spawns DataLoader worker
# processes that re-import this module; without the guard, spawn-based platforms
# (Windows/macOS) would re-execute training recursively in every worker.
if __name__ == "__main__":
    main()