Client Improvement

[GUI] Disable the error report when no NVIDIA GPU is available
[GUI] Resolve the issue where the dataset creator of GPT-SoVITS fails to save edit results
[Core] Address the problem where the dataset creator of VITS encounters errors while creating temporary folders
[Core] Reconstruct VITS2
Update README
Spr-Aachen · Jul 3, 2024 · 2a268a4 · 2a268a4
1 parent 1d7fe6d
commit 2a268a4
Show file tree

Hide file tree

Showing 54 changed files with 2,352 additions and 1,457 deletions.
diff --git a/.gitignore b/.gitignore
@@ -24,6 +24,7 @@
 **/GPT_SoVITS/GPT_SoVITS/pretrained_models/
 **/GPT_SoVITS/GPT_SoVITS/text/
 **/GPT_SoVITS/GPT_SoVITS/tools/i18n/locale/
+**/GPT_SoVITS/GPT_SoVITS/inference_webui.py
 **/GPT_SoVITS/GPT_SoVITS/my_utils.py
 **/GPT_SoVITS/GPT_SoVITS/onnx_export.py
 **/GPT_SoVITS/GPT_SoVITS/process_ckpt.py

diff --git a/EVT_Core/Dataset/GPT_SoVITS/utils/Creating_Directories.py b/EVT_Core/Dataset/GPT_SoVITS/utils/Creating_Directories.py
@@ -2,26 +2,26 @@
 
 
 def create_directories(
-    wav_dir_prepared,
     wav_dir_split,
+    csv_dir_prepared,
     csv_dir_merged,
     csv_dir_final
 ):
     '''
     Create csv directory
     '''
-    if not os.path.exists(wav_dir_prepared):
-        try:
-            os.makedirs(wav_dir_prepared, exist_ok = True)
-        except OSError:
-            print('Creation of directory %s failed' %wav_dir_prepared)
-
     if not os.path.exists(wav_dir_split):
         try:
             os.makedirs(wav_dir_split, exist_ok = True)
         except OSError:
             print('Creation of directory %s failed' %wav_dir_split)
 
+    if not os.path.exists(csv_dir_prepared):
+        try:
+            os.makedirs(csv_dir_prepared, exist_ok = True)
+        except OSError:
+            print('Creation of directory %s failed' %csv_dir_prepared)
+
     if not os.path.exists(csv_dir_merged):
         try:
             os.makedirs(csv_dir_merged, exist_ok = True)

diff --git a/EVT_Core/Dataset/VITS/utils/Creating_Directories.py b/EVT_Core/Dataset/VITS/utils/Creating_Directories.py
@@ -4,6 +4,7 @@
 def create_directories(
     wav_dir_prepared,
     wav_dir_split,
+    csv_dir_prepared,
     csv_dir_merged,
     csv_dir_final
 ):
@@ -22,6 +23,12 @@ def create_directories(
         except OSError:
             print('Creation of directory %s failed' %wav_dir_split)
 
+    if not os.path.exists(csv_dir_prepared):
+        try:
+            os.makedirs(csv_dir_prepared, exist_ok = True)
+        except OSError:
+            print('Creation of directory %s failed' %csv_dir_prepared)
+
     if not os.path.exists(csv_dir_merged):
         try:
             os.makedirs(csv_dir_merged, exist_ok = True)

diff --git a/EVT_Core/TTS/GPT_SoVITS/Convert.py b/EVT_Core/TTS/GPT_SoVITS/Convert.py
@@ -124,7 +124,7 @@ def change_tts_inference(
         os.environ["infer_ttswebui"]=str(webui_port_infer_tts)
         os.environ["is_share"]=str(is_share)
         os.environ['USE_WEBUI']=str(use_webui)
-        cmd = f'"{python_exec}" "GPT_SoVITS/inference.py"'
+        cmd = f'"{python_exec}" "GPT_SoVITS/inference_gui.py"'
         print("TTS推理进程已开启")
         print(cmd)
         p_tts_inference = subprocess.Popen(cmd, shell=True)

diff --git a/EVT_Core/TTS/GPT_SoVITS/GPT_SoVITS/inference_gui.py b/EVT_Core/TTS/GPT_SoVITS/GPT_SoVITS/inference_gui.py
@@ -0,0 +1,310 @@
+import os
+import sys
+from PyQt5.QtCore import QEvent
+from PyQt5.QtWidgets import QApplication, QMainWindow, QLabel, QLineEdit, QPushButton, QTextEdit
+from PyQt5.QtWidgets import QGridLayout, QVBoxLayout, QWidget, QFileDialog, QStatusBar, QComboBox
+import soundfile as sf
+
+from tools.i18n.i18n import I18nAuto
+i18n = I18nAuto()
+
+from inference_webui import gpt_path, sovits_path, change_gpt_weights, change_sovits_weights, get_tts_wav
+
+
+class GPTSoVITSGUI(QMainWindow):
+    GPT_Path = gpt_path
+    SoVITS_Path = sovits_path
+
+    def __init__(self):
+        super().__init__()
+
+        self.setWindowTitle('GPT-SoVITS GUI')
+        self.setGeometry(800, 450, 950, 850)
+
+        self.setStyleSheet("""
+            QWidget {
+                background-color: #a3d3b1; 
+            }
+
+            QTabWidget::pane {
+                background-color: #a3d3b1;  
+            }
+
+            QTabWidget::tab-bar {
+                alignment: left;
+            }
+
+            QTabBar::tab {
+                background: #8da4bf; 
+                color: #ffffff;  
+                padding: 8px;
+            }
+
+            QTabBar::tab:selected {
+                background: #2a3f54; 
+            }
+
+            QLabel {
+                color: #000000;  
+            }
+
+            QPushButton {
+                background-color: #4CAF50; 
+                color: white;  
+                padding: 8px;
+                border: 1px solid #4CAF50;
+                border-radius: 4px;
+            }
+
+            QPushButton:hover {
+                background-color: #45a049;  
+                border: 1px solid #45a049;
+                box-shadow: 2px 2px 2px rgba(0, 0, 0, 0.1);
+            }
+        """)    
+
+        license_text = (
+        "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. "
+        "如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.")
+        license_label = QLabel(license_text)
+        license_label.setWordWrap(True)
+
+        self.GPT_model_label = QLabel("选择GPT模型:")
+        self.GPT_model_input = QLineEdit()
+        self.GPT_model_input.setPlaceholderText("拖拽或选择文件")
+        self.GPT_model_input.setText(self.GPT_Path)
+        self.GPT_model_input.setReadOnly(True)
+        self.GPT_model_button = QPushButton("选择GPT模型文件")
+        self.GPT_model_button.clicked.connect(self.select_GPT_model)
+
+        self.SoVITS_model_label = QLabel("选择SoVITS模型:")
+        self.SoVITS_model_input = QLineEdit()
+        self.SoVITS_model_input.setPlaceholderText("拖拽或选择文件")
+        self.SoVITS_model_input.setText(self.SoVITS_Path)
+        self.SoVITS_model_input.setReadOnly(True)
+        self.SoVITS_model_button = QPushButton("选择SoVITS模型文件")
+        self.SoVITS_model_button.clicked.connect(self.select_SoVITS_model)
+
+        self.ref_audio_label = QLabel("上传参考音频:")
+        self.ref_audio_input = QLineEdit()
+        self.ref_audio_input.setPlaceholderText("拖拽或选择文件")
+        self.ref_audio_input.setReadOnly(True)
+        self.ref_audio_button = QPushButton("选择音频文件")
+        self.ref_audio_button.clicked.connect(self.select_ref_audio)
+
+        self.ref_text_label = QLabel("参考音频文本:")
+        self.ref_text_input = QLineEdit()
+        self.ref_text_input.setPlaceholderText("直接输入文字或上传文本")
+        self.ref_text_button = QPushButton("上传文本")
+        self.ref_text_button.clicked.connect(self.upload_ref_text)
+
+        self.ref_language_label = QLabel("参考音频语言:")
+        self.ref_language_combobox = QComboBox()
+        self.ref_language_combobox.addItems(["中文", "英文", "日文", "中英混合", "日英混合", "多语种混合"])
+        self.ref_language_combobox.setCurrentText("多语种混合")
+
+        self.target_text_label = QLabel("合成目标文本:")
+        self.target_text_input = QLineEdit()
+        self.target_text_input.setPlaceholderText("直接输入文字或上传文本")
+        self.target_text_button = QPushButton("上传文本")
+        self.target_text_button.clicked.connect(self.upload_target_text)
+
+        self.target_language_label = QLabel("合成音频语言:")
+        self.target_language_combobox = QComboBox()
+        self.target_language_combobox.addItems(["中文", "英文", "日文", "中英混合", "日英混合", "多语种混合"])
+        self.target_language_combobox.setCurrentText("多语种混合")
+
+        self.output_label = QLabel("输出音频路径:")
+        self.output_input = QLineEdit()
+        self.output_input.setPlaceholderText("拖拽或选择文件")
+        self.output_input.setReadOnly(True)
+        self.output_button = QPushButton("选择文件夹")
+        self.output_button.clicked.connect(self.select_output_path)
+
+        self.output_text = QTextEdit()
+        self.output_text.setReadOnly(True)
+
+        self.add_drag_drop_events([
+            self.GPT_model_input,
+            self.SoVITS_model_input,
+            self.ref_audio_input,
+            self.ref_text_input,
+            self.target_text_input,
+            self.output_input,
+        ])
+
+        self.synthesize_button = QPushButton("合成")
+        self.synthesize_button.clicked.connect(self.synthesize)
+
+        self.clear_output_button = QPushButton("清空输出")
+        self.clear_output_button.clicked.connect(self.clear_output)
+
+        self.status_bar = QStatusBar()
+
+        main_layout = QVBoxLayout()
+
+        input_layout = QGridLayout(self)
+        input_layout.setSpacing(10)
+
+        input_layout.addWidget(license_label, 0, 0, 1, 3)
+
+        input_layout.addWidget(self.GPT_model_label, 1, 0)
+        input_layout.addWidget(self.GPT_model_input, 2, 0, 1, 2)
+        input_layout.addWidget(self.GPT_model_button, 2, 2)
+
+        input_layout.addWidget(self.SoVITS_model_label, 3, 0)
+        input_layout.addWidget(self.SoVITS_model_input, 4, 0, 1, 2)
+        input_layout.addWidget(self.SoVITS_model_button, 4, 2)
+
+        input_layout.addWidget(self.ref_audio_label, 5, 0)
+        input_layout.addWidget(self.ref_audio_input, 6, 0, 1, 2)
+        input_layout.addWidget(self.ref_audio_button, 6, 2)
+
+        input_layout.addWidget(self.ref_language_label, 7, 0)
+        input_layout.addWidget(self.ref_language_combobox, 8, 0, 1, 1)
+        input_layout.addWidget(self.ref_text_label, 9, 0)
+        input_layout.addWidget(self.ref_text_input, 10, 0, 1, 2)
+        input_layout.addWidget(self.ref_text_button, 10, 2)
+
+        input_layout.addWidget(self.target_language_label, 11, 0)
+        input_layout.addWidget(self.target_language_combobox, 12, 0, 1, 1)
+        input_layout.addWidget(self.target_text_label, 13, 0)
+        input_layout.addWidget(self.target_text_input, 14, 0, 1, 2)
+        input_layout.addWidget(self.target_text_button, 14, 2)
+
+        input_layout.addWidget(self.output_label, 15, 0)
+        input_layout.addWidget(self.output_input, 16, 0, 1, 2)
+        input_layout.addWidget(self.output_button, 16, 2)
+
+        main_layout.addLayout(input_layout)
+
+        output_layout = QVBoxLayout()
+        output_layout.addWidget(self.output_text)
+        main_layout.addLayout(output_layout)
+
+        main_layout.addWidget(self.synthesize_button)
+
+        main_layout.addWidget(self.clear_output_button)
+
+        main_layout.addWidget(self.status_bar)
+
+        self.central_widget = QWidget()
+        self.central_widget.setLayout(main_layout)
+        self.setCentralWidget(self.central_widget)
+
+    def dragEnterEvent(self, event):
+        if event.mimeData().hasUrls():
+            event.acceptProposedAction()
+
+    def dropEvent(self, event):
+        if event.mimeData().hasUrls():
+            file_paths = [url.toLocalFile() for url in event.mimeData().urls()]
+            if len(file_paths) == 1:
+                self.update_ref_audio(file_paths[0])
+            else:
+                self.update_ref_audio(", ".join(file_paths))
+
+    def add_drag_drop_events(self, widgets):
+        for widget in widgets:
+            widget.setAcceptDrops(True)
+            widget.installEventFilter(self)
+
+    def eventFilter(self, obj, event):
+        if event.type() in (QEvent.DragEnter, QEvent.Drop):
+            mime_data = event.mimeData()
+            if mime_data.hasUrls():
+                event.acceptProposedAction()
+
+        return super().eventFilter(obj, event)
+
+    def select_GPT_model(self):
+        file_path, _ = QFileDialog.getOpenFileName(self, "选择GPT模型文件", "", "GPT Files (*.ckpt)")
+        if file_path:
+            self.GPT_model_input.setText(file_path)
+
+    def select_SoVITS_model(self):
+        file_path, _ = QFileDialog.getOpenFileName(self, "选择SoVITS模型文件", "", "SoVITS Files (*.pth)")
+        if file_path:
+            self.SoVITS_model_input.setText(file_path)
+
+    def select_ref_audio(self):
+        file_path, _ = QFileDialog.getOpenFileName(self, "选择参考音频文件", "", "Audio Files (*.wav *.mp3)")
+        if file_path:
+            self.update_ref_audio(file_path)
+
+    def upload_ref_text(self):
+        file_path, _ = QFileDialog.getOpenFileName(self, "选择文本文件", "", "Text Files (*.txt)")
+        if file_path:
+            with open(file_path, 'r', encoding='utf-8') as file:
+                content = file.read()
+                self.ref_text_input.setText(content)
+
+    def upload_target_text(self):
+        file_path, _ = QFileDialog.getOpenFileName(self, "选择文本文件", "", "Text Files (*.txt)")
+        if file_path:
+            with open(file_path, 'r', encoding='utf-8') as file:
+                content = file.read()
+                self.target_text_input.setText(content)
+
+    def select_output_path(self):
+        options = QFileDialog.Options()
+        options |= QFileDialog.DontUseNativeDialog
+        options |= QFileDialog.ShowDirsOnly
+
+        folder_dialog = QFileDialog()
+        folder_dialog.setOptions(options)
+        folder_dialog.setFileMode(QFileDialog.Directory)
+
+        if folder_dialog.exec_():
+            folder_path = folder_dialog.selectedFiles()[0]
+            self.output_input.setText(folder_path)
+
+    def update_ref_audio(self, file_path):
+        self.ref_audio_input.setText(file_path)
+
+    def clear_output(self):
+        self.output_text.clear()
+
+    def synthesize(self):
+        GPT_model_path = self.GPT_model_input.text()
+        SoVITS_model_path = self.SoVITS_model_input.text()
+        ref_audio_path = self.ref_audio_input.text()
+        language_combobox = self.ref_language_combobox.currentText()
+        language_combobox = i18n(language_combobox)
+        ref_text = self.ref_text_input.text()
+        target_language_combobox = self.target_language_combobox.currentText()
+        target_language_combobox = i18n(target_language_combobox)
+        target_text = self.target_text_input.text()
+        output_path = self.output_input.text()
+
+        if GPT_model_path != self.GPT_Path:
+            change_gpt_weights(gpt_path=GPT_model_path)
+            self.GPT_Path = GPT_model_path
+        if SoVITS_model_path != self.SoVITS_Path:
+            change_sovits_weights(sovits_path=SoVITS_model_path)
+            self.SoVITS_Path = SoVITS_model_path
+
+        synthesis_result = get_tts_wav(ref_wav_path=ref_audio_path, 
+                                       prompt_text=ref_text, 
+                                       prompt_language=language_combobox, 
+                                       text=target_text, 
+                                       text_language=target_language_combobox)
+
+        result_list = list(synthesis_result)
+
+        if result_list:
+            last_sampling_rate, last_audio_data = result_list[-1]
+            output_wav_path = os.path.join(output_path, "output.wav") 
+            sf.write(output_wav_path, last_audio_data, last_sampling_rate)
+
+            result = "Audio saved to " + output_wav_path
+
+        self.status_bar.showMessage("合成完成！输出路径：" + output_wav_path, 5000)
+        self.output_text.append("处理结果：\n" + result)
+
+
+if __name__ == '__main__':
+    app = QApplication(sys.argv)
+    mainWin = GPTSoVITSGUI()
+    mainWin.show()
+    sys.exit(app.exec_())