Skip to content

Commit

Permalink
支持本地语音TTS模型chatTTS
Browse files Browse the repository at this point in the history
  • Loading branch information
ddean2009 committed Jul 13, 2024
1 parent f176e0c commit 27825a5
Show file tree
Hide file tree
Showing 16 changed files with 466 additions and 80 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,12 +152,13 @@

# 更新预告

* 接下来会接入本地语音模型,敬请期待
* 已经支持本地语音模型ChatTTS了,教程稍后上
* 视频自动发布功能已经上线了!!!! 使用教程[MoneyPrinterPlus一键发布短视频到视频号,抖音,快手,小红书上线了](https://mp.weixin.qq.com/s/QOpSbb_q01E0CWAI3KQ89w)

* 会支持更多本地语音模型。

# 更新列表

- 20240713 支持本地语音模型ChatTTS
- 20240710 支持本地大模型:Ollama
- 20240708 逆天了!自动发布视频功能上线了。支持抖音,快手,小红书,视频号!!!
- 20240704 添加自动安装和自动启动脚本,方便小白使用。
Expand Down
1 change: 1 addition & 0 deletions chattts/1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
蘁淰敕欀摃誌緘義囡胹讵祪萀梂晳癧亇婇嚕儝揇偩贻咆煴淀蠀欐萵貺箚弃胦菍弁夞皥焆喦卢狧乩夏淔莨臃赽奛溕筡誑緶貿捨讖卢瑫嬅哙硚惣蚵刻玏炉跸徱澾登嬖絢烇嫷媓蔢产虜椪眕俟徊吞詸愣備恍珳湉璑訷珽菹訴痙濽圴謗瘾皡憖啤囊偐惏嶩役磅惃碝贬貇行楝薇磉数綊蟊弤夋荄壪攫撧杶岈硯葳赛悫宸岩稼琜串汏僎灡峂蝇筋茹聈柵焵皿綏缊橥爝澺縬樢訣潙许壚朔仑螽穨糼稰礌漖噍脠庭穪栽嚽袿蟢朁睬筸獸蜍荃俜椉狴掠歾泓葁潚蚗刣悬縶執萏淪肬涼覎培煟苇攁蕘瞥覹緌玽忖熒苼偶巴氶壡卝僕聥栘袴瞗匥弯剫堎搒烅芡渢蒺仉濃猿焳觔吼嚾簬伋諿圀晑牣缄澜枡溒甆欌槙螶璭惝賙扣氒嘕質僜乧畭徉蟖裔既流橊卺奪襾耨嬖脡甆槡巢誸倦訐忂匼俵宰凥覡穰捠斋孖瀤謹讗揲害祩歊蠯旸忎継亍憭徿礯蜷絕凵腂凾疼渴痳旑賧槢浃圕畧晖庞捻翺岊澛縃婳哵喳唗趢咊綼倅佹艅丽趔攪懦蟜牢庨蒘薪蜩煐揈羄获话涴婔傊庪蚫曃氻肙瞥响丹粫璯蕷舺捆搞爳瞻僱潜袄恛懝嗀碥嶎椓一奥濇嵊卂燡懼礅護懭爋蚿檠蟔氖謻淫曇乯槙孓僷疶笺慛誏籜扰固嚲幦吲朸罺眅晝噱簭椼嘎坷嬢粆师恢埨伮跭侂庒瞭幕擛裌藩屙径皎蕾猨徲徎俬渰畣瓂嵭璌砟勗睃沭吾嗅端匈椃棒瓁刉觤伎虗貉柨燜緷奦曛綡拷撮箓縳蠺綢臑栳愆蛴聱嫼亞人翢疋貼横査艼妽菪梷薓棆焉彘撙蝳籯嬎谡毮牥狊垦岩刡趄虾葤纵爩媳泟惏撙剗瓕濂届竨跘匊殱幓你侜羯籕匐璾凡樃俋臺虘蝄懇罶悥孆击捪蛖畋屁蠐蟦埙夬俟抗籵惉柌箼瞀庻勨串捅窮氶賰燧捵蕓汐藈噱臷児汱留翷枾昅想慱羆蚅聢珹礦諅坔嚇缤冫窙蟓壡洦啓茖汬嶉賭汯紡屒揁熀蛾数篧哞撌塔妥蓗懘犌富圃胃莧絗喘葔改脧焛摆儭庥挖謪擾緖蓐卼褟萎磗侻恏嫒愗欮樞羻喻厚欫参姿剝堬絊挒暘擋緷貧妖欷牶诬囌揋膝湷觸柗灚烚誵暡讟卒縉乍跊疥褧皏菈吓穭脓呲挿燐藒澬珹嗧茪芝灲吋崩请瀓蜋棦掙沝刴彸褕缥誐喘胤櫂愄娇肥吥匚佯揔舔瑪燣孲珬谱炆夤梑狕祠痸浾薐萂暟葯俴涊怰蕲眞煍嘷趌褖弹硒囑琋焧截嵨蘈卥呬畸痾厾橓槔赒熰毪稵囨瀺綰穧楳囹籽窷俆坵萵澳瘏穉焬睳洲蓴懬膄揳妦悰尯堇翩葾弉忲昦蟝慎摏衃榶硟兡啥焛堵汼殗搩枌狎斳蒞貼敱叏刳梋莯椥刣吿埓仹熖悲嫿嫤哆怔祸嵢狴斻肎唤樵糪禾瓺摏璂跨卶欢刖薁嬼蚨壳栮余育熪跭讘勖亾擕硬悦痕屺櫞袁椤穟帀㴃
Binary file added chattts/seed_1397_restored_emb.pt
Binary file not shown.
Binary file added chattts/seed_1528_restored_emb.pt
Binary file not shown.
3 changes: 3 additions & 0 deletions config/config.example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ audio:
access_key_secret: ACCESS_KEY_SECRET
app_key: APP_KEY
provider: Azure
local_tts:
provider: chatTTS
server_location: http://127.0.0.1:8080/

captioning:
provider: Azure
Expand Down
1 change: 1 addition & 0 deletions config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from tools.file_utils import read_yaml, save_yaml

# Dubbing back-end kinds mapped to their display labels.
audio_types = {
    'remote': "云服务",
    'local': "本地模型",
}

# Locale codes mapped to their display names.
languages = {
    'zh-CN': "简体中文",
    'en': "english",
    'zh-TW': "繁體中文",
}

# Speech locale codes mapped to their display names.
audio_languages = {
    'zh-CN': "中文",
    'en-US': "english",
}
audio_voices_tencent = {
Expand Down
46 changes: 42 additions & 4 deletions gui.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import streamlit as st
from config.config import my_config, save_config, languages
from config.config import my_config, save_config, languages, test_config
from pages.common import common_ui
from tools.tr_utils import tr

Expand Down Expand Up @@ -45,6 +45,22 @@ def set_audio_provider():
save_config()


def set_local_audio_tts_provider():
    """Store the selected local TTS provider in the configuration and save it.

    The selection is read from ``st.session_state['local_audio_tts_provider']``
    and written to ``my_config['audio']['local_tts']['provider']``.
    """
    # NOTE(review): presumably test_config creates the nested
    # audio -> local_tts -> provider path when it is missing; confirm in
    # config.config.
    test_config(my_config, "audio", "local_tts", 'provider')
    chosen_provider = st.session_state['local_audio_tts_provider']
    local_tts_section = my_config['audio']['local_tts']
    local_tts_section['provider'] = chosen_provider
    save_config()


def get_chatTTS_server_location():
    """Return the configured ChatTTS HTTP server address.

    Returns:
        The value of ``my_config['audio']['local_tts']['server_location']``,
        or an empty string when the section or the key is missing or null.
    """
    # A YAML key written without children (``local_tts:``) loads as None, so
    # ``.get('local_tts', {})`` would hand back None and the chained ``.get``
    # would raise AttributeError; ``or {}`` covers both missing and null.
    local_tts_section = my_config['audio'].get('local_tts') or {}
    # Same normalisation for a null ``server_location`` value.
    return local_tts_section.get('server_location') or ''


def set_chatTTS_server_location():
    """Store the entered ChatTTS server address in the configuration and save it.

    The address is read from ``st.session_state['chatTTS_server_location']``
    and written to ``my_config['audio']['local_tts']['server_location']``.
    """
    # NOTE(review): presumably test_config creates the nested
    # audio -> local_tts -> server_location path when it is missing; confirm
    # in config.config.
    test_config(my_config, "audio", "local_tts", 'server_location')
    entered_location = st.session_state['chatTTS_server_location']
    local_tts_section = my_config['audio']['local_tts']
    local_tts_section['server_location'] = entered_location
    save_config()


def set_audio_key(provider, key):
if provider not in my_config['audio']:
my_config['audio'][provider] = {}
Expand Down Expand Up @@ -143,16 +159,38 @@ def set_llm_model_name(provider, key):
audio_container = st.container(border=True)
with audio_container:
st.info(tr("Audio Provider Info"))

local_tts_container = st.container(border=True)
with local_tts_container:
local_audio_tts_providers = ['chatTTS', ]
selected_local_audio_tts_provider = my_config['audio'].get('local_tts', {}).get('provider', '')
if not selected_local_audio_tts_provider:
selected_local_audio_tts_provider = 'chatTTS'
st.session_state['local_audio_tts_provider'] = selected_local_audio_tts_provider
set_local_audio_tts_provider()
selected_local_audio_tts_provider_index = 0
for i, provider in enumerate(local_audio_tts_providers):
if provider == selected_local_audio_tts_provider:
selected_local_audio_tts_provider_index = i
break

local_audio_tts_provider = st.selectbox(tr("Local Audio TTS Provider"), options=local_audio_tts_providers,
index=selected_local_audio_tts_provider_index,
key='local_audio_tts_provider', on_change=set_local_audio_tts_provider)
st.text_input(label=tr("ChatTTS http server location"), placeholder=tr("Input chatTTS http server address"),
value=get_chatTTS_server_location(),
key="chatTTS_server_location", on_change=set_chatTTS_server_location)

audio_providers = ['Azure', 'Ali', 'Tencent']
# audio_providers = ['Azure']
selected_audio_provider = my_config['audio']['provider']
selected_audio_provider_index = 0
for i, provider in enumerate(audio_providers):
if provider == selected_audio_provider:
selected_audio_provider_index = i
break

audio_provider = st.selectbox(tr("Audio Provider"), options=audio_providers, index=selected_audio_provider_index,
audio_provider = st.selectbox(tr("Remote Audio Provider"), options=audio_providers,
index=selected_audio_provider_index,
key='audio_provider', on_change=set_audio_provider)
with st.expander(audio_provider, expanded=True):
if audio_provider == 'Azure':
Expand Down Expand Up @@ -209,7 +247,7 @@ def set_llm_model_name(provider, key):
# 设置默认的LLM
llm_container = st.container(border=True)
with (llm_container):
llm_providers = ['OpenAI', 'Moonshot', 'Azure', 'Qianfan', 'Baichuan', 'Tongyi', 'DeepSeek','Ollama']
llm_providers = ['OpenAI', 'Moonshot', 'Azure', 'Qianfan', 'Baichuan', 'Tongyi', 'DeepSeek', 'Ollama']
saved_llm_provider = my_config['llm']['provider']
saved_llm_provider_index = 0
for i, provider in enumerate(llm_providers):
Expand Down
11 changes: 10 additions & 1 deletion locales/zh-CN.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
"Model Name": "Model Name",

"Audio Provider Info": "配置音频库信息",
"Audio Provider": "音频库",
"Remote Audio Provider": "云服务音频库",
"Local Audio TTS Provider": "本地语音TTS",
"ChatTTS http server location": "ChatTTS Http Server地址(http://127.0.0.1:8080/)",
"Generate Video dubbing": "生成视频配音",
"Testing Audio": "试听声音",
"Audio Azure config": "需要开通微软speech服务",
Expand All @@ -40,6 +42,13 @@
"Background music": "背景音乐",
"Background music volume": "背景音乐音量(默认0.3)",
"Enable background music": "是否开启背景音乐",
"Choose audio type": "选择配音类型",
"Refine text": "是否口语化",
"Audio Temperature": "Audio Temperature波动性",
"top_P": "top_P相关性",
"top_K": "top_K相似性",
"Use random voice": "是否使用随机声音",
"Local Chattts Dir": "本地chatTTS音色目录(以.pt或者.txt结尾)",

"Video Subtitles": "字幕配置",
"Enable subtitles": "是否开启字幕",
Expand Down
53 changes: 37 additions & 16 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
from config.config import my_config, audio_voices_azure, audio_voices_ali, audio_voices_tencent
from services.audio.alitts_service import AliAudioService
from services.audio.azure_service import AzureAudioService
from services.audio.chattts_service import ChatTTSAudioService
from services.audio.tencent_tts_service import TencentAudioService
from services.captioning.captioning_service import generate_caption, add_subtitles
from services.hunjian.hunjian_service import concat_audio_list, get_audio_and_video_list
from services.hunjian.hunjian_service import concat_audio_list, get_audio_and_video_list, get_audio_and_video_list_local
from services.llm.azure_service import MyAzureService
from services.llm.baichuan_service import MyBaichuanService
from services.llm.baidu_qianfan_service import BaiduQianfanService
Expand Down Expand Up @@ -103,6 +104,13 @@ def main_generate_video_content():
print("main_generate_video_content end")


def main_try_test_local_audio():
    """Pass a fixed sample sentence to ``ChatTTSAudioService.read_with_content``."""
    print("main_try_test_local_audio begin")
    local_service = ChatTTSAudioService()
    sample_text = "你好,我是程序那些事"
    local_service.read_with_content(sample_text)


def main_try_test_audio():
print("main_try_test_audio begin")
audio_service = get_audio_service()
Expand All @@ -122,32 +130,45 @@ def main_try_test_audio():

def main_generate_video_dubbing():
    """Generate the dubbing audio file for the current video content.

    Reads ``video_content`` from the session, picks a fresh ``.wav`` path under
    ``audio_output_dir`` and records it in
    ``st.session_state["audio_output_file"]``. The audio is then produced by
    the remote audio service when ``st.session_state["audio_type"]`` equals
    ``"remote"``, and by ``ChatTTSAudioService`` otherwise.

    Returns early, without generating audio, when ``video_content`` is not
    set, or when the remote path is chosen and ``audio_voice`` is not set.
    """
    print("main_generate_video_dubbing begin")
    # Validate the content first so nothing else runs without it.
    video_content = get_must_session_option("video_content", "请先设置视频主题")
    if video_content is None:
        return

    temp_file_name = random_with_system_time()
    audio_output_file = os.path.join(audio_output_dir, str(temp_file_name) + ".wav")
    st.session_state["audio_output_file"] = audio_output_file

    if st.session_state.get("audio_type") == "remote":
        print("use remote audio")
        audio_service = get_audio_service()
        audio_rate = get_audio_rate()
        audio_voice = get_must_session_option("audio_voice", "请先设置配音语音")
        if audio_voice is None:
            return
        audio_service.save_with_ssml(video_content,
                                     audio_output_file,
                                     audio_voice,
                                     audio_rate)
    else:
        print("use local audio")
        audio_service = ChatTTSAudioService()
        audio_service.chat_with_content(video_content, audio_output_file)
    # Extend the audio by 2 seconds so it does not end abruptly.
    extent_audio(audio_output_file, 2)
    print("main_generate_video_dubbing end")


def main_generate_video_dubbing_for_mix():
    """Generate the dubbing audio list used for mixed-clip videos.

    When ``st.session_state["audio_type"]`` equals ``"remote"`` the lists come
    from ``get_audio_and_video_list`` with the remote audio service and rate;
    otherwise they come from ``get_audio_and_video_list_local`` with a
    ``ChatTTSAudioService``. The results are stored in
    ``st.session_state["audio_output_file_list"]`` and
    ``st.session_state["video_dir_list"]``.
    """
    print("main_generate_video_dubbing_for_mix begin")
    if st.session_state.get("audio_type") == "remote":
        print("use remote audio")
        audio_service = get_audio_service()
        audio_rate = get_audio_rate()
        audio_output_file_list, video_dir_list = get_audio_and_video_list(audio_service, audio_rate)
    else:
        print("use local audio")
        audio_service = ChatTTSAudioService()
        audio_output_file_list, video_dir_list = get_audio_and_video_list_local(audio_service)
    st.session_state["audio_output_file_list"] = audio_output_file_list
    st.session_state["video_dir_list"] = video_dir_list
    print("main_generate_video_dubbing_for_mix end")
Expand Down
Loading

0 comments on commit 27825a5

Please sign in to comment.