diff --git a/voicevox_engine/app/routers/morphing.py b/voicevox_engine/app/routers/morphing.py
index 01c9ff8..fba340f 100644
--- a/voicevox_engine/app/routers/morphing.py
+++ b/voicevox_engine/app/routers/morphing.py
@@ -1,14 +1,12 @@
"""モーフィング機能を提供する API Router"""
+import io
from functools import lru_cache
-from tempfile import NamedTemporaryFile
from typing import Annotated
import soundfile
-from fastapi import APIRouter, HTTPException, Query
+from fastapi import APIRouter, HTTPException, Query, Response
from pydantic.json_schema import SkipJsonSchema
-from starlette.background import BackgroundTask
-from starlette.responses import FileResponse
from voicevox_engine.aivm_manager import AivmManager
from voicevox_engine.metas.Metas import StyleId
@@ -24,7 +22,6 @@
)
from voicevox_engine.morphing.morphing import synthesize_morphed_wave
from voicevox_engine.tts_pipeline.tts_engine import LATEST_VERSION, TTSEngineManager
-from voicevox_engine.utility.file_utility import try_delete_file
# キャッシュを有効化
# モジュール側でlru_cacheを指定するとキャッシュを制御しにくいため、HTTPサーバ側で指定する
@@ -72,7 +69,7 @@ def morphable_targets(
@router.post(
"/synthesis_morphing",
- response_class=FileResponse,
+ response_class=Response,
responses={
200: {
"content": {
@@ -91,7 +88,7 @@ def _synthesis_morphing(
str | SkipJsonSchema[None],
Query(description="AivisSpeech Engine ではサポートされていないパラメータです (常に無視されます) 。"),
] = None, # fmt: skip # noqa
- ) -> FileResponse:
+ ) -> Response:
"""
指定された 2 種類のスタイルで音声を合成、指定した割合でモーフィングした音声を得ます。
モーフィングの割合は `morph_rate` で指定でき、0.0 でベースのスタイル、1.0 でターゲットのスタイルに近づきます。
@@ -127,18 +124,14 @@ def _synthesis_morphing(
output_stereo=query.outputStereo,
)
- with NamedTemporaryFile(delete=False) as f:
- soundfile.write(
- file=f,
- data=morph_wave,
- samplerate=query.outputSamplingRate,
- format="WAV",
- )
-
- return FileResponse(
- f.name,
- media_type="audio/wav",
- background=BackgroundTask(try_delete_file, f.name),
+ buffer = io.BytesIO()
+ soundfile.write(
+ file=buffer,
+ data=morph_wave,
+ samplerate=query.outputSamplingRate,
+ format="WAV",
)
+ return Response(buffer.getvalue(), media_type="audio/wav")
+
return router
diff --git a/voicevox_engine/app/routers/tts_pipeline.py b/voicevox_engine/app/routers/tts_pipeline.py
index 380600c..2499fcd 100644
--- a/voicevox_engine/app/routers/tts_pipeline.py
+++ b/voicevox_engine/app/routers/tts_pipeline.py
@@ -1,15 +1,13 @@
"""音声合成機能を提供する API Router"""
+import io
import zipfile
-from tempfile import NamedTemporaryFile, TemporaryFile
from typing import Annotated, Self
import soundfile
-from fastapi import APIRouter, HTTPException, Query, Request
+from fastapi import APIRouter, HTTPException, Query, Request, Response
from pydantic import BaseModel, Field
from pydantic.json_schema import SkipJsonSchema
-from starlette.background import BackgroundTask
-from starlette.responses import FileResponse
from voicevox_engine.cancellable_engine import CancellableEngine
from voicevox_engine.core.core_adapter import DeviceSupport
@@ -32,7 +30,6 @@
Score,
)
from voicevox_engine.tts_pipeline.tts_engine import LATEST_VERSION, TTSEngineManager
-from voicevox_engine.utility.file_utility import try_delete_file
class ParseKanaBadRequest(BaseModel):
@@ -263,7 +260,7 @@ def mora_pitch(
@router.post(
"/synthesis",
- response_class=FileResponse,
+ response_class=Response,
responses={
200: {
"content": {
@@ -285,7 +282,7 @@ def synthesis(
str | SkipJsonSchema[None],
Query(description="AivisSpeech Engine ではサポートされていないパラメータです (常に無視されます) 。"),
] = None, # fmt: skip # noqa
- ) -> FileResponse:
+ ) -> Response:
"""
指定されたスタイル ID に紐づく音声合成モデルを用いて音声合成を行います。
"""
@@ -295,20 +292,16 @@ def synthesis(
query, style_id, enable_interrogative_upspeak=enable_interrogative_upspeak
)
- with NamedTemporaryFile(delete=False) as f:
- soundfile.write(
- file=f, data=wave, samplerate=query.outputSamplingRate, format="WAV"
- )
-
- return FileResponse(
- f.name,
- media_type="audio/wav",
- background=BackgroundTask(try_delete_file, f.name),
+ buffer = io.BytesIO()
+ soundfile.write(
+ file=buffer, data=wave, samplerate=query.outputSamplingRate, format="WAV"
)
+ return Response(buffer.getvalue(), media_type="audio/wav")
+
@router.post(
"/cancellable_synthesis",
- response_class=FileResponse,
+ response_class=Response,
responses={
200: {
"content": {
@@ -328,7 +321,7 @@ def cancellable_synthesis(
str | SkipJsonSchema[None],
Query(description="AivisSpeech Engine ではサポートされていないパラメータです (常に無視されます) 。"),
] = None, # fmt: skip # noqa
- ) -> FileResponse:
+ ) -> Response:
raise HTTPException(
status_code=501,
detail="Cancelable synthesis is not supported in AivisSpeech Engine.",
@@ -359,7 +352,7 @@ def cancellable_synthesis(
@router.post(
"/multi_synthesis",
- response_class=FileResponse,
+ response_class=Response,
responses={
200: {
"content": {
@@ -379,36 +372,33 @@ def multi_synthesis(
str | SkipJsonSchema[None],
Query(description="AivisSpeech Engine ではサポートされていないパラメータです (常に無視されます) 。"),
] = None, # fmt: skip # noqa
- ) -> FileResponse:
+ ) -> Response:
version = core_version or LATEST_VERSION
engine = tts_engines.get_engine(version)
sampling_rate = queries[0].outputSamplingRate
- with NamedTemporaryFile(delete=False) as f:
- with zipfile.ZipFile(f, mode="a") as zip_file:
- for i in range(len(queries)):
- if queries[i].outputSamplingRate != sampling_rate:
- raise HTTPException(
- status_code=422,
- detail="サンプリングレートが異なるクエリがあります",
- )
-
- with TemporaryFile() as wav_file:
- wave = engine.synthesize_wave(queries[i], style_id)
- soundfile.write(
- file=wav_file,
- data=wave,
- samplerate=sampling_rate,
- format="WAV",
- )
- wav_file.seek(0)
- zip_file.writestr(f"{str(i + 1).zfill(3)}.wav", wav_file.read())
+ buffer = io.BytesIO()
+ with zipfile.ZipFile(buffer, mode="a") as zip_file:
+ for i in range(len(queries)):
+ if queries[i].outputSamplingRate != sampling_rate:
+ raise HTTPException(
+ status_code=422,
+ detail="サンプリングレートが異なるクエリがあります",
+ )
+
+ wav_file_buffer = io.BytesIO()
+ wave = engine.synthesize_wave(queries[i], style_id)
+ soundfile.write(
+ file=wav_file_buffer,
+ data=wave,
+ samplerate=sampling_rate,
+ format="WAV",
+ )
+ zip_file.writestr(
+ f"{str(i + 1).zfill(3)}.wav", wav_file_buffer.getvalue()
+ )
- return FileResponse(
- f.name,
- media_type="application/zip",
- background=BackgroundTask(try_delete_file, f.name),
- )
+ return Response(buffer.getvalue(), media_type="application/zip")
@router.post(
"/sing_frame_audio_query",
@@ -483,7 +473,7 @@ def sing_frame_volume(
@router.post(
"/frame_synthesis",
- response_class=FileResponse,
+ response_class=Response,
responses={
200: {
"content": {
@@ -501,7 +491,7 @@ def frame_synthesis(
str | SkipJsonSchema[None],
Query(description="AivisSpeech Engine ではサポートされていないパラメータです (常に無視されます) 。"),
] = None, # fmt: skip # noqa
- ) -> FileResponse:
+ ) -> Response:
# """
# 歌唱音声合成を行います。
# """
@@ -517,21 +507,17 @@ def frame_synthesis(
except TalkSingInvalidInputError as e:
raise HTTPException(status_code=400, detail=str(e))
- with NamedTemporaryFile(delete=False) as f:
- soundfile.write(
- file=f, data=wave, samplerate=query.outputSamplingRate, format="WAV"
- )
-
- return FileResponse(
- f.name,
- media_type="audio/wav",
- background=BackgroundTask(try_delete_file, f.name),
+ buffer = io.BytesIO()
+ soundfile.write(
+ file=buffer, data=wave, samplerate=query.outputSamplingRate, format="WAV"
)
+
+ return Response(buffer.getvalue(), media_type="audio/wav")
"""
@router.post(
"/connect_waves",
- response_class=FileResponse,
+ response_class=Response,
responses={
200: {
"content": {
@@ -542,7 +528,7 @@ def frame_synthesis(
tags=["音声合成"],
summary="base64エンコードされた複数のwavデータを一つに結合する",
)
- def connect_waves(waves: list[str]) -> FileResponse:
+ def connect_waves(waves: list[str]) -> Response:
"""
base64エンコードされたwavデータを一纏めにし、wavファイルで返します。
"""
@@ -551,20 +537,16 @@ def connect_waves(waves: list[str]) -> FileResponse:
except ConnectBase64WavesException as err:
raise HTTPException(status_code=422, detail=str(err))
- with NamedTemporaryFile(delete=False) as f:
- soundfile.write(
- file=f,
- data=waves_nparray,
- samplerate=sampling_rate,
- format="WAV",
- )
-
- return FileResponse(
- f.name,
- media_type="audio/wav",
- background=BackgroundTask(try_delete_file, f.name),
+ buffer = io.BytesIO()
+ soundfile.write(
+ file=buffer,
+ data=waves_nparray,
+ samplerate=sampling_rate,
+ format="WAV",
)
+ return Response(buffer.getvalue(), media_type="audio/wav")
+
@router.post(
"/validate_kana",
tags=["クエリ作成"],