Skip to content

Commit

Permalink
add oss
Browse files Browse the repository at this point in the history
  • Loading branch information
zzhangpurdue committed Jul 8, 2024
1 parent a6e43f3 commit 3a5327a
Show file tree
Hide file tree
Showing 7 changed files with 145 additions and 27 deletions.
13 changes: 7 additions & 6 deletions docker/tool_node.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,18 @@ COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
RUN pip install fastapi uvicorn

COPY modelscope_agent /app/modelscope_agent
# install ffmpeg
RUN wget -O ffmpeg.tar.xz https://modelscope-agent.oss-cn-hangzhou.aliyuncs.com/resources/ffmpeg.tar.xz && \
tar xvf ffmpeg.tar.xz


ENV PYTHONPATH $PYTHONPATH:/app/modelscope_agent:/app/modelscope_agent_servers
ENV BASE_TOOL_DIR /app/assets
ENV PATH=/app/ffmpeg-git-20240629-amd64-static:$PATH

# install tool_node
COPY modelscope_agent_servers /app/modelscope_agent_servers

# install ffmpeg
RUN wget -O ffmpeg.tar.xz https://modelscope-agent.oss-cn-hangzhou.aliyuncs.com/resources/ffmpeg.tar.xz && \
tar xvf ffmpeg.tar.xz && \
export PATH=`pwd`/ffmpeg-git-20240629-amd64-static:$PATH
COPY modelscope_agent /app/modelscope_agent

# start up script file
COPY scripts/run_tool_node.sh /app/run_tool_node.sh
Expand Down
39 changes: 23 additions & 16 deletions modelscope_agent/tools/dashscope_tools/paraformer_asr_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,22 +52,29 @@ def call(self, params: str, **kwargs):
raw_audio_file = kwargs[LOCAL_FILE_PATHS][params['audio_path']]
if not os.path.exists(raw_audio_file):
raise ValueError(f'audio file {raw_audio_file} not exists')

pcm_file = os.path.join(
WORK_DIR,
os.path.basename(params['audio_path']).split('.')[0] + '.pcm')
_preprocess(raw_audio_file, pcm_file)
if not os.path.exists(pcm_file):
raise ValueError(f'convert audio to pcm file {pcm_file} failed')
recognition = Recognition(
model='paraformer-realtime-v1',
format='pcm',
sample_rate=16000,
callback=None)
response = recognition.call(
pcm_file,
api_key=token,
)
try:
pcm_file = os.path.join(
WORK_DIR,
os.path.basename(params['audio_path']).split('.')[0] + '.pcm')
_preprocess(raw_audio_file, pcm_file)
if not os.path.exists(pcm_file):
raise ValueError(
f'convert audio to pcm file {pcm_file} failed')
recognition = Recognition(
model='paraformer-realtime-v1',
format='pcm',
sample_rate=16000,
callback=None)
response = recognition.call(
pcm_file,
api_key=token,
)
except Exception as e:
import traceback
print(
f'call paraformer asr failed, error: {e}, and traceback {traceback.format_exc()}'
)
raise ValueError(f'call paraformer asr failed, error: {e}')
result = ''
if response.status_code == HTTPStatus.OK:
sentences: List[Any] = response.get_sentence()
Expand Down
25 changes: 23 additions & 2 deletions modelscope_agent/tools/dashscope_tools/sambert_tts_tool.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import os
import uuid

import dashscope
from dashscope.audio.tts import SpeechSynthesizer
from modelscope_agent.constants import ApiNames
from modelscope_agent.tools.base import BaseTool, register_tool
from modelscope_agent.tools.utils.oss import OssStorage
from modelscope_agent.tools.utils.output_wrapper import AudioWrapper
from modelscope_agent.utils.utils import get_api_key
from modelscope_agent.utils.utils import get_api_key, get_upload_url

WORK_DIR = os.getenv('CODE_INTERPRETER_WORK_DIR', '/tmp/ci_workspace')

Expand All @@ -23,6 +25,7 @@ class SambertTtsTool(BaseTool):

# Initialise the tool from an optional configuration mapping.
# NOTE(review): `cfg={}` is a mutable default argument; this method only reads
# it and forwards it to the parent initialiser -- confirm the parent does not
# mutate it.
def __init__(self, cfg={}):
# Keep only the sub-config stored under this tool's own name ({} if absent).
self.cfg = cfg.get(self.name, {})
# OSS client placeholder; `_upload_to_oss` creates the client on first use.
self.oss = None
super().__init__(cfg)

def call(self, params: str, **kwargs) -> str:
Expand All @@ -36,7 +39,8 @@ def call(self, params: str, **kwargs) -> str:
if tts_text is None or len(tts_text) == 0 or tts_text == '':
raise ValueError('tts input text is valid')
os.makedirs(WORK_DIR, exist_ok=True)
wav_file = WORK_DIR + '/sambert_tts_audio.wav'
wav_name = str(uuid.uuid4())[0:6] + '_sambert_tts_audio.wav'
wav_file = os.path.join(WORK_DIR, wav_name)
response = SpeechSynthesizer.call(
model='sambert-zhijia-v1',
format='wav',
Expand All @@ -45,8 +49,25 @@ def call(self, params: str, **kwargs) -> str:
if response.get_audio_data() is not None:
with open(wav_file, 'wb') as f:
f.write(response.get_audio_data())
if 'use_tool_api' in kwargs and kwargs['use_tool_api']:
try:
wav_url = self._upload_to_oss(wav_name, wav_file)
except Exception as e:
return (
f'Failed to save the audio file to oss with error: {e}, '
'please check the oss information')
return str(AudioWrapper(wav_url, **kwargs))
else:
raise ValueError(
f'call sambert tts failed, request id: {response.get_response()}'
)
return str(AudioWrapper(wav_file, **kwargs))

def _upload_to_oss(self, file_name: str, file_path: str):
    """Upload a local file to OSS and return a signed URL pointing at it.

    Args:
        file_name: Name used for the object inside the OSS bucket.
        file_path: Path of the local file to upload. The upload is done
            with ``OssStorage.upload`` defaults, so the local file is
            removed afterwards.

    Returns:
        The signed URL produced by ``OssStorage.get_signed_url``.

    Raises:
        ValueError: If the OSS client has to be created and the required
            OSS environment variables are not set.
        IOError: If the upload keeps failing after the retries.
    """
    if self.oss is None:
        # Create the client lazily so the tool can be constructed without
        # OSS credentials when no upload is ever requested.
        self.oss = OssStorage()
    # this path is for modelscope only, please double-check
    # OSS object keys always use '/' as separator; os.path.join would
    # produce backslashes on Windows and yield a wrong key.
    oss_path = '/'.join(('tmp', self.name, file_name))
    self.oss.upload(file_path, oss_path)
    return self.oss.get_signed_url(oss_path)
79 changes: 79 additions & 0 deletions modelscope_agent/tools/utils/oss.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import os
import time

import oss2


class OssStorage(object):
    """Small helper around an Aliyun OSS bucket: upload, fetch and sign URLs.

    The bucket location and credentials are read from the environment
    variables ``OSS_ACCESS_KEY_ID``, ``OSS_ACCESS_KEY_SECRET``,
    ``OSS_BUCKET_NAME`` and ``OSS_ENDPOINT``.
    """

    def __init__(self):
        """Create the OSS auth and bucket handles from the environment.

        Raises:
            ValueError: If any of the four required variables is unset or
                empty.
        """
        oss_access_key_id = os.getenv('OSS_ACCESS_KEY_ID', None)
        oss_access_key_secret = os.getenv('OSS_ACCESS_KEY_SECRET', None)
        oss_bucket = os.getenv('OSS_BUCKET_NAME', None)
        oss_endpoint = os.getenv('OSS_ENDPOINT', None)
        if not all((oss_access_key_id, oss_access_key_secret, oss_bucket,
                    oss_endpoint)):
            raise ValueError(
                'OSS_ACCESS_KEY_ID, OSS_ACCESS_KEY_SECRET, OSS_BUCKET_NAME, OSS_ENDPOINT must be set'
            )
        self.auth = oss2.Auth(oss_access_key_id, oss_access_key_secret)
        self.bucket = oss2.Bucket(self.auth, oss_endpoint, oss_bucket)
        self.endpoint = oss_endpoint
        self.bucket_name = oss_bucket

    @staticmethod
    def _remove_quietly(path):
        """Delete ``path``; a file that is already gone is not an error."""
        try:
            os.remove(path)
        except FileNotFoundError:
            # Nothing to clean up; must not mask the caller's real error.
            pass

    def upload(self,
               src_file,
               oss_path,
               max_retries=3,
               retry_delay=1,
               delete_src=True):
        """Upload a local file to ``oss_path``, retrying on failure.

        Args:
            src_file: Path of the local file to upload.
            oss_path: Object key inside the bucket.
            max_retries: Maximum number of attempts. Values below 1 are
                treated as 1 so the file is never deleted without having
                been uploaded.
            retry_delay: Seconds to sleep between attempts.
            delete_src: When true, remove ``src_file`` once the upload has
                succeeded or finally failed. When false, the local file is
                always left untouched.

        Raises:
            IOError: If every attempt failed; the last error is chained as
                the cause.
        """
        attempts = max(1, max_retries)
        for attempt in range(1, attempts + 1):
            try:
                with open(src_file, 'rb') as f:
                    print(f'src address is {src_file}')
                    modality_data = f.read()
                result = self.bucket.put_object(oss_path, modality_data)
                print(result)
                break
            except Exception as e:
                print(f'Error uploading file: {e}')
                if attempt < attempts:
                    print(f'Retrying in {retry_delay} seconds...')
                    time.sleep(retry_delay)
                    continue
                # Out of attempts: honour delete_src instead of removing the
                # caller's file unconditionally.
                if delete_src:
                    self._remove_quietly(src_file)
                raise IOError(f'Exceed the Max retry with error {e}') from e

        if delete_src:
            self._remove_quietly(src_file)

    def uploads(self,
                src_files,
                oss_paths,
                max_retries=3,
                retry_delay=1,
                delete_src=True):
        """Upload several files; ``src_files[i]`` goes to ``oss_paths[i]``.

        The remaining arguments are forwarded unchanged to ``upload`` for
        every file.
        """
        # get a list of files
        for idx, src_file in enumerate(src_files):
            self.upload(src_file, oss_paths[idx], max_retries, retry_delay,
                        delete_src)

    def get(self, oss_path):
        """Return the result of ``bucket.get_object`` for ``oss_path``."""
        return self.bucket.get_object(oss_path)

    def get_signed_url(self, oss_path, expire_seconds=3 * 24 * 60 * 60):
        """Return a signed GET URL for ``oss_path``.

        Args:
            oss_path: Object key inside the bucket.
            expire_seconds: Validity of the URL in seconds (default: three
                days).
        """
        return self.bucket.sign_url(
            'GET', oss_path, expire_seconds, slash_safe=True)


if __name__ == '__main__':
    # Manual smoke test: push a local image to the bucket, read it back
    # and display it. Needs the OSS_* environment variables and Pillow.
    from io import BytesIO

    from PIL import Image

    storage = OssStorage()
    remote_key = 'zzc/test.png'
    storage.upload('/Users/zhicheng/zzc2.png', remote_key, delete_src=False)
    fetched = storage.get(remote_key)
    Image.open(BytesIO(fetched.read())).show()
3 changes: 2 additions & 1 deletion modelscope_agent/tools/utils/output_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ def __init__(self, audio, **kwargs) -> None:

super().__init__()
if isinstance(audio, str):
if 'use_tool_api' in kwargs and 'https://' in audio:
# use_tool_api should use no file, just bypass url or base64
if 'use_tool_api' in kwargs and kwargs['use_tool_api']:
self._path = audio
else:
if os.path.isfile(audio):
Expand Down
4 changes: 2 additions & 2 deletions modelscope_agent/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,10 +230,10 @@ def get_upload_url(model: str, file_to_upload: str, api_key: str):
Args:
model(str): Theoretically, you can set this parameter freely. It will only affect
the information of the oss url and will not affect the function function.
the information of the oss url and will not affect the function.
file_to_upload(str): the local file path which you need to convert to oss url.And it should
start with 'file://'.
api_key(str): dashscope_api_key which you have set in enviroment.
api_key(str): dashscope_api_key which you have set in environment.
Returns:
An oss type url.
Expand Down
9 changes: 9 additions & 0 deletions tests/tools/test_dashscope_asr_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,15 @@ def test_sambert_tts():
assert res.endswith('.wav"/>')


@pytest.mark.skipif(IS_FORKED_PR, reason='only run modelscope-agent main repo')
def test_sambert_tts_with_tool_api():
    """With ``use_tool_api`` the TTS tool must return an audio tag whose
    source is a remote URL rather than a local file path."""
    params = """{'text': '今天天气怎么样?'}"""
    tts_tool = SambertTtsTool()
    kwargs = {'use_tool_api': True}
    res = tts_tool.call(params, **kwargs)
    # The scheme of the signed URL follows the configured OSS endpoint, so
    # accept both plain and TLS URLs instead of hard-coding http://.
    assert res.startswith(('<audio src="http://', '<audio src="https://'))


@pytest.mark.skipif(IS_FORKED_PR, reason='only run modelscope-agent main repo')
def test_paraformer_asr_agent():
role_template = '你扮演一个语音专家,用尽可能丰富的描述调用工具处理语音。'
Expand Down

0 comments on commit 3a5327a

Please sign in to comment.