add oss

modelscope · Jul 8, 2024 · 3a5327a · 3a5327a
1 parent a6e43f3
commit 3a5327a
Show file tree

Hide file tree

Showing 7 changed files with 145 additions and 27 deletions.
diff --git a/docker/tool_node.dockerfile b/docker/tool_node.dockerfile
@@ -32,17 +32,18 @@ COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 RUN pip install fastapi uvicorn
 
-COPY modelscope_agent /app/modelscope_agent
+# install ffmpeg
+RUN  wget -O ffmpeg.tar.xz https://modelscope-agent.oss-cn-hangzhou.aliyuncs.com/resources/ffmpeg.tar.xz && \
+     tar xvf ffmpeg.tar.xz
+
+
 ENV PYTHONPATH $PYTHONPATH:/app/modelscope_agent:/app/modelscope_agent_servers
 ENV BASE_TOOL_DIR /app/assets
+ENV PATH=/app/ffmpeg-git-20240629-amd64-static:$PATH
 
 # install tool_node
 COPY modelscope_agent_servers /app/modelscope_agent_servers
-
-# install ffmpeg
-RUN  wget -O ffmpeg.tar.xz https://modelscope-agent.oss-cn-hangzhou.aliyuncs.com/resources/ffmpeg.tar.xz && \
-     tar xvf ffmpeg.tar.xz  && \
-     export PATH=`pwd`/ffmpeg-git-20240629-amd64-static:$PATH
+COPY modelscope_agent /app/modelscope_agent
 
 # start up script file
 COPY scripts/run_tool_node.sh /app/run_tool_node.sh

diff --git a/modelscope_agent/tools/dashscope_tools/paraformer_asr_tool.py b/modelscope_agent/tools/dashscope_tools/paraformer_asr_tool.py
@@ -52,22 +52,29 @@ def call(self, params: str, **kwargs):
             raw_audio_file = kwargs[LOCAL_FILE_PATHS][params['audio_path']]
         if not os.path.exists(raw_audio_file):
             raise ValueError(f'audio file {raw_audio_file} not exists')
-
-        pcm_file = os.path.join(
-            WORK_DIR,
-            os.path.basename(params['audio_path']).split('.')[0] + '.pcm')
-        _preprocess(raw_audio_file, pcm_file)
-        if not os.path.exists(pcm_file):
-            raise ValueError(f'convert audio to pcm file {pcm_file} failed')
-        recognition = Recognition(
-            model='paraformer-realtime-v1',
-            format='pcm',
-            sample_rate=16000,
-            callback=None)
-        response = recognition.call(
-            pcm_file,
-            api_key=token,
-        )
+        try:
+            pcm_file = os.path.join(
+                WORK_DIR,
+                os.path.basename(params['audio_path']).split('.')[0] + '.pcm')
+            _preprocess(raw_audio_file, pcm_file)
+            if not os.path.exists(pcm_file):
+                raise ValueError(
+                    f'convert audio to pcm file {pcm_file} failed')
+            recognition = Recognition(
+                model='paraformer-realtime-v1',
+                format='pcm',
+                sample_rate=16000,
+                callback=None)
+            response = recognition.call(
+                pcm_file,
+                api_key=token,
+            )
+        except Exception as e:
+            import traceback
+            print(
+                f'call paraformer asr failed, error: {e}, and traceback {traceback.format_exc()}'
+            )
+            raise ValueError(f'call paraformer asr failed, error: {e}')
         result = ''
         if response.status_code == HTTPStatus.OK:
             sentences: List[Any] = response.get_sentence()

diff --git a/modelscope_agent/tools/dashscope_tools/sambert_tts_tool.py b/modelscope_agent/tools/dashscope_tools/sambert_tts_tool.py
@@ -1,11 +1,13 @@
 import os
+import uuid
 
 import dashscope
 from dashscope.audio.tts import SpeechSynthesizer
 from modelscope_agent.constants import ApiNames
 from modelscope_agent.tools.base import BaseTool, register_tool
+from modelscope_agent.tools.utils.oss import OssStorage
 from modelscope_agent.tools.utils.output_wrapper import AudioWrapper
-from modelscope_agent.utils.utils import get_api_key
+from modelscope_agent.utils.utils import get_api_key, get_upload_url
 
 WORK_DIR = os.getenv('CODE_INTERPRETER_WORK_DIR', '/tmp/ci_workspace')
 
@@ -23,6 +25,7 @@ class SambertTtsTool(BaseTool):
 
     def __init__(self, cfg={}):
         self.cfg = cfg.get(self.name, {})
+        self.oss = None
         super().__init__(cfg)
 
     def call(self, params: str, **kwargs) -> str:
@@ -36,7 +39,8 @@ def call(self, params: str, **kwargs) -> str:
         if tts_text is None or len(tts_text) == 0 or tts_text == '':
             raise ValueError('tts input text is valid')
         os.makedirs(WORK_DIR, exist_ok=True)
-        wav_file = WORK_DIR + '/sambert_tts_audio.wav'
+        wav_name = str(uuid.uuid4())[0:6] + '_sambert_tts_audio.wav'
+        wav_file = os.path.join(WORK_DIR, wav_name)
         response = SpeechSynthesizer.call(
             model='sambert-zhijia-v1',
             format='wav',
@@ -45,8 +49,25 @@ def call(self, params: str, **kwargs) -> str:
         if response.get_audio_data() is not None:
             with open(wav_file, 'wb') as f:
                 f.write(response.get_audio_data())
+            if 'use_tool_api' in kwargs and kwargs['use_tool_api']:
+                try:
+                    wav_url = self._upload_to_oss(wav_name, wav_file)
+                except Exception as e:
+                    return (
+                        f'Failed to save the audio file to oss with error: {e}, '
+                        'please check the oss information')
+                return str(AudioWrapper(wav_url, **kwargs))
         else:
             raise ValueError(
                 f'call sambert tts failed, request id: {response.get_response()}'
             )
         return str(AudioWrapper(wav_file, **kwargs))
+
+    def _upload_to_oss(self, file_name: str, file_path: str):
+        if self.oss is None:
+            self.oss = OssStorage()
+        # NOTE: this object-key layout (tmp/<tool name>/<file name>) is specific to ModelScope; double-check it for other deployments
+        oss_path = os.path.join('tmp', self.name, file_name)
+        self.oss.upload(file_path, oss_path)
+        url = self.oss.get_signed_url(oss_path)
+        return url
diff --git a/modelscope_agent/tools/utils/oss.py b/modelscope_agent/tools/utils/oss.py
@@ -0,0 +1,79 @@
+import os
+import time
+
+import oss2
+
+
+class OssStorage(object):
+
+    def __init__(self):
+        oss_access_key_id = os.getenv('OSS_ACCESS_KEY_ID', None)
+        oss_access_key_secret = os.getenv('OSS_ACCESS_KEY_SECRET', None)
+        oss_bucket = os.getenv('OSS_BUCKET_NAME', None)
+        oss_endpoint = os.getenv('OSS_ENDPOINT', None)
+        if not oss_access_key_id or not oss_access_key_secret or not oss_bucket or not oss_endpoint:
+            raise ValueError(
+                'OSS_ACCESS_KEY_ID, OSS_ACCESS_KEY_SECRET, OSS_BUCKET_NAME, OSS_ENDPOINT must be set'
+            )
+        self.auth = oss2.Auth(oss_access_key_id, oss_access_key_secret)
+        self.bucket = oss2.Bucket(self.auth, oss_endpoint, oss_bucket)
+        self.endpoint = oss_endpoint
+        self.bucket_name = oss_bucket
+
+    def upload(self,
+               src_file,
+               oss_path,
+               max_retries=3,
+               retry_delay=1,
+               delete_src=True):
+        for i in range(max_retries):
+            try:
+                with open(src_file, 'rb') as f:
+                    print(f'src address is {src_file}')
+                    modality_data = f.read()
+                    result = self.bucket.put_object(oss_path, modality_data)
+                    print(result)
+                break
+            except Exception as e:
+                print(f'Error uploading file: {e}')
+                if i < max_retries - 1:
+                    print(f'Retrying in {retry_delay} seconds...')
+                    time.sleep(retry_delay)
+                else:
+                    os.remove(src_file)
+                    raise IOError(f'Exceed the Max retry with error {e}')
+
+        if delete_src:
+            os.remove(src_file)
+
+    def uploads(self,
+                src_files,
+                oss_paths,
+                max_retries=3,
+                retry_delay=1,
+                delete_src=True):
+        # upload each source file to the oss path at the same index
+        for idx, src_file in enumerate(src_files):
+            oss_path = oss_paths[idx]
+            self.upload(src_file, oss_path, max_retries, retry_delay,
+                        delete_src)
+
+    def get(self, oss_path):
+        return self.bucket.get_object(oss_path)
+
+    def get_signed_url(self, oss_path, expire_seconds=3 * 24 * 60 * 60):
+        url = self.bucket.sign_url(
+            'GET', oss_path, expire_seconds, slash_safe=True)
+        return url
+
+
+if __name__ == '__main__':
+    from PIL import Image
+    from io import BytesIO
+
+    oss = OssStorage()
+    oss_path_test = 'zzc/test.png'
+    oss.upload('/Users/zhicheng/zzc2.png', oss_path_test, delete_src=False)
+    result1 = oss.get(oss_path_test)
+    image_data = Image.open(BytesIO(result1.read()))
+    image_data.show()
diff --git a/modelscope_agent/tools/utils/output_wrapper.py b/modelscope_agent/tools/utils/output_wrapper.py
@@ -97,7 +97,8 @@ def __init__(self, audio, **kwargs) -> None:
 
         super().__init__()
         if isinstance(audio, str):
-            if 'use_tool_api' in kwargs and 'https://' in audio:
+            # with use_tool_api enabled, skip local file handling and pass the url or base64 string through as-is
+            if 'use_tool_api' in kwargs and kwargs['use_tool_api']:
                 self._path = audio
             else:
                 if os.path.isfile(audio):

diff --git a/modelscope_agent/utils/utils.py b/modelscope_agent/utils/utils.py
@@ -230,10 +230,10 @@ def get_upload_url(model: str, file_to_upload: str, api_key: str):
 
     Args:
         model(str): Theoretically, you can set this parameter freely. It will only affect
-                    the information of the oss url and will not affect the function function.
+                    the information of the oss url and will not affect the function.
         file_to_upload(str): the local file path which you need to convert to oss url. And it should
                             start with 'file://'.
-        api_key(str): dashscope_api_key which you have set in enviroment.
+        api_key(str): dashscope_api_key which you have set in environment.
 
     Returns:
         An oss type url.

diff --git a/tests/tools/test_dashscope_asr_tts.py b/tests/tools/test_dashscope_asr_tts.py
@@ -50,6 +50,15 @@ def test_sambert_tts():
     assert res.endswith('.wav"/>')
 
 
+@pytest.mark.skipif(IS_FORKED_PR, reason='only run modelscope-agent main repo')
+def test_sambert_tts_with_tool_api():
+    params = """{'text': '今天天气怎么样？'}"""
+    tts_tool = SambertTtsTool()
+    kwargs = {'use_tool_api': True}
+    res = tts_tool.call(params, **kwargs)
+    assert res.startswith('<audio src="http://')
+
+
 @pytest.mark.skipif(IS_FORKED_PR, reason='only run modelscope-agent main repo')
 def test_paraformer_asr_agent():
     role_template = '你扮演一个语音专家，用尽可能丰富的描述调用工具处理语音。'