diff --git a/README.md b/README.md
index 08a18a8..c3fe7ec 100644
--- a/README.md
+++ b/README.md
@@ -150,7 +150,7 @@ You can then run the program by typing `speech-translate` in your terminal/conso
 
 - If you are **updating from an older version**, you need to add `--upgrade --force-reinstall` at the end of the command, if the update does not need new dependencies you can add `--no-deps` at the end of the command to speed up the installation process.
 - If you want to **install** from a **specific branch or commit**, you can do it by adding `@branch_name` or `@commit_hash` at the end of the url. Example: `pip install -U git+https://github.com/Dadangdut33/Speech-Translate.git@dev --extra-index-url https://download.pytorch.org/whl/cu118`
-- The **--extra-index-url here might not always be up to date or compatible with your system**. You can check the latest version of pytorch [here](https://pytorch.org/get-started/locally/). You can check older version of pytorch [here](https://pytorch.org/get-started/previous-versions/) or [here](https://download.pytorch.org/whl/torch_stable.html).
+- The **--extra-index-url here selects the CUDA version**. If your device is not compatible, or you need a different CUDA version, you can check older versions of PyTorch [here](https://pytorch.org/get-started/previous-versions/) or [here](https://download.pytorch.org/whl/torch_stable.html).
 
 ## From Git
 
@@ -180,6 +180,11 @@ Check out the [wiki](https://github.com/Dadangdut33/Speech-Translate/wiki) for m
 5. Install all the dependencies needed by running `pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu118` if you are using GPU or `pip install -r requirements.txt` if you are using CPU.
 6. Run `python Run.py` in root directory to run the app.
 
+Notes:
+
+- If you forgot the `--recurse-submodules` flag when cloning the repository, you can run `git submodule update --init --recursive` in the project directory to pull the needed submodules.
+- The `--extra-index-url` is needed to install the CUDA build of PyTorch; here we are using CUDA 11.8. If your device is not compatible, or you need a different CUDA version, you can check previous PyTorch versions [here](https://pytorch.org/get-started/previous-versions/) or [here](https://download.pytorch.org/whl/torch_stable.html).
+
 ## Running the app
 
 You can run the app by running the [`Run.py`](./Run.py) located in **root directory**. Alternatively you can also run it using `python -m speech_translate` in the **root directory**.
diff --git a/build.py b/build.py
index 0e232c4..4140c31 100644
--- a/build.py
+++ b/build.py
@@ -54,12 +54,17 @@ def get_whisper_version():
 print("Whisper version:", get_whisper_version())
 
 folder_name = f"build/SpeechTranslate {app_version()} {get_env_name()}"
+root = os.path.dirname(os.path.abspath(__file__))
+
+print("ROOT:", root)
+print("Assets:", os.path.abspath(os.path.join(root, "speech_translate", "assets")))
 
 build_exe_options = {
     "excludes": ["yapf", "ruff", "cx_Freeze", "pylint", "isort"],
     "packages": ["torch", "soundfile", "sounddevice", "av", "stable_whisper", "faster_whisper", "whisper"],
     "build_exe": folder_name,
-    "include_msvcr": True
+    "include_msvcr": True,
+    "include_files": [(os.path.abspath(os.path.join(root, "speech_translate", "assets")), "lib/speech_translate/assets")],
 }
 
 BASE = "Win32GUI" if sys.platform == "win32" else None
@@ -115,11 +120,6 @@ def get_whisper_version():
 with open(f"{folder_name}/version.txt", "w", encoding="utf-8") as f:
     f.write(app_version())
 
-# copy install_ffmpeg.ps1 to build folder
-print(">> Copying install_ffmpeg.ps1 to build folder")
-shutil.copyfile("install_ffmpeg.ps1", f"{folder_name}/install_ffmpeg.ps1")
-shutil.copyfile("install_ffmpeg.ps1", f"{folder_name}/lib/install_ffmpeg.ps1")
-
 # create link to repo
 print(">> Creating link to repo")
 with open(f"{folder_name}/homepage.url", "w", encoding="utf-8") as f:
diff --git a/build/pre_install_note.txt b/build/pre_install_note.txt
index 15cfbe5..c6639c4 100644
--- a/build/pre_install_note.txt
+++ b/build/pre_install_note.txt
@@ -4,8 +4,7 @@ Speech Translate is a practical application that combines OpenAI's Whisper ASR m
 
 Requirements:
 - Windows 8.1 or higher for speaker input
-- FFmpeg installed in your system (the app will prompt you to install it if you don't have it)
-- Internet connection (for translation with API)
+- Internet connection (for translation with API & downloading models)
 - Each whisper model requires the following VRAM:
 * tiny (~1 GB)
 * base (~1 GB)
@@ -13,7 +12,7 @@ Requirements:
 * medium (~5 GB)
 * large (~10 GB)
 
-Whisper can be used with CPU but will be very limited when doing so. It is recommended to use a cuda compatible GPU for better performance.
+Whisper can be used with CPU but will be very limited / slow when doing so. It is recommended to use a cuda compatible GPU for better performance.
 
 Please also note that when using faster-whisper, the speed will be significantly faster and the model size will be reduced depending on the usage. For more information about this please visit https://github.com/guillaumekln/faster-whisper
 
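Side note on the `include_files` entry added in `build.py` above: cx_Freeze accepts `(source, destination)` tuples in the `build_exe` options, which is how the assets folder ends up under `lib/speech_translate/assets` inside the frozen build. A minimal standalone sketch of that mechanism (the metadata and `Run.py` entry point below are illustrative placeholders, not the project's actual build script):

```python
# Minimal cx_Freeze sketch: bundle a data directory into the frozen build.
# Name, version, and entry point are placeholders, not the project's real values.
import os
import sys

from cx_Freeze import Executable, setup

root = os.path.dirname(os.path.abspath(__file__))

build_exe_options = {
    # (source, destination): copies the folder into <build_exe>/lib/speech_translate/assets
    "include_files": [
        (os.path.join(root, "speech_translate", "assets"), "lib/speech_translate/assets"),
    ],
    "include_msvcr": True,
}

setup(
    name="SpeechTranslate",
    version="0.0.0",
    options={"build_exe": build_exe_options},
    executables=[Executable("Run.py", base="Win32GUI" if sys.platform == "win32" else None)],
)
```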
diff --git a/speech_translate/__main__.py b/speech_translate/__main__.py
index acde962..ec90df0 100644
--- a/speech_translate/__main__.py
+++ b/speech_translate/__main__.py
@@ -1,4 +1,5 @@
 import subprocess
+import sys
 from os import environ
 from warnings import simplefilter
 
@@ -9,6 +10,11 @@
 # override loguru default format so we dont need to do logger.remove on the logger init
 environ["LOGURU_FORMAT"] = LOG_FORMAT
 
+# If frozen, stdout will not work because there is no console. So we need to replace stdout
+# with stderr so that any module that uses stdout will not break the app
+if getattr(sys, "frozen", False):
+    sys.stdout = sys.stderr
+
 
 # monkey patch subprocess.run
 class NoConsolePopen(subprocess.Popen):
diff --git a/speech_translate/ui/window/main.py b/speech_translate/ui/window/main.py
index 32255a8..812e1a1 100644
--- a/speech_translate/ui/window/main.py
+++ b/speech_translate/ui/window/main.py
@@ -1,4 +1,5 @@
 import os
+import sys
 from platform import processor, release, system, version
 from signal import SIGINT, signal  # Import the signal module to handle Ctrl+C
 from threading import Thread
@@ -6,7 +7,6 @@
 from tkinter import Frame, Menu, StringVar, Tk, Toplevel, filedialog, ttk
 from typing import Callable, Dict, Literal, Optional
 
-import static_ffmpeg
 from loguru import logger
 from PIL import Image, ImageDraw
 from pystray import Icon as icon
@@ -81,6 +81,27 @@
 )
 
 
+# modify static_ffmpeg add_paths
+def add_ffmpeg_to_path(weak=False) -> bool:
+    """Add the ffmpeg executable to the path"""
+    if getattr(sys, "frozen", False):
+        # pylint: disable=import-outside-toplevel, protected-access
+        from static_ffmpeg import _add_paths, run
+        run.sys.stdout = sys.stderr
+        if weak:
+            has_ffmpeg = _add_paths._has("ffmpeg") is not None
+            has_ffprobe = _add_paths._has("ffprobe") is not None
+            if has_ffmpeg and has_ffprobe:
+                return False
+        ffmpeg, _ = run.get_or_fetch_platform_executables_else_raise()
+        os.environ["PATH"] = os.pathsep.join([os.path.dirname(ffmpeg), os.environ["PATH"]])
+        return True
+    else:
+        # pylint: disable=import-outside-toplevel
+        from static_ffmpeg import _add_paths
+        return _add_paths.add_paths()
+
+
 # Function to handle Ctrl+C and exit just like clicking the exit button
 def signal_handler(_sig, _frame):
     logger.info("Received Ctrl+C, exiting...")
@@ -646,7 +667,7 @@ def check_ffmpeg_start(self):
         def check_ffmpeg():
             try:
                 logger.debug("Checking ffmpeg...")
-                static_ffmpeg.add_paths()
+                add_ffmpeg_to_path()
                 logger.debug("Checking ffmpeg done")
                 bc.has_ffmpeg = True
             except Exception as e:
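To sanity-check the new ffmpeg handling outside the app, here is a small standalone sketch that mirrors what `add_ffmpeg_to_path(weak=True)` does: reuse an `ffmpeg` that is already reachable on `PATH`, otherwise fetch the static binaries and prepend their directory to `PATH`. The helper name `ensure_ffmpeg_available` is illustrative and not part of the patch; it assumes `static_ffmpeg` is installed.

```python
# Standalone sketch (not part of the patch): resolve an ffmpeg executable the same
# way add_ffmpeg_to_path(weak=True) does -- reuse an existing install if present,
# otherwise fetch static binaries and prepend their directory to PATH.
import os
import shutil
import subprocess


def ensure_ffmpeg_available() -> str:
    """Return a path to an ffmpeg executable, downloading static binaries if needed."""
    found = shutil.which("ffmpeg")
    if found is not None:
        return found
    # Same call the patched code uses when running frozen
    from static_ffmpeg import run  # pylint: disable=import-outside-toplevel
    ffmpeg, _ffprobe = run.get_or_fetch_platform_executables_else_raise()
    os.environ["PATH"] = os.pathsep.join([os.path.dirname(ffmpeg), os.environ["PATH"]])
    return ffmpeg


if __name__ == "__main__":
    exe = ensure_ffmpeg_available()
    result = subprocess.run([exe, "-version"], capture_output=True, text=True, check=True)
    print(result.stdout.splitlines()[0])
```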