From 169ffd7b2692e09d494e45991cb3cbdead324ab7 Mon Sep 17 00:00:00 2001
From: TKS <32640296+bigsk1@users.noreply.github.com>
Date: Sat, 9 Nov 2024 16:41:25 -0800
Subject: [PATCH] add workflow - pin version numbers for requirements.txt -
 update readme

---
 .github/workflows/python-app.yml | 29 ++++++++++++++++
 README.md                        | 58 ++++++++++++++++++++++++++++++--
 requirements.txt                 | 29 ++++++++++------
 3 files changed, 104 insertions(+), 12 deletions(-)
 create mode 100644 .github/workflows/python-app.yml

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
new file mode 100644
index 0000000..ad7e9be
--- /dev/null
+++ b/.github/workflows/python-app.yml
@@ -0,0 +1,29 @@
+# This workflow installs flake8 and lints the repository with a single version of Python on every push to main.
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
+name: Python application
+
+on:
+  push:
+    branches: [ "main" ]
+
+permissions:
+  contents: read
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v4
+      with:
+        python-version: "3.10"
+    - name: Lint with flake8
+      run: |
+        python -m pip install flake8  # not preinstalled in the setup-python interpreter; the lint commands below need it
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
diff --git a/README.md b/README.md
index 0b67893..111dd3e 100644
--- a/README.md
+++ b/README.md
@@ -26,6 +26,7 @@ You can run all locally, you can use openai for chat and voice, you can mix betw
 
 - Python 3.10
 - CUDA-enabled GPU
+- ffmpeg
 - Ollama models or Openai API or xAI for chat
 - Local XTTS or Openai API or ElevenLabs API for speech
 - Microsoft C++ Build Tools on windows
@@ -61,6 +62,8 @@ You can run all locally, you can use openai for chat and voice, you can mix betw
 
 3. Install dependencies:
 
+    Windows only: TTS requires Microsoft C++ Build Tools 14.0 or greater.
+    [Microsoft Build Tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/)
 
    For GPU (CUDA) version: RECOMMEND
 
@@ -80,8 +83,10 @@ You can run all locally, you can use openai for chat and voice, you can mix betw
    pip install -r cpu_requirements.txt
    ```
 
-Need to have Microsoft C++ Build Tools on windows for TTS
-[Microsoft Build Tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/)
+    Make sure ffmpeg is installed. On Windows, run `winget install ffmpeg` in a terminal, or check out https://ffmpeg.org/download.html. Then restart your shell or VS Code and run `ffmpeg -version` to confirm it is installed correctly.
+
+    For local TTS on an NVIDIA GPU you might also need cuDNN (https://developer.nvidia.com/cudnn). Make sure its bin directory, e.g. `C:\Program Files\NVIDIA\CUDNN\v9.5\bin\12.6`,
+    is in the system PATH.
 
 ### Download Checkpoints
 
@@ -335,6 +340,55 @@ This is for sentiment analysis, based on what you say, you can guide the AI to r
 
 For XTTS find a .wav voice and add it to the wizard folder and name it as wizard.wav , the voice only needs to be 6 seconds long. Running the app will automatically find the .wav when it has the characters name and use it. If only using Openai Speech or ElevenLabs a .wav isn't needed
 
+
+## Troubleshooting
+
+### Could not locate cudnn_ops64_9.dll
+
+```bash
+Could not locate cudnn_ops64_9.dll. Please make sure it is in your library path!
+Invalid handle. Cannot load symbol cudnnCreateTensorDescriptor
+```
+To resolve this:
+
+Install cuDNN: Download cuDNN from the NVIDIA cuDNN page https://developer.nvidia.com/cudnn
+
+Here’s how to add it to the PATH:
+
+**Open System Environment Variables:**
+
+Press Win + R, type `sysdm.cpl`, and hit Enter. Then go to the Advanced tab and click on Environment Variables.
+
+**Edit the System PATH Variable:**
+
+In the System variables section, find the Path variable, select it, and click Edit.
+Click New and add the path to the bin directory where `cudnn_ops64_9.dll` is located. Based on your setup, you would add:
+
+```bash
+C:\Program Files\NVIDIA\CUDNN\v9.5\bin\12.6
+```
+
+**Apply and Restart:**
+
+Click OK to close all dialog boxes, then restart your terminal (or any running applications) to apply the changes.
+
+**Verify the Change:**
+Open a new terminal and run:
+
+```bash
+where cudnn_ops64_9.dll
+```
+
+### Unanticipated host error
+
+```bash
+File "C:\Users\someguy\miniconda3\envs\voice-chat-ai\lib\site-packages\pyaudio\__init__.py", line 441, in __init__
+    self._stream = pa.open(**arguments)
+OSError: [Errno -9999] Unanticipated host error
+```
+
+Make sure ffmpeg is installed and added to PATH (on Windows, run `winget install ffmpeg` in a terminal). Also make sure your Windows microphone privacy settings allow access and that the microphone is set as the default device. I had this issue when using Bluetooth Apple AirPods, and this solved it.
+
 ## Watch the Demos
 
 
diff --git a/requirements.txt b/requirements.txt
index c43cf99..33483c1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,19 +6,28 @@ torchaudio==2.3.1+cu121
 torchvision==0.18.1+cu121
 -f https://download.pytorch.org/whl/torch_stable.html
 
-pyaudio
-numpy
+PyAudio==0.2.14
+numpy==1.22.0
 faster-whisper==1.0.2
 soundfile==0.12.1
 langid==1.1.6
-TTS==0.22.0
+librosa==0.10.0
+scipy==1.11.4
+transformers==4.41.2
+pydantic==2.7.4
+pillow==10.3.0
+
 pydub==0.25.1
 openai==1.33.0
 textblob==0.18.0.post0
 python-dotenv==1.0.1
-Flask
-requests
-fastapi
-uvicorn
-elevenlabs
-aiohttp
\ No newline at end of file
+Flask==3.0.3
+requests==2.32.3
+fastapi==0.111.0
+uvicorn==0.30.1
+elevenlabs==1.12.1
+aiohttp==3.9.5
+spacy==3.7.5
+spacy-legacy==3.0.12
+spacy-loggers==1.0.5
+TTS==0.22.0