Skip to content

Commit

Permalink
add xai
Browse files Browse the repository at this point in the history
  • Loading branch information
bigsk1 committed Nov 6, 2024
1 parent 4bc8bfc commit c9e1a9c
Show file tree
Hide file tree
Showing 7 changed files with 221 additions and 22 deletions.
10 changes: 8 additions & 2 deletions .env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Depending on the value of MODEL_PROVIDER, the corresponding service will be used when run.
# You can mix and match; use local Ollama with OpenAI speech or use OpenAI model with local XTTS, etc.

# Model Provider: openai or ollama
# Model Provider: openai or ollama or xai
MODEL_PROVIDER=ollama

# Character to use - Options: alien_scientist, anarchist, bigfoot, chatgpt, clumsyhero, conandoyle, conspiracy, cyberpunk,
Expand Down Expand Up @@ -32,7 +32,7 @@ XTTS_SPEED=1.2
# OpenAI API Key for models and speech (replace with your actual API key)
OPENAI_API_KEY=sk-proj-1111111
# Models to use - OPTIONAL: For screen analysis, if MODEL_PROVIDER is ollama, llava will be used by default.
# Ensure you have llava downloaded with Ollama. If OpenAI is used, gpt-4o-mini works well.
# Ensure you have llava downloaded with Ollama. If OpenAI is used, gpt-4o-mini works well. xAI is not supported yet; it falls back to OpenAI if xai is selected.
OPENAI_MODEL=gpt-4o-mini

# Endpoints:
Expand All @@ -45,6 +45,12 @@ OLLAMA_BASE_URL=http://localhost:11434
# Model to use - llama3 or llama3.1 or 3.2 works well for local usage. In the UI you will have a list of popular models to choose from so the model here is just a starting point
OLLAMA_MODEL=llama3

# XAI Configuration
XAI_MODEL=grok-beta
XAI_API_KEY=your_api_key_here
XAI_BASE_URL=https://api.x.ai/v1


# NOTES:
# List of trigger phrases to have the model view your desktop (desktop, browser, images, etc.).
# It will describe what it sees, and you can ask questions about it:
Expand Down
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ You can run all locally, you can use openai for chat and voice, you can mix betw

## Features

- **Supports both OpenAI and Ollama language models**: Choose the model that best fits your needs.
- **Supports OpenAI, xAI or Ollama language models**: Choose the model that best fits your needs.
- **Provides text-to-speech synthesis using XTTS or OpenAI TTS or ElevenLabs**: Enjoy natural and expressive voices.
- **No typing needed, just speak**: Hands-free interaction makes conversations smooth and effortless.
- **Analyzes user mood and adjusts AI responses accordingly**: Get personalized responses based on your mood.
Expand All @@ -28,7 +28,7 @@ You can run all locally, you can use openai for chat and voice, you can mix betw

- Python 3.10
- CUDA-enabled GPU
- Ollama models or Openai API for chat
- Ollama models, OpenAI API, or xAI API for chat
- Local XTTS or Openai API or ElevenLabs API for speech
- Microsoft C++ Build Tools on windows
- Microphone
Expand Down Expand Up @@ -201,6 +201,11 @@ OLLAMA_BASE_URL=http://localhost:11434
# Models to use - llama3 works well for local usage.
OLLAMA_MODEL=llama3
# xAI Configuration
XAI_MODEL=grok-beta
XAI_API_KEY=your_api_key_here
XAI_BASE_URL=https://api.x.ai/v1
# NOTES:
# List of trigger phrases to have the model view your desktop (desktop, browser, images, etc.).
# It will describe what it sees, and you can ask questions about it:
Expand Down
119 changes: 110 additions & 9 deletions app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
OPENAI_MODEL = os.getenv('OPENAI_MODEL')
OPENAI_BASE_URL = os.getenv('OPENAI_BASE_URL')
XAI_API_KEY = os.getenv('XAI_API_KEY')
XAI_MODEL = os.getenv('XAI_MODEL')
XAI_BASE_URL = os.getenv('XAI_BASE_URL')
OLLAMA_MODEL = os.getenv('OLLAMA_MODEL')
OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL')
ELEVENLABS_API_KEY = os.getenv('ELEVENLABS_API_KEY')
Expand Down Expand Up @@ -85,6 +88,11 @@ def init_openai_model(model_name):
global OPENAI_MODEL
OPENAI_MODEL = model_name
print(f"Switched to OpenAI model: {model_name}")

def init_xai_model(model_name):
    """Record *model_name* as the globally active xAI chat model.

    Mirrors init_openai_model / init_ollama_model: mutates the module-level
    XAI_MODEL so subsequent chat requests use the newly selected model.
    """
    global XAI_MODEL
    XAI_MODEL = model_name
    print(f"Switched to XAI model: {model_name}")

def init_openai_tts_voice(voice_name):
global OPENAI_TTS_VOICE
Expand Down Expand Up @@ -175,7 +183,7 @@ def sync_play_audio(file_path):

print(f"Using device: {device}")
print(f"Model provider: {MODEL_PROVIDER}")
print(f"Model: {OPENAI_MODEL if MODEL_PROVIDER == 'openai' else OLLAMA_MODEL}")
print(f"Model: {OPENAI_MODEL if MODEL_PROVIDER == 'openai' else XAI_MODEL if MODEL_PROVIDER == 'xai' else OLLAMA_MODEL}")
print(f"Character: {character_display_name}")
print(f"Text-to-Speech provider: {TTS_PROVIDER}")
print("To stop chatting say Quit, Leave or Exit. Say, what's on my screen, to have AI view screen. One moment please loading...")
Expand Down Expand Up @@ -441,6 +449,50 @@ def chatgpt_streamed(user_input, system_message, mood_prompt, conversation_histo
except requests.exceptions.RequestException as e:
full_response = f"Error connecting to Ollama model: {e}"
print(f"Debug: Ollama error - {e}")

elif MODEL_PROVIDER == 'xai':
messages = [{"role": "system", "content": system_message + "\n" + mood_prompt}] + conversation_history + [{"role": "user", "content": user_input}]
headers = {
'Authorization': f'Bearer {XAI_API_KEY}',
'Content-Type': 'application/json'
}
payload = {
"model": XAI_MODEL,
"messages": messages,
"stream": True
}
try:
print(f"Debug: Sending request to XAI: {XAI_BASE_URL}")
response = requests.post(f"{XAI_BASE_URL}/chat/completions", headers=headers, json=payload, stream=True, timeout=30)
response.raise_for_status()

print("Starting XAI stream...")
line_buffer = ""
for line in response.iter_lines(decode_unicode=True):
if line.startswith("data:"):
line = line[5:].strip()
if line:
try:
chunk = json.loads(line)
delta_content = chunk['choices'][0]['delta'].get('content', '')
if delta_content:
line_buffer += delta_content
if '\n' in line_buffer:
lines = line_buffer.split('\n')
for line in lines[:-1]:
print(NEON_GREEN + line + RESET_COLOR)
full_response += line + '\n'
line_buffer = lines[-1]
except json.JSONDecodeError:
continue
if line_buffer:
print(NEON_GREEN + line_buffer + RESET_COLOR)
full_response += line_buffer
print("\nXAI stream complete.")

except requests.exceptions.RequestException as e:
full_response = f"Error connecting to XAI model: {e}"
print(f"Debug: XAI error - {e}")

elif MODEL_PROVIDER == 'openai':
messages = [{"role": "system", "content": system_message + "\n" + mood_prompt}] + conversation_history + [{"role": "user", "content": user_input}]
Expand Down Expand Up @@ -590,6 +642,7 @@ async def encode_image(image_path):
# Analyze Image
async def analyze_image(image_path, question_prompt):
encoded_image = await encode_image(image_path)

if MODEL_PROVIDER == 'ollama':
headers = {'Content-Type': 'application/json'}
payload = {
Expand All @@ -612,24 +665,72 @@ async def analyze_image(image_path, question_prompt):
except aiohttp.ClientError as e:
print(f"Request failed: {e}")
return {"choices": [{"message": {"content": "Failed to process the image with the llava model."}}]}
else:
headers = {"Content-Type": "application/json", "Authorization": f"Bearer {OPENAI_API_KEY}"}

elif MODEL_PROVIDER == 'xai':
# First, try XAI's image analysis if it's supported
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {XAI_API_KEY}"
}
message = {
"role": "user",
"content": [
{"type": "text", "text": question_prompt},
{"type": "image_url", "image_url": {"url": f"data:image/jpg;base64,{encoded_image}", "detail": "low"}}
]
}
payload = {"model": OPENAI_MODEL, "temperature": 0.5, "messages": [message], "max_tokens": 1000}
payload = {
"model": XAI_MODEL,
"temperature": 0.5,
"messages": [message],
"max_tokens": 1000
}

try:
async with aiohttp.ClientSession() as session:
async with session.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload, timeout=30) as response:
response.raise_for_status()
return await response.json()
async with session.post(f"{XAI_BASE_URL}/chat/completions", headers=headers, json=payload, timeout=30) as response:
if response.status == 200:
return await response.json()
else:
# If XAI doesn't support image analysis or returns an error,
# fall back to OpenAI's image analysis
print("XAI image analysis failed or not supported, falling back to OpenAI")
return await fallback_to_openai_image_analysis(encoded_image, question_prompt)
except aiohttp.ClientError as e:
print(f"Request failed: {e}")
return {"choices": [{"message": {"content": "Failed to process the image with the OpenAI model."}}]}
print(f"XAI image analysis failed: {e}, falling back to OpenAI")
return await fallback_to_openai_image_analysis(encoded_image, question_prompt)

else: # OpenAI as default
return await fallback_to_openai_image_analysis(encoded_image, question_prompt)

async def fallback_to_openai_image_analysis(encoded_image, question_prompt):
    """Analyze a base64-encoded image with OpenAI's chat-completions endpoint.

    Serves as the fallback path when the primary (xAI) provider cannot handle
    image input. Returns the parsed JSON response on success, or a dict shaped
    like a chat response carrying an error message when the request fails.
    """
    auth_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}",
    }
    # Single user turn pairing the prompt text with the inline image payload.
    request_body = {
        "model": OPENAI_MODEL,
        "temperature": 0.5,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": question_prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpg;base64,{encoded_image}",
                            "detail": "low",
                        },
                    },
                ],
            }
        ],
        "max_tokens": 1000,
    }

    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(
                "https://api.openai.com/v1/chat/completions",
                headers=auth_headers,
                json=request_body,
                timeout=30,
            ) as response:
                response.raise_for_status()
                return await response.json()
    except aiohttp.ClientError as e:
        print(f"OpenAI fallback request failed: {e}")
        return {"choices": [{"message": {"content": "Failed to process the image with both XAI and OpenAI models."}}]}


async def generate_speech(text, temp_audio_path):
Expand Down
3 changes: 3 additions & 0 deletions app/app_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
open_file,
init_ollama_model,
init_openai_model,
init_xai_model,
init_openai_tts_voice,
init_elevenlabs_tts_voice,
init_xtts_speed,
Expand Down Expand Up @@ -117,6 +118,8 @@ def set_env_variable(key: str, value: str):
init_ollama_model(value) # Reinitialize Ollama model
if key == "OPENAI_MODEL":
init_openai_model(value) # Reinitialize OpenAI model
if key == "XAI_MODEL":
init_xai_model(value) # Reinitialize XAI model
if key == "OPENAI_TTS_VOICE":
init_openai_tts_voice(value) # Reinitialize OpenAI TTS voice
if key == "ELEVENLABS_TTS_VOICE":
Expand Down
6 changes: 6 additions & 0 deletions app/static/js/scripts.js
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,11 @@ document.addEventListener("DOMContentLoaded", function() {
websocket.send(JSON.stringify({ action: "set_ollama_model", model: selectedModel }));
}

function setXAIModel() {
    // Read the user's xAI model choice and notify the backend over the websocket.
    const model = document.getElementById('xai-model-select').value;
    websocket.send(JSON.stringify({ action: "set_xai_model", model: model }));
}

function setXTTSSpeed() {
const selectedSpeed = document.getElementById('xtts-speed-select').value;
websocket.send(JSON.stringify({ action: "set_xtts_speed", speed: selectedSpeed }));
Expand All @@ -181,6 +186,7 @@ document.addEventListener("DOMContentLoaded", function() {
document.getElementById('openai-voice-select').addEventListener('change', setOpenAIVoice);
document.getElementById('openai-model-select').addEventListener('change', setOpenAIModel);
document.getElementById('ollama-model-select').addEventListener('change', setOllamaModel);
document.getElementById('xai-model-select').addEventListener('change', setXAIModel);
document.getElementById('xtts-speed-select').addEventListener('change', setXTTSSpeed);
document.getElementById('elevenlabs-voice-select').addEventListener('change', setElevenLabsVoice);

Expand Down
7 changes: 7 additions & 0 deletions app/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ <h1>
<select id="provider-select">
<option value="openai">OpenAI</option>
<option value="ollama">Ollama</option>
<option value="xai">XAI</option>
</select>
</div>
<div class="setting-group">
Expand Down Expand Up @@ -110,6 +111,12 @@ <h1>
<option value="dolphin-llama3">Dolphin-Llama3</option>
</select>
</div>
<div class="setting-group">
<label for="xai-model-select">XAI Model:</label>
<select id="xai-model-select">
<option value="grok-beta">Grok Beta</option>
</select>
</div>
<div class="setting-group">
<label for="xtts-speed-select">XTTS Speed:</label>
<select id="xtts-speed-select">
Expand Down
Loading

0 comments on commit c9e1a9c

Please sign in to comment.