Skip to content

Commit

Permalink
add xai
Browse files Browse the repository at this point in the history
  • Loading branch information
bigsk1 committed Nov 6, 2024
1 parent 4bc8bfc commit c9e1a9c
Show file tree
Hide file tree
Showing 7 changed files with 221 additions and 22 deletions.
10 changes: 8 additions & 2 deletions .env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Depending on the value of MODEL_PROVIDER, the corresponding service will be used when run.
# You can mix and match; use local Ollama with OpenAI speech or use OpenAI model with local XTTS, etc.

# Model Provider: openai or ollama
# Model Provider: openai or ollama or xai
MODEL_PROVIDER=ollama

# Character to use - Options: alien_scientist, anarchist, bigfoot, chatgpt, clumsyhero, conandoyle, conspiracy, cyberpunk,
Expand Down Expand Up @@ -32,7 +32,7 @@ XTTS_SPEED=1.2
# OpenAI API Key for models and speech (replace with your actual API key)
OPENAI_API_KEY=sk-proj-1111111
# Models to use - OPTIONAL: For screen analysis, if MODEL_PROVIDER is ollama, llava will be used by default.
# Ensure you have llava downloaded with Ollama. If OpenAI is used, gpt-4o-mini works well.
# Ensure you have llava downloaded with Ollama. If OpenAI is used, gpt-4o-mini works well. xAI is not supported yet; it falls back to OpenAI if xai is selected.
OPENAI_MODEL=gpt-4o-mini

# Endpoints:
Expand All @@ -45,6 +45,12 @@ OLLAMA_BASE_URL=http://localhost:11434
# Model to use - llama3 or llama3.1 or 3.2 works well for local usage. In the UI you will have a list of popular models to choose from so the model here is just a starting point
OLLAMA_MODEL=llama3

# XAI Configuration
XAI_MODEL=grok-beta
XAI_API_KEY=your_api_key_here
XAI_BASE_URL=https://api.x.ai/v1


# NOTES:
# List of trigger phrases to have the model view your desktop (desktop, browser, images, etc.).
# It will describe what it sees, and you can ask questions about it:
Expand Down
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ You can run all locally, you can use openai for chat and voice, you can mix betw

## Features

- **Supports both OpenAI and Ollama language models**: Choose the model that best fits your needs.
- **Supports OpenAI, xAI or Ollama language models**: Choose the model that best fits your needs.
- **Provides text-to-speech synthesis using XTTS or OpenAI TTS or ElevenLabs**: Enjoy natural and expressive voices.
- **No typing needed, just speak**: Hands-free interaction makes conversations smooth and effortless.
- **Analyzes user mood and adjusts AI responses accordingly**: Get personalized responses based on your mood.
Expand All @@ -28,7 +28,7 @@ You can run all locally, you can use openai for chat and voice, you can mix betw

- Python 3.10
- CUDA-enabled GPU
- Ollama models or Openai API for chat
- Ollama models, OpenAI API, or xAI API for chat
- Local XTTS or Openai API or ElevenLabs API for speech
- Microsoft C++ Build Tools on windows
- Microphone
Expand Down Expand Up @@ -201,6 +201,11 @@ OLLAMA_BASE_URL=http://localhost:11434
# Models to use - llama3 works well for local usage.
OLLAMA_MODEL=llama3
# xAI Configuration
XAI_MODEL=grok-beta
XAI_API_KEY=your_api_key_here
XAI_BASE_URL=https://api.x.ai/v1
# NOTES:
# List of trigger phrases to have the model view your desktop (desktop, browser, images, etc.).
# It will describe what it sees, and you can ask questions about it:
Expand Down
119 changes: 110 additions & 9 deletions app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
OPENAI_MODEL = os.getenv('OPENAI_MODEL')
OPENAI_BASE_URL = os.getenv('OPENAI_BASE_URL')
XAI_API_KEY = os.getenv('XAI_API_KEY')
XAI_MODEL = os.getenv('XAI_MODEL')
XAI_BASE_URL = os.getenv('XAI_BASE_URL')
OLLAMA_MODEL = os.getenv('OLLAMA_MODEL')
OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL')
ELEVENLABS_API_KEY = os.getenv('ELEVENLABS_API_KEY')
Expand Down Expand Up @@ -85,6 +88,11 @@ def init_openai_model(model_name):
global OPENAI_MODEL
OPENAI_MODEL = model_name
print(f"Switched to OpenAI model: {model_name}")

def init_xai_model(model_name):
    """Record *model_name* as the globally active xAI chat model.

    Mirrors init_openai_model / init_ollama_model: mutates the module-level
    XAI_MODEL so subsequent chat requests use the newly selected model.
    """
    global XAI_MODEL
    XAI_MODEL = model_name
    print(f"Switched to XAI model: {model_name}")

def init_openai_tts_voice(voice_name):
global OPENAI_TTS_VOICE
Expand Down Expand Up @@ -175,7 +183,7 @@ def sync_play_audio(file_path):

print(f"Using device: {device}")
print(f"Model provider: {MODEL_PROVIDER}")
print(f"Model: {OPENAI_MODEL if MODEL_PROVIDER == 'openai' else OLLAMA_MODEL}")
print(f"Model: {OPENAI_MODEL if MODEL_PROVIDER == 'openai' else XAI_MODEL if MODEL_PROVIDER == 'xai' else OLLAMA_MODEL}")
print(f"Character: {character_display_name}")
print(f"Text-to-Speech provider: {TTS_PROVIDER}")
print("To stop chatting say Quit, Leave or Exit. Say, what's on my screen, to have AI view screen. One moment please loading...")
Expand Down Expand Up @@ -441,6 +449,50 @@ def chatgpt_streamed(user_input, system_message, mood_prompt, conversation_histo
except requests.exceptions.RequestException as e:
full_response = f"Error connecting to Ollama model: {e}"
print(f"Debug: Ollama error - {e}")

elif MODEL_PROVIDER == 'xai':
messages = [{"role": "system", "content": system_message + "\n" + mood_prompt}] + conversation_history + [{"role": "user", "content": user_input}]
headers = {
'Authorization': f'Bearer {XAI_API_KEY}',
'Content-Type': 'application/json'
}
payload = {
"model": XAI_MODEL,
"messages": messages,
"stream": True
}
try:
print(f"Debug: Sending request to XAI: {XAI_BASE_URL}")
response = requests.post(f"{XAI_BASE_URL}/chat/completions", headers=headers, json=payload, stream=True, timeout=30)
response.raise_for_status()

print("Starting XAI stream...")
line_buffer = ""
for line in response.iter_lines(decode_unicode=True):
if line.startswith("data:"):
line = line[5:].strip()
if line:
try:
chunk = json.loads(line)
delta_content = chunk['choices'][0]['delta'].get('content', '')
if delta_content:
line_buffer += delta_content
if '\n' in line_buffer:
lines = line_buffer.split('\n')
for line in lines[:-1]:
print(NEON_GREEN + line + RESET_COLOR)
full_response += line + '\n'
line_buffer = lines[-1]
except json.JSONDecodeError:
continue
if line_buffer:
print(NEON_GREEN + line_buffer + RESET_COLOR)
full_response += line_buffer
print("\nXAI stream complete.")

except requests.exceptions.RequestException as e:
full_response = f"Error connecting to XAI model: {e}"
print(f"Debug: XAI error - {e}")

elif MODEL_PROVIDER == 'openai':
messages = [{"role": "system", "content": system_message + "\n" + mood_prompt}] + conversation_history + [{"role": "user", "content": user_input}]
Expand Down Expand Up @@ -590,6 +642,7 @@ async def encode_image(image_path):
# Analyze Image
async def analyze_image(image_path, question_prompt):
encoded_image = await encode_image(image_path)

if MODEL_PROVIDER == 'ollama':
headers = {'Content-Type': 'application/json'}
payload = {
Expand All @@ -612,24 +665,72 @@ async def analyze_image(image_path, question_prompt):
except aiohttp.ClientError as e:
print(f"Request failed: {e}")
return {"choices": [{"message": {"content": "Failed to process the image with the llava model."}}]}
else:
headers = {"Content-Type": "application/json", "Authorization": f"Bearer {OPENAI_API_KEY}"}

elif MODEL_PROVIDER == 'xai':
# First, try XAI's image analysis if it's supported
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {XAI_API_KEY}"
}
message = {
"role": "user",
"content": [
{"type": "text", "text": question_prompt},
{"type": "image_url", "image_url": {"url": f"data:image/jpg;base64,{encoded_image}", "detail": "low"}}
]
}
payload = {"model": OPENAI_MODEL, "temperature": 0.5, "messages": [message], "max_tokens": 1000}
payload = {
"model": XAI_MODEL,
"temperature": 0.5,
"messages": [message],
"max_tokens": 1000
}

try:
async with aiohttp.ClientSession() as session:
async with session.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload, timeout=30) as response:
response.raise_for_status()
return await response.json()
async with session.post(f"{XAI_BASE_URL}/chat/completions", headers=headers, json=payload, timeout=30) as response:
if response.status == 200:
return await response.json()
else:
# If XAI doesn't support image analysis or returns an error,
# fall back to OpenAI's image analysis
print("XAI image analysis failed or not supported, falling back to OpenAI")
return await fallback_to_openai_image_analysis(encoded_image, question_prompt)
except aiohttp.ClientError as e:
print(f"Request failed: {e}")
return {"choices": [{"message": {"content": "Failed to process the image with the OpenAI model."}}]}
print(f"XAI image analysis failed: {e}, falling back to OpenAI")
return await fallback_to_openai_image_analysis(encoded_image, question_prompt)

else: # OpenAI as default
return await fallback_to_openai_image_analysis(encoded_image, question_prompt)

async def fallback_to_openai_image_analysis(encoded_image, question_prompt):
    """Analyze a base64-encoded image with OpenAI's chat-completions endpoint.

    Serves as the fallback path when the primary (xAI) provider cannot handle
    image input. Returns the parsed JSON response on success, or a dict shaped
    like a chat response carrying an error message when the request fails.
    """
    auth_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}",
    }
    # Single user turn pairing the prompt text with the inline image payload.
    request_body = {
        "model": OPENAI_MODEL,
        "temperature": 0.5,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": question_prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpg;base64,{encoded_image}",
                            "detail": "low",
                        },
                    },
                ],
            }
        ],
        "max_tokens": 1000,
    }

    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(
                "https://api.openai.com/v1/chat/completions",
                headers=auth_headers,
                json=request_body,
                timeout=30,
            ) as response:
                response.raise_for_status()
                return await response.json()
    except aiohttp.ClientError as e:
        print(f"OpenAI fallback request failed: {e}")
        return {"choices": [{"message": {"content": "Failed to process the image with both XAI and OpenAI models."}}]}


async def generate_speech(text, temp_audio_path):
Expand Down
3 changes: 3 additions & 0 deletions app/app_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
open_file,
init_ollama_model,
init_openai_model,
init_xai_model,
init_openai_tts_voice,
init_elevenlabs_tts_voice,
init_xtts_speed,
Expand Down Expand Up @@ -117,6 +118,8 @@ def set_env_variable(key: str, value: str):
init_ollama_model(value) # Reinitialize Ollama model
if key == "OPENAI_MODEL":
init_openai_model(value) # Reinitialize OpenAI model
if key == "XAI_MODEL":
init_xai_model(value) # Reinitialize XAI model
if key == "OPENAI_TTS_VOICE":
init_openai_tts_voice(value) # Reinitialize OpenAI TTS voice
if key == "ELEVENLABS_TTS_VOICE":
Expand Down
6 changes: 6 additions & 0 deletions app/static/js/scripts.js
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,11 @@ document.addEventListener("DOMContentLoaded", function() {
websocket.send(JSON.stringify({ action: "set_ollama_model", model: selectedModel }));
}

function setXAIModel() {
    // Read the user's xAI model choice and notify the backend over the websocket.
    const model = document.getElementById('xai-model-select').value;
    websocket.send(JSON.stringify({ action: "set_xai_model", model: model }));
}

function setXTTSSpeed() {
const selectedSpeed = document.getElementById('xtts-speed-select').value;
websocket.send(JSON.stringify({ action: "set_xtts_speed", speed: selectedSpeed }));
Expand All @@ -181,6 +186,7 @@ document.addEventListener("DOMContentLoaded", function() {
document.getElementById('openai-voice-select').addEventListener('change', setOpenAIVoice);
document.getElementById('openai-model-select').addEventListener('change', setOpenAIModel);
document.getElementById('ollama-model-select').addEventListener('change', setOllamaModel);
document.getElementById('xai-model-select').addEventListener('change', setXAIModel);
document.getElementById('xtts-speed-select').addEventListener('change', setXTTSSpeed);
document.getElementById('elevenlabs-voice-select').addEventListener('change', setElevenLabsVoice);

Expand Down
7 changes: 7 additions & 0 deletions app/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ <h1>
<select id="provider-select">
<option value="openai">OpenAI</option>
<option value="ollama">Ollama</option>
<option value="xai">XAI</option>
</select>
</div>
<div class="setting-group">
Expand Down Expand Up @@ -110,6 +111,12 @@ <h1>
<option value="dolphin-llama3">Dolphin-Llama3</option>
</select>
</div>
<div class="setting-group">
<label for="xai-model-select">XAI Model:</label>
<select id="xai-model-select">
<option value="grok-beta">Grok Beta</option>
</select>
</div>
<div class="setting-group">
<label for="xtts-speed-select">XTTS Speed:</label>
<select id="xtts-speed-select">
Expand Down
Loading

0 comments on commit c9e1a9c

Please sign in to comment.