diff --git a/mint.json b/mint.json
index 2933750..1a273a9 100644
--- a/mint.json
+++ b/mint.json
@@ -136,6 +136,7 @@
"pages": [
"server/services/llm/anthropic",
"server/services/llm/azure",
+ "server/services/llm/cerebras",
"server/services/llm/fireworks",
"server/services/llm/gemini",
"server/services/llm/grok",
diff --git a/server/services/llm/cerebras.mdx b/server/services/llm/cerebras.mdx
new file mode 100644
index 0000000..29cc198
--- /dev/null
+++ b/server/services/llm/cerebras.mdx
@@ -0,0 +1,193 @@
+---
+title: "Cerebras"
+description: "LLM service implementation using Cerebras's API with an OpenAI-compatible interface"
+---
+
+## Overview
+
+`CerebrasLLMService` provides access to Cerebras's language models through an OpenAI-compatible interface. It inherits from `OpenAILLMService` and supports streaming responses, function calling, and context management.
+
+## Installation
+
+To use `CerebrasLLMService`, install the required dependencies:
+
+```bash
+pip install "pipecat-ai[cerebras]"
+```
+
+You'll need to set up your Cerebras API key as an environment variable: `CEREBRAS_API_KEY`
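+
+For example, in a POSIX shell (the key value below is a placeholder):
+
+```bash
+# Export the key so Pipecat can read it from the environment
+export CEREBRAS_API_KEY="your-cerebras-api-key"
+```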
+
+## Configuration
+
+### Constructor Parameters
+
+<ParamField path="api_key" type="str" required>
+  Your Cerebras API key
+</ParamField>
+
+<ParamField path="model" type="str" default="llama-3.3-70b">
+  Model identifier
+</ParamField>
+
+<ParamField path="base_url" type="str" default="https://api.cerebras.ai/v1">
+  Cerebras API endpoint
+</ParamField>
+
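+For example (a minimal construction sketch; the API key is a placeholder, and `base_url` is shown only to illustrate overriding the default endpoint):
+
+```python
+from pipecat.services.cerebras import CerebrasLLMService
+
+llm = CerebrasLLMService(
+    api_key="your-cerebras-api-key",  # placeholder
+    model="llama-3.3-70b",
+    base_url="https://api.cerebras.ai/v1",  # default Cerebras endpoint
+)
+```
+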
+### Input Parameters
+
+Inherits OpenAI-compatible parameters:
+
+<ParamField path="max_tokens" type="Optional[int]" default="None">
+  Maximum number of tokens to generate. Must be greater than or equal to 1
+</ParamField>
+
+<ParamField path="seed" type="Optional[int]" default="None">
+  Random seed for deterministic generation. Must be greater than or equal to 0
+</ParamField>
+
+<ParamField path="temperature" type="Optional[float]" default="None">
+  Controls randomness in the output. Range: [0.0, 1.5]
+</ParamField>
+
+<ParamField path="top_p" type="Optional[float]" default="None">
+  Controls diversity via nucleus sampling. Range: [0.0, 1.0]
+</ParamField>
+
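+These can be supplied when constructing the service (a minimal sketch, assuming the OpenAI-compatible `InputParams` object inherited from the base `OpenAILLMService`; the API key is a placeholder):
+
+```python
+from pipecat.services.cerebras import CerebrasLLMService
+
+llm = CerebrasLLMService(
+    api_key="your-cerebras-api-key",  # placeholder
+    model="llama-3.3-70b",
+    params=CerebrasLLMService.InputParams(
+        temperature=0.7,   # within Cerebras's [0.0, 1.5] range
+        max_tokens=1024,
+        top_p=0.9,
+        seed=42,
+    ),
+)
+```
+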
+## Usage Example
+
+```python
+from pipecat.services.cerebras import CerebrasLLMService
+from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from openai.types.chat import ChatCompletionToolParam
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+
+# Configure service
+llm = CerebrasLLMService(
+ api_key="your-cerebras-api-key",
+ model="llama-3.3-70b"
+)
+
+# Define tools for function calling
+tools = [
+ ChatCompletionToolParam(
+ type="function",
+ function={
+ "name": "get_current_weather",
+ "description": "Get the current weather",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "description": "The city and state, e.g. San Francisco, CA"
+ },
+ "format": {
+ "type": "string",
+ "enum": ["celsius", "fahrenheit"],
+ "description": "The temperature unit to use"
+ }
+ },
+ "required": ["location", "format"]
+ }
+ }
+ )
+]
+
+# Create context with system message and tools
+context = OpenAILLMContext(
+    messages=[
+        {
+            "role": "system",
+            "content": """You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way.
+
+You have one function available:
+
+1. get_current_weather is used to get current weather information.
+
+Infer whether to use Fahrenheit or Celsius automatically based on the location, unless the user specifies a preference. Start by asking me for my location. Then, use 'get_current_weather' to give me a forecast. Respond to what the user said in a creative and helpful way.""",
+        },
+    ],
+    tools=tools
+)
+
+# Register function handlers
+async def fetch_weather(function_name, tool_call_id, args, llm, context, result_callback):
+ await result_callback({"conditions": "nice", "temperature": "75"})
+
+# Passing None registers the handler as a catch-all for any function name
+llm.register_function(None, fetch_weather)
+
+# Create context aggregator for message handling
+context_aggregator = llm.create_context_aggregator(context)
+
+# Set up pipeline (transport and tts are assumed to be configured elsewhere)
+pipeline = Pipeline([
+ transport.input(),
+ context_aggregator.user(),
+ llm,
+ tts,
+ transport.output(),
+ context_aggregator.assistant()
+])
+
+# Create and configure task
+task = PipelineTask(
+ pipeline,
+ PipelineParams(
+ allow_interruptions=True,
+ enable_metrics=True,
+ enable_usage_metrics=True,
+ ),
+)
+```
+
+## Methods
+
+See the [LLM base class methods](/server/base-classes/llm#methods) for additional functionality.
+
+## Function Calling
+
+Supports OpenAI-compatible function calling. For optimal function calling performance, provide clear instructions in the system message about when and how to use functions.
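+
+As an alternative to the catch-all registration shown in the usage example above, a handler can also be registered for a specific function name (a minimal sketch reusing `fetch_weather` from that example):
+
+```python
+# Only calls to "get_current_weather" are routed to this handler
+llm.register_function("get_current_weather", fetch_weather)
+```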
+
+## Available Models
+
+Cerebras provides access to these models:
+
+| Model Name | Description |
+| --------------- | ------------------- |
+| `llama3.1-8b` | Llama 3.1 8B model |
+| `llama3.1-70b` | Llama 3.1 70B model |
+| `llama-3.3-70b` | Llama 3.3 70B model |
+
+## Frame Flow
+
+Inherits the OpenAI LLM Service frame flow:
+
+```mermaid
+graph TD
+ A[Input Context] --> B[CerebrasLLMService]
+ B --> C[LLMFullResponseStartFrame]
+ B --> D[TextFrame Chunks]
+ B --> E[Function Calls]
+ B --> F[LLMFullResponseEndFrame]
+ E --> G[Function Results]
+ G --> B
+```
+
+## Metrics Support
+
+The service collects standard LLM metrics:
+
+- Token usage (prompt and completion)
+- Processing duration
+- Time to First Byte (TTFB)
+- Function call metrics
+
+## Notes
+
+- OpenAI-compatible interface
+- Supports streaming responses
+- Handles function calling
+- Manages conversation context
+- Thread-safe processing
+- Automatic error handling
diff --git a/server/services/supported-services.mdx b/server/services/supported-services.mdx
index fdb1c35..e74be6a 100644
--- a/server/services/supported-services.mdx
+++ b/server/services/supported-services.mdx
@@ -25,6 +25,7 @@ description: "AI services integrated with Pipecat and their setup requirements"
| ---------------------------------------------- | ----------------------------------- |
| [Anthropic](/server/services/llm/anthropic) | `pip install pipecat-ai[anthropic]` |
| [Azure](/server/services/llm/azure) | `pip install pipecat-ai[azure]` |
+| [Cerebras](/server/services/llm/cerebras) | `pip install pipecat-ai[cerebras]` |
| [Fireworks AI](/server/services/llm/fireworks) | `pip install pipecat-ai[fireworks]` |
| [Google Gemini](/server/services/llm/gemini) | `pip install pipecat-ai[google]` |
| [Grok](/server/services/llm/grok) | `pip install pipecat-ai[grok]` |