diff --git a/mint.json b/mint.json
index 2933750..1a273a9 100644
--- a/mint.json
+++ b/mint.json
@@ -136,6 +136,7 @@
         "pages": [
           "server/services/llm/anthropic",
           "server/services/llm/azure",
+          "server/services/llm/cerebras",
           "server/services/llm/fireworks",
           "server/services/llm/gemini",
           "server/services/llm/grok",
diff --git a/server/services/llm/cerebras.mdx b/server/services/llm/cerebras.mdx
new file mode 100644
index 0000000..29cc198
--- /dev/null
+++ b/server/services/llm/cerebras.mdx
@@ -0,0 +1,193 @@
+---
+title: "Cerebras"
+description: "LLM service implementation using Cerebras's API with an OpenAI-compatible interface"
+---
+
+## Overview
+
+`CerebrasLLMService` provides access to Cerebras's language models through an OpenAI-compatible interface. It inherits from `OpenAILLMService` and supports streaming responses, function calling, and context management.
+
+## Installation
+
+To use `CerebrasLLMService`, install the required dependencies:
+
+```bash
+pip install "pipecat-ai[cerebras]"
+```
+
+You'll need to set up your Cerebras API key as an environment variable: `CEREBRAS_API_KEY`
+
+## Configuration
+
+### Constructor Parameters
+
+<ParamField path="api_key" type="str" required>
+  Your Cerebras API key
+</ParamField>
+
+<ParamField path="model" type="str">
+  Model identifier
+</ParamField>
+
+<ParamField path="base_url" type="str">
+  Cerebras API endpoint
+</ParamField>
+
+### Input Parameters
+
+Inherits OpenAI-compatible parameters:
+
+<ParamField path="max_tokens" type="Optional[int]">
+  Maximum number of tokens to generate. Must be greater than or equal to 1
+</ParamField>
+
+<ParamField path="seed" type="Optional[int]">
+  Random seed for deterministic generation. Must be greater than or equal to 0
+</ParamField>
+
+<ParamField path="temperature" type="Optional[float]">
+  Controls randomness in the output. Range: [0.0, 1.5]
+</ParamField>
+
+<ParamField path="top_p" type="Optional[float]">
+  Controls diversity via nucleus sampling. Range: [0.0, 1.0]
+</ParamField>
+
+## Usage Example
+
+```python
+from pipecat.services.cerebras import CerebrasLLMService
+from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from openai.types.chat import ChatCompletionToolParam
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+
+# Configure service
+llm = CerebrasLLMService(
+    api_key="your-cerebras-api-key",
+    model="llama-3.3-70b"
+)
+
+# Define tools for function calling
+tools = [
+    ChatCompletionToolParam(
+        type="function",
+        function={
+            "name": "get_current_weather",
+            "description": "Get the current weather",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city and state, e.g. San Francisco, CA"
+                    },
+                    "format": {
+                        "type": "string",
+                        "enum": ["celsius", "fahrenheit"],
+                        "description": "The temperature unit to use"
+                    }
+                },
+                "required": ["location", "format"]
+            }
+        }
+    )
+]
+
+# Create context with system message and tools
+context = OpenAILLMContext(
+    messages=[
+        {
+            "role": "system",
+            "content": """You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way.
+
+You have one function available:
+
+1. get_current_weather is used to get current weather information.
+
+Infer whether to use Fahrenheit or Celsius automatically based on the location, unless the user specifies a preference. Start by asking me for my location. Then, use 'get_current_weather' to give me a forecast.
+Respond to what the user said in a creative and helpful way.""",
+        },
+    ],
+    tools=tools
+)
+
+# Register function handlers
+async def fetch_weather(function_name, tool_call_id, args, llm, context, result_callback):
+    await result_callback({"conditions": "nice", "temperature": "75"})
+
+llm.register_function(None, fetch_weather)
+
+# Create context aggregator for message handling
+context_aggregator = llm.create_context_aggregator(context)
+
+# Set up pipeline
+pipeline = Pipeline([
+    transport.input(),
+    context_aggregator.user(),
+    llm,
+    tts,
+    transport.output(),
+    context_aggregator.assistant()
+])
+
+# Create and configure task
+task = PipelineTask(
+    pipeline,
+    PipelineParams(
+        allow_interruptions=True,
+        enable_metrics=True,
+        enable_usage_metrics=True,
+    ),
+)
+```
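+
+To run this example end to end, the task can be handed to Pipecat's `PipelineRunner`. This is a minimal sketch; it assumes the `transport` and `tts` services referenced above are configured elsewhere in your application:
+
+```python
+import asyncio
+
+from pipecat.pipeline.runner import PipelineRunner
+
+async def main():
+    # Run the pipeline task until the session ends or is cancelled
+    runner = PipelineRunner()
+    await runner.run(task)
+
+asyncio.run(main())
+```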
+
+## Methods
+
+See the [LLM base class methods](/server/base-classes/llm#methods) for additional functionality.
+
+## Function Calling
+
+Supports OpenAI-compatible function calling. For optimal function calling performance, provide clear instructions in the system message about when and how to use functions.
+
+## Available Models
+
+Cerebras provides access to these models:
+
+| Model Name      | Description         |
+| --------------- | ------------------- |
+| `llama3.1-8b`   | Llama 3.1 8B model  |
+| `llama3.1-70b`  | Llama 3.1 70B model |
+| `llama-3.3-70b` | Llama 3.3 70B model |
+
+## Frame Flow
+
+Inherits the OpenAI LLM Service frame flow:
+
+```mermaid
+graph TD
+    A[Input Context] --> B[CerebrasLLMService]
+    B --> C[LLMFullResponseStartFrame]
+    B --> D[TextFrame Chunks]
+    B --> E[Function Calls]
+    B --> F[LLMFullResponseEndFrame]
+    E --> G[Function Results]
+    G --> B
+```
+
+## Metrics Support
+
+The service collects standard LLM metrics:
+
+- Token usage (prompt and completion)
+- Processing duration
+- Time to First Byte (TTFB)
+- Function call metrics
+
+## Notes
+
+- OpenAI-compatible interface
+- Supports streaming responses
+- Handles function calling
+- Manages conversation context
+- Thread-safe processing
+- Automatic error handling
diff --git a/server/services/supported-services.mdx b/server/services/supported-services.mdx
index fdb1c35..e74be6a 100644
--- a/server/services/supported-services.mdx
+++ b/server/services/supported-services.mdx
@@ -25,6 +25,7 @@ description: "AI services integrated with Pipecat and their setup requirements"
 | ---------------------------------------------- | ----------------------------------- |
 | [Anthropic](/server/services/llm/anthropic)    | `pip install pipecat-ai[anthropic]` |
 | [Azure](/server/services/llm/azure)            | `pip install pipecat-ai[azure]`     |
+| [Cerebras](/server/services/llm/cerebras)      | `pip install pipecat-ai[cerebras]`  |
 | [Fireworks AI](/server/services/llm/fireworks) | `pip install pipecat-ai[fireworks]` |
 | [Google Gemini](/server/services/llm/gemini)   | `pip install pipecat-ai[google]`    |
 | [Grok](/server/services/llm/grok)              | `pip install pipecat-ai[grok]`      |