From 9a75d2cdb354b34d0af591e056096a2166160c09 Mon Sep 17 00:00:00 2001
From: J S <49557684+svilupp@users.noreply.github.com>
Date: Sun, 15 Sep 2024 14:44:19 +0100
Subject: [PATCH] OpenRouter support, new OpenAI o1 models (#207)

---
 CHANGELOG.md                           |  6 ++
 docs/src/frequently_asked_questions.md | 40 ++++++++++++
 examples/adding_custom_API.jl          | 85 ++++++++++++++++++++++++++
 src/llm_anthropic.jl                   | 15 ++++-
 src/llm_google.jl                      | 15 ++++-
 src/llm_interface.jl                   | 15 +++++
 src/llm_ollama.jl                      | 12 +++-
 src/llm_openai.jl                      | 27 +++++++-
 src/llm_shared.jl                      | 17 ++++--
 src/user_preferences.jl                | 52 +++++++++++++++-
 test/llm_shared.jl                     | 29 ++++++++-
 11 files changed, 293 insertions(+), 20 deletions(-)
 create mode 100644 examples/adding_custom_API.jl

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bb5a5fb4f..c61ed96e7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 - Added support for OpenAI's JSON mode for `aiextract` (just provide kwarg `json_mode=true`). Reference [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs).
+- Added support for OpenRouter's API (you must set ENV `OPENROUTER_API_KEY`) to provide access to more models like Cohere Command R+ and OpenAI's o1 series. Reference [OpenRouter](https://openrouter.ai/).
+- Added new OpenRouter-hosted models to the model registry (prefixed with `or`): `oro1` (OpenAI's o1-preview), `oro1m` (OpenAI's o1-mini), `orcop` (Cohere's command-r-plus), `orco` (Cohere's command-r). The `or` prefix avoids conflicts with existing models and OpenAI's aliases; it is followed by 2 letters for the model and 1 letter for an additional qualifier (eg, "p" for plus, "m" for mini) -> `orcop` (OpenRouter, COmmand-r, Plus).
+
+### Updated
+- Updated FAQ with instructions on how to access the new OpenAI o1 models via OpenRouter.
+- Updated FAQ with instructions on how to add custom APIs (with an example in `examples/adding_custom_API.jl`).
 
 ### Fixed
 - Fixed a bug in `aiclassify` for the OpenAI GPT4o models that have a different tokenizer. Unknown model IDs will throw an error.
diff --git a/docs/src/frequently_asked_questions.md b/docs/src/frequently_asked_questions.md
index d9a89d431..5e6c3ea54 100644
--- a/docs/src/frequently_asked_questions.md
+++ b/docs/src/frequently_asked_questions.md
@@ -119,6 +119,33 @@ Assuming the price per call was $0.0001, you'd pay 2 cents for the job and save
 Resources:
 - [OpenAI Pricing per 1000 tokens](https://openai.com/pricing)
 
+## How to try new OpenAI models if I'm not a Tier 5 customer?
+
+As of September 2024, you cannot access the new o1 models via the API unless you're a Tier 5 customer.
+
+Fortunately, you can use OpenRouter to access these new models.
+
+1) Get your API key from [OpenRouter](https://openrouter.ai/keys).
+2) Add some minimum [Credits](https://openrouter.ai/credits) to your account (eg, $5).
+3) Set the key as an environment variable (or use local preferences): `ENV["OPENROUTER_API_KEY"] = ""`
+4) Use the model aliases with the `or` prefix, eg, `oro1` for o1-preview or `oro1m` for o1-mini.
+
+Example:
+```julia
+# Let's use the o1-preview model hosted on OpenRouter ("or" prefix)
+msg = aigenerate("What is the meaning of life?"; model="oro1")
+```
+
+Note: There are some quirks for the o1 models.
+For example, the new o1 series does NOT support `SystemMessage` yet, so OpenRouter does some tricks (likely converting system messages into normal user messages).
+To stay in control of this behavior and keep it comparable to the native OpenAI API, you can pass the kwarg `no_system_message=true` to `aigenerate` to ensure OpenRouter does not do any tricks.
+
+Example:
+```julia
+# Let's use o1-mini and disable adding the automatic system message
+msg = aigenerate("What is the meaning of life?"; model="oro1m", no_system_message=true)
+```
+
 ## Configuring the Environment Variable for API Key
 
 This is a guide for OpenAI's API key, but it works for any other API key you might need (eg, `MISTRALAI_API_KEY` for MistralAI API).
@@ -202,6 +229,19 @@ There are three ways how you can customize your workflows (especially when you u
 2) Register your model and its associated schema (`PT.register_model!(; name="123", schema=PT.OllamaSchema())`). You won't have to specify the schema anymore, only the model name. See [Working with Ollama](#working-with-ollama) for more information.
 3) Override your default model (`PT.MODEL_CHAT`) and schema (`PT.PROMPT_SCHEMA`). It can be done persistently with Preferences, eg, `PT.set_preferences!("PROMPT_SCHEMA" => "OllamaSchema", "MODEL_CHAT"=>"llama2")`.
 
+## Using Custom API Providers like Azure or Databricks
+
+Several providers are directly supported (eg, Databricks); check the available prompt schemas (eg, `subtypes(PT.AbstractOpenAISchema)`).
+
+If you need a custom URL or a few keyword parameters, refer to the implementation of `DatabricksOpenAISchema`.
+You effectively need to create your own prompt schema (`struct MySchema <: PT.AbstractOpenAISchema`) and override the OpenAI.jl behavior. The easiest way is to provide your custom method for `OpenAI.create_chat` and customize the `url`, `api_key`, and other `kwargs` fields.
+You can follow the implementation of `create_chat` for `DatabricksOpenAISchema` in `src/llm_openai.jl`.
+
+Once your schema is ready, you can register the necessary models via `PT.register_model!(; name="myschema", schema=MySchema())`.
+You can also add aliases for easier access (eg, `PT.MODEL_ALIASES["mymodel"] = "my-model-with-really-long-name"`).
+
+If you would like to use a heavily customized API, eg, your company's internal LLM proxy (to change headers, URL paths, etc.), refer to the example `examples/adding_custom_API.jl` in the repo.
+
 ## How to have Multi-turn Conversations?
 
 Let's say you would like to respond back to a model's response. How to do it?
diff --git a/examples/adding_custom_API.jl b/examples/adding_custom_API.jl
new file mode 100644
index 000000000..9f242d2dc
--- /dev/null
+++ b/examples/adding_custom_API.jl
@@ -0,0 +1,85 @@
+# Example of a custom API integration, eg, a custom enterprise proxy with special headers
+#
+# This should NOT be necessary unless you have a private LLM / private proxy with a specialized API structure and headers.
+# For most new APIs, you should check out the FAQ on "Using Custom API Providers like Azure or Databricks".
+# DatabricksOpenAISchema is a good example of how to do a simple API integration.
+#
+# For heavily customized APIs, follow the example below. Again, do this only if you have no other choice!!
+
+# We will need to provide a custom "provider" and custom methods for `OpenAI.jl` to override how it builds the AUTH headers and URL.
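+#
+# Roadmap of the steps below (all types and methods referenced here are defined in this file):
+#   1) Define a custom provider (`MyModelProvider <: MyCustomProvider`) and override `OpenAI.build_url`
+#      and `OpenAI.auth_header` to control the request URL and headers.
+#   2) Define a custom schema (`MyCustomSchema <: PT.AbstractOpenAISchema`) and override `OpenAI.create_chat`
+#      to route requests through that provider.
+#   3) Register the model (and optional aliases) so that `aigenerate` can find it.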
+ +using PromptingTools +const PT = PromptingTools +using HTTP +using JSON3 + +## OpenAI.jl work +# Define a custom provider for OpenAI to override the default behavior +abstract type MyCustomProvider <: PT.AbstractCustomProvider end + +@kwdef struct MyModelProvider <: MyCustomProvider + api_key::String = "" + base_url::String = "https://api.example.com/v1239123/modelxyz/completions_that_are_not_standard" + api_version::String = "" +end + +# Tell OpenAI not to use "api" (=endpoints) +function PT.OpenAI.build_url(provider::MyCustomProvider, api::AbstractString = "") + string(provider.base_url) +end + +function PT.OpenAI.auth_header( + provider::MyCustomProvider, api_key::AbstractString = provider.api_key) + ## Note this DOES NOT have any Basic Auth! Assumes you use something custom + ["Content-Type" => "application/json", "Extra-custom-authorization" => api_key] +end + +## PromptingTools.jl work +# Define a custom schema +struct MyCustomSchema <: PT.AbstractOpenAISchema end + +# Implement create_chat for the custom schema +function PT.OpenAI.create_chat(schema::MyCustomSchema, + api_key::AbstractString, + model::AbstractString, + conversation; + url::String = "", + ## Add any required kwargs here, APIs may have different requirements + max_tokens::Int = 2048, + kwargs...) + ## Depending on your needs, you can get api_key from ENV variable!! + ## Eg, api_key = get(ENV, "CUSTOM_API_KEY", "") + provider = MyModelProvider(; api_key, base_url = url) + + ## The first arg will be ignored, doesn't matter what you put there + PT.OpenAI.openai_request("ignore-me", provider; + method = "POST", + messages = conversation, + streamcallback = nothing, + max_tokens = max_tokens, + model = model, + kwargs...) +end + +## Model registration +## Any alias you like (can be many) +PromptingTools.MODEL_ALIASES["myprecious"] = "custom-model-xyz" +## Register the exact model name to send to your API +PromptingTools.register_model!(; + name = "custom-model-xyz", + schema = MyCustomSchema()) + +## Example usage +api_key = "..." # use ENV to provide this automatically +url = "..." # use ENV to provide this or hardcode in your create_chat function!! +msg = aigenerate("Hello, how are you?"; model = "myprecious", api_kwargs = (; api_key, url)) + +## Custom usage - no need to register anything +function myai(msg::AbstractString) + model = "custom-model-xyz" + schema = MyCustomSchema() + api_key = "..." # use ENV to provide this automatically + url = "..." # use ENV to provide this or hardcode in your create_chat function!! + aigenerate(schema, msg; model, api_kwargs = (; api_key, url)) +end +msg = myai("Hello, how are you?") diff --git a/src/llm_anthropic.jl b/src/llm_anthropic.jl index c3e6fc8d2..0a0a8153a 100644 --- a/src/llm_anthropic.jl +++ b/src/llm_anthropic.jl @@ -9,6 +9,7 @@ messages::Vector{<:AbstractMessage}; aiprefill::Union{Nothing, AbstractString} = nothing, conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[], + no_system_message::Bool = false, tools::Vector{<:Dict{String, <:Any}} = Dict{String, Any}[], cache::Union{Nothing, Symbol} = nothing, kwargs...) @@ -18,6 +19,7 @@ Builds a history of the conversation to provide the prompt to the API. All unspe # Keyword Arguments - `aiprefill`: A string to be used as a prefill for the AI response. This steer the AI response in a certain direction (and potentially save output tokens). - `conversation`: Past conversation to be included in the beginning of the prompt (for continued conversations). 
+- `no_system_message`: If `true`, do not include the default system message in the conversation history OR convert any provided system message to a user message. - `tools`: A list of tools to be used in the conversation. Added to the end of the system prompt to enforce its use. - `cache`: A symbol representing the caching strategy to be used. Currently only `nothing` (no caching), `:system`, `:tools`,`:last` and `:all` are supported. """ @@ -25,6 +27,7 @@ function render(schema::AbstractAnthropicSchema, messages::Vector{<:AbstractMessage}; aiprefill::Union{Nothing, AbstractString} = nothing, conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[], + no_system_message::Bool = false, tools::Vector{<:Dict{String, <:Any}} = Dict{String, Any}[], cache::Union{Nothing, Symbol} = nothing, kwargs...) @@ -35,7 +38,8 @@ function render(schema::AbstractAnthropicSchema, system = nothing ## First pass: keep the message types but make the replacements provided in `kwargs` - messages_replaced = render(NoSchema(), messages; conversation, kwargs...) + messages_replaced = render( + NoSchema(), messages; conversation, no_system_message, kwargs...) ## Second pass: convert to the message-based schema conversation = Dict{String, Any}[] @@ -73,7 +77,7 @@ function render(schema::AbstractAnthropicSchema, if is_valid_conversation && (cache == :last || cache == :all) conversation[end]["content"][end]["cache_control"] = Dict("type" => "ephemeral") end - if !isnothing(system) && (cache == :system || cache == :all) + if !no_system_message && !isnothing(system) && (cache == :system || cache == :all) ## Apply cache for system message system = [Dict("type" => "text", "text" => system, "cache_control" => Dict("type" => "ephemeral"))] @@ -214,6 +218,7 @@ end return_all::Bool = false, dry_run::Bool = false, conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[], streamcallback::Any = nothing, + no_system_message::Bool = false, aiprefill::Union{Nothing, AbstractString} = nothing, http_kwargs::NamedTuple = NamedTuple(), api_kwargs::NamedTuple = NamedTuple(), cache::Union{Nothing, Symbol} = nothing, @@ -232,6 +237,7 @@ Generate an AI response based on a given prompt using the Anthropic API. - `conversation::AbstractVector{<:AbstractMessage}=[]`: Not allowed for this schema. Provided only for compatibility. - `streamcallback::Any`: A callback function to handle streaming responses. Can be simply `stdout` or `StreamCallback` object. See `?StreamCallback` for details. Note: We configure the `StreamCallback` (and necessary `api_kwargs`) for you, unless you specify the `flavor`. See `?configure_callback!` for details. +- `no_system_message::Bool=false`: If `true`, do not include the default system message in the conversation history OR convert any provided system message to a user message. - `aiprefill::Union{Nothing, AbstractString}`: A string to be used as a prefill for the AI response. This steer the AI response in a certain direction (and potentially save output tokens). It MUST NOT end with a trailing with space. Useful for JSON formatting. - `http_kwargs::NamedTuple`: Additional keyword arguments for the HTTP request. Defaults to empty `NamedTuple`. - `api_kwargs::NamedTuple`: Additional keyword arguments for the Ollama API. Defaults to an empty `NamedTuple`. 
@@ -329,6 +335,7 @@ function aigenerate( return_all::Bool = false, dry_run::Bool = false, conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[], streamcallback::Any = nothing, + no_system_message::Bool = false, aiprefill::Union{Nothing, AbstractString} = nothing, http_kwargs::NamedTuple = NamedTuple(), api_kwargs::NamedTuple = NamedTuple(), cache::Union{Nothing, Symbol} = nothing, @@ -339,7 +346,8 @@ function aigenerate( @assert (isnothing(aiprefill)||!isempty(strip(aiprefill))) "`aiprefill` must not be empty`" ## Find the unique ID for the model alias provided model_id = get(MODEL_ALIASES, model, model) - conv_rendered = render(prompt_schema, prompt; aiprefill, conversation, cache, kwargs...) + conv_rendered = render( + prompt_schema, prompt; no_system_message, aiprefill, conversation, cache, kwargs...) if !dry_run @info conv_rendered.conversation @@ -383,6 +391,7 @@ function aigenerate( conversation, return_all, dry_run, + no_system_message, kwargs...) return output end diff --git a/src/llm_google.jl b/src/llm_google.jl index 0a9f0fa20..e193b8d8f 100644 --- a/src/llm_google.jl +++ b/src/llm_google.jl @@ -10,21 +10,24 @@ end render(schema::AbstractGoogleSchema, messages::Vector{<:AbstractMessage}; conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[], + no_system_message::Bool = false, kwargs...) Builds a history of the conversation to provide the prompt to the API. All unspecified kwargs are passed as replacements such that `{{key}}=>value` in the template. # Keyword Arguments - `conversation`: An optional vector of `AbstractMessage` objects representing the conversation history. If not provided, it is initialized as an empty vector. - +- `no_system_message::Bool=false`: If `true`, do not include the default system message in the conversation history OR convert any provided system message to a user message. """ function render(schema::AbstractGoogleSchema, messages::Vector{<:AbstractMessage}; conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[], + no_system_message::Bool = false, kwargs...) ## ## First pass: keep the message types but make the replacements provided in `kwargs` - messages_replaced = render(NoSchema(), messages; conversation, kwargs...) + messages_replaced = render( + NoSchema(), messages; conversation, no_system_message, kwargs...) ## Second pass: convert to the OpenAI schema conversation = Dict{Symbol, Any}[] @@ -78,6 +81,8 @@ end verbose::Bool = true, api_key::String = GOOGLE_API_KEY, model::String = "gemini-pro", return_all::Bool = false, dry_run::Bool = false, + conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[], + no_system_message::Bool = false, http_kwargs::NamedTuple = (retry_non_idempotent = true, retries = 5, readtimeout = 120), api_kwargs::NamedTuple = NamedTuple(), @@ -98,6 +103,7 @@ Note: - `return_all::Bool=false`: If `true`, returns the entire conversation history, otherwise returns only the last message (the `AIMessage`). - `dry_run::Bool=false`: If `true`, skips sending the messages to the model (for debugging, often used with `return_all=true`). - `conversation`: An optional vector of `AbstractMessage` objects representing the conversation history. If not provided, it is initialized as an empty vector. +- `no_system_message::Bool=false`: If `true`, do not include the default system message in the conversation history OR convert any provided system message to a user message. - `http_kwargs`: A named tuple of HTTP keyword arguments. - `api_kwargs`: A named tuple of API keyword arguments. 
- `kwargs`: Prompt variables to be used to fill the prompt/template @@ -151,6 +157,7 @@ function aigenerate(prompt_schema::AbstractGoogleSchema, prompt::ALLOWED_PROMPT_ api_key::String = GOOGLE_API_KEY, model::String = "gemini-pro", return_all::Bool = false, dry_run::Bool = false, conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[], + no_system_message::Bool = false, http_kwargs::NamedTuple = (retry_non_idempotent = true, retries = 5, readtimeout = 120), api_kwargs::NamedTuple = NamedTuple(), @@ -166,7 +173,8 @@ function aigenerate(prompt_schema::AbstractGoogleSchema, prompt::ALLOWED_PROMPT_ ## Find the unique ID for the model alias provided model_id = get(MODEL_ALIASES, model, model) - conv_rendered = render(prompt_schema, prompt; conversation, kwargs...) + conv_rendered = render( + prompt_schema, prompt; conversation, no_system_message, kwargs...) if !dry_run time = @elapsed r = ggi_generate_content(prompt_schema, api_key, @@ -195,6 +203,7 @@ function aigenerate(prompt_schema::AbstractGoogleSchema, prompt::ALLOWED_PROMPT_ conversation, return_all, dry_run, + no_system_message, kwargs...) return output diff --git a/src/llm_interface.jl b/src/llm_interface.jl index a56e84e10..40669844e 100644 --- a/src/llm_interface.jl +++ b/src/llm_interface.jl @@ -207,6 +207,21 @@ Requires one environment variables to be set: """ struct DeepSeekOpenAISchema <: AbstractOpenAISchema end +""" + OpenRouterOpenAISchema + +Schema to call the [OpenRouter](https://openrouter.ai/) API. + +Links: +- [Get your API key](https://openrouter.ai/keys) +- [API Reference](https://openrouter.ai/docs) +- [Available models](https://openrouter.ai/models) + +Requires one environment variable to be set: +- `OPENROUTER_API_KEY`: Your API key +""" +struct OpenRouterOpenAISchema <: AbstractOpenAISchema end + abstract type AbstractOllamaSchema <: AbstractPromptSchema end """ diff --git a/src/llm_ollama.jl b/src/llm_ollama.jl index 5d6bdd240..0c80a539a 100644 --- a/src/llm_ollama.jl +++ b/src/llm_ollama.jl @@ -12,21 +12,24 @@ render(schema::AbstractOllamaSchema, messages::Vector{<:AbstractMessage}; conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[], + no_system_message::Bool = false, kwargs...) Builds a history of the conversation to provide the prompt to the API. All unspecified kwargs are passed as replacements such that `{{key}}=>value` in the template. # Keyword Arguments - `conversation`: An optional vector of `AbstractMessage` objects representing the conversation history. If not provided, it is initialized as an empty vector. - +- `no_system_message`: If `true`, do not include the default system message in the conversation history OR convert any provided system message to a user message. """ function render(schema::AbstractOllamaSchema, messages::Vector{<:AbstractMessage}; conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[], + no_system_message::Bool = false, kwargs...) ## ## First pass: keep the message types but make the replacements provided in `kwargs` - messages_replaced = render(NoSchema(), messages; conversation, kwargs...) + messages_replaced = render( + NoSchema(), messages; conversation, no_system_message, kwargs...) 
## Second pass: convert to the OpenAI schema conversation = Dict{String, Any}[] @@ -140,13 +143,15 @@ function aigenerate(prompt_schema::AbstractOllamaSchema, prompt::ALLOWED_PROMPT_ model::String = MODEL_CHAT, return_all::Bool = false, dry_run::Bool = false, conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[], + no_system_message::Bool = false, http_kwargs::NamedTuple = NamedTuple(), api_kwargs::NamedTuple = NamedTuple(), kwargs...) ## global MODEL_ALIASES ## Find the unique ID for the model alias provided model_id = get(MODEL_ALIASES, model, model) - conv_rendered = render(prompt_schema, prompt; conversation, kwargs...) + conv_rendered = render( + prompt_schema, prompt; conversation, no_system_message, kwargs...) if !dry_run time = @elapsed resp = ollama_api(prompt_schema, nothing; @@ -176,6 +181,7 @@ function aigenerate(prompt_schema::AbstractOllamaSchema, prompt::ALLOWED_PROMPT_ conversation, return_all, dry_run, + no_system_message, kwargs...) return output end diff --git a/src/llm_openai.jl b/src/llm_openai.jl index 5fde45cf6..1b11fc063 100644 --- a/src/llm_openai.jl +++ b/src/llm_openai.jl @@ -4,6 +4,7 @@ messages::Vector{<:AbstractMessage}; image_detail::AbstractString = "auto", conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[], + no_system_message::Bool = false, kwargs...) Builds a history of the conversation to provide the prompt to the API. All unspecified kwargs are passed as replacements such that `{{key}}=>value` in the template. @@ -11,17 +12,19 @@ Builds a history of the conversation to provide the prompt to the API. All unspe # Keyword Arguments - `image_detail`: Only for `UserMessageWithImages`. It represents the level of detail to include for images. Can be `"auto"`, `"high"`, or `"low"`. - `conversation`: An optional vector of `AbstractMessage` objects representing the conversation history. If not provided, it is initialized as an empty vector. - +- `no_system_message`: If `true`, do not include the default system message in the conversation history OR convert any provided system message to a user message. """ function render(schema::AbstractOpenAISchema, messages::Vector{<:AbstractMessage}; image_detail::AbstractString = "auto", conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[], + no_system_message::Bool = false, kwargs...) ## @assert image_detail in ["auto", "high", "low"] "Image detail must be one of: auto, high, low" ## First pass: keep the message types but make the replacements provided in `kwargs` - messages_replaced = render(NoSchema(), messages; conversation, kwargs...) + messages_replaced = render( + NoSchema(), messages; conversation, no_system_message, kwargs...) ## Second pass: convert to the OpenAI schema conversation = Dict{String, Any}[] @@ -226,6 +229,19 @@ function OpenAI.create_chat(schema::DeepSeekOpenAISchema, base_url = url) OpenAI.create_chat(provider, model, conversation; kwargs...) end +function OpenAI.create_chat(schema::OpenRouterOpenAISchema, + api_key::AbstractString, + model::AbstractString, + conversation; + url::String = "https://openrouter.ai/api/v1", + kwargs...) + # Build the corresponding provider object + # try to override provided api_key because the default is OpenAI key + provider = CustomProvider(; + api_key = isempty(OPENROUTER_API_KEY) ? api_key : OPENROUTER_API_KEY, + base_url = url) + OpenAI.create_chat(provider, model, conversation; kwargs...) 
+end function OpenAI.create_chat(schema::DatabricksOpenAISchema, api_key::AbstractString, model::AbstractString, @@ -443,6 +459,7 @@ end model::String = MODEL_CHAT, return_all::Bool = false, dry_run::Bool = false, conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[], streamcallback::Any = nothing, + no_system_message::Bool = false, http_kwargs::NamedTuple = (retry_non_idempotent = true, retries = 5, readtimeout = 120), api_kwargs::NamedTuple = NamedTuple(), @@ -461,6 +478,7 @@ Generate an AI response based on a given prompt using the OpenAI API. - `conversation`: An optional vector of `AbstractMessage` objects representing the conversation history. If not provided, it is initialized as an empty vector. - `streamcallback`: A callback function to handle streaming responses. Can be simply `stdout` or a `StreamCallback` object. See `?StreamCallback` for details. Note: We configure the `StreamCallback` (and necessary `api_kwargs`) for you, unless you specify the `flavor`. See `?configure_callback!` for details. +- `no_system_message::Bool=false`: If `true`, the default system message is not included in the conversation history. Any existing system message is converted to a `UserMessage`. - `http_kwargs`: A named tuple of HTTP keyword arguments. - `api_kwargs`: A named tuple of API keyword arguments. Useful parameters include: - `temperature`: A float representing the temperature for sampling (ie, the amount of "creativity"). Often defaults to `0.7`. @@ -538,6 +556,7 @@ function aigenerate(prompt_schema::AbstractOpenAISchema, prompt::ALLOWED_PROMPT_ model::String = MODEL_CHAT, return_all::Bool = false, dry_run::Bool = false, conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[], streamcallback::Any = nothing, + no_system_message::Bool = false, http_kwargs::NamedTuple = (retry_non_idempotent = true, retries = 5, readtimeout = 120), api_kwargs::NamedTuple = NamedTuple(), @@ -546,7 +565,8 @@ function aigenerate(prompt_schema::AbstractOpenAISchema, prompt::ALLOWED_PROMPT_ global MODEL_ALIASES ## Find the unique ID for the model alias provided model_id = get(MODEL_ALIASES, model, model) - conv_rendered = render(prompt_schema, prompt; conversation, kwargs...) + conv_rendered = render( + prompt_schema, prompt; conversation, no_system_message, kwargs...) if !dry_run time = @elapsed r = create_chat(prompt_schema, api_key, @@ -587,6 +607,7 @@ function aigenerate(prompt_schema::AbstractOpenAISchema, prompt::ALLOWED_PROMPT_ conversation, return_all, dry_run, + no_system_message, kwargs...) return output diff --git a/src/llm_shared.jl b/src/llm_shared.jl index 38682fde8..42cfceea9 100644 --- a/src/llm_shared.jl +++ b/src/llm_shared.jl @@ -11,6 +11,7 @@ role4render(schema::AbstractPromptSchema, msg::AIMessage) = "assistant" render(schema::NoSchema, messages::Vector{<:AbstractMessage}; conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[], + no_system_message::Bool = false, replacement_kwargs...) Renders a conversation history from a vector of messages with all replacement variables specified in `replacement_kwargs`. @@ -20,6 +21,7 @@ It is the first pass of the prompt rendering system, and is used by all other sc # Keyword Arguments - `image_detail`: Only for `UserMessageWithImages`. It represents the level of detail to include for images. Can be `"auto"`, `"high"`, or `"low"`. - `conversation`: An optional vector of `AbstractMessage` objects representing the conversation history. If not provided, it is initialized as an empty vector. 
+- `no_system_message`: If `true`, do not include the default system message in the conversation history OR convert any provided system message to a user message. # Notes - All unspecified kwargs are passed as replacements such that `{{key}}=>value` in the template. @@ -29,6 +31,7 @@ It is the first pass of the prompt rendering system, and is used by all other sc function render(schema::NoSchema, messages::Vector{<:AbstractMessage}; conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[], + no_system_message::Bool = false, replacement_kwargs...) ## copy the conversation to avoid mutating the original conversation = copy(conversation) @@ -41,8 +44,9 @@ function render(schema::NoSchema, replacements = ["{{$(key)}}" => value for (key, value) in pairs(replacement_kwargs) if key in msg.variables] + ## Force System message to UserMessage if no_system_message=true + MSGTYPE = no_system_message && issystemmessage(msg) ? UserMessage : typeof(msg) # Rebuild the message with the replaced content - MSGTYPE = typeof(msg) new_msg = MSGTYPE(; # unpack the type to replace only the content field [(field, getfield(msg, field)) for field in fieldnames(typeof(msg))]..., @@ -70,8 +74,10 @@ function render(schema::NoSchema, ## Multiple system prompts are not allowed (count_system_msg > 1) && throw(ArgumentError("Only one system message is allowed.")) ## Add default system prompt if not provided - (count_system_msg == 0) && pushfirst!(conversation, - SystemMessage("Act as a helpful AI assistant")) + if (count_system_msg == 0) && !no_system_message + pushfirst!(conversation, + SystemMessage("Act as a helpful AI assistant")) + end return conversation end @@ -82,6 +88,7 @@ end return_all::Bool = false, dry_run::Bool = false, conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[], + no_system_message::Bool = false, kwargs...) Finalizes the outputs of the ai* functions by either returning the conversation history or the last message. @@ -92,19 +99,21 @@ Finalizes the outputs of the ai* functions by either returning the conversation Useful for debugging when you want to check the specific schema rendering. - `conversation::AbstractVector{<:AbstractMessage}=[]`: An optional vector of `AbstractMessage` objects representing the conversation history. If not provided, it is initialized as an empty vector. - `kwargs...`: Variables to replace in the prompt template. +- `no_system_message::Bool=false`: If true, the default system message is not included in the conversation history. Any existing system message is converted to a `UserMessage`. """ function finalize_outputs(prompt::ALLOWED_PROMPT_TYPE, conv_rendered::Any, msg::Union{Nothing, AbstractMessage, AbstractVector{<:AbstractMessage}}; return_all::Bool = false, dry_run::Bool = false, conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[], + no_system_message::Bool = false, kwargs...) if return_all if !dry_run # If not a dry_run, re-create the messages sent to the model before schema application # This is a duplication of work, as we already have the rendered messages in conv_rendered, # but we prioritize the user's experience over performance here (ie, render(OpenAISchema,msgs) does everything under the hood) - output = render(NoSchema(), prompt; conversation, kwargs...) + output = render(NoSchema(), prompt; conversation, no_system_message, kwargs...) 
if msg isa AbstractVector ## handle multiple messages (multi-sample) append!(output, msg) diff --git a/src/user_preferences.jl b/src/user_preferences.jl index 935fe1b95..e8a6ed10e 100644 --- a/src/user_preferences.jl +++ b/src/user_preferences.jl @@ -21,6 +21,7 @@ Check your preferences by calling `get_preferences(key::String)`. - `VOYAGE_API_KEY`: The API key for the Voyage API. Free tier is upto 50M tokens! Get yours from [here](https://dash.voyageai.com/api-keys). - `GROQ_API_KEY`: The API key for the Groq API. Free in beta! Get yours from [here](https://console.groq.com/keys). - `DEEPSEEK_API_KEY`: The API key for the DeepSeek API. Get \$5 credit when you join. Get yours from [here](https://platform.deepseek.com/api_keys). +- `OPENROUTER_API_KEY`: The API key for the OpenRouter API. Get yours from [here](https://openrouter.ai/keys). - `MODEL_CHAT`: The default model to use for aigenerate and most ai* calls. See `MODEL_REGISTRY` for a list of available models or define your own. - `MODEL_EMBEDDING`: The default model to use for aiembed (embedding documents). See `MODEL_REGISTRY` for a list of available models or define your own. - `PROMPT_SCHEMA`: The default prompt schema to use for aigenerate and most ai* calls (if not specified in `MODEL_REGISTRY`). Set as a string, eg, `"OpenAISchema"`. @@ -49,6 +50,7 @@ Define your `register_model!()` calls in your `startup.jl` file to make them ava - `VOYAGE_API_KEY`: The API key for the Voyage API. Free tier is upto 50M tokens! Get yours from [here](https://dash.voyageai.com/api-keys). - `GROQ_API_KEY`: The API key for the Groq API. Free in beta! Get yours from [here](https://console.groq.com/keys). - `DEEPSEEK_API_KEY`: The API key for the DeepSeek API. Get \$5 credit when you join. Get yours from [here](https://platform.deepseek.com/api_keys). +- `OPENROUTER_API_KEY`: The API key for the OpenRouter API. Get yours from [here](https://openrouter.ai/keys). - `LOG_DIR`: The directory to save the logs to, eg, when using `SaverSchema <: AbstractTracerSchema`. Defaults to `joinpath(pwd(), "log")`. Refer to `?SaverSchema` for more information on how it works and examples. Preferences.jl takes priority over ENV variables, so if you set a preference, it will take precedence over the ENV variable. 
@@ -69,6 +71,7 @@ const ALLOWED_PREFERENCES = ["MISTRALAI_API_KEY", "VOYAGE_API_KEY", "GROQ_API_KEY", "DEEPSEEK_API_KEY", + "OPENROUTER_API_KEY", # Added OPENROUTER_API_KEY "MODEL_CHAT", "MODEL_EMBEDDING", "MODEL_ALIASES", @@ -147,6 +150,7 @@ global ANTHROPIC_API_KEY::String = "" global VOYAGE_API_KEY::String = "" global GROQ_API_KEY::String = "" global DEEPSEEK_API_KEY::String = "" +global OPENROUTER_API_KEY::String = "" global LOCAL_SERVER::String = "" global LOG_DIR::String = "" @@ -196,6 +200,9 @@ function load_api_keys!() global DEEPSEEK_API_KEY DEEPSEEK_API_KEY = @load_preference("DEEPSEEK_API_KEY", default=get(ENV, "DEEPSEEK_API_KEY", "")) + global OPENROUTER_API_KEY # Added OPENROUTER_API_KEY + OPENROUTER_API_KEY = @load_preference("OPENROUTER_API_KEY", + default=get(ENV, "OPENROUTER_API_KEY", "")) global LOCAL_SERVER LOCAL_SERVER = @load_preference("LOCAL_SERVER", default=get(ENV, "LOCAL_SERVER", "")) @@ -392,7 +399,12 @@ aliases = merge( "ggemma9" => "gemma2-9b-it", ## DeepSeek "dschat" => "deepseek-chat", - "dscode" => "deepseek-coder" + "dscode" => "deepseek-coder", + ## OpenRouter + "oro1" => "openai/o1-preview", + "oro1m" => "openai/o1-mini", + "orcop" => "cohere/command-r-plus-08-2024", + "orco" => "cohere/command-r-08-2024" ), ## Load aliases from preferences as well @load_preference("MODEL_ALIASES", default=Dict{String, String}())) @@ -824,7 +836,43 @@ registry = Dict{String, ModelSpec}( DeepSeekOpenAISchema(), 1.4e-7, 2.8e-7, - "Deepseek.com-hosted coding model. Max 16K context. See details [here](https://platform.deepseek.com/docs)") + "Deepseek.com-hosted coding model. Max 16K context. See details [here](https://platform.deepseek.com/docs)"), + ## OpenRouter models + "openai/o1-preview" => ModelSpec("openai/o1-preview", + OpenRouterOpenAISchema(), + 15e-6, + 60e-6, + "OpenRouter's hosted version of OpenAI's latest reasoning model o1-preview. 128K context, max output 32K tokens. Details unknown."), + "openai/o1-preview-2024-09-12" => ModelSpec("openai/o1-preview-2024-09-12", + OpenRouterOpenAISchema(), + 15e-6, + 60e-6, + "OpenRouter's hosted version of OpenAI's latest reasoning model o1-preview, version 2024-09-12. 128K context, max output 32K tokens. Details unknown."), + "openai/o1-mini" => ModelSpec("openai/o1-mini", + OpenRouterOpenAISchema(), + 3e-6, + 12e-6, + "OpenRouter's hosted version of OpenAI's latest and smallest reasoning model o1-mini. 128K context, max output 65K tokens. Details unknown."), + "openai/o1-mini-2024-09-12" => ModelSpec("openai/o1-mini-2024-09-12", + OpenRouterOpenAISchema(), + 3e-6, + 12e-6, + "OpenRouter's hosted version of OpenAI's latest and smallest reasoning model o1-mini, version 2024-09-12. 128K context, max output 65K tokens. Details unknown."), + "cohere/command-r-plus-08-2024" => ModelSpec("cohere/command-r-plus-08-2024", + OpenRouterOpenAISchema(), + 2.5e-6, + 10e-6, + "OpenRouter's hosted version of Cohere's latest and strongest model Command R Plus. 128K context, max output 4K tokens."), + "cohere/command-r-08-2024" => ModelSpec("cohere/command-r-08-2024", + OpenRouterOpenAISchema(), + 1.5e-7, + 6e-7, + "OpenRouter's hosted version of Cohere's latest smaller model Command R. 128K context, max output 4K tokens."), + "meta-llama/llama-3.1-405b" => ModelSpec("meta-llama/llama-3.1-405b", + OpenRouterOpenAISchema(), + 2e-6, + 2e-6, + "Meta's Llama3.1 405b, hosted by OpenRouter. This is a BASE model!! Max output 32K tokens, 131K context. 
See details [here](https://openrouter.ai/models/meta-llama/llama-3.1-405b)")
 )
 
 """
diff --git a/test/llm_shared.jl b/test/llm_shared.jl
index 82df905a5..d5451d84e 100644
--- a/test/llm_shared.jl
+++ b/test/llm_shared.jl
@@ -1,6 +1,6 @@
 using PromptingTools: render, NoSchema
 using PromptingTools: AIMessage, SystemMessage, AbstractMessage, AbstractChatMessage
-using PromptingTools: UserMessage, UserMessageWithImages
+using PromptingTools: UserMessage, UserMessageWithImages, DataMessage
 using PromptingTools: finalize_outputs, role4render
 
 @testset "render-NoSchema" begin
@@ -179,9 +179,34 @@ using PromptingTools: finalize_outputs, role4render
     ]
     conversation = render(schema, messages)
     @test conversation == expected_output
+
+    # Test no_system_message
+    messages = [
+        SystemMessage("System message 1"),
+        UserMessage("User message")
+    ]
+    expected_output = [
+        UserMessage("System message 1"),
+        UserMessage("User message")
+    ]
+    conversation = render(schema, messages; no_system_message = true)
+    @test conversation[1] isa UserMessage
+    @test conversation[2] isa UserMessage
+    @test conversation[1].content == "System message 1"
+    @test conversation[2].content == "User message"
+
+    ## No default message
+    messages = [
+        UserMessage("User message")
+    ]
+    expected_output = [
+        UserMessage("User message")
+    ]
+    conversation = render(schema, messages; no_system_message = true)
+    @test conversation[1] isa UserMessage
+    @test conversation[1].content == "User message"
 end
 
-# Write 5 unit tests for finalize_outputs for various combinations of inputs. Use @test calls
 @testset "finalize_outputs" begin
     # Given a vector of messages and a single message, it should return the last message.
     messages = [