diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a1dbfec..2e7cf25d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,12 +10,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed -## [0.65.2] +## [0.66.0] + +### Added +- Added support for Groq's new Llama3.3 models. Updated `gllama370`, `gl70`, `glm` aliases to `llama-3.3-70b-versatile` and added `gl70s`, `glms` aliases to `llama-3.3-70b-specdec` (faster with speculative decoding). ### Fixed - Fixed a bug in `extract_docstring` where it would not correctly block "empty" docstrings on Julia 1.11. - ## [0.65.1] ### Fixed diff --git a/Project.toml b/Project.toml index 524a5fe6..15540b87 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "PromptingTools" uuid = "670122d1-24a8-4d70-bfce-740807c42192" authors = ["J S @svilupp and contributors"] -version = "0.65.2" +version = "0.66.0" [deps] AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" diff --git a/src/user_preferences.jl b/src/user_preferences.jl index 95aa2c2a..37750112 100644 --- a/src/user_preferences.jl +++ b/src/user_preferences.jl @@ -432,14 +432,16 @@ aliases = merge( ## Groq "gllama3" => "llama-3.1-8b-instant", "gl3" => "llama-3.1-8b-instant", - "gllama370" => "llama-3.1-70b-versatile", - "gl70" => "llama-3.1-70b-versatile", + "gllama370" => "llama-3.3-70b-versatile", + "gl70" => "llama-3.3-70b-versatile", + "gl70s" => "llama-3.3-70b-specdec", #s for speculative decoding "gllama3405" => "llama-3.1-405b-reasoning", "gl405" => "llama-3.1-405b-reasoning", "glxxs" => "llama-3.2-1b-preview", #xxs for extra extra small "glxs" => "llama-3.2-3b-preview", #xs for extra small "gls" => "llama-3.1-8b-instant", #s for small - "glm" => "llama-3.1-70b-versatile", #m for medium + "glm" => "llama-3.3-70b-versatile", #m for medium + "glms" => "llama-3.3-70b-specdec", #ms for medium speculative decoding "gll" => "llama-3.1-405b-reasoning", #l for large "gmixtral" => "mixtral-8x7b-32768", "ggemma9" => 
"gemma2-9b-it", @@ -943,6 +945,16 @@ registry = Dict{String, ModelSpec}( 2.4e-5, "Anthropic's Claude 2.1 model."), ## Groq -- using preliminary pricing on https://wow.groq.com/ + "llama-3.3-70b-specdec" => ModelSpec("llama-3.3-70b-specdec", + GroqOpenAISchema(), + 5.9e-7, + 9.9e-7, + "Meta's Llama3.3 70b with speculative decoding (slightly more expensive), hosted by Groq. Max output 8192 tokens, 128K context. See details [here](https://console.groq.com/docs/models)"), + "llama-3.3-70b-versatile" => ModelSpec("llama-3.3-70b-versatile", + GroqOpenAISchema(), + 5.9e-7, + 7.9e-7, + "Meta's Llama3.3 70b, hosted by Groq. Max output 8192 tokens, 128K context. See details [here](https://console.groq.com/docs/models)"), "llama-3.1-405b-reasoning" => ModelSpec("llama-3.1-405b-reasoning", GroqOpenAISchema(), 5e-6, # based on prices at together.ai... likely it will be much cheaper