Add Llama 3.3 #250

Merged 2 commits on Dec 6, 2024
6 changes: 4 additions & 2 deletions CHANGELOG.md
@@ -10,12 +10,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

## [0.65.2]
## [0.66.0]

### Added
- Added support for Groq's new Llama3.3 models. Updated `gllama370`, `gl70`, `glm` aliases to `llama-3.3-70b-versatile` and added `gl70s`, `glms` aliases to `llama-3.3-70b-specdec` (faster with speculative decoding).

### Fixed
- Fixed a bug in `extract_docstring` where it would not correctly block "empty" docstrings on Julia 1.11.


## [0.65.1]

### Fixed
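For context, the updated aliases resolve like any other registered model name, so they can be passed wherever PromptingTools.jl accepts a `model` keyword. A minimal usage sketch (assuming a Groq API key is already configured, e.g. via the `GROQ_API_KEY` environment variable; the prompts are purely illustrative):

```julia
using PromptingTools

# "gl70" (and "glm", "gllama370") now resolve to "llama-3.3-70b-versatile"
msg = aigenerate("What is the capital of France?"; model = "gl70")

# "glms" (and "gl70s") resolve to "llama-3.3-70b-specdec",
# the same model served with speculative decoding for faster output
msg_fast = aigenerate("Summarize the rules of chess in one sentence."; model = "glms")

println(msg.content)
```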
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "PromptingTools"
uuid = "670122d1-24a8-4d70-bfce-740807c42192"
authors = ["J S @svilupp and contributors"]
version = "0.65.2"
version = "0.66.0"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
18 changes: 15 additions & 3 deletions src/user_preferences.jl
@@ -432,14 +432,16 @@ aliases = merge(
## Groq
"gllama3" => "llama-3.1-8b-instant",
"gl3" => "llama-3.1-8b-instant",
"gllama370" => "llama-3.1-70b-versatile",
"gl70" => "llama-3.1-70b-versatile",
"gllama370" => "llama-3.3-70b-versatile",
"gl70" => "llama-3.3-70b-versatile",
"gl70s" => "llama-3.3-70b-specdec", #s for small speculative decoding
"gllama3405" => "llama-3.1-405b-reasoning",
"gl405" => "llama-3.1-405b-reasoning",
"glxxs" => "llama-3.2-1b-preview", #xxs for extra extra small
"glxs" => "llama-3.2-3b-preview", #xs for extra small
"gls" => "llama-3.1-8b-instant", #s for small
"glm" => "llama-3.1-70b-versatile", #m for medium
"glm" => "llama-3.3-70b-versatile", #m for medium
"glms" => "llama-3.3-70b-specdec", #ms for medium speculative decoding
"gll" => "llama-3.1-405b-reasoning", #l for large
"gmixtral" => "mixtral-8x7b-32768",
"ggemma9" => "gemma2-9b-it",
@@ -943,6 +945,16 @@ registry = Dict{String, ModelSpec}(
2.4e-5,
"Anthropic's Claude 2.1 model."),
## Groq -- using preliminary pricing on https://wow.groq.com/
"llama-3.3-70b-specdec" => ModelSpec("llama-3.3-70b-specdec",
GroqOpenAISchema(),
5.9e-7,
9.9e-7,
"Meta's Llama3.3 70b with speculative decoding (slightly more expensive), hosted by Groq. Max output 8192 tokens, 128K context. See details [here](https://console.groq.com/docs/models)"),
"llama-3.3-70b-versatile" => ModelSpec("llama-3.3-70b-versatile",
GroqOpenAISchema(),
5.9e-7,
7.9e-7,
"Meta's Llama3.3 70b, hosted by Groq. Max output 8192 tokens, 128K context. See details [here](https://console.groq.com/docs/models)"),
"llama-3.1-405b-reasoning" => ModelSpec("llama-3.1-405b-reasoning",
GroqOpenAISchema(),
5e-6, # based on prices at together.ai... likely it will be much cheaper
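For reference, each registry entry above is a positional `ModelSpec(name, schema, prompt-token cost, completion-token cost, description)`, with costs in USD per token. A rough sketch of what the new prices imply for a single call (the token counts are made up for illustration):

```julia
# Hypothetical call to "llama-3.3-70b-versatile" at the registered prices:
# 5.9e-7 USD per prompt token, 7.9e-7 USD per completion token.
prompt_tokens = 1_000
completion_tokens = 500

cost = prompt_tokens * 5.9e-7 + completion_tokens * 7.9e-7
# 0.00059 + 0.000395 = 0.000985 USD for this call
```

The `-specdec` variant only raises the completion-token price (9.9e-7 vs 7.9e-7), consistent with the changelog note that it trades a small cost premium for faster generation via speculative decoding.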