OpenAI model changes
svilupp committed Jan 25, 2024
1 parent 38924ce commit 6f98096
Showing 5 changed files with 53 additions and 12 deletions.
14 changes: 14 additions & 0 deletions CHANGELOG.md
@@ -10,6 +10,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

## [0.10.0]

### Added
- [BREAKING CHANGE] The default embedding model (`MODEL_EMBEDDING`) changes to "text-embedding-3-small" effective immediately (lower cost, higher performance). The default chat model (`MODEL_CHAT`) will be changed by OpenAI to 0125 (from 0613) by mid-February. If you have older embeddings or rely on the exact chat model version, please set the model explicitly in your code or in your preferences.
- New OpenAI models added to the model registry (see the [release notes](https://openai.com/blog/new-embedding-models-and-api-updates)).
- "gpt4t" refers to whichever is the latest GPT-4 Turbo model ("gpt-4-0125-preview" at the time of writing)
- "gpt3t" refers to the latest GPT-3.5 Turbo model version 0125, which is 25-50% cheaper and has an updated knowledge (available from February 2024)
- "gpt3" still refers to the general endpoint "gpt-3.5-turbo", which OpenAI will move to version 0125 by mid-February (ie, "gpt3t" will be the same as "gpt3" then. We have reflected the approximate cost in the model registry but note that it will be incorrect in the transition period)
- "emb3small" refers to the small version of the new embedding model (dim=1536), which is 5x cheaper than Ada and promises higher quality
- "emb3large" refers to the large version of the new embedding model (dim=3072), which is only 30% more expensive than Ada

### Fixed
- Fixed typos in the documentation

## [0.9.0]

### Added
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "PromptingTools"
uuid = "670122d1-24a8-4d70-bfce-740807c42192"
authors = ["J S @svilupp and contributors"]
version = "0.9.0"
version = "0.10.0"

[deps]
Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
4 changes: 2 additions & 2 deletions src/Experimental/RAGTools/preparation.jl
@@ -41,7 +41,7 @@ function _normalize end
get_chunks(files_or_docs::Vector{<:AbstractString}; reader::Symbol = :files,
sources::Vector{<:AbstractString} = files_or_docs,
verbose::Bool = true,
separators = ["\n\n", ". ", "\n"], max_length::Int = 256)
separators = ["\\n\\n", ". ", "\\n"], max_length::Int = 256)
Chunks the provided `files_or_docs` into chunks of maximum length `max_length` (if possible with provided `separators`).
@@ -52,7 +52,7 @@ Supports two modes of operation:
# Arguments
- `files_or_docs`: A vector of valid file paths OR string documents to be chunked.
- `reader`: A symbol indicating the type of input, can be either `:files` or `:docs`. Default is `:files`.
- `separators`: A list of strings used as separators for splitting the text in each file into chunks. Default is `[\n\n", ". ", "\n"]`.
- `separators`: A list of strings used as separators for splitting the text in each file into chunks. Default is `["\\n\\n", ". ", "\\n"]`.
- `max_length`: The maximum length of each chunk (if possible with provided separators). Default is 256.
- `sources`: A vector of strings indicating the source of each chunk. Default is equal to `files_or_docs` (for `reader=:files`)
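A minimal usage sketch for the docstring above (an assumption based on the arguments described — the two-vector return shape and the `Experimental.RAGTools` path are not confirmed by this diff):

```julia
using PromptingTools.Experimental.RAGTools: get_chunks

docs = ["First paragraph.\n\nSecond paragraph, which is long enough that it may be split further."]
# :docs mode treats the inputs as string documents rather than file paths
chunks, sources = get_chunks(docs; reader = :docs, sources = ["doc1"], max_length = 64)
```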
43 changes: 35 additions & 8 deletions src/user_preferences.jl
@@ -109,7 +109,7 @@ end
## Load up GLOBALS
const MODEL_CHAT::String = @load_preference("MODEL_CHAT", default="gpt-3.5-turbo")
const MODEL_EMBEDDING::String = @load_preference("MODEL_EMBEDDING",
default="text-embedding-ada-002")
default="text-embedding-3-small")
# the prompt schema default is defined in llm_interface.jl !
# const PROMPT_SCHEMA = OpenAISchema()
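If you depend on the old Ada embeddings, the previous default can be pinned back in your preferences (a sketch — `set_preferences!` is assumed to be the package's persistent preference setter):

```julia
using PromptingTools

# Persist the previous default so older stored embeddings stay comparable
PromptingTools.set_preferences!("MODEL_EMBEDDING" => "text-embedding-ada-002")
```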

@@ -246,9 +246,11 @@ end
aliases = merge(Dict("gpt3" => "gpt-3.5-turbo",
"gpt4" => "gpt-4",
"gpt4v" => "gpt-4-vision-preview", # 4v is for "4 vision"
"gpt4t" => "gpt-4-1106-preview", # 4t is for "4 turbo"
"gpt3t" => "gpt-3.5-turbo-1106", # 3t is for "3 turbo"
"gpt4t" => "gpt-4-turbo-preview", # 4t is for "4 turbo"
"gpt3t" => "gpt-3.5-turbo-0125", # 3t is for "3 turbo"
"ada" => "text-embedding-ada-002",
"emb3small" => "text-embedding-3-small",
"emb3large" => "text-embedding-3-large",
"yi34c" => "yi:34b-chat",
"oh25" => "openhermes2.5-mistral",
"starling" => "starling-lm",
@@ -258,14 +260,19 @@ aliases = merge(Dict("gpt3" => "gpt-3.5-turbo",

registry = Dict{String, ModelSpec}("gpt-3.5-turbo" => ModelSpec("gpt-3.5-turbo",
OpenAISchema(),
0.5e-6,
1.5e-6,
2e-6,
"GPT-3.5 Turbo is a 175B parameter model and a common default on the OpenAI API."),
"GPT-3.5 Turbo is a 175B parameter model and a common default on the OpenAI API. From mid-Feb 2024, it will be using the new GPT-3.5 Turbo 0125 version (pricing is set assuming the 0125 version)."),
"gpt-3.5-turbo-1106" => ModelSpec("gpt-3.5-turbo-1106",
OpenAISchema(),
1e-6,
2e-6,
"GPT-3.5 Turbo is the latest version of GPT3.5 and the cheapest to use."),
"GPT-3.5 Turbo is an updated version of GPT3.5 that is much faster and cheaper to use. 1106 refers to the release date of November 6, 2023."),
"gpt-3.5-turbo-0125" => ModelSpec("gpt-3.5-turbo-0125",
OpenAISchema(),
0.5e-6,
1.5e-6,
"GPT-3.5 Turbo is an updated version of GPT3.5 that is much faster and cheaper to use. This is the cheapest GPT-3.5 Turbo model. 0125 refers to the release date of January 25, 2024."),
"gpt-4" => ModelSpec("gpt-4",
OpenAISchema(),
3e-5,
@@ -275,7 +282,17 @@ registry = Dict{String, ModelSpec}("gpt-3.5-turbo" => ModelSpec("gpt-3.5-turbo",
OpenAISchema(),
1e-5,
3e-5,
"GPT-4 Turbo is the latest version of GPT4 that is much faster and the cheapest to use."),
"GPT-4 Turbo 1106 is an updated version of GPT4 that is much faster and the cheaper to use. 1106 refers to the release date of November 6, 2023."),
"gpt-4-0125-preview" => ModelSpec("gpt-4-0125-preview",
OpenAISchema(),
1e-5,
3e-5,
"GPT-4 Turbo is an updated version of GPT4 that is much faster and the cheaper to use. 0125 refers to the release date of January 25, 2024."),
"gpt-4-turbo-preview" => ModelSpec("gpt-4-turbo-preview",
OpenAISchema(),
1e-5,
3e-5,
"GPT-4 Turbo is an updated version of GPT4 that is much faster and the cheaper to use. This is the general name for whatever is the latest GPT4 Turbo preview release. Right now it is 0125."),
"gpt-4-vision-preview" => ModelSpec("gpt-4-vision-preview",
OpenAISchema(),
1e-5,
@@ -285,7 +302,17 @@ registry = Dict{String, ModelSpec}("gpt-3.5-turbo" => ModelSpec("gpt-3.5-turbo",
OpenAISchema(),
1e-7,
0.0,
"Text Embedding Ada is a 1.75T parameter model and the largest model available on the OpenAI API."),
"Classic text embedding endpoint Ada from 2022 with 1536 dimensions."),
"text-embedding-3-small" => ModelSpec("text-embedding-3-small",
OpenAISchema(),
0.2e-7,
0.0,
"New text embedding endpoint with 1536 dimensions, but 5x cheaper than Ada and more performant."),
"text-embedding-3-large" => ModelSpec("text-embedding-3-large",
OpenAISchema(),
1.3e-7,
0.0,
"New text embedding endpoint with 3072 dimensions, c. 30% more expensive than Ada but more performant."),
"llama2" => ModelSpec("llama2",
OllamaSchema(),
0.0,
2 changes: 1 addition & 1 deletion src/utils.jl
@@ -55,7 +55,7 @@ This is particularly useful for splitting larger documents or texts into smaller
Splitting text with the default separator (" "):
```julia
text = "Hello world. How are you?"
chunks = splitbysize(text; max_length=13)
chunks = split_by_length(text; max_length=13)
length(chunks) # Output: 2
```
