feat: Add ConversationMemory and enhance AnnotationMessage #238

Merged · 137 commits · Nov 26, 2024

Changes from all commits

Commits (137)
ac751a9
Revert "update deps"
svilupp Feb 26, 2024
423faef
remove GoogleGenAI (#83)
svilupp Feb 26, 2024
320b9b7
fix docs
svilupp Feb 27, 2024
18e1b71
Templating utilities (#84)
svilupp Feb 28, 2024
5200985
update docs + version (#85)
svilupp Feb 29, 2024
846aa37
Add image generation with DALL-E 3 (#86)
svilupp Mar 1, 2024
37fa1f4
update changelog (#87)
svilupp Mar 1, 2024
b8e888d
Update docs to Vitepress (#88)
svilupp Mar 1, 2024
4d26a4f
update docs/make.jl
svilupp Mar 1, 2024
a581d55
fix CI
svilupp Mar 1, 2024
83cefd9
Fix CI nr2
svilupp Mar 1, 2024
db91e60
Fix CI nr3
svilupp Mar 1, 2024
2d45b8e
Fix CI nr4
svilupp Mar 1, 2024
a84f230
Fix CI nr5
svilupp Mar 1, 2024
81198f5
Fix CI nr.6
svilupp Mar 1, 2024
0d6954e
Fix CI nr7
svilupp Mar 1, 2024
37603a4
Fix CI nr8
svilupp Mar 1, 2024
e97f571
Update README.md
svilupp Mar 2, 2024
cd681c8
Add support annotations (#90)
svilupp Mar 7, 2024
b3756b2
Update Documentation (#91)
svilupp Mar 9, 2024
94911cf
Add Prompt Templates to the Docs (#92)
svilupp Mar 9, 2024
846d069
fix typo on set_preferences! examples, fixes #93 (#94)
ceferisbarov Mar 17, 2024
3b30d66
RAG Interface Rewrite (#95)
svilupp Mar 20, 2024
2509e0b
Update Google AI tutorial (#103)
svilupp Mar 21, 2024
e347081
Allow HTMLStyler in node annotations (#105)
svilupp Mar 21, 2024
96357ce
update diagram in the docs (#108)
svilupp Mar 22, 2024
ce4a2ed
A little README.md correction (#107)
Muhammad-saad-2000 Mar 24, 2024
7cab975
Add support for Claude API (#109)
svilupp Mar 25, 2024
5146a95
Enable GoogleGenAI extension (#111)
svilupp Mar 26, 2024
4925b70
Update CHANGELOG.md (#104)
svilupp Mar 26, 2024
5d9d132
Add ShareGPT template (#113)
svilupp Mar 26, 2024
82d87db
Increase compat for GoogleGenAI v0.3
svilupp Mar 26, 2024
069b6f6
Update html printing (#115)
svilupp Mar 27, 2024
7eb32b1
Fix bug in `print_html` (#116)
svilupp Mar 27, 2024
ee3459d
Add Binary embeddings to RAGTools (#117)
svilupp Apr 3, 2024
024562d
Add data extraction for Anthropic models (#122)
svilupp Apr 5, 2024
fc0073e
Register mistral tiny (#123)
svilupp Apr 6, 2024
84f68cc
Add new GPT-4 Turbo (#124)
svilupp Apr 10, 2024
5054f4c
Update code fences in the hero cards in the docs
cpfiffer Apr 11, 2024
49f3f5b
Update Docs Hero page
svilupp Apr 11, 2024
eb94d1a
Add TraceMessage for observability (#133)
svilupp Apr 15, 2024
23e4f0b
Update binary RAG pipeline (#136)
svilupp Apr 17, 2024
a81bd76
Fix truncate_dimension (#137)
svilupp Apr 18, 2024
790b4e2
Add Llama 3 (#138)
svilupp Apr 19, 2024
02b7cf1
Add support for groq (#139)
svilupp Apr 20, 2024
3b18f02
Update Project.toml (#140)
svilupp Apr 20, 2024
ebaa2c5
Update new OpenAI pre-paid credit requirements (#135)
KronosTheLate Apr 20, 2024
4f28fb6
Add model providers and Supported functions (#134)
adarshpalaskar1 Apr 20, 2024
382638d
Add templates and minor improvements (#142)
svilupp Apr 27, 2024
4655422
Add DeepSeek models (#147)
svilupp May 7, 2024
36aa9cb
Update Changelog (#148)
svilupp May 7, 2024
64f51cb
Add GPT4-Omni
svilupp May 13, 2024
f29b99e
Improvements to aiclassify and aitemplates (#150)
svilupp May 18, 2024
2549722
Improve tracer schema / automated logging (#151)
svilupp May 19, 2024
d244680
Add BitPacked embeddings for RAG retrieval (#152)
svilupp May 19, 2024
39647b3
Add more tracer kwargs for logging
svilupp May 20, 2024
d8f3fca
Fix LCS utility
svilupp May 21, 2024
dee5263
Template file parsing fix
svilupp May 22, 2024
bf1eb85
Add BM25 Index (#157)
svilupp May 28, 2024
608bdf0
E2E Hybrid retrieval
svilupp May 28, 2024
6ec6456
Register Mistral Codestral
svilupp May 29, 2024
0f45fde
add FlashRank.jl package extension
svilupp Jun 11, 2024
f3e3994
Update deps
svilupp Jun 14, 2024
223107f
Update RAG performance
svilupp Jun 18, 2024
a4f191a
Improve unpack_bits (#165)
svilupp Jun 18, 2024
03029ee
Update FlashRank to use only unique documents (#166)
svilupp Jun 18, 2024
bbb1c38
fix formatting of changelog
svilupp Jun 18, 2024
f63935d
Add Anthropic Sonnet 3.5 (#167)
svilupp Jun 20, 2024
be91226
Rag Tools fix + relaxing `const` for API key loading
svilupp Jun 26, 2024
971e926
Update CHANGELOG.md
svilupp Jun 26, 2024
4f0cfd7
Add back API keys
svilupp Jun 26, 2024
af4f67f
Add RankGPT (#172)
svilupp Jul 1, 2024
44450c0
Reciprocal Rank Fusion
svilupp Jul 1, 2024
b5f089f
Update rankGPT
svilupp Jul 2, 2024
6e9f0ea
Compat-flashrank-v04
svilupp Jul 7, 2024
988f3d3
Fix CohereReranker bug
svilupp Jul 9, 2024
dd3fbbc
Drafter update
svilupp Jul 13, 2024
dfb88a1
Add AllTagFilter (#178)
svilupp Jul 16, 2024
53ac0b8
Add GPT-4o-mini + set as default (#180)
svilupp Jul 19, 2024
fcd7509
Add SubChunkIndex (view of index)
svilupp Jul 21, 2024
89d4c43
Add SubDocumentTermMatrix (#181)
svilupp Jul 22, 2024
0f1a334
Register Llama3.1 + minor retrieval improvements
svilupp Jul 23, 2024
c02bd43
Llama 3.1 Fireworks.ai
svilupp Jul 24, 2024
886bdd1
Add Mistral Large 2 and Nemo
svilupp Jul 24, 2024
f6cb37d
Fix wrap string
svilupp Jul 30, 2024
0916bd7
version fix
svilupp Jul 30, 2024
e2553b8
Updates embedding concatenation (#186)
svilupp Aug 4, 2024
1473799
Fix getindex
svilupp Aug 4, 2024
1f941f9
Update RAGTools docstrings
svilupp Aug 5, 2024
a53fbfe
Add GPT4o registry entry
svilupp Aug 7, 2024
7d6a8d8
Add DTM specialized method
svilupp Aug 8, 2024
8f697f6
Add OpenAI structured outputs
svilupp Aug 9, 2024
a2cde30
Add ChatGPT 4o (#195)
svilupp Aug 14, 2024
8e880f8
Add experimental prompt cache support for Anthropic
svilupp Aug 16, 2024
06aa438
Improved docs styling with DocumenterVitepress
lazarusA Aug 20, 2024
271ff08
Add assertion for empty docs in `get_embeddings` (#200)
iuliadmtru Aug 23, 2024
5fe770a
Fix documentation for building RAG (#201)
iuliadmtru Aug 23, 2024
1c8fb7d
Update Structured Extraction
svilupp Sep 3, 2024
e1a3d23
Add stream callbacks
svilupp Sep 7, 2024
1ef3ba3
Update Anthropic kwargs + docs
svilupp Sep 9, 2024
a125fcd
Updated DocumentTermMatrix implementation
svilupp Sep 10, 2024
7038175
OpenAI JSON mode extraction
svilupp Sep 15, 2024
9a75d2c
OpenRouter support, new OpenAI o1 models (#207)
svilupp Sep 15, 2024
fe23115
Update reserved keywords
svilupp Sep 15, 2024
51a46bc
Tidy up streaming
svilupp Sep 17, 2024
2a01bb5
Enable Streaming for OpenAI-compatible models
svilupp Sep 18, 2024
0745925
Fix logging
svilupp Sep 22, 2024
cd3f891
Support for Azure OpenAI API
pabvald Sep 24, 2024
ea8d51a
Version up
svilupp Sep 24, 2024
a66da99
Add Cerebras API + node validation for airetry!
svilupp Oct 9, 2024
f945ab9
Tool interface update
svilupp Oct 19, 2024
5280ee8
Add a new models, clean up tools (#219)
svilupp Oct 20, 2024
c57d209
Update VERSION
svilupp Oct 20, 2024
a8c1799
Multi-turn tool fix
svilupp Oct 20, 2024
e7b5f7f
Update anthropic models (#223)
svilupp Oct 22, 2024
80e232d
Add streaming for OllamaSchema (#224)
svilupp Oct 26, 2024
4c3516f
Fix call_cost (#225)
svilupp Nov 3, 2024
158f409
Anthropic computer use (#226)
svilupp Nov 3, 2024
8b6f951
Update Callbacks compat + ToolRef definition (#227)
svilupp Nov 3, 2024
b2bcb94
Update tool execution
svilupp Nov 3, 2024
bcc7c81
Add Haiku35 and Grok2Beta
svilupp Nov 5, 2024
31b1c27
Fix tool dict (#230)
svilupp Nov 6, 2024
3cd535e
Gwen 72B and open-router Google Flash added. (#232)
Sixzero Nov 11, 2024
faaf9ce
Add GoogleOpenAISchema with comprehensive test logging (#234)
devin-ai-integration[bot] Nov 12, 2024
8a2fd46
Add image support to aitools (#235)
svilupp Nov 17, 2024
42fafbb
[Breaking] Renamed MISTRALAI to MISTRAL everywhere (#231)
Sixzero Nov 18, 2024
3cf6e88
update Google Gemini model (#237)
svilupp Nov 23, 2024
efe8748
feat: Add ConversationMemory and enhance AnnotationMessage
devin-ai-integration[bot] Nov 25, 2024
d90bd07
Fix annotation message rendering tests for different schema structures
devin-ai-integration[bot] Nov 25, 2024
3f9f4dc
Fix AnnotationMessage tests and implementation
devin-ai-integration[bot] Nov 25, 2024
3b54eef
update tests
svilupp Nov 26, 2024
a64fca8
up
svilupp Nov 26, 2024
47354cd
cleanup
svilupp Nov 26, 2024
7b79d8b
Merge branch 'devin/1732559303-add-conversation-memory-and-annotation…
svilupp Nov 26, 2024
2599ce7
Merge branch 'main' into devin/1732559303-add-conversation-memory-and…
svilupp Nov 26, 2024
58ca841
update tests
svilupp Nov 26, 2024
a6fb649
update tests
svilupp Nov 26, 2024
5 changes: 4 additions & 1 deletion .gitignore
@@ -13,4 +13,7 @@
docs/package-lock.json

# Ignore Cursor rules
.cursorrules
.cursorrules

# Ignore any local preferences
**/LocalPreferences.toml
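The new ignore entry exists because Preferences.jl writes local, per-project settings (API keys, default models) into `LocalPreferences.toml`, which should never be committed. A minimal sketch of what produces that file; the specific preference key is only an illustration:

```julia
using PromptingTools

# Persists the setting into LocalPreferences.toml next to the active Project.toml,
# which is exactly the file the new .gitignore rule keeps out of version control.
# "MODEL_CHAT" is an illustrative preference name; see `?PromptingTools.set_preferences!`.
PromptingTools.set_preferences!("MODEL_CHAT" => "gpt-4o-mini")
```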
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -17,6 +17,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added
- Added a new Gemini Experimental model from November 2024 (`gemini-exp-1121` with alias `gemexp`).
- Added a new `AnnotationMessage` type for keeping human-only information in the message changes. See `?annotate!` on how to use it.
- Added a new `ConversationMemory` type to enable long multi-turn conversations with a truncated memory of the conversation history. Truncation works in "batches" to not prevent caching. See `?ConversationMemory` and `get_last` for more information.


### Updated
- Changed the ENV variable for MistralAI API from `MISTRALAI_API_KEY` to `MISTRAL_API_KEY` to be compatible with the Mistral docs.
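The `ConversationMemory` entry is the larger of the two additions, but its implementation (`src/memory.jl`) is not shown in this diff view, so the sketch below is inferred from the CHANGELOG wording alone. The constructor, the `push!` support, and the `batch_size` keyword are assumptions; `get_last` is the accessor the entry names. Check `?ConversationMemory` and `?get_last` for the real API.

```julia
using PromptingTools
const PT = PromptingTools

# Assumed usage, per the CHANGELOG description above (implementation not shown in this diff).
mem = PT.ConversationMemory()                       # assumed zero-argument constructor
push!(mem, PT.SystemMessage("You are terse."))      # assumed `push!` support
push!(mem, PT.UserMessage("Summarise our chat so far."))

# `get_last` returns a truncated view of the history; truncation happens in
# "batches" so the kept prefix stays stable and prompt caching is not broken.
# `batch_size` is an assumed keyword name for that batch width.
recent = PT.get_last(mem, 20; batch_size = 10)
```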
6 changes: 5 additions & 1 deletion src/PromptingTools.jl
@@ -67,9 +67,13 @@ include("user_preferences.jl")

## Conversation history / Prompt elements
export AIMessage
# export UserMessage, UserMessageWithImages, SystemMessage, DataMessage # for debugging only
include("messages.jl")

# export ConversationMemory
include("memory.jl")
# export annotate!
include("annotation.jl")

export aitemplates, AITemplate
include("templates.jl")

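Both new files are included but their names stay unexported (the `export` lines are left commented out), so callers reach them through the module prefix:

```julia
using PromptingTools
const PT = PromptingTools

messages = [PT.SystemMessage("Assistant"), PT.UserMessage("Hello")]
messages = PT.annotate!(messages, "dataset: v2")   # from src/annotation.jl, not exported
mem = PT.ConversationMemory()                      # from src/memory.jl, not exported (constructor assumed)
```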
40 changes: 40 additions & 0 deletions src/annotation.jl
@@ -0,0 +1,40 @@
"""
annotate!(messages::AbstractVector{<:AbstractMessage}, content; kwargs...)
annotate!(message::AbstractMessage, content; kwargs...)

Add an annotation message to a vector of messages or wrap a single message in a vector with an annotation.
The annotation is always inserted after any existing annotation messages.

# Arguments
- `messages`: Vector of messages or single message to annotate
- `content`: Content of the annotation
- `kwargs...`: Additional fields for the AnnotationMessage (extras, tags, comment)

# Returns
Vector{AbstractMessage} with the annotation message inserted

# Example
```julia
messages = [SystemMessage("Assistant"), UserMessage("Hello")]
annotate!(messages, "This is important"; tags=[:important], comment="For review")
```
"""
function annotate!(messages::AbstractVector{T}, content::AbstractString;
kwargs...) where {T <: AbstractMessage}
# Convert to Vector{AbstractMessage} if needed
messages_abstract = T == AbstractMessage ? messages :
convert(Vector{AbstractMessage}, messages)

# Find last annotation message index
last_anno_idx = findlast(isabstractannotationmessage, messages_abstract)
insert_idx = isnothing(last_anno_idx) ? 1 : last_anno_idx + 1

# Create and insert annotation message
anno = AnnotationMessage(; content = content, kwargs...)
insert!(messages_abstract, insert_idx, anno)
return messages_abstract
end

function annotate!(message::AbstractMessage, content::AbstractString; kwargs...)
return annotate!(AbstractMessage[message], content; kwargs...)
end
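The two methods above are small, but the ordering rule matters: a single message is first wrapped in a vector, and each new annotation is inserted after the last existing annotation rather than pushed to the front. A short walk-through based directly on the code above:

```julia
using PromptingTools
const PT = PromptingTools

# Single-message method: wraps the message into a Vector{AbstractMessage}.
conv = PT.annotate!(PT.UserMessage("Hello"), "first note"; tags = [:note])
# conv now holds: annotation "first note", then UserMessage("Hello")

# Vector method: the last annotation sits at index 1, so the new one lands at index 2,
# keeping annotations grouped at the top in insertion order.
conv = PT.annotate!(conv, "second note"; comment = "for review")
# conv now holds: "first note", "second note", UserMessage("Hello")
```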
7 changes: 6 additions & 1 deletion src/llm_anthropic.jl
@@ -28,10 +28,13 @@ function render(schema::AbstractAnthropicSchema,
no_system_message::Bool = false,
cache::Union{Nothing, Symbol} = nothing,
kwargs...)
##
##
@assert count(issystemmessage, messages)<=1 "AbstractAnthropicSchema only supports at most 1 System message"
@assert (isnothing(cache)||cache in [:system, :tools, :last, :all]) "Currently only `:system`, `:tools`, `:last`, `:all` are supported for Anthropic Prompt Caching"

# Filter out annotation messages before any processing
messages = filter(!isabstractannotationmessage, messages)

system = nothing

## First pass: keep the message types but make the replacements provided in `kwargs`
@@ -44,6 +47,8 @@
for msg in messages_replaced
if issystemmessage(msg)
system = msg.content
elseif isabstractannotationmessage(msg)
continue
elseif isusermessage(msg) || isaimessage(msg)
content = msg.content
push!(conversation,
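The early `filter` above (mirrored by the `continue` in the message loop, and repeated in the Google, Ollama, and OpenAI renderers further down) means annotations never reach a provider payload. A rough sketch, assuming the schema is constructed directly:

```julia
using PromptingTools
const PT = PromptingTools

conv = [PT.SystemMessage("You are terse."), PT.UserMessage("Hi!")]
conv = PT.annotate!(conv, "internal reviewer note, not meant for the model")

# The annotation is filtered out before the Anthropic payload is assembled;
# only the system prompt and the user turn remain in the rendered output.
rendered = PT.render(PT.AnthropicSchema(), conv)
```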
6 changes: 6 additions & 0 deletions src/llm_google.jl
@@ -25,6 +25,9 @@ function render(schema::AbstractGoogleSchema,
no_system_message::Bool = false,
kwargs...)
##
# Filter out annotation messages before any processing
messages = filter(!isabstractannotationmessage, messages)

## First pass: keep the message types but make the replacements provided in `kwargs`
messages_replaced = render(
NoSchema(), messages; conversation, no_system_message, kwargs...)
@@ -34,6 +37,9 @@

# replace any handlebar variables in the messages
for msg in messages_replaced
if isabstractannotationmessage(msg)
continue
end
push!(conversation,
Dict(
:role => role4render(schema, msg), :parts => [Dict("text" => msg.content)]))
13 changes: 10 additions & 3 deletions src/llm_interface.jl
@@ -41,14 +41,21 @@ struct OpenAISchema <: AbstractOpenAISchema end

"Echoes the user's input back to them. Used for testing the implementation"
@kwdef mutable struct TestEchoOpenAISchema <: AbstractOpenAISchema
response::AbstractDict
status::Integer
response::AbstractDict = Dict(
"choices" => [Dict("message" => Dict("content" => "Test response", "role" => "assistant"), "index" => 0, "finish_reason" => "stop")],
"usage" => Dict("prompt_tokens" => 10, "completion_tokens" => 20, "total_tokens" => 30),
"model" => "gpt-3.5-turbo",
"id" => "test-id",
"object" => "chat.completion",
"created" => 1234567890
)
status::Integer = 200
model_id::String = ""
inputs::Any = nothing
end

"""
CustomOpenAISchema
CustomOpenAISchema

CustomOpenAISchema() allows user to call any OpenAI-compatible API.

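Giving `TestEchoOpenAISchema` sensible defaults means tests no longer have to hand-build a fake OpenAI response just to exercise the pipeline. A sketch of the intended pattern; calling `aigenerate` with the echo schema mirrors how the package's own tests use it, but treat the exact call as an assumption:

```julia
using PromptingTools
const PT = PromptingTools

# Defaults now supply a canned "Test response" completion, a 200 status,
# and fake token counts, so a bare constructor is enough for most tests.
schema = PT.TestEchoOpenAISchema()

# No network call is made; the canned response is parsed as if it came from the API
# and the rendered request is captured in `schema.inputs` for inspection.
msg = aigenerate(schema, "Say hi!")
@assert msg.content == "Test response"
```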
8 changes: 7 additions & 1 deletion src/llm_ollama.jl
@@ -27,6 +27,9 @@ function render(schema::AbstractOllamaSchema,
no_system_message::Bool = false,
kwargs...)
##
# Filter out annotation messages before any processing
messages = filter(!isabstractannotationmessage, messages)

## First pass: keep the message types but make the replacements provided in `kwargs`
messages_replaced = render(
NoSchema(), messages; conversation, no_system_message, kwargs...)
@@ -36,6 +39,9 @@

# replace any handlebar variables in the messages
for msg in messages_replaced
if isabstractannotationmessage(msg)
continue
end
new_message = Dict{String, Any}(
"role" => role4render(schema, msg), "content" => msg.content)
## Special case for images
@@ -376,4 +382,4 @@
function aitools(prompt_schema::AbstractOllamaSchema, prompt::ALLOWED_PROMPT_TYPE;
kwargs...)
error("Managed schema does not support aitools. Please use OpenAISchema instead.")
end
end
2 changes: 2 additions & 0 deletions src/llm_ollama_managed.jl
@@ -40,6 +40,8 @@ function render(schema::AbstractOllamaManagedSchema,
system = msg.content
elseif msg isa UserMessage
prompt = msg.content
elseif isabstractannotationmessage(msg)
continue
elseif msg isa UserMessageWithImages
error("Managed schema does not support UserMessageWithImages. Please use OpenAISchema instead.")
elseif msg isa AIMessage
8 changes: 7 additions & 1 deletion src/llm_openai.jl
@@ -33,6 +33,10 @@ function render(schema::AbstractOpenAISchema,
kwargs...)
##
@assert image_detail in ["auto", "high", "low"] "Image detail must be one of: auto, high, low"

# Filter out annotation messages before any processing
messages = filter(!isabstractannotationmessage, messages)

## First pass: keep the message types but make the replacements provided in `kwargs`
messages_replaced = render(
NoSchema(), messages; conversation, no_system_message, kwargs...)
@@ -71,6 +75,8 @@
content = msg.content isa AbstractString ? msg.content : string(msg.content)
Dict("role" => role4render(schema, msg), "content" => content,
"tool_call_id" => msg.tool_call_id)
elseif isabstractannotationmessage(msg)
continue
else
## Vanilla assistant message
Dict("role" => role4render(schema, msg),
@@ -1733,4 +1739,4 @@ function aitools(prompt_schema::AbstractOpenAISchema, prompt::ALLOWED_PROMPT_TYPE;
kwargs...)

return output
end
end
7 changes: 7 additions & 0 deletions src/llm_shared.jl
@@ -8,6 +8,7 @@ role4render(schema::AbstractPromptSchema, msg::UserMessageWithImages) = "user"
role4render(schema::AbstractPromptSchema, msg::AIMessage) = "assistant"
role4render(schema::AbstractPromptSchema, msg::AIToolRequest) = "assistant"
role4render(schema::AbstractPromptSchema, msg::ToolMessage) = "tool"
role4render(schema::AbstractPromptSchema, msg::AbstractAnnotationMessage) = "annotation"
"""
render(schema::NoSchema,
messages::Vector{<:AbstractMessage};
@@ -39,6 +40,9 @@ function render(schema::NoSchema,
count_system_msg = count(issystemmessage, conversation)
# TODO: concat multiple system messages together (2nd pass)

# Filter out annotation messages from input messages
messages = filter(!isabstractannotationmessage, messages)

# replace any handlebar variables in the messages
for msg in messages
if issystemmessage(msg) || isusermessage(msg) || isusermessagewithimages(msg)
@@ -73,6 +77,9 @@ function render(schema::NoSchema,
count_system_msg += 1
# move to the front
pushfirst!(conversation, msg)
elseif isabstractannotationmessage(msg)
# Ignore annotation messages
continue
else
# Note: Ignores any DataMessage or other types for the prompt/conversation history
@warn "Unexpected message type: $(typeof(msg)). Skipping."
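Two small pieces here: annotation messages get their own "annotation" role label for any prompt schema, and the generic `NoSchema` render pass (used by every provider-specific renderer) now drops them before the conversation is assembled. The role label is easy to check directly:

```julia
using PromptingTools
const PT = PromptingTools

anno = PT.AnnotationMessage(; content = "Reviewed by a human", tags = [:review])

# Generic fallback defined above: any prompt schema reports the "annotation" role.
# Providers never see it, because the render pipeline filters these messages out.
PT.role4render(PT.OpenAISchema(), anno)   # == "annotation"
```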
2 changes: 1 addition & 1 deletion src/llm_sharegpt.jl
@@ -9,7 +9,7 @@ end
function render(schema::AbstractShareGPTSchema, conv::AbstractVector{<:AbstractMessage})
Dict("conversations" => [Dict("from" => role4render(schema, msg),
"value" => msg.content)
for msg in conv])
for msg in conv if !isabstractannotationmessage(msg)])
end

### AI Functions
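ShareGPT exports get the same treatment: the comprehension above skips annotation messages, so human-only notes never leak into exported training data. For instance:

```julia
using PromptingTools
const PT = PromptingTools

conv = [
    PT.SystemMessage("You are terse."),
    PT.UserMessage("Hi!"),
    PT.AIMessage(; content = "Hello!")
]
conv = PT.annotate!(conv, "internal note, excluded from the export")

# Only the system/user/assistant turns end up in the exported dictionary.
out = PT.render(PT.ShareGPTSchema(), conv)
length(out["conversations"])   # 3, the annotation message was dropped
```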
3 changes: 3 additions & 0 deletions src/llm_tracer.jl
@@ -16,6 +16,9 @@ end
function role4render(schema::AbstractTracerSchema, msg::AIMessage)
role4render(schema.schema, msg)
end
function role4render(schema::AbstractTracerSchema, msg::AbstractAnnotationMessage)
role4render(schema.schema, msg)
end
"""
render(tracer_schema::AbstractTracerSchema,
conv::AbstractVector{<:AbstractMessage}; kwargs...)