From c2ff9634e0444d488b9d4dcf71e81bec44da33e6 Mon Sep 17 00:00:00 2001 From: J S <49557684+svilupp@users.noreply.github.com> Date: Thu, 21 Dec 2023 21:03:20 +0000 Subject: [PATCH 01/14] add draft --- CHANGELOG.md | 1 + Project.toml | 13 +- ext/RAGToolsExperimentalExt.jl | 34 +++++ src/Experimental/Experimental.jl | 15 ++ src/Experimental/RAGTools/RAGTools.jl | 33 +++++ src/Experimental/RAGTools/evaluation.jl | 106 ++++++++++++++ src/Experimental/RAGTools/generation.jl | 128 +++++++++++++++++ src/Experimental/RAGTools/preparation.jl | 144 +++++++++++++++++++ src/Experimental/RAGTools/retrieval.jl | 45 ++++++ src/Experimental/RAGTools/types.jl | 132 +++++++++++++++++ src/Experimental/RAGTools/utils.jl | 17 +++ src/PromptingTools.jl | 3 + src/utils.jl | 125 +++++++++++++++- templates/RAG/CreateQAFromContext.json | 1 + templates/RAG/RAGAnswerFromContext.json | 1 + templates/RAG/RAGCreateQAFromContext.json | 1 + templates/RAG/RAGExtractMetadataLong.json | 1 + templates/RAG/RAGExtractMetadataShort.json | 1 + templates/RAG/RAGJudgeAnswerFromContext.json | 1 + test/Experimental/RAGTools.jl/preparation.jl | 68 +++++++++ test/Experimental/RAGTools.jl/retrieval.jl | 10 ++ test/Experimental/RAGTools.jl/runtests.jl | 10 ++ test/Experimental/RAGTools.jl/types.jl | 121 ++++++++++++++++ test/Experimental/RAGTools.jl/utils.jl | 0 test/runtests.jl | 1 + test/utils.jl | 61 ++++++-- 26 files changed, 1056 insertions(+), 17 deletions(-) create mode 100644 ext/RAGToolsExperimentalExt.jl create mode 100644 src/Experimental/Experimental.jl create mode 100644 src/Experimental/RAGTools/RAGTools.jl create mode 100644 src/Experimental/RAGTools/evaluation.jl create mode 100644 src/Experimental/RAGTools/generation.jl create mode 100644 src/Experimental/RAGTools/preparation.jl create mode 100644 src/Experimental/RAGTools/retrieval.jl create mode 100644 src/Experimental/RAGTools/types.jl create mode 100644 src/Experimental/RAGTools/utils.jl create mode 100644 templates/RAG/CreateQAFromContext.json create mode 100644 templates/RAG/RAGAnswerFromContext.json create mode 100644 templates/RAG/RAGCreateQAFromContext.json create mode 100644 templates/RAG/RAGExtractMetadataLong.json create mode 100644 templates/RAG/RAGExtractMetadataShort.json create mode 100644 templates/RAG/RAGJudgeAnswerFromContext.json create mode 100644 test/Experimental/RAGTools.jl/preparation.jl create mode 100644 test/Experimental/RAGTools.jl/retrieval.jl create mode 100644 test/Experimental/RAGTools.jl/runtests.jl create mode 100644 test/Experimental/RAGTools.jl/types.jl create mode 100644 test/Experimental/RAGTools.jl/utils.jl diff --git a/CHANGELOG.md b/CHANGELOG.md index 342e114d8..050419827 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- Experimental sub-module RAGTools providing basic Retrieval-Augmented Generation functionality. See `?RAGTools` for more information. It's nested inside of `PromptingTools.Experimental.RAGTools` to signify that it might change in the future. 
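  An illustrative way to access it (the interface is experimental and may change):

  ```julia
  using PromptingTools
  const RAGTools = PromptingTools.Experimental.RAGTools
  ```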
### Fixed - Stricter code parsing in `AICode` to avoid false positives (code blocks must end with "```\n" to catch comments inside text) diff --git a/Project.toml b/Project.toml index 17136594e..66f9ada1d 100644 --- a/Project.toml +++ b/Project.toml @@ -12,21 +12,32 @@ OpenAI = "e9f21f70-7185-4079-aca2-91159181367c" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" Preferences = "21216c6a-2e73-6563-6e65-726566657250" +[weakdeps] +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + +[extensions] +RAGToolsExperimentalExt = ["SparseArrays","LinearAlgebra"] + [compat] Aqua = "0.7" Base64 = "<0.0.1, 1" HTTP = "1" JSON3 = "1" Logging = "<0.0.1, 1" +LinearAlgebra = "<0.0.1, 1" OpenAI = "0.8.7" PrecompileTools = "1" Preferences = "1" +SparseArrays = "<0.0.1, 1" Test = "<0.0.1, 1" julia = "1.9,1.10" [extras] Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" [targets] -test = ["Aqua", "Test"] +test = ["Aqua", "Test", "SparseArrays","LinearAlgebra"] diff --git a/ext/RAGToolsExperimentalExt.jl b/ext/RAGToolsExperimentalExt.jl new file mode 100644 index 000000000..7047853c0 --- /dev/null +++ b/ext/RAGToolsExperimentalExt.jl @@ -0,0 +1,34 @@ +module RAGToolsExperimentalExt + +using PromptingTools, SparseArrays +using LinearAlgebra: normalize +const PT = PromptingTools + +using PromptingTools.Experimental.RAGTools + +# forward to LinearAlgebra.normalize +PromptingTools.Experimental.RAGTools._normalize(arr::AbstractArray) = normalize(arr) + +# "Builds a sparse matrix of tags and a vocabulary from the given vector of chunk metadata. Requires SparseArrays.jl to be loaded." +function PromptingTools.Experimental.RAGTools.build_tags(chunk_metadata::Vector{ + Vector{String}, + }) + tags_vocab_ = vcat(chunk_metadata...) |> unique |> sort + tags_vocab_index = Dict{String, Int}(t => i for (i, t) in enumerate(tags_vocab_)) + Is, Js = Int[], Int[] + for i in eachindex(chunk_metadata) + for tag in chunk_metadata[i] + push!(Is, i) + push!(Js, tags_vocab_index[tag]) + end + end + tags_ = sparse(Is, + Js, + trues(length(Is)), + length(chunk_metadata), + length(tags_vocab_), + &) + return tags_, tags_vocab_ +end + +end \ No newline at end of file diff --git a/src/Experimental/Experimental.jl b/src/Experimental/Experimental.jl new file mode 100644 index 000000000..6a1f2cb20 --- /dev/null +++ b/src/Experimental/Experimental.jl @@ -0,0 +1,15 @@ +""" + Experimental + +This module is for experimental code that is not yet ready for production. +It is not included in the main module, so it must be explicitly imported. + +Contains: +- `RAGTools`: Retrieval-Augmented Generation (RAG) functionality. +""" +module Experimental + +export RAGTools +include("RAGTools/RAGTools.jl") + +end # module Experimental \ No newline at end of file diff --git a/src/Experimental/RAGTools/RAGTools.jl b/src/Experimental/RAGTools/RAGTools.jl new file mode 100644 index 000000000..47d4113ce --- /dev/null +++ b/src/Experimental/RAGTools/RAGTools.jl @@ -0,0 +1,33 @@ +""" + RAGTools + +Provides Retrieval-Augmented Generation (RAG) functionality. + +Requires: LinearAlgebra, SparseArrays, PromptingTools for proper functionality. + +This module is experimental and may change at any time. It is intended to be moved to a separate package in the future. 
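A minimal sketch of the intended workflow (illustrative only; the file names are placeholders and the exact keyword arguments may change while the module is experimental):

```julia
using LinearAlgebra, SparseArrays # needed to activate the index-building extension
using PromptingTools.Experimental.RAGTools

# chunk + embed the documents, then answer a question using the retrieved context
index = build_index(["file1.txt", "file2.txt"])
msg = airag(index; question = "How to make a barplot in Makie.jl?")
```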
+""" +module RAGTools + +using PromptingTools +using JSON3 +const PT = PromptingTools + +include("utils.jl") + +export ChunkIndex, CandidateChunks # MultiIndex +include("types.jl") + +export build_index, build_tags +include("preparation.jl") + +export find_closest, find_tags, rerank +include("retrieval.jl") + +export airag +include("generation.jl") + +export build_qa_evals +include("evaluation.jl") + +end \ No newline at end of file diff --git a/src/Experimental/RAGTools/evaluation.jl b/src/Experimental/RAGTools/evaluation.jl new file mode 100644 index 000000000..5ded7896d --- /dev/null +++ b/src/Experimental/RAGTools/evaluation.jl @@ -0,0 +1,106 @@ +### For testing and eval +# This is a return_type for extraction when generating Q&A set with aiextract +@kwdef struct QAItem + question::String + answer::String +end +# This is for saving in JSON format for evaluation later +@kwdef struct QAEvalItem + source::String = "" + context::String = "" + question::String = "" + answer::String = "" +end + +"Provide the `final_rating` between 1-5. Provide the rationale for it." +@kwdef struct JudgeRating + rationale::Union{Nothing, String} = nothing + final_rating::Int +end +"Explain the `final_rating` in `rationale`" +@kwdef struct JudgeAllScores + relevance::Int + completeness::Int + clarity::Int + consistency::Int + helpfulness::Int + rationale::Union{Nothing, String} = nothing + final_rating::Int +end + +function Base.isvalid(x::QAEvalItem) + !isempty(x.question) && !isempty(x.answer) && !isempty(x.context) +end + +# Nicer show method with some colors! +function Base.show(io::IO, t::Union{QAItem, QAEvalItem}) + printstyled(io, "$(nameof(typeof(t))):\n", color = :green, bold = true) + for f in fieldnames(typeof(t)) + printstyled(io, " ", f, color = :blue, bold = true) + println(io, ": ", getfield(t, f)) + end +end +# Define how JSON3 should serialize/deserialize the struct into JSON files +JSON3.StructTypes.StructType(::Type{QAEvalItem}) = JSON3.StructTypes.Struct() + +""" + build_qa_evals(doc_chunks::Vector{<:AbstractString}, sources::Vector{<:AbstractString}; + model=PT.MODEL_CHAT, instructions="None.", qa_template::Symbol=:RAGCreateQAFromContext, verbose::Bool=true, kwargs...) -> Vector{QAEvalItem} + +Create a collection of question and answer evaluations (`QAEvalItem`) from document chunks and sources. +This function generates Q&A pairs based on the provided document chunks, using a specified AI model and template. + +# Arguments +- `doc_chunks::Vector{<:AbstractString}`: A vector of document chunks, each representing a segment of text. +- `sources::Vector{<:AbstractString}`: A vector of source identifiers corresponding to each chunk in `doc_chunks` (eg, filenames or paths). +- `model`: The AI model used for generating Q&A pairs. Default is `PT.MODEL_CHAT`. +- `instructions::String`: Additional instructions or context to provide to the model generating QA sets. Defaults to "None.". +- `qa_template::Symbol`: A template symbol that dictates the AITemplate that will be used. It must have placeholder `context`. Default is `:CreateQAFromContext`. +- `verbose::Bool`: If `true`, additional information like costs will be logged. Defaults to `true`. + +# Returns +`Vector{QAEvalItem}`: A vector of `QAEvalItem` structs, each containing a source, context, question, and answer. Invalid or empty items are filtered out. + +# Notes + +- The function internally uses `aiextract` to generate Q&A pairs based on the provided `qa_template`. So you can use any kwargs that you want. 
+- Each `QAEvalItem` includes the context (document chunk), the generated question and answer, and the source. +- The function tracks and reports the cost of AI calls if `verbose` is enabled. +- Items where the question, answer, or context is empty are considered invalid and are filtered out. + +# Examples + +Creating Q&A evaluations from a set of document chunks: +```julia +doc_chunks = ["Text from document 1", "Text from document 2"] +sources = ["source1", "source2"] +qa_evals = build_qa_evals(doc_chunks, sources) +``` +""" +function build_qa_evals(doc_chunks::Vector{<:AbstractString}, + sources::Vector{<:AbstractString}; + model = PT.MODEL_CHAT, instructions = "None.", + qa_template::Symbol = :RAGCreateQAFromContext, verbose::Bool = true, kwargs...) + ## + @assert length(doc_chunks)==length(sources) "Length of `doc_chunks` and `sources` must be the same." + placeholders = only(aitemplates(qa_template)).variables # only one template should be found + @assert (:context in placeholders) "Provided Q&A Template $(qa_template) is not suitable. It must have placeholder: `context`." + ## + cost_tracker = Threads.Atomic{Float64}(0.0) + output = asyncmap(zip(doc_chunks, sources)) do (context, source) + try + msg = aiextract(qa_template; + return_type = QAItem, + context, + instructions, + verbose, + model) + Threads.atomic_add!(cost_tracker, PT.call_cost(msg, model)) # track costs + QAEvalItem(; context, msg.content.question, msg.content.answer, source) + catch e + QAEvalItem() + end + end + verbose && @info "Q&A Sets built! (cost: \$$(round(cost_tracker[], digits=3)))" + return filter(isvalid, output) +end diff --git a/src/Experimental/RAGTools/generation.jl b/src/Experimental/RAGTools/generation.jl new file mode 100644 index 000000000..804e0fdd0 --- /dev/null +++ b/src/Experimental/RAGTools/generation.jl @@ -0,0 +1,128 @@ +# stub to be replaced with extension +function _normalize end + +""" + airag(index::AbstractChunkIndex, rag_template::Symbol=:RAGAnswerFromContext; + question::AbstractString, top_k::Int=3, tag_filter::Union{Symbol,Vector{String},Regex}=:auto, + rerank_strategy::RerankingStrategy=Passthrough(), model_embedding::String=PT.MODEL_EMBEDDING, + model_chat::String=PT.MODEL_CHAT, model_metadata::String=PT.MODEL_CHAT, + chunks_window_margin::Tuple{Int,Int}=(1, 1), return_context::Bool=false, verbose::Bool=true, kwargs...) -> Any + +Generates a response for a given question using a Retrieval-Augmented Generation (RAG) approach. + +The function selects relevant chunks from an `ChunkIndex`, optionally filters them based on metadata tags, reranks them, and then uses these chunks to construct a context for generating a response. + +# Arguments +- `index::AbstractChunkIndex`: The chunk index to search for relevant text. +- `rag_template::Symbol`: Template for the RAG model, defaults to `:RAGAnswerFromContext`. +- `question::AbstractString`: The question to be answered. +- `top_k::Int`: Number of top candidates to retrieve based on embedding similarity. +- `tag_filter::Union{Symbol, Vector{String}, Regex}`: Mechanism for filtering chunks based on tags (either automatically detected, specific tags, or a regex pattern). +- `rerank_strategy::RerankingStrategy`: Strategy for reranking the retrieved chunks. +- `model_embedding::String`: Model used for embedding the question, default is `PT.MODEL_EMBEDDING`. +- `model_chat::String`: Model used for generating the final response, default is `PT.MODEL_CHAT`. +- `model_metadata::String`: Model used for extracting metadata, default is `PT.MODEL_CHAT`. 
+- `chunks_window_margin::Tuple{Int,Int}`: The window size around each chunk to consider for context building. +- `return_context::Bool`: If `true`, returns the context used for RAG along with the response. +- `verbose::Bool`: If `true`, enables verbose logging. + +# Returns +- If `return_context` is `false`, returns the generated message (`msg`). +- If `return_context` is `true`, returns a tuple of the generated message (`msg`) and the RAG context (`rag_context`). + +# Notes +- The function first finds the closest chunks to the question embedding, then optionally filters these based on tags. After that, it reranks the candidates and builds a context for the RAG model. +- The `tag_filter` can be used to refine the search. If set to `:auto`, it attempts to automatically determine relevant tags (if `index` has them available). +- The `chunks_window_margin` allows including surrounding chunks for richer context, considering they are from the same source. +- The function currently supports only single `ChunkIndex`. + +# Examples + +Using `airag` to get a response for a question: +```julia +index = build_index(...) # create an index +question = "How to make a barplot in Makie.jl?" +msg = airag(index, :RAGAnswerFromContext; question) + +# or simply +msg = airag(index; question) +``` +""" +function airag(index::AbstractChunkIndex, rag_template::Symbol = :RAGAnswerFromContext; + question::AbstractString, + top_k::Int = 3, + tag_filter::Union{Symbol, Vector{String}, Regex} = :auto, + rerank_strategy::RerankingStrategy = Passthrough(), + model_embedding::String = PT.MODEL_EMBEDDING, model_chat::String = PT.MODEL_CHAT, + model_metadata::String = PT.MODEL_CHAT, + chunks_window_margin::Tuple{Int, Int} = (1, 1), + return_context::Bool = false, verbose::Bool = true, + kwargs...) + ## Note: Supports only single ChunkIndex for now + ## Checks + @assert tag_filter isa Symbol&&tag_filter == :auto "Only `:auto`, `Vector{String}`, or `Regex` are supported for `tag_filter`" + @assert chunks_window_margin[1] >= 0&&chunks_window_margin[2] >= 0 "Both `chunks_window_margin` values must be non-negative" + placeholders = only(aitemplates(rag_template)).variables # only one template should be found + @assert (:question in placeholders)&&(:context in placeholders) "Provided RAG Template $(rag_template) is not suitable. It must have placeholders: `question` and `context`." + + question_emb = aiembed(question, + _normalize; + model = model_embedding, + verbose).content .|> Float32 + emb_candidates = find_closest(index, question_emb; top_k) + + tag_candidates = if tag_filter == :auto && !isnothing(tags(index)) && + !isempty(model_metadata) + # extract metadata via LLM call + # Check that the provided model is known and that it is an OpenAI model (for the aiextract function to work) + @assert haskey(PT.MODEL_REGISTRY, + model_metadata)&&PT.MODEL_REGISTRY[model_metadata].schema == PT.OpenAISchema() "Only OpenAI models support the metadata extraction now. $model_metadata is not a registered OpenAI model." 
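        # `:auto` mode: ask the LLM for 2-3 filter keywords relevant to the question and
        # match them against the index tags; an extraction failure falls back to an empty keyword list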
+ metadata_ = try + msg = aiextract(metadata_template; return_type = MaybeMetadataItems, + text = chunk, + instructions = "In addition to extracted items, suggest 2-3 filter keywords that could be relevant to answer this question.", + verbose, model = model_metadata) + metadata_extract(msg.content.items) + catch + String[] + end + find_tags(index, metadata_) + elseif !(tag_filter isa Symbol) + find_tags(index, tag_filter) + else + ## not filtering -- use all rows and ignore this + nothing + end + + filtered_candidates = isnothing(tag_candidates) ? emb_candidates : + (emb_candidates & tag_candidates) + reranked_candidates = rerank(rerank_strategy, index, question, filtered_candidates) + + ## Build the context + context = String[] + for (i, position) in enumerate(reranked_candidates.positions) + ## Add surrounding chunks if they are from the same source (from `chunks_window_margin`) + chunks_ = chunks(index)[max(1, position - chunks_window_margin[1]):min(end, + position + chunks_window_margin[2])] + is_same_source = sources(index)[max(1, position - chunks_window_margin[1]):min(end, + position + chunks_window_margin[2])] .== sources(index)[position] + push!(context, "$(i). $(join(chunks_[is_same_source], "\n"))") + end + ## LLM call + msg = aigenerate(rag_template; question, + context = join(context, "\n\n"), model = model_chat, verbose, + kwargs...) + + if return_context # for evaluation + rag_context = RAGContext(; + question, + context, + emb_candidates, + tag_candidates, + filtered_candidates, + reranked_candidates) + return msg, rag_context + else + return msg + end +end \ No newline at end of file diff --git a/src/Experimental/RAGTools/preparation.jl b/src/Experimental/RAGTools/preparation.jl new file mode 100644 index 000000000..3c06dc403 --- /dev/null +++ b/src/Experimental/RAGTools/preparation.jl @@ -0,0 +1,144 @@ +### Preparation +# Types used to extract `tags` from document chunks +@kwdef struct MetadataItem + value::String + category::String +end +@kwdef struct MaybeMetadataItems + items::Union{Nothing, Vector{MetadataItem}} +end + +""" + metadata_extract(item::MetadataItem) + metadata_extract(items::Vector{MetadataItem}) + +Extracts the metadata item into a string of the form `category:::value` (lowercased and spaces replaced with underscores). + +# Example +```julia +msg = aiextract(:RAGExtractMetadataShort; return_type=MaybeMetadataItems, text="I like package DataFrames", instructions="None.") +metadata = metadata_extract(msg.content.items) +``` +""" +function metadata_extract(item::MetadataItem) + "$(strip(item.category)):::$(strip(item.value))" |> lowercase |> + x -> replace(x, " " => "_") +end +metadata_extract(items::Nothing) = String[] +metadata_extract(items::Vector{MetadataItem}) = metadata_extract.(items) + +"Builds a matrix of tags and a vocabulary list. REQUIRES SparseArrays and LinearAlgebra packages to be loaded!!" +function build_tags end +# Implementation in ext/RAGToolsExperimentalExt.jl + +"Build an index for RAG (Retriever-Augmented Generation) applications. REQUIRES SparseArrays and LinearAlgebra packages to be loaded!!" +function build_index end + +""" + build_index(files::Vector{<:AbstractString}; + separators=["\n\n", ". ", "\n"], max_length::Int=256, + extract_metadata::Bool=false, verbose::Bool=true, metadata_template::Symbol=:RAGExtractMetadataShort, + model_embedding::String=PT.MODEL_EMBEDDING, model_metadata::String=PT.MODEL_CHAT) + +Build an index for RAG (Retriever-Augmented Generation) applications from the provided file paths. 
+The function processes each file, splits its content into chunks, embeds these chunks, +optionally extracts metadata, and then compiles this information into a retrievable index. + +# Arguments +- `files`: A vector of valid file paths to be indexed. +- `separators`: A list of strings used as separators for splitting the text in each file into chunks. Default is `["\\n\\n", ". ", "\\n"]`. +- `max_length`: The maximum length of each chunk (if possible with provided separators). Default is 256. +- `extract_metadata`: A boolean flag indicating whether to extract metadata from each chunk (to build filter `tags` in the index). Default is `false`. + Metadata extraction incurs additional cost and requires `model_metadata` and `metadata_template` to be provided. +- `verbose`: A boolean flag for verbose output. Default is `true`. +- `metadata_template`: A symbol indicating the template to be used for metadata extraction. Default is `:RAGExtractMetadataShort`. +- `model_embedding`: The model to use for embedding. +- `model_metadata`: The model to use for metadata extraction. + +# Returns +- `ChunkIndex`: An object containing the compiled index of chunks, embeddings, tags, vocabulary, and sources. + +See also: `MultiIndex`, `CandidateChunks`, `find_closest`, `find_tags`, `rerank`, `airag` + +# Examples +```julia +# Assuming `test_files` is a vector of file paths +index = build_index(test_files; max_length=10, extract_metadata=true) + +# Another example with metadata extraction and verbose output +index = build_index(["file1.txt", "file2.txt"]; + separators=[". "], + extract_metadata=true, + verbose=true) +``` +""" +function build_index(files::Vector{<:AbstractString}; + separators = ["\n\n", ". ", "\n"], max_length::Int = 256, + extract_metadata::Bool = false, verbose::Bool = true, + metadata_template::Symbol = :RAGExtractMetadataShort, + model_embedding::String = PT.MODEL_EMBEDDING, + model_metadata::String = PT.MODEL_CHAT) + ## + @assert all(isfile, files) "Some `files` don't exist (Check: $(join(filter(!isfile,files),", "))" + + output_chunks = Vector{Vector{SubString{String}}}() + output_embeddings = Vector{Matrix{Float32}}() + output_metadata = Vector{Vector{Vector{String}}}() + output_sources = Vector{Vector{eltype(files)}}() + cost_tracker = Threads.Atomic{Float64}(0.0) + + for fn in files + verbose && @info "Processing file: $fn" + doc_raw = read(fn, String) + isempty(doc_raw) && continue + # split into chunks, if you want to start simple - just do `split(text,"\n\n")` + doc_chunks = PT.split_by_length(doc_raw, separators; max_length) .|> strip |> + x -> filter(!isempty, x) + # skip if no chunks found + isempty(doc_chunks) && continue + push!(output_chunks, doc_chunks) + push!(output_sources, fill(fn, length(doc_chunks))) + + # Notice that we embed all doc_chunks at once, not one by one + # OpenAI supports embedding multiple documents to reduce the number of API calls/network latency time + emb = aiembed(doc_chunks, _normalize; model = model_embedding, verbose) + Threads.atomic_add!(cost_tracker, PT.call_cost(emb, model_embedding)) # track costs + push!(output_embeddings, Float32.(emb.content)) + + if extract_metadata && !isempty(model_metadata) + # Check that the provided model is known and that it is an OpenAI model (for the aiextract function to work) + @assert haskey(PT.MODEL_REGISTRY, + model_metadata)&&PT.MODEL_REGISTRY[model_metadata].schema == PT.OpenAISchema() "Only OpenAI models support the metadata extraction now. $model_metadata is not a registered OpenAI model." 
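            # Extract metadata tags for each chunk concurrently (asyncmap); a failed
            # extraction falls back to an empty tag list for that chunk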
+ metadata_ = asyncmap(doc_chunks) do chunk + try + msg = aiextract(metadata_template; + return_type = MaybeMetadataItems, + text = chunk, + instructions = "None.", + verbose, + model = model_metadata) + Threads.atomic_add!(cost_tracker, PT.call_cost(msg, model_metadata)) # track costs + items = metadata_extract(msg.content.items) + catch + String[] + end + end + push!(output_metadata, metadata_) + end + end + ## Create metadata tags and associated vocabulary + tags, tags_vocab = if !isempty(output_metadata) + # Requires SparseArrays.jl! + _build_tags(vcat(output_metadata...)) # need to vcat to be on the "chunk-level" + else + tags, tags_vocab = nothing, nothing + end + verbose && @info "Index built! (cost: \$$(round(cost_tracker[], digits=3)))" + + index = ChunkIndex(; + embeddings = hcat(output_embeddings...), + tags, tags_vocab, + chunks = vcat(output_chunks...), + sources = vcat(output_sources...)) + return index +end diff --git a/src/Experimental/RAGTools/retrieval.jl b/src/Experimental/RAGTools/retrieval.jl new file mode 100644 index 000000000..b824b5396 --- /dev/null +++ b/src/Experimental/RAGTools/retrieval.jl @@ -0,0 +1,45 @@ +"Finds the indices of chunks (represented by embeddings in `emb`) that are closest (cosine similarity) to query embedding (`query_emb`). Returns only `top_k` closest indices." +function find_closest(emb::AbstractMatrix{<:Real}, + query_emb::AbstractVector{<:Real}; + top_k::Int = 100) + # emb is an embedding matrix where the first dimension is the embedding dimension + distances = query_emb' * emb |> vec + positions = distances |> sortperm |> reverse |> x -> first(x, top_k) + return positions, distances[positions] +end +function find_closest(index::AbstractChunkIndex, + query_emb::AbstractVector{<:Real}; + top_k::Int = 100) + isnothing(embeddings(index)) && CandidateChunks(; index_id = index.id) + positions, distances = find_closest(embeddings(index), query_emb; top_k) + return CandidateChunks(index.id, positions, Float32.(distances)) +end + +function find_tags(index::AbstractChunkIndex, + tag::Union{AbstractString, Regex}) + isnothing(tags(index)) && CandidateChunks(; index_id = index.id) + tag_idx = if tag isa AbstractString + findall(tags_vocab(index) .== tag) + else # assume it's a regex + findall(occursin.(tag, tags_vocab(index))) + end + # getindex.(x, 1) is to get the first dimension in each CartesianIndex + match_row_idx = @view(tags(index)[:, tag_idx]) |> findall |> + x -> getindex.(x, 1) |> unique + return CandidateChunks(index.id, match_row_idx, ones(Float32, length(match_row_idx))) +end +function find_tags(index::AbstractChunkIndex, + tags::Vector{<:AbstractString}) + pos = Int[find_tags(index, tag).positions for tag in tags] |> unique + return CandidateChunks(index.id, pos, ones(Float32, length(pos))) +end + +# Assuming the rerank and strategy definitions are in the Main module or relevant module +abstract type RerankingStrategy end + +struct Passthrough <: RerankingStrategy end + +function rerank(strategy::Passthrough, index, question, candidate_chunks; kwargs...) 
+ # Since this is a Passthrough strategy, it returns the candidate_chunks unchanged + return candidate_chunks +end \ No newline at end of file diff --git a/src/Experimental/RAGTools/types.jl b/src/Experimental/RAGTools/types.jl new file mode 100644 index 000000000..c30f4408e --- /dev/null +++ b/src/Experimental/RAGTools/types.jl @@ -0,0 +1,132 @@ +### Types +# Defines three key types for RAG: ChunkIndex, MultiIndex, and CandidateChunks +# In addition, RAGContext is defined for debugging purposes + +abstract type AbstractDocumentIndex end +abstract type AbstractChunkIndex <: AbstractDocumentIndex end +# More advanced index would be: HybridChunkIndex + +# Stores document chunks and their embeddings +@kwdef struct ChunkIndex{ + T1 <: AbstractString, + T2 <: Union{Nothing, Matrix{<:Real}}, + T3 <: Union{Nothing, AbstractMatrix{<:Bool}}, +} <: AbstractChunkIndex + id::Symbol = gensym("ChunkIndex") + # underlying document chunks / snippets + chunks::Vector{T1} + # for semantic search + embeddings::T2 = nothing + # for exact search, filtering, etc. + # expected to be some sparse structure, eg, sparse matrix or nothing + # column oriented, ie, each column is one item in `tags_vocab` and rows are the chunks + tags::T3 = nothing + tags_vocab::Union{Nothing, Vector{<:AbstractString}} = nothing + sources::Vector{<:AbstractString} +end +embeddings(index::ChunkIndex) = index.embeddings +chunks(index::ChunkIndex) = index.chunks +tags(index::ChunkIndex) = index.tags +tags_vocab(index::ChunkIndex) = index.tags_vocab +sources(index::ChunkIndex) = index.sources + +function Base.var"=="(i1::ChunkIndex, i2::ChunkIndex) + ((i1.sources == i2.sources) && (i1.tags_vocab == i2.tags_vocab) && + (i1.embeddings == i2.embeddings) && (i1.chunks == i2.chunks) && (i1.tags == i2.tags)) +end + +function Base.vcat(i1::ChunkIndex, i2::ChunkIndex) + tags, tags_vocab = if (isnothing(tags(i1)) || isnothing(tags(i2))) + nothing, nothing + elseif tags_vocab(i1) == tags_vocab(i2) + vcat(tags(i1), tags(i2)), tags_vocab(i1) + else + merge_labeled_matrices(tags(i1), tags_vocab(i1), tags(i2), tags_vocab(i2)) + end + embeddings = (isnothing(embeddings(i1)) || isnothing(embeddings(i2))) ? nothing : + hcat(embeddings(i1), embeddings(i2)) + ChunkIndex(; + chunks = vcat(chunks(i1), chunks(i2)), + embeddings, + tags, + tags_vocab, + sources = vcat(i1.sources, i2.sources)) +end + +"Composite index that stores multiple ChunkIndex objects and their embeddings" +@kwdef struct MultiIndex <: AbstractDocumentIndex + id::Symbol = gensym("MultiIndex") + indexes::Vector{<:ChunkIndex} +end +indexes(index::MultiIndex) = index.indexes +# check that each index has a counterpart in the other MultiIndex +function Base.var"=="(i1::MultiIndex, i2::MultiIndex) + length(indexes(i1)) != length(indexes(i2)) && return false + for i in i1.indexes + if !(i in i2.indexes) + return false + end + end + for i in i2.indexes + if !(i in i1.indexes) + return false + end + end + return true +end + +abstract type AbstractCandidateChunks end +@kwdef struct CandidateChunks{T <: Real} <: AbstractCandidateChunks + index_id::Symbol + positions::Vector{Int} = Int[] + distances::Vector{T} = Float32[] +end +# combine/intersect two candidate chunks. 
average the score if available +function Base.var"&"(cc1::CandidateChunks, cc2::CandidateChunks) + cc1.index_id != cc2.index_id && return CandidateChunks(; index_id = cc1.index_id) + + positions = intersect(cc1.positions, cc2.positions) + distances = if !isempty(cc1.distances) && !isempty(cc2.distances) + (cc1.distances[positions] .+ cc2.distances[positions]) ./ 2 + else + Float32[] + end + CandidateChunks(cc1.index_id, positions, distances) +end +function Base.getindex(ci::ChunkIndex, candidate::CandidateChunks, field::Symbol = :chunks) + @assert field==:chunks "Only `chunks` field is supported for now" + if ci.id == candidate.index_id + chunks(ci)[candidate.positions] + else + eltype(chunks(ci))[] + end +end +function Base.getindex(mi::MultiIndex, candidate::CandidateChunks, field::Symbol = :chunks) + @assert field==:chunks "Only `chunks` field is supported for now" + valid_index = findfirst(x -> x.id == candidate.index_id, indexes(mi)) + if isnothing(valid_index) + String[] + else + getindex(indexes(mi)[valid_index], candidate) + end +end + +""" + RAGContext + +A struct for debugging RAG answers. It contains the question, context, and the candidate chunks at each step of the RAG pipeline. +""" +@kwdef struct RAGContext + question::AbstractString + context::Vector{<:AbstractString} + emb_candidates::CandidateChunks + tag_candidates::Union{Nothing, CandidateChunks} + filtered_candidates::CandidateChunks + reranked_candidates::CandidateChunks +end + +# Structured show method for easier reading (each kwarg on a new line) +function Base.show(io::IO, + t::Union{AbstractDocumentIndex, AbstractCandidateChunks, RAGContext}) + dump(IOContext(io, :limit => true), t, maxdepth = 1) +end diff --git a/src/Experimental/RAGTools/utils.jl b/src/Experimental/RAGTools/utils.jl new file mode 100644 index 000000000..d28ceae62 --- /dev/null +++ b/src/Experimental/RAGTools/utils.jl @@ -0,0 +1,17 @@ +# Utitity to be able to combine indices from different sources/documents easily +function merge_labeled_matrices(mat1::AbstractMatrix{T1}, + vocab1::Vector{String}, + mat2::AbstractMatrix{T2}, + vocab2::Vector{String}) where {T1 <: Number, T2 <: Number} + T = promote_type(T1, T2) + new_words = setdiff(vocab2, vocab1) + combined_vocab = [vocab1; new_words] + vocab2_indices = Dict(word => i for (i, word) in enumerate(vocab2)) + + aligned_mat1 = hcat(mat1, zeros(T, size(mat1, 1), length(new_words))) + aligned_mat2 = [haskey(vocab2_indices, word) ? 
@view(mat2[:, vocab2_indices[word]]) : + zeros(T, size(mat2, 1)) for word in combined_vocab] + aligned_mat2 = aligned_mat2 |> Base.Splat(hcat) + + return vcat(aligned_mat1, aligned_mat2), combined_vocab +end \ No newline at end of file diff --git a/src/PromptingTools.jl b/src/PromptingTools.jl index 7f5c6bdd1..a137bd866 100644 --- a/src/PromptingTools.jl +++ b/src/PromptingTools.jl @@ -65,6 +65,9 @@ include("llm_ollama_managed.jl") export @ai_str, @aai_str include("macros.jl") +## Experimental modules +include("Experimental/Experimental.jl") + function __init__() # Load templates load_templates!() diff --git a/src/utils.jl b/src/utils.jl index 168d772cb..a0f6b996b 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -66,7 +66,13 @@ split_by_length(text; separator=",", max_length=10000) # for 4K context window length(chunks[1]) # Output: 4 ``` """ -function split_by_length(text::String; separator::String = " ", max_length::Int = 35000) +function split_by_length(text::String; + separator::String = " ", + max_length::Int = 35000) + ## shortcut + length(text) <= max_length && return [text] + + ## split by separator minichunks = split(text, separator) sep_length = length(separator) chunks = String[] @@ -99,6 +105,66 @@ function split_by_length(text::String; separator::String = " ", max_length::Int return chunks end + +# Overload for dispatch on multiple separators +function split_by_length(text::String, + separator::String, + max_length::Int = 35000) + split_by_length(text; separator, max_length) +end + +""" + split_by_length(text::String, separators::Vector{String}; max_length::Int=35000) -> Vector{String} + +Split a given string `text` into chunks using a series of separators, with each chunk having a maximum length of `max_length`. +This function is useful for splitting large documents or texts into smaller segments that are more manageable for processing, particularly for models or systems with limited context windows. + +# Arguments +- `text::String`: The text to be split. +- `separators::Vector{String}`: An ordered list of separators used to split the text. The function iteratively applies these separators to split the text. +- `max_length::Int=35000`: The maximum length of each chunk. Defaults to 35,000 characters. This length is considered after each iteration of splitting, ensuring chunks fit within specified constraints. + +# Returns +`Vector{String}`: A vector of strings, where each string is a chunk of the original text that is smaller than or equal to `max_length`. + +# Notes + +- The function processes the text iteratively with each separator in the provided order. This ensures more nuanced splitting, especially in structured texts. +- Each chunk is as close to `max_length` as possible without exceeding it (unless we cannot split it any further) +- If the `text` is empty, the function returns an empty array. +- Separators are re-added to the text chunks after splitting, preserving the original structure of the text as closely as possible. Apply `strip` if you do not need them. + +# Examples + +Splitting text using multiple separators: +```julia +text = "Paragraph 1\n\nParagraph 2. Sentence 1. Sentence 2.\nParagraph 3" +separators = ["\n\n", ". 
", "\n"] +chunks = split_by_length(text, separators, max_length=20) +``` + +Using a single separator: +```julia +text = "Hello,World," ^ 2900 # length 34900 characters +chunks = split_by_length(text, [","], max_length=10000) +``` +""" +function split_by_length(text, separators::Vector{String}; max_length) + @assert !isempty(separators) "`separators` can't be empty" + separator = popfirst!(separators) + chunks = split_by_length(text; separator, max_length) + + isempty(separators) && return chunks + ## Iteratively split by separators + for separator in separators + chunks = mapreduce(text_ -> split_by_length(text_; max_length, separator), + vcat, + chunks) + end + + return chunks +end + ### INTERNAL FUNCTIONS - DO NOT USE DIRECTLY # helper to extract handlebar variables (eg, `{{var}}`) from a prompt string function _extract_handlebar_variables(s::AbstractString) @@ -109,18 +175,63 @@ function _extract_handlebar_variables(vect::Vector{Dict{String, <:AbstractString unique([_extract_handlebar_variables(v) for d in vect for (k, v) in d if k == "text"]) end -# helper to produce summary message of how many tokens were used and for how much -function _report_stats(msg, - model::String, +""" + call_cost(msg, model::String; + cost_of_token_prompt::Number = default_prompt_cost, + cost_of_token_generation::Number = default_generation_cost) -> Number + +Calculate the cost of a call based on the number of tokens in the message and the cost per token. + +# Arguments +- `msg`: The message object, which should contain a `tokens` field + with two elements: [number_of_prompt_tokens, number_of_generation_tokens]. +- `model::String`: The name of the model to use for determining token costs. If the model + is not found in `MODEL_REGISTRY`, default costs are used. +- `cost_of_token_prompt::Number`: The cost per prompt token. Defaults to the cost in `MODEL_REGISTRY` + for the given model, or 0.0 if the model is not found. +- `cost_of_token_generation::Number`: The cost per generation token. Defaults to the cost in + `MODEL_REGISTRY` for the given model, or 0.0 if the model is not found. + +# Returns +- `Number`: The total cost of the call. 
+ +# Examples +```julia +# Assuming MODEL_REGISTRY is set up with appropriate costs +MODEL_REGISTRY = Dict( + "model1" => (cost_of_token_prompt = 0.05, cost_of_token_generation = 0.10), + "model2" => (cost_of_token_prompt = 0.07, cost_of_token_generation = 0.02) +) + +msg1 = AIMessage([10, 20]) # 10 prompt tokens, 20 generation tokens +cost1 = call_cost(msg1, "model1") +# cost1 = 10 * 0.05 + 20 * 0.10 = 2.5 + +msg2 = DataMessage([15, 30]) # 15 prompt tokens, 30 generation tokens +cost2 = call_cost(msg2, "model2") +# cost2 = 15 * 0.07 + 30 * 0.02 = 1.35 + +# Using custom token costs +msg3 = AIMessage([5, 10]) +cost3 = call_cost(msg3, "model3", cost_of_token_prompt = 0.08, cost_of_token_generation = 0.12) +# cost3 = 5 * 0.08 + 10 * 0.12 = 1.6 +``` +""" +function call_cost(msg, model::String; cost_of_token_prompt::Number = get(MODEL_REGISTRY, model, (; cost_of_token_prompt = 0.0)).cost_of_token_prompt, cost_of_token_generation::Number = get(MODEL_REGISTRY, model, (; cost_of_token_generation = 0.0)).cost_of_token_generation) - cost = (msg.tokens[1] * cost_of_token_prompt + - msg.tokens[2] * cost_of_token_generation) + cost = msg.tokens[1] * cost_of_token_prompt + + msg.tokens[2] * cost_of_token_generation + return cost +end +# helper to produce summary message of how many tokens were used and for how much +function _report_stats(msg, + model::String) + cost = call_cost(msg, model) cost_str = iszero(cost) ? "" : " @ Cost: \$$(round(cost; digits=4))" - return "Tokens: $(sum(msg.tokens))$(cost_str) in $(round(msg.elapsed;digits=1)) seconds" end # Loads and encodes the provided image path as a base64 string diff --git a/templates/RAG/CreateQAFromContext.json b/templates/RAG/CreateQAFromContext.json new file mode 100644 index 000000000..83e900ba1 --- /dev/null +++ b/templates/RAG/CreateQAFromContext.json @@ -0,0 +1 @@ +[{"content":"Template Metadata","description":"For RAG applications. Generate Question and Answer from the provided Context.If you don't have any special instructions, provide `instructions=\"None.\"`. Placeholders: `context`, `instructions`","version":"1.0","source":"","_type":"metadatamessage"},{"content":"You are a world-class teacher preparing contextual Question & Answer sets for evaluating AI systems.\"),\n\n**Instructions for Question Generation:**\n1. Analyze the provided Context chunk thoroughly.\n2. Formulate a question that:\n - Is specific and directly related to the information in the context chunk.\n - Is not too short or generic; it should require detailed understanding of the context to answer.\n - Can only be answered using the information from the provided context, without needing external information.\n\n**Instructions for Reference Answer Creation:**\n1. 
Based on the generated question, compose a reference answer that:\n - Directly and comprehensively answers the question.\n - Stays strictly within the bounds of the provided context chunk.\n - Is clear, concise, and to the point, avoiding unnecessary elaboration or repetition.\n\n**Example 1:**\n- Context Chunk: \"In 1928, Alexander Fleming discovered penicillin, which marked the beginning of modern antibiotics.\"\n- Generated Question: \"What was the significant discovery made by Alexander Fleming in 1928 and its impact?\"\n- Reference Answer: \"Alexander Fleming discovered penicillin in 1928, which led to the development of modern antibiotics.\"\n\nIf the user provides special instructions, prioritize these over the general instructions.\n","variables":[],"_type":"systemmessage"},{"content":"# Context Information\n---\n{{context}}\n---\n\n\n# Special Instructions\n\n{{instructions}}\n","variables":["context","instructions"],"_type":"usermessage"}] \ No newline at end of file diff --git a/templates/RAG/RAGAnswerFromContext.json b/templates/RAG/RAGAnswerFromContext.json new file mode 100644 index 000000000..272ca4e20 --- /dev/null +++ b/templates/RAG/RAGAnswerFromContext.json @@ -0,0 +1 @@ +[{"content":"Template Metadata","description":"For RAG applications. Answers the provided Questions based on the Context. Placeholders: `question`, `context`","version":"1.0","source":"","_type":"metadatamessage"},{"content":"Act as a world-class AI assistant with access to the latest knowledge via Context Information. \n\n**Instructions:**\n- Answer the question based only on the provided Context.\n- If you don't know the answer, just say that you don't know, don't try to make up an answer.\n- Be brief and concise.\n\n**Context Information:**\n---\n{{context}}\n---\n","variables":["context"],"_type":"systemmessage"},{"content":"# Question\n\n{{question}}\n\n\n\n# Answer\n\n","variables":["question"],"_type":"usermessage"}] \ No newline at end of file diff --git a/templates/RAG/RAGCreateQAFromContext.json b/templates/RAG/RAGCreateQAFromContext.json new file mode 100644 index 000000000..83e900ba1 --- /dev/null +++ b/templates/RAG/RAGCreateQAFromContext.json @@ -0,0 +1 @@ +[{"content":"Template Metadata","description":"For RAG applications. Generate Question and Answer from the provided Context.If you don't have any special instructions, provide `instructions=\"None.\"`. Placeholders: `context`, `instructions`","version":"1.0","source":"","_type":"metadatamessage"},{"content":"You are a world-class teacher preparing contextual Question & Answer sets for evaluating AI systems.\"),\n\n**Instructions for Question Generation:**\n1. Analyze the provided Context chunk thoroughly.\n2. Formulate a question that:\n - Is specific and directly related to the information in the context chunk.\n - Is not too short or generic; it should require detailed understanding of the context to answer.\n - Can only be answered using the information from the provided context, without needing external information.\n\n**Instructions for Reference Answer Creation:**\n1. 
Based on the generated question, compose a reference answer that:\n - Directly and comprehensively answers the question.\n - Stays strictly within the bounds of the provided context chunk.\n - Is clear, concise, and to the point, avoiding unnecessary elaboration or repetition.\n\n**Example 1:**\n- Context Chunk: \"In 1928, Alexander Fleming discovered penicillin, which marked the beginning of modern antibiotics.\"\n- Generated Question: \"What was the significant discovery made by Alexander Fleming in 1928 and its impact?\"\n- Reference Answer: \"Alexander Fleming discovered penicillin in 1928, which led to the development of modern antibiotics.\"\n\nIf the user provides special instructions, prioritize these over the general instructions.\n","variables":[],"_type":"systemmessage"},{"content":"# Context Information\n---\n{{context}}\n---\n\n\n# Special Instructions\n\n{{instructions}}\n","variables":["context","instructions"],"_type":"usermessage"}] \ No newline at end of file diff --git a/templates/RAG/RAGExtractMetadataLong.json b/templates/RAG/RAGExtractMetadataLong.json new file mode 100644 index 000000000..9ede8c3ca --- /dev/null +++ b/templates/RAG/RAGExtractMetadataLong.json @@ -0,0 +1 @@ +[{"content":"Template Metadata","description":"For RAG applications. Extracts metadata from the provided text using longer instructions set and examples. If you don't have any special instructions, provide `instructions=\"None.\"`. Placeholders: `text`, `instructions`","version":"1.0","source":"","_type":"metadatamessage"},{"content":"You're a world-class data extraction engine built by OpenAI together with Google and to extract filter metadata to power the most advanced search engine in the world. \n \n **Instructions for Extraction:**\n 1. Carefully read through the provided Text\n 2. Identify and extract:\n - All relevant entities such as names, places, dates, etc.\n - Any special items like technical terms, unique identifiers, etc.\n - In the case of Julia code or Julia documentation: specifically extract package names, struct names, function names, and important variable names (eg, uppercased variables)\n 3. Keep extracted values and categories short. Maximum 2-3 words!\n 4. You can only extract 3-5 items per Text, so select the most important ones.\n 5. Assign search filter Category to each extracted Value\n \n **Example 1:**\n - Document Chunk: \"Dr. Jane Smith published her findings on neuroplasticity in 2021. The research heavily utilized the DataFrames.jl and Plots.jl packages.\"\n - Extracted keywords:\n - Name: Dr. Jane Smith\n - Date: 2021\n - Technical Term: neuroplasticity\n - JuliaPackage: DataFrames.jl, Plots.jl\n - JuliaLanguage:\n - Identifier:\n - Other: \n\n If the user provides special instructions, prioritize these over the general instructions.\n","variables":[],"_type":"systemmessage"},{"content":"# Text\n\n{{text}}\n\n\n\n# Special Instructions\n\n{{instructions}}","variables":["text","instructions"],"_type":"usermessage"}] \ No newline at end of file diff --git a/templates/RAG/RAGExtractMetadataShort.json b/templates/RAG/RAGExtractMetadataShort.json new file mode 100644 index 000000000..88132e929 --- /dev/null +++ b/templates/RAG/RAGExtractMetadataShort.json @@ -0,0 +1 @@ +[{"content":"Template Metadata","description":"For RAG applications. Extracts metadata from the provided text. If you don't have any special instructions, provide `instructions=\"None.\"`. 
Placeholders: `text`, `instructions`","version":"1.0","source":"","_type":"metadatamessage"},{"content":"Extract search keywords and their categories from the Text provided below (format \"value:category\"). Each keyword must be at most 2-3 words. Provide at most 3-5 keywords. I will tip you $50 if the search is successful.","variables":[],"_type":"systemmessage"},{"content":"# Text\n\n{{text}}\n\n\n\n# Special Instructions\n\n{{instructions}}","variables":["text","instructions"],"_type":"usermessage"}] \ No newline at end of file diff --git a/templates/RAG/RAGJudgeAnswerFromContext.json b/templates/RAG/RAGJudgeAnswerFromContext.json new file mode 100644 index 000000000..e988d8129 --- /dev/null +++ b/templates/RAG/RAGJudgeAnswerFromContext.json @@ -0,0 +1 @@ +[{"content":"Template Metadata","description":"For RAG applications. Judge answer to a question on a scale from 1-5. Placeholders: `question`, `context`, `answer`","version":"1.0","source":"","_type":"metadatamessage"},{"content":"You're an impartial judge. Your task is to evaluate the quality of the Answer provided by an AI assistant in response to the User Question on a scale 1-5.\n\n1. **Scoring Criteria:**\n- **Relevance (1-5):** How well does the provided answer align with the context? \n - *1: Not relevant, 5: Highly relevant*\n- **Completeness (1-5):** Does the provided answer cover all the essential points mentioned in the context?\n - *1: Very incomplete, 5: Very complete*\n- **Clarity (1-5):** How clear and understandable is the provided answer?\n - *1: Not clear at all, 5: Extremely clear*\n- **Consistency (1-5):** How consistent is the provided answer with the overall context?\n - *1: Highly inconsistent, 5: Perfectly consistent*\n- **Helpfulness (1-5):** How helpful is the provided answer in answering the user's question?\n - *1: Not helpful at all, 5: Extremely helpful*\n\n2. **Judging Instructions:**\n- As an impartial judge, please evaluate the provided answer based on the above criteria. \n- Assign a score from 1 to 5 for each criterion, considering the original context, question and the provided answer.\n- The Final Score is an average of these individual scores, representing the overall quality and relevance of the provided answer. 
It must be between 1-5.\n\n```\n","variables":[],"_type":"systemmessage"},{"content":"# User Question\n---\n{{question}}\n---\n\n\n# Context Information\n---\n{{context}}\n---\n\n\n# Assistant's Answer\n---\n{{answer}}\n---\n\n\n# Judge's Evaluation\n","variables":["question","context","answer"],"_type":"usermessage"}] \ No newline at end of file diff --git a/test/Experimental/RAGTools.jl/preparation.jl b/test/Experimental/RAGTools.jl/preparation.jl new file mode 100644 index 000000000..81da327c6 --- /dev/null +++ b/test/Experimental/RAGTools.jl/preparation.jl @@ -0,0 +1,68 @@ +@testset "metadata_extract" begin + # MetadataItem Structure + item = MetadataItem("value", "category") + @test item.value == "value" + @test item.category == "category" + + # MaybeMetadataItems Structure + items = MaybeMetadataItems([ + MetadataItem("value1", "category1"), + MetadataItem("value2", "category2"), + ]) + @test length(items.items) == 2 + @test items.items[1].value == "value1" + @test items.items[1].category == "category1" + + empty_items = MaybeMetadataItems(nothing) + @test isempty(metadata_extract(empty_items.items)) + + # Metadata Extraction Function + single_item = MetadataItem("DataFrames", "Julia Package") + multiple_items = [ + MetadataItem("pandas", "Software"), + MetadataItem("Python", "Language"), + MetadataItem("DataFrames", "Julia Package"), + ] + + @test metadata_extract(single_item) == "julia_package:::dataframes" + @test metadata_extract(multiple_items) == + ["software:::pandas", "language:::python", "julia_package:::dataframes"] + + @test metadata_extract(nothing) == String[] +end + +@testset "build_tags" begin + # Single Tag + chunk_metadata = [["tag1"]] + tags_, tags_vocab_ = build_tags(chunk_metadata) + + @test length(tags_vocab_) == 1 + @test tags_vocab_ == ["tag1"] + @test nnz(tags_) == 1 + @test tags_[1, 1] == true + + # Multiple Tags with Repetition + chunk_metadata = [["tag1", "tag2"], ["tag2", "tag3"]] + tags_, tags_vocab_ = build_tags(chunk_metadata) + + @test length(tags_vocab_) == 3 + @test tags_vocab_ == ["tag1", "tag2", "tag3"] + @test nnz(tags_) == 4 + @test all([tags_[1, 1], tags_[1, 2], tags_[2, 2], tags_[2, 3]]) + + # Empty Metadata + chunk_metadata = [String[]] + tags_, tags_vocab_ = build_tags(chunk_metadata) + + @test isempty(tags_vocab_) + @test size(tags_) == (1, 0) + + # Mixed Empty and Non-Empty Metadata + chunk_metadata = [["tag1"], String[], ["tag2", "tag3"]] + tags_, tags_vocab_ = build_tags(chunk_metadata) + + @test length(tags_vocab_) == 3 + @test tags_vocab_ == ["tag1", "tag2", "tag3"] + @test nnz(tags_) == 3 + @test all([tags_[1, 1], tags_[3, 2], tags_[3, 3]]) +end \ No newline at end of file diff --git a/test/Experimental/RAGTools.jl/retrieval.jl b/test/Experimental/RAGTools.jl/retrieval.jl new file mode 100644 index 000000000..cdff05b45 --- /dev/null +++ b/test/Experimental/RAGTools.jl/retrieval.jl @@ -0,0 +1,10 @@ +@testset "rerank" begin + # Mock data for testing + index = "mock_index" + question = "mock_question" + candidate_chunks = ["chunk1", "chunk2", "chunk3"] + + # Passthrough Strategy + strategy = Passthrough() + @test rerank(strategy, index, question, candidate_chunks) === candidate_chunks +end \ No newline at end of file diff --git a/test/Experimental/RAGTools.jl/runtests.jl b/test/Experimental/RAGTools.jl/runtests.jl new file mode 100644 index 000000000..6c70c014e --- /dev/null +++ b/test/Experimental/RAGTools.jl/runtests.jl @@ -0,0 +1,10 @@ +using Test +using SparseArrays, LinearAlgebra +using PromptingTools.Experimental.RAGTools + 
+include("utils.jl") +include("types.jl") +include("preparation.jl") +include("retrieval.jl") +# include("generation.jl") +# include("evaluation.jl") \ No newline at end of file diff --git a/test/Experimental/RAGTools.jl/types.jl b/test/Experimental/RAGTools.jl/types.jl new file mode 100644 index 000000000..61bd47ae3 --- /dev/null +++ b/test/Experimental/RAGTools.jl/types.jl @@ -0,0 +1,121 @@ + +@testset "merge_labeled_matrices" begin + # Test with dense matrices and overlapping vocabulary + mat1 = [1 2; 3 4] + vocab1 = ["word1", "word2"] + mat2 = [5 6; 7 8] + vocab2 = ["word2", "word3"] + + merged_mat, combined_vocab = merge_labeled_matrices(mat1, vocab1, mat2, vocab2) + + @test size(merged_mat) == (4, 3) + @test combined_vocab == ["word1", "word2", "word3"] + @test merged_mat == [1 2 0; 3 4 0; 0 5 6; 0 7 8] + + # Test with sparse matrices and disjoint vocabulary + mat1 = sparse([1 0; 0 2]) + vocab1 = ["word1", "word2"] + mat2 = sparse([3 0; 0 4]) + vocab2 = ["word3", "word4"] + + merged_mat, combined_vocab = merge_labeled_matrices(mat1, vocab1, mat2, vocab2) + + @test size(merged_mat) == (4, 4) + @test combined_vocab == ["word1", "word2", "word3", "word4"] + @test merged_mat == sparse([1 0 0 0; 0 2 0 0; 0 0 3 0; 0 0 0 4]) + + # Test with different data types + mat1 = [1.0 2.0; 3.0 4.0] + vocab1 = ["word1", "word2"] + mat2 = [5 6; 7 8] + vocab2 = ["word2", "word3"] + + merged_mat, combined_vocab = merge_labeled_matrices(mat1, vocab1, mat2, vocab2) + + @test eltype(merged_mat) == Float64 + @test size(merged_mat) == (4, 3) + @test combined_vocab == ["word1", "word2", "word3"] + @test merged_mat ≈ [1.0 2.0 0.0; 3.0 4.0 0.0; 0.0 5.0 6.0; 0.0 7.0 8.0] +end + +@testset "ChunkIndex and MultiIndex getindex Tests" begin + @testset "ChunkIndex getindex" begin + ci = ChunkIndex(:index1, ["chunk1", "chunk2", "chunk3"]) + candidate = CandidateChunks(:index1, [1, 3]) + + @test getindex(ci, candidate) == ["chunk1", "chunk3"] + @test getindex(ci, candidate, :chunks) == ["chunk1", "chunk3"] + @test_throws AssertionError getindex(ci, candidate, :unsupported_field) + + # Test with non-matching index_id + candidate_wrong_id = CandidateChunks(:index2, [1, 3]) + @test getindex(ci, candidate_wrong_id) == String[] + end + + @testset "MultiIndex getindex" begin + ci1 = ChunkIndex(:index1, ["chunk1", "chunk2"]) + ci2 = ChunkIndex(:index2, ["chunk3", "chunk4"]) + mi = MultiIndex([ci1, ci2]) + candidate = CandidateChunks(:index2, [2]) + + @test getindex(mi, candidate) == ["chunk4"] + @test getindex(mi, candidate, :chunks) == ["chunk4"] + @test_throws AssertionError getindex(mi, candidate, :unsupported_field) + + # Test with non-existing index_id + candidate_non_existing = CandidateChunks(:index3, [1]) + @test getindex(mi, candidate_non_existing) == String[] + end +end + +@testset "MultiIndex Equality Tests" begin + index1 = ChunkIndex(:A) + index2 = ChunkIndex(:B) + index3 = ChunkIndex(:C) + + mi1 = MultiIndex([index1, index2]) + mi2 = MultiIndex([index1, index2]) + mi3 = MultiIndex([index2, index3]) + mi4 = MultiIndex([index1, index2, index3]) + mi5 = MultiIndex([index2, index1]) + + @test mi1 == mi2 # Identical MultiIndexes + @test mi1 != mi3 # Different indexes + @test mi1 != mi4 # Different number of indexes + @test mi3 != mi4 # Different indexes and different lengths + @test mi1 == mi5 # Same indexes, different order +end + +@testset "CandidateChunks" begin + # Different Index IDs and Intersecting Positions + cc1 = CandidateChunks(index_id = :index1, + positions = [1, 2, 3], + distances = [0.1, 0.2, 0.3]) + cc2 
= CandidateChunks(index_id = :index2, + positions = [2, 3, 4], + distances = [0.3, 0.2, 0.1]) + cc3 = CandidateChunks(index_id = :index1, + positions = [3, 4, 5], + distances = [0.3, 0.4, 0.5]) + + # Different index IDs + result_diff_id = cc1 & cc2 + @test result_diff_id.index_id == :index1 + @test isempty(result_diff_id.positions) + @test isempty(result_diff_id.distances) + + # Intersecting positions + result_intersect = cc1 & cc3 + @test result_intersect.index_id == :index1 + @test result_intersect.positions == [3] + @test result_intersect.distances ≈ [0.4] + + # Missing Distances + cc1 = CandidateChunks(index_id = :index1, positions = [1, 2], distances = Float32[]) + cc2 = CandidateChunks(index_id = :index1, positions = [2, 3], distances = [0.2, 0.3]) + + result = cc1 & cc2 + @test result.index_id == :index1 + @test result.positions == [2] + @test isempty(result.distances) +end diff --git a/test/Experimental/RAGTools.jl/utils.jl b/test/Experimental/RAGTools.jl/utils.jl new file mode 100644 index 000000000..e69de29bb diff --git a/test/runtests.jl b/test/runtests.jl index c4cc26288..e0776b243 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,6 @@ using PromptingTools using OpenAI, HTTP, JSON3 +using SparseArrays, LinearAlgebra using Test using Aqua const PT = PromptingTools diff --git a/test/utils.jl b/test/utils.jl index 1b726924a..ceabdd5e7 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -1,6 +1,7 @@ using PromptingTools: split_by_length, replace_words -using PromptingTools: _extract_handlebar_variables, _report_stats +using PromptingTools: _extract_handlebar_variables, call_cost, _report_stats using PromptingTools: _string_to_vector, _encode_local_image +using PromptingTools: DataMessage, AIMessage @testset "replace_words" begin words = ["Disney", "Snow White", "Mickey Mouse"] @@ -32,7 +33,7 @@ end # Test with empty text chunks = split_by_length("") - @test isempty(chunks) + @test chunks == [""] # Test custom separator text = "Hello,World,"^50 @@ -43,6 +44,34 @@ end @test length(chunks) == 34 @test maximum(length.(chunks)) <= 20 @test join(chunks, "") == text + + ### Multiple separators + # Single separator + text = "First sentence. Second sentence. Third sentence." + chunks = split_by_length(text, ["."], max_length = 15) + @test length(chunks) == 3 + @test chunks == ["First sentence.", " Second sentence.", " Third sentence."] + + # Multiple separators + text = "Paragraph 1\n\nParagraph 2. Sentence 1. Sentence 2.\nParagraph 3" + separators = ["\n\n", ". ", "\n"] + chunks = split_by_length(text, separators, max_length = 20) + @test length(chunks) == 5 + @test chunks[1] == "Paragraph 1\n\n" + @test chunks[2] == "Paragraph 2. " + @test chunks[3] == "Sentence 1. " + @test chunks[4] == "Sentence 2.\n" + @test chunks[5] == "Paragraph 3" + + # empty separators + text = "Some text without separators." + @test_throws AssertionError split_by_length(text, String[], max_length = 10) + # edge cases + text = "Short text" + separators = ["\n\n", ". 
", "\n"] + chunks = split_by_length(text, separators, max_length = 50) + @test length(chunks) == 1 + @test chunks[1] == text end @testset "extract_handlebar_variables" begin @@ -68,20 +97,34 @@ end @test actual_output == expected_output end +@testset "call_cost" begin + msg = AIMessage(; content = "", tokens = (1000, 2000)) + cost = call_cost(msg, "unknown_model") + @test cost == 0.0 + @test call_cost(msg, "gpt-3.5-turbo") ≈ 1000 * 1.5e-6 + 2e-6 * 2000 + + msg = DataMessage(; content = nothing, tokens = (1000, 1000)) + cost = call_cost(msg, "unknown_model") + @test cost == 0.0 + @test call_cost(msg, "gpt-3.5-turbo") ≈ 1000 * 1.5e-6 + 2e-6 * 1000 + + @test call_cost(msg, + "gpt-3.5-turbo"; + cost_of_token_prompt = 1, + cost_of_token_generation = 1) ≈ 1000 + 1000 +end + @testset "report_stats" begin # Returns a string with the total number of tokens and elapsed time when given a message and model msg = AIMessage(; content = "", tokens = (1, 5), elapsed = 5.0) - model = "model" + model = "unknown_model" expected_output = "Tokens: 6 in 5.0 seconds" @test _report_stats(msg, model) == expected_output # Returns a string with a cost - expected_output = "Tokens: 6 @ Cost: \$0.007 in 5.0 seconds" - @test _report_stats(msg, model, 2e-3, 1e-3) == expected_output - - # Returns a string without cost when it's zero - expected_output = "Tokens: 6 in 5.0 seconds" - @test _report_stats(msg, model, 0, 0) == expected_output + msg = AIMessage(; content = "", tokens = (1000, 5000), elapsed = 5.0) + expected_output = "Tokens: 6000 @ Cost: \$0.0115 in 5.0 seconds" + @test _report_stats(msg, "gpt-3.5-turbo") == expected_output end @testset "_string_to_vector" begin From e77beba4144643aaff54ae12d51e1e6b2024e777 Mon Sep 17 00:00:00 2001 From: J S <49557684+svilupp@users.noreply.github.com> Date: Thu, 21 Dec 2023 21:03:58 +0000 Subject: [PATCH 02/14] add tests --- test/runtests.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index e0776b243..94707299d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -34,4 +34,9 @@ let cb = AICode(; code = """ @test !isnothing(cb.expression) # parsed @test occursin("Test Failed", cb.stdout) # capture details of the test failure @test isnothing(cb.output) # because it failed -end \ No newline at end of file +end + +## Run experimental +@testset "Experimental" begin + include("Experimental/RAGTools.jl/runtests.jl") +end From 0116ab32009314971ab1497fdd040acd985ddb00 Mon Sep 17 00:00:00 2001 From: J S <49557684+svilupp@users.noreply.github.com> Date: Thu, 21 Dec 2023 21:05:16 +0000 Subject: [PATCH 03/14] update path --- test/Experimental/{RAGTools.jl => RAGTools}/preparation.jl | 0 test/Experimental/{RAGTools.jl => RAGTools}/retrieval.jl | 0 test/Experimental/{RAGTools.jl => RAGTools}/runtests.jl | 0 test/Experimental/{RAGTools.jl => RAGTools}/types.jl | 0 test/Experimental/{RAGTools.jl => RAGTools}/utils.jl | 0 test/runtests.jl | 2 +- 6 files changed, 1 insertion(+), 1 deletion(-) rename test/Experimental/{RAGTools.jl => RAGTools}/preparation.jl (100%) rename test/Experimental/{RAGTools.jl => RAGTools}/retrieval.jl (100%) rename test/Experimental/{RAGTools.jl => RAGTools}/runtests.jl (100%) rename test/Experimental/{RAGTools.jl => RAGTools}/types.jl (100%) rename test/Experimental/{RAGTools.jl => RAGTools}/utils.jl (100%) diff --git a/test/Experimental/RAGTools.jl/preparation.jl b/test/Experimental/RAGTools/preparation.jl similarity index 100% rename from test/Experimental/RAGTools.jl/preparation.jl rename to 
test/Experimental/RAGTools/preparation.jl diff --git a/test/Experimental/RAGTools.jl/retrieval.jl b/test/Experimental/RAGTools/retrieval.jl similarity index 100% rename from test/Experimental/RAGTools.jl/retrieval.jl rename to test/Experimental/RAGTools/retrieval.jl diff --git a/test/Experimental/RAGTools.jl/runtests.jl b/test/Experimental/RAGTools/runtests.jl similarity index 100% rename from test/Experimental/RAGTools.jl/runtests.jl rename to test/Experimental/RAGTools/runtests.jl diff --git a/test/Experimental/RAGTools.jl/types.jl b/test/Experimental/RAGTools/types.jl similarity index 100% rename from test/Experimental/RAGTools.jl/types.jl rename to test/Experimental/RAGTools/types.jl diff --git a/test/Experimental/RAGTools.jl/utils.jl b/test/Experimental/RAGTools/utils.jl similarity index 100% rename from test/Experimental/RAGTools.jl/utils.jl rename to test/Experimental/RAGTools/utils.jl diff --git a/test/runtests.jl b/test/runtests.jl index 94707299d..d8bc24dad 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -38,5 +38,5 @@ end ## Run experimental @testset "Experimental" begin - include("Experimental/RAGTools.jl/runtests.jl") + include("Experimental/RAGTools/runtests.jl") end From 57b8b80f613a23ccf497ebbbab5eea8e83eaa6b8 Mon Sep 17 00:00:00 2001 From: J S <49557684+svilupp@users.noreply.github.com> Date: Fri, 22 Dec 2023 20:11:55 +0100 Subject: [PATCH 04/14] update tests for RAGTools --- Project.toml | 12 +- src/Experimental/RAGTools/evaluation.jl | 72 ++++- src/Experimental/RAGTools/generation.jl | 53 ++-- src/Experimental/RAGTools/preparation.jl | 25 +- src/Experimental/RAGTools/retrieval.jl | 3 +- src/Experimental/RAGTools/types.jl | 17 +- src/Experimental/RAGTools/utils.jl | 6 + src/llm_interface.jl | 8 +- .../RAG/RAGJudgeAnswerFromContextShort.json | 1 + test/Experimental/RAGTools/evaluation.jl | 25 ++ test/Experimental/RAGTools/generation.jl | 88 ++++++ test/Experimental/RAGTools/preparation.jl | 60 ++++ test/Experimental/RAGTools/retrieval.jl | 49 ++++ test/Experimental/RAGTools/runtests.jl | 15 +- test/Experimental/RAGTools/types.jl | 267 ++++++++++-------- test/Experimental/RAGTools/utils.jl | 46 +++ test/llm_openai.jl | 4 +- 17 files changed, 575 insertions(+), 176 deletions(-) create mode 100644 templates/RAG/RAGJudgeAnswerFromContextShort.json create mode 100644 test/Experimental/RAGTools/evaluation.jl create mode 100644 test/Experimental/RAGTools/generation.jl diff --git a/Project.toml b/Project.toml index 66f9ada1d..dd79425ea 100644 --- a/Project.toml +++ b/Project.toml @@ -13,19 +13,19 @@ PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" Preferences = "21216c6a-2e73-6563-6e65-726566657250" [weakdeps] -SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [extensions] -RAGToolsExperimentalExt = ["SparseArrays","LinearAlgebra"] +RAGToolsExperimentalExt = ["SparseArrays", "LinearAlgebra"] [compat] Aqua = "0.7" Base64 = "<0.0.1, 1" HTTP = "1" JSON3 = "1" -Logging = "<0.0.1, 1" LinearAlgebra = "<0.0.1, 1" +Logging = "<0.0.1, 1" OpenAI = "0.8.7" PrecompileTools = "1" Preferences = "1" @@ -35,9 +35,9 @@ julia = "1.9,1.10" [extras] Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = 
["Aqua", "Test", "SparseArrays","LinearAlgebra"] +test = ["Aqua", "Test", "SparseArrays", "LinearAlgebra"] diff --git a/src/Experimental/RAGTools/evaluation.jl b/src/Experimental/RAGTools/evaluation.jl index 5ded7896d..010799142 100644 --- a/src/Experimental/RAGTools/evaluation.jl +++ b/src/Experimental/RAGTools/evaluation.jl @@ -1,8 +1,8 @@ ### For testing and eval # This is a return_type for extraction when generating Q&A set with aiextract @kwdef struct QAItem - question::String - answer::String + question::String = "" + answer::String = "" end # This is for saving in JSON format for evaluation later @kwdef struct QAEvalItem @@ -12,12 +12,24 @@ end answer::String = "" end +@kwdef struct QAEvalResult + source::AbstractString + context::AbstractString + question::AbstractString + answer::AbstractString + retrieval_score::Union{Number, Nothing} = nothing + retrieval_rank::Union{Int, Nothing} = nothing + answer_score::Union{Number, Nothing} = nothing + parameters::AbstractDict +end + "Provide the `final_rating` between 1-5. Provide the rationale for it." @kwdef struct JudgeRating rationale::Union{Nothing, String} = nothing final_rating::Int end -"Explain the `final_rating` in `rationale`" + +"`final_rating` is the average of all scoring criteria. Explain the `final_rating` in `rationale`" @kwdef struct JudgeAllScores relevance::Int completeness::Int @@ -33,7 +45,7 @@ function Base.isvalid(x::QAEvalItem) end # Nicer show method with some colors! -function Base.show(io::IO, t::Union{QAItem, QAEvalItem}) +function Base.show(io::IO, t::Union{QAItem, QAEvalItem, QAEvalResult}) printstyled(io, "$(nameof(typeof(t))):\n", color = :green, bold = true) for f in fieldnames(typeof(t)) printstyled(io, " ", f, color = :blue, bold = true) @@ -42,6 +54,7 @@ function Base.show(io::IO, t::Union{QAItem, QAEvalItem}) end # Define how JSON3 should serialize/deserialize the struct into JSON files JSON3.StructTypes.StructType(::Type{QAEvalItem}) = JSON3.StructTypes.Struct() +JSON3.StructTypes.StructType(::Type{QAEvalResult}) = JSON3.StructTypes.Struct() """ build_qa_evals(doc_chunks::Vector{<:AbstractString}, sources::Vector{<:AbstractString}; @@ -104,3 +117,54 @@ function build_qa_evals(doc_chunks::Vector{<:AbstractString}, verbose && @info "Q&A Sets built! 
(cost: \$$(round(cost_tracker[], digits=3)))" return filter(isvalid, output) end + +"Returns 1.0 if `context` overlaps or is contained within any of the `candidate_context`" +function score_retrieval_hit(orig_context::AbstractString, + candidate_context::Vector{<:AbstractString}) + 1.0 * (any(occursin.(Ref(orig_context), candidate_context)) || + any(occursin.(candidate_context, Ref(orig_context)))) +end + +"Returns Integer rank of the position where `context` overlaps or is contained within a `candidate_context`" +function score_retrieval_rank(orig_context::AbstractString, + candidate_context::Vector{<:AbstractString}) + findfirst((occursin.(Ref(orig_context), candidate_context)) .|| + (occursin.(candidate_context, Ref(orig_context)))) +end + +"Single QAEvalItem evalution" +function run_qa_evals(qa_item::QAEvalItem, ctx::RAGContext; + verbose::Bool = true, parameters_dict::AbstractDict, + judge_template::Symbol = :RAGJudgeAnswerFromContext, + model_judge::AbstractString) + retrieval_score = score_retrieval_hit(qa_item.context, ctx.context) + retrieval_rank = score_retrieval_rank(qa_item.context, ctx.context) + + answer_score = try + msg = aiextract(judge_template; model = model_judge, verbose, + ctx.context, + question, + msg.content, + return_type = RAG.JudgeAllScores) + final_rating = if msg.content isa AbstractDict && haskey(msg.content, :final_rating) + # if return type parsing failed + msg.content[:final_rating] + else + # if return_type worked + msg.content.final_rating + end + catch e + verbose && @warn "Error in QA eval ($(qa_item.question)): $e" + nothing + end + + return QAEvalResult(; + ctx.source, + qa_item.context, + qa_item.question, + ctx.answer, + retrieval_score, + retrieval_rank, + answer_score, + parameters = parameters_dict) +end diff --git a/src/Experimental/RAGTools/generation.jl b/src/Experimental/RAGTools/generation.jl index 804e0fdd0..733fe5e3c 100644 --- a/src/Experimental/RAGTools/generation.jl +++ b/src/Experimental/RAGTools/generation.jl @@ -1,12 +1,19 @@ -# stub to be replaced with extension +# stub to be replaced within the package extension function _normalize end """ - airag(index::AbstractChunkIndex, rag_template::Symbol=:RAGAnswerFromContext; - question::AbstractString, top_k::Int=3, tag_filter::Union{Symbol,Vector{String},Regex}=:auto, - rerank_strategy::RerankingStrategy=Passthrough(), model_embedding::String=PT.MODEL_EMBEDDING, - model_chat::String=PT.MODEL_CHAT, model_metadata::String=PT.MODEL_CHAT, - chunks_window_margin::Tuple{Int,Int}=(1, 1), return_context::Bool=false, verbose::Bool=true, kwargs...) -> Any + airag(index::AbstractChunkIndex, rag_template::Symbol = :RAGAnswerFromContext; + question::AbstractString, + top_k::Int = 3, + tag_filter::Union{Symbol, Vector{String}, Regex, Nothing} = :auto, + rerank_strategy::RerankingStrategy = Passthrough(), + model_embedding::String = PT.MODEL_EMBEDDING, model_chat::String = PT.MODEL_CHAT, + model_metadata::String = PT.MODEL_CHAT, + metadata_template::Symbol = :RAGExtractMetadataShort, + chunks_window_margin::Tuple{Int, Int} = (1, 1), + return_context::Bool = false, verbose::Bool = true, + api_kwargs::NamedTuple = NamedTuple(), + kwargs...) Generates a response for a given question using a Retrieval-Augmented Generation (RAG) approach. @@ -17,14 +24,16 @@ The function selects relevant chunks from an `ChunkIndex`, optionally filters th - `rag_template::Symbol`: Template for the RAG model, defaults to `:RAGAnswerFromContext`. - `question::AbstractString`: The question to be answered. 
- `top_k::Int`: Number of top candidates to retrieve based on embedding similarity. -- `tag_filter::Union{Symbol, Vector{String}, Regex}`: Mechanism for filtering chunks based on tags (either automatically detected, specific tags, or a regex pattern). +- `tag_filter::Union{Symbol, Vector{String}, Regex}`: Mechanism for filtering chunks based on tags (either automatically detected, specific tags, or a regex pattern). Disabled by setting to `nothing`. - `rerank_strategy::RerankingStrategy`: Strategy for reranking the retrieved chunks. - `model_embedding::String`: Model used for embedding the question, default is `PT.MODEL_EMBEDDING`. - `model_chat::String`: Model used for generating the final response, default is `PT.MODEL_CHAT`. - `model_metadata::String`: Model used for extracting metadata, default is `PT.MODEL_CHAT`. +- `metadata_template::Symbol`: Template for the metadata extraction process from the question, defaults to: `:RAGExtractMetadataShort` - `chunks_window_margin::Tuple{Int,Int}`: The window size around each chunk to consider for context building. - `return_context::Bool`: If `true`, returns the context used for RAG along with the response. - `verbose::Bool`: If `true`, enables verbose logging. +- `api_kwargs`: API parameters that will be forwarded to the API calls # Returns - If `return_context` is `false`, returns the generated message (`msg`). @@ -51,16 +60,18 @@ msg = airag(index; question) function airag(index::AbstractChunkIndex, rag_template::Symbol = :RAGAnswerFromContext; question::AbstractString, top_k::Int = 3, - tag_filter::Union{Symbol, Vector{String}, Regex} = :auto, + tag_filter::Union{Symbol, Vector{String}, Regex, Nothing} = :auto, rerank_strategy::RerankingStrategy = Passthrough(), model_embedding::String = PT.MODEL_EMBEDDING, model_chat::String = PT.MODEL_CHAT, model_metadata::String = PT.MODEL_CHAT, + metadata_template::Symbol = :RAGExtractMetadataShort, chunks_window_margin::Tuple{Int, Int} = (1, 1), return_context::Bool = false, verbose::Bool = true, + api_kwargs::NamedTuple = NamedTuple(), kwargs...) ## Note: Supports only single ChunkIndex for now ## Checks - @assert tag_filter isa Symbol&&tag_filter == :auto "Only `:auto`, `Vector{String}`, or `Regex` are supported for `tag_filter`" + @assert !(tag_filter isa Symbol && tag_filter != :auto) "Only `:auto`, `Vector{String}`, or `Regex` are supported for `tag_filter`" @assert chunks_window_margin[1] >= 0&&chunks_window_margin[2] >= 0 "Both `chunks_window_margin` values must be non-negative" placeholders = only(aitemplates(rag_template)).variables # only one template should be found @assert (:question in placeholders)&&(:context in placeholders) "Provided RAG Template $(rag_template) is not suitable. It must have placeholders: `question` and `context`." 
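A minimal usage sketch of the keyword interface documented above (the file path and question are illustrative; the calls rely on the defaults described in the docstring):

```julia
using PromptingTools
using PromptingTools.Experimental.RAGTools

index = build_index(["docs/manual.txt"])  # hypothetical source file
msg = airag(index; question = "How do I join two tables?", top_k = 3)

# Return the intermediate pipeline state as well, e.g., for evaluation/debugging
msg, ctx = airag(index; question = "How do I join two tables?",
    tag_filter = nothing, return_context = true)
ctx.context         # chunks (with neighboring context) passed to the model
ctx.emb_candidates  # CandidateChunks from the embedding similarity step
```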
@@ -68,27 +79,30 @@ function airag(index::AbstractChunkIndex, rag_template::Symbol = :RAGAnswerFromC question_emb = aiembed(question, _normalize; model = model_embedding, - verbose).content .|> Float32 + verbose, api_kwargs).content .|> Float32 # no need for Float64 emb_candidates = find_closest(index, question_emb; top_k) tag_candidates = if tag_filter == :auto && !isnothing(tags(index)) && !isempty(model_metadata) + _check_aiextract_capability(model_metadata) # extract metadata via LLM call - # Check that the provided model is known and that it is an OpenAI model (for the aiextract function to work) - @assert haskey(PT.MODEL_REGISTRY, - model_metadata)&&PT.MODEL_REGISTRY[model_metadata].schema == PT.OpenAISchema() "Only OpenAI models support the metadata extraction now. $model_metadata is not a registered OpenAI model." metadata_ = try msg = aiextract(metadata_template; return_type = MaybeMetadataItems, - text = chunk, + text = question, instructions = "In addition to extracted items, suggest 2-3 filter keywords that could be relevant to answer this question.", - verbose, model = model_metadata) - metadata_extract(msg.content.items) - catch + verbose, model = model_metadata, api_kwargs) + ## eg, ["software:::pandas", "language:::python", "julia_package:::dataframes"] + ## we split it and take only the keyword, not the category + metadata_extract(msg.content.items) |> + x -> split.(x, ":::") |> x -> getindex.(x, 2) + catch e String[] end find_tags(index, metadata_) - elseif !(tag_filter isa Symbol) + elseif tag_filter isa Union{Vector{String}, Regex} find_tags(index, tag_filter) + elseif isnothing(tag_filter) + nothing else ## not filtering -- use all rows and ignore this nothing @@ -106,16 +120,19 @@ function airag(index::AbstractChunkIndex, rag_template::Symbol = :RAGAnswerFromC position + chunks_window_margin[2])] is_same_source = sources(index)[max(1, position - chunks_window_margin[1]):min(end, position + chunks_window_margin[2])] .== sources(index)[position] + # add the ranking number, eg, 1. Context #1 push!(context, "$(i). $(join(chunks_[is_same_source], "\n"))") end ## LLM call msg = aigenerate(rag_template; question, context = join(context, "\n\n"), model = model_chat, verbose, + api_kwargs, kwargs...) if return_context # for evaluation rag_context = RAGContext(; question, + answer = msg.content, context, emb_candidates, tag_candidates, diff --git a/src/Experimental/RAGTools/preparation.jl b/src/Experimental/RAGTools/preparation.jl index 3c06dc403..50e937805 100644 --- a/src/Experimental/RAGTools/preparation.jl +++ b/src/Experimental/RAGTools/preparation.jl @@ -36,9 +36,12 @@ function build_index end """ build_index(files::Vector{<:AbstractString}; - separators=["\n\n", ". ", "\n"], max_length::Int=256, - extract_metadata::Bool=false, verbose::Bool=true, metadata_template::Symbol=:RAGExtractMetadataShort, - model_embedding::String=PT.MODEL_EMBEDDING, model_metadata::String=PT.MODEL_CHAT) + separators = ["\n\n", ". ", "\n"], max_length::Int = 256, + extract_metadata::Bool = false, verbose::Bool = true, + metadata_template::Symbol = :RAGExtractMetadataShort, + model_embedding::String = PT.MODEL_EMBEDDING, + model_metadata::String = PT.MODEL_CHAT, + api_kwargs::NamedTuple = NamedTuple()) Build an index for RAG (Retriever-Augmented Generation) applications from the provided file paths. 
The function processes each file, splits its content into chunks, embeds these chunks, @@ -46,7 +49,7 @@ optionally extracts metadata, and then compiles this information into a retrieva # Arguments - `files`: A vector of valid file paths to be indexed. -- `separators`: A list of strings used as separators for splitting the text in each file into chunks. Default is `["\\n\\n", ". ", "\\n"]`. +- `separators`: A list of strings used as separators for splitting the text in each file into chunks. Default is `["\n\n", ". ", "\n"]`. - `max_length`: The maximum length of each chunk (if possible with provided separators). Default is 256. - `extract_metadata`: A boolean flag indicating whether to extract metadata from each chunk (to build filter `tags` in the index). Default is `false`. Metadata extraction incurs additional cost and requires `model_metadata` and `metadata_template` to be provided. @@ -54,6 +57,7 @@ optionally extracts metadata, and then compiles this information into a retrieva - `metadata_template`: A symbol indicating the template to be used for metadata extraction. Default is `:RAGExtractMetadataShort`. - `model_embedding`: The model to use for embedding. - `model_metadata`: The model to use for metadata extraction. +- `api_kwargs`: Parameters to be provided to the API endpoint. # Returns - `ChunkIndex`: An object containing the compiled index of chunks, embeddings, tags, vocabulary, and sources. @@ -77,7 +81,8 @@ function build_index(files::Vector{<:AbstractString}; extract_metadata::Bool = false, verbose::Bool = true, metadata_template::Symbol = :RAGExtractMetadataShort, model_embedding::String = PT.MODEL_EMBEDDING, - model_metadata::String = PT.MODEL_CHAT) + model_metadata::String = PT.MODEL_CHAT, + api_kwargs::NamedTuple = NamedTuple()) ## @assert all(isfile, files) "Some `files` don't exist (Check: $(join(filter(!isfile,files),", "))" @@ -101,14 +106,12 @@ function build_index(files::Vector{<:AbstractString}; # Notice that we embed all doc_chunks at once, not one by one # OpenAI supports embedding multiple documents to reduce the number of API calls/network latency time - emb = aiembed(doc_chunks, _normalize; model = model_embedding, verbose) + emb = aiembed(doc_chunks, _normalize; model = model_embedding, verbose, api_kwargs) Threads.atomic_add!(cost_tracker, PT.call_cost(emb, model_embedding)) # track costs push!(output_embeddings, Float32.(emb.content)) if extract_metadata && !isempty(model_metadata) - # Check that the provided model is known and that it is an OpenAI model (for the aiextract function to work) - @assert haskey(PT.MODEL_REGISTRY, - model_metadata)&&PT.MODEL_REGISTRY[model_metadata].schema == PT.OpenAISchema() "Only OpenAI models support the metadata extraction now. $model_metadata is not a registered OpenAI model." + _check_aiextract_capability(model_metadata) metadata_ = asyncmap(doc_chunks) do chunk try msg = aiextract(metadata_template; @@ -116,7 +119,7 @@ function build_index(files::Vector{<:AbstractString}; text = chunk, instructions = "None.", verbose, - model = model_metadata) + model = model_metadata, api_kwargs) Threads.atomic_add!(cost_tracker, PT.call_cost(msg, model_metadata)) # track costs items = metadata_extract(msg.content.items) catch @@ -129,7 +132,7 @@ function build_index(files::Vector{<:AbstractString}; ## Create metadata tags and associated vocabulary tags, tags_vocab = if !isempty(output_metadata) # Requires SparseArrays.jl! 
- _build_tags(vcat(output_metadata...)) # need to vcat to be on the "chunk-level" + build_tags(vcat(output_metadata...)) # need to vcat to be on the "chunk-level" else tags, tags_vocab = nothing, nothing end diff --git a/src/Experimental/RAGTools/retrieval.jl b/src/Experimental/RAGTools/retrieval.jl index b824b5396..db33a90e0 100644 --- a/src/Experimental/RAGTools/retrieval.jl +++ b/src/Experimental/RAGTools/retrieval.jl @@ -30,7 +30,8 @@ function find_tags(index::AbstractChunkIndex, end function find_tags(index::AbstractChunkIndex, tags::Vector{<:AbstractString}) - pos = Int[find_tags(index, tag).positions for tag in tags] |> unique + pos = [find_tags(index, tag).positions for tag in tags] |> + Base.Splat(vcat) |> unique |> x -> convert(Vector{Int}, x) return CandidateChunks(index.id, pos, ones(Float32, length(pos))) end diff --git a/src/Experimental/RAGTools/types.jl b/src/Experimental/RAGTools/types.jl index c30f4408e..cd8ee1607 100644 --- a/src/Experimental/RAGTools/types.jl +++ b/src/Experimental/RAGTools/types.jl @@ -36,20 +36,20 @@ function Base.var"=="(i1::ChunkIndex, i2::ChunkIndex) end function Base.vcat(i1::ChunkIndex, i2::ChunkIndex) - tags, tags_vocab = if (isnothing(tags(i1)) || isnothing(tags(i2))) + tags_, tags_vocab_ = if (isnothing(tags(i1)) || isnothing(tags(i2))) nothing, nothing elseif tags_vocab(i1) == tags_vocab(i2) vcat(tags(i1), tags(i2)), tags_vocab(i1) else merge_labeled_matrices(tags(i1), tags_vocab(i1), tags(i2), tags_vocab(i2)) end - embeddings = (isnothing(embeddings(i1)) || isnothing(embeddings(i2))) ? nothing : - hcat(embeddings(i1), embeddings(i2)) + embeddings_ = (isnothing(embeddings(i1)) || isnothing(embeddings(i2))) ? nothing : + hcat(embeddings(i1), embeddings(i2)) ChunkIndex(; chunks = vcat(chunks(i1), chunks(i2)), - embeddings, - tags, - tags_vocab, + embeddings = embeddings_, + tags = tags_, + tags_vocab = tags_vocab_, sources = vcat(i1.sources, i2.sources)) end @@ -95,6 +95,8 @@ function Base.var"&"(cc1::CandidateChunks, cc2::CandidateChunks) end function Base.getindex(ci::ChunkIndex, candidate::CandidateChunks, field::Symbol = :chunks) @assert field==:chunks "Only `chunks` field is supported for now" + len_ = length(chunks(ci)) + @assert all(1 .<= candidate.positions .<= len_) "Some positions are out of bounds" if ci.id == candidate.index_id chunks(ci)[candidate.positions] else @@ -114,10 +116,11 @@ end """ RAGContext -A struct for debugging RAG answers. It contains the question, context, and the candidate chunks at each step of the RAG pipeline. +A struct for debugging RAG answers. It contains the question, answer, context, and the candidate chunks at each step of the RAG pipeline. """ @kwdef struct RAGContext question::AbstractString + answer::AbstractString context::Vector{<:AbstractString} emb_candidates::CandidateChunks tag_candidates::Union{Nothing, CandidateChunks} diff --git a/src/Experimental/RAGTools/utils.jl b/src/Experimental/RAGTools/utils.jl index d28ceae62..f980a61e0 100644 --- a/src/Experimental/RAGTools/utils.jl +++ b/src/Experimental/RAGTools/utils.jl @@ -1,3 +1,9 @@ +# Utility to check model suitability +function _check_aiextract_capability(model::AbstractString) + # Check that the provided model is known and that it is an OpenAI model (for the aiextract function to work) + @assert haskey(PT.MODEL_REGISTRY, + model)&&PT.MODEL_REGISTRY[model].schema isa PT.AbstractOpenAISchema "Only OpenAI models support the metadata extraction now. $model is not a registered OpenAI model." 
+end
 # Utitity to be able to combine indices from different sources/documents easily
 function merge_labeled_matrices(mat1::AbstractMatrix{T1},
         vocab1::Vector{String},
diff --git a/src/llm_interface.jl b/src/llm_interface.jl
index 3dc8d2f22..aead1cf77 100644
--- a/src/llm_interface.jl
+++ b/src/llm_interface.jl
@@ -139,19 +139,19 @@ end
 function aiembed(doc_or_docs, args...; model = MODEL_EMBEDDING, kwargs...)
     global MODEL_REGISTRY
     schema = get(MODEL_REGISTRY, model, (; schema = PROMPT_SCHEMA)).schema
-    aiembed(schema, doc_or_docs, args...; kwargs...)
+    aiembed(schema, doc_or_docs, args...; model, kwargs...)
 end
 function aiclassify(prompt; model = MODEL_CHAT, kwargs...)
     global MODEL_REGISTRY
     schema = get(MODEL_REGISTRY, model, (; schema = PROMPT_SCHEMA)).schema
-    aiclassify(schema, prompt; kwargs...)
+    aiclassify(schema, prompt; model, kwargs...)
 end
 function aiextract(prompt; model = MODEL_CHAT, kwargs...)
     global MODEL_REGISTRY
     schema = get(MODEL_REGISTRY, model, (; schema = PROMPT_SCHEMA)).schema
-    aiextract(schema, prompt; kwargs...)
+    aiextract(schema, prompt; model, kwargs...)
 end
 function aiscan(prompt; model = MODEL_CHAT, kwargs...)
     schema = get(MODEL_REGISTRY, model, (; schema = PROMPT_SCHEMA)).schema
-    aiscan(schema, prompt; kwargs...)
+    aiscan(schema, prompt; model, kwargs...)
 end
\ No newline at end of file
diff --git a/templates/RAG/RAGJudgeAnswerFromContextShort.json b/templates/RAG/RAGJudgeAnswerFromContextShort.json
new file mode 100644
index 000000000..93ea6447f
--- /dev/null
+++ b/templates/RAG/RAGJudgeAnswerFromContextShort.json
@@ -0,0 +1 @@
+[{"content":"Template Metadata","description":"For RAG applications. Simple and short prompt to judge an answer to a question on a scale from 1-5. Placeholders: `question`, `context`, `answer`","version":"1.0","source":"","_type":"metadatamessage"},{"content":"You're an impartial judge. \nRead carefully the provided question and the answer based on the context. \nProvide a rating on a scale 1-5 (1=worst quality, 5=best quality) that reflects how relevant, helpful, clear, and consistent with the provided context the answer was.\n```\n","variables":[],"_type":"systemmessage"},{"content":"# User Question\n---\n{{question}}\n---\n\n\n# Context Information\n---\n{{context}}\n---\n\n\n# Assistant's Answer\n---\n{{answer}}\n---\n\n\n# Judge's Evaluation\n","variables":["question","context","answer"],"_type":"usermessage"}]
\ No newline at end of file
diff --git a/test/Experimental/RAGTools/evaluation.jl b/test/Experimental/RAGTools/evaluation.jl
new file mode 100644
index 000000000..1a8ef5203
--- /dev/null
+++ b/test/Experimental/RAGTools/evaluation.jl
@@ -0,0 +1,25 @@
+using PromptingTools.Experimental.RAGTools: QAEvalItem
+using PromptingTools.Experimental.RAGTools: score_retrieval_hit, score_retrieval_rank
+
+@testset "QAEvalItem" begin
+    empty_qa = QAEvalItem()
+    @test !isvalid(empty_qa)
+    full_qa = QAEvalItem(; question = "a", answer = "b", context = "c")
+    @test isvalid(full_qa)
+end
+
+@testset "score_retrieval_hit,score_retrieval_rank" begin
+    orig_context = "I am a horse."
+ candidate_context = ["Hello", "World", "I am a horse...."] + candidate_context2 = ["Hello", "I am a hors"] + candidate_context3 = ["Hello", "World", "I am X horse...."] + @test score_retrieval_hit(orig_context, candidate_context) == 1.0 + @test score_retrieval_hit(orig_context, candidate_context2) == 1.0 + @test score_retrieval_hit(orig_context, candidate_context[1:2]) == 0.0 + @test score_retrieval_hit(orig_context, candidate_context3) == 0.0 + + @test score_retrieval_rank(orig_context, candidate_context) == 3 + @test score_retrieval_rank(orig_context, candidate_context2) == 2 + @test score_retrieval_rank(orig_context, candidate_context[1:2]) == nothing + @test score_retrieval_rank(orig_context, candidate_context3) == nothing +end \ No newline at end of file diff --git a/test/Experimental/RAGTools/generation.jl b/test/Experimental/RAGTools/generation.jl new file mode 100644 index 000000000..bfacf4cb5 --- /dev/null +++ b/test/Experimental/RAGTools/generation.jl @@ -0,0 +1,88 @@ +using PromptingTools.Experimental.RAGTools: MaybeMetadataItems, MetadataItem +@testset "airag" begin + # test with a mock server + PORT = rand(1000:2000) + PT.register_model!(; name = "mock-emb", schema = PT.CustomOpenAISchema()) + PT.register_model!(; name = "mock-meta", schema = PT.CustomOpenAISchema()) + PT.register_model!(; name = "mock-gen", schema = PT.CustomOpenAISchema()) + + echo_server = HTTP.serve!(PORT; verbose = -1) do req + content = JSON3.read(req.body) + + if content[:model] == "mock-gen" + user_msg = last(content[:messages]) + response = Dict(:choices => [Dict(:message => user_msg)], + :model => content[:model], + :usage => Dict(:total_tokens => length(user_msg[:content]), + :prompt_tokens => length(user_msg[:content]), + :completion_tokens => 0)) + elseif content[:model] == "mock-emb" + # for i in 1:length(content[:input]) + response = Dict(:data => [Dict(:embedding => ones(Float32, 128))], + :usage => Dict(:total_tokens => length(content[:input]), + :prompt_tokens => length(content[:input]), + :completion_tokens => 0)) + elseif content[:model] == "mock-meta" + user_msg = last(content[:messages]) + response = Dict(:choices => [ + Dict(:message => Dict(:function_call => Dict(:arguments => JSON3.write(MaybeMetadataItems([ + MetadataItem("yes", "category"), + ]))))), + ], + :model => content[:model], + :usage => Dict(:total_tokens => length(user_msg[:content]), + :prompt_tokens => length(user_msg[:content]), + :completion_tokens => 0)) + else + @info content + end + return HTTP.Response(200, JSON3.write(response)) + end + + ## Index + index = ChunkIndex(; + sources = [".", ".", "."], + chunks = ["a", "b", "c"], + embeddings = zeros(128, 3), + tags = vcat(trues(2, 2), falses(1, 2)), + tags_vocab = ["yes", "no"],) + ## Sub-calls + question_emb = aiembed(["x", "x"]; + model = "mock-emb", + api_kwargs = (; url = "http://localhost:$(PORT)")) + @test question_emb.content == ones(128) + metadata_msg = aiextract(:RAGExtractMetadataShort; return_type = MaybeMetadataItems, + text = "x", + model = "mock-meta", api_kwargs = (; url = "http://localhost:$(PORT)")) + @test metadata_msg.content.items == [MetadataItem("yes", "category")] + answer_msg = aigenerate(:RAGAnswerFromContext; + question = "Time?", + context = "XYZ", + model = "mock-gen", api_kwargs = (; url = "http://localhost:$(PORT)")) + @test occursin("Time?", answer_msg.content) + ## E2E + msg = airag(index; question = "Time?", model_embedding = "mock-emb", + model_chat = "mock-gen", + model_metadata = "mock-meta", api_kwargs = (; url = 
"http://localhost:$(PORT)"), + tag_filter = ["yes"], + return_context = false) + @test occursin("Time?", msg.content) + # different kwargs + msg, ctx = airag(index; question = "Time?", model_embedding = "mock-emb", + model_chat = "mock-gen", + model_metadata = "mock-meta", api_kwargs = (; url = "http://localhost:$(PORT)"), + tag_filter = :auto, + extract_metadata = false, verbose = false, + return_context = true) + @test ctx.context == ["1. a\nb\nc", "2. a\nb"] + @test ctx.emb_candidates.positions == [3, 2, 1] + @test ctx.emb_candidates.distances == zeros(3) + @test ctx.tag_candidates.positions == [1, 2] + @test ctx.tag_candidates.distances == ones(2) + @test ctx.filtered_candidates.positions == [2, 1] #re-sort + @test ctx.filtered_candidates.distances == 0.5ones(2) + @test ctx.reranked_candidates.positions == [2, 1] # no change + @test ctx.reranked_candidates.distances == 0.5ones(2) # no change + # clean up + close(echo_server) +end diff --git a/test/Experimental/RAGTools/preparation.jl b/test/Experimental/RAGTools/preparation.jl index 81da327c6..3e8396fbc 100644 --- a/test/Experimental/RAGTools/preparation.jl +++ b/test/Experimental/RAGTools/preparation.jl @@ -1,3 +1,6 @@ +using PromptingTools.Experimental.RAGTools: metadata_extract, MetadataItem +using PromptingTools.Experimental.RAGTools: MaybeMetadataItems, build_tags, build_index + @testset "metadata_extract" begin # MetadataItem Structure item = MetadataItem("value", "category") @@ -65,4 +68,61 @@ end @test tags_vocab_ == ["tag1", "tag2", "tag3"] @test nnz(tags_) == 3 @test all([tags_[1, 1], tags_[3, 2], tags_[3, 3]]) +end + +@testset "build_index" begin + # test with a mock server + PORT = rand(1000:2000) + PT.register_model!(; name = "mock-emb", schema = PT.CustomOpenAISchema()) + PT.register_model!(; name = "mock-meta", schema = PT.CustomOpenAISchema()) + PT.register_model!(; name = "mock-get", schema = PT.CustomOpenAISchema()) + + echo_server = HTTP.serve!(PORT; verbose = -1) do req + content = JSON3.read(req.body) + + if content[:model] == "mock-gen" + user_msg = last(content[:messages]) + response = Dict(:choices => [Dict(:message => user_msg)], + :model => content[:model], + :usage => Dict(:total_tokens => length(user_msg[:content]), + :prompt_tokens => length(user_msg[:content]), + :completion_tokens => 0)) + elseif content[:model] == "mock-emb" + response = Dict(:data => [Dict(:embedding => ones(Float32, 128)) + for i in 1:length(content[:input])], + :usage => Dict(:total_tokens => length(content[:input]), + :prompt_tokens => length(content[:input]), + :completion_tokens => 0)) + elseif content[:model] == "mock-meta" + user_msg = last(content[:messages]) + response = Dict(:choices => [ + Dict(:message => Dict(:function_call => Dict(:arguments => JSON3.write(MaybeMetadataItems([ + MetadataItem("yes", "category"), + ]))))), + ], + :model => content[:model], + :usage => Dict(:total_tokens => length(user_msg[:content]), + :prompt_tokens => length(user_msg[:content]), + :completion_tokens => 0)) + else + @info content + end + return HTTP.Response(200, JSON3.write(response)) + end + + text = "This is a long text that will be split into chunks.\n\n It will be split by the separator. And also by the separator '\n'." 
+ tmp, _ = mktemp() + write(tmp, text) + mini_files = [tmp, tmp] + index = build_index(mini_files; max_length = 10, extract_metadata = true, + model_embedding = "mock-emb", + model_metadata = "mock-meta", api_kwargs = (; url = "http://localhost:$(PORT)")) + @test index.embeddings == hcat(fill(normalize(ones(Float32, 128)), 8)...) + @test index.chunks[1:4] == index.chunks[5:8] + @test index.sources == fill(tmp, 8) + @test index.tags == ones(8, 1) + @test index.tags_vocab == ["category:::yes"] + + # clean up + close(echo_server) end \ No newline at end of file diff --git a/test/Experimental/RAGTools/retrieval.jl b/test/Experimental/RAGTools/retrieval.jl index cdff05b45..9f21b561e 100644 --- a/test/Experimental/RAGTools/retrieval.jl +++ b/test/Experimental/RAGTools/retrieval.jl @@ -1,3 +1,52 @@ +using PromptingTools.Experimental.RAGTools: find_closest, find_tags +using PromptingTools.Experimental.RAGTools: Passthrough, rerank + +@testset "find_closest" begin + test_embeddings = [1.0 2.0; 3.0 4.0; 5.0 6.0] |> + x -> mapreduce(normalize, hcat, eachcol(x)) + query_embedding = [0.1, 0.35, 0.5] |> normalize + positions, distances = find_closest(test_embeddings, query_embedding, top_k = 2) + # The query vector should be closer to the first embedding + @test positions == [1, 2] + @test isapprox(distances, [0.9975694083904584 + 0.9939123761133188], atol = 1e-3) + + # Test when top_k is more than available embeddings + positions, _ = find_closest(test_embeddings, query_embedding, top_k = 5) + @test length(positions) == size(test_embeddings, 2) + + # Test behavior with edge values (top_k == 0) + @test find_closest(test_embeddings, query_embedding, top_k = 0) == ([], []) +end + +@testset "find_tags" begin + test_embeddings = [1.0 2.0; 3.0 4.0; 5.0 6.0] |> + x -> mapreduce(normalize, hcat, eachcol(x)) + query_embedding = [0.1, 0.35, 0.5] |> normalize + test_tags_vocab = ["julia", "python", "jr"] + test_tags_matrix = sparse([1, 2], [1, 3], [true, true], 2, 3) + index = ChunkIndex(; + sources = [".", "."], + chunks = ["julia", "jr"], + embeddings = test_embeddings, + tags = test_tags_matrix, + tags_vocab = test_tags_vocab) + + # Test for finding the correct positions of a specific tag + @test find_tags(index, "julia").positions == [1] + @test find_tags(index, "julia").distances == [1.0] + + # Test for no tag found // not in vocab + @test find_tags(index, "python").positions |> isempty + @test find_tags(index, "java").positions |> isempty + + # Test with regex matching + @test find_tags(index, r"^j").positions == [1, 2] + + # Test with multiple tags in vocab + @test find_tags(index, ["python", "jr", "x"]).positions == [2] +end + @testset "rerank" begin # Mock data for testing index = "mock_index" diff --git a/test/Experimental/RAGTools/runtests.jl b/test/Experimental/RAGTools/runtests.jl index 6c70c014e..605ce5df5 100644 --- a/test/Experimental/RAGTools/runtests.jl +++ b/test/Experimental/RAGTools/runtests.jl @@ -1,10 +1,13 @@ using Test using SparseArrays, LinearAlgebra using PromptingTools.Experimental.RAGTools +using JSON3, HTTP -include("utils.jl") -include("types.jl") -include("preparation.jl") -include("retrieval.jl") -# include("generation.jl") -# include("evaluation.jl") \ No newline at end of file +@testset "RAGTools" begin + include("utils.jl") + include("types.jl") + include("preparation.jl") + include("retrieval.jl") + include("generation.jl") + include("evaluation.jl") +end diff --git a/test/Experimental/RAGTools/types.jl b/test/Experimental/RAGTools/types.jl index 61bd47ae3..bfb915919 100644 
--- a/test/Experimental/RAGTools/types.jl +++ b/test/Experimental/RAGTools/types.jl @@ -1,121 +1,154 @@ - -@testset "merge_labeled_matrices" begin - # Test with dense matrices and overlapping vocabulary - mat1 = [1 2; 3 4] - vocab1 = ["word1", "word2"] - mat2 = [5 6; 7 8] - vocab2 = ["word2", "word3"] - - merged_mat, combined_vocab = merge_labeled_matrices(mat1, vocab1, mat2, vocab2) - - @test size(merged_mat) == (4, 3) - @test combined_vocab == ["word1", "word2", "word3"] - @test merged_mat == [1 2 0; 3 4 0; 0 5 6; 0 7 8] - - # Test with sparse matrices and disjoint vocabulary - mat1 = sparse([1 0; 0 2]) - vocab1 = ["word1", "word2"] - mat2 = sparse([3 0; 0 4]) - vocab2 = ["word3", "word4"] - - merged_mat, combined_vocab = merge_labeled_matrices(mat1, vocab1, mat2, vocab2) - - @test size(merged_mat) == (4, 4) - @test combined_vocab == ["word1", "word2", "word3", "word4"] - @test merged_mat == sparse([1 0 0 0; 0 2 0 0; 0 0 3 0; 0 0 0 4]) - - # Test with different data types - mat1 = [1.0 2.0; 3.0 4.0] - vocab1 = ["word1", "word2"] - mat2 = [5 6; 7 8] - vocab2 = ["word2", "word3"] - - merged_mat, combined_vocab = merge_labeled_matrices(mat1, vocab1, mat2, vocab2) - - @test eltype(merged_mat) == Float64 - @test size(merged_mat) == (4, 3) - @test combined_vocab == ["word1", "word2", "word3"] - @test merged_mat ≈ [1.0 2.0 0.0; 3.0 4.0 0.0; 0.0 5.0 6.0; 0.0 7.0 8.0] +using PromptingTools.Experimental.RAGTools: ChunkIndex, MultiIndex, CandidateChunks +using PromptingTools.Experimental.RAGTools: embeddings, chunks, tags, tags_vocab, sources + +@testset "ChunkIndex" begin + # Test constructors and basic accessors + chunks_test = ["chunk1", "chunk2"] + emb_test = ones(2, 2) + tags_test = sparse([1, 2], [1, 2], [true, true], 2, 2) + tags_vocab_test = ["vocab1", "vocab2"] + sources_test = ["source1", "source2"] + ci = ChunkIndex(chunks = chunks_test, + embeddings = emb_test, + tags = tags_test, + tags_vocab = tags_vocab_test, + sources = sources_test) + + @test chunks(ci) == chunks_test + @test (embeddings(ci)) == emb_test + @test tags(ci) == tags_test + @test tags_vocab(ci) == tags_vocab_test + @test sources(ci) == sources_test + + # Test identity/equality + ci1 = ChunkIndex(chunks = ["chunk1", "chunk2"], sources = ["source1", "source2"]) + ci2 = ChunkIndex(chunks = ["chunk1", "chunk2"], sources = ["source1", "source2"]) + @test ci1 == ci2 + + # Test equality with different chunks and sources + ci2 = ChunkIndex(chunks = ["chunk3", "chunk4"], sources = ["source3", "source4"]) + @test ci1 != ci2 + + # Test hcat with ChunkIndex + # Setup two different ChunkIndex with different tags and then hcat them + chunks1 = ["chunk1", "chunk2"] + tags1 = sparse([1, 2], [1, 2], [true, true], 2, 3) + tags_vocab1 = ["vocab1", "vocab2", "vocab3"] + sources1 = ["source1", "source1"] + ci1 = ChunkIndex(chunks = chunks1, + tags = tags1, + tags_vocab = tags_vocab1, + sources = sources1) + + chunks2 = ["chunk3", "chunk4"] + tags2 = sparse([1, 2], [1, 3], [true, true], 2, 3) + tags_vocab2 = ["vocab1", "vocab3", "vocab4"] + sources2 = ["source2", "source2"] + ci2 = ChunkIndex(chunks = chunks2, + tags = tags2, + tags_vocab = tags_vocab2, + sources = sources2) + + combined_ci = vcat(ci1, ci2) + @test size(tags(combined_ci), 1) == 4 + @test size(tags(combined_ci), 2) == 4 + @test length(unique(vcat(tags_vocab(ci1), tags_vocab(ci2)))) == + length(tags_vocab(combined_ci)) + @test sources(combined_ci) == vcat(sources(ci1), (sources(ci2))) + + # Test base var"==" with ChunkIndex + ci1 = ChunkIndex(chunks = ["chunk1"], + tags = 
trues(3, 1), + tags_vocab = ["vocab1"], + sources = ["source1"]) + ci2 = ChunkIndex(chunks = ["chunk1"], + tags = trues(3, 1), + tags_vocab = ["vocab1"], + sources = ["source1"]) + @test ci1 == ci2 end -@testset "ChunkIndex and MultiIndex getindex Tests" begin - @testset "ChunkIndex getindex" begin - ci = ChunkIndex(:index1, ["chunk1", "chunk2", "chunk3"]) - candidate = CandidateChunks(:index1, [1, 3]) - - @test getindex(ci, candidate) == ["chunk1", "chunk3"] - @test getindex(ci, candidate, :chunks) == ["chunk1", "chunk3"] - @test_throws AssertionError getindex(ci, candidate, :unsupported_field) - - # Test with non-matching index_id - candidate_wrong_id = CandidateChunks(:index2, [1, 3]) - @test getindex(ci, candidate_wrong_id) == String[] - end - - @testset "MultiIndex getindex" begin - ci1 = ChunkIndex(:index1, ["chunk1", "chunk2"]) - ci2 = ChunkIndex(:index2, ["chunk3", "chunk4"]) - mi = MultiIndex([ci1, ci2]) - candidate = CandidateChunks(:index2, [2]) - - @test getindex(mi, candidate) == ["chunk4"] - @test getindex(mi, candidate, :chunks) == ["chunk4"] - @test_throws AssertionError getindex(mi, candidate, :unsupported_field) - - # Test with non-existing index_id - candidate_non_existing = CandidateChunks(:index3, [1]) - @test getindex(mi, candidate_non_existing) == String[] - end +@testset "MultiIndex" begin + # Test constructors/accessors + # MultiIndex behaves as a container for ChunkIndexes + cin1 = ChunkIndex(chunks = ["chunk1"], sources = ["source1"]) + cin2 = ChunkIndex(chunks = ["chunk2"], sources = ["source2"]) + multi_index = MultiIndex(indexes = [cin1, cin2]) + @test length(multi_index.indexes) == 2 + @test cin1 in multi_index.indexes + @test cin2 in multi_index.indexes + + # Test base var"==" with MultiIndex + # Case where MultiIndexes are equal + cin1 = ChunkIndex(chunks = ["chunk1"], sources = ["source1"]) + cin2 = ChunkIndex(chunks = ["chunk2"], sources = ["source2"]) + mi1 = MultiIndex(indexes = [cin1, cin2]) + mi2 = MultiIndex(indexes = [cin1, cin2]) + @test mi1 == mi2 + + # Test equality with different ChunkIndexes inside + cin1 = ChunkIndex(chunks = ["chunk1"], sources = ["source1"]) + cin2 = ChunkIndex(chunks = ["chunk2"], sources = ["source2"]) + mi1 = MultiIndex(indexes = [cin1]) + mi2 = MultiIndex(indexes = [cin2]) + @test mi1 != mi2 end -@testset "MultiIndex Equality Tests" begin - index1 = ChunkIndex(:A) - index2 = ChunkIndex(:B) - index3 = ChunkIndex(:C) - - mi1 = MultiIndex([index1, index2]) - mi2 = MultiIndex([index1, index2]) - mi3 = MultiIndex([index2, index3]) - mi4 = MultiIndex([index1, index2, index3]) - mi5 = MultiIndex([index2, index1]) - - @test mi1 == mi2 # Identical MultiIndexes - @test mi1 != mi3 # Different indexes - @test mi1 != mi4 # Different number of indexes - @test mi3 != mi4 # Different indexes and different lengths - @test mi1 == mi5 # Same indexes, different order -end - -@testset "CandidateChunks" begin - # Different Index IDs and Intersecting Positions - cc1 = CandidateChunks(index_id = :index1, - positions = [1, 2, 3], - distances = [0.1, 0.2, 0.3]) - cc2 = CandidateChunks(index_id = :index2, - positions = [2, 3, 4], - distances = [0.3, 0.2, 0.1]) - cc3 = CandidateChunks(index_id = :index1, - positions = [3, 4, 5], - distances = [0.3, 0.4, 0.5]) - - # Different index IDs - result_diff_id = cc1 & cc2 - @test result_diff_id.index_id == :index1 - @test isempty(result_diff_id.positions) - @test isempty(result_diff_id.distances) - - # Intersecting positions - result_intersect = cc1 & cc3 - @test result_intersect.index_id == :index1 - @test 
result_intersect.positions == [3] - @test result_intersect.distances ≈ [0.4] - - # Missing Distances - cc1 = CandidateChunks(index_id = :index1, positions = [1, 2], distances = Float32[]) - cc2 = CandidateChunks(index_id = :index1, positions = [2, 3], distances = [0.2, 0.3]) - - result = cc1 & cc2 - @test result.index_id == :index1 - @test result.positions == [2] - @test isempty(result.distances) -end +@testset "getindex with CandidateChunks" begin + # Initialize a ChunkIndex with test data + chunks_data = ["First chunk", "Second chunk", "Third chunk"] + embeddings_data = rand(3, 3) # Random matrix with 3 embeddings + tags_data = sparse(Bool[1 1; 0 1; 1 0]) # Some arbitrary sparse matrix representation + tags_vocab_data = ["tag1", "tag2"] + chunk_sym = Symbol("TestChunkIndex") + test_chunk_index = ChunkIndex(chunks = chunks_data, + embeddings = embeddings_data, + tags = tags_data, + tags_vocab = tags_vocab_data, + sources = repeat(["test_source"], 3), + id = chunk_sym) + + # Test to get chunks based on valid CandidateChunks + candidate_chunks = CandidateChunks(index_id = chunk_sym, + positions = [1, 3], + distances = [0.1, 0.2]) + @test collect(test_chunk_index[candidate_chunks]) == ["First chunk", "Third chunk"] + + # Test with empty positions, which should result in an empty array + candidate_chunks_empty = CandidateChunks(index_id = chunk_sym, + positions = Int[], + distances = Float32[]) + @test isempty(test_chunk_index[candidate_chunks_empty]) + + # Test with positions out of bounds, should handle gracefully without errors + candidate_chunks_oob = CandidateChunks(index_id = chunk_sym, + positions = [10, -1], + distances = [0.5, 0.6]) + @test_throws AssertionError test_chunk_index[candidate_chunks_oob] + + # Test with an incorrect index_id, which should also result in an empty array + wrong_sym = Symbol("InvalidIndex") + candidate_chunks_wrong_id = CandidateChunks(index_id = wrong_sym, + positions = [1, 2], + distances = [0.3, 0.4]) + @test isempty(test_chunk_index[candidate_chunks_wrong_id]) + + # Test when chunks are requested from a MultiIndex, only chunks from the corresponding ChunkIndex should be returned + another_chuck_index = ChunkIndex(chunks = chunks_data, + embeddings = nothing, + tags = nothing, + tags_vocab = nothing, + sources = repeat(["another_source"], 3), + id = Symbol("AnotherChunkIndex")) + test_multi_index = MultiIndex(indexes = [ + test_chunk_index, + another_chuck_index, + ]) + @test collect(test_multi_index[candidate_chunks]) == ["First chunk", "Third chunk"] + + # Test when wrong index_id is used with MultiIndex, resulting in an empty array + @test isempty(test_multi_index[candidate_chunks_wrong_id]) + + # Test error case when trying to use a non-chunks field, should assert error as only :chunks field is supported + @test_throws AssertionError test_chunk_index[candidate_chunks, :nonexistent_field] +end \ No newline at end of file diff --git a/test/Experimental/RAGTools/utils.jl b/test/Experimental/RAGTools/utils.jl index e69de29bb..cc93c31f9 100644 --- a/test/Experimental/RAGTools/utils.jl +++ b/test/Experimental/RAGTools/utils.jl @@ -0,0 +1,46 @@ +using PromptingTools.Experimental.RAGTools: _check_aiextract_capability, + merge_labeled_matrices + +@testset "_check_aiextract_capability" begin + @test _check_aiextract_capability("gpt-3.5-turbo") == nothing + @test_throws AssertionError _check_aiextract_capability("llama2") +end + +@testset "merge_labeled_matrices" begin + # Test with dense matrices and overlapping vocabulary + mat1 = [1 2; 3 4] + vocab1 = 
["word1", "word2"] + mat2 = [5 6; 7 8] + vocab2 = ["word2", "word3"] + + merged_mat, combined_vocab = merge_labeled_matrices(mat1, vocab1, mat2, vocab2) + + @test size(merged_mat) == (4, 3) + @test combined_vocab == ["word1", "word2", "word3"] + @test merged_mat == [1 2 0; 3 4 0; 0 5 6; 0 7 8] + + # Test with sparse matrices and disjoint vocabulary + mat1 = sparse([1 0; 0 2]) + vocab1 = ["word1", "word2"] + mat2 = sparse([3 0; 0 4]) + vocab2 = ["word3", "word4"] + + merged_mat, combined_vocab = merge_labeled_matrices(mat1, vocab1, mat2, vocab2) + + @test size(merged_mat) == (4, 4) + @test combined_vocab == ["word1", "word2", "word3", "word4"] + @test merged_mat == sparse([1 0 0 0; 0 2 0 0; 0 0 3 0; 0 0 0 4]) + + # Test with different data types + mat1 = [1.0 2.0; 3.0 4.0] + vocab1 = ["word1", "word2"] + mat2 = [5 6; 7 8] + vocab2 = ["word2", "word3"] + + merged_mat, combined_vocab = merge_labeled_matrices(mat1, vocab1, mat2, vocab2) + + @test eltype(merged_mat) == Float64 + @test size(merged_mat) == (4, 3) + @test combined_vocab == ["word1", "word2", "word3"] + @test merged_mat ≈ [1.0 2.0 0.0; 3.0 4.0 0.0; 0.0 5.0 6.0; 0.0 7.0 8.0] +end \ No newline at end of file diff --git a/test/llm_openai.jl b/test/llm_openai.jl index 95364b847..cc45494d0 100644 --- a/test/llm_openai.jl +++ b/test/llm_openai.jl @@ -180,7 +180,7 @@ end @testset "OpenAI.create_chat" begin # Test CustomOpenAISchema() with a mock server PORT = rand(1000:2000) - echo_server = HTTP.serve!(PORT) do req + echo_server = HTTP.serve!(PORT, verbose = -1) do req content = JSON3.read(req.body) user_msg = last(content[:messages]) response = Dict(:choices => [Dict(:message => user_msg)], @@ -206,7 +206,7 @@ end @testset "OpenAI.create_embeddings" begin # Test CustomOpenAISchema() with a mock server PORT = rand(1000:2000) - echo_server = HTTP.serve!(PORT) do req + echo_server = HTTP.serve!(PORT, verbose = -1) do req content = JSON3.read(req.body) response = Dict(:data => [Dict(:embedding => ones(128))], :usage => Dict(:total_tokens => length(content[:input]), From bda0ce2651c6155de92310b32c602d6e19c6bde3 Mon Sep 17 00:00:00 2001 From: J S <49557684+svilupp@users.noreply.github.com> Date: Fri, 22 Dec 2023 20:57:50 +0100 Subject: [PATCH 05/14] update docs --- src/Experimental/RAGTools/RAGTools.jl | 2 +- src/Experimental/RAGTools/evaluation.jl | 51 +++++++++++++++++++++---- src/Experimental/RAGTools/generation.jl | 1 + src/Experimental/RAGTools/types.jl | 1 + 4 files changed, 47 insertions(+), 8 deletions(-) diff --git a/src/Experimental/RAGTools/RAGTools.jl b/src/Experimental/RAGTools/RAGTools.jl index 47d4113ce..76fda1a55 100644 --- a/src/Experimental/RAGTools/RAGTools.jl +++ b/src/Experimental/RAGTools/RAGTools.jl @@ -27,7 +27,7 @@ include("retrieval.jl") export airag include("generation.jl") -export build_qa_evals +export build_qa_evals, run_qa_evals include("evaluation.jl") end \ No newline at end of file diff --git a/src/Experimental/RAGTools/evaluation.jl b/src/Experimental/RAGTools/evaluation.jl index 010799142..67dae42c1 100644 --- a/src/Experimental/RAGTools/evaluation.jl +++ b/src/Experimental/RAGTools/evaluation.jl @@ -132,20 +132,57 @@ function score_retrieval_rank(orig_context::AbstractString, (occursin.(candidate_context, Ref(orig_context)))) end -"Single QAEvalItem evalution" +""" + run_qa_evals(qa_item::QAEvalItem, ctx::RAGContext; verbose::Bool = true, + parameters_dict::AbstractDict, judge_template::Symbol = :RAGJudgeAnswerFromContext, + model_judge::AbstractString) -> QAEvalResult + +Evaluates a single `QAEvalItem` 
using a RAG context (`RAGContext`) and returns a `QAEvalResult` structure. This function assesses the relevance and accuracy of the answers generated in a QA evaluation context. + +# Arguments +- `qa_item::QAEvalItem`: The QA evaluation item containing the question and its answer. +- `ctx::RAGContext`: The context used for generating the QA pair, including the original context and the answers. + Comes from `airag(...; return_context=true)` +- `verbose::Bool`: If `true`, enables verbose logging. Defaults to `true`. +- `parameters_dict::AbstractDict`: Track any parameters used for later evaluations. +- `judge_template::Symbol`: The template symbol for the AI model used to judge the answer. Defaults to `:RAGJudgeAnswerFromContext`. +- `model_judge::AbstractString`: The AI model used for judging the answer's quality. + Defaults to standard chat model, but it is advisable to use more powerful model GPT-4. + +# Returns +`QAEvalResult`: An evaluation result that includes various scores and metadata related to the QA evaluation. + +# Notes +- The function computes a retrieval score and rank based on how well the context matches the QA context. +- It then uses the `judge_template` and `model_judge` to score the answer's accuracy and relevance. +- In case of errors during evaluation, the function logs a warning (if `verbose` is `true`) and the `answer_score` will be set to `nothing`. + +# Examples + +Evaluating a QA pair using a specific context and model: +```julia +qa_item = QAEvalItem(question="What is the capital of France?", answer="Paris", context="France is a country in Europe.") +ctx = RAGContext(source="Wikipedia", context="France is a country in Europe.", answer="Paris") +parameters_dict = Dict("param1" => "value1", "param2" => "value2") + +eval_result = run_qa_evals(qa_item, ctx, parameters_dict=parameters_dict, model_judge="MyAIJudgeModel") +``` +""" function run_qa_evals(qa_item::QAEvalItem, ctx::RAGContext; verbose::Bool = true, parameters_dict::AbstractDict, - judge_template::Symbol = :RAGJudgeAnswerFromContext, - model_judge::AbstractString) + judge_template::Symbol = :RAGJudgeAnswerFromContextShort, + model_judge::AbstractString = PT.MODEL_CHAT) retrieval_score = score_retrieval_hit(qa_item.context, ctx.context) retrieval_rank = score_retrieval_rank(qa_item.context, ctx.context) + # Note we could evaluate if RAGContext and QAEvalItem are at least using the same sources etc. 
+ answer_score = try msg = aiextract(judge_template; model = model_judge, verbose, ctx.context, - question, - msg.content, - return_type = RAG.JudgeAllScores) + ctx.question, + ctx.answer, + return_type = JudgeAllScores) final_rating = if msg.content isa AbstractDict && haskey(msg.content, :final_rating) # if return type parsing failed msg.content[:final_rating] @@ -159,7 +196,7 @@ function run_qa_evals(qa_item::QAEvalItem, ctx::RAGContext; end return QAEvalResult(; - ctx.source, + qa_item.source, qa_item.context, qa_item.question, ctx.answer, diff --git a/src/Experimental/RAGTools/generation.jl b/src/Experimental/RAGTools/generation.jl index 733fe5e3c..93d10fd99 100644 --- a/src/Experimental/RAGTools/generation.jl +++ b/src/Experimental/RAGTools/generation.jl @@ -134,6 +134,7 @@ function airag(index::AbstractChunkIndex, rag_template::Symbol = :RAGAnswerFromC question, answer = msg.content, context, + sources = sources(index)[reranked_candidates.positions], emb_candidates, tag_candidates, filtered_candidates, diff --git a/src/Experimental/RAGTools/types.jl b/src/Experimental/RAGTools/types.jl index cd8ee1607..2aeb7a4d0 100644 --- a/src/Experimental/RAGTools/types.jl +++ b/src/Experimental/RAGTools/types.jl @@ -122,6 +122,7 @@ A struct for debugging RAG answers. It contains the question, answer, context, a question::AbstractString answer::AbstractString context::Vector{<:AbstractString} + sources::Vector{<:AbstractString} emb_candidates::CandidateChunks tag_candidates::Union{Nothing, CandidateChunks} filtered_candidates::CandidateChunks From 33770f31d2bdc7e032e9f642e90df4f585f6ff36 Mon Sep 17 00:00:00 2001 From: J S <49557684+svilupp@users.noreply.github.com> Date: Fri, 22 Dec 2023 21:09:06 +0100 Subject: [PATCH 06/14] add example --- examples/building_a_RAG.jl | 137 +++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 examples/building_a_RAG.jl diff --git a/examples/building_a_RAG.jl b/examples/building_a_RAG.jl new file mode 100644 index 000000000..877ef5995 --- /dev/null +++ b/examples/building_a_RAG.jl @@ -0,0 +1,137 @@ +# # Small example for how to build a RAG system with new RAGTools +# Note: RAGTools is still experimental and will change in the future. Ideally, they will be cleaned up and moved to a dedicated package + +using LinearAlgebra, SparseArrays +using PromptingTools +using PromptingTools.Experimental.RAGTools +using JSON3, Serialization, DataFramesMeta +using Statistics: mean +const PT = PromptingTools +const RT = PromptingTools.Experimental.RAGTools + +# ## Ask questions E2E +# Let's put together a few copy&pasted text files from DataFrames.jl docs +dir_raw = joinpath("markdown", "DataFrames") # folder with text documents +files = ["comparison_with_python.txt", "database_style_joins.txt", "what_is_dataframes.txt"] +index = build_index(joinpath.(dir_raw, files); extract_metadata = false) + +# Ask a question +answer = airag(index; question = "I like dplyr, what is the equivalent in Julia?") +# AIMessage("The equivalent package in Julia to the dplyr package in R is DataFrames.jl.") +# The equivalent package in Julia to the dplyr package in R is DataFrames.jl. + +# First RAG in two lines? Done! +# +# What does it do? +# - `build_index` will chunk the documents into smaller pieces, embed them into numbers (to be able to judge similarity of chunks) and, optionally, create a lookup index of metadata/tags for each chunk) +# - `index` is the result of this step and it holds your chunks, embeddings, and other metadata! 
Just show it :)
+# - `airag` will
+# - embed your question
+# - find the closest chunks in the index
+# - [OPTIONAL] extract any potential tags/filters from the question and apply them to filter down the potential candidates
+# - [OPTIONAL] rerank the candidate chunks
+# - generate an answer from the closest chunks
+
+# You should save the index for later!
+serialize("examples/index.jls", index)
+index = deserialize("examples/index.jls")
+
+# # Evaluations
+# However, we want to evaluate the quality of the system. For that, we need a set of questions and answers.
+# Ideally, we would hand-craft a set of high-quality Q&A pairs. However, this is time-consuming and expensive.
+# Let's generate them from the chunks in our index!
+
+# ## Generate Q&A pairs
+
+# We need to provide: chunks and sources (filepaths for future reference)
+evals = build_qa_evals(RT.chunks(index),
+    RT.sources(index);
+    instructions = "None.",
+    verbose = true);
+# Info: Q&A Sets built! (cost: $0.143) -- not bad!
+
+# Note: In practice, you would review each item in this golden evaluation set (and delete any generic/poor questions).
+# It will determine the future success of your app, so you need to make sure it's good!
+
+## Save the evals for later
+JSON3.write("examples/evals.json", evals)
+evals = JSON3.read("examples/evals.json", Vector{RT.QAEvalItem});
+
+# ## Explore one Q&A pair
+## Let's explore one evals item -- it's not the best but gives you the idea!
+evals[1]
+# QAEvalItem:
+# source: markdown/DataFrames/comparison_with_python.txt
+# context: Comparisons
+# This section compares DataFrames.jl with other data manipulation frameworks in Python, R, and Stata.
+
+# A sample data set can be created using the following code:
+
+# using DataFrames
+# using Statistics
+# question: What frameworks are compared with DataFrames.jl?
+# answer: Python, R, and Stata
+
+# ## Evaluate this Q&A pair
+
+## Let's answer and evaluate this QA item with the judge
+# Note that we used the same question, but generated a different context and answer via `airag`
+msg, ctx = airag(index; evals[1].question, return_context = true);
+
+# ctx is a RAGContext object that keeps all intermediate states of the RAG pipeline for easy evaluation
+judged = aiextract(:RAGJudgeAnswerFromContext;
+    ctx.context,
+    ctx.question,
+    ctx.answer,
+    return_type = RT.JudgeAllScores)
+judged.content
+# Dict{Symbol, Any} with 7 entries:
+#   :final_rating => 4.8
+#   :clarity => 5
+#   :completeness => 5
+#   :relevance => 5
+#   :consistency => 4
+#   :helpfulness => 5
+#   :rationale => "The answer is highly relevant to the user's question, as it provides a comprehensive list of frameworks that are compared with DataFrames.jl. The answer is complete, covering all
+
+x = run_qa_evals(evals[10], ctx;
+    parameters_dict = Dict(:top_k => 3), verbose = true, model_judge = "gpt4t")
+# Fortunately, we don't have to do this one by one -- let's evaluate all our Q&A pairs at once.
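+
+# A quick aside before we scale up: what do `retrieval_score` and `retrieval_rank` actually measure?
+# Roughly, whether (and at which position) the golden `context` of the QA item shows up among the
+# chunks retrieved by `airag`. The two helpers below are only an illustrative sketch of that idea --
+# the real logic lives in `score_retrieval_hit` / `score_retrieval_rank`, and `probe_length` is an
+# arbitrary choice for this demo, not a package parameter:
+naive_retrieval_rank(golden::AbstractString, chunks; probe_length::Int = 30) = findfirst(chunk -> occursin(first(golden, probe_length), chunk), chunks)
+naive_retrieval_hit(golden::AbstractString, chunks; kwargs...) = isnothing(naive_retrieval_rank(golden, chunks; kwargs...)) ? 0.0 : 1.0
+# Eg, `naive_retrieval_rank(evals[1].context, ctx.context)` returns the position of the first retrieved
+# chunk that contains the beginning of the golden context (or `nothing` if it was not retrieved at all).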
+ +# ## Evaluate the whole set + +# Let's run each question&answer through our eval loop in async (we do it only for the first 10) +# See the `?airag` for which parameters you can tweak, eg, top_k +results = asyncmap(evals[1:10]) do qa_item + ## Generate an answer -- often you want the model_judge to be the highest quality possible, eg, "GPT-4 Turbo" (alias "gpt4t) + msg, ctx = airag(index; qa_item.question, return_context = true, + top_k = 3, verbose = false, model_judge = "gpt4t") + ## Evaluate the response + # Note: you can log key parameters for easier analysis later + run_qa_evals(qa_item, ctx; parameters_dict = Dict(:top_k => 3), verbose = false) +end +# Note that failed evals can show as "nothing", so make sure to handle them +results = filter(!isnothing, results) + +## Let's take a simple average to calculate our score +@info "RAG Evals: $(length(results)) results, Avg. score: $(round(mean(x->x.answer_score, results);digits=1)), Retrieval score: $(100*round(mean(x->x.retrieval_score,results);digits=1))%" +# [ Info: RAG Evals: 10 results, Avg. score: 4.5, Retrieval score: 70.0% + +# or you can analyze it in a DataFrame +df = DataFrame(results) +# 10×8 DataFrame +# Row │ source context ... + +# We're done for today! + +# # What would we do next? +# - Review your evaluation golden data set and keep only the good items +# - Play with the chunk sizes (max_length in build_index) and see how it affects the quality +# - Explore using metadata/key filters (`extract_metadata=true` in build_index) +# - Add filtering for semantic similarity (embedding distance) to make sure we don't pick up irrelevant chunks in the context +# - Use multiple indices or a hybrid index (add a simple BM25 lookup from TextAnalysis.jl) +# - Data processing is the most important step - properly parsed and split text could make wonders +# - Add re-ranking of context (see `rerank` function, you can use Cohere ReRank API)`) +# - Improve the question embedding (eg, rephrase it, generate hypothetical answers and use them to find better context) +# +# ... and much more! See some ideas in [Anyscale RAG tutorial](https://www.anyscale.com/blog/a-comprehensive-guide-for-building-rag-based-llm-applications-part-1) From c936c4d5215a1fc5c3ed3e66cede600e37757e6c Mon Sep 17 00:00:00 2001 From: J S <49557684+svilupp@users.noreply.github.com> Date: Fri, 22 Dec 2023 21:11:45 +0100 Subject: [PATCH 07/14] example --- CHANGELOG.md | 2 +- examples/{building_a_RAG.jl => building_RAG.jl} | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename examples/{building_a_RAG.jl => building_RAG.jl} (97%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 050419827..1d17f6ea5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added -- Experimental sub-module RAGTools providing basic Retrieval-Augmented Generation functionality. See `?RAGTools` for more information. It's nested inside of `PromptingTools.Experimental.RAGTools` to signify that it might change in the future. +- Experimental sub-module RAGTools providing basic Retrieval-Augmented Generation functionality. See `?RAGTools` for more information. It's nested inside of `PromptingTools.Experimental.RAGTools` to signify that it might change in the future. 
Key functions are `build_index` and `airag`, but it also provides a suite to make evaluation easier (see `?build_qa_evals` and `?run_qa_evals` or just see the example `examples/building_RAG.jl`)
### Fixed
- Stricter code parsing in `AICode` to avoid false positives (code blocks must end with "```\n" to catch comments inside text)
diff --git a/examples/building_a_RAG.jl b/examples/building_RAG.jl
similarity index 97%
rename from examples/building_a_RAG.jl
rename to examples/building_RAG.jl
index 877ef5995..c680b64de 100644
--- a/examples/building_a_RAG.jl
+++ b/examples/building_RAG.jl
@@ -1,9 +1,9 @@
-# # Small example for how to build a RAG system with new RAGTools
+# # Small example for how to build a RAG system with the new RAGTools
 # Note: RAGTools is still experimental and will change in the future. Ideally, they will be cleaned up and moved to a dedicated package
 using LinearAlgebra, SparseArrays
 using PromptingTools
-using PromptingTools.Experimental.RAGTools
+using PromptingTools.Experimental.RAGTools # Experimental! May change
 using JSON3, Serialization, DataFramesMeta
 using Statistics: mean
 const PT = PromptingTools
 const RT = PromptingTools.Experimental.RAGTools
From 0a8a19c1fb363d49c341f3a4881bea8ef5fd7b10 Mon Sep 17 00:00:00 2001
From: J S <49557684+svilupp@users.noreply.github.com>
Date: Fri, 22 Dec 2023 21:41:16 +0100
Subject: [PATCH 08/14] update docs
---
 docs/make.jl | 1 +
 docs/src/examples/building_RAG.md | 214 ++++++++++++++
 examples/building_RAG.jl | 53 ++--
 examples/data/database_style_joins.txt | 392 +++++++++++++++++++++++++
 examples/data/what_is_dataframes.txt | 141 +++++++++
 5 files changed, 778 insertions(+), 23 deletions(-)
 create mode 100644 docs/src/examples/building_RAG.md
 create mode 100644 examples/data/database_style_joins.txt
 create mode 100644 examples/data/what_is_dataframes.txt
diff --git a/docs/make.jl b/docs/make.jl
index d3e676100..533af9d08 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -24,6 +24,7 @@ makedocs(;
             "Various examples" => "examples/readme_examples.md",
             "Using AITemplates" => "examples/working_with_aitemplates.md",
             "Local models with Ollama.ai" => "examples/working_with_ollama.md",
+            "Building RAG Application" => "examples/building_RAG.md",
         ],
         "F.A.Q." => "frequently_asked_questions.md",
         "Reference" => "reference.md",
diff --git a/docs/src/examples/building_RAG.md b/docs/src/examples/building_RAG.md
new file mode 100644
index 000000000..4c0e57d43
--- /dev/null
+++ b/docs/src/examples/building_RAG.md
@@ -0,0 +1,214 @@
+```@meta
+EditURL = "../../../examples/building_RAG.jl"
+```
+
+# Building a Simple Retrieval-Augmented Generation (RAG) System with RAGTools
+
+Note: RAGTools module is still experimental and will change in the future. Ideally, they will be cleaned up and moved to a dedicated package
+
+````julia
+using LinearAlgebra, SparseArrays
+using PromptingTools
+using PromptingTools.Experimental.RAGTools # Experimental!
May change +using JSON3, Serialization, DataFramesMeta +using Statistics: mean +const PT = PromptingTools +const RT = PromptingTools.Experimental.RAGTools +```` + +## Ask questions E2E +Let's put together a few copy&pasted text files from DataFrames.jl docs + +````julia +files = [ + joinpath("examples", "data", "database_style_joins.txt"), + joinpath("examples", "data", "what_is_dataframes.txt"), +] +index = build_index(files; extract_metadata = false); +```` + +Let's ask a question + +````julia +answer = airag(index; question = "I like dplyr, what is the equivalent in Julia?") +```` + +```` +AIMessage("The equivalent package in Julia to dplyr in R is DataFramesMeta.jl. It provides convenience functions for data manipulation with syntax similar to dplyr.") +```` + +First RAG in two lines? Done! + +What does it do? +- `build_index` will chunk the documents into smaller pieces, embed them into numbers (to be able to judge similarity of chunks) and, optionally, create a lookup index of metadata/tags for each chunk) + - `index` is the result of this step and it holds your chunks, embeddings, and other metadata! Just show it :) +- `airag` will + - embed your question + - find the closest chunks in the index + - [OPTIONAL] extract any potential tags/filters from the question and apply them to filter down the potential candidates + - [OPTIONAL] rerank the candidate chunks +- generate an answer from the closest chunks + +You should save the index for later! + +````julia +serialize("examples/index.jls", index) +index = deserialize("examples/index.jls"); +```` + +# Evaluations +However, we want to evaluate the quality of the system. For that, we need a set of questions and answers. +Ideally, we would handcraft a set of high-quality Q&A pairs. However, this is time-consuming and expensive. +Let's generate them from the chunks in our index! + +## Generate Q&A pairs + +We need to provide: chunks and sources (file paths for future reference) + +````julia +evals = build_qa_evals(RT.chunks(index), + RT.sources(index); + instructions = "None.", + verbose = true); +```` + +```` +[ Info: Q&A Sets built! (cost: $0.102) + +```` + +> [!TIP] +> In practice, you would review each item in this golden evaluation set (and delete any generic/poor questions). +> It will determine the future success of your app, so you need to make sure it's good! + +````julia +# Save the evals for later +JSON3.write("examples/evals.json", evals) +evals = JSON3.read("examples/evals.json", Vector{RT.QAEvalItem}); +```` + +## Explore one Q&A pair + +Let's explore one evals item -- it's not the best quality but gives you the idea! +````julia +evals[1] +```` + +```` +QAEvalItem: + source: examples/data/database_style_joins.txt + context: Database-Style Joins +Introduction to joins +We often need to combine two or more data sets together to provide a complete picture of the topic we are studying. For example, suppose that we have the following two data sets: + +julia> using DataFrames + question: What is the purpose of joining two or more data sets together? + answer: The purpose of joining two or more data sets together is to provide a complete picture of the topic being studied. 
+ +```` + +## Evaluate this Q&A pair + +````julia +# Let's answer and evaluate this QA item with the judge +# Note: that we used the same question, but generated a different context and answer via `airag` +msg, ctx = airag(index; evals[1].question, return_context = true); +# ctx is a RAGContext object that keeps all intermediate states of the RAG pipeline for easy evaluation +judged = aiextract(:RAGJudgeAnswerFromContext; + ctx.context, + ctx.question, + ctx.answer, + return_type = RT.JudgeAllScores) +judged.content +```` + +```` +Dict{Symbol, Any} with 6 entries: + :final_rating => 4.8 + :clarity => 5 + :completeness => 4 + :relevance => 5 + :consistency => 5 + :helpfulness => 5 +```` + +We can also run the whole evaluation in a function (a few more metrics are available): +````julia +x = run_qa_evals(evals[10], ctx; + parameters_dict = Dict(:top_k => 3), verbose = true, model_judge = "gpt4t") +```` + +```` +QAEvalResult: + source: examples/data/database_style_joins.txt + context: outerjoin: the output contains rows for values of the key that exist in any of the passed data frames. +semijoin: Like an inner join, but output is restricted to columns from the first (left) argument. + question: What is the difference between outer join and semi join? + answer: The purpose of joining two or more data sets together is to combine them in order to provide a complete picture or analysis of a specific topic or dataset. By joining data sets, we can combine information from multiple sources to gain more insights and make more informed decisions. + retrieval_score: 0.0 + retrieval_rank: nothing + answer_score: 5 + parameters: Dict(:top_k => 3) + +```` + +Fortunately, we don't have to do this one by one -- let's evaluate all our Q&A pairs at once. + +## Evaluate the whole set + +Let's run each question&answer through our eval loop in async (we do it only for the first 10) +See the `?airag` for which parameters you can tweak, eg, top_k + +````julia +results = asyncmap(evals[1:10]) do qa_item + # Generate an answer -- often you want the model_judge to be the highest quality possible, eg, "GPT-4 Turbo" (alias "gpt4t) + msg, ctx = airag(index; qa_item.question, return_context = true, + top_k = 3, verbose = false, model_judge = "gpt4t") + # Evaluate the response + # Note: you can log key parameters for easier analysis later + run_qa_evals(qa_item, ctx; parameters_dict = Dict(:top_k => 3), verbose = false) +end +## Note that the "failed" evals can show as "nothing", so make sure to handle them. +results = filter(!isnothing, results); +```` + + +````julia + +# Let's take a simple average to calculate our score +@info "RAG Evals: $(length(results)) results, Avg. score: $(round(mean(x->x.answer_score, results);digits=1)), Retrieval score: $(100*round(mean(x->x.retrieval_score,results);digits=1))%" +```` + +```` +[ Info: RAG Evals: 10 results, Avg. score: 4.6, Retrieval score: 100.0% + +```` + +or you can analyze it in a DataFrame + +````julia +df = DataFrame(results) +```` + +```@raw html +
Row | source | context | question | answer | retrieval_score | retrieval_rank | answer_score | parameters |
---|---|---|---|---|---|---|---|---|
|  | String | String | String | SubStrin… | Float64 | Int64 | Float64 | Dict… |
1 | examples/data/database_style_joins.txt | Database-Style Joins\nIntroduction to joins\nWe often need to combine two or more data sets together to provide a complete picture of the topic we are studying. For example, suppose that we have the following two data sets:\n\njulia> using DataFrames | What is the purpose of joining two or more data sets together? | The purpose of joining two or more data sets together is to combine the data sets based on a common key and provide a complete picture of the topic being studied. | 1.0 | 1 | 5.0 | Dict(:top_k=>3) |
2 | examples/data/database_style_joins.txt | julia> people = DataFrame(ID=[20, 40], Name=["John Doe", "Jane Doe"])\n2×2 DataFrame\n Row │ ID Name\n │ Int64 String\n─────┼─────────────────\n 1 │ 20 John Doe\n 2 │ 40 Jane Doe | What is the DataFrame called 'people' composed of? | The DataFrame called 'people' consists of two columns: 'ID' and 'Name'. The 'ID' column contains integers, and the 'Name' column contains strings. | 1.0 | 1 | 4.0 | Dict(:top_k=>3) |
3 | examples/data/database_style_joins.txt | julia> jobs = DataFrame(ID=[20, 40], Job=["Lawyer", "Doctor"])\n2×2 DataFrame\n Row │ ID Job\n │ Int64 String\n─────┼───────────────\n 1 │ 20 Lawyer\n 2 │ 40 Doctor | What are the jobs and IDs listed in the dataframe? | The jobs and IDs listed in the dataframe are as follows:\n\nID: 20\nJob: Lawyer\n\nID: 40\nJob: Doctor | 1.0 | 1 | 4.67 | Dict(:top_k=>3) |
4 | examples/data/database_style_joins.txt | We might want to work with a larger data set that contains both the names and jobs for each ID. We can do this using the innerjoin function: | How can we combine the names and jobs for each ID in a larger data set? | We can use the `innerjoin` function to combine the names and jobs for each ID in a larger data set. | 1.0 | 1 | 4.33333 | Dict(:top_k=>3) |
5 | examples/data/database_style_joins.txt | julia> innerjoin(people, jobs, on = :ID)\n2×3 DataFrame\n Row │ ID Name Job\n │ Int64 String String\n─────┼─────────────────────────\n 1 │ 20 John Doe Lawyer\n 2 │ 40 Jane Doe Doctor | What is the name of the person with the ID 40 and their job? | The name of the person with the ID 40 is Jane Doe and their job is Doctor. | 1.0 | 2 | 4.67 | Dict(:top_k=>3) |
6 | examples/data/database_style_joins.txt | In relational database theory, this operation is generally referred to as a join. The columns used to determine which rows should be combined during a join are called keys.\n\nThe following functions are provided to perform seven kinds of joins: | What are the different kinds of joins? | The different kinds of joins are:\n\n1. Inner Join: Returns only the rows that have matching values in both data frames.\n2. Left Join: Returns all rows from the left data frame and the matching rows from the right data frame.\n3. Right Join: Returns all rows from the right data frame and the matching rows from the left data frame.\n4. Full Outer Join: Returns all rows from both data frames and fills in missing values with null.\n5. Cross Join: Returns the cartesian product of the rows from both data frames.\n6. Semi Join: Returns only the rows from the left data frame that have matching values in the right data frame.\n7. Anti Join: Returns only the rows from the left data frame that do not have matching values in the right data frame. | 1.0 | 1 | 4.66667 | Dict(:top_k=>3) |
7 | examples/data/database_style_joins.txt | innerjoin: the output contains rows for values of the key that exist in all passed data frames. | What does the output of the inner join operation contain? | The output of the inner join operation contains only the rows for values of the key that exist in all passed data frames. | 1.0 | 1 | 5.0 | Dict(:top_k=>3) |
8 | examples/data/database_style_joins.txt | leftjoin: the output contains rows for values of the key that exist in the first (left) argument, whether or not that value exists in the second (right) argument. | What is the purpose of the left join operation? | The purpose of the left join operation is to combine data from two tables based on a common key, where all rows from the left (first) table are included in the output, regardless of whether there is a match in the right (second) table. | 1.0 | 1 | 4.66667 | Dict(:top_k=>3) |
9 | examples/data/database_style_joins.txt | rightjoin: the output contains rows for values of the key that exist in the second (right) argument, whether or not that value exists in the first (left) argument. | What is the purpose of the right join operation? | The purpose of the right join operation is to include all the rows from the second (right) argument, regardless of whether a match is found in the first (left) argument. | 1.0 | 1 | 4.67 | Dict(:top_k=>3) |
10 | examples/data/database_style_joins.txt | outerjoin: the output contains rows for values of the key that exist in any of the passed data frames.\nsemijoin: Like an inner join, but output is restricted to columns from the first (left) argument. | What is the difference between outer join and semi join? | The difference between outer join and semi join is that outer join includes rows for values of the key that exist in any of the passed data frames, whereas semi join is like an inner join but only outputs columns from the first argument. | 1.0 | 1 | 4.66667 | Dict(:top_k=>3) |