Register Llama3.1 + minor retrieval improvements
svilupp authored Jul 23, 2024
1 parent 89d4c43 commit 0f1a334
Showing 5 changed files with 91 additions and 21 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
@@ -10,6 +10,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0)

### Fixed

## [0.42.0]

### Added
- Registered the new Meta Llama 3.1 models hosted on GroqCloud and Together.ai (e.g., the Groq-hosted `gllama370` now points to the latest available model, and the 405b model has the new alias `gllama3405`). Because those names are quite clunky, I've added size-based abbreviations small/medium/large (i.e., 8b, 70b, 405b) as `gls`/`glm`/`gll` for Llama 3.1 hosted on GroqCloud and, similarly, `tls`/`tlm`/`tll` for Llama 3.1 on Together.ai (see the usage sketch below).
- Generic Llama 3 model aliases for Groq and Together.ai have been updated to point to the latest available models (Llama 3.1).
- Added the Gemma 2 9b model hosted on GroqCloud to the model registry (alias `ggemma9`).
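
A minimal usage sketch for the new aliases (illustrative only; assumes the relevant GroqCloud/Together.ai API keys are configured in your environment):

```julia
using PromptingTools

# The size-based aliases resolve to the Llama 3.1 models registered in this release:
msg = aigenerate("Say hi!"; model = "gls") # Groq-hosted Llama 3.1 8b ("small")
msg = aigenerate("Say hi!"; model = "tll") # Together.ai-hosted Llama 3.1 405b ("large")
```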

### Updated
- Minor optimizations to `SubDocumentTermMatrix` to reduce memory allocations and improve performance.

## [0.41.0]

### Added
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "PromptingTools"
uuid = "670122d1-24a8-4d70-bfce-740807c42192"
authors = ["J S @svilupp and contributors"]
version = "0.41.0"
version = "0.42.0"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
8 changes: 6 additions & 2 deletions src/Experimental/RAGTools/retrieval.jl
@@ -230,7 +230,9 @@ function find_closest(
finder::AbstractSimilarityFinder, index::AbstractChunkIndex,
query_emb::AbstractVector{<:Real}, query_tokens::AbstractVector{<:AbstractString} = String[];
top_k::Int = 100, kwargs...)
- isnothing(chunkdata(index)) && return CandidateChunks(; index_id = indexid(index))
+ if isnothing(chunkdata(parent(index)))
+ return CandidateChunks(; index_id = indexid(index))
+ end
positions, scores = find_closest(finder, chunkdata(index),
query_emb, query_tokens;
top_k, kwargs...)
@@ -244,7 +246,9 @@ function find_closest(
finder::AbstractSimilarityFinder, index::AbstractChunkIndex,
query_emb::AbstractMatrix{<:Real}, query_tokens::AbstractVector{<:AbstractVector{<:AbstractString}} = Vector{Vector{String}}();
top_k::Int = 100, kwargs...)
- isnothing(chunkdata(index)) && CandidateChunks(; index_id = indexid(index))
+ if isnothing(chunkdata(parent(index)))
+ return CandidateChunks(; index_id = indexid(index))
+ end
## reduce top_k since we have more than one query
top_k_ = top_k ÷ size(query_emb, 2)
## simply vcat together (gets sorted from the highest similarity to the lowest)
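Beyond switching the guard to `chunkdata(parent(index))`, note that the second hunk also adds the previously missing `return`: in Julia, a bare `cond && expr` evaluates `expr` but does not leave the enclosing function. A standalone sketch of the difference (toy code, not part of the package):

```julia
# Toy functions illustrating the short-circuit pitfall fixed above.
function guard_without_return(x)
    isnothing(x) && "early result"   # constructed, then discarded
    return "fell through"
end

function guard_with_return(x)
    isnothing(x) && return "early result"
    return "fell through"
end

guard_without_return(nothing)  # "fell through" -- the old, buggy behavior
guard_with_return(nothing)     # "early result" -- the fixed behavior
```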
26 changes: 17 additions & 9 deletions src/Experimental/RAGTools/types.jl
@@ -6,7 +6,8 @@ Base.parent(index::AbstractDocumentIndex) = index
indexid(index::AbstractDocumentIndex) = index.id
chunkdata(index::AbstractChunkIndex) = index.chunkdata
"Access chunkdata for a subset of chunks, `chunk_idx` is a vector of chunk indices in the index"
- function chunkdata(index::AbstractChunkIndex, chunk_idx::AbstractVector{<:Integer})
+ Base.@propagate_inbounds function chunkdata(
+ index::AbstractChunkIndex, chunk_idx::AbstractVector{<:Integer})
## We need this accessor because different chunk indices can have chunks in different dimensions!!
chkdata = chunkdata(index)
if isnothing(chkdata)
@@ -209,7 +210,7 @@ tf(dtm::SubDocumentTermMatrix) = dtm.tf
vocab(dtm::SubDocumentTermMatrix) = Base.parent(dtm) |> vocab
vocab_lookup(dtm::SubDocumentTermMatrix) = Base.parent(dtm) |> vocab_lookup
idf(dtm::SubDocumentTermMatrix) = Base.parent(dtm) |> idf
- function doc_rel_length(dtm::SubDocumentTermMatrix)
+ Base.@propagate_inbounds function doc_rel_length(dtm::SubDocumentTermMatrix)
view(doc_rel_length(Base.parent(dtm)), positions(dtm))
end
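Several accessors in this file gain `Base.@propagate_inbounds`. As a quick aside, the macro lets a caller's `@inbounds` annotation propagate into the accessor, so the bounds check on the inner indexing can also be elided in hot loops; a toy sketch (not package code):

```julia
# With @propagate_inbounds, calling the accessor inside an @inbounds block
# skips the bounds check on `v[i]` as well.
Base.@propagate_inbounds get_at(v::AbstractVector, i::Integer) = v[i]

function sum_all(v)
    s = zero(eltype(v))
    for i in eachindex(v)
        @inbounds s += get_at(v, i)  # inner bounds check elided too
    end
    return s
end

sum_all(collect(1:10))  # 55
```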
# hcat for SubDocumentTermMatrix does not make sense -> the vocabulary is the same / shared
@@ -227,6 +228,7 @@ Base.@propagate_inbounds function Base.view(
throw(BoundsError(tf_mat, max_pos))
end
## computations on top of views of sparse arrays are expensive, materialize the view
+ ## Moreover, nonzeros and rowvals accessors for SparseMatrixCSC are not defined for views
tf_ = tf_mat[doc_idx, :]
SubDocumentTermMatrix(dtm, tf_, collect(doc_idx))
end
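To make the materialization comment concrete, a hedged illustration with the SparseArrays standard library (standalone toy code):

```julia
using SparseArrays

tf_mat = sprand(100, 50, 0.1)  # mock document-term matrix
doc_idx = 1:10

v = view(tf_mat, doc_idx, :)   # lazy SubArray: cheap to create, but generic
                               # fallbacks make most operations on it slow
m = tf_mat[doc_idx, :]         # materialized SparseMatrixCSC: pays for one copy,
                               # then supports nonzeros/rowvals and fast kernels
nonzeros(m)                    # works; there is no such method for the view `v`
```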
@@ -315,7 +317,8 @@ end

HasKeywords(::ChunkKeywordsIndex) = true
"Access chunkdata for a subset of chunks, `chunk_idx` is a vector of chunk indices in the index"
- function chunkdata(index::ChunkKeywordsIndex, chunk_idx::AbstractVector{<:Integer})
+ Base.@propagate_inbounds function chunkdata(
+ index::ChunkKeywordsIndex, chunk_idx::AbstractVector{<:Integer})
chkdata = index.chunkdata
if isnothing(chkdata)
return nothing
@@ -437,13 +440,18 @@ Base.parent(index::SubChunkIndex) = index.parent
HasEmbeddings(index::SubChunkIndex) = HasEmbeddings(parent(index))
HasKeywords(index::SubChunkIndex) = HasKeywords(parent(index))

- chunks(index::SubChunkIndex) = view(chunks(parent(index)), positions(index))
- sources(index::SubChunkIndex) = view(sources(parent(index)), positions(index))
- function chunkdata(index::SubChunkIndex)
- chkdata = chunkdata(parent(index), positions(index))
+ Base.@propagate_inbounds function chunks(index::SubChunkIndex)
+ view(chunks(parent(index)), positions(index))
+ end
+ Base.@propagate_inbounds function sources(index::SubChunkIndex)
+ view(sources(parent(index)), positions(index))
+ end
+ Base.@propagate_inbounds function chunkdata(index::SubChunkIndex)
+ chunkdata(parent(index), positions(index))
end
"Access chunkdata for a subset of chunks, `chunk_idx` is a vector of chunk indices in the index"
- function chunkdata(index::SubChunkIndex, chunk_idx::AbstractVector{<:Integer})
+ Base.@propagate_inbounds function chunkdata(
+ index::SubChunkIndex, chunk_idx::AbstractVector{<:Integer})
## We need this accessor because different chunk indices can have chunks in different dimensions!!
index_chunk_idx = translate_positions_to_parent(index, chunk_idx)
pos = intersect(positions(index), index_chunk_idx)
@@ -501,7 +509,7 @@ Translate positions to the parent index. Useful to convert between positions in
Used whenever a `chunkdata()` or `tags()` are used to re-align positions to the "parent" index.
"""
- function translate_positions_to_parent(
+ Base.@propagate_inbounds function translate_positions_to_parent(
index::SubChunkIndex, pos::AbstractVector{<:Integer})
sub_positions = positions(index)
return sub_positions[pos]
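A toy sketch of the `translate_positions_to_parent` contract (hypothetical values, not package code): local positions in the sub-index map to the positions its chunks occupy in the parent index.

```julia
# A sub-index that keeps the parent's chunks at positions [10, 20, 30]:
sub_positions = [10, 20, 30]

# Local position i corresponds to sub_positions[i] in the parent:
translate_to_parent(pos::AbstractVector{<:Integer}) = sub_positions[pos]

translate_to_parent([1, 3])  # == [10, 30], i.e., parent coordinates
```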
66 changes: 57 additions & 9 deletions src/user_preferences.jl
@@ -346,8 +346,12 @@ aliases = merge(
## t-mixtral -> Together.ai Mixtral
"tmixtral" => "mistralai/Mixtral-8x7B-Instruct-v0.1",
"tmixtral22" => "mistralai/Mixtral-8x22B-Instruct-v0.1",
"tllama3" => "meta-llama/Llama-3-8b-chat-hf",
"tllama370" => "meta-llama/Llama-3-70b-chat-hf",
"tllama3" => "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
"tllama370" => "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
"tllama3405" => "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
"tls" => "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", #s for small
"tlm" => "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", #m for medium
"tll" => "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo", #l for large
## Mistral AI
"mistral-tiny" => "mistral-tiny",
"mistral-small" => "mistral-small-latest",
@@ -365,11 +369,17 @@ aliases = merge(
"claudes" => "claude-3-5-sonnet-20240620",
"claudeh" => "claude-3-haiku-20240307",
## Groq
"gllama3" => "llama3-8b-8192",
"gl3" => "llama3-8b-8192",
"gllama370" => "llama3-70b-8192",
"gl70" => "llama3-70b-8192",
"gllama3" => "llama-3.1-8b-instant",
"gl3" => "llama-3.1-8b-instant",
"gllama370" => "llama-3.1-70b-versatile",
"gl70" => "llama-3.1-70b-versatile",
"gllama3405" => "llama-3.1-405b-reasoning",
"gl405" => "llama-3.1-405b-reasoning",
"gls" => "llama-3.1-8b-instant", #s for small
"glm" => "llama-3.1-70b-versatile", #m for medium
"gll" => "llama-3.1-405b-reasoning", #l for large
"gmixtral" => "mixtral-8x7b-32768",
"ggemma9" => "gemma2-9b-it",
## DeepSeek
"dschat" => "deepseek-chat",
"dscode" => "deepseek-coder"
@@ -665,13 +675,31 @@ registry = Dict{String, ModelSpec}(
TogetherOpenAISchema(),
2e-7,
2e-7,
"Meta Llama3 8b from Mistral, hosted by Together.ai. For more information, see [models](https://docs.together.ai/docs/inference-models)."),
"Meta Llama3 8b, hosted by Together.ai. For more information, see [models](https://docs.together.ai/docs/inference-models)."),
"meta-llama/Llama-3-70b-chat-hf" => ModelSpec(
"meta-llama/Llama-3-70b-chat-hf",
TogetherOpenAISchema(),
9e-7,
9e-7,
"Meta Llama3 70b from Mistral, hosted by Together.ai. For more information, see [models](https://docs.together.ai/docs/inference-models)."),
"Meta Llama3 70b, hosted by Together.ai. For more information, see [models](https://docs.together.ai/docs/inference-models)."),
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo" => ModelSpec(
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
TogetherOpenAISchema(),
1e-7,
1.8e-7,
"Meta Llama3.1 8b, hosted by Together.ai. For more information, see [models](https://docs.together.ai/docs/inference-models)."),
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo" => ModelSpec(
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
TogetherOpenAISchema(),
5.4e-7,
8.8e-7,
"Meta Llama3.1 70b, hosted by Together.ai. For more information, see [models](https://docs.together.ai/docs/inference-models)."),
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo" => ModelSpec(
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
TogetherOpenAISchema(),
5e-6,
1.5e-5,
"Meta Llama3.1 405b, hosted by Together.ai. For more information, see [models](https://docs.together.ai/docs/inference-models)."),
### Anthropic models
"claude-3-5-sonnet-20240620" => ModelSpec("claude-3-5-sonnet-20240620",
AnthropicSchema(),
@@ -699,10 +727,25 @@ registry = Dict{String, ModelSpec}(
2.4e-5,
"Anthropic's Claude 2.1 model."),
## Groq -- using preliminary pricing on https://wow.groq.com/
"llama-3.1-405b-reasoning" => ModelSpec("llama-3.1-405b-reasoning",
GroqOpenAISchema(),
5e-6, # based on prices at together.ai... likely it will be much cheaper
1.5e-5, # based on prices at together.ai... likely it will be much cheaper
"Meta's Llama3.1 405b, hosted by Groq. Max output 16384 tokens, 131K context - during preview window limited to max tokens=16K. See details [here](https://console.groq.com/docs/models)"),
"llama-3.1-70b-versatile" => ModelSpec("llama-3.1-70b-versatile",
GroqOpenAISchema(),
5.9e-7,
7.9e-7,
"Meta's Llama3.1 70b, hosted by Groq. Max output 8192 tokens, 131K context - during preview window limited to max tokens=8K. See details [here](https://console.groq.com/docs/models)"),
"llama-3.1-8b-instant" => ModelSpec("llama-3.1-8b-instant",
GroqOpenAISchema(),
5e-8,
8e-8,
"Meta's Llama3.1 8b, hosted by Groq. Max output 8192 tokens, 131K context - during preview window limited to max tokens=8K. See details [here](https://console.groq.com/docs/models)"),
"llama3-8b-8192" => ModelSpec("llama3-8b-8192",
GroqOpenAISchema(),
5e-8,
- 1e-7,
+ 8e-8,
"Meta's Llama3 8b, hosted by Groq. Max output 8192 tokens, 8K context. See details [here](https://console.groq.com/docs/models)"),
"llama3-70b-8192" => ModelSpec("llama3-70b-8192",
GroqOpenAISchema(),
@@ -714,6 +757,11 @@ registry = Dict{String, ModelSpec}(
2.7e-7,
2.7e-7,
"Mistral.ai Mixtral 8x7b, hosted by Groq. Max 32K context. See details [here](https://console.groq.com/docs/models)"),
"gemma2-9b-it" => ModelSpec("gemma2-9b-it",
GroqOpenAISchema(),
2e-7,
2e-7,
"Google's Gemma 2 9b, hosted by Groq. Max 8K context. See details [here](https://console.groq.com/docs/models)"),
"deepseek-chat" => ModelSpec("deepseek-chat",
DeepSeekOpenAISchema(),
1.4e-7,
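For orientation, the two numeric `ModelSpec` arguments are USD costs per prompt/completion token. A sketch of estimating a single call's cost (the `cost_of_token_*` field names are assumed from the package, not guaranteed by this diff):

```julia
using PromptingTools

# Hypothetical usage: 1_000 prompt tokens + 200 completion tokens on the
# Groq-hosted Llama 3.1 8b registered above.
spec = PromptingTools.MODEL_REGISTRY["llama-3.1-8b-instant"]
cost = 1_000 * spec.cost_of_token_prompt + 200 * spec.cost_of_token_completion
# 1_000 * 5e-8 + 200 * 8e-8 = 6.6e-5 USD
```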
