Skip to content

Commit

Permalink
Update rankGPT
Browse files Browse the repository at this point in the history
  • Loading branch information
svilupp authored Jul 2, 2024
1 parent 44450c0 commit b5f089f
Show file tree
Hide file tree
Showing 6 changed files with 24 additions and 6 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

## [0.36.0]

### Added
- Added a prompt template for RAG query expansion for BM25 (`RAGQueryKeywordExpander`)

### Fixed
- Fixed a small bug in the truncation step of RankGPT's `permutation_step!` (bad indexing of string characters).
- Fixed a bug where certain combinations of `rank_start` and `rank_end` would cause the last sliding window to be skipped.
- Fixed a bug where a partially filled `RAGResult` would fail pretty-printing with `pprint`.

## [0.35.0]

### Added
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "PromptingTools"
uuid = "670122d1-24a8-4d70-bfce-740807c42192"
authors = ["J S @svilupp and contributors"]
version = "0.35.0"
version = "0.36.0"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
Expand Down
11 changes: 9 additions & 2 deletions src/Experimental/RAGTools/rank_gpt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ function create_permutation_instruction(
rank = 0
for ctx in context[rank_start:rank_end_adj]
rank += 1
push!(messages, PT.UserMessage("[$rank] $(strip(ctx)[1:min(end, max_length)])"))
push!(messages, PT.UserMessage("[$rank] $(first(strip(ctx),max_length))"))
push!(messages, PT.AIMessage("Received passage [$rank]."))
end
push!(messages, last_msg)
Expand Down Expand Up @@ -121,14 +121,21 @@ function rank_sliding_window!(
@assert rank_end>=window_size>=step "rank_end must be greater than or equal to window_size, which must be greater than or equal to step (Provided: rank_end=$rank_end, window_size=$window_size, step=$step)"
end_pos = min(rank_end, length(result.chunks))
start_pos = max(end_pos - window_size, 1)
while start_pos >= rank_start
while start_pos > rank_start
(verbose >= 1) && @info "Ranking chunks in positions $start_pos to $end_pos"
permutation_step!(result; rank_start = start_pos, rank_end = end_pos,
model, verbose = (verbose >= 1), kwargs...)
(verbose >= 2) && @info "Current ranking: $(result.positions)"
end_pos -= step
start_pos -= step
end
## Don't skip the last window, but ensure it's not negative
start_pos = max(start_pos, rank_start)
end_pos = max(end_pos, start_pos)
(verbose >= 1) && @info "Ranking chunks in positions $start_pos to $end_pos"
permutation_step!(result; rank_start = start_pos, rank_end = end_pos,
model, verbose = (verbose >= 1), kwargs...)
(verbose >= 2) && @info "Current ranking: $(result.positions)"
return result
end

Expand Down
4 changes: 2 additions & 2 deletions src/Experimental/RAGTools/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -679,15 +679,15 @@ function PT.pprint(
print(io, "\n", "-"^20, "\n")
print(io, content, "\n\n")
end
if !isempty(r.final_answer)
if !isnothing(r.final_answer) && !isempty(r.final_answer)
annotater = TrigramAnnotater()
root = annotate_support(annotater, r; annotater_kwargs...)
print(io, "-"^20, "\n")
printstyled(io, "ANSWER", color = :blue, bold = true)
print(io, "\n", "-"^20, "\n")
pprint(io, root; text_width)
end
if add_context
if add_context && !isempty(r.context)
print(io, "\n" * "-"^20, "\n")
printstyled(io, "CONTEXT", color = :blue, bold = true)
print(io, "\n", "-"^20, "\n")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"content":"Template Metadata","description":"Template for RAG query rephrasing that injects more keywords that could be relevant. Placeholders: `query`","version":"1.0","source":"","_type":"metadatamessage"},{"content":"You are an assistant tasked with taking a natural language query from a user and converting it into a keyword-based lookup in our search database.\n\nIn this process, you strip out information that is not relevant for the retrieval task. This is a pure information retrieval task.\n\nAugment this query with ADDITIONAL keywords that described the entities and concepts mentioned in the query (consider synonyms, rephrasing, related items). \nFocus on expanding mainly the specific / niche context of the query to improve the retrieval precision for uncommon words.\nGenerate synonyms, related terms, and alternative phrasings for each identified entity/concept.\nExpand any abbreviations, acronyms, or initialisms present in the query.\nInclude specific industry jargon, technical terms, or domain-specific vocabulary relevant to the query.\nAdd any references or additional metadata that you deem important to successfully answer this query with our search database.\n\nProvide the most powerful 5-10 keywords for the search engine.\n","variables":[],"_type":"systemmessage"},{"content":"Here is the user query: {{query}}\nRephrased query:","variables":["query"],"_type":"usermessage"}]
2 changes: 1 addition & 1 deletion test/Experimental/RAGTools/rank_gpt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ end
# Test with custom rank_start and rank_end
result = rank_gpt(["chunk1", "chunk2", "chunk3", "chunk4"],
"What is AI?"; rank_start = 2, rank_end = 3, window_size = 3, step = 2, model = "mock-gen")
@test result.positions == [1, 2, 3, 4] # Check positions with custom rank_start and rank_end
@test result.positions == [1, 3, 2, 4] # Flips because the signal say [2] > [1]
result = rank_gpt(["chunk1", "chunk2", "chunk3", "chunk4"],
"What is AI?"; rank_start = 1, rank_end = 4, window_size = 4,
step = 2, model = "mock-gen")
Expand Down

0 comments on commit b5f089f

Please sign in to comment.