Skip to content

Commit

Permalink
Add tokenizer type
Browse files (browse the repository at this point in the history)
  • Loading branch information
bartowski1182 committed Nov 26, 2024
1 parent 8d243b6 commit 445904e
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 0 deletions.
1 change: 1 addition & 0 deletions include/llama.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ extern "C" {
LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
LLAMA_VOCAB_PRE_TYPE_EXAONE = 25,
LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26,
LLAMA_VOCAB_PRE_TYPE_OLMO2 = 27,
};

enum llama_rope_type {
Expand Down
3 changes: 3 additions & 0 deletions src/llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6406,6 +6406,9 @@ static void llm_load_vocab(
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_CHAMELEON;
vocab.tokenizer_add_bos = true;
vocab.tokenizer_clean_spaces = false;
} else if (
tokenizer_pre == "olmo2") {
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_OLMO2;
} else {
throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
}
Expand Down

0 comments on commit 445904e

Please sign in to comment.