Skip to content

Commit

Permalink
feat: quick spinner while loading tokenizers
Browse files Browse the repository at this point in the history
Right now the goose CLI doesn't start immediately (about 1.4s)
because it has to load tokenizers. This change makes startup feel immediately responsive by showing a spinner while the tokenizers load.
  • Loading branch information
baxen committed Jan 10, 2025
1 parent 9210de0 commit 4f35490
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 0 deletions.
1 change: 1 addition & 0 deletions crates/goose/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ kill_tree = "0.2.4"
tracing = "0.1"
tracing-subscriber = "0.3"
wiremock = "0.6.0"
indicatif = "0.17"


keyring = { version = "3.6.1", features = [
Expand Down
19 changes: 19 additions & 0 deletions crates/goose/src/token_counter.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use crate::message::Message;
use include_dir::{include_dir, Dir};
use indicatif::{ProgressBar, ProgressStyle};
use mcp_core::tool::Tool;
use std::collections::HashMap;
use std::time::Duration;
use tokenizers::tokenizer::Tokenizer;

// Embed the tokenizer files directory
Expand All @@ -25,6 +27,18 @@ impl Default for TokenCounter {

impl TokenCounter {
fn load_tokenizer(&mut self, tokenizer_key: &str) {
// Create a spinner that will show during both loading and parsing
let pb = ProgressBar::new_spinner();
pb.set_style(
ProgressStyle::default_spinner()
.template("{spinner:.green} {msg}")
.unwrap(),
);

// Start spinner for loading phase
pb.set_message(format!("Loading {} tokenizer from disk...", tokenizer_key));
pb.enable_steady_tick(Duration::from_millis(120));

// Load from embedded tokenizer files. The tokenizer_key must match the directory name.
let tokenizer_path = format!("{}/tokenizer.json", tokenizer_key);
let file_content = TOKENIZER_FILES
Expand All @@ -33,13 +47,18 @@ impl TokenCounter {
.ok_or_else(|| format!("Embedded tokenizer file not found: {}", tokenizer_path))
.unwrap();

// Update spinner for parsing phase
pb.set_message(format!("Initializing {} tokenizer...", tokenizer_key));

let tokenizer = Tokenizer::from_bytes(file_content);

match tokenizer {
Ok(tokenizer) => {
self.tokenizers.insert(tokenizer_key.to_string(), tokenizer);
pb.finish_and_clear();
}
Err(e) => {
pb.finish_and_clear();
eprintln!("Failed to load tokenizer {}: {}", tokenizer_key, e);
}
}
Expand Down

0 comments on commit 4f35490

Please sign in to comment.