From 4f88da6ea11b2a614c77cee842f675303ee50431 Mon Sep 17 00:00:00 2001
From: Justine Tunney
Date: Sat, 30 Nov 2024 14:04:33 -0800
Subject: [PATCH] Document new server flags

---
 llama.cpp/main/main.1           | 16 ++++++++++
 llama.cpp/main/main.1.asc       | 20 +++++++++++++
 llamafile/db.cpp                | 11 ++-----
 llamafile/flags.cpp             |  9 ++++++
 llamafile/llamafile.h           |  1 +
 llamafile/server/main.1         | 44 ++++++++++++++++++++++++++--
 llamafile/server/main.1.asc     | 52 +++++++++++++++++++++++++++++++--
 llamafile/server/www/chatbot.js |  4 +++
 llamafile/server/www/index.html |  7 +++--
 9 files changed, 149 insertions(+), 15 deletions(-)

diff --git a/llama.cpp/main/main.1 b/llama.cpp/main/main.1
index 2e6a1383ec..68b332fea7 100644
--- a/llama.cpp/main/main.1
+++ b/llama.cpp/main/main.1
@@ -616,6 +616,22 @@ mode to print the llamafile logo in ASCII rather than UNICODE.
 .It Fl Fl verbose
 Enables verbose logger output in chatbot. This can be helpful for
 troubleshooting issues.
+.It Fl Fl chat-template Ar NAME
+Specifies or overrides the chat template for the model.
+.Pp
+Normally the GGUF metadata field tokenizer.chat_template specifies this
+value for instruct models. This flag may be used either to override the
+chat template, or to supply one when the GGUF metadata field is absent,
+which effectively forces the web ui to enable chatbot mode.
+.Pp
+Supported chat template names are: chatml, llama2, llama3, mistral
+(alias for llama2), phi3, zephyr, monarch, gemma, gemma2 (alias for
+gemma), orion, openchat, vicuna, vicuna-orca, deepseek, command-r,
+chatglm3, chatglm4, minicpm, deepseek2, or exaone3.
+.Pp
+It is also possible to pass the jinja2 template itself as this argument.
+Since llamafile doesn't currently support jinja2, a heuristic will be
+used to guess which of the above templates it most closely resembles.
 .El
 .Sh CLI OPTIONS
 The following options may be specified when
diff --git a/llama.cpp/main/main.1.asc b/llama.cpp/main/main.1.asc
index 9a632aa881..9bf2941066 100644
--- a/llama.cpp/main/main.1.asc
+++ b/llama.cpp/main/main.1.asc
@@ -592,6 +592,26 @@
            Enables verbose logger output in chatbot. This can be helpful
            for troubleshooting issues.

+     --chat-template NAME
+            Specifies or overrides the chat template for the model.
+
+            Normally the GGUF metadata field tokenizer.chat_template spec‐
+            ifies this value for instruct models. This flag may be used
+            either to override the chat template, or to supply one when the
+            GGUF metadata field is absent, which effectively forces the web
+            ui to enable chatbot mode.
+
+            Supported chat template names are: chatml, llama2, llama3, mis‐
+            tral (alias for llama2), phi3, zephyr, monarch, gemma, gemma2
+            (alias for gemma), orion, openchat, vicuna, vicuna-orca,
+            deepseek, command-r, chatglm3, chatglm4, minicpm, deepseek2, or
+            exaone3.
+
+            It is also possible to pass the jinja2 template itself as this
+            argument. Since llamafile doesn't currently support jinja2, a
+            heuristic will be used to guess which of the above templates it
+            most closely resembles.
+
 CLI OPTIONS
     The following options may be specified when llamafile is running in
     --cli mode.
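As a usage sketch, not part of the patch itself: assuming a GGUF file
whose metadata lacks tokenizer.chat_template, the flag documented above
could force chatbot mode with an explicit template. The model filename
here is hypothetical:

    llamafile -m my-base-model.gguf --chat-template chatml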
diff --git a/llamafile/db.cpp b/llamafile/db.cpp
index 2501985f0d..9bf26aaee3 100644
--- a/llamafile/db.cpp
+++ b/llamafile/db.cpp
@@ -86,14 +86,9 @@ static sqlite3 *open_impl() {
         return nullptr;
     }
     char *errmsg = nullptr;
-    if (sqlite3_exec(db, "PRAGMA journal_mode=WAL;", nullptr, nullptr, &errmsg) != SQLITE_OK) {
-        fprintf(stderr, "%s: failed to set journal mode to wal: %s\n", path.c_str(), errmsg);
-        sqlite3_free(errmsg);
-        sqlite3_close(db);
-        return nullptr;
-    }
-    if (sqlite3_exec(db, "PRAGMA synchronous=NORMAL;", nullptr, nullptr, &errmsg) != SQLITE_OK) {
-        fprintf(stderr, "%s: failed to set synchronous to normal: %s\n", path.c_str(), errmsg);
+    if (sqlite3_exec(db, FLAG_db_startup_sql, nullptr, nullptr, &errmsg) != SQLITE_OK) {
+        fprintf(stderr, "%s: failed to execute startup SQL (%s) because: %s\n", path.c_str(),
+                FLAG_db_startup_sql, errmsg);
         sqlite3_free(errmsg);
         sqlite3_close(db);
         return nullptr;
diff --git a/llamafile/flags.cpp b/llamafile/flags.cpp
index 936a79dd90..def697e607 100644
--- a/llamafile/flags.cpp
+++ b/llamafile/flags.cpp
@@ -54,6 +54,8 @@ bool FLAG_trace = false;
 bool FLAG_unsecure = false;
 const char *FLAG_chat_template = "";
 const char *FLAG_db = nullptr;
+const char *FLAG_db_startup_sql = "PRAGMA journal_mode=WAL;"
+                                  "PRAGMA synchronous=NORMAL;";
 const char *FLAG_file = nullptr;
 const char *FLAG_ip_header = nullptr;
 const char *FLAG_listen = "127.0.0.1:8080";
@@ -193,6 +195,13 @@ void llamafile_get_flags(int argc, char **argv) {
             continue;
         }

+        if (!strcmp(flag, "--db-startup-sql")) {
+            if (i == argc)
+                missing("--db-startup-sql");
+            FLAG_db_startup_sql = argv[i++];
+            continue;
+        }
+
         //////////////////////////////////////////////////////////////////////
         // server flags

diff --git a/llamafile/llamafile.h b/llamafile/llamafile.h
index d74b14c0eb..15f34a7b97 100644
--- a/llamafile/llamafile.h
+++ b/llamafile/llamafile.h
@@ -25,6 +25,7 @@ extern bool FLAG_trap;
 extern bool FLAG_unsecure;
 extern const char *FLAG_chat_template;
 extern const char *FLAG_db;
+extern const char *FLAG_db_startup_sql;
 extern const char *FLAG_file;
 extern const char *FLAG_ip_header;
 extern const char *FLAG_listen;
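Because sqlite3_exec() runs every semicolon-separated statement in the
string it is handed, the value of --db-startup-sql may carry several
statements at once. A hypothetical invocation overriding the default
(the model path is illustrative, and busy_timeout is a standard SQLite
pragma rather than anything this patch sets):

    llamafiler -m model.gguf \
        --db-startup-sql 'PRAGMA journal_mode=WAL; PRAGMA busy_timeout=5000;'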
diff --git a/llamafile/server/main.1 b/llamafile/server/main.1
index b821ac6371..4b0a720a66 100644
--- a/llamafile/server/main.1
+++ b/llamafile/server/main.1
@@ -1,4 +1,4 @@
-.Dd August 17, 2024
+.Dd November 30, 2024
 .Dt LLAMAFILER 1
 .Os Mozilla Ocho
 .Sh NAME
@@ -30,6 +30,11 @@ recommended that you run multiple instances of llamafiler behind a
 reverse proxy such as NGINX or Redbean.
 .It Fl mm Ar FNAME , Fl Fl mmproj Ar FNAME
 Path of vision model weights.
+.It Fl Fl db Ar FILE
+Specifies the path of the sqlite3 database.
+.Pp
+The default is
+.Pa ~/.llamafile/llamafile.sqlite3
 .It Fl l Ar HOSTPORT , Fl Fl listen Ar HOSTPORT
 Specifies the local [HOST:]PORT on which the HTTP server should listen.
 By default this is 0.0.0.0:8080 which means llamafiler will bind to port
@@ -55,8 +60,10 @@ Please note that
 has a strong influence on how many slots can be created.
 .It Fl p Ar TEXT , Fl Fl prompt Ar TEXT
 Specifies system prompt. This value is passed along to the web frontend.
-.It Fl Fl no-display-prompt Ar TEXT
+.It Fl Fl no-display-prompt
 Hide system prompt from web user interface.
+.It Fl Fl nologo
+Hide llamafile logo icon from web ui.
 .It Fl Fl url-prefix Ar URLPREFIX
 Specifies a URL prefix (subdirectory) under which the HTTP server will
 make the API accessible, e.g. /lamafiler. Useful when running llamafiler
@@ -130,6 +137,39 @@ supported by the host operating system.
 The default keepalive is 5.
 .It Fl Fl http-obuf-size Ar N
 Size of HTTP output buffer size, in bytes. Default is 1048576.
 .It Fl Fl http-ibuf-size Ar N
 Size of HTTP input buffer size, in bytes. Default is 1048576.
+.It Fl Fl chat-template Ar NAME
+Specifies or overrides the chat template for the model.
+.Pp
+Normally the GGUF metadata field tokenizer.chat_template specifies this
+value for instruct models. This flag may be used either to override the
+chat template, or to supply one when the GGUF metadata field is absent,
+which effectively forces the web ui to enable chatbot mode.
+.Pp
+Supported chat template names are: chatml, llama2, llama3, mistral
+(alias for llama2), phi3, zephyr, monarch, gemma, gemma2 (alias for
+gemma), orion, openchat, vicuna, vicuna-orca, deepseek, command-r,
+chatglm3, chatglm4, minicpm, deepseek2, or exaone3.
+.Pp
+It is also possible to pass the jinja2 template itself as this argument.
+Since llamafiler doesn't currently support jinja2, a heuristic will be
+used to guess which of the above templates it most closely resembles.
+.It Fl Fl completion-mode
+Forces the web ui to operate in completion mode, rather than chat mode.
+Normally the web ui chooses its mode based on the GGUF metadata. Base
+models normally don't define tokenizer.chat_template whereas instruct
+models do. If it's a base model, then the web ui will automatically use
+completion mode only, without needing to specify this flag. This flag is
+useful when the GGUF does define a prompt template, but it is desirable
+to disable the chat interface anyway.
+.It Fl Fl db-startup-sql Ar SQL
+Specifies SQL code that should be executed whenever connecting to the
+SQLite database. The default is the following code, which enables the
+write-ahead log.
+.Bd -literal -offset indent
+PRAGMA journal_mode=WAL;
+PRAGMA synchronous=NORMAL;
+.Ed
 .El
 .Sh EXAMPLE
 Here's an example of how you might start this server:
 .Pp
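Taken together, the server flags documented above might be combined as
follows; the model filename and database path are illustrative only:

    llamafiler -m Llama-3.2-1B-Instruct.Q6_K.gguf \
        --db /var/lib/llamafile/llamafile.sqlite3 \
        --chat-template llama3 \
        --nologo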
diff --git a/llamafile/server/main.1.asc b/llamafile/server/main.1.asc
index 9b498f8ec7..a9534f2350 100644
--- a/llamafile/server/main.1.asc
+++ b/llamafile/server/main.1.asc
@@ -32,6 +32,11 @@
      -mm FNAME, --mmproj FNAME
             Path of vision model weights.

+     --db FILE
+            Specifies the path of the sqlite3 database.
+
+            The default is ~/.llamafile/llamafile.sqlite3
+
      -l HOSTPORT, --listen HOSTPORT
             Specifies the local [HOST:]PORT on which the HTTP server should
             listen. By default this is 0.0.0.0:8080 which means llamafiler
@@ -63,9 +68,12 @@
             Specifies system prompt. This value is passed along to the web
             frontend.

-     --no-display-prompt TEXT
+     --no-display-prompt
             Hide system prompt from web user interface.

+     --nologo
+            Hide llamafile logo icon from web ui.
+
      --url-prefix URLPREFIX
             Specifies a URL prefix (subdirectory) under which the HTTP
             server will make the API accessible, e.g. /lamafiler. Useful
@@ -158,6 +166,44 @@
      --http-ibuf-size N
             Size of HTTP input buffer size, in bytes. Default is 1048576.

+     --chat-template NAME
+            Specifies or overrides the chat template for the model.
+
+            Normally the GGUF metadata field tokenizer.chat_template spec‐
+            ifies this value for instruct models. This flag may be used
+            either to override the chat template, or to supply one when the
+            GGUF metadata field is absent, which effectively forces the web
+            ui to enable chatbot mode.
+
+            Supported chat template names are: chatml, llama2, llama3, mis‐
+            tral (alias for llama2), phi3, zephyr, monarch, gemma, gemma2
+            (alias for gemma), orion, openchat, vicuna, vicuna-orca,
+            deepseek, command-r, chatglm3, chatglm4, minicpm, deepseek2, or
+            exaone3.
+
+            It is also possible to pass the jinja2 template itself as this
+            argument. Since llamafiler doesn't currently support jinja2, a
+            heuristic will be used to guess which of the above templates it
+            most closely resembles.
+
+     --completion-mode
+            Forces the web ui to operate in completion mode, rather than
+            chat mode. Normally the web ui chooses its mode based on the
+            GGUF metadata. Base models normally don't define
+            tokenizer.chat_template whereas instruct models do. If it's a
+            base model, then the web ui will automatically use completion
+            mode only, without needing to specify this flag. This flag is
+            useful when the GGUF does define a prompt template, but it is
+            desirable to disable the chat interface anyway.
+
+     --db-startup-sql SQL
+            Specifies SQL code that should be executed whenever connecting
+            to the SQLite database. The default is the following code,
+            which enables the write-ahead log.
+
+                  PRAGMA journal_mode=WAL;
+                  PRAGMA synchronous=NORMAL;
+
 EXAMPLE
      Here's an example of how you might start this server:

@@ -172,10 +218,10 @@
      curl -v http://127.0.0.1:8080/embedding?content=hello+world

 DOCUMENTATION
-     Read our Markdown documentation for additional help and tutorials. See
+     Read our Markdown documentation for additional help and tutorials.  See
      llamafile/server/doc/index.md in the source repository on GitHub.

 SEE ALSO
      llamafile(1), whisperfile(1)

-Mozilla Ocho                    August 17, 2024                  LLAMAFILER(1)
+Mozilla Ocho                   November 30, 2024                 LLAMAFILER(1)
diff --git a/llamafile/server/www/chatbot.js b/llamafile/server/www/chatbot.js
index 7662ec2d89..ac68783e95 100644
--- a/llamafile/server/www/chatbot.js
+++ b/llamafile/server/www/chatbot.js
@@ -22,6 +22,7 @@ const DEFAULT_SYSTEM_PROMPT =
 const DEFAULT_FLAGZ = {
   "model": null,
   "prompt": null,
+  "nologo": false,
   "no_display_prompt": false,
   "frequency_penalty": 0,
   "presence_penalty": 0,
@@ -369,6 +370,9 @@ function updateModelInfo() {
     document.getElementById("model").textContent = modelName;
     document.getElementById("model-completions").textContent = modelName;
   }
+  if (!flagz.nologo) {
+    document.getElementById("logo").style.display = "inline-block";
+  }
 }

 function startChat(history) {
diff --git a/llamafile/server/www/index.html b/llamafile/server/www/index.html
index eec0250c16..aec6d5026a 100644
--- a/llamafile/server/www/index.html
+++ b/llamafile/server/www/index.html
@@ -9,8 +9,11 @@
 [hunk body lost in extraction: the HTML markup was stripped; what
 survives shows the static "[llamafile]" logo image and "llamafile" label
 being removed, and a logo element that stays hidden unless chatbot.js
 enables it, plus a "loading..." placeholder, being added in their place]
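One way to confirm that the default startup SQL took effect is to query
the journal mode from the sqlite3 shell after the server has opened its
database; the path below is the documented default, and the command
should print "wal":

    sqlite3 ~/.llamafile/llamafile.sqlite3 'PRAGMA journal_mode;'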