From 4616b7d436b633958d1518ef312b20d64f396b01 Mon Sep 17 00:00:00 2001
From: cornzz <39997278+cornzz@users.noreply.github.com>
Date: Wed, 18 Sep 2024 04:39:00 +0300
Subject: [PATCH] Update demo notes, add flagging hint

---
 src/app.css |  9 +++++++--
 src/app.py  | 15 ++++++++++-----
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/src/app.css b/src/app.css
index e3c93e4..27f384d 100644
--- a/src/app.css
+++ b/src/app.css
@@ -71,8 +71,7 @@ textarea::placeholder {
 }
 
 /* Settings */
-#settings,
-#settings > div {
+#settings {
     gap: 6px;
 }
 
@@ -126,6 +125,12 @@ textarea::placeholder {
     background: transparent;
 }
 
+.button-hint {
+    text-align: center;
+    color: var(--block-info-text-color);
+    font-size: 13px;
+}
+
 /* Examples component */
 #examples .label {
     display: none;
diff --git a/src/app.py b/src/app.py
index 30aa8d5..83ce6d7 100644
--- a/src/app.py
+++ b/src/app.py
@@ -224,19 +224,20 @@ def run_demo(
             )
             with gr.Accordion("About this demo (please read):", open=False, elem_classes="accordion"):
                 gr.Markdown(
-                    "Your prompt is sent to a target LLM for completion, both in its uncompressed form and compressed using [LLMLingua-2](https://llmlingua.com/llmlingua2.html). Evaluate the responses and select which one you prefer."
+                    "Your prompt is sent to a target LLM for completion, both in its uncompressed form and compressed using [LLMLingua-2](https://llmlingua.com/llmlingua2.html). "
+                    "Evaluate the responses and give feedback for each one by clicking on the respective button below the answer."
                 )
                 gr.Markdown(
                     f"""
-                        - The order of the responses (prompt compressed / uncompressed) is randomized. Compression is performed {'on a CPU. Using a GPU would be faster.' if not (MPS_AVAILABLE or CUDA_AVAILABLE) else f'on a GPU {"using MPS." if MPS_AVAILABLE else f"({torch.cuda.get_device_name()})."}'}
-                        - LLMLingua-2 is a task-agnostic compression model, the value of the question field is not considered in the compression process.
+                        - **The order of the responses (prompt compressed / uncompressed) is randomized**.
+                        - LLMLingua-2 is a task-agnostic compression model; the value of the question field is not considered in the compression process. Compression is performed {'on a CPU. Using a GPU would be faster.' if not (MPS_AVAILABLE or CUDA_AVAILABLE) else f'on a GPU {"using MPS." if MPS_AVAILABLE else f"({torch.cuda.get_device_name()})."}'}
                         - The example prompts were (mostly) taken from the [MeetingBank-QA-Summary](https://huggingface.co/datasets/microsoft/MeetingBank-QA-Summary) dataset. Click on a question to autofill the question field.
                         - Token counts are calculated using the [cl100k_base tokenizer](https://platform.openai.com/tokenizer) (GPT-3.5/-4), actual counts may vary for different target models. The saving metric is based on an API pricing of $0.03 / 1000 tokens.
-                        - End-to-end latency: latency from submission to full response, including compression. While shown for reference, it is, by itself, not a good metric for evaluating the effectiveness of compression.
+                        - End-to-end latency: latency from submission to full response, including compression. While shown for reference, this metric alone is not an effective measure of compression efficacy.
""", elem_id="notes", ) - with gr.Column(variant="compact", elem_classes="settings"): + with gr.Column(variant="compact", elem_id="settings"): gr.Markdown("Tokens to Preserve") with gr.Row(): force_tokens = gr.Dropdown( @@ -309,6 +310,10 @@ def run_demo( b_yes = gr.Button("✅", interactive=False) b_no = gr.Button("❌", interactive=False) FLAG_BUTTONS = [a_yes, a_no, b_yes, b_no] + gr.Markdown( + '
" + ) # Examples gr.Markdown('