Interface improvements
cornzz committed Aug 22, 2024
1 parent 1f931c4 commit 4f41c3f
Showing 2 changed files with 40 additions and 14 deletions.
src/app.css (25 additions, 2 deletions)
@@ -9,14 +9,29 @@
 /* Accordion component (about text) */
 .accordion {
     background: transparent;
+    padding: var(--spacing-lg) calc(var(--spacing-lg) + 2px);
 }
 
 .accordion .label-wrap span {
     font-weight: bold;
     font-size: 1rem;
 }
 
-/* Force tokens multiselect */
+.accordion > div:last-child > div {
+    gap: var(--spacing-xl);
+}
+
+#notes {
+    padding: 4px 10px;
+    background: var(--background-fill-secondary);
+}
+
+/* Settings */
+#settings,
+#settings > div {
+    gap: 6px;
+}
+
 .force-tokens .wrap-inner {
     padding: 3px 5px;
 }
@@ -26,10 +41,18 @@
 }
 
 /* Examples component */
-.gallery-item {
+#examples .label {
+    display: none;
+}
+
+#examples .gallery-item {
     max-height: 100px;
 }
 
+#examples .paginate {
+    font-size: var(--text-lg);
+}
+
 /* HighlightedText component (compressed diff) */
 .textfield {
     line-height: 1.29;
src/app.py (15 additions, 12 deletions)
@@ -202,19 +202,22 @@ def run_demo(
 ) as demo:
     gr.Markdown("# Prompt Compression A/B Test")
     with gr.Accordion("About this demo (please read):", open=False, elem_classes="accordion"):
-        gr.Markdown(
-            "Your prompt is sent to a target LLM for completion, both in its uncompressed form and compressed using [LLMLingua-2](https://llmlingua.com/llmlingua2.html). Evaluate the responses and select the better one."
-        )
         gr.Markdown(
             f"""
-            Notes:
-            - The order of the responses (compressed / uncompressed prompt) is randomized.
-            - Compression time is included in the compressed end-to-end latency. {'Compression is done on a CPU. Using a GPU would be faster.' if not (MPS_AVAILABLE or CUDA_AVAILABLE) else 'Compression is done on a GPU using MPS.' if MPS_AVAILABLE else f'Compression is done on a GPU ({torch.cuda.get_device_name()}).'}
-            - The provided example prompts were taken from the [LLMLingua-2 Demo](https://huggingface.co/spaces/microsoft/llmlingua-2). Some include corresponding QA pairs generated by GPT-4. Click on a question to autofill the separate, uncompressed question field.
-            - This demo focuses on evaluating the quality of responses to compressed prompts. Uncompressed and compressed prompts are processed simultaneously. Thus, and due to other variables, the end-to-end latencies may not be very meaningful.
-            - Token counts are calculated based on the `cl100k_base` [tokenizer](https://platform.openai.com/tokenizer) (GPT-3.5/-4) and may vary for different target models. Saving metric is based on an API pricing of $0.03 / 1000 tokens.
-            - LLMLingua-2 is a task-agnostic compression model, so the value of the question field is not considered in the compression process.
-            """
+            Your prompt is sent to a target LLM for completion, both in its uncompressed form and compressed using [LLMLingua-2](https://llmlingua.com/llmlingua2.html). Evaluate the responses and select the better one.
+            Notes:
+            - The order of the responses (compressed / uncompressed prompt) is randomized.
+            - Compression time is included in the compressed end-to-end latency. {'Compression is done on a CPU. Using a GPU would be faster.' if not (MPS_AVAILABLE or CUDA_AVAILABLE) else 'Compression is done on a GPU using MPS.' if MPS_AVAILABLE else f'Compression is done on a GPU ({torch.cuda.get_device_name()}).'}
+            - The provided example prompts were taken from the [LLMLingua-2 Demo](https://huggingface.co/spaces/microsoft/llmlingua-2). Click on a question to autofill the separate, uncompressed question field.
+            - Uncompressed and compressed prompts are processed simultaneously. Thus, and due to other variables, the end-to-end latencies may not be very meaningful.
+            - Token counts are calculated based on the [`cl100k_base` tokenizer](https://platform.openai.com/tokenizer) (GPT-3.5/-4) and may vary for different target models. Saving metric is based on an API pricing of $0.03 / 1000 tokens.
+            - LLMLingua-2 is a task-agnostic compression model, the value of the question field is not considered in the compression process.
+            """,
+            elem_id="notes"
         )
-    with gr.Row(variant="compact"):
+    with gr.Row(variant="compact", elem_id="settings"):
         with gr.Column():
             gr.Markdown("UI Settings")
             ui_settings = gr.CheckboxGroup(
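Note: the device note in the f-string above interpolates MPS_AVAILABLE and CUDA_AVAILABLE flags whose definitions sit outside this hunk. A minimal sketch of how such flags are typically derived with PyTorch (an assumption, not part of this commit):

import torch

# Assumed definitions; the diff only shows the flags being used.
CUDA_AVAILABLE = torch.cuda.is_available()
MPS_AVAILABLE = torch.backends.mps.is_available()  # Apple-silicon GPU backend

# Mirrors the conditional expression embedded in the notes f-string.
if not (MPS_AVAILABLE or CUDA_AVAILABLE):
    device_note = "Compression is done on a CPU. Using a GPU would be faster."
elif MPS_AVAILABLE:
    device_note = "Compression is done on a GPU using MPS."
else:
    device_note = f"Compression is done on a GPU ({torch.cuda.get_device_name()})."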
@@ -270,7 +273,6 @@ def run_demo(
                 elem_classes="no-content",
             )
         with gr.Column(variant="panel") as responses:
-            # TODO: stream response?
             with gr.Row():
                 response_a = gr.Textbox(label="LLM Response A", lines=10, max_lines=10, autoscroll=False, interactive=False)
                 response_a_obj = gr.Textbox(label="Response A", visible=False)
@@ -285,7 +287,7 @@
         gr.Markdown("## Examples (click to select)")
         qa_pairs = gr.Dataframe(
             label="GPT-4 generated QA pairs related to the selected example prompt:",
-            headers=["Question", "Answer"],
+            headers=["Question (click to select)", "Answer"],
             elem_classes="qa-pairs",
             visible=False,
         )
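For context, the "click to select" behavior described in the notes is typically wired through Gradio's select event. A hypothetical, self-contained sketch (the handler and the `question` textbox are assumptions; the demo's actual event handlers are outside this diff):

import gradio as gr

with gr.Blocks() as demo:
    question = gr.Textbox(label="Question")  # assumed separate question field
    qa_pairs = gr.Dataframe(headers=["Question (click to select)", "Answer"])

    def fill_question(evt: gr.SelectData):
        row, col = evt.index  # position of the clicked cell
        # Only clicks in the question column autofill the field.
        return evt.value if col == 0 else gr.update()

    qa_pairs.select(fill_question, outputs=question)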
@@ -294,6 +296,7 @@
             components=[gr.Textbox(visible=False)],
             samples_per_page=5,
             type="index",
+            elem_id="examples",
         )
 
     # Event handlers
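The notes describe the token-count and savings metric only in prose. A minimal sketch of that arithmetic, assuming the tiktoken library (the demo's actual helpers are not part of this commit):

import tiktoken

# cl100k_base is the GPT-3.5 / GPT-4 tokenizer referenced in the notes.
encoding = tiktoken.get_encoding("cl100k_base")

def token_count(text: str) -> int:
    return len(encoding.encode(text))

def dollar_savings(original: str, compressed: str, price_per_1k: float = 0.03) -> float:
    """Savings from sending the compressed prompt, at $0.03 / 1000 tokens."""
    return (token_count(original) - token_count(compressed)) / 1000 * price_per_1k

Under this assumed pricing, a prompt compressed from 2,000 to 1,000 tokens saves $0.03.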
