Interface improvements
cornzz committed Aug 22, 2024
1 parent 1f931c4 commit 4f41c3f
Showing 2 changed files with 40 additions and 14 deletions.
src/app.css (25 additions, 2 deletions)
@@ -9,14 +9,29 @@
 /* Accordion component (about text) */
 .accordion {
     background: transparent;
+    padding: var(--spacing-lg) calc(var(--spacing-lg) + 2px);
 }
 
 .accordion .label-wrap span {
     font-weight: bold;
     font-size: 1rem;
 }
 
-/* Force tokens multiselect */
+.accordion > div:last-child > div {
+    gap: var(--spacing-xl);
+}
+
+#notes {
+    padding: 4px 10px;
+    background: var(--background-fill-secondary);
+}
+
+/* Settings */
+#settings,
+#settings > div {
+    gap: 6px;
+}
+
 .force-tokens .wrap-inner {
     padding: 3px 5px;
 }
@@ -26,10 +41,18 @@
 }
 
 /* Examples component */
-.gallery-item {
+#examples .label {
+    display: none;
+}
+
+#examples .gallery-item {
     max-height: 100px;
 }
 
+#examples .paginate {
+    font-size: var(--text-lg);
+}
+
 /* HighlightedText component (compressed diff) */
 .textfield {
     line-height: 1.29;
src/app.py (15 additions, 12 deletions)
@@ -202,19 +202,22 @@ def run_demo(
 ) as demo:
     gr.Markdown("# Prompt Compression A/B Test")
     with gr.Accordion("About this demo (please read):", open=False, elem_classes="accordion"):
-        gr.Markdown(
-            "Your prompt is sent to a target LLM for completion, both in its uncompressed form and compressed using [LLMLingua-2](https://llmlingua.com/llmlingua2.html). Evaluate the responses and select the better one."
-        )
         gr.Markdown(
             f"""
-            Notes:
-            - The order of the responses (compressed / uncompressed prompt) is randomized.
-            - Compression time is included in the compressed end-to-end latency. {'Compression is done on a CPU. Using a GPU would be faster.' if not (MPS_AVAILABLE or CUDA_AVAILABLE) else 'Compression is done on a GPU using MPS.' if MPS_AVAILABLE else f'Compression is done on a GPU ({torch.cuda.get_device_name()}).'}
-            - The provided example prompts were taken from the [LLMLingua-2 Demo](https://huggingface.co/spaces/microsoft/llmlingua-2). Some include corresponding QA pairs generated by GPT-4. Click on a question to autofill the separate, uncompressed question field.
-            - This demo focuses on evaluating the quality of responses to compressed prompts. Uncompressed and compressed prompts are processed simultaneously. Thus, and due to other variables, the end-to-end latencies may not be very meaningful.
-            - Token counts are calculated based on the `cl100k_base` [tokenizer](https://platform.openai.com/tokenizer) (GPT-3.5/-4) and may vary for different target models. Saving metric is based on an API pricing of $0.03 / 1000 tokens.
-            - LLMLingua-2 is a task-agnostic compression model, so the value of the question field is not considered in the compression process.
-            """
+            Your prompt is sent to a target LLM for completion, both in its uncompressed form and compressed using [LLMLingua-2](https://llmlingua.com/llmlingua2.html). Evaluate the responses and select the better one.
+            Notes:
+            - The order of the responses (compressed / uncompressed prompt) is randomized.
+            - Compression time is included in the compressed end-to-end latency. {'Compression is done on a CPU. Using a GPU would be faster.' if not (MPS_AVAILABLE or CUDA_AVAILABLE) else 'Compression is done on a GPU using MPS.' if MPS_AVAILABLE else f'Compression is done on a GPU ({torch.cuda.get_device_name()}).'}
+            - The provided example prompts were taken from the [LLMLingua-2 Demo](https://huggingface.co/spaces/microsoft/llmlingua-2). Click on a question to autofill the separate, uncompressed question field.
+            - Uncompressed and compressed prompts are processed simultaneously. Thus, and due to other variables, the end-to-end latencies may not be very meaningful.
+            - Token counts are calculated based on the [`cl100k_base` tokenizer](https://platform.openai.com/tokenizer) (GPT-3.5/-4) and may vary for different target models. Saving metric is based on an API pricing of $0.03 / 1000 tokens.
+            - LLMLingua-2 is a task-agnostic compression model, the value of the question field is not considered in the compression process.
+            """,
+            elem_id="notes"
         )
-    with gr.Row(variant="compact"):
+    with gr.Row(variant="compact", elem_id="settings"):
         with gr.Column():
             gr.Markdown("UI Settings")
             ui_settings = gr.CheckboxGroup(
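Note: the device note in the f-string above interpolates MPS_AVAILABLE and CUDA_AVAILABLE flags whose definitions sit outside this hunk. A minimal sketch of how such flags are typically derived with PyTorch (an assumption, not part of this commit):

import torch

# Assumed definitions; the diff only shows the flags being used.
CUDA_AVAILABLE = torch.cuda.is_available()
MPS_AVAILABLE = torch.backends.mps.is_available()  # Apple-silicon GPU backend

# Mirrors the conditional expression embedded in the notes f-string.
if not (MPS_AVAILABLE or CUDA_AVAILABLE):
    device_note = "Compression is done on a CPU. Using a GPU would be faster."
elif MPS_AVAILABLE:
    device_note = "Compression is done on a GPU using MPS."
else:
    device_note = f"Compression is done on a GPU ({torch.cuda.get_device_name()})."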
@@ -270,7 +273,6 @@ def run_demo(
                 elem_classes="no-content",
             )
         with gr.Column(variant="panel") as responses:
-            # TODO: stream response?
             with gr.Row():
                 response_a = gr.Textbox(label="LLM Response A", lines=10, max_lines=10, autoscroll=False, interactive=False)
                 response_a_obj = gr.Textbox(label="Response A", visible=False)
@@ -285,7 +287,7 @@
         gr.Markdown("## Examples (click to select)")
         qa_pairs = gr.Dataframe(
             label="GPT-4 generated QA pairs related to the selected example prompt:",
-            headers=["Question", "Answer"],
+            headers=["Question (click to select)", "Answer"],
             elem_classes="qa-pairs",
             visible=False,
         )
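For context, the "click to select" behavior described in the notes is typically wired through Gradio's select event. A hypothetical, self-contained sketch (the handler and the `question` textbox are assumptions; the demo's actual event handlers are outside this diff):

import gradio as gr

with gr.Blocks() as demo:
    question = gr.Textbox(label="Question")  # assumed separate question field
    qa_pairs = gr.Dataframe(headers=["Question (click to select)", "Answer"])

    def fill_question(evt: gr.SelectData):
        row, col = evt.index  # position of the clicked cell
        # Only clicks in the question column autofill the field.
        return evt.value if col == 0 else gr.update()

    qa_pairs.select(fill_question, outputs=question)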
@@ -294,6 +296,7 @@
             components=[gr.Textbox(visible=False)],
             samples_per_page=5,
             type="index",
+            elem_id="examples",
         )
 
     # Event handlers
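The notes describe the token-count and savings metric only in prose. A minimal sketch of that arithmetic, assuming the tiktoken library (the demo's actual helpers are not part of this commit):

import tiktoken

# cl100k_base is the GPT-3.5 / GPT-4 tokenizer referenced in the notes.
encoding = tiktoken.get_encoding("cl100k_base")

def token_count(text: str) -> int:
    return len(encoding.encode(text))

def dollar_savings(original: str, compressed: str, price_per_1k: float = 0.03) -> float:
    """Savings from sending the compressed prompt, at $0.03 / 1000 tokens."""
    return (token_count(original) - token_count(compressed)) / 1000 * price_per_1k

Under this assumed pricing, a prompt compressed from 2,000 to 1,000 tokens saves $0.03.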
