From 843821d57e0e8e4955178976ddcfa5bb34b81299 Mon Sep 17 00:00:00 2001 From: Joe McIlvain Date: Wed, 26 Jun 2024 14:02:40 -0700 Subject: [PATCH] feat: add usage/fingerprint metadata to `KurtResult` for `KurtVertexAI` adapter This implements the new optional `metadata` field of the `KurtResult` interface to provide the total input and output tokens. Note that no system fingerprint is available from Vertex AI's Gemini API, so that is omitted here. If it ever becomes available, we can add it. Note that this commit involves regenerating all the test snapshots because we are now capturing and relying on more from the upstream Vertex AI responses. --- .../spec/generateNaturalLanguage.spec.ts | 9 +- .../spec/generateWithOptionalTools.spec.ts | 4 +- packages/kurt-vertex-ai/spec/snapshots.ts | 13 +- ...AI_generateNaturalLanguage_says_hello.yaml | 51 +++-- ..._writes_a_haiku_with_high_temperature.yaml | 79 ++++--- ...edData_says_hello_(response_format_1).yaml | 31 ++- ...edData_says_hello_(response_format_2).yaml | 35 +++- ...edData_says_hello_(response_format_3).yaml | 33 ++- ...alculator_(after_parallel_tool_calls).yaml | 100 +++++---- ...nalTools_calculator_(after_tool_call).yaml | 49 +++-- ...calculator_(with_parallel_tool_calls).yaml | 53 +++-- ...onalTools_calculator_(with_tool_call).yaml | 40 ++-- packages/kurt-vertex-ai/src/KurtVertexAI.ts | 196 ++++++++++-------- packages/kurt-vertex-ai/src/VertexAI.types.ts | 6 + 14 files changed, 429 insertions(+), 270 deletions(-) diff --git a/packages/kurt-vertex-ai/spec/generateNaturalLanguage.spec.ts b/packages/kurt-vertex-ai/spec/generateNaturalLanguage.spec.ts index 42888b2..e9cd244 100644 --- a/packages/kurt-vertex-ai/spec/generateNaturalLanguage.spec.ts +++ b/packages/kurt-vertex-ai/spec/generateNaturalLanguage.spec.ts @@ -8,7 +8,7 @@ describe("KurtVertexAI generateNaturalLanguage", () => { prompt: "Say hello!", }) ) - expect(result.text).toEqual("Hello! How can I assist you today?") + expect(result.text).toEqual("Hello! 👋 😊\n") }) test("writes a haiku with high temperature", async () => { @@ -24,9 +24,10 @@ describe("KurtVertexAI generateNaturalLanguage", () => { ) expect(result.text).toEqual( [ - "Moon paints silver path,", - "Water sings to sleeping stones,", - "Night sighs on the wind.", + "Moon bathes silver stream,", + "Whispers flow through sleeping wood,", + "Stones dream in the dark.", + "", ].join("\n") ) }) diff --git a/packages/kurt-vertex-ai/spec/generateWithOptionalTools.spec.ts b/packages/kurt-vertex-ai/spec/generateWithOptionalTools.spec.ts index 66d52a3..6a384bc 100644 --- a/packages/kurt-vertex-ai/spec/generateWithOptionalTools.spec.ts +++ b/packages/kurt-vertex-ai/spec/generateWithOptionalTools.spec.ts @@ -50,7 +50,7 @@ describe("KurtVertexAI generateWithOptionalTools", () => { ], }) ) - expect(result.text).toEqual("That's about 324.") + expect(result.text).toEqual("That's about 324. \n") }) test("calculator (with parallel tool calls)", async () => { @@ -123,7 +123,7 @@ describe("KurtVertexAI generateWithOptionalTools", () => { [ "1. 8026256882 divided by 3402398 is 2359.", "2. 1185835515 divided by 348263 is 3405.", - "3. 90135094495 minus 89944954350 is 190140145.", + "3. 90135094495 minus 89944954350 is 190140145. 
", "", ].join("\n") ) diff --git a/packages/kurt-vertex-ai/spec/snapshots.ts b/packages/kurt-vertex-ai/spec/snapshots.ts index 8277218..bcd6ff3 100644 --- a/packages/kurt-vertex-ai/spec/snapshots.ts +++ b/packages/kurt-vertex-ai/spec/snapshots.ts @@ -44,7 +44,7 @@ export async function snapshotAndMock( // Here's the data structure we will use to snapshot a request/response cycle. const snapshot: { step1Request?: VertexAIRequest - step2RawChunks: VertexAIResponseChunkCandidate[] + step2RawChunks: VertexAIResponseChunk[] step3KurtEvents: KurtStreamEvent[] } = { step1Request: undefined, @@ -75,7 +75,7 @@ export async function snapshotAndMock( snapshot.step2RawChunks = savedRawChunks async function* generator(): AsyncIterable { for await (const rawChunk of savedRawChunks) { - yield { candidates: [rawChunk] } + yield rawChunk } } return { stream: generator() } @@ -95,9 +95,14 @@ export async function snapshotAndMock( for await (const rawEvent of response.stream) { const candidate = rawEvent.candidates?.at(0) if (candidate) { - const rawChunk = { ...candidate } + const partialCandidate = { ...candidate } // biome-ignore lint/performance/noDelete: we don't care about performance in this test code - delete rawChunk.safetyRatings + delete partialCandidate.safetyRatings + + const rawChunk = { + candidates: [partialCandidate], + usageMetadata: rawEvent.usageMetadata, + } snapshot.step2RawChunks.push(rawChunk) } diff --git a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateNaturalLanguage_says_hello.yaml b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateNaturalLanguage_says_hello.yaml index 7faaf26..c9cf2c8 100644 --- a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateNaturalLanguage_says_hello.yaml +++ b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateNaturalLanguage_says_hello.yaml @@ -8,24 +8,37 @@ step1Request: parts: - text: Say hello! step2RawChunks: - - content: - role: model - parts: - - text: Hello! - index: 0 - - content: - role: model - parts: - - text: " How can I assist you today?" - index: 0 - - content: - role: model - parts: - - text: "" - finishReason: STOP - index: 0 + - candidates: + - content: + role: model + parts: + - text: Hello + index: 0 + - candidates: + - content: + role: model + parts: + - text: | + ! 👋 😊 + index: 0 + - candidates: + - content: + role: model + parts: + - text: "" + finishReason: STOP + index: 0 + usageMetadata: + promptTokenCount: 3 + candidatesTokenCount: 7 + totalTokenCount: 10 step3KurtEvents: - - chunk: Hello! - - chunk: " How can I assist you today?" + - chunk: Hello + - chunk: | + ! 👋 😊 - finished: true - text: Hello! How can I assist you today? + text: | + Hello! 👋 😊 + metadata: + totalInputTokens: 3 + totalOutputTokens: 7 diff --git a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateNaturalLanguage_writes_a_haiku_with_high_temperature.yaml b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateNaturalLanguage_writes_a_haiku_with_high_temperature.yaml index fd56b03..99947a9 100644 --- a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateNaturalLanguage_writes_a_haiku_with_high_temperature.yaml +++ b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateNaturalLanguage_writes_a_haiku_with_high_temperature.yaml @@ -8,41 +8,52 @@ step1Request: parts: - text: Compose a haiku about a mountain stream at night. 
step2RawChunks: - - content: - role: model - parts: - - text: Moon - index: 0 - - content: - role: model - parts: - - text: |2- - paints silver path, - Water sings to sleeping stones, - Night sighs on the - index: 0 - - content: - role: model - parts: - - text: |2- - wind. - index: 0 - - content: - role: model - parts: - - text: "" - finishReason: STOP - index: 0 + - candidates: + - content: + role: model + parts: + - text: Moon + index: 0 + - candidates: + - content: + role: model + parts: + - text: |2- + bathes silver stream, + Whispers flow through sleeping wood, + Stones dream + index: 0 + - candidates: + - content: + role: model + parts: + - text: |2 + in the dark. + index: 0 + - candidates: + - content: + role: model + parts: + - text: "" + finishReason: STOP + index: 0 + usageMetadata: + promptTokenCount: 10 + candidatesTokenCount: 23 + totalTokenCount: 33 step3KurtEvents: - chunk: Moon - chunk: |2- - paints silver path, - Water sings to sleeping stones, - Night sighs on the - - chunk: |2- - wind. + bathes silver stream, + Whispers flow through sleeping wood, + Stones dream + - chunk: |2 + in the dark. - finished: true - text: |- - Moon paints silver path, - Water sings to sleeping stones, - Night sighs on the wind. + text: | + Moon bathes silver stream, + Whispers flow through sleeping wood, + Stones dream in the dark. + metadata: + totalInputTokens: 10 + totalOutputTokens: 23 diff --git a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateStructuredData_says_hello_(response_format_1).yaml b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateStructuredData_says_hello_(response_format_1).yaml index 06c7aca..ac4224b 100644 --- a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateStructuredData_says_hello_(response_format_1).yaml +++ b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateStructuredData_says_hello_(response_format_1).yaml @@ -25,17 +25,32 @@ step1Request: allowed_function_names: - structured_data step2RawChunks: - - content: - role: model - parts: - - functionCall: - name: structured_data - args: - say: hello - index: 0 + - candidates: + - content: + role: model + parts: + - functionCall: + name: structured_data + args: + say: hello + index: 0 + - candidates: + - content: + role: model + parts: + - text: "" + finishReason: STOP + index: 0 + usageMetadata: + promptTokenCount: 16 + candidatesTokenCount: 5 + totalTokenCount: 21 step3KurtEvents: - chunk: '{"say":"hello"}' - finished: true text: '{"say":"hello"}' data: say: hello + metadata: + totalInputTokens: 16 + totalOutputTokens: 5 diff --git a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateStructuredData_says_hello_(response_format_2).yaml b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateStructuredData_says_hello_(response_format_2).yaml index 5674e64..a044aca 100644 --- a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateStructuredData_says_hello_(response_format_2).yaml +++ b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateStructuredData_says_hello_(response_format_2).yaml @@ -25,19 +25,34 @@ step1Request: allowed_function_names: - structured_data step2RawChunks: - - content: - role: model - parts: - - functionCall: - name: call - args: - function: structured_data - say: hello - extension: default_api - index: 0 + - candidates: + - content: + role: model + parts: + - functionCall: + name: call + args: + function: structured_data + say: hello + extension: default_api + index: 0 + - candidates: + - content: + role: model + parts: + - 
text: "" + finishReason: STOP + index: 0 + usageMetadata: + promptTokenCount: 16 + candidatesTokenCount: 8 + totalTokenCount: 24 step3KurtEvents: - chunk: '{"say":"hello"}' - finished: true text: '{"say":"hello"}' data: say: hello + metadata: + totalInputTokens: 16 + totalOutputTokens: 8 diff --git a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateStructuredData_says_hello_(response_format_3).yaml b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateStructuredData_says_hello_(response_format_3).yaml index b8d70f7..3d4ecd6 100644 --- a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateStructuredData_says_hello_(response_format_3).yaml +++ b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateStructuredData_says_hello_(response_format_3).yaml @@ -25,18 +25,33 @@ step1Request: allowed_function_names: - structured_data step2RawChunks: - - content: - role: model - parts: - - functionCall: - name: call - args: { say: "hello" } - function: structured_data - extension: default_api" - index: 0 + - candidates: + - content: + role: model + parts: + - functionCall: + name: call + args: { say: "hello" } + function: structured_data + extension: default_api" + index: 0 + - candidates: + - content: + role: model + parts: + - text: "" + finishReason: STOP + index: 0 + usageMetadata: + promptTokenCount: 16 + candidatesTokenCount: 8 + totalTokenCount: 21 step3KurtEvents: - chunk: '{"say":"hello"}' - finished: true text: '{"say":"hello"}' data: say: hello + metadata: + totalInputTokens: 16 + totalOutputTokens: 8 diff --git a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateWithOptionalTools_calculator_(after_parallel_tool_calls).yaml b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateWithOptionalTools_calculator_(after_parallel_tool_calls).yaml index 9540298..3ee36a6 100644 --- a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateWithOptionalTools_calculator_(after_parallel_tool_calls).yaml +++ b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateWithOptionalTools_calculator_(after_parallel_tool_calls).yaml @@ -81,47 +81,58 @@ step1Request: - dividend - divisor step2RawChunks: - - content: - role: model - parts: - - text: "1" - index: 0 - - content: - role: model - parts: - - text: . 8026256882 divided by 3 - index: 0 - - content: - role: model - parts: - - text: |- - 402398 is 2359. - 2. - index: 0 - - content: - role: model - parts: - - text: |2- - 1185835515 divided by 348263 is 3405. - 3. 9 - index: 0 - - content: - role: model - parts: - - text: 0135094495 minus 89944954350 is 1901401 - index: 0 - - content: - role: model - parts: - - text: | - 45. - index: 0 - - content: - role: model - parts: - - text: "" - finishReason: STOP - index: 0 + - candidates: + - content: + role: model + parts: + - text: "1" + index: 0 + - candidates: + - content: + role: model + parts: + - text: . 8026256882 divided by 3 + index: 0 + - candidates: + - content: + role: model + parts: + - text: |- + 402398 is 2359. + 2. + index: 0 + - candidates: + - content: + role: model + parts: + - text: |2- + 1185835515 divided by 348263 is 3405. + 3. 9 + index: 0 + - candidates: + - content: + role: model + parts: + - text: 0135094495 minus 89944954350 is 1901401 + index: 0 + - candidates: + - content: + role: model + parts: + - text: | + 45. 
+ index: 0 + - candidates: + - content: + role: model + parts: + - text: "" + finishReason: STOP + index: 0 + usageMetadata: + promptTokenCount: 144 + candidatesTokenCount: 102 + totalTokenCount: 246 step3KurtEvents: - chunk: "1" - chunk: . 8026256882 divided by 3 @@ -133,9 +144,12 @@ step3KurtEvents: 3. 9 - chunk: 0135094495 minus 89944954350 is 1901401 - chunk: | - 45. + 45. - finished: true text: | 1. 8026256882 divided by 3402398 is 2359. 2. 1185835515 divided by 348263 is 3405. - 3. 90135094495 minus 89944954350 is 190140145. + 3. 90135094495 minus 89944954350 is 190140145. + metadata: + totalInputTokens: 144 + totalOutputTokens: 102 diff --git a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateWithOptionalTools_calculator_(after_tool_call).yaml b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateWithOptionalTools_calculator_(after_tool_call).yaml index 5e411e6..4c3679a 100644 --- a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateWithOptionalTools_calculator_(after_tool_call).yaml +++ b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateWithOptionalTools_calculator_(after_tool_call).yaml @@ -51,24 +51,37 @@ step1Request: - dividend - divisor step2RawChunks: - - content: - role: model - parts: - - text: That - index: 0 - - content: - role: model - parts: - - text: "'s about 324." - index: 0 - - content: - role: model - parts: - - text: "" - finishReason: STOP - index: 0 + - candidates: + - content: + role: model + parts: + - text: That + index: 0 + - candidates: + - content: + role: model + parts: + - text: | + 's about 324. + index: 0 + - candidates: + - content: + role: model + parts: + - text: "" + finishReason: STOP + index: 0 + usageMetadata: + promptTokenCount: 74 + candidatesTokenCount: 11 + totalTokenCount: 85 step3KurtEvents: - chunk: That - - chunk: "'s about 324." + - chunk: | + 's about 324. - finished: true - text: "That's about 324." + text: | + That's about 324. 
+ metadata: + totalInputTokens: 74 + totalOutputTokens: 11 diff --git a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateWithOptionalTools_calculator_(with_parallel_tool_calls).yaml b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateWithOptionalTools_calculator_(with_parallel_tool_calls).yaml index 64443e6..2db5d83 100644 --- a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateWithOptionalTools_calculator_(with_parallel_tool_calls).yaml +++ b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateWithOptionalTools_calculator_(with_parallel_tool_calls).yaml @@ -42,25 +42,37 @@ step1Request: - dividend - divisor step2RawChunks: - - content: - role: model - parts: - - functionCall: - name: divide - args: - dividend: 8026256882 - divisor: 3402398 - - functionCall: - name: divide - args: - dividend: 1185835515 - divisor: 348263 - - functionCall: - name: subtract - args: - minuend: 90135094495 - subtrahend: 89944954350 - index: 0 + - candidates: + - content: + role: model + parts: + - functionCall: + name: divide + args: + divisor: 3402398 + dividend: 8026256882 + - functionCall: + name: divide + args: + divisor: 348263 + dividend: 1185835515 + - functionCall: + name: subtract + args: + minuend: 90135094495 + subtrahend: 89944954350 + index: 0 + - candidates: + - content: + role: model + parts: + - text: "" + finishReason: STOP + index: 0 + usageMetadata: + promptTokenCount: 123 + candidatesTokenCount: 12 + totalTokenCount: 135 step3KurtEvents: - chunk: '{"dividend":8026256882,"divisor":3402398}' - chunk: "\n" @@ -86,3 +98,6 @@ step3KurtEvents: args: minuend: 90135094495 subtrahend: 89944954350 + metadata: + totalInputTokens: 123 + totalOutputTokens: 12 diff --git a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateWithOptionalTools_calculator_(with_tool_call).yaml b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateWithOptionalTools_calculator_(with_tool_call).yaml index 1edc6a3..3010972 100644 --- a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateWithOptionalTools_calculator_(with_tool_call).yaml +++ b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateWithOptionalTools_calculator_(with_tool_call).yaml @@ -38,21 +38,27 @@ step1Request: - dividend - divisor step2RawChunks: - - content: - role: model - parts: - - functionCall: - name: divide - args: - dividend: 9876356 - divisor: 30487 - index: 0 - - content: - role: model - parts: - - text: "" - finishReason: STOP - index: 0 + - candidates: + - content: + role: model + parts: + - functionCall: + name: divide + args: + divisor: 30487 + dividend: 9876356 + index: 0 + - candidates: + - content: + role: model + parts: + - text: "" + finishReason: STOP + index: 0 + usageMetadata: + promptTokenCount: 68 + candidatesTokenCount: 3 + totalTokenCount: 71 step3KurtEvents: - chunk: '{"dividend":9876356,"divisor":30487}' - finished: true @@ -62,4 +68,6 @@ step3KurtEvents: args: dividend: 9876356 divisor: 30487 - + metadata: + totalInputTokens: 68 + totalOutputTokens: 3 diff --git a/packages/kurt-vertex-ai/src/KurtVertexAI.ts b/packages/kurt-vertex-ai/src/KurtVertexAI.ts index c6163dc..a6abe97 100644 --- a/packages/kurt-vertex-ai/src/KurtVertexAI.ts +++ b/packages/kurt-vertex-ai/src/KurtVertexAI.ts @@ -15,6 +15,7 @@ import type { KurtSchemaResultMaybe, KurtMessage, KurtSamplingOptions, + KurtResult, } from "@formula-monks/kurt" import type { VertexAI, @@ -22,8 +23,11 @@ import type { VertexAIMessage, VertexAIRequest, VertexAIResponseChunk, + VertexAIResponseChunkCandidate, + 
VertexAIResponseFunctionCall, VertexAISchema, VertexAITool, + VertexAIUsageMetadata, } from "./VertexAI.types" // These models support function calling. @@ -206,46 +210,55 @@ async function* transformStream< schema: S, rawEvents: AsyncIterable ): AsyncGenerator> { + let lastRawEvent: VertexAIResponseChunk | undefined const chunks: string[] = [] + const functionCalls: VertexAIResponseFunctionCall[] = [] for await (const rawEvent of rawEvents) { + lastRawEvent = rawEvent + const choice = rawEvent.candidates?.at(0) if (!choice) continue - const isContentFinal = choice.finishReason !== undefined - const { parts } = choice.content + for (const part of choice.content.parts) { + const { functionCall } = part + if (functionCall) functionCalls.push(functionCall) - for (const [partIndex, part] of parts.entries()) { const chunk = part.text - const isFinal = - (isContentFinal && partIndex === parts.length - 1) || part.functionCall - if (chunk) { chunks.push(chunk) yield { chunk } } - if (isFinal) { - if (schema) { - const { functionCall } = part - if (!functionCall) { - throw new Error( - `Expected function call in final chunk, but got ${JSON.stringify( - part - )}` - ) - } - const data = applySchemaToFuzzyStructure(schema, functionCall) as D - const text = JSON.stringify(data) - yield { chunk: text } - yield { finished: true, text, data } - } else { - const text = chunks.join("") - yield { finished: true, text, data: undefined } as KurtStreamEvent - } - return // No need to send more events once we've sent a finished event - } } } + + const rawEvent = lastRawEvent + if (rawEvent) { + const metadata = convertMetadata(rawEvent) + + if (schema) { + const functionCall = functionCalls[0] + if (!functionCall) throw new Error("Expected function call, but got none") + if (functionCalls.length > 1) + throw new Error( + `Expected just function call, but got ${functionCalls.length}` + ) + + const data = applySchemaToFuzzyStructure(schema, functionCall) as D + const text = JSON.stringify(data) + yield { chunk: text } + yield { finished: true, text, data, metadata } + } else { + const text = chunks.join("") + yield { + finished: true, + text, + data: undefined, + metadata, + } as KurtStreamEvent + } + return // No need to send more events once we've sent a finished event + } } async function* transformStreamWithOptionalTools< @@ -256,79 +269,82 @@ async function* transformStreamWithOptionalTools< tools: S, rawEvents: AsyncIterable ): AsyncGenerator> { + let lastRawEvent: VertexAIResponseChunk | undefined const chunks: string[] = [] + const functionCalls: VertexAIResponseFunctionCall[] = [] for await (const rawEvent of rawEvents) { + lastRawEvent = rawEvent + const choice = rawEvent.candidates?.at(0) if (!choice) continue - const isContentFinal = choice.finishReason !== undefined - const { parts } = choice.content + for (const part of choice.content.parts) { + const { functionCall } = part + if (functionCall) functionCalls.push(functionCall) - for (const [partIndex, part] of parts.entries()) { const chunk = part.text - const isFinal = - (isContentFinal || part.functionCall) && partIndex === parts.length - 1 - if (chunk) { chunks.push(chunk) yield { chunk } } - if (isFinal) { - if (part.functionCall) { - const allData = parts.map((part) => { - if (!part.functionCall) { - throw new Error( - `Vertex AI mixed function calls with non-function calls in the same raw stream event: ${JSON.stringify( - rawEvent - )}` - ) - } - - const { name } = part.functionCall - - const schema = tools[name] - if (!schema) { - throw new 
Error( - `Vertex AI tried to call tool ${name} which isn't in the tool set ${JSON.stringify( - Object.keys(tools) - )}}` - ) - } - return { - name, - args: applySchemaToFuzzyStructure(schema, part.functionCall), - } as D - }) - - // Emit a text chunk for each tool call (with line breaks in between). - for (const [dataIndex, data] of allData.entries()) { - if (dataIndex > 0) { - chunks.push("\n") - yield { chunk: "\n" } - } - const text = JSON.stringify(data.args) - chunks.push(text) - yield { chunk: text } - } - - if (!isNonEmptyArray(allData)) - throw new Error("Empty here is impossible but TS doesn't know it") - const [data, ...additionalData] = allData - const text = chunks.join("") - - if (additionalData.length > 0) { - yield { finished: true, text, data: data as D, additionalData } - } else { - yield { finished: true, text, data } - } - } else { - const text = chunks.join("") - yield { finished: true, text, data: undefined } + } + } + + const rawEvent = lastRawEvent + if (rawEvent) { + const metadata = convertMetadata(rawEvent) + + if (functionCalls.length >= 0) { + const allData = functionCalls.map((functionCall) => { + const { name } = functionCall + + const schema = tools[name] + if (!schema) { + throw new Error( + `Vertex AI tried to call tool ${name} which isn't in the tool set ${JSON.stringify( + Object.keys(tools) + )}}` + ) + } + return { + name, + args: applySchemaToFuzzyStructure(schema, functionCall), + } as D + }) + + // Emit a text chunk for each tool call (with line breaks in between). + for (const [dataIndex, data] of allData.entries()) { + if (dataIndex > 0) { + chunks.push("\n") + yield { chunk: "\n" } + } + const text = JSON.stringify(data.args) + chunks.push(text) + yield { chunk: text } + } + + // if (!isNonEmptyArray(allData)) + // throw new Error("Empty here is impossible but TS doesn't know it") + const [data, ...additionalData] = allData + const text = chunks.join("") + + if (additionalData.length > 0) { + yield { + finished: true, + text, + data: data as D, + additionalData, + metadata, } - return // No need to send more events once we've sent a finished event + } else { + yield { finished: true, text, data, metadata } } + } else { + const text = chunks.join("") + yield { finished: true, text, data: undefined, metadata } } + return // No need to send more events once we've sent a finished event } } @@ -368,3 +384,15 @@ function applySchemaToFuzzyStructure( function isNonEmptyArray(array: T[]): array is [T, ...T[]] { return array.length > 0 } + +/** + * Convert the raw metadata from Vertex AI into Kurt's metadata format. 
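+ * Note that no system fingerprint is available from Vertex AI's Gemini API,
+ * so only the token counts are populated here.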
+ */ +function convertMetadata(info: { + usageMetadata?: VertexAIUsageMetadata +}): KurtResult["metadata"] { + return { + totalInputTokens: info.usageMetadata?.promptTokenCount, + totalOutputTokens: info.usageMetadata?.candidatesTokenCount, + } +} diff --git a/packages/kurt-vertex-ai/src/VertexAI.types.ts b/packages/kurt-vertex-ai/src/VertexAI.types.ts index 666afb4..4b04b82 100644 --- a/packages/kurt-vertex-ai/src/VertexAI.types.ts +++ b/packages/kurt-vertex-ai/src/VertexAI.types.ts @@ -6,6 +6,7 @@ import type { GenerateContentCandidate, FunctionDeclaration, FunctionDeclarationSchema, + UsageMetadata, } from "@google-cloud/vertexai" export type VertexAI = RealVertexAI @@ -31,6 +32,7 @@ export type VertexAIResponse = Promise<{ }> export type VertexAIResponseChunk = { candidates?: VertexAIResponseChunkCandidate[] + usageMetadata?: VertexAIUsageMetadata } export type VertexAIResponseChunkCandidate = Pick< GenerateContentCandidate, @@ -38,3 +40,7 @@ export type VertexAIResponseChunkCandidate = Pick< > & { safetyRatings?: object[] } +export type VertexAIResponseFunctionCall = NonNullable< + VertexAIResponseChunkCandidate["content"]["parts"][number]["functionCall"] +> +export type VertexAIUsageMetadata = UsageMetadata