diff --git a/biome.json b/biome.jsonc
similarity index 84%
rename from biome.json
rename to biome.jsonc
index debbd62..d56b5af 100644
--- a/biome.json
+++ b/biome.jsonc
@@ -1,5 +1,5 @@
 {
-  "$schema": "https://biomejs.dev/schemas/1.6.3/schema.json",
+  "$schema": "https://biomejs.dev/schemas/1.8.1/schema.json",
   "organizeImports": {
     "enabled": true
   },
@@ -7,9 +7,7 @@
     "enabled": true,
     "rules": {
       "recommended": true,
-      "nursery": {
-        "noNodejsModules": "off"
-      },
+      "nursery": {},
       "style": {
         "useNamingConvention": "off",
         "noUnusedTemplateLiteral": "off"
diff --git a/dist/main_bun.mjs b/dist/main_bun.mjs
index 5828035..8ab0bf4 100644
--- a/dist/main_bun.mjs
+++ b/dist/main_bun.mjs
@@ -1,59 +1,25 @@
-// node_modules/.deno/itty-router@5.0.17/node_modules/itty-router/index.mjs
-var t = ({ base: e = "", routes: t2 = [], ...r2 } = {}) => ({ __proto__: new Proxy({}, { get: (r3, o2, a2, s2) => (r4, ...c) => t2.push([o2.toUpperCase?.(), RegExp(`^${(s2 = (e + r4).replace(/\/+(\/|$)/g, "$1")).replace(/(\/?\.?):(\w+)\+/g, "($1(?<$2>*))").replace(/(\/?\.?):(\w+)/g, "($1(?<$2>[^$1/]+?))").replace(/\./g, "\\.").replace(/(\/?)\*/g, "($1.*)?")}/*$`), c, s2]) && a2 }), routes: t2, ...r2, async fetch(e2, ...o2) {
-  let a2, s2, c = new URL(e2.url), n = e2.query = { __proto__: null };
-  for (let [e3, t3] of c.searchParams) n[e3] = n[e3] ? [].concat(n[e3], t3) : t3;
-  e: try {
-    for (let t3 of r2.before || []) if (null != (a2 = await t3(e2.proxy ?? e2, ...o2))) break e;
-    t: for (let [r3, n2, l, i] of t2) if ((r3 == e2.method || "ALL" == r3) && (s2 = c.pathname.match(n2))) {
-      e2.params = s2.groups || {}, e2.route = i;
-      for (let t3 of l) if (null != (a2 = await t3(e2.proxy ?? e2, ...o2))) break t;
+// node_modules/.deno/itty-router@5.0.17/node_modules/itty-router/Router.mjs
+var r = ({ base: r2 = "", routes: e = [], ...a } = {}) => ({ __proto__: new Proxy({}, { get: (a2, t, o, c) => (a3, ...l) => e.push([t.toUpperCase?.(), RegExp(`^${(c = (r2 + a3).replace(/\/+(\/|$)/g, "$1")).replace(/(\/?\.?):(\w+)\+/g, "($1(?<$2>*))").replace(/(\/?\.?):(\w+)/g, "($1(?<$2>[^$1/]+?))").replace(/\./g, "\\.").replace(/(\/?)\*/g, "($1.*)?")}/*$`), l, c]) && o }), routes: e, ...a, async fetch(r3, ...t) {
+  let o, c, l = new URL(r3.url), p = r3.query = { __proto__: null };
+  for (let [r4, e2] of l.searchParams) p[r4] = p[r4] ? [].concat(p[r4], e2) : e2;
+  r: try {
+    for (let e2 of a.before || []) if (null != (o = await e2(r3.proxy ?? r3, ...t))) break r;
+    e: for (let [a2, p2, f, h] of e) if ((a2 == r3.method || "ALL" == a2) && (c = l.pathname.match(p2))) {
+      r3.params = c.groups || {}, r3.route = h;
+      for (let e2 of f) if (null != (o = await e2(r3.proxy ?? r3, ...t))) break e;
     }
-  } catch (t3) {
-    if (!r2.catch) throw t3;
-    a2 = await r2.catch(t3, e2.proxy ?? e2, ...o2);
+  } catch (e2) {
+    if (!a.catch) throw e2;
+    o = await a.catch(e2, r3.proxy ?? r3, ...t);
   }
   try {
-    for (let t3 of r2.finally || []) a2 = await t3(a2, e2.proxy ?? e2, ...o2) ?? a2;
-  } catch (t3) {
-    if (!r2.catch) throw t3;
-    a2 = await r2.catch(t3, e2.proxy ?? e2, ...o2);
+    for (let e2 of a.finally || []) o = await e2(o, r3.proxy ?? r3, ...t) ?? o;
+  } catch (e2) {
+    if (!a.catch) throw e2;
+    o = await a.catch(e2, r3.proxy ?? r3, ...t);
   }
-  return a2;
+  return o;
 } });
-var r = (e = "text/plain; charset=utf-8", t2) => (r2, o2 = {}) => {
-  if (void 0 === r2 || r2 instanceof Response) return r2;
-  const a2 = new Response(t2?.(r2) ?? r2, o2.url ? void 0 : o2);
-  return a2.headers.set("content-type", e), a2;
-};
-var o = r("application/json; charset=utf-8", JSON.stringify);
-var a = (e) => ({ 400: "Bad Request", 401: "Unauthorized", 403: "Forbidden", 404: "Not Found", 500: "Internal Server Error" })[e] || "Unknown Error";
-var s = (e = 500, t2) => {
-  if (e instanceof Error) {
-    const { message: r2, ...o2 } = e;
-    e = e.status || 500, t2 = { error: r2 || a(e), ...o2 };
-  }
-  return t2 = { status: e, ..."object" == typeof t2 ? t2 : { error: t2 || a(e) } }, o(t2, { status: e });
-};
-var p = r("text/plain; charset=utf-8", String);
-var f = r("text/html");
-var u = r("image/jpeg");
-var h = r("image/png");
-var g = r("image/webp");
-var y = (e = {}) => {
-  const { origin: t2 = "*", credentials: r2 = false, allowMethods: o2 = "*", allowHeaders: a2, exposeHeaders: s2, maxAge: c } = e, n = (e2) => {
-    const o3 = e2?.headers.get("origin");
-    return true === t2 ? o3 : t2 instanceof RegExp ? t2.test(o3) ? o3 : void 0 : Array.isArray(t2) ? t2.includes(o3) ? o3 : void 0 : t2 instanceof Function ? t2(o3) : "*" == t2 && r2 ? o3 : t2;
-  }, l = (e2, t3) => {
-    for (const [r3, o3] of Object.entries(t3)) o3 && e2.headers.append(r3, o3);
-    return e2;
-  };
-  return { corsify: (e2, t3) => e2?.headers?.get("access-control-allow-origin") || 101 == e2.status ? e2 : l(e2.clone(), { "access-control-allow-origin": n(t3), "access-control-allow-credentials": r2 }), preflight: (e2) => {
-    if ("OPTIONS" == e2.method) {
-      const t3 = new Response(null, { status: 204 });
-      return l(t3, { "access-control-allow-origin": n(e2), "access-control-allow-methods": o2?.join?.(",") ?? o2, "access-control-expose-headers": s2?.join?.(",") ?? s2, "access-control-allow-headers": a2?.join?.(",") ?? a2 ?? e2.headers.get("access-control-request-headers"), "access-control-max-age": c, "access-control-allow-credentials": r2 });
-    }
-  } };
-};
 
 // src/gemini-proxy.ts
 async function geminiProxy(rawReq) {
@@ -197,6 +163,55 @@ function hello(req) {
 `);
 }
 
+// src/itty-router/cors.ts
+var cors = (options = {}) => {
+  const { origin = "*", credentials = false, allowMethods = "*", allowHeaders, exposeHeaders, maxAge } = options;
+  const getAccessControlOrigin = (request) => {
+    const requestOrigin = request?.headers.get("origin");
+    if (!requestOrigin) return void 0;
+    if (origin === true) return requestOrigin;
+    if (origin instanceof RegExp) return origin.test(requestOrigin) ? requestOrigin : void 0;
+    if (Array.isArray(origin)) return origin.includes(requestOrigin) ? requestOrigin : void 0;
+    if (origin instanceof Function) return origin(requestOrigin);
+    return origin === "*" && credentials ? requestOrigin : origin;
+  };
+  const appendHeadersAndReturn = (response, headers) => {
+    for (const [key, value] of Object.entries(headers)) {
+      if (value) response.headers.append(key, value);
+    }
+    return response;
+  };
+  const preflight2 = (request) => {
+    if (request.method === "OPTIONS") {
+      const response = new Response(null, { status: 204 });
+      return appendHeadersAndReturn(response, {
+        "access-control-allow-origin": getAccessControlOrigin(request),
+        // @ts-ignore
+        "access-control-allow-methods": allowMethods?.join?.(",") ?? allowMethods,
+        // include allowed methods
+        // @ts-ignore
+        "access-control-expose-headers": exposeHeaders?.join?.(",") ?? exposeHeaders,
+        // include allowed headers
+        "access-control-allow-headers": (
+          // @ts-ignore
+          allowHeaders?.join?.(",") ?? allowHeaders ?? request.headers.get("access-control-request-headers")
+        ),
+        // include allowed headers
+        "access-control-max-age": maxAge?.toString(),
+        "access-control-allow-credentials": credentials.toString()
+      });
+    }
+  };
+  const corsify2 = (response, request) => {
+    if (response?.headers?.get("access-control-allow-origin") || response.status === 101) return response;
+    return appendHeadersAndReturn(response, {
+      "access-control-allow-origin": getAccessControlOrigin(request),
+      "access-control-allow-credentials": credentials.toString()
+    });
+  };
+  return { corsify: corsify2, preflight: preflight2 };
+};
+
 // src/log.ts
 var LEVEL = ["debug", "info", "warn", "error"];
 var Logger = class {
@@ -224,6 +239,146 @@ var Logger = class {
   }
 };
 
+// node_modules/.deno/eventsource-parser@1.1.2/node_modules/eventsource-parser/dist/index.js
+function createParser(onParse) {
+  let isFirstChunk;
+  let buffer;
+  let startingPosition;
+  let startingFieldLength;
+  let eventId;
+  let eventName;
+  let data;
+  reset();
+  return {
+    feed,
+    reset
+  };
+  function reset() {
+    isFirstChunk = true;
+    buffer = "";
+    startingPosition = 0;
+    startingFieldLength = -1;
+    eventId = void 0;
+    eventName = void 0;
+    data = "";
+  }
+  function feed(chunk) {
+    buffer = buffer ? buffer + chunk : chunk;
+    if (isFirstChunk && hasBom(buffer)) {
+      buffer = buffer.slice(BOM.length);
+    }
+    isFirstChunk = false;
+    const length = buffer.length;
+    let position = 0;
+    let discardTrailingNewline = false;
+    while (position < length) {
+      if (discardTrailingNewline) {
+        if (buffer[position] === "\n") {
+          ++position;
+        }
+        discardTrailingNewline = false;
+      }
+      let lineLength = -1;
+      let fieldLength = startingFieldLength;
+      let character;
+      for (let index = startingPosition; lineLength < 0 && index < length; ++index) {
+        character = buffer[index];
+        if (character === ":" && fieldLength < 0) {
+          fieldLength = index - position;
+        } else if (character === "\r") {
+          discardTrailingNewline = true;
+          lineLength = index - position;
+        } else if (character === "\n") {
+          lineLength = index - position;
+        }
+      }
+      if (lineLength < 0) {
+        startingPosition = length - position;
+        startingFieldLength = fieldLength;
+        break;
+      } else {
+        startingPosition = 0;
+        startingFieldLength = -1;
+      }
+      parseEventStreamLine(buffer, position, fieldLength, lineLength);
+      position += lineLength + 1;
+    }
+    if (position === length) {
+      buffer = "";
+    } else if (position > 0) {
+      buffer = buffer.slice(position);
+    }
+  }
+  function parseEventStreamLine(lineBuffer, index, fieldLength, lineLength) {
+    if (lineLength === 0) {
+      if (data.length > 0) {
+        onParse({
+          type: "event",
+          id: eventId,
+          event: eventName || void 0,
+          data: data.slice(0, -1)
+          // remove trailing newline
+        });
+        data = "";
+        eventId = void 0;
+      }
+      eventName = void 0;
+      return;
+    }
+    const noValue = fieldLength < 0;
+    const field = lineBuffer.slice(index, index + (noValue ? lineLength : fieldLength));
+    let step = 0;
+    if (noValue) {
+      step = lineLength;
+    } else if (lineBuffer[index + fieldLength + 1] === " ") {
+      step = fieldLength + 2;
+    } else {
+      step = fieldLength + 1;
+    }
+    const position = index + step;
+    const valueLength = lineLength - step;
+    const value = lineBuffer.slice(position, position + valueLength).toString();
+    if (field === "data") {
+      data += value ? "".concat(value, "\n") : "\n";
+    } else if (field === "event") {
+      eventName = value;
+    } else if (field === "id" && !value.includes("\0")) {
+      eventId = value;
+    } else if (field === "retry") {
+      const retry = parseInt(value, 10);
+      if (!Number.isNaN(retry)) {
+        onParse({
+          type: "reconnect-interval",
+          value: retry
+        });
+      }
+    }
+  }
+}
+var BOM = [239, 187, 191];
+function hasBom(buffer) {
+  return BOM.every((charCode, index) => buffer.charCodeAt(index) === charCode);
+}
+
+// node_modules/.deno/eventsource-parser@1.1.2/node_modules/eventsource-parser/dist/stream.js
+var EventSourceParserStream = class extends TransformStream {
+  constructor() {
+    let parser;
+    super({
+      start(controller) {
+        parser = createParser((event) => {
+          if (event.type === "event") {
+            controller.enqueue(event);
+          }
+        });
+      },
+      transform(chunk) {
+        parser.feed(chunk);
+      }
+    });
+  }
+};
+
 // src/gemini-api-client/errors.ts
 var GoogleGenerativeAIError = class extends Error {
   constructor(message) {
@@ -301,14 +456,20 @@ function formatBlockErrorMessage(response) {
 }
 
 // src/gemini-api-client/gemini-api-client.ts
-async function generateContent(apiParam, model, params, requestOptions) {
-  const url = new RequestUrl(model, "generateContent" /* GENERATE_CONTENT */, false, apiParam);
+async function* generateContent(apiParam, model, params, requestOptions) {
+  const url = new RequestUrl(model, "streamGenerateContent" /* STREAM_GENERATE_CONTENT */, true, apiParam);
   const response = await makeRequest(url, JSON.stringify(params), requestOptions);
-  const responseJson = await response.json();
-  const enhancedResponse = addHelpers(responseJson);
-  return {
-    response: enhancedResponse
-  };
+  const body = response.body;
+  if (body == null) {
+    return;
+  }
+  for await (const event of body.pipeThrough(new TextDecoderStream()).pipeThrough(new EventSourceParserStream())) {
+    const responseJson = JSON.parse(event.data);
+    const enhancedResponse = addHelpers(responseJson);
+    yield {
+      response: enhancedResponse
+    };
+  }
 }
 async function makeRequest(url, body, requestOptions) {
   let response;
@@ -322,18 +483,20 @@
       body
     });
     if (!response.ok) {
+      console.error(response);
      let message = "";
      try {
-        const json = await response.json();
-        message = json.error.message;
-        if (json.error.details) {
-          message += ` ${JSON.stringify(json.error.details)}`;
+        const errResp = await response.json();
+        message = errResp.error.message;
+        if (errResp.error.details) {
+          message += ` ${JSON.stringify(errResp.error.details)}`;
        }
      } catch (_e) {
      }
      throw new Error(`[${response.status} ${response.statusText}] ${message}`);
    }
  } catch (e) {
+    console.log(e);
    const err = new GoogleGenerativeAIError(`Error fetching from google -> ${e.message}`);
    err.stack = e.stack;
    throw err;
@@ -372,11 +535,22 @@ function buildFetchOptions(requestOptions) {
 
 // src/openai/chat/completions/NonStreamingChatProxyHandler.ts
 async function nonStreamingChatProxyHandler(req, apiParam, log) {
   const [model, geminiReq] = genModel(req);
-  const geminiResp = await generateContent(apiParam, model, geminiReq).then((it) => it.response.result()).catch((err) => {
+  let geminiResp = "";
+  try {
+    for await (const it of generateContent(apiParam, model, geminiReq)) {
+      const data = it.response.result();
+      if (typeof data === "string") {
+        geminiResp += data;
+      } else {
+        geminiResp = data;
+        break;
+      }
+    }
+  } catch (err) {
     log?.error(req);
     log?.error(err?.message ?? err.toString());
-    return err?.message ?? err.toString();
-  });
+    geminiResp = err?.message ?? err.toString();
+  }
   log?.debug(req);
   log?.debug(geminiResp);
   function genOpenAiResp(content) {
@@ -424,7 +598,16 @@ async function nonStreamingChatProxyHandler(req, apiParam, log) {
 // src/openai/chat/completions/StreamingChatProxyHandler.ts
 async function* streamingChatProxyHandler(req, apiParam) {
   const [model, geminiReq] = genModel(req);
-  const geminiResp = await generateContent(apiParam, model, geminiReq).then((it) => it.response.result()).catch((e) => e.message ?? e?.toString());
+  try {
+    for await (const it of generateContent(apiParam, model, geminiReq)) {
+      const data = it.response.result();
+      yield genOpenAiResp(data, false);
+    }
+  } catch (error) {
+    yield genOpenAiResp(error?.message ?? error.toString(), true);
+  }
+  yield genOpenAiResp("", true);
+  return void 0;
   function genOpenAiResp(content, stop) {
     if (typeof content === "string") {
       return {
@@ -455,8 +638,6 @@ async function* streamingChatProxyHandler(req, apiParam) {
       ]
     };
   }
-  yield genOpenAiResp(geminiResp, false);
-  yield genOpenAiResp("", true);
 }
 
 // src/openai/chat/completions/ChatProxyHandler.ts
@@ -477,34 +658,32 @@ async function chatProxyHandler(rawReq) {
   return sseResponse(
     async function* () {
       for await (const data of respArr) {
-        rawReq.logger?.debug(data);
+        rawReq.logger?.debug("streamingChatProxyHandler", data);
         yield JSON.stringify(data);
       }
       yield "[DONE]";
+      return void 0;
     }()
   );
 }
+var encoder = new TextEncoder();
 function sseResponse(dataStream) {
-  const { readable, writable } = new TransformStream();
-  const response = new Response(readable, {
+  const s = new ReadableStream({
+    async pull(controller) {
+      const { value, done } = await dataStream.next();
+      if (done) {
+        controller.close();
+      } else {
+        controller.enqueue(encoder.encode(toSseMsg({ data: value })));
+      }
+    }
+  });
+  const response = new Response(s, {
     status: 200,
    headers: new Headers({
      "Content-Type": "text/event-stream"
    })
  });
-  const encoder = new TextEncoder();
-  async function writer(data) {
-    const w = writable.getWriter();
-    await w.write(encoder.encode(data));
-    w.releaseLock();
-  }
-  ;
-  (async () => {
-    for await (const data of dataStream) {
-      await writer(toSseMsg({ data }));
-    }
-    await writable.close();
-  })();
   return response;
 }
 function toSseMsg(sseEvent) {
@@ -541,8 +720,8 @@ var modelDetail = (model) => {
 };
 
 // src/app.ts
-var { preflight, corsify } = y({ allowHeaders: "*" });
-var app = t({
+var { preflight, corsify } = cors({ allowHeaders: "*" });
+var app = r({
   before: [
     preflight,
     (req) => {
@@ -550,13 +729,11 @@ var app = t({
       req.logger.warn(`--> ${req.method} ${req.url}`);
     }
   ],
-  catch: s,
   finally: [
     corsify,
     (_, req) => {
       req.logger?.warn(`<-- ${req.method} ${req.url}`);
-    },
-    o
+    }
   ]
 });
 app.get("/", hello);
@@ -567,6 +744,33 @@ app.post(":model_version/models/:model_and_action", geminiProxy);
 app.all("*", () => new Response("Page Not Found", { status: 404 }));
 
 // main_bun.ts
+var PolyfillTextDecoderStream = class extends TransformStream {
+  encoding;
+  fatal;
+  ignoreBOM;
+  constructor(encoding = "utf-8", { fatal = false, ignoreBOM = false } = {}) {
+    const decoder = new TextDecoder(encoding, { fatal, ignoreBOM });
+    super({
+      transform(chunk, controller) {
+        const decoded = decoder.decode(chunk, { stream: true });
+        if (decoded.length > 0) {
+          controller.enqueue(decoded);
+        }
+      },
+      flush(controller) {
+        const output = decoder.decode();
+        if (output.length > 0) {
+          controller.enqueue(output);
+        }
+      }
+    });
+    this.encoding = encoding;
+    this.fatal = fatal;
+    this.ignoreBOM = ignoreBOM;
+  }
+  [Symbol.toStringTag] = "PolyfillTextDecoderStream";
+};
+globalThis.TextDecoderStream = PolyfillTextDecoderStream;
 console.log("Listening on http://localhost:8000/");
 Bun.serve({
   port: 8e3,
diff --git a/dist/main_cloudflare-workers.mjs b/dist/main_cloudflare-workers.mjs
index e576c8e..cbd9dce 100644
--- a/dist/main_cloudflare-workers.mjs
+++ b/dist/main_cloudflare-workers.mjs
@@ -1,59 +1,25 @@
-// node_modules/.deno/itty-router@5.0.17/node_modules/itty-router/index.mjs
-var t = ({ base: e = "", routes: t2 = [], ...r2 } = {}) => ({ __proto__: new Proxy({}, { get: (r3, o2, a2, s2) => (r4, ...c) => t2.push([o2.toUpperCase?.(), RegExp(`^${(s2 = (e + r4).replace(/\/+(\/|$)/g, "$1")).replace(/(\/?\.?):(\w+)\+/g, "($1(?<$2>*))").replace(/(\/?\.?):(\w+)/g, "($1(?<$2>[^$1/]+?))").replace(/\./g, "\\.").replace(/(\/?)\*/g, "($1.*)?")}/*$`), c, s2]) && a2 }), routes: t2, ...r2, async fetch(e2, ...o2) {
-  let a2, s2, c = new URL(e2.url), n = e2.query = { __proto__: null };
-  for (let [e3, t3] of c.searchParams) n[e3] = n[e3] ? [].concat(n[e3], t3) : t3;
-  e: try {
-    for (let t3 of r2.before || []) if (null != (a2 = await t3(e2.proxy ?? e2, ...o2))) break e;
-    t: for (let [r3, n2, l, i] of t2) if ((r3 == e2.method || "ALL" == r3) && (s2 = c.pathname.match(n2))) {
-      e2.params = s2.groups || {}, e2.route = i;
-      for (let t3 of l) if (null != (a2 = await t3(e2.proxy ?? e2, ...o2))) break t;
+// node_modules/.deno/itty-router@5.0.17/node_modules/itty-router/Router.mjs
+var r = ({ base: r2 = "", routes: e = [], ...a } = {}) => ({ __proto__: new Proxy({}, { get: (a2, t, o, c) => (a3, ...l) => e.push([t.toUpperCase?.(), RegExp(`^${(c = (r2 + a3).replace(/\/+(\/|$)/g, "$1")).replace(/(\/?\.?):(\w+)\+/g, "($1(?<$2>*))").replace(/(\/?\.?):(\w+)/g, "($1(?<$2>[^$1/]+?))").replace(/\./g, "\\.").replace(/(\/?)\*/g, "($1.*)?")}/*$`), l, c]) && o }), routes: e, ...a, async fetch(r3, ...t) {
+  let o, c, l = new URL(r3.url), p = r3.query = { __proto__: null };
+  for (let [r4, e2] of l.searchParams) p[r4] = p[r4] ? [].concat(p[r4], e2) : e2;
+  r: try {
+    for (let e2 of a.before || []) if (null != (o = await e2(r3.proxy ?? r3, ...t))) break r;
+    e: for (let [a2, p2, f, h] of e) if ((a2 == r3.method || "ALL" == a2) && (c = l.pathname.match(p2))) {
+      r3.params = c.groups || {}, r3.route = h;
+      for (let e2 of f) if (null != (o = await e2(r3.proxy ?? r3, ...t))) break e;
     }
-  } catch (t3) {
-    if (!r2.catch) throw t3;
-    a2 = await r2.catch(t3, e2.proxy ?? e2, ...o2);
+  } catch (e2) {
+    if (!a.catch) throw e2;
+    o = await a.catch(e2, r3.proxy ?? r3, ...t);
   }
   try {
-    for (let t3 of r2.finally || []) a2 = await t3(a2, e2.proxy ?? e2, ...o2) ?? a2;
-  } catch (t3) {
-    if (!r2.catch) throw t3;
-    a2 = await r2.catch(t3, e2.proxy ?? e2, ...o2);
+    for (let e2 of a.finally || []) o = await e2(o, r3.proxy ?? r3, ...t) ?? o;
+  } catch (e2) {
+    if (!a.catch) throw e2;
+    o = await a.catch(e2, r3.proxy ?? r3, ...t);
   }
-  return a2;
+  return o;
 } });
-var r = (e = "text/plain; charset=utf-8", t2) => (r2, o2 = {}) => {
-  if (void 0 === r2 || r2 instanceof Response) return r2;
-  const a2 = new Response(t2?.(r2) ?? r2, o2.url ? void 0 : o2);
-  return a2.headers.set("content-type", e), a2;
-};
-var o = r("application/json; charset=utf-8", JSON.stringify);
-var a = (e) => ({ 400: "Bad Request", 401: "Unauthorized", 403: "Forbidden", 404: "Not Found", 500: "Internal Server Error" })[e] || "Unknown Error";
-var s = (e = 500, t2) => {
-  if (e instanceof Error) {
-    const { message: r2, ...o2 } = e;
-    e = e.status || 500, t2 = { error: r2 || a(e), ...o2 };
-  }
-  return t2 = { status: e, ..."object" == typeof t2 ? t2 : { error: t2 || a(e) } }, o(t2, { status: e });
-};
-var p = r("text/plain; charset=utf-8", String);
-var f = r("text/html");
-var u = r("image/jpeg");
-var h = r("image/png");
-var g = r("image/webp");
-var y = (e = {}) => {
-  const { origin: t2 = "*", credentials: r2 = false, allowMethods: o2 = "*", allowHeaders: a2, exposeHeaders: s2, maxAge: c } = e, n = (e2) => {
-    const o3 = e2?.headers.get("origin");
-    return true === t2 ? o3 : t2 instanceof RegExp ? t2.test(o3) ? o3 : void 0 : Array.isArray(t2) ? t2.includes(o3) ? o3 : void 0 : t2 instanceof Function ? t2(o3) : "*" == t2 && r2 ? o3 : t2;
-  }, l = (e2, t3) => {
-    for (const [r3, o3] of Object.entries(t3)) o3 && e2.headers.append(r3, o3);
-    return e2;
-  };
-  return { corsify: (e2, t3) => e2?.headers?.get("access-control-allow-origin") || 101 == e2.status ? e2 : l(e2.clone(), { "access-control-allow-origin": n(t3), "access-control-allow-credentials": r2 }), preflight: (e2) => {
-    if ("OPTIONS" == e2.method) {
-      const t3 = new Response(null, { status: 204 });
-      return l(t3, { "access-control-allow-origin": n(e2), "access-control-allow-methods": o2?.join?.(",") ?? o2, "access-control-expose-headers": s2?.join?.(",") ?? s2, "access-control-allow-headers": a2?.join?.(",") ?? a2 ?? e2.headers.get("access-control-request-headers"), "access-control-max-age": c, "access-control-allow-credentials": r2 });
-    }
-  } };
-};
 
 // src/gemini-proxy.ts
 async function geminiProxy(rawReq) {
@@ -197,6 +163,55 @@ function hello(req) {
 `);
 }
 
+// src/itty-router/cors.ts
+var cors = (options = {}) => {
+  const { origin = "*", credentials = false, allowMethods = "*", allowHeaders, exposeHeaders, maxAge } = options;
+  const getAccessControlOrigin = (request) => {
+    const requestOrigin = request?.headers.get("origin");
+    if (!requestOrigin) return void 0;
+    if (origin === true) return requestOrigin;
+    if (origin instanceof RegExp) return origin.test(requestOrigin) ? requestOrigin : void 0;
+    if (Array.isArray(origin)) return origin.includes(requestOrigin) ? requestOrigin : void 0;
+    if (origin instanceof Function) return origin(requestOrigin);
+    return origin === "*" && credentials ? requestOrigin : origin;
+  };
+  const appendHeadersAndReturn = (response, headers) => {
+    for (const [key, value] of Object.entries(headers)) {
+      if (value) response.headers.append(key, value);
+    }
+    return response;
+  };
+  const preflight2 = (request) => {
+    if (request.method === "OPTIONS") {
+      const response = new Response(null, { status: 204 });
+      return appendHeadersAndReturn(response, {
+        "access-control-allow-origin": getAccessControlOrigin(request),
+        // @ts-ignore
+        "access-control-allow-methods": allowMethods?.join?.(",") ?? allowMethods,
+        // include allowed methods
+        // @ts-ignore
+        "access-control-expose-headers": exposeHeaders?.join?.(",") ?? exposeHeaders,
+        // include allowed headers
+        "access-control-allow-headers": (
+          // @ts-ignore
+          allowHeaders?.join?.(",") ?? allowHeaders ?? request.headers.get("access-control-request-headers")
+        ),
+        // include allowed headers
+        "access-control-max-age": maxAge?.toString(),
+        "access-control-allow-credentials": credentials.toString()
+      });
+    }
+  };
+  const corsify2 = (response, request) => {
+    if (response?.headers?.get("access-control-allow-origin") || response.status === 101) return response;
+    return appendHeadersAndReturn(response, {
+      "access-control-allow-origin": getAccessControlOrigin(request),
+      "access-control-allow-credentials": credentials.toString()
+    });
+  };
+  return { corsify: corsify2, preflight: preflight2 };
+};
+
 // src/log.ts
 var LEVEL = ["debug", "info", "warn", "error"];
 var Logger = class {
@@ -224,6 +239,146 @@ var Logger = class {
   }
 };
 
+// node_modules/.deno/eventsource-parser@1.1.2/node_modules/eventsource-parser/dist/index.js
+function createParser(onParse) {
+  let isFirstChunk;
+  let buffer;
+  let startingPosition;
+  let startingFieldLength;
+  let eventId;
+  let eventName;
+  let data;
+  reset();
+  return {
+    feed,
+    reset
+  };
+  function reset() {
+    isFirstChunk = true;
+    buffer = "";
+    startingPosition = 0;
+    startingFieldLength = -1;
+    eventId = void 0;
+    eventName = void 0;
+    data = "";
+  }
+  function feed(chunk) {
+    buffer = buffer ? buffer + chunk : chunk;
+    if (isFirstChunk && hasBom(buffer)) {
+      buffer = buffer.slice(BOM.length);
+    }
+    isFirstChunk = false;
+    const length = buffer.length;
+    let position = 0;
+    let discardTrailingNewline = false;
+    while (position < length) {
+      if (discardTrailingNewline) {
+        if (buffer[position] === "\n") {
+          ++position;
+        }
+        discardTrailingNewline = false;
+      }
+      let lineLength = -1;
+      let fieldLength = startingFieldLength;
+      let character;
+      for (let index = startingPosition; lineLength < 0 && index < length; ++index) {
+        character = buffer[index];
+        if (character === ":" && fieldLength < 0) {
+          fieldLength = index - position;
+        } else if (character === "\r") {
+          discardTrailingNewline = true;
+          lineLength = index - position;
+        } else if (character === "\n") {
+          lineLength = index - position;
+        }
+      }
+      if (lineLength < 0) {
+        startingPosition = length - position;
+        startingFieldLength = fieldLength;
+        break;
+      } else {
+        startingPosition = 0;
+        startingFieldLength = -1;
+      }
+      parseEventStreamLine(buffer, position, fieldLength, lineLength);
+      position += lineLength + 1;
+    }
+    if (position === length) {
+      buffer = "";
+    } else if (position > 0) {
+      buffer = buffer.slice(position);
+    }
+  }
+  function parseEventStreamLine(lineBuffer, index, fieldLength, lineLength) {
+    if (lineLength === 0) {
+      if (data.length > 0) {
+        onParse({
+          type: "event",
+          id: eventId,
+          event: eventName || void 0,
+          data: data.slice(0, -1)
+          // remove trailing newline
+        });
+        data = "";
+        eventId = void 0;
+      }
+      eventName = void 0;
+      return;
+    }
+    const noValue = fieldLength < 0;
+    const field = lineBuffer.slice(index, index + (noValue ? lineLength : fieldLength));
+    let step = 0;
+    if (noValue) {
+      step = lineLength;
+    } else if (lineBuffer[index + fieldLength + 1] === " ") {
+      step = fieldLength + 2;
+    } else {
+      step = fieldLength + 1;
+    }
+    const position = index + step;
+    const valueLength = lineLength - step;
+    const value = lineBuffer.slice(position, position + valueLength).toString();
+    if (field === "data") {
+      data += value ? "".concat(value, "\n") : "\n";
+    } else if (field === "event") {
+      eventName = value;
+    } else if (field === "id" && !value.includes("\0")) {
+      eventId = value;
+    } else if (field === "retry") {
+      const retry = parseInt(value, 10);
+      if (!Number.isNaN(retry)) {
+        onParse({
+          type: "reconnect-interval",
+          value: retry
+        });
+      }
+    }
+  }
+}
+var BOM = [239, 187, 191];
+function hasBom(buffer) {
+  return BOM.every((charCode, index) => buffer.charCodeAt(index) === charCode);
+}
+
+// node_modules/.deno/eventsource-parser@1.1.2/node_modules/eventsource-parser/dist/stream.js
+var EventSourceParserStream = class extends TransformStream {
+  constructor() {
+    let parser;
+    super({
+      start(controller) {
+        parser = createParser((event) => {
+          if (event.type === "event") {
+            controller.enqueue(event);
+          }
+        });
+      },
+      transform(chunk) {
+        parser.feed(chunk);
+      }
+    });
+  }
+};
+
 // src/gemini-api-client/errors.ts
 var GoogleGenerativeAIError = class extends Error {
   constructor(message) {
@@ -301,14 +456,20 @@ function formatBlockErrorMessage(response) {
 }
 
 // src/gemini-api-client/gemini-api-client.ts
-async function generateContent(apiParam, model, params, requestOptions) {
-  const url = new RequestUrl(model, "generateContent" /* GENERATE_CONTENT */, false, apiParam);
+async function* generateContent(apiParam, model, params, requestOptions) {
+  const url = new RequestUrl(model, "streamGenerateContent" /* STREAM_GENERATE_CONTENT */, true, apiParam);
   const response = await makeRequest(url, JSON.stringify(params), requestOptions);
-  const responseJson = await response.json();
-  const enhancedResponse = addHelpers(responseJson);
-  return {
-    response: enhancedResponse
-  };
+  const body = response.body;
+  if (body == null) {
+    return;
+  }
+  for await (const event of body.pipeThrough(new TextDecoderStream()).pipeThrough(new EventSourceParserStream())) {
+    const responseJson = JSON.parse(event.data);
+    const enhancedResponse = addHelpers(responseJson);
+    yield {
+      response: enhancedResponse
+    };
+  }
 }
 async function makeRequest(url, body, requestOptions) {
   let response;
@@ -322,18 +483,20 @@
       body
     });
     if (!response.ok) {
+      console.error(response);
      let message = "";
      try {
-        const json = await response.json();
-        message = json.error.message;
-        if (json.error.details) {
-          message += ` ${JSON.stringify(json.error.details)}`;
+        const errResp = await response.json();
+        message = errResp.error.message;
+        if (errResp.error.details) {
+          message += ` ${JSON.stringify(errResp.error.details)}`;
        }
      } catch (_e) {
      }
      throw new Error(`[${response.status} ${response.statusText}] ${message}`);
    }
  } catch (e) {
+    console.log(e);
    const err = new GoogleGenerativeAIError(`Error fetching from google -> ${e.message}`);
    err.stack = e.stack;
    throw err;
@@ -372,11 +535,22 @@ function buildFetchOptions(requestOptions) {
 
 // src/openai/chat/completions/NonStreamingChatProxyHandler.ts
 async function nonStreamingChatProxyHandler(req, apiParam, log) {
   const [model, geminiReq] = genModel(req);
-  const geminiResp = await generateContent(apiParam, model, geminiReq).then((it) => it.response.result()).catch((err) => {
+  let geminiResp = "";
+  try {
+    for await (const it of generateContent(apiParam, model, geminiReq)) {
+      const data = it.response.result();
+      if (typeof data === "string") {
+        geminiResp += data;
+      } else {
+        geminiResp = data;
+        break;
+      }
+    }
+  } catch (err) {
     log?.error(req);
     log?.error(err?.message ?? err.toString());
-    return err?.message ?? err.toString();
-  });
+    geminiResp = err?.message ?? err.toString();
+  }
   log?.debug(req);
   log?.debug(geminiResp);
   function genOpenAiResp(content) {
@@ -424,7 +598,16 @@ async function nonStreamingChatProxyHandler(req, apiParam, log) {
 // src/openai/chat/completions/StreamingChatProxyHandler.ts
 async function* streamingChatProxyHandler(req, apiParam) {
   const [model, geminiReq] = genModel(req);
-  const geminiResp = await generateContent(apiParam, model, geminiReq).then((it) => it.response.result()).catch((e) => e.message ?? e?.toString());
+  try {
+    for await (const it of generateContent(apiParam, model, geminiReq)) {
+      const data = it.response.result();
+      yield genOpenAiResp(data, false);
+    }
+  } catch (error) {
+    yield genOpenAiResp(error?.message ?? error.toString(), true);
+  }
+  yield genOpenAiResp("", true);
+  return void 0;
   function genOpenAiResp(content, stop) {
     if (typeof content === "string") {
       return {
@@ -455,8 +638,6 @@ async function* streamingChatProxyHandler(req, apiParam) {
       ]
     };
   }
-  yield genOpenAiResp(geminiResp, false);
-  yield genOpenAiResp("", true);
 }
 
 // src/openai/chat/completions/ChatProxyHandler.ts
@@ -477,34 +658,32 @@ async function chatProxyHandler(rawReq) {
   return sseResponse(
     async function* () {
       for await (const data of respArr) {
-        rawReq.logger?.debug(data);
+        rawReq.logger?.debug("streamingChatProxyHandler", data);
        yield JSON.stringify(data);
      }
      yield "[DONE]";
+      return void 0;
    }()
  );
 }
+var encoder = new TextEncoder();
 function sseResponse(dataStream) {
-  const { readable, writable } = new TransformStream();
-  const response = new Response(readable, {
+  const s = new ReadableStream({
+    async pull(controller) {
+      const { value, done } = await dataStream.next();
+      if (done) {
+        controller.close();
+      } else {
+        controller.enqueue(encoder.encode(toSseMsg({ data: value })));
+      }
+    }
+  });
+  const response = new Response(s, {
     status: 200,
    headers: new Headers({
      "Content-Type": "text/event-stream"
    })
  });
-  const encoder = new TextEncoder();
-  async function writer(data) {
-    const w = writable.getWriter();
-    await w.write(encoder.encode(data));
-    w.releaseLock();
-  }
-  ;
-  (async () => {
-    for await (const data of dataStream) {
-      await writer(toSseMsg({ data }));
-    }
-    await writable.close();
-  })();
   return response;
 }
 function toSseMsg(sseEvent) {
@@ -541,8 +720,8 @@ var modelDetail = (model) => {
 };
 
 // src/app.ts
-var { preflight, corsify } = y({ allowHeaders: "*" });
-var app = t({
+var { preflight, corsify } = cors({ allowHeaders: "*" });
+var app = r({
   before: [
     preflight,
     (req) => {
@@ -550,13 +729,11 @@ var app = t({
       req.logger.warn(`--> ${req.method} ${req.url}`);
     }
   ],
-  catch: s,
   finally: [
     corsify,
     (_, req) => {
       req.logger?.warn(`<-- ${req.method} ${req.url}`);
-    },
-    o
+    }
   ]
 });
 app.get("/", hello);
diff --git a/dist/main_deno.mjs b/dist/main_deno.mjs
index 0351791..900b4eb 100644
--- a/dist/main_deno.mjs
+++ b/dist/main_deno.mjs
@@ -1,59 +1,25 @@
-// node_modules/.deno/itty-router@5.0.17/node_modules/itty-router/index.mjs
-var t = ({ base: e = "", routes: t2 = [], ...r2 } = {}) => ({ __proto__: new Proxy({}, { get: (r3, o2, a2, s2) => (r4, ...c) => t2.push([o2.toUpperCase?.(), RegExp(`^${(s2 = (e + r4).replace(/\/+(\/|$)/g, "$1")).replace(/(\/?\.?):(\w+)\+/g, "($1(?<$2>*))").replace(/(\/?\.?):(\w+)/g, "($1(?<$2>[^$1/]+?))").replace(/\./g, "\\.").replace(/(\/?)\*/g, "($1.*)?")}/*$`), c, s2]) && a2 }), routes: t2, ...r2, async fetch(e2, ...o2) {
-  let a2, s2, c = new URL(e2.url), n = e2.query = { __proto__: null };
-  for (let [e3, t3] of c.searchParams) n[e3] = n[e3] ? [].concat(n[e3], t3) : t3;
-  e: try {
-    for (let t3 of r2.before || []) if (null != (a2 = await t3(e2.proxy ?? e2, ...o2))) break e;
-    t: for (let [r3, n2, l, i] of t2) if ((r3 == e2.method || "ALL" == r3) && (s2 = c.pathname.match(n2))) {
-      e2.params = s2.groups || {}, e2.route = i;
-      for (let t3 of l) if (null != (a2 = await t3(e2.proxy ?? e2, ...o2))) break t;
+// node_modules/.deno/itty-router@5.0.17/node_modules/itty-router/Router.mjs
+var r = ({ base: r2 = "", routes: e = [], ...a } = {}) => ({ __proto__: new Proxy({}, { get: (a2, t, o, c) => (a3, ...l) => e.push([t.toUpperCase?.(), RegExp(`^${(c = (r2 + a3).replace(/\/+(\/|$)/g, "$1")).replace(/(\/?\.?):(\w+)\+/g, "($1(?<$2>*))").replace(/(\/?\.?):(\w+)/g, "($1(?<$2>[^$1/]+?))").replace(/\./g, "\\.").replace(/(\/?)\*/g, "($1.*)?")}/*$`), l, c]) && o }), routes: e, ...a, async fetch(r3, ...t) {
+  let o, c, l = new URL(r3.url), p = r3.query = { __proto__: null };
+  for (let [r4, e2] of l.searchParams) p[r4] = p[r4] ? [].concat(p[r4], e2) : e2;
+  r: try {
+    for (let e2 of a.before || []) if (null != (o = await e2(r3.proxy ?? r3, ...t))) break r;
+    e: for (let [a2, p2, f, h] of e) if ((a2 == r3.method || "ALL" == a2) && (c = l.pathname.match(p2))) {
+      r3.params = c.groups || {}, r3.route = h;
+      for (let e2 of f) if (null != (o = await e2(r3.proxy ?? r3, ...t))) break e;
     }
-  } catch (t3) {
-    if (!r2.catch) throw t3;
-    a2 = await r2.catch(t3, e2.proxy ?? e2, ...o2);
+  } catch (e2) {
+    if (!a.catch) throw e2;
+    o = await a.catch(e2, r3.proxy ?? r3, ...t);
   }
   try {
-    for (let t3 of r2.finally || []) a2 = await t3(a2, e2.proxy ?? e2, ...o2) ?? a2;
-  } catch (t3) {
-    if (!r2.catch) throw t3;
-    a2 = await r2.catch(t3, e2.proxy ?? e2, ...o2);
+    for (let e2 of a.finally || []) o = await e2(o, r3.proxy ?? r3, ...t) ?? o;
+  } catch (e2) {
+    if (!a.catch) throw e2;
+    o = await a.catch(e2, r3.proxy ?? r3, ...t);
   }
-  return a2;
+  return o;
 } });
-var r = (e = "text/plain; charset=utf-8", t2) => (r2, o2 = {}) => {
-  if (void 0 === r2 || r2 instanceof Response) return r2;
-  const a2 = new Response(t2?.(r2) ?? r2, o2.url ? void 0 : o2);
-  return a2.headers.set("content-type", e), a2;
-};
-var o = r("application/json; charset=utf-8", JSON.stringify);
-var a = (e) => ({ 400: "Bad Request", 401: "Unauthorized", 403: "Forbidden", 404: "Not Found", 500: "Internal Server Error" })[e] || "Unknown Error";
-var s = (e = 500, t2) => {
-  if (e instanceof Error) {
-    const { message: r2, ...o2 } = e;
-    e = e.status || 500, t2 = { error: r2 || a(e), ...o2 };
-  }
-  return t2 = { status: e, ..."object" == typeof t2 ? t2 : { error: t2 || a(e) } }, o(t2, { status: e });
-};
-var p = r("text/plain; charset=utf-8", String);
-var f = r("text/html");
-var u = r("image/jpeg");
-var h = r("image/png");
-var g = r("image/webp");
-var y = (e = {}) => {
-  const { origin: t2 = "*", credentials: r2 = false, allowMethods: o2 = "*", allowHeaders: a2, exposeHeaders: s2, maxAge: c } = e, n = (e2) => {
-    const o3 = e2?.headers.get("origin");
-    return true === t2 ? o3 : t2 instanceof RegExp ? t2.test(o3) ? o3 : void 0 : Array.isArray(t2) ? t2.includes(o3) ? o3 : void 0 : t2 instanceof Function ? t2(o3) : "*" == t2 && r2 ? o3 : t2;
-  }, l = (e2, t3) => {
-    for (const [r3, o3] of Object.entries(t3)) o3 && e2.headers.append(r3, o3);
-    return e2;
-  };
-  return { corsify: (e2, t3) => e2?.headers?.get("access-control-allow-origin") || 101 == e2.status ? e2 : l(e2.clone(), { "access-control-allow-origin": n(t3), "access-control-allow-credentials": r2 }), preflight: (e2) => {
-    if ("OPTIONS" == e2.method) {
-      const t3 = new Response(null, { status: 204 });
-      return l(t3, { "access-control-allow-origin": n(e2), "access-control-allow-methods": o2?.join?.(",") ?? o2, "access-control-expose-headers": s2?.join?.(",") ?? s2, "access-control-allow-headers": a2?.join?.(",") ?? a2 ?? e2.headers.get("access-control-request-headers"), "access-control-max-age": c, "access-control-allow-credentials": r2 });
-    }
-  } };
-};
 
 // src/gemini-proxy.ts
 async function geminiProxy(rawReq) {
@@ -197,6 +163,55 @@ function hello(req) {
 `);
 }
 
+// src/itty-router/cors.ts
+var cors = (options = {}) => {
+  const { origin = "*", credentials = false, allowMethods = "*", allowHeaders, exposeHeaders, maxAge } = options;
+  const getAccessControlOrigin = (request) => {
+    const requestOrigin = request?.headers.get("origin");
+    if (!requestOrigin) return void 0;
+    if (origin === true) return requestOrigin;
+    if (origin instanceof RegExp) return origin.test(requestOrigin) ? requestOrigin : void 0;
+    if (Array.isArray(origin)) return origin.includes(requestOrigin) ? requestOrigin : void 0;
+    if (origin instanceof Function) return origin(requestOrigin);
+    return origin === "*" && credentials ? requestOrigin : origin;
+  };
+  const appendHeadersAndReturn = (response, headers) => {
+    for (const [key, value] of Object.entries(headers)) {
+      if (value) response.headers.append(key, value);
+    }
+    return response;
+  };
+  const preflight2 = (request) => {
+    if (request.method === "OPTIONS") {
+      const response = new Response(null, { status: 204 });
+      return appendHeadersAndReturn(response, {
+        "access-control-allow-origin": getAccessControlOrigin(request),
+        // @ts-ignore
+        "access-control-allow-methods": allowMethods?.join?.(",") ?? allowMethods,
+        // include allowed methods
+        // @ts-ignore
+        "access-control-expose-headers": exposeHeaders?.join?.(",") ?? exposeHeaders,
+        // include allowed headers
+        "access-control-allow-headers": (
+          // @ts-ignore
+          allowHeaders?.join?.(",") ?? allowHeaders ?? request.headers.get("access-control-request-headers")
+        ),
+        // include allowed headers
+        "access-control-max-age": maxAge?.toString(),
+        "access-control-allow-credentials": credentials.toString()
+      });
+    }
+  };
+  const corsify2 = (response, request) => {
+    if (response?.headers?.get("access-control-allow-origin") || response.status === 101) return response;
+    return appendHeadersAndReturn(response, {
+      "access-control-allow-origin": getAccessControlOrigin(request),
+      "access-control-allow-credentials": credentials.toString()
+    });
+  };
+  return { corsify: corsify2, preflight: preflight2 };
+};
+
 // src/log.ts
 var LEVEL = ["debug", "info", "warn", "error"];
 var Logger = class {
@@ -224,6 +239,146 @@ var Logger = class {
   }
 };
 
+// node_modules/.deno/eventsource-parser@1.1.2/node_modules/eventsource-parser/dist/index.js
+function createParser(onParse) {
+  let isFirstChunk;
+  let buffer;
+  let startingPosition;
+  let startingFieldLength;
+  let eventId;
+  let eventName;
+  let data;
+  reset();
+  return {
+    feed,
+    reset
+  };
+  function reset() {
+    isFirstChunk = true;
+    buffer = "";
+    startingPosition = 0;
+    startingFieldLength = -1;
+    eventId = void 0;
+    eventName = void 0;
+    data = "";
+  }
+  function feed(chunk) {
+    buffer = buffer ? buffer + chunk : chunk;
+    if (isFirstChunk && hasBom(buffer)) {
+      buffer = buffer.slice(BOM.length);
+    }
+    isFirstChunk = false;
+    const length = buffer.length;
+    let position = 0;
+    let discardTrailingNewline = false;
+    while (position < length) {
+      if (discardTrailingNewline) {
+        if (buffer[position] === "\n") {
+          ++position;
+        }
+        discardTrailingNewline = false;
+      }
+      let lineLength = -1;
+      let fieldLength = startingFieldLength;
+      let character;
+      for (let index = startingPosition; lineLength < 0 && index < length; ++index) {
+        character = buffer[index];
+        if (character === ":" && fieldLength < 0) {
+          fieldLength = index - position;
+        } else if (character === "\r") {
+          discardTrailingNewline = true;
+          lineLength = index - position;
+        } else if (character === "\n") {
+          lineLength = index - position;
+        }
+      }
+      if (lineLength < 0) {
+        startingPosition = length - position;
+        startingFieldLength = fieldLength;
+        break;
+      } else {
+        startingPosition = 0;
+        startingFieldLength = -1;
+      }
+      parseEventStreamLine(buffer, position, fieldLength, lineLength);
+      position += lineLength + 1;
+    }
+    if (position === length) {
+      buffer = "";
+    } else if (position > 0) {
+      buffer = buffer.slice(position);
+    }
+  }
+  function parseEventStreamLine(lineBuffer, index, fieldLength, lineLength) {
+    if (lineLength === 0) {
+      if (data.length > 0) {
+        onParse({
+          type: "event",
+          id: eventId,
+          event: eventName || void 0,
+          data: data.slice(0, -1)
+          // remove trailing newline
+        });
+        data = "";
+        eventId = void 0;
+      }
+      eventName = void 0;
+      return;
+    }
+    const noValue = fieldLength < 0;
+    const field = lineBuffer.slice(index, index + (noValue ? lineLength : fieldLength));
+    let step = 0;
+    if (noValue) {
+      step = lineLength;
+    } else if (lineBuffer[index + fieldLength + 1] === " ") {
+      step = fieldLength + 2;
+    } else {
+      step = fieldLength + 1;
+    }
+    const position = index + step;
+    const valueLength = lineLength - step;
+    const value = lineBuffer.slice(position, position + valueLength).toString();
+    if (field === "data") {
+      data += value ? "".concat(value, "\n") : "\n";
+    } else if (field === "event") {
+      eventName = value;
+    } else if (field === "id" && !value.includes("\0")) {
+      eventId = value;
+    } else if (field === "retry") {
+      const retry = parseInt(value, 10);
+      if (!Number.isNaN(retry)) {
+        onParse({
+          type: "reconnect-interval",
+          value: retry
+        });
+      }
+    }
+  }
+}
+var BOM = [239, 187, 191];
+function hasBom(buffer) {
+  return BOM.every((charCode, index) => buffer.charCodeAt(index) === charCode);
+}
+
+// node_modules/.deno/eventsource-parser@1.1.2/node_modules/eventsource-parser/dist/stream.js
+var EventSourceParserStream = class extends TransformStream {
+  constructor() {
+    let parser;
+    super({
+      start(controller) {
+        parser = createParser((event) => {
+          if (event.type === "event") {
+            controller.enqueue(event);
+          }
+        });
+      },
+      transform(chunk) {
+        parser.feed(chunk);
+      }
+    });
+  }
+};
+
 // src/gemini-api-client/errors.ts
 var GoogleGenerativeAIError = class extends Error {
   constructor(message) {
@@ -301,14 +456,20 @@ function formatBlockErrorMessage(response) {
 }
 
 // src/gemini-api-client/gemini-api-client.ts
-async function generateContent(apiParam, model, params, requestOptions) {
-  const url = new RequestUrl(model, "generateContent" /* GENERATE_CONTENT */, false, apiParam);
+async function* generateContent(apiParam, model, params, requestOptions) {
+  const url = new RequestUrl(model, "streamGenerateContent" /* STREAM_GENERATE_CONTENT */, true, apiParam);
   const response = await makeRequest(url, JSON.stringify(params), requestOptions);
-  const responseJson = await response.json();
-  const enhancedResponse = addHelpers(responseJson);
-  return {
-    response: enhancedResponse
-  };
+  const body = response.body;
+  if (body == null) {
+    return;
+  }
+  for await (const event of body.pipeThrough(new TextDecoderStream()).pipeThrough(new EventSourceParserStream())) {
+    const responseJson = JSON.parse(event.data);
+    const enhancedResponse = addHelpers(responseJson);
+    yield {
+      response: enhancedResponse
+    };
+  }
 }
 async function makeRequest(url, body, requestOptions) {
   let response;
@@ -322,18 +483,20 @@
       body
     });
     if (!response.ok) {
+      console.error(response);
      let message = "";
      try {
-        const json = await response.json();
-        message = json.error.message;
-        if (json.error.details) {
-          message += ` ${JSON.stringify(json.error.details)}`;
+        const errResp = await response.json();
+        message = errResp.error.message;
+        if (errResp.error.details) {
+          message += ` ${JSON.stringify(errResp.error.details)}`;
        }
      } catch (_e) {
      }
      throw new Error(`[${response.status} ${response.statusText}] ${message}`);
    }
  } catch (e) {
+    console.log(e);
    const err = new GoogleGenerativeAIError(`Error fetching from google -> ${e.message}`);
    err.stack = e.stack;
    throw err;
@@ -372,11 +535,22 @@ function buildFetchOptions(requestOptions) {
 
 // src/openai/chat/completions/NonStreamingChatProxyHandler.ts
 async function nonStreamingChatProxyHandler(req, apiParam, log) {
   const [model, geminiReq] = genModel(req);
-  const geminiResp = await generateContent(apiParam, model, geminiReq).then((it) => it.response.result()).catch((err) => {
+  let geminiResp = "";
+  try {
+    for await (const it of generateContent(apiParam, model, geminiReq)) {
+      const data = it.response.result();
+      if (typeof data === "string") {
+        geminiResp += data;
+      } else {
+        geminiResp = data;
+        break;
+      }
+    }
+  } catch (err) {
     log?.error(req);
     log?.error(err?.message ?? err.toString());
-    return err?.message ?? err.toString();
-  });
+    geminiResp = err?.message ?? err.toString();
+  }
   log?.debug(req);
   log?.debug(geminiResp);
   function genOpenAiResp(content) {
@@ -424,7 +598,16 @@ async function nonStreamingChatProxyHandler(req, apiParam, log) {
 // src/openai/chat/completions/StreamingChatProxyHandler.ts
 async function* streamingChatProxyHandler(req, apiParam) {
   const [model, geminiReq] = genModel(req);
-  const geminiResp = await generateContent(apiParam, model, geminiReq).then((it) => it.response.result()).catch((e) => e.message ?? e?.toString());
+  try {
+    for await (const it of generateContent(apiParam, model, geminiReq)) {
+      const data = it.response.result();
+      yield genOpenAiResp(data, false);
+    }
+  } catch (error) {
+    yield genOpenAiResp(error?.message ?? error.toString(), true);
+  }
+  yield genOpenAiResp("", true);
+  return void 0;
   function genOpenAiResp(content, stop) {
     if (typeof content === "string") {
       return {
@@ -455,8 +638,6 @@ async function* streamingChatProxyHandler(req, apiParam) {
       ]
     };
   }
-  yield genOpenAiResp(geminiResp, false);
-  yield genOpenAiResp("", true);
 }
 
 // src/openai/chat/completions/ChatProxyHandler.ts
@@ -477,34 +658,32 @@ async function chatProxyHandler(rawReq) {
   return sseResponse(
     async function* () {
       for await (const data of respArr) {
-        rawReq.logger?.debug(data);
+        rawReq.logger?.debug("streamingChatProxyHandler", data);
        yield JSON.stringify(data);
      }
      yield "[DONE]";
+      return void 0;
    }()
  );
 }
+var encoder = new TextEncoder();
 function sseResponse(dataStream) {
-  const { readable, writable } = new TransformStream();
-  const response = new Response(readable, {
+  const s = new ReadableStream({
+    async pull(controller) {
+      const { value, done } = await dataStream.next();
+      if (done) {
+        controller.close();
+      } else {
+        controller.enqueue(encoder.encode(toSseMsg({ data: value })));
+      }
+    }
+  });
+  const response = new Response(s, {
     status: 200,
    headers: new Headers({
      "Content-Type": "text/event-stream"
    })
  });
-  const encoder = new TextEncoder();
-  async function writer(data) {
-    const w = writable.getWriter();
-    await w.write(encoder.encode(data));
-    w.releaseLock();
-  }
-  ;
-  (async () => {
-    for await (const data of dataStream) {
-      await writer(toSseMsg({ data }));
-    }
-    await writable.close();
-  })();
   return response;
 }
 function toSseMsg(sseEvent) {
@@ -541,8 +720,8 @@ var modelDetail = (model) => {
 };
 
 // src/app.ts
-var { preflight, corsify } = y({ allowHeaders: "*" });
-var app = t({
+var { preflight, corsify } = cors({ allowHeaders: "*" });
+var app = r({
   before: [
     preflight,
     (req) => {
@@ -550,13 +729,11 @@ var app = t({
       req.logger.warn(`--> ${req.method} ${req.url}`);
     }
   ],
-  catch: s,
   finally: [
     corsify,
     (_, req) => {
       req.logger?.warn(`<-- ${req.method} ${req.url}`);
-    },
-    o
+    }
  ]
 });
 app.get("/", hello);
diff --git a/dist/main_node.mjs b/dist/main_node.mjs
index 186a1d0..69fb4e2 100644
--- a/dist/main_node.mjs
+++ b/dist/main_node.mjs
@@ -422,62 +422,28 @@ var serve = (options, listeningListener) => {
   return server;
 };
 
-// node_modules/.deno/itty-router@5.0.17/node_modules/itty-router/index.mjs
-var t = ({ base: e = "", routes: t2 = [], ...r2 } = {}) => ({ __proto__: new Proxy({}, { get: (r3, o2, a2, s2) => (r4, ...c) => t2.push([o2.toUpperCase?.(), RegExp(`^${(s2 = (e + r4).replace(/\/+(\/|$)/g, "$1")).replace(/(\/?\.?):(\w+)\+/g, "($1(?<$2>*))").replace(/(\/?\.?):(\w+)/g, "($1(?<$2>[^$1/]+?))").replace(/\./g, "\\.").replace(/(\/?)\*/g, "($1.*)?")}/*$`), c, s2]) && a2 }), routes: t2, ...r2, async fetch(e2, ...o2) {
-  let a2, s2, c = new URL(e2.url), n = e2.query = { __proto__: null };
-  for (let [e3, t3] of c.searchParams) n[e3] = n[e3] ? [].concat(n[e3], t3) : t3;
-  e: try {
-    for (let t3 of r2.before || []) if (null != (a2 = await t3(e2.proxy ?? e2, ...o2))) break e;
-    t: for (let [r3, n2, l, i] of t2) if ((r3 == e2.method || "ALL" == r3) && (s2 = c.pathname.match(n2))) {
-      e2.params = s2.groups || {}, e2.route = i;
-      for (let t3 of l) if (null != (a2 = await t3(e2.proxy ?? e2, ...o2))) break t;
-    }
-  } catch (t3) {
-    if (!r2.catch) throw t3;
-    a2 = await r2.catch(t3, e2.proxy ?? e2, ...o2);
+// node_modules/.deno/itty-router@5.0.17/node_modules/itty-router/Router.mjs
+var r = ({ base: r2 = "", routes: e = [], ...a } = {}) => ({ __proto__: new Proxy({}, { get: (a2, t, o, c) => (a3, ...l) => e.push([t.toUpperCase?.(), RegExp(`^${(c = (r2 + a3).replace(/\/+(\/|$)/g, "$1")).replace(/(\/?\.?):(\w+)\+/g, "($1(?<$2>*))").replace(/(\/?\.?):(\w+)/g, "($1(?<$2>[^$1/]+?))").replace(/\./g, "\\.").replace(/(\/?)\*/g, "($1.*)?")}/*$`), l, c]) && o }), routes: e, ...a, async fetch(r3, ...t) {
+  let o, c, l = new URL(r3.url), p = r3.query = { __proto__: null };
+  for (let [r4, e2] of l.searchParams) p[r4] = p[r4] ? [].concat(p[r4], e2) : e2;
+  r: try {
+    for (let e2 of a.before || []) if (null != (o = await e2(r3.proxy ?? r3, ...t))) break r;
+    e: for (let [a2, p2, f, h] of e) if ((a2 == r3.method || "ALL" == a2) && (c = l.pathname.match(p2))) {
+      r3.params = c.groups || {}, r3.route = h;
+      for (let e2 of f) if (null != (o = await e2(r3.proxy ?? r3, ...t))) break e;
+    }
+  } catch (e2) {
+    if (!a.catch) throw e2;
+    o = await a.catch(e2, r3.proxy ?? r3, ...t);
   }
   try {
-    for (let t3 of r2.finally || []) a2 = await t3(a2, e2.proxy ?? e2, ...o2) ?? a2;
-  } catch (t3) {
-    if (!r2.catch) throw t3;
-    a2 = await r2.catch(t3, e2.proxy ?? e2, ...o2);
+    for (let e2 of a.finally || []) o = await e2(o, r3.proxy ?? r3, ...t) ?? o;
+  } catch (e2) {
+    if (!a.catch) throw e2;
+    o = await a.catch(e2, r3.proxy ?? r3, ...t);
   }
-  return a2;
+  return o;
 } });
-var r = (e = "text/plain; charset=utf-8", t2) => (r2, o2 = {}) => {
-  if (void 0 === r2 || r2 instanceof Response) return r2;
-  const a2 = new Response(t2?.(r2) ?? r2, o2.url ? void 0 : o2);
-  return a2.headers.set("content-type", e), a2;
-};
-var o = r("application/json; charset=utf-8", JSON.stringify);
-var a = (e) => ({ 400: "Bad Request", 401: "Unauthorized", 403: "Forbidden", 404: "Not Found", 500: "Internal Server Error" })[e] || "Unknown Error";
-var s = (e = 500, t2) => {
-  if (e instanceof Error) {
-    const { message: r2, ...o2 } = e;
-    e = e.status || 500, t2 = { error: r2 || a(e), ...o2 };
-  }
-  return t2 = { status: e, ..."object" == typeof t2 ? t2 : { error: t2 || a(e) } }, o(t2, { status: e });
-};
-var p = r("text/plain; charset=utf-8", String);
-var f = r("text/html");
-var u = r("image/jpeg");
-var h = r("image/png");
-var g = r("image/webp");
-var y = (e = {}) => {
-  const { origin: t2 = "*", credentials: r2 = false, allowMethods: o2 = "*", allowHeaders: a2, exposeHeaders: s2, maxAge: c } = e, n = (e2) => {
-    const o3 = e2?.headers.get("origin");
-    return true === t2 ? o3 : t2 instanceof RegExp ? t2.test(o3) ? o3 : void 0 : Array.isArray(t2) ? t2.includes(o3) ? o3 : void 0 : t2 instanceof Function ? t2(o3) : "*" == t2 && r2 ? o3 : t2;
-  }, l = (e2, t3) => {
-    for (const [r3, o3] of Object.entries(t3)) o3 && e2.headers.append(r3, o3);
-    return e2;
-  };
-  return { corsify: (e2, t3) => e2?.headers?.get("access-control-allow-origin") || 101 == e2.status ? e2 : l(e2.clone(), { "access-control-allow-origin": n(t3), "access-control-allow-credentials": r2 }), preflight: (e2) => {
-    if ("OPTIONS" == e2.method) {
-      const t3 = new Response(null, { status: 204 });
-      return l(t3, { "access-control-allow-origin": n(e2), "access-control-allow-methods": o2?.join?.(",") ?? o2, "access-control-expose-headers": s2?.join?.(",") ?? s2, "access-control-allow-headers": a2?.join?.(",") ?? a2 ?? e2.headers.get("access-control-request-headers"), "access-control-max-age": c, "access-control-allow-credentials": r2 });
-    }
-  } };
-};
 
 // src/gemini-proxy.ts
 async function geminiProxy(rawReq) {
@@ -621,6 +587,55 @@ function hello(req) {
 `);
 }
 
+// src/itty-router/cors.ts
+var cors = (options = {}) => {
+  const { origin = "*", credentials = false, allowMethods = "*", allowHeaders, exposeHeaders, maxAge } = options;
+  const getAccessControlOrigin = (request) => {
+    const requestOrigin = request?.headers.get("origin");
+    if (!requestOrigin) return void 0;
+    if (origin === true) return requestOrigin;
+    if (origin instanceof RegExp) return origin.test(requestOrigin) ? requestOrigin : void 0;
+    if (Array.isArray(origin)) return origin.includes(requestOrigin) ? requestOrigin : void 0;
+    if (origin instanceof Function) return origin(requestOrigin);
+    return origin === "*" && credentials ? requestOrigin : origin;
+  };
+  const appendHeadersAndReturn = (response, headers) => {
+    for (const [key, value] of Object.entries(headers)) {
+      if (value) response.headers.append(key, value);
+    }
+    return response;
+  };
+  const preflight2 = (request) => {
+    if (request.method === "OPTIONS") {
+      const response = new Response(null, { status: 204 });
+      return appendHeadersAndReturn(response, {
+        "access-control-allow-origin": getAccessControlOrigin(request),
+        // @ts-ignore
+        "access-control-allow-methods": allowMethods?.join?.(",") ?? allowMethods,
+        // include allowed methods
+        // @ts-ignore
+        "access-control-expose-headers": exposeHeaders?.join?.(",") ?? exposeHeaders,
+        // include allowed headers
+        "access-control-allow-headers": (
+          // @ts-ignore
+          allowHeaders?.join?.(",") ?? allowHeaders ?? request.headers.get("access-control-request-headers")
+        ),
+        // include allowed headers
+        "access-control-max-age": maxAge?.toString(),
+        "access-control-allow-credentials": credentials.toString()
+      });
+    }
+  };
+  const corsify2 = (response, request) => {
+    if (response?.headers?.get("access-control-allow-origin") || response.status === 101) return response;
+    return appendHeadersAndReturn(response, {
+      "access-control-allow-origin": getAccessControlOrigin(request),
+      "access-control-allow-credentials": credentials.toString()
+    });
+  };
+  return { corsify: corsify2, preflight: preflight2 };
+};
+
 // src/log.ts
 var LEVEL = ["debug", "info", "warn", "error"];
 var Logger = class {
@@ -648,6 +663,146 @@ var Logger = class {
   }
 };
 
+// node_modules/.deno/eventsource-parser@1.1.2/node_modules/eventsource-parser/dist/index.js
+function createParser(onParse) {
+  let isFirstChunk;
+  let buffer;
+  let startingPosition;
+  let startingFieldLength;
+  let eventId;
+  let eventName;
+  let data;
+  reset();
+  return {
+    feed,
+    reset
+  };
+  function reset() {
+    isFirstChunk = true;
+    buffer = "";
+    startingPosition = 0;
+    startingFieldLength = -1;
+    eventId = void 0;
+    eventName = void 0;
+    data = "";
+  }
+  function feed(chunk) {
+    buffer = buffer ? buffer + chunk : chunk;
+    if (isFirstChunk && hasBom(buffer)) {
+      buffer = buffer.slice(BOM.length);
+    }
+    isFirstChunk = false;
+    const length = buffer.length;
+    let position = 0;
+    let discardTrailingNewline = false;
+    while (position < length) {
+      if (discardTrailingNewline) {
+        if (buffer[position] === "\n") {
+          ++position;
+        }
+        discardTrailingNewline = false;
+      }
+      let lineLength = -1;
+      let fieldLength = startingFieldLength;
+      let character;
+      for (let index = startingPosition; lineLength < 0 && index < length; ++index) {
+        character = buffer[index];
+        if (character === ":" && fieldLength < 0) {
+          fieldLength = index - position;
+        } else if (character === "\r") {
+          discardTrailingNewline = true;
+          lineLength = index - position;
+        } else if (character === "\n") {
+          lineLength = index - position;
+        }
+      }
+      if (lineLength < 0) {
+        startingPosition = length - position;
+        startingFieldLength = fieldLength;
+        break;
+      } else {
+        startingPosition = 0;
+        startingFieldLength = -1;
+      }
+      parseEventStreamLine(buffer, position, fieldLength, lineLength);
+      position += lineLength + 1;
+    }
+    if (position === length) {
+      buffer = "";
+    } else if (position > 0) {
+      buffer = buffer.slice(position);
+    }
+  }
+  function parseEventStreamLine(lineBuffer, index, fieldLength, lineLength) {
+    if (lineLength === 0) {
+      if (data.length > 0) {
+        onParse({
+          type: "event",
+          id: eventId,
+          event: eventName || void 0,
+          data: data.slice(0, -1)
+          // remove trailing newline
+        });
+        data = "";
+        eventId = void 0;
+      }
+      eventName = void 0;
+      return;
+    }
+    const noValue = fieldLength < 0;
+    const field = lineBuffer.slice(index, index + (noValue ? lineLength : fieldLength));
+    let step = 0;
+    if (noValue) {
+      step = lineLength;
+    } else if (lineBuffer[index + fieldLength + 1] === " ") {
+      step = fieldLength + 2;
+    } else {
+      step = fieldLength + 1;
+    }
+    const position = index + step;
+    const valueLength = lineLength - step;
+    const value = lineBuffer.slice(position, position + valueLength).toString();
+    if (field === "data") {
+      data += value ? "".concat(value, "\n") : "\n";
+    } else if (field === "event") {
+      eventName = value;
+    } else if (field === "id" && !value.includes("\0")) {
+      eventId = value;
+    } else if (field === "retry") {
+      const retry = parseInt(value, 10);
+      if (!Number.isNaN(retry)) {
+        onParse({
+          type: "reconnect-interval",
+          value: retry
+        });
+      }
+    }
+  }
+}
+var BOM = [239, 187, 191];
+function hasBom(buffer) {
+  return BOM.every((charCode, index) => buffer.charCodeAt(index) === charCode);
+}
+
+// node_modules/.deno/eventsource-parser@1.1.2/node_modules/eventsource-parser/dist/stream.js
+var EventSourceParserStream = class extends TransformStream {
+  constructor() {
+    let parser;
+    super({
+      start(controller) {
+        parser = createParser((event) => {
+          if (event.type === "event") {
+            controller.enqueue(event);
+          }
+        });
+      },
+      transform(chunk) {
+        parser.feed(chunk);
+      }
+    });
+  }
+};
+
 // src/gemini-api-client/errors.ts
 var GoogleGenerativeAIError = class extends Error {
   constructor(message) {
@@ -725,14 +880,20 @@ function formatBlockErrorMessage(response) {
 }
 
 // src/gemini-api-client/gemini-api-client.ts
-async function generateContent(apiParam, model, params, requestOptions) {
-  const url = new RequestUrl(model, "generateContent" /* GENERATE_CONTENT */, false, apiParam);
+async function* generateContent(apiParam, model, params, requestOptions) {
+  const url = new RequestUrl(model, "streamGenerateContent" /* STREAM_GENERATE_CONTENT */, true, apiParam);
   const response = await makeRequest(url, JSON.stringify(params), requestOptions);
-  const responseJson = await response.json();
-  const enhancedResponse = addHelpers(responseJson);
-  return {
-    response: enhancedResponse
-  };
+  const body = response.body;
+  if (body == null) {
+    return;
+  }
+  for await (const event of body.pipeThrough(new TextDecoderStream()).pipeThrough(new EventSourceParserStream())) {
+    const responseJson = JSON.parse(event.data);
+    const enhancedResponse = addHelpers(responseJson);
+    yield {
+      response: enhancedResponse
+    };
+  }
 }
 async function makeRequest(url, body, requestOptions) {
   let response;
@@ -746,18 +907,20 @@
       body
     });
     if (!response.ok) {
+      console.error(response);
      let message = "";
      try {
-        const json = await response.json();
-        message = json.error.message;
-        if (json.error.details) {
-          message += ` ${JSON.stringify(json.error.details)}`;
+        const errResp = await response.json();
+        message = errResp.error.message;
+        if (errResp.error.details) {
+          message += ` ${JSON.stringify(errResp.error.details)}`;
        }
      } catch (_e) {
      }
      throw new Error(`[${response.status} ${response.statusText}] ${message}`);
    }
  } catch (e) {
+    console.log(e);
    const err = new GoogleGenerativeAIError(`Error fetching from google -> ${e.message}`);
    err.stack = e.stack;
    throw err;
@@ -796,11 +959,22 @@ function buildFetchOptions(requestOptions) {
 
 // src/openai/chat/completions/NonStreamingChatProxyHandler.ts
 async function nonStreamingChatProxyHandler(req, apiParam, log) {
   const [model, geminiReq] = genModel(req);
-  const geminiResp = await generateContent(apiParam, model, geminiReq).then((it) => it.response.result()).catch((err) => {
+  let geminiResp = "";
+  try {
+    for await (const it of generateContent(apiParam, model, geminiReq)) {
+      const data = it.response.result();
+      if (typeof data === "string") {
+        geminiResp += data;
+      } else {
+        geminiResp = data;
+        break;
+      }
+    }
+  } catch (err) {
     log?.error(req);
     log?.error(err?.message ?? err.toString());
-    return err?.message ??
err.toString();
-  });
+    geminiResp = err?.message ?? err.toString();
+  }
   log?.debug(req);
   log?.debug(geminiResp);
   function genOpenAiResp(content) {
@@ -848,7 +1022,16 @@ async function nonStreamingChatProxyHandler(req, apiParam, log) {

// src/openai/chat/completions/StreamingChatProxyHandler.ts
async function* streamingChatProxyHandler(req, apiParam) {
  const [model, geminiReq] = genModel(req);
-  const geminiResp = await generateContent(apiParam, model, geminiReq).then((it) => it.response.result()).catch((e) => e.message ?? e?.toString());
+  try {
+    for await (const it of generateContent(apiParam, model, geminiReq)) {
+      const data = it.response.result();
+      yield genOpenAiResp(data, false);
+    }
+  } catch (error) {
+    yield genOpenAiResp(error?.message ?? error.toString(), true);
+  }
+  yield genOpenAiResp("", true);
+  return void 0;
  function genOpenAiResp(content, stop) {
    if (typeof content === "string") {
      return {
@@ -879,8 +1062,6 @@ async function* streamingChatProxyHandler(req, apiParam) {
      ]
    };
  }
-  yield genOpenAiResp(geminiResp, false);
-  yield genOpenAiResp("", true);
}

// src/openai/chat/completions/ChatProxyHandler.ts
@@ -901,34 +1082,32 @@ async function chatProxyHandler(rawReq) {
  return sseResponse(
    async function* () {
      for await (const data of respArr) {
-        rawReq.logger?.debug(data);
+        rawReq.logger?.debug("streamingChatProxyHandler", data);
        yield JSON.stringify(data);
      }
      yield "[DONE]";
+      return void 0;
    }()
  );
}
+var encoder = new TextEncoder();
function sseResponse(dataStream) {
-  const { readable, writable } = new TransformStream();
-  const response = new Response(readable, {
+  const s = new ReadableStream({
+    async pull(controller) {
+      const { value, done } = await dataStream.next();
+      if (done) {
+        controller.close();
+      } else {
+        controller.enqueue(encoder.encode(toSseMsg({ data: value })));
+      }
+    }
+  });
+  const response = new Response(s, {
    status: 200,
    headers: new Headers({
      "Content-Type": "text/event-stream"
    })
  });
-  const encoder = new TextEncoder();
-  async function writer(data) {
-    const w = writable.getWriter();
-    await w.write(encoder.encode(data));
-    w.releaseLock();
-  }
-  ;
-  (async () => {
-    for await (const data of dataStream) {
-      await writer(toSseMsg({ data }));
-    }
-    await writable.close();
-  })();
  return response;
}
function toSseMsg(sseEvent) {
@@ -965,8 +1144,8 @@ var modelDetail = (model) => {
};

// src/app.ts
-var { preflight, corsify } = y({ allowHeaders: "*" });
-var app = t({
+var { preflight, corsify } = cors({ allowHeaders: "*" });
+var app = r({
  before: [
    preflight,
    (req) => {
@@ -974,13 +1153,11 @@
      req.logger.warn(`--> ${req.method} ${req.url}`);
    }
  ],
-  catch: s,
  finally: [
    corsify,
    (_, req) => {
      req.logger?.warn(`<-- ${req.method} ${req.url}`);
-    },
-    o
+    }
  ]
});
app.get("/", hello);
diff --git a/main_bun.ts b/main_bun.ts
index b55f202..f3e67c1 100644
--- a/main_bun.ts
+++ b/main_bun.ts
@@ -1,4 +1,38 @@
 import { app } from "./src/app.ts"
+class PolyfillTextDecoderStream extends TransformStream<Uint8Array, string> implements TextDecoderStream {
+  readonly encoding: string
+  readonly fatal: boolean
+  readonly ignoreBOM: boolean
+
+  constructor(
+    encoding = "utf-8",
+    { fatal = false, ignoreBOM = false }: ConstructorParameters<typeof TextDecoder>[1] = {},
+  ) {
+    const decoder = new TextDecoder(encoding, { fatal, ignoreBOM })
+    super({
+      transform(chunk: Uint8Array, controller: TransformStreamDefaultController<string>) {
+        const decoded = decoder.decode(chunk, { stream: true })
+        if (decoded.length > 0) {
+          controller.enqueue(decoded)
+        }
+      },
+      flush(controller: TransformStreamDefaultController<string>) {
+        const output = decoder.decode()
+        if (output.length > 0) {
+          controller.enqueue(output)
+        }
+      },
+    })
+
+    this.encoding = encoding
+    this.fatal = fatal
+    this.ignoreBOM = ignoreBOM
+  }
+  [Symbol.toStringTag] = "PolyfillTextDecoderStream"
+}
+// fix: Bun does not support TextDecoderStream
+// https://github.com/oven-sh/bun/issues/5648
+globalThis.TextDecoderStream = PolyfillTextDecoderStream
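+// Usage sketch (illustrative): with the polyfill installed, the SSE pipeline in
+// src/gemini-api-client/gemini-api-client.ts runs on Bun unchanged:
+//   body.pipeThrough(new TextDecoderStream()).pipeThrough(new EventSourceParserStream())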
 console.log("Listening on http://localhost:8000/")
diff --git a/package.json b/package.json
index cd31c52..33fd838 100644
--- a/package.json
+++ b/package.json
@@ -19,9 +19,10 @@
   },
   "dependencies": {
     "itty-router": "5.0.17",
+    "eventsource-parser": "1.1.2",
     "@hono/node-server": "1.11.2"
   },
   "devDependencies": {
-    "esbuild": "0.21.4"
+    "esbuild": "0.21.5"
   }
 }
diff --git a/src/app.ts b/src/app.ts
index 29a1ace..0b543bb 100644
--- a/src/app.ts
+++ b/src/app.ts
@@ -1,6 +1,8 @@
-import { type IRequest, type IttyRouterType, Router, cors, error, json } from "itty-router"
+import type { IRequest, IttyRouterType } from "itty-router"
+import { Router } from "itty-router/Router"
 import { geminiProxy } from "./gemini-proxy.ts"
 import { hello } from "./hello.ts"
+import { cors } from "./itty-router/cors.ts"
 import { Logger } from "./log.ts"
 import { chatProxyHandler } from "./openai/chat/completions/ChatProxyHandler.ts"
 import { modelDetail, models } from "./openai/models.ts"
@@ -15,14 +17,12 @@ const app: IttyRouterType = Router({
       req.logger.warn(`--> ${req.method} ${req.url}`)
     },
   ],
-  catch: error,
   finally: [
     corsify,
     (_, req) => {
       req.logger?.warn(`<-- ${req.method} ${req.url}`)
       // return resp
     },
-    json,
   ],
 })
diff --git a/src/gemini-api-client/gemini-api-client.ts b/src/gemini-api-client/gemini-api-client.ts
index 77016c3..9198cca 100644
--- a/src/gemini-api-client/gemini-api-client.ts
+++ b/src/gemini-api-client/gemini-api-client.ts
@@ -1,20 +1,28 @@
+import { EventSourceParserStream } from "eventsource-parser/stream"
 import type { ApiParam, GeminiModel } from "../utils.ts"
 import { GoogleGenerativeAIError } from "./errors.ts"
 import { addHelpers } from "./response-helper.ts"
 import type { GenerateContentRequest, GenerateContentResponse, GenerateContentResult, RequestOptions } from "./types.ts"

-export async function generateContent(
+export async function* generateContent(
   apiParam: ApiParam,
   model: GeminiModel,
   params: GenerateContentRequest,
   requestOptions?: RequestOptions,
-): Promise<GenerateContentResult> {
-  const url = new RequestUrl(model, Task.GENERATE_CONTENT, false, apiParam)
+): AsyncGenerator<GenerateContentResult> {
+  const url = new RequestUrl(model, Task.STREAM_GENERATE_CONTENT, true, apiParam)
   const response = await makeRequest(url, JSON.stringify(params), requestOptions)
-  const responseJson: GenerateContentResponse = await response.json()
-  const enhancedResponse = addHelpers(responseJson)
-  return {
-    response: enhancedResponse,
+  const body = response.body
+  if (body == null) {
+    return
+  }
+
+  for await (const event of body.pipeThrough(new TextDecoderStream()).pipeThrough(new EventSourceParserStream())) {
+    const responseJson: GenerateContentResponse = JSON.parse(event.data)
+    const enhancedResponse = addHelpers(responseJson)
+    yield {
+      response: enhancedResponse,
+    }
   }
 }
 export enum TaskType {
@@ -38,12 +46,13 @@ async function makeRequest(url: RequestUrl, body: string, requestOptions?: Reque
     body,
   })
   if (!response.ok) {
+    console.error(response)
     let message = ""
     try {
-      const json = await response.json()
-      message = json.error.message
-      if (json.error.details) {
-        message += ` 
${JSON.stringify(json.error.details)}` + const errResp = await response.json() + message = errResp.error.message + if (errResp.error.details) { + message += ` ${JSON.stringify(errResp.error.details)}` } } catch (_e) { // ignored @@ -51,6 +60,7 @@ async function makeRequest(url: RequestUrl, body: string, requestOptions?: Reque throw new Error(`[${response.status} ${response.statusText}] ${message}`) } } catch (e) { + console.log(e) const err = new GoogleGenerativeAIError(`Error fetching from google -> ${e.message}`) err.stack = e.stack throw err diff --git a/src/generated-types/gemini-types.ts b/src/generated-types/gemini-types.ts index 9b39bb6..0953789 100644 --- a/src/generated-types/gemini-types.ts +++ b/src/generated-types/gemini-types.ts @@ -1301,7 +1301,7 @@ export interface components { supportedGenerationMethods?: string[]; /** * Format: float - * @description Controls the randomness of the output. Values can range over `[0.0,1.0]`, inclusive. A value closer to `1.0` will produce responses that are more varied, while a value closer to `0.0` will typically result in less surprising responses from the model. This value specifies default to be used by the backend while making the call to the model. + * @description Controls the randomness of the output. Values can range over `[0.0,2.0]`, inclusive. A higher value will produce responses that are more varied, while a value closer to `0.0` will typically result in less surprising responses from the model. This value specifies default to be used by the backend while making the call to the model. */ temperature?: number; /** @@ -1691,7 +1691,7 @@ export interface components { candidatesTokenCount?: number; /** * Format: int32 - * @description Number of tokens in the prompt. + * @description Number of tokens in the prompt. When cached_content is set, this is still the total effective prompt size. I.e. this includes the number of tokens in the cached content. */ promptTokenCount?: number; /** diff --git a/src/generated-types/openai-types.ts b/src/generated-types/openai-types.ts index 2467720..10dbfad 100644 --- a/src/generated-types/openai-types.ts +++ b/src/generated-types/openai-types.ts @@ -54,9 +54,9 @@ export interface paths { * * The Assistants API supports files up to 2 million tokens and of specific file types. See the [Assistants Tools guide](/docs/assistants/tools) for details. * - * The Fine-tuning API only supports `.jsonl` files. + * The Fine-tuning API only supports `.jsonl` files. The input also has certain required formats for fine-tuning [chat](/docs/api-reference/fine-tuning/chat-input) or [completions](/docs/api-reference/fine-tuning/completions-input) models. * - * The Batch API only supports `.jsonl` files up to 100 MB in size. + * The Batch API only supports `.jsonl` files up to 100 MB in size. The input also has a specific required [format](/docs/api-reference/batch/request-input). * * Please [contact us](https://help.openai.com/) if you need to increase these storage limits. */ @@ -243,7 +243,7 @@ export interface paths { get: operations["retrieveBatch"]; }; "/batches/{batch_id}/cancel": { - /** Cancels an in-progress batch. */ + /** Cancels an in-progress batch. The batch will be in status `cancelling` for up to 10 minutes, before changing to `cancelled`, where it will have partial results (if any) available in the output file. */ post: operations["cancelBatch"]; }; } @@ -539,7 +539,33 @@ export interface components { /** @description The name of the function to call. 
*/ name: string; [key: string]: unknown; - }; + } | null; + [key: string]: unknown; + }; + /** Assistant message */ + FineTuneChatCompletionRequestAssistantMessage: { + /** @description The contents of the assistant message. Required unless `function_call` is specified. */ + content?: string | null; + /** + * @description The role of the messages author, in this case `assistant`. + * @enum {string} + */ + role: "assistant"; + /** @description An optional name for the participant. Provides the model information to differentiate between participants of the same role. */ + name?: string; + /** @description The name and arguments of a function that should be called, as generated by the model. */ + function_call?: { + /** @description The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. */ + arguments: string; + /** @description The name of the function to call. */ + name: string; + [key: string]: unknown; + } | null; + /** + * @description Controls whether the assistant message is trained against (0 or 1) + * @enum {integer} + */ + weight?: 0 | 1; [key: string]: unknown; }; /** Tool message */ @@ -571,8 +597,9 @@ export interface components { name: string; [key: string]: unknown; }; + FineTuneChatCompletionRequestFunctionMessage: Record & components["schemas"]["ChatCompletionRequestFunctionMessage"]; /** - * @description The parameters the functions accepts, described as a JSON Schema object. See the [guide](/docs/guides/text-generation/function-calling) for examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format. + * @description The parameters the functions accepts, described as a JSON Schema object. See the [guide](/docs/guides/function-calling) for examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format. * * Omitting `parameters` defines a function with an empty parameter list. */ @@ -635,6 +662,11 @@ export interface components { }; [key: string]: unknown; }; + /** + * @description Whether to enable [parallel function calling](/docs/guides/function-calling/parallel-function-calling) during tool use. + * @default true + */ + ParallelToolCalls: boolean; /** @description The tool calls generated by the model, such as function calls. */ ChatCompletionMessageToolCalls: components["schemas"]["ChatCompletionMessageToolCall"][]; ChatCompletionMessageToolCall: { @@ -844,6 +876,7 @@ export interface components { /** @description A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported. */ tools?: components["schemas"]["ChatCompletionTool"][]; tool_choice?: components["schemas"]["ChatCompletionToolChoiceOption"]; + parallel_tool_calls?: components["schemas"]["ParallelToolCalls"]; /** * @description A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids). 
* @@ -1294,7 +1327,7 @@ export interface components { * * @enum {string} */ - purpose: "assistants" | "batch" | "fine-tune"; + purpose: "assistants" | "batch" | "fine-tune" | "vision"; [key: string]: unknown; }; DeleteFileResponse: { @@ -1319,6 +1352,8 @@ export interface components { * * Your dataset must be formatted as a JSONL file. Additionally, you must upload your file with the purpose `fine-tune`. * + * The contents of the file should differ depending on if the model uses the [chat](/docs/api-reference/fine-tuning/chat-input) or [completions](/docs/api-reference/fine-tuning/completions-input) format. + * * See the [fine-tuning guide](/docs/guides/fine-tuning) for more details. * * @example file-abc123 @@ -1868,6 +1903,21 @@ export interface components { object: "fine_tuning.job.checkpoint"; [key: string]: unknown; }; + /** @description The per-line training example of a fine-tuning input file for chat models */ + FinetuneChatRequestInput: { + messages?: (components["schemas"]["ChatCompletionRequestSystemMessage"] | components["schemas"]["ChatCompletionRequestUserMessage"] | components["schemas"]["FineTuneChatCompletionRequestAssistantMessage"] | components["schemas"]["FineTuneChatCompletionRequestFunctionMessage"])[]; + /** @description A list of functions the model may generate JSON inputs for. */ + functions?: components["schemas"]["ChatCompletionFunctions"][]; + [key: string]: unknown; + }; + /** @description The per-line training example of a fine-tuning input file for completions models */ + FinetuneCompletionRequestInput: { + /** @description The input prompt for this training example. */ + prompt?: string; + /** @description The desired completion for this training example. */ + completion?: string; + [key: string]: unknown; + }; /** @description Usage statistics for the completion request. */ CompletionUsage: { /** @description Number of tokens in the generated completion. */ @@ -2021,6 +2071,33 @@ export interface components { vector_stores?: { /** @description A list of [file](/docs/api-reference/files) IDs to add to the vector store. There can be a maximum of 10000 files in a vector store. */ file_ids?: string[]; + /** @description The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy. */ + chunking_strategy?: OneOf<[{ + /** + * @description Always `auto`. + * @enum {string} + */ + type: "auto"; + [key: string]: unknown; + }, { + /** + * @description Always `static`. + * @enum {string} + */ + type: "static"; + static: { + /** @description The maximum number of tokens in each chunk. The default value is `800`. The minimum value is `100` and the maximum value is `4096`. */ + max_chunk_size_tokens: number; + /** + * @description The number of tokens that overlap between chunks. The default value is `400`. + * + * Note that the overlap must not exceed half of `max_chunk_size_tokens`. + */ + chunk_overlap_tokens: number; + [key: string]: unknown; + }; + [key: string]: unknown; + }]>; /** @description Set of 16 key-value pairs that can be attached to a vector store. This can be useful for storing additional information about the vector store in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long. 
*/ metadata?: Record; [key: string]: unknown; @@ -2134,6 +2211,25 @@ export interface components { }; /** FileSearch tool */ AssistantToolsFileSearch: { + /** + * @description The type of tool being defined: `file_search` + * @enum {string} + */ + type: "file_search"; + /** @description Overrides for the file search tool. */ + file_search?: { + /** + * @description The maximum number of results the file search tool should output. The default is 20 for gpt-4* models and 5 for gpt-3.5-turbo. This number should be between 1 and 50 inclusive. + * + * Note that the file search tool may output fewer than `max_num_results` results. See the [file search tool documentation](/docs/assistants/tools/file-search/number-of-chunks-returned) for more information. + */ + max_num_results?: number; + [key: string]: unknown; + }; + [key: string]: unknown; + }; + /** FileSearch tool */ + AssistantToolsFileSearchTypeOnly: { /** * @description The type of tool being defined: `file_search` * @enum {string} @@ -2277,6 +2373,7 @@ export interface components { max_completion_tokens: number | null; truncation_strategy: components["schemas"]["TruncationObject"]; tool_choice: components["schemas"]["AssistantsApiToolChoiceOption"]; + parallel_tool_calls: components["schemas"]["ParallelToolCalls"]; response_format: components["schemas"]["AssistantsApiResponseFormatOption"]; [key: string]: unknown; }; @@ -2322,6 +2419,7 @@ export interface components { max_completion_tokens?: number | null; truncation_strategy?: components["schemas"]["TruncationObject"]; tool_choice?: components["schemas"]["AssistantsApiToolChoiceOption"]; + parallel_tool_calls?: components["schemas"]["ParallelToolCalls"]; response_format?: components["schemas"]["AssistantsApiResponseFormatOption"]; [key: string]: unknown; }; @@ -2432,6 +2530,7 @@ export interface components { max_completion_tokens?: number | null; truncation_strategy?: components["schemas"]["TruncationObject"]; tool_choice?: components["schemas"]["AssistantsApiToolChoiceOption"]; + parallel_tool_calls?: components["schemas"]["ParallelToolCalls"]; response_format?: components["schemas"]["AssistantsApiResponseFormatOption"]; [key: string]: unknown; }; @@ -2492,6 +2591,33 @@ export interface components { vector_stores?: { /** @description A list of [file](/docs/api-reference/files) IDs to add to the vector store. There can be a maximum of 10000 files in a vector store. */ file_ids?: string[]; + /** @description The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy. */ + chunking_strategy?: OneOf<[{ + /** + * @description Always `auto`. + * @enum {string} + */ + type: "auto"; + [key: string]: unknown; + }, { + /** + * @description Always `static`. + * @enum {string} + */ + type: "static"; + static: { + /** @description The maximum number of tokens in each chunk. The default value is `800`. The minimum value is `100` and the maximum value is `4096`. */ + max_chunk_size_tokens: number; + /** + * @description The number of tokens that overlap between chunks. The default value is `400`. + * + * Note that the overlap must not exceed half of `max_chunk_size_tokens`. + */ + chunk_overlap_tokens: number; + [key: string]: unknown; + }; + [key: string]: unknown; + }]>; /** @description Set of 16 key-value pairs that can be attached to a vector store. This can be useful for storing additional information about the vector store in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long. 
*/ metadata?: Record; [key: string]: unknown; @@ -2596,7 +2722,7 @@ export interface components { /** @description The ID of the file to attach to the message. */ file_id?: string; /** @description The tools to add this file to. */ - tools?: (components["schemas"]["AssistantToolsCode"] | components["schemas"]["AssistantToolsFileSearch"])[]; + tools?: (components["schemas"]["AssistantToolsCode"] | components["schemas"]["AssistantToolsFileSearchTypeOnly"])[]; [key: string]: unknown; })[]) | null; /** @description Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long. */ @@ -2643,7 +2769,7 @@ export interface components { /** @description The ID of the file to attach to the message. */ file_id?: string; /** @description The tools to add this file to. */ - tools?: (components["schemas"]["AssistantToolsCode"] | components["schemas"]["AssistantToolsFileSearch"])[]; + tools?: (components["schemas"]["AssistantToolsCode"] | components["schemas"]["AssistantToolsFileSearchTypeOnly"])[]; [key: string]: unknown; })[]) | null; /** @description Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long. */ @@ -3310,6 +3436,8 @@ export interface components { /** @description The name of the vector store. */ name?: string; expires_after?: components["schemas"]["VectorStoreExpirationAfter"]; + /** @description The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy. Only applicable if `file_ids` is non-empty. */ + chunking_strategy?: components["schemas"]["AutoChunkingStrategyRequestParam"] | components["schemas"]["StaticChunkingStrategyRequestParam"]; /** @description Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long. */ metadata?: Record | null; [key: string]: unknown; @@ -3375,11 +3503,71 @@ export interface components { message: string; [key: string]: unknown; }) | null; + /** @description The strategy used to chunk the file. */ + chunking_strategy?: components["schemas"]["StaticChunkingStrategyResponseParam"] | components["schemas"]["OtherChunkingStrategyResponseParam"]; + [key: string]: unknown; + }; + /** + * Other Chunking Strategy + * @description This is returned when the chunking strategy is unknown. Typically, this is because the file was indexed before the `chunking_strategy` concept was introduced in the API. + */ + OtherChunkingStrategyResponseParam: { + /** + * @description Always `other`. + * @enum {string} + */ + type: "other"; + [key: string]: unknown; + }; + /** Static Chunking Strategy */ + StaticChunkingStrategyResponseParam: { + /** + * @description Always `static`. + * @enum {string} + */ + type: "static"; + static: components["schemas"]["StaticChunkingStrategy"]; + [key: string]: unknown; + }; + StaticChunkingStrategy: { + /** @description The maximum number of tokens in each chunk. The default value is `800`. The minimum value is `100` and the maximum value is `4096`. 
*/ + max_chunk_size_tokens: number; + /** + * @description The number of tokens that overlap between chunks. The default value is `400`. + * + * Note that the overlap must not exceed half of `max_chunk_size_tokens`. + */ + chunk_overlap_tokens: number; + [key: string]: unknown; + }; + /** + * Auto Chunking Strategy + * @description The default strategy. This strategy currently uses a `max_chunk_size_tokens` of `800` and `chunk_overlap_tokens` of `400`. + */ + AutoChunkingStrategyRequestParam: { + /** + * @description Always `auto`. + * @enum {string} + */ + type: "auto"; [key: string]: unknown; }; + /** Static Chunking Strategy */ + StaticChunkingStrategyRequestParam: { + /** + * @description Always `static`. + * @enum {string} + */ + type: "static"; + static: components["schemas"]["StaticChunkingStrategy"]; + [key: string]: unknown; + }; + /** @description The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy. */ + ChunkingStrategyRequestParam: components["schemas"]["AutoChunkingStrategyRequestParam"] | components["schemas"]["StaticChunkingStrategyRequestParam"]; CreateVectorStoreFileRequest: { /** @description A [File](/docs/api-reference/files) ID that the vector store should use. Useful for tools like `file_search` that can access files. */ file_id: string; + chunking_strategy?: components["schemas"]["ChunkingStrategyRequestParam"]; [key: string]: unknown; }; ListVectorStoreFilesResponse: { @@ -3440,6 +3628,7 @@ export interface components { CreateVectorStoreFileBatchRequest: { /** @description A list of [File](/docs/api-reference/files) IDs that the vector store should use. Useful for tools like `file_search` that can access files. */ file_ids: string[]; + chunking_strategy?: components["schemas"]["ChunkingStrategyRequestParam"]; [key: string]: unknown; }; /** @@ -3495,6 +3684,11 @@ export interface components { event: "thread.run.completed"; data: components["schemas"]["RunObject"]; [key: string]: unknown; + } | { + /** @enum {string} */ + event: "thread.run.incomplete"; + data: components["schemas"]["RunObject"]; + [key: string]: unknown; } | { /** @enum {string} */ event: "thread.run.failed"; @@ -3895,9 +4089,9 @@ export interface operations { * * The Assistants API supports files up to 2 million tokens and of specific file types. See the [Assistants Tools guide](/docs/assistants/tools) for details. * - * The Fine-tuning API only supports `.jsonl` files. + * The Fine-tuning API only supports `.jsonl` files. The input also has certain required formats for fine-tuning [chat](/docs/api-reference/fine-tuning/chat-input) or [completions](/docs/api-reference/fine-tuning/completions-input) models. * - * The Batch API only supports `.jsonl` files up to 100 MB in size. + * The Batch API only supports `.jsonl` files up to 100 MB in size. The input also has a specific required [format](/docs/api-reference/batch/request-input). * * Please [contact us](https://help.openai.com/) if you need to increase these storage limits. */ @@ -4938,7 +5132,7 @@ export interface operations { * * See [upload file](/docs/api-reference/files/create) for how to upload a file. * - * Your input file must be formatted as a [JSONL file](/docs/api-reference/batch/requestInput), and must be uploaded with the purpose `batch`. The file can contain up to 50,000 requests, and can be up to 100 MB in size. + * Your input file must be formatted as a [JSONL file](/docs/api-reference/batch/request-input), and must be uploaded with the purpose `batch`. 
The file can contain up to 50,000 requests, and can be up to 100 MB in size.
           */
          input_file_id: string;
          /**
@@ -4985,7 +5179,7 @@
        };
      };
    };
-    /** Cancels an in-progress batch. */
+    /** Cancels an in-progress batch. The batch will be in status `cancelling` for up to 10 minutes, before changing to `cancelled`, where it will have partial results (if any) available in the output file. */
    cancelBatch: {
      parameters: {
        path: {
diff --git a/src/itty-router/cors.ts b/src/itty-router/cors.ts
new file mode 100644
index 0000000..24c417a
--- /dev/null
+++ b/src/itty-router/cors.ts
@@ -0,0 +1,73 @@
+import type { IRequest } from "itty-router"
+// see https://github.com/kwhitley/itty-router/issues/244
+export type CorsOptions = {
+  credentials?: true
+  origin?: boolean | string | string[] | RegExp | ((origin: string) => string | undefined)
+  maxAge?: number
+  allowMethods?: string | string[]
+  allowHeaders?: string | string[]
+  exposeHeaders?: string | string[]
+}
+
+export type Preflight = (request: IRequest) => Response | undefined
+export type Corsify = (response: Response, request?: IRequest) => Response
+
+export type CorsPair = {
+  preflight: Preflight
+  corsify: Corsify
+}
+
+// Create CORS function with default options.
+export const cors = (options: CorsOptions = {}) => {
+  // Destructure and set defaults for options.
+  const { origin = "*", credentials = false, allowMethods = "*", allowHeaders, exposeHeaders, maxAge } = options
+
+  const getAccessControlOrigin = (request?: Request): string | undefined => {
+    const requestOrigin = request?.headers.get("origin") // may be null if no request passed
+    if (!requestOrigin) return undefined
+    if (origin === true) return requestOrigin
+    if (origin instanceof RegExp) return origin.test(requestOrigin) ? requestOrigin : undefined
+    if (Array.isArray(origin)) return origin.includes(requestOrigin) ? requestOrigin : undefined
+    if (origin instanceof Function) return origin(requestOrigin)
+    return origin === "*" && credentials ? requestOrigin : (origin as string)
+  }
+
+  const appendHeadersAndReturn = (response: Response, headers: Record<string, string | undefined>): Response => {
+    for (const [key, value] of Object.entries(headers)) {
+      if (value) response.headers.append(key, value)
+    }
+    return response
+  }
+
+  const preflight = (request: Request) => {
+    if (request.method === "OPTIONS") {
+      const response = new Response(null, { status: 204 })
+
+      return appendHeadersAndReturn(response, {
+        "access-control-allow-origin": getAccessControlOrigin(request),
+        // @ts-ignore
+        "access-control-allow-methods": allowMethods?.join?.(",") ?? allowMethods, // include allowed methods
+        // @ts-ignore
+        "access-control-expose-headers": exposeHeaders?.join?.(",") ?? exposeHeaders, // include exposed headers
+        "access-control-allow-headers":
+          // @ts-ignore
+          allowHeaders?.join?.(",") ?? allowHeaders ?? request.headers.get("access-control-request-headers"), // include allowed headers
+        "access-control-max-age": maxAge?.toString(),
+        "access-control-allow-credentials": credentials.toString(),
+      })
+    } // otherwise ignore
+  }
+
+  const corsify = (response: Response, request?: Request) => {
+    // ignore if already has CORS headers
+    if (response?.headers?.get("access-control-allow-origin") || response.status === 101) return response
+
+    return appendHeadersAndReturn(response, {
+      "access-control-allow-origin": getAccessControlOrigin(request),
+      "access-control-allow-credentials": credentials.toString(),
+    })
+  }
+
+  // Return corsify and preflight methods.
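+  // Usage sketch (illustrative; src/app.ts in this PR wires the same pair into the router):
+  //   const { preflight, corsify } = cors({ allowHeaders: "*" })
+  //   const app = Router({ before: [preflight], finally: [corsify] })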
+  return { corsify, preflight }
+}
diff --git a/src/openai/chat/completions/ChatProxyHandler.ts b/src/openai/chat/completions/ChatProxyHandler.ts
index 8c07437..b29831a 100644
--- a/src/openai/chat/completions/ChatProxyHandler.ts
+++ b/src/openai/chat/completions/ChatProxyHandler.ts
@@ -3,7 +3,7 @@ import { getToken } from "../../../utils.ts"
 import { nonStreamingChatProxyHandler } from "./NonStreamingChatProxyHandler.ts"
 import { streamingChatProxyHandler } from "./StreamingChatProxyHandler.ts"

-export async function chatProxyHandler(rawReq: Request) {
+export async function chatProxyHandler(rawReq: Request): Promise<Response> {
   const req = (await rawReq.json()) as OpenAI.Chat.ChatCompletionCreateParams
   const headers = rawReq.headers
   const apiParam = getToken(headers)
@@ -23,38 +23,36 @@ export async function chatProxyHandler(rawReq: Request) {
   return sseResponse(
     (async function* () {
       for await (const data of respArr) {
-        rawReq.logger?.debug(data)
+        rawReq.logger?.debug("streamingChatProxyHandler", data)
         yield JSON.stringify(data)
       }
       yield "[DONE]"
+      return undefined
     })(),
   )
 }

-export function sseResponse(dataStream: AsyncGenerator<string>): Response {
-  const { readable, writable } = new TransformStream()
+const encoder = new TextEncoder()

-  const response = new Response(readable, {
+export function sseResponse(dataStream: AsyncGenerator<string>): Response {
+  const s = new ReadableStream({
+    async pull(controller) {
+      const { value, done } = await dataStream.next()
+      if (done) {
+        controller.close()
+      } else {
+        controller.enqueue(encoder.encode(toSseMsg({ data: value })))
+      }
+    },
+  })
+
+  const response = new Response(s, {
     status: 200,
     headers: new Headers({
       "Content-Type": "text/event-stream",
     }),
   })

-  const encoder = new TextEncoder()
-
-  async function writer(data: string) {
-    const w = writable.getWriter()
-    await w.write(encoder.encode(data))
-    w.releaseLock()
-  }
-  ;(async () => {
-    for await (const data of dataStream) {
-      await writer(toSseMsg({ data }))
-    }
-    await writable.close()
-  })()
-
   return response
 }
diff --git a/src/openai/chat/completions/NonStreamingChatProxyHandler.ts b/src/openai/chat/completions/NonStreamingChatProxyHandler.ts
index a8940f0..1e1b683 100644
--- a/src/openai/chat/completions/NonStreamingChatProxyHandler.ts
+++ b/src/openai/chat/completions/NonStreamingChatProxyHandler.ts
@@ -10,14 +10,25 @@ export async function nonStreamingChatProxyHandler(
   log?: Logger,
 ) {
   const [model, geminiReq] = genModel(req)
-  const geminiResp: string | FunctionCall = await generateContent(apiParam, model, geminiReq)
-    .then((it) => it.response.result())
-    .catch((err) => {
-      // When an exception occurs, log the request parameters and the response to aid debugging
-      log?.error(req)
-      log?.error(err?.message ?? err.toString())
-      return err?.message ?? err.toString()
-    })
+  let geminiResp: string | FunctionCall = ""
+
+  try {
+    for await (const it of generateContent(apiParam, model, geminiReq)) {
+      const data = it.response.result()
+      if (typeof data === "string") {
+        geminiResp += data
+      } else {
+        geminiResp = data
+        break
+      }
+    }
+  } catch (err) {
+    // When an exception occurs, log the request parameters and the response to aid debugging
+    log?.error(req)
+    log?.error(err?.message ?? err.toString())
+    geminiResp = err?.message ?? 
err.toString() + } + log?.debug(req) log?.debug(geminiResp) diff --git a/src/openai/chat/completions/StreamingChatProxyHandler.ts b/src/openai/chat/completions/StreamingChatProxyHandler.ts index 162f2a8..8aa36cc 100644 --- a/src/openai/chat/completions/StreamingChatProxyHandler.ts +++ b/src/openai/chat/completions/StreamingChatProxyHandler.ts @@ -5,9 +5,17 @@ import { type ApiParam, genModel } from "../../../utils.ts" export async function* streamingChatProxyHandler(req: OpenAI.Chat.ChatCompletionCreateParams, apiParam: ApiParam) { const [model, geminiReq] = genModel(req) - const geminiResp: string | FunctionCall = await generateContent(apiParam, model, geminiReq) - .then((it) => it.response.result()) - .catch((e: Error) => e.message ?? e?.toString()) + + try { + for await (const it of generateContent(apiParam, model, geminiReq)) { + const data = it.response.result() + yield genOpenAiResp(data, false) + } + } catch (error) { + yield genOpenAiResp(error?.message ?? error.toString(), true) + } + yield genOpenAiResp("", true) + return undefined function genOpenAiResp(content: string | FunctionCall, stop: boolean) { if (typeof content === "string") { @@ -40,7 +48,4 @@ export async function* streamingChatProxyHandler(req: OpenAI.Chat.ChatCompletion ], } satisfies OpenAI.Chat.ChatCompletionChunk } - - yield genOpenAiResp(geminiResp, false) - yield genOpenAiResp("", true) } diff --git a/test/chat-completion_test.ts b/test/chat-completion_test.ts index 096bb95..b47d5f8 100644 --- a/test/chat-completion_test.ts +++ b/test/chat-completion_test.ts @@ -1,7 +1,7 @@ import { assertFalse } from "jsr:@std/assert" import { expect } from "jsr:@std/expect" import { afterEach, beforeEach, describe, it } from "jsr:@std/testing/bdd" -import { type ParseEvent, createParser } from "https://esm.sh/eventsource-parser@1.1.2" +import { type ParseEvent, createParser } from "eventsource-parser" import { app } from "../src/app.ts" import type { OpenAI } from "../src/types.ts" import { MockFetch } from "./mock-fetch.ts" @@ -27,6 +27,23 @@ describe("openai to gemini test", () => { }, }), ) + + fetchMocker.mock( + (req) => + req.url.includes(`generativelanguage.googleapis.com/v1beta/models/${geminiModel}:streamGenerateContent`), + () => + new Response( + `data: ${JSON.stringify(gemini_ok_resp)} + +`, + { + status: 200, + headers: { + "Content-Type": "application/json", + }, + }, + ), + ) }) afterEach(() => {