fix(google): handle thoughtSignature-only parts to prevent Gemini stream hang

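Gemini 3.1 Pro Preview may emit parts that carry only a thoughtSignature and no text content. No stream event was emitted for such parts, so idle-timeout wrappers saw no model activity and could kill the stream, which looked like a hang. Emit a thinking_signature event for these parts to keep the stream active, and start a thinking block when one arrives while no thinking block is open (for example, before any text). Fixes #76071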
2026-05-06 06:00:43 +00:00 · 2026-05-02 21:20:23 +08:00
parent f7ed29e118
commit ea3416d8b5
2 changed files with 139 additions and 0 deletions
--- a/extensions/google/transport-stream.test.ts
+++ b/extensions/google/transport-stream.test.ts
@@ -767,4 +767,104 @@ describe("google transport stream", () => {
      thinkingConfig: { includeThoughts: true, thinkingBudget: expectedBudget },
    });
  });
+
+  it("emits a thinking_signature event for thoughtSignature-only parts to keep the stream active", async () => {
+    guardedFetchMock.mockResolvedValueOnce(
+      buildSseResponse([
+        {
+          candidates: [
+            {
+              content: {
+                parts: [
+                  { thought: true, text: "draft", thoughtSignature: "sig_1" },
+                  { thoughtSignature: "sig_2" },
+                  { text: "answer" },
+                ],
+              },
+              finishReason: "STOP",
+            },
+          ],
+          usageMetadata: {
+            promptTokenCount: 10,
+            candidatesTokenCount: 5,
+            thoughtsTokenCount: 3,
+            totalTokenCount: 18,
+          },
+        },
+      ]),
+    );
+
+    const model = buildGeminiModel({
+      id: "gemini-3.1-pro-preview",
+      name: "Gemini 3.1 Pro Preview",
+    });
+
+    const streamFn = createGoogleGenerativeAiTransportStreamFn();
+    const stream = await Promise.resolve(
+      streamFn(
+        model,
+        {
+          systemPrompt: "You are a helpful assistant.",
+          messages: [{ role: "user", content: "hello", timestamp: 0 }],
+        } as never,
+        { reasoning: "high" },
+      ),
+    );
+    const result = await stream.result();
+
+    expect(result.content).toEqual([
+      { type: "thinking", thinking: "draft", thinkingSignature: "sig_2" },
+      { type: "text", text: "answer" },
+    ]);
+  });
+
+  it("starts a thinking block for thoughtSignature-only parts that arrive before any text", async () => {
+    guardedFetchMock.mockResolvedValueOnce(
+      buildSseResponse([
+        {
+          candidates: [
+            {
+              content: {
+                parts: [
+                  { thoughtSignature: "sig_1" },
+                  { thought: true, text: "draft" },
+                  { text: "answer" },
+                ],
+              },
+              finishReason: "STOP",
+            },
+          ],
+          usageMetadata: {
+            promptTokenCount: 10,
+            candidatesTokenCount: 5,
+            thoughtsTokenCount: 3,
+            totalTokenCount: 18,
+          },
+        },
+      ]),
+    );
+
+    const model = buildGeminiModel({
+      id: "gemini-3.1-pro-preview",
+      name: "Gemini 3.1 Pro Preview",
+    });
+
+    const streamFn = createGoogleGenerativeAiTransportStreamFn();
+    const stream = await Promise.resolve(
+      streamFn(
+        model,
+        {
+          systemPrompt: "You are a helpful assistant.",
+          messages: [{ role: "user", content: "hello", timestamp: 0 }],
+        } as never,
+        { reasoning: "high" },
+      ),
+    );
+    const result = await stream.result();
+
+    expect(result.content).toEqual([
+      { type: "thinking", thinking: "draft", thinkingSignature: "sig_1" },
+      { type: "text", text: "answer" },
+    ]);
+  });
 });
--- a/extensions/google/transport-stream.ts
+++ b/extensions/google/transport-stream.ts
@@ -894,6 +894,45 @@ function createGoogleTransportStreamFn(kind: GoogleTransportApi): StreamFn {
                  partial: output as never,
                });
              }
+              // Gemini 3+ models can emit parts that carry only a thoughtSignature (no
+              // text, no functionCall) before user-visible text arrives. Store the
+              // signature on a thinking block (opening one if needed) and emit an event
+              // so idle-timeout wrappers see model activity and don't kill the stream.
+              if (
+                typeof part.thoughtSignature === "string" &&
+                part.thoughtSignature.length > 0 &&
+                typeof part.text !== "string" &&
+                !part.functionCall
+              ) {
+                if (
+                  currentBlockIndex < 0 ||
+                  output.content[currentBlockIndex]?.type !== "thinking"
+                ) {
+                  if (currentBlockIndex >= 0) {
+                    pushTextBlockEnd(stream, output, currentBlockIndex);
+                  }
+                  output.content.push({ type: "thinking", thinking: "" });
+                  currentBlockIndex = output.content.length - 1;
+                  stream.push({
+                    type: "thinking_start",
+                    contentIndex: currentBlockIndex,
+                    partial: output as never,
+                  });
+                }
+                const activeBlock = output.content[currentBlockIndex];
+                if (activeBlock?.type === "thinking") {
+                  activeBlock.thinkingSignature = retainThoughtSignature(
+                    activeBlock.thinkingSignature,
+                    part.thoughtSignature,
+                  );
+                }
+                stream.push({
+                  type: "thinking_signature",
+                  contentIndex: currentBlockIndex,
+                  signature: part.thoughtSignature,
+                  partial: output as never,
+                });
+              }
            }
          }
          if (typeof candidate?.finishReason === "string") {