fix(google): handle thoughtSignature-only parts to prevent Gemini stream hang

Gemini 3.1 Pro Preview may emit parts that carry only a thoughtSignature
and no text content. No stream event was produced for such parts, so the
stream looked idle and could stall. Emit a thinking_signature event to
keep the stream active, and start a thinking block when these parts
arrive before any text.

Fixes #76071
This commit is contained in:
zhang-guiping
2026-05-02 21:20:23 +08:00
committed by Ayaan Zaidi
parent f7ed29e118
commit ea3416d8b5
2 changed files with 139 additions and 0 deletions

View File

@@ -767,4 +767,104 @@ describe("google transport stream", () => {
thinkingConfig: { includeThoughts: true, thinkingBudget: expectedBudget },
});
});
it("emits a thinking_signature event for thoughtSignature-only parts to keep the stream active", async () => {
  // Scenario: a thought part that already has a signature, followed by a part
  // that carries nothing but a signature, followed by the visible answer.
  const sseResponse = buildSseResponse([
    {
      candidates: [
        {
          content: {
            parts: [
              { thought: true, text: "draft", thoughtSignature: "sig_1" },
              { thoughtSignature: "sig_2" },
              { text: "answer" },
            ],
          },
          finishReason: "STOP",
        },
      ],
      usageMetadata: {
        promptTokenCount: 10,
        candidatesTokenCount: 5,
        thoughtsTokenCount: 3,
        totalTokenCount: 18,
      },
    },
  ]);
  guardedFetchMock.mockResolvedValueOnce(sseResponse);

  const geminiModel = buildGeminiModel({
    id: "gemini-3.1-pro-preview",
    name: "Gemini 3.1 Pro Preview",
  });
  const context = {
    systemPrompt: "You are a helpful assistant.",
    messages: [{ role: "user", content: "hello", timestamp: 0 }],
  } as never;

  const transport = createGoogleGenerativeAiTransportStreamFn();
  const eventStream = await Promise.resolve(
    transport(geminiModel, context, { reasoning: "high" }),
  );
  const finalMessage = await eventStream.result();

  // Only the assembled content is asserted; the individual stream events are
  // not inspected here. The signature-only part must fold into the existing
  // thinking block (its signature becomes "sig_2") instead of adding a block.
  const expectedContent = [
    { type: "thinking", thinking: "draft", thinkingSignature: "sig_2" },
    { type: "text", text: "answer" },
  ];
  expect(finalMessage.content).toEqual(expectedContent);
});
it("starts a thinking block for thoughtSignature-only parts that arrive before any text", async () => {
  // Scenario: the very first part carries only a signature; the thought text
  // and the visible answer arrive afterwards.
  const sseResponse = buildSseResponse([
    {
      candidates: [
        {
          content: {
            parts: [
              { thoughtSignature: "sig_1" },
              { thought: true, text: "draft" },
              { text: "answer" },
            ],
          },
          finishReason: "STOP",
        },
      ],
      usageMetadata: {
        promptTokenCount: 10,
        candidatesTokenCount: 5,
        thoughtsTokenCount: 3,
        totalTokenCount: 18,
      },
    },
  ]);
  guardedFetchMock.mockResolvedValueOnce(sseResponse);

  const geminiModel = buildGeminiModel({
    id: "gemini-3.1-pro-preview",
    name: "Gemini 3.1 Pro Preview",
  });
  const context = {
    systemPrompt: "You are a helpful assistant.",
    messages: [{ role: "user", content: "hello", timestamp: 0 }],
  } as never;

  const transport = createGoogleGenerativeAiTransportStreamFn();
  const eventStream = await Promise.resolve(
    transport(geminiModel, context, { reasoning: "high" }),
  );
  const finalMessage = await eventStream.result();

  // The leading signature must open a thinking block that the following
  // thought text then fills, so a single thinking block holds both the
  // "draft" text and the "sig_1" signature.
  const expectedContent = [
    { type: "thinking", thinking: "draft", thinkingSignature: "sig_1" },
    { type: "text", text: "answer" },
  ];
  expect(finalMessage.content).toEqual(expectedContent);
});
});

View File

@@ -894,6 +894,45 @@ function createGoogleTransportStreamFn(kind: GoogleTransportApi): StreamFn {
partial: output as never,
});
}
// Gemini 3+ models can emit thoughtSignature-only parts during the
// thinking phase before user-visible text arrives. Emit a stream event
// so that idle-timeout wrappers detect model activity and don't kill
// the stream prematurely.
// The guard matches only parts that carry a non-empty signature and nothing
// else: parts with string text or with a functionCall are excluded here.
if (
typeof part.thoughtSignature === "string" &&
part.thoughtSignature.length > 0 &&
typeof part.text !== "string" &&
!part.functionCall
) {
// Make sure the active block is a thinking block. A new one is opened when
// no block is active (index < 0) or the active block has another type.
if (
currentBlockIndex < 0 ||
output.content[currentBlockIndex]?.type !== "thinking"
) {
// Close whatever non-thinking block is currently open before switching.
// NOTE(review): pushTextBlockEnd is defined outside this view; presumably
// it emits the matching end event for that block — confirm it copes with
// every block type that can be active at this point.
if (currentBlockIndex >= 0) {
pushTextBlockEnd(stream, output, currentBlockIndex);
}
// Open an empty thinking block and announce it; the signature is attached
// below, and thought text is not added in this branch.
output.content.push({ type: "thinking", thinking: "" });
currentBlockIndex = output.content.length - 1;
stream.push({
type: "thinking_start",
contentIndex: currentBlockIndex,
partial: output as never,
});
}
// Record the signature on the active thinking block. The type check narrows
// the content union so thinkingSignature can be assigned.
// NOTE(review): retainThoughtSignature is defined outside this view; it
// decides which of the previous and incoming signatures is kept.
const activeBlock = output.content[currentBlockIndex];
if (activeBlock?.type === "thinking") {
activeBlock.thinkingSignature = retainThoughtSignature(
activeBlock.thinkingSignature,
part.thoughtSignature,
);
}
// Always push an event for the signature-only part (carrying the raw
// incoming signature) so that stream consumers observe activity.
stream.push({
type: "thinking_signature",
contentIndex: currentBlockIndex,
signature: part.thoughtSignature,
partial: output as never,
});
}
}
}
if (typeof candidate?.finishReason === "string") {