diff --git a/src/llm/providers/openai-completions.test.ts b/src/llm/providers/openai-completions.test.ts
index e4fe42a416f..cf7fb88e40e 100644
--- a/src/llm/providers/openai-completions.test.ts
+++ b/src/llm/providers/openai-completions.test.ts
@@ -799,6 +799,123 @@ describe("openai-completions stop-reason tool-call guard", () => {
     expect(result.content.some((block) => block.type === "thinking")).toBe(false);
   });
 
+  it("seals the native reasoning block before the answer text begins", async () => {
+    // DeepSeek streams `reasoning_content`, then switches to `content` with no
+    // boundary event; `thinking_end` must be emitted before the answer so channels
+    // do not merge the answer into the reasoning block.
+    mockChunksRef.chunks = [
+      {
+        id: "chatcmpl-test",
+        choices: [{ index: 0, delta: { reasoning_content: "Let me think." } }],
+      },
+      {
+        id: "chatcmpl-test",
+        choices: [{ index: 0, delta: { reasoning_content: " Still thinking." } }],
+      },
+      makeTextChunk("The answer"),
+      makeTextChunk(" is 42."),
+      makeFinishChunk("stop"),
+    ];
+
+    const stream = streamOpenAICompletions(reasoningModel, context, {
+      apiKey: "sk-test",
+      reasoningEffort: "medium",
+    });
+    const eventTypes: string[] = [];
+    for await (const event of stream as AsyncIterable<{ type: string }>) {
+      eventTypes.push(event.type);
+    }
+    const result = await stream.result();
+
+    const thinkingEndIndex = eventTypes.indexOf("thinking_end");
+    const textStartIndex = eventTypes.indexOf("text_start");
+    const firstTextDeltaIndex = eventTypes.indexOf("text_delta");
+    expect(thinkingEndIndex).toBeGreaterThanOrEqual(0);
+    expect(textStartIndex).toBeGreaterThanOrEqual(0);
+    expect(thinkingEndIndex).toBeLessThan(textStartIndex);
+    expect(thinkingEndIndex).toBeLessThan(firstTextDeltaIndex);
+    // `thinking_end` must appear exactly once even though the sealed block is
+    // visited again by the end-of-stream finish loop.
+    expect(eventTypes.filter((type) => type === "thinking_end")).toHaveLength(1);
+
+    expect(result.content).toContainEqual({
+      type: "thinking",
+      thinking: "Let me think. Still thinking.",
+      thinkingSignature: "reasoning_content",
+    });
+    expect(result.content).toContainEqual({ type: "text", text: "The answer is 42." });
+  });
+
+  it("seals the native reasoning block before a following tool call", async () => {
+    mockChunksRef.chunks = [
+      {
+        id: "chatcmpl-test",
+        choices: [{ index: 0, delta: { reasoning_content: "I should call a tool." } }],
+      },
+      makeToolCallChunk("call_1", "bash", '{"cmd":"ls"}'), // follows reasoning directly
+      makeFinishChunk("tool_calls"),
+    ];
+
+    const stream = streamOpenAICompletions(reasoningModel, context, {
+      apiKey: "sk-test",
+      reasoningEffort: "medium",
+    });
+    const eventTypes: string[] = [];
+    for await (const event of stream as AsyncIterable<{ type: string }>) {
+      eventTypes.push(event.type);
+    }
+    await stream.result();
+
+    const thinkingEndIndex = eventTypes.indexOf("thinking_end");
+    const toolCallStartIndex = eventTypes.indexOf("toolcall_start");
+    expect(thinkingEndIndex).toBeGreaterThanOrEqual(0);
+    expect(toolCallStartIndex).toBeGreaterThanOrEqual(0);
+    expect(thinkingEndIndex).toBeLessThan(toolCallStartIndex);
+    expect(eventTypes.filter((type) => type === "thinking_end")).toHaveLength(1);
+  });
+
+  it("keeps one native reasoning block when content and reasoning co-occur", async () => {
+    mockChunksRef.chunks = [
+      {
+        id: "chatcmpl-test",
+        choices: [{ index: 0, delta: { reasoning_content: "First thought." } }],
+      },
+      {
+        id: "chatcmpl-test",
+        choices: [
+          {
+            index: 0,
+            delta: { // visible text and reasoning share one delta
+              content: "Visible text that shares the reasoning chunk.",
+              reasoning_content: " Second thought.",
+            },
+          },
+        ],
+      },
+      makeTextChunk(" Final answer."),
+      makeFinishChunk("stop"),
+    ];
+
+    const stream = streamOpenAICompletions(reasoningModel, context, {
+      apiKey: "sk-test",
+      reasoningEffort: "medium",
+    });
+    const eventTypes: string[] = [];
+    for await (const event of stream as AsyncIterable<{ type: string }>) {
+      eventTypes.push(event.type);
+    }
+    const result = await stream.result();
+
+    expect(eventTypes.filter((type) => type === "thinking_start")).toHaveLength(1);
+    expect(eventTypes.filter((type) => type === "thinking_end")).toHaveLength(1);
+    expect(eventTypes.indexOf("thinking_end")).toBeLessThan(eventTypes.indexOf("text_start"));
+    expect(result.content).toContainEqual({
+      type: "thinking",
+      thinking: "First thought. Second thought.",
+      thinkingSignature: "reasoning_content",
+    });
+  });
+
   it("promotes silent tool_calls with finish_reason stop to toolUse", async () => {
     mockChunksRef.chunks = [
       makeToolCallChunk("call_1", "bash", '{"cmd":"ls"}'),
diff --git a/src/llm/providers/openai-completions.ts b/src/llm/providers/openai-completions.ts
index b4e0e3ece1c..0adc9fdb7a9 100644
--- a/src/llm/providers/openai-completions.ts
+++ b/src/llm/providers/openai-completions.ts
@@ -187,12 +187,17 @@ export const streamOpenAICompletions: StreamFunction<
       const toolCallBlocksByIndex = new Map();
       const toolCallBlocksById = new Map();
       const blocks = output.content as StreamingBlock[];
+      // A block can be finished mid-stream (native reasoning sealed when text or
+      // a tool call begins) and again by the end-of-stream loop; this set guards
+      // finishBlock so each block's *_end event is emitted exactly once.
+      const finishedBlocks = new Set();
       const getContentIndex = (block: StreamingBlock) => blocks.indexOf(block);
       const finishBlock = (block: StreamingBlock) => {
         const contentIndex = getContentIndex(block);
-        if (contentIndex === -1) {
+        if (contentIndex === -1 || finishedBlocks.has(block)) {
           return;
         }
+        finishedBlocks.add(block); // recorded before the *_end push below
         if (block.type === "text") {
           stream.push({
             type: "text_end",
@@ -249,7 +254,19 @@ export const streamOpenAICompletions: StreamFunction<
         }
         return thinkingBlock;
       };
+      // Native-thinking providers (e.g. DeepSeek `reasoning_content`) stream the
+      // reasoning lane, then switch to the answer via `content` with no boundary
+      // event. Seal the open thought when visible text begins (the tool-call path
+      // calls this too) so `thinking_end` precedes what follows; tag-based reasoning
+      // has no native thinking block (it is closed by the partitioner), so no-op there.
+      const sealNativeReasoningBeforeText = () => {
+        if (thinkingBlock && !reasoningTagTextPartitioner.isInsideReasoning()) {
+          finishBlock(thinkingBlock);
+          thinkingBlock = null; // no open native thought remains to seal
+        }
+      };
       const appendTextDelta = (delta: string) => {
+        sealNativeReasoningBeforeText();
         const block = ensureTextBlock();
         block.text += delta;
         stream.push({
@@ -382,14 +399,6 @@ export const streamOpenAICompletions: StreamFunction<
           if (foundReasoningField) {
             reasoningTagTextPartitioner.markStrict();
           }
-          if (
-            choice.delta.content !== null &&
-            choice.delta.content !== undefined &&
-            choice.delta.content.length > 0
-          ) {
-            appendPartitionedContent(choice.delta.content, Boolean(foundReasoningField));
-          }
-
           if (shouldEmitReasoning && foundReasoningField) {
             const delta = deltaFields[foundReasoningField];
             if (typeof delta === "string" && delta.length > 0) {
@@ -400,9 +409,19 @@ export const streamOpenAICompletions: StreamFunction<
               appendThinkingDelta(thinkingSignature, delta);
             }
           }
+          if (
+            choice.delta.content !== null &&
+            choice.delta.content !== undefined &&
+            choice.delta.content.length > 0
+          ) { // runs after the reasoning delta so same-chunk reasoning is appended first
+            appendPartitionedContent(choice.delta.content, Boolean(foundReasoningField));
+          }
 
           if (choice?.delta?.tool_calls) {
             flushPartitionedContent();
+            // The tool-call lane is also a reasoning boundary; seal the thought
+            // before toolcall_start so thinking_end never trails the action.
+            sealNativeReasoningBeforeText();
             for (const toolCall of choice.delta.tool_calls) {
               const block = ensureToolCallBlock(toolCall);
               if (!block.id && toolCall.id) {