mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-26 15:59:31 +00:00
fix(openai-completions): seal native reasoning before the answer under /reasoning on (#95283)
* fix(openai-completions): seal native reasoning before the answer

  DeepSeek-style providers stream reasoning via reasoning_content deltas, then switch to the answer via content deltas with no boundary event. thinking_end was only emitted by the end-of-stream finishBlock loop, so it landed after the answer's text_delta and channels merged the answer into the reasoning block.

  Seal the open native thinking block when visible text (or a tool call) begins so thinking_end precedes the answer; tag-based <think> reasoning is unaffected (closed by the partitioner). finishBlock is now idempotent, so the end-of-stream loop never re-emits thinking_end.

* fix(openai-completions): preserve co-streamed reasoning
* fix(openai-completions): order co-streamed reasoning
* fix(openai-completions): seal co-streamed reasoning

---------

Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
This commit is contained in:
@@ -799,6 +799,123 @@ describe("openai-completions stop-reason tool-call guard", () => {
|
||||
expect(result.content.some((block) => block.type === "thinking")).toBe(false);
|
||||
});
|
||||
|
||||
it("seals the native reasoning block before the answer text begins", async () => {
|
||||
// DeepSeek streams reasoning_content, then switches to content with no
|
||||
// boundary event; thinking_end must precede the answer so channels do not
|
||||
// merge the answer into the reasoning block.
|
||||
// Fixture: two reasoning-only deltas, two text deltas, then a "stop" finish.
mockChunksRef.chunks = [
|
||||
{
|
||||
id: "chatcmpl-test",
|
||||
choices: [{ index: 0, delta: { reasoning_content: "Let me think." } }],
|
||||
},
|
||||
{
|
||||
id: "chatcmpl-test",
|
||||
choices: [{ index: 0, delta: { reasoning_content: " Still thinking." } }],
|
||||
},
|
||||
makeTextChunk("The answer"),
|
||||
makeTextChunk(" is 42."),
|
||||
makeFinishChunk("stop"),
|
||||
];
|
||||
|
||||
// Run the stream with reasoningEffort set and record only the ordered
// sequence of event types; payloads are checked via stream.result() below.
const stream = streamOpenAICompletions(reasoningModel, context, {
|
||||
apiKey: "sk-test",
|
||||
reasoningEffort: "medium",
|
||||
});
|
||||
const eventTypes: string[] = [];
|
||||
for await (const event of stream as AsyncIterable<{ type: string }>) {
|
||||
eventTypes.push(event.type);
|
||||
}
|
||||
const result = await stream.result();
|
||||
|
||||
// indexOf yields the first occurrence (or -1), so the ordering checks below
// compare the first thinking_end against the first text_start / text_delta.
const thinkingEndIndex = eventTypes.indexOf("thinking_end");
|
||||
const textStartIndex = eventTypes.indexOf("text_start");
|
||||
const firstTextDeltaIndex = eventTypes.indexOf("text_delta");
|
||||
expect(thinkingEndIndex).toBeGreaterThanOrEqual(0);
|
||||
expect(textStartIndex).toBeGreaterThanOrEqual(0);
|
||||
expect(thinkingEndIndex).toBeLessThan(textStartIndex);
|
||||
expect(thinkingEndIndex).toBeLessThan(firstTextDeltaIndex);
|
||||
// thinking_end is emitted exactly once even though the block is also
|
||||
// visited by the end-of-stream finish loop.
|
||||
expect(eventTypes.filter((type) => type === "thinking_end")).toHaveLength(1);
|
||||
|
||||
// Both reasoning deltas are expected in a single thinking block whose
// signature is the delta field they arrived in; the text deltas are
// expected concatenated into one text block.
expect(result.content).toContainEqual({
|
||||
type: "thinking",
|
||||
thinking: "Let me think. Still thinking.",
|
||||
thinkingSignature: "reasoning_content",
|
||||
});
|
||||
expect(result.content).toContainEqual({ type: "text", text: "The answer is 42." });
|
||||
});
|
||||
|
||||
it("seals the native reasoning block before a following tool call", async () => {
|
||||
// Fixture: one reasoning-only delta followed directly by a tool-call chunk
// (no visible text in between), then a "tool_calls" finish.
mockChunksRef.chunks = [
|
||||
{
|
||||
id: "chatcmpl-test",
|
||||
choices: [{ index: 0, delta: { reasoning_content: "I should call a tool." } }],
|
||||
},
|
||||
makeToolCallChunk("call_1", "bash", '{"cmd":"ls"}'),
|
||||
makeFinishChunk("tool_calls"),
|
||||
];
|
||||
|
||||
const stream = streamOpenAICompletions(reasoningModel, context, {
|
||||
apiKey: "sk-test",
|
||||
reasoningEffort: "medium",
|
||||
});
|
||||
// Only the ordered event types matter here; the final result is awaited
// but its content is not inspected.
const eventTypes: string[] = [];
|
||||
for await (const event of stream as AsyncIterable<{ type: string }>) {
|
||||
eventTypes.push(event.type);
|
||||
}
|
||||
await stream.result();
|
||||
|
||||
// The reasoning block must be closed before the tool call opens, and its
// end event must appear exactly once in the whole stream.
const thinkingEndIndex = eventTypes.indexOf("thinking_end");
|
||||
const toolCallStartIndex = eventTypes.indexOf("toolcall_start");
|
||||
expect(thinkingEndIndex).toBeGreaterThanOrEqual(0);
|
||||
expect(toolCallStartIndex).toBeGreaterThanOrEqual(0);
|
||||
expect(thinkingEndIndex).toBeLessThan(toolCallStartIndex);
|
||||
expect(eventTypes.filter((type) => type === "thinking_end")).toHaveLength(1);
|
||||
});
|
||||
|
||||
it("keeps one native reasoning block when content and reasoning co-occur", async () => {
|
||||
// Fixture: a reasoning-only delta, then a single delta that carries BOTH
// content and reasoning_content, then a plain text delta and a "stop" finish.
mockChunksRef.chunks = [
|
||||
{
|
||||
id: "chatcmpl-test",
|
||||
choices: [{ index: 0, delta: { reasoning_content: "First thought." } }],
|
||||
},
|
||||
{
|
||||
id: "chatcmpl-test",
|
||||
choices: [
|
||||
{
|
||||
index: 0,
|
||||
delta: {
|
||||
content: "Visible text that shares the reasoning chunk.",
|
||||
reasoning_content: " Second thought.",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
makeTextChunk(" Final answer."),
|
||||
makeFinishChunk("stop"),
|
||||
];
|
||||
|
||||
const stream = streamOpenAICompletions(reasoningModel, context, {
|
||||
apiKey: "sk-test",
|
||||
reasoningEffort: "medium",
|
||||
});
|
||||
const eventTypes: string[] = [];
|
||||
for await (const event of stream as AsyncIterable<{ type: string }>) {
|
||||
eventTypes.push(event.type);
|
||||
}
|
||||
const result = await stream.result();
|
||||
|
||||
// Exactly one thinking_start / thinking_end pair is expected, closed before
// the first text_start: the co-streamed reasoning must join the block that
// is already open instead of opening a second one after the text.
expect(eventTypes.filter((type) => type === "thinking_start")).toHaveLength(1);
|
||||
expect(eventTypes.filter((type) => type === "thinking_end")).toHaveLength(1);
|
||||
expect(eventTypes.indexOf("thinking_end")).toBeLessThan(eventTypes.indexOf("text_start"));
|
||||
// Both reasoning fragments are expected in one thinking block. Note that the
// visible text content is not asserted by this test.
expect(result.content).toContainEqual({
|
||||
type: "thinking",
|
||||
thinking: "First thought. Second thought.",
|
||||
thinkingSignature: "reasoning_content",
|
||||
});
|
||||
});
|
||||
|
||||
it("promotes silent tool_calls with finish_reason stop to toolUse", async () => {
|
||||
mockChunksRef.chunks = [
|
||||
makeToolCallChunk("call_1", "bash", '{"cmd":"ls"}'),
|
||||
|
||||
@@ -187,12 +187,17 @@ export const streamOpenAICompletions: StreamFunction<
|
||||
const toolCallBlocksByIndex = new Map<number, StreamingToolCallBlock>();
|
||||
const toolCallBlocksById = new Map<string, StreamingToolCallBlock>();
|
||||
const blocks = output.content as StreamingBlock[];
|
||||
// A block can be finished mid-stream (native reasoning sealed at the
|
||||
// text-lane transition) and again by the end-of-stream loop; guard so its
|
||||
// *_end event is emitted exactly once.
|
||||
const finishedBlocks = new Set<StreamingBlock>();
|
||||
const getContentIndex = (block: StreamingBlock) => blocks.indexOf(block);
|
||||
const finishBlock = (block: StreamingBlock) => {
|
||||
const contentIndex = getContentIndex(block);
|
||||
if (contentIndex === -1) {
|
||||
if (contentIndex === -1 || finishedBlocks.has(block)) {
|
||||
return;
|
||||
}
|
||||
finishedBlocks.add(block);
|
||||
if (block.type === "text") {
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
@@ -249,7 +254,19 @@ export const streamOpenAICompletions: StreamFunction<
|
||||
}
|
||||
return thinkingBlock;
|
||||
};
|
||||
// Native-thinking providers (e.g. deepseek `reasoning_content`) stream the
|
||||
// reasoning lane, then switch to the answer via `content` with no boundary
|
||||
// event. Seal the open thought when visible text begins so `thinking_end`
|
||||
// precedes the answer; tag-based <think> reasoning has no native thinking
|
||||
// block (it is closed by the partitioner), so this is a no-op there.
|
||||
const sealNativeReasoningBeforeText = () => {
|
||||
// Act only when a native thinking block is currently open and the tag
// partitioner does not report being inside reasoning; otherwise do nothing.
if (thinkingBlock && !reasoningTagTextPartitioner.isInsideReasoning()) {
|
||||
// Finish the block now so its end event is pushed before whatever follows
// (finishBlock ignores a repeat call for a block it has already finished).
finishBlock(thinkingBlock);
|
||||
// Drop the reference so later calls to this helper are no-ops for this
// block. NOTE(review): presumably a later reasoning delta would then open
// a fresh thinking block — confirm against the thinking-block helper.
thinkingBlock = null;
|
||||
}
|
||||
};
|
||||
const appendTextDelta = (delta: string) => {
|
||||
sealNativeReasoningBeforeText();
|
||||
const block = ensureTextBlock();
|
||||
block.text += delta;
|
||||
stream.push({
|
||||
@@ -382,14 +399,6 @@ export const streamOpenAICompletions: StreamFunction<
|
||||
if (foundReasoningField) {
|
||||
reasoningTagTextPartitioner.markStrict();
|
||||
}
|
||||
if (
|
||||
choice.delta.content !== null &&
|
||||
choice.delta.content !== undefined &&
|
||||
choice.delta.content.length > 0
|
||||
) {
|
||||
appendPartitionedContent(choice.delta.content, Boolean(foundReasoningField));
|
||||
}
|
||||
|
||||
if (shouldEmitReasoning && foundReasoningField) {
|
||||
const delta = deltaFields[foundReasoningField];
|
||||
if (typeof delta === "string" && delta.length > 0) {
|
||||
@@ -400,9 +409,19 @@ export const streamOpenAICompletions: StreamFunction<
|
||||
appendThinkingDelta(thinkingSignature, delta);
|
||||
}
|
||||
}
|
||||
if (
|
||||
choice.delta.content !== null &&
|
||||
choice.delta.content !== undefined &&
|
||||
choice.delta.content.length > 0
|
||||
) {
|
||||
appendPartitionedContent(choice.delta.content, Boolean(foundReasoningField));
|
||||
}
|
||||
|
||||
if (choice?.delta?.tool_calls) {
|
||||
flushPartitionedContent();
|
||||
// The tool-call lane is also a reasoning boundary; seal the thought
|
||||
// before toolcall_start so thinking_end never trails the action.
|
||||
sealNativeReasoningBeforeText();
|
||||
for (const toolCall of choice.delta.tool_calls) {
|
||||
const block = ensureToolCallBlock(toolCall);
|
||||
if (!block.id && toolCall.id) {
|
||||
|
||||
Reference in New Issue
Block a user