mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 06:00:43 +00:00
test: add gpt-5.4 thinking visibility QA
This commit is contained in:
@@ -8,6 +8,10 @@ const QA_REASONING_ONLY_RECOVERY_PROMPT =
|
||||
"Reasoning-only continuation QA check: read QA_KICKOFF_TASK.md, then answer with exactly REASONING-RECOVERED-OK.";
|
||||
const QA_REASONING_ONLY_SIDE_EFFECT_PROMPT =
|
||||
"Reasoning-only after write safety check: write reasoning-only-side-effect.txt, then answer with exactly SIDE-EFFECT-GUARD-OK.";
|
||||
const QA_THINKING_VISIBILITY_OFF_PROMPT =
|
||||
"QA thinking visibility check off: answer exactly THINKING-OFF-OK.";
|
||||
const QA_THINKING_VISIBILITY_MAX_PROMPT =
|
||||
"QA thinking visibility check max: verify 17+24=41 internally, then answer exactly THINKING-MAX-OK.";
|
||||
const QA_EMPTY_RESPONSE_RECOVERY_PROMPT =
|
||||
"Empty response continuation QA check: read QA_KICKOFF_TASK.md, then answer with exactly EMPTY-RECOVERED-OK.";
|
||||
const QA_EMPTY_RESPONSE_EXHAUSTION_PROMPT =
|
||||
@@ -2049,6 +2053,54 @@ describe("qa mock openai server", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
it("scripts the GPT-5.4 thinking visibility switch prompts", async () => {
|
||||
const server = await startMockServer();
|
||||
|
||||
expect(
|
||||
await expectResponsesJson<{
|
||||
output?: Array<{ type?: string; content?: Array<{ text?: string }> }>;
|
||||
}>(server, {
|
||||
stream: false,
|
||||
model: "gpt-5.4",
|
||||
input: [makeUserInput(QA_THINKING_VISIBILITY_OFF_PROMPT)],
|
||||
}),
|
||||
).toMatchObject({
|
||||
output: [
|
||||
{
|
||||
type: "message",
|
||||
content: [{ text: "THINKING-OFF-OK" }],
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
expect(
|
||||
await expectResponsesJson<{
|
||||
output?: Array<{
|
||||
type?: string;
|
||||
id?: string;
|
||||
summary?: Array<{ text?: string }>;
|
||||
content?: Array<{ text?: string }>;
|
||||
}>;
|
||||
}>(server, {
|
||||
stream: false,
|
||||
model: "gpt-5.4",
|
||||
input: [makeUserInput(QA_THINKING_VISIBILITY_MAX_PROMPT)],
|
||||
}),
|
||||
).toMatchObject({
|
||||
output: [
|
||||
{
|
||||
type: "reasoning",
|
||||
id: "rs_mock_thinking_visibility_max",
|
||||
summary: [],
|
||||
},
|
||||
{
|
||||
type: "message",
|
||||
content: [{ text: "THINKING-MAX-OK" }],
|
||||
},
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
it("keeps the reasoning-only side-effect path ready for no-auto-retry QA coverage", async () => {
|
||||
const server = await startMockServer();
|
||||
|
||||
|
||||
@@ -140,6 +140,8 @@ const TINY_PNG_BASE64 =
|
||||
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO7Z0nQAAAAASUVORK5CYII=";
|
||||
const QA_REASONING_ONLY_RECOVERY_PROMPT_RE = /reasoning-only continuation qa check/i;
|
||||
const QA_REASONING_ONLY_SIDE_EFFECT_PROMPT_RE = /reasoning-only after write safety check/i;
|
||||
const QA_THINKING_VISIBILITY_OFF_PROMPT_RE = /qa thinking visibility check off/i;
|
||||
const QA_THINKING_VISIBILITY_MAX_PROMPT_RE = /qa thinking visibility check max/i;
|
||||
const QA_EMPTY_RESPONSE_RECOVERY_PROMPT_RE = /empty response continuation qa check/i;
|
||||
const QA_EMPTY_RESPONSE_EXHAUSTION_PROMPT_RE = /empty response exhaustion qa check/i;
|
||||
const QA_QUIET_STREAMING_PROMPT_RE = /quiet streaming qa check/i;
|
||||
@@ -924,6 +926,61 @@ function buildReasoningOnlyEvents(summaryText: string, id: string): StreamEvent[
|
||||
];
|
||||
}
|
||||
|
||||
function buildReasoningAndAssistantEvents(params: {
|
||||
reasoningId: string;
|
||||
answerText: string;
|
||||
answerId?: string;
|
||||
}): StreamEvent[] {
|
||||
const reasoningItem = {
|
||||
type: "reasoning",
|
||||
id: params.reasoningId,
|
||||
summary: [],
|
||||
} as const;
|
||||
const answerItem = buildAssistantOutputItem({
|
||||
id: params.answerId ?? "msg_mock_reasoned_answer",
|
||||
phase: "final_answer",
|
||||
text: params.answerText,
|
||||
});
|
||||
return [
|
||||
{
|
||||
type: "response.output_item.added",
|
||||
item: {
|
||||
type: "reasoning",
|
||||
id: params.reasoningId,
|
||||
summary: [],
|
||||
},
|
||||
},
|
||||
{
|
||||
type: "response.output_item.done",
|
||||
item: reasoningItem,
|
||||
},
|
||||
{
|
||||
type: "response.output_item.added",
|
||||
item: {
|
||||
type: "message",
|
||||
id: answerItem.id,
|
||||
role: "assistant",
|
||||
phase: "final_answer",
|
||||
content: [],
|
||||
status: "in_progress",
|
||||
},
|
||||
},
|
||||
{
|
||||
type: "response.output_item.done",
|
||||
item: answerItem,
|
||||
},
|
||||
{
|
||||
type: "response.completed",
|
||||
response: {
|
||||
id: `resp_${params.reasoningId}`,
|
||||
status: "completed",
|
||||
output: [reasoningItem, answerItem],
|
||||
usage: { input_tokens: 64, output_tokens: 16, total_tokens: 80 },
|
||||
},
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
async function buildResponsesPayload(
|
||||
body: Record<string, unknown>,
|
||||
scenarioState: MockScenarioState,
|
||||
@@ -981,6 +1038,15 @@ async function buildResponsesPayload(
|
||||
}
|
||||
return buildAssistantEvents("BUG-SHOULD-NOT-AUTO-RETRY");
|
||||
}
|
||||
if (QA_THINKING_VISIBILITY_MAX_PROMPT_RE.test(prompt)) {
|
||||
return buildReasoningAndAssistantEvents({
|
||||
reasoningId: "rs_mock_thinking_visibility_max",
|
||||
answerText: "THINKING-MAX-OK",
|
||||
});
|
||||
}
|
||||
if (QA_THINKING_VISIBILITY_OFF_PROMPT_RE.test(prompt)) {
|
||||
return buildAssistantEvents("THINKING-OFF-OK");
|
||||
}
|
||||
if (QA_EMPTY_RESPONSE_RECOVERY_PROMPT_RE.test(allInputText)) {
|
||||
if (!toolOutput) {
|
||||
return buildToolCallEventsWithArgs("read", { path: "QA_KICKOFF_TASK.md" });
|
||||
|
||||
@@ -123,6 +123,32 @@ describe("qa scenario catalog", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("includes the GPT-5.4 thinking visibility switch scenario", () => {
|
||||
const scenario = readQaScenarioById("gpt54-thinking-visibility-switch");
|
||||
const config = readQaScenarioExecutionConfig("gpt54-thinking-visibility-switch") as
|
||||
| {
|
||||
requiredLiveProvider?: string;
|
||||
requiredLiveModel?: string;
|
||||
offDirective?: string;
|
||||
maxDirective?: string;
|
||||
reasoningDirective?: string;
|
||||
}
|
||||
| undefined;
|
||||
|
||||
expect(scenario.sourcePath).toBe("qa/scenarios/models/gpt54-thinking-visibility-switch.md");
|
||||
expect(config?.requiredLiveProvider).toBe("openai");
|
||||
expect(config?.requiredLiveModel).toBe("gpt-5.4");
|
||||
expect(config?.offDirective).toBe("/think off");
|
||||
expect(config?.maxDirective).toBe("/think max");
|
||||
expect(config?.reasoningDirective).toBe("/reasoning on");
|
||||
expect(scenario.execution.flow?.steps.map((step) => step.name)).toEqual([
|
||||
"enables reasoning display and disables thinking",
|
||||
"switches to max thinking",
|
||||
"verifies max thinking emits visible reasoning",
|
||||
"verifies max thinking completes the answer",
|
||||
]);
|
||||
});
|
||||
|
||||
it("includes the seeded mock-only broken-turn scenarios in the markdown pack", () => {
|
||||
const scenarioIds = [
|
||||
"reasoning-only-recovery-replay-safe-read",
|
||||
|
||||
211
qa/scenarios/models/gpt54-thinking-visibility-switch.md
Normal file
211
qa/scenarios/models/gpt54-thinking-visibility-switch.md
Normal file
@@ -0,0 +1,211 @@
|
||||
# GPT-5.4 thinking visibility switch
|
||||
|
||||
```yaml qa-scenario
|
||||
id: gpt54-thinking-visibility-switch
|
||||
title: GPT-5.4 thinking visibility switch
|
||||
surface: models
|
||||
coverage:
|
||||
primary:
|
||||
- models.thinking
|
||||
secondary:
|
||||
- runtime.reasoning-visibility
|
||||
objective: Verify GPT-5.4 can switch from disabled thinking to max thinking while reasoning display stays enabled.
|
||||
successCriteria:
|
||||
- Live runs target openai/gpt-5.4, not a mini or pro variant.
|
||||
- The session enables reasoning display before the comparison turns.
|
||||
- The disabled-thinking turn returns its visible marker without a Reasoning-prefixed message.
|
||||
- The max-thinking turn returns its visible marker and a separate Reasoning-prefixed message.
|
||||
docsRefs:
|
||||
- docs/tools/thinking.md
|
||||
- docs/help/testing.md
|
||||
- docs/concepts/qa-e2e-automation.md
|
||||
codeRefs:
|
||||
- src/auto-reply/reply/directives.ts
|
||||
- src/auto-reply/thinking.shared.ts
|
||||
- src/agents/pi-embedded-runner/run/payloads.ts
|
||||
- extensions/openai/openai-provider.ts
|
||||
- extensions/qa-lab/src/providers/mock-openai/server.ts
|
||||
execution:
|
||||
kind: flow
|
||||
summary: Toggle reasoning display and GPT-5.4 thinking between off/none and max/high, then verify visible reasoning only on the max turn.
|
||||
config:
|
||||
requiredLiveProvider: openai
|
||||
requiredLiveModel: gpt-5.4
|
||||
offDirective: /think off
|
||||
maxDirective: /think max
|
||||
reasoningDirective: /reasoning on
|
||||
conversationId: qa-thinking-visibility
|
||||
offPrompt: "QA thinking visibility check off: answer exactly THINKING-OFF-OK."
|
||||
maxPrompt: "QA thinking visibility check max: verify 17+24=41 internally, then answer exactly THINKING-MAX-OK."
|
||||
offMarker: THINKING-OFF-OK
|
||||
maxMarker: THINKING-MAX-OK
|
||||
```
|
||||
|
||||
```yaml qa-flow
|
||||
steps:
|
||||
- name: enables reasoning display and disables thinking
|
||||
actions:
|
||||
- call: waitForGatewayHealthy
|
||||
args:
|
||||
- ref: env
|
||||
- 60000
|
||||
- call: waitForQaChannelReady
|
||||
args:
|
||||
- ref: env
|
||||
- 60000
|
||||
- call: reset
|
||||
- set: selected
|
||||
value:
|
||||
expr: splitModelRef(env.primaryModel)
|
||||
- assert:
|
||||
expr: "env.providerMode !== 'live-frontier' || (selected?.provider === config.requiredLiveProvider && selected?.model === config.requiredLiveModel)"
|
||||
message:
|
||||
expr: "`expected live GPT-5.4, got ${env.primaryModel}`"
|
||||
- call: state.addInboundMessage
|
||||
args:
|
||||
- conversation:
|
||||
id:
|
||||
expr: config.conversationId
|
||||
kind: direct
|
||||
senderId: qa-operator
|
||||
senderName: QA Operator
|
||||
text:
|
||||
expr: config.reasoningDirective
|
||||
- call: waitForCondition
|
||||
saveAs: reasoningAck
|
||||
args:
|
||||
- lambda:
|
||||
expr: "state.getSnapshot().messages.filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === config.conversationId && /Reasoning visibility enabled/i.test(candidate.text)).at(-1)"
|
||||
- expr: liveTurnTimeoutMs(env, 20000)
|
||||
- call: state.addInboundMessage
|
||||
args:
|
||||
- conversation:
|
||||
id:
|
||||
expr: config.conversationId
|
||||
kind: direct
|
||||
senderId: qa-operator
|
||||
senderName: QA Operator
|
||||
text:
|
||||
expr: config.offDirective
|
||||
- call: waitForCondition
|
||||
saveAs: offAck
|
||||
args:
|
||||
- lambda:
|
||||
expr: "state.getSnapshot().messages.filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === config.conversationId && /Thinking disabled/i.test(candidate.text)).at(-1)"
|
||||
- expr: liveTurnTimeoutMs(env, 20000)
|
||||
- set: offCursor
|
||||
value:
|
||||
expr: state.getSnapshot().messages.length
|
||||
- call: state.addInboundMessage
|
||||
args:
|
||||
- conversation:
|
||||
id:
|
||||
expr: config.conversationId
|
||||
kind: direct
|
||||
senderId: qa-operator
|
||||
senderName: QA Operator
|
||||
text:
|
||||
expr: "`${config.offDirective} ${config.offPrompt}`"
|
||||
- call: waitForCondition
|
||||
saveAs: offAnswer
|
||||
args:
|
||||
- lambda:
|
||||
expr: "state.getSnapshot().messages.slice(offCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === config.conversationId && candidate.text.includes(config.offMarker)).at(-1)"
|
||||
- expr: liveTurnTimeoutMs(env, 30000)
|
||||
- set: offMessages
|
||||
value:
|
||||
expr: "state.getSnapshot().messages.slice(offCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === config.conversationId)"
|
||||
- assert:
|
||||
expr: "offMessages.some((candidate) => candidate.text.includes(config.offMarker))"
|
||||
message:
|
||||
expr: "`missing off marker; saw ${offMessages.map((message) => message.text).join(' | ')}`"
|
||||
- assert:
|
||||
expr: "!offMessages.some((candidate) => candidate.text.trimStart().startsWith('Reasoning:'))"
|
||||
message:
|
||||
expr: "`disabled thinking unexpectedly emitted reasoning: ${offMessages.map((message) => message.text).join(' | ')}`"
|
||||
- if:
|
||||
expr: "Boolean(env.mock)"
|
||||
then:
|
||||
- set: requests
|
||||
value:
|
||||
expr: "await fetchJson(`${env.mock.baseUrl}/debug/requests`)"
|
||||
- set: offRequest
|
||||
value:
|
||||
expr: "requests.find((request) => String(request.allInputText ?? '').includes(config.offPrompt))"
|
||||
- assert:
|
||||
expr: "String(offRequest?.model ?? '').includes('gpt-5.4')"
|
||||
message:
|
||||
expr: "`expected GPT-5.4 off mock request, got ${String(offRequest?.model ?? '')}`"
|
||||
detailsExpr: "`off ack=${offAck.text}; off answer=${offAnswer.text}`"
|
||||
- name: switches to max thinking
|
||||
actions:
|
||||
- call: state.addInboundMessage
|
||||
args:
|
||||
- conversation:
|
||||
id:
|
||||
expr: config.conversationId
|
||||
kind: direct
|
||||
senderId: qa-operator
|
||||
senderName: QA Operator
|
||||
text:
|
||||
expr: config.maxDirective
|
||||
- call: waitForCondition
|
||||
saveAs: maxAck
|
||||
args:
|
||||
- lambda:
|
||||
expr: "state.getSnapshot().messages.filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === config.conversationId && /Thinking level set to high/i.test(candidate.text)).at(-1)"
|
||||
- expr: liveTurnTimeoutMs(env, 20000)
|
||||
detailsExpr: "`max ack=${maxAck.text}`"
|
||||
- name: verifies max thinking emits visible reasoning
|
||||
actions:
|
||||
- set: maxCursor
|
||||
value:
|
||||
expr: state.getSnapshot().messages.length
|
||||
- call: state.addInboundMessage
|
||||
args:
|
||||
- conversation:
|
||||
id:
|
||||
expr: config.conversationId
|
||||
kind: direct
|
||||
senderId: qa-operator
|
||||
senderName: QA Operator
|
||||
text:
|
||||
expr: "`${config.maxDirective} ${config.maxPrompt}`"
|
||||
- call: waitForCondition
|
||||
saveAs: maxReasoning
|
||||
args:
|
||||
- lambda:
|
||||
expr: "state.getSnapshot().messages.slice(maxCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === config.conversationId && candidate.text.trimStart().startsWith('Reasoning:')).at(-1)"
|
||||
- expr: liveTurnTimeoutMs(env, 120000)
|
||||
- assert:
|
||||
expr: "maxReasoning.text.trimStart().startsWith('Reasoning:')"
|
||||
message:
|
||||
expr: "`missing max reasoning message near answer: ${recentOutboundSummary(state, 6)}`"
|
||||
detailsExpr: "`reasoning=${maxReasoning.text}`"
|
||||
- name: verifies max thinking completes the answer
|
||||
actions:
|
||||
- call: waitForCondition
|
||||
saveAs: maxAnswer
|
||||
args:
|
||||
- lambda:
|
||||
expr: "state.getSnapshot().messages.slice(maxCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === config.conversationId && candidate.text.includes(config.maxMarker)).at(-1)"
|
||||
- expr: liveTurnTimeoutMs(env, 120000)
|
||||
- assert:
|
||||
expr: "maxAnswer.text.includes(config.maxMarker)"
|
||||
message:
|
||||
expr: "`missing max marker: ${maxAnswer.text}`"
|
||||
- if:
|
||||
expr: "Boolean(env.mock)"
|
||||
then:
|
||||
- set: requests
|
||||
value:
|
||||
expr: "await fetchJson(`${env.mock.baseUrl}/debug/requests`)"
|
||||
- set: maxRequest
|
||||
value:
|
||||
expr: "requests.find((request) => String(request.allInputText ?? '').includes(config.maxPrompt))"
|
||||
- assert:
|
||||
expr: "String(maxRequest?.model ?? '').includes('gpt-5.4')"
|
||||
message:
|
||||
expr: "`expected GPT-5.4 mock request, got ${String(maxRequest?.model ?? '')}`"
|
||||
detailsExpr: "`answer=${maxAnswer.text}`"
|
||||
```
|
||||
@@ -513,7 +513,7 @@ describe("openai transport stream", () => {
|
||||
expect(params.input?.[0]).toMatchObject({ role: "developer" });
|
||||
});
|
||||
|
||||
it("defaults OpenAI Responses reasoning effort to high when unset", () => {
|
||||
it("does not infer high reasoning when Pi passes thinking off", () => {
|
||||
const params = buildOpenAIResponsesParams(
|
||||
{
|
||||
id: "gpt-5.4",
|
||||
@@ -535,8 +535,8 @@ describe("openai transport stream", () => {
|
||||
undefined,
|
||||
) as { reasoning?: unknown; include?: string[] };
|
||||
|
||||
expect(params.reasoning).toEqual({ effort: "high", summary: "auto" });
|
||||
expect(params.include).toEqual(["reasoning.encrypted_content"]);
|
||||
expect(params.reasoning).toEqual({ effort: "none" });
|
||||
expect(params).not.toHaveProperty("include");
|
||||
});
|
||||
|
||||
it("uses shared stream reasoning as OpenAI Responses effort", () => {
|
||||
|
||||
@@ -814,16 +814,12 @@ export function buildOpenAIResponsesParams(
|
||||
} else if (model.provider !== "github-copilot") {
|
||||
const reasoningEffort = resolveOpenAIReasoningEffortForModel({
|
||||
model,
|
||||
effort: "high",
|
||||
effort: "none",
|
||||
});
|
||||
if (reasoningEffort) {
|
||||
params.reasoning = {
|
||||
effort: reasoningEffort,
|
||||
...(reasoningEffort === "none" ? {} : { summary: "auto" }),
|
||||
};
|
||||
if (reasoningEffort !== "none") {
|
||||
params.include = ["reasoning.encrypted_content"];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ import type { AssistantMessage } from "@mariozechner/pi-ai";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
extractAssistantText,
|
||||
extractAssistantThinking,
|
||||
extractAssistantVisibleText,
|
||||
formatReasoningMessage,
|
||||
promoteThinkingTagsToBlocks,
|
||||
@@ -641,6 +642,27 @@ describe("formatReasoningMessage", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("extractAssistantThinking", () => {
|
||||
it("surfaces signed native reasoning even when the provider returns an empty summary", () => {
|
||||
const msg = makeAssistantMessage({
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "",
|
||||
thinkingSignature: JSON.stringify({ type: "reasoning", id: "rs_live", summary: [] }),
|
||||
},
|
||||
{ type: "text", text: "Done." },
|
||||
],
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
|
||||
expect(extractAssistantThinking(msg)).toBe(
|
||||
"Native reasoning was produced; no summary text was returned.",
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe("stripDowngradedToolCallText", () => {
|
||||
it("strips downgraded marker blocks while preserving surrounding user-facing text", () => {
|
||||
const cases = [
|
||||
|
||||
@@ -147,7 +147,13 @@ export function extractAssistantThinking(msg: AssistantMessage): string {
|
||||
}
|
||||
const record = block as unknown as Record<string, unknown>;
|
||||
if (record.type === "thinking" && typeof record.thinking === "string") {
|
||||
return record.thinking.trim();
|
||||
const thinking = record.thinking.trim();
|
||||
if (thinking) {
|
||||
return thinking;
|
||||
}
|
||||
if (typeof record.thinkingSignature === "string" && record.thinkingSignature.trim()) {
|
||||
return "Native reasoning was produced; no summary text was returned.";
|
||||
}
|
||||
}
|
||||
return "";
|
||||
})
|
||||
|
||||
@@ -89,6 +89,7 @@ function parseInlineDirectivesForTest(body: string) {
|
||||
async function resolveHelloWithModelDefaults(params: {
|
||||
defaultThinking: "off" | "low";
|
||||
defaultReasoning: "on";
|
||||
sessionEntry?: SessionEntry;
|
||||
}) {
|
||||
const resolveDefaultThinkingLevel = vi.fn(async () => params.defaultThinking);
|
||||
const resolveDefaultReasoningLevel = vi.fn(async () => params.defaultReasoning);
|
||||
@@ -119,7 +120,7 @@ async function resolveHelloWithModelDefaults(params: {
|
||||
CommandBody: "hello",
|
||||
Provider: "whatsapp",
|
||||
} as TemplateContext,
|
||||
sessionEntry: makeSessionEntry(),
|
||||
sessionEntry: params.sessionEntry ?? makeSessionEntry(),
|
||||
sessionStore: {},
|
||||
sessionKey: "agent:main:whatsapp:+2000",
|
||||
storePath: "/tmp/sessions.json",
|
||||
@@ -427,6 +428,23 @@ describe("resolveReplyDirectives", () => {
|
||||
expect(resolveDefaultReasoningLevel).toHaveBeenCalledOnce();
|
||||
});
|
||||
|
||||
it("does not re-enable model reasoning when thinking was explicitly disabled", async () => {
|
||||
const { result, resolveDefaultReasoningLevel } = await resolveHelloWithModelDefaults({
|
||||
defaultThinking: "off",
|
||||
defaultReasoning: "on",
|
||||
sessionEntry: makeSessionEntry({ thinkingLevel: "off" }),
|
||||
});
|
||||
|
||||
expect(result).toEqual({
|
||||
kind: "continue",
|
||||
result: expect.objectContaining({
|
||||
resolvedThinkLevel: "off",
|
||||
resolvedReasoningLevel: "off",
|
||||
}),
|
||||
});
|
||||
expect(resolveDefaultReasoningLevel).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("skips the model reasoning default when thinking is active", async () => {
|
||||
const { result, resolveDefaultReasoningLevel } = await resolveHelloWithModelDefaults({
|
||||
defaultThinking: "low",
|
||||
|
||||
@@ -495,9 +495,14 @@ export async function resolveReplyDirectives(params: {
|
||||
(await modelState.resolveDefaultThinkingLevel()) ??
|
||||
(agentCfg?.thinkingDefault as ThinkLevel | undefined);
|
||||
|
||||
const thinkingExplicitlySet =
|
||||
directives.thinkLevel !== undefined ||
|
||||
targetSessionEntry?.thinkingLevel !== undefined ||
|
||||
agentCfg?.thinkingDefault !== undefined;
|
||||
|
||||
// When neither directive nor session nor agent set reasoning, default to model capability
|
||||
// (e.g. OpenRouter with reasoning: true). Skip model default when thinking is active
|
||||
// to avoid redundant Reasoning: output alongside internal thinking blocks.
|
||||
// or when thinking was explicitly disabled.
|
||||
const hasAgentReasoningDefault =
|
||||
agentEntry?.reasoningDefault !== undefined && agentEntry?.reasoningDefault !== null;
|
||||
const reasoningExplicitlySet =
|
||||
@@ -506,7 +511,12 @@ export async function resolveReplyDirectives(params: {
|
||||
targetSessionEntry?.reasoningLevel !== null) ||
|
||||
hasAgentReasoningDefault;
|
||||
const thinkingActive = resolvedThinkLevelWithDefault !== "off";
|
||||
if (!reasoningExplicitlySet && resolvedReasoningLevel === "off" && !thinkingActive) {
|
||||
if (
|
||||
!reasoningExplicitlySet &&
|
||||
resolvedReasoningLevel === "off" &&
|
||||
!thinkingActive &&
|
||||
!thinkingExplicitlySet
|
||||
) {
|
||||
resolvedReasoningLevel = await modelState.resolveDefaultReasoningLevel();
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user