test: add gpt-5.4 thinking visibility QA

This commit is contained in:
Peter Steinberger
2026-04-21 06:12:17 +01:00
parent 663501206f
commit f5be489266
10 changed files with 419 additions and 12 deletions

View File

@@ -89,6 +89,7 @@ function parseInlineDirectivesForTest(body: string) {
async function resolveHelloWithModelDefaults(params: {
defaultThinking: "off" | "low";
defaultReasoning: "on";
sessionEntry?: SessionEntry;
}) {
const resolveDefaultThinkingLevel = vi.fn(async () => params.defaultThinking);
const resolveDefaultReasoningLevel = vi.fn(async () => params.defaultReasoning);
@@ -119,7 +120,7 @@ async function resolveHelloWithModelDefaults(params: {
CommandBody: "hello",
Provider: "whatsapp",
} as TemplateContext,
sessionEntry: makeSessionEntry(),
sessionEntry: params.sessionEntry ?? makeSessionEntry(),
sessionStore: {},
sessionKey: "agent:main:whatsapp:+2000",
storePath: "/tmp/sessions.json",
@@ -427,6 +428,23 @@ describe("resolveReplyDirectives", () => {
expect(resolveDefaultReasoningLevel).toHaveBeenCalledOnce();
});
// Regression: the session entry pins thinkingLevel to "off" while the mocked model
// default for reasoning is "on". The resolved reasoning level must stay "off" and the
// model reasoning default resolver must never be consulted.
it("does not re-enable model reasoning when thinking was explicitly disabled", async () => {
  const sessionEntry = makeSessionEntry({ thinkingLevel: "off" });
  const outcome = await resolveHelloWithModelDefaults({
    defaultThinking: "off",
    defaultReasoning: "on",
    sessionEntry,
  });

  // Both resolved levels are expected to remain "off" on the continue result.
  const expectedLevels = expect.objectContaining({
    resolvedThinkLevel: "off",
    resolvedReasoningLevel: "off",
  });
  expect(outcome.result).toEqual({ kind: "continue", result: expectedLevels });

  // The model-level reasoning default must not have been resolved at all.
  expect(outcome.resolveDefaultReasoningLevel).not.toHaveBeenCalled();
});
it("skips the model reasoning default when thinking is active", async () => {
const { result, resolveDefaultReasoningLevel } = await resolveHelloWithModelDefaults({
defaultThinking: "low",

View File

@@ -495,9 +495,14 @@ export async function resolveReplyDirectives(params: {
(await modelState.resolveDefaultThinkingLevel()) ??
(agentCfg?.thinkingDefault as ThinkLevel | undefined);
const thinkingExplicitlySet =
directives.thinkLevel !== undefined ||
targetSessionEntry?.thinkingLevel !== undefined ||
agentCfg?.thinkingDefault !== undefined;
// When neither directive nor session nor agent set reasoning, default to model capability
// (e.g. OpenRouter with reasoning: true). Skip the model default when thinking is active,
// to avoid redundant Reasoning: output alongside internal thinking blocks, or when
// thinking was explicitly disabled.
const hasAgentReasoningDefault =
agentEntry?.reasoningDefault !== undefined && agentEntry?.reasoningDefault !== null;
const reasoningExplicitlySet =
@@ -506,7 +511,12 @@ export async function resolveReplyDirectives(params: {
targetSessionEntry?.reasoningLevel !== null) ||
hasAgentReasoningDefault;
const thinkingActive = resolvedThinkLevelWithDefault !== "off";
if (!reasoningExplicitlySet && resolvedReasoningLevel === "off" && !thinkingActive) {
if (
!reasoningExplicitlySet &&
resolvedReasoningLevel === "off" &&
!thinkingActive &&
!thinkingExplicitlySet
) {
resolvedReasoningLevel = await modelState.resolveDefaultReasoningLevel();
}