agents: split GPT-5 prompt and retry behavior (#65597)

* agents: split GPT-5 prompt and retry behavior
* agents: fix GPT-5 review follow-ups
* agents: address GPT-5 review follow-ups
* agents: avoid replaying side-effectful GPT retries
* agents: mark subagent control as mutating
* agents: fail closed on single-action retries
* commands: stabilize channel legacy doctor migration test
* agents: narrow single-action retry promise trigger
2026-05-06 12:30:44 +00:00 · 2026-04-12 18:52:22 -07:00
parent d0c83777fb
commit c848ebc8ce
8 changed files with 352 additions and 17 deletions
--- a/extensions/openai/index.test.ts
+++ b/extensions/openai/index.test.ts
@@ -14,6 +14,7 @@ import {
  OPENAI_FRIENDLY_PROMPT_OVERLAY,
  OPENAI_GPT5_EXECUTION_BIAS,
  OPENAI_GPT5_OUTPUT_CONTRACT,
+  OPENAI_GPT5_TOOL_CALL_STYLE,
 } from "./prompt-overlay.js";

 const runtimeMocks = vi.hoisted(() => ({
@@ -365,7 +366,7 @@ describe("openai plugin", () => {
    };

    expect(openaiProvider.resolveSystemPromptContribution?.(contributionContext)).toEqual({
-      stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+      stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
      sectionOverrides: {
        interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
        execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
@@ -382,7 +383,7 @@ describe("openai plugin", () => {
      "Occasional emoji are welcome when they fit naturally, especially for warmth or brief celebration; keep them sparse.",
    );
    expect(codexProvider.resolveSystemPromptContribution?.(contributionContext)).toEqual({
-      stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+      stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
      sectionOverrides: {
        interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
        execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
@@ -454,9 +455,22 @@ describe("openai plugin", () => {
    expect(OPENAI_FRIENDLY_PROMPT_OVERLAY).toContain(
      "Occasional emoji are welcome when they fit naturally, especially for warmth or brief celebration; keep them sparse.",
    );
+    expect(OPENAI_GPT5_EXECUTION_BIAS).toContain(
+      "Use a real tool call or concrete action FIRST when the task is actionable. Do not stop at a plan or promise-to-act reply.",
+    );
+    expect(OPENAI_GPT5_EXECUTION_BIAS).toContain(
+      "If the work will take multiple steps, keep calling tools until the task is done or you hit a real blocker. Do not stop after one step to ask permission.",
+    );
    expect(OPENAI_GPT5_EXECUTION_BIAS).toContain(
      "Do prerequisite lookup or discovery before dependent actions.",
    );
+    expect(OPENAI_GPT5_TOOL_CALL_STYLE).toContain(
+      "Call tools directly without narrating what you are about to do. Do not describe a plan before each tool call.",
+    );
+    expect(OPENAI_GPT5_TOOL_CALL_STYLE).toContain(
+      "When a first-class tool exists for an action, use the tool instead of asking the user to run a command.",
+    );
+    expect(OPENAI_GPT5_TOOL_CALL_STYLE).not.toContain("/approve");
    expect(OPENAI_GPT5_OUTPUT_CONTRACT).toContain(
      "Return the requested sections only, in the requested order.",
    );
@@ -486,7 +500,7 @@ describe("openai plugin", () => {
        agentId: undefined,
      }),
    ).toEqual({
-      stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+      stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
      sectionOverrides: {
        interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
        execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
@@ -514,7 +528,7 @@ describe("openai plugin", () => {
        agentId: undefined,
      }),
    ).toEqual({
-      stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+      stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
      sectionOverrides: {
        execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
      },
@@ -540,7 +554,7 @@ describe("openai plugin", () => {
        agentId: undefined,
      }),
    ).toEqual({
-      stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+      stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
      sectionOverrides: {
        execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
      },
@@ -567,7 +581,7 @@ describe("openai plugin", () => {
        agentId: undefined,
      }),
    ).toEqual({
-      stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+      stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
      sectionOverrides: {
        interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
        execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
@@ -594,7 +608,7 @@ describe("openai plugin", () => {
        agentId: undefined,
      }),
    ).toEqual({
-      stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+      stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
      sectionOverrides: {
        interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
        execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
--- a/extensions/openai/prompt-overlay.ts
+++ b/extensions/openai/prompt-overlay.ts
@@ -64,11 +64,20 @@ Do not use em dashes unless the user explicitly asks for them or they are requir

 export const OPENAI_GPT5_EXECUTION_BIAS = `## Execution Bias

-Start the real work in the same turn when the next step is clear.
+Use a real tool call or concrete action FIRST when the task is actionable. Do not stop at a plan or promise-to-act reply.
+Commentary-only turns are incomplete when tools are available and the next action is clear.
+If the work will take multiple steps, keep calling tools until the task is done or you hit a real blocker. Do not stop after one step to ask permission.
 Do prerequisite lookup or discovery before dependent actions.
-If another tool call would likely improve correctness or completeness, keep going instead of stopping at partial progress.
 Multi-part requests stay incomplete until every requested item is handled or clearly marked blocked.
-Before the final answer, quickly verify correctness, coverage, formatting, and obvious side effects.`;
+Act first, then verify if needed. Do not pause to summarize or verify before taking the next action.`;
+
+export const OPENAI_GPT5_TOOL_CALL_STYLE = `## Tool Call Style
+
+Call tools directly without narrating what you are about to do. Do not describe a plan before each tool call.
+When a first-class tool exists for an action, use the tool instead of asking the user to run a command.
+If multiple tool calls are needed, call them in sequence without stopping to explain between calls.
+Default: do not narrate routine, low-risk tool calls (just call the tool).
+Narrate only when it genuinely helps: complex multi-step work, sensitive actions like deletions, or when the user explicitly asks for commentary.`;

 export type OpenAIPromptOverlayMode = "friendly" | "off";

@@ -103,8 +112,14 @@ export function resolveOpenAISystemPromptContribution(params: {
  ) {
    return undefined;
  }
+  // tool_call_style is NOT overridden via sectionOverrides because the
+  // default section includes dynamic channel-specific approval guidance
+  // from buildExecApprovalPromptGuidance() that varies per runtime
+  // channel. Overriding it with a static string would lose that dynamic
+  // content. Instead, the tool-first reinforcement lives in stablePrefix
+  // so it's always present alongside the default tool_call_style section.
  return {
-    stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+    stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
    sectionOverrides: {
      execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
      ...(params.mode === "friendly" ? { interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY } : {}),