agents: stop strict mode from hijacking chat turns

2026-05-06 07:00:43 +00:00 · 2026-04-12 23:46:57 -07:00
parent 190a4b4869
commit 8efbe8c1ed
3 changed files with 111 additions and 2 deletions
--- a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
+++ b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
@@ -72,6 +72,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {

    const result = await runEmbeddedPiAgent({
      ...overflowBaseRunParams,
+      prompt: "Please inspect the code, make the change, and run the checks.",
      sessionKey: undefined,
      agentId: "research",
      provider: "openai",
@@ -120,6 +121,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {

    const result = await runEmbeddedPiAgent({
      ...overflowBaseRunParams,
+      prompt: "Please inspect the code, make the change, and run the checks.",
      provider: "openai",
      model: "gpt-5.4",
      runId: "run-strict-agentic-blocked-liveness",
@@ -159,6 +161,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {

    const result = await runEmbeddedPiAgent({
      ...overflowBaseRunParams,
+      prompt: "Please inspect the code, make the change, and run the checks.",
      provider: "openai",
      model: "gpt-5.4",
      runId: "run-strict-agentic-auto-activated",
@@ -193,6 +196,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {

    const result = await runEmbeddedPiAgent({
      ...overflowBaseRunParams,
+      prompt: "Please inspect the code, make the change, and run the checks.",
      provider: "openai",
      model: "gpt-5.4",
      runId: "run-strict-agentic-explicit-default-optout",
@@ -221,6 +225,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
    const retryInstruction = resolvePlanningOnlyRetryInstruction({
      provider: "openai",
      modelId: "gpt-5.4",
+      prompt: "Please inspect the code, make the change, and run the checks.",
      aborted: false,
      timedOut: false,
      attempt: makeAttemptResult({
@@ -235,6 +240,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
    const retryInstruction = resolvePlanningOnlyRetryInstruction({
      provider: "openai",
      modelId: "gpt-5.4",
+      prompt: "Please inspect the code, make the change, and run the checks.",
      aborted: false,
      timedOut: false,
      attempt: makeAttemptResult({
@@ -251,6 +257,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
    const retryInstruction = resolvePlanningOnlyRetryInstruction({
      provider: "openai",
      modelId: "gpt-5.4",
+      prompt: "Please inspect the code, make the change, and run the checks.",
      aborted: false,
      timedOut: false,
      attempt: makeAttemptResult({
@@ -265,6 +272,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
    const retryInstruction = resolvePlanningOnlyRetryInstruction({
      provider: "openai",
      modelId: "gpt-5.4",
+      prompt: "Please inspect the code, make the change, and run the checks.",
      aborted: false,
      timedOut: false,
      attempt: makeAttemptResult({
@@ -279,6 +287,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
    const retryInstruction = resolvePlanningOnlyRetryInstruction({
      provider: "openai",
      modelId: "gpt-5.4",
+      prompt: "Please inspect the code, make the change, and run the checks.",
      aborted: false,
      timedOut: false,
      attempt: makeAttemptResult({
@@ -297,6 +306,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
    const retryInstruction = resolvePlanningOnlyRetryInstruction({
      provider: "openai",
      modelId: "gpt-5.4",
+      prompt: "Please inspect the code, make the change, and run the checks.",
      aborted: false,
      timedOut: false,
      attempt: makeAttemptResult({
@@ -316,6 +326,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
    const retryInstruction = resolvePlanningOnlyRetryInstruction({
      provider: "openai",
      modelId: "gpt-5.4",
+      prompt: "Please inspect the code, make the change, and run the checks.",
      aborted: false,
      timedOut: false,
      attempt: makeAttemptResult({
@@ -369,6 +380,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
    const retryInstruction = resolvePlanningOnlyRetryInstruction({
      provider: "openai",
      modelId: "  openai/gpt-5.4  ",
+      prompt: "Please inspect the code, make the change, and run the checks.",
      aborted: false,
      timedOut: false,
      attempt: makeAttemptResult({
@@ -440,6 +452,52 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
      }),
    ).toBe("paused");
  });
+
+  // Regression: a casual status update that asks for nothing must not be
+  // retried as a "planning-only" turn, even though the assistant reply
+  // contains the promise phrase "i will".
+  it("does not strict-agentic retry casual Discord status chatter", async () => {
+    mockedClassifyFailoverReason.mockReturnValue(null);
+    mockedRunEmbeddedAttempt.mockResolvedValue(
+      makeAttemptResult({
+        assistantTexts: [
+          "i am glad, and a little afraid, which is probably the correct mixture. thank you. i will try to deserve the upgrades instead of merely inhabiting them.",
+        ],
+      }),
+    );
+
+    const result = await runEmbeddedPiAgent({
+      ...overflowBaseRunParams,
+      // The prompt has no question mark and no request wording.
+      prompt:
+        "made a bunch of improvements to the student's source code (openclaw) this weekend, along with a few other maintainers. hopefully he will be more proactive now",
+      provider: "openai-codex",
+      model: "gpt-5.4",
+      runId: "run-strict-agentic-casual-discord-status",
+      config: {
+        agents: {
+          list: [{ id: "main" }],
+        },
+      } as OpenClawConfig,
+    });
+
+    // Exactly one attempt means no retry was scheduled for this turn.
+    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
+    expect(result.payloads).toBeUndefined();
+    expect(result.meta.livenessState).toBe("working");
+  });
+
+  // The prompt is a question (it contains "?"), so the outcome depends on the
+  // reply text alone. "I'm not going to ..." is a refusal, not a promise of
+  // future work, and must not be read as a planning-only turn.
+  it("does not misclassify a direct answer that says 'i'm not going to' as planning-only", () => {
+    const retryInstruction = resolvePlanningOnlyRetryInstruction({
+      provider: "openai-codex",
+      modelId: "gpt-5.4",
+      prompt: "What do you think lobstar should do to help the chart?",
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptResult({
+        assistantTexts: [
+          "I'm not going to give token-pumping instructions for a chart. Best answer: build trust and let the market do what it will.",
+        ],
+      }),
+    });
+
+    // null means no retry instruction was produced.
+    expect(retryInstruction).toBeNull();
+  });
 });

 describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
@@ -470,6 +528,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
  it("retries when exactly 1 non-plan tool call plus 'i can do that' prose is detected", () => {
    const result = resolvePlanningOnlyRetryInstruction({
      ...openaiParams,
+      prompt: "Please inspect the code, make the change, and run the checks.",
      aborted: false,
      timedOut: false,
      attempt: makeAttemptWithTools(["read"], "I can do that next."),
@@ -481,6 +540,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
  it("retries when exactly 1 non-plan tool call plus planning prose is detected", () => {
    const result = resolvePlanningOnlyRetryInstruction({
      ...openaiParams,
+      prompt: "Please inspect the code, make the change, and run the checks.",
      aborted: false,
      timedOut: false,
      attempt: makeAttemptWithTools(["read"], "I'll analyze the structure next."),
@@ -492,6 +552,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
  it("does not retry when 2+ non-plan tool calls are present", () => {
    const result = resolvePlanningOnlyRetryInstruction({
      ...openaiParams,
+      prompt: "Please inspect the code, make the change, and run the checks.",
      aborted: false,
      timedOut: false,
      attempt: makeAttemptWithTools(["read", "search"], "I'll verify the output."),
@@ -503,6 +564,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
  it("does not retry when 1 tool call plus completion language is present", () => {
    const result = resolvePlanningOnlyRetryInstruction({
      ...openaiParams,
+      prompt: "Please inspect the code, make the change, and run the checks.",
      aborted: false,
      timedOut: false,
      attempt: makeAttemptWithTools(["read"], "Done. The file looks correct."),
@@ -514,6 +576,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
  it("does not retry when 1 tool call plus 'let me know' handoff is present", () => {
    const result = resolvePlanningOnlyRetryInstruction({
      ...openaiParams,
+      prompt: "Please inspect the code, make the change, and run the checks.",
      aborted: false,
      timedOut: false,
      attempt: makeAttemptWithTools(["read"], "Let me know if you need anything else."),
@@ -525,6 +588,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
  it("does not retry when 1 tool call plus an answer-style summary is present", () => {
    const result = resolvePlanningOnlyRetryInstruction({
      ...openaiParams,
+      prompt: "Please inspect the code, make the change, and run the checks.",
      aborted: false,
      timedOut: false,
      attempt: makeAttemptWithTools(
@@ -539,6 +603,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
  it("does not retry when 1 tool call plus a future-tense description is present", () => {
    const result = resolvePlanningOnlyRetryInstruction({
      ...openaiParams,
+      prompt: "Please inspect the code, make the change, and run the checks.",
      aborted: false,
      timedOut: false,
      attempt: makeAttemptWithTools(
@@ -553,6 +618,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
  it("does not retry when 1 safe tool call is followed by answer prose joined with 'and'", () => {
    const result = resolvePlanningOnlyRetryInstruction({
      ...openaiParams,
+      prompt: "Please inspect the code, make the change, and run the checks.",
      aborted: false,
      timedOut: false,
      attempt: makeAttemptWithTools(["read"], "I'll explain and recommend a fix."),
@@ -564,6 +630,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
  it("does not retry when 1 tool call plus a bare 'i can do that' reply is present", () => {
    const result = resolvePlanningOnlyRetryInstruction({
      ...openaiParams,
+      prompt: "Please inspect the code, make the change, and run the checks.",
      aborted: false,
      timedOut: false,
      attempt: makeAttemptWithTools(["read"], "I can do that."),
@@ -575,6 +642,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
  it("does not retry when the lone tool call already had side effects", () => {
    const result = resolvePlanningOnlyRetryInstruction({
      ...openaiParams,
+      prompt: "Please inspect the code, make the change, and run the checks.",
      aborted: false,
      timedOut: false,
      attempt: makeAttemptWithTools(["sessions_spawn"], "I'll continue from there next."),
@@ -586,6 +654,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
  it("does not retry when the lone tool call is unclassified", () => {
    const result = resolvePlanningOnlyRetryInstruction({
      ...openaiParams,
+      prompt: "Please inspect the code, make the change, and run the checks.",
      aborted: false,
      timedOut: false,
      attempt: makeAttemptWithTools(["vendor_widget"], "I'll continue from there next."),
@@ -593,4 +662,16 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {

    expect(result).toBeNull();
  });
+
+  // Same reply shape as the "retries when exactly 1 non-plan tool call plus
+  // planning prose" case in this suite (one `read` call plus "I'll ... next"),
+  // but here the user prompt is casual chat with no request in it, so no retry
+  // is expected.
+  it("does not retry single-action narration on casual non-task chat", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      prompt: "i haven't restarted you on latest main yet @The Student - get ready though",
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(["read"], "I'll check that next."),
+    });
+
+    expect(result).toBeNull();
+  });
 });
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -1587,6 +1587,7 @@ export async function runEmbeddedPiAgent(
          const nextPlanningOnlyRetryInstruction = resolvePlanningOnlyRetryInstruction({
            provider,
            modelId,
+            prompt: params.prompt,
            aborted,
            timedOut,
            attempt,
--- a/src/agents/pi-embedded-runner/run/incomplete-turn.ts
+++ b/src/agents/pi-embedded-runner/run/incomplete-turn.ts
@@ -49,12 +49,14 @@ export function isIncompleteTerminalAssistantTurn(params: {
 }

 const PLANNING_ONLY_PROMISE_RE =
-  /\b(?:i(?:'ll| will)|let me|going to|first[, ]+i(?:'ll| will)|next[, ]+i(?:'ll| will)|i can do that)\b/i;
+  /\b(?:i(?:'ll| will)|let me|i(?:'m| am)\s+going to|first[, ]+i(?:'ll| will)|next[, ]+i(?:'ll| will)|i can do that)\b/i;
 const PLANNING_ONLY_COMPLETION_RE =
  /\b(?:done|finished|implemented|updated|fixed|changed|ran|verified|found|here(?:'s| is) what|blocked by|the blocker is)\b/i;
 const PLANNING_ONLY_HEADING_RE = /^(?:plan|steps?|next steps?)\s*:/i;
 const PLANNING_ONLY_BULLET_RE = /^(?:[-*•]\s+|\d+[.)]\s+)/u;
 const PLANNING_ONLY_MAX_VISIBLE_TEXT = 700;
+// Work-related verbs expected in assistant text that promises concrete action.
+// resolvePlanningOnlyRetryInstruction returns null (no retry) when the text has
+// a promise phrase but none of these verbs, unless the text uses structured
+// plan formatting or is a single-action narrative.
+const PLANNING_ONLY_ACTION_VERB_RE =
+  /\b(?:inspect|investigate|check|look(?:\s+into|\s+at)?|read|search|find|debug|fix|patch|update|change|edit|write|implement|run|test|verify|review|analy(?:s|z)e|summari(?:s|z)e|explain|answer|show|share|report|prepare|capture|take|refactor|restart|deploy|ship)\b/i;
 const SINGLE_ACTION_EXPLICIT_CONTINUATION_RE =
  /\b(?:going to|first[, ]+i(?:'ll| will)|next[, ]+i(?:'ll| will)|then[, ]+i(?:'ll| will)|i can do that next|let me (?!know\b)\w+(?:\s+\w+){0,3}\s+(?:next|then|first)\b)/i;
 const SINGLE_ACTION_MULTI_STEP_PROMISE_RE =
@@ -112,6 +114,10 @@ const ACK_EXECUTION_NORMALIZED_SET = new Set([
  "진행해",
  "계속해",
 ]);
+// Matches a user prompt that STARTS with an imperative verb, optionally
+// preceded by "please" (e.g. "Please check ...", "fix ..."). Anchored at the
+// start of the text; case-insensitive; English wording only.
+const ACTIONABLE_PROMPT_DIRECTIVE_RE =
+  /^\s*(?:please\s+)?(?:check|look(?:\s+into|\s+at)?|read|write|edit|update|fix|investigate|debug|run|search|find|implement|add|remove|refactor|explain|summari(?:s|z)e|analy(?:s|z)e|review|tell|show|make|restart|deploy|prepare)\b/i;
+// Matches request wording ANYWHERE in a user prompt: "can/could/would/will
+// you", "please"/"pls", or one of the listed task verbs. Not anchored, so it
+// is deliberately broader than the directive pattern above.
+const ACTIONABLE_PROMPT_REQUEST_RE =
+  /\b(?:can|could|would|will)\s+you\b|\b(?:please|pls)\b|\b(?:help|explain|summari(?:s|z)e|analy(?:s|z)e|review|investigate|debug|fix|check|look(?:\s+into|\s+at)?|read|write|edit|update|run|search|find|implement|add|remove|refactor|show|tell me|walk me through)\b/i;

 export const PLANNING_ONLY_RETRY_INSTRUCTION =
  "The previous assistant turn only described the plan. Do not restate the plan. Act now: take the first concrete tool action you can. If a real blocker prevents action, reply with the exact blocker in one sentence.";
@@ -234,6 +240,17 @@ export function isLikelyExecutionAckPrompt(text: string): boolean {
  return ACK_EXECUTION_NORMALIZED_SET.has(normalizeAckPrompt(trimmed));
 }

+/**
+ * Heuristic check for whether a user prompt asks the agent to do or answer
+ * something, as opposed to casual chat.
+ *
+ * resolvePlanningOnlyRetryInstruction uses this to skip the planning-only
+ * retry when a string prompt is not actionable.
+ *
+ * @param text - Raw user prompt.
+ * @returns `false` for blank text; `true` when the prompt is an execution
+ *   ack, contains a question mark, or matches the directive/request
+ *   patterns; otherwise `false`.
+ */
+function isLikelyActionableUserPrompt(text: string): boolean {
+  const trimmed = text.trim();
+  if (!trimmed) {
+    return false;
+  }
+  // Any question counts as actionable. Besides ASCII "?", accept the
+  // full-width (U+FF1F) and Arabic (U+061F) question marks: the ack set in
+  // this file already includes non-Latin entries, so such prompts are in
+  // scope and would otherwise be classified as casual chat.
+  if (isLikelyExecutionAckPrompt(trimmed) || /[?\uFF1F\u061F]/u.test(trimmed)) {
+    return true;
+  }
+  // NOTE(review): both patterns below are English-only; a non-English task
+  // prompt without a question mark still falls through to `false`.
+  return ACTIONABLE_PROMPT_DIRECTIVE_RE.test(trimmed) || ACTIONABLE_PROMPT_REQUEST_RE.test(trimmed);
+}
+
 export function resolveAckExecutionFastPathInstruction(params: {
  provider?: string;
  modelId?: string;
@@ -355,6 +372,7 @@ export function resolvePlanningOnlyRetryLimit(
 export function resolvePlanningOnlyRetryInstruction(params: {
  provider?: string;
  modelId?: string;
+  prompt?: string;
  aborted: boolean;
  timedOut: boolean;
  attempt: PlanningOnlyAttempt;
@@ -371,6 +389,7 @@ export function resolvePlanningOnlyRetryInstruction(params: {
      provider: params.provider,
      modelId: params.modelId,
    }) ||
+    (typeof params.prompt === "string" && !isLikelyActionableUserPrompt(params.prompt)) ||
    params.aborted ||
    params.timedOut ||
    params.attempt.clientToolCall ||
@@ -395,7 +414,15 @@ export function resolvePlanningOnlyRetryInstruction(params: {
  if (!text || text.length > PLANNING_ONLY_MAX_VISIBLE_TEXT || text.includes("```")) {
    return null;
  }
-  if (!PLANNING_ONLY_PROMISE_RE.test(text) && !hasStructuredPlanningOnlyFormat(text)) {
+  const hasStructuredPlanningFormat = hasStructuredPlanningOnlyFormat(text);
+  if (!PLANNING_ONLY_PROMISE_RE.test(text) && !hasStructuredPlanningFormat) {
+    return null;
+  }
+  if (
+    !hasStructuredPlanningFormat &&
+    !singleActionNarrative &&
+    !PLANNING_ONLY_ACTION_VERB_RE.test(text)
+  ) {
    return null;
  }
  if (PLANNING_ONLY_COMPLETION_RE.test(text)) {