agents: stop strict mode from hijacking chat turns

This commit is contained in:
pashpashpash
2026-04-12 23:46:57 -07:00
parent 190a4b4869
commit 8efbe8c1ed
3 changed files with 111 additions and 2 deletions

View File

@@ -72,6 +72,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
const result = await runEmbeddedPiAgent({
...overflowBaseRunParams,
prompt: "Please inspect the code, make the change, and run the checks.",
sessionKey: undefined,
agentId: "research",
provider: "openai",
@@ -120,6 +121,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
const result = await runEmbeddedPiAgent({
...overflowBaseRunParams,
prompt: "Please inspect the code, make the change, and run the checks.",
provider: "openai",
model: "gpt-5.4",
runId: "run-strict-agentic-blocked-liveness",
@@ -159,6 +161,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
const result = await runEmbeddedPiAgent({
...overflowBaseRunParams,
prompt: "Please inspect the code, make the change, and run the checks.",
provider: "openai",
model: "gpt-5.4",
runId: "run-strict-agentic-auto-activated",
@@ -193,6 +196,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
const result = await runEmbeddedPiAgent({
...overflowBaseRunParams,
prompt: "Please inspect the code, make the change, and run the checks.",
provider: "openai",
model: "gpt-5.4",
runId: "run-strict-agentic-explicit-default-optout",
@@ -221,6 +225,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
const retryInstruction = resolvePlanningOnlyRetryInstruction({
provider: "openai",
modelId: "gpt-5.4",
prompt: "Please inspect the code, make the change, and run the checks.",
aborted: false,
timedOut: false,
attempt: makeAttemptResult({
@@ -235,6 +240,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
const retryInstruction = resolvePlanningOnlyRetryInstruction({
provider: "openai",
modelId: "gpt-5.4",
prompt: "Please inspect the code, make the change, and run the checks.",
aborted: false,
timedOut: false,
attempt: makeAttemptResult({
@@ -251,6 +257,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
const retryInstruction = resolvePlanningOnlyRetryInstruction({
provider: "openai",
modelId: "gpt-5.4",
prompt: "Please inspect the code, make the change, and run the checks.",
aborted: false,
timedOut: false,
attempt: makeAttemptResult({
@@ -265,6 +272,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
const retryInstruction = resolvePlanningOnlyRetryInstruction({
provider: "openai",
modelId: "gpt-5.4",
prompt: "Please inspect the code, make the change, and run the checks.",
aborted: false,
timedOut: false,
attempt: makeAttemptResult({
@@ -279,6 +287,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
const retryInstruction = resolvePlanningOnlyRetryInstruction({
provider: "openai",
modelId: "gpt-5.4",
prompt: "Please inspect the code, make the change, and run the checks.",
aborted: false,
timedOut: false,
attempt: makeAttemptResult({
@@ -297,6 +306,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
const retryInstruction = resolvePlanningOnlyRetryInstruction({
provider: "openai",
modelId: "gpt-5.4",
prompt: "Please inspect the code, make the change, and run the checks.",
aborted: false,
timedOut: false,
attempt: makeAttemptResult({
@@ -316,6 +326,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
const retryInstruction = resolvePlanningOnlyRetryInstruction({
provider: "openai",
modelId: "gpt-5.4",
prompt: "Please inspect the code, make the change, and run the checks.",
aborted: false,
timedOut: false,
attempt: makeAttemptResult({
@@ -369,6 +380,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
const retryInstruction = resolvePlanningOnlyRetryInstruction({
provider: "openai",
modelId: " openai/gpt-5.4 ",
prompt: "Please inspect the code, make the change, and run the checks.",
aborted: false,
timedOut: false,
attempt: makeAttemptResult({
@@ -440,6 +452,52 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
}),
).toBe("paused");
});
// Regression test: a conversational status update (no question, no request) answered with a
// conversational reply must result in a single attempt rather than a strict-agentic retry.
it("does not strict-agentic retry casual Discord status chatter", async () => {
// The failover classifier is stubbed to report no failover reason.
mockedClassifyFailoverReason.mockReturnValue(null);
// The mocked attempt returns one chatty assistant text; note it contains an "i will ..." phrase.
mockedRunEmbeddedAttempt.mockResolvedValue(
makeAttemptResult({
assistantTexts: [
"i am glad, and a little afraid, which is probably the correct mixture. thank you. i will try to deserve the upgrades instead of merely inhabiting them.",
],
}),
);
const result = await runEmbeddedPiAgent({
...overflowBaseRunParams,
// The prompt is a statement about past work; it asks nothing of the agent.
prompt:
"made a bunch of improvements to the student's source code (openclaw) this weekend, along with a few other maintainers. hopefully he will be more proactive now",
provider: "openai-codex",
model: "gpt-5.4",
runId: "run-strict-agentic-casual-discord-status",
// The config lists a single agent entry ("main") with no other settings.
config: {
agents: {
list: [{ id: "main" }],
},
} as OpenClawConfig,
});
// Exactly one attempt ran: no retry was scheduled.
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
// No payloads were produced for the run, and the liveness state stays "working".
expect(result.payloads).toBeUndefined();
expect(result.meta.livenessState).toBe("working");
});
// Regression test: the assistant text contains "going to" only as part of the refusal
// "I'm not going to ..." and then gives a direct answer, so no retry instruction is expected.
it("does not misclassify a direct answer that says 'i'm not going to' as planning-only", () => {
const retryInstruction = resolvePlanningOnlyRetryInstruction({
provider: "openai-codex",
modelId: "gpt-5.4",
// The prompt contains a question mark.
prompt: "What do you think lobstar should do to help the chart?",
aborted: false,
timedOut: false,
attempt: makeAttemptResult({
assistantTexts: [
"I'm not going to give token-pumping instructions for a chart. Best answer: build trust and let the market do what it will.",
],
}),
});
// null means "do not retry this turn as planning-only".
expect(retryInstruction).toBeNull();
});
});
describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
@@ -470,6 +528,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
it("retries when exactly 1 non-plan tool call plus 'i can do that' prose is detected", () => {
const result = resolvePlanningOnlyRetryInstruction({
...openaiParams,
prompt: "Please inspect the code, make the change, and run the checks.",
aborted: false,
timedOut: false,
attempt: makeAttemptWithTools(["read"], "I can do that next."),
@@ -481,6 +540,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
it("retries when exactly 1 non-plan tool call plus planning prose is detected", () => {
const result = resolvePlanningOnlyRetryInstruction({
...openaiParams,
prompt: "Please inspect the code, make the change, and run the checks.",
aborted: false,
timedOut: false,
attempt: makeAttemptWithTools(["read"], "I'll analyze the structure next."),
@@ -492,6 +552,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
it("does not retry when 2+ non-plan tool calls are present", () => {
const result = resolvePlanningOnlyRetryInstruction({
...openaiParams,
prompt: "Please inspect the code, make the change, and run the checks.",
aborted: false,
timedOut: false,
attempt: makeAttemptWithTools(["read", "search"], "I'll verify the output."),
@@ -503,6 +564,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
it("does not retry when 1 tool call plus completion language is present", () => {
const result = resolvePlanningOnlyRetryInstruction({
...openaiParams,
prompt: "Please inspect the code, make the change, and run the checks.",
aborted: false,
timedOut: false,
attempt: makeAttemptWithTools(["read"], "Done. The file looks correct."),
@@ -514,6 +576,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
it("does not retry when 1 tool call plus 'let me know' handoff is present", () => {
const result = resolvePlanningOnlyRetryInstruction({
...openaiParams,
prompt: "Please inspect the code, make the change, and run the checks.",
aborted: false,
timedOut: false,
attempt: makeAttemptWithTools(["read"], "Let me know if you need anything else."),
@@ -525,6 +588,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
it("does not retry when 1 tool call plus an answer-style summary is present", () => {
const result = resolvePlanningOnlyRetryInstruction({
...openaiParams,
prompt: "Please inspect the code, make the change, and run the checks.",
aborted: false,
timedOut: false,
attempt: makeAttemptWithTools(
@@ -539,6 +603,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
it("does not retry when 1 tool call plus a future-tense description is present", () => {
const result = resolvePlanningOnlyRetryInstruction({
...openaiParams,
prompt: "Please inspect the code, make the change, and run the checks.",
aborted: false,
timedOut: false,
attempt: makeAttemptWithTools(
@@ -553,6 +618,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
it("does not retry when 1 safe tool call is followed by answer prose joined with 'and'", () => {
const result = resolvePlanningOnlyRetryInstruction({
...openaiParams,
prompt: "Please inspect the code, make the change, and run the checks.",
aborted: false,
timedOut: false,
attempt: makeAttemptWithTools(["read"], "I'll explain and recommend a fix."),
@@ -564,6 +630,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
it("does not retry when 1 tool call plus a bare 'i can do that' reply is present", () => {
const result = resolvePlanningOnlyRetryInstruction({
...openaiParams,
prompt: "Please inspect the code, make the change, and run the checks.",
aborted: false,
timedOut: false,
attempt: makeAttemptWithTools(["read"], "I can do that."),
@@ -575,6 +642,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
it("does not retry when the lone tool call already had side effects", () => {
const result = resolvePlanningOnlyRetryInstruction({
...openaiParams,
prompt: "Please inspect the code, make the change, and run the checks.",
aborted: false,
timedOut: false,
attempt: makeAttemptWithTools(["sessions_spawn"], "I'll continue from there next."),
@@ -586,6 +654,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
it("does not retry when the lone tool call is unclassified", () => {
const result = resolvePlanningOnlyRetryInstruction({
...openaiParams,
prompt: "Please inspect the code, make the change, and run the checks.",
aborted: false,
timedOut: false,
attempt: makeAttemptWithTools(["vendor_widget"], "I'll continue from there next."),
@@ -593,4 +662,16 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
expect(result).toBeNull();
});
// Regression test: the attempt has one `read` tool call plus "I'll check that next." narration,
// but the prompt is casual chat rather than a task, so no retry instruction is expected.
it("does not retry single-action narration on casual non-task chat", () => {
const result = resolvePlanningOnlyRetryInstruction({
...openaiParams,
// A heads-up message: no question mark and no request addressed to the agent.
prompt: "i haven't restarted you on latest main yet @The Student - get ready though",
aborted: false,
timedOut: false,
attempt: makeAttemptWithTools(["read"], "I'll check that next."),
});
expect(result).toBeNull();
});
});

View File

@@ -1587,6 +1587,7 @@ export async function runEmbeddedPiAgent(
const nextPlanningOnlyRetryInstruction = resolvePlanningOnlyRetryInstruction({
provider,
modelId,
prompt: params.prompt,
aborted,
timedOut,
attempt,

View File

@@ -49,12 +49,14 @@ export function isIncompleteTerminalAssistantTurn(params: {
}
const PLANNING_ONLY_PROMISE_RE =
/\b(?:i(?:'ll| will)|let me|going to|first[, ]+i(?:'ll| will)|next[, ]+i(?:'ll| will)|i can do that)\b/i;
/\b(?:i(?:'ll| will)|let me|i(?:'m| am)\s+going to|first[, ]+i(?:'ll| will)|next[, ]+i(?:'ll| will)|i can do that)\b/i;
const PLANNING_ONLY_COMPLETION_RE =
/\b(?:done|finished|implemented|updated|fixed|changed|ran|verified|found|here(?:'s| is) what|blocked by|the blocker is)\b/i;
const PLANNING_ONLY_HEADING_RE = /^(?:plan|steps?|next steps?)\s*:/i;
const PLANNING_ONLY_BULLET_RE = /^(?:[-*]\s+|\d+[.)]\s+)/u;
const PLANNING_ONLY_MAX_VISIBLE_TEXT = 700;
// Case-insensitive, whole-word match for verbs that name concrete work (inspect, fix, run,
// deploy, ...). resolvePlanningOnlyRetryInstruction tests the visible assistant text against it:
// text that is neither a structured plan nor a single-action narrative and contains none of
// these verbs yields no retry instruction.
const PLANNING_ONLY_ACTION_VERB_RE =
/\b(?:inspect|investigate|check|look(?:\s+into|\s+at)?|read|search|find|debug|fix|patch|update|change|edit|write|implement|run|test|verify|review|analy(?:s|z)e|summari(?:s|z)e|explain|answer|show|share|report|prepare|capture|take|refactor|restart|deploy|ship)\b/i;
const SINGLE_ACTION_EXPLICIT_CONTINUATION_RE =
/\b(?:going to|first[, ]+i(?:'ll| will)|next[, ]+i(?:'ll| will)|then[, ]+i(?:'ll| will)|i can do that next|let me (?!know\b)\w+(?:\s+\w+){0,3}\s+(?:next|then|first)\b)/i;
const SINGLE_ACTION_MULTI_STEP_PROMISE_RE =
@@ -112,6 +114,10 @@ const ACK_EXECUTION_NORMALIZED_SET = new Set([
"진행해",
"계속해",
]);
// Matches a prompt that STARTS (after optional whitespace and an optional "please") with one of
// the listed imperative verbs, e.g. "please check ..." or "fix ...". Case-insensitive; the verb
// must be a whole word.
const ACTIONABLE_PROMPT_DIRECTIVE_RE =
/^\s*(?:please\s+)?(?:check|look(?:\s+into|\s+at)?|read|write|edit|update|fix|investigate|debug|run|search|find|implement|add|remove|refactor|explain|summari(?:s|z)e|analy(?:s|z)e|review|tell|show|make|restart|deploy|prepare)\b/i;
// Matches request phrasing ANYWHERE in the prompt: "can/could/would/will you", "please"/"pls",
// or one of the listed request verbs/phrases as a whole word. Case-insensitive.
const ACTIONABLE_PROMPT_REQUEST_RE =
/\b(?:can|could|would|will)\s+you\b|\b(?:please|pls)\b|\b(?:help|explain|summari(?:s|z)e|analy(?:s|z)e|review|investigate|debug|fix|check|look(?:\s+into|\s+at)?|read|write|edit|update|run|search|find|implement|add|remove|refactor|show|tell me|walk me through)\b/i;
export const PLANNING_ONLY_RETRY_INSTRUCTION =
"The previous assistant turn only described the plan. Do not restate the plan. Act now: take the first concrete tool action you can. If a real blocker prevents action, reply with the exact blocker in one sentence.";
@@ -234,6 +240,17 @@ export function isLikelyExecutionAckPrompt(text: string): boolean {
return ACK_EXECUTION_NORMALIZED_SET.has(normalizeAckPrompt(trimmed));
}
/**
 * Heuristic: does the user prompt ask the agent to do or answer something?
 *
 * Returns `false` for empty or whitespace-only text. Otherwise returns `true` as soon as any
 * of these holds for the trimmed text, checked in order:
 *  1. `isLikelyExecutionAckPrompt` accepts it,
 *  2. it contains a `?` character,
 *  3. `ACTIONABLE_PROMPT_DIRECTIVE_RE` matches it,
 *  4. `ACTIONABLE_PROMPT_REQUEST_RE` matches it.
 *
 * @param text Raw user prompt text.
 * @returns `true` when the prompt looks actionable, `false` otherwise.
 */
function isLikelyActionableUserPrompt(text: string): boolean {
  const candidate = text.trim();
  if (candidate.length === 0) {
    return false;
  }
  if (isLikelyExecutionAckPrompt(candidate)) {
    return true;
  }
  if (candidate.includes("?")) {
    return true;
  }
  if (ACTIONABLE_PROMPT_DIRECTIVE_RE.test(candidate)) {
    return true;
  }
  return ACTIONABLE_PROMPT_REQUEST_RE.test(candidate);
}
export function resolveAckExecutionFastPathInstruction(params: {
provider?: string;
modelId?: string;
@@ -355,6 +372,7 @@ export function resolvePlanningOnlyRetryLimit(
export function resolvePlanningOnlyRetryInstruction(params: {
provider?: string;
modelId?: string;
prompt?: string;
aborted: boolean;
timedOut: boolean;
attempt: PlanningOnlyAttempt;
@@ -371,6 +389,7 @@ export function resolvePlanningOnlyRetryInstruction(params: {
provider: params.provider,
modelId: params.modelId,
}) ||
(typeof params.prompt === "string" && !isLikelyActionableUserPrompt(params.prompt)) ||
params.aborted ||
params.timedOut ||
params.attempt.clientToolCall ||
@@ -395,7 +414,15 @@ export function resolvePlanningOnlyRetryInstruction(params: {
if (!text || text.length > PLANNING_ONLY_MAX_VISIBLE_TEXT || text.includes("```")) {
return null;
}
if (!PLANNING_ONLY_PROMISE_RE.test(text) && !hasStructuredPlanningOnlyFormat(text)) {
const hasStructuredPlanningFormat = hasStructuredPlanningOnlyFormat(text);
if (!PLANNING_ONLY_PROMISE_RE.test(text) && !hasStructuredPlanningFormat) {
return null;
}
if (
!hasStructuredPlanningFormat &&
!singleActionNarrative &&
!PLANNING_ONLY_ACTION_VERB_RE.test(text)
) {
return null;
}
if (PLANNING_ONLY_COMPLETION_RE.test(text)) {