mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-05 15:40:21 +00:00
fix(acp): classify gateway chat error kinds
This commit is contained in:
@@ -88,6 +88,7 @@ Docs: https://docs.openclaw.ai
|
|||||||
- Agents/failover: allow cooldown probes for `timeout` (including network outage classifications) so the primary model can recover after failover without a gateway restart. (#63996) Thanks @neeravmakwana.
|
- Agents/failover: allow cooldown probes for `timeout` (including network outage classifications) so the primary model can recover after failover without a gateway restart. (#63996) Thanks @neeravmakwana.
|
||||||
- iMessage (imsg): strip an accidental protobuf length-delimited UTF-8 field wrapper from inbound `text` and `reply_to_text` when it fully consumes the field, fixing leading garbage before the real message. (#63868) Thanks @neeravmakwana.
|
- iMessage (imsg): strip an accidental protobuf length-delimited UTF-8 field wrapper from inbound `text` and `reply_to_text` when it fully consumes the field, fixing leading garbage before the real message. (#63868) Thanks @neeravmakwana.
|
||||||
- Gateway/pairing: fail closed for paired device records that have no device tokens, and reject pairing approvals whose requested scopes do not match the requested device roles.
|
- Gateway/pairing: fail closed for paired device records that have no device tokens, and reject pairing approvals whose requested scopes do not match the requested device roles.
|
||||||
|
- ACP/gateway chat: classify lifecycle errors before forwarding them to ACP clients so refusals use ACP's refusal stop reason while transient backend errors continue to finish as normal turns.
|
||||||
|
|
||||||
## 2026.4.9
|
## 2026.4.9
|
||||||
|
|
||||||
|
|||||||
138
src/acp/translator.error-kind.test.ts
Normal file
138
src/acp/translator.error-kind.test.ts
Normal file
@@ -0,0 +1,138 @@
|
|||||||
|
import type { PromptRequest } from "@agentclientprotocol/sdk";
|
||||||
|
import { describe, expect, it, vi } from "vitest";
|
||||||
|
import type { GatewayClient } from "../gateway/client.js";
|
||||||
|
import type { EventFrame } from "../gateway/protocol/index.js";
|
||||||
|
import { createInMemorySessionStore } from "./session.js";
|
||||||
|
import { AcpGatewayAgent } from "./translator.js";
|
||||||
|
import { createAcpConnection, createAcpGateway } from "./translator.test-helpers.js";
|
||||||
|
|
||||||
|
type PendingPromptHarness = {
|
||||||
|
agent: AcpGatewayAgent;
|
||||||
|
promptPromise: ReturnType<AcpGatewayAgent["prompt"]>;
|
||||||
|
runId: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
const DEFAULT_SESSION_ID = "session-1";
|
||||||
|
const DEFAULT_SESSION_KEY = "agent:main:main";
|
||||||
|
const DEFAULT_PROMPT_TEXT = "hello";
|
||||||
|
|
||||||
|
function createSessionAgentHarness(
|
||||||
|
request: GatewayClient["request"],
|
||||||
|
options: { sessionId?: string; sessionKey?: string; cwd?: string } = {},
|
||||||
|
) {
|
||||||
|
const sessionId = options.sessionId ?? DEFAULT_SESSION_ID;
|
||||||
|
const sessionKey = options.sessionKey ?? DEFAULT_SESSION_KEY;
|
||||||
|
const sessionStore = createInMemorySessionStore();
|
||||||
|
sessionStore.createSession({
|
||||||
|
sessionId,
|
||||||
|
sessionKey,
|
||||||
|
cwd: options.cwd ?? "/tmp",
|
||||||
|
});
|
||||||
|
const agent = new AcpGatewayAgent(createAcpConnection(), createAcpGateway(request), {
|
||||||
|
sessionStore,
|
||||||
|
});
|
||||||
|
|
||||||
|
return {
|
||||||
|
agent,
|
||||||
|
sessionId,
|
||||||
|
sessionKey,
|
||||||
|
sessionStore,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function promptAgent(
|
||||||
|
agent: AcpGatewayAgent,
|
||||||
|
sessionId = DEFAULT_SESSION_ID,
|
||||||
|
text = DEFAULT_PROMPT_TEXT,
|
||||||
|
) {
|
||||||
|
return agent.prompt({
|
||||||
|
sessionId,
|
||||||
|
prompt: [{ type: "text", text }],
|
||||||
|
_meta: {},
|
||||||
|
} as unknown as PromptRequest);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function createPendingPromptHarness(): Promise<PendingPromptHarness> {
|
||||||
|
let runId: string | undefined;
|
||||||
|
const request = vi.fn(async (method: string, params?: Record<string, unknown>) => {
|
||||||
|
if (method === "chat.send") {
|
||||||
|
runId = params?.idempotencyKey as string | undefined;
|
||||||
|
return new Promise<never>(() => {});
|
||||||
|
}
|
||||||
|
return {};
|
||||||
|
}) as GatewayClient["request"];
|
||||||
|
|
||||||
|
const { agent, sessionId } = createSessionAgentHarness(request);
|
||||||
|
const promptPromise = promptAgent(agent, sessionId);
|
||||||
|
|
||||||
|
await vi.waitFor(() => {
|
||||||
|
expect(runId).toBeDefined();
|
||||||
|
});
|
||||||
|
|
||||||
|
return {
|
||||||
|
agent,
|
||||||
|
promptPromise,
|
||||||
|
runId: runId!,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function createChatEvent(payload: Record<string, unknown>): EventFrame {
|
||||||
|
return {
|
||||||
|
type: "event",
|
||||||
|
event: "chat",
|
||||||
|
payload,
|
||||||
|
} as EventFrame;
|
||||||
|
}
|
||||||
|
|
||||||
|
describe("acp translator errorKind mapping", () => {
|
||||||
|
it("maps errorKind: refusal to stopReason: refusal", async () => {
|
||||||
|
const { agent, promptPromise, runId } = await createPendingPromptHarness();
|
||||||
|
|
||||||
|
await agent.handleGatewayEvent(
|
||||||
|
createChatEvent({
|
||||||
|
runId,
|
||||||
|
sessionKey: DEFAULT_SESSION_KEY,
|
||||||
|
seq: 1,
|
||||||
|
state: "error",
|
||||||
|
errorKind: "refusal",
|
||||||
|
errorMessage: "I cannot fulfill this request.",
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
await expect(promptPromise).resolves.toEqual({ stopReason: "refusal" });
|
||||||
|
});
|
||||||
|
|
||||||
|
it("maps errorKind: timeout to stopReason: end_turn", async () => {
|
||||||
|
const { agent, promptPromise, runId } = await createPendingPromptHarness();
|
||||||
|
|
||||||
|
await agent.handleGatewayEvent(
|
||||||
|
createChatEvent({
|
||||||
|
runId,
|
||||||
|
sessionKey: DEFAULT_SESSION_KEY,
|
||||||
|
seq: 1,
|
||||||
|
state: "error",
|
||||||
|
errorKind: "timeout",
|
||||||
|
errorMessage: "gateway timeout",
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
await expect(promptPromise).resolves.toEqual({ stopReason: "end_turn" });
|
||||||
|
});
|
||||||
|
|
||||||
|
it("maps unknown errorKind to stopReason: end_turn", async () => {
|
||||||
|
const { agent, promptPromise, runId } = await createPendingPromptHarness();
|
||||||
|
|
||||||
|
await agent.handleGatewayEvent(
|
||||||
|
createChatEvent({
|
||||||
|
runId,
|
||||||
|
sessionKey: DEFAULT_SESSION_KEY,
|
||||||
|
seq: 1,
|
||||||
|
state: "error",
|
||||||
|
errorKind: "unknown",
|
||||||
|
errorMessage: "something went wrong",
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
await expect(promptPromise).resolves.toEqual({ stopReason: "end_turn" });
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -956,11 +956,9 @@ export class AcpGatewayAgent implements Agent {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (state === "error") {
|
if (state === "error") {
|
||||||
// ACP has no explicit "server_error" stop reason. Use "end_turn" so clients
|
const errorKind = payload.errorKind as string | undefined;
|
||||||
// do not treat transient backend errors (timeouts, rate-limits) as deliberate
|
const stopReason: StopReason = errorKind === "refusal" ? "refusal" : "end_turn";
|
||||||
// refusals. TODO: when ChatEventSchema gains a structured errorKind field
|
void this.finishPrompt(pending.sessionId, pending, stopReason);
|
||||||
// (e.g. "refusal" | "timeout" | "rate_limit"), use it to distinguish here.
|
|
||||||
void this.finishPrompt(pending.sessionId, pending, "end_turn");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -81,6 +81,15 @@ export const ChatEventSchema = Type.Object(
|
|||||||
]),
|
]),
|
||||||
message: Type.Optional(Type.Unknown()),
|
message: Type.Optional(Type.Unknown()),
|
||||||
errorMessage: Type.Optional(Type.String()),
|
errorMessage: Type.Optional(Type.String()),
|
||||||
|
errorKind: Type.Optional(
|
||||||
|
Type.Union([
|
||||||
|
Type.Literal("refusal"),
|
||||||
|
Type.Literal("timeout"),
|
||||||
|
Type.Literal("rate_limit"),
|
||||||
|
Type.Literal("context_length"),
|
||||||
|
Type.Literal("unknown"),
|
||||||
|
]),
|
||||||
|
),
|
||||||
usage: Type.Optional(Type.Unknown()),
|
usage: Type.Optional(Type.Unknown()),
|
||||||
stopReason: Type.Optional(Type.String()),
|
stopReason: Type.Optional(Type.String()),
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -1258,6 +1258,39 @@ describe("agent event handler", () => {
|
|||||||
expect(agentRunSeq.has("run-terminal-error")).toBe(false);
|
expect(agentRunSeq.has("run-terminal-error")).toBe(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("adds detected errorKind to chat lifecycle error payloads", () => {
|
||||||
|
const { broadcast, nodeSendToSession, handler } = createHarness({
|
||||||
|
resolveSessionKeyForRun: () => "session-detected-error",
|
||||||
|
lifecycleErrorRetryGraceMs: 0,
|
||||||
|
});
|
||||||
|
registerAgentRunContext("run-detected-error", { sessionKey: "session-detected-error" });
|
||||||
|
|
||||||
|
handler({
|
||||||
|
runId: "run-detected-error",
|
||||||
|
seq: 1,
|
||||||
|
stream: "lifecycle",
|
||||||
|
ts: Date.now(),
|
||||||
|
data: {
|
||||||
|
phase: "error",
|
||||||
|
error: Object.assign(new Error("Too many requests"), { code: 429 }),
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const payload = chatBroadcastCalls(broadcast).at(-1)?.[1] as {
|
||||||
|
state?: string;
|
||||||
|
errorKind?: string;
|
||||||
|
errorMessage?: string;
|
||||||
|
};
|
||||||
|
expect(payload.state).toBe("error");
|
||||||
|
expect(payload.errorKind).toBe("rate_limit");
|
||||||
|
expect(payload.errorMessage).toContain("Too many requests");
|
||||||
|
|
||||||
|
const nodePayload = sessionChatCalls(nodeSendToSession).at(-1)?.[2] as {
|
||||||
|
errorKind?: string;
|
||||||
|
};
|
||||||
|
expect(nodePayload.errorKind).toBe("rate_limit");
|
||||||
|
});
|
||||||
|
|
||||||
it("suppresses delayed lifecycle chat errors for active chat.send runs while still cleaning up", () => {
|
it("suppresses delayed lifecycle chat errors for active chat.send runs while still cleaning up", () => {
|
||||||
vi.useFakeTimers();
|
vi.useFakeTimers();
|
||||||
const { broadcast, clearAgentRunContext, agentRunSeq, handler } = createHarness({
|
const { broadcast, clearAgentRunContext, agentRunSeq, handler } = createHarness({
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import {
|
|||||||
} from "../auto-reply/tokens.js";
|
} from "../auto-reply/tokens.js";
|
||||||
import { loadConfig } from "../config/config.js";
|
import { loadConfig } from "../config/config.js";
|
||||||
import { type AgentEventPayload, getAgentRunContext } from "../infra/agent-events.js";
|
import { type AgentEventPayload, getAgentRunContext } from "../infra/agent-events.js";
|
||||||
|
import { detectErrorKind, type ErrorKind } from "../infra/errors.js";
|
||||||
import { resolveHeartbeatVisibility } from "../infra/heartbeat-visibility.js";
|
import { resolveHeartbeatVisibility } from "../infra/heartbeat-visibility.js";
|
||||||
import { stripInlineDirectiveTagsForDisplay } from "../utils/directive-tags.js";
|
import { stripInlineDirectiveTagsForDisplay } from "../utils/directive-tags.js";
|
||||||
import {
|
import {
|
||||||
@@ -437,6 +438,20 @@ export type ChatEventBroadcast = (
|
|||||||
|
|
||||||
export type NodeSendToSession = (sessionKey: string, event: string, payload: unknown) => void;
|
export type NodeSendToSession = (sessionKey: string, event: string, payload: unknown) => void;
|
||||||
|
|
||||||
|
const CHAT_ERROR_KINDS = new Set<ErrorKind>([
|
||||||
|
"refusal",
|
||||||
|
"timeout",
|
||||||
|
"rate_limit",
|
||||||
|
"context_length",
|
||||||
|
"unknown",
|
||||||
|
]);
|
||||||
|
|
||||||
|
function readChatErrorKind(value: unknown): ErrorKind | undefined {
|
||||||
|
return typeof value === "string" && CHAT_ERROR_KINDS.has(value as ErrorKind)
|
||||||
|
? (value as ErrorKind)
|
||||||
|
: undefined;
|
||||||
|
}
|
||||||
|
|
||||||
export type AgentEventHandlerOptions = {
|
export type AgentEventHandlerOptions = {
|
||||||
broadcast: ChatEventBroadcast;
|
broadcast: ChatEventBroadcast;
|
||||||
broadcastToConnIds: (
|
broadcastToConnIds: (
|
||||||
@@ -583,6 +598,8 @@ export function createAgentEventHandler({
|
|||||||
if (!isAborted) {
|
if (!isAborted) {
|
||||||
const evtStopReason =
|
const evtStopReason =
|
||||||
typeof evt.data?.stopReason === "string" ? evt.data.stopReason : undefined;
|
typeof evt.data?.stopReason === "string" ? evt.data.stopReason : undefined;
|
||||||
|
const evtErrorKind =
|
||||||
|
readChatErrorKind(evt.data?.errorKind) ?? detectErrorKind(evt.data?.error);
|
||||||
if (chatLink) {
|
if (chatLink) {
|
||||||
const finished = chatRunState.registry.shift(evt.runId);
|
const finished = chatRunState.registry.shift(evt.runId);
|
||||||
if (!finished) {
|
if (!finished) {
|
||||||
@@ -598,6 +615,7 @@ export function createAgentEventHandler({
|
|||||||
lifecyclePhase === "error" ? "error" : "done",
|
lifecyclePhase === "error" ? "error" : "done",
|
||||||
evt.data?.error,
|
evt.data?.error,
|
||||||
evtStopReason,
|
evtStopReason,
|
||||||
|
evtErrorKind,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
} else if (!(opts?.skipChatErrorFinal && lifecyclePhase === "error")) {
|
} else if (!(opts?.skipChatErrorFinal && lifecyclePhase === "error")) {
|
||||||
@@ -609,6 +627,7 @@ export function createAgentEventHandler({
|
|||||||
lifecyclePhase === "error" ? "error" : "done",
|
lifecyclePhase === "error" ? "error" : "done",
|
||||||
evt.data?.error,
|
evt.data?.error,
|
||||||
evtStopReason,
|
evtStopReason,
|
||||||
|
evtErrorKind,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -791,6 +810,7 @@ export function createAgentEventHandler({
|
|||||||
jobState: "done" | "error",
|
jobState: "done" | "error",
|
||||||
error?: unknown,
|
error?: unknown,
|
||||||
stopReason?: string,
|
stopReason?: string,
|
||||||
|
errorKind?: ErrorKind,
|
||||||
) => {
|
) => {
|
||||||
const { text, shouldSuppressSilent } = resolveBufferedChatTextState(clientRunId, sourceRunId);
|
const { text, shouldSuppressSilent } = resolveBufferedChatTextState(clientRunId, sourceRunId);
|
||||||
// Flush any throttled delta so streaming clients receive the complete text
|
// Flush any throttled delta so streaming clients receive the complete text
|
||||||
@@ -828,6 +848,7 @@ export function createAgentEventHandler({
|
|||||||
seq,
|
seq,
|
||||||
state: "error" as const,
|
state: "error" as const,
|
||||||
errorMessage: error ? formatForLog(error) : undefined,
|
errorMessage: error ? formatForLog(error) : undefined,
|
||||||
|
...(errorKind && { errorKind }),
|
||||||
};
|
};
|
||||||
broadcast("chat", payload);
|
broadcast("chat", payload);
|
||||||
nodeSendToSession(sessionKey, "chat", payload);
|
nodeSendToSession(sessionKey, "chat", payload);
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import { describe, expect, it } from "vitest";
|
import { describe, expect, it } from "vitest";
|
||||||
import {
|
import {
|
||||||
collectErrorGraphCandidates,
|
collectErrorGraphCandidates,
|
||||||
|
detectErrorKind,
|
||||||
extractErrorCode,
|
extractErrorCode,
|
||||||
formatErrorMessage,
|
formatErrorMessage,
|
||||||
formatUncaughtError,
|
formatUncaughtError,
|
||||||
@@ -94,6 +95,35 @@ describe("error helpers", () => {
|
|||||||
expect(formatted).not.toContain(token);
|
expect(formatted).not.toContain(token);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it.each([
|
||||||
|
{
|
||||||
|
value: new Error("Unhandled stop reason: refusal_policy"),
|
||||||
|
expected: "refusal",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
value: Object.assign(new Error("request timed out"), { code: "ETIMEDOUT" }),
|
||||||
|
expected: "timeout",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
value: Object.assign(new Error("Too many requests"), { code: 429 }),
|
||||||
|
expected: "rate_limit",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
value: new Error("context_window exceeded with too many tokens"),
|
||||||
|
expected: "context_length",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
value: new Error("plain provider failure"),
|
||||||
|
expected: undefined,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
value: undefined,
|
||||||
|
expected: undefined,
|
||||||
|
},
|
||||||
|
] as const)("detects error kind for case %#", ({ value, expected }) => {
|
||||||
|
expect(detectErrorKind(value)).toBe(expected);
|
||||||
|
});
|
||||||
|
|
||||||
it("uses message-only formatting for INVALID_CONFIG and stack formatting otherwise", () => {
|
it("uses message-only formatting for INVALID_CONFIG and stack formatting otherwise", () => {
|
||||||
const invalidConfig = Object.assign(new Error("TOKEN=sk-abcdefghijklmnopqrstuv"), {
|
const invalidConfig = Object.assign(new Error("TOKEN=sk-abcdefghijklmnopqrstuv"), {
|
||||||
code: "INVALID_CONFIG",
|
code: "INVALID_CONFIG",
|
||||||
|
|||||||
@@ -111,3 +111,42 @@ export function formatUncaughtError(err: unknown): string {
|
|||||||
}
|
}
|
||||||
return formatErrorMessage(err);
|
return formatErrorMessage(err);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export type ErrorKind = "refusal" | "timeout" | "rate_limit" | "context_length" | "unknown";
|
||||||
|
|
||||||
|
export function detectErrorKind(err: unknown): ErrorKind | undefined {
|
||||||
|
if (err === undefined) {
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
const message = formatErrorMessage(err).toLowerCase();
|
||||||
|
const code = extractErrorCode(err)?.toLowerCase();
|
||||||
|
|
||||||
|
if (
|
||||||
|
message.includes("refusal") ||
|
||||||
|
message.includes("content_filter") ||
|
||||||
|
message.includes("sensitive") ||
|
||||||
|
message.includes("unhandled stop reason: refusal_policy")
|
||||||
|
) {
|
||||||
|
return "refusal";
|
||||||
|
}
|
||||||
|
if (message.includes("timeout") || code === "etimedout" || code === "timeout") {
|
||||||
|
return "timeout";
|
||||||
|
}
|
||||||
|
if (
|
||||||
|
message.includes("rate limit") ||
|
||||||
|
message.includes("too many requests") ||
|
||||||
|
message.includes("429") ||
|
||||||
|
code === "429"
|
||||||
|
) {
|
||||||
|
return "rate_limit";
|
||||||
|
}
|
||||||
|
if (
|
||||||
|
message.includes("context length") ||
|
||||||
|
message.includes("too many tokens") ||
|
||||||
|
message.includes("token limit") ||
|
||||||
|
message.includes("context_window")
|
||||||
|
) {
|
||||||
|
return "context_length";
|
||||||
|
}
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user