mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-16 22:20:42 +00:00
190 lines
6.4 KiB
TypeScript
190 lines
6.4 KiB
TypeScript
import {
|
|
createContractFallbackConfig,
|
|
createContractRunResult,
|
|
OUTCOME_FALLBACK_RUNTIME_CONTRACT,
|
|
} from "openclaw/plugin-sdk/agent-runtime-test-contracts";
|
|
import { describe, expect, it, vi } from "vitest";
|
|
import type { OpenClawConfig } from "../config/types.openclaw.js";
|
|
import { runWithModelFallback } from "./model-fallback.js";
|
|
import { classifyEmbeddedPiRunResultForModelFallback } from "./pi-embedded-runner/result-fallback-classifier.js";
|
|
|
|
// Replace the auth-profile source check with a stub that always answers `false`.
vi.mock("./auth-profiles/source-check.js", () => {
  return {
    hasAnyAuthProfileStoreSource: () => false,
  };
});
|
|
|
|
/**
 * Contract tests covering two things:
 *  - how `classifyEmbeddedPiRunResultForModelFallback` labels terminal run results, and
 *  - how `runWithModelFallback` reacts when that classifier is plugged in as `classifyResult`.
 */
describe("Outcome/fallback runtime contract - Pi fallback classifier", () => {
  // Pairs of [harness result classification, expected fallback code].
  const fallbackClassificationCases = [
    ["empty", "empty_result"],
    ["reasoning-only", "reasoning_only_result"],
    ["planning-only", "planning_only_result"],
  ] as const;

  it.each(fallbackClassificationCases)(
    "maps harness classification %s to a format fallback code",
    (classification, code) => {
      expect(
        classifyEmbeddedPiRunResultForModelFallback({
          provider: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryProvider,
          model: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryModel,
          result: createContractRunResult({
            meta: {
              durationMs: 1,
              agentHarnessResultClassification: classification,
            },
          }),
        }),
      ).toMatchObject({
        reason: "format",
        code,
      });
    },
  );

  it("advances to the configured fallback after a classified GPT-5 terminal result", async () => {
    // Primary attempt: a result the harness classified as "empty".
    const primary = createContractRunResult({
      meta: {
        durationMs: 1,
        agentHarnessResultClassification: "empty",
      },
    });
    // Fallback attempt: a result carrying visible text.
    const fallback = createContractRunResult({
      payloads: [{ text: "fallback ok" }],
      meta: { durationMs: 1, finalAssistantVisibleText: "fallback ok" },
    });
    // First invocation resolves to `primary`, second to `fallback`.
    const run = vi.fn().mockResolvedValueOnce(primary).mockResolvedValueOnce(fallback);

    const result = await runWithModelFallback({
      cfg: createContractFallbackConfig() as unknown as OpenClawConfig,
      provider: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryProvider,
      model: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryModel,
      run,
      classifyResult: ({ provider, model, result }) =>
        classifyEmbeddedPiRunResultForModelFallback({
          provider,
          model,
          result,
        }),
    });

    expect(result.result).toBe(fallback);
    expect(run).toHaveBeenCalledTimes(2);
    // The second invocation must target the contract's fallback provider/model.
    expect(run.mock.calls[1]).toEqual([
      OUTCOME_FALLBACK_RUNTIME_CONTRACT.fallbackProvider,
      OUTCOME_FALLBACK_RUNTIME_CONTRACT.fallbackModel,
    ]);
    // The first recorded attempt describes why the primary was abandoned.
    expect(result.attempts[0]).toMatchObject({
      provider: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryProvider,
      model: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryModel,
      reason: "format",
      code: "empty_result",
    });
  });

  // Results for which the classifier is expected to return `null`. Each entry has a
  // display `name`, the run `result`, and optionally the block-reply flags that are
  // forwarded to the classifier alongside the result.
  const nonFallbackCases = [
    {
      name: "intentional NO_REPLY",
      result: createContractRunResult({
        meta: { durationMs: 1, finalAssistantRawText: "NO_REPLY" },
      }),
    },
    {
      name: "visible reply",
      result: createContractRunResult({
        payloads: [{ text: "visible answer" }],
        meta: { durationMs: 1 },
      }),
    },
    {
      name: "abort",
      result: createContractRunResult({
        meta: { durationMs: 1, aborted: true, agentHarnessResultClassification: "empty" },
      }),
    },
    {
      name: "tool summary side effect",
      result: createContractRunResult({
        meta: { durationMs: 1, toolSummary: { calls: 1, tools: ["message"] } },
      }),
    },
    {
      name: "messaging text side effect",
      result: createContractRunResult({
        messagingToolSentTexts: ["sent out of band"],
        meta: { durationMs: 1, agentHarnessResultClassification: "empty" },
      }),
    },
    {
      name: "messaging media side effect",
      result: createContractRunResult({
        messagingToolSentMediaUrls: ["https://example.test/image.png"],
        meta: { durationMs: 1, agentHarnessResultClassification: "empty" },
      }),
    },
    {
      name: "messaging target side effect",
      result: createContractRunResult({
        messagingToolSentTargets: [{ tool: "message", provider: "slack", to: "channel-1" }],
        meta: { durationMs: 1, agentHarnessResultClassification: "empty" },
      }),
    },
    {
      name: "cron side effect",
      result: createContractRunResult({
        successfulCronAdds: 1,
        meta: { durationMs: 1, agentHarnessResultClassification: "empty" },
      }),
    },
    {
      name: "direct block reply",
      result: createContractRunResult({
        meta: { durationMs: 1, agentHarnessResultClassification: "empty" },
      }),
      hasDirectlySentBlockReply: true,
    },
    {
      name: "block reply pipeline output",
      result: createContractRunResult({
        meta: { durationMs: 1, agentHarnessResultClassification: "empty" },
      }),
      hasBlockReplyPipelineOutput: true,
    },
  ];

  // One test per case: a failure names the offending case and does not hide
  // failures in the cases that come after it.
  it.each(nonFallbackCases)(
    "does not classify a terminal result with visible output or side effects as a fallback: $name",
    (contractCase) => {
      expect(
        classifyEmbeddedPiRunResultForModelFallback({
          provider: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryProvider,
          model: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryModel,
          result: contractCase.result,
          hasDirectlySentBlockReply: contractCase.hasDirectlySentBlockReply,
          hasBlockReplyPipelineOutput: contractCase.hasBlockReplyPipelineOutput,
        }),
      ).toBeNull();
    },
  );

  it("keeps running on the primary when terminal output is not classified as fallback", async () => {
    // Uses the first non-fallback case ("intentional NO_REPLY").
    const contractCase = nonFallbackCases[0];
    const run = vi.fn().mockResolvedValue(contractCase.result);
    const result = await runWithModelFallback({
      cfg: createContractFallbackConfig() as unknown as OpenClawConfig,
      provider: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryProvider,
      model: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryModel,
      run,
      classifyResult: ({ provider, model, result }) =>
        classifyEmbeddedPiRunResultForModelFallback({
          provider,
          model,
          result,
          hasDirectlySentBlockReply: contractCase.hasDirectlySentBlockReply,
          hasBlockReplyPipelineOutput: contractCase.hasBlockReplyPipelineOutput,
        }),
    });

    // The primary result is returned as-is, no attempts are recorded, and `run` is called once.
    expect(result.result).toBe(contractCase.result);
    expect(result.attempts).toEqual([]);
    expect(run).toHaveBeenCalledTimes(1);
  });
});
|