fix: guard Anthropic Messages max tokens (#66664)

* Docs: add Anthropic max_tokens investigation memo

Regeneration-Prompt: |
  Investigate the reported OpenClaw cron isolated-agent failure where an
  Anthropic Haiku run returned "max_tokens: must be greater than or equal to 1".
  Do not implement a fix yet. Inspect the cron isolated-agent execution path,
  the embedded runner, extra param plumbing, Anthropic transport code, and any
  model-selection or token-budget logic that could synthesize maxTokens = 0.
  Produce a concise maintainer memo with concrete file references, explain why
  cron itself is not the component setting maxTokens, identify the most likely
  root cause, describe the smallest repro shape, and recommend the cleanest fix.
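
For reference, the failing-request shape the memo is asked to pin down matches the regression test added later in this diff: a model entry whose token limit resolves to 0 while no runtime override masks it. A minimal sketch of that shape (field subset assumed, not the full Model type):

// One plausible repro shape: with the pre-fix payload code, an absent runtime
// override falls back to Math.min(model.maxTokens, 32_000), so a synthesized
// model limit of 0 is sent verbatim and Anthropic rejects the request with
// "max_tokens: must be greater than or equal to 1".
const model = {
  id: "claude-haiku-4-5",
  api: "anthropic-messages",
  provider: "anthropic",
  maxTokens: 0, // zero token budget synthesized upstream of the transport
};
const override: number | undefined = undefined; // no runtime maxTokens override in the cron run
const sent = override || Math.min(model.maxTokens, 32_000); // === 0, rejected by the API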

* openclaw-e82: guard Anthropic Messages maxTokens

Regeneration-Prompt: |
  Fix the Anthropic Messages path so OpenClaw never sends max_tokens <= 0
  to Anthropic. Match the positive-number guard already used by the
  Anthropic Vertex transport, but keep the change scoped: validate token
  limits in src/agents/anthropic-transport-stream.ts where transport
  options are resolved and where the final payload is assembled, fall back
  to the model limit when a runtime override is zero, fail locally when no
  positive token budget exists, and drop non-positive maxTokens from
  src/agents/pi-embedded-runner/extra-params.ts so hidden config params do
  not leak through. Add focused regression coverage for both the transport
  and extra-param forwarding path, and remove the earlier investigation memo
  from the branch so the PR diff only contains the fix.
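
In short, the guard resolves one positive integer budget up front and fails locally otherwise. A condensed sketch of the intended resolution order (helper names shortened here; the diff below is authoritative):

// Floor the runtime override and keep it only if the result is positive;
// otherwise fall back to the model limit capped at 32_000; if neither is
// positive, fail before any request reaches Anthropic.
function resolveMaxTokens(modelMax: number | undefined, requested: number | undefined): number {
  const floorPositive = (v: number | undefined): number | undefined => {
    if (v === undefined || !Number.isFinite(v)) return undefined;
    const floored = Math.floor(v);
    return floored > 0 ? floored : undefined;
  };
  const fromOverride = floorPositive(requested);
  if (fromOverride !== undefined) return fromOverride;
  const fromModel = floorPositive(modelMax);
  if (fromModel !== undefined) return Math.min(fromModel, 32_000);
  throw new Error("Anthropic Messages transport requires a positive maxTokens value");
}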

* fix: scope Anthropic max token guard

* fix: document Anthropic max token guard

* fix: floor Anthropic max token overrides
Josh Lehman
2026-04-14 15:05:04 -07:00
committed by GitHub
parent 9b25c8f8e1
commit ef3ac6a58e
4 changed files with 208 additions and 4 deletions

View File

@@ -31,6 +31,7 @@ Docs: https://docs.openclaw.ai
- Auto-reply/billing: classify pure billing cooldown fallback summaries from structured fallback reasons so users see billing guidance instead of the generic failure reply. (#66363) Thanks @Rohan5commit.
- Agents/fallback: preserve the original prompt body on model fallback retries with session history so the retrying model keeps the active task instead of only seeing a generic continue message. (#66029) Thanks @WuKongAI-CMU.
- Reply/secrets: resolve active reply channel/account SecretRefs before reply-run message-action discovery so channel token SecretRefs (for example Discord) do not degrade into discovery-time unresolved-secret failures. (#66796) Thanks @joshavant.
- Agents/Anthropic: ignore non-positive Anthropic Messages token overrides and fail locally when no positive token budget remains, so invalid `max_tokens` values no longer reach the provider API. (#66664) Thanks @jalehman.
## 2026.4.14

View File

@@ -111,6 +111,141 @@ describe("anthropic transport stream", () => {
    );
  });

  it("ignores non-positive runtime maxTokens overrides and falls back to the model limit", async () => {
    const model = attachModelProviderRequestTransport(
      {
        id: "claude-sonnet-4-6",
        name: "Claude Sonnet 4.6",
        api: "anthropic-messages",
        provider: "anthropic",
        baseUrl: "https://api.anthropic.com",
        reasoning: true,
        input: ["text"],
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
        contextWindow: 200000,
        maxTokens: 8192,
      } satisfies Model<"anthropic-messages">,
      {
        proxy: {
          mode: "env-proxy",
        },
      },
    );
    const streamFn = createAnthropicMessagesTransportStreamFn();
    const stream = await Promise.resolve(
      streamFn(
        model,
        {
          messages: [{ role: "user", content: "hello" }],
        } as Parameters<typeof streamFn>[1],
        {
          apiKey: "sk-ant-api",
          maxTokens: 0,
        } as Parameters<typeof streamFn>[2],
      ),
    );
    await stream.result();
    expect(anthropicMessagesStreamMock).toHaveBeenCalledWith(
      expect.objectContaining({
        model: "claude-sonnet-4-6",
        max_tokens: 8192,
        stream: true,
      }),
      undefined,
    );
  });
it("ignores fractional runtime maxTokens overrides that floor to zero", async () => {
const model = attachModelProviderRequestTransport(
{
id: "claude-sonnet-4-6",
name: "Claude Sonnet 4.6",
api: "anthropic-messages",
provider: "anthropic",
baseUrl: "https://api.anthropic.com",
reasoning: true,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 200000,
maxTokens: 8192,
} satisfies Model<"anthropic-messages">,
{
proxy: {
mode: "env-proxy",
},
},
);
const streamFn = createAnthropicMessagesTransportStreamFn();
const stream = await Promise.resolve(
streamFn(
model,
{
messages: [{ role: "user", content: "hello" }],
} as Parameters<typeof streamFn>[1],
{
apiKey: "sk-ant-api",
maxTokens: 0.5,
} as Parameters<typeof streamFn>[2],
),
);
await stream.result();
expect(anthropicMessagesStreamMock).toHaveBeenCalledWith(
expect.objectContaining({
model: "claude-sonnet-4-6",
max_tokens: 8192,
stream: true,
}),
undefined,
);
});
it("fails locally when Anthropic maxTokens is non-positive after resolution", async () => {
const model = attachModelProviderRequestTransport(
{
id: "claude-haiku-4-5",
name: "Claude Haiku 4.5",
api: "anthropic-messages",
provider: "anthropic",
baseUrl: "https://api.anthropic.com",
reasoning: false,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 32000,
maxTokens: 0,
} satisfies Model<"anthropic-messages">,
{
proxy: {
mode: "env-proxy",
},
},
);
const streamFn = createAnthropicMessagesTransportStreamFn();
const stream = await Promise.resolve(
streamFn(
model,
{
messages: [{ role: "user", content: "hello" }],
} as Parameters<typeof streamFn>[1],
{
apiKey: "sk-ant-api",
} as Parameters<typeof streamFn>[2],
),
);
const result = await stream.result();
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toContain(
"Anthropic Messages transport requires a positive maxTokens value",
);
expect(anthropicMessagesStreamMock).not.toHaveBeenCalled();
});
it("preserves Anthropic OAuth identity and tool-name remapping with transport overrides", async () => {
anthropicMessagesStreamMock.mockReturnValueOnce(
(async function* () {

View File

@@ -128,6 +128,26 @@ function clampReasoningLevel(level: ThinkingLevel): "minimal" | "low" | "medium"
  return level === "xhigh" ? "high" : level;
}
+
+function resolvePositiveAnthropicMaxTokens(value: unknown): number | undefined {
+  if (typeof value !== "number" || !Number.isFinite(value)) {
+    return undefined;
+  }
+  const floored = Math.floor(value);
+  return floored > 0 ? floored : undefined;
+}
+
+function resolveAnthropicMessagesMaxTokens(params: {
+  modelMaxTokens: number | undefined;
+  requestedMaxTokens: number | undefined;
+}): number | undefined {
+  const requested = resolvePositiveAnthropicMaxTokens(params.requestedMaxTokens);
+  if (requested !== undefined) {
+    return requested;
+  }
+  const modelMax = resolvePositiveAnthropicMaxTokens(params.modelMaxTokens);
+  return modelMax !== undefined ? Math.min(modelMax, 32_000) : undefined;
+}

function adjustMaxTokensForThinking(params: {
  baseMaxTokens: number;
  modelMaxTokens: number;
@@ -479,6 +499,15 @@ function buildAnthropicParams(
  isOAuthToken: boolean,
  options: AnthropicTransportOptions | undefined,
) {
+  const maxTokens = resolveAnthropicMessagesMaxTokens({
+    modelMaxTokens: model.maxTokens,
+    requestedMaxTokens: options?.maxTokens,
+  });
+  if (maxTokens === undefined) {
+    throw new Error(
+      `Anthropic Messages transport requires a positive maxTokens value for ${model.provider}/${model.id}`,
+    );
+  }
  const payloadPolicy = resolveAnthropicPayloadPolicy({
    provider: model.provider,
    api: model.api,
@@ -486,11 +515,10 @@ function buildAnthropicParams(
    cacheRetention: options?.cacheRetention,
    enableCacheControl: true,
  });
-  const defaultMaxTokens = Math.min(model.maxTokens, 32_000);
  const params: Record<string, unknown> = {
    model: model.id,
    messages: convertAnthropicMessages(context.messages, model, isOAuthToken),
-    max_tokens: options?.maxTokens || defaultMaxTokens,
+    max_tokens: maxTokens,
    stream: true,
  };
  if (isOAuthToken) {
@@ -555,7 +583,17 @@ function resolveAnthropicTransportOptions(
  options: AnthropicTransportOptions | undefined,
  apiKey: string,
): AnthropicTransportOptions {
-  const baseMaxTokens = options?.maxTokens || Math.min(model.maxTokens, 32_000);
+  const baseMaxTokens = resolveAnthropicMessagesMaxTokens({
+    modelMaxTokens: model.maxTokens,
+    requestedMaxTokens: options?.maxTokens,
+  });
+  if (baseMaxTokens === undefined) {
+    throw new Error(
+      `Anthropic Messages transport requires a positive maxTokens value for ${model.provider}/${model.id}`,
+    );
+  }
+  const reasoningModelMaxTokens =
+    resolvePositiveAnthropicMaxTokens(model.maxTokens) ?? baseMaxTokens;
  const resolved: AnthropicTransportOptions = {
    temperature: options?.temperature,
    maxTokens: baseMaxTokens,
@@ -583,7 +621,7 @@ function resolveAnthropicTransportOptions(
  }
  const adjusted = adjustMaxTokensForThinking({
    baseMaxTokens,
-    modelMaxTokens: model.maxTokens,
+    modelMaxTokens: reasoningModelMaxTokens,
    reasoningLevel: options.reasoning,
    customBudgets: options.thinkingBudgets,
  });
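
Taken together, the resolution added above behaves as follows. A usage sketch of resolveAnthropicMessagesMaxTokens, with the outcomes read off the helper code and the 32_000 cap in this diff:

resolveAnthropicMessagesMaxTokens({ modelMaxTokens: 8192, requestedMaxTokens: 0 }); // 8192: zero override ignored
resolveAnthropicMessagesMaxTokens({ modelMaxTokens: 8192, requestedMaxTokens: 0.5 }); // 8192: floors to 0, ignored
resolveAnthropicMessagesMaxTokens({ modelMaxTokens: 8192, requestedMaxTokens: 4096 }); // 4096: positive override wins
resolveAnthropicMessagesMaxTokens({ modelMaxTokens: 64_000, requestedMaxTokens: undefined }); // 32_000: model limit capped
resolveAnthropicMessagesMaxTokens({ modelMaxTokens: 0, requestedMaxTokens: undefined }); // undefined: callers throw locally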

View File

@@ -1317,6 +1317,36 @@ describe("applyExtraParamsToAgent", () => {
    expect(calls[0]?.transport).toBe("websocket");
  });

  it("preserves maxTokens: 0 in shared extra params for providers that forward it", () => {
    const { calls, agent } = createOptionsCaptureAgent();
    const cfg = {
      agents: {
        defaults: {
          models: {
            "openai/gpt-5": {
              params: {
                maxTokens: 0,
              },
            },
          },
        },
      },
    };
    applyExtraParamsToAgent(agent, cfg, "openai", "gpt-5");
    const model = {
      api: "openai-responses",
      provider: "openai",
      id: "gpt-5",
    } as Model<"openai-responses">;
    const context: Context = { messages: [] };
    void agent.streamFn?.(model, context, {});
    expect(calls).toHaveLength(1);
    expect(calls[0]?.maxTokens).toBe(0);
  });
it("defaults Codex transport to auto (WebSocket-first)", () => {
const { calls, agent } = createOptionsCaptureAgent();
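
The extra-params change itself is not visible in this view, but the test above pins the intended scoping: a zero override survives for providers that forward it (OpenAI here) and only the Anthropic path drops it. A hypothetical sketch of that provider-scoped filter (function name and shape assumed, not the actual src/agents/pi-embedded-runner/extra-params.ts code):

// Hypothetical: strip non-positive maxTokens only for Anthropic so hidden
// config params cannot leak an invalid budget into the Messages payload,
// while other providers keep receiving the raw value.
function filterMaxTokensForProvider(
  provider: string,
  params: Record<string, unknown>,
): Record<string, unknown> {
  if (provider !== "anthropic") return params;
  const { maxTokens, ...rest } = params;
  return typeof maxTokens === "number" && maxTokens > 0 ? { ...rest, maxTokens } : rest;
}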