fix(ollama): reject garbled Kimi symbol output

Peter Steinberger
2026-04-28 11:20:08 +01:00
parent 77192572f6
commit 1b13f53047
4 changed files with 130 additions and 0 deletions


@@ -55,6 +55,7 @@ Docs: https://docs.openclaw.ai
- CLI/channels: list configured chat channel accounts from read-only setup metadata even when the standalone CLI has not loaded the runtime channel registry, so `openclaw channels list` shows Telegram accounts before auth providers. Fixes #73319 and #73322. Thanks @mlaihk.
- CLI/model probes: keep `infer model run --gateway` raw by skipping prior session transcript, bootstrap context, context-engine assembly, tools, and bundled MCP servers, so local backends can be tested without full agent-context overhead. Fixes #73308. Thanks @ScientificProgrammer.
- CLI/image describe: pass `--prompt` and `--timeout-ms` through `infer image describe` and `describe-many`, so custom vision instructions and slow local model budgets reach media-understanding providers such as Ollama, OpenAI, Google, and OpenRouter. Addresses #63700. Thanks @cedricjanssens.
- Providers/Ollama: reject long non-linguistic Kimi/GLM symbol runs as provider failures instead of storing them as successful visible assistant replies, so fallback or error handling can recover from garbled cloud output. Fixes #64262; refs #67019. Thanks @Kloz813 and @xiaomenger123.
- CLI/model probes: reject empty or whitespace-only `infer model run --prompt` values before calling local providers or the Gateway, so smoke checks do not spend provider calls on invalid turns. Fixes #73185. Thanks @iot2edge.
- Gateway/media: route text-only `chat.send` image offloads through media-understanding fields so `agents.defaults.imageModel` can describe WebChat attachments instead of leaving only an opaque `media://inbound` marker. Fixes #72968. Thanks @vorajeeah.
- Gateway/Windows: route no-listener restart handoffs through the Windows supervisor without leaving restart tokens in flight, so failed task scheduling can be retried and successful handoffs do not coalesce later restart requests. (#69056) Thanks @Thatgfsj.


@@ -1062,6 +1062,18 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
</Accordion>
<Accordion title="Kimi or GLM returns garbled symbols">
Hosted Kimi/GLM responses that consist of long, non-linguistic symbol runs are treated as failed provider output rather than as a successful assistant answer. That lets normal retry, fallback, or error handling take over without persisting the corrupted text into the session.
If this happens repeatedly, capture the raw model name, the current session file, and whether the run used `Cloud + Local` or `Cloud only`, then try a fresh session and a fallback model:
```bash
openclaw infer model run --model ollama/kimi-k2.5:cloud --prompt "Reply with exactly: ok" --json
openclaw models set ollama/gemma4
```
</Accordion>
<Accordion title="Cold local model times out">
Large local models may need a long first load before streaming begins. Keep the timeout scoped to the Ollama provider, and optionally ask Ollama to keep the model loaded between turns:
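A minimal sketch of the keep-alive side, assuming the standard Ollama server setting (`OLLAMA_KEEP_ALIVE` and the `30m` value are illustrative, not openclaw configuration):
```bash
# Assumed Ollama server setting: keep models resident for 30 minutes after each turn
OLLAMA_KEEP_ALIVE=30m ollama serve
```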


@@ -1297,6 +1297,69 @@ describe("createOllamaStreamFn streaming events", () => {
    );
  });

  it("emits an error instead of accepting garbled Kimi visible text", async () => {
    // Symbol-only payload modeled on the corrupted hosted Kimi output from #64262.
    const garbled =
      '$$"##"%#"##"####""$""""##""$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$' +
      '#"$"$"""$""""#$"""$"""%"%###"""#%""""&"#"""$"""#"#""""%#""""&"#"""$"""$"""#%"""';
    await withMockNdjsonFetch(
      [
        JSON.stringify({
          model: "kimi-k2.5:cloud",
          created_at: "t",
          message: { role: "assistant", content: garbled },
          done: false,
        }),
        '{"model":"kimi-k2.5:cloud","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":20,"eval_count":40}',
      ],
      async () => {
        const stream = await createOllamaTestStream({
          baseUrl: "http://ollama-host:11434",
          model: { id: "kimi-k2.5:cloud", provider: "ollama" },
        });
        const events = await collectStreamEvents(stream);
        const types = events.map((e) => e.type);
        // The garbled text must surface as an error event, not as accepted visible text.
        expect(types).toEqual(["start", "text_start", "text_delta", "error"]);
        const errorEvent = events.at(-1);
        expect(errorEvent).toMatchObject({
          type: "error",
          error: expect.objectContaining({
            errorMessage: expect.stringContaining("garbled visible text"),
          }),
        });
      },
    );
  });

  it("does not reject punctuation-heavy text from unrelated Ollama models", async () => {
    // Identical payload, but the model is outside the Kimi/GLM pattern.
    const punctuationHeavy =
      '$$"##"%#"##"####""$""""##""$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$' +
      '#"$"$"""$""""#$"""$"""%"%###"""#%""""&"#"""$"""#"#""""%#""""&"#"""$"""$"""#%"""';
    await withMockNdjsonFetch(
      [
        JSON.stringify({
          model: "qwen3:32b",
          created_at: "t",
          message: { role: "assistant", content: punctuationHeavy },
          done: false,
        }),
        '{"model":"qwen3:32b","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":20,"eval_count":40}',
      ],
      async () => {
        const stream = await createOllamaTestStream({ baseUrl: "http://ollama-host:11434" });
        const events = await collectStreamEvents(stream);
        expect(events.map((e) => e.type)).toEqual([
          "start",
          "text_start",
          "text_delta",
          "text_end",
          "done",
        ]);
      },
    );
  });

  it("emits a single text_delta for single-chunk responses", async () => {
    await withMockNdjsonFetch(
      [


@@ -41,6 +41,54 @@ const log = createSubsystemLogger("ollama-stream");
export const OLLAMA_NATIVE_BASE_URL = OLLAMA_DEFAULT_BASE_URL;

// Only hosted Kimi/GLM models have exhibited the corrupted symbol-run output.
const GARBLED_VISIBLE_TEXT_MODEL_RE = /\b(?:glm|kimi)\b/i;
// Shorter runs are too noisy to classify reliably.
const GARBLED_VISIBLE_TEXT_MIN_CHARS = 80;
const GARBLED_VISIBLE_TEXT_SYMBOL_RE = /[$#%&="'_~`^|\\/*+\-[\]{}()<>:;,.!?]/gu;
const LETTER_OR_DIGIT_RE = /[\p{L}\p{N}]/gu;

function countMatches(text: string, re: RegExp): number {
  re.lastIndex = 0;
  return Array.from(text.matchAll(re)).length;
}

function maxCharacterFrequency(text: string): number {
  const counts = new Map<string, number>();
  let max = 0;
  for (const char of text) {
    const count = (counts.get(char) ?? 0) + 1;
    counts.set(char, count);
    max = Math.max(max, count);
  }
  return max;
}

function isKnownOllamaGarbledVisibleTextModel(modelId: string): boolean {
  return GARBLED_VISIBLE_TEXT_MODEL_RE.test(modelId);
}

function isLikelyGarbledVisibleText(params: { text: string; modelId: string }): boolean {
  if (!isKnownOllamaGarbledVisibleTextModel(params.modelId)) {
    return false;
  }
  const compact = params.text.replace(/\s+/g, "");
  if (compact.length < GARBLED_VISIBLE_TEXT_MIN_CHARS) {
    return false;
  }
  const letterOrDigitCount = countMatches(compact, LETTER_OR_DIGIT_RE);
  const symbolCount = countMatches(compact, GARBLED_VISIBLE_TEXT_SYMBOL_RE);
  const maxFrequency = maxCharacterFrequency(compact);
  const letterOrDigitRatio = letterOrDigitCount / compact.length;
  const symbolRatio = symbolCount / compact.length;
  const dominantCharacterRatio = maxFrequency / compact.length;
  // Garbled output is almost entirely symbols, nearly free of letters/digits,
  // and either dominated by one repeated character or contains a long
  // uninterrupted symbol run.
  return (
    letterOrDigitRatio < 0.08 &&
    symbolRatio > 0.6 &&
    (dominantCharacterRatio > 0.22 || /[$#%&="'_~`^|\\/*+\-[\]{}()<>:;,.!?]{12,}/u.test(compact))
  );
}

export function resolveOllamaBaseUrlForRun(params: {
  modelBaseUrl?: string;
  providerBaseUrl?: string;
@@ -1129,6 +1177,12 @@ export function createOllamaStreamFn(
throw new Error("Ollama API stream ended without a final response");
}
if (isLikelyGarbledVisibleText({ text: accumulatedContent, modelId: model.id })) {
throw new Error(
`Ollama returned non-linguistic garbled visible text for ${model.id}; retry or switch models`,
);
}
finalResponse.message.content = accumulatedContent;
if (accumulatedThinking) {
finalResponse.message.thinking = accumulatedThinking;
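
A minimal caller-side sketch of the fallback this error enables; every identifier below (`RunFn`, `runWithFallback`, `FALLBACK_MODEL_ID`) is illustrative and not part of this commit:
```ts
// Illustrative only: RunFn, runWithFallback, and the model ids are assumptions,
// not identifiers from this commit.
type RunFn = (modelId: string, prompt: string) => Promise<string>;

const FALLBACK_MODEL_ID = "ollama/gemma4"; // mirrors the fallback suggested in the docs above

async function runWithFallback(run: RunFn, prompt: string): Promise<string> {
  try {
    return await run("ollama/kimi-k2.5:cloud", prompt);
  } catch (error) {
    // The stream now throws on garbled visible text instead of persisting it,
    // so the session is still clean when the fallback model retries the turn.
    if (error instanceof Error && error.message.includes("garbled visible text")) {
      return run(FALLBACK_MODEL_ID, prompt);
    }
    throw error;
  }
}
```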