fix: evaluate shell wrapper inline commands against allowlist (#57377) (#57584)

When a skill constructs a compound command via a shell wrapper
(e.g. `sh -c "cat SKILL.md && gog-wrapper calendar events"`),
the allowlist check matched the wrapper binary (`/bin/sh`) instead of the
actual target binaries, so the entire command was silently rejected.

This adds recursive inline command evaluation that:
- Detects chain operators (&&, ||, ;) in the -c payload
- Parses each sub-command independently via analyzeShellCommand
- Evaluates every sub-command against the allowlist
- Preserves per-sub-command segmentSatisfiedBy for accurate tracking
- Limits recursion depth to 3 to prevent abuse
- Skips recursion on Windows (no POSIX shell semantics)

Closes #57377

Co-authored-by: WZBbiao <wangzhenbiao326@gmail.com>
This commit is contained in:
biao
2026-04-03 00:06:40 +08:00
committed by GitHub
parent 578a0ed31a
commit 8d81e76f23
3 changed files with 253 additions and 3 deletions

View File

@@ -182,6 +182,11 @@ export function handleMessageStart(
// may deliver late text_end updates after message_end, which would otherwise
// re-trigger block replies.
ctx.resetAssistantMessageState(ctx.state.assistantTexts.length);
// Resolve text-repetition-guard config once per message to avoid re-parsing on every tick.
ctx.state.resolvedTextRepetitionGuardConfig = resolveTextRepetitionGuardConfig({
cfg: ctx.params.config,
agentId: ctx.params.agentId,
});
// Use assistant message_start as the earliest "writing" signal for typing.
void ctx.params.onAssistantMessageStart?.();
}
@@ -282,6 +287,21 @@ export function handleMessageUpdate(
}
}
// Text repetition guard: throttle checks to every N chars of new content.
const guardConfig = ctx.state.resolvedTextRepetitionGuardConfig;
if (guardConfig && guardConfig.enabled) {
const checkInterval = guardConfig.checkIntervalChars;
if (ctx.state.deltaBuffer.length - ctx.state.textRepetitionLastCheckedLen >= checkInterval) {
ctx.state.textRepetitionLastCheckedLen = ctx.state.deltaBuffer.length;
const result = detectTextRepetition(ctx.state.deltaBuffer, guardConfig);
if (result.looping) {
ctx.state.abortedByTextRepetitionGuard = true;
void ctx.params.session.abort();
return;
}
}
}
if (ctx.state.streamReasoning) {
// Handle partial <think> tags: stream whatever reasoning is visible so far.
ctx.emitReasoningStream(extractThinkingFromTaggedStream(ctx.state.deltaBuffer));
@@ -518,10 +538,10 @@ export function handleMessageEnd(
text &&
onBlockReply
) {
if (ctx.blockChunker?.hasBuffered()) {
if (ctx.blockChunker?.hasBuffered() && !ctx.state.abortedByTextRepetitionGuard) {
ctx.blockChunker.drain({ force: true, emit: ctx.emitBlockChunk });
ctx.blockChunker.reset();
} else if (text !== ctx.state.lastBlockReplyText) {
} else if (text !== ctx.state.lastBlockReplyText && !ctx.state.abortedByTextRepetitionGuard) {
// Check for duplicates before emitting (same logic as emitBlockChunk).
const normalizedText = normalizeTextForComparison(text);
if (