mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:20:43 +00:00
Repair shell command explainer automerge blockers
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import type { Parser } from "web-tree-sitter";
|
||||
import type { Node as TreeSitterNode, Parser, Tree } from "web-tree-sitter";
|
||||
import { explainShellCommand } from "./extract.js";
|
||||
import {
|
||||
getBashParserForCommandExplanation,
|
||||
@@ -15,6 +15,119 @@ function setParserLoaderForTest(loader: () => Promise<Parser>): void {
|
||||
setBashParserLoaderForCommandExplanationForTest(loader);
|
||||
}
|
||||
|
||||
/**
 * Plain-data description of a fake tree-sitter node, consumed by `fakeNode`.
 * Only the members that `fakeNode` copies or exposes are listed here.
 */
type FakeNodeInit = {
  /** Grammar node type reported by the fake, e.g. "command" or "word". */
  type: string;
  /** Source text reported for the node. */
  text: string;
  /** Start offset reported for the node. */
  startIndex: number;
  /** End offset reported for the node. */
  endIndex: number;
  /** Row/column position reported for the node start. */
  startPosition: TreeSitterNode["startPosition"];
  /** Row/column position reported for the node end. */
  endPosition: TreeSitterNode["endPosition"];
  /** Children returned by `child` / `namedChild`; defaults to none. */
  namedChildren?: TreeSitterNode[];
  /** Children returned by `childForFieldName`, keyed by field name. */
  fieldChildren?: Record<string, TreeSitterNode>;
  /** Value reported by `hasError`; defaults to `false`. */
  hasError?: boolean;
};
|
||||
|
||||
/**
 * Builds a minimal stand-in for a tree-sitter node from plain data.
 *
 * Only the members assigned below exist on the result; it is cast to
 * `TreeSitterNode`, so any other member is `undefined` at runtime.
 * The fake has no anonymous children: `child`/`childCount` and
 * `namedChild`/`namedChildCount` are all backed by `init.namedChildren`.
 *
 * @param init - Node data and optional children.
 * @returns An object usable wherever the listed node members are read.
 */
function fakeNode(init: FakeNodeInit): TreeSitterNode {
  const named = init.namedChildren ?? [];
  // Shared by `child` and `namedChild` because every child is a named child.
  const childAt = (index: number): TreeSitterNode | null => named[index] ?? null;

  const node = {
    type: init.type,
    text: init.text,
    startIndex: init.startIndex,
    endIndex: init.endIndex,
    startPosition: init.startPosition,
    endPosition: init.endPosition,
    childCount: named.length,
    namedChildCount: named.length,
    hasError: init.hasError ?? false,
    child: childAt,
    namedChild: childAt,
    childForFieldName: (name: string): TreeSitterNode | null =>
      init.fieldChildren?.[name] ?? null,
  };

  // Deliberately partial implementation of the node interface for tests.
  return node as unknown as TreeSitterNode;
}
|
||||
|
||||
/**
 * Builds a fake parse tree for `echo café && echo ok` whose node offsets and
 * columns are UTF-8 byte offsets rather than JavaScript string indices.
 *
 * The two commands are hard-coded; `source` only supplies the root node's
 * text and the end offset of the root and of the second command, so callers
 * must pass a source that matches the hard-coded commands.
 *
 * @param source - Full command line the tree is supposed to represent.
 * @returns A fake `Tree` with a `program` root and two `command` children.
 */
function createByteIndexedUnicodeCommandTree(source: string): Tree {
  const firstCommand = "echo café";
  const separator = " && ";
  const secondCommand = "echo ok";
  const firstCommandEnd = Buffer.byteLength(firstCommand, "utf8");
  const secondCommandStart = Buffer.byteLength(firstCommand + separator, "utf8");
  const sourceEnd = Buffer.byteLength(source, "utf8");

  // Every node sits on row 0, so its column always equals its byte offset.
  const singleRowNode = (
    type: string,
    text: string,
    startIndex: number,
    endIndex: number,
    children: Pick<FakeNodeInit, "namedChildren" | "fieldChildren"> = {},
  ) =>
    fakeNode({
      type,
      text,
      startIndex,
      endIndex,
      startPosition: { row: 0, column: startIndex },
      endPosition: { row: 0, column: endIndex },
      ...children,
    });

  const firstName = singleRowNode("command_name", "echo", 0, 4);
  const firstArgument = singleRowNode("word", "café", 5, firstCommandEnd);
  const first = singleRowNode("command", firstCommand, 0, firstCommandEnd, {
    namedChildren: [firstName, firstArgument],
    fieldChildren: { name: firstName },
  });

  const secondName = singleRowNode(
    "command_name",
    "echo",
    secondCommandStart,
    secondCommandStart + 4,
  );
  const secondArgument = singleRowNode("word", "ok", secondCommandStart + 5, sourceEnd);
  const second = singleRowNode("command", secondCommand, secondCommandStart, sourceEnd, {
    namedChildren: [secondName, secondArgument],
    fieldChildren: { name: secondName },
  });

  return {
    rootNode: singleRowNode("program", source, 0, sourceEnd, {
      namedChildren: [first, second],
    }),
    delete: vi.fn(),
  } as unknown as Tree;
}
|
||||
|
||||
afterEach(() => {
|
||||
if (parserLoaderOverridden) {
|
||||
setBashParserLoaderForCommandExplanationForTest();
|
||||
@@ -94,6 +207,34 @@ describe("command explainer tree-sitter runtime", () => {
|
||||
expect(reset).toHaveBeenCalledOnce();
|
||||
});
|
||||
|
||||
it("maps parser byte offsets to JavaScript string spans for Unicode source", async () => {
  // "é" is one UTF-16 code unit but two UTF-8 bytes, so the fake tree's byte
  // offsets are one larger than the string indices for everything after it.
  const source = "echo café && echo ok";
  const parser = {
    parse: vi.fn(() => createByteIndexedUnicodeCommandTree(source)),
    reset: vi.fn(),
  };
  setParserLoaderForTest(async () => parser as unknown as Parser);

  const explanation = await explainShellCommand(source);

  // Expected spans are string indices into `source`, not byte offsets.
  expect(explanation.topLevelCommands).toEqual([
    expect.objectContaining({
      executable: "echo",
      argv: ["echo", "café"],
      span: expect.objectContaining({ startIndex: 0, endIndex: 9 }),
    }),
    expect.objectContaining({
      executable: "echo",
      argv: ["echo", "ok"],
      span: expect.objectContaining({ startIndex: 13, endIndex: 20 }),
    }),
  ]);
  for (const command of explanation.topLevelCommands) {
    // Slicing the source with the reported span must give back the command text.
    expect(source.slice(command.span.startIndex, command.span.endIndex)).toBe(command.text);
    // Single-row input: the end column must match the end string index.
    expect(command.span.endPosition.column).toBe(command.span.endIndex);
  }
});
|
||||
|
||||
it("explains a pipeline with python inline eval", async () => {
|
||||
const explanation = await explainShellCommand('ls | grep "stuff" | python -c \'print("hi")\'');
|
||||
|
||||
@@ -566,7 +707,7 @@ describe("command explainer tree-sitter runtime", () => {
|
||||
'find . -name "*.ts" -exec grep -n TODO {} +',
|
||||
'bash -lc "echo hi | wc -c"',
|
||||
];
|
||||
const iterations = 10;
|
||||
const iterations = 3;
|
||||
for (let index = 0; index < iterations; index += 1) {
|
||||
for (const command of corpus) {
|
||||
const explanation = await explainShellCommand(command);
|
||||
|
||||
@@ -149,6 +149,92 @@ function advancePosition(
|
||||
return { row, column };
|
||||
}
|
||||
|
||||
/**
 * Returns the number of bytes UTF-8 uses to encode one code point.
 *
 * @param codePoint - Code point value, e.g. from `String.prototype.codePointAt`.
 * @returns 1 up to U+007F, 2 up to U+07FF, 3 up to U+FFFF, otherwise 4.
 */
function utf8ByteLengthForCodePoint(codePoint: number): number {
  if (codePoint <= 0x7f) {
    return 1;
  }
  if (codePoint <= 0x7ff) {
    return 2;
  }
  if (codePoint <= 0xffff) {
    return 3;
  }
  return 4;
}
|
||||
|
||||
/**
 * Computes how many bytes `text` occupies when encoded as UTF-8.
 *
 * @param text - Any JavaScript string.
 * @returns The sum of the UTF-8 lengths of the string's code points.
 */
function utf8ByteLength(text: string): number {
  let length = 0;
  // String iteration yields whole code points, so a surrogate pair is counted
  // once (4 bytes); an unpaired surrogate is yielded alone and counts 3 bytes.
  for (const character of text) {
    const codePoint = character.codePointAt(0);
    if (codePoint !== undefined) {
      length += utf8ByteLengthForCodePoint(codePoint);
    }
  }
  return length;
}
|
||||
|
||||
/**
 * Converts a UTF-8 byte offset into `text` to a JavaScript string index
 * (UTF-16 code units).
 *
 * An offset that lands inside a multi-byte code point maps to the index where
 * that code point starts. Offsets at or below zero map to 0 and offsets past
 * the encoded end map to `text.length`.
 *
 * @param text - The string the byte offset refers to.
 * @param byteOffset - Offset into the UTF-8 encoding of `text`.
 * @returns The corresponding index into `text`.
 */
function utf8ByteOffsetToStringIndex(text: string, byteOffset: number): number {
  if (byteOffset <= 0) {
    return 0;
  }
  let bytesBefore = 0;
  let index = 0;
  while (index < text.length) {
    const codePoint = text.codePointAt(index);
    if (codePoint === undefined) {
      break;
    }
    const bytesThrough = bytesBefore + utf8ByteLengthForCodePoint(codePoint);
    // The offset lies before the end of this code point: report its start.
    // An offset exactly on a boundary is caught here on the following code
    // point, or by the final return when the string ends on that boundary.
    if (bytesThrough > byteOffset) {
      return index;
    }
    bytesBefore = bytesThrough;
    // Code points above U+FFFF occupy two UTF-16 code units.
    index += codePoint > 0xffff ? 2 : 1;
  }
  return text.length;
}
|
||||
|
||||
/**
 * Chooses how offsets reported by the parser are turned into indices into
 * `source`.
 *
 * Heuristic: if `source` contains multi-byte characters (its UTF-8 length
 * differs from its string length) and the root node ends exactly at the UTF-8
 * length, the tree is taken to be byte-indexed and offsets are converted.
 * In every other case offsets are returned unchanged.
 *
 * @param source - The text that was parsed.
 * @param rootNode - Root node of the tree produced for `source`.
 * @returns A function mapping a parser offset to a string index in `source`.
 */
function parserOffsetToStringIndex(
  source: string,
  rootNode: TreeSitterNode,
): (offset: number) => number {
  const utf8Length = utf8ByteLength(source);
  // ASCII-only text has identical byte and string offsets, so no mapping is
  // needed; `endIndex` is only consulted when the two lengths differ.
  if (utf8Length !== source.length && rootNode.endIndex === utf8Length) {
    return (offset) => utf8ByteOffsetToStringIndex(source, offset);
  }
  return (offset) => offset;
}
|
||||
|
||||
/**
 * Wraps `base` so that offsets coming from the parser are first converted to
 * string indices in `source` before being mapped to a span location.
 *
 * @param source - The text that was parsed.
 * @param rootNode - Root node of the tree produced for `source`; used to
 *   decide whether parser offsets need conversion.
 * @param base - Span base the converted offsets are resolved against.
 * @returns A span base with the same start as `base` and a `mapOffset` that
 *   accepts parser offsets.
 */
function spanBaseForParserSource(
  source: string,
  rootNode: TreeSitterNode,
  base: SpanBase,
): SpanBase {
  const offsetToStringIndex = parserOffsetToStringIndex(source, rootNode);
  return {
    startIndex: base.startIndex,
    startPosition: base.startPosition,
    mapOffset(offset) {
      const sourceIndex = offsetToStringIndex(offset);
      // A base with its own mapping receives the converted index and decides
      // the final location itself.
      if (base.mapOffset) {
        return base.mapOffset(sourceIndex);
      }
      // Otherwise shift by the base start and derive the position from the
      // source text that precedes the index.
      return {
        index: base.startIndex + sourceIndex,
        position: advancePosition(base.startPosition, source.slice(0, sourceIndex)),
      };
    },
  };
}
|
||||
|
||||
function valuePrefixLength(node: TreeSitterNode): number {
|
||||
if (node.type === "string" || node.type === "raw_string") {
|
||||
return 1;
|
||||
@@ -1051,18 +1137,23 @@ async function walk(
|
||||
);
|
||||
if (wrapperPayload && state.wrapperPayloadDepth < MAX_WRAPPER_PAYLOAD_DEPTH) {
|
||||
const wrapperTree = await parseBashForCommandExplanation(wrapperPayload.command);
|
||||
const wrapperSpanBase = spanBaseForParserSource(
|
||||
wrapperPayload.command,
|
||||
wrapperTree.rootNode,
|
||||
wrapperPayload.spanBase,
|
||||
);
|
||||
try {
|
||||
if (wrapperTree.rootNode.hasError) {
|
||||
output.hasParseError = true;
|
||||
output.risks.push({
|
||||
kind: "syntax-error",
|
||||
text: wrapperPayload.command,
|
||||
span: spanFromNode(wrapperTree.rootNode, wrapperPayload.spanBase),
|
||||
span: spanFromNode(wrapperTree.rootNode, wrapperSpanBase),
|
||||
});
|
||||
}
|
||||
await walk(wrapperTree.rootNode, output, "wrapper-payload", {
|
||||
wrapperPayloadDepth: state.wrapperPayloadDepth + 1,
|
||||
spanBase: wrapperPayload.spanBase,
|
||||
spanBase: wrapperSpanBase,
|
||||
});
|
||||
} finally {
|
||||
wrapperTree.delete();
|
||||
@@ -1079,6 +1170,7 @@ async function walk(
|
||||
export async function explainShellCommand(source: string): Promise<CommandExplanation> {
|
||||
const tree = await parseBashForCommandExplanation(source);
|
||||
try {
|
||||
const spanBase = spanBaseForParserSource(source, tree.rootNode, ROOT_SPAN_BASE);
|
||||
const output: MutableExplanation = {
|
||||
shapes: new Set(),
|
||||
commands: [],
|
||||
@@ -1087,7 +1179,7 @@ export async function explainShellCommand(source: string): Promise<CommandExplan
|
||||
};
|
||||
await walk(tree.rootNode, output, "top-level", {
|
||||
wrapperPayloadDepth: 0,
|
||||
spanBase: ROOT_SPAN_BASE,
|
||||
spanBase,
|
||||
});
|
||||
const topLevelCommands = output.commands.filter((command) => command.context === "top-level");
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user