Repair shell command explainer automerge blockers

This commit is contained in:
clawsweeper
2026-05-03 05:51:35 +00:00
parent 10249013d3
commit 47577579e9
2 changed files with 238 additions and 5 deletions

View File

@@ -1,5 +1,5 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import type { Parser } from "web-tree-sitter";
import type { Node as TreeSitterNode, Parser, Tree } from "web-tree-sitter";
import { explainShellCommand } from "./extract.js";
import {
getBashParserForCommandExplanation,
@@ -15,6 +15,119 @@ function setParserLoaderForTest(loader: () => Promise<Parser>): void {
setBashParserLoaderForCommandExplanationForTest(loader);
}
/**
 * Plain-data description of a stand-in tree-sitter node, consumed by `fakeNode`.
 */
type FakeNodeInit = {
// Grammar node type the fake reports (this file uses "program", "command", "command_name", "word").
type: string;
// Source text the fake node claims to cover.
text: string;
// Start offset exactly as the parser would report it.
startIndex: number;
// End offset exactly as the parser would report it.
endIndex: number;
// Row/column of the node start, typed after the real node's `startPosition`.
startPosition: TreeSitterNode["startPosition"];
// Row/column of the node end, typed after the real node's `endPosition`.
endPosition: TreeSitterNode["endPosition"];
// Named children; `fakeNode` treats a missing list as empty and also uses it as the full child list.
namedChildren?: TreeSitterNode[];
// Children returned by `childForFieldName`, keyed by field name.
fieldChildren?: Record<string, TreeSitterNode>;
// Whether the node reports a parse error; `fakeNode` defaults this to false.
hasError?: boolean;
};
/**
 * Creates a lightweight stand-in for a tree-sitter node from plain data.
 *
 * Only the members assigned below exist on the returned object; it is cast to
 * `TreeSitterNode`, so any other member reads as `undefined` at runtime.
 *
 * @param init - Values the fake node should report.
 * @returns An object usable wherever the listed node members are sufficient.
 */
function fakeNode(init: FakeNodeInit): TreeSitterNode {
  // The fake has no anonymous children, so one list serves both child accessors.
  const kids = init.namedChildren ?? [];
  const stub = {
    type: init.type,
    text: init.text,
    startIndex: init.startIndex,
    endIndex: init.endIndex,
    startPosition: init.startPosition,
    endPosition: init.endPosition,
    childCount: kids.length,
    namedChildCount: kids.length,
    hasError: init.hasError ?? false,
    child: (index: number): TreeSitterNode | null => kids[index] ?? null,
    namedChild: (index: number): TreeSitterNode | null => kids[index] ?? null,
    childForFieldName: (name: string): TreeSitterNode | null =>
      init.fieldChildren?.[name] ?? null,
  };
  return stub as unknown as TreeSitterNode;
}
/**
 * Builds a fake parse tree for `echo café && echo ok` whose offsets and columns
 * are UTF-8 byte counts rather than JavaScript string indices.
 *
 * The two command nodes are fixed; `source` only supplies the root node's text
 * and end offset.
 *
 * @param source - Full command text the root node should report.
 * @returns A fake tree with a `program` root and a mocked `delete`.
 */
function createByteIndexedUnicodeCommandTree(source: string): Tree {
  const byteLength = (text: string): number => Buffer.byteLength(text, "utf8");
  const firstText = "echo café";
  const joiner = " && ";
  const secondText = "echo ok";
  const firstEnd = byteLength(firstText);
  const secondStart = byteLength(firstText + joiner);
  const totalBytes = byteLength(source);

  // Everything sits on row 0, so each column equals the matching offset.
  const onRowZero = (startIndex: number, endIndex: number) => ({
    startIndex,
    endIndex,
    startPosition: { row: 0, column: startIndex },
    endPosition: { row: 0, column: endIndex },
  });

  // An `echo <argument>` command: 4-byte name, one space, then the argument up to `end`.
  const echoCommand = (
    text: string,
    argument: string,
    start: number,
    end: number,
  ): TreeSitterNode => {
    const name = fakeNode({
      type: "command_name",
      text: "echo",
      ...onRowZero(start, start + 4),
    });
    const word = fakeNode({
      type: "word",
      text: argument,
      ...onRowZero(start + 5, end),
    });
    return fakeNode({
      type: "command",
      text,
      ...onRowZero(start, end),
      namedChildren: [name, word],
      fieldChildren: { name },
    });
  };

  const first = echoCommand(firstText, "café", 0, firstEnd);
  const second = echoCommand(secondText, "ok", secondStart, totalBytes);
  const rootNode = fakeNode({
    type: "program",
    text: source,
    ...onRowZero(0, totalBytes),
    namedChildren: [first, second],
  });
  return { rootNode, delete: vi.fn() } as unknown as Tree;
}
afterEach(() => {
if (parserLoaderOverridden) {
setBashParserLoaderForCommandExplanationForTest();
@@ -94,6 +207,34 @@ describe("command explainer tree-sitter runtime", () => {
expect(reset).toHaveBeenCalledOnce();
});
it("maps parser byte offsets to JavaScript string spans for Unicode source", async () => {
  // "é" takes two UTF-8 bytes but one string index, so the fake tree's byte
  // offsets run one ahead of string indices for everything after it.
  const source = "echo café && echo ok";
  const parse = vi.fn(() => createByteIndexedUnicodeCommandTree(source));
  const reset = vi.fn();
  const parser = { parse, reset };
  setParserLoaderForTest(async () => parser as unknown as Parser);

  const { topLevelCommands } = await explainShellCommand(source);

  // Expected spans are string indices, not the byte offsets the fake tree reports.
  const echoWith = (argument: string, startIndex: number, endIndex: number) =>
    expect.objectContaining({
      executable: "echo",
      argv: ["echo", argument],
      span: expect.objectContaining({ startIndex, endIndex }),
    });
  expect(topLevelCommands).toEqual([echoWith("café", 0, 9), echoWith("ok", 13, 20)]);

  for (const { span, text } of topLevelCommands) {
    expect(source.slice(span.startIndex, span.endIndex)).toBe(text);
    expect(span.endPosition.column).toBe(span.endIndex);
  }
});
it("explains a pipeline with python inline eval", async () => {
const explanation = await explainShellCommand('ls | grep "stuff" | python -c \'print("hi")\'');
@@ -566,7 +707,7 @@ describe("command explainer tree-sitter runtime", () => {
'find . -name "*.ts" -exec grep -n TODO {} +',
'bash -lc "echo hi | wc -c"',
];
const iterations = 10;
const iterations = 3;
for (let index = 0; index < iterations; index += 1) {
for (const command of corpus) {
const explanation = await explainShellCommand(command);

View File

@@ -149,6 +149,92 @@ function advancePosition(
return { row, column };
}
/**
 * Returns how many bytes UTF-8 needs to encode one code point.
 *
 * @param codePoint - Numeric code point value.
 * @returns 1 up to U+007F, 2 up to U+07FF, 3 up to U+FFFF, otherwise 4.
 */
function utf8ByteLengthForCodePoint(codePoint: number): number {
  // Upper-bound checks from the narrowest encoding outward; anything that
  // passes none of them falls through to the four-byte case.
  return codePoint <= 0x7f
    ? 1
    : codePoint <= 0x7ff
      ? 2
      : codePoint <= 0xffff
        ? 3
        : 4;
}
/**
 * Computes the UTF-8 encoded size of a string, in bytes.
 *
 * @param text - String to measure.
 * @returns Sum of the per-code-point byte lengths.
 */
function utf8ByteLength(text: string): number {
  let total = 0;
  // String iteration yields whole code points, so a surrogate pair is
  // visited once and an unpaired surrogate is visited on its own.
  for (const character of text) {
    const codePoint = character.codePointAt(0);
    if (codePoint !== undefined) {
      total += utf8ByteLengthForCodePoint(codePoint);
    }
  }
  return total;
}
/**
 * Converts a UTF-8 byte offset into the matching JavaScript string index.
 *
 * @param text - String the byte offset refers to.
 * @param byteOffset - Offset in bytes from the start of the UTF-8 encoding.
 * @returns 0 for offsets at or below zero; the index just past the code point
 *   that ends exactly at `byteOffset`; the start index of the code point when
 *   `byteOffset` lands inside it; `text.length` when the offset is past the end.
 */
function utf8ByteOffsetToStringIndex(text: string, byteOffset: number): number {
  if (byteOffset <= 0) {
    return 0;
  }
  let bytesConsumed = 0;
  let cursor = 0;
  while (cursor < text.length) {
    const codePoint = text.codePointAt(cursor);
    if (codePoint === undefined) {
      return text.length;
    }
    const width = utf8ByteLengthForCodePoint(codePoint);
    if (bytesConsumed + width > byteOffset) {
      // The offset points into the middle of this code point.
      return cursor;
    }
    bytesConsumed += width;
    // Code points above U+FFFF occupy two string indices.
    cursor += codePoint > 0xffff ? 2 : 1;
    if (bytesConsumed === byteOffset) {
      return cursor;
    }
  }
  return text.length;
}
/**
 * Picks the function that turns parser offsets into string indices for `source`.
 *
 * Offsets are treated as UTF-8 byte offsets only when the source's UTF-8 length
 * differs from its string length and the root node ends exactly at that UTF-8
 * length; in every other case offsets pass through unchanged.
 *
 * @param source - Text that was handed to the parser.
 * @param rootNode - Root node of the resulting tree.
 * @returns A mapper from parser offset to string index.
 */
function parserOffsetToStringIndex(
  source: string,
  rootNode: TreeSitterNode,
): (offset: number) => number {
  const byteLength = utf8ByteLength(source);
  const lengthsAgree = byteLength === source.length;
  if (lengthsAgree || rootNode.endIndex !== byteLength) {
    return (offset) => offset;
  }
  return (offset) => utf8ByteOffsetToStringIndex(source, offset);
}
/**
 * Wraps a span base so parser offsets are converted to string indices before
 * they are resolved to an index and position.
 *
 * @param source - Text that was handed to the parser.
 * @param rootNode - Root node of the tree parsed from `source`.
 * @param base - Span base to build on; its `mapOffset`, when present, receives
 *   the converted string index.
 * @returns A span base with the same start index and position and a `mapOffset`
 *   that converts the offset first.
 */
function spanBaseForParserSource(
  source: string,
  rootNode: TreeSitterNode,
  base: SpanBase,
): SpanBase {
  const toStringIndex = parserOffsetToStringIndex(source, rootNode);
  return {
    startIndex: base.startIndex,
    startPosition: base.startPosition,
    mapOffset: (offset) => {
      const sourceIndex = toStringIndex(offset);
      // Defer to the base's own mapping when it has one; otherwise shift by
      // the base start and walk the consumed text to find the position.
      return base.mapOffset
        ? base.mapOffset(sourceIndex)
        : {
            index: base.startIndex + sourceIndex,
            position: advancePosition(base.startPosition, source.slice(0, sourceIndex)),
          };
    },
  };
}
function valuePrefixLength(node: TreeSitterNode): number {
if (node.type === "string" || node.type === "raw_string") {
return 1;
@@ -1051,18 +1137,23 @@ async function walk(
);
if (wrapperPayload && state.wrapperPayloadDepth < MAX_WRAPPER_PAYLOAD_DEPTH) {
const wrapperTree = await parseBashForCommandExplanation(wrapperPayload.command);
const wrapperSpanBase = spanBaseForParserSource(
wrapperPayload.command,
wrapperTree.rootNode,
wrapperPayload.spanBase,
);
try {
if (wrapperTree.rootNode.hasError) {
output.hasParseError = true;
output.risks.push({
kind: "syntax-error",
text: wrapperPayload.command,
span: spanFromNode(wrapperTree.rootNode, wrapperPayload.spanBase),
span: spanFromNode(wrapperTree.rootNode, wrapperSpanBase),
});
}
await walk(wrapperTree.rootNode, output, "wrapper-payload", {
wrapperPayloadDepth: state.wrapperPayloadDepth + 1,
spanBase: wrapperPayload.spanBase,
spanBase: wrapperSpanBase,
});
} finally {
wrapperTree.delete();
@@ -1079,6 +1170,7 @@ async function walk(
export async function explainShellCommand(source: string): Promise<CommandExplanation> {
const tree = await parseBashForCommandExplanation(source);
try {
const spanBase = spanBaseForParserSource(source, tree.rootNode, ROOT_SPAN_BASE);
const output: MutableExplanation = {
shapes: new Set(),
commands: [],
@@ -1087,7 +1179,7 @@ export async function explainShellCommand(source: string): Promise<CommandExplan
};
await walk(tree.rootNode, output, "top-level", {
wrapperPayloadDepth: 0,
spanBase: ROOT_SPAN_BASE,
spanBase,
});
const topLevelCommands = output.commands.filter((command) => command.context === "top-level");
return {