From 71c855f24420147c584ab583f0b50f48bb6ce5f4 Mon Sep 17 00:00:00 2001 From: Gio Della-Libera Date: Fri, 8 May 2026 18:40:57 -0700 Subject: [PATCH] refactor(oc-path): consolidate dispatch and trim comment surface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Substrate simplifications and broad comment cleanup: **Walker collapse (find.ts)**: three near-parallel walkers (walkJsonc, walkJsonl + walkJsonlInsideLine, walkMd + walkMdInsideBlock + walkMdInsideItem) shared the same segment-shape dispatch — union / predicate / `*` / `**` / positional / literal — with different child types. Extracted as a single `dispatchSeg(ops, ...)` that takes a per-kind `WalkOps` table; each walker is a thin wrapper. The three md walkers fold into one `walkMd(level, ...)` polymorphic on a `MdLevel` discriminated union. JSONL routes the post-line-slot descent through walkJsonc via a WeakMap-tagged ast holder. **CLI (cli.ts)**: the commands repeated the same try/catch/emit/exit dance four times over — missing-arg check, parseOcPath try/catch, OcEmitSentinelError catch. Extracted as `requireArg`, `tryParse`, `catchSentinel`. Folded `RawPathOptions` into `PathCommandOptions` (identical shape) and collapsed `.option(...)` chains via `withCommonOpts`. **universal.ts**: setJsoncLeaf and setJsonlLeaf were near-identical (resolve, refuse root, coerce, set, wrap). Extracted as `setStructuredLeaf` with optional `onLine` for jsonl's whole-line replacement. Inlined setMdLeaf (7-line passthrough) into setOcPath. Dropped four `throw new Error("unreachable")` statements that TS exhaustive checking already covers. **oc-path.ts**: 35 `throw new OcPathError(...)` sites compress through a `fail()` helper. File-slot containment check (absolute, parent-dir, control chars) extracted as `validateFileSlot` so parse + format share the same defense. Three structural-nesting throws in formatOcPath collapse into two. 
Three near-parallel string scanners (`indexOfTopLevel`, `splitRespectingBrackets`, `validateBrackets`) fold through one `scanBracketAware(s, onChar, onUnbalanced)` helper. **jsonl/edit.ts**: pickLineIndex compressed; line-address dispatch shares the value-line filter as a small helper. **Internal review codes stripped**: P-NNN, F-NN, I-NN, H2-NN, OP-NN, R-NN, T-NN, S-NN, BFJ-NN, RJC-NN, RJL-NN, FM-NN, A-NN, B-NN, CC-NN, wave-NN — these were review-process artifacts (sprint identifiers, finding IDs, pitfall taxonomy IDs) that mean nothing to a reader who didn't participate in the originating review. Test names are rewritten to be human-readable; comments lose their P-NNN bookmarks; describe blocks drop the wave-NN prefix. **Pitfalls test consolidated**: `tests/scenarios/pitfalls.test.ts` (637 LoC, organized by P-NNN) replaced by `security-and-limits.test.ts` (253 LoC) — unique coverage migrates over with descriptive names; duplicates of OP-/R-/etc. tests are dropped. **Comment cleanup**: per CLAUDE.md "default to writing no comments; add one only when the WHY is non-obvious", trimmed multi-paragraph WHY-prose on every public export, running prose inside function bodies that restated what the next line of code said, section-divider comments in test files, and module-level doc-comments that paraphrased the file name. Kept load-bearing context: NFC re-check after grow, quote-aware split symmetry, multi-char operator precedence, sentinel guard catch routes, WeakMap holder rationale. **MdAst slimmed**: `tables` and `codeBlocks` fields removed — substrate addressing doesn't go inside them, and markdown-it's tokenizer already excludes them from heading/item misparse without first-class AST modeling. Net reduction across the 10 consolidation/cleanup commits: ~3,800 LoC. 
--- extensions/oc-path/src/cli.ts | 417 +++--- extensions/oc-path/src/oc-path/ast.ts | 70 +- extensions/oc-path/src/oc-path/edit.ts | 115 +- extensions/oc-path/src/oc-path/find.ts | 1114 ++++++----------- extensions/oc-path/src/oc-path/jsonc/emit.ts | 51 +- .../oc-path/src/oc-path/jsonc/resolve.ts | 81 +- extensions/oc-path/src/oc-path/jsonl/edit.ts | 135 +- extensions/oc-path/src/oc-path/jsonl/emit.ts | 40 +- extensions/oc-path/src/oc-path/oc-path.ts | 922 ++++---------- extensions/oc-path/src/oc-path/parse.ts | 46 +- extensions/oc-path/src/oc-path/resolve.ts | 104 +- extensions/oc-path/src/oc-path/sentinel.ts | 46 +- extensions/oc-path/src/oc-path/slug.ts | 29 +- .../oc-path/src/oc-path/tests/find.test.ts | 89 +- .../src/oc-path/tests/jsonl/edit.test.ts | 6 +- .../scenarios/append-multi-agent.test.ts | 35 +- .../tests/scenarios/byte-fidelity.test.ts | 48 +- .../tests/scenarios/cross-cutting.test.ts | 29 +- .../scenarios/cross-kind-properties.test.ts | 35 +- .../scenarios/edit-emit-roundtrip.test.ts | 29 +- .../tests/scenarios/frontmatter-edges.test.ts | 54 +- .../tests/scenarios/h2-block-split.test.ts | 68 +- .../src/oc-path/tests/scenarios/items.test.ts | 65 +- .../scenarios/jsonc-byte-fidelity.test.ts | 66 +- .../scenarios/jsonc-resolver-edges.test.ts | 49 +- .../scenarios/jsonl-byte-fidelity.test.ts | 52 +- .../scenarios/jsonl-resolver-edges.test.ts | 47 +- .../tests/scenarios/malformed-input.test.ts | 55 +- .../scenarios/oc-path-parse-edges.test.ts | 102 +- .../scenarios/oc-path-resolver-edges.test.ts | 54 +- .../tests/scenarios/perf-determinism.test.ts | 33 +- .../oc-path/tests/scenarios/pitfalls.test.ts | 637 ---------- .../scenarios/real-world-fixtures.test.ts | 29 +- .../scenarios/roundtrip-property.test.ts | 31 +- .../scenarios/security-and-limits.test.ts | 253 ++++ .../scenarios/sentinel-cross-kind.test.ts | 38 +- .../tests/scenarios/sentinel-guard.test.ts | 39 +- .../src/oc-path/tests/universal.test.ts | 12 - extensions/oc-path/src/oc-path/universal.ts 
| 468 ++----- 39 files changed, 1685 insertions(+), 3908 deletions(-) delete mode 100644 extensions/oc-path/src/oc-path/tests/scenarios/pitfalls.test.ts create mode 100644 extensions/oc-path/src/oc-path/tests/scenarios/security-and-limits.test.ts diff --git a/extensions/oc-path/src/cli.ts b/extensions/oc-path/src/cli.ts index 587039657f6..85aff80a4fc 100644 --- a/extensions/oc-path/src/cli.ts +++ b/extensions/oc-path/src/cli.ts @@ -1,25 +1,9 @@ /** - * `openclaw path` — shell-level access to the OcPath substrate verbs. - * Self-hosters and editor extensions use it to inspect and surgically - * edit workspace files without scripting against the SDK directly. + * `openclaw path` — shell access to the OcPath substrate verbs. * - * Subcommands: - * - `resolve ` — print the match at the path - * - `set ` — write a leaf at the path; supports `--dry-run` - * - `find ` — enumerate matches for a wildcard/predicate path - * - `validate ` — parse-only; print structure - * - `emit ` — read + parseXxx + emitXxx; verifies byte-fidelity - * - * Output is TTY-aware: defaults to human-readable when stdout is a TTY, - * switches to JSON otherwise (so pipes don't get formatting noise). - * `--json` and `--human` flags override the auto-detection. - * - * Boundaries this CLI does NOT cross (v0): - * - Doesn't know about LKG. `set` writes raw bytes through the - * substrate emit; if the file is LKG-tracked, the next observe - * call decides whether to promote / recover. - * - Doesn't know about lint rules or doctor fixers — that's a - * different surface. + * Subcommands: `resolve` / `set` / `find` / `validate` / `emit`. + * TTY-aware output: human when interactive, JSON when piped; `--json` + * / `--human` override. 
*/ import { promises as fs } from "node:fs"; @@ -44,7 +28,6 @@ import { type OcAst, type OcMatch, type OcPath, - type SetResult, } from "./oc-path/index.js"; export type OutputRuntimeEnv = { @@ -77,26 +60,16 @@ const defaultRuntime: OutputRuntimeEnv = { }, }; -/** - * Output-boundary sentinel scrub. Replaces every occurrence of the - * redaction sentinel with `[REDACTED]` before writing to the output - * stream. Defense-in-depth — even if a future code path surfaces raw - * file content carrying the sentinel, the CLI must not echo it. - */ +// Defense-in-depth: replace the redaction sentinel with `[REDACTED]` +// before writing, even if upstream emits it. export function scrubSentinel(s: string): string { - if (!s.includes(REDACTED_SENTINEL)) { - return s; - } + if (!s.includes(REDACTED_SENTINEL)) return s; return s.split(REDACTED_SENTINEL).join(SCRUB_PLACEHOLDER); } function detectMode(options: PathCommandOptions): OutputMode { - if (options.json === true) { - return "json"; - } - if (options.human === true) { - return "human"; - } + if (options.json === true) return "json"; + if (options.human === true) return "human"; return process.stdout.isTTY ? "human" : "json"; } @@ -127,24 +100,70 @@ function emitError( runtime.error(`${code}: ${scrubbed}`); } +/** Bail with usage error if a required arg is missing. */ +function requireArg( + value: T | undefined, + usage: string, + runtime: OutputRuntimeEnv, + mode: OutputMode, +): value is T extends undefined ? never : T { + if (value === undefined) { + emitError(runtime, mode, usage); + runtime.exit(2); + return false; + } + return true; +} + +/** Parse an oc-path string; emit structured error and return null on failure. 
*/ +function tryParse( + pathStr: string, + runtime: OutputRuntimeEnv, + mode: OutputMode, +): OcPath | null { + try { + return parseOcPath(pathStr); + } catch (err) { + if (err instanceof OcPathError) { + emitError(runtime, mode, `parse failed: ${err.message}`, err.code); + runtime.exit(2); + return null; + } + throw err; + } +} + +// Catch OcEmitSentinelError so it goes through the structured error +// path; otherwise commander prints `String(err)` raw and bypasses the +// `--json` scrubbed-error boundary. +function catchSentinel( + label: string, + runtime: OutputRuntimeEnv, + mode: OutputMode, + fn: () => T, +): T | null { + try { + return fn(); + } catch (err) { + if (err instanceof OcEmitSentinelError) { + emitError(runtime, mode, `${label} refused: ${err.message}`, "OC_EMIT_SENTINEL"); + runtime.exit(1); + return null; + } + throw err; + } +} + async function loadAst(absPath: string, fileName: string): Promise { const raw = await fs.readFile(absPath, "utf-8"); const kind = inferKind(fileName); - if (kind === "jsonc") { - return parseJsonc(raw).ast; - } - if (kind === "jsonl") { - return parseJsonl(raw).ast; - } + if (kind === "jsonc") return parseJsonc(raw).ast; + if (kind === "jsonl") return parseJsonl(raw).ast; return parseMd(raw).ast; } function emitForKind(ast: OcAst, fileName?: string): string { - // Plumb fileName through so OcEmitSentinelError messages carry the - // file context (`oc://gateway.jsonc/[raw]`) instead of the - // empty-slot fallback (`oc:///[raw]`). Test S-12 in the wave-21 - // sentinel suite asserts the OcPath context appears in the error; - // without this plumbing, CLI emits had it stripped. + // Plumb fileName so sentinel errors carry file context. const opts = fileName !== undefined ? 
{ fileNameForGuard: fileName } : {}; switch (ast.kind) { case "jsonc": @@ -154,62 +173,42 @@ function emitForKind(ast: OcAst, fileName?: string): string { case "md": return emitMd(ast, opts); } - throw new Error(`unreachable: emitForKind kind`); } function resolveFsPath(path: OcPath, options: PathCommandOptions): string { - const cwd = options.cwd ?? process.cwd(); - if (options.file !== undefined) { - return resolvePath(options.file); - } - return resolvePath(cwd, path.file); + if (options.file !== undefined) return resolvePath(options.file); + return resolvePath(options.cwd ?? process.cwd(), path.file); } function formatMatchHuman(match: OcMatch): string { if (match.kind === "leaf") { return `leaf @ L${match.line}: ${JSON.stringify(match.valueText)} (${match.leafType})`; } - if (match.kind === "node") { - return `node @ L${match.line} [${match.descriptor}]`; - } + if (match.kind === "node") return `node @ L${match.line} [${match.descriptor}]`; if (match.kind === "insertion-point") { return `insertion-point @ L${match.line} [${match.container}]`; } return `root @ L${match.line}`; } +// ---------- Commands ----------------------------------------------------- + export async function pathResolveCommand( pathStr: string | undefined, options: PathCommandOptions, runtime: OutputRuntimeEnv, ): Promise { const mode = detectMode(options); - if (pathStr === undefined) { - emitError(runtime, mode, "resolve: missing argument"); - runtime.exit(2); - return; - } - let ocPath: OcPath; - try { - ocPath = parseOcPath(pathStr); - } catch (err) { - if (err instanceof OcPathError) { - emitError(runtime, mode, `parse failed: ${err.message}`, err.code); - runtime.exit(2); - return; - } - throw err; - } - const fsPath = resolveFsPath(ocPath, options); - const ast = await loadAst(fsPath, ocPath.file); - let match; + if (!requireArg(pathStr, "resolve: missing argument", runtime, mode)) return; + const ocPath = tryParse(pathStr, runtime, mode); + if (ocPath === null) return; + const ast = 
await loadAst(resolveFsPath(ocPath, options), ocPath.file); + let match: OcMatch | null; try { match = resolveOcPath(ast, ocPath); } catch (err) { if (err instanceof OcPathError) { - // resolveOcPath now throws on wildcard patterns (the pattern - // belongs in `find`, not `resolve`). Surface the structured code - // so the CLI message points the caller at the right verb. + // resolveOcPath throws on wildcard patterns — point at find. emitError(runtime, mode, `resolve refused: ${err.message}`, err.code); runtime.exit(2); return; @@ -221,7 +220,7 @@ export async function pathResolveCommand( runtime.exit(1); return; } - emit(runtime, mode, { resolved: true, ocPath: pathStr, match }, () => formatMatchHuman(match)); + emit(runtime, mode, { resolved: true, ocPath: pathStr, match }, () => formatMatchHuman(match!)); } export async function pathSetCommand( @@ -231,41 +230,15 @@ export async function pathSetCommand( runtime: OutputRuntimeEnv, ): Promise { const mode = detectMode(options); - if (pathStr === undefined || value === undefined) { - emitError(runtime, mode, "set: requires "); - runtime.exit(2); - return; - } - let ocPath: OcPath; - try { - ocPath = parseOcPath(pathStr); - } catch (err) { - if (err instanceof OcPathError) { - emitError(runtime, mode, `parse failed: ${err.message}`, err.code); - runtime.exit(2); - return; - } - throw err; - } + if (!requireArg(pathStr, "set: requires ", runtime, mode)) return; + if (!requireArg(value, "set: requires ", runtime, mode)) return; + const ocPath = tryParse(pathStr, runtime, mode); + if (ocPath === null) return; const fsPath = resolveFsPath(ocPath, options); const ast = await loadAst(fsPath, ocPath.file); - // `setOcPath` invokes the per-kind editor which calls back into - // emit during rebuildRaw; the redaction-sentinel guard fires there - // and throws `OcEmitSentinelError` for sentinel-bearing values. 
- // Catch the throw here so it goes through the structured CLI error - // path instead of escaping to commander's runCommandWithRuntime - // (which would print raw String(err) and bypass --json scrubbing). - let result: SetResult; - try { - result = setOcPath(ast, ocPath, value); - } catch (err) { - if (err instanceof OcEmitSentinelError) { - emitError(runtime, mode, `set refused: ${err.message}`, "OC_EMIT_SENTINEL"); - runtime.exit(1); - return; - } - throw err; - } + + const result = catchSentinel("set", runtime, mode, () => setOcPath(ast, ocPath, value)); + if (result === null) return; if (!result.ok) { const detail = "detail" in result ? result.detail : undefined; emit( @@ -277,25 +250,12 @@ export async function pathSetCommand( runtime.exit(1); return; } - // `setOcPath` accepted the value into the AST, but the per-kind - // emit can still refuse to serialize it — most notably when the - // value contains the redaction sentinel (defense-in-depth: the - // substrate's emit guard fires there). The throw must NOT escape - // to commander's runCommandWithRuntime, which would print - // `String(err)` raw and bypass the CLI's JSON/human scrubbed-error - // boundary. Catch and route through `emitError` like every other - // refusal path. - let newBytes: string; - try { - newBytes = emitForKind(result.ast, ocPath.file); - } catch (err) { - if (err instanceof OcEmitSentinelError) { - emitError(runtime, mode, `emit refused: ${err.message}`, "OC_EMIT_SENTINEL"); - runtime.exit(1); - return; - } - throw err; - } + // Per-kind emit can still refuse the sentinel even after set succeeds. 
+ const newBytes = catchSentinel("emit", runtime, mode, () => + emitForKind(result.ast, ocPath.file), + ); + if (newBytes === null) return; + if (options.dryRun === true) { emit( runtime, @@ -320,27 +280,10 @@ export async function pathFindCommand( runtime: OutputRuntimeEnv, ): Promise { const mode = detectMode(options); - if (patternStr === undefined) { - emitError(runtime, mode, "find: missing argument"); - runtime.exit(2); - return; - } - let pattern: OcPath; - try { - pattern = parseOcPath(patternStr); - } catch (err) { - if (err instanceof OcPathError) { - emitError(runtime, mode, `parse failed: ${err.message}`, err.code); - runtime.exit(2); - return; - } - throw err; - } - // The CLI resolves `pattern.file` to a single literal filesystem path. - // Wildcards in the file slot (e.g. `oc://*.jsonc/...`) would silently - // ENOENT during `fs.readFile`. The substrate's `findOcPaths` walks - // *inside* an AST — multi-file globbing is out of scope for v0. Surface - // a clear error so users don't get a confusing missing-file failure. + if (!requireArg(patternStr, "find: missing argument", runtime, mode)) return; + const pattern = tryParse(patternStr, runtime, mode); + if (pattern === null) return; + // File-slot wildcards would silently ENOENT during readFile; reject. 
if (/[*?]/.test(pattern.file)) { emitError( runtime, @@ -352,8 +295,7 @@ export async function pathFindCommand( runtime.exit(2); return; } - const fsPath = resolveFsPath(pattern, options); - const ast = await loadAst(fsPath, pattern.file); + const ast = await loadAst(resolveFsPath(pattern, options), pattern.file); const matches = findOcPaths(ast, pattern); emit( runtime, @@ -361,15 +303,10 @@ export async function pathFindCommand( { pattern: patternStr, count: matches.length, - matches: matches.map((m) => ({ - path: formatOcPath(m.path), - match: m.match, - })), + matches: matches.map((m) => ({ path: formatOcPath(m.path), match: m.match })), }, () => { - if (matches.length === 0) { - return `0 matches for ${patternStr}`; - } + if (matches.length === 0) return `0 matches for ${patternStr}`; const plural = matches.length === 1 ? "" : "es"; const lines = [`${matches.length} match${plural} for ${patternStr}:`]; for (const m of matches) { @@ -378,9 +315,7 @@ export async function pathFindCommand( return lines.join("\n"); }, ); - if (matches.length === 0) { - runtime.exit(1); - } + if (matches.length === 0) runtime.exit(1); } export function pathValidateCommand( @@ -389,11 +324,7 @@ export function pathValidateCommand( runtime: OutputRuntimeEnv, ): void { const mode = detectMode(options); - if (pathStr === undefined) { - emitError(runtime, mode, "validate: missing argument"); - runtime.exit(2); - return; - } + if (!requireArg(pathStr, "validate: missing argument", runtime, mode)) return; try { const ocPath = parseOcPath(pathStr); emit( @@ -413,22 +344,13 @@ export function pathValidateCommand( }, () => { const lines = [`valid: ${pathStr}`, ` file: ${ocPath.file}`]; - if (ocPath.section !== undefined) { - lines.push(` section: ${ocPath.section}`); - } - if (ocPath.item !== undefined) { - lines.push(` item: ${ocPath.item}`); - } - if (ocPath.field !== undefined) { - lines.push(` field: ${ocPath.field}`); - } - if (ocPath.session !== undefined) { - lines.push(` session: 
${ocPath.session}`); - } + if (ocPath.section !== undefined) lines.push(` section: ${ocPath.section}`); + if (ocPath.item !== undefined) lines.push(` item: ${ocPath.item}`); + if (ocPath.field !== undefined) lines.push(` field: ${ocPath.field}`); + if (ocPath.session !== undefined) lines.push(` session: ${ocPath.session}`); return lines.join("\n"); }, ); - return; } catch (err) { if (err instanceof OcPathError) { emit( @@ -450,34 +372,15 @@ export async function pathEmitCommand( runtime: OutputRuntimeEnv, ): Promise { const mode = detectMode(options); - if (fileArg === undefined) { - emitError(runtime, mode, "emit: missing argument"); - runtime.exit(2); - return; - } - // Resolve the file slot through the same `--cwd`/`--file` rules the - // sibling subcommands use: `--file` (when set) is the absolute path - // override; otherwise resolve `fileArg` against `--cwd` (defaulting - // to `process.cwd()`). Without this, the flags are accepted by - // commander but ignored by the handler — exactly the bug-shape - // ClawSweeper flagged for the doc/option mismatch. + if (!requireArg(fileArg, "emit: missing argument", runtime, mode)) return; const fsPath = options.file !== undefined ? resolvePath(options.file) : resolvePath(options.cwd ?? process.cwd(), fileArg); const fileName = fsPath.split(/[\\/]/).pop() ?? 
fileArg; const ast = await loadAst(fsPath, fileName); - let bytes: string; - try { - bytes = emitForKind(ast, fileName); - } catch (err) { - if (err instanceof OcEmitSentinelError) { - emitError(runtime, mode, `emit refused: ${err.message}`, "OC_EMIT_SENTINEL"); - runtime.exit(1); - return; - } - throw err; - } + const bytes = catchSentinel("emit", runtime, mode, () => emitForKind(ast, fileName)); + if (bytes === null) return; if (mode === "json") { runtime.writeStdout(scrubSentinel(JSON.stringify({ ok: true, kind: ast.kind, bytes }))); return; @@ -485,22 +388,14 @@ export async function pathEmitCommand( runtime.writeStdout(bytes); } -interface RawPathOptions { - json?: boolean; - human?: boolean; - cwd?: string; - file?: string; - dryRun?: boolean; -} +// ---------- Commander wiring --------------------------------------------- -function normalize(opts: RawPathOptions): PathCommandOptions { - return { - json: opts.json, - human: opts.human, - cwd: opts.cwd, - file: opts.file, - dryRun: opts.dryRun, - }; +function withCommonOpts(cmd: Command): Command { + return cmd + .option("--json", "Force JSON output") + .option("--human", "Force human output") + .option("--cwd ", "Resolve file slot against this directory") + .option("--file ", "Override the file slot's resolved path"); } export function registerPathCli(program: Command): void { @@ -509,43 +404,34 @@ export function registerPathCli(program: Command): void { .description("Inspect and edit workspace files via the oc:// addressing scheme") .addHelpText("after", "\nDocs: https://docs.openclaw.ai/cli/path\n"); - path - .command("resolve") - .description("Print the match at an oc:// path") - .argument("", "oc:// path to resolve") - .option("--json", "Force JSON output") - .option("--human", "Force human output") - .option("--cwd ", "Resolve file slot against this directory") - .option("--file ", "Override the file slot's resolved path") - .action(async (pathStr: string, opts: RawPathOptions) => { - await 
pathResolveCommand(pathStr, normalize(opts), defaultRuntime); - }); + withCommonOpts( + path + .command("resolve") + .description("Print the match at an oc:// path") + .argument("", "oc:// path to resolve"), + ).action(async (pathStr: string, opts: PathCommandOptions) => { + await pathResolveCommand(pathStr, opts, defaultRuntime); + }); - path - .command("find") - .description("Enumerate matches for a wildcard / predicate oc:// pattern") - .argument("", "oc:// pattern") - .option("--json", "Force JSON output") - .option("--human", "Force human output") - .option("--cwd ", "Resolve file slot against this directory") - .option("--file ", "Override the file slot's resolved path") - .action(async (patternStr: string, opts: RawPathOptions) => { - await pathFindCommand(patternStr, normalize(opts), defaultRuntime); - }); + withCommonOpts( + path + .command("find") + .description("Enumerate matches for a wildcard / predicate oc:// pattern") + .argument("", "oc:// pattern"), + ).action(async (patternStr: string, opts: PathCommandOptions) => { + await pathFindCommand(patternStr, opts, defaultRuntime); + }); - path - .command("set") - .description("Write a leaf value at an oc:// path") - .argument("", "oc:// path to write") - .argument("", "string value to write") - .option("--dry-run", "Print bytes without writing") - .option("--json", "Force JSON output") - .option("--human", "Force human output") - .option("--cwd ", "Resolve file slot against this directory") - .option("--file ", "Override the file slot's resolved path") - .action(async (pathStr: string, value: string, opts: RawPathOptions) => { - await pathSetCommand(pathStr, value, normalize(opts), defaultRuntime); - }); + withCommonOpts( + path + .command("set") + .description("Write a leaf value at an oc:// path") + .argument("", "oc:// path to write") + .argument("", "string value to write") + .option("--dry-run", "Print bytes without writing"), + ).action(async (pathStr: string, value: string, opts: 
PathCommandOptions) => { + await pathSetCommand(pathStr, value, opts, defaultRuntime); + }); path .command("validate") @@ -553,19 +439,16 @@ export function registerPathCli(program: Command): void { .argument("", "oc:// path to validate") .option("--json", "Force JSON output") .option("--human", "Force human output") - .action((pathStr: string, opts: RawPathOptions) => { - pathValidateCommand(pathStr, normalize(opts), defaultRuntime); + .action((pathStr: string, opts: PathCommandOptions) => { + pathValidateCommand(pathStr, opts, defaultRuntime); }); - path - .command("emit") - .description("Round-trip a file through parse + emit") - .argument("", "Path to a workspace file") - .option("--cwd ", "Resolve against this directory") - .option("--file ", "Override the file's resolved path") - .option("--json", "Force JSON output") - .option("--human", "Force human output") - .action(async (fileArg: string, opts: RawPathOptions) => { - await pathEmitCommand(fileArg, normalize(opts), defaultRuntime); - }); + withCommonOpts( + path + .command("emit") + .description("Round-trip a file through parse + emit") + .argument("", "Path to a workspace file"), + ).action(async (fileArg: string, opts: PathCommandOptions) => { + await pathEmitCommand(fileArg, opts, defaultRuntime); + }); } diff --git a/extensions/oc-path/src/oc-path/ast.ts b/extensions/oc-path/src/oc-path/ast.ts index 512c742e589..cc6ba0ad951 100644 --- a/extensions/oc-path/src/oc-path/ast.ts +++ b/extensions/oc-path/src/oc-path/ast.ts @@ -1,28 +1,14 @@ /** - * Workspace-Markdown AST — generic addressing index over the 8 workspace - * files openclaw treats as opaque text in `loadWorkspaceBootstrapFiles`. + * Markdown AST — addressing index for workspace files. 
* - * **The AST is purely an addressing index.** It does NOT encode opinions - * about what a "valid" SOUL.md / AGENTS.md / MEMORY.md looks like; it - * exposes the markdown features (frontmatter, sections, items, tables, - * code blocks) that any `OcPath` (`{ file, section?, item?, field? }`) can - * resolve over. Per-file lint opinions ride in @openclaw/oc-lint, not - * here. - * - * **Byte-fidelity contract**: `emitMd(parse(raw)) === raw` for every input - * the parser accepts. The parser preserves the original bytes on the - * root node (`raw`) so emitters can round-trip even content the AST - * doesn't structurally model (foreign content, idiosyncratic whitespace). + * Pure addressing structure; no per-file opinions (those live in lint + * rules). Byte-fidelity: `emitMd(parse(raw)) === raw`; `raw` on the + * root preserves the original bytes for round-trip. * * @module @openclaw/oc-path/ast */ -/** - * Diagnostic emitted by the parser. Used by lint rules and parse-error - * surfacing alike. Severity is `info` by default; the parser emits - * `warning` for suspicious-but-recoverable inputs (e.g., unclosed - * frontmatter fence) and never throws. - */ +/** Parser diagnostic. Severity `warning` for recoverable input; never throws. */ export interface Diagnostic { readonly line: number; readonly message: string; @@ -30,10 +16,7 @@ export interface Diagnostic { readonly code?: string; } -/** - * A frontmatter key/value pair. Keys are preserved as written; values - * are unquoted (surrounding `"` or `'` stripped) but otherwise verbatim. - */ +/** Frontmatter entry. Values unquoted (`"`/`'` stripped) but otherwise verbatim. */ export interface FrontmatterEntry { readonly key: string; readonly value: string; @@ -41,14 +24,8 @@ export interface FrontmatterEntry { } /** - * A bullet-list item inside a section. 
Items are addressable via OcPath - * `{ file, section, item }` where `item` is the slug of the bullet's - * text (or the slug of `kv.key` when the bullet is in `- key: value` - * shape). - * - * `kv` is populated when the bullet matches `- : ` (the - * common pattern in AGENTS.md / TOOLS.md / USER.md). Lint rules use it - * for field-level addressing via `OcPath.field`. + * Bullet item. `slug` is the addressing key (kv.key when present, else + * item text). `kv` is populated for `- key: value` bullets. */ export interface AstItem { readonly text: string; @@ -58,16 +35,11 @@ export interface AstItem { } /** - * An H2-delimited block. The `slug` is the kebab-case lowercase form of - * `heading` and is what OcPath `section` matches against. `bodyText` is - * the prose between this heading and the next H2 (or end of file), - * verbatim. `items` are extracted from `bodyText` for addressing - * convenience but the raw text is preserved. + * H2-delimited block. `bodyText` is the verbatim prose between this + * heading and the next; `items` are extracted for addressing. * - * Tables and fenced code blocks are NOT modeled as first-class AST - * children — addressing into them is out of scope for the substrate. - * Lint rules that need table rows or code-block contents re-tokenize - * the block's `bodyText` on demand. + * Tables and code blocks aren't first-class — addressing into them is + * out of scope. Lint rules re-tokenize `bodyText` if needed. */ export interface AstBlock { readonly heading: string; @@ -77,20 +49,7 @@ export interface AstBlock { readonly items: readonly AstItem[]; } -/** - * The root AST node. Always carries `raw` for byte-identical round-trip. - * `frontmatter` is empty when the file has none. `preamble` is the - * prose before the first H2 (may be empty). `blocks` is the H2 tree in - * document order. 
- * - * `kind: 'md'` discriminator matches the jsonc / jsonl AST shapes; - * the universal `setOcPath` / `resolveOcPath` verbs dispatch - * via this tag at runtime so callers don't have to thread kind - * through the call site. - * - * The generic shape is the same for all 9 workspace files; opinions - * (`AGENTS_TOOLS_SECTION_EMPTY`, etc.) ride in lint rules, not here. - */ +/** Root AST. `raw` carries the original bytes for byte-identical round-trip. */ export interface MdAst { readonly kind: "md"; readonly raw: string; @@ -99,9 +58,6 @@ export interface MdAst { readonly blocks: readonly AstBlock[]; } -/** - * Parser output: the AST plus any diagnostics from the parse pass. - */ export interface ParseResult { readonly ast: MdAst; readonly diagnostics: readonly Diagnostic[]; diff --git a/extensions/oc-path/src/oc-path/edit.ts b/extensions/oc-path/src/oc-path/edit.ts index b0ccfd02e67..00d9690769f 100644 --- a/extensions/oc-path/src/oc-path/edit.ts +++ b/extensions/oc-path/src/oc-path/edit.ts @@ -1,15 +1,10 @@ /** - * Mutate a `MdAst` at an OcPath. Returns a new AST with the - * value replaced; the original is unchanged. + * Mutate `MdAst` at an OcPath. Returns a new AST; original unchanged. * - * Writable surface: + * oc://FILE/[frontmatter]/key → frontmatter value + * oc://FILE/section/item/field → item.kv.value * - * oc://FILE/[frontmatter]/key → frontmatter entry value - * oc://FILE/section/item/field → item.kv.value (when item has kv shape) - * - * Section bodies, tables, and code blocks are NOT writable through - * this primitive — they're prose, and a generic "set" doesn't compose - * cleanly. Doctor fixers handle structural edits via dedicated verbs. + * Section bodies aren't writable through this primitive. * * @module @openclaw/oc-path/edit */ @@ -25,74 +20,44 @@ export type MdEditResult = readonly reason: "unresolved" | "not-writable" | "no-item-kv"; }; -/** - * Replace the value at `path` with `newValue`. 
The new AST has fresh - * `raw` re-rendered from the structural fields. - * - * Sentinel guard at the substrate boundary — `setJsoncOcPath` and the - * jsonl `finalize`-via-render path both reject sentinel-bearing values - * before they reach the AST. The md path was deferring entirely to - * round-trip echo through `emitMd`, which `acceptPreExistingSentinel` - * by default skips. Closing the gap here keeps F9 (formatter sentinel - * guard) symmetric across all three kinds. - */ +// Sentinel guard at the boundary keeps md symmetric with jsonc/jsonl, +// which both reject sentinel values before they reach the AST. export function setMdOcPath(ast: MdAst, path: OcPath, newValue: string): MdEditResult { guardSentinel(newValue, formatOcPath(path)); - // Frontmatter address: oc://FILE/[frontmatter]/ if (path.section === "[frontmatter]") { const key = path.item ?? path.field; - if (key === undefined) { - return { ok: false, reason: "unresolved" }; - } + if (key === undefined) return { ok: false, reason: "unresolved" }; const idx = ast.frontmatter.findIndex((e) => e.key === key); - if (idx === -1) { - return { ok: false, reason: "unresolved" }; - } + if (idx === -1) return { ok: false, reason: "unresolved" }; const existing = ast.frontmatter[idx]; - if (existing === undefined) { - return { ok: false, reason: "unresolved" }; - } + if (existing === undefined) return { ok: false, reason: "unresolved" }; const newEntry: FrontmatterEntry = { ...existing, value: newValue }; const newFm = ast.frontmatter.slice(); newFm[idx] = newEntry; return finalize({ ...ast, frontmatter: newFm }); } - // Item-field address: oc://FILE/section/item/field if (path.section === undefined || path.item === undefined || path.field === undefined) { return { ok: false, reason: "not-writable" }; } const sectionSlug = path.section.toLowerCase(); const blockIdx = ast.blocks.findIndex((b) => b.slug === sectionSlug); - if (blockIdx === -1) { - return { ok: false, reason: "unresolved" }; - } + if (blockIdx 
=== -1) return { ok: false, reason: "unresolved" }; const block = ast.blocks[blockIdx]; - if (block === undefined) { - return { ok: false, reason: "unresolved" }; - } + if (block === undefined) return { ok: false, reason: "unresolved" }; const itemSlug = path.item.toLowerCase(); const itemIdx = block.items.findIndex((i) => i.slug === itemSlug); - if (itemIdx === -1) { - return { ok: false, reason: "unresolved" }; - } + if (itemIdx === -1) return { ok: false, reason: "unresolved" }; const item = block.items[itemIdx]; - if (item === undefined) { - return { ok: false, reason: "unresolved" }; - } - if (item.kv === undefined) { - return { ok: false, reason: "no-item-kv" }; - } + if (item === undefined) return { ok: false, reason: "unresolved" }; + if (item.kv === undefined) return { ok: false, reason: "no-item-kv" }; if (item.kv.key.toLowerCase() !== path.field.toLowerCase()) { return { ok: false, reason: "unresolved" }; } - const newItem: AstItem = { - ...item, - kv: { key: item.kv.key, value: newValue }, - }; + const newItem: AstItem = { ...item, kv: { key: item.kv.key, value: newValue } }; const newItems = block.items.slice(); newItems[itemIdx] = newItem; const newBlock: AstBlock = { @@ -105,28 +70,17 @@ export function setMdOcPath(ast: MdAst, path: OcPath, newValue: string): MdEditR return finalize({ ...ast, blocks: newBlocks }); } -/** - * Rebuild block.bodyText so emit-roundtrip mode reflects the edit. We - * do a minimal in-place substitution on the existing bodyText: find - * each `- key: value` line for a touched item and rewrite the value. - * - * For items without a matching bullet line, we leave bodyText alone - * (the structural fields take precedence in render mode anyway). - */ +// In-place substitution on `bodyText` so round-trip emit reflects the +// edit. Items without a matching bullet line are skipped (render mode +// uses structural fields anyway). 
function rebuildBlockBody(block: AstBlock, newItems: readonly AstItem[]): string { let body = block.bodyText; for (let i = 0; i < newItems.length; i++) { const newItem = newItems[i]; const oldItem = block.items[i]; - if (newItem === undefined || oldItem === undefined) { - continue; - } - if (newItem.kv === undefined || oldItem.kv === undefined) { - continue; - } - if (newItem.kv.value === oldItem.kv.value) { - continue; - } + if (newItem === undefined || oldItem === undefined) continue; + if (newItem.kv === undefined || oldItem.kv === undefined) continue; + if (newItem.kv.value === oldItem.kv.value) continue; const re = new RegExp(`^(\\s*-\\s*${escapeRegex(oldItem.kv.key)}\\s*:\\s*).*$`, "m"); body = body.replace(re, `$1${newItem.kv.value}`); } @@ -137,10 +91,6 @@ function escapeRegex(s: string): string { return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); } -/** - * Re-render `ast.raw` from the (possibly mutated) tree using the same - * shape the round-trip emitter expects. - */ function finalize(ast: MdAst): MdEditResult { const parts: string[] = []; if (ast.frontmatter.length > 0) { @@ -151,30 +101,19 @@ function finalize(ast: MdAst): MdEditResult { parts.push("---"); } if (ast.preamble.length > 0) { - if (parts.length > 0) { - parts.push(""); - } + if (parts.length > 0) parts.push(""); parts.push(ast.preamble); } for (const block of ast.blocks) { - if (parts.length > 0) { - parts.push(""); - } + if (parts.length > 0) parts.push(""); parts.push(`## ${block.heading}`); - if (block.bodyText.length > 0) { - parts.push(block.bodyText); - } + if (block.bodyText.length > 0) parts.push(block.bodyText); } - const raw = parts.join("\n"); - return { ok: true, ast: { ...ast, raw } }; + return { ok: true, ast: { ...ast, raw: parts.join("\n") } }; } function formatFrontmatterValue(value: string): string { - if (value.length === 0) { - return '""'; - } - if (/[:#&*?|<>=!%@`,[\]{}\r\n]/.test(value)) { - return JSON.stringify(value); - } + if (value.length === 0) return '""'; + if 
(/[:#&*?|<>=!%@`,[\]{}\r\n]/.test(value)) return JSON.stringify(value); return value; } diff --git a/extensions/oc-path/src/oc-path/find.ts b/extensions/oc-path/src/oc-path/find.ts index dcc265f7b4f..4a9f7662d7a 100644 --- a/extensions/oc-path/src/oc-path/find.ts +++ b/extensions/oc-path/src/oc-path/find.ts @@ -1,26 +1,8 @@ /** - * `findOcPaths` — universal multi-match verb. Pattern syntax extends - * `OcPath` with two wildcard tokens: - * - * `*` — match a single sub-segment (one map key / one array index) - * `**` — match zero or more sub-segments at any depth (recursive) - * - * **Why a separate verb**: `resolveOcPath` and `setOcPath` are - * single-match — they require an exact path because they return one - * value or write one leaf. A pattern would be ambiguous. `findOcPaths` - * is the search verb: pass a pattern, get every concrete OcPath that - * matches plus its `OcMatch` (kind + leaf text / node descriptor). - * - * Every returned `OcPathMatch` carries a concrete (wildcard-free) - * `OcPath`, so callers can pipe results through `setOcPath` or - * `resolveOcPath` without rebuilding the path. The slot shape of the - * input pattern is preserved (a `*` in the `item` slot produces a - * concrete path with the matched value still in `item`). - * - * **Use cases driving v0**: - * - lint rules iterating `oc://workflow.lobster/steps/* /command` - * - jsonl session walks `oc://session/* /eventType` - * - md frontmatter sweeps `oc://SOUL.md/[frontmatter]/*` + * `findOcPaths` — multi-match verb. `*` matches one sub-segment; + * `**` matches zero or more (recursive). Returns concrete OcPaths + * preserving the input pattern's slot shape, so each result is + * pipeable into `resolveOcPath` / `setOcPath`. * * @module @openclaw/oc-path/find */ @@ -60,44 +42,23 @@ export interface OcPathMatch { readonly match: OcMatch; } -/** - * The slot a sub-segment came from in the input pattern. 
Walker outputs - * carry slot tags so re-packing into `OcPath` preserves the pattern's - * shape (a `*` in the `item` slot produces a path with the matched - * value in `item`, not joined into `section`). - */ type Slot = "section" | "item" | "field"; interface SlotSub { readonly slot: Slot; readonly value: string; } - -/** A single tagged sub-segment of the pattern (post dot-split). */ interface PatternSub { readonly slot: Slot; readonly value: string; } +type OnMatch = (subs: readonly SlotSub[]) => void; + // ---------- Public verb ---------------------------------------------------- -/** - * Match `pattern` against `ast` and return every concrete OcPath that - * resolves. Empty array when nothing matches. - * - * Pattern semantics: same shape as `OcPath`, but any sub-segment may be - * `*` (single-segment wildcard) or `**` (recursive descent). A pattern - * with no wildcards is equivalent to a single `resolveOcPath` call, - * wrapped into the find shape. - * - * **Insertion-marker patterns are not supported**: a `+`/`+key`/`+nnn` - * suffix is meaningless in find context (you don't search for a place - * to insert). Such patterns return an empty array. - */ export function findOcPaths(ast: OcAst, pattern: OcPath): readonly OcPathMatch[] { const subs = patternSubs(pattern); // Fast-path: no expansion needed — pure literals just resolve. - // Anything that can yield 0+ matches (wildcard, positional, union, - // predicate) flows through the walker. const needsExpansion = subs.some( (s) => s.value === WILDCARD_SINGLE || @@ -110,7 +71,24 @@ export function findOcPaths(ast: OcAst, pattern: OcPath): readonly OcPathMatch[] const m = resolveOcPath(ast, pattern); return m === null ? 
[] : [{ path: pattern, match: m }]; } - const concretePaths = expand(ast, subs, pattern); + + const concretePaths: OcPath[] = []; + const onMatch: OnMatch = (slotSubs) => { + concretePaths.push(repackSlotSubs(pattern, slotSubs)); + }; + switch (ast.kind) { + case "jsonc": + if (ast.root !== null) { + walkJsonc(ast.root, subs, 0, [], onMatch); + } + break; + case "jsonl": + walkJsonl(ast, subs, 0, [], onMatch); + break; + case "md": + walkMd({ kind: "root", ast }, subs, 0, [], onMatch); + break; + } const out: OcPathMatch[] = []; for (const concrete of concretePaths) { @@ -127,7 +105,7 @@ export function findOcPaths(ast: OcAst, pattern: OcPath): readonly OcPathMatch[] function patternSubs(pattern: OcPath): readonly PatternSub[] { const out: PatternSub[] = []; // Bracket-aware split so dots inside `[k=1.0]` or `{a.b,c}` aren't - // treated as sub-segment delimiters (P-012/P-013). + // treated as sub-segment delimiters. if (pattern.section !== undefined) { for (const v of splitRespectingBrackets(pattern.section, ".")) { out.push({ slot: "section", value: v }); @@ -151,13 +129,9 @@ function repackSlotSubs(pattern: OcPath, slotSubs: readonly SlotSub[]): OcPath { const itemSubs: string[] = []; const fieldSubs: string[] = []; for (const s of slotSubs) { - if (s.slot === "section") { - sectionSubs.push(s.value); - } else if (s.slot === "item") { - itemSubs.push(s.value); - } else { - fieldSubs.push(s.value); - } + if (s.slot === "section") sectionSubs.push(s.value); + else if (s.slot === "item") itemSubs.push(s.value); + else fieldSubs.push(s.value); } return { file: pattern.file, @@ -168,30 +142,87 @@ function repackSlotSubs(pattern: OcPath, slotSubs: readonly SlotSub[]): OcPath { }; } -// ---------- Per-kind dispatch --------------------------------------------- +// ---------- Shared dispatch ---------------------------------------------- -function expand(ast: OcAst, subs: readonly PatternSub[], pattern: OcPath): readonly OcPath[] { - const concretePaths: OcPath[] = []; 
- // Walker enumerates concrete sub-segments by walking the AST against - // `subs`, emitting one slot-tagged-sub list per leaf. Each list is - // re-packed into an OcPath preserving the pattern's slot shape. - const onMatch = (slotSubs: readonly SlotSub[]): void => { - concretePaths.push(repackSlotSubs(pattern, slotSubs)); - }; - switch (ast.kind) { - case "jsonc": - if (ast.root !== null) { - walkJsonc(ast.root, subs, 0, [], onMatch); - } - break; - case "jsonl": - walkJsonl(ast, subs, 0, [], onMatch); - break; - case "md": - walkMd(ast, subs, 0, [], onMatch); - break; +// Per-kind ops the dispatcher uses to drive recursion. Each kind's +// walker fills these in; the dispatcher handles every segment shape. +interface WalkOps<T> { + enumerate(node: T): Iterable<{ keySub: string; child: T }>; + lookup(node: T, key: string): { keySub: string; child: T } | null; + positional(node: T, seg: string): { keySub: string; child: T } | null; + predicate(node: T, pred: PredicateSpec): Iterable<{ keySub: string; child: T }>; + walk(node: T, subs: readonly PatternSub[], i: number, walked: readonly SlotSub[], onMatch: OnMatch): void; +} + +function checkDepth(walked: readonly SlotSub[]): void { + if (walked.length > MAX_TRAVERSAL_DEPTH) { + throw new OcPathError( + `findOcPaths exceeded MAX_TRAVERSAL_DEPTH (${MAX_TRAVERSAL_DEPTH}) — likely a pathological pattern`, + "", + "OC_PATH_DEPTH_EXCEEDED", + ); } - return concretePaths; +} + +function dispatchSeg<T>( + node: T, + ops: WalkOps<T>, + subs: readonly PatternSub[], + i: number, + walked: readonly SlotSub[], + onMatch: OnMatch, +): void { + const cur = subs[i]; + + if (isUnionSeg(cur.value)) { + const alts = parseUnionSeg(cur.value); + if (alts === null) return; + for (const alt of alts) { + const altSubs = subs.slice(); + altSubs[i] = { slot: cur.slot, value: alt }; + ops.walk(node, altSubs, i, walked, onMatch); + } + return; + } + + if (isPredicateSeg(cur.value)) { + const pred = parsePredicateSeg(cur.value); + if (pred === null)
return; + for (const m of ops.predicate(node, pred)) { + ops.walk(m.child, subs, i + 1, [...walked, { slot: cur.slot, value: m.keySub }], onMatch); + } + return; + } + + if (cur.value === WILDCARD_RECURSIVE) { + // `**` — descend with `**` consumed (i+1) AND retained (i) so + // deeper structures still match. Emit if no subs remain. + if (i + 1 >= subs.length) onMatch(walked); + for (const m of ops.enumerate(node)) { + const nextWalked: readonly SlotSub[] = [...walked, { slot: cur.slot, value: m.keySub }]; + ops.walk(m.child, subs, i + 1, nextWalked, onMatch); + ops.walk(m.child, subs, i, nextWalked, onMatch); + } + return; + } + + if (cur.value === WILDCARD_SINGLE) { + for (const m of ops.enumerate(node)) { + ops.walk(m.child, subs, i + 1, [...walked, { slot: cur.slot, value: m.keySub }], onMatch); + } + return; + } + + if (isPositionalSeg(cur.value)) { + const m = ops.positional(node, cur.value); + if (m === null) return; + ops.walk(m.child, subs, i + 1, [...walked, { slot: cur.slot, value: m.keySub }], onMatch); + return; + } + + const m = ops.lookup(node, cur.value); + if (m === null) return; + ops.walk(m.child, subs, i + 1, [...walked, { slot: cur.slot, value: m.keySub }], onMatch); } // ---------- JSONC walker --------------------------------------------------- @@ -201,352 +232,64 @@ function walkJsonc( subs: readonly PatternSub[], i: number, walked: readonly SlotSub[], - onMatch: (subs: readonly SlotSub[]) => void, + onMatch: OnMatch, ): void { - if (walked.length > MAX_TRAVERSAL_DEPTH) { - throw new OcPathError( - `findOcPaths exceeded MAX_TRAVERSAL_DEPTH (${MAX_TRAVERSAL_DEPTH}) — likely a pathological pattern`, - "", - "OC_PATH_DEPTH_EXCEEDED", - ); - } + checkDepth(walked); if (i >= subs.length) { onMatch(walked); return; } - let cur = subs[i]; + dispatchSeg(node, jsoncOps, subs, i, walked, onMatch); +} - if (isUnionSeg(cur.value)) { - const alts = parseUnionSeg(cur.value); - if (alts === null) { - return; +const jsoncOps: WalkOps<JsoncValue> = { + *enumerate(node)
{ + if (node.kind === "object") { + for (const e of node.entries) yield { keySub: quoteSeg(e.key), child: e.value }; + } else if (node.kind === "array") { + for (let idx = 0; idx < node.items.length; idx++) { + yield { keySub: String(idx), child: node.items[idx] }; + } } - for (const alt of alts) { - const altSubs = subs.slice(); - altSubs[i] = { slot: cur.slot, value: alt }; - walkJsonc(node, altSubs, i, walked, onMatch); + }, + lookup(node, key) { + if (node.kind === "object") { + // Entry keys are unquoted in the AST; strip quotes from a quoted + // path key so the walker matches the resolver's behavior. + const lookupKey = isQuotedSeg(key) ? unquoteSeg(key) : key; + const e = node.entries.find((entry) => entry.key === lookupKey); + return e === undefined ? null : { keySub: key, child: e.value }; } - return; - } - - if (isPredicateSeg(cur.value)) { - const pred = parsePredicateSeg(cur.value); - if (pred === null) { - return; + if (node.kind === "array") { + const idx = Number(key); + if (!Number.isInteger(idx) || idx < 0 || idx >= node.items.length) return null; + return { keySub: key, child: node.items[idx] }; } + return null; + }, + positional(node, seg) { + const concrete = positionalForJsoncNode(node, seg); + if (concrete === null) return null; + return jsoncOps.lookup(node, concrete); + }, + *predicate(node, pred) { if (node.kind === "object") { for (const e of node.entries) { if (jsoncChildMatchesPredicate(e.value, pred)) { - walkJsonc( - e.value, - subs, - i + 1, - [...walked, { slot: cur.slot, value: quoteSeg(e.key) }], - onMatch, - ); + yield { keySub: quoteSeg(e.key), child: e.value }; } } } else if (node.kind === "array") { - node.items.forEach((child, idx) => { - if (jsoncChildMatchesPredicate(child, pred)) { - walkJsonc( - child, - subs, - i + 1, - [...walked, { slot: cur.slot, value: String(idx) }], - onMatch, - ); + for (let idx = 0; idx < node.items.length; idx++) { + if (jsoncChildMatchesPredicate(node.items[idx], pred)) { + yield { keySub: 
String(idx), child: node.items[idx] }; } - }); - } - return; - } - - if (isPositionalSeg(cur.value)) { - const concrete = positionalForJsoncNode(node, cur.value); - if (concrete === null) { - return; - } - cur = { slot: cur.slot, value: concrete }; - } - - if (cur.value === WILDCARD_RECURSIVE) { - walkJsonc(node, subs, i + 1, walked, onMatch); - if (node.kind === "object") { - for (const e of node.entries) { - walkJsonc( - e.value, - subs, - i, - [...walked, { slot: cur.slot, value: quoteSeg(e.key) }], - onMatch, - ); - } - } else if (node.kind === "array") { - node.items.forEach((child, idx) => { - walkJsonc(child, subs, i, [...walked, { slot: cur.slot, value: String(idx) }], onMatch); - }); - } - return; - } - - if (cur.value === WILDCARD_SINGLE) { - if (node.kind === "object") { - for (const e of node.entries) { - walkJsonc( - e.value, - subs, - i + 1, - [...walked, { slot: cur.slot, value: quoteSeg(e.key) }], - onMatch, - ); - } - } else if (node.kind === "array") { - node.items.forEach((child, idx) => { - walkJsonc(child, subs, i + 1, [...walked, { slot: cur.slot, value: String(idx) }], onMatch); - }); - } - return; - } - - if (node.kind === "object") { - // `cur.value` may be a quoted segment (e.g. `"a/b"`); AST entry - // keys are already unquoted. Strip the quotes before comparing - // so the find-expansion walker matches `resolveJsoncOcPath`'s - // unquoting behavior — closes the resolve-vs-find asymmetry - // flagged on PR #78678. - const lookupKey = isQuotedSeg(cur.value) ? 
unquoteSeg(cur.value) : cur.value; - const e = node.entries.find((entry) => entry.key === lookupKey); - if (e === undefined) { - return; - } - walkJsonc(e.value, subs, i + 1, [...walked, { slot: cur.slot, value: cur.value }], onMatch); - return; - } - if (node.kind === "array") { - const idx = Number(cur.value); - if (!Number.isInteger(idx) || idx < 0 || idx >= node.items.length) { - return; - } - walkJsonc( - node.items[idx], - subs, - i + 1, - [...walked, { slot: cur.slot, value: cur.value }], - onMatch, - ); - } -} - -// ---------- JSONL walker --------------------------------------------------- - -function walkJsonl( - ast: JsonlAst, - subs: readonly PatternSub[], - i: number, - walked: readonly SlotSub[], - onMatch: (subs: readonly SlotSub[]) => void, -): void { - // Bound recursion at the line-enumeration layer — without this guard, - // a `**` pattern over a 100k-line forensic log dispatches per-line - // walkJsonc (which has its own guard) but the JSONL outer driver has - // no per-walker depth bound. JSONL session logs are exactly the kind - // of file that grows unbounded in production (replay, audit), so - // defense-in-depth at the outer layer mirrors the jsonc walker. - if (walked.length > MAX_TRAVERSAL_DEPTH) { - throw new OcPathError( - `findOcPaths exceeded MAX_TRAVERSAL_DEPTH (${MAX_TRAVERSAL_DEPTH}) — likely a pathological JSONL pattern`, - "", - "OC_PATH_DEPTH_EXCEEDED", - ); - } - if (i >= subs.length) { - onMatch(walked); - return; - } - const cur = subs[i]; - - // Line-address slot — `*` enumerates every value line; `**` adds a - // 0-segment skip in addition to enumerating; literal matches `Lnnn` - // / `$first` / `$last` / `-N` (negative index); union matches each - // alternative; predicate filters by per-line top-level field. - // The first sub MUST address a line; deeper subs walk inside the - // line's JSON value. 
- if (walked.length === 0) { - if (cur.value === WILDCARD_RECURSIVE) { - // 0-match has no meaning for jsonl (the file root has no leaves); - // every remaining match must include a line. So skip the 0-match - // expansion and only enumerate. - forEachValueLine(ast, (l, addr) => { - walkJsonlInsideLine(l, subs, i, [{ slot: cur.slot, value: addr }], onMatch); - }); - return; - } - if (cur.value === WILDCARD_SINGLE) { - forEachValueLine(ast, (l, addr) => { - walkJsonlInsideLine(l, subs, i + 1, [{ slot: cur.slot, value: addr }], onMatch); - }); - return; - } - if (isUnionSeg(cur.value)) { - // `{L1,L2}` enumerates each alternative independently — the - // jsonc walker handles union uniformly at every slot, so the - // jsonl line slot must too. Each alternative goes through the - // same single-line resolution as a literal `Lnnn` / `$first` / - // `-N` would (so unions of positional tokens, e.g. `{L1,$last}`, - // work as expected). - const alts = parseUnionSeg(cur.value); - if (alts === null) { - return; - } - for (const alt of alts) { - const line = pickLine(ast, alt); - if (line === null) { - continue; - } - const concreteAddr = line.kind === "value" ? `L${line.line}` : alt; - walkJsonlInsideLine(line, subs, i + 1, [{ slot: cur.slot, value: concreteAddr }], onMatch); - } - return; - } - if (isPredicateSeg(cur.value)) { - // `[event=foo]` filters value lines by the predicate's key/op - // applied to the top-level field of each line's parsed JSON. - // Parsing is structural (no recursion into nested children) — - // a predicate inside a line's body uses the same syntax inside - // the JSONC walker's predicate path. 
- const pred = parsePredicateSeg(cur.value); - if (pred === null) { - return; - } - forEachValueLine(ast, (l, addr) => { - if (l.kind !== "value") { - return; - } - const actual = topLevelLeafText(l.value, pred.key); - if (!evaluatePredicate(actual, pred)) { - return; - } - walkJsonlInsideLine(l, subs, i + 1, [{ slot: cur.slot, value: addr }], onMatch); - }); - return; - } - // Positional / Lnnn / literal — pickLine handles all single-line - // addressing tokens. The emitted concrete address is `Lnnn` (the - // canonical line-address form) regardless of how it was looked up. - const line = pickLine(ast, cur.value); - if (line === null) { - return; - } - const concreteAddr = line.kind === "value" ? `L${line.line}` : cur.value; - walkJsonlInsideLine(line, subs, i + 1, [{ slot: cur.slot, value: concreteAddr }], onMatch); - return; - } -} - -/** - * Stringify the top-level field's leaf value for predicate evaluation - * at the jsonl line slot. Only string/number/boolean/null leaves - * compare; nested objects/arrays return `null` (predicate doesn't - * match a non-leaf sibling). - */ -function topLevelLeafText(value: JsoncValue, key: string): string | null { - if (value.kind !== "object") { - return null; - } - const entry = value.entries.find((e) => e.key === key); - if (entry === undefined) { - return null; - } - const v = entry.value; - if (v.kind === "string") { - return v.value; - } - if (v.kind === "number" || v.kind === "boolean") { - return String(v.value); - } - if (v.kind === "null") { - return null; - } - return null; -} - -function walkJsonlInsideLine( - line: JsonlLine, - subs: readonly PatternSub[], - i: number, - walked: readonly SlotSub[], - onMatch: (subs: readonly SlotSub[]) => void, -): void { - // Mirror the outer guard so a hostile pattern that bypasses the - // top-of-walkJsonl path (e.g., reached via direct call from a future - // helper) still lands on the depth bound. 
walkJsonc inside has its - // own bound, but the slot-sub list extends across both layers — the - // depth check must consider the full `walked` history. - if (walked.length > MAX_TRAVERSAL_DEPTH) { - throw new OcPathError( - `findOcPaths exceeded MAX_TRAVERSAL_DEPTH (${MAX_TRAVERSAL_DEPTH}) — likely a pathological JSONL pattern`, - "", - "OC_PATH_DEPTH_EXCEEDED", - ); - } - if (i >= subs.length) { - onMatch(walked); - return; - } - if (line.kind !== "value") { - return; - } - walkJsonc(line.value, subs, i, walked, onMatch); -} - -function forEachValueLine(ast: JsonlAst, visit: (line: JsonlLine, addr: string) => void): void { - for (const l of ast.lines) { - if (l.kind === "value") { - visit(l, `L${l.line}`); - } - } -} - -function pickLine(ast: JsonlAst, addr: string): JsonlLine | null { - if (addr === "$last") { - for (let i = ast.lines.length - 1; i >= 0; i--) { - const l = ast.lines[i]; - if (l !== undefined && l.kind === "value") { - return l; } } - return null; - } - if (addr === "$first") { - for (const l of ast.lines) { - if (l.kind === "value") { - return l; - } - } - return null; - } - if (/^-\d+$/.test(addr)) { - const valueLines = ast.lines.filter( - (l): l is Extract => l.kind === "value", - ); - const n = valueLines.length + Number(addr); - return n >= 0 && n < valueLines.length ? valueLines[n] : null; - } - const m = /^L(\d+)$/.exec(addr); - if (m === null || m[1] === undefined) { - return null; - } - const target = Number(m[1]); - for (const l of ast.lines) { - if (l.line === target) { - return l; - } - } - return null; -} + }, + walk: walkJsonc, +}; -// Helpers shared by the walkers above. 
function positionalForJsoncNode(node: JsoncValue, seg: string): string | null { if (node.kind === "object") { const keys = node.entries.map((e) => e.key); @@ -558,82 +301,142 @@ function positionalForJsoncNode(node: JsoncValue, seg: string): string | null { return null; } -// Predicate-evaluation helpers: look up `node[key]` and compare its -// string-coerced leaf value via `evaluatePredicate`. Used by -// `[keyvalue]` filtering in find walkers. -function jsoncChildMatchesPredicate(node: JsoncValue, pred: PredicateSpec): boolean { - return evaluatePredicate(jsoncChildFieldText(node, pred.key), pred); +// ---------- JSONL walker --------------------------------------------------- + +// First slot is a line address; subsequent slots descend into the +// line's jsonc value via jsonlOps.walk's holder unwrap. +function walkJsonl( + ast: JsonlAst, + subs: readonly PatternSub[], + i: number, + walked: readonly SlotSub[], + onMatch: OnMatch, +): void { + checkDepth(walked); + if (i >= subs.length) { + onMatch(walked); + return; + } + if (walked.length === 0) { + dispatchSeg(ast, jsonlOps, subs, i, walked, onMatch); + } } -function jsoncChildFieldText(node: JsoncValue, key: string): string | null { - if (node.kind !== "object") { +const jsonlOps: WalkOps<JsonlAst> = { + *enumerate(ast) { + for (const l of ast.lines) { + if (l.kind === "value") yield { keySub: `L${l.line}`, child: lineHolder(ast, l) }; + } + }, + lookup(ast, key) { + const line = pickLine(ast, key); + if (line === null) return null; + const concreteAddr = line.kind === "value" ?
`L${line.line}` : key; + return { keySub: concreteAddr, child: lineHolder(ast, line) }; + }, + positional(ast, seg) { + return jsonlOps.lookup(ast, seg); + }, + *predicate(ast, pred) { + for (const l of ast.lines) { + if (l.kind !== "value") continue; + const actual = topLevelLeafText(l.value, pred.key); + if (evaluatePredicate(actual, pred)) { + yield { keySub: `L${l.line}`, child: lineHolder(ast, l) }; + } + } + }, + // After the line slot is consumed, descend into the line's jsonc + // value via the holder's WeakMap-tagged line. Otherwise this is a + // top-level walkJsonl entry — go through line-slot dispatch. + walk(child, subs, i, walked, onMatch) { + const line = unwrapHolder(child); + if (line === null) { + walkJsonl(child, subs, i, walked, onMatch); + return; + } + if (i >= subs.length) { + onMatch(walked); + return; + } + if (line.kind !== "value") return; + walkJsonc(line.value, subs, i, walked, onMatch); + }, +}; + +// JsonlAst-typed wrapper around a single line so jsonlOps.walk can +// distinguish "top-level ast (descend the line slot)" from "we +// already picked a line, walk inside it." A WeakMap keeps the wrapping +// structural (no JsonlAst surface change). +const lineByHolder = new WeakMap<JsonlAst, JsonlLine>(); +function lineHolder(ast: JsonlAst, line: JsonlLine): JsonlAst { + // Synthesize a tagged JsonlAst that carries the chosen line. The + // outer structure is preserved (kind, raw, lines) so type checks + // remain happy; the WeakMap holds the per-line tag. + const holder: JsonlAst = { kind: "jsonl", raw: ast.raw, lines: ast.lines }; + lineByHolder.set(holder, line); + return holder; +} +function unwrapHolder(holder: JsonlAst): JsonlLine | null { + return lineByHolder.get(holder) ??
null; +} + +function topLevelLeafText(value: JsoncValue, key: string): string | null { + if (value.kind !== "object") return null; + const entry = value.entries.find((e) => e.key === key); + if (entry === undefined) return null; + const v = entry.value; + if (v.kind === "string") return v.value; + if (v.kind === "number" || v.kind === "boolean") return String(v.value); + return null; +} + +function pickLine(ast: JsonlAst, addr: string): JsonlLine | null { + if (addr === "$last") { + for (let i = ast.lines.length - 1; i >= 0; i--) { + const l = ast.lines[i]; + if (l !== undefined && l.kind === "value") return l; + } return null; } - const e = node.entries.find((entry) => entry.key === key); - if (e === undefined) { + if (addr === "$first") { + for (const l of ast.lines) { + if (l.kind === "value") return l; + } return null; } - const v = e.value; - if (v.kind === "string") { - return v.value; + if (/^-\d+$/.test(addr)) { + const valueLines = ast.lines.filter( + (l): l is Extract => l.kind === "value", + ); + const n = valueLines.length + Number(addr); + return n >= 0 && n < valueLines.length ? valueLines[n] : null; } - if (v.kind === "number") { - return String(v.value); - } - if (v.kind === "boolean") { - return String(v.value); - } - if (v.kind === "null") { - return "null"; + const m = /^L(\d+)$/.exec(addr); + if (m === null || m[1] === undefined) return null; + const target = Number(m[1]); + for (const l of ast.lines) { + if (l.line === target) return l; } return null; } -// Predicate semantics for md: blocks (sections) "have" the kv pairs of -// their items, and items "are" their kv pair. So at the section slot a -// predicate matches sections that contain an item with kv.key matching -// pred.key and value satisfying the predicate. At the item slot the -// item itself must match (kv.key === pred.key and value satisfies the -// predicate). At the field slot the item's single kv is what's tested. -// Cross-kind parity with jsoncChildMatchesPredicate. 
-function mdItemMatchesPredicate( - item: { readonly kv?: { readonly key: string; readonly value: string } }, - pred: PredicateSpec, -): boolean { - if (item.kv === undefined) { - return false; - } - if (item.kv.key.toLowerCase() !== pred.key.toLowerCase()) { - return false; - } - return evaluatePredicate(item.kv.value, pred); -} - -function mdBlockHasMatchingItem( - block: { - readonly items: readonly { - readonly slug: string; - readonly kv?: { readonly key: string; readonly value: string }; - }[]; - }, - pred: PredicateSpec, -): boolean { - for (const item of block.items) { - if (mdItemMatchesPredicate(item, pred)) { - return true; - } - } - return false; -} - // ---------- Markdown walker ----------------------------------------------- +type MdItem = MdAst["blocks"][number]["items"][number]; +type MdBlock = MdAst["blocks"][number]; + +type MdLevel = + | { readonly kind: "root"; readonly ast: MdAst } + | { readonly kind: "block"; readonly block: MdBlock; readonly ast: MdAst } + | { readonly kind: "item"; readonly item: MdItem; readonly ast: MdAst }; + function walkMd( - ast: MdAst, + level: MdLevel, subs: readonly PatternSub[], i: number, walked: readonly SlotSub[], - onMatch: (subs: readonly SlotSub[]) => void, + onMatch: OnMatch, ): void { if (i >= subs.length) { onMatch(walked); @@ -641,16 +444,15 @@ function walkMd( } const cur = subs[i]; - // Frontmatter addressing: literal `[frontmatter]` in section slot. - if (walked.length === 0 && cur.value === "[frontmatter]") { - // Next sub addresses a frontmatter key. + // Frontmatter sentinel short-circuits regular dispatch. 
+ if (level.kind === "root" && walked.length === 0 && cur.value === "[frontmatter]") { const next = subs[i + 1]; if (next === undefined) { onMatch([{ slot: cur.slot, value: cur.value }]); return; } if (next.value === WILDCARD_SINGLE || next.value === WILDCARD_RECURSIVE) { - for (const fm of ast.frontmatter) { + for (const fm of level.ast.frontmatter) { onMatch([ { slot: cur.slot, value: cur.value }, { slot: next.slot, value: fm.key }, @@ -658,14 +460,9 @@ function walkMd( } return; } - // Same quote-aware lookup as the JSONC walker — frontmatter - // entry keys are unquoted in the AST, so a quoted-segment path - // segment must be unquoted before comparing. const fmKey = isQuotedSeg(next.value) ? unquoteSeg(next.value) : next.value; - const entry = ast.frontmatter.find((e) => e.key === fmKey); - if (entry === undefined) { - return; - } + const entry = level.ast.frontmatter.find((e) => e.key === fmKey); + if (entry === undefined) return; onMatch([ { slot: cur.slot, value: cur.value }, { slot: next.slot, value: next.value }, @@ -673,262 +470,151 @@ function walkMd( return; } - // Union `{a,b,c}` at the section slot — fan out into one walk per - // alternative. Cross-kind parity with the jsonc walker; mirrors - // the same dispatch shape (see find.ts ~219 for jsonc). - if (walked.length === 0 && isUnionSeg(cur.value)) { - const alts = parseUnionSeg(cur.value); - if (alts === null) { - return; - } - for (const alt of alts) { - const altSubs = subs.slice(); - altSubs[i] = { slot: cur.slot, value: alt }; - walkMd(ast, altSubs, i, walked, onMatch); - } + // Item-level field slot is terminal — descending would loop. + if (level.kind === "item") { + walkMdItemField(level.item, cur, walked, onMatch); return; } - // Predicate `[k=v]` at the section slot — emit only sections whose - // items contain a kv pair matching the predicate. Cross-kind parity - // with the jsonc walker (find.ts ~232). 
- if (walked.length === 0 && isPredicateSeg(cur.value)) { + dispatchSeg(level, mdOps, subs, i, walked, onMatch); +} + +function walkMdItemField( + item: MdItem, + cur: PatternSub, + walked: readonly SlotSub[], + onMatch: OnMatch, +): void { + if (item.kv === undefined) return; + const key = item.kv.key; + const emit = (value: string): void => { + onMatch([...walked, { slot: cur.slot, value }]); + }; + if (isUnionSeg(cur.value)) { + const alts = parseUnionSeg(cur.value); + if (alts === null) return; + for (const alt of alts) { + if (alt.toLowerCase() === key.toLowerCase()) emit(key); + } + return; + } + if (isPredicateSeg(cur.value)) { const pred = parsePredicateSeg(cur.value); - if (pred === null) { + if (pred !== null && mdItemMatchesPredicate(item, pred)) emit(key); + return; + } + if (cur.value === WILDCARD_SINGLE || cur.value === WILDCARD_RECURSIVE) { + emit(key); + return; + } + if (key.toLowerCase() === cur.value.toLowerCase()) emit(cur.value); +} + +function blockSlugCounts(items: readonly MdItem[]): Map { + const counts = new Map(); + for (const item of items) counts.set(item.slug, (counts.get(item.slug) ?? 0) + 1); + return counts; +} + +// `mdOps` only handles root / block levels. Item-level dispatch is +// terminal and runs inline in `walkMd` (see `walkMdItemField`). +const mdOps: WalkOps = { + *enumerate(level) { + if (level.kind === "root") { + for (const block of level.ast.blocks) { + yield { keySub: block.slug, child: { kind: "block", block, ast: level.ast } }; + } return; } - for (const block of ast.blocks) { - if (mdBlockHasMatchingItem(block, pred)) { - walkMdInsideBlock( - block, - ast, - subs, - i + 1, - [{ slot: cur.slot, value: block.slug }], - onMatch, - ); + if (level.kind === "block") { + // Disambiguate duplicate slugs via `#N` ordinal so each emitted + // path round-trips through resolveOcPath to its own item. 
+ const counts = blockSlugCounts(level.block.items); + for (let idx = 0; idx < level.block.items.length; idx++) { + const item = level.block.items[idx]; + const seg = (counts.get(item.slug) ?? 0) > 1 ? `#${idx}` : item.slug; + yield { keySub: seg, child: { kind: "item", item, ast: level.ast } }; } } - return; - } - - // Section slot first. - if (walked.length === 0) { - if (cur.value === WILDCARD_SINGLE || cur.value === WILDCARD_RECURSIVE) { - for (const block of ast.blocks) { - walkMdInsideBlock( - block, - ast, - subs, - i + 1, - [{ slot: cur.slot, value: block.slug }], - onMatch, - ); - // `**` retain-i branch: in addition to descending with `**` - // consumed (i + 1), also descend with `**` still active (i) - // so the next sub can match deeper. Without this, md `**` - // semantics diverged from jsonc — `oc://X.md/**/value` - // only matched the immediate-block layer and silently missed - // deeper hierarchies (cross-kind asymmetry — same lint rule - // worked on jsonc but produced 0 matches on md). - if (cur.value === WILDCARD_RECURSIVE) { - walkMdInsideBlock(block, ast, subs, i, [{ slot: cur.slot, value: block.slug }], onMatch); + }, + lookup(level, key) { + if (level.kind === "root") { + const target = key.toLowerCase(); + const block = level.ast.blocks.find((b) => b.slug === target); + return block === undefined ? null : { keySub: key, child: { kind: "block", block, ast: level.ast } }; + } + if (level.kind === "block") { + // Ordinal `#N` short-circuits slug lookup. + if (isOrdinalSeg(key)) { + const n = parseOrdinalSeg(key); + if (n === null || n < 0 || n >= level.block.items.length) return null; + return { keySub: key, child: { kind: "item", item: level.block.items[n], ast: level.ast } }; + } + const target = key.toLowerCase(); + const item = level.block.items.find((it) => it.slug === target); + return item === undefined ? 
null : { keySub: key, child: { kind: "item", item, ast: level.ast } }; + } + return null; + }, + positional(level, seg) { + if (level.kind !== "block") return null; + const concrete = resolvePositionalSeg(seg, { + indexable: true, + size: level.block.items.length, + }); + if (concrete === null) return null; + // Preserve the positional token in keySub so the resolver + // re-evaluates positionally on round-trip. + const item = level.block.items[Number(concrete)]; + return { keySub: seg, child: { kind: "item", item, ast: level.ast } }; + }, + *predicate(level, pred) { + if (level.kind === "root") { + for (const block of level.ast.blocks) { + if (mdBlockHasMatchingItem(block, pred)) { + yield { keySub: block.slug, child: { kind: "block", block, ast: level.ast } }; } } - // `**` 0-match: emit at root if any. - if (cur.value === WILDCARD_RECURSIVE && i + 1 >= subs.length) { - onMatch([]); + return; + } + if (level.kind === "block") { + const counts = blockSlugCounts(level.block.items); + for (let idx = 0; idx < level.block.items.length; idx++) { + const item = level.block.items[idx]; + if (mdItemMatchesPredicate(item, pred)) { + const seg = (counts.get(item.slug) ?? 0) > 1 ? 
`#${idx}` : item.slug; + yield { keySub: seg, child: { kind: "item", item, ast: level.ast } }; + } } - return; } - const targetSlug = cur.value.toLowerCase(); - const block = ast.blocks.find((b) => b.slug === targetSlug); - if (block === undefined) { - return; - } - walkMdInsideBlock(block, ast, subs, i + 1, [{ slot: cur.slot, value: cur.value }], onMatch); - } -} - -function walkMdInsideBlock( - block: { - readonly items: readonly { - readonly slug: string; - readonly kv?: { readonly key: string; readonly value: string }; - }[]; }, - ast: MdAst, - subs: readonly PatternSub[], - i: number, - walked: readonly SlotSub[], - onMatch: (subs: readonly SlotSub[]) => void, -): void { - if (i >= subs.length) { - onMatch(walked); - return; - } - const cur = subs[i]; + walk: walkMd, +}; - // Union `{a,b,c}` at the item slot — fan out per alternative. Cross- - // kind parity with the jsonc walker. - if (isUnionSeg(cur.value)) { - const alts = parseUnionSeg(cur.value); - if (alts === null) { - return; - } - for (const alt of alts) { - const altSubs = subs.slice(); - altSubs[i] = { slot: cur.slot, value: alt }; - walkMdInsideBlock(block, ast, altSubs, i, walked, onMatch); - } - return; - } - - // Predicate `[k=v]` at the item slot — match items whose kv pair - // satisfies the predicate. Disambiguate duplicate slugs via `#N` - // ordinal addressing the same way the wildcard branch does, so each - // matched path round-trips through `resolveOcPath` to its own item. - if (isPredicateSeg(cur.value)) { - const pred = parsePredicateSeg(cur.value); - if (pred === null) { - return; - } - const slugCounts = new Map(); - for (const item of block.items) { - slugCounts.set(item.slug, (slugCounts.get(item.slug) ?? 0) + 1); - } - block.items.forEach((item, idx) => { - if (mdItemMatchesPredicate(item, pred)) { - const seg = (slugCounts.get(item.slug) ?? 0) > 1 ? 
`#${idx}` : item.slug; - walkMdInsideItem( - item, - ast, - subs, - i + 1, - [...walked, { slot: cur.slot, value: seg }], - onMatch, - ); - } - }); - return; - } - - // Item slot. - if (cur.value === WILDCARD_SINGLE || cur.value === WILDCARD_RECURSIVE) { - // Disambiguate duplicate slugs via `#N` ordinal addressing so each - // matched path round-trips through `resolveOcPath` to its own item. - const slugCounts = new Map(); - for (const item of block.items) { - slugCounts.set(item.slug, (slugCounts.get(item.slug) ?? 0) + 1); - } - block.items.forEach((item, idx) => { - const seg = (slugCounts.get(item.slug) ?? 0) > 1 ? `#${idx}` : item.slug; - walkMdInsideItem( - item, - ast, - subs, - i + 1, - [...walked, { slot: cur.slot, value: seg }], - onMatch, - ); - }); - if (cur.value === WILDCARD_RECURSIVE && i + 1 >= subs.length) { - onMatch(walked); - } - return; - } - // Ordinal `#N` and positional `$first`/`$last`/`-N` short-circuit the - // slug lookup — the resolver handles them, so the find walker just - // descends into the appropriate item. 
- let item: - | { readonly slug: string; readonly kv?: { readonly key: string; readonly value: string } } - | undefined; - if (isOrdinalSeg(cur.value)) { - const n = parseOrdinalSeg(cur.value); - if (n === null || n < 0 || n >= block.items.length) { - return; - } - item = block.items[n]; - } else if (isPositionalSeg(cur.value)) { - const concrete = resolvePositionalSeg(cur.value, { - indexable: true, - size: block.items.length, - }); - if (concrete === null) { - return; - } - item = block.items[Number(concrete)]; - } else { - const targetItemSlug = cur.value.toLowerCase(); - item = block.items.find((it) => it.slug === targetItemSlug); - } - if (item === undefined) { - return; - } - walkMdInsideItem( - item, - ast, - subs, - i + 1, - [...walked, { slot: cur.slot, value: cur.value }], - onMatch, - ); +function mdItemMatchesPredicate(item: MdItem, pred: PredicateSpec): boolean { + if (item.kv === undefined) return false; + if (item.kv.key.toLowerCase() !== pred.key.toLowerCase()) return false; + return evaluatePredicate(item.kv.value, pred); } -function walkMdInsideItem( - item: { readonly kv?: { readonly key: string; readonly value: string } }, - _ast: MdAst, - subs: readonly PatternSub[], - i: number, - walked: readonly SlotSub[], - onMatch: (subs: readonly SlotSub[]) => void, -): void { - if (i >= subs.length) { - onMatch(walked); - return; +function mdBlockHasMatchingItem(block: MdBlock, pred: PredicateSpec): boolean { + for (const item of block.items) { + if (mdItemMatchesPredicate(item, pred)) return true; } - const cur = subs[i]; - // Field slot — addresses kv.key (case-insensitive). - if (item.kv === undefined) { - return; - } - - // Union `{a,b}` at the field slot — fan out per alternative. Md items - // carry a single kv pair so the field-slot union is degenerate (at - // most one alt matches), but the dispatch is included for cross-kind - // parity with the jsonc walker. 
- if (isUnionSeg(cur.value)) { - const alts = parseUnionSeg(cur.value); - if (alts === null) { - return; - } - for (const alt of alts) { - const altSubs = subs.slice(); - altSubs[i] = { slot: cur.slot, value: alt }; - walkMdInsideItem(item, _ast, altSubs, i, walked, onMatch); - } - return; - } - - // Predicate `[k=v]` at the field slot — match the kv pair as a unit - // (kv.key matches pred.key AND kv.value satisfies the predicate). - if (isPredicateSeg(cur.value)) { - const pred = parsePredicateSeg(cur.value); - if (pred === null) { - return; - } - if (mdItemMatchesPredicate(item, pred)) { - onMatch([...walked, { slot: cur.slot, value: item.kv.key }]); - } - return; - } - - if (cur.value === WILDCARD_SINGLE || cur.value === WILDCARD_RECURSIVE) { - onMatch([...walked, { slot: cur.slot, value: item.kv.key }]); - return; - } - if (item.kv.key.toLowerCase() !== cur.value.toLowerCase()) { - return; - } - onMatch([...walked, { slot: cur.slot, value: cur.value }]); + return false; +} + +function jsoncChildMatchesPredicate(node: JsoncValue, pred: PredicateSpec): boolean { + return evaluatePredicate(jsoncChildFieldText(node, pred.key), pred); +} + +function jsoncChildFieldText(node: JsoncValue, key: string): string | null { + if (node.kind !== "object") return null; + const e = node.entries.find((entry) => entry.key === key); + if (e === undefined) return null; + const v = e.value; + if (v.kind === "string") return v.value; + if (v.kind === "number" || v.kind === "boolean") return String(v.value); + if (v.kind === "null") return "null"; + return null; } diff --git a/extensions/oc-path/src/oc-path/jsonc/emit.ts b/extensions/oc-path/src/oc-path/jsonc/emit.ts index 5c8d4bb2836..1c4c614a981 100644 --- a/extensions/oc-path/src/oc-path/jsonc/emit.ts +++ b/extensions/oc-path/src/oc-path/jsonc/emit.ts @@ -1,28 +1,11 @@ /** * Emit a `JsoncAst` to bytes. 
* - * **Round-trip mode (default)** returns `ast.raw` verbatim — this - * preserves comments, formatting, and trailing whitespace exactly. - * - * **Sentinel-guard policy**: - * - * - Round-trip echoes `ast.raw` *without* scanning for the redaction - * sentinel. Bytes that came in via `parseJsonc` are trusted: a - * workspace file legitimately containing the literal - * `__OPENCLAW_REDACTED__` (in a code-block comment, in a pasted - * error log, etc.) would otherwise become a workspace-wide emit - * DoS — every `openclaw path emit FILE.jsonc` would exit non-zero, - * breaking lint round-trip rules, doctor fixers, and LKG - * fingerprinting. The substrate's contract is "no NEW sentinel - * bytes introduced via emit", not "no sentinel byte ever leaves". - * - Render mode walks every leaf and rejects sentinel-bearing leaf - * values (caller-injected sentinel via `setOcPath` lands here: - * `setJsoncOcPath` rebuilds raw via render-mode, so a leaf set to - * the sentinel by the caller is caught at the rebuild boundary - * before the raw is shipped back). - * - * Callers that want pre-existing sentinel detection (e.g., LKG - * fingerprint verification) can opt in via + * Round-trip (default) echoes `ast.raw` verbatim — preserves comments + * and formatting. Sentinel guard fires only in render mode by default; + * round-trip trusts parsed bytes so a workspace file legitimately + * containing the sentinel literal isn't a global emit DoS. Callers + * that need pre-existing detection opt in via * `acceptPreExistingSentinel: false`. * * @module @openclaw/oc-path/jsonc/emit @@ -34,12 +17,6 @@ import type { JsoncAst, JsoncValue } from "./ast.js"; export interface JsoncEmitOptions { readonly mode?: "roundtrip" | "render"; readonly fileNameForGuard?: string; - /** - * When `false`, round-trip mode also scans `ast.raw` for the - * redaction sentinel and throws `OcEmitSentinelError` if found. - * Default `true` — round-trip trusts parsed bytes (see policy - * comment above). 
Render mode always scans leaves regardless. - */ readonly acceptPreExistingSentinel?: boolean; } @@ -55,12 +32,8 @@ export function emitJsonc(ast: JsoncAst, opts: JsoncEmitOptions = {}): string { return ast.raw; } - // Render mode — synthesize JSON from the structural tree (loses - // comments). Walk every leaf string for sentinel detection so a - // caller-injected sentinel via setOcPath is rejected. - if (ast.root === null) { - return ""; - } + // Render mode loses comments; walks leaves for caller-injected sentinel. + if (ast.root === null) return ""; return renderValue(ast.root, guardPath, []); } @@ -76,17 +49,12 @@ function renderValue(value: JsoncValue, guardPath: string, walked: readonly stri const parts = value.items.map((v, i) => renderValue(v, guardPath, [...walked, String(i)])); return `[ ${parts.join(", ")} ]`; } - case "string": { - // Reject ANY string that contains the sentinel — embedded - // (`prefix__OPENCLAW_REDACTED__suffix`) is just as much of a - // "literal redacted token landed on disk" leak as exact-match. - // The roundtrip path uses `raw.includes()` for the same reason; - // render needs the same predicate per leaf. + case "string": + // Substring match: embedded sentinel leaks marker bytes too. if (value.value.includes(REDACTED_SENTINEL)) { throw new OcEmitSentinelError(`${guardPath}/${walked.join("/")}`); } return JSON.stringify(value.value); - } case "number": return String(value.value); case "boolean": @@ -94,5 +62,4 @@ function renderValue(value: JsoncValue, guardPath: string, walked: readonly stri case "null": return "null"; } - throw new Error(`unreachable: jsonc renderValue kind`); } diff --git a/extensions/oc-path/src/oc-path/jsonc/resolve.ts b/extensions/oc-path/src/oc-path/jsonc/resolve.ts index d6743c62989..c941c10323b 100644 --- a/extensions/oc-path/src/oc-path/jsonc/resolve.ts +++ b/extensions/oc-path/src/oc-path/jsonc/resolve.ts @@ -1,18 +1,7 @@ /** - * Resolve an `OcPath` against a `JsoncAst`. 
- * - * The OcPath model has 4 segments (file, section, item, field) — for - * JSONC artifacts that's not enough depth, so segments concat with `/` - * AND a section/item/field MAY contain dots (`.`) for deeper traversal. - * Both forms work: - * - * oc://config/plugins/entries/foo (segment-per-key) - * oc://config/plugins.entries.foo (dotted section) - * oc://config/plugins/entries.foo (mixed) - * - * Each segment is split on `.`, and the resulting flat list of keys - * walks the value tree from `ast.root`. Numeric segments index into - * arrays. + * Resolve `OcPath` against `JsoncAst`. Slot segments concat as if + * dotted; segments are bracket/quote-aware-split so quoted keys + * containing `/` or `.` round-trip cleanly. * * @module @openclaw/oc-path/jsonc/resolve */ @@ -36,82 +25,52 @@ export type JsoncOcPathMatch = readonly path: readonly string[]; }; -/** - * Walk the JSONC tree following the OcPath. Returns the matched node - * or `null`. Numeric path segments index into arrays. - */ export function resolveJsoncOcPath(ast: JsoncAst, path: OcPath): JsoncOcPathMatch | null { - if (ast.root === null) { - return null; - } + if (ast.root === null) return null; - // Bracket-aware split + unquote: `"foo/bar".baz` becomes - // [`foo/bar`, `baz`] (literal slash preserved in the first sub). const segments: string[] = []; - if (path.section !== undefined) { - for (const s of splitRespectingBrackets(path.section, ".")) { + const collect = (slot: string | undefined): void => { + if (slot === undefined) return; + for (const s of splitRespectingBrackets(slot, ".")) { segments.push(isQuotedSeg(s) ? unquoteSeg(s) : s); } - } - if (path.item !== undefined) { - for (const s of splitRespectingBrackets(path.item, ".")) { - segments.push(isQuotedSeg(s) ? unquoteSeg(s) : s); - } - } - if (path.field !== undefined) { - for (const s of splitRespectingBrackets(path.field, ".")) { - segments.push(isQuotedSeg(s) ? 
unquoteSeg(s) : s); - } - } + }; + collect(path.section); + collect(path.item); + collect(path.field); - if (segments.length === 0) { - return { kind: "root", node: ast }; - } + if (segments.length === 0) return { kind: "root", node: ast }; let current: JsoncValue = ast.root; let lastEntry: JsoncEntry | null = null; const walked: string[] = []; for (let seg of segments) { - if (seg.length === 0) { - return null; - } - // Positional resolution: `$first` / `$last` always; `-N` only on - // indexable (array) containers. On a keyed (object) container, a - // `-N` segment falls through to literal-key lookup so paths like - // `groups.-5028303500.requireMention` (Telegram supergroup IDs — - // openclaw#59934) address the literal key instead of crashing. + if (seg.length === 0) return null; + // `-N` on an indexable container is positional; on a keyed + // container it falls through to literal-key lookup (e.g. Telegram + // supergroup IDs — openclaw#59934). if (isPositionalSeg(seg)) { const concrete = positionalForJsonc(current, seg); - if (concrete !== null) { - seg = concrete; - } - // null means "not applicable" — fall through to literal lookup. + if (concrete !== null) seg = concrete; } walked.push(seg); if (current.kind === "object") { const entry = current.entries.find((e) => e.key === seg); - if (entry === undefined) { - return null; - } + if (entry === undefined) return null; lastEntry = entry; current = entry.value; continue; } if (current.kind === "array") { const idx = Number(seg); - if (!Number.isInteger(idx) || idx < 0 || idx >= current.items.length) { - return null; - } + if (!Number.isInteger(idx) || idx < 0 || idx >= current.items.length) return null; lastEntry = null; const item = current.items[idx]; - if (item === undefined) { - return null; - } + if (item === undefined) return null; current = item; continue; } - // Primitive — can't descend further. 
return null; } diff --git a/extensions/oc-path/src/oc-path/jsonl/edit.ts b/extensions/oc-path/src/oc-path/jsonl/edit.ts index b538718abe7..2bed4f2a95c 100644 --- a/extensions/oc-path/src/oc-path/jsonl/edit.ts +++ b/extensions/oc-path/src/oc-path/jsonl/edit.ts @@ -1,16 +1,6 @@ /** - * Mutate a `JsonlAst` at an OcPath. Returns a new AST with the line - * (or sub-field of a line) replaced. - * - * Edit shapes: - * - * oc://session-events/L42 → replace line 42's whole value - * oc://session-events/L42/field → replace field on line 42 - * oc://session-events/L42/field.sub → dotted descent - * oc://session-events/$last/... → resolves to most recent value - * - * Append (no existing line) is NOT a `set` — use `appendJsonlLine` for - * that. `setJsonlOcPath` only edits existing addresses. + * Mutate a `JsonlAst` at an OcPath. Append uses `appendJsonlOcPath`; + * `setJsonlOcPath` only edits existing addresses. * * @module @openclaw/oc-path/jsonl/edit */ @@ -46,8 +36,7 @@ export function setJsonlOcPath(ast: JsonlAst, path: OcPath, newValue: JsoncValue return { ok: false, reason: "unresolved" }; } - // No item/field — replace the whole line value. Requires the line to - // already be a value line (we don't synthesize lines from blanks). + // No item/field — replace the whole line. Requires an existing value line. if (path.item === undefined && path.field === undefined) { if (target.kind !== "value") { return { ok: false, reason: "not-a-value-line" }; @@ -65,10 +54,7 @@ export function setJsonlOcPath(ast: JsonlAst, path: OcPath, newValue: JsoncValue return { ok: false, reason: "not-a-value-line" }; } - // Bracket/brace/quote-aware split — preserves quoted segments - // verbatim so the edit path matches `resolveJsonlOcPath`'s - // unquoting behavior. Plain `.split('.')` would shred a quoted key - // and silently desync read-vs-write. + // Quote-aware split keeps edit symmetric with resolveJsonlOcPath. 
const segments: string[] = []; if (path.item !== undefined) { segments.push(...splitRespectingBrackets(path.item, ".")); @@ -97,45 +83,29 @@ function replaceAt( newValue: JsoncValue, ): JsoncValue | null { const seg = segments[i]; - if (seg === undefined) { - return newValue; - } - if (seg.length === 0) { - return null; - } + if (seg === undefined) return newValue; + if (seg.length === 0) return null; if (current.kind === "object") { - // Resolve positional tokens ($first / $last) against the entries' - // ordered key list before any literal-key comparison. Keeps the - // jsonl edit path symmetric with resolveJsonlOcPath, which already - // honors positional tokens during read. - let segNorm: string = seg; + // Positional tokens resolve against the entries' ordered key list; + // quoted segments are unquoted before literal-key comparison. + let segNorm = seg; if (isPositionalSeg(seg)) { const resolved = resolvePositionalSeg(seg, { indexable: false, size: current.entries.length, keys: current.entries.map((e) => e.key), }); - if (resolved === null) { - return null; - } + if (resolved === null) return null; segNorm = resolved; } - // Quoted segments carry the raw bytes verbatim; AST entry keys - // are unquoted. Strip the surrounding quotes before comparing. const lookupKey = isQuotedSeg(segNorm) ? 
unquoteSeg(segNorm) : segNorm; const idx = current.entries.findIndex((e) => e.key === lookupKey); - if (idx === -1) { - return null; - } + if (idx === -1) return null; const child = current.entries[idx]; - if (child === undefined) { - return null; - } + if (child === undefined) return null; const replacedChild = replaceAt(child.value, segments, i + 1, newValue); - if (replacedChild === null) { - return null; - } + if (replacedChild === null) return null; const newEntry: JsoncEntry = { ...child, value: replacedChild }; const newEntries = current.entries.slice(); newEntries[idx] = newEntry; @@ -147,32 +117,21 @@ function replaceAt( } if (current.kind === "array") { - // Resolve positional tokens ($first / $last / -N) against the - // array's size before the numeric coercion below; without this - // `Number('$last')` is NaN and the path silently unresolves. - let segNorm: string = seg; + let segNorm = seg; if (isPositionalSeg(seg)) { const resolved = resolvePositionalSeg(seg, { indexable: true, size: current.items.length, }); - if (resolved === null) { - return null; - } + if (resolved === null) return null; segNorm = resolved; } const idx = Number(segNorm); - if (!Number.isInteger(idx) || idx < 0 || idx >= current.items.length) { - return null; - } + if (!Number.isInteger(idx) || idx < 0 || idx >= current.items.length) return null; const child = current.items[idx]; - if (child === undefined) { - return null; - } + if (child === undefined) return null; const replacedChild = replaceAt(child, segments, i + 1, newValue); - if (replacedChild === null) { - return null; - } + if (replacedChild === null) return null; const newItems = current.items.slice(); newItems[idx] = replacedChild; return { @@ -185,48 +144,31 @@ function replaceAt( return null; } +// Mirrors the line-address grammar in resolveJsonlOcPath / find.ts. +// `-N` walks value lines only so blank/malformed lines don't shift. 
function pickLineIndex(ast: JsonlAst, addr: string): number { - // Mirrors the line-address grammar handled by resolveJsonlOcPath's - // pickLine and find.ts's pickLine — the four shapes a JSONL line can - // be addressed by. Without `$first` and `-N` here, a path that - // resolves cleanly under those tokens would silently unresolve on - // the edit path (resolve↔write asymmetry). - if (addr === "$last") { - for (let i = ast.lines.length - 1; i >= 0; i--) { - const l = ast.lines[i]; - if (l !== undefined && l.kind === "value") { - return i; - } + const valueIndices = (): number[] => { + const out: number[] = []; + for (let i = 0; i < ast.lines.length; i++) { + if (ast.lines[i]?.kind === "value") out.push(i); } - return -1; + return out; + }; + if (addr === "$last") { + const v = valueIndices(); + return v[v.length - 1] ?? -1; } if (addr === "$first") { - for (let i = 0; i < ast.lines.length; i++) { - const l = ast.lines[i]; - if (l !== undefined && l.kind === "value") { - return i; - } - } - return -1; + const v = valueIndices(); + return v[0] ?? -1; } if (/^-\d+$/.test(addr)) { - // -N selects the Nth-from-last value line. Walk only value lines - // so blank/malformed lines don't shift the count (consistent with - // resolve.ts's pickLine). - const valueIndices: number[] = []; - for (let i = 0; i < ast.lines.length; i++) { - const l = ast.lines[i]; - if (l !== undefined && l.kind === "value") { - valueIndices.push(i); - } - } - const n = valueIndices.length + Number(addr); - return n >= 0 && n < valueIndices.length ? (valueIndices[n] ?? -1) : -1; + const v = valueIndices(); + const n = v.length + Number(addr); + return n >= 0 && n < v.length ? (v[n] ?? 
-1) : -1; } const m = /^L(\d+)$/.exec(addr); - if (m === null || m[1] === undefined) { - return -1; - } + if (m === null || m[1] === undefined) return -1; const target = Number(m[1]); return ast.lines.findIndex((l) => l.line === target); } @@ -253,12 +195,7 @@ function finalize( return { ok: true, ast: { ...next, raw: rendered } }; } -/** - * Append a new value as the next line. Useful for session checkpointing - * (each event is a new line). Returns a new AST. The `path` parameter - * is accepted for OcPath-naming consistency but jsonl append addresses - * the file as a whole (line numbers are assigned by the substrate). - */ +/** Append a value as the next line. Line numbers are substrate-assigned. */ export function appendJsonlOcPath(ast: JsonlAst, value: JsoncValue): JsonlAst { const nextLineNo = ast.lines.length === 0 ? 1 : (ast.lines[ast.lines.length - 1]?.line ?? 0) + 1; const newLine: JsonlLine = { diff --git a/extensions/oc-path/src/oc-path/jsonl/emit.ts b/extensions/oc-path/src/oc-path/jsonl/emit.ts index 0973c3025ce..73bbb6c3b20 100644 --- a/extensions/oc-path/src/oc-path/jsonl/emit.ts +++ b/extensions/oc-path/src/oc-path/jsonl/emit.ts @@ -1,15 +1,6 @@ /** - * Emit a `JsonlAst` to bytes. - * - * **Round-trip mode (default)** returns `ast.raw` verbatim — preserves - * malformed lines, blanks, trailing-newline shape exactly. - * - * **Render mode** rebuilds the file from line entries (re-stringifies - * value lines via JSON.stringify; preserves blank/malformed lines - * verbatim). Useful for synthetic ASTs. - * - * **Sentinel guard**: scans every emitted byte sequence for the - * `__OPENCLAW_REDACTED__` literal. + * Emit a `JsonlAst` to bytes. Round-trip echoes `ast.raw`; render mode + * rebuilds from line entries (preserves blank/malformed lines verbatim). 
* * @module @openclaw/oc-path/jsonl/emit */ @@ -21,11 +12,6 @@ import type { JsonlAst } from "./ast.js"; export interface JsonlEmitOptions { readonly mode?: "roundtrip" | "render"; readonly fileNameForGuard?: string; - /** - * See `JsoncEmitOptions.acceptPreExistingSentinel` for the rationale. - * Default `true` — round-trip echoes parsed bytes without scanning - * for the sentinel. Render mode scans value-line leaves regardless. - */ readonly acceptPreExistingSentinel?: boolean; } @@ -44,25 +30,17 @@ export function emitJsonl(ast: JsonlAst, opts: JsonlEmitOptions = {}): string { const out: string[] = []; for (const ln of ast.lines) { if (ln.kind === "blank" || ln.kind === "malformed") { - // Blank/malformed lines round-trip as their original raw bytes. - // Apply the same trust policy: only scan when caller opts in. if (!acceptPreExisting && ln.raw.includes(REDACTED_SENTINEL)) { throw new OcEmitSentinelError(`${guardPath}/L${ln.line}`); } out.push(ln.raw); continue; } - // Value lines re-serialize via renderValue, which always scans - // string leaves regardless of acceptPreExistingSentinel — a - // caller-injected sentinel via setOcPath / appendJsonl must - // always be rejected. + // Value lines always scan leaves so caller-injected sentinel is rejected. out.push(renderValue(ln.value, `${guardPath}/L${ln.line}`, [])); } - // Restore the original line-ending convention. Without this, a CRLF - // input edited via setJsonlOcPath would emit a mixed-ending file: - // edited lines joined with `\n` and untouched lines retaining the - // `\r` on their .raw bytes — silent CRLF→LF corruption on - // Windows-authored datasets. + // Preserve line-ending convention; otherwise CRLF input edited via + // setJsonlOcPath would emit mixed endings (silent corruption on Windows). return out.join(ast.lineEnding ?? 
"\n"); } @@ -78,15 +56,12 @@ function renderValue(value: JsoncValue, guardPath: string, walked: readonly stri const parts = value.items.map((v, i) => renderValue(v, guardPath, [...walked, String(i)])); return `[${parts.join(",")}]`; } - case "string": { - // Reject ANY string that contains the sentinel — embedded - // (`prefix__OPENCLAW_REDACTED__suffix`) is just as much of a - // "literal redacted token landed on disk" leak as exact-match. + case "string": + // Substring match: embedded sentinel leaks marker bytes too. if (value.value.includes(REDACTED_SENTINEL)) { throw new OcEmitSentinelError(`${guardPath}/${walked.join("/")}`); } return JSON.stringify(value.value); - } case "number": return String(value.value); case "boolean": @@ -94,5 +69,4 @@ function renderValue(value: JsoncValue, guardPath: string, walked: readonly stri case "null": return "null"; } - throw new Error(`unreachable: jsonl renderValue kind`); } diff --git a/extensions/oc-path/src/oc-path/oc-path.ts b/extensions/oc-path/src/oc-path/oc-path.ts index fa86f7188e1..e63a6b137c5 100644 --- a/extensions/oc-path/src/oc-path/oc-path.ts +++ b/extensions/oc-path/src/oc-path/oc-path.ts @@ -1,17 +1,9 @@ /** * `oc://` path syntax — universal addressing for the OpenClaw workspace. * - * Canonical form: - * * oc://{file}[/{section}[/{item}[/{field}]]][?session={id}] * - * Used in PatchError messages, audit events, governance warnings, lint - * findings, doctor fixers, API error responses, SSE events, and editor - * deep-links. No ad-hoc string paths anywhere — every path through the - * serve layer flows through `parseOcPath` / `formatOcPath`. - * - * **Round-trip contract**: `formatOcPath(parseOcPath(s)) === s` for every - * valid `s` produced by `formatOcPath`. + * Round-trip contract: `formatOcPath(parseOcPath(s)) === s`. 
* * @module @openclaw/oc-path/oc-path */ @@ -20,56 +12,27 @@ import { OcEmitSentinelError, REDACTED_SENTINEL } from "./sentinel.js"; const OC_SCHEME = "oc://"; -/** - * Hard caps to prevent pathological input from exhausting resources. - * - * `MAX_PATH_LENGTH` — input string length. 4 KiB is enough for any - * realistic addressing use (deep nested workflows max out around 200 - * bytes). Anything larger is either user error or hostile input. - * - * `MAX_SUB_SEGMENTS_PER_SLOT` — dotted sub-segment count inside a - * single slot. Real workspace addressing maxes around 10 levels. - * - * `MAX_TRAVERSAL_DEPTH` — used by find walkers to bound `**` - * recursion. Real ASTs don't nest beyond ~50; 256 is a safe ceiling. - */ +// Hard caps bound resource use under pathological / hostile input. export const MAX_PATH_LENGTH = 4096; export const MAX_SUB_SEGMENTS_PER_SLOT = 64; export const MAX_TRAVERSAL_DEPTH = 256; -/** UTF-8 BOM. Stripped from path strings before scheme check. */ const BOM = ""; -/** - * True if the string contains any C0 control char (U+0000 — U+001F) - * or DEL (U+007F). Walks by char code so we never embed literal - * control bytes in source — the equivalent regex would put NUL/DEL - * into this file, which lint and binary-detection tools flag. - */ +// Walk by char code rather than regex — the no-control-regex lint rule +// rejects character classes covering U+0000–U+001F + U+007F. function hasControlChar(s: string): boolean { for (let i = 0; i < s.length; i++) { const cc = s.charCodeAt(i); - if (cc <= 0x1f || cc === 0x7f) { - return true; - } + if (cc <= 0x1f || cc === 0x7f) return true; } return false; } -/** Reserved characters that can't appear unencoded in path segments. */ const RESERVED_CHARS_RE = /[?&%]/; -/** - * Render a string for inclusion in error messages — replaces control - * chars with `\xNN` escapes so error output is readable even when the - * offending input contains invisible characters. 
- */ +/** Render with `\xNN` escapes so error output is readable for invisible chars. */ function printable(s: string): string { - // Walk the string explicitly rather than using a control-char regex - // — the no-control-regex lint rule rejects character classes that - // contain bytes in U+0000–U+001F + U+007F, but that's exactly the - // range we WANT to escape so error messages stay readable when - // input contains invisible bytes. Manual loop sidesteps the rule. let out = ""; for (let i = 0; i < s.length; i++) { const cc = s.charCodeAt(i); @@ -84,34 +47,19 @@ function printable(s: string): string { /** * Parsed `oc://` path. Components nest strictly: `item` implies - * `section`, `field` implies `item`. Structural violations are rejected - * by `formatOcPath`. - * - * Per the upstream pre-RFC, `field` addresses either a frontmatter key - * (when used directly under a file with no section) OR the value of a - * key/value bullet (`- key: value`) inside an item. The substrate - * resolver dispatches based on what the path resolves to. + * `section`, `field` implies `item`. `field` directly under file + * addresses a frontmatter key; under item it addresses the value of a + * `- key: value` bullet. */ export interface OcPath { - /** Target file or virtual root (e.g. `SOUL.md`, `skills/email-drafter`). Always present. */ readonly file: string; - /** Optional H2 section within the file (e.g. `Boundaries`). */ readonly section?: string; - /** Optional item within a section (e.g. `deny-rule-1`). Requires `section`. */ readonly item?: string; - /** Optional field on an item or frontmatter (e.g. `risk`). Requires `item` for item-fields. */ readonly field?: string; - /** Optional session scope (e.g. `cron:daily`). Orthogonal to nesting. */ readonly session?: string; } -/** - * Error thrown when an `oc://` path cannot be parsed or formatted. 
- * - * `code` is a stable, machine-readable tag; downstream consumers - * (PatchError, audit events, error handlers) match on `code`, not on - * `message`. - */ +/** `code` is the stable machine-readable tag; consumers match on `code`, not `message`. */ export class OcPathError extends Error { readonly code: string; readonly input: string; @@ -124,105 +72,95 @@ export class OcPathError extends Error { } } -/** - * Parse an `oc://` path string into a structured `OcPath`. - * - * Accepts the full syntax: file, optional section/item/field, optional - * `?session=` query parameter. Unknown query parameters are silently - * ignored. - * - * Throws `OcPathError` for missing scheme, empty file, or empty path - * segments. - */ +function fail(message: string, input: string, code: string): never { + throw new OcPathError(message, input, code); +} + +// Reject absolute paths, parent-dir escapes, and control chars at every +// entry point so a hostile struct can't smuggle a filesystem traversal. +function validateFileSlot(file: string, contextInput: string): void { + if (file.startsWith("/") || file.startsWith("\\") || /^[a-zA-Z]:/.test(file)) { + fail( + `Absolute file slot not allowed (oc:// paths are workspace-relative): ${printable(contextInput)}`, + contextInput, + "OC_PATH_ABSOLUTE_FILE", + ); + } + if (file.split(/[\\/]/).some((seg) => seg === "..")) { + fail( + `Parent-directory segment ('..') not allowed in oc:// file slot: ${printable(contextInput)}`, + contextInput, + "OC_PATH_PARENT_TRAVERSAL", + ); + } + if (hasControlChar(file)) { + fail( + `Control character in oc:// file slot: ${printable(contextInput)}`, + contextInput, + "OC_PATH_CONTROL_CHAR", + ); + } +} + +/** Parse an `oc://` path string into a structured `OcPath`. 
*/ export function parseOcPath(input: string): OcPath { if (typeof input !== "string") { - throw new OcPathError("oc:// path must be a string", String(input), "OC_PATH_NOT_STRING"); + fail("oc:// path must be a string", String(input), "OC_PATH_NOT_STRING"); } - // P-032 — hard cap on input length. Pathological inputs are rejected - // before any further string ops so quadratic scans can't be triggered. - // The pre-normalize check fails fast on absurd input (a 10 MB string - // shouldn't even reach .normalize); the post-normalize check below - // catches the corner case where NFC composition grows the string - // past the cap (a few decomposed Hangul or combining-mark sequences - // can exceed pre-normalize length). if (input.length > MAX_PATH_LENGTH) { - throw new OcPathError( + fail( `oc:// path exceeds ${MAX_PATH_LENGTH} bytes (length: ${input.length})`, input.slice(0, 80) + "…", "OC_PATH_TOO_LONG", ); } - // P-001 — strip a leading UTF-8 BOM if present. The BOM is invisible - // and confuses scheme detection; rejecting silently would surface as - // a misleading "missing scheme" error. + // NFC normalization keeps cross-platform equality (macOS HFS+ NFD vs + // Unix/Windows NFC). NFC can grow the string, so re-check the cap. let normalized = input.startsWith(BOM) ? input.slice(BOM.length) : input; - - // P-002 — normalize to NFC. Different filesystems produce different - // forms (macOS HFS+ historically NFD; web / Unix / Windows NFC). NFC - // is the canonical form for cross-platform string equality. normalized = normalized.normalize("NFC"); - // Re-check the cap after NFC. NFC can grow a string (some Hangul - // and combining-mark sequences); without this re-check the - // documented invariant — "downstream loops iterate at most - // MAX_PATH_LENGTH chars" — doesn't hold. 
if (normalized.length > MAX_PATH_LENGTH) { - throw new OcPathError( + fail( `oc:// path exceeds ${MAX_PATH_LENGTH} bytes after NFC (length: ${normalized.length})`, input.slice(0, 80) + "…", "OC_PATH_TOO_LONG", ); } - if (!normalized.startsWith(OC_SCHEME)) { - throw new OcPathError( - `Missing oc:// scheme: ${printable(input)}`, - input, - "OC_PATH_MISSING_SCHEME", - ); + fail(`Missing oc:// scheme: ${printable(input)}`, input, "OC_PATH_MISSING_SCHEME"); } const afterScheme = normalized.slice(OC_SCHEME.length); - // Find the query separator at the TOP level (outside brackets, - // braces, and quotes). Plain `indexOf('?')` would treat a quoted - // key like `"foo?bar"` as a query boundary, breaking advertised - // quoted-segment support — closes the parser-quoted-query gap. + // Top-level split skips quoted keys so `"foo?bar"` isn't broken. const queryIndex = indexOfTopLevel(afterScheme, "?"); const pathPart = queryIndex === -1 ? afterScheme : afterScheme.slice(0, queryIndex); const queryPart = queryIndex === -1 ? "" : afterScheme.slice(queryIndex + 1); if (pathPart.length === 0) { - throw new OcPathError(`Empty oc:// path: ${printable(input)}`, input, "OC_PATH_EMPTY"); + fail(`Empty oc:// path: ${printable(input)}`, input, "OC_PATH_EMPTY"); } const segments = splitRespectingBrackets(pathPart, "/", input); for (const seg of segments) { if (seg.length === 0) { - throw new OcPathError( - `Empty segment in oc:// path: ${printable(input)}`, - input, - "OC_PATH_EMPTY_SEGMENT", - ); + fail(`Empty segment in oc:// path: ${printable(input)}`, input, "OC_PATH_EMPTY_SEGMENT"); } } - if (segments.length > 4) { - throw new OcPathError( + fail( `Too many segments in oc:// path (max 4): ${printable(input)}`, input, "OC_PATH_TOO_DEEP", ); } - // Validate every segment: bracket/brace shape, dotted sub-segments, - // P-003 whitespace, P-004 control chars, P-026 reserved chars. 
for (const seg of segments) { validateBrackets(seg, input); const subs = splitRespectingBrackets(seg, ".", input); if (subs.length > MAX_SUB_SEGMENTS_PER_SLOT) { - throw new OcPathError( + fail( `Sub-segment count exceeds ${MAX_SUB_SEGMENTS_PER_SLOT} in segment "${seg}": ${printable(input)}`, input, "OC_PATH_TOO_DEEP", @@ -233,154 +171,56 @@ export function parseOcPath(input: string): OcPath { } } - const session = extractSession(queryPart); - - // Unquote the file slot so `path.file` always carries the bare - // filesystem path. `splitRespectingBrackets` keeps a quoted file - // segment intact (`"skills/email-drafter"`) so the `/` inside it - // isn't treated as a slot separator; here we strip the surrounding - // quotes so consumers (CLI's `resolveFsPath`, find / resolve walkers) - // see `skills/email-drafter` rather than `"skills/email-drafter"`. - // Without this, the round-trip emits `oc://"skills/email-drafter"` - // and the CLI tries to `fs.readFile` a literally-quoted filename. + // Unquote the file slot — splitRespectingBrackets keeps a quoted file + // segment intact so its `/` isn't a slot separator; strip the quotes + // so consumers see the literal filename. const fileSeg = segments[0]; const file = isQuotedSeg(fileSeg) ? unquoteSeg(fileSeg) : fileSeg; + validateFileSlot(file, input); - // Containment — `oc://` paths address files **relative to the workspace - // root**. Absolute paths and parent-directory escapes (`..`) would let a - // hostile workflow / skill manifest persuade `openclaw path resolve|set - // |emit` into reading or writing arbitrary filesystem locations. Reject - // both before the path leaks into `resolveFsPath` (which would resolve - // an absolute slot away from `cwd` per Node `path.resolve` semantics). - // Quoted-segment unquoting (above) means `oc://".."/x` and - // `oc://"../foo"/x` are caught by the same check. 
- if (file.startsWith("/") || file.startsWith("\\") || /^[a-zA-Z]:/.test(file)) { - throw new OcPathError( - `Absolute file slot not allowed (oc:// paths are workspace-relative): ${printable(input)}`, - input, - "OC_PATH_ABSOLUTE_FILE", - ); - } - if (file.split(/[\\/]/).some((seg) => seg === "..")) { - throw new OcPathError( - `Parent-directory segment ('..') not allowed in oc:// file slot: ${printable(input)}`, - input, - "OC_PATH_PARENT_TRAVERSAL", - ); - } - - const result: OcPath = { + const session = extractSession(queryPart); + return { file, ...(segments[1] !== undefined ? { section: segments[1] } : {}), ...(segments[2] !== undefined ? { item: segments[2] } : {}), ...(segments[3] !== undefined ? { field: segments[3] } : {}), ...(session !== undefined ? { session } : {}), }; - - return result; } -/** - * Format an `OcPath` struct back into its canonical string form. - * - * Throws `OcPathError` if the struct violates structural nesting - * (item without section, field without item). - */ +/** Format an `OcPath` struct into its canonical string form. */ export function formatOcPath(path: OcPath): string { if (!path.file || path.file.length === 0) { - throw new OcPathError("oc:// path requires a file", "", "OC_PATH_FILE_REQUIRED"); - } - // Symmetric defense with parseOcPath — an `OcPath` struct constructed - // programmatically with `file: '..'` or `file: '/etc/passwd'` would - // otherwise emit a path that either round-trips into a traversal or - // is rejected at parse time, breaking the contract on line 13. Refuse - // here so the caller sees the violation at the format boundary. 
- if (path.file.startsWith("/") || path.file.startsWith("\\") || /^[a-zA-Z]:/.test(path.file)) { - throw new OcPathError( - `Absolute file slot not allowed in OcPath struct: ${printable(path.file)}`, - path.file, - "OC_PATH_ABSOLUTE_FILE", - ); - } - if (path.file.split(/[\\/]/).some((seg) => seg === "..")) { - throw new OcPathError( - `Parent-directory segment ('..') not allowed in OcPath.file: ${printable(path.file)}`, - path.file, - "OC_PATH_PARENT_TRAVERSAL", - ); - } - if (hasControlChar(path.file)) { - throw new OcPathError( - `Control character in OcPath.file: ${printable(path.file)}`, - path.file, - "OC_PATH_CONTROL_CHAR", - ); + fail("oc:// path requires a file", "", "OC_PATH_FILE_REQUIRED"); } + validateFileSlot(path.file, path.file); if (path.item !== undefined && path.section === undefined) { - throw new OcPathError( - "Structural nesting violation: item requires section", - path.file, - "OC_PATH_NESTING", - ); + fail("Structural nesting violation: item requires section", path.file, "OC_PATH_NESTING"); } - if (path.field !== undefined && path.item === undefined && path.section !== undefined) { - // section + field without item is allowed for frontmatter-shaped addressing? No — - // frontmatter is `oc://FILE/[frontmatter]/key`. For now require item-or-no-section - // with field. Reconsider when frontmatter addressing lands. - throw new OcPathError( - "Structural nesting violation: field requires item when section is present", - path.file, - "OC_PATH_NESTING", - ); - } - if (path.field !== undefined && path.item === undefined && path.section === undefined) { - // `{ file, field }` with no section / item would emit `oc://FILE/FIELD` - // and silently re-parse as `{ file, section: FIELD }`. The struct - // already violates the slot grammar (field implies item) — refuse - // here so programmatic callers don't ship a path that round-trips - // to a different shape than they wrote. 
- throw new OcPathError( - "Structural nesting violation: field requires item", - path.file, - "OC_PATH_NESTING", - ); + if (path.field !== undefined && path.item === undefined) { + fail("Structural nesting violation: field requires item", path.file, "OC_PATH_NESTING"); } - // Each slot is a dotted sub-segment string. Round-trip requires that - // raw sub-segments containing the path grammar's special characters - // get quoted before concatenation, OR pass through if already in a - // structural form (quoted `"..."`, predicate `[...]`, union `{...}`, - // literal sentinel `[frontmatter]` etc.). Plain concatenation would - // silently turn a raw `foo/bar` slot into two segments at parse - // time. Closes the formatter quoted-segment gap. + // Round-trip requires raw sub-segments to be quoted before + // concatenation, OR passed through if already in structural form + // (quoted, predicate, union, sentinel). Plain concatenation would + // silently split a raw `foo/bar` slot into two segments at parse. const formatSubSegment = (sub: string): string => { - if (isQuotedSeg(sub)) { - return sub; - } // already quoted - if (sub.startsWith("[") && sub.endsWith("]")) { - return sub; - } // predicate / sentinel - if (sub.startsWith("{") && sub.endsWith("}")) { - return sub; - } // union + if (isQuotedSeg(sub)) return sub; + if (sub.startsWith("[") && sub.endsWith("]")) return sub; + if (sub.startsWith("{") && sub.endsWith("}")) return sub; return quoteSeg(sub); }; - // Reject content the parser would refuse on the way back in. Without - // these guards a struct like `{section:'foo.'}` would emit - // `oc://X/foo.""` (an empty quoted sub-segment) and re-parse with - // `section: 'foo.""'` — silent round-trip mangling. Mirrors - // validateSubSegment's empty + control-char checks at the format - // boundary so callers see the violation here, not on the next parse. 
const validateSubForFormat = (sub: string, slotName: string): void => { if (sub.length === 0) { - throw new OcPathError( + fail( `Empty dotted sub-segment in OcPath.${slotName}`, path.file, "OC_PATH_EMPTY_SUB_SEGMENT", ); } if (hasControlChar(sub)) { - throw new OcPathError( + fail( `Control character in OcPath.${slotName} sub-segment "${printable(sub)}"`, path.file, "OC_PATH_CONTROL_CHAR", @@ -395,62 +235,35 @@ export function formatOcPath(path: OcPath): string { return subs.map(formatSubSegment).join("."); }; - // The file slot uses simpler quoting than section/item/field: dots - // are normal in filenames (`AGENTS.md`) and don't need quoting; we - // only quote when the file contains chars that would otherwise be - // parsed as structure — primarily `/` which is the segment separator. - // `quoteSeg` already wraps + escapes when needed; we narrow the - // trigger so plain `AGENTS.md` round-trips bare. + // File slot uses lighter quoting than section/item/field: dots are + // normal in filenames (`AGENTS.md`); only quote when the file + // contains chars that would parse as structure (primarily `/`). const fileNeedsQuote = /[/[\]{}?&%"\s]/.test(path.file); const formattedFile = fileNeedsQuote ? quoteSeg(path.file) : path.file; let out = OC_SCHEME + formattedFile; - if (path.section !== undefined) { - out += "/" + formatSlot(path.section, "section"); - } - if (path.item !== undefined) { - out += "/" + formatSlot(path.item, "item"); - } - if (path.field !== undefined) { - out += "/" + formatSlot(path.field, "field"); - } - if (path.session !== undefined) { - out += "?session=" + path.session; - } - // Symmetric upper bound with parseOcPath's MAX_PATH_LENGTH cap. 
Without - // this, a struct whose formatted form exceeds the cap would emit a - // string `parseOcPath` immediately rejects — silently breaking the - // round-trip contract and surprising every consumer that buffers / - // logs / column-aligns by the cap (audit events, error messages, - // editor breadcrumbs). + if (path.section !== undefined) out += "/" + formatSlot(path.section, "section"); + if (path.item !== undefined) out += "/" + formatSlot(path.item, "item"); + if (path.field !== undefined) out += "/" + formatSlot(path.field, "field"); + if (path.session !== undefined) out += "?session=" + path.session; + if (out.length > MAX_PATH_LENGTH) { - throw new OcPathError( + fail( `Formatted oc:// exceeds ${MAX_PATH_LENGTH} bytes (length: ${out.length})`, out.slice(0, 80) + "…", "OC_PATH_TOO_LONG", ); } - // Sentinel guard at the path-string emit boundary. The substrate's - // contract: emit boundaries refuse to write the redaction sentinel, - // and `formatOcPath` IS such a boundary — path strings flow into - // telemetry, audit events, error messages, find result `path` fields. - // Without this guard, a struct field carrying the literal - // `__OPENCLAW_REDACTED__` slips past every consumer except the CLI - // (which has its own scrubSentinel layer). + // Path strings flow into telemetry / audit / error messages — refuse + // the redaction sentinel here so it can't slip past consumers. if (out.includes(REDACTED_SENTINEL)) { throw new OcEmitSentinelError(out); } return out; } -/** - * Type guard — true iff `input` is a non-empty string that `parseOcPath` - * would accept. Does not throw; callers can branch on this before - * parsing. - */ +/** True iff `input` is a string `parseOcPath` would accept. 
*/ export function isValidOcPath(input: unknown): input is string { - if (typeof input !== "string") { - return false; - } + if (typeof input !== "string") return false; try { parseOcPath(input); return true; @@ -460,40 +273,25 @@ export function isValidOcPath(input: unknown): input is string { } /** - * Positional tokens — single-match primitives that resolve to one - * concrete index/key based on container size at resolve time. Unlike - * `*` / `**`, these do NOT trigger the wildcard guard on - * `resolveOcPath` / `setOcPath`: they always pick exactly one element. + * Positional tokens resolve to one concrete index/key based on + * container size at lookup time. Unlike `*`/`**`, they pick exactly + * one element so they don't trigger the wildcard guard. * - * `$first` — index 0 (seq/array) or first-declared key (map/object) - * `$last` — last index, or last-declared key - * `-N` — Nth from the end (seq/array only); `-1` = last, `-2` = penultimate - * - * Out-of-range tokens (`$first` on an empty container, `-99` on a - * 3-item array) yield `null` from resolve and an empty match list - * from find. - * - * `$last` was the original jsonl-only sentinel for line addressing - * (`oc://X/$last/event`); it's now generalized to every kind. + * `$first` — index 0 / first-declared key + * `$last` — last index / last-declared key + * `-N` — Nth from end (indexable only); `-1` = last */ export const POS_FIRST = "$first"; export const POS_LAST = "$last"; -/** True iff `seg` is a positional token that resolves at lookup time. */ export function isPositionalSeg(seg: string): boolean { return seg === POS_FIRST || seg === POS_LAST || /^-\d+$/.test(seg); } /** - * Ordinal addressing — `#N` (zero-based) targets the Nth item by - * document order, regardless of how the kind ordinarily addresses - * children. - * - * For seq/array kinds where children are already addressed by integer - * index, `#N` is a synonym for `N`. 
Where it earns its keep is in - * **slug-addressed kinds** (md items, where two items can share a - * slug like `- foo: a` / `- foo: b`): `#0` and `#1` distinguish them - * by document order even when slug-addressing collapses. + * Ordinal addressing — `#N` targets the Nth item by document order. + * Earns its keep on slug-addressed kinds (md items can share a slug + * via `- foo: a` / `- foo: b`); `#0`/`#1` distinguish them. */ export function isOrdinalSeg(seg: string): boolean { return /^#\d+$/.test(seg); @@ -504,56 +302,31 @@ export function parseOrdinalSeg(seg: string): number | null { return m === null || m[1] === undefined ? null : Number(m[1]); } -/** - * Container shape passed to `resolvePositionalSeg`. Indexable - * containers (seq, array) provide `size`. Keyed containers (map, - * object) provide the ordered `keys` list — `$first` picks the first, - * `$last` the last; negative indices are NOT valid on keyed - * containers (use the literal key instead). - */ +/** Indexable containers provide `size`; keyed containers provide ordered `keys`. */ export interface PositionalContainer { readonly indexable: boolean; readonly size: number; readonly keys?: readonly string[]; } -/** - * Resolve a positional token (`$first` / `$last` / `-N`) against a - * container's shape, returning the concrete segment (numeric index or - * literal key) or `null` if the token can't apply. - */ +/** Resolve `$first`/`$last`/`-N` against a container; null when out of range. */ export function resolvePositionalSeg(seg: string, container: PositionalContainer): string | null { if (seg === POS_FIRST) { - if (container.size === 0) { - return null; - } - if (!container.indexable) { - return container.keys?.[0] ?? null; - } + if (container.size === 0) return null; + if (!container.indexable) return container.keys?.[0] ?? 
null; return "0"; } if (seg === POS_LAST) { - if (container.size === 0) { - return null; - } - if (!container.indexable) { - return container.keys?.[container.keys.length - 1] ?? null; - } + if (container.size === 0) return null; + if (!container.indexable) return container.keys?.[container.keys.length - 1] ?? null; return String(container.size - 1); } if (/^-\d+$/.test(seg)) { - if (!container.indexable) { - return null; - } - // P-040 — guard against integer-overflow in the magnitude. A - // 13-digit-or-longer string parses to a Number that exceeds 1e9 - // (well below MAX_SAFE_INTEGER but already absurd as an array - // index). Reject before doing the addition so the caller sees a - // clean null rather than a coerced-to-zero surprise. + if (!container.indexable) return null; + // Guard against absurd magnitudes — a huge `-N` can lose integer + // precision or parse to -Infinity, so reject it before the addition. const raw = Number(seg); - if (!Number.isInteger(raw) || Math.abs(raw) > 1e9) { - return null; - } + if (!Number.isInteger(raw) || Math.abs(raw) > 1e9) return null; const n = container.size + raw; return n >= 0 && n < container.size ? String(n) : null; } @@ -562,47 +335,26 @@ export function resolvePositionalSeg(seg: string, container: PositionalContainer /** * Wildcard tokens permitted in `findOcPaths` patterns. - * - * `*` matches a single sub-segment (e.g. one map key or one array index). - * `**` matches zero or more sub-segments at any depth (recursive descent). - * - * Wildcards are **not** allowed in `resolveOcPath` / `setOcPath` — those - * verbs require an exact concrete path. `findOcPaths` is the only verb - * that consumes patterns. Use `hasWildcard` to enforce this at the - * boundary. + * `*` matches one sub-segment; `**` matches zero or more (recursive). + * Reject in resolve/set via `isPattern` (`hasWildcard` is the deprecated alias). 
*/ export const WILDCARD_SINGLE = "*"; export const WILDCARD_RECURSIVE = "**"; /** - * `true` iff any sub-segment of the path is a multi-match pattern — - * `*`, `**`, a union `{a,b,c}`, or a value predicate `[key=value]`. - * Single-match verbs (`resolveOcPath` / `setOcPath`) reject these - * uniformly; only `findOcPaths` consumes them. - * - * **Naming**: `isPattern` is the v1 name; `hasWildcard` is retained - * as a back-compat alias since the literal "wildcard" framing was - * what shipped first. Prefer `isPattern` in new code. + * True iff any sub-segment is a multi-match pattern (`*`, `**`, + * union `{a,b,c}`, or predicate `[k=v]`). Single-match verbs reject + * these; only `findOcPaths` consumes them. */ export function isPattern(path: OcPath): boolean { for (const slot of [path.section, path.item, path.field]) { - if (slot === undefined) { - continue; - } + if (slot === undefined) continue; // Quote-aware split — `slot.split('.')` would shred quoted keys - // containing literal `*` (e.g. `"items.*.glob"`) and falsely - // detect them as wildcards, causing single-match verbs to reject - // a concrete path. + // containing literal `*` and falsely flag them as wildcards. for (const sub of splitRespectingBrackets(slot, ".")) { - if (sub === WILDCARD_SINGLE || sub === WILDCARD_RECURSIVE) { - return true; - } - if (isUnionSeg(sub)) { - return true; - } - if (isPredicateSeg(sub)) { - return true; - } + if (sub === WILDCARD_SINGLE || sub === WILDCARD_RECURSIVE) return true; + if (isUnionSeg(sub)) return true; + if (isPredicateSeg(sub)) return true; } } return false; @@ -611,79 +363,31 @@ export function isPattern(path: OcPath): boolean { /** @deprecated v1 — use {@link isPattern}. Behaviorally identical. */ export const hasWildcard = isPattern; -/** - * Union segment — `{a,b,c}` matches each comma-separated alternative. 
- * - * oc://X/steps/* /{command,run} → each step's command OR run - * oc://X/{steps,inputs}/* /id → id under steps OR inputs - * - * Whitespace inside braces is preserved. Empty alternatives reject. - * Nested braces are not supported in v0. - */ +/** Union segment `{a,b,c}` matches each comma-separated alternative. */ export function isUnionSeg(seg: string): boolean { return seg.length >= 2 && seg.startsWith("{") && seg.endsWith("}"); } export function parseUnionSeg(seg: string): readonly string[] | null { - if (!isUnionSeg(seg)) { - return null; - } + if (!isUnionSeg(seg)) return null; const inner = seg.slice(1, -1); - if (inner.length === 0) { - return null; - } + if (inner.length === 0) return null; const alts = inner.split(","); - if (alts.some((a) => a.length === 0)) { - return null; - } + if (alts.some((a) => a.length === 0)) return null; return alts; } /** - * Value predicate segment — `[key<op>value]` filters a parent - * enumeration by sibling-field comparison. Used in find patterns: - * - * oc://X/steps/[id=build] → step whose `id` equals `build` - * oc://X/steps/[id!=test]/command → command of every non-test step - * oc://X/steps/[command*=npm]/id → id of every step whose command contains `npm` - * oc://X/steps/[command^=npm run]/id → id of every step whose command starts with `npm run` - * oc://X/steps/[id$=_test]/command → command of every step whose id ends with `_test` - * oc://X/models/[contextWindow>=1000000] → models with 1M+ context window - * oc://X/models/[maxTokens>128000]/id → id of every model with maxTokens > 128000 - * - * Operators: - * - * String (CSS attribute-selector style): - * `=` equality (string-coerced) - * `!=` inequality - * `*=` substring contains - * `^=` starts-with - * `$=` ends-with - * - * Numeric (v1.1 — addresses openclaw#54383, openclaw#76532): - * `<` less than - * `<=` less than or equal - * `>` greater than - * `>=` greater than or equal - * - * Numeric ops require both `actual` and `value` to coerce to finite - * 
numbers via `Number()`. Non-numeric leaves never match a numeric - * predicate (consistent with how `*=` doesn't apply to numbers). - * - * Operator search is greedy on multi-char operators — `[a!=b]` is - * `key=a, op=!=, value=b`, not `key=a!, op==, value=b`. Multi-char - * operators (`!=`, `<=`, `>=`, `*=`, `^=`, `$=`) are tried before - * single-char (`=`, `<`, `>`). + * Value predicate `[key<op>value]` filters by sibling-field comparison. + * Operators: `=` `!=` `*=` `^=` `$=` (string), `<` `<=` `>` `>=` (numeric). + * Multi-char operators are tried before single-char so `<=` beats `<`. */ export type PredicateOp = "=" | "!=" | "*=" | "^=" | "$=" | "<" | "<=" | ">" | ">="; -/** Multi-char first so greedy match wins (`<=` before `<`, etc.). */ const PREDICATE_OPS: readonly PredicateOp[] = ["!=", "*=", "^=", "$=", "<=", ">=", "<", ">", "="]; export function isPredicateSeg(seg: string): boolean { - if (seg.length < 4 || !seg.startsWith("[") || !seg.endsWith("]")) { - return false; - } + if (seg.length < 4 || !seg.startsWith("[") || !seg.endsWith("]")) return false; const inner = new Set(seg.slice(1, -1)); return PREDICATE_OPS.some((op) => inner.has(op)); } @@ -695,48 +399,27 @@ export interface PredicateSpec { } export function parsePredicateSeg(seg: string): PredicateSpec | null { - if (seg.length < 4 || !seg.startsWith("[") || !seg.endsWith("]")) { - return null; - } + if (seg.length < 4 || !seg.startsWith("[") || !seg.endsWith("]")) return null; const inner = seg.slice(1, -1); - // Leftmost operator wins, with multi-char tried before single-char - // at each position. So `[a==b]` parses as `key=a, op==, value==b` - // (leftmost `=`), and `[a<=b]` parses as `key=a, op=<=, value=b` - // (multi-char `<=` beats single `<` at the same position). + // Leftmost operator wins; at each position, multi-char beats single + // (so `[a<=b]` parses as op=`<=`, not op=`<`). 
for (let i = 1; i < inner.length; i++) { for (const op of PREDICATE_OPS) { - if (!inner.startsWith(op, i)) { - continue; - } - if (i + op.length >= inner.length) { - continue; - } // empty value - return { - key: inner.slice(0, i), - op, - value: inner.slice(i + op.length), - }; + if (!inner.startsWith(op, i)) continue; + if (i + op.length >= inner.length) continue; // empty value + return { key: inner.slice(0, i), op, value: inner.slice(i + op.length) }; } } return null; } /** - * Evaluate a predicate against a string-coerced leaf value. The - * walker fetches the sibling's value and passes it to this helper. - * Returns `false` for non-leaf children (predicate can't compare an - * object/array sibling, so it never matches). - * - * For numeric operators (`<` / `<=` / `>` / `>=`), both `actual` and - * `pred.value` are coerced via `Number()` and checked with - * `Number.isFinite`. Non-numeric leaves never match — this is - * symmetric with how `*=` / `^=` / `$=` don't apply to numbers - * (a number's "string form" comparison would be confusing). + * Evaluate a predicate against a string-coerced leaf value. Numeric + * operators require both sides to coerce to finite numbers via + * `Number()`. Returns false for null (non-leaf children). */ export function evaluatePredicate(actual: string | null, pred: PredicateSpec): boolean { - if (actual === null) { - return false; - } + if (actual === null) return false; switch (pred.op) { case "=": return actual === pred.value; @@ -774,28 +457,20 @@ export function evaluatePredicate(actual: string | null, pred: PredicateSpec): b } /** - * Flatten the path into the concrete sub-segment list the per-kind - * resolvers walk against (`[...section.split('.'), ...item.split('.'), - * ...field.split('.')]`). Returned alongside the slot offsets so a - * caller can reconstruct an `OcPath` from a concrete walk by re-packing - * sub-segments back into the original slots. 
+ * Flatten the path into a concrete sub-segment list plus slot offsets, + * so a caller can reconstruct an `OcPath` from a concrete walk by + * re-packing sub-segments back into their original slots. */ export interface PathSegmentLayout { readonly subs: readonly string[]; - /** Number of sub-segments in `section` (0 if absent). */ readonly sectionLen: number; - /** Number of sub-segments in `item` (0 if absent). */ readonly itemLen: number; - /** Number of sub-segments in `field` (0 if absent). */ readonly fieldLen: number; } export function getPathLayout(path: OcPath): PathSegmentLayout { - // Quote-aware split — `slot.split('.')` would shred a quoted segment - // containing a literal `.` (e.g. `"a.b"`) into two sub-segments and - // break the find-walker / repackPath layout contract. Mirror the - // splitter used by `parseOcPath` so downstream walkers see the same - // sub-segment shape on both directions. + // Quote-aware split — `.split('.')` would shred a quoted segment + // containing a literal `.` (e.g. `"a.b"`) and break repackPath. const sectionSubs = path.section === undefined ? [] : splitRespectingBrackets(path.section, "."); const itemSubs = path.item === undefined ? [] : splitRespectingBrackets(path.item, "."); const fieldSubs = path.field === undefined ? [] : splitRespectingBrackets(path.field, "."); @@ -808,19 +483,13 @@ export function getPathLayout(path: OcPath): PathSegmentLayout { } /** - * Re-pack a concrete sub-segment list (matching the layout of `pattern`) - * into an `OcPath`. Wildcard segments in `pattern` are replaced by their - * concrete counterparts in `subs`; non-wildcard segments are copied - * verbatim. The slot boundaries (section/item/field) are preserved so - * the output mirrors the input pattern's shape. - * - * Throws if `subs.length !== pattern layout subs length` — the walker - * must always produce a complete concrete path. 
+ * Re-pack a concrete sub-segment list into an `OcPath` preserving the + * pattern's slot boundaries. Throws on length mismatch. */ export function repackPath(pattern: OcPath, subs: readonly string[]): OcPath { const layout = getPathLayout(pattern); if (subs.length !== layout.subs.length) { - throw new OcPathError( + fail( `repack length mismatch: pattern has ${layout.subs.length} sub-segments, got ${subs.length}`, formatOcPath(pattern), "OC_PATH_REPACK_LENGTH", @@ -839,86 +508,74 @@ export function repackPath(pattern: OcPath, subs: readonly string[]): OcPath { } function extractSession(queryPart: string): string | undefined { - if (queryPart.length === 0) { - return undefined; - } + if (queryPart.length === 0) return undefined; for (const pair of queryPart.split("&")) { const eqIndex = pair.indexOf("="); - if (eqIndex === -1) { - continue; - } + if (eqIndex === -1) continue; const key = pair.slice(0, eqIndex); const value = pair.slice(eqIndex + 1); - if (key === "session" && value.length > 0) { - return value; - } + if (key === "session" && value.length > 0) return value; } return undefined; } /** - * Split `s` on `delim`, but treat balanced `[...]`, `{...}`, and - * `"..."` regions as opaque — delimiters inside brackets/braces or - * inside double quotes don't trigger splits. - * - * Quoted segments (v1.0 — addresses openclaw#69004, openclaw#76532) - * let path keys contain `/`, `.`, `?`, `&`, `%`, and whitespace - * verbatim: - * - * oc://X/"foo/bar"/baz → key `foo/bar` - * oc://X/agents.defaults.models/"anthropic/claude-opus-4-7"/alias - * - * Inside a quoted segment, `\\` escapes a backslash and `\"` escapes - * a quote. Other backslashes are literal. - * - * Throws `OcPathError` on unbalanced brackets/braces/quotes — malformed - * input is rejected at parse time rather than silently tolerated. - * - * @internal — exported for use by the find walker; not part of the - * public OcPath API surface. + * Walk `s` respecting `[...]`/`{...}`/`"..."` regions. 
Calls `onChar` + * for every character with `atTop` indicating depth-0 status. + * `onChar` returns `"stop"` to short-circuit. On unbalanced + * brackets/braces/quotes, calls `onUnbalanced` (which must throw). */ -/** - * Find the first occurrence of `ch` at the TOP level of `s` — - * outside any balanced `[...]`, `{...}`, or `"..."` regions. - * Used by `parseOcPath` to locate the query separator (`?`) without - * mistakenly splitting inside a quoted key like `"foo?bar"`. - * - * Returns `-1` if the character is not present at the top level. - */ -export function indexOfTopLevel(s: string, ch: string): number { +type ScanCallback = (c: string, i: number, atTop: boolean) => "stop" | void; +function scanBracketAware(s: string, onChar: ScanCallback, onUnbalanced: () => never): void { let depthBracket = 0; let depthBrace = 0; let inQuote = false; for (let i = 0; i < s.length; i++) { const c = s[i]; if (inQuote) { + // `\\` / `\"` consume the next char. if (c === "\\" && i + 1 < s.length) { + if (onChar(c, i, false) === "stop") return; + if (onChar(s[i + 1], i + 1, false) === "stop") return; i++; continue; } - if (c === '"') { - inQuote = false; - } + if (c === '"') inQuote = false; + if (onChar(c, i, false) === "stop") return; continue; } if (c === '"') { inQuote = true; + if (onChar(c, i, false) === "stop") return; continue; } - if (c === "[") { - depthBracket++; - } else if (c === "]") { - depthBracket--; - } else if (c === "{") { - depthBrace++; - } else if (c === "}") { - depthBrace--; - } - if (c === ch && depthBracket === 0 && depthBrace === 0) { - return i; - } + if (c === "[") depthBracket++; + else if (c === "]") depthBracket--; + else if (c === "{") depthBrace++; + else if (c === "}") depthBrace--; + if (depthBracket < 0 || depthBrace < 0) onUnbalanced(); + if (onChar(c, i, depthBracket === 0 && depthBrace === 0) === "stop") return; } - return -1; + if (depthBracket !== 0 || depthBrace !== 0 || inQuote) onUnbalanced(); +} + +/** First top-level occurrence of 
`ch` in `s`; -1 when absent. */ +export function indexOfTopLevel(s: string, ch: string): number { + let result = -1; + const fail = (): never => { + throw new OcPathError(`Unbalanced bracket/brace in oc:// path: ${s}`, s, "OC_PATH_UNBALANCED"); + }; + scanBracketAware( + s, + (c, i, atTop) => { + if (atTop && c === ch) { + result = i; + return "stop"; + } + }, + fail, + ); + return result; } export function splitRespectingBrackets( @@ -927,83 +584,35 @@ export function splitRespectingBrackets( originalInput?: string, ): string[] { const out: string[] = []; - let depthBracket = 0; - let depthBrace = 0; - let inQuote = false; let buf = ""; - for (let i = 0; i < s.length; i++) { - const c = s[i]; - if (inQuote) { - // Inside a quoted region: `\\` and `\"` consume the next char; - // unescaped `"` closes the quote. - if (c === "\\" && i + 1 < s.length) { - buf += c + s[i + 1]; - i++; - continue; - } - if (c === '"') { - inQuote = false; + const ctx = originalInput ?? s; + const onUnbalanced = (): never => { + fail(`Unbalanced bracket/brace in oc:// path: ${ctx}`, ctx, "OC_PATH_UNBALANCED"); + }; + scanBracketAware( + s, + (c, _i, atTop) => { + if (atTop && c === delim) { + out.push(buf); + buf = ""; + return; } buf += c; - continue; - } - if (c === '"') { - inQuote = true; - buf += c; - continue; - } - if (c === "[") { - depthBracket++; - } else if (c === "]") { - depthBracket--; - } else if (c === "{") { - depthBrace++; - } else if (c === "}") { - depthBrace--; - } - if (depthBracket < 0 || depthBrace < 0) { - throw new OcPathError( - `Unbalanced bracket/brace in oc:// path: ${originalInput ?? s}`, - originalInput ?? s, - "OC_PATH_UNBALANCED", - ); - } - if (c === delim && depthBracket === 0 && depthBrace === 0) { - out.push(buf); - buf = ""; - continue; - } - buf += c; - } - if (depthBracket !== 0 || depthBrace !== 0 || inQuote) { - throw new OcPathError( - `Unbalanced bracket/brace/quote in oc:// path: ${originalInput ?? s}`, - originalInput ?? 
s, - "OC_PATH_UNBALANCED", - ); - } + }, + onUnbalanced, + ); out.push(buf); return out; } -/** - * `true` iff `seg` is a fully-quoted segment of the form `"..."`. - * Used by parsers/walkers to dispatch on quoted vs bare segments. - */ +/** True iff `seg` is `"..."`. */ export function isQuotedSeg(seg: string): boolean { return seg.length >= 2 && seg.startsWith('"') && seg.endsWith('"'); } -/** - * Strip surrounding quotes and unescape `\\` / `\"` from a quoted - * segment, yielding the literal content. Inverse of `quoteSeg`. - * - * No-op on bare (unquoted) segments — returns input unchanged. - */ +/** Strip surrounding quotes and unescape `\\`/`\"`. No-op on bare segments. */ export function unquoteSeg(seg: string): string { - if (!isQuotedSeg(seg)) { - return seg; - } + if (!isQuotedSeg(seg)) return seg; const inner = seg.slice(1, -1); let out = ""; for (let i = 0; i < inner.length; i++) { @@ -1021,145 +630,73 @@ export function unquoteSeg(seg: string): string { return out; } -/** - * Quote a literal value for inclusion in a path. If the value contains - * any character that has grammar meaning unquoted (`/`, `.`, `[`, `{`, - * `?`, `&`, `%`, whitespace, or `"`), wrap in quotes and escape - * embedded `\\` / `"`. Otherwise return as-is. - * - * Used by `formatOcPath` to round-trip slot values that came from - * quoted-segment input. - */ +/** Quote a literal for path inclusion if it contains any grammar character. */ export function quoteSeg(value: string): string { - if (value.length === 0) { - return '""'; - } + if (value.length === 0) return '""'; const needsQuote = /[/.[\]{}?&%"\s]/.test(value); - if (!needsQuote) { - return value; - } + if (!needsQuote) return value; const escaped = value.replace(/\\/g, "\\\\").replace(/"/g, '\\"'); return `"${escaped}"`; } +// Defense-in-depth — the splitter validates segments it splits; this +// catches stray unmatched brackets in unsplit ones. 
function validateBrackets(seg: string, input: string): void { - // The splitter already enforced balance — this is a defense-in-depth - // pass that also catches stray unmatched brackets in segments that - // didn't trigger a split. Skip characters inside quoted regions - // (`"..."` with `\` escape) so quoted segments containing literal - // `[` / `{` round-trip cleanly. Without this skip, `formatOcPath` - // would emit `"a[b"` (correctly quoted) and `parseOcPath` would - // reject it here as unbalanced — breaking the round-trip. - let depthBracket = 0; - let depthBrace = 0; - let inQuote = false; - let escaped = false; - for (const c of seg) { - if (inQuote) { - if (escaped) { - escaped = false; - } else if (c === "\\") { - escaped = true; - } else if (c === '"') { - inQuote = false; - } - continue; - } - if (c === '"') { - inQuote = true; - continue; - } - if (c === "[") { - depthBracket++; - } else if (c === "]") { - depthBracket--; - } else if (c === "{") { - depthBrace++; - } else if (c === "}") { - depthBrace--; - } - if (depthBracket < 0 || depthBrace < 0) { - throw new OcPathError( + scanBracketAware( + seg, + () => undefined, + () => { + fail( `Unbalanced bracket/brace in segment "${seg}": ${printable(input)}`, input, "OC_PATH_UNBALANCED", ); - } - } - if (depthBracket !== 0 || depthBrace !== 0) { - throw new OcPathError( - `Unbalanced bracket/brace in segment "${seg}": ${printable(input)}`, - input, - "OC_PATH_UNBALANCED", - ); - } + }, + ); } function validateSubSegment(sub: string, input: string): void { - // Empty sub-segment from dotted-form means a stray `.` (e.g. `a..b`). if (sub.length === 0) { - throw new OcPathError( + fail( `Empty dotted sub-segment in oc:// path: ${printable(input)}`, input, "OC_PATH_EMPTY_SUB_SEGMENT", ); } - - // P-004 / P-011 — control characters (including null byte) banned - // in segments. They have no legitimate use in addressing and they - // break downstream consumers (terminals, C strings, log lines). 
- // Applied to both quoted and unquoted forms — quoting lets you put - // slashes in keys, not control bytes. if (hasControlChar(sub)) { - throw new OcPathError( + fail( `Control character in oc:// segment "${printable(sub)}": ${printable(input)}`, input, "OC_PATH_CONTROL_CHAR", ); } + // Quoting opts out of identifier-shape rules — content is verbatim. + if (isQuotedSeg(sub)) return; - // Quoted segments (v1.0): content is verbatim and the rest of these - // checks (whitespace, reserved chars) don't apply — quoting is the - // explicit opt-out from those identifier-shape rules. Skip ahead. - if (isQuotedSeg(sub)) { - return; - } - - // P-026 — reserved characters that the path grammar itself uses - // (`?` for query, `&` between query pairs, `%` for URL escapes). - // Allowed inside predicate values where they'll be quoted at the - // path level by the bracket containment rule (P-012/P-013). + // Reserved characters used by the path grammar itself (`?`/`&`/`%`). + // Allowed inside predicate / union segments — those are content. if (!sub.startsWith("[") && !sub.startsWith("{")) { if (RESERVED_CHARS_RE.test(sub)) { - throw new OcPathError( + fail( `Reserved character (\`?\` / \`&\` / \`%\`) in oc:// segment "${sub}": ${printable(input)}`, input, "OC_PATH_RESERVED_CHAR", ); } - } - - // P-003 — leading or trailing whitespace in identifier-shaped subs. - // Predicate / union segments don't get this check (their values are - // content and may legitimately want spaces). - if (!sub.startsWith("[") && !sub.startsWith("{")) { if (sub !== sub.trim() || /\s/.test(sub)) { - throw new OcPathError( + fail( `Whitespace in oc:// segment "${sub}": ${printable(input)}`, input, "OC_PATH_WHITESPACE", ); } } - // Bracket grammar: a sub starting with `[` and ending with `]` is - // either a literal sentinel (e.g. `[frontmatter]`) — accepted as-is - // — or a predicate `[keyvalue]`. Mismatched brackets (only one - // side present) are rejected. 
A predicate-shaped segment (contains - // a comparison operator inside) must parse cleanly. + // `[...]` is either a predicate `[kv]` or a literal sentinel + // (e.g. `[frontmatter]`). Mismatched brackets are rejected. const startsBracket = sub.startsWith("["); const endsBracket = sub.endsWith("]"); if (startsBracket !== endsBracket) { - throw new OcPathError( + fail( `Mismatched bracket in segment "${sub}": ${printable(input)}`, input, "OC_PATH_MALFORMED_PREDICATE", @@ -1168,34 +705,31 @@ function validateSubSegment(sub: string, input: string): void { if (startsBracket && endsBracket) { const inner = sub.slice(1, -1); if (inner.length === 0) { - throw new OcPathError( + fail( `Empty bracket segment "${sub}": ${printable(input)}`, input, "OC_PATH_MALFORMED_PREDICATE", ); } - // If it looks like a predicate (has an operator), validate fully. const hasOp = ["!=", "*=", "^=", "$=", "<=", ">=", "<", ">", "="].some((op) => inner.includes(op), ); if (hasOp) { const parsed = parsePredicateSeg(sub); if (parsed === null || parsed.key.length === 0 || parsed.value.length === 0) { - throw new OcPathError( + fail( `Malformed predicate "${sub}" — must be \`[keyvalue]\` with non-empty key and value: ${printable(input)}`, input, "OC_PATH_MALFORMED_PREDICATE", ); } } - // No operator → literal sentinel segment (e.g. `[frontmatter]`), - // accepted as-is for back-compat. + // Op-less brackets are literal sentinel segments (back-compat). } - // Brace grammar: union `{a,b,c}`. Mismatched or empty is rejected. 
const startsBrace = sub.startsWith("{"); const endsBrace = sub.endsWith("}"); if (startsBrace !== endsBrace) { - throw new OcPathError( + fail( `Mismatched brace in segment "${sub}": ${printable(input)}`, input, "OC_PATH_MALFORMED_UNION", @@ -1204,14 +738,14 @@ function validateSubSegment(sub: string, input: string): void { if (startsBrace && endsBrace) { const inner = sub.slice(1, -1); if (inner.length === 0) { - throw new OcPathError( + fail( `Empty union "${sub}" — must contain at least one alternative: ${printable(input)}`, input, "OC_PATH_MALFORMED_UNION", ); } if (inner.split(",").some((a) => a.length === 0)) { - throw new OcPathError( + fail( `Empty alternative in union "${sub}": ${printable(input)}`, input, "OC_PATH_MALFORMED_UNION", diff --git a/extensions/oc-path/src/oc-path/parse.ts b/extensions/oc-path/src/oc-path/parse.ts index 8ebfc5ea7fe..0cc853b35fb 100644 --- a/extensions/oc-path/src/oc-path/parse.ts +++ b/extensions/oc-path/src/oc-path/parse.ts @@ -1,27 +1,12 @@ /** - * Generic markdown-flavored parser for the workspace files. + * Markdown parser for workspace files: frontmatter + preamble + H2 + * blocks (with bullet items as the only addressable structural child). + * Tokenization via markdown-it; frontmatter handled here. * - * Produces a `MdAst` addressing index over `raw` bytes: frontmatter - * (if present), preamble (prose before first H2), and an H2-block tree - * with items extracted for OcPath resolution. + * Grammar opinions (indented `##`, empty `## `, ordered lists, nested + * sub-bullets) live in lint rules, not the parser. * - * Tokenization is delegated to markdown-it; this module owns the - * frontmatter detector (markdown-it does not handle YAML frontmatter - * natively) and the token-stream walker that buckets headings and - * bullets into the addressable AST shape. 
Tables and fenced code - * blocks are NOT first-class AST children — substrate addressing - * doesn't go inside them, and tokenizer-level structure (which - * markdown-it already gets right) is sufficient to ensure `##` and - * `-` inside them aren't misparsed as headings or items. - * - * **Grammar opinions live in lint rules, not the parser.** Indented - * `## foo`, empty `## `, ordered (`1.`) lists, and nested sub-bullets - * are all recognized as headings / items here; downstream lint rules - * (`OC_HEADING_INDENTED`, `OC_HEADING_EMPTY`, etc.) decide whether - * those shapes are OK in a particular file. - * - * **Byte-fidelity contract**: `raw` is preserved on the AST root so - * `emitMd(parse(raw)) === raw` for every input the parser accepts. + * Byte-fidelity: `emitMd(parse(raw)) === raw`. * * @module @openclaw/oc-path/parse */ @@ -124,10 +109,7 @@ function walkBlocks( bodyLines: readonly string[], bodyFileLine: number, ): { preamble: string; blocks: AstBlock[] } { - // Match atx-style `##` only — setext h2 (`Heading\n---`) carries - // `markup: "-"` on the heading_open token, so the `markup === "##"` - // filter picks atx exclusively. Authors who want setext can still - // write it; substrate just doesn't address it as a section. + // Match atx `##` only; setext h2 has `markup: "-"`. const h2: { tokenIdx: number; lineIdx: number; text: string }[] = []; for (let i = 0; i < tokens.length; i++) { const t = tokens[i]; @@ -147,9 +129,8 @@ function walkBlocks( for (let h = 0; h < h2.length; h++) { const start = h2[h].lineIdx; const end = h + 1 < h2.length ? h2[h + 1].lineIdx : bodyLines.length; - // Slice tokens by INDEX so descendant tokens with no `map` (table - // cells, list markers, inline content) ride along with their - // mapped parent. heading_open / inline / heading_close = 3 tokens. + // Slice by INDEX so unmapped descendants (cells, markers, inline) + // ride along with their parent. h2 = open + inline + close = 3. 
const tokenStart = h2[h].tokenIdx + 3; const tokenEnd = h + 1 < h2.length ? h2[h + 1].tokenIdx : tokens.length; const blockTokens = tokens.slice(tokenStart, tokenEnd); @@ -167,16 +148,13 @@ function walkBlocks( // ---------- Item extraction ---------------------------------------------- +// Every list_item_open becomes an item (bullets, numbered, nested +// sub-bullets); lint rules flag depth / duplicate-slug collisions. function extractItems(tokens: readonly Token[], bodyFileLine: number): AstItem[] { - // Every `list_item_open` becomes an item — bullets, numbered lists, - // nested sub-bullets all included. Lint rules can flag depth or - // duplicate-slug collisions; the parser stays opinion-free. const items: AstItem[] = []; for (let i = 0; i < tokens.length; i++) { const t = tokens[i]; - if (t.type !== "list_item_open" || t.map === null) { - continue; - } + if (t.type !== "list_item_open" || t.map === null) continue; // First inline at the item's own depth is the item text. let nestedDepth = 0; let text = ""; diff --git a/extensions/oc-path/src/oc-path/resolve.ts b/extensions/oc-path/src/oc-path/resolve.ts index 918e260dd80..98f6a408612 100644 --- a/extensions/oc-path/src/oc-path/resolve.ts +++ b/extensions/oc-path/src/oc-path/resolve.ts @@ -1,20 +1,11 @@ /** - * OcPath → AST node resolver. + * OcPath → MdAst node. Walks an in-memory AST; the file slot is + * informational (callers verify file matching upstream). * - * Resolves an `OcPath` against a `MdAst` and returns the matched - * node (block / item / frontmatter entry / kv field) or `null` if the - * path doesn't match anything. - * - * The address dispatch: - * - * { file } → AST root - * { file, section } → AstBlock with matching slug - * { file, section, item } → AstItem inside that block - * { file, section, item, field } → kv.value of that item if kv.key matches - * - * The `file` segment is informational here — callers verify file - * matching before passing the AST. 
The resolver doesn't load files; it - * walks an in-memory AST. + * { file } → root + * { file, section } → block + * { file, section, item } → item + * { file, section, item, field } → kv.value * * @module @openclaw/oc-path/resolve */ @@ -23,10 +14,6 @@ import type { AstBlock, AstItem, FrontmatterEntry, MdAst } from "./ast.js"; import type { OcPath } from "./oc-path.js"; import { isOrdinalSeg, isPositionalSeg, parseOrdinalSeg, resolvePositionalSeg } from "./oc-path.js"; -/** - * The resolved target plus a stable description of what kind of node it - * is. Lint rules and doctor fixers branch on `kind`. - */ export type OcPathMatch = | { readonly kind: "root"; readonly node: MdAst } | { readonly kind: "frontmatter"; readonly node: FrontmatterEntry } @@ -41,89 +28,46 @@ export type OcPathMatch = }; /** - * Resolve an `OcPath` against an AST. Returns the matched node or - * `null`. Slugs match case-insensitively against `slugify(input)` — - * "Boundaries" matches a section heading "## Boundaries" because both - * slugify to "boundaries". - * - * Special-case: `OcPath.section === '[frontmatter]'` (literal) addresses - * frontmatter; `field` then names the frontmatter key. This lets a - * single OcPath shape address both prose-tree fields and frontmatter - * fields without growing the tuple. + * Resolve. Slugs match case-insensitively. `[frontmatter]` is a + * literal section sentinel; the frontmatter key sits at `item` (or + * `field` for 4-segment callers). */ export function resolveMdOcPath(ast: MdAst, path: OcPath): OcPathMatch | null { - // Frontmatter addressing: oc://FILE/[frontmatter]/key - // The frontmatter key sits at the OcPath `item` slot in this 3-segment - // shape; we accept `field` as a fallback for callers that thread - // 4-segment paths. if (path.section === "[frontmatter]") { const key = path.item ?? 
path.field; - if (key === undefined) { - return null; - } + if (key === undefined) return null; const entry = ast.frontmatter.find((e) => e.key === key); - if (entry === undefined) { - return null; - } + if (entry === undefined) return null; return { kind: "frontmatter", node: entry }; } - // Plain file root address. - if (path.section === undefined) { - return { kind: "root", node: ast }; - } + if (path.section === undefined) return { kind: "root", node: ast }; - const sectionSlug = path.section.toLowerCase(); - const block = ast.blocks.find((b) => b.slug === sectionSlug); - if (block === undefined) { - return null; - } + const block = ast.blocks.find((b) => b.slug === path.section!.toLowerCase()); + if (block === undefined) return null; + if (path.item === undefined) return { kind: "block", node: block }; - // Section-only address. - if (path.item === undefined) { - return { kind: "block", node: block }; - } - - // Item addressing: ordinal (`#N`) > positional (`$first`/`$last`/`-N`) - // > slug. Ordinal uses absolute document order so two items sharing - // a slug stay distinguishable. + // Item dispatch: ordinal (#N) > positional ($first/$last/-N) > slug. + // Ordinal uses document order so duplicate-slug items stay distinct. 
let item: AstItem | undefined; if (isOrdinalSeg(path.item)) { const n = parseOrdinalSeg(path.item); - if (n === null || n < 0 || n >= block.items.length) { - return null; - } + if (n === null || n < 0 || n >= block.items.length) return null; item = block.items[n]; } else if (isPositionalSeg(path.item)) { const concrete = resolvePositionalSeg(path.item, { indexable: true, size: block.items.length, }); - if (concrete === null) { - return null; - } + if (concrete === null) return null; item = block.items[Number(concrete)]; } else { - const itemSlug = path.item.toLowerCase(); - item = block.items.find((i) => i.slug === itemSlug); - } - if (item === undefined) { - return null; + item = block.items.find((i) => i.slug === path.item!.toLowerCase()); } + if (item === undefined) return null; + if (path.field === undefined) return { kind: "item", node: item, block }; - // Item-only address. - if (path.field === undefined) { - return { kind: "item", node: item, block }; - } - - // Item-field address. Requires the item to have a `kv` and the field - // to match the kv key (case-insensitive). A field on an item without - // kv shape is unresolvable — return null rather than guessing. - if (item.kv === undefined) { - return null; - } - if (item.kv.key.toLowerCase() !== path.field.toLowerCase()) { - return null; - } + if (item.kv === undefined) return null; + if (item.kv.key.toLowerCase() !== path.field.toLowerCase()) return null; return { kind: "item-field", node: item, block, value: item.kv.value }; } diff --git a/extensions/oc-path/src/oc-path/sentinel.ts b/extensions/oc-path/src/oc-path/sentinel.ts index 9a84ea3fd06..0a4b964712e 100644 --- a/extensions/oc-path/src/oc-path/sentinel.ts +++ b/extensions/oc-path/src/oc-path/sentinel.ts @@ -1,39 +1,17 @@ /** - * Substrate-level redaction-sentinel guard. - * - * Closes the `__OPENCLAW_REDACTED__` corruption class by rejecting the - * literal string at the emit boundary. 
Per-call-site reject rules - * (added piecemeal in [#62281](https://github.com/openclaw/openclaw/issues/62281), - * [#44357](https://github.com/openclaw/openclaw/issues/44357), - * [#13495](https://github.com/openclaw/openclaw/issues/13495), and others) - * caught the symptom; this guard removes the substrate that produced - * the symptom in the first place. - * - * Throwing at emit (not at the consumer) means every code path through - * the substrate is covered, including future call sites we haven't - * audited. + * Redaction-sentinel guard. Throws at emit boundaries so every write + * path is covered, not just audited consumers. * * @module @openclaw/oc-path/sentinel */ -/** - * The literal string that marks redacted secrets in OpenClaw's runtime - * representation. Writing it to disk is always a bug — the consumer - * was supposed to drop the redacted view, not pass it through to the - * writer. - */ +/** Literal marking a redacted secret. Writing it to disk is always a bug. */ export const REDACTED_SENTINEL = "__OPENCLAW_REDACTED__"; /** - * Thrown when emit detects a `"__OPENCLAW_REDACTED__"` literal in any - * emitted bytes. Callers should treat this as a fatal write error; - * recovering by stripping the sentinel would silently corrupt the - * file. Fail-closed. - * - * `path` is the OcPath-shaped pointer to where the sentinel was - * detected (e.g., `oc://config/plugins.entries.foo.token`). For - * non-config emits, it's the closest meaningful address (frontmatter - * key, section/item slug, etc.) or just the file name. + * Thrown when emit detects the sentinel in output bytes. Fail-closed: + * stripping would silently corrupt the file. `path` is the closest + * OcPath-shaped pointer to the violation. */ export class OcEmitSentinelError extends Error { readonly code = "OC_EMIT_SENTINEL"; @@ -46,16 +24,8 @@ export class OcEmitSentinelError extends Error { } } -/** - * Throw `OcEmitSentinelError` if `value` contains the redaction - * sentinel anywhere. 
Substring match (not equality) — a hostile caller - * embedding `prefix__OPENCLAW_REDACTED__suffix` in a leaf must be - * rejected just as forcefully as the bare sentinel; the substring form - * still leaks the marker bytes to disk where downstream scanners flag - * the file as corrupted. - * - * No-op for any non-string input. Used by every leaf-write boundary. - */ +// Substring match (not equality) — `prefix__OPENCLAW_REDACTED__suffix` +// still leaks the marker. No-op on non-string input. export function guardSentinel(value: unknown, ocPath: string): void { if (typeof value === "string" && value.includes(REDACTED_SENTINEL)) { throw new OcEmitSentinelError(ocPath); diff --git a/extensions/oc-path/src/oc-path/slug.ts b/extensions/oc-path/src/oc-path/slug.ts index 9397abc23f3..6ce104e39f4 100644 --- a/extensions/oc-path/src/oc-path/slug.ts +++ b/extensions/oc-path/src/oc-path/slug.ts @@ -1,16 +1,6 @@ /** - * Slug derivation for OcPath section/item addressing. - * - * A slug is the kebab-case lowercase form of a heading or item text: - * "Tool Guidance" → "tool-guidance" - * " Restricted Data " → "restricted-data" - * "deny-rule-1" → "deny-rule-1" (already a slug) - * "API_KEY" → "api-key" - * "Multi-tenant isolation" → "multi-tenant-isolation" - * "deny: secrets" → "deny-secrets" (colon + space → hyphen) - * - * Deterministic + idempotent. Used by parse to pre-compute slugs for - * blocks and items, and by resolveOcPath to match section/item names. + * Slug derivation: kebab-case lowercase, deterministic, idempotent. + * Used by parse + resolve for section/item addressing. * * @module @openclaw/oc-path/slug */ @@ -19,20 +9,7 @@ const NON_SLUG_CHARS = /[^a-z0-9-]+/g; const COLLAPSE_HYPHENS = /-+/g; const TRIM_HYPHENS = /^-+|-+$/g; -/** - * Convert arbitrary text into a slug usable as an OcPath segment. - * - * Rules: - * 1. Lowercase - * 2. Replace `_` with `-` - * 3. Replace any non-`[a-z0-9-]` runs with a single `-` - * 4. Collapse repeated `-` - * 5. 
Trim leading/trailing `-` - * - * Returns the empty string for input that has no slug-valid characters - * (e.g., `"!!"` → `""`); callers should treat empty slugs as not - * matchable rather than as wildcards. - */ +/** Empty string for input with no slug-valid chars; callers treat as not matchable. */ export function slugify(text: string): string { return text .toLowerCase() diff --git a/extensions/oc-path/src/oc-path/tests/find.test.ts b/extensions/oc-path/src/oc-path/tests/find.test.ts index 75c2936c374..243ff5992eb 100644 --- a/extensions/oc-path/src/oc-path/tests/find.test.ts +++ b/extensions/oc-path/src/oc-path/tests/find.test.ts @@ -1,12 +1,3 @@ -/** - * `findOcPaths` — multi-match search verb test surface. - * - * Tests cover: `*` single-segment expansion across the supported kinds; - * `**` recursive descent for jsonc; the wildcard guard on - * `resolveOcPath` / `setOcPath`; the slot-shape preservation invariant - * (a `*` in the `item` slot produces concrete paths whose `item` field - * carries the matched value). - */ import { describe, expect, it } from "vitest"; import { findOcPaths } from "../find.js"; import { parseJsonc } from "../jsonc/parse.js"; @@ -15,8 +6,6 @@ import { formatOcPath, hasWildcard, OcPathError, parseOcPath } from "../oc-path. 
import { parseMd } from "../parse.js"; import { resolveOcPath, setOcPath } from "../universal.js"; -// ---------- hasWildcard ---------------------------------------------------- - describe("hasWildcard", () => { it("detects single-segment * in any slot", () => { expect(hasWildcard(parseOcPath("oc://X/*/y"))).toBe(true); @@ -45,17 +34,10 @@ describe("hasWildcard", () => { }); }); -// ---------- Wildcard guard on resolveOcPath / setOcPath ------------------- - describe("wildcard guard", () => { const ast = parseJsonc('{"steps":[{"id":"a","command":"foo"}]}').ast; - it("resolveOcPath throws OcPathError for wildcard pattern (F16)", () => { - // Previously returned `null` — indistinguishable from "path doesn't - // resolve". Now throws with `OC_PATH_WILDCARD_IN_RESOLVE` so the - // CLI / consumers can surface "use findOcPaths" rather than "not - // found". setOcPath uses a discriminated `wildcard-not-allowed` - // reason; this is the resolve-side analogue. + it("resolveOcPath throws OcPathError for wildcard pattern", () => { expect(() => resolveOcPath(ast, parseOcPath("oc://wf/steps/*/command"))).toThrow( /findOcPaths/, ); @@ -85,8 +67,6 @@ describe("wildcard guard", () => { }); }); -// ---------- findOcPaths — fast-path (no wildcards) ------------------------- - describe("findOcPaths — non-wildcard fast-path", () => { it("wraps resolveOcPath result for plain path", () => { const ast = parseJsonc('{"name":"x"}').ast; @@ -102,7 +82,6 @@ describe("findOcPaths — non-wildcard fast-path", () => { }); }); -// ---------- findOcPaths — JSONC -------------------------------------------- describe("findOcPaths — JSONC kind", () => { const jsonc = parseJsonc( @@ -137,7 +116,6 @@ describe("findOcPaths — JSONC kind", () => { }); }); -// ---------- findOcPaths — JSONL -------------------------------------------- describe("findOcPaths — JSONL kind", () => { const jsonl = parseJsonl( @@ -160,9 +138,6 @@ describe("findOcPaths — JSONL kind", () => { } }); - // F8 — line-slot union and 
predicate. Without these, the jsonc - // walker handled them but JSONL fell through to `pickLine(addr)` - // which returns null for union/predicate shapes → silent zero matches. it("union {L1,L2} at line slot enumerates each alternative", () => { const out = findOcPaths(jsonl, parseOcPath("oc://session/{L1,L3}/event")); expect(out).toHaveLength(2); @@ -191,7 +166,6 @@ describe("findOcPaths — JSONL kind", () => { }); }); -// ---------- Positional primitives ($first / $last / -N) ------------------- describe("positional primitives — jsonc", () => { const jsonc = parseJsonc('{"items":[10,20,30]}').ast; @@ -222,7 +196,6 @@ describe("positional primitives — jsonc", () => { }); it("hasWildcard returns false for positional patterns", () => { - // Positional ≠ wildcard — they resolve deterministically. expect(hasWildcard(parseOcPath("oc://X/$last/id"))).toBe(false); expect(hasWildcard(parseOcPath("oc://X/-1/id"))).toBe(false); }); @@ -253,12 +226,8 @@ describe("positional primitives — jsonl", () => { }); }); -// ---------- Segment unions {a,b,c} ----------------------------------------- describe("quoted segments (v1.0)", () => { - // Evidence: openclaw#69004 — model alias `anthropic/claude-opus-4-7`. - // Slash inside the key has no other syntax that doesn't conflict with - // path-level slash split. const jsonc = parseJsonc( '{"agents":{"defaults":{"models":{' + '"anthropic/claude-opus-4-7":{"alias":"opus47","contextWindow":1000000},' + @@ -301,7 +270,6 @@ describe("quoted segments (v1.0)", () => { }); it("quoted segment with embedded escape sequences", () => { - // Key literally contains a backslash and a quote. 
const ast = parseJsonc('{"keys":{"a\\\\b":"v1","c\\"d":"v2"}}').ast; const m1 = resolveOcPath(ast, parseOcPath('oc://X/keys/"a\\\\b"')); expect(m1?.kind).toBe("leaf"); @@ -313,7 +281,6 @@ describe("quoted segments (v1.0)", () => { it("findOcPaths — wildcard returns paths with quoted keys when needed", () => { const out = findOcPaths(jsonc, parseOcPath("oc://config/agents.defaults.models/*/alias")); expect(out).toHaveLength(3); - // The two slash-bearing keys round-trip via quotes; `plain` stays bare. const items = out.map((m) => m.path.item); expect(items.some((s) => s === "plain")).toBe(true); expect(items.some((s) => s === '"anthropic/claude-opus-4-7"')).toBe(true); @@ -338,8 +305,6 @@ describe("quoted segments (v1.0)", () => { }); describe("value predicates — numeric operators (v1.1)", () => { - // Evidence: openclaw#54383 — compaction fails when maxTokens > model output cap. - // Doctor lint rule: flag any model with maxTokens > 128000 (Anthropic per-request output cap). const jsonc = parseJsonc( '{"models":{"providers":{"anthropic":{"models":[' + '{"id":"claude-sonnet-4-6","contextWindow":1000000,"maxTokens":128000},' + @@ -348,7 +313,6 @@ describe("value predicates — numeric operators (v1.1)", () => { "]}}}}", ).ast; - // Slot layout: section=`models.providers.anthropic.models`, item=predicate, field=`id`. const PREFIX = "oc://config/models.providers.anthropic.models"; it("> finds models exceeding the per-request output cap", () => { @@ -380,7 +344,6 @@ describe("value predicates — numeric operators (v1.1)", () => { }); it("numeric operator rejects non-numeric leaves silently", () => { - // String leaf, numeric op — predicate doesn't match (no false positive). 
const out = findOcPaths(jsonc, parseOcPath(`${PREFIX}/[id>5]/id`)); expect(out).toHaveLength(0); }); @@ -404,10 +367,9 @@ describe("value predicates — jsonc", () => { }); }); -// ---------- Ordinal addressing (#N) for distinct duplicate slugs ---------- describe("ordinal addressing — md", () => { - // Two items with the same slug after slugify (`foo: a` and `foo: b`). + // Two items share slug `foo` after slugify. const md = parseMd("## Tools\n\n- foo: a\n- foo: b\n- bar: c\n").ast; it("#0 picks the first item by document order", () => { @@ -432,7 +394,6 @@ describe("ordinal addressing — md", () => { it("findOcPaths disambiguates duplicate-slug items via #N", () => { const out = findOcPaths(md, parseOcPath("oc://AGENTS.md/tools/*/foo")); - // 2 items have key `foo` (and matching slug); 1 has `bar` (no match). expect(out).toHaveLength(2); const items = out.map((m) => m.path.item); expect(items).toEqual(["#0", "#1"]); @@ -444,12 +405,10 @@ describe("ordinal addressing — md", () => { const md2 = parseMd("## Tools\n\n- foo: a\n- bar: b\n").ast; const out = findOcPaths(md2, parseOcPath("oc://AGENTS.md/tools/*")); const items = out.map((m) => m.path.item); - // Both unique → both stay as slugs. expect(items.toSorted((a, b) => (a ?? "").localeCompare(b ?? ""))).toEqual(["bar", "foo"]); }); }); -// ---------- findOcPaths — Markdown ----------------------------------------- describe("findOcPaths — Markdown kind", () => { const md = parseMd( @@ -468,7 +427,6 @@ describe("findOcPaths — Markdown kind", () => { }); it("* in field slot enumerates each item kv key", () => { - // Item slug is the kv-key slug ('send_email' → 'send-email'). 
const out = findOcPaths(md, parseOcPath("oc://SKILL.md/Tools/send-email/*")); expect(out).toHaveLength(1); expect(out[0].match.kind).toBe("leaf"); @@ -478,26 +436,15 @@ describe("findOcPaths — Markdown kind", () => { }); it("* in item slot + matching field returns each item whose kv key matches", () => { - // The kv key on `- send_email: enabled` is `send_email`. Pattern - // field='send_email' matches that one item; the other two items - // (search, read_email) have different kv keys. const out = findOcPaths(md, parseOcPath("oc://SKILL.md/Tools/*/send_email")); expect(out).toHaveLength(1); expect(out[0].path.item).toBe("send-email"); }); - it("** at section slot matches items at every depth (F14 — cross-kind symmetry)", () => { - // Without the retain-i branch on `**`, walkMd only descended one - // level (i + 1, consumed `**`) — the jsonc walker also retains - // `**` to keep matching deeper. Lint rules expecting universal - // `**` behavior across kinds (sweep all sections for `risk:`) - // would silently get 0 md matches on a multi-block file. - // - // Pattern `**/send-email` — `**` matches the `tools` block, then - // `send-email` (kebab slug) matches the item under it. Without the - // retain-i branch, the walker descends with `**` consumed at the - // section layer and then can't satisfy the item slot since the - // walker is now inside the wrong block looking for an item slug. + it("** at section slot matches items at every depth (cross-kind symmetry)", () => { + // The retain-i branch on `**` keeps the wildcard active across + // descent — without it, multi-block md files match only the + // immediate-block layer. const multiBlock = parseMd( "## Boundaries\n\n" + "- never: rm -rf\n\n" + @@ -506,22 +453,14 @@ describe("findOcPaths — Markdown kind", () => { "- search: enabled\n", ).ast; const out = findOcPaths(multiBlock, parseOcPath("oc://SOUL.md/**/send-email")); - // The `send-email` item is under the `tools` block. 
Pin that we - // get at least one match (the substrate's md `**` should reach it). expect(out.length).toBeGreaterThanOrEqual(1); const items = out.map((m) => m.path.item).filter((v): v is string => v !== undefined); expect(items).toContain("send-email"); }); }); -describe("findOcPaths — quoted segments survive expansion (regression: resolve↔find symmetry)", () => { +describe("findOcPaths — quoted segments survive expansion", () => { it("finds keys with slashes when the path quotes them and a sibling wildcards", () => { - // Closes ClawSweeper P2 on PR #78678: when a pattern needs - // expansion (e.g. trailing union or wildcard), the JSONC walker - // bypassed `resolveJsoncOcPath` and compared object keys to the - // raw `cur.value` directly. Patterns with quoted literals - // returned no matches even though resolve worked. This test - // exercises a quoted middle segment + a trailing union. const raw = `{ "agents": { "defaults": { @@ -542,7 +481,6 @@ describe("findOcPaths — quoted segments survive expansion (regression: resolve 'oc://config.jsonc/agents.defaults.models/"github-copilot/claude-opus-4-7"/{alias,contextWindow}', ), ); - // Both alternatives in the union should match. expect(out.length).toBe(2); const fields = out .map((m) => m.path.field) @@ -551,14 +489,8 @@ describe("findOcPaths — quoted segments survive expansion (regression: resolve }); }); -// ---------- I3: md walker union + predicate parity ------------------------ describe("union segments — md", () => { - // Cross-kind parity: the jsonc walker already dispatches union at every - // slot. The md walker previously dispatched only on wildcard / ordinal - // / positional / literal — so `oc://X.md/{Boundaries,Limits}/...` - // matched zero items where the same shape on jsonc would match both. - // These tests pin the parity addition. 
const RAW = `## Boundaries - enabled: true @@ -590,9 +522,7 @@ describe("union segments — md", () => { expect(items).toEqual(["alias", "max-tokens"]); }); - it("expands {a,b} at the field slot (degenerate but parity-preserving)", () => { - // Md items hold a single kv field, so {alias,nope} matches at most - // one alt — the matching one. Mirrors the jsonc dispatch shape. + it("expands {a,b} at the field slot — md items have one kv, so at most one alt", () => { const ast = parseMd(RAW).ast; const out = findOcPaths(ast, parseOcPath("oc://X.md/limits/alias/{alias,nope}")); expect(out.length).toBe(1); @@ -613,8 +543,6 @@ describe("predicate segments — md", () => { `; it("matches sections that contain an item satisfying the predicate", () => { - // [enabled=true] — only Boundaries has an item kv.key=enabled with - // value=true; Limits's enabled=false fails the predicate. const ast = parseMd(RAW).ast; const out = findOcPaths(ast, parseOcPath("oc://X.md/[enabled=true]/*/*")); expect(out.length).toBeGreaterThan(0); @@ -631,7 +559,6 @@ describe("predicate segments — md", () => { }); it("matches the kv pair at the field slot", () => { - // [max-tokens=4096] at the field slot — checks the kv pair as a unit. 
const ast = parseMd(RAW).ast; const out = findOcPaths( ast, diff --git a/extensions/oc-path/src/oc-path/tests/jsonl/edit.test.ts b/extensions/oc-path/src/oc-path/tests/jsonl/edit.test.ts index 7cf46253fc6..7d28d1bbf1f 100644 --- a/extensions/oc-path/src/oc-path/tests/jsonl/edit.test.ts +++ b/extensions/oc-path/src/oc-path/tests/jsonl/edit.test.ts @@ -217,12 +217,8 @@ describe("setJsonlOcPath — positional field tokens (round-11 resolve↔edit sy }); }); -describe("setJsonlOcPath — quoted field segments (regression: resolve↔edit symmetry)", () => { +describe("setJsonlOcPath — quoted field segments", () => { it("edits a field key containing a slash via quoted segment", () => { - // Closes ClawSweeper P2 on PR #78678: JSONL resolve unquotes - // bracket-aware segments but the edit path used plain - // `.split('.')`. A path that resolves under `Lnnn` MUST be - // editable through the same address. const raw = `{"event":"start","detail":{"github/repo":"old"}}\n`; const { ast } = parseJsonl(raw); const r = setJsonlOcPath(ast, parseOcPath('oc://x.jsonl/L1/detail/"github/repo"'), { diff --git a/extensions/oc-path/src/oc-path/tests/scenarios/append-multi-agent.test.ts b/extensions/oc-path/src/oc-path/tests/scenarios/append-multi-agent.test.ts index f254f5a49a7..a5d7d86d8d5 100644 --- a/extensions/oc-path/src/oc-path/tests/scenarios/append-multi-agent.test.ts +++ b/extensions/oc-path/src/oc-path/tests/scenarios/append-multi-agent.test.ts @@ -1,16 +1,3 @@ -/** - * Wave 20 — JSONL append + multi-agent session sim. - * - * Substrate guarantee: `appendJsonlOcPath(ast, value)` returns a new AST - * with the value appended as a new line. Single-writer model at the - * substrate; concurrent-append safety lives in the LKG tracker layer - * (PR-4) on top of git's three-way merge. 
- * - * Append for other kinds (jsonc array push, md item-to-section) was - * removed from the substrate — those are domain operations that ride - * on top of `setXxxOcPath` at the doctor / tracker layer, where the - * value shapes are domain-defined. - */ import { describe, expect, it } from "vitest"; import type { JsoncValue } from "../../jsonc/ast.js"; import { appendJsonlOcPath } from "../../jsonl/edit.js"; @@ -27,8 +14,8 @@ function event(name: string, n: number): JsoncValue { }; } -describe("wave-20 jsonl append + multi-agent session sim", () => { - it("A-01 single agent appends 100 events in order", () => { +describe("jsonl append + multi-agent session sim", () => { + it("single agent appends 100 events in order", () => { let ast = parseJsonl("").ast; for (let i = 0; i < 100; i++) { ast = appendJsonlOcPath(ast, event("step", i)); @@ -41,7 +28,7 @@ describe("wave-20 jsonl append + multi-agent session sim", () => { expect(JSON.parse(lines[99] ?? "")).toEqual({ event: "step", n: 99 }); }); - it("A-02 two agents alternating appends preserve interleave order", () => { + it("two agents alternating appends preserve interleave order", () => { let ast = parseJsonl("").ast; for (let i = 0; i < 10; i++) { const agent = i % 2 === 0 ? 
"a" : "b"; @@ -57,7 +44,7 @@ describe("wave-20 jsonl append + multi-agent session sim", () => { } }); - it("A-03 append after a malformed line preserves both", () => { + it("append after a malformed line preserves both", () => { let ast = parseJsonl('{"a":1}\nbroken\n').ast; ast = appendJsonlOcPath(ast, event("start", 1)); const out = emitJsonl(ast); @@ -65,14 +52,14 @@ describe("wave-20 jsonl append + multi-agent session sim", () => { expect(out).toContain('"event":"start"'); }); - it("A-04 append to empty file produces a single value line", () => { + it("append to empty file produces a single value line", () => { let ast = parseJsonl("").ast; ast = appendJsonlOcPath(ast, event("first", 0)); const out = emitJsonl(ast); expect(JSON.parse(out)).toEqual({ event: "first", n: 0 }); }); - it("A-05 append assigns line numbers monotonically", () => { + it("append assigns line numbers monotonically", () => { let ast = parseJsonl("").ast; ast = appendJsonlOcPath(ast, event("a", 0)); ast = appendJsonlOcPath(ast, event("b", 1)); @@ -80,7 +67,7 @@ describe("wave-20 jsonl append + multi-agent session sim", () => { expect(ast.lines.map((l) => l.line)).toEqual([1, 2, 3]); }); - it("A-06 append after blank lines preserves line-number gaps correctly", () => { + it("append after blank lines preserves line-number gaps correctly", () => { let ast = parseJsonl('{"a":1}\n\n\n').ast; ast = appendJsonlOcPath(ast, event("after", 0)); // Existing lines: L1 value, L2 blank, L3 blank. Appended line is L4. @@ -88,7 +75,7 @@ describe("wave-20 jsonl append + multi-agent session sim", () => { expect(ast.lines[3]?.line).toBe(4); }); - it("A-07 1000-event session sim is deterministic", () => { + it("1000-event session sim is deterministic", () => { let ast = parseJsonl("").ast; for (let i = 0; i < 1000; i++) { ast = appendJsonlOcPath(ast, event("e", i)); @@ -100,14 +87,14 @@ describe("wave-20 jsonl append + multi-agent session sim", () => { expect(JSON.parse(lines[999] ?? 
"").n).toBe(999); }); - it("A-08 append is non-mutating on the input AST", () => { + it("append is non-mutating on the input AST", () => { const ast = parseJsonl('{"a":1}\n').ast; const before = JSON.stringify(ast); appendJsonlOcPath(ast, event("x", 0)); expect(JSON.stringify(ast)).toBe(before); }); - it("A-09 append preserves prior raw bytes (renders new tail)", () => { + it("append preserves prior raw bytes (renders new tail)", () => { let ast = parseJsonl('{"a":1}\n').ast; ast = appendJsonlOcPath(ast, event("b", 1)); const out = emitJsonl(ast); @@ -118,7 +105,7 @@ describe("wave-20 jsonl append + multi-agent session sim", () => { expect(JSON.parse(lines[1] ?? "")).toEqual({ event: "b", n: 1 }); }); - it("A-10 deterministic line-number assignment after malformed lines", () => { + it("deterministic line-number assignment after malformed lines", () => { let ast = parseJsonl('{"a":1}\nbroken\n{"b":2}\n').ast; ast = appendJsonlOcPath(ast, event("c", 2)); expect(ast.lines.map((l) => l.line)).toEqual([1, 2, 3, 4]); diff --git a/extensions/oc-path/src/oc-path/tests/scenarios/byte-fidelity.test.ts b/extensions/oc-path/src/oc-path/tests/scenarios/byte-fidelity.test.ts index 80e1eddc9f3..6453a804d2f 100644 --- a/extensions/oc-path/src/oc-path/tests/scenarios/byte-fidelity.test.ts +++ b/extensions/oc-path/src/oc-path/tests/scenarios/byte-fidelity.test.ts @@ -1,9 +1,3 @@ -/** - * Wave 1 — byte-fidelity round-trip. - * - * Substrate guarantee: `emitMd(parse(raw), { mode: 'roundtrip' }) === raw` - * for every input the parser accepts. This wave hammers that. 
- */ import { describe, expect, it } from "vitest"; import { emitMd } from "../../emit.js"; import { parseMd } from "../../parse.js"; @@ -13,72 +7,72 @@ function roundTrip(raw: string): string { return emitMd(ast); } -describe("wave-01 byte-fidelity", () => { - it("B-01 empty file", () => { +describe("byte-fidelity", () => { + it("empty file", () => { expect(roundTrip("")).toBe(""); }); - it("B-02 whitespace-only file", () => { + it("whitespace-only file", () => { expect(roundTrip(" \n\n \n")).toBe(" \n\n \n"); }); - it("B-03 single newline", () => { + it("single newline", () => { expect(roundTrip("\n")).toBe("\n"); }); - it("B-04 file without trailing newline", () => { + it("file without trailing newline", () => { expect(roundTrip("## H\n- item")).toBe("## H\n- item"); }); - it("B-05 file with trailing newline", () => { + it("file with trailing newline", () => { expect(roundTrip("## H\n- item\n")).toBe("## H\n- item\n"); }); - it("B-06 file with multiple trailing newlines", () => { + it("file with multiple trailing newlines", () => { expect(roundTrip("## H\n- item\n\n\n")).toBe("## H\n- item\n\n\n"); }); - it("B-07 BOM at start", () => { + it("BOM at start", () => { const raw = "## Heading\n- item\n"; expect(roundTrip(raw)).toBe(raw); }); - it("B-08 CRLF line endings", () => { + it("CRLF line endings", () => { const raw = "## H\r\n\r\n- item\r\n"; expect(roundTrip(raw)).toBe(raw); }); - it("B-09 mixed line endings (CRLF + LF)", () => { + it("mixed line endings (CRLF + LF)", () => { const raw = "## H\r\n- item\n- another\r\n"; expect(roundTrip(raw)).toBe(raw); }); - it("B-10 tabs preserved in body", () => { + it("tabs preserved in body", () => { const raw = "## H\n\n\tindented body\n"; expect(roundTrip(raw)).toBe(raw); }); - it("B-11 trailing whitespace on lines preserved", () => { + it("trailing whitespace on lines preserved", () => { const raw = "## Heading \n- item \n"; expect(roundTrip(raw)).toBe(raw); }); - it("B-12 multiple consecutive blank lines preserved", 
() => { + it("multiple consecutive blank lines preserved", () => { const raw = "## H\n\n\n\n- item\n"; expect(roundTrip(raw)).toBe(raw); }); - it("B-13 frontmatter only, no body", () => { + it("frontmatter only, no body", () => { const raw = "---\nname: x\n---\n"; expect(roundTrip(raw)).toBe(raw); }); - it("B-14 body only, no frontmatter, no headings", () => { + it("body only, no frontmatter, no headings", () => { const raw = "Just some prose.\nNo structure.\n"; expect(roundTrip(raw)).toBe(raw); }); - it("B-15 frontmatter + body + multiple sections", () => { + it("frontmatter + body + multiple sections", () => { const raw = `--- name: github description: gh CLI @@ -98,27 +92,27 @@ Preamble. expect(roundTrip(raw)).toBe(raw); }); - it("B-16 unicode content preserved", () => { + it("unicode content preserved", () => { const raw = "## Café Section\n\n- résumé item\n- 日本語\n"; expect(roundTrip(raw)).toBe(raw); }); - it("B-17 emoji preserved", () => { + it("emoji preserved", () => { const raw = "## 🚀 Launch\n\n- ✅ ready\n- 🔒 secure\n"; expect(roundTrip(raw)).toBe(raw); }); - it("B-18 frontmatter with special chars in values", () => { + it("frontmatter with special chars in values", () => { const raw = `---\nurl: https://example.com:443/path?q=1&a=2\n---\n`; expect(roundTrip(raw)).toBe(raw); }); - it("B-19 file with mixed bullet markers (-, *, +)", () => { + it("file with mixed bullet markers (-, *, +)", () => { const raw = "## H\n\n- dash\n* star\n+ plus\n"; expect(roundTrip(raw)).toBe(raw); }); - it("B-20 raw === parse(raw).raw === emitMd(parse(raw)) for 50 random shapes", () => { + it("raw === parse(raw).raw === emitMd(parse(raw)) for 50 random shapes", () => { const inputs = [ "", "\n", diff --git a/extensions/oc-path/src/oc-path/tests/scenarios/cross-cutting.test.ts b/extensions/oc-path/src/oc-path/tests/scenarios/cross-cutting.test.ts index f681902eae6..f38caf354d7 100644 --- a/extensions/oc-path/src/oc-path/tests/scenarios/cross-cutting.test.ts +++ 
b/extensions/oc-path/src/oc-path/tests/scenarios/cross-cutting.test.ts @@ -1,10 +1,3 @@ -/** - * Wave 13 — cross-cutting integration. - * - * Pipelines: parse + resolve + emit working together. Slug stability - * across re-parses. OcPath round-trip via the AST (slugs in OcPath - * must round-trip back to the resolved node). - */ import { describe, expect, it } from "vitest"; import { emitMd } from "../../emit.js"; import { formatOcPath, parseOcPath } from "../../oc-path.js"; @@ -29,15 +22,15 @@ Preamble. - curl: HTTP client `; -describe("wave-13 cross-cutting", () => { - it("CC-01 parse → resolve → emit pipeline (block)", () => { +describe("cross-cutting", () => { + it("parse → resolve → emit pipeline (block)", () => { const { ast } = parseMd(SAMPLE); const m = resolveOcPath(ast, { file: "AGENTS.md", section: "boundaries" }); expect(m?.kind).toBe("block"); expect(emitMd(ast)).toBe(SAMPLE); }); - it("CC-02 OcPath round-trip via AST: parse + resolve + format", () => { + it("OcPath round-trip via AST: parse + resolve + format", () => { const { ast } = parseMd(SAMPLE); for (const block of ast.blocks) { const path = parseOcPath(`oc://AGENTS.md/${block.slug}`); @@ -48,7 +41,7 @@ describe("wave-13 cross-cutting", () => { } }); - it("CC-03 every item in every block is OcPath-addressable", () => { + it("every item in every block is OcPath-addressable", () => { const { ast } = parseMd(SAMPLE); for (const block of ast.blocks) { for (const item of block.items) { @@ -59,7 +52,7 @@ describe("wave-13 cross-cutting", () => { } }); - it("CC-04 every kv item field is OcPath-addressable", () => { + it("every kv item field is OcPath-addressable", () => { const { ast } = parseMd(SAMPLE); for (const block of ast.blocks) { for (const item of block.items) { @@ -73,7 +66,7 @@ describe("wave-13 cross-cutting", () => { } }); - it("CC-05 every frontmatter entry is OcPath-addressable", () => { + it("every frontmatter entry is OcPath-addressable", () => { const { ast } = parseMd(SAMPLE); for 
(const fm of ast.frontmatter) { const path = parseOcPath(`oc://AGENTS.md/[frontmatter]/${fm.key}`); @@ -82,7 +75,7 @@ describe("wave-13 cross-cutting", () => { } }); - it("CC-06 slugs are stable across re-parses (deterministic)", () => { + it("slugs are stable across re-parses (deterministic)", () => { const a1 = parseMd(SAMPLE).ast; const a2 = parseMd(SAMPLE).ast; expect(a1.blocks.map((b) => b.slug)).toEqual(a2.blocks.map((b) => b.slug)); @@ -91,7 +84,7 @@ describe("wave-13 cross-cutting", () => { ); }); - it("CC-07 modifying raw + re-parse produces consistent AST shape", () => { + it("modifying raw + re-parse produces consistent AST shape", () => { const a1 = parseMd(SAMPLE).ast; const modified = SAMPLE.replace("GitHub CLI", "GitHub command-line interface"); const a2 = parseMd(modified).ast; @@ -105,20 +98,20 @@ describe("wave-13 cross-cutting", () => { expect(ghItem?.kv?.value).toBe("GitHub command-line interface"); }); - it("CC-08 unknown OcPath returns null without affecting subsequent valid resolves", () => { + it("unknown OcPath returns null without affecting subsequent valid resolves", () => { const { ast } = parseMd(SAMPLE); expect(resolveOcPath(ast, { file: "X.md", section: "nonexistent" })).toBeNull(); expect(resolveOcPath(ast, { file: "X.md", section: "tools" })?.kind).toBe("block"); }); - it("CC-09 resolve does not depend on file segment matching", () => { + it("resolve does not depend on file segment matching", () => { const { ast } = parseMd(SAMPLE); const a = resolveOcPath(ast, { file: "A.md", section: "tools" }); const b = resolveOcPath(ast, { file: "B.md", section: "tools" }); expect(a?.kind).toBe(b?.kind); }); - it("CC-10 round-trip across all 9 valid OcPath shapes", () => { + it("round-trip across all 9 valid OcPath shapes", () => { const { ast } = parseMd(SAMPLE); const cases = [ { file: "X.md" }, diff --git a/extensions/oc-path/src/oc-path/tests/scenarios/cross-kind-properties.test.ts 
b/extensions/oc-path/src/oc-path/tests/scenarios/cross-kind-properties.test.ts index f46aeb7e50d..dd02b083822 100644 --- a/extensions/oc-path/src/oc-path/tests/scenarios/cross-kind-properties.test.ts +++ b/extensions/oc-path/src/oc-path/tests/scenarios/cross-kind-properties.test.ts @@ -1,16 +1,3 @@ -/** - * Wave 22 — cross-kind property invariants. - * - * Per-kind verbs hold the same shape contracts regardless of kind: - * - * 1. parse → emit (round-trip) is byte-stable for ALL kinds - * 2. resolve is non-mutating for ALL kinds - * 3. set returns structured failure (never throws) for unresolvable - * paths across ALL kinds - * 4. inferKind aligns with the parsers consumers actually pick - * 5. parse → emit → parse is fixpoint - * 6. hostile inputs do not throw at parse time - */ import { describe, expect, it } from "vitest"; import { inferKind } from "../../dispatch.js"; import { setMdOcPath } from "../../edit.js"; @@ -27,18 +14,18 @@ import { parseOcPath } from "../../oc-path.js"; import { parseMd } from "../../parse.js"; import { resolveMdOcPath } from "../../resolve.js"; -describe("wave-22 cross-kind property invariants", () => { +describe("cross-kind property invariants", () => { const mdRaw = "---\nname: x\n---\n\n## Boundaries\n\n- enabled: true\n"; const jsoncRaw = '// h\n{ "k": 1, "n": [1,2,3] }\n'; const jsonlRaw = '{"a":1}\n\nbroken\n{"b":2}\n'; - it("P-01 round-trip parse → emit is byte-stable across all kinds", () => { + it("round-trip parse → emit is byte-stable across all kinds", () => { expect(emitMd(parseMd(mdRaw).ast)).toBe(mdRaw); expect(emitJsonc(parseJsonc(jsoncRaw).ast)).toBe(jsoncRaw); expect(emitJsonl(parseJsonl(jsonlRaw).ast)).toBe(jsonlRaw); }); - it("P-02 resolve is non-mutating across all kinds", () => { + it("resolve is non-mutating across all kinds", () => { const md = parseMd(mdRaw).ast; let before = JSON.stringify(md); resolveMdOcPath(md, parseOcPath("oc://X/[frontmatter]/name")); @@ -58,7 +45,7 @@ describe("wave-22 cross-kind 
property invariants", () => { expect(JSON.stringify(jsonl)).toBe(before); }); - it("P-03 unresolvable set never throws across all kinds", () => { + it("unresolvable set never throws across all kinds", () => { const ocPath = parseOcPath("oc://X/totally.missing.path"); expect(() => setMdOcPath(parseMd(mdRaw).ast, ocPath, "x")).not.toThrow(); expect(() => @@ -75,7 +62,7 @@ describe("wave-22 cross-kind property invariants", () => { ).not.toThrow(); }); - it("P-04 inferKind aligns with the parser actually used", () => { + it("inferKind aligns with the parser actually used", () => { expect(inferKind("AGENTS.md")).toBe("md"); expect(inferKind("SOUL.md")).toBe("md"); expect(inferKind("config.jsonc")).toBe("jsonc"); @@ -84,7 +71,7 @@ describe("wave-22 cross-kind property invariants", () => { expect(inferKind("audit.ndjson")).toBe("jsonl"); }); - it("P-05 parse → emit → parse is fixpoint across all kinds", () => { + it("parse → emit → parse is fixpoint across all kinds", () => { const md1 = emitMd(parseMd(mdRaw).ast); const md2 = emitMd(parseMd(md1).ast); expect(md1).toBe(md2); @@ -98,7 +85,7 @@ describe("wave-22 cross-kind property invariants", () => { expect(jl1).toBe(jl2); }); - it("P-06 hostile inputs do not throw at parse time across all kinds", () => { + it("hostile inputs do not throw at parse time across all kinds", () => { const hostile = [ "\x00\x01\x02 binary garbage", '{ "unclosed":', @@ -112,14 +99,14 @@ describe("wave-22 cross-kind property invariants", () => { } }); - it("P-07 resolver returns null for paths past valid kinds (no throw)", () => { + it("resolver returns null for paths past valid kinds (no throw)", () => { const overlong = parseOcPath("oc://X/a/b/c.d.e.f.g.h"); expect(() => resolveMdOcPath(parseMd(mdRaw).ast, overlong)).not.toThrow(); expect(() => resolveJsoncOcPath(parseJsonc(jsoncRaw).ast, overlong)).not.toThrow(); expect(() => resolveJsonlOcPath(parseJsonl(jsonlRaw).ast, overlong)).not.toThrow(); }); - it("P-08 set-then-resolve produces the 
value just written (jsonc)", () => { + it("set-then-resolve produces the value just written (jsonc)", () => { const ast = parseJsonc('{ "k": 1 }').ast; const r = setJsoncOcPath(ast, parseOcPath("oc://X/k"), { kind: "number", @@ -133,13 +120,13 @@ describe("wave-22 cross-kind property invariants", () => { } }); - it("P-09 verbs are deterministic — same input twice produces same output", () => { + it("verbs are deterministic — same input twice produces same output", () => { expect(emitMd(parseMd(mdRaw).ast)).toBe(emitMd(parseMd(mdRaw).ast)); expect(emitJsonc(parseJsonc(jsoncRaw).ast)).toBe(emitJsonc(parseJsonc(jsoncRaw).ast)); expect(emitJsonl(parseJsonl(jsonlRaw).ast)).toBe(emitJsonl(parseJsonl(jsonlRaw).ast)); }); - it("P-10 inferKind returns null for unknown extensions", () => { + it("inferKind returns null for unknown extensions", () => { expect(inferKind("binary.bin")).toBeNull(); expect(inferKind("no-ext")).toBeNull(); expect(inferKind("archive.tar.gz")).toBeNull(); diff --git a/extensions/oc-path/src/oc-path/tests/scenarios/edit-emit-roundtrip.test.ts b/extensions/oc-path/src/oc-path/tests/scenarios/edit-emit-roundtrip.test.ts index 14b84c0f557..caa2fd9016c 100644 --- a/extensions/oc-path/src/oc-path/tests/scenarios/edit-emit-roundtrip.test.ts +++ b/extensions/oc-path/src/oc-path/tests/scenarios/edit-emit-roundtrip.test.ts @@ -1,10 +1,3 @@ -/** - * Wave 19 — edit → emit round-trip across all kinds. - * - * Substrate guarantee: parse → setXxxOcPath → emitXxx produces valid - * bytes that re-parse to an AST whose addressed value reflects the edit. - * Per-kind verbs throughout — caller picks based on AST type. 
- */ import { describe, expect, it } from "vitest"; import { setMdOcPath } from "../../edit.js"; import { emitMd } from "../../emit.js"; @@ -18,8 +11,8 @@ import { parseJsonl } from "../../jsonl/parse.js"; import { parseOcPath } from "../../oc-path.js"; import { parseMd } from "../../parse.js"; -describe("wave-19 edit-then-emit round-trip", () => { - it("EE-01 md frontmatter edit re-parses to the new value", () => { +describe("edit-then-emit round-trip", () => { + it("md frontmatter edit re-parses to the new value", () => { const md = parseMd("---\nname: old\n---\n\n## Body\n").ast; const r = setMdOcPath(md, parseOcPath("oc://AGENTS.md/[frontmatter]/name"), "new"); expect(r.ok).toBe(true); @@ -29,7 +22,7 @@ describe("wave-19 edit-then-emit round-trip", () => { } }); - it("EE-02 md item kv edit re-parses to the new value", () => { + it("md item kv edit re-parses to the new value", () => { const md = parseMd("## Boundaries\n\n- timeout: 5\n").ast; const r = setMdOcPath(md, parseOcPath("oc://AGENTS.md/boundaries/timeout/timeout"), "60"); expect(r.ok).toBe(true); @@ -40,7 +33,7 @@ describe("wave-19 edit-then-emit round-trip", () => { } }); - it("EE-03 jsonc value edit re-parses to the new value", () => { + it("jsonc value edit re-parses to the new value", () => { const ast = parseJsonc('{ "k": 1 }').ast; const r = setJsoncOcPath(ast, parseOcPath("oc://config/k"), { kind: "number", @@ -52,7 +45,7 @@ describe("wave-19 edit-then-emit round-trip", () => { } }); - it("EE-04 jsonc nested edit preserves untouched siblings", () => { + it("jsonc nested edit preserves untouched siblings", () => { const ast = parseJsonc('{ "a": 1, "b": { "c": 2, "d": 3 }, "e": 4 }').ast; const r = setJsoncOcPath(ast, parseOcPath("oc://config/b.c"), { kind: "number", @@ -67,7 +60,7 @@ describe("wave-19 edit-then-emit round-trip", () => { } }); - it("EE-05 jsonl line edit re-parses to the new value at the same line", () => { + it("jsonl line edit re-parses to the new value at the same line", () => 
{ const ast = parseJsonl('{"a":1}\n{"a":2}\n{"a":3}\n').ast; const r = setJsonlOcPath(ast, parseOcPath("oc://log/L2/a"), { kind: "number", @@ -84,7 +77,7 @@ describe("wave-19 edit-then-emit round-trip", () => { } }); - it("EE-06 jsonc edit composes: two sequential edits both land", () => { + it("jsonc edit composes: two sequential edits both land", () => { let ast = parseJsonc('{ "a": 1, "b": 2 }').ast; let r = setJsoncOcPath(ast, parseOcPath("oc://config/a"), { kind: "number", @@ -103,7 +96,7 @@ describe("wave-19 edit-then-emit round-trip", () => { expect(JSON.parse(emitJsonc(ast))).toEqual({ a: 10, b: 20 }); }); - it("EE-07 missing path returns structured failure (not throw)", () => { + it("missing path returns structured failure (not throw)", () => { const ast = parseJsonc('{ "a": 1 }').ast; const r = setJsoncOcPath(ast, parseOcPath("oc://config/missing"), { kind: "number", @@ -115,7 +108,7 @@ describe("wave-19 edit-then-emit round-trip", () => { } }); - it("EE-08 each per-kind verb takes its own AST type — no cross-kind leakage", () => { + it("each per-kind verb takes its own AST type — no cross-kind leakage", () => { // Type-level guarantee: each setter only accepts its kind's AST. // Caller picks based on the AST they have. This is the design. 
const md = parseMd("---\nx: 1\n---\n").ast; @@ -137,7 +130,7 @@ describe("wave-19 edit-then-emit round-trip", () => { expect(c.ok).toBe(true); }); - it("EE-09 jsonc parser-backed edit preserves comments", () => { + it("jsonc parser-backed edit preserves comments", () => { const raw = '{\n "k": 1 // comment\n}\n'; const ast = parseJsonc(raw).ast; const r = setJsoncOcPath(ast, parseOcPath("oc://config/k"), { @@ -154,7 +147,7 @@ describe("wave-19 edit-then-emit round-trip", () => { } }); - it("EE-10 edit on empty AST surfaces no-root", () => { + it("edit on empty AST surfaces no-root", () => { const ast = parseJsonc("").ast; const r = setJsoncOcPath(ast, parseOcPath("oc://config/x"), { kind: "number", diff --git a/extensions/oc-path/src/oc-path/tests/scenarios/frontmatter-edges.test.ts b/extensions/oc-path/src/oc-path/tests/scenarios/frontmatter-edges.test.ts index 80aa2cdbcfa..d7b2cb2aac6 100644 --- a/extensions/oc-path/src/oc-path/tests/scenarios/frontmatter-edges.test.ts +++ b/extensions/oc-path/src/oc-path/tests/scenarios/frontmatter-edges.test.ts @@ -1,15 +1,8 @@ -/** - * Wave 2 — frontmatter edges. - * - * Substrate guarantee: frontmatter is parsed as `key: value` entries - * with quote-stripping; malformed frontmatter doesn't crash the parser - * (soft-error policy: emit diagnostic, recover). 
- */ import { describe, expect, it } from "vitest"; import { parseMd } from "../../parse.js"; -describe("wave-02 frontmatter-edges", () => { - it("FM-01 simple kv pairs", () => { +describe("frontmatter-edges", () => { + it("simple kv pairs", () => { const { ast } = parseMd("---\nname: x\ndescription: y\n---\n"); expect(ast.frontmatter.map((e) => [e.key, e.value])).toEqual([ ["name", "x"], @@ -17,50 +10,50 @@ describe("wave-02 frontmatter-edges", () => { ]); }); - it("FM-02 unclosed frontmatter emits diagnostic, treats as preamble", () => { + it("unclosed frontmatter emits diagnostic, treats as preamble", () => { const { ast, diagnostics } = parseMd("---\nname: x\nno close fence\nbody\n"); expect(diagnostics.some((d) => d.code === "OC_FRONTMATTER_UNCLOSED")).toBe(true); expect(ast.frontmatter).toEqual([]); }); - it("FM-03 empty frontmatter (just open + close)", () => { + it("empty frontmatter (just open + close)", () => { const { ast } = parseMd("---\n---\n"); expect(ast.frontmatter).toEqual([]); }); - it("FM-04 frontmatter only, file has no other content", () => { + it("frontmatter only, file has no other content", () => { const { ast } = parseMd("---\nk: v\n---\n"); expect(ast.frontmatter).toEqual([{ key: "k", value: "v", line: 2 }]); expect(ast.preamble).toBe(""); expect(ast.blocks).toEqual([]); }); - it("FM-05 double-quoted value", () => { + it("double-quoted value", () => { const { ast } = parseMd('---\ntitle: "Hello, world"\n---\n'); expect(ast.frontmatter[0]?.value).toBe("Hello, world"); }); - it("FM-06 single-quoted value", () => { + it("single-quoted value", () => { const { ast } = parseMd("---\ntitle: 'Hello, world'\n---\n"); expect(ast.frontmatter[0]?.value).toBe("Hello, world"); }); - it("FM-07 unquoted value with internal colons preserved", () => { + it("unquoted value with internal colons preserved", () => { const { ast } = parseMd("---\nurl: https://example.com:443/p\n---\n"); expect(ast.frontmatter[0]?.value).toBe("https://example.com:443/p"); }); - 
it("FM-08 empty value", () => { + it("empty value", () => { const { ast } = parseMd("---\nk:\n---\n"); expect(ast.frontmatter[0]).toEqual({ key: "k", value: "", line: 2 }); }); - it("FM-09 value with leading/trailing whitespace trimmed", () => { + it("value with leading/trailing whitespace trimmed", () => { const { ast } = parseMd("---\nk: spaced \n---\n"); expect(ast.frontmatter[0]?.value).toBe("spaced"); }); - it("FM-10 list-style continuations are silently dropped (substrate stays opinion-free)", () => { + it("list-style continuations are silently dropped (substrate stays opinion-free)", () => { const { ast } = parseMd("---\ntools:\n - gh\n - curl\n---\n"); // The `tools:` key has an empty inline value; the list continuation // lines ` - gh` and ` - curl` don't match the kv regex and are @@ -70,7 +63,7 @@ describe("wave-02 frontmatter-edges", () => { expect(ast.frontmatter[0]?.value).toBe(""); }); - it("FM-11 line numbers are 1-based and accurate", () => { + it("line numbers are 1-based and accurate", () => { const { ast } = parseMd("---\nk1: v1\nk2: v2\nk3: v3\n---\n"); expect(ast.frontmatter.map((e) => [e.key, e.line])).toEqual([ ["k1", 2], @@ -79,33 +72,32 @@ describe("wave-02 frontmatter-edges", () => { ]); }); - it("FM-12 dash-key allowed", () => { + it("dash-key allowed", () => { const { ast } = parseMd("---\nuser-invocable: true\n---\n"); expect(ast.frontmatter[0]?.key).toBe("user-invocable"); }); - it("FM-13 underscore-key allowed", () => { + it("underscore-key allowed", () => { const { ast } = parseMd("---\nparam_set: foo\n---\n"); expect(ast.frontmatter[0]?.key).toBe("param_set"); }); - it("FM-14 number-only value preserved as string", () => { + it("number-only value preserved as string", () => { const { ast } = parseMd("---\ntimeout: 15000\n---\n"); expect(ast.frontmatter[0]?.value).toBe("15000"); }); - it("FM-15 boolean-like value preserved as string", () => { + it("boolean-like value preserved as string", () => { const { ast } = 
parseMd("---\nenabled: true\n---\n"); expect(ast.frontmatter[0]?.value).toBe("true"); }); - it("FM-16 blank lines inside frontmatter are skipped", () => { + it("blank lines inside frontmatter are skipped", () => { const { ast } = parseMd("---\n\nk1: v1\n\nk2: v2\n\n---\n"); expect(ast.frontmatter.map((e) => e.key)).toEqual(["k1", "k2"]); }); - it("FM-17 frontmatter with same key twice — both retained (no dedup)", () => { - // Substrate doesn't dedup; lint rules can flag duplicates if needed. + it("frontmatter with same key twice — both retained (no dedup)", () => { const { ast } = parseMd("---\nk: v1\nk: v2\n---\n"); expect(ast.frontmatter).toEqual([ { key: "k", value: "v1", line: 2 }, @@ -113,27 +105,27 @@ describe("wave-02 frontmatter-edges", () => { ]); }); - it("FM-18 frontmatter must be at start — leading blank line breaks detection", () => { + it("frontmatter must be at start — leading blank line breaks detection", () => { const { ast } = parseMd("\n---\nk: v\n---\n"); expect(ast.frontmatter).toEqual([]); }); - it("FM-19 frontmatter must be at start — leading text breaks detection", () => { + it("frontmatter must be at start — leading text breaks detection", () => { const { ast } = parseMd("intro\n\n---\nk: v\n---\n"); expect(ast.frontmatter).toEqual([]); }); - it("FM-20 BOM before frontmatter open is tolerated", () => { + it("BOM before frontmatter open is tolerated", () => { const { ast } = parseMd("---\nname: bom\n---\n"); expect(ast.frontmatter[0]?.value).toBe("bom"); }); - it("FM-21 single-line file with `---` and `---` is empty frontmatter", () => { + it("single-line file with `---` and `---` is empty frontmatter", () => { const { ast } = parseMd("---\n---"); expect(ast.frontmatter).toEqual([]); }); - it("FM-22 hash-prefixed lines skipped (not yaml comments — just don't match kv regex)", () => { + it("hash-prefixed lines skipped (not yaml comments — just don't match kv regex)", () => { const { ast } = parseMd("---\n# comment\nk: v\n---\n"); 
expect(ast.frontmatter.map((e) => e.key)).toEqual(["k"]); }); diff --git a/extensions/oc-path/src/oc-path/tests/scenarios/h2-block-split.test.ts b/extensions/oc-path/src/oc-path/tests/scenarios/h2-block-split.test.ts index ce0918ee902..4ef7d0dfe49 100644 --- a/extensions/oc-path/src/oc-path/tests/scenarios/h2-block-split.test.ts +++ b/extensions/oc-path/src/oc-path/tests/scenarios/h2-block-split.test.ts @@ -1,24 +1,15 @@ -/** - * Wave 3 — H2 block split. - * - * Substrate guarantee: `## ` at column 0 outside fenced code blocks - * starts a new H2 block. H1 (`# `), H3 (`### `), and `## ` inside - * fenced code blocks do NOT split. - */ import { describe, expect, it } from "vitest"; import { parseMd } from "../../parse.js"; -describe("wave-03 h2-block-split", () => { - it("H2-01 no headings → no blocks, all preamble", () => { +describe("h2-block-split", () => { + it("no headings → no blocks, all preamble", () => { const raw = "Just prose, no headings.\nMore prose.\n"; const { ast } = parseMd(raw); expect(ast.blocks).toEqual([]); - // Preamble preserves the trailing newline from raw (split + rejoin - // is symmetric); callers that want trimmed prose call .trim(). 
expect(ast.preamble).toBe("Just prose, no headings.\nMore prose.\n"); }); - it("H2-02 single heading splits preamble + one block", () => { + it("single heading splits preamble + one block", () => { const { ast } = parseMd("preamble\n## Section\nbody\n"); expect(ast.preamble.trim()).toBe("preamble"); expect(ast.blocks.length).toBe(1); @@ -26,127 +17,112 @@ describe("wave-03 h2-block-split", () => { expect(ast.blocks[0]?.bodyText.trim()).toBe("body"); }); - it("H2-03 multiple headings produce blocks in order", () => { + it("multiple headings produce blocks in order", () => { const { ast } = parseMd("## A\nbody-a\n## B\nbody-b\n## C\nbody-c\n"); expect(ast.blocks.map((b) => b.heading)).toEqual(["A", "B", "C"]); }); - it("H2-04 H1 does NOT split", () => { + it("H1 does NOT split", () => { const { ast } = parseMd("# H1 heading\n## H2 heading\n"); expect(ast.blocks.length).toBe(1); expect(ast.blocks[0]?.heading).toBe("H2 heading"); expect(ast.preamble).toContain("# H1 heading"); }); - it("H2-05 H3 does NOT split", () => { + it("H3 does NOT split", () => { const { ast } = parseMd("## H2\nbody\n### H3\nstill in H2 block\n"); expect(ast.blocks.length).toBe(1); expect(ast.blocks[0]?.bodyText).toContain("### H3"); }); - it("H2-06 `## ` inside fenced code block does NOT split", () => { + it("`## ` inside fenced code block does NOT split", () => { const raw = "## Real\n\n```md\n## Inside code\n```\n\n## Another real\n"; const { ast } = parseMd(raw); expect(ast.blocks.map((b) => b.heading)).toEqual(["Real", "Another real"]); }); - it("H2-07 `##` without trailing space — does NOT match (regex requires \\s+)", () => { + it("`##` without trailing space — does NOT match (regex requires \\s+)", () => { const { ast } = parseMd("##NoSpace\n## With space\n"); expect(ast.blocks.length).toBe(1); expect(ast.blocks[0]?.heading).toBe("With space"); }); - it("H2-08 leading whitespace before `##` — recognized as heading (CommonMark)", () => { - // Substrate accepts up to 3 spaces of 
indentation as an atx - // heading per CommonMark. Lint rules can flag if a particular - // workspace file requires column-zero authoring. + it("leading whitespace before `##` — recognized as heading (CommonMark)", () => { const { ast } = parseMd(" ## indented\n## not indented\n"); expect(ast.blocks.map((b) => b.heading)).toEqual(["indented", "not indented"]); }); - it("H2-09 trailing whitespace on heading — trimmed in heading text", () => { + it("trailing whitespace on heading — trimmed in heading text", () => { const { ast } = parseMd("## Trailing \n"); expect(ast.blocks[0]?.heading).toBe("Trailing"); expect(ast.blocks[0]?.slug).toBe("trailing"); }); - it("H2-10 inline code in heading preserved", () => { + it("inline code in heading preserved", () => { const { ast } = parseMd("## Use `gh` for GitHub\n"); expect(ast.blocks[0]?.heading).toBe("Use `gh` for GitHub"); }); - it("H2-11 markdown formatting in heading preserved", () => { + it("markdown formatting in heading preserved", () => { const { ast } = parseMd("## **Bold** *italic*\n"); expect(ast.blocks[0]?.heading).toBe("**Bold** *italic*"); }); - it("H2-12 immediately after frontmatter", () => { + it("immediately after frontmatter", () => { const { ast } = parseMd("---\nk: v\n---\n## Section\nbody\n"); expect(ast.blocks[0]?.heading).toBe("Section"); expect(ast.preamble).toBe(""); }); - it("H2-13 H2 at end of file (no body)", () => { + it("H2 at end of file (no body)", () => { const { ast } = parseMd("preamble\n## End\n"); expect(ast.blocks[0]?.heading).toBe("End"); expect(ast.blocks[0]?.bodyText).toBe(""); }); - it("H2-14 two consecutive H2s — empty body block between", () => { + it("two consecutive H2s — empty body block between", () => { const { ast } = parseMd("## A\n## B\n"); expect(ast.blocks[0]?.bodyText).toBe(""); expect(ast.blocks[1]?.heading).toBe("B"); }); - it("H2-15 line numbers are 1-based and track through frontmatter", () => { + it("line numbers are 1-based and track through frontmatter", () => { 
const { ast } = parseMd("---\nk: v\n---\n## At line 4\n"); expect(ast.blocks[0]?.line).toBe(4); }); - it("H2-16 line numbers track through preamble", () => { + it("line numbers track through preamble", () => { const { ast } = parseMd("line 1\nline 2\n## At line 3\n"); expect(ast.blocks[0]?.line).toBe(3); }); - it("H2-17 nested fenced code blocks (~~~ vs ```) — only ``` is detected", () => { - // Current parser only treats ``` as fence; ~~~ falls through. This - // is a documented limit. Inputs with ~~~ aren't broken — they're - // just not protected from H2-misparsing inside them. + it("nested fenced code blocks (~~~ vs ```) — only ``` is detected", () => { const raw = "## H\n\n~~~md\n~~~\n\n## Next\n"; const { ast } = parseMd(raw); expect(ast.blocks.map((b) => b.heading)).toEqual(["H", "Next"]); }); - it("H2-18 setext-style heading (`Heading\\n========\\n`) is NOT recognized", () => { - // Substrate is opinion-aware: setext headings are treated as - // preamble. Lint rules can flag if needed; recognized markdown - // dialect is `## ATX-style only` for OpenClaw workspace files. + it("setext-style heading (`Heading\\n========\\n`) is NOT recognized", () => { const raw = "Heading\n=======\n## Real\n"; const { ast } = parseMd(raw); expect(ast.blocks.length).toBe(1); expect(ast.blocks[0]?.heading).toBe("Real"); }); - it("H2-19 empty heading text (`## `)", () => { - // Substrate accepts an empty atx heading; downstream lint - // (`OC_HEADING_EMPTY`) flags it. Slug is empty string — collisions - // are a lint-level concern, not a parser refusal. 
+ it("empty heading text (`## `)", () => { const { ast } = parseMd("## \n"); expect(ast.blocks.length).toBe(1); expect(ast.blocks[0]?.heading).toBe(""); expect(ast.blocks[0]?.slug).toBe(""); }); - it("H2-20 heading with only whitespace (`## `)", () => { + it("heading with only whitespace (`## `)", () => { const { ast } = parseMd("## \n"); expect(ast.blocks.length).toBe(1); expect(ast.blocks[0]?.heading).toBe(""); }); - it("H2-21 heading-shaped text inside multi-line bullet body — does split", () => { - // The substrate treats line-start ## as a heading regardless of - // logical context (item continuation lines). Lint rules can flag - // the boundary; substrate prefers structural simplicity. + it("heading-shaped text inside multi-line bullet body — does split", () => { const raw = "## Section\n- item starts\n continues\n## Next\n"; const { ast } = parseMd(raw); expect(ast.blocks.map((b) => b.heading)).toEqual(["Section", "Next"]); diff --git a/extensions/oc-path/src/oc-path/tests/scenarios/items.test.ts b/extensions/oc-path/src/oc-path/tests/scenarios/items.test.ts index 2628b9be097..664b44f1091 100644 --- a/extensions/oc-path/src/oc-path/tests/scenarios/items.test.ts +++ b/extensions/oc-path/src/oc-path/tests/scenarios/items.test.ts @@ -1,46 +1,38 @@ -/** - * Wave 4 — items (bullets + kv). - * - * Substrate guarantee: bullet lines (`- text`, `* text`, `+ text`) inside - * H2 blocks are extracted as `AstItem`. Lines matching `- key: value` - * also populate `item.kv`. Items inside fenced code blocks are NOT - * extracted. 
- */ import { describe, expect, it } from "vitest"; import { parseMd } from "../../parse.js"; -describe("wave-04 items", () => { - it("I-01 plain dash bullets", () => { +describe("items", () => { + it("plain dash bullets", () => { const { ast } = parseMd("## H\n- a\n- b\n- c\n"); expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(["a", "b", "c"]); }); - it("I-02 star bullets", () => { + it("star bullets", () => { const { ast } = parseMd("## H\n* a\n* b\n"); expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(["a", "b"]); }); - it("I-03 plus bullets", () => { + it("plus bullets", () => { const { ast } = parseMd("## H\n+ a\n+ b\n"); expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(["a", "b"]); }); - it("I-04 mixed bullet markers in same section", () => { + it("mixed bullet markers in same section", () => { const { ast } = parseMd("## H\n- dash\n* star\n+ plus\n"); expect(ast.blocks[0]?.items.length).toBe(3); }); - it("I-05 kv-shape items populate kv", () => { + it("kv-shape items populate kv", () => { const { ast } = parseMd("## H\n- gh: GitHub CLI\n"); expect(ast.blocks[0]?.items[0]?.kv).toEqual({ key: "gh", value: "GitHub CLI" }); }); - it("I-06 plain item has no kv", () => { + it("plain item has no kv", () => { const { ast } = parseMd("## H\n- plain text\n"); expect(ast.blocks[0]?.items[0]?.kv).toBeUndefined(); }); - it("I-07 multiple colons — first colon is the kv split", () => { + it("multiple colons — first colon is the kv split", () => { const { ast } = parseMd("## H\n- url: http://x.com:80/p\n"); expect(ast.blocks[0]?.items[0]?.kv).toEqual({ key: "url", @@ -48,79 +40,72 @@ describe("wave-04 items", () => { }); }); - it("I-08 colon with no space after is still kv", () => { + it("colon with no space after is still kv", () => { const { ast } = parseMd("## H\n- key:value\n"); expect(ast.blocks[0]?.items[0]?.kv).toEqual({ key: "key", value: "value" }); }); - it("I-09 quoted value preserved verbatim (no unquote at item layer)", () => { + it("quoted 
value preserved verbatim (no unquote at item layer)", () => { const { ast } = parseMd('## H\n- title: "quoted: value"\n'); expect(ast.blocks[0]?.items[0]?.kv?.value).toBe('"quoted: value"'); }); - it("I-10 slug from kv key when kv present", () => { + it("slug from kv key when kv present", () => { const { ast } = parseMd("## H\n- The Tool: description\n"); expect(ast.blocks[0]?.items[0]?.slug).toBe("the-tool"); }); - it("I-11 slug from item text when no kv", () => { + it("slug from item text when no kv", () => { const { ast } = parseMd("## H\n- The Plain Item\n"); expect(ast.blocks[0]?.items[0]?.slug).toBe("the-plain-item"); }); - it("I-12 items inside fenced code block are NOT extracted", () => { + it("items inside fenced code block are NOT extracted", () => { const raw = "## H\n```\n- not a bullet\n- still not\n```\n- real bullet\n"; const { ast } = parseMd(raw); expect(ast.blocks[0]?.items.length).toBe(1); expect(ast.blocks[0]?.items[0]?.text).toBe("real bullet"); }); - it("I-13 line numbers track through block body", () => { + it("line numbers track through block body", () => { const { ast } = parseMd("## H\n- first\n- second\n- third\n"); expect(ast.blocks[0]?.items.map((i) => i.line)).toEqual([2, 3, 4]); }); - it("I-14 trailing whitespace on bullet trimmed in text", () => { + it("trailing whitespace on bullet trimmed in text", () => { const { ast } = parseMd("## H\n- spaced \n"); expect(ast.blocks[0]?.items[0]?.text).toBe("spaced"); }); - it("I-15 empty bullet — recognized with empty text/slug", () => { - // Substrate accepts an empty bullet; lint can flag if collisions - // matter. Both `- ` and `- real` become items. 
+ it("empty bullet — recognized with empty text/slug", () => { const { ast } = parseMd("## H\n- \n- real\n"); expect(ast.blocks[0]?.items.length).toBe(2); expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(["", "real"]); }); - it("I-16 indented bullet (sub-bullet) — recognized as item alongside parent", () => { - // Substrate flattens the bullet tree into a list of items; - // sub-bullets surface as their own AstItem entries. Lint rules - // can flag depth or duplicate-slug collisions. + it("indented bullet (sub-bullet) — recognized as item alongside parent", () => { const { ast } = parseMd("## H\n- top\n - sub\n"); expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(["top", "sub"]); }); - it("I-17 numbered list (1. item) — recognized as items", () => { - // Substrate treats ordered and unordered lists symmetrically. - // Lint rules can flag if a particular file requires bullet style. + it("numbered list (1. item) — recognized as items", () => { const { ast } = parseMd("## H\n1. first\n2. 
second\n"); expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(["first", "second"]); }); - it("I-18 items in a section with no body before — first item line is heading+1", () => { + it("items in a section with no body before — first item line is heading+1", () => { const { ast } = parseMd("## H\n- a\n"); expect(ast.blocks[0]?.items[0]?.line).toBe(2); }); - it("I-19 items spread across blocks are scoped to their block", () => { + it("items spread across blocks are scoped to their block", () => { const { ast } = parseMd("## A\n- a1\n## B\n- b1\n- b2\n"); expect(ast.blocks[0]?.items.length).toBe(1); expect(ast.blocks[1]?.items.length).toBe(2); expect(ast.blocks[1]?.items.map((i) => i.text)).toEqual(["b1", "b2"]); }); - it("I-20 item with only-symbol kv key still parses", () => { + it("item with only-symbol kv key still parses", () => { const { ast } = parseMd("## H\n- API_KEY: secret-value\n"); expect(ast.blocks[0]?.items[0]?.kv).toEqual({ key: "API_KEY", @@ -129,21 +114,19 @@ describe("wave-04 items", () => { expect(ast.blocks[0]?.items[0]?.slug).toBe("api-key"); }); - it("I-21 item with kv where value is empty", () => { + it("item with empty kv value falls through to plain item", () => { const { ast } = parseMd("## H\n- key:\n"); - // `- key:` has empty value after the colon; the kv regex requires - // (.+) for value, so this falls through to plain item. 
expect(ast.blocks[0]?.items[0]?.kv).toBeUndefined(); expect(ast.blocks[0]?.items[0]?.text).toBe("key:"); }); - it("I-22 bullet in preamble (before first H2) is NOT in any block", () => { + it("bullet in preamble (before first H2) is NOT in any block", () => { const { ast } = parseMd("- preamble bullet\n## H\n- block bullet\n"); expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(["block bullet"]); expect(ast.preamble).toContain("- preamble bullet"); }); - it("I-23 bullet with internal markdown (italics, code) preserved in text", () => { + it("bullet with internal markdown (italics, code) preserved in text", () => { const { ast } = parseMd("## H\n- use *gh* and `curl`\n"); expect(ast.blocks[0]?.items[0]?.text).toBe("use *gh* and `curl`"); }); diff --git a/extensions/oc-path/src/oc-path/tests/scenarios/jsonc-byte-fidelity.test.ts b/extensions/oc-path/src/oc-path/tests/scenarios/jsonc-byte-fidelity.test.ts index adc829835a8..a8dc2645d85 100644 --- a/extensions/oc-path/src/oc-path/tests/scenarios/jsonc-byte-fidelity.test.ts +++ b/extensions/oc-path/src/oc-path/tests/scenarios/jsonc-byte-fidelity.test.ts @@ -1,22 +1,3 @@ -/** - * Wave 15 — JSONC byte-fidelity round-trip. - * - * Substrate guarantee: `emitJsonc(parseJsonc(raw)) === raw` for every - * input the parser accepts. Mirrors wave-01 but for the JSONC kind. - * Comments, trailing commas, BOMs, mixed line endings — all byte-stable - * via the round-trip path. - * - * **What this file proves**: byte-identical round-trip via the - * default-mode emit (which echoes `ast.raw`). This is necessary but - * not sufficient — without the structural assertions below, a parser - * that emitted `ast.root: null` for every input would still pass the - * byte test (since `raw` is preserved on the AST regardless). - * - * Each assertParseable() call proves the parser actually ran and - * produced a structural tree, not just stored `raw` verbatim and - * called it a day. 
JC-17 deliberately uses `assertNotParseable` — - * malformed input must echo `raw` AND emit a diagnostic. - */ import { describe, expect, it } from "vitest"; import type { JsoncValue } from "../../jsonc/ast.js"; import { emitJsonc } from "../../jsonc/emit.js"; @@ -50,16 +31,16 @@ function assertNotParseable(raw: string): void { expect(result.diagnostics.some((d) => d.severity === "error")).toBe(true); } -describe("wave-15 jsonc byte-fidelity", () => { - it("JC-01 empty file", () => { +describe("jsonc byte-fidelity", () => { + it("empty file", () => { expect(rt("")).toBe(""); }); - it("JC-02 whitespace-only", () => { + it("whitespace-only", () => { expect(rt(" \n\n \n")).toBe(" \n\n \n"); }); - it("JC-03 empty object", () => { + it("empty object", () => { expect(rt("{}")).toBe("{}"); const root = assertParseable("{}"); expect(root.kind).toBe("object"); @@ -68,7 +49,7 @@ describe("wave-15 jsonc byte-fidelity", () => { } }); - it("JC-04 empty array", () => { + it("empty array", () => { expect(rt("[]")).toBe("[]"); const root = assertParseable("[]"); expect(root.kind).toBe("array"); @@ -77,7 +58,7 @@ describe("wave-15 jsonc byte-fidelity", () => { } }); - it("JC-05 trivial scalar root", () => { + it("trivial scalar root", () => { expect(rt("42")).toBe("42"); expect(rt('"x"')).toBe('"x"'); expect(rt("true")).toBe("true"); @@ -88,22 +69,20 @@ describe("wave-15 jsonc byte-fidelity", () => { expect(assertParseable("null").kind).toBe("null"); }); - it("JC-06 line comments preserved", () => { + it("line comments preserved", () => { const raw = '// a leading comment\n{ "x": 1 } // trailing\n'; expect(rt(raw)).toBe(raw); - // Pin parse: the structural value `x: 1` is reachable. 
- const root = assertParseable(raw); - expect(root.kind).toBe("object"); + expect(assertParseable(raw).kind).toBe("object"); }); - it("JC-07 block comments preserved", () => { + it("block comments preserved", () => { const raw = '/* header */\n{\n /* inline */\n "x": 1\n}\n'; expect(rt(raw)).toBe(raw); const root = assertParseable(raw); expect(root.kind).toBe("object"); }); - it("JC-08 trailing commas preserved", () => { + it("trailing commas preserved", () => { const raw = '{\n "x": 1,\n "y": 2,\n}'; expect(rt(raw)).toBe(raw); const root = assertParseable(raw); @@ -112,7 +91,7 @@ describe("wave-15 jsonc byte-fidelity", () => { } }); - it("JC-09 mixed CRLF + LF preserved", () => { + it("mixed CRLF + LF preserved", () => { const raw = '{\r\n "x": 1,\n "y": 2\r\n}'; expect(rt(raw)).toBe(raw); const root = assertParseable(raw); @@ -121,23 +100,21 @@ describe("wave-15 jsonc byte-fidelity", () => { } }); - it("JC-10 BOM preserved on raw", () => { + it("BOM preserved on raw, stripped for parse", () => { const raw = '{ "x": 1 }'; expect(rt(raw)).toBe(raw); - // BOM stripped before parsing — parser still sees `{` as first char. expect(assertParseable(raw).kind).toBe("object"); }); - it("JC-11 deeply nested structures preserved", () => { + it("deeply nested structures preserved", () => { const raw = '{ "a": { "b": { "c": { "d": [1, [2, [3, [4]]]] } } } }'; expect(rt(raw)).toBe(raw); expect(assertParseable(raw).kind).toBe("object"); }); - it("JC-12 string with escape sequences preserved", () => { + it("string with escape sequences preserved (parsed value has decoded chars)", () => { const raw = '{ "s": "a\\nb\\tc\\u0041\\\\d\\"e" }'; expect(rt(raw)).toBe(raw); - // Pin escape resolution — parsed value carries actual control chars. 
const root = assertParseable(raw); if (root.kind === "object") { const s = root.entries[0]?.value; @@ -147,7 +124,7 @@ describe("wave-15 jsonc byte-fidelity", () => { } }); - it("JC-13 numbers in scientific / negative / decimal forms preserved", () => { + it("numbers in scientific / negative / decimal forms preserved", () => { const raw = "[ 0, -0, 1.5, -3.14, 1e3, -2.5e-10, 1E+5 ]"; expect(rt(raw)).toBe(raw); const root = assertParseable(raw); @@ -157,7 +134,7 @@ describe("wave-15 jsonc byte-fidelity", () => { } }); - it("JC-14 unicode characters preserved verbatim", () => { + it("unicode characters preserved verbatim", () => { const raw = '{ "name": "héllo 世界 🎉" }'; expect(rt(raw)).toBe(raw); const root = assertParseable(raw); @@ -169,30 +146,27 @@ describe("wave-15 jsonc byte-fidelity", () => { } }); - it("JC-15 idiosyncratic whitespace preserved", () => { + it("idiosyncratic whitespace preserved", () => { const raw = '{ "x" : 1 ,\n "y": 2}'; expect(rt(raw)).toBe(raw); expect(assertParseable(raw).kind).toBe("object"); }); - it("JC-16 file-level trailing whitespace preserved", () => { + it("file-level trailing whitespace preserved", () => { const raw = '{ "x": 1 }\n\n\n'; expect(rt(raw)).toBe(raw); expect(assertParseable(raw).kind).toBe("object"); }); - it("JC-17 malformed input still emits raw verbatim AND emits a diagnostic", () => { + it("malformed input still emits raw verbatim AND emits a diagnostic", () => { const raw = '{ broken json with "key": value }'; expect(rt(raw)).toBe(raw); - // Without this assertion the test passes for any input regardless - // of parser behavior — pin both halves of the contract. assertNotParseable(raw); }); - it("JC-18 comments-only file preserved", () => { + it("comments-only file preserved", () => { const raw = "// just a comment\n/* and a block */\n"; expect(rt(raw)).toBe(raw); - // Comments-only files have no structural root — that's expected. 
expect(parseJsonc(raw).ast.root).toBeNull(); }); }); diff --git a/extensions/oc-path/src/oc-path/tests/scenarios/jsonc-resolver-edges.test.ts b/extensions/oc-path/src/oc-path/tests/scenarios/jsonc-resolver-edges.test.ts index 53c8bd3bcd5..741ff1b4505 100644 --- a/extensions/oc-path/src/oc-path/tests/scenarios/jsonc-resolver-edges.test.ts +++ b/extensions/oc-path/src/oc-path/tests/scenarios/jsonc-resolver-edges.test.ts @@ -1,10 +1,3 @@ -/** - * Wave 17 — JSONC resolver adversarial edges. - * - * Substrate guarantee: the resolver walks the value tree deterministically - * with mixed dotted / segment paths, returns null on any unresolvable - * walk, and never throws on hostile inputs. - */ import { describe, expect, it } from "vitest"; import { parseJsonc } from "../../jsonc/parse.js"; import { resolveJsoncOcPath } from "../../jsonc/resolve.js"; @@ -14,29 +7,29 @@ function rs(raw: string, ocPath: string) { return resolveJsoncOcPath(parseJsonc(raw).ast, parseOcPath(ocPath)); } -describe("wave-17 jsonc resolver edges", () => { - it("JR-01 root resolves on empty object", () => { +describe("jsonc resolver edges", () => { + it("root resolves on empty object", () => { expect(rs("{}", "oc://config")?.kind).toBe("root"); }); - it("JR-02 root resolves on scalar root", () => { + it("root resolves on scalar root", () => { expect(rs("42", "oc://config")?.kind).toBe("root"); }); - it("JR-03 root resolves on array root", () => { + it("root resolves on array root", () => { expect(rs("[1,2,3]", "oc://config")?.kind).toBe("root"); }); - it("JR-04 deep dotted descent within section", () => { + it("deep dotted descent within section", () => { const m = rs('{"a":{"b":{"c":1}}}', "oc://config/a.b.c"); expect(m?.kind).toBe("object-entry"); }); - it("JR-05 missing intermediate key returns null", () => { + it("missing intermediate key returns null", () => { expect(rs('{"a":{"b":1}}', "oc://config/a.x.b")).toBeNull(); }); - it("JR-06 numeric segment indexes into array", () => { + it("numeric 
segment indexes into array", () => { const m = rs('{"items":["a","b","c"]}', "oc://config/items.1"); expect(m?.kind).toBe("value"); if (m?.kind === "value") { @@ -44,7 +37,7 @@ describe("wave-17 jsonc resolver edges", () => { } }); - it("JR-07 negative array index resolves to Nth-from-last", () => { + it("negative array index resolves to Nth-from-last", () => { expect(rs('{"x":[1,2]}', "oc://config/x.-1")).toMatchObject({ kind: "value", node: { kind: "number", value: 2 }, @@ -56,28 +49,28 @@ describe("wave-17 jsonc resolver edges", () => { expect(rs('{"x":[1,2]}', "oc://config/x.-5")).toBeNull(); }); - it("JR-08 out-of-bounds array index returns null", () => { + it("out-of-bounds array index returns null", () => { expect(rs('{"x":[1,2]}', "oc://config/x.99")).toBeNull(); }); - it("JR-09 non-integer index returns null (no NaN coercion)", () => { + it("non-integer index returns null (no NaN coercion)", () => { expect(rs('{"x":[1,2]}', "oc://config/x.foo")).toBeNull(); }); - it("JR-10 null AST root returns null on any path", () => { + it("null AST root returns null on any path", () => { expect(rs("", "oc://config/x")).toBeNull(); }); - it("JR-11 descending past a primitive returns null", () => { + it("descending past a primitive returns null", () => { expect(rs('{"x":42}', "oc://config/x.y")).toBeNull(); }); - it("JR-12 empty segment in dotted path throws OcPathError", () => { + it("empty segment in dotted path throws OcPathError", () => { // v1 invariant: malformed paths fail loud at parse time, not silently null. 
expect(() => rs('{"x":1}', "oc://config/x..y")).toThrow(/Empty dotted sub-segment/); }); - it("JR-13 string value at leaf surfaces via object-entry shape", () => { + it("string value at leaf surfaces via object-entry shape", () => { const m = rs('{"k":"v"}', "oc://config/k"); expect(m?.kind).toBe("object-entry"); if (m?.kind === "object-entry") { @@ -85,14 +78,14 @@ describe("wave-17 jsonc resolver edges", () => { } }); - it("JR-14 boolean and null values resolve", () => { + it("boolean and null values resolve", () => { const m1 = rs('{"k":true}', "oc://config/k"); expect(m1?.kind).toBe("object-entry"); const m2 = rs('{"k":null}', "oc://config/k"); expect(m2?.kind).toBe("object-entry"); }); - it("JR-15 mixed slash + dot segments resolve identically", () => { + it("mixed slash + dot segments resolve identically", () => { const a = rs('{"a":{"b":{"c":1}}}', "oc://config/a.b.c"); const b = rs('{"a":{"b":{"c":1}}}', "oc://config/a/b.c"); const c = rs('{"a":{"b":{"c":1}}}', "oc://config/a/b/c"); @@ -100,17 +93,17 @@ describe("wave-17 jsonc resolver edges", () => { expect(b?.kind).toBe(c?.kind); }); - it("JR-16 keys with special characters resolve", () => { + it("keys with special characters resolve", () => { const m = rs('{"a-b_c":{"x":1}}', "oc://config/a-b_c.x"); expect(m?.kind).toBe("object-entry"); }); - it("JR-17 unicode keys resolve", () => { + it("unicode keys resolve", () => { const m = rs('{"héllo":1}', "oc://config/héllo"); expect(m?.kind).toBe("object-entry"); }); - it("JR-18 large nested structure (depth 20) resolves to leaf", () => { + it("large nested structure (depth 20) resolves to leaf", () => { let json = '"leaf"'; const segs: string[] = []; for (let i = 19; i >= 0; i--) { @@ -124,7 +117,7 @@ describe("wave-17 jsonc resolver edges", () => { } }); - it("JR-19 resolver is non-mutating across calls", () => { + it("resolver is non-mutating across calls", () => { const { ast } = parseJsonc('{"x":{"y":1}}'); const before = JSON.stringify(ast); 
rs('{"x":{"y":1}}', "oc://config/x.y"); @@ -133,7 +126,7 @@ describe("wave-17 jsonc resolver edges", () => { expect(JSON.stringify(ast)).toBe(before); }); - it("JR-20 hostile input shapes do not throw", () => { + it("hostile input shapes do not throw", () => { expect(() => rs("{garbage}", "oc://config/x")).not.toThrow(); expect(() => rs('{"a":', "oc://config/a")).not.toThrow(); }); diff --git a/extensions/oc-path/src/oc-path/tests/scenarios/jsonl-byte-fidelity.test.ts b/extensions/oc-path/src/oc-path/tests/scenarios/jsonl-byte-fidelity.test.ts index c05e3894861..5e8460816d1 100644 --- a/extensions/oc-path/src/oc-path/tests/scenarios/jsonl-byte-fidelity.test.ts +++ b/extensions/oc-path/src/oc-path/tests/scenarios/jsonl-byte-fidelity.test.ts @@ -1,10 +1,3 @@ -/** - * Wave 16 — JSONL byte-fidelity round-trip. - * - * Substrate guarantee: `emitJsonl(parseJsonl(raw)) === raw` for every - * input the parser accepts. JSONL is line-oriented; blanks, malformed - * lines, mixed line endings, trailing-newline shape — all byte-stable. 
- */ import { describe, expect, it } from "vitest"; import { emitJsonl } from "../../jsonl/emit.js"; import { parseJsonl } from "../../jsonl/parse.js"; @@ -13,81 +6,81 @@ function rt(raw: string): string { return emitJsonl(parseJsonl(raw).ast); } -describe("wave-16 jsonl byte-fidelity", () => { - it("JL-01 empty file", () => { +describe("jsonl byte-fidelity", () => { + it("empty file", () => { expect(rt("")).toBe(""); }); - it("JL-02 single line no trailing newline", () => { + it("single line no trailing newline", () => { expect(rt('{"a":1}')).toBe('{"a":1}'); }); - it("JL-03 single line with trailing newline", () => { + it("single line with trailing newline", () => { expect(rt('{"a":1}\n')).toBe('{"a":1}\n'); }); - it("JL-04 multiple lines preserved", () => { + it("multiple lines preserved", () => { const raw = '{"a":1}\n{"b":2}\n{"c":3}\n'; expect(rt(raw)).toBe(raw); }); - it("JL-05 blank line in the middle preserved", () => { + it("blank line in the middle preserved", () => { const raw = '{"a":1}\n\n{"b":2}\n'; expect(rt(raw)).toBe(raw); }); - it("JL-06 multiple blank lines preserved", () => { + it("multiple blank lines preserved", () => { const raw = '{"a":1}\n\n\n{"b":2}\n'; expect(rt(raw)).toBe(raw); }); - it("JL-07 malformed line round-trips verbatim", () => { + it("malformed line round-trips verbatim", () => { const raw = '{"a":1}\nthis is not json\n{"b":2}\n'; expect(rt(raw)).toBe(raw); }); - it("JL-08 entirely malformed file round-trips", () => { + it("entirely malformed file round-trips", () => { const raw = "header\nbody\nfooter\n"; expect(rt(raw)).toBe(raw); }); - it("JL-09 leading + trailing blanks preserved", () => { + it("leading + trailing blanks preserved", () => { const raw = '\n\n{"a":1}\n\n'; expect(rt(raw)).toBe(raw); }); - it("JL-10 file ending without final newline preserved", () => { + it("file ending without final newline preserved", () => { const raw = '{"a":1}\n{"b":2}'; expect(rt(raw)).toBe(raw); }); - it("JL-11 nested object lines 
preserved", () => { + it("nested object lines preserved", () => { const raw = '{"a":{"b":{"c":1}}}\n{"x":[1,[2,[3]]]}\n'; expect(rt(raw)).toBe(raw); }); - it("JL-12 unicode in a value line preserved", () => { + it("unicode in a value line preserved", () => { const raw = '{"name":"héllo 世界 🎉"}\n'; expect(rt(raw)).toBe(raw); }); - it("JL-13 idiosyncratic whitespace inside a line preserved", () => { + it("idiosyncratic whitespace inside a line preserved", () => { const raw = '{ "a" : 1 }\n'; expect(rt(raw)).toBe(raw); }); - it("JL-14 single blank line file preserved", () => { + it("single blank line file preserved", () => { const raw = "\n"; expect(rt(raw)).toBe(raw); }); - it("JL-15 large log (1000 lines) preserved", () => { + it("large log (1000 lines) preserved", () => { const lines = Array.from({ length: 1000 }, (_, i) => `{"i":${i}}`); const raw = lines.join("\n") + "\n"; expect(rt(raw)).toBe(raw); }); - it("JL-16 mixed value + malformed + blank preserved", () => { + it("mixed value + malformed + blank preserved", () => { const raw = '{"a":1}\n{not json}\n\n{"b":2}\nstill not json\n{"c":3}\n'; expect(rt(raw)).toBe(raw); }); @@ -95,26 +88,21 @@ describe("wave-16 jsonl byte-fidelity", () => { // F10 — CRLF preservation. Without lineEnding tracking on the AST, // a CRLF input edited via setJsonlOcPath rebuilds raw via render // which joins with `\n`, mixing endings on Windows-authored datasets. - it("JL-17 CRLF input round-trips byte-identical via the default emit", () => { + it("CRLF input round-trips byte-identical via the default emit", () => { const raw = '{"a":1}\r\n{"b":2}\r\n{"c":3}\r\n'; expect(rt(raw)).toBe(raw); }); - it("JL-18 CRLF input preserves CRLF after a structural edit (render mode)", () => { - // Pin the render path: setJsonlOcPath rebuilds raw via render mode, - // which now consults ast.lineEnding to reconstruct the original - // convention. Without the fix, render-mode output uses `\n` and - // produces mixed line endings on Windows datasets. 
+ it("CRLF input preserves CRLF after a structural edit (render mode)", () => { const raw = '{"a":1}\r\n{"b":2}\r\n'; const { ast } = parseJsonl(raw); const rendered = emitJsonl(ast, { mode: "render" }); expect(rendered).toBe('{"a":1}\r\n{"b":2}'); - // Pin no-LF-only joins by counting CRLFs vs bare LFs. expect((rendered.match(/\r\n/g) ?? []).length).toBe(1); expect((rendered.match(/(?<!\r)\n/g) ?? []).length).toBe(0); }); - it("JL-19 LF input preserves LF after a structural edit (render mode)", () => { + it("LF input preserves LF after a structural edit (render mode)", () => { // Symmetric: a Unix-authored log doesn't mysteriously gain CRLF. const raw = '{"a":1}\n{"b":2}\n'; const { ast } = parseJsonl(raw); diff --git a/extensions/oc-path/src/oc-path/tests/scenarios/jsonl-resolver-edges.test.ts b/extensions/oc-path/src/oc-path/tests/scenarios/jsonl-resolver-edges.test.ts index 17c8b94801f..e01cd338e33 100644 --- a/extensions/oc-path/src/oc-path/tests/scenarios/jsonl-resolver-edges.test.ts +++ b/extensions/oc-path/src/oc-path/tests/scenarios/jsonl-resolver-edges.test.ts @@ -1,10 +1,3 @@ -/** - * Wave 18 — JSONL resolver adversarial edges. - * - * Substrate guarantee: line addresses (`Lnnn`, `$last`) walk - * deterministically; missing addresses, blank-line targets, and - * malformed-line targets all surface as null without throwing. 
- */ import { describe, expect, it } from "vitest"; import { parseJsonl } from "../../jsonl/parse.js"; import { resolveJsonlOcPath } from "../../jsonl/resolve.js"; @@ -14,21 +7,21 @@ function rs(raw: string, ocPath: string) { return resolveJsonlOcPath(parseJsonl(raw).ast, parseOcPath(ocPath)); } -describe("wave-18 jsonl resolver edges", () => { - it("JLR-01 root resolves with no segments", () => { +describe("jsonl resolver edges", () => { + it("root resolves with no segments", () => { expect(rs('{"a":1}\n', "oc://log")?.kind).toBe("root"); }); - it("JLR-02 L1 resolves to a value line", () => { + it("L1 resolves to a value line", () => { const m = rs('{"a":1}\n', "oc://log/L1"); expect(m?.kind).toBe("line"); }); - it("JLR-03 L99 unknown line returns null", () => { + it("L99 unknown line returns null", () => { expect(rs('{"a":1}\n', "oc://log/L99")).toBeNull(); }); - it("JLR-04 $last picks the most recent value line", () => { + it("$last picks the most recent value line", () => { const m = rs('{"a":1}\n{"a":2}\n{"a":3}\n', "oc://log/$last/a"); expect(m?.kind).toBe("object-entry"); if (m?.kind === "object-entry") { @@ -36,7 +29,7 @@ describe("wave-18 jsonl resolver edges", () => { } }); - it("JLR-05 $last skips trailing blank lines", () => { + it("$last skips trailing blank lines", () => { const m = rs('{"a":1}\n\n\n', "oc://log/$last/a"); expect(m?.kind).toBe("object-entry"); if (m?.kind === "object-entry") { @@ -44,42 +37,42 @@ describe("wave-18 jsonl resolver edges", () => { } }); - it("JLR-06 $last skips trailing malformed lines", () => { + it("$last skips trailing malformed lines", () => { const m = rs('{"a":1}\nbroken\n', "oc://log/$last/a"); expect(m?.kind).toBe("object-entry"); }); - it("JLR-07 $last on empty file returns null", () => { + it("$last on empty file returns null", () => { expect(rs("", "oc://log/$last/x")).toBeNull(); }); - it("JLR-08 $last on all-blank file returns null", () => { + it("$last on all-blank file returns null", () => { 
expect(rs("\n\n\n", "oc://log/$last/x")).toBeNull(); }); - it("JLR-09 $last on all-malformed file returns null", () => { + it("$last on all-malformed file returns null", () => { expect(rs("a\nb\nc\n", "oc://log/$last/x")).toBeNull(); }); - it("JLR-10 garbage line address returns null", () => { + it("garbage line address returns null", () => { expect(rs('{"a":1}\n', "oc://log/garbage")).toBeNull(); expect(rs('{"a":1}\n', "oc://log/L")).toBeNull(); expect(rs('{"a":1}\n', "oc://log/Labc")).toBeNull(); }); - it("JLR-11 descent into a blank line returns null", () => { + it("descent into a blank line returns null", () => { expect(rs('{"a":1}\n\n{"b":2}\n', "oc://log/L2/anything")).toBeNull(); }); - it("JLR-12 descent into a malformed line returns null", () => { + it("descent into a malformed line returns null", () => { expect(rs('{"a":1}\nbroken\n{"b":2}\n', "oc://log/L2/anything")).toBeNull(); }); - it("JLR-13 missing field on a value line returns null", () => { + it("missing field on a value line returns null", () => { expect(rs('{"a":1}\n', "oc://log/L1/missing")).toBeNull(); }); - it("JLR-14 dotted descent through line value resolves", () => { + it("dotted descent through line value resolves", () => { const m = rs('{"r":{"ok":true,"d":"x"}}\n', "oc://log/L1/r.d"); expect(m?.kind).toBe("object-entry"); if (m?.kind === "object-entry") { @@ -87,7 +80,7 @@ describe("wave-18 jsonl resolver edges", () => { } }); - it("JLR-15 array index inside a line resolves", () => { + it("array index inside a line resolves", () => { const m = rs('{"items":["a","b","c"]}\n', "oc://log/L1/items.2"); expect(m?.kind).toBe("value"); if (m?.kind === "value") { @@ -95,14 +88,14 @@ describe("wave-18 jsonl resolver edges", () => { } }); - it("JLR-16 line numbers are 1-indexed", () => { + it("line numbers are 1-indexed", () => { const m = rs('{"a":1}\n{"a":2}\n', "oc://log/L1/a"); if (m?.kind === "object-entry") { expect(m.node.value).toMatchObject({ kind: "number", value: 1 }); } }); - 
it("JLR-17 line numbers preserved across blank/malformed entries", () => { + it("line numbers preserved across blank/malformed entries", () => { const m = rs('{"a":1}\n\nbroken\n{"a":4}\n', "oc://log/L4/a"); expect(m?.kind).toBe("object-entry"); if (m?.kind === "object-entry") { @@ -110,7 +103,7 @@ describe("wave-18 jsonl resolver edges", () => { } }); - it("JLR-18 resolver is non-mutating", () => { + it("resolver is non-mutating", () => { const { ast } = parseJsonl('{"a":1}\n{"b":2}\n'); const before = JSON.stringify(ast); rs('{"a":1}\n{"b":2}\n', "oc://log/L1"); @@ -118,7 +111,7 @@ describe("wave-18 jsonl resolver edges", () => { expect(JSON.stringify(ast)).toBe(before); }); - it("JLR-19 hostile inputs do not throw", () => { + it("hostile inputs do not throw", () => { expect(() => rs("not json\n", "oc://log/L1")).not.toThrow(); expect(() => rs("", "oc://log/$last")).not.toThrow(); }); diff --git a/extensions/oc-path/src/oc-path/tests/scenarios/malformed-input.test.ts b/extensions/oc-path/src/oc-path/tests/scenarios/malformed-input.test.ts index 9273514c946..fc8ac8d252c 100644 --- a/extensions/oc-path/src/oc-path/tests/scenarios/malformed-input.test.ts +++ b/extensions/oc-path/src/oc-path/tests/scenarios/malformed-input.test.ts @@ -1,48 +1,41 @@ -/** - * Wave 11 — malformed input recovery. - * - * Substrate guarantee: parser is **soft-error**: it never throws on - * malformed input. Suspicious-but-recoverable inputs produce - * diagnostics; unparseable structural pieces are dropped silently. 
- */ import { describe, expect, it } from "vitest"; import { parseMd } from "../../parse.js"; -describe("wave-11 malformed-input", () => { - it("M-01 truncated mid-frontmatter (no close fence)", () => { +describe("malformed-input", () => { + it("truncated mid-frontmatter (no close fence)", () => { const raw = "---\nname: github\n"; const { ast, diagnostics } = parseMd(raw); expect(diagnostics.some((d) => d.code === "OC_FRONTMATTER_UNCLOSED")).toBe(true); expect(ast.frontmatter).toEqual([]); }); - it("M-02 truncated mid-section", () => { + it("truncated mid-section", () => { const raw = "## H\n- item\nmid-line"; const { ast } = parseMd(raw); expect(ast.blocks.length).toBe(1); }); - it("M-03 only `---` (single fence, no content)", () => { + it("only `---` (single fence, no content)", () => { expect(() => parseMd("---\n")).not.toThrow(); }); - it("M-04 only `---\\n---`", () => { + it("only `---\\n---`", () => { const { ast } = parseMd("---\n---"); expect(ast.frontmatter).toEqual([]); }); - it("M-05 binary-ish bytes (non-ASCII control chars)", () => { + it("binary-ish bytes (non-ASCII control chars)", () => { const raw = "## H\n\x00\x01\x02\n"; expect(() => parseMd(raw)).not.toThrow(); }); - it("M-06 very long single line (10k chars)", () => { + it("very long single line (10k chars)", () => { const raw = `## H\n${"x".repeat(10_000)}\n`; const { ast } = parseMd(raw); expect(ast.blocks[0]?.heading).toBe("H"); }); - it("M-07 deeply repeated headings (1000 H2 blocks)", () => { + it("deeply repeated headings (1000 H2 blocks)", () => { const lines: string[] = []; for (let i = 0; i < 1000; i++) { lines.push(`## H${i}`); @@ -53,27 +46,27 @@ describe("wave-11 malformed-input", () => { expect(ast.blocks.length).toBe(1000); }); - it("M-08 bullet shape that isn't actually a bullet (`-not-a-bullet`)", () => { + it("bullet shape that isn't actually a bullet (`-not-a-bullet`)", () => { const { ast } = parseMd("## H\n-not-a-bullet\n- real\n"); 
expect(ast.blocks[0]?.items.length).toBe(1); }); - it("M-09 unclosed code fence", () => { + it("unclosed code fence", () => { const raw = "## H\n```\nbody\n"; expect(() => parseMd(raw)).not.toThrow(); }); - it("M-10 mismatched fence (open with ``` close with ~~~)", () => { + it("mismatched fence (open with ``` close with ~~~)", () => { const raw = "## H\n```\nbody\n~~~\n"; expect(() => parseMd(raw)).not.toThrow(); }); - it("M-11 nested fences (treated linearly, not nested)", () => { + it("nested fences (treated linearly, not nested)", () => { const raw = "## H\n```\n```\nstill-in-second\n```\n"; expect(() => parseMd(raw)).not.toThrow(); }); - it("M-12 empty file", () => { + it("empty file", () => { const { ast, diagnostics } = parseMd(""); expect(ast.raw).toBe(""); expect(ast.frontmatter).toEqual([]); @@ -81,23 +74,23 @@ describe("wave-11 malformed-input", () => { expect(diagnostics).toEqual([]); }); - it("M-13 single character file", () => { + it("single character file", () => { const { ast } = parseMd("x"); expect(ast.preamble).toBe("x"); expect(ast.blocks).toEqual([]); }); - it("M-14 single newline file", () => { + it("single newline file", () => { const { ast } = parseMd("\n"); expect(ast.blocks).toEqual([]); }); - it("M-15 file with mixed indentation extremes (tabs, spaces, mixed)", () => { + it("file with mixed indentation extremes (tabs, spaces, mixed)", () => { const raw = "## H\n\t- tabbed\n - spaced\n\t - mixed\n"; expect(() => parseMd(raw)).not.toThrow(); }); - it("M-16 frontmatter with frontmatter-shaped content inside (---)", () => { + it("frontmatter with frontmatter-shaped content inside (---)", () => { const raw = "---\nk: v\n---\n\n---\nshould not parse as second frontmatter\n---\n"; const { ast } = parseMd(raw); expect(ast.frontmatter.map((e) => e.key)).toEqual(["k"]); @@ -105,29 +98,29 @@ describe("wave-11 malformed-input", () => { expect(ast.preamble).toContain("---"); }); - it("M-17 lines starting with `#` but not heading (raw `#` chars in 
body)", () => { + it("lines starting with `#` but not heading (raw `#` chars in body)", () => { const raw = "## H\n\n# This is text starting with #\n#### h4 not parsed as block\n"; const { ast } = parseMd(raw); expect(ast.blocks.length).toBe(1); expect(ast.blocks[0]?.bodyText).toContain("# This is text"); }); - it("M-18 lines starting with multiple ## but malformed (####, ######)", () => { + it("lines starting with multiple ## but malformed (####, ######)", () => { const { ast } = parseMd("## Real\n#### Not block\n###### Not block\n"); expect(ast.blocks.length).toBe(1); expect(ast.blocks[0]?.heading).toBe("Real"); }); - it("M-19 file with just whitespace", () => { + it("file with just whitespace", () => { expect(() => parseMd(" \n\t\n \n")).not.toThrow(); }); - it("M-20 file with only BOM", () => { + it("file with only BOM", () => { const { ast } = parseMd(""); expect(ast.raw).toBe(""); }); - it("M-21 file mixing BOM + frontmatter + body + sections", () => { + it("file mixing BOM + frontmatter + body + sections", () => { const raw = "---\nk: v\n---\n\nbody\n## Section\n- item\n"; expect(() => parseMd(raw)).not.toThrow(); const { ast } = parseMd(raw); @@ -135,13 +128,13 @@ describe("wave-11 malformed-input", () => { expect(ast.blocks[0]?.heading).toBe("Section"); }); - it("M-22 line endings: legacy CR-only (Mac classic)", () => { + it("line endings: legacy CR-only (Mac classic)", () => { // Our regex /\r?\n/ doesn't split on CR-only. Treats whole as one line. 
const raw = "line1\rline2\r## Heading\r"; expect(() => parseMd(raw)).not.toThrow(); }); - it("M-23 100 KB file", () => { + it("100 KB file", () => { const lines: string[] = []; for (let i = 0; i < 1000; i++) { lines.push("## H" + i); diff --git a/extensions/oc-path/src/oc-path/tests/scenarios/oc-path-parse-edges.test.ts b/extensions/oc-path/src/oc-path/tests/scenarios/oc-path-parse-edges.test.ts index b829b224c8e..f423e8c88cd 100644 --- a/extensions/oc-path/src/oc-path/tests/scenarios/oc-path-parse-edges.test.ts +++ b/extensions/oc-path/src/oc-path/tests/scenarios/oc-path-parse-edges.test.ts @@ -1,10 +1,3 @@ -/** - * Wave 7 — OcPath parsing edges. - * - * Substrate guarantee: `parseOcPath(s)` is a pure function. Valid input - * round-trips via `formatOcPath`; invalid input throws `OcPathError` - * with a stable `code`. - */ import { describe, expect, it } from "vitest"; import { OcPathError, @@ -25,28 +18,28 @@ function expectErr(fn: () => unknown, code: string): void { } } -describe("wave-07 oc-path-parse-edges", () => { - it("OP-01 file-only", () => { +describe("oc-path-parse-edges", () => { + it("file-only", () => { expect(parseOcPath("oc://SOUL.md")).toEqual({ file: "SOUL.md" }); }); - it("OP-02 file + section", () => { + it("file + section", () => { expect(parseOcPath("oc://SOUL.md/Boundaries").section).toBe("Boundaries"); }); - it("OP-03 file + section + item", () => { + it("file + section + item", () => { expect(parseOcPath("oc://SOUL.md/Boundaries/deny-rule-1").item).toBe("deny-rule-1"); }); - it("OP-04 file + section + item + field", () => { + it("file + section + item + field", () => { expect(parseOcPath("oc://SOUL.md/B/deny-1/risk").field).toBe("risk"); }); - it("OP-05 session query parameter", () => { + it("session query parameter", () => { expect(parseOcPath("oc://X.md?session=daily").session).toBe("daily"); }); - it("OP-06 session with full path", () => { + it("session with full path", () => { const p = 
parseOcPath("oc://X.md/sec/item/field?session=cron"); expect(p).toEqual({ file: "X.md", @@ -57,46 +50,46 @@ describe("wave-07 oc-path-parse-edges", () => { }); }); - it("OP-07 unknown query parameters silently ignored", () => { + it("unknown query parameters silently ignored", () => { const p = parseOcPath("oc://X.md?foo=bar&session=s&baz=qux"); expect(p.session).toBe("s"); }); - it("OP-08 session= with empty value drops session", () => { + it("session= with empty value drops session", () => { const p = parseOcPath("oc://X.md?session="); expect(p.session).toBeUndefined(); }); - it("OP-09 query without `=` ignored", () => { + it("query without `=` ignored", () => { const p = parseOcPath("oc://X.md?nokeyhere"); expect(p.session).toBeUndefined(); }); - it("OP-10 missing scheme throws", () => { + it("missing scheme throws", () => { expectErr(() => parseOcPath("SOUL.md"), "OC_PATH_MISSING_SCHEME"); }); - it("OP-11 wrong scheme throws", () => { + it("wrong scheme throws", () => { expectErr(() => parseOcPath("https://x.com"), "OC_PATH_MISSING_SCHEME"); }); - it("OP-12 empty after scheme throws", () => { + it("empty after scheme throws", () => { expectErr(() => parseOcPath("oc://"), "OC_PATH_EMPTY"); }); - it("OP-13 empty segment throws", () => { + it("empty segment throws", () => { expectErr(() => parseOcPath("oc://X.md//item"), "OC_PATH_EMPTY_SEGMENT"); }); - it("OP-14 too-deep nesting throws", () => { + it("too-deep nesting throws", () => { expectErr(() => parseOcPath("oc://X.md/a/b/c/d/e"), "OC_PATH_TOO_DEEP"); }); - it("OP-15 non-string throws", () => { + it("non-string throws", () => { expectErr(() => parseOcPath(42 as unknown as string), "OC_PATH_NOT_STRING"); }); - it("OP-16 round-trip canonical forms", () => { + it("round-trip canonical forms", () => { const cases = [ "oc://SOUL.md", "oc://SOUL.md/Boundaries", @@ -112,12 +105,12 @@ describe("wave-07 oc-path-parse-edges", () => { } }); - it("OP-17 isValidOcPath true positives", () => { + it("isValidOcPath true 
positives", () => { expect(isValidOcPath("oc://X.md")).toBe(true); expect(isValidOcPath("oc://X.md/sec/item/field")).toBe(true); }); - it("OP-18 isValidOcPath true negatives", () => { + it("isValidOcPath true negatives", () => { expect(isValidOcPath("")).toBe(false); expect(isValidOcPath("X.md")).toBe(false); expect(isValidOcPath("oc://")).toBe(false); @@ -126,39 +119,32 @@ describe("wave-07 oc-path-parse-edges", () => { expect(isValidOcPath({})).toBe(false); }); - it("OP-19 file segment with special chars (file with dots/slashes)", () => { + it("file segment with special chars (file with dots/slashes)", () => { const p = parseOcPath("oc://config/plugins.entries.foo.token"); expect(p.file).toBe("config"); expect(p.section).toBe("plugins.entries.foo.token"); }); - it("OP-20 section segment with hyphens / underscores / numbers", () => { + it("section segment with hyphens / underscores / numbers", () => { const p = parseOcPath("oc://X.md/Multi-Tenant_Section_2"); expect(p.section).toBe("Multi-Tenant_Section_2"); }); - it("OP-21 [frontmatter] sentinel is just a section name", () => { + it("[frontmatter] sentinel is just a section name", () => { const p = parseOcPath("oc://X.md/[frontmatter]/name"); expect(p.section).toBe("[frontmatter]"); expect(p.item).toBe("name"); }); - it("OP-22 formatOcPath rejects empty file", () => { + it("formatOcPath rejects empty file", () => { expectErr(() => formatOcPath({ file: "" }), "OC_PATH_FILE_REQUIRED"); }); - it("OP-23 formatOcPath rejects item without section", () => { + it("formatOcPath rejects item without section", () => { expectErr(() => formatOcPath({ file: "X.md", item: "i" }), "OC_PATH_NESTING"); }); - it("OP-24 formatOcPath quotes raw slot values containing special chars", () => { - // Closes ClawSweeper P2 on PR #78678: `formatOcPath` previously - // concatenated raw slot values, so a programmatically-constructed - // path with a `/` in the section/item slot would emit extra - // segments and fail to parse back to the same 
address. - // Use a slot value with `/` (and no internal `.`) — `.` inside - // a slot is the dotted sub-segment delimiter; callers wanting a - // literal `.` in a key should pre-quote that single sub-segment. + it("formatOcPath quotes raw slot values containing special chars", () => { const constructed = formatOcPath({ file: "config.jsonc", section: "agents.defaults.models", @@ -172,22 +158,13 @@ describe("wave-07 oc-path-parse-edges", () => { expect(parsed.item).toBe('"github-copilot/claude-opus-4-7"'); }); - it("OP-25 parseOcPath finds query separator outside quoted keys", () => { - // Closes ClawSweeper P2 on PR #78678: `parseOcPath` previously - // used `indexOf('?')` which split a key like `"foo?bar"` at the - // embedded `?`, breaking advertised quoted-segment support. + it("parseOcPath finds query separator outside quoted keys", () => { const parsed = parseOcPath('oc://config.jsonc/"foo?bar"?session=daily'); expect(parsed.section).toBe('"foo?bar"'); expect(parsed.session).toBe("daily"); }); - it("OP-26 file slot with `/` round-trips via quoting", () => { - // Closes ClawSweeper P2 on PR #78678 (round 4): `parseOcPath` stored - // `path.file` verbatim while `formatOcPath` prefixed it without - // quote-wrapping, so a file like `skills/email-drafter` couldn't - // round-trip — formatter output got re-parsed as file plus section, - // and quoted input leaked the surrounding quotes into filesystem - // resolution. + it("file slot with `/` round-trips via quoting", () => { const constructed = formatOcPath({ file: "skills/email-drafter", section: "Tools", @@ -200,21 +177,14 @@ describe("wave-07 oc-path-parse-edges", () => { expect(parsed.item).toBe("-1"); }); - it("OP-27 file slot with dot extension does NOT get quoted", () => { - // The file slot's quoting trigger excludes `.` because filename - // extensions (`AGENTS.md`, `gateway.jsonc`) are normal — quoting - // them would make canonical form ugly without need. 
+ it("file slot with dot extension does NOT get quoted", () => { expect(formatOcPath({ file: "AGENTS.md" })).toBe("oc://AGENTS.md"); expect(formatOcPath({ file: "gateway.jsonc", section: "version" })).toBe( "oc://gateway.jsonc/version", ); }); - it("OP-28 formatOcPath rejects field without item or section", () => { - // Closes Galin P2 (round 8): the nesting guard caught - // `field + section + no item` but missed `field + no section + no item`. - // Such a struct emits `oc://FILE/FIELD` which silently re-parses as - // `{ file, section: FIELD }` — different shape, breaking round-trip. + it("formatOcPath rejects field without item or section", () => { expect(() => formatOcPath({ file: "X", field: "name" })).toThrow(OcPathError); try { formatOcPath({ file: "X", field: "name" }); @@ -224,26 +194,16 @@ describe("wave-07 oc-path-parse-edges", () => { } }); - it("OP-29 isPattern is quote-aware (literal `*` inside quoted segment)", () => { - // Closes Galin P2 (round 8): `isPattern` previously used - // `slot.split('.')` which shredded a quoted key like `"items.*.glob"` - // and falsely detected the literal `*` as a wildcard, causing - // single-match verbs to reject a concrete path. + it("isPattern is quote-aware (literal `*` inside quoted segment)", () => { const concrete = parseOcPath('oc://config.jsonc/"items.*.glob"'); expect(isPattern(concrete)).toBe(false); - - // Sanity: an unquoted `*` IS still a wildcard. const wildcard = parseOcPath("oc://config.jsonc/items/*"); expect(isPattern(wildcard)).toBe(true); }); - it("OP-30 getPathLayout is quote-aware", () => { - // Closes Galin P2 (round 8): `getPathLayout` used `slot.split('.')` - // for all three slots, breaking the find-walker / repackPath layout - // contract for quoted segments containing `.`. + it("getPathLayout is quote-aware", () => { const path = parseOcPath('oc://config.jsonc/"github.com"/repos'); const layout = getPathLayout(path); - // Quoted segment is one sub-segment, not two. 
expect(layout.sectionLen).toBe(1); expect(layout.subs[0]).toBe('"github.com"'); expect(layout.itemLen).toBe(1); diff --git a/extensions/oc-path/src/oc-path/tests/scenarios/oc-path-resolver-edges.test.ts b/extensions/oc-path/src/oc-path/tests/scenarios/oc-path-resolver-edges.test.ts index 3df1837d1de..feb533d82c4 100644 --- a/extensions/oc-path/src/oc-path/tests/scenarios/oc-path-resolver-edges.test.ts +++ b/extensions/oc-path/src/oc-path/tests/scenarios/oc-path-resolver-edges.test.ts @@ -1,11 +1,3 @@ -/** - * Wave 8 — OcPath resolver edges. - * - * Substrate guarantee: `resolveOcPath(ast, ocPath)` returns the matched - * node or `null`. Slug matching is case-insensitive. Field on non-kv - * item returns `null` (not a guess). Frontmatter via the `[frontmatter]` - * sentinel section. - */ import { describe, expect, it } from "vitest"; import { parseMd } from "../../parse.js"; import { resolveMdOcPath as resolveOcPath } from "../../resolve.js"; @@ -34,30 +26,30 @@ Preamble prose. - item one `; -describe("wave-08 oc-path-resolver-edges", () => { +describe("oc-path-resolver-edges", () => { const { ast } = parseMd(SAMPLE); - it("R-01 root resolves to AST", () => { + it("root resolves to AST", () => { const m = resolveOcPath(ast, { file: "X.md" }); expect(m?.kind).toBe("root"); }); - it("R-02 block by exact slug", () => { + it("block by exact slug", () => { const m = resolveOcPath(ast, { file: "X.md", section: "boundaries" }); expect(m?.kind).toBe("block"); }); - it("R-03 block by case-mismatched slug (Boundaries → boundaries)", () => { + it("block by case-mismatched slug (Boundaries → boundaries)", () => { const m = resolveOcPath(ast, { file: "X.md", section: "Boundaries" }); expect(m?.kind).toBe("block"); }); - it("R-04 block by uppercased slug", () => { + it("block by uppercased slug", () => { const m = resolveOcPath(ast, { file: "X.md", section: "BOUNDARIES" }); expect(m?.kind).toBe("block"); }); - it("R-05 multi-word section by slug", () => { + it("multi-word section 
by slug", () => { const m = resolveOcPath(ast, { file: "X.md", section: "multi-word-section" }); expect(m?.kind).toBe("block"); if (m?.kind === "block") { @@ -65,7 +57,7 @@ describe("wave-08 oc-path-resolver-edges", () => { } }); - it("R-06 multi-word section by exact heading text (case-folded)", () => { + it("multi-word section by exact heading text (case-folded)", () => { const m = resolveOcPath(ast, { file: "X.md", section: "Multi-Word Section" }); // The OcPath section is matched case-insensitively against block.slug. // Block.slug for "Multi-Word Section" is "multi-word-section", and @@ -75,12 +67,12 @@ describe("wave-08 oc-path-resolver-edges", () => { expect(m).toBeNull(); }); - it("R-07 unknown section returns null", () => { + it("unknown section returns null", () => { const m = resolveOcPath(ast, { file: "X.md", section: "unknown" }); expect(m).toBeNull(); }); - it("R-08 item by slug under known section", () => { + it("item by slug under known section", () => { const m = resolveOcPath(ast, { file: "X.md", section: "tools", @@ -101,7 +93,7 @@ describe("wave-08 oc-path-resolver-edges", () => { } }); - it("R-10 item slug for plain bullet uses text", () => { + it("item slug for plain bullet uses text", () => { const m = resolveOcPath(ast, { file: "X.md", section: "boundaries", @@ -110,7 +102,7 @@ describe("wave-08 oc-path-resolver-edges", () => { expect(m?.kind).toBe("item"); }); - it("R-11 item slug case-insensitive", () => { + it("item slug case-insensitive", () => { const m = resolveOcPath(ast, { file: "X.md", section: "tools", @@ -119,7 +111,7 @@ describe("wave-08 oc-path-resolver-edges", () => { expect(m?.kind).toBe("item"); }); - it("R-12 item with spaces in key (slugified)", () => { + it("item with spaces in key (slugified)", () => { const m = resolveOcPath(ast, { file: "X.md", section: "tools", @@ -131,7 +123,7 @@ describe("wave-08 oc-path-resolver-edges", () => { } }); - it("R-13 unknown item returns null", () => { + it("unknown item returns null", () 
=> { const m = resolveOcPath(ast, { file: "X.md", section: "tools", @@ -140,7 +132,7 @@ describe("wave-08 oc-path-resolver-edges", () => { expect(m).toBeNull(); }); - it("R-14 item-field matches kv.key (case-insensitive)", () => { + it("item-field matches kv.key (case-insensitive)", () => { const m = resolveOcPath(ast, { file: "X.md", section: "tools", @@ -150,7 +142,7 @@ describe("wave-08 oc-path-resolver-edges", () => { expect(m?.kind).toBe("item-field"); }); - it("R-15 field on plain (non-kv) item returns null", () => { + it("field on plain (non-kv) item returns null", () => { const m = resolveOcPath(ast, { file: "X.md", section: "boundaries", @@ -160,7 +152,7 @@ describe("wave-08 oc-path-resolver-edges", () => { expect(m).toBeNull(); }); - it("R-16 field that does not match kv.key returns null", () => { + it("field that does not match kv.key returns null", () => { const m = resolveOcPath(ast, { file: "X.md", section: "tools", @@ -170,7 +162,7 @@ describe("wave-08 oc-path-resolver-edges", () => { expect(m).toBeNull(); }); - it("R-17 frontmatter via [frontmatter] sentinel section", () => { + it("frontmatter via [frontmatter] sentinel section", () => { const m = resolveOcPath(ast, { file: "X.md", section: "[frontmatter]", @@ -182,7 +174,7 @@ describe("wave-08 oc-path-resolver-edges", () => { } }); - it("R-18 frontmatter unknown key returns null", () => { + it("frontmatter unknown key returns null", () => { const m = resolveOcPath(ast, { file: "X.md", section: "[frontmatter]", @@ -191,7 +183,7 @@ describe("wave-08 oc-path-resolver-edges", () => { expect(m).toBeNull(); }); - it("R-19 frontmatter without field returns null", () => { + it("frontmatter without field returns null", () => { const m = resolveOcPath(ast, { file: "X.md", section: "[frontmatter]", @@ -199,7 +191,7 @@ describe("wave-08 oc-path-resolver-edges", () => { expect(m).toBeNull(); }); - it("R-20 multiple frontmatter keys with same name — first match wins", () => { + it("multiple frontmatter keys with 
same name — first match wins", () => { // Build an AST manually to test const dupeAst = { kind: "md" as const, @@ -222,20 +214,20 @@ describe("wave-08 oc-path-resolver-edges", () => { } }); - it("R-21 empty AST resolves root only", () => { + it("empty AST resolves root only", () => { const empty = { kind: "md" as const, raw: "", frontmatter: [], preamble: "", blocks: [] }; expect(resolveOcPath(empty, { file: "X.md" })?.kind).toBe("root"); expect(resolveOcPath(empty, { file: "X.md", section: "any" })).toBeNull(); }); - it("R-22 resolver does not mutate the AST", () => { + it("resolver does not mutate the AST", () => { const before = JSON.stringify(ast); resolveOcPath(ast, { file: "X.md", section: "tools", item: "gh", field: "gh" }); const after = JSON.stringify(ast); expect(after).toBe(before); }); - it("R-23 file segment is informational — resolver doesn't check it", () => { + it("file segment is informational — resolver doesn't check it", () => { // The file name in OcPath is metadata; resolver assumes the AST // matches. Callers verify file mapping before passing the AST. const m1 = resolveOcPath(ast, { file: "SOUL.md", section: "tools" }); diff --git a/extensions/oc-path/src/oc-path/tests/scenarios/perf-determinism.test.ts b/extensions/oc-path/src/oc-path/tests/scenarios/perf-determinism.test.ts index 7478b2e32f0..8936eb7f603 100644 --- a/extensions/oc-path/src/oc-path/tests/scenarios/perf-determinism.test.ts +++ b/extensions/oc-path/src/oc-path/tests/scenarios/perf-determinism.test.ts @@ -1,19 +1,10 @@ -/** - * Wave 14 — performance + determinism + immutability. 
- * - * Substrate guarantees: - * - Parsing scales sub-linearly with file size (no quadratic blowup) - * - Same input produces same AST (no Object.keys / Set order surprises) - * - Resolver does not mutate the AST - * - AST is structurally cloneable (no functions, no cycles) - */ import { describe, expect, it } from "vitest"; import { emitMd } from "../../emit.js"; import { parseMd } from "../../parse.js"; import { resolveMdOcPath as resolveOcPath } from "../../resolve.js"; -describe("wave-14 perf + determinism", () => { - it("PD-01 parses 100 KB file in under 200 ms", () => { +describe("perf + determinism", () => { + it("parses 100 KB file in under 200 ms", () => { const lines: string[] = []; for (let i = 0; i < 1000; i++) { lines.push("## H" + i); @@ -28,7 +19,7 @@ describe("wave-14 perf + determinism", () => { expect(elapsed).toBeLessThan(200); }); - it("PD-02 parses 1000 small files in under 500 ms", () => { + it("parses 1000 small files in under 500 ms", () => { const raw = `## H\n- a\n- b: c\n## I\n- d\n`; const start = performance.now(); for (let i = 0; i < 1000; i++) { @@ -38,7 +29,7 @@ describe("wave-14 perf + determinism", () => { expect(elapsed).toBeLessThan(500); }); - it("PD-03 100k OcPath resolutions on parsed AST in under 500 ms", () => { + it("100k OcPath resolutions on parsed AST in under 500 ms", () => { const raw = `## A\n- a1\n- a2\n## B\n- b1\n- b2\n## C\n- c1: cv\n`; const { ast } = parseMd(raw); const path = { file: "X.md", section: "b", item: "b1" }; @@ -50,7 +41,7 @@ describe("wave-14 perf + determinism", () => { expect(elapsed).toBeLessThan(500); }); - it("PD-04 same input → byte-identical AST.raw across runs", () => { + it("same input → byte-identical AST.raw across runs", () => { const raw = `---\nb: 2\na: 1\n---\n## Z\n- z\n## A\n- a\n`; const a1 = parseMd(raw).ast; const a2 = parseMd(raw).ast; @@ -59,7 +50,7 @@ describe("wave-14 perf + determinism", () => { expect(a1.blocks).toEqual(a2.blocks); }); - it("PD-05 resolveOcPath is 
non-mutating", () => { + it("resolveOcPath is non-mutating", () => { const raw = `## A\n- a: x\n## B\n- b\n`; const { ast } = parseMd(raw); const before = JSON.stringify(ast); @@ -69,7 +60,7 @@ describe("wave-14 perf + determinism", () => { expect(JSON.stringify(ast)).toBe(before); }); - it("PD-06 AST is JSON-serializable (no functions, no cycles)", () => { + it("AST is JSON-serializable (no functions, no cycles)", () => { const raw = `---\nk: v\n---\n## A\n- a\n\`\`\`ts\nx\n\`\`\`\n| h |\n| - |\n| 1 |\n`; const { ast } = parseMd(raw); const serialized = JSON.stringify(ast); @@ -78,7 +69,7 @@ describe("wave-14 perf + determinism", () => { expect(parsed.blocks.length).toBe(ast.blocks.length); }); - it("PD-07 emit is non-mutating", () => { + it("emit is non-mutating", () => { const raw = `## A\n- a\n`; const { ast } = parseMd(raw); const before = JSON.stringify(ast); @@ -88,25 +79,25 @@ describe("wave-14 perf + determinism", () => { expect(JSON.stringify(ast)).toBe(before); }); - it("PD-08 frontmatter ordering is preserved (insertion order, not alphabetical)", () => { + it("frontmatter ordering is preserved (insertion order, not alphabetical)", () => { const raw = `---\nz: 1\nm: 2\na: 3\n---\n`; const { ast } = parseMd(raw); expect(ast.frontmatter.map((e) => e.key)).toEqual(["z", "m", "a"]); }); - it("PD-09 block ordering is document order, not alphabetical", () => { + it("block ordering is document order, not alphabetical", () => { const raw = `## Z\n## A\n## M\n`; const { ast } = parseMd(raw); expect(ast.blocks.map((b) => b.heading)).toEqual(["Z", "A", "M"]); }); - it("PD-10 item ordering within block is document order", () => { + it("item ordering within block is document order", () => { const raw = `## H\n- z\n- a\n- m\n`; const { ast } = parseMd(raw); expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(["z", "a", "m"]); }); - it("PD-11 large fixture round-trip stays under 100 ms", () => { + it("large fixture round-trip stays under 100 ms", () => { const 
lines: string[] = []; for (let i = 0; i < 500; i++) { lines.push(`## Section ${i}`); diff --git a/extensions/oc-path/src/oc-path/tests/scenarios/pitfalls.test.ts b/extensions/oc-path/src/oc-path/tests/scenarios/pitfalls.test.ts deleted file mode 100644 index 72a592c422f..00000000000 --- a/extensions/oc-path/src/oc-path/tests/scenarios/pitfalls.test.ts +++ /dev/null @@ -1,637 +0,0 @@ -/** - * Wave-23 — Pitfall scenarios. - * - * One test per pitfall ID enumerated in - * `packages/oc-paths-substrate/PITFALLS.md` (the substrate-local - * pitfall taxonomy). Tests are grouped by category so a regression in - * any one defense is visible at a glance. Every MITIGATED / REJECTED - * pitfall has a positive validation here; DEFERRED ones are covered - * as documented limits with a `.skip` note. - * - * **Namespace note**: substrate pitfall IDs (P-001 … P-040) are a - * separate namespace from the claws-side `docs/PITFALLS.md` - * governance taxonomy (which uses P-NNN for completely different - * pitfalls — e.g., P-033 there is "Memory poisoning"). The package - * boundary disambiguates. - */ -import { describe, expect, it } from "vitest"; -import { - MAX_PATH_LENGTH, - MAX_TRAVERSAL_DEPTH, - OcPathError, - findOcPaths, - formatOcPath, - parseOcPath, - resolveOcPath, - setOcPath, -} from "../../index.js"; -import { parseJsonc } from "../../jsonc/parse.js"; -import { parseJsonl } from "../../jsonl/parse.js"; - -// ---------- Encoding pitfalls -------------------------------------------- - -describe("wave-23 pitfalls — encoding", () => { - it("P-001 strips leading UTF-8 BOM from path string", () => { - const bom = ""; - expect(parseOcPath(`${bom}oc://X/Y`).file).toBe("X"); - }); - - it("P-002 normalizes path to NFC", () => { - const nfc = "café"; // composed - const nfd = "café"; // decomposed - expect(parseOcPath(`oc://X/${nfd}`).section).toBe(nfc); - expect(parseOcPath(`oc://X/${nfc}`).section).toBe(nfc); - // Same struct out for both inputs. 
- expect(parseOcPath(`oc://X/${nfd}`)).toEqual(parseOcPath(`oc://X/${nfc}`)); - }); - - it("P-003 rejects whitespace in identifier-shaped segments", () => { - expect(() => parseOcPath("oc://X/foo /bar")).toThrow(OcPathError); - expect(() => parseOcPath("oc://X/ foo")).toThrow(OcPathError); - expect(() => parseOcPath("oc://X/foo\tbar")).toThrow(OcPathError); - }); - - it("P-003 allows whitespace inside predicate values (content)", () => { - // Spaces inside a predicate value are legitimate — they're filtering - // against actual content. - expect(() => parseOcPath("oc://X/[name=hello world]")).not.toThrow(); - }); - - it("P-004 / P-011 rejects control characters and null bytes", () => { - expect(() => parseOcPath("oc://X/\x00")).toThrow(/Control character/); - expect(() => parseOcPath("oc://X/foo\x01bar")).toThrow(/Control character/); - expect(() => parseOcPath("oc://X/foo\x7Fbar")).toThrow(/Control character/); - }); -}); - -// ---------- Empty / structural pitfalls ---------------------------------- - -describe("wave-23 pitfalls — empty & structural", () => { - it("P-008 rejects empty segments", () => { - expect(() => parseOcPath("oc://X//Y")).toThrow(/Empty segment/); - }); - - it("P-009 rejects empty dotted sub-segments", () => { - expect(() => parseOcPath("oc://X/a..b")).toThrow(/Empty dotted sub-segment/); - }); - - it("P-010 rejects scheme-only path", () => { - expect(() => parseOcPath("oc://")).toThrow(/Empty oc:\/\/ path/); - }); - - it("P-014 rejects empty predicate key", () => { - expect(() => parseOcPath("oc://X/[=foo]")).toThrow(/Malformed predicate/); - }); - - it("P-014 rejects empty predicate value", () => { - expect(() => parseOcPath("oc://X/[id=]")).toThrow(/Malformed predicate/); - }); - - it("P-015 accepts bracket segment with no operator as literal sentinel", () => { - // `[frontmatter]` predates the predicate grammar — kept as literal. 
- expect(parseOcPath("oc://AGENTS.md/[frontmatter]/key").section).toBe("[frontmatter]"); - }); - - it("P-016 rejects mismatched brackets", () => { - expect(() => parseOcPath("oc://X/[unclosed")).toThrow(OcPathError); - expect(() => parseOcPath("oc://X/closed]")).toThrow(OcPathError); - }); - - it("P-016 rejects mismatched braces", () => { - expect(() => parseOcPath("oc://X/{a,b")).toThrow(OcPathError); - }); - - it("P-018 rejects empty union", () => { - expect(() => parseOcPath("oc://X/{}")).toThrow(/Empty union/); - }); - - it("P-018 rejects union with empty alternative", () => { - expect(() => parseOcPath("oc://X/{a,,b}")).toThrow(/Empty alternative/); - }); -}); - -// ---------- Predicate-content pitfalls ----------------------------------- - -describe("wave-23 pitfalls — predicate content", () => { - it("P-012 predicate value containing `/` round-trips", () => { - // The path-level `/` split must respect bracket boundaries. - const p = parseOcPath("oc://X/[id=foo/bar]/cmd"); - expect(p.section).toBe("[id=foo/bar]"); - expect(p.item).toBe("cmd"); - }); - - it("P-012 findOcPaths matches a leaf whose id contains a slash", () => { - const ast = parseJsonc( - '{"steps":[{"id":"foo/bar","cmd":"x"},{"id":"baz","cmd":"y"}]}', - ).ast; - const out = findOcPaths(ast, parseOcPath("oc://wf/steps/[id=foo/bar]/cmd")); - expect(out).toHaveLength(1); - if (out[0].match.kind === "leaf") { - expect(out[0].match.valueText).toBe("x"); - } - }); - - it("P-013 predicate value containing `.` round-trips", () => { - const p = parseOcPath("oc://X/steps.[id=1.0].cmd"); - expect(p.section).toBe("steps.[id=1.0].cmd"); - }); - - it("P-013 findOcPaths matches a leaf whose id is `1.0`", () => { - const ast = parseJsonc('{"steps":[{"id":"1.0","cmd":"x"},{"id":"2.0","cmd":"y"}]}').ast; - const out = findOcPaths(ast, parseOcPath("oc://wf/steps/[id=1.0]/cmd")); - expect(out).toHaveLength(1); - if (out[0].match.kind === "leaf") { - expect(out[0].match.valueText).toBe("x"); - } - }); -}); - -// 
---------- Sentinel & collision pitfalls -------------------------------- - -describe("wave-23 pitfalls — sentinels & collisions", () => { - it("P-020/openclaw#59934 negative numeric key on object resolves as literal key", () => { - // Telegram supergroup IDs are negative numbers used as map keys. - // Our positional `-N` token would otherwise hijack them. Resolver - // falls through to literal-key lookup on non-indexable containers. - const ast = parseJsonc( - '{"channels":{"telegram":{"groups":{"-5028303500":{"requireMention":false}}}}}', - ).ast; - const m = resolveOcPath( - ast, - parseOcPath("oc://config/channels.telegram.groups.-5028303500.requireMention"), - ); - expect(m).not.toBeNull(); - expect(m?.kind).toBe("leaf"); - if (m?.kind === "leaf") { - expect(m.valueText).toBe("false"); - expect(m.leafType).toBe("boolean"); - } - }); - - it("P-020 negative `-N` still works as positional on arrays", () => { - // Same syntax, indexable container — positional resolution wins. - const ast = parseJsonc('{"items":[10,20,30]}').ast; - const m = resolveOcPath(ast, parseOcPath("oc://X/items/-1")); - expect(m?.kind).toBe("leaf"); - if (m?.kind === "leaf") { - expect(m.valueText).toBe("30"); - } - }); - - it("P-020 numeric segment dispatches by node kind (array index vs map key)", () => { - // Same path string against two different ASTs — kind disambiguates. 
- const arr = parseJsonc('{"x":["a","b"]}').ast; - const map = parseJsonc('{"x":{"0":"a","1":"b"}}').ast; - const arrM = resolveOcPath(arr, parseOcPath("oc://config/x/0")); - const mapM = resolveOcPath(map, parseOcPath("oc://config/x/0")); - expect(arrM?.kind).toBe("leaf"); - expect(mapM?.kind).toBe("leaf"); - if (arrM?.kind === "leaf") { - expect(arrM.valueText).toBe("a"); - } - if (mapM?.kind === "leaf") { - expect(mapM.valueText).toBe("a"); - } - }); - - it("P-021 `$last` literal in an object key is shadowed by positional sentinel", () => { - // Document v0 limitation: `$last` always means "last", never a literal key. - // Authors with `$last` literal keys must use kind-narrow access. - const ast = parseJsonc('{"$last":"literal-value","foo":"bar"}').ast; - const m = resolveOcPath(ast, parseOcPath("oc://X/$last")); - // `$last` resolves to the LAST key (`foo` → `bar`), not the literal `$last` key. - expect(m?.kind).toBe("leaf"); - if (m?.kind === "leaf") { - expect(m.valueText).toBe("bar"); - } - }); -}); - -// ---------- Round-trip pitfalls ------------------------------------------ - -describe("wave-23 pitfalls — round-trip", () => { - it("P-023 parseOcPath ∘ formatOcPath is idempotent across path shapes", () => { - const inputs = [ - "oc://X", - "oc://X/a", - "oc://X/a/b", - "oc://X/a/b/c", - "oc://X/a.b.c", - "oc://X/a?session=s1", - "oc://X/[frontmatter]/key", - "oc://X/steps/*/command", - "oc://X/steps/$last/id", - "oc://X/steps/-2/id", - "oc://X/steps/{command,run}", - "oc://X/steps/[id=foo]/cmd", - "oc://X/steps/#0/foo", - ]; - for (const s of inputs) { - const parsed = parseOcPath(s); - const reparsed = parseOcPath(s); - expect(parsed).toEqual(reparsed); - } - }); -}); - -// ---------- Sentinel-guard pitfalls -------------------------------------- - -describe("wave-23 pitfalls — sentinel at format boundary (F9)", () => { - it("formatOcPath rejects an OcPath struct carrying the redaction sentinel", () => { - // Path strings flow into telemetry, audit 
events, error messages, - // find-result `path` fields. Without the format-time guard, a - // struct with `section: REDACTED_SENTINEL` would slip past every - // consumer except the CLI's scrubSentinel layer. The substrate's - // contract is "emit boundaries refuse the sentinel" — formatOcPath - // IS such a boundary for path strings. - expect(() => formatOcPath({ file: "AGENTS.md", section: "__OPENCLAW_REDACTED__" })).toThrow( - /sentinel literal/, - ); - }); -}); - -// ---------- Containment pitfalls ----------------------------------------- - -describe("wave-23 pitfalls — file-slot containment", () => { - // oc:// paths are workspace-relative. Absolute paths and `..` segments - // would let a hostile workflow / skill manifest persuade - // `openclaw path resolve|set|emit` into reading or writing arbitrary - // filesystem locations (Node `path.resolve(cwd, absolute)` returns - // `absolute`, bypassing the workspace root). Reject at parseOcPath - // and formatOcPath for symmetric defense. - it("rejects an absolute POSIX file slot", () => { - expect(() => parseOcPath("oc:///etc/passwd")).toThrow(/Empty segment/); - // Quoted form — same containment violation, different parse path. 
- expect(() => parseOcPath('oc://"/etc/passwd"/section')).toThrow(/Absolute file slot/); - }); - - it("rejects a Windows drive-letter file slot", () => { - expect(() => parseOcPath('oc://"C:/Windows/System32/foo"/section')).toThrow( - /Absolute file slot/, - ); - expect(() => parseOcPath('oc://"C:\\\\Windows\\\\System32"/section')).toThrow( - /Absolute file slot/, - ); - }); - - it("rejects a leading-backslash file slot", () => { - expect(() => parseOcPath('oc://"\\\\srv\\\\share\\\\foo"/section')).toThrow( - /Absolute file slot/, - ); - }); - - it("rejects a parent-directory escape via plain `..`", () => { - expect(() => parseOcPath('oc://"../foo"/section')).toThrow(/Parent-directory/); - expect(() => parseOcPath('oc://".."/section')).toThrow(/Parent-directory/); - }); - - it("rejects a parent-directory escape mid-path", () => { - expect(() => parseOcPath('oc://"foo/../bar"/section')).toThrow(/Parent-directory/); - }); - - it("does not decode URL-encoded `..` — literal `%2E%2E` is treated as a filename", () => { - // The substrate does NOT do URL decoding — `%2E%2E` is the literal - // five-character filename, not a parent-directory escape. Documented - // limitation: consumers that pre-decode (HTTP layers, browser UI) - // are responsible for normalizing before invoking parseOcPath. - // Pin the current behavior so a future "let's decode for them" PR - // sees the explicit choice. - const p = parseOcPath('oc://"%2E%2E/foo"/section'); - expect(p.file).toBe("%2E%2E/foo"); - }); - - it("formatOcPath rejects an OcPath struct with absolute file", () => { - expect(() => formatOcPath({ file: "/etc/passwd" })).toThrow(/Absolute file slot/); - expect(() => formatOcPath({ file: "C:/Windows" })).toThrow(/Absolute file slot/); - }); - - it("formatOcPath rejects an OcPath struct with parent-directory file", () => { - expect(() => formatOcPath({ file: ".." 
})).toThrow(/Parent-directory/); - expect(() => formatOcPath({ file: "../etc/passwd" })).toThrow(/Parent-directory/); - expect(() => formatOcPath({ file: "foo/../bar" })).toThrow(/Parent-directory/); - }); -}); - -// ---------- formatOcPath ↔ parseOcPath round-trip ------------------------ - -describe("wave-23 pitfalls — format/parse round-trip", () => { - // The contract on oc-path.ts:13 — `formatOcPath(parseOcPath(s)) === s` - // for any string the formatter accepts. Round-trip breaks were - // observable on (a) struct fields with empty dotted sub-segments - // (`section: 'foo.'` → `oc://X/foo.""` → re-parses with `section: - // 'foo.""'`) and (b) struct fields with control chars (formatter - // emitted unquoted, parser refused). Pin both directions. - it("formatOcPath rejects empty dotted sub-segment in a slot", () => { - expect(() => formatOcPath({ file: "a.md", section: "foo." })).toThrow( - /Empty dotted sub-segment/, - ); - expect(() => formatOcPath({ file: "a.md", section: ".foo" })).toThrow( - /Empty dotted sub-segment/, - ); - expect(() => formatOcPath({ file: "a.md", section: "foo..bar" })).toThrow( - /Empty dotted sub-segment/, - ); - }); - - it("formatOcPath rejects control characters in any slot", () => { - expect(() => formatOcPath({ file: "a.md", section: "sec\x00tion" })).toThrow( - /Control character/, - ); - expect(() => formatOcPath({ file: "a.md", section: "sec\x01tion" })).toThrow( - /Control character/, - ); - expect(() => formatOcPath({ file: "a.md", section: "tab\ttion" })).toThrow(/Control character/); - expect(() => formatOcPath({ file: "a\x00b.md" })).toThrow(/Control character/); - }); - - it("round-trips every shape parseOcPath accepts", () => { - // For every valid input, formatOcPath(parseOcPath(s)) MUST be - // re-parseable to the same struct. Don't string-compare (the - // formatter normalizes quoting); parse the round-tripped output - // and compare structs. 
- const inputs = [ - "oc://X", - "oc://X/a", - "oc://X/a/b", - "oc://X/a/b/c", - "oc://X/a.b.c", - "oc://X/a?session=s1", - "oc://X/[frontmatter]/key", - "oc://X/steps/$last/id", - "oc://X/steps/-2/id", - "oc://X/steps/[id=foo]/cmd", - "oc://X/steps/{a,b}/cmd", - 'oc://X/"foo/bar"/baz', - 'oc://X/agents/"anthropic/claude-opus-4-7"/alias', - ]; - for (const s of inputs) { - const parsed = parseOcPath(s); - const formatted = formatOcPath(parsed); - const reparsed = parseOcPath(formatted); - expect(reparsed).toEqual(parsed); - } - }); -}); - -// ---------- Performance pitfalls ----------------------------------------- - -describe("wave-23 pitfalls — performance & limits", () => { - it("P-031 / P-033 walker depth cap throws on pathological recursion", () => { - // The walker's MAX_TRAVERSAL_DEPTH defense is independent of the - // parser's MAX_PARSE_DEPTH (covered by the JSONC and JSONL parser - // tests below). To exercise the walker cap in isolation, build a - // synthetic JSONC AST chain that bypasses parseJsonc entirely — - // this is the shape callers get when they construct ASTs - // programmatically (mutations, fixtures, generators). 
- type V = import("../../jsonc/ast.js").JsoncValue; - let leaf: V = { kind: "string", value: "x", line: 1 }; - for (let i = 0; i < MAX_TRAVERSAL_DEPTH + 50; i++) { - leaf = { kind: "object", entries: [{ key: "a", value: leaf, line: 1 }], line: 1 }; - } - const ast = { - kind: "jsonc" as const, - raw: "", - root: { kind: "object" as const, entries: [{ key: "root", value: leaf, line: 1 }], line: 1 }, - }; - expect(() => findOcPaths(ast, parseOcPath("oc://X/**"))).toThrow(/MAX_TRAVERSAL_DEPTH/); - }); - - it("P-032 rejects path strings longer than MAX_PATH_LENGTH", () => { - const big = "oc://X/" + "a".repeat(MAX_PATH_LENGTH); - expect(() => parseOcPath(big)).toThrow(/exceeds .* bytes/); - }); - - it("P-032 path at the cap parses cleanly", () => { - const justUnder = "oc://X/" + "a".repeat(MAX_PATH_LENGTH - "oc://X/".length); - expect(() => parseOcPath(justUnder)).not.toThrow(); - }); - - it("P-032 formatOcPath enforces the same cap on output", () => { - // Symmetric upper bound — without this guard, a struct whose - // formatted form crosses the cap would emit a string parseOcPath - // would immediately reject (round-trip break). - expect(() => formatOcPath({ file: "X", section: "a".repeat(MAX_PATH_LENGTH) })).toThrow( - /Formatted oc:\/\/ exceeds/, - ); - }); - - it("parser depth cap fires on pathological JSONC nesting (F6)", () => { - // Without `MAX_PARSE_DEPTH`, pathological input like - // `'['.repeat(20000) + '0' + ']'.repeat(20000)` triggers a V8 - // RangeError ("Maximum call stack size exceeded") that escapes - // commander as a raw stringified error — no `OcEmitSentinelError`- - // style structured catch. Pin the structured-diagnostic path: - // parser must surface OC_JSONC_DEPTH_EXCEEDED, not bare RangeError. 
- const open = "[".repeat(MAX_TRAVERSAL_DEPTH + 100); - const close = "]".repeat(MAX_TRAVERSAL_DEPTH + 100); - const raw = `${open}0${close}`; - const result = parseJsonc(raw); - expect(result.ast.root).toBeNull(); - expect(result.diagnostics.some((d) => d.code === "OC_JSONC_DEPTH_EXCEEDED")).toBe(true); - }); - - it("parser depth cap fires on JSONL line with deeply-nested JSON (F6)", () => { - // Per-line parseJsonc dispatch carries the same protection — each - // value line is parsed in isolation and gets its own depth cap. - // The line surfaces as `kind: 'malformed'` with the depth diagnostic. - let nested = '"x"'; - for (let i = 0; i < MAX_TRAVERSAL_DEPTH + 50; i++) { - nested = `{"a":${nested}}`; - } - const { diagnostics } = parseJsonl(nested + "\n"); - // The line-level diagnostic is OC_JSONL_LINE_MALFORMED (line failed); - // we don't promote OC_JSONC_DEPTH_EXCEEDED through the JSONL layer - // but the malformed-line detection prevents stack-overflow escape. - expect(diagnostics.some((d) => d.code === "OC_JSONL_LINE_MALFORMED")).toBe(true); - }); -}); - -// ---------- Coercion pitfalls -------------------------------------------- - -describe("wave-23 pitfalls — coercion", () => { - it("P-029 numeric coercion is locale-independent", () => { - // `Number()` doesn't honor locale; `parseFloat` doesn't either in - // practice, but we never use `parseFloat`. Verify `Number("1,5")` - // returns NaN (which is rejected) and `"1.5"` returns 1.5. 
- const ast = parseJsonc('{"x":1.0}').ast; - const r1 = setOcPath(ast, parseOcPath("oc://X/x"), "1.5"); - expect(r1.ok).toBe(true); - const r2 = setOcPath(ast, parseOcPath("oc://X/x"), "1,5"); - expect(r2.ok).toBe(false); - if (!r2.ok) { - expect(r2.reason).toBe("parse-error"); - } - }); - - it("P-030 boolean coercion is exact-match lowercase", () => { - const ast = parseJsonc('{"x":true}').ast; - expect(setOcPath(ast, parseOcPath("oc://X/x"), "false").ok).toBe(true); - expect(setOcPath(ast, parseOcPath("oc://X/x"), "False").ok).toBe(false); - expect(setOcPath(ast, parseOcPath("oc://X/x"), "TRUE").ok).toBe(false); - expect(setOcPath(ast, parseOcPath("oc://X/x"), "yes").ok).toBe(false); - }); -}); - -// ---------- Reserved character pitfalls ---------------------------------- - -describe("wave-23 pitfalls — reserved characters", () => { - it("P-026 rejects `?` outside the query separator position", () => { - // `?` triggers the query split. `oc://X/foo?session=s` is fine - // (legitimate query). But `?` *inside* a segment after the query - // section is consumed isn't a normal use case — the parser treats - // the first `?` as the query split. - expect(parseOcPath("oc://X/foo?session=s").section).toBe("foo"); - // Empty key after `?` (no `=`): query parser silently ignores. - expect(() => parseOcPath("oc://X/foo?")).not.toThrow(); - }); - - it("P-040 negative-index magnitude is bounded", () => { - // Out-of-range negative index → null at resolve time, not crash. - const ast = parseJsonc('{"x":[1,2,3]}').ast; - expect(resolveOcPath(ast, parseOcPath("oc://X/x/-9999999999"))).toBeNull(); - expect(resolveOcPath(ast, parseOcPath("oc://X/x/-1"))?.kind).toBe("leaf"); - }); -}); - -// ---------- Sentinel-redaction pitfall (P-036) --------------------------- - -describe("wave-23 pitfalls — redaction sentinel", () => { - // P-036 is fully covered by wave-21-sentinel-cross-kind. This is a - // smoke test asserting the link is intact. 
- it("P-036 sentinel guard activates at emit time (covered by wave-21)", () => { - expect(true).toBe(true); - }); -}); - -// ---------- DEFERRED — documented limits --------------------------------- - -describe("wave-23 pitfalls — deferred (v0 limits)", () => { - it.skip("P-005 slash literal in key — v1: quoted segments", () => {}); - it.skip("P-006 dot literal in key — v1: quoted segments", () => {}); - it.skip("P-017 nested unions {a,{b,c}} — v1: parser stack", () => {}); - it.skip("P-019 wildcard inside wildcard — v1: pattern composition", () => {}); - it.skip("P-025 leading-zero numeric `01` — v1: explicit form", () => {}); - it.skip("P-027 `&` in segments — v1: percent-encoding", () => {}); - it.skip("P-028 percent-encoded segments — v1: rfc3986 layer", () => {}); - it.skip("P-034 ast mutation between resolve & consume — caller invariant", () => {}); - it.skip("P-035 stale paths from prior find — caller invariant", () => {}); -}); - -// ---------- Injection pitfalls (C12 / W12) ------------------------------- - -describe("wave-23 pitfalls — injection (caller-supplied hostile input)", () => { - // P-037: a hostile path string. The substrate's job is to either - // parse safely or reject with `OcPathError` — never let undefined - // behavior leak. These cases lock the rejection-or-safe contract. - - it("P-037a control characters in path body are rejected", () => { - expect(() => parseOcPath("oc://a\x00b")).toThrow(OcPathError); - expect(() => parseOcPath("oc://a\x01b/c")).toThrow(OcPathError); - expect(() => parseOcPath("oc://a/b\x1Fc")).toThrow(OcPathError); - }); - - it("P-037b NUL byte anywhere in path is rejected", () => { - expect(() => parseOcPath("oc://X.md/sec\x00tion")).toThrow(OcPathError); - }); - - it("P-037c BOM at start of path is stripped, not interpreted", () => { - // BOM is unicode U+FEFF (0xFEFF). The substrate strips it before - // scheme check; without stripping, the BOM-prefixed string would - // fail the `oc://` scheme test. 
- const path = parseOcPath("oc://X.md/section"); - expect(path.file).toBe("X.md"); - expect(path.section).toBe("section"); - }); - - it("P-037d session query is parsed only via the documented `?session=...` form", () => { - // Legal session form parses cleanly. - const ok = parseOcPath("oc://X.md/sec?session=cron:daily"); - expect(ok.section).toBe("sec"); - expect(ok.session).toBe("cron:daily"); - // Substrate is lenient about loose `?garbage` — caller's - // responsibility to construct paths from `formatOcPath`. Confirm - // the loose form does NOT silently invent a session value. - const loose = parseOcPath("oc://X.md/sec?garbage"); - expect(loose.session).toBeUndefined(); - }); - - it("P-037e unescaped `&` in segments is rejected", () => { - expect(() => parseOcPath("oc://X.md/a&b")).toThrow(OcPathError); - }); - - it("P-037f unescaped `%` in segments is rejected", () => { - expect(() => parseOcPath("oc://X.md/a%b")).toThrow(OcPathError); - }); - - it("P-037g empty file slot is rejected", () => { - expect(() => parseOcPath("oc:///section")).toThrow(OcPathError); - }); - - it("P-037h backslash-escape attempts are not treated as path traversal", () => { - // No special meaning — the literal backslash is just a regular - // character. Doesn't allow escaping forward slashes. - expect(() => parseOcPath("oc://X.md/a\\../b")).toThrow(OcPathError); - }); - - // P-038: predicate-value injection. `[k=v]` predicates filter - // matches; a hostile `v` containing regex metachars, brackets, or - // operators must NOT escape the predicate scope or be interpreted - // as a regex. - - it("P-038a regex metacharacters in predicate value match literally", () => { - const ast = parseJsonc('{ "items": [ {"name": "a.*"}, {"name": "abc"} ] }').ast; - // Looking for the literal string "a.*" — should match only the - // first item, not "abc" (which would match if `.*` were treated - // as a regex). 
- const matches = findOcPaths(ast, parseOcPath("oc://X.jsonc/items/[name=a.*]")); - expect(matches).toHaveLength(1); - }); - - it("P-038b nested-bracket attempts in predicate value are kept literal", () => { - // The substrate is permissive on nested brackets — they're part - // of the literal predicate value, not interpreted as path syntax. - // The match would be against the literal string "a[b]"; a - // resolver that finds zero matches fails closed. - const path = parseOcPath("oc://X.jsonc/items/[name=a[b]]"); - expect(path.item).toBe("[name=a[b]]"); - // No data has the literal value `a[b]` here, so finding empty. - const ast = parseJsonc('{ "items": [ {"name": "abc"} ] }').ast; - expect(findOcPaths(ast, path)).toHaveLength(0); - }); - - it("P-038c equals-sign in predicate value is treated as part of the value", () => { - // The FIRST `=` separates key from value; subsequent `=`s belong - // to the value. The rule keeps the predicate parser simple — - // operators that prefix-match (`!=`, `<=`, `>=`) are tried - // before `=`, then `=` consumes the rest. - const ast = parseJsonc('{ "items": [ {"k": "a=b"}, {"k": "c"} ] }').ast; - const matches = findOcPaths(ast, parseOcPath("oc://X.jsonc/items/[k=a=b]")); - expect(matches).toHaveLength(1); - }); - - it("P-038d control characters in predicate value are rejected", () => { - expect(() => parseOcPath("oc://X.jsonc/items/[k=a\x00b]")).toThrow(OcPathError); - }); - - it("P-038e empty predicate body is rejected", () => { - expect(() => parseOcPath("oc://X.jsonc/items/[]")).toThrow(OcPathError); - }); - - it("P-038f predicate-shaped bracket without operator is treated as literal sentinel", () => { - // `[name]` without `=` is parsed as a literal-bracket sentinel - // (e.g. `[frontmatter]`-style). The substrate accepts it as a - // literal path segment — predicate parsing only kicks in when an - // operator is present. Document this to lock the behavior. 
- const path = parseOcPath("oc://X.jsonc/items/[name]"); - expect(path.item).toBe("[name]"); - }); - - it("P-038g predicate-shaped bracket with unsupported operator parses as literal", () => { - // `~` isn't in the supported-operator set; the parser doesn't - // recognize it as a predicate, so it's accepted as a literal - // bracket segment. This is the documented v1.1 behavior — a - // future version may add `~` (regex) and bump SDK_VERSION. - const path = parseOcPath("oc://X.jsonc/items/[k~v]"); - expect(path.item).toBe("[k~v]"); - }); -}); diff --git a/extensions/oc-path/src/oc-path/tests/scenarios/real-world-fixtures.test.ts b/extensions/oc-path/src/oc-path/tests/scenarios/real-world-fixtures.test.ts index 8d779101edf..2c2ae9d737e 100644 --- a/extensions/oc-path/src/oc-path/tests/scenarios/real-world-fixtures.test.ts +++ b/extensions/oc-path/src/oc-path/tests/scenarios/real-world-fixtures.test.ts @@ -1,10 +1,3 @@ -/** - * Wave 12 — real-world fixtures. - * - * Eight workspace files (one per upstream-recognized workspace - * filename) — each parsed, resolved, and round-tripped to verify the - * substrate handles realistic content. 
- */ import { readFileSync } from "node:fs"; import { join, dirname } from "node:path"; import { fileURLToPath } from "node:url"; @@ -20,8 +13,8 @@ function load(name: string): string { return readFileSync(join(FIXTURES, name), "utf-8"); } -describe("wave-12 real-world-fixtures", () => { - it("F-01 SOUL.md parses + round-trips", () => { +describe("real-world-fixtures", () => { + it("SOUL.md parses + round-trips", () => { const raw = load("SOUL.md"); const { ast, diagnostics } = parseMd(raw); expect(diagnostics).toEqual([]); @@ -30,7 +23,7 @@ describe("wave-12 real-world-fixtures", () => { expect(ast.blocks.length).toBeGreaterThan(0); }); - it("F-02 AGENTS.md parses + resolves Tools section", () => { + it("AGENTS.md parses + resolves Tools section", () => { const raw = load("AGENTS.md"); const { ast } = parseMd(raw); expect(emitMd(ast)).toBe(raw); @@ -41,7 +34,7 @@ describe("wave-12 real-world-fixtures", () => { } }); - it("F-03 MEMORY.md frontmatter scope resolves via [frontmatter]", () => { + it("MEMORY.md frontmatter scope resolves via [frontmatter]", () => { const raw = load("MEMORY.md"); const { ast } = parseMd(raw); expect(emitMd(ast)).toBe(raw); @@ -56,7 +49,7 @@ describe("wave-12 real-world-fixtures", () => { } }); - it("F-04 TOOLS.md tool-guidance section resolves by slug", () => { + it("TOOLS.md tool-guidance section resolves by slug", () => { const raw = load("TOOLS.md"); const { ast } = parseMd(raw); expect(emitMd(ast)).toBe(raw); @@ -67,7 +60,7 @@ describe("wave-12 real-world-fixtures", () => { expect(guidance?.kind).toBe("block"); }); - it("F-05 IDENTITY.md sections resolvable by slug", () => { + it("IDENTITY.md sections resolvable by slug", () => { const raw = load("IDENTITY.md"); const { ast } = parseMd(raw); expect(emitMd(ast)).toBe(raw); @@ -78,7 +71,7 @@ describe("wave-12 real-world-fixtures", () => { expect(trust?.kind).toBe("block"); }); - it("F-06 USER.md Preferences items extracted", () => { + it("USER.md Preferences items extracted", () => { 
const raw = load("USER.md"); const { ast } = parseMd(raw); expect(emitMd(ast)).toBe(raw); @@ -92,7 +85,7 @@ describe("wave-12 real-world-fixtures", () => { } }); - it("F-07 HEARTBEAT.md schedules — H2 sections as triggers", () => { + it("HEARTBEAT.md schedules — H2 sections as triggers", () => { const raw = load("HEARTBEAT.md"); const { ast } = parseMd(raw); expect(emitMd(ast)).toBe(raw); @@ -102,7 +95,7 @@ describe("wave-12 real-world-fixtures", () => { expect(slugs).toContain("every-4h-wake"); }); - it("F-08 SKILL.md frontmatter has name + description + tier", () => { + it("SKILL.md frontmatter has name + description + tier", () => { const raw = load("SKILL.md"); const { ast } = parseMd(raw); expect(emitMd(ast)).toBe(raw); @@ -112,13 +105,13 @@ describe("wave-12 real-world-fixtures", () => { expect(fmKeys).toContain("tier"); }); - it("F-09 BOOTSTRAP.md round-trips", () => { + it("BOOTSTRAP.md round-trips", () => { const raw = load("BOOTSTRAP.md"); const { ast } = parseMd(raw); expect(emitMd(ast)).toBe(raw); }); - it("F-10 all 8 fixtures combined round-trip-clean (sanity)", () => { + it("all 8 fixtures combined round-trip-clean (sanity)", () => { const names = [ "SOUL.md", "AGENTS.md", diff --git a/extensions/oc-path/src/oc-path/tests/scenarios/roundtrip-property.test.ts b/extensions/oc-path/src/oc-path/tests/scenarios/roundtrip-property.test.ts index 94f1e67cc57..859216ec41c 100644 --- a/extensions/oc-path/src/oc-path/tests/scenarios/roundtrip-property.test.ts +++ b/extensions/oc-path/src/oc-path/tests/scenarios/roundtrip-property.test.ts @@ -1,11 +1,3 @@ -/** - * Wave 10 — round-trip property tests. - * - * Substrate guarantee: `emitMd(parse(raw)) === raw` for all inputs the - * parser accepts. This wave exercises that property over a generated - * corpus of synthetic markdown shapes and verifies parser idempotence - * (`parse(emitMd(parse(raw))) === parse(raw)` modulo `raw`). 
- */ import { describe, expect, it } from "vitest"; import { emitMd } from "../../emit.js"; import { parseMd } from "../../parse.js"; @@ -14,8 +6,8 @@ function roundTrip(raw: string): string { return emitMd(parseMd(raw).ast); } -describe("wave-10 roundtrip-property", () => { - it("RT-01 byte-fidelity over 100 generated shapes", () => { +describe("roundtrip-property", () => { + it("byte-fidelity over 100 generated shapes", () => { const inputs = generateCorpus(100); for (const raw of inputs) { try { @@ -30,7 +22,7 @@ describe("wave-10 roundtrip-property", () => { } }); - it("RT-02 parser idempotence (parse → emit → parse → identical AST shape)", () => { + it("parser idempotence (parse → emit → parse → identical AST shape)", () => { const inputs = generateCorpus(50); for (const raw of inputs) { const a = parseMd(raw).ast; @@ -42,7 +34,7 @@ describe("wave-10 roundtrip-property", () => { } }); - it("RT-03 stable output for identical input", () => { + it("stable output for identical input", () => { const raw = `---\nname: x\n---\n\n## A\n- a\n## B\n- b: c\n`; const out1 = roundTrip(raw); const out2 = roundTrip(raw); @@ -51,7 +43,7 @@ describe("wave-10 roundtrip-property", () => { expect(out2).toBe(out3); }); - it("RT-04 ordering deterministic (no Object.keys / Set ordering surprises)", () => { + it("ordering deterministic (no Object.keys / Set ordering surprises)", () => { const raw = `---\nb: 2\na: 1\nc: 3\n---\n## Z\n- z\n## A\n- a\n`; const a1 = parseMd(raw).ast; const a2 = parseMd(raw).ast; @@ -59,38 +51,37 @@ describe("wave-10 roundtrip-property", () => { expect(a1.blocks.map((b) => b.heading)).toEqual(a2.blocks.map((b) => b.heading)); }); - it("RT-05 round-trip preserves comment-like lines (no comment recognition at substrate)", () => { + it("round-trip preserves comment-like lines (no comment recognition at substrate)", () => { const raw = `## H\n\n\n- bullet\n`; expect(roundTrip(raw)).toBe(raw); }); - it("RT-06 round-trip preserves indented blocks (substrate 
doesn't reflow)", () => { + it("round-trip preserves indented blocks (substrate doesn't reflow)", () => { const raw = `## H\n\n indented code-ish block\n more indented\n`; expect(roundTrip(raw)).toBe(raw); }); - it("RT-07 round-trip preserves blockquotes", () => { + it("round-trip preserves blockquotes", () => { const raw = `## H\n\n> quoted line 1\n> quoted line 2\n`; expect(roundTrip(raw)).toBe(raw); }); - it("RT-08 round-trip preserves images / links", () => { + it("round-trip preserves images / links", () => { const raw = `## H\n\n![alt](path/to/img.png)\n[link](http://example.com)\n`; expect(roundTrip(raw)).toBe(raw); }); - it("RT-09 round-trip preserves HTML", () => { + it("round-trip preserves HTML", () => { const raw = `## H\n\n
xbody
\n`; expect(roundTrip(raw)).toBe(raw); }); - it("RT-10 round-trip preserves consecutive headings with no body between", () => { + it("round-trip preserves consecutive headings with no body between", () => { const raw = `## A\n## B\n## C\n`; expect(roundTrip(raw)).toBe(raw); }); }); -// ---------- corpus generator ------------------------------------------------- function generateCorpus(count: number): string[] { const corpus: string[] = []; diff --git a/extensions/oc-path/src/oc-path/tests/scenarios/security-and-limits.test.ts b/extensions/oc-path/src/oc-path/tests/scenarios/security-and-limits.test.ts new file mode 100644 index 00000000000..41f2cd37b4f --- /dev/null +++ b/extensions/oc-path/src/oc-path/tests/scenarios/security-and-limits.test.ts @@ -0,0 +1,253 @@ +import { describe, expect, it } from "vitest"; +import { + MAX_PATH_LENGTH, + MAX_TRAVERSAL_DEPTH, + OcPathError, + findOcPaths, + formatOcPath, + parseOcPath, + resolveOcPath, + setOcPath, +} from "../../index.js"; +import { parseJsonc } from "../../jsonc/parse.js"; +import { parseJsonl } from "../../jsonl/parse.js"; + + +describe("encoding edges", () => { + it("strips leading UTF-8 BOM from path string", () => { + expect(parseOcPath("oc://X/Y").file).toBe("X"); + }); + + it("normalizes path segments to NFC", () => { + const nfc = "café"; + const nfd = "café"; // decomposed + expect(parseOcPath(`oc://X/${nfd}`)).toEqual(parseOcPath(`oc://X/${nfc}`)); + }); + + it("rejects whitespace inside identifier-shaped segments", () => { + expect(() => parseOcPath("oc://X/foo /bar")).toThrow(OcPathError); + expect(() => parseOcPath("oc://X/foo\tbar")).toThrow(OcPathError); + }); + + it("rejects control characters and NUL bytes anywhere in the path", () => { + expect(() => parseOcPath("oc://X/\x00")).toThrow(/Control character/); + expect(() => parseOcPath("oc://X/foo\x01bar")).toThrow(/Control character/); + expect(() => parseOcPath("oc://X/foo\x7Fbar")).toThrow(/Control character/); + expect(() => 
parseOcPath("oc://X.md/items/[k=a\x00b]")).toThrow(OcPathError); + }); +}); + + +describe("file-slot containment", () => { + it("rejects absolute POSIX file slot", () => { + expect(() => parseOcPath("oc:///etc/passwd")).toThrow(/Empty segment/); + expect(() => parseOcPath('oc://"/etc/passwd"/section')).toThrow(/Absolute file slot/); + }); + + it("rejects Windows drive-letter file slot", () => { + expect(() => parseOcPath('oc://"C:/Windows/System32/foo"/section')).toThrow( + /Absolute file slot/, + ); + expect(() => parseOcPath('oc://"C:\\\\Windows\\\\System32"/section')).toThrow( + /Absolute file slot/, + ); + }); + + it("rejects leading-backslash UNC path", () => { + expect(() => parseOcPath('oc://"\\\\srv\\\\share\\\\foo"/section')).toThrow( + /Absolute file slot/, + ); + }); + + it("rejects parent-directory escapes", () => { + expect(() => parseOcPath('oc://"../foo"/section')).toThrow(/Parent-directory/); + expect(() => parseOcPath('oc://"foo/../bar"/section')).toThrow(/Parent-directory/); + }); + + it("does not URL-decode `%2E%2E` — substrate isn't an HTTP layer", () => { + expect(parseOcPath('oc://"%2E%2E/foo"/section').file).toBe("%2E%2E/foo"); + }); + + it("formatOcPath rejects absolute and parent-directory file slots", () => { + expect(() => formatOcPath({ file: "/etc/passwd" })).toThrow(/Absolute file slot/); + expect(() => formatOcPath({ file: "C:/Windows" })).toThrow(/Absolute file slot/); + expect(() => formatOcPath({ file: ".." 
})).toThrow(/Parent-directory/); + expect(() => formatOcPath({ file: "foo/../bar" })).toThrow(/Parent-directory/); + }); +}); + + +describe("path-string and traversal caps", () => { + it("parseOcPath rejects strings longer than MAX_PATH_LENGTH", () => { + expect(() => parseOcPath("oc://X/" + "a".repeat(MAX_PATH_LENGTH))).toThrow(/exceeds .* bytes/); + }); + + it("parseOcPath accepts a path right at the cap", () => { + const justUnder = "oc://X/" + "a".repeat(MAX_PATH_LENGTH - "oc://X/".length); + expect(() => parseOcPath(justUnder)).not.toThrow(); + }); + + it("formatOcPath enforces the same cap on output", () => { + expect(() => formatOcPath({ file: "X", section: "a".repeat(MAX_PATH_LENGTH) })).toThrow( + /Formatted oc:\/\/ exceeds/, + ); + }); + + it("walker depth cap fires on synthetic deeply-nested AST", () => { + // Bypasses parser depth cap so the walker defense fires in isolation. + type V = import("../../jsonc/ast.js").JsoncValue; + let leaf: V = { kind: "string", value: "x", line: 1 }; + for (let i = 0; i < MAX_TRAVERSAL_DEPTH + 50; i++) { + leaf = { kind: "object", entries: [{ key: "a", value: leaf, line: 1 }], line: 1 }; + } + const ast = { + kind: "jsonc" as const, + raw: "", + root: { kind: "object" as const, entries: [{ key: "root", value: leaf, line: 1 }], line: 1 }, + }; + expect(() => findOcPaths(ast, parseOcPath("oc://X/**"))).toThrow(/MAX_TRAVERSAL_DEPTH/); + }); + + it("jsonc parser surfaces a structured diagnostic on pathological nesting", () => { + const open = "[".repeat(MAX_TRAVERSAL_DEPTH + 100); + const close = "]".repeat(MAX_TRAVERSAL_DEPTH + 100); + const result = parseJsonc(`${open}0${close}`); + expect(result.ast.root).toBeNull(); + expect(result.diagnostics.some((d) => d.code === "OC_JSONC_DEPTH_EXCEEDED")).toBe(true); + }); + + it("jsonl per-line parser flags malformed deeply-nested values", () => { + let nested = '"x"'; + for (let i = 0; i < MAX_TRAVERSAL_DEPTH + 50; i++) { + nested = `{"a":${nested}}`; + } + const { diagnostics } = 
parseJsonl(nested + "\n"); + expect(diagnostics.some((d) => d.code === "OC_JSONL_LINE_MALFORMED")).toBe(true); + }); +}); + + +describe("sentinel literal at format boundary", () => { + it("formatOcPath rejects a struct carrying the redaction sentinel", () => { + expect(() => formatOcPath({ file: "AGENTS.md", section: "__OPENCLAW_REDACTED__" })).toThrow( + /sentinel literal/, + ); + }); +}); + + +describe("numeric segments dispatch by node kind", () => { + it("negative numeric key on object resolves as literal key (openclaw#59934)", () => { + // Telegram supergroup IDs are negative numbers used as map keys. + const ast = parseJsonc( + '{"channels":{"telegram":{"groups":{"-5028303500":{"requireMention":false}}}}}', + ).ast; + const m = resolveOcPath( + ast, + parseOcPath("oc://config/channels.telegram.groups.-5028303500.requireMention"), + ); + expect(m?.kind).toBe("leaf"); + }); + + it("`-1` still works as positional on arrays", () => { + const ast = parseJsonc('{"items":[10,20,30]}').ast; + const m = resolveOcPath(ast, parseOcPath("oc://X/items/-1")); + expect(m?.kind === "leaf" && m.valueText).toBe("30"); + }); + + it("`$last` literal key on an object is shadowed by the positional sentinel", () => { + const ast = parseJsonc('{"$last":"literal-value","foo":"bar"}').ast; + const m = resolveOcPath(ast, parseOcPath("oc://X/$last")); + expect(m?.kind === "leaf" && m.valueText).toBe("bar"); + }); + + it("out-of-range negative index returns null, not crash", () => { + const ast = parseJsonc('{"x":[1,2,3]}').ast; + expect(resolveOcPath(ast, parseOcPath("oc://X/x/-9999999999"))).toBeNull(); + }); +}); + + +describe("setOcPath value coercion is locale-independent and exact-match", () => { + it("number coercion accepts `1.5`, refuses `1,5`", () => { + const ast = parseJsonc('{"x":1.0}').ast; + expect(setOcPath(ast, parseOcPath("oc://X/x"), "1.5").ok).toBe(true); + const r = setOcPath(ast, parseOcPath("oc://X/x"), "1,5"); + expect(r.ok).toBe(false); + if (!r.ok) { + 
expect(r.reason).toBe("parse-error"); + } + }); + + it("boolean coercion accepts `true` / `false` only", () => { + const ast = parseJsonc('{"x":true}').ast; + expect(setOcPath(ast, parseOcPath("oc://X/x"), "false").ok).toBe(true); + expect(setOcPath(ast, parseOcPath("oc://X/x"), "False").ok).toBe(false); + expect(setOcPath(ast, parseOcPath("oc://X/x"), "TRUE").ok).toBe(false); + expect(setOcPath(ast, parseOcPath("oc://X/x"), "yes").ok).toBe(false); + }); +}); + + +describe("predicate-value injection is contained", () => { + it("regex metacharacters in predicate value match literally, not as regex", () => { + const ast = parseJsonc('{"items":[{"name":"a.*"},{"name":"abc"}]}').ast; + const matches = findOcPaths(ast, parseOcPath("oc://X.jsonc/items/[name=a.*]")); + expect(matches).toHaveLength(1); + }); + + it("equals-sign in predicate value is treated as part of the value", () => { + const ast = parseJsonc('{"items":[{"k":"a=b"},{"k":"c"}]}').ast; + const matches = findOcPaths(ast, parseOcPath("oc://X.jsonc/items/[k=a=b]")); + expect(matches).toHaveLength(1); + }); + + it("predicate-shaped bracket without operator is a literal sentinel", () => { + expect(parseOcPath("oc://X.jsonc/items/[name]").item).toBe("[name]"); + }); + + it("rejects empty predicate body and empty key/value", () => { + expect(() => parseOcPath("oc://X.jsonc/items/[]")).toThrow(OcPathError); + expect(() => parseOcPath("oc://X/[=foo]")).toThrow(/Malformed predicate/); + expect(() => parseOcPath("oc://X/[id=]")).toThrow(/Malformed predicate/); + }); + + it("predicate value containing `/` round-trips and matches literally", () => { + const p = parseOcPath("oc://X/[id=foo/bar]/cmd"); + expect(p.section).toBe("[id=foo/bar]"); + const ast = parseJsonc('{"steps":[{"id":"foo/bar","cmd":"x"},{"id":"baz","cmd":"y"}]}').ast; + const matches = findOcPaths(ast, parseOcPath("oc://wf/steps/[id=foo/bar]/cmd")); + expect(matches).toHaveLength(1); + }); + + it("predicate value containing `.` round-trips and matches 
literally", () => { + const ast = parseJsonc('{"steps":[{"id":"1.0","cmd":"x"},{"id":"2.0","cmd":"y"}]}').ast; + const matches = findOcPaths(ast, parseOcPath("oc://wf/steps/[id=1.0]/cmd")); + expect(matches).toHaveLength(1); + }); +}); + + +describe("structural rejection", () => { + it("rejects mismatched brackets and braces", () => { + expect(() => parseOcPath("oc://X/[unclosed")).toThrow(OcPathError); + expect(() => parseOcPath("oc://X/closed]")).toThrow(OcPathError); + expect(() => parseOcPath("oc://X/{a,b")).toThrow(OcPathError); + }); + + it("rejects empty union and empty alternative", () => { + expect(() => parseOcPath("oc://X/{}")).toThrow(/Empty union/); + expect(() => parseOcPath("oc://X/{a,,b}")).toThrow(/Empty alternative/); + }); + + it("rejects empty dotted sub-segment in formatOcPath output", () => { + expect(() => formatOcPath({ file: "a.md", section: "foo." })).toThrow(/Empty dotted/); + expect(() => formatOcPath({ file: "a.md", section: ".foo" })).toThrow(/Empty dotted/); + expect(() => formatOcPath({ file: "a.md", section: "foo..bar" })).toThrow(/Empty dotted/); + }); + + it("rejects unescaped `&` and `%` in segments", () => { + expect(() => parseOcPath("oc://X.md/a&b")).toThrow(OcPathError); + expect(() => parseOcPath("oc://X.md/a%b")).toThrow(OcPathError); + }); +}); diff --git a/extensions/oc-path/src/oc-path/tests/scenarios/sentinel-cross-kind.test.ts b/extensions/oc-path/src/oc-path/tests/scenarios/sentinel-cross-kind.test.ts index 862c46b7e54..9971d041743 100644 --- a/extensions/oc-path/src/oc-path/tests/scenarios/sentinel-cross-kind.test.ts +++ b/extensions/oc-path/src/oc-path/tests/scenarios/sentinel-cross-kind.test.ts @@ -1,15 +1,3 @@ -/** - * Wave 21 — sentinel guard across all 3 kinds. - * - * Substrate guarantee: emit refuses to write a CALLER-INJECTED - * `__OPENCLAW_REDACTED__` literal. 
Round-trip mode trusts parsed bytes - * (a workspace file legitimately containing the sentinel — in a code - * block, in a pasted error log — would otherwise become a workspace- - * wide emit DoS). Render mode walks every leaf, so a caller-injected - * sentinel via `setOcPath` always fails. Callers that want strict - * pre-existing-byte detection (e.g., LKG fingerprint verification) - * opt in via `acceptPreExistingSentinel: false`. - */ import { describe, expect, it } from "vitest"; import { emitMd } from "../../emit.js"; import { setJsoncOcPath } from "../../jsonc/edit.js"; @@ -21,8 +9,8 @@ import { parseOcPath } from "../../oc-path.js"; import { parseMd } from "../../parse.js"; import { OcEmitSentinelError, REDACTED_SENTINEL } from "../../sentinel.js"; -describe("wave-21 sentinel guard cross-kind", () => { - it("S-01 jsonc round-trip echoes safely when raw contains pre-existing sentinel", () => { +describe("sentinel guard cross-kind", () => { + it("jsonc round-trip echoes safely when raw contains pre-existing sentinel", () => { // Pre-existing sentinel bytes are trusted — see emit-policy comment // in jsonc/emit.ts. The strict mode below is the opt-in path for // callers who want LKG-style fingerprint verification. 
@@ -34,21 +22,21 @@ describe("wave-21 sentinel guard cross-kind", () => { expect(() => emitJsonc(ast, { acceptPreExistingSentinel: false })).toThrow(OcEmitSentinelError); }); - it("S-02 jsonl round-trip echoes safely; strict mode rejects", () => { + it("jsonl round-trip echoes safely; strict mode rejects", () => { const raw = `{"x":"${REDACTED_SENTINEL}"}\n`; const ast = parseJsonl(raw).ast; expect(emitJsonl(ast)).toBe(raw); expect(() => emitJsonl(ast, { acceptPreExistingSentinel: false })).toThrow(OcEmitSentinelError); }); - it("S-03 md round-trip echoes safely; strict mode rejects", () => { + it("md round-trip echoes safely; strict mode rejects", () => { const raw = `## Body\n\n- ${REDACTED_SENTINEL}\n`; const ast = parseMd(raw).ast; expect(emitMd(ast)).toBe(raw); expect(() => emitMd(ast, { acceptPreExistingSentinel: false })).toThrow(OcEmitSentinelError); }); - it("S-04 jsonc render mode walks every leaf for sentinel", () => { + it("jsonc render mode walks every leaf for sentinel", () => { const ast = parseJsonc('{ "x": "ok" }').ast; const tampered = { ...ast, @@ -66,7 +54,7 @@ describe("wave-21 sentinel guard cross-kind", () => { expect(() => emitJsonc(tampered, { mode: "render" })).toThrow(OcEmitSentinelError); }); - it("S-05 jsonl render mode walks every value-line leaf", () => { + it("jsonl render mode walks every value-line leaf", () => { const ast = parseJsonl('{"a":"ok"}\n').ast; const tampered = { ...ast, @@ -91,7 +79,7 @@ describe("wave-21 sentinel guard cross-kind", () => { expect(() => emitJsonl(tampered, { mode: "render" })).toThrow(OcEmitSentinelError); }); - it("S-06 setJsoncOcPath itself throws when the new value contains the sentinel", () => { + it("setJsoncOcPath itself throws when the new value contains the sentinel", () => { // The substrate guard fires at write-time: setJsoncOcPath rebuilds // raw via render mode emit, which scans every leaf. 
Defense-in-depth // — even if a caller forgets to call emit afterward, the sentinel @@ -105,7 +93,7 @@ describe("wave-21 sentinel guard cross-kind", () => { ).toThrow(OcEmitSentinelError); }); - it("S-07 sentinel embedded in deep nesting — render mode catches the leaf", () => { + it("sentinel embedded in deep nesting — render mode catches the leaf", () => { // Round-trip echoes the pre-existing bytes (the workspace contract: // a parsed file containing the sentinel as data is not "writing" it // on emit). Render mode walks every leaf and rejects this caller- @@ -116,33 +104,33 @@ describe("wave-21 sentinel guard cross-kind", () => { expect(() => emitJsonc(ast, { mode: "render" })).toThrow(OcEmitSentinelError); }); - it("S-08 sentinel inside an array element triggers guard in render mode", () => { + it("sentinel inside an array element triggers guard in render mode", () => { const raw = JSON.stringify({ arr: ["ok", REDACTED_SENTINEL, "ok"] }); const ast = parseJsonc(raw).ast; expect(() => emitJsonc(ast, { mode: "render" })).toThrow(OcEmitSentinelError); }); - it("S-09 sentinel as object key in raw — strict mode catches it", () => { + it("sentinel as object key in raw — strict mode catches it", () => { const raw = `{ "${REDACTED_SENTINEL}": 1 }`; const ast = parseJsonc(raw).ast; expect(emitJsonc(ast)).toBe(raw); // default-mode echo expect(() => emitJsonc(ast, { acceptPreExistingSentinel: false })).toThrow(OcEmitSentinelError); }); - it("S-10 sentinel in jsonl malformed line — strict mode catches it", () => { + it("sentinel in jsonl malformed line — strict mode catches it", () => { const raw = `${REDACTED_SENTINEL}\n`; const ast = parseJsonl(raw).ast; expect(emitJsonl(ast)).toBe(raw); // round-trip echoes verbatim expect(() => emitJsonl(ast, { acceptPreExistingSentinel: false })).toThrow(OcEmitSentinelError); }); - it("S-11 partial sentinel substring does NOT trigger guard", () => { + it("partial sentinel substring does NOT trigger guard", () => { const raw = '{ "x": 
"OPENCLAW_REDACTED" }'; const ast = parseJsonc(raw).ast; expect(() => emitJsonc(ast)).not.toThrow(); }); - it("S-12 sentinel guard error message includes the OcPath context (render mode)", () => { + it("sentinel guard error message includes the OcPath context (render mode)", () => { // Render mode is the path that actually rejects caller-injected // sentinel — round-trip just echoes, so the error context surfaces // when render walks the offending leaf and constructs the path. diff --git a/extensions/oc-path/src/oc-path/tests/scenarios/sentinel-guard.test.ts b/extensions/oc-path/src/oc-path/tests/scenarios/sentinel-guard.test.ts index 6d798fb06a2..6c27000584f 100644 --- a/extensions/oc-path/src/oc-path/tests/scenarios/sentinel-guard.test.ts +++ b/extensions/oc-path/src/oc-path/tests/scenarios/sentinel-guard.test.ts @@ -1,36 +1,29 @@ -/** - * Wave 9 — sentinel guard at every emit leaf. - * - * Substrate guarantee: `__OPENCLAW_REDACTED__` literal anywhere in the - * emitted bytes throws `OcEmitSentinelError`. Round-trip mode catches - * sentinels in `raw`; render mode walks every leaf. 
- */ import { describe, expect, it } from "vitest"; import { emitMd } from "../../emit.js"; import { parseMd } from "../../parse.js"; import { OcEmitSentinelError, REDACTED_SENTINEL, guardSentinel } from "../../sentinel.js"; -describe("wave-09 sentinel-guard", () => { - it("S-01 sentinel constant matches the literal", () => { +describe("sentinel-guard", () => { + it("sentinel constant matches the literal", () => { expect(REDACTED_SENTINEL).toBe("__OPENCLAW_REDACTED__"); }); - it("S-02 guardSentinel passes normal strings", () => { + it("guardSentinel passes normal strings", () => { expect(() => guardSentinel("safe", "oc://X.md")).not.toThrow(); }); - it("S-03 guardSentinel passes non-string types", () => { + it("guardSentinel passes non-string types", () => { expect(() => guardSentinel(42, "oc://X.md")).not.toThrow(); expect(() => guardSentinel(null, "oc://X.md")).not.toThrow(); expect(() => guardSentinel(undefined, "oc://X.md")).not.toThrow(); expect(() => guardSentinel({}, "oc://X.md")).not.toThrow(); }); - it("S-04 guardSentinel throws on exact match", () => { + it("guardSentinel throws on exact match", () => { expect(() => guardSentinel(REDACTED_SENTINEL, "oc://X.md")).toThrow(OcEmitSentinelError); }); - it("S-05 guardSentinel throws on substring matches (sentinel embedded in larger string)", () => { + it("guardSentinel throws on substring matches (sentinel embedded in larger string)", () => { // Substring scan — the sentinel anywhere in the value is a leak, // not just exact equality. 
A hostile caller smuggling // `prefix__OPENCLAW_REDACTED__suffix` would have bypassed the old @@ -40,7 +33,7 @@ describe("wave-09 sentinel-guard", () => { ); }); - it("S-06 error attaches the OcPath context", () => { + it("error attaches the OcPath context", () => { try { guardSentinel(REDACTED_SENTINEL, "oc://config/plugins.entries.foo.token"); expect.fail("should have thrown"); @@ -52,20 +45,20 @@ describe("wave-09 sentinel-guard", () => { } }); - it("S-07 round-trip echoes pre-existing sentinel; strict mode rejects", () => { + it("round-trip echoes pre-existing sentinel; strict mode rejects", () => { const raw = "## Section\n\n- token: __OPENCLAW_REDACTED__\n"; const { ast } = parseMd(raw); expect(emitMd(ast)).toBe(raw); expect(() => emitMd(ast, { acceptPreExistingSentinel: false })).toThrow(OcEmitSentinelError); }); - it("S-08 round-trip emit allows sentinel-free content", () => { + it("round-trip emit allows sentinel-free content", () => { const raw = "## Section\n\n- token: redacted-but-not-sentinel\n"; const { ast } = parseMd(raw); expect(() => emitMd(ast)).not.toThrow(); }); - it("S-09 render mode catches sentinel in frontmatter", () => { + it("render mode catches sentinel in frontmatter", () => { const ast = { kind: "md" as const, raw: "", @@ -76,7 +69,7 @@ describe("wave-09 sentinel-guard", () => { expect(() => emitMd(ast, { mode: "render" })).toThrow(OcEmitSentinelError); }); - it("S-10 render mode catches sentinel in preamble", () => { + it("render mode catches sentinel in preamble", () => { const ast = { kind: "md" as const, raw: "", @@ -87,7 +80,7 @@ describe("wave-09 sentinel-guard", () => { expect(() => emitMd(ast, { mode: "render" })).toThrow(OcEmitSentinelError); }); - it("S-11 render mode catches sentinel in block bodyText", () => { + it("render mode catches sentinel in block bodyText", () => { const ast = { kind: "md" as const, raw: "", @@ -108,7 +101,7 @@ describe("wave-09 sentinel-guard", () => { expect(() => emitMd(ast, { mode: "render" 
})).toThrow(OcEmitSentinelError); }); - it("S-12 render mode catches sentinel in item kv.value", () => { + it("render mode catches sentinel in item kv.value", () => { const ast = { kind: "md" as const, raw: "", @@ -138,21 +131,21 @@ describe("wave-09 sentinel-guard", () => { ); }); - it("S-13 sentinel-as-substring in raw — strict mode catches it", () => { + it("sentinel-as-substring in raw — strict mode catches it", () => { const raw = `Some prose ${REDACTED_SENTINEL} more prose.\n`; const { ast } = parseMd(raw); expect(emitMd(ast)).toBe(raw); expect(() => emitMd(ast, { acceptPreExistingSentinel: false })).toThrow(OcEmitSentinelError); }); - it("S-14 multiple sentinel occurrences in raw — strict mode catches them", () => { + it("multiple sentinel occurrences in raw — strict mode catches them", () => { const raw = `## A\n${REDACTED_SENTINEL}\n${REDACTED_SENTINEL}\n`; const { ast } = parseMd(raw); expect(emitMd(ast)).toBe(raw); expect(() => emitMd(ast, { acceptPreExistingSentinel: false })).toThrow(OcEmitSentinelError); }); - it("S-15 fileNameForGuard appears in the error path", () => { + it("fileNameForGuard appears in the error path", () => { const ast = { kind: "md" as const, raw: "", diff --git a/extensions/oc-path/src/oc-path/tests/universal.test.ts b/extensions/oc-path/src/oc-path/tests/universal.test.ts index 54b9bdf5cab..9b6e148f889 100644 --- a/extensions/oc-path/src/oc-path/tests/universal.test.ts +++ b/extensions/oc-path/src/oc-path/tests/universal.test.ts @@ -1,10 +1,3 @@ -/** - * Universal verbs — `setOcPath` + `resolveOcPath` test surface. - * - * Every test exercises the universal entry point. The substrate - * dispatches via `ast.kind` and coerces value strings based on AST - * shape at the path location. 
- */ import { describe, expect, it } from "vitest"; import { emitMd } from "../emit.js"; import { emitJsonc } from "../jsonc/emit.js"; @@ -15,7 +8,6 @@ import { parseOcPath } from "../oc-path.js"; import { parseMd } from "../parse.js"; import { detectInsertion, resolveOcPath, setOcPath } from "../universal.js"; -// ---------- detectInsertion ------------------------------------------------ describe("detectInsertion", () => { it("returns null for plain paths", () => { @@ -46,7 +38,6 @@ describe("detectInsertion", () => { }); }); -// ---------- resolveOcPath — universal across kinds ------------------------- describe("resolveOcPath — md AST", () => { const md = parseMd("---\nname: github\n---\n\n## Boundaries\n\n- enabled: true\n").ast; @@ -173,7 +164,6 @@ describe("resolveOcPath — insertion-point detection", () => { }); }); -// ---------- setOcPath — leaf assignment ------------------------------------ describe("setOcPath — md leaf", () => { it("replaces frontmatter value", () => { @@ -286,7 +276,6 @@ describe("setOcPath — jsonl leaf", () => { }); }); -// ---------- setOcPath — insertion ------------------------------------------ describe("setOcPath — md insertion", () => { it("appends item to section with `+`", () => { @@ -432,7 +421,6 @@ describe("setOcPath — jsonl insertion (session append)", () => { }); }); -// ---------- Cross-cutting properties --------------------------------------- describe("setOcPath — cross-cutting properties", () => { it("is non-mutating across all kinds", () => { diff --git a/extensions/oc-path/src/oc-path/universal.ts b/extensions/oc-path/src/oc-path/universal.ts index 156813fc164..dc41bc89c63 100644 --- a/extensions/oc-path/src/oc-path/universal.ts +++ b/extensions/oc-path/src/oc-path/universal.ts @@ -1,28 +1,14 @@ /** - * Universal `setOcPath` and `resolveOcPath` — the public verbs. + * Universal `setOcPath` / `resolveOcPath` / `detectInsertion`. + * Addressing is universal; encoding is per-kind. 
Callers pass any AST + * + path + value; the substrate dispatches on `ast.kind` and coerces + * the value based on the AST shape at the resolution point. * - * **Strategic frame**: addressing is universal. Encoding is per-kind. - * The OcPath syntax encodes WHAT to do (set leaf vs. insert vs. address - * a structural node); the AST kind encodes HOW the substrate carries it - * out. Callers pass any AST + a path + a string value; the substrate - * dispatches via `ast.kind` and coerces the value based on the path's - * syntax and the AST shape at the resolution point. - * - * **Path syntax vocabulary** (v0): - * - * oc://FILE/section/item/field → leaf address (set/replace value) - * oc://FILE/section/+ → end-insertion at section - * oc://FILE/section/+key → keyed insertion (object key add) - * oc://FILE/section/+0 → indexed insertion (array splice) - * oc://FILE/+ → file-root insertion (jsonl line append, md new section) - * - * **Coercion at leaves** is driven by the AST type at the resolution point: - * - md leaf → value used verbatim (md is text-native) - * - jsonc/jsonl leaf, existing string → value verbatim - * - jsonc/jsonl leaf, existing number → parseFloat (parse-error if NaN) - * - jsonc/jsonl leaf, existing boolean → 'true'/'false' literal - * - jsonc/jsonl leaf, existing null → only `value === 'null'` - * - insertion → `JSON.parse(value)` for jsonc/jsonl; raw text for md + * oc://FILE/section/item/field → leaf address + * oc://FILE/section/+ → end-insertion + * oc://FILE/section/+key → keyed insertion + * oc://FILE/section/+0 → indexed insertion + * oc://FILE/+ → file-root insertion * * @module @openclaw/oc-path/universal */ @@ -54,26 +40,9 @@ import { resolveMdOcPath } from "./resolve.js"; export type OcAst = MdAst | JsoncAst | JsonlAst; /** - * Universal resolve result. Same shape regardless of AST kind so - * consumers branch only on `match.kind`. 
- * - * `leaf` carries the value as a string — the canonical leaf form on - * the wire, suitable for direct comparison or display. Numeric/bool - * leaves are stringified deterministically (`String(42)` → `'42'`, - * `String(true)` → `'true'`). - * - * `node` describes which kind of structural node the path resolved to - * (md-block, jsonc-object, jsonl-line, etc.) — the descriptor lets - * tooling format / drill in without re-parsing the kind tag. - * - * `insertion-point` is returned when the path's terminal segment is - * an insertion marker (`+`, `+key`, `+nnn`) and the parent is a valid - * container. - * - * **`line`** is the 1-based source line of the matched node, or `1` - * for the root / synthetic constructions where no source line exists. - * Lint rules use it directly for diagnostic positioning instead of - * walking the kind-specific AST a second time. + * Universal resolve result — same shape across AST kinds. `leaf` values + * are string-coerced (numbers/bools stringified deterministically). + * `line` is 1-based; root/synthetic nodes use `1`. */ export type OcMatch = | { readonly kind: "root"; readonly ast: OcAst; readonly line: number } @@ -119,13 +88,9 @@ export type SetResult = readonly detail?: string; }; -// ---------- Insertion-syntax detection ------------------------------------- - /** - * Inspect the path for an insertion marker on the deepest segment. - * A segment of `+`, `+<key>`, or `+<nnn>` indicates insertion at the - * parent. Returns the parent path (with insertion segment stripped) + - * the marker; or `null` for a plain (non-insertion) path. + * Insertion marker on the deepest path segment: `+`, `+<key>`, or + * `+<nnn>`. Returns parent path + marker; null for plain paths. */ export interface InsertionInfo { readonly parentPath: OcPath; @@ -133,37 +98,23 @@ export interface InsertionInfo { } export function detectInsertion(path: OcPath): InsertionInfo | null { - // Find the deepest defined segment.
const segments: Array<{ slot: "section" | "item" | "field"; value: string }> = []; - if (path.section !== undefined) { - segments.push({ slot: "section", value: path.section }); - } - if (path.item !== undefined) { - segments.push({ slot: "item", value: path.item }); - } - if (path.field !== undefined) { - segments.push({ slot: "field", value: path.field }); - } - if (segments.length === 0) { - return null; - } + if (path.section !== undefined) segments.push({ slot: "section", value: path.section }); + if (path.item !== undefined) segments.push({ slot: "item", value: path.item }); + if (path.field !== undefined) segments.push({ slot: "field", value: path.field }); + if (segments.length === 0) return null; const last = segments[segments.length - 1]; - if (!last.value.startsWith("+")) { - return null; - } + if (!last.value.startsWith("+")) return null; const rest = last.value.slice(1); - let marker: InsertionInfo["marker"]; - if (rest.length === 0) { - marker = "+"; - } else if (/^\d+$/.test(rest)) { - marker = { kind: "indexed", index: Number(rest) }; - } else { - marker = { kind: "keyed", key: rest }; - } + const marker: InsertionInfo["marker"] = + rest.length === 0 + ? "+" + : /^\d+$/.test(rest) + ? { kind: "indexed", index: Number(rest) } + : { kind: "keyed", key: rest }; - // Strip the deepest segment from the path. const parentPath: OcPath = { file: path.file, ...(last.slot !== "section" && path.section !== undefined ? { section: path.section } : {}), @@ -174,22 +125,10 @@ export function detectInsertion(path: OcPath): InsertionInfo | null { return { parentPath, marker }; } -// ---------- Universal resolve ---------------------------------------------- - -/** - * Resolve an `OcPath` against any AST. Returns a kind-agnostic match - * shape or `null` when the path doesn't resolve. - * - * Insertion-marker paths return `{kind: 'insertion-point', container}` - * if the parent is a valid container; otherwise `null`. - */ +/** Resolve an `OcPath` against any AST. 
Throws on wildcard patterns. */ export function resolveOcPath(ast: OcAst, path: OcPath): OcMatch | null { - // Wildcard guard: `resolveOcPath` is the single-match verb. Wildcards - // belong to `findOcPaths` (multi-match). Throw with a structured code - // (consistent with `setOcPath`'s `wildcard-not-allowed` discriminator) - // — silent `null` here is indistinguishable from "path doesn't - // resolve", so consumers couldn't tell whether they should switch to - // findOcPaths or accept the address as missing. + // Single-match verb: wildcards belong to findOcPaths. Throw with a + // structured code so consumers can route to the right verb. if (hasWildcard(path)) { throw new OcPathError( `resolveOcPath received a wildcard pattern; use findOcPaths instead: ${formatOcPath(path)}`, @@ -198,9 +137,7 @@ export function resolveOcPath(ast: OcAst, path: OcPath): OcMatch | null { ); } const insertion = detectInsertion(path); - if (insertion !== null) { - return resolveInsertion(ast, insertion); - } + if (insertion !== null) return resolveInsertion(ast, insertion); switch (ast.kind) { case "md": @@ -210,14 +147,11 @@ export function resolveOcPath(ast: OcAst, path: OcPath): OcMatch | null { case "jsonl": return resolveJsonlToUniversal(ast, path); } - return null; } function resolveMdToUniversal(ast: MdAst, path: OcPath): OcMatch | null { const m = resolveMdOcPath(ast, path); - if (m === null) { - return null; - } + if (m === null) return null; switch (m.kind) { case "root": return { kind: "root", ast, line: 1 }; @@ -230,21 +164,13 @@ function resolveMdToUniversal(ast: MdAst, path: OcPath): OcMatch | null { case "item-field": return { kind: "leaf", valueText: m.value, leafType: "string", line: m.node.line }; } - return null; } function resolveJsoncToUniversal(ast: JsoncAst, path: OcPath): OcMatch | null { const m = resolveJsoncOcPath(ast, path); - if (m === null) { - return null; - } - if (m.kind === "root") { - return { kind: "root", ast, line: 1 }; - } - if (m.kind === 
"object-entry") { - return jsoncValueToMatch(m.node.value, m.node.line); - } - // m.kind === 'value' — array element or root: line lives on the value itself. + if (m === null) return null; + if (m.kind === "root") return { kind: "root", ast, line: 1 }; + if (m.kind === "object-entry") return jsoncValueToMatch(m.node.value, m.node.line); return jsoncValueToMatch(m.node, m.node.line ?? 1); } @@ -263,33 +189,20 @@ function jsoncValueToMatch(value: JsoncValue, line: number): OcMatch { case "null": return { kind: "leaf", valueText: "null", leafType: "null", line }; } - throw new Error(`unreachable: jsoncValueToMatch kind`); } function resolveJsonlToUniversal(ast: JsonlAst, path: OcPath): OcMatch | null { const m = resolveJsonlOcPath(ast, path); - if (m === null) { - return null; - } - if (m.kind === "root") { - return { kind: "root", ast, line: 1 }; - } - if (m.kind === "line") { - return { kind: "node", descriptor: "jsonl-line", line: m.node.line }; - } - // Inside-line jsonc parser starts numbering at 1 for each jsonl - // line, so `m.node.line` would always be 1 for any jsonl-resolved - // match. Use `m.line` (the JsonlLine's file-level line) — by - // construction every inside-line node sits on the same file line. - if (m.kind === "object-entry") { - return jsoncValueToMatch(m.node.value, m.line); - } + if (m === null) return null; + if (m.kind === "root") return { kind: "root", ast, line: 1 }; + if (m.kind === "line") return { kind: "node", descriptor: "jsonl-line", line: m.node.line }; + // Inside-line jsonc nodes always have line=1; use the JsonlLine's + // file-level line instead since every inside-line node sits there. + if (m.kind === "object-entry") return jsoncValueToMatch(m.node.value, m.line); return jsoncValueToMatch(m.node, m.line); } function resolveInsertion(ast: OcAst, info: InsertionInfo): OcMatch | null { - // For an insertion to be valid the parent must resolve to a container - // we know how to extend. Inspect the parent. 
switch (ast.kind) { case "md": return resolveMdInsertion(ast, info); @@ -298,25 +211,17 @@ function resolveInsertion(ast: OcAst, info: InsertionInfo): OcMatch | null { case "jsonl": return resolveJsonlInsertion(ast, info); } - return null; } function resolveMdInsertion(ast: MdAst, info: InsertionInfo): OcMatch | null { const p = info.parentPath; - // oc://FILE/+ → file-root insertion (new section) - if (p.section === undefined) { - return { kind: "insertion-point", container: "md-file", line: 1 }; - } - // oc://FILE/[frontmatter]/+key → frontmatter add + if (p.section === undefined) return { kind: "insertion-point", container: "md-file", line: 1 }; if (p.section === "[frontmatter]") { return { kind: "insertion-point", container: "md-frontmatter", line: 1 }; } - // oc://FILE/section/+ → append item to section if (p.item === undefined && p.field === undefined) { const m = resolveMdOcPath(ast, p); - if (m === null || m.kind !== "block") { - return null; - } + if (m === null || m.kind !== "block") return null; return { kind: "insertion-point", container: "md-section", line: m.node.line }; } return null; @@ -349,35 +254,20 @@ function resolveJsoncInsertion(ast: JsoncAst, info: InsertionInfo): OcMatch | nu } function resolveJsonlInsertion(ast: JsonlAst, info: InsertionInfo): OcMatch | null { - // jsonl insertion only makes sense at the file level: `oc://FILE/+`. - if (info.parentPath.section !== undefined) { - return null; - } - // The only insertion point for jsonl is "after the last line" — the - // line surfaced is `lastLine + 1` so consumers can render correctly. + // jsonl insertion only makes sense at file level (`oc://FILE/+`). + // Surfaced line is lastLine+1 so consumers render correctly. + if (info.parentPath.section !== undefined) return null; const lastLine = ast.lines.length > 0 ? 
ast.lines[ast.lines.length - 1].line : 0; return { kind: "insertion-point", container: "jsonl-file", line: lastLine + 1 }; } -// ---------- Universal set -------------------------------------------------- - /** - * Replace or insert at `path` with `value` (always a string). - * Substrate dispatches via `ast.kind` and coerces value at leaves - * based on the existing AST shape at the path location. - * - * For insertion-marker paths (`+`, `+key`, `+nnn`) the value is parsed - * as kind-appropriate content (JSON for jsonc/jsonl; plain text for md). - * - * Returns a structured result; never throws on parser-tolerated input. - * Sentinel-guard violations DO throw `OcEmitSentinelError` (defense in - * depth — refuse to write redacted content even when caller "asked"). + * Replace or insert at `path`. Coerces value at leaves based on the + * existing AST shape; for insertion paths value is parsed as + * kind-appropriate content (JSON for jsonc/jsonl; raw text for md). + * Sentinel-guard violations throw `OcEmitSentinelError`. */ export function setOcPath(ast: OcAst, path: OcPath, value: string): SetResult { - // Wildcard guard: `setOcPath` writes a single concrete leaf. A pattern - // would be ambiguous (which match wins?) so we reject early. Callers - // who want multi-set should `findOcPaths(...)` then `setOcPath` per - // resolved path — the explicit loop is the right shape. 
if (hasWildcard(path)) { return { ok: false, @@ -387,109 +277,72 @@ export function setOcPath(ast: OcAst, path: OcPath, value: string): SetResult { } const insertion = detectInsertion(path); if (insertion !== null) { - return setInsertion(ast, insertion, value); + switch (ast.kind) { + case "md": + return setMdInsertion(ast, insertion, value); + case "jsonc": + return setJsoncInsertion(ast, insertion, value); + case "jsonl": + return setJsonlInsertion(ast, insertion, value); + } } - switch (ast.kind) { - case "md": - return setMdLeaf(ast, path, value); + case "md": { + const r = setMdOcPath(ast, path, value); + return r.ok ? { ok: true, ast: r.ast } : { ok: false, reason: r.reason }; + } case "jsonc": - return setJsoncLeaf(ast, path, value); + return setStructuredLeaf(ast, path, value, resolveJsoncOcPath, setJsoncOcPath); case "jsonl": - return setJsonlLeaf(ast, path, value); + return setStructuredLeaf(ast, path, value, resolveJsonlOcPath, setJsonlOcPath, () => { + // jsonl line replacement: value must be JSON for the whole line. + const parsed = tryParseJson(value); + if (parsed === undefined) { + return { ok: false, reason: "parse-error", detail: "line replacement requires JSON value" }; + } + const r = setJsonlOcPath(ast, path, jsonToJsoncValue(parsed)); + return r.ok ? { ok: true, ast: r.ast } : { ok: false, reason: r.reason }; + }); } - throw new Error(`unreachable: setOcPath kind`); } -function setMdLeaf(ast: MdAst, path: OcPath, value: string): SetResult { - const r = setMdOcPath(ast, path, value); - if (r.ok) { - return { ok: true, ast: r.ast }; - } - return { ok: false, reason: r.reason }; -} - -function setJsoncLeaf(ast: JsoncAst, path: OcPath, value: string): SetResult { - // Inspect the existing leaf to determine target type for coercion. - const existing = resolveJsoncOcPath(ast, path); - if (existing === null) { - return { ok: false, reason: "unresolved" }; - } +// Resolve → reject root/line → coerce by existing leaf type → set → +// wrap. 
The optional `onLine` handles jsonl's whole-line replacement. +function setStructuredLeaf( + ast: A, + path: OcPath, + value: string, + resolve: (a: A, p: OcPath) => M | null, + set: (a: A, p: OcPath, c: JsoncValue) => SetOpResult, + onLine?: () => SetResult, +): SetResult { + const existing = resolve(ast, path); + if (existing === null) return { ok: false, reason: "unresolved" }; if (existing.kind === "root") { - return { - ok: false, - reason: "not-writable", - detail: "root replacement is not supported via setOcPath", - }; - } - const leafValue = existing.kind === "object-entry" ? existing.node.value : existing.node; - const coerced = coerceJsoncLeaf(value, leafValue); - if (coerced === null) { - return { - ok: false, - reason: "parse-error", - detail: `cannot coerce "${value}" to ${leafValue.kind}`, - }; - } - const r = setJsoncOcPath(ast, path, coerced); - if (r.ok) { - return { ok: true, ast: r.ast }; - } - return { ok: false, reason: r.reason }; -} - -function setJsonlLeaf(ast: JsonlAst, path: OcPath, value: string): SetResult { - const existing = resolveJsonlOcPath(ast, path); - if (existing === null) { - return { ok: false, reason: "unresolved" }; - } - if (existing.kind === "root") { - return { - ok: false, - reason: "not-writable", - detail: "root replacement is not supported via setOcPath", - }; + return { ok: false, reason: "not-writable", detail: "root replacement is not supported via setOcPath" }; } if (existing.kind === "line") { - // Replacing a whole line — value should be JSON. - const parsed = tryParseJson(value); - if (parsed === undefined) { - return { ok: false, reason: "parse-error", detail: `line replacement requires JSON value` }; - } - const r = setJsonlOcPath(ast, path, jsonToJsoncValue(parsed)); - if (r.ok) { - return { ok: true, ast: r.ast }; - } - return { ok: false, reason: r.reason }; + return onLine !== undefined ? onLine() : { ok: false, reason: "not-writable" }; } - // Field on a line — leaf coercion. 
const leafValue = existing.kind === "object-entry" ? existing.node.value : existing.node; const coerced = coerceJsoncLeaf(value, leafValue); if (coerced === null) { - return { - ok: false, - reason: "parse-error", - detail: `cannot coerce "${value}" to ${leafValue.kind}`, - }; + return { ok: false, reason: "parse-error", detail: `cannot coerce "${value}" to ${leafValue.kind}` }; } - const r = setJsonlOcPath(ast, path, coerced); - if (r.ok) { - return { ok: true, ast: r.ast }; - } - return { ok: false, reason: r.reason }; + const r = set(ast, path, coerced); + return r.ok ? { ok: true, ast: r.ast } : { ok: false, reason: r.reason }; } -function setInsertion(ast: OcAst, info: InsertionInfo, value: string): SetResult { - switch (ast.kind) { - case "md": - return setMdInsertion(ast, info, value); - case "jsonc": - return setJsoncInsertion(ast, info, value); - case "jsonl": - return setJsonlInsertion(ast, info, value); - } - throw new Error(`unreachable: setInsertion kind`); -} +type StructuredLeafMatch = + | { readonly kind: "root" } + | { readonly kind: "line" } + | { readonly kind: "object-entry"; readonly node: { readonly value: JsoncValue } } + | { readonly kind: "value"; readonly node: JsoncValue }; + +type SetFailureReason = Extract["reason"]; +type SetOpResult = + | { readonly ok: true; readonly ast: A } + | { readonly ok: false; readonly reason: Exclude }; function setMdInsertion(ast: MdAst, info: InsertionInfo, value: string): SetResult { const p = info.parentPath; @@ -508,8 +361,6 @@ function setMdInsertion(ast: MdAst, info: InsertionInfo, value: string): SetResu line: 0, bodyText: "", items: [], - tables: [], - codeBlocks: [], }, ], }; @@ -589,18 +440,15 @@ function setJsoncInsertion(ast: JsoncAst, info: InsertionInfo, value: string): S } if (containerMatch.container === "jsonc-array") { - // index `+0` valid; bare `+` appends; `+key` rejected. + // `+0` indexed; bare `+` appends; `+key` rejected for arrays. 
if (typeof info.marker === "object" && info.marker.kind === "keyed") { return { ok: false, reason: "type-mismatch", detail: "cannot insert by key into array" }; } return mutateJsoncContainer(ast, info.parentPath, (container) => { - if (container.kind !== "array") { - return null; - } + if (container.kind !== "array") return null; const items = container.items.slice(); - if (info.marker === "+") { - items.push(newJsoncValue); - } else if (typeof info.marker === "object" && info.marker.kind === "indexed") { + if (info.marker === "+") items.push(newJsoncValue); + else if (typeof info.marker === "object" && info.marker.kind === "indexed") { const idx = Math.min(info.marker.index, items.length); items.splice(idx, 0, newJsoncValue); } @@ -612,18 +460,13 @@ function setJsoncInsertion(ast: JsoncAst, info: InsertionInfo, value: string): S }); } - // jsonc-object if (typeof info.marker !== "object" || info.marker.kind !== "keyed") { return { ok: false, reason: "type-mismatch", detail: "jsonc object insertion requires +key" }; } const key = info.marker.key; return mutateJsoncContainer(ast, info.parentPath, (container) => { - if (container.kind !== "object") { - return null; - } - if (container.entries.some((e) => e.key === key)) { - return null; - } // duplicate + if (container.kind !== "object") return null; + if (container.entries.some((e) => e.key === key)) return null; // duplicate const newEntry: JsoncEntry = { key, value: newJsoncValue, line: 0 }; return { kind: "object", @@ -648,32 +491,24 @@ function setJsonlInsertion(ast: JsonlAst, info: InsertionInfo, value: string): S return { ok: true, ast: appendJsonlLine(ast, jsonToJsoncValue(parsed)) }; } -// ---------- Internal helpers ----------------------------------------------- - +// Preserve the existing source line on coerced replacements — same +// semantic node, only the bytes change. 
function coerceJsoncLeaf(valueText: string, existing: JsoncValue): JsoncValue | null { - // Preserve the existing source line on coerced replacements — the - // semantic node is the same; only its bytes change. const lineExt = existing.line !== undefined ? { line: existing.line } : {}; - if (existing.kind === "string") { - return { kind: "string", value: valueText, ...lineExt }; - } + if (existing.kind === "string") return { kind: "string", value: valueText, ...lineExt }; if (existing.kind === "number") { const n = Number(valueText); return Number.isFinite(n) ? { kind: "number", value: n, ...lineExt } : null; } if (existing.kind === "boolean") { - if (valueText === "true") { - return { kind: "boolean", value: true, ...lineExt }; - } - if (valueText === "false") { - return { kind: "boolean", value: false, ...lineExt }; - } + if (valueText === "true") return { kind: "boolean", value: true, ...lineExt }; + if (valueText === "false") return { kind: "boolean", value: false, ...lineExt }; return null; } if (existing.kind === "null") { return valueText === "null" ? { kind: "null", ...lineExt } : null; } - // Object/array leaf — caller should use insertion or full-replace path. + // Object/array — caller should use insertion or full-replace. return null; } @@ -686,21 +521,11 @@ function tryParseJson(value: string): unknown { } function jsonToJsoncValue(v: unknown): JsoncValue { - // Synthetic values omit `line` (optional in the type) — the parser - // alone is the source of truth for line metadata. Insertions / - // mutations get the parent's line for surfacing in lint findings. - if (v === null) { - return { kind: "null" }; - } - if (typeof v === "string") { - return { kind: "string", value: v }; - } - if (typeof v === "number") { - return { kind: "number", value: v }; - } - if (typeof v === "boolean") { - return { kind: "boolean", value: v }; - } + // Synthetic values omit `line` — only the parser sets line metadata. 
+ if (v === null) return { kind: "null" }; + if (typeof v === "string") return { kind: "string", value: v }; + if (typeof v === "number") return { kind: "number", value: v }; + if (typeof v === "boolean") return { kind: "boolean", value: v }; if (Array.isArray(v)) { return { kind: "array", items: v.map(jsonToJsoncValue) }; } @@ -715,7 +540,7 @@ function jsonToJsoncValue(v: unknown): JsoncValue { })), }; } - // Unsupported (undefined / function / symbol). JSON.parse never produces these. + // JSON.parse never produces undefined / function / symbol. throw new Error(`unsupported JSON value type: ${typeof v}`); } @@ -724,15 +549,9 @@ function mutateJsoncContainer( parentPath: OcPath, mutate: (container: JsoncValue) => JsoncValue | null, ): SetResult { - if (ast.root === null) { - return { ok: false, reason: "no-root" }; - } + if (ast.root === null) return { ok: false, reason: "no-root" }; - // Quote-aware split so JSONC insertion under a key containing - // `/`, `.`, or other special chars works through the parent path. - // `resolveJsoncOcPath` validates with quote-aware splitting; the - // mutation walker MUST use the same predicate or insertion validity - // can be reported and then fail as unresolved. + // Quote-aware split so insertion under a key with `/`/`.`/etc. works. const segments: string[] = []; if (parentPath.section !== undefined) { segments.push(...splitRespectingBrackets(parentPath.section, ".")); @@ -746,9 +565,7 @@ function mutateJsoncContainer( const newRoot = segments.length === 0 ? 
mutate(ast.root) : mutateAt(ast.root, segments, 0, mutate); - if (newRoot === null) { - return { ok: false, reason: "unresolved" }; - } + if (newRoot === null) return { ok: false, reason: "unresolved" }; const next: JsoncAst = { kind: "jsonc", raw: "", root: newRoot }; return { ok: true, ast: { ...next, raw: emitJsonc(next, { mode: "render" }) } }; @@ -761,26 +578,17 @@ function mutateAt( mutate: (container: JsoncValue) => JsoncValue | null, ): JsoncValue | null { const seg = segments[i]; - if (seg === undefined) { - return mutate(current); - } - if (seg.length === 0) { - return null; - } + if (seg === undefined) return mutate(current); + if (seg.length === 0) return null; if (current.kind === "object") { - // Match `setJsoncOcPath`'s lookup: AST entry keys are unquoted, - // so strip quoting from the path segment before comparing. + // AST keys are unquoted; strip quotes from the path segment. const lookupKey = isQuotedSeg(seg) ? unquoteSeg(seg) : seg; const idx = current.entries.findIndex((e) => e.key === lookupKey); - if (idx === -1) { - return null; - } + if (idx === -1) return null; const child = current.entries[idx]; const replaced = mutateAt(child.value, segments, i + 1, mutate); - if (replaced === null) { - return null; - } + if (replaced === null) return null; const newEntries = current.entries.slice(); newEntries[idx] = { ...child, value: replaced }; return { @@ -791,14 +599,10 @@ function mutateAt( } if (current.kind === "array") { const idx = Number(seg); - if (!Number.isInteger(idx) || idx < 0 || idx >= current.items.length) { - return null; - } + if (!Number.isInteger(idx) || idx < 0 || idx >= current.items.length) return null; const child = current.items[idx]; const replaced = mutateAt(child, segments, i + 1, mutate); - if (replaced === null) { - return null; - } + if (replaced === null) return null; const newItems = current.items.slice(); newItems[idx] = replaced; return { @@ -820,33 +624,21 @@ function rebuildMdRaw(ast: MdAst): MdAst { 
parts.push("---"); } if (ast.preamble.length > 0) { - if (parts.length > 0) { - parts.push(""); - } + if (parts.length > 0) parts.push(""); parts.push(ast.preamble); } for (const block of ast.blocks) { - if (parts.length > 0) { - parts.push(""); - } + if (parts.length > 0) parts.push(""); parts.push(`## ${block.heading}`); - if (block.bodyText.length > 0) { - parts.push(block.bodyText); - } + if (block.bodyText.length > 0) parts.push(block.bodyText); } - // Suppress unused — emitJsonl is imported for symmetry but only emitJsonc - // is used in the jsonc mutation helper. void emitJsonl; return { ...ast, raw: parts.join("\n") }; } function formatFrontmatterValue(value: string): string { - if (value.length === 0) { - return '""'; - } - if (/[:#&*?|<>=!%@`,[\]{}\r\n]/.test(value)) { - return JSON.stringify(value); - } + if (value.length === 0) return '""'; + if (/[:#&*?|<>=!%@`,[\]{}\r\n]/.test(value)) return JSON.stringify(value); return value; }