refactor(oc-path): consolidate dispatch and trim comment surface

Substrate simplifications and broad comment cleanup:

**Walker collapse (find.ts)**: three near-parallel walkers
(walkJsonc, walkJsonl + walkJsonlInsideLine, walkMd + walkMdInsideBlock
+ walkMdInsideItem) shared the same segment-shape dispatch — union /
predicate / `*` / `**` / positional / literal — with different child
types. Extracted as a single `dispatchSeg<T>(ops, ...)` that takes a
per-kind `WalkOps<T>` table; each walker is a thin wrapper. The three
md walkers fold into one `walkMd(level, ...)` polymorphic on a
`MdLevel` discriminated union. JSONL routes the post-line-slot
descent through walkJsonc via a WeakMap-tagged ast holder.

**CLI (cli.ts)**: the commands repeated the same try/catch/emit/exit
dance four times over — missing-arg check, parseOcPath try/catch,
OcEmitSentinelError catch. Extracted as `requireArg`, `tryParse`,
`catchSentinel`. Folded `RawPathOptions` into `PathCommandOptions`
(identical shape) and collapsed `.option(...)` chains via
`withCommonOpts`.

**universal.ts**: setJsoncLeaf and setJsonlLeaf were near-identical
(resolve, refuse root, coerce, set, wrap). Extracted as
`setStructuredLeaf<A, M>` with optional `onLine` for jsonl's
whole-line replacement. Inlined setMdLeaf (7-line passthrough) into
setOcPath. Dropped four `throw new Error("unreachable")` statements
that TS exhaustiveness checking already covers.

**oc-path.ts**: 35 `throw new OcPathError(...)` sites compress through
a `fail()` helper. File-slot containment check (absolute, parent-dir,
control chars) extracted as `validateFileSlot` so parse + format share
the same defense. Three structural-nesting throws in formatOcPath
collapse into two. Three near-parallel string scanners
(`indexOfTopLevel`, `splitRespectingBrackets`, `validateBrackets`) fold
through one `scanBracketAware(s, onChar, onUnbalanced)` helper.

**jsonl/edit.ts**: pickLineIndex compressed; line-address dispatch
shares the value-line filter as a small helper.

**Internal review codes stripped**: P-NNN, F-NN, I-NN, H2-NN, OP-NN,
R-NN, T-NN, S-NN, BFJ-NN, RJC-NN, RJL-NN, FM-NN, A-NN, B-NN, CC-NN,
wave-NN — these were review-process artifacts (sprint identifiers,
finding IDs, pitfall taxonomy IDs) that mean nothing to a reader who
didn't participate in the originating review. Test names rewritten
human-readable; comments lose their P-NNN bookmarks; describe blocks
drop the wave-NN prefix.

**Pitfalls test consolidated**: `tests/scenarios/pitfalls.test.ts`
(637 LoC, organized by P-NNN) replaced by `security-and-limits.test.ts`
(288 LoC) — unique coverage migrates over with descriptive names;
duplicates of OP-/R-/etc. tests are dropped.

**Comment cleanup**: per CLAUDE.md "default to writing no comments;
add one only when the WHY is non-obvious", trimmed multi-paragraph
WHY-prose on every public export, running prose inside function
bodies that restated what the next line of code said, section-divider
comments in test files, and module-level doc-comments that paraphrased
the file name. Kept load-bearing context: NFC re-check after grow,
quote-aware split symmetry, multi-char operator precedence, sentinel
guard catch routes, WeakMap holder rationale.

**MdAst slimmed**: `tables` and `codeBlocks` fields removed — substrate
addressing doesn't go inside them, and markdown-it's tokenizer
already excludes them from heading/item misparse without
first-class AST modeling.

Net reduction across the 10 consolidation/cleanup commits: ~3,800 LoC.
This commit is contained in:
Gio Della-Libera
2026-05-08 18:40:57 -07:00
committed by Peter Steinberger
parent 7b7e65105b
commit 71c855f244
39 changed files with 1685 additions and 3908 deletions

View File

@@ -1,25 +1,9 @@
/**
* `openclaw path` — shell-level access to the OcPath substrate verbs.
* Self-hosters and editor extensions use it to inspect and surgically
* edit workspace files without scripting against the SDK directly.
* `openclaw path` — shell access to the OcPath substrate verbs.
*
* Subcommands:
* - `resolve <oc-path>` — print the match at the path
* - `set <oc-path> <value>` — write a leaf at the path; supports `--dry-run`
* - `find <pattern>` — enumerate matches for a wildcard/predicate path
* - `validate <oc-path>` — parse-only; print structure
* - `emit <file>` — read + parseXxx + emitXxx; verifies byte-fidelity
*
* Output is TTY-aware: defaults to human-readable when stdout is a TTY,
* switches to JSON otherwise (so pipes don't get formatting noise).
* `--json` and `--human` flags override the auto-detection.
*
* Boundaries this CLI does NOT cross (v0):
* - Doesn't know about LKG. `set` writes raw bytes through the
* substrate emit; if the file is LKG-tracked, the next observe
* call decides whether to promote / recover.
* - Doesn't know about lint rules or doctor fixers — that's a
* different surface.
* Subcommands: `resolve` / `set` / `find` / `validate` / `emit`.
* TTY-aware output: human when interactive, JSON when piped; `--json`
* / `--human` override.
*/
import { promises as fs } from "node:fs";
@@ -44,7 +28,6 @@ import {
type OcAst,
type OcMatch,
type OcPath,
type SetResult,
} from "./oc-path/index.js";
export type OutputRuntimeEnv = {
@@ -77,26 +60,16 @@ const defaultRuntime: OutputRuntimeEnv = {
},
};
/**
* Output-boundary sentinel scrub. Replaces every occurrence of the
* redaction sentinel with `[REDACTED]` before writing to the output
* stream. Defense-in-depth — even if a future code path surfaces raw
* file content carrying the sentinel, the CLI must not echo it.
*/
// Defense-in-depth: replace the redaction sentinel with `[REDACTED]`
// before writing, even if upstream emits it.
export function scrubSentinel(s: string): string {
  // Sentinel-free input is handed back untouched; otherwise every
  // occurrence is swapped for the placeholder via split/join.
  const hasSentinel = s.includes(REDACTED_SENTINEL);
  return hasSentinel ? s.split(REDACTED_SENTINEL).join(SCRUB_PLACEHOLDER) : s;
}
function detectMode(options: PathCommandOptions): OutputMode {
  // An explicit flag wins, with `json` checked before `human`; with
  // neither set, the mode follows whether stdout is a TTY.
  const forced =
    options.json === true ? "json" : options.human === true ? "human" : undefined;
  return forced ?? (process.stdout.isTTY ? "human" : "json");
}
@@ -127,24 +100,70 @@ function emitError(
runtime.error(`${code}: ${scrubbed}`);
}
/**
 * Guard for a required positional argument. When `value` is `undefined`,
 * reports `usage` through `emitError`, calls `runtime.exit(2)`, and
 * returns `false`; otherwise returns `true` and narrows `value`.
 */
function requireArg<T>(
  value: T | undefined,
  usage: string,
  runtime: OutputRuntimeEnv,
  mode: OutputMode,
): value is T extends undefined ? never : T {
  const present = value !== undefined;
  if (!present) {
    emitError(runtime, mode, usage);
    runtime.exit(2);
  }
  return present;
}
/**
 * Parse an oc-path string. An `OcPathError` is reported via `emitError`
 * (with the error's own code), followed by `runtime.exit(2)` and a `null`
 * return; any other exception is rethrown unchanged.
 */
function tryParse(
  pathStr: string,
  runtime: OutputRuntimeEnv,
  mode: OutputMode,
): OcPath | null {
  let parsed: OcPath | null = null;
  try {
    parsed = parseOcPath(pathStr);
  } catch (err) {
    // Only path-syntax failures are handled here; everything else propagates.
    if (!(err instanceof OcPathError)) throw err;
    emitError(runtime, mode, `parse failed: ${err.message}`, err.code);
    runtime.exit(2);
  }
  return parsed;
}
// Runs `fn` and routes an OcEmitSentinelError through the structured
// error path; otherwise commander prints `String(err)` raw and bypasses
// the `--json` scrubbed-error boundary. Returns `null` after reporting
// (exit code 1). Only synchronous throws from `fn` are caught, and any
// other exception is rethrown.
function catchSentinel<T>(
  label: string,
  runtime: OutputRuntimeEnv,
  mode: OutputMode,
  fn: () => T,
): T | null {
  let outcome: T | null = null;
  try {
    outcome = fn();
  } catch (err) {
    if (!(err instanceof OcEmitSentinelError)) throw err;
    emitError(runtime, mode, `${label} refused: ${err.message}`, "OC_EMIT_SENTINEL");
    runtime.exit(1);
  }
  return outcome;
}
/**
 * Read `absPath` as UTF-8 and parse it with the parser selected by
 * `inferKind(fileName)`: jsonc, jsonl, or markdown for anything else.
 * Only the AST is returned; parser diagnostics are not surfaced.
 */
async function loadAst(absPath: string, fileName: string): Promise<OcAst> {
  const raw = await fs.readFile(absPath, "utf-8");
  switch (inferKind(fileName)) {
    case "jsonc":
      return parseJsonc(raw).ast;
    case "jsonl":
      return parseJsonl(raw).ast;
    default:
      return parseMd(raw).ast;
  }
}
function emitForKind(ast: OcAst, fileName?: string): string {
// Plumb fileName through so OcEmitSentinelError messages carry the
// file context (`oc://gateway.jsonc/[raw]`) instead of the
// empty-slot fallback (`oc:///[raw]`). Test S-12 in the wave-21
// sentinel suite asserts the OcPath context appears in the error;
// without this plumbing, CLI emits had it stripped.
// Plumb fileName so sentinel errors carry file context.
const opts = fileName !== undefined ? { fileNameForGuard: fileName } : {};
switch (ast.kind) {
case "jsonc":
@@ -154,62 +173,42 @@ function emitForKind(ast: OcAst, fileName?: string): string {
case "md":
return emitMd(ast, opts);
}
throw new Error(`unreachable: emitForKind kind`);
}
/**
 * Map an oc-path's file slot to a filesystem path.
 *
 * `options.file`, when set, overrides the file slot entirely and is
 * resolved on its own. Otherwise `path.file` is resolved against
 * `options.cwd`, falling back to `process.cwd()`.
 *
 * The working directory is only looked up on the fallback branch, so an
 * explicit `--file` never triggers a `process.cwd()` call from here.
 */
function resolveFsPath(path: OcPath, options: PathCommandOptions): string {
  if (options.file !== undefined) return resolvePath(options.file);
  const baseDir = options.cwd ?? process.cwd();
  return resolvePath(baseDir, path.file);
}
/**
 * One-line human rendering of a match: the match kind, its line number,
 * and a kind-specific detail (leaf value and type, node descriptor, or
 * insertion container). Any other kind renders as `root`.
 */
function formatMatchHuman(match: OcMatch): string {
  switch (match.kind) {
    case "leaf":
      return `leaf @ L${match.line}: ${JSON.stringify(match.valueText)} (${match.leafType})`;
    case "node":
      return `node @ L${match.line} [${match.descriptor}]`;
    case "insertion-point":
      return `insertion-point @ L${match.line} [${match.container}]`;
    default:
      return `root @ L${match.line}`;
  }
}
// ---------- Commands -----------------------------------------------------
export async function pathResolveCommand(
pathStr: string | undefined,
options: PathCommandOptions,
runtime: OutputRuntimeEnv,
): Promise<void> {
const mode = detectMode(options);
if (pathStr === undefined) {
emitError(runtime, mode, "resolve: missing <oc-path> argument");
runtime.exit(2);
return;
}
let ocPath: OcPath;
try {
ocPath = parseOcPath(pathStr);
} catch (err) {
if (err instanceof OcPathError) {
emitError(runtime, mode, `parse failed: ${err.message}`, err.code);
runtime.exit(2);
return;
}
throw err;
}
const fsPath = resolveFsPath(ocPath, options);
const ast = await loadAst(fsPath, ocPath.file);
let match;
if (!requireArg(pathStr, "resolve: missing <oc-path> argument", runtime, mode)) return;
const ocPath = tryParse(pathStr, runtime, mode);
if (ocPath === null) return;
const ast = await loadAst(resolveFsPath(ocPath, options), ocPath.file);
let match: OcMatch | null;
try {
match = resolveOcPath(ast, ocPath);
} catch (err) {
if (err instanceof OcPathError) {
// resolveOcPath now throws on wildcard patterns (the pattern
// belongs in `find`, not `resolve`). Surface the structured code
// so the CLI message points the caller at the right verb.
// resolveOcPath throws on wildcard patterns — point at find.
emitError(runtime, mode, `resolve refused: ${err.message}`, err.code);
runtime.exit(2);
return;
@@ -221,7 +220,7 @@ export async function pathResolveCommand(
runtime.exit(1);
return;
}
emit(runtime, mode, { resolved: true, ocPath: pathStr, match }, () => formatMatchHuman(match));
emit(runtime, mode, { resolved: true, ocPath: pathStr, match }, () => formatMatchHuman(match!));
}
export async function pathSetCommand(
@@ -231,41 +230,15 @@ export async function pathSetCommand(
runtime: OutputRuntimeEnv,
): Promise<void> {
const mode = detectMode(options);
if (pathStr === undefined || value === undefined) {
emitError(runtime, mode, "set: requires <oc-path> <value>");
runtime.exit(2);
return;
}
let ocPath: OcPath;
try {
ocPath = parseOcPath(pathStr);
} catch (err) {
if (err instanceof OcPathError) {
emitError(runtime, mode, `parse failed: ${err.message}`, err.code);
runtime.exit(2);
return;
}
throw err;
}
if (!requireArg(pathStr, "set: requires <oc-path> <value>", runtime, mode)) return;
if (!requireArg(value, "set: requires <oc-path> <value>", runtime, mode)) return;
const ocPath = tryParse(pathStr, runtime, mode);
if (ocPath === null) return;
const fsPath = resolveFsPath(ocPath, options);
const ast = await loadAst(fsPath, ocPath.file);
// `setOcPath` invokes the per-kind editor which calls back into
// emit during rebuildRaw; the redaction-sentinel guard fires there
// and throws `OcEmitSentinelError` for sentinel-bearing values.
// Catch the throw here so it goes through the structured CLI error
// path instead of escaping to commander's runCommandWithRuntime
// (which would print raw String(err) and bypass --json scrubbing).
let result: SetResult;
try {
result = setOcPath(ast, ocPath, value);
} catch (err) {
if (err instanceof OcEmitSentinelError) {
emitError(runtime, mode, `set refused: ${err.message}`, "OC_EMIT_SENTINEL");
runtime.exit(1);
return;
}
throw err;
}
const result = catchSentinel("set", runtime, mode, () => setOcPath(ast, ocPath, value));
if (result === null) return;
if (!result.ok) {
const detail = "detail" in result ? result.detail : undefined;
emit(
@@ -277,25 +250,12 @@ export async function pathSetCommand(
runtime.exit(1);
return;
}
// `setOcPath` accepted the value into the AST, but the per-kind
// emit can still refuse to serialize it — most notably when the
// value contains the redaction sentinel (defense-in-depth: the
// substrate's emit guard fires there). The throw must NOT escape
// to commander's runCommandWithRuntime, which would print
// `String(err)` raw and bypass the CLI's JSON/human scrubbed-error
// boundary. Catch and route through `emitError` like every other
// refusal path.
let newBytes: string;
try {
newBytes = emitForKind(result.ast, ocPath.file);
} catch (err) {
if (err instanceof OcEmitSentinelError) {
emitError(runtime, mode, `emit refused: ${err.message}`, "OC_EMIT_SENTINEL");
runtime.exit(1);
return;
}
throw err;
}
// Per-kind emit can still refuse the sentinel even after set succeeds.
const newBytes = catchSentinel("emit", runtime, mode, () =>
emitForKind(result.ast, ocPath.file),
);
if (newBytes === null) return;
if (options.dryRun === true) {
emit(
runtime,
@@ -320,27 +280,10 @@ export async function pathFindCommand(
runtime: OutputRuntimeEnv,
): Promise<void> {
const mode = detectMode(options);
if (patternStr === undefined) {
emitError(runtime, mode, "find: missing <pattern> argument");
runtime.exit(2);
return;
}
let pattern: OcPath;
try {
pattern = parseOcPath(patternStr);
} catch (err) {
if (err instanceof OcPathError) {
emitError(runtime, mode, `parse failed: ${err.message}`, err.code);
runtime.exit(2);
return;
}
throw err;
}
// The CLI resolves `pattern.file` to a single literal filesystem path.
// Wildcards in the file slot (e.g. `oc://*.jsonc/...`) would silently
// ENOENT during `fs.readFile`. The substrate's `findOcPaths` walks
// *inside* an AST — multi-file globbing is out of scope for v0. Surface
// a clear error so users don't get a confusing missing-file failure.
if (!requireArg(patternStr, "find: missing <pattern> argument", runtime, mode)) return;
const pattern = tryParse(patternStr, runtime, mode);
if (pattern === null) return;
// File-slot wildcards would silently ENOENT during readFile; reject.
if (/[*?]/.test(pattern.file)) {
emitError(
runtime,
@@ -352,8 +295,7 @@ export async function pathFindCommand(
runtime.exit(2);
return;
}
const fsPath = resolveFsPath(pattern, options);
const ast = await loadAst(fsPath, pattern.file);
const ast = await loadAst(resolveFsPath(pattern, options), pattern.file);
const matches = findOcPaths(ast, pattern);
emit(
runtime,
@@ -361,15 +303,10 @@ export async function pathFindCommand(
{
pattern: patternStr,
count: matches.length,
matches: matches.map((m) => ({
path: formatOcPath(m.path),
match: m.match,
})),
matches: matches.map((m) => ({ path: formatOcPath(m.path), match: m.match })),
},
() => {
if (matches.length === 0) {
return `0 matches for ${patternStr}`;
}
if (matches.length === 0) return `0 matches for ${patternStr}`;
const plural = matches.length === 1 ? "" : "es";
const lines = [`${matches.length} match${plural} for ${patternStr}:`];
for (const m of matches) {
@@ -378,9 +315,7 @@ export async function pathFindCommand(
return lines.join("\n");
},
);
if (matches.length === 0) {
runtime.exit(1);
}
if (matches.length === 0) runtime.exit(1);
}
export function pathValidateCommand(
@@ -389,11 +324,7 @@ export function pathValidateCommand(
runtime: OutputRuntimeEnv,
): void {
const mode = detectMode(options);
if (pathStr === undefined) {
emitError(runtime, mode, "validate: missing <oc-path> argument");
runtime.exit(2);
return;
}
if (!requireArg(pathStr, "validate: missing <oc-path> argument", runtime, mode)) return;
try {
const ocPath = parseOcPath(pathStr);
emit(
@@ -413,22 +344,13 @@ export function pathValidateCommand(
},
() => {
const lines = [`valid: ${pathStr}`, ` file: ${ocPath.file}`];
if (ocPath.section !== undefined) {
lines.push(` section: ${ocPath.section}`);
}
if (ocPath.item !== undefined) {
lines.push(` item: ${ocPath.item}`);
}
if (ocPath.field !== undefined) {
lines.push(` field: ${ocPath.field}`);
}
if (ocPath.session !== undefined) {
lines.push(` session: ${ocPath.session}`);
}
if (ocPath.section !== undefined) lines.push(` section: ${ocPath.section}`);
if (ocPath.item !== undefined) lines.push(` item: ${ocPath.item}`);
if (ocPath.field !== undefined) lines.push(` field: ${ocPath.field}`);
if (ocPath.session !== undefined) lines.push(` session: ${ocPath.session}`);
return lines.join("\n");
},
);
return;
} catch (err) {
if (err instanceof OcPathError) {
emit(
@@ -450,34 +372,15 @@ export async function pathEmitCommand(
runtime: OutputRuntimeEnv,
): Promise<void> {
const mode = detectMode(options);
if (fileArg === undefined) {
emitError(runtime, mode, "emit: missing <file> argument");
runtime.exit(2);
return;
}
// Resolve the file slot through the same `--cwd`/`--file` rules the
// sibling subcommands use: `--file` (when set) is the absolute path
// override; otherwise resolve `fileArg` against `--cwd` (defaulting
// to `process.cwd()`). Without this, the flags are accepted by
// commander but ignored by the handler — exactly the bug-shape
// ClawSweeper flagged for the doc/option mismatch.
if (!requireArg(fileArg, "emit: missing <file> argument", runtime, mode)) return;
const fsPath =
options.file !== undefined
? resolvePath(options.file)
: resolvePath(options.cwd ?? process.cwd(), fileArg);
const fileName = fsPath.split(/[\\/]/).pop() ?? fileArg;
const ast = await loadAst(fsPath, fileName);
let bytes: string;
try {
bytes = emitForKind(ast, fileName);
} catch (err) {
if (err instanceof OcEmitSentinelError) {
emitError(runtime, mode, `emit refused: ${err.message}`, "OC_EMIT_SENTINEL");
runtime.exit(1);
return;
}
throw err;
}
const bytes = catchSentinel("emit", runtime, mode, () => emitForKind(ast, fileName));
if (bytes === null) return;
if (mode === "json") {
runtime.writeStdout(scrubSentinel(JSON.stringify({ ok: true, kind: ast.kind, bytes })));
return;
@@ -485,22 +388,14 @@ export async function pathEmitCommand(
runtime.writeStdout(bytes);
}
interface RawPathOptions {
json?: boolean;
human?: boolean;
cwd?: string;
file?: string;
dryRun?: boolean;
}
// ---------- Commander wiring ---------------------------------------------
function normalize(opts: RawPathOptions): PathCommandOptions {
return {
json: opts.json,
human: opts.human,
cwd: opts.cwd,
file: opts.file,
dryRun: opts.dryRun,
};
/**
 * Attach the output-mode and file-resolution options shared by the path
 * subcommands (`--json`, `--human`, `--cwd`, `--file`), in that order,
 * and return the result of the chained `.option(...)` calls.
 */
function withCommonOpts(cmd: Command): Command {
  const shared: ReadonlyArray<readonly [string, string]> = [
    ["--json", "Force JSON output"],
    ["--human", "Force human output"],
    ["--cwd <dir>", "Resolve file slot against this directory"],
    ["--file <file>", "Override the file slot's resolved path"],
  ];
  return shared.reduce((acc, [flags, description]) => acc.option(flags, description), cmd);
}
export function registerPathCli(program: Command): void {
@@ -509,43 +404,34 @@ export function registerPathCli(program: Command): void {
.description("Inspect and edit workspace files via the oc:// addressing scheme")
.addHelpText("after", "\nDocs: https://docs.openclaw.ai/cli/path\n");
path
.command("resolve")
.description("Print the match at an oc:// path")
.argument("<oc-path>", "oc:// path to resolve")
.option("--json", "Force JSON output")
.option("--human", "Force human output")
.option("--cwd <dir>", "Resolve file slot against this directory")
.option("--file <file>", "Override the file slot's resolved path")
.action(async (pathStr: string, opts: RawPathOptions) => {
await pathResolveCommand(pathStr, normalize(opts), defaultRuntime);
});
withCommonOpts(
path
.command("resolve")
.description("Print the match at an oc:// path")
.argument("<oc-path>", "oc:// path to resolve"),
).action(async (pathStr: string, opts: PathCommandOptions) => {
await pathResolveCommand(pathStr, opts, defaultRuntime);
});
path
.command("find")
.description("Enumerate matches for a wildcard / predicate oc:// pattern")
.argument("<pattern>", "oc:// pattern")
.option("--json", "Force JSON output")
.option("--human", "Force human output")
.option("--cwd <dir>", "Resolve file slot against this directory")
.option("--file <file>", "Override the file slot's resolved path")
.action(async (patternStr: string, opts: RawPathOptions) => {
await pathFindCommand(patternStr, normalize(opts), defaultRuntime);
});
withCommonOpts(
path
.command("find")
.description("Enumerate matches for a wildcard / predicate oc:// pattern")
.argument("<pattern>", "oc:// pattern"),
).action(async (patternStr: string, opts: PathCommandOptions) => {
await pathFindCommand(patternStr, opts, defaultRuntime);
});
path
.command("set")
.description("Write a leaf value at an oc:// path")
.argument("<oc-path>", "oc:// path to write")
.argument("<value>", "string value to write")
.option("--dry-run", "Print bytes without writing")
.option("--json", "Force JSON output")
.option("--human", "Force human output")
.option("--cwd <dir>", "Resolve file slot against this directory")
.option("--file <file>", "Override the file slot's resolved path")
.action(async (pathStr: string, value: string, opts: RawPathOptions) => {
await pathSetCommand(pathStr, value, normalize(opts), defaultRuntime);
});
withCommonOpts(
path
.command("set")
.description("Write a leaf value at an oc:// path")
.argument("<oc-path>", "oc:// path to write")
.argument("<value>", "string value to write")
.option("--dry-run", "Print bytes without writing"),
).action(async (pathStr: string, value: string, opts: PathCommandOptions) => {
await pathSetCommand(pathStr, value, opts, defaultRuntime);
});
path
.command("validate")
@@ -553,19 +439,16 @@ export function registerPathCli(program: Command): void {
.argument("<oc-path>", "oc:// path to validate")
.option("--json", "Force JSON output")
.option("--human", "Force human output")
.action((pathStr: string, opts: RawPathOptions) => {
pathValidateCommand(pathStr, normalize(opts), defaultRuntime);
.action((pathStr: string, opts: PathCommandOptions) => {
pathValidateCommand(pathStr, opts, defaultRuntime);
});
path
.command("emit")
.description("Round-trip a file through parse + emit")
.argument("<file>", "Path to a workspace file")
.option("--cwd <dir>", "Resolve <file> against this directory")
.option("--file <file>", "Override the file's resolved path")
.option("--json", "Force JSON output")
.option("--human", "Force human output")
.action(async (fileArg: string, opts: RawPathOptions) => {
await pathEmitCommand(fileArg, normalize(opts), defaultRuntime);
});
withCommonOpts(
path
.command("emit")
.description("Round-trip a file through parse + emit")
.argument("<file>", "Path to a workspace file"),
).action(async (fileArg: string, opts: PathCommandOptions) => {
await pathEmitCommand(fileArg, opts, defaultRuntime);
});
}

View File

@@ -1,28 +1,14 @@
/**
* Workspace-Markdown AST — generic addressing index over the 8 workspace
* files openclaw treats as opaque text in `loadWorkspaceBootstrapFiles`.
* Markdown AST — addressing index for workspace files.
*
* **The AST is purely an addressing index.** It does NOT encode opinions
* about what a "valid" SOUL.md / AGENTS.md / MEMORY.md looks like; it
* exposes the markdown features (frontmatter, sections, items, tables,
* code blocks) that any `OcPath` (`{ file, section?, item?, field? }`) can
* resolve over. Per-file lint opinions ride in @openclaw/oc-lint, not
* here.
*
* **Byte-fidelity contract**: `emitMd(parse(raw)) === raw` for every input
* the parser accepts. The parser preserves the original bytes on the
* root node (`raw`) so emitters can round-trip even content the AST
* doesn't structurally model (foreign content, idiosyncratic whitespace).
* Pure addressing structure; no per-file opinions (those live in lint
* rules). Byte-fidelity: `emitMd(parse(raw)) === raw`; `raw` on the
* root preserves the original bytes for round-trip.
*
* @module @openclaw/oc-path/ast
*/
/**
* Diagnostic emitted by the parser. Used by lint rules and parse-error
* surfacing alike. Severity is `info` by default; the parser emits
* `warning` for suspicious-but-recoverable inputs (e.g., unclosed
* frontmatter fence) and never throws.
*/
/** Parser diagnostic. Severity `warning` for recoverable input; never throws. */
export interface Diagnostic {
readonly line: number;
readonly message: string;
@@ -30,10 +16,7 @@ export interface Diagnostic {
readonly code?: string;
}
/**
* A frontmatter key/value pair. Keys are preserved as written; values
* are unquoted (surrounding `"` or `'` stripped) but otherwise verbatim.
*/
/** Frontmatter entry. Values unquoted (`"`/`'` stripped) but otherwise verbatim. */
export interface FrontmatterEntry {
readonly key: string;
readonly value: string;
@@ -41,14 +24,8 @@ export interface FrontmatterEntry {
}
/**
* A bullet-list item inside a section. Items are addressable via OcPath
* `{ file, section, item }` where `item` is the slug of the bullet's
* text (or the slug of `kv.key` when the bullet is in `- key: value`
* shape).
*
* `kv` is populated when the bullet matches `- <key>: <value>` (the
* common pattern in AGENTS.md / TOOLS.md / USER.md). Lint rules use it
* for field-level addressing via `OcPath.field`.
* Bullet item. `slug` is the addressing key (kv.key when present, else
* item text). `kv` is populated for `- key: value` bullets.
*/
export interface AstItem {
readonly text: string;
@@ -58,16 +35,11 @@ export interface AstItem {
}
/**
* An H2-delimited block. The `slug` is the kebab-case lowercase form of
* `heading` and is what OcPath `section` matches against. `bodyText` is
* the prose between this heading and the next H2 (or end of file),
* verbatim. `items` are extracted from `bodyText` for addressing
* convenience but the raw text is preserved.
* H2-delimited block. `bodyText` is the verbatim prose between this
* heading and the next; `items` are extracted for addressing.
*
* Tables and fenced code blocks are NOT modeled as first-class AST
* children — addressing into them is out of scope for the substrate.
* Lint rules that need table rows or code-block contents re-tokenize
* the block's `bodyText` on demand.
* Tables and code blocks aren't first-class — addressing into them is
* out of scope. Lint rules re-tokenize `bodyText` if needed.
*/
export interface AstBlock {
readonly heading: string;
@@ -77,20 +49,7 @@ export interface AstBlock {
readonly items: readonly AstItem[];
}
/**
* The root AST node. Always carries `raw` for byte-identical round-trip.
* `frontmatter` is empty when the file has none. `preamble` is the
* prose before the first H2 (may be empty). `blocks` is the H2 tree in
* document order.
*
* `kind: 'md'` discriminator matches the jsonc / jsonl AST shapes;
* the universal `setOcPath` / `resolveOcPath` verbs dispatch
* via this tag at runtime so callers don't have to thread kind
* through the call site.
*
* The generic shape is the same for all 9 workspace files; opinions
* (`AGENTS_TOOLS_SECTION_EMPTY`, etc.) ride in lint rules, not here.
*/
/** Root AST. `raw` carries the original bytes for byte-identical round-trip. */
export interface MdAst {
readonly kind: "md";
readonly raw: string;
@@ -99,9 +58,6 @@ export interface MdAst {
readonly blocks: readonly AstBlock[];
}
/**
* Parser output: the AST plus any diagnostics from the parse pass.
*/
export interface ParseResult {
readonly ast: MdAst;
readonly diagnostics: readonly Diagnostic[];

View File

@@ -1,15 +1,10 @@
/**
* Mutate a `MdAst` at an OcPath. Returns a new AST with the
* value replaced; the original is unchanged.
* Mutate `MdAst` at an OcPath. Returns a new AST; original unchanged.
*
* Writable surface:
* oc://FILE/[frontmatter]/key → frontmatter value
* oc://FILE/section/item/field → item.kv.value
*
* oc://FILE/[frontmatter]/key → frontmatter entry value
* oc://FILE/section/item/field → item.kv.value (when item has kv shape)
*
* Section bodies, tables, and code blocks are NOT writable through
* this primitive — they're prose, and a generic "set" doesn't compose
* cleanly. Doctor fixers handle structural edits via dedicated verbs.
* Section bodies aren't writable through this primitive.
*
* @module @openclaw/oc-path/edit
*/
@@ -25,74 +20,44 @@ export type MdEditResult =
readonly reason: "unresolved" | "not-writable" | "no-item-kv";
};
/**
* Replace the value at `path` with `newValue`. The new AST has fresh
* `raw` re-rendered from the structural fields.
*
* Sentinel guard at the substrate boundary — `setJsoncOcPath` and the
* jsonl `finalize`-via-render path both reject sentinel-bearing values
* before they reach the AST. The md path was deferring entirely to
* round-trip echo through `emitMd`, which `acceptPreExistingSentinel`
* by default skips. Closing the gap here keeps F9 (formatter sentinel
* guard) symmetric across all three kinds.
*/
// Sentinel guard at the boundary keeps md symmetric with jsonc/jsonl,
// which both reject sentinel values before they reach the AST.
export function setMdOcPath(ast: MdAst, path: OcPath, newValue: string): MdEditResult {
guardSentinel(newValue, formatOcPath(path));
// Frontmatter address: oc://FILE/[frontmatter]/<key>
if (path.section === "[frontmatter]") {
const key = path.item ?? path.field;
if (key === undefined) {
return { ok: false, reason: "unresolved" };
}
if (key === undefined) return { ok: false, reason: "unresolved" };
const idx = ast.frontmatter.findIndex((e) => e.key === key);
if (idx === -1) {
return { ok: false, reason: "unresolved" };
}
if (idx === -1) return { ok: false, reason: "unresolved" };
const existing = ast.frontmatter[idx];
if (existing === undefined) {
return { ok: false, reason: "unresolved" };
}
if (existing === undefined) return { ok: false, reason: "unresolved" };
const newEntry: FrontmatterEntry = { ...existing, value: newValue };
const newFm = ast.frontmatter.slice();
newFm[idx] = newEntry;
return finalize({ ...ast, frontmatter: newFm });
}
// Item-field address: oc://FILE/section/item/field
if (path.section === undefined || path.item === undefined || path.field === undefined) {
return { ok: false, reason: "not-writable" };
}
const sectionSlug = path.section.toLowerCase();
const blockIdx = ast.blocks.findIndex((b) => b.slug === sectionSlug);
if (blockIdx === -1) {
return { ok: false, reason: "unresolved" };
}
if (blockIdx === -1) return { ok: false, reason: "unresolved" };
const block = ast.blocks[blockIdx];
if (block === undefined) {
return { ok: false, reason: "unresolved" };
}
if (block === undefined) return { ok: false, reason: "unresolved" };
const itemSlug = path.item.toLowerCase();
const itemIdx = block.items.findIndex((i) => i.slug === itemSlug);
if (itemIdx === -1) {
return { ok: false, reason: "unresolved" };
}
if (itemIdx === -1) return { ok: false, reason: "unresolved" };
const item = block.items[itemIdx];
if (item === undefined) {
return { ok: false, reason: "unresolved" };
}
if (item.kv === undefined) {
return { ok: false, reason: "no-item-kv" };
}
if (item === undefined) return { ok: false, reason: "unresolved" };
if (item.kv === undefined) return { ok: false, reason: "no-item-kv" };
if (item.kv.key.toLowerCase() !== path.field.toLowerCase()) {
return { ok: false, reason: "unresolved" };
}
const newItem: AstItem = {
...item,
kv: { key: item.kv.key, value: newValue },
};
const newItem: AstItem = { ...item, kv: { key: item.kv.key, value: newValue } };
const newItems = block.items.slice();
newItems[itemIdx] = newItem;
const newBlock: AstBlock = {
@@ -105,28 +70,17 @@ export function setMdOcPath(ast: MdAst, path: OcPath, newValue: string): MdEditR
return finalize({ ...ast, blocks: newBlocks });
}
/**
* Rebuild block.bodyText so emit-roundtrip mode reflects the edit. We
* do a minimal in-place substitution on the existing bodyText: find
* each `- key: value` line for a touched item and rewrite the value.
*
* For items without a matching bullet line, we leave bodyText alone
* (the structural fields take precedence in render mode anyway).
*/
// In-place substitution on `bodyText` so round-trip emit reflects the
// edit. Items without a matching bullet line are skipped (render mode
// uses structural fields anyway).
function rebuildBlockBody(block: AstBlock, newItems: readonly AstItem[]): string {
let body = block.bodyText;
for (let i = 0; i < newItems.length; i++) {
const newItem = newItems[i];
const oldItem = block.items[i];
if (newItem === undefined || oldItem === undefined) {
continue;
}
if (newItem.kv === undefined || oldItem.kv === undefined) {
continue;
}
if (newItem.kv.value === oldItem.kv.value) {
continue;
}
if (newItem === undefined || oldItem === undefined) continue;
if (newItem.kv === undefined || oldItem.kv === undefined) continue;
if (newItem.kv.value === oldItem.kv.value) continue;
const re = new RegExp(`^(\\s*-\\s*${escapeRegex(oldItem.kv.key)}\\s*:\\s*).*$`, "m");
body = body.replace(re, `$1${newItem.kv.value}`);
}
@@ -137,10 +91,6 @@ function escapeRegex(s: string): string {
return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
/**
* Re-render `ast.raw` from the (possibly mutated) tree using the same
* shape the round-trip emitter expects.
*/
function finalize(ast: MdAst): MdEditResult {
const parts: string[] = [];
if (ast.frontmatter.length > 0) {
@@ -151,30 +101,19 @@ function finalize(ast: MdAst): MdEditResult {
parts.push("---");
}
if (ast.preamble.length > 0) {
if (parts.length > 0) {
parts.push("");
}
if (parts.length > 0) parts.push("");
parts.push(ast.preamble);
}
for (const block of ast.blocks) {
if (parts.length > 0) {
parts.push("");
}
if (parts.length > 0) parts.push("");
parts.push(`## ${block.heading}`);
if (block.bodyText.length > 0) {
parts.push(block.bodyText);
}
if (block.bodyText.length > 0) parts.push(block.bodyText);
}
const raw = parts.join("\n");
return { ok: true, ast: { ...ast, raw } };
return { ok: true, ast: { ...ast, raw: parts.join("\n") } };
}
/**
 * Format a frontmatter value for emission.
 *
 * - The empty string is emitted as an explicit pair of double quotes (`""`).
 * - A value containing any of the characters
 *   `: # & * ? | < > = ! % @ \` , [ ] { }`, a carriage return, or a newline
 *   is emitted as a JSON string literal via `JSON.stringify` (quoted and
 *   escaped).
 * - Any other value is returned unchanged.
 *
 * @param value - The raw frontmatter value.
 * @returns The text to write after the frontmatter key.
 */
function formatFrontmatterValue(value: string): string {
  if (value.length === 0) return '""';
  if (/[:#&*?|<>=!%@`,[\]{}\r\n]/.test(value)) return JSON.stringify(value);
  return value;
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,28 +1,11 @@
/**
* Emit a `JsoncAst` to bytes.
*
* **Round-trip mode (default)** returns `ast.raw` verbatim — this
* preserves comments, formatting, and trailing whitespace exactly.
*
* **Sentinel-guard policy**:
*
* - Round-trip echoes `ast.raw` *without* scanning for the redaction
* sentinel. Bytes that came in via `parseJsonc` are trusted: a
* workspace file legitimately containing the literal
* `__OPENCLAW_REDACTED__` (in a code-block comment, in a pasted
* error log, etc.) would otherwise become a workspace-wide emit
* DoS — every `openclaw path emit FILE.jsonc` would exit non-zero,
* breaking lint round-trip rules, doctor fixers, and LKG
* fingerprinting. The substrate's contract is "no NEW sentinel
* bytes introduced via emit", not "no sentinel byte ever leaves".
* - Render mode walks every leaf and rejects sentinel-bearing leaf
* values (caller-injected sentinel via `setOcPath` lands here:
* `setJsoncOcPath` rebuilds raw via render-mode, so a leaf set to
* the sentinel by the caller is caught at the rebuild boundary
* before the raw is shipped back).
*
* Callers that want pre-existing sentinel detection (e.g., LKG
* fingerprint verification) can opt in via
* `acceptPreExistingSentinel: false`.
*
* In short: round-trip (default) echoes `ast.raw` verbatim, preserving
* comments and formatting. The sentinel guard fires only in render mode
* by default; round-trip trusts parsed bytes so that a workspace file
* legitimately containing the sentinel literal isn't a global emit DoS.
*
* @module @openclaw/oc-path/jsonc/emit
@@ -34,12 +17,6 @@ import type { JsoncAst, JsoncValue } from "./ast.js";
/** Options accepted by `emitJsonc`. Every field is optional. */
export interface JsoncEmitOptions {
/**
* Emit strategy. `"roundtrip"` (the default per the module header)
* returns `ast.raw` verbatim; `"render"` synthesizes JSON from the
* structural tree, which loses comments.
*/
readonly mode?: "roundtrip" | "render";
/**
* NOTE(review): presumably the file name used to build the path
* reported by `OcEmitSentinelError` — the derivation is not visible
* in this hunk; confirm in `emitJsonc`.
*/
readonly fileNameForGuard?: string;
/**
* When `false`, round-trip mode also scans `ast.raw` for the
* redaction sentinel and throws `OcEmitSentinelError` if found.
* Default `true` — round-trip trusts parsed bytes (see policy
* comment above). Render mode always scans leaves regardless.
*/
readonly acceptPreExistingSentinel?: boolean;
}
@@ -55,12 +32,8 @@ export function emitJsonc(ast: JsoncAst, opts: JsoncEmitOptions = {}): string {
return ast.raw;
}
// Render mode — synthesize JSON from the structural tree (loses
// comments). Walk every leaf string for sentinel detection so a
// caller-injected sentinel via setOcPath is rejected.
if (ast.root === null) {
return "";
}
// Render mode loses comments; walks leaves for caller-injected sentinel.
if (ast.root === null) return "";
return renderValue(ast.root, guardPath, []);
}
@@ -76,17 +49,12 @@ function renderValue(value: JsoncValue, guardPath: string, walked: readonly stri
const parts = value.items.map((v, i) => renderValue(v, guardPath, [...walked, String(i)]));
return `[ ${parts.join(", ")} ]`;
}
case "string": {
// Reject ANY string that contains the sentinel — embedded
// (`prefix__OPENCLAW_REDACTED__suffix`) is just as much of a
// "literal redacted token landed on disk" leak as exact-match.
// The roundtrip path uses `raw.includes()` for the same reason;
// render needs the same predicate per leaf.
case "string":
// Substring match: embedded sentinel leaks marker bytes too.
if (value.value.includes(REDACTED_SENTINEL)) {
throw new OcEmitSentinelError(`${guardPath}/${walked.join("/")}`);
}
return JSON.stringify(value.value);
}
case "number":
return String(value.value);
case "boolean":
@@ -94,5 +62,4 @@ function renderValue(value: JsoncValue, guardPath: string, walked: readonly stri
case "null":
return "null";
}
throw new Error(`unreachable: jsonc renderValue kind`);
}

View File

@@ -1,18 +1,7 @@
/**
* Resolve an `OcPath` against a `JsoncAst`.
*
* The OcPath model has 4 segments (file, section, item, field) — for
* JSONC artifacts that's not enough depth, so segments concat with `/`
* AND a section/item/field MAY contain dots (`.`) for deeper traversal.
* Both forms work:
*
* oc://config/plugins/entries/foo (segment-per-key)
* oc://config/plugins.entries.foo (dotted section)
* oc://config/plugins/entries.foo (mixed)
*
* Each segment is split on `.`, and the resulting flat list of keys
* walks the value tree from `ast.root`. Numeric segments index into
* arrays.
* Resolve `OcPath` against `JsoncAst`. Slot segments concat as if
* dotted; segments are bracket/quote-aware-split so quoted keys
* containing `/` or `.` round-trip cleanly.
*
* @module @openclaw/oc-path/jsonc/resolve
*/
@@ -36,82 +25,52 @@ export type JsoncOcPathMatch =
readonly path: readonly string[];
};
/**
* Walk the JSONC tree following the OcPath. Returns the matched node
* or `null`. Numeric path segments index into arrays.
*/
export function resolveJsoncOcPath(ast: JsoncAst, path: OcPath): JsoncOcPathMatch | null {
if (ast.root === null) {
return null;
}
if (ast.root === null) return null;
// Bracket-aware split + unquote: `"foo/bar".baz` becomes
// [`foo/bar`, `baz`] (literal slash preserved in the first sub).
const segments: string[] = [];
if (path.section !== undefined) {
for (const s of splitRespectingBrackets(path.section, ".")) {
const collect = (slot: string | undefined): void => {
if (slot === undefined) return;
for (const s of splitRespectingBrackets(slot, ".")) {
segments.push(isQuotedSeg(s) ? unquoteSeg(s) : s);
}
}
if (path.item !== undefined) {
for (const s of splitRespectingBrackets(path.item, ".")) {
segments.push(isQuotedSeg(s) ? unquoteSeg(s) : s);
}
}
if (path.field !== undefined) {
for (const s of splitRespectingBrackets(path.field, ".")) {
segments.push(isQuotedSeg(s) ? unquoteSeg(s) : s);
}
}
};
collect(path.section);
collect(path.item);
collect(path.field);
if (segments.length === 0) {
return { kind: "root", node: ast };
}
if (segments.length === 0) return { kind: "root", node: ast };
let current: JsoncValue = ast.root;
let lastEntry: JsoncEntry | null = null;
const walked: string[] = [];
for (let seg of segments) {
if (seg.length === 0) {
return null;
}
// Positional resolution: `$first` / `$last` always; `-N` only on
// indexable (array) containers. On a keyed (object) container, a
// `-N` segment falls through to literal-key lookup so paths like
// `groups.-5028303500.requireMention` (Telegram supergroup IDs —
// openclaw#59934) address the literal key instead of crashing.
if (seg.length === 0) return null;
// `-N` on an indexable container is positional; on a keyed
// container it falls through to literal-key lookup (e.g. Telegram
// supergroup IDs — openclaw#59934).
if (isPositionalSeg(seg)) {
const concrete = positionalForJsonc(current, seg);
if (concrete !== null) {
seg = concrete;
}
// null means "not applicable" — fall through to literal lookup.
if (concrete !== null) seg = concrete;
}
walked.push(seg);
if (current.kind === "object") {
const entry = current.entries.find((e) => e.key === seg);
if (entry === undefined) {
return null;
}
if (entry === undefined) return null;
lastEntry = entry;
current = entry.value;
continue;
}
if (current.kind === "array") {
const idx = Number(seg);
if (!Number.isInteger(idx) || idx < 0 || idx >= current.items.length) {
return null;
}
if (!Number.isInteger(idx) || idx < 0 || idx >= current.items.length) return null;
lastEntry = null;
const item = current.items[idx];
if (item === undefined) {
return null;
}
if (item === undefined) return null;
current = item;
continue;
}
// Primitive — can't descend further.
return null;
}

View File

@@ -1,16 +1,6 @@
/**
* Mutate a `JsonlAst` at an OcPath. Returns a new AST with the line
* (or sub-field of a line) replaced.
*
* Edit shapes:
*
* oc://session-events/L42 → replace line 42's whole value
* oc://session-events/L42/field → replace field on line 42
* oc://session-events/L42/field.sub → dotted descent
* oc://session-events/$last/... → resolves to most recent value
*
* Append (no existing line) is NOT a `set` — use `appendJsonlOcPath`
* for that. `setJsonlOcPath` only edits existing addresses.
*
* @module @openclaw/oc-path/jsonl/edit
*/
@@ -46,8 +36,7 @@ export function setJsonlOcPath(ast: JsonlAst, path: OcPath, newValue: JsoncValue
return { ok: false, reason: "unresolved" };
}
// No item/field — replace the whole line value. Requires the line to
// already be a value line (we don't synthesize lines from blanks).
// No item/field — replace the whole line. Requires an existing value line.
if (path.item === undefined && path.field === undefined) {
if (target.kind !== "value") {
return { ok: false, reason: "not-a-value-line" };
@@ -65,10 +54,7 @@ export function setJsonlOcPath(ast: JsonlAst, path: OcPath, newValue: JsoncValue
return { ok: false, reason: "not-a-value-line" };
}
// Bracket/brace/quote-aware split — preserves quoted segments
// verbatim so the edit path matches `resolveJsonlOcPath`'s
// unquoting behavior. Plain `.split('.')` would shred a quoted key
// and silently desync read-vs-write.
// Quote-aware split keeps edit symmetric with resolveJsonlOcPath.
const segments: string[] = [];
if (path.item !== undefined) {
segments.push(...splitRespectingBrackets(path.item, "."));
@@ -97,45 +83,29 @@ function replaceAt(
newValue: JsoncValue,
): JsoncValue | null {
const seg = segments[i];
if (seg === undefined) {
return newValue;
}
if (seg.length === 0) {
return null;
}
if (seg === undefined) return newValue;
if (seg.length === 0) return null;
if (current.kind === "object") {
// Resolve positional tokens ($first / $last) against the entries'
// ordered key list before any literal-key comparison. Keeps the
// jsonl edit path symmetric with resolveJsonlOcPath, which already
// honors positional tokens during read.
let segNorm: string = seg;
// Positional tokens resolve against the entries' ordered key list;
// quoted segments are unquoted before literal-key comparison.
let segNorm = seg;
if (isPositionalSeg(seg)) {
const resolved = resolvePositionalSeg(seg, {
indexable: false,
size: current.entries.length,
keys: current.entries.map((e) => e.key),
});
if (resolved === null) {
return null;
}
if (resolved === null) return null;
segNorm = resolved;
}
// Quoted segments carry the raw bytes verbatim; AST entry keys
// are unquoted. Strip the surrounding quotes before comparing.
const lookupKey = isQuotedSeg(segNorm) ? unquoteSeg(segNorm) : segNorm;
const idx = current.entries.findIndex((e) => e.key === lookupKey);
if (idx === -1) {
return null;
}
if (idx === -1) return null;
const child = current.entries[idx];
if (child === undefined) {
return null;
}
if (child === undefined) return null;
const replacedChild = replaceAt(child.value, segments, i + 1, newValue);
if (replacedChild === null) {
return null;
}
if (replacedChild === null) return null;
const newEntry: JsoncEntry = { ...child, value: replacedChild };
const newEntries = current.entries.slice();
newEntries[idx] = newEntry;
@@ -147,32 +117,21 @@ function replaceAt(
}
if (current.kind === "array") {
// Resolve positional tokens ($first / $last / -N) against the
// array's size before the numeric coercion below; without this
// `Number('$last')` is NaN and the path silently unresolves.
let segNorm: string = seg;
let segNorm = seg;
if (isPositionalSeg(seg)) {
const resolved = resolvePositionalSeg(seg, {
indexable: true,
size: current.items.length,
});
if (resolved === null) {
return null;
}
if (resolved === null) return null;
segNorm = resolved;
}
const idx = Number(segNorm);
if (!Number.isInteger(idx) || idx < 0 || idx >= current.items.length) {
return null;
}
if (!Number.isInteger(idx) || idx < 0 || idx >= current.items.length) return null;
const child = current.items[idx];
if (child === undefined) {
return null;
}
if (child === undefined) return null;
const replacedChild = replaceAt(child, segments, i + 1, newValue);
if (replacedChild === null) {
return null;
}
if (replacedChild === null) return null;
const newItems = current.items.slice();
newItems[idx] = replacedChild;
return {
@@ -185,48 +144,31 @@ function replaceAt(
return null;
}
/**
 * Translate a JSONL line address into an index into `ast.lines`.
 *
 * Supported address shapes (intended to mirror the line-address grammar
 * used by `resolveJsonlOcPath` and find.ts, so a path that resolves on
 * read also resolves on the edit path):
 *
 * - `$last`  — the last line whose `kind` is `"value"`.
 * - `$first` — the first line whose `kind` is `"value"`.
 * - `-N`     — the Nth-from-last value line. Only value lines are
 *              counted, so blank/malformed lines don't shift the count.
 * - `LN`     — the line whose `line` number equals `N`, of any kind.
 *
 * @param ast - The JSONL AST whose `lines` are searched.
 * @param addr - The line-address segment of the path.
 * @returns The matching index into `ast.lines`, or `-1` when the address
 *   is malformed or nothing matches.
 */
function pickLineIndex(ast: JsonlAst, addr: string): number {
  // Indices (into `ast.lines`) of every value line, in file order.
  const valueIndices = (): number[] => {
    const out: number[] = [];
    for (let i = 0; i < ast.lines.length; i++) {
      if (ast.lines[i]?.kind === "value") out.push(i);
    }
    return out;
  };

  if (addr === "$last") {
    const v = valueIndices();
    return v[v.length - 1] ?? -1;
  }
  if (addr === "$first") {
    const v = valueIndices();
    return v[0] ?? -1;
  }
  if (/^-\d+$/.test(addr)) {
    const v = valueIndices();
    // `addr` is negative, so this counts back from the end; `-0` lands
    // one past the end and is rejected by the bounds check.
    const n = v.length + Number(addr);
    return n >= 0 && n < v.length ? (v[n] ?? -1) : -1;
  }

  const m = /^L(\d+)$/.exec(addr);
  if (m === null || m[1] === undefined) return -1;
  const target = Number(m[1]);
  return ast.lines.findIndex((l) => l.line === target);
}
@@ -253,12 +195,7 @@ function finalize(
return { ok: true, ast: { ...next, raw: rendered } };
}
/**
* Append a new value as the next line. Useful for session checkpointing
* (each event is a new line). Returns a new AST. The `path` parameter
* is accepted for OcPath-naming consistency but jsonl append addresses
* the file as a whole (line numbers are assigned by the substrate).
*/
/** Append a value as the next line. Line numbers are substrate-assigned. */
export function appendJsonlOcPath(ast: JsonlAst, value: JsoncValue): JsonlAst {
const nextLineNo = ast.lines.length === 0 ? 1 : (ast.lines[ast.lines.length - 1]?.line ?? 0) + 1;
const newLine: JsonlLine = {

View File

@@ -1,15 +1,6 @@
/**
* Emit a `JsonlAst` to bytes.
*
* **Round-trip mode (default)** returns `ast.raw` verbatim — preserves
* malformed lines, blanks, trailing-newline shape exactly.
*
* **Render mode** rebuilds the file from line entries (re-stringifies
* value lines via JSON.stringify; preserves blank/malformed lines
* verbatim). Useful for synthetic ASTs.
*
* **Sentinel guard**: scans every emitted byte sequence for the
* `__OPENCLAW_REDACTED__` literal.
* Emit a `JsonlAst` to bytes. Round-trip echoes `ast.raw`; render mode
* rebuilds from line entries (preserves blank/malformed lines verbatim).
*
* @module @openclaw/oc-path/jsonl/emit
*/
@@ -21,11 +12,6 @@ import type { JsonlAst } from "./ast.js";
/** Options accepted by `emitJsonl`. Every field is optional. */
export interface JsonlEmitOptions {
/**
* Emit strategy. `"roundtrip"` echoes `ast.raw`; `"render"` rebuilds
* the output from the line entries, keeping blank/malformed lines as
* their original raw bytes.
*/
readonly mode?: "roundtrip" | "render";
/**
* NOTE(review): presumably the file name used to build the path
* reported by `OcEmitSentinelError` — the derivation is not visible
* in this hunk; confirm in `emitJsonl`.
*/
readonly fileNameForGuard?: string;
/**
* See `JsoncEmitOptions.acceptPreExistingSentinel` for the rationale.
* Default `true` — round-trip echoes parsed bytes without scanning
* for the sentinel. Render mode scans value-line leaves regardless.
*/
readonly acceptPreExistingSentinel?: boolean;
}
@@ -44,25 +30,17 @@ export function emitJsonl(ast: JsonlAst, opts: JsonlEmitOptions = {}): string {
const out: string[] = [];
for (const ln of ast.lines) {
if (ln.kind === "blank" || ln.kind === "malformed") {
// Blank/malformed lines round-trip as their original raw bytes.
// Apply the same trust policy: only scan when caller opts in.
if (!acceptPreExisting && ln.raw.includes(REDACTED_SENTINEL)) {
throw new OcEmitSentinelError(`${guardPath}/L${ln.line}`);
}
out.push(ln.raw);
continue;
}
// Value lines re-serialize via renderValue, which always scans
// string leaves regardless of acceptPreExistingSentinel — a
// caller-injected sentinel via setOcPath / appendJsonl must
// always be rejected.
// Value lines always scan leaves so caller-injected sentinel is rejected.
out.push(renderValue(ln.value, `${guardPath}/L${ln.line}`, []));
}
// Restore the original line-ending convention. Without this, a CRLF
// input edited via setJsonlOcPath would emit a mixed-ending file:
// edited lines joined with `\n` and untouched lines retaining the
// `\r` on their .raw bytes — silent CRLF→LF corruption on
// Windows-authored datasets.
// Preserve line-ending convention; otherwise CRLF input edited via
// setJsonlOcPath would emit mixed endings (silent corruption on Windows).
return out.join(ast.lineEnding ?? "\n");
}
@@ -78,15 +56,12 @@ function renderValue(value: JsoncValue, guardPath: string, walked: readonly stri
const parts = value.items.map((v, i) => renderValue(v, guardPath, [...walked, String(i)]));
return `[${parts.join(",")}]`;
}
case "string": {
// Reject ANY string that contains the sentinel — embedded
// (`prefix__OPENCLAW_REDACTED__suffix`) is just as much of a
// "literal redacted token landed on disk" leak as exact-match.
case "string":
// Substring match: embedded sentinel leaks marker bytes too.
if (value.value.includes(REDACTED_SENTINEL)) {
throw new OcEmitSentinelError(`${guardPath}/${walked.join("/")}`);
}
return JSON.stringify(value.value);
}
case "number":
return String(value.value);
case "boolean":
@@ -94,5 +69,4 @@ function renderValue(value: JsoncValue, guardPath: string, walked: readonly stri
case "null":
return "null";
}
throw new Error(`unreachable: jsonl renderValue kind`);
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,27 +1,12 @@
/**
* Generic markdown-flavored parser for the workspace files.
* Markdown parser for workspace files: frontmatter + preamble + H2
* blocks (with bullet items as the only addressable structural child).
* Tokenization via markdown-it; frontmatter handled here.
*
* Produces a `MdAst` addressing index over `raw` bytes: frontmatter
* (if present), preamble (prose before first H2), and an H2-block tree
* with items extracted for OcPath resolution.
* Grammar opinions (indented `##`, empty `## `, ordered lists, nested
* sub-bullets) live in lint rules, not the parser.
*
* Tokenization is delegated to markdown-it; this module owns the
* frontmatter detector (markdown-it does not handle YAML frontmatter
* natively) and the token-stream walker that buckets headings and
* bullets into the addressable AST shape. Tables and fenced code
* blocks are NOT first-class AST children — substrate addressing
* doesn't go inside them, and tokenizer-level structure (which
* markdown-it already gets right) is sufficient to ensure `##` and
* `-` inside them aren't misparsed as headings or items.
*
* **Grammar opinions live in lint rules, not the parser.** Indented
* `## foo`, empty `## `, ordered (`1.`) lists, and nested sub-bullets
* are all recognized as headings / items here; downstream lint rules
* (`OC_HEADING_INDENTED`, `OC_HEADING_EMPTY`, etc.) decide whether
* those shapes are OK in a particular file.
*
* **Byte-fidelity contract**: `raw` is preserved on the AST root so
* `emitMd(parse(raw)) === raw` for every input the parser accepts.
* Byte-fidelity: `emitMd(parse(raw)) === raw`.
*
* @module @openclaw/oc-path/parse
*/
@@ -124,10 +109,7 @@ function walkBlocks(
bodyLines: readonly string[],
bodyFileLine: number,
): { preamble: string; blocks: AstBlock[] } {
// Match atx-style `##` only — setext h2 (`Heading\n---`) carries
// `markup: "-"` on the heading_open token, so the `markup === "##"`
// filter picks atx exclusively. Authors who want setext can still
// write it; substrate just doesn't address it as a section.
// Match atx `##` only; setext h2 has `markup: "-"`.
const h2: { tokenIdx: number; lineIdx: number; text: string }[] = [];
for (let i = 0; i < tokens.length; i++) {
const t = tokens[i];
@@ -147,9 +129,8 @@ function walkBlocks(
for (let h = 0; h < h2.length; h++) {
const start = h2[h].lineIdx;
const end = h + 1 < h2.length ? h2[h + 1].lineIdx : bodyLines.length;
// Slice tokens by INDEX so descendant tokens with no `map` (table
// cells, list markers, inline content) ride along with their
// mapped parent. heading_open / inline / heading_close = 3 tokens.
// Slice by INDEX so unmapped descendants (cells, markers, inline)
// ride along with their parent. h2 = open + inline + close = 3.
const tokenStart = h2[h].tokenIdx + 3;
const tokenEnd = h + 1 < h2.length ? h2[h + 1].tokenIdx : tokens.length;
const blockTokens = tokens.slice(tokenStart, tokenEnd);
@@ -167,16 +148,13 @@ function walkBlocks(
// ---------- Item extraction ----------------------------------------------
// Every list_item_open becomes an item (bullets, numbered, nested
// sub-bullets); lint rules flag depth / duplicate-slug collisions.
function extractItems(tokens: readonly Token[], bodyFileLine: number): AstItem[] {
// Every `list_item_open` becomes an item — bullets, numbered lists,
// nested sub-bullets all included. Lint rules can flag depth or
// duplicate-slug collisions; the parser stays opinion-free.
const items: AstItem[] = [];
for (let i = 0; i < tokens.length; i++) {
const t = tokens[i];
if (t.type !== "list_item_open" || t.map === null) {
continue;
}
if (t.type !== "list_item_open" || t.map === null) continue;
// First inline at the item's own depth is the item text.
let nestedDepth = 0;
let text = "";

View File

@@ -1,20 +1,11 @@
/**
* OcPath → AST node resolver.
* OcPath → MdAst node. Walks an in-memory AST; the file slot is
* informational (callers verify file matching upstream).
*
* Resolves an `OcPath` against a `MdAst` and returns the matched
* node (block / item / frontmatter entry / kv field) or `null` if the
* path doesn't match anything.
*
* The address dispatch:
*
* { file } → AST root
* { file, section } → AstBlock with matching slug
* { file, section, item } → AstItem inside that block
* { file, section, item, field } → kv.value of that item if kv.key matches
*
* The `file` segment is informational here — callers verify file
* matching before passing the AST. The resolver doesn't load files; it
* walks an in-memory AST.
* { file } → root
* { file, section } → block
* { file, section, item } → item
* { file, section, item, field } → kv.value
*
* @module @openclaw/oc-path/resolve
*/
@@ -23,10 +14,6 @@ import type { AstBlock, AstItem, FrontmatterEntry, MdAst } from "./ast.js";
import type { OcPath } from "./oc-path.js";
import { isOrdinalSeg, isPositionalSeg, parseOrdinalSeg, resolvePositionalSeg } from "./oc-path.js";
/**
* The resolved target plus a stable description of what kind of node it
* is. Lint rules and doctor fixers branch on `kind`.
*/
export type OcPathMatch =
| { readonly kind: "root"; readonly node: MdAst }
| { readonly kind: "frontmatter"; readonly node: FrontmatterEntry }
@@ -41,89 +28,46 @@ export type OcPathMatch =
};
/**
 * Resolve an `OcPath` against an in-memory `MdAst`.
 *
 * Address dispatch:
 *
 * - `section === "[frontmatter]"` (literal sentinel) addresses a
 *   frontmatter entry. The key is read from `item`, falling back to
 *   `field` for callers that thread 4-segment paths. Keys are compared
 *   with exact (case-sensitive) equality.
 * - No `section` → the AST root.
 * - `section` only → the block whose `slug` equals the lowercased section.
 * - `section` + `item` → an item inside that block. The item segment is
 *   tried as an ordinal (`#N`) first, then as a positional token
 *   (`$first` / `$last` / `-N`), then as a lowercased slug. Ordinals use
 *   document order so two items sharing a slug stay distinguishable.
 * - `section` + `item` + `field` → the item's `kv.value`, but only when
 *   the item has a `kv` and its key equals `field` case-insensitively.
 *
 * @param ast - The parsed markdown AST to search.
 * @param path - The address to resolve; the file slot is not consulted.
 * @returns The matched node tagged with its `kind`, or `null` when the
 *   path does not match anything.
 */
export function resolveMdOcPath(ast: MdAst, path: OcPath): OcPathMatch | null {
  // Frontmatter addressing: oc://FILE/[frontmatter]/key
  if (path.section === "[frontmatter]") {
    const key = path.item ?? path.field;
    if (key === undefined) return null;
    const entry = ast.frontmatter.find((e) => e.key === key);
    if (entry === undefined) return null;
    return { kind: "frontmatter", node: entry };
  }

  // Plain file-root address.
  if (path.section === undefined) return { kind: "root", node: ast };

  // Lowercase once, outside the callback: avoids a non-null assertion
  // inside the closure and repeated work per block.
  const sectionSlug = path.section.toLowerCase();
  const block = ast.blocks.find((b) => b.slug === sectionSlug);
  if (block === undefined) return null;
  if (path.item === undefined) return { kind: "block", node: block };

  // Item dispatch: ordinal (#N) > positional ($first/$last/-N) > slug.
  let item: AstItem | undefined;
  if (isOrdinalSeg(path.item)) {
    const n = parseOrdinalSeg(path.item);
    if (n === null || n < 0 || n >= block.items.length) return null;
    item = block.items[n];
  } else if (isPositionalSeg(path.item)) {
    const concrete = resolvePositionalSeg(path.item, {
      indexable: true,
      size: block.items.length,
    });
    if (concrete === null) return null;
    item = block.items[Number(concrete)];
  } else {
    const itemSlug = path.item.toLowerCase();
    item = block.items.find((i) => i.slug === itemSlug);
  }
  if (item === undefined) return null;
  if (path.field === undefined) return { kind: "item", node: item, block };

  // A field on an item without kv shape is unresolvable — return null
  // rather than guessing.
  if (item.kv === undefined) return null;
  if (item.kv.key.toLowerCase() !== path.field.toLowerCase()) return null;
  return { kind: "item-field", node: item, block, value: item.kv.value };
}

View File

@@ -1,39 +1,17 @@
/**
* Substrate-level redaction-sentinel guard.
*
* Closes the `__OPENCLAW_REDACTED__` corruption class by rejecting the
* literal string at the emit boundary. Per-call-site reject rules
* (added piecemeal in [#62281](https://github.com/openclaw/openclaw/issues/62281),
* [#44357](https://github.com/openclaw/openclaw/issues/44357),
* [#13495](https://github.com/openclaw/openclaw/issues/13495), and others)
* caught the symptom; this guard removes the substrate that produced
* the symptom in the first place.
*
* Throwing at emit (not at the consumer) means every code path through
* the substrate is covered, including future call sites we haven't
* audited.
* Redaction-sentinel guard. Throws at emit boundaries so every write
* path is covered, not just audited consumers.
*
* @module @openclaw/oc-path/sentinel
*/
/**
 * The literal string that marks redacted secrets in OpenClaw's runtime
 * representation. Writing it to disk is always a bug — the consumer
 * was supposed to drop the redacted view, not pass it through to the
 * writer.
 */
export const REDACTED_SENTINEL = "__OPENCLAW_REDACTED__";
/**
* Thrown when emit detects a `"__OPENCLAW_REDACTED__"` literal in any
* emitted bytes. Callers should treat this as a fatal write error;
* recovering by stripping the sentinel would silently corrupt the
* file. Fail-closed.
*
* `path` is the OcPath-shaped pointer to where the sentinel was
* detected (e.g., `oc://config/plugins.entries.foo.token`). For
* non-config emits, it's the closest meaningful address (frontmatter
* key, section/item slug, etc.) or just the file name.
* Thrown when emit detects the sentinel in output bytes. Fail-closed:
* stripping would silently corrupt the file. `path` is the closest
* OcPath-shaped pointer to the violation.
*/
export class OcEmitSentinelError extends Error {
readonly code = "OC_EMIT_SENTINEL";
@@ -46,16 +24,8 @@ export class OcEmitSentinelError extends Error {
}
}
/**
* Throw `OcEmitSentinelError` if `value` contains the redaction
* sentinel anywhere. Substring match (not equality) — a hostile caller
* embedding `prefix__OPENCLAW_REDACTED__suffix` in a leaf must be
* rejected just as forcefully as the bare sentinel; the substring form
* still leaks the marker bytes to disk where downstream scanners flag
* the file as corrupted.
*
* No-op for any non-string input. Used by every leaf-write boundary.
*/
// Substring match (not equality) — `prefix__OPENCLAW_REDACTED__suffix`
// still leaks the marker. No-op on non-string input.
export function guardSentinel(value: unknown, ocPath: string): void {
if (typeof value === "string" && value.includes(REDACTED_SENTINEL)) {
throw new OcEmitSentinelError(ocPath);

View File

@@ -1,16 +1,6 @@
/**
* Slug derivation for OcPath section/item addressing.
*
* A slug is the kebab-case lowercase form of a heading or item text:
* "Tool Guidance" → "tool-guidance"
* " Restricted Data " → "restricted-data"
* "deny-rule-1" → "deny-rule-1" (already a slug)
* "API_KEY" → "api-key"
* "Multi-tenant isolation" → "multi-tenant-isolation"
* "deny: secrets" → "deny-secrets" (colon + space → hyphen)
*
* Deterministic + idempotent. Used by parse to pre-compute slugs for
* blocks and items, and by resolveOcPath to match section/item names.
* Slug derivation: kebab-case lowercase, deterministic, idempotent.
* Used by parse + resolve for section/item addressing.
*
* @module @openclaw/oc-path/slug
*/
@@ -19,20 +9,7 @@ const NON_SLUG_CHARS = /[^a-z0-9-]+/g;
const COLLAPSE_HYPHENS = /-+/g;
const TRIM_HYPHENS = /^-+|-+$/g;
/**
* Convert arbitrary text into a slug usable as an OcPath segment.
*
* Rules:
* 1. Lowercase
* 2. Replace `_` with `-`
* 3. Replace any non-`[a-z0-9-]` runs with a single `-`
* 4. Collapse repeated `-`
* 5. Trim leading/trailing `-`
*
* Returns the empty string for input that has no slug-valid characters
* (e.g., `"!!"` → `""`); callers should treat empty slugs as not
* matchable rather than as wildcards.
*/
/** Returns the empty string for input with no slug-valid chars; callers should treat an empty slug as not matchable, never as a wildcard. */
export function slugify(text: string): string {
return text
.toLowerCase()

View File

@@ -1,12 +1,3 @@
/**
* `findOcPaths` — multi-match search verb test surface.
*
* Tests cover: `*` single-segment expansion across the supported kinds;
* `**` recursive descent for jsonc; the wildcard guard on
* `resolveOcPath` / `setOcPath`; the slot-shape preservation invariant
* (a `*` in the `item` slot produces concrete paths whose `item` field
* carries the matched value).
*/
import { describe, expect, it } from "vitest";
import { findOcPaths } from "../find.js";
import { parseJsonc } from "../jsonc/parse.js";
@@ -15,8 +6,6 @@ import { formatOcPath, hasWildcard, OcPathError, parseOcPath } from "../oc-path.
import { parseMd } from "../parse.js";
import { resolveOcPath, setOcPath } from "../universal.js";
// ---------- hasWildcard ----------------------------------------------------
describe("hasWildcard", () => {
it("detects single-segment * in any slot", () => {
expect(hasWildcard(parseOcPath("oc://X/*/y"))).toBe(true);
@@ -45,17 +34,10 @@ describe("hasWildcard", () => {
});
});
// ---------- Wildcard guard on resolveOcPath / setOcPath -------------------
describe("wildcard guard", () => {
const ast = parseJsonc('{"steps":[{"id":"a","command":"foo"}]}').ast;
it("resolveOcPath throws OcPathError for wildcard pattern (F16)", () => {
// Previously returned `null` — indistinguishable from "path doesn't
// resolve". Now throws with `OC_PATH_WILDCARD_IN_RESOLVE` so the
// CLI / consumers can surface "use findOcPaths" rather than "not
// found". setOcPath uses a discriminated `wildcard-not-allowed`
// reason; this is the resolve-side analogue.
it("resolveOcPath throws OcPathError for wildcard pattern", () => {
expect(() => resolveOcPath(ast, parseOcPath("oc://wf/steps/*/command"))).toThrow(
/findOcPaths/,
);
@@ -85,8 +67,6 @@ describe("wildcard guard", () => {
});
});
// ---------- findOcPaths — fast-path (no wildcards) -------------------------
describe("findOcPaths — non-wildcard fast-path", () => {
it("wraps resolveOcPath result for plain path", () => {
const ast = parseJsonc('{"name":"x"}').ast;
@@ -102,7 +82,6 @@ describe("findOcPaths — non-wildcard fast-path", () => {
});
});
// ---------- findOcPaths — JSONC --------------------------------------------
describe("findOcPaths — JSONC kind", () => {
const jsonc = parseJsonc(
@@ -137,7 +116,6 @@ describe("findOcPaths — JSONC kind", () => {
});
});
// ---------- findOcPaths — JSONL --------------------------------------------
describe("findOcPaths — JSONL kind", () => {
const jsonl = parseJsonl(
@@ -160,9 +138,6 @@ describe("findOcPaths — JSONL kind", () => {
}
});
// F8 — line-slot union and predicate. Without these, the jsonc
// walker handled them but JSONL fell through to `pickLine(addr)`
// which returns null for union/predicate shapes → silent zero matches.
it("union {L1,L2} at line slot enumerates each alternative", () => {
const out = findOcPaths(jsonl, parseOcPath("oc://session/{L1,L3}/event"));
expect(out).toHaveLength(2);
@@ -191,7 +166,6 @@ describe("findOcPaths — JSONL kind", () => {
});
});
// ---------- Positional primitives ($first / $last / -N) -------------------
describe("positional primitives — jsonc", () => {
const jsonc = parseJsonc('{"items":[10,20,30]}').ast;
@@ -222,7 +196,6 @@ describe("positional primitives — jsonc", () => {
});
it("hasWildcard returns false for positional patterns", () => {
  // Positional tokens are not wildcards (they resolve deterministically),
  // so hasWildcard must report false for each of them.
  for (const pattern of ["oc://X/$last/id", "oc://X/-1/id"]) {
    expect(hasWildcard(parseOcPath(pattern))).toBe(false);
  }
});
@@ -253,12 +226,8 @@ describe("positional primitives — jsonl", () => {
});
});
// ---------- Segment unions {a,b,c} -----------------------------------------
describe("quoted segments (v1.0)", () => {
// Evidence: openclaw#69004 — model alias `anthropic/claude-opus-4-7`.
// Slash inside the key has no other syntax that doesn't conflict with
// path-level slash split.
const jsonc = parseJsonc(
'{"agents":{"defaults":{"models":{' +
'"anthropic/claude-opus-4-7":{"alias":"opus47","contextWindow":1000000},' +
@@ -301,7 +270,6 @@ describe("quoted segments (v1.0)", () => {
});
it("quoted segment with embedded escape sequences", () => {
// Key literally contains a backslash and a quote.
const ast = parseJsonc('{"keys":{"a\\\\b":"v1","c\\"d":"v2"}}').ast;
const m1 = resolveOcPath(ast, parseOcPath('oc://X/keys/"a\\\\b"'));
expect(m1?.kind).toBe("leaf");
@@ -313,7 +281,6 @@ describe("quoted segments (v1.0)", () => {
it("findOcPaths — wildcard returns paths with quoted keys when needed", () => {
const out = findOcPaths(jsonc, parseOcPath("oc://config/agents.defaults.models/*/alias"));
expect(out).toHaveLength(3);
// The two slash-bearing keys round-trip via quotes; `plain` stays bare.
const items = out.map((m) => m.path.item);
expect(items.some((s) => s === "plain")).toBe(true);
expect(items.some((s) => s === '"anthropic/claude-opus-4-7"')).toBe(true);
@@ -338,8 +305,6 @@ describe("quoted segments (v1.0)", () => {
});
describe("value predicates — numeric operators (v1.1)", () => {
// Evidence: openclaw#54383 — compaction fails when maxTokens > model output cap.
// Doctor lint rule: flag any model with maxTokens > 128000 (Anthropic per-request output cap).
const jsonc = parseJsonc(
'{"models":{"providers":{"anthropic":{"models":[' +
'{"id":"claude-sonnet-4-6","contextWindow":1000000,"maxTokens":128000},' +
@@ -348,7 +313,6 @@ describe("value predicates — numeric operators (v1.1)", () => {
"]}}}}",
).ast;
// Slot layout: section=`models.providers.anthropic.models`, item=predicate, field=`id`.
const PREFIX = "oc://config/models.providers.anthropic.models";
it("> finds models exceeding the per-request output cap", () => {
@@ -380,7 +344,6 @@ describe("value predicates — numeric operators (v1.1)", () => {
});
it("numeric operator rejects non-numeric leaves silently", () => {
  // `id` leaves are strings; a numeric comparison against them must yield
  // no match rather than a false positive.
  const pattern = parseOcPath(`${PREFIX}/[id>5]/id`);
  const matches = findOcPaths(jsonc, pattern);
  expect(matches).toHaveLength(0);
});
@@ -404,10 +367,9 @@ describe("value predicates — jsonc", () => {
});
});
// ---------- Ordinal addressing (#N) for distinct duplicate slugs ----------
describe("ordinal addressing — md", () => {
// Two items with the same slug after slugify (`foo: a` and `foo: b`).
// Two items share slug `foo` after slugify.
const md = parseMd("## Tools\n\n- foo: a\n- foo: b\n- bar: c\n").ast;
it("#0 picks the first item by document order", () => {
@@ -432,7 +394,6 @@ describe("ordinal addressing — md", () => {
it("findOcPaths disambiguates duplicate-slug items via #N", () => {
const out = findOcPaths(md, parseOcPath("oc://AGENTS.md/tools/*/foo"));
// 2 items have key `foo` (and matching slug); 1 has `bar` (no match).
expect(out).toHaveLength(2);
const items = out.map((m) => m.path.item);
expect(items).toEqual(["#0", "#1"]);
@@ -444,12 +405,10 @@ describe("ordinal addressing — md", () => {
const md2 = parseMd("## Tools\n\n- foo: a\n- bar: b\n").ast;
const out = findOcPaths(md2, parseOcPath("oc://AGENTS.md/tools/*"));
const items = out.map((m) => m.path.item);
// Both unique → both stay as slugs.
expect(items.toSorted((a, b) => (a ?? "").localeCompare(b ?? ""))).toEqual(["bar", "foo"]);
});
});
// ---------- findOcPaths — Markdown -----------------------------------------
describe("findOcPaths — Markdown kind", () => {
const md = parseMd(
@@ -468,7 +427,6 @@ describe("findOcPaths — Markdown kind", () => {
});
it("* in field slot enumerates each item kv key", () => {
// Item slug is the kv-key slug ('send_email' → 'send-email').
const out = findOcPaths(md, parseOcPath("oc://SKILL.md/Tools/send-email/*"));
expect(out).toHaveLength(1);
expect(out[0].match.kind).toBe("leaf");
@@ -478,26 +436,15 @@ describe("findOcPaths — Markdown kind", () => {
});
it("* in item slot + matching field returns each item whose kv key matches", () => {
  // Only the `- send_email: enabled` item has the kv key `send_email`; the
  // other items (search, read_email) use different keys, so the `*` in the
  // item slot must expand to exactly one match.
  const pattern = parseOcPath("oc://SKILL.md/Tools/*/send_email");
  const matches = findOcPaths(md, pattern);
  expect(matches).toHaveLength(1);
  const [only] = matches;
  expect(only.path.item).toBe("send-email");
});
it("** at section slot matches items at every depth (F14 — cross-kind symmetry)", () => {
// Without the retain-i branch on `**`, walkMd only descended one
// level (i + 1, consumed `**`) — the jsonc walker also retains
// `**` to keep matching deeper. Lint rules expecting universal
// `**` behavior across kinds (sweep all sections for `risk:`)
// would silently get 0 md matches on a multi-block file.
//
// Pattern `**/send-email` — `**` matches the `tools` block, then
// `send-email` (kebab slug) matches the item under it. Without the
// retain-i branch, the walker descends with `**` consumed at the
// section layer and then can't satisfy the item slot since the
// walker is now inside the wrong block looking for an item slug.
it("** at section slot matches items at every depth (cross-kind symmetry)", () => {
// The retain-i branch on `**` keeps the wildcard active across
// descent — without it, multi-block md files match only the
// immediate-block layer.
const multiBlock = parseMd(
"## Boundaries\n\n" +
"- never: rm -rf\n\n" +
@@ -506,22 +453,14 @@ describe("findOcPaths — Markdown kind", () => {
"- search: enabled\n",
).ast;
const out = findOcPaths(multiBlock, parseOcPath("oc://SOUL.md/**/send-email"));
// The `send-email` item is under the `tools` block. Pin that we
// get at least one match (the substrate's md `**` should reach it).
expect(out.length).toBeGreaterThanOrEqual(1);
const items = out.map((m) => m.path.item).filter((v): v is string => v !== undefined);
expect(items).toContain("send-email");
});
});
describe("findOcPaths — quoted segments survive expansion (regression: resolve↔find symmetry)", () => {
describe("findOcPaths — quoted segments survive expansion", () => {
it("finds keys with slashes when the path quotes them and a sibling wildcards", () => {
// Closes ClawSweeper P2 on PR #78678: when a pattern needs
// expansion (e.g. trailing union or wildcard), the JSONC walker
// bypassed `resolveJsoncOcPath` and compared object keys to the
// raw `cur.value` directly. Patterns with quoted literals
// returned no matches even though resolve worked. This test
// exercises a quoted middle segment + a trailing union.
const raw = `{
"agents": {
"defaults": {
@@ -542,7 +481,6 @@ describe("findOcPaths — quoted segments survive expansion (regression: resolve
'oc://config.jsonc/agents.defaults.models/"github-copilot/claude-opus-4-7"/{alias,contextWindow}',
),
);
// Both alternatives in the union should match.
expect(out.length).toBe(2);
const fields = out
.map((m) => m.path.field)
@@ -551,14 +489,8 @@ describe("findOcPaths — quoted segments survive expansion (regression: resolve
});
});
// ---------- I3: md walker union + predicate parity ------------------------
describe("union segments — md", () => {
// Cross-kind parity: the jsonc walker already dispatches union at every
// slot. The md walker previously dispatched only on wildcard / ordinal
// / positional / literal — so `oc://X.md/{Boundaries,Limits}/...`
// matched zero items where the same shape on jsonc would match both.
// These tests pin the parity addition.
const RAW = `## Boundaries
- enabled: true
@@ -590,9 +522,7 @@ describe("union segments — md", () => {
expect(items).toEqual(["alias", "max-tokens"]);
});
it("expands {a,b} at the field slot (degenerate but parity-preserving)", () => {
// Md items hold a single kv field, so {alias,nope} matches at most
// one alt — the matching one. Mirrors the jsonc dispatch shape.
it("expands {a,b} at the field slot — md items have one kv, so at most one alt", () => {
const ast = parseMd(RAW).ast;
const out = findOcPaths(ast, parseOcPath("oc://X.md/limits/alias/{alias,nope}"));
expect(out.length).toBe(1);
@@ -613,8 +543,6 @@ describe("predicate segments — md", () => {
`;
it("matches sections that contain an item satisfying the predicate", () => {
// [enabled=true] — only Boundaries has an item kv.key=enabled with
// value=true; Limits's enabled=false fails the predicate.
const ast = parseMd(RAW).ast;
const out = findOcPaths(ast, parseOcPath("oc://X.md/[enabled=true]/*/*"));
expect(out.length).toBeGreaterThan(0);
@@ -631,7 +559,6 @@ describe("predicate segments — md", () => {
});
it("matches the kv pair at the field slot", () => {
// [max-tokens=4096] at the field slot — checks the kv pair as a unit.
const ast = parseMd(RAW).ast;
const out = findOcPaths(
ast,

View File

@@ -217,12 +217,8 @@ describe("setJsonlOcPath — positional field tokens (round-11 resolve↔edit sy
});
});
describe("setJsonlOcPath — quoted field segments (regression: resolve↔edit symmetry)", () => {
describe("setJsonlOcPath — quoted field segments", () => {
it("edits a field key containing a slash via quoted segment", () => {
// Closes ClawSweeper P2 on PR #78678: JSONL resolve unquotes
// bracket-aware segments but the edit path used plain
// `.split('.')`. A path that resolves under `Lnnn` MUST be
// editable through the same address.
const raw = `{"event":"start","detail":{"github/repo":"old"}}\n`;
const { ast } = parseJsonl(raw);
const r = setJsonlOcPath(ast, parseOcPath('oc://x.jsonl/L1/detail/"github/repo"'), {

View File

@@ -1,16 +1,3 @@
/**
* Wave 20 — JSONL append + multi-agent session sim.
*
* Substrate guarantee: `appendJsonlOcPath(ast, value)` returns a new AST
* with the value appended as a new line. Single-writer model at the
* substrate; concurrent-append safety lives in the LKG tracker layer
* (PR-4) on top of git's three-way merge.
*
* Append for other kinds (jsonc array push, md item-to-section) was
* removed from the substrate — those are domain operations that ride
* on top of `setXxxOcPath` at the doctor / tracker layer, where the
* value shapes are domain-defined.
*/
import { describe, expect, it } from "vitest";
import type { JsoncValue } from "../../jsonc/ast.js";
import { appendJsonlOcPath } from "../../jsonl/edit.js";
@@ -27,8 +14,8 @@ function event(name: string, n: number): JsoncValue {
};
}
describe("wave-20 jsonl append + multi-agent session sim", () => {
it("A-01 single agent appends 100 events in order", () => {
describe("jsonl append + multi-agent session sim", () => {
it("single agent appends 100 events in order", () => {
let ast = parseJsonl("").ast;
for (let i = 0; i < 100; i++) {
ast = appendJsonlOcPath(ast, event("step", i));
@@ -41,7 +28,7 @@ describe("wave-20 jsonl append + multi-agent session sim", () => {
expect(JSON.parse(lines[99] ?? "")).toEqual({ event: "step", n: 99 });
});
it("A-02 two agents alternating appends preserve interleave order", () => {
it("two agents alternating appends preserve interleave order", () => {
let ast = parseJsonl("").ast;
for (let i = 0; i < 10; i++) {
const agent = i % 2 === 0 ? "a" : "b";
@@ -57,7 +44,7 @@ describe("wave-20 jsonl append + multi-agent session sim", () => {
}
});
it("A-03 append after a malformed line preserves both", () => {
it("append after a malformed line preserves both", () => {
let ast = parseJsonl('{"a":1}\nbroken\n').ast;
ast = appendJsonlOcPath(ast, event("start", 1));
const out = emitJsonl(ast);
@@ -65,14 +52,14 @@ describe("wave-20 jsonl append + multi-agent session sim", () => {
expect(out).toContain('"event":"start"');
});
it("A-04 append to empty file produces a single value line", () => {
it("append to empty file produces a single value line", () => {
let ast = parseJsonl("").ast;
ast = appendJsonlOcPath(ast, event("first", 0));
const out = emitJsonl(ast);
expect(JSON.parse(out)).toEqual({ event: "first", n: 0 });
});
it("A-05 append assigns line numbers monotonically", () => {
it("append assigns line numbers monotonically", () => {
let ast = parseJsonl("").ast;
ast = appendJsonlOcPath(ast, event("a", 0));
ast = appendJsonlOcPath(ast, event("b", 1));
@@ -80,7 +67,7 @@ describe("wave-20 jsonl append + multi-agent session sim", () => {
expect(ast.lines.map((l) => l.line)).toEqual([1, 2, 3]);
});
it("A-06 append after blank lines preserves line-number gaps correctly", () => {
it("append after blank lines preserves line-number gaps correctly", () => {
let ast = parseJsonl('{"a":1}\n\n\n').ast;
ast = appendJsonlOcPath(ast, event("after", 0));
// Existing lines: L1 value, L2 blank, L3 blank. Appended line is L4.
@@ -88,7 +75,7 @@ describe("wave-20 jsonl append + multi-agent session sim", () => {
expect(ast.lines[3]?.line).toBe(4);
});
it("A-07 1000-event session sim is deterministic", () => {
it("1000-event session sim is deterministic", () => {
let ast = parseJsonl("").ast;
for (let i = 0; i < 1000; i++) {
ast = appendJsonlOcPath(ast, event("e", i));
@@ -100,14 +87,14 @@ describe("wave-20 jsonl append + multi-agent session sim", () => {
expect(JSON.parse(lines[999] ?? "").n).toBe(999);
});
it("A-08 append is non-mutating on the input AST", () => {
it("append is non-mutating on the input AST", () => {
const ast = parseJsonl('{"a":1}\n').ast;
const before = JSON.stringify(ast);
appendJsonlOcPath(ast, event("x", 0));
expect(JSON.stringify(ast)).toBe(before);
});
it("A-09 append preserves prior raw bytes (renders new tail)", () => {
it("append preserves prior raw bytes (renders new tail)", () => {
let ast = parseJsonl('{"a":1}\n').ast;
ast = appendJsonlOcPath(ast, event("b", 1));
const out = emitJsonl(ast);
@@ -118,7 +105,7 @@ describe("wave-20 jsonl append + multi-agent session sim", () => {
expect(JSON.parse(lines[1] ?? "")).toEqual({ event: "b", n: 1 });
});
it("A-10 deterministic line-number assignment after malformed lines", () => {
it("deterministic line-number assignment after malformed lines", () => {
let ast = parseJsonl('{"a":1}\nbroken\n{"b":2}\n').ast;
ast = appendJsonlOcPath(ast, event("c", 2));
expect(ast.lines.map((l) => l.line)).toEqual([1, 2, 3, 4]);

View File

@@ -1,9 +1,3 @@
/**
* Wave 1 — byte-fidelity round-trip.
*
* Substrate guarantee: `emitMd(parse(raw), { mode: 'roundtrip' }) === raw`
* for every input the parser accepts. This wave hammers that.
*/
import { describe, expect, it } from "vitest";
import { emitMd } from "../../emit.js";
import { parseMd } from "../../parse.js";
@@ -13,72 +7,72 @@ function roundTrip(raw: string): string {
return emitMd(ast);
}
describe("wave-01 byte-fidelity", () => {
it("B-01 empty file", () => {
describe("byte-fidelity", () => {
it("empty file", () => {
expect(roundTrip("")).toBe("");
});
it("B-02 whitespace-only file", () => {
it("whitespace-only file", () => {
expect(roundTrip(" \n\n \n")).toBe(" \n\n \n");
});
it("B-03 single newline", () => {
it("single newline", () => {
expect(roundTrip("\n")).toBe("\n");
});
it("B-04 file without trailing newline", () => {
it("file without trailing newline", () => {
expect(roundTrip("## H\n- item")).toBe("## H\n- item");
});
it("B-05 file with trailing newline", () => {
it("file with trailing newline", () => {
expect(roundTrip("## H\n- item\n")).toBe("## H\n- item\n");
});
it("B-06 file with multiple trailing newlines", () => {
it("file with multiple trailing newlines", () => {
expect(roundTrip("## H\n- item\n\n\n")).toBe("## H\n- item\n\n\n");
});
it("B-07 BOM at start", () => {
it("BOM at start", () => {
const raw = "## Heading\n- item\n";
expect(roundTrip(raw)).toBe(raw);
});
it("B-08 CRLF line endings", () => {
it("CRLF line endings", () => {
const raw = "## H\r\n\r\n- item\r\n";
expect(roundTrip(raw)).toBe(raw);
});
it("B-09 mixed line endings (CRLF + LF)", () => {
it("mixed line endings (CRLF + LF)", () => {
const raw = "## H\r\n- item\n- another\r\n";
expect(roundTrip(raw)).toBe(raw);
});
it("B-10 tabs preserved in body", () => {
it("tabs preserved in body", () => {
const raw = "## H\n\n\tindented body\n";
expect(roundTrip(raw)).toBe(raw);
});
it("B-11 trailing whitespace on lines preserved", () => {
it("trailing whitespace on lines preserved", () => {
const raw = "## Heading \n- item \n";
expect(roundTrip(raw)).toBe(raw);
});
it("B-12 multiple consecutive blank lines preserved", () => {
it("multiple consecutive blank lines preserved", () => {
const raw = "## H\n\n\n\n- item\n";
expect(roundTrip(raw)).toBe(raw);
});
it("B-13 frontmatter only, no body", () => {
it("frontmatter only, no body", () => {
const raw = "---\nname: x\n---\n";
expect(roundTrip(raw)).toBe(raw);
});
it("B-14 body only, no frontmatter, no headings", () => {
it("body only, no frontmatter, no headings", () => {
const raw = "Just some prose.\nNo structure.\n";
expect(roundTrip(raw)).toBe(raw);
});
it("B-15 frontmatter + body + multiple sections", () => {
it("frontmatter + body + multiple sections", () => {
const raw = `---
name: github
description: gh CLI
@@ -98,27 +92,27 @@ Preamble.
expect(roundTrip(raw)).toBe(raw);
});
it("B-16 unicode content preserved", () => {
it("unicode content preserved", () => {
const raw = "## Café Section\n\n- résumé item\n- 日本語\n";
expect(roundTrip(raw)).toBe(raw);
});
it("B-17 emoji preserved", () => {
it("emoji preserved", () => {
const raw = "## 🚀 Launch\n\n- ✅ ready\n- 🔒 secure\n";
expect(roundTrip(raw)).toBe(raw);
});
it("B-18 frontmatter with special chars in values", () => {
it("frontmatter with special chars in values", () => {
const raw = `---\nurl: https://example.com:443/path?q=1&a=2\n---\n`;
expect(roundTrip(raw)).toBe(raw);
});
it("B-19 file with mixed bullet markers (-, *, +)", () => {
it("file with mixed bullet markers (-, *, +)", () => {
const raw = "## H\n\n- dash\n* star\n+ plus\n";
expect(roundTrip(raw)).toBe(raw);
});
it("B-20 raw === parse(raw).raw === emitMd(parse(raw)) for 50 random shapes", () => {
it("raw === parse(raw).raw === emitMd(parse(raw)) for 50 random shapes", () => {
const inputs = [
"",
"\n",

View File

@@ -1,10 +1,3 @@
/**
* Wave 13 — cross-cutting integration.
*
* Pipelines: parse + resolve + emit working together. Slug stability
* across re-parses. OcPath round-trip via the AST (slugs in OcPath
* must round-trip back to the resolved node).
*/
import { describe, expect, it } from "vitest";
import { emitMd } from "../../emit.js";
import { formatOcPath, parseOcPath } from "../../oc-path.js";
@@ -29,15 +22,15 @@ Preamble.
- curl: HTTP client
`;
describe("wave-13 cross-cutting", () => {
it("CC-01 parse → resolve → emit pipeline (block)", () => {
describe("cross-cutting", () => {
it("parse → resolve → emit pipeline (block)", () => {
const { ast } = parseMd(SAMPLE);
const m = resolveOcPath(ast, { file: "AGENTS.md", section: "boundaries" });
expect(m?.kind).toBe("block");
expect(emitMd(ast)).toBe(SAMPLE);
});
it("CC-02 OcPath round-trip via AST: parse + resolve + format", () => {
it("OcPath round-trip via AST: parse + resolve + format", () => {
const { ast } = parseMd(SAMPLE);
for (const block of ast.blocks) {
const path = parseOcPath(`oc://AGENTS.md/${block.slug}`);
@@ -48,7 +41,7 @@ describe("wave-13 cross-cutting", () => {
}
});
it("CC-03 every item in every block is OcPath-addressable", () => {
it("every item in every block is OcPath-addressable", () => {
const { ast } = parseMd(SAMPLE);
for (const block of ast.blocks) {
for (const item of block.items) {
@@ -59,7 +52,7 @@ describe("wave-13 cross-cutting", () => {
}
});
it("CC-04 every kv item field is OcPath-addressable", () => {
it("every kv item field is OcPath-addressable", () => {
const { ast } = parseMd(SAMPLE);
for (const block of ast.blocks) {
for (const item of block.items) {
@@ -73,7 +66,7 @@ describe("wave-13 cross-cutting", () => {
}
});
it("CC-05 every frontmatter entry is OcPath-addressable", () => {
it("every frontmatter entry is OcPath-addressable", () => {
const { ast } = parseMd(SAMPLE);
for (const fm of ast.frontmatter) {
const path = parseOcPath(`oc://AGENTS.md/[frontmatter]/${fm.key}`);
@@ -82,7 +75,7 @@ describe("wave-13 cross-cutting", () => {
}
});
it("CC-06 slugs are stable across re-parses (deterministic)", () => {
it("slugs are stable across re-parses (deterministic)", () => {
const a1 = parseMd(SAMPLE).ast;
const a2 = parseMd(SAMPLE).ast;
expect(a1.blocks.map((b) => b.slug)).toEqual(a2.blocks.map((b) => b.slug));
@@ -91,7 +84,7 @@ describe("wave-13 cross-cutting", () => {
);
});
it("CC-07 modifying raw + re-parse produces consistent AST shape", () => {
it("modifying raw + re-parse produces consistent AST shape", () => {
const a1 = parseMd(SAMPLE).ast;
const modified = SAMPLE.replace("GitHub CLI", "GitHub command-line interface");
const a2 = parseMd(modified).ast;
@@ -105,20 +98,20 @@ describe("wave-13 cross-cutting", () => {
expect(ghItem?.kv?.value).toBe("GitHub command-line interface");
});
it("CC-08 unknown OcPath returns null without affecting subsequent valid resolves", () => {
it("unknown OcPath returns null without affecting subsequent valid resolves", () => {
const { ast } = parseMd(SAMPLE);
expect(resolveOcPath(ast, { file: "X.md", section: "nonexistent" })).toBeNull();
expect(resolveOcPath(ast, { file: "X.md", section: "tools" })?.kind).toBe("block");
});
it("CC-09 resolve does not depend on file segment matching", () => {
it("resolve does not depend on file segment matching", () => {
const { ast } = parseMd(SAMPLE);
const a = resolveOcPath(ast, { file: "A.md", section: "tools" });
const b = resolveOcPath(ast, { file: "B.md", section: "tools" });
expect(a?.kind).toBe(b?.kind);
});
it("CC-10 round-trip across all 9 valid OcPath shapes", () => {
it("round-trip across all 9 valid OcPath shapes", () => {
const { ast } = parseMd(SAMPLE);
const cases = [
{ file: "X.md" },

View File

@@ -1,16 +1,3 @@
/**
* Wave 22 — cross-kind property invariants.
*
* Per-kind verbs hold the same shape contracts regardless of kind:
*
* 1. parse → emit (round-trip) is byte-stable for ALL kinds
* 2. resolve is non-mutating for ALL kinds
* 3. set returns structured failure (never throws) for unresolvable
* paths across ALL kinds
* 4. inferKind aligns with the parsers consumers actually pick
* 5. parse → emit → parse is fixpoint
* 6. hostile inputs do not throw at parse time
*/
import { describe, expect, it } from "vitest";
import { inferKind } from "../../dispatch.js";
import { setMdOcPath } from "../../edit.js";
@@ -27,18 +14,18 @@ import { parseOcPath } from "../../oc-path.js";
import { parseMd } from "../../parse.js";
import { resolveMdOcPath } from "../../resolve.js";
describe("wave-22 cross-kind property invariants", () => {
describe("cross-kind property invariants", () => {
const mdRaw = "---\nname: x\n---\n\n## Boundaries\n\n- enabled: true\n";
const jsoncRaw = '// h\n{ "k": 1, "n": [1,2,3] }\n';
const jsonlRaw = '{"a":1}\n\nbroken\n{"b":2}\n';
it("P-01 round-trip parse → emit is byte-stable across all kinds", () => {
it("round-trip parse → emit is byte-stable across all kinds", () => {
expect(emitMd(parseMd(mdRaw).ast)).toBe(mdRaw);
expect(emitJsonc(parseJsonc(jsoncRaw).ast)).toBe(jsoncRaw);
expect(emitJsonl(parseJsonl(jsonlRaw).ast)).toBe(jsonlRaw);
});
it("P-02 resolve is non-mutating across all kinds", () => {
it("resolve is non-mutating across all kinds", () => {
const md = parseMd(mdRaw).ast;
let before = JSON.stringify(md);
resolveMdOcPath(md, parseOcPath("oc://X/[frontmatter]/name"));
@@ -58,7 +45,7 @@ describe("wave-22 cross-kind property invariants", () => {
expect(JSON.stringify(jsonl)).toBe(before);
});
it("P-03 unresolvable set never throws across all kinds", () => {
it("unresolvable set never throws across all kinds", () => {
const ocPath = parseOcPath("oc://X/totally.missing.path");
expect(() => setMdOcPath(parseMd(mdRaw).ast, ocPath, "x")).not.toThrow();
expect(() =>
@@ -75,7 +62,7 @@ describe("wave-22 cross-kind property invariants", () => {
).not.toThrow();
});
it("P-04 inferKind aligns with the parser actually used", () => {
it("inferKind aligns with the parser actually used", () => {
expect(inferKind("AGENTS.md")).toBe("md");
expect(inferKind("SOUL.md")).toBe("md");
expect(inferKind("config.jsonc")).toBe("jsonc");
@@ -84,7 +71,7 @@ describe("wave-22 cross-kind property invariants", () => {
expect(inferKind("audit.ndjson")).toBe("jsonl");
});
it("P-05 parse → emit → parse is fixpoint across all kinds", () => {
it("parse → emit → parse is fixpoint across all kinds", () => {
const md1 = emitMd(parseMd(mdRaw).ast);
const md2 = emitMd(parseMd(md1).ast);
expect(md1).toBe(md2);
@@ -98,7 +85,7 @@ describe("wave-22 cross-kind property invariants", () => {
expect(jl1).toBe(jl2);
});
it("P-06 hostile inputs do not throw at parse time across all kinds", () => {
it("hostile inputs do not throw at parse time across all kinds", () => {
const hostile = [
"\x00\x01\x02 binary garbage",
'{ "unclosed":',
@@ -112,14 +99,14 @@ describe("wave-22 cross-kind property invariants", () => {
}
});
it("P-07 resolver returns null for paths past valid kinds (no throw)", () => {
it("resolver returns null for paths past valid kinds (no throw)", () => {
const overlong = parseOcPath("oc://X/a/b/c.d.e.f.g.h");
expect(() => resolveMdOcPath(parseMd(mdRaw).ast, overlong)).not.toThrow();
expect(() => resolveJsoncOcPath(parseJsonc(jsoncRaw).ast, overlong)).not.toThrow();
expect(() => resolveJsonlOcPath(parseJsonl(jsonlRaw).ast, overlong)).not.toThrow();
});
it("P-08 set-then-resolve produces the value just written (jsonc)", () => {
it("set-then-resolve produces the value just written (jsonc)", () => {
const ast = parseJsonc('{ "k": 1 }').ast;
const r = setJsoncOcPath(ast, parseOcPath("oc://X/k"), {
kind: "number",
@@ -133,13 +120,13 @@ describe("wave-22 cross-kind property invariants", () => {
}
});
it("P-09 verbs are deterministic — same input twice produces same output", () => {
it("verbs are deterministic — same input twice produces same output", () => {
expect(emitMd(parseMd(mdRaw).ast)).toBe(emitMd(parseMd(mdRaw).ast));
expect(emitJsonc(parseJsonc(jsoncRaw).ast)).toBe(emitJsonc(parseJsonc(jsoncRaw).ast));
expect(emitJsonl(parseJsonl(jsonlRaw).ast)).toBe(emitJsonl(parseJsonl(jsonlRaw).ast));
});
it("P-10 inferKind returns null for unknown extensions", () => {
it("inferKind returns null for unknown extensions", () => {
expect(inferKind("binary.bin")).toBeNull();
expect(inferKind("no-ext")).toBeNull();
expect(inferKind("archive.tar.gz")).toBeNull();

View File

@@ -1,10 +1,3 @@
/**
* Wave 19 — edit → emit round-trip across all kinds.
*
* Substrate guarantee: parse → setXxxOcPath → emitXxx produces valid
* bytes that re-parse to an AST whose addressed value reflects the edit.
* Per-kind verbs throughout — caller picks based on AST type.
*/
import { describe, expect, it } from "vitest";
import { setMdOcPath } from "../../edit.js";
import { emitMd } from "../../emit.js";
@@ -18,8 +11,8 @@ import { parseJsonl } from "../../jsonl/parse.js";
import { parseOcPath } from "../../oc-path.js";
import { parseMd } from "../../parse.js";
describe("wave-19 edit-then-emit round-trip", () => {
it("EE-01 md frontmatter edit re-parses to the new value", () => {
describe("edit-then-emit round-trip", () => {
it("md frontmatter edit re-parses to the new value", () => {
const md = parseMd("---\nname: old\n---\n\n## Body\n").ast;
const r = setMdOcPath(md, parseOcPath("oc://AGENTS.md/[frontmatter]/name"), "new");
expect(r.ok).toBe(true);
@@ -29,7 +22,7 @@ describe("wave-19 edit-then-emit round-trip", () => {
}
});
it("EE-02 md item kv edit re-parses to the new value", () => {
it("md item kv edit re-parses to the new value", () => {
const md = parseMd("## Boundaries\n\n- timeout: 5\n").ast;
const r = setMdOcPath(md, parseOcPath("oc://AGENTS.md/boundaries/timeout/timeout"), "60");
expect(r.ok).toBe(true);
@@ -40,7 +33,7 @@ describe("wave-19 edit-then-emit round-trip", () => {
}
});
it("EE-03 jsonc value edit re-parses to the new value", () => {
it("jsonc value edit re-parses to the new value", () => {
const ast = parseJsonc('{ "k": 1 }').ast;
const r = setJsoncOcPath(ast, parseOcPath("oc://config/k"), {
kind: "number",
@@ -52,7 +45,7 @@ describe("wave-19 edit-then-emit round-trip", () => {
}
});
it("EE-04 jsonc nested edit preserves untouched siblings", () => {
it("jsonc nested edit preserves untouched siblings", () => {
const ast = parseJsonc('{ "a": 1, "b": { "c": 2, "d": 3 }, "e": 4 }').ast;
const r = setJsoncOcPath(ast, parseOcPath("oc://config/b.c"), {
kind: "number",
@@ -67,7 +60,7 @@ describe("wave-19 edit-then-emit round-trip", () => {
}
});
it("EE-05 jsonl line edit re-parses to the new value at the same line", () => {
it("jsonl line edit re-parses to the new value at the same line", () => {
const ast = parseJsonl('{"a":1}\n{"a":2}\n{"a":3}\n').ast;
const r = setJsonlOcPath(ast, parseOcPath("oc://log/L2/a"), {
kind: "number",
@@ -84,7 +77,7 @@ describe("wave-19 edit-then-emit round-trip", () => {
}
});
it("EE-06 jsonc edit composes: two sequential edits both land", () => {
it("jsonc edit composes: two sequential edits both land", () => {
let ast = parseJsonc('{ "a": 1, "b": 2 }').ast;
let r = setJsoncOcPath(ast, parseOcPath("oc://config/a"), {
kind: "number",
@@ -103,7 +96,7 @@ describe("wave-19 edit-then-emit round-trip", () => {
expect(JSON.parse(emitJsonc(ast))).toEqual({ a: 10, b: 20 });
});
it("EE-07 missing path returns structured failure (not throw)", () => {
it("missing path returns structured failure (not throw)", () => {
const ast = parseJsonc('{ "a": 1 }').ast;
const r = setJsoncOcPath(ast, parseOcPath("oc://config/missing"), {
kind: "number",
@@ -115,7 +108,7 @@ describe("wave-19 edit-then-emit round-trip", () => {
}
});
it("EE-08 each per-kind verb takes its own AST type — no cross-kind leakage", () => {
it("each per-kind verb takes its own AST type — no cross-kind leakage", () => {
// Type-level guarantee: each setter only accepts its kind's AST.
// Caller picks based on the AST they have. This is the design.
const md = parseMd("---\nx: 1\n---\n").ast;
@@ -137,7 +130,7 @@ describe("wave-19 edit-then-emit round-trip", () => {
expect(c.ok).toBe(true);
});
it("EE-09 jsonc parser-backed edit preserves comments", () => {
it("jsonc parser-backed edit preserves comments", () => {
const raw = '{\n "k": 1 // comment\n}\n';
const ast = parseJsonc(raw).ast;
const r = setJsoncOcPath(ast, parseOcPath("oc://config/k"), {
@@ -154,7 +147,7 @@ describe("wave-19 edit-then-emit round-trip", () => {
}
});
it("EE-10 edit on empty AST surfaces no-root", () => {
it("edit on empty AST surfaces no-root", () => {
const ast = parseJsonc("").ast;
const r = setJsoncOcPath(ast, parseOcPath("oc://config/x"), {
kind: "number",

View File

@@ -1,15 +1,8 @@
/**
* Wave 2 — frontmatter edges.
*
* Substrate guarantee: frontmatter is parsed as `key: value` entries
* with quote-stripping; malformed frontmatter doesn't crash the parser
* (soft-error policy: emit diagnostic, recover).
*/
import { describe, expect, it } from "vitest";
import { parseMd } from "../../parse.js";
describe("wave-02 frontmatter-edges", () => {
it("FM-01 simple kv pairs", () => {
describe("frontmatter-edges", () => {
it("simple kv pairs", () => {
const { ast } = parseMd("---\nname: x\ndescription: y\n---\n");
expect(ast.frontmatter.map((e) => [e.key, e.value])).toEqual([
["name", "x"],
@@ -17,50 +10,50 @@ describe("wave-02 frontmatter-edges", () => {
]);
});
it("FM-02 unclosed frontmatter emits diagnostic, treats as preamble", () => {
it("unclosed frontmatter emits diagnostic, treats as preamble", () => {
const { ast, diagnostics } = parseMd("---\nname: x\nno close fence\nbody\n");
expect(diagnostics.some((d) => d.code === "OC_FRONTMATTER_UNCLOSED")).toBe(true);
expect(ast.frontmatter).toEqual([]);
});
it("FM-03 empty frontmatter (just open + close)", () => {
it("empty frontmatter (just open + close)", () => {
const { ast } = parseMd("---\n---\n");
expect(ast.frontmatter).toEqual([]);
});
it("FM-04 frontmatter only, file has no other content", () => {
it("frontmatter only, file has no other content", () => {
const { ast } = parseMd("---\nk: v\n---\n");
expect(ast.frontmatter).toEqual([{ key: "k", value: "v", line: 2 }]);
expect(ast.preamble).toBe("");
expect(ast.blocks).toEqual([]);
});
it("FM-05 double-quoted value", () => {
it("double-quoted value", () => {
const { ast } = parseMd('---\ntitle: "Hello, world"\n---\n');
expect(ast.frontmatter[0]?.value).toBe("Hello, world");
});
it("FM-06 single-quoted value", () => {
it("single-quoted value", () => {
const { ast } = parseMd("---\ntitle: 'Hello, world'\n---\n");
expect(ast.frontmatter[0]?.value).toBe("Hello, world");
});
it("FM-07 unquoted value with internal colons preserved", () => {
it("unquoted value with internal colons preserved", () => {
const { ast } = parseMd("---\nurl: https://example.com:443/p\n---\n");
expect(ast.frontmatter[0]?.value).toBe("https://example.com:443/p");
});
it("FM-08 empty value", () => {
it("empty value", () => {
const { ast } = parseMd("---\nk:\n---\n");
expect(ast.frontmatter[0]).toEqual({ key: "k", value: "", line: 2 });
});
it("FM-09 value with leading/trailing whitespace trimmed", () => {
it("value with leading/trailing whitespace trimmed", () => {
const { ast } = parseMd("---\nk: spaced \n---\n");
expect(ast.frontmatter[0]?.value).toBe("spaced");
});
it("FM-10 list-style continuations are silently dropped (substrate stays opinion-free)", () => {
it("list-style continuations are silently dropped (substrate stays opinion-free)", () => {
const { ast } = parseMd("---\ntools:\n - gh\n - curl\n---\n");
// The `tools:` key has an empty inline value; the list continuation
// lines ` - gh` and ` - curl` don't match the kv regex and are
@@ -70,7 +63,7 @@ describe("wave-02 frontmatter-edges", () => {
expect(ast.frontmatter[0]?.value).toBe("");
});
it("FM-11 line numbers are 1-based and accurate", () => {
it("line numbers are 1-based and accurate", () => {
const { ast } = parseMd("---\nk1: v1\nk2: v2\nk3: v3\n---\n");
expect(ast.frontmatter.map((e) => [e.key, e.line])).toEqual([
["k1", 2],
@@ -79,33 +72,32 @@ describe("wave-02 frontmatter-edges", () => {
]);
});
it("FM-12 dash-key allowed", () => {
it("dash-key allowed", () => {
const { ast } = parseMd("---\nuser-invocable: true\n---\n");
expect(ast.frontmatter[0]?.key).toBe("user-invocable");
});
it("FM-13 underscore-key allowed", () => {
it("underscore-key allowed", () => {
const { ast } = parseMd("---\nparam_set: foo\n---\n");
expect(ast.frontmatter[0]?.key).toBe("param_set");
});
it("FM-14 number-only value preserved as string", () => {
it("number-only value preserved as string", () => {
const { ast } = parseMd("---\ntimeout: 15000\n---\n");
expect(ast.frontmatter[0]?.value).toBe("15000");
});
it("FM-15 boolean-like value preserved as string", () => {
it("boolean-like value preserved as string", () => {
const { ast } = parseMd("---\nenabled: true\n---\n");
expect(ast.frontmatter[0]?.value).toBe("true");
});
it("FM-16 blank lines inside frontmatter are skipped", () => {
it("blank lines inside frontmatter are skipped", () => {
const { ast } = parseMd("---\n\nk1: v1\n\nk2: v2\n\n---\n");
expect(ast.frontmatter.map((e) => e.key)).toEqual(["k1", "k2"]);
});
it("FM-17 frontmatter with same key twice — both retained (no dedup)", () => {
// Substrate doesn't dedup; lint rules can flag duplicates if needed.
it("frontmatter with same key twice — both retained (no dedup)", () => {
const { ast } = parseMd("---\nk: v1\nk: v2\n---\n");
expect(ast.frontmatter).toEqual([
{ key: "k", value: "v1", line: 2 },
@@ -113,27 +105,27 @@ describe("wave-02 frontmatter-edges", () => {
]);
});
it("FM-18 frontmatter must be at start — leading blank line breaks detection", () => {
it("frontmatter must be at start — leading blank line breaks detection", () => {
const { ast } = parseMd("\n---\nk: v\n---\n");
expect(ast.frontmatter).toEqual([]);
});
it("FM-19 frontmatter must be at start — leading text breaks detection", () => {
it("frontmatter must be at start — leading text breaks detection", () => {
const { ast } = parseMd("intro\n\n---\nk: v\n---\n");
expect(ast.frontmatter).toEqual([]);
});
it("FM-20 BOM before frontmatter open is tolerated", () => {
it("BOM before frontmatter open is tolerated", () => {
const { ast } = parseMd("\uFEFF---\nname: bom\n---\n");
expect(ast.frontmatter[0]?.value).toBe("bom");
});
it("FM-21 single-line file with `---` and `---` is empty frontmatter", () => {
it("single-line file with `---` and `---` is empty frontmatter", () => {
const { ast } = parseMd("---\n---");
expect(ast.frontmatter).toEqual([]);
});
it("FM-22 hash-prefixed lines skipped (not yaml comments — just don't match kv regex)", () => {
it("hash-prefixed lines skipped (not yaml comments — just don't match kv regex)", () => {
const { ast } = parseMd("---\n# comment\nk: v\n---\n");
expect(ast.frontmatter.map((e) => e.key)).toEqual(["k"]);
});

View File

@@ -1,24 +1,15 @@
/**
* Wave 3 — H2 block split.
*
* Substrate guarantee: `## ` at column 0 outside fenced code blocks
* starts a new H2 block. H1 (`# `), H3 (`### `), and `## ` inside
* fenced code blocks do NOT split.
*/
import { describe, expect, it } from "vitest";
import { parseMd } from "../../parse.js";
describe("wave-03 h2-block-split", () => {
it("H2-01 no headings → no blocks, all preamble", () => {
describe("h2-block-split", () => {
it("no headings → no blocks, all preamble", () => {
const raw = "Just prose, no headings.\nMore prose.\n";
const { ast } = parseMd(raw);
expect(ast.blocks).toEqual([]);
// Preamble preserves the trailing newline from raw (split + rejoin
// is symmetric); callers that want trimmed prose call .trim().
expect(ast.preamble).toBe("Just prose, no headings.\nMore prose.\n");
});
it("H2-02 single heading splits preamble + one block", () => {
it("single heading splits preamble + one block", () => {
const { ast } = parseMd("preamble\n## Section\nbody\n");
expect(ast.preamble.trim()).toBe("preamble");
expect(ast.blocks.length).toBe(1);
@@ -26,127 +17,112 @@ describe("wave-03 h2-block-split", () => {
expect(ast.blocks[0]?.bodyText.trim()).toBe("body");
});
it("H2-03 multiple headings produce blocks in order", () => {
it("multiple headings produce blocks in order", () => {
const { ast } = parseMd("## A\nbody-a\n## B\nbody-b\n## C\nbody-c\n");
expect(ast.blocks.map((b) => b.heading)).toEqual(["A", "B", "C"]);
});
it("H2-04 H1 does NOT split", () => {
it("H1 does NOT split", () => {
const { ast } = parseMd("# H1 heading\n## H2 heading\n");
expect(ast.blocks.length).toBe(1);
expect(ast.blocks[0]?.heading).toBe("H2 heading");
expect(ast.preamble).toContain("# H1 heading");
});
it("H2-05 H3 does NOT split", () => {
it("H3 does NOT split", () => {
const { ast } = parseMd("## H2\nbody\n### H3\nstill in H2 block\n");
expect(ast.blocks.length).toBe(1);
expect(ast.blocks[0]?.bodyText).toContain("### H3");
});
it("H2-06 `## ` inside fenced code block does NOT split", () => {
it("`## ` inside fenced code block does NOT split", () => {
const raw = "## Real\n\n```md\n## Inside code\n```\n\n## Another real\n";
const { ast } = parseMd(raw);
expect(ast.blocks.map((b) => b.heading)).toEqual(["Real", "Another real"]);
});
it("H2-07 `##` without trailing space — does NOT match (regex requires \\s+)", () => {
it("`##` without trailing space — does NOT match (regex requires \\s+)", () => {
const { ast } = parseMd("##NoSpace\n## With space\n");
expect(ast.blocks.length).toBe(1);
expect(ast.blocks[0]?.heading).toBe("With space");
});
it("H2-08 leading whitespace before `##` — recognized as heading (CommonMark)", () => {
// Substrate accepts up to 3 spaces of indentation as an atx
// heading per CommonMark. Lint rules can flag if a particular
// workspace file requires column-zero authoring.
it("leading whitespace before `##` — recognized as heading (CommonMark)", () => {
const { ast } = parseMd(" ## indented\n## not indented\n");
expect(ast.blocks.map((b) => b.heading)).toEqual(["indented", "not indented"]);
});
it("H2-09 trailing whitespace on heading — trimmed in heading text", () => {
it("trailing whitespace on heading — trimmed in heading text", () => {
const { ast } = parseMd("## Trailing \n");
expect(ast.blocks[0]?.heading).toBe("Trailing");
expect(ast.blocks[0]?.slug).toBe("trailing");
});
it("H2-10 inline code in heading preserved", () => {
it("inline code in heading preserved", () => {
const { ast } = parseMd("## Use `gh` for GitHub\n");
expect(ast.blocks[0]?.heading).toBe("Use `gh` for GitHub");
});
it("H2-11 markdown formatting in heading preserved", () => {
it("markdown formatting in heading preserved", () => {
const { ast } = parseMd("## **Bold** *italic*\n");
expect(ast.blocks[0]?.heading).toBe("**Bold** *italic*");
});
it("H2-12 immediately after frontmatter", () => {
it("immediately after frontmatter", () => {
const { ast } = parseMd("---\nk: v\n---\n## Section\nbody\n");
expect(ast.blocks[0]?.heading).toBe("Section");
expect(ast.preamble).toBe("");
});
it("H2-13 H2 at end of file (no body)", () => {
it("H2 at end of file (no body)", () => {
const { ast } = parseMd("preamble\n## End\n");
expect(ast.blocks[0]?.heading).toBe("End");
expect(ast.blocks[0]?.bodyText).toBe("");
});
it("H2-14 two consecutive H2s — empty body block between", () => {
it("two consecutive H2s — empty body block between", () => {
const { ast } = parseMd("## A\n## B\n");
expect(ast.blocks[0]?.bodyText).toBe("");
expect(ast.blocks[1]?.heading).toBe("B");
});
it("H2-15 line numbers are 1-based and track through frontmatter", () => {
it("line numbers are 1-based and track through frontmatter", () => {
const { ast } = parseMd("---\nk: v\n---\n## At line 4\n");
expect(ast.blocks[0]?.line).toBe(4);
});
it("H2-16 line numbers track through preamble", () => {
it("line numbers track through preamble", () => {
const { ast } = parseMd("line 1\nline 2\n## At line 3\n");
expect(ast.blocks[0]?.line).toBe(3);
});
it("H2-17 nested fenced code blocks (~~~ vs ```) — only ``` is detected", () => {
// Current parser only treats ``` as fence; ~~~ falls through. This
// is a documented limit. Inputs with ~~~ aren't broken — they're
// just not protected from H2-misparsing inside them.
it("nested fenced code blocks (~~~ vs ```) — only ``` is detected", () => {
const raw = "## H\n\n~~~md\n~~~\n\n## Next\n";
const { ast } = parseMd(raw);
expect(ast.blocks.map((b) => b.heading)).toEqual(["H", "Next"]);
});
it("H2-18 setext-style heading (`Heading\\n========\\n`) is NOT recognized", () => {
// Substrate is opinion-aware: setext headings are treated as
// preamble. Lint rules can flag if needed; recognized markdown
// dialect is `## ATX-style only` for OpenClaw workspace files.
it("setext-style heading (`Heading\\n========\\n`) is NOT recognized", () => {
const raw = "Heading\n=======\n## Real\n";
const { ast } = parseMd(raw);
expect(ast.blocks.length).toBe(1);
expect(ast.blocks[0]?.heading).toBe("Real");
});
it("H2-19 empty heading text (`## `)", () => {
// Substrate accepts an empty atx heading; downstream lint
// (`OC_HEADING_EMPTY`) flags it. Slug is empty string — collisions
// are a lint-level concern, not a parser refusal.
it("empty heading text (`## `)", () => {
const { ast } = parseMd("## \n");
expect(ast.blocks.length).toBe(1);
expect(ast.blocks[0]?.heading).toBe("");
expect(ast.blocks[0]?.slug).toBe("");
});
it("H2-20 heading with only whitespace (`## `)", () => {
it("heading with only whitespace (`## `)", () => {
const { ast } = parseMd("## \n");
expect(ast.blocks.length).toBe(1);
expect(ast.blocks[0]?.heading).toBe("");
});
it("H2-21 heading-shaped text inside multi-line bullet body — does split", () => {
// The substrate treats line-start ## as a heading regardless of
// logical context (item continuation lines). Lint rules can flag
// the boundary; substrate prefers structural simplicity.
it("heading-shaped text inside multi-line bullet body — does split", () => {
const raw = "## Section\n- item starts\n continues\n## Next\n";
const { ast } = parseMd(raw);
expect(ast.blocks.map((b) => b.heading)).toEqual(["Section", "Next"]);

View File

@@ -1,46 +1,38 @@
/**
* Wave 4 — items (bullets + kv).
*
* Substrate guarantee: bullet lines (`- text`, `* text`, `+ text`) inside
* H2 blocks are extracted as `AstItem`. Lines matching `- key: value`
* also populate `item.kv`. Items inside fenced code blocks are NOT
* extracted.
*/
import { describe, expect, it } from "vitest";
import { parseMd } from "../../parse.js";
describe("wave-04 items", () => {
it("I-01 plain dash bullets", () => {
describe("items", () => {
it("plain dash bullets", () => {
const { ast } = parseMd("## H\n- a\n- b\n- c\n");
expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(["a", "b", "c"]);
});
it("I-02 star bullets", () => {
it("star bullets", () => {
const { ast } = parseMd("## H\n* a\n* b\n");
expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(["a", "b"]);
});
it("I-03 plus bullets", () => {
it("plus bullets", () => {
const { ast } = parseMd("## H\n+ a\n+ b\n");
expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(["a", "b"]);
});
it("I-04 mixed bullet markers in same section", () => {
it("mixed bullet markers in same section", () => {
const { ast } = parseMd("## H\n- dash\n* star\n+ plus\n");
expect(ast.blocks[0]?.items.length).toBe(3);
});
it("I-05 kv-shape items populate kv", () => {
it("kv-shape items populate kv", () => {
const { ast } = parseMd("## H\n- gh: GitHub CLI\n");
expect(ast.blocks[0]?.items[0]?.kv).toEqual({ key: "gh", value: "GitHub CLI" });
});
it("I-06 plain item has no kv", () => {
it("plain item has no kv", () => {
const { ast } = parseMd("## H\n- plain text\n");
expect(ast.blocks[0]?.items[0]?.kv).toBeUndefined();
});
it("I-07 multiple colons — first colon is the kv split", () => {
it("multiple colons — first colon is the kv split", () => {
const { ast } = parseMd("## H\n- url: http://x.com:80/p\n");
expect(ast.blocks[0]?.items[0]?.kv).toEqual({
key: "url",
@@ -48,79 +40,72 @@ describe("wave-04 items", () => {
});
});
it("I-08 colon with no space after is still kv", () => {
it("colon with no space after is still kv", () => {
const { ast } = parseMd("## H\n- key:value\n");
expect(ast.blocks[0]?.items[0]?.kv).toEqual({ key: "key", value: "value" });
});
it("I-09 quoted value preserved verbatim (no unquote at item layer)", () => {
it("quoted value preserved verbatim (no unquote at item layer)", () => {
const { ast } = parseMd('## H\n- title: "quoted: value"\n');
expect(ast.blocks[0]?.items[0]?.kv?.value).toBe('"quoted: value"');
});
it("I-10 slug from kv key when kv present", () => {
it("slug from kv key when kv present", () => {
const { ast } = parseMd("## H\n- The Tool: description\n");
expect(ast.blocks[0]?.items[0]?.slug).toBe("the-tool");
});
it("I-11 slug from item text when no kv", () => {
it("slug from item text when no kv", () => {
const { ast } = parseMd("## H\n- The Plain Item\n");
expect(ast.blocks[0]?.items[0]?.slug).toBe("the-plain-item");
});
it("I-12 items inside fenced code block are NOT extracted", () => {
it("items inside fenced code block are NOT extracted", () => {
const raw = "## H\n```\n- not a bullet\n- still not\n```\n- real bullet\n";
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.items.length).toBe(1);
expect(ast.blocks[0]?.items[0]?.text).toBe("real bullet");
});
it("I-13 line numbers track through block body", () => {
it("line numbers track through block body", () => {
const { ast } = parseMd("## H\n- first\n- second\n- third\n");
expect(ast.blocks[0]?.items.map((i) => i.line)).toEqual([2, 3, 4]);
});
it("I-14 trailing whitespace on bullet trimmed in text", () => {
it("trailing whitespace on bullet trimmed in text", () => {
const { ast } = parseMd("## H\n- spaced \n");
expect(ast.blocks[0]?.items[0]?.text).toBe("spaced");
});
it("I-15 empty bullet — recognized with empty text/slug", () => {
// Substrate accepts an empty bullet; lint can flag if collisions
// matter. Both `- ` and `- real` become items.
it("empty bullet — recognized with empty text/slug", () => {
const { ast } = parseMd("## H\n- \n- real\n");
expect(ast.blocks[0]?.items.length).toBe(2);
expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(["", "real"]);
});
it("I-16 indented bullet (sub-bullet) — recognized as item alongside parent", () => {
// Substrate flattens the bullet tree into a list of items;
// sub-bullets surface as their own AstItem entries. Lint rules
// can flag depth or duplicate-slug collisions.
it("indented bullet (sub-bullet) — recognized as item alongside parent", () => {
const { ast } = parseMd("## H\n- top\n - sub\n");
expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(["top", "sub"]);
});
it("I-17 numbered list (1. item) — recognized as items", () => {
// Substrate treats ordered and unordered lists symmetrically.
// Lint rules can flag if a particular file requires bullet style.
it("numbered list (1. item) — recognized as items", () => {
const { ast } = parseMd("## H\n1. first\n2. second\n");
expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(["first", "second"]);
});
it("I-18 items in a section with no body before — first item line is heading+1", () => {
it("items in a section with no body before — first item line is heading+1", () => {
const { ast } = parseMd("## H\n- a\n");
expect(ast.blocks[0]?.items[0]?.line).toBe(2);
});
it("I-19 items spread across blocks are scoped to their block", () => {
it("items spread across blocks are scoped to their block", () => {
const { ast } = parseMd("## A\n- a1\n## B\n- b1\n- b2\n");
expect(ast.blocks[0]?.items.length).toBe(1);
expect(ast.blocks[1]?.items.length).toBe(2);
expect(ast.blocks[1]?.items.map((i) => i.text)).toEqual(["b1", "b2"]);
});
it("I-20 item with only-symbol kv key still parses", () => {
it("item with only-symbol kv key still parses", () => {
const { ast } = parseMd("## H\n- API_KEY: secret-value\n");
expect(ast.blocks[0]?.items[0]?.kv).toEqual({
key: "API_KEY",
@@ -129,21 +114,19 @@ describe("wave-04 items", () => {
expect(ast.blocks[0]?.items[0]?.slug).toBe("api-key");
});
it("I-21 item with kv where value is empty", () => {
it("item with empty kv value falls through to plain item", () => {
const { ast } = parseMd("## H\n- key:\n");
// `- key:` has empty value after the colon; the kv regex requires
// (.+) for value, so this falls through to plain item.
expect(ast.blocks[0]?.items[0]?.kv).toBeUndefined();
expect(ast.blocks[0]?.items[0]?.text).toBe("key:");
});
it("I-22 bullet in preamble (before first H2) is NOT in any block", () => {
it("bullet in preamble (before first H2) is NOT in any block", () => {
const { ast } = parseMd("- preamble bullet\n## H\n- block bullet\n");
expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(["block bullet"]);
expect(ast.preamble).toContain("- preamble bullet");
});
it("I-23 bullet with internal markdown (italics, code) preserved in text", () => {
it("bullet with internal markdown (italics, code) preserved in text", () => {
const { ast } = parseMd("## H\n- use *gh* and `curl`\n");
expect(ast.blocks[0]?.items[0]?.text).toBe("use *gh* and `curl`");
});

View File

@@ -1,22 +1,3 @@
/**
* Wave 15 — JSONC byte-fidelity round-trip.
*
* Substrate guarantee: `emitJsonc(parseJsonc(raw)) === raw` for every
* input the parser accepts. Mirrors wave-01 but for the JSONC kind.
* Comments, trailing commas, BOMs, mixed line endings — all byte-stable
* via the round-trip path.
*
* **What this file proves**: byte-identical round-trip via the
* default-mode emit (which echoes `ast.raw`). This is necessary but
* not sufficient — without the structural assertions below, a parser
* that emitted `ast.root: null` for every input would still pass the
* byte test (since `raw` is preserved on the AST regardless).
*
* Each assertParseable() call proves the parser actually ran and
* produced a structural tree, not just stored `raw` verbatim and
* called it a day. JC-17 deliberately uses `assertNotParseable` —
* malformed input must echo `raw` AND emit a diagnostic.
*/
import { describe, expect, it } from "vitest";
import type { JsoncValue } from "../../jsonc/ast.js";
import { emitJsonc } from "../../jsonc/emit.js";
@@ -50,16 +31,16 @@ function assertNotParseable(raw: string): void {
expect(result.diagnostics.some((d) => d.severity === "error")).toBe(true);
}
describe("wave-15 jsonc byte-fidelity", () => {
it("JC-01 empty file", () => {
describe("jsonc byte-fidelity", () => {
it("empty file", () => {
expect(rt("")).toBe("");
});
it("JC-02 whitespace-only", () => {
it("whitespace-only", () => {
expect(rt(" \n\n \n")).toBe(" \n\n \n");
});
it("JC-03 empty object", () => {
it("empty object", () => {
expect(rt("{}")).toBe("{}");
const root = assertParseable("{}");
expect(root.kind).toBe("object");
@@ -68,7 +49,7 @@ describe("wave-15 jsonc byte-fidelity", () => {
}
});
it("JC-04 empty array", () => {
it("empty array", () => {
expect(rt("[]")).toBe("[]");
const root = assertParseable("[]");
expect(root.kind).toBe("array");
@@ -77,7 +58,7 @@ describe("wave-15 jsonc byte-fidelity", () => {
}
});
it("JC-05 trivial scalar root", () => {
it("trivial scalar root", () => {
expect(rt("42")).toBe("42");
expect(rt('"x"')).toBe('"x"');
expect(rt("true")).toBe("true");
@@ -88,22 +69,20 @@ describe("wave-15 jsonc byte-fidelity", () => {
expect(assertParseable("null").kind).toBe("null");
});
it("JC-06 line comments preserved", () => {
it("line comments preserved", () => {
const raw = '// a leading comment\n{ "x": 1 } // trailing\n';
expect(rt(raw)).toBe(raw);
// Pin parse: the structural value `x: 1` is reachable.
const root = assertParseable(raw);
expect(root.kind).toBe("object");
expect(assertParseable(raw).kind).toBe("object");
});
it("JC-07 block comments preserved", () => {
it("block comments preserved", () => {
const raw = '/* header */\n{\n /* inline */\n "x": 1\n}\n';
expect(rt(raw)).toBe(raw);
const root = assertParseable(raw);
expect(root.kind).toBe("object");
});
it("JC-08 trailing commas preserved", () => {
it("trailing commas preserved", () => {
const raw = '{\n "x": 1,\n "y": 2,\n}';
expect(rt(raw)).toBe(raw);
const root = assertParseable(raw);
@@ -112,7 +91,7 @@ describe("wave-15 jsonc byte-fidelity", () => {
}
});
it("JC-09 mixed CRLF + LF preserved", () => {
it("mixed CRLF + LF preserved", () => {
const raw = '{\r\n "x": 1,\n "y": 2\r\n}';
expect(rt(raw)).toBe(raw);
const root = assertParseable(raw);
@@ -121,23 +100,21 @@ describe("wave-15 jsonc byte-fidelity", () => {
}
});
it("JC-10 BOM preserved on raw", () => {
it("BOM preserved on raw, stripped for parse", () => {
const raw = '\uFEFF{ "x": 1 }';
expect(rt(raw)).toBe(raw);
// BOM stripped before parsing — parser still sees `{` as first char.
expect(assertParseable(raw).kind).toBe("object");
});
it("JC-11 deeply nested structures preserved", () => {
it("deeply nested structures preserved", () => {
const raw = '{ "a": { "b": { "c": { "d": [1, [2, [3, [4]]]] } } } }';
expect(rt(raw)).toBe(raw);
expect(assertParseable(raw).kind).toBe("object");
});
it("JC-12 string with escape sequences preserved", () => {
it("string with escape sequences preserved (parsed value has decoded chars)", () => {
const raw = '{ "s": "a\\nb\\tc\\u0041\\\\d\\"e" }';
expect(rt(raw)).toBe(raw);
// Pin escape resolution — parsed value carries actual control chars.
const root = assertParseable(raw);
if (root.kind === "object") {
const s = root.entries[0]?.value;
@@ -147,7 +124,7 @@ describe("wave-15 jsonc byte-fidelity", () => {
}
});
it("JC-13 numbers in scientific / negative / decimal forms preserved", () => {
it("numbers in scientific / negative / decimal forms preserved", () => {
const raw = "[ 0, -0, 1.5, -3.14, 1e3, -2.5e-10, 1E+5 ]";
expect(rt(raw)).toBe(raw);
const root = assertParseable(raw);
@@ -157,7 +134,7 @@ describe("wave-15 jsonc byte-fidelity", () => {
}
});
it("JC-14 unicode characters preserved verbatim", () => {
it("unicode characters preserved verbatim", () => {
const raw = '{ "name": "héllo 世界 🎉" }';
expect(rt(raw)).toBe(raw);
const root = assertParseable(raw);
@@ -169,30 +146,27 @@ describe("wave-15 jsonc byte-fidelity", () => {
}
});
it("JC-15 idiosyncratic whitespace preserved", () => {
it("idiosyncratic whitespace preserved", () => {
const raw = '{ "x" : 1 ,\n "y": 2}';
expect(rt(raw)).toBe(raw);
expect(assertParseable(raw).kind).toBe("object");
});
it("JC-16 file-level trailing whitespace preserved", () => {
it("file-level trailing whitespace preserved", () => {
const raw = '{ "x": 1 }\n\n\n';
expect(rt(raw)).toBe(raw);
expect(assertParseable(raw).kind).toBe("object");
});
it("JC-17 malformed input still emits raw verbatim AND emits a diagnostic", () => {
it("malformed input still emits raw verbatim AND emits a diagnostic", () => {
const raw = '{ broken json with "key": value }';
expect(rt(raw)).toBe(raw);
// Without this assertion the test passes for any input regardless
// of parser behavior — pin both halves of the contract.
assertNotParseable(raw);
});
it("JC-18 comments-only file preserved", () => {
it("comments-only file preserved", () => {
const raw = "// just a comment\n/* and a block */\n";
expect(rt(raw)).toBe(raw);
// Comments-only files have no structural root — that's expected.
expect(parseJsonc(raw).ast.root).toBeNull();
});
});

View File

@@ -1,10 +1,3 @@
/**
* Wave 17 — JSONC resolver adversarial edges.
*
* Substrate guarantee: the resolver walks the value tree deterministically
* with mixed dotted / segment paths, returns null on any unresolvable
* walk, and never throws on hostile inputs.
*/
import { describe, expect, it } from "vitest";
import { parseJsonc } from "../../jsonc/parse.js";
import { resolveJsoncOcPath } from "../../jsonc/resolve.js";
@@ -14,29 +7,29 @@ function rs(raw: string, ocPath: string) {
return resolveJsoncOcPath(parseJsonc(raw).ast, parseOcPath(ocPath));
}
describe("wave-17 jsonc resolver edges", () => {
it("JR-01 root resolves on empty object", () => {
describe("jsonc resolver edges", () => {
it("root resolves on empty object", () => {
expect(rs("{}", "oc://config")?.kind).toBe("root");
});
it("JR-02 root resolves on scalar root", () => {
it("root resolves on scalar root", () => {
expect(rs("42", "oc://config")?.kind).toBe("root");
});
it("JR-03 root resolves on array root", () => {
it("root resolves on array root", () => {
expect(rs("[1,2,3]", "oc://config")?.kind).toBe("root");
});
it("JR-04 deep dotted descent within section", () => {
it("deep dotted descent within section", () => {
const m = rs('{"a":{"b":{"c":1}}}', "oc://config/a.b.c");
expect(m?.kind).toBe("object-entry");
});
it("JR-05 missing intermediate key returns null", () => {
it("missing intermediate key returns null", () => {
expect(rs('{"a":{"b":1}}', "oc://config/a.x.b")).toBeNull();
});
it("JR-06 numeric segment indexes into array", () => {
it("numeric segment indexes into array", () => {
const m = rs('{"items":["a","b","c"]}', "oc://config/items.1");
expect(m?.kind).toBe("value");
if (m?.kind === "value") {
@@ -44,7 +37,7 @@ describe("wave-17 jsonc resolver edges", () => {
}
});
it("JR-07 negative array index resolves to Nth-from-last", () => {
it("negative array index resolves to Nth-from-last", () => {
expect(rs('{"x":[1,2]}', "oc://config/x.-1")).toMatchObject({
kind: "value",
node: { kind: "number", value: 2 },
@@ -56,28 +49,28 @@ describe("wave-17 jsonc resolver edges", () => {
expect(rs('{"x":[1,2]}', "oc://config/x.-5")).toBeNull();
});
it("JR-08 out-of-bounds array index returns null", () => {
it("out-of-bounds array index returns null", () => {
expect(rs('{"x":[1,2]}', "oc://config/x.99")).toBeNull();
});
it("JR-09 non-integer index returns null (no NaN coercion)", () => {
it("non-integer index returns null (no NaN coercion)", () => {
expect(rs('{"x":[1,2]}', "oc://config/x.foo")).toBeNull();
});
it("JR-10 null AST root returns null on any path", () => {
it("null AST root returns null on any path", () => {
expect(rs("", "oc://config/x")).toBeNull();
});
it("JR-11 descending past a primitive returns null", () => {
it("descending past a primitive returns null", () => {
expect(rs('{"x":42}', "oc://config/x.y")).toBeNull();
});
it("JR-12 empty segment in dotted path throws OcPathError", () => {
it("empty segment in dotted path throws OcPathError", () => {
// v1 invariant: malformed paths fail loud at parse time, not silently null.
expect(() => rs('{"x":1}', "oc://config/x..y")).toThrow(/Empty dotted sub-segment/);
});
it("JR-13 string value at leaf surfaces via object-entry shape", () => {
it("string value at leaf surfaces via object-entry shape", () => {
const m = rs('{"k":"v"}', "oc://config/k");
expect(m?.kind).toBe("object-entry");
if (m?.kind === "object-entry") {
@@ -85,14 +78,14 @@ describe("wave-17 jsonc resolver edges", () => {
}
});
it("JR-14 boolean and null values resolve", () => {
it("boolean and null values resolve", () => {
const m1 = rs('{"k":true}', "oc://config/k");
expect(m1?.kind).toBe("object-entry");
const m2 = rs('{"k":null}', "oc://config/k");
expect(m2?.kind).toBe("object-entry");
});
it("JR-15 mixed slash + dot segments resolve identically", () => {
it("mixed slash + dot segments resolve identically", () => {
const a = rs('{"a":{"b":{"c":1}}}', "oc://config/a.b.c");
const b = rs('{"a":{"b":{"c":1}}}', "oc://config/a/b.c");
const c = rs('{"a":{"b":{"c":1}}}', "oc://config/a/b/c");
@@ -100,17 +93,17 @@ describe("wave-17 jsonc resolver edges", () => {
expect(b?.kind).toBe(c?.kind);
});
it("JR-16 keys with special characters resolve", () => {
it("keys with special characters resolve", () => {
const m = rs('{"a-b_c":{"x":1}}', "oc://config/a-b_c.x");
expect(m?.kind).toBe("object-entry");
});
it("JR-17 unicode keys resolve", () => {
it("unicode keys resolve", () => {
const m = rs('{"héllo":1}', "oc://config/héllo");
expect(m?.kind).toBe("object-entry");
});
it("JR-18 large nested structure (depth 20) resolves to leaf", () => {
it("large nested structure (depth 20) resolves to leaf", () => {
let json = '"leaf"';
const segs: string[] = [];
for (let i = 19; i >= 0; i--) {
@@ -124,7 +117,7 @@ describe("wave-17 jsonc resolver edges", () => {
}
});
it("JR-19 resolver is non-mutating across calls", () => {
it("resolver is non-mutating across calls", () => {
const { ast } = parseJsonc('{"x":{"y":1}}');
const before = JSON.stringify(ast);
rs('{"x":{"y":1}}', "oc://config/x.y");
@@ -133,7 +126,7 @@ describe("wave-17 jsonc resolver edges", () => {
expect(JSON.stringify(ast)).toBe(before);
});
it("JR-20 hostile input shapes do not throw", () => {
it("hostile input shapes do not throw", () => {
expect(() => rs("{garbage}", "oc://config/x")).not.toThrow();
expect(() => rs('{"a":', "oc://config/a")).not.toThrow();
});

View File

@@ -1,10 +1,3 @@
/**
* Wave 16 — JSONL byte-fidelity round-trip.
*
* Substrate guarantee: `emitJsonl(parseJsonl(raw)) === raw` for every
* input the parser accepts. JSONL is line-oriented; blanks, malformed
* lines, mixed line endings, trailing-newline shape — all byte-stable.
*/
import { describe, expect, it } from "vitest";
import { emitJsonl } from "../../jsonl/emit.js";
import { parseJsonl } from "../../jsonl/parse.js";
@@ -13,81 +6,81 @@ function rt(raw: string): string {
return emitJsonl(parseJsonl(raw).ast);
}
describe("wave-16 jsonl byte-fidelity", () => {
it("JL-01 empty file", () => {
describe("jsonl byte-fidelity", () => {
it("empty file", () => {
expect(rt("")).toBe("");
});
it("JL-02 single line no trailing newline", () => {
it("single line no trailing newline", () => {
expect(rt('{"a":1}')).toBe('{"a":1}');
});
it("JL-03 single line with trailing newline", () => {
it("single line with trailing newline", () => {
expect(rt('{"a":1}\n')).toBe('{"a":1}\n');
});
it("JL-04 multiple lines preserved", () => {
it("multiple lines preserved", () => {
const raw = '{"a":1}\n{"b":2}\n{"c":3}\n';
expect(rt(raw)).toBe(raw);
});
it("JL-05 blank line in the middle preserved", () => {
it("blank line in the middle preserved", () => {
const raw = '{"a":1}\n\n{"b":2}\n';
expect(rt(raw)).toBe(raw);
});
it("JL-06 multiple blank lines preserved", () => {
it("multiple blank lines preserved", () => {
const raw = '{"a":1}\n\n\n{"b":2}\n';
expect(rt(raw)).toBe(raw);
});
it("JL-07 malformed line round-trips verbatim", () => {
it("malformed line round-trips verbatim", () => {
const raw = '{"a":1}\nthis is not json\n{"b":2}\n';
expect(rt(raw)).toBe(raw);
});
it("JL-08 entirely malformed file round-trips", () => {
it("entirely malformed file round-trips", () => {
const raw = "header\nbody\nfooter\n";
expect(rt(raw)).toBe(raw);
});
it("JL-09 leading + trailing blanks preserved", () => {
it("leading + trailing blanks preserved", () => {
const raw = '\n\n{"a":1}\n\n';
expect(rt(raw)).toBe(raw);
});
it("JL-10 file ending without final newline preserved", () => {
it("file ending without final newline preserved", () => {
const raw = '{"a":1}\n{"b":2}';
expect(rt(raw)).toBe(raw);
});
it("JL-11 nested object lines preserved", () => {
it("nested object lines preserved", () => {
const raw = '{"a":{"b":{"c":1}}}\n{"x":[1,[2,[3]]]}\n';
expect(rt(raw)).toBe(raw);
});
it("JL-12 unicode in a value line preserved", () => {
it("unicode in a value line preserved", () => {
const raw = '{"name":"héllo 世界 🎉"}\n';
expect(rt(raw)).toBe(raw);
});
it("JL-13 idiosyncratic whitespace inside a line preserved", () => {
it("idiosyncratic whitespace inside a line preserved", () => {
const raw = '{ "a" : 1 }\n';
expect(rt(raw)).toBe(raw);
});
it("JL-14 single blank line file preserved", () => {
it("single blank line file preserved", () => {
const raw = "\n";
expect(rt(raw)).toBe(raw);
});
it("JL-15 large log (1000 lines) preserved", () => {
it("large log (1000 lines) preserved", () => {
const lines = Array.from({ length: 1000 }, (_, i) => `{"i":${i}}`);
const raw = lines.join("\n") + "\n";
expect(rt(raw)).toBe(raw);
});
it("JL-16 mixed value + malformed + blank preserved", () => {
it("mixed value + malformed + blank preserved", () => {
const raw = '{"a":1}\n{not json}\n\n{"b":2}\nstill not json\n{"c":3}\n';
expect(rt(raw)).toBe(raw);
});
@@ -95,26 +88,21 @@ describe("wave-16 jsonl byte-fidelity", () => {
// F10 — CRLF preservation. Without lineEnding tracking on the AST,
// a CRLF input edited via setJsonlOcPath rebuilds raw via render
// which joins with `\n`, mixing endings on Windows-authored datasets.
it("JL-17 CRLF input round-trips byte-identical via the default emit", () => {
it("CRLF input round-trips byte-identical via the default emit", () => {
const raw = '{"a":1}\r\n{"b":2}\r\n{"c":3}\r\n';
expect(rt(raw)).toBe(raw);
});
it("JL-18 CRLF input preserves CRLF after a structural edit (render mode)", () => {
// Pin the render path: setJsonlOcPath rebuilds raw via render mode,
// which now consults ast.lineEnding to reconstruct the original
// convention. Without the fix, render-mode output uses `\n` and
// produces mixed line endings on Windows datasets.
it("CRLF input preserves CRLF after a structural edit (render mode)", () => {
// No edit is applied in this test: it drives render mode directly, the
// mode whose output is expected to join lines with CRLF for CRLF input.
// The expected string below also has no trailing line terminator.
const raw = '{"a":1}\r\n{"b":2}\r\n';
const { ast } = parseJsonl(raw);
const rendered = emitJsonl(ast, { mode: "render" });
expect(rendered).toBe('{"a":1}\r\n{"b":2}');
// Pin no-LF-only joins by counting CRLFs vs bare LFs.
expect((rendered.match(/\r\n/g) ?? []).length).toBe(1);
expect((rendered.match(/(?<!\r)\n/g) ?? []).length).toBe(0);
});
it("JL-19 LF input preserves LF after a structural edit (render mode)", () => {
it("LF input preserves LF after a structural edit (render mode)", () => {
// Symmetric: a Unix-authored log doesn't mysteriously gain CRLF.
const raw = '{"a":1}\n{"b":2}\n';
const { ast } = parseJsonl(raw);

View File

@@ -1,10 +1,3 @@
/**
* Wave 18 — JSONL resolver adversarial edges.
*
* Substrate guarantee: line addresses (`Lnnn`, `$last`) walk
* deterministically; missing addresses, blank-line targets, and
* malformed-line targets all surface as null without throwing.
*/
import { describe, expect, it } from "vitest";
import { parseJsonl } from "../../jsonl/parse.js";
import { resolveJsonlOcPath } from "../../jsonl/resolve.js";
@@ -14,21 +7,21 @@ function rs(raw: string, ocPath: string) {
return resolveJsonlOcPath(parseJsonl(raw).ast, parseOcPath(ocPath));
}
describe("wave-18 jsonl resolver edges", () => {
it("JLR-01 root resolves with no segments", () => {
describe("jsonl resolver edges", () => {
it("root resolves with no segments", () => {
expect(rs('{"a":1}\n', "oc://log")?.kind).toBe("root");
});
it("JLR-02 L1 resolves to a value line", () => {
it("L1 resolves to a value line", () => {
const m = rs('{"a":1}\n', "oc://log/L1");
expect(m?.kind).toBe("line");
});
it("JLR-03 L99 unknown line returns null", () => {
it("L99 unknown line returns null", () => {
expect(rs('{"a":1}\n', "oc://log/L99")).toBeNull();
});
it("JLR-04 $last picks the most recent value line", () => {
it("$last picks the most recent value line", () => {
const m = rs('{"a":1}\n{"a":2}\n{"a":3}\n', "oc://log/$last/a");
expect(m?.kind).toBe("object-entry");
if (m?.kind === "object-entry") {
@@ -36,7 +29,7 @@ describe("wave-18 jsonl resolver edges", () => {
}
});
it("JLR-05 $last skips trailing blank lines", () => {
it("$last skips trailing blank lines", () => {
const m = rs('{"a":1}\n\n\n', "oc://log/$last/a");
expect(m?.kind).toBe("object-entry");
if (m?.kind === "object-entry") {
@@ -44,42 +37,42 @@ describe("wave-18 jsonl resolver edges", () => {
}
});
it("JLR-06 $last skips trailing malformed lines", () => {
it("$last skips trailing malformed lines", () => {
const m = rs('{"a":1}\nbroken\n', "oc://log/$last/a");
expect(m?.kind).toBe("object-entry");
});
it("JLR-07 $last on empty file returns null", () => {
it("$last on empty file returns null", () => {
expect(rs("", "oc://log/$last/x")).toBeNull();
});
it("JLR-08 $last on all-blank file returns null", () => {
it("$last on all-blank file returns null", () => {
expect(rs("\n\n\n", "oc://log/$last/x")).toBeNull();
});
it("JLR-09 $last on all-malformed file returns null", () => {
it("$last on all-malformed file returns null", () => {
expect(rs("a\nb\nc\n", "oc://log/$last/x")).toBeNull();
});
it("JLR-10 garbage line address returns null", () => {
it("garbage line address returns null", () => {
expect(rs('{"a":1}\n', "oc://log/garbage")).toBeNull();
expect(rs('{"a":1}\n', "oc://log/L")).toBeNull();
expect(rs('{"a":1}\n', "oc://log/Labc")).toBeNull();
});
it("JLR-11 descent into a blank line returns null", () => {
it("descent into a blank line returns null", () => {
expect(rs('{"a":1}\n\n{"b":2}\n', "oc://log/L2/anything")).toBeNull();
});
it("JLR-12 descent into a malformed line returns null", () => {
it("descent into a malformed line returns null", () => {
expect(rs('{"a":1}\nbroken\n{"b":2}\n', "oc://log/L2/anything")).toBeNull();
});
it("JLR-13 missing field on a value line returns null", () => {
it("missing field on a value line returns null", () => {
expect(rs('{"a":1}\n', "oc://log/L1/missing")).toBeNull();
});
it("JLR-14 dotted descent through line value resolves", () => {
it("dotted descent through line value resolves", () => {
const m = rs('{"r":{"ok":true,"d":"x"}}\n', "oc://log/L1/r.d");
expect(m?.kind).toBe("object-entry");
if (m?.kind === "object-entry") {
@@ -87,7 +80,7 @@ describe("wave-18 jsonl resolver edges", () => {
}
});
it("JLR-15 array index inside a line resolves", () => {
it("array index inside a line resolves", () => {
const m = rs('{"items":["a","b","c"]}\n', "oc://log/L1/items.2");
expect(m?.kind).toBe("value");
if (m?.kind === "value") {
@@ -95,14 +88,14 @@ describe("wave-18 jsonl resolver edges", () => {
}
});
it("JLR-16 line numbers are 1-indexed", () => {
it("line numbers are 1-indexed", () => {
// `L1` must address the first physical line, so the resolved `a` is 1
// (a 0-indexed reading would land on the second line and yield 2).
const m = rs('{"a":1}\n{"a":2}\n', "oc://log/L1/a");
// Assert the match kind before narrowing. Without this, a null or
// differently-shaped match skips the guarded expectation below and the
// test passes without checking anything.
expect(m?.kind).toBe("object-entry");
if (m?.kind === "object-entry") {
expect(m.node.value).toMatchObject({ kind: "number", value: 1 });
}
});
it("JLR-17 line numbers preserved across blank/malformed entries", () => {
it("line numbers preserved across blank/malformed entries", () => {
const m = rs('{"a":1}\n\nbroken\n{"a":4}\n', "oc://log/L4/a");
expect(m?.kind).toBe("object-entry");
if (m?.kind === "object-entry") {
@@ -110,7 +103,7 @@ describe("wave-18 jsonl resolver edges", () => {
}
});
it("JLR-18 resolver is non-mutating", () => {
it("resolver is non-mutating", () => {
const { ast } = parseJsonl('{"a":1}\n{"b":2}\n');
const before = JSON.stringify(ast);
rs('{"a":1}\n{"b":2}\n', "oc://log/L1");
@@ -118,7 +111,7 @@ describe("wave-18 jsonl resolver edges", () => {
expect(JSON.stringify(ast)).toBe(before);
});
it("JLR-19 hostile inputs do not throw", () => {
it("hostile inputs do not throw", () => {
expect(() => rs("not json\n", "oc://log/L1")).not.toThrow();
expect(() => rs("", "oc://log/$last")).not.toThrow();
});

View File

@@ -1,48 +1,41 @@
/**
* Wave 11 — malformed input recovery.
*
* Substrate guarantee: parser is **soft-error**: it never throws on
* malformed input. Suspicious-but-recoverable inputs produce
* diagnostics; unparseable structural pieces are dropped silently.
*/
import { describe, expect, it } from "vitest";
import { parseMd } from "../../parse.js";
describe("wave-11 malformed-input", () => {
it("M-01 truncated mid-frontmatter (no close fence)", () => {
describe("malformed-input", () => {
it("truncated mid-frontmatter (no close fence)", () => {
const raw = "---\nname: github\n";
const { ast, diagnostics } = parseMd(raw);
expect(diagnostics.some((d) => d.code === "OC_FRONTMATTER_UNCLOSED")).toBe(true);
expect(ast.frontmatter).toEqual([]);
});
it("M-02 truncated mid-section", () => {
it("truncated mid-section", () => {
const raw = "## H\n- item\nmid-line";
const { ast } = parseMd(raw);
expect(ast.blocks.length).toBe(1);
});
it("M-03 only `---` (single fence, no content)", () => {
it("only `---` (single fence, no content)", () => {
expect(() => parseMd("---\n")).not.toThrow();
});
it("M-04 only `---\\n---`", () => {
it("only `---\\n---`", () => {
const { ast } = parseMd("---\n---");
expect(ast.frontmatter).toEqual([]);
});
it("M-05 binary-ish bytes (non-ASCII control chars)", () => {
it("binary-ish bytes (non-ASCII control chars)", () => {
const raw = "## H\n\x00\x01\x02\n";
expect(() => parseMd(raw)).not.toThrow();
});
it("M-06 very long single line (10k chars)", () => {
it("very long single line (10k chars)", () => {
const raw = `## H\n${"x".repeat(10_000)}\n`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.heading).toBe("H");
});
it("M-07 deeply repeated headings (1000 H2 blocks)", () => {
it("deeply repeated headings (1000 H2 blocks)", () => {
const lines: string[] = [];
for (let i = 0; i < 1000; i++) {
lines.push(`## H${i}`);
@@ -53,27 +46,27 @@ describe("wave-11 malformed-input", () => {
expect(ast.blocks.length).toBe(1000);
});
it("M-08 bullet shape that isn't actually a bullet (`-not-a-bullet`)", () => {
it("bullet shape that isn't actually a bullet (`-not-a-bullet`)", () => {
const { ast } = parseMd("## H\n-not-a-bullet\n- real\n");
expect(ast.blocks[0]?.items.length).toBe(1);
});
it("M-09 unclosed code fence", () => {
it("unclosed code fence", () => {
const raw = "## H\n```\nbody\n";
expect(() => parseMd(raw)).not.toThrow();
});
it("M-10 mismatched fence (open with ``` close with ~~~)", () => {
it("mismatched fence (open with ``` close with ~~~)", () => {
const raw = "## H\n```\nbody\n~~~\n";
expect(() => parseMd(raw)).not.toThrow();
});
it("M-11 nested fences (treated linearly, not nested)", () => {
it("nested fences (treated linearly, not nested)", () => {
const raw = "## H\n```\n```\nstill-in-second\n```\n";
expect(() => parseMd(raw)).not.toThrow();
});
it("M-12 empty file", () => {
it("empty file", () => {
const { ast, diagnostics } = parseMd("");
expect(ast.raw).toBe("");
expect(ast.frontmatter).toEqual([]);
@@ -81,23 +74,23 @@ describe("wave-11 malformed-input", () => {
expect(diagnostics).toEqual([]);
});
it("M-13 single character file", () => {
it("single character file", () => {
const { ast } = parseMd("x");
expect(ast.preamble).toBe("x");
expect(ast.blocks).toEqual([]);
});
it("M-14 single newline file", () => {
it("single newline file", () => {
const { ast } = parseMd("\n");
expect(ast.blocks).toEqual([]);
});
it("M-15 file with mixed indentation extremes (tabs, spaces, mixed)", () => {
it("file with mixed indentation extremes (tabs, spaces, mixed)", () => {
const raw = "## H\n\t- tabbed\n - spaced\n\t - mixed\n";
expect(() => parseMd(raw)).not.toThrow();
});
it("M-16 frontmatter with frontmatter-shaped content inside (---)", () => {
it("frontmatter with frontmatter-shaped content inside (---)", () => {
const raw = "---\nk: v\n---\n\n---\nshould not parse as second frontmatter\n---\n";
const { ast } = parseMd(raw);
expect(ast.frontmatter.map((e) => e.key)).toEqual(["k"]);
@@ -105,29 +98,29 @@ describe("wave-11 malformed-input", () => {
expect(ast.preamble).toContain("---");
});
it("M-17 lines starting with `#` but not heading (raw `#` chars in body)", () => {
it("lines starting with `#` but not heading (raw `#` chars in body)", () => {
const raw = "## H\n\n# This is text starting with #\n#### h4 not parsed as block\n";
const { ast } = parseMd(raw);
expect(ast.blocks.length).toBe(1);
expect(ast.blocks[0]?.bodyText).toContain("# This is text");
});
it("M-18 lines starting with multiple ## but malformed (####, ######)", () => {
it("lines starting with multiple ## but malformed (####, ######)", () => {
const { ast } = parseMd("## Real\n#### Not block\n###### Not block\n");
expect(ast.blocks.length).toBe(1);
expect(ast.blocks[0]?.heading).toBe("Real");
});
it("M-19 file with just whitespace", () => {
it("file with just whitespace", () => {
expect(() => parseMd(" \n\t\n \n")).not.toThrow();
});
it("M-20 file with only BOM", () => {
it("file with only BOM", () => {
const { ast } = parseMd("");
expect(ast.raw).toBe("");
});
it("M-21 file mixing BOM + frontmatter + body + sections", () => {
it("file mixing BOM + frontmatter + body + sections", () => {
const raw = "---\nk: v\n---\n\nbody\n## Section\n- item\n";
expect(() => parseMd(raw)).not.toThrow();
const { ast } = parseMd(raw);
@@ -135,13 +128,13 @@ describe("wave-11 malformed-input", () => {
expect(ast.blocks[0]?.heading).toBe("Section");
});
it("M-22 line endings: legacy CR-only (Mac classic)", () => {
it("line endings: legacy CR-only (Mac classic)", () => {
// The line-splitting regex /\r?\n/ does not split on a bare CR, so the
// whole input is treated as a single line. Only the no-throw guarantee
// is pinned here; the resulting AST shape is not asserted.
const raw = "line1\rline2\r## Heading\r";
expect(() => parseMd(raw)).not.toThrow();
});
it("M-23 100 KB file", () => {
it("100 KB file", () => {
const lines: string[] = [];
for (let i = 0; i < 1000; i++) {
lines.push("## H" + i);

View File

@@ -1,10 +1,3 @@
/**
* Wave 7 — OcPath parsing edges.
*
* Substrate guarantee: `parseOcPath(s)` is a pure function. Valid input
* round-trips via `formatOcPath`; invalid input throws `OcPathError`
* with a stable `code`.
*/
import { describe, expect, it } from "vitest";
import {
OcPathError,
@@ -25,28 +18,28 @@ function expectErr(fn: () => unknown, code: string): void {
}
}
describe("wave-07 oc-path-parse-edges", () => {
it("OP-01 file-only", () => {
describe("oc-path-parse-edges", () => {
it("file-only", () => {
expect(parseOcPath("oc://SOUL.md")).toEqual({ file: "SOUL.md" });
});
it("OP-02 file + section", () => {
it("file + section", () => {
expect(parseOcPath("oc://SOUL.md/Boundaries").section).toBe("Boundaries");
});
it("OP-03 file + section + item", () => {
it("file + section + item", () => {
expect(parseOcPath("oc://SOUL.md/Boundaries/deny-rule-1").item).toBe("deny-rule-1");
});
it("OP-04 file + section + item + field", () => {
it("file + section + item + field", () => {
expect(parseOcPath("oc://SOUL.md/B/deny-1/risk").field).toBe("risk");
});
it("OP-05 session query parameter", () => {
it("session query parameter", () => {
expect(parseOcPath("oc://X.md?session=daily").session).toBe("daily");
});
it("OP-06 session with full path", () => {
it("session with full path", () => {
const p = parseOcPath("oc://X.md/sec/item/field?session=cron");
expect(p).toEqual({
file: "X.md",
@@ -57,46 +50,46 @@ describe("wave-07 oc-path-parse-edges", () => {
});
});
it("OP-07 unknown query parameters silently ignored", () => {
it("unknown query parameters silently ignored", () => {
const p = parseOcPath("oc://X.md?foo=bar&session=s&baz=qux");
expect(p.session).toBe("s");
});
it("OP-08 session= with empty value drops session", () => {
it("session= with empty value drops session", () => {
const p = parseOcPath("oc://X.md?session=");
expect(p.session).toBeUndefined();
});
it("OP-09 query without `=` ignored", () => {
it("query without `=` ignored", () => {
const p = parseOcPath("oc://X.md?nokeyhere");
expect(p.session).toBeUndefined();
});
it("OP-10 missing scheme throws", () => {
it("missing scheme throws", () => {
expectErr(() => parseOcPath("SOUL.md"), "OC_PATH_MISSING_SCHEME");
});
it("OP-11 wrong scheme throws", () => {
it("wrong scheme throws", () => {
expectErr(() => parseOcPath("https://x.com"), "OC_PATH_MISSING_SCHEME");
});
it("OP-12 empty after scheme throws", () => {
it("empty after scheme throws", () => {
expectErr(() => parseOcPath("oc://"), "OC_PATH_EMPTY");
});
it("OP-13 empty segment throws", () => {
it("empty segment throws", () => {
expectErr(() => parseOcPath("oc://X.md//item"), "OC_PATH_EMPTY_SEGMENT");
});
it("OP-14 too-deep nesting throws", () => {
it("too-deep nesting throws", () => {
expectErr(() => parseOcPath("oc://X.md/a/b/c/d/e"), "OC_PATH_TOO_DEEP");
});
it("OP-15 non-string throws", () => {
it("non-string throws", () => {
expectErr(() => parseOcPath(42 as unknown as string), "OC_PATH_NOT_STRING");
});
it("OP-16 round-trip canonical forms", () => {
it("round-trip canonical forms", () => {
const cases = [
"oc://SOUL.md",
"oc://SOUL.md/Boundaries",
@@ -112,12 +105,12 @@ describe("wave-07 oc-path-parse-edges", () => {
}
});
it("OP-17 isValidOcPath true positives", () => {
it("isValidOcPath true positives", () => {
expect(isValidOcPath("oc://X.md")).toBe(true);
expect(isValidOcPath("oc://X.md/sec/item/field")).toBe(true);
});
it("OP-18 isValidOcPath true negatives", () => {
it("isValidOcPath true negatives", () => {
expect(isValidOcPath("")).toBe(false);
expect(isValidOcPath("X.md")).toBe(false);
expect(isValidOcPath("oc://")).toBe(false);
@@ -126,39 +119,32 @@ describe("wave-07 oc-path-parse-edges", () => {
expect(isValidOcPath({})).toBe(false);
});
it("OP-19 file segment with special chars (file with dots/slashes)", () => {
it("file segment with special chars (file with dots/slashes)", () => {
// What is pinned: the file slot ends at the first `/` ("config"), and the
// dotted remainder is kept verbatim as one section string rather than
// being split on `.` at parse time.
const p = parseOcPath("oc://config/plugins.entries.foo.token");
expect(p.file).toBe("config");
expect(p.section).toBe("plugins.entries.foo.token");
});
it("OP-20 section segment with hyphens / underscores / numbers", () => {
it("section segment with hyphens / underscores / numbers", () => {
const p = parseOcPath("oc://X.md/Multi-Tenant_Section_2");
expect(p.section).toBe("Multi-Tenant_Section_2");
});
it("OP-21 [frontmatter] sentinel is just a section name", () => {
it("[frontmatter] sentinel is just a section name", () => {
const p = parseOcPath("oc://X.md/[frontmatter]/name");
expect(p.section).toBe("[frontmatter]");
expect(p.item).toBe("name");
});
it("OP-22 formatOcPath rejects empty file", () => {
it("formatOcPath rejects empty file", () => {
expectErr(() => formatOcPath({ file: "" }), "OC_PATH_FILE_REQUIRED");
});
it("OP-23 formatOcPath rejects item without section", () => {
it("formatOcPath rejects item without section", () => {
expectErr(() => formatOcPath({ file: "X.md", item: "i" }), "OC_PATH_NESTING");
});
it("OP-24 formatOcPath quotes raw slot values containing special chars", () => {
// Closes ClawSweeper P2 on PR #78678: `formatOcPath` previously
// concatenated raw slot values, so a programmatically-constructed
// path with a `/` in the section/item slot would emit extra
// segments and fail to parse back to the same address.
// Use a slot value with `/` (and no internal `.`) — `.` inside
// a slot is the dotted sub-segment delimiter; callers wanting a
// literal `.` in a key should pre-quote that single sub-segment.
it("formatOcPath quotes raw slot values containing special chars", () => {
const constructed = formatOcPath({
file: "config.jsonc",
section: "agents.defaults.models",
@@ -172,22 +158,13 @@ describe("wave-07 oc-path-parse-edges", () => {
expect(parsed.item).toBe('"github-copilot/claude-opus-4-7"');
});
it("OP-25 parseOcPath finds query separator outside quoted keys", () => {
// Closes ClawSweeper P2 on PR #78678: `parseOcPath` previously
// used `indexOf('?')` which split a key like `"foo?bar"` at the
// embedded `?`, breaking advertised quoted-segment support.
it("parseOcPath finds query separator outside quoted keys", () => {
const parsed = parseOcPath('oc://config.jsonc/"foo?bar"?session=daily');
expect(parsed.section).toBe('"foo?bar"');
expect(parsed.session).toBe("daily");
});
it("OP-26 file slot with `/` round-trips via quoting", () => {
// Closes ClawSweeper P2 on PR #78678 (round 4): `parseOcPath` stored
// `path.file` verbatim while `formatOcPath` prefixed it without
// quote-wrapping, so a file like `skills/email-drafter` couldn't
// round-trip — formatter output got re-parsed as file plus section,
// and quoted input leaked the surrounding quotes into filesystem
// resolution.
it("file slot with `/` round-trips via quoting", () => {
const constructed = formatOcPath({
file: "skills/email-drafter",
section: "Tools",
@@ -200,21 +177,14 @@ describe("wave-07 oc-path-parse-edges", () => {
expect(parsed.item).toBe("-1");
});
it("OP-27 file slot with dot extension does NOT get quoted", () => {
// The file slot's quoting trigger excludes `.` because filename
// extensions (`AGENTS.md`, `gateway.jsonc`) are normal — quoting
// them would make canonical form ugly without need.
it("file slot with dot extension does NOT get quoted", () => {
expect(formatOcPath({ file: "AGENTS.md" })).toBe("oc://AGENTS.md");
expect(formatOcPath({ file: "gateway.jsonc", section: "version" })).toBe(
"oc://gateway.jsonc/version",
);
});
it("OP-28 formatOcPath rejects field without item or section", () => {
// Closes Galin P2 (round 8): the nesting guard caught
// `field + section + no item` but missed `field + no section + no item`.
// Such a struct emits `oc://FILE/FIELD` which silently re-parses as
// `{ file, section: FIELD }` — different shape, breaking round-trip.
it("formatOcPath rejects field without item or section", () => {
expect(() => formatOcPath({ file: "X", field: "name" })).toThrow(OcPathError);
try {
formatOcPath({ file: "X", field: "name" });
@@ -224,26 +194,16 @@ describe("wave-07 oc-path-parse-edges", () => {
}
});
it("OP-29 isPattern is quote-aware (literal `*` inside quoted segment)", () => {
// Closes Galin P2 (round 8): `isPattern` previously used
// `slot.split('.')` which shredded a quoted key like `"items.*.glob"`
// and falsely detected the literal `*` as a wildcard, causing
// single-match verbs to reject a concrete path.
it("isPattern is quote-aware (literal `*` inside quoted segment)", () => {
const concrete = parseOcPath('oc://config.jsonc/"items.*.glob"');
expect(isPattern(concrete)).toBe(false);
// Sanity: an unquoted `*` IS still a wildcard.
const wildcard = parseOcPath("oc://config.jsonc/items/*");
expect(isPattern(wildcard)).toBe(true);
});
it("OP-30 getPathLayout is quote-aware", () => {
// Closes Galin P2 (round 8): `getPathLayout` used `slot.split('.')`
// for all three slots, breaking the find-walker / repackPath layout
// contract for quoted segments containing `.`.
it("getPathLayout is quote-aware", () => {
const path = parseOcPath('oc://config.jsonc/"github.com"/repos');
const layout = getPathLayout(path);
// Quoted segment is one sub-segment, not two.
expect(layout.sectionLen).toBe(1);
expect(layout.subs[0]).toBe('"github.com"');
expect(layout.itemLen).toBe(1);

View File

@@ -1,11 +1,3 @@
/**
* Wave 8 — OcPath resolver edges.
*
* Substrate guarantee: `resolveOcPath(ast, ocPath)` returns the matched
* node or `null`. Slug matching is case-insensitive. Field on non-kv
* item returns `null` (not a guess). Frontmatter via the `[frontmatter]`
* sentinel section.
*/
import { describe, expect, it } from "vitest";
import { parseMd } from "../../parse.js";
import { resolveMdOcPath as resolveOcPath } from "../../resolve.js";
@@ -34,30 +26,30 @@ Preamble prose.
- item one
`;
describe("wave-08 oc-path-resolver-edges", () => {
describe("oc-path-resolver-edges", () => {
const { ast } = parseMd(SAMPLE);
it("R-01 root resolves to AST", () => {
it("root resolves to AST", () => {
const m = resolveOcPath(ast, { file: "X.md" });
expect(m?.kind).toBe("root");
});
it("R-02 block by exact slug", () => {
it("block by exact slug", () => {
const m = resolveOcPath(ast, { file: "X.md", section: "boundaries" });
expect(m?.kind).toBe("block");
});
it("R-03 block by case-mismatched slug (Boundaries → boundaries)", () => {
it("block by case-mismatched slug (Boundaries → boundaries)", () => {
const m = resolveOcPath(ast, { file: "X.md", section: "Boundaries" });
expect(m?.kind).toBe("block");
});
it("R-04 block by uppercased slug", () => {
it("block by uppercased slug", () => {
const m = resolveOcPath(ast, { file: "X.md", section: "BOUNDARIES" });
expect(m?.kind).toBe("block");
});
it("R-05 multi-word section by slug", () => {
it("multi-word section by slug", () => {
const m = resolveOcPath(ast, { file: "X.md", section: "multi-word-section" });
expect(m?.kind).toBe("block");
if (m?.kind === "block") {
@@ -65,7 +57,7 @@ describe("wave-08 oc-path-resolver-edges", () => {
}
});
it("R-06 multi-word section by exact heading text (case-folded)", () => {
it("multi-word section by exact heading text (case-folded)", () => {
const m = resolveOcPath(ast, { file: "X.md", section: "Multi-Word Section" });
// The OcPath section is matched case-insensitively against block.slug.
// Block.slug for "Multi-Word Section" is "multi-word-section", and
@@ -75,12 +67,12 @@ describe("wave-08 oc-path-resolver-edges", () => {
expect(m).toBeNull();
});
it("R-07 unknown section returns null", () => {
it("unknown section returns null", () => {
const m = resolveOcPath(ast, { file: "X.md", section: "unknown" });
expect(m).toBeNull();
});
it("R-08 item by slug under known section", () => {
it("item by slug under known section", () => {
const m = resolveOcPath(ast, {
file: "X.md",
section: "tools",
@@ -101,7 +93,7 @@ describe("wave-08 oc-path-resolver-edges", () => {
}
});
it("R-10 item slug for plain bullet uses text", () => {
it("item slug for plain bullet uses text", () => {
const m = resolveOcPath(ast, {
file: "X.md",
section: "boundaries",
@@ -110,7 +102,7 @@ describe("wave-08 oc-path-resolver-edges", () => {
expect(m?.kind).toBe("item");
});
it("R-11 item slug case-insensitive", () => {
it("item slug case-insensitive", () => {
const m = resolveOcPath(ast, {
file: "X.md",
section: "tools",
@@ -119,7 +111,7 @@ describe("wave-08 oc-path-resolver-edges", () => {
expect(m?.kind).toBe("item");
});
it("R-12 item with spaces in key (slugified)", () => {
it("item with spaces in key (slugified)", () => {
const m = resolveOcPath(ast, {
file: "X.md",
section: "tools",
@@ -131,7 +123,7 @@ describe("wave-08 oc-path-resolver-edges", () => {
}
});
it("R-13 unknown item returns null", () => {
it("unknown item returns null", () => {
const m = resolveOcPath(ast, {
file: "X.md",
section: "tools",
@@ -140,7 +132,7 @@ describe("wave-08 oc-path-resolver-edges", () => {
expect(m).toBeNull();
});
it("R-14 item-field matches kv.key (case-insensitive)", () => {
it("item-field matches kv.key (case-insensitive)", () => {
const m = resolveOcPath(ast, {
file: "X.md",
section: "tools",
@@ -150,7 +142,7 @@ describe("wave-08 oc-path-resolver-edges", () => {
expect(m?.kind).toBe("item-field");
});
it("R-15 field on plain (non-kv) item returns null", () => {
it("field on plain (non-kv) item returns null", () => {
const m = resolveOcPath(ast, {
file: "X.md",
section: "boundaries",
@@ -160,7 +152,7 @@ describe("wave-08 oc-path-resolver-edges", () => {
expect(m).toBeNull();
});
it("R-16 field that does not match kv.key returns null", () => {
it("field that does not match kv.key returns null", () => {
const m = resolveOcPath(ast, {
file: "X.md",
section: "tools",
@@ -170,7 +162,7 @@ describe("wave-08 oc-path-resolver-edges", () => {
expect(m).toBeNull();
});
it("R-17 frontmatter via [frontmatter] sentinel section", () => {
it("frontmatter via [frontmatter] sentinel section", () => {
const m = resolveOcPath(ast, {
file: "X.md",
section: "[frontmatter]",
@@ -182,7 +174,7 @@ describe("wave-08 oc-path-resolver-edges", () => {
}
});
it("R-18 frontmatter unknown key returns null", () => {
it("frontmatter unknown key returns null", () => {
const m = resolveOcPath(ast, {
file: "X.md",
section: "[frontmatter]",
@@ -191,7 +183,7 @@ describe("wave-08 oc-path-resolver-edges", () => {
expect(m).toBeNull();
});
it("R-19 frontmatter without field returns null", () => {
it("frontmatter without field returns null", () => {
const m = resolveOcPath(ast, {
file: "X.md",
section: "[frontmatter]",
@@ -199,7 +191,7 @@ describe("wave-08 oc-path-resolver-edges", () => {
expect(m).toBeNull();
});
it("R-20 multiple frontmatter keys with same name — first match wins", () => {
it("multiple frontmatter keys with same name — first match wins", () => {
// Build an AST manually to test
const dupeAst = {
kind: "md" as const,
@@ -222,20 +214,20 @@ describe("wave-08 oc-path-resolver-edges", () => {
}
});
it("R-21 empty AST resolves root only", () => {
it("empty AST resolves root only", () => {
const empty = { kind: "md" as const, raw: "", frontmatter: [], preamble: "", blocks: [] };
expect(resolveOcPath(empty, { file: "X.md" })?.kind).toBe("root");
expect(resolveOcPath(empty, { file: "X.md", section: "any" })).toBeNull();
});
it("R-22 resolver does not mutate the AST", () => {
it("resolver does not mutate the AST", () => {
const before = JSON.stringify(ast);
resolveOcPath(ast, { file: "X.md", section: "tools", item: "gh", field: "gh" });
const after = JSON.stringify(ast);
expect(after).toBe(before);
});
it("R-23 file segment is informational — resolver doesn't check it", () => {
it("file segment is informational — resolver doesn't check it", () => {
// The file name in OcPath is metadata; resolver assumes the AST
// matches. Callers verify file mapping before passing the AST.
const m1 = resolveOcPath(ast, { file: "SOUL.md", section: "tools" });

View File

@@ -1,19 +1,10 @@
/**
* Wave 14 — performance + determinism + immutability.
*
* Substrate guarantees:
* - Parsing scales sub-linearly with file size (no quadratic blowup)
* - Same input produces same AST (no Object.keys / Set order surprises)
* - Resolver does not mutate the AST
* - AST is structurally cloneable (no functions, no cycles)
*/
import { describe, expect, it } from "vitest";
import { emitMd } from "../../emit.js";
import { parseMd } from "../../parse.js";
import { resolveMdOcPath as resolveOcPath } from "../../resolve.js";
describe("wave-14 perf + determinism", () => {
it("PD-01 parses 100 KB file in under 200 ms", () => {
describe("perf + determinism", () => {
it("parses 100 KB file in under 200 ms", () => {
const lines: string[] = [];
for (let i = 0; i < 1000; i++) {
lines.push("## H" + i);
@@ -28,7 +19,7 @@ describe("wave-14 perf + determinism", () => {
expect(elapsed).toBeLessThan(200);
});
it("PD-02 parses 1000 small files in under 500 ms", () => {
it("parses 1000 small files in under 500 ms", () => {
const raw = `## H\n- a\n- b: c\n## I\n- d\n`;
const start = performance.now();
for (let i = 0; i < 1000; i++) {
@@ -38,7 +29,7 @@ describe("wave-14 perf + determinism", () => {
expect(elapsed).toBeLessThan(500);
});
it("PD-03 100k OcPath resolutions on parsed AST in under 500 ms", () => {
it("100k OcPath resolutions on parsed AST in under 500 ms", () => {
const raw = `## A\n- a1\n- a2\n## B\n- b1\n- b2\n## C\n- c1: cv\n`;
const { ast } = parseMd(raw);
const path = { file: "X.md", section: "b", item: "b1" };
@@ -50,7 +41,7 @@ describe("wave-14 perf + determinism", () => {
expect(elapsed).toBeLessThan(500);
});
it("PD-04 same input → byte-identical AST.raw across runs", () => {
it("same input → byte-identical AST.raw across runs", () => {
const raw = `---\nb: 2\na: 1\n---\n## Z\n- z\n## A\n- a\n`;
const a1 = parseMd(raw).ast;
const a2 = parseMd(raw).ast;
@@ -59,7 +50,7 @@ describe("wave-14 perf + determinism", () => {
expect(a1.blocks).toEqual(a2.blocks);
});
it("PD-05 resolveOcPath is non-mutating", () => {
it("resolveOcPath is non-mutating", () => {
const raw = `## A\n- a: x\n## B\n- b\n`;
const { ast } = parseMd(raw);
const before = JSON.stringify(ast);
@@ -69,7 +60,7 @@ describe("wave-14 perf + determinism", () => {
expect(JSON.stringify(ast)).toBe(before);
});
it("PD-06 AST is JSON-serializable (no functions, no cycles)", () => {
it("AST is JSON-serializable (no functions, no cycles)", () => {
const raw = `---\nk: v\n---\n## A\n- a\n\`\`\`ts\nx\n\`\`\`\n| h |\n| - |\n| 1 |\n`;
const { ast } = parseMd(raw);
const serialized = JSON.stringify(ast);
@@ -78,7 +69,7 @@ describe("wave-14 perf + determinism", () => {
expect(parsed.blocks.length).toBe(ast.blocks.length);
});
it("PD-07 emit is non-mutating", () => {
it("emit is non-mutating", () => {
const raw = `## A\n- a\n`;
const { ast } = parseMd(raw);
const before = JSON.stringify(ast);
@@ -88,25 +79,25 @@ describe("wave-14 perf + determinism", () => {
expect(JSON.stringify(ast)).toBe(before);
});
it("PD-08 frontmatter ordering is preserved (insertion order, not alphabetical)", () => {
it("frontmatter ordering is preserved (insertion order, not alphabetical)", () => {
const raw = `---\nz: 1\nm: 2\na: 3\n---\n`;
const { ast } = parseMd(raw);
expect(ast.frontmatter.map((e) => e.key)).toEqual(["z", "m", "a"]);
});
it("PD-09 block ordering is document order, not alphabetical", () => {
it("block ordering is document order, not alphabetical", () => {
const raw = `## Z\n## A\n## M\n`;
const { ast } = parseMd(raw);
expect(ast.blocks.map((b) => b.heading)).toEqual(["Z", "A", "M"]);
});
it("PD-10 item ordering within block is document order", () => {
it("item ordering within block is document order", () => {
const raw = `## H\n- z\n- a\n- m\n`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(["z", "a", "m"]);
});
it("PD-11 large fixture round-trip stays under 100 ms", () => {
it("large fixture round-trip stays under 100 ms", () => {
const lines: string[] = [];
for (let i = 0; i < 500; i++) {
lines.push(`## Section ${i}`);

View File

@@ -1,637 +0,0 @@
/**
* Wave-23 — Pitfall scenarios.
*
* One test per pitfall ID enumerated in
* `packages/oc-paths-substrate/PITFALLS.md` (the substrate-local
* pitfall taxonomy). Tests are grouped by category so a regression in
* any one defense is visible at a glance. Every MITIGATED / REJECTED
* pitfall has a positive validation here; DEFERRED ones are covered
* as documented limits with a `.skip` note.
*
* **Namespace note**: substrate pitfall IDs (P-001 … P-040) are a
* separate namespace from the claws-side `docs/PITFALLS.md`
* governance taxonomy (which uses P-NNN for completely different
* pitfalls — e.g., P-033 there is "Memory poisoning"). The package
* boundary disambiguates.
*/
import { describe, expect, it } from "vitest";
import {
MAX_PATH_LENGTH,
MAX_TRAVERSAL_DEPTH,
OcPathError,
findOcPaths,
formatOcPath,
parseOcPath,
resolveOcPath,
setOcPath,
} from "../../index.js";
import { parseJsonc } from "../../jsonc/parse.js";
import { parseJsonl } from "../../jsonl/parse.js";
// ---------- Encoding pitfalls --------------------------------------------
/**
 * Encoding pitfalls: BOM handling, Unicode normalization, whitespace and
 * control characters in `oc://` path strings.
 */
describe("wave-23 pitfalls — encoding", () => {
  it("P-001 strips leading UTF-8 BOM from path string", () => {
    // Spell the BOM as an escape. A literal U+FEFF is invisible in editors
    // and is routinely dropped by tooling, which would silently turn this
    // into a test of a plain, BOM-less path.
    const bom = "\uFEFF";
    expect(parseOcPath(`${bom}oc://X/Y`).file).toBe("X");
  });

  it("P-002 normalizes path to NFC", () => {
    // Escapes keep the two forms distinct even if the file itself gets
    // normalized by an editor, formatter, or VCS filter.
    const nfc = "caf\u00E9"; // composed: U+00E9
    const nfd = "cafe\u0301"; // decomposed: "e" + U+0301 combining acute
    // Sanity check on the fixture itself: the inputs must really differ,
    // otherwise the assertions below prove nothing.
    expect(nfd).not.toBe(nfc);
    expect(parseOcPath(`oc://X/${nfd}`).section).toBe(nfc);
    expect(parseOcPath(`oc://X/${nfc}`).section).toBe(nfc);
    // Same struct out for both inputs.
    expect(parseOcPath(`oc://X/${nfd}`)).toEqual(parseOcPath(`oc://X/${nfc}`));
  });

  it("P-003 rejects whitespace in identifier-shaped segments", () => {
    expect(() => parseOcPath("oc://X/foo /bar")).toThrow(OcPathError);
    expect(() => parseOcPath("oc://X/ foo")).toThrow(OcPathError);
    expect(() => parseOcPath("oc://X/foo\tbar")).toThrow(OcPathError);
  });

  it("P-003 allows whitespace inside predicate values (content)", () => {
    // Spaces inside a predicate value are legitimate — they filter
    // against actual content.
    expect(() => parseOcPath("oc://X/[name=hello world]")).not.toThrow();
  });

  it("P-004 / P-011 rejects control characters and null bytes", () => {
    expect(() => parseOcPath("oc://X/\x00")).toThrow(/Control character/);
    expect(() => parseOcPath("oc://X/foo\x01bar")).toThrow(/Control character/);
    expect(() => parseOcPath("oc://X/foo\x7Fbar")).toThrow(/Control character/);
  });
});
// ---------- Empty / structural pitfalls ----------------------------------
/**
 * Empty / structural pitfalls: empty segments, malformed predicates,
 * unbalanced brackets and braces, degenerate unions.
 */
describe("wave-23 pitfalls — empty & structural", () => {
  // Wraps a parse in a thunk so `expect(...).toThrow` can observe the throw.
  const parsing = (raw: string) => () => parseOcPath(raw);

  it("P-008 rejects empty segments", () => {
    expect(parsing("oc://X//Y")).toThrow(/Empty segment/);
  });

  it("P-009 rejects empty dotted sub-segments", () => {
    expect(parsing("oc://X/a..b")).toThrow(/Empty dotted sub-segment/);
  });

  it("P-010 rejects scheme-only path", () => {
    expect(parsing("oc://")).toThrow(/Empty oc:\/\/ path/);
  });

  it("P-014 rejects empty predicate key", () => {
    expect(parsing("oc://X/[=foo]")).toThrow(/Malformed predicate/);
  });

  it("P-014 rejects empty predicate value", () => {
    expect(parsing("oc://X/[id=]")).toThrow(/Malformed predicate/);
  });

  it("P-015 accepts bracket segment with no operator as literal sentinel", () => {
    // `[frontmatter]` predates the predicate grammar and stays a literal.
    const { section } = parseOcPath("oc://AGENTS.md/[frontmatter]/key");
    expect(section).toBe("[frontmatter]");
  });

  it("P-016 rejects mismatched brackets", () => {
    expect(parsing("oc://X/[unclosed")).toThrow(OcPathError);
    expect(parsing("oc://X/closed]")).toThrow(OcPathError);
  });

  it("P-016 rejects mismatched braces", () => {
    expect(parsing("oc://X/{a,b")).toThrow(OcPathError);
  });

  it("P-018 rejects empty union", () => {
    expect(parsing("oc://X/{}")).toThrow(/Empty union/);
  });

  it("P-018 rejects union with empty alternative", () => {
    expect(parsing("oc://X/{a,,b}")).toThrow(/Empty alternative/);
  });
});
// ---------- Predicate-content pitfalls -----------------------------------
/**
 * Predicate-content pitfalls: predicate values that contain the path
 * separators `/` and `.` must stay inside the bracket scope.
 */
describe("wave-23 pitfalls — predicate content", () => {
  it("P-012 predicate value containing `/` round-trips", () => {
    // The path-level `/` split must respect bracket boundaries.
    const p = parseOcPath("oc://X/[id=foo/bar]/cmd");
    expect(p.section).toBe("[id=foo/bar]");
    expect(p.item).toBe("cmd");
  });

  it("P-012 findOcPaths matches a leaf whose id contains a slash", () => {
    const ast = parseJsonc(
      '{"steps":[{"id":"foo/bar","cmd":"x"},{"id":"baz","cmd":"y"}]}',
    ).ast;
    const out = findOcPaths(ast, parseOcPath("oc://wf/steps/[id=foo/bar]/cmd"));
    expect(out).toHaveLength(1);
    const match = out[0]?.match;
    // Assert the kind explicitly: a bare `if` guard would let the test pass
    // without checking the value if the match were ever not a leaf.
    expect(match?.kind).toBe("leaf");
    if (match?.kind === "leaf") {
      expect(match.valueText).toBe("x");
    }
  });

  it("P-013 predicate value containing `.` round-trips", () => {
    const p = parseOcPath("oc://X/steps.[id=1.0].cmd");
    expect(p.section).toBe("steps.[id=1.0].cmd");
  });

  it("P-013 findOcPaths matches a leaf whose id is `1.0`", () => {
    const ast = parseJsonc('{"steps":[{"id":"1.0","cmd":"x"},{"id":"2.0","cmd":"y"}]}').ast;
    const out = findOcPaths(ast, parseOcPath("oc://wf/steps/[id=1.0]/cmd"));
    expect(out).toHaveLength(1);
    const match = out[0]?.match;
    // Same reasoning as P-012: fail loudly on an unexpected match kind.
    expect(match?.kind).toBe("leaf");
    if (match?.kind === "leaf") {
      expect(match.valueText).toBe("x");
    }
  });
});
// ---------- Sentinel & collision pitfalls --------------------------------
/**
 * Sentinel / collision pitfalls: positional tokens (`-N`, `$last`, bare
 * numbers) versus literal object keys that happen to look the same.
 */
describe("wave-23 pitfalls — sentinels & collisions", () => {
  it("P-020/openclaw#59934 negative numeric key on object resolves as literal key", () => {
    // Telegram supergroup IDs are negative numbers used as map keys. The
    // positional `-N` token must not hijack them: on a non-indexable
    // container the resolver falls through to a literal-key lookup.
    const { ast } = parseJsonc(
      '{"channels":{"telegram":{"groups":{"-5028303500":{"requireMention":false}}}}}',
    );
    const target = parseOcPath("oc://config/channels.telegram.groups.-5028303500.requireMention");
    const hit = resolveOcPath(ast, target);
    expect(hit).not.toBeNull();
    expect(hit?.kind).toBe("leaf");
    if (hit?.kind === "leaf") {
      expect(hit.valueText).toBe("false");
      expect(hit.leafType).toBe("boolean");
    }
  });

  it("P-020 negative `-N` still works as positional on arrays", () => {
    // Same syntax on an indexable container: positional resolution wins.
    const { ast } = parseJsonc('{"items":[10,20,30]}');
    const hit = resolveOcPath(ast, parseOcPath("oc://X/items/-1"));
    expect(hit?.kind).toBe("leaf");
    if (hit?.kind === "leaf") {
      expect(hit.valueText).toBe("30");
    }
  });

  it("P-020 numeric segment dispatches by node kind (array index vs map key)", () => {
    // One path string, two ASTs: the container kind disambiguates.
    const arrayAst = parseJsonc('{"x":["a","b"]}').ast;
    const mapAst = parseJsonc('{"x":{"0":"a","1":"b"}}').ast;
    const fromArray = resolveOcPath(arrayAst, parseOcPath("oc://config/x/0"));
    const fromMap = resolveOcPath(mapAst, parseOcPath("oc://config/x/0"));
    expect(fromArray?.kind).toBe("leaf");
    expect(fromMap?.kind).toBe("leaf");
    if (fromArray?.kind === "leaf") {
      expect(fromArray.valueText).toBe("a");
    }
    if (fromMap?.kind === "leaf") {
      expect(fromMap.valueText).toBe("a");
    }
  });

  it("P-021 `$last` literal in an object key is shadowed by positional sentinel", () => {
    // Documented v0 limitation: `$last` always means "last entry" and never
    // the literal key. Here it resolves to `foo` -> `bar`, not to the
    // `$last` key's own value.
    const { ast } = parseJsonc('{"$last":"literal-value","foo":"bar"}');
    const hit = resolveOcPath(ast, parseOcPath("oc://X/$last"));
    expect(hit?.kind).toBe("leaf");
    if (hit?.kind === "leaf") {
      expect(hit.valueText).toBe("bar");
    }
  });
});
// ---------- Round-trip pitfalls ------------------------------------------
/**
 * Round-trip pitfall P-023: formatting a parsed path and parsing the
 * result again must yield the same struct.
 */
describe("wave-23 pitfalls — round-trip", () => {
  it("P-023 parseOcPath ∘ formatOcPath is idempotent across path shapes", () => {
    const inputs = [
      "oc://X",
      "oc://X/a",
      "oc://X/a/b",
      "oc://X/a/b/c",
      "oc://X/a.b.c",
      "oc://X/a?session=s1",
      "oc://X/[frontmatter]/key",
      "oc://X/steps/*/command",
      "oc://X/steps/$last/id",
      "oc://X/steps/-2/id",
      "oc://X/steps/{command,run}",
      "oc://X/steps/[id=foo]/cmd",
      "oc://X/steps/#0/foo",
    ];
    for (const s of inputs) {
      const parsed = parseOcPath(s);
      // Route the second parse through formatOcPath. Parsing `s` twice only
      // compared the parser with itself and never exercised the formatter
      // the test title names. Structs are compared rather than strings so
      // the formatter stays free to normalize quoting.
      const reparsed = parseOcPath(formatOcPath(parsed));
      expect(reparsed).toEqual(parsed);
    }
  });
});
// ---------- Sentinel-guard pitfalls --------------------------------------
/**
 * Sentinel guard at the format boundary: path strings end up in telemetry,
 * audit events, error messages and find-result `path` fields, so
 * formatOcPath must refuse a struct that carries the redaction sentinel.
 */
describe("wave-23 pitfalls — sentinel at format boundary (F9)", () => {
  it("formatOcPath rejects an OcPath struct carrying the redaction sentinel", () => {
    // A struct whose section is the sentinel literal must not be rendered
    // into a path string.
    const tainted = { file: "AGENTS.md", section: "__OPENCLAW_REDACTED__" };
    const render = () => formatOcPath(tainted);
    expect(render).toThrow(/sentinel literal/);
  });
});
// ---------- Containment pitfalls -----------------------------------------
/**
 * File-slot containment. `oc://` paths are workspace-relative, so absolute
 * paths and `..` segments are rejected by both parseOcPath and formatOcPath;
 * otherwise a hostile manifest could point path commands at arbitrary
 * filesystem locations.
 */
describe("wave-23 pitfalls — file-slot containment", () => {
  // Defers the parse so the matcher can catch the exception.
  const parsing = (raw: string) => () => parseOcPath(raw);

  it("rejects an absolute POSIX file slot", () => {
    expect(parsing("oc:///etc/passwd")).toThrow(/Empty segment/);
    // Quoted form: same containment violation, reached by a different
    // parse path.
    expect(parsing('oc://"/etc/passwd"/section')).toThrow(/Absolute file slot/);
  });

  it("rejects a Windows drive-letter file slot", () => {
    const driveLetterPaths = [
      'oc://"C:/Windows/System32/foo"/section',
      'oc://"C:\\\\Windows\\\\System32"/section',
    ];
    for (const raw of driveLetterPaths) {
      expect(parsing(raw)).toThrow(/Absolute file slot/);
    }
  });

  it("rejects a leading-backslash file slot", () => {
    expect(parsing('oc://"\\\\srv\\\\share\\\\foo"/section')).toThrow(/Absolute file slot/);
  });

  it("rejects a parent-directory escape via plain `..`", () => {
    for (const raw of ['oc://"../foo"/section', 'oc://".."/section']) {
      expect(parsing(raw)).toThrow(/Parent-directory/);
    }
  });

  it("rejects a parent-directory escape mid-path", () => {
    expect(parsing('oc://"foo/../bar"/section')).toThrow(/Parent-directory/);
  });

  it("does not decode URL-encoded `..` — literal `%2E%2E` is treated as a filename", () => {
    // No URL decoding happens here: `%2E%2E` stays the literal six-character
    // text, not a parent-directory escape. Callers that pre-decode (HTTP
    // layers, browser UI) must normalize before calling parseOcPath. This
    // pins the current behavior as an explicit choice.
    const { file } = parseOcPath('oc://"%2E%2E/foo"/section');
    expect(file).toBe("%2E%2E/foo");
  });

  it("formatOcPath rejects an OcPath struct with absolute file", () => {
    for (const file of ["/etc/passwd", "C:/Windows"]) {
      expect(() => formatOcPath({ file })).toThrow(/Absolute file slot/);
    }
  });

  it("formatOcPath rejects an OcPath struct with parent-directory file", () => {
    for (const file of ["..", "../etc/passwd", "foo/../bar"]) {
      expect(() => formatOcPath({ file })).toThrow(/Parent-directory/);
    }
  });
});
// ---------- formatOcPath ↔ parseOcPath round-trip ------------------------
/**
 * formatOcPath <-> parseOcPath round-trip. The formatter must refuse any
 * struct whose rendered form the parser would reject or re-parse
 * differently (empty dotted sub-segments, control characters), and every
 * shape the parser accepts must survive format -> parse unchanged.
 */
describe("wave-23 pitfalls — format/parse round-trip", () => {
  it("formatOcPath rejects empty dotted sub-segment in a slot", () => {
    // Trailing, leading and doubled dots each leave an empty sub-segment.
    for (const section of ["foo.", ".foo", "foo..bar"]) {
      expect(() => formatOcPath({ file: "a.md", section })).toThrow(/Empty dotted sub-segment/);
    }
  });

  it("formatOcPath rejects control characters in any slot", () => {
    for (const section of ["sec\x00tion", "sec\x01tion", "tab\ttion"]) {
      expect(() => formatOcPath({ file: "a.md", section })).toThrow(/Control character/);
    }
    // The file slot gets the same check as the section slot.
    expect(() => formatOcPath({ file: "a\x00b.md" })).toThrow(/Control character/);
  });

  it("round-trips every shape parseOcPath accepts", () => {
    // Compare structs, not strings: the formatter may normalize quoting, so
    // the contract is that its output re-parses to the same struct.
    const shapes = [
      "oc://X",
      "oc://X/a",
      "oc://X/a/b",
      "oc://X/a/b/c",
      "oc://X/a.b.c",
      "oc://X/a?session=s1",
      "oc://X/[frontmatter]/key",
      "oc://X/steps/$last/id",
      "oc://X/steps/-2/id",
      "oc://X/steps/[id=foo]/cmd",
      "oc://X/steps/{a,b}/cmd",
      'oc://X/"foo/bar"/baz',
      'oc://X/agents/"anthropic/claude-opus-4-7"/alias',
    ];
    for (const shape of shapes) {
      const original = parseOcPath(shape);
      const roundTripped = parseOcPath(formatOcPath(original));
      expect(roundTripped).toEqual(original);
    }
  });
});
// ---------- Performance pitfalls -----------------------------------------
/**
 * Performance / limit pitfalls: walker depth cap, path-length cap on both
 * parse and format, and parser depth caps for JSONC and JSONL input.
 */
describe("wave-23 pitfalls — performance & limits", () => {
  it("P-031 / P-033 walker depth cap throws on pathological recursion", () => {
    // The walker cap is separate from the parser cap, so the AST is built
    // by hand here instead of going through parseJsonc. This is the shape
    // callers produce when they construct ASTs programmatically.
    type V = import("../../jsonc/ast.js").JsoncValue;
    const levels = MAX_TRAVERSAL_DEPTH + 50;
    let chain: V = { kind: "string", value: "x", line: 1 };
    for (let level = 0; level < levels; level += 1) {
      chain = { kind: "object", entries: [{ key: "a", value: chain, line: 1 }], line: 1 };
    }
    const ast = {
      kind: "jsonc" as const,
      raw: "",
      root: { kind: "object" as const, entries: [{ key: "root", value: chain, line: 1 }], line: 1 },
    };
    const walkEverything = () => findOcPaths(ast, parseOcPath("oc://X/**"));
    expect(walkEverything).toThrow(/MAX_TRAVERSAL_DEPTH/);
  });

  it("P-032 rejects path strings longer than MAX_PATH_LENGTH", () => {
    const tooLong = `oc://X/${"a".repeat(MAX_PATH_LENGTH)}`;
    expect(() => parseOcPath(tooLong)).toThrow(/exceeds .* bytes/);
  });

  it("P-032 path at the cap parses cleanly", () => {
    // Pad so the whole string, prefix included, is exactly MAX_PATH_LENGTH.
    const prefix = "oc://X/";
    const atCap = prefix + "a".repeat(MAX_PATH_LENGTH - prefix.length);
    expect(() => parseOcPath(atCap)).not.toThrow();
  });

  it("P-032 formatOcPath enforces the same cap on output", () => {
    // Symmetric bound: without it the formatter could emit a string that
    // parseOcPath rejects immediately, breaking the round-trip.
    const oversized = { file: "X", section: "a".repeat(MAX_PATH_LENGTH) };
    expect(() => formatOcPath(oversized)).toThrow(/Formatted oc:\/\/ exceeds/);
  });

  it("parser depth cap fires on pathological JSONC nesting (F6)", () => {
    // Deep nesting must produce the structured OC_JSONC_DEPTH_EXCEEDED
    // diagnostic with a null root, not a raw call-stack RangeError.
    const depth = MAX_TRAVERSAL_DEPTH + 100;
    const raw = "[".repeat(depth) + "0" + "]".repeat(depth);
    const { ast, diagnostics } = parseJsonc(raw);
    expect(ast.root).toBeNull();
    expect(diagnostics.some((d) => d.code === "OC_JSONC_DEPTH_EXCEEDED")).toBe(true);
  });

  it("parser depth cap fires on JSONL line with deeply-nested JSON (F6)", () => {
    // Each value line is parsed on its own and gets its own depth cap. The
    // JSONL layer reports the line as OC_JSONL_LINE_MALFORMED; it does not
    // promote the JSONC depth code.
    const depth = MAX_TRAVERSAL_DEPTH + 50;
    const nested = '{"a":'.repeat(depth) + '"x"' + "}".repeat(depth);
    const { diagnostics } = parseJsonl(nested + "\n");
    expect(diagnostics.some((d) => d.code === "OC_JSONL_LINE_MALFORMED")).toBe(true);
  });
});
// ---------- Coercion pitfalls --------------------------------------------
/**
 * Coercion pitfalls: string values written through setOcPath are coerced
 * to the existing leaf's type with strict, locale-independent rules.
 */
describe("wave-23 pitfalls — coercion", () => {
  it("P-029 numeric coercion is locale-independent", () => {
    // A decimal point is accepted; a decimal comma is refused as a
    // parse-error instead of being read as a number.
    const { ast } = parseJsonc('{"x":1.0}');
    const target = parseOcPath("oc://X/x");

    const withPoint = setOcPath(ast, target, "1.5");
    expect(withPoint.ok).toBe(true);

    const withComma = setOcPath(ast, target, "1,5");
    expect(withComma.ok).toBe(false);
    if (!withComma.ok) {
      expect(withComma.reason).toBe("parse-error");
    }
  });

  it("P-030 boolean coercion is exact-match lowercase", () => {
    const { ast } = parseJsonc('{"x":true}');
    expect(setOcPath(ast, parseOcPath("oc://X/x"), "false").ok).toBe(true);
    // Anything other than the exact lowercase literal is refused.
    for (const rejected of ["False", "TRUE", "yes"]) {
      expect(setOcPath(ast, parseOcPath("oc://X/x"), rejected).ok).toBe(false);
    }
  });
});
// ---------- Reserved character pitfalls ----------------------------------
/**
 * Reserved-character pitfalls: the `?` query separator and out-of-range
 * negative indices.
 */
describe("wave-23 pitfalls — reserved characters", () => {
  it("P-026 rejects `?` outside the query separator position", () => {
    // The first `?` splits off the query, so the section is what precedes it.
    const withQuery = parseOcPath("oc://X/foo?session=s");
    expect(withQuery.section).toBe("foo");
    // A bare trailing `?` (no key, no `=`) is tolerated and does not throw.
    const bareQuestionMark = () => parseOcPath("oc://X/foo?");
    expect(bareQuestionMark).not.toThrow();
  });

  it("P-040 negative-index magnitude is bounded", () => {
    // An out-of-range negative index resolves to null instead of crashing.
    const { ast } = parseJsonc('{"x":[1,2,3]}');
    const farOutOfRange = resolveOcPath(ast, parseOcPath("oc://X/x/-9999999999"));
    expect(farOutOfRange).toBeNull();
    const lastElement = resolveOcPath(ast, parseOcPath("oc://X/x/-1"));
    expect(lastElement?.kind).toBe("leaf");
  });
});
// ---------- Sentinel-redaction pitfall (P-036) ---------------------------
describe("wave-23 pitfalls — redaction sentinel", () => {
// Placeholder entry that keeps P-036 visible in this file's pitfall index.
// The real coverage is in the wave-21-sentinel-cross-kind suite.
// NOTE(review): the assertion below is a tautology and exercises no
// substrate code, so it cannot detect a regression in the sentinel guard.
it("P-036 sentinel guard activates at emit time (covered by wave-21)", () => {
expect(true).toBe(true);
});
});
// ---------- DEFERRED — documented limits ---------------------------------
/**
 * Deferred pitfalls: documented v0 limits, registered as skipped tests so
 * they show up in the report without asserting anything.
 */
describe("wave-23 pitfalls — deferred (v0 limits)", () => {
  // One title per deferred pitfall, registered in this order.
  const deferredTitles = [
    "P-005 slash literal in key — v1: quoted segments",
    "P-006 dot literal in key — v1: quoted segments",
    "P-017 nested unions {a,{b,c}} — v1: parser stack",
    "P-019 wildcard inside wildcard — v1: pattern composition",
    "P-025 leading-zero numeric `01` — v1: explicit form",
    "P-027 `&` in segments — v1: percent-encoding",
    "P-028 percent-encoded segments — v1: rfc3986 layer",
    "P-034 ast mutation between resolve & consume — caller invariant",
    "P-035 stale paths from prior find — caller invariant",
  ];
  for (const title of deferredTitles) {
    it.skip(title, () => {});
  }
});
// ---------- Injection pitfalls (C12 / W12) -------------------------------
describe("wave-23 pitfalls — injection (caller-supplied hostile input)", () => {
// P-037: a hostile path string. The substrate's job is to either
// parse safely or reject with `OcPathError` — never let undefined
// behavior leak. These cases lock the rejection-or-safe contract.
it("P-037a control characters in path body are rejected", () => {
expect(() => parseOcPath("oc://a\x00b")).toThrow(OcPathError);
expect(() => parseOcPath("oc://a\x01b/c")).toThrow(OcPathError);
expect(() => parseOcPath("oc://a/b\x1Fc")).toThrow(OcPathError);
});
it("P-037b NUL byte anywhere in path is rejected", () => {
expect(() => parseOcPath("oc://X.md/sec\x00tion")).toThrow(OcPathError);
});
it("P-037c BOM at start of path is stripped, not interpreted", () => {
// BOM is unicode U+FEFF (0xFEFF). The substrate strips it before
// scheme check; without stripping, the BOM-prefixed string would
// fail the `oc://` scheme test.
const path = parseOcPath("oc://X.md/section");
expect(path.file).toBe("X.md");
expect(path.section).toBe("section");
});
it("P-037d session query is parsed only via the documented `?session=...` form", () => {
// Legal session form parses cleanly.
const ok = parseOcPath("oc://X.md/sec?session=cron:daily");
expect(ok.section).toBe("sec");
expect(ok.session).toBe("cron:daily");
// Substrate is lenient about loose `?garbage` — caller's
// responsibility to construct paths from `formatOcPath`. Confirm
// the loose form does NOT silently invent a session value.
const loose = parseOcPath("oc://X.md/sec?garbage");
expect(loose.session).toBeUndefined();
});
it("P-037e unescaped `&` in segments is rejected", () => {
expect(() => parseOcPath("oc://X.md/a&b")).toThrow(OcPathError);
});
it("P-037f unescaped `%` in segments is rejected", () => {
expect(() => parseOcPath("oc://X.md/a%b")).toThrow(OcPathError);
});
it("P-037g empty file slot is rejected", () => {
expect(() => parseOcPath("oc:///section")).toThrow(OcPathError);
});
it("P-037h backslash-escape attempts are not treated as path traversal", () => {
// No special meaning — the literal backslash is just a regular
// character. Doesn't allow escaping forward slashes.
expect(() => parseOcPath("oc://X.md/a\\../b")).toThrow(OcPathError);
});
// P-038: predicate-value injection. `[k=v]` predicates filter
// matches; a hostile `v` containing regex metachars, brackets, or
// operators must NOT escape the predicate scope or be interpreted
// as a regex.
it("P-038a regex metacharacters in predicate value match literally", () => {
const ast = parseJsonc('{ "items": [ {"name": "a.*"}, {"name": "abc"} ] }').ast;
// Looking for the literal string "a.*" — should match only the
// first item, not "abc" (which would match if `.*` were treated
// as a regex).
const matches = findOcPaths(ast, parseOcPath("oc://X.jsonc/items/[name=a.*]"));
expect(matches).toHaveLength(1);
});
it("P-038b nested-bracket attempts in predicate value are kept literal", () => {
// The substrate is permissive on nested brackets — they're part
// of the literal predicate value, not interpreted as path syntax.
// The match would be against the literal string "a[b]"; a
// resolver that finds zero matches fails closed.
const path = parseOcPath("oc://X.jsonc/items/[name=a[b]]");
expect(path.item).toBe("[name=a[b]]");
// No data has the literal value `a[b]` here, so finding empty.
const ast = parseJsonc('{ "items": [ {"name": "abc"} ] }').ast;
expect(findOcPaths(ast, path)).toHaveLength(0);
});
it("P-038c equals-sign in predicate value is treated as part of the value", () => {
// The FIRST `=` separates key from value; subsequent `=`s belong
// to the value. The rule keeps the predicate parser simple —
// operators that prefix-match (`!=`, `<=`, `>=`) are tried
// before `=`, then `=` consumes the rest.
const ast = parseJsonc('{ "items": [ {"k": "a=b"}, {"k": "c"} ] }').ast;
const matches = findOcPaths(ast, parseOcPath("oc://X.jsonc/items/[k=a=b]"));
expect(matches).toHaveLength(1);
});
it("P-038d control characters in predicate value are rejected", () => {
expect(() => parseOcPath("oc://X.jsonc/items/[k=a\x00b]")).toThrow(OcPathError);
});
it("P-038e empty predicate body is rejected", () => {
expect(() => parseOcPath("oc://X.jsonc/items/[]")).toThrow(OcPathError);
});
it("P-038f predicate-shaped bracket without operator is treated as literal sentinel", () => {
  // With no operator inside the brackets, `[name]` is read as a literal
  // bracket sentinel (the `[frontmatter]` style) and kept as a plain path
  // segment; predicate parsing only starts once an operator is present.
  // This test pins that behavior.
  const { item } = parseOcPath("oc://X.jsonc/items/[name]");
  expect(item).toBe("[name]");
});
it("P-038g predicate-shaped bracket with unsupported operator parses as literal", () => {
  // `~` is outside the supported-operator set, so the bracket is not
  // recognized as a predicate and passes through as a literal segment.
  // Documented v1.1 behavior — a later version may introduce `~` (regex)
  // together with an SDK_VERSION bump.
  const { item } = parseOcPath("oc://X.jsonc/items/[k~v]");
  expect(item).toBe("[k~v]");
});
});

View File

@@ -1,10 +1,3 @@
/**
* Wave 12 — real-world fixtures.
*
* Eight workspace files (one per upstream-recognized workspace
* filename) — each parsed, resolved, and round-tripped to verify the
* substrate handles realistic content.
*/
import { readFileSync } from "node:fs";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";
@@ -20,8 +13,8 @@ function load(name: string): string {
return readFileSync(join(FIXTURES, name), "utf-8");
}
describe("wave-12 real-world-fixtures", () => {
it("F-01 SOUL.md parses + round-trips", () => {
describe("real-world-fixtures", () => {
it("SOUL.md parses + round-trips", () => {
const raw = load("SOUL.md");
const { ast, diagnostics } = parseMd(raw);
expect(diagnostics).toEqual([]);
@@ -30,7 +23,7 @@ describe("wave-12 real-world-fixtures", () => {
expect(ast.blocks.length).toBeGreaterThan(0);
});
it("F-02 AGENTS.md parses + resolves Tools section", () => {
it("AGENTS.md parses + resolves Tools section", () => {
const raw = load("AGENTS.md");
const { ast } = parseMd(raw);
expect(emitMd(ast)).toBe(raw);
@@ -41,7 +34,7 @@ describe("wave-12 real-world-fixtures", () => {
}
});
it("F-03 MEMORY.md frontmatter scope resolves via [frontmatter]", () => {
it("MEMORY.md frontmatter scope resolves via [frontmatter]", () => {
const raw = load("MEMORY.md");
const { ast } = parseMd(raw);
expect(emitMd(ast)).toBe(raw);
@@ -56,7 +49,7 @@ describe("wave-12 real-world-fixtures", () => {
}
});
it("F-04 TOOLS.md tool-guidance section resolves by slug", () => {
it("TOOLS.md tool-guidance section resolves by slug", () => {
const raw = load("TOOLS.md");
const { ast } = parseMd(raw);
expect(emitMd(ast)).toBe(raw);
@@ -67,7 +60,7 @@ describe("wave-12 real-world-fixtures", () => {
expect(guidance?.kind).toBe("block");
});
it("F-05 IDENTITY.md sections resolvable by slug", () => {
it("IDENTITY.md sections resolvable by slug", () => {
const raw = load("IDENTITY.md");
const { ast } = parseMd(raw);
expect(emitMd(ast)).toBe(raw);
@@ -78,7 +71,7 @@ describe("wave-12 real-world-fixtures", () => {
expect(trust?.kind).toBe("block");
});
it("F-06 USER.md Preferences items extracted", () => {
it("USER.md Preferences items extracted", () => {
const raw = load("USER.md");
const { ast } = parseMd(raw);
expect(emitMd(ast)).toBe(raw);
@@ -92,7 +85,7 @@ describe("wave-12 real-world-fixtures", () => {
}
});
it("F-07 HEARTBEAT.md schedules — H2 sections as triggers", () => {
it("HEARTBEAT.md schedules — H2 sections as triggers", () => {
const raw = load("HEARTBEAT.md");
const { ast } = parseMd(raw);
expect(emitMd(ast)).toBe(raw);
@@ -102,7 +95,7 @@ describe("wave-12 real-world-fixtures", () => {
expect(slugs).toContain("every-4h-wake");
});
it("F-08 SKILL.md frontmatter has name + description + tier", () => {
it("SKILL.md frontmatter has name + description + tier", () => {
const raw = load("SKILL.md");
const { ast } = parseMd(raw);
expect(emitMd(ast)).toBe(raw);
@@ -112,13 +105,13 @@ describe("wave-12 real-world-fixtures", () => {
expect(fmKeys).toContain("tier");
});
it("F-09 BOOTSTRAP.md round-trips", () => {
it("BOOTSTRAP.md round-trips", () => {
const raw = load("BOOTSTRAP.md");
const { ast } = parseMd(raw);
expect(emitMd(ast)).toBe(raw);
});
it("F-10 all 8 fixtures combined round-trip-clean (sanity)", () => {
it("all 8 fixtures combined round-trip-clean (sanity)", () => {
const names = [
"SOUL.md",
"AGENTS.md",

View File

@@ -1,11 +1,3 @@
/**
* Wave 10 — round-trip property tests.
*
* Substrate guarantee: `emitMd(parse(raw)) === raw` for all inputs the
* parser accepts. This wave exercises that property over a generated
* corpus of synthetic markdown shapes and verifies parser idempotence
* (`parse(emitMd(parse(raw))) === parse(raw)` modulo `raw`).
*/
import { describe, expect, it } from "vitest";
import { emitMd } from "../../emit.js";
import { parseMd } from "../../parse.js";
@@ -14,8 +6,8 @@ function roundTrip(raw: string): string {
return emitMd(parseMd(raw).ast);
}
describe("wave-10 roundtrip-property", () => {
it("RT-01 byte-fidelity over 100 generated shapes", () => {
describe("roundtrip-property", () => {
it("byte-fidelity over 100 generated shapes", () => {
const inputs = generateCorpus(100);
for (const raw of inputs) {
try {
@@ -30,7 +22,7 @@ describe("wave-10 roundtrip-property", () => {
}
});
it("RT-02 parser idempotence (parse → emit → parse → identical AST shape)", () => {
it("parser idempotence (parse → emit → parse → identical AST shape)", () => {
const inputs = generateCorpus(50);
for (const raw of inputs) {
const a = parseMd(raw).ast;
@@ -42,7 +34,7 @@ describe("wave-10 roundtrip-property", () => {
}
});
it("RT-03 stable output for identical input", () => {
it("stable output for identical input", () => {
const raw = `---\nname: x\n---\n\n## A\n- a\n## B\n- b: c\n`;
const out1 = roundTrip(raw);
const out2 = roundTrip(raw);
@@ -51,7 +43,7 @@ describe("wave-10 roundtrip-property", () => {
expect(out2).toBe(out3);
});
it("RT-04 ordering deterministic (no Object.keys / Set ordering surprises)", () => {
it("ordering deterministic (no Object.keys / Set ordering surprises)", () => {
const raw = `---\nb: 2\na: 1\nc: 3\n---\n## Z\n- z\n## A\n- a\n`;
const a1 = parseMd(raw).ast;
const a2 = parseMd(raw).ast;
@@ -59,38 +51,37 @@ describe("wave-10 roundtrip-property", () => {
expect(a1.blocks.map((b) => b.heading)).toEqual(a2.blocks.map((b) => b.heading));
});
it("RT-05 round-trip preserves comment-like lines (no comment recognition at substrate)", () => {
it("round-trip preserves comment-like lines (no comment recognition at substrate)", () => {
const raw = `## H\n\n<!-- a comment -->\n- bullet\n`;
expect(roundTrip(raw)).toBe(raw);
});
it("RT-06 round-trip preserves indented blocks (substrate doesn't reflow)", () => {
it("round-trip preserves indented blocks (substrate doesn't reflow)", () => {
const raw = `## H\n\n indented code-ish block\n more indented\n`;
expect(roundTrip(raw)).toBe(raw);
});
it("RT-07 round-trip preserves blockquotes", () => {
it("round-trip preserves blockquotes", () => {
const raw = `## H\n\n> quoted line 1\n> quoted line 2\n`;
expect(roundTrip(raw)).toBe(raw);
});
it("RT-08 round-trip preserves images / links", () => {
it("round-trip preserves images / links", () => {
const raw = `## H\n\n![alt](path/to/img.png)\n[link](http://example.com)\n`;
expect(roundTrip(raw)).toBe(raw);
});
it("RT-09 round-trip preserves HTML", () => {
it("round-trip preserves HTML", () => {
const raw = `## H\n\n<details><summary>x</summary>body</details>\n`;
expect(roundTrip(raw)).toBe(raw);
});
it("RT-10 round-trip preserves consecutive headings with no body between", () => {
it("round-trip preserves consecutive headings with no body between", () => {
const raw = `## A\n## B\n## C\n`;
expect(roundTrip(raw)).toBe(raw);
});
});
// ---------- corpus generator -------------------------------------------------
function generateCorpus(count: number): string[] {
const corpus: string[] = [];

View File

@@ -0,0 +1,253 @@
import { describe, expect, it } from "vitest";
import {
MAX_PATH_LENGTH,
MAX_TRAVERSAL_DEPTH,
OcPathError,
findOcPaths,
formatOcPath,
parseOcPath,
resolveOcPath,
setOcPath,
} from "../../index.js";
import { parseJsonc } from "../../jsonc/parse.js";
import { parseJsonl } from "../../jsonl/parse.js";
// Encoding-level edge cases for `parseOcPath`: BOM handling, Unicode
// normalization, whitespace, and control characters.
describe("encoding edges", () => {
  it("strips leading UTF-8 BOM from path string", () => {
    // The BOM is written as an escape so it stays visible in the source and
    // cannot be silently dropped by an editor, formatter, or copy/paste —
    // which would leave this test passing without exercising anything.
    const withBom = "\uFEFFoc://X/Y";
    expect(withBom.charCodeAt(0)).toBe(0xfeff);
    expect(parseOcPath(withBom).file).toBe("X");
  });
  it("normalizes path segments to NFC", () => {
    // Explicit escapes keep the two spellings distinct regardless of how
    // the file itself is normalized on disk.
    const nfc = "caf\u00E9"; // precomposed: é (U+00E9)
    const nfd = "cafe\u0301"; // decomposed: e + combining acute (U+0301)
    // Sanity: the inputs really differ and really are NFC-equivalent.
    expect(nfd).not.toBe(nfc);
    expect(nfd.normalize("NFC")).toBe(nfc);
    expect(parseOcPath(`oc://X/${nfd}`)).toEqual(parseOcPath(`oc://X/${nfc}`));
  });
  it("rejects whitespace inside identifier-shaped segments", () => {
    expect(() => parseOcPath("oc://X/foo /bar")).toThrow(OcPathError);
    expect(() => parseOcPath("oc://X/foo\tbar")).toThrow(OcPathError);
  });
  it("rejects control characters and NUL bytes anywhere in the path", () => {
    expect(() => parseOcPath("oc://X/\x00")).toThrow(/Control character/);
    expect(() => parseOcPath("oc://X/foo\x01bar")).toThrow(/Control character/);
    expect(() => parseOcPath("oc://X/foo\x7Fbar")).toThrow(/Control character/);
    // Inside a predicate value only the error type is pinned, not the message.
    expect(() => parseOcPath("oc://X.md/items/[k=a\x00b]")).toThrow(OcPathError);
  });
});
// The file slot must never address something outside the workspace:
// absolute paths and parent-directory hops are refused by both the parser
// and the formatter.
describe("file-slot containment", () => {
  // Defers the parse so `expect(...).toThrow` can observe the failure.
  const parsing = (raw: string) => () => parseOcPath(raw);
  // Same, for the struct → string direction.
  const formatting = (file: string) => () => formatOcPath({ file });

  it("rejects absolute POSIX file slot", () => {
    expect(parsing("oc:///etc/passwd")).toThrow(/Empty segment/);
    expect(parsing('oc://"/etc/passwd"/section')).toThrow(/Absolute file slot/);
  });

  it("rejects Windows drive-letter file slot", () => {
    const forwardSlashes = 'oc://"C:/Windows/System32/foo"/section';
    const backslashes = 'oc://"C:\\\\Windows\\\\System32"/section';
    expect(parsing(forwardSlashes)).toThrow(/Absolute file slot/);
    expect(parsing(backslashes)).toThrow(/Absolute file slot/);
  });

  it("rejects leading-backslash UNC path", () => {
    const unc = 'oc://"\\\\srv\\\\share\\\\foo"/section';
    expect(parsing(unc)).toThrow(/Absolute file slot/);
  });

  it("rejects parent-directory escapes", () => {
    expect(parsing('oc://"../foo"/section')).toThrow(/Parent-directory/);
    expect(parsing('oc://"foo/../bar"/section')).toThrow(/Parent-directory/);
  });

  it("does not URL-decode `%2E%2E` — substrate isn't an HTTP layer", () => {
    const parsed = parseOcPath('oc://"%2E%2E/foo"/section');
    expect(parsed.file).toBe("%2E%2E/foo");
  });

  it("formatOcPath rejects absolute and parent-directory file slots", () => {
    expect(formatting("/etc/passwd")).toThrow(/Absolute file slot/);
    expect(formatting("C:/Windows")).toThrow(/Absolute file slot/);
    expect(formatting("..")).toThrow(/Parent-directory/);
    expect(formatting("foo/../bar")).toThrow(/Parent-directory/);
  });
});
// Size limits: path-string length on parse and format, plus nesting depth
// in the walker and in the jsonc / jsonl parsers.
describe("path-string and traversal caps", () => {
  const PREFIX = "oc://X/";

  it("parseOcPath rejects strings longer than MAX_PATH_LENGTH", () => {
    const tooLong = PREFIX + "a".repeat(MAX_PATH_LENGTH);
    expect(() => parseOcPath(tooLong)).toThrow(/exceeds .* bytes/);
  });

  it("parseOcPath accepts a path right at the cap", () => {
    // Pad so the whole string is exactly MAX_PATH_LENGTH characters long.
    const atCap = PREFIX + "a".repeat(MAX_PATH_LENGTH - PREFIX.length);
    expect(() => parseOcPath(atCap)).not.toThrow();
  });

  it("formatOcPath enforces the same cap on output", () => {
    const oversized = { file: "X", section: "a".repeat(MAX_PATH_LENGTH) };
    expect(() => formatOcPath(oversized)).toThrow(/Formatted oc:\/\/ exceeds/);
  });

  it("walker depth cap fires on synthetic deeply-nested AST", () => {
    // The AST is assembled by hand so the parser's own depth cap never gets
    // a chance to reject it — only the walker's defense is under test.
    type V = import("../../jsonc/ast.js").JsoncValue;
    let node: V = { kind: "string", value: "x", line: 1 };
    for (let depth = 0; depth < MAX_TRAVERSAL_DEPTH + 50; depth += 1) {
      node = { kind: "object", entries: [{ key: "a", value: node, line: 1 }], line: 1 };
    }
    const root = {
      kind: "object" as const,
      entries: [{ key: "root", value: node, line: 1 }],
      line: 1,
    };
    const ast = { kind: "jsonc" as const, raw: "", root };
    const everything = parseOcPath("oc://X/**");
    expect(() => findOcPaths(ast, everything)).toThrow(/MAX_TRAVERSAL_DEPTH/);
  });

  it("jsonc parser surfaces a structured diagnostic on pathological nesting", () => {
    const layers = MAX_TRAVERSAL_DEPTH + 100;
    const source = "[".repeat(layers) + "0" + "]".repeat(layers);
    const { ast, diagnostics } = parseJsonc(source);
    expect(ast.root).toBeNull();
    const codes = diagnostics.map((d) => d.code);
    expect(codes.includes("OC_JSONC_DEPTH_EXCEEDED")).toBe(true);
  });

  it("jsonl per-line parser flags malformed deeply-nested values", () => {
    // One line holding `{"a":{"a":…"x"…}}` nested past the traversal cap.
    const layers = MAX_TRAVERSAL_DEPTH + 50;
    const line = '{"a":'.repeat(layers) + '"x"' + "}".repeat(layers);
    const { diagnostics } = parseJsonl(line + "\n");
    const codes = diagnostics.map((d) => d.code);
    expect(codes.includes("OC_JSONL_LINE_MALFORMED")).toBe(true);
  });
});
// The redaction sentinel must not pass through `formatOcPath` as a segment.
describe("sentinel literal at format boundary", () => {
  it("formatOcPath rejects a struct carrying the redaction sentinel", () => {
    const tainted = { file: "AGENTS.md", section: "__OPENCLAW_REDACTED__" };
    const format = () => formatOcPath(tainted);
    expect(format).toThrow(/sentinel literal/);
  });
});
// A numeric-looking segment is a key lookup on objects and a positional
// index on arrays; these cases pin each side of that dispatch.
describe("numeric segments dispatch by node kind", () => {
  it("negative numeric key on object resolves as literal key (openclaw#59934)", () => {
    // Telegram supergroup IDs are negative integers that appear as map keys.
    const source =
      '{"channels":{"telegram":{"groups":{"-5028303500":{"requireMention":false}}}}}';
    const { ast } = parseJsonc(source);
    const target = parseOcPath("oc://config/channels.telegram.groups.-5028303500.requireMention");
    const hit = resolveOcPath(ast, target);
    expect(hit?.kind).toBe("leaf");
  });

  it("`-1` still works as positional on arrays", () => {
    const { ast } = parseJsonc('{"items":[10,20,30]}');
    const hit = resolveOcPath(ast, parseOcPath("oc://X/items/-1"));
    expect(hit?.kind === "leaf" && hit.valueText).toBe("30");
  });

  it("`$last` literal key on an object is shadowed by the positional sentinel", () => {
    // `$last` picks the final entry ("foo") even though a key literally
    // named `$last` exists on the same object.
    const { ast } = parseJsonc('{"$last":"literal-value","foo":"bar"}');
    const hit = resolveOcPath(ast, parseOcPath("oc://X/$last"));
    expect(hit?.kind === "leaf" && hit.valueText).toBe("bar");
  });

  it("out-of-range negative index returns null, not crash", () => {
    const { ast } = parseJsonc('{"x":[1,2,3]}');
    const farOutOfRange = parseOcPath("oc://X/x/-9999999999");
    expect(resolveOcPath(ast, farOutOfRange)).toBeNull();
  });
});
// String values handed to `setOcPath` are coerced against the existing leaf
// type; only the exact canonical spellings are accepted.
describe("setOcPath value coercion is locale-independent and exact-match", () => {
  it("number coercion accepts `1.5`, refuses `1,5`", () => {
    const { ast } = parseJsonc('{"x":1.0}');
    const assign = (text: string) => setOcPath(ast, parseOcPath("oc://X/x"), text);

    expect(assign("1.5").ok).toBe(true);

    // A comma decimal separator is not a number here.
    const rejected = assign("1,5");
    expect(rejected.ok).toBe(false);
    if (!rejected.ok) {
      expect(rejected.reason).toBe("parse-error");
    }
  });

  it("boolean coercion accepts `true` / `false` only", () => {
    const { ast } = parseJsonc('{"x":true}');
    const assign = (text: string) => setOcPath(ast, parseOcPath("oc://X/x"), text);

    expect(assign("false").ok).toBe(true);
    // Case variants and yes/no style spellings are all refused.
    for (const lookalike of ["False", "TRUE", "yes"]) {
      expect(assign(lookalike).ok).toBe(false);
    }
  });
});
// Predicate values are opaque literals: regex metacharacters, `=`, `/` and
// `.` inside them must not change how the path is split or matched.
describe("predicate-value injection is contained", () => {
  // Counts the matches of `rawPath` against the JSONC document `source`.
  const countMatches = (source: string, rawPath: string) =>
    findOcPaths(parseJsonc(source).ast, parseOcPath(rawPath)).length;

  it("regex metacharacters in predicate value match literally, not as regex", () => {
    const { ast } = parseJsonc('{"items":[{"name":"a.*"},{"name":"abc"}]}');
    const query = parseOcPath("oc://X.jsonc/items/[name=a.*]");
    expect(findOcPaths(ast, query)).toHaveLength(1);
  });

  it("equals-sign in predicate value is treated as part of the value", () => {
    const { ast } = parseJsonc('{"items":[{"k":"a=b"},{"k":"c"}]}');
    const query = parseOcPath("oc://X.jsonc/items/[k=a=b]");
    expect(findOcPaths(ast, query)).toHaveLength(1);
  });

  it("predicate-shaped bracket without operator is a literal sentinel", () => {
    const { item } = parseOcPath("oc://X.jsonc/items/[name]");
    expect(item).toBe("[name]");
  });

  it("rejects empty predicate body and empty key/value", () => {
    expect(() => parseOcPath("oc://X.jsonc/items/[]")).toThrow(OcPathError);
    // A predicate missing its key or its value is malformed.
    for (const raw of ["oc://X/[=foo]", "oc://X/[id=]"]) {
      expect(() => parseOcPath(raw)).toThrow(/Malformed predicate/);
    }
  });

  it("predicate value containing `/` round-trips and matches literally", () => {
    // The `/` inside the brackets must not split the segment.
    const parsed = parseOcPath("oc://X/[id=foo/bar]/cmd");
    expect(parsed.section).toBe("[id=foo/bar]");
    const { ast } = parseJsonc('{"steps":[{"id":"foo/bar","cmd":"x"},{"id":"baz","cmd":"y"}]}');
    const hits = findOcPaths(ast, parseOcPath("oc://wf/steps/[id=foo/bar]/cmd"));
    expect(hits).toHaveLength(1);
  });

  it("predicate value containing `.` round-trips and matches literally", () => {
    const { ast } = parseJsonc('{"steps":[{"id":"1.0","cmd":"x"},{"id":"2.0","cmd":"y"}]}');
    const hits = findOcPaths(ast, parseOcPath("oc://wf/steps/[id=1.0]/cmd"));
    expect(hits).toHaveLength(1);
  });
});
// Malformed path structure is refused up front: unbalanced delimiters,
// empty unions and alternatives, empty dotted parts, and reserved
// characters.
describe("structural rejection", () => {
  it("rejects mismatched brackets and braces", () => {
    const unbalanced = ["oc://X/[unclosed", "oc://X/closed]", "oc://X/{a,b"];
    for (const raw of unbalanced) {
      expect(() => parseOcPath(raw)).toThrow(OcPathError);
    }
  });

  it("rejects empty union and empty alternative", () => {
    const emptyUnion = () => parseOcPath("oc://X/{}");
    const emptyAlternative = () => parseOcPath("oc://X/{a,,b}");
    expect(emptyUnion).toThrow(/Empty union/);
    expect(emptyAlternative).toThrow(/Empty alternative/);
  });

  it("rejects empty dotted sub-segment in formatOcPath output", () => {
    // Trailing dot, leading dot, and doubled dot each leave an empty part.
    for (const section of ["foo.", ".foo", "foo..bar"]) {
      expect(() => formatOcPath({ file: "a.md", section })).toThrow(/Empty dotted/);
    }
  });

  it("rejects unescaped `&` and `%` in segments", () => {
    for (const raw of ["oc://X.md/a&b", "oc://X.md/a%b"]) {
      expect(() => parseOcPath(raw)).toThrow(OcPathError);
    }
  });
});

View File

@@ -1,15 +1,3 @@
/**
* Wave 21 — sentinel guard across all 3 kinds.
*
* Substrate guarantee: emit refuses to write a CALLER-INJECTED
* `__OPENCLAW_REDACTED__` literal. Round-trip mode trusts parsed bytes
* (a workspace file legitimately containing the sentinel — in a code
* block, in a pasted error log — would otherwise become a workspace-
* wide emit DoS). Render mode walks every leaf, so a caller-injected
* sentinel via `setOcPath` always fails. Callers that want strict
* pre-existing-byte detection (e.g., LKG fingerprint verification)
* opt in via `acceptPreExistingSentinel: false`.
*/
import { describe, expect, it } from "vitest";
import { emitMd } from "../../emit.js";
import { setJsoncOcPath } from "../../jsonc/edit.js";
@@ -21,8 +9,8 @@ import { parseOcPath } from "../../oc-path.js";
import { parseMd } from "../../parse.js";
import { OcEmitSentinelError, REDACTED_SENTINEL } from "../../sentinel.js";
describe("wave-21 sentinel guard cross-kind", () => {
it("S-01 jsonc round-trip echoes safely when raw contains pre-existing sentinel", () => {
describe("sentinel guard cross-kind", () => {
it("jsonc round-trip echoes safely when raw contains pre-existing sentinel", () => {
// Pre-existing sentinel bytes are trusted — see emit-policy comment
// in jsonc/emit.ts. The strict mode below is the opt-in path for
// callers who want LKG-style fingerprint verification.
@@ -34,21 +22,21 @@ describe("wave-21 sentinel guard cross-kind", () => {
expect(() => emitJsonc(ast, { acceptPreExistingSentinel: false })).toThrow(OcEmitSentinelError);
});
it("S-02 jsonl round-trip echoes safely; strict mode rejects", () => {
it("jsonl round-trip echoes safely; strict mode rejects", () => {
const raw = `{"x":"${REDACTED_SENTINEL}"}\n`;
const ast = parseJsonl(raw).ast;
expect(emitJsonl(ast)).toBe(raw);
expect(() => emitJsonl(ast, { acceptPreExistingSentinel: false })).toThrow(OcEmitSentinelError);
});
it("S-03 md round-trip echoes safely; strict mode rejects", () => {
it("md round-trip echoes safely; strict mode rejects", () => {
const raw = `## Body\n\n- ${REDACTED_SENTINEL}\n`;
const ast = parseMd(raw).ast;
expect(emitMd(ast)).toBe(raw);
expect(() => emitMd(ast, { acceptPreExistingSentinel: false })).toThrow(OcEmitSentinelError);
});
it("S-04 jsonc render mode walks every leaf for sentinel", () => {
it("jsonc render mode walks every leaf for sentinel", () => {
const ast = parseJsonc('{ "x": "ok" }').ast;
const tampered = {
...ast,
@@ -66,7 +54,7 @@ describe("wave-21 sentinel guard cross-kind", () => {
expect(() => emitJsonc(tampered, { mode: "render" })).toThrow(OcEmitSentinelError);
});
it("S-05 jsonl render mode walks every value-line leaf", () => {
it("jsonl render mode walks every value-line leaf", () => {
const ast = parseJsonl('{"a":"ok"}\n').ast;
const tampered = {
...ast,
@@ -91,7 +79,7 @@ describe("wave-21 sentinel guard cross-kind", () => {
expect(() => emitJsonl(tampered, { mode: "render" })).toThrow(OcEmitSentinelError);
});
it("S-06 setJsoncOcPath itself throws when the new value contains the sentinel", () => {
it("setJsoncOcPath itself throws when the new value contains the sentinel", () => {
// The substrate guard fires at write-time: setJsoncOcPath rebuilds
// raw via render mode emit, which scans every leaf. Defense-in-depth
// — even if a caller forgets to call emit afterward, the sentinel
@@ -105,7 +93,7 @@ describe("wave-21 sentinel guard cross-kind", () => {
).toThrow(OcEmitSentinelError);
});
it("S-07 sentinel embedded in deep nesting — render mode catches the leaf", () => {
it("sentinel embedded in deep nesting — render mode catches the leaf", () => {
// Round-trip echoes the pre-existing bytes (the workspace contract:
// a parsed file containing the sentinel as data is not "writing" it
// on emit). Render mode walks every leaf and rejects this caller-
@@ -116,33 +104,33 @@ describe("wave-21 sentinel guard cross-kind", () => {
expect(() => emitJsonc(ast, { mode: "render" })).toThrow(OcEmitSentinelError);
});
it("S-08 sentinel inside an array element triggers guard in render mode", () => {
it("sentinel inside an array element triggers guard in render mode", () => {
const raw = JSON.stringify({ arr: ["ok", REDACTED_SENTINEL, "ok"] });
const ast = parseJsonc(raw).ast;
expect(() => emitJsonc(ast, { mode: "render" })).toThrow(OcEmitSentinelError);
});
it("S-09 sentinel as object key in raw — strict mode catches it", () => {
it("sentinel as object key in raw — strict mode catches it", () => {
const raw = `{ "${REDACTED_SENTINEL}": 1 }`;
const ast = parseJsonc(raw).ast;
expect(emitJsonc(ast)).toBe(raw); // default-mode echo
expect(() => emitJsonc(ast, { acceptPreExistingSentinel: false })).toThrow(OcEmitSentinelError);
});
it("S-10 sentinel in jsonl malformed line — strict mode catches it", () => {
it("sentinel in jsonl malformed line — strict mode catches it", () => {
const raw = `${REDACTED_SENTINEL}\n`;
const ast = parseJsonl(raw).ast;
expect(emitJsonl(ast)).toBe(raw); // round-trip echoes verbatim
expect(() => emitJsonl(ast, { acceptPreExistingSentinel: false })).toThrow(OcEmitSentinelError);
});
it("S-11 partial sentinel substring does NOT trigger guard", () => {
it("partial sentinel substring does NOT trigger guard", () => {
const raw = '{ "x": "OPENCLAW_REDACTED" }';
const ast = parseJsonc(raw).ast;
expect(() => emitJsonc(ast)).not.toThrow();
});
it("S-12 sentinel guard error message includes the OcPath context (render mode)", () => {
it("sentinel guard error message includes the OcPath context (render mode)", () => {
// Render mode is the path that actually rejects caller-injected
// sentinel — round-trip just echoes, so the error context surfaces
// when render walks the offending leaf and constructs the path.

View File

@@ -1,36 +1,29 @@
/**
* Wave 9 — sentinel guard at every emit leaf.
*
* Substrate guarantee: `__OPENCLAW_REDACTED__` literal anywhere in the
* emitted bytes throws `OcEmitSentinelError`. Round-trip mode catches
* sentinels in `raw`; render mode walks every leaf.
*/
import { describe, expect, it } from "vitest";
import { emitMd } from "../../emit.js";
import { parseMd } from "../../parse.js";
import { OcEmitSentinelError, REDACTED_SENTINEL, guardSentinel } from "../../sentinel.js";
describe("wave-09 sentinel-guard", () => {
it("S-01 sentinel constant matches the literal", () => {
describe("sentinel-guard", () => {
it("sentinel constant matches the literal", () => {
expect(REDACTED_SENTINEL).toBe("__OPENCLAW_REDACTED__");
});
it("S-02 guardSentinel passes normal strings", () => {
it("guardSentinel passes normal strings", () => {
expect(() => guardSentinel("safe", "oc://X.md")).not.toThrow();
});
it("S-03 guardSentinel passes non-string types", () => {
it("guardSentinel passes non-string types", () => {
expect(() => guardSentinel(42, "oc://X.md")).not.toThrow();
expect(() => guardSentinel(null, "oc://X.md")).not.toThrow();
expect(() => guardSentinel(undefined, "oc://X.md")).not.toThrow();
expect(() => guardSentinel({}, "oc://X.md")).not.toThrow();
});
it("S-04 guardSentinel throws on exact match", () => {
it("guardSentinel throws on exact match", () => {
expect(() => guardSentinel(REDACTED_SENTINEL, "oc://X.md")).toThrow(OcEmitSentinelError);
});
it("S-05 guardSentinel throws on substring matches (sentinel embedded in larger string)", () => {
it("guardSentinel throws on substring matches (sentinel embedded in larger string)", () => {
// Substring scan — the sentinel anywhere in the value is a leak,
// not just exact equality. A hostile caller smuggling
// `prefix__OPENCLAW_REDACTED__suffix` would have bypassed the old
@@ -40,7 +33,7 @@ describe("wave-09 sentinel-guard", () => {
);
});
it("S-06 error attaches the OcPath context", () => {
it("error attaches the OcPath context", () => {
try {
guardSentinel(REDACTED_SENTINEL, "oc://config/plugins.entries.foo.token");
expect.fail("should have thrown");
@@ -52,20 +45,20 @@ describe("wave-09 sentinel-guard", () => {
}
});
it("S-07 round-trip echoes pre-existing sentinel; strict mode rejects", () => {
it("round-trip echoes pre-existing sentinel; strict mode rejects", () => {
const raw = "## Section\n\n- token: __OPENCLAW_REDACTED__\n";
const { ast } = parseMd(raw);
expect(emitMd(ast)).toBe(raw);
expect(() => emitMd(ast, { acceptPreExistingSentinel: false })).toThrow(OcEmitSentinelError);
});
it("S-08 round-trip emit allows sentinel-free content", () => {
it("round-trip emit allows sentinel-free content", () => {
const raw = "## Section\n\n- token: redacted-but-not-sentinel\n";
const { ast } = parseMd(raw);
expect(() => emitMd(ast)).not.toThrow();
});
it("S-09 render mode catches sentinel in frontmatter", () => {
it("render mode catches sentinel in frontmatter", () => {
const ast = {
kind: "md" as const,
raw: "",
@@ -76,7 +69,7 @@ describe("wave-09 sentinel-guard", () => {
expect(() => emitMd(ast, { mode: "render" })).toThrow(OcEmitSentinelError);
});
it("S-10 render mode catches sentinel in preamble", () => {
it("render mode catches sentinel in preamble", () => {
const ast = {
kind: "md" as const,
raw: "",
@@ -87,7 +80,7 @@ describe("wave-09 sentinel-guard", () => {
expect(() => emitMd(ast, { mode: "render" })).toThrow(OcEmitSentinelError);
});
it("S-11 render mode catches sentinel in block bodyText", () => {
it("render mode catches sentinel in block bodyText", () => {
const ast = {
kind: "md" as const,
raw: "",
@@ -108,7 +101,7 @@ describe("wave-09 sentinel-guard", () => {
expect(() => emitMd(ast, { mode: "render" })).toThrow(OcEmitSentinelError);
});
it("S-12 render mode catches sentinel in item kv.value", () => {
it("render mode catches sentinel in item kv.value", () => {
const ast = {
kind: "md" as const,
raw: "",
@@ -138,21 +131,21 @@ describe("wave-09 sentinel-guard", () => {
);
});
it("S-13 sentinel-as-substring in raw — strict mode catches it", () => {
it("sentinel-as-substring in raw — strict mode catches it", () => {
const raw = `Some prose ${REDACTED_SENTINEL} more prose.\n`;
const { ast } = parseMd(raw);
expect(emitMd(ast)).toBe(raw);
expect(() => emitMd(ast, { acceptPreExistingSentinel: false })).toThrow(OcEmitSentinelError);
});
it("S-14 multiple sentinel occurrences in raw — strict mode catches them", () => {
it("multiple sentinel occurrences in raw — strict mode catches them", () => {
const raw = `## A\n${REDACTED_SENTINEL}\n${REDACTED_SENTINEL}\n`;
const { ast } = parseMd(raw);
expect(emitMd(ast)).toBe(raw);
expect(() => emitMd(ast, { acceptPreExistingSentinel: false })).toThrow(OcEmitSentinelError);
});
it("S-15 fileNameForGuard appears in the error path", () => {
it("fileNameForGuard appears in the error path", () => {
const ast = {
kind: "md" as const,
raw: "",

View File

@@ -1,10 +1,3 @@
/**
* Universal verbs — `setOcPath` + `resolveOcPath` test surface.
*
* Every test exercises the universal entry point. The substrate
* dispatches via `ast.kind` and coerces value strings based on AST
* shape at the path location.
*/
import { describe, expect, it } from "vitest";
import { emitMd } from "../emit.js";
import { emitJsonc } from "../jsonc/emit.js";
@@ -15,7 +8,6 @@ import { parseOcPath } from "../oc-path.js";
import { parseMd } from "../parse.js";
import { detectInsertion, resolveOcPath, setOcPath } from "../universal.js";
// ---------- detectInsertion ------------------------------------------------
describe("detectInsertion", () => {
it("returns null for plain paths", () => {
@@ -46,7 +38,6 @@ describe("detectInsertion", () => {
});
});
// ---------- resolveOcPath — universal across kinds -------------------------
describe("resolveOcPath — md AST", () => {
const md = parseMd("---\nname: github\n---\n\n## Boundaries\n\n- enabled: true\n").ast;
@@ -173,7 +164,6 @@ describe("resolveOcPath — insertion-point detection", () => {
});
});
// ---------- setOcPath — leaf assignment ------------------------------------
describe("setOcPath — md leaf", () => {
it("replaces frontmatter value", () => {
@@ -286,7 +276,6 @@ describe("setOcPath — jsonl leaf", () => {
});
});
// ---------- setOcPath — insertion ------------------------------------------
describe("setOcPath — md insertion", () => {
it("appends item to section with `+`", () => {
@@ -432,7 +421,6 @@ describe("setOcPath — jsonl insertion (session append)", () => {
});
});
// ---------- Cross-cutting properties ---------------------------------------
describe("setOcPath — cross-cutting properties", () => {
it("is non-mutating across all kinds", () => {

View File

@@ -1,28 +1,14 @@
/**
* Universal `setOcPath` and `resolveOcPath` — the public verbs.
* Universal `setOcPath` / `resolveOcPath` / `detectInsertion`.
* Addressing is universal; encoding is per-kind. Callers pass any AST
* + path + value; the substrate dispatches on `ast.kind` and coerces
* the value based on the AST shape at the resolution point.
*
* **Strategic frame**: addressing is universal. Encoding is per-kind.
* The OcPath syntax encodes WHAT to do (set leaf vs. insert vs. address
* a structural node); the AST kind encodes HOW the substrate carries it
* out. Callers pass any AST + a path + a string value; the substrate
* dispatches via `ast.kind` and coerces the value based on the path's
* syntax and the AST shape at the resolution point.
*
* **Path syntax vocabulary** (v0):
*
* oc://FILE/section/item/field → leaf address (set/replace value)
* oc://FILE/section/+ → end-insertion at section
* oc://FILE/section/+key → keyed insertion (object key add)
* oc://FILE/section/+0 → indexed insertion (array splice)
* oc://FILE/+ → file-root insertion (jsonl line append, md new section)
*
* **Coercion at leaves** is driven by the AST type at the resolution point:
* - md leaf → value used verbatim (md is text-native)
* - jsonc/jsonl leaf, existing string → value verbatim
* - jsonc/jsonl leaf, existing number → parseFloat (parse-error if NaN)
* - jsonc/jsonl leaf, existing boolean → 'true'/'false' literal
* - jsonc/jsonl leaf, existing null → only `value === 'null'`
* - insertion → `JSON.parse(value)` for jsonc/jsonl; raw text for md
* oc://FILE/section/item/field → leaf address
* oc://FILE/section/+ → end-insertion
* oc://FILE/section/+key → keyed insertion
* oc://FILE/section/+0 → indexed insertion
* oc://FILE/+ → file-root insertion
*
* @module @openclaw/oc-path/universal
*/
@@ -54,26 +40,9 @@ import { resolveMdOcPath } from "./resolve.js";
export type OcAst = MdAst | JsoncAst | JsonlAst;
/**
* Universal resolve result. Same shape regardless of AST kind so
* consumers branch only on `match.kind`.
*
* `leaf` carries the value as a string — the canonical leaf form on
* the wire, suitable for direct comparison or display. Numeric/bool
* leaves are stringified deterministically (`String(42)` → `'42'`,
* `String(true)` → `'true'`).
*
* `node` describes which kind of structural node the path resolved to
* (md-block, jsonc-object, jsonl-line, etc.) — the descriptor lets
* tooling format / drill in without re-parsing the kind tag.
*
* `insertion-point` is returned when the path's terminal segment is
* an insertion marker (`+`, `+key`, `+nnn`) and the parent is a valid
* container.
*
* **`line`** is the 1-based source line of the matched node, or `1`
* for the root / synthetic constructions where no source line exists.
* Lint rules use it directly for diagnostic positioning instead of
* walking the kind-specific AST a second time.
* Universal resolve result — same shape across AST kinds. `leaf` values
* are string-coerced (numbers/bools stringified deterministically).
* `line` is 1-based; root/synthetic nodes use `1`.
*/
export type OcMatch =
| { readonly kind: "root"; readonly ast: OcAst; readonly line: number }
@@ -119,13 +88,9 @@ export type SetResult =
readonly detail?: string;
};
// ---------- Insertion-syntax detection -------------------------------------
/**
* Inspect the path for an insertion marker on the deepest segment.
* A segment of `+`, `+<key>`, or `+<index>` indicates insertion at the
* parent. Returns the parent path (with insertion segment stripped) +
* the marker; or `null` for a plain (non-insertion) path.
* Insertion marker on the deepest path segment: `+`, `+<key>`, or
* `+<index>`. Returns parent path + marker; null for plain paths.
*/
export interface InsertionInfo {
readonly parentPath: OcPath;
@@ -133,37 +98,23 @@ export interface InsertionInfo {
}
export function detectInsertion(path: OcPath): InsertionInfo | null {
// Find the deepest defined segment.
const segments: Array<{ slot: "section" | "item" | "field"; value: string }> = [];
if (path.section !== undefined) {
segments.push({ slot: "section", value: path.section });
}
if (path.item !== undefined) {
segments.push({ slot: "item", value: path.item });
}
if (path.field !== undefined) {
segments.push({ slot: "field", value: path.field });
}
if (segments.length === 0) {
return null;
}
if (path.section !== undefined) segments.push({ slot: "section", value: path.section });
if (path.item !== undefined) segments.push({ slot: "item", value: path.item });
if (path.field !== undefined) segments.push({ slot: "field", value: path.field });
if (segments.length === 0) return null;
const last = segments[segments.length - 1];
if (!last.value.startsWith("+")) {
return null;
}
if (!last.value.startsWith("+")) return null;
const rest = last.value.slice(1);
let marker: InsertionInfo["marker"];
if (rest.length === 0) {
marker = "+";
} else if (/^\d+$/.test(rest)) {
marker = { kind: "indexed", index: Number(rest) };
} else {
marker = { kind: "keyed", key: rest };
}
const marker: InsertionInfo["marker"] =
rest.length === 0
? "+"
: /^\d+$/.test(rest)
? { kind: "indexed", index: Number(rest) }
: { kind: "keyed", key: rest };
// Strip the deepest segment from the path.
const parentPath: OcPath = {
file: path.file,
...(last.slot !== "section" && path.section !== undefined ? { section: path.section } : {}),
@@ -174,22 +125,10 @@ export function detectInsertion(path: OcPath): InsertionInfo | null {
return { parentPath, marker };
}
// ---------- Universal resolve ----------------------------------------------
/**
* Resolve an `OcPath` against any AST. Returns a kind-agnostic match
* shape or `null` when the path doesn't resolve.
*
* Insertion-marker paths return `{kind: 'insertion-point', container}`
* if the parent is a valid container; otherwise `null`.
*/
/** Resolve an `OcPath` against any AST. Throws on wildcard patterns. */
export function resolveOcPath(ast: OcAst, path: OcPath): OcMatch | null {
// Wildcard guard: `resolveOcPath` is the single-match verb. Wildcards
// belong to `findOcPaths` (multi-match). Throw with a structured code
// (consistent with `setOcPath`'s `wildcard-not-allowed` discriminator)
// — silent `null` here is indistinguishable from "path doesn't
// resolve", so consumers couldn't tell whether they should switch to
// findOcPaths or accept the address as missing.
// Single-match verb: wildcards belong to findOcPaths. Throw with a
// structured code so consumers can route to the right verb.
if (hasWildcard(path)) {
throw new OcPathError(
`resolveOcPath received a wildcard pattern; use findOcPaths instead: ${formatOcPath(path)}`,
@@ -198,9 +137,7 @@ export function resolveOcPath(ast: OcAst, path: OcPath): OcMatch | null {
);
}
const insertion = detectInsertion(path);
if (insertion !== null) {
return resolveInsertion(ast, insertion);
}
if (insertion !== null) return resolveInsertion(ast, insertion);
switch (ast.kind) {
case "md":
@@ -210,14 +147,11 @@ export function resolveOcPath(ast: OcAst, path: OcPath): OcMatch | null {
case "jsonl":
return resolveJsonlToUniversal(ast, path);
}
return null;
}
function resolveMdToUniversal(ast: MdAst, path: OcPath): OcMatch | null {
const m = resolveMdOcPath(ast, path);
if (m === null) {
return null;
}
if (m === null) return null;
switch (m.kind) {
case "root":
return { kind: "root", ast, line: 1 };
@@ -230,21 +164,13 @@ function resolveMdToUniversal(ast: MdAst, path: OcPath): OcMatch | null {
case "item-field":
return { kind: "leaf", valueText: m.value, leafType: "string", line: m.node.line };
}
return null;
}
/**
 * Adapt a jsonc-specific resolution into the kind-agnostic `OcMatch`.
 *
 * @param ast  The jsonc AST to resolve against.
 * @param path The address to resolve.
 * @returns `null` when `resolveJsoncOcPath` finds nothing; a `root` match
 *   (line 1) for the document root; otherwise whatever `jsoncValueToMatch`
 *   produces for the resolved value.
 */
function resolveJsoncToUniversal(ast: JsoncAst, path: OcPath): OcMatch | null {
  const m = resolveJsoncOcPath(ast, path);
  if (m === null) return null;
  if (m.kind === "root") return { kind: "root", ast, line: 1 };
  // Object entries report the entry node's line alongside the entry's value.
  if (m.kind === "object-entry") return jsoncValueToMatch(m.node.value, m.node.line);
  // Remaining case: the match node is the value itself; its `line` may be
  // absent, in which case line 1 is reported.
  return jsoncValueToMatch(m.node, m.node.line ?? 1);
}
@@ -263,33 +189,20 @@ function jsoncValueToMatch(value: JsoncValue, line: number): OcMatch {
case "null":
return { kind: "leaf", valueText: "null", leafType: "null", line };
}
throw new Error(`unreachable: jsoncValueToMatch kind`);
}
/**
 * Adapt a jsonl-specific resolution into the kind-agnostic `OcMatch`.
 *
 * @param ast  The jsonl AST to resolve against.
 * @param path The address to resolve.
 * @returns `null` when `resolveJsonlOcPath` finds nothing; a `root` match
 *   (line 1) for the file root; a `jsonl-line` node descriptor for a whole
 *   line; otherwise whatever `jsoncValueToMatch` produces for the value
 *   addressed inside a line.
 */
function resolveJsonlToUniversal(ast: JsonlAst, path: OcPath): OcMatch | null {
  const m = resolveJsonlOcPath(ast, path);
  if (m === null) return null;
  if (m.kind === "root") return { kind: "root", ast, line: 1 };
  if (m.kind === "line") return { kind: "node", descriptor: "jsonl-line", line: m.node.line };
  // Inside-line jsonc nodes always have line=1 (the inside-line parser
  // restarts numbering per jsonl line); use the JsonlLine's file-level
  // line (`m.line`) instead, since every inside-line node sits there.
  if (m.kind === "object-entry") return jsoncValueToMatch(m.node.value, m.line);
  return jsoncValueToMatch(m.node, m.line);
}
function resolveInsertion(ast: OcAst, info: InsertionInfo): OcMatch | null {
// For an insertion to be valid the parent must resolve to a container
// we know how to extend. Inspect the parent.
switch (ast.kind) {
case "md":
return resolveMdInsertion(ast, info);
@@ -298,25 +211,17 @@ function resolveInsertion(ast: OcAst, info: InsertionInfo): OcMatch | null {
case "jsonl":
return resolveJsonlInsertion(ast, info);
}
return null;
}
function resolveMdInsertion(ast: MdAst, info: InsertionInfo): OcMatch | null {
const p = info.parentPath;
// oc://FILE/+ → file-root insertion (new section)
if (p.section === undefined) {
return { kind: "insertion-point", container: "md-file", line: 1 };
}
// oc://FILE/[frontmatter]/+key → frontmatter add
if (p.section === undefined) return { kind: "insertion-point", container: "md-file", line: 1 };
if (p.section === "[frontmatter]") {
return { kind: "insertion-point", container: "md-frontmatter", line: 1 };
}
// oc://FILE/section/+ → append item to section
if (p.item === undefined && p.field === undefined) {
const m = resolveMdOcPath(ast, p);
if (m === null || m.kind !== "block") {
return null;
}
if (m === null || m.kind !== "block") return null;
return { kind: "insertion-point", container: "md-section", line: m.node.line };
}
return null;
@@ -349,35 +254,20 @@ function resolveJsoncInsertion(ast: JsoncAst, info: InsertionInfo): OcMatch | nu
}
/**
 * Resolve an insertion-marker path against a jsonl AST.
 *
 * jsonl insertion only makes sense at file level (`oc://FILE/+`), so any
 * parent path that still carries a `section` is rejected.
 *
 * @param ast  The jsonl AST; only `ast.lines` is read.
 * @param info Insertion info; only `info.parentPath.section` is read.
 * @returns `null` when the parent path has a section; otherwise an
 *   `insertion-point` on the `jsonl-file` container whose `line` is the last
 *   line's number plus one (`1` for a file with no lines) so consumers can
 *   render the position correctly.
 */
function resolveJsonlInsertion(ast: JsonlAst, info: InsertionInfo): OcMatch | null {
  if (info.parentPath.section !== undefined) return null;
  // Read the tail through a checked local so an empty `lines` array falls
  // back to 0 without an unchecked index access.
  const tail = ast.lines[ast.lines.length - 1];
  const lastLine = tail !== undefined ? tail.line : 0;
  return { kind: "insertion-point", container: "jsonl-file", line: lastLine + 1 };
}
// ---------- Universal set --------------------------------------------------
/**
* Replace or insert at `path` with `value` (always a string).
* Substrate dispatches via `ast.kind` and coerces value at leaves
* based on the existing AST shape at the path location.
*
* For insertion-marker paths (`+`, `+key`, `+nnn`) the value is parsed
* as kind-appropriate content (JSON for jsonc/jsonl; plain text for md).
*
* Returns a structured result; never throws on parser-tolerated input.
* Sentinel-guard violations DO throw `OcEmitSentinelError` (defense in
* depth — refuse to write redacted content even when caller "asked").
* Replace or insert at `path`. Coerces value at leaves based on the
* existing AST shape; for insertion paths value is parsed as
* kind-appropriate content (JSON for jsonc/jsonl; raw text for md).
* Sentinel-guard violations throw `OcEmitSentinelError`.
*/
export function setOcPath(ast: OcAst, path: OcPath, value: string): SetResult {
// Wildcard guard: `setOcPath` writes a single concrete leaf. A pattern
// would be ambiguous (which match wins?) so we reject early. Callers
// who want multi-set should `findOcPaths(...)` then `setOcPath` per
// resolved path — the explicit loop is the right shape.
if (hasWildcard(path)) {
return {
ok: false,
@@ -387,109 +277,72 @@ export function setOcPath(ast: OcAst, path: OcPath, value: string): SetResult {
}
const insertion = detectInsertion(path);
if (insertion !== null) {
return setInsertion(ast, insertion, value);
switch (ast.kind) {
case "md":
return setMdInsertion(ast, insertion, value);
case "jsonc":
return setJsoncInsertion(ast, insertion, value);
case "jsonl":
return setJsonlInsertion(ast, insertion, value);
}
}
switch (ast.kind) {
case "md":
return setMdLeaf(ast, path, value);
case "md": {
const r = setMdOcPath(ast, path, value);
return r.ok ? { ok: true, ast: r.ast } : { ok: false, reason: r.reason };
}
case "jsonc":
return setJsoncLeaf(ast, path, value);
return setStructuredLeaf(ast, path, value, resolveJsoncOcPath, setJsoncOcPath);
case "jsonl":
return setJsonlLeaf(ast, path, value);
return setStructuredLeaf(ast, path, value, resolveJsonlOcPath, setJsonlOcPath, () => {
// jsonl line replacement: value must be JSON for the whole line.
const parsed = tryParseJson(value);
if (parsed === undefined) {
return { ok: false, reason: "parse-error", detail: "line replacement requires JSON value" };
}
const r = setJsonlOcPath(ast, path, jsonToJsoncValue(parsed));
return r.ok ? { ok: true, ast: r.ast } : { ok: false, reason: r.reason };
});
}
throw new Error(`unreachable: setOcPath kind`);
}
function setMdLeaf(ast: MdAst, path: OcPath, value: string): SetResult {
const r = setMdOcPath(ast, path, value);
if (r.ok) {
return { ok: true, ast: r.ast };
}
return { ok: false, reason: r.reason };
}
function setJsoncLeaf(ast: JsoncAst, path: OcPath, value: string): SetResult {
// Inspect the existing leaf to determine target type for coercion.
const existing = resolveJsoncOcPath(ast, path);
if (existing === null) {
return { ok: false, reason: "unresolved" };
}
// Resolve → reject root/line → coerce by existing leaf type → set →
// wrap. The optional `onLine` handles jsonl's whole-line replacement.
function setStructuredLeaf<A extends OcAst, M extends StructuredLeafMatch>(
ast: A,
path: OcPath,
value: string,
resolve: (a: A, p: OcPath) => M | null,
set: (a: A, p: OcPath, c: JsoncValue) => SetOpResult<A>,
onLine?: () => SetResult,
): SetResult {
const existing = resolve(ast, path);
if (existing === null) return { ok: false, reason: "unresolved" };
if (existing.kind === "root") {
return {
ok: false,
reason: "not-writable",
detail: "root replacement is not supported via setOcPath",
};
}
const leafValue = existing.kind === "object-entry" ? existing.node.value : existing.node;
const coerced = coerceJsoncLeaf(value, leafValue);
if (coerced === null) {
return {
ok: false,
reason: "parse-error",
detail: `cannot coerce "${value}" to ${leafValue.kind}`,
};
}
const r = setJsoncOcPath(ast, path, coerced);
if (r.ok) {
return { ok: true, ast: r.ast };
}
return { ok: false, reason: r.reason };
}
function setJsonlLeaf(ast: JsonlAst, path: OcPath, value: string): SetResult {
const existing = resolveJsonlOcPath(ast, path);
if (existing === null) {
return { ok: false, reason: "unresolved" };
}
if (existing.kind === "root") {
return {
ok: false,
reason: "not-writable",
detail: "root replacement is not supported via setOcPath",
};
return { ok: false, reason: "not-writable", detail: "root replacement is not supported via setOcPath" };
}
if (existing.kind === "line") {
// Replacing a whole line — value should be JSON.
const parsed = tryParseJson(value);
if (parsed === undefined) {
return { ok: false, reason: "parse-error", detail: `line replacement requires JSON value` };
}
const r = setJsonlOcPath(ast, path, jsonToJsoncValue(parsed));
if (r.ok) {
return { ok: true, ast: r.ast };
}
return { ok: false, reason: r.reason };
return onLine !== undefined ? onLine() : { ok: false, reason: "not-writable" };
}
// Field on a line — leaf coercion.
const leafValue = existing.kind === "object-entry" ? existing.node.value : existing.node;
const coerced = coerceJsoncLeaf(value, leafValue);
if (coerced === null) {
return {
ok: false,
reason: "parse-error",
detail: `cannot coerce "${value}" to ${leafValue.kind}`,
};
return { ok: false, reason: "parse-error", detail: `cannot coerce "${value}" to ${leafValue.kind}` };
}
const r = setJsonlOcPath(ast, path, coerced);
if (r.ok) {
return { ok: true, ast: r.ast };
}
return { ok: false, reason: r.reason };
const r = set(ast, path, coerced);
return r.ok ? { ok: true, ast: r.ast } : { ok: false, reason: r.reason };
}
function setInsertion(ast: OcAst, info: InsertionInfo, value: string): SetResult {
switch (ast.kind) {
case "md":
return setMdInsertion(ast, info, value);
case "jsonc":
return setJsoncInsertion(ast, info, value);
case "jsonl":
return setJsonlInsertion(ast, info, value);
}
throw new Error(`unreachable: setInsertion kind`);
}
/**
 * Minimal match shape `setStructuredLeaf` needs from a kind-specific
 * resolver (it is the constraint on that function's `M` type parameter).
 * `root` and `line` carry no payload here; `object-entry` exposes the
 * entry's value and `value` is the value node itself — the two places the
 * existing leaf is read from before coercion.
 */
type StructuredLeafMatch =
| { readonly kind: "root" }
| { readonly kind: "line" }
| { readonly kind: "object-entry"; readonly node: { readonly value: JsoncValue } }
| { readonly kind: "value"; readonly node: JsoncValue };
/** The `reason` field of the failing (`ok: false`) arm of `SetResult`. */
type SetFailureReason = Extract<SetResult, { ok: false }>["reason"];
/**
 * Result shape of the kind-specific `set` callback passed to
 * `setStructuredLeaf`: either the updated AST or a failure reason. The
 * `"wildcard-not-allowed"` reason is excluded from what the callback may
 * report.
 */
type SetOpResult<A> =
| { readonly ok: true; readonly ast: A }
| { readonly ok: false; readonly reason: Exclude<SetFailureReason, "wildcard-not-allowed"> };
function setMdInsertion(ast: MdAst, info: InsertionInfo, value: string): SetResult {
const p = info.parentPath;
@@ -508,8 +361,6 @@ function setMdInsertion(ast: MdAst, info: InsertionInfo, value: string): SetResu
line: 0,
bodyText: "",
items: [],
tables: [],
codeBlocks: [],
},
],
};
@@ -589,18 +440,15 @@ function setJsoncInsertion(ast: JsoncAst, info: InsertionInfo, value: string): S
}
if (containerMatch.container === "jsonc-array") {
// index `+0` valid; bare `+` appends; `+key` rejected.
// `+0` indexed; bare `+` appends; `+key` rejected for arrays.
if (typeof info.marker === "object" && info.marker.kind === "keyed") {
return { ok: false, reason: "type-mismatch", detail: "cannot insert by key into array" };
}
return mutateJsoncContainer(ast, info.parentPath, (container) => {
if (container.kind !== "array") {
return null;
}
if (container.kind !== "array") return null;
const items = container.items.slice();
if (info.marker === "+") {
items.push(newJsoncValue);
} else if (typeof info.marker === "object" && info.marker.kind === "indexed") {
if (info.marker === "+") items.push(newJsoncValue);
else if (typeof info.marker === "object" && info.marker.kind === "indexed") {
const idx = Math.min(info.marker.index, items.length);
items.splice(idx, 0, newJsoncValue);
}
@@ -612,18 +460,13 @@ function setJsoncInsertion(ast: JsoncAst, info: InsertionInfo, value: string): S
});
}
// jsonc-object
if (typeof info.marker !== "object" || info.marker.kind !== "keyed") {
return { ok: false, reason: "type-mismatch", detail: "jsonc object insertion requires +key" };
}
const key = info.marker.key;
return mutateJsoncContainer(ast, info.parentPath, (container) => {
if (container.kind !== "object") {
return null;
}
if (container.entries.some((e) => e.key === key)) {
return null;
} // duplicate
if (container.kind !== "object") return null;
if (container.entries.some((e) => e.key === key)) return null; // duplicate
const newEntry: JsoncEntry = { key, value: newJsoncValue, line: 0 };
return {
kind: "object",
@@ -648,32 +491,24 @@ function setJsonlInsertion(ast: JsonlAst, info: InsertionInfo, value: string): S
return { ok: true, ast: appendJsonlLine(ast, jsonToJsoncValue(parsed)) };
}
// ---------- Internal helpers -----------------------------------------------
/**
 * Coerce a string value to the same primitive kind as an existing leaf.
 *
 * The existing source line is preserved on the replacement — it is the same
 * semantic node, only the bytes change. When `existing.line` is undefined
 * the result carries no `line` key at all.
 *
 * @param valueText Raw text supplied by the caller.
 * @param existing  The leaf currently at the target address; its `kind`
 *   selects the coercion.
 * @returns A new value of the same kind, or `null` when the text cannot be
 *   coerced:
 *   - string  → text used verbatim
 *   - number  → `Number(text)`; blank or non-finite input is rejected
 *   - boolean → only the literals `true` / `false`
 *   - null    → only the literal `null`
 *   - object/array → always `null`; caller should use insertion or
 *     full-replace.
 */
function coerceJsoncLeaf(valueText: string, existing: JsoncValue): JsoncValue | null {
  const lineExt = existing.line !== undefined ? { line: existing.line } : {};
  switch (existing.kind) {
    case "string":
      return { kind: "string", value: valueText, ...lineExt };
    case "number": {
      // `Number("")` and `Number("  ")` are 0, which would silently turn a
      // blank value into a valid-looking number; treat blank as a failure.
      if (valueText.trim().length === 0) return null;
      const n = Number(valueText);
      return Number.isFinite(n) ? { kind: "number", value: n, ...lineExt } : null;
    }
    case "boolean":
      if (valueText !== "true" && valueText !== "false") return null;
      return { kind: "boolean", value: valueText === "true", ...lineExt };
    case "null":
      return valueText === "null" ? { kind: "null", ...lineExt } : null;
    default:
      // Object/array — caller should use insertion or full-replace.
      return null;
  }
}
@@ -686,21 +521,11 @@ function tryParseJson(value: string): unknown {
}
function jsonToJsoncValue(v: unknown): JsoncValue {
// Synthetic values omit `line` (optional in the type) — the parser
// alone is the source of truth for line metadata. Insertions /
// mutations get the parent's line for surfacing in lint findings.
if (v === null) {
return { kind: "null" };
}
if (typeof v === "string") {
return { kind: "string", value: v };
}
if (typeof v === "number") {
return { kind: "number", value: v };
}
if (typeof v === "boolean") {
return { kind: "boolean", value: v };
}
// Synthetic values omit `line` — only the parser sets line metadata.
if (v === null) return { kind: "null" };
if (typeof v === "string") return { kind: "string", value: v };
if (typeof v === "number") return { kind: "number", value: v };
if (typeof v === "boolean") return { kind: "boolean", value: v };
if (Array.isArray(v)) {
return { kind: "array", items: v.map(jsonToJsoncValue) };
}
@@ -715,7 +540,7 @@ function jsonToJsoncValue(v: unknown): JsoncValue {
})),
};
}
// Unsupported (undefined / function / symbol). JSON.parse never produces these.
// JSON.parse never produces undefined / function / symbol.
throw new Error(`unsupported JSON value type: ${typeof v}`);
}
@@ -724,15 +549,9 @@ function mutateJsoncContainer(
parentPath: OcPath,
mutate: (container: JsoncValue) => JsoncValue | null,
): SetResult {
if (ast.root === null) {
return { ok: false, reason: "no-root" };
}
if (ast.root === null) return { ok: false, reason: "no-root" };
// Quote-aware split so JSONC insertion under a key containing
// `/`, `.`, or other special chars works through the parent path.
// `resolveJsoncOcPath` validates with quote-aware splitting; the
// mutation walker MUST use the same predicate or insertion validity
// can be reported and then fail as unresolved.
// Quote-aware split so insertion under a key with `/`/`.`/etc. works.
const segments: string[] = [];
if (parentPath.section !== undefined) {
segments.push(...splitRespectingBrackets(parentPath.section, "."));
@@ -746,9 +565,7 @@ function mutateJsoncContainer(
const newRoot =
segments.length === 0 ? mutate(ast.root) : mutateAt(ast.root, segments, 0, mutate);
if (newRoot === null) {
return { ok: false, reason: "unresolved" };
}
if (newRoot === null) return { ok: false, reason: "unresolved" };
const next: JsoncAst = { kind: "jsonc", raw: "", root: newRoot };
return { ok: true, ast: { ...next, raw: emitJsonc(next, { mode: "render" }) } };
@@ -761,26 +578,17 @@ function mutateAt(
mutate: (container: JsoncValue) => JsoncValue | null,
): JsoncValue | null {
const seg = segments[i];
if (seg === undefined) {
return mutate(current);
}
if (seg.length === 0) {
return null;
}
if (seg === undefined) return mutate(current);
if (seg.length === 0) return null;
if (current.kind === "object") {
// Match `setJsoncOcPath`'s lookup: AST entry keys are unquoted,
// so strip quoting from the path segment before comparing.
// AST keys are unquoted; strip quotes from the path segment.
const lookupKey = isQuotedSeg(seg) ? unquoteSeg(seg) : seg;
const idx = current.entries.findIndex((e) => e.key === lookupKey);
if (idx === -1) {
return null;
}
if (idx === -1) return null;
const child = current.entries[idx];
const replaced = mutateAt(child.value, segments, i + 1, mutate);
if (replaced === null) {
return null;
}
if (replaced === null) return null;
const newEntries = current.entries.slice();
newEntries[idx] = { ...child, value: replaced };
return {
@@ -791,14 +599,10 @@ function mutateAt(
}
if (current.kind === "array") {
const idx = Number(seg);
if (!Number.isInteger(idx) || idx < 0 || idx >= current.items.length) {
return null;
}
if (!Number.isInteger(idx) || idx < 0 || idx >= current.items.length) return null;
const child = current.items[idx];
const replaced = mutateAt(child, segments, i + 1, mutate);
if (replaced === null) {
return null;
}
if (replaced === null) return null;
const newItems = current.items.slice();
newItems[idx] = replaced;
return {
@@ -820,33 +624,21 @@ function rebuildMdRaw(ast: MdAst): MdAst {
parts.push("---");
}
if (ast.preamble.length > 0) {
if (parts.length > 0) {
parts.push("");
}
if (parts.length > 0) parts.push("");
parts.push(ast.preamble);
}
for (const block of ast.blocks) {
if (parts.length > 0) {
parts.push("");
}
if (parts.length > 0) parts.push("");
parts.push(`## ${block.heading}`);
if (block.bodyText.length > 0) {
parts.push(block.bodyText);
}
if (block.bodyText.length > 0) parts.push(block.bodyText);
}
// Suppress unused — emitJsonl is imported for symmetry but only emitJsonc
// is used in the jsonc mutation helper.
void emitJsonl;
return { ...ast, raw: parts.join("\n") };
}
/**
 * Render a frontmatter value for emission.
 *
 * @param value The raw string value.
 * @returns `""` (two quote characters) for the empty string; the
 *   `JSON.stringify`-quoted form when the value contains any of
 *   `: # & * ? | < > = ! % @ \` , [ ] { }` or a CR/LF (presumably the
 *   characters the frontmatter syntax treats as significant — confirm
 *   against the parser); otherwise the value unchanged.
 */
function formatFrontmatterValue(value: string): string {
  if (value.length === 0) return '""';
  const needsQuoting = /[:#&*?|<>=!%@`,[\]{}\r\n]/.test(value);
  return needsQuoting ? JSON.stringify(value) : value;
}