From bc735f4fde6fb2a4ee288d3d83d1176666ec6115 Mon Sep 17 00:00:00 2001 From: Gio Della-Libera Date: Thu, 7 May 2026 22:26:28 -0700 Subject: [PATCH] feat(workspace): oc-path addressing substrate + openclaw path CLI (md/jsonc/jsonl/yaml) (#78678) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements #78051 — oc:// addressing substrate for workspace files. New src/oc-path/ substrate (parser/formatter, per-kind parse+emit for md/jsonc/jsonl/yaml, universal resolveOcPath/setOcPath/findOcPaths verbs, sentinel emit guard) + openclaw path resolve|find|set|validate|emit CLI + docs/cli/path.md reference page + CHANGELOG entry. Co-authored-by: giodl73-repo <235387111+giodl73-repo@users.noreply.github.com> Co-authored-by: galiniliev <5711535+galiniliev@users.noreply.github.com> --- .gitignore | 4 + CHANGELOG.md | 1 + docs/cli/index.md | 2 +- docs/cli/path.md | 121 ++ src/cli/path-cli.ts | 113 ++ src/cli/program/register.subclis-core.ts | 5 + src/cli/program/subcli-descriptors.ts | 5 + src/commands/path.test.ts | 291 +++++ src/commands/path.ts | 537 ++++++++ src/oc-path/ast.ts | 125 ++ src/oc-path/dispatch.ts | 31 + src/oc-path/edit.ts | 153 +++ src/oc-path/emit.ts | 137 ++ src/oc-path/find.ts | 852 +++++++++++++ src/oc-path/index.ts | 133 ++ src/oc-path/jsonc/ast.ts | 49 + src/oc-path/jsonc/edit.ts | 184 +++ src/oc-path/jsonc/emit.ts | 99 ++ src/oc-path/jsonc/parse.ts | 311 +++++ src/oc-path/jsonc/resolve.ts | 122 ++ src/oc-path/jsonl/ast.ts | 49 + src/oc-path/jsonl/edit.ts | 228 ++++ src/oc-path/jsonl/emit.ts | 100 ++ src/oc-path/jsonl/parse.ts | 74 ++ src/oc-path/jsonl/resolve.ts | 157 +++ src/oc-path/oc-path.ts | 1114 +++++++++++++++++ src/oc-path/parse.ts | 294 +++++ src/oc-path/resolve.ts | 113 ++ src/oc-path/sentinel.ts | 63 + src/oc-path/slug.ts | 43 + src/oc-path/tests/edit.test.ts | 99 ++ src/oc-path/tests/emit.test.ts | 108 ++ src/oc-path/tests/find.test.ts | 707 +++++++++++ src/oc-path/tests/fixtures/real/AGENTS.md | 17 
+ src/oc-path/tests/fixtures/real/BOOTSTRAP.md | 17 + src/oc-path/tests/fixtures/real/HEARTBEAT.md | 16 + src/oc-path/tests/fixtures/real/IDENTITY.md | 19 + src/oc-path/tests/fixtures/real/MEMORY.md | 18 + src/oc-path/tests/fixtures/real/SKILL.md | 38 + src/oc-path/tests/fixtures/real/SOUL.md | 17 + src/oc-path/tests/fixtures/real/TOOLS.md | 21 + src/oc-path/tests/fixtures/real/USER.md | 16 + src/oc-path/tests/jsonc/edit.test.ts | 185 +++ src/oc-path/tests/jsonc/emit.test.ts | 94 ++ src/oc-path/tests/jsonc/parse.test.ts | 144 +++ src/oc-path/tests/jsonc/resolve.test.ts | 76 ++ src/oc-path/tests/jsonl/edit.test.ts | 242 ++++ src/oc-path/tests/jsonl/emit.test.ts | 101 ++ src/oc-path/tests/jsonl/parse.test.ts | 43 + src/oc-path/tests/jsonl/resolve.test.ts | 99 ++ src/oc-path/tests/oc-path.test.ts | 136 ++ src/oc-path/tests/parse.test.ts | 203 +++ src/oc-path/tests/resolve.test.ts | 100 ++ .../scenarios/append-multi-agent.test.ts | 120 ++ .../tests/scenarios/byte-fidelity.test.ts | 179 +++ .../tests/scenarios/code-blocks.test.ts | 97 ++ .../tests/scenarios/cross-cutting.test.ts | 139 ++ .../scenarios/cross-kind-properties.test.ts | 153 +++ .../scenarios/edit-emit-roundtrip.test.ts | 161 +++ .../tests/scenarios/frontmatter-edges.test.ts | 140 +++ .../tests/scenarios/h2-block-split.test.ts | 149 +++ src/oc-path/tests/scenarios/items.test.ts | 146 +++ .../scenarios/jsonc-byte-fidelity.test.ts | 188 +++ .../scenarios/jsonc-resolver-edges.test.ts | 132 ++ .../scenarios/jsonl-byte-fidelity.test.ts | 125 ++ .../scenarios/jsonl-resolver-edges.test.ts | 125 ++ .../tests/scenarios/malformed-input.test.ts | 155 +++ .../scenarios/oc-path-parse-edges.test.ts | 252 ++++ .../scenarios/oc-path-resolver-edges.test.ts | 235 ++++ .../tests/scenarios/perf-determinism.test.ts | 127 ++ src/oc-path/tests/scenarios/pitfalls.test.ts | 624 +++++++++ .../scenarios/real-world-fixtures.test.ts | 140 +++ .../scenarios/roundtrip-property.test.ts | 155 +++ .../scenarios/sentinel-cross-kind.test.ts | 
177 +++ .../tests/scenarios/sentinel-guard.test.ts | 180 +++ src/oc-path/tests/scenarios/tables.test.ts | 154 +++ src/oc-path/tests/sentinel.test.ts | 36 + src/oc-path/tests/slug.test.ts | 50 + src/oc-path/tests/universal.test.ts | 475 +++++++ src/oc-path/tests/yaml/yaml-kind.test.ts | 248 ++++ src/oc-path/universal.ts | 869 +++++++++++++ src/oc-path/yaml/ast.ts | 37 + src/oc-path/yaml/edit.ts | 236 ++++ src/oc-path/yaml/emit.ts | 49 + src/oc-path/yaml/parse.ts | 48 + src/oc-path/yaml/resolve.ts | 147 +++ 86 files changed, 14273 insertions(+), 1 deletion(-) create mode 100644 docs/cli/path.md create mode 100644 src/cli/path-cli.ts create mode 100644 src/commands/path.test.ts create mode 100644 src/commands/path.ts create mode 100644 src/oc-path/ast.ts create mode 100644 src/oc-path/dispatch.ts create mode 100644 src/oc-path/edit.ts create mode 100644 src/oc-path/emit.ts create mode 100644 src/oc-path/find.ts create mode 100644 src/oc-path/index.ts create mode 100644 src/oc-path/jsonc/ast.ts create mode 100644 src/oc-path/jsonc/edit.ts create mode 100644 src/oc-path/jsonc/emit.ts create mode 100644 src/oc-path/jsonc/parse.ts create mode 100644 src/oc-path/jsonc/resolve.ts create mode 100644 src/oc-path/jsonl/ast.ts create mode 100644 src/oc-path/jsonl/edit.ts create mode 100644 src/oc-path/jsonl/emit.ts create mode 100644 src/oc-path/jsonl/parse.ts create mode 100644 src/oc-path/jsonl/resolve.ts create mode 100644 src/oc-path/oc-path.ts create mode 100644 src/oc-path/parse.ts create mode 100644 src/oc-path/resolve.ts create mode 100644 src/oc-path/sentinel.ts create mode 100644 src/oc-path/slug.ts create mode 100644 src/oc-path/tests/edit.test.ts create mode 100644 src/oc-path/tests/emit.test.ts create mode 100644 src/oc-path/tests/find.test.ts create mode 100644 src/oc-path/tests/fixtures/real/AGENTS.md create mode 100644 src/oc-path/tests/fixtures/real/BOOTSTRAP.md create mode 100644 src/oc-path/tests/fixtures/real/HEARTBEAT.md create mode 100644 
src/oc-path/tests/fixtures/real/IDENTITY.md create mode 100644 src/oc-path/tests/fixtures/real/MEMORY.md create mode 100644 src/oc-path/tests/fixtures/real/SKILL.md create mode 100644 src/oc-path/tests/fixtures/real/SOUL.md create mode 100644 src/oc-path/tests/fixtures/real/TOOLS.md create mode 100644 src/oc-path/tests/fixtures/real/USER.md create mode 100644 src/oc-path/tests/jsonc/edit.test.ts create mode 100644 src/oc-path/tests/jsonc/emit.test.ts create mode 100644 src/oc-path/tests/jsonc/parse.test.ts create mode 100644 src/oc-path/tests/jsonc/resolve.test.ts create mode 100644 src/oc-path/tests/jsonl/edit.test.ts create mode 100644 src/oc-path/tests/jsonl/emit.test.ts create mode 100644 src/oc-path/tests/jsonl/parse.test.ts create mode 100644 src/oc-path/tests/jsonl/resolve.test.ts create mode 100644 src/oc-path/tests/oc-path.test.ts create mode 100644 src/oc-path/tests/parse.test.ts create mode 100644 src/oc-path/tests/resolve.test.ts create mode 100644 src/oc-path/tests/scenarios/append-multi-agent.test.ts create mode 100644 src/oc-path/tests/scenarios/byte-fidelity.test.ts create mode 100644 src/oc-path/tests/scenarios/code-blocks.test.ts create mode 100644 src/oc-path/tests/scenarios/cross-cutting.test.ts create mode 100644 src/oc-path/tests/scenarios/cross-kind-properties.test.ts create mode 100644 src/oc-path/tests/scenarios/edit-emit-roundtrip.test.ts create mode 100644 src/oc-path/tests/scenarios/frontmatter-edges.test.ts create mode 100644 src/oc-path/tests/scenarios/h2-block-split.test.ts create mode 100644 src/oc-path/tests/scenarios/items.test.ts create mode 100644 src/oc-path/tests/scenarios/jsonc-byte-fidelity.test.ts create mode 100644 src/oc-path/tests/scenarios/jsonc-resolver-edges.test.ts create mode 100644 src/oc-path/tests/scenarios/jsonl-byte-fidelity.test.ts create mode 100644 src/oc-path/tests/scenarios/jsonl-resolver-edges.test.ts create mode 100644 src/oc-path/tests/scenarios/malformed-input.test.ts create mode 100644 
src/oc-path/tests/scenarios/oc-path-parse-edges.test.ts create mode 100644 src/oc-path/tests/scenarios/oc-path-resolver-edges.test.ts create mode 100644 src/oc-path/tests/scenarios/perf-determinism.test.ts create mode 100644 src/oc-path/tests/scenarios/pitfalls.test.ts create mode 100644 src/oc-path/tests/scenarios/real-world-fixtures.test.ts create mode 100644 src/oc-path/tests/scenarios/roundtrip-property.test.ts create mode 100644 src/oc-path/tests/scenarios/sentinel-cross-kind.test.ts create mode 100644 src/oc-path/tests/scenarios/sentinel-guard.test.ts create mode 100644 src/oc-path/tests/scenarios/tables.test.ts create mode 100644 src/oc-path/tests/sentinel.test.ts create mode 100644 src/oc-path/tests/slug.test.ts create mode 100644 src/oc-path/tests/universal.test.ts create mode 100644 src/oc-path/tests/yaml/yaml-kind.test.ts create mode 100644 src/oc-path/universal.ts create mode 100644 src/oc-path/yaml/ast.ts create mode 100644 src/oc-path/yaml/edit.ts create mode 100644 src/oc-path/yaml/emit.ts create mode 100644 src/oc-path/yaml/parse.ts create mode 100644 src/oc-path/yaml/resolve.ts diff --git a/.gitignore b/.gitignore index 7d420cdcc0e..f99a22c1091 100644 --- a/.gitignore +++ b/.gitignore @@ -95,6 +95,10 @@ docs/internal/ tmp/ IDENTITY.md USER.md +# Exception: oc-path real-world test fixtures need to be tracked even +# though the bare names match the local-untracked rule above. +!src/oc-path/tests/fixtures/real/IDENTITY.md +!src/oc-path/tests/fixtures/real/USER.md *.tgz *.tar.gz *.zip diff --git a/CHANGELOG.md b/CHANGELOG.md index c7922447c81..d76f1626782 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Docs: https://docs.openclaw.ai - Control UI/usage: add transcript-backed historical lineage rollups for rotated logical sessions, with current-instance vs historical-lineage scope controls and long-range presets so usage history stays visible after restarts and updates. Fixes #50701. Thanks @dev-gideon-llc and @BunsDev. 
- Agents/failover: harden state-aware lane suspension by persisting quota resume transitions, restoring configured lane concurrency, preserving non-quota failure reasons, and exporting model failover events through diagnostics OTLP. Thanks @BunsDev. - Channels/streaming: make progress draft labels scroll away with other progress lines, render structured tool rows as compact emoji/title/details, show web-search queries from provider-native argument shapes, and skip empty Discord apply-patch starts until a patch summary exists. (#79146) +- Workspace/oc-path: add the `oc://` addressing substrate (`src/oc-path/`) — a universal, kind-dispatched path scheme for addressing leaves and nodes inside markdown, jsonc, jsonl, and yaml workspace files, with `parseOcPath`/`formatOcPath`, per-kind `parseXxx`/`emitXxx`, universal `resolveOcPath`/`setOcPath`/`findOcPaths` verbs, the `__OPENCLAW_REDACTED__` sentinel emit guard, and the new `openclaw path resolve|find|set|validate|emit` CLI for shell-level inspection and surgical edits. Implements #78051. (#78678) Thanks @giodl73-repo. - Telegram: preserve the channel-specific 10-option poll cap in the unified outbound adapter so over-limit polls are rejected before send. (#78762) Thanks @obviyus. - Slack: route handled top-level channel turns in implicit-conversation channels to thread-scoped sessions when Slack reply threading is enabled, keeping the root turn and later thread replies on one OpenClaw session. (#78522) Thanks @zeroth-blip. - Telegram: re-probe the primary fetch transport after repeated sticky fallback success so transient IPv4 or pinned-IP fallback promotion can recover without a gateway restart. Fixes #77088. (#77157) Thanks @MkDev11. diff --git a/docs/cli/index.md b/docs/cli/index.md index 3ff71cc2ac3..9e54a7f850a 100644 --- a/docs/cli/index.md +++ b/docs/cli/index.md @@ -24,7 +24,7 @@ apply across the CLI. 
| Network and nodes | [`directory`](/cli/directory) · [`nodes`](/cli/nodes) · [`devices`](/cli/devices) · [`node`](/cli/node) | | Runtime and sandbox | [`approvals`](/cli/approvals) · `exec-policy` (see [`approvals`](/cli/approvals)) · [`sandbox`](/cli/sandbox) · [`tui`](/cli/tui) · `chat`/`terminal` (aliases for [`tui --local`](/cli/tui)) · [`browser`](/cli/browser) | | Automation | [`cron`](/cli/cron) · [`tasks`](/cli/tasks) · [`hooks`](/cli/hooks) · [`webhooks`](/cli/webhooks) | -| Discovery and docs | [`dns`](/cli/dns) · [`docs`](/cli/docs) | +| Discovery and docs | [`dns`](/cli/dns) · [`docs`](/cli/docs) · [`path`](/cli/path) | | Pairing and channels | [`pairing`](/cli/pairing) · [`qr`](/cli/qr) · [`channels`](/cli/channels) | | Security and plugins | [`security`](/cli/security) · [`secrets`](/cli/secrets) · [`skills`](/cli/skills) · [`plugins`](/cli/plugins) · [`proxy`](/cli/proxy) | | Legacy aliases | [`daemon`](/cli/daemon) (gateway service) · [`clawbot`](/cli/clawbot) (namespace) | diff --git a/docs/cli/path.md b/docs/cli/path.md new file mode 100644 index 00000000000..ae44bd265d0 --- /dev/null +++ b/docs/cli/path.md @@ -0,0 +1,121 @@ +--- +summary: "CLI reference for `openclaw path` (inspect and edit workspace files via the `oc://` addressing scheme)" +read_when: + - You want to read or write a leaf inside a workspace file from the terminal + - You're scripting against workspace state and want a stable, kind-agnostic addressing scheme + - You're debugging a `oc://` path (validate the syntax, see what it resolves to) +title: "Path" +--- + +# `openclaw path` + +Shell-level access to the `oc://` addressing substrate — one universal, +kind-dispatched path scheme for inspecting and surgically editing workspace +files (markdown, jsonc, jsonl, yaml). Self-hosters and editor extensions use +it to read or write a single leaf inside a workspace file without scripting +against the SDK directly. 
+ +## Subcommands + +| Subcommand | Purpose | +| ----------------------- | ---------------------------------------------------------------------------- | +| `resolve <oc-path>` | Print the match at the path (or "not found"). | +| `find <pattern>` | Enumerate matches for a wildcard / predicate path. | +| `set <oc-path> <value>` | Write a leaf at the path. Supports `--dry-run`. | +| `validate <oc-path>` | Parse-only — print structural breakdown (file / section / item / field). | +| `emit <file>` | Round-trip a file through `parseXxx` + `emitXxx` (byte-fidelity diagnostic). | + +## Global flags + +| Flag | Purpose | +| --------------- | ------------------------------------------------------------------------ | +| `--cwd <dir>` | Resolve the file slot against this directory (default: `process.cwd()`). | +| `--file <path>` | Override the file slot's resolved path (absolute access). | +| `--json` | Force JSON output (default when stdout is not a TTY). | +| `--human` | Force human output (default when stdout is a TTY). | +| `--dry-run` | (only on `set`) print the bytes that would be written without writing. | + +## `oc://` syntax + +``` +oc://FILE/SECTION/ITEM/FIELD?session=SCOPE +``` + +Slot rules — `field` requires `item`, `item` requires `section`. Across all +four slots: + +- **Quoted segments** — `"a/b.c"` survives `/` and `.` separators. + `"\\"` and `"\""` are the only escapes inside quotes. + The file slot is also quote-aware: `oc://"skills/email-drafter"/Tools/-1` + treats `skills/email-drafter` as a single file path. +- **Predicates** — `[k=v]`, `[k!=v]`, `[k*=v]`, `[k^=v]`, `[k$=v]`, + `[k<v]`, `[k<=v]`, `[k>v]`, `[k>=v]`. +- **Unions** — `{a,b,c}` matches any of the alternatives. +- **Wildcards** — `*` (single sub-segment) and `**` (zero-or-more, + recursive). `find` accepts these; `resolve` and `set` reject them as + ambiguous. +- **Positional** — `$first`, `$last`, `-N` (Nth from end). +- **Ordinal** — `#N` for Nth match. +- **Insertion markers** — `+`, `+key`, `+nnn` for keyed / indexed + insertion (use with `set`). 
+- **Session scope** — `?session=cron:daily` etc. Orthogonal to slot + nesting. + +Reserved characters (`?`, `&`, `%`) outside quoted, predicate, or union +segments are rejected. Control characters (U+0000–U+001F, U+007F) are +rejected anywhere. + +## Examples + +```bash +# Validate a path (no filesystem access) +openclaw path validate 'oc://AGENTS.md/Tools/-1/risk' + +# Read a leaf +openclaw path resolve 'oc://gateway.jsonc/version' + +# Wildcard search +openclaw path find 'oc://session.jsonl/*/event' --file ./logs/session.jsonl + +# Dry-run a write +openclaw path set 'oc://gateway.jsonc/version' '2.0' --dry-run + +# Apply the write +openclaw path set 'oc://gateway.jsonc/version' '2.0' + +# Byte-fidelity round-trip (diagnostic) +openclaw path emit ./AGENTS.md +``` + +## Exit codes + +| Code | Meaning | +| ---- | -------------------------------------------------------------------------- | +| `0` | Success. (`resolve` / `find`: at least one match. `set`: write succeeded.) | +| `1` | No match, or `set` rejected by the substrate (no system-level error). | +| `2` | Argument or parse error. | + +## Output mode + +`openclaw path` is TTY-aware: human-readable output on a terminal, JSON when +stdout is piped or redirected. `--json` and `--human` override the +auto-detection. + +## Notes + +- `set` writes raw bytes through the substrate's emit path, which applies the + redaction-sentinel guard automatically. A leaf carrying + `__OPENCLAW_REDACTED__` (verbatim or as a substring) is refused at write + time. +- `set` on a JSONC file currently re-renders the file (drops comments and + trailing-comma formatting) when it mutates a leaf. Read-path round-trip is + byte-identical. A byte-splice editor that preserves comments through + writes is planned as a follow-up. +- `path` does not know about LKG. If the file is LKG-tracked, the next + observe call decides whether to promote / recover. 
/** Option bag in the shape commander passes to each `path` action callback. */
interface RawPathOptions {
  json?: boolean;
  human?: boolean;
  cwd?: string;
  file?: string;
  dryRun?: boolean;
}

/**
 * Copies the commander option bag into the handler-facing
 * `PathCommandOptions` shape, one field at a time, so that only the
 * five known flags are forwarded to the command handlers.
 */
function normalize(opts: RawPathOptions): PathCommandOptions {
  return {
    json: opts.json,
    human: opts.human,
    cwd: opts.cwd,
    file: opts.file,
    dryRun: opts.dryRun,
  };
}

/**
 * Registers the `path` command group (`resolve`, `find`, `set`,
 * `validate`, `emit`) on the given commander program.
 *
 * Every positional argument and every value-taking option declares an
 * explicit `<placeholder>`. Commander decides whether an option takes a
 * value from that placeholder: a bare `--cwd` / `--file` is parsed as a
 * boolean switch, so without it `opts.cwd` / `opts.file` would never be a
 * string and the handlers could not resolve the target file.
 *
 * @param program - Root commander program that receives the `path` group.
 */
export function registerPathCli(program: Command) {
  const path = program
    .command("path")
    .description("Inspect and edit workspace files via the oc:// addressing scheme")
    .addHelpText(
      "after",
      () =>
        `\n${theme.muted("Docs:")} ${formatDocsLink("/cli/path", "docs.openclaw.ai/cli/path")}\n`,
    );

  path
    .command("resolve")
    .description("Print the match at an oc:// path")
    .argument("<oc-path>", "oc:// path to resolve")
    .option("--json", "Force JSON output")
    .option("--human", "Force human output")
    .option("--cwd <dir>", "Resolve file slot against this directory")
    .option("--file <path>", "Override the file slot's resolved path (absolute access)")
    .action(async (pathStr: string, opts: RawPathOptions) => {
      await runCommandWithRuntime(defaultRuntime, async () => {
        await pathResolveCommand(pathStr, normalize(opts), defaultRuntime);
      });
    });

  path
    .command("find")
    .description("Enumerate matches for a wildcard / predicate oc:// pattern")
    .argument("<pattern>", "oc:// pattern (supports * and **)")
    .option("--json", "Force JSON output")
    .option("--human", "Force human output")
    .option("--cwd <dir>", "Resolve file slot against this directory")
    .option("--file <path>", "Override the file slot's resolved path (absolute access)")
    .action(async (patternStr: string, opts: RawPathOptions) => {
      await runCommandWithRuntime(defaultRuntime, async () => {
        await pathFindCommand(patternStr, normalize(opts), defaultRuntime);
      });
    });

  path
    .command("set")
    .description("Write a leaf value at an oc:// path")
    .argument("<oc-path>", "oc:// path to write")
    .argument("<value>", "string value to write")
    .option("--dry-run", "Print bytes without writing")
    .option("--json", "Force JSON output")
    .option("--human", "Force human output")
    .option("--cwd <dir>", "Resolve file slot against this directory")
    .option("--file <path>", "Override the file slot's resolved path (absolute access)")
    .action(async (pathStr: string, value: string, opts: RawPathOptions) => {
      await runCommandWithRuntime(defaultRuntime, async () => {
        await pathSetCommand(pathStr, value, normalize(opts), defaultRuntime);
      });
    });

  // `validate` is the only subcommand invoked without `runCommandWithRuntime`
  // and without `await`; it takes no --cwd / --file because it only parses
  // the path string.
  // NOTE(review): confirm that an exception thrown by the handler here is
  // reported the same way as for the wrapped subcommands.
  path
    .command("validate")
    .description("Parse an oc:// path and print its slot structure")
    .argument("<oc-path>", "oc:// path to validate")
    .option("--json", "Force JSON output")
    .option("--human", "Force human output")
    .action((pathStr: string, opts: RawPathOptions) => {
      pathValidateCommand(pathStr, normalize(opts), defaultRuntime);
    });

  path
    .command("emit")
    .description("Round-trip a file through parseXxx + emitXxx (byte-fidelity diagnostic)")
    .argument("<file>", "Path to a workspace file (md / jsonc / jsonl / yaml)")
    .option("--cwd <dir>", "Resolve <file> against this directory (default: process.cwd())")
    .option("--file <path>", "Override the file's resolved path (absolute access)")
    .option("--json", "Force JSON output")
    .option("--human", "Force human output")
    .action(async (fileArg: string, opts: RawPathOptions) => {
      await runCommandWithRuntime(defaultRuntime, async () => {
        await pathEmitCommand(fileArg, normalize(opts), defaultRuntime);
      });
    });

  // With no subcommand given, `openclaw path` falls back to the parent's
  // default help action.
  applyParentDefaultHelpAction(path);
}
/**
 * Builds an in-memory runtime for handler tests. Everything a handler
 * logs or writes is appended to the `stdout` / `stderr` arrays, and a
 * call to `exit` records the code on `exitCode` instead of ending the
 * process.
 */
function createTestRuntime(): TestRuntime {
  const outLines: string[] = [];
  const errLines: string[] = [];

  // Stringify each argument and join with single spaces.
  const render = (parts: readonly unknown[]): string =>
    parts.map((part) => (typeof part === "string" ? part : String(part))).join(" ");

  const runtime: TestRuntime = {
    stdout: outLines,
    stderr: errLines,
    exitCode: 0,
    log: (...args) => {
      outLines.push(render(args));
    },
    error: (...args) => {
      errLines.push(render(args));
    },
    writeStdout: (value) => {
      outLines.push(value);
    },
    writeJson: (value, space = 2) => {
      // A non-positive `space` means compact JSON (no indentation).
      const indent = space > 0 ? space : undefined;
      outLines.push(JSON.stringify(value, null, indent));
    },
    exit: (code) => {
      // Record the code only; the test process keeps running.
      runtime.exitCode = code;
    },
  };

  return runtime;
}
+ }); + + describe("validate", () => { + it("CLI-V01 accepts a well-formed path with --json", () => { + const rt = createTestRuntime(); + pathValidateCommand("oc://AGENTS.md/Tools/-1", { json: true }, rt); + expect(rt.exitCode).toBe(0); + const out = JSON.parse(stdoutText(rt)); + expect(out.valid).toBe(true); + expect(out.structure.file).toBe("AGENTS.md"); + expect(out.structure.section).toBe("Tools"); + }); + + it("CLI-V02 rejects a malformed path with code 1", () => { + const rt = createTestRuntime(); + pathValidateCommand("oc://X/a\x00b", { json: true }, rt); + expect(rt.exitCode).toBe(1); + const out = JSON.parse(stdoutText(rt)); + expect(out.valid).toBe(false); + }); + + it("CLI-V03 missing argument returns 2", () => { + const rt = createTestRuntime(); + pathValidateCommand(undefined, { json: true }, rt); + expect(rt.exitCode).toBe(2); + expect(stderrText(rt)).toContain("missing"); + }); + }); + + describe("resolve", () => { + it("CLI-R01 finds a leaf in jsonc and prints it", async () => { + const filePath = join(workspaceDir, "gateway.jsonc"); + writeFileSync(filePath, '{ "version": "1.0" }', "utf-8"); + const rt = createTestRuntime(); + await pathResolveCommand( + "oc://gateway.jsonc/version", + { cwd: workspaceDir, json: true }, + rt, + ); + expect(rt.exitCode).toBe(0); + const out = JSON.parse(stdoutText(rt)); + expect(out.resolved).toBe(true); + expect(out.match.kind).toBe("leaf"); + expect(out.match.valueText).toBe("1.0"); + }); + + it("CLI-R02 returns 1 for not-found path", async () => { + const filePath = join(workspaceDir, "gateway.jsonc"); + writeFileSync(filePath, '{ "version": "1.0" }', "utf-8"); + const rt = createTestRuntime(); + await pathResolveCommand( + "oc://gateway.jsonc/missing", + { cwd: workspaceDir, json: true }, + rt, + ); + expect(rt.exitCode).toBe(1); + const out = JSON.parse(stdoutText(rt)); + expect(out.resolved).toBe(false); + }); + + it("CLI-R03 missing argument returns 2", async () => { + const rt = createTestRuntime(); + await 
pathResolveCommand(undefined, { json: true }, rt); + expect(rt.exitCode).toBe(2); + expect(stderrText(rt)).toContain("missing"); + }); + }); + + describe("set", () => { + it("CLI-S01 writes new bytes when path resolves", async () => { + const filePath = join(workspaceDir, "gateway.jsonc"); + writeFileSync(filePath, '{ "version": "1.0" }', "utf-8"); + const rt = createTestRuntime(); + await pathSetCommand( + "oc://gateway.jsonc/version", + "2.0", + { cwd: workspaceDir, json: true }, + rt, + ); + expect(rt.exitCode).toBe(0); + const after = readFileSync(filePath, "utf-8"); + expect(after).toContain('"2.0"'); + }); + + it("CLI-S02 --dry-run does not write to disk", async () => { + const filePath = join(workspaceDir, "gateway.jsonc"); + const before = '{ "version": "1.0" }'; + writeFileSync(filePath, before, "utf-8"); + const rt = createTestRuntime(); + await pathSetCommand( + "oc://gateway.jsonc/version", + "2.0", + { cwd: workspaceDir, json: true, dryRun: true }, + rt, + ); + expect(rt.exitCode).toBe(0); + const out = JSON.parse(stdoutText(rt)); + expect(out.dryRun).toBe(true); + expect(out.bytes).toContain('"2.0"'); + // File on disk unchanged. + expect(readFileSync(filePath, "utf-8")).toBe(before); + }); + + it("CLI-S03 sentinel-bearing value is refused at emit", async () => { + const filePath = join(workspaceDir, "gateway.jsonc"); + writeFileSync(filePath, '{ "token": "x" }', "utf-8"); + const rt = createTestRuntime(); + // The sentinel-bearing value is accepted into the AST by setOcPath, + // but `emitForKind` refuses to serialize it (defense-in-depth at + // the per-kind emit boundary). The CLI handler must catch that + // refusal and route it through the structured error boundary — + // a thrown error escaping commander would print raw `String(err)` + // and bypass our JSON/human scrubbing. Pin the structured shape: + // exit code 1, stable code OC_EMIT_SENTINEL, message scrubbed. 
+ await pathSetCommand( + "oc://gateway.jsonc/token", + "__OPENCLAW_REDACTED__", + { cwd: workspaceDir, json: true }, + rt, + ); + expect(rt.exitCode).toBe(1); + expect(stderrText(rt)).toContain("OC_EMIT_SENTINEL"); + // F13 — file context in sentinel error. Without fileNameForGuard + // plumbing through emitForKind, the message would carry the + // empty-slot fallback (`oc:///[raw]`); now it carries the actual + // file (`oc://gateway.jsonc/[raw]`). Forensics + audit pipelines + // rely on this — without the file context, "sentinel rejected + // somewhere" doesn't tell you WHICH file was involved. + expect(stderrText(rt)).toContain("gateway.jsonc"); + }); + + it("CLI-S04 missing args returns 2", async () => { + const rt = createTestRuntime(); + await pathSetCommand(undefined, undefined, { json: true }, rt); + expect(rt.exitCode).toBe(2); + expect(stderrText(rt)).toContain("requires"); + }); + }); + + describe("find", () => { + it("CLI-F01 enumerates wildcard matches", async () => { + const filePath = join(workspaceDir, "config.jsonc"); + writeFileSync(filePath, '{ "items": [ { "id": "a" }, { "id": "b" } ] }', "utf-8"); + const rt = createTestRuntime(); + await pathFindCommand( + "oc://config.jsonc/items/*/id", + { cwd: workspaceDir, json: true }, + rt, + ); + expect(rt.exitCode).toBe(0); + const out = JSON.parse(stdoutText(rt)); + expect(out.count).toBe(2); + }); + + it("CLI-F02 returns 1 when zero matches", async () => { + const filePath = join(workspaceDir, "gateway.jsonc"); + writeFileSync(filePath, "{}", "utf-8"); + const rt = createTestRuntime(); + await pathFindCommand( + "oc://gateway.jsonc/nope/*", + { cwd: workspaceDir, json: true }, + rt, + ); + expect(rt.exitCode).toBe(1); + }); + + it("CLI-F03 file-slot wildcard rejected with clear error (no ENOENT)", async () => { + // Closes Galin P3 (round 8): `find` resolves `pattern.file` to one + // literal path, so `oc://*.jsonc/...` would silently ENOENT during + // fs.readFile. 
The CLI now surfaces a clear error before touching + // the filesystem, with stable code OC_PATH_FILE_WILDCARD_UNSUPPORTED. + const rt = createTestRuntime(); + await pathFindCommand( + "oc://*.jsonc/items", + { cwd: workspaceDir, json: true }, + rt, + ); + expect(rt.exitCode).toBe(2); + expect(stderrText(rt)).toContain("OC_PATH_FILE_WILDCARD_UNSUPPORTED"); + expect(stderrText(rt)).toContain("file-slot wildcards are not supported"); + }); + }); + + describe("emit", () => { + it("CLI-E01 round-trips jsonc bytes verbatim (byte-fidelity proof)", async () => { + const filePath = join(workspaceDir, "gateway.jsonc"); + const before = '// keep this comment\n{\n "v": 1\n}\n'; + writeFileSync(filePath, before, "utf-8"); + const rt = createTestRuntime(); + await pathEmitCommand(filePath, { json: true }, rt); + expect(rt.exitCode).toBe(0); + const out = JSON.parse(stdoutText(rt)); + expect(out.kind).toBe("jsonc"); + expect(out.bytes).toBe(before); + }); + + it("CLI-E02 round-trips md verbatim", async () => { + const filePath = join(workspaceDir, "AGENTS.md"); + const before = "## Tools\n- gh\n## Boundaries\n- never rm -rf\n"; + writeFileSync(filePath, before, "utf-8"); + const rt = createTestRuntime(); + await pathEmitCommand(filePath, { json: true }, rt); + expect(rt.exitCode).toBe(0); + const out = JSON.parse(stdoutText(rt)); + expect(out.kind).toBe("md"); + expect(out.bytes).toBe(before); + }); + + it("CLI-E03 emit --cwd resolves against the supplied directory", async () => { + // Closes round-10 finding F2: emit advertises --cwd / --file in + // the docs but the handler resolved against process.cwd() + // ignoring both. Pin the new wiring: a relative resolves + // against --cwd, not against process.cwd(). + const filePath = join(workspaceDir, "AGENTS.md"); + writeFileSync(filePath, "## Tools\n- gh\n", "utf-8"); + const rt = createTestRuntime(); + // Pass a RELATIVE filename + explicit --cwd. If the handler + // ignored --cwd, loadAst would ENOENT against process.cwd(). 
+ await pathEmitCommand("AGENTS.md", { cwd: workspaceDir, json: true }, rt); + expect(rt.exitCode).toBe(0); + const out = JSON.parse(stdoutText(rt)); + expect(out.kind).toBe("md"); + expect(out.bytes).toBe("## Tools\n- gh\n"); + }); + }); +}); diff --git a/src/commands/path.ts b/src/commands/path.ts new file mode 100644 index 00000000000..902d3e4241f --- /dev/null +++ b/src/commands/path.ts @@ -0,0 +1,537 @@ +/** + * `openclaw path` — shell-level access to the OcPath substrate verbs. + * Self-hosters and editor extensions use it to inspect and surgically + * edit workspace files without scripting against the SDK directly. + * + * Subcommands: + * - `resolve ` — print the match at the path + * - `set ` — write a leaf at the path; supports `--dry-run` + * - `find ` — enumerate matches for a wildcard/predicate path + * - `validate ` — parse-only; print structure + * - `emit ` — read + parseXxx + emitXxx; verifies byte-fidelity + * + * Output is TTY-aware: defaults to human-readable when stdout is a TTY, + * switches to JSON otherwise (so pipes don't get formatting noise). + * `--json` and `--human` flags override the auto-detection. + * + * Boundaries this CLI does NOT cross (v0): + * - Doesn't know about LKG. `set` writes raw bytes through the + * substrate emit; if the file is LKG-tracked, the next observe + * call decides whether to promote / recover. + * - Doesn't know about lint rules or doctor fixers — that's a + * different surface. 
// File: src/commands/path.ts
/**
 * `openclaw path` — shell-level access to the OcPath substrate verbs.
 * Self-hosters and editor extensions use it to inspect and surgically
 * edit workspace files without scripting against the SDK directly.
 *
 * Subcommands:
 *   - `resolve <oc-path>`     — print the match at the path
 *   - `set <oc-path> <value>` — write a leaf at the path; supports `--dry-run`
 *   - `find <pattern>`        — enumerate matches for a wildcard/predicate path
 *   - `validate <oc-path>`    — parse-only; print structure
 *   - `emit <file>`           — read + parseXxx + emitXxx; verifies byte-fidelity
 *
 * Output is TTY-aware: defaults to human-readable when stdout is a TTY,
 * switches to JSON otherwise (so pipes don't get formatting noise).
 * `--json` and `--human` flags override the auto-detection.
 *
 * Exit codes used by the handlers in this module:
 *   - 0 — success
 *   - 1 — well-formed request that was refused or found nothing
 *         (unresolved path, zero matches, sentinel refusal, invalid path
 *         in `validate`)
 *   - 2 — usage / parse errors (missing argument, malformed oc:// path,
 *         pattern handed to a single-match verb, file-slot wildcard)
 *
 * Boundaries this CLI does NOT cross (v0):
 *   - Doesn't know about LKG. `set` writes raw bytes through the
 *     substrate emit; if the file is LKG-tracked, the next observe
 *     call decides whether to promote / recover.
 *   - Doesn't know about lint rules or doctor fixers — that's a
 *     different surface.
 */

import { promises as fs } from "node:fs";
import { resolve as resolvePath } from "node:path";
import {
  OcEmitSentinelError,
  OcPathError,
  REDACTED_SENTINEL,
  emitJsonc,
  emitJsonl,
  emitMd,
  emitYaml,
  findOcPaths,
  formatOcPath,
  inferKind,
  parseJsonc,
  parseJsonl,
  parseMd,
  parseOcPath,
  parseYaml,
  resolveOcPath,
  setOcPath,
  type OcAst,
  type OcMatch,
  type OcPath,
  type SetResult,
} from "../oc-path/index.js";
import type { OutputRuntimeEnv } from "../runtime.js";

/** Flags shared by every `openclaw path` subcommand. */
export interface PathCommandOptions {
  /** Force JSON output regardless of TTY detection. */
  readonly json?: boolean;
  /** Force human-readable output regardless of TTY detection. */
  readonly human?: boolean;
  /** Directory the oc:// file slot is resolved against (default: `process.cwd()`). */
  readonly cwd?: string;
  /** Explicit filesystem path that replaces the oc:// file slot for I/O. */
  readonly file?: string;
  /** `set` only: compute and print the new bytes without writing them. */
  readonly dryRun?: boolean;
}

type OutputMode = "human" | "json";

const SCRUB_PLACEHOLDER = "[REDACTED]";

/** AST kinds for which `set` attaches the formatting-loss warning. */
const FORMAT_LOSSY_KINDS: ReadonlySet<string> = new Set(["jsonc", "yaml"]);

/**
 * Output-boundary sentinel scrub. Replaces every occurrence of the
 * redaction sentinel with `[REDACTED]` before writing to the output
 * stream. Defense-in-depth — even if a future code path surfaces raw
 * file content carrying the sentinel, the CLI must not echo it.
 */
export function scrubSentinel(s: string): string {
  if (!s.includes(REDACTED_SENTINEL)) {
    return s;
  }
  return s.split(REDACTED_SENTINEL).join(SCRUB_PLACEHOLDER);
}

/**
 * Pick the output mode. `--json` wins over `--human`; with neither flag
 * the mode follows whether stdout is a TTY.
 */
function detectMode(options: PathCommandOptions): OutputMode {
  if (options.json === true) {
    return "json";
  }
  if (options.human === true) {
    return "human";
  }
  return process.stdout.isTTY ? "human" : "json";
}

/**
 * Size of `text` once encoded as UTF-8. `String.length` counts UTF-16
 * code units, which under-reports any non-ASCII content; the files are
 * written as UTF-8, so this is the number that matches what lands on disk.
 */
function utf8ByteLength(text: string): number {
  return Buffer.byteLength(text, "utf-8");
}

/**
 * Write a success payload to stdout. JSON mode pretty-prints `value`;
 * human mode calls `humanFallback` lazily. Both pass through
 * `scrubSentinel` before reaching the stream.
 */
function emit(
  runtime: OutputRuntimeEnv,
  mode: OutputMode,
  value: unknown,
  humanFallback: () => string,
): void {
  if (mode === "json") {
    runtime.writeStdout(scrubSentinel(JSON.stringify(value, null, 2)));
    return;
  }
  runtime.writeStdout(scrubSentinel(humanFallback()));
}

/**
 * Write a scrubbed error through `runtime.error`. JSON mode emits
 * `{ error: { code, message } }`; human mode emits `CODE: message`.
 */
function emitError(
  runtime: OutputRuntimeEnv,
  mode: OutputMode,
  message: string,
  code = "ERR",
): void {
  const scrubbed = scrubSentinel(message);
  if (mode === "json") {
    runtime.error(JSON.stringify({ error: { code, message: scrubbed } }));
    return;
  }
  runtime.error(`${code}: ${scrubbed}`);
}

/**
 * Parse an oc:// string. On `OcPathError` the error is reported with its
 * structured code, exit code 2 is requested, and `null` is returned so the
 * caller can bail out. Any other error is rethrown untouched.
 */
function parseOrReport(
  raw: string,
  mode: OutputMode,
  runtime: OutputRuntimeEnv,
): OcPath | null {
  try {
    return parseOcPath(raw);
  } catch (err) {
    if (err instanceof OcPathError) {
      emitError(runtime, mode, `parse failed: ${err.message}`, err.code);
      runtime.exit(2);
      return null;
    }
    throw err;
  }
}

/**
 * Read `absPath` as UTF-8 and parse it with the parser selected by
 * `inferKind(fileName)`. Anything `inferKind` does not map to
 * jsonc / jsonl / yaml (including `null` for unknown extensions) is parsed
 * as markdown. Read errors (e.g. ENOENT) propagate to the caller.
 */
async function loadAst(absPath: string, fileName: string): Promise<OcAst> {
  const raw = await fs.readFile(absPath, "utf-8");
  const kind = inferKind(fileName);
  if (kind === "jsonc") {
    return parseJsonc(raw).ast;
  }
  if (kind === "jsonl") {
    return parseJsonl(raw).ast;
  }
  if (kind === "yaml") {
    return parseYaml(raw).ast;
  }
  return parseMd(raw).ast;
}

/**
 * Serialize `ast` with the emitter matching its `kind` tag.
 *
 * `fileName` is plumbed through as `fileNameForGuard` so
 * `OcEmitSentinelError` messages carry the file context
 * (`oc://gateway.jsonc/[raw]`) instead of the empty-slot fallback
 * (`oc:///[raw]`).
 */
function emitForKind(ast: OcAst, fileName?: string): string {
  const opts = fileName !== undefined ? { fileNameForGuard: fileName } : {};
  switch (ast.kind) {
    case "jsonc":
      return emitJsonc(ast, opts);
    case "jsonl":
      return emitJsonl(ast, opts);
    case "yaml":
      // Default round-trip mode preserves bytes verbatim for unmodified
      // ASTs (so `openclaw path emit foo.yaml` is a true byte-fidelity
      // diagnostic). After `setOcPath` mutates a YAML AST the substrate
      // re-renders into `ast.raw` already, so round-trip mode emits the
      // mutated bytes too — no need for the render-mode override.
      return emitYaml(ast, opts);
    case "md":
      return emitMd(ast, opts);
  }
  throw new Error(`unreachable: emitForKind kind`);
}

/**
 * Map an oc:// path to a filesystem path. `--file`, when given, is
 * resolved on its own (against `process.cwd()` if relative — `--cwd` is
 * NOT applied to it); otherwise the oc:// file slot is resolved against
 * `--cwd`, defaulting to `process.cwd()`.
 */
function resolveFsPath(path: OcPath, options: PathCommandOptions): string {
  const cwd = options.cwd ?? process.cwd();
  if (options.file !== undefined) {
    return resolvePath(options.file);
  }
  return resolvePath(cwd, path.file);
}

/** One-line human rendering of a match, keyed on `match.kind`. */
function formatMatchHuman(match: OcMatch): string {
  if (match.kind === "leaf") {
    return `leaf @ L${match.line}: ${JSON.stringify(match.valueText)} (${match.leafType})`;
  }
  if (match.kind === "node") {
    return `node @ L${match.line} [${match.descriptor}]`;
  }
  if (match.kind === "insertion-point") {
    return `insertion-point @ L${match.line} [${match.container}]`;
  }
  return `root @ L${match.line}`;
}

/**
 * `openclaw path resolve` — resolve one concrete oc:// path and print the
 * match. Exits 2 on a missing/malformed path or when `resolveOcPath`
 * refuses it with an `OcPathError`; exits 1 when nothing matches.
 */
export async function pathResolveCommand(
  pathStr: string | undefined,
  options: PathCommandOptions,
  runtime: OutputRuntimeEnv,
): Promise<void> {
  const mode = detectMode(options);
  if (pathStr === undefined) {
    emitError(runtime, mode, "resolve: missing argument");
    runtime.exit(2);
    return;
  }
  const ocPath = parseOrReport(pathStr, mode, runtime);
  if (ocPath === null) {
    return;
  }
  const fsPath = resolveFsPath(ocPath, options);
  const ast = await loadAst(fsPath, ocPath.file);
  let match: OcMatch | null;
  try {
    match = resolveOcPath(ast, ocPath);
  } catch (err) {
    if (err instanceof OcPathError) {
      // resolveOcPath throws on wildcard patterns (the pattern belongs
      // in `find`, not `resolve`). Surface the structured code so the
      // CLI message points the caller at the right verb.
      emitError(runtime, mode, `resolve refused: ${err.message}`, err.code);
      runtime.exit(2);
      return;
    }
    throw err;
  }
  if (match === null) {
    emit(
      runtime,
      mode,
      { resolved: false, ocPath: pathStr },
      () => `not found: ${pathStr}`,
    );
    runtime.exit(1);
    return;
  }
  // Bind to a const so the narrowed (non-null) type survives into the closure.
  const found: OcMatch = match;
  emit(
    runtime,
    mode,
    { resolved: true, ocPath: pathStr, match: found },
    () => formatMatchHuman(found),
  );
}

/**
 * `openclaw path set` — replace the leaf at `pathStr` with `value` and
 * write the file back (or, with `--dry-run`, print the bytes that would
 * be written).
 *
 * Exits 2 on missing arguments, a malformed path, or an `OcPathError`
 * raised by the substrate; exits 1 when the substrate returns
 * `ok: false` or the sentinel guard refuses the value.
 */
export async function pathSetCommand(
  pathStr: string | undefined,
  value: string | undefined,
  options: PathCommandOptions,
  runtime: OutputRuntimeEnv,
): Promise<void> {
  const mode = detectMode(options);
  if (pathStr === undefined || value === undefined) {
    emitError(runtime, mode, "set: requires <oc-path> <value>");
    runtime.exit(2);
    return;
  }
  const ocPath = parseOrReport(pathStr, mode, runtime);
  if (ocPath === null) {
    return;
  }
  const fsPath = resolveFsPath(ocPath, options);
  const ast = await loadAst(fsPath, ocPath.file);
  // `setOcPath` invokes the per-kind editor which calls back into
  // emit during rebuildRaw; the redaction-sentinel guard fires there
  // and throws `OcEmitSentinelError` for sentinel-bearing values.
  // Catch the throw here so it goes through the structured CLI error
  // path instead of escaping to commander's runCommandWithRuntime
  // (which would print raw String(err) and bypass --json scrubbing).
  let result: SetResult;
  try {
    result = setOcPath(ast, ocPath, value);
  } catch (err) {
    // Sentinel check first: it keeps its dedicated code and exit status
    // even if the sentinel error class happens to derive from OcPathError.
    if (err instanceof OcEmitSentinelError) {
      emitError(
        runtime,
        mode,
        `set refused: ${err.message}`,
        "OC_EMIT_SENTINEL",
      );
      runtime.exit(1);
      return;
    }
    if (err instanceof OcPathError) {
      // Same treatment `resolve` gives substrate refusals (e.g. a
      // pattern passed to a single-match verb): structured code, exit 2.
      emitError(runtime, mode, `set refused: ${err.message}`, err.code);
      runtime.exit(2);
      return;
    }
    throw err;
  }
  if (!result.ok) {
    const reason = result.reason;
    const detail = "detail" in result ? result.detail : undefined;
    emit(
      runtime,
      mode,
      { ok: false, reason, detail },
      () =>
        `set failed: ${reason}${detail !== undefined ? ` — ${detail}` : ""}`,
    );
    runtime.exit(1);
    return;
  }
  // `setOcPath` accepted the value into the AST, but the per-kind
  // emit can still refuse to serialize it — most notably when the
  // value contains the redaction sentinel. The throw must NOT escape
  // to commander's runCommandWithRuntime; route it through `emitError`
  // like every other refusal path.
  let newBytes: string;
  try {
    newBytes = emitForKind(result.ast, ocPath.file);
  } catch (err) {
    if (err instanceof OcEmitSentinelError) {
      emitError(
        runtime,
        mode,
        `emit refused: ${err.message}`,
        "OC_EMIT_SENTINEL",
      );
      runtime.exit(1);
      return;
    }
    throw err;
  }
  // Self-hosters running `openclaw path set` on a commented jsonc/yaml
  // file should see the formatting-loss warning explicitly.
  // NOTE(review): `emitForKind` emits in the default round-trip mode, while
  // this warning text talks about render mode — confirm the wording still
  // matches what the per-kind editors do to `ast.raw`.
  const formatLossWarning = FORMAT_LOSSY_KINDS.has(result.ast.kind)
    ? `note: ${result.ast.kind} edit-then-emit drops comments / original formatting (render mode)`
    : null;
  const warningField =
    formatLossWarning !== null ? { warning: formatLossWarning } : {};
  // Report real UTF-8 bytes, not UTF-16 code units.
  const byteCount = utf8ByteLength(newBytes);
  if (options.dryRun === true) {
    emit(
      runtime,
      mode,
      {
        ok: true,
        dryRun: true,
        bytes: newBytes,
        ...warningField,
      },
      () => {
        const lines = [`--dry-run: would write ${byteCount} bytes to ${fsPath}`];
        if (formatLossWarning !== null) {
          lines.push(formatLossWarning);
        }
        lines.push(newBytes);
        return lines.join("\n");
      },
    );
    return;
  }
  await fs.writeFile(fsPath, newBytes, "utf-8");
  emit(
    runtime,
    mode,
    {
      ok: true,
      dryRun: false,
      bytesWritten: byteCount,
      fsPath,
      ...warningField,
    },
    () => {
      const lines = [`wrote ${byteCount} bytes to ${fsPath}`];
      if (formatLossWarning !== null) {
        lines.push(formatLossWarning);
      }
      return lines.join("\n");
    },
  );
}

/**
 * `openclaw path find` — enumerate every concrete path matching a
 * wildcard/predicate pattern inside ONE file. Exits 2 on a missing or
 * malformed pattern, a wildcard in the file slot, or an `OcPathError`
 * raised while matching; exits 1 when there are zero matches.
 */
export async function pathFindCommand(
  patternStr: string | undefined,
  options: PathCommandOptions,
  runtime: OutputRuntimeEnv,
): Promise<void> {
  const mode = detectMode(options);
  if (patternStr === undefined) {
    emitError(runtime, mode, "find: missing argument");
    runtime.exit(2);
    return;
  }
  const pattern = parseOrReport(patternStr, mode, runtime);
  if (pattern === null) {
    return;
  }
  // The CLI resolves `pattern.file` to a single literal filesystem path.
  // Wildcards in the file slot (e.g. `oc://*.jsonc/...`) would otherwise
  // surface as a confusing ENOENT from `fs.readFile`. `findOcPaths` walks
  // *inside* an AST — multi-file globbing is out of scope for v0.
  if (/[*?]/.test(pattern.file)) {
    emitError(
      runtime,
      mode,
      `find: file-slot wildcards are not supported (got "${pattern.file}"). ` +
        `Pass a concrete file path; multi-file globbing is a follow-up feature.`,
      "OC_PATH_FILE_WILDCARD_UNSUPPORTED",
    );
    runtime.exit(2);
    return;
  }
  const fsPath = resolveFsPath(pattern, options);
  const ast = await loadAst(fsPath, pattern.file);
  let matches: ReturnType<typeof findOcPaths>;
  try {
    matches = findOcPaths(ast, pattern);
  } catch (err) {
    if (err instanceof OcPathError) {
      // Keep substrate refusals on the structured/scrubbed error path
      // rather than letting them escape to the commander wrapper.
      emitError(runtime, mode, `find refused: ${err.message}`, err.code);
      runtime.exit(2);
      return;
    }
    throw err;
  }
  emit(
    runtime,
    mode,
    {
      pattern: patternStr,
      count: matches.length,
      matches: matches.map((m) => ({
        path: formatOcPath(m.path),
        match: m.match,
      })),
    },
    () => {
      if (matches.length === 0) {
        return `0 matches for ${patternStr}`;
      }
      const plural = matches.length === 1 ? "" : "es";
      const lines = [`${matches.length} match${plural} for ${patternStr}:`];
      for (const m of matches) {
        lines.push(` ${formatOcPath(m.path)} → ${formatMatchHuman(m.match)}`);
      }
      return lines.join("\n");
    },
  );
  if (matches.length === 0) {
    runtime.exit(1);
  }
}

/**
 * `openclaw path validate` — parse-only check; never touches the
 * filesystem. Prints the parsed slots on success. An `OcPathError` is
 * reported on stdout as `{ valid: false, … }` with exit code 1; a missing
 * argument exits 2.
 */
export function pathValidateCommand(
  pathStr: string | undefined,
  options: PathCommandOptions,
  runtime: OutputRuntimeEnv,
): void {
  const mode = detectMode(options);
  if (pathStr === undefined) {
    emitError(runtime, mode, "validate: missing argument");
    runtime.exit(2);
    return;
  }
  try {
    const ocPath = parseOcPath(pathStr);
    emit(
      runtime,
      mode,
      {
        valid: true,
        ocPath: pathStr,
        formatted: formatOcPath(ocPath),
        structure: {
          file: ocPath.file,
          section: ocPath.section,
          item: ocPath.item,
          field: ocPath.field,
          session: ocPath.session,
        },
      },
      () => {
        const lines = [`valid: ${pathStr}`, ` file: ${ocPath.file}`];
        if (ocPath.section !== undefined) {
          lines.push(` section: ${ocPath.section}`);
        }
        if (ocPath.item !== undefined) {
          lines.push(` item: ${ocPath.item}`);
        }
        if (ocPath.field !== undefined) {
          lines.push(` field: ${ocPath.field}`);
        }
        if (ocPath.session !== undefined) {
          lines.push(` session: ${ocPath.session}`);
        }
        return lines.join("\n");
      },
    );
    return;
  } catch (err) {
    if (err instanceof OcPathError) {
      const { code, message } = err;
      emit(
        runtime,
        mode,
        { valid: false, code, message },
        () => `INVALID: ${code}: ${message}`,
      );
      runtime.exit(1);
      return;
    }
    throw err;
  }
}

/**
 * `openclaw path emit` — parse a file and emit it again, as a
 * byte-fidelity diagnostic. Exits 2 on a missing argument and 1 when the
 * emitter raises `OcEmitSentinelError`.
 */
export async function pathEmitCommand(
  fileArg: string | undefined,
  options: PathCommandOptions,
  runtime: OutputRuntimeEnv,
): Promise<void> {
  const mode = detectMode(options);
  if (fileArg === undefined) {
    emitError(runtime, mode, "emit: missing argument");
    runtime.exit(2);
    return;
  }
  // Resolve the file slot through the same `--cwd`/`--file` rules the
  // sibling subcommands use: `--file` (when set) is the path override;
  // otherwise resolve `fileArg` against `--cwd` (defaulting to
  // `process.cwd()`). Without this, the flags are accepted by commander
  // but ignored by the handler.
  const fsPath =
    options.file !== undefined
      ? resolvePath(options.file)
      : resolvePath(options.cwd ?? process.cwd(), fileArg);
  // Last path component (either separator) drives kind inference and
  // the sentinel-guard file context.
  const fileName = fsPath.split(/[\\/]/).pop() ?? fileArg;
  const ast = await loadAst(fsPath, fileName);
  let bytes: string;
  try {
    bytes = emitForKind(ast, fileName);
  } catch (err) {
    if (err instanceof OcEmitSentinelError) {
      emitError(
        runtime,
        mode,
        `emit refused: ${err.message}`,
        "OC_EMIT_SENTINEL",
      );
      runtime.exit(1);
      return;
    }
    throw err;
  }
  // NOTE(review): unlike `emit()`, these writes do not pass through
  // `scrubSentinel`. That keeps the output byte-identical to the file, but
  // it also means a file that already contains the sentinel is echoed
  // verbatim — confirm this exception to the output-boundary scrub is intended.
  if (mode === "json") {
    runtime.writeStdout(JSON.stringify({ ok: true, kind: ast.kind, bytes }));
    return;
  }
  runtime.writeStdout(bytes);
}
// File: src/oc-path/ast.ts
/**
 * Workspace-Markdown AST — generic addressing index over the workspace
 * files openclaw treats as opaque text in `loadWorkspaceBootstrapFiles`.
 *
 * NOTE(review): the original comments in this file say "8 workspace
 * files" in one place and "9 workspace files" in another — confirm the
 * actual count before quoting a number.
 *
 * **The AST is purely an addressing index.** It does NOT encode opinions
 * about what a "valid" SOUL.md / AGENTS.md / MEMORY.md looks like; it
 * exposes the markdown features (frontmatter, sections, items, tables,
 * code blocks) that any `OcPath` (`{ file, section?, item?, field? }`) can
 * resolve over. Per-file lint opinions ride in @openclaw/oc-lint, not
 * here.
 *
 * **Byte-fidelity contract**: `emitMd(parse(raw)) === raw` for every input
 * the parser accepts. The parser preserves the original bytes on the
 * root node (`raw`) so emitters can round-trip even content the AST
 * doesn't structurally model (foreign content, idiosyncratic whitespace).
 *
 * @module @openclaw/oc-path/ast
 */

/**
 * Diagnostic emitted by the parser. Used by lint rules and parse-error
 * surfacing alike. Severity is `info` by default; the parser emits
 * `warning` for suspicious-but-recoverable inputs (e.g., unclosed
 * frontmatter fence) and never throws.
 */
export interface Diagnostic {
  /** Source line the diagnostic refers to. */
  readonly line: number;
  /** Human-readable description of the finding. */
  readonly message: string;
  readonly severity: 'info' | 'warning' | 'error';
  /** Optional stable identifier for the diagnostic. */
  readonly code?: string;
}

/**
 * A frontmatter key/value pair. Keys are preserved as written; values
 * are unquoted (surrounding `"` or `'` stripped) but otherwise verbatim.
 */
export interface FrontmatterEntry {
  readonly key: string;
  readonly value: string;
  readonly line: number;
}

/**
 * A bullet-list item inside a section. Items are addressable via OcPath
 * `{ file, section, item }` where `item` is the slug of the bullet's
 * text (or the slug of `kv.key` when the bullet is in `- key: value`
 * shape).
 *
 * `kv` is populated when the bullet matches `- <key>: <value>` (the
 * common pattern in AGENTS.md / TOOLS.md / USER.md). Lint rules use it
 * for field-level addressing via `OcPath.field`.
 */
export interface AstItem {
  /** Bullet text as parsed. */
  readonly text: string;
  /** Slug that OcPath `item` is matched against. */
  readonly slug: string;
  readonly line: number;
  /** Present only for `- key: value` bullets. */
  readonly kv?: { readonly key: string; readonly value: string };
}

/**
 * A markdown table. Tables surface in `## Tool Guidance` blocks and
 * elsewhere; lint rules can address rows by header value if needed.
 */
export interface AstTable {
  readonly headers: readonly string[];
  /** One inner array of cell strings per table row. */
  readonly rows: readonly (readonly string[])[];
  readonly line: number;
}

/**
 * A fenced code block. Carries the language tag (or `null`) and the
 * verbatim body.
 */
export interface AstCodeBlock {
  readonly lang: string | null;
  readonly text: string;
  readonly line: number;
}

/**
 * An H2-delimited block. The `slug` is the kebab-case lowercase form of
 * `heading` and is what OcPath `section` matches against. `bodyText` is
 * the prose between this heading and the next H2 (or end of file),
 * verbatim. `items`, `tables`, `codeBlocks` are extracted from
 * `bodyText` for addressing convenience but the raw text is preserved.
 */
export interface AstBlock {
  readonly heading: string;
  readonly slug: string;
  readonly line: number;
  readonly bodyText: string;
  readonly items: readonly AstItem[];
  readonly tables: readonly AstTable[];
  readonly codeBlocks: readonly AstCodeBlock[];
}

/**
 * The root AST node. Always carries `raw` for byte-identical round-trip.
 * `frontmatter` is empty when the file has none. `preamble` is the
 * prose before the first H2 (may be empty). `blocks` is the H2 tree in
 * document order.
 *
 * `kind: 'md'` discriminator matches the jsonc / jsonl / yaml AST
 * shapes; the universal `setOcPath` / `resolveOcPath` verbs dispatch
 * via this tag at runtime so callers don't have to thread kind
 * through the call site.
 *
 * The generic shape is the same for every workspace file; opinions
 * (`AGENTS_TOOLS_SECTION_EMPTY`, etc.) ride in lint rules, not here.
 */
export interface MdAst {
  readonly kind: 'md';
  readonly raw: string;
  readonly frontmatter: readonly FrontmatterEntry[];
  readonly preamble: string;
  readonly blocks: readonly AstBlock[];
}

/**
 * Parser output: the AST plus any diagnostics from the parse pass.
 */
export interface ParseResult {
  readonly ast: MdAst;
  readonly diagnostics: readonly Diagnostic[];
}
// File: src/oc-path/dispatch.ts
/**
 * Cross-kind utilities. `inferKind` is a convention helper for callers
 * who want to map filename → kind so they can pick the right
 * `parseXxx` / `setXxx` / `resolveXxx` function.
 *
 * NOTE(review): the original header stated that the universal
 * `resolveOcPath` / `setOcPath` dispatchers were dropped in favour of
 * per-kind verbs only. Other modules in this same change still import
 * `resolveOcPath` / `setOcPath` (the CLI via `index.js`, `find.ts` via
 * `universal.js`), so that statement looks stale — confirm and reword.
 *
 * @module @openclaw/oc-path/dispatch
 */

export type OcKind = 'md' | 'jsonc' | 'jsonl' | 'yaml';

/**
 * Extension → kind table, consulted top to bottom. Suffixes are
 * lowercase because the filename is lowercased before matching.
 */
const KIND_BY_SUFFIX: ReadonlyArray<readonly [OcKind, readonly string[]]> = [
  ['md', ['.md']],
  ['jsonl', ['.jsonl', '.ndjson']],
  ['jsonc', ['.jsonc', '.json']],
  ['yaml', ['.yaml', '.yml', '.lobster']],
];

/**
 * Recommend a kind from a filename. Pure convention helper — returns
 * the substrate's default mapping, or `null` when no known suffix
 * matches. Matching is case-insensitive and looks only at the end of
 * the name.
 */
export function inferKind(filename: string): OcKind | null {
  const name = filename.toLowerCase();
  for (const [kind, suffixes] of KIND_BY_SUFFIX) {
    if (suffixes.some((suffix) => name.endsWith(suffix))) {
      return kind;
    }
  }
  return null;
}
// File: src/oc-path/edit.ts
/**
 * Mutate a `MdAst` at an OcPath. Returns a new AST with the
 * value replaced; the original is unchanged.
 *
 * Writable surface:
 *
 *   oc://FILE/[frontmatter]/key     → frontmatter entry value
 *   oc://FILE/section/item/field    → item.kv.value (when item has kv shape)
 *
 * Section bodies, tables, and code blocks are NOT writable through
 * this primitive — they're prose, and a generic "set" doesn't compose
 * cleanly. Doctor fixers handle structural edits via dedicated verbs.
 *
 * @module @openclaw/oc-path/edit
 */

import type { AstBlock, AstItem, FrontmatterEntry, MdAst } from './ast.js';
import type { OcPath } from './oc-path.js';

/**
 * Outcome of `setMdOcPath`.
 *   - `unresolved`   — the addressed frontmatter key / section / item /
 *                      field does not exist
 *   - `not-writable` — the path is not a frontmatter address and lacks
 *                      one of section / item / field
 *   - `no-item-kv`   — the item exists but is not a `- key: value` bullet
 */
export type MdEditResult =
  | { readonly ok: true; readonly ast: MdAst }
  | {
      readonly ok: false;
      readonly reason: 'unresolved' | 'not-writable' | 'no-item-kv';
    };

/**
 * Replace the value at `path` with `newValue`. The new AST has fresh
 * `raw` re-rendered from the structural fields; `ast` itself is not
 * mutated.
 *
 * Frontmatter keys are matched exactly. Section and item slugs are
 * matched against the lowercased path segment, and the field is compared
 * to `item.kv.key` case-insensitively.
 */
export function setMdOcPath(
  ast: MdAst,
  path: OcPath,
  newValue: string,
): MdEditResult {
  // Frontmatter address: oc://FILE/[frontmatter]/<key>
  if (path.section === '[frontmatter]') {
    const key = path.item ?? path.field;
    if (key === undefined) {return { ok: false, reason: 'unresolved' };}
    const idx = ast.frontmatter.findIndex((e) => e.key === key);
    if (idx === -1) {return { ok: false, reason: 'unresolved' };}
    const existing = ast.frontmatter[idx];
    if (existing === undefined) {return { ok: false, reason: 'unresolved' };}
    const newEntry: FrontmatterEntry = { ...existing, value: newValue };
    const newFm = ast.frontmatter.slice();
    newFm[idx] = newEntry;
    return finalize({ ...ast, frontmatter: newFm });
  }

  // Item-field address: oc://FILE/section/item/field
  if (
    path.section === undefined ||
    path.item === undefined ||
    path.field === undefined
  ) {
    return { ok: false, reason: 'not-writable' };
  }

  const sectionSlug = path.section.toLowerCase();
  const blockIdx = ast.blocks.findIndex((b) => b.slug === sectionSlug);
  if (blockIdx === -1) {return { ok: false, reason: 'unresolved' };}
  const block = ast.blocks[blockIdx];
  if (block === undefined) {return { ok: false, reason: 'unresolved' };}

  const itemSlug = path.item.toLowerCase();
  const itemIdx = block.items.findIndex((i) => i.slug === itemSlug);
  if (itemIdx === -1) {return { ok: false, reason: 'unresolved' };}
  const item = block.items[itemIdx];
  if (item === undefined) {return { ok: false, reason: 'unresolved' };}
  if (item.kv === undefined) {return { ok: false, reason: 'no-item-kv' };}
  if (item.kv.key.toLowerCase() !== path.field.toLowerCase()) {
    return { ok: false, reason: 'unresolved' };
  }

  const newItem: AstItem = {
    ...item,
    kv: { key: item.kv.key, value: newValue },
  };
  const newItems = block.items.slice();
  newItems[itemIdx] = newItem;
  const newBlock: AstBlock = {
    ...block,
    items: newItems,
    bodyText: rebuildBlockBody(block, newItems),
  };
  const newBlocks = ast.blocks.slice();
  newBlocks[blockIdx] = newBlock;
  return finalize({ ...ast, blocks: newBlocks });
}

/**
 * Rebuild block.bodyText so emit-roundtrip mode reflects the edit. We
 * do a minimal in-place substitution on the existing bodyText: find
 * the first `- key: value` line for each touched item and rewrite the
 * value.
 *
 * For items without a matching bullet line, bodyText is left alone.
 *
 * Two details matter for correctness:
 *   - The new value is inserted through a replacer FUNCTION. A
 *     replacement string would let `$&`, `$1`, `$$`, … inside the
 *     user-supplied value be expanded by `String.prototype.replace`.
 *   - Whitespace in the pattern is horizontal-only (`[^\S\r\n]`). With
 *     `\s`, the run after the colon could cross a newline when the old
 *     value was empty and the edit would overwrite the NEXT line.
 */
function rebuildBlockBody(block: AstBlock, newItems: readonly AstItem[]): string {
  const hspace = '[^\\S\\r\\n]*';
  let body = block.bodyText;
  for (let i = 0; i < newItems.length; i++) {
    const newItem = newItems[i];
    const oldItem = block.items[i];
    if (newItem === undefined || oldItem === undefined) {continue;}
    if (newItem.kv === undefined || oldItem.kv === undefined) {continue;}
    if (newItem.kv.value === oldItem.kv.value) {continue;}
    const replacement = newItem.kv.value;
    const re = new RegExp(
      `^(${hspace}-${hspace}${escapeRegex(oldItem.kv.key)}${hspace}:${hspace}).*$`,
      'm',
    );
    body = body.replace(re, (_line: string, prefix: string) => {
      // A bare `- key:` (empty old value) has no separator yet; add one
      // so the rewritten bullet keeps the `key: value` shape.
      const separator = prefix.endsWith(':') ? ' ' : '';
      return `${prefix}${separator}${replacement}`;
    });
  }
  return body;
}

/** Escape every regex metacharacter in `s` so it matches literally. */
function escapeRegex(s: string): string {
  return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Re-render `ast.raw` from the (possibly mutated) tree: optional
 * `---` frontmatter fence, then the preamble, then each block as
 * `## heading` followed by its bodyText, with one empty line between
 * consecutive parts. Always returns `ok: true`.
 */
function finalize(ast: MdAst): MdEditResult {
  const parts: string[] = [];
  if (ast.frontmatter.length > 0) {
    parts.push('---');
    for (const fm of ast.frontmatter) {
      parts.push(`${fm.key}: ${formatFrontmatterValue(fm.value)}`);
    }
    parts.push('---');
  }
  if (ast.preamble.length > 0) {
    if (parts.length > 0) {parts.push('');}
    parts.push(ast.preamble);
  }
  for (const block of ast.blocks) {
    if (parts.length > 0) {parts.push('');}
    parts.push(`## ${block.heading}`);
    if (block.bodyText.length > 0) {parts.push(block.bodyText);}
  }
  const raw = parts.join('\n');
  return { ok: true, ast: { ...ast, raw } };
}

/**
 * Render a frontmatter value: `""` for the empty string, a JSON-quoted
 * string when it contains a character that could confuse a YAML parser,
 * otherwise the bare value.
 */
function formatFrontmatterValue(value: string): string {
  if (value.length === 0) {return '""';}
  if (/[:#&*?|<>=!%@`,[\]{}\r\n]/.test(value)) {
    return JSON.stringify(value);
  }
  return value;
}
// File: src/oc-path/emit.ts
/**
 * Emit an AST back to bytes.
 *
 * **Two modes**, selected by `opts.mode`:
 *
 *   1. **Round-trip** (default) — returns `ast.raw` verbatim.
 *      Byte-identical to the parsed input when the AST is unmodified.
 *
 *   2. **Render** — rebuilds the text from the structural fields
 *      (frontmatter, preamble, blocks). **Not** byte-identical to the
 *      input: canonical forms are rendered (`\n` joins, a single space
 *      after `:` in frontmatter, one empty line between parts).
 *      Callers needing byte-fidelity for partial edits should patch
 *      `raw` directly instead of mutating the AST.
 *
 * Render mode passes every string it emits through `guardSentinel`.
 * Round-trip mode only checks `ast.raw` when the caller opts in with
 * `acceptPreExistingSentinel: false`.
 *
 * @module @openclaw/oc-path/emit
 */

import type { FrontmatterEntry, MdAst } from './ast.js';
import { guardSentinel } from './sentinel.js';

// NOTE(review): duplicated literal — this presumably mirrors the
// `REDACTED_SENTINEL` constant the CLI imports from the package index;
// prefer importing it here once its defining module is confirmed.
const SENTINEL_LITERAL = '__OPENCLAW_REDACTED__';

/**
 * Emit options.
 */
export interface EmitOptions {
  /**
   * `'roundtrip'` (default) returns `ast.raw`; `'render'` always
   * re-renders from the structural fields.
   */
  readonly mode?: 'roundtrip' | 'render';
  /**
   * File name used to build the oc:// path passed to `guardSentinel`
   * (`oc://<fileNameForGuard>/…`). When omitted or empty, the path
   * falls back to the bare `oc://` prefix.
   */
  readonly fileNameForGuard?: string;
  /**
   * See `JsoncEmitOptions.acceptPreExistingSentinel` for the rationale.
   * Default `true` — round-trip echoes parsed bytes without scanning
   * for the sentinel. Render mode guards its output regardless.
   */
  readonly acceptPreExistingSentinel?: boolean;
}

/**
 * Emit the AST.
 *
 * Round-trip mode echoes `ast.raw` verbatim; it invokes `guardSentinel`
 * only when `acceptPreExistingSentinel` is `false` and `ast.raw` contains
 * the sentinel literal. Render mode passes frontmatter keys and values,
 * the preamble, every block heading and body, and every item `kv.value`
 * through `guardSentinel` (which raises `OcEmitSentinelError` per the
 * substrate contract) before joining the parts with `\n`.
 */
export function emitMd(ast: MdAst, opts: EmitOptions = {}): string {
  const mode = opts.mode ?? 'roundtrip';
  const guardPath = opts.fileNameForGuard ? `oc://${opts.fileNameForGuard}` : 'oc://';
  const acceptPreExisting = opts.acceptPreExistingSentinel ?? true;

  if (mode === 'roundtrip') {
    // Round-trip trusts parsed bytes — see emit-policy comment in
    // jsonc/emit.ts. A markdown file legitimately containing the
    // sentinel literal (in a code block, in a pasted error log) would
    // otherwise become a workspace-wide emit DoS.
    if (!acceptPreExisting && ast.raw.includes(SENTINEL_LITERAL)) {
      guardSentinel(SENTINEL_LITERAL, `${guardPath}/[raw]`);
    }
    return ast.raw;
  }

  // Render mode: rebuild from structural fields. This loses
  // formatting details (extra blank lines, custom whitespace, etc.)
  // but is correct.
  const parts: string[] = [];

  if (ast.frontmatter.length > 0) {
    parts.push('---');
    for (const fm of ast.frontmatter) {
      // Keys are written to the output too, so they are guarded as well.
      guardSentinel(fm.key, `${guardPath}/[frontmatter]/[key]`);
      guardSentinel(fm.value, `${guardPath}/[frontmatter]/${fm.key}`);
      parts.push(`${fm.key}: ${formatFrontmatterValue(fm.value)}`);
    }
    parts.push('---');
  }

  if (ast.preamble.length > 0) {
    guardSentinel(ast.preamble, `${guardPath}/[preamble]`);
    if (parts.length > 0) {parts.push('');}
    parts.push(ast.preamble);
  }

  for (const block of ast.blocks) {
    if (parts.length > 0) {parts.push('');}
    // The heading is emitted verbatim, so it goes through the guard
    // like every other rendered string.
    guardSentinel(block.heading, `${guardPath}/${block.slug}/[heading]`);
    parts.push(`## ${block.heading}`);
    if (block.bodyText.length > 0) {
      // The body is checked as one string, then each item's kv value is
      // checked under its own field-precise path for a better error.
      guardSentinel(block.bodyText, `${guardPath}/${block.slug}/[body]`);
      for (const item of block.items) {
        if (item.kv) {
          guardSentinel(item.kv.value, `${guardPath}/${block.slug}/${item.slug}/${item.kv.key}`);
        }
      }
      parts.push(block.bodyText);
    }
  }

  return parts.join('\n');
}

/**
 * Render a frontmatter value: `""` for the empty string, a JSON-quoted
 * string when it contains a character that would confuse a YAML parser,
 * otherwise the bare value.
 */
function formatFrontmatterValue(value: string): string {
  if (value.length === 0) {return '""';}
  if (/[:#&*?|<>=!%@`,[\]{}\r\n]/.test(value)) {
    return JSON.stringify(value);
  }
  return value;
}

/**
 * Mark an AST as "dirty" — intended for callers that mutate the AST
 * structurally and want emitMd() to re-render rather than round-trip.
 *
 * Currently a no-op: emitMd() decides purely on `opts.mode`. Kept as an
 * extension point for a future invariant where the AST tracks its own
 * dirty state.
 */
export function markDirty(_ast: MdAst): void {
  // intentionally empty
}

// Re-export the frontmatter type for convenience so tests don't need
// to import from ast.ts.
export type { FrontmatterEntry };
+ * + * **Use cases driving v0**: + * - lint rules iterating `oc://workflow.lobster/steps/* /command` + * - jsonl session walks `oc://session/* /eventType` + * - md frontmatter sweeps `oc://SOUL.md/[frontmatter]/*` + * + * @module @openclaw/oc-path/find + */ + +import { isMap, isScalar, isSeq, type Node, type Pair } from 'yaml'; +import type { JsoncValue } from './jsonc/ast.js'; +import type { JsonlAst, JsonlLine } from './jsonl/ast.js'; +import type { MdAst } from './ast.js'; +import type { OcPath } from './oc-path.js'; +import { + MAX_TRAVERSAL_DEPTH, + OcPathError, + WILDCARD_RECURSIVE, + WILDCARD_SINGLE, + evaluatePredicate, + isOrdinalSeg, + isPositionalSeg, + isPredicateSeg, + isQuotedSeg, + isUnionSeg, + parseOrdinalSeg, + parsePredicateSeg, + parseUnionSeg, + quoteSeg, + resolvePositionalSeg, + splitRespectingBrackets, + unquoteSeg, +} from './oc-path.js'; +import type { PredicateSpec } from './oc-path.js'; +import type { OcAst, OcMatch } from './universal.js'; +import { resolveOcPath } from './universal.js'; + +// ---------- Public types --------------------------------------------------- + +/** A find result: a concrete (wildcard-free) path plus its match info. */ +export interface OcPathMatch { + readonly path: OcPath; + readonly match: OcMatch; +} + +/** + * The slot a sub-segment came from in the input pattern. Walker outputs + * carry slot tags so re-packing into `OcPath` preserves the pattern's + * shape (a `*` in the `item` slot produces a path with the matched + * value in `item`, not joined into `section`). + */ +type Slot = 'section' | 'item' | 'field'; +interface SlotSub { + readonly slot: Slot; + readonly value: string; +} + +/** A single tagged sub-segment of the pattern (post dot-split). */ +interface PatternSub { + readonly slot: Slot; + readonly value: string; +} + +// ---------- Public verb ---------------------------------------------------- + +/** + * Match `pattern` against `ast` and return every concrete OcPath that + * resolves. 
/**
 * Match `pattern` against `ast` and return every concrete path that
 * resolves, paired with its `resolveOcPath` result. Returns an empty
 * array when nothing matches.
 *
 * A pattern whose sub-segments contain no wildcard (`*` / `**`),
 * positional token, union or predicate is resolved directly with a
 * single `resolveOcPath` call. Anything else is expanded by the
 * per-kind walkers and each candidate is re-resolved, so only paths
 * that actually resolve are returned.
 *
 * Candidates are de-duplicated (first occurrence wins, order
 * preserved): the walkers can reach the same concrete path along more
 * than one route, e.g. `**` followed by `**`, or a union listing the
 * same alternative twice.
 *
 * NOTE(review): insertion-marker patterns (`+`, `+key`, `+nnn`) get no
 * special handling here; they presumably fall out as "no match" via
 * `resolveOcPath` — confirm against the resolver.
 *
 * @param ast - Parsed document of any supported kind.
 * @param pattern - Path pattern to match.
 * @returns Concrete matches in walker order, without duplicates.
 */
export function findOcPaths(ast: OcAst, pattern: OcPath): readonly OcPathMatch[] {
  const subs = patternSubs(pattern);
  const needsExpansion = subs.some(
    (s) =>
      s.value === WILDCARD_SINGLE ||
      s.value === WILDCARD_RECURSIVE ||
      isPositionalSeg(s.value) ||
      isUnionSeg(s.value) ||
      isPredicateSeg(s.value),
  );
  if (!needsExpansion) {
    // Pure-literal fast path: the pattern already is the concrete path.
    const direct = resolveOcPath(ast, pattern);
    return direct === null ? [] : [{ path: pattern, match: direct }];
  }

  const out: OcPathMatch[] = [];
  const seen = new Set<string>();
  for (const concrete of expand(ast, subs, pattern)) {
    // JSON-encode the slot tuple so distinct slot layouts never collide.
    const key = JSON.stringify([
      concrete.file,
      concrete.section ?? null,
      concrete.item ?? null,
      concrete.field ?? null,
      concrete.session ?? null,
    ]);
    if (seen.has(key)) {
      continue;
    }
    seen.add(key);
    const m = resolveOcPath(ast, concrete);
    if (m !== null) {
      out.push({ path: concrete, match: m });
    }
  }
  return out;
}
/**
 * Flatten a pattern into slot-tagged sub-segments, in section → item →
 * field order. Each slot string is split on `.` with
 * `splitRespectingBrackets`; slots that are `undefined` contribute
 * nothing.
 */
function patternSubs(pattern: OcPath): readonly PatternSub[] {
  const slots = [
    ['section', pattern.section],
    ['item', pattern.item],
    ['field', pattern.field],
  ] as const;
  const tagged: PatternSub[] = [];
  for (const [slot, raw] of slots) {
    if (raw === undefined) {
      continue;
    }
    for (const value of splitRespectingBrackets(raw, '.')) {
      tagged.push({ slot, value });
    }
  }
  return tagged;
}

/**
 * Rebuild an `OcPath` from slot-tagged sub-segments. Values sharing a
 * slot are joined with `.`; a slot with no values is omitted. `file`
 * and, when present, `session` are copied from `pattern`.
 */
function repackSlotSubs(pattern: OcPath, slotSubs: readonly SlotSub[]): OcPath {
  const sectionParts: string[] = [];
  const itemParts: string[] = [];
  const fieldParts: string[] = [];
  for (const { slot, value } of slotSubs) {
    const bucket = slot === 'section' ? sectionParts : slot === 'item' ? itemParts : fieldParts;
    bucket.push(value);
  }
  const joinOrSkip = (parts: readonly string[]): string | undefined =>
    parts.length > 0 ? parts.join('.') : undefined;
  const section = joinOrSkip(sectionParts);
  const item = joinOrSkip(itemParts);
  const field = joinOrSkip(fieldParts);
  return {
    file: pattern.file,
    ...(section !== undefined && { section }),
    ...(item !== undefined && { item }),
    ...(field !== undefined && { field }),
    ...(pattern.session !== undefined && { session: pattern.session }),
  };
}
/**
 * Run the walker that matches `ast.kind` over `subs` and collect one
 * concrete `OcPath` per walker hit. Each hit is a slot-tagged segment
 * list that is repacked against `pattern` so the result keeps the
 * pattern's slot layout. A jsonc AST whose `root` is `null` yields
 * no paths.
 */
function expand(ast: OcAst, subs: readonly PatternSub[], pattern: OcPath): readonly OcPath[] {
  const found: OcPath[] = [];
  const collect = (leaf: readonly SlotSub[]): void => {
    found.push(repackSlotSubs(pattern, leaf));
  };

  if (ast.kind === 'yaml') {
    walkYaml(ast.doc.contents as Node | null, subs, 0, [], collect);
  } else if (ast.kind === 'jsonc') {
    if (ast.root !== null) {
      walkJsonc(ast.root, subs, 0, [], collect);
    }
  } else if (ast.kind === 'jsonl') {
    walkJsonl(ast, subs, 0, [], collect);
  } else if (ast.kind === 'md') {
    walkMd(ast, subs, 0, [], collect);
  }
  return found;
}
/**
 * Walk a YAML node against `subs` starting at pattern index `i`,
 * calling `onMatch` with the slot-tagged segments walked so far each
 * time the pattern is exhausted.
 *
 * Segment handling, in order: union `{a,b}` (one walk per
 * alternative), predicate `[key<op>value]` (children whose `key` field
 * satisfies the predicate), positional token (resolved to a concrete
 * key/index first), `**` (zero or more levels), `*` (exactly one
 * level), then literal key / index lookup.
 *
 * @throws OcPathError (`OC_PATH_DEPTH_EXCEEDED`) when more than
 *   `MAX_TRAVERSAL_DEPTH` segments have been walked.
 */
function walkYaml(
  node: Node | null,
  subs: readonly PatternSub[],
  i: number,
  walked: readonly SlotSub[],
  onMatch: (subs: readonly SlotSub[]) => void,
): void {
  if (walked.length > MAX_TRAVERSAL_DEPTH) {
    throw new OcPathError(
      `findOcPaths exceeded MAX_TRAVERSAL_DEPTH (${MAX_TRAVERSAL_DEPTH}) — likely a cycle or pathological pattern`,
      '',
      'OC_PATH_DEPTH_EXCEEDED',
    );
  }
  // Pattern exhausted: whatever node we are on is a hit.
  if (i >= subs.length) {
    onMatch(walked);
    return;
  }
  if (node === null) {
    return;
  }

  const here: Node = node;
  const pattern = subs[i];
  const slot = pattern.slot;
  const keyText = (pair: Pair): string =>
    isScalar(pair.key) ? String(pair.key.value) : String(pair.key);

  // Recurse into each accepted child of a map or sequence, continuing
  // the pattern at `nextIndex`. Map keys are emitted through `quoteSeg`;
  // sequence entries are emitted as their index.
  const descendChildren = (nextIndex: number, accept: (child: Node) => boolean): void => {
    if (isMap(here)) {
      for (const pair of (here as { items: Pair[] }).items) {
        const key = keyText(pair);
        const child = pair.value as Node;
        if (accept(child)) {
          walkYaml(child, subs, nextIndex, [...walked, { slot, value: quoteSeg(key) }], onMatch);
        }
      }
    } else if (isSeq(here)) {
      (here as { items: Node[] }).items.forEach((child, idx) => {
        if (accept(child)) {
          walkYaml(child, subs, nextIndex, [...walked, { slot, value: String(idx) }], onMatch);
        }
      });
    }
  };
  const any = (): boolean => true;

  // Union: re-run this same position once per alternative.
  if (isUnionSeg(pattern.value)) {
    const alternatives = parseUnionSeg(pattern.value);
    if (alternatives === null) {
      return;
    }
    for (const alt of alternatives) {
      const substituted = subs.slice();
      substituted[i] = { slot, value: alt };
      walkYaml(here, substituted, i, walked, onMatch);
    }
    return;
  }

  // Predicate: behaves like `*`, restricted to matching children.
  if (isPredicateSeg(pattern.value)) {
    const pred = parsePredicateSeg(pattern.value);
    if (pred === null) {
      return;
    }
    descendChildren(i + 1, (child) => yamlChildMatchesPredicate(child, pred));
    return;
  }

  // Positional token: swap in the concrete key/index, then fall through
  // so the emitted path carries the resolved value.
  let segment = pattern.value;
  if (isPositionalSeg(segment)) {
    const concrete = positionalForYamlNode(here, segment);
    if (concrete === null) {
      return;
    }
    segment = concrete;
  }

  if (segment === WILDCARD_RECURSIVE) {
    // Zero levels: move past `**` on this node.
    walkYaml(here, subs, i + 1, walked, onMatch);
    // One or more levels: step down, keeping `**` active.
    descendChildren(i, any);
    return;
  }

  if (segment === WILDCARD_SINGLE) {
    descendChildren(i + 1, any);
    return;
  }

  // Literal: quoted segments are compared in unquoted form, but the
  // emitted segment keeps the form it had in the pattern.
  const literal = isQuotedSeg(segment) ? unquoteSeg(segment) : segment;
  if (isMap(here)) {
    const hit = (here as { items: Pair[] }).items.find((p) => keyText(p) === literal);
    if (hit === undefined) {
      return;
    }
    walkYaml(hit.value as Node, subs, i + 1, [...walked, { slot, value: segment }], onMatch);
    return;
  }
  if (isSeq(here)) {
    const entries = (here as { items: Node[] }).items;
    const idx = Number(literal);
    if (!Number.isInteger(idx) || idx < 0 || idx >= entries.length) {
      return;
    }
    walkYaml(entries[idx], subs, i + 1, [...walked, { slot, value: segment }], onMatch);
  }
}
/**
 * Walk a JSONC value against `subs` starting at pattern index `i`,
 * calling `onMatch` with the slot-tagged segments walked so far each
 * time the pattern is exhausted.
 *
 * Segment handling mirrors the YAML walker: union, predicate,
 * positional token, `**`, `*`, then literal. Object keys emitted by
 * wildcard/predicate expansion pass through `quoteSeg`; array entries
 * are emitted as their index.
 *
 * @throws OcPathError (`OC_PATH_DEPTH_EXCEEDED`) when more than
 *   `MAX_TRAVERSAL_DEPTH` segments have been walked.
 */
function walkJsonc(
  node: JsoncValue,
  subs: readonly PatternSub[],
  i: number,
  walked: readonly SlotSub[],
  onMatch: (subs: readonly SlotSub[]) => void,
): void {
  if (walked.length > MAX_TRAVERSAL_DEPTH) {
    throw new OcPathError(
      `findOcPaths exceeded MAX_TRAVERSAL_DEPTH (${MAX_TRAVERSAL_DEPTH}) — likely a pathological pattern`,
      '',
      'OC_PATH_DEPTH_EXCEEDED',
    );
  }
  if (i >= subs.length) {
    onMatch(walked);
    return;
  }

  const pattern = subs[i];
  const slot = pattern.slot;

  // Recurse into each accepted child of an object or array, continuing
  // the pattern at `nextIndex`.
  const descendChildren = (nextIndex: number, accept: (child: JsoncValue) => boolean): void => {
    if (node.kind === 'object') {
      for (const entry of node.entries) {
        if (accept(entry.value)) {
          walkJsonc(
            entry.value,
            subs,
            nextIndex,
            [...walked, { slot, value: quoteSeg(entry.key) }],
            onMatch,
          );
        }
      }
    } else if (node.kind === 'array') {
      node.items.forEach((child, idx) => {
        if (accept(child)) {
          walkJsonc(child, subs, nextIndex, [...walked, { slot, value: String(idx) }], onMatch);
        }
      });
    }
  };
  const any = (): boolean => true;

  // Union: re-run this same position once per alternative.
  if (isUnionSeg(pattern.value)) {
    const alternatives = parseUnionSeg(pattern.value);
    if (alternatives === null) {
      return;
    }
    for (const alt of alternatives) {
      const substituted = subs.slice();
      substituted[i] = { slot, value: alt };
      walkJsonc(node, substituted, i, walked, onMatch);
    }
    return;
  }

  // Predicate: like `*`, restricted to children that satisfy it.
  if (isPredicateSeg(pattern.value)) {
    const pred = parsePredicateSeg(pattern.value);
    if (pred === null) {
      return;
    }
    descendChildren(i + 1, (child) => jsoncChildMatchesPredicate(child, pred));
    return;
  }

  // Positional token: resolve to a concrete key/index and continue as
  // though the pattern had carried that value.
  let segment = pattern.value;
  if (isPositionalSeg(segment)) {
    const concrete = positionalForJsoncNode(node, segment);
    if (concrete === null) {
      return;
    }
    segment = concrete;
  }

  if (segment === WILDCARD_RECURSIVE) {
    // Zero levels, then one-or-more levels with `**` still active.
    walkJsonc(node, subs, i + 1, walked, onMatch);
    descendChildren(i, any);
    return;
  }

  if (segment === WILDCARD_SINGLE) {
    descendChildren(i + 1, any);
    return;
  }

  if (node.kind === 'object') {
    // Entry keys are stored unquoted, so a quoted segment is unwrapped
    // for the comparison; the emitted segment keeps its pattern form.
    const lookupKey = isQuotedSeg(segment) ? unquoteSeg(segment) : segment;
    const hit = node.entries.find((entry) => entry.key === lookupKey);
    if (hit === undefined) {
      return;
    }
    walkJsonc(hit.value, subs, i + 1, [...walked, { slot, value: segment }], onMatch);
    return;
  }
  if (node.kind === 'array') {
    const idx = Number(segment);
    if (!Number.isInteger(idx) || idx < 0 || idx >= node.items.length) {
      return;
    }
    walkJsonc(node.items[idx], subs, i + 1, [...walked, { slot, value: segment }], onMatch);
  }
}
/**
 * Entry walker for JSONL. The sub-segment at `i` addresses a line;
 * everything after it is matched inside that line's parsed value via
 * `walkJsonlInsideLine`.
 *
 * Line-slot forms:
 *   - `**`  every value line, with `**` kept active inside the line
 *           (there is no zero-segment match at the file root)
 *   - `*`   every value line
 *   - union each alternative resolved independently through `pickLine`
 *   - predicate  value lines whose top-level field satisfies it
 *   - otherwise  a single address handled by `pickLine`
 *
 * Value lines are always emitted as `L<line>`; a non-value line found
 * by `pickLine` keeps the address token it was looked up with.
 *
 * Only the line slot is handled here: when `walked` is non-empty and
 * the pattern is not exhausted, the function returns without matching.
 *
 * @throws OcPathError (`OC_PATH_DEPTH_EXCEEDED`) when more than
 *   `MAX_TRAVERSAL_DEPTH` segments have been walked.
 */
function walkJsonl(
  ast: JsonlAst,
  subs: readonly PatternSub[],
  i: number,
  walked: readonly SlotSub[],
  onMatch: (subs: readonly SlotSub[]) => void,
): void {
  if (walked.length > MAX_TRAVERSAL_DEPTH) {
    throw new OcPathError(
      `findOcPaths exceeded MAX_TRAVERSAL_DEPTH (${MAX_TRAVERSAL_DEPTH}) — likely a pathological JSONL pattern`,
      '',
      'OC_PATH_DEPTH_EXCEEDED',
    );
  }
  if (i >= subs.length) {
    onMatch(walked);
    return;
  }
  if (walked.length !== 0) {
    return;
  }

  const { slot, value: token } = subs[i];

  const enterLine = (line: JsonlLine, addr: string, nextIndex: number): void => {
    walkJsonlInsideLine(line, subs, nextIndex, [{ slot, value: addr }], onMatch);
  };

  // Resolve one address token to a single line and descend into it.
  const enterAddressed = (addrToken: string): void => {
    const line = pickLine(ast, addrToken);
    if (line === null) {
      return;
    }
    enterLine(line, line.kind === 'value' ? `L${line.line}` : addrToken, i + 1);
  };

  if (token === WILDCARD_RECURSIVE) {
    forEachValueLine(ast, (line, addr) => enterLine(line, addr, i));
    return;
  }

  if (token === WILDCARD_SINGLE) {
    forEachValueLine(ast, (line, addr) => enterLine(line, addr, i + 1));
    return;
  }

  if (isUnionSeg(token)) {
    const alternatives = parseUnionSeg(token);
    if (alternatives === null) {
      return;
    }
    for (const alt of alternatives) {
      enterAddressed(alt);
    }
    return;
  }

  if (isPredicateSeg(token)) {
    const pred = parsePredicateSeg(token);
    if (pred === null) {
      return;
    }
    forEachValueLine(ast, (line, addr) => {
      if (line.kind !== 'value') {
        return;
      }
      // Only the line's own top-level field is inspected here.
      const actual = topLevelLeafText(line.value, pred.key);
      if (!evaluatePredicate(actual, pred)) {
        return;
      }
      enterLine(line, addr, i + 1);
    });
    return;
  }

  enterAddressed(token);
}
/**
 * Text form of the top-level field `key` of a JSONL line's value, for
 * predicate evaluation at the line slot.
 *
 * Scalar leaves are stringified: strings as-is, numbers and booleans
 * via `String`, and JSON `null` as the text `'null'` (the same
 * convention `jsoncChildFieldText` uses). Returns `null` — meaning
 * "nothing to compare" — when `value` is not an object, the key is
 * absent, or the field holds an object or array.
 *
 * @param value - Parsed value of one JSONL line.
 * @param key - Top-level field name to read.
 */
function topLevelLeafText(value: JsoncValue, key: string): string | null {
  if (value.kind !== 'object') {
    return null;
  }
  const entry = value.entries.find((e) => e.key === key);
  if (entry === undefined) {
    return null;
  }
  const leaf = entry.value;
  switch (leaf.kind) {
    case 'string':
      return leaf.value;
    case 'number':
    case 'boolean':
      return String(leaf.value);
    case 'null':
      return 'null';
    default:
      // Objects and arrays are not comparable leaves.
      return null;
  }
}
/**
 * Continue matching inside one JSONL line. Emits `walked` when the
 * pattern is exhausted; otherwise hands the line's parsed value to
 * `walkJsonc`. Lines that are not value lines cannot be descended into.
 *
 * @throws OcPathError (`OC_PATH_DEPTH_EXCEEDED`) when more than
 *   `MAX_TRAVERSAL_DEPTH` segments have been walked.
 */
function walkJsonlInsideLine(
  line: JsonlLine,
  subs: readonly PatternSub[],
  i: number,
  walked: readonly SlotSub[],
  onMatch: (subs: readonly SlotSub[]) => void,
): void {
  if (walked.length > MAX_TRAVERSAL_DEPTH) {
    throw new OcPathError(
      `findOcPaths exceeded MAX_TRAVERSAL_DEPTH (${MAX_TRAVERSAL_DEPTH}) — likely a pathological JSONL pattern`,
      '',
      'OC_PATH_DEPTH_EXCEEDED',
    );
  }
  if (i >= subs.length) {
    onMatch(walked);
    return;
  }
  if (line.kind !== 'value') {
    return;
  }
  walkJsonc(line.value, subs, i, walked, onMatch);
}

/** Call `visit` for every value line, in file order, with its `L<line>` address. */
function forEachValueLine(
  ast: JsonlAst,
  visit: (line: JsonlLine, addr: string) => void,
): void {
  for (const line of ast.lines) {
    if (line.kind !== 'value') {
      continue;
    }
    visit(line, `L${line.line}`);
  }
}

/**
 * Resolve a single line-address token.
 *
 *   - `$first` / `$last` — first / last value line
 *   - `-N` — N-th value line counted from the end (`-1` is the last)
 *   - `L<n>` — the line numbered `n`, whatever its kind
 *
 * @returns The addressed line, or `null` when the token is not a line
 *   address or nothing is at that address.
 */
function pickLine(ast: JsonlAst, addr: string): JsonlLine | null {
  const isValueLine = (l: JsonlLine): l is Extract<JsonlLine, { kind: 'value' }> =>
    l.kind === 'value';

  if (addr === '$last') {
    for (let idx = ast.lines.length - 1; idx >= 0; idx--) {
      const candidate = ast.lines[idx];
      if (candidate !== undefined && isValueLine(candidate)) {
        return candidate;
      }
    }
    return null;
  }

  if (addr === '$first') {
    return ast.lines.find(isValueLine) ?? null;
  }

  if (/^-\d+$/.test(addr)) {
    // Negative offsets count value lines only.
    const valueLines = ast.lines.filter(isValueLine);
    const pos = valueLines.length + Number(addr);
    return pos >= 0 && pos < valueLines.length ? valueLines[pos] : null;
  }

  const parsed = /^L(\d+)$/.exec(addr);
  if (parsed === null || parsed[1] === undefined) {
    return null;
  }
  const target = Number(parsed[1]);
  return ast.lines.find((l) => l.line === target) ?? null;
}
/**
 * Resolve a positional token against a YAML container via
 * `resolvePositionalSeg`. Maps are described as non-indexable with
 * their stringified keys; sequences as indexable by size. Any other
 * node yields `null`.
 */
function positionalForYamlNode(node: Node, seg: string): string | null {
  if (isMap(node)) {
    const keys = (node as { items: Pair[] }).items.map((pair) =>
      String(isScalar(pair.key) ? pair.key.value : pair.key),
    );
    return resolvePositionalSeg(seg, { indexable: false, size: keys.length, keys });
  }
  if (isSeq(node)) {
    const size = (node as { items: Node[] }).items.length;
    return resolvePositionalSeg(seg, { indexable: true, size });
  }
  return null;
}

/**
 * JSONC counterpart of `positionalForYamlNode`: objects are described
 * by their keys, arrays by their length, and scalars yield `null`.
 */
function positionalForJsoncNode(node: JsoncValue, seg: string): string | null {
  switch (node.kind) {
    case 'object': {
      const keys = node.entries.map((entry) => entry.key);
      return resolvePositionalSeg(seg, { indexable: false, size: keys.length, keys });
    }
    case 'array':
      return resolvePositionalSeg(seg, { indexable: true, size: node.items.length });
    default:
      return null;
  }
}

/**
 * Whether `node` satisfies a `[key<op>value]` predicate: the text of
 * `node[pred.key]` is handed to `evaluatePredicate`.
 */
function yamlChildMatchesPredicate(node: Node | null, pred: PredicateSpec): boolean {
  const fieldText = yamlChildFieldText(node, pred.key);
  return evaluatePredicate(fieldText, pred);
}

/**
 * Text of the scalar stored under `key` in a YAML map. Only the first
 * pair whose key matches is considered. Returns `null` when `node` is
 * null or not a map, the key is absent, or the value is not a scalar.
 * A YAML null scalar is returned as the text `'null'`.
 */
function yamlChildFieldText(node: Node | null, key: string): string | null {
  if (node === null || !isMap(node)) {
    return null;
  }
  const hit = (node as { items: Pair[] }).items.find((pair) => {
    const name = isScalar(pair.key) ? String(pair.key.value) : String(pair.key);
    return name === key;
  });
  if (hit === undefined) {
    return null;
  }
  const held = hit.value;
  if (!isScalar(held)) {
    return null;
  }
  const raw = held.value;
  if (raw === null) {
    return 'null';
  }
  switch (typeof raw) {
    case 'string':
      return raw;
    case 'number':
    case 'boolean':
      return String(raw);
    default:
      // Any other scalar payload falls back to its JSON text.
      return JSON.stringify(raw) ?? 'null';
  }
}
/**
 * Whether `node` satisfies a `[key<op>value]` predicate: the text of
 * `node[pred.key]` is handed to `evaluatePredicate`.
 */
function jsoncChildMatchesPredicate(node: JsoncValue, pred: PredicateSpec): boolean {
  const fieldText = jsoncChildFieldText(node, pred.key);
  return evaluatePredicate(fieldText, pred);
}

/**
 * Text of the scalar stored under `key` in a JSONC object. Strings are
 * returned as-is, numbers and booleans via `String`, and JSON `null`
 * as the text `'null'`. Returns `null` when `node` is not an object,
 * the key is absent, or the field holds an object or array.
 */
function jsoncChildFieldText(node: JsoncValue, key: string): string | null {
  if (node.kind !== 'object') {
    return null;
  }
  const field = node.entries.find((entry) => entry.key === key)?.value;
  if (field === undefined) {
    return null;
  }
  switch (field.kind) {
    case 'string':
      return field.value;
    case 'number':
    case 'boolean':
      return String(field.value);
    case 'null':
      return 'null';
    default:
      return null;
  }
}
/**
 * Entry walker for Markdown. The sub-segment at `i` is either the
 * literal `[frontmatter]` or a section (block) selector.
 *
 * Frontmatter: the entries are a flat key → value list, so at most one
 * non-`**` sub-segment can follow `[frontmatter]`. That selector is
 * `*` (every key) or a literal key (quoted segments are compared in
 * unquoted form). `**` sub-segments around it match zero levels; a
 * pattern made only of `**` after `[frontmatter]` matches every key.
 * A pattern with two or more non-`**` sub-segments after
 * `[frontmatter]` addresses something deeper than a key and matches
 * nothing.
 *
 * Sections: `*` / `**` visit every block; a literal is lower-cased and
 * compared with the block slug. For `**` each block is walked twice —
 * once with `**` consumed and once with it still active — and, when
 * `**` is the last sub-segment, the document root is emitted as the
 * zero-segment match.
 *
 * Only the first slot is handled here: when `walked` is non-empty and
 * the pattern is not exhausted, the function returns without matching.
 */
function walkMd(
  ast: MdAst,
  subs: readonly PatternSub[],
  i: number,
  walked: readonly SlotSub[],
  onMatch: (subs: readonly SlotSub[]) => void,
): void {
  if (i >= subs.length) {
    onMatch(walked);
    return;
  }
  if (walked.length !== 0) {
    return;
  }
  const cur = subs[i];

  if (cur.value === '[frontmatter]') {
    const head: SlotSub = { slot: cur.slot, value: cur.value };
    const rest = subs.slice(i + 1);
    if (rest.length === 0) {
      onMatch([head]);
      return;
    }
    // Anything beyond a single key selector cannot exist below a
    // frontmatter entry; previously such trailing subs were dropped
    // and the truncated path matched anyway.
    const selectors = rest.filter((s) => s.value !== WILDCARD_RECURSIVE);
    if (selectors.length > 1) {
      return;
    }
    const selector = selectors[0] ?? rest[0];
    if (selector.value === WILDCARD_SINGLE || selector.value === WILDCARD_RECURSIVE) {
      for (const fm of ast.frontmatter) {
        onMatch([head, { slot: selector.slot, value: fm.key }]);
      }
      return;
    }
    const fmKey = isQuotedSeg(selector.value) ? unquoteSeg(selector.value) : selector.value;
    if (!ast.frontmatter.some((e) => e.key === fmKey)) {
      return;
    }
    onMatch([head, { slot: selector.slot, value: selector.value }]);
    return;
  }

  const isRecursive = cur.value === WILDCARD_RECURSIVE;
  if (isRecursive || cur.value === WILDCARD_SINGLE) {
    for (const block of ast.blocks) {
      const entered: readonly SlotSub[] = [{ slot: cur.slot, value: block.slug }];
      walkMdInsideBlock(block, ast, subs, i + 1, entered, onMatch);
      if (isRecursive) {
        // Keep `**` active so the next sub can also match one level down.
        walkMdInsideBlock(block, ast, subs, i, entered, onMatch);
      }
    }
    if (isRecursive && i + 1 >= subs.length) {
      onMatch([]);
    }
    return;
  }

  const targetSlug = cur.value.toLowerCase();
  const block = ast.blocks.find((b) => b.slug === targetSlug);
  if (block === undefined) {
    return;
  }
  walkMdInsideBlock(block, ast, subs, i + 1, [{ slot: cur.slot, value: cur.value }], onMatch);
}
/**
 * Match the item slot inside one Markdown block, then continue into
 * the chosen item(s) with `walkMdInsideItem`.
 *
 * Selector forms:
 *   - `*` / `**`  every item; an item whose slug occurs more than once
 *                 in the block is emitted as `#<index>` so each result
 *                 stays distinct. A trailing `**` also emits the block
 *                 itself as the zero-segment match.
 *   - `#N`        the item at index N
 *   - positional  resolved with `resolvePositionalSeg` against the
 *                 item list
 *   - otherwise   lower-cased and compared with the item slug (first
 *                 match wins)
 *
 * For the non-wildcard forms the emitted segment is the selector as it
 * appeared in the pattern.
 */
function walkMdInsideBlock(
  block: { readonly items: readonly { readonly slug: string; readonly kv?: { readonly key: string; readonly value: string } }[] },
  ast: MdAst,
  subs: readonly PatternSub[],
  i: number,
  walked: readonly SlotSub[],
  onMatch: (subs: readonly SlotSub[]) => void,
): void {
  if (i >= subs.length) {
    onMatch(walked);
    return;
  }
  const { slot, value: token } = subs[i];
  const items = block.items;

  const isRecursive = token === WILDCARD_RECURSIVE;
  if (isRecursive || token === WILDCARD_SINGLE) {
    const occurrences = new Map<string, number>();
    for (const { slug } of items) {
      occurrences.set(slug, (occurrences.get(slug) ?? 0) + 1);
    }
    items.forEach((candidate, idx) => {
      const duplicated = (occurrences.get(candidate.slug) ?? 0) > 1;
      const seg = duplicated ? `#${idx}` : candidate.slug;
      walkMdInsideItem(candidate, ast, subs, i + 1, [...walked, { slot, value: seg }], onMatch);
    });
    if (isRecursive && i + 1 >= subs.length) {
      onMatch(walked);
    }
    return;
  }

  let chosen: (typeof items)[number] | undefined;
  if (isOrdinalSeg(token)) {
    const ordinal = parseOrdinalSeg(token);
    if (ordinal === null || ordinal < 0 || ordinal >= items.length) {
      return;
    }
    chosen = items[ordinal];
  } else if (isPositionalSeg(token)) {
    const concrete = resolvePositionalSeg(token, { indexable: true, size: items.length });
    if (concrete === null) {
      return;
    }
    chosen = items[Number(concrete)];
  } else {
    const wantedSlug = token.toLowerCase();
    chosen = items.find((it) => it.slug === wantedSlug);
  }
  if (chosen === undefined) {
    return;
  }
  walkMdInsideItem(chosen, ast, subs, i + 1, [...walked, { slot, value: token }], onMatch);
}
/**
 * Match the field slot inside one Markdown item. The only addressable
 * field is the item's `kv` pair, compared case-insensitively on
 * `kv.key`.
 *
 * An item carries at most one key/value pair, so at most one non-`**`
 * sub-segment can remain from `i` onward. That selector is `*` (emit
 * `kv.key`) or a literal key; `**` sub-segments around it match zero
 * levels, and a remainder made only of `**` emits `kv.key`. A pattern
 * with two or more non-`**` sub-segments left addresses something
 * below the field and matches nothing.
 *
 * @param item - Item being inspected; items without `kv` never match
 *   a field selector.
 * @param _ast - Unused.
 */
function walkMdInsideItem(
  item: { readonly kv?: { readonly key: string; readonly value: string } },
  _ast: MdAst,
  subs: readonly PatternSub[],
  i: number,
  walked: readonly SlotSub[],
  onMatch: (subs: readonly SlotSub[]) => void,
): void {
  if (i >= subs.length) {
    onMatch(walked);
    return;
  }
  if (item.kv === undefined) {
    return;
  }

  // Trailing sub-segments used to be dropped silently, letting a
  // deeper pattern match through its truncated prefix.
  const remaining = subs.slice(i);
  const selectors = remaining.filter((s) => s.value !== WILDCARD_RECURSIVE);
  if (selectors.length > 1) {
    return;
  }
  const selector = selectors[0] ?? remaining[0];

  if (selector.value === WILDCARD_SINGLE || selector.value === WILDCARD_RECURSIVE) {
    onMatch([...walked, { slot: selector.slot, value: item.kv.key }]);
    return;
  }
  if (item.kv.key.toLowerCase() !== selector.value.toLowerCase()) {
    return;
  }
  onMatch([...walked, { slot: selector.slot, value: selector.value }]);
}
+ */ +export const SDK_VERSION = '0.1.0'; + +// AST types +export type { + AstBlock, + AstCodeBlock, + AstItem, + AstTable, + Diagnostic, + FrontmatterEntry, + ParseResult, + MdAst, +} from './ast.js'; +export type { JsoncAst, JsoncEntry, JsoncValue } from './jsonc/ast.js'; +export type { JsonlAst, JsonlLine } from './jsonl/ast.js'; +export type { YamlAst } from './yaml/ast.js'; + +// OcPath types + parser/formatter +export type { + OcPath, + PathSegmentLayout, + PositionalContainer, + PredicateSpec, +} from './oc-path.js'; +// Public OcPath surface — what plugin authors and callers use. +export { + MAX_PATH_LENGTH, + MAX_SUB_SEGMENTS_PER_SLOT, + MAX_TRAVERSAL_DEPTH, + OcPathError, + POS_FIRST, + POS_LAST, + WILDCARD_RECURSIVE, + WILDCARD_SINGLE, + formatOcPath, + hasWildcard, + isOrdinalSeg, + isPattern, + isPositionalSeg, + isPredicateSeg, + isQuotedSeg, + isUnionSeg, + isValidOcPath, + parseOcPath, +} from './oc-path.js'; + +// `evaluatePredicate`, `getPathLayout`, `parseOrdinalSeg`, +// `parsePredicateSeg`, `parseUnionSeg`, `quoteSeg`, `unquoteSeg`, +// `repackPath`, `resolvePositionalSeg`, `splitRespectingBrackets` +// were exported from earlier prototypes. They're substrate-internal +// helpers — used by `find.ts`, the per-kind resolvers, and the parser +// itself, but not part of the upstream-portable public surface. +// Callers that need their behavior should round-trip through +// `parseOcPath` / `formatOcPath` / `findOcPaths`. 
+ +// Per-kind parse / emit (encoding is genuinely per-kind) +export { parseMd } from './parse.js'; +export { parseJsonc } from './jsonc/parse.js'; +export { parseJsonl } from './jsonl/parse.js'; +export { parseYaml } from './yaml/parse.js'; +export type { JsoncParseResult } from './jsonc/parse.js'; +export type { JsonlParseResult } from './jsonl/parse.js'; +export type { YamlParseResult } from './yaml/parse.js'; + +export type { EmitOptions } from './emit.js'; +export { emitMd, markDirty } from './emit.js'; +export type { JsoncEmitOptions } from './jsonc/emit.js'; +export { emitJsonc } from './jsonc/emit.js'; +export type { JsonlEmitOptions } from './jsonl/emit.js'; +export { emitJsonl } from './jsonl/emit.js'; +export type { YamlEmitOptions } from './yaml/emit.js'; +export { emitYaml } from './yaml/emit.js'; + +// Universal verbs — the only public resolve / set on the surface. +export type { + OcAst, + OcMatch, + LeafType, + NodeDescriptor, + ContainerKind, + SetResult, + InsertionInfo, +} from './universal.js'; +export { resolveOcPath, setOcPath, detectInsertion } from './universal.js'; + +// Multi-match search verb — the wildcard-accepting cousin of resolve. +export type { OcPathMatch } from './find.js'; +export { findOcPaths } from './find.js'; + +// Cross-kind utility — filename → kind hint. +export { inferKind } from './dispatch.js'; +export type { OcKind } from './dispatch.js'; + +// Sentinel guard +export { OcEmitSentinelError, REDACTED_SENTINEL, guardSentinel } from './sentinel.js'; + +// Slug helper +export { slugify } from './slug.js'; + +// Workspace manifest is a separate concern (filesystem classifier); +// it's not part of this PR's scope. diff --git a/src/oc-path/jsonc/ast.ts b/src/oc-path/jsonc/ast.ts new file mode 100644 index 00000000000..d7343ef3d9d --- /dev/null +++ b/src/oc-path/jsonc/ast.ts @@ -0,0 +1,49 @@ +/** + * JSONC AST types — the addressing skeleton for JSONC files (gateway + * config, plugin manifests, JSON-with-comments artifacts). 
/**
 * **Per-kind discriminator**: every AST in this substrate carries a
 * `kind` field. The OcPath resolver dispatches on `kind` so md / jsonc
 * / json / jsonl can share one resolver entry point.
 *
 * **Byte-fidelity**: `raw` is preserved on the root for round-trip
 * emit. The minimal prototype parser doesn't preserve every formatting
 * detail in the structural tree — for production, a fuller
 * comment-preserving parser ports from `openclaw-workspace`.
 *
 * @module @openclaw/oc-path/jsonc/ast
 */

/**
 * The root JSONC AST.
 *
 * `raw` round-trips byte-identical through the default (round-trip)
 * emit mode, which returns it verbatim. After an edit, `raw` is
 * re-rendered from `root` and no longer carries the original trivia.
 */
export interface JsoncAst {
  /** Discriminator used by per-kind dispatch. */
  readonly kind: 'jsonc';
  /** Source text exactly as handed to the parser (a leading BOM is kept). */
  readonly raw: string;
  /** Parsed value tree, or `null` if the file is empty / unparseable. */
  readonly root: JsoncValue | null;
}

/**
 * A JSONC value node — discriminated union over the standard JSON kinds.
 *
 * `line` is the 1-based line where the value's literal token starts
 * (the `{`, `[`, opening `"`, or first digit). The parser always sets
 * it; synthetic constructions (mutations, fixtures) may omit it and
 * consumers fall back to 1 / parent line. Optional rather than
 * required so test fixtures and externally-constructed values stay
 * concise.
 */
export type JsoncValue =
  | { readonly kind: 'object'; readonly entries: readonly JsoncEntry[]; readonly line?: number }
  | { readonly kind: 'array'; readonly items: readonly JsoncValue[]; readonly line?: number }
  | { readonly kind: 'string'; readonly value: string; readonly line?: number }
  | { readonly kind: 'number'; readonly value: number; readonly line?: number }
  | { readonly kind: 'boolean'; readonly value: boolean; readonly line?: number }
  | { readonly kind: 'null'; readonly line?: number };

/**
 * Object key/value entry. Keys are unquoted (escape sequences already
 * decoded by the parser); quoting happens at emit. Entries keep source
 * order, which is what positional addressing of objects relies on.
 */
export interface JsoncEntry {
  /** Decoded key text, without surrounding quotes. */
  readonly key: string;
  /** The value stored under `key`. */
  readonly value: JsoncValue;
  /** 1-based line number of the key. */
  readonly line: number;
}

// diff --git a/src/oc-path/jsonc/edit.ts b/src/oc-path/jsonc/edit.ts new file mode 100644 index 00000000000..a05e3109575 --- /dev/null +++ b/src/oc-path/jsonc/edit.ts @@ -0,0 +1,184 @@
/**
 * Mutate a `JsoncAst` at an OcPath. Returns a new AST with the value
 * replaced; the original AST is unchanged.
 *
 * **Why immutable**: callers can hold the pre-edit AST for diffing /
 * audit while applying the edit. Plays well with LKG observe (compare
 * pre vs post fingerprints).
 *
 * # Known limitation: trivia loss after edit (tracked as follow-up)
 *
 * `setJsoncOcPath` rebuilds `ast.raw` via `emitJsonc({mode:'render'})`,
 * which RE-SERIALIZES the structural tree. **Comments, blank lines,
 * key-order whitespace, and trailing-comma style are dropped** in the
 * post-edit `raw`. This is the cost of edit-then-emit in the prototype.
 *
 * The byte-fidelity guarantee in this PR applies to the **read path**
 * (`parseJsonc → emitJsonc` round-trip) — that's exercised by the
 * `jsonc-byte-fidelity` scenario test and holds byte-identical for
 * arbitrary input. The **write path** (`parseJsonc → setJsoncOcPath →
 * emitJsonc`) loses trivia.
 *
 * Why we ship as-is: a comment-preserving editor needs the parser to
 * track byte offsets per node, plus splice-aware mutation logic. That
 * is its own lift. The follow-up adds parser offsets and a byte-splice
 * editor; existing callers that need post-edit byte fidelity should
 * patch `raw` directly until then.
 */
/**
 * @module @openclaw/oc-path/jsonc/edit
 */

import type { OcPath } from '../oc-path.js';
import {
  isPositionalSeg,
  isQuotedSeg,
  resolvePositionalSeg,
  splitRespectingBrackets,
  unquoteSeg,
} from '../oc-path.js';
import type { JsoncAst, JsoncEntry, JsoncValue } from './ast.js';
import { emitJsonc } from './emit.js';

/** Outcome of `setJsoncOcPath`: the edited AST, or a structured failure reason. */
export type JsoncEditResult =
  | { readonly ok: true; readonly ast: JsoncAst }
  | { readonly ok: false; readonly reason: 'unresolved' | 'no-root' };

/**
 * Replace the value at `path` with `newValue`. Returns the new AST or
 * a structured failure reason. Numeric segments index into arrays.
 *
 * The input AST is never mutated. `raw` on the returned AST is
 * re-rendered from the tree, so comments and formatting are dropped.
 *
 * @param ast      AST to edit; `root === null` yields `no-root`.
 * @param path     Address of the value. `section` / `item` / `field`
 *                 are each split on `.` and concatenated; an empty
 *                 segment list replaces the root.
 * @param newValue Replacement value node.
 * @returns `{ ok: true, ast }` on success, `{ ok: false, reason }` otherwise.
 * @throws Whatever render-mode `emitJsonc` throws while rebuilding
 *         `raw` (it rejects string leaves containing the redaction
 *         sentinel).
 */
export function setJsoncOcPath(
  ast: JsoncAst,
  path: OcPath,
  newValue: JsoncValue,
): JsoncEditResult {
  if (ast.root === null) {
    return { ok: false, reason: 'no-root' };
  }

  // Use bracket/brace/quote-aware split so that quoted segments
  // (e.g. `"anthropic/claude-opus-4-7"`) — which can contain dots,
  // slashes, and other punctuation verbatim — survive as one segment.
  // Plain `.split('.')` would shred them and break the round-trip with
  // `resolveJsoncOcPath`, which already respects quoting.
  const segments: string[] = [];
  for (const part of [path.section, path.item, path.field]) {
    if (part !== undefined) {
      segments.push(...splitRespectingBrackets(part, '.'));
    }
  }

  // Empty path — replace the root.
  if (segments.length === 0) {
    return { ok: true, ast: rebuildRaw({ ...ast, root: newValue }, path.file) };
  }

  const replaced = replaceAt(ast.root, segments, 0, newValue);
  if (replaced === null) {
    return { ok: false, reason: 'unresolved' };
  }
  return { ok: true, ast: rebuildRaw({ ...ast, root: replaced }, path.file) };
}

/**
 * Rebuild the spine from `current` down to the addressed node with
 * `newValue` swapped in. Untouched siblings are shared, not copied.
 *
 * @param current  Node reached so far.
 * @param segments Full flattened segment list.
 * @param i        Index of the segment to consume at `current`.
 * @param newValue Value to place at the end of the path.
 * @returns The rebuilt node, or `null` when the path does not resolve
 *          (empty segment, missing key, bad index, or descent into a
 *          primitive).
 */
function replaceAt(
  current: JsoncValue,
  segments: readonly string[],
  i: number,
  newValue: JsoncValue,
): JsoncValue | null {
  const seg = segments[i];
  if (seg === undefined) {
    return newValue;
  }
  if (seg.length === 0) {
    return null;
  }

  if (current.kind === 'object') {
    // Resolve positional tokens ($first / $last) against the entries'
    // ordered key list before any literal-key comparison. Without
    // this, `oc://x.jsonc/agents/$first/alias` would look for a key
    // literally named `$first` and miss the actual first agent.
    //
    // When the token does not apply to a keyed container (e.g. `-N`),
    // fall through to a literal-key lookup instead of failing. This
    // mirrors `resolveJsoncOcPath`, which addresses keys such as
    // `groups.-5028303500.requireMention` literally; bailing out here
    // made such paths readable but not writable.
    let segNorm: string = seg;
    if (isPositionalSeg(seg)) {
      const resolved = resolvePositionalSeg(seg, {
        indexable: false,
        size: current.entries.length,
        keys: current.entries.map((e) => e.key),
      });
      if (resolved !== null) {
        segNorm = resolved;
      }
    }
    // Quoted segments (e.g. `"anthropic/claude-opus-4-7"`) carry the
    // raw bytes verbatim; the entry key in the AST is unquoted, so
    // strip the surrounding quotes before comparing. Bare segments
    // pass through unchanged.
    const lookupKey = isQuotedSeg(segNorm) ? unquoteSeg(segNorm) : segNorm;
    const idx = current.entries.findIndex((e) => e.key === lookupKey);
    if (idx === -1) {
      return null;
    }
    const child = current.entries[idx];
    if (child === undefined) {
      return null;
    }
    const replacedChild = replaceAt(child.value, segments, i + 1, newValue);
    if (replacedChild === null) {
      return null;
    }
    const newEntry: JsoncEntry = { ...child, value: replacedChild };
    const newEntries = current.entries.slice();
    newEntries[idx] = newEntry;
    return {
      kind: 'object',
      entries: newEntries,
      ...(current.line !== undefined ? { line: current.line } : {}),
    };
  }

  if (current.kind === 'array') {
    // Resolve positional tokens ($first / $last / -N) against the
    // array's size before the numeric coercion below; without this
    // `Number('$last')` is NaN and the path silently unresolves.
    let segNorm: string = seg;
    if (isPositionalSeg(seg)) {
      const resolved = resolvePositionalSeg(seg, {
        indexable: true,
        size: current.items.length,
      });
      if (resolved === null) {
        return null;
      }
      segNorm = resolved;
    }
    const idx = Number(segNorm);
    if (!Number.isInteger(idx) || idx < 0 || idx >= current.items.length) {
      return null;
    }
    const child = current.items[idx];
    if (child === undefined) {
      return null;
    }
    const replacedChild = replaceAt(child, segments, i + 1, newValue);
    if (replacedChild === null) {
      return null;
    }
    const newItems = current.items.slice();
    newItems[idx] = replacedChild;
    return {
      kind: 'array',
      items: newItems,
      ...(current.line !== undefined ? { line: current.line } : {}),
    };
  }

  // Primitive — can't descend.
  return null;
}

/**
 * Re-render `ast.raw` from the (possibly mutated) tree.
 *
 * **Trivia is dropped**: the tree is re-serialized in render mode, so
 * a subsequent `emitJsonc(returnedAst)` returns these synthesized
 * bytes, NOT the original byte-fidelity input.
 *
 * Production-quality fix: parser tracks byte offsets per node;
 * `setJsoncOcPath` does a `raw.slice(0,start) + newBytes + raw.slice(end)`
 * splice, leaving trivia untouched. Tracked as PR follow-up.
 *
 * @param ast      AST whose `root` should be rendered.
 * @param fileName Optional file name forwarded to the emit guard.
 * @returns A copy of `ast` with `raw` replaced by the rendered text.
 */
function rebuildRaw(ast: JsoncAst, fileName?: string): JsoncAst {
  // Plumb fileName so render-mode emit's sentinel guard reports the
  // file context (`oc://gateway.jsonc/[path]`) instead of the empty
  // fallback (`oc:///[path]`). The throw originates here when a
  // caller-injected sentinel reaches a leaf — without the file
  // context, forensics + audit pipelines see "rejected somewhere"
  // with no way to identify the file.
  const opts = fileName !== undefined
    ? { mode: 'render' as const, fileNameForGuard: fileName }
    : { mode: 'render' as const };
  const next: JsoncAst = { kind: 'jsonc', raw: '', root: ast.root };
  const rendered = emitJsonc(next, opts);
  return { ...ast, raw: rendered };
}

// diff --git a/src/oc-path/jsonc/emit.ts b/src/oc-path/jsonc/emit.ts new file mode 100644 index 00000000000..75a5f354f75 --- /dev/null +++ b/src/oc-path/jsonc/emit.ts @@ -0,0 +1,99 @@
/**
 * Emit a `JsoncAst` to bytes.
 *
 * **Round-trip mode (default)** returns `ast.raw` verbatim — this
 * preserves comments, formatting, and trailing whitespace exactly.
 *
 * **Sentinel-guard policy**:
 *
 * - Round-trip echoes `ast.raw` *without* scanning for the redaction
 *   sentinel. Bytes that came in via `parseJsonc` are trusted: a
 *   workspace file legitimately containing the literal
 *   `__OPENCLAW_REDACTED__` (in a code-block comment, in a pasted
 *   error log, etc.) would otherwise become a workspace-wide emit
 *   DoS — every `openclaw path emit FILE.jsonc` would exit non-zero,
 *   breaking lint round-trip rules, doctor fixers, and LKG
 *   fingerprinting. The substrate's contract is "no NEW sentinel
 *   bytes introduced via emit", not "no sentinel byte ever leaves".
 */
/**
 * - Render mode walks every leaf and rejects sentinel-bearing leaf
 *   values (caller-injected sentinel via `setOcPath` lands here:
 *   `setJsoncOcPath` rebuilds raw via render-mode, so a leaf set to
 *   the sentinel by the caller is caught at the rebuild boundary
 *   before the raw is shipped back).
 *
 * Callers that want pre-existing sentinel detection (e.g., LKG
 * fingerprint verification) can opt in via
 * `acceptPreExistingSentinel: false`.
 *
 * @module @openclaw/oc-path/jsonc/emit
 */

import { OcEmitSentinelError, REDACTED_SENTINEL } from '../sentinel.js';
import type { JsoncAst, JsoncValue } from './ast.js';

export interface JsoncEmitOptions {
  readonly mode?: 'roundtrip' | 'render';
  readonly fileNameForGuard?: string;
  /**
   * When `false`, round-trip mode also scans `ast.raw` for the
   * redaction sentinel and throws `OcEmitSentinelError` if found.
   * Default `true` — round-trip trusts parsed bytes. Render mode
   * always scans leaves regardless.
   */
  readonly acceptPreExistingSentinel?: boolean;
}

/**
 * Serialize a JSONC AST.
 *
 * In round-trip mode (the default) the stored `raw` text is returned
 * untouched, optionally after a sentinel scan. Any other mode renders
 * the structural tree afresh, which loses comments and formatting and
 * rejects string leaves that contain the redaction sentinel.
 *
 * @param ast  AST to emit.
 * @param opts Mode, guard file name, and sentinel policy.
 * @returns The emitted text; `''` when rendering an AST with no root.
 * @throws OcEmitSentinelError when the active guard finds the sentinel.
 */
export function emitJsonc(ast: JsoncAst, opts: JsoncEmitOptions = {}): string {
  const guardPath = opts.fileNameForGuard ? `oc://${opts.fileNameForGuard}` : 'oc://';
  const isRoundtrip = (opts.mode ?? 'roundtrip') === 'roundtrip';

  if (!isRoundtrip) {
    // Render: rebuild JSON from the tree, checking every string leaf.
    return ast.root === null ? '' : renderValue(ast.root, guardPath, []);
  }

  const trustRaw = opts.acceptPreExistingSentinel ?? true;
  if (!trustRaw && ast.raw.includes(REDACTED_SENTINEL)) {
    throw new OcEmitSentinelError(`${guardPath}/[raw]`);
  }
  return ast.raw;
}

/**
 * Render one value node as single-line JSON, depth-first in source
 * order. `walked` is the key/index trail from the root and is only
 * used to build the path reported when a sentinel is found.
 */
function renderValue(value: JsoncValue, guardPath: string, walked: readonly string[]): string {
  if (value.kind === 'string') {
    // Substring match on purpose: an embedded sentinel
    // (`prefix__OPENCLAW_REDACTED__suffix`) is as much a leak as an
    // exact one.
    if (value.value.includes(REDACTED_SENTINEL)) {
      throw new OcEmitSentinelError(`${guardPath}/${walked.join('/')}`);
    }
    return JSON.stringify(value.value);
  }
  if (value.kind === 'number' || value.kind === 'boolean') {
    return String(value.value);
  }
  if (value.kind === 'null') {
    return 'null';
  }
  if (value.kind === 'object') {
    const members: string[] = [];
    for (const entry of value.entries) {
      const keyText = JSON.stringify(entry.key);
      const valueText = renderValue(entry.value, guardPath, [...walked, entry.key]);
      members.push(`${keyText}: ${valueText}`);
    }
    return `{ ${members.join(', ')} }`;
  }
  if (value.kind === 'array') {
    const elements: string[] = [];
    for (let index = 0; index < value.items.length; index++) {
      const item = value.items[index] as JsoncValue;
      elements.push(renderValue(item, guardPath, [...walked, String(index)]));
    }
    return `[ ${elements.join(', ')} ]`;
  }
  throw new Error(`unreachable: jsonc renderValue kind`);
}

// diff --git a/src/oc-path/jsonc/parse.ts b/src/oc-path/jsonc/parse.ts new file mode 100644 index 00000000000..28ba56590a6 --- /dev/null +++ b/src/oc-path/jsonc/parse.ts @@ -0,0 +1,311 @@
/**
 * Minimal JSONC parser — handles JSON + line comments and block
 * comments + trailing commas. Produces a structural tree for OcPath
 * resolution; full byte-fidelity emit relies on `raw` on the AST root.
 *
 * **Prototype scope**: this parser handles the input shapes openclaw
 * config files actually use. Production landing ports the full
 * comment-preserving parser from `openclaw-workspace` (1248 LoC).
 */
/**
 * @module @openclaw/oc-path/jsonc/parse
 */

import type { Diagnostic } from '../ast.js';
import type { JsoncAst, JsoncEntry, JsoncValue } from './ast.js';

/**
 * Bound on parse-time recursion depth. Mirrors `MAX_TRAVERSAL_DEPTH`
 * from oc-path; real configs don't nest beyond ~10 levels, so 256 is
 * a safe ceiling. Pathological input like
 * `'['.repeat(20000) + '0' + ']'.repeat(20000)` would otherwise
 * trigger V8 RangeError before any structural diagnostic — the CLI
 * loads attacker-supplied workspace files via `loadAst`, so this
 * defense fires before raw stack overflow escapes to commander.
 */
export const MAX_PARSE_DEPTH = 256;

/**
 * Byte-order mark. Spelled as an escape on purpose: a raw U+FEFF
 * inside the literal is invisible and easily lost by editors or
 * tooling, which would degrade the check to `startsWith('')` and
 * strip the first character of every input.
 */
const BOM = '\uFEFF';

/** Result of `parseJsonc`: the AST plus any diagnostics collected. */
export interface JsoncParseResult {
  readonly ast: JsoncAst;
  readonly diagnostics: readonly Diagnostic[];
}

/** Thrown internally when nesting exceeds `MAX_PARSE_DEPTH`. */
class ParseDepthError extends Error {
  readonly code = 'OC_JSONC_DEPTH_EXCEEDED';
  constructor(line: number) {
    super(`structural depth exceeded MAX_PARSE_DEPTH (${MAX_PARSE_DEPTH}) at line ${line}`);
    this.name = 'ParseDepthError';
  }
}

/** Cursor over the source text that tracks the current 1-based line. */
class ParseState {
  pos = 0;
  line = 1;

  constructor(public readonly src: string) {}

  /** Character at the cursor, or `undefined` at end of input. */
  peek(): string | undefined {
    return this.src[this.pos];
  }

  /** Consume and return one character, bumping `line` on `\n`. */
  advance(): string | undefined {
    const c = this.src[this.pos];
    this.pos++;
    if (c === '\n') {
      this.line++;
    }
    return c;
  }

  eof(): boolean {
    return this.pos >= this.src.length;
  }
}

/**
 * Parse a JSONC string. Soft-error policy: doesn't throw; suspicious
 * inputs surface as diagnostics. An entirely unparseable input
 * produces an AST with `root: null` and an error diagnostic.
 *
 * @param raw Source text. A leading BOM is skipped for parsing but
 *            kept on `ast.raw`.
 * @returns The AST (always carrying the untouched `raw`) and the
 *          diagnostics gathered while parsing.
 */
export function parseJsonc(raw: string): JsoncParseResult {
  const diagnostics: Diagnostic[] = [];
  // Strip BOM for parsing convenience; raw is preserved on the AST.
  const withoutBom = raw.startsWith(BOM) ? raw.slice(BOM.length) : raw;
  const st = new ParseState(withoutBom);

  skipWs(st);
  if (st.eof()) {
    return { ast: { kind: 'jsonc', raw, root: null }, diagnostics };
  }

  let root: JsoncValue | null = null;
  try {
    root = parseValue(st, diagnostics, 0);
    skipWs(st);
    if (!st.eof()) {
      diagnostics.push({
        line: st.line,
        message: `unexpected trailing input at offset ${st.pos}`,
        severity: 'warning',
        code: 'OC_JSONC_TRAILING_INPUT',
      });
    }
  } catch (err) {
    diagnostics.push({
      line: st.line,
      message: err instanceof Error ? err.message : String(err),
      severity: 'error',
      code: err instanceof ParseDepthError ? err.code : 'OC_JSONC_PARSE_FAILED',
    });
  }

  return { ast: { kind: 'jsonc', raw, root }, diagnostics };
}

// ---------- internal --------------------------------------------------------

/** True for the ASCII digits `0`–`9`. */
function isDigit(c: string | undefined): boolean {
  return c !== undefined && c >= '0' && c <= '9';
}

/** Skip whitespace, `//` line comments and block comments. */
function skipWs(st: ParseState): void {
  while (!st.eof()) {
    const c = st.peek();
    if (c === ' ' || c === '\t' || c === '\n' || c === '\r') {
      st.advance();
      continue;
    }
    if (c === '/') {
      const next = st.src[st.pos + 1];
      if (next === '/') {
        // Line comment — skip until newline.
        while (!st.eof() && st.peek() !== '\n') {
          st.advance();
        }
        continue;
      }
      if (next === '*') {
        // Block comment — skip until closing star-slash. An
        // unterminated block comment silently runs to end of input.
        st.advance();
        st.advance();
        while (!st.eof()) {
          if (st.peek() === '*' && st.src[st.pos + 1] === '/') {
            st.advance();
            st.advance();
            break;
          }
          st.advance();
        }
        continue;
      }
    }
    return;
  }
}

/**
 * Parse one value at the cursor.
 *
 * @throws ParseDepthError when `depth` exceeds `MAX_PARSE_DEPTH`.
 * @throws Error on any character that cannot start a value.
 */
function parseValue(st: ParseState, diags: Diagnostic[], depth: number): JsoncValue {
  // Bound recursion. Without this guard, pathological input like
  // `'['.repeat(20000) + '0' + ']'.repeat(20000)` triggers V8
  // RangeError before any structural diagnostic — the CLI loads
  // attacker-supplied workspace files via `loadAst`, so unbounded
  // recursion would escape commander as a raw stack-overflow string.
  if (depth > MAX_PARSE_DEPTH) {
    throw new ParseDepthError(st.line);
  }
  skipWs(st);
  const startLine = st.line;
  const c = st.peek();
  if (c === '{') {
    return parseObject(st, diags, startLine, depth);
  }
  if (c === '[') {
    return parseArray(st, diags, startLine, depth);
  }
  if (c === '"') {
    return { kind: 'string', value: parseString(st), line: startLine };
  }
  if (c === 't' || c === 'f') {
    return parseBoolean(st, startLine);
  }
  if (c === 'n') {
    return parseNull(st, startLine);
  }
  if (c === '-' || isDigit(c)) {
    return parseNumber(st, startLine);
  }
  throw new Error(
    `unexpected character ${JSON.stringify(c)} at line ${st.line} (offset ${st.pos})`,
  );
}

/** Parse `{ ... }`; a trailing comma before `}` is accepted. */
function parseObject(st: ParseState, diags: Diagnostic[], startLine: number, depth: number): JsoncValue {
  if (st.advance() !== '{') {
    throw new Error('expected `{`');
  }
  const entries: JsoncEntry[] = [];
  skipWs(st);
  if (st.peek() === '}') {
    st.advance();
    return { kind: 'object', entries, line: startLine };
  }
  while (true) {
    skipWs(st);
    if (st.peek() !== '"') {
      throw new Error(`expected string key at line ${st.line} (offset ${st.pos})`);
    }
    const keyLine = st.line;
    const key = parseString(st);
    skipWs(st);
    if (st.advance() !== ':') {
      throw new Error(`expected \`:\` after key at line ${st.line}`);
    }
    skipWs(st);
    const value = parseValue(st, diags, depth + 1);
    entries.push({ key, value, line: keyLine });
    skipWs(st);
    const next = st.peek();
    if (next === ',') {
      st.advance();
      skipWs(st);
      // Trailing comma? Allow.
      if (st.peek() === '}') {
        st.advance();
        return { kind: 'object', entries, line: startLine };
      }
      continue;
    }
    if (next === '}') {
      st.advance();
      return { kind: 'object', entries, line: startLine };
    }
    throw new Error(
      `expected \`,\` or \`}\` after value at line ${st.line} (offset ${st.pos})`,
    );
  }
}

/** Parse `[ ... ]`; a trailing comma before `]` is accepted. */
function parseArray(st: ParseState, diags: Diagnostic[], startLine: number, depth: number): JsoncValue {
  if (st.advance() !== '[') {
    throw new Error('expected `[`');
  }
  const items: JsoncValue[] = [];
  skipWs(st);
  if (st.peek() === ']') {
    st.advance();
    return { kind: 'array', items, line: startLine };
  }
  while (true) {
    skipWs(st);
    items.push(parseValue(st, diags, depth + 1));
    skipWs(st);
    const next = st.peek();
    if (next === ',') {
      st.advance();
      skipWs(st);
      if (st.peek() === ']') {
        st.advance();
        return { kind: 'array', items, line: startLine };
      }
      continue;
    }
    if (next === ']') {
      st.advance();
      return { kind: 'array', items, line: startLine };
    }
    throw new Error(
      `expected \`,\` or \`]\` after value at line ${st.line} (offset ${st.pos})`,
    );
  }
}

/**
 * Parse a double-quoted string and return its decoded content.
 *
 * @throws Error on an invalid escape, or when input ends before the
 *         closing quote. The unterminated error reports the line on
 *         which the string STARTED; by the time end of input is hit
 *         the cursor line may be far past it.
 */
function parseString(st: ParseState): string {
  const startLine = st.line;
  if (st.advance() !== '"') {
    throw new Error('expected `"`');
  }
  let out = '';
  while (!st.eof()) {
    const c = st.advance();
    if (c === '"') {
      return out;
    }
    if (c === '\\') {
      // A backslash as the very last character is an unterminated
      // string, not an "invalid escape \undefined".
      if (st.eof()) {
        break;
      }
      const esc = st.advance();
      switch (esc) {
        case '"': out += '"'; break;
        case '\\': out += '\\'; break;
        case '/': out += '/'; break;
        case 'b': out += '\b'; break;
        case 'f': out += '\f'; break;
        case 'n': out += '\n'; break;
        case 'r': out += '\r'; break;
        case 't': out += '\t'; break;
        case 'u': {
          const hex = st.src.slice(st.pos, st.pos + 4);
          if (!/^[0-9a-fA-F]{4}$/.test(hex)) {
            throw new Error(`invalid unicode escape at line ${st.line}`);
          }
          out += String.fromCharCode(Number.parseInt(hex, 16));
          // Four hex digits never contain a newline, so bypassing
          // `advance()` keeps the line counter correct.
          st.pos += 4;
          break;
        }
        default:
          throw new Error(`invalid escape \\${esc} at line ${st.line}`);
      }
      continue;
    }
    out += c;
  }
  throw new Error(`unterminated string starting at line ${startLine}`);
}

/** Parse the literal `true` or `false`. */
function parseBoolean(st: ParseState, line: number): JsoncValue {
  if (st.src.slice(st.pos, st.pos + 4) === 'true') {
    st.pos += 4;
    return { kind: 'boolean', value: true, line };
  }
  if (st.src.slice(st.pos, st.pos + 5) === 'false') {
    st.pos += 5;
    return { kind: 'boolean', value: false, line };
  }
  throw new Error(`expected true/false at line ${st.line}`);
}

/** Parse the literal `null`. */
function parseNull(st: ParseState, line: number): JsoncValue {
  if (st.src.slice(st.pos, st.pos + 4) === 'null') {
    st.pos += 4;
    return { kind: 'null', line };
  }
  throw new Error(`expected null at line ${st.line}`);
}

/**
 * Parse a number: optional `-`, digits, optional fraction, optional
 * exponent. The scanned text is converted with `Number()`; anything
 * that does not yield a finite value (e.g. a lone `-`, `1e`) throws.
 */
function parseNumber(st: ParseState, line: number): JsoncValue {
  const start = st.pos;
  const skipDigits = (): void => {
    while (!st.eof() && isDigit(st.peek())) {
      st.advance();
    }
  };
  if (st.peek() === '-') {
    st.advance();
  }
  skipDigits();
  if (st.peek() === '.') {
    st.advance();
    skipDigits();
  }
  if (st.peek() === 'e' || st.peek() === 'E') {
    st.advance();
    if (st.peek() === '+' || st.peek() === '-') {
      st.advance();
    }
    skipDigits();
  }
  const text = st.src.slice(start, st.pos);
  const value = Number(text);
  if (!Number.isFinite(value)) {
    throw new Error(`invalid number "${text}" at line ${st.line}`);
  }
  return { kind: 'number', value, line };
}

export type { Diagnostic };

// diff --git a/src/oc-path/jsonc/resolve.ts b/src/oc-path/jsonc/resolve.ts new file mode 100644 index 00000000000..2787a41ac2f --- /dev/null +++ b/src/oc-path/jsonc/resolve.ts @@ -0,0 +1,122 @@
/**
 * Resolve an `OcPath` against a `JsoncAst`.
 *
 * The OcPath model has 4 segments (file, section, item, field) — for
 * JSONC artifacts that's not enough depth, so segments concat with `/`
 * AND a section/item/field MAY contain dots (`.`) for deeper traversal.
 */
/**
 * Both forms work:
 *
 *   oc://config/plugins/entries/foo     (segment-per-key)
 *   oc://config/plugins.entries.foo     (dotted section)
 *   oc://config/plugins/entries.foo     (mixed)
 *
 * Each segment is split on `.`, and the resulting flat list of keys
 * walks the value tree from `ast.root`. Numeric segments index into
 * arrays.
 *
 * @module @openclaw/oc-path/jsonc/resolve
 */

import type { OcPath } from '../oc-path.js';
import {
  isPositionalSeg,
  isQuotedSeg,
  resolvePositionalSeg,
  splitRespectingBrackets,
  unquoteSeg,
} from '../oc-path.js';
import type { JsoncAst, JsoncEntry, JsoncValue } from './ast.js';

export type JsoncOcPathMatch =
  | { readonly kind: 'root'; readonly node: JsoncAst }
  | { readonly kind: 'value'; readonly node: JsoncValue; readonly path: readonly string[] }
  | {
      readonly kind: 'object-entry';
      readonly node: JsoncEntry;
      readonly path: readonly string[];
    };

/**
 * Follow an OcPath through the JSONC tree.
 *
 * `section`, `item` and `field` are each split on `.` (bracket/quote
 * aware) and unquoted, then walked in order from `ast.root`. Object
 * steps match entry keys; array steps take a non-negative integer
 * index.
 *
 * @returns `root` for an empty path, `object-entry` when the last
 *          step landed on an object member, `value` when it landed on
 *          an array item, or `null` when the AST has no root or the
 *          path does not resolve.
 */
export function resolveJsoncOcPath(
  ast: JsoncAst,
  path: OcPath,
): JsoncOcPathMatch | null {
  const root = ast.root;
  if (root === null) {
    return null;
  }

  // Bracket-aware split + unquote: `"foo/bar".baz` becomes
  // [`foo/bar`, `baz`] (literal slash preserved in the first sub).
  const segments: string[] = [];
  for (const part of [path.section, path.item, path.field]) {
    if (part === undefined) {
      continue;
    }
    for (const piece of splitRespectingBrackets(part, '.')) {
      segments.push(isQuotedSeg(piece) ? unquoteSeg(piece) : piece);
    }
  }

  if (segments.length === 0) {
    return { kind: 'root', node: ast };
  }

  let cursor: JsoncValue = root;
  let owner: JsoncEntry | null = null;
  const walked: string[] = [];

  for (const rawSeg of segments) {
    if (rawSeg.length === 0) {
      return null;
    }
    // Positional resolution: `$first` / `$last` always; `-N` only on
    // indexable (array) containers. On a keyed (object) container, a
    // `-N` segment falls through to literal-key lookup so paths like
    // `groups.-5028303500.requireMention` (Telegram supergroup IDs —
    // openclaw#59934) address the literal key instead of crashing.
    let step = rawSeg;
    if (isPositionalSeg(rawSeg)) {
      const concrete = positionalForJsonc(cursor, rawSeg);
      // null means "not applicable" — keep the literal segment.
      if (concrete !== null) {
        step = concrete;
      }
    }
    walked.push(step);

    if (cursor.kind === 'object') {
      const hit: JsoncEntry | undefined = cursor.entries.find((e) => e.key === step);
      if (hit === undefined) {
        return null;
      }
      owner = hit;
      cursor = hit.value;
    } else if (cursor.kind === 'array') {
      const index = Number(step);
      const inRange = Number.isInteger(index) && index >= 0 && index < cursor.items.length;
      if (!inRange) {
        return null;
      }
      owner = null;
      const element: JsoncValue | undefined = cursor.items[index];
      if (element === undefined) {
        return null;
      }
      cursor = element;
    } else {
      // Primitive — can't descend further.
      return null;
    }
  }

  if (owner !== null && cursor === owner.value) {
    return { kind: 'object-entry', node: owner, path: walked };
  }
  return { kind: 'value', node: cursor, path: walked };
}

/**
 * Resolve a positional token against `node`: objects are keyed
 * (non-indexable) containers, arrays are indexable. Primitives have
 * no positions, so the answer is always `null` for them.
 */
function positionalForJsonc(node: JsoncValue, seg: string): string | null {
  switch (node.kind) {
    case 'object': {
      const keys = node.entries.map((e) => e.key);
      return resolvePositionalSeg(seg, { indexable: false, size: keys.length, keys });
    }
    case 'array':
      return resolvePositionalSeg(seg, { indexable: true, size: node.items.length });
    default:
      return null;
  }
}

// diff --git a/src/oc-path/jsonl/ast.ts b/src/oc-path/jsonl/ast.ts new file mode 100644 index 00000000000..02904aca885 --- /dev/null +++ b/src/oc-path/jsonl/ast.ts @@ -0,0 +1,49 @@
/**
 * JSONL AST types — JSON-Lines: one JSON value per line, separated by
 * `\n`. The shape used by openclaw session-event logs, audit trails,
 * and LKG checkpoints (which is why JSONL is part of the universal
 * OcPath addressing scheme).
 *
 * **Per-kind discriminator**: every AST in this substrate carries a
 * `kind` field. The OcPath resolver dispatches on `kind`.
 *
 * **Byte-fidelity**: `raw` is preserved on the root for round-trip
 * emit. JSONL is line-oriented, so blank lines and per-line comments
 * (we don't strip them in render mode either — we preserve them as
 * "raw" line entries) live in the AST.
 *
 * @module @openclaw/oc-path/jsonl/ast
 */

import type { JsoncValue } from '../jsonc/ast.js';

/** The root JSONL AST. `raw` round-trips byte-identical via emit. */
export interface JsonlAst {
  readonly kind: 'jsonl';
  readonly raw: string;
  readonly lines: readonly JsonlLine[];
  /**
   * Line-ending convention detected at parse time. Used by render mode
   * to reconstruct the original convention (Windows-authored datasets
   * use CRLF; Unix uses LF). Optional for back-compat with synthetic
   * ASTs that don't track this — render mode falls back to LF when
   * undefined.
   */
  readonly lineEnding?: '\r\n' | '\n';
}

/**
 * One line of a JSONL file. Either a parsed JSON value, a blank line
 * (preserved for round-trip), or a malformed line (emit verbatim;
 * emit-time sentinel guard still scans).
 */
export type JsonlLine =
  | {
      readonly kind: 'value';
      readonly line: number;
      readonly value: JsoncValue;
      /** The original line text (without trailing newline). */
      readonly raw: string;
    }
  | { readonly kind: 'blank'; readonly line: number; readonly raw: string }
  | { readonly kind: 'malformed'; readonly line: number; readonly raw: string };

// diff --git a/src/oc-path/jsonl/edit.ts b/src/oc-path/jsonl/edit.ts new file mode 100644 index 00000000000..172fa8d4a6a --- /dev/null +++ b/src/oc-path/jsonl/edit.ts @@ -0,0 +1,228 @@
/**
 * Mutate a `JsonlAst` at an OcPath. Returns a new AST with the line
 * (or sub-field of a line) replaced.
 *
 * Edit shapes:
 *
 *   oc://session-events/L42              → replace line 42's whole value
 *   oc://session-events/L42/field        → replace field on line 42
 *   oc://session-events/L42/field.sub    → dotted descent
 *   oc://session-events/$last/...        → resolves to most recent value
 *
 * Append (no existing line) is NOT a `set` — use `appendJsonlLine` for
 * that. `setJsonlOcPath` only edits existing addresses.
 */
+ * + * @module @openclaw/oc-path/jsonl/edit + */ + +import type { OcPath } from '../oc-path.js'; +import { + isPositionalSeg, + isQuotedSeg, + resolvePositionalSeg, + splitRespectingBrackets, + unquoteSeg, +} from '../oc-path.js'; +import type { JsoncEntry, JsoncValue } from '../jsonc/ast.js'; +import type { JsonlAst, JsonlLine } from './ast.js'; +import { emitJsonl } from './emit.js'; + +export type JsonlEditResult = + | { readonly ok: true; readonly ast: JsonlAst } + | { readonly ok: false; readonly reason: 'unresolved' | 'not-a-value-line' }; + +export function setJsonlOcPath( + ast: JsonlAst, + path: OcPath, + newValue: JsoncValue, +): JsonlEditResult { + const head = path.section; + if (head === undefined) {return { ok: false, reason: 'unresolved' };} + + const lineIdx = pickLineIndex(ast, head); + if (lineIdx === -1) {return { ok: false, reason: 'unresolved' };} + const target = ast.lines[lineIdx]; + if (target === undefined) {return { ok: false, reason: 'unresolved' };} + + // No item/field — replace the whole line value. Requires the line to + // already be a value line (we don't synthesize lines from blanks). + if (path.item === undefined && path.field === undefined) { + if (target.kind !== 'value') {return { ok: false, reason: 'not-a-value-line' };} + const newLine: JsonlLine = { + kind: 'value', + line: target.line, + value: newValue, + raw: target.raw, + }; + return finalize(ast, lineIdx, newLine, path.file); + } + + if (target.kind !== 'value') {return { ok: false, reason: 'not-a-value-line' };} + + // Bracket/brace/quote-aware split — preserves quoted segments + // verbatim so the edit path matches `resolveJsonlOcPath`'s + // unquoting behavior. Plain `.split('.')` would shred a quoted key + // and silently desync read-vs-write. 
+ const segments: string[] = []; + if (path.item !== undefined) {segments.push(...splitRespectingBrackets(path.item, '.'));} + if (path.field !== undefined) {segments.push(...splitRespectingBrackets(path.field, '.'));} + + const replaced = replaceAt(target.value, segments, 0, newValue); + if (replaced === null) {return { ok: false, reason: 'unresolved' };} + const newLine: JsonlLine = { + kind: 'value', + line: target.line, + value: replaced, + raw: target.raw, + }; + return finalize(ast, lineIdx, newLine, path.file); +} + +function replaceAt( + current: JsoncValue, + segments: readonly string[], + i: number, + newValue: JsoncValue, +): JsoncValue | null { + const seg = segments[i]; + if (seg === undefined) {return newValue;} + if (seg.length === 0) {return null;} + + if (current.kind === 'object') { + // Resolve positional tokens ($first / $last) against the entries' + // ordered key list before any literal-key comparison. Keeps the + // jsonl edit path symmetric with resolveJsonlOcPath, which already + // honors positional tokens during read. + let segNorm: string = seg; + if (isPositionalSeg(seg)) { + const resolved = resolvePositionalSeg(seg, { + indexable: false, + size: current.entries.length, + keys: current.entries.map((e) => e.key), + }); + if (resolved === null) {return null;} + segNorm = resolved; + } + // Quoted segments carry the raw bytes verbatim; AST entry keys + // are unquoted. Strip the surrounding quotes before comparing. + const lookupKey = isQuotedSeg(segNorm) ? 
unquoteSeg(segNorm) : segNorm; + const idx = current.entries.findIndex((e) => e.key === lookupKey); + if (idx === -1) {return null;} + const child = current.entries[idx]; + if (child === undefined) {return null;} + const replacedChild = replaceAt(child.value, segments, i + 1, newValue); + if (replacedChild === null) {return null;} + const newEntry: JsoncEntry = { ...child, value: replacedChild }; + const newEntries = current.entries.slice(); + newEntries[idx] = newEntry; + return { + kind: 'object', + entries: newEntries, + ...(current.line !== undefined ? { line: current.line } : {}), + }; + } + + if (current.kind === 'array') { + // Resolve positional tokens ($first / $last / -N) against the + // array's size before the numeric coercion below; without this + // `Number('$last')` is NaN and the path silently unresolves. + let segNorm: string = seg; + if (isPositionalSeg(seg)) { + const resolved = resolvePositionalSeg(seg, { + indexable: true, + size: current.items.length, + }); + if (resolved === null) {return null;} + segNorm = resolved; + } + const idx = Number(segNorm); + if (!Number.isInteger(idx) || idx < 0 || idx >= current.items.length) {return null;} + const child = current.items[idx]; + if (child === undefined) {return null;} + const replacedChild = replaceAt(child, segments, i + 1, newValue); + if (replacedChild === null) {return null;} + const newItems = current.items.slice(); + newItems[idx] = replacedChild; + return { + kind: 'array', + items: newItems, + ...(current.line !== undefined ? { line: current.line } : {}), + }; + } + + return null; +} + +function pickLineIndex(ast: JsonlAst, addr: string): number { + // Mirrors the line-address grammar handled by resolveJsonlOcPath's + // pickLine and find.ts's pickLine — the four shapes a JSONL line can + // be addressed by. Without `$first` and `-N` here, a path that + // resolves cleanly under those tokens would silently unresolve on + // the edit path (resolve↔write asymmetry). 
+ if (addr === '$last') { + for (let i = ast.lines.length - 1; i >= 0; i--) { + const l = ast.lines[i]; + if (l !== undefined && l.kind === 'value') {return i;} + } + return -1; + } + if (addr === '$first') { + for (let i = 0; i < ast.lines.length; i++) { + const l = ast.lines[i]; + if (l !== undefined && l.kind === 'value') {return i;} + } + return -1; + } + if (/^-\d+$/.test(addr)) { + // -N selects the Nth-from-last value line. Walk only value lines + // so blank/malformed lines don't shift the count (consistent with + // resolve.ts's pickLine). + const valueIndices: number[] = []; + for (let i = 0; i < ast.lines.length; i++) { + const l = ast.lines[i]; + if (l !== undefined && l.kind === 'value') {valueIndices.push(i);} + } + const n = valueIndices.length + Number(addr); + return n >= 0 && n < valueIndices.length ? (valueIndices[n] ?? -1) : -1; + } + const m = /^L(\d+)$/.exec(addr); + if (m === null || m[1] === undefined) {return -1;} + const target = Number(m[1]); + return ast.lines.findIndex((l) => l.line === target); +} + +function finalize(ast: JsonlAst, lineIdx: number, newLine: JsonlLine, fileName?: string): JsonlEditResult { + const newLines = ast.lines.slice(); + newLines[lineIdx] = newLine; + const next: JsonlAst = { + kind: 'jsonl', + raw: '', + lines: newLines, + ...(ast.lineEnding !== undefined ? { lineEnding: ast.lineEnding } : {}), + }; + const opts = fileName !== undefined + ? { mode: 'render' as const, fileNameForGuard: fileName } + : { mode: 'render' as const }; + const rendered = emitJsonl(next, opts); + return { ok: true, ast: { ...next, raw: rendered } }; +} + +/** + * Append a new value as the next line. Useful for session checkpointing + * (each event is a new line). Returns a new AST. Takes no `OcPath`: jsonl + * append addresses the file as a whole (line numbers are assigned by the + * substrate). NOTE(review): `ast.lineEnding` is not carried to the result.
+ */ +export function appendJsonlOcPath(ast: JsonlAst, value: JsoncValue): JsonlAst { + const nextLineNo = + ast.lines.length === 0 ? 1 : (ast.lines[ast.lines.length - 1]?.line ?? 0) + 1; + const newLine: JsonlLine = { + kind: 'value', + line: nextLineNo, + value, + raw: '', + }; + const next: JsonlAst = { kind: 'jsonl', raw: '', lines: [...ast.lines, newLine] }; + const rendered = emitJsonl(next, { mode: 'render' }); + return { ...next, raw: rendered }; +} diff --git a/src/oc-path/jsonl/emit.ts b/src/oc-path/jsonl/emit.ts new file mode 100644 index 00000000000..a2554c9edae --- /dev/null +++ b/src/oc-path/jsonl/emit.ts @@ -0,0 +1,100 @@ +/** + * Emit a `JsonlAst` to bytes. + * + * **Round-trip mode (default)** returns `ast.raw` verbatim — preserves + * malformed lines, blanks, trailing-newline shape exactly. + * + * **Render mode** rebuilds the file from line entries (re-stringifies + * value lines via JSON.stringify; preserves blank/malformed lines + * verbatim). Useful for synthetic ASTs. + * + * **Sentinel guard**: scans every emitted byte sequence for the + * `__OPENCLAW_REDACTED__` literal. + * + * @module @openclaw/oc-path/jsonl/emit + */ + +import { OcEmitSentinelError, REDACTED_SENTINEL } from '../sentinel.js'; +import type { JsoncValue } from '../jsonc/ast.js'; +import type { JsonlAst } from './ast.js'; + +export interface JsonlEmitOptions { + readonly mode?: 'roundtrip' | 'render'; + readonly fileNameForGuard?: string; + /** + * See `JsoncEmitOptions.acceptPreExistingSentinel` for the rationale. + * Default `true` — round-trip echoes parsed bytes without scanning + * for the sentinel. Render mode scans value-line leaves regardless. + */ + readonly acceptPreExistingSentinel?: boolean; +} + +export function emitJsonl(ast: JsonlAst, opts: JsonlEmitOptions = {}): string { + const mode = opts.mode ?? 'roundtrip'; + const guardPath = opts.fileNameForGuard ? `oc://${opts.fileNameForGuard}` : 'oc://'; + const acceptPreExisting = opts.acceptPreExistingSentinel ?? 
true; + + if (mode === 'roundtrip') { + if (!acceptPreExisting && ast.raw.includes(REDACTED_SENTINEL)) { + throw new OcEmitSentinelError(`${guardPath}/[raw]`); + } + return ast.raw; + } + + const out: string[] = []; + for (const ln of ast.lines) { + if (ln.kind === 'blank' || ln.kind === 'malformed') { + // Blank/malformed lines round-trip as their original raw bytes. + // Apply the same trust policy: only scan when caller opts in. + if (!acceptPreExisting && ln.raw.includes(REDACTED_SENTINEL)) { + throw new OcEmitSentinelError(`${guardPath}/L${ln.line}`); + } + out.push(ln.raw); + continue; + } + // Value lines re-serialize via renderValue, which always scans + // string leaves regardless of acceptPreExistingSentinel — a + // caller-injected sentinel via setOcPath / appendJsonl must + // always be rejected. + out.push(renderValue(ln.value, `${guardPath}/L${ln.line}`, [])); + } + // Restore the original line-ending convention. Without this, a CRLF + // input edited via setJsonlOcPath would emit a mixed-ending file: + // edited lines joined with `\n` and untouched lines retaining the + // `\r` on their .raw bytes — silent CRLF→LF corruption on + // Windows-authored datasets. + return out.join(ast.lineEnding ?? '\n'); +} + +function renderValue(value: JsoncValue, guardPath: string, walked: readonly string[]): string { + switch (value.kind) { + case 'object': { + const parts = value.entries.map( + (e) => `${JSON.stringify(e.key)}:${renderValue(e.value, guardPath, [...walked, e.key])}`, + ); + return `{${parts.join(',')}}`; + } + case 'array': { + const parts = value.items.map((v, i) => + renderValue(v, guardPath, [...walked, String(i)]), + ); + return `[${parts.join(',')}]`; + } + case 'string': { + // Reject ANY string that contains the sentinel — embedded + // (`prefix__OPENCLAW_REDACTED__suffix`) is just as much of a + // "literal redacted token landed on disk" leak as exact-match. 
+ if (value.value.includes(REDACTED_SENTINEL)) { + throw new OcEmitSentinelError(`${guardPath}/${walked.join('/')}`); + } + return JSON.stringify(value.value); + } + case 'number': + return String(value.value); + case 'boolean': + return String(value.value); + case 'null': + return 'null'; + } + throw new Error(`unreachable: jsonl renderValue kind`); +} diff --git a/src/oc-path/jsonl/parse.ts b/src/oc-path/jsonl/parse.ts new file mode 100644 index 00000000000..df91dd00480 --- /dev/null +++ b/src/oc-path/jsonl/parse.ts @@ -0,0 +1,74 @@ +/** + * JSONL parser — splits on `\n`, parses each non-empty line as JSONC + * (allowing comments/trailing-comma is harmless and matches what + * openclaw session logs actually emit). Soft-error policy: malformed + * lines surface as `kind: 'malformed'` AST entries plus a diagnostic. + * + * @module @openclaw/oc-path/jsonl/parse + */ + +import type { Diagnostic } from '../ast.js'; +import { parseJsonc } from '../jsonc/parse.js'; +import type { JsonlAst, JsonlLine } from './ast.js'; + +export interface JsonlParseResult { + readonly ast: JsonlAst; + readonly diagnostics: readonly Diagnostic[]; +} + +export function parseJsonl(raw: string): JsonlParseResult { + const diagnostics: Diagnostic[] = []; + // Detect the line-ending convention from the input. Windows-authored + // datasets use CRLF; Unix and most cross-platform tooling use LF. We + // count CRLF occurrences and call CRLF if the majority of newlines + // are CRLF — this handles mixed-ending files (e.g., a Unix log + // edited once on Windows) by picking the dominant convention. + // Without this, `setJsonlOcPath` rebuilds a CRLF input via render + // mode which joins with `\n`, producing mixed endings on a + // previously-CRLF file. + const crlfCount = (raw.match(/\r\n/g) ?? []).length; + const lfCount = (raw.match(/\n/g) ?? []).length; + const lineEnding: '\r\n' | '\n' = + crlfCount > 0 && crlfCount * 2 >= lfCount ? 
'\r\n' : '\n'; + + // Trim trailing newline so we don't fabricate a blank line at EOF + // for files that end with `\n` (which is most of them). + let body = raw.endsWith('\r\n') ? raw.slice(0, -2) : raw.endsWith('\n') ? raw.slice(0, -1) : raw; + // Normalize line endings to LF for consistent splitting; per-line + // `raw` is stored without the trailing `\r`, and render mode + // restores the original convention via `lineEnding`. + body = body.replace(/\r\n/g, '\n'); + const lines: JsonlLine[] = []; + + if (body.length === 0) { + return { ast: { kind: 'jsonl', raw, lines, lineEnding }, diagnostics }; + } + + const parts = body.split('\n'); + parts.forEach((lineText, idx) => { + const lineNo = idx + 1; + if (lineText.trim().length === 0) { + lines.push({ kind: 'blank', line: lineNo, raw: lineText }); + return; + } + const r = parseJsonc(lineText); + if (r.ast.root === null) { + lines.push({ kind: 'malformed', line: lineNo, raw: lineText }); + diagnostics.push({ + line: lineNo, + message: `line ${lineNo} could not be parsed as JSON`, + severity: 'warning', + code: 'OC_JSONL_LINE_MALFORMED', + }); + return; + } + lines.push({ + kind: 'value', + line: lineNo, + value: r.ast.root, + raw: lineText, + }); + }); + + return { ast: { kind: 'jsonl', raw, lines, lineEnding }, diagnostics }; +} diff --git a/src/oc-path/jsonl/resolve.ts b/src/oc-path/jsonl/resolve.ts new file mode 100644 index 00000000000..6a2c45c387c --- /dev/null +++ b/src/oc-path/jsonl/resolve.ts @@ -0,0 +1,157 @@ +/** + * Resolve an `OcPath` against a `JsonlAst`. 
+ * + * Convention for JSONL OcPaths: + * + * oc://session-events/L42 → entire line 42 value + * oc://session-events/L42/result → field on line 42's value + * oc://session-events/L42/result.detail → dotted descent + * oc://session-events/$last → final value line (blank/malformed skipped) + * + * The line slot accepts `Lnnn`, `$first`, `$last`, or `-N` (Nth-from-last + * value line) — they're how forensics / replay refers to a + * specific entry without committing to a content key. + * + * @module @openclaw/oc-path/jsonl/resolve + */ + +import type { OcPath } from '../oc-path.js'; +import { + POS_FIRST, + POS_LAST, + isPositionalSeg, + isQuotedSeg, + resolvePositionalSeg, + splitRespectingBrackets, + unquoteSeg, +} from '../oc-path.js'; +import type { JsoncEntry, JsoncValue } from '../jsonc/ast.js'; +import type { JsonlAst, JsonlLine } from './ast.js'; + +export type JsonlOcPathMatch = + | { readonly kind: 'root'; readonly node: JsonlAst } + | { readonly kind: 'line'; readonly node: JsonlLine } + | { + readonly kind: 'value'; + readonly node: JsoncValue; + readonly line: number; + readonly path: readonly string[]; + } + | { + readonly kind: 'object-entry'; + readonly node: JsoncEntry; + readonly line: number; + readonly path: readonly string[]; + }; + +export function resolveJsonlOcPath( + ast: JsonlAst, + path: OcPath, +): JsonlOcPathMatch | null { + // The first non-file segment is the line address (Lnnn, $first, $last, or -N). + const head = path.section; + if (head === undefined) {return { kind: 'root', node: ast };} + + const lineEntry = pickLine(ast, head); + if (lineEntry === null) {return null;} + + // No further descent — return the line entry itself. + if (path.item === undefined && path.field === undefined) { + return { kind: 'line', node: lineEntry }; + } + + if (lineEntry.kind !== 'value') {return null;} + + const segments: string[] = []; + if (path.item !== undefined) { + for (const s of splitRespectingBrackets(path.item, '.')) { + segments.push(isQuotedSeg(s) ? 
unquoteSeg(s) : s); + } + } + if (path.field !== undefined) { + for (const s of splitRespectingBrackets(path.field, '.')) { + segments.push(isQuotedSeg(s) ? unquoteSeg(s) : s); + } + } + + let current: JsoncValue = lineEntry.value; + let lastEntry: JsoncEntry | null = null; + const walked: string[] = []; + + for (let seg of segments) { + if (seg.length === 0) {return null;} + // See openclaw#59934 — positional `-N` falls through on keyed containers. + if (isPositionalSeg(seg)) { + const concrete = positionalForJsonc(current, seg); + if (concrete !== null) {seg = concrete;} + } + walked.push(seg); + if (current.kind === 'object') { + const entry = current.entries.find((e) => e.key === seg); + if (entry === undefined) {return null;} + lastEntry = entry; + current = entry.value; + continue; + } + if (current.kind === 'array') { + const idx = Number(seg); + if (!Number.isInteger(idx) || idx < 0 || idx >= current.items.length) {return null;} + lastEntry = null; + const item = current.items[idx]; + if (item === undefined) {return null;} + current = item; + continue; + } + return null; + } + + if (lastEntry !== null && current === lastEntry.value) { + return { + kind: 'object-entry', + node: lastEntry, + line: lineEntry.line, + path: walked, + }; + } + return { kind: 'value', node: current, line: lineEntry.line, path: walked }; +} + +function pickLine(ast: JsonlAst, addr: string): JsonlLine | null { + if (addr === POS_LAST) { + for (let i = ast.lines.length - 1; i >= 0; i--) { + const l = ast.lines[i]; + if (l !== undefined && l.kind === 'value') {return l;} + } + return null; + } + if (addr === POS_FIRST) { + for (const l of ast.lines) { + if (l.kind === 'value') {return l;} + } + return null; + } + // Negative line address: `-N` selects the Nth-from-last value line. + if (/^-\d+$/.test(addr)) { + const valueLines = ast.lines.filter((l): l is Extract<JsonlLine, { kind: 'value' }> => l.kind === 'value'); + const n = valueLines.length + Number(addr); + return n >= 0 && n < valueLines.length ? 
valueLines[n] : null; + } + const m = /^L(\d+)$/.exec(addr); + if (m === null || m[1] === undefined) {return null;} + const target = Number(m[1]); + for (const l of ast.lines) { + if (l.line === target) {return l;} + } + return null; +} + +function positionalForJsonc(node: JsoncValue, seg: string): string | null { + if (node.kind === 'object') { + const keys = node.entries.map((e) => e.key); + return resolvePositionalSeg(seg, { indexable: false, size: keys.length, keys }); + } + if (node.kind === 'array') { + return resolvePositionalSeg(seg, { indexable: true, size: node.items.length }); + } + return null; +} diff --git a/src/oc-path/oc-path.ts b/src/oc-path/oc-path.ts new file mode 100644 index 00000000000..e4c318cd759 --- /dev/null +++ b/src/oc-path/oc-path.ts @@ -0,0 +1,1114 @@ +/** + * `oc://` path syntax — universal addressing for the OpenClaw workspace. + * + * Canonical form: + * + * oc://{file}[/{section}[/{item}[/{field}]]][?session={id}] + * + * Used in PatchError messages, audit events, governance warnings, lint + * findings, doctor fixers, API error responses, SSE events, and editor + * deep-links. No ad-hoc string paths anywhere — every path through the + * serve layer flows through `parseOcPath` / `formatOcPath`. + * + * **Round-trip contract**: `formatOcPath(parseOcPath(s)) === s` for every + * valid `s` produced by `formatOcPath`. + * + * @module @openclaw/oc-path/oc-path + */ + +import { OcEmitSentinelError, REDACTED_SENTINEL } from './sentinel.js'; + +const OC_SCHEME = 'oc://'; + +/** + * Hard caps to prevent pathological input from exhausting resources. + * + * `MAX_PATH_LENGTH` — input string length. 4 KiB is enough for any + * realistic addressing use (deep nested workflows max out around 200 + * bytes). Anything larger is either user error or hostile input. + * + * `MAX_SUB_SEGMENTS_PER_SLOT` — dotted sub-segment count inside a + * single slot. Real workspace addressing maxes around 10 levels. 
+ * + * `MAX_TRAVERSAL_DEPTH` — used by find walkers to bound `**` + * recursion. Real ASTs don't nest beyond ~50; 256 is a safe ceiling. + */ +export const MAX_PATH_LENGTH = 4096; +export const MAX_SUB_SEGMENTS_PER_SLOT = 64; +export const MAX_TRAVERSAL_DEPTH = 256; + +/** UTF-8 BOM (U+FEFF). Stripped from path strings before scheme check. */ +const BOM = '\uFEFF'; + +/** + * True if the string contains any C0 control char (U+0000 — U+001F) + * or DEL (U+007F). Walks by char code so we never embed literal + * control bytes in source — the equivalent regex would put NUL/DEL + * into this file, which lint and binary-detection tools flag. + */ +function hasControlChar(s: string): boolean { + for (let i = 0; i < s.length; i++) { + const cc = s.charCodeAt(i); + if (cc <= 0x1f || cc === 0x7f) { + return true; + } + } + return false; +} + +/** Reserved characters that can't appear unencoded in path segments. */ +const RESERVED_CHARS_RE = /[?&%]/; + +/** + * Render a string for inclusion in error messages — replaces control + * chars with `\xNN` escapes so error output is readable even when the + * offending input contains invisible characters. + */ +function printable(s: string): string { + // Walk the string explicitly rather than using a control-char regex + // — the no-control-regex lint rule rejects character classes that + // contain bytes in U+0000–U+001F + U+007F, but that's exactly the + // range we WANT to escape so error messages stay readable when + // input contains invisible bytes. Manual loop sidesteps the rule. + let out = ''; + for (let i = 0; i < s.length; i++) { + const cc = s.charCodeAt(i); + if (cc <= 0x1f || cc === 0x7f) { + out += `\\x${cc.toString(16).padStart(2, '0')}`; + } else { + out += s[i]; + } + } + return out; +} + +/** + * Parsed `oc://` path. Components nest strictly: `item` implies + * `section`, `field` implies `item`. Structural violations are rejected + * by `formatOcPath`. 
+ * + * Per the upstream pre-RFC, `field` addresses either a frontmatter key + * (when used directly under a file with no section) OR the value of a + * key/value bullet (`- key: value`) inside an item. The substrate + * resolver dispatches based on what the path resolves to. + */ +export interface OcPath { + /** Target file or virtual root (e.g. `SOUL.md`, `skills/email-drafter`). Always present. */ + readonly file: string; + /** Optional H2 section within the file (e.g. `Boundaries`). */ + readonly section?: string; + /** Optional item within a section (e.g. `deny-rule-1`). Requires `section`. */ + readonly item?: string; + /** Optional field on an item or frontmatter (e.g. `risk`). Requires `item` for item-fields. */ + readonly field?: string; + /** Optional session scope (e.g. `cron:daily`). Orthogonal to nesting. */ + readonly session?: string; +} + +/** + * Error thrown when an `oc://` path cannot be parsed or formatted. + * + * `code` is a stable, machine-readable tag; downstream consumers + * (PatchError, audit events, error handlers) match on `code`, not on + * `message`. + */ +export class OcPathError extends Error { + readonly code: string; + readonly input: string; + + constructor(message: string, input: string, code: string) { + super(message); + this.name = 'OcPathError'; + this.input = input; + this.code = code; + } +} + +/** + * Parse an `oc://` path string into a structured `OcPath`. + * + * Accepts the full syntax: file, optional section/item/field, optional + * `?session=` query parameter. Unknown query parameters are silently + * ignored. + * + * Throws `OcPathError` for missing scheme, empty file, or empty path + * segments. + */ +export function parseOcPath(input: string): OcPath { + if (typeof input !== 'string') { + throw new OcPathError('oc:// path must be a string', String(input), 'OC_PATH_NOT_STRING'); + } + + // P-032 — hard cap on input length. 
Pathological inputs are rejected + // before any further string ops so quadratic scans can't be triggered. + // The pre-normalize check fails fast on absurd input (a 10 MB string + // shouldn't even reach .normalize); the post-normalize check below + // catches the corner case where NFC composition grows the string + // past the cap (a few decomposed Hangul or combining-mark sequences + // can exceed pre-normalize length). + if (input.length > MAX_PATH_LENGTH) { + throw new OcPathError( + `oc:// path exceeds ${MAX_PATH_LENGTH} bytes (length: ${input.length})`, + input.slice(0, 80) + '…', + 'OC_PATH_TOO_LONG', + ); + } + + // P-001 — strip a leading UTF-8 BOM if present. The BOM is invisible + // and confuses scheme detection; rejecting silently would surface as + // a misleading "missing scheme" error. + let normalized = input.startsWith(BOM) ? input.slice(BOM.length) : input; + + // P-002 — normalize to NFC. Different filesystems produce different + // forms (macOS HFS+ historically NFD; web / Unix / Windows NFC). NFC + // is the canonical form for cross-platform string equality. + normalized = normalized.normalize('NFC'); + + // Re-check the cap after NFC. NFC can grow a string (some Hangul + // and combining-mark sequences); without this re-check the + // documented invariant — "downstream loops iterate at most + // MAX_PATH_LENGTH chars" — doesn't hold. + if (normalized.length > MAX_PATH_LENGTH) { + throw new OcPathError( + `oc:// path exceeds ${MAX_PATH_LENGTH} bytes after NFC (length: ${normalized.length})`, + input.slice(0, 80) + '…', + 'OC_PATH_TOO_LONG', + ); + } + + if (!normalized.startsWith(OC_SCHEME)) { + throw new OcPathError(`Missing oc:// scheme: ${printable(input)}`, input, 'OC_PATH_MISSING_SCHEME'); + } + + const afterScheme = normalized.slice(OC_SCHEME.length); + // Find the query separator at the TOP level (outside brackets, + // braces, and quotes). 
Plain `indexOf('?')` would treat a quoted + // key like `"foo?bar"` as a query boundary, breaking advertised + // quoted-segment support — closes the parser-quoted-query gap. + const queryIndex = indexOfTopLevel(afterScheme, '?'); + const pathPart = queryIndex === -1 ? afterScheme : afterScheme.slice(0, queryIndex); + const queryPart = queryIndex === -1 ? '' : afterScheme.slice(queryIndex + 1); + + if (pathPart.length === 0) { + throw new OcPathError(`Empty oc:// path: ${printable(input)}`, input, 'OC_PATH_EMPTY'); + } + + const segments = splitRespectingBrackets(pathPart, '/', input); + for (const seg of segments) { + if (seg.length === 0) { + throw new OcPathError(`Empty segment in oc:// path: ${printable(input)}`, input, 'OC_PATH_EMPTY_SEGMENT'); + } + } + + if (segments.length > 4) { + throw new OcPathError( + `Too many segments in oc:// path (max 4): ${printable(input)}`, + input, + 'OC_PATH_TOO_DEEP', + ); + } + + // Validate every segment: bracket/brace shape, dotted sub-segments, + // P-003 whitespace, P-004 control chars, P-026 reserved chars. + for (const seg of segments) { + validateBrackets(seg, input); + const subs = splitRespectingBrackets(seg, '.', input); + if (subs.length > MAX_SUB_SEGMENTS_PER_SLOT) { + throw new OcPathError( + `Sub-segment count exceeds ${MAX_SUB_SEGMENTS_PER_SLOT} in segment "${seg}": ${printable(input)}`, + input, + 'OC_PATH_TOO_DEEP', + ); + } + for (const sub of subs) { + validateSubSegment(sub, input); + } + } + + const session = extractSession(queryPart); + + // Unquote the file slot so `path.file` always carries the bare + // filesystem path. `splitRespectingBrackets` keeps a quoted file + // segment intact (`"skills/email-drafter"`) so the `/` inside it + // isn't treated as a slot separator; here we strip the surrounding + // quotes so consumers (CLI's `resolveFsPath`, find / resolve walkers) + // see `skills/email-drafter` rather than `"skills/email-drafter"`. 
+ // Without this, the round-trip emits `oc://"skills/email-drafter"` + // and the CLI tries to `fs.readFile` a literally-quoted filename. + const fileSeg = segments[0]; + const file = isQuotedSeg(fileSeg) ? unquoteSeg(fileSeg) : fileSeg; + + // Containment — `oc://` paths address files **relative to the workspace + // root**. Absolute paths and parent-directory escapes (`..`) would let a + // hostile workflow / skill manifest persuade `openclaw path resolve|set + // |emit` into reading or writing arbitrary filesystem locations. Reject + // both before the path leaks into `resolveFsPath` (which would resolve + // an absolute slot away from `cwd` per Node `path.resolve` semantics). + // Quoted-segment unquoting (above) means `oc://".."/x` and + // `oc://"../foo"/x` are caught by the same check. + if (file.startsWith('/') || file.startsWith('\\') || /^[a-zA-Z]:/.test(file)) { + throw new OcPathError( + `Absolute file slot not allowed (oc:// paths are workspace-relative): ${printable(input)}`, + input, + 'OC_PATH_ABSOLUTE_FILE', + ); + } + if (file.split(/[\\/]/).some((seg) => seg === '..')) { + throw new OcPathError( + `Parent-directory segment ('..') not allowed in oc:// file slot: ${printable(input)}`, + input, + 'OC_PATH_PARENT_TRAVERSAL', + ); + } + + const result: OcPath = { + file, + ...(segments[1] !== undefined ? { section: segments[1] } : {}), + ...(segments[2] !== undefined ? { item: segments[2] } : {}), + ...(segments[3] !== undefined ? { field: segments[3] } : {}), + ...(session !== undefined ? { session } : {}), + }; + + return result; +} + +/** + * Format an `OcPath` struct back into its canonical string form. + * + * Throws `OcPathError` if the struct violates structural nesting + * (item without section, field without item). 
+ */ +export function formatOcPath(path: OcPath): string { + if (!path.file || path.file.length === 0) { + throw new OcPathError('oc:// path requires a file', '', 'OC_PATH_FILE_REQUIRED'); + } + // Symmetric defense with parseOcPath — an `OcPath` struct constructed + // programmatically with `file: '..'` or `file: '/etc/passwd'` would + // otherwise emit a path that either round-trips into a traversal or + // is rejected at parse time, breaking the contract on line 13. Refuse + // here so the caller sees the violation at the format boundary. + if (path.file.startsWith('/') || path.file.startsWith('\\') || /^[a-zA-Z]:/.test(path.file)) { + throw new OcPathError( + `Absolute file slot not allowed in OcPath struct: ${printable(path.file)}`, + path.file, + 'OC_PATH_ABSOLUTE_FILE', + ); + } + if (path.file.split(/[\\/]/).some((seg) => seg === '..')) { + throw new OcPathError( + `Parent-directory segment ('..') not allowed in OcPath.file: ${printable(path.file)}`, + path.file, + 'OC_PATH_PARENT_TRAVERSAL', + ); + } + if (hasControlChar(path.file)) { + throw new OcPathError( + `Control character in OcPath.file: ${printable(path.file)}`, + path.file, + 'OC_PATH_CONTROL_CHAR', + ); + } + if (path.item !== undefined && path.section === undefined) { + throw new OcPathError( + 'Structural nesting violation: item requires section', + path.file, + 'OC_PATH_NESTING', + ); + } + if (path.field !== undefined && path.item === undefined && path.section !== undefined) { + // section + field without item is allowed for frontmatter-shaped addressing? No — + // frontmatter is `oc://FILE/[frontmatter]/key`. For now require item-or-no-section + // with field. Reconsider when frontmatter addressing lands. 
+ throw new OcPathError( + 'Structural nesting violation: field requires item when section is present', + path.file, + 'OC_PATH_NESTING', + ); + } + if (path.field !== undefined && path.item === undefined && path.section === undefined) { + // `{ file, field }` with no section / item would emit `oc://FILE/FIELD` + // and silently re-parse as `{ file, section: FIELD }`. The struct + // already violates the slot grammar (field implies item) — refuse + // here so programmatic callers don't ship a path that round-trips + // to a different shape than they wrote. + throw new OcPathError( + 'Structural nesting violation: field requires item', + path.file, + 'OC_PATH_NESTING', + ); + } + + // Each slot is a dotted sub-segment string. Round-trip requires that + // raw sub-segments containing the path grammar's special characters + // get quoted before concatenation, OR pass through if already in a + // structural form (quoted `"..."`, predicate `[...]`, union `{...}`, + // literal sentinel `[frontmatter]` etc.). Plain concatenation would + // silently turn a raw `foo/bar` slot into two segments at parse + // time. Closes the formatter quoted-segment gap. + const formatSubSegment = (sub: string): string => { + if (isQuotedSeg(sub)) {return sub;} // already quoted + if (sub.startsWith('[') && sub.endsWith(']')) {return sub;} // predicate / sentinel + if (sub.startsWith('{') && sub.endsWith('}')) {return sub;} // union + return quoteSeg(sub); + }; + // Reject content the parser would refuse on the way back in. Without + // these guards a struct like `{section:'foo.'}` would emit + // `oc://X/foo.""` (an empty quoted sub-segment) and re-parse with + // `section: 'foo.""'` — silent round-trip mangling. Mirrors + // validateSubSegment's empty + control-char checks at the format + // boundary so callers see the violation here, not on the next parse. 
+ const validateSubForFormat = (sub: string, slotName: string): void => { + if (sub.length === 0) { + throw new OcPathError( + `Empty dotted sub-segment in OcPath.${slotName}`, + path.file, + 'OC_PATH_EMPTY_SUB_SEGMENT', + ); + } + if (hasControlChar(sub)) { + throw new OcPathError( + `Control character in OcPath.${slotName} sub-segment "${printable(sub)}"`, + path.file, + 'OC_PATH_CONTROL_CHAR', + ); + } + }; + const formatSlot = (slot: string, slotName: string): string => { + const subs = splitRespectingBrackets(slot, '.'); + for (const sub of subs) {validateSubForFormat(sub, slotName);} + return subs.map(formatSubSegment).join('.'); + }; + + // The file slot uses simpler quoting than section/item/field: dots + // are normal in filenames (`AGENTS.md`) and don't need quoting; we + // only quote when the file contains chars that would otherwise be + // parsed as structure — primarily `/` which is the segment separator. + // `quoteSeg` already wraps + escapes when needed; we narrow the + // trigger so plain `AGENTS.md` round-trips bare. + const fileNeedsQuote = /[/[\]{}?&%"\s]/.test(path.file); + const formattedFile = fileNeedsQuote ? quoteSeg(path.file) : path.file; + let out = OC_SCHEME + formattedFile; + if (path.section !== undefined) {out += '/' + formatSlot(path.section, 'section');} + if (path.item !== undefined) {out += '/' + formatSlot(path.item, 'item');} + if (path.field !== undefined) {out += '/' + formatSlot(path.field, 'field');} + if (path.session !== undefined) {out += '?session=' + path.session;} + // Symmetric upper bound with parseOcPath's MAX_PATH_LENGTH cap. Without + // this, a struct whose formatted form exceeds the cap would emit a + // string `parseOcPath` immediately rejects — silently breaking the + // round-trip contract and surprising every consumer that buffers / + // logs / column-aligns by the cap (audit events, error messages, + // editor breadcrumbs). 
+ if (out.length > MAX_PATH_LENGTH) { + throw new OcPathError( + `Formatted oc:// exceeds ${MAX_PATH_LENGTH} bytes (length: ${out.length})`, + out.slice(0, 80) + '…', + 'OC_PATH_TOO_LONG', + ); + } + // Sentinel guard at the path-string emit boundary. The substrate's + // contract: emit boundaries refuse to write the redaction sentinel, + // and `formatOcPath` IS such a boundary — path strings flow into + // telemetry, audit events, error messages, find result `path` fields. + // Without this guard, a struct field carrying the literal + // `__OPENCLAW_REDACTED__` slips past every consumer except the CLI + // (which has its own scrubSentinel layer). + if (out.includes(REDACTED_SENTINEL)) { + throw new OcEmitSentinelError(out); + } + return out; +} + +/** + * Type guard — true iff `input` is a non-empty string that `parseOcPath` + * would accept. Does not throw; callers can branch on this before + * parsing. + */ +export function isValidOcPath(input: unknown): input is string { + if (typeof input !== 'string') {return false;} + try { + parseOcPath(input); + return true; + } catch { + return false; + } +} + +/** + * Positional tokens — single-match primitives that resolve to one + * concrete index/key based on container size at resolve time. Unlike + * `*` / `**`, these do NOT trigger the wildcard guard on + * `resolveOcPath` / `setOcPath`: they always pick exactly one element. + * + * `$first` — index 0 (seq/array) or first-declared key (map/object) + * `$last` — last index, or last-declared key + * `-N` — Nth from the end (seq/array only); `-1` = last, `-2` = penultimate + * + * Out-of-range tokens (`$first` on an empty container, `-99` on a + * 3-item array) yield `null` from resolve and an empty match list + * from find. + * + * `$last` was the original jsonl-only sentinel for line addressing + * (`oc://X/$last/event`); it's now generalized to every kind. 
/**
 * Positional tokens — single-match primitives that are turned into one
 * concrete index/key from the container's shape at lookup time. They
 * are not multi-match patterns: each picks at most one element.
 *
 *   `$first`  — index 0 (indexable) or first-declared key (keyed)
 *   `$last`   — last index, or last-declared key
 *   `-N`      — Nth from the end (indexable only); `-1` = last
 *
 * Tokens that cannot apply (empty container, out-of-range `-N`, `-N` on
 * a keyed container) resolve to `null`.
 */
export const POS_FIRST = '$first';
export const POS_LAST = '$last';

/** Shape of a from-the-end index token: a minus sign followed by digits. */
const NEGATIVE_POSITION_TOKEN_RE = /^-\d+$/;

/** True iff `seg` is a positional token that resolves at lookup time. */
export function isPositionalSeg(seg: string): boolean {
  if (seg === POS_FIRST || seg === POS_LAST) {return true;}
  return NEGATIVE_POSITION_TOKEN_RE.test(seg);
}

/**
 * Ordinal addressing — `#N` (zero-based) names the Nth child by
 * document order. Useful for slug-addressed kinds where two siblings
 * can share a slug; `#0` / `#1` still tell them apart.
 */
export function isOrdinalSeg(seg: string): boolean {
  return /^#\d+$/.test(seg);
}

/** Extract `N` from an ordinal token `#N`; `null` when `seg` is not one. */
export function parseOrdinalSeg(seg: string): number | null {
  const match = /^#(\d+)$/.exec(seg);
  if (match === null) {return null;}
  const digits = match[1];
  return digits === undefined ? null : Number(digits);
}

/**
 * Container shape handed to `resolvePositionalSeg`. Indexable
 * containers are described by `size`; keyed containers additionally
 * supply their ordered `keys` so `$first` / `$last` can pick a key.
 */
export interface PositionalContainer {
  readonly indexable: boolean;
  readonly size: number;
  readonly keys?: readonly string[];
}

/**
 * Resolve a positional token (`$first` / `$last` / `-N`) against a
 * container's shape.
 *
 * @param seg - candidate token; anything that is not a positional token yields `null`
 * @param container - shape of the container being addressed
 * @returns the concrete segment (decimal index or literal key), or `null`
 *   when the token cannot apply to this container
 */
export function resolvePositionalSeg(
  seg: string,
  container: PositionalContainer,
): string | null {
  const wantsFirst = seg === POS_FIRST;
  if (wantsFirst || seg === POS_LAST) {
    if (container.size === 0) {return null;}
    if (container.indexable) {
      return wantsFirst ? '0' : String(container.size - 1);
    }
    // Keyed container: pick from the declared key order, if provided.
    const keys = container.keys;
    if (keys === undefined) {return null;}
    const picked = wantsFirst ? keys[0] : keys[keys.length - 1];
    return picked ?? null;
  }

  if (!NEGATIVE_POSITION_TOKEN_RE.test(seg)) {return null;}
  // Negative indices only make sense where children are numbered.
  if (!container.indexable) {return null;}

  // Magnitude guard: anything beyond 1e9 is not a plausible index, and a
  // very long digit run parses to a non-integer (-Infinity). Either way
  // the caller gets a clean null.
  const offset = Number(seg);
  if (!Number.isInteger(offset) || Math.abs(offset) > 1e9) {return null;}

  const index = container.size + offset;
  return index >= 0 && index < container.size ? String(index) : null;
}

/**
 * Wildcard tokens for multi-match patterns.
 *
 * `*` stands for exactly one sub-segment; `**` stands for zero or more
 * sub-segments at any depth.
 */
export const WILDCARD_SINGLE = '*';
export const WILDCARD_RECURSIVE = '**';
/**
 * `true` iff any sub-segment of the path is a multi-match pattern:
 * `*`, `**`, a union `{a,b,c}`, or a value predicate.
 *
 * Slots are split with the bracket/quote-aware splitter rather than
 * `slot.split('.')`, so a quoted key that merely contains `*` (e.g.
 * `"items.*.glob"`) is not mistaken for a wildcard.
 *
 * `isPattern` is the preferred name; `hasWildcard` is kept as an alias.
 */
export function isPattern(path: OcPath): boolean {
  const isMultiMatch = (sub: string): boolean =>
    sub === WILDCARD_SINGLE ||
    sub === WILDCARD_RECURSIVE ||
    isUnionSeg(sub) ||
    isPredicateSeg(sub);

  return [path.section, path.item, path.field].some(
    (slot) => slot !== undefined && splitRespectingBrackets(slot, '.').some(isMultiMatch),
  );
}

/** @deprecated v1 — use {@link isPattern}. Behaviorally identical. */
export const hasWildcard = isPattern;

/**
 * Union segment — `{a,b,c}` matches each comma-separated alternative.
 *
 *   oc://X/steps/* /{command,run}     → each step's command OR run
 *   oc://X/{steps,inputs}/* /id       → id under steps OR inputs
 *
 * This check is purely about shape (brace-wrapped, at least `{}`);
 * `parseUnionSeg` decides whether the alternatives are well-formed.
 */
export function isUnionSeg(seg: string): boolean {
  if (seg.length < 2) {return false;}
  return seg[0] === '{' && seg[seg.length - 1] === '}';
}

/**
 * Split a union segment into its alternatives. Whitespace inside the
 * braces is preserved. Returns `null` when `seg` is not brace-wrapped
 * or when any alternative is empty (which includes `{}`).
 */
export function parseUnionSeg(seg: string): readonly string[] | null {
  if (!isUnionSeg(seg)) {return null;}
  // `''.split(',')` yields `['']`, so the empty-alternative test below
  // also rejects the empty union `{}`.
  const alternatives = seg.slice(1, -1).split(',');
  return alternatives.includes('') ? null : alternatives;
}
/**
 * Value predicate segment — `[key<op>value]` filters a parent
 * enumeration by sibling-field comparison. Used in find patterns:
 *
 *   oc://X/steps/[id=build]                  → step whose `id` equals `build`
 *   oc://X/steps/[id!=test]/command          → command of every non-test step
 *   oc://X/steps/[command*=npm]/id           → id of every step whose command contains `npm`
 *   oc://X/steps/[command^=npm run]/id       → id of every step whose command starts with `npm run`
 *   oc://X/steps/[id$=_test]/command         → command of every step whose id ends with `_test`
 *   oc://X/models/[contextWindow>=1000000]   → models with 1M+ context window
 *   oc://X/models/[maxTokens>128000]/id      → id of every model with maxTokens > 128000
 *
 * Operators:
 *
 *   String (CSS attribute-selector style):
 *     `=`   equality (string-coerced)
 *     `!=`  inequality
 *     `*=`  substring contains
 *     `^=`  starts-with
 *     `$=`  ends-with
 *
 *   Numeric:
 *     `<`   less than
 *     `<=`  less than or equal
 *     `>`   greater than
 *     `>=`  greater than or equal
 *
 * Operator search is greedy on multi-char operators — `[a!=b]` is
 * `key=a, op=!=, value=b`, not `key=a!, op==, value=b`.
 */
export type PredicateOp = '=' | '!=' | '*=' | '^=' | '$=' | '<' | '<=' | '>' | '>=';

/** Multi-char first so greedy match wins (`<=` before `<`, etc.). */
const PREDICATE_OPS: readonly PredicateOp[] = ['!=', '*=', '^=', '$=', '<=', '>=', '<', '>', '='];

/**
 * `true` iff `seg` is bracket-wrapped (`[...]`, at least 4 chars) and
 * its interior contains a comparison operator. Bracket segments without
 * an operator (e.g. `[frontmatter]`) are literal sentinels, not
 * predicates.
 */
export function isPredicateSeg(seg: string): boolean {
  if (seg.length < 4 || !seg.startsWith('[') || !seg.endsWith(']')) {return false;}
  const inner = seg.slice(1, -1);
  // Substring search, not `new Set(inner)`: a Set built from a string
  // holds single characters, so multi-char operators could never be
  // members and detection would only work by accident (each of them
  // happens to contain `=`).
  return PREDICATE_OPS.some((op) => inner.includes(op));
}

/** Parsed form of a predicate segment. */
export interface PredicateSpec {
  readonly key: string;
  readonly op: PredicateOp;
  readonly value: string;
}

/**
 * Parse `[key<op>value]` into its parts.
 *
 * The leftmost operator wins; at that position the longest operator is
 * chosen. So `[a==b]` is `key=a, op==, value==b` and `[a<=b]` is
 * `key=a, op=<=, value=b`.
 *
 * @returns the parsed spec, or `null` when `seg` is not bracket-wrapped,
 *   has no operator, or has an empty key or value. A multi-char operator
 *   with nothing after it (`[a<=]`) is malformed — it is NOT reinterpreted
 *   as the shorter operator followed by the value `=`.
 */
export function parsePredicateSeg(seg: string): PredicateSpec | null {
  if (seg.length < 4 || !seg.startsWith('[') || !seg.endsWith(']')) {return null;}
  const inner = seg.slice(1, -1);
  // Start at 1: an operator at position 0 would mean an empty key.
  for (let i = 1; i < inner.length; i++) {
    // PREDICATE_OPS lists multi-char operators first, so `find` yields
    // the longest operator that starts at this position.
    const op = PREDICATE_OPS.find((candidate) => inner.startsWith(candidate, i));
    if (op === undefined) {continue;}
    const value = inner.slice(i + op.length);
    // The operator runs to the end of the segment: no value. Nothing to
    // the right can form a valid predicate either, so reject outright.
    if (value.length === 0) {return null;}
    return { key: inner.slice(0, i), op, value };
  }
  return null;
}
/**
 * Coerce predicate text to a finite number, or `null` if it is not one.
 *
 * Blank text is rejected explicitly: `Number('')` and `Number('  ')`
 * are both `0`, which would otherwise let empty leaves satisfy numeric
 * comparisons.
 */
function parseFinitePredicateNumber(text: string): number | null {
  if (text.trim().length === 0) {return null;}
  const n = Number(text);
  return Number.isFinite(n) ? n : null;
}

/**
 * Evaluate a predicate against a string-coerced leaf value.
 *
 * @param actual - the sibling's value as a string, or `null` when there
 *   is nothing comparable (the predicate then never matches)
 * @param pred - parsed predicate (`key` is not consulted here)
 * @returns whether `actual` satisfies `pred.op` against `pred.value`
 *
 * String operators compare the text as-is. Numeric operators
 * (`<` / `<=` / `>` / `>=`) require BOTH sides to coerce to finite
 * numbers; non-numeric or blank text on either side never matches.
 */
export function evaluatePredicate(actual: string | null, pred: PredicateSpec): boolean {
  if (actual === null) {return false;}
  switch (pred.op) {
    case '=':
      return actual === pred.value;
    case '!=':
      return actual !== pred.value;
    case '*=':
      return actual.includes(pred.value);
    case '^=':
      return actual.startsWith(pred.value);
    case '$=':
      return actual.endsWith(pred.value);
    case '<':
    case '<=':
    case '>':
    case '>=': {
      const lhs = parseFinitePredicateNumber(actual);
      const rhs = parseFinitePredicateNumber(pred.value);
      if (lhs === null || rhs === null) {return false;}
      if (pred.op === '<') {return lhs < rhs;}
      if (pred.op === '<=') {return lhs <= rhs;}
      if (pred.op === '>') {return lhs > rhs;}
      return lhs >= rhs;
    }
  }
  // Defensive: only reachable if an untyped caller passes an unknown op.
  return false;
}

/**
 * Flattened view of a path's section/item/field slots: every dotted
 * sub-segment in order, plus how many came from each slot so a
 * concrete walk can be packed back into the same slot boundaries.
 */
export interface PathSegmentLayout {
  readonly subs: readonly string[];
  /** Number of sub-segments in `section` (0 if absent). */
  readonly sectionLen: number;
  /** Number of sub-segments in `item` (0 if absent). */
  readonly itemLen: number;
  /** Number of sub-segments in `field` (0 if absent). */
  readonly fieldLen: number;
}

/**
 * Flatten `path` into its sub-segment layout.
 *
 * Uses the bracket/quote-aware splitter rather than `slot.split('.')`,
 * so a quoted segment containing a literal `.` (e.g. `"a.b"`) stays a
 * single sub-segment.
 */
export function getPathLayout(path: OcPath): PathSegmentLayout {
  const splitSlot = (slot: string | undefined): string[] =>
    slot === undefined ? [] : splitRespectingBrackets(slot, '.');

  const sectionSubs = splitSlot(path.section);
  const itemSubs = splitSlot(path.item);
  const fieldSubs = splitSlot(path.field);
  return {
    subs: [...sectionSubs, ...itemSubs, ...fieldSubs],
    sectionLen: sectionSubs.length,
    itemLen: itemSubs.length,
    fieldLen: fieldSubs.length,
  };
}

/**
 * Re-pack a concrete sub-segment list into an `OcPath` that has the
 * same slot boundaries (section/item/field) as `pattern`. `file` and
 * `session` are copied from `pattern`; slots that end up with zero
 * sub-segments are omitted.
 *
 * @throws OcPathError (`OC_PATH_REPACK_LENGTH`) when `subs` does not
 *   have exactly as many entries as `pattern` has sub-segments.
 */
export function repackPath(
  pattern: OcPath,
  subs: readonly string[],
): OcPath {
  const layout = getPathLayout(pattern);
  if (subs.length !== layout.subs.length) {
    throw new OcPathError(
      `repack length mismatch: pattern has ${layout.subs.length} sub-segments, got ${subs.length}`,
      formatOcPath(pattern),
      'OC_PATH_REPACK_LENGTH',
    );
  }
  const itemStart = layout.sectionLen;
  const fieldStart = itemStart + layout.itemLen;
  const sectionSubs = subs.slice(0, itemStart);
  const itemSubs = subs.slice(itemStart, fieldStart);
  const fieldSubs = subs.slice(fieldStart);
  return {
    file: pattern.file,
    ...(sectionSubs.length > 0 ? { section: sectionSubs.join('.') } : {}),
    ...(itemSubs.length > 0 ? { item: itemSubs.join('.') } : {}),
    ...(fieldSubs.length > 0 ? { field: fieldSubs.join('.') } : {}),
    ...(pattern.session !== undefined ? { session: pattern.session } : {}),
  };
}

/**
 * Pull the `session` value out of a query string (`a=b&session=x`).
 * Returns the first non-empty `session` value, or `undefined` if the
 * query is empty or has none. Pairs without `=` are ignored; values are
 * returned verbatim (no decoding).
 */
function extractSession(queryPart: string): string | undefined {
  if (queryPart.length === 0) {return undefined;}
  for (const pair of queryPart.split('&')) {
    const eqIndex = pair.indexOf('=');
    if (eqIndex === -1) {continue;}
    const key = pair.slice(0, eqIndex);
    const value = pair.slice(eqIndex + 1);
    if (key === 'session' && value.length > 0) {return value;}
  }
  return undefined;
}
/**
 * Find the first occurrence of `ch` at the TOP level of `s` —
 * outside any `[...]`, `{...}`, or `"..."` region. Lets a caller
 * locate a separator such as `?` without tripping on a quoted key
 * like `"foo?bar"`.
 *
 * Inside quotes a backslash skips the following character, so `\"`
 * does not close the quote.
 *
 * @returns the index of `ch`, or `-1` if it is not present at the top level
 */
export function indexOfTopLevel(s: string, ch: string): number {
  let depthBracket = 0;
  let depthBrace = 0;
  let inQuote = false;
  for (let i = 0; i < s.length; i++) {
    const c = s[i];
    if (inQuote) {
      if (c === '\\' && i + 1 < s.length) { i++; continue; }
      if (c === '"') {inQuote = false;}
      continue;
    }
    if (c === '"') { inQuote = true; continue; }
    if (c === '[') {depthBracket++;}
    else if (c === ']') {depthBracket--;}
    else if (c === '{') {depthBrace++;}
    else if (c === '}') {depthBrace--;}
    if (c === ch && depthBracket === 0 && depthBrace === 0) {return i;}
  }
  return -1;
}

/**
 * Split `s` on `delim`, but treat balanced `[...]`, `{...}`, and
 * `"..."` regions as opaque — delimiters inside brackets/braces or
 * inside double quotes don't trigger splits.
 *
 * Quoted segments let path keys contain `/`, `.`, `?`, `&`, `%`, and
 * whitespace verbatim:
 *
 *   oc://X/"foo/bar"/baz   → key `foo/bar`
 *   oc://X/agents.defaults.models/"anthropic/claude-opus-4-7"/alias
 *
 * Inside a quoted region a backslash carries the next character along
 * with it (so `\\` and `\"` never close the quote). Pieces are returned
 * verbatim — quotes and escapes are NOT stripped here; see `unquoteSeg`.
 *
 * @param s - text to split
 * @param delim - single-character delimiter
 * @param originalInput - full path to report in errors (defaults to `s`)
 * @returns the pieces; always at least one element (`['']` for empty input)
 * @throws OcPathError (`OC_PATH_UNBALANCED`) on unbalanced
 *   brackets/braces or an unterminated quote
 *
 * @internal — exported for use by the find walker; not part of the
 * public OcPath API surface.
 */
export function splitRespectingBrackets(s: string, delim: string, originalInput?: string): string[] {
  const out: string[] = [];
  let depthBracket = 0;
  let depthBrace = 0;
  let inQuote = false;
  let buf = '';
  for (let i = 0; i < s.length; i++) {
    const c = s[i];
    if (inQuote) {
      // Inside a quoted region: a backslash consumes the next char;
      // an unescaped `"` closes the quote.
      if (c === '\\' && i + 1 < s.length) {
        buf += c + s[i + 1];
        i++;
        continue;
      }
      if (c === '"') {
        inQuote = false;
      }
      buf += c;
      continue;
    }
    if (c === '"') {
      inQuote = true;
      buf += c;
      continue;
    }
    if (c === '[') {depthBracket++;}
    else if (c === ']') {depthBracket--;}
    else if (c === '{') {depthBrace++;}
    else if (c === '}') {depthBrace--;}
    // A closer with no matching opener: fail immediately.
    if (depthBracket < 0 || depthBrace < 0) {
      throw new OcPathError(
        `Unbalanced bracket/brace in oc:// path: ${originalInput ?? s}`,
        originalInput ?? s,
        'OC_PATH_UNBALANCED',
      );
    }
    if (c === delim && depthBracket === 0 && depthBrace === 0) {
      out.push(buf);
      buf = '';
      continue;
    }
    buf += c;
  }
  // An opener (or quote) that never closed.
  if (depthBracket !== 0 || depthBrace !== 0 || inQuote) {
    throw new OcPathError(
      `Unbalanced bracket/brace/quote in oc:// path: ${originalInput ?? s}`,
      originalInput ?? s,
      'OC_PATH_UNBALANCED',
    );
  }
  out.push(buf);
  return out;
}

/**
 * `true` iff `seg` has the shape `"..."` (at least two characters,
 * first and last both `"`). Shape check only — it does not verify that
 * the closing quote is unescaped.
 */
export function isQuotedSeg(seg: string): boolean {
  return seg.length >= 2 && seg.startsWith('"') && seg.endsWith('"');
}

/**
 * Strip surrounding quotes and unescape `\\` / `\"` from a quoted
 * segment, yielding the literal content. Inverse of `quoteSeg`.
 * A backslash before any other character is kept literally.
 *
 * No-op on bare (unquoted) segments — returns input unchanged.
 */
export function unquoteSeg(seg: string): string {
  if (!isQuotedSeg(seg)) {return seg;}
  const inner = seg.slice(1, -1);
  let out = '';
  for (let i = 0; i < inner.length; i++) {
    const c = inner[i];
    if (c === '\\' && i + 1 < inner.length) {
      const next = inner[i + 1];
      if (next === '\\' || next === '"') {
        out += next;
        i++;
        continue;
      }
    }
    out += c;
  }
  return out;
}

/**
 * Quote a literal value for inclusion in a path. If the value contains
 * any character that has grammar meaning unquoted (`/`, `.`, `[`, `]`,
 * `{`, `}`, `?`, `&`, `%`, `"`, or whitespace), wrap it in double
 * quotes and escape embedded `\` and `"`. Otherwise return it as-is.
 * The empty string is emitted as `""`.
 */
export function quoteSeg(value: string): string {
  if (value.length === 0) {return '""';}
  const needsQuote = /[/.[\]{}?&%"\s]/.test(value);
  if (!needsQuote) {return value;}
  const escaped = value.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
  return `"${escaped}"`;
}

/**
 * Check that `[`/`]` and `{`/`}` are balanced within a single segment,
 * ignoring anything inside a quoted region (`"..."` with `\` escapes)
 * so quoted keys containing a literal `[` or `{` are accepted.
 *
 * @throws OcPathError (`OC_PATH_UNBALANCED`) on a stray closer or an
 *   opener that never closes
 */
function validateBrackets(seg: string, input: string): void {
  let depthBracket = 0;
  let depthBrace = 0;
  let inQuote = false;
  let escaped = false;
  for (const c of seg) {
    if (inQuote) {
      if (escaped) {
        escaped = false;
      } else if (c === '\\') {
        escaped = true;
      } else if (c === '"') {
        inQuote = false;
      }
      continue;
    }
    if (c === '"') {
      inQuote = true;
      continue;
    }
    if (c === '[') {depthBracket++;}
    else if (c === ']') {depthBracket--;}
    else if (c === '{') {depthBrace++;}
    else if (c === '}') {depthBrace--;}
    if (depthBracket < 0 || depthBrace < 0) {
      throw new OcPathError(
        `Unbalanced bracket/brace in segment "${seg}": ${printable(input)}`,
        input,
        'OC_PATH_UNBALANCED',
      );
    }
  }
  if (depthBracket !== 0 || depthBrace !== 0) {
    throw new OcPathError(
      `Unbalanced bracket/brace in segment "${seg}": ${printable(input)}`,
      input,
      'OC_PATH_UNBALANCED',
    );
  }
}

/**
 * Validate one dotted sub-segment of a parsed path.
 *
 * Checks, in order:
 *   1. non-empty (a stray `.` such as `a..b` yields an empty sub);
 *   2. no control characters — applies to quoted subs too;
 *   3. quoted subs (`"..."`) are accepted here with no further checks;
 *   4. identifier-shaped subs (not starting with `[` or `{`) must not
 *      contain reserved characters (`?`, `&`, `%`) or any whitespace;
 *   5. bracket subs must open AND close with brackets, be non-empty,
 *      and — if they contain a comparison operator — parse as a
 *      predicate `[key<op>value]` with non-empty key and value.
 *      Operator-free bracket subs (e.g. `[frontmatter]`) are literal
 *      sentinels and accepted as-is;
 *   6. brace subs must open AND close with braces, be non-empty, and
 *      have no empty alternative.
 *
 * @param sub - the sub-segment under test
 * @param input - the full path, used for error reporting
 * @throws OcPathError with a code naming the violated rule
 */
function validateSubSegment(sub: string, input: string): void {
  if (sub.length === 0) {
    throw new OcPathError(
      `Empty dotted sub-segment in oc:// path: ${printable(input)}`,
      input,
      'OC_PATH_EMPTY_SUB_SEGMENT',
    );
  }

  // Control characters are banned in both quoted and unquoted forms —
  // quoting lets you put slashes in keys, not control bytes.
  if (hasControlChar(sub)) {
    throw new OcPathError(
      `Control character in oc:// segment "${printable(sub)}": ${printable(input)}`,
      input,
      'OC_PATH_CONTROL_CHAR',
    );
  }

  // Quoting is the explicit opt-out from the identifier-shape rules below.
  if (isQuotedSeg(sub)) {return;}

  const startsBracket = sub.startsWith('[');
  const startsBrace = sub.startsWith('{');

  // Predicate / union segments are exempt from the next two checks:
  // their values are content and may legitimately hold these characters.
  if (!startsBracket && !startsBrace) {
    if (RESERVED_CHARS_RE.test(sub)) {
      throw new OcPathError(
        `Reserved character (\`?\` / \`&\` / \`%\`) in oc:// segment "${sub}": ${printable(input)}`,
        input,
        'OC_PATH_RESERVED_CHAR',
      );
    }
    // `\s` already covers leading/trailing whitespace, so no separate
    // trim comparison is needed.
    if (/\s/.test(sub)) {
      throw new OcPathError(
        `Whitespace in oc:// segment "${sub}": ${printable(input)}`,
        input,
        'OC_PATH_WHITESPACE',
      );
    }
  }

  // Bracket grammar: literal sentinel or predicate; one-sided is an error.
  const endsBracket = sub.endsWith(']');
  if (startsBracket !== endsBracket) {
    throw new OcPathError(
      `Mismatched bracket in segment "${sub}": ${printable(input)}`,
      input,
      'OC_PATH_MALFORMED_PREDICATE',
    );
  }
  if (startsBracket && endsBracket) {
    const inner = sub.slice(1, -1);
    if (inner.length === 0) {
      throw new OcPathError(
        `Empty bracket segment "${sub}": ${printable(input)}`,
        input,
        'OC_PATH_MALFORMED_PREDICATE',
      );
    }
    // If it looks like a predicate (has an operator), it must parse.
    // Shares the operator table with the predicate parser so the two
    // cannot drift apart.
    const hasOp = PREDICATE_OPS.some((op) => inner.includes(op));
    if (hasOp) {
      const parsed = parsePredicateSeg(sub);
      if (parsed === null || parsed.key.length === 0 || parsed.value.length === 0) {
        throw new OcPathError(
          `Malformed predicate "${sub}" — must be \`[key<op>value]\` with non-empty key and value: ${printable(input)}`,
          input,
          'OC_PATH_MALFORMED_PREDICATE',
        );
      }
    }
  }

  // Brace grammar: union `{a,b,c}`. Mismatched or empty is rejected.
  const endsBrace = sub.endsWith('}');
  if (startsBrace !== endsBrace) {
    throw new OcPathError(
      `Mismatched brace in segment "${sub}": ${printable(input)}`,
      input,
      'OC_PATH_MALFORMED_UNION',
    );
  }
  if (startsBrace && endsBrace) {
    const inner = sub.slice(1, -1);
    if (inner.length === 0) {
      throw new OcPathError(
        `Empty union "${sub}" — must contain at least one alternative: ${printable(input)}`,
        input,
        'OC_PATH_MALFORMED_UNION',
      );
    }
    if (inner.split(',').some((a) => a.length === 0)) {
      throw new OcPathError(
        `Empty alternative in union "${sub}": ${printable(input)}`,
        input,
        'OC_PATH_MALFORMED_UNION',
      );
    }
  }
}
/**
 * Parse raw markdown text into a `MdAst`.
 *
 * The input is split into lines (LF or CRLF), an optional leading
 * frontmatter fence is detected, and the remaining body is divided
 * into a preamble plus one block per H2 heading. `raw` is stored
 * untouched on the AST root.
 *
 * Recoverable oddities (e.g. unclosed frontmatter) are reported as
 * diagnostics rather than thrown.
 *
 * @param raw - the file's text
 * @returns the AST and any diagnostics collected while parsing
 */
export function parseMd(raw: string): ParseResult {
  const diagnostics: Diagnostic[] = [];

  // Strip a leading BOM for parsing convenience; `raw` itself is kept
  // intact on the AST.
  const withoutBom = raw.startsWith(BOM) ? raw.slice(BOM.length) : raw;
  const lines = withoutBom.split(/\r?\n/);

  const fm = detectFrontmatter(lines, diagnostics);
  const bodyStartLine = fm === null ? 0 : fm.endLine + 1;
  const bodyLines = lines.slice(bodyStartLine);

  // `bodyStartLine` is a 0-based index; line numbers on AST nodes are 1-based.
  const { preamble, blocks } = splitH2Blocks(bodyLines, bodyStartLine + 1, diagnostics);

  const ast: MdAst = {
    kind: 'md',
    raw,
    frontmatter: fm?.entries ?? [],
    preamble,
    blocks,
  };

  return { ast, diagnostics };
}

// ---------- Frontmatter ---------------------------------------------------

interface FrontmatterRange {
  readonly entries: readonly FrontmatterEntry[];
  /** 0-based line index of the closing `---`. */
  readonly endLine: number;
}

/**
 * Detect a frontmatter section: line 0 must be exactly `---` and a
 * later line must be exactly `---` again.
 *
 * Lines of the form `key: value` between the fences become entries
 * (value trimmed and stripped of one pair of matching quotes). Blank
 * lines and lines that don't match that shape are skipped silently.
 *
 * @returns the entries and the closing fence's index, or `null` when
 *   there is no frontmatter. An opening fence that never closes also
 *   returns `null`, after pushing an `OC_FRONTMATTER_UNCLOSED` warning.
 */
function detectFrontmatter(
  lines: readonly string[],
  diagnostics: Diagnostic[],
): FrontmatterRange | null {
  if (lines.length < 2) {return null;}
  if (lines[0] !== FENCE) {return null;}

  let closeIndex = -1;
  for (let i = 1; i < lines.length; i++) {
    if (lines[i] === FENCE) {
      closeIndex = i;
      break;
    }
  }
  if (closeIndex === -1) {
    diagnostics.push({
      line: 1,
      message: 'frontmatter opens with --- but never closes',
      severity: 'warning',
      code: 'OC_FRONTMATTER_UNCLOSED',
    });
    return null;
  }

  const entries: FrontmatterEntry[] = [];
  for (let i = 1; i < closeIndex; i++) {
    const line = lines[i];
    if (line.trim().length === 0) {continue;}
    const m = /^([a-zA-Z_][a-zA-Z0-9_-]*)\s*:\s*(.*)$/.exec(line);
    if (m === null) {
      // Possibly a list-style continuation (`  - item`) of the previous
      // key. Lists are not modelled at this layer; skip silently.
      continue;
    }
    entries.push({
      key: m[1],
      value: unquote(m[2].trim()),
      line: i + 1,
    });
  }

  return { entries, endLine: closeIndex };
}

/**
 * Remove one pair of matching surrounding quotes (`"…"` or `'…'`).
 * Anything else — including a lone quote character — is returned
 * unchanged. No escape processing is performed.
 */
function unquote(value: string): string {
  if (value.length >= 2) {
    const first = value.charCodeAt(0);
    const last = value.charCodeAt(value.length - 1);
    if (first === last && (first === 34 /* " */ || first === 39 /* ' */)) {
      return value.slice(1, -1);
    }
  }
  return value;
}

// ---------- H2 block split -------------------------------------------------

/**
 * Divide the body into a preamble (everything before the first `## `
 * heading) and one block per H2 heading, each carrying its heading,
 * slug, line number, body text, and extracted items/tables/code blocks.
 *
 * Lines inside a ``` fence are never treated as headings.
 */
function splitH2Blocks(
  bodyLines: readonly string[],
  /** 1-based line number of `bodyLines[0]` in the original file. */
  bodyStartLineNum: number,
  diagnostics: Diagnostic[],
): { preamble: string; blocks: AstBlock[] } {
  // Track code-fence state so `##` inside a fenced block doesn't get
  // parsed as a heading.
  let inCode = false;
  const headings: { line: number; text: string }[] = [];

  for (let i = 0; i < bodyLines.length; i++) {
    const line = bodyLines[i];
    if (line.startsWith('```')) {
      inCode = !inCode;
      continue;
    }
    if (inCode) {continue;}
    const m = /^##\s+(\S.*?)\s*$/.exec(line);
    if (m !== null) {
      headings.push({ line: i, text: m[1] });
    }
  }

  if (headings.length === 0) {
    return {
      preamble: bodyLines.join('\n'),
      blocks: [],
    };
  }

  const preamble = bodyLines.slice(0, headings[0].line).join('\n');
  const blocks: AstBlock[] = [];

  for (let h = 0; h < headings.length; h++) {
    const start = headings[h].line;
    // A block runs up to the next heading, or to the end of the body.
    const end = h + 1 < headings.length ? headings[h + 1].line : bodyLines.length;
    const headingText = headings[h].text;
    const blockBodyLines = bodyLines.slice(start + 1, end);
    const bodyText = blockBodyLines.join('\n');
    const headingLineNum = bodyStartLineNum + start;

    const items = extractItems(blockBodyLines, headingLineNum + 1, diagnostics);
    const tables = extractTables(blockBodyLines, headingLineNum + 1);
    const codeBlocks = extractCodeBlocks(blockBodyLines, headingLineNum + 1);

    blocks.push({
      heading: headingText,
      slug: slugify(headingText),
      line: headingLineNum,
      bodyText,
      items,
      tables,
      codeBlocks,
    });
  }

  return { preamble, blocks };
}

// ---------- Items ----------------------------------------------------------

/** A bullet at column 0: `-`, `*` or `+`, whitespace, then the text. */
const BULLET_RE = /^(?:[-*+])\s+(.+?)\s*$/;
/** `key: value` inside a bullet's text; the key is everything before the first colon. */
const KV_RE = /^([^:]+?)\s*:\s*(.+)$/;

/**
 * Collect bullet items from a block body, skipping lines inside ```
 * fences. An item whose text looks like `key: value` also gets a `kv`
 * pair and is slugged by its key; otherwise it is slugged by its text.
 *
 * @param startLineNum - 1-based file line number of `blockBodyLines[0]`
 */
function extractItems(
  blockBodyLines: readonly string[],
  startLineNum: number,
  _diagnostics: Diagnostic[],
): AstItem[] {
  const items: AstItem[] = [];
  let inCode = false;

  for (let i = 0; i < blockBodyLines.length; i++) {
    const line = blockBodyLines[i];
    if (line.startsWith('```')) {
      inCode = !inCode;
      continue;
    }
    if (inCode) {continue;}
    const m = BULLET_RE.exec(line);
    if (m === null) {continue;}
    const text = m[1];
    const kvMatch = KV_RE.exec(text);
    const item: AstItem = {
      text,
      slug: kvMatch ? slugify(kvMatch[1]) : slugify(text),
      line: startLineNum + i,
      ...(kvMatch !== null
        ? { kv: { key: kvMatch[1].trim(), value: kvMatch[2].trim() } }
        : {}),
    };
    items.push(item);
  }

  return items;
}

// ---------- Tables ---------------------------------------------------------

/**
 * Collect pipe tables from a block body. A table is a line starting
 * with `|`, immediately followed by a separator row of `-` / `:` cells,
 * followed by zero or more rows that start with `|`.
 *
 * Lines inside ``` fences are skipped — the same rule the heading and
 * item extractors apply — so a table shown as example code is not
 * indexed as a real table.
 *
 * @param startLineNum - 1-based file line number of `blockBodyLines[0]`
 */
function extractTables(
  blockBodyLines: readonly string[],
  startLineNum: number,
): AstTable[] {
  const tables: AstTable[] = [];
  let inCode = false;
  let i = 0;
  while (i < blockBodyLines.length) {
    const headerLine = blockBodyLines[i];
    if (headerLine.startsWith('```')) {
      inCode = !inCode;
      i++;
      continue;
    }
    if (inCode) {
      i++;
      continue;
    }
    const sepLine = blockBodyLines[i + 1];
    if (
      headerLine.trim().startsWith('|') &&
      sepLine !== undefined &&
      /^\s*\|\s*[:-]+(?:\s*\|\s*[:-]+)*\s*\|?\s*$/.test(sepLine)
    ) {
      const headers = splitTableRow(headerLine);
      const rows: string[][] = [];
      // Rows begin after the separator and run while lines start with `|`
      // (a fence line never does, so fence state cannot change in here).
      let j = i + 2;
      while (j < blockBodyLines.length && blockBodyLines[j].trim().startsWith('|')) {
        rows.push(splitTableRow(blockBodyLines[j]));
        j++;
      }
      tables.push({ headers, rows, line: startLineNum + i });
      i = j;
      continue;
    }
    i++;
  }
  return tables;
}

/**
 * Split one table row into trimmed cell strings, dropping the optional
 * leading and trailing `|`. Escaped pipes are not treated specially.
 */
function splitTableRow(line: string): string[] {
  const trimmed = line.trim().replace(/^\|/, '').replace(/\|$/, '');
  return trimmed.split('|').map((cell) => cell.trim());
}

// ---------- Code blocks ---------------------------------------------------

/**
 * Collect ``` fenced code blocks from a block body. The text after the
 * opening fence is the language (`null` when empty). An unclosed fence
 * runs to the end of the block body.
 *
 * @param startLineNum - 1-based file line number of `blockBodyLines[0]`
 */
function extractCodeBlocks(
  blockBodyLines: readonly string[],
  startLineNum: number,
): AstCodeBlock[] {
  const codeBlocks: AstCodeBlock[] = [];
  let i = 0;
  while (i < blockBodyLines.length) {
    const open = blockBodyLines[i];
    if (open.startsWith('```')) {
      const lang = open.slice(3).trim();
      const langField = lang.length > 0 ? lang : null;
      const startLine = startLineNum + i;
      let j = i + 1;
      const bodyLines: string[] = [];
      while (j < blockBodyLines.length && !blockBodyLines[j].startsWith('```')) {
        bodyLines.push(blockBodyLines[j]);
        j++;
      }
      codeBlocks.push({ lang: langField, text: bodyLines.join('\n'), line: startLine });
      // Resume after the closing fence.
      i = j + 1;
      continue;
    }
    i++;
  }
  return codeBlocks;
}
+ */ +export type OcPathMatch = + | { readonly kind: 'root'; readonly node: MdAst } + | { readonly kind: 'frontmatter'; readonly node: FrontmatterEntry } + | { readonly kind: 'block'; readonly node: AstBlock } + | { readonly kind: 'item'; readonly node: AstItem; readonly block: AstBlock } + | { + readonly kind: 'item-field'; + readonly node: AstItem; + readonly block: AstBlock; + /** The kv.value string, surfaced for convenience. */ + readonly value: string; + }; + +/** + * Resolve an `OcPath` against an AST. Returns the matched node or + * `null`. Slugs match case-insensitively against `slugify(input)` — + * "Boundaries" matches a section heading "## Boundaries" because both + * slugify to "boundaries". + * + * Special-case: `OcPath.section === '[frontmatter]'` (literal) addresses + * frontmatter; `field` then names the frontmatter key. This lets a + * single OcPath shape address both prose-tree fields and frontmatter + * fields without growing the tuple. + */ +export function resolveMdOcPath(ast: MdAst, path: OcPath): OcPathMatch | null { + // Frontmatter addressing: oc://FILE/[frontmatter]/key + // The frontmatter key sits at the OcPath `item` slot in this 3-segment + // shape; we accept `field` as a fallback for callers that thread + // 4-segment paths. + if (path.section === '[frontmatter]') { + const key = path.item ?? path.field; + if (key === undefined) {return null;} + const entry = ast.frontmatter.find((e) => e.key === key); + if (entry === undefined) {return null;} + return { kind: 'frontmatter', node: entry }; + } + + // Plain file root address. + if (path.section === undefined) { + return { kind: 'root', node: ast }; + } + + const sectionSlug = path.section.toLowerCase(); + const block = ast.blocks.find((b) => b.slug === sectionSlug); + if (block === undefined) {return null;} + + // Section-only address. 
+ if (path.item === undefined) { + return { kind: 'block', node: block }; + } + + // Item addressing: ordinal (`#N`) > positional (`$first`/`$last`/`-N`) + // > slug. Ordinal uses absolute document order so two items sharing + // a slug stay distinguishable. + let item: AstItem | undefined; + if (isOrdinalSeg(path.item)) { + const n = parseOrdinalSeg(path.item); + if (n === null || n < 0 || n >= block.items.length) {return null;} + item = block.items[n]; + } else if (isPositionalSeg(path.item)) { + const concrete = resolvePositionalSeg(path.item, { + indexable: true, + size: block.items.length, + }); + if (concrete === null) {return null;} + item = block.items[Number(concrete)]; + } else { + const itemSlug = path.item.toLowerCase(); + item = block.items.find((i) => i.slug === itemSlug); + } + if (item === undefined) {return null;} + + // Item-only address. + if (path.field === undefined) { + return { kind: 'item', node: item, block }; + } + + // Item-field address. Requires the item to have a `kv` and the field + // to match the kv key (case-insensitive). A field on an item without + // kv shape is unresolvable — return null rather than guessing. + if (item.kv === undefined) {return null;} + if (item.kv.key.toLowerCase() !== path.field.toLowerCase()) {return null;} + return { kind: 'item-field', node: item, block, value: item.kv.value }; +} diff --git a/src/oc-path/sentinel.ts b/src/oc-path/sentinel.ts new file mode 100644 index 00000000000..b0167138590 --- /dev/null +++ b/src/oc-path/sentinel.ts @@ -0,0 +1,63 @@ +/** + * Substrate-level redaction-sentinel guard. + * + * Closes the `__OPENCLAW_REDACTED__` corruption class by rejecting the + * literal string at the emit boundary. 
Per-call-site reject rules + * (added piecemeal in [#62281](https://github.com/openclaw/openclaw/issues/62281), + * [#44357](https://github.com/openclaw/openclaw/issues/44357), + * [#13495](https://github.com/openclaw/openclaw/issues/13495), and others) + * caught the symptom; this guard removes the substrate that produced + * the symptom in the first place. + * + * Throwing at emit (not at the consumer) means every code path through + * the substrate is covered, including future call sites we haven't + * audited. + * + * @module @openclaw/oc-path/sentinel + */ + +/** + * The literal string that marks redacted secrets in OpenClaw's runtime + * representation. Writing it to disk is always a bug — the consumer + * was supposed to drop the redacted view, not pass it through to the + * writer. + */ +export const REDACTED_SENTINEL = '__OPENCLAW_REDACTED__'; + +/** + * Thrown when emit detects a `"__OPENCLAW_REDACTED__"` literal in any + * emitted bytes. Callers should treat this as a fatal write error; + * recovering by stripping the sentinel would silently corrupt the + * file. Fail-closed. + * + * `path` is the OcPath-shaped pointer to where the sentinel was + * detected (e.g., `oc://config/plugins.entries.foo.token`). For + * non-config emits, it's the closest meaningful address (frontmatter + * key, section/item slug, etc.) or just the file name. + */ +export class OcEmitSentinelError extends Error { + readonly code = 'OC_EMIT_SENTINEL'; + readonly path: string; + + constructor(path: string) { + super(`emit refused to write "${REDACTED_SENTINEL}" sentinel literal at ${path}`); + this.name = 'OcEmitSentinelError'; + this.path = path; + } +} + +/** + * Throw `OcEmitSentinelError` if `value` contains the redaction + * sentinel anywhere. 
Substring match (not equality) — a hostile caller + * embedding `prefix__OPENCLAW_REDACTED__suffix` in a leaf must be + * rejected just as forcefully as the bare sentinel; the substring form + * still leaks the marker bytes to disk where downstream scanners flag + * the file as corrupted. + * + * No-op for any non-string input. Used by every leaf-write boundary. + */ +export function guardSentinel(value: unknown, ocPath: string): void { + if (typeof value === 'string' && value.includes(REDACTED_SENTINEL)) { + throw new OcEmitSentinelError(ocPath); + } +} diff --git a/src/oc-path/slug.ts b/src/oc-path/slug.ts new file mode 100644 index 00000000000..7c326673d81 --- /dev/null +++ b/src/oc-path/slug.ts @@ -0,0 +1,43 @@ +/** + * Slug derivation for OcPath section/item addressing. + * + * A slug is the kebab-case lowercase form of a heading or item text: + * "Tool Guidance" → "tool-guidance" + * " Restricted Data " → "restricted-data" + * "deny-rule-1" → "deny-rule-1" (already a slug) + * "API_KEY" → "api-key" + * "Multi-tenant isolation" → "multi-tenant-isolation" + * "deny: secrets" → "deny-secrets" (colon + space → hyphen) + * + * Deterministic + idempotent. Used by parse to pre-compute slugs for + * blocks and items, and by resolveOcPath to match section/item names. + * + * @module @openclaw/oc-path/slug + */ + +const NON_SLUG_CHARS = /[^a-z0-9-]+/g; +const COLLAPSE_HYPHENS = /-+/g; +const TRIM_HYPHENS = /^-+|-+$/g; + +/** + * Convert arbitrary text into a slug usable as an OcPath segment. + * + * Rules: + * 1. Lowercase + * 2. Replace `_` with `-` + * 3. Replace any non-`[a-z0-9-]` runs with a single `-` + * 4. Collapse repeated `-` + * 5. Trim leading/trailing `-` + * + * Returns the empty string for input that has no slug-valid characters + * (e.g., `"!!"` → `""`); callers should treat empty slugs as not + * matchable rather than as wildcards. 
+ */ +export function slugify(text: string): string { + return text + .toLowerCase() + .replace(/_/g, '-') + .replace(NON_SLUG_CHARS, '-') + .replace(COLLAPSE_HYPHENS, '-') + .replace(TRIM_HYPHENS, ''); +} diff --git a/src/oc-path/tests/edit.test.ts b/src/oc-path/tests/edit.test.ts new file mode 100644 index 00000000000..e3e6695ab72 --- /dev/null +++ b/src/oc-path/tests/edit.test.ts @@ -0,0 +1,99 @@ +import { describe, expect, it } from 'vitest'; +import { setMdOcPath as setOcPath } from '../edit.js'; +import { parseOcPath } from '../oc-path.js'; +import { parseMd } from '../parse.js'; + +describe('setOcPath — frontmatter', () => { + it('replaces a frontmatter value', () => { + const raw = `--- +name: github +description: old desc +--- + +Body. +`; + const { ast } = parseMd(raw); + const r = setOcPath( + ast, + parseOcPath('oc://AGENTS.md/[frontmatter]/description'), + 'new desc', + ); + expect(r.ok).toBe(true); + if (r.ok) { + expect(r.ast.raw).toContain('description: new desc'); + expect(r.ast.raw).not.toContain('old desc'); + } + }); + + it('reports unresolved when the key is missing', () => { + const { ast } = parseMd('---\nname: x\n---\n'); + const r = setOcPath( + ast, + parseOcPath('oc://AGENTS.md/[frontmatter]/nope'), + 'x', + ); + expect(r).toEqual({ ok: false, reason: 'unresolved' }); + }); + + it('quotes values that need YAML-escaping', () => { + const { ast } = parseMd('---\nx: a\n---\n'); + const r = setOcPath(ast, parseOcPath('oc://AGENTS.md/[frontmatter]/x'), 'has: colon'); + expect(r.ok).toBe(true); + if (r.ok) {expect(r.ast.raw).toContain('x: "has: colon"');} + }); +}); + +describe('setOcPath — item kv field', () => { + it('replaces an item kv value and reflects it in the rebuilt body', () => { + const raw = `## Boundaries + +- enabled: true +- timeout: 5 +`; + const { ast } = parseMd(raw); + const r = setOcPath( + ast, + parseOcPath('oc://AGENTS.md/boundaries/timeout/timeout'), + '30', + ); + expect(r.ok).toBe(true); + if (r.ok) { + 
expect(r.ast.raw).toContain('- timeout: 30'); + expect(r.ast.raw).toContain('- enabled: true'); + } + }); + + it('reports no-item-kv for an item without kv shape', () => { + const raw = `## Boundaries + +- plain bullet +`; + const { ast } = parseMd(raw); + const r = setOcPath( + ast, + parseOcPath('oc://AGENTS.md/boundaries/plain-bullet/plain-bullet'), + 'x', + ); + expect(r).toEqual({ ok: false, reason: 'no-item-kv' }); + }); + + it('reports unresolved when section/item is missing', () => { + const { ast } = parseMd('## Other\n\n- foo: bar\n'); + const r = setOcPath( + ast, + parseOcPath('oc://AGENTS.md/missing/foo/foo'), + 'x', + ); + expect(r).toEqual({ ok: false, reason: 'unresolved' }); + }); + + it('reports not-writable for section-only addresses', () => { + const { ast } = parseMd('## Boundaries\n\n- enabled: true\n'); + const r = setOcPath( + ast, + parseOcPath('oc://AGENTS.md/boundaries'), + 'x', + ); + expect(r).toEqual({ ok: false, reason: 'not-writable' }); + }); +}); diff --git a/src/oc-path/tests/emit.test.ts b/src/oc-path/tests/emit.test.ts new file mode 100644 index 00000000000..63f81008ec9 --- /dev/null +++ b/src/oc-path/tests/emit.test.ts @@ -0,0 +1,108 @@ +import { describe, expect, it } from 'vitest'; +import { emitMd } from '../emit.js'; +import { parseMd } from '../parse.js'; +import { OcEmitSentinelError } from '../sentinel.js'; + +describe('emit — round-trip mode (default)', () => { + it('returns the raw bytes byte-for-byte', () => { + const raw = `---\nname: x\n---\n\n## Sec\n\n- a\n- b\n`; + const { ast } = parseMd(raw); + expect(emitMd(ast)).toBe(raw); + }); + + it('round-trips CRLF line endings', () => { + const raw = '## Heading\r\n\r\n- item\r\n'; + const { ast } = parseMd(raw); + expect(emitMd(ast)).toBe(raw); + }); + + it('round-trips a file with no frontmatter and no sections', () => { + const raw = 'Just preamble. 
No structure.\n'; + const { ast } = parseMd(raw); + expect(emitMd(ast)).toBe(raw); + }); + + it('echoes raw bytes containing the sentinel by default; strict mode rejects', () => { + // Round-trip trusts parsed bytes — see emit.ts policy comment. + // Strict mode (acceptPreExistingSentinel: false) is the opt-in + // path for callers that want LKG-style fingerprint verification. + const raw = '## Section\n\n- token: __OPENCLAW_REDACTED__\n'; + const { ast } = parseMd(raw); + expect(emitMd(ast)).toBe(raw); + expect(() => emitMd(ast, { acceptPreExistingSentinel: false })).toThrow( + OcEmitSentinelError, + ); + }); +}); + +describe('emit — render mode', () => { + it('renders frontmatter + blocks', () => { + const ast = { + kind: "md" as const, + raw: '', + frontmatter: [ + { key: 'name', value: 'github', line: 2 }, + { key: 'description', value: 'gh CLI', line: 3 }, + ], + preamble: '', + blocks: [ + { + heading: 'Tools', + slug: 'tools', + line: 5, + bodyText: '- gh: GitHub', + items: [{ text: 'gh: GitHub', slug: 'gh', line: 7, kv: { key: 'gh', value: 'GitHub' } }], + tables: [], + codeBlocks: [], + }, + ], + }; + const output = emitMd(ast, { mode: 'render' }); + expect(output).toContain('name: github'); + expect(output).toContain('description: gh CLI'); + expect(output).toContain('## Tools'); + expect(output).toContain('- gh: GitHub'); + }); + + it('quotes frontmatter values containing special chars', () => { + const ast = { + kind: "md" as const, + raw: '', + frontmatter: [{ key: 'title', value: 'a: b', line: 2 }], + preamble: '', + blocks: [], + }; + const output = emitMd(ast, { mode: 'render' }); + expect(output).toContain('title: "a: b"'); + }); + + it('throws if a kv item value matches the sentinel', () => { + const ast = { + kind: "md" as const, + raw: '', + frontmatter: [], + preamble: '', + blocks: [ + { + heading: 'Secrets', + slug: 'secrets', + line: 1, + bodyText: '- token: __OPENCLAW_REDACTED__', + items: [ + { + text: 'token: __OPENCLAW_REDACTED__', + 
slug: 'token', + line: 2, + kv: { key: 'token', value: '__OPENCLAW_REDACTED__' }, + }, + ], + tables: [], + codeBlocks: [], + }, + ], + }; + expect(() => emitMd(ast, { mode: 'render', fileNameForGuard: 'AGENTS.md' })).toThrow( + OcEmitSentinelError, + ); + }); +}); diff --git a/src/oc-path/tests/find.test.ts b/src/oc-path/tests/find.test.ts new file mode 100644 index 00000000000..34ba3345ca8 --- /dev/null +++ b/src/oc-path/tests/find.test.ts @@ -0,0 +1,707 @@ +/** + * `findOcPaths` — multi-match search verb test surface. + * + * Tests cover: `*` single-segment expansion across all 4 kinds; `**` + * recursive descent for jsonc + yaml; the wildcard guard on + * `resolveOcPath` / `setOcPath`; the slot-shape preservation invariant + * (a `*` in the `item` slot produces concrete paths whose `item` field + * carries the matched value). + */ +import { describe, expect, it } from 'vitest'; +import { findOcPaths } from '../find.js'; +import { parseJsonc } from '../jsonc/parse.js'; +import { parseJsonl } from '../jsonl/parse.js'; +import { parseMd } from '../parse.js'; +import { parseYaml } from '../yaml/parse.js'; +import { + formatOcPath, + hasWildcard, + OcPathError, + parseOcPath, +} from '../oc-path.js'; +import { + resolveOcPath, + setOcPath, +} from '../universal.js'; + +// ---------- hasWildcard ---------------------------------------------------- + +describe('hasWildcard', () => { + it('detects single-segment * in any slot', () => { + expect(hasWildcard(parseOcPath('oc://X/*/y'))).toBe(true); + expect(hasWildcard(parseOcPath('oc://X/a/*'))).toBe(true); + expect(hasWildcard(parseOcPath('oc://X/a/b/*'))).toBe(true); + }); + + it('detects ** in any slot', () => { + expect(hasWildcard(parseOcPath('oc://X/**'))).toBe(true); + expect(hasWildcard(parseOcPath('oc://X/a/**/c'))).toBe(true); + }); + + it('detects wildcards inside dotted sub-segments', () => { + expect(hasWildcard(parseOcPath('oc://X/a.*.c'))).toBe(true); + 
expect(hasWildcard(parseOcPath('oc://X/a.**.c'))).toBe(true); + }); + + it('returns false for plain paths', () => { + expect(hasWildcard(parseOcPath('oc://X/a/b/c'))).toBe(false); + expect(hasWildcard(parseOcPath('oc://X/a.b.c'))).toBe(false); + }); + + it('treats `*` inside an identifier as literal', () => { + expect(hasWildcard(parseOcPath('oc://X/foo*bar'))).toBe(false); + expect(hasWildcard(parseOcPath('oc://X/a*'))).toBe(false); + }); +}); + +// ---------- Wildcard guard on resolveOcPath / setOcPath ------------------- + +describe('wildcard guard', () => { + const yaml = parseYaml('steps:\n - id: a\n command: foo\n').ast; + + it('resolveOcPath throws OcPathError for wildcard pattern (F16)', () => { + // Previously returned `null` — indistinguishable from "path doesn't + // resolve". Now throws with `OC_PATH_WILDCARD_IN_RESOLVE` so the + // CLI / consumers can surface "use findOcPaths" rather than "not + // found". setOcPath uses a discriminated `wildcard-not-allowed` + // reason; this is the resolve-side analogue. 
+ expect(() => + resolveOcPath(yaml, parseOcPath('oc://wf/steps/*/command')), + ).toThrow(/findOcPaths/); + try { + resolveOcPath(yaml, parseOcPath('oc://wf/**')); + expect.fail('should have thrown'); + } catch (err) { + expect(err).toBeInstanceOf(OcPathError); + expect((err as OcPathError).code).toBe('OC_PATH_WILDCARD_IN_RESOLVE'); + } + }); + + it('setOcPath returns wildcard-not-allowed for wildcard pattern', () => { + const r = setOcPath(yaml, parseOcPath('oc://wf/steps/*/command'), 'bar'); + expect(r.ok).toBe(false); + if (!r.ok) {expect(r.reason).toBe('wildcard-not-allowed');} + }); + + it('setOcPath wildcard guard reason carries actionable detail', () => { + const r = setOcPath(yaml, parseOcPath('oc://wf/**'), 'bar'); + expect(r.ok).toBe(false); + if (!r.ok) {expect(r.detail).toContain('findOcPaths');} + }); +}); + +// ---------- findOcPaths — fast-path (no wildcards) ------------------------- + +describe('findOcPaths — non-wildcard fast-path', () => { + it('wraps resolveOcPath result for plain path', () => { + const ast = parseYaml('name: x\n').ast; + const out = findOcPaths(ast, parseOcPath('oc://wf/name')); + expect(out).toHaveLength(1); + expect(out[0].match.kind).toBe('leaf'); + expect(formatOcPath(out[0].path)).toBe('oc://wf/name'); + }); + + it('returns empty for unresolved plain path', () => { + const ast = parseYaml('name: x\n').ast; + expect(findOcPaths(ast, parseOcPath('oc://wf/missing'))).toHaveLength(0); + }); +}); + +// ---------- findOcPaths — YAML -------------------------------------------- + +describe('findOcPaths — YAML kind', () => { + const yaml = parseYaml( + 'steps:\n' + + ' - id: build\n' + + ' command: npm run build\n' + + ' - id: test\n' + + ' command: npm test\n' + + ' - id: lint\n' + + ' command: npm run lint\n' + ).ast; + + it('* in item slot enumerates each step', () => { + const out = findOcPaths(yaml, parseOcPath('oc://wf.lobster/steps/*/command')); + expect(out).toHaveLength(3); + const paths = out.map((m) => 
formatOcPath(m.path)); + expect(paths).toEqual([ + 'oc://wf.lobster/steps/0/command', + 'oc://wf.lobster/steps/1/command', + 'oc://wf.lobster/steps/2/command', + ]); + }); + + it('preserves slot shape — concrete path has matched value in item slot', () => { + const out = findOcPaths(yaml, parseOcPath('oc://wf/steps/*/id')); + expect(out).toHaveLength(3); + for (const m of out) { + expect(m.path.section).toBe('steps'); + expect(m.path.field).toBe('id'); + expect(m.path.item).toMatch(/^[0-2]$/); + } + }); + + it('returns leaf valueText for each match', () => { + const out = findOcPaths(yaml, parseOcPath('oc://wf/steps/*/id')); + const leaves = out.map((m) => m.match.kind === 'leaf' ? m.match.valueText : null); + expect(leaves).toEqual(['build', 'test', 'lint']); + }); + + it('** descends recursively', () => { + const yaml2 = parseYaml( + 'a:\n b:\n c: deep\n d: shallow\n' + ).ast; + const out = findOcPaths(yaml2, parseOcPath('oc://wf/**')); + // ** matches root + a + a.b + a.b.c + a.d + const leaves = out.filter((m) => m.match.kind === 'leaf').map((m) => m.match.kind === 'leaf' ? 
m.match.valueText : ''); + expect(leaves.toSorted()).toEqual(['deep', 'shallow']); + }); + + it('returns empty for path that does not match', () => { + const out = findOcPaths(yaml, parseOcPath('oc://wf/missing/*/x')); + expect(out).toHaveLength(0); + }); + + it('every returned path is consumable by resolveOcPath', () => { + const out = findOcPaths(yaml, parseOcPath('oc://wf/steps/*/command')); + for (const m of out) { + const r = resolveOcPath(yaml, m.path); + expect(r).not.toBeNull(); + expect(r?.kind).toBe('leaf'); + } + }); +}); + +// ---------- findOcPaths — JSONC -------------------------------------------- + +describe('findOcPaths — JSONC kind', () => { + const jsonc = parseJsonc( + '{\n' + + ' "plugins": {\n' + + ' "github": {"enabled": true},\n' + + ' "gitlab": {"enabled": false},\n' + + ' "slack": {"enabled": true}\n' + + ' }\n' + + '}\n' + ).ast; + + it('* in item slot enumerates each plugin', () => { + const out = findOcPaths(jsonc, parseOcPath('oc://config/plugins/*/enabled')); + expect(out).toHaveLength(3); + const keys = out.map((m) => m.path.item); + expect(keys.toSorted((a, b) => (a ?? '').localeCompare(b ?? ''))).toEqual(['github', 'gitlab', 'slack']); + }); + + it('returns boolean leaves with leafType', () => { + const out = findOcPaths(jsonc, parseOcPath('oc://config/plugins/*/enabled')); + for (const m of out) { + expect(m.match.kind).toBe('leaf'); + if (m.match.kind === 'leaf') { + expect(m.match.leafType).toBe('boolean'); + } + } + }); +}); + +// ---------- findOcPaths — JSONL -------------------------------------------- + +describe('findOcPaths — JSONL kind', () => { + const jsonl = parseJsonl( + '{"event":"start","userId":"u1"}\n' + + '{"event":"action","userId":"u1"}\n' + + '{"event":"end","userId":"u1"}\n' + ).ast; + + it('* in section slot enumerates each value line', () => { + const out = findOcPaths(jsonl, parseOcPath('oc://session/*/event')); + expect(out).toHaveLength(3); + const events = out.map((m) => m.match.kind === 'leaf' ? 
m.match.valueText : ''); + expect(events).toEqual(['start', 'action', 'end']); + }); + + it('preserves Lnnn line addresses in concrete paths', () => { + const out = findOcPaths(jsonl, parseOcPath('oc://session/*/event')); + for (const m of out) { + expect(m.path.section).toMatch(/^L\d+$/); + } + }); + + // F8 — line-slot union and predicate. Without these, yaml/jsonc + // walkers handled them but JSONL fell through to `pickLine(addr)` + // which returns null for union/predicate shapes → silent zero matches. + it('union {L1,L2} at line slot enumerates each alternative', () => { + const out = findOcPaths(jsonl, parseOcPath('oc://session/{L1,L3}/event')); + expect(out).toHaveLength(2); + const events = out.map((m) => (m.match.kind === 'leaf' ? m.match.valueText : '')); + expect(events).toEqual(['start', 'end']); + }); + + it('union of positional + literal line addresses works', () => { + const out = findOcPaths(jsonl, parseOcPath('oc://session/{L1,$last}/event')); + expect(out).toHaveLength(2); + const events = out.map((m) => (m.match.kind === 'leaf' ? 
m.match.valueText : '')); + expect(events).toEqual(['start', 'end']); + }); + + it('predicate [event=action] at line slot filters by top-level field', () => { + const out = findOcPaths(jsonl, parseOcPath('oc://session/[event=action]/userId')); + expect(out).toHaveLength(1); + if (out[0]?.match.kind === 'leaf') {expect(out[0].match.valueText).toBe('u1');} + }); + + it('predicate [event=missing] at line slot matches zero lines (silent zero is correct)', () => { + const out = findOcPaths(jsonl, parseOcPath('oc://session/[event=missing]/userId')); + expect(out).toHaveLength(0); + }); +}); + +// ---------- Positional primitives ($first / $last / -N) ------------------- + +describe('positional primitives — yaml', () => { + const yaml = parseYaml( + 'steps:\n - id: a\n - id: b\n - id: c\n' + ).ast; + + it('resolveOcPath accepts $first', () => { + const m = resolveOcPath(yaml, parseOcPath('oc://wf/steps/$first/id')); + expect(m?.kind).toBe('leaf'); + if (m?.kind === 'leaf') {expect(m.valueText).toBe('a');} + }); + + it('resolveOcPath accepts $last', () => { + const m = resolveOcPath(yaml, parseOcPath('oc://wf/steps/$last/id')); + expect(m?.kind).toBe('leaf'); + if (m?.kind === 'leaf') {expect(m.valueText).toBe('c');} + }); + + it('resolveOcPath accepts negative index', () => { + const m = resolveOcPath(yaml, parseOcPath('oc://wf/steps/-2/id')); + expect(m?.kind).toBe('leaf'); + if (m?.kind === 'leaf') {expect(m.valueText).toBe('b');} + }); + + it('out-of-range positional returns null', () => { + expect(resolveOcPath(yaml, parseOcPath('oc://wf/steps/-99/id'))).toBeNull(); + }); + + it('positional on empty container returns null', () => { + const empty = parseYaml('steps: []\n').ast; + expect(resolveOcPath(empty, parseOcPath('oc://wf/steps/$first/id'))).toBeNull(); + }); + + it('findOcPaths emits concrete index for positional', () => { + const out = findOcPaths(yaml, parseOcPath('oc://wf/steps/$last/id')); + expect(out).toHaveLength(1); + expect(out[0].path.item).toBe('2'); 
+ }); + + it('hasWildcard returns false for positional patterns', () => { + // Positional ≠ wildcard — they resolve deterministically. + expect(hasWildcard(parseOcPath('oc://X/$last/id'))).toBe(false); + expect(hasWildcard(parseOcPath('oc://X/-1/id'))).toBe(false); + }); +}); + +describe('positional primitives — jsonc', () => { + const jsonc = parseJsonc('{"items":[10,20,30]}').ast; + + it('$first picks first array element', () => { + const m = resolveOcPath(jsonc, parseOcPath('oc://config/items/$first')); + expect(m?.kind).toBe('leaf'); + if (m?.kind === 'leaf') {expect(m.valueText).toBe('10');} + }); + + it('$last picks last array element', () => { + const m = resolveOcPath(jsonc, parseOcPath('oc://config/items/$last')); + expect(m?.kind).toBe('leaf'); + if (m?.kind === 'leaf') {expect(m.valueText).toBe('30');} + }); + + it('$first on object picks first-declared key', () => { + const obj = parseJsonc('{"a":1,"b":2,"c":3}').ast; + const m = resolveOcPath(obj, parseOcPath('oc://config/$first')); + expect(m?.kind).toBe('leaf'); + if (m?.kind === 'leaf') {expect(m.valueText).toBe('1');} + }); +}); + +describe('positional primitives — jsonl', () => { + const jsonl = parseJsonl( + '{"event":"start"}\n{"event":"step"}\n{"event":"end"}\n' + ).ast; + + it('$first picks first value line', () => { + const m = resolveOcPath(jsonl, parseOcPath('oc://session/$first/event')); + if (m?.kind === 'leaf') {expect(m.valueText).toBe('start');} + }); + + it('$last picks last value line (existing behavior)', () => { + const m = resolveOcPath(jsonl, parseOcPath('oc://session/$last/event')); + if (m?.kind === 'leaf') {expect(m.valueText).toBe('end');} + }); + + it('-1 is alias for $last', () => { + const m = resolveOcPath(jsonl, parseOcPath('oc://session/-1/event')); + if (m?.kind === 'leaf') {expect(m.valueText).toBe('end');} + }); +}); + +// ---------- Segment unions {a,b,c} ----------------------------------------- + +describe('union segments — yaml', () => { + const yaml = parseYaml( 
+ 'steps:\n' + + ' - id: a\n command: x\n' + + ' - id: b\n run: y\n' + + ' - id: c\n pipeline: z\n' + ).ast; + + it('{command,run} matches each step that has either field', () => { + const out = findOcPaths(yaml, parseOcPath('oc://wf/steps/*/{command,run}')); + expect(out).toHaveLength(2); + const fields = out.map((m) => m.path.field); + expect(fields.toSorted((a, b) => (a ?? '').localeCompare(b ?? ''))).toEqual(['command', 'run']); + }); + + it('preserves the chosen alternative in concrete paths', () => { + const out = findOcPaths(yaml, parseOcPath('oc://wf/steps/*/{command,pipeline}')); + expect(out).toHaveLength(2); + for (const m of out) { + expect(['command', 'pipeline']).toContain(m.path.field); + } + }); + + it('unions on top-level keys', () => { + const yaml2 = parseYaml('a: 1\nb: 2\nc: 3\n').ast; + const out = findOcPaths(yaml2, parseOcPath('oc://X/{a,c}')); + expect(out).toHaveLength(2); + const values = out.map((m) => m.match.kind === 'leaf' ? m.match.valueText : ''); + expect(values.toSorted()).toEqual(['1', '3']); + }); + + it('hasWildcard detects unions (single-match guard rejects them)', () => { + expect(hasWildcard(parseOcPath('oc://X/{a,b}'))).toBe(true); + // F16 — wildcard guard now throws OC_PATH_WILDCARD_IN_RESOLVE + // instead of returning silent null. 
+ expect(() => + resolveOcPath(parseYaml('a: 1\nb: 2\n').ast, parseOcPath('oc://X/{a,b}')), + ).toThrow(/findOcPaths/); + }); +}); + +// ---------- Value predicates [key=value] ---------------------------------- + +describe('value predicates — yaml', () => { + const yaml = parseYaml( + 'steps:\n' + + ' - id: build\n command: npm run build\n' + + ' - id: test\n command: npm test\n' + + ' - id: lint\n command: npm run lint\n' + ).ast; + + it('[id=test] selects the matching step', () => { + const out = findOcPaths(yaml, parseOcPath('oc://wf/steps/[id=test]/command')); + expect(out).toHaveLength(1); + if (out[0].match.kind === 'leaf') { + expect(out[0].match.valueText).toBe('npm test'); + } + expect(out[0].path.item).toBe('1'); // concrete index of the matched step + }); + + it('predicate yields no matches when key/value missing', () => { + expect(findOcPaths(yaml, parseOcPath('oc://wf/steps/[id=nonexistent]/command'))).toHaveLength(0); + }); + + it('predicate concretizes the index — path round-trips through resolveOcPath', () => { + const out = findOcPaths(yaml, parseOcPath('oc://wf/steps/[id=build]/command')); + expect(out).toHaveLength(1); + const resolved = resolveOcPath(yaml, out[0].path); + expect(resolved?.kind).toBe('leaf'); + }); + + it('predicate rejects single-match verbs (treated as wildcard)', () => { + // F16 — wildcard guard throws on predicate too (predicate is a + // multi-match shape; resolveOcPath is single-match only). + expect(() => + resolveOcPath(yaml, parseOcPath('oc://wf/steps/[id=build]')), + ).toThrow(/findOcPaths/); + }); +}); + +describe('quoted segments (v1.0)', () => { + // Evidence: openclaw#69004 — model alias `anthropic/claude-opus-4-7`. + // Slash inside the key has no other syntax that doesn't conflict with + // path-level slash split. 
+ const jsonc = parseJsonc( + '{"agents":{"defaults":{"models":{' + + '"anthropic/claude-opus-4-7":{"alias":"opus47","contextWindow":1000000},' + + '"github-copilot/claude-opus-4.7-1m-internal":{"alias":"copilot-opus-1m","contextWindow":1000000},' + + '"plain":{"alias":"p","contextWindow":200000}' + + '}}}}' + ).ast; + + it('resolveOcPath — quoted segment with literal slash', () => { + const m = resolveOcPath( + jsonc, + parseOcPath('oc://config/agents.defaults.models/"anthropic/claude-opus-4-7"/alias'), + ); + expect(m?.kind).toBe('leaf'); + if (m?.kind === 'leaf') {expect(m.valueText).toBe('opus47');} + }); + + it('resolveOcPath — quoted segment with literal slash AND dot', () => { + const m = resolveOcPath( + jsonc, + parseOcPath('oc://config/agents.defaults.models/"github-copilot/claude-opus-4.7-1m-internal"/alias'), + ); + expect(m?.kind).toBe('leaf'); + if (m?.kind === 'leaf') {expect(m.valueText).toBe('copilot-opus-1m');} + }); + + it('quoted segment with whitespace', () => { + const ast = parseJsonc('{"prompts":{"hello world":"value"}}').ast; + const m = resolveOcPath(ast, parseOcPath('oc://X/prompts/"hello world"')); + expect(m?.kind).toBe('leaf'); + if (m?.kind === 'leaf') {expect(m.valueText).toBe('value');} + }); + + it('quoted segment with embedded escape sequences', () => { + // Key literally contains a backslash and a quote. + const ast = parseJsonc('{"keys":{"a\\\\b":"v1","c\\"d":"v2"}}').ast; + const m1 = resolveOcPath(ast, parseOcPath('oc://X/keys/"a\\\\b"')); + expect(m1?.kind).toBe('leaf'); + if (m1?.kind === 'leaf') {expect(m1.valueText).toBe('v1');} + }); + + it('findOcPaths — wildcard returns paths with quoted keys when needed', () => { + const out = findOcPaths(jsonc, parseOcPath('oc://config/agents.defaults.models/*/alias')); + expect(out).toHaveLength(3); + // The two slash-bearing keys round-trip via quotes; `plain` stays bare. 
+ const items = out.map((m) => m.path.item); + expect(items.some((s) => s === 'plain')).toBe(true); + expect(items.some((s) => s === '"anthropic/claude-opus-4-7"')).toBe(true); + expect(items.some((s) => s === '"github-copilot/claude-opus-4.7-1m-internal"')).toBe(true); + }); + + it('findOcPaths — emitted paths round-trip through resolveOcPath', () => { + const out = findOcPaths(jsonc, parseOcPath('oc://config/agents.defaults.models/*/alias')); + for (const m of out) { + const r = resolveOcPath(jsonc, m.path); + expect(r?.kind).toBe('leaf'); + } + }); + + it('rejects unbalanced quotes at parse time', () => { + expect(() => parseOcPath('oc://X/"unterminated')).toThrow(/Unbalanced/); + }); + + it('control characters still rejected inside quotes', () => { + expect(() => parseOcPath('oc://X/"\x00"')).toThrow(/Control character/); + }); +}); + +describe('value predicates — numeric operators (v1.1)', () => { + // Evidence: openclaw#54383 — compaction fails when maxTokens > model output cap. + // Doctor lint rule: flag any model with maxTokens > 128000 (Anthropic per-request output cap). + const jsonc = parseJsonc( + '{"models":{"providers":{"anthropic":{"models":[' + + '{"id":"claude-sonnet-4-6","contextWindow":1000000,"maxTokens":128000},' + + '{"id":"claude-opus-4-7","contextWindow":1000000,"maxTokens":240000},' + + '{"id":"claude-sonnet-4-7","contextWindow":200000,"maxTokens":64000}' + + ']}}}}' + ).ast; + + // Slot layout: section=`models.providers.anthropic.models`, item=predicate, field=`id`. 
+ const PREFIX = 'oc://config/models.providers.anthropic.models'; + + it('> finds models exceeding the per-request output cap', () => { + const out = findOcPaths(jsonc, parseOcPath(`${PREFIX}/[maxTokens>128000]/id`)); + expect(out).toHaveLength(1); + if (out[0].match.kind === 'leaf') {expect(out[0].match.valueText).toBe('claude-opus-4-7');} + }); + + it('>= matches the boundary', () => { + const out = findOcPaths(jsonc, parseOcPath(`${PREFIX}/[maxTokens>=128000]/id`)); + const ids = out.map((m) => m.match.kind === 'leaf' ? m.match.valueText : ''); + expect(ids.toSorted()).toEqual(['claude-opus-4-7', 'claude-sonnet-4-6']); + }); + + it('< filters small context windows', () => { + const out = findOcPaths(jsonc, parseOcPath(`${PREFIX}/[contextWindow<500000]/id`)); + expect(out).toHaveLength(1); + if (out[0].match.kind === 'leaf') {expect(out[0].match.valueText).toBe('claude-sonnet-4-7');} + }); + + it('<= matches the boundary', () => { + const out = findOcPaths(jsonc, parseOcPath(`${PREFIX}/[contextWindow<=200000]/id`)); + const ids = out.map((m) => m.match.kind === 'leaf' ? m.match.valueText : ''); + expect(ids).toEqual(['claude-sonnet-4-7']); + }); + + it('numeric operator rejects non-numeric leaves silently', () => { + // String leaf, numeric op — predicate doesn't match (no false positive). 
+ const out = findOcPaths(jsonc, parseOcPath(`${PREFIX}/[id>5]/id`)); + expect(out).toHaveLength(0); + }); + + it('rejects numeric predicate value that is not a number', () => { + const out = findOcPaths(jsonc, parseOcPath(`${PREFIX}/[maxTokens>foo]/id`)); + expect(out).toHaveLength(0); + }); +}); + +describe('value predicates — jsonc', () => { + const jsonc = parseJsonc( + '{"plugins":{"github":{"enabled":true,"role":"vcs"},"slack":{"enabled":false,"role":"chat"},"jira":{"enabled":true,"role":"tracker"}}}' + ).ast; + + it('[enabled=true] filters by sibling boolean', () => { + const out = findOcPaths(jsonc, parseOcPath('oc://config/plugins/[enabled=true]/role')); + expect(out).toHaveLength(2); + const roles = out.map((m) => m.match.kind === 'leaf' ? m.match.valueText : ''); + expect(roles.toSorted()).toEqual(['tracker', 'vcs']); + }); +}); + +// ---------- Ordinal addressing (#N) for distinct duplicate slugs ---------- + +describe('ordinal addressing — md', () => { + // Two items with the same slug after slugify (`foo: a` and `foo: b`). + const md = parseMd( + '## Tools\n\n- foo: a\n- foo: b\n- bar: c\n' + ).ast; + + it('#0 picks the first item by document order', () => { + const m = resolveOcPath(md, parseOcPath('oc://AGENTS.md/tools/#0/foo')); + expect(m?.kind).toBe('leaf'); + if (m?.kind === 'leaf') {expect(m.valueText).toBe('a');} + }); + + it('#1 picks the second item — distinct from #0 even though slug collides', () => { + const m = resolveOcPath(md, parseOcPath('oc://AGENTS.md/tools/#1/foo')); + expect(m?.kind).toBe('leaf'); + if (m?.kind === 'leaf') {expect(m.valueText).toBe('b');} + }); + + it('out-of-range #N returns null', () => { + expect(resolveOcPath(md, parseOcPath('oc://AGENTS.md/tools/#99/foo'))).toBeNull(); + }); + + it('findOcPaths disambiguates duplicate-slug items via #N', () => { + const out = findOcPaths(md, parseOcPath('oc://AGENTS.md/tools/*/foo')); + // 2 items have key `foo` (and matching slug); 1 has `bar` (no match). 
+ expect(out).toHaveLength(2); + const items = out.map((m) => m.path.item); + expect(items).toEqual(['#0', '#1']); + const values = out.map((m) => m.match.kind === 'leaf' ? m.match.valueText : ''); + expect(values.toSorted()).toEqual(['a', 'b']); + }); + + it('non-duplicate slug keeps slug form (back-compat)', () => { + const md2 = parseMd('## Tools\n\n- foo: a\n- bar: b\n').ast; + const out = findOcPaths(md2, parseOcPath('oc://AGENTS.md/tools/*')); + const items = out.map((m) => m.path.item); + // Both unique → both stay as slugs. + expect(items.toSorted((a, b) => (a ?? '').localeCompare(b ?? ''))).toEqual(['bar', 'foo']); + }); +}); + +// ---------- findOcPaths — Markdown ----------------------------------------- + +describe('findOcPaths — Markdown kind', () => { + const md = parseMd( + '---\nname: drafter\nrole: writer\n---\n\n' + + '## Tools\n\n' + + '- send_email: enabled\n' + + '- search: enabled\n' + + '- read_email: disabled\n' + ).ast; + + it('* in field slot enumerates frontmatter keys', () => { + const out = findOcPaths(md, parseOcPath('oc://SOUL.md/[frontmatter]/*')); + expect(out).toHaveLength(2); + const keys = out.map((m) => m.path.item ?? m.path.field); + expect(keys.toSorted((a, b) => (a ?? '').localeCompare(b ?? ''))).toEqual(['name', 'role']); + }); + + it('* in field slot enumerates each item kv key', () => { + // Item slug is the kv-key slug ('send_email' → 'send-email'). + const out = findOcPaths(md, parseOcPath('oc://SKILL.md/Tools/send-email/*')); + expect(out).toHaveLength(1); + expect(out[0].match.kind).toBe('leaf'); + if (out[0].match.kind === 'leaf') { + expect(out[0].match.valueText).toBe('enabled'); + } + }); + + it('* in item slot + matching field returns each item whose kv key matches', () => { + // The kv key on `- send_email: enabled` is `send_email`. Pattern + // field='send_email' matches that one item; the other two items + // (search, read_email) have different kv keys. 
+ const out = findOcPaths(md, parseOcPath('oc://SKILL.md/Tools/*/send_email')); + expect(out).toHaveLength(1); + expect(out[0].path.item).toBe('send-email'); + }); + + it('** at section slot matches items at every depth (F14 — cross-kind symmetry)', () => { + // Without the retain-i branch on `**`, walkMd only descended one + // level (i + 1, consumed `**`) — yaml/jsonc walkers also retain + // `**` to keep matching deeper. Lint rules expecting universal + // `**` behavior across kinds (sweep all sections for `risk:`) + // would silently get 0 md matches on a multi-block file. + // + // Pattern `**/send-email` — `**` matches the `tools` block, then + // `send-email` (kebab slug) matches the item under it. Without the + // retain-i branch, the walker descends with `**` consumed at the + // section layer and then can't satisfy the item slot since the + // walker is now inside the wrong block looking for an item slug. + const multiBlock = parseMd( + '## Boundaries\n\n' + + '- never: rm -rf\n\n' + + '## Tools\n\n' + + '- send_email: enabled\n' + + '- search: enabled\n', + ).ast; + const out = findOcPaths(multiBlock, parseOcPath('oc://SOUL.md/**/send-email')); + // The `send-email` item is under the `tools` block. Pin that we + // get at least one match (the substrate's md `**` should reach it). + expect(out.length).toBeGreaterThanOrEqual(1); + const items = out.map((m) => m.path.item).filter((v): v is string => v !== undefined); + expect(items).toContain('send-email'); + }); +}); + +describe('findOcPaths — quoted segments survive expansion (regression: resolve↔find symmetry)', () => { + it('finds keys with slashes when the path quotes them and a sibling wildcards', () => { + // Closes ClawSweeper P2 on PR #78678: when a pattern needs + // expansion (e.g. trailing union or wildcard), the JSONC walker + // bypassed `resolveJsoncOcPath` and compared object keys to the + // raw `cur.value` directly. 
Patterns with quoted literals + // returned no matches even though resolve worked. This test + // exercises a quoted middle segment + a trailing union. + const raw = `{ + "agents": { + "defaults": { + "models": { + "github-copilot/claude-opus-4-7": { + "alias": "opus-internal", + "contextWindow": 200000 + } + } + } + } +} +`; + const { ast } = parseJsonc(raw); + const out = findOcPaths( + ast, + parseOcPath( + 'oc://config.jsonc/agents.defaults.models/"github-copilot/claude-opus-4-7"/{alias,contextWindow}', + ), + ); + // Both alternatives in the union should match. + expect(out.length).toBe(2); + const fields = out.map((m) => m.path.field).toSorted((a, b) => (a ?? '').localeCompare(b ?? '')); + expect(fields).toEqual(['alias', 'contextWindow']); + }); +}); diff --git a/src/oc-path/tests/fixtures/real/AGENTS.md b/src/oc-path/tests/fixtures/real/AGENTS.md new file mode 100644 index 00000000000..a79f0e6d24d --- /dev/null +++ b/src/oc-path/tests/fixtures/real/AGENTS.md @@ -0,0 +1,17 @@ +## Roles + +- planner: breaks down user goals into tasks +- executor: runs the planned tasks one at a time +- reviewer: checks output before user-visible writes + +## Tools + +- gh: GitHub CLI for issues, PRs, CI +- curl: HTTP client +- rg: ripgrep — fast file content search + +## Boundaries + +- never edit /etc, /usr, or system paths +- always confirm before destructive operations +- read SOUL.md before each session for persona context diff --git a/src/oc-path/tests/fixtures/real/BOOTSTRAP.md b/src/oc-path/tests/fixtures/real/BOOTSTRAP.md new file mode 100644 index 00000000000..c6c266c8c71 --- /dev/null +++ b/src/oc-path/tests/fixtures/real/BOOTSTRAP.md @@ -0,0 +1,17 @@ +# Workspace bootstrap + +This is the first thing the agent reads on a fresh workspace. Once +the user finishes setup (filling in SOUL.md, USER.md, etc.), +BOOTSTRAP.md gets removed and the workspace is "live." 
+ +## Setup checklist + +- review SOUL.md and add personal context +- review USER.md and add role/preferences +- run `openclaw doctor` to verify config + workspace are valid +- confirm the gateway can reach your providers + +## Removing this file + +When the checklist is complete, delete BOOTSTRAP.md. The runtime +detects its absence as "setup complete." diff --git a/src/oc-path/tests/fixtures/real/HEARTBEAT.md b/src/oc-path/tests/fixtures/real/HEARTBEAT.md new file mode 100644 index 00000000000..b9bcbd33838 --- /dev/null +++ b/src/oc-path/tests/fixtures/real/HEARTBEAT.md @@ -0,0 +1,16 @@ +## Every 30m wake + +- check unread Slack DMs in #incidents +- summarize new PR review comments since last wake +- if any test fails on main, surface to user immediately + +## Every 4h wake + +- compile a brief status summary of in-flight tasks +- check Linear for new high-priority issues +- update the daily log entry + +## On user-presence wake + +- briefly orient on what changed since last user interaction +- prioritize incoming items by urgency diff --git a/src/oc-path/tests/fixtures/real/IDENTITY.md b/src/oc-path/tests/fixtures/real/IDENTITY.md new file mode 100644 index 00000000000..cddcd60c940 --- /dev/null +++ b/src/oc-path/tests/fixtures/real/IDENTITY.md @@ -0,0 +1,19 @@ +## Organization + +Example Org / Platform Team + +## Team + +OpenClaw infrastructure & tooling + +## Trust Level + +internal-trusted + +## Region + +us-west + +## Compliance scope + +SOC 2 Type II + FedRAMP Moderate (in audit) diff --git a/src/oc-path/tests/fixtures/real/MEMORY.md b/src/oc-path/tests/fixtures/real/MEMORY.md new file mode 100644 index 00000000000..b0924b2d307 --- /dev/null +++ b/src/oc-path/tests/fixtures/real/MEMORY.md @@ -0,0 +1,18 @@ +--- +scope: project +--- + +## User prefers async communication + +The user has mentioned twice (sessions 2026-04-15 and 2026-04-22) that +they prefer Slack DMs over meetings for short questions. 
+ +## Project uses TypeScript with strict mode + +The codebase enforces `strict: true` and `noUncheckedIndexedAccess`. +Avoid `any`; prefer `unknown` with narrowing. + +## Deploy on Tuesdays only + +Production deploys happen Tue 9am-12pm Pacific. Outside that window, +deploys go to staging and wait for the next Tuesday window. diff --git a/src/oc-path/tests/fixtures/real/SKILL.md b/src/oc-path/tests/fixtures/real/SKILL.md new file mode 100644 index 00000000000..8efafb4c05c --- /dev/null +++ b/src/oc-path/tests/fixtures/real/SKILL.md @@ -0,0 +1,38 @@ +--- +name: github +description: Use gh for GitHub issues, PR status, CI/logs, comments, reviews, releases, and API queries. +tier: T1 +tools: + - gh + - bash +trigger_phrases: + - github + - pr + - issue + - workflow +metadata: { "openclaw": { "emoji": "🐙", "requires": { "bins": ["gh"] } } } +user-invocable: true +--- + +# When to use + +Use this skill when the user asks anything about GitHub: issues, pull +requests, CI runs, releases, comments, code review, or organizational +metadata. Prefer the `gh` CLI over web URLs — `gh` handles auth, +pagination, and structured output natively. + +## Common commands + +```bash +gh pr view 123 # view PR details +gh pr checks 123 # CI status +gh issue list --state open # list open issues +gh run list -L 5 # last 5 workflow runs +gh release create v1.2.3 # cut a release +``` + +## When NOT to use + +- The user's repo is on a non-GitHub forge (GitLab, Gitea, Bitbucket). + Use the appropriate CLI instead. +- Operations that require admin permissions the agent doesn't have. diff --git a/src/oc-path/tests/fixtures/real/SOUL.md b/src/oc-path/tests/fixtures/real/SOUL.md new file mode 100644 index 00000000000..abff7cebc7a --- /dev/null +++ b/src/oc-path/tests/fixtures/real/SOUL.md @@ -0,0 +1,17 @@ +# Persona + +I'm a thoughtful, methodical assistant. I ask clarifying questions +when the user's request is ambiguous, and I'd rather be slightly +slower than confidently wrong. 
+ +## Voice + +- terse and direct +- no filler words +- code snippets > prose when explaining technical things + +## Boundaries + +- never write to /etc or system paths +- always confirm before deleting files +- redact secrets from logs and audit trails diff --git a/src/oc-path/tests/fixtures/real/TOOLS.md b/src/oc-path/tests/fixtures/real/TOOLS.md new file mode 100644 index 00000000000..96940bf02b4 --- /dev/null +++ b/src/oc-path/tests/fixtures/real/TOOLS.md @@ -0,0 +1,21 @@ +## Tool Guidance + +| tool | guidance | +| --- | --- | +| gh | Use for GitHub operations (issues, PRs, CI). Prefer over web. | +| curl | HTTP client. Use --silent for clean output. | +| rg | ripgrep — content search. Faster than grep for code. | +| fd | find replacement. Use over `find` when available. | + +## Allow / Deny + +- enabled: gh +- enabled: curl +- enabled: rg +- enabled: fd +- disabled: legacy-tool + +## Notes + +The agent reads this file at session start; runtime tool gates honor +the `enabled` flags. diff --git a/src/oc-path/tests/fixtures/real/USER.md b/src/oc-path/tests/fixtures/real/USER.md new file mode 100644 index 00000000000..de536bed64a --- /dev/null +++ b/src/oc-path/tests/fixtures/real/USER.md @@ -0,0 +1,16 @@ +## Role + +Senior PM working on AI runtime + governance layers. Reports to a VP-level +stakeholder; coordinates across 4-6 engineering teams. 
+ +## Preferences + +- async-first communication (Slack DMs > meetings) +- terse responses; avoid filler +- code snippets > prose for technical detail +- always include repo:file:line citations for code claims + +## Working hours + +- Mon-Fri 9am-6pm Pacific +- occasional evening for sync with EU teams diff --git a/src/oc-path/tests/jsonc/edit.test.ts b/src/oc-path/tests/jsonc/edit.test.ts new file mode 100644 index 00000000000..b3a2c563048 --- /dev/null +++ b/src/oc-path/tests/jsonc/edit.test.ts @@ -0,0 +1,185 @@ +import { describe, expect, it } from 'vitest'; +import { setJsoncOcPath } from '../../jsonc/edit.js'; +import { emitJsonc } from '../../jsonc/emit.js'; +import { parseJsonc } from '../../jsonc/parse.js'; +import { parseOcPath } from '../../oc-path.js'; + +describe('setJsoncOcPath — value replacement', () => { + const config = `{ + "plugins": { + "entries": { + "github": { + "token": "old" + } + } + } +}`; + + it('replaces a leaf string value', () => { + const { ast } = parseJsonc(config); + const r = setJsoncOcPath( + ast, + parseOcPath('oc://config/plugins.entries.github.token'), + { kind: 'string', value: 'new' }, + ); + expect(r.ok).toBe(true); + if (r.ok) { + const out = emitJsonc(r.ast); + expect(JSON.parse(out)).toEqual({ + plugins: { entries: { github: { token: 'new' } } }, + }); + } + }); + + it('replaces nested objects', () => { + const { ast } = parseJsonc(config); + const r = setJsoncOcPath(ast, parseOcPath('oc://config/plugins.entries'), { + kind: 'object', + entries: [ + { key: 'gitlab', line: 0, value: { kind: 'string', value: 'tok' } }, + ], + }); + expect(r.ok).toBe(true); + if (r.ok) { + expect(JSON.parse(emitJsonc(r.ast))).toEqual({ + plugins: { entries: { gitlab: 'tok' } }, + }); + } + }); + + it('replaces an array element by index', () => { + const { ast } = parseJsonc('{ "limits": [10, 20, 30] }'); + const r = setJsoncOcPath(ast, parseOcPath('oc://config/limits.1'), { + kind: 'number', + value: 99, + }); + expect(r.ok).toBe(true); + 
if (r.ok) {expect(JSON.parse(emitJsonc(r.ast))).toEqual({ limits: [10, 99, 30] });} + }); + + it('reports unresolved when a key is missing', () => { + const { ast } = parseJsonc(config); + const r = setJsoncOcPath( + ast, + parseOcPath('oc://config/plugins.entries.gitlab'), + { kind: 'string', value: 'x' }, + ); + expect(r).toEqual({ ok: false, reason: 'unresolved' }); + }); + + it('reports no-root on empty AST', () => { + const { ast } = parseJsonc(''); + const r = setJsoncOcPath(ast, parseOcPath('oc://config/x'), { + kind: 'string', + value: 'y', + }); + expect(r).toEqual({ ok: false, reason: 'no-root' }); + }); + + it('does not mutate the original AST', () => { + const { ast } = parseJsonc(config); + const before = JSON.stringify(ast); + setJsoncOcPath(ast, parseOcPath('oc://config/plugins.entries.github.token'), { + kind: 'string', + value: 'new', + }); + expect(JSON.stringify(ast)).toBe(before); + }); +}); + +describe('setJsoncOcPath — positional tokens (round-11 resolve↔edit symmetry)', () => { + // ClawSweeper round-11 P2 — `$first` / `$last` / `-N` resolved on + // the read path but not on the edit path. Pin the new behavior: + // editing through a positional address must reach the same child + // that `resolveJsoncOcPath` would have returned. 
+ it('edits the first array element via $first', () => { + const { ast } = parseJsonc('{ "items": [10, 20, 30] }'); + const r = setJsoncOcPath( + ast, + parseOcPath('oc://config.jsonc/items/$first'), + { kind: 'number', value: 99 }, + ); + expect(r.ok).toBe(true); + if (r.ok) {expect(JSON.parse(emitJsonc(r.ast))).toEqual({ items: [99, 20, 30] });} + }); + + it('edits the last array element via $last', () => { + const { ast } = parseJsonc('{ "items": [10, 20, 30] }'); + const r = setJsoncOcPath( + ast, + parseOcPath('oc://config.jsonc/items/$last'), + { kind: 'number', value: 99 }, + ); + expect(r.ok).toBe(true); + if (r.ok) {expect(JSON.parse(emitJsonc(r.ast))).toEqual({ items: [10, 20, 99] });} + }); + + it('edits the second-to-last array element via -2', () => { + const { ast } = parseJsonc('{ "items": [10, 20, 30] }'); + const r = setJsoncOcPath( + ast, + parseOcPath('oc://config.jsonc/items/-2'), + { kind: 'number', value: 99 }, + ); + expect(r.ok).toBe(true); + if (r.ok) {expect(JSON.parse(emitJsonc(r.ast))).toEqual({ items: [10, 99, 30] });} + }); + + it('edits the first object entry value via $first', () => { + const { ast } = parseJsonc('{ "a": 1, "b": 2, "c": 3 }'); + const r = setJsoncOcPath( + ast, + parseOcPath('oc://config.jsonc/$first'), + { kind: 'number', value: 99 }, + ); + expect(r.ok).toBe(true); + if (r.ok) {expect(JSON.parse(emitJsonc(r.ast))).toEqual({ a: 99, b: 2, c: 3 });} + }); + + it('reports unresolved for $first against an empty array', () => { + const { ast } = parseJsonc('{ "items": [] }'); + const r = setJsoncOcPath( + ast, + parseOcPath('oc://config.jsonc/items/$first'), + { kind: 'number', value: 99 }, + ); + expect(r).toEqual({ ok: false, reason: 'unresolved' }); + }); +}); + +describe('setJsoncOcPath — quoted segments (regression: resolve↔edit symmetry)', () => { + it('edits a key containing slashes via quoted segment', () => { + // The provider/model alias key contains a `/`; without quoting + // it would be split as two 
segments. `resolveJsoncOcPath` handles + // this; `setJsoncOcPath` MUST handle it the same way or the path + // becomes resolve-only. Closes ClawSweeper P2 on PR #78678. + const raw = `{ + "agents": { + "defaults": { + "models": { + "anthropic/claude-opus-4-7": { "alias": "opus" } + } + } + } +} +`; + const { ast } = parseJsonc(raw); + const r = setJsoncOcPath( + ast, + parseOcPath('oc://config.jsonc/agents.defaults.models/"anthropic/claude-opus-4-7"/alias'), + { kind: 'string', value: 'big-opus' }, + ); + expect(r.ok).toBe(true); + if (r.ok) { + expect(JSON.parse(emitJsonc(r.ast))).toEqual({ + agents: { + defaults: { + models: { + 'anthropic/claude-opus-4-7': { alias: 'big-opus' }, + }, + }, + }, + }); + } + }); +}); diff --git a/src/oc-path/tests/jsonc/emit.test.ts b/src/oc-path/tests/jsonc/emit.test.ts new file mode 100644 index 00000000000..9308abd02fe --- /dev/null +++ b/src/oc-path/tests/jsonc/emit.test.ts @@ -0,0 +1,94 @@ +import { describe, expect, it } from 'vitest'; +import { emitJsonc } from '../../jsonc/emit.js'; +import { parseJsonc } from '../../jsonc/parse.js'; +import { + OcEmitSentinelError, + REDACTED_SENTINEL, +} from '../../sentinel.js'; + +describe('emitJsonc — round-trip', () => { + it('returns raw bytes verbatim by default', () => { + const raw = `{ + // comment is preserved on round-trip + "x": 1, + "y": [/* inline */ 2, 3], +} +`; + const { ast } = parseJsonc(raw); + expect(emitJsonc(ast)).toBe(raw); + }); + + it('echoes pre-existing sentinel bytes by default; strict mode rejects', () => { + // Round-trip trusts parsed bytes — workspace files legitimately + // containing the sentinel (in code blocks, pasted error logs) + // would otherwise become a workspace-wide emit DoS. Strict mode + // is the opt-in path. 
+ const raw = `{ "x": "${REDACTED_SENTINEL}" }`; + const { ast } = parseJsonc(raw); + expect(emitJsonc(ast)).toBe(raw); + expect(() => + emitJsonc(ast, { fileNameForGuard: 'config', acceptPreExistingSentinel: false }), + ).toThrow(OcEmitSentinelError); + }); +}); + +describe('emitJsonc — render mode', () => { + it('re-stringifies the structural tree (no comments)', () => { + const { ast } = parseJsonc('{ /* drop me */ "x": 1, "y": [2, 3] }'); + const out = emitJsonc(ast, { mode: 'render' }); + expect(out).not.toContain('drop me'); + expect(JSON.parse(out)).toEqual({ x: 1, y: [2, 3] }); + }); + + it('throws OcEmitSentinelError when a leaf string is the sentinel', () => { + const ast = parseJsonc('{ "x": "ok" }').ast; + const tampered = { + ...ast, + root: { + kind: 'object' as const, + entries: [ + { + key: 'x', + line: 1, + value: { kind: 'string' as const, value: REDACTED_SENTINEL }, + }, + ], + }, + }; + expect(() => emitJsonc(tampered, { mode: 'render' })).toThrow( + OcEmitSentinelError, + ); + }); + + it('throws when a leaf string EMBEDS the sentinel (prefix/suffix wrap)', () => { + // Regression: prior to this fix, render mode used `value.value === SENTINEL` + // (exact match), so `prefix__OPENCLAW_REDACTED__suffix` slipped through. + // The roundtrip path always used `.includes()` for the same reason — + // render must too. Catches the sentinel-guard bypass class. 
+ const ast = parseJsonc('{ "x": "ok" }').ast; + const tampered = { + ...ast, + root: { + kind: 'object' as const, + entries: [ + { + key: 'x', + line: 1, + value: { + kind: 'string' as const, + value: `prefix-${REDACTED_SENTINEL}-suffix`, + }, + }, + ], + }, + }; + expect(() => emitJsonc(tampered, { mode: 'render' })).toThrow( + OcEmitSentinelError, + ); + }); + + it('renders empty AST as empty string', () => { + const { ast } = parseJsonc(''); + expect(emitJsonc(ast, { mode: 'render' })).toBe(''); + }); +}); diff --git a/src/oc-path/tests/jsonc/parse.test.ts b/src/oc-path/tests/jsonc/parse.test.ts new file mode 100644 index 00000000000..cd6615e9c97 --- /dev/null +++ b/src/oc-path/tests/jsonc/parse.test.ts @@ -0,0 +1,144 @@ +import { describe, expect, it } from 'vitest'; +import { parseJsonc } from '../../jsonc/parse.js'; + +describe('parseJsonc — basic shapes', () => { + it('parses an empty object', () => { + const { ast, diagnostics } = parseJsonc('{}'); + expect(diagnostics).toEqual([]); + expect(ast.kind).toBe('jsonc'); + expect(ast.root).toEqual({ kind: 'object', entries: [], line: 1 }); + }); + + it('parses an empty array', () => { + const { ast, diagnostics } = parseJsonc('[]'); + expect(diagnostics).toEqual([]); + expect(ast.root).toEqual({ kind: 'array', items: [], line: 1 }); + }); + + it('parses an empty input as null root', () => { + const { ast, diagnostics } = parseJsonc(''); + expect(diagnostics).toEqual([]); + expect(ast.root).toBeNull(); + }); + + it('parses scalars', () => { + expect(parseJsonc('42').ast.root).toEqual({ kind: 'number', value: 42, line: 1 }); + expect(parseJsonc('-3.14').ast.root).toEqual({ kind: 'number', value: -3.14, line: 1 }); + expect(parseJsonc('1e3').ast.root).toEqual({ kind: 'number', value: 1000, line: 1 }); + expect(parseJsonc('"hello"').ast.root).toEqual({ kind: 'string', value: 'hello', line: 1 }); + expect(parseJsonc('true').ast.root).toEqual({ kind: 'boolean', value: true, line: 1 }); + 
expect(parseJsonc('false').ast.root).toEqual({ kind: 'boolean', value: false, line: 1 }); + expect(parseJsonc('null').ast.root).toEqual({ kind: 'null', line: 1 }); + }); + + it('parses nested object/array', () => { + const raw = '{ "plugins": { "entries": ["a", "b"] } }'; + const { ast, diagnostics } = parseJsonc(raw); + expect(diagnostics).toEqual([]); + expect(ast.root).toEqual({ + kind: 'object', + line: 1, + entries: [ + { + key: 'plugins', + line: 1, + value: { + kind: 'object', + line: 1, + entries: [ + { + key: 'entries', + line: 1, + value: { + kind: 'array', + line: 1, + items: [ + { kind: 'string', value: 'a', line: 1 }, + { kind: 'string', value: 'b', line: 1 }, + ], + }, + }, + ], + }, + }, + ], + }); + }); + + it('preserves raw on the AST root for byte-fidelity emit', () => { + const raw = '{\n "x": 1\n}\n'; + const { ast } = parseJsonc(raw); + expect(ast.raw).toBe(raw); + }); +}); + +describe('parseJsonc — JSONC extensions', () => { + it('skips line comments', () => { + const raw = `{ + // comment + "x": 1 // trailing comment + }`; + const { ast, diagnostics } = parseJsonc(raw); + expect(diagnostics).toEqual([]); + expect(ast.root).toEqual({ + kind: 'object', + line: 1, + entries: [{ key: 'x', value: { kind: 'number', value: 1, line: 3 }, line: 3 }], + }); + }); + + it('skips block comments', () => { + const raw = '{ /* hi */ "x": /* mid */ 1 }'; + const { ast, diagnostics } = parseJsonc(raw); + expect(diagnostics).toEqual([]); + expect(ast.root).toEqual({ + kind: 'object', + line: 1, + entries: [{ key: 'x', value: { kind: 'number', value: 1, line: 1 }, line: 1 }], + }); + }); + + it('tolerates trailing commas in objects', () => { + const { ast, diagnostics } = parseJsonc('{ "x": 1, }'); + expect(diagnostics).toEqual([]); + expect(ast.root).toEqual({ + kind: 'object', + line: 1, + entries: [{ key: 'x', value: { kind: 'number', value: 1, line: 1 }, line: 1 }], + }); + }); + + it('tolerates trailing commas in arrays', () => { + const { ast } = 
parseJsonc('[1, 2, 3,]'); + expect(ast.root).toEqual({ + kind: 'array', + line: 1, + items: [ + { kind: 'number', value: 1, line: 1 }, + { kind: 'number', value: 2, line: 1 }, + { kind: 'number', value: 3, line: 1 }, + ], + }); + }); + + it('handles escape sequences in strings', () => { + const { ast } = parseJsonc('"a\\nb\\tc\\u0041"'); + expect(ast.root).toEqual({ kind: 'string', value: 'a\nb\tcA', line: 1 }); + }); +}); + +describe('parseJsonc — soft errors', () => { + it('returns null root + error diagnostic on unrecoverable input', () => { + const { ast, diagnostics } = parseJsonc('{ "x" 1 }'); + expect(ast.root).toBeNull(); + expect(diagnostics).toHaveLength(1); + expect(diagnostics[0]?.severity).toBe('error'); + }); + + it('warns on trailing input after a valid value', () => { + const { diagnostics } = parseJsonc('1 garbage'); + expect(diagnostics).toHaveLength(1); + expect(diagnostics[0]?.severity).toBe('warning'); + expect(diagnostics[0]?.code).toBe('OC_JSONC_TRAILING_INPUT'); + }); +}); diff --git a/src/oc-path/tests/jsonc/resolve.test.ts b/src/oc-path/tests/jsonc/resolve.test.ts new file mode 100644 index 00000000000..bce034ec1ff --- /dev/null +++ b/src/oc-path/tests/jsonc/resolve.test.ts @@ -0,0 +1,76 @@ +import { describe, expect, it } from 'vitest'; +import { parseJsonc } from '../../jsonc/parse.js'; +import { resolveJsoncOcPath } from '../../jsonc/resolve.js'; +import { parseOcPath } from '../../oc-path.js'; + +function rs(raw: string, ocPath: string) { + const { ast } = parseJsonc(raw); + const path = parseOcPath(ocPath); + return resolveJsoncOcPath(ast, path); +} + +describe('resolveJsoncOcPath', () => { + const config = `{ + "plugins": { + "entries": { + "github": { + "token": "secret", + "enabled": true + } + } + }, + "limits": [10, 20, 30] +}`; + + it('resolves the root when no segments are given', () => { + const m = rs(config, 'oc://config'); + expect(m?.kind).toBe('root'); + }); + + it('walks dotted section paths', () => { + const m = 
rs(config, 'oc://config/plugins.entries.github.token'); + expect(m?.kind).toBe('object-entry'); + if (m?.kind === 'object-entry') { + expect(m.node.key).toBe('token'); + expect(m.node.value).toMatchObject({ kind: 'string', value: 'secret' }); + } + }); + + it('walks 4-segment slash paths up to OcPath depth limit', () => { + const m = rs(config, 'oc://config/plugins/entries/github'); + expect(m?.kind).toBe('object-entry'); + if (m?.kind === 'object-entry') { + expect(m.node.key).toBe('github'); + } + }); + + it('walks mixed dotted+slash paths', () => { + const m = rs(config, 'oc://config/plugins/entries.github.token'); + expect(m?.kind).toBe('object-entry'); + }); + + it('indexes into arrays via numeric segments', () => { + const m = rs(config, 'oc://config/limits.1'); + expect(m?.kind).toBe('value'); + if (m?.kind === 'value') { + expect(m.node).toMatchObject({ kind: 'number', value: 20 }); + } + }); + + it('returns null for missing keys', () => { + expect(rs(config, 'oc://config/plugins.entries.gitlab')).toBeNull(); + }); + + it('returns null for out-of-bounds array indexes', () => { + expect(rs(config, 'oc://config/limits.99')).toBeNull(); + }); + + it('returns null when descending past a primitive', () => { + expect(rs(config, 'oc://config/plugins.entries.github.token.x')).toBeNull(); + }); + + it('returns null on empty AST', () => { + const { ast } = parseJsonc(''); + expect(resolveJsoncOcPath(ast, parseOcPath('oc://config/x'))).toBeNull(); + }); +}); diff --git a/src/oc-path/tests/jsonl/edit.test.ts b/src/oc-path/tests/jsonl/edit.test.ts new file mode 100644 index 00000000000..fa21c56e01d --- /dev/null +++ b/src/oc-path/tests/jsonl/edit.test.ts @@ -0,0 +1,242 @@ +import { describe, expect, it } from 'vitest'; +import { + appendJsonlOcPath, + setJsonlOcPath, +} from '../../jsonl/edit.js'; +import { emitJsonl } from '../../jsonl/emit.js'; +import { parseJsonl } from '../../jsonl/parse.js'; +import { parseOcPath } from '../../oc-path.js'; + 
+describe('setJsonlOcPath — value replacement', () => { + const log = '{"event":"start"}\n{"event":"step","n":1}\n{"event":"end"}\n'; + + it('replaces a field on a specific line', () => { + const { ast } = parseJsonl(log); + const r = setJsonlOcPath(ast, parseOcPath('oc://session-events/L2/n'), { + kind: 'number', + value: 42, + }); + expect(r.ok).toBe(true); + if (r.ok) { + const lines = emitJsonl(r.ast).split('\n'); + expect(JSON.parse(lines[1] ?? '')).toEqual({ event: 'step', n: 42 }); + } + }); + + it('replaces an entire line value', () => { + const { ast } = parseJsonl(log); + const r = setJsonlOcPath(ast, parseOcPath('oc://session-events/L2'), { + kind: 'object', + entries: [ + { key: 'event', line: 0, value: { kind: 'string', value: 'replaced' } }, + ], + }); + expect(r.ok).toBe(true); + if (r.ok) { + const lines = emitJsonl(r.ast).split('\n'); + expect(JSON.parse(lines[1] ?? '')).toEqual({ event: 'replaced' }); + } + }); + + it('resolves $last and edits the most recent value line', () => { + const { ast } = parseJsonl(log); + const r = setJsonlOcPath(ast, parseOcPath('oc://session-events/$last/event'), { + kind: 'string', + value: 'final', + }); + expect(r.ok).toBe(true); + if (r.ok) { + const lines = emitJsonl(r.ast).split('\n'); + expect(JSON.parse(lines[2] ?? 
'')).toEqual({ event: 'final' }); + } + }); + + it('reports unresolved for unknown line addresses', () => { + const { ast } = parseJsonl(log); + const r = setJsonlOcPath(ast, parseOcPath('oc://session-events/L99/x'), { + kind: 'number', + value: 1, + }); + expect(r).toEqual({ ok: false, reason: 'unresolved' }); + }); + + it('reports not-a-value-line when targeting a blank line', () => { + const { ast } = parseJsonl('{"a":1}\n\n{"b":2}\n'); + const r = setJsonlOcPath(ast, parseOcPath('oc://session-events/L2'), { + kind: 'number', + value: 1, + }); + expect(r).toEqual({ ok: false, reason: 'not-a-value-line' }); + }); +}); + +describe('appendJsonlOcPath — session checkpointing primitive', () => { + it('appends to an empty file', () => { + const { ast } = parseJsonl(''); + const next = appendJsonlOcPath(ast, { + kind: 'object', + entries: [{ key: 'event', line: 0, value: { kind: 'string', value: 'start' } }], + }); + expect(emitJsonl(next)).toBe('{"event":"start"}'); + }); + + it('appends to an existing log preserving prior lines', () => { + const { ast } = parseJsonl('{"a":1}\n'); + const next = appendJsonlOcPath(ast, { + kind: 'object', + entries: [{ key: 'b', line: 0, value: { kind: 'number', value: 2 } }], + }); + const out = emitJsonl(next).split('\n'); + expect(out).toHaveLength(2); + expect(JSON.parse(out[1] ?? '')).toEqual({ b: 2 }); + }); +}); + +describe('setJsonlOcPath — line-address positional tokens (resolve↔edit symmetry)', () => { + // Line-address slot must accept every token shape pickLine accepts + // (resolve.ts and find.ts already do). Without `$first` and `-N` here, + // a path that reads under those tokens silently unresolves on write. 
+ const log = '{"event":"start","n":1}\n{"event":"step","n":2}\n{"event":"end","n":3}\n'; + + it('writes under $first line address', () => { + const { ast } = parseJsonl(log); + const r = setJsonlOcPath(ast, parseOcPath('oc://session-events/$first/n'), { + kind: 'number', + value: 99, + }); + expect(r.ok).toBe(true); + if (r.ok) { + const lines = emitJsonl(r.ast).split('\n'); + expect(JSON.parse(lines[0] ?? '')).toEqual({ event: 'start', n: 99 }); + } + }); + + it('writes under -1 line address (alias for last value line)', () => { + const { ast } = parseJsonl(log); + const r = setJsonlOcPath(ast, parseOcPath('oc://session-events/-1/n'), { + kind: 'number', + value: 99, + }); + expect(r.ok).toBe(true); + if (r.ok) { + const lines = emitJsonl(r.ast).split('\n'); + expect(JSON.parse(lines[2] ?? '')).toEqual({ event: 'end', n: 99 }); + } + }); + + it('writes under -2 line address (penultimate value line)', () => { + const { ast } = parseJsonl(log); + const r = setJsonlOcPath(ast, parseOcPath('oc://session-events/-2/n'), { + kind: 'number', + value: 99, + }); + expect(r.ok).toBe(true); + if (r.ok) { + const lines = emitJsonl(r.ast).split('\n'); + expect(JSON.parse(lines[1] ?? 
'')).toEqual({ event: 'step', n: 99 }); + } + }); + + it('reports unresolved for $first against an empty log', () => { + const { ast } = parseJsonl(''); + const r = setJsonlOcPath(ast, parseOcPath('oc://session-events/$first/n'), { + kind: 'number', + value: 99, + }); + expect(r).toEqual({ ok: false, reason: 'unresolved' }); + }); + + it('reports unresolved for -99 (out-of-range) line address', () => { + const { ast } = parseJsonl(log); + const r = setJsonlOcPath(ast, parseOcPath('oc://session-events/-99/n'), { + kind: 'number', + value: 99, + }); + expect(r).toEqual({ ok: false, reason: 'unresolved' }); + }); +}); + +describe('setJsonlOcPath — positional field tokens (round-11 resolve↔edit symmetry)', () => { + // ClawSweeper round-11 P2 — JSONL line-address `$last` already + // resolved (pickLineIndex), but positional tokens INSIDE a line's + // structural body (item / field) were not. Pin the in-line edit + // path: a `$first` / `$last` / `-N` field-segment must reach the + // same child as resolveJsonlOcPath. + const log = '{"items":[10,20,30],"events":{"a":1,"b":2}}\n'; + + it('edits the first array item on a line via $first', () => { + const { ast } = parseJsonl(log); + const r = setJsonlOcPath( + ast, + parseOcPath('oc://session-events/L1/items/$first'), + { kind: 'number', value: 99 }, + ); + expect(r.ok).toBe(true); + if (r.ok) { + const firstLine = emitJsonl(r.ast).split('\n').find((l) => l.length > 0) ?? ''; + expect(JSON.parse(firstLine)).toEqual({ + items: [99, 20, 30], + events: { a: 1, b: 2 }, + }); + } + }); + + it('edits the last array item on a line via $last', () => { + const { ast } = parseJsonl(log); + const r = setJsonlOcPath( + ast, + parseOcPath('oc://session-events/L1/items/$last'), + { kind: 'number', value: 99 }, + ); + expect(r.ok).toBe(true); + if (r.ok) { + const firstLine = emitJsonl(r.ast).split('\n').find((l) => l.length > 0) ?? 
''; + expect(JSON.parse(firstLine)).toEqual({ + items: [10, 20, 99], + events: { a: 1, b: 2 }, + }); + } + }); + + it('edits the first object entry on a line via $first', () => { + const { ast } = parseJsonl(log); + const r = setJsonlOcPath( + ast, + parseOcPath('oc://session-events/L1/events/$first'), + { kind: 'number', value: 99 }, + ); + expect(r.ok).toBe(true); + if (r.ok) { + const firstLine = emitJsonl(r.ast).split('\n').find((l) => l.length > 0) ?? ''; + expect(JSON.parse(firstLine)).toEqual({ + items: [10, 20, 30], + events: { a: 99, b: 2 }, + }); + } + }); +}); + +describe('setJsonlOcPath — quoted field segments (regression: resolve↔edit symmetry)', () => { + it('edits a field key containing a slash via quoted segment', () => { + // Closes ClawSweeper P2 on PR #78678: JSONL resolve unquotes + // bracket-aware segments but the edit path used plain + // `.split('.')`. A path that resolves under `Lnnn` MUST be + // editable through the same address. + const raw = `{"event":"start","detail":{"github/repo":"old"}}\n`; + const { ast } = parseJsonl(raw); + const r = setJsonlOcPath( + ast, + parseOcPath('oc://x.jsonl/L1/detail/"github/repo"'), + { kind: 'string', value: 'new' }, + ); + expect(r.ok).toBe(true); + if (r.ok) { + const lines = emitJsonl(r.ast).split('\n').filter((l) => l.length > 0); + expect(lines).toHaveLength(1); + expect(JSON.parse(lines[0] ?? 
'')).toEqual({ + event: 'start', + detail: { 'github/repo': 'new' }, + }); + } + }); +}); diff --git a/src/oc-path/tests/jsonl/emit.test.ts b/src/oc-path/tests/jsonl/emit.test.ts new file mode 100644 index 00000000000..b174d6aed1c --- /dev/null +++ b/src/oc-path/tests/jsonl/emit.test.ts @@ -0,0 +1,101 @@ +import { describe, expect, it } from 'vitest'; +import { emitJsonl } from '../../jsonl/emit.js'; +import { parseJsonl } from '../../jsonl/parse.js'; +import { + OcEmitSentinelError, + REDACTED_SENTINEL, +} from '../../sentinel.js'; + +describe('emitJsonl — round-trip', () => { + it('returns raw bytes verbatim by default', () => { + const raw = '{"a":1}\n\n{"b":2}\nthis is malformed\n'; + const { ast } = parseJsonl(raw); + expect(emitJsonl(ast)).toBe(raw); + }); + + it('echoes pre-existing sentinel bytes by default; strict mode rejects', () => { + const raw = `{"a":"${REDACTED_SENTINEL}"}\n`; + const { ast } = parseJsonl(raw); + expect(emitJsonl(ast)).toBe(raw); + expect(() => + emitJsonl(ast, { + fileNameForGuard: 'session-events', + acceptPreExistingSentinel: false, + }), + ).toThrow(OcEmitSentinelError); + }); +}); + +describe('emitJsonl — render mode', () => { + it('rebuilds value lines via JSON-stringify', () => { + const { ast } = parseJsonl('{"a":1}\n{"b":2}\n'); + const out = emitJsonl(ast, { mode: 'render' }); + expect(out.split('\n')).toEqual(['{"a":1}', '{"b":2}']); + }); + + it('preserves blank and malformed lines verbatim in render mode', () => { + const { ast } = parseJsonl('{"a":1}\n\nbroken\n{"b":2}\n'); + const out = emitJsonl(ast, { mode: 'render' }); + expect(out.split('\n')).toEqual(['{"a":1}', '', 'broken', '{"b":2}']); + }); + + it('throws when a value-leaf is the sentinel under render mode', () => { + const ast = parseJsonl('{"a":"ok"}\n').ast; + const tampered = { + ...ast, + lines: [ + { + kind: 'value' as const, + line: 1, + raw: '{"a":"ok"}', + value: { + kind: 'object' as const, + entries: [ + { + key: 'a', + line: 1, + value: { kind: 
'string' as const, value: REDACTED_SENTINEL }, + }, + ], + }, + }, + ], + }; + expect(() => emitJsonl(tampered, { mode: 'render' })).toThrow( + OcEmitSentinelError, + ); + }); + + it('throws when a value-leaf EMBEDS the sentinel (prefix/suffix wrap)', () => { + // Regression: prior to this fix, render mode used exact-match + // (`value.value === SENTINEL`), so `prefix__OPENCLAW_REDACTED__suffix` + // slipped through. The contains-check is the right invariant. + const ast = parseJsonl('{"a":"ok"}\n').ast; + const tampered = { + ...ast, + lines: [ + { + kind: 'value' as const, + line: 1, + raw: '{"a":"ok"}', + value: { + kind: 'object' as const, + entries: [ + { + key: 'a', + line: 1, + value: { + kind: 'string' as const, + value: `wrap-${REDACTED_SENTINEL}-end`, + }, + }, + ], + }, + }, + ], + }; + expect(() => emitJsonl(tampered, { mode: 'render' })).toThrow( + OcEmitSentinelError, + ); + }); +}); diff --git a/src/oc-path/tests/jsonl/parse.test.ts b/src/oc-path/tests/jsonl/parse.test.ts new file mode 100644 index 00000000000..88cfbc3117f --- /dev/null +++ b/src/oc-path/tests/jsonl/parse.test.ts @@ -0,0 +1,43 @@ +import { describe, expect, it } from 'vitest'; +import { parseJsonl } from '../../jsonl/parse.js'; + +describe('parseJsonl', () => { + it('parses an empty file as zero lines', () => { + const { ast, diagnostics } = parseJsonl(''); + expect(diagnostics).toEqual([]); + expect(ast.lines).toEqual([]); + }); + + it('parses each line as a JSON value', () => { + const raw = `{"event":"start"} +{"event":"step","n":1} +{"event":"end"} +`; + const { ast, diagnostics } = parseJsonl(raw); + expect(diagnostics).toEqual([]); + expect(ast.lines).toHaveLength(3); + expect(ast.lines[0]?.kind).toBe('value'); + expect(ast.lines[2]?.kind).toBe('value'); + }); + + it('preserves blank lines as blank entries', () => { + const raw = '{"a":1}\n\n{"b":2}\n'; + const { ast, diagnostics } = parseJsonl(raw); + expect(diagnostics).toEqual([]); + expect(ast.lines.map((l) => 
l.kind)).toEqual(['value', 'blank', 'value']); + }); + + it('flags malformed lines as warnings without aborting', () => { + const raw = '{"a":1}\nthis is not json\n{"b":2}\n'; + const { ast, diagnostics } = parseJsonl(raw); + expect(ast.lines.map((l) => l.kind)).toEqual(['value', 'malformed', 'value']); + expect(diagnostics).toHaveLength(1); + expect(diagnostics[0]?.code).toBe('OC_JSONL_LINE_MALFORMED'); + }); + + it('preserves raw on the AST root for byte-fidelity emit', () => { + const raw = '{"a":1}\n{"b":2}\n'; + const { ast } = parseJsonl(raw); + expect(ast.raw).toBe(raw); + }); +}); diff --git a/src/oc-path/tests/jsonl/resolve.test.ts b/src/oc-path/tests/jsonl/resolve.test.ts new file mode 100644 index 00000000000..9fccd944bf6 --- /dev/null +++ b/src/oc-path/tests/jsonl/resolve.test.ts @@ -0,0 +1,99 @@ +import { describe, expect, it } from 'vitest'; +import { parseJsonl } from '../../jsonl/parse.js'; +import { resolveJsonlOcPath } from '../../jsonl/resolve.js'; +import { parseOcPath } from '../../oc-path.js'; +import { resolveOcPath } from '../../universal.js'; +import { findOcPaths } from '../../find.js'; + +const log = `{"event":"start","ts":1} +{"event":"step","n":1,"result":{"ok":true,"detail":"a"}} + +{"event":"end","ts":99} +`; + +function rs(ocPath: string) { + const { ast } = parseJsonl(log); + return resolveJsonlOcPath(ast, parseOcPath(ocPath)); +} + +describe('resolveJsonlOcPath', () => { + it('returns root when no segments are given', () => { + expect(rs('oc://session-events')?.kind).toBe('root'); + }); + + it('addresses an entire line by line number', () => { + const m = rs('oc://session-events/L1'); + expect(m?.kind).toBe('line'); + }); + + it('addresses fields under a line via item segment', () => { + const m = rs('oc://session-events/L2/event'); + expect(m?.kind).toBe('object-entry'); + if (m?.kind === 'object-entry') { + expect(m.node.value).toMatchObject({ kind: 'string', value: 'step' }); + } + }); + + it('descends via dotted item paths', () 
=> { + const m = rs('oc://session-events/L2/result.ok'); + expect(m?.kind).toBe('object-entry'); + if (m?.kind === 'object-entry') { + expect(m.node.value).toMatchObject({ kind: 'boolean', value: true }); + } + }); + + it('resolves $last to the most recent value line', () => { + const m = rs('oc://session-events/$last/event'); + expect(m?.kind).toBe('object-entry'); + if (m?.kind === 'object-entry') { + expect(m.node.value).toMatchObject({ kind: 'string', value: 'end' }); + } + }); + + it('returns null for unknown line addresses', () => { + expect(rs('oc://session-events/L99')).toBeNull(); + expect(rs('oc://session-events/garbage')).toBeNull(); + }); + + it('returns null when descending into a blank line', () => { + expect(rs('oc://session-events/L3/anything')).toBeNull(); + }); +}); + +describe('resolveJsonlToUniversal — file-relative line metadata (regression)', () => { + // Regression: surfaced via the openclaw-path CLI scenario run on + // a multi-line session.jsonl. Every match returned `line: 1` + // because the inside-line jsonc parser numbers from 1 within each + // line's bytes; the universal resolve was preferring that local + // number over the JsonlLine's file-relative line. 
+ + const log = [ + '{"event":"start"}', // line 1 + '{"event":"step","n":1}', // line 2 + '{"event":"step","n":2}', // line 3 + '{"event":"end"}', // line 4 + '', // line 5 (blank) + ].join('\n'); + + it('resolves L2/event with line=2 (not 1)', () => { + const { ast } = parseJsonl(log); + const m = resolveOcPath(ast, parseOcPath('oc://session.jsonl/L2/event')); + expect(m).not.toBeNull(); + if (m !== null) {expect(m.line).toBe(2);} + }); + + it('resolves L4/event with line=4', () => { + const { ast } = parseJsonl(log); + const m = resolveOcPath(ast, parseOcPath('oc://session.jsonl/L4/event')); + expect(m).not.toBeNull(); + if (m !== null) {expect(m.line).toBe(4);} + }); + + it('findOcPaths over wildcard surfaces correct file-relative lines', () => { + const { ast } = parseJsonl(log); + const matches = findOcPaths(ast, parseOcPath('oc://session.jsonl/*/event')); + expect(matches).toHaveLength(4); + const lines = matches.map((m) => m.match.line); + expect(lines).toEqual([1, 2, 3, 4]); + }); +}); diff --git a/src/oc-path/tests/oc-path.test.ts b/src/oc-path/tests/oc-path.test.ts new file mode 100644 index 00000000000..10707c0febb --- /dev/null +++ b/src/oc-path/tests/oc-path.test.ts @@ -0,0 +1,136 @@ +import { describe, expect, it } from 'vitest'; +import { + OcPathError, + formatOcPath, + isValidOcPath, + parseOcPath, +} from '../oc-path.js'; + +describe('parseOcPath', () => { + it('parses file-only path', () => { + expect(parseOcPath('oc://SOUL.md')).toEqual({ file: 'SOUL.md' }); + }); + + it('parses file + section', () => { + expect(parseOcPath('oc://SOUL.md/Boundaries')).toEqual({ + file: 'SOUL.md', + section: 'Boundaries', + }); + }); + + it('parses file + section + item', () => { + expect(parseOcPath('oc://SOUL.md/Boundaries/deny-rule-1')).toEqual({ + file: 'SOUL.md', + section: 'Boundaries', + item: 'deny-rule-1', + }); + }); + + it('parses file + section + item + field', () => { + expect(parseOcPath('oc://SOUL.md/Boundaries/deny-rule-1/risk')).toEqual({ + 
file: 'SOUL.md', + section: 'Boundaries', + item: 'deny-rule-1', + field: 'risk', + }); + }); + + it('parses session query', () => { + expect(parseOcPath('oc://SOUL.md?session=daily-cron')).toEqual({ + file: 'SOUL.md', + session: 'daily-cron', + }); + }); + + it('rejects missing scheme', () => { + expectOcPathError(() => parseOcPath('SOUL.md'), 'OC_PATH_MISSING_SCHEME'); + }); + + it('rejects empty path after scheme', () => { + expectOcPathError(() => parseOcPath('oc://'), 'OC_PATH_EMPTY'); + }); + + it('rejects empty segment', () => { + expectOcPathError(() => parseOcPath('oc://SOUL.md//deny-rule-1'), 'OC_PATH_EMPTY_SEGMENT'); + }); + + it('rejects too-deep nesting', () => { + expectOcPathError(() => parseOcPath('oc://SOUL.md/a/b/c/d/e'), 'OC_PATH_TOO_DEEP'); + }); + + it('rejects non-string input', () => { + expectOcPathError(() => parseOcPath(123 as unknown as string), 'OC_PATH_NOT_STRING'); + }); +}); + +function expectOcPathError(fn: () => unknown, expectedCode: string): void { + try { + fn(); + expect.fail(`expected OcPathError with code "${expectedCode}" but no error thrown`); + } catch (err) { + expect(err).toBeInstanceOf(OcPathError); + expect((err as OcPathError).code).toBe(expectedCode); + } +} + +describe('formatOcPath', () => { + it('round-trips file-only', () => { + expect(formatOcPath({ file: 'SOUL.md' })).toBe('oc://SOUL.md'); + }); + + it('round-trips full nesting', () => { + expect( + formatOcPath({ + file: 'SOUL.md', + section: 'Boundaries', + item: 'deny-rule-1', + field: 'risk', + }), + ).toBe('oc://SOUL.md/Boundaries/deny-rule-1/risk'); + }); + + it('round-trips session', () => { + expect(formatOcPath({ file: 'SOUL.md', session: 'cron' })).toBe( + 'oc://SOUL.md?session=cron', + ); + }); + + it('rejects empty file', () => { + expectOcPathError(() => formatOcPath({ file: '' }), 'OC_PATH_FILE_REQUIRED'); + }); + + it('rejects item without section', () => { + expectOcPathError(() => formatOcPath({ file: 'F.md', item: 'i' }), 'OC_PATH_NESTING'); + 
}); +}); + +describe('round-trip', () => { + const cases = [ + 'oc://SOUL.md', + 'oc://SOUL.md/Boundaries', + 'oc://SOUL.md/Boundaries/deny-rule-1', + 'oc://SOUL.md/Boundaries/deny-rule-1/risk', + 'oc://SOUL.md?session=daily', + 'oc://AGENTS.md/Tools/gh/risk', + ]; + for (const input of cases) { + it(`formatOcPath(parseOcPath("${input}")) === "${input}"`, () => { + expect(formatOcPath(parseOcPath(input))).toBe(input); + }); + } +}); + +describe('isValidOcPath', () => { + it('returns true for valid paths', () => { + expect(isValidOcPath('oc://SOUL.md')).toBe(true); + expect(isValidOcPath('oc://SOUL.md/Boundaries')).toBe(true); + }); + + it('returns false for invalid paths', () => { + expect(isValidOcPath('SOUL.md')).toBe(false); + expect(isValidOcPath('oc://')).toBe(false); + expect(isValidOcPath(null)).toBe(false); + expect(isValidOcPath(undefined)).toBe(false); + expect(isValidOcPath(42)).toBe(false); + }); +}); diff --git a/src/oc-path/tests/parse.test.ts b/src/oc-path/tests/parse.test.ts new file mode 100644 index 00000000000..10196c3d6e5 --- /dev/null +++ b/src/oc-path/tests/parse.test.ts @@ -0,0 +1,203 @@ +import { describe, expect, it } from 'vitest'; +import { parseMd } from '../parse.js'; + +describe('parseMd — frontmatter', () => { + it('parses simple frontmatter', () => { + const raw = `--- +name: github +description: gh CLI for issues, PRs, runs +--- + +Body text. 
+`; + const { ast, diagnostics } = parseMd(raw); + expect(diagnostics).toEqual([]); + expect(ast.frontmatter).toEqual([ + { key: 'name', value: 'github', line: 2 }, + { key: 'description', value: 'gh CLI for issues, PRs, runs', line: 3 }, + ]); + }); + + it('handles no frontmatter', () => { + const raw = `## First section\n\nContent.\n`; + const { ast } = parseMd(raw); + expect(ast.frontmatter).toEqual([]); + expect(ast.preamble).toBe(''); + expect(ast.blocks.length).toBe(1); + }); + + it('emits diagnostic for unclosed frontmatter', () => { + const raw = `--- +name: github +description: never closes + +Body. +`; + const { diagnostics } = parseMd(raw); + expect(diagnostics).toContainEqual( + expect.objectContaining({ code: 'OC_FRONTMATTER_UNCLOSED' }), + ); + }); + + it('strips quotes from values', () => { + const raw = `--- +title: "Hello world" +hint: 'quoted' +--- +`; + const { ast } = parseMd(raw); + expect(ast.frontmatter[0]?.value).toBe('Hello world'); + expect(ast.frontmatter[1]?.value).toBe('quoted'); + }); +}); + +describe('parseMd — H2 blocks', () => { + it('splits sections', () => { + const raw = `Preamble text. + +## First + +Body of first. + +## Second + +Body of second. 
+`; + const { ast } = parseMd(raw); + expect(ast.preamble.trim()).toBe('Preamble text.'); + expect(ast.blocks.length).toBe(2); + expect(ast.blocks[0]?.heading).toBe('First'); + expect(ast.blocks[0]?.slug).toBe('first'); + expect(ast.blocks[1]?.heading).toBe('Second'); + }); + + it('preserves line numbers (1-based)', () => { + const raw = `Line 1 +## Heading at line 2 +Line 3 +`; + const { ast } = parseMd(raw); + expect(ast.blocks[0]?.line).toBe(2); + }); + + it('does NOT split on `## ` inside fenced code blocks', () => { + const raw = `## Real section + +\`\`\`md +## Not a heading +content +\`\`\` + +## Another section +`; + const { ast } = parseMd(raw); + expect(ast.blocks.map((b) => b.heading)).toEqual(['Real section', 'Another section']); + }); +}); + +describe('parseMd — items', () => { + it('extracts plain bullet items', () => { + const raw = `## Boundaries + +- never write to /etc +- always confirm before deleting +`; + const { ast } = parseMd(raw); + expect(ast.blocks[0]?.items.length).toBe(2); + expect(ast.blocks[0]?.items[0]?.text).toBe('never write to /etc'); + expect(ast.blocks[0]?.items[0]?.kv).toBeUndefined(); + }); + + it('extracts kv items', () => { + const raw = `## Tools + +- gh: GitHub CLI +- curl: HTTP client +`; + const { ast } = parseMd(raw); + expect(ast.blocks[0]?.items[0]?.kv).toEqual({ key: 'gh', value: 'GitHub CLI' }); + expect(ast.blocks[0]?.items[0]?.slug).toBe('gh'); + expect(ast.blocks[0]?.items[1]?.kv).toEqual({ key: 'curl', value: 'HTTP client' }); + }); + + it('does NOT extract bullets inside fenced code', () => { + const raw = `## Section + +\`\`\` +- not a bullet +\`\`\` + +- real bullet +`; + const { ast } = parseMd(raw); + expect(ast.blocks[0]?.items.length).toBe(1); + expect(ast.blocks[0]?.items[0]?.text).toBe('real bullet'); + }); +}); + +describe('parseMd — tables', () => { + it('extracts a simple table', () => { + const raw = `## Tool Guidance + +| tool | guidance | +| --- | --- | +| gh | use for GitHub | +| curl | HTTP 
client | +`; + const { ast } = parseMd(raw); + const table = ast.blocks[0]?.tables[0]; + expect(table).toBeDefined(); + expect(table?.headers).toEqual(['tool', 'guidance']); + expect(table?.rows.length).toBe(2); + expect(table?.rows[0]).toEqual(['gh', 'use for GitHub']); + }); +}); + +describe('parseMd — code blocks', () => { + it('extracts a fenced code block', () => { + const raw = `## Examples + +\`\`\`ts +const x = 1; +\`\`\` +`; + const { ast } = parseMd(raw); + expect(ast.blocks[0]?.codeBlocks[0]).toMatchObject({ + lang: 'ts', + text: 'const x = 1;', + }); + }); + + it('handles unlanguaged fences', () => { + const raw = `## Block + +\`\`\` +plain text +\`\`\` +`; + const { ast } = parseMd(raw); + expect(ast.blocks[0]?.codeBlocks[0]?.lang).toBeNull(); + }); +}); + +describe('parseMd — byte-fidelity', () => { + it('preserves raw on the AST', () => { + const raw = `---\nname: x\n---\n\n## Sec\n\n- a\n- b\n`; + const { ast } = parseMd(raw); + expect(ast.raw).toBe(raw); + }); + + it('preserves BOM in raw but ignores it for parsing', () => { + const raw = '## Heading\n'; + const { ast } = parseMd(raw); + expect(ast.raw).toBe(raw); + expect(ast.blocks[0]?.heading).toBe('Heading'); + }); + + it('handles CRLF line endings', () => { + const raw = '## Heading\r\n\r\n- item\r\n'; + const { ast } = parseMd(raw); + expect(ast.blocks[0]?.heading).toBe('Heading'); + expect(ast.blocks[0]?.items[0]?.text).toBe('item'); + }); +}); diff --git a/src/oc-path/tests/resolve.test.ts b/src/oc-path/tests/resolve.test.ts new file mode 100644 index 00000000000..8b9abd358a0 --- /dev/null +++ b/src/oc-path/tests/resolve.test.ts @@ -0,0 +1,100 @@ +import { describe, expect, it } from 'vitest'; +import { parseMd } from '../parse.js'; +import { resolveMdOcPath as resolveOcPath } from '../resolve.js'; + +const SAMPLE = `--- +name: github +description: gh CLI +--- + +Preamble. 
+ +## Boundaries + +- never write to /etc +- deny: secrets + +## Tools + +- gh: GitHub CLI +- curl: HTTP client +`; + +describe('resolveOcPath', () => { + const { ast } = parseMd(SAMPLE); + + it('resolves root', () => { + const m = resolveOcPath(ast, { file: 'AGENTS.md' }); + expect(m?.kind).toBe('root'); + }); + + it('resolves block by slug', () => { + const m = resolveOcPath(ast, { file: 'AGENTS.md', section: 'boundaries' }); + expect(m?.kind).toBe('block'); + if (m?.kind === 'block') { + expect(m.node.heading).toBe('Boundaries'); + } + }); + + it('resolves item by slug', () => { + const m = resolveOcPath(ast, { + file: 'AGENTS.md', + section: 'tools', + item: 'gh', + }); + expect(m?.kind).toBe('item'); + if (m?.kind === 'item') { + expect(m.node.kv?.value).toBe('GitHub CLI'); + expect(m.block.heading).toBe('Tools'); + } + }); + + it('resolves item-field via kv', () => { + const m = resolveOcPath(ast, { + file: 'AGENTS.md', + section: 'tools', + item: 'gh', + field: 'gh', + }); + expect(m?.kind).toBe('item-field'); + if (m?.kind === 'item-field') { + expect(m.value).toBe('GitHub CLI'); + } + }); + + it('resolves frontmatter via [frontmatter] sentinel section', () => { + const m = resolveOcPath(ast, { + file: 'AGENTS.md', + section: '[frontmatter]', + field: 'name', + }); + expect(m?.kind).toBe('frontmatter'); + if (m?.kind === 'frontmatter') { + expect(m.node.value).toBe('github'); + } + }); + + it('returns null for unknown section', () => { + const m = resolveOcPath(ast, { file: 'AGENTS.md', section: 'nonexistent' }); + expect(m).toBeNull(); + }); + + it('returns null for unknown item', () => { + const m = resolveOcPath(ast, { + file: 'AGENTS.md', + section: 'tools', + item: 'nonexistent', + }); + expect(m).toBeNull(); + }); + + it('returns null for field on non-kv item', () => { + const m = resolveOcPath(ast, { + file: 'AGENTS.md', + section: 'boundaries', + item: 'never-write-to-etc', + field: 'risk', + }); + expect(m).toBeNull(); + }); +}); diff --git 
a/src/oc-path/tests/scenarios/append-multi-agent.test.ts b/src/oc-path/tests/scenarios/append-multi-agent.test.ts new file mode 100644 index 00000000000..3b45afeb740 --- /dev/null +++ b/src/oc-path/tests/scenarios/append-multi-agent.test.ts @@ -0,0 +1,120 @@ +/** + * Wave 20 — JSONL append + multi-agent session sim. + * + * Substrate guarantee: `appendJsonlOcPath(ast, value)` returns a new AST + * with the value appended as a new line. Single-writer model at the + * substrate; concurrent-append safety lives in the LKG tracker layer + * (PR-4) on top of git's three-way merge. + * + * Append for other kinds (jsonc array push, md item-to-section) was + * removed from the substrate — those are domain operations that ride + * on top of `setXxxOcPath` at the doctor / tracker layer, where the + * value shapes are domain-defined. + */ +import { describe, expect, it } from 'vitest'; +import { emitJsonl } from '../../jsonl/emit.js'; +import { appendJsonlOcPath } from '../../jsonl/edit.js'; +import { parseJsonl } from '../../jsonl/parse.js'; +import type { JsoncValue } from '../../jsonc/ast.js'; + +function event(name: string, n: number): JsoncValue { + return { + kind: 'object', + entries: [ + { key: 'event', line: 0, value: { kind: 'string', value: name } }, + { key: 'n', line: 0, value: { kind: 'number', value: n } }, + ], + }; +} + +describe('wave-20 jsonl append + multi-agent session sim', () => { + it('A-01 single agent appends 100 events in order', () => { + let ast = parseJsonl('').ast; + for (let i = 0; i < 100; i++) { + ast = appendJsonlOcPath(ast, event('step', i)); + } + const lines = emitJsonl(ast).split('\n').filter((l) => l.length > 0); + expect(lines).toHaveLength(100); + expect(JSON.parse(lines[0] ?? '')).toEqual({ event: 'step', n: 0 }); + expect(JSON.parse(lines[99] ?? 
'')).toEqual({ event: 'step', n: 99 }); + }); + + it('A-02 two agents alternating appends preserve interleave order', () => { + let ast = parseJsonl('').ast; + for (let i = 0; i < 10; i++) { + const agent = i % 2 === 0 ? 'a' : 'b'; + ast = appendJsonlOcPath(ast, event(agent, i)); + } + const lines = emitJsonl(ast).split('\n').filter((l) => l.length > 0); + expect(lines).toHaveLength(10); + for (let i = 0; i < 10; i++) { + const expected = i % 2 === 0 ? 'a' : 'b'; + expect(JSON.parse(lines[i] ?? '').event).toBe(expected); + } + }); + + it('A-03 append after a malformed line preserves both', () => { + let ast = parseJsonl('{"a":1}\nbroken\n').ast; + ast = appendJsonlOcPath(ast, event('start', 1)); + const out = emitJsonl(ast); + expect(out).toContain('broken'); + expect(out).toContain('"event":"start"'); + }); + + it('A-04 append to empty file produces a single value line', () => { + let ast = parseJsonl('').ast; + ast = appendJsonlOcPath(ast, event('first', 0)); + const out = emitJsonl(ast); + expect(JSON.parse(out)).toEqual({ event: 'first', n: 0 }); + }); + + it('A-05 append assigns line numbers monotonically', () => { + let ast = parseJsonl('').ast; + ast = appendJsonlOcPath(ast, event('a', 0)); + ast = appendJsonlOcPath(ast, event('b', 1)); + ast = appendJsonlOcPath(ast, event('c', 2)); + expect(ast.lines.map((l) => l.line)).toEqual([1, 2, 3]); + }); + + it('A-06 append after blank lines preserves line-number gaps correctly', () => { + let ast = parseJsonl('{"a":1}\n\n\n').ast; + ast = appendJsonlOcPath(ast, event('after', 0)); + // Existing lines: L1 value, L2 blank, L3 blank. Appended line is L4. 
+ expect(ast.lines.length).toBe(4); + expect(ast.lines[3]?.line).toBe(4); + }); + + it('A-07 1000-event session sim is deterministic', () => { + let ast = parseJsonl('').ast; + for (let i = 0; i < 1000; i++) { + ast = appendJsonlOcPath(ast, event('e', i)); + } + const lines = emitJsonl(ast).split('\n').filter((l) => l.length > 0); + expect(lines).toHaveLength(1000); + expect(JSON.parse(lines[999] ?? '').n).toBe(999); + }); + + it('A-08 append is non-mutating on the input AST', () => { + const ast = parseJsonl('{"a":1}\n').ast; + const before = JSON.stringify(ast); + appendJsonlOcPath(ast, event('x', 0)); + expect(JSON.stringify(ast)).toBe(before); + }); + + it('A-09 append preserves prior raw bytes (renders new tail)', () => { + let ast = parseJsonl('{"a":1}\n').ast; + ast = appendJsonlOcPath(ast, event('b', 1)); + const out = emitJsonl(ast); + const lines = out.split('\n'); + // First line content unchanged. + expect(lines[0]).toContain('"a":1'); + // Second line is the new event. + expect(JSON.parse(lines[1] ?? '')).toEqual({ event: 'b', n: 1 }); + }); + + it('A-10 deterministic line-number assignment after malformed lines', () => { + let ast = parseJsonl('{"a":1}\nbroken\n{"b":2}\n').ast; + ast = appendJsonlOcPath(ast, event('c', 2)); + expect(ast.lines.map((l) => l.line)).toEqual([1, 2, 3, 4]); + }); +}); diff --git a/src/oc-path/tests/scenarios/byte-fidelity.test.ts b/src/oc-path/tests/scenarios/byte-fidelity.test.ts new file mode 100644 index 00000000000..4d18ddd1df1 --- /dev/null +++ b/src/oc-path/tests/scenarios/byte-fidelity.test.ts @@ -0,0 +1,179 @@ +/** + * Wave 1 — byte-fidelity round-trip. + * + * Substrate guarantee: `emitMd(parse(raw), { mode: 'roundtrip' }) === raw` + * for every input the parser accepts. This wave hammers that. 
/**
 * Wave 1 — byte-fidelity round-trip.
 *
 * Substrate guarantee exercised here: parsing a markdown string and
 * emitting the resulting AST reproduces the input exactly. Every case
 * pushes one raw string through `roundTrip` and compares the output to
 * the input with strict equality.
 *
 * BOM fixtures spell the byte-order mark as the `\uFEFF` escape so the
 * character is visible in review and survives editors / extraction tools
 * that silently drop a literal U+FEFF.
 */
import { describe, expect, it } from 'vitest';
import { emitMd } from '../../emit.js';
import { parseMd } from '../../parse.js';

/** Parses `raw` as markdown and re-emits the AST with default options. */
function roundTrip(raw: string): string {
  const { ast } = parseMd(raw);
  return emitMd(ast);
}

describe('wave-01 byte-fidelity', () => {
  it('B-01 empty file', () => {
    expect(roundTrip('')).toBe('');
  });

  it('B-02 whitespace-only file', () => {
    expect(roundTrip(' \n\n \n')).toBe(' \n\n \n');
  });

  it('B-03 single newline', () => {
    expect(roundTrip('\n')).toBe('\n');
  });

  it('B-04 file without trailing newline', () => {
    expect(roundTrip('## H\n- item')).toBe('## H\n- item');
  });

  it('B-05 file with trailing newline', () => {
    expect(roundTrip('## H\n- item\n')).toBe('## H\n- item\n');
  });

  it('B-06 file with multiple trailing newlines', () => {
    expect(roundTrip('## H\n- item\n\n\n')).toBe('## H\n- item\n\n\n');
  });

  it('B-07 BOM at start', () => {
    // Explicit escape: the fixture must really start with U+FEFF for this
    // case to test what its title says.
    const raw = '\uFEFF## Heading\n- item\n';
    expect(roundTrip(raw)).toBe(raw);
  });

  it('B-08 CRLF line endings', () => {
    const raw = '## H\r\n\r\n- item\r\n';
    expect(roundTrip(raw)).toBe(raw);
  });

  it('B-09 mixed line endings (CRLF + LF)', () => {
    const raw = '## H\r\n- item\n- another\r\n';
    expect(roundTrip(raw)).toBe(raw);
  });

  it('B-10 tabs preserved in body', () => {
    const raw = '## H\n\n\tindented body\n';
    expect(roundTrip(raw)).toBe(raw);
  });

  it('B-11 trailing whitespace on lines preserved', () => {
    const raw = '## Heading \n- item \n';
    expect(roundTrip(raw)).toBe(raw);
  });

  it('B-12 multiple consecutive blank lines preserved', () => {
    const raw = '## H\n\n\n\n- item\n';
    expect(roundTrip(raw)).toBe(raw);
  });

  it('B-13 frontmatter only, no body', () => {
    const raw = '---\nname: x\n---\n';
    expect(roundTrip(raw)).toBe(raw);
  });

  it('B-14 body only, no frontmatter, no headings', () => {
    const raw = 'Just some prose.\nNo structure.\n';
    expect(roundTrip(raw)).toBe(raw);
  });

  it('B-15 frontmatter + body + multiple sections', () => {
    const raw = `---
name: github
description: gh CLI
---

Preamble.

## Boundaries

- never write to /etc

## Tools

- gh: GitHub CLI
- curl: HTTP client
`;
    expect(roundTrip(raw)).toBe(raw);
  });

  it('B-16 unicode content preserved', () => {
    const raw = '## Café Section\n\n- résumé item\n- 日本語\n';
    expect(roundTrip(raw)).toBe(raw);
  });

  it('B-17 emoji preserved', () => {
    const raw = '## 🚀 Launch\n\n- ✅ ready\n- 🔒 secure\n';
    expect(roundTrip(raw)).toBe(raw);
  });

  it('B-18 frontmatter with special chars in values', () => {
    const raw = `---\nurl: https://example.com:443/path?q=1&a=2\n---\n`;
    expect(roundTrip(raw)).toBe(raw);
  });

  it('B-19 file with mixed bullet markers (-, *, +)', () => {
    const raw = '## H\n\n- dash\n* star\n+ plus\n';
    expect(roundTrip(raw)).toBe(raw);
  });

  it('B-20 raw === parse(raw).raw === emitMd(parse(raw)) for 50 random shapes', () => {
    const inputs = [
      '',
      '\n',
      '## A\n',
      '## A\n## B\n',
      '---\n---\n',
      '---\nk: v\n---\n',
      '---\nk: v\n---\nbody\n',
      '## H\n- a\n- b\n## I\n- c\n',
      '\n',
      '\r\n',
      '\t\n',
      'plain\n',
      '`code`\n',
      '```\nfence\n```\n',
      '```ts\nconst x = 1;\n```\n',
      '| a | b |\n| - | - |\n| 1 | 2 |\n',
      '> quote\n',
      '# H1 not split\n## H2 split\n',
      'preamble\n## block\nbody\n',
      'preamble\n## block\nbody\n## block2\nbody2\n',
      '## h\n\n\n\n',
      ' ## indented heading (not parsed)\n',
      '##NoSpace\n',
      '## With trailing spaces \n- item\n',
      '## H\n- nested\n - sub\n',
      '## H\n\n```md\n## inside code\n```\n',
      '---\na: 1\nb: "two"\nc: \'three\'\n---\n',
      '---\nopen\nbut no close\n\nbody\n',
      'mixed\r\nline\nendings\r\n',
      '\uFEFF---\nname: bom\n---\nbody\n', // BOM ahead of frontmatter
      '## h\n- k: v\n- k2: v2\n- plain\n',
      '## h\n\n| a | b |\n|---|---|\n',
      '## h\n```sql\nSELECT 1\n```\n',
      '## h\n\n- url: http://x.example.com:80/p?q=1\n',
      '## h\n\n- key: value with: colons\n',
      '## h\n\n- key: "quoted: value"\n',
      '## h\n\n- a-b: c-d\n',
      '## h with `inline code`\n',
      'no blocks\nat all\n',
      'No body or section\n\n\n\n',
      ' \n \n',
      '## h\n## h2\n## h3\n',
      '##\n', // empty heading
      '## \n', // heading whitespace only
      '\n\n## h\n\n\n',
      '---\n\n---\n',
      '## h\n- \n', // empty bullet
      '## h\n\n\n```\nempty fence body\n```\n',
      '## h\n```\nunclosed fence',
      '## empty section\n## next\n',
      '0\n',
    ];
    for (const raw of inputs) {
      const label = `failed on: ${JSON.stringify(raw.slice(0, 60))}`;
      // The title promises both equalities; check the AST's stored raw
      // text as well as the emitted text.
      expect(parseMd(raw).ast.raw, label).toBe(raw);
      expect(roundTrip(raw), label).toBe(raw);
    }
  });
});
/**
 * Wave 6 — fenced code blocks.
 *
 * These cases pin how triple-backtick fences inside an H2 section are
 * surfaced on the parsed block: each fence becomes an entry in
 * `codeBlocks` carrying a `lang` (or `null`) and the fence body as
 * `text`, and fence bodies are excluded from heading splitting and bullet
 * extraction.
 */
import { describe, expect, it } from 'vitest';
import { parseMd } from '../../parse.js';

/** Parses `raw` and returns its first H2 block (undefined when none). */
function firstSection(raw: string) {
  return parseMd(raw).ast.blocks[0];
}

describe('wave-06 code-blocks', () => {
  it('CB-01 unlanguaged fence', () => {
    const section = firstSection('## H\n\n```\nplain text\n```\n');
    expect(section?.codeBlocks[0]).toMatchObject({
      lang: null,
      text: 'plain text',
    });
  });

  it('CB-02 languaged fence', () => {
    const fence = firstSection('## H\n\n```ts\nconst x = 1;\n```\n')?.codeBlocks[0];
    expect(fence?.lang).toBe('ts');
    expect(fence?.text).toBe('const x = 1;');
  });

  it('CB-03 multi-line code body preserved verbatim', () => {
    const fence = firstSection('## H\n\n```ts\nline 1\nline 2\nline 3\n```\n')?.codeBlocks[0];
    expect(fence?.text).toBe('line 1\nline 2\nline 3');
  });

  it('CB-04 empty code block', () => {
    const fence = firstSection('## H\n\n```\n```\n')?.codeBlocks[0];
    expect(fence?.text).toBe('');
  });

  it('CB-05 code block with `## ` does NOT split as heading', () => {
    const source = '## Real\n\n```md\n## Not a heading\n```\n\n## Another real\n';
    const headings = parseMd(source).ast.blocks.map((section) => section.heading);
    expect(headings).toEqual(['Real', 'Another real']);
  });

  it('CB-06 code block with `- bullet` does NOT extract as item', () => {
    const source = '## H\n\n```\n- not a bullet\n- still not\n```\n\n- real bullet\n';
    const itemTexts = firstSection(source)?.items.map((entry) => entry.text);
    expect(itemTexts).toEqual(['real bullet']);
  });

  it('CB-07 multiple code blocks in same section', () => {
    const fences = firstSection('## H\n\n```a\nfirst\n```\n\n```b\nsecond\n```\n')?.codeBlocks;
    expect(fences?.length).toBe(2);
    expect(fences?.map((fence) => fence.lang)).toEqual(['a', 'b']);
  });

  it('CB-08 unterminated fence — body extends to end of section', () => {
    // A fence that never closes still yields a code block; the assertion
    // only requires that the text after the opening fence is captured.
    const fence = firstSection('## H\n\n```\nopen but never closes\n')?.codeBlocks[0];
    expect(fence?.text).toContain('open but never closes');
  });

  it('CB-09 fence with leading spaces (4-space indented code)', () => {
    // A fence marker that is not at column 0 is plain body text, so no
    // code block is recorded for the section.
    const fences = firstSection('## H\n\n ```\n indented\n ```\n')?.codeBlocks;
    expect(fences).toEqual([]);
  });

  it('CB-10 lang tag with extra whitespace trimmed', () => {
    const fence = firstSection('## H\n\n``` jsonc \nbody\n```\n')?.codeBlocks[0];
    expect(fence?.lang).toBe('jsonc');
  });

  it('CB-11 lang tag with hyphen / dot (typescript-jsx, c++)', () => {
    const fence = firstSection('## H\n\n```typescript-jsx\nx\n```\n')?.codeBlocks[0];
    expect(fence?.lang).toBe('typescript-jsx');
  });

  it('CB-12 fence appearing in preamble (before any H2) is ignored at block layer', () => {
    // The fence sits in the preamble, ahead of the only heading, so the
    // H2 block that follows carries no code blocks.
    const fences = firstSection('```\npreamble code\n```\n\n## H\n')?.codeBlocks;
    expect(fences).toEqual([]);
  });
});
/**
 * Wave 13 — cross-cutting integration.
 *
 * Exercises parse, resolve, emit and the OcPath parser/formatter together
 * on one sample document: every slug the parser produces must resolve
 * back to its node, slugs must be stable between parses, and resolution
 * must not depend on the `file` segment.
 */
import { describe, expect, it } from 'vitest';
import { emitMd } from '../../emit.js';
import { formatOcPath, parseOcPath } from '../../oc-path.js';
import { parseMd } from '../../parse.js';
import { resolveMdOcPath as resolveOcPath } from '../../resolve.js';

const SAMPLE = `---
name: github
description: gh CLI
---

Preamble.

## Boundaries

- never write to /etc
- always confirm

## Tools

- gh: GitHub CLI
- curl: HTTP client
`;

/** Fresh parse of SAMPLE so no test shares an AST instance with another. */
function sampleAst() {
  return parseMd(SAMPLE).ast;
}

describe('wave-13 cross-cutting', () => {
  it('CC-01 parse → resolve → emit pipeline (block)', () => {
    const doc = sampleAst();
    const match = resolveOcPath(doc, { file: 'AGENTS.md', section: 'boundaries' });
    expect(match?.kind).toBe('block');
    expect(emitMd(doc)).toBe(SAMPLE);
  });

  it('CC-02 OcPath round-trip via AST: parse + resolve + format', () => {
    const doc = sampleAst();
    for (const section of doc.blocks) {
      const uri = `oc://AGENTS.md/${section.slug}`;
      const parsed = parseOcPath(uri);
      const match = resolveOcPath(doc, parsed);
      expect(match?.kind, `block ${section.slug} should resolve`).toBe('block');
      // Formatting the parsed path must give back the URI it came from.
      expect(formatOcPath(parsed)).toBe(uri);
    }
  });

  it('CC-03 every item in every block is OcPath-addressable', () => {
    const doc = sampleAst();
    for (const section of doc.blocks) {
      for (const entry of section.items) {
        const address = `${section.slug}/${entry.slug}`;
        const match = resolveOcPath(doc, parseOcPath(`oc://AGENTS.md/${address}`));
        expect(match?.kind, `${address} should resolve`).toBe('item');
      }
    }
  });

  it('CC-04 every kv item field is OcPath-addressable', () => {
    const doc = sampleAst();
    for (const section of doc.blocks) {
      for (const entry of section.items) {
        const { kv } = entry;
        // Plain bullets have no key/value pair and therefore no field path.
        if (kv) {
          const parsed = parseOcPath(
            `oc://AGENTS.md/${section.slug}/${entry.slug}/${kv.key}`,
          );
          expect(resolveOcPath(doc, parsed)?.kind).toBe('item-field');
        }
      }
    }
  });

  it('CC-05 every frontmatter entry is OcPath-addressable', () => {
    const doc = sampleAst();
    for (const entry of doc.frontmatter) {
      const parsed = parseOcPath(`oc://AGENTS.md/[frontmatter]/${entry.key}`);
      expect(resolveOcPath(doc, parsed)?.kind).toBe('frontmatter');
    }
  });

  it('CC-06 slugs are stable across re-parses (deterministic)', () => {
    const first = sampleAst();
    const second = sampleAst();
    const blockSlugs = (doc: typeof first) => doc.blocks.map((section) => section.slug);
    const itemSlugs = (doc: typeof first) =>
      doc.blocks.map((section) => section.items.map((entry) => entry.slug));
    expect(blockSlugs(first)).toEqual(blockSlugs(second));
    expect(itemSlugs(first)).toEqual(itemSlugs(second));
  });

  it('CC-07 modifying raw + re-parse produces consistent AST shape', () => {
    const original = sampleAst();
    const edited = parseMd(
      SAMPLE.replace('GitHub CLI', 'GitHub command-line interface'),
    ).ast;
    // The edit touches only a value, so block and item counts must match.
    expect(edited.blocks.length).toBe(original.blocks.length);
    const toolsBefore = original.blocks.find((section) => section.slug === 'tools');
    const toolsAfter = edited.blocks.find((section) => section.slug === 'tools');
    expect(toolsAfter?.items.length).toBe(toolsBefore?.items.length);
    // The `gh` item's value must carry the replacement text.
    const gh = toolsAfter?.items.find((entry) => entry.kv?.key === 'gh');
    expect(gh?.kv?.value).toBe('GitHub command-line interface');
  });

  it('CC-08 unknown OcPath returns null without affecting subsequent valid resolves', () => {
    const doc = sampleAst();
    const missing = resolveOcPath(doc, { file: 'X.md', section: 'nonexistent' });
    expect(missing).toBeNull();
    const present = resolveOcPath(doc, { file: 'X.md', section: 'tools' });
    expect(present?.kind).toBe('block');
  });

  it('CC-09 resolve does not depend on file segment matching', () => {
    const doc = sampleAst();
    const viaA = resolveOcPath(doc, { file: 'A.md', section: 'tools' });
    const viaB = resolveOcPath(doc, { file: 'B.md', section: 'tools' });
    expect(viaA?.kind).toBe(viaB?.kind);
  });

  it('CC-10 round-trip across all 9 valid OcPath shapes', () => {
    const doc = sampleAst();
    const shapes = [
      { file: 'X.md' },
      { file: 'X.md', section: 'tools' },
      { file: 'X.md', section: 'tools', item: 'gh' },
      { file: 'X.md', section: 'tools', item: 'gh', field: 'gh' },
      { file: 'X.md', section: '[frontmatter]', field: 'name' },
      { file: 'X.md', section: 'boundaries' },
      { file: 'X.md', section: 'boundaries', item: 'never-write-to-etc' },
      { file: 'X.md', section: 'boundaries', item: 'always-confirm' },
      { file: 'X.md', section: '[frontmatter]', field: 'description' },
    ];
    for (const shape of shapes) {
      const match = resolveOcPath(doc, shape);
      expect(match, `failed for ${JSON.stringify(shape)}`).not.toBeNull();
    }
  });
});
/**
 * Wave 22 — cross-kind property invariants.
 *
 * The per-kind verbs (md / jsonc / jsonl) are expected to honour the same
 * shape contracts:
 *
 * 1. parse → emit (round-trip) is byte-stable
 * 2. resolve does not mutate its AST argument
 * 3. set on an unresolvable path does not throw
 * 4. inferKind maps file names to the kind whose parser is used
 * 5. parse → emit → parse → emit is a fixpoint
 * 6. hostile inputs do not throw at parse time
 */
import { describe, expect, it } from 'vitest';
import { inferKind } from '../../dispatch.js';
import { emitMd } from '../../emit.js';
import { setMdOcPath } from '../../edit.js';
import { resolveMdOcPath } from '../../resolve.js';
import { emitJsonc } from '../../jsonc/emit.js';
import { setJsoncOcPath } from '../../jsonc/edit.js';
import { resolveJsoncOcPath } from '../../jsonc/resolve.js';
import { parseJsonc } from '../../jsonc/parse.js';
import { emitJsonl } from '../../jsonl/emit.js';
import { setJsonlOcPath } from '../../jsonl/edit.js';
import { resolveJsonlOcPath } from '../../jsonl/resolve.js';
import { parseJsonl } from '../../jsonl/parse.js';
import { parseOcPath } from '../../oc-path.js';
import { parseMd } from '../../parse.js';

describe('wave-22 cross-kind property invariants', () => {
  // One small fixture per kind, shared by the property checks below.
  const mdRaw = '---\nname: x\n---\n\n## Boundaries\n\n- enabled: true\n';
  const jsoncRaw = '// h\n{ "k": 1, "n": [1,2,3] }\n';
  const jsonlRaw = '{"a":1}\n\nbroken\n{"b":2}\n';

  it('P-01 round-trip parse → emit is byte-stable across all kinds', () => {
    expect(emitMd(parseMd(mdRaw).ast)).toBe(mdRaw);
    expect(emitJsonc(parseJsonc(jsoncRaw).ast)).toBe(jsoncRaw);
    expect(emitJsonl(parseJsonl(jsonlRaw).ast)).toBe(jsonlRaw);
  });

  it('P-02 resolve is non-mutating across all kinds', () => {
    // Snapshot each AST as JSON before resolving and compare afterwards.
    const md = parseMd(mdRaw).ast;
    let before = JSON.stringify(md);
    resolveMdOcPath(md, parseOcPath('oc://X/[frontmatter]/name'));
    resolveMdOcPath(md, parseOcPath('oc://X/boundaries'));
    expect(JSON.stringify(md)).toBe(before);

    const jsonc = parseJsonc(jsoncRaw).ast;
    before = JSON.stringify(jsonc);
    resolveJsoncOcPath(jsonc, parseOcPath('oc://X/k'));
    resolveJsoncOcPath(jsonc, parseOcPath('oc://X/n.0'));
    expect(JSON.stringify(jsonc)).toBe(before);

    const jsonl = parseJsonl(jsonlRaw).ast;
    before = JSON.stringify(jsonl);
    resolveJsonlOcPath(jsonl, parseOcPath('oc://X/L1'));
    resolveJsonlOcPath(jsonl, parseOcPath('oc://X/$last'));
    expect(JSON.stringify(jsonl)).toBe(before);
  });

  it('P-03 unresolvable set never throws across all kinds', () => {
    const ocPath = parseOcPath('oc://X/totally.missing.path');
    expect(() =>
      setMdOcPath(parseMd(mdRaw).ast, ocPath, 'x'),
    ).not.toThrow();
    expect(() =>
      setJsoncOcPath(parseJsonc(jsoncRaw).ast, ocPath, {
        kind: 'string',
        value: 'x',
      }),
    ).not.toThrow();
    expect(() =>
      setJsonlOcPath(parseJsonl(jsonlRaw).ast, ocPath, {
        kind: 'string',
        value: 'x',
      }),
    ).not.toThrow();
  });

  it('P-04 inferKind aligns with the parser actually used', () => {
    expect(inferKind('AGENTS.md')).toBe('md');
    expect(inferKind('SOUL.md')).toBe('md');
    expect(inferKind('config.jsonc')).toBe('jsonc');
    expect(inferKind('plugins.json')).toBe('jsonc');
    expect(inferKind('events.jsonl')).toBe('jsonl');
    expect(inferKind('audit.ndjson')).toBe('jsonl');
  });

  it('P-05 parse → emit → parse is fixpoint across all kinds', () => {
    const md1 = emitMd(parseMd(mdRaw).ast);
    const md2 = emitMd(parseMd(md1).ast);
    expect(md1).toBe(md2);

    const jc1 = emitJsonc(parseJsonc(jsoncRaw).ast);
    const jc2 = emitJsonc(parseJsonc(jc1).ast);
    expect(jc1).toBe(jc2);

    const jl1 = emitJsonl(parseJsonl(jsonlRaw).ast);
    const jl2 = emitJsonl(parseJsonl(jl1).ast);
    expect(jl1).toBe(jl2);
  });

  it('P-06 hostile inputs do not throw at parse time across all kinds', () => {
    const hostile = [
      '\x00\x01\x02 binary garbage',
      '{ "unclosed":',
      '## heading without anything',
      '\n\n\n\n\n',
    ];
    // Every input is fed to every parser, including the "wrong" ones.
    for (const raw of hostile) {
      expect(() => parseMd(raw)).not.toThrow();
      expect(() => parseJsonc(raw)).not.toThrow();
      expect(() => parseJsonl(raw)).not.toThrow();
    }
  });

  it('P-07 resolver returns null for paths past valid kinds (no throw)', () => {
    const overlong = parseOcPath('oc://X/a/b/c.d.e.f.g.h');
    expect(() => resolveMdOcPath(parseMd(mdRaw).ast, overlong)).not.toThrow();
    expect(() => resolveJsoncOcPath(parseJsonc(jsoncRaw).ast, overlong)).not.toThrow();
    expect(() => resolveJsonlOcPath(parseJsonl(jsonlRaw).ast, overlong)).not.toThrow();
  });

  it('P-08 set-then-resolve produces the value just written (jsonc)', () => {
    const ast = parseJsonc('{ "k": 1 }').ast;
    const r = setJsoncOcPath(ast, parseOcPath('oc://X/k'), {
      kind: 'number',
      value: 42,
    });
    // Assert success unconditionally: with only the `if` guards below, a
    // failed set or an unexpected match kind would make this test pass
    // without checking anything.
    expect(r.ok).toBe(true);
    if (r.ok) {
      const m = resolveJsoncOcPath(r.ast, parseOcPath('oc://X/k'));
      expect(m?.kind).toBe('object-entry');
      if (m?.kind === 'object-entry') {
        expect(m.node.value).toEqual({ kind: 'number', value: 42 });
      }
    }
  });

  it('P-09 verbs are deterministic — same input twice produces same output', () => {
    expect(emitMd(parseMd(mdRaw).ast)).toBe(emitMd(parseMd(mdRaw).ast));
    expect(emitJsonc(parseJsonc(jsoncRaw).ast)).toBe(
      emitJsonc(parseJsonc(jsoncRaw).ast),
    );
    expect(emitJsonl(parseJsonl(jsonlRaw).ast)).toBe(
      emitJsonl(parseJsonl(jsonlRaw).ast),
    );
  });

  it('P-10 inferKind returns null for unknown extensions', () => {
    expect(inferKind('binary.bin')).toBeNull();
    expect(inferKind('no-ext')).toBeNull();
    expect(inferKind('archive.tar.gz')).toBeNull();
  });
});
/**
 * Wave 19 — edit → emit round-trip across all kinds.
 *
 * Each case parses a small document, applies one of the per-kind setters
 * (`setMdOcPath`, `setJsoncOcPath`, `setJsonlOcPath`), emits the result
 * and checks that the emitted text carries the edited value.
 *
 * Every case that expects the edit to succeed asserts `r.ok` before
 * inspecting the result. The `if (r.ok)` that follows only narrows the
 * result type for TypeScript; it must never be the sole guard, or a
 * failing edit would let the test pass with zero assertions.
 */
import { describe, expect, it } from 'vitest';
import { emitMd } from '../../emit.js';
import { setMdOcPath } from '../../edit.js';
import { emitJsonc } from '../../jsonc/emit.js';
import { setJsoncOcPath } from '../../jsonc/edit.js';
import { parseJsonc } from '../../jsonc/parse.js';
import { emitJsonl } from '../../jsonl/emit.js';
import { setJsonlOcPath } from '../../jsonl/edit.js';
import { parseJsonl } from '../../jsonl/parse.js';
import { parseOcPath } from '../../oc-path.js';
import { parseMd } from '../../parse.js';

describe('wave-19 edit-then-emit round-trip', () => {
  it('EE-01 md frontmatter edit re-parses to the new value', () => {
    const md = parseMd('---\nname: old\n---\n\n## Body\n').ast;
    const r = setMdOcPath(md, parseOcPath('oc://AGENTS.md/[frontmatter]/name'), 'new');
    expect(r.ok).toBe(true);
    if (r.ok) {
      const reparsed = parseMd(r.ast.raw).ast;
      expect(reparsed.frontmatter.find((e) => e.key === 'name')?.value).toBe('new');
    }
  });

  it('EE-02 md item kv edit re-parses to the new value', () => {
    const md = parseMd('## Boundaries\n\n- timeout: 5\n').ast;
    const r = setMdOcPath(
      md,
      parseOcPath('oc://AGENTS.md/boundaries/timeout/timeout'),
      '60',
    );
    expect(r.ok).toBe(true);
    if (r.ok) {
      const reparsed = parseMd(emitMd(r.ast)).ast;
      const block = reparsed.blocks.find((b) => b.slug === 'boundaries');
      expect(block?.items[0]?.kv?.value).toBe('60');
    }
  });

  it('EE-03 jsonc value edit re-parses to the new value', () => {
    const ast = parseJsonc('{ "k": 1 }').ast;
    const r = setJsoncOcPath(ast, parseOcPath('oc://config/k'), {
      kind: 'number',
      value: 42,
    });
    expect(r.ok).toBe(true);
    if (r.ok) {
      expect(JSON.parse(emitJsonc(r.ast))).toEqual({ k: 42 });
    }
  });

  it('EE-04 jsonc nested edit preserves untouched siblings', () => {
    const ast = parseJsonc('{ "a": 1, "b": { "c": 2, "d": 3 }, "e": 4 }').ast;
    const r = setJsoncOcPath(ast, parseOcPath('oc://config/b.c'), {
      kind: 'number',
      value: 99,
    });
    expect(r.ok).toBe(true);
    if (r.ok) {
      expect(JSON.parse(emitJsonc(r.ast))).toEqual({
        a: 1,
        b: { c: 99, d: 3 },
        e: 4,
      });
    }
  });

  it('EE-05 jsonl line edit re-parses to the new value at the same line', () => {
    const ast = parseJsonl('{"a":1}\n{"a":2}\n{"a":3}\n').ast;
    const r = setJsonlOcPath(ast, parseOcPath('oc://log/L2/a'), {
      kind: 'number',
      value: 99,
    });
    expect(r.ok).toBe(true);
    if (r.ok) {
      const reparsed = parseJsonl(emitJsonl(r.ast)).ast;
      const line2 = reparsed.lines[1];
      expect(line2?.kind).toBe('value');
      // Pull the parsed value out first so its shape is asserted rather
      // than silently skipped when it is not an object.
      const value = line2?.kind === 'value' ? line2.value : undefined;
      expect(value?.kind).toBe('object');
      if (value?.kind === 'object') {
        const entry = value.entries.find((e) => e.key === 'a');
        expect(entry?.value).toMatchObject({ kind: 'number', value: 99 });
      }
    }
  });

  it('EE-06 jsonc edit composes: two sequential edits both land', () => {
    let ast = parseJsonc('{ "a": 1, "b": 2 }').ast;
    let r = setJsoncOcPath(ast, parseOcPath('oc://config/a'), {
      kind: 'number',
      value: 10,
    });
    // Fail at the edit that broke, not at the final comparison.
    expect(r.ok).toBe(true);
    if (r.ok) {
      ast = r.ast;
    }
    r = setJsoncOcPath(ast, parseOcPath('oc://config/b'), {
      kind: 'number',
      value: 20,
    });
    expect(r.ok).toBe(true);
    if (r.ok) {
      ast = r.ast;
    }
    expect(JSON.parse(emitJsonc(ast))).toEqual({ a: 10, b: 20 });
  });

  it('EE-07 missing path returns structured failure (not throw)', () => {
    const ast = parseJsonc('{ "a": 1 }').ast;
    const r = setJsoncOcPath(ast, parseOcPath('oc://config/missing'), {
      kind: 'number',
      value: 99,
    });
    expect(r.ok).toBe(false);
    if (!r.ok) {
      expect(r.reason).toBe('unresolved');
    }
  });

  it('EE-08 each per-kind verb takes its own AST type — no cross-kind leakage', () => {
    // Each setter is called with the AST produced by its own kind's
    // parser; the caller picks the verb from the AST it holds.
    const md = parseMd('---\nx: 1\n---\n').ast;
    const jsonc = parseJsonc('{"x":1}').ast;
    const jsonl = parseJsonl('{"x":1}\n').ast;

    const a = setMdOcPath(md, parseOcPath('oc://X/[frontmatter]/x'), '2');
    const b = setJsoncOcPath(jsonc, parseOcPath('oc://X/x'), {
      kind: 'number',
      value: 2,
    });
    const c = setJsonlOcPath(jsonl, parseOcPath('oc://X/L1/x'), {
      kind: 'number',
      value: 2,
    });

    expect(a.ok).toBe(true);
    expect(b.ok).toBe(true);
    expect(c.ok).toBe(true);
  });

  it('EE-09 byte-fidelity is broken after edit (expected — render mode applies)', () => {
    const raw = '{\n "k": 1 // comment\n}\n';
    const ast = parseJsonc(raw).ast;
    const r = setJsoncOcPath(ast, parseOcPath('oc://config/k'), {
      kind: 'number',
      value: 2,
    });
    expect(r.ok).toBe(true);
    if (r.ok) {
      // The comment does not survive an edit; callers are expected to
      // know that edited output is re-rendered.
      expect(emitJsonc(r.ast)).not.toContain('// comment');
      // The value itself must be the new one.
      expect(JSON.parse(emitJsonc(r.ast))).toEqual({ k: 2 });
    }
  });

  it('EE-10 edit on empty AST surfaces no-root', () => {
    const ast = parseJsonc('').ast;
    const r = setJsoncOcPath(ast, parseOcPath('oc://config/x'), {
      kind: 'number',
      value: 1,
    });
    expect(r.ok).toBe(false);
    if (!r.ok) {
      expect(r.reason).toBe('no-root');
    }
  });
});
/**
 * Wave 2 — frontmatter edges.
 *
 * Pins how `parseMd` reads the leading `---` … `---` block: entries are
 * flat `key: value` pairs with surrounding quotes stripped, each entry
 * records its 1-based line, and malformed frontmatter yields a diagnostic
 * instead of throwing.
 *
 * The BOM fixture spells the byte-order mark as `\uFEFF` so the character
 * is visible in review and cannot be dropped silently by tooling.
 */
import { describe, expect, it } from 'vitest';
import { parseMd } from '../../parse.js';

describe('wave-02 frontmatter-edges', () => {
  it('FM-01 simple kv pairs', () => {
    const { ast } = parseMd('---\nname: x\ndescription: y\n---\n');
    expect(ast.frontmatter.map((e) => [e.key, e.value])).toEqual([
      ['name', 'x'],
      ['description', 'y'],
    ]);
  });

  it('FM-02 unclosed frontmatter emits diagnostic, treats as preamble', () => {
    const { ast, diagnostics } = parseMd('---\nname: x\nno close fence\nbody\n');
    expect(diagnostics.some((d) => d.code === 'OC_FRONTMATTER_UNCLOSED')).toBe(true);
    expect(ast.frontmatter).toEqual([]);
  });

  it('FM-03 empty frontmatter (just open + close)', () => {
    const { ast } = parseMd('---\n---\n');
    expect(ast.frontmatter).toEqual([]);
  });

  it('FM-04 frontmatter only, file has no other content', () => {
    const { ast } = parseMd('---\nk: v\n---\n');
    expect(ast.frontmatter).toEqual([{ key: 'k', value: 'v', line: 2 }]);
    expect(ast.preamble).toBe('');
    expect(ast.blocks).toEqual([]);
  });

  it('FM-05 double-quoted value', () => {
    const { ast } = parseMd('---\ntitle: "Hello, world"\n---\n');
    expect(ast.frontmatter[0]?.value).toBe('Hello, world');
  });

  it('FM-06 single-quoted value', () => {
    const { ast } = parseMd("---\ntitle: 'Hello, world'\n---\n");
    expect(ast.frontmatter[0]?.value).toBe('Hello, world');
  });

  it('FM-07 unquoted value with internal colons preserved', () => {
    const { ast } = parseMd('---\nurl: https://example.com:443/p\n---\n');
    expect(ast.frontmatter[0]?.value).toBe('https://example.com:443/p');
  });

  it('FM-08 empty value', () => {
    const { ast } = parseMd('---\nk:\n---\n');
    expect(ast.frontmatter[0]).toEqual({ key: 'k', value: '', line: 2 });
  });

  it('FM-09 value with leading/trailing whitespace trimmed', () => {
    const { ast } = parseMd('---\nk: spaced \n---\n');
    expect(ast.frontmatter[0]?.value).toBe('spaced');
  });

  it('FM-10 list-style continuations are silently dropped (substrate stays opinion-free)', () => {
    const { ast } = parseMd('---\ntools:\n - gh\n - curl\n---\n');
    // `tools:` has an empty inline value. The ` - gh` / ` - curl`
    // continuation lines produce no entries of their own, so only the
    // `tools` key is recorded, with an empty value.
    expect(ast.frontmatter.map((e) => e.key)).toEqual(['tools']);
    expect(ast.frontmatter[0]?.value).toBe('');
  });

  it('FM-11 line numbers are 1-based and accurate', () => {
    const { ast } = parseMd('---\nk1: v1\nk2: v2\nk3: v3\n---\n');
    expect(ast.frontmatter.map((e) => [e.key, e.line])).toEqual([
      ['k1', 2],
      ['k2', 3],
      ['k3', 4],
    ]);
  });

  it('FM-12 dash-key allowed', () => {
    const { ast } = parseMd('---\nuser-invocable: true\n---\n');
    expect(ast.frontmatter[0]?.key).toBe('user-invocable');
  });

  it('FM-13 underscore-key allowed', () => {
    const { ast } = parseMd('---\nparam_set: foo\n---\n');
    expect(ast.frontmatter[0]?.key).toBe('param_set');
  });

  it('FM-14 number-only value preserved as string', () => {
    const { ast } = parseMd('---\ntimeout: 15000\n---\n');
    expect(ast.frontmatter[0]?.value).toBe('15000');
  });

  it('FM-15 boolean-like value preserved as string', () => {
    const { ast } = parseMd('---\nenabled: true\n---\n');
    expect(ast.frontmatter[0]?.value).toBe('true');
  });

  it('FM-16 blank lines inside frontmatter are skipped', () => {
    const { ast } = parseMd('---\n\nk1: v1\n\nk2: v2\n\n---\n');
    expect(ast.frontmatter.map((e) => e.key)).toEqual(['k1', 'k2']);
  });

  it('FM-17 frontmatter with same key twice — both retained (no dedup)', () => {
    // Duplicate keys are kept in source order; nothing is merged.
    const { ast } = parseMd('---\nk: v1\nk: v2\n---\n');
    expect(ast.frontmatter).toEqual([
      { key: 'k', value: 'v1', line: 2 },
      { key: 'k', value: 'v2', line: 3 },
    ]);
  });

  it('FM-18 frontmatter must be at start — leading blank line breaks detection', () => {
    const { ast } = parseMd('\n---\nk: v\n---\n');
    expect(ast.frontmatter).toEqual([]);
  });

  it('FM-19 frontmatter must be at start — leading text breaks detection', () => {
    const { ast } = parseMd('intro\n\n---\nk: v\n---\n');
    expect(ast.frontmatter).toEqual([]);
  });

  it('FM-20 BOM before frontmatter open is tolerated', () => {
    // Explicit escape: the input must really start with U+FEFF for this
    // case to exercise BOM tolerance.
    const { ast } = parseMd('\uFEFF---\nname: bom\n---\n');
    expect(ast.frontmatter[0]?.value).toBe('bom');
  });

  it('FM-21 single-line file with `---` and `---` is empty frontmatter', () => {
    const { ast } = parseMd('---\n---');
    expect(ast.frontmatter).toEqual([]);
  });

  it('FM-22 hash-prefixed lines skipped (not yaml comments — just don\'t match kv regex)', () => {
    const { ast } = parseMd('---\n# comment\nk: v\n---\n');
    expect(ast.frontmatter.map((e) => e.key)).toEqual(['k']);
  });
});
/**
 * Wave 3 — H2 block split.
 *
 * Pins where `parseMd` starts a new block: a line beginning with `## `
 * at column 0 that is not inside a triple-backtick fence. H1, H3,
 * indented or space-less `##`, and `## ` inside a fence must not split.
 */
import { describe, expect, it } from 'vitest';
import { parseMd } from '../../parse.js';

/** Parses `raw` and returns only the AST, which is all these cases inspect. */
const astOf = (raw: string) => parseMd(raw).ast;

/** Heading text of every block in `raw`, in document order. */
const headingsOf = (raw: string) => astOf(raw).blocks.map((section) => section.heading);

describe('wave-03 h2-block-split', () => {
  it('H2-01 no headings → no blocks, all preamble', () => {
    const doc = astOf('Just prose, no headings.\nMore prose.\n');
    expect(doc.blocks).toEqual([]);
    // The preamble keeps the input's trailing newline; trimming is left
    // to callers that want it.
    expect(doc.preamble).toBe('Just prose, no headings.\nMore prose.\n');
  });

  it('H2-02 single heading splits preamble + one block', () => {
    const doc = astOf('preamble\n## Section\nbody\n');
    const [only] = doc.blocks;
    expect(doc.preamble.trim()).toBe('preamble');
    expect(doc.blocks.length).toBe(1);
    expect(only?.heading).toBe('Section');
    expect(only?.bodyText.trim()).toBe('body');
  });

  it('H2-03 multiple headings produce blocks in order', () => {
    expect(headingsOf('## A\nbody-a\n## B\nbody-b\n## C\nbody-c\n')).toEqual(['A', 'B', 'C']);
  });

  it('H2-04 H1 does NOT split', () => {
    const doc = astOf('# H1 heading\n## H2 heading\n');
    expect(doc.blocks.length).toBe(1);
    expect(doc.blocks[0]?.heading).toBe('H2 heading');
    expect(doc.preamble).toContain('# H1 heading');
  });

  it('H2-05 H3 does NOT split', () => {
    const doc = astOf('## H2\nbody\n### H3\nstill in H2 block\n');
    expect(doc.blocks.length).toBe(1);
    expect(doc.blocks[0]?.bodyText).toContain('### H3');
  });

  it('H2-06 `## ` inside fenced code block does NOT split', () => {
    const source = '## Real\n\n```md\n## Inside code\n```\n\n## Another real\n';
    expect(headingsOf(source)).toEqual(['Real', 'Another real']);
  });

  it('H2-07 `##` without trailing space — does NOT match (regex requires \\s+)', () => {
    const doc = astOf('##NoSpace\n## With space\n');
    expect(doc.blocks.length).toBe(1);
    expect(doc.blocks[0]?.heading).toBe('With space');
  });

  it('H2-08 leading whitespace before `##` — does NOT match (regex anchored at line start)', () => {
    const doc = astOf(' ## indented\n## not indented\n');
    expect(doc.blocks.length).toBe(1);
    expect(doc.blocks[0]?.heading).toBe('not indented');
  });

  it('H2-09 trailing whitespace on heading — trimmed in heading text', () => {
    const [section] = astOf('## Trailing \n').blocks;
    expect(section?.heading).toBe('Trailing');
    expect(section?.slug).toBe('trailing');
  });

  it('H2-10 inline code in heading preserved', () => {
    const [section] = astOf('## Use `gh` for GitHub\n').blocks;
    expect(section?.heading).toBe('Use `gh` for GitHub');
  });

  it('H2-11 markdown formatting in heading preserved', () => {
    const [section] = astOf('## **Bold** *italic*\n').blocks;
    expect(section?.heading).toBe('**Bold** *italic*');
  });

  it('H2-12 immediately after frontmatter', () => {
    const doc = astOf('---\nk: v\n---\n## Section\nbody\n');
    expect(doc.blocks[0]?.heading).toBe('Section');
    expect(doc.preamble).toBe('');
  });

  it('H2-13 H2 at end of file (no body)', () => {
    const [section] = astOf('preamble\n## End\n').blocks;
    expect(section?.heading).toBe('End');
    expect(section?.bodyText).toBe('');
  });

  it('H2-14 two consecutive H2s — empty body block between', () => {
    const [first, second] = astOf('## A\n## B\n').blocks;
    expect(first?.bodyText).toBe('');
    expect(second?.heading).toBe('B');
  });

  it('H2-15 line numbers are 1-based and track through frontmatter', () => {
    const [section] = astOf('---\nk: v\n---\n## At line 4\n').blocks;
    expect(section?.line).toBe(4);
  });

  it('H2-16 line numbers track through preamble', () => {
    const [section] = astOf('line 1\nline 2\n## At line 3\n').blocks;
    expect(section?.line).toBe(3);
  });

  it('H2-17 nested fenced code blocks (~~~ vs ```) — only ``` is detected', () => {
    // `~~~` is not treated as a fence, so the `## Next` after it splits
    // exactly as it would without the tildes.
    expect(headingsOf('## H\n\n~~~md\n~~~\n\n## Next\n')).toEqual(['H', 'Next']);
  });

  it('H2-18 setext-style heading (`Heading\\n========\\n`) is NOT recognized', () => {
    // The underlined heading produces no block of its own; only the
    // `## Real` line does.
    const doc = astOf('Heading\n=======\n## Real\n');
    expect(doc.blocks.length).toBe(1);
    expect(doc.blocks[0]?.heading).toBe('Real');
  });

  it('H2-19 empty heading text (`## `)', () => {
    // `## ` followed by nothing has no heading text, so no block is
    // created.
    expect(astOf('## \n').blocks).toEqual([]);
  });

  it('H2-20 heading with only whitespace (`## `)', () => {
    expect(astOf('## \n').blocks).toEqual([]);
  });

  it('H2-21 heading-shaped text inside multi-line bullet body — does split', () => {
    // A line-start `## ` splits even when it follows a bullet and its
    // continuation line.
    const source = '## Section\n- item starts\n continues\n## Next\n';
    expect(headingsOf(source)).toEqual(['Section', 'Next']);
  });
});
+ * + * Substrate guarantee: bullet lines (`- text`, `* text`, `+ text`) inside + * H2 blocks are extracted as `AstItem`. Lines matching `- key: value` + * also populate `item.kv`. Items inside fenced code blocks are NOT + * extracted. + */ +import { describe, expect, it } from 'vitest'; +import { parseMd } from '../../parse.js'; + +describe('wave-04 items', () => { + it('I-01 plain dash bullets', () => { + const { ast } = parseMd('## H\n- a\n- b\n- c\n'); + expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(['a', 'b', 'c']); + }); + + it('I-02 star bullets', () => { + const { ast } = parseMd('## H\n* a\n* b\n'); + expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(['a', 'b']); + }); + + it('I-03 plus bullets', () => { + const { ast } = parseMd('## H\n+ a\n+ b\n'); + expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(['a', 'b']); + }); + + it('I-04 mixed bullet markers in same section', () => { + const { ast } = parseMd('## H\n- dash\n* star\n+ plus\n'); + expect(ast.blocks[0]?.items.length).toBe(3); + }); + + it('I-05 kv-shape items populate kv', () => { + const { ast } = parseMd('## H\n- gh: GitHub CLI\n'); + expect(ast.blocks[0]?.items[0]?.kv).toEqual({ key: 'gh', value: 'GitHub CLI' }); + }); + + it('I-06 plain item has no kv', () => { + const { ast } = parseMd('## H\n- plain text\n'); + expect(ast.blocks[0]?.items[0]?.kv).toBeUndefined(); + }); + + it('I-07 multiple colons — first colon is the kv split', () => { + const { ast } = parseMd('## H\n- url: http://x.com:80/p\n'); + expect(ast.blocks[0]?.items[0]?.kv).toEqual({ + key: 'url', + value: 'http://x.com:80/p', + }); + }); + + it('I-08 colon with no space after is still kv', () => { + const { ast } = parseMd('## H\n- key:value\n'); + expect(ast.blocks[0]?.items[0]?.kv).toEqual({ key: 'key', value: 'value' }); + }); + + it('I-09 quoted value preserved verbatim (no unquote at item layer)', () => { + const { ast } = parseMd('## H\n- title: "quoted: value"\n'); + 
+ expect(ast.blocks[0]?.items[0]?.kv?.value).toBe('"quoted: value"'); + }); + + it('I-10 slug from kv key when kv present', () => { + const { ast } = parseMd('## H\n- The Tool: description\n'); + expect(ast.blocks[0]?.items[0]?.slug).toBe('the-tool'); + }); + + it('I-11 slug from item text when no kv', () => { + const { ast } = parseMd('## H\n- The Plain Item\n'); + expect(ast.blocks[0]?.items[0]?.slug).toBe('the-plain-item'); + }); + + it('I-12 items inside fenced code block are NOT extracted', () => { + const raw = '## H\n```\n- not a bullet\n- still not\n```\n- real bullet\n'; + const { ast } = parseMd(raw); + expect(ast.blocks[0]?.items.length).toBe(1); + expect(ast.blocks[0]?.items[0]?.text).toBe('real bullet'); + }); + + it('I-13 line numbers track through block body', () => { + const { ast } = parseMd('## H\n- first\n- second\n- third\n'); + expect(ast.blocks[0]?.items.map((i) => i.line)).toEqual([2, 3, 4]); + }); + + it('I-14 trailing whitespace on bullet trimmed in text', () => { + const { ast } = parseMd('## H\n- spaced \n'); + expect(ast.blocks[0]?.items[0]?.text).toBe('spaced'); + }); + + it('I-15 empty bullet text is dropped', () => { + const { ast } = parseMd('## H\n- \n- real\n'); + // The regex requires (.+?) non-empty, so `- ` alone doesn't match. + expect(ast.blocks[0]?.items.length).toBe(1); + }); + + it('I-16 indented bullet (sub-bullet) — NOT extracted as item', () => { + // The current regex `^(?:[-*+])\\s+(.+?)\\s*$` requires column-0 + // bullet markers; indented bullets do NOT match. Documented as a + // limit — sub-bullets surface in body text but not in items. + const { ast } = parseMd('## H\n- top\n - sub\n'); + expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(['top']); + }); + + it('I-17 numbered list (1. item) is NOT extracted as item', () => { + const { ast } = parseMd('## H\n1. first\n2. 
second\n'); + expect(ast.blocks[0]?.items).toEqual([]); + }); + + it('I-18 items in a section with no body before — first item line is heading+1', () => { + const { ast } = parseMd('## H\n- a\n'); + expect(ast.blocks[0]?.items[0]?.line).toBe(2); + }); + + it('I-19 items spread across blocks are scoped to their block', () => { + const { ast } = parseMd('## A\n- a1\n## B\n- b1\n- b2\n'); + expect(ast.blocks[0]?.items.length).toBe(1); + expect(ast.blocks[1]?.items.length).toBe(2); + expect(ast.blocks[1]?.items.map((i) => i.text)).toEqual(['b1', 'b2']); + }); + + it('I-20 item with only-symbol kv key still parses', () => { + const { ast } = parseMd('## H\n- API_KEY: secret-value\n'); + expect(ast.blocks[0]?.items[0]?.kv).toEqual({ + key: 'API_KEY', + value: 'secret-value', + }); + expect(ast.blocks[0]?.items[0]?.slug).toBe('api-key'); + }); + + it('I-21 item with kv where value is empty', () => { + const { ast } = parseMd('## H\n- key:\n'); + // `- key:` has empty value after the colon; the kv regex requires + // (.+) for value, so this falls through to plain item. 
+ expect(ast.blocks[0]?.items[0]?.kv).toBeUndefined(); + expect(ast.blocks[0]?.items[0]?.text).toBe('key:'); + }); + + it('I-22 bullet in preamble (before first H2) is NOT in any block', () => { + const { ast } = parseMd('- preamble bullet\n## H\n- block bullet\n'); + expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(['block bullet']); + expect(ast.preamble).toContain('- preamble bullet'); + }); + + it('I-23 bullet with internal markdown (italics, code) preserved in text', () => { + const { ast } = parseMd('## H\n- use *gh* and `curl`\n'); + expect(ast.blocks[0]?.items[0]?.text).toBe('use *gh* and `curl`'); + }); +}); diff --git a/src/oc-path/tests/scenarios/jsonc-byte-fidelity.test.ts b/src/oc-path/tests/scenarios/jsonc-byte-fidelity.test.ts new file mode 100644 index 00000000000..36229ee290e --- /dev/null +++ b/src/oc-path/tests/scenarios/jsonc-byte-fidelity.test.ts @@ -0,0 +1,188 @@ +/** + * Wave 15 — JSONC byte-fidelity round-trip. + * + * Substrate guarantee: `emitJsonc(parseJsonc(raw)) === raw` for every + * input the parser accepts. Mirrors wave-01 but for the JSONC kind. + * Comments, trailing commas, BOMs, mixed line endings — all byte-stable + * via the round-trip path. + * + * **What this file proves**: byte-identical round-trip via the + * default-mode emit (which echoes `ast.raw`). This is necessary but + * not sufficient — without the structural assertions below, a parser + * that emitted `ast.root: null` for every input would still pass the + * byte test (since `raw` is preserved on the AST regardless). + * + * Each assertParseable() call proves the parser actually ran and + * produced a structural tree, not just stored `raw` verbatim and + * called it a day. JC-17 deliberately uses `assertNotParseable` — + * malformed input must echo `raw` AND emit a diagnostic. 
+ */ +import { describe, expect, it } from 'vitest'; +import { emitJsonc } from '../../jsonc/emit.js'; +import { parseJsonc } from '../../jsonc/parse.js'; +import type { JsoncValue } from '../../jsonc/ast.js'; + +function rt(raw: string): string { + return emitJsonc(parseJsonc(raw).ast); +} + +/** + * Verify the parser actually produced a structural tree (not just a + * `null` root with echoed `raw`). Without this, a parser that + * delegated everything to `raw` would pass the byte-fidelity test + * trivially. Returns the parsed root for follow-up structural asserts. + */ +function assertParseable(raw: string): JsoncValue { + const result = parseJsonc(raw); + expect(result.ast.root).not.toBeNull(); + return result.ast.root as JsoncValue; +} + +/** + * The complement: malformed input round-trips bytes verbatim AND + * emits an error diagnostic. JC-17 needs this — without the + * diagnostic check, the test would pass even if the parser silently + * dropped malformed content. + */ +function assertNotParseable(raw: string): void { + const result = parseJsonc(raw); + expect(result.ast.root).toBeNull(); + expect(result.diagnostics.some((d) => d.severity === 'error')).toBe(true); +} + +describe('wave-15 jsonc byte-fidelity', () => { + it('JC-01 empty file', () => { + expect(rt('')).toBe(''); + }); + + it('JC-02 whitespace-only', () => { + expect(rt(' \n\n \n')).toBe(' \n\n \n'); + }); + + it('JC-03 empty object', () => { + expect(rt('{}')).toBe('{}'); + const root = assertParseable('{}'); + expect(root.kind).toBe('object'); + if (root.kind === 'object') {expect(root.entries).toHaveLength(0);} + }); + + it('JC-04 empty array', () => { + expect(rt('[]')).toBe('[]'); + const root = assertParseable('[]'); + expect(root.kind).toBe('array'); + if (root.kind === 'array') {expect(root.items).toHaveLength(0);} + }); + + it('JC-05 trivial scalar root', () => { + expect(rt('42')).toBe('42'); + expect(rt('"x"')).toBe('"x"'); + expect(rt('true')).toBe('true'); + 
expect(rt('null')).toBe('null'); + expect(assertParseable('42').kind).toBe('number'); + expect(assertParseable('"x"').kind).toBe('string'); + expect(assertParseable('true').kind).toBe('boolean'); + expect(assertParseable('null').kind).toBe('null'); + }); + + it('JC-06 line comments preserved', () => { + const raw = '// a leading comment\n{ "x": 1 } // trailing\n'; + expect(rt(raw)).toBe(raw); + // Pin parse: the structural value `x: 1` is reachable. + const root = assertParseable(raw); + expect(root.kind).toBe('object'); + }); + + it('JC-07 block comments preserved', () => { + const raw = '/* header */\n{\n /* inline */\n "x": 1\n}\n'; + expect(rt(raw)).toBe(raw); + const root = assertParseable(raw); + expect(root.kind).toBe('object'); + }); + + it('JC-08 trailing commas preserved', () => { + const raw = '{\n "x": 1,\n "y": 2,\n}'; + expect(rt(raw)).toBe(raw); + const root = assertParseable(raw); + if (root.kind === 'object') {expect(root.entries).toHaveLength(2);} + }); + + it('JC-09 mixed CRLF + LF preserved', () => { + const raw = '{\r\n "x": 1,\n "y": 2\r\n}'; + expect(rt(raw)).toBe(raw); + const root = assertParseable(raw); + if (root.kind === 'object') {expect(root.entries.map((e) => e.key)).toEqual(['x', 'y']);} + }); + + it('JC-10 BOM preserved on raw', () => { + const raw = '{ "x": 1 }'; + expect(rt(raw)).toBe(raw); + // BOM stripped before parsing — parser still sees `{` as first char. + expect(assertParseable(raw).kind).toBe('object'); + }); + + it('JC-11 deeply nested structures preserved', () => { + const raw = '{ "a": { "b": { "c": { "d": [1, [2, [3, [4]]]] } } } }'; + expect(rt(raw)).toBe(raw); + expect(assertParseable(raw).kind).toBe('object'); + }); + + it('JC-12 string with escape sequences preserved', () => { + const raw = '{ "s": "a\\nb\\tc\\u0041\\\\d\\"e" }'; + expect(rt(raw)).toBe(raw); + // Pin escape resolution — parsed value carries actual control chars. 
+ const root = assertParseable(raw); + if (root.kind === 'object') { + const s = root.entries[0]?.value; + if (s?.kind === 'string') { + expect(s.value).toBe('a\nb\tcA\\d"e'); + } + } + }); + + it('JC-13 numbers in scientific / negative / decimal forms preserved', () => { + const raw = '[ 0, -0, 1.5, -3.14, 1e3, -2.5e-10, 1E+5 ]'; + expect(rt(raw)).toBe(raw); + const root = assertParseable(raw); + if (root.kind === 'array') { + expect(root.items).toHaveLength(7); + expect(root.items.every((v) => v.kind === 'number')).toBe(true); + } + }); + + it('JC-14 unicode characters preserved verbatim', () => { + const raw = '{ "name": "héllo 世界 🎉" }'; + expect(rt(raw)).toBe(raw); + const root = assertParseable(raw); + if (root.kind === 'object') { + const v = root.entries[0]?.value; + if (v?.kind === 'string') {expect(v.value).toBe('héllo 世界 🎉');} + } + }); + + it('JC-15 idiosyncratic whitespace preserved', () => { + const raw = '{ "x" : 1 ,\n "y": 2}'; + expect(rt(raw)).toBe(raw); + expect(assertParseable(raw).kind).toBe('object'); + }); + + it('JC-16 file-level trailing whitespace preserved', () => { + const raw = '{ "x": 1 }\n\n\n'; + expect(rt(raw)).toBe(raw); + expect(assertParseable(raw).kind).toBe('object'); + }); + + it('JC-17 malformed input still emits raw verbatim AND emits a diagnostic', () => { + const raw = '{ broken json with "key": value }'; + expect(rt(raw)).toBe(raw); + // Without this assertion the test passes for any input regardless + // of parser behavior — pin both halves of the contract. + assertNotParseable(raw); + }); + + it('JC-18 comments-only file preserved', () => { + const raw = '// just a comment\n/* and a block */\n'; + expect(rt(raw)).toBe(raw); + // Comments-only files have no structural root — that's expected. 
+ expect(parseJsonc(raw).ast.root).toBeNull(); + }); +}); diff --git a/src/oc-path/tests/scenarios/jsonc-resolver-edges.test.ts b/src/oc-path/tests/scenarios/jsonc-resolver-edges.test.ts new file mode 100644 index 00000000000..06001ddcb98 --- /dev/null +++ b/src/oc-path/tests/scenarios/jsonc-resolver-edges.test.ts @@ -0,0 +1,132 @@ +/** + * Wave 17 — JSONC resolver adversarial edges. + * + * Substrate guarantee: the resolver walks the value tree deterministically + * with mixed dotted / segment paths, returns null on any unresolvable + * walk, and never throws on hostile inputs. + */ +import { describe, expect, it } from 'vitest'; +import { parseJsonc } from '../../jsonc/parse.js'; +import { resolveJsoncOcPath } from '../../jsonc/resolve.js'; +import { parseOcPath } from '../../oc-path.js'; + +function rs(raw: string, ocPath: string) { + return resolveJsoncOcPath(parseJsonc(raw).ast, parseOcPath(ocPath)); +} + +describe('wave-17 jsonc resolver edges', () => { + it('JR-01 root resolves on empty object', () => { + expect(rs('{}', 'oc://config')?.kind).toBe('root'); + }); + + it('JR-02 root resolves on scalar root', () => { + expect(rs('42', 'oc://config')?.kind).toBe('root'); + }); + + it('JR-03 root resolves on array root', () => { + expect(rs('[1,2,3]', 'oc://config')?.kind).toBe('root'); + }); + + it('JR-04 deep dotted descent within section', () => { + const m = rs('{"a":{"b":{"c":1}}}', 'oc://config/a.b.c'); + expect(m?.kind).toBe('object-entry'); + }); + + it('JR-05 missing intermediate key returns null', () => { + expect(rs('{"a":{"b":1}}', 'oc://config/a.x.b')).toBeNull(); + }); + + it('JR-06 numeric segment indexes into array', () => { + const m = rs('{"items":["a","b","c"]}', 'oc://config/items.1'); + expect(m?.kind).toBe('value'); + if (m?.kind === 'value') { + expect(m.node).toMatchObject({ kind: 'string', value: 'b' }); + } + }); + + it('JR-07 negative array index resolves to Nth-from-last', () => { + expect(rs('{"x":[1,2]}', 
'oc://config/x.-1')).toMatchObject({ kind: 'value', node: { kind: 'number', value: 2 } }); + expect(rs('{"x":[1,2]}', 'oc://config/x.-2')).toMatchObject({ kind: 'value', node: { kind: 'number', value: 1 } }); + expect(rs('{"x":[1,2]}', 'oc://config/x.-5')).toBeNull(); + }); + + it('JR-08 out-of-bounds array index returns null', () => { + expect(rs('{"x":[1,2]}', 'oc://config/x.99')).toBeNull(); + }); + + it('JR-09 non-integer index returns null (no NaN coercion)', () => { + expect(rs('{"x":[1,2]}', 'oc://config/x.foo')).toBeNull(); + }); + + it('JR-10 null AST root returns null on any path', () => { + expect(rs('', 'oc://config/x')).toBeNull(); + }); + + it('JR-11 descending past a primitive returns null', () => { + expect(rs('{"x":42}', 'oc://config/x.y')).toBeNull(); + }); + + it('JR-12 empty segment in dotted path throws OcPathError', () => { + // v1 invariant: malformed paths fail loud at parse time, not silently null. + expect(() => rs('{"x":1}', 'oc://config/x..y')).toThrow(/Empty dotted sub-segment/); + }); + + it('JR-13 string value at leaf surfaces via object-entry shape', () => { + const m = rs('{"k":"v"}', 'oc://config/k'); + expect(m?.kind).toBe('object-entry'); + if (m?.kind === 'object-entry') {expect(m.node.key).toBe('k');} + }); + + it('JR-14 boolean and null values resolve', () => { + const m1 = rs('{"k":true}', 'oc://config/k'); + expect(m1?.kind).toBe('object-entry'); + const m2 = rs('{"k":null}', 'oc://config/k'); + expect(m2?.kind).toBe('object-entry'); + }); + + it('JR-15 mixed slash + dot segments resolve identically', () => { + const a = rs('{"a":{"b":{"c":1}}}', 'oc://config/a.b.c'); + const b = rs('{"a":{"b":{"c":1}}}', 'oc://config/a/b.c'); + const c = rs('{"a":{"b":{"c":1}}}', 'oc://config/a/b/c'); + expect(a?.kind).toBe(b?.kind); + expect(b?.kind).toBe(c?.kind); + }); + + it('JR-16 keys with special characters resolve', () => { + const m = rs('{"a-b_c":{"x":1}}', 'oc://config/a-b_c.x'); + expect(m?.kind).toBe('object-entry'); + }); + + 
+ it('JR-17 unicode keys resolve', () => { + const m = rs('{"héllo":1}', 'oc://config/héllo'); + expect(m?.kind).toBe('object-entry'); + }); + + it('JR-18 large nested structure (depth 20) resolves to leaf', () => { + let json = '"leaf"'; + const segs: string[] = []; + for (let i = 19; i >= 0; i--) { + json = `{"k${i}":${json}}`; + segs.unshift(`k${i}`); + } + const m = rs(json, `oc://config/${segs.join('.')}`); + expect(m?.kind).toBe('object-entry'); + if (m?.kind === 'object-entry') { + expect(m.node.value).toMatchObject({ kind: 'string', value: 'leaf' }); + } + }); + + it('JR-19 resolver is non-mutating across calls', () => { + const { ast } = parseJsonc('{"x":{"y":1}}'); + const before = JSON.stringify(ast); // snapshot the exact AST handed to the resolver below (rs() would re-parse a fresh one) + resolveJsoncOcPath(ast, parseOcPath('oc://config/x.y')); + resolveJsoncOcPath(ast, parseOcPath('oc://config/x')); + resolveJsoncOcPath(ast, parseOcPath('oc://config/missing')); + expect(JSON.stringify(ast)).toBe(before); + }); + + it('JR-20 hostile input shapes do not throw', () => { + expect(() => rs('{garbage}', 'oc://config/x')).not.toThrow(); + expect(() => rs('{"a":', 'oc://config/a')).not.toThrow(); + }); +}); diff --git a/src/oc-path/tests/scenarios/jsonl-byte-fidelity.test.ts b/src/oc-path/tests/scenarios/jsonl-byte-fidelity.test.ts new file mode 100644 index 00000000000..adf0e63f7fe --- /dev/null +++ b/src/oc-path/tests/scenarios/jsonl-byte-fidelity.test.ts @@ -0,0 +1,125 @@ +/** + * Wave 16 — JSONL byte-fidelity round-trip. + * + * Substrate guarantee: `emitJsonl(parseJsonl(raw)) === raw` for every + * input the parser accepts. JSONL is line-oriented; blanks, malformed + * lines, mixed line endings, trailing-newline shape — all byte-stable. 
+ */ +import { describe, expect, it } from 'vitest'; +import { emitJsonl } from '../../jsonl/emit.js'; +import { parseJsonl } from '../../jsonl/parse.js'; + +function rt(raw: string): string { + return emitJsonl(parseJsonl(raw).ast); +} + +describe('wave-16 jsonl byte-fidelity', () => { + it('JL-01 empty file', () => { + expect(rt('')).toBe(''); + }); + + it('JL-02 single line no trailing newline', () => { + expect(rt('{"a":1}')).toBe('{"a":1}'); + }); + + it('JL-03 single line with trailing newline', () => { + expect(rt('{"a":1}\n')).toBe('{"a":1}\n'); + }); + + it('JL-04 multiple lines preserved', () => { + const raw = '{"a":1}\n{"b":2}\n{"c":3}\n'; + expect(rt(raw)).toBe(raw); + }); + + it('JL-05 blank line in the middle preserved', () => { + const raw = '{"a":1}\n\n{"b":2}\n'; + expect(rt(raw)).toBe(raw); + }); + + it('JL-06 multiple blank lines preserved', () => { + const raw = '{"a":1}\n\n\n{"b":2}\n'; + expect(rt(raw)).toBe(raw); + }); + + it('JL-07 malformed line round-trips verbatim', () => { + const raw = '{"a":1}\nthis is not json\n{"b":2}\n'; + expect(rt(raw)).toBe(raw); + }); + + it('JL-08 entirely malformed file round-trips', () => { + const raw = 'header\nbody\nfooter\n'; + expect(rt(raw)).toBe(raw); + }); + + it('JL-09 leading + trailing blanks preserved', () => { + const raw = '\n\n{"a":1}\n\n'; + expect(rt(raw)).toBe(raw); + }); + + it('JL-10 file ending without final newline preserved', () => { + const raw = '{"a":1}\n{"b":2}'; + expect(rt(raw)).toBe(raw); + }); + + it('JL-11 nested object lines preserved', () => { + const raw = '{"a":{"b":{"c":1}}}\n{"x":[1,[2,[3]]]}\n'; + expect(rt(raw)).toBe(raw); + }); + + it('JL-12 unicode in a value line preserved', () => { + const raw = '{"name":"héllo 世界 🎉"}\n'; + expect(rt(raw)).toBe(raw); + }); + + it('JL-13 idiosyncratic whitespace inside a line preserved', () => { + const raw = '{ "a" : 1 }\n'; + expect(rt(raw)).toBe(raw); + }); + + it('JL-14 single blank line file preserved', () => { + const raw = 
'\n'; + expect(rt(raw)).toBe(raw); + }); + + it('JL-15 large log (1000 lines) preserved', () => { + const lines = Array.from({ length: 1000 }, (_, i) => `{"i":${i}}`); + const raw = lines.join('\n') + '\n'; + expect(rt(raw)).toBe(raw); + }); + + it('JL-16 mixed value + malformed + blank preserved', () => { + const raw = + '{"a":1}\n{not json}\n\n{"b":2}\nstill not json\n{"c":3}\n'; + expect(rt(raw)).toBe(raw); + }); + + // F10 — CRLF preservation. Without lineEnding tracking on the AST, + // a CRLF input edited via setJsonlOcPath rebuilds raw via render + // which joins with `\n`, mixing endings on Windows-authored datasets. + it('JL-17 CRLF input round-trips byte-identical via the default emit', () => { + const raw = '{"a":1}\r\n{"b":2}\r\n{"c":3}\r\n'; + expect(rt(raw)).toBe(raw); + }); + + it('JL-18 CRLF input preserves CRLF after a structural edit (render mode)', () => { + // Pin the render path: setJsonlOcPath rebuilds raw via render mode, + // which now consults ast.lineEnding to reconstruct the original + // convention. Without the fix, render-mode output uses `\n` and + // produces mixed line endings on Windows datasets. + const raw = '{"a":1}\r\n{"b":2}\r\n'; + const { ast } = parseJsonl(raw); + const rendered = emitJsonl(ast, { mode: 'render' }); + expect(rendered).toBe('{"a":1}\r\n{"b":2}'); + // Pin no-LF-only joins by counting CRLFs vs bare LFs. + expect((rendered.match(/\r\n/g) ?? []).length).toBe(1); + expect((rendered.match(/(?<!\r)\n/g) ?? []).length).toBe(0); + }); + + it('JL-19 LF input stays LF in render mode', () => { + // Symmetric: a Unix-authored log doesn't mysteriously gain CRLF. 
+ const raw = '{"a":1}\n{"b":2}\n'; + const { ast } = parseJsonl(raw); + const rendered = emitJsonl(ast, { mode: 'render' }); + expect(rendered).toBe('{"a":1}\n{"b":2}'); + }); +}); diff --git a/src/oc-path/tests/scenarios/jsonl-resolver-edges.test.ts b/src/oc-path/tests/scenarios/jsonl-resolver-edges.test.ts new file mode 100644 index 00000000000..edecb2cbb03 --- /dev/null +++ b/src/oc-path/tests/scenarios/jsonl-resolver-edges.test.ts @@ -0,0 +1,125 @@ +/** + * Wave 18 — JSONL resolver adversarial edges. + * + * Substrate guarantee: line addresses (`Lnnn`, `$last`) walk + * deterministically; missing addresses, blank-line targets, and + * malformed-line targets all surface as null without throwing. + */ +import { describe, expect, it } from 'vitest'; +import { parseJsonl } from '../../jsonl/parse.js'; +import { resolveJsonlOcPath } from '../../jsonl/resolve.js'; +import { parseOcPath } from '../../oc-path.js'; + +function rs(raw: string, ocPath: string) { + return resolveJsonlOcPath(parseJsonl(raw).ast, parseOcPath(ocPath)); +} + +describe('wave-18 jsonl resolver edges', () => { + it('JLR-01 root resolves with no segments', () => { + expect(rs('{"a":1}\n', 'oc://log')?.kind).toBe('root'); + }); + + it('JLR-02 L1 resolves to a value line', () => { + const m = rs('{"a":1}\n', 'oc://log/L1'); + expect(m?.kind).toBe('line'); + }); + + it('JLR-03 L99 unknown line returns null', () => { + expect(rs('{"a":1}\n', 'oc://log/L99')).toBeNull(); + }); + + it('JLR-04 $last picks the most recent value line', () => { + const m = rs('{"a":1}\n{"a":2}\n{"a":3}\n', 'oc://log/$last/a'); + expect(m?.kind).toBe('object-entry'); + if (m?.kind === 'object-entry') { + expect(m.node.value).toMatchObject({ kind: 'number', value: 3 }); + } + }); + + it('JLR-05 $last skips trailing blank lines', () => { + const m = rs('{"a":1}\n\n\n', 'oc://log/$last/a'); + expect(m?.kind).toBe('object-entry'); + if (m?.kind === 'object-entry') { + expect(m.node.value).toMatchObject({ kind: 'number', value: 
1 }); + } + }); + + it('JLR-06 $last skips trailing malformed lines', () => { + const m = rs('{"a":1}\nbroken\n', 'oc://log/$last/a'); + expect(m?.kind).toBe('object-entry'); + }); + + it('JLR-07 $last on empty file returns null', () => { + expect(rs('', 'oc://log/$last/x')).toBeNull(); + }); + + it('JLR-08 $last on all-blank file returns null', () => { + expect(rs('\n\n\n', 'oc://log/$last/x')).toBeNull(); + }); + + it('JLR-09 $last on all-malformed file returns null', () => { + expect(rs('a\nb\nc\n', 'oc://log/$last/x')).toBeNull(); + }); + + it('JLR-10 garbage line address returns null', () => { + expect(rs('{"a":1}\n', 'oc://log/garbage')).toBeNull(); + expect(rs('{"a":1}\n', 'oc://log/L')).toBeNull(); + expect(rs('{"a":1}\n', 'oc://log/Labc')).toBeNull(); + }); + + it('JLR-11 descent into a blank line returns null', () => { + expect(rs('{"a":1}\n\n{"b":2}\n', 'oc://log/L2/anything')).toBeNull(); + }); + + it('JLR-12 descent into a malformed line returns null', () => { + expect(rs('{"a":1}\nbroken\n{"b":2}\n', 'oc://log/L2/anything')).toBeNull(); + }); + + it('JLR-13 missing field on a value line returns null', () => { + expect(rs('{"a":1}\n', 'oc://log/L1/missing')).toBeNull(); + }); + + it('JLR-14 dotted descent through line value resolves', () => { + const m = rs('{"r":{"ok":true,"d":"x"}}\n', 'oc://log/L1/r.d'); + expect(m?.kind).toBe('object-entry'); + if (m?.kind === 'object-entry') { + expect(m.node.value).toMatchObject({ kind: 'string', value: 'x' }); + } + }); + + it('JLR-15 array index inside a line resolves', () => { + const m = rs('{"items":["a","b","c"]}\n', 'oc://log/L1/items.2'); + expect(m?.kind).toBe('value'); + if (m?.kind === 'value') { + expect(m.node).toMatchObject({ kind: 'string', value: 'c' }); + } + }); + + it('JLR-16 line numbers are 1-indexed', () => { + const m = rs('{"a":1}\n{"a":2}\n', 'oc://log/L1/a'); + if (m?.kind === 'object-entry') { + expect(m.node.value).toMatchObject({ kind: 'number', value: 1 }); + } + }); + + it('JLR-17 
line numbers preserved across blank/malformed entries', () => { + const m = rs('{"a":1}\n\nbroken\n{"a":4}\n', 'oc://log/L4/a'); + expect(m?.kind).toBe('object-entry'); + if (m?.kind === 'object-entry') { + expect(m.node.value).toMatchObject({ kind: 'number', value: 4 }); + } + }); + + it('JLR-18 resolver is non-mutating', () => { + const { ast } = parseJsonl('{"a":1}\n{"b":2}\n'); + const before = JSON.stringify(ast); // snapshot the exact AST handed to the resolver below (rs() would re-parse a fresh one) + resolveJsonlOcPath(ast, parseOcPath('oc://log/L1')); + resolveJsonlOcPath(ast, parseOcPath('oc://log/$last')); + expect(JSON.stringify(ast)).toBe(before); + }); + + it('JLR-19 hostile inputs do not throw', () => { + expect(() => rs('not json\n', 'oc://log/L1')).not.toThrow(); + expect(() => rs('', 'oc://log/$last')).not.toThrow(); + }); +}); diff --git a/src/oc-path/tests/scenarios/malformed-input.test.ts b/src/oc-path/tests/scenarios/malformed-input.test.ts new file mode 100644 index 00000000000..baa011352ae --- /dev/null +++ b/src/oc-path/tests/scenarios/malformed-input.test.ts @@ -0,0 +1,155 @@ +/** + * Wave 11 — malformed input recovery. + * + * Substrate guarantee: parser is **soft-error**: it never throws on + * malformed input. Suspicious-but-recoverable inputs produce + * diagnostics; unparseable structural pieces are dropped silently. 
+ */ +import { describe, expect, it } from 'vitest'; +import { parseMd } from '../../parse.js'; + +describe('wave-11 malformed-input', () => { + it('M-01 truncated mid-frontmatter (no close fence)', () => { + const raw = '---\nname: github\n'; + const { ast, diagnostics } = parseMd(raw); + expect(diagnostics.some((d) => d.code === 'OC_FRONTMATTER_UNCLOSED')).toBe(true); + expect(ast.frontmatter).toEqual([]); + }); + + it('M-02 truncated mid-section', () => { + const raw = '## H\n- item\nmid-line'; + const { ast } = parseMd(raw); + expect(ast.blocks.length).toBe(1); + }); + + it('M-03 only `---` (single fence, no content)', () => { + expect(() => parseMd('---\n')).not.toThrow(); + }); + + it('M-04 only `---\\n---`', () => { + const { ast } = parseMd('---\n---'); + expect(ast.frontmatter).toEqual([]); + }); + + it('M-05 binary-ish bytes (non-ASCII control chars)', () => { + const raw = '## H\n\x00\x01\x02\n'; + expect(() => parseMd(raw)).not.toThrow(); + }); + + it('M-06 very long single line (10k chars)', () => { + const raw = `## H\n${'x'.repeat(10_000)}\n`; + const { ast } = parseMd(raw); + expect(ast.blocks[0]?.heading).toBe('H'); + }); + + it('M-07 deeply repeated headings (1000 H2 blocks)', () => { + const lines: string[] = []; + for (let i = 0; i < 1000; i++) { + lines.push(`## H${i}`); + lines.push(`- item ${i}`); + } + const raw = lines.join('\n') + '\n'; + const { ast } = parseMd(raw); + expect(ast.blocks.length).toBe(1000); + }); + + it('M-08 bullet shape that isn\'t actually a bullet (`-not-a-bullet`)', () => { + const { ast } = parseMd('## H\n-not-a-bullet\n- real\n'); + expect(ast.blocks[0]?.items.length).toBe(1); + }); + + it('M-09 unclosed code fence', () => { + const raw = '## H\n```\nbody\n'; + expect(() => parseMd(raw)).not.toThrow(); + }); + + it('M-10 mismatched fence (open with ``` close with ~~~)', () => { + const raw = '## H\n```\nbody\n~~~\n'; + expect(() => parseMd(raw)).not.toThrow(); + }); + + it('M-11 nested fences (treated linearly, not 
nested)', () => { + const raw = '## H\n```\n```\nstill-in-second\n```\n'; + expect(() => parseMd(raw)).not.toThrow(); + }); + + it('M-12 empty file', () => { + const { ast, diagnostics } = parseMd(''); + expect(ast.raw).toBe(''); + expect(ast.frontmatter).toEqual([]); + expect(ast.blocks).toEqual([]); + expect(diagnostics).toEqual([]); + }); + + it('M-13 single character file', () => { + const { ast } = parseMd('x'); + expect(ast.preamble).toBe('x'); + expect(ast.blocks).toEqual([]); + }); + + it('M-14 single newline file', () => { + const { ast } = parseMd('\n'); + expect(ast.blocks).toEqual([]); + }); + + it('M-15 file with mixed indentation extremes (tabs, spaces, mixed)', () => { + const raw = '## H\n\t- tabbed\n - spaced\n\t - mixed\n'; + expect(() => parseMd(raw)).not.toThrow(); + }); + + it('M-16 frontmatter with frontmatter-shaped content inside (---)', () => { + const raw = '---\nk: v\n---\n\n---\nshould not parse as second frontmatter\n---\n'; + const { ast } = parseMd(raw); + expect(ast.frontmatter.map((e) => e.key)).toEqual(['k']); + // Second `---` block becomes part of preamble/body (it's not at file start). 
+ expect(ast.preamble).toContain('---'); + }); + + it('M-17 lines starting with `#` but not heading (raw `#` chars in body)', () => { + const raw = '## H\n\n# This is text starting with #\n#### h4 not parsed as block\n'; + const { ast } = parseMd(raw); + expect(ast.blocks.length).toBe(1); + expect(ast.blocks[0]?.bodyText).toContain('# This is text'); + }); + + it('M-18 lines starting with multiple ## but malformed (####, ######)', () => { + const { ast } = parseMd('## Real\n#### Not block\n###### Not block\n'); + expect(ast.blocks.length).toBe(1); + expect(ast.blocks[0]?.heading).toBe('Real'); + }); + + it('M-19 file with just whitespace', () => { + expect(() => parseMd(' \n\t\n \n')).not.toThrow(); + }); + + it('M-20 file with only BOM', () => { + const { ast } = parseMd(''); + expect(ast.raw).toBe(''); + }); + + it('M-21 file mixing BOM + frontmatter + body + sections', () => { + const raw = '---\nk: v\n---\n\nbody\n## Section\n- item\n'; + expect(() => parseMd(raw)).not.toThrow(); + const { ast } = parseMd(raw); + expect(ast.frontmatter[0]?.value).toBe('v'); + expect(ast.blocks[0]?.heading).toBe('Section'); + }); + + it('M-22 line endings: legacy CR-only (Mac classic)', () => { + // Our regex /\r?\n/ doesn't split on CR-only. Treats whole as one line. 
+ const raw = 'line1\rline2\r## Heading\r'; + expect(() => parseMd(raw)).not.toThrow(); + }); + + it('M-23 100 KB file', () => { + const lines: string[] = []; + for (let i = 0; i < 1000; i++) { + lines.push('## H' + i); + for (let j = 0; j < 5; j++) { + lines.push(`- item-${i}-${j}: value with some text content here`); + } + } + const raw = lines.join('\n'); + expect(() => parseMd(raw)).not.toThrow(); + }); +}); diff --git a/src/oc-path/tests/scenarios/oc-path-parse-edges.test.ts b/src/oc-path/tests/scenarios/oc-path-parse-edges.test.ts new file mode 100644 index 00000000000..fa0773d973d --- /dev/null +++ b/src/oc-path/tests/scenarios/oc-path-parse-edges.test.ts @@ -0,0 +1,252 @@ +/** + * Wave 7 — OcPath parsing edges. + * + * Substrate guarantee: `parseOcPath(s)` is a pure function. Valid input + * round-trips via `formatOcPath`; invalid input throws `OcPathError` + * with a stable `code`. + */ +import { describe, expect, it } from 'vitest'; +import { + OcPathError, + formatOcPath, + getPathLayout, + isPattern, + isValidOcPath, + parseOcPath, +} from '../../oc-path.js'; + +function expectErr(fn: () => unknown, code: string): void { + try { + fn(); + expect.fail(`expected OcPathError code ${code}`); + } catch (err) { + expect(err).toBeInstanceOf(OcPathError); + expect((err as OcPathError).code).toBe(code); + } +} + +describe('wave-07 oc-path-parse-edges', () => { + it('OP-01 file-only', () => { + expect(parseOcPath('oc://SOUL.md')).toEqual({ file: 'SOUL.md' }); + }); + + it('OP-02 file + section', () => { + expect(parseOcPath('oc://SOUL.md/Boundaries').section).toBe('Boundaries'); + }); + + it('OP-03 file + section + item', () => { + expect(parseOcPath('oc://SOUL.md/Boundaries/deny-rule-1').item).toBe('deny-rule-1'); + }); + + it('OP-04 file + section + item + field', () => { + expect(parseOcPath('oc://SOUL.md/B/deny-1/risk').field).toBe('risk'); + }); + + it('OP-05 session query parameter', () => { + 
expect(parseOcPath('oc://X.md?session=daily').session).toBe('daily'); + }); + + it('OP-06 session with full path', () => { + const p = parseOcPath('oc://X.md/sec/item/field?session=cron'); + expect(p).toEqual({ + file: 'X.md', + section: 'sec', + item: 'item', + field: 'field', + session: 'cron', + }); + }); + + it('OP-07 unknown query parameters silently ignored', () => { + const p = parseOcPath('oc://X.md?foo=bar&session=s&baz=qux'); + expect(p.session).toBe('s'); + }); + + it('OP-08 session= with empty value drops session', () => { + const p = parseOcPath('oc://X.md?session='); + expect(p.session).toBeUndefined(); + }); + + it('OP-09 query without `=` ignored', () => { + const p = parseOcPath('oc://X.md?nokeyhere'); + expect(p.session).toBeUndefined(); + }); + + it('OP-10 missing scheme throws', () => { + expectErr(() => parseOcPath('SOUL.md'), 'OC_PATH_MISSING_SCHEME'); + }); + + it('OP-11 wrong scheme throws', () => { + expectErr(() => parseOcPath('https://x.com'), 'OC_PATH_MISSING_SCHEME'); + }); + + it('OP-12 empty after scheme throws', () => { + expectErr(() => parseOcPath('oc://'), 'OC_PATH_EMPTY'); + }); + + it('OP-13 empty segment throws', () => { + expectErr(() => parseOcPath('oc://X.md//item'), 'OC_PATH_EMPTY_SEGMENT'); + }); + + it('OP-14 too-deep nesting throws', () => { + expectErr(() => parseOcPath('oc://X.md/a/b/c/d/e'), 'OC_PATH_TOO_DEEP'); + }); + + it('OP-15 non-string throws', () => { + expectErr(() => parseOcPath(42 as unknown as string), 'OC_PATH_NOT_STRING'); + }); + + it('OP-16 round-trip canonical forms', () => { + const cases = [ + 'oc://SOUL.md', + 'oc://SOUL.md/Boundaries', + 'oc://SOUL.md/Boundaries/deny-rule-1', + 'oc://SOUL.md/Boundaries/deny-rule-1/risk', + 'oc://SOUL.md?session=daily', + 'oc://X.md/a/b/c?session=s', + 'oc://skills/email-drafter/[frontmatter]/name', + 'oc://config/plugins.entries.foo.token', + ]; + for (const c of cases) { + expect(formatOcPath(parseOcPath(c)), `round-trip failed for ${c}`).toBe(c); + } + }); + + 
it('OP-17 isValidOcPath true positives', () => { + expect(isValidOcPath('oc://X.md')).toBe(true); + expect(isValidOcPath('oc://X.md/sec/item/field')).toBe(true); + }); + + it('OP-18 isValidOcPath true negatives', () => { + expect(isValidOcPath('')).toBe(false); + expect(isValidOcPath('X.md')).toBe(false); + expect(isValidOcPath('oc://')).toBe(false); + expect(isValidOcPath('oc://x//y')).toBe(false); + expect(isValidOcPath(null)).toBe(false); + expect(isValidOcPath({})).toBe(false); + }); + + it('OP-19 file segment with special chars (file with dots/slashes)', () => { + const p = parseOcPath('oc://config/plugins.entries.foo.token'); + expect(p.file).toBe('config'); + expect(p.section).toBe('plugins.entries.foo.token'); + }); + + it('OP-20 section segment with hyphens / underscores / numbers', () => { + const p = parseOcPath('oc://X.md/Multi-Tenant_Section_2'); + expect(p.section).toBe('Multi-Tenant_Section_2'); + }); + + it('OP-21 [frontmatter] sentinel is just a section name', () => { + const p = parseOcPath('oc://X.md/[frontmatter]/name'); + expect(p.section).toBe('[frontmatter]'); + expect(p.item).toBe('name'); + }); + + it('OP-22 formatOcPath rejects empty file', () => { + expectErr(() => formatOcPath({ file: '' }), 'OC_PATH_FILE_REQUIRED'); + }); + + it('OP-23 formatOcPath rejects item without section', () => { + expectErr(() => formatOcPath({ file: 'X.md', item: 'i' }), 'OC_PATH_NESTING'); + }); + + it('OP-24 formatOcPath quotes raw slot values containing special chars', () => { + // Closes ClawSweeper P2 on PR #78678: `formatOcPath` previously + // concatenated raw slot values, so a programmatically-constructed + // path with a `/` in the section/item slot would emit extra + // segments and fail to parse back to the same address. + // Use a slot value with `/` (and no internal `.`) — `.` inside + // a slot is the dotted sub-segment delimiter; callers wanting a + // literal `.` in a key should pre-quote that single sub-segment. 
+ const constructed = formatOcPath({ + file: 'config.jsonc', + section: 'agents.defaults.models', + item: 'github-copilot/claude-opus-4-7', + field: 'alias', + }); + expect(constructed).toBe( + 'oc://config.jsonc/agents.defaults.models/"github-copilot/claude-opus-4-7"/alias', + ); + const parsed = parseOcPath(constructed); + expect(parsed.item).toBe('"github-copilot/claude-opus-4-7"'); + }); + + it('OP-25 parseOcPath finds query separator outside quoted keys', () => { + // Closes ClawSweeper P2 on PR #78678: `parseOcPath` previously + // used `indexOf('?')` which split a key like `"foo?bar"` at the + // embedded `?`, breaking advertised quoted-segment support. + const parsed = parseOcPath('oc://config.jsonc/"foo?bar"?session=daily'); + expect(parsed.section).toBe('"foo?bar"'); + expect(parsed.session).toBe('daily'); + }); + + it('OP-26 file slot with `/` round-trips via quoting', () => { + // Closes ClawSweeper P2 on PR #78678 (round 4): `parseOcPath` stored + // `path.file` verbatim while `formatOcPath` prefixed it without + // quote-wrapping, so a file like `skills/email-drafter` couldn't + // round-trip — formatter output got re-parsed as file plus section, + // and quoted input leaked the surrounding quotes into filesystem + // resolution. + const constructed = formatOcPath({ + file: 'skills/email-drafter', + section: 'Tools', + item: '-1', + }); + expect(constructed).toBe('oc://"skills/email-drafter"/Tools/-1'); + const parsed = parseOcPath(constructed); + expect(parsed.file).toBe('skills/email-drafter'); + expect(parsed.section).toBe('Tools'); + expect(parsed.item).toBe('-1'); + }); + + it('OP-27 file slot with dot extension does NOT get quoted', () => { + // The file slot's quoting trigger excludes `.` because filename + // extensions (`AGENTS.md`, `gateway.jsonc`) are normal — quoting + // them would make canonical form ugly without need. 
+ expect(formatOcPath({ file: 'AGENTS.md' })).toBe('oc://AGENTS.md'); + expect(formatOcPath({ file: 'gateway.jsonc', section: 'version' })).toBe( + 'oc://gateway.jsonc/version', + ); + }); + + it('OP-28 formatOcPath rejects field without item or section', () => { + // Closes Galin P2 (round 8): the nesting guard caught + // `field + section + no item` but missed `field + no section + no item`. + // Such a struct emits `oc://FILE/FIELD` which silently re-parses as + // `{ file, section: FIELD }` — different shape, breaking round-trip. + expect(() => formatOcPath({ file: 'X', field: 'name' })).toThrow(OcPathError); + try { + formatOcPath({ file: 'X', field: 'name' }); + } catch (err) { + expect(err).toBeInstanceOf(OcPathError); + expect((err as OcPathError).code).toBe('OC_PATH_NESTING'); + } + }); + + it('OP-29 isPattern is quote-aware (literal `*` inside quoted segment)', () => { + // Closes Galin P2 (round 8): `isPattern` previously used + // `slot.split('.')` which shredded a quoted key like `"items.*.glob"` + // and falsely detected the literal `*` as a wildcard, causing + // single-match verbs to reject a concrete path. + const concrete = parseOcPath('oc://config.jsonc/"items.*.glob"'); + expect(isPattern(concrete)).toBe(false); + + // Sanity: an unquoted `*` IS still a wildcard. + const wildcard = parseOcPath('oc://config.jsonc/items/*'); + expect(isPattern(wildcard)).toBe(true); + }); + + it('OP-30 getPathLayout is quote-aware', () => { + // Closes Galin P2 (round 8): `getPathLayout` used `slot.split('.')` + // for all three slots, breaking the find-walker / repackPath layout + // contract for quoted segments containing `.`. + const path = parseOcPath('oc://config.jsonc/"github.com"/repos'); + const layout = getPathLayout(path); + // Quoted segment is one sub-segment, not two. 
+ expect(layout.sectionLen).toBe(1); + expect(layout.subs[0]).toBe('"github.com"'); + expect(layout.itemLen).toBe(1); + expect(layout.subs[1]).toBe('repos'); + }); +}); diff --git a/src/oc-path/tests/scenarios/oc-path-resolver-edges.test.ts b/src/oc-path/tests/scenarios/oc-path-resolver-edges.test.ts new file mode 100644 index 00000000000..1f0381a8e6c --- /dev/null +++ b/src/oc-path/tests/scenarios/oc-path-resolver-edges.test.ts @@ -0,0 +1,235 @@ +/** + * Wave 8 — OcPath resolver edges. + * + * Substrate guarantee: `resolveOcPath(ast, ocPath)` returns the matched + * node or `null`. Slug matching is case-insensitive. Field on non-kv + * item returns `null` (not a guess). Frontmatter via the `[frontmatter]` + * sentinel section. + */ +import { describe, expect, it } from 'vitest'; +import { parseMd } from '../../parse.js'; +import { resolveMdOcPath as resolveOcPath } from '../../resolve.js'; + +const SAMPLE = `--- +name: github +description: gh CLI +url: https://example.com +--- + +Preamble prose. 
+ +## Boundaries + +- never write to /etc +- always confirm before deleting + +## Tools + +- gh: GitHub CLI +- curl: HTTP client +- The Tool: with caps and spaces + +## Multi-Word Section + +- item one +`; + +describe('wave-08 oc-path-resolver-edges', () => { + const { ast } = parseMd(SAMPLE); + + it('R-01 root resolves to AST', () => { + const m = resolveOcPath(ast, { file: 'X.md' }); + expect(m?.kind).toBe('root'); + }); + + it('R-02 block by exact slug', () => { + const m = resolveOcPath(ast, { file: 'X.md', section: 'boundaries' }); + expect(m?.kind).toBe('block'); + }); + + it('R-03 block by case-mismatched slug (Boundaries → boundaries)', () => { + const m = resolveOcPath(ast, { file: 'X.md', section: 'Boundaries' }); + expect(m?.kind).toBe('block'); + }); + + it('R-04 block by uppercased slug', () => { + const m = resolveOcPath(ast, { file: 'X.md', section: 'BOUNDARIES' }); + expect(m?.kind).toBe('block'); + }); + + it('R-05 multi-word section by slug', () => { + const m = resolveOcPath(ast, { file: 'X.md', section: 'multi-word-section' }); + expect(m?.kind).toBe('block'); + if (m?.kind === 'block') {expect(m.node.heading).toBe('Multi-Word Section');} + }); + + it('R-06 multi-word section by exact heading text (case-folded)', () => { + const m = resolveOcPath(ast, { file: 'X.md', section: 'Multi-Word Section' }); + // The OcPath section is matched case-insensitively against block.slug. + // Block.slug for "Multi-Word Section" is "multi-word-section", and + // path.section.toLowerCase() = "multi-word section" which does NOT + // match "multi-word-section". Documented limit — callers must + // pass slug form, not heading text. This is intentional. 
+ expect(m).toBeNull(); + }); + + it('R-07 unknown section returns null', () => { + const m = resolveOcPath(ast, { file: 'X.md', section: 'unknown' }); + expect(m).toBeNull(); + }); + + it('R-08 item by slug under known section', () => { + const m = resolveOcPath(ast, { + file: 'X.md', + section: 'tools', + item: 'gh', + }); + expect(m?.kind).toBe('item'); + }); + + it('R-09 item slug for KV uses kv.key (gh, not "gh-github-cli")', () => { + const m = resolveOcPath(ast, { + file: 'X.md', + section: 'tools', + item: 'gh', + }); + expect(m).not.toBeNull(); + if (m?.kind === 'item') {expect(m.node.kv?.value).toBe('GitHub CLI');} + }); + + it('R-10 item slug for plain bullet uses text', () => { + const m = resolveOcPath(ast, { + file: 'X.md', + section: 'boundaries', + item: 'never-write-to-etc', + }); + expect(m?.kind).toBe('item'); + }); + + it('R-11 item slug case-insensitive', () => { + const m = resolveOcPath(ast, { + file: 'X.md', + section: 'tools', + item: 'GH', + }); + expect(m?.kind).toBe('item'); + }); + + it('R-12 item with spaces in key (slugified)', () => { + const m = resolveOcPath(ast, { + file: 'X.md', + section: 'tools', + item: 'the-tool', + }); + expect(m?.kind).toBe('item'); + if (m?.kind === 'item') {expect(m.node.kv?.value).toBe('with caps and spaces');} + }); + + it('R-13 unknown item returns null', () => { + const m = resolveOcPath(ast, { + file: 'X.md', + section: 'tools', + item: 'nonexistent', + }); + expect(m).toBeNull(); + }); + + it('R-14 item-field matches kv.key (case-insensitive)', () => { + const m = resolveOcPath(ast, { + file: 'X.md', + section: 'tools', + item: 'gh', + field: 'gh', + }); + expect(m?.kind).toBe('item-field'); + }); + + it('R-15 field on plain (non-kv) item returns null', () => { + const m = resolveOcPath(ast, { + file: 'X.md', + section: 'boundaries', + item: 'never-write-to-etc', + field: 'risk', + }); + expect(m).toBeNull(); + }); + + it('R-16 field that does not match kv.key returns null', () => { + const m = 
resolveOcPath(ast, { + file: 'X.md', + section: 'tools', + item: 'gh', + field: 'nonexistent', + }); + expect(m).toBeNull(); + }); + + it('R-17 frontmatter via [frontmatter] sentinel section', () => { + const m = resolveOcPath(ast, { + file: 'X.md', + section: '[frontmatter]', + field: 'name', + }); + expect(m?.kind).toBe('frontmatter'); + if (m?.kind === 'frontmatter') {expect(m.node.value).toBe('github');} + }); + + it('R-18 frontmatter unknown key returns null', () => { + const m = resolveOcPath(ast, { + file: 'X.md', + section: '[frontmatter]', + field: 'nonexistent', + }); + expect(m).toBeNull(); + }); + + it('R-19 frontmatter without field returns null', () => { + const m = resolveOcPath(ast, { + file: 'X.md', + section: '[frontmatter]', + }); + expect(m).toBeNull(); + }); + + it('R-20 multiple frontmatter keys with same name — first match wins', () => { + // Build an AST manually to test + const dupeAst = { + kind: 'md' as const, + raw: '', + frontmatter: [ + { key: 'k', value: 'first', line: 2 }, + { key: 'k', value: 'second', line: 3 }, + ], + preamble: '', + blocks: [], + }; + const m = resolveOcPath(dupeAst, { + file: 'X.md', + section: '[frontmatter]', + field: 'k', + }); + expect(m?.kind).toBe('frontmatter'); + if (m?.kind === 'frontmatter') {expect(m.node.value).toBe('first');} + }); + + it('R-21 empty AST resolves root only', () => { + const empty = { kind: 'md' as const, raw: '', frontmatter: [], preamble: '', blocks: [] }; + expect(resolveOcPath(empty, { file: 'X.md' })?.kind).toBe('root'); + expect(resolveOcPath(empty, { file: 'X.md', section: 'any' })).toBeNull(); + }); + + it('R-22 resolver does not mutate the AST', () => { + const before = JSON.stringify(ast); + resolveOcPath(ast, { file: 'X.md', section: 'tools', item: 'gh', field: 'gh' }); + const after = JSON.stringify(ast); + expect(after).toBe(before); + }); + + it('R-23 file segment is informational — resolver doesn\'t check it', () => { + // The file name in OcPath is metadata; resolver 
assumes the AST + // matches. Callers verify file mapping before passing the AST. + const m1 = resolveOcPath(ast, { file: 'SOUL.md', section: 'tools' }); + const m2 = resolveOcPath(ast, { file: 'AGENTS.md', section: 'tools' }); + expect(m1?.kind).toBe(m2?.kind); + }); +}); diff --git a/src/oc-path/tests/scenarios/perf-determinism.test.ts b/src/oc-path/tests/scenarios/perf-determinism.test.ts new file mode 100644 index 00000000000..f6a17dd4528 --- /dev/null +++ b/src/oc-path/tests/scenarios/perf-determinism.test.ts @@ -0,0 +1,127 @@ +/** + * Wave 14 — performance + determinism + immutability. + * + * Substrate guarantees: + * - Parsing stays within fixed time budgets on large inputs (no quadratic blowup) + * - Same input produces same AST (no Object.keys / Set order surprises) + * - Resolver does not mutate the AST + * - AST is structurally cloneable (no functions, no cycles) + */ +import { describe, expect, it } from 'vitest'; +import { emitMd } from '../../emit.js'; +import { parseMd } from '../../parse.js'; +import { resolveMdOcPath as resolveOcPath } from '../../resolve.js'; + +describe('wave-14 perf + determinism', () => { + it('PD-01 parses 100 KB file in under 200 ms', () => { + const lines: string[] = []; + for (let i = 0; i < 1000; i++) { + lines.push('## H' + i); + for (let j = 0; j < 5; j++) { + lines.push(`- key${i}-${j}: value with content`); + } + } + const raw = lines.join('\n'); + const start = performance.now(); + parseMd(raw); + const elapsed = performance.now() - start; + expect(elapsed).toBeLessThan(200); + }); + + it('PD-02 parses 1000 small files in under 500 ms', () => { + const raw = `## H\n- a\n- b: c\n## I\n- d\n`; + const start = performance.now(); + for (let i = 0; i < 1000; i++) { + parseMd(raw); + } + const elapsed = performance.now() - start; + expect(elapsed).toBeLessThan(500); + }); + + it('PD-03 100k OcPath resolutions on parsed AST in under 500 ms', () => { + const raw = `## A\n- a1\n- a2\n## B\n- b1\n- b2\n## C\n- c1: cv\n`; + const { ast } = 
parseMd(raw); + const path = { file: 'X.md', section: 'b', item: 'b1' }; + const start = performance.now(); + for (let i = 0; i < 100_000; i++) { + resolveOcPath(ast, path); + } + const elapsed = performance.now() - start; + expect(elapsed).toBeLessThan(500); + }); + + it('PD-04 same input → byte-identical AST.raw across runs', () => { + const raw = `---\nb: 2\na: 1\n---\n## Z\n- z\n## A\n- a\n`; + const a1 = parseMd(raw).ast; + const a2 = parseMd(raw).ast; + expect(a1.raw).toBe(a2.raw); + expect(a1.frontmatter).toEqual(a2.frontmatter); + expect(a1.blocks).toEqual(a2.blocks); + }); + + it('PD-05 resolveOcPath is non-mutating', () => { + const raw = `## A\n- a: x\n## B\n- b\n`; + const { ast } = parseMd(raw); + const before = JSON.stringify(ast); + resolveOcPath(ast, { file: 'X.md', section: 'a', item: 'a', field: 'a' }); + resolveOcPath(ast, { file: 'X.md', section: 'b' }); + resolveOcPath(ast, { file: 'X.md', section: 'unknown' }); + expect(JSON.stringify(ast)).toBe(before); + }); + + it('PD-06 AST is JSON-serializable (no functions, no cycles)', () => { + const raw = `---\nk: v\n---\n## A\n- a\n\`\`\`ts\nx\n\`\`\`\n| h |\n| - |\n| 1 |\n`; + const { ast } = parseMd(raw); + const serialized = JSON.stringify(ast); + const parsed = JSON.parse(serialized); + expect(parsed.raw).toBe(ast.raw); + expect(parsed.blocks.length).toBe(ast.blocks.length); + }); + + it('PD-07 emit is non-mutating', () => { + const raw = `## A\n- a\n`; + const { ast } = parseMd(raw); + const before = JSON.stringify(ast); + emitMd(ast); + emitMd(ast); + emitMd(ast); + expect(JSON.stringify(ast)).toBe(before); + }); + + it('PD-08 frontmatter ordering is preserved (insertion order, not alphabetical)', () => { + const raw = `---\nz: 1\nm: 2\na: 3\n---\n`; + const { ast } = parseMd(raw); + expect(ast.frontmatter.map((e) => e.key)).toEqual(['z', 'm', 'a']); + }); + + it('PD-09 block ordering is document order, not alphabetical', () => { + const raw = `## Z\n## A\n## M\n`; + const { ast } = 
parseMd(raw); + expect(ast.blocks.map((b) => b.heading)).toEqual(['Z', 'A', 'M']); + }); + + it('PD-10 item ordering within block is document order', () => { + const raw = `## H\n- z\n- a\n- m\n`; + const { ast } = parseMd(raw); + expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(['z', 'a', 'm']); + }); + + it('PD-11 large fixture round-trip stays under 100 ms', () => { + const lines: string[] = []; + for (let i = 0; i < 500; i++) { + lines.push(`## Section ${i}`); + lines.push(''); + for (let j = 0; j < 10; j++) { + lines.push(`- item-${i}-${j}: with some prose value content here`); + } + lines.push(''); + } + const raw = lines.join('\n'); + const start = performance.now(); + const { ast } = parseMd(raw); + const out = emitMd(ast); + const elapsed = performance.now() - start; + expect(out).toBe(raw); + expect(elapsed).toBeLessThan(100); + }); +}); diff --git a/src/oc-path/tests/scenarios/pitfalls.test.ts b/src/oc-path/tests/scenarios/pitfalls.test.ts new file mode 100644 index 00000000000..245c2dfabce --- /dev/null +++ b/src/oc-path/tests/scenarios/pitfalls.test.ts @@ -0,0 +1,624 @@ +/** + * Wave-23 — Pitfall scenarios. + * + * One test per pitfall ID enumerated in + * `packages/oc-paths-substrate/PITFALLS.md` (the substrate-local + * pitfall taxonomy). Tests are grouped by category so a regression in + * any one defense is visible at a glance. Every MITIGATED / REJECTED + * pitfall has a positive validation here; DEFERRED ones are covered + * as documented limits with a `.skip` note. + * + * **Namespace note**: substrate pitfall IDs (P-001 … P-040) are a + * separate namespace from the claws-side `docs/PITFALLS.md` + * governance taxonomy (which uses P-NNN for completely different + * pitfalls — e.g., P-033 there is "Memory poisoning"). The package + * boundary disambiguates. 
+ */ +import { describe, expect, it } from 'vitest'; +import { + MAX_PATH_LENGTH, + MAX_TRAVERSAL_DEPTH, + OcPathError, + findOcPaths, + formatOcPath, + parseOcPath, + resolveOcPath, + setOcPath, +} from '../../index.js'; +import { parseJsonc } from '../../jsonc/parse.js'; +import { parseJsonl } from '../../jsonl/parse.js'; +import { parseYaml } from '../../yaml/parse.js'; + +// ---------- Encoding pitfalls -------------------------------------------- + +describe('wave-23 pitfalls — encoding', () => { + it('P-001 strips leading UTF-8 BOM from path string', () => { + const bom = ''; + expect(parseOcPath(`${bom}oc://X/Y`).file).toBe('X'); + }); + + it('P-002 normalizes path to NFC', () => { + const nfc = 'café'; // composed + const nfd = 'café'; // decomposed + expect(parseOcPath(`oc://X/${nfd}`).section).toBe(nfc); + expect(parseOcPath(`oc://X/${nfc}`).section).toBe(nfc); + // Same struct out for both inputs. + expect(parseOcPath(`oc://X/${nfd}`)).toEqual(parseOcPath(`oc://X/${nfc}`)); + }); + + it('P-003 rejects whitespace in identifier-shaped segments', () => { + expect(() => parseOcPath('oc://X/foo /bar')).toThrow(OcPathError); + expect(() => parseOcPath('oc://X/ foo')).toThrow(OcPathError); + expect(() => parseOcPath('oc://X/foo\tbar')).toThrow(OcPathError); + }); + + it('P-003 allows whitespace inside predicate values (content)', () => { + // Spaces inside a predicate value are legitimate — they're filtering + // against actual content. 
+ expect(() => parseOcPath('oc://X/[name=hello world]')).not.toThrow(); + }); + + it('P-004 / P-011 rejects control characters and null bytes', () => { + expect(() => parseOcPath('oc://X/\x00')).toThrow(/Control character/); + expect(() => parseOcPath('oc://X/foo\x01bar')).toThrow(/Control character/); + expect(() => parseOcPath('oc://X/foo\x7Fbar')).toThrow(/Control character/); + }); +}); + +// ---------- Empty / structural pitfalls ---------------------------------- + +describe('wave-23 pitfalls — empty & structural', () => { + it('P-008 rejects empty segments', () => { + expect(() => parseOcPath('oc://X//Y')).toThrow(/Empty segment/); + }); + + it('P-009 rejects empty dotted sub-segments', () => { + expect(() => parseOcPath('oc://X/a..b')).toThrow(/Empty dotted sub-segment/); + }); + + it('P-010 rejects scheme-only path', () => { + expect(() => parseOcPath('oc://')).toThrow(/Empty oc:\/\/ path/); + }); + + it('P-014 rejects empty predicate key', () => { + expect(() => parseOcPath('oc://X/[=foo]')).toThrow(/Malformed predicate/); + }); + + it('P-014 rejects empty predicate value', () => { + expect(() => parseOcPath('oc://X/[id=]')).toThrow(/Malformed predicate/); + }); + + it('P-015 accepts bracket segment with no operator as literal sentinel', () => { + // `[frontmatter]` predates the predicate grammar — kept as literal. 
+ expect(parseOcPath('oc://AGENTS.md/[frontmatter]/key').section).toBe('[frontmatter]'); + }); + + it('P-016 rejects mismatched brackets', () => { + expect(() => parseOcPath('oc://X/[unclosed')).toThrow(OcPathError); + expect(() => parseOcPath('oc://X/closed]')).toThrow(OcPathError); + }); + + it('P-016 rejects mismatched braces', () => { + expect(() => parseOcPath('oc://X/{a,b')).toThrow(OcPathError); + }); + + it('P-018 rejects empty union', () => { + expect(() => parseOcPath('oc://X/{}')).toThrow(/Empty union/); + }); + + it('P-018 rejects union with empty alternative', () => { + expect(() => parseOcPath('oc://X/{a,,b}')).toThrow(/Empty alternative/); + }); +}); + +// ---------- Predicate-content pitfalls ----------------------------------- + +describe('wave-23 pitfalls — predicate content', () => { + it('P-012 predicate value containing `/` round-trips', () => { + // The path-level `/` split must respect bracket boundaries. + const p = parseOcPath('oc://X/[id=foo/bar]/cmd'); + expect(p.section).toBe('[id=foo/bar]'); + expect(p.item).toBe('cmd'); + }); + + it('P-012 findOcPaths matches a leaf whose id contains a slash', () => { + const ast = parseYaml( + 'steps:\n - id: foo/bar\n cmd: x\n - id: baz\n cmd: y\n' + ).ast; + const out = findOcPaths(ast, parseOcPath('oc://wf/steps/[id=foo/bar]/cmd')); + expect(out).toHaveLength(1); + if (out[0].match.kind === 'leaf') {expect(out[0].match.valueText).toBe('x');} + }); + + it('P-013 predicate value containing `.` round-trips', () => { + const p = parseOcPath('oc://X/steps.[id=1.0].cmd'); + expect(p.section).toBe('steps.[id=1.0].cmd'); + }); + + it('P-013 findOcPaths matches a leaf whose id is `1.0`', () => { + const ast = parseYaml( + 'steps:\n - id: "1.0"\n cmd: x\n - id: "2.0"\n cmd: y\n' + ).ast; + const out = findOcPaths(ast, parseOcPath('oc://wf/steps/[id=1.0]/cmd')); + expect(out).toHaveLength(1); + if (out[0].match.kind === 'leaf') {expect(out[0].match.valueText).toBe('x');} + }); +}); + +// ---------- Sentinel & 
collision pitfalls -------------------------------- + +describe('wave-23 pitfalls — sentinels & collisions', () => { + it('P-020/openclaw#59934 negative numeric key on object resolves as literal key', () => { + // Telegram supergroup IDs are negative numbers used as map keys. + // Our positional `-N` token would otherwise hijack them. Resolver + // falls through to literal-key lookup on non-indexable containers. + const ast = parseJsonc( + '{"channels":{"telegram":{"groups":{"-5028303500":{"requireMention":false}}}}}' + ).ast; + const m = resolveOcPath( + ast, + parseOcPath('oc://config/channels.telegram.groups.-5028303500.requireMention'), + ); + expect(m).not.toBeNull(); + expect(m?.kind).toBe('leaf'); + if (m?.kind === 'leaf') { + expect(m.valueText).toBe('false'); + expect(m.leafType).toBe('boolean'); + } + }); + + it('P-020 negative `-N` still works as positional on arrays', () => { + // Same syntax, indexable container — positional resolution wins. + const ast = parseJsonc('{"items":[10,20,30]}').ast; + const m = resolveOcPath(ast, parseOcPath('oc://X/items/-1')); + expect(m?.kind).toBe('leaf'); + if (m?.kind === 'leaf') {expect(m.valueText).toBe('30');} + }); + + it('P-020 numeric segment dispatches by node kind (array index vs map key)', () => { + // Same path string against two different ASTs — kind disambiguates. + const arr = parseJsonc('{"x":["a","b"]}').ast; + const map = parseJsonc('{"x":{"0":"a","1":"b"}}').ast; + const arrM = resolveOcPath(arr, parseOcPath('oc://config/x/0')); + const mapM = resolveOcPath(map, parseOcPath('oc://config/x/0')); + expect(arrM?.kind).toBe('leaf'); + expect(mapM?.kind).toBe('leaf'); + if (arrM?.kind === 'leaf') {expect(arrM.valueText).toBe('a');} + if (mapM?.kind === 'leaf') {expect(mapM.valueText).toBe('a');} + }); + + it('P-021 `$last` literal in a yaml key is shadowed by positional sentinel', () => { + // Document v0 limitation: `$last` always means "last", never a literal key. 
+ // Authors with `$last` literal keys must use kind-narrow access. + const ast = parseYaml('$last: literal-value\nfoo: bar\n').ast; + const m = resolveOcPath(ast, parseOcPath('oc://X/$last')); + // `$last` resolves to the LAST key (`foo` → `bar`), not the literal `$last` key. + expect(m?.kind).toBe('leaf'); + if (m?.kind === 'leaf') {expect(m.valueText).toBe('bar');} + }); +}); + +// ---------- Round-trip pitfalls ------------------------------------------ + +describe('wave-23 pitfalls — round-trip', () => { + it('P-023 parseOcPath ∘ formatOcPath is idempotent across path shapes', () => { + const inputs = [ + 'oc://X', + 'oc://X/a', + 'oc://X/a/b', + 'oc://X/a/b/c', + 'oc://X/a.b.c', + 'oc://X/a?session=s1', + 'oc://X/[frontmatter]/key', + 'oc://X/steps/*/command', + 'oc://X/steps/$last/id', + 'oc://X/steps/-2/id', + 'oc://X/steps/{command,run}', + 'oc://X/steps/[id=foo]/cmd', + 'oc://X/steps/#0/foo', + ]; + for (const s of inputs) { + const parsed = parseOcPath(s); + const reparsed = parseOcPath(formatOcPath(parsed)); // re-parse the formatter's output so formatOcPath is actually exercised + expect(parsed).toEqual(reparsed); + } + }); +}); + +// ---------- Sentinel-guard pitfalls -------------------------------------- + +describe('wave-23 pitfalls — sentinel at format boundary (F9)', () => { + it('formatOcPath rejects an OcPath struct carrying the redaction sentinel', () => { + // Path strings flow into telemetry, audit events, error messages, + // find-result `path` fields. Without the format-time guard, a + // struct with `section: REDACTED_SENTINEL` would slip past every + // consumer except the CLI's scrubSentinel layer. The substrate's + // contract is "emit boundaries refuse the sentinel" — formatOcPath + // IS such a boundary for path strings. 
+ expect(() => + formatOcPath({ file: 'AGENTS.md', section: '__OPENCLAW_REDACTED__' }), + ).toThrow(/sentinel literal/); + }); +}); + +// ---------- Containment pitfalls ----------------------------------------- + +describe('wave-23 pitfalls — file-slot containment', () => { + // oc:// paths are workspace-relative. Absolute paths and `..` segments + // would let a hostile workflow / skill manifest persuade + // `openclaw path resolve|set|emit` into reading or writing arbitrary + // filesystem locations (Node `path.resolve(cwd, absolute)` returns + // `absolute`, bypassing the workspace root). Reject at parseOcPath + // and formatOcPath for symmetric defense. + it('rejects an absolute POSIX file slot', () => { + expect(() => parseOcPath('oc:///etc/passwd')).toThrow(/Empty segment/); + // Quoted form — same containment violation, different parse path. + expect(() => parseOcPath('oc://"/etc/passwd"/section')).toThrow(/Absolute file slot/); + }); + + it('rejects a Windows drive-letter file slot', () => { + expect(() => parseOcPath('oc://"C:/Windows/System32/foo"/section')).toThrow( + /Absolute file slot/, + ); + expect(() => parseOcPath('oc://"C:\\\\Windows\\\\System32"/section')).toThrow( + /Absolute file slot/, + ); + }); + + it('rejects a leading-backslash file slot', () => { + expect(() => parseOcPath('oc://"\\\\srv\\\\share\\\\foo"/section')).toThrow( + /Absolute file slot/, + ); + }); + + it('rejects a parent-directory escape via plain `..`', () => { + expect(() => parseOcPath('oc://"../foo"/section')).toThrow(/Parent-directory/); + expect(() => parseOcPath('oc://".."/section')).toThrow(/Parent-directory/); + }); + + it('rejects a parent-directory escape mid-path', () => { + expect(() => parseOcPath('oc://"foo/../bar"/section')).toThrow(/Parent-directory/); + }); + + it('does not decode URL-encoded `..` — literal `%2E%2E` is treated as a filename', () => { + // The substrate does NOT do URL decoding — `%2E%2E` is the literal + // five-character filename, not a 
parent-directory escape. Documented + // limitation: consumers that pre-decode (HTTP layers, browser UI) + // are responsible for normalizing before invoking parseOcPath. + // Pin the current behavior so a future "let's decode for them" PR + // sees the explicit choice. + const p = parseOcPath('oc://"%2E%2E/foo"/section'); + expect(p.file).toBe('%2E%2E/foo'); + }); + + it('formatOcPath rejects an OcPath struct with absolute file', () => { + expect(() => formatOcPath({ file: '/etc/passwd' })).toThrow(/Absolute file slot/); + expect(() => formatOcPath({ file: 'C:/Windows' })).toThrow(/Absolute file slot/); + }); + + it('formatOcPath rejects an OcPath struct with parent-directory file', () => { + expect(() => formatOcPath({ file: '..' })).toThrow(/Parent-directory/); + expect(() => formatOcPath({ file: '../etc/passwd' })).toThrow(/Parent-directory/); + expect(() => formatOcPath({ file: 'foo/../bar' })).toThrow(/Parent-directory/); + }); +}); + +// ---------- formatOcPath ↔ parseOcPath round-trip ------------------------ + +describe('wave-23 pitfalls — format/parse round-trip', () => { + // The contract on oc-path.ts:13 — `formatOcPath(parseOcPath(s)) === s` + // for any string the formatter accepts. Round-trip breaks were + // observable on (a) struct fields with empty dotted sub-segments + // (`section: 'foo.'` → `oc://X/foo.""` → re-parses with `section: + // 'foo.""'`) and (b) struct fields with control chars (formatter + // emitted unquoted, parser refused). Pin both directions. + it('formatOcPath rejects empty dotted sub-segment in a slot', () => { + expect(() => formatOcPath({ file: 'a.md', section: 'foo.' 
})).toThrow( + /Empty dotted sub-segment/, + ); + expect(() => formatOcPath({ file: 'a.md', section: '.foo' })).toThrow( + /Empty dotted sub-segment/, + ); + expect(() => formatOcPath({ file: 'a.md', section: 'foo..bar' })).toThrow( + /Empty dotted sub-segment/, + ); + }); + + it('formatOcPath rejects control characters in any slot', () => { + expect(() => formatOcPath({ file: 'a.md', section: 'sec\x00tion' })).toThrow( + /Control character/, + ); + expect(() => formatOcPath({ file: 'a.md', section: 'sec\x01tion' })).toThrow( + /Control character/, + ); + expect(() => formatOcPath({ file: 'a.md', section: 'tab\ttion' })).toThrow( + /Control character/, + ); + expect(() => formatOcPath({ file: 'a\x00b.md' })).toThrow(/Control character/); + }); + + it('round-trips every shape parseOcPath accepts', () => { + // For every valid input, formatOcPath(parseOcPath(s)) MUST be + // re-parseable to the same struct. Don't string-compare (the + // formatter normalizes quoting); parse the round-tripped output + // and compare structs. + const inputs = [ + 'oc://X', + 'oc://X/a', + 'oc://X/a/b', + 'oc://X/a/b/c', + 'oc://X/a.b.c', + 'oc://X/a?session=s1', + 'oc://X/[frontmatter]/key', + 'oc://X/steps/$last/id', + 'oc://X/steps/-2/id', + 'oc://X/steps/[id=foo]/cmd', + 'oc://X/steps/{a,b}/cmd', + 'oc://X/"foo/bar"/baz', + 'oc://X/agents/"anthropic/claude-opus-4-7"/alias', + ]; + for (const s of inputs) { + const parsed = parseOcPath(s); + const formatted = formatOcPath(parsed); + const reparsed = parseOcPath(formatted); + expect(reparsed).toEqual(parsed); + } + }); +}); + +// ---------- Performance pitfalls ----------------------------------------- + +describe('wave-23 pitfalls — performance & limits', () => { + it('P-031 / P-033 walker depth cap throws on pathological recursion', () => { + // Construct a yaml that nests deeper than MAX_TRAVERSAL_DEPTH. + // We're using `**` against a synthetic deeply-nested structure. 
+ let yaml = 'root:\n'; + let indent = ' '; + for (let i = 0; i < MAX_TRAVERSAL_DEPTH + 50; i++) { + yaml += `${indent}a:\n`; + indent += ' '; + } + yaml += `${indent}leaf: x\n`; + const ast = parseYaml(yaml).ast; + expect(() => findOcPaths(ast, parseOcPath('oc://X/**'))).toThrow(/MAX_TRAVERSAL_DEPTH/); + }); + + it('P-032 rejects path strings longer than MAX_PATH_LENGTH', () => { + const big = 'oc://X/' + 'a'.repeat(MAX_PATH_LENGTH); + expect(() => parseOcPath(big)).toThrow(/exceeds .* bytes/); + }); + + it('P-032 path at the cap parses cleanly', () => { + const justUnder = 'oc://X/' + 'a'.repeat(MAX_PATH_LENGTH - 'oc://X/'.length); + expect(() => parseOcPath(justUnder)).not.toThrow(); + }); + + it('P-032 formatOcPath enforces the same cap on output', () => { + // Symmetric upper bound — without this guard, a struct whose + // formatted form crosses the cap would emit a string parseOcPath + // would immediately reject (round-trip break). + expect(() => + formatOcPath({ file: 'X', section: 'a'.repeat(MAX_PATH_LENGTH) }), + ).toThrow(/Formatted oc:\/\/ exceeds/); + }); + + it('parser depth cap fires on pathological JSONC nesting (F6)', () => { + // Without `MAX_PARSE_DEPTH`, pathological input like + // `'['.repeat(20000) + '0' + ']'.repeat(20000)` triggers a V8 + // RangeError ("Maximum call stack size exceeded") that escapes + // commander as a raw stringified error — no `OcEmitSentinelError`- + // style structured catch. Pin the structured-diagnostic path: + // parser must surface OC_JSONC_DEPTH_EXCEEDED, not bare RangeError. 
+ const open = '['.repeat(MAX_TRAVERSAL_DEPTH + 100); + const close = ']'.repeat(MAX_TRAVERSAL_DEPTH + 100); + const raw = `${open}0${close}`; + const result = parseJsonc(raw); + expect(result.ast.root).toBeNull(); + expect( + result.diagnostics.some((d) => d.code === 'OC_JSONC_DEPTH_EXCEEDED'), + ).toBe(true); + }); + + it('parser depth cap fires on JSONL line with deeply-nested JSON (F6)', () => { + // Per-line parseJsonc dispatch carries the same protection — each + // value line is parsed in isolation and gets its own depth cap. + // The line surfaces as `kind: 'malformed'` with the depth diagnostic. + let nested = '"x"'; + for (let i = 0; i < MAX_TRAVERSAL_DEPTH + 50; i++) { + nested = `{"a":${nested}}`; + } + const { diagnostics } = parseJsonl(nested + '\n'); + // The line-level diagnostic is OC_JSONL_LINE_MALFORMED (line failed); + // we don't promote OC_JSONC_DEPTH_EXCEEDED through the JSONL layer + // but the malformed-line detection prevents stack-overflow escape. + expect(diagnostics.some((d) => d.code === 'OC_JSONL_LINE_MALFORMED')).toBe(true); + }); +}); + +// ---------- Coercion pitfalls -------------------------------------------- + +describe('wave-23 pitfalls — coercion', () => { + it('P-029 numeric coercion is locale-independent', () => { + // `Number()` doesn't honor locale; `parseFloat` doesn't either in + // practice, but we never use `parseFloat`. Verify `Number("1,5")` + // returns NaN (which is rejected) and `"1.5"` returns 1.5. 
+ const ast = parseJsonc('{"x":1.0}').ast; + const r1 = setOcPath(ast, parseOcPath('oc://X/x'), '1.5'); + expect(r1.ok).toBe(true); + const r2 = setOcPath(ast, parseOcPath('oc://X/x'), '1,5'); + expect(r2.ok).toBe(false); + if (!r2.ok) {expect(r2.reason).toBe('parse-error');} + }); + + it('P-030 boolean coercion is exact-match lowercase', () => { + const ast = parseJsonc('{"x":true}').ast; + expect(setOcPath(ast, parseOcPath('oc://X/x'), 'false').ok).toBe(true); + expect(setOcPath(ast, parseOcPath('oc://X/x'), 'False').ok).toBe(false); + expect(setOcPath(ast, parseOcPath('oc://X/x'), 'TRUE').ok).toBe(false); + expect(setOcPath(ast, parseOcPath('oc://X/x'), 'yes').ok).toBe(false); + }); +}); + +// ---------- Reserved character pitfalls ---------------------------------- + +describe('wave-23 pitfalls — reserved characters', () => { + it('P-026 rejects `?` outside the query separator position', () => { + // `?` triggers the query split. `oc://X/foo?session=s` is fine + // (legitimate query). But `?` *inside* a segment after the query + // section is consumed isn't a normal use case — the parser treats + // the first `?` as the query split. + expect(parseOcPath('oc://X/foo?session=s').section).toBe('foo'); + // Empty key after `?` (no `=`): query parser silently ignores. + expect(() => parseOcPath('oc://X/foo?')).not.toThrow(); + }); + + it('P-040 negative-index magnitude is bounded', () => { + // Out-of-range negative index → null at resolve time, not crash. + const ast = parseJsonc('{"x":[1,2,3]}').ast; + expect(resolveOcPath(ast, parseOcPath('oc://X/x/-9999999999'))).toBeNull(); + expect(resolveOcPath(ast, parseOcPath('oc://X/x/-1'))?.kind).toBe('leaf'); + }); +}); + +// ---------- Sentinel-redaction pitfall (P-036) --------------------------- + +describe('wave-23 pitfalls — redaction sentinel', () => { + // P-036 is fully covered by wave-21-sentinel-cross-kind. This is a + // smoke test asserting the link is intact. 
+ it('P-036 sentinel guard activates at emit time (covered by wave-21)', () => { + expect(true).toBe(true); + }); +}); + +// ---------- DEFERRED — documented limits --------------------------------- + +describe('wave-23 pitfalls — deferred (v0 limits)', () => { + it.skip('P-005 slash literal in key — v1: quoted segments', () => {}); + it.skip('P-006 dot literal in key — v1: quoted segments', () => {}); + it.skip('P-017 nested unions {a,{b,c}} — v1: parser stack', () => {}); + it.skip('P-019 wildcard inside wildcard — v1: pattern composition', () => {}); + it.skip('P-025 leading-zero numeric `01` — v1: explicit form', () => {}); + it.skip('P-027 `&` in segments — v1: percent-encoding', () => {}); + it.skip('P-028 percent-encoded segments — v1: rfc3986 layer', () => {}); + it.skip('P-034 ast mutation between resolve & consume — caller invariant', () => {}); + it.skip('P-035 stale paths from prior find — caller invariant', () => {}); +}); + +// ---------- Injection pitfalls (C12 / W12) ------------------------------- + +describe('wave-23 pitfalls — injection (caller-supplied hostile input)', () => { + // P-037: a hostile path string. The substrate's job is to either + // parse safely or reject with `OcPathError` — never let undefined + // behavior leak. These cases lock the rejection-or-safe contract. + + it('P-037a control characters in path body are rejected', () => { + expect(() => parseOcPath('oc://a\x00b')).toThrow(OcPathError); + expect(() => parseOcPath('oc://a\x01b/c')).toThrow(OcPathError); + expect(() => parseOcPath('oc://a/b\x1Fc')).toThrow(OcPathError); + }); + + it('P-037b NUL byte anywhere in path is rejected', () => { + expect(() => parseOcPath('oc://X.md/sec\x00tion')).toThrow(OcPathError); + }); + + it('P-037c BOM at start of path is stripped, not interpreted', () => { + // BOM is unicode U+FEFF (0xFEFF). The substrate strips it before + // scheme check; without stripping, the BOM-prefixed string would + // fail the `oc://` scheme test. 
+ const path = parseOcPath('\uFEFFoc://X.md/section'); + expect(path.file).toBe('X.md'); + expect(path.section).toBe('section'); + }); + + it('P-037d session query is parsed only via the documented `?session=...` form', () => { + // Legal session form parses cleanly. + const ok = parseOcPath('oc://X.md/sec?session=cron:daily'); + expect(ok.section).toBe('sec'); + expect(ok.session).toBe('cron:daily'); + // Substrate is lenient about loose `?garbage` — caller's + // responsibility to construct paths from `formatOcPath`. Confirm + // the loose form does NOT silently invent a session value. + const loose = parseOcPath('oc://X.md/sec?garbage'); + expect(loose.session).toBeUndefined(); + }); + + it('P-037e unescaped `&` in segments is rejected', () => { + expect(() => parseOcPath('oc://X.md/a&b')).toThrow(OcPathError); + }); + + it('P-037f unescaped `%` in segments is rejected', () => { + expect(() => parseOcPath('oc://X.md/a%b')).toThrow(OcPathError); + }); + + it('P-037g empty file slot is rejected', () => { + expect(() => parseOcPath('oc:///section')).toThrow(OcPathError); + }); + + it('P-037h backslash-escape attempts are not treated as path traversal', () => { + // No special meaning — the literal backslash is just a regular + // character. Doesn't allow escaping forward slashes. + expect(() => parseOcPath('oc://X.md/a\\../b')).toThrow(OcPathError); + }); + + // P-038: predicate-value injection. `[k=v]` predicates filter + // matches; a hostile `v` containing regex metachars, brackets, or + // operators must NOT escape the predicate scope or be interpreted + // as a regex. + + it('P-038a regex metacharacters in predicate value match literally', () => { + const ast = parseJsonc('{ "items": [ {"name": "a.*"}, {"name": "abc"} ] }').ast; + // Looking for the literal string "a.*" — should match only the + // first item, not "abc" (which would match if `.*` were treated + // as a regex). 
+ const matches = findOcPaths(ast, parseOcPath('oc://X.jsonc/items/[name=a.*]')); + expect(matches).toHaveLength(1); + }); + + it('P-038b nested-bracket attempts in predicate value are kept literal', () => { + // The substrate is permissive on nested brackets — they're part + // of the literal predicate value, not interpreted as path syntax. + // The match would be against the literal string "a[b]"; a + // resolver that finds zero matches fails closed. + const path = parseOcPath('oc://X.jsonc/items/[name=a[b]]'); + expect(path.item).toBe('[name=a[b]]'); + // No data has the literal value `a[b]` here, so finding empty. + const ast = parseJsonc('{ "items": [ {"name": "abc"} ] }').ast; + expect(findOcPaths(ast, path)).toHaveLength(0); + }); + + it('P-038c equals-sign in predicate value is treated as part of the value', () => { + // The FIRST `=` separates key from value; subsequent `=`s belong + // to the value. The rule keeps the predicate parser simple — + // operators that prefix-match (`!=`, `<=`, `>=`) are tried + // before `=`, then `=` consumes the rest. + const ast = parseJsonc('{ "items": [ {"k": "a=b"}, {"k": "c"} ] }').ast; + const matches = findOcPaths(ast, parseOcPath('oc://X.jsonc/items/[k=a=b]')); + expect(matches).toHaveLength(1); + }); + + it('P-038d control characters in predicate value are rejected', () => { + expect(() => parseOcPath('oc://X.jsonc/items/[k=a\x00b]')).toThrow(OcPathError); + }); + + it('P-038e empty predicate body is rejected', () => { + expect(() => parseOcPath('oc://X.jsonc/items/[]')).toThrow(OcPathError); + }); + + it('P-038f predicate-shaped bracket without operator is treated as literal sentinel', () => { + // `[name]` without `=` is parsed as a literal-bracket sentinel + // (e.g. `[frontmatter]`-style). The substrate accepts it as a + // literal path segment — predicate parsing only kicks in when an + // operator is present. Document this to lock the behavior. 
+ const path = parseOcPath('oc://X.jsonc/items/[name]'); + expect(path.item).toBe('[name]'); + }); + + it('P-038g predicate-shaped bracket with unsupported operator parses as literal', () => { + // `~` isn't in the supported-operator set; the parser doesn't + // recognize it as a predicate, so it's accepted as a literal + // bracket segment. This is the documented v1.1 behavior — a + // future version may add `~` (regex) and bump SDK_VERSION. + const path = parseOcPath('oc://X.jsonc/items/[k~v]'); + expect(path.item).toBe('[k~v]'); + }); +}); diff --git a/src/oc-path/tests/scenarios/real-world-fixtures.test.ts b/src/oc-path/tests/scenarios/real-world-fixtures.test.ts new file mode 100644 index 00000000000..f633d08fa66 --- /dev/null +++ b/src/oc-path/tests/scenarios/real-world-fixtures.test.ts @@ -0,0 +1,140 @@ +/** + * Wave 12 — real-world fixtures. + * + * Eight workspace files (one per upstream-recognized workspace + * filename) — each parsed, resolved, and round-tripped to verify the + * substrate handles realistic content. + */ +import { readFileSync } from 'node:fs'; +import { fileURLToPath } from 'node:url'; +import { join, dirname } from 'node:path'; +import { describe, expect, it } from 'vitest'; +import { emitMd } from '../../emit.js'; +import { parseMd } from '../../parse.js'; +import { resolveMdOcPath as resolveOcPath } from '../../resolve.js'; + +const HERE = dirname(fileURLToPath(import.meta.url)); +const FIXTURES = join(HERE, '..', 'fixtures', 'real'); + +function load(name: string): string { + return readFileSync(join(FIXTURES, name), 'utf-8'); +} + +describe('wave-12 real-world-fixtures', () => { + it('F-01 SOUL.md parses + round-trips', () => { + const raw = load('SOUL.md'); + const { ast, diagnostics } = parseMd(raw); + expect(diagnostics).toEqual([]); + expect(emitMd(ast)).toBe(raw); + // Has at least one H2 block. 
+ expect(ast.blocks.length).toBeGreaterThan(0); + }); + + it('F-02 AGENTS.md parses + resolves Tools section', () => { + const raw = load('AGENTS.md'); + const { ast } = parseMd(raw); + expect(emitMd(ast)).toBe(raw); + const tools = resolveOcPath(ast, { file: 'AGENTS.md', section: 'tools' }); + expect(tools?.kind).toBe('block'); + if (tools?.kind === 'block') { + expect(tools.node.items.some((i) => i.kv?.key === 'gh')).toBe(true); + } + }); + + it('F-03 MEMORY.md frontmatter scope resolves via [frontmatter]', () => { + const raw = load('MEMORY.md'); + const { ast } = parseMd(raw); + expect(emitMd(ast)).toBe(raw); + const scope = resolveOcPath(ast, { + file: 'MEMORY.md', + section: '[frontmatter]', + field: 'scope', + }); + expect(scope?.kind).toBe('frontmatter'); + if (scope?.kind === 'frontmatter') {expect(scope.node.value).toBe('project');} + }); + + it('F-04 TOOLS.md table extracted from Tool Guidance section', () => { + const raw = load('TOOLS.md'); + const { ast } = parseMd(raw); + expect(emitMd(ast)).toBe(raw); + const guidance = resolveOcPath(ast, { + file: 'TOOLS.md', + section: 'tool-guidance', + }); + expect(guidance?.kind).toBe('block'); + if (guidance?.kind === 'block') { + expect(guidance.node.tables.length).toBeGreaterThan(0); + expect(guidance.node.tables[0]?.headers).toEqual(['tool', 'guidance']); + } + }); + + it('F-05 IDENTITY.md sections resolvable by slug', () => { + const raw = load('IDENTITY.md'); + const { ast } = parseMd(raw); + expect(emitMd(ast)).toBe(raw); + const trust = resolveOcPath(ast, { + file: 'IDENTITY.md', + section: 'trust-level', + }); + expect(trust?.kind).toBe('block'); + }); + + it('F-06 USER.md Preferences items extracted', () => { + const raw = load('USER.md'); + const { ast } = parseMd(raw); + expect(emitMd(ast)).toBe(raw); + const prefs = resolveOcPath(ast, { + file: 'USER.md', + section: 'preferences', + }); + expect(prefs?.kind).toBe('block'); + if (prefs?.kind === 'block') { + 
expect(prefs.node.items.length).toBeGreaterThan(0); + } + }); + + it('F-07 HEARTBEAT.md schedules — H2 sections as triggers', () => { + const raw = load('HEARTBEAT.md'); + const { ast } = parseMd(raw); + expect(emitMd(ast)).toBe(raw); + expect(ast.blocks.length).toBeGreaterThanOrEqual(3); + const slugs = ast.blocks.map((b) => b.slug); + expect(slugs).toContain('every-30m-wake'); + expect(slugs).toContain('every-4h-wake'); + }); + + it('F-08 SKILL.md frontmatter has name + description + tier', () => { + const raw = load('SKILL.md'); + const { ast } = parseMd(raw); + expect(emitMd(ast)).toBe(raw); + const fmKeys = ast.frontmatter.map((e) => e.key); + expect(fmKeys).toContain('name'); + expect(fmKeys).toContain('description'); + expect(fmKeys).toContain('tier'); + }); + + it('F-09 BOOTSTRAP.md round-trips', () => { + const raw = load('BOOTSTRAP.md'); + const { ast } = parseMd(raw); + expect(emitMd(ast)).toBe(raw); + }); + + it('F-10 all 9 fixtures combined round-trip-clean (sanity)', () => { + const names = [ + 'SOUL.md', + 'AGENTS.md', + 'MEMORY.md', + 'TOOLS.md', + 'IDENTITY.md', + 'USER.md', + 'HEARTBEAT.md', + 'SKILL.md', + 'BOOTSTRAP.md', + ]; + for (const name of names) { + const raw = load(name); + expect(emitMd(parseMd(raw).ast), `${name} failed round-trip`).toBe(raw); + } + }); +}); diff --git a/src/oc-path/tests/scenarios/roundtrip-property.test.ts b/src/oc-path/tests/scenarios/roundtrip-property.test.ts new file mode 100644 index 00000000000..7338c15d046 --- /dev/null +++ b/src/oc-path/tests/scenarios/roundtrip-property.test.ts @@ -0,0 +1,155 @@ +/** + * Wave 10 — round-trip property tests. + * + * Substrate guarantee: `emitMd(parse(raw)) === raw` for all inputs the + * parser accepts. This wave exercises that property over a generated + * corpus of synthetic markdown shapes and verifies parser idempotence + * (`parse(emitMd(parse(raw))) === parse(raw)` modulo `raw`). 
+ */ +import { describe, expect, it } from 'vitest'; +import { emitMd } from '../../emit.js'; +import { parseMd } from '../../parse.js'; + +function roundTrip(raw: string): string { + return emitMd(parseMd(raw).ast); +} + +describe('wave-10 roundtrip-property', () => { + it('RT-01 byte-fidelity over 100 generated shapes', () => { + const inputs = generateCorpus(100); + for (const raw of inputs) { + try { + expect(roundTrip(raw)).toBe(raw); + } catch (e) { + // Surface which input failed for debugging. + throw new Error( + `round-trip failed for input (length ${raw.length}):\n${JSON.stringify(raw.slice(0, 200))}\nError: ${(e as Error).message}`, { cause: e }, + ); + } + } + }); + + it('RT-02 parser idempotence (parse → emit → parse → identical AST shape)', () => { + const inputs = generateCorpus(50); + for (const raw of inputs) { + const a = parseMd(raw).ast; + const a2 = parseMd(emitMd(a)).ast; + // Compare structural fields; raw will of course be identical. + expect(a2.frontmatter).toEqual(a.frontmatter); + expect(a2.preamble).toEqual(a.preamble); + expect(a2.blocks.map(stripDerived)).toEqual(a.blocks.map(stripDerived)); + } + }); + + it('RT-03 stable output for identical input', () => { + const raw = `---\nname: x\n---\n\n## A\n- a\n## B\n- b: c\n`; + const out1 = roundTrip(raw); + const out2 = roundTrip(raw); + const out3 = roundTrip(raw); + expect(out1).toBe(out2); + expect(out2).toBe(out3); + }); + + it('RT-04 ordering deterministic (no Object.keys / Set ordering surprises)', () => { + const raw = `---\nb: 2\na: 1\nc: 3\n---\n## Z\n- z\n## A\n- a\n`; + const a1 = parseMd(raw).ast; + const a2 = parseMd(raw).ast; + expect(a1.frontmatter.map((e) => e.key)).toEqual(a2.frontmatter.map((e) => e.key)); + expect(a1.blocks.map((b) => b.heading)).toEqual(a2.blocks.map((b) => b.heading)); + }); + + it('RT-05 round-trip preserves comment-like lines (no comment recognition at substrate)', () => { + const raw = `## H\n\n<!-- comment -->\n- bullet\n`; + expect(roundTrip(raw)).toBe(raw); + 
}); + + it('RT-06 round-trip preserves indented blocks (substrate doesn\'t reflow)', () => { + const raw = `## H\n\n indented code-ish block\n more indented\n`; + expect(roundTrip(raw)).toBe(raw); + }); + + it('RT-07 round-trip preserves blockquotes', () => { + const raw = `## H\n\n> quoted line 1\n> quoted line 2\n`; + expect(roundTrip(raw)).toBe(raw); + }); + + it('RT-08 round-trip preserves images / links', () => { + const raw = `## H\n\n![alt](path/to/img.png)\n[link](http://example.com)\n`; + expect(roundTrip(raw)).toBe(raw); + }); + + it('RT-09 round-trip preserves HTML', () => { + const raw = `## H\n\n<details><summary>x</summary>body</details>\n`; + expect(roundTrip(raw)).toBe(raw); + }); + + it('RT-10 round-trip preserves consecutive headings with no body between', () => { + const raw = `## A\n## B\n## C\n`; + expect(roundTrip(raw)).toBe(raw); + }); +}); + +// ---------- corpus generator ------------------------------------------------- + +function generateCorpus(count: number): string[] { + const corpus: string[] = []; + // Deterministic seed so flaky failures don't surface differently each run. + let seed = 42; + const rand = () => { + seed = (seed * 1664525 + 1013904223) % 2 ** 32; + return seed / 2 ** 32; + }; + const choose = <T>(arr: readonly T[]): T => arr[Math.floor(rand() * arr.length)]; + + const headings = ['Boundaries', 'Tools', 'Memory', 'Identity', 'User', 'Heartbeat', 'Skills']; + const fmKeys = ['name', 'description', 'tier', 'enabled', 'timeout', 'url']; + const fmValues = ['github', 'gh CLI', 'T1', 'true', '15000', 'https://example.com']; + const itemTexts = ['never write to /etc', 'always confirm', 'gh: GitHub CLI', 'curl: HTTP']; + const eols = ['\n', '\r\n']; + + for (let i = 0; i < count; i++) { + const eol = choose(eols); + const parts: string[] = []; + + if (rand() < 0.5) { + parts.push('---'); + const fmCount = Math.floor(rand() * 4); + for (let k = 0; k < fmCount; k++) { + parts.push(`${choose(fmKeys)}: ${choose(fmValues)}`); + } + parts.push('---'); + parts.push(''); + } + + if (rand() < 0.3) { + parts.push('Some preamble.'); + parts.push(''); + } + + const blockCount = Math.floor(rand() * 3) + 1; + for (let b = 0; b < blockCount; b++) { + parts.push(`## ${choose(headings)}`); + parts.push(''); + const itemCount = Math.floor(rand() * 4); + for (let it = 0; it < itemCount; it++) { + parts.push(`- ${choose(itemTexts)}`); + } + if (rand() < 0.2) { + parts.push('```'); + parts.push('code'); + parts.push('```'); + } + parts.push(''); + } + + corpus.push(parts.join(eol)); + } + return corpus; +} + +function stripDerived(b: { heading: string; slug: string; bodyText: string }): { + 
heading: string; + slug: string; +} { + return { heading: b.heading, slug: b.slug }; +} diff --git a/src/oc-path/tests/scenarios/sentinel-cross-kind.test.ts b/src/oc-path/tests/scenarios/sentinel-cross-kind.test.ts new file mode 100644 index 00000000000..5c247efbbd5 --- /dev/null +++ b/src/oc-path/tests/scenarios/sentinel-cross-kind.test.ts @@ -0,0 +1,177 @@ +/** + * Wave 21 — sentinel guard across all 3 kinds. + * + * Substrate guarantee: emit refuses to write a CALLER-INJECTED + * `__OPENCLAW_REDACTED__` literal. Round-trip mode trusts parsed bytes + * (a workspace file legitimately containing the sentinel — in a code + * block, in a pasted error log — would otherwise become a workspace- + * wide emit DoS). Render mode walks every leaf, so a caller-injected + * sentinel via `setOcPath` always fails. Callers that want strict + * pre-existing-byte detection (e.g., LKG fingerprint verification) + * opt in via `acceptPreExistingSentinel: false`. + */ +import { describe, expect, it } from 'vitest'; +import { setJsoncOcPath } from '../../jsonc/edit.js'; +import { emitMd } from '../../emit.js'; +import { emitJsonc } from '../../jsonc/emit.js'; +import { parseJsonc } from '../../jsonc/parse.js'; +import { emitJsonl } from '../../jsonl/emit.js'; +import { parseJsonl } from '../../jsonl/parse.js'; +import { parseOcPath } from '../../oc-path.js'; +import { parseMd } from '../../parse.js'; +import { + OcEmitSentinelError, + REDACTED_SENTINEL, +} from '../../sentinel.js'; + +describe('wave-21 sentinel guard cross-kind', () => { + it('S-01 jsonc round-trip echoes safely when raw contains pre-existing sentinel', () => { + // Pre-existing sentinel bytes are trusted — see emit-policy comment + // in jsonc/emit.ts. The strict mode below is the opt-in path for + // callers who want LKG-style fingerprint verification. 
+ const raw = `{ "x": "${REDACTED_SENTINEL}" }`; + const ast = parseJsonc(raw).ast; + expect(emitJsonc(ast)).toBe(raw); + // Strict mode still rejects pre-existing sentinel for callers who + // explicitly opt in. + expect(() => emitJsonc(ast, { acceptPreExistingSentinel: false })).toThrow( + OcEmitSentinelError, + ); + }); + + it('S-02 jsonl round-trip echoes safely; strict mode rejects', () => { + const raw = `{"x":"${REDACTED_SENTINEL}"}\n`; + const ast = parseJsonl(raw).ast; + expect(emitJsonl(ast)).toBe(raw); + expect(() => emitJsonl(ast, { acceptPreExistingSentinel: false })).toThrow( + OcEmitSentinelError, + ); + }); + + it('S-03 md round-trip echoes safely; strict mode rejects', () => { + const raw = `## Body\n\n- ${REDACTED_SENTINEL}\n`; + const ast = parseMd(raw).ast; + expect(emitMd(ast)).toBe(raw); + expect(() => emitMd(ast, { acceptPreExistingSentinel: false })).toThrow( + OcEmitSentinelError, + ); + }); + + it('S-04 jsonc render mode walks every leaf for sentinel', () => { + const ast = parseJsonc('{ "x": "ok" }').ast; + const tampered = { + ...ast, + root: { + kind: 'object' as const, + entries: [ + { + key: 'x', + line: 1, + value: { kind: 'string' as const, value: REDACTED_SENTINEL }, + }, + ], + }, + }; + expect(() => emitJsonc(tampered, { mode: 'render' })).toThrow( + OcEmitSentinelError, + ); + }); + + it('S-05 jsonl render mode walks every value-line leaf', () => { + const ast = parseJsonl('{"a":"ok"}\n').ast; + const tampered = { + ...ast, + lines: [ + { + kind: 'value' as const, + line: 1, + raw: '{"a":"ok"}', + value: { + kind: 'object' as const, + entries: [ + { + key: 'a', + line: 1, + value: { kind: 'string' as const, value: REDACTED_SENTINEL }, + }, + ], + }, + }, + ], + }; + expect(() => emitJsonl(tampered, { mode: 'render' })).toThrow( + OcEmitSentinelError, + ); + }); + + it('S-06 setJsoncOcPath itself throws when the new value contains the sentinel', () => { + // The substrate guard fires at write-time: setJsoncOcPath rebuilds + // 
raw via render mode emit, which scans every leaf. Defense-in-depth + // — even if a caller forgets to call emit afterward, the sentinel + // can't make it into an in-memory AST that pretends to be valid. + const ast = parseJsonc('{ "x": "ok" }').ast; + expect(() => + setJsoncOcPath(ast, parseOcPath('oc://config/x'), { + kind: 'string', + value: REDACTED_SENTINEL, + }), + ).toThrow(OcEmitSentinelError); + }); + + it('S-07 sentinel embedded in deep nesting — render mode catches the leaf', () => { + // Round-trip echoes the pre-existing bytes (the workspace contract: + // a parsed file containing the sentinel as data is not "writing" it + // on emit). Render mode walks every leaf and rejects this caller- + // injected pattern — and a `setOcPath` followed by emit lands here. + const raw = JSON.stringify({ a: { b: { c: REDACTED_SENTINEL } } }); + const ast = parseJsonc(raw).ast; + expect(emitJsonc(ast)).toBe(raw); // round-trip echo + expect(() => emitJsonc(ast, { mode: 'render' })).toThrow(OcEmitSentinelError); + }); + + it('S-08 sentinel inside an array element triggers guard in render mode', () => { + const raw = JSON.stringify({ arr: ['ok', REDACTED_SENTINEL, 'ok'] }); + const ast = parseJsonc(raw).ast; + expect(() => emitJsonc(ast, { mode: 'render' })).toThrow(OcEmitSentinelError); + }); + + it('S-09 sentinel as object key in raw — strict mode catches it', () => { + const raw = `{ "${REDACTED_SENTINEL}": 1 }`; + const ast = parseJsonc(raw).ast; + expect(emitJsonc(ast)).toBe(raw); // default-mode echo + expect(() => emitJsonc(ast, { acceptPreExistingSentinel: false })).toThrow( + OcEmitSentinelError, + ); + }); + + it('S-10 sentinel in jsonl malformed line — strict mode catches it', () => { + const raw = `${REDACTED_SENTINEL}\n`; + const ast = parseJsonl(raw).ast; + expect(emitJsonl(ast)).toBe(raw); // round-trip echoes verbatim + expect(() => emitJsonl(ast, { acceptPreExistingSentinel: false })).toThrow( + OcEmitSentinelError, + ); + }); + + it('S-11 partial 
sentinel substring does NOT trigger guard', () => { + const raw = '{ "x": "OPENCLAW_REDACTED" }'; + const ast = parseJsonc(raw).ast; + expect(() => emitJsonc(ast)).not.toThrow(); + }); + + it('S-12 sentinel guard error message includes the OcPath context (render mode)', () => { + // Render mode is the path that actually rejects caller-injected + // sentinel — round-trip just echoes, so the error context surfaces + // when render walks the offending leaf and constructs the path. + const raw = `{ "secret": "${REDACTED_SENTINEL}" }`; + const ast = parseJsonc(raw).ast; + try { + emitJsonc(ast, { mode: 'render', fileNameForGuard: 'config' }); + expect.fail('should have thrown'); + } catch (e) { + expect(e).toBeInstanceOf(OcEmitSentinelError); + expect(String(e)).toContain('oc://'); + expect(String(e)).toContain('config'); + } + }); +}); diff --git a/src/oc-path/tests/scenarios/sentinel-guard.test.ts b/src/oc-path/tests/scenarios/sentinel-guard.test.ts new file mode 100644 index 00000000000..b0865574518 --- /dev/null +++ b/src/oc-path/tests/scenarios/sentinel-guard.test.ts @@ -0,0 +1,180 @@ +/** + * Wave 9 — sentinel guard at every emit leaf. + * + * Substrate guarantee: render mode walks every leaf and throws + * `OcEmitSentinelError` on `__OPENCLAW_REDACTED__`; round-trip mode echoes a + * pre-existing sentinel in `raw` unless `acceptPreExistingSentinel: false`.
/**
 * Wave 9 scenarios for the redaction-sentinel emit guard.
 *
 * As pinned by the cases below:
 *  - `guardSentinel` throws `OcEmitSentinelError` for any string that contains
 *    `REDACTED_SENTINEL` and ignores non-string values;
 *  - markdown render mode rejects a sentinel in frontmatter, preamble, block
 *    body text, or an item's kv value;
 *  - round-trip mode echoes a sentinel already present in `raw`, unless the
 *    caller passes `acceptPreExistingSentinel: false`.
 */
describe('wave-09 sentinel-guard', () => {
  /**
   * Runs `action` and returns whatever it threw.
   *
   * The "did it throw?" assertion lives outside the `try`/`catch`, so a
   * missing throw is reported as exactly that instead of being caught by the
   * same handler that inspects the error.
   */
  function captureThrown(action: () => unknown): unknown {
    let thrown: unknown;
    let didThrow = false;
    try {
      action();
    } catch (err) {
      thrown = err;
      didThrow = true;
    }
    expect(didThrow, 'should have thrown').toBe(true);
    return thrown;
  }

  it('S-01 sentinel constant matches the literal', () => {
    expect(REDACTED_SENTINEL).toBe('__OPENCLAW_REDACTED__');
  });

  it('S-02 guardSentinel passes normal strings', () => {
    expect(() => guardSentinel('safe', 'oc://X.md')).not.toThrow();
  });

  it('S-03 guardSentinel passes non-string types', () => {
    expect(() => guardSentinel(42, 'oc://X.md')).not.toThrow();
    expect(() => guardSentinel(null, 'oc://X.md')).not.toThrow();
    expect(() => guardSentinel(undefined, 'oc://X.md')).not.toThrow();
    expect(() => guardSentinel({}, 'oc://X.md')).not.toThrow();
  });

  it('S-04 guardSentinel throws on exact match', () => {
    expect(() => guardSentinel(REDACTED_SENTINEL, 'oc://X.md')).toThrow(OcEmitSentinelError);
  });

  it('S-05 guardSentinel throws on substring matches (sentinel embedded in larger string)', () => {
    // Substring scan: the sentinel anywhere in the value counts as a leak,
    // not only an exact match. `prefix__OPENCLAW_REDACTED__suffix` must be
    // rejected just like the bare literal.
    expect(() => guardSentinel(`prefix${REDACTED_SENTINEL}suffix`, 'oc://X.md')).toThrow(
      OcEmitSentinelError,
    );
  });

  it('S-06 error attaches the OcPath context', () => {
    const err = captureThrown(() =>
      guardSentinel(REDACTED_SENTINEL, 'oc://config/plugins.entries.foo.token'),
    );
    expect(err).toBeInstanceOf(OcEmitSentinelError);
    const e = err as OcEmitSentinelError;
    expect(e.path).toBe('oc://config/plugins.entries.foo.token');
    expect(e.code).toBe('OC_EMIT_SENTINEL');
  });

  it('S-07 round-trip echoes pre-existing sentinel; strict mode rejects', () => {
    const raw = '## Section\n\n- token: __OPENCLAW_REDACTED__\n';
    const { ast } = parseMd(raw);
    expect(emitMd(ast)).toBe(raw);
    expect(() => emitMd(ast, { acceptPreExistingSentinel: false })).toThrow(
      OcEmitSentinelError,
    );
  });

  it('S-08 round-trip emit allows sentinel-free content', () => {
    const raw = '## Section\n\n- token: redacted-but-not-sentinel\n';
    const { ast } = parseMd(raw);
    expect(() => emitMd(ast)).not.toThrow();
  });

  it('S-09 render mode catches sentinel in frontmatter', () => {
    const ast = {
      kind: 'md' as const,
      raw: '',
      frontmatter: [{ key: 'token', value: REDACTED_SENTINEL, line: 2 }],
      preamble: '',
      blocks: [],
    };
    expect(() => emitMd(ast, { mode: 'render' })).toThrow(OcEmitSentinelError);
  });

  it('S-10 render mode catches sentinel in preamble', () => {
    const ast = {
      kind: 'md' as const,
      raw: '',
      frontmatter: [],
      preamble: REDACTED_SENTINEL,
      blocks: [],
    };
    expect(() => emitMd(ast, { mode: 'render' })).toThrow(OcEmitSentinelError);
  });

  it('S-11 render mode catches sentinel in block bodyText', () => {
    const ast = {
      kind: 'md' as const,
      raw: '',
      frontmatter: [],
      preamble: '',
      blocks: [
        {
          heading: 'Sec',
          slug: 'sec',
          line: 1,
          bodyText: REDACTED_SENTINEL,
          items: [],
          tables: [],
          codeBlocks: [],
        },
      ],
    };
    expect(() => emitMd(ast, { mode: 'render' })).toThrow(OcEmitSentinelError);
  });

  it('S-12 render mode catches sentinel in item kv.value', () => {
    const ast = {
      kind: 'md' as const,
      raw: '',
      frontmatter: [],
      preamble: '',
      blocks: [
        {
          heading: 'S',
          slug: 's',
          line: 1,
          bodyText: '- t: x',
          items: [
            {
              text: 't: x',
              slug: 't',
              line: 2,
              kv: { key: 't', value: REDACTED_SENTINEL },
            },
          ],
          tables: [],
          codeBlocks: [],
        },
      ],
    };
    expect(() => emitMd(ast, { mode: 'render', fileNameForGuard: 'AGENTS.md' })).toThrow(
      OcEmitSentinelError,
    );
  });

  it('S-13 sentinel-as-substring in raw — strict mode catches it', () => {
    const raw = `Some prose ${REDACTED_SENTINEL} more prose.\n`;
    const { ast } = parseMd(raw);
    expect(emitMd(ast)).toBe(raw);
    expect(() => emitMd(ast, { acceptPreExistingSentinel: false })).toThrow(
      OcEmitSentinelError,
    );
  });

  it('S-14 multiple sentinel occurrences in raw — strict mode catches them', () => {
    const raw = `## A\n${REDACTED_SENTINEL}\n${REDACTED_SENTINEL}\n`;
    const { ast } = parseMd(raw);
    expect(emitMd(ast)).toBe(raw);
    expect(() => emitMd(ast, { acceptPreExistingSentinel: false })).toThrow(
      OcEmitSentinelError,
    );
  });

  it('S-15 fileNameForGuard appears in the error path', () => {
    const ast = {
      kind: 'md' as const,
      raw: '',
      frontmatter: [{ key: 'token', value: REDACTED_SENTINEL, line: 2 }],
      preamble: '',
      blocks: [],
    };
    const err = captureThrown(() =>
      emitMd(ast, { mode: 'render', fileNameForGuard: 'config' }),
    );
    // Pin the error type before reading `.path`; otherwise an unrelated
    // error would surface as a confusing "undefined does not contain" failure.
    expect(err).toBeInstanceOf(OcEmitSentinelError);
    expect((err as OcEmitSentinelError).path).toContain('config');
  });
});
/**
 * Wave 5 scenarios: GFM-style tables found inside a markdown H2 block.
 *
 * Each case parses a small document and inspects the `tables` array of the
 * first parsed block (`headers`, `rows`, `line`).
 */
describe('wave-05 tables', () => {
  // Builds a fixture document: one entry per line, with a trailing newline.
  const doc = (...rows: string[]): string => `${rows.join('\n')}\n`;

  // Tables of the first block; `undefined` when the parser produced no block.
  const tablesOf = (raw: string) => parseMd(raw).ast.blocks[0]?.tables;

  it('T-01 standard 2-column table', () => {
    const first = tablesOf(
      doc(
        '## H',
        '',
        '| tool | guidance |',
        '| --- | --- |',
        '| gh | use for GitHub |',
        '| curl | HTTP client |',
      ),
    )?.[0];
    expect(first?.headers).toEqual(['tool', 'guidance']);
    expect(first?.rows).toEqual([
      ['gh', 'use for GitHub'],
      ['curl', 'HTTP client'],
    ]);
  });

  it('T-02 3+ column table', () => {
    const first = tablesOf(doc('## H', '', '| a | b | c |', '| - | - | - |', '| 1 | 2 | 3 |'))?.[0];
    expect(first?.headers).toEqual(['a', 'b', 'c']);
    expect(first?.rows[0]).toEqual(['1', '2', '3']);
  });

  it('T-03 table with alignment colons in separator', () => {
    const found = tablesOf(
      doc('## H', '', '| left | center | right |', '| :--- | :---: | ---: |', '| a | b | c |'),
    );
    expect(found?.length).toBe(1);
  });

  it('T-04 table with empty cells', () => {
    const first = tablesOf(doc('## H', '', '| a | b |', '| - | - |', '| 1 | |', '| | 2 |'))?.[0];
    expect(first?.rows).toEqual([
      ['1', ''],
      ['', '2'],
    ]);
  });

  it('T-05 table with no rows (header + sep only)', () => {
    const first = tablesOf(doc('## H', '', '| a | b |', '| - | - |'))?.[0];
    expect(first?.headers).toEqual(['a', 'b']);
    expect(first?.rows).toEqual([]);
  });

  it('T-06 multiple tables in same section', () => {
    const found = tablesOf(
      doc(
        '## H',
        '',
        '| a | b |',
        '| - | - |',
        '| 1 | 2 |',
        '',
        'Some text.',
        '',
        '| x | y |',
        '| - | - |',
        '| 3 | 4 |',
      ),
    );
    expect(found?.length).toBe(2);
  });

  it('T-07 table line numbers track to the header line', () => {
    const first = tablesOf(doc('## Section', 'preamble line', '| a | b |', '| - | - |'))?.[0];
    // NOTE(review): only positivity is pinned here. If `line` is the 1-based
    // file line of the header row, this could be tightened; confirm first.
    expect(first?.line).toBeGreaterThan(0);
  });

  it('T-08 invalid separator (no pipes) — no table extracted', () => {
    const found = tablesOf(doc('## H', '', '| a | b |', 'not a separator', '| 1 | 2 |'));
    expect(found).toEqual([]);
  });

  it('T-09 single-column table (just `| col |\\n|---|`)', () => {
    const first = tablesOf(doc('## H', '', '| col |', '| --- |', '| value1 |', '| value2 |'))?.[0];
    expect(first?.headers).toEqual(['col']);
    expect(first?.rows).toEqual([['value1'], ['value2']]);
  });

  it('T-10 table at end of file with trailing newlines', () => {
    // The two extra empty entries yield three newlines after the last row.
    const first = tablesOf(doc('## H', '', '| a |', '| - |', '| 1 |', '', ''))?.[0];
    expect(first?.rows).toEqual([['1']]);
  });

  it('T-11 table content with internal whitespace trimmed', () => {
    // NOTE(review): cell padding in this fixture may have been collapsed
    // before it reached this review; verify against the repository copy.
    const first = tablesOf(doc('## H', '', '| col1 | col2 |', '| --- | --- |', '| a | b |'))?.[0];
    expect(first?.headers).toEqual(['col1', 'col2']);
    expect(first?.rows[0]).toEqual(['a', 'b']);
  });
});
/**
 * Unit cases for `guardSentinel`: non-sentinel input passes, the sentinel
 * literal throws `OcEmitSentinelError`, and the error carries the supplied
 * path plus the `OC_EMIT_SENTINEL` code.
 */
describe('guardSentinel', () => {
  it('passes through normal strings', () => {
    const check = () => guardSentinel('normal value', 'oc://SOUL.md');
    expect(check).not.toThrow();
  });

  it('passes through non-string values', () => {
    for (const value of [42, null, undefined]) {
      expect(() => guardSentinel(value, 'oc://SOUL.md')).not.toThrow();
    }
  });

  it('throws on the sentinel literal', () => {
    const check = () => guardSentinel(REDACTED_SENTINEL, 'oc://SOUL.md/[fm]/token');
    expect(check).toThrow(OcEmitSentinelError);
  });

  it('attaches the OcPath in the error', () => {
    const ocPath = 'oc://config/plugins.entries.foo.token';
    try {
      guardSentinel(REDACTED_SENTINEL, ocPath);
      expect.fail('should have thrown');
    } catch (caught) {
      expect(caught).toBeInstanceOf(OcEmitSentinelError);
      const sentinelError = caught as OcEmitSentinelError;
      expect(sentinelError.path).toBe(ocPath);
      expect(sentinelError.code).toBe('OC_EMIT_SENTINEL');
    }
  });
});

/**
 * Unit cases for `slugify`: lower-casing, separator normalisation to single
 * hyphens, trimming, and the ASCII-only handling of non-ASCII characters.
 */
describe('slugify', () => {
  // Asserts that slugifying `input` yields exactly `expected`.
  const expectSlug = (input: string, expected: string): void => {
    expect(slugify(input)).toBe(expected);
  };

  it('lowercases', () => {
    expectSlug('Boundaries', 'boundaries');
  });

  it('replaces underscores with hyphens', () => {
    expectSlug('API_KEY', 'api-key');
  });

  it('collapses multi-word headings', () => {
    expectSlug('Tool Guidance', 'tool-guidance');
  });

  it('preserves existing kebab-case', () => {
    expectSlug('deny-rule-1', 'deny-rule-1');
  });

  it('trims surrounding whitespace + non-slug chars', () => {
    expectSlug(' Restricted Data ', 'restricted-data');
  });

  it('handles colon + space patterns', () => {
    expectSlug('deny: secrets', 'deny-secrets');
  });

  it('collapses repeated hyphens', () => {
    expectSlug('foo----bar', 'foo-bar');
  });

  it('returns empty for non-slug-valid input', () => {
    expectSlug('!!', '');
    expectSlug(' ', '');
  });

  it('is idempotent', () => {
    // Slugifying an already-slugified value must not change it.
    ['Tool Guidance', 'API_KEY', 'deny-rule-1', 'Multi-tenant isolation'].forEach((input) => {
      const once = slugify(input);
      expect(slugify(once)).toBe(once);
    });
  });

  it('handles unicode by stripping (current ASCII-only policy)', () => {
    // Known limit: non-ASCII characters are dropped, so such headings become
    // lossy or empty. Lint rules can flag non-ASCII headings if needed.
    expectSlug('Café', 'caf');
  });
});
// ---------- detectInsertion ------------------------------------------------

/** `detectInsertion` recognises a trailing `+`, `+key` or `+nnn` segment. */
describe('detectInsertion', () => {
  // Parses the textual oc:// path, then asks for its insertion marker.
  const detect = (path: string) => detectInsertion(parseOcPath(path));

  it('returns null for plain paths', () => {
    expect(detect('oc://X.md/section/item/field')).toBeNull();
  });

  it('detects bare `+` end-insertion at section', () => {
    const insertion = detect('oc://X.md/tools/+');
    expect(insertion?.marker).toBe('+');
    expect(insertion?.parentPath.section).toBe('tools');
    expect(insertion?.parentPath.item).toBeUndefined();
  });

  it('detects `+key` keyed insertion', () => {
    const insertion = detect('oc://config/plugins/+gitlab');
    expect(insertion?.marker).toEqual({ kind: 'keyed', key: 'gitlab' });
  });

  it('detects `+nnn` indexed insertion', () => {
    const insertion = detect('oc://config/items/+2');
    expect(insertion?.marker).toEqual({ kind: 'indexed', index: 2 });
  });

  it('detects file-root insertion', () => {
    const insertion = detect('oc://session.jsonl/+');
    expect(insertion?.marker).toBe('+');
    expect(insertion?.parentPath.section).toBeUndefined();
  });
});

// ---------- resolveOcPath — universal across kinds -------------------------

describe('resolveOcPath — md AST', () => {
  const md = parseMd(
    '---\nname: github\n---\n\n## Boundaries\n\n- enabled: true\n',
  ).ast;
  // Resolves a textual oc:// path against the shared markdown fixture.
  const at = (path: string) => resolveOcPath(md, parseOcPath(path));

  it('returns leaf with valueText for frontmatter entry', () => {
    expect(at('oc://X.md/[frontmatter]/name')).toMatchObject({
      kind: 'leaf',
      valueText: 'github',
      leafType: 'string',
    });
  });

  it('returns leaf for item-field', () => {
    expect(at('oc://X.md/boundaries/enabled/enabled')).toMatchObject({
      kind: 'leaf',
      valueText: 'true',
      leafType: 'string',
    });
  });

  it('returns node for block', () => {
    expect(at('oc://X.md/boundaries')).toMatchObject({ kind: 'node', descriptor: 'md-block' });
  });

  it('returns root for file-only path', () => {
    expect(at('oc://X.md')?.kind).toBe('root');
  });

  it('returns null for unresolved', () => {
    expect(at('oc://X.md/missing')).toBeNull();
  });
});

describe('resolveOcPath — jsonc AST', () => {
  const ast = parseJsonc('{ "k": 42, "s": "x", "b": true, "n": null, "arr": [1,2,3] }').ast;
  // Resolves a textual oc:// path against the shared jsonc fixture.
  const at = (path: string) => resolveOcPath(ast, parseOcPath(path));

  it('returns leaf:number for numeric value', () => {
    expect(at('oc://config/k')).toMatchObject({ kind: 'leaf', valueText: '42', leafType: 'number' });
  });

  it('returns leaf:string for string value', () => {
    expect(at('oc://config/s')).toMatchObject({ kind: 'leaf', valueText: 'x', leafType: 'string' });
  });

  it('returns leaf:boolean for bool value', () => {
    expect(at('oc://config/b')).toMatchObject({
      kind: 'leaf',
      valueText: 'true',
      leafType: 'boolean',
    });
  });

  it('returns leaf:null for null value', () => {
    expect(at('oc://config/n')).toMatchObject({ kind: 'leaf', valueText: 'null', leafType: 'null' });
  });

  it('returns node:jsonc-array for array value', () => {
    expect(at('oc://config/arr')).toMatchObject({ kind: 'node', descriptor: 'jsonc-array' });
  });

  it('returns leaf at array index', () => {
    expect(at('oc://config/arr.1')).toMatchObject({
      kind: 'leaf',
      valueText: '2',
      leafType: 'number',
    });
  });
});

describe('resolveOcPath — jsonl AST', () => {
  const ast = parseJsonl('{"event":"start","n":1}\n{"event":"step","n":2}\n').ast;
  // Resolves a textual oc:// path against the shared two-line jsonl fixture.
  const at = (path: string) => resolveOcPath(ast, parseOcPath(path));

  it('returns node:jsonl-line for line address', () => {
    expect(at('oc://log/L1')).toMatchObject({ kind: 'node', descriptor: 'jsonl-line' });
  });

  it('returns leaf for field on line', () => {
    expect(at('oc://log/L2/event')).toMatchObject({
      kind: 'leaf',
      valueText: 'step',
      leafType: 'string',
    });
  });

  it('returns leaf:number for $last/n', () => {
    expect(at('oc://log/$last/n')).toMatchObject({
      kind: 'leaf',
      valueText: '2',
      leafType: 'number',
    });
  });
});

describe('resolveOcPath — insertion-point detection', () => {
  // Resolves a textual oc:// path against whichever AST the case supplies.
  const at = (ast: Parameters<typeof resolveOcPath>[0], path: string) =>
    resolveOcPath(ast, parseOcPath(path));

  it('returns insertion-point for md section append', () => {
    const match = at(parseMd('## Tools\n').ast, 'oc://X.md/tools/+');
    expect(match).toMatchObject({ kind: 'insertion-point', container: 'md-section' });
  });

  it('returns insertion-point for md file-level', () => {
    const match = at(parseMd('## Tools\n').ast, 'oc://X.md/+');
    expect(match).toMatchObject({ kind: 'insertion-point', container: 'md-file' });
  });

  it('returns insertion-point for md frontmatter +key', () => {
    const match = at(parseMd('---\nname: x\n---\n').ast, 'oc://X.md/[frontmatter]/+description');
    expect(match).toMatchObject({ kind: 'insertion-point', container: 'md-frontmatter' });
  });

  it('returns insertion-point for jsonc array +', () => {
    const match = at(parseJsonc('{ "items": [1,2,3] }').ast, 'oc://config/items/+');
    expect(match).toMatchObject({ kind: 'insertion-point', container: 'jsonc-array' });
  });

  it('returns insertion-point for jsonc object +key', () => {
    const match = at(parseJsonc('{ "plugins": {} }').ast, 'oc://config/plugins/+gitlab');
    expect(match).toMatchObject({ kind: 'insertion-point', container: 'jsonc-object' });
  });

  it('returns insertion-point for jsonl file-root +', () => {
    const match = at(parseJsonl('').ast, 'oc://log/+');
    expect(match).toMatchObject({ kind: 'insertion-point', container: 'jsonl-file' });
  });

  it('returns null when insertion target is not a container', () => {
    // `k` holds a number, so there is nothing to insert into.
    const match = at(parseJsonc('{ "k": 42 }').ast, 'oc://config/k/+');
    expect(match).toBeNull();
  });
});
// ---------- setOcPath — leaf assignment ------------------------------------

/** Leaf replacement on a markdown AST: frontmatter values and item kv values. */
describe('setOcPath — md leaf', () => {
  it('replaces frontmatter value', () => {
    const md = parseMd('---\nname: old\n---\n').ast;
    const r = setOcPath(md, parseOcPath('oc://X.md/[frontmatter]/name'), 'new');
    expect(r.ok).toBe(true);
    if (r.ok) {expect(r.ast.kind === 'md' && r.ast.frontmatter[0]?.value).toBe('new');}
  });

  it('replaces item kv value', () => {
    const md = parseMd('## Boundaries\n\n- timeout: 5\n').ast;
    const r = setOcPath(md, parseOcPath('oc://X.md/boundaries/timeout/timeout'), '60');
    expect(r.ok).toBe(true);
    if (r.ok) {
      // setOcPath returns the kind-agnostic AST; narrow it to what emitMd takes.
      const out = emitMd(r.ast as Parameters<typeof emitMd>[0]);
      expect(out).toContain('- timeout: 60');
    }
  });

  it('returns unresolved for missing path', () => {
    const md = parseMd('').ast;
    const r = setOcPath(md, parseOcPath('oc://X.md/missing/x/x'), 'v');
    expect(r.ok).toBe(false);
    if (!r.ok) {expect(r.reason).toBe('unresolved');}
  });
});

/**
 * Leaf replacement on a jsonc AST. The string value is coerced to the type of
 * the existing leaf; a value that cannot be coerced yields `parse-error`.
 */
describe('setOcPath — jsonc leaf with coercion', () => {
  it('replaces string leaf with string value', () => {
    const ast = parseJsonc('{ "k": "old" }').ast;
    const r = setOcPath(ast, parseOcPath('oc://config/k'), 'new');
    expect(r.ok).toBe(true);
    if (r.ok) {
      const ast2 = r.ast as Parameters<typeof emitJsonc>[0];
      expect(JSON.parse(emitJsonc(ast2))).toEqual({ k: 'new' });
    }
  });

  it('coerces value to number when leaf was number', () => {
    const ast = parseJsonc('{ "k": 1 }').ast;
    const r = setOcPath(ast, parseOcPath('oc://config/k'), '42');
    expect(r.ok).toBe(true);
    if (r.ok) {
      const ast2 = r.ast as Parameters<typeof emitJsonc>[0];
      expect(JSON.parse(emitJsonc(ast2))).toEqual({ k: 42 });
    }
  });

  it('coerces "true"/"false" when leaf was boolean', () => {
    const ast = parseJsonc('{ "k": true }').ast;
    const r = setOcPath(ast, parseOcPath('oc://config/k'), 'false');
    expect(r.ok).toBe(true);
    if (r.ok) {
      const ast2 = r.ast as Parameters<typeof emitJsonc>[0];
      expect(JSON.parse(emitJsonc(ast2))).toEqual({ k: false });
    }
  });

  it('rejects non-numeric string for number leaf', () => {
    const ast = parseJsonc('{ "k": 1 }').ast;
    const r = setOcPath(ast, parseOcPath('oc://config/k'), 'not-a-number');
    expect(r.ok).toBe(false);
    if (!r.ok) {expect(r.reason).toBe('parse-error');}
  });

  it('rejects non-bool string for boolean leaf', () => {
    const ast = parseJsonc('{ "k": true }').ast;
    const r = setOcPath(ast, parseOcPath('oc://config/k'), 'maybe');
    expect(r.ok).toBe(false);
    if (!r.ok) {expect(r.reason).toBe('parse-error');}
  });
});

/** Leaf and whole-line replacement on a jsonl AST. */
describe('setOcPath — jsonl leaf', () => {
  it('replaces field on a value line with coercion', () => {
    const ast = parseJsonl('{"event":"start","n":1}\n').ast;
    const r = setOcPath(ast, parseOcPath('oc://log/L1/n'), '42');
    expect(r.ok).toBe(true);
    if (r.ok) {
      const out = emitJsonl(r.ast as Parameters<typeof emitJsonl>[0]);
      expect(JSON.parse(out.split('\n')[0])).toEqual({ event: 'start', n: 42 });
    }
  });

  it('replaces whole line via JSON value', () => {
    const ast = parseJsonl('{"event":"start"}\n').ast;
    const r = setOcPath(ast, parseOcPath('oc://log/L1'), '{"event":"replaced"}');
    expect(r.ok).toBe(true);
    if (r.ok) {
      const out = emitJsonl(r.ast as Parameters<typeof emitJsonl>[0]);
      expect(JSON.parse(out.split('\n')[0])).toEqual({ event: 'replaced' });
    }
  });

  it('rejects malformed JSON for whole-line replacement', () => {
    const ast = parseJsonl('{"event":"start"}\n').ast;
    const r = setOcPath(ast, parseOcPath('oc://log/L1'), 'not json');
    expect(r.ok).toBe(false);
    if (!r.ok) {expect(r.reason).toBe('parse-error');}
  });
});
// ---------- setOcPath — insertion ------------------------------------------

/** Insertion into a markdown AST: section items, new sections, frontmatter keys. */
describe('setOcPath — md insertion', () => {
  it('appends item to section with `+`', () => {
    const md = parseMd('## Tools\n\n- gh: GitHub CLI\n').ast;
    const r = setOcPath(md, parseOcPath('oc://X.md/tools/+'), 'docker: container CLI');
    expect(r.ok).toBe(true);
    if (r.ok) {
      // setOcPath returns the kind-agnostic AST; narrow it to what emitMd takes.
      const out = emitMd(r.ast as Parameters<typeof emitMd>[0]);
      expect(out).toContain('- gh: GitHub CLI');
      expect(out).toContain('- docker: container CLI');
    }
  });

  it('appends new section at file root with `+`', () => {
    const md = parseMd('## Existing\n').ast;
    const r = setOcPath(md, parseOcPath('oc://X.md/+'), 'New Section');
    expect(r.ok).toBe(true);
    if (r.ok) {
      const out = emitMd(r.ast as Parameters<typeof emitMd>[0]);
      expect(out).toContain('## Existing');
      expect(out).toContain('## New Section');
    }
  });

  it('adds new frontmatter key with +key', () => {
    const md = parseMd('---\nname: x\n---\n').ast;
    const r = setOcPath(
      md,
      parseOcPath('oc://X.md/[frontmatter]/+description'),
      'a new description',
    );
    expect(r.ok).toBe(true);
    if (r.ok) {
      const out = emitMd(r.ast as Parameters<typeof emitMd>[0]);
      expect(out).toContain('description: a new description');
    }
  });

  it('rejects duplicate frontmatter key on insertion', () => {
    const md = parseMd('---\nname: x\n---\n').ast;
    const r = setOcPath(md, parseOcPath('oc://X.md/[frontmatter]/+name'), 'y');
    expect(r.ok).toBe(false);
    if (!r.ok) {expect(r.reason).toBe('type-mismatch');}
  });
});

/** Insertion into a jsonc AST: array append or indexed insert, and object `+key`. */
describe('setOcPath — jsonc insertion', () => {
  it('appends to array with `+`', () => {
    const ast = parseJsonc('{ "items": [1, 2] }').ast;
    const r = setOcPath(ast, parseOcPath('oc://config/items/+'), '3');
    expect(r.ok).toBe(true);
    if (r.ok) {
      const ast2 = r.ast as Parameters<typeof emitJsonc>[0];
      expect(JSON.parse(emitJsonc(ast2))).toEqual({ items: [1, 2, 3] });
    }
  });

  it('inserts at index with `+nnn`', () => {
    const ast = parseJsonc('{ "items": [1, 3] }').ast;
    const r = setOcPath(ast, parseOcPath('oc://config/items/+1'), '2');
    expect(r.ok).toBe(true);
    if (r.ok) {
      const ast2 = r.ast as Parameters<typeof emitJsonc>[0];
      expect(JSON.parse(emitJsonc(ast2))).toEqual({ items: [1, 2, 3] });
    }
  });

  it('adds object key with `+key`', () => {
    const ast = parseJsonc('{ "plugins": { "github": "tok" } }').ast;
    const r = setOcPath(
      ast,
      parseOcPath('oc://config/plugins/+gitlab'),
      '"new-tok"',
    );
    expect(r.ok).toBe(true);
    if (r.ok) {
      const ast2 = r.ast as Parameters<typeof emitJsonc>[0];
      expect(JSON.parse(emitJsonc(ast2))).toEqual({
        plugins: { github: 'tok', gitlab: 'new-tok' },
      });
    }
  });

  it('rejects duplicate object key', () => {
    const ast = parseJsonc('{ "plugins": { "github": "x" } }').ast;
    const r = setOcPath(ast, parseOcPath('oc://config/plugins/+github'), '"y"');
    expect(r.ok).toBe(false);
    if (!r.ok) {expect(r.reason).toBe('unresolved');}
  });

  it('rejects +key on array', () => {
    const ast = parseJsonc('{ "items": [1, 2] }').ast;
    const r = setOcPath(ast, parseOcPath('oc://config/items/+abc'), '3');
    expect(r.ok).toBe(false);
    if (!r.ok) {expect(r.reason).toBe('type-mismatch');}
  });

  it('inserts complex object via JSON value', () => {
    const ast = parseJsonc('{ "plugins": {} }').ast;
    const r = setOcPath(
      ast,
      parseOcPath('oc://config/plugins/+gitlab'),
      '{"token":"xyz","enabled":true}',
    );
    expect(r.ok).toBe(true);
    if (r.ok) {
      const ast2 = r.ast as Parameters<typeof emitJsonc>[0];
      expect(JSON.parse(emitJsonc(ast2))).toEqual({
        plugins: { gitlab: { token: 'xyz', enabled: true } },
      });
    }
  });
});

/** Appending a line to a jsonl AST via file-root `+`. */
describe('setOcPath — jsonl insertion (session append)', () => {
  it('appends a JSON line with `+`', () => {
    const ast = parseJsonl('{"event":"start"}\n').ast;
    const r = setOcPath(
      ast,
      parseOcPath('oc://log/+'),
      '{"event":"step","n":1}',
    );
    expect(r.ok).toBe(true);
    if (r.ok) {
      const out = emitJsonl(r.ast as Parameters<typeof emitJsonl>[0]);
      // Drop the empty entry that follows the trailing newline.
      const lines = out.split('\n').filter((l) => l.length > 0);
      expect(lines).toHaveLength(2);
      expect(JSON.parse(lines[1])).toEqual({ event: 'step', n: 1 });
    }
  });

  it('rejects malformed JSON value', () => {
    const ast = parseJsonl('').ast;
    const r = setOcPath(ast, parseOcPath('oc://log/+'), 'not json');
    expect(r.ok).toBe(false);
    if (!r.ok) {expect(r.reason).toBe('parse-error');}
  });

  it('rejects non-root insertion target', () => {
    const ast = parseJsonl('{"a":1}\n').ast;
    const r = setOcPath(ast, parseOcPath('oc://log/L1/+'), '{}');
    expect(r.ok).toBe(false);
  });
});

// ---------- Cross-cutting properties ---------------------------------------

/** Properties that hold for every AST kind: no input mutation, tagged results. */
describe('setOcPath — cross-cutting properties', () => {
  it('is non-mutating across all kinds', () => {
    // Snapshot each input AST as JSON and compare after the call.
    const md = parseMd('---\nname: x\n---\n').ast;
    const before = JSON.stringify(md);
    setOcPath(md, parseOcPath('oc://X.md/[frontmatter]/name'), 'new');
    expect(JSON.stringify(md)).toBe(before);

    const jsonc = parseJsonc('{ "k": 1 }').ast;
    const before2 = JSON.stringify(jsonc);
    setOcPath(jsonc, parseOcPath('oc://config/k'), '99');
    expect(JSON.stringify(jsonc)).toBe(before2);

    const jsonl = parseJsonl('{"a":1}\n').ast;
    const before3 = JSON.stringify(jsonl);
    setOcPath(jsonl, parseOcPath('oc://log/L1/a'), '99');
    expect(JSON.stringify(jsonl)).toBe(before3);
  });

  it('returns ok-tagged result with new ast on success', () => {
    const md = parseMd('---\nname: x\n---\n').ast;
    const r = setOcPath(md, parseOcPath('oc://X.md/[frontmatter]/name'), 'y');
    expect(r.ok).toBe(true);
    if (r.ok) {
      expect(r.ast.kind).toBe('md');
    }
  });

  it('returns failure-tagged result with reason on unresolved', () => {
    const ast = parseJsonc('{}').ast;
    const r = setOcPath(ast, parseOcPath('oc://config/missing'), 'v');
    expect(r.ok).toBe(false);
    if (!r.ok) {
      expect(r.reason).toBeDefined();
      expect(typeof r.reason).toBe('string');
    }
  });
});
// Workflow fixture shared by the yaml suites: two top-level scalars and a
// `steps` sequence of two maps.
// NOTE(review): leading indentation was reconstructed as 2/4 spaces; confirm
// against the repository copy if exact bytes matter.
const LOBSTER = `name: inbox-triage
description: A simple example workflow

steps:
  - id: fetch
    command: gog.gmail.search --query 'newer_than:1d' --max 20

  - id: classify
    command: openclaw.invoke --tool llm-task --action json
    stdin: $fetch.stdout
`;

/** `parseYaml` followed by `emitYaml` reproduces the input text. */
describe('parseYaml — round-trip', () => {
  it('preserves bytes verbatim on round-trip', () => {
    const { ast } = parseYaml(LOBSTER);
    expect(emitYaml(ast)).toBe(LOBSTER);
  });

  it('exposes kind: yaml discriminator', () => {
    const { ast } = parseYaml(LOBSTER);
    expect(ast.kind).toBe('yaml');
  });

  it('handles empty file', () => {
    const { ast } = parseYaml('');
    expect(ast.kind).toBe('yaml');
    expect(emitYaml(ast)).toBe('');
  });

  it('reports errors as diagnostics, not throws', () => {
    const malformed = 'key: value\n bad indent: oops\n';
    // The contract under test: bad input must not throw.
    expect(() => parseYaml(malformed)).not.toThrow();
    const { diagnostics } = parseYaml(malformed);
    // The previous `length >= 0` check could never fail. Pin the shape instead.
    // NOTE(review): whether this input yields at least one diagnostic is not
    // visible here; tighten to a length check once confirmed.
    expect(Array.isArray(diagnostics)).toBe(true);
  });
});

/** Direct calls to the yaml resolver, bypassing the universal dispatcher. */
describe('resolveYamlOcPath — direct', () => {
  it('resolves top-level scalar', () => {
    const { ast } = parseYaml(LOBSTER);
    const m = resolveYamlOcPath(ast, parseOcPath('oc://workflow.lobster/name'));
    expect(m?.kind).toBe('pair');
    if (m?.kind === 'pair') {expect(m.value).toBe('inbox-triage');}
  });

  it('resolves into a sequence by index', () => {
    const { ast } = parseYaml(LOBSTER);
    const m = resolveYamlOcPath(ast, parseOcPath('oc://workflow.lobster/steps.0.id'));
    expect(m?.kind).toBe('pair');
    if (m?.kind === 'pair') {expect(m.value).toBe('fetch');}
  });

  it('returns root when no segments', () => {
    const { ast } = parseYaml(LOBSTER);
    const m = resolveYamlOcPath(ast, parseOcPath('oc://workflow.lobster'));
    expect(m?.kind).toBe('root');
  });

  it('returns null for unresolved paths', () => {
    const { ast } = parseYaml(LOBSTER);
    expect(
      resolveYamlOcPath(ast, parseOcPath('oc://workflow.lobster/missing')),
    ).toBeNull();
  });
});

/** Direct calls to the yaml editor; a successful edit is visible in `ast.raw`. */
describe('setYamlOcPath — direct', () => {
  it('replaces a scalar value', () => {
    const { ast } = parseYaml(LOBSTER);
    const r = setYamlOcPath(ast, parseOcPath('oc://workflow.lobster/name'), 'new-name');
    expect(r.ok).toBe(true);
    if (r.ok) {expect(r.ast.raw).toContain('name: new-name');}
  });

  it('replaces a nested scalar', () => {
    const { ast } = parseYaml(LOBSTER);
    const r = setYamlOcPath(
      ast,
      parseOcPath('oc://workflow.lobster/steps.0.id'),
      'fetch-renamed',
    );
    expect(r.ok).toBe(true);
    if (r.ok) {expect(r.ast.raw).toContain('id: fetch-renamed');}
  });

  it('returns unresolved for missing path', () => {
    const { ast } = parseYaml(LOBSTER);
    const r = setYamlOcPath(ast, parseOcPath('oc://workflow.lobster/missing'), 'x');
    expect(r.ok).toBe(false);
    if (!r.ok) {expect(r.reason).toBe('unresolved');}
  });
});
+ it('edits the first seq element via $first', () => { + const { ast } = parseYaml(LOBSTER); + const r = setYamlOcPath( + ast, + parseOcPath('oc://workflow.lobster/steps/$first/id'), + 'fetch-renamed', + ); + expect(r.ok).toBe(true); + if (r.ok) {expect(r.ast.raw).toContain('id: fetch-renamed');} + }); + + it('edits the last seq element via $last', () => { + const { ast } = parseYaml(LOBSTER); + const r = setYamlOcPath( + ast, + parseOcPath('oc://workflow.lobster/steps/$last/id'), + 'classify-renamed', + ); + expect(r.ok).toBe(true); + if (r.ok) {expect(r.ast.raw).toContain('id: classify-renamed');} + }); + + it('edits the second-to-last seq element via -2', () => { + const { ast } = parseYaml('items:\n - a\n - b\n - c\n'); + const r = setYamlOcPath( + ast, + parseOcPath('oc://x.yaml/items/-2'), + 'B', + ); + expect(r.ok).toBe(true); + if (r.ok) {expect(r.ast.raw).toContain('- B');} + }); + + it('edits the first map entry via $first', () => { + const { ast } = parseYaml('config:\n a: 1\n b: 2\n c: 3\n'); + const r = setYamlOcPath( + ast, + parseOcPath('oc://x.yaml/config/$first'), + 99, + ); + expect(r.ok).toBe(true); + if (r.ok) {expect(r.ast.raw).toContain('a: 99');} + }); + + it('returns unresolved for $first against an empty seq', () => { + const { ast } = parseYaml('items: []\n'); + const r = setYamlOcPath( + ast, + parseOcPath('oc://x.yaml/items/$first'), + 'x', + ); + expect(r.ok).toBe(false); + if (!r.ok) {expect(r.reason).toBe('unresolved');} + }); +}); + +describe('inferKind — yaml extensions', () => { + it('maps .yaml / .yml / .lobster to yaml', () => { + expect(inferKind('workflow.yaml')).toBe('yaml'); + expect(inferKind('config.yml')).toBe('yaml'); + expect(inferKind('inbox-triage.lobster')).toBe('yaml'); + }); +}); + +describe('universal verbs — yaml dispatch', () => { + it('resolveOcPath returns kind-agnostic match for yaml leaf', () => { + const { ast } = parseYaml(LOBSTER); + const m = resolveOcPath(ast, parseOcPath('oc://workflow.lobster/name')); 
+ expect(m).toMatchObject({ kind: 'leaf', valueText: 'inbox-triage', leafType: 'string' }); + }); + + it('resolveOcPath returns node:yaml-map for top-level seq item', () => { + const { ast } = parseYaml(LOBSTER); + const m = resolveOcPath(ast, parseOcPath('oc://workflow.lobster/steps.0')); + expect(m).toMatchObject({ kind: 'node', descriptor: 'yaml-map' }); + }); + + it('resolveOcPath returns node:yaml-seq for sequence root', () => { + const { ast } = parseYaml(LOBSTER); + const m = resolveOcPath(ast, parseOcPath('oc://workflow.lobster/steps')); + expect(m).toMatchObject({ kind: 'node', descriptor: 'yaml-seq' }); + }); + + it('setOcPath replaces a yaml scalar via universal verb', () => { + const { ast } = parseYaml(LOBSTER); + const r = setOcPath(ast, parseOcPath('oc://workflow.lobster/name'), 'updated'); + expect(r.ok).toBe(true); + if (r.ok && r.ast.kind === 'yaml') { + expect(r.ast.raw).toContain('name: updated'); + } + }); + + it('setOcPath coerces numeric string to number for number leaf', () => { + const { ast } = parseYaml('count: 5\n'); + const r = setOcPath(ast, parseOcPath('oc://x.yaml/count'), '42'); + expect(r.ok).toBe(true); + if (r.ok && r.ast.kind === 'yaml') { + expect(r.ast.raw).toContain('count: 42'); + } + }); + + it('setOcPath returns parse-error for invalid coercion', () => { + const { ast } = parseYaml('count: 5\n'); + const r = setOcPath(ast, parseOcPath('oc://x.yaml/count'), 'abc'); + expect(r.ok).toBe(false); + if (!r.ok) {expect(r.reason).toBe('parse-error');} + }); +}); + +describe('universal verbs — yaml insertion', () => { + it('appends to a yaml seq with `+`', () => { + const { ast } = parseYaml('items:\n - a\n - b\n'); + const r = setOcPath(ast, parseOcPath('oc://x.yaml/items/+'), '"c"'); + expect(r.ok).toBe(true); + if (r.ok && r.ast.kind === 'yaml') { + expect(r.ast.raw).toContain('- c'); + } + }); + + it('adds key to yaml map with `+key`', () => { + const { ast } = parseYaml('config:\n a: 1\n'); + const r = setOcPath(ast, 
parseOcPath('oc://x.yaml/config/+b'), '2'); + expect(r.ok).toBe(true); + if (r.ok && r.ast.kind === 'yaml') { + expect(r.ast.raw).toContain('b: 2'); + } + }); + + it('rejects duplicate map key on insertion', () => { + const { ast } = parseYaml('config:\n a: 1\n'); + const r = setOcPath(ast, parseOcPath('oc://x.yaml/config/+a'), '99'); + expect(r.ok).toBe(false); + }); +}); diff --git a/src/oc-path/universal.ts b/src/oc-path/universal.ts new file mode 100644 index 00000000000..d217cc8beb5 --- /dev/null +++ b/src/oc-path/universal.ts @@ -0,0 +1,869 @@ +/** + * Universal `setOcPath` and `resolveOcPath` — the public verbs. + * + * **Strategic frame**: addressing is universal. Encoding is per-kind. + * The OcPath syntax encodes WHAT to do (set leaf vs. insert vs. address + * a structural node); the AST kind encodes HOW the substrate carries it + * out. Callers pass any AST + a path + a string value; the substrate + * dispatches via `ast.kind` and coerces the value based on the path's + * syntax and the AST shape at the resolution point. 
+ * + * **Path syntax vocabulary** (v0): + * + * oc://FILE/section/item/field → leaf address (set/replace value) + * oc://FILE/section/+ → end-insertion at section + * oc://FILE/section/+key → keyed insertion (object key add) + * oc://FILE/section/+0 → indexed insertion (array splice) + * oc://FILE/+ → file-root insertion (jsonl line append, md new section) + * + * **Coercion at leaves** is driven by the AST type at the resolution point: + * - md leaf → value used verbatim (md is text-native) + * - jsonc/jsonl/yaml leaf, existing string → value verbatim + * - jsonc/jsonl/yaml leaf, existing number → `Number(value)` (parse-error unless the result is finite) + * - jsonc/jsonl/yaml leaf, existing boolean → 'true'/'false' literal + * - jsonc/jsonl/yaml leaf, existing null → only `value === 'null'` + * - insertion → `JSON.parse(value)` for jsonc/jsonl/yaml; raw text for md + * + * @module @openclaw/oc-path/universal + */ + +import type { MdAst } from './ast.js'; +import type { JsoncAst, JsoncEntry, JsoncValue } from './jsonc/ast.js'; +import { setJsoncOcPath } from './jsonc/edit.js'; +import { resolveJsoncOcPath } from './jsonc/resolve.js'; +import type { JsonlAst } from './jsonl/ast.js'; +import { appendJsonlOcPath as appendJsonlLine, setJsonlOcPath } from './jsonl/edit.js'; +import { resolveJsonlOcPath } from './jsonl/resolve.js'; +import { setMdOcPath } from './edit.js'; +import type { OcPath } from './oc-path.js'; +import { + formatOcPath, + hasWildcard, + isQuotedSeg, + OcPathError, + splitRespectingBrackets, + unquoteSeg, +} from './oc-path.js'; +import { resolveMdOcPath } from './resolve.js'; +import { emitJsonc } from './jsonc/emit.js'; +import { emitJsonl } from './jsonl/emit.js'; +import type { YamlAst } from './yaml/ast.js'; +import { insertYamlOcPath, setYamlOcPath } from './yaml/edit.js'; +import { resolveYamlOcPath } from './yaml/resolve.js'; + +// ---------- Public types --------------------------------------------------- + +/** Tagged-union of every AST kind the substrate supports. 
*/ +export type OcAst = MdAst | JsoncAst | JsonlAst | YamlAst; + +/** + * Universal resolve result. Same shape regardless of AST kind so + * consumers branch only on `match.kind`. + * + * `leaf` carries the value as a string — the canonical leaf form on + * the wire, suitable for direct comparison or display. Numeric/bool + * leaves are stringified deterministically (`String(42)` → `'42'`, + * `String(true)` → `'true'`). + * + * `node` describes which kind of structural node the path resolved to + * (md-block, jsonc-object, jsonl-line, etc.) — the descriptor lets + * tooling format / drill in without re-parsing the kind tag. + * + * `insertion-point` is returned when the path's terminal segment is + * an insertion marker (`+`, `+key`, `+nnn`) and the parent is a valid + * container. + * + * **`line`** is the 1-based source line of the matched node, or `1` + * for the root / synthetic constructions where no source line exists. + * Lint rules use it directly for diagnostic positioning instead of + * walking the kind-specific AST a second time. 
+ */ +export type OcMatch = + | { readonly kind: 'root'; readonly ast: OcAst; readonly line: number } + | { readonly kind: 'leaf'; readonly valueText: string; readonly leafType: LeafType; readonly line: number } + | { readonly kind: 'node'; readonly descriptor: NodeDescriptor; readonly line: number } + | { readonly kind: 'insertion-point'; readonly container: ContainerKind; readonly line: number }; + +export type LeafType = 'string' | 'number' | 'boolean' | 'null'; + +export type NodeDescriptor = + | 'md-block' + | 'md-item' + | 'jsonc-object' + | 'jsonc-array' + | 'jsonl-line' + | 'yaml-map' + | 'yaml-seq'; + +export type ContainerKind = + | 'md-section' // append item to a section + | 'md-file' // append a section to the file + | 'md-frontmatter' // add a frontmatter key + | 'jsonc-object' + | 'jsonc-array' + | 'jsonl-file' // append a line + | 'yaml-map' // add key to YAML map + | 'yaml-seq'; // append item to YAML seq + +export type SetResult = + | { readonly ok: true; readonly ast: OcAst } + | { + readonly ok: false; + readonly reason: + | 'unresolved' + | 'no-root' + | 'not-writable' + | 'no-item-kv' + | 'not-a-value-line' + | 'parse-error' + | 'type-mismatch' + | 'wildcard-not-allowed'; + readonly detail?: string; + }; + +// ---------- Insertion-syntax detection ------------------------------------- + +/** + * Inspect the path for an insertion marker on the deepest segment. + * A segment of `+`, `+key`, or `+nnn` indicates insertion at the + * parent. Returns the parent path (with insertion segment stripped) + + * the marker; or `null` for a plain (non-insertion) path. + */ +export interface InsertionInfo { + readonly parentPath: OcPath; + readonly marker: '+' | { kind: 'keyed'; key: string } | { kind: 'indexed'; index: number }; +} + +export function detectInsertion(path: OcPath): InsertionInfo | null { + // Find the deepest defined segment. 
+ const segments: Array<{ slot: 'section' | 'item' | 'field'; value: string }> = []; + if (path.section !== undefined) {segments.push({ slot: 'section', value: path.section });} + if (path.item !== undefined) {segments.push({ slot: 'item', value: path.item });} + if (path.field !== undefined) {segments.push({ slot: 'field', value: path.field });} + if (segments.length === 0) {return null;} + + const last = segments[segments.length - 1]; + if (!last.value.startsWith('+')) {return null;} + + const rest = last.value.slice(1); + let marker: InsertionInfo['marker']; + if (rest.length === 0) {marker = '+';} + else if (/^\d+$/.test(rest)) {marker = { kind: 'indexed', index: Number(rest) };} + else {marker = { kind: 'keyed', key: rest };} + + // Strip the deepest segment from the path. + const parentPath: OcPath = { + file: path.file, + ...(last.slot !== 'section' && path.section !== undefined ? { section: path.section } : {}), + ...(last.slot !== 'item' && path.item !== undefined ? { item: path.item } : {}), + ...(last.slot !== 'field' && path.field !== undefined ? { field: path.field } : {}), + ...(path.session !== undefined ? { session: path.session } : {}), + }; + return { parentPath, marker }; +} + +// ---------- Universal resolve ---------------------------------------------- + +/** + * Resolve an `OcPath` against any AST. Returns a kind-agnostic match + * shape or `null` when the path doesn't resolve. + * + * Insertion-marker paths return `{kind: 'insertion-point', container}` + * if the parent is a valid container; otherwise `null`. + */ +export function resolveOcPath(ast: OcAst, path: OcPath): OcMatch | null { + // Wildcard guard: `resolveOcPath` is the single-match verb. Wildcards + // belong to `findOcPaths` (multi-match). 
Throw with a structured code + // (consistent with `setOcPath`'s `wildcard-not-allowed` discriminator) + // — silent `null` here is indistinguishable from "path doesn't + // resolve", so consumers couldn't tell whether they should switch to + // findOcPaths or accept the address as missing. + if (hasWildcard(path)) { + throw new OcPathError( + `resolveOcPath received a wildcard pattern; use findOcPaths instead: ${formatOcPath(path)}`, + formatOcPath(path), + 'OC_PATH_WILDCARD_IN_RESOLVE', + ); + } + const insertion = detectInsertion(path); + if (insertion !== null) { + return resolveInsertion(ast, insertion); + } + + switch (ast.kind) { + case 'md': + return resolveMdToUniversal(ast, path); + case 'jsonc': + return resolveJsoncToUniversal(ast, path); + case 'jsonl': + return resolveJsonlToUniversal(ast, path); + case 'yaml': + return resolveYamlToUniversal(ast, path); + } + return null; +} + +function resolveYamlToUniversal(ast: YamlAst, path: OcPath): OcMatch | null { + const m = resolveYamlOcPath(ast, path); + if (m === null) {return null;} + if (m.kind === 'root') {return { kind: 'root', ast, line: 1 };} + // Walk the AST one more time to extract the matched node's range + // — the per-kind YamlOcPathMatch shape doesn't surface it directly. + // Cheap relative to the resolve cost; trades CPU for type cleanliness. 
+ const line = locateYamlLine(ast, path); + if (m.kind === 'map') {return { kind: 'node', descriptor: 'yaml-map', line };} + if (m.kind === 'seq') {return { kind: 'node', descriptor: 'yaml-seq', line };} + if (m.kind === 'scalar' || m.kind === 'pair') { + const v = m.value; + if (v === null) {return { kind: 'leaf', valueText: 'null', leafType: 'null', line };} + if (typeof v === 'string') {return { kind: 'leaf', valueText: v, leafType: 'string', line };} + if (typeof v === 'number') {return { kind: 'leaf', valueText: String(v), leafType: 'number', line };} + if (typeof v === 'boolean') {return { kind: 'leaf', valueText: String(v), leafType: 'boolean', line };} + // JSON.stringify throws a TypeError on BigInt, so stringify it + // directly instead of letting it reach the generic fallback below. + if (typeof v === 'bigint') {return { kind: 'leaf', valueText: String(v), leafType: 'number', line };} + // Anything else (Date / collection) — JSON-stringify so we + // don't end up with `[object Object]` in the leaf text. Falls back + // to literal "null" if JSON.stringify yields undefined. + const valueText = JSON.stringify(v) ?? 'null'; + return { kind: 'leaf', valueText, leafType: 'string', line }; + } + return null; +} + +function locateYamlLine(ast: YamlAst, path: OcPath): number { + // Re-walk the yaml CST to find the matched node's byte range, then + // convert via the AST's `lineCounter`. Quote-aware split + unquote so + // a quoted segment containing `.` survives as a single key (matches + // `resolveYamlOcPath`'s lookup behavior; without this a key like + // `"github.com/foo"` would shred and the line locator would fall back + // to line 1 silently). + // NOTE(review): segments are matched here only as literal map keys or + // non-negative sequence indices, so a positional token (`$first`, + // `$last`, `-N`) yields line 1 — confirm that is acceptable for the + // paths `resolveYamlOcPath` accepts. 
+ const segments: string[] = []; + const collect = (slot: string | undefined) => { + if (slot === undefined) {return;} + for (const sub of splitRespectingBrackets(slot, '.')) { + segments.push(isQuotedSeg(sub) ? 
unquoteSeg(sub) : sub); + } + }; + collect(path.section); + collect(path.item); + collect(path.field); + if (segments.length === 0) {return 1;} + let node: unknown = ast.doc.contents; + for (const seg of segments) { + if (node === null || node === undefined) {return 1;} + const n = node as { items?: unknown[] }; + if (Array.isArray(n.items)) { + // Map or seq. + const items = n.items; + const isMap = items.length > 0 && typeof items[0] === 'object' && items[0] !== null && 'key' in (items[0]); + if (isMap) { + const pair = (items as { key: { value?: unknown }; value: unknown }[]).find((p) => { + const k = p.key !== null && typeof p.key === 'object' && 'value' in p.key ? p.key.value : p.key; + return String(k) === seg; + }); + if (pair === undefined) {return 1;} + node = pair.value; + } else { + const idx = Number(seg); + if (!Number.isInteger(idx) || idx < 0 || idx >= items.length) {return 1;} + node = items[idx]; + } + } else { + return 1; + } + } + if (node === null || typeof node !== 'object') {return 1;} + const range = (node as { range?: readonly [number, number, number] }).range; + if (range === undefined) {return 1;} + return ast.lineCounter.linePos(range[0]).line; +} + +function resolveMdToUniversal(ast: MdAst, path: OcPath): OcMatch | null { + const m = resolveMdOcPath(ast, path); + if (m === null) {return null;} + switch (m.kind) { + case 'root': + return { kind: 'root', ast, line: 1 }; + case 'frontmatter': + return { kind: 'leaf', valueText: m.node.value, leafType: 'string', line: m.node.line }; + case 'block': + return { kind: 'node', descriptor: 'md-block', line: m.node.line }; + case 'item': + return { kind: 'node', descriptor: 'md-item', line: m.node.line }; + case 'item-field': + return { kind: 'leaf', valueText: m.value, leafType: 'string', line: m.node.line }; + } + return null; +} + +function resolveJsoncToUniversal(ast: JsoncAst, path: OcPath): OcMatch | null { + const m = resolveJsoncOcPath(ast, path); + if (m === null) {return null;} + if 
(m.kind === 'root') {return { kind: 'root', ast, line: 1 };} + if (m.kind === 'object-entry') { + return jsoncValueToMatch(m.node.value, m.node.line); + } + // m.kind === 'value' — array element or root: line lives on the value itself. + return jsoncValueToMatch(m.node, m.node.line ?? 1); +} + +function jsoncValueToMatch(value: JsoncValue, line: number): OcMatch { + switch (value.kind) { + case 'object': + return { kind: 'node', descriptor: 'jsonc-object', line }; + case 'array': + return { kind: 'node', descriptor: 'jsonc-array', line }; + case 'string': + return { kind: 'leaf', valueText: value.value, leafType: 'string', line }; + case 'number': + return { kind: 'leaf', valueText: String(value.value), leafType: 'number', line }; + case 'boolean': + return { kind: 'leaf', valueText: String(value.value), leafType: 'boolean', line }; + case 'null': + return { kind: 'leaf', valueText: 'null', leafType: 'null', line }; + } + throw new Error(`unreachable: jsoncValueToMatch kind`); +} + +function resolveJsonlToUniversal(ast: JsonlAst, path: OcPath): OcMatch | null { + const m = resolveJsonlOcPath(ast, path); + if (m === null) {return null;} + if (m.kind === 'root') {return { kind: 'root', ast, line: 1 };} + if (m.kind === 'line') {return { kind: 'node', descriptor: 'jsonl-line', line: m.node.line };} + // Inside-line jsonc parser starts numbering at 1 for each jsonl + // line, so `m.node.line` would always be 1 for any jsonl-resolved + // match. Use `m.line` (the JsonlLine's file-level line) — by + // construction every inside-line node sits on the same file line. + if (m.kind === 'object-entry') {return jsoncValueToMatch(m.node.value, m.line);} + return jsoncValueToMatch(m.node, m.line); +} + +function resolveInsertion(ast: OcAst, info: InsertionInfo): OcMatch | null { + // For an insertion to be valid the parent must resolve to a container + // we know how to extend. Inspect the parent. 
+ switch (ast.kind) { + case 'md': + return resolveMdInsertion(ast, info); + case 'jsonc': + return resolveJsoncInsertion(ast, info); + case 'jsonl': + return resolveJsonlInsertion(ast, info); + case 'yaml': + return resolveYamlInsertion(ast, info); + } + return null; +} + +function resolveYamlInsertion(ast: YamlAst, info: InsertionInfo): OcMatch | null { + const m = resolveYamlOcPath(ast, info.parentPath); + if (m === null) {return null;} + const line = locateYamlLine(ast, info.parentPath); + if (m.kind === 'map') {return { kind: 'insertion-point', container: 'yaml-map', line };} + if (m.kind === 'seq') {return { kind: 'insertion-point', container: 'yaml-seq', line };} + if (m.kind === 'root') { + // Top-level: inspect the document root. + const root = ast.doc.contents; + if (root === null) {return null;} + if ('items' in (root as object)) { + const isMapLike = (root as { items: { key?: unknown }[] }).items.every((p) => 'key' in p); + return { kind: 'insertion-point', container: isMapLike ? 
'yaml-map' : 'yaml-seq', line: 1 }; + } + return null; + } + return null; +} + +function resolveMdInsertion(ast: MdAst, info: InsertionInfo): OcMatch | null { + const p = info.parentPath; + // oc://FILE/+ → file-root insertion (new section) + if (p.section === undefined) { + return { kind: 'insertion-point', container: 'md-file', line: 1 }; + } + // oc://FILE/[frontmatter]/+key → frontmatter add + if (p.section === '[frontmatter]') { + return { kind: 'insertion-point', container: 'md-frontmatter', line: 1 }; + } + // oc://FILE/section/+ → append item to section + if (p.item === undefined && p.field === undefined) { + const m = resolveMdOcPath(ast, p); + if (m === null || m.kind !== 'block') {return null;} + return { kind: 'insertion-point', container: 'md-section', line: m.node.line }; + } + return null; +} + +function resolveJsoncInsertion(ast: JsoncAst, info: InsertionInfo): OcMatch | null { + const m = resolveJsoncOcPath(ast, info.parentPath); + if (m === null) {return null;} + let containerNode: JsoncValue; + if (m.kind === 'root') { + if (ast.root === null) {return null;} + containerNode = ast.root; + } else if (m.kind === 'object-entry') { + containerNode = m.node.value; + } else { + containerNode = m.node; + } + const line = containerNode.line ?? 1; + if (containerNode.kind === 'object') { + return { kind: 'insertion-point', container: 'jsonc-object', line }; + } + if (containerNode.kind === 'array') { + return { kind: 'insertion-point', container: 'jsonc-array', line }; + } + return null; +} + +function resolveJsonlInsertion(ast: JsonlAst, info: InsertionInfo): OcMatch | null { + // jsonl insertion only makes sense at the file level: `oc://FILE/+`. + if (info.parentPath.section !== undefined) {return null;} + // The only insertion point for jsonl is "after the last line" — the + // line surfaced is `lastLine + 1` so consumers can render correctly. + const lastLine = ast.lines.length > 0 ? 
ast.lines[ast.lines.length - 1].line : 0; + return { kind: 'insertion-point', container: 'jsonl-file', line: lastLine + 1 }; +} + +// ---------- Universal set -------------------------------------------------- + +/** + * Replace or insert at `path` with `value` (always a string). + * Substrate dispatches via `ast.kind` and coerces value at leaves + * based on the existing AST shape at the path location. + * + * For insertion-marker paths (`+`, `+key`, `+nnn`) the value is parsed + * as kind-appropriate content (JSON for jsonc/jsonl; plain text for md). + * + * Returns a structured result; never throws on parser-tolerated input. + * Sentinel-guard violations DO throw `OcEmitSentinelError` (defense in + * depth — refuse to write redacted content even when caller "asked"). + */ +export function setOcPath(ast: OcAst, path: OcPath, value: string): SetResult { + // Wildcard guard: `setOcPath` writes a single concrete leaf. A pattern + // would be ambiguous (which match wins?) so we reject early. Callers + // who want multi-set should `findOcPaths(...)` then `setOcPath` per + // resolved path — the explicit loop is the right shape. 
+ if (hasWildcard(path)) { + return { + ok: false, + reason: 'wildcard-not-allowed', + detail: 'setOcPath requires a concrete path; use findOcPaths to enumerate matches first', + }; + } + const insertion = detectInsertion(path); + if (insertion !== null) { + return setInsertion(ast, insertion, value); + } + + switch (ast.kind) { + case 'md': + return setMdLeaf(ast, path, value); + case 'jsonc': + return setJsoncLeaf(ast, path, value); + case 'jsonl': + return setJsonlLeaf(ast, path, value); + case 'yaml': + return setYamlLeaf(ast, path, value); + } + throw new Error(`unreachable: setOcPath kind`); +} + +/** + * Replace the existing YAML node at `path` with `value`. + * + * When the node currently holds a number, boolean or null scalar the + * string is coerced to that type; any other existing value receives the + * string unchanged. Returns `unresolved` when the path does not resolve, + * `not-writable` for the document root, and `parse-error` when the + * string cannot be coerced to the existing scalar type. + */ +function setYamlLeaf(ast: YamlAst, path: OcPath, value: string): SetResult { + const existing = resolveYamlOcPath(ast, path); + if (existing === null) {return { ok: false, reason: 'unresolved' };} + if (existing.kind === 'root') { + return { ok: false, reason: 'not-writable', detail: 'root replacement not supported via setOcPath' }; + } + // Coerce value based on existing scalar type. + let coerced: unknown = value; + if (existing.kind === 'scalar' || existing.kind === 'pair') { + const cur = existing.value; + if (typeof cur === 'number') { + // Number('') and Number('  ') evaluate to 0; treat blank input as + // a parse error instead of silently writing 0. + const n = value.trim() === '' ? 
Number.NaN : Number(value); + if (!Number.isFinite(n)) {return { ok: false, reason: 'parse-error' };} + coerced = n; + } else if (typeof cur === 'boolean') { + if (value === 'true') {coerced = true;} + else if (value === 'false') {coerced = false;} + else {return { ok: false, reason: 'parse-error' };} + } else if (cur === null && value !== 'null') { + return { ok: false, reason: 'parse-error' }; + } else if (cur === null && value === 'null') { + coerced = null; + } + } + const r = setYamlOcPath(ast, path, coerced); + if (r.ok) {return { ok: true, ast: r.ast };} + return { ok: false, reason: r.reason }; +} + +function setMdLeaf(ast: MdAst, path: OcPath, value: string): SetResult { + const r = setMdOcPath(ast, path, value); + if (r.ok) {return { ok: true, ast: r.ast };} + return { ok: false, reason: r.reason }; +} + +function 
setJsoncLeaf(ast: JsoncAst, path: OcPath, value: string): SetResult { + // Inspect the existing leaf to determine target type for coercion. + const existing = resolveJsoncOcPath(ast, path); + if (existing === null) {return { ok: false, reason: 'unresolved' };} + if (existing.kind === 'root') { + return { ok: false, reason: 'not-writable', detail: 'root replacement is not supported via setOcPath' }; + } + const leafValue = existing.kind === 'object-entry' ? existing.node.value : existing.node; + const coerced = coerceJsoncLeaf(value, leafValue); + if (coerced === null) { + return { ok: false, reason: 'parse-error', detail: `cannot coerce "${value}" to ${leafValue.kind}` }; + } + const r = setJsoncOcPath(ast, path, coerced); + if (r.ok) {return { ok: true, ast: r.ast };} + return { ok: false, reason: r.reason }; +} + +function setJsonlLeaf(ast: JsonlAst, path: OcPath, value: string): SetResult { + const existing = resolveJsonlOcPath(ast, path); + if (existing === null) {return { ok: false, reason: 'unresolved' };} + if (existing.kind === 'root') { + return { ok: false, reason: 'not-writable', detail: 'root replacement is not supported via setOcPath' }; + } + if (existing.kind === 'line') { + // Replacing a whole line — value should be JSON. + const parsed = tryParseJson(value); + if (parsed === undefined) { + return { ok: false, reason: 'parse-error', detail: `line replacement requires JSON value` }; + } + const r = setJsonlOcPath(ast, path, jsonToJsoncValue(parsed)); + if (r.ok) {return { ok: true, ast: r.ast };} + return { ok: false, reason: r.reason }; + } + // Field on a line — leaf coercion. + const leafValue = existing.kind === 'object-entry' ? 
existing.node.value : existing.node; + const coerced = coerceJsoncLeaf(value, leafValue); + if (coerced === null) { + return { ok: false, reason: 'parse-error', detail: `cannot coerce "${value}" to ${leafValue.kind}` }; + } + const r = setJsonlOcPath(ast, path, coerced); + if (r.ok) {return { ok: true, ast: r.ast };} + return { ok: false, reason: r.reason }; +} + +function setInsertion(ast: OcAst, info: InsertionInfo, value: string): SetResult { + switch (ast.kind) { + case 'md': + return setMdInsertion(ast, info, value); + case 'jsonc': + return setJsoncInsertion(ast, info, value); + case 'jsonl': + return setJsonlInsertion(ast, info, value); + case 'yaml': + return setYamlInsertion(ast, info, value); + } + throw new Error(`unreachable: setInsertion kind`); +} + +function setYamlInsertion(ast: YamlAst, info: InsertionInfo, value: string): SetResult { + // YAML insertion accepts a JSON-shaped value string (so callers can + // insert structured nodes uniformly). For simple scalars the JSON + // form `"foo"` / `42` / `true` works; complex shapes use objects. + const parsed = tryParseJson(value); + if (parsed === undefined) { + return { ok: false, reason: 'parse-error', detail: 'yaml insertion requires JSON value' }; + } + const r = insertYamlOcPath(ast, info.parentPath, info.marker, parsed); + if (r.ok) {return { ok: true, ast: r.ast };} + return { ok: false, reason: r.reason }; +} + +function setMdInsertion(ast: MdAst, info: InsertionInfo, value: string): SetResult { + const p = info.parentPath; + // file-level: append a section. Value is the heading text; body empty. 
+ if (p.section === undefined) { + if (info.marker !== '+') { + return { ok: false, reason: 'not-writable', detail: 'md file-level insertion uses bare `+`' }; + } + const newAst: MdAst = { + ...ast, + blocks: [ + ...ast.blocks, + { + heading: value, + slug: slugifyHeading(value), + line: 0, + bodyText: '', + items: [], + tables: [], + codeBlocks: [], + }, + ], + }; + return { ok: true, ast: rebuildMdRaw(newAst) }; + } + + // [frontmatter] — keyed insertion only + if (p.section === '[frontmatter]') { + if (typeof info.marker !== 'object' || info.marker.kind !== 'keyed') { + return { ok: false, reason: 'not-writable', detail: 'md frontmatter insertion requires +key' }; + } + const key = info.marker.key; + if (ast.frontmatter.some((e) => e.key === key)) { + return { ok: false, reason: 'type-mismatch', detail: `frontmatter key '${key}' already exists; use set, not insert` }; + } + const newAst: MdAst = { + ...ast, + frontmatter: [...ast.frontmatter, { key, value, line: 0 }], + }; + return { ok: true, ast: rebuildMdRaw(newAst) }; + } + + // section-level: append item. Value can be `key: value` (kv) or plain text. + if (p.item === undefined && p.field === undefined) { + if (info.marker !== '+') { + return { ok: false, reason: 'not-writable', detail: 'md section insertion uses bare `+`' }; + } + const blockIdx = ast.blocks.findIndex((b) => b.slug === p.section!.toLowerCase()); + if (blockIdx === -1) {return { ok: false, reason: 'unresolved' };} + const block = ast.blocks[blockIdx]; + const kvMatch = /^([^:]+?)\s*:\s*(.+)$/.exec(value); + const itemLine = `- ${value}`; + const newItem = { + text: value, + slug: slugifyHeading(kvMatch ? kvMatch[1] : value), + line: 0, + ...(kvMatch !== null + ? { kv: { key: kvMatch[1].trim(), value: kvMatch[2].trim() } } + : {}), + }; + const newBodyText = block.bodyText.length === 0 + ? 
/**
 * Insert a JSON value into the JSONC container addressed by `info`.
 *
 * Arrays accept a bare `+` (append) or an indexed marker (insert at that
 * position, clamped to the array length). Objects accept only a keyed
 * marker and refuse keys that already exist.
 *
 * @param ast - JSONC AST to insert into; a new AST is returned on success.
 * @param info - Insertion marker plus the path of the parent container.
 * @param value - JSON text of the value to insert.
 * @returns The updated AST, or a failure (`unresolved`, `parse-error`,
 *   `type-mismatch`).
 */
function setJsoncInsertion(ast: JsoncAst, info: InsertionInfo, value: string): SetResult {
  const containerMatch = resolveJsoncInsertion(ast, info);
  if (containerMatch === null) {return { ok: false, reason: 'unresolved' };}

  const parsed = tryParseJson(value);
  if (parsed === undefined) {
    return { ok: false, reason: 'parse-error', detail: 'jsonc insertion requires JSON value' };
  }
  const newJsoncValue = jsonToJsoncValue(parsed);

  if (containerMatch.kind !== 'insertion-point') {return { ok: false, reason: 'unresolved' };}

  if (containerMatch.container === 'jsonc-array') {
    // index `+0` valid; bare `+` appends; `+key` rejected.
    if (typeof info.marker === 'object' && info.marker.kind === 'keyed') {
      return { ok: false, reason: 'type-mismatch', detail: 'cannot insert by key into array' };
    }
    return mutateJsoncContainer(ast, info.parentPath, (container) => {
      if (container.kind !== 'array') {return null;}
      // Copy first so the input AST's array is never mutated.
      const items = container.items.slice();
      if (info.marker === '+') {
        items.push(newJsoncValue);
      } else if (typeof info.marker === 'object' && info.marker.kind === 'indexed') {
        const idx = Math.min(info.marker.index, items.length);
        items.splice(idx, 0, newJsoncValue);
      }
      return {
        kind: 'array',
        items,
        ...(container.line !== undefined ? { line: container.line } : {}),
      };
    });
  }

  // jsonc-object
  if (typeof info.marker !== 'object' || info.marker.kind !== 'keyed') {
    return { ok: false, reason: 'type-mismatch', detail: 'jsonc object insertion requires +key' };
  }
  const key = info.marker.key;
  return mutateJsoncContainer(ast, info.parentPath, (container) => {
    if (container.kind !== 'object') {return null;}
    // Duplicate key: returning null surfaces as `unresolved` to the caller.
    if (container.entries.some((e) => e.key === key)) {return null;}
    const newEntry: JsoncEntry = { key, value: newJsoncValue, line: 0 };
    return {
      kind: 'object',
      entries: [...container.entries, newEntry],
      ...(container.line !== undefined ? { line: container.line } : {}),
    };
  });
}

/**
 * Append one JSON value as a new line of a JSONL AST.
 *
 * Only the file-level bare `+` form is accepted: any parent path with a
 * section, or any marker other than `+`, is rejected as `not-writable`.
 *
 * @param value - JSON text for the new line.
 */
function setJsonlInsertion(ast: JsonlAst, info: InsertionInfo, value: string): SetResult {
  if (info.parentPath.section !== undefined || info.marker !== '+') {
    return { ok: false, reason: 'not-writable', detail: 'jsonl insertion only supports oc://FILE/+ append' };
  }
  const parsed = tryParseJson(value);
  if (parsed === undefined) {
    return { ok: false, reason: 'parse-error', detail: 'jsonl line append requires JSON value' };
  }
  return { ok: true, ast: appendJsonlLine(ast, jsonToJsoncValue(parsed)) };
}

// ---------- Internal helpers -----------------------------------------------

/**
 * Coerce raw replacement text into a leaf of the same kind as `existing`.
 *
 * - string: the text is taken verbatim.
 * - number: the text must convert to a finite number; blank text is rejected.
 * - boolean: only the exact texts `true` / `false` are accepted.
 * - null: only the exact text `null` is accepted.
 * - object / array: never coerced.
 *
 * @returns The replacement leaf (carrying `existing.line` when present), or
 *   `null` when the text cannot represent a value of that kind.
 */
function coerceJsoncLeaf(valueText: string, existing: JsoncValue): JsoncValue | null {
  // Preserve the existing source line on coerced replacements — the
  // semantic node is the same; only its bytes change.
  const lineExt = existing.line !== undefined ? { line: existing.line } : {};
  if (existing.kind === 'string') {return { kind: 'string', value: valueText, ...lineExt };}
  if (existing.kind === 'number') {
    // `Number('')` and `Number('   ')` are both 0, so blank input would
    // silently overwrite the leaf with 0. Treat it as not-a-number instead.
    if (valueText.trim().length === 0) {return null;}
    const n = Number(valueText);
    return Number.isFinite(n) ? { kind: 'number', value: n, ...lineExt } : null;
  }
  if (existing.kind === 'boolean') {
    if (valueText === 'true') {return { kind: 'boolean', value: true, ...lineExt };}
    if (valueText === 'false') {return { kind: 'boolean', value: false, ...lineExt };}
    return null;
  }
  if (existing.kind === 'null') {
    return valueText === 'null' ? { kind: 'null', ...lineExt } : null;
  }
  // Object/array leaf — caller should use insertion or full-replace path.
  return null;
}

/**
 * Parse JSON text, returning `undefined` instead of throwing on invalid
 * input. `JSON.parse` never yields `undefined` itself, so `undefined` is an
 * unambiguous failure signal (a parsed `null` is returned as `null`).
 */
function tryParseJson(value: string): unknown {
  try {
    return JSON.parse(value);
  } catch {
    return undefined;
  }
}

/**
 * Convert a plain parsed-JSON value into the JSONC AST value shape.
 *
 * Synthetic value nodes omit `line` (optional in the type); object entries
 * are created with `line: 0`.
 *
 * @throws Error when `v` is not a JSON-representable value (undefined,
 *   function, symbol, ...). `JSON.parse` never produces these.
 */
function jsonToJsoncValue(v: unknown): JsoncValue {
  if (v === null) {return { kind: 'null' };}
  if (typeof v === 'string') {return { kind: 'string', value: v };}
  if (typeof v === 'number') {return { kind: 'number', value: v };}
  if (typeof v === 'boolean') {return { kind: 'boolean', value: v };}
  if (Array.isArray(v)) {return { kind: 'array', items: v.map((item) => jsonToJsoncValue(item)) };}
  if (typeof v === 'object') {
    const obj = v as Record<string, unknown>;
    return {
      kind: 'object',
      entries: Object.entries(obj).map(([key, value]) => ({
        key,
        value: jsonToJsoncValue(value),
        line: 0,
      })),
    };
  }
  // Unsupported (undefined / function / symbol). JSON.parse never produces these.
  throw new Error(`unsupported JSON value type: ${typeof v}`);
}

/**
 * Apply `mutate` to the container addressed by `parentPath` and return a new
 * JSONC AST whose `raw` is re-rendered from the mutated tree.
 *
 * @param mutate - Receives the addressed container; returns its replacement,
 *   or `null` to abort (reported as `unresolved`).
 * @returns `no-root` when the AST has no root, `unresolved` when the path
 *   does not resolve or `mutate` aborts, otherwise the new AST.
 */
function mutateJsoncContainer(
  ast: JsoncAst,
  parentPath: OcPath,
  mutate: (container: JsoncValue) => JsoncValue | null,
): SetResult {
  if (ast.root === null) {return { ok: false, reason: 'no-root' };}

  // Quote-aware split so JSONC insertion under a key containing
  // `/`, `.`, or other special chars works through the parent path.
  // `resolveJsoncOcPath` validates with quote-aware splitting; the
  // mutation walker MUST use the same predicate or insertion validity
  // can be reported and then fail as unresolved.
  const segments: string[] = [];
  if (parentPath.section !== undefined) {segments.push(...splitRespectingBrackets(parentPath.section, '.'));}
  if (parentPath.item !== undefined) {segments.push(...splitRespectingBrackets(parentPath.item, '.'));}
  if (parentPath.field !== undefined) {segments.push(...splitRespectingBrackets(parentPath.field, '.'));}

  const newRoot = segments.length === 0
    ? mutate(ast.root)
    : mutateAt(ast.root, segments, 0, mutate);
  if (newRoot === null) {return { ok: false, reason: 'unresolved' };}

  // Build with an empty `raw` first, then fill it from a render of that tree.
  const next: JsoncAst = { kind: 'jsonc', raw: '', root: newRoot };
  return { ok: true, ast: { ...next, raw: emitJsonc(next, { mode: 'render' }) } };
}

/**
 * Recursively walk `segments` from index `i`, apply `mutate` at the end of
 * the path, and rebuild each ancestor on the way back up (copy-on-write:
 * visited containers are shallow-copied, never edited in place).
 *
 * @returns The rebuilt subtree, or `null` when a segment is empty, a key or
 *   index is missing, a scalar is reached mid-path, or `mutate` aborts.
 */
function mutateAt(
  current: JsoncValue,
  segments: readonly string[],
  i: number,
  mutate: (container: JsoncValue) => JsoncValue | null,
): JsoncValue | null {
  const seg = segments[i];
  if (seg === undefined) {return mutate(current);}
  if (seg.length === 0) {return null;}

  if (current.kind === 'object') {
    // Match `setJsoncOcPath`'s lookup: AST entry keys are unquoted,
    // so strip quoting from the path segment before comparing.
    const lookupKey = isQuotedSeg(seg) ? unquoteSeg(seg) : seg;
    const idx = current.entries.findIndex((e) => e.key === lookupKey);
    if (idx === -1) {return null;}
    const child = current.entries[idx];
    const replaced = mutateAt(child.value, segments, i + 1, mutate);
    if (replaced === null) {return null;}
    const newEntries = current.entries.slice();
    newEntries[idx] = { ...child, value: replaced };
    return {
      kind: 'object',
      entries: newEntries,
      ...(current.line !== undefined ? { line: current.line } : {}),
    };
  }
  if (current.kind === 'array') {
    const idx = Number(seg);
    if (!Number.isInteger(idx) || idx < 0 || idx >= current.items.length) {return null;}
    const child = current.items[idx];
    const replaced = mutateAt(child, segments, i + 1, mutate);
    if (replaced === null) {return null;}
    const newItems = current.items.slice();
    newItems[idx] = replaced;
    return {
      kind: 'array',
      items: newItems,
      ...(current.line !== undefined ? { line: current.line } : {}),
    };
  }
  // Scalar reached with segments still left to consume.
  return null;
}

/**
 * Regenerate `raw` for a markdown AST from its structured parts.
 *
 * Layout: optional frontmatter fenced by `---` lines, then the preamble,
 * then each block as a `## heading` line followed by its `bodyText`.
 * Sections are separated by one blank line and joined with `\n`. Only
 * `heading` and `bodyText` of a block are written.
 */
function rebuildMdRaw(ast: MdAst): MdAst {
  const parts: string[] = [];
  if (ast.frontmatter.length > 0) {
    parts.push('---');
    for (const fm of ast.frontmatter) {
      parts.push(`${fm.key}: ${formatFrontmatterValue(fm.value)}`);
    }
    parts.push('---');
  }
  if (ast.preamble.length > 0) {
    if (parts.length > 0) {parts.push('');}
    parts.push(ast.preamble);
  }
  for (const block of ast.blocks) {
    if (parts.length > 0) {parts.push('');}
    parts.push(`## ${block.heading}`);
    if (block.bodyText.length > 0) {parts.push(block.bodyText);}
  }
  // Suppress unused — emitJsonl is imported for symmetry but only emitJsonc
  // is used in the jsonc mutation helper.
  void emitJsonl;
  return { ...ast, raw: parts.join('\n') };
}

/**
 * Format a frontmatter value for emission: the empty string becomes `""`,
 * values containing any of the listed special characters (or a line break)
 * are JSON-quoted, and everything else is written verbatim.
 */
function formatFrontmatterValue(value: string): string {
  if (value.length === 0) {return '""';}
  if (/[:#&*?|<>=!%@`,[\]{}\r\n]/.test(value)) {
    return JSON.stringify(value);
  }
  return value;
}

/**
 * Lower-case `s`, collapse every run of characters outside `a-z0-9` into a
 * single `-`, and strip leading/trailing dashes.
 */
function slugifyHeading(s: string): string {
  return s.toLowerCase().trim().replace(/[^a-z0-9]+/g, '-').replace(/^-+|-+$/g, '');
}
/**
 * The root YAML AST. `raw` round-trips byte-identical via emit.
 *
 * All fields are `readonly`: edits produce a new `YamlAst` rather than
 * changing an existing one.
 */
export interface YamlAst {
  /** Per-kind discriminator; always the literal `'yaml'` for this AST. */
  readonly kind: 'yaml';
  /**
   * YAML text for this AST: the parser input, or the re-serialized
   * document after an edit.
   */
  readonly raw: string;
  /**
   * Parsed `yaml.Document` — wraps the comment-preserving CST model.
   */
  readonly doc: Document.Parsed;
  /**
   * `LineCounter` from the `yaml` package. Pass a node's `range[0]`
   * (byte offset) to `lineCounter.linePos(offset)` to get
   * `{ line, col }` (1-based). Lint rules use this to surface accurate
   * line numbers in findings instead of hardcoding `line: 1`.
   */
  readonly lineCounter: LineCounter;
}
/**
 * Outcome of a YAML edit: the new AST on success, or a failure reason.
 *
 * - `no-root`: the document has no contents.
 * - `unresolved`: the path (or insertion target) does not resolve.
 * - `parse-error`: the document carries parse errors and cannot be
 *   re-serialized for editing.
 */
export type YamlEditResult =
  | { readonly ok: true; readonly ast: YamlAst }
  | {
      readonly ok: false;
      readonly reason: 'unresolved' | 'no-root' | 'parse-error';
    };

/**
 * Replace the value at `path` in a YAML AST and return a new AST.
 *
 * The input AST is left untouched: the document is cloned before the write.
 * The path must already exist — this function never creates nodes.
 *
 * @param ast - YAML AST to edit.
 * @param path - Address of the existing node to overwrite.
 * @param newValue - Replacement value handed to `Document#setIn`.
 * @returns The edited AST, or a failure reason (see {@link YamlEditResult}).
 */
export function setYamlOcPath(
  ast: YamlAst,
  path: OcPath,
  newValue: unknown,
): YamlEditResult {
  if (ast.doc.contents === null) {return { ok: false, reason: 'no-root' };}

  const rawSegments = pathSegments(path);
  if (rawSegments.length === 0) {
    return { ok: false, reason: 'unresolved' };
  }

  // Resolve positional tokens ($first / $last / -N) against the actual
  // map keys / seq sizes BEFORE handing the segments to the yaml lib —
  // otherwise `hasIn(['$last'])` treats the token as a literal map key
  // and silently unresolves, producing a write↔read asymmetry with
  // resolveYamlOcPath (which honors positional tokens at lookup).
  const segments = resolvePositionalSegments(ast.doc.contents as Node, rawSegments);
  if (segments === null) {return { ok: false, reason: 'unresolved' };}

  // Verify the path resolves before mutating — `setIn` would create
  // missing intermediate nodes which is insertion semantics, not set.
  if (!ast.doc.hasIn(segments)) {
    return { ok: false, reason: 'unresolved' };
  }

  // Cloning re-serializes via `Document#toString()`, which throws for a
  // document that carries parse errors. Report that as a result instead of
  // letting the exception escape.
  if (ast.doc.errors.length > 0) {
    return { ok: false, reason: 'parse-error' };
  }

  // Clone the document so the original AST is unchanged.
  const { doc: cloned, lineCounter } = cloneDoc(ast.doc);
  cloned.setIn(segments, newValue);
  return { ok: true, ast: { kind: 'yaml', raw: cloned.toString(), doc: cloned, lineCounter } };
}
/**
 * Append-style insertion: add a new key to a map or insert into a seq at
 * `parentPath`. Used by the universal `setOcPath` when the path carries a
 * `+` / `+key` / `+nnn` insertion marker.
 *
 * - Map parent: requires a keyed marker; an existing key is refused.
 * - Seq parent: bare `+` appends; an indexed marker inserts at that index,
 *   clamped to `[0, length]`; a keyed marker is refused.
 * - Any other parent (scalar, missing): refused.
 *
 * The input AST is left untouched; the write happens on a clone.
 *
 * @param ast - YAML AST to insert into.
 * @param parentPath - Address of the container receiving the new child.
 * @param marker - Insertion marker parsed from the path.
 * @param newValue - Value to insert.
 * @returns The edited AST, or `no-root` / `unresolved` / `parse-error`.
 */
export function insertYamlOcPath(
  ast: YamlAst,
  parentPath: OcPath,
  marker: '+' | { kind: 'keyed'; key: string } | { kind: 'indexed'; index: number },
  newValue: unknown,
): YamlEditResult {
  if (ast.doc.contents === null) {return { ok: false, reason: 'no-root' };}

  const rawParentSegments = pathSegments(parentPath);
  // Resolve positional tokens against the live document before walking
  // — same rationale as setYamlOcPath; `getIn(['$last'])` would treat
  // the token as a literal key and miss the actual last child.
  const segments =
    rawParentSegments.length === 0
      ? rawParentSegments
      : resolvePositionalSegments(ast.doc.contents as Node, rawParentSegments);
  if (segments === null) {return { ok: false, reason: 'unresolved' };}

  // Cloning re-serializes via `Document#toString()`, which throws for a
  // document that carries parse errors; surface that as a result instead.
  if (ast.doc.errors.length > 0) {return { ok: false, reason: 'parse-error' };}
  const { doc: cloned, lineCounter } = cloneDoc(ast.doc);

  // Find the parent node inside the clone (collections come back as nodes).
  const parent: unknown = segments.length === 0 ? cloned.contents : cloned.getIn(segments, false);
  if (parent === undefined || parent === null) {return { ok: false, reason: 'unresolved' };}

  // Classify with the library's own guards. Inspecting `items` for `key`
  // properties misclassifies an empty seq as a map, because `[].every(...)`
  // is vacuously true.
  if (isMap(parent)) {
    if (typeof marker !== 'object' || marker.kind !== 'keyed') {
      return { ok: false, reason: 'unresolved' };
    }
    // Reject duplicate
    if (cloned.hasIn([...segments, marker.key])) {
      return { ok: false, reason: 'unresolved' };
    }
    cloned.setIn([...segments, marker.key], newValue);
    return { ok: true, ast: { kind: 'yaml', raw: cloned.toString(), doc: cloned, lineCounter } };
  }

  if (isSeq(parent)) {
    if (typeof marker === 'object' && marker.kind === 'keyed') {
      return { ok: false, reason: 'unresolved' };
    }
    if (marker === '+') {
      cloned.addIn(segments, newValue);
    } else if (typeof marker === 'object' && marker.kind === 'indexed') {
      // `getIn` returns the YAMLSeq node, not a JS array, so splice the
      // node's own `items`. This also keeps sibling nodes (and their
      // comments) intact instead of rebuilding the whole sequence.
      const idx = Math.max(0, Math.min(marker.index, parent.items.length));
      parent.items.splice(idx, 0, cloned.createNode(newValue));
    }
    return { ok: true, ast: { kind: 'yaml', raw: cloned.toString(), doc: cloned, lineCounter } };
  }

  return { ok: false, reason: 'unresolved' };
}
/**
 * Walk `segments` against the live document, replacing each positional
 * token (`$first` / `$last` / `-N`) with the concrete key (for maps) or
 * index (for seqs) at that depth.
 *
 * A positional token that cannot be resolved at its depth is kept as its
 * literal text, exactly as `walkNode` in resolve.ts does on the read side.
 * This is what lets a map key that merely looks positional (for example a
 * negative number used as a key) be written as well as read.
 *
 * @returns The concrete segment list, or `null` when the walk passes a
 *   missing node or an out-of-range / non-integer seq index. Callers treat
 *   `null` as `unresolved` and refuse to write.
 */
function resolvePositionalSegments(
  root: Node,
  segments: readonly string[],
): string[] | null {
  const out: string[] = [];
  let node: Node | null = root;
  for (const seg of segments) {
    if (node === null) {return null;}
    let segNorm = seg;
    if (isPositionalSeg(seg)) {
      const concrete = positionalForYamlNode(node, seg);
      // Unresolvable token: fall through to literal lookup, mirroring the
      // read path, rather than failing the whole write.
      if (concrete !== null) {segNorm = concrete;}
    }
    out.push(segNorm);
    if (isMap(node)) {
      const pairs: readonly Pair[] = (node as { items: readonly Pair[] }).items;
      const pair: Pair | undefined = pairs.find((p) => {
        const k = isScalar(p.key) ? p.key.value : p.key;
        return String(k) === segNorm;
      });
      node = (pair?.value as Node | undefined) ?? null;
      continue;
    }
    if (isSeq(node)) {
      const idx = Number(segNorm);
      if (!Number.isInteger(idx) || idx < 0 || idx >= node.items.length) {return null;}
      node = (node.items[idx] as Node | null) ?? null;
      continue;
    }
    // Scalar — we still emit the literal segment so the next-step
    // hasIn check sees the same shape and fails cleanly with
    // `unresolved`. Don't try to descend further.
    node = null;
  }
  return out;
}

/**
 * Resolve one positional token against a single node: maps are treated as
 * keyed (non-indexable) containers, seqs as indexable ones.
 *
 * @returns Whatever `resolvePositionalSeg` yields for that container, or
 *   `null` when `node` is neither a map nor a seq.
 */
function positionalForYamlNode(node: Node, seg: string): string | null {
  if (isMap(node)) {
    const pairs: readonly Pair[] = (node as { items: readonly Pair[] }).items;
    const keys: readonly string[] = pairs.map((p) =>
      String(isScalar(p.key) ? p.key.value : p.key),
    );
    return resolvePositionalSeg(seg, { indexable: false, size: keys.length, keys });
  }
  if (isSeq(node)) {
    const items: readonly Node[] = (node as { items: readonly Node[] }).items;
    return resolvePositionalSeg(seg, { indexable: true, size: items.length });
  }
  return null;
}

/**
 * Flatten an `OcPath` into lookup segments: the `section`, `item` and
 * `field` slots, in that order, each split on `.` and unquoted.
 */
function pathSegments(path: OcPath): string[] {
  // Quote-aware split + unquote so YAML edit matches `resolveYamlOcPath`'s
  // lookup behavior. A quoted segment carrying `/` or `.` (e.g.
  // `"a/b"`) survives as a single segment, then gets stripped of
  // its surrounding quotes for the actual `getIn` / `setIn` key
  // comparison. Plain `.split('.')` would shred quoted keys and
  // produce silent resolve↔write asymmetry.
  const segs: string[] = [];
  const collect = (slot: string | undefined) => {
    if (slot === undefined) {return;}
    for (const sub of splitRespectingBrackets(slot, '.')) {
      segs.push(isQuotedSeg(sub) ? unquoteSeg(sub) : sub);
    }
  };
  collect(path.section);
  collect(path.item);
  collect(path.field);
  return segs;
}

/**
 * Clone a parsed document by serializing it and parsing the text again.
 *
 * The clone gets its own fresh `LineCounter`, so line positions match the
 * re-serialized text.
 *
 * NOTE: `doc.toString()` throws for a document that carries parse errors,
 * so only error-free documents can be cloned this way.
 */
function cloneDoc(doc: Document.Parsed): { doc: Document.Parsed; lineCounter: LineCounter } {
  const lineCounter = new LineCounter();
  const cloned = parseDocument(doc.toString(), {
    keepSourceTokens: true,
    prettyErrors: false,
    lineCounter,
  });
  return { doc: cloned, lineCounter };
}
/** Options accepted by `emitYaml`. */
export interface YamlEmitOptions {
  /** `'roundtrip'` (default) echoes `ast.raw`; `'render'` re-serializes `ast.doc`. */
  readonly mode?: 'roundtrip' | 'render';
  /** File name used to build the `oc://…` path reported by the sentinel guard. */
  readonly fileNameForGuard?: string;
  /**
   * See `JsoncEmitOptions.acceptPreExistingSentinel` for the rationale.
   * Default `true` — round-trip echoes parsed bytes without scanning.
   * Render mode always scans the rendered output (callers can inject
   * sentinels via setYamlOcPath, so render-time scan is mandatory).
   */
  readonly acceptPreExistingSentinel?: boolean;
}

/**
 * Serialize a YAML AST to text.
 *
 * Round-trip mode returns `ast.raw` as-is and scans it for the redaction
 * sentinel only when `acceptPreExistingSentinel` is `false`. Render mode
 * returns `ast.doc.toString()` and always scans the result.
 *
 * @throws OcEmitSentinelError when a scan finds the sentinel.
 */
export function emitYaml(ast: YamlAst, opts: YamlEmitOptions = {}): string {
  const renderRequested = (opts.mode ?? 'roundtrip') !== 'roundtrip';
  const skipRawScan = opts.acceptPreExistingSentinel ?? true;
  const guardPath = opts.fileNameForGuard ? `oc://${opts.fileNameForGuard}` : 'oc://';

  if (renderRequested) {
    const rendered = ast.doc.toString();
    if (rendered.includes(REDACTED_SENTINEL)) {
      throw new OcEmitSentinelError(`${guardPath}/[rendered]`);
    }
    return rendered;
  }

  // Round-trip: the sentinel is only looked up when the caller asked for
  // the raw bytes to be scanned.
  if (skipRawScan) {return ast.raw;}
  if (ast.raw.includes(REDACTED_SENTINEL)) {
    throw new OcEmitSentinelError(`${guardPath}/[raw]`);
  }
  return ast.raw;
}

/** Result of `parseYaml`: the AST plus any parser diagnostics. */
export interface YamlParseResult {
  readonly ast: YamlAst;
  readonly diagnostics: readonly Diagnostic[];
}
/**
 * Parse YAML text into a `YamlAst`.
 *
 * The document's own warnings and errors are surfaced as `Diagnostic`
 * entries (`OC_YAML_WARN` / `OC_YAML_PARSE_FAILED`); they are returned,
 * not thrown.
 *
 * @param raw - YAML source text; kept verbatim on the returned AST.
 * @returns The AST and the collected diagnostics (warnings first).
 */
export function parseYaml(raw: string): YamlParseResult {
  const lineCounter = new LineCounter();
  const doc = parseDocument(raw, {
    keepSourceTokens: true,
    prettyErrors: false,
    lineCounter,
  });

  // With `prettyErrors: false` the yaml package does not attach `linePos`
  // to its errors, so derive the line from the error's start offset and the
  // line counter. Fall back to line 1 only when no offset is known.
  const lineOf = (problem: {
    readonly linePos?: readonly [{ line: number }, ...unknown[]];
    readonly pos: readonly [number, number];
  }): number => {
    const pretty = problem.linePos?.[0]?.line;
    if (pretty !== undefined) {return pretty;}
    const offset = problem.pos[0];
    if (offset < 0) {return 1;}
    const { line } = lineCounter.linePos(offset);
    return line > 0 ? line : 1;
  };

  const diagnostics: Diagnostic[] = [];
  for (const w of doc.warnings) {
    diagnostics.push({
      line: lineOf(w),
      message: w.message,
      severity: 'warning',
      code: 'OC_YAML_WARN',
    });
  }
  for (const e of doc.errors) {
    diagnostics.push({
      line: lineOf(e),
      message: e.message,
      severity: 'error',
      code: 'OC_YAML_PARSE_FAILED',
    });
  }
  return { ast: { kind: 'yaml', raw, doc, lineCounter }, diagnostics };
}
/**
 * What a resolved path points at.
 *
 * - `root`: the path had no segments; `node` is the whole AST.
 * - `scalar`: a scalar reached as a seq item or as the document root.
 * - `map` / `seq`: a container; only the walked path is reported.
 * - `pair`: a map entry whose value is a scalar.
 */
export type YamlOcPathMatch =
  | { readonly kind: 'root'; readonly node: YamlAst }
  | { readonly kind: 'scalar'; readonly value: unknown; readonly path: readonly string[] }
  | {
      readonly kind: 'map';
      readonly path: readonly string[];
    }
  | {
      readonly kind: 'seq';
      readonly path: readonly string[];
    }
  | {
      readonly kind: 'pair';
      readonly key: string;
      readonly value: unknown;
      readonly path: readonly string[];
    };

/**
 * Look up `path` in a YAML AST.
 *
 * The `section`, `item` and `field` slots are each split on `.` (quote
 * aware) and unquoted, then walked from the document root.
 *
 * @returns A description of the node found, or `null` when the path does
 *   not resolve or the document has no contents.
 */
export function resolveYamlOcPath(
  ast: YamlAst,
  path: OcPath,
): YamlOcPathMatch | null {
  const segments: string[] = [];
  for (const slot of [path.section, path.item, path.field]) {
    if (slot === undefined) {continue;}
    for (const piece of splitRespectingBrackets(slot, '.')) {
      segments.push(isQuotedSeg(piece) ? unquoteSeg(piece) : piece);
    }
  }

  if (segments.length === 0) {return { kind: 'root', node: ast };}

  const root = ast.doc.contents;
  return root === null ? null : walkNode(root, segments, 0, []);
}

/**
 * Recursive step of the lookup: consume `segments[i]` against `node`.
 *
 * @param walked - Concrete segments consumed so far (positional tokens
 *   already replaced); becomes `path` on the returned match.
 */
function walkNode(
  node: Node | null,
  segments: readonly string[],
  i: number,
  walked: readonly string[],
): YamlOcPathMatch | null {
  if (node === null) {return null;}
  const requested = segments[i];

  // Path exhausted: report the kind of node we ended on.
  if (requested === undefined) {
    if (isMap(node)) {return { kind: 'map', path: walked };}
    if (isSeq(node)) {return { kind: 'seq', path: walked };}
    return isScalar(node) ? { kind: 'scalar', value: node.value, path: walked } : null;
  }
  if (requested.length === 0) {return null;}

  // Positional tokens (`$first` / `$last` / `-N`) resolve to a concrete
  // segment based on container shape. When they cannot be resolved the
  // literal text is used for lookup — `-N` on a keyed container is a plain
  // key (openclaw#59934 — Telegram supergroup IDs are negative numbers used
  // as map keys).
  let seg = requested;
  if (isPositionalSeg(requested)) {
    const concrete = positionalForYaml(node, requested);
    if (concrete !== null) {seg = concrete;}
  }

  if (isSeq(node)) {
    const index = Number(seg);
    const inRange = Number.isInteger(index) && index >= 0 && index < node.items.length;
    if (!inRange) {return null;}
    return walkNode(node.items[index] as Node, segments, i + 1, [...walked, seg]);
  }

  // Anything that is neither seq nor map is a scalar — can't descend.
  if (!isMap(node)) {return null;}

  const entry = (node as { items: Pair[] }).items.find((p) => {
    const rawKey = isScalar(p.key) ? p.key.value : p.key;
    return String(rawKey) === seg;
  });
  if (entry === undefined) {return null;}

  const nextWalked = [...walked, seg];
  const child = entry.value;
  // A scalar value at the final segment is reported together with its key.
  if (i === segments.length - 1 && isScalar(child)) {
    return { kind: 'pair', key: seg, value: child.value, path: nextWalked };
  }
  // Otherwise keep descending; a container at the final segment is then
  // described by the path-exhausted branch above.
  return walkNode(child as Node, segments, i + 1, nextWalked);
}

/**
 * Resolve one positional token against `node`: seqs are indexable, maps are
 * keyed by their stringified keys, anything else yields `null`.
 */
function positionalForYaml(node: Node, seg: string): string | null {
  if (isSeq(node)) {
    const { items } = node as { items: Node[] };
    return resolvePositionalSeg(seg, { indexable: true, size: items.length });
  }
  if (isMap(node)) {
    const keys = (node as { items: Pair[] }).items.map((p) =>
      String(isScalar(p.key) ? p.key.value : p.key),
    );
    return resolvePositionalSeg(seg, { indexable: false, size: keys.length, keys });
  }
  return null;
}