mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-08 18:50:42 +00:00
feat(workspace): oc-path addressing substrate + openclaw path CLI (md/jsonc/jsonl/yaml) (#78678)
Implements #78051 — oc:// addressing substrate for workspace files. New src/oc-path/ substrate (parser/formatter, per-kind parse+emit for md/jsonc/jsonl/yaml, universal resolveOcPath/setOcPath/findOcPaths verbs, sentinel emit guard) + openclaw path resolve|find|set|validate|emit CLI + docs/cli/path.md reference page + CHANGELOG entry. Co-authored-by: giodl73-repo <235387111+giodl73-repo@users.noreply.github.com> Co-authored-by: galiniliev <5711535+galiniliev@users.noreply.github.com>
This commit is contained in:
4
.gitignore
vendored
4
.gitignore
vendored
@@ -95,6 +95,10 @@ docs/internal/
|
||||
tmp/
|
||||
IDENTITY.md
|
||||
USER.md
|
||||
# Exception: oc-path real-world test fixtures need to be tracked even
|
||||
# though the bare names match the local-untracked rule above.
|
||||
!src/oc-path/tests/fixtures/real/IDENTITY.md
|
||||
!src/oc-path/tests/fixtures/real/USER.md
|
||||
*.tgz
|
||||
*.tar.gz
|
||||
*.zip
|
||||
|
||||
@@ -11,6 +11,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Control UI/usage: add transcript-backed historical lineage rollups for rotated logical sessions, with current-instance vs historical-lineage scope controls and long-range presets so usage history stays visible after restarts and updates. Fixes #50701. Thanks @dev-gideon-llc and @BunsDev.
|
||||
- Agents/failover: harden state-aware lane suspension by persisting quota resume transitions, restoring configured lane concurrency, preserving non-quota failure reasons, and exporting model failover events through diagnostics OTLP. Thanks @BunsDev.
|
||||
- Channels/streaming: make progress draft labels scroll away with other progress lines, render structured tool rows as compact emoji/title/details, show web-search queries from provider-native argument shapes, and skip empty Discord apply-patch starts until a patch summary exists. (#79146)
|
||||
- Workspace/oc-path: add the `oc://` addressing substrate (`src/oc-path/`) — a universal, kind-dispatched path scheme for addressing leaves and nodes inside markdown, jsonc, jsonl, and yaml workspace files, with `parseOcPath`/`formatOcPath`, per-kind `parseXxx`/`emitXxx`, universal `resolveOcPath`/`setOcPath`/`findOcPaths` verbs, the `__OPENCLAW_REDACTED__` sentinel emit guard, and the new `openclaw path resolve|find|set|validate|emit` CLI for shell-level inspection and surgical edits. Implements #78051. (#78678) Thanks @giodl73-repo.
|
||||
- Telegram: preserve the channel-specific 10-option poll cap in the unified outbound adapter so over-limit polls are rejected before send. (#78762) Thanks @obviyus.
|
||||
- Slack: route handled top-level channel turns in implicit-conversation channels to thread-scoped sessions when Slack reply threading is enabled, keeping the root turn and later thread replies on one OpenClaw session. (#78522) Thanks @zeroth-blip.
|
||||
- Telegram: re-probe the primary fetch transport after repeated sticky fallback success so transient IPv4 or pinned-IP fallback promotion can recover without a gateway restart. Fixes #77088. (#77157) Thanks @MkDev11.
|
||||
|
||||
@@ -24,7 +24,7 @@ apply across the CLI.
|
||||
| Network and nodes | [`directory`](/cli/directory) · [`nodes`](/cli/nodes) · [`devices`](/cli/devices) · [`node`](/cli/node) |
|
||||
| Runtime and sandbox | [`approvals`](/cli/approvals) · `exec-policy` (see [`approvals`](/cli/approvals)) · [`sandbox`](/cli/sandbox) · [`tui`](/cli/tui) · `chat`/`terminal` (aliases for [`tui --local`](/cli/tui)) · [`browser`](/cli/browser) |
|
||||
| Automation | [`cron`](/cli/cron) · [`tasks`](/cli/tasks) · [`hooks`](/cli/hooks) · [`webhooks`](/cli/webhooks) |
|
||||
| Discovery and docs | [`dns`](/cli/dns) · [`docs`](/cli/docs) |
|
||||
| Discovery and docs | [`dns`](/cli/dns) · [`docs`](/cli/docs) · [`path`](/cli/path) |
|
||||
| Pairing and channels | [`pairing`](/cli/pairing) · [`qr`](/cli/qr) · [`channels`](/cli/channels) |
|
||||
| Security and plugins | [`security`](/cli/security) · [`secrets`](/cli/secrets) · [`skills`](/cli/skills) · [`plugins`](/cli/plugins) · [`proxy`](/cli/proxy) |
|
||||
| Legacy aliases | [`daemon`](/cli/daemon) (gateway service) · [`clawbot`](/cli/clawbot) (namespace) |
|
||||
|
||||
121
docs/cli/path.md
Normal file
121
docs/cli/path.md
Normal file
@@ -0,0 +1,121 @@
|
||||
---
|
||||
summary: "CLI reference for `openclaw path` (inspect and edit workspace files via the `oc://` addressing scheme)"
|
||||
read_when:
|
||||
- You want to read or write a leaf inside a workspace file from the terminal
|
||||
- You're scripting against workspace state and want a stable, kind-agnostic addressing scheme
|
||||
- You're debugging an `oc://` path (validate the syntax, see what it resolves to)
|
||||
title: "Path"
|
||||
---
|
||||
|
||||
# `openclaw path`
|
||||
|
||||
Shell-level access to the `oc://` addressing substrate — one universal,
|
||||
kind-dispatched path scheme for inspecting and surgically editing workspace
|
||||
files (markdown, jsonc, jsonl, yaml). Self-hosters and editor extensions use
|
||||
it to read or write a single leaf inside a workspace file without scripting
|
||||
against the SDK directly.
|
||||
|
||||
## Subcommands
|
||||
|
||||
| Subcommand | Purpose |
|
||||
| ----------------------- | ---------------------------------------------------------------------------- |
|
||||
| `resolve <oc-path>` | Print the match at the path (or "not found"). |
|
||||
| `find <pattern>` | Enumerate matches for a wildcard / predicate path. |
|
||||
| `set <oc-path> <value>` | Write a leaf at the path. Supports `--dry-run`. |
|
||||
| `validate <oc-path>` | Parse-only — print structural breakdown (file / section / item / field). |
|
||||
| `emit <file>` | Round-trip a file through `parseXxx` + `emitXxx` (byte-fidelity diagnostic). |
|
||||
|
||||
## Global flags
|
||||
|
||||
| Flag | Purpose |
|
||||
| --------------- | ------------------------------------------------------------------------ |
|
||||
| `--cwd <dir>` | Resolve the file slot against this directory (default: `process.cwd()`). |
|
||||
| `--file <path>` | Override the file slot's resolved path (absolute access). |
|
||||
| `--json` | Force JSON output (default when stdout is not a TTY). |
|
||||
| `--human` | Force human output (default when stdout is a TTY). |
|
||||
| `--dry-run` | (only on `set`) print the bytes that would be written without writing. |
|
||||
|
||||
## `oc://` syntax
|
||||
|
||||
```
|
||||
oc://FILE/SECTION/ITEM/FIELD?session=SCOPE
|
||||
```
|
||||
|
||||
Slot rules — `field` requires `item`, `item` requires `section`. Across all
|
||||
four slots:
|
||||
|
||||
- **Quoted segments** — `"a/b.c"` survives `/` and `.` separators.
|
||||
`"\\"` and `"\""` are the only escapes inside quotes.
|
||||
The file slot is also quote-aware: `oc://"skills/email-drafter"/Tools/-1`
|
||||
treats `skills/email-drafter` as a single file path.
|
||||
- **Predicates** — `[k=v]`, `[k!=v]`, `[k*=v]`, `[k^=v]`, `[k$=v]`,
|
||||
`[k<v]`, `[k<=v]`, `[k>v]`, `[k>=v]`.
|
||||
- **Unions** — `{a,b,c}` matches any of the alternatives.
|
||||
- **Wildcards** — `*` (single sub-segment) and `**` (zero-or-more,
|
||||
recursive). `find` accepts these; `resolve` and `set` reject them as
|
||||
ambiguous.
|
||||
- **Positional** — `$first`, `$last`, `-N` (Nth from end).
|
||||
- **Ordinal** — `#N` for Nth match.
|
||||
- **Insertion markers** — `+`, `+key`, `+nnn` for keyed / indexed
|
||||
insertion (use with `set`).
|
||||
- **Session scope** — `?session=cron:daily` etc. Orthogonal to slot
|
||||
nesting.
|
||||
|
||||
Reserved characters (`?`, `&`, `%`) outside quoted, predicate, or union
|
||||
segments are rejected. Control characters (U+0000–U+001F, U+007F) are
|
||||
rejected anywhere.
|
||||
|
||||
## Examples
|
||||
|
||||
```bash
|
||||
# Validate a path (no filesystem access)
|
||||
openclaw path validate 'oc://AGENTS.md/Tools/-1/risk'
|
||||
|
||||
# Read a leaf
|
||||
openclaw path resolve 'oc://gateway.jsonc/version'
|
||||
|
||||
# Wildcard search
|
||||
openclaw path find 'oc://session.jsonl/*/event' --file ./logs/session.jsonl
|
||||
|
||||
# Dry-run a write
|
||||
openclaw path set 'oc://gateway.jsonc/version' '2.0' --dry-run
|
||||
|
||||
# Apply the write
|
||||
openclaw path set 'oc://gateway.jsonc/version' '2.0'
|
||||
|
||||
# Byte-fidelity round-trip (diagnostic)
|
||||
openclaw path emit ./AGENTS.md
|
||||
```
|
||||
|
||||
## Exit codes
|
||||
|
||||
| Code | Meaning |
|
||||
| ---- | -------------------------------------------------------------------------- |
|
||||
| `0` | Success. (`resolve` / `find`: at least one match. `set`: write succeeded.) |
|
||||
| `1`  | No match, `validate` found the path malformed, or `set` rejected by the substrate (no system-level error). |
|
||||
| `2` | Argument or parse error. |
|
||||
|
||||
## Output mode
|
||||
|
||||
`openclaw path` is TTY-aware: human-readable output on a terminal, JSON when
|
||||
stdout is piped or redirected. `--json` and `--human` override the
|
||||
auto-detection.
|
||||
|
||||
## Notes
|
||||
|
||||
- `set` writes raw bytes through the substrate's emit path, which applies the
|
||||
redaction-sentinel guard automatically. A leaf carrying
|
||||
`__OPENCLAW_REDACTED__` (verbatim or as a substring) is refused at write
|
||||
time.
|
||||
- `set` on a JSONC file currently re-renders the file (drops comments and
|
||||
trailing-comma formatting) when it mutates a leaf. Read-path round-trip is
|
||||
byte-identical. A byte-splice editor that preserves comments through
|
||||
writes is planned as a follow-up.
|
||||
- `path` does not know about LKG. If the file is LKG-tracked, the next
|
||||
observe call decides whether to promote / recover. `set --batch` for
|
||||
atomic multi-set through the LKG promote/recover lifecycle is planned
|
||||
alongside the LKG-recovery substrate.
|
||||
|
||||
## Related
|
||||
|
||||
- [CLI reference](/cli)
|
||||
113
src/cli/path-cli.ts
Normal file
113
src/cli/path-cli.ts
Normal file
@@ -0,0 +1,113 @@
|
||||
import type { Command } from "commander";
|
||||
import {
|
||||
pathEmitCommand,
|
||||
pathFindCommand,
|
||||
pathResolveCommand,
|
||||
pathSetCommand,
|
||||
pathValidateCommand,
|
||||
type PathCommandOptions,
|
||||
} from "../commands/path.js";
|
||||
import { defaultRuntime } from "../runtime.js";
|
||||
import { formatDocsLink } from "../terminal/links.js";
|
||||
import { theme } from "../terminal/theme.js";
|
||||
import { runCommandWithRuntime } from "./cli-utils.js";
|
||||
import { applyParentDefaultHelpAction } from "./program/parent-default-help.js";
|
||||
|
||||
/**
 * Parsed flags as a `path` subcommand action receives them. Each member
 * mirrors one `.option(...)` registered in `registerPathCli`; all are
 * optional because not every subcommand registers every flag.
 */
interface RawPathOptions {
  /** `--cwd <dir>` */
  cwd?: string;
  /** `--file <file>` */
  file?: string;
  /** `--json` */
  json?: boolean;
  /** `--human` */
  human?: boolean;
  /** `--dry-run` (registered on `set` only) */
  dryRun?: boolean;
}
|
||||
|
||||
/**
 * Copy the parsed CLI flags into the `PathCommandOptions` shape the
 * command handlers accept.
 *
 * The result always carries all five keys; a flag that was not supplied
 * stays `undefined`.
 */
function normalize(opts: RawPathOptions): PathCommandOptions {
  const { json, human, cwd, file, dryRun } = opts;
  return { json, human, cwd, file, dryRun };
}
|
||||
|
||||
/**
 * Register the `openclaw path` command group on `program`.
 *
 * Adds the `path` parent command plus its `resolve`, `find`, `set`,
 * `validate` and `emit` subcommands. Every action normalizes the parsed
 * flags and forwards to the matching handler from `../commands/path.js`
 * with `defaultRuntime`.
 *
 * All five actions run inside `runCommandWithRuntime`, so a handler that
 * throws is reported through the same boundary regardless of subcommand.
 * `validate` previously called its handler directly and was the only
 * subcommand outside that boundary.
 *
 * @param program - The commander program (or parent command) to attach to.
 */
export function registerPathCli(program: Command) {
  const path = program
    .command("path")
    .description("Inspect and edit workspace files via the oc:// addressing scheme")
    .addHelpText(
      "after",
      () =>
        `\n${theme.muted("Docs:")} ${formatDocsLink("/cli/path", "docs.openclaw.ai/cli/path")}\n`,
    );

  path
    .command("resolve")
    .description("Print the match at an oc:// path")
    .argument("<oc-path>", "oc:// path to resolve")
    .option("--json", "Force JSON output")
    .option("--human", "Force human output")
    .option("--cwd <dir>", "Resolve file slot against this directory")
    .option("--file <file>", "Override the file slot's resolved path (absolute access)")
    .action(async (pathStr: string, opts: RawPathOptions) => {
      await runCommandWithRuntime(defaultRuntime, async () => {
        await pathResolveCommand(pathStr, normalize(opts), defaultRuntime);
      });
    });

  path
    .command("find")
    .description("Enumerate matches for a wildcard / predicate oc:// pattern")
    .argument("<pattern>", "oc:// pattern (supports * and **)")
    .option("--json", "Force JSON output")
    .option("--human", "Force human output")
    .option("--cwd <dir>", "Resolve file slot against this directory")
    .option("--file <file>", "Override the file slot's resolved path (absolute access)")
    .action(async (patternStr: string, opts: RawPathOptions) => {
      await runCommandWithRuntime(defaultRuntime, async () => {
        await pathFindCommand(patternStr, normalize(opts), defaultRuntime);
      });
    });

  path
    .command("set")
    .description("Write a leaf value at an oc:// path")
    .argument("<oc-path>", "oc:// path to write")
    .argument("<value>", "string value to write")
    .option("--dry-run", "Print bytes without writing")
    .option("--json", "Force JSON output")
    .option("--human", "Force human output")
    .option("--cwd <dir>", "Resolve file slot against this directory")
    .option("--file <file>", "Override the file slot's resolved path (absolute access)")
    .action(async (pathStr: string, value: string, opts: RawPathOptions) => {
      await runCommandWithRuntime(defaultRuntime, async () => {
        await pathSetCommand(pathStr, value, normalize(opts), defaultRuntime);
      });
    });

  path
    .command("validate")
    .description("Parse an oc:// path and print its slot structure")
    .argument("<oc-path>", "oc:// path to validate")
    .option("--json", "Force JSON output")
    .option("--human", "Force human output")
    .action(async (pathStr: string, opts: RawPathOptions) => {
      // The handler is synchronous; the async wrapper turns a synchronous
      // throw into a rejection that runCommandWithRuntime can observe.
      await runCommandWithRuntime(defaultRuntime, async () => {
        pathValidateCommand(pathStr, normalize(opts), defaultRuntime);
      });
    });

  path
    .command("emit")
    .description("Round-trip a file through parseXxx + emitXxx (byte-fidelity diagnostic)")
    .argument("<file>", "Path to a workspace file (md / jsonc / jsonl / yaml)")
    .option("--cwd <dir>", "Resolve <file> against this directory (default: process.cwd())")
    .option("--file <file>", "Override the file's resolved path (absolute access)")
    .option("--json", "Force JSON output")
    .option("--human", "Force human output")
    .action(async (fileArg: string, opts: RawPathOptions) => {
      await runCommandWithRuntime(defaultRuntime, async () => {
        await pathEmitCommand(fileArg, normalize(opts), defaultRuntime);
      });
    });

  // NOTE(review): presumably makes a bare `openclaw path` print help —
  // confirm against ./program/parent-default-help.
  applyParentDefaultHelpAction(path);
}
|
||||
@@ -167,6 +167,11 @@ const entrySpecs: readonly CommandGroupDescriptorSpec<SubCliRegistrar>[] = [
|
||||
loadModule: () => import("../docs-cli.js"),
|
||||
exportName: "registerDocsCli",
|
||||
},
|
||||
{
|
||||
commandNames: ["path"],
|
||||
loadModule: () => import("../path-cli.js"),
|
||||
exportName: "registerPathCli",
|
||||
},
|
||||
{
|
||||
commandNames: ["qa"],
|
||||
loadModule: loadPrivateQaCliModule,
|
||||
|
||||
@@ -93,6 +93,11 @@ const subCliCommandCatalog = defineCommandDescriptorCatalog([
|
||||
description: "Search the live OpenClaw docs",
|
||||
hasSubcommands: false,
|
||||
},
|
||||
{
|
||||
name: "path",
|
||||
description: "Inspect and edit workspace files via the oc:// addressing scheme",
|
||||
hasSubcommands: true,
|
||||
},
|
||||
{
|
||||
name: "qa",
|
||||
description: "Run QA scenarios and launch the private QA debugger UI",
|
||||
|
||||
291
src/commands/path.test.ts
Normal file
291
src/commands/path.test.ts
Normal file
@@ -0,0 +1,291 @@
|
||||
/**
|
||||
* Smoke tests for the `openclaw path` CLI handlers.
|
||||
*
|
||||
* Tests invoke each subcommand handler directly with a capturing
|
||||
* `OutputRuntimeEnv` — no commander wiring, no child process spawn.
|
||||
* Assertions inspect captured stdout/stderr and the exit code the
|
||||
* handler set on the runtime.
|
||||
*/
|
||||
import { mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import type { OutputRuntimeEnv } from "../runtime.js";
|
||||
import {
|
||||
pathEmitCommand,
|
||||
pathFindCommand,
|
||||
pathResolveCommand,
|
||||
pathSetCommand,
|
||||
pathValidateCommand,
|
||||
} from "./path.js";
|
||||
|
||||
/**
 * `OutputRuntimeEnv` that records instead of printing, so assertions can
 * inspect what a handler wrote and which exit code it requested.
 */
interface TestRuntime extends OutputRuntimeEnv {
  /** Last code passed to `exit`; starts at 0. */
  exitCode: number;
  /** Entries captured from `log`, `writeStdout` and `writeJson`, in call order. */
  readonly stdout: string[];
  /** Entries captured from `error`, in call order. */
  readonly stderr: string[];
}
|
||||
|
||||
/**
 * Build a capturing runtime for handler tests.
 *
 * `log`, `writeStdout` and `writeJson` append to `stdout`; `error` appends
 * to `stderr`; `exit` records the code on `exitCode` instead of terminating
 * the process.
 */
function createTestRuntime(): TestRuntime {
  const outLines: string[] = [];
  const errLines: string[] = [];
  // Stringify each argument and join with a single space.
  const render = (parts: unknown[]): string => parts.map((part) => String(part)).join(" ");

  const runtime: TestRuntime = {
    stdout: outLines,
    stderr: errLines,
    exitCode: 0,
    log: (...args) => {
      outLines.push(render(args));
    },
    error: (...args) => {
      errLines.push(render(args));
    },
    writeStdout: (value) => {
      outLines.push(value);
    },
    writeJson: (value, space = 2) => {
      // A non-positive `space` means compact output.
      const indent = space > 0 ? space : undefined;
      outLines.push(JSON.stringify(value, null, indent));
    },
    exit: (code) => {
      runtime.exitCode = code;
    },
  };
  return runtime;
}
|
||||
|
||||
/** Join every captured stdout entry with newlines. */
function stdoutText(rt: TestRuntime): string {
  return rt.stdout.join("\n");
}
|
||||
/** Join every captured stderr entry with newlines. */
function stderrText(rt: TestRuntime): string {
  return rt.stderr.join("\n");
}
|
||||
|
||||
// Handler-level smoke suite: each case builds a fresh capturing runtime and a
// fresh temp workspace, calls one `path*Command` handler directly, then checks
// the captured output and the recorded exit code.
describe("openclaw path CLI", () => {
  let workspaceDir: string;

  beforeEach(() => {
    workspaceDir = mkdtempSync(join(tmpdir(), "oc-path-cli-"));
  });

  afterEach(() => {
    // Best-effort cleanup so repeated runs do not pile up temp directories.
    // `maxRetries` lets Node retry transient EBUSY / EPERM / ENOTEMPTY
    // failures (the Windows flake that previously motivated skipping
    // cleanup); the catch keeps a stubborn directory from failing an
    // otherwise green test.
    try {
      rmSync(workspaceDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 50 });
    } catch {
      // Leave the directory behind for the OS temp reaper.
    }
  });

  describe("validate", () => {
    it("CLI-V01 accepts a well-formed path with --json", () => {
      const rt = createTestRuntime();
      pathValidateCommand("oc://AGENTS.md/Tools/-1", { json: true }, rt);
      expect(rt.exitCode).toBe(0);
      const out = JSON.parse(stdoutText(rt));
      expect(out.valid).toBe(true);
      expect(out.structure.file).toBe("AGENTS.md");
      expect(out.structure.section).toBe("Tools");
    });

    it("CLI-V02 rejects a malformed path with code 1", () => {
      const rt = createTestRuntime();
      // The embedded NUL is a control character, which the path syntax rejects.
      pathValidateCommand("oc://X/a\x00b", { json: true }, rt);
      expect(rt.exitCode).toBe(1);
      const out = JSON.parse(stdoutText(rt));
      expect(out.valid).toBe(false);
    });

    it("CLI-V03 missing argument returns 2", () => {
      const rt = createTestRuntime();
      pathValidateCommand(undefined, { json: true }, rt);
      expect(rt.exitCode).toBe(2);
      expect(stderrText(rt)).toContain("missing");
    });
  });

  describe("resolve", () => {
    it("CLI-R01 finds a leaf in jsonc and prints it", async () => {
      const filePath = join(workspaceDir, "gateway.jsonc");
      writeFileSync(filePath, '{ "version": "1.0" }', "utf-8");
      const rt = createTestRuntime();
      await pathResolveCommand(
        "oc://gateway.jsonc/version",
        { cwd: workspaceDir, json: true },
        rt,
      );
      expect(rt.exitCode).toBe(0);
      const out = JSON.parse(stdoutText(rt));
      expect(out.resolved).toBe(true);
      expect(out.match.kind).toBe("leaf");
      expect(out.match.valueText).toBe("1.0");
    });

    it("CLI-R02 returns 1 for not-found path", async () => {
      const filePath = join(workspaceDir, "gateway.jsonc");
      writeFileSync(filePath, '{ "version": "1.0" }', "utf-8");
      const rt = createTestRuntime();
      await pathResolveCommand(
        "oc://gateway.jsonc/missing",
        { cwd: workspaceDir, json: true },
        rt,
      );
      expect(rt.exitCode).toBe(1);
      const out = JSON.parse(stdoutText(rt));
      expect(out.resolved).toBe(false);
    });

    it("CLI-R03 missing argument returns 2", async () => {
      const rt = createTestRuntime();
      await pathResolveCommand(undefined, { json: true }, rt);
      expect(rt.exitCode).toBe(2);
      expect(stderrText(rt)).toContain("missing");
    });
  });

  describe("set", () => {
    it("CLI-S01 writes new bytes when path resolves", async () => {
      const filePath = join(workspaceDir, "gateway.jsonc");
      writeFileSync(filePath, '{ "version": "1.0" }', "utf-8");
      const rt = createTestRuntime();
      await pathSetCommand(
        "oc://gateway.jsonc/version",
        "2.0",
        { cwd: workspaceDir, json: true },
        rt,
      );
      expect(rt.exitCode).toBe(0);
      const after = readFileSync(filePath, "utf-8");
      expect(after).toContain('"2.0"');
    });

    it("CLI-S02 --dry-run does not write to disk", async () => {
      const filePath = join(workspaceDir, "gateway.jsonc");
      const before = '{ "version": "1.0" }';
      writeFileSync(filePath, before, "utf-8");
      const rt = createTestRuntime();
      await pathSetCommand(
        "oc://gateway.jsonc/version",
        "2.0",
        { cwd: workspaceDir, json: true, dryRun: true },
        rt,
      );
      expect(rt.exitCode).toBe(0);
      const out = JSON.parse(stdoutText(rt));
      expect(out.dryRun).toBe(true);
      expect(out.bytes).toContain('"2.0"');
      // File on disk unchanged.
      expect(readFileSync(filePath, "utf-8")).toBe(before);
    });

    it("CLI-S03 sentinel-bearing value is refused at emit", async () => {
      const filePath = join(workspaceDir, "gateway.jsonc");
      writeFileSync(filePath, '{ "token": "x" }', "utf-8");
      const rt = createTestRuntime();
      // The sentinel-bearing value is accepted into the AST by setOcPath,
      // but `emitForKind` refuses to serialize it (defense-in-depth at
      // the per-kind emit boundary). The CLI handler must catch that
      // refusal and route it through the structured error boundary —
      // a thrown error escaping commander would print raw `String(err)`
      // and bypass our JSON/human scrubbing. Pin the structured shape:
      // exit code 1, stable code OC_EMIT_SENTINEL, message scrubbed.
      await pathSetCommand(
        "oc://gateway.jsonc/token",
        "__OPENCLAW_REDACTED__",
        { cwd: workspaceDir, json: true },
        rt,
      );
      expect(rt.exitCode).toBe(1);
      expect(stderrText(rt)).toContain("OC_EMIT_SENTINEL");
      // F13 — file context in sentinel error. Without fileNameForGuard
      // plumbing through emitForKind, the message would carry the
      // empty-slot fallback (`oc:///[raw]`); now it carries the actual
      // file (`oc://gateway.jsonc/[raw]`). Forensics + audit pipelines
      // rely on this — without the file context, "sentinel rejected
      // somewhere" doesn't tell you WHICH file was involved.
      expect(stderrText(rt)).toContain("gateway.jsonc");
    });

    it("CLI-S04 missing args returns 2", async () => {
      const rt = createTestRuntime();
      await pathSetCommand(undefined, undefined, { json: true }, rt);
      expect(rt.exitCode).toBe(2);
      expect(stderrText(rt)).toContain("requires");
    });
  });

  describe("find", () => {
    it("CLI-F01 enumerates wildcard matches", async () => {
      const filePath = join(workspaceDir, "config.jsonc");
      writeFileSync(filePath, '{ "items": [ { "id": "a" }, { "id": "b" } ] }', "utf-8");
      const rt = createTestRuntime();
      await pathFindCommand(
        "oc://config.jsonc/items/*/id",
        { cwd: workspaceDir, json: true },
        rt,
      );
      expect(rt.exitCode).toBe(0);
      const out = JSON.parse(stdoutText(rt));
      expect(out.count).toBe(2);
    });

    it("CLI-F02 returns 1 when zero matches", async () => {
      const filePath = join(workspaceDir, "gateway.jsonc");
      writeFileSync(filePath, "{}", "utf-8");
      const rt = createTestRuntime();
      await pathFindCommand(
        "oc://gateway.jsonc/nope/*",
        { cwd: workspaceDir, json: true },
        rt,
      );
      expect(rt.exitCode).toBe(1);
    });

    it("CLI-F03 file-slot wildcard rejected with clear error (no ENOENT)", async () => {
      // Closes Galin P3 (round 8): `find` resolves `pattern.file` to one
      // literal path, so `oc://*.jsonc/...` would silently ENOENT during
      // fs.readFile. The CLI now surfaces a clear error before touching
      // the filesystem, with stable code OC_PATH_FILE_WILDCARD_UNSUPPORTED.
      const rt = createTestRuntime();
      await pathFindCommand(
        "oc://*.jsonc/items",
        { cwd: workspaceDir, json: true },
        rt,
      );
      expect(rt.exitCode).toBe(2);
      expect(stderrText(rt)).toContain("OC_PATH_FILE_WILDCARD_UNSUPPORTED");
      expect(stderrText(rt)).toContain("file-slot wildcards are not supported");
    });
  });

  describe("emit", () => {
    it("CLI-E01 round-trips jsonc bytes verbatim (byte-fidelity proof)", async () => {
      const filePath = join(workspaceDir, "gateway.jsonc");
      const before = '// keep this comment\n{\n "v": 1\n}\n';
      writeFileSync(filePath, before, "utf-8");
      const rt = createTestRuntime();
      await pathEmitCommand(filePath, { json: true }, rt);
      expect(rt.exitCode).toBe(0);
      const out = JSON.parse(stdoutText(rt));
      expect(out.kind).toBe("jsonc");
      expect(out.bytes).toBe(before);
    });

    it("CLI-E02 round-trips md verbatim", async () => {
      const filePath = join(workspaceDir, "AGENTS.md");
      const before = "## Tools\n- gh\n## Boundaries\n- never rm -rf\n";
      writeFileSync(filePath, before, "utf-8");
      const rt = createTestRuntime();
      await pathEmitCommand(filePath, { json: true }, rt);
      expect(rt.exitCode).toBe(0);
      const out = JSON.parse(stdoutText(rt));
      expect(out.kind).toBe("md");
      expect(out.bytes).toBe(before);
    });

    it("CLI-E03 emit --cwd resolves <file> against the supplied directory", async () => {
      // Closes round-10 finding F2: emit advertises --cwd / --file in
      // the docs but the handler resolved <file> against process.cwd()
      // ignoring both. Pin the new wiring: a relative <file> resolves
      // against --cwd, not against process.cwd().
      const filePath = join(workspaceDir, "AGENTS.md");
      writeFileSync(filePath, "## Tools\n- gh\n", "utf-8");
      const rt = createTestRuntime();
      // Pass a RELATIVE filename + explicit --cwd. If the handler
      // ignored --cwd, loadAst would ENOENT against process.cwd().
      await pathEmitCommand("AGENTS.md", { cwd: workspaceDir, json: true }, rt);
      expect(rt.exitCode).toBe(0);
      const out = JSON.parse(stdoutText(rt));
      expect(out.kind).toBe("md");
      expect(out.bytes).toBe("## Tools\n- gh\n");
    });
  });
});
|
||||
537
src/commands/path.ts
Normal file
537
src/commands/path.ts
Normal file
@@ -0,0 +1,537 @@
|
||||
/**
|
||||
* `openclaw path` — shell-level access to the OcPath substrate verbs.
|
||||
* Self-hosters and editor extensions use it to inspect and surgically
|
||||
* edit workspace files without scripting against the SDK directly.
|
||||
*
|
||||
* Subcommands:
|
||||
* - `resolve <oc-path>` — print the match at the path
|
||||
* - `set <oc-path> <value>` — write a leaf at the path; supports `--dry-run`
|
||||
* - `find <pattern>` — enumerate matches for a wildcard/predicate path
|
||||
* - `validate <oc-path>` — parse-only; print structure
|
||||
* - `emit <file>` — read + parseXxx + emitXxx; verifies byte-fidelity
|
||||
*
|
||||
* Output is TTY-aware: defaults to human-readable when stdout is a TTY,
|
||||
* switches to JSON otherwise (so pipes don't get formatting noise).
|
||||
* `--json` and `--human` flags override the auto-detection.
|
||||
*
|
||||
* Boundaries this CLI does NOT cross (v0):
|
||||
* - Doesn't know about LKG. `set` writes raw bytes through the
|
||||
* substrate emit; if the file is LKG-tracked, the next observe
|
||||
* call decides whether to promote / recover.
|
||||
* - Doesn't know about lint rules or doctor fixers — that's a
|
||||
* different surface.
|
||||
*/
|
||||
|
||||
import { promises as fs } from "node:fs";
|
||||
import { resolve as resolvePath } from "node:path";
|
||||
import {
|
||||
OcEmitSentinelError,
|
||||
OcPathError,
|
||||
REDACTED_SENTINEL,
|
||||
emitJsonc,
|
||||
emitJsonl,
|
||||
emitMd,
|
||||
emitYaml,
|
||||
findOcPaths,
|
||||
formatOcPath,
|
||||
inferKind,
|
||||
parseJsonc,
|
||||
parseJsonl,
|
||||
parseMd,
|
||||
parseOcPath,
|
||||
parseYaml,
|
||||
resolveOcPath,
|
||||
setOcPath,
|
||||
type OcAst,
|
||||
type OcMatch,
|
||||
type OcPath,
|
||||
type SetResult,
|
||||
} from "../oc-path/index.js";
|
||||
import type { OutputRuntimeEnv } from "../runtime.js";
|
||||
|
||||
/**
 * Options shared by the `openclaw path` command handlers. Every member is
 * optional and read-only.
 */
export interface PathCommandOptions {
  /** Force JSON output; checked before `human` when picking the output mode. */
  readonly json?: boolean;
  /** Force human-readable output. */
  readonly human?: boolean;
  /** Directory the path's file slot is resolved against; falls back to `process.cwd()`. */
  readonly cwd?: string;
  /** Filesystem path used instead of the path's file slot. */
  readonly file?: string;
  /** `--dry-run` flag of the `set` subcommand. */
  readonly dryRun?: boolean;
}
|
||||
|
||||
/** The two renderings a handler can produce; chosen by `detectMode`. */
type OutputMode = "human" | "json";
|
||||
|
||||
/** Text `scrubSentinel` writes in place of each redaction-sentinel occurrence. */
const SCRUB_PLACEHOLDER = "[REDACTED]";
|
||||
|
||||
/**
 * Mask the redaction sentinel before text reaches an output stream.
 *
 * Every occurrence of `REDACTED_SENTINEL` in `s` is swapped for
 * `[REDACTED]`; text without the sentinel is returned as-is. This is a
 * defense-in-depth measure: even if some code path surfaces raw file
 * content that carries the sentinel, the CLI must not echo it.
 *
 * @param s - Text about to be written to stdout or stderr.
 * @returns `s` with all sentinel occurrences replaced.
 */
export function scrubSentinel(s: string): string {
  return s.includes(REDACTED_SENTINEL)
    ? s.split(REDACTED_SENTINEL).join(SCRUB_PLACEHOLDER)
    : s;
}
|
||||
|
||||
/**
 * Pick the output mode for one invocation.
 *
 * An explicit `json: true` wins, then an explicit `human: true`. With
 * neither set, the mode follows stdout: human on a TTY, JSON otherwise.
 */
function detectMode(options: PathCommandOptions): OutputMode {
  const forced: OutputMode | undefined =
    options.json === true ? "json" : options.human === true ? "human" : undefined;
  if (forced !== undefined) {
    return forced;
  }
  const interactive = Boolean(process.stdout.isTTY);
  return interactive ? "human" : "json";
}
|
||||
|
||||
/**
 * Write one result to stdout in the requested mode.
 *
 * JSON mode pretty-prints `value` with a two-space indent. Human mode
 * writes whatever `humanFallback` returns, and the callback runs only in
 * that mode. Either way the text passes through `scrubSentinel` first.
 *
 * @param runtime - Output sink; only `writeStdout` is used.
 * @param mode - Rendering to produce.
 * @param value - Payload serialized in JSON mode.
 * @param humanFallback - Lazily builds the human-readable text.
 */
function emit(
  runtime: OutputRuntimeEnv,
  mode: OutputMode,
  value: unknown,
  humanFallback: () => string,
): void {
  const rendered = mode === "json" ? JSON.stringify(value, null, 2) : humanFallback();
  runtime.writeStdout(scrubSentinel(rendered));
}
|
||||
|
||||
/**
 * Report an error through `runtime.error` in the requested mode.
 *
 * The message is scrubbed of the redaction sentinel first. JSON mode emits
 * a single-line `{ "error": { code, message } }` object; human mode emits
 * `CODE: message`.
 *
 * @param runtime - Output sink; only `error` is used.
 * @param mode - Rendering to produce.
 * @param message - Error description.
 * @param code - Stable error code; defaults to `"ERR"`.
 */
function emitError(
  runtime: OutputRuntimeEnv,
  mode: OutputMode,
  message: string,
  code = "ERR",
): void {
  const safeMessage = scrubSentinel(message);
  const line =
    mode === "json"
      ? JSON.stringify({ error: { code, message: safeMessage } })
      : `${code}: ${safeMessage}`;
  runtime.error(line);
}
|
||||
|
||||
/**
 * Read a workspace file and parse it with the parser matching its kind.
 *
 * The file is read as UTF-8 from `absPath`, and the kind is inferred from
 * `fileName`. Any kind other than jsonc, jsonl or yaml is parsed as
 * markdown.
 *
 * @param absPath - Filesystem path to read.
 * @param fileName - Name handed to `inferKind` to choose the parser.
 * @returns The `ast` member of the parser's result.
 */
async function loadAst(absPath: string, fileName: string): Promise<OcAst> {
  const text = await fs.readFile(absPath, "utf-8");
  switch (inferKind(fileName)) {
    case "jsonc":
      return parseJsonc(text).ast;
    case "jsonl":
      return parseJsonl(text).ast;
    case "yaml":
      return parseYaml(text).ast;
    default:
      return parseMd(text).ast;
  }
}
|
||||
|
||||
/**
 * Serialize an AST with the emitter matching `ast.kind`.
 *
 * @param ast - Parsed file to emit.
 * @param fileName - When given, forwarded to the emitter as
 *   `fileNameForGuard` so a sentinel-guard error names the file
 *   (`oc://gateway.jsonc/[raw]`) rather than the empty-slot fallback
 *   (`oc:///[raw]`).
 * @returns The emitted text.
 * @throws Error if `ast.kind` is none of jsonc, jsonl, yaml or md.
 */
function emitForKind(ast: OcAst, fileName?: string): string {
  const opts = fileName !== undefined ? { fileNameForGuard: fileName } : {};
  if (ast.kind === "jsonc") {
    return emitJsonc(ast, opts);
  }
  if (ast.kind === "jsonl") {
    return emitJsonl(ast, opts);
  }
  if (ast.kind === "yaml") {
    // The default round-trip mode is used on purpose: an unmodified AST
    // emits its original bytes (so `openclaw path emit foo.yaml` is a true
    // byte-fidelity diagnostic), and after `setOcPath` the substrate has
    // already re-rendered into `ast.raw`, so no render-mode override is
    // needed for mutated ASTs either.
    return emitYaml(ast, opts);
  }
  if (ast.kind === "md") {
    return emitMd(ast, opts);
  }
  throw new Error(`unreachable: emitForKind kind`);
}
|
||||
|
||||
/**
 * Map an oc-path's file slot to a filesystem path.
 *
 * `options.file`, when set, overrides the file slot entirely and is
 * resolved on its own (not against `options.cwd`). Otherwise
 * `path.file` is resolved against `options.cwd`, falling back to the
 * process working directory.
 */
function resolveFsPath(path: OcPath, options: PathCommandOptions): string {
  const { file } = options;
  const baseDir = options.cwd ?? process.cwd();
  return file !== undefined ? resolvePath(file) : resolvePath(baseDir, path.file);
}
|
||||
|
||||
/**
 * Render a single match as a one-line human-readable summary.
 *
 * Leaf matches show the JSON-quoted value text and leaf type; node and
 * insertion-point matches show their descriptor / container in
 * brackets; any other kind is rendered as the root.
 */
function formatMatchHuman(match: OcMatch): string {
  switch (match.kind) {
    case "leaf": {
      const quoted = JSON.stringify(match.valueText);
      return `leaf @ L${match.line}: ${quoted} (${match.leafType})`;
    }
    case "node":
      return `node @ L${match.line} [${match.descriptor}]`;
    case "insertion-point":
      return `insertion-point @ L${match.line} [${match.container}]`;
    default:
      return `root @ L${match.line}`;
  }
}
|
||||
|
||||
/**
 * `path resolve` handler: parse `pathStr`, load the addressed file and
 * print the single match the path resolves to.
 *
 * Exit codes set through `runtime.exit`:
 *   - 2: missing argument, unparseable oc-path, or `resolveOcPath`
 *        refusing the path with an `OcPathError` (e.g. a wildcard
 *        pattern, which belongs to `find`)
 *   - 1: the path parsed but nothing matched
 *
 * Errors that are not `OcPathError` (including filesystem errors from
 * loading the file) are rethrown / propagate to the caller.
 */
export async function pathResolveCommand(
  pathStr: string | undefined,
  options: PathCommandOptions,
  runtime: OutputRuntimeEnv,
): Promise<void> {
  const mode = detectMode(options);
  if (pathStr === undefined) {
    emitError(runtime, mode, "resolve: missing <oc-path> argument");
    runtime.exit(2);
    return;
  }

  let ocPath: OcPath;
  try {
    ocPath = parseOcPath(pathStr);
  } catch (err) {
    if (!(err instanceof OcPathError)) {
      throw err;
    }
    emitError(runtime, mode, `parse failed: ${err.message}`, err.code);
    runtime.exit(2);
    return;
  }

  const ast = await loadAst(resolveFsPath(ocPath, options), ocPath.file);

  let match;
  try {
    match = resolveOcPath(ast, ocPath);
  } catch (err) {
    if (!(err instanceof OcPathError)) {
      throw err;
    }
    // Surface the structured code so the message points the caller at
    // the right verb.
    emitError(runtime, mode, `resolve refused: ${err.message}`, err.code);
    runtime.exit(2);
    return;
  }

  if (match !== null) {
    const found = match;
    emit(
      runtime,
      mode,
      { resolved: true, ocPath: pathStr, match: found },
      () => formatMatchHuman(found),
    );
    return;
  }

  emit(
    runtime,
    mode,
    { resolved: false, ocPath: pathStr },
    () => `not found: ${pathStr}`,
  );
  runtime.exit(1);
}
|
||||
|
||||
/**
 * `path set` handler: parse `pathStr`, load the addressed file, write
 * `value` at that path and persist the re-emitted file (or, with
 * `options.dryRun`, only report what would be written).
 *
 * Exit codes set through `runtime.exit`:
 *   - 2: missing arguments or unparseable oc-path
 *   - 1: `setOcPath` reported `ok: false`, or the substrate's sentinel
 *        guard refused the value (`OcEmitSentinelError`) during set or
 *        emit
 *
 * `OcEmitSentinelError` is caught at both the set and the emit step so
 * it is reported through `emitError` (scrubbed, mode-aware) instead of
 * escaping to the outer command runner. Any other error is rethrown.
 *
 * The reported byte counts are UTF-8 byte lengths — the size that is
 * actually written to disk — not the JavaScript string length.
 */
export async function pathSetCommand(
  pathStr: string | undefined,
  value: string | undefined,
  options: PathCommandOptions,
  runtime: OutputRuntimeEnv,
): Promise<void> {
  const mode = detectMode(options);
  if (pathStr === undefined || value === undefined) {
    emitError(runtime, mode, "set: requires <oc-path> <value>");
    runtime.exit(2);
    return;
  }

  let ocPath: OcPath;
  try {
    ocPath = parseOcPath(pathStr);
  } catch (err) {
    if (err instanceof OcPathError) {
      emitError(runtime, mode, `parse failed: ${err.message}`, err.code);
      runtime.exit(2);
      return;
    }
    throw err;
  }

  const fsPath = resolveFsPath(ocPath, options);
  const ast = await loadAst(fsPath, ocPath.file);

  // The per-kind editor re-emits while rebuilding `raw`, so the
  // sentinel guard can already fire inside `setOcPath`.
  let result: SetResult;
  try {
    result = setOcPath(ast, ocPath, value);
  } catch (err) {
    if (err instanceof OcEmitSentinelError) {
      emitError(runtime, mode, `set refused: ${err.message}`, "OC_EMIT_SENTINEL");
      runtime.exit(1);
      return;
    }
    throw err;
  }

  if (!result.ok) {
    const reason = result.reason;
    const detail = "detail" in result ? result.detail : undefined;
    emit(
      runtime,
      mode,
      { ok: false, reason, detail },
      () => `set failed: ${reason}${detail !== undefined ? ` — ${detail}` : ""}`,
    );
    runtime.exit(1);
    return;
  }

  // The value was accepted into the AST, but the emitter can still
  // refuse to serialize it (defense in depth for the sentinel guard).
  let newBytes: string;
  try {
    newBytes = emitForKind(result.ast, ocPath.file);
  } catch (err) {
    if (err instanceof OcEmitSentinelError) {
      emitError(runtime, mode, `emit refused: ${err.message}`, "OC_EMIT_SENTINEL");
      runtime.exit(1);
      return;
    }
    throw err;
  }

  // `newBytes.length` counts UTF-16 code units; the file is written as
  // UTF-8, so measure the encoded size for everything labelled "bytes".
  const byteCount = Buffer.byteLength(newBytes, "utf-8");

  // Editing jsonc / yaml can drop comments and original formatting;
  // tell the user explicitly.
  const lossyKinds: ReadonlySet<OcAst["kind"]> = new Set(["jsonc", "yaml"]);
  const formatLossWarning = lossyKinds.has(result.ast.kind)
    ? `note: ${result.ast.kind} edit-then-emit drops comments / original formatting (render mode)`
    : null;

  if (options.dryRun === true) {
    emit(
      runtime,
      mode,
      {
        ok: true,
        dryRun: true,
        bytes: newBytes,
        ...(formatLossWarning !== null ? { warning: formatLossWarning } : {}),
      },
      () => {
        const lines = [`--dry-run: would write ${byteCount} bytes to ${fsPath}`];
        if (formatLossWarning !== null) {
          lines.push(formatLossWarning);
        }
        lines.push(newBytes);
        return lines.join("\n");
      },
    );
    return;
  }

  await fs.writeFile(fsPath, newBytes, "utf-8");
  emit(
    runtime,
    mode,
    {
      ok: true,
      dryRun: false,
      bytesWritten: byteCount,
      fsPath,
      ...(formatLossWarning !== null ? { warning: formatLossWarning } : {}),
    },
    () => {
      const lines = [`wrote ${byteCount} bytes to ${fsPath}`];
      if (formatLossWarning !== null) {
        lines.push(formatLossWarning);
      }
      return lines.join("\n");
    },
  );
}
|
||||
|
||||
/**
 * `path find` handler: parse `patternStr`, load the addressed file and
 * list every concrete path inside it that matches the pattern.
 *
 * Exit codes set through `runtime.exit`:
 *   - 2: missing argument, unparseable pattern, or a `*` / `?` in the
 *        file slot (the CLI resolves the file slot to one literal
 *        filesystem path; multi-file globbing is not supported)
 *   - 1: the search ran but produced zero matches (the empty result is
 *        still emitted first)
 */
export async function pathFindCommand(
  patternStr: string | undefined,
  options: PathCommandOptions,
  runtime: OutputRuntimeEnv,
): Promise<void> {
  const mode = detectMode(options);
  if (patternStr === undefined) {
    emitError(runtime, mode, "find: missing <pattern> argument");
    runtime.exit(2);
    return;
  }

  let pattern: OcPath;
  try {
    pattern = parseOcPath(patternStr);
  } catch (err) {
    if (!(err instanceof OcPathError)) {
      throw err;
    }
    emitError(runtime, mode, `parse failed: ${err.message}`, err.code);
    runtime.exit(2);
    return;
  }

  // Reject file-slot wildcards up front; otherwise the user would only
  // see a confusing missing-file failure from the read.
  const hasFileWildcard = /[*?]/.test(pattern.file);
  if (hasFileWildcard) {
    emitError(
      runtime,
      mode,
      `find: file-slot wildcards are not supported (got "${pattern.file}"). ` +
        `Pass a concrete file path; multi-file globbing is a follow-up feature.`,
      "OC_PATH_FILE_WILDCARD_UNSUPPORTED",
    );
    runtime.exit(2);
    return;
  }

  const ast = await loadAst(resolveFsPath(pattern, options), pattern.file);
  const matches = findOcPaths(ast, pattern);
  const total = matches.length;

  emit(
    runtime,
    mode,
    {
      pattern: patternStr,
      count: total,
      matches: matches.map((m) => ({
        path: formatOcPath(m.path),
        match: m.match,
      })),
    },
    () => {
      if (total === 0) {
        return `0 matches for ${patternStr}`;
      }
      const plural = total === 1 ? "" : "es";
      const header = `${total} match${plural} for ${patternStr}:`;
      const rows = matches.map(
        (m) => ` ${formatOcPath(m.path)} → ${formatMatchHuman(m.match)}`,
      );
      return [header, ...rows].join("\n");
    },
  );

  if (total === 0) {
    runtime.exit(1);
  }
}
|
||||
|
||||
/**
 * `path validate` handler: check that `pathStr` parses as an oc-path
 * and print its decomposed slots. No file is read.
 *
 * Exit codes set through `runtime.exit`:
 *   - 2: missing argument
 *   - 1: an `OcPathError` was raised; the `{ valid: false, … }` result
 *        is written through the normal output channel
 *
 * Errors that are not `OcPathError` are rethrown.
 */
export function pathValidateCommand(
  pathStr: string | undefined,
  options: PathCommandOptions,
  runtime: OutputRuntimeEnv,
): void {
  const mode = detectMode(options);
  if (pathStr === undefined) {
    emitError(runtime, mode, "validate: missing <oc-path> argument");
    runtime.exit(2);
    return;
  }

  try {
    const ocPath = parseOcPath(pathStr);
    const { file, section, item, field, session } = ocPath;
    emit(
      runtime,
      mode,
      {
        valid: true,
        ocPath: pathStr,
        formatted: formatOcPath(ocPath),
        structure: { file, section, item, field, session },
      },
      () => {
        const lines = [`valid: ${pathStr}`, ` file: ${file}`];
        // Optional slots are listed only when present, in fixed order.
        const optionalSlots = [
          ["section", section],
          ["item", item],
          ["field", field],
          ["session", session],
        ] as const;
        for (const [label, slotValue] of optionalSlots) {
          if (slotValue !== undefined) {
            lines.push(` ${label}: ${slotValue}`);
          }
        }
        return lines.join("\n");
      },
    );
  } catch (err) {
    if (!(err instanceof OcPathError)) {
      throw err;
    }
    emit(
      runtime,
      mode,
      { valid: false, code: err.code, message: err.message },
      () => `INVALID: ${err.code}: ${err.message}`,
    );
    runtime.exit(1);
  }
}
|
||||
|
||||
/**
 * `path emit` handler: parse a file and write its re-emitted bytes to
 * stdout.
 *
 * The target is `options.file` when set; otherwise `fileArg` resolved
 * against `options.cwd` (default: the process working directory). The
 * last path component of the resolved target decides which parser and
 * emitter are used.
 *
 * JSON mode writes `{ ok, kind, bytes }` on one line; human mode writes
 * the emitted bytes as-is. Neither goes through `scrubSentinel`.
 *
 * Exit codes set through `runtime.exit`:
 *   - 2: missing argument
 *   - 1: the emitter raised `OcEmitSentinelError`
 */
export async function pathEmitCommand(
  fileArg: string | undefined,
  options: PathCommandOptions,
  runtime: OutputRuntimeEnv,
): Promise<void> {
  const mode = detectMode(options);
  if (fileArg === undefined) {
    emitError(runtime, mode, "emit: missing <file> argument");
    runtime.exit(2);
    return;
  }

  // Same `--cwd` / `--file` rules as the sibling subcommands.
  const fsPath =
    options.file === undefined
      ? resolvePath(options.cwd ?? process.cwd(), fileArg)
      : resolvePath(options.file);
  const fileName = fsPath.split(/[\\/]/).pop() ?? fileArg;
  const ast = await loadAst(fsPath, fileName);

  let bytes: string;
  try {
    bytes = emitForKind(ast, fileName);
  } catch (err) {
    if (!(err instanceof OcEmitSentinelError)) {
      throw err;
    }
    emitError(runtime, mode, `emit refused: ${err.message}`, "OC_EMIT_SENTINEL");
    runtime.exit(1);
    return;
  }

  const output = mode === "json" ? JSON.stringify({ ok: true, kind: ast.kind, bytes }) : bytes;
  runtime.writeStdout(output);
}
|
||||
125
src/oc-path/ast.ts
Normal file
125
src/oc-path/ast.ts
Normal file
@@ -0,0 +1,125 @@
|
||||
/**
|
||||
* Workspace-Markdown AST — generic addressing index over the 8 workspace
|
||||
* files openclaw treats as opaque text in `loadWorkspaceBootstrapFiles`.
|
||||
*
|
||||
* **The AST is purely an addressing index.** It does NOT encode opinions
|
||||
* about what a "valid" SOUL.md / AGENTS.md / MEMORY.md looks like; it
|
||||
* exposes the markdown features (frontmatter, sections, items, tables,
|
||||
* code blocks) that any `OcPath` (`{ file, section?, item?, field? }`) can
|
||||
* resolve over. Per-file lint opinions ride in @openclaw/oc-lint, not
|
||||
* here.
|
||||
*
|
||||
* **Byte-fidelity contract**: `emitMd(parse(raw)) === raw` for every input
|
||||
* the parser accepts. The parser preserves the original bytes on the
|
||||
* root node (`raw`) so emitters can round-trip even content the AST
|
||||
* doesn't structurally model (foreign content, idiosyncratic whitespace).
|
||||
*
|
||||
* @module @openclaw/oc-path/ast
|
||||
*/
|
||||
|
||||
/**
|
||||
* Diagnostic emitted by the parser. Used by lint rules and parse-error
|
||||
* surfacing alike. Severity is `info` by default; the parser emits
|
||||
* `warning` for suspicious-but-recoverable inputs (e.g., unclosed
|
||||
* frontmatter fence) and never throws.
|
||||
*/
|
||||
export interface Diagnostic {
|
||||
// Source line the diagnostic refers to (presumably 1-based — TODO confirm against the parser).
readonly line: number;
|
||||
// Human-readable description of the finding.
readonly message: string;
|
||||
// How serious the finding is; one of the three listed levels.
readonly severity: 'info' | 'warning' | 'error';
|
||||
// Optional identifier for the diagnostic (presumably a stable machine-readable code — confirm).
readonly code?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* A frontmatter key/value pair. Keys are preserved as written; values
|
||||
* are unquoted (surrounding `"` or `'` stripped) but otherwise verbatim.
|
||||
*/
|
||||
export interface FrontmatterEntry {
|
||||
// Key exactly as written in the file.
readonly key: string;
|
||||
// Value with surrounding quotes stripped, otherwise verbatim.
readonly value: string;
|
||||
// Source line of the entry (presumably 1-based — TODO confirm against the parser).
readonly line: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* A bullet-list item inside a section. Items are addressable via OcPath
|
||||
* `{ file, section, item }` where `item` is the slug of the bullet's
|
||||
* text (or the slug of `kv.key` when the bullet is in `- key: value`
|
||||
* shape).
|
||||
*
|
||||
* `kv` is populated when the bullet matches `- <key>: <value>` (the
|
||||
* common pattern in AGENTS.md / TOOLS.md / USER.md). Lint rules use it
|
||||
* for field-level addressing via `OcPath.field`.
|
||||
*/
|
||||
export interface AstItem {
|
||||
// Text of the bullet.
readonly text: string;
|
||||
// Addressing slug: the slug of the bullet text, or of `kv.key` for `- key: value` bullets.
readonly slug: string;
|
||||
// Source line of the bullet (presumably 1-based — TODO confirm against the parser).
readonly line: number;
|
||||
// Present only when the bullet has the `- <key>: <value>` shape.
readonly kv?: { readonly key: string; readonly value: string };
|
||||
}
|
||||
|
||||
/**
|
||||
* A markdown table. Tables surface in `## Tool Guidance` blocks and
|
||||
* elsewhere; lint rules can address rows by header value if needed.
|
||||
*/
|
||||
export interface AstTable {
|
||||
// Header cells, in column order.
readonly headers: readonly string[];
|
||||
// Body rows; each row is an array of cell strings.
readonly rows: readonly (readonly string[])[];
|
||||
// Source line of the table (presumably its first line, 1-based — TODO confirm).
readonly line: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* A fenced code block. Carries the language tag (or `null`) and the
|
||||
* verbatim body.
|
||||
*/
|
||||
export interface AstCodeBlock {
|
||||
// Language tag from the fence, or `null` when the fence has none.
readonly lang: string | null;
|
||||
// Verbatim body of the code block.
readonly text: string;
|
||||
// Source line of the block (presumably the opening fence, 1-based — TODO confirm).
readonly line: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* An H2-delimited block. The `slug` is the kebab-case lowercase form of
|
||||
* `heading` and is what OcPath `section` matches against. `bodyText` is
|
||||
* the prose between this heading and the next H2 (or end of file),
|
||||
* verbatim. `items`, `tables`, `codeBlocks` are extracted from
|
||||
* `bodyText` for addressing convenience but the raw text is preserved.
|
||||
*/
|
||||
export interface AstBlock {
|
||||
// Heading text of the H2.
readonly heading: string;
|
||||
// Kebab-case lowercase form of `heading`; what an OcPath `section` is matched against.
readonly slug: string;
|
||||
// Source line of the heading (presumably 1-based — TODO confirm against the parser).
readonly line: number;
|
||||
// Verbatim prose between this heading and the next H2 (or end of file).
readonly bodyText: string;
|
||||
// Bullet items extracted from `bodyText`.
readonly items: readonly AstItem[];
|
||||
// Tables extracted from `bodyText`.
readonly tables: readonly AstTable[];
|
||||
// Fenced code blocks extracted from `bodyText`.
readonly codeBlocks: readonly AstCodeBlock[];
|
||||
}
|
||||
|
||||
/**
|
||||
* The root AST node. Always carries `raw` for byte-identical round-trip.
|
||||
* `frontmatter` is empty when the file has none. `preamble` is the
|
||||
* prose before the first H2 (may be empty). `blocks` is the H2 tree in
|
||||
* document order.
|
||||
*
|
||||
* `kind: 'md'` discriminator matches the jsonc / jsonl / yaml AST
|
||||
* shapes; the universal `setOcPath` / `resolveOcPath` verbs dispatch
|
||||
* via this tag at runtime so callers don't have to thread kind
|
||||
* through the call site.
|
||||
*
|
||||
* The generic shape is the same for every workspace file; opinions
|
||||
* (`AGENTS_TOOLS_SECTION_EMPTY`, etc.) ride in lint rules, not here.
|
||||
*/
|
||||
export interface MdAst {
|
||||
// Discriminator tag identifying this AST as markdown.
readonly kind: 'md';
|
||||
// Original file bytes, kept so emit can round-trip byte-for-byte.
readonly raw: string;
|
||||
// Frontmatter entries; empty when the file has no frontmatter.
readonly frontmatter: readonly FrontmatterEntry[];
|
||||
// Prose before the first H2; may be the empty string.
readonly preamble: string;
|
||||
// H2 blocks in document order.
readonly blocks: readonly AstBlock[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Parser output: the AST plus any diagnostics from the parse pass.
|
||||
*/
|
||||
export interface ParseResult {
|
||||
// The parsed markdown AST.
readonly ast: MdAst;
|
||||
// Diagnostics collected during the parse pass.
readonly diagnostics: readonly Diagnostic[];
|
||||
}
|
||||
31
src/oc-path/dispatch.ts
Normal file
31
src/oc-path/dispatch.ts
Normal file
@@ -0,0 +1,31 @@
|
||||
/**
|
||||
* Cross-kind utilities. The substrate exposes per-kind verbs only;
|
||||
* `inferKind` is a convention helper for callers who want to map
|
||||
* filename → kind so they can pick the right `parseXxx` / `setXxx` /
|
||||
* `resolveXxx` function.
|
||||
*
|
||||
* Earlier drafts had `resolveOcPath` / `setOcPath` / `appendOcPath`
|
||||
* universal dispatchers with tagged-union AST inputs. They were dropped
|
||||
* — the kind tag bled through every consumer (lint runner, doctor
|
||||
* fixers, tests) since those code paths still needed to know the kind
|
||||
* to use the result. Per-kind verbs are honest about input/output.
|
||||
*
|
||||
* @module @openclaw/oc-path/dispatch
|
||||
*/
|
||||
|
||||
export type OcKind = 'md' | 'jsonc' | 'jsonl' | 'yaml';

/**
 * Suggest a kind for a filename based on its extension
 * (case-insensitive). This is only the substrate's default mapping;
 * callers are free to override it.
 *
 *   .md                      → 'md'
 *   .jsonl, .ndjson          → 'jsonl'
 *   .jsonc, .json            → 'jsonc'
 *   .yaml, .yml, .lobster    → 'yaml'
 *
 * @returns The mapped kind, or `null` when no suffix matches.
 */
export function inferKind(filename: string): OcKind | null {
  const suffixTable: ReadonlyArray<readonly [OcKind, readonly string[]]> = [
    ['md', ['.md']],
    ['jsonl', ['.jsonl', '.ndjson']],
    ['jsonc', ['.jsonc', '.json']],
    ['yaml', ['.yaml', '.yml', '.lobster']],
  ];
  const lowered = filename.toLowerCase();
  for (const [kind, suffixes] of suffixTable) {
    if (suffixes.some((suffix) => lowered.endsWith(suffix))) {
      return kind;
    }
  }
  return null;
}
|
||||
153
src/oc-path/edit.ts
Normal file
153
src/oc-path/edit.ts
Normal file
@@ -0,0 +1,153 @@
|
||||
/**
|
||||
* Mutate a `MdAst` at an OcPath. Returns a new AST with the
|
||||
* value replaced; the original is unchanged.
|
||||
*
|
||||
* Writable surface:
|
||||
*
|
||||
* oc://FILE/[frontmatter]/key → frontmatter entry value
|
||||
* oc://FILE/section/item/field → item.kv.value (when item has kv shape)
|
||||
*
|
||||
* Section bodies, tables, and code blocks are NOT writable through
|
||||
* this primitive — they're prose, and a generic "set" doesn't compose
|
||||
* cleanly. Doctor fixers handle structural edits via dedicated verbs.
|
||||
*
|
||||
* @module @openclaw/oc-path/edit
|
||||
*/
|
||||
|
||||
import type { AstBlock, AstItem, FrontmatterEntry, MdAst } from './ast.js';
|
||||
import type { OcPath } from './oc-path.js';
|
||||
|
||||
// Outcome of `setMdOcPath`: the new AST on success, or a failure reason.
export type MdEditResult =
|
||||
| { readonly ok: true; readonly ast: MdAst }
|
||||
| {
|
||||
readonly ok: false;
|
||||
// 'unresolved'   — the addressed frontmatter key, section, item or field was not found
// 'not-writable' — a non-frontmatter path is missing its section, item or field slot
// 'no-item-kv'   — the item exists but is not in `- key: value` shape
readonly reason: 'unresolved' | 'not-writable' | 'no-item-kv';
|
||||
};
|
||||
|
||||
/**
 * Return a copy of `ast` with the value addressed by `path` replaced by
 * `newValue`; the input AST is not modified. On success the returned
 * AST carries a `raw` re-rendered from its structural fields.
 *
 * Two address shapes are writable:
 *   - `[frontmatter]` section: the key is taken from the item slot,
 *     falling back to the field slot, and matched case-sensitively.
 *   - section / item / field: section and item are matched against
 *     slugs after lower-casing; the field must equal the item's
 *     `kv.key` case-insensitively.
 *
 * Failure reasons: `not-writable` when a non-frontmatter path lacks any
 * of section / item / field; `no-item-kv` when the item has no `kv`;
 * `unresolved` for every lookup miss.
 */
export function setMdOcPath(
  ast: MdAst,
  path: OcPath,
  newValue: string,
): MdEditResult {
  const { section, item: itemSeg, field } = path;

  // Frontmatter address: oc://FILE/[frontmatter]/<key>
  if (section === '[frontmatter]') {
    const key = itemSeg ?? field;
    if (key === undefined) {
      return { ok: false, reason: 'unresolved' };
    }
    const fmIndex = ast.frontmatter.findIndex((entry) => entry.key === key);
    const current = fmIndex === -1 ? undefined : ast.frontmatter[fmIndex];
    if (current === undefined) {
      return { ok: false, reason: 'unresolved' };
    }
    const updatedEntry: FrontmatterEntry = { ...current, value: newValue };
    const frontmatter = ast.frontmatter.map((entry, i) =>
      i === fmIndex ? updatedEntry : entry,
    );
    return finalize({ ...ast, frontmatter });
  }

  // Item-field address: oc://FILE/section/item/field
  if (section === undefined || itemSeg === undefined || field === undefined) {
    return { ok: false, reason: 'not-writable' };
  }

  const wantedSection = section.toLowerCase();
  const blockIdx = ast.blocks.findIndex((b) => b.slug === wantedSection);
  const block = blockIdx === -1 ? undefined : ast.blocks[blockIdx];
  if (block === undefined) {
    return { ok: false, reason: 'unresolved' };
  }

  const wantedItem = itemSeg.toLowerCase();
  const itemIdx = block.items.findIndex((candidate) => candidate.slug === wantedItem);
  const target = itemIdx === -1 ? undefined : block.items[itemIdx];
  if (target === undefined) {
    return { ok: false, reason: 'unresolved' };
  }
  if (target.kv === undefined) {
    return { ok: false, reason: 'no-item-kv' };
  }
  if (target.kv.key.toLowerCase() !== field.toLowerCase()) {
    return { ok: false, reason: 'unresolved' };
  }

  const updatedItem: AstItem = {
    ...target,
    kv: { key: target.kv.key, value: newValue },
  };
  const items = block.items.map((existing, i) => (i === itemIdx ? updatedItem : existing));
  const updatedBlock: AstBlock = {
    ...block,
    items,
    bodyText: rebuildBlockBody(block, items),
  };
  const blocks = ast.blocks.map((existing, i) => (i === blockIdx ? updatedBlock : existing));
  return finalize({ ...ast, blocks });
}
|
||||
|
||||
/**
 * Produce the block's `bodyText` with edited item values applied.
 *
 * `newItems` is compared index-by-index with `block.items`. For every
 * position where both items have a `kv` and the value differs, the
 * first line in the body of the form `- <key>: …` (matched on the OLD
 * item's key) has everything after the colon replaced by the new
 * value. If no such line exists the body is left untouched for that
 * item.
 *
 * Two safety properties hold:
 *   - The new value is inserted literally. `$&`, `$1`, `$$` and similar
 *     sequences in the value are NOT treated as replacement patterns.
 *   - Matching never crosses a line break, so a bullet whose current
 *     value is empty (`- key:`) cannot swallow the following line.
 *
 * @param block - Block whose `bodyText` / `items` describe the state
 *   before the edit.
 * @param newItems - Items after the edit, positionally aligned with
 *   `block.items`.
 * @returns The updated body text.
 */
function rebuildBlockBody(block: AstBlock, newItems: readonly AstItem[]): string {
  // Horizontal whitespace only: `\s` would also match line terminators.
  const gap = '[^\\S\\r\\n]*';
  let body = block.bodyText;
  for (let i = 0; i < newItems.length; i++) {
    const newItem = newItems[i];
    const oldItem = block.items[i];
    if (newItem === undefined || oldItem === undefined) {continue;}
    if (newItem.kv === undefined || oldItem.kv === undefined) {continue;}
    if (newItem.kv.value === oldItem.kv.value) {continue;}
    const replacement = newItem.kv.value;
    const re = new RegExp(
      `^(${gap}-${gap}${escapeRegex(oldItem.kv.key)}${gap}:${gap}).*$`,
      'm',
    );
    // A replacer function keeps `$` sequences in the value literal.
    body = body.replace(re, (_line: string, prefix: string) => {
      // `- key:` with nothing after the colon: keep `key: value` shape.
      const lead = prefix.endsWith(':') && replacement.length > 0 ? `${prefix} ` : prefix;
      return `${lead}${replacement}`;
    });
  }
  return body;
}

/** Escape every regex metacharacter in `s` so it matches literally. */
function escapeRegex(s: string): string {
  return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
|
||||
|
||||
/**
 * Wrap `ast` in a successful edit result after regenerating its `raw`
 * from the structural fields.
 *
 * Layout: an optional `---` fenced frontmatter section, then the
 * preamble (if non-empty), then each block as `## <heading>` followed
 * by its body text (if non-empty). A blank line separates consecutive
 * sections; everything is joined with `\n`.
 */
function finalize(ast: MdAst): MdEditResult {
  const chunks: string[] = [];
  // Insert the blank separator line unless this is the first section.
  const separate = (): void => {
    if (chunks.length > 0) {
      chunks.push('');
    }
  };

  if (ast.frontmatter.length > 0) {
    const entries = ast.frontmatter.map(
      (fm) => `${fm.key}: ${formatFrontmatterValue(fm.value)}`,
    );
    chunks.push('---', ...entries, '---');
  }

  if (ast.preamble.length > 0) {
    separate();
    chunks.push(ast.preamble);
  }

  for (const { heading, bodyText } of ast.blocks) {
    separate();
    chunks.push(`## ${heading}`);
    if (bodyText.length > 0) {
      chunks.push(bodyText);
    }
  }

  return { ok: true, ast: { ...ast, raw: chunks.join('\n') } };
}
|
||||
|
||||
/**
 * Render a frontmatter value for output: the empty string becomes
 * `""`, a value containing any YAML-significant character (or a line
 * break) is JSON-quoted, and anything else is emitted bare.
 */
function formatFrontmatterValue(value: string): string {
  if (value === '') {
    return '""';
  }
  const needsQuoting = /[:#&*?|<>=!%@`,[\]{}\r\n]/.test(value);
  return needsQuoting ? JSON.stringify(value) : value;
}
|
||||
|
||||
137
src/oc-path/emit.ts
Normal file
137
src/oc-path/emit.ts
Normal file
@@ -0,0 +1,137 @@
|
||||
/**
|
||||
* Emit an AST back to bytes.
|
||||
*
|
||||
* **Two modes**:
|
||||
*
|
||||
* 1. **Round-trip** — the AST hasn't been mutated since `parseMd`
|
||||
* produced it. Returns `ast.raw` verbatim. Byte-identical.
|
||||
*
|
||||
* 2. **Mutation-aware** — the AST has been modified (frontmatter
|
||||
* entry edited, item kv.value changed, block reordered). Returns
|
||||
* a freshly-rendered representation. **Not** byte-identical to a
|
||||
* hypothetical "perfect" rewrite — we render canonical forms
|
||||
* (LF endings, single space after `:` in frontmatter, etc.).
|
||||
* Callers needing byte-fidelity for partial edits should patch
|
||||
* `raw` directly instead of mutating the AST.
|
||||
*
|
||||
* In both modes, every emitted leaf flows through `guardSentinel` so a
|
||||
* `__OPENCLAW_REDACTED__` literal anywhere in the output throws
|
||||
* `OcEmitSentinelError`. This is the substrate guard: callers can't
|
||||
* accidentally write a redacted view to disk through this emitter.
|
||||
*
|
||||
* @module @openclaw/oc-path/emit
|
||||
*/
|
||||
|
||||
import type { FrontmatterEntry, MdAst } from './ast.js';
|
||||
import { guardSentinel } from './sentinel.js';
|
||||
|
||||
/**
|
||||
* Emit options. `mode: 'roundtrip'` (default) returns `ast.raw` if
|
||||
* present and not flagged as dirty; `mode: 'render'` always
|
||||
* re-renders.
|
||||
*/
|
||||
export interface EmitOptions {
|
||||
/**
 * `'roundtrip'` returns `ast.raw` verbatim; `'render'` rebuilds the
 * output from the AST's structural fields. Treated as `'roundtrip'`
 * when omitted.
 */
readonly mode?: 'roundtrip' | 'render';
|
||||
/**
 * File name used to build the path reported to `guardSentinel`: the
 * prefix is `oc://<fileNameForGuard>`, or the bare `oc://` when this
 * is omitted or empty.
 */
readonly fileNameForGuard?: string;
|
||||
/**
 * See `JsoncEmitOptions.acceptPreExistingSentinel` for the rationale.
 * Treated as `true` when omitted — round-trip echoes the parsed bytes
 * without scanning for the sentinel. When `false`, round-trip mode
 * invokes the guard if `ast.raw` contains the sentinel. Render mode
 * does not consult this option.
 */
readonly acceptPreExistingSentinel?: boolean;
|
||||
}
|
||||
|
||||
/**
 * Serialize a markdown AST.
 *
 * Round-trip mode (the default) returns `ast.raw` verbatim. It only
 * checks for the redaction sentinel when the caller passes
 * `acceptPreExistingSentinel: false`; a markdown file may legitimately
 * contain the literal (code block, pasted log), and scanning by default
 * would make such a file impossible to emit.
 *
 * Render mode rebuilds the text from the structural fields (formatting
 * details such as extra blank lines are lost) and passes every string
 * it emits through `guardSentinel`: frontmatter values, the preamble,
 * each block heading, each non-empty block body and the `kv` values of
 * that block's items.
 *
 * @param ast - AST to serialize.
 * @param opts - See `EmitOptions`; all fields optional.
 * @returns The serialized markdown.
 * @throws OcEmitSentinelError (raised by `guardSentinel`) when a
 *   guarded string is rejected.
 */
export function emitMd(ast: MdAst, opts: EmitOptions = {}): string {
  const mode = opts.mode ?? 'roundtrip';
  const guardPath = opts.fileNameForGuard ? `oc://${opts.fileNameForGuard}` : 'oc://';
  const acceptPreExisting = opts.acceptPreExistingSentinel ?? true;

  if (mode === 'roundtrip') {
    if (!acceptPreExisting && ast.raw.includes('__OPENCLAW_REDACTED__')) {
      guardSentinel('__OPENCLAW_REDACTED__', `${guardPath}/[raw]`);
    }
    return ast.raw;
  }

  // Render mode: rebuild from structural fields.
  const parts: string[] = [];

  if (ast.frontmatter.length > 0) {
    parts.push('---');
    for (const fm of ast.frontmatter) {
      guardSentinel(fm.value, `${guardPath}/[frontmatter]/${fm.key}`);
      parts.push(`${fm.key}: ${formatFrontmatterValue(fm.value)}`);
    }
    parts.push('---');
  }

  if (ast.preamble.length > 0) {
    guardSentinel(ast.preamble, `${guardPath}/[preamble]`);
    if (parts.length > 0) {parts.push('');}
    parts.push(ast.preamble);
  }

  for (const block of ast.blocks) {
    // The heading is emitted too, so it must pass the guard like every
    // other rendered string.
    guardSentinel(block.heading, `${guardPath}/${block.slug}/[heading]`);
    if (parts.length > 0) {parts.push('');}
    parts.push(`## ${block.heading}`);
    if (block.bodyText.length > 0) {
      // Guard the body as a whole, then each item's kv value so the
      // error can name the precise item when one is at fault.
      guardSentinel(block.bodyText, `${guardPath}/${block.slug}/[body]`);
      for (const item of block.items) {
        if (item.kv) {
          guardSentinel(item.kv.value, `${guardPath}/${block.slug}/${item.slug}/${item.kv.key}`);
        }
      }
      parts.push(block.bodyText);
    }
  }

  return parts.join('\n');
}
|
||||
|
||||
/**
 * Format a frontmatter value for rendering. Empty values are written
 * as `""`; values containing a character that could confuse a YAML
 * parser (or a CR / LF) are JSON-quoted; all other values are written
 * bare.
 */
function formatFrontmatterValue(value: string): string {
  const yamlSensitive = /[:#&*?|<>=!%@`,[\]{}\r\n]/;
  if (value.length === 0) {
    return '""';
  }
  if (!yamlSensitive.test(value)) {
    return value;
  }
  return JSON.stringify(value);
}
|
||||
|
||||
/**
|
||||
* Mark an AST as "dirty" — useful for callers that mutate the AST
|
||||
* structurally and want emitMd() to re-render rather than round-trip.
|
||||
*
|
||||
* Currently a no-op flag — emitMd() decides based on `opts.mode`. Kept
|
||||
* as an extension point for a future invariant where the AST tracks
|
||||
* its own dirty state.
|
||||
*/
|
||||
export function markDirty(_ast: MdAst): void {
|
||||
// Intentionally empty: `_ast` is ignored and no state is recorded.
|
||||
}
|
||||
|
||||
// Re-export the frontmatter type for convenience so tests don't need
|
||||
// to import from ast.ts.
|
||||
export type { FrontmatterEntry };
|
||||
852
src/oc-path/find.ts
Normal file
852
src/oc-path/find.ts
Normal file
@@ -0,0 +1,852 @@
|
||||
/**
|
||||
* `findOcPaths` — universal multi-match verb. Pattern syntax extends
|
||||
* `OcPath` with two wildcard tokens:
|
||||
*
|
||||
* `*` — match a single sub-segment (one map key / one array index)
|
||||
* `**` — match zero or more sub-segments at any depth (recursive)
|
||||
*
|
||||
* **Why a separate verb**: `resolveOcPath` and `setOcPath` are
|
||||
* single-match — they require an exact path because they return one
|
||||
* value or write one leaf. A pattern would be ambiguous. `findOcPaths`
|
||||
* is the search verb: pass a pattern, get every concrete OcPath that
|
||||
* matches plus its `OcMatch` (kind + leaf text / node descriptor).
|
||||
*
|
||||
* Every returned `OcPathMatch` carries a concrete (wildcard-free)
|
||||
* `OcPath`, so callers can pipe results through `setOcPath` or
|
||||
* `resolveOcPath` without rebuilding the path. The slot shape of the
|
||||
* input pattern is preserved (a `*` in the `item` slot produces a
|
||||
* concrete path with the matched value still in `item`).
|
||||
*
|
||||
* **Use cases driving v0**:
|
||||
* - lint rules iterating `oc://workflow.lobster/steps/* /command`
|
||||
* - jsonl session walks `oc://session/* /eventType`
|
||||
* - md frontmatter sweeps `oc://SOUL.md/[frontmatter]/*`
|
||||
*
|
||||
* @module @openclaw/oc-path/find
|
||||
*/
|
||||
|
||||
import { isMap, isScalar, isSeq, type Node, type Pair } from 'yaml';
|
||||
import type { JsoncValue } from './jsonc/ast.js';
|
||||
import type { JsonlAst, JsonlLine } from './jsonl/ast.js';
|
||||
import type { MdAst } from './ast.js';
|
||||
import type { OcPath } from './oc-path.js';
|
||||
import {
|
||||
MAX_TRAVERSAL_DEPTH,
|
||||
OcPathError,
|
||||
WILDCARD_RECURSIVE,
|
||||
WILDCARD_SINGLE,
|
||||
evaluatePredicate,
|
||||
isOrdinalSeg,
|
||||
isPositionalSeg,
|
||||
isPredicateSeg,
|
||||
isQuotedSeg,
|
||||
isUnionSeg,
|
||||
parseOrdinalSeg,
|
||||
parsePredicateSeg,
|
||||
parseUnionSeg,
|
||||
quoteSeg,
|
||||
resolvePositionalSeg,
|
||||
splitRespectingBrackets,
|
||||
unquoteSeg,
|
||||
} from './oc-path.js';
|
||||
import type { PredicateSpec } from './oc-path.js';
|
||||
import type { OcAst, OcMatch } from './universal.js';
|
||||
import { resolveOcPath } from './universal.js';
|
||||
|
||||
// ---------- Public types ---------------------------------------------------
|
||||
|
||||
/**
 * One entry in the result of `findOcPaths`.
 */
export interface OcPathMatch {
  /** Concrete (wildcard-free) path of the matched node. */
  readonly path: OcPath;
  /** What `resolveOcPath` returned for `path`. */
  readonly match: OcMatch;
}
|
||||
|
||||
/**
 * Name of the `OcPath` slot a sub-segment belongs to. Walkers tag every
 * emitted sub-segment with its slot so that re-packing keeps the shape
 * of the input pattern: a `*` written in the `item` slot yields a path
 * whose matched value sits in `item` rather than being joined into
 * `section`.
 */
type Slot = 'section' | 'item' | 'field';

/** A concrete sub-segment produced by a walker, tagged with its slot. */
type SlotSub = {
  readonly slot: Slot;
  readonly value: string;
};
|
||||
|
||||
/**
 * One sub-segment of the input pattern after dot-splitting, tagged with
 * the slot it was written in.
 */
type PatternSub = {
  readonly slot: Slot;
  readonly value: string;
};
|
||||
|
||||
// ---------- Public verb ----------------------------------------------------
|
||||
|
||||
/**
 * Match `pattern` against `ast` and return every concrete OcPath that
 * resolves, each paired with its `resolveOcPath` result. Returns an
 * empty array when nothing matches.
 *
 * A pattern has the same shape as an `OcPath`; any sub-segment may be
 * `*` (single segment), `**` (recursive descent), a positional token,
 * a union or a predicate. A pattern with none of these is resolved
 * directly with one `resolveOcPath` call and wrapped in the find shape.
 *
 * Insertion-marker suffixes (`+` / `+key` / `+nnn`) get no special
 * handling here; whether such a pattern yields anything depends on
 * `resolveOcPath`.
 * NOTE(review): the original doc states these return an empty array;
 * confirm against the resolver.
 *
 * @param ast Parsed workspace file.
 * @param pattern Path pattern to match.
 * @returns The matches; every candidate that does not resolve is dropped.
 */
export function findOcPaths(ast: OcAst, pattern: OcPath): readonly OcPathMatch[] {
  // Tokens that can produce zero or many concrete segments.
  const expands = (token: string): boolean =>
    token === WILDCARD_SINGLE ||
    token === WILDCARD_RECURSIVE ||
    isPositionalSeg(token) ||
    isUnionSeg(token) ||
    isPredicateSeg(token);

  const subs = patternSubs(pattern);

  // Fast path: pure literals need no walk, just a single resolve.
  if (subs.every((sub) => !expands(sub.value))) {
    const direct = resolveOcPath(ast, pattern);
    if (direct === null) {return [];}
    return [{ path: pattern, match: direct }];
  }

  const found: OcPathMatch[] = [];
  for (const candidate of expand(ast, subs, pattern)) {
    const resolved = resolveOcPath(ast, candidate);
    if (resolved === null) {continue;}
    found.push({ path: candidate, match: resolved });
  }
  return found;
}
|
||||
|
||||
// ---------- Pattern unpacking ---------------------------------------------
|
||||
|
||||
/**
 * Flatten a pattern into slot-tagged sub-segments, in section → item →
 * field order. Each defined slot is split on `.` with the bracket-aware
 * splitter so dots inside `[k=1.0]` or `{a.b,c}` do not act as
 * delimiters (P-012/P-013). Undefined slots contribute nothing.
 */
function patternSubs(pattern: OcPath): readonly PatternSub[] {
  const slotTexts: readonly (readonly [Slot, string | undefined])[] = [
    ['section', pattern.section],
    ['item', pattern.item],
    ['field', pattern.field],
  ];
  const tagged: PatternSub[] = [];
  for (const [slot, text] of slotTexts) {
    if (text === undefined) {continue;}
    for (const value of splitRespectingBrackets(text, '.')) {
      tagged.push({ slot, value });
    }
  }
  return tagged;
}
|
||||
|
||||
/**
 * Rebuild an `OcPath` from slot-tagged sub-segments.
 *
 * Sub-segments are grouped by slot (order preserved) and joined with
 * `.`. A slot with no sub-segments is omitted from the result. `file`
 * is copied from `pattern`, and `session` is copied when defined.
 */
function repackSlotSubs(pattern: OcPath, slotSubs: readonly SlotSub[]): OcPath {
  const sectionParts: string[] = [];
  const itemParts: string[] = [];
  const fieldParts: string[] = [];
  for (const sub of slotSubs) {
    const bucket =
      sub.slot === 'section' ? sectionParts : sub.slot === 'item' ? itemParts : fieldParts;
    bucket.push(sub.value);
  }

  const section = sectionParts.length > 0 ? sectionParts.join('.') : undefined;
  const item = itemParts.length > 0 ? itemParts.join('.') : undefined;
  const field = fieldParts.length > 0 ? fieldParts.join('.') : undefined;

  return {
    file: pattern.file,
    ...(section === undefined ? {} : { section }),
    ...(item === undefined ? {} : { item }),
    ...(field === undefined ? {} : { field }),
    ...(pattern.session === undefined ? {} : { session: pattern.session }),
  };
}
|
||||
|
||||
// ---------- Per-kind dispatch ---------------------------------------------
|
||||
|
||||
/**
 * Run the per-kind walker for `ast` and collect the concrete paths it
 * emits.
 *
 * Each walker reports one slot-tagged sub-segment list per match; every
 * list is re-packed into an `OcPath` that keeps the pattern's slot
 * shape. A walker may report the same concrete path more than once
 * (for example a trailing `**` on markdown emits each block both as the
 * consumed and as the zero-match case, and consecutive `**` subs reach
 * the same node along several routes). Duplicates are dropped here,
 * keeping first-seen order, so callers get each path once.
 *
 * @param ast Parsed file to walk.
 * @param subs Slot-tagged pattern sub-segments.
 * @param pattern Original pattern (source of `file` / `session`).
 * @returns Distinct concrete paths in discovery order.
 */
function expand(ast: OcAst, subs: readonly PatternSub[], pattern: OcPath): readonly OcPath[] {
  const concretePaths: OcPath[] = [];
  // `file` and `session` are identical for every emitted path, so the
  // three slot strings are enough to identify a concrete path.
  const seen = new Set<string>();
  const onMatch = (slotSubs: readonly SlotSub[]): void => {
    const concrete = repackSlotSubs(pattern, slotSubs);
    const identity = JSON.stringify([
      concrete.section ?? null,
      concrete.item ?? null,
      concrete.field ?? null,
    ]);
    if (seen.has(identity)) {return;}
    seen.add(identity);
    concretePaths.push(concrete);
  };
  switch (ast.kind) {
    case 'yaml':
      walkYaml(ast.doc.contents as Node | null, subs, 0, [], onMatch);
      break;
    case 'jsonc':
      if (ast.root !== null) {walkJsonc(ast.root, subs, 0, [], onMatch);}
      break;
    case 'jsonl':
      walkJsonl(ast, subs, 0, [], onMatch);
      break;
    case 'md':
      walkMd(ast, subs, 0, [], onMatch);
      break;
  }
  return concretePaths;
}
|
||||
|
||||
// ---------- YAML walker ----------------------------------------------------
|
||||
|
||||
/**
 * Walk a YAML node against `subs[i..]`, calling `onMatch` with the
 * slot-tagged concrete segments whenever the pattern is exhausted.
 *
 * Token handling at position `i`, in order: union → predicate →
 * positional (rewritten to a concrete token, then treated as below) →
 * `**` → `*` → literal key/index.
 *
 * @param node Current node, or `null` when the parent had no value.
 * @param subs Pattern sub-segments.
 * @param i Index of the sub-segment to match at this node.
 * @param walked Concrete segments consumed so far.
 * @param onMatch Receiver for each complete match.
 * @throws OcPathError (`OC_PATH_DEPTH_EXCEEDED`) when `walked` grows
 *   past `MAX_TRAVERSAL_DEPTH`.
 */
function walkYaml(
  node: Node | null,
  subs: readonly PatternSub[],
  i: number,
  walked: readonly SlotSub[],
  onMatch: (subs: readonly SlotSub[]) => void,
): void {
  // P-031 / P-033 (see `oc-paths-substrate/PITFALLS.md`): one depth cap
  // bounds both `**` over deeply nested documents and any cyclic
  // structure.
  if (walked.length > MAX_TRAVERSAL_DEPTH) {
    throw new OcPathError(
      `findOcPaths exceeded MAX_TRAVERSAL_DEPTH (${MAX_TRAVERSAL_DEPTH}) — likely a cycle or pathological pattern`,
      '',
      'OC_PATH_DEPTH_EXCEEDED',
    );
  }
  // Pattern exhausted: whatever node we are on is a match.
  if (i >= subs.length) {
    onMatch(walked);
    return;
  }
  if (node === null) {return;}

  const { slot, value: patternToken } = subs[i];
  const pairs = isMap(node) ? (node as { items: Pair[] }).items : undefined;
  const elements = isSeq(node) ? (node as { items: Node[] }).items : undefined;

  const keyText = (pair: Pair): string =>
    isScalar(pair.key) ? String(pair.key.value) : String(pair.key);
  // Recurse into `child`, recording `seg` as the segment just consumed.
  const step = (child: Node | null, seg: string, nextIndex: number): void => {
    walkYaml(child, subs, nextIndex, [...walked, { slot, value: seg }], onMatch);
  };

  // Union `{a,b,c}`: one walk per alternative, with the alternative
  // substituted for the union token at position `i`.
  if (isUnionSeg(patternToken)) {
    const alternatives = parseUnionSeg(patternToken);
    if (alternatives === null) {return;}
    for (const alternative of alternatives) {
      const rewritten: PatternSub[] = [
        ...subs.slice(0, i),
        { slot, value: alternative },
        ...subs.slice(i + 1),
      ];
      walkYaml(node, rewritten, i, walked, onMatch);
    }
    return;
  }

  // Predicate `[key<op>value]`: like `*`, restricted to children that
  // satisfy the predicate.
  if (isPredicateSeg(patternToken)) {
    const predicate = parsePredicateSeg(patternToken);
    if (predicate === null) {return;}
    if (pairs !== undefined) {
      for (const pair of pairs) {
        const key = keyText(pair);
        const child = pair.value as Node;
        if (!yamlChildMatchesPredicate(child, predicate)) {continue;}
        step(child, quoteSeg(key), i + 1);
      }
    } else if (elements !== undefined) {
      for (let index = 0; index < elements.length; index += 1) {
        const child = elements[index];
        if (!yamlChildMatchesPredicate(child, predicate)) {continue;}
        step(child, String(index), i + 1);
      }
    }
    return;
  }

  // Positional (`$first` / `$last` / `-N`): swap in the concrete
  // key/index so emitted paths carry the resolved value.
  let token = patternToken;
  if (isPositionalSeg(token)) {
    const resolved = positionalForYamlNode(node, token);
    if (resolved === null) {return;}
    token = resolved;
  }

  // `**` and `*` both enumerate children; they differ in whether the
  // pattern index advances and whether a zero-length match is tried.
  if (token === WILDCARD_RECURSIVE || token === WILDCARD_SINGLE) {
    const recursive = token === WILDCARD_RECURSIVE;
    if (recursive) {
      // Zero-segment match: retry the rest of the pattern right here.
      walkYaml(node, subs, i + 1, walked, onMatch);
    }
    const nextIndex = recursive ? i : i + 1;
    if (pairs !== undefined) {
      for (const pair of pairs) {
        step(pair.value as Node, quoteSeg(keyText(pair)), nextIndex);
      }
    } else if (elements !== undefined) {
      for (let index = 0; index < elements.length; index += 1) {
        step(elements[index], String(index), nextIndex);
      }
    }
    return;
  }

  // Literal: quoted segments are unwrapped for the lookup, but the
  // emitted segment keeps the token as written.
  const literal = isQuotedSeg(token) ? unquoteSeg(token) : token;
  if (pairs !== undefined) {
    const hit = pairs.find((pair) => keyText(pair) === literal);
    if (hit !== undefined) {
      step(hit.value as Node, token, i + 1);
    }
    return;
  }
  if (elements !== undefined) {
    const index = Number(literal);
    const inRange = Number.isInteger(index) && index >= 0 && index < elements.length;
    if (inRange) {
      step(elements[index], token, i + 1);
    }
  }
}
|
||||
|
||||
// ---------- JSONC walker ---------------------------------------------------
|
||||
|
||||
/**
 * Walk a JSONC value against `subs[i..]`, calling `onMatch` with the
 * slot-tagged concrete segments whenever the pattern is exhausted.
 *
 * Token handling at position `i`, in order: union → predicate →
 * positional (rewritten to a concrete token) → `**` → `*` → literal
 * object key / array index.
 *
 * @throws OcPathError (`OC_PATH_DEPTH_EXCEEDED`) when `walked` grows
 *   past `MAX_TRAVERSAL_DEPTH`.
 */
function walkJsonc(
  node: JsoncValue,
  subs: readonly PatternSub[],
  i: number,
  walked: readonly SlotSub[],
  onMatch: (subs: readonly SlotSub[]) => void,
): void {
  if (walked.length > MAX_TRAVERSAL_DEPTH) {
    throw new OcPathError(
      `findOcPaths exceeded MAX_TRAVERSAL_DEPTH (${MAX_TRAVERSAL_DEPTH}) — likely a pathological pattern`,
      '',
      'OC_PATH_DEPTH_EXCEEDED',
    );
  }
  if (i >= subs.length) {
    onMatch(walked);
    return;
  }

  const { slot, value: patternToken } = subs[i];
  const members = node.kind === 'object' ? node.entries : undefined;
  const elements = node.kind === 'array' ? node.items : undefined;

  // Recurse into `child`, recording `seg` as the segment just consumed.
  const step = (child: JsoncValue, seg: string, nextIndex: number): void => {
    walkJsonc(child, subs, nextIndex, [...walked, { slot, value: seg }], onMatch);
  };

  // Union: one walk per alternative substituted at position `i`.
  if (isUnionSeg(patternToken)) {
    const alternatives = parseUnionSeg(patternToken);
    if (alternatives === null) {return;}
    for (const alternative of alternatives) {
      const rewritten: PatternSub[] = [
        ...subs.slice(0, i),
        { slot, value: alternative },
        ...subs.slice(i + 1),
      ];
      walkJsonc(node, rewritten, i, walked, onMatch);
    }
    return;
  }

  // Predicate: enumerate children, keeping those that satisfy it.
  if (isPredicateSeg(patternToken)) {
    const predicate = parsePredicateSeg(patternToken);
    if (predicate === null) {return;}
    if (members !== undefined) {
      for (const member of members) {
        if (!jsoncChildMatchesPredicate(member.value, predicate)) {continue;}
        step(member.value, quoteSeg(member.key), i + 1);
      }
    } else if (elements !== undefined) {
      for (let index = 0; index < elements.length; index += 1) {
        const child = elements[index];
        if (!jsoncChildMatchesPredicate(child, predicate)) {continue;}
        step(child, String(index), i + 1);
      }
    }
    return;
  }

  // Positional: swap in the concrete key/index.
  let token = patternToken;
  if (isPositionalSeg(token)) {
    const resolved = positionalForJsoncNode(node, token);
    if (resolved === null) {return;}
    token = resolved;
  }

  // `**` tries a zero-length match and keeps the pattern index while
  // descending; `*` consumes exactly one level.
  if (token === WILDCARD_RECURSIVE || token === WILDCARD_SINGLE) {
    const recursive = token === WILDCARD_RECURSIVE;
    if (recursive) {
      walkJsonc(node, subs, i + 1, walked, onMatch);
    }
    const nextIndex = recursive ? i : i + 1;
    if (members !== undefined) {
      for (const member of members) {
        step(member.value, quoteSeg(member.key), nextIndex);
      }
    } else if (elements !== undefined) {
      for (let index = 0; index < elements.length; index += 1) {
        step(elements[index], String(index), nextIndex);
      }
    }
    return;
  }

  if (members !== undefined) {
    // AST keys are stored unquoted, so a quoted segment (e.g. `"a/b"`)
    // is unwrapped before comparing. This keeps find in step with the
    // unquoting done by `resolveJsoncOcPath` (asymmetry flagged on
    // PR #78678).
    const lookupKey = isQuotedSeg(token) ? unquoteSeg(token) : token;
    const hit = members.find((member) => member.key === lookupKey);
    if (hit !== undefined) {
      step(hit.value, token, i + 1);
    }
    return;
  }
  if (elements !== undefined) {
    const index = Number(token);
    const inRange = Number.isInteger(index) && index >= 0 && index < elements.length;
    if (inRange) {
      step(elements[index], token, i + 1);
    }
  }
}
|
||||
|
||||
// ---------- JSONL walker ---------------------------------------------------
|
||||
|
||||
/**
 * Entry walker for JSONL: resolves the line-address sub-segment, then
 * hands each selected line to `walkJsonlInsideLine` for the remaining
 * pattern.
 *
 * Line-slot tokens:
 *   - `*`  visits every value line and consumes the token.
 *   - `**` visits every value line but leaves the token in place for
 *          the in-line walker (no zero-segment match at file level).
 *   - union: each alternative is looked up like a single address.
 *   - predicate: keeps value lines whose top-level field satisfies it.
 *   - anything else: a single address understood by `pickLine`
 *     (`Lnnn`, `$first`, `$last`, `-N`).
 *
 * Value lines are always emitted as `Lnnn`; a non-value line found by
 * address keeps the address as written.
 *
 * @throws OcPathError (`OC_PATH_DEPTH_EXCEEDED`) when `walked` grows
 *   past `MAX_TRAVERSAL_DEPTH`.
 */
function walkJsonl(
  ast: JsonlAst,
  subs: readonly PatternSub[],
  i: number,
  walked: readonly SlotSub[],
  onMatch: (subs: readonly SlotSub[]) => void,
): void {
  // Same depth bound as the yaml/jsonc walkers, applied at the outer
  // (line-enumeration) layer as defense in depth.
  if (walked.length > MAX_TRAVERSAL_DEPTH) {
    throw new OcPathError(
      `findOcPaths exceeded MAX_TRAVERSAL_DEPTH (${MAX_TRAVERSAL_DEPTH}) — likely a pathological JSONL pattern`,
      '',
      'OC_PATH_DEPTH_EXCEEDED',
    );
  }
  if (i >= subs.length) {
    onMatch(walked);
    return;
  }
  // Only the first sub-segment addresses a line; there is nothing to do
  // here once a segment has already been consumed.
  if (walked.length !== 0) {return;}

  const { slot, value: token } = subs[i];

  const enterLine = (line: JsonlLine, addr: string, nextIndex: number): void => {
    walkJsonlInsideLine(line, subs, nextIndex, [{ slot, value: addr }], onMatch);
  };
  // Single-address lookup shared by the union and literal branches.
  const enterAddressed = (address: string): void => {
    const line = pickLine(ast, address);
    if (line === null) {return;}
    enterLine(line, line.kind === 'value' ? `L${line.line}` : address, i + 1);
  };

  if (token === WILDCARD_RECURSIVE || token === WILDCARD_SINGLE) {
    // The file root has no leaves of its own, so `**` has no useful
    // zero-segment match: it only enumerates, staying on the `**` sub.
    const nextIndex = token === WILDCARD_RECURSIVE ? i : i + 1;
    forEachValueLine(ast, (line, addr) => {
      enterLine(line, addr, nextIndex);
    });
    return;
  }

  if (isUnionSeg(token)) {
    // Alternatives may mix forms, e.g. `{L1,$last}`.
    const alternatives = parseUnionSeg(token);
    if (alternatives === null) {return;}
    for (const alternative of alternatives) {
      enterAddressed(alternative);
    }
    return;
  }

  if (isPredicateSeg(token)) {
    // `[event=foo]`: compare the named top-level field of each line's
    // parsed value; nested children are not searched at this layer.
    const predicate = parsePredicateSeg(token);
    if (predicate === null) {return;}
    forEachValueLine(ast, (line, addr) => {
      if (line.kind !== 'value') {return;}
      const actual = topLevelLeafText(line.value, predicate.key);
      if (evaluatePredicate(actual, predicate)) {
        enterLine(line, addr, i + 1);
      }
    });
    return;
  }

  enterAddressed(token);
}
|
||||
|
||||
/**
 * Text form of a line's top-level field, for predicate evaluation at
 * the jsonl line slot.
 *
 * Scalar leaves compare by their text: strings as-is, numbers and
 * booleans via `String(...)`, and a JSON `null` as the text `'null'`
 * (the same spelling the yaml/jsonc predicate helpers use, so
 * `[field=null]` behaves the same at every layer). Returns `null`, i.e.
 * "nothing to compare", when the line is not an object, the key is
 * absent, or the field holds an object/array.
 *
 * @param value Parsed JSON value of one line.
 * @param key Top-level field name taken from the predicate.
 * @returns The field's text, or `null` when it is not a scalar leaf.
 */
function topLevelLeafText(value: JsoncValue, key: string): string | null {
  if (value.kind !== 'object') {return null;}
  const entry = value.entries.find((e) => e.key === key);
  if (entry === undefined) {return null;}
  const leaf = entry.value;
  switch (leaf.kind) {
    case 'string':
      return leaf.value;
    case 'number':
    case 'boolean':
      return String(leaf.value);
    case 'null':
      return 'null';
    default:
      // Containers are not leaves and never satisfy a predicate.
      return null;
  }
}
|
||||
|
||||
/**
 * Continue a JSONL match inside one selected line.
 *
 * If the pattern is already exhausted the line itself is the match.
 * Otherwise only value lines can be descended into; their parsed value
 * is handed to `walkJsonc` with the same `subs` / `i` / `walked`.
 *
 * @throws OcPathError (`OC_PATH_DEPTH_EXCEEDED`) when `walked` grows
 *   past `MAX_TRAVERSAL_DEPTH`.
 */
function walkJsonlInsideLine(
  line: JsonlLine,
  subs: readonly PatternSub[],
  i: number,
  walked: readonly SlotSub[],
  onMatch: (subs: readonly SlotSub[]) => void,
): void {
  // Repeats the outer guard so this helper stays bounded even when it
  // is not reached through walkJsonl; `walked` spans both layers.
  const tooDeep = walked.length > MAX_TRAVERSAL_DEPTH;
  if (tooDeep) {
    throw new OcPathError(
      `findOcPaths exceeded MAX_TRAVERSAL_DEPTH (${MAX_TRAVERSAL_DEPTH}) — likely a pathological JSONL pattern`,
      '',
      'OC_PATH_DEPTH_EXCEEDED',
    );
  }
  const patternExhausted = i >= subs.length;
  if (patternExhausted) {
    onMatch(walked);
    return;
  }
  if (line.kind === 'value') {
    walkJsonc(line.value, subs, i, walked, onMatch);
  }
}
|
||||
|
||||
/**
 * Call `visit` for every line of kind `'value'`, in file order, passing
 * the line and its canonical `Lnnn` address. Other line kinds are
 * skipped.
 */
function forEachValueLine(
  ast: JsonlAst,
  visit: (line: JsonlLine, addr: string) => void,
): void {
  const valueLines = ast.lines.filter((candidate) => candidate.kind === 'value');
  for (const valueLine of valueLines) {
    visit(valueLine, `L${valueLine.line}`);
  }
}
|
||||
|
||||
/**
 * Resolve a single line address to a line of the JSONL AST.
 *
 * Supported forms:
 *   - `$first` / `$last`: first / last value line.
 *   - `-N`: Nth value line counted from the end (`-1` is the last).
 *   - `Lnnn`: the line whose `line` number is `nnn`, whatever its kind.
 *
 * @returns The line, or `null` when the address is not recognised or
 *   points at nothing.
 */
function pickLine(ast: JsonlAst, addr: string): JsonlLine | null {
  const isValueLine = (l: JsonlLine): l is Extract<JsonlLine, { kind: 'value' }> =>
    l.kind === 'value';

  if (addr === '$first') {
    return ast.lines.find(isValueLine) ?? null;
  }
  if (addr === '$last') {
    const values = ast.lines.filter(isValueLine);
    return values.length > 0 ? values[values.length - 1] : null;
  }
  if (/^-\d+$/.test(addr)) {
    const values = ast.lines.filter(isValueLine);
    // `addr` is negative, so this counts back from the end.
    const position = values.length + Number(addr);
    const inRange = position >= 0 && position < values.length;
    return inRange ? values[position] : null;
  }

  const parsed = /^L(\d+)$/.exec(addr);
  if (parsed === null || parsed[1] === undefined) {return null;}
  const wanted = Number(parsed[1]);
  return ast.lines.find((l) => l.line === wanted) ?? null;
}
|
||||
|
||||
// Helpers shared by the walkers above.
|
||||
/**
 * Resolve a positional token against a YAML container via
 * `resolvePositionalSeg`: sequences are described as indexable with
 * their length, maps as non-indexable with their stringified keys.
 * Returns `null` for any other node kind.
 */
function positionalForYamlNode(node: Node, seg: string): string | null {
  if (isSeq(node)) {
    const { length } = (node as { items: Node[] }).items;
    return resolvePositionalSeg(seg, { indexable: true, size: length });
  }
  if (!isMap(node)) {return null;}
  const keys = (node as { items: Pair[] }).items.map((pair) =>
    String(isScalar(pair.key) ? pair.key.value : pair.key),
  );
  return resolvePositionalSeg(seg, { indexable: false, size: keys.length, keys });
}
|
||||
|
||||
/**
 * Resolve a positional token against a JSONC container via
 * `resolvePositionalSeg`: arrays are described as indexable with their
 * length, objects as non-indexable with their entry keys. Returns
 * `null` for scalars.
 */
function positionalForJsoncNode(node: JsoncValue, seg: string): string | null {
  switch (node.kind) {
    case 'object': {
      const keys = node.entries.map((entry) => entry.key);
      return resolvePositionalSeg(seg, { indexable: false, size: keys.length, keys });
    }
    case 'array':
      return resolvePositionalSeg(seg, { indexable: true, size: node.items.length });
    default:
      return null;
  }
}
|
||||
|
||||
// Predicate-evaluation helpers: look up `node[key]` and compare its
|
||||
// string-coerced leaf value via `evaluatePredicate`. Used by
|
||||
// `[key<op>value]` filtering in find walkers.
|
||||
/** True when the text of `node[pred.key]` satisfies `pred`. */
function yamlChildMatchesPredicate(node: Node | null, pred: PredicateSpec): boolean {
  const fieldText = yamlChildFieldText(node, pred.key);
  return evaluatePredicate(fieldText, pred);
}
|
||||
|
||||
/**
 * Text of the scalar stored under `key` in a YAML map, for predicate
 * comparison.
 *
 * Only the first pair whose stringified key equals `key` is considered.
 * Returns `null` when `node` is not a map, the key is absent, or the
 * value is not a scalar. A scalar `null` becomes `'null'`; strings are
 * returned as-is; numbers/booleans go through `String(...)`; any other
 * scalar payload is JSON-stringified (falling back to `'null'`).
 */
function yamlChildFieldText(node: Node | null, key: string): string | null {
  if (node === null || !isMap(node)) {return null;}
  const hit = (node as { items: Pair[] }).items.find((pair) => {
    const name = isScalar(pair.key) ? String(pair.key.value) : String(pair.key);
    return name === key;
  });
  if (hit === undefined) {return null;}

  const held = hit.value;
  if (!isScalar(held)) {return null;}
  const raw = held.value;
  switch (typeof raw) {
    case 'string':
      return raw;
    case 'number':
    case 'boolean':
      return String(raw);
    default:
      break;
  }
  if (raw === null) {return 'null';}
  return JSON.stringify(raw) ?? 'null';
}
|
||||
|
||||
/** True when the text of `node[pred.key]` satisfies `pred`. */
function jsoncChildMatchesPredicate(node: JsoncValue, pred: PredicateSpec): boolean {
  const fieldText = jsoncChildFieldText(node, pred.key);
  return evaluatePredicate(fieldText, pred);
}
|
||||
|
||||
/**
 * Text of the scalar stored under `key` in a JSONC object, for
 * predicate comparison. Returns `null` when `node` is not an object,
 * the key is absent, or the value is an object/array. A JSON `null`
 * becomes the text `'null'`.
 */
function jsoncChildFieldText(node: JsoncValue, key: string): string | null {
  if (node.kind !== 'object') {return null;}
  const member = node.entries.find((entry) => entry.key === key);
  if (member === undefined) {return null;}
  const held = member.value;
  switch (held.kind) {
    case 'string':
      return held.value;
    case 'number':
    case 'boolean':
      return String(held.value);
    case 'null':
      return 'null';
    default:
      return null;
  }
}
|
||||
|
||||
// ---------- Markdown walker -----------------------------------------------
|
||||
|
||||
/**
 * Entry walker for markdown: resolves the section-slot sub-segment and
 * hands each selected block to `walkMdInsideBlock`.
 *
 * Section-slot tokens:
 *   - `[frontmatter]`: addresses the frontmatter map (see below).
 *   - `*` / `**`: every block; `**` additionally stays active for the
 *     next level and, when it is the last sub, also matches the root.
 *   - anything else: a block slug, compared lower-cased.
 *
 * Frontmatter is a flat key → string map, so after `[frontmatter]` the
 * remaining pattern may contain at most one key selector (`*` or a
 * literal key); any number of `**` subs around it match zero segments.
 * A pattern that asks for more than one level below `[frontmatter]`
 * matches nothing instead of being truncated to the key.
 *
 * @param ast Markdown AST.
 * @param subs Pattern sub-segments.
 * @param i Index of the sub-segment to match.
 * @param walked Concrete segments consumed so far (empty at entry).
 * @param onMatch Receiver for each complete match.
 */
function walkMd(
  ast: MdAst,
  subs: readonly PatternSub[],
  i: number,
  walked: readonly SlotSub[],
  onMatch: (subs: readonly SlotSub[]) => void,
): void {
  if (i >= subs.length) {
    onMatch(walked);
    return;
  }
  // Everything below addresses the first (section) segment only.
  if (walked.length !== 0) {return;}
  const cur = subs[i];

  // Frontmatter addressing: literal `[frontmatter]` in the section slot.
  if (cur.value === '[frontmatter]') {
    const head: SlotSub = { slot: cur.slot, value: cur.value };
    const rest = subs.slice(i + 1);
    const next = rest[0];
    if (next === undefined) {
      onMatch([head]);
      return;
    }
    // `**` can only match zero segments here; what is left must be a
    // single key selector, otherwise the pattern points below a leaf.
    const selectors = rest.filter((s) => s.value !== WILDCARD_RECURSIVE);
    if (selectors.length > 1) {return;}
    const selector = selectors[0];
    if (selector === undefined || selector.value === WILDCARD_SINGLE) {
      for (const fm of ast.frontmatter) {
        onMatch([head, { slot: next.slot, value: fm.key }]);
      }
      return;
    }
    // Frontmatter keys are stored unquoted, so unwrap a quoted segment
    // before comparing; the emitted segment keeps the token as written.
    const fmKey = isQuotedSeg(selector.value) ? unquoteSeg(selector.value) : selector.value;
    const entry = ast.frontmatter.find((e) => e.key === fmKey);
    if (entry === undefined) {return;}
    onMatch([head, { slot: next.slot, value: selector.value }]);
    return;
  }

  if (cur.value === WILDCARD_SINGLE || cur.value === WILDCARD_RECURSIVE) {
    const recursive = cur.value === WILDCARD_RECURSIVE;
    for (const block of ast.blocks) {
      const atBlock: readonly SlotSub[] = [{ slot: cur.slot, value: block.slug }];
      walkMdInsideBlock(block, ast, subs, i + 1, atBlock, onMatch);
      // `**` retain branch: also descend with `**` still active so the
      // next sub can match one level deeper, mirroring yaml/jsonc where
      // `oc://X.md/**/value`-style patterns reach below the block layer.
      if (recursive) {
        walkMdInsideBlock(block, ast, subs, i, atBlock, onMatch);
      }
    }
    // `**` zero-segment match: the root itself, when `**` is the last sub.
    if (recursive && i + 1 >= subs.length) {
      onMatch([]);
    }
    return;
  }

  const targetSlug = cur.value.toLowerCase();
  const block = ast.blocks.find((b) => b.slug === targetSlug);
  if (block === undefined) {return;}
  walkMdInsideBlock(block, ast, subs, i + 1, [{ slot: cur.slot, value: cur.value }], onMatch);
}
|
||||
|
||||
/**
 * Match the item-slot sub-segment inside one markdown block and hand
 * each selected item to `walkMdInsideItem`.
 *
 * Item-slot tokens:
 *   - `*` / `**`: every item. Items whose slug occurs more than once in
 *     the block are emitted as `#<index>` so each path can address its
 *     own item; unique slugs are emitted as the slug. A trailing `**`
 *     also matches the block itself.
 *   - `#N`: the item at index N.
 *   - positional (`$first` / `$last` / `-N`): resolved against the item
 *     count.
 *   - anything else: an item slug, compared lower-cased.
 *
 * For the non-wildcard forms the emitted segment is the token as
 * written in the pattern.
 */
function walkMdInsideBlock(
  block: { readonly items: readonly { readonly slug: string; readonly kv?: { readonly key: string; readonly value: string } }[] },
  ast: MdAst,
  subs: readonly PatternSub[],
  i: number,
  walked: readonly SlotSub[],
  onMatch: (subs: readonly SlotSub[]) => void,
): void {
  if (i >= subs.length) {
    onMatch(walked);
    return;
  }
  const cur = subs[i];
  const items = block.items;
  const withSegment = (seg: string): readonly SlotSub[] => [
    ...walked,
    { slot: cur.slot, value: seg },
  ];

  const recursive = cur.value === WILDCARD_RECURSIVE;
  if (recursive || cur.value === WILDCARD_SINGLE) {
    // Count slugs first so duplicates can be told apart by ordinal.
    const occurrences = new Map<string, number>();
    for (const { slug } of items) {
      occurrences.set(slug, (occurrences.get(slug) ?? 0) + 1);
    }
    for (let idx = 0; idx < items.length; idx += 1) {
      const candidate = items[idx];
      const ambiguous = (occurrences.get(candidate.slug) ?? 0) > 1;
      const seg = ambiguous ? `#${idx}` : candidate.slug;
      walkMdInsideItem(candidate, ast, subs, i + 1, withSegment(seg), onMatch);
    }
    if (recursive && i + 1 >= subs.length) {
      onMatch(walked);
    }
    return;
  }

  // Reduce every single-item form to an index into `items`; an index
  // that does not land on an item means "no match".
  let index: number;
  if (isOrdinalSeg(cur.value)) {
    const ordinal = parseOrdinalSeg(cur.value);
    if (ordinal === null || ordinal < 0 || ordinal >= items.length) {return;}
    index = ordinal;
  } else if (isPositionalSeg(cur.value)) {
    const concrete = resolvePositionalSeg(cur.value, {
      indexable: true,
      size: items.length,
    });
    if (concrete === null) {return;}
    index = Number(concrete);
  } else {
    const wantedSlug = cur.value.toLowerCase();
    index = items.findIndex((it) => it.slug === wantedSlug);
  }
  const chosen = items[index];
  if (chosen === undefined) {return;}
  walkMdInsideItem(chosen, ast, subs, i + 1, withSegment(cur.value), onMatch);
}
|
||||
|
||||
/**
 * Field-slot stage of the markdown find walker.
 *
 * Matches `subs[i]` against the item's `kv.key` (case-insensitively) and
 * reports the completed trail through `onMatch`. Items without a `kv`
 * pair can only match when the pattern is already exhausted. Pattern
 * entries after `subs[i]` are not consulted by this stage.
 *
 * @param item    Item whose key/value pair is being addressed.
 * @param _ast    Unused; kept for signature parity with the other stages.
 * @param subs    Full pattern; `subs[i]` is the field-slot entry.
 * @param i       Index of the pattern entry to consume.
 * @param walked  Concrete slot values matched so far.
 * @param onMatch Callback invoked once per complete match.
 */
function walkMdInsideItem(
  item: { readonly kv?: { readonly key: string; readonly value: string } },
  _ast: MdAst,
  subs: readonly PatternSub[],
  i: number,
  walked: readonly SlotSub[],
  onMatch: (subs: readonly SlotSub[]) => void,
): void {
  if (i >= subs.length) {
    onMatch(walked);
    return;
  }

  const fieldSub = subs[i];
  const { kv } = item;
  if (kv === undefined) {
    return;
  }

  const isWildcard =
    fieldSub.value === WILDCARD_SINGLE || fieldSub.value === WILDCARD_RECURSIVE;
  if (!isWildcard && kv.key.toLowerCase() !== fieldSub.value.toLowerCase()) {
    return;
  }

  // Wildcards report the actual key; literal matches echo the segment
  // exactly as the pattern spelled it.
  const reported = isWildcard ? kv.key : fieldSub.value;
  onMatch([...walked, { slot: fieldSub.slot, value: reported }]);
}
|
||||
|
||||
133
src/oc-path/index.ts
Normal file
133
src/oc-path/index.ts
Normal file
@@ -0,0 +1,133 @@
|
||||
/**
|
||||
* `@openclaw/oc-path` — substrate package public surface.
|
||||
*
|
||||
* **Strategic frame**: workspace files are byte-stable and addressable
|
||||
* via the `oc://` scheme — the addressing scheme is universal across
|
||||
* file kinds (md / jsonc / jsonl / yaml). Encoding (parse/emit) is
|
||||
* per-kind; addressing (resolve/set) is universal.
|
||||
*
|
||||
* **Public verbs**:
|
||||
* - One `setOcPath(ast, path, value)` — universal, kind-dispatched
|
||||
* - One `resolveOcPath(ast, path)` — universal, kind-dispatched
|
||||
* - Per-kind `parseXxx` / `emitXxx` (parsing IS per-kind by nature)
|
||||
*
|
||||
* `setOcPath` accepts a string value; the substrate coerces based on
|
||||
* AST shape at the path location. The OcPath syntax encodes the
|
||||
* operation: plain path = leaf set, `+` suffix = insertion.
|
||||
*
|
||||
* Per-kind set/resolve helpers exist as internal implementation; they
|
||||
* aren't on the public surface. Callers don't need to pick a kind —
|
||||
* the AST carries its `kind` discriminator and the universal verbs
|
||||
* dispatch internally.
|
||||
*
|
||||
* @module @openclaw/oc-path
|
||||
*/
|
||||
|
||||
/**
|
||||
* SDK version this build of `@openclaw/oc-path` exposes. Bumped on
|
||||
* every breaking change to AST shape, OcPath syntax, or universal
|
||||
* verbs (`resolveOcPath`, `setOcPath`, `findOcPaths`, `parseXxx`,
|
||||
* `emitXxx`). Plugin packs that depend on the substrate declare the
|
||||
* version they were authored against and the host warns on mismatch.
|
||||
*/
|
||||
export const SDK_VERSION = '0.1.0';
|
||||
|
||||
// AST types
|
||||
export type {
|
||||
AstBlock,
|
||||
AstCodeBlock,
|
||||
AstItem,
|
||||
AstTable,
|
||||
Diagnostic,
|
||||
FrontmatterEntry,
|
||||
ParseResult,
|
||||
MdAst,
|
||||
} from './ast.js';
|
||||
export type { JsoncAst, JsoncEntry, JsoncValue } from './jsonc/ast.js';
|
||||
export type { JsonlAst, JsonlLine } from './jsonl/ast.js';
|
||||
export type { YamlAst } from './yaml/ast.js';
|
||||
|
||||
// OcPath types + parser/formatter
|
||||
export type {
|
||||
OcPath,
|
||||
PathSegmentLayout,
|
||||
PositionalContainer,
|
||||
PredicateSpec,
|
||||
} from './oc-path.js';
|
||||
// Public OcPath surface — what plugin authors and callers use.
|
||||
export {
|
||||
MAX_PATH_LENGTH,
|
||||
MAX_SUB_SEGMENTS_PER_SLOT,
|
||||
MAX_TRAVERSAL_DEPTH,
|
||||
OcPathError,
|
||||
POS_FIRST,
|
||||
POS_LAST,
|
||||
WILDCARD_RECURSIVE,
|
||||
WILDCARD_SINGLE,
|
||||
formatOcPath,
|
||||
hasWildcard,
|
||||
isOrdinalSeg,
|
||||
isPattern,
|
||||
isPositionalSeg,
|
||||
isPredicateSeg,
|
||||
isQuotedSeg,
|
||||
isUnionSeg,
|
||||
isValidOcPath,
|
||||
parseOcPath,
|
||||
} from './oc-path.js';
|
||||
|
||||
// `evaluatePredicate`, `getPathLayout`, `parseOrdinalSeg`,
|
||||
// `parsePredicateSeg`, `parseUnionSeg`, `quoteSeg`, `unquoteSeg`,
|
||||
// `repackPath`, `resolvePositionalSeg`, `splitRespectingBrackets`
|
||||
// were exported from earlier prototypes. They're substrate-internal
|
||||
// helpers — used by `find.ts`, the per-kind resolvers, and the parser
|
||||
// itself, but not part of the upstream-portable public surface.
|
||||
// Callers that need their behavior should round-trip through
|
||||
// `parseOcPath` / `formatOcPath` / `findOcPaths`.
|
||||
|
||||
// Per-kind parse / emit (encoding is genuinely per-kind)
|
||||
export { parseMd } from './parse.js';
|
||||
export { parseJsonc } from './jsonc/parse.js';
|
||||
export { parseJsonl } from './jsonl/parse.js';
|
||||
export { parseYaml } from './yaml/parse.js';
|
||||
export type { JsoncParseResult } from './jsonc/parse.js';
|
||||
export type { JsonlParseResult } from './jsonl/parse.js';
|
||||
export type { YamlParseResult } from './yaml/parse.js';
|
||||
|
||||
export type { EmitOptions } from './emit.js';
|
||||
export { emitMd, markDirty } from './emit.js';
|
||||
export type { JsoncEmitOptions } from './jsonc/emit.js';
|
||||
export { emitJsonc } from './jsonc/emit.js';
|
||||
export type { JsonlEmitOptions } from './jsonl/emit.js';
|
||||
export { emitJsonl } from './jsonl/emit.js';
|
||||
export type { YamlEmitOptions } from './yaml/emit.js';
|
||||
export { emitYaml } from './yaml/emit.js';
|
||||
|
||||
// Universal verbs — the only public resolve / set on the surface.
|
||||
export type {
|
||||
OcAst,
|
||||
OcMatch,
|
||||
LeafType,
|
||||
NodeDescriptor,
|
||||
ContainerKind,
|
||||
SetResult,
|
||||
InsertionInfo,
|
||||
} from './universal.js';
|
||||
export { resolveOcPath, setOcPath, detectInsertion } from './universal.js';
|
||||
|
||||
// Multi-match search verb — the wildcard-accepting cousin of resolve.
|
||||
export type { OcPathMatch } from './find.js';
|
||||
export { findOcPaths } from './find.js';
|
||||
|
||||
// Cross-kind utility — filename → kind hint.
|
||||
export { inferKind } from './dispatch.js';
|
||||
export type { OcKind } from './dispatch.js';
|
||||
|
||||
// Sentinel guard
|
||||
export { OcEmitSentinelError, REDACTED_SENTINEL, guardSentinel } from './sentinel.js';
|
||||
|
||||
// Slug helper
|
||||
export { slugify } from './slug.js';
|
||||
|
||||
// Workspace manifest is a separate concern (filesystem classifier);
|
||||
// it's not part of this PR's scope.
|
||||
49
src/oc-path/jsonc/ast.ts
Normal file
49
src/oc-path/jsonc/ast.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
/**
|
||||
* JSONC AST types — the addressing skeleton for JSONC files (gateway
|
||||
* config, plugin manifests, JSON-with-comments artifacts).
|
||||
*
|
||||
* **Per-kind discriminator**: every AST in this substrate carries a
|
||||
* `kind` field. The OcPath resolver dispatches on `kind` so md / jsonc
|
||||
* / jsonl / yaml can share one resolver entry point.
|
||||
*
|
||||
* **Byte-fidelity**: `raw` is preserved on the root for round-trip
|
||||
* emit. The minimal prototype parser doesn't preserve every formatting
|
||||
* detail in the structural tree — for production, a fuller
|
||||
* comment-preserving parser ports from `openclaw-workspace`.
|
||||
*
|
||||
* @module @openclaw/oc-path/jsonc/ast
|
||||
*/
|
||||
|
||||
/** The root JSONC AST. `raw` round-trips byte-identical via emit. */
|
||||
export interface JsoncAst {
|
||||
readonly kind: 'jsonc';
|
||||
readonly raw: string;
|
||||
/** Parsed value tree, or `null` if the file is empty / unparseable. */
|
||||
readonly root: JsoncValue | null;
|
||||
}
|
||||
|
||||
/**
|
||||
* A JSONC value node — discriminated union over the standard JSON kinds.
|
||||
*
|
||||
* `line` is the 1-based line where the value's literal token starts
|
||||
* (the `{`, `[`, opening `"`, or first digit). The parser always sets
|
||||
* it; synthetic constructions (mutations, fixtures) may omit it and
|
||||
* consumers fall back to 1 / parent line. Optional rather than
|
||||
* required so test fixtures and externally-constructed values stay
|
||||
* concise.
|
||||
*/
|
||||
export type JsoncValue =
|
||||
| { readonly kind: 'object'; readonly entries: readonly JsoncEntry[]; readonly line?: number }
|
||||
| { readonly kind: 'array'; readonly items: readonly JsoncValue[]; readonly line?: number }
|
||||
| { readonly kind: 'string'; readonly value: string; readonly line?: number }
|
||||
| { readonly kind: 'number'; readonly value: number; readonly line?: number }
|
||||
| { readonly kind: 'boolean'; readonly value: boolean; readonly line?: number }
|
||||
| { readonly kind: 'null'; readonly line?: number };
|
||||
|
||||
/** Object key/value entry. Keys are unquoted; quoting happens at emit. */
|
||||
export interface JsoncEntry {
|
||||
readonly key: string;
|
||||
readonly value: JsoncValue;
|
||||
/** 1-based line number of the key. */
|
||||
readonly line: number;
|
||||
}
|
||||
184
src/oc-path/jsonc/edit.ts
Normal file
184
src/oc-path/jsonc/edit.ts
Normal file
@@ -0,0 +1,184 @@
|
||||
/**
|
||||
* Mutate a `JsoncAst` at an OcPath. Returns a new AST with the value
|
||||
* replaced; the original AST is unchanged.
|
||||
*
|
||||
* **Why immutable**: callers can hold the pre-edit AST for diffing /
|
||||
* audit while applying the edit. Plays well with LKG observe (compare
|
||||
* pre vs post fingerprints).
|
||||
*
|
||||
* # Known limitation: trivia loss after edit (tracked as follow-up)
|
||||
*
|
||||
* `setJsoncOcPath` rebuilds `ast.raw` via `emitJsonc({mode:'render'})`,
|
||||
* which RE-SERIALIZES the structural tree. **Comments, blank lines,
|
||||
* key-order whitespace, and trailing-comma style are dropped** in the
|
||||
* post-edit `raw`. This is the cost of edit-then-emit in the prototype.
|
||||
*
|
||||
* The byte-fidelity guarantee in this PR applies to the **read path**
|
||||
* (`parseJsonc → emitJsonc` round-trip) — that's exercised by the
|
||||
* `jsonc-byte-fidelity` scenario test and holds byte-identical for
|
||||
* arbitrary input. The **write path** (`parseJsonc → setJsoncOcPath →
|
||||
* emitJsonc`) loses trivia.
|
||||
*
|
||||
* Why we ship as-is: a comment-preserving editor needs the parser to
|
||||
* track byte offsets per node, plus splice-aware mutation logic. That
|
||||
* is its own lift. The follow-up adds parser offsets and a byte-splice
|
||||
* editor; existing callers that need post-edit byte fidelity should
|
||||
* patch `raw` directly until then.
|
||||
*
|
||||
* @module @openclaw/oc-path/jsonc/edit
|
||||
*/
|
||||
|
||||
import type { OcPath } from '../oc-path.js';
|
||||
import {
|
||||
isPositionalSeg,
|
||||
isQuotedSeg,
|
||||
resolvePositionalSeg,
|
||||
splitRespectingBrackets,
|
||||
unquoteSeg,
|
||||
} from '../oc-path.js';
|
||||
import type { JsoncAst, JsoncEntry, JsoncValue } from './ast.js';
|
||||
import { emitJsonc } from './emit.js';
|
||||
|
||||
export type JsoncEditResult =
|
||||
| { readonly ok: true; readonly ast: JsoncAst }
|
||||
| { readonly ok: false; readonly reason: 'unresolved' | 'no-root' };
|
||||
|
||||
/**
 * Immutable leaf/subtree replacement: returns a new `JsoncAst` in which
 * the value addressed by `path` is `newValue`, or a structured failure.
 *
 * - `no-root`    — the AST has no parsed value tree (`ast.root === null`).
 * - `unresolved` — some segment of the path does not exist in the tree.
 *
 * A path with no section/item/field replaces the root itself. Numeric
 * segments index into arrays. The returned AST's `raw` is re-rendered
 * from the tree; the render may throw if the sentinel guard rejects a
 * leaf.
 */
export function setJsoncOcPath(
  ast: JsoncAst,
  path: OcPath,
  newValue: JsoncValue,
): JsoncEditResult {
  const { root } = ast;
  if (root === null) {
    return { ok: false, reason: 'no-root' };
  }

  // Each slot may itself be a dotted sub-path. The split is
  // bracket/brace/quote-aware so a quoted key such as
  // `"anthropic/claude-opus-4-7"` (dots, slashes, punctuation) stays a
  // single segment — a plain `.split('.')` would shred it and break the
  // round-trip with `resolveJsoncOcPath`, which already respects
  // quoting (the resolve-vs-edit asymmetry flagged on PR #78678).
  const segments = [path.section, path.item, path.field].flatMap((slot) =>
    slot === undefined ? [] : splitRespectingBrackets(slot, '.'),
  );

  // No segments at all means "replace the root".
  const nextRoot = segments.length === 0 ? newValue : replaceAt(root, segments, 0, newValue);
  if (nextRoot === null) {
    return { ok: false, reason: 'unresolved' };
  }

  return { ok: true, ast: rebuildRaw({ ...ast, root: nextRoot }, path.file) };
}
|
||||
|
||||
/**
 * Recursive worker behind `setJsoncOcPath`.
 *
 * Walks `segments` starting at index `i` from `current` and returns a
 * copy of `current` in which the addressed descendant is `newValue`.
 * Containers along the path are copied; untouched siblings are reused.
 *
 * @returns The rebuilt value, or `null` when the path cannot be
 *          followed (empty segment, missing key, bad index, or an
 *          attempt to descend into a primitive).
 */
function replaceAt(
  current: JsoncValue,
  segments: readonly string[],
  i: number,
  newValue: JsoncValue,
): JsoncValue | null {
  const seg = segments[i];
  if (seg === undefined) {
    // Ran off the end of the path: this is the node being replaced.
    return newValue;
  }
  if (seg.length === 0) {
    return null;
  }
  if (current.kind !== 'object' && current.kind !== 'array') {
    // Primitive — there is nothing to descend into.
    return null;
  }

  if (current.kind === 'object') {
    // Positional tokens ($first / $last) are resolved against the
    // ordered key list before any literal comparison; otherwise
    // `oc://x.jsonc/agents/$first/alias` would look for a key literally
    // named `$first`. Negative indices do not apply to keyed containers;
    // the helper returns null for them and the path is unresolved.
    const keyToken = isPositionalSeg(seg)
      ? resolvePositionalSeg(seg, {
          indexable: false,
          size: current.entries.length,
          keys: current.entries.map((e) => e.key),
        })
      : seg;
    if (keyToken === null) {
      return null;
    }

    // Entry keys are stored unquoted, so a quoted segment has its
    // surrounding quotes stripped before the comparison.
    const lookupKey = isQuotedSeg(keyToken) ? unquoteSeg(keyToken) : keyToken;
    const hit = current.entries.findIndex((e) => e.key === lookupKey);
    const entry = hit === -1 ? undefined : current.entries[hit];
    if (entry === undefined) {
      return null;
    }

    const nextChild = replaceAt(entry.value, segments, i + 1, newValue);
    if (nextChild === null) {
      return null;
    }

    const entries = current.entries.map(
      (e, at): JsoncEntry => (at === hit ? { ...entry, value: nextChild } : e),
    );
    const { line } = current;
    return line === undefined
      ? { kind: 'object', entries }
      : { kind: 'object', entries, line };
  }

  // Array: positional tokens ($first / $last / -N) are resolved against
  // the array size first; `Number('$last')` would otherwise be NaN and
  // the path would silently fail to resolve.
  const indexToken = isPositionalSeg(seg)
    ? resolvePositionalSeg(seg, { indexable: true, size: current.items.length })
    : seg;
  if (indexToken === null) {
    return null;
  }

  const index = Number(indexToken);
  const inRange = Number.isInteger(index) && index >= 0 && index < current.items.length;
  if (!inRange) {
    return null;
  }

  const element = current.items[index];
  if (element === undefined) {
    return null;
  }

  const nextElement = replaceAt(element, segments, i + 1, newValue);
  if (nextElement === null) {
    return null;
  }

  const items = current.items.map((v, at) => (at === index ? nextElement : v));
  const { line } = current;
  return line === undefined ? { kind: 'array', items } : { kind: 'array', items, line };
}
|
||||
|
||||
/**
 * Returns a copy of `ast` whose `raw` is re-rendered from `ast.root`
 * using render-mode `emitJsonc`.
 *
 * **Trivia is dropped**: the rendered bytes are synthesized from the
 * structural tree, so a later `emitJsonc(returnedAst)` yields these
 * bytes rather than the originally parsed input (see the module-level
 * "Known limitation" section).
 *
 * The intended long-term fix is a parser that tracks byte offsets per
 * node so the edit can splice
 * `raw.slice(0, start) + newBytes + raw.slice(end)` and leave trivia
 * untouched; tracked as a PR follow-up.
 *
 * @param ast      AST whose tree has (possibly) been mutated.
 * @param fileName Optional file name, passed to the emitter as
 *                 `fileNameForGuard` so a sentinel rejection names the
 *                 file (`oc://gateway.jsonc/[path]`) instead of the
 *                 empty fallback (`oc:///[path]`).
 */
function rebuildRaw(ast: JsoncAst, fileName?: string): JsoncAst {
  // Render from a scratch AST that carries only the tree.
  const treeOnly: JsoncAst = { kind: 'jsonc', raw: '', root: ast.root };

  // `fileNameForGuard` is only attached when a name is known, so the
  // options object never carries an explicit `undefined`.
  const raw = emitJsonc(treeOnly, {
    mode: 'render',
    ...(fileName === undefined ? {} : { fileNameForGuard: fileName }),
  });

  return { ...ast, raw };
}
|
||||
99
src/oc-path/jsonc/emit.ts
Normal file
99
src/oc-path/jsonc/emit.ts
Normal file
@@ -0,0 +1,99 @@
|
||||
/**
|
||||
* Emit a `JsoncAst` to bytes.
|
||||
*
|
||||
* **Round-trip mode (default)** returns `ast.raw` verbatim — this
|
||||
* preserves comments, formatting, and trailing whitespace exactly.
|
||||
*
|
||||
* **Sentinel-guard policy**:
|
||||
*
|
||||
* - Round-trip echoes `ast.raw` *without* scanning for the redaction
|
||||
* sentinel. Bytes that came in via `parseJsonc` are trusted: a
|
||||
* workspace file legitimately containing the literal
|
||||
* `__OPENCLAW_REDACTED__` (in a code-block comment, in a pasted
|
||||
* error log, etc.) would otherwise become a workspace-wide emit
|
||||
* DoS — every `openclaw path emit FILE.jsonc` would exit non-zero,
|
||||
* breaking lint round-trip rules, doctor fixers, and LKG
|
||||
* fingerprinting. The substrate's contract is "no NEW sentinel
|
||||
* bytes introduced via emit", not "no sentinel byte ever leaves".
|
||||
* - Render mode walks every leaf and rejects sentinel-bearing leaf
|
||||
* values (caller-injected sentinel via `setOcPath` lands here:
|
||||
* `setJsoncOcPath` rebuilds raw via render-mode, so a leaf set to
|
||||
* the sentinel by the caller is caught at the rebuild boundary
|
||||
* before the raw is shipped back).
|
||||
*
|
||||
* Callers that want pre-existing sentinel detection (e.g., LKG
|
||||
* fingerprint verification) can opt in via
|
||||
* `acceptPreExistingSentinel: false`.
|
||||
*
|
||||
* @module @openclaw/oc-path/jsonc/emit
|
||||
*/
|
||||
|
||||
import { OcEmitSentinelError, REDACTED_SENTINEL } from '../sentinel.js';
|
||||
import type { JsoncAst, JsoncValue } from './ast.js';
|
||||
|
||||
export interface JsoncEmitOptions {
|
||||
readonly mode?: 'roundtrip' | 'render';
|
||||
readonly fileNameForGuard?: string;
|
||||
/**
|
||||
* When `false`, round-trip mode also scans `ast.raw` for the
|
||||
* redaction sentinel and throws `OcEmitSentinelError` if found.
|
||||
* Default `true` — round-trip trusts parsed bytes (see policy
|
||||
* comment above). Render mode always scans leaves regardless.
|
||||
*/
|
||||
readonly acceptPreExistingSentinel?: boolean;
|
||||
}
|
||||
|
||||
/**
 * Serialize a `JsoncAst`.
 *
 * - Round-trip mode (default) returns `ast.raw` verbatim. It scans the
 *   raw bytes for the redaction sentinel only when the caller passes
 *   `acceptPreExistingSentinel: false`.
 * - Any other mode renders JSON from the structural tree (comments are
 *   lost) and rejects string leaves that contain the sentinel. A `null`
 *   root renders as the empty string.
 *
 * @throws OcEmitSentinelError when a sentinel check fails.
 */
export function emitJsonc(ast: JsoncAst, opts: JsoncEmitOptions = {}): string {
  const guardPath = `oc://${opts.fileNameForGuard ?? ''}`;

  if ((opts.mode ?? 'roundtrip') !== 'roundtrip') {
    // Render mode: every leaf string is checked while rendering, so a
    // sentinel injected through setOcPath is rejected here.
    return ast.root === null ? '' : renderValue(ast.root, guardPath, []);
  }

  const trustParsedBytes = opts.acceptPreExistingSentinel ?? true;
  if (!trustParsedBytes && ast.raw.includes(REDACTED_SENTINEL)) {
    throw new OcEmitSentinelError(`${guardPath}/[raw]`);
  }
  return ast.raw;
}
|
||||
|
||||
/**
 * Render one value node (and its descendants) as compact single-line
 * JSON text.
 *
 * @param value     Node to render.
 * @param guardPath `oc://…` prefix used when reporting a sentinel hit.
 * @param walked    Keys / indices from the root down to `value`; joined
 *                  with `/` to locate an offending leaf in the error.
 * @throws OcEmitSentinelError when a string leaf contains the sentinel.
 */
function renderValue(value: JsoncValue, guardPath: string, walked: readonly string[]): string {
  if (value.kind === 'object') {
    const members: string[] = [];
    for (const { key, value: child } of value.entries) {
      members.push(`${JSON.stringify(key)}: ${renderValue(child, guardPath, [...walked, key])}`);
    }
    return `{ ${members.join(', ')} }`;
  }

  if (value.kind === 'array') {
    const elements: string[] = [];
    for (const [index, element] of value.items.entries()) {
      elements.push(renderValue(element, guardPath, [...walked, String(index)]));
    }
    return `[ ${elements.join(', ')} ]`;
  }

  if (value.kind === 'string') {
    // Substring match on purpose: an embedded sentinel
    // (`prefix__OPENCLAW_REDACTED__suffix`) is as much a leaked literal
    // redaction token as an exact match, mirroring the `raw.includes()`
    // predicate used on the round-trip path.
    if (value.value.includes(REDACTED_SENTINEL)) {
      throw new OcEmitSentinelError(`${guardPath}/${walked.join('/')}`);
    }
    return JSON.stringify(value.value);
  }

  if (value.kind === 'number' || value.kind === 'boolean') {
    return String(value.value);
  }

  if (value.kind === 'null') {
    return 'null';
  }

  throw new Error(`unreachable: jsonc renderValue kind`);
}
|
||||
311
src/oc-path/jsonc/parse.ts
Normal file
311
src/oc-path/jsonc/parse.ts
Normal file
@@ -0,0 +1,311 @@
|
||||
/**
|
||||
* Minimal JSONC parser — handles JSON + line comments and block
|
||||
* comments + trailing commas. Produces a structural tree for OcPath
|
||||
* resolution; full byte-fidelity emit relies on `raw` on the AST root.
|
||||
*
|
||||
* **Prototype scope**: this parser handles the input shapes openclaw
|
||||
* config files actually use. Production landing ports the full
|
||||
* comment-preserving parser from `openclaw-workspace` (1248 LoC).
|
||||
*
|
||||
* @module @openclaw/oc-path/jsonc/parse
|
||||
*/
|
||||
|
||||
import type { Diagnostic } from '../ast.js';
|
||||
import type { JsoncAst, JsoncEntry, JsoncValue } from './ast.js';
|
||||
|
||||
/**
|
||||
* Bound on parse-time recursion depth. Mirrors `MAX_TRAVERSAL_DEPTH`
|
||||
* from oc-path; real configs don't nest beyond ~10 levels, so 256 is
|
||||
* a safe ceiling. Pathological input like
|
||||
* `'['.repeat(20000) + '0' + ']'.repeat(20000)` would otherwise
|
||||
* trigger V8 RangeError before any structural diagnostic — the CLI
|
||||
* loads attacker-supplied workspace files via `loadAst`, so this
|
||||
* defense fires before raw stack overflow escapes to commander.
|
||||
*/
|
||||
export const MAX_PARSE_DEPTH = 256;
|
||||
|
||||
export interface JsoncParseResult {
|
||||
readonly ast: JsoncAst;
|
||||
readonly diagnostics: readonly Diagnostic[];
|
||||
}
|
||||
|
||||
/**
 * Thrown by the parser when nesting exceeds `MAX_PARSE_DEPTH`. The
 * `code` is surfaced as the diagnostic code by `parseJsonc`.
 */
class ParseDepthError extends Error {
  readonly code = 'OC_JSONC_DEPTH_EXCEEDED';

  /** Builds the human-readable message for a depth overflow at `line`. */
  private static describe(line: number): string {
    return `structural depth exceeded MAX_PARSE_DEPTH (${MAX_PARSE_DEPTH}) at line ${line}`;
  }

  /** @param line 1-based line at which the limit was exceeded. */
  constructor(line: number) {
    super(ParseDepthError.describe(line));
    this.name = 'ParseDepthError';
  }
}
|
||||
|
||||
/**
 * Mutable cursor over the source text: a character offset plus the
 * 1-based line number of that offset.
 */
class ParseState {
  /** Offset of the next unread character. */
  pos = 0;
  /** 1-based line of the next unread character. */
  line = 1;
  /** Text being parsed. */
  public readonly src: string;

  constructor(src: string) {
    this.src = src;
  }

  /** Next character without consuming it; `undefined` at end of input. */
  peek(): string | undefined {
    return this.src[this.pos];
  }

  /**
   * Consumes and returns the next character, bumping `line` when it is a
   * newline. At end of input it returns `undefined` but still advances
   * `pos`.
   */
  advance(): string | undefined {
    const ch = this.peek();
    if (ch === '\n') {
      this.line += 1;
    }
    this.pos += 1;
    return ch;
  }

  /** True once every character has been consumed. */
  eof(): boolean {
    return this.src.length <= this.pos;
  }
}
|
||||
|
||||
/**
 * Parse a JSONC string into a `JsoncAst` plus diagnostics.
 *
 * Soft-error policy: this function does not throw. Problems are
 * reported as diagnostics instead:
 *
 * - Empty / whitespace-and-comments-only input → `root: null`, no
 *   diagnostics.
 * - A parse failure → `root: null` and one `error` diagnostic
 *   (`OC_JSONC_DEPTH_EXCEEDED` for nesting overflow, otherwise
 *   `OC_JSONC_PARSE_FAILED`).
 * - Extra content after the top-level value → the parsed root is kept
 *   and an `OC_JSONC_TRAILING_INPUT` warning is added.
 *
 * `raw` on the returned AST is always the caller's original string,
 * including any leading byte-order mark.
 *
 * @param raw Source text of the JSONC document.
 * @returns The AST and the diagnostics collected while parsing.
 */
export function parseJsonc(raw: string): JsoncParseResult {
  const diagnostics: Diagnostic[] = [];

  // Skip a leading byte-order mark for parsing only. The BOM is written
  // as an explicit escape: an invisible literal character is easily lost
  // by editors and tooling, and an empty-string prefix test is always
  // true, which would drop the first character of every input.
  const text = raw.startsWith('\uFEFF') ? raw.slice(1) : raw;
  const st = new ParseState(text);

  skipWs(st);
  if (st.eof()) {
    return { ast: { kind: 'jsonc', raw, root: null }, diagnostics };
  }

  let root: JsoncValue | null = null;
  try {
    root = parseValue(st, diagnostics, 0);
    skipWs(st);
    if (!st.eof()) {
      diagnostics.push({
        line: st.line,
        message: `unexpected trailing input at offset ${st.pos}`,
        severity: 'warning',
        code: 'OC_JSONC_TRAILING_INPUT',
      });
    }
  } catch (err) {
    // `root` is still null here: the assignment above never completed.
    diagnostics.push({
      line: st.line,
      message: err instanceof Error ? err.message : String(err),
      severity: 'error',
      code: err instanceof ParseDepthError ? err.code : 'OC_JSONC_PARSE_FAILED',
    });
  }

  return { ast: { kind: 'jsonc', raw, root }, diagnostics };
}
|
||||
|
||||
// ---------- internal --------------------------------------------------------
|
||||
|
||||
/**
 * Advances `st` past any run of whitespace, `//` line comments and
 * block comments. Stops at the first character that starts none of
 * these (including a lone `/`) or at end of input. An unterminated
 * block comment is consumed through end of input.
 */
function skipWs(st: ParseState): void {
  const blanks = ' \t\n\r';

  for (;;) {
    if (st.eof()) {
      return;
    }

    const c = st.peek();
    if (c !== undefined && blanks.includes(c)) {
      st.advance();
      continue;
    }
    if (c !== '/') {
      return;
    }

    const marker = st.src[st.pos + 1];
    if (marker === '/') {
      // Line comment: stop in front of the newline so the next pass
      // consumes it as ordinary whitespace.
      while (!st.eof() && st.peek() !== '\n') {
        st.advance();
      }
      continue;
    }
    if (marker !== '*') {
      return;
    }

    // Block comment: consume the opener, then scan for the closer.
    st.advance();
    st.advance();
    while (!st.eof()) {
      const atCloser = st.peek() === '*' && st.src[st.pos + 1] === '/';
      st.advance();
      if (atCloser) {
        st.advance();
        break;
      }
    }
  }
}
|
||||
|
||||
/**
 * Parses one JSONC value at the cursor, dispatching on its first
 * character.
 *
 * @param st    Cursor; leading whitespace/comments are skipped first.
 * @param diags Diagnostics sink, threaded through to nested parsers.
 * @param depth Current nesting depth (0 for the document root).
 * @throws ParseDepthError when `depth` exceeds `MAX_PARSE_DEPTH`.
 * @throws Error on a character that cannot start a value.
 */
function parseValue(st: ParseState, diags: Diagnostic[], depth: number): JsoncValue {
  // Bound recursion up front. Pathological input such as
  // `'['.repeat(20000) + '0' + ']'.repeat(20000)` would otherwise
  // overflow the stack (a V8 RangeError) before any structural
  // diagnostic; the CLI loads attacker-supplied workspace files via
  // `loadAst`, so that would escape as a raw stack-overflow string.
  if (depth > MAX_PARSE_DEPTH) {
    throw new ParseDepthError(st.line);
  }

  skipWs(st);
  const startLine = st.line;
  const c = st.peek();

  switch (c) {
    case '{':
      return parseObject(st, diags, startLine, depth);
    case '[':
      return parseArray(st, diags, startLine, depth);
    case '"':
      return { kind: 'string', value: parseString(st), line: startLine };
    case 't':
    case 'f':
      return parseBoolean(st, startLine);
    case 'n':
      return parseNull(st, startLine);
    case '-':
      return parseNumber(st, startLine);
    default:
      break;
  }

  if (c !== undefined && c >= '0' && c <= '9') {
    return parseNumber(st, startLine);
  }

  throw new Error(
    `unexpected character ${JSON.stringify(c)} at line ${st.line} (offset ${st.pos})`,
  );
}
|
||||
|
||||
/**
 * Parses an object literal whose `{` is at the cursor. Keys must be
 * double-quoted strings; a trailing comma before `}` is accepted.
 * Entries are kept in source order.
 *
 * @param startLine Line of the opening brace, recorded on the node.
 * @param depth     Nesting depth of this object; members parse at
 *                  `depth + 1`.
 * @throws Error on a missing key, `:`, separator or closing brace.
 */
function parseObject(st: ParseState, diags: Diagnostic[], startLine: number, depth: number): JsoncValue {
  if (st.advance() !== '{') {
    throw new Error('expected `{`');
  }

  const entries: JsoncEntry[] = [];
  // Consumes the `}` under the cursor and produces the finished node.
  const finish = (): JsoncValue => {
    st.advance();
    return { kind: 'object', entries, line: startLine };
  };

  skipWs(st);
  if (st.peek() === '}') {
    return finish();
  }

  for (;;) {
    skipWs(st);
    if (st.peek() !== '"') {
      throw new Error(`expected string key at line ${st.line} (offset ${st.pos})`);
    }
    const keyLine = st.line;
    const key = parseString(st);

    skipWs(st);
    if (st.advance() !== ':') {
      throw new Error(`expected \`:\` after key at line ${st.line}`);
    }

    skipWs(st);
    const value = parseValue(st, diags, depth + 1);
    entries.push({ key, value, line: keyLine });

    skipWs(st);
    const separator = st.peek();
    if (separator === '}') {
      return finish();
    }
    if (separator !== ',') {
      throw new Error(
        `expected \`,\` or \`}\` after value at line ${st.line} (offset ${st.pos})`,
      );
    }

    st.advance();
    skipWs(st);
    // A comma directly followed by `}` is a tolerated trailing comma.
    if (st.peek() === '}') {
      return finish();
    }
  }
}
|
||||
|
||||
/**
 * Parses an array literal whose `[` is at the cursor. A trailing comma
 * before `]` is accepted.
 *
 * @param startLine Line of the opening bracket, recorded on the node.
 * @param depth     Nesting depth of this array; elements parse at
 *                  `depth + 1`.
 * @throws Error on a missing separator or closing bracket.
 */
function parseArray(st: ParseState, diags: Diagnostic[], startLine: number, depth: number): JsoncValue {
  if (st.advance() !== '[') {
    throw new Error('expected `[`');
  }

  const items: JsoncValue[] = [];
  // Consumes the `]` under the cursor and produces the finished node.
  const finish = (): JsoncValue => {
    st.advance();
    return { kind: 'array', items, line: startLine };
  };

  skipWs(st);
  if (st.peek() === ']') {
    return finish();
  }

  for (;;) {
    skipWs(st);
    const element = parseValue(st, diags, depth + 1);
    items.push(element);

    skipWs(st);
    const separator = st.peek();
    if (separator === ']') {
      return finish();
    }
    if (separator !== ',') {
      throw new Error(
        `expected \`,\` or \`]\` after value at line ${st.line} (offset ${st.pos})`,
      );
    }

    st.advance();
    skipWs(st);
    // A comma directly followed by `]` is a tolerated trailing comma.
    if (st.peek() === ']') {
      return finish();
    }
  }
}
|
||||
|
||||
/** Single-character escapes and the character each one decodes to. */
const SIMPLE_STRING_ESCAPES: ReadonlyMap<string, string> = new Map([
  ['"', '"'],
  ['\\', '\\'],
  ['/', '/'],
  ['b', '\b'],
  ['f', '\f'],
  ['n', '\n'],
  ['r', '\r'],
  ['t', '\t'],
]);

/**
 * Parses a double-quoted string whose opening `"` is at the cursor and
 * returns its decoded content; the cursor ends just past the closing
 * quote.
 *
 * Supports the standard single-character escapes and `\uXXXX`. Raw
 * (unescaped) control characters inside the string are accepted as-is.
 *
 * @throws Error on an unknown escape, a malformed `\u` escape, or a
 *         string that is never closed. The unterminated-string message
 *         names the line where the string *started*.
 */
function parseString(st: ParseState): string {
  // Captured before consuming anything: by the time an unterminated
  // string is detected the cursor sits at end of input, so `st.line`
  // would name the last line of the file rather than the opening quote.
  const startLine = st.line;
  if (st.advance() !== '"') {
    throw new Error('expected `"`');
  }

  let out = '';
  while (!st.eof()) {
    const c = st.advance();
    if (c === '"') {
      return out;
    }
    if (c !== '\\') {
      out += c;
      continue;
    }

    const esc = st.advance();
    if (esc === undefined) {
      // Backslash was the final character: report it as an unterminated
      // string rather than as `invalid escape \undefined`.
      break;
    }

    if (esc === 'u') {
      const hex = st.src.slice(st.pos, st.pos + 4);
      if (!/^[0-9a-fA-F]{4}$/.test(hex)) {
        throw new Error(`invalid unicode escape at line ${st.line}`);
      }
      out += String.fromCharCode(Number.parseInt(hex, 16));
      // Safe to bypass advance(): four hex digits contain no newline.
      st.pos += 4;
      continue;
    }

    const decoded = SIMPLE_STRING_ESCAPES.get(esc);
    if (decoded === undefined) {
      throw new Error(`invalid escape \\${esc} at line ${st.line}`);
    }
    out += decoded;
  }

  throw new Error(`unterminated string starting at line ${startLine}`);
}
|
||||
|
||||
/**
 * Parses the literal `true` or `false` at the cursor.
 *
 * @param line Line number to record on the resulting node.
 * @throws Error when neither literal starts at the cursor.
 */
function parseBoolean(st: ParseState, line: number): JsoncValue {
  const literals = [
    ['true', true],
    ['false', false],
  ] as const;

  for (const [text, value] of literals) {
    if (st.src.startsWith(text, st.pos)) {
      // Literals contain no newline, so the offset can jump directly.
      st.pos += text.length;
      return { kind: 'boolean', value, line };
    }
  }

  throw new Error(`expected true/false at line ${st.line}`);
}
|
||||
|
||||
/**
 * Consume a `null` literal at the cursor.
 *
 * @param st - Parser cursor; `pos` moves past the literal on success.
 * @param line - Line number recorded on the returned node.
 * @returns A `null` value node.
 * @throws Error when `null` does not start at the cursor.
 */
function parseNull(st: ParseState, line: number): JsoncValue {
  const literal = 'null';
  if (!st.src.startsWith(literal, st.pos)) {
    throw new Error(`expected null at line ${st.line}`);
  }
  st.pos += literal.length;
  return { kind: 'null', line };
}
|
||||
|
||||
/**
 * Consume a numeric literal at the cursor: optional `-`, integer digits,
 * optional `.fraction`, optional exponent (`e`/`E`, optional sign, digits).
 *
 * The scanned text is converted with `Number(...)`. An empty lexeme is
 * rejected explicitly: `Number('')` is `0`, which would otherwise
 * fabricate a zero without consuming any input.
 *
 * @param st - Parser cursor; advanced past the scanned characters.
 * @param line - Line number recorded on the returned node.
 * @returns A `number` value node.
 * @throws Error when nothing numeric was scanned or the text does not
 *   convert to a finite number.
 */
function parseNumber(st: ParseState, line: number): JsoncValue {
  const start = st.pos;
  const isDigit = (c: string | undefined): boolean => c !== undefined && c >= '0' && c <= '9';
  const skipDigits = (): void => {
    while (!st.eof() && isDigit(st.peek())) {
      st.advance();
    }
  };

  if (st.peek() === '-') {
    st.advance();
  }
  skipDigits();
  if (st.peek() === '.') {
    st.advance();
    skipDigits();
  }
  if (st.peek() === 'e' || st.peek() === 'E') {
    st.advance();
    if (st.peek() === '+' || st.peek() === '-') {
      st.advance();
    }
    skipDigits();
  }

  const text = st.src.slice(start, st.pos);
  const value = Number(text);
  if (text.length === 0 || !Number.isFinite(value)) {
    throw new Error(`invalid number "${text}" at line ${st.line}`);
  }
  return { kind: 'number', value, line };
}
|
||||
|
||||
export type { Diagnostic };
|
||||
122
src/oc-path/jsonc/resolve.ts
Normal file
122
src/oc-path/jsonc/resolve.ts
Normal file
@@ -0,0 +1,122 @@
|
||||
/**
|
||||
* Resolve an `OcPath` against a `JsoncAst`.
|
||||
*
|
||||
* The OcPath model has 4 segments (file, section, item, field) — for
|
||||
* JSONC artifacts that's not enough depth, so segments concat with `/`
|
||||
* AND a section/item/field MAY contain dots (`.`) for deeper traversal.
|
||||
* Both forms work:
|
||||
*
|
||||
* oc://config/plugins/entries/foo (segment-per-key)
|
||||
* oc://config/plugins.entries.foo (dotted section)
|
||||
* oc://config/plugins/entries.foo (mixed)
|
||||
*
|
||||
* Each segment is split on `.`, and the resulting flat list of keys
|
||||
* walks the value tree from `ast.root`. Numeric segments index into
|
||||
* arrays.
|
||||
*
|
||||
* @module @openclaw/oc-path/jsonc/resolve
|
||||
*/
|
||||
|
||||
import type { OcPath } from '../oc-path.js';
|
||||
import {
|
||||
isPositionalSeg,
|
||||
isQuotedSeg,
|
||||
resolvePositionalSeg,
|
||||
splitRespectingBrackets,
|
||||
unquoteSeg,
|
||||
} from '../oc-path.js';
|
||||
import type { JsoncAst, JsoncEntry, JsoncValue } from './ast.js';
|
||||
|
||||
/**
 * Outcome of resolving an OcPath against a JSONC document.
 *
 * - `root`: the path carried no section/item/field, so the whole AST matched.
 * - `object-entry`: the final step landed on an object member; `node` is the
 *   entry (key + value).
 * - `value`: any other match (e.g. an array element); `node` is the value.
 *
 * `path` is the flat list of keys/indices that were walked.
 */
export type JsoncOcPathMatch =
  | {
      readonly kind: 'root';
      readonly node: JsoncAst;
    }
  | {
      readonly kind: 'value';
      readonly node: JsoncValue;
      readonly path: readonly string[];
    }
  | {
      readonly kind: 'object-entry';
      readonly node: JsoncEntry;
      readonly path: readonly string[];
    };
|
||||
|
||||
/**
 * Resolve an OcPath against a JSONC tree.
 *
 * The section, item and field slots are each split on `.` (bracket/quote
 * aware), quoted parts are unquoted, and the resulting flat key list is
 * walked from `ast.root`. Object steps match entry keys exactly; array
 * steps require an in-range non-negative integer index.
 *
 * @returns The match, or `null` when the document has no root, a segment
 *   is empty, a key/index is missing, or the walk hits a primitive early.
 */
export function resolveJsoncOcPath(
  ast: JsoncAst,
  path: OcPath,
): JsoncOcPathMatch | null {
  const root = ast.root;
  if (root === null) {
    return null;
  }

  // Flatten the three path slots into one key list. A quoted part such
  // as `"foo/bar"` contributes its unquoted text as a single key.
  const keys: string[] = [];
  for (const slot of [path.section, path.item, path.field]) {
    if (slot === undefined) {
      continue;
    }
    for (const part of splitRespectingBrackets(slot, '.')) {
      keys.push(isQuotedSeg(part) ? unquoteSeg(part) : part);
    }
  }
  if (keys.length === 0) {
    return { kind: 'root', node: ast };
  }

  let cursor: JsoncValue = root;
  let owner: JsoncEntry | null = null;
  const trail: string[] = [];

  for (const rawKey of keys) {
    if (rawKey.length === 0) {
      return null;
    }
    // Positional tokens are resolved against the current container. A
    // `null` result means "not applicable here" (e.g. `-N` on an object),
    // in which case the segment is used as a literal key — this keeps
    // keys like `-5028303500` (openclaw#59934) addressable.
    const key = isPositionalSeg(rawKey) ? (positionalForJsonc(cursor, rawKey) ?? rawKey) : rawKey;
    trail.push(key);

    switch (cursor.kind) {
      case 'object': {
        const hit = cursor.entries.find((e) => e.key === key);
        if (hit === undefined) {
          return null;
        }
        owner = hit;
        cursor = hit.value;
        break;
      }
      case 'array': {
        const index = Number(key);
        if (!Number.isInteger(index) || index < 0 || index >= cursor.items.length) {
          return null;
        }
        owner = null;
        const element = cursor.items[index];
        if (element === undefined) {
          return null;
        }
        cursor = element;
        break;
      }
      default:
        // Primitive: there is nothing to descend into.
        return null;
    }
  }

  if (owner !== null && cursor === owner.value) {
    return { kind: 'object-entry', node: owner, path: trail };
  }
  return { kind: 'value', node: cursor, path: trail };
}
|
||||
|
||||
/**
 * Resolve a positional segment against a JSONC container by delegating
 * to `resolvePositionalSeg`: objects are passed as non-indexable with
 * their ordered key list, arrays as indexable with their length.
 *
 * @returns Whatever `resolvePositionalSeg` returns for containers, or
 *   `null` when `node` is a primitive.
 */
function positionalForJsonc(node: JsoncValue, seg: string): string | null {
  switch (node.kind) {
    case 'object': {
      const keys = node.entries.map((entry) => entry.key);
      return resolvePositionalSeg(seg, { indexable: false, size: keys.length, keys });
    }
    case 'array':
      return resolvePositionalSeg(seg, { indexable: true, size: node.items.length });
    default:
      return null;
  }
}
|
||||
49
src/oc-path/jsonl/ast.ts
Normal file
49
src/oc-path/jsonl/ast.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
/**
|
||||
* JSONL AST types — JSON-Lines: one JSON value per line, separated by
|
||||
* `\n`. The shape used by openclaw session-event logs, audit trails,
|
||||
* and LKG checkpoints (which is why JSONL is part of the universal
|
||||
* OcPath addressing scheme).
|
||||
*
|
||||
* **Per-kind discriminator**: every AST in this substrate carries a
|
||||
* `kind` field. The OcPath resolver dispatches on `kind`.
|
||||
*
|
||||
* **Byte-fidelity**: `raw` is preserved on the root for round-trip
|
||||
* emit. JSONL is line-oriented, so blank lines and per-line comments
|
||||
* (we don't strip them in render mode either — we preserve them as
|
||||
* "raw" line entries) live in the AST.
|
||||
*
|
||||
* @module @openclaw/oc-path/jsonl/ast
|
||||
*/
|
||||
|
||||
import type { JsoncValue } from '../jsonc/ast.js';
|
||||
|
||||
/**
 * Root node of a parsed JSONL document.
 *
 * `raw` holds the source text that round-trip emit returns verbatim;
 * `lines` holds one entry per physical line.
 */
export interface JsonlAst {
  readonly kind: 'jsonl';
  readonly raw: string;
  readonly lines: readonly JsonlLine[];
  /**
   * Line terminator detected when the document was parsed. Render mode
   * joins lines with it so CRLF-authored files stay CRLF. Optional so
   * hand-built ASTs can omit it; render mode then falls back to LF.
   */
  readonly lineEnding?: '\r\n' | '\n';
}
|
||||
|
||||
/**
 * A single physical line of a JSONL document:
 *
 * - `value`: the line parsed to a JSON value; `raw` is the original
 *   line text without its trailing newline.
 * - `blank`: an empty or whitespace-only line, kept for round-trip.
 * - `malformed`: text that did not parse; emitted verbatim.
 *
 * `line` is the line number within the document.
 */
export type JsonlLine =
  | {
      readonly kind: 'value';
      readonly line: number;
      readonly value: JsoncValue;
      readonly raw: string;
    }
  | {
      readonly kind: 'blank';
      readonly line: number;
      readonly raw: string;
    }
  | {
      readonly kind: 'malformed';
      readonly line: number;
      readonly raw: string;
    };
|
||||
228
src/oc-path/jsonl/edit.ts
Normal file
228
src/oc-path/jsonl/edit.ts
Normal file
@@ -0,0 +1,228 @@
|
||||
/**
|
||||
* Mutate a `JsonlAst` at an OcPath. Returns a new AST with the line
|
||||
* (or sub-field of a line) replaced.
|
||||
*
|
||||
* Edit shapes:
|
||||
*
|
||||
* oc://session-events/L42 → replace line 42's whole value
|
||||
* oc://session-events/L42/field → replace field on line 42
|
||||
* oc://session-events/L42/field.sub → dotted descent
|
||||
* oc://session-events/$last/... → resolves to most recent value
|
||||
*
|
||||
* Append (no existing line) is NOT a `set` — use `appendJsonlOcPath` for
|
||||
* that. `setJsonlOcPath` only edits existing addresses.
|
||||
*
|
||||
* @module @openclaw/oc-path/jsonl/edit
|
||||
*/
|
||||
|
||||
import type { OcPath } from '../oc-path.js';
|
||||
import {
|
||||
isPositionalSeg,
|
||||
isQuotedSeg,
|
||||
resolvePositionalSeg,
|
||||
splitRespectingBrackets,
|
||||
unquoteSeg,
|
||||
} from '../oc-path.js';
|
||||
import type { JsoncEntry, JsoncValue } from '../jsonc/ast.js';
|
||||
import type { JsonlAst, JsonlLine } from './ast.js';
|
||||
import { emitJsonl } from './emit.js';
|
||||
|
||||
/**
 * Result of a JSONL edit.
 *
 * On success `ast` is the new document. On failure `reason` is
 * `'unresolved'` when the address did not match anything, or
 * `'not-a-value-line'` when the addressed line is blank or malformed.
 */
export type JsonlEditResult =
  | {
      readonly ok: true;
      readonly ast: JsonlAst;
    }
  | {
      readonly ok: false;
      readonly reason: 'unresolved' | 'not-a-value-line';
    };
|
||||
|
||||
/**
 * Replace the value addressed by `path` inside a JSONL document.
 *
 * `path.section` selects the line. With no item/field the whole line
 * value is replaced; otherwise item and field are split on `.`
 * (bracket/quote aware) and the nested value at that location is
 * replaced. Only existing value lines can be edited.
 *
 * @returns `{ ok: true, ast }` with a re-rendered document, or
 *   `{ ok: false, reason }` when the address does not resolve or the
 *   target line is not a value line.
 */
export function setJsonlOcPath(
  ast: JsonlAst,
  path: OcPath,
  newValue: JsoncValue,
): JsonlEditResult {
  const address = path.section;
  if (address === undefined) {
    return { ok: false, reason: 'unresolved' };
  }

  const at = pickLineIndex(ast, address);
  const line = at === -1 ? undefined : ast.lines[at];
  if (line === undefined) {
    return { ok: false, reason: 'unresolved' };
  }
  // Blank and malformed lines are never turned into value lines.
  if (line.kind !== 'value') {
    return { ok: false, reason: 'not-a-value-line' };
  }

  // Quoted parts are kept verbatim here; `replaceAt` strips the quotes
  // when it compares against entry keys.
  const keys: string[] = [];
  for (const slot of [path.item, path.field]) {
    if (slot !== undefined) {
      keys.push(...splitRespectingBrackets(slot, '.'));
    }
  }

  // With no descent keys the new value replaces the whole line value.
  const value = keys.length === 0 ? newValue : replaceAt(line.value, keys, 0, newValue);
  if (value === null) {
    return { ok: false, reason: 'unresolved' };
  }

  const edited: JsonlLine = { kind: 'value', line: line.line, value, raw: line.raw };
  return finalize(ast, at, edited, path.file);
}
|
||||
|
||||
/**
 * Return a copy of `current` in which the value reached by
 * `segments[i..]` is replaced with `newValue`. Untouched siblings are
 * shared with the input; the input itself is not mutated.
 *
 * Object steps: a positional token is resolved against the ordered key
 * list first. If it does not apply to a keyed container
 * (`resolvePositionalSeg` returns `null`, e.g. `-N`), the segment is
 * looked up as a literal key. That mirrors the resolvers' read path, so
 * an address that resolves (such as the key `-5028303500`,
 * openclaw#59934) can also be written. Quoted segments are unquoted
 * before comparison; keys produced by positional resolution are already
 * real entry keys and are compared as-is.
 *
 * Array steps: positional tokens are resolved against the array size,
 * then the segment must be an in-range non-negative integer.
 *
 * @param current - Node the remaining segments are applied to.
 * @param segments - Full key list of the edit.
 * @param i - Index of the segment to apply at this level.
 * @param newValue - Replacement for the addressed node.
 * @returns The rebuilt node, or `null` when the path does not resolve.
 */
function replaceAt(
  current: JsoncValue,
  segments: readonly string[],
  i: number,
  newValue: JsoncValue,
): JsoncValue | null {
  const seg = segments[i];
  if (seg === undefined) {
    // Ran out of segments: this is the node to replace.
    return newValue;
  }
  if (seg.length === 0) {
    return null;
  }

  if (current.kind === 'object') {
    const positional = isPositionalSeg(seg)
      ? resolvePositionalSeg(seg, {
          indexable: false,
          size: current.entries.length,
          keys: current.entries.map((e) => e.key),
        })
      : null;
    // `null` means "not a positional reference on this container":
    // fall back to the segment itself as a literal (possibly quoted) key.
    const lookupKey = positional ?? (isQuotedSeg(seg) ? unquoteSeg(seg) : seg);

    const idx = current.entries.findIndex((e) => e.key === lookupKey);
    const child = idx === -1 ? undefined : current.entries[idx];
    if (child === undefined) {
      return null;
    }
    const replacedChild = replaceAt(child.value, segments, i + 1, newValue);
    if (replacedChild === null) {
      return null;
    }
    const newEntries = current.entries.slice();
    newEntries[idx] = { ...child, value: replacedChild };
    return {
      kind: 'object',
      entries: newEntries,
      ...(current.line !== undefined ? { line: current.line } : {}),
    };
  }

  if (current.kind === 'array') {
    // Resolve `$first` / `$last` / `-N` before numeric coercion;
    // `Number('$last')` is NaN and would never match.
    let indexText: string = seg;
    if (isPositionalSeg(seg)) {
      const resolved = resolvePositionalSeg(seg, {
        indexable: true,
        size: current.items.length,
      });
      if (resolved === null) {
        return null;
      }
      indexText = resolved;
    }
    const idx = Number(indexText);
    if (!Number.isInteger(idx) || idx < 0 || idx >= current.items.length) {
      return null;
    }
    const child = current.items[idx];
    if (child === undefined) {
      return null;
    }
    const replacedChild = replaceAt(child, segments, i + 1, newValue);
    if (replacedChild === null) {
      return null;
    }
    const newItems = current.items.slice();
    newItems[idx] = replacedChild;
    return {
      kind: 'array',
      items: newItems,
      ...(current.line !== undefined ? { line: current.line } : {}),
    };
  }

  // Primitive with segments left over: nothing to descend into.
  return null;
}
|
||||
|
||||
/**
 * Translate a JSONL line address into an index into `ast.lines`.
 *
 * Accepted shapes:
 * - `$first` / `$last`: first / last value line (blank and malformed
 *   lines are skipped).
 * - `-N`: Nth-from-last value line, counting value lines only.
 * - `Lnnn`: the entry whose `line` number equals `nnn`.
 *
 * @returns The index, or `-1` when the address matches nothing.
 */
function pickLineIndex(ast: JsonlAst, addr: string): number {
  // Positions (within `ast.lines`) of every value line, in file order.
  const valuePositions = (): number[] => {
    const found: number[] = [];
    ast.lines.forEach((entry, at) => {
      if (entry !== undefined && entry.kind === 'value') {
        found.push(at);
      }
    });
    return found;
  };

  if (addr === '$last') {
    const all = valuePositions();
    return all[all.length - 1] ?? -1;
  }
  if (addr === '$first') {
    return valuePositions()[0] ?? -1;
  }
  if (/^-\d+$/.test(addr)) {
    const all = valuePositions();
    const slot = all.length + Number(addr);
    if (slot < 0 || slot >= all.length) {
      return -1;
    }
    return all[slot] ?? -1;
  }

  const parsed = /^L(\d+)$/.exec(addr);
  if (parsed === null || parsed[1] === undefined) {
    return -1;
  }
  const wanted = Number(parsed[1]);
  return ast.lines.findIndex((entry) => entry.line === wanted);
}
|
||||
|
||||
/**
 * Build the post-edit document: swap in `newLine` at `lineIdx`,
 * re-render via `emitJsonl` in render mode, and store the rendered text
 * as the new `raw`.
 *
 * Render mode joins lines without a trailing terminator. When the source
 * `raw` ended with a newline, one is re-appended here (using the
 * document's line-ending convention), so editing a single field does not
 * silently strip the file's final newline.
 *
 * @param ast - Document being edited (not mutated).
 * @param lineIdx - Index into `ast.lines` of the line to replace.
 * @param newLine - Replacement line entry.
 * @param fileName - Optional file name forwarded to `emitJsonl` as
 *   `fileNameForGuard`.
 * @returns Always `{ ok: true, ast }`; errors thrown by `emitJsonl`
 *   propagate.
 */
function finalize(ast: JsonlAst, lineIdx: number, newLine: JsonlLine, fileName?: string): JsonlEditResult {
  const newLines = ast.lines.slice();
  newLines[lineIdx] = newLine;
  const next: JsonlAst = {
    kind: 'jsonl',
    raw: '',
    lines: newLines,
    ...(ast.lineEnding !== undefined ? { lineEnding: ast.lineEnding } : {}),
  };
  const opts = fileName !== undefined
    ? { mode: 'render' as const, fileNameForGuard: fileName }
    : { mode: 'render' as const };
  const rendered = emitJsonl(next, opts);
  // Synthetic ASTs with an empty `raw` keep the previous behavior:
  // no terminator is added.
  const hadFinalNewline = ast.raw.endsWith('\n');
  const raw = hadFinalNewline ? rendered + (ast.lineEnding ?? '\n') : rendered;
  return { ok: true, ast: { ...next, raw } };
}
|
||||
|
||||
/**
 * Append `value` as a new value line at the end of the document and
 * return a new AST whose `raw` is the re-rendered text.
 *
 * The new line number is one more than the last entry's line number
 * (or `1` for an empty document). Append addresses the file as a whole,
 * so no OcPath is taken.
 *
 * The source document's `lineEnding` is carried over to the result, so
 * appending to a CRLF document renders with CRLF instead of silently
 * switching to LF.
 *
 * @param ast - Document to append to (not mutated).
 * @param value - Value for the new line.
 * @returns The new document.
 */
export function appendJsonlOcPath(ast: JsonlAst, value: JsoncValue): JsonlAst {
  const nextLineNo =
    ast.lines.length === 0 ? 1 : (ast.lines[ast.lines.length - 1]?.line ?? 0) + 1;
  const newLine: JsonlLine = {
    kind: 'value',
    line: nextLineNo,
    value,
    raw: '',
  };
  const next: JsonlAst = {
    kind: 'jsonl',
    raw: '',
    lines: [...ast.lines, newLine],
    // Keep the detected convention; omit the key entirely when the
    // source AST did not track one.
    ...(ast.lineEnding !== undefined ? { lineEnding: ast.lineEnding } : {}),
  };
  const rendered = emitJsonl(next, { mode: 'render' });
  return { ...next, raw: rendered };
}
|
||||
100
src/oc-path/jsonl/emit.ts
Normal file
100
src/oc-path/jsonl/emit.ts
Normal file
@@ -0,0 +1,100 @@
|
||||
/**
|
||||
* Emit a `JsonlAst` to bytes.
|
||||
*
|
||||
* **Round-trip mode (default)** returns `ast.raw` verbatim — preserves
|
||||
* malformed lines, blanks, trailing-newline shape exactly.
|
||||
*
|
||||
* **Render mode** rebuilds the file from line entries (re-stringifies
|
||||
* value lines via JSON.stringify; preserves blank/malformed lines
|
||||
* verbatim). Useful for synthetic ASTs.
|
||||
*
|
||||
* **Sentinel guard**: scans every emitted byte sequence for the
|
||||
* `__OPENCLAW_REDACTED__` literal.
|
||||
*
|
||||
* @module @openclaw/oc-path/jsonl/emit
|
||||
*/
|
||||
|
||||
import { OcEmitSentinelError, REDACTED_SENTINEL } from '../sentinel.js';
|
||||
import type { JsoncValue } from '../jsonc/ast.js';
|
||||
import type { JsonlAst } from './ast.js';
|
||||
|
||||
/** Options accepted by `emitJsonl`. */
export interface JsonlEmitOptions {
  /** `'roundtrip'` (the default) returns `ast.raw`; `'render'` rebuilds from lines. */
  readonly mode?: 'roundtrip' | 'render';
  /** File name used to build the `oc://…` path reported in sentinel errors. */
  readonly fileNameForGuard?: string;
  /**
   * Defaults to `true`: bytes that came from the parsed input (the raw
   * document in round-trip mode, blank/malformed lines in render mode)
   * are emitted without a sentinel scan. Set to `false` to scan them.
   * Value lines rendered in render mode are scanned regardless. See
   * `JsoncEmitOptions.acceptPreExistingSentinel` for the rationale.
   */
  readonly acceptPreExistingSentinel?: boolean;
}
|
||||
|
||||
/**
 * Serialize a JSONL AST to text.
 *
 * Round-trip mode (default) returns `ast.raw` unchanged. Render mode
 * rebuilds the text line by line: value lines are re-serialized through
 * `renderValue`, blank and malformed lines are emitted from their `raw`
 * text, and the lines are joined with `ast.lineEnding` (LF when unset).
 *
 * @throws OcEmitSentinelError when the redaction sentinel is found in
 *   scanned raw bytes (only when `acceptPreExistingSentinel` is `false`);
 *   errors thrown by `renderValue` for value lines propagate.
 */
export function emitJsonl(ast: JsonlAst, opts: JsonlEmitOptions = {}): string {
  const guardRoot = opts.fileNameForGuard ? `oc://${opts.fileNameForGuard}` : 'oc://';
  // Raw (pre-existing) bytes are only scanned when the caller opts in.
  const scanRawBytes = !(opts.acceptPreExistingSentinel ?? true);

  if ((opts.mode ?? 'roundtrip') === 'roundtrip') {
    if (scanRawBytes && ast.raw.includes(REDACTED_SENTINEL)) {
      throw new OcEmitSentinelError(`${guardRoot}/[raw]`);
    }
    return ast.raw;
  }

  const rendered = ast.lines.map((entry) => {
    const where = `${guardRoot}/L${entry.line}`;
    if (entry.kind === 'value') {
      // Value lines always go through renderValue, which checks string
      // leaves for the sentinel whatever the trust setting is.
      return renderValue(entry.value, where, []);
    }
    if (scanRawBytes && entry.raw.includes(REDACTED_SENTINEL)) {
      throw new OcEmitSentinelError(where);
    }
    return entry.raw;
  });

  // Join with the convention recorded on the AST so a CRLF document
  // does not come back with LF separators.
  return rendered.join(ast.lineEnding ?? '\n');
}
|
||||
|
||||
/**
 * Serialize one JSONC value node as compact JSON text (no whitespace
 * between tokens).
 *
 * @param value - Node to serialize.
 * @param guardPath - `oc://…` prefix used in sentinel error paths.
 * @param walked - Keys/indices from the line root down to `value`.
 * @throws OcEmitSentinelError when a string value contains the
 *   redaction sentinel anywhere in its text; the error path is
 *   `guardPath` followed by the walked keys joined with `/`.
 */
function renderValue(value: JsoncValue, guardPath: string, walked: readonly string[]): string {
  if (value.kind === 'object') {
    const members = value.entries.map((entry) => {
      const rendered = renderValue(entry.value, guardPath, [...walked, entry.key]);
      return `${JSON.stringify(entry.key)}:${rendered}`;
    });
    return `{${members.join(',')}}`;
  }
  if (value.kind === 'array') {
    const elements = value.items.map((item, index) =>
      renderValue(item, guardPath, [...walked, String(index)]),
    );
    return `[${elements.join(',')}]`;
  }
  if (value.kind === 'string') {
    // A substring match is enough: an embedded sentinel is still the
    // literal redaction token landing in the output.
    if (value.value.includes(REDACTED_SENTINEL)) {
      throw new OcEmitSentinelError(`${guardPath}/${walked.join('/')}`);
    }
    return JSON.stringify(value.value);
  }
  if (value.kind === 'number' || value.kind === 'boolean') {
    return String(value.value);
  }
  if (value.kind === 'null') {
    return 'null';
  }
  throw new Error(`unreachable: jsonl renderValue kind`);
}
|
||||
74
src/oc-path/jsonl/parse.ts
Normal file
74
src/oc-path/jsonl/parse.ts
Normal file
@@ -0,0 +1,74 @@
|
||||
/**
|
||||
* JSONL parser — splits on `\n`, parses each non-empty line as JSONC
|
||||
* (allowing comments/trailing-comma is harmless and matches what
|
||||
* openclaw session logs actually emit). Soft-error policy: malformed
|
||||
* lines surface as `kind: 'malformed'` AST entries plus a diagnostic.
|
||||
*
|
||||
* @module @openclaw/oc-path/jsonl/parse
|
||||
*/
|
||||
|
||||
import type { Diagnostic } from '../ast.js';
|
||||
import { parseJsonc } from '../jsonc/parse.js';
|
||||
import type { JsonlAst, JsonlLine } from './ast.js';
|
||||
|
||||
/** What `parseJsonl` returns: the AST plus any diagnostics raised while parsing. */
export interface JsonlParseResult {
  /** The parsed document. */
  readonly ast: JsonlAst;
  /** One entry per line that could not be parsed. */
  readonly diagnostics: readonly Diagnostic[];
}
|
||||
|
||||
/**
 * Parse JSONL text into a `JsonlAst`.
 *
 * One trailing line terminator is dropped so that a newline-terminated
 * file does not gain a phantom blank line. The remaining text is split
 * into lines; each whitespace-only line becomes a `blank` entry, each
 * line `parseJsonc` yields a root for becomes a `value` entry, and any
 * other line becomes a `malformed` entry with a warning diagnostic.
 *
 * `lineEnding` is `'\r\n'` when at least half of the newlines in the
 * input are CRLF, otherwise `'\n'`. Per-line `raw` text never includes
 * the `\r` of a CRLF pair.
 */
export function parseJsonl(raw: string): JsonlParseResult {
  const occurrences = (pattern: RegExp): number => raw.match(pattern)?.length ?? 0;
  const crlfCount = occurrences(/\r\n/g);
  // Counts every `\n`, including the one inside each CRLF pair.
  const lfCount = occurrences(/\n/g);
  const lineEnding: '\r\n' | '\n' = crlfCount > 0 && crlfCount * 2 >= lfCount ? '\r\n' : '\n';

  // Length of the single trailing terminator to drop, if any.
  const trailer = raw.endsWith('\r\n') ? 2 : raw.endsWith('\n') ? 1 : 0;
  const trimmed = trailer === 0 ? raw : raw.slice(0, -trailer);
  // Normalize to LF so the split below yields clean per-line text.
  const body = trimmed.replace(/\r\n/g, '\n');

  const diagnostics: Diagnostic[] = [];
  const lines: JsonlLine[] = [];

  if (body.length > 0) {
    let lineNo = 0;
    for (const text of body.split('\n')) {
      lineNo += 1;
      if (text.trim().length === 0) {
        lines.push({ kind: 'blank', line: lineNo, raw: text });
        continue;
      }
      const root = parseJsonc(text).ast.root;
      if (root === null) {
        lines.push({ kind: 'malformed', line: lineNo, raw: text });
        diagnostics.push({
          line: lineNo,
          message: `line ${lineNo} could not be parsed as JSON`,
          severity: 'warning',
          code: 'OC_JSONL_LINE_MALFORMED',
        });
        continue;
      }
      lines.push({ kind: 'value', line: lineNo, value: root, raw: text });
    }
  }

  return { ast: { kind: 'jsonl', raw, lines, lineEnding }, diagnostics };
}
|
||||
157
src/oc-path/jsonl/resolve.ts
Normal file
157
src/oc-path/jsonl/resolve.ts
Normal file
@@ -0,0 +1,157 @@
|
||||
/**
|
||||
* Resolve an `OcPath` against a `JsonlAst`.
|
||||
*
|
||||
* Convention for JSONL OcPaths:
|
||||
*
|
||||
* oc://session-events/L42 → entire line 42 value
|
||||
* oc://session-events/L42/result → field on line 42's value
|
||||
* oc://session-events/L42/result.detail → dotted descent
|
||||
* oc://session-events/$last → final non-blank value
|
||||
*
|
||||
* `Lnnn` (line address) and `$last` are the addressing primitives
|
||||
* unique to JSONL — they're how forensics / replay refers to a
|
||||
* specific entry without committing to a content key.
|
||||
*
|
||||
* @module @openclaw/oc-path/jsonl/resolve
|
||||
*/
|
||||
|
||||
import type { OcPath } from '../oc-path.js';
|
||||
import {
|
||||
POS_FIRST,
|
||||
POS_LAST,
|
||||
isPositionalSeg,
|
||||
isQuotedSeg,
|
||||
resolvePositionalSeg,
|
||||
splitRespectingBrackets,
|
||||
unquoteSeg,
|
||||
} from '../oc-path.js';
|
||||
import type { JsoncEntry, JsoncValue } from '../jsonc/ast.js';
|
||||
import type { JsonlAst, JsonlLine } from './ast.js';
|
||||
|
||||
/**
 * Outcome of resolving an OcPath against a JSONL document.
 *
 * - `root`: the path has no line address; the whole document matched.
 * - `line`: the path stops at a line address; `node` is the line entry
 *   (which may be blank or malformed).
 * - `object-entry`: the descent ended on an object member of a line's value.
 * - `value`: the descent ended on any other value.
 *
 * For the last two, `line` is the line number of the addressed line and
 * `path` lists the keys/indices walked inside its value.
 */
export type JsonlOcPathMatch =
  | {
      readonly kind: 'root';
      readonly node: JsonlAst;
    }
  | {
      readonly kind: 'line';
      readonly node: JsonlLine;
    }
  | {
      readonly kind: 'value';
      readonly node: JsoncValue;
      readonly line: number;
      readonly path: readonly string[];
    }
  | {
      readonly kind: 'object-entry';
      readonly node: JsoncEntry;
      readonly line: number;
      readonly path: readonly string[];
    };
|
||||
|
||||
/**
 * Resolve an OcPath against a JSONL document.
 *
 * `path.section` is the line address understood by `pickLine`. With no
 * item/field the line entry itself is returned. Otherwise item and
 * field are split on `.` (bracket/quote aware, quoted parts unquoted)
 * and walked through the line's value: object steps match entry keys,
 * array steps need an in-range non-negative integer index.
 *
 * @returns The match, or `null` when the line address does not resolve,
 *   the line is not a value line but descent was requested, or the
 *   descent fails.
 */
export function resolveJsonlOcPath(
  ast: JsonlAst,
  path: OcPath,
): JsonlOcPathMatch | null {
  const address = path.section;
  if (address === undefined) {
    return { kind: 'root', node: ast };
  }

  const lineEntry = pickLine(ast, address);
  if (lineEntry === null) {
    return null;
  }
  if (path.item === undefined && path.field === undefined) {
    return { kind: 'line', node: lineEntry };
  }
  // Descent only makes sense inside a parsed value.
  if (lineEntry.kind !== 'value') {
    return null;
  }

  const keys: string[] = [];
  for (const slot of [path.item, path.field]) {
    if (slot === undefined) {
      continue;
    }
    for (const part of splitRespectingBrackets(slot, '.')) {
      keys.push(isQuotedSeg(part) ? unquoteSeg(part) : part);
    }
  }

  let cursor: JsoncValue = lineEntry.value;
  let owner: JsoncEntry | null = null;
  const trail: string[] = [];

  for (const rawKey of keys) {
    if (rawKey.length === 0) {
      return null;
    }
    // A positional token that does not apply to the current container
    // (null) is used as a literal key instead — see openclaw#59934.
    const key = isPositionalSeg(rawKey) ? (positionalForJsonc(cursor, rawKey) ?? rawKey) : rawKey;
    trail.push(key);

    switch (cursor.kind) {
      case 'object': {
        const hit = cursor.entries.find((e) => e.key === key);
        if (hit === undefined) {
          return null;
        }
        owner = hit;
        cursor = hit.value;
        break;
      }
      case 'array': {
        const index = Number(key);
        if (!Number.isInteger(index) || index < 0 || index >= cursor.items.length) {
          return null;
        }
        owner = null;
        const element = cursor.items[index];
        if (element === undefined) {
          return null;
        }
        cursor = element;
        break;
      }
      default:
        return null;
    }
  }

  const line = lineEntry.line;
  if (owner !== null && cursor === owner.value) {
    return { kind: 'object-entry', node: owner, line, path: trail };
  }
  return { kind: 'value', node: cursor, line, path: trail };
}
|
||||
|
||||
/**
 * Find the line entry a JSONL line address refers to.
 *
 * - `POS_FIRST` / `POS_LAST`: first / last value line.
 * - `-N`: Nth-from-last value line (blank and malformed lines are not
 *   counted).
 * - `Lnnn`: the entry whose `line` number equals `nnn`, of any kind.
 *
 * @returns The entry, or `null` when nothing matches.
 */
function pickLine(ast: JsonlAst, addr: string): JsonlLine | null {
  type ValueLine = Extract<JsonlLine, { kind: 'value' }>;
  const valueLines = (): ValueLine[] =>
    ast.lines.filter((l): l is ValueLine => l !== undefined && l.kind === 'value');

  if (addr === POS_LAST) {
    const all = valueLines();
    return all[all.length - 1] ?? null;
  }
  if (addr === POS_FIRST) {
    return valueLines()[0] ?? null;
  }
  if (/^-\d+$/.test(addr)) {
    const all = valueLines();
    const slot = all.length + Number(addr);
    if (slot < 0 || slot >= all.length) {
      return null;
    }
    return all[slot] ?? null;
  }

  const parsed = /^L(\d+)$/.exec(addr);
  if (parsed === null || parsed[1] === undefined) {
    return null;
  }
  const wanted = Number(parsed[1]);
  return ast.lines.find((l) => l.line === wanted) ?? null;
}
|
||||
|
||||
/**
 * Hand a positional segment to `resolvePositionalSeg` with a description
 * of the current container: objects are non-indexable and pass their
 * ordered keys; arrays are indexable and pass their length.
 *
 * @returns The helper's result for containers, `null` for primitives.
 */
function positionalForJsonc(node: JsoncValue, seg: string): string | null {
  if (node.kind === 'array') {
    return resolvePositionalSeg(seg, { indexable: true, size: node.items.length });
  }
  if (node.kind !== 'object') {
    return null;
  }
  const keys = node.entries.map((entry) => entry.key);
  return resolvePositionalSeg(seg, { indexable: false, size: keys.length, keys });
}
|
||||
1114
src/oc-path/oc-path.ts
Normal file
1114
src/oc-path/oc-path.ts
Normal file
File diff suppressed because it is too large
Load Diff
294
src/oc-path/parse.ts
Normal file
294
src/oc-path/parse.ts
Normal file
@@ -0,0 +1,294 @@
|
||||
/**
|
||||
* Generic markdown-flavored parser for the 8 workspace files.
|
||||
*
|
||||
* Produces a `MdAst` addressing index over `raw` bytes:
|
||||
* frontmatter (if present), preamble (prose before first H2), and an
|
||||
* H2-block tree with items/tables/code-blocks extracted for OcPath
|
||||
* resolution.
|
||||
*
|
||||
* **No file-kind discrimination.** Same parse path for SOUL.md /
|
||||
* AGENTS.md / MEMORY.md / TOOLS.md / IDENTITY.md / USER.md /
|
||||
* HEARTBEAT.md / SKILL.md. Per-file lint opinions ride downstream
|
||||
* (`@openclaw/oc-lint` rule packs).
|
||||
*
|
||||
* **Byte-fidelity contract**: `raw` is preserved on the AST root so
|
||||
* `emitMd(parse(raw)) === raw` for every input the parser accepts.
|
||||
*
|
||||
* @module @openclaw/oc-path/parse
|
||||
*/
|
||||
|
||||
import type {
|
||||
AstBlock,
|
||||
AstCodeBlock,
|
||||
AstItem,
|
||||
AstTable,
|
||||
Diagnostic,
|
||||
FrontmatterEntry,
|
||||
ParseResult,
|
||||
MdAst,
|
||||
} from './ast.js';
|
||||
import { slugify } from './slug.js';
|
||||
|
||||
/** Line that opens and closes a frontmatter block. */
const FENCE = '---';
/**
 * Byte-order mark (U+FEFF). Written as an escape so the character cannot
 * be silently dropped by editors or tooling: with an empty literal,
 * `startsWith(BOM)` is always true and nothing would ever be stripped.
 */
const BOM = '\uFEFF';
|
||||
|
||||
/**
 * Parse markdown text into a `MdAst`.
 *
 * A leading BOM is skipped for parsing only; `raw` on the AST is the
 * untouched input. Lines are split on LF or CRLF, frontmatter is
 * detected first, and the remaining lines are split into a preamble
 * and H2 blocks. Problems found along the way are reported through
 * `diagnostics`.
 */
export function parseMd(raw: string): ParseResult {
  const diagnostics: Diagnostic[] = [];

  const text = raw.startsWith(BOM) ? raw.slice(BOM.length) : raw;
  const lines = text.split(/\r?\n/);

  const frontmatter = detectFrontmatter(lines, diagnostics);
  // The body starts on the line after the closing fence, or at the top
  // when there is no frontmatter.
  const bodyStart = frontmatter === null ? 0 : frontmatter.endLine + 1;
  const { preamble, blocks } = splitH2Blocks(lines.slice(bodyStart), bodyStart + 1, diagnostics);

  const ast: MdAst = {
    kind: 'md',
    raw,
    frontmatter: frontmatter?.entries ?? [],
    preamble,
    blocks,
  };
  return { ast, diagnostics };
}
|
||||
|
||||
// ---------- Frontmatter ---------------------------------------------------
|
||||
|
||||
/** Parsed frontmatter entries plus where the section ends. */
interface FrontmatterRange {
  readonly entries: readonly FrontmatterEntry[];
  /** 0-based line index of the closing `---`. */
  readonly endLine: number;
}

/**
 * Detect a frontmatter section at the very top of `lines` and read its
 * `key: value` entries.
 *
 * @param lines - File content split into lines.
 * @param diagnostics - Receives an `OC_FRONTMATTER_UNCLOSED` warning when
 *   the first line is a fence but no closing fence follows.
 * @returns The entries and closing-fence index, or `null` when there is
 *   no frontmatter (fewer than two lines, first line is not `---`, or the
 *   fence never closes).
 */
function detectFrontmatter(
  lines: readonly string[],
  diagnostics: Diagnostic[],
): FrontmatterRange | null {
  const opensWithFence = lines.length >= 2 && lines[0] === FENCE;
  if (!opensWithFence) {
    return null;
  }

  // First exact `---` line after the opener closes the section.
  const closeIndex = lines.indexOf(FENCE, 1);
  if (closeIndex === -1) {
    diagnostics.push({
      line: 1,
      message: 'frontmatter opens with --- but never closes',
      severity: 'warning',
      code: 'OC_FRONTMATTER_UNCLOSED',
    });
    return null;
  }

  const entryPattern = /^([a-zA-Z_][a-zA-Z0-9_-]*)\s*:\s*(.*)$/;
  const entries: FrontmatterEntry[] = [];
  for (let index = 1; index < closeIndex; index += 1) {
    const text = lines[index];
    // Blank lines and lines that are not `key: value` (e.g. list-style
    // continuations of the previous key) are skipped without a diagnostic;
    // lists inside frontmatter are not modelled at this layer.
    const match = text.trim().length === 0 ? null : entryPattern.exec(text);
    if (match === null) {
      continue;
    }
    entries.push({
      key: match[1],
      value: unquote(match[2].trim()),
      line: index + 1, // 1-based line number
    });
  }

  return { entries, endLine: closeIndex };
}
|
||||
|
||||
/**
 * Strip one pair of matching surrounding quotes (`"…"` or `'…'`).
 *
 * Values shorter than two characters, unquoted values, and values whose
 * first and last characters differ are returned unchanged. No escape
 * processing is performed on the inner text.
 */
function unquote(value: string): string {
  if (value.length < 2) {
    return value;
  }
  const opener = value[0];
  const startsWithQuote = opener === '"' || opener === "'";
  // The closing character must be the same quote character as the opener.
  return startsWithQuote && value.endsWith(opener) ? value.slice(1, -1) : value;
}
|
||||
|
||||
// ---------- H2 block split -------------------------------------------------
|
||||
|
||||
/**
 * Divide the body lines into a preamble (text before the first `## `
 * heading) and one `AstBlock` per `## ` heading.
 *
 * Heading-looking lines between ``` fence lines are ignored. Each block
 * spans from its heading to the line before the next heading (or the end
 * of the body) and carries its extracted items, tables and code blocks.
 *
 * @returns `preamble` joined with `\n`, and the blocks in document order.
 */
function splitH2Blocks(
  bodyLines: readonly string[],
  /** 1-based line number of `bodyLines[0]` in the original file. */
  bodyStartLineNum: number,
  diagnostics: Diagnostic[],
): { preamble: string; blocks: AstBlock[] } {
  // Pass 1: record the index and text of every `## ` heading outside fences.
  const headings: { line: number; text: string }[] = [];
  let insideFence = false;
  bodyLines.forEach((line, index) => {
    if (line.startsWith('```')) {
      insideFence = !insideFence;
      return;
    }
    if (insideFence) {
      return;
    }
    const match = /^##\s+(\S.*?)\s*$/.exec(line);
    if (match !== null) {
      headings.push({ line: index, text: match[1] });
    }
  });

  // No headings at all: the whole body is preamble.
  if (headings.length === 0) {
    return { preamble: bodyLines.join('\n'), blocks: [] };
  }

  // Pass 2: build one block per heading.
  const blocks = headings.map((heading, position): AstBlock => {
    const nextStart = headings[position + 1]?.line ?? bodyLines.length;
    const blockBodyLines = bodyLines.slice(heading.line + 1, nextStart);
    const headingLineNum = bodyStartLineNum + heading.line;
    // The block body starts on the line right after the heading.
    const firstBodyLineNum = headingLineNum + 1;

    const items = extractItems(blockBodyLines, firstBodyLineNum, diagnostics);
    const tables = extractTables(blockBodyLines, firstBodyLineNum);
    const codeBlocks = extractCodeBlocks(blockBodyLines, firstBodyLineNum);

    return {
      heading: heading.text,
      slug: slugify(heading.text),
      line: headingLineNum,
      bodyText: blockBodyLines.join('\n'),
      items,
      tables,
      codeBlocks,
    };
  });

  return {
    preamble: bodyLines.slice(0, headings[0].line).join('\n'),
    blocks,
  };
}
|
||||
|
||||
// ---------- Items ----------------------------------------------------------
|
||||
|
||||
/** An unindented `-`, `*` or `+` bullet; group 1 is the trimmed bullet text. */
const BULLET_RE = /^(?:[-*+])\s+(.+?)\s*$/;
/** `key: value` inside bullet text; group 1 is the key, group 2 the value. */
const KV_RE = /^([^:]+?)\s*:\s*(.+)$/;

/**
 * Collect the bullet items of one block body.
 *
 * Lines between ``` fence lines are skipped. A bullet whose text looks
 * like `key: value` additionally gets a `kv` field and is slugged by its
 * key; any other bullet is slugged by its full text.
 *
 * @param blockBodyLines - Lines of the block, heading excluded.
 * @param startLineNum - 1-based file line number of `blockBodyLines[0]`.
 * @param _diagnostics - Currently unused.
 */
function extractItems(
  blockBodyLines: readonly string[],
  startLineNum: number,
  _diagnostics: Diagnostic[],
): AstItem[] {
  const collected: AstItem[] = [];
  let insideFence = false;

  blockBodyLines.forEach((line, offset) => {
    if (line.startsWith('```')) {
      insideFence = !insideFence;
      return;
    }
    if (insideFence) {
      return;
    }

    const bullet = BULLET_RE.exec(line);
    if (bullet === null) {
      return;
    }

    const text = bullet[1];
    const lineNum = startLineNum + offset;
    const pair = KV_RE.exec(text);
    if (pair === null) {
      collected.push({ text, slug: slugify(text), line: lineNum });
      return;
    }
    collected.push({
      text,
      slug: slugify(pair[1]),
      line: lineNum,
      kv: { key: pair[1].trim(), value: pair[2].trim() },
    });
  });

  return collected;
}
|
||||
|
||||
// ---------- Tables ---------------------------------------------------------
|
||||
|
||||
/**
 * Collect the pipe tables of one block body.
 *
 * A table is a line starting with `|` (after trimming) that is directly
 * followed by a separator row made of `-` / `:` cells; every following
 * line that starts with `|` is a data row. Lines between ``` fence lines
 * are skipped, so a table shown inside a fenced code sample is not
 * indexed — the same fence handling the heading and item extractors use.
 *
 * @param blockBodyLines - Lines of the block, heading excluded.
 * @param startLineNum - 1-based file line number of `blockBodyLines[0]`.
 * @returns Tables in document order; `line` is the header row's line number.
 */
function extractTables(
  blockBodyLines: readonly string[],
  startLineNum: number,
): AstTable[] {
  const separatorRow = /^\s*\|\s*[:-]+(?:\s*\|\s*[:-]+)*\s*\|?\s*$/;
  const tables: AstTable[] = [];
  let inCode = false;
  let i = 0;
  while (i < blockBodyLines.length) {
    const headerLine = blockBodyLines[i];

    // Track fenced code so its content is never mistaken for a table.
    if (headerLine.startsWith('```')) {
      inCode = !inCode;
      i++;
      continue;
    }
    if (inCode) {
      i++;
      continue;
    }

    const sepLine = blockBodyLines[i + 1];
    if (
      headerLine.trim().startsWith('|') &&
      sepLine !== undefined &&
      separatorRow.test(sepLine)
    ) {
      const headers = splitTableRow(headerLine);
      const rows: string[][] = [];
      // Data rows start after the separator and run while lines begin with `|`.
      let j = i + 2;
      while (j < blockBodyLines.length && blockBodyLines[j].trim().startsWith('|')) {
        rows.push(splitTableRow(blockBodyLines[j]));
        j++;
      }
      tables.push({ headers, rows, line: startLineNum + i });
      i = j;
      continue;
    }
    i++;
  }
  return tables;
}

/**
 * Split one table row into trimmed cell strings, dropping a single
 * leading and a single trailing `|` first. Escaped pipes are not
 * treated specially.
 */
function splitTableRow(line: string): string[] {
  const trimmed = line.trim().replace(/^\|/, '').replace(/\|$/, '');
  return trimmed.split('|').map((cell) => cell.trim());
}
|
||||
|
||||
// ---------- Code blocks ---------------------------------------------------
|
||||
|
||||
/**
 * Collect the fenced code blocks of one block body.
 *
 * A line starting with ``` opens a block; the next line starting with
 * ``` closes it. Text after the opening backticks becomes `lang`
 * (`null` when empty). A fence that is never closed runs to the end of
 * the block body.
 *
 * @param blockBodyLines - Lines of the block, heading excluded.
 * @param startLineNum - 1-based file line number of `blockBodyLines[0]`.
 * @returns Code blocks in document order; `line` is the opening fence line.
 */
function extractCodeBlocks(
  blockBodyLines: readonly string[],
  startLineNum: number,
): AstCodeBlock[] {
  const found: AstCodeBlock[] = [];
  // The block currently being collected, or null while outside any fence.
  let pending: { lang: string | null; line: number; body: string[] } | null = null;

  blockBodyLines.forEach((line, offset) => {
    const isFence = line.startsWith('```');
    if (pending === null) {
      if (isFence) {
        const info = line.slice(3).trim();
        pending = {
          lang: info.length > 0 ? info : null,
          line: startLineNum + offset,
          body: [],
        };
      }
      return;
    }
    if (isFence) {
      found.push({ lang: pending.lang, text: pending.body.join('\n'), line: pending.line });
      pending = null;
      return;
    }
    pending.body.push(line);
  });

  // An unclosed fence still yields a block holding everything after it.
  const unclosed = pending as { lang: string | null; line: number; body: string[] } | null;
  if (unclosed !== null) {
    found.push({ lang: unclosed.lang, text: unclosed.body.join('\n'), line: unclosed.line });
  }
  return found;
}
|
||||
113
src/oc-path/resolve.ts
Normal file
113
src/oc-path/resolve.ts
Normal file
@@ -0,0 +1,113 @@
|
||||
/**
|
||||
* OcPath → AST node resolver.
|
||||
*
|
||||
* Resolves an `OcPath` against a `MdAst` and returns the matched
|
||||
* node (block / item / frontmatter entry / kv field) or `null` if the
|
||||
* path doesn't match anything.
|
||||
*
|
||||
* The address dispatch:
|
||||
*
|
||||
* { file } → AST root
|
||||
* { file, section } → AstBlock with matching slug
|
||||
* { file, section, item } → AstItem inside that block
|
||||
* { file, section, item, field } → kv.value of that item if kv.key matches
|
||||
*
|
||||
* The `file` segment is informational here — callers verify file
|
||||
* matching before passing the AST. The resolver doesn't load files; it
|
||||
* walks an in-memory AST.
|
||||
*
|
||||
* @module @openclaw/oc-path/resolve
|
||||
*/
|
||||
|
||||
import type { AstBlock, AstItem, FrontmatterEntry, MdAst } from './ast.js';
|
||||
import type { OcPath } from './oc-path.js';
|
||||
import { isOrdinalSeg, isPositionalSeg, parseOrdinalSeg, resolvePositionalSeg } from './oc-path.js';
|
||||
|
||||
/**
 * Result of resolving a path against a markdown AST. `kind` says which
 * sort of node was matched so callers can branch on it.
 */
export type OcPathMatch =
  | { readonly kind: 'root'; readonly node: MdAst }
  | { readonly kind: 'frontmatter'; readonly node: FrontmatterEntry }
  | { readonly kind: 'block'; readonly node: AstBlock }
  | { readonly kind: 'item'; readonly node: AstItem; readonly block: AstBlock }
  | {
      readonly kind: 'item-field';
      readonly node: AstItem;
      readonly block: AstBlock;
      /** The kv.value string, surfaced for convenience. */
      readonly value: string;
    };

/**
 * Resolve an `OcPath` against a markdown AST.
 *
 * Dispatch, in order:
 *  - `section === '[frontmatter]'` → the frontmatter entry whose key equals
 *    `item` (or `field` when `item` is absent); the key comparison is exact.
 *  - no `section` → the AST root.
 *  - `section` only → the first block whose precomputed slug equals the
 *    lowercased section segment.
 *  - `section` + `item` → an item of that block, selected by ordinal,
 *    positional, or lowercased-slug segment.
 *  - `section` + `item` + `field` → that item, provided it has a `kv`
 *    whose key equals `field` ignoring case.
 *
 * Note that segments are only lowercased before comparison, not passed
 * through `slugify`: "Boundaries" finds `## Boundaries`, but a segment
 * has to already be in slug form to find a multi-word heading.
 *
 * @returns The match, or `null` when nothing resolves.
 */
export function resolveMdOcPath(ast: MdAst, path: OcPath): OcPathMatch | null {
  const { section, item: itemSeg, field } = path;

  // Frontmatter addressing: oc://FILE/[frontmatter]/key. The key normally
  // sits in the `item` slot; `field` is accepted as a fallback.
  if (section === '[frontmatter]') {
    const key = itemSeg ?? field;
    if (key === undefined) {
      return null;
    }
    const entry = ast.frontmatter.find((candidate) => candidate.key === key);
    return entry === undefined ? null : { kind: 'frontmatter', node: entry };
  }

  if (section === undefined) {
    return { kind: 'root', node: ast };
  }

  const wantedSection = section.toLowerCase();
  const block = ast.blocks.find((candidate) => candidate.slug === wantedSection);
  if (block === undefined) {
    return null;
  }
  if (itemSeg === undefined) {
    return { kind: 'block', node: block };
  }

  const item = findItemInBlock(block, itemSeg);
  if (item === undefined) {
    return null;
  }
  if (field === undefined) {
    return { kind: 'item', node: item, block };
  }

  // A field only resolves on a kv-shaped item whose key matches; anything
  // else is reported as unresolved rather than guessed at.
  const kv = item.kv;
  if (kv === undefined || kv.key.toLowerCase() !== field.toLowerCase()) {
    return null;
  }
  return { kind: 'item-field', node: item, block, value: kv.value };
}

/**
 * Pick one item of `block` from an item segment. Precedence: ordinal
 * segment, then positional segment, then slug comparison.
 */
function findItemInBlock(block: AstBlock, seg: string): AstItem | undefined {
  if (isOrdinalSeg(seg)) {
    const ordinal = parseOrdinalSeg(seg);
    if (ordinal === null || ordinal < 0 || ordinal >= block.items.length) {
      return undefined;
    }
    return block.items[ordinal];
  }

  if (isPositionalSeg(seg)) {
    const concrete = resolvePositionalSeg(seg, {
      indexable: true,
      size: block.items.length,
    });
    return concrete === null ? undefined : block.items[Number(concrete)];
  }

  const wantedSlug = seg.toLowerCase();
  return block.items.find((candidate) => candidate.slug === wantedSlug);
}
|
||||
63
src/oc-path/sentinel.ts
Normal file
63
src/oc-path/sentinel.ts
Normal file
@@ -0,0 +1,63 @@
|
||||
/**
|
||||
* Substrate-level redaction-sentinel guard.
|
||||
*
|
||||
* Closes the `__OPENCLAW_REDACTED__` corruption class by rejecting the
|
||||
* literal string at the emit boundary. Per-call-site reject rules
|
||||
* (added piecemeal in [#62281](https://github.com/openclaw/openclaw/issues/62281),
|
||||
* [#44357](https://github.com/openclaw/openclaw/issues/44357),
|
||||
* [#13495](https://github.com/openclaw/openclaw/issues/13495), and others)
|
||||
* caught the symptom; this guard removes the substrate that produced
|
||||
* the symptom in the first place.
|
||||
*
|
||||
* Throwing at emit (not at the consumer) means every code path through
|
||||
* the substrate is covered, including future call sites we haven't
|
||||
* audited.
|
||||
*
|
||||
* @module @openclaw/oc-path/sentinel
|
||||
*/
|
||||
|
||||
/**
 * Marker string that stands in for redacted secrets in OpenClaw's runtime
 * representation. It must never reach disk: a writer that sees it has been
 * handed the redacted view instead of the real data.
 */
export const REDACTED_SENTINEL = '__OPENCLAW_REDACTED__';

/**
 * Error raised by `guardSentinel` when a value about to be emitted
 * contains `REDACTED_SENTINEL`. Treat it as a fatal write error —
 * stripping the marker and carrying on would silently corrupt the file.
 *
 * `path` is the address string the caller supplied for the offending
 * value (e.g. `oc://config/plugins.entries.foo.token`, or a coarser
 * address such as a file name when nothing more precise is available).
 */
export class OcEmitSentinelError extends Error {
  readonly code = 'OC_EMIT_SENTINEL';
  readonly path: string;

  constructor(path: string) {
    const message = `emit refused to write "${REDACTED_SENTINEL}" sentinel literal at ${path}`;
    super(message);
    this.name = 'OcEmitSentinelError';
    this.path = path;
  }
}

/**
 * Reject a value that carries the redaction sentinel.
 *
 * The check is a substring match, so `prefix__OPENCLAW_REDACTED__suffix`
 * is refused just like the bare sentinel. Non-string values are ignored.
 *
 * @param value - The leaf value about to be written.
 * @param ocPath - Address reported on the thrown error.
 * @throws OcEmitSentinelError when `value` is a string containing the sentinel.
 */
export function guardSentinel(value: unknown, ocPath: string): void {
  if (typeof value !== 'string') {
    return;
  }
  if (value.indexOf(REDACTED_SENTINEL) !== -1) {
    throw new OcEmitSentinelError(ocPath);
  }
}
|
||||
43
src/oc-path/slug.ts
Normal file
43
src/oc-path/slug.ts
Normal file
@@ -0,0 +1,43 @@
|
||||
/**
|
||||
* Slug derivation for OcPath section/item addressing.
|
||||
*
|
||||
* A slug is the kebab-case lowercase form of a heading or item text:
|
||||
* "Tool Guidance" → "tool-guidance"
|
||||
* " Restricted Data " → "restricted-data"
|
||||
* "deny-rule-1" → "deny-rule-1" (already a slug)
|
||||
* "API_KEY" → "api-key"
|
||||
* "Multi-tenant isolation" → "multi-tenant-isolation"
|
||||
* "deny: secrets" → "deny-secrets" (colon + space → hyphen)
|
||||
*
|
||||
* Deterministic + idempotent. Used by parse to pre-compute slugs for
|
||||
* blocks and items, and by resolveOcPath to match section/item names.
|
||||
*
|
||||
* @module @openclaw/oc-path/slug
|
||||
*/
|
||||
|
||||
/** Any run of characters that may not appear in a slug. */
const NON_SLUG_CHARS = /[^a-z0-9-]+/g;
/** Two or more consecutive hyphens (also matches a single one). */
const COLLAPSE_HYPHENS = /-+/g;
/** Hyphens at the very start or very end. */
const TRIM_HYPHENS = /^-+|-+$/g;

/**
 * Turn arbitrary text into a slug usable as a path segment.
 *
 * Steps, applied in order:
 *  1. lowercase the text;
 *  2. turn every `_` into `-`;
 *  3. turn each run of characters outside `[a-z0-9-]` into one `-`;
 *  4. collapse repeated `-` into one;
 *  5. strip leading and trailing `-`.
 *
 * Text with no slug-valid characters (e.g. `"!!"`) yields `""`. Callers
 * should treat an empty slug as unmatchable, not as a wildcard.
 */
export function slugify(text: string): string {
  const lowered = text.toLowerCase();
  const underscoresMapped = lowered.replace(/_/g, '-');
  const separated = underscoresMapped.replace(NON_SLUG_CHARS, '-');
  const collapsed = separated.replace(COLLAPSE_HYPHENS, '-');
  return collapsed.replace(TRIM_HYPHENS, '');
}
|
||||
99
src/oc-path/tests/edit.test.ts
Normal file
99
src/oc-path/tests/edit.test.ts
Normal file
@@ -0,0 +1,99 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { setMdOcPath as setOcPath } from '../edit.js';
|
||||
import { parseOcPath } from '../oc-path.js';
|
||||
import { parseMd } from '../parse.js';
|
||||
|
||||
describe('setOcPath — frontmatter', () => {
  // Parse `markdown`, then apply a single set at `address`.
  const editMd = (markdown: string, address: string, value: string) =>
    setOcPath(parseMd(markdown).ast, parseOcPath(address), value);

  it('replaces a frontmatter value', () => {
    const markdown = '---\nname: github\ndescription: old desc\n---\n\nBody.\n';
    const result = editMd(markdown, 'oc://AGENTS.md/[frontmatter]/description', 'new desc');

    expect(result.ok).toBe(true);
    if (result.ok) {
      const rewritten = result.ast.raw;
      expect(rewritten).toContain('description: new desc');
      expect(rewritten).not.toContain('old desc');
    }
  });

  it('reports unresolved when the key is missing', () => {
    const result = editMd('---\nname: x\n---\n', 'oc://AGENTS.md/[frontmatter]/nope', 'x');
    expect(result).toEqual({ ok: false, reason: 'unresolved' });
  });

  it('quotes values that need YAML-escaping', () => {
    const result = editMd('---\nx: a\n---\n', 'oc://AGENTS.md/[frontmatter]/x', 'has: colon');
    expect(result.ok).toBe(true);
    if (result.ok) {
      // A value containing `: ` must come back wrapped in double quotes.
      expect(result.ast.raw).toContain('x: "has: colon"');
    }
  });
});
|
||||
|
||||
describe('setOcPath — item kv field', () => {
  // Parse `markdown`, then apply a single set at `address`.
  const editMd = (markdown: string, address: string, value: string) =>
    setOcPath(parseMd(markdown).ast, parseOcPath(address), value);

  it('replaces an item kv value and reflects it in the rebuilt body', () => {
    const markdown = '## Boundaries\n\n- enabled: true\n- timeout: 5\n';
    const result = editMd(markdown, 'oc://AGENTS.md/boundaries/timeout/timeout', '30');

    expect(result.ok).toBe(true);
    if (result.ok) {
      const rewritten = result.ast.raw;
      expect(rewritten).toContain('- timeout: 30');
      // The sibling item must be left alone.
      expect(rewritten).toContain('- enabled: true');
    }
  });

  it('reports no-item-kv for an item without kv shape', () => {
    const markdown = '## Boundaries\n\n- plain bullet\n';
    const result = editMd(markdown, 'oc://AGENTS.md/boundaries/plain-bullet/plain-bullet', 'x');
    expect(result).toEqual({ ok: false, reason: 'no-item-kv' });
  });

  it('reports unresolved when section/item is missing', () => {
    const result = editMd('## Other\n\n- foo: bar\n', 'oc://AGENTS.md/missing/foo/foo', 'x');
    expect(result).toEqual({ ok: false, reason: 'unresolved' });
  });

  it('reports not-writable for section-only addresses', () => {
    const result = editMd('## Boundaries\n\n- enabled: true\n', 'oc://AGENTS.md/boundaries', 'x');
    expect(result).toEqual({ ok: false, reason: 'not-writable' });
  });
});
|
||||
108
src/oc-path/tests/emit.test.ts
Normal file
108
src/oc-path/tests/emit.test.ts
Normal file
@@ -0,0 +1,108 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { emitMd } from '../emit.js';
|
||||
import { parseMd } from '../parse.js';
|
||||
import { OcEmitSentinelError } from '../sentinel.js';
|
||||
|
||||
describe('emit — round-trip mode (default)', () => {
  // Parse then emit with default options.
  const roundTrip = (source: string) => emitMd(parseMd(source).ast);

  it('returns the raw bytes byte-for-byte', () => {
    const source = '---\nname: x\n---\n\n## Sec\n\n- a\n- b\n';
    expect(roundTrip(source)).toBe(source);
  });

  it('round-trips CRLF line endings', () => {
    const source = '## Heading\r\n\r\n- item\r\n';
    expect(roundTrip(source)).toBe(source);
  });

  it('round-trips a file with no frontmatter and no sections', () => {
    const source = 'Just preamble. No structure.\n';
    expect(roundTrip(source)).toBe(source);
  });

  it('echoes raw bytes containing the sentinel by default; strict mode rejects', () => {
    // Default round-trip echoes the parsed bytes as-is; passing
    // `acceptPreExistingSentinel: false` opts into rejecting a sentinel
    // that was already present in the input.
    const source = '## Section\n\n- token: __OPENCLAW_REDACTED__\n';
    const parsed = parseMd(source).ast;

    expect(emitMd(parsed)).toBe(source);

    const strictEmit = () => emitMd(parsed, { acceptPreExistingSentinel: false });
    expect(strictEmit).toThrow(OcEmitSentinelError);
  });
});
|
||||
|
||||
describe('emit — render mode', () => {
  it('renders frontmatter + blocks', () => {
    const toolsBlock = {
      heading: 'Tools',
      slug: 'tools',
      line: 5,
      bodyText: '- gh: GitHub',
      items: [{ text: 'gh: GitHub', slug: 'gh', line: 7, kv: { key: 'gh', value: 'GitHub' } }],
      tables: [],
      codeBlocks: [],
    };
    const ast = {
      kind: 'md' as const,
      raw: '',
      frontmatter: [
        { key: 'name', value: 'github', line: 2 },
        { key: 'description', value: 'gh CLI', line: 3 },
      ],
      preamble: '',
      blocks: [toolsBlock],
    };

    const rendered = emitMd(ast, { mode: 'render' });

    const expectedFragments = ['name: github', 'description: gh CLI', '## Tools', '- gh: GitHub'];
    for (const fragment of expectedFragments) {
      expect(rendered).toContain(fragment);
    }
  });

  it('quotes frontmatter values containing special chars', () => {
    const ast = {
      kind: 'md' as const,
      raw: '',
      frontmatter: [{ key: 'title', value: 'a: b', line: 2 }],
      preamble: '',
      blocks: [],
    };

    const rendered = emitMd(ast, { mode: 'render' });

    expect(rendered).toContain('title: "a: b"');
  });

  it('throws if a kv item value matches the sentinel', () => {
    const redactedItem = {
      text: 'token: __OPENCLAW_REDACTED__',
      slug: 'token',
      line: 2,
      kv: { key: 'token', value: '__OPENCLAW_REDACTED__' },
    };
    const ast = {
      kind: 'md' as const,
      raw: '',
      frontmatter: [],
      preamble: '',
      blocks: [
        {
          heading: 'Secrets',
          slug: 'secrets',
          line: 1,
          bodyText: '- token: __OPENCLAW_REDACTED__',
          items: [redactedItem],
          tables: [],
          codeBlocks: [],
        },
      ],
    };

    // Rendering has to refuse to write the sentinel instead of emitting it.
    const render = () => emitMd(ast, { mode: 'render', fileNameForGuard: 'AGENTS.md' });
    expect(render).toThrow(OcEmitSentinelError);
  });
});
|
||||
707
src/oc-path/tests/find.test.ts
Normal file
707
src/oc-path/tests/find.test.ts
Normal file
@@ -0,0 +1,707 @@
|
||||
/**
|
||||
* `findOcPaths` — multi-match search verb test surface.
|
||||
*
|
||||
* Tests cover: `*` single-segment expansion across all 4 kinds; `**`
|
||||
* recursive descent for jsonc + yaml; the wildcard guard on
|
||||
* `resolveOcPath` / `setOcPath`; the slot-shape preservation invariant
|
||||
* (a `*` in the `item` slot produces concrete paths whose `item` field
|
||||
* carries the matched value).
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { findOcPaths } from '../find.js';
|
||||
import { parseJsonc } from '../jsonc/parse.js';
|
||||
import { parseJsonl } from '../jsonl/parse.js';
|
||||
import { parseMd } from '../parse.js';
|
||||
import { parseYaml } from '../yaml/parse.js';
|
||||
import {
|
||||
formatOcPath,
|
||||
hasWildcard,
|
||||
OcPathError,
|
||||
parseOcPath,
|
||||
} from '../oc-path.js';
|
||||
import {
|
||||
resolveOcPath,
|
||||
setOcPath,
|
||||
} from '../universal.js';
|
||||
|
||||
// ---------- hasWildcard ----------------------------------------------------
|
||||
|
||||
describe('hasWildcard', () => {
  // Parse the address and report whether it counts as a wildcard pattern.
  const isPattern = (address: string) => hasWildcard(parseOcPath(address));

  it('detects single-segment * in any slot', () => {
    for (const address of ['oc://X/*/y', 'oc://X/a/*', 'oc://X/a/b/*']) {
      expect(isPattern(address)).toBe(true);
    }
  });

  it('detects ** in any slot', () => {
    for (const address of ['oc://X/**', 'oc://X/a/**/c']) {
      expect(isPattern(address)).toBe(true);
    }
  });

  it('detects wildcards inside dotted sub-segments', () => {
    for (const address of ['oc://X/a.*.c', 'oc://X/a.**.c']) {
      expect(isPattern(address)).toBe(true);
    }
  });

  it('returns false for plain paths', () => {
    for (const address of ['oc://X/a/b/c', 'oc://X/a.b.c']) {
      expect(isPattern(address)).toBe(false);
    }
  });

  it('treats `*` inside an identifier as literal', () => {
    for (const address of ['oc://X/foo*bar', 'oc://X/a*']) {
      expect(isPattern(address)).toBe(false);
    }
  });
});
|
||||
|
||||
// ---------- Wildcard guard on resolveOcPath / setOcPath -------------------
|
||||
|
||||
describe('wildcard guard', () => {
  const yaml = parseYaml('steps:\n - id: a\n command: foo\n').ast;

  it('resolveOcPath throws OcPathError for wildcard pattern (F16)', () => {
    // A wildcard handed to resolveOcPath must throw with code
    // `OC_PATH_WILDCARD_IN_RESOLVE` and point the caller at findOcPaths,
    // so it cannot be confused with an ordinary "not found" (`null`).
    // This mirrors setOcPath's `wildcard-not-allowed` reason.
    const resolveSingleStar = () => resolveOcPath(yaml, parseOcPath('oc://wf/steps/*/command'));
    expect(resolveSingleStar).toThrow(/findOcPaths/);

    try {
      resolveOcPath(yaml, parseOcPath('oc://wf/**'));
      expect.fail('should have thrown');
    } catch (thrown) {
      expect(thrown).toBeInstanceOf(OcPathError);
      expect((thrown as OcPathError).code).toBe('OC_PATH_WILDCARD_IN_RESOLVE');
    }
  });

  it('setOcPath returns wildcard-not-allowed for wildcard pattern', () => {
    const result = setOcPath(yaml, parseOcPath('oc://wf/steps/*/command'), 'bar');
    expect(result.ok).toBe(false);
    if (!result.ok) {
      expect(result.reason).toBe('wildcard-not-allowed');
    }
  });

  it('setOcPath wildcard guard reason carries actionable detail', () => {
    const result = setOcPath(yaml, parseOcPath('oc://wf/**'), 'bar');
    expect(result.ok).toBe(false);
    if (!result.ok) {
      expect(result.detail).toContain('findOcPaths');
    }
  });
});
|
||||
|
||||
// ---------- findOcPaths — fast-path (no wildcards) -------------------------
|
||||
|
||||
describe('findOcPaths — non-wildcard fast-path', () => {
  // Fresh single-key YAML document for each case.
  const parseFixture = () => parseYaml('name: x\n').ast;

  it('wraps resolveOcPath result for plain path', () => {
    const matches = findOcPaths(parseFixture(), parseOcPath('oc://wf/name'));
    expect(matches).toHaveLength(1);

    const [only] = matches;
    expect(only.match.kind).toBe('leaf');
    expect(formatOcPath(only.path)).toBe('oc://wf/name');
  });

  it('returns empty for unresolved plain path', () => {
    const matches = findOcPaths(parseFixture(), parseOcPath('oc://wf/missing'));
    expect(matches).toHaveLength(0);
  });
});
|
||||
|
||||
// ---------- findOcPaths — YAML --------------------------------------------
|
||||
|
||||
describe('findOcPaths — YAML kind', () => {
  // Three-step workflow fixture; one entry per source line.
  const fixtureLines = [
    'steps:',
    ' - id: build',
    ' command: npm run build',
    ' - id: test',
    ' command: npm test',
    ' - id: lint',
    ' command: npm run lint',
  ];
  const yaml = parseYaml(fixtureLines.map((line) => `${line}\n`).join('')).ast;

  // Run a pattern search against the shared fixture.
  const search = (pattern: string) => findOcPaths(yaml, parseOcPath(pattern));

  it('* in item slot enumerates each step', () => {
    const matches = search('oc://wf.lobster/steps/*/command');
    expect(matches).toHaveLength(3);

    const formatted = matches.map((found) => formatOcPath(found.path));
    expect(formatted).toEqual([
      'oc://wf.lobster/steps/0/command',
      'oc://wf.lobster/steps/1/command',
      'oc://wf.lobster/steps/2/command',
    ]);
  });

  it('preserves slot shape — concrete path has matched value in item slot', () => {
    const matches = search('oc://wf/steps/*/id');
    expect(matches).toHaveLength(3);

    for (const { path } of matches) {
      expect(path.section).toBe('steps');
      expect(path.field).toBe('id');
      expect(path.item).toMatch(/^[0-2]$/);
    }
  });

  it('returns leaf valueText for each match', () => {
    const matches = search('oc://wf/steps/*/id');
    const values = matches.map(({ match }) => (match.kind === 'leaf' ? match.valueText : null));
    expect(values).toEqual(['build', 'test', 'lint']);
  });

  it('** descends recursively', () => {
    const nested = parseYaml('a:\n b:\n c: deep\n d: shallow\n').ast;
    const matches = findOcPaths(nested, parseOcPath('oc://wf/**'));

    // `**` also matches the containers along the way; only the leaf
    // values are compared here.
    const leafValues = matches.flatMap(({ match }) =>
      match.kind === 'leaf' ? [match.valueText] : [],
    );
    expect(leafValues.toSorted()).toEqual(['deep', 'shallow']);
  });

  it('returns empty for path that does not match', () => {
    expect(search('oc://wf/missing/*/x')).toHaveLength(0);
  });

  it('every returned path is consumable by resolveOcPath', () => {
    for (const found of search('oc://wf/steps/*/command')) {
      const resolved = resolveOcPath(yaml, found.path);
      expect(resolved).not.toBeNull();
      expect(resolved?.kind).toBe('leaf');
    }
  });
});
|
||||
|
||||
// ---------- findOcPaths — JSONC --------------------------------------------
|
||||
|
||||
describe('findOcPaths — JSONC kind', () => {
  // Config fixture with three plugin entries; one entry per source line.
  const fixtureLines = [
    '{',
    ' "plugins": {',
    ' "github": {"enabled": true},',
    ' "gitlab": {"enabled": false},',
    ' "slack": {"enabled": true}',
    ' }',
    '}',
  ];
  const jsonc = parseJsonc(fixtureLines.map((line) => `${line}\n`).join('')).ast;

  // Every plugin's `enabled` flag.
  const searchEnabledFlags = () => findOcPaths(jsonc, parseOcPath('oc://config/plugins/*/enabled'));

  it('* in item slot enumerates each plugin', () => {
    const matches = searchEnabledFlags();
    expect(matches).toHaveLength(3);

    const pluginNames = matches.map((found) => found.path.item);
    const ordered = pluginNames.toSorted((a, b) => (a ?? '').localeCompare(b ?? ''));
    expect(ordered).toEqual(['github', 'gitlab', 'slack']);
  });

  it('returns boolean leaves with leafType', () => {
    for (const { match } of searchEnabledFlags()) {
      expect(match.kind).toBe('leaf');
      if (match.kind === 'leaf') {
        expect(match.leafType).toBe('boolean');
      }
    }
  });
});
|
||||
|
||||
// ---------- findOcPaths — JSONL --------------------------------------------
|
||||
|
||||
describe('findOcPaths — JSONL kind', () => {
  // Fixture: three value lines; `event` differs per line, `userId` is constant.
  const jsonl = parseJsonl(
    '{"event":"start","userId":"u1"}\n' +
      '{"event":"action","userId":"u1"}\n' +
      '{"event":"end","userId":"u1"}\n',
  ).ast;

  it('* in section slot enumerates each value line', () => {
    const out = findOcPaths(jsonl, parseOcPath('oc://session/*/event'));
    expect(out).toHaveLength(3);
    const events = out.map((m) => (m.match.kind === 'leaf' ? m.match.valueText : ''));
    expect(events).toEqual(['start', 'action', 'end']);
  });

  it('preserves Lnnn line addresses in concrete paths', () => {
    const out = findOcPaths(jsonl, parseOcPath('oc://session/*/event'));
    // Guard against a vacuous pass: the loop below asserts nothing when
    // the walker returns an empty list.
    expect(out).toHaveLength(3);
    for (const m of out) {
      expect(m.path.section).toMatch(/^L\d+$/);
    }
  });

  // F8 — line-slot union and predicate. Without these, yaml/jsonc
  // walkers handled them but JSONL fell through to `pickLine(addr)`
  // which returns null for union/predicate shapes → silent zero matches.
  it('union {L1,L2} at line slot enumerates each alternative', () => {
    const out = findOcPaths(jsonl, parseOcPath('oc://session/{L1,L3}/event'));
    expect(out).toHaveLength(2);
    const events = out.map((m) => (m.match.kind === 'leaf' ? m.match.valueText : ''));
    expect(events).toEqual(['start', 'end']);
  });

  it('union of positional + literal line addresses works', () => {
    const out = findOcPaths(jsonl, parseOcPath('oc://session/{L1,$last}/event'));
    expect(out).toHaveLength(2);
    const events = out.map((m) => (m.match.kind === 'leaf' ? m.match.valueText : ''));
    expect(events).toEqual(['start', 'end']);
  });

  it('predicate [event=action] at line slot filters by top-level field', () => {
    const out = findOcPaths(jsonl, parseOcPath('oc://session/[event=action]/userId'));
    expect(out).toHaveLength(1);
    // Assert the kind explicitly; otherwise a non-leaf match would skip
    // the value check below and the test would still pass.
    expect(out[0]?.match.kind).toBe('leaf');
    if (out[0]?.match.kind === 'leaf') {
      expect(out[0].match.valueText).toBe('u1');
    }
  });

  it('predicate [event=missing] at line slot matches zero lines (silent zero is correct)', () => {
    const out = findOcPaths(jsonl, parseOcPath('oc://session/[event=missing]/userId'));
    expect(out).toHaveLength(0);
  });
});
|
||||
|
||||
// ---------- Positional primitives ($first / $last / -N) -------------------
|
||||
|
||||
describe('positional primitives — yaml', () => {
  // Fixture: a three-element `steps` sequence with ids a, b, c.
  const yaml = parseYaml('steps:\n - id: a\n - id: b\n - id: c\n').ast;

  // Shared shape of the single-match checks: resolve the path, require a
  // leaf, then compare its text.
  const expectLeafText = (ocPath: string, expected: string) => {
    const hit = resolveOcPath(yaml, parseOcPath(ocPath));
    expect(hit?.kind).toBe('leaf');
    if (hit?.kind === 'leaf') {
      expect(hit.valueText).toBe(expected);
    }
  };

  it('resolveOcPath accepts $first', () => {
    expectLeafText('oc://wf/steps/$first/id', 'a');
  });

  it('resolveOcPath accepts $last', () => {
    expectLeafText('oc://wf/steps/$last/id', 'c');
  });

  it('resolveOcPath accepts negative index', () => {
    expectLeafText('oc://wf/steps/-2/id', 'b');
  });

  it('out-of-range positional returns null', () => {
    const hit = resolveOcPath(yaml, parseOcPath('oc://wf/steps/-99/id'));
    expect(hit).toBeNull();
  });

  it('positional on empty container returns null', () => {
    const emptySteps = parseYaml('steps: []\n').ast;
    const hit = resolveOcPath(emptySteps, parseOcPath('oc://wf/steps/$first/id'));
    expect(hit).toBeNull();
  });

  it('findOcPaths emits concrete index for positional', () => {
    const matches = findOcPaths(yaml, parseOcPath('oc://wf/steps/$last/id'));
    expect(matches).toHaveLength(1);
    // `$last` is rewritten to the concrete index of the final element.
    expect(matches[0].path.item).toBe('2');
  });

  it('hasWildcard returns false for positional patterns', () => {
    // Positional tokens resolve deterministically, so they are not wildcards.
    for (const pattern of ['oc://X/$last/id', 'oc://X/-1/id']) {
      expect(hasWildcard(parseOcPath(pattern))).toBe(false);
    }
  });
});
|
||||
|
||||
describe('positional primitives — jsonc', () => {
  // Fixture: a single numeric array under `items`.
  const jsonc = parseJsonc('{"items":[10,20,30]}').ast;

  it('$first picks first array element', () => {
    const hit = resolveOcPath(jsonc, parseOcPath('oc://config/items/$first'));
    expect(hit?.kind).toBe('leaf');
    if (hit?.kind === 'leaf') {
      expect(hit.valueText).toBe('10');
    }
  });

  it('$last picks last array element', () => {
    const hit = resolveOcPath(jsonc, parseOcPath('oc://config/items/$last'));
    expect(hit?.kind).toBe('leaf');
    if (hit?.kind === 'leaf') {
      expect(hit.valueText).toBe('30');
    }
  });

  it('$first on object picks first-declared key', () => {
    // On an object, "first" means declaration order: key `a` here.
    const flatObject = parseJsonc('{"a":1,"b":2,"c":3}').ast;
    const hit = resolveOcPath(flatObject, parseOcPath('oc://config/$first'));
    expect(hit?.kind).toBe('leaf');
    if (hit?.kind === 'leaf') {
      expect(hit.valueText).toBe('1');
    }
  });
});
|
||||
|
||||
describe('positional primitives — jsonl', () => {
  // Fixture: three value lines whose `event` is start / step / end.
  const jsonl = parseJsonl(
    '{"event":"start"}\n{"event":"step"}\n{"event":"end"}\n',
  ).ast;

  // Each test asserts the match kind before narrowing. Without that
  // assertion a null (or non-leaf) resolve would skip the value check
  // and the test would pass without verifying anything — the yaml and
  // jsonc positional suites already assert the kind the same way.

  it('$first picks first value line', () => {
    const m = resolveOcPath(jsonl, parseOcPath('oc://session/$first/event'));
    expect(m?.kind).toBe('leaf');
    if (m?.kind === 'leaf') {
      expect(m.valueText).toBe('start');
    }
  });

  it('$last picks last value line (existing behavior)', () => {
    const m = resolveOcPath(jsonl, parseOcPath('oc://session/$last/event'));
    expect(m?.kind).toBe('leaf');
    if (m?.kind === 'leaf') {
      expect(m.valueText).toBe('end');
    }
  });

  it('-1 is alias for $last', () => {
    const m = resolveOcPath(jsonl, parseOcPath('oc://session/-1/event'));
    expect(m?.kind).toBe('leaf');
    if (m?.kind === 'leaf') {
      expect(m.valueText).toBe('end');
    }
  });
});
|
||||
|
||||
// ---------- Segment unions {a,b,c} -----------------------------------------
|
||||
|
||||
describe('union segments — yaml', () => {
  // Fixture: three steps, each with a differently named action field
  // (command / run / pipeline).
  const fixtureText = [
    'steps:\n',
    ' - id: a\n command: x\n',
    ' - id: b\n run: y\n',
    ' - id: c\n pipeline: z\n',
  ].join('');
  const yaml = parseYaml(fixtureText).ast;

  it('{command,run} matches each step that has either field', () => {
    const matches = findOcPaths(yaml, parseOcPath('oc://wf/steps/*/{command,run}'));
    expect(matches).toHaveLength(2);

    const fieldNames = matches
      .map((entry) => entry.path.field)
      .toSorted((a, b) => (a ?? '').localeCompare(b ?? ''));
    expect(fieldNames).toEqual(['command', 'run']);
  });

  it('preserves the chosen alternative in concrete paths', () => {
    const alternatives = ['command', 'pipeline'];
    const matches = findOcPaths(yaml, parseOcPath('oc://wf/steps/*/{command,pipeline}'));
    expect(matches).toHaveLength(2);
    matches.forEach((entry) => {
      expect(alternatives).toContain(entry.path.field);
    });
  });

  it('unions on top-level keys', () => {
    const flat = parseYaml('a: 1\nb: 2\nc: 3\n').ast;
    const matches = findOcPaths(flat, parseOcPath('oc://X/{a,c}'));
    expect(matches).toHaveLength(2);

    const texts = matches.map((entry) =>
      entry.match.kind === 'leaf' ? entry.match.valueText : '',
    );
    expect(texts.toSorted()).toEqual(['1', '3']);
  });

  it('hasWildcard detects unions (single-match guard rejects them)', () => {
    const unionPattern = 'oc://X/{a,b}';
    expect(hasWildcard(parseOcPath(unionPattern))).toBe(true);

    // F16 — the wildcard guard throws OC_PATH_WILDCARD_IN_RESOLVE rather
    // than returning a silent null.
    const resolveUnion = () =>
      resolveOcPath(parseYaml('a: 1\nb: 2\n').ast, parseOcPath(unionPattern));
    expect(resolveUnion).toThrow(/findOcPaths/);
  });
});
|
||||
|
||||
// ---------- Value predicates [key=value] ----------------------------------
|
||||
|
||||
describe('value predicates — yaml', () => {
  // Fixture: three steps (build / test / lint), each with a `command`.
  const yaml = parseYaml(
    'steps:\n' +
      ' - id: build\n command: npm run build\n' +
      ' - id: test\n command: npm test\n' +
      ' - id: lint\n command: npm run lint\n',
  ).ast;

  it('[id=test] selects the matching step', () => {
    const out = findOcPaths(yaml, parseOcPath('oc://wf/steps/[id=test]/command'));
    expect(out).toHaveLength(1);
    // Assert the kind before narrowing so a non-leaf match fails the test
    // instead of silently skipping the value check.
    expect(out[0].match.kind).toBe('leaf');
    if (out[0].match.kind === 'leaf') {
      expect(out[0].match.valueText).toBe('npm test');
    }
    expect(out[0].path.item).toBe('1'); // concrete index of the matched step
  });

  it('predicate yields no matches when key/value missing', () => {
    expect(findOcPaths(yaml, parseOcPath('oc://wf/steps/[id=nonexistent]/command'))).toHaveLength(0);
  });

  it('predicate concretizes the index — path round-trips through resolveOcPath', () => {
    const out = findOcPaths(yaml, parseOcPath('oc://wf/steps/[id=build]/command'));
    expect(out).toHaveLength(1);
    const resolved = resolveOcPath(yaml, out[0].path);
    expect(resolved?.kind).toBe('leaf');
  });

  it('predicate rejects single-match verbs (treated as wildcard)', () => {
    // F16 — wildcard guard throws on predicate too (predicate is a
    // multi-match shape; resolveOcPath is single-match only).
    expect(() =>
      resolveOcPath(yaml, parseOcPath('oc://wf/steps/[id=build]')),
    ).toThrow(/findOcPaths/);
  });
});
|
||||
|
||||
describe('quoted segments (v1.0)', () => {
  // Motivation: openclaw#69004 — model alias `anthropic/claude-opus-4-7`.
  // A slash inside a key collides with the path-level slash split, so
  // quoting is the only way to address such keys.
  const fixtureText = [
    '{"agents":{"defaults":{"models":{',
    '"anthropic/claude-opus-4-7":{"alias":"opus47","contextWindow":1000000},',
    '"github-copilot/claude-opus-4.7-1m-internal":{"alias":"copilot-opus-1m","contextWindow":1000000},',
    '"plain":{"alias":"p","contextWindow":200000}',
    '}}}}',
  ].join('');
  const jsonc = parseJsonc(fixtureText).ast;

  // Wildcard pattern shared by the two findOcPaths tests.
  const ALIAS_PATTERN = 'oc://config/agents.defaults.models/*/alias';

  it('resolveOcPath — quoted segment with literal slash', () => {
    const target = parseOcPath(
      'oc://config/agents.defaults.models/"anthropic/claude-opus-4-7"/alias',
    );
    const hit = resolveOcPath(jsonc, target);
    expect(hit?.kind).toBe('leaf');
    if (hit?.kind === 'leaf') {
      expect(hit.valueText).toBe('opus47');
    }
  });

  it('resolveOcPath — quoted segment with literal slash AND dot', () => {
    const target = parseOcPath(
      'oc://config/agents.defaults.models/"github-copilot/claude-opus-4.7-1m-internal"/alias',
    );
    const hit = resolveOcPath(jsonc, target);
    expect(hit?.kind).toBe('leaf');
    if (hit?.kind === 'leaf') {
      expect(hit.valueText).toBe('copilot-opus-1m');
    }
  });

  it('quoted segment with whitespace', () => {
    const spaced = parseJsonc('{"prompts":{"hello world":"value"}}').ast;
    const hit = resolveOcPath(spaced, parseOcPath('oc://X/prompts/"hello world"'));
    expect(hit?.kind).toBe('leaf');
    if (hit?.kind === 'leaf') {
      expect(hit.valueText).toBe('value');
    }
  });

  it('quoted segment with embedded escape sequences', () => {
    // The fixture holds one key with a literal backslash and one with a
    // literal quote. Only the backslash key is resolved below; the
    // quote-bearing key is present but not asserted on.
    const escaped = parseJsonc('{"keys":{"a\\\\b":"v1","c\\"d":"v2"}}').ast;
    const backslashHit = resolveOcPath(escaped, parseOcPath('oc://X/keys/"a\\\\b"'));
    expect(backslashHit?.kind).toBe('leaf');
    if (backslashHit?.kind === 'leaf') {
      expect(backslashHit.valueText).toBe('v1');
    }
  });

  it('findOcPaths — wildcard returns paths with quoted keys when needed', () => {
    const matches = findOcPaths(jsonc, parseOcPath(ALIAS_PATTERN));
    expect(matches).toHaveLength(3);

    // Slash-bearing keys come back wrapped in quotes; `plain` stays bare.
    const itemSlots = matches.map((entry) => entry.path.item);
    const expectedSlots = [
      'plain',
      '"anthropic/claude-opus-4-7"',
      '"github-copilot/claude-opus-4.7-1m-internal"',
    ];
    for (const wanted of expectedSlots) {
      expect(itemSlots.some((slot) => slot === wanted)).toBe(true);
    }
  });

  it('findOcPaths — emitted paths round-trip through resolveOcPath', () => {
    const matches = findOcPaths(jsonc, parseOcPath(ALIAS_PATTERN));
    matches.forEach((entry) => {
      const resolved = resolveOcPath(jsonc, entry.path);
      expect(resolved?.kind).toBe('leaf');
    });
  });

  it('rejects unbalanced quotes at parse time', () => {
    const parseUnterminated = () => parseOcPath('oc://X/"unterminated');
    expect(parseUnterminated).toThrow(/Unbalanced/);
  });

  it('control characters still rejected inside quotes', () => {
    const parseWithNul = () => parseOcPath('oc://X/"\x00"');
    expect(parseWithNul).toThrow(/Control character/);
  });
});
|
||||
|
||||
describe('value predicates — numeric operators (v1.1)', () => {
  // Evidence: openclaw#54383 — compaction fails when maxTokens > model output cap.
  // Doctor lint rule: flag any model with maxTokens > 128000 (Anthropic per-request output cap).
  const jsonc = parseJsonc(
    '{"models":{"providers":{"anthropic":{"models":[' +
      '{"id":"claude-sonnet-4-6","contextWindow":1000000,"maxTokens":128000},' +
      '{"id":"claude-opus-4-7","contextWindow":1000000,"maxTokens":240000},' +
      '{"id":"claude-sonnet-4-7","contextWindow":200000,"maxTokens":64000}' +
      ']}}}}',
  ).ast;

  // Slot layout: section=`models.providers.anthropic.models`, item=predicate, field=`id`.
  const PREFIX = 'oc://config/models.providers.anthropic.models';

  it('> finds models exceeding the per-request output cap', () => {
    const out = findOcPaths(jsonc, parseOcPath(`${PREFIX}/[maxTokens>128000]/id`));
    expect(out).toHaveLength(1);
    // Assert the kind before narrowing so a non-leaf match cannot skip
    // the value check and pass silently.
    expect(out[0].match.kind).toBe('leaf');
    if (out[0].match.kind === 'leaf') {
      expect(out[0].match.valueText).toBe('claude-opus-4-7');
    }
  });

  it('>= matches the boundary', () => {
    const out = findOcPaths(jsonc, parseOcPath(`${PREFIX}/[maxTokens>=128000]/id`));
    const ids = out.map((m) => (m.match.kind === 'leaf' ? m.match.valueText : ''));
    expect(ids.toSorted()).toEqual(['claude-opus-4-7', 'claude-sonnet-4-6']);
  });

  it('< filters small context windows', () => {
    const out = findOcPaths(jsonc, parseOcPath(`${PREFIX}/[contextWindow<500000]/id`));
    expect(out).toHaveLength(1);
    // Same guard as the `>` test: require a leaf before checking its text.
    expect(out[0].match.kind).toBe('leaf');
    if (out[0].match.kind === 'leaf') {
      expect(out[0].match.valueText).toBe('claude-sonnet-4-7');
    }
  });

  it('<= matches the boundary', () => {
    const out = findOcPaths(jsonc, parseOcPath(`${PREFIX}/[contextWindow<=200000]/id`));
    const ids = out.map((m) => (m.match.kind === 'leaf' ? m.match.valueText : ''));
    expect(ids).toEqual(['claude-sonnet-4-7']);
  });

  it('numeric operator rejects non-numeric leaves silently', () => {
    // String leaf, numeric op — predicate doesn't match (no false positive).
    const out = findOcPaths(jsonc, parseOcPath(`${PREFIX}/[id>5]/id`));
    expect(out).toHaveLength(0);
  });

  it('rejects numeric predicate value that is not a number', () => {
    const out = findOcPaths(jsonc, parseOcPath(`${PREFIX}/[maxTokens>foo]/id`));
    expect(out).toHaveLength(0);
  });
});
|
||||
|
||||
describe('value predicates — jsonc', () => {
  // Fixture: three plugins; github and jira are enabled, slack is not.
  const fixtureText =
    '{"plugins":{"github":{"enabled":true,"role":"vcs"},"slack":{"enabled":false,"role":"chat"},"jira":{"enabled":true,"role":"tracker"}}}';
  const jsonc = parseJsonc(fixtureText).ast;

  it('[enabled=true] filters by sibling boolean', () => {
    const pattern = parseOcPath('oc://config/plugins/[enabled=true]/role');
    const matches = findOcPaths(jsonc, pattern);
    expect(matches).toHaveLength(2);

    // Compare order-insensitively: only the enabled plugins' roles survive.
    const roleTexts = matches.map((entry) =>
      entry.match.kind === 'leaf' ? entry.match.valueText : '',
    );
    expect(roleTexts.toSorted()).toEqual(['tracker', 'vcs']);
  });
});
|
||||
|
||||
// ---------- Ordinal addressing (#N) for distinct duplicate slugs ----------
|
||||
|
||||
describe('ordinal addressing — md', () => {
  // Fixture: the first two list items share a slug (`foo: a`, `foo: b`);
  // the third (`bar: c`) is unique.
  const md = parseMd('## Tools\n\n- foo: a\n- foo: b\n- bar: c\n').ast;

  // Resolve an ordinal path and require a leaf with the given text.
  const expectLeafText = (ocPath: string, expected: string) => {
    const hit = resolveOcPath(md, parseOcPath(ocPath));
    expect(hit?.kind).toBe('leaf');
    if (hit?.kind === 'leaf') {
      expect(hit.valueText).toBe(expected);
    }
  };

  it('#0 picks the first item by document order', () => {
    expectLeafText('oc://AGENTS.md/tools/#0/foo', 'a');
  });

  it('#1 picks the second item — distinct from #0 even though slug collides', () => {
    expectLeafText('oc://AGENTS.md/tools/#1/foo', 'b');
  });

  it('out-of-range #N returns null', () => {
    const hit = resolveOcPath(md, parseOcPath('oc://AGENTS.md/tools/#99/foo'));
    expect(hit).toBeNull();
  });

  it('findOcPaths disambiguates duplicate-slug items via #N', () => {
    const matches = findOcPaths(md, parseOcPath('oc://AGENTS.md/tools/*/foo'));
    // Two items carry key `foo` (and the matching slug); `bar` does not match.
    expect(matches).toHaveLength(2);

    const itemSlots = matches.map((entry) => entry.path.item);
    expect(itemSlots).toEqual(['#0', '#1']);

    const texts = matches.map((entry) =>
      entry.match.kind === 'leaf' ? entry.match.valueText : '',
    );
    expect(texts.toSorted()).toEqual(['a', 'b']);
  });

  it('non-duplicate slug keeps slug form (back-compat)', () => {
    const uniqueSlugs = parseMd('## Tools\n\n- foo: a\n- bar: b\n').ast;
    const matches = findOcPaths(uniqueSlugs, parseOcPath('oc://AGENTS.md/tools/*'));
    // Neither slug collides, so both are emitted in slug form rather than #N.
    const itemSlots = matches
      .map((entry) => entry.path.item)
      .toSorted((a, b) => (a ?? '').localeCompare(b ?? ''));
    expect(itemSlots).toEqual(['bar', 'foo']);
  });
});
|
||||
|
||||
// ---------- findOcPaths — Markdown -----------------------------------------
|
||||
|
||||
describe('findOcPaths — Markdown kind', () => {
  // Fixture: two frontmatter keys plus a `Tools` section with three kv items.
  const fixtureText = [
    '---\nname: drafter\nrole: writer\n---\n\n',
    '## Tools\n\n',
    '- send_email: enabled\n',
    '- search: enabled\n',
    '- read_email: disabled\n',
  ].join('');
  const md = parseMd(fixtureText).ast;

  it('* in field slot enumerates frontmatter keys', () => {
    const matches = findOcPaths(md, parseOcPath('oc://SOUL.md/[frontmatter]/*'));
    expect(matches).toHaveLength(2);

    // Read the key from the item slot, falling back to the field slot.
    const keyNames = matches
      .map((entry) => entry.path.item ?? entry.path.field)
      .toSorted((a, b) => (a ?? '').localeCompare(b ?? ''));
    expect(keyNames).toEqual(['name', 'role']);
  });

  it('* in field slot enumerates each item kv key', () => {
    // The item slug is the slugified kv key ('send_email' → 'send-email').
    const matches = findOcPaths(md, parseOcPath('oc://SKILL.md/Tools/send-email/*'));
    expect(matches).toHaveLength(1);

    const [only] = matches;
    expect(only.match.kind).toBe('leaf');
    if (only.match.kind === 'leaf') {
      expect(only.match.valueText).toBe('enabled');
    }
  });

  it('* in item slot + matching field returns each item whose kv key matches', () => {
    // `- send_email: enabled` has kv key `send_email`, so the pattern
    // field 'send_email' matches exactly that item; `search` and
    // `read_email` carry different kv keys.
    const matches = findOcPaths(md, parseOcPath('oc://SKILL.md/Tools/*/send_email'));
    expect(matches).toHaveLength(1);
    expect(matches[0].path.item).toBe('send-email');
  });

  it('** at section slot matches items at every depth (F14 — cross-kind symmetry)', () => {
    // Without the retain-i branch on `**`, walkMd only descended one
    // level (i + 1, consuming `**`), whereas the yaml/jsonc walkers
    // retain `**` so it keeps matching deeper. Lint rules that rely on
    // uniform `**` behavior across kinds (e.g. sweeping every section
    // for `risk:`) would silently get 0 md matches on a multi-block file.
    //
    // Pattern `**/send-email`: `**` matches the `tools` block and
    // `send-email` (kebab slug) matches the item beneath it. Without the
    // retain-i branch the walker consumes `**` at the section layer and
    // then cannot satisfy the item slot, because it is inside the wrong
    // block looking for an item slug.
    const multiBlockText = [
      '## Boundaries\n\n',
      '- never: rm -rf\n\n',
      '## Tools\n\n',
      '- send_email: enabled\n',
      '- search: enabled\n',
    ].join('');
    const multiBlock = parseMd(multiBlockText).ast;

    const matches = findOcPaths(multiBlock, parseOcPath('oc://SOUL.md/**/send-email'));
    // The `send-email` item lives under the `tools` block; pin that the
    // md `**` walk reaches it at least once.
    expect(matches.length).toBeGreaterThanOrEqual(1);

    const itemSlots = matches
      .map((entry) => entry.path.item)
      .filter((slot): slot is string => slot !== undefined);
    expect(itemSlots).toContain('send-email');
  });
});
|
||||
|
||||
describe('findOcPaths — quoted segments survive expansion (regression: resolve↔find symmetry)', () => {
  it('finds keys with slashes when the path quotes them and a sibling wildcards', () => {
    // Closes ClawSweeper P2 on PR #78678. When a pattern needed
    // expansion (e.g. a trailing union or wildcard), the JSONC walker
    // bypassed `resolveJsoncOcPath` and compared object keys against the
    // raw `cur.value`, so patterns with quoted literals returned no
    // matches even though resolve worked. This test combines a quoted
    // middle segment with a trailing union.
    const configText = `{
"agents": {
"defaults": {
"models": {
"github-copilot/claude-opus-4-7": {
"alias": "opus-internal",
"contextWindow": 200000
}
}
}
}
}
`;
    const parsed = parseJsonc(configText);
    const pattern = parseOcPath(
      'oc://config.jsonc/agents.defaults.models/"github-copilot/claude-opus-4-7"/{alias,contextWindow}',
    );
    const matches = findOcPaths(parsed.ast, pattern);

    // Each alternative of the union should produce one match.
    expect(matches.length).toBe(2);

    const fieldNames = matches
      .map((entry) => entry.path.field)
      .toSorted((a, b) => (a ?? '').localeCompare(b ?? ''));
    expect(fieldNames).toEqual(['alias', 'contextWindow']);
  });
});
|
||||
17
src/oc-path/tests/fixtures/real/AGENTS.md
vendored
Normal file
17
src/oc-path/tests/fixtures/real/AGENTS.md
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
## Roles
|
||||
|
||||
- planner: breaks down user goals into tasks
|
||||
- executor: runs the planned tasks one at a time
|
||||
- reviewer: checks output before user-visible writes
|
||||
|
||||
## Tools
|
||||
|
||||
- gh: GitHub CLI for issues, PRs, CI
|
||||
- curl: HTTP client
|
||||
- rg: ripgrep — fast file content search
|
||||
|
||||
## Boundaries
|
||||
|
||||
- never edit /etc, /usr, or system paths
|
||||
- always confirm before destructive operations
|
||||
- read SOUL.md before each session for persona context
|
||||
17
src/oc-path/tests/fixtures/real/BOOTSTRAP.md
vendored
Normal file
17
src/oc-path/tests/fixtures/real/BOOTSTRAP.md
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
# Workspace bootstrap
|
||||
|
||||
This is the first thing the agent reads on a fresh workspace. Once
|
||||
the user finishes setup (filling in SOUL.md, USER.md, etc.),
|
||||
BOOTSTRAP.md gets removed and the workspace is "live."
|
||||
|
||||
## Setup checklist
|
||||
|
||||
- review SOUL.md and add personal context
|
||||
- review USER.md and add role/preferences
|
||||
- run `openclaw doctor` to verify config + workspace are valid
|
||||
- confirm the gateway can reach your providers
|
||||
|
||||
## Removing this file
|
||||
|
||||
When the checklist is complete, delete BOOTSTRAP.md. The runtime
|
||||
detects its absence as "setup complete."
|
||||
16
src/oc-path/tests/fixtures/real/HEARTBEAT.md
vendored
Normal file
16
src/oc-path/tests/fixtures/real/HEARTBEAT.md
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
## Every 30m wake
|
||||
|
||||
- check unread Slack DMs in #incidents
|
||||
- summarize new PR review comments since last wake
|
||||
- if any test fails on main, surface to user immediately
|
||||
|
||||
## Every 4h wake
|
||||
|
||||
- compile a brief status summary of in-flight tasks
|
||||
- check Linear for new high-priority issues
|
||||
- update the daily log entry
|
||||
|
||||
## On user-presence wake
|
||||
|
||||
- briefly orient on what changed since last user interaction
|
||||
- prioritize incoming items by urgency
|
||||
19
src/oc-path/tests/fixtures/real/IDENTITY.md
vendored
Normal file
19
src/oc-path/tests/fixtures/real/IDENTITY.md
vendored
Normal file
@@ -0,0 +1,19 @@
|
||||
## Organization
|
||||
|
||||
Example Org / Platform Team
|
||||
|
||||
## Team
|
||||
|
||||
OpenClaw infrastructure & tooling
|
||||
|
||||
## Trust Level
|
||||
|
||||
internal-trusted
|
||||
|
||||
## Region
|
||||
|
||||
us-west
|
||||
|
||||
## Compliance scope
|
||||
|
||||
SOC 2 Type II + FedRAMP Moderate (in audit)
|
||||
18
src/oc-path/tests/fixtures/real/MEMORY.md
vendored
Normal file
18
src/oc-path/tests/fixtures/real/MEMORY.md
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
---
|
||||
scope: project
|
||||
---
|
||||
|
||||
## User prefers async communication
|
||||
|
||||
The user has mentioned twice (sessions 2026-04-15 and 2026-04-22) that
|
||||
they prefer Slack DMs over meetings for short questions.
|
||||
|
||||
## Project uses TypeScript with strict mode
|
||||
|
||||
The codebase enforces `strict: true` and `noUncheckedIndexedAccess`.
|
||||
Avoid `any`; prefer `unknown` with narrowing.
|
||||
|
||||
## Deploy on Tuesdays only
|
||||
|
||||
Production deploys happen Tue 9am-12pm Pacific. Outside that window,
|
||||
deploys go to staging and wait for the next Tuesday window.
|
||||
38
src/oc-path/tests/fixtures/real/SKILL.md
vendored
Normal file
38
src/oc-path/tests/fixtures/real/SKILL.md
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
---
|
||||
name: github
|
||||
description: Use gh for GitHub issues, PR status, CI/logs, comments, reviews, releases, and API queries.
|
||||
tier: T1
|
||||
tools:
|
||||
- gh
|
||||
- bash
|
||||
trigger_phrases:
|
||||
- github
|
||||
- pr
|
||||
- issue
|
||||
- workflow
|
||||
metadata: { "openclaw": { "emoji": "🐙", "requires": { "bins": ["gh"] } } }
|
||||
user-invocable: true
|
||||
---
|
||||
|
||||
# When to use
|
||||
|
||||
Use this skill when the user asks anything about GitHub: issues, pull
|
||||
requests, CI runs, releases, comments, code review, or organizational
|
||||
metadata. Prefer the `gh` CLI over web URLs — `gh` handles auth,
|
||||
pagination, and structured output natively.
|
||||
|
||||
## Common commands
|
||||
|
||||
```bash
|
||||
gh pr view 123 # view PR details
|
||||
gh pr checks 123 # CI status
|
||||
gh issue list --state open # list open issues
|
||||
gh run list -L 5 # last 5 workflow runs
|
||||
gh release create v1.2.3 # cut a release
|
||||
```
|
||||
|
||||
## When NOT to use
|
||||
|
||||
- The user's repo is on a non-GitHub forge (GitLab, Gitea, Bitbucket).
|
||||
Use the appropriate CLI instead.
|
||||
- Operations that require admin permissions the agent doesn't have.
|
||||
17
src/oc-path/tests/fixtures/real/SOUL.md
vendored
Normal file
17
src/oc-path/tests/fixtures/real/SOUL.md
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
# Persona
|
||||
|
||||
I'm a thoughtful, methodical assistant. I ask clarifying questions
|
||||
when the user's request is ambiguous, and I'd rather be slightly
|
||||
slower than confidently wrong.
|
||||
|
||||
## Voice
|
||||
|
||||
- terse and direct
|
||||
- no filler words
|
||||
- code snippets > prose when explaining technical things
|
||||
|
||||
## Boundaries
|
||||
|
||||
- never write to /etc or system paths
|
||||
- always confirm before deleting files
|
||||
- redact secrets from logs and audit trails
|
||||
21
src/oc-path/tests/fixtures/real/TOOLS.md
vendored
Normal file
21
src/oc-path/tests/fixtures/real/TOOLS.md
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
## Tool Guidance
|
||||
|
||||
| tool | guidance |
|
||||
| --- | --- |
|
||||
| gh | Use for GitHub operations (issues, PRs, CI). Prefer over web. |
|
||||
| curl | HTTP client. Use --silent for clean output. |
|
||||
| rg | ripgrep — content search. Faster than grep for code. |
|
||||
| fd | find replacement. Use over `find` when available. |
|
||||
|
||||
## Allow / Deny
|
||||
|
||||
- enabled: gh
|
||||
- enabled: curl
|
||||
- enabled: rg
|
||||
- enabled: fd
|
||||
- disabled: legacy-tool
|
||||
|
||||
## Notes
|
||||
|
||||
The agent reads this file at session start; runtime tool gates honor
|
||||
the `enabled` flags.
|
||||
16
src/oc-path/tests/fixtures/real/USER.md
vendored
Normal file
16
src/oc-path/tests/fixtures/real/USER.md
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
## Role
|
||||
|
||||
Senior PM working on AI runtime + governance layers. Reports to a VP-level
|
||||
stakeholder; coordinates across 4-6 engineering teams.
|
||||
|
||||
## Preferences
|
||||
|
||||
- async-first communication (Slack DMs > meetings)
|
||||
- terse responses; avoid filler
|
||||
- code snippets > prose for technical detail
|
||||
- always include repo:file:line citations for code claims
|
||||
|
||||
## Working hours
|
||||
|
||||
- Mon-Fri 9am-6pm Pacific
|
||||
- occasional evening for sync with EU teams
|
||||
185
src/oc-path/tests/jsonc/edit.test.ts
Normal file
185
src/oc-path/tests/jsonc/edit.test.ts
Normal file
@@ -0,0 +1,185 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { setJsoncOcPath } from '../../jsonc/edit.js';
|
||||
import { emitJsonc } from '../../jsonc/emit.js';
|
||||
import { parseJsonc } from '../../jsonc/parse.js';
|
||||
import { parseOcPath } from '../../oc-path.js';
|
||||
|
||||
describe('setJsoncOcPath — value replacement', () => {
  // Fixture: a single token nested at plugins.entries.github.token.
  const config = `{
"plugins": {
"entries": {
"github": {
"token": "old"
}
}
}
}`;

  // Path to the nested token, used by the replace and no-mutation tests.
  const TOKEN_PATH = 'oc://config/plugins.entries.github.token';

  it('replaces a leaf string value', () => {
    const parsed = parseJsonc(config);
    const result = setJsoncOcPath(parsed.ast, parseOcPath(TOKEN_PATH), {
      kind: 'string',
      value: 'new',
    });
    expect(result.ok).toBe(true);
    if (result.ok) {
      // Round-trip through the emitter and compare as plain JSON.
      const emitted = emitJsonc(result.ast);
      expect(JSON.parse(emitted)).toEqual({
        plugins: { entries: { github: { token: 'new' } } },
      });
    }
  });

  it('replaces nested objects', () => {
    const parsed = parseJsonc(config);
    const result = setJsoncOcPath(parsed.ast, parseOcPath('oc://config/plugins.entries'), {
      kind: 'object',
      entries: [{ key: 'gitlab', line: 0, value: { kind: 'string', value: 'tok' } }],
    });
    expect(result.ok).toBe(true);
    if (result.ok) {
      const emitted = emitJsonc(result.ast);
      expect(JSON.parse(emitted)).toEqual({
        plugins: { entries: { gitlab: 'tok' } },
      });
    }
  });

  it('replaces an array element by index', () => {
    const parsed = parseJsonc('{ "limits": [10, 20, 30] }');
    const result = setJsoncOcPath(parsed.ast, parseOcPath('oc://config/limits.1'), {
      kind: 'number',
      value: 99,
    });
    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(JSON.parse(emitJsonc(result.ast))).toEqual({ limits: [10, 99, 30] });
    }
  });

  it('reports unresolved when a key is missing', () => {
    const parsed = parseJsonc(config);
    // `gitlab` does not exist under plugins.entries in the fixture.
    const result = setJsoncOcPath(parsed.ast, parseOcPath('oc://config/plugins.entries.gitlab'), {
      kind: 'string',
      value: 'x',
    });
    expect(result).toEqual({ ok: false, reason: 'unresolved' });
  });

  it('reports no-root on empty AST', () => {
    const parsed = parseJsonc('');
    const result = setJsoncOcPath(parsed.ast, parseOcPath('oc://config/x'), {
      kind: 'string',
      value: 'y',
    });
    expect(result).toEqual({ ok: false, reason: 'no-root' });
  });

  it('does not mutate the original AST', () => {
    const { ast } = parseJsonc(config);
    // Snapshot the AST before the edit and compare afterwards.
    const snapshot = JSON.stringify(ast);
    setJsoncOcPath(ast, parseOcPath(TOKEN_PATH), {
      kind: 'string',
      value: 'new',
    });
    expect(JSON.stringify(ast)).toBe(snapshot);
  });
});
|
||||
|
||||
describe('setJsoncOcPath — positional tokens (round-11 resolve↔edit symmetry)', () => {
  // ClawSweeper round-11 P2: `$first` / `$last` / `-N` used to resolve on
  // the read path only. These cases pin the edit path to the same child
  // that `resolveJsoncOcPath` would return for the same address.

  // Parse `raw`, then write the number 99 at `address`.
  const writeNinetyNine = (raw: string, address: string) => {
    const { ast } = parseJsonc(raw);
    return setJsoncOcPath(ast, parseOcPath(address), { kind: 'number', value: 99 });
  };

  it('edits the first array element via $first', () => {
    const outcome = writeNinetyNine(
      '{ "items": [10, 20, 30] }',
      'oc://config.jsonc/items/$first',
    );
    expect(outcome.ok).toBe(true);
    if (outcome.ok) {
      expect(JSON.parse(emitJsonc(outcome.ast))).toEqual({ items: [99, 20, 30] });
    }
  });

  it('edits the last array element via $last', () => {
    const outcome = writeNinetyNine(
      '{ "items": [10, 20, 30] }',
      'oc://config.jsonc/items/$last',
    );
    expect(outcome.ok).toBe(true);
    if (outcome.ok) {
      expect(JSON.parse(emitJsonc(outcome.ast))).toEqual({ items: [10, 20, 99] });
    }
  });

  it('edits the second-to-last array element via -2', () => {
    const outcome = writeNinetyNine(
      '{ "items": [10, 20, 30] }',
      'oc://config.jsonc/items/-2',
    );
    expect(outcome.ok).toBe(true);
    if (outcome.ok) {
      expect(JSON.parse(emitJsonc(outcome.ast))).toEqual({ items: [10, 99, 30] });
    }
  });

  it('edits the first object entry value via $first', () => {
    const outcome = writeNinetyNine(
      '{ "a": 1, "b": 2, "c": 3 }',
      'oc://config.jsonc/$first',
    );
    expect(outcome.ok).toBe(true);
    if (outcome.ok) {
      expect(JSON.parse(emitJsonc(outcome.ast))).toEqual({ a: 99, b: 2, c: 3 });
    }
  });

  it('reports unresolved for $first against an empty array', () => {
    const outcome = writeNinetyNine('{ "items": [] }', 'oc://config.jsonc/items/$first');
    expect(outcome).toEqual({ ok: false, reason: 'unresolved' });
  });
});
|
||||
|
||||
describe('setJsoncOcPath — quoted segments (regression: resolve↔edit symmetry)', () => {
  it('edits a key containing slashes via quoted segment', () => {
    // The provider/model alias key has a `/` in it, so an unquoted
    // address would split it into two segments. `resolveJsoncOcPath`
    // honors the quoted form; `setJsoncOcPath` has to match, otherwise
    // the address is resolve-only. Closes ClawSweeper P2 on PR #78678.
    const raw = `{
"agents": {
"defaults": {
"models": {
"anthropic/claude-opus-4-7": { "alias": "opus" }
}
}
}
}
`;
    const tree = parseJsonc(raw).ast;
    const target = parseOcPath(
      'oc://config.jsonc/agents.defaults.models/"anthropic/claude-opus-4-7"/alias',
    );
    const outcome = setJsoncOcPath(tree, target, { kind: 'string', value: 'big-opus' });
    expect(outcome.ok).toBe(true);
    if (outcome.ok) {
      const models = { 'anthropic/claude-opus-4-7': { alias: 'big-opus' } };
      expect(JSON.parse(emitJsonc(outcome.ast))).toEqual({
        agents: { defaults: { models } },
      });
    }
  });
});
|
||||
94
src/oc-path/tests/jsonc/emit.test.ts
Normal file
94
src/oc-path/tests/jsonc/emit.test.ts
Normal file
@@ -0,0 +1,94 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { emitJsonc } from '../../jsonc/emit.js';
|
||||
import { parseJsonc } from '../../jsonc/parse.js';
|
||||
import {
|
||||
OcEmitSentinelError,
|
||||
REDACTED_SENTINEL,
|
||||
} from '../../sentinel.js';
|
||||
|
||||
describe('emitJsonc — round-trip', () => {
  it('returns raw bytes verbatim by default', () => {
    // Input carries a line comment, an inline block comment and a trailing comma.
    const raw = `{
// comment is preserved on round-trip
"x": 1,
"y": [/* inline */ 2, 3],
}
`;
    const emitted = emitJsonc(parseJsonc(raw).ast);
    expect(emitted).toBe(raw);
  });

  it('echoes pre-existing sentinel bytes by default; strict mode rejects', () => {
    // Round-trip trusts the bytes it parsed. Workspace files can contain
    // the sentinel for legitimate reasons (code blocks, pasted error
    // logs); rejecting those by default would be a workspace-wide emit
    // DoS. Strict rejection is the opt-in path.
    const raw = `{ "x": "${REDACTED_SENTINEL}" }`;
    const tree = parseJsonc(raw).ast;
    expect(emitJsonc(tree)).toBe(raw);

    const strictEmit = () =>
      emitJsonc(tree, { fileNameForGuard: 'config', acceptPreExistingSentinel: false });
    expect(strictEmit).toThrow(OcEmitSentinelError);
  });
});
|
||||
|
||||
describe('emitJsonc — render mode', () => {
  // Build an AST whose root is replaced by `{ x: <leaf> }` without going
  // through the parser, so the string leaf can hold arbitrary text.
  const withStringLeaf = (leaf: string) => {
    const base = parseJsonc('{ "x": "ok" }').ast;
    return {
      ...base,
      root: {
        kind: 'object' as const,
        entries: [
          { key: 'x', line: 1, value: { kind: 'string' as const, value: leaf } },
        ],
      },
    };
  };

  it('re-stringifies the structural tree (no comments)', () => {
    const tree = parseJsonc('{ /* drop me */ "x": 1, "y": [2, 3] }').ast;
    const rendered = emitJsonc(tree, { mode: 'render' });
    expect(rendered).not.toContain('drop me');
    expect(JSON.parse(rendered)).toEqual({ x: 1, y: [2, 3] });
  });

  it('throws OcEmitSentinelError when a leaf string is the sentinel', () => {
    const tampered = withStringLeaf(REDACTED_SENTINEL);
    const render = () => emitJsonc(tampered, { mode: 'render' });
    expect(render).toThrow(OcEmitSentinelError);
  });

  it('throws when a leaf string EMBEDS the sentinel (prefix/suffix wrap)', () => {
    // Regression: render mode once compared with `value.value === SENTINEL`
    // (exact match), which let `prefix__OPENCLAW_REDACTED__suffix` through.
    // The roundtrip path always used `.includes()` for that reason, and
    // render has to as well. Covers the sentinel-guard bypass class.
    const tampered = withStringLeaf(`prefix-${REDACTED_SENTINEL}-suffix`);
    const render = () => emitJsonc(tampered, { mode: 'render' });
    expect(render).toThrow(OcEmitSentinelError);
  });

  it('renders empty AST as empty string', () => {
    const tree = parseJsonc('').ast;
    const rendered = emitJsonc(tree, { mode: 'render' });
    expect(rendered).toBe('');
  });
});
|
||||
144
src/oc-path/tests/jsonc/parse.test.ts
Normal file
144
src/oc-path/tests/jsonc/parse.test.ts
Normal file
@@ -0,0 +1,144 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { parseJsonc } from '../../jsonc/parse.js';
|
||||
|
||||
describe('parseJsonc — basic shapes', () => {
  it('parses an empty object', () => {
    const parsed = parseJsonc('{}');
    expect(parsed.diagnostics).toEqual([]);
    expect(parsed.ast.kind).toBe('jsonc');
    expect(parsed.ast.root).toEqual({ kind: 'object', entries: [], line: 1 });
  });

  it('parses an empty array', () => {
    const parsed = parseJsonc('[]');
    expect(parsed.diagnostics).toEqual([]);
    expect(parsed.ast.root).toEqual({ kind: 'array', items: [], line: 1 });
  });

  it('parses an empty input as null root', () => {
    const parsed = parseJsonc('');
    expect(parsed.diagnostics).toEqual([]);
    expect(parsed.ast.root).toBeNull();
  });

  it('parses scalars', () => {
    // Each pair is [input text, expected root node]; checked in order.
    const cases: ReadonlyArray<readonly [string, unknown]> = [
      ['42', { kind: 'number', value: 42, line: 1 }],
      ['-3.14', { kind: 'number', value: -3.14, line: 1 }],
      ['1e3', { kind: 'number', value: 1000, line: 1 }],
      ['"hello"', { kind: 'string', value: 'hello', line: 1 }],
      ['true', { kind: 'boolean', value: true, line: 1 }],
      ['false', { kind: 'boolean', value: false, line: 1 }],
      ['null', { kind: 'null', line: 1 }],
    ];
    for (const [text, node] of cases) {
      expect(parseJsonc(text).ast.root).toEqual(node);
    }
  });

  it('parses nested object/array', () => {
    const raw = '{ "plugins": { "entries": ["a", "b"] } }';
    const parsed = parseJsonc(raw);
    expect(parsed.diagnostics).toEqual([]);

    // Expected tree assembled innermost-first; every node sits on line 1.
    const str = (value: string) => ({ kind: 'string', value, line: 1 });
    const entriesArray = { kind: 'array', line: 1, items: [str('a'), str('b')] };
    const pluginsObject = {
      kind: 'object',
      line: 1,
      entries: [{ key: 'entries', line: 1, value: entriesArray }],
    };
    expect(parsed.ast.root).toEqual({
      kind: 'object',
      line: 1,
      entries: [{ key: 'plugins', line: 1, value: pluginsObject }],
    });
  });

  it('preserves raw on the AST root for byte-fidelity emit', () => {
    const raw = '{\n "x": 1\n}\n';
    expect(parseJsonc(raw).ast.raw).toBe(raw);
  });
});
|
||||
|
||||
describe('parseJsonc — JSONC extensions', () => {
  // Expected root for `{ "x": 1 }` opened on line 1, with the entry and
  // its value both reported on `entryLine`.
  const singleX = (entryLine: number) => ({
    kind: 'object',
    line: 1,
    entries: [
      { key: 'x', value: { kind: 'number', value: 1, line: entryLine }, line: entryLine },
    ],
  });

  it('skips line comments', () => {
    const raw = `{
// comment
"x": 1 // trailing comment
}`;
    const parsed = parseJsonc(raw);
    expect(parsed.diagnostics).toEqual([]);
    expect(parsed.ast.root).toEqual(singleX(3));
  });

  it('skips block comments', () => {
    const parsed = parseJsonc('{ /* hi */ "x": /* mid */ 1 }');
    expect(parsed.diagnostics).toEqual([]);
    expect(parsed.ast.root).toEqual(singleX(1));
  });

  it('tolerates trailing commas in objects', () => {
    const parsed = parseJsonc('{ "x": 1, }');
    expect(parsed.diagnostics).toEqual([]);
    expect(parsed.ast.root).toEqual(singleX(1));
  });

  it('tolerates trailing commas in arrays', () => {
    const root = parseJsonc('[1, 2, 3,]').ast.root;
    const items = [1, 2, 3].map((value) => ({ kind: 'number', value, line: 1 }));
    expect(root).toEqual({ kind: 'array', line: 1, items });
  });

  it('handles escape sequences in strings', () => {
    const root = parseJsonc('"a\\nb\\tc\\u0041"').ast.root;
    expect(root).toEqual({ kind: 'string', value: 'a\nb\tcA', line: 1 });
  });
});
|
||||
|
||||
describe('parseJsonc — soft errors', () => {
  it('returns null root + error diagnostic on unrecoverable input', () => {
    // Missing `:` between key and value.
    const parsed = parseJsonc('{ "x" 1 }');
    expect(parsed.ast.root).toBeNull();
    expect(parsed.diagnostics).toHaveLength(1);
    const [first] = parsed.diagnostics;
    expect(first?.severity).toBe('error');
  });

  it('warns on trailing input after a valid value', () => {
    const found = parseJsonc('1 garbage').diagnostics;
    expect(found).toHaveLength(1);
    const [first] = found;
    expect(first?.severity).toBe('warning');
    expect(first?.code).toBe('OC_JSONC_TRAILING_INPUT');
  });
});
|
||||
76
src/oc-path/tests/jsonc/resolve.test.ts
Normal file
76
src/oc-path/tests/jsonc/resolve.test.ts
Normal file
@@ -0,0 +1,76 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { parseJsonc } from '../../jsonc/parse.js';
|
||||
import { resolveJsoncOcPath } from '../../jsonc/resolve.js';
|
||||
import { parseOcPath } from '../../oc-path.js';
|
||||
|
||||
/**
 * Test shorthand: parse `raw` as JSONC and resolve the `ocPath` address
 * string against the resulting AST.
 */
function rs(raw: string, ocPath: string) {
  return resolveJsoncOcPath(parseJsonc(raw).ast, parseOcPath(ocPath));
}
|
||||
|
||||
describe('resolveJsoncOcPath', () => {
  // Shared fixture: a nested `plugins` object plus a top-level `limits` array.
  const config = `{
"plugins": {
"entries": {
"github": {
"token": "secret",
"enabled": true
}
}
},
"limits": [10, 20, 30]
}`;

  it('resolves the root when no segments are given', () => {
    const match = rs(config, 'oc://config');
    expect(match?.kind).toBe('root');
  });

  it('walks dotted section paths', () => {
    const match = rs(config, 'oc://config/plugins.entries.github.token');
    expect(match?.kind).toBe('object-entry');
    if (match?.kind === 'object-entry') {
      const { key, value } = match.node;
      expect(key).toBe('token');
      expect(value).toMatchObject({ kind: 'string', value: 'secret' });
    }
  });

  it('walks 4-segment slash paths up to OcPath depth limit', () => {
    const match = rs(config, 'oc://config/plugins/entries/github');
    expect(match?.kind).toBe('object-entry');
    if (match?.kind === 'object-entry') {
      expect(match.node.key).toBe('github');
    }
  });

  it('walks mixed dotted+slash paths', () => {
    const match = rs(config, 'oc://config/plugins/entries.github.token');
    expect(match?.kind).toBe('object-entry');
  });

  it('indexes into arrays via numeric segments', () => {
    const match = rs(config, 'oc://config/limits.1');
    expect(match?.kind).toBe('value');
    if (match?.kind === 'value') {
      expect(match.node).toMatchObject({ kind: 'number', value: 20 });
    }
  });

  it('returns null for missing keys', () => {
    const match = rs(config, 'oc://config/plugins.entries.gitlab');
    expect(match).toBeNull();
  });

  it('returns null for out-of-bounds array indexes', () => {
    const match = rs(config, 'oc://config/limits.99');
    expect(match).toBeNull();
  });

  it('returns null when descending past a primitive', () => {
    const match = rs(config, 'oc://config/plugins.entries.github.token.x');
    expect(match).toBeNull();
  });

  it('returns null on empty AST', () => {
    const tree = parseJsonc('').ast;
    const match = resolveJsoncOcPath(tree, parseOcPath('oc://config/x'));
    expect(match).toBeNull();
  });
});
|
||||
242
src/oc-path/tests/jsonl/edit.test.ts
Normal file
242
src/oc-path/tests/jsonl/edit.test.ts
Normal file
@@ -0,0 +1,242 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
appendJsonlOcPath,
|
||||
setJsonlOcPath,
|
||||
} from '../../jsonl/edit.js';
|
||||
import { emitJsonl } from '../../jsonl/emit.js';
|
||||
import { parseJsonl } from '../../jsonl/parse.js';
|
||||
import { parseOcPath } from '../../oc-path.js';
|
||||
|
||||
describe('setJsonlOcPath — value replacement', () => {
  // Three value lines followed by a trailing newline.
  const log = '{"event":"start"}\n{"event":"step","n":1}\n{"event":"end"}\n';

  // JSON-decode the 0-based `index`-th physical line of emitted text.
  const decodeLine = (text: string, index: number): unknown =>
    JSON.parse(text.split('\n')[index] ?? '');

  it('replaces a field on a specific line', () => {
    const tree = parseJsonl(log).ast;
    const target = parseOcPath('oc://session-events/L2/n');
    const outcome = setJsonlOcPath(tree, target, { kind: 'number', value: 42 });
    expect(outcome.ok).toBe(true);
    if (outcome.ok) {
      expect(decodeLine(emitJsonl(outcome.ast), 1)).toEqual({ event: 'step', n: 42 });
    }
  });

  it('replaces an entire line value', () => {
    const tree = parseJsonl(log).ast;
    const target = parseOcPath('oc://session-events/L2');
    const outcome = setJsonlOcPath(tree, target, {
      kind: 'object',
      entries: [{ key: 'event', line: 0, value: { kind: 'string', value: 'replaced' } }],
    });
    expect(outcome.ok).toBe(true);
    if (outcome.ok) {
      expect(decodeLine(emitJsonl(outcome.ast), 1)).toEqual({ event: 'replaced' });
    }
  });

  it('resolves $last and edits the most recent value line', () => {
    const tree = parseJsonl(log).ast;
    const target = parseOcPath('oc://session-events/$last/event');
    const outcome = setJsonlOcPath(tree, target, { kind: 'string', value: 'final' });
    expect(outcome.ok).toBe(true);
    if (outcome.ok) {
      expect(decodeLine(emitJsonl(outcome.ast), 2)).toEqual({ event: 'final' });
    }
  });

  it('reports unresolved for unknown line addresses', () => {
    const tree = parseJsonl(log).ast;
    const target = parseOcPath('oc://session-events/L99/x');
    const outcome = setJsonlOcPath(tree, target, { kind: 'number', value: 1 });
    expect(outcome).toEqual({ ok: false, reason: 'unresolved' });
  });

  it('reports not-a-value-line when targeting a blank line', () => {
    // Line 2 of this input is empty.
    const tree = parseJsonl('{"a":1}\n\n{"b":2}\n').ast;
    const target = parseOcPath('oc://session-events/L2');
    const outcome = setJsonlOcPath(tree, target, { kind: 'number', value: 1 });
    expect(outcome).toEqual({ ok: false, reason: 'not-a-value-line' });
  });
});
|
||||
|
||||
describe('appendJsonlOcPath — session checkpointing primitive', () => {
  it('appends to an empty file', () => {
    const empty = parseJsonl('').ast;
    const grown = appendJsonlOcPath(empty, {
      kind: 'object',
      entries: [{ key: 'event', line: 0, value: { kind: 'string', value: 'start' } }],
    });
    expect(emitJsonl(grown)).toBe('{"event":"start"}');
  });

  it('appends to an existing log preserving prior lines', () => {
    const existing = parseJsonl('{"a":1}\n').ast;
    const grown = appendJsonlOcPath(existing, {
      kind: 'object',
      entries: [{ key: 'b', line: 0, value: { kind: 'number', value: 2 } }],
    });
    const emittedLines = emitJsonl(grown).split('\n');
    expect(emittedLines).toHaveLength(2);
    const appended = emittedLines[1] ?? '';
    expect(JSON.parse(appended)).toEqual({ b: 2 });
  });
});
|
||||
|
||||
describe('setJsonlOcPath — line-address positional tokens (resolve↔edit symmetry)', () => {
  // The line-address slot has to accept every token shape pickLine
  // accepts (resolve.ts and find.ts already do). If `$first` and `-N`
  // were missing here, an address readable under those tokens would
  // silently unresolve on write.
  const log = '{"event":"start","n":1}\n{"event":"step","n":2}\n{"event":"end","n":3}\n';

  // Parse `raw`, then write the number 99 at `address`.
  const writeNinetyNine = (raw: string, address: string) => {
    const { ast } = parseJsonl(raw);
    return setJsonlOcPath(ast, parseOcPath(address), { kind: 'number', value: 99 });
  };

  // JSON-decode the 0-based `index`-th physical line of emitted text.
  const decodeLine = (text: string, index: number): unknown =>
    JSON.parse(text.split('\n')[index] ?? '');

  it('writes under $first line address', () => {
    const outcome = writeNinetyNine(log, 'oc://session-events/$first/n');
    expect(outcome.ok).toBe(true);
    if (outcome.ok) {
      expect(decodeLine(emitJsonl(outcome.ast), 0)).toEqual({ event: 'start', n: 99 });
    }
  });

  it('writes under -1 line address (alias for last value line)', () => {
    const outcome = writeNinetyNine(log, 'oc://session-events/-1/n');
    expect(outcome.ok).toBe(true);
    if (outcome.ok) {
      expect(decodeLine(emitJsonl(outcome.ast), 2)).toEqual({ event: 'end', n: 99 });
    }
  });

  it('writes under -2 line address (penultimate value line)', () => {
    const outcome = writeNinetyNine(log, 'oc://session-events/-2/n');
    expect(outcome.ok).toBe(true);
    if (outcome.ok) {
      expect(decodeLine(emitJsonl(outcome.ast), 1)).toEqual({ event: 'step', n: 99 });
    }
  });

  it('reports unresolved for $first against an empty log', () => {
    const outcome = writeNinetyNine('', 'oc://session-events/$first/n');
    expect(outcome).toEqual({ ok: false, reason: 'unresolved' });
  });

  it('reports unresolved for -99 (out-of-range) line address', () => {
    const outcome = writeNinetyNine(log, 'oc://session-events/-99/n');
    expect(outcome).toEqual({ ok: false, reason: 'unresolved' });
  });
});
|
||||
|
||||
describe('setJsonlOcPath — positional field tokens (round-11 resolve↔edit symmetry)', () => {
  // ClawSweeper round-11 P2: the JSONL line-address `$last` already
  // resolved (pickLineIndex), but positional tokens INSIDE a line's
  // structural body (item / field) did not. These cases pin the in-line
  // edit path: a `$first` / `$last` / `-N` field segment must land on the
  // same child as resolveJsonlOcPath.
  const log = '{"items":[10,20,30],"events":{"a":1,"b":2}}\n';

  // Parse the shared log, then write the number 99 at `address`.
  const writeNinetyNine = (address: string) => {
    const { ast } = parseJsonl(log);
    return setJsonlOcPath(ast, parseOcPath(address), { kind: 'number', value: 99 });
  };

  // JSON-decode the first non-empty line of emitted text.
  const decodeFirstLine = (text: string): unknown => {
    const firstLine = text.split('\n').find((l) => l.length > 0) ?? '';
    return JSON.parse(firstLine);
  };

  it('edits the first array item on a line via $first', () => {
    const outcome = writeNinetyNine('oc://session-events/L1/items/$first');
    expect(outcome.ok).toBe(true);
    if (outcome.ok) {
      expect(decodeFirstLine(emitJsonl(outcome.ast))).toEqual({
        items: [99, 20, 30],
        events: { a: 1, b: 2 },
      });
    }
  });

  it('edits the last array item on a line via $last', () => {
    const outcome = writeNinetyNine('oc://session-events/L1/items/$last');
    expect(outcome.ok).toBe(true);
    if (outcome.ok) {
      expect(decodeFirstLine(emitJsonl(outcome.ast))).toEqual({
        items: [10, 20, 99],
        events: { a: 1, b: 2 },
      });
    }
  });

  it('edits the first object entry on a line via $first', () => {
    const outcome = writeNinetyNine('oc://session-events/L1/events/$first');
    expect(outcome.ok).toBe(true);
    if (outcome.ok) {
      expect(decodeFirstLine(emitJsonl(outcome.ast))).toEqual({
        items: [10, 20, 30],
        events: { a: 99, b: 2 },
      });
    }
  });
});
|
||||
|
||||
describe('setJsonlOcPath — quoted field segments (regression: resolve↔edit symmetry)', () => {
  it('edits a field key containing a slash via quoted segment', () => {
    // Closes ClawSweeper P2 on PR #78678: JSONL resolve unquotes
    // bracket-aware segments, while the edit path used a plain
    // `.split('.')`. Any address that resolves under `Lnnn` has to be
    // editable through that same address.
    const raw = `{"event":"start","detail":{"github/repo":"old"}}\n`;
    const tree = parseJsonl(raw).ast;
    const target = parseOcPath('oc://x.jsonl/L1/detail/"github/repo"');
    const outcome = setJsonlOcPath(tree, target, { kind: 'string', value: 'new' });
    expect(outcome.ok).toBe(true);
    if (outcome.ok) {
      const nonEmpty = emitJsonl(outcome.ast)
        .split('\n')
        .filter((l) => l.length > 0);
      expect(nonEmpty).toHaveLength(1);
      const only = nonEmpty[0] ?? '';
      expect(JSON.parse(only)).toEqual({
        event: 'start',
        detail: { 'github/repo': 'new' },
      });
    }
  });
});
|
||||
101
src/oc-path/tests/jsonl/emit.test.ts
Normal file
101
src/oc-path/tests/jsonl/emit.test.ts
Normal file
@@ -0,0 +1,101 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { emitJsonl } from '../../jsonl/emit.js';
|
||||
import { parseJsonl } from '../../jsonl/parse.js';
|
||||
import {
|
||||
OcEmitSentinelError,
|
||||
REDACTED_SENTINEL,
|
||||
} from '../../sentinel.js';
|
||||
|
||||
describe('emitJsonl — round-trip', () => {
  it('returns raw bytes verbatim by default', () => {
    // Input mixes value lines, a blank line and a non-JSON line.
    const raw = '{"a":1}\n\n{"b":2}\nthis is malformed\n';
    const emitted = emitJsonl(parseJsonl(raw).ast);
    expect(emitted).toBe(raw);
  });

  it('echoes pre-existing sentinel bytes by default; strict mode rejects', () => {
    const raw = `{"a":"${REDACTED_SENTINEL}"}\n`;
    const tree = parseJsonl(raw).ast;
    expect(emitJsonl(tree)).toBe(raw);

    const strictEmit = () =>
      emitJsonl(tree, {
        fileNameForGuard: 'session-events',
        acceptPreExistingSentinel: false,
      });
    expect(strictEmit).toThrow(OcEmitSentinelError);
  });
});
|
||||
|
||||
describe('emitJsonl — render mode', () => {
  // Build an AST whose only line is a value line holding `{ a: <leaf> }`,
  // bypassing the parser so the string leaf can hold arbitrary text.
  const withStringLeaf = (leaf: string) => {
    const base = parseJsonl('{"a":"ok"}\n').ast;
    return {
      ...base,
      lines: [
        {
          kind: 'value' as const,
          line: 1,
          raw: '{"a":"ok"}',
          value: {
            kind: 'object' as const,
            entries: [
              { key: 'a', line: 1, value: { kind: 'string' as const, value: leaf } },
            ],
          },
        },
      ],
    };
  };

  it('rebuilds value lines via JSON-stringify', () => {
    const tree = parseJsonl('{"a":1}\n{"b":2}\n').ast;
    const rendered = emitJsonl(tree, { mode: 'render' });
    expect(rendered.split('\n')).toEqual(['{"a":1}', '{"b":2}']);
  });

  it('preserves blank and malformed lines verbatim in render mode', () => {
    const tree = parseJsonl('{"a":1}\n\nbroken\n{"b":2}\n').ast;
    const rendered = emitJsonl(tree, { mode: 'render' });
    expect(rendered.split('\n')).toEqual(['{"a":1}', '', 'broken', '{"b":2}']);
  });

  it('throws when a value-leaf is the sentinel under render mode', () => {
    const tampered = withStringLeaf(REDACTED_SENTINEL);
    const render = () => emitJsonl(tampered, { mode: 'render' });
    expect(render).toThrow(OcEmitSentinelError);
  });

  it('throws when a value-leaf EMBEDS the sentinel (prefix/suffix wrap)', () => {
    // Regression: render mode once used an exact match
    // (`value.value === SENTINEL`), which let
    // `prefix__OPENCLAW_REDACTED__suffix` through. A contains-check is
    // the right invariant.
    const tampered = withStringLeaf(`wrap-${REDACTED_SENTINEL}-end`);
    const render = () => emitJsonl(tampered, { mode: 'render' });
    expect(render).toThrow(OcEmitSentinelError);
  });
});
|
||||
43
src/oc-path/tests/jsonl/parse.test.ts
Normal file
43
src/oc-path/tests/jsonl/parse.test.ts
Normal file
@@ -0,0 +1,43 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { parseJsonl } from '../../jsonl/parse.js';
|
||||
|
||||
describe('parseJsonl', () => {
  it('parses an empty file as zero lines', () => {
    const parsed = parseJsonl('');
    expect(parsed.diagnostics).toEqual([]);
    expect(parsed.ast.lines).toEqual([]);
  });

  it('parses each line as a JSON value', () => {
    const raw = `{"event":"start"}
{"event":"step","n":1}
{"event":"end"}
`;
    const parsed = parseJsonl(raw);
    expect(parsed.diagnostics).toEqual([]);
    const { lines } = parsed.ast;
    expect(lines).toHaveLength(3);
    expect(lines[0]?.kind).toBe('value');
    expect(lines[2]?.kind).toBe('value');
  });

  it('preserves blank lines as blank entries', () => {
    const parsed = parseJsonl('{"a":1}\n\n{"b":2}\n');
    expect(parsed.diagnostics).toEqual([]);
    const kinds = parsed.ast.lines.map((l) => l.kind);
    expect(kinds).toEqual(['value', 'blank', 'value']);
  });

  it('flags malformed lines as warnings without aborting', () => {
    const parsed = parseJsonl('{"a":1}\nthis is not json\n{"b":2}\n');
    const kinds = parsed.ast.lines.map((l) => l.kind);
    expect(kinds).toEqual(['value', 'malformed', 'value']);
    expect(parsed.diagnostics).toHaveLength(1);
    const [first] = parsed.diagnostics;
    expect(first?.code).toBe('OC_JSONL_LINE_MALFORMED');
  });

  it('preserves raw on the AST root for byte-fidelity emit', () => {
    const raw = '{"a":1}\n{"b":2}\n';
    expect(parseJsonl(raw).ast.raw).toBe(raw);
  });
});
|
||||
99
src/oc-path/tests/jsonl/resolve.test.ts
Normal file
99
src/oc-path/tests/jsonl/resolve.test.ts
Normal file
@@ -0,0 +1,99 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { parseJsonl } from '../../jsonl/parse.js';
|
||||
import { resolveJsonlOcPath } from '../../jsonl/resolve.js';
|
||||
import { parseOcPath } from '../../oc-path.js';
|
||||
import { resolveOcPath } from '../../universal.js';
|
||||
import { findOcPaths } from '../../find.js';
|
||||
|
||||
// Shared fixture: value lines on physical lines 1, 2 and 4; line 3 is blank.
const log = `{"event":"start","ts":1}
{"event":"step","n":1,"result":{"ok":true,"detail":"a"}}

{"event":"end","ts":99}
`;

/**
 * Test shorthand: parse the shared `log` fixture and resolve the `ocPath`
 * address string against it.
 */
function rs(ocPath: string) {
  return resolveJsonlOcPath(parseJsonl(log).ast, parseOcPath(ocPath));
}
|
||||
|
||||
describe('resolveJsonlOcPath', () => {
  it('returns root when no segments are given', () => {
    const match = rs('oc://session-events');
    expect(match?.kind).toBe('root');
  });

  it('addresses an entire line by line number', () => {
    const match = rs('oc://session-events/L1');
    expect(match?.kind).toBe('line');
  });

  it('addresses fields under a line via item segment', () => {
    const match = rs('oc://session-events/L2/event');
    expect(match?.kind).toBe('object-entry');
    if (match?.kind === 'object-entry') {
      const { value } = match.node;
      expect(value).toMatchObject({ kind: 'string', value: 'step' });
    }
  });

  it('descends via dotted item paths', () => {
    const match = rs('oc://session-events/L2/result.ok');
    expect(match?.kind).toBe('object-entry');
    if (match?.kind === 'object-entry') {
      const { value } = match.node;
      expect(value).toMatchObject({ kind: 'boolean', value: true });
    }
  });

  it('resolves $last to the most recent value line', () => {
    const match = rs('oc://session-events/$last/event');
    expect(match?.kind).toBe('object-entry');
    if (match?.kind === 'object-entry') {
      const { value } = match.node;
      expect(value).toMatchObject({ kind: 'string', value: 'end' });
    }
  });

  it('returns null for unknown line addresses', () => {
    // One line number past the end of the log, one token that is not a line address.
    const outOfRange = rs('oc://session-events/L99');
    expect(outOfRange).toBeNull();
    const notAnAddress = rs('oc://session-events/garbage');
    expect(notAnAddress).toBeNull();
  });

  it('returns null when descending into a blank line', () => {
    const match = rs('oc://session-events/L3/anything');
    expect(match).toBeNull();
  });
});
|
||||
|
||||
describe('resolveJsonlToUniversal — file-relative line metadata (regression)', () => {
|
||||
// Regression: surfaced via the openclaw-path CLI scenario run on
|
||||
// a multi-line session.jsonl. Every match returned `line: 1`
|
||||
// because the inside-line jsonc parser numbers from 1 within each
|
||||
// line's bytes; the universal resolve was preferring that local
|
||||
// number over the JsonlLine's file-relative line.
|
||||
|
||||
const log = [
|
||||
'{"event":"start"}', // line 1
|
||||
'{"event":"step","n":1}', // line 2
|
||||
'{"event":"step","n":2}', // line 3
|
||||
'{"event":"end"}', // line 4
|
||||
'', // line 5 (blank)
|
||||
].join('\n');
|
||||
|
||||
it('resolves L2/event with line=2 (not 1)', () => {
|
||||
const { ast } = parseJsonl(log);
|
||||
const m = resolveOcPath(ast, parseOcPath('oc://session.jsonl/L2/event'));
|
||||
expect(m).not.toBeNull();
|
||||
if (m !== null) {expect(m.line).toBe(2);}
|
||||
});
|
||||
|
||||
it('resolves L4/event with line=4', () => {
|
||||
const { ast } = parseJsonl(log);
|
||||
const m = resolveOcPath(ast, parseOcPath('oc://session.jsonl/L4/event'));
|
||||
expect(m).not.toBeNull();
|
||||
if (m !== null) {expect(m.line).toBe(4);}
|
||||
});
|
||||
|
||||
it('findOcPaths over wildcard surfaces correct file-relative lines', () => {
|
||||
const { ast } = parseJsonl(log);
|
||||
const matches = findOcPaths(ast, parseOcPath('oc://session.jsonl/*/event'));
|
||||
expect(matches).toHaveLength(4);
|
||||
const lines = matches.map((m) => m.match.line);
|
||||
expect(lines).toEqual([1, 2, 3, 4]);
|
||||
});
|
||||
});
|
||||
136
src/oc-path/tests/oc-path.test.ts
Normal file
136
src/oc-path/tests/oc-path.test.ts
Normal file
@@ -0,0 +1,136 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
OcPathError,
|
||||
formatOcPath,
|
||||
isValidOcPath,
|
||||
parseOcPath,
|
||||
} from '../oc-path.js';
|
||||
|
||||
describe('parseOcPath', () => {
|
||||
it('parses file-only path', () => {
|
||||
expect(parseOcPath('oc://SOUL.md')).toEqual({ file: 'SOUL.md' });
|
||||
});
|
||||
|
||||
it('parses file + section', () => {
|
||||
expect(parseOcPath('oc://SOUL.md/Boundaries')).toEqual({
|
||||
file: 'SOUL.md',
|
||||
section: 'Boundaries',
|
||||
});
|
||||
});
|
||||
|
||||
it('parses file + section + item', () => {
|
||||
expect(parseOcPath('oc://SOUL.md/Boundaries/deny-rule-1')).toEqual({
|
||||
file: 'SOUL.md',
|
||||
section: 'Boundaries',
|
||||
item: 'deny-rule-1',
|
||||
});
|
||||
});
|
||||
|
||||
it('parses file + section + item + field', () => {
|
||||
expect(parseOcPath('oc://SOUL.md/Boundaries/deny-rule-1/risk')).toEqual({
|
||||
file: 'SOUL.md',
|
||||
section: 'Boundaries',
|
||||
item: 'deny-rule-1',
|
||||
field: 'risk',
|
||||
});
|
||||
});
|
||||
|
||||
it('parses session query', () => {
|
||||
expect(parseOcPath('oc://SOUL.md?session=daily-cron')).toEqual({
|
||||
file: 'SOUL.md',
|
||||
session: 'daily-cron',
|
||||
});
|
||||
});
|
||||
|
||||
it('rejects missing scheme', () => {
|
||||
expectOcPathError(() => parseOcPath('SOUL.md'), 'OC_PATH_MISSING_SCHEME');
|
||||
});
|
||||
|
||||
it('rejects empty path after scheme', () => {
|
||||
expectOcPathError(() => parseOcPath('oc://'), 'OC_PATH_EMPTY');
|
||||
});
|
||||
|
||||
it('rejects empty segment', () => {
|
||||
expectOcPathError(() => parseOcPath('oc://SOUL.md//deny-rule-1'), 'OC_PATH_EMPTY_SEGMENT');
|
||||
});
|
||||
|
||||
it('rejects too-deep nesting', () => {
|
||||
expectOcPathError(() => parseOcPath('oc://SOUL.md/a/b/c/d/e'), 'OC_PATH_TOO_DEEP');
|
||||
});
|
||||
|
||||
it('rejects non-string input', () => {
|
||||
expectOcPathError(() => parseOcPath(123 as unknown as string), 'OC_PATH_NOT_STRING');
|
||||
});
|
||||
});
|
||||
|
||||
/**
 * Asserts that `fn` throws an `OcPathError` whose `code` equals `expectedCode`.
 *
 * The thrown value is captured inside the `try` and every assertion runs
 * outside it. Calling `expect.fail` inside the `try` would have its own
 * assertion error swallowed by the `catch`, which then reports a misleading
 * "not an instance of OcPathError" failure instead of "no error thrown".
 *
 * @param fn - Callback expected to throw.
 * @param expectedCode - The `code` the thrown `OcPathError` must carry.
 */
function expectOcPathError(fn: () => unknown, expectedCode: string): void {
  let thrown: unknown;
  let didThrow = false;
  try {
    fn();
  } catch (err) {
    thrown = err;
    didThrow = true;
  }
  if (!didThrow) {
    expect.fail(`expected OcPathError with code "${expectedCode}" but no error thrown`);
  }
  expect(thrown).toBeInstanceOf(OcPathError);
  expect((thrown as OcPathError).code).toBe(expectedCode);
}
|
||||
|
||||
describe('formatOcPath', () => {
|
||||
it('round-trips file-only', () => {
|
||||
expect(formatOcPath({ file: 'SOUL.md' })).toBe('oc://SOUL.md');
|
||||
});
|
||||
|
||||
it('round-trips full nesting', () => {
|
||||
expect(
|
||||
formatOcPath({
|
||||
file: 'SOUL.md',
|
||||
section: 'Boundaries',
|
||||
item: 'deny-rule-1',
|
||||
field: 'risk',
|
||||
}),
|
||||
).toBe('oc://SOUL.md/Boundaries/deny-rule-1/risk');
|
||||
});
|
||||
|
||||
it('round-trips session', () => {
|
||||
expect(formatOcPath({ file: 'SOUL.md', session: 'cron' })).toBe(
|
||||
'oc://SOUL.md?session=cron',
|
||||
);
|
||||
});
|
||||
|
||||
it('rejects empty file', () => {
|
||||
expectOcPathError(() => formatOcPath({ file: '' }), 'OC_PATH_FILE_REQUIRED');
|
||||
});
|
||||
|
||||
it('rejects item without section', () => {
|
||||
expectOcPathError(() => formatOcPath({ file: 'F.md', item: 'i' }), 'OC_PATH_NESTING');
|
||||
});
|
||||
});
|
||||
|
||||
describe('round-trip', () => {
|
||||
const cases = [
|
||||
'oc://SOUL.md',
|
||||
'oc://SOUL.md/Boundaries',
|
||||
'oc://SOUL.md/Boundaries/deny-rule-1',
|
||||
'oc://SOUL.md/Boundaries/deny-rule-1/risk',
|
||||
'oc://SOUL.md?session=daily',
|
||||
'oc://AGENTS.md/Tools/gh/risk',
|
||||
];
|
||||
for (const input of cases) {
|
||||
it(`formatOcPath(parseOcPath("${input}")) === "${input}"`, () => {
|
||||
expect(formatOcPath(parseOcPath(input))).toBe(input);
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
describe('isValidOcPath', () => {
|
||||
it('returns true for valid paths', () => {
|
||||
expect(isValidOcPath('oc://SOUL.md')).toBe(true);
|
||||
expect(isValidOcPath('oc://SOUL.md/Boundaries')).toBe(true);
|
||||
});
|
||||
|
||||
it('returns false for invalid paths', () => {
|
||||
expect(isValidOcPath('SOUL.md')).toBe(false);
|
||||
expect(isValidOcPath('oc://')).toBe(false);
|
||||
expect(isValidOcPath(null)).toBe(false);
|
||||
expect(isValidOcPath(undefined)).toBe(false);
|
||||
expect(isValidOcPath(42)).toBe(false);
|
||||
});
|
||||
});
|
||||
203
src/oc-path/tests/parse.test.ts
Normal file
203
src/oc-path/tests/parse.test.ts
Normal file
@@ -0,0 +1,203 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { parseMd } from '../parse.js';
|
||||
|
||||
describe('parseMd — frontmatter', () => {
|
||||
it('parses simple frontmatter', () => {
|
||||
const raw = `---
|
||||
name: github
|
||||
description: gh CLI for issues, PRs, runs
|
||||
---
|
||||
|
||||
Body text.
|
||||
`;
|
||||
const { ast, diagnostics } = parseMd(raw);
|
||||
expect(diagnostics).toEqual([]);
|
||||
expect(ast.frontmatter).toEqual([
|
||||
{ key: 'name', value: 'github', line: 2 },
|
||||
{ key: 'description', value: 'gh CLI for issues, PRs, runs', line: 3 },
|
||||
]);
|
||||
});
|
||||
|
||||
it('handles no frontmatter', () => {
|
||||
const raw = `## First section\n\nContent.\n`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.frontmatter).toEqual([]);
|
||||
expect(ast.preamble).toBe('');
|
||||
expect(ast.blocks.length).toBe(1);
|
||||
});
|
||||
|
||||
it('emits diagnostic for unclosed frontmatter', () => {
|
||||
const raw = `---
|
||||
name: github
|
||||
description: never closes
|
||||
|
||||
Body.
|
||||
`;
|
||||
const { diagnostics } = parseMd(raw);
|
||||
expect(diagnostics).toContainEqual(
|
||||
expect.objectContaining({ code: 'OC_FRONTMATTER_UNCLOSED' }),
|
||||
);
|
||||
});
|
||||
|
||||
it('strips quotes from values', () => {
|
||||
const raw = `---
|
||||
title: "Hello world"
|
||||
hint: 'quoted'
|
||||
---
|
||||
`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.frontmatter[0]?.value).toBe('Hello world');
|
||||
expect(ast.frontmatter[1]?.value).toBe('quoted');
|
||||
});
|
||||
});
|
||||
|
||||
describe('parseMd — H2 blocks', () => {
|
||||
it('splits sections', () => {
|
||||
const raw = `Preamble text.
|
||||
|
||||
## First
|
||||
|
||||
Body of first.
|
||||
|
||||
## Second
|
||||
|
||||
Body of second.
|
||||
`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.preamble.trim()).toBe('Preamble text.');
|
||||
expect(ast.blocks.length).toBe(2);
|
||||
expect(ast.blocks[0]?.heading).toBe('First');
|
||||
expect(ast.blocks[0]?.slug).toBe('first');
|
||||
expect(ast.blocks[1]?.heading).toBe('Second');
|
||||
});
|
||||
|
||||
it('preserves line numbers (1-based)', () => {
|
||||
const raw = `Line 1
|
||||
## Heading at line 2
|
||||
Line 3
|
||||
`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks[0]?.line).toBe(2);
|
||||
});
|
||||
|
||||
it('does NOT split on `## ` inside fenced code blocks', () => {
|
||||
const raw = `## Real section
|
||||
|
||||
\`\`\`md
|
||||
## Not a heading
|
||||
content
|
||||
\`\`\`
|
||||
|
||||
## Another section
|
||||
`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks.map((b) => b.heading)).toEqual(['Real section', 'Another section']);
|
||||
});
|
||||
});
|
||||
|
||||
describe('parseMd — items', () => {
|
||||
it('extracts plain bullet items', () => {
|
||||
const raw = `## Boundaries
|
||||
|
||||
- never write to /etc
|
||||
- always confirm before deleting
|
||||
`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks[0]?.items.length).toBe(2);
|
||||
expect(ast.blocks[0]?.items[0]?.text).toBe('never write to /etc');
|
||||
expect(ast.blocks[0]?.items[0]?.kv).toBeUndefined();
|
||||
});
|
||||
|
||||
it('extracts kv items', () => {
|
||||
const raw = `## Tools
|
||||
|
||||
- gh: GitHub CLI
|
||||
- curl: HTTP client
|
||||
`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks[0]?.items[0]?.kv).toEqual({ key: 'gh', value: 'GitHub CLI' });
|
||||
expect(ast.blocks[0]?.items[0]?.slug).toBe('gh');
|
||||
expect(ast.blocks[0]?.items[1]?.kv).toEqual({ key: 'curl', value: 'HTTP client' });
|
||||
});
|
||||
|
||||
it('does NOT extract bullets inside fenced code', () => {
|
||||
const raw = `## Section
|
||||
|
||||
\`\`\`
|
||||
- not a bullet
|
||||
\`\`\`
|
||||
|
||||
- real bullet
|
||||
`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks[0]?.items.length).toBe(1);
|
||||
expect(ast.blocks[0]?.items[0]?.text).toBe('real bullet');
|
||||
});
|
||||
});
|
||||
|
||||
describe('parseMd — tables', () => {
|
||||
it('extracts a simple table', () => {
|
||||
const raw = `## Tool Guidance
|
||||
|
||||
| tool | guidance |
|
||||
| --- | --- |
|
||||
| gh | use for GitHub |
|
||||
| curl | HTTP client |
|
||||
`;
|
||||
const { ast } = parseMd(raw);
|
||||
const table = ast.blocks[0]?.tables[0];
|
||||
expect(table).toBeDefined();
|
||||
expect(table?.headers).toEqual(['tool', 'guidance']);
|
||||
expect(table?.rows.length).toBe(2);
|
||||
expect(table?.rows[0]).toEqual(['gh', 'use for GitHub']);
|
||||
});
|
||||
});
|
||||
|
||||
describe('parseMd — code blocks', () => {
|
||||
it('extracts a fenced code block', () => {
|
||||
const raw = `## Examples
|
||||
|
||||
\`\`\`ts
|
||||
const x = 1;
|
||||
\`\`\`
|
||||
`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks[0]?.codeBlocks[0]).toMatchObject({
|
||||
lang: 'ts',
|
||||
text: 'const x = 1;',
|
||||
});
|
||||
});
|
||||
|
||||
it('handles unlanguaged fences', () => {
|
||||
const raw = `## Block
|
||||
|
||||
\`\`\`
|
||||
plain text
|
||||
\`\`\`
|
||||
`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks[0]?.codeBlocks[0]?.lang).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe('parseMd — byte-fidelity', () => {
|
||||
it('preserves raw on the AST', () => {
|
||||
const raw = `---\nname: x\n---\n\n## Sec\n\n- a\n- b\n`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.raw).toBe(raw);
|
||||
});
|
||||
|
||||
it('preserves BOM in raw but ignores it for parsing', () => {
  // Spell the BOM as an explicit escape: a literal U+FEFF is invisible and is
  // silently dropped by many editors and tools, which would leave this test
  // exercising a plain heading with no BOM at all.
  const raw = '\uFEFF## Heading\n';
  const { ast } = parseMd(raw);
  // Raw bytes keep the BOM.
  expect(ast.raw).toBe(raw);
  // The heading is still recognised despite the leading BOM.
  expect(ast.blocks[0]?.heading).toBe('Heading');
});
|
||||
|
||||
it('handles CRLF line endings', () => {
|
||||
const raw = '## Heading\r\n\r\n- item\r\n';
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks[0]?.heading).toBe('Heading');
|
||||
expect(ast.blocks[0]?.items[0]?.text).toBe('item');
|
||||
});
|
||||
});
|
||||
100
src/oc-path/tests/resolve.test.ts
Normal file
100
src/oc-path/tests/resolve.test.ts
Normal file
@@ -0,0 +1,100 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { parseMd } from '../parse.js';
|
||||
import { resolveMdOcPath as resolveOcPath } from '../resolve.js';
|
||||
|
||||
const SAMPLE = `---
|
||||
name: github
|
||||
description: gh CLI
|
||||
---
|
||||
|
||||
Preamble.
|
||||
|
||||
## Boundaries
|
||||
|
||||
- never write to /etc
|
||||
- deny: secrets
|
||||
|
||||
## Tools
|
||||
|
||||
- gh: GitHub CLI
|
||||
- curl: HTTP client
|
||||
`;
|
||||
|
||||
describe('resolveOcPath', () => {
|
||||
const { ast } = parseMd(SAMPLE);
|
||||
|
||||
it('resolves root', () => {
|
||||
const m = resolveOcPath(ast, { file: 'AGENTS.md' });
|
||||
expect(m?.kind).toBe('root');
|
||||
});
|
||||
|
||||
it('resolves block by slug', () => {
|
||||
const m = resolveOcPath(ast, { file: 'AGENTS.md', section: 'boundaries' });
|
||||
expect(m?.kind).toBe('block');
|
||||
if (m?.kind === 'block') {
|
||||
expect(m.node.heading).toBe('Boundaries');
|
||||
}
|
||||
});
|
||||
|
||||
it('resolves item by slug', () => {
|
||||
const m = resolveOcPath(ast, {
|
||||
file: 'AGENTS.md',
|
||||
section: 'tools',
|
||||
item: 'gh',
|
||||
});
|
||||
expect(m?.kind).toBe('item');
|
||||
if (m?.kind === 'item') {
|
||||
expect(m.node.kv?.value).toBe('GitHub CLI');
|
||||
expect(m.block.heading).toBe('Tools');
|
||||
}
|
||||
});
|
||||
|
||||
it('resolves item-field via kv', () => {
|
||||
const m = resolveOcPath(ast, {
|
||||
file: 'AGENTS.md',
|
||||
section: 'tools',
|
||||
item: 'gh',
|
||||
field: 'gh',
|
||||
});
|
||||
expect(m?.kind).toBe('item-field');
|
||||
if (m?.kind === 'item-field') {
|
||||
expect(m.value).toBe('GitHub CLI');
|
||||
}
|
||||
});
|
||||
|
||||
it('resolves frontmatter via [frontmatter] sentinel section', () => {
|
||||
const m = resolveOcPath(ast, {
|
||||
file: 'AGENTS.md',
|
||||
section: '[frontmatter]',
|
||||
field: 'name',
|
||||
});
|
||||
expect(m?.kind).toBe('frontmatter');
|
||||
if (m?.kind === 'frontmatter') {
|
||||
expect(m.node.value).toBe('github');
|
||||
}
|
||||
});
|
||||
|
||||
it('returns null for unknown section', () => {
|
||||
const m = resolveOcPath(ast, { file: 'AGENTS.md', section: 'nonexistent' });
|
||||
expect(m).toBeNull();
|
||||
});
|
||||
|
||||
it('returns null for unknown item', () => {
|
||||
const m = resolveOcPath(ast, {
|
||||
file: 'AGENTS.md',
|
||||
section: 'tools',
|
||||
item: 'nonexistent',
|
||||
});
|
||||
expect(m).toBeNull();
|
||||
});
|
||||
|
||||
it('returns null for field on non-kv item', () => {
|
||||
const m = resolveOcPath(ast, {
|
||||
file: 'AGENTS.md',
|
||||
section: 'boundaries',
|
||||
item: 'never-write-to-etc',
|
||||
field: 'risk',
|
||||
});
|
||||
expect(m).toBeNull();
|
||||
});
|
||||
});
|
||||
120
src/oc-path/tests/scenarios/append-multi-agent.test.ts
Normal file
120
src/oc-path/tests/scenarios/append-multi-agent.test.ts
Normal file
@@ -0,0 +1,120 @@
|
||||
/**
|
||||
* Wave 20 — JSONL append + multi-agent session sim.
|
||||
*
|
||||
* Substrate guarantee: `appendJsonlOcPath(ast, value)` returns a new AST
|
||||
* with the value appended as a new line. Single-writer model at the
|
||||
* substrate; concurrent-append safety lives in the LKG tracker layer
|
||||
* (PR-4) on top of git's three-way merge.
|
||||
*
|
||||
* Append for other kinds (jsonc array push, md item-to-section) was
|
||||
* removed from the substrate — those are domain operations that ride
|
||||
* on top of `setXxxOcPath` at the doctor / tracker layer, where the
|
||||
* value shapes are domain-defined.
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { emitJsonl } from '../../jsonl/emit.js';
|
||||
import { appendJsonlOcPath } from '../../jsonl/edit.js';
|
||||
import { parseJsonl } from '../../jsonl/parse.js';
|
||||
import type { JsoncValue } from '../../jsonc/ast.js';
|
||||
|
||||
/**
 * Builds a synthetic JSONC object node of the shape `{ event: name, n: n }`
 * for use as an appended JSONL line in these scenarios.
 *
 * Both entries use `line: 0` (presumably a placeholder, since the node is
 * constructed in the test rather than parsed from source text).
 */
function event(name: string, n: number): JsoncValue {
  const node: JsoncValue = {
    kind: 'object',
    entries: [
      { key: 'event', line: 0, value: { kind: 'string', value: name } },
      { key: 'n', line: 0, value: { kind: 'number', value: n } },
    ],
  };
  return node;
}
|
||||
|
||||
describe('wave-20 jsonl append + multi-agent session sim', () => {
|
||||
it('A-01 single agent appends 100 events in order', () => {
|
||||
let ast = parseJsonl('').ast;
|
||||
for (let i = 0; i < 100; i++) {
|
||||
ast = appendJsonlOcPath(ast, event('step', i));
|
||||
}
|
||||
const lines = emitJsonl(ast).split('\n').filter((l) => l.length > 0);
|
||||
expect(lines).toHaveLength(100);
|
||||
expect(JSON.parse(lines[0] ?? '')).toEqual({ event: 'step', n: 0 });
|
||||
expect(JSON.parse(lines[99] ?? '')).toEqual({ event: 'step', n: 99 });
|
||||
});
|
||||
|
||||
it('A-02 two agents alternating appends preserve interleave order', () => {
|
||||
let ast = parseJsonl('').ast;
|
||||
for (let i = 0; i < 10; i++) {
|
||||
const agent = i % 2 === 0 ? 'a' : 'b';
|
||||
ast = appendJsonlOcPath(ast, event(agent, i));
|
||||
}
|
||||
const lines = emitJsonl(ast).split('\n').filter((l) => l.length > 0);
|
||||
expect(lines).toHaveLength(10);
|
||||
for (let i = 0; i < 10; i++) {
|
||||
const expected = i % 2 === 0 ? 'a' : 'b';
|
||||
expect(JSON.parse(lines[i] ?? '').event).toBe(expected);
|
||||
}
|
||||
});
|
||||
|
||||
it('A-03 append after a malformed line preserves both', () => {
|
||||
let ast = parseJsonl('{"a":1}\nbroken\n').ast;
|
||||
ast = appendJsonlOcPath(ast, event('start', 1));
|
||||
const out = emitJsonl(ast);
|
||||
expect(out).toContain('broken');
|
||||
expect(out).toContain('"event":"start"');
|
||||
});
|
||||
|
||||
it('A-04 append to empty file produces a single value line', () => {
|
||||
let ast = parseJsonl('').ast;
|
||||
ast = appendJsonlOcPath(ast, event('first', 0));
|
||||
const out = emitJsonl(ast);
|
||||
expect(JSON.parse(out)).toEqual({ event: 'first', n: 0 });
|
||||
});
|
||||
|
||||
it('A-05 append assigns line numbers monotonically', () => {
|
||||
let ast = parseJsonl('').ast;
|
||||
ast = appendJsonlOcPath(ast, event('a', 0));
|
||||
ast = appendJsonlOcPath(ast, event('b', 1));
|
||||
ast = appendJsonlOcPath(ast, event('c', 2));
|
||||
expect(ast.lines.map((l) => l.line)).toEqual([1, 2, 3]);
|
||||
});
|
||||
|
||||
it('A-06 append after blank lines preserves line-number gaps correctly', () => {
|
||||
let ast = parseJsonl('{"a":1}\n\n\n').ast;
|
||||
ast = appendJsonlOcPath(ast, event('after', 0));
|
||||
// Existing lines: L1 value, L2 blank, L3 blank. Appended line is L4.
|
||||
expect(ast.lines.length).toBe(4);
|
||||
expect(ast.lines[3]?.line).toBe(4);
|
||||
});
|
||||
|
||||
it('A-07 1000-event session sim is deterministic', () => {
|
||||
let ast = parseJsonl('').ast;
|
||||
for (let i = 0; i < 1000; i++) {
|
||||
ast = appendJsonlOcPath(ast, event('e', i));
|
||||
}
|
||||
const lines = emitJsonl(ast).split('\n').filter((l) => l.length > 0);
|
||||
expect(lines).toHaveLength(1000);
|
||||
expect(JSON.parse(lines[999] ?? '').n).toBe(999);
|
||||
});
|
||||
|
||||
it('A-08 append is non-mutating on the input AST', () => {
|
||||
const ast = parseJsonl('{"a":1}\n').ast;
|
||||
const before = JSON.stringify(ast);
|
||||
appendJsonlOcPath(ast, event('x', 0));
|
||||
expect(JSON.stringify(ast)).toBe(before);
|
||||
});
|
||||
|
||||
it('A-09 append preserves prior raw bytes (renders new tail)', () => {
|
||||
let ast = parseJsonl('{"a":1}\n').ast;
|
||||
ast = appendJsonlOcPath(ast, event('b', 1));
|
||||
const out = emitJsonl(ast);
|
||||
const lines = out.split('\n');
|
||||
// First line content unchanged.
|
||||
expect(lines[0]).toContain('"a":1');
|
||||
// Second line is the new event.
|
||||
expect(JSON.parse(lines[1] ?? '')).toEqual({ event: 'b', n: 1 });
|
||||
});
|
||||
|
||||
it('A-10 deterministic line-number assignment after malformed lines', () => {
|
||||
let ast = parseJsonl('{"a":1}\nbroken\n{"b":2}\n').ast;
|
||||
ast = appendJsonlOcPath(ast, event('c', 2));
|
||||
expect(ast.lines.map((l) => l.line)).toEqual([1, 2, 3, 4]);
|
||||
});
|
||||
});
|
||||
179
src/oc-path/tests/scenarios/byte-fidelity.test.ts
Normal file
179
src/oc-path/tests/scenarios/byte-fidelity.test.ts
Normal file
@@ -0,0 +1,179 @@
|
||||
/**
|
||||
* Wave 1 — byte-fidelity round-trip.
|
||||
*
|
||||
* Substrate guarantee: `emitMd(parse(raw), { mode: 'roundtrip' }) === raw`
|
||||
* for every input the parser accepts. This wave hammers that.
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { emitMd } from '../../emit.js';
|
||||
import { parseMd } from '../../parse.js';
|
||||
|
||||
/**
 * Parses `raw` as markdown and immediately emits the resulting AST, returning
 * the emitted text so callers can compare it against the input.
 */
function roundTrip(raw: string): string {
  return emitMd(parseMd(raw).ast);
}
|
||||
|
||||
describe('wave-01 byte-fidelity', () => {
|
||||
it('B-01 empty file', () => {
|
||||
expect(roundTrip('')).toBe('');
|
||||
});
|
||||
|
||||
it('B-02 whitespace-only file', () => {
|
||||
expect(roundTrip(' \n\n \n')).toBe(' \n\n \n');
|
||||
});
|
||||
|
||||
it('B-03 single newline', () => {
|
||||
expect(roundTrip('\n')).toBe('\n');
|
||||
});
|
||||
|
||||
it('B-04 file without trailing newline', () => {
|
||||
expect(roundTrip('## H\n- item')).toBe('## H\n- item');
|
||||
});
|
||||
|
||||
it('B-05 file with trailing newline', () => {
|
||||
expect(roundTrip('## H\n- item\n')).toBe('## H\n- item\n');
|
||||
});
|
||||
|
||||
it('B-06 file with multiple trailing newlines', () => {
|
||||
expect(roundTrip('## H\n- item\n\n\n')).toBe('## H\n- item\n\n\n');
|
||||
});
|
||||
|
||||
it('B-07 BOM at start', () => {
  // Explicit \uFEFF escape so the BOM is visible in the source and cannot be
  // stripped by editors or tooling; without it this case duplicates B-05.
  const raw = '\uFEFF## Heading\n- item\n';
  expect(roundTrip(raw)).toBe(raw);
});
|
||||
|
||||
it('B-08 CRLF line endings', () => {
|
||||
const raw = '## H\r\n\r\n- item\r\n';
|
||||
expect(roundTrip(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('B-09 mixed line endings (CRLF + LF)', () => {
|
||||
const raw = '## H\r\n- item\n- another\r\n';
|
||||
expect(roundTrip(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('B-10 tabs preserved in body', () => {
|
||||
const raw = '## H\n\n\tindented body\n';
|
||||
expect(roundTrip(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('B-11 trailing whitespace on lines preserved', () => {
|
||||
const raw = '## Heading \n- item \n';
|
||||
expect(roundTrip(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('B-12 multiple consecutive blank lines preserved', () => {
|
||||
const raw = '## H\n\n\n\n- item\n';
|
||||
expect(roundTrip(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('B-13 frontmatter only, no body', () => {
|
||||
const raw = '---\nname: x\n---\n';
|
||||
expect(roundTrip(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('B-14 body only, no frontmatter, no headings', () => {
|
||||
const raw = 'Just some prose.\nNo structure.\n';
|
||||
expect(roundTrip(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('B-15 frontmatter + body + multiple sections', () => {
|
||||
const raw = `---
|
||||
name: github
|
||||
description: gh CLI
|
||||
---
|
||||
|
||||
Preamble.
|
||||
|
||||
## Boundaries
|
||||
|
||||
- never write to /etc
|
||||
|
||||
## Tools
|
||||
|
||||
- gh: GitHub CLI
|
||||
- curl: HTTP client
|
||||
`;
|
||||
expect(roundTrip(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('B-16 unicode content preserved', () => {
|
||||
const raw = '## Café Section\n\n- résumé item\n- 日本語\n';
|
||||
expect(roundTrip(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('B-17 emoji preserved', () => {
|
||||
const raw = '## 🚀 Launch\n\n- ✅ ready\n- 🔒 secure\n';
|
||||
expect(roundTrip(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('B-18 frontmatter with special chars in values', () => {
|
||||
const raw = `---\nurl: https://example.com:443/path?q=1&a=2\n---\n`;
|
||||
expect(roundTrip(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('B-19 file with mixed bullet markers (-, *, +)', () => {
|
||||
const raw = '## H\n\n- dash\n* star\n+ plus\n';
|
||||
expect(roundTrip(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('B-20 raw === parse(raw).raw === emitMd(parse(raw)) for 50 random shapes', () => {
  // A table of small, structurally varied inputs; each one must survive a
  // parse -> emit round trip unchanged.
  const inputs = [
    '',
    '\n',
    '## A\n',
    '## A\n## B\n',
    '---\n---\n',
    '---\nk: v\n---\n',
    '---\nk: v\n---\nbody\n',
    '## H\n- a\n- b\n## I\n- c\n',
    '\n',
    '\r\n',
    '\t\n',
    'plain\n',
    '`code`\n',
    '```\nfence\n```\n',
    '```ts\nconst x = 1;\n```\n',
    '| a | b |\n| - | - |\n| 1 | 2 |\n',
    '> quote\n',
    '# H1 not split\n## H2 split\n',
    'preamble\n## block\nbody\n',
    'preamble\n## block\nbody\n## block2\nbody2\n',
    '## h\n\n\n\n',
    ' ## indented heading (not parsed)\n',
    '##NoSpace\n',
    '## With trailing spaces \n- item\n',
    '## H\n- nested\n - sub\n',
    '## H\n\n```md\n## inside code\n```\n',
    '---\na: 1\nb: "two"\nc: \'three\'\n---\n',
    '---\nopen\nbut no close\n\nbody\n',
    'mixed\r\nline\nendings\r\n',
    // BOM-prefixed frontmatter; the BOM is written as an escape so it stays
    // visible and cannot be dropped by editors or tooling.
    '\uFEFF---\nname: bom\n---\nbody\n',
    '## h\n- k: v\n- k2: v2\n- plain\n',
    '## h\n\n| a | b |\n|---|---|\n',
    '## h\n```sql\nSELECT 1\n```\n',
    '## h\n\n- url: http://x.example.com:80/p?q=1\n',
    '## h\n\n- key: value with: colons\n',
    '## h\n\n- key: "quoted: value"\n',
    '## h\n\n- a-b: c-d\n',
    '## h with `inline code`\n',
    'no blocks\nat all\n',
    'No body or section\n\n\n\n',
    ' \n \n',
    '## h\n## h2\n## h3\n',
    '##\n', // empty heading
    '## \n', // heading whitespace only
    '\n\n## h\n\n\n',
    '---\n\n---\n',
    '## h\n- \n', // empty bullet
    '## h\n\n\n```\nempty fence body\n```\n',
    '## h\n```\nunclosed fence',
    '## empty section\n## next\n',
    '0\n',
  ];
  for (const raw of inputs) {
    // The message carries a truncated, JSON-escaped copy of the failing input.
    expect(roundTrip(raw), `failed on: ${JSON.stringify(raw.slice(0, 60))}`).toBe(raw);
  }
});
|
||||
});
|
||||
97
src/oc-path/tests/scenarios/code-blocks.test.ts
Normal file
97
src/oc-path/tests/scenarios/code-blocks.test.ts
Normal file
@@ -0,0 +1,97 @@
|
||||
/**
|
||||
* Wave 6 — fenced code blocks.
|
||||
*
|
||||
* Substrate guarantee: triple-backtick fences (` ``` `) inside H2 blocks
|
||||
* extract as `AstCodeBlock` with `lang` (or null) and verbatim `text`.
|
||||
* Code blocks suppress H2-split and item-extraction inside their body.
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { parseMd } from '../../parse.js';
|
||||
|
||||
describe('wave-06 code-blocks', () => {
|
||||
it('CB-01 unlanguaged fence', () => {
|
||||
const raw = `## H\n\n\`\`\`\nplain text\n\`\`\`\n`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks[0]?.codeBlocks[0]).toMatchObject({
|
||||
lang: null,
|
||||
text: 'plain text',
|
||||
});
|
||||
});
|
||||
|
||||
it('CB-02 languaged fence', () => {
|
||||
const raw = `## H\n\n\`\`\`ts\nconst x = 1;\n\`\`\`\n`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks[0]?.codeBlocks[0]?.lang).toBe('ts');
|
||||
expect(ast.blocks[0]?.codeBlocks[0]?.text).toBe('const x = 1;');
|
||||
});
|
||||
|
||||
it('CB-03 multi-line code body preserved verbatim', () => {
|
||||
const raw = `## H\n\n\`\`\`ts\nline 1\nline 2\nline 3\n\`\`\`\n`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks[0]?.codeBlocks[0]?.text).toBe('line 1\nline 2\nline 3');
|
||||
});
|
||||
|
||||
it('CB-04 empty code block', () => {
|
||||
const raw = `## H\n\n\`\`\`\n\`\`\`\n`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks[0]?.codeBlocks[0]?.text).toBe('');
|
||||
});
|
||||
|
||||
it('CB-05 code block with `## ` does NOT split as heading', () => {
|
||||
const raw = `## Real\n\n\`\`\`md\n## Not a heading\n\`\`\`\n\n## Another real\n`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks.map((b) => b.heading)).toEqual(['Real', 'Another real']);
|
||||
});
|
||||
|
||||
it('CB-06 code block with `- bullet` does NOT extract as item', () => {
|
||||
const raw = `## H\n\n\`\`\`\n- not a bullet\n- still not\n\`\`\`\n\n- real bullet\n`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(['real bullet']);
|
||||
});
|
||||
|
||||
it('CB-07 multiple code blocks in same section', () => {
|
||||
const raw = `## H\n\n\`\`\`a\nfirst\n\`\`\`\n\n\`\`\`b\nsecond\n\`\`\`\n`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks[0]?.codeBlocks.length).toBe(2);
|
||||
expect(ast.blocks[0]?.codeBlocks.map((c) => c.lang)).toEqual(['a', 'b']);
|
||||
});
|
||||
|
||||
it('CB-08 unterminated fence — body extends to end of section', () => {
|
||||
const raw = `## H\n\n\`\`\`\nopen but never closes\n`;
|
||||
const { ast } = parseMd(raw);
|
||||
// Behavior: code block is created with whatever was after the open
|
||||
// fence, including any trailing newline lines. Documents are
|
||||
// likely malformed; substrate is lenient and preserves what's
|
||||
// there (verifiable via raw round-trip).
|
||||
expect(ast.blocks[0]?.codeBlocks[0]?.text).toContain('open but never closes');
|
||||
});
|
||||
|
||||
it('CB-09 fence with leading spaces (4-space indented code)', () => {
|
||||
// Note: only column-0 ``` triggers fence. Indented content is body
|
||||
// text. This is the documented behavior.
|
||||
const raw = `## H\n\n \`\`\`\n indented\n \`\`\`\n`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks[0]?.codeBlocks).toEqual([]);
|
||||
});
|
||||
|
||||
it('CB-10 lang tag with extra whitespace trimmed', () => {
|
||||
const raw = `## H\n\n\`\`\` jsonc \nbody\n\`\`\`\n`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks[0]?.codeBlocks[0]?.lang).toBe('jsonc');
|
||||
});
|
||||
|
||||
it('CB-11 lang tag with hyphen / dot (typescript-jsx, c++)', () => {
|
||||
const raw = `## H\n\n\`\`\`typescript-jsx\nx\n\`\`\`\n`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks[0]?.codeBlocks[0]?.lang).toBe('typescript-jsx');
|
||||
});
|
||||
|
||||
it('CB-12 fence appearing in preamble (before any H2) is ignored at block layer', () => {
|
||||
const raw = `\`\`\`\npreamble code\n\`\`\`\n\n## H\n`;
|
||||
const { ast } = parseMd(raw);
|
||||
// Preamble code blocks aren't structurally extracted at the
|
||||
// substrate layer; this is documented. Lint can scan preamble
|
||||
// raw if needed.
|
||||
expect(ast.blocks[0]?.codeBlocks).toEqual([]);
|
||||
});
|
||||
});
|
||||
139
src/oc-path/tests/scenarios/cross-cutting.test.ts
Normal file
139
src/oc-path/tests/scenarios/cross-cutting.test.ts
Normal file
@@ -0,0 +1,139 @@
|
||||
/**
|
||||
* Wave 13 — cross-cutting integration.
|
||||
*
|
||||
* Pipelines: parse + resolve + emit working together. Slug stability
|
||||
* across re-parses. OcPath round-trip via the AST (slugs in OcPath
|
||||
* must round-trip back to the resolved node).
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { emitMd } from '../../emit.js';
|
||||
import { formatOcPath, parseOcPath } from '../../oc-path.js';
|
||||
import { parseMd } from '../../parse.js';
|
||||
import { resolveMdOcPath as resolveOcPath } from '../../resolve.js';
|
||||
|
||||
const SAMPLE = `---
|
||||
name: github
|
||||
description: gh CLI
|
||||
---
|
||||
|
||||
Preamble.
|
||||
|
||||
## Boundaries
|
||||
|
||||
- never write to /etc
|
||||
- always confirm
|
||||
|
||||
## Tools
|
||||
|
||||
- gh: GitHub CLI
|
||||
- curl: HTTP client
|
||||
`;
|
||||
|
||||
describe('wave-13 cross-cutting', () => {
|
||||
it('CC-01 parse → resolve → emit pipeline (block)', () => {
|
||||
const { ast } = parseMd(SAMPLE);
|
||||
const m = resolveOcPath(ast, { file: 'AGENTS.md', section: 'boundaries' });
|
||||
expect(m?.kind).toBe('block');
|
||||
expect(emitMd(ast)).toBe(SAMPLE);
|
||||
});
|
||||
|
||||
it('CC-02 OcPath round-trip via AST: parse + resolve + format', () => {
|
||||
const { ast } = parseMd(SAMPLE);
|
||||
for (const block of ast.blocks) {
|
||||
const path = parseOcPath(`oc://AGENTS.md/${block.slug}`);
|
||||
const m = resolveOcPath(ast, path);
|
||||
expect(m?.kind, `block ${block.slug} should resolve`).toBe('block');
|
||||
// Format the same path back; slug → URI shape should be stable.
|
||||
expect(formatOcPath(path)).toBe(`oc://AGENTS.md/${block.slug}`);
|
||||
}
|
||||
});
|
||||
|
||||
it('CC-03 every item in every block is OcPath-addressable', () => {
|
||||
const { ast } = parseMd(SAMPLE);
|
||||
for (const block of ast.blocks) {
|
||||
for (const item of block.items) {
|
||||
const path = parseOcPath(`oc://AGENTS.md/${block.slug}/${item.slug}`);
|
||||
const m = resolveOcPath(ast, path);
|
||||
expect(m?.kind, `${block.slug}/${item.slug} should resolve`).toBe('item');
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it('CC-04 every kv item field is OcPath-addressable', () => {
|
||||
const { ast } = parseMd(SAMPLE);
|
||||
for (const block of ast.blocks) {
|
||||
for (const item of block.items) {
|
||||
if (!item.kv) {continue;}
|
||||
const path = parseOcPath(
|
||||
`oc://AGENTS.md/${block.slug}/${item.slug}/${item.kv.key}`,
|
||||
);
|
||||
const m = resolveOcPath(ast, path);
|
||||
expect(m?.kind).toBe('item-field');
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it('CC-05 every frontmatter entry is OcPath-addressable', () => {
|
||||
const { ast } = parseMd(SAMPLE);
|
||||
for (const fm of ast.frontmatter) {
|
||||
const path = parseOcPath(`oc://AGENTS.md/[frontmatter]/${fm.key}`);
|
||||
const m = resolveOcPath(ast, path);
|
||||
expect(m?.kind).toBe('frontmatter');
|
||||
}
|
||||
});
|
||||
|
||||
it('CC-06 slugs are stable across re-parses (deterministic)', () => {
|
||||
const a1 = parseMd(SAMPLE).ast;
|
||||
const a2 = parseMd(SAMPLE).ast;
|
||||
expect(a1.blocks.map((b) => b.slug)).toEqual(a2.blocks.map((b) => b.slug));
|
||||
expect(a1.blocks.map((b) => b.items.map((i) => i.slug))).toEqual(
|
||||
a2.blocks.map((b) => b.items.map((i) => i.slug)),
|
||||
);
|
||||
});
|
||||
|
||||
it('CC-07 modifying raw + re-parse produces consistent AST shape', () => {
|
||||
const a1 = parseMd(SAMPLE).ast;
|
||||
const modified = SAMPLE.replace('GitHub CLI', 'GitHub command-line interface');
|
||||
const a2 = parseMd(modified).ast;
|
||||
// Block + item count + slugs unchanged.
|
||||
expect(a2.blocks.length).toBe(a1.blocks.length);
|
||||
const a1Tools = a1.blocks.find((b) => b.slug === 'tools');
|
||||
const a2Tools = a2.blocks.find((b) => b.slug === 'tools');
|
||||
expect(a2Tools?.items.length).toBe(a1Tools?.items.length);
|
||||
// KV value reflects the change.
|
||||
const ghItem = a2Tools?.items.find((i) => i.kv?.key === 'gh');
|
||||
expect(ghItem?.kv?.value).toBe('GitHub command-line interface');
|
||||
});
|
||||
|
||||
it('CC-08 unknown OcPath returns null without affecting subsequent valid resolves', () => {
|
||||
const { ast } = parseMd(SAMPLE);
|
||||
expect(resolveOcPath(ast, { file: 'X.md', section: 'nonexistent' })).toBeNull();
|
||||
expect(resolveOcPath(ast, { file: 'X.md', section: 'tools' })?.kind).toBe('block');
|
||||
});
|
||||
|
||||
// CC-09: the `file` segment of an OcPath is not consulted when resolving
// against an already-parsed AST, so two paths that differ only in `file`
// must resolve to the same kind of match.
it('CC-09 resolve does not depend on file segment matching', () => {
  const { ast } = parseMd(SAMPLE);
  const a = resolveOcPath(ast, { file: 'A.md', section: 'tools' });
  const b = resolveOcPath(ast, { file: 'B.md', section: 'tools' });
  // Pin the expected kind first: comparing `a?.kind` to `b?.kind` alone
  // would also pass if both resolves failed (undefined === undefined).
  expect(a?.kind).toBe('block');
  expect(b?.kind).toBe(a?.kind);
});
|
||||
|
||||
it('CC-10 round-trip across all 9 valid OcPath shapes', () => {
|
||||
const { ast } = parseMd(SAMPLE);
|
||||
const cases = [
|
||||
{ file: 'X.md' },
|
||||
{ file: 'X.md', section: 'tools' },
|
||||
{ file: 'X.md', section: 'tools', item: 'gh' },
|
||||
{ file: 'X.md', section: 'tools', item: 'gh', field: 'gh' },
|
||||
{ file: 'X.md', section: '[frontmatter]', field: 'name' },
|
||||
{ file: 'X.md', section: 'boundaries' },
|
||||
{ file: 'X.md', section: 'boundaries', item: 'never-write-to-etc' },
|
||||
{ file: 'X.md', section: 'boundaries', item: 'always-confirm' },
|
||||
{ file: 'X.md', section: '[frontmatter]', field: 'description' },
|
||||
];
|
||||
for (const path of cases) {
|
||||
const m = resolveOcPath(ast, path);
|
||||
expect(m, `failed for ${JSON.stringify(path)}`).not.toBeNull();
|
||||
}
|
||||
});
|
||||
});
|
||||
153
src/oc-path/tests/scenarios/cross-kind-properties.test.ts
Normal file
153
src/oc-path/tests/scenarios/cross-kind-properties.test.ts
Normal file
@@ -0,0 +1,153 @@
|
||||
/**
|
||||
* Wave 22 — cross-kind property invariants.
|
||||
*
|
||||
* Per-kind verbs hold the same shape contracts regardless of kind:
|
||||
*
|
||||
* 1. parse → emit (round-trip) is byte-stable for ALL kinds
|
||||
* 2. resolve is non-mutating for ALL kinds
|
||||
* 3. set returns structured failure (never throws) for unresolvable
|
||||
* paths across ALL kinds
|
||||
* 4. inferKind aligns with the parsers consumers actually pick
|
||||
* 5. parse → emit → parse is fixpoint
|
||||
* 6. hostile inputs do not throw at parse time
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { inferKind } from '../../dispatch.js';
|
||||
import { emitMd } from '../../emit.js';
|
||||
import { setMdOcPath } from '../../edit.js';
|
||||
import { resolveMdOcPath } from '../../resolve.js';
|
||||
import { emitJsonc } from '../../jsonc/emit.js';
|
||||
import { setJsoncOcPath } from '../../jsonc/edit.js';
|
||||
import { resolveJsoncOcPath } from '../../jsonc/resolve.js';
|
||||
import { parseJsonc } from '../../jsonc/parse.js';
|
||||
import { emitJsonl } from '../../jsonl/emit.js';
|
||||
import { setJsonlOcPath } from '../../jsonl/edit.js';
|
||||
import { resolveJsonlOcPath } from '../../jsonl/resolve.js';
|
||||
import { parseJsonl } from '../../jsonl/parse.js';
|
||||
import { parseOcPath } from '../../oc-path.js';
|
||||
import { parseMd } from '../../parse.js';
|
||||
|
||||
describe('wave-22 cross-kind property invariants', () => {
|
||||
const mdRaw = '---\nname: x\n---\n\n## Boundaries\n\n- enabled: true\n';
|
||||
const jsoncRaw = '// h\n{ "k": 1, "n": [1,2,3] }\n';
|
||||
const jsonlRaw = '{"a":1}\n\nbroken\n{"b":2}\n';
|
||||
|
||||
it('P-01 round-trip parse → emit is byte-stable across all kinds', () => {
|
||||
expect(emitMd(parseMd(mdRaw).ast)).toBe(mdRaw);
|
||||
expect(emitJsonc(parseJsonc(jsoncRaw).ast)).toBe(jsoncRaw);
|
||||
expect(emitJsonl(parseJsonl(jsonlRaw).ast)).toBe(jsonlRaw);
|
||||
});
|
||||
|
||||
it('P-02 resolve is non-mutating across all kinds', () => {
|
||||
const md = parseMd(mdRaw).ast;
|
||||
let before = JSON.stringify(md);
|
||||
resolveMdOcPath(md, parseOcPath('oc://X/[frontmatter]/name'));
|
||||
resolveMdOcPath(md, parseOcPath('oc://X/boundaries'));
|
||||
expect(JSON.stringify(md)).toBe(before);
|
||||
|
||||
const jsonc = parseJsonc(jsoncRaw).ast;
|
||||
before = JSON.stringify(jsonc);
|
||||
resolveJsoncOcPath(jsonc, parseOcPath('oc://X/k'));
|
||||
resolveJsoncOcPath(jsonc, parseOcPath('oc://X/n.0'));
|
||||
expect(JSON.stringify(jsonc)).toBe(before);
|
||||
|
||||
const jsonl = parseJsonl(jsonlRaw).ast;
|
||||
before = JSON.stringify(jsonl);
|
||||
resolveJsonlOcPath(jsonl, parseOcPath('oc://X/L1'));
|
||||
resolveJsonlOcPath(jsonl, parseOcPath('oc://X/$last'));
|
||||
expect(JSON.stringify(jsonl)).toBe(before);
|
||||
});
|
||||
|
||||
it('P-03 unresolvable set never throws across all kinds', () => {
|
||||
const ocPath = parseOcPath('oc://X/totally.missing.path');
|
||||
expect(() =>
|
||||
setMdOcPath(parseMd(mdRaw).ast, ocPath, 'x'),
|
||||
).not.toThrow();
|
||||
expect(() =>
|
||||
setJsoncOcPath(parseJsonc(jsoncRaw).ast, ocPath, {
|
||||
kind: 'string',
|
||||
value: 'x',
|
||||
}),
|
||||
).not.toThrow();
|
||||
expect(() =>
|
||||
setJsonlOcPath(parseJsonl(jsonlRaw).ast, ocPath, {
|
||||
kind: 'string',
|
||||
value: 'x',
|
||||
}),
|
||||
).not.toThrow();
|
||||
});
|
||||
|
||||
it('P-04 inferKind aligns with the parser actually used', () => {
|
||||
expect(inferKind('AGENTS.md')).toBe('md');
|
||||
expect(inferKind('SOUL.md')).toBe('md');
|
||||
expect(inferKind('config.jsonc')).toBe('jsonc');
|
||||
expect(inferKind('plugins.json')).toBe('jsonc');
|
||||
expect(inferKind('events.jsonl')).toBe('jsonl');
|
||||
expect(inferKind('audit.ndjson')).toBe('jsonl');
|
||||
});
|
||||
|
||||
it('P-05 parse → emit → parse is fixpoint across all kinds', () => {
|
||||
const md1 = emitMd(parseMd(mdRaw).ast);
|
||||
const md2 = emitMd(parseMd(md1).ast);
|
||||
expect(md1).toBe(md2);
|
||||
|
||||
const jc1 = emitJsonc(parseJsonc(jsoncRaw).ast);
|
||||
const jc2 = emitJsonc(parseJsonc(jc1).ast);
|
||||
expect(jc1).toBe(jc2);
|
||||
|
||||
const jl1 = emitJsonl(parseJsonl(jsonlRaw).ast);
|
||||
const jl2 = emitJsonl(parseJsonl(jl1).ast);
|
||||
expect(jl1).toBe(jl2);
|
||||
});
|
||||
|
||||
it('P-06 hostile inputs do not throw at parse time across all kinds', () => {
|
||||
const hostile = [
|
||||
'\x00\x01\x02 binary garbage',
|
||||
'{ "unclosed":',
|
||||
'## heading without anything',
|
||||
'\n\n\n\n\n',
|
||||
];
|
||||
for (const raw of hostile) {
|
||||
expect(() => parseMd(raw)).not.toThrow();
|
||||
expect(() => parseJsonc(raw)).not.toThrow();
|
||||
expect(() => parseJsonl(raw)).not.toThrow();
|
||||
}
|
||||
});
|
||||
|
||||
// P-07: an over-long path must not make any per-kind resolver throw.
// NOTE(review): the title promises a `null` result, but only the no-throw
// half of that contract is asserted below.
it('P-07 resolver returns null for paths past valid kinds (no throw)', () => {
  const overlong = parseOcPath('oc://X/a/b/c.d.e.f.g.h');
  expect(() => resolveMdOcPath(parseMd(mdRaw).ast, overlong)).not.toThrow();
  expect(() => resolveJsoncOcPath(parseJsonc(jsoncRaw).ast, overlong)).not.toThrow();
  expect(() => resolveJsonlOcPath(parseJsonl(jsonlRaw).ast, overlong)).not.toThrow();
});
|
||||
|
||||
// P-08: writing a value through `setJsoncOcPath` and then resolving the same
// path on the returned AST must yield the value that was just written.
it('P-08 set-then-resolve produces the value just written (jsonc)', () => {
  const ast = parseJsonc('{ "k": 1 }').ast;
  const r = setJsoncOcPath(ast, parseOcPath('oc://X/k'), {
    kind: 'number',
    value: 42,
  });
  // Assert success explicitly; otherwise a failed set skips every assertion
  // below and the test passes vacuously.
  expect(r.ok).toBe(true);
  if (r.ok) {
    const m = resolveJsoncOcPath(r.ast, parseOcPath('oc://X/k'));
    // Same reasoning for the match kind: the `if` below only narrows the type.
    expect(m?.kind).toBe('object-entry');
    if (m?.kind === 'object-entry') {
      expect(m.node.value).toEqual({ kind: 'number', value: 42 });
    }
  }
});
|
||||
|
||||
it('P-09 verbs are deterministic — same input twice produces same output', () => {
|
||||
expect(emitMd(parseMd(mdRaw).ast)).toBe(emitMd(parseMd(mdRaw).ast));
|
||||
expect(emitJsonc(parseJsonc(jsoncRaw).ast)).toBe(
|
||||
emitJsonc(parseJsonc(jsoncRaw).ast),
|
||||
);
|
||||
expect(emitJsonl(parseJsonl(jsonlRaw).ast)).toBe(
|
||||
emitJsonl(parseJsonl(jsonlRaw).ast),
|
||||
);
|
||||
});
|
||||
|
||||
it('P-10 inferKind returns null for unknown extensions', () => {
|
||||
expect(inferKind('binary.bin')).toBeNull();
|
||||
expect(inferKind('no-ext')).toBeNull();
|
||||
expect(inferKind('archive.tar.gz')).toBeNull();
|
||||
});
|
||||
});
|
||||
161
src/oc-path/tests/scenarios/edit-emit-roundtrip.test.ts
Normal file
161
src/oc-path/tests/scenarios/edit-emit-roundtrip.test.ts
Normal file
@@ -0,0 +1,161 @@
|
||||
/**
|
||||
* Wave 19 — edit → emit round-trip across all kinds.
|
||||
*
|
||||
* Substrate guarantee: parse → setXxxOcPath → emitXxx produces valid
|
||||
* bytes that re-parse to an AST whose addressed value reflects the edit.
|
||||
* Per-kind verbs throughout — caller picks based on AST type.
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { emitMd } from '../../emit.js';
|
||||
import { setMdOcPath } from '../../edit.js';
|
||||
import { emitJsonc } from '../../jsonc/emit.js';
|
||||
import { setJsoncOcPath } from '../../jsonc/edit.js';
|
||||
import { parseJsonc } from '../../jsonc/parse.js';
|
||||
import { emitJsonl } from '../../jsonl/emit.js';
|
||||
import { setJsonlOcPath } from '../../jsonl/edit.js';
|
||||
import { parseJsonl } from '../../jsonl/parse.js';
|
||||
import { parseOcPath } from '../../oc-path.js';
|
||||
import { parseMd } from '../../parse.js';
|
||||
|
||||
describe('wave-19 edit-then-emit round-trip', () => {
|
||||
// EE-01: editing a frontmatter value, emitting, and re-parsing must surface
// the new value under the same key.
it('EE-01 md frontmatter edit re-parses to the new value', () => {
  const md = parseMd('---\nname: old\n---\n\n## Body\n').ast;
  const r = setMdOcPath(md, parseOcPath('oc://AGENTS.md/[frontmatter]/name'), 'new');
  expect(r.ok).toBe(true);
  if (r.ok) {
    // Go through emitMd (as EE-02 does) so the emit step of the
    // edit → emit → parse round-trip is actually exercised.
    const reparsed = parseMd(emitMd(r.ast)).ast;
    expect(reparsed.frontmatter.find((e) => e.key === 'name')?.value).toBe('new');
  }
});
|
||||
|
||||
it('EE-02 md item kv edit re-parses to the new value', () => {
|
||||
const md = parseMd('## Boundaries\n\n- timeout: 5\n').ast;
|
||||
const r = setMdOcPath(
|
||||
md,
|
||||
parseOcPath('oc://AGENTS.md/boundaries/timeout/timeout'),
|
||||
'60',
|
||||
);
|
||||
expect(r.ok).toBe(true);
|
||||
if (r.ok) {
|
||||
const reparsed = parseMd(emitMd(r.ast)).ast;
|
||||
const block = reparsed.blocks.find((b) => b.slug === 'boundaries');
|
||||
expect(block?.items[0]?.kv?.value).toBe('60');
|
||||
}
|
||||
});
|
||||
|
||||
it('EE-03 jsonc value edit re-parses to the new value', () => {
|
||||
const ast = parseJsonc('{ "k": 1 }').ast;
|
||||
const r = setJsoncOcPath(ast, parseOcPath('oc://config/k'), {
|
||||
kind: 'number',
|
||||
value: 42,
|
||||
});
|
||||
expect(r.ok).toBe(true);
|
||||
if (r.ok) {
|
||||
expect(JSON.parse(emitJsonc(r.ast))).toEqual({ k: 42 });
|
||||
}
|
||||
});
|
||||
|
||||
// EE-04: editing one nested key (`b.c`) must leave its sibling (`b.d`) and
// the surrounding top-level keys untouched in the emitted JSON.
it('EE-04 jsonc nested edit preserves untouched siblings', () => {
  const ast = parseJsonc('{ "a": 1, "b": { "c": 2, "d": 3 }, "e": 4 }').ast;
  const r = setJsoncOcPath(ast, parseOcPath('oc://config/b.c'), {
    kind: 'number',
    value: 99,
  });
  // Without this, a failed set would skip the assertion below and the test
  // would pass without checking anything.
  expect(r.ok).toBe(true);
  if (r.ok) {
    expect(JSON.parse(emitJsonc(r.ast))).toEqual({
      a: 1,
      b: { c: 99, d: 3 },
      e: 4,
    });
  }
});
|
||||
|
||||
// EE-05: editing a field on line 2 of a JSONL document must, after emit and
// re-parse, show the new value on that same line.
it('EE-05 jsonl line edit re-parses to the new value at the same line', () => {
  const ast = parseJsonl('{"a":1}\n{"a":2}\n{"a":3}\n').ast;
  const r = setJsonlOcPath(ast, parseOcPath('oc://log/L2/a'), {
    kind: 'number',
    value: 99,
  });
  // Without this, a failed set would skip every assertion below and the
  // test would pass without checking anything.
  expect(r.ok).toBe(true);
  if (r.ok) {
    const reparsed = parseJsonl(emitJsonl(r.ast)).ast;
    // `lines` is 0-indexed, so index 1 is the `L2` line addressed above.
    const line2 = reparsed.lines[1];
    expect(line2?.kind).toBe('value');
    if (line2?.kind === 'value' && line2.value.kind === 'object') {
      const entry = line2.value.entries.find((e) => e.key === 'a');
      expect(entry?.value).toMatchObject({ kind: 'number', value: 99 });
    }
  }
});
|
||||
|
||||
it('EE-06 jsonc edit composes: two sequential edits both land', () => {
|
||||
let ast = parseJsonc('{ "a": 1, "b": 2 }').ast;
|
||||
let r = setJsoncOcPath(ast, parseOcPath('oc://config/a'), {
|
||||
kind: 'number',
|
||||
value: 10,
|
||||
});
|
||||
if (r.ok) {ast = r.ast;}
|
||||
r = setJsoncOcPath(ast, parseOcPath('oc://config/b'), {
|
||||
kind: 'number',
|
||||
value: 20,
|
||||
});
|
||||
if (r.ok) {ast = r.ast;}
|
||||
expect(JSON.parse(emitJsonc(ast))).toEqual({ a: 10, b: 20 });
|
||||
});
|
||||
|
||||
it('EE-07 missing path returns structured failure (not throw)', () => {
|
||||
const ast = parseJsonc('{ "a": 1 }').ast;
|
||||
const r = setJsoncOcPath(ast, parseOcPath('oc://config/missing'), {
|
||||
kind: 'number',
|
||||
value: 99,
|
||||
});
|
||||
expect(r.ok).toBe(false);
|
||||
if (!r.ok) {expect(r.reason).toBe('unresolved');}
|
||||
});
|
||||
|
||||
it('EE-08 each per-kind verb takes its own AST type — no cross-kind leakage', () => {
|
||||
// Type-level guarantee: each setter only accepts its kind's AST.
|
||||
// Caller picks based on the AST they have. This is the design.
|
||||
const md = parseMd('---\nx: 1\n---\n').ast;
|
||||
const jsonc = parseJsonc('{"x":1}').ast;
|
||||
const jsonl = parseJsonl('{"x":1}\n').ast;
|
||||
|
||||
const a = setMdOcPath(md, parseOcPath('oc://X/[frontmatter]/x'), '2');
|
||||
const b = setJsoncOcPath(jsonc, parseOcPath('oc://X/x'), {
|
||||
kind: 'number',
|
||||
value: 2,
|
||||
});
|
||||
const c = setJsonlOcPath(jsonl, parseOcPath('oc://X/L1/x'), {
|
||||
kind: 'number',
|
||||
value: 2,
|
||||
});
|
||||
|
||||
expect(a.ok).toBe(true);
|
||||
expect(b.ok).toBe(true);
|
||||
expect(c.ok).toBe(true);
|
||||
});
|
||||
|
||||
// EE-09: after an edit the emitted JSONC is no longer byte-identical to the
// input: the trailing comment is dropped, but the edited value is present.
it('EE-09 byte-fidelity is broken after edit (expected — render mode applies)', () => {
  const raw = '{\n "k": 1 // comment\n}\n';
  const ast = parseJsonc(raw).ast;
  const r = setJsoncOcPath(ast, parseOcPath('oc://config/k'), {
    kind: 'number',
    value: 2,
  });
  // Without this, a failed set would skip both assertions below and the
  // test would pass without checking anything.
  expect(r.ok).toBe(true);
  if (r.ok) {
    // Comment is lost — expected. Caller's responsibility to know.
    expect(emitJsonc(r.ast)).not.toContain('// comment');
    // But the value IS the new one.
    expect(JSON.parse(emitJsonc(r.ast))).toEqual({ k: 2 });
  }
});
|
||||
|
||||
it('EE-10 edit on empty AST surfaces no-root', () => {
|
||||
const ast = parseJsonc('').ast;
|
||||
const r = setJsoncOcPath(ast, parseOcPath('oc://config/x'), {
|
||||
kind: 'number',
|
||||
value: 1,
|
||||
});
|
||||
expect(r.ok).toBe(false);
|
||||
if (!r.ok) {expect(r.reason).toBe('no-root');}
|
||||
});
|
||||
});
|
||||
140
src/oc-path/tests/scenarios/frontmatter-edges.test.ts
Normal file
140
src/oc-path/tests/scenarios/frontmatter-edges.test.ts
Normal file
@@ -0,0 +1,140 @@
|
||||
/**
|
||||
* Wave 2 — frontmatter edges.
|
||||
*
|
||||
* Substrate guarantee: frontmatter is parsed as `key: value` entries
|
||||
* with quote-stripping; malformed frontmatter doesn't crash the parser
|
||||
* (soft-error policy: emit diagnostic, recover).
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { parseMd } from '../../parse.js';
|
||||
|
||||
describe('wave-02 frontmatter-edges', () => {
|
||||
it('FM-01 simple kv pairs', () => {
|
||||
const { ast } = parseMd('---\nname: x\ndescription: y\n---\n');
|
||||
expect(ast.frontmatter.map((e) => [e.key, e.value])).toEqual([
|
||||
['name', 'x'],
|
||||
['description', 'y'],
|
||||
]);
|
||||
});
|
||||
|
||||
it('FM-02 unclosed frontmatter emits diagnostic, treats as preamble', () => {
|
||||
const { ast, diagnostics } = parseMd('---\nname: x\nno close fence\nbody\n');
|
||||
expect(diagnostics.some((d) => d.code === 'OC_FRONTMATTER_UNCLOSED')).toBe(true);
|
||||
expect(ast.frontmatter).toEqual([]);
|
||||
});
|
||||
|
||||
it('FM-03 empty frontmatter (just open + close)', () => {
|
||||
const { ast } = parseMd('---\n---\n');
|
||||
expect(ast.frontmatter).toEqual([]);
|
||||
});
|
||||
|
||||
it('FM-04 frontmatter only, file has no other content', () => {
|
||||
const { ast } = parseMd('---\nk: v\n---\n');
|
||||
expect(ast.frontmatter).toEqual([{ key: 'k', value: 'v', line: 2 }]);
|
||||
expect(ast.preamble).toBe('');
|
||||
expect(ast.blocks).toEqual([]);
|
||||
});
|
||||
|
||||
it('FM-05 double-quoted value', () => {
|
||||
const { ast } = parseMd('---\ntitle: "Hello, world"\n---\n');
|
||||
expect(ast.frontmatter[0]?.value).toBe('Hello, world');
|
||||
});
|
||||
|
||||
it('FM-06 single-quoted value', () => {
|
||||
const { ast } = parseMd("---\ntitle: 'Hello, world'\n---\n");
|
||||
expect(ast.frontmatter[0]?.value).toBe('Hello, world');
|
||||
});
|
||||
|
||||
it('FM-07 unquoted value with internal colons preserved', () => {
|
||||
const { ast } = parseMd('---\nurl: https://example.com:443/p\n---\n');
|
||||
expect(ast.frontmatter[0]?.value).toBe('https://example.com:443/p');
|
||||
});
|
||||
|
||||
it('FM-08 empty value', () => {
|
||||
const { ast } = parseMd('---\nk:\n---\n');
|
||||
expect(ast.frontmatter[0]).toEqual({ key: 'k', value: '', line: 2 });
|
||||
});
|
||||
|
||||
it('FM-09 value with leading/trailing whitespace trimmed', () => {
|
||||
const { ast } = parseMd('---\nk: spaced \n---\n');
|
||||
expect(ast.frontmatter[0]?.value).toBe('spaced');
|
||||
});
|
||||
|
||||
it('FM-10 list-style continuations are silently dropped (substrate stays opinion-free)', () => {
|
||||
const { ast } = parseMd('---\ntools:\n - gh\n - curl\n---\n');
|
||||
// The `tools:` key has an empty inline value; the list continuation
|
||||
// lines ` - gh` and ` - curl` don't match the kv regex and are
|
||||
// skipped. Lint rules can do their own structural reading of
|
||||
// frontmatter; the substrate does not.
|
||||
expect(ast.frontmatter.map((e) => e.key)).toEqual(['tools']);
|
||||
expect(ast.frontmatter[0]?.value).toBe('');
|
||||
});
|
||||
|
||||
it('FM-11 line numbers are 1-based and accurate', () => {
|
||||
const { ast } = parseMd('---\nk1: v1\nk2: v2\nk3: v3\n---\n');
|
||||
expect(ast.frontmatter.map((e) => [e.key, e.line])).toEqual([
|
||||
['k1', 2],
|
||||
['k2', 3],
|
||||
['k3', 4],
|
||||
]);
|
||||
});
|
||||
|
||||
it('FM-12 dash-key allowed', () => {
|
||||
const { ast } = parseMd('---\nuser-invocable: true\n---\n');
|
||||
expect(ast.frontmatter[0]?.key).toBe('user-invocable');
|
||||
});
|
||||
|
||||
it('FM-13 underscore-key allowed', () => {
|
||||
const { ast } = parseMd('---\nparam_set: foo\n---\n');
|
||||
expect(ast.frontmatter[0]?.key).toBe('param_set');
|
||||
});
|
||||
|
||||
it('FM-14 number-only value preserved as string', () => {
|
||||
const { ast } = parseMd('---\ntimeout: 15000\n---\n');
|
||||
expect(ast.frontmatter[0]?.value).toBe('15000');
|
||||
});
|
||||
|
||||
it('FM-15 boolean-like value preserved as string', () => {
|
||||
const { ast } = parseMd('---\nenabled: true\n---\n');
|
||||
expect(ast.frontmatter[0]?.value).toBe('true');
|
||||
});
|
||||
|
||||
it('FM-16 blank lines inside frontmatter are skipped', () => {
|
||||
const { ast } = parseMd('---\n\nk1: v1\n\nk2: v2\n\n---\n');
|
||||
expect(ast.frontmatter.map((e) => e.key)).toEqual(['k1', 'k2']);
|
||||
});
|
||||
|
||||
it('FM-17 frontmatter with same key twice — both retained (no dedup)', () => {
|
||||
// Substrate doesn't dedup; lint rules can flag duplicates if needed.
|
||||
const { ast } = parseMd('---\nk: v1\nk: v2\n---\n');
|
||||
expect(ast.frontmatter).toEqual([
|
||||
{ key: 'k', value: 'v1', line: 2 },
|
||||
{ key: 'k', value: 'v2', line: 3 },
|
||||
]);
|
||||
});
|
||||
|
||||
it('FM-18 frontmatter must be at start — leading blank line breaks detection', () => {
|
||||
const { ast } = parseMd('\n---\nk: v\n---\n');
|
||||
expect(ast.frontmatter).toEqual([]);
|
||||
});
|
||||
|
||||
it('FM-19 frontmatter must be at start — leading text breaks detection', () => {
|
||||
const { ast } = parseMd('intro\n\n---\nk: v\n---\n');
|
||||
expect(ast.frontmatter).toEqual([]);
|
||||
});
|
||||
|
||||
// FM-20: a UTF-8 byte-order mark ahead of the opening `---` must not stop
// the frontmatter from being detected.
it('FM-20 BOM before frontmatter open is tolerated', () => {
  // Spell the BOM as an escape so it is visible in source and cannot be
  // silently stripped by editors, formatters, or copy/paste.
  const { ast } = parseMd('\uFEFF---\nname: bom\n---\n');
  expect(ast.frontmatter[0]?.value).toBe('bom');
});
|
||||
|
||||
it('FM-21 single-line file with `---` and `---` is empty frontmatter', () => {
|
||||
const { ast } = parseMd('---\n---');
|
||||
expect(ast.frontmatter).toEqual([]);
|
||||
});
|
||||
|
||||
it('FM-22 hash-prefixed lines skipped (not yaml comments — just don\'t match kv regex)', () => {
|
||||
const { ast } = parseMd('---\n# comment\nk: v\n---\n');
|
||||
expect(ast.frontmatter.map((e) => e.key)).toEqual(['k']);
|
||||
});
|
||||
});
|
||||
149
src/oc-path/tests/scenarios/h2-block-split.test.ts
Normal file
149
src/oc-path/tests/scenarios/h2-block-split.test.ts
Normal file
@@ -0,0 +1,149 @@
|
||||
/**
|
||||
* Wave 3 — H2 block split.
|
||||
*
|
||||
* Substrate guarantee: `## ` at column 0 outside fenced code blocks
|
||||
* starts a new H2 block. H1 (`# `), H3 (`### `), and `## ` inside
|
||||
* fenced code blocks do NOT split.
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { parseMd } from '../../parse.js';
|
||||
|
||||
describe('wave-03 h2-block-split', () => {
|
||||
it('H2-01 no headings → no blocks, all preamble', () => {
|
||||
const raw = 'Just prose, no headings.\nMore prose.\n';
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks).toEqual([]);
|
||||
// Preamble preserves the trailing newline from raw (split + rejoin
|
||||
// is symmetric); callers that want trimmed prose call .trim().
|
||||
expect(ast.preamble).toBe('Just prose, no headings.\nMore prose.\n');
|
||||
});
|
||||
|
||||
it('H2-02 single heading splits preamble + one block', () => {
|
||||
const { ast } = parseMd('preamble\n## Section\nbody\n');
|
||||
expect(ast.preamble.trim()).toBe('preamble');
|
||||
expect(ast.blocks.length).toBe(1);
|
||||
expect(ast.blocks[0]?.heading).toBe('Section');
|
||||
expect(ast.blocks[0]?.bodyText.trim()).toBe('body');
|
||||
});
|
||||
|
||||
it('H2-03 multiple headings produce blocks in order', () => {
|
||||
const { ast } = parseMd('## A\nbody-a\n## B\nbody-b\n## C\nbody-c\n');
|
||||
expect(ast.blocks.map((b) => b.heading)).toEqual(['A', 'B', 'C']);
|
||||
});
|
||||
|
||||
it('H2-04 H1 does NOT split', () => {
|
||||
const { ast } = parseMd('# H1 heading\n## H2 heading\n');
|
||||
expect(ast.blocks.length).toBe(1);
|
||||
expect(ast.blocks[0]?.heading).toBe('H2 heading');
|
||||
expect(ast.preamble).toContain('# H1 heading');
|
||||
});
|
||||
|
||||
it('H2-05 H3 does NOT split', () => {
|
||||
const { ast } = parseMd('## H2\nbody\n### H3\nstill in H2 block\n');
|
||||
expect(ast.blocks.length).toBe(1);
|
||||
expect(ast.blocks[0]?.bodyText).toContain('### H3');
|
||||
});
|
||||
|
||||
it('H2-06 `## ` inside fenced code block does NOT split', () => {
|
||||
const raw = '## Real\n\n```md\n## Inside code\n```\n\n## Another real\n';
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks.map((b) => b.heading)).toEqual(['Real', 'Another real']);
|
||||
});
|
||||
|
||||
it('H2-07 `##` without trailing space — does NOT match (regex requires \\s+)', () => {
|
||||
const { ast } = parseMd('##NoSpace\n## With space\n');
|
||||
expect(ast.blocks.length).toBe(1);
|
||||
expect(ast.blocks[0]?.heading).toBe('With space');
|
||||
});
|
||||
|
||||
it('H2-08 leading whitespace before `##` — does NOT match (regex anchored at line start)', () => {
|
||||
const { ast } = parseMd(' ## indented\n## not indented\n');
|
||||
expect(ast.blocks.length).toBe(1);
|
||||
expect(ast.blocks[0]?.heading).toBe('not indented');
|
||||
});
|
||||
|
||||
it('H2-09 trailing whitespace on heading — trimmed in heading text', () => {
|
||||
const { ast } = parseMd('## Trailing \n');
|
||||
expect(ast.blocks[0]?.heading).toBe('Trailing');
|
||||
expect(ast.blocks[0]?.slug).toBe('trailing');
|
||||
});
|
||||
|
||||
it('H2-10 inline code in heading preserved', () => {
|
||||
const { ast } = parseMd('## Use `gh` for GitHub\n');
|
||||
expect(ast.blocks[0]?.heading).toBe('Use `gh` for GitHub');
|
||||
});
|
||||
|
||||
it('H2-11 markdown formatting in heading preserved', () => {
|
||||
const { ast } = parseMd('## **Bold** *italic*\n');
|
||||
expect(ast.blocks[0]?.heading).toBe('**Bold** *italic*');
|
||||
});
|
||||
|
||||
it('H2-12 immediately after frontmatter', () => {
|
||||
const { ast } = parseMd('---\nk: v\n---\n## Section\nbody\n');
|
||||
expect(ast.blocks[0]?.heading).toBe('Section');
|
||||
expect(ast.preamble).toBe('');
|
||||
});
|
||||
|
||||
it('H2-13 H2 at end of file (no body)', () => {
|
||||
const { ast } = parseMd('preamble\n## End\n');
|
||||
expect(ast.blocks[0]?.heading).toBe('End');
|
||||
expect(ast.blocks[0]?.bodyText).toBe('');
|
||||
});
|
||||
|
||||
it('H2-14 two consecutive H2s — empty body block between', () => {
|
||||
const { ast } = parseMd('## A\n## B\n');
|
||||
expect(ast.blocks[0]?.bodyText).toBe('');
|
||||
expect(ast.blocks[1]?.heading).toBe('B');
|
||||
});
|
||||
|
||||
it('H2-15 line numbers are 1-based and track through frontmatter', () => {
|
||||
const { ast } = parseMd('---\nk: v\n---\n## At line 4\n');
|
||||
expect(ast.blocks[0]?.line).toBe(4);
|
||||
});
|
||||
|
||||
it('H2-16 line numbers track through preamble', () => {
|
||||
const { ast } = parseMd('line 1\nline 2\n## At line 3\n');
|
||||
expect(ast.blocks[0]?.line).toBe(3);
|
||||
});
|
||||
|
||||
it('H2-17 nested fenced code blocks (~~~ vs ```) — only ``` is detected', () => {
|
||||
// Current parser only treats ``` as fence; ~~~ falls through. This
|
||||
// is a documented limit. Inputs with ~~~ aren't broken — they're
|
||||
// just not protected from H2-misparsing inside them.
|
||||
const raw = '## H\n\n~~~md\n~~~\n\n## Next\n';
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks.map((b) => b.heading)).toEqual(['H', 'Next']);
|
||||
});
|
||||
|
||||
it('H2-18 setext-style heading (`Heading\\n========\\n`) is NOT recognized', () => {
|
||||
// Substrate is opinion-aware: setext headings are treated as
|
||||
// preamble. Lint rules can flag if needed; recognized markdown
|
||||
// dialect is `## ATX-style only` for OpenClaw workspace files.
|
||||
const raw = 'Heading\n=======\n## Real\n';
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks.length).toBe(1);
|
||||
expect(ast.blocks[0]?.heading).toBe('Real');
|
||||
});
|
||||
|
||||
it('H2-19 empty heading text (`## `)', () => {
  const { ast } = parseMd('## \n');
  // `## ` with nothing after it is not treated as a heading: the heading
  // regex requires at least one character of text (`(.+?)`), so this line
  // does not match and no block is split off.
  expect(ast.blocks).toEqual([]);
});
|
||||
|
||||
it('H2-20 heading with only whitespace (`## `)', () => {
|
||||
const { ast } = parseMd('## \n');
|
||||
expect(ast.blocks).toEqual([]);
|
||||
});
|
||||
|
||||
it('H2-21 heading-shaped text inside multi-line bullet body — does split', () => {
|
||||
// The substrate treats line-start ## as a heading regardless of
|
||||
// logical context (item continuation lines). Lint rules can flag
|
||||
// the boundary; substrate prefers structural simplicity.
|
||||
const raw = '## Section\n- item starts\n continues\n## Next\n';
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks.map((b) => b.heading)).toEqual(['Section', 'Next']);
|
||||
});
|
||||
});
|
||||
146
src/oc-path/tests/scenarios/items.test.ts
Normal file
146
src/oc-path/tests/scenarios/items.test.ts
Normal file
@@ -0,0 +1,146 @@
|
||||
/**
|
||||
* Wave 4 — items (bullets + kv).
|
||||
*
|
||||
* Substrate guarantee: bullet lines (`- text`, `* text`, `+ text`) inside
|
||||
* H2 blocks are extracted as `AstItem`. Lines matching `- key: value`
|
||||
* also populate `item.kv`. Items inside fenced code blocks are NOT
|
||||
* extracted.
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { parseMd } from '../../parse.js';
|
||||
|
||||
describe('wave-04 items', () => {
|
||||
it('I-01 plain dash bullets', () => {
|
||||
const { ast } = parseMd('## H\n- a\n- b\n- c\n');
|
||||
expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(['a', 'b', 'c']);
|
||||
});
|
||||
|
||||
it('I-02 star bullets', () => {
|
||||
const { ast } = parseMd('## H\n* a\n* b\n');
|
||||
expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(['a', 'b']);
|
||||
});
|
||||
|
||||
it('I-03 plus bullets', () => {
|
||||
const { ast } = parseMd('## H\n+ a\n+ b\n');
|
||||
expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(['a', 'b']);
|
||||
});
|
||||
|
||||
it('I-04 mixed bullet markers in same section', () => {
|
||||
const { ast } = parseMd('## H\n- dash\n* star\n+ plus\n');
|
||||
expect(ast.blocks[0]?.items.length).toBe(3);
|
||||
});
|
||||
|
||||
it('I-05 kv-shape items populate kv', () => {
|
||||
const { ast } = parseMd('## H\n- gh: GitHub CLI\n');
|
||||
expect(ast.blocks[0]?.items[0]?.kv).toEqual({ key: 'gh', value: 'GitHub CLI' });
|
||||
});
|
||||
|
||||
it('I-06 plain item has no kv', () => {
|
||||
const { ast } = parseMd('## H\n- plain text\n');
|
||||
expect(ast.blocks[0]?.items[0]?.kv).toBeUndefined();
|
||||
});
|
||||
|
||||
it('I-07 multiple colons — first colon is the kv split', () => {
|
||||
const { ast } = parseMd('## H\n- url: http://x.com:80/p\n');
|
||||
expect(ast.blocks[0]?.items[0]?.kv).toEqual({
|
||||
key: 'url',
|
||||
value: 'http://x.com:80/p',
|
||||
});
|
||||
});
|
||||
|
||||
it('I-08 colon with no space after is still kv', () => {
|
||||
const { ast } = parseMd('## H\n- key:value\n');
|
||||
expect(ast.blocks[0]?.items[0]?.kv).toEqual({ key: 'key', value: 'value' });
|
||||
});
|
||||
|
||||
it('I-09 quoted value preserved verbatim (no unquote at item layer)', () => {
|
||||
const { ast } = parseMd('## H\n- title: "quoted: value"\n');
|
||||
expect(ast.blocks[0]?.items[0]?.kv?.value).toBe('"quoted: value"');
|
||||
});
|
||||
|
||||
it('I-10 slug from kv key when kv present', () => {
|
||||
const { ast } = parseMd('## H\n- The Tool: description\n');
|
||||
expect(ast.blocks[0]?.items[0]?.slug).toBe('the-tool');
|
||||
});
|
||||
|
||||
it('I-11 slug from item text when no kv', () => {
|
||||
const { ast } = parseMd('## H\n- The Plain Item\n');
|
||||
expect(ast.blocks[0]?.items[0]?.slug).toBe('the-plain-item');
|
||||
});
|
||||
|
||||
it('I-12 items inside fenced code block are NOT extracted', () => {
|
||||
const raw = '## H\n```\n- not a bullet\n- still not\n```\n- real bullet\n';
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks[0]?.items.length).toBe(1);
|
||||
expect(ast.blocks[0]?.items[0]?.text).toBe('real bullet');
|
||||
});
|
||||
|
||||
it('I-13 line numbers track through block body', () => {
|
||||
const { ast } = parseMd('## H\n- first\n- second\n- third\n');
|
||||
expect(ast.blocks[0]?.items.map((i) => i.line)).toEqual([2, 3, 4]);
|
||||
});
|
||||
|
||||
it('I-14 trailing whitespace on bullet trimmed in text', () => {
|
||||
const { ast } = parseMd('## H\n- spaced \n');
|
||||
expect(ast.blocks[0]?.items[0]?.text).toBe('spaced');
|
||||
});
|
||||
|
||||
it('I-15 empty bullet text is dropped', () => {
|
||||
const { ast } = parseMd('## H\n- \n- real\n');
|
||||
// The regex requires (.+?) non-empty, so `- ` alone doesn't match.
|
||||
expect(ast.blocks[0]?.items.length).toBe(1);
|
||||
});
|
||||
|
||||
it('I-16 indented bullet (sub-bullet) is NOT extracted as item', () => {
  // The current item regex `^(?:[-*+])\s+(.+?)\s*$` requires column-0
  // bullet markers, so indented bullets do NOT match. Documented as a
  // limit — sub-bullets surface in body text but not in items.
  const { ast } = parseMd('## H\n- top\n - sub\n');
  expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(['top']);
});
|
||||
|
||||
it('I-17 numbered list (1. item) is NOT extracted as item', () => {
|
||||
const { ast } = parseMd('## H\n1. first\n2. second\n');
|
||||
expect(ast.blocks[0]?.items).toEqual([]);
|
||||
});
|
||||
|
||||
it('I-18 items in a section with no body before — first item line is heading+1', () => {
|
||||
const { ast } = parseMd('## H\n- a\n');
|
||||
expect(ast.blocks[0]?.items[0]?.line).toBe(2);
|
||||
});
|
||||
|
||||
it('I-19 items spread across blocks are scoped to their block', () => {
|
||||
const { ast } = parseMd('## A\n- a1\n## B\n- b1\n- b2\n');
|
||||
expect(ast.blocks[0]?.items.length).toBe(1);
|
||||
expect(ast.blocks[1]?.items.length).toBe(2);
|
||||
expect(ast.blocks[1]?.items.map((i) => i.text)).toEqual(['b1', 'b2']);
|
||||
});
|
||||
|
||||
it('I-20 item with only-symbol kv key still parses', () => {
|
||||
const { ast } = parseMd('## H\n- API_KEY: secret-value\n');
|
||||
expect(ast.blocks[0]?.items[0]?.kv).toEqual({
|
||||
key: 'API_KEY',
|
||||
value: 'secret-value',
|
||||
});
|
||||
expect(ast.blocks[0]?.items[0]?.slug).toBe('api-key');
|
||||
});
|
||||
|
||||
it('I-21 item with kv where value is empty', () => {
|
||||
const { ast } = parseMd('## H\n- key:\n');
|
||||
// `- key:` has empty value after the colon; the kv regex requires
|
||||
// (.+) for value, so this falls through to plain item.
|
||||
expect(ast.blocks[0]?.items[0]?.kv).toBeUndefined();
|
||||
expect(ast.blocks[0]?.items[0]?.text).toBe('key:');
|
||||
});
|
||||
|
||||
it('I-22 bullet in preamble (before first H2) is NOT in any block', () => {
|
||||
const { ast } = parseMd('- preamble bullet\n## H\n- block bullet\n');
|
||||
expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(['block bullet']);
|
||||
expect(ast.preamble).toContain('- preamble bullet');
|
||||
});
|
||||
|
||||
it('I-23 bullet with internal markdown (italics, code) preserved in text', () => {
|
||||
const { ast } = parseMd('## H\n- use *gh* and `curl`\n');
|
||||
expect(ast.blocks[0]?.items[0]?.text).toBe('use *gh* and `curl`');
|
||||
});
|
||||
});
|
||||
188
src/oc-path/tests/scenarios/jsonc-byte-fidelity.test.ts
Normal file
188
src/oc-path/tests/scenarios/jsonc-byte-fidelity.test.ts
Normal file
@@ -0,0 +1,188 @@
|
||||
/**
|
||||
* Wave 15 — JSONC byte-fidelity round-trip.
|
||||
*
|
||||
* Substrate guarantee: `emitJsonc(parseJsonc(raw)) === raw` for every
|
||||
* input the parser accepts. Mirrors wave-01 but for the JSONC kind.
|
||||
* Comments, trailing commas, BOMs, mixed line endings — all byte-stable
|
||||
* via the round-trip path.
|
||||
*
|
||||
* **What this file proves**: byte-identical round-trip via the
|
||||
* default-mode emit (which echoes `ast.raw`). This is necessary but
|
||||
* not sufficient — without the structural assertions below, a parser
|
||||
* that emitted `ast.root: null` for every input would still pass the
|
||||
* byte test (since `raw` is preserved on the AST regardless).
|
||||
*
|
||||
* Each assertParseable() call proves the parser actually ran and
|
||||
* produced a structural tree, not just stored `raw` verbatim and
|
||||
* called it a day. JC-17 deliberately uses `assertNotParseable` —
|
||||
* malformed input must echo `raw` AND emit a diagnostic.
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { emitJsonc } from '../../jsonc/emit.js';
|
||||
import { parseJsonc } from '../../jsonc/parse.js';
|
||||
import type { JsoncValue } from '../../jsonc/ast.js';
|
||||
|
||||
/** Round-trips `raw` through `parseJsonc` and the default `emitJsonc`. */
function rt(raw: string): string {
  const { ast } = parseJsonc(raw);
  return emitJsonc(ast);
}
|
||||
|
||||
/**
 * Parses `raw` and fails the test unless the AST carries a non-null root.
 *
 * A byte-fidelity check alone cannot catch a parser that leans entirely
 * on the echoed `raw`, so this pins that a structural tree was produced.
 *
 * @param raw JSONC text to parse.
 * @returns the parsed root, for follow-up structural assertions.
 */
function assertParseable(raw: string): JsoncValue {
  const { ast } = parseJsonc(raw);
  const { root } = ast;
  expect(root).not.toBeNull();
  return root as JsoncValue;
}
|
||||
|
||||
/**
 * Counterpart to `assertParseable`: the parse must yield a null root AND
 * report at least one diagnostic with severity `'error'`.
 *
 * JC-17 relies on the diagnostic half; checking only the echoed bytes
 * would also pass for a parser that silently dropped malformed content.
 *
 * @param raw JSONC text expected to be rejected by the parser.
 */
function assertNotParseable(raw: string): void {
  const { ast, diagnostics } = parseJsonc(raw);
  expect(ast.root).toBeNull();
  const hasError = diagnostics.some((diag) => diag.severity === 'error');
  expect(hasError).toBe(true);
}
|
||||
|
||||
describe('wave-15 jsonc byte-fidelity', () => {
|
||||
it('JC-01 empty file', () => {
|
||||
expect(rt('')).toBe('');
|
||||
});
|
||||
|
||||
it('JC-02 whitespace-only', () => {
|
||||
expect(rt(' \n\n \n')).toBe(' \n\n \n');
|
||||
});
|
||||
|
||||
it('JC-03 empty object', () => {
|
||||
expect(rt('{}')).toBe('{}');
|
||||
const root = assertParseable('{}');
|
||||
expect(root.kind).toBe('object');
|
||||
if (root.kind === 'object') {expect(root.entries).toHaveLength(0);}
|
||||
});
|
||||
|
||||
it('JC-04 empty array', () => {
|
||||
expect(rt('[]')).toBe('[]');
|
||||
const root = assertParseable('[]');
|
||||
expect(root.kind).toBe('array');
|
||||
if (root.kind === 'array') {expect(root.items).toHaveLength(0);}
|
||||
});
|
||||
|
||||
it('JC-05 trivial scalar root', () => {
|
||||
expect(rt('42')).toBe('42');
|
||||
expect(rt('"x"')).toBe('"x"');
|
||||
expect(rt('true')).toBe('true');
|
||||
expect(rt('null')).toBe('null');
|
||||
expect(assertParseable('42').kind).toBe('number');
|
||||
expect(assertParseable('"x"').kind).toBe('string');
|
||||
expect(assertParseable('true').kind).toBe('boolean');
|
||||
expect(assertParseable('null').kind).toBe('null');
|
||||
});
|
||||
|
||||
it('JC-06 line comments preserved', () => {
  const raw = '// a leading comment\n{ "x": 1 } // trailing\n';
  expect(rt(raw)).toBe(raw);
  // Pin parse: the structural entry `x` is reachable despite the
  // surrounding line comments, not merely "some object was produced".
  const root = assertParseable(raw);
  expect(root.kind).toBe('object');
  if (root.kind === 'object') {
    expect(root.entries.map((e) => e.key)).toEqual(['x']);
  }
});
|
||||
|
||||
it('JC-07 block comments preserved', () => {
|
||||
const raw = '/* header */\n{\n /* inline */\n "x": 1\n}\n';
|
||||
expect(rt(raw)).toBe(raw);
|
||||
const root = assertParseable(raw);
|
||||
expect(root.kind).toBe('object');
|
||||
});
|
||||
|
||||
it('JC-08 trailing commas preserved', () => {
  const raw = '{\n "x": 1,\n "y": 2,\n}';
  expect(rt(raw)).toBe(raw);
  const root = assertParseable(raw);
  // Assert the kind unconditionally: the narrowing guard below would
  // otherwise skip the entry-count check and pass on a non-object root.
  expect(root.kind).toBe('object');
  if (root.kind === 'object') {
    expect(root.entries).toHaveLength(2);
  }
});
|
||||
|
||||
it('JC-09 mixed CRLF + LF preserved', () => {
  const raw = '{\r\n "x": 1,\n "y": 2\r\n}';
  expect(rt(raw)).toBe(raw);
  const root = assertParseable(raw);
  // Assert the kind unconditionally so the key check below cannot be
  // skipped by the type-narrowing guard.
  expect(root.kind).toBe('object');
  if (root.kind === 'object') {
    expect(root.entries.map((e) => e.key)).toEqual(['x', 'y']);
  }
});
|
||||
|
||||
it('JC-10 BOM preserved on raw', () => {
  // Spell the BOM as an escape: a literal U+FEFF is invisible in editors
  // and diffs and is easily stripped by tooling, which would silently
  // turn this into a plain-object test.
  const raw = '\uFEFF{ "x": 1 }';
  expect(rt(raw)).toBe(raw);
  // BOM stripped before parsing — parser still sees `{` as first char.
  expect(assertParseable(raw).kind).toBe('object');
});
|
||||
|
||||
it('JC-11 deeply nested structures preserved', () => {
|
||||
const raw = '{ "a": { "b": { "c": { "d": [1, [2, [3, [4]]]] } } } }';
|
||||
expect(rt(raw)).toBe(raw);
|
||||
expect(assertParseable(raw).kind).toBe('object');
|
||||
});
|
||||
|
||||
it('JC-12 string with escape sequences preserved', () => {
  const raw = '{ "s": "a\\nb\\tc\\u0041\\\\d\\"e" }';
  expect(rt(raw)).toBe(raw);
  // Pin escape resolution — parsed value carries actual control chars.
  const root = assertParseable(raw);
  // Both kinds are asserted unconditionally; the `if` guards exist only
  // for type narrowing and must not be able to skip the value check.
  expect(root.kind).toBe('object');
  if (root.kind === 'object') {
    const s = root.entries[0]?.value;
    expect(s?.kind).toBe('string');
    if (s?.kind === 'string') {
      expect(s.value).toBe('a\nb\tcA\\d"e');
    }
  }
});
|
||||
|
||||
it('JC-13 numbers in scientific / negative / decimal forms preserved', () => {
  const raw = '[ 0, -0, 1.5, -3.14, 1e3, -2.5e-10, 1E+5 ]';
  expect(rt(raw)).toBe(raw);
  const root = assertParseable(raw);
  // Assert the kind unconditionally so the item checks below cannot be
  // skipped by the type-narrowing guard.
  expect(root.kind).toBe('array');
  if (root.kind === 'array') {
    expect(root.items).toHaveLength(7);
    expect(root.items.every((v) => v.kind === 'number')).toBe(true);
  }
});
|
||||
|
||||
it('JC-14 unicode characters preserved verbatim', () => {
  const raw = '{ "name": "héllo 世界 🎉" }';
  expect(rt(raw)).toBe(raw);
  const root = assertParseable(raw);
  // Both kinds are asserted unconditionally; the `if` guards exist only
  // for type narrowing and must not be able to skip the value check.
  expect(root.kind).toBe('object');
  if (root.kind === 'object') {
    const v = root.entries[0]?.value;
    expect(v?.kind).toBe('string');
    if (v?.kind === 'string') {
      expect(v.value).toBe('héllo 世界 🎉');
    }
  }
});
|
||||
|
||||
it('JC-15 idiosyncratic whitespace preserved', () => {
|
||||
const raw = '{ "x" : 1 ,\n "y": 2}';
|
||||
expect(rt(raw)).toBe(raw);
|
||||
expect(assertParseable(raw).kind).toBe('object');
|
||||
});
|
||||
|
||||
it('JC-16 file-level trailing whitespace preserved', () => {
|
||||
const raw = '{ "x": 1 }\n\n\n';
|
||||
expect(rt(raw)).toBe(raw);
|
||||
expect(assertParseable(raw).kind).toBe('object');
|
||||
});
|
||||
|
||||
it('JC-17 malformed input still emits raw verbatim AND emits a diagnostic', () => {
|
||||
const raw = '{ broken json with "key": value }';
|
||||
expect(rt(raw)).toBe(raw);
|
||||
// Without this assertion the test passes for any input regardless
|
||||
// of parser behavior — pin both halves of the contract.
|
||||
assertNotParseable(raw);
|
||||
});
|
||||
|
||||
it('JC-18 comments-only file preserved', () => {
|
||||
const raw = '// just a comment\n/* and a block */\n';
|
||||
expect(rt(raw)).toBe(raw);
|
||||
// Comments-only files have no structural root — that's expected.
|
||||
expect(parseJsonc(raw).ast.root).toBeNull();
|
||||
});
|
||||
});
|
||||
132
src/oc-path/tests/scenarios/jsonc-resolver-edges.test.ts
Normal file
132
src/oc-path/tests/scenarios/jsonc-resolver-edges.test.ts
Normal file
@@ -0,0 +1,132 @@
|
||||
/**
|
||||
* Wave 17 — JSONC resolver adversarial edges.
|
||||
*
|
||||
* Substrate guarantee: the resolver walks the value tree deterministically
|
||||
* with mixed dotted / segment paths, returns null on any unresolvable
|
||||
* walk, and never throws on hostile inputs.
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { parseJsonc } from '../../jsonc/parse.js';
|
||||
import { resolveJsoncOcPath } from '../../jsonc/resolve.js';
|
||||
import { parseOcPath } from '../../oc-path.js';
|
||||
|
||||
/** Parses `raw` as JSONC and resolves the `ocPath` string against its AST. */
function rs(raw: string, ocPath: string) {
  const { ast } = parseJsonc(raw);
  const parsedPath = parseOcPath(ocPath);
  return resolveJsoncOcPath(ast, parsedPath);
}
|
||||
|
||||
describe('wave-17 jsonc resolver edges', () => {
|
||||
it('JR-01 root resolves on empty object', () => {
|
||||
expect(rs('{}', 'oc://config')?.kind).toBe('root');
|
||||
});
|
||||
|
||||
it('JR-02 root resolves on scalar root', () => {
|
||||
expect(rs('42', 'oc://config')?.kind).toBe('root');
|
||||
});
|
||||
|
||||
it('JR-03 root resolves on array root', () => {
|
||||
expect(rs('[1,2,3]', 'oc://config')?.kind).toBe('root');
|
||||
});
|
||||
|
||||
it('JR-04 deep dotted descent within section', () => {
|
||||
const m = rs('{"a":{"b":{"c":1}}}', 'oc://config/a.b.c');
|
||||
expect(m?.kind).toBe('object-entry');
|
||||
});
|
||||
|
||||
it('JR-05 missing intermediate key returns null', () => {
|
||||
expect(rs('{"a":{"b":1}}', 'oc://config/a.x.b')).toBeNull();
|
||||
});
|
||||
|
||||
it('JR-06 numeric segment indexes into array', () => {
|
||||
const m = rs('{"items":["a","b","c"]}', 'oc://config/items.1');
|
||||
expect(m?.kind).toBe('value');
|
||||
if (m?.kind === 'value') {
|
||||
expect(m.node).toMatchObject({ kind: 'string', value: 'b' });
|
||||
}
|
||||
});
|
||||
|
||||
it('JR-07 negative array index resolves to Nth-from-last', () => {
|
||||
expect(rs('{"x":[1,2]}', 'oc://config/x.-1')).toMatchObject({ kind: 'value', node: { kind: 'number', value: 2 } });
|
||||
expect(rs('{"x":[1,2]}', 'oc://config/x.-2')).toMatchObject({ kind: 'value', node: { kind: 'number', value: 1 } });
|
||||
expect(rs('{"x":[1,2]}', 'oc://config/x.-5')).toBeNull();
|
||||
});
|
||||
|
||||
it('JR-08 out-of-bounds array index returns null', () => {
|
||||
expect(rs('{"x":[1,2]}', 'oc://config/x.99')).toBeNull();
|
||||
});
|
||||
|
||||
it('JR-09 non-integer index returns null (no NaN coercion)', () => {
|
||||
expect(rs('{"x":[1,2]}', 'oc://config/x.foo')).toBeNull();
|
||||
});
|
||||
|
||||
it('JR-10 null AST root returns null on any path', () => {
|
||||
expect(rs('', 'oc://config/x')).toBeNull();
|
||||
});
|
||||
|
||||
it('JR-11 descending past a primitive returns null', () => {
|
||||
expect(rs('{"x":42}', 'oc://config/x.y')).toBeNull();
|
||||
});
|
||||
|
||||
it('JR-12 empty segment in dotted path throws OcPathError', () => {
|
||||
// v1 invariant: malformed paths fail loud at parse time, not silently null.
|
||||
expect(() => rs('{"x":1}', 'oc://config/x..y')).toThrow(/Empty dotted sub-segment/);
|
||||
});
|
||||
|
||||
it('JR-13 string value at leaf surfaces via object-entry shape', () => {
|
||||
const m = rs('{"k":"v"}', 'oc://config/k');
|
||||
expect(m?.kind).toBe('object-entry');
|
||||
if (m?.kind === 'object-entry') {expect(m.node.key).toBe('k');}
|
||||
});
|
||||
|
||||
it('JR-14 boolean and null values resolve', () => {
|
||||
const m1 = rs('{"k":true}', 'oc://config/k');
|
||||
expect(m1?.kind).toBe('object-entry');
|
||||
const m2 = rs('{"k":null}', 'oc://config/k');
|
||||
expect(m2?.kind).toBe('object-entry');
|
||||
});
|
||||
|
||||
it('JR-15 mixed slash + dot segments resolve identically', () => {
  const raw = '{"a":{"b":{"c":1}}}';
  const a = rs(raw, 'oc://config/a.b.c');
  const b = rs(raw, 'oc://config/a/b.c');
  const c = rs(raw, 'oc://config/a/b/c');
  // Pin the concrete kind first: comparing the three results only to
  // each other would also pass if every spelling failed to resolve
  // (undefined === undefined).
  expect(a?.kind).toBe('object-entry');
  expect(b?.kind).toBe(a?.kind);
  expect(c?.kind).toBe(b?.kind);
});
|
||||
|
||||
it('JR-16 keys with special characters resolve', () => {
|
||||
const m = rs('{"a-b_c":{"x":1}}', 'oc://config/a-b_c.x');
|
||||
expect(m?.kind).toBe('object-entry');
|
||||
});
|
||||
|
||||
it('JR-17 unicode keys resolve', () => {
|
||||
const m = rs('{"héllo":1}', 'oc://config/héllo');
|
||||
expect(m?.kind).toBe('object-entry');
|
||||
});
|
||||
|
||||
it('JR-18 large nested structure (depth 20) resolves to leaf', () => {
|
||||
let json = '"leaf"';
|
||||
const segs: string[] = [];
|
||||
for (let i = 19; i >= 0; i--) {
|
||||
json = `{"k${i}":${json}}`;
|
||||
segs.unshift(`k${i}`);
|
||||
}
|
||||
const m = rs(json, `oc://config/${segs.join('.')}`);
|
||||
expect(m?.kind).toBe('object-entry');
|
||||
if (m?.kind === 'object-entry') {
|
||||
expect(m.node.value).toMatchObject({ kind: 'string', value: 'leaf' });
|
||||
}
|
||||
});
|
||||
|
||||
it('JR-19 resolver is non-mutating across calls', () => {
  const { ast } = parseJsonc('{"x":{"y":1}}');
  const before = JSON.stringify(ast);
  // Resolve against the snapshotted AST itself. Going through `rs` would
  // re-parse the raw string into a fresh AST each time, so `ast` would
  // never be touched and the comparison below would prove nothing.
  const paths = ['oc://config/x.y', 'oc://config/x', 'oc://config/missing'];
  for (const p of paths) {
    resolveJsoncOcPath(ast, parseOcPath(p));
  }
  expect(JSON.stringify(ast)).toBe(before);
});
|
||||
|
||||
it('JR-20 hostile input shapes do not throw', () => {
|
||||
expect(() => rs('{garbage}', 'oc://config/x')).not.toThrow();
|
||||
expect(() => rs('{"a":', 'oc://config/a')).not.toThrow();
|
||||
});
|
||||
});
|
||||
125
src/oc-path/tests/scenarios/jsonl-byte-fidelity.test.ts
Normal file
125
src/oc-path/tests/scenarios/jsonl-byte-fidelity.test.ts
Normal file
@@ -0,0 +1,125 @@
|
||||
/**
|
||||
* Wave 16 — JSONL byte-fidelity round-trip.
|
||||
*
|
||||
* Substrate guarantee: `emitJsonl(parseJsonl(raw)) === raw` for every
|
||||
* input the parser accepts. JSONL is line-oriented; blanks, malformed
|
||||
* lines, mixed line endings, trailing-newline shape — all byte-stable.
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { emitJsonl } from '../../jsonl/emit.js';
|
||||
import { parseJsonl } from '../../jsonl/parse.js';
|
||||
|
||||
/** Round-trips `raw` through `parseJsonl` and the default `emitJsonl`. */
function rt(raw: string): string {
  const { ast } = parseJsonl(raw);
  return emitJsonl(ast);
}
|
||||
|
||||
describe('wave-16 jsonl byte-fidelity', () => {
|
||||
it('JL-01 empty file', () => {
|
||||
expect(rt('')).toBe('');
|
||||
});
|
||||
|
||||
it('JL-02 single line no trailing newline', () => {
|
||||
expect(rt('{"a":1}')).toBe('{"a":1}');
|
||||
});
|
||||
|
||||
it('JL-03 single line with trailing newline', () => {
|
||||
expect(rt('{"a":1}\n')).toBe('{"a":1}\n');
|
||||
});
|
||||
|
||||
it('JL-04 multiple lines preserved', () => {
|
||||
const raw = '{"a":1}\n{"b":2}\n{"c":3}\n';
|
||||
expect(rt(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('JL-05 blank line in the middle preserved', () => {
|
||||
const raw = '{"a":1}\n\n{"b":2}\n';
|
||||
expect(rt(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('JL-06 multiple blank lines preserved', () => {
|
||||
const raw = '{"a":1}\n\n\n{"b":2}\n';
|
||||
expect(rt(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('JL-07 malformed line round-trips verbatim', () => {
|
||||
const raw = '{"a":1}\nthis is not json\n{"b":2}\n';
|
||||
expect(rt(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('JL-08 entirely malformed file round-trips', () => {
|
||||
const raw = 'header\nbody\nfooter\n';
|
||||
expect(rt(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('JL-09 leading + trailing blanks preserved', () => {
|
||||
const raw = '\n\n{"a":1}\n\n';
|
||||
expect(rt(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('JL-10 file ending without final newline preserved', () => {
|
||||
const raw = '{"a":1}\n{"b":2}';
|
||||
expect(rt(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('JL-11 nested object lines preserved', () => {
|
||||
const raw = '{"a":{"b":{"c":1}}}\n{"x":[1,[2,[3]]]}\n';
|
||||
expect(rt(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('JL-12 unicode in a value line preserved', () => {
|
||||
const raw = '{"name":"héllo 世界 🎉"}\n';
|
||||
expect(rt(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('JL-13 idiosyncratic whitespace inside a line preserved', () => {
|
||||
const raw = '{ "a" : 1 }\n';
|
||||
expect(rt(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('JL-14 single blank line file preserved', () => {
|
||||
const raw = '\n';
|
||||
expect(rt(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('JL-15 large log (1000 lines) preserved', () => {
|
||||
const lines = Array.from({ length: 1000 }, (_, i) => `{"i":${i}}`);
|
||||
const raw = lines.join('\n') + '\n';
|
||||
expect(rt(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('JL-16 mixed value + malformed + blank preserved', () => {
|
||||
const raw =
|
||||
'{"a":1}\n{not json}\n\n{"b":2}\nstill not json\n{"c":3}\n';
|
||||
expect(rt(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
// F10 — CRLF preservation. Without lineEnding tracking on the AST,
|
||||
// a CRLF input edited via setJsonlOcPath rebuilds raw via render
|
||||
// which joins with `\n`, mixing endings on Windows-authored datasets.
|
||||
it('JL-17 CRLF input round-trips byte-identical via the default emit', () => {
|
||||
const raw = '{"a":1}\r\n{"b":2}\r\n{"c":3}\r\n';
|
||||
expect(rt(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it('JL-18 CRLF input preserves CRLF after a structural edit (render mode)', () => {
|
||||
// Pin the render path: setJsonlOcPath rebuilds raw via render mode,
|
||||
// which now consults ast.lineEnding to reconstruct the original
|
||||
// convention. Without the fix, render-mode output uses `\n` and
|
||||
// produces mixed line endings on Windows datasets.
|
||||
const raw = '{"a":1}\r\n{"b":2}\r\n';
|
||||
const { ast } = parseJsonl(raw);
|
||||
const rendered = emitJsonl(ast, { mode: 'render' });
|
||||
expect(rendered).toBe('{"a":1}\r\n{"b":2}');
|
||||
// Pin no-LF-only joins by counting CRLFs vs bare LFs.
|
||||
expect((rendered.match(/\r\n/g) ?? []).length).toBe(1);
|
||||
expect((rendered.match(/(?<!\r)\n/g) ?? []).length).toBe(0);
|
||||
});
|
||||
|
||||
it('JL-19 LF input preserves LF after a structural edit (render mode)', () => {
|
||||
// Symmetric: a Unix-authored log doesn't mysteriously gain CRLF.
|
||||
const raw = '{"a":1}\n{"b":2}\n';
|
||||
const { ast } = parseJsonl(raw);
|
||||
const rendered = emitJsonl(ast, { mode: 'render' });
|
||||
expect(rendered).toBe('{"a":1}\n{"b":2}');
|
||||
});
|
||||
});
|
||||
125
src/oc-path/tests/scenarios/jsonl-resolver-edges.test.ts
Normal file
125
src/oc-path/tests/scenarios/jsonl-resolver-edges.test.ts
Normal file
@@ -0,0 +1,125 @@
|
||||
/**
|
||||
* Wave 18 — JSONL resolver adversarial edges.
|
||||
*
|
||||
* Substrate guarantee: line addresses (`Lnnn`, `$last`) walk
|
||||
* deterministically; missing addresses, blank-line targets, and
|
||||
* malformed-line targets all surface as null without throwing.
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { parseJsonl } from '../../jsonl/parse.js';
|
||||
import { resolveJsonlOcPath } from '../../jsonl/resolve.js';
|
||||
import { parseOcPath } from '../../oc-path.js';
|
||||
|
||||
/** Parses `raw` as JSONL and resolves the `ocPath` string against its AST. */
function rs(raw: string, ocPath: string) {
  const { ast } = parseJsonl(raw);
  const parsedPath = parseOcPath(ocPath);
  return resolveJsonlOcPath(ast, parsedPath);
}
|
||||
|
||||
describe('wave-18 jsonl resolver edges', () => {
|
||||
it('JLR-01 root resolves with no segments', () => {
|
||||
expect(rs('{"a":1}\n', 'oc://log')?.kind).toBe('root');
|
||||
});
|
||||
|
||||
it('JLR-02 L1 resolves to a value line', () => {
|
||||
const m = rs('{"a":1}\n', 'oc://log/L1');
|
||||
expect(m?.kind).toBe('line');
|
||||
});
|
||||
|
||||
it('JLR-03 L99 unknown line returns null', () => {
|
||||
expect(rs('{"a":1}\n', 'oc://log/L99')).toBeNull();
|
||||
});
|
||||
|
||||
it('JLR-04 $last picks the most recent value line', () => {
|
||||
const m = rs('{"a":1}\n{"a":2}\n{"a":3}\n', 'oc://log/$last/a');
|
||||
expect(m?.kind).toBe('object-entry');
|
||||
if (m?.kind === 'object-entry') {
|
||||
expect(m.node.value).toMatchObject({ kind: 'number', value: 3 });
|
||||
}
|
||||
});
|
||||
|
||||
it('JLR-05 $last skips trailing blank lines', () => {
|
||||
const m = rs('{"a":1}\n\n\n', 'oc://log/$last/a');
|
||||
expect(m?.kind).toBe('object-entry');
|
||||
if (m?.kind === 'object-entry') {
|
||||
expect(m.node.value).toMatchObject({ kind: 'number', value: 1 });
|
||||
}
|
||||
});
|
||||
|
||||
it('JLR-06 $last skips trailing malformed lines', () => {
|
||||
const m = rs('{"a":1}\nbroken\n', 'oc://log/$last/a');
|
||||
expect(m?.kind).toBe('object-entry');
|
||||
});
|
||||
|
||||
it('JLR-07 $last on empty file returns null', () => {
|
||||
expect(rs('', 'oc://log/$last/x')).toBeNull();
|
||||
});
|
||||
|
||||
it('JLR-08 $last on all-blank file returns null', () => {
|
||||
expect(rs('\n\n\n', 'oc://log/$last/x')).toBeNull();
|
||||
});
|
||||
|
||||
it('JLR-09 $last on all-malformed file returns null', () => {
|
||||
expect(rs('a\nb\nc\n', 'oc://log/$last/x')).toBeNull();
|
||||
});
|
||||
|
||||
it('JLR-10 garbage line address returns null', () => {
|
||||
expect(rs('{"a":1}\n', 'oc://log/garbage')).toBeNull();
|
||||
expect(rs('{"a":1}\n', 'oc://log/L')).toBeNull();
|
||||
expect(rs('{"a":1}\n', 'oc://log/Labc')).toBeNull();
|
||||
});
|
||||
|
||||
it('JLR-11 descent into a blank line returns null', () => {
|
||||
expect(rs('{"a":1}\n\n{"b":2}\n', 'oc://log/L2/anything')).toBeNull();
|
||||
});
|
||||
|
||||
it('JLR-12 descent into a malformed line returns null', () => {
|
||||
expect(rs('{"a":1}\nbroken\n{"b":2}\n', 'oc://log/L2/anything')).toBeNull();
|
||||
});
|
||||
|
||||
it('JLR-13 missing field on a value line returns null', () => {
|
||||
expect(rs('{"a":1}\n', 'oc://log/L1/missing')).toBeNull();
|
||||
});
|
||||
|
||||
it('JLR-14 dotted descent through line value resolves', () => {
|
||||
const m = rs('{"r":{"ok":true,"d":"x"}}\n', 'oc://log/L1/r.d');
|
||||
expect(m?.kind).toBe('object-entry');
|
||||
if (m?.kind === 'object-entry') {
|
||||
expect(m.node.value).toMatchObject({ kind: 'string', value: 'x' });
|
||||
}
|
||||
});
|
||||
|
||||
it('JLR-15 array index inside a line resolves', () => {
|
||||
const m = rs('{"items":["a","b","c"]}\n', 'oc://log/L1/items.2');
|
||||
expect(m?.kind).toBe('value');
|
||||
if (m?.kind === 'value') {
|
||||
expect(m.node).toMatchObject({ kind: 'string', value: 'c' });
|
||||
}
|
||||
});
|
||||
|
||||
it('JLR-16 line numbers are 1-indexed', () => {
  const m = rs('{"a":1}\n{"a":2}\n', 'oc://log/L1/a');
  // Assert the kind unconditionally: with only the narrowing guard, a
  // null match (e.g. from 0-indexed lookup failing) would pass silently.
  expect(m?.kind).toBe('object-entry');
  if (m?.kind === 'object-entry') {
    expect(m.node.value).toMatchObject({ kind: 'number', value: 1 });
  }
});
|
||||
|
||||
it('JLR-17 line numbers preserved across blank/malformed entries', () => {
|
||||
const m = rs('{"a":1}\n\nbroken\n{"a":4}\n', 'oc://log/L4/a');
|
||||
expect(m?.kind).toBe('object-entry');
|
||||
if (m?.kind === 'object-entry') {
|
||||
expect(m.node.value).toMatchObject({ kind: 'number', value: 4 });
|
||||
}
|
||||
});
|
||||
|
||||
it('JLR-18 resolver is non-mutating', () => {
  const { ast } = parseJsonl('{"a":1}\n{"b":2}\n');
  const before = JSON.stringify(ast);
  // Resolve against the snapshotted AST itself. Going through `rs` would
  // re-parse the raw string into a fresh AST each time, so `ast` would
  // never be touched and the comparison below would prove nothing.
  for (const p of ['oc://log/L1', 'oc://log/$last']) {
    resolveJsonlOcPath(ast, parseOcPath(p));
  }
  expect(JSON.stringify(ast)).toBe(before);
});
|
||||
|
||||
it('JLR-19 hostile inputs do not throw', () => {
|
||||
expect(() => rs('not json\n', 'oc://log/L1')).not.toThrow();
|
||||
expect(() => rs('', 'oc://log/$last')).not.toThrow();
|
||||
});
|
||||
});
|
||||
155
src/oc-path/tests/scenarios/malformed-input.test.ts
Normal file
155
src/oc-path/tests/scenarios/malformed-input.test.ts
Normal file
@@ -0,0 +1,155 @@
|
||||
/**
|
||||
* Wave 11 — malformed input recovery.
|
||||
*
|
||||
* Substrate guarantee: parser is **soft-error**: it never throws on
|
||||
* malformed input. Suspicious-but-recoverable inputs produce
|
||||
* diagnostics; unparseable structural pieces are dropped silently.
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { parseMd } from '../../parse.js';
|
||||
|
||||
describe('wave-11 malformed-input', () => {
|
||||
it('M-01 truncated mid-frontmatter (no close fence)', () => {
|
||||
const raw = '---\nname: github\n';
|
||||
const { ast, diagnostics } = parseMd(raw);
|
||||
expect(diagnostics.some((d) => d.code === 'OC_FRONTMATTER_UNCLOSED')).toBe(true);
|
||||
expect(ast.frontmatter).toEqual([]);
|
||||
});
|
||||
|
||||
it('M-02 truncated mid-section', () => {
|
||||
const raw = '## H\n- item\nmid-line';
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks.length).toBe(1);
|
||||
});
|
||||
|
||||
it('M-03 only `---` (single fence, no content)', () => {
|
||||
expect(() => parseMd('---\n')).not.toThrow();
|
||||
});
|
||||
|
||||
it('M-04 only `---\\n---`', () => {
|
||||
const { ast } = parseMd('---\n---');
|
||||
expect(ast.frontmatter).toEqual([]);
|
||||
});
|
||||
|
||||
it('M-05 binary-ish bytes (non-ASCII control chars)', () => {
|
||||
const raw = '## H\n\x00\x01\x02\n';
|
||||
expect(() => parseMd(raw)).not.toThrow();
|
||||
});
|
||||
|
||||
it('M-06 very long single line (10k chars)', () => {
|
||||
const raw = `## H\n${'x'.repeat(10_000)}\n`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks[0]?.heading).toBe('H');
|
||||
});
|
||||
|
||||
it('M-07 deeply repeated headings (1000 H2 blocks)', () => {
|
||||
const lines: string[] = [];
|
||||
for (let i = 0; i < 1000; i++) {
|
||||
lines.push(`## H${i}`);
|
||||
lines.push(`- item ${i}`);
|
||||
}
|
||||
const raw = lines.join('\n') + '\n';
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks.length).toBe(1000);
|
||||
});
|
||||
|
||||
it('M-08 bullet shape that isn\'t actually a bullet (`-not-a-bullet`)', () => {
|
||||
const { ast } = parseMd('## H\n-not-a-bullet\n- real\n');
|
||||
expect(ast.blocks[0]?.items.length).toBe(1);
|
||||
});
|
||||
|
||||
it('M-09 unclosed code fence', () => {
|
||||
const raw = '## H\n```\nbody\n';
|
||||
expect(() => parseMd(raw)).not.toThrow();
|
||||
});
|
||||
|
||||
it('M-10 mismatched fence (open with ``` close with ~~~)', () => {
|
||||
const raw = '## H\n```\nbody\n~~~\n';
|
||||
expect(() => parseMd(raw)).not.toThrow();
|
||||
});
|
||||
|
||||
it('M-11 nested fences (treated linearly, not nested)', () => {
|
||||
const raw = '## H\n```\n```\nstill-in-second\n```\n';
|
||||
expect(() => parseMd(raw)).not.toThrow();
|
||||
});
|
||||
|
||||
it('M-12 empty file', () => {
|
||||
const { ast, diagnostics } = parseMd('');
|
||||
expect(ast.raw).toBe('');
|
||||
expect(ast.frontmatter).toEqual([]);
|
||||
expect(ast.blocks).toEqual([]);
|
||||
expect(diagnostics).toEqual([]);
|
||||
});
|
||||
|
||||
it('M-13 single character file', () => {
|
||||
const { ast } = parseMd('x');
|
||||
expect(ast.preamble).toBe('x');
|
||||
expect(ast.blocks).toEqual([]);
|
||||
});
|
||||
|
||||
it('M-14 single newline file', () => {
|
||||
const { ast } = parseMd('\n');
|
||||
expect(ast.blocks).toEqual([]);
|
||||
});
|
||||
|
||||
it('M-15 file with mixed indentation extremes (tabs, spaces, mixed)', () => {
|
||||
const raw = '## H\n\t- tabbed\n - spaced\n\t - mixed\n';
|
||||
expect(() => parseMd(raw)).not.toThrow();
|
||||
});
|
||||
|
||||
it('M-16 frontmatter with frontmatter-shaped content inside (---)', () => {
|
||||
const raw = '---\nk: v\n---\n\n---\nshould not parse as second frontmatter\n---\n';
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.frontmatter.map((e) => e.key)).toEqual(['k']);
|
||||
// Second `---` block becomes part of preamble/body (it's not at file start).
|
||||
expect(ast.preamble).toContain('---');
|
||||
});
|
||||
|
||||
it('M-17 lines starting with `#` but not heading (raw `#` chars in body)', () => {
|
||||
const raw = '## H\n\n# This is text starting with #\n#### h4 not parsed as block\n';
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks.length).toBe(1);
|
||||
expect(ast.blocks[0]?.bodyText).toContain('# This is text');
|
||||
});
|
||||
|
||||
it('M-18 lines starting with multiple ## but malformed (####, ######)', () => {
|
||||
const { ast } = parseMd('## Real\n#### Not block\n###### Not block\n');
|
||||
expect(ast.blocks.length).toBe(1);
|
||||
expect(ast.blocks[0]?.heading).toBe('Real');
|
||||
});
|
||||
|
||||
it('M-19 file with just whitespace', () => {
|
||||
expect(() => parseMd(' \n\t\n \n')).not.toThrow();
|
||||
});
|
||||
|
||||
it('M-20 file with only BOM', () => {
  // Spell the BOM as an escape: a literal U+FEFF is invisible in editors
  // and diffs and is easily stripped by tooling, which would silently turn
  // this into a duplicate of the empty-file case.
  const { ast } = parseMd('\uFEFF');
  // The test expects the BOM to be absent from `raw`.
  expect(ast.raw).toBe('');
});
|
||||
|
||||
it('M-21 file mixing BOM + frontmatter + body + sections', () => {
  // Explicit `\uFEFF` escape so the BOM is visible and survives tooling;
  // the frontmatter fence must still be recognized behind it.
  const raw = '\uFEFF---\nk: v\n---\n\nbody\n## Section\n- item\n';
  expect(() => parseMd(raw)).not.toThrow();
  const { ast } = parseMd(raw);
  expect(ast.frontmatter[0]?.value).toBe('v');
  expect(ast.blocks[0]?.heading).toBe('Section');
});
|
||||
|
||||
it('M-22 line endings: legacy CR-only (Mac classic)', () => {
|
||||
// Our regex /\r?\n/ doesn't split on CR-only. Treats whole as one line.
|
||||
const raw = 'line1\rline2\r## Heading\r';
|
||||
expect(() => parseMd(raw)).not.toThrow();
|
||||
});
|
||||
|
||||
it('M-23 100 KB file', () => {
|
||||
const lines: string[] = [];
|
||||
for (let i = 0; i < 1000; i++) {
|
||||
lines.push('## H' + i);
|
||||
for (let j = 0; j < 5; j++) {
|
||||
lines.push(`- item-${i}-${j}: value with some text content here`);
|
||||
}
|
||||
}
|
||||
const raw = lines.join('\n');
|
||||
expect(() => parseMd(raw)).not.toThrow();
|
||||
});
|
||||
});
|
||||
252
src/oc-path/tests/scenarios/oc-path-parse-edges.test.ts
Normal file
252
src/oc-path/tests/scenarios/oc-path-parse-edges.test.ts
Normal file
@@ -0,0 +1,252 @@
|
||||
/**
|
||||
* Wave 7 — OcPath parsing edges.
|
||||
*
|
||||
* Substrate guarantee: `parseOcPath(s)` is a pure function. Valid input
|
||||
* round-trips via `formatOcPath`; invalid input throws `OcPathError`
|
||||
* with a stable `code`.
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
OcPathError,
|
||||
formatOcPath,
|
||||
getPathLayout,
|
||||
isPattern,
|
||||
isValidOcPath,
|
||||
parseOcPath,
|
||||
} from '../../oc-path.js';
|
||||
|
||||
/**
 * Asserts that `fn` throws an `OcPathError` carrying the given `code`.
 *
 * The thrown value is captured first and asserted on afterwards. Calling
 * `expect.fail` inside the `try` would be swallowed by this helper's own
 * `catch`, so a non-throwing `fn` would surface as a confusing
 * "not an instance of OcPathError" failure instead of the intended message.
 *
 * @param fn - Thunk that is expected to throw.
 * @param code - Expected value of `OcPathError.code`.
 */
function expectErr(fn: () => unknown, code: string): void {
  let caught: unknown;
  try {
    fn();
  } catch (err) {
    caught = err;
  }
  // When nothing was thrown `caught` is undefined and this assertion fails
  // with the explanatory message below.
  expect(caught, `expected OcPathError code ${code}`).toBeInstanceOf(OcPathError);
  expect((caught as OcPathError).code).toBe(code);
}
|
||||
|
||||
describe('wave-07 oc-path-parse-edges', () => {
|
||||
it('OP-01 file-only', () => {
|
||||
expect(parseOcPath('oc://SOUL.md')).toEqual({ file: 'SOUL.md' });
|
||||
});
|
||||
|
||||
it('OP-02 file + section', () => {
|
||||
expect(parseOcPath('oc://SOUL.md/Boundaries').section).toBe('Boundaries');
|
||||
});
|
||||
|
||||
it('OP-03 file + section + item', () => {
|
||||
expect(parseOcPath('oc://SOUL.md/Boundaries/deny-rule-1').item).toBe('deny-rule-1');
|
||||
});
|
||||
|
||||
it('OP-04 file + section + item + field', () => {
|
||||
expect(parseOcPath('oc://SOUL.md/B/deny-1/risk').field).toBe('risk');
|
||||
});
|
||||
|
||||
it('OP-05 session query parameter', () => {
|
||||
expect(parseOcPath('oc://X.md?session=daily').session).toBe('daily');
|
||||
});
|
||||
|
||||
it('OP-06 session with full path', () => {
|
||||
const p = parseOcPath('oc://X.md/sec/item/field?session=cron');
|
||||
expect(p).toEqual({
|
||||
file: 'X.md',
|
||||
section: 'sec',
|
||||
item: 'item',
|
||||
field: 'field',
|
||||
session: 'cron',
|
||||
});
|
||||
});
|
||||
|
||||
it('OP-07 unknown query parameters silently ignored', () => {
|
||||
const p = parseOcPath('oc://X.md?foo=bar&session=s&baz=qux');
|
||||
expect(p.session).toBe('s');
|
||||
});
|
||||
|
||||
it('OP-08 session= with empty value drops session', () => {
|
||||
const p = parseOcPath('oc://X.md?session=');
|
||||
expect(p.session).toBeUndefined();
|
||||
});
|
||||
|
||||
it('OP-09 query without `=` ignored', () => {
|
||||
const p = parseOcPath('oc://X.md?nokeyhere');
|
||||
expect(p.session).toBeUndefined();
|
||||
});
|
||||
|
||||
it('OP-10 missing scheme throws', () => {
|
||||
expectErr(() => parseOcPath('SOUL.md'), 'OC_PATH_MISSING_SCHEME');
|
||||
});
|
||||
|
||||
it('OP-11 wrong scheme throws', () => {
|
||||
expectErr(() => parseOcPath('https://x.com'), 'OC_PATH_MISSING_SCHEME');
|
||||
});
|
||||
|
||||
it('OP-12 empty after scheme throws', () => {
|
||||
expectErr(() => parseOcPath('oc://'), 'OC_PATH_EMPTY');
|
||||
});
|
||||
|
||||
it('OP-13 empty segment throws', () => {
|
||||
expectErr(() => parseOcPath('oc://X.md//item'), 'OC_PATH_EMPTY_SEGMENT');
|
||||
});
|
||||
|
||||
it('OP-14 too-deep nesting throws', () => {
|
||||
expectErr(() => parseOcPath('oc://X.md/a/b/c/d/e'), 'OC_PATH_TOO_DEEP');
|
||||
});
|
||||
|
||||
it('OP-15 non-string throws', () => {
|
||||
expectErr(() => parseOcPath(42 as unknown as string), 'OC_PATH_NOT_STRING');
|
||||
});
|
||||
|
||||
it('OP-16 round-trip canonical forms', () => {
|
||||
const cases = [
|
||||
'oc://SOUL.md',
|
||||
'oc://SOUL.md/Boundaries',
|
||||
'oc://SOUL.md/Boundaries/deny-rule-1',
|
||||
'oc://SOUL.md/Boundaries/deny-rule-1/risk',
|
||||
'oc://SOUL.md?session=daily',
|
||||
'oc://X.md/a/b/c?session=s',
|
||||
'oc://skills/email-drafter/[frontmatter]/name',
|
||||
'oc://config/plugins.entries.foo.token',
|
||||
];
|
||||
for (const c of cases) {
|
||||
expect(formatOcPath(parseOcPath(c)), `round-trip failed for ${c}`).toBe(c);
|
||||
}
|
||||
});
|
||||
|
||||
it('OP-17 isValidOcPath true positives', () => {
|
||||
expect(isValidOcPath('oc://X.md')).toBe(true);
|
||||
expect(isValidOcPath('oc://X.md/sec/item/field')).toBe(true);
|
||||
});
|
||||
|
||||
it('OP-18 isValidOcPath true negatives', () => {
|
||||
expect(isValidOcPath('')).toBe(false);
|
||||
expect(isValidOcPath('X.md')).toBe(false);
|
||||
expect(isValidOcPath('oc://')).toBe(false);
|
||||
expect(isValidOcPath('oc://x//y')).toBe(false);
|
||||
expect(isValidOcPath(null)).toBe(false);
|
||||
expect(isValidOcPath({})).toBe(false);
|
||||
});
|
||||
|
||||
it('OP-19 file segment with special chars (file with dots/slashes)', () => {
|
||||
const p = parseOcPath('oc://config/plugins.entries.foo.token');
|
||||
expect(p.file).toBe('config');
|
||||
expect(p.section).toBe('plugins.entries.foo.token');
|
||||
});
|
||||
|
||||
it('OP-20 section segment with hyphens / underscores / numbers', () => {
|
||||
const p = parseOcPath('oc://X.md/Multi-Tenant_Section_2');
|
||||
expect(p.section).toBe('Multi-Tenant_Section_2');
|
||||
});
|
||||
|
||||
it('OP-21 [frontmatter] sentinel is just a section name', () => {
|
||||
const p = parseOcPath('oc://X.md/[frontmatter]/name');
|
||||
expect(p.section).toBe('[frontmatter]');
|
||||
expect(p.item).toBe('name');
|
||||
});
|
||||
|
||||
it('OP-22 formatOcPath rejects empty file', () => {
|
||||
expectErr(() => formatOcPath({ file: '' }), 'OC_PATH_FILE_REQUIRED');
|
||||
});
|
||||
|
||||
it('OP-23 formatOcPath rejects item without section', () => {
|
||||
expectErr(() => formatOcPath({ file: 'X.md', item: 'i' }), 'OC_PATH_NESTING');
|
||||
});
|
||||
|
||||
it('OP-24 formatOcPath quotes raw slot values containing special chars', () => {
|
||||
// Closes ClawSweeper P2 on PR #78678: `formatOcPath` previously
|
||||
// concatenated raw slot values, so a programmatically-constructed
|
||||
// path with a `/` in the section/item slot would emit extra
|
||||
// segments and fail to parse back to the same address.
|
||||
// Use a slot value with `/` (and no internal `.`) — `.` inside
|
||||
// a slot is the dotted sub-segment delimiter; callers wanting a
|
||||
// literal `.` in a key should pre-quote that single sub-segment.
|
||||
const constructed = formatOcPath({
|
||||
file: 'config.jsonc',
|
||||
section: 'agents.defaults.models',
|
||||
item: 'github-copilot/claude-opus-4-7',
|
||||
field: 'alias',
|
||||
});
|
||||
expect(constructed).toBe(
|
||||
'oc://config.jsonc/agents.defaults.models/"github-copilot/claude-opus-4-7"/alias',
|
||||
);
|
||||
const parsed = parseOcPath(constructed);
|
||||
expect(parsed.item).toBe('"github-copilot/claude-opus-4-7"');
|
||||
});
|
||||
|
||||
it('OP-25 parseOcPath finds query separator outside quoted keys', () => {
|
||||
// Closes ClawSweeper P2 on PR #78678: `parseOcPath` previously
|
||||
// used `indexOf('?')` which split a key like `"foo?bar"` at the
|
||||
// embedded `?`, breaking advertised quoted-segment support.
|
||||
const parsed = parseOcPath('oc://config.jsonc/"foo?bar"?session=daily');
|
||||
expect(parsed.section).toBe('"foo?bar"');
|
||||
expect(parsed.session).toBe('daily');
|
||||
});
|
||||
|
||||
it('OP-26 file slot with `/` round-trips via quoting', () => {
|
||||
// Closes ClawSweeper P2 on PR #78678 (round 4): `parseOcPath` stored
|
||||
// `path.file` verbatim while `formatOcPath` prefixed it without
|
||||
// quote-wrapping, so a file like `skills/email-drafter` couldn't
|
||||
// round-trip — formatter output got re-parsed as file plus section,
|
||||
// and quoted input leaked the surrounding quotes into filesystem
|
||||
// resolution.
|
||||
const constructed = formatOcPath({
|
||||
file: 'skills/email-drafter',
|
||||
section: 'Tools',
|
||||
item: '-1',
|
||||
});
|
||||
expect(constructed).toBe('oc://"skills/email-drafter"/Tools/-1');
|
||||
const parsed = parseOcPath(constructed);
|
||||
expect(parsed.file).toBe('skills/email-drafter');
|
||||
expect(parsed.section).toBe('Tools');
|
||||
expect(parsed.item).toBe('-1');
|
||||
});
|
||||
|
||||
it('OP-27 file slot with dot extension does NOT get quoted', () => {
|
||||
// The file slot's quoting trigger excludes `.` because filename
|
||||
// extensions (`AGENTS.md`, `gateway.jsonc`) are normal — quoting
|
||||
// them would make canonical form ugly without need.
|
||||
expect(formatOcPath({ file: 'AGENTS.md' })).toBe('oc://AGENTS.md');
|
||||
expect(formatOcPath({ file: 'gateway.jsonc', section: 'version' })).toBe(
|
||||
'oc://gateway.jsonc/version',
|
||||
);
|
||||
});
|
||||
|
||||
it('OP-28 formatOcPath rejects field without item or section', () => {
  // Closes Galin P2 (round 8): the nesting guard caught
  // `field + section + no item` but missed `field + no section + no item`.
  // Such a struct emits `oc://FILE/FIELD` which silently re-parses as
  // `{ file, section: FIELD }` — different shape, breaking round-trip.
  //
  // Uses the shared `expectErr` helper (as OP-22 / OP-23 do) so the error
  // class and the stable `code` are both checked in one place.
  expectErr(() => formatOcPath({ file: 'X', field: 'name' }), 'OC_PATH_NESTING');
});
|
||||
|
||||
it('OP-29 isPattern is quote-aware (literal `*` inside quoted segment)', () => {
|
||||
// Closes Galin P2 (round 8): `isPattern` previously used
|
||||
// `slot.split('.')` which shredded a quoted key like `"items.*.glob"`
|
||||
// and falsely detected the literal `*` as a wildcard, causing
|
||||
// single-match verbs to reject a concrete path.
|
||||
const concrete = parseOcPath('oc://config.jsonc/"items.*.glob"');
|
||||
expect(isPattern(concrete)).toBe(false);
|
||||
|
||||
// Sanity: an unquoted `*` IS still a wildcard.
|
||||
const wildcard = parseOcPath('oc://config.jsonc/items/*');
|
||||
expect(isPattern(wildcard)).toBe(true);
|
||||
});
|
||||
|
||||
it('OP-30 getPathLayout is quote-aware', () => {
|
||||
// Closes Galin P2 (round 8): `getPathLayout` used `slot.split('.')`
|
||||
// for all three slots, breaking the find-walker / repackPath layout
|
||||
// contract for quoted segments containing `.`.
|
||||
const path = parseOcPath('oc://config.jsonc/"github.com"/repos');
|
||||
const layout = getPathLayout(path);
|
||||
// Quoted segment is one sub-segment, not two.
|
||||
expect(layout.sectionLen).toBe(1);
|
||||
expect(layout.subs[0]).toBe('"github.com"');
|
||||
expect(layout.itemLen).toBe(1);
|
||||
expect(layout.subs[1]).toBe('repos');
|
||||
});
|
||||
});
|
||||
235
src/oc-path/tests/scenarios/oc-path-resolver-edges.test.ts
Normal file
235
src/oc-path/tests/scenarios/oc-path-resolver-edges.test.ts
Normal file
@@ -0,0 +1,235 @@
|
||||
/**
|
||||
* Wave 8 — OcPath resolver edges.
|
||||
*
|
||||
* Substrate guarantee: `resolveOcPath(ast, ocPath)` returns the matched
|
||||
* node or `null`. Slug matching is case-insensitive. Field on non-kv
|
||||
* item returns `null` (not a guess). Frontmatter via the `[frontmatter]`
|
||||
* sentinel section.
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { parseMd } from '../../parse.js';
|
||||
import { resolveMdOcPath as resolveOcPath } from '../../resolve.js';
|
||||
|
||||
const SAMPLE = `---
|
||||
name: github
|
||||
description: gh CLI
|
||||
url: https://example.com
|
||||
---
|
||||
|
||||
Preamble prose.
|
||||
|
||||
## Boundaries
|
||||
|
||||
- never write to /etc
|
||||
- always confirm before deleting
|
||||
|
||||
## Tools
|
||||
|
||||
- gh: GitHub CLI
|
||||
- curl: HTTP client
|
||||
- The Tool: with caps and spaces
|
||||
|
||||
## Multi-Word Section
|
||||
|
||||
- item one
|
||||
`;
|
||||
|
||||
describe('wave-08 oc-path-resolver-edges', () => {
|
||||
const { ast } = parseMd(SAMPLE);
|
||||
|
||||
it('R-01 root resolves to AST', () => {
|
||||
const m = resolveOcPath(ast, { file: 'X.md' });
|
||||
expect(m?.kind).toBe('root');
|
||||
});
|
||||
|
||||
it('R-02 block by exact slug', () => {
|
||||
const m = resolveOcPath(ast, { file: 'X.md', section: 'boundaries' });
|
||||
expect(m?.kind).toBe('block');
|
||||
});
|
||||
|
||||
it('R-03 block by case-mismatched slug (Boundaries → boundaries)', () => {
|
||||
const m = resolveOcPath(ast, { file: 'X.md', section: 'Boundaries' });
|
||||
expect(m?.kind).toBe('block');
|
||||
});
|
||||
|
||||
it('R-04 block by uppercased slug', () => {
|
||||
const m = resolveOcPath(ast, { file: 'X.md', section: 'BOUNDARIES' });
|
||||
expect(m?.kind).toBe('block');
|
||||
});
|
||||
|
||||
it('R-05 multi-word section by slug', () => {
|
||||
const m = resolveOcPath(ast, { file: 'X.md', section: 'multi-word-section' });
|
||||
expect(m?.kind).toBe('block');
|
||||
if (m?.kind === 'block') {expect(m.node.heading).toBe('Multi-Word Section');}
|
||||
});
|
||||
|
||||
it('R-06 multi-word section by exact heading text (case-folded)', () => {
|
||||
const m = resolveOcPath(ast, { file: 'X.md', section: 'Multi-Word Section' });
|
||||
// The OcPath section is matched case-insensitively against block.slug.
|
||||
// Block.slug for "Multi-Word Section" is "multi-word-section", and
|
||||
// path.section.toLowerCase() = "multi-word section" which does NOT
|
||||
// match "multi-word-section". Documented limit — callers must
|
||||
// pass slug form, not heading text. This is intentional.
|
||||
expect(m).toBeNull();
|
||||
});
|
||||
|
||||
it('R-07 unknown section returns null', () => {
|
||||
const m = resolveOcPath(ast, { file: 'X.md', section: 'unknown' });
|
||||
expect(m).toBeNull();
|
||||
});
|
||||
|
||||
it('R-08 item by slug under known section', () => {
|
||||
const m = resolveOcPath(ast, {
|
||||
file: 'X.md',
|
||||
section: 'tools',
|
||||
item: 'gh',
|
||||
});
|
||||
expect(m?.kind).toBe('item');
|
||||
});
|
||||
|
||||
it('R-09 item slug for KV uses kv.key (gh, not "gh-github-cli")', () => {
  const m = resolveOcPath(ast, {
    file: 'X.md',
    section: 'tools',
    item: 'gh',
  });
  expect(m).not.toBeNull();
  // Assert the match kind explicitly: the value check below sits behind a
  // type-narrowing `if` and would otherwise be skipped without failing.
  expect(m?.kind).toBe('item');
  if (m?.kind === 'item') {
    expect(m.node.kv?.value).toBe('GitHub CLI');
  }
});
|
||||
|
||||
it('R-10 item slug for plain bullet uses text', () => {
|
||||
const m = resolveOcPath(ast, {
|
||||
file: 'X.md',
|
||||
section: 'boundaries',
|
||||
item: 'never-write-to-etc',
|
||||
});
|
||||
expect(m?.kind).toBe('item');
|
||||
});
|
||||
|
||||
it('R-11 item slug case-insensitive', () => {
|
||||
const m = resolveOcPath(ast, {
|
||||
file: 'X.md',
|
||||
section: 'tools',
|
||||
item: 'GH',
|
||||
});
|
||||
expect(m?.kind).toBe('item');
|
||||
});
|
||||
|
||||
it('R-12 item with spaces in key (slugified)', () => {
|
||||
const m = resolveOcPath(ast, {
|
||||
file: 'X.md',
|
||||
section: 'tools',
|
||||
item: 'the-tool',
|
||||
});
|
||||
expect(m?.kind).toBe('item');
|
||||
if (m?.kind === 'item') {expect(m.node.kv?.value).toBe('with caps and spaces');}
|
||||
});
|
||||
|
||||
it('R-13 unknown item returns null', () => {
|
||||
const m = resolveOcPath(ast, {
|
||||
file: 'X.md',
|
||||
section: 'tools',
|
||||
item: 'nonexistent',
|
||||
});
|
||||
expect(m).toBeNull();
|
||||
});
|
||||
|
||||
it('R-14 item-field matches kv.key (case-insensitive)', () => {
|
||||
const m = resolveOcPath(ast, {
|
||||
file: 'X.md',
|
||||
section: 'tools',
|
||||
item: 'gh',
|
||||
field: 'gh',
|
||||
});
|
||||
expect(m?.kind).toBe('item-field');
|
||||
});
|
||||
|
||||
it('R-15 field on plain (non-kv) item returns null', () => {
|
||||
const m = resolveOcPath(ast, {
|
||||
file: 'X.md',
|
||||
section: 'boundaries',
|
||||
item: 'never-write-to-etc',
|
||||
field: 'risk',
|
||||
});
|
||||
expect(m).toBeNull();
|
||||
});
|
||||
|
||||
it('R-16 field that does not match kv.key returns null', () => {
|
||||
const m = resolveOcPath(ast, {
|
||||
file: 'X.md',
|
||||
section: 'tools',
|
||||
item: 'gh',
|
||||
field: 'nonexistent',
|
||||
});
|
||||
expect(m).toBeNull();
|
||||
});
|
||||
|
||||
it('R-17 frontmatter via [frontmatter] sentinel section', () => {
|
||||
const m = resolveOcPath(ast, {
|
||||
file: 'X.md',
|
||||
section: '[frontmatter]',
|
||||
field: 'name',
|
||||
});
|
||||
expect(m?.kind).toBe('frontmatter');
|
||||
if (m?.kind === 'frontmatter') {expect(m.node.value).toBe('github');}
|
||||
});
|
||||
|
||||
it('R-18 frontmatter unknown key returns null', () => {
|
||||
const m = resolveOcPath(ast, {
|
||||
file: 'X.md',
|
||||
section: '[frontmatter]',
|
||||
field: 'nonexistent',
|
||||
});
|
||||
expect(m).toBeNull();
|
||||
});
|
||||
|
||||
it('R-19 frontmatter without field returns null', () => {
|
||||
const m = resolveOcPath(ast, {
|
||||
file: 'X.md',
|
||||
section: '[frontmatter]',
|
||||
});
|
||||
expect(m).toBeNull();
|
||||
});
|
||||
|
||||
it('R-20 multiple frontmatter keys with same name — first match wins', () => {
|
||||
// Build an AST manually to test
|
||||
const dupeAst = {
|
||||
kind: 'md' as const,
|
||||
raw: '',
|
||||
frontmatter: [
|
||||
{ key: 'k', value: 'first', line: 2 },
|
||||
{ key: 'k', value: 'second', line: 3 },
|
||||
],
|
||||
preamble: '',
|
||||
blocks: [],
|
||||
};
|
||||
const m = resolveOcPath(dupeAst, {
|
||||
file: 'X.md',
|
||||
section: '[frontmatter]',
|
||||
field: 'k',
|
||||
});
|
||||
expect(m?.kind).toBe('frontmatter');
|
||||
if (m?.kind === 'frontmatter') {expect(m.node.value).toBe('first');}
|
||||
});
|
||||
|
||||
it('R-21 empty AST resolves root only', () => {
|
||||
const empty = { kind: 'md' as const, raw: '', frontmatter: [], preamble: '', blocks: [] };
|
||||
expect(resolveOcPath(empty, { file: 'X.md' })?.kind).toBe('root');
|
||||
expect(resolveOcPath(empty, { file: 'X.md', section: 'any' })).toBeNull();
|
||||
});
|
||||
|
||||
it('R-22 resolver does not mutate the AST', () => {
|
||||
const before = JSON.stringify(ast);
|
||||
resolveOcPath(ast, { file: 'X.md', section: 'tools', item: 'gh', field: 'gh' });
|
||||
const after = JSON.stringify(ast);
|
||||
expect(after).toBe(before);
|
||||
});
|
||||
|
||||
it('R-23 file segment is informational — resolver doesn\'t check it', () => {
|
||||
// The file name in OcPath is metadata; resolver assumes the AST
|
||||
// matches. Callers verify file mapping before passing the AST.
|
||||
const m1 = resolveOcPath(ast, { file: 'SOUL.md', section: 'tools' });
|
||||
const m2 = resolveOcPath(ast, { file: 'AGENTS.md', section: 'tools' });
|
||||
expect(m1?.kind).toBe(m2?.kind);
|
||||
});
|
||||
});
|
||||
127
src/oc-path/tests/scenarios/perf-determinism.test.ts
Normal file
127
src/oc-path/tests/scenarios/perf-determinism.test.ts
Normal file
@@ -0,0 +1,127 @@
|
||||
/**
|
||||
* Wave 14 — performance + determinism + immutability.
|
||||
*
|
||||
* Substrate guarantees:
|
||||
* - Parsing scales roughly linearly with file size (no quadratic blowup)
|
||||
* - Same input produces same AST (no Object.keys / Set order surprises)
|
||||
* - Resolver does not mutate the AST
|
||||
* - AST is structurally cloneable (no functions, no cycles)
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { emitMd } from '../../emit.js';
|
||||
import { parseMd } from '../../parse.js';
|
||||
import { resolveMdOcPath as resolveOcPath } from '../../resolve.js';
|
||||
|
||||
describe('wave-14 perf + determinism', () => {
|
||||
it('PD-01 parses 100 KB file in under 200 ms', () => {
  // Fixture: 1000 sections, each a heading followed by five key/value
  // bullets, joined with single newlines (no trailing newline).
  const sections = Array.from({ length: 1000 }, (_s, i) => {
    const bullets = Array.from(
      { length: 5 },
      (_b, j) => `- key${i}-${j}: value with content`,
    );
    return ['## H' + i, ...bullets].join('\n');
  });
  const raw = sections.join('\n');

  // Only the parse itself is timed; fixture construction is excluded.
  const startedAt = performance.now();
  parseMd(raw);
  const elapsedMs = performance.now() - startedAt;

  expect(elapsedMs).toBeLessThan(200);
});
|
||||
|
||||
it('PD-02 parses 1000 small files in under 500 ms', () => {
|
||||
const raw = `## H\n- a\n- b: c\n## I\n- d\n`;
|
||||
const start = performance.now();
|
||||
for (let i = 0; i < 1000; i++) {
|
||||
parseMd(raw);
|
||||
}
|
||||
const elapsed = performance.now() - start;
|
||||
expect(elapsed).toBeLessThan(500);
|
||||
});
|
||||
|
||||
it('PD-03 100k OcPath resolutions on parsed AST in under 500 ms', () => {
|
||||
const raw = `## A\n- a1\n- a2\n## B\n- b1\n- b2\n## C\n- c1: cv\n`;
|
||||
const { ast } = parseMd(raw);
|
||||
const path = { file: 'X.md', section: 'b', item: 'b1' };
|
||||
const start = performance.now();
|
||||
for (let i = 0; i < 100_000; i++) {
|
||||
resolveOcPath(ast, path);
|
||||
}
|
||||
const elapsed = performance.now() - start;
|
||||
expect(elapsed).toBeLessThan(500);
|
||||
});
|
||||
|
||||
it('PD-04 same input → byte-identical AST.raw across runs', () => {
|
||||
const raw = `---\nb: 2\na: 1\n---\n## Z\n- z\n## A\n- a\n`;
|
||||
const a1 = parseMd(raw).ast;
|
||||
const a2 = parseMd(raw).ast;
|
||||
expect(a1.raw).toBe(a2.raw);
|
||||
expect(a1.frontmatter).toEqual(a2.frontmatter);
|
||||
expect(a1.blocks).toEqual(a2.blocks);
|
||||
});
|
||||
|
||||
it('PD-05 resolveOcPath is non-mutating', () => {
|
||||
const raw = `## A\n- a: x\n## B\n- b\n`;
|
||||
const { ast } = parseMd(raw);
|
||||
const before = JSON.stringify(ast);
|
||||
resolveOcPath(ast, { file: 'X.md', section: 'a', item: 'a', field: 'a' });
|
||||
resolveOcPath(ast, { file: 'X.md', section: 'b' });
|
||||
resolveOcPath(ast, { file: 'X.md', section: 'unknown' });
|
||||
expect(JSON.stringify(ast)).toBe(before);
|
||||
});
|
||||
|
||||
it('PD-06 AST is JSON-serializable (no functions, no cycles)', () => {
|
||||
const raw = `---\nk: v\n---\n## A\n- a\n\`\`\`ts\nx\n\`\`\`\n| h |\n| - |\n| 1 |\n`;
|
||||
const { ast } = parseMd(raw);
|
||||
const serialized = JSON.stringify(ast);
|
||||
const parsed = JSON.parse(serialized);
|
||||
expect(parsed.raw).toBe(ast.raw);
|
||||
expect(parsed.blocks.length).toBe(ast.blocks.length);
|
||||
});
|
||||
|
||||
it('PD-07 emit is non-mutating', () => {
|
||||
const raw = `## A\n- a\n`;
|
||||
const { ast } = parseMd(raw);
|
||||
const before = JSON.stringify(ast);
|
||||
emitMd(ast);
|
||||
emitMd(ast);
|
||||
emitMd(ast);
|
||||
expect(JSON.stringify(ast)).toBe(before);
|
||||
});
|
||||
|
||||
it('PD-08 frontmatter ordering is preserved (insertion order, not alphabetical)', () => {
|
||||
const raw = `---\nz: 1\nm: 2\na: 3\n---\n`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.frontmatter.map((e) => e.key)).toEqual(['z', 'm', 'a']);
|
||||
});
|
||||
|
||||
it('PD-09 block ordering is document order, not alphabetical', () => {
|
||||
const raw = `## Z\n## A\n## M\n`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks.map((b) => b.heading)).toEqual(['Z', 'A', 'M']);
|
||||
});
|
||||
|
||||
it('PD-10 item ordering within block is document order', () => {
|
||||
const raw = `## H\n- z\n- a\n- m\n`;
|
||||
const { ast } = parseMd(raw);
|
||||
expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(['z', 'a', 'm']);
|
||||
});
|
||||
|
||||
it('PD-11 large fixture round-trip stays under 100 ms', () => {
|
||||
const lines: string[] = [];
|
||||
for (let i = 0; i < 500; i++) {
|
||||
lines.push(`## Section ${i}`);
|
||||
lines.push('');
|
||||
for (let j = 0; j < 10; j++) {
|
||||
lines.push(`- item-${i}-${j}: with some prose value content here`);
|
||||
}
|
||||
lines.push('');
|
||||
}
|
||||
const raw = lines.join('\n');
|
||||
const start = performance.now();
|
||||
const { ast } = parseMd(raw);
|
||||
const out = emitMd(ast);
|
||||
const elapsed = performance.now() - start;
|
||||
expect(out).toBe(raw);
|
||||
expect(elapsed).toBeLessThan(100);
|
||||
});
|
||||
});
|
||||
624
src/oc-path/tests/scenarios/pitfalls.test.ts
Normal file
624
src/oc-path/tests/scenarios/pitfalls.test.ts
Normal file
@@ -0,0 +1,624 @@
|
||||
/**
|
||||
* Wave-23 — Pitfall scenarios.
|
||||
*
|
||||
* One test per pitfall ID enumerated in
|
||||
* `packages/oc-paths-substrate/PITFALLS.md` (the substrate-local
|
||||
* pitfall taxonomy). Tests are grouped by category so a regression in
|
||||
* any one defense is visible at a glance. Every MITIGATED / REJECTED
|
||||
* pitfall has a positive validation here; DEFERRED ones are covered
|
||||
* as documented limits with a `.skip` note.
|
||||
*
|
||||
* **Namespace note**: substrate pitfall IDs (P-001 … P-040) are a
|
||||
* separate namespace from the claws-side `docs/PITFALLS.md`
|
||||
* governance taxonomy (which uses P-NNN for completely different
|
||||
* pitfalls — e.g., P-033 there is "Memory poisoning"). The package
|
||||
* boundary disambiguates.
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
MAX_PATH_LENGTH,
|
||||
MAX_TRAVERSAL_DEPTH,
|
||||
OcPathError,
|
||||
findOcPaths,
|
||||
formatOcPath,
|
||||
parseOcPath,
|
||||
resolveOcPath,
|
||||
setOcPath,
|
||||
} from '../../index.js';
|
||||
import { parseJsonc } from '../../jsonc/parse.js';
|
||||
import { parseJsonl } from '../../jsonl/parse.js';
|
||||
import { parseYaml } from '../../yaml/parse.js';
|
||||
|
||||
// ---------- Encoding pitfalls --------------------------------------------
|
||||
|
||||
describe('wave-23 pitfalls — encoding', () => {
|
||||
it('P-001 strips leading UTF-8 BOM from path string', () => {
  // Written as an escape so the BOM is visible in review and cannot be
  // dropped by editors or formatters (a literal U+FEFF is invisible).
  const bom = '\uFEFF';
  expect(parseOcPath(`${bom}oc://X/Y`).file).toBe('X');
});
|
||||
|
||||
it('P-002 normalizes path to NFC', () => {
  // Explicit escapes: the composed and decomposed spellings render
  // identically, and a literal in source can be silently re-normalized by
  // editors or version-control tooling, which would make this test vacuous.
  const nfc = 'caf\u00E9'; // composed: U+00E9
  const nfd = 'cafe\u0301'; // decomposed: 'e' + U+0301 combining acute
  expect(parseOcPath(`oc://X/${nfd}`).section).toBe(nfc);
  expect(parseOcPath(`oc://X/${nfc}`).section).toBe(nfc);
  // Same struct out for both inputs.
  expect(parseOcPath(`oc://X/${nfd}`)).toEqual(parseOcPath(`oc://X/${nfc}`));
});
|
||||
|
||||
it('P-003 rejects whitespace in identifier-shaped segments', () => {
|
||||
expect(() => parseOcPath('oc://X/foo /bar')).toThrow(OcPathError);
|
||||
expect(() => parseOcPath('oc://X/ foo')).toThrow(OcPathError);
|
||||
expect(() => parseOcPath('oc://X/foo\tbar')).toThrow(OcPathError);
|
||||
});
|
||||
|
||||
it('P-003 allows whitespace inside predicate values (content)', () => {
|
||||
// Spaces inside a predicate value are legitimate — they're filtering
|
||||
// against actual content.
|
||||
expect(() => parseOcPath('oc://X/[name=hello world]')).not.toThrow();
|
||||
});
|
||||
|
||||
it('P-004 / P-011 rejects control characters and null bytes', () => {
|
||||
expect(() => parseOcPath('oc://X/\x00')).toThrow(/Control character/);
|
||||
expect(() => parseOcPath('oc://X/foo\x01bar')).toThrow(/Control character/);
|
||||
expect(() => parseOcPath('oc://X/foo\x7Fbar')).toThrow(/Control character/);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------- Empty / structural pitfalls ----------------------------------
|
||||
|
||||
describe('wave-23 pitfalls — empty & structural', () => {
|
||||
it('P-008 rejects empty segments', () => {
|
||||
expect(() => parseOcPath('oc://X//Y')).toThrow(/Empty segment/);
|
||||
});
|
||||
|
||||
it('P-009 rejects empty dotted sub-segments', () => {
|
||||
expect(() => parseOcPath('oc://X/a..b')).toThrow(/Empty dotted sub-segment/);
|
||||
});
|
||||
|
||||
it('P-010 rejects scheme-only path', () => {
|
||||
expect(() => parseOcPath('oc://')).toThrow(/Empty oc:\/\/ path/);
|
||||
});
|
||||
|
||||
it('P-014 rejects empty predicate key', () => {
|
||||
expect(() => parseOcPath('oc://X/[=foo]')).toThrow(/Malformed predicate/);
|
||||
});
|
||||
|
||||
it('P-014 rejects empty predicate value', () => {
|
||||
expect(() => parseOcPath('oc://X/[id=]')).toThrow(/Malformed predicate/);
|
||||
});
|
||||
|
||||
it('P-015 accepts bracket segment with no operator as literal sentinel', () => {
|
||||
// `[frontmatter]` predates the predicate grammar — kept as literal.
|
||||
expect(parseOcPath('oc://AGENTS.md/[frontmatter]/key').section).toBe('[frontmatter]');
|
||||
});
|
||||
|
||||
it('P-016 rejects mismatched brackets', () => {
|
||||
expect(() => parseOcPath('oc://X/[unclosed')).toThrow(OcPathError);
|
||||
expect(() => parseOcPath('oc://X/closed]')).toThrow(OcPathError);
|
||||
});
|
||||
|
||||
it('P-016 rejects mismatched braces', () => {
|
||||
expect(() => parseOcPath('oc://X/{a,b')).toThrow(OcPathError);
|
||||
});
|
||||
|
||||
it('P-018 rejects empty union', () => {
|
||||
expect(() => parseOcPath('oc://X/{}')).toThrow(/Empty union/);
|
||||
});
|
||||
|
||||
it('P-018 rejects union with empty alternative', () => {
|
||||
expect(() => parseOcPath('oc://X/{a,,b}')).toThrow(/Empty alternative/);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------- Predicate-content pitfalls -----------------------------------
|
||||
|
||||
describe('wave-23 pitfalls — predicate content', () => {
|
||||
it('P-012 predicate value containing `/` round-trips', () => {
|
||||
// The path-level `/` split must respect bracket boundaries.
|
||||
const p = parseOcPath('oc://X/[id=foo/bar]/cmd');
|
||||
expect(p.section).toBe('[id=foo/bar]');
|
||||
expect(p.item).toBe('cmd');
|
||||
});
|
||||
|
||||
it('P-012 findOcPaths matches a leaf whose id contains a slash', () => {
  const ast = parseYaml(
    'steps:\n  - id: foo/bar\n    cmd: x\n  - id: baz\n    cmd: y\n'
  ).ast;
  const out = findOcPaths(ast, parseOcPath('oc://wf/steps/[id=foo/bar]/cmd'));
  expect(out).toHaveLength(1);
  const match = out[0]?.match;
  // Pin the match kind: the value check below is behind a narrowing `if`
  // and would otherwise pass vacuously for a non-leaf match.
  expect(match?.kind).toBe('leaf');
  if (match?.kind === 'leaf') {
    expect(match.valueText).toBe('x');
  }
});
|
||||
|
||||
it('P-013 predicate value containing `.` round-trips', () => {
|
||||
const p = parseOcPath('oc://X/steps.[id=1.0].cmd');
|
||||
expect(p.section).toBe('steps.[id=1.0].cmd');
|
||||
});
|
||||
|
||||
it('P-013 findOcPaths matches a leaf whose id is `1.0`', () => {
  // Ids are quoted in the YAML so they are strings containing a `.`;
  // the predicate value `1.0` must not be split as a dotted sub-path.
  const ast = parseYaml(
    ['steps:', '  - id: "1.0"', '    cmd: x', '  - id: "2.0"', '    cmd: y', ''].join('\n'),
  ).ast;
  const out = findOcPaths(ast, parseOcPath('oc://wf/steps/[id=1.0]/cmd'));
  expect(out).toHaveLength(1);
  // Assert the kind unconditionally: with only the `if` guard below, a
  // non-leaf match would skip the value check and the test would still pass.
  expect(out[0].match.kind).toBe('leaf');
  if (out[0].match.kind === 'leaf') {
    expect(out[0].match.valueText).toBe('x');
  }
});
|
||||
});
|
||||
|
||||
// ---------- Sentinel & collision pitfalls --------------------------------
|
||||
|
||||
describe('wave-23 pitfalls — sentinels & collisions', () => {
|
||||
it('P-020/openclaw#59934 negative numeric key on object resolves as literal key', () => {
|
||||
// Telegram supergroup IDs are negative numbers used as map keys.
|
||||
// Our positional `-N` token would otherwise hijack them. Resolver
|
||||
// falls through to literal-key lookup on non-indexable containers.
|
||||
const ast = parseJsonc(
|
||||
'{"channels":{"telegram":{"groups":{"-5028303500":{"requireMention":false}}}}}'
|
||||
).ast;
|
||||
const m = resolveOcPath(
|
||||
ast,
|
||||
parseOcPath('oc://config/channels.telegram.groups.-5028303500.requireMention'),
|
||||
);
|
||||
expect(m).not.toBeNull();
|
||||
expect(m?.kind).toBe('leaf');
|
||||
if (m?.kind === 'leaf') {
|
||||
expect(m.valueText).toBe('false');
|
||||
expect(m.leafType).toBe('boolean');
|
||||
}
|
||||
});
|
||||
|
||||
it('P-020 negative `-N` still works as positional on arrays', () => {
|
||||
// Same syntax, indexable container — positional resolution wins.
|
||||
const ast = parseJsonc('{"items":[10,20,30]}').ast;
|
||||
const m = resolveOcPath(ast, parseOcPath('oc://X/items/-1'));
|
||||
expect(m?.kind).toBe('leaf');
|
||||
if (m?.kind === 'leaf') {expect(m.valueText).toBe('30');}
|
||||
});
|
||||
|
||||
it('P-020 numeric segment dispatches by node kind (array index vs map key)', () => {
|
||||
// Same path string against two different ASTs — kind disambiguates.
|
||||
const arr = parseJsonc('{"x":["a","b"]}').ast;
|
||||
const map = parseJsonc('{"x":{"0":"a","1":"b"}}').ast;
|
||||
const arrM = resolveOcPath(arr, parseOcPath('oc://config/x/0'));
|
||||
const mapM = resolveOcPath(map, parseOcPath('oc://config/x/0'));
|
||||
expect(arrM?.kind).toBe('leaf');
|
||||
expect(mapM?.kind).toBe('leaf');
|
||||
if (arrM?.kind === 'leaf') {expect(arrM.valueText).toBe('a');}
|
||||
if (mapM?.kind === 'leaf') {expect(mapM.valueText).toBe('a');}
|
||||
});
|
||||
|
||||
it('P-021 `$last` literal in a yaml key is shadowed by positional sentinel', () => {
|
||||
// Document v0 limitation: `$last` always means "last", never a literal key.
|
||||
// Authors with `$last` literal keys must use kind-narrow access.
|
||||
const ast = parseYaml('$last: literal-value\nfoo: bar\n').ast;
|
||||
const m = resolveOcPath(ast, parseOcPath('oc://X/$last'));
|
||||
// `$last` resolves to the LAST key (`foo` → `bar`), not the literal `$last` key.
|
||||
expect(m?.kind).toBe('leaf');
|
||||
if (m?.kind === 'leaf') {expect(m.valueText).toBe('bar');}
|
||||
});
|
||||
});
|
||||
|
||||
// ---------- Round-trip pitfalls ------------------------------------------
|
||||
|
||||
describe('wave-23 pitfalls — round-trip', () => {
|
||||
it('P-023 parseOcPath ∘ formatOcPath is idempotent across path shapes', () => {
  // One representative string per path shape: bare file, 1–3 slots,
  // dotted sub-segments, session query, bracket sentinel, wildcard,
  // positional tokens, union, predicate, and `#N`.
  const inputs = [
    'oc://X',
    'oc://X/a',
    'oc://X/a/b',
    'oc://X/a/b/c',
    'oc://X/a.b.c',
    'oc://X/a?session=s1',
    'oc://X/[frontmatter]/key',
    'oc://X/steps/*/command',
    'oc://X/steps/$last/id',
    'oc://X/steps/-2/id',
    'oc://X/steps/{command,run}',
    'oc://X/steps/[id=foo]/cmd',
    'oc://X/steps/#0/foo',
  ];
  for (const s of inputs) {
    const parsed = parseOcPath(s);
    // Route through the formatter. Re-parsing `s` itself would only show
    // that the parser is deterministic and would never exercise
    // formatOcPath, which is the composition this case is named for.
    const reparsed = parseOcPath(formatOcPath(parsed));
    expect(reparsed, `parse∘format changed the struct for ${s}`).toEqual(parsed);
  }
});
|
||||
});
|
||||
|
||||
// ---------- Sentinel-guard pitfalls --------------------------------------
|
||||
|
||||
describe('wave-23 pitfalls — sentinel at format boundary (F9)', () => {
|
||||
it('formatOcPath rejects an OcPath struct carrying the redaction sentinel', () => {
|
||||
// Path strings flow into telemetry, audit events, error messages,
|
||||
// find-result `path` fields. Without the format-time guard, a
|
||||
// struct with `section: REDACTED_SENTINEL` would slip past every
|
||||
// consumer except the CLI's scrubSentinel layer. The substrate's
|
||||
// contract is "emit boundaries refuse the sentinel" — formatOcPath
|
||||
// IS such a boundary for path strings.
|
||||
expect(() =>
|
||||
formatOcPath({ file: 'AGENTS.md', section: '__OPENCLAW_REDACTED__' }),
|
||||
).toThrow(/sentinel literal/);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------- Containment pitfalls -----------------------------------------
|
||||
|
||||
describe('wave-23 pitfalls — file-slot containment', () => {
|
||||
// oc:// paths are workspace-relative. Absolute paths and `..` segments
|
||||
// would let a hostile workflow / skill manifest persuade
|
||||
// `openclaw path resolve|set|emit` into reading or writing arbitrary
|
||||
// filesystem locations (Node `path.resolve(cwd, absolute)` returns
|
||||
// `absolute`, bypassing the workspace root). Reject at parseOcPath
|
||||
// and formatOcPath for symmetric defense.
|
||||
it('rejects an absolute POSIX file slot', () => {
|
||||
expect(() => parseOcPath('oc:///etc/passwd')).toThrow(/Empty segment/);
|
||||
// Quoted form — same containment violation, different parse path.
|
||||
expect(() => parseOcPath('oc://"/etc/passwd"/section')).toThrow(/Absolute file slot/);
|
||||
});
|
||||
|
||||
it('rejects a Windows drive-letter file slot', () => {
|
||||
expect(() => parseOcPath('oc://"C:/Windows/System32/foo"/section')).toThrow(
|
||||
/Absolute file slot/,
|
||||
);
|
||||
expect(() => parseOcPath('oc://"C:\\\\Windows\\\\System32"/section')).toThrow(
|
||||
/Absolute file slot/,
|
||||
);
|
||||
});
|
||||
|
||||
it('rejects a leading-backslash file slot', () => {
|
||||
expect(() => parseOcPath('oc://"\\\\srv\\\\share\\\\foo"/section')).toThrow(
|
||||
/Absolute file slot/,
|
||||
);
|
||||
});
|
||||
|
||||
it('rejects a parent-directory escape via plain `..`', () => {
|
||||
expect(() => parseOcPath('oc://"../foo"/section')).toThrow(/Parent-directory/);
|
||||
expect(() => parseOcPath('oc://".."/section')).toThrow(/Parent-directory/);
|
||||
});
|
||||
|
||||
it('rejects a parent-directory escape mid-path', () => {
|
||||
expect(() => parseOcPath('oc://"foo/../bar"/section')).toThrow(/Parent-directory/);
|
||||
});
|
||||
|
||||
it('does not decode URL-encoded `..` — literal `%2E%2E` is treated as a filename', () => {
|
||||
// The substrate does NOT do URL decoding — `%2E%2E` is the literal
|
||||
// five-character filename, not a parent-directory escape. Documented
|
||||
// limitation: consumers that pre-decode (HTTP layers, browser UI)
|
||||
// are responsible for normalizing before invoking parseOcPath.
|
||||
// Pin the current behavior so a future "let's decode for them" PR
|
||||
// sees the explicit choice.
|
||||
const p = parseOcPath('oc://"%2E%2E/foo"/section');
|
||||
expect(p.file).toBe('%2E%2E/foo');
|
||||
});
|
||||
|
||||
it('formatOcPath rejects an OcPath struct with absolute file', () => {
|
||||
expect(() => formatOcPath({ file: '/etc/passwd' })).toThrow(/Absolute file slot/);
|
||||
expect(() => formatOcPath({ file: 'C:/Windows' })).toThrow(/Absolute file slot/);
|
||||
});
|
||||
|
||||
it('formatOcPath rejects an OcPath struct with parent-directory file', () => {
|
||||
expect(() => formatOcPath({ file: '..' })).toThrow(/Parent-directory/);
|
||||
expect(() => formatOcPath({ file: '../etc/passwd' })).toThrow(/Parent-directory/);
|
||||
expect(() => formatOcPath({ file: 'foo/../bar' })).toThrow(/Parent-directory/);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------- formatOcPath ↔ parseOcPath round-trip ------------------------
|
||||
|
||||
describe('wave-23 pitfalls — format/parse round-trip', () => {
|
||||
// The contract on oc-path.ts:13 — `formatOcPath(parseOcPath(s)) === s`
|
||||
// for any string the formatter accepts. Round-trip breaks were
|
||||
// observable on (a) struct fields with empty dotted sub-segments
|
||||
// (`section: 'foo.'` → `oc://X/foo.""` → re-parses with `section:
|
||||
// 'foo.""'`) and (b) struct fields with control chars (formatter
|
||||
// emitted unquoted, parser refused). Pin both directions.
|
||||
it('formatOcPath rejects empty dotted sub-segment in a slot', () => {
|
||||
expect(() => formatOcPath({ file: 'a.md', section: 'foo.' })).toThrow(
|
||||
/Empty dotted sub-segment/,
|
||||
);
|
||||
expect(() => formatOcPath({ file: 'a.md', section: '.foo' })).toThrow(
|
||||
/Empty dotted sub-segment/,
|
||||
);
|
||||
expect(() => formatOcPath({ file: 'a.md', section: 'foo..bar' })).toThrow(
|
||||
/Empty dotted sub-segment/,
|
||||
);
|
||||
});
|
||||
|
||||
it('formatOcPath rejects control characters in any slot', () => {
|
||||
expect(() => formatOcPath({ file: 'a.md', section: 'sec\x00tion' })).toThrow(
|
||||
/Control character/,
|
||||
);
|
||||
expect(() => formatOcPath({ file: 'a.md', section: 'sec\x01tion' })).toThrow(
|
||||
/Control character/,
|
||||
);
|
||||
expect(() => formatOcPath({ file: 'a.md', section: 'tab\ttion' })).toThrow(
|
||||
/Control character/,
|
||||
);
|
||||
expect(() => formatOcPath({ file: 'a\x00b.md' })).toThrow(/Control character/);
|
||||
});
|
||||
|
||||
it('round-trips every shape parseOcPath accepts', () => {
|
||||
// For every valid input, formatOcPath(parseOcPath(s)) MUST be
|
||||
// re-parseable to the same struct. Don't string-compare (the
|
||||
// formatter normalizes quoting); parse the round-tripped output
|
||||
// and compare structs.
|
||||
const inputs = [
|
||||
'oc://X',
|
||||
'oc://X/a',
|
||||
'oc://X/a/b',
|
||||
'oc://X/a/b/c',
|
||||
'oc://X/a.b.c',
|
||||
'oc://X/a?session=s1',
|
||||
'oc://X/[frontmatter]/key',
|
||||
'oc://X/steps/$last/id',
|
||||
'oc://X/steps/-2/id',
|
||||
'oc://X/steps/[id=foo]/cmd',
|
||||
'oc://X/steps/{a,b}/cmd',
|
||||
'oc://X/"foo/bar"/baz',
|
||||
'oc://X/agents/"anthropic/claude-opus-4-7"/alias',
|
||||
];
|
||||
for (const s of inputs) {
|
||||
const parsed = parseOcPath(s);
|
||||
const formatted = formatOcPath(parsed);
|
||||
const reparsed = parseOcPath(formatted);
|
||||
expect(reparsed).toEqual(parsed);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// ---------- Performance pitfalls -----------------------------------------
|
||||
|
||||
describe('wave-23 pitfalls — performance & limits', () => {
|
||||
it('P-031 / P-033 walker depth cap throws on pathological recursion', () => {
|
||||
// Construct a yaml that nests deeper than MAX_TRAVERSAL_DEPTH.
|
||||
// We're using `**` against a synthetic deeply-nested structure.
|
||||
let yaml = 'root:\n';
|
||||
let indent = ' ';
|
||||
for (let i = 0; i < MAX_TRAVERSAL_DEPTH + 50; i++) {
|
||||
yaml += `${indent}a:\n`;
|
||||
indent += ' ';
|
||||
}
|
||||
yaml += `${indent}leaf: x\n`;
|
||||
const ast = parseYaml(yaml).ast;
|
||||
expect(() => findOcPaths(ast, parseOcPath('oc://X/**'))).toThrow(/MAX_TRAVERSAL_DEPTH/);
|
||||
});
|
||||
|
||||
it('P-032 rejects path strings longer than MAX_PATH_LENGTH', () => {
|
||||
const big = 'oc://X/' + 'a'.repeat(MAX_PATH_LENGTH);
|
||||
expect(() => parseOcPath(big)).toThrow(/exceeds .* bytes/);
|
||||
});
|
||||
|
||||
it('P-032 path at the cap parses cleanly', () => {
  // Prefix plus filler totals exactly MAX_PATH_LENGTH characters, i.e.
  // the boundary value itself rather than one below it.
  const prefix = 'oc://X/';
  const filler = 'a'.repeat(MAX_PATH_LENGTH - prefix.length);
  const atCap = prefix + filler;
  expect(() => parseOcPath(atCap)).not.toThrow();
});
|
||||
|
||||
it('P-032 formatOcPath enforces the same cap on output', () => {
|
||||
// Symmetric upper bound — without this guard, a struct whose
|
||||
// formatted form crosses the cap would emit a string parseOcPath
|
||||
// would immediately reject (round-trip break).
|
||||
expect(() =>
|
||||
formatOcPath({ file: 'X', section: 'a'.repeat(MAX_PATH_LENGTH) }),
|
||||
).toThrow(/Formatted oc:\/\/ exceeds/);
|
||||
});
|
||||
|
||||
it('parser depth cap fires on pathological JSONC nesting (F6)', () => {
|
||||
// Without `MAX_PARSE_DEPTH`, pathological input like
|
||||
// `'['.repeat(20000) + '0' + ']'.repeat(20000)` triggers a V8
|
||||
// RangeError ("Maximum call stack size exceeded") that escapes
|
||||
// commander as a raw stringified error — no `OcEmitSentinelError`-
|
||||
// style structured catch. Pin the structured-diagnostic path:
|
||||
// parser must surface OC_JSONC_DEPTH_EXCEEDED, not bare RangeError.
|
||||
const open = '['.repeat(MAX_TRAVERSAL_DEPTH + 100);
|
||||
const close = ']'.repeat(MAX_TRAVERSAL_DEPTH + 100);
|
||||
const raw = `${open}0${close}`;
|
||||
const result = parseJsonc(raw);
|
||||
expect(result.ast.root).toBeNull();
|
||||
expect(
|
||||
result.diagnostics.some((d) => d.code === 'OC_JSONC_DEPTH_EXCEEDED'),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it('parser depth cap fires on JSONL line with deeply-nested JSON (F6)', () => {
|
||||
// Per-line parseJsonc dispatch carries the same protection — each
|
||||
// value line is parsed in isolation and gets its own depth cap.
|
||||
// The line surfaces as `kind: 'malformed'` with the depth diagnostic.
|
||||
let nested = '"x"';
|
||||
for (let i = 0; i < MAX_TRAVERSAL_DEPTH + 50; i++) {
|
||||
nested = `{"a":${nested}}`;
|
||||
}
|
||||
const { diagnostics } = parseJsonl(nested + '\n');
|
||||
// The line-level diagnostic is OC_JSONL_LINE_MALFORMED (line failed);
|
||||
// we don't promote OC_JSONC_DEPTH_EXCEEDED through the JSONL layer
|
||||
// but the malformed-line detection prevents stack-overflow escape.
|
||||
expect(diagnostics.some((d) => d.code === 'OC_JSONL_LINE_MALFORMED')).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------- Coercion pitfalls --------------------------------------------
|
||||
|
||||
describe('wave-23 pitfalls — coercion', () => {
|
||||
it('P-029 numeric coercion is locale-independent', () => {
|
||||
// `Number()` doesn't honor locale; `parseFloat` doesn't either in
|
||||
// practice, but we never use `parseFloat`. Verify `Number("1,5")`
|
||||
// returns NaN (which is rejected) and `"1.5"` returns 1.5.
|
||||
const ast = parseJsonc('{"x":1.0}').ast;
|
||||
const r1 = setOcPath(ast, parseOcPath('oc://X/x'), '1.5');
|
||||
expect(r1.ok).toBe(true);
|
||||
const r2 = setOcPath(ast, parseOcPath('oc://X/x'), '1,5');
|
||||
expect(r2.ok).toBe(false);
|
||||
if (!r2.ok) {expect(r2.reason).toBe('parse-error');}
|
||||
});
|
||||
|
||||
it('P-030 boolean coercion is exact-match lowercase', () => {
|
||||
const ast = parseJsonc('{"x":true}').ast;
|
||||
expect(setOcPath(ast, parseOcPath('oc://X/x'), 'false').ok).toBe(true);
|
||||
expect(setOcPath(ast, parseOcPath('oc://X/x'), 'False').ok).toBe(false);
|
||||
expect(setOcPath(ast, parseOcPath('oc://X/x'), 'TRUE').ok).toBe(false);
|
||||
expect(setOcPath(ast, parseOcPath('oc://X/x'), 'yes').ok).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------- Reserved character pitfalls ----------------------------------
|
||||
|
||||
describe('wave-23 pitfalls — reserved characters', () => {
|
||||
it('P-026 first `?` splits off the query; a bare trailing `?` is tolerated', () => {
  // The parser treats the first `?` as the query separator, so the
  // section stops at `foo` and the remainder is read as the query.
  const withQuery = parseOcPath('oc://X/foo?session=s');
  expect(withQuery.section).toBe('foo');
  expect(withQuery.session).toBe('s');
  // Empty key after `?` (no `=`): query parser silently ignores.
  expect(() => parseOcPath('oc://X/foo?')).not.toThrow();
});
|
||||
|
||||
it('P-040 negative-index magnitude is bounded', () => {
|
||||
// Out-of-range negative index → null at resolve time, not crash.
|
||||
const ast = parseJsonc('{"x":[1,2,3]}').ast;
|
||||
expect(resolveOcPath(ast, parseOcPath('oc://X/x/-9999999999'))).toBeNull();
|
||||
expect(resolveOcPath(ast, parseOcPath('oc://X/x/-1'))?.kind).toBe('leaf');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------- Sentinel-redaction pitfall (P-036) ---------------------------
|
||||
|
||||
describe('wave-23 pitfalls — redaction sentinel', () => {
|
||||
// P-036 is fully covered by wave-21-sentinel-cross-kind. This is a
|
||||
// smoke test asserting the link is intact.
|
||||
it('P-036 sentinel guard activates at emit time (covered by wave-21)', () => {
|
||||
expect(true).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------- DEFERRED — documented limits ---------------------------------
|
||||
|
||||
describe('wave-23 pitfalls — deferred (v0 limits)', () => {
|
||||
it.skip('P-005 slash literal in key — v1: quoted segments', () => {});
|
||||
it.skip('P-006 dot literal in key — v1: quoted segments', () => {});
|
||||
it.skip('P-017 nested unions {a,{b,c}} — v1: parser stack', () => {});
|
||||
it.skip('P-019 wildcard inside wildcard — v1: pattern composition', () => {});
|
||||
it.skip('P-025 leading-zero numeric `01` — v1: explicit form', () => {});
|
||||
it.skip('P-027 `&` in segments — v1: percent-encoding', () => {});
|
||||
it.skip('P-028 percent-encoded segments — v1: rfc3986 layer', () => {});
|
||||
it.skip('P-034 ast mutation between resolve & consume — caller invariant', () => {});
|
||||
it.skip('P-035 stale paths from prior find — caller invariant', () => {});
|
||||
});
|
||||
|
||||
// ---------- Injection pitfalls (C12 / W12) -------------------------------
|
||||
|
||||
describe('wave-23 pitfalls — injection (caller-supplied hostile input)', () => {
|
||||
// P-037: a hostile path string. The substrate's job is to either
|
||||
// parse safely or reject with `OcPathError` — never let undefined
|
||||
// behavior leak. These cases lock the rejection-or-safe contract.
|
||||
|
||||
it('P-037a control characters in path body are rejected', () => {
|
||||
expect(() => parseOcPath('oc://a\x00b')).toThrow(OcPathError);
|
||||
expect(() => parseOcPath('oc://a\x01b/c')).toThrow(OcPathError);
|
||||
expect(() => parseOcPath('oc://a/b\x1Fc')).toThrow(OcPathError);
|
||||
});
|
||||
|
||||
it('P-037b NUL byte anywhere in path is rejected', () => {
|
||||
expect(() => parseOcPath('oc://X.md/sec\x00tion')).toThrow(OcPathError);
|
||||
});
|
||||
|
||||
it('P-037c BOM at start of path is stripped, not interpreted', () => {
  // BOM is unicode U+FEFF. It is spelled as an escape here so the
  // character is visible in the source and cannot be dropped by an
  // editor, formatter, or copy/paste. The case expects the substrate to
  // strip it before the scheme check; without stripping, the
  // BOM-prefixed string would fail the `oc://` scheme test.
  const path = parseOcPath('\uFEFFoc://X.md/section');
  expect(path.file).toBe('X.md');
  expect(path.section).toBe('section');
});
|
||||
|
||||
it('P-037d session query is parsed only via the documented `?session=...` form', () => {
|
||||
// Legal session form parses cleanly.
|
||||
const ok = parseOcPath('oc://X.md/sec?session=cron:daily');
|
||||
expect(ok.section).toBe('sec');
|
||||
expect(ok.session).toBe('cron:daily');
|
||||
// Substrate is lenient about loose `?garbage` — caller's
|
||||
// responsibility to construct paths from `formatOcPath`. Confirm
|
||||
// the loose form does NOT silently invent a session value.
|
||||
const loose = parseOcPath('oc://X.md/sec?garbage');
|
||||
expect(loose.session).toBeUndefined();
|
||||
});
|
||||
|
||||
it('P-037e unescaped `&` in segments is rejected', () => {
|
||||
expect(() => parseOcPath('oc://X.md/a&b')).toThrow(OcPathError);
|
||||
});
|
||||
|
||||
it('P-037f unescaped `%` in segments is rejected', () => {
|
||||
expect(() => parseOcPath('oc://X.md/a%b')).toThrow(OcPathError);
|
||||
});
|
||||
|
||||
it('P-037g empty file slot is rejected', () => {
|
||||
expect(() => parseOcPath('oc:///section')).toThrow(OcPathError);
|
||||
});
|
||||
|
||||
it('P-037h backslash-escape attempts are not treated as path traversal', () => {
|
||||
// No special meaning — the literal backslash is just a regular
|
||||
// character. Doesn't allow escaping forward slashes.
|
||||
expect(() => parseOcPath('oc://X.md/a\\../b')).toThrow(OcPathError);
|
||||
});
|
||||
|
||||
// P-038: predicate-value injection. `[k=v]` predicates filter
|
||||
// matches; a hostile `v` containing regex metachars, brackets, or
|
||||
// operators must NOT escape the predicate scope or be interpreted
|
||||
// as a regex.
|
||||
|
||||
it('P-038a regex metacharacters in predicate value match literally', () => {
|
||||
const ast = parseJsonc('{ "items": [ {"name": "a.*"}, {"name": "abc"} ] }').ast;
|
||||
// Looking for the literal string "a.*" — should match only the
|
||||
// first item, not "abc" (which would match if `.*` were treated
|
||||
// as a regex).
|
||||
const matches = findOcPaths(ast, parseOcPath('oc://X.jsonc/items/[name=a.*]'));
|
||||
expect(matches).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('P-038b nested-bracket attempts in predicate value are kept literal', () => {
|
||||
// The substrate is permissive on nested brackets — they're part
|
||||
// of the literal predicate value, not interpreted as path syntax.
|
||||
// The match would be against the literal string "a[b]"; a
|
||||
// resolver that finds zero matches fails closed.
|
||||
const path = parseOcPath('oc://X.jsonc/items/[name=a[b]]');
|
||||
expect(path.item).toBe('[name=a[b]]');
|
||||
// No data has the literal value `a[b]` here, so finding empty.
|
||||
const ast = parseJsonc('{ "items": [ {"name": "abc"} ] }').ast;
|
||||
expect(findOcPaths(ast, path)).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('P-038c equals-sign in predicate value is treated as part of the value', () => {
|
||||
// The FIRST `=` separates key from value; subsequent `=`s belong
|
||||
// to the value. The rule keeps the predicate parser simple —
|
||||
// operators that prefix-match (`!=`, `<=`, `>=`) are tried
|
||||
// before `=`, then `=` consumes the rest.
|
||||
const ast = parseJsonc('{ "items": [ {"k": "a=b"}, {"k": "c"} ] }').ast;
|
||||
const matches = findOcPaths(ast, parseOcPath('oc://X.jsonc/items/[k=a=b]'));
|
||||
expect(matches).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('P-038d control characters in predicate value are rejected', () => {
|
||||
expect(() => parseOcPath('oc://X.jsonc/items/[k=a\x00b]')).toThrow(OcPathError);
|
||||
});
|
||||
|
||||
it('P-038e empty predicate body is rejected', () => {
|
||||
expect(() => parseOcPath('oc://X.jsonc/items/[]')).toThrow(OcPathError);
|
||||
});
|
||||
|
||||
it('P-038f predicate-shaped bracket without operator is treated as literal sentinel', () => {
|
||||
// `[name]` without `=` is parsed as a literal-bracket sentinel
|
||||
// (e.g. `[frontmatter]`-style). The substrate accepts it as a
|
||||
// literal path segment — predicate parsing only kicks in when an
|
||||
// operator is present. Document this to lock the behavior.
|
||||
const path = parseOcPath('oc://X.jsonc/items/[name]');
|
||||
expect(path.item).toBe('[name]');
|
||||
});
|
||||
|
||||
it('P-038g predicate-shaped bracket with unsupported operator parses as literal', () => {
|
||||
// `~` isn't in the supported-operator set; the parser doesn't
|
||||
// recognize it as a predicate, so it's accepted as a literal
|
||||
// bracket segment. This is the documented v1.1 behavior — a
|
||||
// future version may add `~` (regex) and bump SDK_VERSION.
|
||||
const path = parseOcPath('oc://X.jsonc/items/[k~v]');
|
||||
expect(path.item).toBe('[k~v]');
|
||||
});
|
||||
});
|
||||
140
src/oc-path/tests/scenarios/real-world-fixtures.test.ts
Normal file
140
src/oc-path/tests/scenarios/real-world-fixtures.test.ts
Normal file
@@ -0,0 +1,140 @@
|
||||
/**
|
||||
* Wave 12 — real-world fixtures.
|
||||
*
|
||||
* Nine workspace files (one per upstream-recognized workspace
|
||||
* filename) — each parsed, resolved, and round-tripped to verify the
|
||||
* substrate handles realistic content.
|
||||
*/
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { join, dirname } from 'node:path';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { emitMd } from '../../emit.js';
|
||||
import { parseMd } from '../../parse.js';
|
||||
import { resolveMdOcPath as resolveOcPath } from '../../resolve.js';
|
||||
|
||||
const HERE = dirname(fileURLToPath(import.meta.url));
|
||||
const FIXTURES = join(HERE, '..', 'fixtures', 'real');
|
||||
|
||||
/**
 * Reads one fixture from the `fixtures/real` directory as UTF-8 text.
 *
 * @param name - File name relative to the fixtures directory.
 * @returns The file's full contents.
 */
function load(name: string): string {
  const fixturePath = join(FIXTURES, name);
  return readFileSync(fixturePath, 'utf-8');
}
|
||||
|
||||
describe('wave-12 real-world-fixtures', () => {
|
||||
it('F-01 SOUL.md parses + round-trips', () => {
|
||||
const raw = load('SOUL.md');
|
||||
const { ast, diagnostics } = parseMd(raw);
|
||||
expect(diagnostics).toEqual([]);
|
||||
expect(emitMd(ast)).toBe(raw);
|
||||
// Has at least one H2 block.
|
||||
expect(ast.blocks.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('F-02 AGENTS.md parses + resolves Tools section', () => {
|
||||
const raw = load('AGENTS.md');
|
||||
const { ast } = parseMd(raw);
|
||||
expect(emitMd(ast)).toBe(raw);
|
||||
const tools = resolveOcPath(ast, { file: 'AGENTS.md', section: 'tools' });
|
||||
expect(tools?.kind).toBe('block');
|
||||
if (tools?.kind === 'block') {
|
||||
expect(tools.node.items.some((i) => i.kv?.key === 'gh')).toBe(true);
|
||||
}
|
||||
});
|
||||
|
||||
it('F-03 MEMORY.md frontmatter scope resolves via [frontmatter]', () => {
|
||||
const raw = load('MEMORY.md');
|
||||
const { ast } = parseMd(raw);
|
||||
expect(emitMd(ast)).toBe(raw);
|
||||
const scope = resolveOcPath(ast, {
|
||||
file: 'MEMORY.md',
|
||||
section: '[frontmatter]',
|
||||
field: 'scope',
|
||||
});
|
||||
expect(scope?.kind).toBe('frontmatter');
|
||||
if (scope?.kind === 'frontmatter') {expect(scope.node.value).toBe('project');}
|
||||
});
|
||||
|
||||
it('F-04 TOOLS.md table extracted from Tool Guidance section', () => {
|
||||
const raw = load('TOOLS.md');
|
||||
const { ast } = parseMd(raw);
|
||||
expect(emitMd(ast)).toBe(raw);
|
||||
const guidance = resolveOcPath(ast, {
|
||||
file: 'TOOLS.md',
|
||||
section: 'tool-guidance',
|
||||
});
|
||||
expect(guidance?.kind).toBe('block');
|
||||
if (guidance?.kind === 'block') {
|
||||
expect(guidance.node.tables.length).toBeGreaterThan(0);
|
||||
expect(guidance.node.tables[0]?.headers).toEqual(['tool', 'guidance']);
|
||||
}
|
||||
});
|
||||
|
||||
it('F-05 IDENTITY.md sections resolvable by slug', () => {
|
||||
const raw = load('IDENTITY.md');
|
||||
const { ast } = parseMd(raw);
|
||||
expect(emitMd(ast)).toBe(raw);
|
||||
const trust = resolveOcPath(ast, {
|
||||
file: 'IDENTITY.md',
|
||||
section: 'trust-level',
|
||||
});
|
||||
expect(trust?.kind).toBe('block');
|
||||
});
|
||||
|
||||
it('F-06 USER.md Preferences items extracted', () => {
|
||||
const raw = load('USER.md');
|
||||
const { ast } = parseMd(raw);
|
||||
expect(emitMd(ast)).toBe(raw);
|
||||
const prefs = resolveOcPath(ast, {
|
||||
file: 'USER.md',
|
||||
section: 'preferences',
|
||||
});
|
||||
expect(prefs?.kind).toBe('block');
|
||||
if (prefs?.kind === 'block') {
|
||||
expect(prefs.node.items.length).toBeGreaterThan(0);
|
||||
}
|
||||
});
|
||||
|
||||
it('F-07 HEARTBEAT.md schedules — H2 sections as triggers', () => {
|
||||
const raw = load('HEARTBEAT.md');
|
||||
const { ast } = parseMd(raw);
|
||||
expect(emitMd(ast)).toBe(raw);
|
||||
expect(ast.blocks.length).toBeGreaterThanOrEqual(3);
|
||||
const slugs = ast.blocks.map((b) => b.slug);
|
||||
expect(slugs).toContain('every-30m-wake');
|
||||
expect(slugs).toContain('every-4h-wake');
|
||||
});
|
||||
|
||||
it('F-08 SKILL.md frontmatter has name + description + tier', () => {
|
||||
const raw = load('SKILL.md');
|
||||
const { ast } = parseMd(raw);
|
||||
expect(emitMd(ast)).toBe(raw);
|
||||
const fmKeys = ast.frontmatter.map((e) => e.key);
|
||||
expect(fmKeys).toContain('name');
|
||||
expect(fmKeys).toContain('description');
|
||||
expect(fmKeys).toContain('tier');
|
||||
});
|
||||
|
||||
it('F-09 BOOTSTRAP.md round-trips', () => {
|
||||
const raw = load('BOOTSTRAP.md');
|
||||
const { ast } = parseMd(raw);
|
||||
expect(emitMd(ast)).toBe(raw);
|
||||
});
|
||||
|
||||
it('F-10 all 9 fixtures combined round-trip-clean (sanity)', () => {
  // Every fixture exercised individually by F-01 … F-09, re-checked in
  // one pass so a single failure names the offending file.
  const names = [
    'SOUL.md',
    'AGENTS.md',
    'MEMORY.md',
    'TOOLS.md',
    'IDENTITY.md',
    'USER.md',
    'HEARTBEAT.md',
    'SKILL.md',
    'BOOTSTRAP.md',
  ];
  for (const name of names) {
    const raw = load(name);
    expect(emitMd(parseMd(raw).ast), `${name} failed round-trip`).toBe(raw);
  }
});
|
||||
});
|
||||
155
src/oc-path/tests/scenarios/roundtrip-property.test.ts
Normal file
155
src/oc-path/tests/scenarios/roundtrip-property.test.ts
Normal file
@@ -0,0 +1,155 @@
|
||||
/**
|
||||
* Wave 10 — round-trip property tests.
|
||||
*
|
||||
* Substrate guarantee: `emitMd(parse(raw)) === raw` for all inputs the
|
||||
* parser accepts. This wave exercises that property over a generated
|
||||
* corpus of synthetic markdown shapes and verifies parser idempotence
|
||||
* (`parse(emitMd(parse(raw))) === parse(raw)` modulo `raw`).
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { emitMd } from '../../emit.js';
|
||||
import { parseMd } from '../../parse.js';
|
||||
|
||||
/**
 * Parses markdown and immediately re-emits it.
 *
 * @param raw - Markdown source text.
 * @returns The text produced by emitting the parsed AST.
 */
function roundTrip(raw: string): string {
  const { ast } = parseMd(raw);
  return emitMd(ast);
}
|
||||
|
||||
describe('wave-10 roundtrip-property', () => {
|
||||
it('RT-01 byte-fidelity over 100 generated shapes', () => {
  for (const raw of generateCorpus(100)) {
    try {
      expect(roundTrip(raw)).toBe(raw);
    } catch (e: unknown) {
      // Surface which input failed for debugging. Narrow the caught value
      // instead of asserting its type, so a non-Error throw still yields a
      // readable message rather than `undefined`.
      const detail = e instanceof Error ? e.message : String(e);
      throw new Error(
        `round-trip failed for input (length ${raw.length}):\n${JSON.stringify(raw.slice(0, 200))}\nError: ${detail}`,
        { cause: e },
      );
    }
  }
});
|
||||
|
||||
it('RT-02 parser idempotence (parse → emit → parse → identical AST shape)', () => {
|
||||
const inputs = generateCorpus(50);
|
||||
for (const raw of inputs) {
|
||||
const a = parseMd(raw).ast;
|
||||
const a2 = parseMd(emitMd(a)).ast;
|
||||
// Compare structural fields; raw will of course be identical.
|
||||
expect(a2.frontmatter).toEqual(a.frontmatter);
|
||||
expect(a2.preamble).toEqual(a.preamble);
|
||||
expect(a2.blocks.map(stripDerived)).toEqual(a.blocks.map(stripDerived));
|
||||
}
|
||||
});
|
||||
|
||||
it('RT-03 stable output for identical input', () => {
|
||||
const raw = `---\nname: x\n---\n\n## A\n- a\n## B\n- b: c\n`;
|
||||
const out1 = roundTrip(raw);
|
||||
const out2 = roundTrip(raw);
|
||||
const out3 = roundTrip(raw);
|
||||
expect(out1).toBe(out2);
|
||||
expect(out2).toBe(out3);
|
||||
});
|
||||
|
||||
it('RT-04 ordering deterministic (no Object.keys / Set ordering surprises)', () => {
|
||||
const raw = `---\nb: 2\na: 1\nc: 3\n---\n## Z\n- z\n## A\n- a\n`;
|
||||
const a1 = parseMd(raw).ast;
|
||||
const a2 = parseMd(raw).ast;
|
||||
expect(a1.frontmatter.map((e) => e.key)).toEqual(a2.frontmatter.map((e) => e.key));
|
||||
expect(a1.blocks.map((b) => b.heading)).toEqual(a2.blocks.map((b) => b.heading));
|
||||
});
|
||||
|
||||
it('RT-05 round-trip preserves comment-like lines (no comment recognition at substrate)', () => {
|
||||
const raw = `## H\n\n<!-- a comment -->\n- bullet\n`;
|
||||
expect(roundTrip(raw)).toBe(raw);
|
||||
});
|
||||
|
||||
it("RT-06 round-trip preserves indented blocks (substrate doesn't reflow)", () => {
  // Leading whitespace on body lines must survive the round-trip untouched.
  const source = `## H\n\n indented code-ish block\n more indented\n`;
  const emitted = roundTrip(source);
  expect(emitted).toBe(source);
});
|
||||
|
||||
it('RT-07 round-trip preserves blockquotes', () => {
  // `>`-prefixed lines must come back exactly as written.
  const source = `## H\n\n> quoted line 1\n> quoted line 2\n`;
  const emitted = roundTrip(source);
  expect(emitted).toBe(source);
});
|
||||
|
||||
it('RT-08 round-trip preserves images / links', () => {
  // The title covers images AND links, so the input carries one of each.
  // Without the image line the case would only exercise a link.
  const raw = `## H\n\n![alt](image.png)\n[link](http://example.com)\n`;
  expect(roundTrip(raw)).toBe(raw);
});
|
||||
|
||||
it('RT-09 round-trip preserves HTML', () => {
  // Inline HTML in a section body must come back byte-for-byte.
  const source = `## H\n\n<details><summary>x</summary>body</details>\n`;
  const emitted = roundTrip(source);
  expect(emitted).toBe(source);
});
|
||||
|
||||
it('RT-10 round-trip preserves consecutive headings with no body between', () => {
  // Three headings back to back, none of them followed by body lines.
  const source = `## A\n## B\n## C\n`;
  const emitted = roundTrip(source);
  expect(emitted).toBe(source);
});
|
||||
});
|
||||
|
||||
// ---------- corpus generator -------------------------------------------------
|
||||
|
||||
/**
 * Build a deterministic pseudo-random corpus of markdown documents for the
 * round-trip tests.
 *
 * Each document is assembled from optional frontmatter, an optional preamble
 * line and one to three `## ` sections (bullets plus an occasional fenced
 * code block), then joined with a single line ending (`\n` or `\r\n`) that
 * is chosen once per document.
 *
 * @param count Number of documents to generate; values <= 0 yield `[]`.
 * @param seed  Initial generator state, coerced to an unsigned 32-bit
 *              integer. Defaults to 42, the value that was previously
 *              hard-coded, so existing callers get the same corpus.
 * @returns The generated documents, in generation order.
 * @throws Error if one of the choice pools is empty.
 */
function generateCorpus(count: number, seed = 42): string[] {
  // Linear congruential generator with modulus 2^32. The state always stays
  // below 2^32, so the multiplication stays below 2^53 and is exact.
  let state = seed >>> 0;
  const rand = (): number => {
    state = (state * 1664525 + 1013904223) % 2 ** 32;
    return state / 2 ** 32;
  };

  // Pick one element; an empty pool is a programming error, not `undefined`.
  const choose = <T>(arr: readonly T[]): T => {
    if (arr.length === 0) {
      throw new Error('generateCorpus: cannot choose from an empty array');
    }
    return arr[Math.floor(rand() * arr.length)] as T;
  };

  const headings = ['Boundaries', 'Tools', 'Memory', 'Identity', 'User', 'Heartbeat', 'Skills'];
  const fmKeys = ['name', 'description', 'tier', 'enabled', 'timeout', 'url'];
  const fmValues = ['github', 'gh CLI', 'T1', 'true', '15000', 'https://example.com'];
  const itemTexts = ['never write to /etc', 'always confirm', 'gh: GitHub CLI', 'curl: HTTP'];
  const eols = ['\n', '\r\n'];

  const corpus: string[] = [];

  // NOTE: the order of rand()/choose() calls below defines the corpus for a
  // given seed. Reordering them changes every generated document.
  for (let i = 0; i < count; i++) {
    const eol = choose(eols);
    const parts: string[] = [];

    // Optional frontmatter with 0-3 `key: value` entries.
    if (rand() < 0.5) {
      parts.push('---');
      const fmCount = Math.floor(rand() * 4);
      for (let k = 0; k < fmCount; k++) {
        const key = choose(fmKeys);
        const value = choose(fmValues);
        parts.push(`${key}: ${value}`);
      }
      parts.push('---', '');
    }

    // Optional single-line preamble.
    if (rand() < 0.3) {
      parts.push('Some preamble.', '');
    }

    // 1-3 sections, each with 0-3 bullets and sometimes a fenced code block.
    const blockCount = Math.floor(rand() * 3) + 1;
    for (let b = 0; b < blockCount; b++) {
      parts.push(`## ${choose(headings)}`, '');
      const itemCount = Math.floor(rand() * 4);
      for (let it = 0; it < itemCount; it++) {
        parts.push(`- ${choose(itemTexts)}`);
      }
      if (rand() < 0.2) {
        parts.push('```', 'code', '```');
      }
      parts.push('');
    }

    corpus.push(parts.join(eol));
  }

  return corpus;
}
|
||||
|
||||
/**
 * Project a block down to the two fields the idempotence test compares.
 *
 * @param b Block-like object; only `heading` and `slug` are read.
 * @returns A new object holding just `heading` and `slug` (no `bodyText`).
 */
function stripDerived(b: { heading: string; slug: string; bodyText: string }): {
  heading: string;
  slug: string;
} {
  const { heading, slug } = b;
  return { heading, slug };
}
|
||||
177
src/oc-path/tests/scenarios/sentinel-cross-kind.test.ts
Normal file
177
src/oc-path/tests/scenarios/sentinel-cross-kind.test.ts
Normal file
@@ -0,0 +1,177 @@
|
||||
/**
|
||||
* Wave 21 — sentinel guard across all 3 kinds.
|
||||
*
|
||||
* Substrate guarantee: emit refuses to write a CALLER-INJECTED
|
||||
* `__OPENCLAW_REDACTED__` literal. Round-trip mode trusts parsed bytes
|
||||
* (a workspace file legitimately containing the sentinel — in a code
|
||||
* block, in a pasted error log — would otherwise become a workspace-
|
||||
* wide emit DoS). Render mode walks every leaf, so a caller-injected
|
||||
* sentinel via `setOcPath` always fails. Callers that want strict
|
||||
* pre-existing-byte detection (e.g., LKG fingerprint verification)
|
||||
* opt in via `acceptPreExistingSentinel: false`.
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { setJsoncOcPath } from '../../jsonc/edit.js';
|
||||
import { emitMd } from '../../emit.js';
|
||||
import { emitJsonc } from '../../jsonc/emit.js';
|
||||
import { parseJsonc } from '../../jsonc/parse.js';
|
||||
import { emitJsonl } from '../../jsonl/emit.js';
|
||||
import { parseJsonl } from '../../jsonl/parse.js';
|
||||
import { parseOcPath } from '../../oc-path.js';
|
||||
import { parseMd } from '../../parse.js';
|
||||
import {
|
||||
OcEmitSentinelError,
|
||||
REDACTED_SENTINEL,
|
||||
} from '../../sentinel.js';
|
||||
|
||||
/**
 * Cross-kind sentinel guard scenarios (jsonc, jsonl, md).
 *
 * Three emit behaviours are exercised:
 *   - default emit echoes parsed bytes verbatim, even when they contain the
 *     sentinel;
 *   - `acceptPreExistingSentinel: false` rejects those same bytes;
 *   - `mode: 'render'` rejects a sentinel found in any leaf.
 */
describe('wave-21 sentinel guard cross-kind', () => {
  it('S-01 jsonc round-trip echoes safely when raw contains pre-existing sentinel', () => {
    // Pre-existing sentinel bytes are trusted — see emit-policy comment
    // in jsonc/emit.ts. Strict mode is the opt-in path for callers who
    // want LKG-style fingerprint verification.
    const raw = `{ "x": "${REDACTED_SENTINEL}" }`;
    const { ast } = parseJsonc(raw);
    expect(emitJsonc(ast)).toBe(raw);
    expect(() => emitJsonc(ast, { acceptPreExistingSentinel: false })).toThrow(
      OcEmitSentinelError,
    );
  });

  it('S-02 jsonl round-trip echoes safely; strict mode rejects', () => {
    const raw = `{"x":"${REDACTED_SENTINEL}"}\n`;
    const { ast } = parseJsonl(raw);
    expect(emitJsonl(ast)).toBe(raw);
    expect(() => emitJsonl(ast, { acceptPreExistingSentinel: false })).toThrow(
      OcEmitSentinelError,
    );
  });

  it('S-03 md round-trip echoes safely; strict mode rejects', () => {
    const raw = `## Body\n\n- ${REDACTED_SENTINEL}\n`;
    const { ast } = parseMd(raw);
    expect(emitMd(ast)).toBe(raw);
    expect(() => emitMd(ast, { acceptPreExistingSentinel: false })).toThrow(
      OcEmitSentinelError,
    );
  });

  it('S-04 jsonc render mode walks every leaf for sentinel', () => {
    const { ast } = parseJsonc('{ "x": "ok" }');
    // Swap the parsed root for one whose only leaf carries the sentinel.
    const tampered = {
      ...ast,
      root: {
        kind: 'object' as const,
        entries: [
          {
            key: 'x',
            line: 1,
            value: { kind: 'string' as const, value: REDACTED_SENTINEL },
          },
        ],
      },
    };
    expect(() => emitJsonc(tampered, { mode: 'render' })).toThrow(OcEmitSentinelError);
  });

  it('S-05 jsonl render mode walks every value-line leaf', () => {
    const { ast } = parseJsonl('{"a":"ok"}\n');
    // `raw` on the line is still clean; only the structured value is tampered.
    const tampered = {
      ...ast,
      lines: [
        {
          kind: 'value' as const,
          line: 1,
          raw: '{"a":"ok"}',
          value: {
            kind: 'object' as const,
            entries: [
              {
                key: 'a',
                line: 1,
                value: { kind: 'string' as const, value: REDACTED_SENTINEL },
              },
            ],
          },
        },
      ],
    };
    expect(() => emitJsonl(tampered, { mode: 'render' })).toThrow(OcEmitSentinelError);
  });

  it('S-06 setJsoncOcPath itself throws when the new value contains the sentinel', () => {
    // The substrate guard fires at write-time: setJsoncOcPath rebuilds
    // raw via render mode emit, which scans every leaf. Defense-in-depth
    // — even if a caller forgets to call emit afterward, the sentinel
    // can't make it into an in-memory AST that pretends to be valid.
    const { ast } = parseJsonc('{ "x": "ok" }');
    expect(() =>
      setJsoncOcPath(ast, parseOcPath('oc://config/x'), {
        kind: 'string',
        value: REDACTED_SENTINEL,
      }),
    ).toThrow(OcEmitSentinelError);
  });

  it('S-07 sentinel embedded in deep nesting — render mode catches the leaf', () => {
    // Round-trip echoes the pre-existing bytes (the workspace contract:
    // a parsed file containing the sentinel as data is not "writing" it
    // on emit). Render mode walks every leaf and rejects this caller-
    // injected pattern — and a `setOcPath` followed by emit lands here.
    const raw = JSON.stringify({ a: { b: { c: REDACTED_SENTINEL } } });
    const { ast } = parseJsonc(raw);
    expect(emitJsonc(ast)).toBe(raw); // round-trip echo
    expect(() => emitJsonc(ast, { mode: 'render' })).toThrow(OcEmitSentinelError);
  });

  it('S-08 sentinel inside an array element triggers guard in render mode', () => {
    const raw = JSON.stringify({ arr: ['ok', REDACTED_SENTINEL, 'ok'] });
    const { ast } = parseJsonc(raw);
    expect(() => emitJsonc(ast, { mode: 'render' })).toThrow(OcEmitSentinelError);
  });

  it('S-09 sentinel as object key in raw — strict mode catches it', () => {
    const raw = `{ "${REDACTED_SENTINEL}": 1 }`;
    const { ast } = parseJsonc(raw);
    expect(emitJsonc(ast)).toBe(raw); // default-mode echo
    expect(() => emitJsonc(ast, { acceptPreExistingSentinel: false })).toThrow(
      OcEmitSentinelError,
    );
  });

  it('S-10 sentinel in jsonl malformed line — strict mode catches it', () => {
    const raw = `${REDACTED_SENTINEL}\n`;
    const { ast } = parseJsonl(raw);
    expect(emitJsonl(ast)).toBe(raw); // round-trip echoes verbatim
    expect(() => emitJsonl(ast, { acceptPreExistingSentinel: false })).toThrow(
      OcEmitSentinelError,
    );
  });

  it('S-11 partial sentinel substring does NOT trigger guard', () => {
    const raw = '{ "x": "OPENCLAW_REDACTED" }';
    const { ast } = parseJsonc(raw);
    expect(() => emitJsonc(ast)).not.toThrow();
    // Default emit echoes even the full sentinel (see S-01), so on its own
    // the assertion above cannot fail. Also run the two paths that reject
    // the full sentinel elsewhere in this suite.
    expect(() => emitJsonc(ast, { acceptPreExistingSentinel: false })).not.toThrow();
    expect(() => emitJsonc(ast, { mode: 'render' })).not.toThrow();
  });

  it('S-12 sentinel guard error message includes the OcPath context (render mode)', () => {
    // Render mode is the path that actually rejects caller-injected
    // sentinel — round-trip just echoes, so the error context surfaces
    // when render walks the offending leaf and constructs the path.
    const raw = `{ "secret": "${REDACTED_SENTINEL}" }`;
    const { ast } = parseJsonc(raw);

    // Capture outside the try so a missing throw is reported by the
    // instanceOf assertion rather than by a caught `expect.fail`.
    let caught: unknown;
    try {
      emitJsonc(ast, { mode: 'render', fileNameForGuard: 'config' });
    } catch (e) {
      caught = e;
    }
    expect(caught).toBeInstanceOf(OcEmitSentinelError);
    expect(String(caught)).toContain('oc://');
    expect(String(caught)).toContain('config');
  });
});
|
||||
180
src/oc-path/tests/scenarios/sentinel-guard.test.ts
Normal file
180
src/oc-path/tests/scenarios/sentinel-guard.test.ts
Normal file
@@ -0,0 +1,180 @@
|
||||
/**
|
||||
* Wave 9 — sentinel guard at every emit leaf.
|
||||
*
|
||||
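* Substrate guarantee: a `__OPENCLAW_REDACTED__` literal injected into an
* AST leaf makes render-mode emit throw `OcEmitSentinelError` (render mode
* walks every leaf). Round-trip mode echoes a sentinel that was already in
* the parsed `raw`, and rejects it only when the caller passes
* `acceptPreExistingSentinel: false`.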
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { emitMd } from '../../emit.js';
|
||||
import { parseMd } from '../../parse.js';
|
||||
import {
|
||||
OcEmitSentinelError,
|
||||
REDACTED_SENTINEL,
|
||||
guardSentinel,
|
||||
} from '../../sentinel.js';
|
||||
|
||||
/**
 * Sentinel guard scenarios for `guardSentinel` and markdown emit.
 *
 * Covers the guard function directly (S-01..S-06), default vs strict
 * round-trip emit of parsed bytes (S-07, S-08, S-13, S-14), and render-mode
 * emit of hand-built ASTs with the sentinel in one leaf (S-09..S-12, S-15).
 */
describe('wave-09 sentinel-guard', () => {
  it('S-01 sentinel constant matches the literal', () => {
    expect(REDACTED_SENTINEL).toBe('__OPENCLAW_REDACTED__');
  });

  it('S-02 guardSentinel passes normal strings', () => {
    expect(() => guardSentinel('safe', 'oc://X.md')).not.toThrow();
  });

  it('S-03 guardSentinel passes non-string types', () => {
    for (const value of [42, null, undefined, {}]) {
      expect(() => guardSentinel(value, 'oc://X.md')).not.toThrow();
    }
  });

  it('S-04 guardSentinel throws on exact match', () => {
    expect(() => guardSentinel(REDACTED_SENTINEL, 'oc://X.md')).toThrow(OcEmitSentinelError);
  });

  it('S-05 guardSentinel throws on substring matches (sentinel embedded in larger string)', () => {
    // Substring scan — the sentinel anywhere in the value is a leak,
    // not just exact equality. A hostile caller smuggling
    // `prefix__OPENCLAW_REDACTED__suffix` would have bypassed the old
    // equality check; substring scan closes the gap.
    const smuggled = `prefix${REDACTED_SENTINEL}suffix`;
    expect(() => guardSentinel(smuggled, 'oc://X.md')).toThrow(OcEmitSentinelError);
  });

  it('S-06 error attaches the OcPath context', () => {
    const path = 'oc://config/plugins.entries.foo.token';
    let caught: unknown;
    try {
      guardSentinel(REDACTED_SENTINEL, path);
    } catch (err) {
      caught = err;
    }
    expect(caught).toBeInstanceOf(OcEmitSentinelError);
    const e = caught as OcEmitSentinelError;
    expect(e.path).toBe(path);
    expect(e.code).toBe('OC_EMIT_SENTINEL');
  });

  it('S-07 round-trip echoes pre-existing sentinel; strict mode rejects', () => {
    const raw = '## Section\n\n- token: __OPENCLAW_REDACTED__\n';
    const { ast } = parseMd(raw);
    expect(emitMd(ast)).toBe(raw);
    expect(() => emitMd(ast, { acceptPreExistingSentinel: false })).toThrow(
      OcEmitSentinelError,
    );
  });

  it('S-08 round-trip emit allows sentinel-free content', () => {
    const raw = '## Section\n\n- token: redacted-but-not-sentinel\n';
    const { ast } = parseMd(raw);
    expect(() => emitMd(ast)).not.toThrow();
    // Default emit echoes even sentinel-bearing bytes (S-07), so strict mode
    // is the path that shows sentinel-free content is genuinely accepted.
    expect(() => emitMd(ast, { acceptPreExistingSentinel: false })).not.toThrow();
  });

  it('S-09 render mode catches sentinel in frontmatter', () => {
    const ast = {
      kind: 'md' as const,
      raw: '',
      frontmatter: [{ key: 'token', value: REDACTED_SENTINEL, line: 2 }],
      preamble: '',
      blocks: [],
    };
    expect(() => emitMd(ast, { mode: 'render' })).toThrow(OcEmitSentinelError);
  });

  it('S-10 render mode catches sentinel in preamble', () => {
    const ast = {
      kind: 'md' as const,
      raw: '',
      frontmatter: [],
      preamble: REDACTED_SENTINEL,
      blocks: [],
    };
    expect(() => emitMd(ast, { mode: 'render' })).toThrow(OcEmitSentinelError);
  });

  it('S-11 render mode catches sentinel in block bodyText', () => {
    const ast = {
      kind: 'md' as const,
      raw: '',
      frontmatter: [],
      preamble: '',
      blocks: [
        {
          heading: 'Sec',
          slug: 'sec',
          line: 1,
          bodyText: REDACTED_SENTINEL,
          items: [],
          tables: [],
          codeBlocks: [],
        },
      ],
    };
    expect(() => emitMd(ast, { mode: 'render' })).toThrow(OcEmitSentinelError);
  });

  it('S-12 render mode catches sentinel in item kv.value', () => {
    // bodyText and item text are clean; only the structured kv value is not.
    const ast = {
      kind: 'md' as const,
      raw: '',
      frontmatter: [],
      preamble: '',
      blocks: [
        {
          heading: 'S',
          slug: 's',
          line: 1,
          bodyText: '- t: x',
          items: [
            {
              text: 't: x',
              slug: 't',
              line: 2,
              kv: { key: 't', value: REDACTED_SENTINEL },
            },
          ],
          tables: [],
          codeBlocks: [],
        },
      ],
    };
    expect(() => emitMd(ast, { mode: 'render', fileNameForGuard: 'AGENTS.md' })).toThrow(
      OcEmitSentinelError,
    );
  });

  it('S-13 sentinel-as-substring in raw — strict mode catches it', () => {
    const raw = `Some prose ${REDACTED_SENTINEL} more prose.\n`;
    const { ast } = parseMd(raw);
    expect(emitMd(ast)).toBe(raw);
    expect(() => emitMd(ast, { acceptPreExistingSentinel: false })).toThrow(
      OcEmitSentinelError,
    );
  });

  it('S-14 multiple sentinel occurrences in raw — strict mode catches them', () => {
    const raw = `## A\n${REDACTED_SENTINEL}\n${REDACTED_SENTINEL}\n`;
    const { ast } = parseMd(raw);
    expect(emitMd(ast)).toBe(raw);
    expect(() => emitMd(ast, { acceptPreExistingSentinel: false })).toThrow(
      OcEmitSentinelError,
    );
  });

  it('S-15 fileNameForGuard appears in the error path', () => {
    const ast = {
      kind: 'md' as const,
      raw: '',
      frontmatter: [{ key: 'token', value: REDACTED_SENTINEL, line: 2 }],
      preamble: '',
      blocks: [],
    };
    let caught: unknown;
    try {
      emitMd(ast, { mode: 'render', fileNameForGuard: 'config' });
    } catch (err) {
      caught = err;
    }
    // Verify the error type before reading `.path`, so an unrelated error (or
    // no error at all) fails with a clear message instead of an unchecked cast.
    expect(caught).toBeInstanceOf(OcEmitSentinelError);
    expect((caught as OcEmitSentinelError).path).toContain('config');
  });
});
|
||||
154
src/oc-path/tests/scenarios/tables.test.ts
Normal file
154
src/oc-path/tests/scenarios/tables.test.ts
Normal file
@@ -0,0 +1,154 @@
|
||||
/**
|
||||
* Wave 5 — markdown tables.
|
||||
*
|
||||
* Substrate guarantee: GFM-style tables (`| h | h |\n|---|---|\n| r | r |`)
|
||||
* inside H2 blocks are extracted into `AstTable`. Tables inside fenced
|
||||
* code blocks are NOT extracted (handled at item-extraction layer too;
|
||||
* tables share the same code-block awareness when relevant).
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { parseMd } from '../../parse.js';
|
||||
|
||||
/**
 * Table extraction scenarios: each case parses a small markdown document and
 * inspects the `tables` of its first block.
 */
describe('wave-05 tables', () => {
  // Joins rows with '\n' and terminates the document with a final newline.
  const doc = (...rows: string[]): string => `${rows.join('\n')}\n`;
  // Parses `source` and returns the tables of the first block, if there is one.
  const firstBlockTables = (source: string) => parseMd(source).ast.blocks[0]?.tables;

  it('T-01 standard 2-column table', () => {
    const tables = firstBlockTables(
      doc(
        '## H',
        '',
        '| tool | guidance |',
        '| --- | --- |',
        '| gh | use for GitHub |',
        '| curl | HTTP client |',
      ),
    );
    const table = tables?.[0];
    expect(table?.headers).toEqual(['tool', 'guidance']);
    expect(table?.rows).toEqual([
      ['gh', 'use for GitHub'],
      ['curl', 'HTTP client'],
    ]);
  });

  it('T-02 3+ column table', () => {
    const tables = firstBlockTables(doc('## H', '', '| a | b | c |', '| - | - | - |', '| 1 | 2 | 3 |'));
    expect(tables?.[0]?.headers).toEqual(['a', 'b', 'c']);
    expect(tables?.[0]?.rows[0]).toEqual(['1', '2', '3']);
  });

  it('T-03 table with alignment colons in separator', () => {
    const tables = firstBlockTables(
      doc('## H', '', '| left | center | right |', '| :--- | :---: | ---: |', '| a | b | c |'),
    );
    expect(tables?.length).toBe(1);
  });

  it('T-04 table with empty cells', () => {
    const tables = firstBlockTables(doc('## H', '', '| a | b |', '| - | - |', '| 1 | |', '| | 2 |'));
    expect(tables?.[0]?.rows).toEqual([
      ['1', ''],
      ['', '2'],
    ]);
  });

  it('T-05 table with no rows (header + sep only)', () => {
    const tables = firstBlockTables(doc('## H', '', '| a | b |', '| - | - |'));
    expect(tables?.[0]?.headers).toEqual(['a', 'b']);
    expect(tables?.[0]?.rows).toEqual([]);
  });

  it('T-06 multiple tables in same section', () => {
    const tables = firstBlockTables(
      doc(
        '## H',
        '',
        '| a | b |',
        '| - | - |',
        '| 1 | 2 |',
        '',
        'Some text.',
        '',
        '| x | y |',
        '| - | - |',
        '| 3 | 4 |',
      ),
    );
    expect(tables?.length).toBe(2);
  });

  it('T-07 table line numbers track to the header line', () => {
    const tables = firstBlockTables(doc('## Section', 'preamble line', '| a | b |', '| - | - |'));
    expect(tables?.[0]?.line).toBeGreaterThan(0);
  });

  it('T-08 invalid separator (no pipes) — no table extracted', () => {
    const tables = firstBlockTables(doc('## H', '', '| a | b |', 'not a separator', '| 1 | 2 |'));
    expect(tables).toEqual([]);
  });

  it('T-09 single-column table (just `| col |\\n|---|`)', () => {
    const tables = firstBlockTables(doc('## H', '', '| col |', '| --- |', '| value1 |', '| value2 |'));
    expect(tables?.[0]?.headers).toEqual(['col']);
    expect(tables?.[0]?.rows).toEqual([['value1'], ['value2']]);
  });

  it('T-10 table at end of file with trailing newlines', () => {
    // The two trailing '' rows produce two blank lines after the last row.
    const tables = firstBlockTables(doc('## H', '', '| a |', '| - |', '| 1 |', '', ''));
    expect(tables?.[0]?.rows).toEqual([['1']]);
  });

  it('T-11 table content with internal whitespace trimmed', () => {
    const tables = firstBlockTables(doc('## H', '', '| col1 | col2 |', '| --- | --- |', '| a | b |'));
    expect(tables?.[0]?.headers).toEqual(['col1', 'col2']);
    expect(tables?.[0]?.rows[0]).toEqual(['a', 'b']);
  });
});
|
||||
36
src/oc-path/tests/sentinel.test.ts
Normal file
36
src/oc-path/tests/sentinel.test.ts
Normal file
@@ -0,0 +1,36 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
OcEmitSentinelError,
|
||||
REDACTED_SENTINEL,
|
||||
guardSentinel,
|
||||
} from '../sentinel.js';
|
||||
|
||||
/** Unit tests for `guardSentinel`: pass-through cases and the thrown error. */
describe('guardSentinel', () => {
  const SOUL = 'oc://SOUL.md';

  it('passes through normal strings', () => {
    const run = () => guardSentinel('normal value', SOUL);
    expect(run).not.toThrow();
  });

  it('passes through non-string values', () => {
    for (const value of [42, null, undefined]) {
      expect(() => guardSentinel(value, SOUL)).not.toThrow();
    }
  });

  it('throws on the sentinel literal', () => {
    const run = () => guardSentinel(REDACTED_SENTINEL, 'oc://SOUL.md/[fm]/token');
    expect(run).toThrow(OcEmitSentinelError);
  });

  it('attaches the OcPath in the error', () => {
    const target = 'oc://config/plugins.entries.foo.token';
    // Capture the error first; a missing throw then fails the instanceOf check.
    let caught: unknown;
    try {
      guardSentinel(REDACTED_SENTINEL, target);
    } catch (err) {
      caught = err;
    }
    expect(caught).toBeInstanceOf(OcEmitSentinelError);
    const e = caught as OcEmitSentinelError;
    expect(e.path).toBe(target);
    expect(e.code).toBe('OC_EMIT_SENTINEL');
  });
});
|
||||
50
src/oc-path/tests/slug.test.ts
Normal file
50
src/oc-path/tests/slug.test.ts
Normal file
@@ -0,0 +1,50 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { slugify } from '../slug.js';
|
||||
|
||||
/** Unit tests for `slugify`: one input/expected-output pair per rule. */
describe('slugify', () => {
  it('lowercases', () => {
    const slug = slugify('Boundaries');
    expect(slug).toBe('boundaries');
  });

  it('replaces underscores with hyphens', () => {
    const slug = slugify('API_KEY');
    expect(slug).toBe('api-key');
  });

  it('collapses multi-word headings', () => {
    const slug = slugify('Tool Guidance');
    expect(slug).toBe('tool-guidance');
  });

  it('preserves existing kebab-case', () => {
    const slug = slugify('deny-rule-1');
    expect(slug).toBe('deny-rule-1');
  });

  it('trims surrounding whitespace + non-slug chars', () => {
    const slug = slugify(' Restricted Data ');
    expect(slug).toBe('restricted-data');
  });

  it('handles colon + space patterns', () => {
    const slug = slugify('deny: secrets');
    expect(slug).toBe('deny-secrets');
  });

  it('collapses repeated hyphens', () => {
    const slug = slugify('foo----bar');
    expect(slug).toBe('foo-bar');
  });

  it('returns empty for non-slug-valid input', () => {
    expect(slugify('!!')).toBe('');
    expect(slugify(' ')).toBe('');
  });

  it('is idempotent', () => {
    // Slugifying a slug must not change it.
    ['Tool Guidance', 'API_KEY', 'deny-rule-1', 'Multi-tenant isolation'].forEach((input) => {
      const once = slugify(input);
      expect(slugify(once)).toBe(slugify(input));
    });
  });

  it('handles unicode by stripping (current ASCII-only policy)', () => {
    // Caveat: unicode in headings becomes empty/lossy. Document as a
    // known limit; lint rules can flag non-ASCII headings if needed.
    const slug = slugify('Café');
    expect(slug).toBe('caf');
  });
});
|
||||
475
src/oc-path/tests/universal.test.ts
Normal file
475
src/oc-path/tests/universal.test.ts
Normal file
@@ -0,0 +1,475 @@
|
||||
/**
|
||||
* Universal verbs — `setOcPath` + `resolveOcPath` test surface.
|
||||
*
|
||||
* Every test exercises the universal entry point. The substrate
|
||||
* dispatches via `ast.kind` and coerces value strings based on AST
|
||||
* shape at the path location.
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { emitJsonc } from '../jsonc/emit.js';
|
||||
import { parseJsonc } from '../jsonc/parse.js';
|
||||
import { emitJsonl } from '../jsonl/emit.js';
|
||||
import { parseJsonl } from '../jsonl/parse.js';
|
||||
import { emitMd } from '../emit.js';
|
||||
import { parseMd } from '../parse.js';
|
||||
import { parseOcPath } from '../oc-path.js';
|
||||
import {
|
||||
detectInsertion,
|
||||
resolveOcPath,
|
||||
setOcPath,
|
||||
} from '../universal.js';
|
||||
|
||||
// ---------- detectInsertion ------------------------------------------------
|
||||
|
||||
/** Tests for `detectInsertion`: which `+` markers are recognised, and where. */
describe('detectInsertion', () => {
  // Parse the oc:// string and run insertion detection on the result.
  const probe = (ocPath: string) => detectInsertion(parseOcPath(ocPath));

  it('returns null for plain paths', () => {
    expect(probe('oc://X.md/section/item/field')).toBeNull();
  });

  it('detects bare `+` end-insertion at section', () => {
    const found = probe('oc://X.md/tools/+');
    expect(found?.marker).toBe('+');
    expect(found?.parentPath.section).toBe('tools');
    expect(found?.parentPath.item).toBeUndefined();
  });

  it('detects `+key` keyed insertion', () => {
    const found = probe('oc://config/plugins/+gitlab');
    expect(found?.marker).toEqual({ kind: 'keyed', key: 'gitlab' });
  });

  it('detects `+nnn` indexed insertion', () => {
    const found = probe('oc://config/items/+2');
    expect(found?.marker).toEqual({ kind: 'indexed', index: 2 });
  });

  it('detects file-root insertion', () => {
    const found = probe('oc://session.jsonl/+');
    expect(found?.marker).toBe('+');
    expect(found?.parentPath.section).toBeUndefined();
  });
});
|
||||
|
||||
// ---------- resolveOcPath — universal across kinds -------------------------
|
||||
|
||||
/** `resolveOcPath` against a markdown AST with frontmatter and one section. */
describe('resolveOcPath — md AST', () => {
  const { ast: doc } = parseMd('---\nname: github\n---\n\n## Boundaries\n\n- enabled: true\n');
  // Resolve an oc:// string against the shared document.
  const at = (ocPath: string) => resolveOcPath(doc, parseOcPath(ocPath));

  it('returns leaf with valueText for frontmatter entry', () => {
    const match = at('oc://X.md/[frontmatter]/name');
    expect(match).toMatchObject({ kind: 'leaf', valueText: 'github', leafType: 'string' });
  });

  it('returns leaf for item-field', () => {
    const match = at('oc://X.md/boundaries/enabled/enabled');
    expect(match).toMatchObject({ kind: 'leaf', valueText: 'true', leafType: 'string' });
  });

  it('returns node for block', () => {
    const match = at('oc://X.md/boundaries');
    expect(match).toMatchObject({ kind: 'node', descriptor: 'md-block' });
  });

  it('returns root for file-only path', () => {
    const match = at('oc://X.md');
    expect(match?.kind).toBe('root');
  });

  it('returns null for unresolved', () => {
    expect(at('oc://X.md/missing')).toBeNull();
  });
});
|
||||
|
||||
/** `resolveOcPath` against a jsonc object holding one value of each leaf type. */
describe('resolveOcPath — jsonc AST', () => {
  const { ast: config } = parseJsonc('{ "k": 42, "s": "x", "b": true, "n": null, "arr": [1,2,3] }');
  // Resolve an oc:// string against the shared config object.
  const at = (ocPath: string) => resolveOcPath(config, parseOcPath(ocPath));

  it('returns leaf:number for numeric value', () => {
    const match = at('oc://config/k');
    expect(match).toMatchObject({ kind: 'leaf', valueText: '42', leafType: 'number' });
  });

  it('returns leaf:string for string value', () => {
    const match = at('oc://config/s');
    expect(match).toMatchObject({ kind: 'leaf', valueText: 'x', leafType: 'string' });
  });

  it('returns leaf:boolean for bool value', () => {
    const match = at('oc://config/b');
    expect(match).toMatchObject({ kind: 'leaf', valueText: 'true', leafType: 'boolean' });
  });

  it('returns leaf:null for null value', () => {
    const match = at('oc://config/n');
    expect(match).toMatchObject({ kind: 'leaf', valueText: 'null', leafType: 'null' });
  });

  it('returns node:jsonc-array for array value', () => {
    const match = at('oc://config/arr');
    expect(match).toMatchObject({ kind: 'node', descriptor: 'jsonc-array' });
  });

  it('returns leaf at array index', () => {
    const match = at('oc://config/arr.1');
    expect(match).toMatchObject({ kind: 'leaf', valueText: '2', leafType: 'number' });
  });
});
|
||||
|
||||
/** `resolveOcPath` against a two-line jsonl log. */
describe('resolveOcPath — jsonl AST', () => {
  const { ast: log } = parseJsonl('{"event":"start","n":1}\n{"event":"step","n":2}\n');
  // Resolve an oc:// string against the shared log.
  const at = (ocPath: string) => resolveOcPath(log, parseOcPath(ocPath));

  it('returns node:jsonl-line for line address', () => {
    const match = at('oc://log/L1');
    expect(match).toMatchObject({ kind: 'node', descriptor: 'jsonl-line' });
  });

  it('returns leaf for field on line', () => {
    const match = at('oc://log/L2/event');
    expect(match).toMatchObject({ kind: 'leaf', valueText: 'step', leafType: 'string' });
  });

  it('returns leaf:number for $last/n', () => {
    const match = at('oc://log/$last/n');
    expect(match).toMatchObject({ kind: 'leaf', valueText: '2', leafType: 'number' });
  });
});
|
||||
|
||||
/** `resolveOcPath` on paths ending in a `+` marker, one container per case. */
describe('resolveOcPath — insertion-point detection', () => {
  it('returns insertion-point for md section append', () => {
    const { ast } = parseMd('## Tools\n');
    const match = resolveOcPath(ast, parseOcPath('oc://X.md/tools/+'));
    expect(match).toMatchObject({ kind: 'insertion-point', container: 'md-section' });
  });

  it('returns insertion-point for md file-level', () => {
    const { ast } = parseMd('## Tools\n');
    const match = resolveOcPath(ast, parseOcPath('oc://X.md/+'));
    expect(match).toMatchObject({ kind: 'insertion-point', container: 'md-file' });
  });

  it('returns insertion-point for md frontmatter +key', () => {
    const { ast } = parseMd('---\nname: x\n---\n');
    const target = parseOcPath('oc://X.md/[frontmatter]/+description');
    const match = resolveOcPath(ast, target);
    expect(match).toMatchObject({ kind: 'insertion-point', container: 'md-frontmatter' });
  });

  it('returns insertion-point for jsonc array +', () => {
    const { ast } = parseJsonc('{ "items": [1,2,3] }');
    const match = resolveOcPath(ast, parseOcPath('oc://config/items/+'));
    expect(match).toMatchObject({ kind: 'insertion-point', container: 'jsonc-array' });
  });

  it('returns insertion-point for jsonc object +key', () => {
    const { ast } = parseJsonc('{ "plugins": {} }');
    const match = resolveOcPath(ast, parseOcPath('oc://config/plugins/+gitlab'));
    expect(match).toMatchObject({ kind: 'insertion-point', container: 'jsonc-object' });
  });

  it('returns insertion-point for jsonl file-root +', () => {
    const { ast } = parseJsonl('');
    const match = resolveOcPath(ast, parseOcPath('oc://log/+'));
    expect(match).toMatchObject({ kind: 'insertion-point', container: 'jsonl-file' });
  });

  it('returns null when insertion target is not a container', () => {
    // `k` holds the number 42, so there is nothing to insert into.
    const { ast } = parseJsonc('{ "k": 42 }');
    const match = resolveOcPath(ast, parseOcPath('oc://config/k/+'));
    expect(match).toBeNull();
  });
});
|
||||
|
||||
// ---------- setOcPath — leaf assignment ------------------------------------
|
||||
|
||||
describe('setOcPath — md leaf', () => {
  it('replaces frontmatter value', () => {
    const { ast: doc } = parseMd('---\nname: old\n---\n');
    const target = parseOcPath('oc://X.md/[frontmatter]/name');
    const result = setOcPath(doc, target, 'new');
    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(result.ast.kind === 'md' && result.ast.frontmatter[0]?.value).toBe('new');
    }
  });

  it('replaces item kv value', () => {
    const { ast: doc } = parseMd('## Boundaries\n\n- timeout: 5\n');
    const target = parseOcPath('oc://X.md/boundaries/timeout/timeout');
    const result = setOcPath(doc, target, '60');
    expect(result.ok).toBe(true);
    if (result.ok) {
      // Check the emitted markdown rather than poking at the AST shape.
      const rendered = emitMd(result.ast as Parameters<typeof emitMd>[0]);
      expect(rendered).toContain('- timeout: 60');
    }
  });

  it('returns unresolved for missing path', () => {
    const { ast: doc } = parseMd('');
    const result = setOcPath(doc, parseOcPath('oc://X.md/missing/x/x'), 'v');
    expect(result.ok).toBe(false);
    if (!result.ok) {
      expect(result.reason).toBe('unresolved');
    }
  });
});
|
||||
|
||||
describe('setOcPath — jsonc leaf with coercion', () => {
  // Emit the edited AST and parse it back so assertions compare plain values.
  const reparse = (ast: unknown) =>
    JSON.parse(emitJsonc(ast as Parameters<typeof emitJsonc>[0]));
  const leafPath = () => parseOcPath('oc://config/k');

  it('replaces string leaf with string value', () => {
    const { ast: config } = parseJsonc('{ "k": "old" }');
    const result = setOcPath(config, leafPath(), 'new');
    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(reparse(result.ast)).toEqual({ k: 'new' });
    }
  });

  it('coerces value to number when leaf was number', () => {
    const { ast: config } = parseJsonc('{ "k": 1 }');
    const result = setOcPath(config, leafPath(), '42');
    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(reparse(result.ast)).toEqual({ k: 42 });
    }
  });

  it('coerces "true"/"false" when leaf was boolean', () => {
    const { ast: config } = parseJsonc('{ "k": true }');
    const result = setOcPath(config, leafPath(), 'false');
    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(reparse(result.ast)).toEqual({ k: false });
    }
  });

  it('rejects non-numeric string for number leaf', () => {
    const { ast: config } = parseJsonc('{ "k": 1 }');
    const result = setOcPath(config, leafPath(), 'not-a-number');
    expect(result.ok).toBe(false);
    if (!result.ok) {
      expect(result.reason).toBe('parse-error');
    }
  });

  it('rejects non-bool string for boolean leaf', () => {
    const { ast: config } = parseJsonc('{ "k": true }');
    const result = setOcPath(config, leafPath(), 'maybe');
    expect(result.ok).toBe(false);
    if (!result.ok) {
      expect(result.reason).toBe('parse-error');
    }
  });
});
|
||||
|
||||
describe('setOcPath — jsonl leaf', () => {
  // Emit the edited log and parse its first line back into a plain value.
  const firstRecord = (ast: unknown) => {
    const text = emitJsonl(ast as Parameters<typeof emitJsonl>[0]);
    return JSON.parse(text.split('\n')[0]);
  };

  it('replaces field on a value line with coercion', () => {
    const { ast: log } = parseJsonl('{"event":"start","n":1}\n');
    const result = setOcPath(log, parseOcPath('oc://log/L1/n'), '42');
    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(firstRecord(result.ast)).toEqual({ event: 'start', n: 42 });
    }
  });

  it('replaces whole line via JSON value', () => {
    const { ast: log } = parseJsonl('{"event":"start"}\n');
    const result = setOcPath(log, parseOcPath('oc://log/L1'), '{"event":"replaced"}');
    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(firstRecord(result.ast)).toEqual({ event: 'replaced' });
    }
  });

  it('rejects malformed JSON for whole-line replacement', () => {
    const { ast: log } = parseJsonl('{"event":"start"}\n');
    const result = setOcPath(log, parseOcPath('oc://log/L1'), 'not json');
    expect(result.ok).toBe(false);
    if (!result.ok) {
      expect(result.reason).toBe('parse-error');
    }
  });
});
|
||||
|
||||
// ---------- setOcPath — insertion ------------------------------------------
|
||||
|
||||
describe('setOcPath — md insertion', () => {
  // Render an edited AST back to markdown text for substring assertions.
  const render = (ast: unknown) => emitMd(ast as Parameters<typeof emitMd>[0]);

  it('appends item to section with `+`', () => {
    const { ast: doc } = parseMd('## Tools\n\n- gh: GitHub CLI\n');
    const result = setOcPath(doc, parseOcPath('oc://X.md/tools/+'), 'docker: container CLI');
    expect(result.ok).toBe(true);
    if (result.ok) {
      const text = render(result.ast);
      // The pre-existing item must survive alongside the appended one.
      expect(text).toContain('- gh: GitHub CLI');
      expect(text).toContain('- docker: container CLI');
    }
  });

  it('appends new section at file root with `+`', () => {
    const { ast: doc } = parseMd('## Existing\n');
    const result = setOcPath(doc, parseOcPath('oc://X.md/+'), 'New Section');
    expect(result.ok).toBe(true);
    if (result.ok) {
      const text = render(result.ast);
      expect(text).toContain('## Existing');
      expect(text).toContain('## New Section');
    }
  });

  it('adds new frontmatter key with +key', () => {
    const { ast: doc } = parseMd('---\nname: x\n---\n');
    const target = parseOcPath('oc://X.md/[frontmatter]/+description');
    const result = setOcPath(doc, target, 'a new description');
    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(render(result.ast)).toContain('description: a new description');
    }
  });

  it('rejects duplicate frontmatter key on insertion', () => {
    const { ast: doc } = parseMd('---\nname: x\n---\n');
    const target = parseOcPath('oc://X.md/[frontmatter]/+name');
    const result = setOcPath(doc, target, 'y');
    expect(result.ok).toBe(false);
    if (!result.ok) {
      expect(result.reason).toBe('type-mismatch');
    }
  });
});
|
||||
|
||||
describe('setOcPath — jsonc insertion', () => {
  // Emit the edited AST and parse it back so assertions compare plain values.
  const reparse = (ast: unknown) =>
    JSON.parse(emitJsonc(ast as Parameters<typeof emitJsonc>[0]));

  it('appends to array with `+`', () => {
    const { ast: config } = parseJsonc('{ "items": [1, 2] }');
    const result = setOcPath(config, parseOcPath('oc://config/items/+'), '3');
    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(reparse(result.ast)).toEqual({ items: [1, 2, 3] });
    }
  });

  it('inserts at index with `+nnn`', () => {
    const { ast: config } = parseJsonc('{ "items": [1, 3] }');
    const result = setOcPath(config, parseOcPath('oc://config/items/+1'), '2');
    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(reparse(result.ast)).toEqual({ items: [1, 2, 3] });
    }
  });

  it('adds object key with `+key`', () => {
    const { ast: config } = parseJsonc('{ "plugins": { "github": "tok" } }');
    const target = parseOcPath('oc://config/plugins/+gitlab');
    // The value is JSON text, so the string carries its own quotes.
    const result = setOcPath(config, target, '"new-tok"');
    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(reparse(result.ast)).toEqual({
        plugins: { github: 'tok', gitlab: 'new-tok' },
      });
    }
  });

  it('rejects duplicate object key', () => {
    const { ast: config } = parseJsonc('{ "plugins": { "github": "x" } }');
    const target = parseOcPath('oc://config/plugins/+github');
    const result = setOcPath(config, target, '"y"');
    expect(result.ok).toBe(false);
    if (!result.ok) {
      expect(result.reason).toBe('unresolved');
    }
  });

  it('rejects +key on array', () => {
    const { ast: config } = parseJsonc('{ "items": [1, 2] }');
    const result = setOcPath(config, parseOcPath('oc://config/items/+abc'), '3');
    expect(result.ok).toBe(false);
    if (!result.ok) {
      expect(result.reason).toBe('type-mismatch');
    }
  });

  it('inserts complex object via JSON value', () => {
    const { ast: config } = parseJsonc('{ "plugins": {} }');
    const target = parseOcPath('oc://config/plugins/+gitlab');
    const result = setOcPath(config, target, '{"token":"xyz","enabled":true}');
    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(reparse(result.ast)).toEqual({
        plugins: { gitlab: { token: 'xyz', enabled: true } },
      });
    }
  });
});
|
||||
|
||||
describe('setOcPath — jsonl insertion (session append)', () => {
  it('appends a JSON line with `+`', () => {
    const { ast: log } = parseJsonl('{"event":"start"}\n');
    const result = setOcPath(log, parseOcPath('oc://log/+'), '{"event":"step","n":1}');
    expect(result.ok).toBe(true);
    if (result.ok) {
      const text = emitJsonl(result.ast as Parameters<typeof emitJsonl>[0]);
      // Drop empty fragments (e.g. after the trailing newline) before counting.
      const records = text.split('\n').filter((line) => line.length > 0);
      expect(records).toHaveLength(2);
      expect(JSON.parse(records[1])).toEqual({ event: 'step', n: 1 });
    }
  });

  it('rejects malformed JSON value', () => {
    const { ast: log } = parseJsonl('');
    const result = setOcPath(log, parseOcPath('oc://log/+'), 'not json');
    expect(result.ok).toBe(false);
    if (!result.ok) {
      expect(result.reason).toBe('parse-error');
    }
  });

  it('rejects non-root insertion target', () => {
    // Only failure is pinned here; the specific reason is left open.
    const { ast: log } = parseJsonl('{"a":1}\n');
    const result = setOcPath(log, parseOcPath('oc://log/L1/+'), '{}');
    expect(result.ok).toBe(false);
  });
});
|
||||
|
||||
// ---------- Cross-cutting properties ---------------------------------------
|
||||
|
||||
describe('setOcPath — cross-cutting properties', () => {
  it('is non-mutating across all kinds', () => {
    // Serialize each input AST before the edit and compare afterwards: the
    // input must be untouched regardless of what the edit returns.
    const snapshot = (value: unknown) => JSON.stringify(value);

    const { ast: mdDoc } = parseMd('---\nname: x\n---\n');
    const mdBefore = snapshot(mdDoc);
    setOcPath(mdDoc, parseOcPath('oc://X.md/[frontmatter]/name'), 'new');
    expect(snapshot(mdDoc)).toBe(mdBefore);

    const { ast: jsoncDoc } = parseJsonc('{ "k": 1 }');
    const jsoncBefore = snapshot(jsoncDoc);
    setOcPath(jsoncDoc, parseOcPath('oc://config/k'), '99');
    expect(snapshot(jsoncDoc)).toBe(jsoncBefore);

    const { ast: jsonlDoc } = parseJsonl('{"a":1}\n');
    const jsonlBefore = snapshot(jsonlDoc);
    setOcPath(jsonlDoc, parseOcPath('oc://log/L1/a'), '99');
    expect(snapshot(jsonlDoc)).toBe(jsonlBefore);
  });

  it('returns ok-tagged result with new ast on success', () => {
    const { ast: doc } = parseMd('---\nname: x\n---\n');
    const result = setOcPath(doc, parseOcPath('oc://X.md/[frontmatter]/name'), 'y');
    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(result.ast.kind).toBe('md');
    }
  });

  it('returns failure-tagged result with reason on unresolved', () => {
    const { ast: config } = parseJsonc('{}');
    const result = setOcPath(config, parseOcPath('oc://config/missing'), 'v');
    expect(result.ok).toBe(false);
    if (!result.ok) {
      expect(result.reason).toBeDefined();
      expect(typeof result.reason).toBe('string');
    }
  });
});
|
||||
248
src/oc-path/tests/yaml/yaml-kind.test.ts
Normal file
248
src/oc-path/tests/yaml/yaml-kind.test.ts
Normal file
@@ -0,0 +1,248 @@
|
||||
/**
|
||||
* YAML kind — parse / emit / resolve / set + universal verb dispatch.
|
||||
*
|
||||
* Real-world fixture: lobster `.lobster` workflow file shape.
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { emitYaml } from '../../yaml/emit.js';
|
||||
import { parseYaml } from '../../yaml/parse.js';
|
||||
import { resolveYamlOcPath } from '../../yaml/resolve.js';
|
||||
import { setYamlOcPath } from '../../yaml/edit.js';
|
||||
import { parseOcPath } from '../../oc-path.js';
|
||||
import {
|
||||
resolveOcPath,
|
||||
setOcPath,
|
||||
} from '../../universal.js';
|
||||
import { inferKind } from '../../dispatch.js';
|
||||
|
||||
// Fixture: a `.lobster` workflow document. Indentation is significant here:
// `steps` must be a sequence of maps so that paths such as `steps.0.id`,
// `steps/$first/id` and `steps/$last/id` used by the tests below resolve.
const LOBSTER = `name: inbox-triage
description: A simple example workflow

steps:
  - id: fetch
    command: gog.gmail.search --query 'newer_than:1d' --max 20

  - id: classify
    command: openclaw.invoke --tool llm-task --action json
    stdin: $fetch.stdout
`;
|
||||
|
||||
describe('parseYaml — round-trip', () => {
  it('preserves bytes verbatim on round-trip', () => {
    const parsed = parseYaml(LOBSTER);
    expect(emitYaml(parsed.ast)).toBe(LOBSTER);
  });

  it('exposes kind: yaml discriminator', () => {
    const parsed = parseYaml(LOBSTER);
    expect(parsed.ast.kind).toBe('yaml');
  });

  it('handles empty file', () => {
    const parsed = parseYaml('');
    expect(parsed.ast.kind).toBe('yaml');
    expect(emitYaml(parsed.ast)).toBe('');
  });

  it('reports errors as diagnostics, not throws', () => {
    // Smoke check: the meaningful property is that parseYaml returns at all.
    // The length comparison holds for any count, so it pins no particular
    // number of diagnostics.
    const parsed = parseYaml('key: value\n bad indent: oops\n');
    expect(parsed.diagnostics.length).toBeGreaterThanOrEqual(0);
  });
});
|
||||
|
||||
describe('resolveYamlOcPath — direct', () => {
  // All cases resolve against the same workflow fixture.
  const resolveIn = (ocPath: string) =>
    resolveYamlOcPath(parseYaml(LOBSTER).ast, parseOcPath(ocPath));

  it('resolves top-level scalar', () => {
    const match = resolveIn('oc://workflow.lobster/name');
    expect(match?.kind).toBe('pair');
    if (match?.kind === 'pair') {
      expect(match.value).toBe('inbox-triage');
    }
  });

  it('resolves into a sequence by index', () => {
    const match = resolveIn('oc://workflow.lobster/steps.0.id');
    expect(match?.kind).toBe('pair');
    if (match?.kind === 'pair') {
      expect(match.value).toBe('fetch');
    }
  });

  it('returns root when no segments', () => {
    const match = resolveIn('oc://workflow.lobster');
    expect(match?.kind).toBe('root');
  });

  it('returns null for unresolved paths', () => {
    const match = resolveIn('oc://workflow.lobster/missing');
    expect(match).toBeNull();
  });
});
|
||||
|
||||
describe('setYamlOcPath — direct', () => {
  it('replaces a scalar value', () => {
    const parsed = parseYaml(LOBSTER);
    const target = parseOcPath('oc://workflow.lobster/name');
    const result = setYamlOcPath(parsed.ast, target, 'new-name');
    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(result.ast.raw).toContain('name: new-name');
    }
  });

  it('replaces a nested scalar', () => {
    const parsed = parseYaml(LOBSTER);
    const target = parseOcPath('oc://workflow.lobster/steps.0.id');
    const result = setYamlOcPath(parsed.ast, target, 'fetch-renamed');
    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(result.ast.raw).toContain('id: fetch-renamed');
    }
  });

  it('returns unresolved for missing path', () => {
    const parsed = parseYaml(LOBSTER);
    const target = parseOcPath('oc://workflow.lobster/missing');
    const result = setYamlOcPath(parsed.ast, target, 'x');
    expect(result.ok).toBe(false);
    if (!result.ok) {
      expect(result.reason).toBe('unresolved');
    }
  });
});
|
||||
|
||||
describe('setYamlOcPath — positional tokens (round-11 resolve↔edit symmetry)', () => {
  // ClawSweeper round-11 P2: the yaml edit path used to hand segments
  // straight to `setIn`, which reads `$first` / `$last` / `-N` as literal
  // map keys and silently misses the target. These cases pin the new
  // behavior — positional tokens are resolved against the live document
  // BEFORE the yaml lib walks the path.
  it('edits the first seq element via $first', () => {
    const parsed = parseYaml(LOBSTER);
    const target = parseOcPath('oc://workflow.lobster/steps/$first/id');
    const result = setYamlOcPath(parsed.ast, target, 'fetch-renamed');
    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(result.ast.raw).toContain('id: fetch-renamed');
    }
  });

  it('edits the last seq element via $last', () => {
    const parsed = parseYaml(LOBSTER);
    const target = parseOcPath('oc://workflow.lobster/steps/$last/id');
    const result = setYamlOcPath(parsed.ast, target, 'classify-renamed');
    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(result.ast.raw).toContain('id: classify-renamed');
    }
  });

  it('edits the second-to-last seq element via -2', () => {
    const parsed = parseYaml('items:\n - a\n - b\n - c\n');
    const target = parseOcPath('oc://x.yaml/items/-2');
    const result = setYamlOcPath(parsed.ast, target, 'B');
    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(result.ast.raw).toContain('- B');
    }
  });

  it('edits the first map entry via $first', () => {
    const parsed = parseYaml('config:\n a: 1\n b: 2\n c: 3\n');
    const target = parseOcPath('oc://x.yaml/config/$first');
    const result = setYamlOcPath(parsed.ast, target, 99);
    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(result.ast.raw).toContain('a: 99');
    }
  });

  it('returns unresolved for $first against an empty seq', () => {
    const parsed = parseYaml('items: []\n');
    const target = parseOcPath('oc://x.yaml/items/$first');
    const result = setYamlOcPath(parsed.ast, target, 'x');
    expect(result.ok).toBe(false);
    if (!result.ok) {
      expect(result.reason).toBe('unresolved');
    }
  });
});
|
||||
|
||||
describe('inferKind — yaml extensions', () => {
  it('maps .yaml / .yml / .lobster to yaml', () => {
    // Same expectation for every extension, asserted in the original order.
    const fileNames = ['workflow.yaml', 'config.yml', 'inbox-triage.lobster'];
    for (const fileName of fileNames) {
      expect(inferKind(fileName)).toBe('yaml');
    }
  });
});
|
||||
|
||||
describe('universal verbs — yaml dispatch', () => {
  it('resolveOcPath returns kind-agnostic match for yaml leaf', () => {
    const parsed = parseYaml(LOBSTER);
    const match = resolveOcPath(parsed.ast, parseOcPath('oc://workflow.lobster/name'));
    expect(match).toMatchObject({ kind: 'leaf', valueText: 'inbox-triage', leafType: 'string' });
  });

  it('resolveOcPath returns node:yaml-map for top-level seq item', () => {
    const parsed = parseYaml(LOBSTER);
    const match = resolveOcPath(parsed.ast, parseOcPath('oc://workflow.lobster/steps.0'));
    expect(match).toMatchObject({ kind: 'node', descriptor: 'yaml-map' });
  });

  it('resolveOcPath returns node:yaml-seq for sequence root', () => {
    const parsed = parseYaml(LOBSTER);
    const match = resolveOcPath(parsed.ast, parseOcPath('oc://workflow.lobster/steps'));
    expect(match).toMatchObject({ kind: 'node', descriptor: 'yaml-seq' });
  });

  it('setOcPath replaces a yaml scalar via universal verb', () => {
    const parsed = parseYaml(LOBSTER);
    const result = setOcPath(parsed.ast, parseOcPath('oc://workflow.lobster/name'), 'updated');
    expect(result.ok).toBe(true);
    // The kind check narrows the union so `raw` is accessible.
    if (result.ok && result.ast.kind === 'yaml') {
      expect(result.ast.raw).toContain('name: updated');
    }
  });

  it('setOcPath coerces numeric string to number for number leaf', () => {
    const parsed = parseYaml('count: 5\n');
    const result = setOcPath(parsed.ast, parseOcPath('oc://x.yaml/count'), '42');
    expect(result.ok).toBe(true);
    if (result.ok && result.ast.kind === 'yaml') {
      expect(result.ast.raw).toContain('count: 42');
    }
  });

  it('setOcPath returns parse-error for invalid coercion', () => {
    const parsed = parseYaml('count: 5\n');
    const result = setOcPath(parsed.ast, parseOcPath('oc://x.yaml/count'), 'abc');
    expect(result.ok).toBe(false);
    if (!result.ok) {
      expect(result.reason).toBe('parse-error');
    }
  });
});
|
||||
|
||||
describe('universal verbs — yaml insertion', () => {
  it('appends to a yaml seq with `+`', () => {
    const parsed = parseYaml('items:\n - a\n - b\n');
    const result = setOcPath(parsed.ast, parseOcPath('oc://x.yaml/items/+'), '"c"');
    expect(result.ok).toBe(true);
    if (result.ok && result.ast.kind === 'yaml') {
      expect(result.ast.raw).toContain('- c');
    }
  });

  it('adds key to yaml map with `+key`', () => {
    const parsed = parseYaml('config:\n a: 1\n');
    const result = setOcPath(parsed.ast, parseOcPath('oc://x.yaml/config/+b'), '2');
    expect(result.ok).toBe(true);
    if (result.ok && result.ast.kind === 'yaml') {
      expect(result.ast.raw).toContain('b: 2');
    }
  });

  it('rejects duplicate map key on insertion', () => {
    // Only failure is pinned here; the specific reason is left open.
    const parsed = parseYaml('config:\n a: 1\n');
    const result = setOcPath(parsed.ast, parseOcPath('oc://x.yaml/config/+a'), '99');
    expect(result.ok).toBe(false);
  });
});
|
||||
869
src/oc-path/universal.ts
Normal file
869
src/oc-path/universal.ts
Normal file
@@ -0,0 +1,869 @@
|
||||
/**
|
||||
* Universal `setOcPath` and `resolveOcPath` — the public verbs.
|
||||
*
|
||||
* **Strategic frame**: addressing is universal. Encoding is per-kind.
|
||||
* The OcPath syntax encodes WHAT to do (set leaf vs. insert vs. address
|
||||
* a structural node); the AST kind encodes HOW the substrate carries it
|
||||
* out. Callers pass any AST + a path + a string value; the substrate
|
||||
* dispatches via `ast.kind` and coerces the value based on the path's
|
||||
* syntax and the AST shape at the resolution point.
|
||||
*
|
||||
* **Path syntax vocabulary** (v0):
|
||||
*
|
||||
* oc://FILE/section/item/field → leaf address (set/replace value)
|
||||
* oc://FILE/section/+ → end-insertion at section
|
||||
* oc://FILE/section/+key → keyed insertion (object key add)
|
||||
* oc://FILE/section/+0 → indexed insertion (array splice)
|
||||
* oc://FILE/+ → file-root insertion (jsonl line append, md new section)
|
||||
*
|
||||
* **Coercion at leaves** is driven by the AST type at the resolution point:
|
||||
* - md leaf → value used verbatim (md is text-native)
|
||||
* - jsonc/jsonl leaf, existing string → value verbatim
|
||||
* - jsonc/jsonl leaf, existing number → parseFloat (parse-error if NaN)
|
||||
* - jsonc/jsonl leaf, existing boolean → 'true'/'false' literal
|
||||
* - jsonc/jsonl leaf, existing null → only `value === 'null'`
|
||||
* - insertion → `JSON.parse(value)` for jsonc/jsonl; raw text for md
|
||||
*
|
||||
* @module @openclaw/oc-path/universal
|
||||
*/
|
||||
|
||||
import type { MdAst } from './ast.js';
|
||||
import type { JsoncAst, JsoncEntry, JsoncValue } from './jsonc/ast.js';
|
||||
import { setJsoncOcPath } from './jsonc/edit.js';
|
||||
import { resolveJsoncOcPath } from './jsonc/resolve.js';
|
||||
import type { JsonlAst } from './jsonl/ast.js';
|
||||
import { appendJsonlOcPath as appendJsonlLine, setJsonlOcPath } from './jsonl/edit.js';
|
||||
import { resolveJsonlOcPath } from './jsonl/resolve.js';
|
||||
import { setMdOcPath } from './edit.js';
|
||||
import type { OcPath } from './oc-path.js';
|
||||
import {
|
||||
formatOcPath,
|
||||
hasWildcard,
|
||||
isQuotedSeg,
|
||||
OcPathError,
|
||||
splitRespectingBrackets,
|
||||
unquoteSeg,
|
||||
} from './oc-path.js';
|
||||
import { resolveMdOcPath } from './resolve.js';
|
||||
import { emitJsonc } from './jsonc/emit.js';
|
||||
import { emitJsonl } from './jsonl/emit.js';
|
||||
import type { YamlAst } from './yaml/ast.js';
|
||||
import { insertYamlOcPath, setYamlOcPath } from './yaml/edit.js';
|
||||
import { resolveYamlOcPath } from './yaml/resolve.js';
|
||||
|
||||
// ---------- Public types ---------------------------------------------------
|
||||
|
||||
/**
 * Any AST the substrate can address. The universal verbs branch on the
 * `kind` tag (`'md'`, `'jsonc'`, `'jsonl'`, `'yaml'`) to reach the
 * per-kind implementation.
 */
export type OcAst =
  | MdAst
  | JsoncAst
  | JsonlAst
  | YamlAst;
|
||||
|
||||
/**
 * Result of the universal resolve verb. The shape is the same for every
 * AST kind, so consumers only branch on `match.kind`:
 *
 * - `root` — the path addressed the file itself; carries the AST.
 * - `leaf` — a scalar value, carried as text (the canonical wire form,
 *   suitable for direct comparison or display). Numbers and booleans are
 *   stringified deterministically (`String(42)` → `'42'`,
 *   `String(true)` → `'true'`); `leafType` records the original type.
 * - `node` — a structural node; `descriptor` names which one (md-block,
 *   jsonc-object, jsonl-line, …) so tooling can format or drill in
 *   without re-parsing the kind tag.
 * - `insertion-point` — the terminal segment was an insertion marker
 *   (`+`, `+key`, `+nnn`) and the parent is a valid container.
 *
 * `line` is the 1-based source line of the matched node, or `1` for the
 * root and for synthetic constructions with no source line. Lint rules
 * use it for diagnostic positioning instead of walking the kind-specific
 * AST a second time.
 */
export type OcMatch =
  | {
      readonly kind: 'root';
      readonly ast: OcAst;
      readonly line: number;
    }
  | {
      readonly kind: 'leaf';
      readonly valueText: string;
      readonly leafType: LeafType;
      readonly line: number;
    }
  | {
      readonly kind: 'node';
      readonly descriptor: NodeDescriptor;
      readonly line: number;
    }
  | {
      readonly kind: 'insertion-point';
      readonly container: ContainerKind;
      readonly line: number;
    };
|
||||
|
||||
/** Scalar type of a resolved leaf, recorded next to its text form. */
export type LeafType =
  | 'string'
  | 'number'
  | 'boolean'
  | 'null';
|
||||
|
||||
/** Names the structural (non-leaf) node a path resolved to, per AST kind. */
export type NodeDescriptor =
  // markdown
  | 'md-block'
  | 'md-item'
  // jsonc
  | 'jsonc-object'
  | 'jsonc-array'
  // jsonl
  | 'jsonl-line'
  // yaml
  | 'yaml-map'
  | 'yaml-seq';
|
||||
|
||||
/** Kind of container an insertion-marker path targets. */
export type ContainerKind =
  /** Append an item to a markdown section. */
  | 'md-section'
  /** Append a section to the markdown file. */
  | 'md-file'
  /** Add a frontmatter key. */
  | 'md-frontmatter'
  | 'jsonc-object'
  | 'jsonc-array'
  /** Append a line to a jsonl file. */
  | 'jsonl-file'
  /** Add a key to a YAML map. */
  | 'yaml-map'
  /** Append an item to a YAML seq. */
  | 'yaml-seq';
|
||||
|
||||
/**
 * Outcome of a set/insert verb, tagged by `ok`. Success carries the
 * resulting AST; failure carries a machine-readable `reason` and an
 * optional free-form `detail`.
 */
export type SetResult =
  | {
      readonly ok: true;
      readonly ast: OcAst;
    }
  | {
      readonly ok: false;
      readonly reason:
        | 'unresolved'
        | 'no-root'
        | 'not-writable'
        | 'no-item-kv'
        | 'not-a-value-line'
        | 'parse-error'
        | 'type-mismatch'
        | 'wildcard-not-allowed';
      readonly detail?: string;
    };
|
||||
|
||||
// ---------- Insertion-syntax detection -------------------------------------
|
||||
|
||||
/**
 * Describes an insertion request decoded from a path whose deepest
 * segment is `+`, `+<key>`, or `+<index>`.
 *
 * - `parentPath` — the original path with the insertion segment stripped,
 *   i.e. the container to insert into.
 * - `marker` — `'+'` for a bare marker, `{kind: 'keyed'}` for `+<key>`,
 *   `{kind: 'indexed'}` for `+<index>`.
 *
 * `detectInsertion` produces this, or `null` for a plain (non-insertion)
 * path.
 */
export interface InsertionInfo {
  readonly parentPath: OcPath;
  readonly marker:
    | '+'
    | { kind: 'keyed'; key: string }
    | { kind: 'indexed'; index: number };
}
|
||||
|
||||
/**
 * Decode an insertion marker from the deepest populated slot of `path`.
 *
 * Slots are considered in depth order (`section`, `item`, `field`); the
 * last one that is defined is the terminal segment. If it starts with
 * `+`, the remainder selects the marker: empty → `'+'`, all digits →
 * indexed, anything else → keyed.
 *
 * @param path - Parsed oc-path to inspect.
 * @returns The parent path (terminal slot removed, `session` preserved)
 *   plus the marker, or `null` when the path has no slots or its terminal
 *   segment is not an insertion marker.
 */
export function detectInsertion(path: OcPath): InsertionInfo | null {
  const slotOrder = ['section', 'item', 'field'] as const;

  // The last defined slot in depth order is the terminal one.
  let terminal: (typeof slotOrder)[number] | undefined;
  for (const slot of slotOrder) {
    if (path[slot] !== undefined) {
      terminal = slot;
    }
  }
  if (terminal === undefined) {
    return null;
  }

  const segment = path[terminal] as string;
  if (!segment.startsWith('+')) {
    return null;
  }

  const suffix = segment.slice(1);
  const marker: InsertionInfo['marker'] =
    suffix.length === 0
      ? '+'
      : /^\d+$/.test(suffix)
        ? { kind: 'indexed', index: Number(suffix) }
        : { kind: 'keyed', key: suffix };

  // Rebuild the path without the terminal slot; every other defined slot
  // (and the session, if any) is carried over unchanged.
  const parentPath: OcPath = {
    file: path.file,
    ...(terminal === 'section' || path.section === undefined ? {} : { section: path.section }),
    ...(terminal === 'item' || path.item === undefined ? {} : { item: path.item }),
    ...(terminal === 'field' || path.field === undefined ? {} : { field: path.field }),
    ...(path.session === undefined ? {} : { session: path.session }),
  };
  return { parentPath, marker };
}
|
||||
|
||||
// ---------- Universal resolve ----------------------------------------------
|
||||
|
||||
/**
 * Resolve an `OcPath` against any AST and report the result in a
 * kind-agnostic shape.
 *
 * @param ast - Any supported AST; dispatch is on `ast.kind`.
 * @param path - The address to resolve. Must not contain wildcards.
 * @returns The match, or `null` when the path doesn't resolve. A path
 *   ending in an insertion marker yields `{kind: 'insertion-point',
 *   container}` if the parent is a valid container, otherwise `null`.
 * @throws OcPathError with code `OC_PATH_WILDCARD_IN_RESOLVE` when `path`
 *   is a wildcard pattern.
 */
export function resolveOcPath(ast: OcAst, path: OcPath): OcMatch | null {
  // This is the single-match verb; wildcards belong to `findOcPaths`
  // (multi-match). Throwing a structured code (mirroring `setOcPath`'s
  // `wildcard-not-allowed` discriminator) beats a silent `null`, which a
  // caller could not tell apart from "path doesn't resolve" and so could
  // not know whether to switch to findOcPaths or treat the address as
  // missing.
  if (hasWildcard(path)) {
    const formatted = formatOcPath(path);
    throw new OcPathError(
      `resolveOcPath received a wildcard pattern; use findOcPaths instead: ${formatted}`,
      formatted,
      'OC_PATH_WILDCARD_IN_RESOLVE',
    );
  }

  // Insertion-marker paths are resolved against their parent container.
  const insertion = detectInsertion(path);
  if (insertion !== null) {
    return resolveInsertion(ast, insertion);
  }

  if (ast.kind === 'md') {
    return resolveMdToUniversal(ast, path);
  }
  if (ast.kind === 'jsonc') {
    return resolveJsoncToUniversal(ast, path);
  }
  if (ast.kind === 'jsonl') {
    return resolveJsonlToUniversal(ast, path);
  }
  if (ast.kind === 'yaml') {
    return resolveYamlToUniversal(ast, path);
  }
  // Unknown kind at runtime: report as unresolved.
  return null;
}
|
||||
|
||||
/**
 * Adapt the yaml-specific resolve result to the universal `OcMatch` shape.
 *
 * @param ast - Parsed YAML document.
 * @param path - Non-insertion, non-wildcard path to resolve.
 * @returns `root` for the document itself, `node` for maps/seqs, `leaf`
 *   for scalars and pairs, or `null` when the path doesn't resolve (or
 *   the per-kind match has a kind this adapter doesn't recognise).
 */
function resolveYamlToUniversal(ast: YamlAst, path: OcPath): OcMatch | null {
  const m = resolveYamlOcPath(ast, path);
  if (m === null) {
    return null;
  }
  if (m.kind === 'root') {
    return { kind: 'root', ast, line: 1 };
  }

  // The per-kind match doesn't surface a source position, so the line is
  // looked up with a second walk over the document.
  const line = locateYamlLine(ast, path);
  if (m.kind === 'map') {
    return { kind: 'node', descriptor: 'yaml-map', line };
  }
  if (m.kind === 'seq') {
    return { kind: 'node', descriptor: 'yaml-seq', line };
  }
  if (m.kind === 'scalar' || m.kind === 'pair') {
    const v = m.value;
    if (v === null) {
      return { kind: 'leaf', valueText: 'null', leafType: 'null', line };
    }
    if (typeof v === 'string') {
      return { kind: 'leaf', valueText: v, leafType: 'string', line };
    }
    if (typeof v === 'number') {
      return { kind: 'leaf', valueText: String(v), leafType: 'number', line };
    }
    if (typeof v === 'boolean') {
      return { kind: 'leaf', valueText: String(v), leafType: 'boolean', line };
    }
    // BigInt needs its own branch: `JSON.stringify` throws a TypeError on
    // bigint values, so the generic fallback below cannot handle it.
    // Stringify exactly and report it as a numeric leaf.
    if (typeof v === 'bigint') {
      return { kind: 'leaf', valueText: String(v), leafType: 'number', line };
    }
    // Anything else (Date / collection) — JSON-stringify so the leaf text
    // is never `[object Object]`. Falls back to literal "null" if
    // JSON.stringify yields undefined.
    const valueText = JSON.stringify(v) ?? 'null';
    return { kind: 'leaf', valueText, leafType: 'string', line };
  }
  return null;
}
|
||||
|
||||
/**
 * Find the 1-based source line of the YAML node addressed by `path`.
 *
 * The section / item / field slots are split on `.` with the
 * bracket/quote-aware splitter and quoted segments are unquoted, so a key
 * such as `"github.com/foo"` stays a single lookup key. The resulting
 * segments are then walked through the document's `items` arrays: a
 * collection whose first item has a `key` property is treated as a map
 * (lookup by stringified key), anything else as a sequence (lookup by
 * non-negative integer index).
 *
 * Any miss — empty path, unknown key, bad index, a non-collection in the
 * middle of the path, or a target without a `range` — falls back to
 * line 1 rather than failing.
 *
 * @param ast  Parsed YAML document with its `lineCounter`.
 * @param path Address whose line should be located.
 * @returns The line reported by `lineCounter.linePos(range[0])`, or 1.
 */
function locateYamlLine(ast: YamlAst, path: OcPath): number {
  const keys: string[] = [];
  for (const slot of [path.section, path.item, path.field]) {
    if (slot === undefined) {
      continue;
    }
    for (const piece of splitRespectingBrackets(slot, '.')) {
      keys.push(isQuotedSeg(piece) ? unquoteSeg(piece) : piece);
    }
  }
  if (keys.length === 0) {
    return 1;
  }

  let cursor: unknown = ast.doc.contents;
  for (const key of keys) {
    if (cursor === null || cursor === undefined) {
      return 1;
    }
    const children = (cursor as { items?: unknown[] }).items;
    if (!Array.isArray(children)) {
      // Scalars (or anything without an `items` array) cannot be descended into.
      return 1;
    }

    const head: unknown = children[0];
    const looksLikeMap =
      children.length > 0 && typeof head === 'object' && head !== null && 'key' in head;

    if (looksLikeMap) {
      const entries = children as { key: { value?: unknown }; value: unknown }[];
      const hit = entries.find((entry) => {
        // Keys may be scalar nodes (with `.value`) or raw values.
        const rawKey =
          entry.key !== null && typeof entry.key === 'object' && 'value' in entry.key
            ? entry.key.value
            : entry.key;
        return String(rawKey) === key;
      });
      if (hit === undefined) {
        return 1;
      }
      cursor = hit.value;
      continue;
    }

    const position = Number(key);
    const inBounds = Number.isInteger(position) && position >= 0 && position < children.length;
    if (!inBounds) {
      return 1;
    }
    cursor = children[position];
  }

  if (cursor === null || typeof cursor !== 'object') {
    return 1;
  }
  const span = (cursor as { range?: readonly [number, number, number] }).range;
  return span === undefined ? 1 : ast.lineCounter.linePos(span[0]).line;
}
|
||||
|
||||
/**
 * Resolve `path` against a markdown AST and convert the per-kind match
 * into a universal `OcMatch`.
 *
 * Frontmatter entries and item fields surface as string leaves; blocks
 * and items surface as nodes (`md-block` / `md-item`); the root surfaces
 * as the root match on line 1.
 *
 * @returns The universal match, or `null` when the path does not resolve
 *          or the match kind is not one of the handled kinds.
 */
function resolveMdToUniversal(ast: MdAst, path: OcPath): OcMatch | null {
  const found = resolveMdOcPath(ast, path);
  if (found === null) {
    return null;
  }
  if (found.kind === 'root') {
    return { kind: 'root', ast, line: 1 };
  }
  if (found.kind === 'frontmatter') {
    return { kind: 'leaf', valueText: found.node.value, leafType: 'string', line: found.node.line };
  }
  if (found.kind === 'block') {
    return { kind: 'node', descriptor: 'md-block', line: found.node.line };
  }
  if (found.kind === 'item') {
    return { kind: 'node', descriptor: 'md-item', line: found.node.line };
  }
  if (found.kind === 'item-field') {
    // The field text lives on the match itself; the line comes from the owning item.
    return { kind: 'leaf', valueText: found.value, leafType: 'string', line: found.node.line };
  }
  return null;
}
|
||||
|
||||
/**
 * Resolve `path` against a JSONC AST and convert the per-kind match into
 * a universal `OcMatch`.
 *
 * For an object entry the line comes from the entry; for any other
 * non-root match the line comes from the value node itself, defaulting
 * to 1 when the value carries no line.
 *
 * @returns The universal match, or `null` when the path does not resolve.
 */
function resolveJsoncToUniversal(ast: JsoncAst, path: OcPath): OcMatch | null {
  const hit = resolveJsoncOcPath(ast, path);
  if (hit === null) {
    return null;
  }
  switch (hit.kind) {
    case 'root':
      return { kind: 'root', ast, line: 1 };
    case 'object-entry':
      return jsoncValueToMatch(hit.node.value, hit.node.line);
    default:
      // Plain value match (e.g. an array element): the line lives on the value.
      return jsoncValueToMatch(hit.node, hit.node.line ?? 1);
  }
}
|
||||
|
||||
/**
 * Convert a JSONC value node into a universal `OcMatch` at `line`.
 *
 * Objects and arrays become `node` matches; strings, numbers, booleans
 * and null become `leaf` matches with the value rendered as text.
 *
 * @param value JSONC value node to describe.
 * @param line  Line number to report on the match.
 * @throws Error if `value.kind` is not one of the known JSONC kinds.
 */
function jsoncValueToMatch(value: JsoncValue, line: number): OcMatch {
  if (value.kind === 'object') {
    return { kind: 'node', descriptor: 'jsonc-object', line };
  }
  if (value.kind === 'array') {
    return { kind: 'node', descriptor: 'jsonc-array', line };
  }
  if (value.kind === 'string') {
    return { kind: 'leaf', valueText: value.value, leafType: 'string', line };
  }
  if (value.kind === 'number') {
    return { kind: 'leaf', valueText: String(value.value), leafType: 'number', line };
  }
  if (value.kind === 'boolean') {
    return { kind: 'leaf', valueText: String(value.value), leafType: 'boolean', line };
  }
  if (value.kind === 'null') {
    return { kind: 'leaf', valueText: 'null', leafType: 'null', line };
  }
  throw new Error(`unreachable: jsoncValueToMatch kind`);
}
|
||||
|
||||
/**
 * Resolve `path` against a JSONL AST and convert the per-kind match into
 * a universal `OcMatch`.
 *
 * Matches inside a line report `m.line` (the file-level line of the
 * JSONL record) rather than the inner node's own line: the inside-line
 * parser restarts numbering at 1 for every record, so the inner value
 * would always claim line 1.
 *
 * @returns The universal match, or `null` when the path does not resolve.
 */
function resolveJsonlToUniversal(ast: JsonlAst, path: OcPath): OcMatch | null {
  const hit = resolveJsonlOcPath(ast, path);
  if (hit === null) {
    return null;
  }
  switch (hit.kind) {
    case 'root':
      return { kind: 'root', ast, line: 1 };
    case 'line':
      return { kind: 'node', descriptor: 'jsonl-line', line: hit.node.line };
    case 'object-entry':
      return jsoncValueToMatch(hit.node.value, hit.line);
    default:
      return jsoncValueToMatch(hit.node, hit.line);
  }
}
|
||||
|
||||
/**
 * Resolve an insertion-marker path by delegating to the resolver for the
 * AST's kind. An insertion is only valid when its parent resolves to a
 * container that kind knows how to extend; each per-kind resolver makes
 * that decision.
 *
 * @returns An insertion-point match, or `null` when the parent is not a
 *          valid insertion target (or the kind is unknown).
 */
function resolveInsertion(ast: OcAst, info: InsertionInfo): OcMatch | null {
  if (ast.kind === 'md') {
    return resolveMdInsertion(ast, info);
  }
  if (ast.kind === 'jsonc') {
    return resolveJsoncInsertion(ast, info);
  }
  if (ast.kind === 'jsonl') {
    return resolveJsonlInsertion(ast, info);
  }
  if (ast.kind === 'yaml') {
    return resolveYamlInsertion(ast, info);
  }
  return null;
}
|
||||
|
||||
/**
 * Resolve a YAML insertion: the parent path must land on a map, a
 * sequence, or a document root that is itself a collection.
 *
 * For the root case the container type is inferred from the root's
 * `items`: if every item has a `key` property it is reported as a map,
 * otherwise as a sequence, always on line 1.
 *
 * @returns An insertion-point match, or `null` when the parent does not
 *          resolve or is not a collection.
 */
function resolveYamlInsertion(ast: YamlAst, info: InsertionInfo): OcMatch | null {
  const parent = resolveYamlOcPath(ast, info.parentPath);
  if (parent === null) {
    return null;
  }
  const line = locateYamlLine(ast, info.parentPath);

  switch (parent.kind) {
    case 'map':
      return { kind: 'insertion-point', container: 'yaml-map', line };
    case 'seq':
      return { kind: 'insertion-point', container: 'yaml-seq', line };
    case 'root': {
      // Top-level insertion: look at what the document root actually is.
      const top = ast.doc.contents;
      if (top === null) {
        return null;
      }
      if (!('items' in (top as object))) {
        return null;
      }
      const entries = (top as { items: { key?: unknown }[] }).items;
      const mapLike = entries.every((entry) => 'key' in entry);
      return { kind: 'insertion-point', container: mapLike ? 'yaml-map' : 'yaml-seq', line: 1 };
    }
    default:
      return null;
  }
}
|
||||
|
||||
/**
 * Resolve a markdown insertion point from the parent path.
 *
 * - no section (`oc://FILE/+`)              → new section at file level
 * - `[frontmatter]` (`oc://FILE/[frontmatter]/+key`) → new frontmatter key
 * - section only (`oc://FILE/section/+`)    → append an item to that section,
 *   valid only when the section resolves to a block
 *
 * @returns An insertion-point match, or `null` for any other parent shape.
 */
function resolveMdInsertion(ast: MdAst, info: InsertionInfo): OcMatch | null {
  const parent = info.parentPath;

  if (parent.section === undefined) {
    return { kind: 'insertion-point', container: 'md-file', line: 1 };
  }
  if (parent.section === '[frontmatter]') {
    return { kind: 'insertion-point', container: 'md-frontmatter', line: 1 };
  }

  const addressesWholeSection = parent.item === undefined && parent.field === undefined;
  if (!addressesWholeSection) {
    return null;
  }
  const resolved = resolveMdOcPath(ast, parent);
  if (resolved === null || resolved.kind !== 'block') {
    return null;
  }
  return { kind: 'insertion-point', container: 'md-section', line: resolved.node.line };
}
|
||||
|
||||
/**
 * Resolve a JSONC insertion: the parent path must land on an object or
 * an array (the document root counts when it exists).
 *
 * The reported line is the container's own line, or 1 when the container
 * carries none.
 *
 * @returns An insertion-point match, or `null` when the parent does not
 *          resolve, the document has no root, or the target is a scalar.
 */
function resolveJsoncInsertion(ast: JsoncAst, info: InsertionInfo): OcMatch | null {
  const hit = resolveJsoncOcPath(ast, info.parentPath);
  if (hit === null) {
    return null;
  }

  let target: JsoncValue;
  switch (hit.kind) {
    case 'root':
      if (ast.root === null) {
        return null;
      }
      target = ast.root;
      break;
    case 'object-entry':
      target = hit.node.value;
      break;
    default:
      target = hit.node;
  }

  if (target.kind !== 'object' && target.kind !== 'array') {
    return null;
  }
  return {
    kind: 'insertion-point',
    container: target.kind === 'object' ? 'jsonc-object' : 'jsonc-array',
    line: target.line ?? 1,
  };
}
|
||||
|
||||
/**
 * Resolve a JSONL insertion. Only the file-level form (`oc://FILE/+`) is
 * meaningful, and the only insertion point is "after the last line".
 *
 * The reported line is the last record's line plus one (1 for an empty
 * file) so consumers can render where the new record will land.
 *
 * @returns An insertion-point match, or `null` when the parent path
 *          names a section.
 */
function resolveJsonlInsertion(ast: JsonlAst, info: InsertionInfo): OcMatch | null {
  if (info.parentPath.section !== undefined) {
    return null;
  }
  const count = ast.lines.length;
  const nextLine = count === 0 ? 1 : ast.lines[count - 1].line + 1;
  return { kind: 'insertion-point', container: 'jsonl-file', line: nextLine };
}
|
||||
|
||||
// ---------- Universal set --------------------------------------------------
|
||||
|
||||
/**
 * Write `value` (always a string) at `path`: replace an existing leaf,
 * or insert new content when the path carries an insertion marker
 * (`+`, `+key`, `+nnn`).
 *
 * Dispatch is by `ast.kind`. For leaf replacement the string is coerced
 * according to the shape of the node already present at the path. For
 * insertions the string is parsed as kind-appropriate content (JSON for
 * jsonc / jsonl / yaml; plain text for md).
 *
 * Wildcard paths are rejected with `wildcard-not-allowed`: a pattern
 * could match several nodes and there is no sensible winner. Callers
 * that want a multi-set should enumerate with `findOcPaths` and call
 * `setOcPath` once per concrete path.
 *
 * Failures on parser-tolerated input are reported through the returned
 * `SetResult`, not by throwing. Per the substrate contract, sentinel-guard
 * violations raised during emit DO throw `OcEmitSentinelError` (defense
 * in depth — redacted content is never written, even on request).
 *
 * @throws Error if `ast.kind` is not one of the known kinds.
 */
export function setOcPath(ast: OcAst, path: OcPath, value: string): SetResult {
  if (hasWildcard(path)) {
    return {
      ok: false,
      reason: 'wildcard-not-allowed',
      detail: 'setOcPath requires a concrete path; use findOcPaths to enumerate matches first',
    };
  }

  const insertion = detectInsertion(path);
  if (insertion !== null) {
    return setInsertion(ast, insertion, value);
  }

  if (ast.kind === 'md') {
    return setMdLeaf(ast, path, value);
  }
  if (ast.kind === 'jsonc') {
    return setJsoncLeaf(ast, path, value);
  }
  if (ast.kind === 'jsonl') {
    return setJsonlLeaf(ast, path, value);
  }
  if (ast.kind === 'yaml') {
    return setYamlLeaf(ast, path, value);
  }
  throw new Error(`unreachable: setOcPath kind`);
}
|
||||
|
||||
/**
 * Replace the YAML node at `path` with `value`, coercing the string to
 * the type of the scalar currently stored there.
 *
 * Coercion rules for an existing scalar / pair value:
 * - number  → `value` must parse to a finite number; blank input is
 *             rejected (plain `Number('')` would silently yield 0)
 * - boolean → `value` must be exactly `true` or `false`
 * - null    → `value` must be exactly `null`
 * - anything else → the string is written as-is
 *
 * Maps and sequences receive the raw string unchanged.
 *
 * @returns `unresolved` when the path does not resolve, `not-writable`
 *          for the document root, `parse-error` when coercion fails,
 *          otherwise the result of `setYamlOcPath`.
 */
function setYamlLeaf(ast: YamlAst, path: OcPath, value: string): SetResult {
  const existing = resolveYamlOcPath(ast, path);
  if (existing === null) {
    return { ok: false, reason: 'unresolved' };
  }
  if (existing.kind === 'root') {
    return { ok: false, reason: 'not-writable', detail: 'root replacement not supported via setOcPath' };
  }

  // Coerce value based on the existing scalar type.
  let coerced: unknown = value;
  if (existing.kind === 'scalar' || existing.kind === 'pair') {
    const cur: unknown = existing.value;
    if (typeof cur === 'number') {
      // Number('') and Number('  ') are 0, which would turn an empty
      // write into a silent zero; treat blank input as unparseable.
      const n = value.trim() === '' ? Number.NaN : Number(value);
      if (!Number.isFinite(n)) {
        return { ok: false, reason: 'parse-error' };
      }
      coerced = n;
    } else if (typeof cur === 'boolean') {
      if (value === 'true') {
        coerced = true;
      } else if (value === 'false') {
        coerced = false;
      } else {
        return { ok: false, reason: 'parse-error' };
      }
    } else if (cur === null) {
      if (value !== 'null') {
        return { ok: false, reason: 'parse-error' };
      }
      coerced = null;
    }
  }

  const r = setYamlOcPath(ast, path, coerced);
  if (r.ok) {
    return { ok: true, ast: r.ast };
  }
  return { ok: false, reason: r.reason };
}
|
||||
|
||||
/**
 * Replace the markdown leaf at `path` with `value`. Markdown leaves are
 * plain text, so no coercion is applied; the per-kind result is narrowed
 * to the universal `SetResult` shape (`ok` + `ast`, or `ok` + `reason`).
 */
function setMdLeaf(ast: MdAst, path: OcPath, value: string): SetResult {
  const outcome = setMdOcPath(ast, path, value);
  return outcome.ok
    ? { ok: true, ast: outcome.ast }
    : { ok: false, reason: outcome.reason };
}
|
||||
|
||||
/**
 * Replace the JSONC leaf at `path` with `value`, coerced to the kind of
 * the value currently stored there (see `coerceJsoncLeaf`).
 *
 * @returns `unresolved` when the path does not resolve, `not-writable`
 *          for the document root, `parse-error` when the string cannot
 *          be coerced to the existing kind, otherwise the narrowed
 *          result of `setJsoncOcPath`.
 */
function setJsoncLeaf(ast: JsoncAst, path: OcPath, value: string): SetResult {
  const hit = resolveJsoncOcPath(ast, path);
  if (hit === null) {
    return { ok: false, reason: 'unresolved' };
  }
  if (hit.kind === 'root') {
    return { ok: false, reason: 'not-writable', detail: 'root replacement is not supported via setOcPath' };
  }

  // The existing node decides the target type for coercion.
  const current = hit.kind === 'object-entry' ? hit.node.value : hit.node;
  const replacement = coerceJsoncLeaf(value, current);
  if (replacement === null) {
    return { ok: false, reason: 'parse-error', detail: `cannot coerce "${value}" to ${current.kind}` };
  }

  const outcome = setJsoncOcPath(ast, path, replacement);
  return outcome.ok
    ? { ok: true, ast: outcome.ast }
    : { ok: false, reason: outcome.reason };
}
|
||||
|
||||
/**
 * Replace content in a JSONL AST at `path`.
 *
 * - Path resolves to a whole line → `value` must be JSON and replaces
 *   the entire record.
 * - Path resolves to a field inside a line → `value` is coerced to the
 *   kind of the existing leaf (see `coerceJsoncLeaf`).
 *
 * @returns `unresolved` when the path does not resolve, `not-writable`
 *          for the file root, `parse-error` when the value cannot be
 *          parsed or coerced, otherwise the narrowed result of
 *          `setJsonlOcPath`.
 */
function setJsonlLeaf(ast: JsonlAst, path: OcPath, value: string): SetResult {
  const hit = resolveJsonlOcPath(ast, path);
  if (hit === null) {
    return { ok: false, reason: 'unresolved' };
  }
  if (hit.kind === 'root') {
    return { ok: false, reason: 'not-writable', detail: 'root replacement is not supported via setOcPath' };
  }

  let replacement: JsoncValue;
  if (hit.kind === 'line') {
    // Whole-line replacement: the new content must itself be JSON.
    const json = tryParseJson(value);
    if (json === undefined) {
      return { ok: false, reason: 'parse-error', detail: `line replacement requires JSON value` };
    }
    replacement = jsonToJsoncValue(json);
  } else {
    // Field inside a line: coerce against the existing leaf.
    const current = hit.kind === 'object-entry' ? hit.node.value : hit.node;
    const coerced = coerceJsoncLeaf(value, current);
    if (coerced === null) {
      return { ok: false, reason: 'parse-error', detail: `cannot coerce "${value}" to ${current.kind}` };
    }
    replacement = coerced;
  }

  const outcome = setJsonlOcPath(ast, path, replacement);
  return outcome.ok
    ? { ok: true, ast: outcome.ast }
    : { ok: false, reason: outcome.reason };
}
|
||||
|
||||
/**
 * Perform an insertion by delegating to the inserter for the AST's kind.
 *
 * @throws Error if `ast.kind` is not one of the known kinds.
 */
function setInsertion(ast: OcAst, info: InsertionInfo, value: string): SetResult {
  if (ast.kind === 'md') {
    return setMdInsertion(ast, info, value);
  }
  if (ast.kind === 'jsonc') {
    return setJsoncInsertion(ast, info, value);
  }
  if (ast.kind === 'jsonl') {
    return setJsonlInsertion(ast, info, value);
  }
  if (ast.kind === 'yaml') {
    return setYamlInsertion(ast, info, value);
  }
  throw new Error(`unreachable: setInsertion kind`);
}
|
||||
|
||||
/**
 * Insert into a YAML map or sequence.
 *
 * The value string must be JSON so callers can insert structured nodes
 * uniformly: simple scalars use the JSON forms `"foo"` / `42` / `true`,
 * complex shapes use JSON objects and arrays.
 *
 * @returns `parse-error` when `value` is not valid JSON, otherwise the
 *          narrowed result of `insertYamlOcPath`.
 */
function setYamlInsertion(ast: YamlAst, info: InsertionInfo, value: string): SetResult {
  const json = tryParseJson(value);
  if (json === undefined) {
    return { ok: false, reason: 'parse-error', detail: 'yaml insertion requires JSON value' };
  }
  const outcome = insertYamlOcPath(ast, info.parentPath, info.marker, json);
  return outcome.ok
    ? { ok: true, ast: outcome.ast }
    : { ok: false, reason: outcome.reason };
}
|
||||
|
||||
/**
 * Insert into a markdown AST. Three parent shapes are supported:
 *
 * 1. File level (no section, bare `+`): append a new section whose
 *    heading is `value` and whose body is empty.
 * 2. `[frontmatter]` with `+key`: append a new frontmatter entry; an
 *    existing key is rejected with `type-mismatch`.
 * 3. Section only (bare `+`): append a `- value` bullet to the section
 *    body. When `value` looks like `key: value` the new item also
 *    carries a parsed `kv` pair and is slugged by its key.
 *
 * Newly created nodes get `line: 0`; `raw` is regenerated through
 * `rebuildMdRaw`.
 *
 * @returns `not-writable` for an unsupported marker or parent shape,
 *          `unresolved` when the section does not exist, otherwise the
 *          updated AST.
 */
function setMdInsertion(ast: MdAst, info: InsertionInfo, value: string): SetResult {
  const parent = info.parentPath;
  const sectionName = parent.section;

  // File level: append a section. Value is the heading text; body empty.
  if (sectionName === undefined) {
    if (info.marker !== '+') {
      return { ok: false, reason: 'not-writable', detail: 'md file-level insertion uses bare `+`' };
    }
    const appendedBlock = {
      heading: value,
      slug: slugifyHeading(value),
      line: 0,
      bodyText: '',
      items: [],
      tables: [],
      codeBlocks: [],
    };
    const withSection: MdAst = { ...ast, blocks: [...ast.blocks, appendedBlock] };
    return { ok: true, ast: rebuildMdRaw(withSection) };
  }

  // [frontmatter]: keyed insertion only.
  if (sectionName === '[frontmatter]') {
    if (typeof info.marker !== 'object' || info.marker.kind !== 'keyed') {
      return { ok: false, reason: 'not-writable', detail: 'md frontmatter insertion requires +key' };
    }
    const key = info.marker.key;
    const alreadyPresent = ast.frontmatter.some((entry) => entry.key === key);
    if (alreadyPresent) {
      return { ok: false, reason: 'type-mismatch', detail: `frontmatter key '${key}' already exists; use set, not insert` };
    }
    const withEntry: MdAst = {
      ...ast,
      frontmatter: [...ast.frontmatter, { key, value, line: 0 }],
    };
    return { ok: true, ast: rebuildMdRaw(withEntry) };
  }

  // Anything deeper than a bare section is not an insertion target.
  if (parent.item !== undefined || parent.field !== undefined) {
    return { ok: false, reason: 'not-writable' };
  }

  // Section level: append an item. Value can be `key: value` (kv) or plain text.
  if (info.marker !== '+') {
    return { ok: false, reason: 'not-writable', detail: 'md section insertion uses bare `+`' };
  }
  const wantedSlug = sectionName.toLowerCase();
  const targetIndex = ast.blocks.findIndex((candidate) => candidate.slug === wantedSlug);
  if (targetIndex === -1) {
    return { ok: false, reason: 'unresolved' };
  }
  const target = ast.blocks[targetIndex];

  const kvMatch = /^([^:]+?)\s*:\s*(.+)$/.exec(value);
  const bullet = `- ${value}`;
  const appendedItem = {
    text: value,
    slug: slugifyHeading(kvMatch ? kvMatch[1] : value),
    line: 0,
    ...(kvMatch !== null
      ? { kv: { key: kvMatch[1].trim(), value: kvMatch[2].trim() } }
      : {}),
  };

  // Collapse trailing newlines on the existing body to exactly one
  // before appending the bullet.
  let updatedBody: string;
  if (target.bodyText.length === 0) {
    updatedBody = bullet;
  } else {
    updatedBody = target.bodyText.replace(/\n*$/, '\n') + bullet;
  }

  const updatedBlocks = ast.blocks.map((candidate, index) =>
    index === targetIndex
      ? { ...target, items: [...target.items, appendedItem], bodyText: updatedBody }
      : candidate,
  );
  return { ok: true, ast: rebuildMdRaw({ ...ast, blocks: updatedBlocks }) };
}
|
||||
|
||||
/**
 * Insert a JSON value into a JSONC object or array.
 *
 * - Array parent: bare `+` appends; `+nnn` inserts at that index
 *   (clamped to the array length); `+key` is rejected.
 * - Object parent: only `+key` is accepted; inserting a key that already
 *   exists makes the mutation fail, which surfaces as `unresolved`.
 *
 * @returns `unresolved` when the parent is not a valid container,
 *          `parse-error` when `value` is not JSON, `type-mismatch` when
 *          the marker does not fit the container, otherwise the result
 *          of `mutateJsoncContainer`.
 */
function setJsoncInsertion(ast: JsoncAst, info: InsertionInfo, value: string): SetResult {
  const target = resolveJsoncInsertion(ast, info);
  if (target === null) {
    return { ok: false, reason: 'unresolved' };
  }

  const json = tryParseJson(value);
  if (json === undefined) {
    return { ok: false, reason: 'parse-error', detail: 'jsonc insertion requires JSON value' };
  }
  const inserted = jsonToJsoncValue(json);

  if (target.kind !== 'insertion-point') {
    return { ok: false, reason: 'unresolved' };
  }

  const marker = info.marker;
  const keyedMarker = typeof marker === 'object' && marker.kind === 'keyed';

  if (target.container === 'jsonc-array') {
    // Index `+0` valid; bare `+` appends; `+key` rejected.
    if (keyedMarker) {
      return { ok: false, reason: 'type-mismatch', detail: 'cannot insert by key into array' };
    }
    return mutateJsoncContainer(ast, info.parentPath, (container) => {
      if (container.kind !== 'array') {
        return null;
      }
      const items = container.items.slice();
      if (info.marker === '+') {
        items.push(inserted);
      } else if (typeof info.marker === 'object' && info.marker.kind === 'indexed') {
        const at = Math.min(info.marker.index, items.length);
        items.splice(at, 0, inserted);
      }
      const lineExt = container.line !== undefined ? { line: container.line } : {};
      return { kind: 'array', items, ...lineExt };
    });
  }

  // Object container: a key is mandatory.
  if (typeof info.marker !== 'object' || info.marker.kind !== 'keyed') {
    return { ok: false, reason: 'type-mismatch', detail: 'jsonc object insertion requires +key' };
  }
  const key = info.marker.key;
  return mutateJsoncContainer(ast, info.parentPath, (container) => {
    if (container.kind !== 'object') {
      return null;
    }
    const duplicate = container.entries.some((entry) => entry.key === key);
    if (duplicate) {
      return null;
    }
    const appended: JsoncEntry = { key, value: inserted, line: 0 };
    const lineExt = container.line !== undefined ? { line: container.line } : {};
    return { kind: 'object', entries: [...container.entries, appended], ...lineExt };
  });
}
|
||||
|
||||
/**
 * Append a record to a JSONL file. Only the file-level bare-`+` form
 * (`oc://FILE/+`) is supported, and the value must be JSON.
 *
 * @returns `not-writable` for any other parent or marker, `parse-error`
 *          when `value` is not JSON, otherwise the AST with the new line
 *          appended.
 */
function setJsonlInsertion(ast: JsonlAst, info: InsertionInfo, value: string): SetResult {
  const isFileLevelAppend = info.parentPath.section === undefined && info.marker === '+';
  if (!isFileLevelAppend) {
    return { ok: false, reason: 'not-writable', detail: 'jsonl insertion only supports oc://FILE/+ append' };
  }
  const json = tryParseJson(value);
  if (json === undefined) {
    return { ok: false, reason: 'parse-error', detail: 'jsonl line append requires JSON value' };
  }
  const record = jsonToJsoncValue(json);
  return { ok: true, ast: appendJsonlLine(ast, record) };
}
|
||||
|
||||
// ---------- Internal helpers -----------------------------------------------
|
||||
|
||||
/**
 * Coerce a string to a JSONC leaf of the same kind as `existing`.
 *
 * - string  → taken verbatim
 * - number  → must parse to a finite number; blank input is rejected
 *             (plain `Number('')` would silently yield 0)
 * - boolean → must be exactly `true` or `false`
 * - null    → must be exactly `null`
 * - object / array → not coercible here; callers should use insertion
 *   or a full-replace path
 *
 * The existing node's `line`, when present, is carried over: the
 * semantic node is the same and only its bytes change.
 *
 * @param valueText Replacement text supplied by the caller.
 * @param existing  Node currently stored at the target path.
 * @returns The replacement leaf, or `null` when coercion is impossible.
 */
function coerceJsoncLeaf(valueText: string, existing: JsoncValue): JsoncValue | null {
  const lineExt = existing.line !== undefined ? { line: existing.line } : {};

  switch (existing.kind) {
    case 'string':
      return { kind: 'string', value: valueText, ...lineExt };
    case 'number': {
      // Reject '' and whitespace-only input before Number() maps it to 0.
      if (valueText.trim() === '') {
        return null;
      }
      const n = Number(valueText);
      return Number.isFinite(n) ? { kind: 'number', value: n, ...lineExt } : null;
    }
    case 'boolean':
      if (valueText === 'true') {
        return { kind: 'boolean', value: true, ...lineExt };
      }
      if (valueText === 'false') {
        return { kind: 'boolean', value: false, ...lineExt };
      }
      return null;
    case 'null':
      return valueText === 'null' ? { kind: 'null', ...lineExt } : null;
    default:
      // Object / array leaf: not a scalar replacement.
      return null;
  }
}
|
||||
|
||||
/**
 * Parse `value` as JSON without throwing.
 *
 * @returns The parsed value, or `undefined` when `value` is not valid
 *          JSON. `JSON.parse` never produces `undefined` itself, so
 *          `undefined` unambiguously signals failure.
 */
function tryParseJson(value: string): unknown {
  let parsed: unknown;
  try {
    parsed = JSON.parse(value);
  } catch {
    parsed = undefined;
  }
  return parsed;
}
|
||||
|
||||
/**
 * Convert a plain parsed-JSON value into the JSONC value tree.
 *
 * Synthetic value nodes omit `line` (it is optional on values): the
 * parser is the only source of real line metadata. Object entries get
 * `line: 0`.
 *
 * @throws Error for inputs JSON cannot represent (undefined, function,
 *         symbol, ...). `JSON.parse` never produces these.
 */
function jsonToJsoncValue(v: unknown): JsoncValue {
  if (v === null) {
    return { kind: 'null' };
  }
  switch (typeof v) {
    case 'string':
      return { kind: 'string', value: v };
    case 'number':
      return { kind: 'number', value: v };
    case 'boolean':
      return { kind: 'boolean', value: v };
    case 'object': {
      if (Array.isArray(v)) {
        return { kind: 'array', items: v.map((element) => jsonToJsoncValue(element)) };
      }
      const entries = Object.entries(v as Record<string, unknown>).map(([key, inner]) => ({
        key,
        value: jsonToJsoncValue(inner),
        line: 0,
      }));
      return { kind: 'object', entries };
    }
    default:
      throw new Error(`unsupported JSON value type: ${typeof v}`);
  }
}
|
||||
|
||||
/**
 * Apply `mutate` to the container addressed by `parentPath` and return a
 * new JSONC AST whose `raw` is re-rendered from the updated tree.
 *
 * The parent path is split with the same quote-aware splitter that
 * `resolveJsoncOcPath` validates with, so insertion under a key
 * containing `/`, `.` or other special characters works. If the two
 * disagreed, an insertion could be reported as valid and then fail here
 * as unresolved.
 *
 * @param ast        Source AST (not modified).
 * @param parentPath Address of the container to mutate; an empty path
 *                   targets the root.
 * @param mutate     Returns the replacement container, or `null` to
 *                   abort.
 * @returns `no-root` for an empty document, `unresolved` when the walk
 *          or the mutation fails, otherwise the rebuilt AST.
 */
function mutateJsoncContainer(
  ast: JsoncAst,
  parentPath: OcPath,
  mutate: (container: JsoncValue) => JsoncValue | null,
): SetResult {
  if (ast.root === null) {
    return { ok: false, reason: 'no-root' };
  }

  const segments: string[] = [];
  for (const slot of [parentPath.section, parentPath.item, parentPath.field]) {
    if (slot !== undefined) {
      segments.push(...splitRespectingBrackets(slot, '.'));
    }
  }

  let updatedRoot: JsoncValue | null;
  if (segments.length === 0) {
    updatedRoot = mutate(ast.root);
  } else {
    updatedRoot = mutateAt(ast.root, segments, 0, mutate);
  }
  if (updatedRoot === null) {
    return { ok: false, reason: 'unresolved' };
  }

  // Build the AST with a placeholder `raw`, then render the real text from it.
  const next: JsoncAst = { kind: 'jsonc', raw: '', root: updatedRoot };
  const rendered = emitJsonc(next, { mode: 'render' });
  return { ok: true, ast: { ...next, raw: rendered } };
}
|
||||
|
||||
/**
 * Recursively walk `segments` from index `i` through `current`, apply
 * `mutate` to the node at the end of the path, and rebuild each ancestor
 * on the way back out. Input nodes are copied, never modified.
 *
 * Object steps compare against unquoted entry keys (quoted segments are
 * unquoted first, matching `setJsoncOcPath`'s lookup); array steps
 * require an in-range non-negative integer index.
 *
 * @returns The rebuilt node, or `null` when a segment is empty, a key or
 *          index is missing, the path runs through a scalar, or `mutate`
 *          itself returns `null`.
 */
function mutateAt(
  current: JsoncValue,
  segments: readonly string[],
  i: number,
  mutate: (container: JsoncValue) => JsoncValue | null,
): JsoncValue | null {
  const step = segments[i];
  if (step === undefined) {
    // Path exhausted: this is the container to mutate.
    return mutate(current);
  }
  if (step.length === 0) {
    return null;
  }

  const lineExt = current.line !== undefined ? { line: current.line } : {};

  switch (current.kind) {
    case 'object': {
      const wantedKey = isQuotedSeg(step) ? unquoteSeg(step) : step;
      const position = current.entries.findIndex((entry) => entry.key === wantedKey);
      if (position === -1) {
        return null;
      }
      const entry = current.entries[position];
      const rebuilt = mutateAt(entry.value, segments, i + 1, mutate);
      if (rebuilt === null) {
        return null;
      }
      const entries = current.entries.map((original, index) =>
        index === position ? { ...entry, value: rebuilt } : original,
      );
      return { kind: 'object', entries, ...lineExt };
    }
    case 'array': {
      const position = Number(step);
      const inBounds =
        Number.isInteger(position) && position >= 0 && position < current.items.length;
      if (!inBounds) {
        return null;
      }
      const rebuilt = mutateAt(current.items[position], segments, i + 1, mutate);
      if (rebuilt === null) {
        return null;
      }
      const items = current.items.map((original, index) =>
        index === position ? rebuilt : original,
      );
      return { kind: 'array', items, ...lineExt };
    }
    default:
      // Scalars have no children to descend into.
      return null;
  }
}
|
||||
|
||||
/**
 * Regenerate `raw` for a markdown AST from its structured parts.
 *
 * Layout, joined with `\n`:
 * - frontmatter (when non-empty) fenced by `---` lines, one
 *   `key: value` line per entry
 * - the preamble (when non-empty)
 * - each block as a `## heading` line followed by its body text (when
 *   non-empty)
 *
 * A single empty line separates each of these parts from whatever was
 * emitted before it.
 *
 * @returns A copy of `ast` with `raw` replaced.
 */
function rebuildMdRaw(ast: MdAst): MdAst {
  const lines: string[] = [];
  const separate = (): void => {
    if (lines.length > 0) {
      lines.push('');
    }
  };

  if (ast.frontmatter.length > 0) {
    lines.push('---');
    ast.frontmatter.forEach((entry) => {
      lines.push(`${entry.key}: ${formatFrontmatterValue(entry.value)}`);
    });
    lines.push('---');
  }

  if (ast.preamble.length > 0) {
    separate();
    lines.push(ast.preamble);
  }

  for (const section of ast.blocks) {
    separate();
    lines.push(`## ${section.heading}`);
    if (section.bodyText.length > 0) {
      lines.push(section.bodyText);
    }
  }

  // Keep `emitJsonl` referenced: it is imported for symmetry, but only
  // emitJsonc is actually used (in the jsonc mutation helper).
  void emitJsonl;

  return { ...ast, raw: lines.join('\n') };
}
|
||||
|
||||
/**
 * Render a frontmatter value for emission.
 *
 * Empty strings and values containing any of the listed special
 * characters (or a CR / LF) are emitted as a JSON string literal;
 * everything else is emitted verbatim.
 */
function formatFrontmatterValue(value: string): string {
  const needsQuoting = value.length === 0 || /[:#&*?|<>=!%@`,[\]{}\r\n]/.test(value);
  return needsQuoting ? JSON.stringify(value) : value;
}
|
||||
|
||||
/**
 * Turn heading text into a slug: lowercase, trim, collapse every run of
 * characters outside `a-z0-9` into a single `-`, then strip leading and
 * trailing dashes.
 */
function slugifyHeading(s: string): string {
  const normalized = s.toLowerCase().trim();
  const dashed = normalized.replace(/[^a-z0-9]+/g, '-');
  return dashed.replace(/^-+|-+$/g, '');
}
|
||||
37
src/oc-path/yaml/ast.ts
Normal file
37
src/oc-path/yaml/ast.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
/**
|
||||
* YAML AST types — wraps the `yaml` library's Document model so the
|
||||
* substrate can address YAML nodes via `OcPath` while preserving the
|
||||
* authoring shape (comments, anchors, etc.) for round-trip emit.
|
||||
*
|
||||
* **Per-kind discriminator**: `kind: 'yaml'` matches the md / jsonc /
|
||||
* jsonl pattern. The universal `setOcPath` / `resolveOcPath` dispatch
|
||||
* via `ast.kind`.
|
||||
*
|
||||
* **Byte-fidelity**: `raw` is preserved on the root for round-trip
|
||||
* emit. The internal `doc` is the parsed `yaml.Document` from the
|
||||
* `yaml` package — comment-preserving, anchor-aware.
|
||||
*
|
||||
* Lobster `.lobster` files (workflow specs) and `.craft/waves/*.yaml`
|
||||
* (craft system) both flow through this kind.
|
||||
*
|
||||
* @module @openclaw/oc-path/yaml/ast
|
||||
*/
|
||||
|
||||
import type { Document, LineCounter } from 'yaml';
|
||||
|
||||
/**
 * Root of the YAML AST.
 *
 * Carries the original text next to the parsed document so an
 * unmodified AST can be emitted unchanged from `raw`.
 */
export interface YamlAst {
  /** Discriminator used by the universal verbs to dispatch per kind. */
  readonly kind: 'yaml';
  /** Source text of the document this AST represents. */
  readonly raw: string;
  /** Parsed `yaml.Document` — the library's comment-preserving model. */
  readonly doc: Document.Parsed;
  /**
   * `LineCounter` from the `yaml` package. Passing a node's `range[0]`
   * offset to `lineCounter.linePos(offset)` yields `{ line, col }`
   * (1-based), so findings can report a real line instead of a
   * hardcoded `line: 1`.
   */
  readonly lineCounter: LineCounter;
}
|
||||
236
src/oc-path/yaml/edit.ts
Normal file
236
src/oc-path/yaml/edit.ts
Normal file
@@ -0,0 +1,236 @@
|
||||
/**
|
||||
* Mutate a `YamlAst` at an OcPath. Returns a new AST with the value
|
||||
* replaced.
|
||||
*
|
||||
* Implementation uses `doc.setIn(path, value)` from the `yaml` package
|
||||
* — comment-preserving on edit. Adding a new key does NOT preserve
|
||||
* surrounding formatting verbatim (the `yaml` library handles
|
||||
* pretty-printing); for byte-exact preservation use round-trip emit
|
||||
* on unmodified ASTs.
|
||||
*
|
||||
* @module @openclaw/oc-path/yaml/edit
|
||||
*/
|
||||
|
||||
import {
|
||||
Document,
|
||||
isMap,
|
||||
isScalar,
|
||||
isSeq,
|
||||
LineCounter,
|
||||
parseDocument,
|
||||
type Node,
|
||||
type Pair,
|
||||
} from 'yaml';
|
||||
import type { OcPath } from '../oc-path.js';
|
||||
import {
|
||||
isPositionalSeg,
|
||||
isQuotedSeg,
|
||||
resolvePositionalSeg,
|
||||
splitRespectingBrackets,
|
||||
unquoteSeg,
|
||||
} from '../oc-path.js';
|
||||
import type { YamlAst } from './ast.js';
|
||||
|
||||
/**
 * Outcome of a YAML edit.
 *
 * On success carries the new AST; on failure carries a reason:
 * `unresolved` (path does not address an existing node), `no-root`
 * (document has no contents), or `parse-error`.
 */
export type YamlEditResult =
  | { readonly ok: true; readonly ast: YamlAst }
  | { readonly ok: false; readonly reason: 'unresolved' | 'no-root' | 'parse-error' };
|
||||
|
||||
/**
 * Replace the value at `path` in a YAML document and return a new AST.
 * The input AST is left untouched: the edit is applied to a clone.
 *
 * Positional tokens (`$first` / `$last` / `-N`) are resolved against the
 * real map keys / sequence sizes before the segments reach the yaml
 * library. Otherwise `hasIn(['$last'])` would treat the token as a
 * literal key and quietly fail, making writes disagree with
 * `resolveYamlOcPath`, which honours positional tokens on lookup.
 *
 * The path must already exist. `setIn` on a missing path would create
 * intermediate nodes, which is insertion semantics rather than set.
 *
 * @returns `no-root` when the document is empty, `unresolved` when the
 *          path is empty or does not resolve, otherwise the edited AST
 *          with `raw` re-rendered from the clone.
 */
export function setYamlOcPath(
  ast: YamlAst,
  path: OcPath,
  newValue: unknown,
): YamlEditResult {
  const root = ast.doc.contents;
  if (root === null) {
    return { ok: false, reason: 'no-root' };
  }

  const requested = pathSegments(path);
  if (requested.length === 0) {
    return { ok: false, reason: 'unresolved' };
  }

  const concrete = resolvePositionalSegments(root as Node, requested);
  if (concrete === null || !ast.doc.hasIn(concrete)) {
    return { ok: false, reason: 'unresolved' };
  }

  // Edit a clone so the caller's AST stays as it was.
  const copy = cloneDoc(ast.doc);
  copy.doc.setIn(concrete, newValue);
  return {
    ok: true,
    ast: { kind: 'yaml', raw: copy.doc.toString(), doc: copy.doc, lineCounter: copy.lineCounter },
  };
}
|
||||
|
||||
/**
 * Append-style insertion: add a new key to a map or push to a seq at
 * `parentPath`. Used by the universal `setOcPath` when the path carries a
 * `+` / `+key` / `+nnn` insertion marker.
 *
 * The input AST is never mutated; the edit is applied to a clone.
 *
 * @param ast - Parsed YAML AST to edit.
 * @param parentPath - Address of the container receiving the new child.
 *   An empty path targets the document root.
 * @param marker - `'+'` appends to a seq, `{ kind: 'indexed' }` inserts
 *   into a seq at `index` (an index past the end appends), and
 *   `{ kind: 'keyed' }` adds `key` to a map.
 * @param newValue - Value to insert.
 * @returns `{ ok: true, ast }` with the edited clone, or `{ ok: false }`
 *   with reason `no-root`, `parse-error` (document has parse errors and
 *   cannot be re-serialized), or `unresolved` (parent missing, parent is
 *   not a container, marker kind does not fit the container, the key
 *   already exists, or the index is negative / not an integer).
 */
export function insertYamlOcPath(
  ast: YamlAst,
  parentPath: OcPath,
  marker: '+' | { kind: 'keyed'; key: string } | { kind: 'indexed'; index: number },
  newValue: unknown,
): YamlEditResult {
  if (ast.doc.contents === null) {
    return { ok: false, reason: 'no-root' };
  }

  const rawParentSegments = pathSegments(parentPath);
  // Resolve positional tokens against the live document before walking
  // — same rationale as setYamlOcPath; `getIn(['$last'])` would treat
  // the token as a literal key and miss the actual last child.
  const segments =
    rawParentSegments.length === 0
      ? rawParentSegments
      : resolvePositionalSegments(ast.doc.contents as Node, rawParentSegments);
  if (segments === null) {
    return { ok: false, reason: 'unresolved' };
  }

  // Cloning and the final emit both go through `doc.toString()`, which
  // the yaml package refuses (throws) for a document with parse errors.
  if (ast.doc.errors.length > 0) {
    return { ok: false, reason: 'parse-error' };
  }

  const { doc: cloned, lineCounter } = cloneDoc(ast.doc);
  const finish = (): YamlEditResult => ({
    ok: true,
    ast: { kind: 'yaml', raw: cloned.toString(), doc: cloned, lineCounter },
  });

  // Find the parent node. Collections come back from `getIn` as yaml
  // nodes (YAMLMap / YAMLSeq), never as plain JS objects or arrays.
  const parent: unknown =
    segments.length === 0 ? cloned.contents : cloned.getIn(segments, false);

  // Map insertion → keyed marker only. Classify with the library's own
  // guards: sniffing `items` for pairs misreads an empty seq as a map
  // (`every()` is vacuously true), which blocked appends to `[]`.
  if (isMap(parent)) {
    if (typeof marker !== 'object' || marker.kind !== 'keyed') {
      return { ok: false, reason: 'unresolved' };
    }
    const target = [...segments, marker.key];
    // Reject duplicate — insertion must not overwrite an existing key.
    if (cloned.hasIn(target)) {
      return { ok: false, reason: 'unresolved' };
    }
    cloned.setIn(target, newValue);
    return finish();
  }

  // Seq insertion → append or indexed marker.
  if (isSeq(parent)) {
    if (marker === '+') {
      cloned.addIn(segments, newValue);
      return finish();
    }
    if (marker.kind === 'keyed') {
      return { ok: false, reason: 'unresolved' };
    }
    if (!Number.isInteger(marker.index) || marker.index < 0) {
      return { ok: false, reason: 'unresolved' };
    }
    // Splice straight into the seq's item list. Reading the seq through
    // `getIn` and testing `Array.isArray` can never succeed because the
    // library returns the YAMLSeq node itself; editing `items` in place
    // also keeps the sibling nodes (and their comments) untouched.
    const idx = Math.min(marker.index, parent.items.length);
    parent.items.splice(idx, 0, cloned.createNode(newValue));
    return finish();
  }

  // Scalar, alias, or missing parent — nothing to insert into.
  return { ok: false, reason: 'unresolved' };
}
|
||||
|
||||
/**
 * Walk `segments` against the live document, replacing each positional
 * token (`$first` / `$last` / `-N`) with the concrete key (for maps) or
 * index (for seqs) at that depth.
 *
 * A token that looks positional but cannot be resolved at its depth is
 * kept as a literal segment instead of aborting. This matches `walkNode`
 * in resolve.ts, which falls through to a literal-key lookup in that
 * case (e.g. a negative number used as a map key), so read and write
 * agree on which child each segment names. A literal that matches
 * nothing still fails later at the caller's `hasIn` / `getIn` check.
 *
 * NOTE(review): map children are matched here by `String(key)`, while the
 * yaml library's `hasIn` / `setIn` appear to compare keys strictly — an
 * unquoted numeric YAML key may therefore still not match its string
 * segment on write. Confirm against the yaml package.
 *
 * @param root - Root content node of the document.
 * @param segments - Path segments, already split and unquoted.
 * @returns The segments with positional tokens made concrete, or `null`
 *   when the walk runs out of nodes before the segments are exhausted or
 *   a seq segment is not a valid in-range index — callers treat `null`
 *   as `unresolved` and refuse to write.
 */
function resolvePositionalSegments(
  root: Node,
  segments: readonly string[],
): string[] | null {
  const out: string[] = [];
  let node: Node | null = root;
  for (const seg of segments) {
    if (node === null) {
      return null;
    }
    // Unresolvable positional tokens fall back to the literal segment.
    const segNorm = isPositionalSeg(seg) ? (positionalForYamlNode(node, seg) ?? seg) : seg;
    out.push(segNorm);

    if (isMap(node)) {
      const pairs: readonly Pair[] = (node as { items: readonly Pair[] }).items;
      const pair: Pair | undefined = pairs.find((p) => {
        const k = isScalar(p.key) ? p.key.value : p.key;
        return String(k) === segNorm;
      });
      node = (pair?.value as Node | undefined) ?? null;
      continue;
    }

    if (isSeq(node)) {
      const idx = Number(segNorm);
      if (!Number.isInteger(idx) || idx < 0 || idx >= node.items.length) {
        return null;
      }
      node = (node.items[idx] as Node | null) ?? null;
      continue;
    }

    // Scalar — we still emit the literal segment so the next-step
    // hasIn check sees the same shape and fails cleanly with
    // `unresolved`. Don't try to descend further.
    node = null;
  }
  return out;
}
|
||||
|
||||
/**
 * Translate one positional token into a concrete segment for `node`.
 *
 * Seqs are described to `resolvePositionalSeg` as indexable with their
 * item count; maps are described as non-indexable with their stringified
 * keys. Any other node kind yields `null`.
 */
function positionalForYamlNode(node: Node, seg: string): string | null {
  if (isSeq(node)) {
    const entries: readonly Node[] = (node as { items: readonly Node[] }).items;
    return resolvePositionalSeg(seg, { indexable: true, size: entries.length });
  }
  if (!isMap(node)) {
    return null;
  }
  const entries: readonly Pair[] = (node as { items: readonly Pair[] }).items;
  const keyNames: string[] = [];
  for (const entry of entries) {
    // Scalar keys contribute their value; other key nodes are stringified as-is.
    keyNames.push(String(isScalar(entry.key) ? entry.key.value : entry.key));
  }
  return resolvePositionalSeg(seg, { indexable: false, size: keyNames.length, keys: keyNames });
}
|
||||
|
||||
/**
 * Flatten an `OcPath` into the ordered list of key / index segments.
 *
 * The `section`, `item` and `field` slots are visited in that order; each
 * defined slot is split on `.` with `splitRespectingBrackets`, and quoted
 * sub-segments have their quotes removed via `unquoteSeg`. This is the
 * same split + unquote that `resolveYamlOcPath` applies, so a quoted key
 * containing `.` or `/` stays a single segment on both read and write.
 */
function pathSegments(path: OcPath): string[] {
  const flattened: string[] = [];
  for (const slot of [path.section, path.item, path.field]) {
    if (slot === undefined) {
      continue;
    }
    for (const piece of splitRespectingBrackets(slot, '.')) {
      if (isQuotedSeg(piece)) {
        flattened.push(unquoteSeg(piece));
      } else {
        flattened.push(piece);
      }
    }
  }
  return flattened;
}
|
||||
|
||||
/**
 * Produce an independent copy of a parsed YAML document.
 *
 * The copy is made by serializing with `doc.toString()` and parsing the
 * result again, using a fresh `LineCounter` so line positions on the
 * copy refer to the re-serialized text.
 *
 * @returns The re-parsed document together with its line counter.
 */
function cloneDoc(doc: Document.Parsed): { doc: Document.Parsed; lineCounter: LineCounter } {
  const lineCounter = new LineCounter();
  const serialized = doc.toString();
  const reparsed = parseDocument(serialized, {
    keepSourceTokens: true,
    prettyErrors: false,
    lineCounter,
  });
  return { doc: reparsed, lineCounter };
}
|
||||
49
src/oc-path/yaml/emit.ts
Normal file
49
src/oc-path/yaml/emit.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
/**
|
||||
* Emit a `YamlAst` to bytes.
|
||||
*
|
||||
* **Round-trip mode (default)** returns `ast.raw` verbatim — preserves
|
||||
* comments, anchors, formatting exactly.
|
||||
*
|
||||
* **Render mode** uses `doc.toString()` from the `yaml` package — also
|
||||
* comment-preserving, but normalizes whitespace per the package's
|
||||
* options.
|
||||
*
|
||||
* **Sentinel guard**: scans every emitted byte sequence for the
|
||||
* `__OPENCLAW_REDACTED__` literal.
|
||||
*
|
||||
* @module @openclaw/oc-path/yaml/emit
|
||||
*/
|
||||
|
||||
import { OcEmitSentinelError, REDACTED_SENTINEL } from '../sentinel.js';
|
||||
import type { YamlAst } from './ast.js';
|
||||
|
||||
/** Options accepted by `emitYaml`. */
export interface YamlEmitOptions {
  /**
   * See `JsoncEmitOptions.acceptPreExistingSentinel` for the rationale.
   * Default `true` — round-trip echoes parsed bytes without scanning.
   * Render mode always scans the rendered output (callers can inject
   * sentinels via setYamlOcPath, so render-time scan is mandatory).
   */
  readonly acceptPreExistingSentinel?: boolean;
  /** File name placed after `oc://` in the path reported by a sentinel error. */
  readonly fileNameForGuard?: string;
  /** `'roundtrip'` (default) returns `ast.raw`; `'render'` re-serializes the document. */
  readonly mode?: 'roundtrip' | 'render';
}
|
||||
|
||||
/**
 * Serialize a `YamlAst` to text.
 *
 * - Round-trip mode (the default) returns `ast.raw` unchanged. The raw
 *   text is scanned for the redaction sentinel only when
 *   `acceptPreExistingSentinel` is explicitly `false`.
 * - Any other mode returns `ast.doc.toString()` and always scans it.
 *
 * @throws OcEmitSentinelError when a scanned output contains
 *   `REDACTED_SENTINEL`.
 */
export function emitYaml(ast: YamlAst, opts: YamlEmitOptions = {}): string {
  const guardPath = opts.fileNameForGuard ? `oc://${opts.fileNameForGuard}` : 'oc://';
  const isRoundtrip = (opts.mode ?? 'roundtrip') === 'roundtrip';

  if (!isRoundtrip) {
    const rendered = ast.doc.toString();
    if (rendered.includes(REDACTED_SENTINEL)) {
      throw new OcEmitSentinelError(`${guardPath}/[rendered]`);
    }
    return rendered;
  }

  const mustScanRaw = !(opts.acceptPreExistingSentinel ?? true);
  if (mustScanRaw && ast.raw.includes(REDACTED_SENTINEL)) {
    throw new OcEmitSentinelError(`${guardPath}/[raw]`);
  }
  return ast.raw;
}
|
||||
48
src/oc-path/yaml/parse.ts
Normal file
48
src/oc-path/yaml/parse.ts
Normal file
@@ -0,0 +1,48 @@
|
||||
/**
|
||||
* YAML parser — wraps `yaml.parseDocument` for comment-preserving CST
|
||||
* + structured access. Soft-error policy: never throws on
|
||||
* parser-tolerated input; recoverable problems surface as diagnostics.
|
||||
*
|
||||
* @module @openclaw/oc-path/yaml/parse
|
||||
*/
|
||||
|
||||
import { LineCounter, parseDocument } from 'yaml';
|
||||
import type { Diagnostic } from '../ast.js';
|
||||
import type { YamlAst } from './ast.js';
|
||||
|
||||
/** What `parseYaml` returns: the AST plus any parser-reported problems. */
export interface YamlParseResult {
  /** Warnings and errors reported by the yaml parser, warnings first. */
  readonly diagnostics: readonly Diagnostic[];
  /** The parsed document wrapped as a `YamlAst`. */
  readonly ast: YamlAst;
}
|
||||
|
||||
/**
 * Parse YAML text into a `YamlAst`.
 *
 * The document is parsed with `keepSourceTokens` enabled and a
 * `LineCounter` attached. Everything the `yaml` package reports is
 * converted to a `Diagnostic`: warnings become `OC_YAML_WARN` entries
 * and errors become `OC_YAML_PARSE_FAILED` entries, in that order. When
 * the parser supplies no line position, line 1 is reported.
 */
export function parseYaml(raw: string): YamlParseResult {
  const lineCounter = new LineCounter();
  const doc = parseDocument(raw, {
    keepSourceTokens: true,
    prettyErrors: false,
    lineCounter,
  });

  const fromWarnings = doc.warnings.map(
    (warning): Diagnostic => ({
      line: warning.linePos?.[0]?.line ?? 1,
      message: warning.message,
      severity: 'warning',
      code: 'OC_YAML_WARN',
    }),
  );
  const fromErrors = doc.errors.map(
    (failure): Diagnostic => ({
      line: failure.linePos?.[0]?.line ?? 1,
      message: failure.message,
      severity: 'error',
      code: 'OC_YAML_PARSE_FAILED',
    }),
  );

  const diagnostics: Diagnostic[] = [...fromWarnings, ...fromErrors];
  return { ast: { kind: 'yaml', raw, doc, lineCounter }, diagnostics };
}
|
||||
147
src/oc-path/yaml/resolve.ts
Normal file
147
src/oc-path/yaml/resolve.ts
Normal file
@@ -0,0 +1,147 @@
|
||||
/**
|
||||
* Resolve an `OcPath` against a `YamlAst`.
|
||||
*
|
||||
* YAML's structural shape mirrors JSONC: objects (`Map`), arrays
|
||||
* (`Seq`), and scalars. Addressing follows the same dotted-path
|
||||
* convention used by JSONC:
|
||||
*
|
||||
* oc://workflow.yaml/steps.0.command → command on first step
|
||||
* oc://workflow.yaml/name → top-level name
|
||||
* oc://workflow.yaml/steps.+command → insertion (handled by edit)
|
||||
*
|
||||
* @module @openclaw/oc-path/yaml/resolve
|
||||
*/
|
||||
|
||||
import { isMap, isScalar, isSeq, type Node, type Pair } from 'yaml';
|
||||
import type { OcPath } from '../oc-path.js';
|
||||
import {
|
||||
isPositionalSeg,
|
||||
isQuotedSeg,
|
||||
resolvePositionalSeg,
|
||||
splitRespectingBrackets,
|
||||
unquoteSeg,
|
||||
} from '../oc-path.js';
|
||||
import type { YamlAst } from './ast.js';
|
||||
|
||||
/**
 * What an `OcPath` landed on inside a YAML document.
 *
 * `root` is returned for an empty path. `pair` is returned when the last
 * segment names a map key whose value is a scalar. `map`, `seq` and
 * `scalar` describe the node reached otherwise. `path` holds the concrete
 * segments that were walked.
 */
export type YamlOcPathMatch =
  | { readonly kind: 'root'; readonly node: YamlAst }
  | { readonly kind: 'scalar'; readonly value: unknown; readonly path: readonly string[] }
  | { readonly kind: 'map'; readonly path: readonly string[] }
  | { readonly kind: 'seq'; readonly path: readonly string[] }
  | {
      readonly kind: 'pair';
      readonly key: string;
      readonly value: unknown;
      readonly path: readonly string[];
    };
|
||||
|
||||
/**
 * Look up `path` inside a YAML AST.
 *
 * The `section`, `item` and `field` slots are each split on `.`
 * (bracket/quote aware) and unquoted, then concatenated in that order
 * into one segment list that is walked from the document root.
 *
 * @returns A `root` match for an empty path, the match produced by
 *   walking the segments otherwise, or `null` when the document has no
 *   contents or the path does not resolve.
 */
export function resolveYamlOcPath(
  ast: YamlAst,
  path: OcPath,
): YamlOcPathMatch | null {
  const segments: string[] = [];
  for (const slot of [path.section, path.item, path.field]) {
    if (slot === undefined) {
      continue;
    }
    for (const piece of splitRespectingBrackets(slot, '.')) {
      segments.push(isQuotedSeg(piece) ? unquoteSeg(piece) : piece);
    }
  }

  if (segments.length === 0) {
    return { kind: 'root', node: ast };
  }

  const root = ast.doc.contents;
  return root === null ? null : walkNode(root, segments, 0, []);
}
|
||||
|
||||
/**
 * Descend from `node` through `segments[i..]` and describe the landing
 * node.
 *
 * @param node - Node to start from; `null` never resolves.
 * @param segments - Full list of path segments.
 * @param i - Index of the first segment still to be consumed.
 * @param walked - Concrete segments consumed before `node` was reached.
 * @returns The match for the node reached, or `null` if a segment is
 *   empty, names a missing key, is not a valid in-range seq index, or
 *   tries to descend into a non-container.
 */
function walkNode(
  node: Node | null,
  segments: readonly string[],
  i: number,
  walked: readonly string[],
): YamlOcPathMatch | null {
  let current: Node | null = node;
  const trail: string[] = [...walked];

  for (let depth = i; depth < segments.length; depth += 1) {
    if (current === null) {
      return null;
    }
    const raw = segments[depth];
    if (raw === undefined) {
      break;
    }
    if (raw.length === 0) {
      return null;
    }

    // Positional tokens (`$first` / `$last` / `-N`) resolve to a concrete
    // segment based on container shape. `-N` on a keyed container falls
    // through to literal-key lookup (openclaw#59934 — Telegram supergroup
    // IDs are negative numbers used as map keys).
    const positional = isPositionalSeg(raw) ? positionalForYaml(current, raw) : null;
    const token = positional ?? raw;

    if (isMap(current)) {
      const entries = (current as { items: Pair[] }).items;
      const hit = entries.find(
        (entry) => String(isScalar(entry.key) ? entry.key.value : entry.key) === token,
      );
      if (hit === undefined) {
        return null;
      }
      trail.push(token);
      const child = hit.value;
      // A scalar reached by the final key is reported as a key/value pair.
      if (depth === segments.length - 1 && isScalar(child)) {
        return { kind: 'pair', key: token, value: child.value, path: trail };
      }
      current = (child as Node | null | undefined) ?? null;
      continue;
    }

    if (isSeq(current)) {
      const idx = Number(token);
      if (!Number.isInteger(idx) || idx < 0 || idx >= current.items.length) {
        return null;
      }
      trail.push(token);
      current = (current.items[idx] as Node | null | undefined) ?? null;
      continue;
    }

    // Scalar — can't descend.
    return null;
  }

  // Segments exhausted — describe whatever we landed on.
  if (current === null) {
    return null;
  }
  if (isMap(current)) {
    return { kind: 'map', path: trail };
  }
  if (isSeq(current)) {
    return { kind: 'seq', path: trail };
  }
  if (isScalar(current)) {
    return { kind: 'scalar', value: current.value, path: trail };
  }
  return null;
}
|
||||
|
||||
/**
 * Translate one positional token into a concrete segment for `node`.
 *
 * Seqs are passed to `resolvePositionalSeg` as indexable with their item
 * count; maps as non-indexable with their stringified keys. Nodes that
 * are neither yield `null`.
 */
function positionalForYaml(node: Node, seg: string): string | null {
  if (isSeq(node)) {
    const entries = (node as { items: Node[] }).items;
    return resolvePositionalSeg(seg, { indexable: true, size: entries.length });
  }
  if (!isMap(node)) {
    return null;
  }
  const keyNames: string[] = [];
  for (const entry of (node as { items: Pair[] }).items) {
    keyNames.push(String(isScalar(entry.key) ? entry.key.value : entry.key));
  }
  return resolvePositionalSeg(seg, { indexable: false, size: keyNames.length, keys: keyNames });
}
|
||||
Reference in New Issue
Block a user