Files
openclaw/test/scripts/prompt-snapshots.test.ts
Eva 2a0350b5b4 Separate prompt surfaces by selected harness (#83454)
* fix: scope agent prompt surfaces

* fix(codex): preserve lightweight project doc suppression

* fix(codex): demote openclaw context for native turns

* fix(codex): report demoted prompt context

* fix(codex): align demoted prompt observability

* docs: format codex runtime table

* docs: align codex prompt overlay docs

* test: align codex prompt snapshots

* test: update prompt snapshot contract

---------

Co-authored-by: Eva (agent) <eva+agent-78055@100yen.org>
Co-authored-by: Peter Steinberger <steipete@gmail.com>
2026-05-18 13:00:53 +01:00

284 lines
10 KiB
TypeScript

import { spawnSync } from "node:child_process";
import fs from "node:fs";
import os from "node:os";
import path from "node:path";
import { describe, expect, it } from "vitest";
import { deleteStalePromptSnapshotFiles } from "../../scripts/prompt-snapshot-files.js";
import {
defaultCatalogPathCandidates,
findDefaultCatalogPath,
renderCodexModelInstructions,
runCodexModelPromptFixtureSync,
} from "../../scripts/sync-codex-model-prompt-fixture.js";
import { expectNoReaddirSyncDuring } from "../../src/test-utils/fs-scan-assertions.js";
import { toRepoRelativePath } from "../../src/test-utils/repo-files.js";
import {
CODEX_MODEL_PROMPT_FIXTURE_DIR,
CODEX_RUNTIME_HAPPY_PATH_PROMPT_SNAPSHOT_DIR,
} from "../helpers/agents/prompt-snapshot-paths.js";
/**
 * Loads one committed Codex happy-path prompt snapshot as UTF-8 text.
 *
 * @param fileName - File name resolved against CODEX_RUNTIME_HAPPY_PATH_PROMPT_SNAPSHOT_DIR.
 * @returns The complete contents of that file.
 */
function readCommittedSnapshot(fileName: string): string {
  const snapshotPath = path.join(CODEX_RUNTIME_HAPPY_PATH_PROMPT_SNAPSHOT_DIR, fileName);
  return fs.readFileSync(snapshotPath, { encoding: "utf8" });
}
/**
 * Extracts one rendered section from a prompt snapshot.
 *
 * The section starts at the first occurrence of `heading` and runs up to (but
 * not including) the first occurrence of `nextHeading` found after that heading.
 *
 * @param content - Full snapshot text to search.
 * @param heading - Text marking the start of the section; included in the result.
 * @param nextHeading - Text marking the end of the section; excluded from the result.
 * @returns The slice of `content` from `heading` up to `nextHeading`.
 * @throws Error when `heading` is absent, or when `nextHeading` does not occur after it.
 */
function renderedPromptSection(content: string, heading: string, nextHeading: string): string {
  const start = content.indexOf(heading);
  if (start === -1) {
    throw new Error(`Missing rendered prompt section ${heading}`);
  }
  // Search strictly after the heading so identical start/end markers still delimit a section.
  const end = content.indexOf(nextHeading, start + heading.length);
  if (end === -1) {
    // Name the missing terminator: the heading itself was found, so blaming it alone misleads.
    throw new Error(
      `Missing rendered prompt section ${heading}: no following ${nextHeading} marks its end`,
    );
  }
  return content.slice(start, end);
}
/**
 * Returns the committed Codex happy-path prompt snapshot files (`.md` / `.json`), sorted.
 *
 * The listing from `listExternalCommittedPromptSnapshotFiles` is returned as-is
 * when it is available; the snapshot directory is only read in-process when
 * that helper returns null.
 *
 * @returns Sorted snapshot file paths.
 */
function listCommittedPromptSnapshotFiles(): string[] {
  const listedExternally = listExternalCommittedPromptSnapshotFiles();
  if (!listedExternally) {
    const snapshotPaths: string[] = [];
    for (const entry of fs.readdirSync(CODEX_RUNTIME_HAPPY_PATH_PROMPT_SNAPSHOT_DIR)) {
      if (entry.endsWith(".md") || entry.endsWith(".json")) {
        snapshotPaths.push(path.join(CODEX_RUNTIME_HAPPY_PATH_PROMPT_SNAPSHOT_DIR, entry));
      }
    }
    return snapshotPaths.toSorted();
  }
  return listedExternally;
}
/**
 * Lists the snapshot files via an external command: the git-based listing is
 * tried first, and the find-based listing is consulted only when git yields null.
 *
 * @returns The first non-null listing, or null when both helpers return null.
 */
function listExternalCommittedPromptSnapshotFiles(): string[] | null {
  const trackedByGit = listGitCommittedPromptSnapshotFiles();
  if (trackedByGit !== null) {
    return trackedByGit;
  }
  return listFindCommittedPromptSnapshotFiles();
}
/**
 * Asks `git ls-files` for the files under the Codex happy-path snapshot directory.
 *
 * @returns The sorted, trimmed output lines that end in `.md` or `.json`, or
 *   null when the command does not exit with status 0.
 */
function listGitCommittedPromptSnapshotFiles(): string[] | null {
  const gitArgs = ["ls-files", "--", CODEX_RUNTIME_HAPPY_PATH_PROMPT_SNAPSHOT_DIR];
  const { status, stdout } = spawnSync("git", gitArgs, {
    cwd: process.cwd(),
    encoding: "utf8",
    maxBuffer: 1024 * 1024,
    // Only stdout is captured; stdin and stderr are ignored.
    stdio: ["ignore", "pipe", "ignore"],
  });
  if (status !== 0) {
    return null;
  }
  const trackedSnapshots: string[] = [];
  for (const rawLine of stdout.split("\n")) {
    const trackedPath = rawLine.trim();
    if (trackedPath.endsWith(".md") || trackedPath.endsWith(".json")) {
      trackedSnapshots.push(trackedPath);
    }
  }
  return trackedSnapshots.toSorted();
}
/**
 * Lists the snapshot files with the external `find` command, restricted to
 * regular `*.md` / `*.json` files directly inside the snapshot directory
 * (`-maxdepth 1`).
 *
 * @returns The sorted non-empty output lines, each mapped through
 *   `toRepoRelativePath` against the current working directory, or null when
 *   `find` does not exit with status 0.
 */
function listFindCommittedPromptSnapshotFiles(): string[] | null {
  const workingDir = process.cwd();
  const searchRoot = path.join(workingDir, CODEX_RUNTIME_HAPPY_PATH_PROMPT_SNAPSHOT_DIR);
  const depthAndType = ["-maxdepth", "1", "-type", "f"];
  const namePredicate = ["(", "-name", "*.md", "-o", "-name", "*.json", ")"];
  const { status, stdout } = spawnSync("find", [searchRoot, ...depthAndType, ...namePredicate], {
    cwd: workingDir,
    encoding: "utf8",
    maxBuffer: 1024 * 1024,
    // Only stdout is captured; stdin and stderr are ignored.
    stdio: ["ignore", "pipe", "ignore"],
  });
  if (status !== 0) {
    return null;
  }
  const foundSnapshots: string[] = [];
  for (const rawLine of stdout.split("\n")) {
    const foundPath = rawLine.trim();
    if (foundPath.length > 0) {
      foundSnapshots.push(toRepoRelativePath(workingDir, foundPath));
    }
  }
  return foundSnapshots.toSorted();
}
/**
 * Contract tests for the committed Codex happy-path prompt snapshots and for the
 * helper scripts imported above (stale snapshot cleanup, Codex model prompt
 * fixture rendering, and default catalog discovery).
 */
describe("happy path prompt snapshots", () => {
  // Snapshot file names shared by several tests below.
  const TELEGRAM_DIRECT_SNAPSHOT = "telegram-direct-codex-message-tool.md";
  const DISCORD_GROUP_SNAPSHOT = "discord-group-codex-message-tool.md";
  const TELEGRAM_HEARTBEAT_SNAPSHOT = "telegram-heartbeat-codex-tool.md";

  /** True for the two artifact extensions the snapshot directory may hold. */
  const isSnapshotArtifact = (file: string): boolean =>
    file.endsWith(".md") || file.endsWith(".json");

  it("lists committed Codex prompt snapshot artifacts without scanning directories in-process", () => {
    expectNoReaddirSyncDuring(() => {
      const snapshotFiles = listCommittedPromptSnapshotFiles();
      expect(snapshotFiles.length).toBeGreaterThan(0);
      expect(snapshotFiles.every((file) => isSnapshotArtifact(file))).toBe(true);
    });
  });

  it("keeps the committed Codex prompt snapshot artifact set explicit", () => {
    const committedNames = listCommittedPromptSnapshotFiles().map((file) => path.basename(file));
    const expectedNames = [
      "README.md",
      "codex-dynamic-tools.discord-group.json",
      "codex-dynamic-tools.heartbeat-turn.json",
      "codex-dynamic-tools.telegram-direct.json",
      "discord-group-codex-message-tool.md",
      "telegram-direct-codex-message-tool.md",
      "telegram-heartbeat-codex-tool.md",
    ];
    expect(committedNames).toEqual(expectedNames);
  });

  it("deletes stale generated snapshot artifacts", async () => {
    const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-prompt-snapshot-stale-"));
    try {
      fs.mkdirSync(path.join(tempRoot, CODEX_RUNTIME_HAPPY_PATH_PROMPT_SNAPSHOT_DIR), {
        recursive: true,
      });
      // Relative form is what the cleanup reports; absolute form is what lives on disk.
      const staleRelativePath = path.join(
        CODEX_RUNTIME_HAPPY_PATH_PROMPT_SNAPSHOT_DIR,
        "stale-snapshot.md",
      );
      const staleAbsolutePath = path.join(tempRoot, staleRelativePath);
      fs.writeFileSync(staleAbsolutePath, "stale\n");

      const deleted = await deleteStalePromptSnapshotFiles(tempRoot, [
        { path: path.join(CODEX_RUNTIME_HAPPY_PATH_PROMPT_SNAPSHOT_DIR, "current.md") },
      ]);

      expect(deleted).toEqual([staleRelativePath]);
      expect(fs.existsSync(staleAbsolutePath)).toBe(false);
    } finally {
      fs.rmSync(tempRoot, { recursive: true, force: true });
    }
  });

  it("renders the Codex model-bound prompt layers", async () => {
    const telegram = readCommittedSnapshot(TELEGRAM_DIRECT_SNAPSHOT);
    // Checked in this order so the first missing layer is the one reported.
    const requiredFragments = [
      "## Reconstructed Model-Bound Prompt Layers",
      "### System: Codex Model Instructions (gpt-5.5, pragmatic)",
      "You are Codex, a coding agent based on GPT-5.",
      "### Developer: Codex Permission Instructions",
      "Approval policy is currently never. Do not provide the `sandbox_permissions`",
      "### User: Codex Config Instructions",
      "### User: Turn Input Text",
      "OpenClaw runtime context for this turn:",
      "<SOUL.md contents will be here>",
      "<TOOLS.md contents will be here>",
      "<HEARTBEAT.md contents will be here>",
      "Codex loads AGENTS.md natively",
      "### Tools: Dynamic Tool Catalog",
    ];
    for (const fragment of requiredFragments) {
      expect(telegram).toContain(fragment);
    }
  });

  it("keeps heartbeat guidance in heartbeat collaboration mode only", async () => {
    const directSnapshot = readCommittedSnapshot(TELEGRAM_DIRECT_SNAPSHOT);
    const groupSnapshot = readCommittedSnapshot(DISCORD_GROUP_SNAPSHOT);
    const heartbeatSnapshot = readCommittedSnapshot(TELEGRAM_HEARTBEAT_SNAPSHOT);
    const heartbeatPhrase = "Use heartbeats to create useful proactive progress";
    const collaborationModeKey = '"collaborationMode": {';
    const nonHeartbeatSnapshots = [directSnapshot, groupSnapshot];

    // Non-heartbeat turns carry a collaboration mode with no developer instructions...
    for (const snapshot of nonHeartbeatSnapshots) {
      expect(snapshot).toContain(collaborationModeKey);
      expect(snapshot).toContain('"developer_instructions": null');
    }
    // ...and never mention the heartbeat guidance anywhere.
    for (const snapshot of nonHeartbeatSnapshots) {
      expect(snapshot).not.toContain(heartbeatPhrase);
    }

    expect(heartbeatSnapshot).toContain(collaborationModeKey);
    expect(heartbeatSnapshot).toContain(
      '"developer_instructions": "This is an OpenClaw heartbeat turn.',
    );

    const runtimeHeading = "### Developer: OpenClaw Runtime Instructions";
    const collaborationHeading = "### Developer: Codex Collaboration Mode Instructions";
    const turnInputHeading = "### User: Turn Input Text";
    const runtimeSection = renderedPromptSection(
      heartbeatSnapshot,
      runtimeHeading,
      collaborationHeading,
    );
    const collaborationSection = renderedPromptSection(
      heartbeatSnapshot,
      collaborationHeading,
      turnInputHeading,
    );

    expect(runtimeSection).not.toContain(heartbeatPhrase);
    expect(collaborationSection).toContain(heartbeatPhrase);
    // Splitting on the phrase yields exactly two pieces when it occurs exactly once.
    expect(collaborationSection.split(heartbeatPhrase)).toHaveLength(2);
  });

  it("keeps the Codex model prompt fixture next to its source metadata", () => {
    const fixtureNames = ["gpt-5.5.pragmatic.instructions.md", "gpt-5.5.pragmatic.source.json"];
    for (const fixtureName of fixtureNames) {
      const fixturePath = path.join(CODEX_MODEL_PROMPT_FIXTURE_DIR, fixtureName);
      expect(fs.existsSync(fixturePath)).toBe(true);
    }
  });

  it("renders Codex model catalog instructions with the selected personality", () => {
    const expectedRendering = {
      instructions: "Intro\nPragmatic voice\nEnd",
      field:
        "model_messages.instructions_template + model_messages.instructions_variables.personality_pragmatic",
    };
    const actualRendering = renderCodexModelInstructions({
      model: {
        slug: "gpt-5.5",
        base_instructions: "fallback",
        model_messages: {
          instructions_template: "Intro\n{{ personality }}\nEnd",
          instructions_variables: {
            personality_pragmatic: "Pragmatic voice",
          },
        },
      },
      personality: "pragmatic",
    });
    expect(actualRendering).toEqual(expectedRendering);
  });

  it("prefers the Codex runtime model cache before local checkout fallbacks", () => {
    const codexHome = "/tmp/codex-home";
    const homeDir = "/tmp/home";
    const orderedCandidates = defaultCatalogPathCandidates({
      env: { CODEX_HOME: codexHome },
      homeDir,
    });
    expect(orderedCandidates).toEqual([
      path.join(codexHome, "models_cache.json"),
      path.join(homeDir, ".codex", "models_cache.json"),
      path.join(homeDir, "code", "codex", "codex-rs", "models-manager", "models.json"),
    ]);
  });

  it("finds the first available default Codex model catalog source", async () => {
    const tempHome = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-codex-catalog-"));
    try {
      const cacheDir = path.join(tempHome, ".codex");
      const cachePath = path.join(cacheDir, "models_cache.json");
      fs.mkdirSync(cacheDir, { recursive: true });
      fs.writeFileSync(cachePath, JSON.stringify({ models: [] }));

      const checkoutCatalogPath = path.join(
        tempHome,
        "code",
        "codex",
        "codex-rs",
        "models-manager",
        "models.json",
      );
      await expect(findDefaultCatalogPath({ env: {}, homeDir: tempHome })).resolves.toEqual({
        catalogPath: cachePath,
        candidates: [cachePath, checkoutCatalogPath],
      });
    } finally {
      fs.rmSync(tempHome, { recursive: true, force: true });
    }
  });

  it("skips Codex model prompt fixture sync when no default catalog exists", async () => {
    const emptyHome = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-codex-catalog-missing-"));
    // Collects everything the sync writes to its injected stdout.
    const written: string[] = [];
    try {
      const outcome = await runCodexModelPromptFixtureSync([], {
        env: {},
        homeDir: emptyHome,
        stdout: {
          write(chunk) {
            written.push(chunk);
          },
        },
      });
      expect(outcome.status).toBe("skipped");
      expect(written.join("")).toContain("No Codex model catalog/cache found");
    } finally {
      fs.rmSync(emptyHome, { recursive: true, force: true });
    }
  });
});