test: harden beta release gates

This commit is contained in:
Peter Steinberger
2026-04-15 19:28:18 +01:00
parent 41699cfc2d
commit 4efd3c3d74
7 changed files with 95 additions and 46 deletions

View File

@@ -376,6 +376,9 @@ describe("memory index", () => {
const manager = requireManager(result);
managersForCleanup.add(manager);
resetManagerForTest(manager);
if (!manager.status().fts?.available) {
return;
}
await fs.writeFile(
path.join(memoryDir, "2026-01-12.md"),
@@ -411,6 +414,9 @@ describe("memory index", () => {
const manager = requireManager(result);
managersForCleanup.add(manager);
resetManagerForTest(manager);
if (!manager.status().fts?.available) {
return;
}
const memoryPath = path.join(workspaceDir, "MEMORY.md");
await fs.writeFile(memoryPath, "Project Nebula stale codename: ORBIT-9.\n", "utf8");
@@ -478,6 +484,9 @@ describe("memory index", () => {
const manager = requireManager(result);
managersForCleanup.add(manager);
resetManagerForTest(manager);
if (!manager.status().fts?.available) {
return;
}
const sessionsDir = resolveSessionTranscriptsDirForAgent("main");
await fs.mkdir(sessionsDir, { recursive: true });

View File

@@ -9,9 +9,26 @@ import { searchKeyword } from "./manager-search.js";
describe("searchKeyword trigram fallback", () => {
const { DatabaseSync } = requireNodeSqlite();
/**
 * Probes whether FTS5 with the trigram tokenizer is usable in this environment.
 *
 * Builds the memory index schema inside a throwaway in-memory database and
 * reports the `ftsAvailable` flag from the result. The probe database is
 * closed on every path, including when schema creation throws.
 *
 * @returns `true` when the schema helper reports FTS as available.
 */
function supportsTrigramFts(): boolean {
  const probeDb = new DatabaseSync(":memory:");
  try {
    const { ftsAvailable } = ensureMemoryIndexSchema({
      db: probeDb,
      ftsEnabled: true,
      ftsTokenizer: "trigram",
      ftsTable: "chunks_fts",
      cacheEnabled: false,
      embeddingCacheTable: "embedding_cache",
    });
    return ftsAvailable;
  } finally {
    probeDb.close();
  }
}
function createTrigramDb() {
const db = new DatabaseSync(":memory:");
ensureMemoryIndexSchema({
const result = ensureMemoryIndexSchema({
db,
embeddingCacheTable: "embedding_cache",
cacheEnabled: false,
@@ -19,6 +36,10 @@ describe("searchKeyword trigram fallback", () => {
ftsEnabled: true,
ftsTokenizer: "trigram",
});
if (!result.ftsAvailable) {
db.close();
throw new Error(`FTS5 trigram unavailable: ${result.ftsError ?? "unknown error"}`);
}
return db;
}
@@ -53,7 +74,9 @@ describe("searchKeyword trigram fallback", () => {
}
}
it("finds short Chinese queries with substring fallback", async () => {
// Register trigram-dependent cases through `it.skip` when the probe reports that FTS5
// trigram is unavailable, so they are reported as skipped instead of failing.
const itWithTrigramFts = supportsTrigramFts() ? it : it.skip;
itWithTrigramFts("finds short Chinese queries with substring fallback", async () => {
const results = await runSearch({
rows: [{ id: "1", path: "memory/zh.md", text: "今天玩成语接龙游戏" }],
query: "成语",
@@ -62,7 +85,7 @@ describe("searchKeyword trigram fallback", () => {
expect(results[0]?.textScore).toBe(1);
});
it("finds short Japanese and Korean queries with substring fallback", async () => {
itWithTrigramFts("finds short Japanese and Korean queries with substring fallback", async () => {
const japaneseResults = await runSearch({
rows: [{ id: "jp", path: "memory/jp.md", text: "今日はしりとり大会" }],
query: "しり とり",
@@ -76,19 +99,22 @@ describe("searchKeyword trigram fallback", () => {
expect(koreanResults.map((row) => row.id)).toEqual(["ko"]);
});
it("keeps MATCH semantics for long trigram terms while requiring short CJK substrings", async () => {
const results = await runSearch({
rows: [
{ id: "match", path: "memory/good.md", text: "今天玩成语接龙游戏" },
{ id: "partial", path: "memory/partial.md", text: "今天玩成语接龙" },
],
query: "成语接龙 游戏",
});
expect(results.map((row) => row.id)).toEqual(["match"]);
expect(results[0]?.textScore).toBeGreaterThan(0);
});
// Both rows contain "成语接龙", but only the "match" row also contains the second query
// term "游戏", so the "partial" row must be absent from the results.
itWithTrigramFts(
"keeps MATCH semantics for long trigram terms while requiring short CJK substrings",
async () => {
const results = await runSearch({
rows: [
{ id: "match", path: "memory/good.md", text: "今天玩成语接龙游戏" },
{ id: "partial", path: "memory/partial.md", text: "今天玩成语接龙" },
],
query: "成语接龙 游戏",
});
expect(results.map((row) => row.id)).toEqual(["match"]);
expect(results[0]?.textScore).toBeGreaterThan(0);
},
);
it("applies fallback lexical boosts without exceeding bounded scores", async () => {
itWithTrigramFts("applies fallback lexical boosts without exceeding bounded scores", async () => {
const results = await runSearch({
rows: [
{
@@ -133,7 +159,7 @@ describe("searchKeyword trigram fallback", () => {
expect(boostedById.get("weak")?.score).toBeLessThanOrEqual(1);
});
it("does not overweight repeated query tokens in fallback scoring", async () => {
itWithTrigramFts("does not overweight repeated query tokens in fallback scoring", async () => {
const unique = await runSearch({
rows: [{ id: "1", path: "memory/project.md", text: "Project memory context." }],
query: "project memory context",

View File

@@ -72,6 +72,7 @@ export const mockedRunEmbeddedAttempt =
vi.fn<(params: unknown) => Promise<EmbeddedRunAttemptResult>>();
// Async mock whose default implementation resolves to `undefined`.
export const mockedRunContextEngineMaintenance = vi.fn(async () => undefined);
// Stand-in for `sessionLikelyHasOversizedToolResults`; returns `false` by default.
export const mockedSessionLikelyHasOversizedToolResults = vi.fn(() => false);
// Stand-in for `resolveLiveToolResultMaxChars`; returns 32_000 by default.
export const mockedResolveLiveToolResultMaxChars = vi.fn(() => 32_000);
type MockTruncateOversizedToolResultsResult = {
truncated: boolean;
truncatedCount: number;
@@ -228,6 +229,8 @@ export function resetRunOverflowCompactionHarnessMocks(): void {
mockedRunContextEngineMaintenance.mockResolvedValue(undefined);
mockedSessionLikelyHasOversizedToolResults.mockReset();
mockedSessionLikelyHasOversizedToolResults.mockReturnValue(false);
mockedResolveLiveToolResultMaxChars.mockReset();
mockedResolveLiveToolResultMaxChars.mockReturnValue(32_000);
mockedTruncateOversizedToolResultsInSession.mockReset();
mockedTruncateOversizedToolResultsInSession.mockResolvedValue({
truncated: false,
@@ -420,6 +423,7 @@ export async function loadRunOverflowCompactionHarness(): Promise<{
}));
// Replace `./tool-result-truncation.js` with the harness mocks so tests can control each
// function's return value.
// NOTE(review): the factory returns only these three exports; any other export of the real
// module would presumably be missing for importers — confirm when the module gains exports.
vi.doMock("./tool-result-truncation.js", () => ({
resolveLiveToolResultMaxChars: mockedResolveLiveToolResultMaxChars,
sessionLikelyHasOversizedToolResults: mockedSessionLikelyHasOversizedToolResults,
truncateOversizedToolResultsInSession: mockedTruncateOversizedToolResultsInSession,
}));

View File

@@ -505,14 +505,15 @@ describe("installContextEngineLoopHook", () => {
const engine = makeMockEngine({ omitAfterTurn: true, omitIngestBatch: true });
installHook(agent, engine, 1);
const messages = [makeUser("first"), makeToolResult("call_1", "r1")];
const toolResult = makeToolResult("call_1", "r1");
const messages = [makeUser("first"), toolResult];
await callTransform(agent, messages);
expect(engine.ingest).toHaveBeenCalledTimes(1);
expect(engine.ingest.mock.calls[0]?.[0]).toMatchObject({
sessionId,
sessionKey,
message: makeToolResult("call_1", "r1"),
message: toolResult,
});
expect(engine.assemble).toHaveBeenCalledTimes(1);
});

View File

@@ -86,7 +86,7 @@ describe("createOpenClawCodingTools read behavior", () => {
const result = await readTool.execute("read-cap-1", { path: "huge.txt" });
const text = extractToolText(result);
expect(text).toContain("line-0001");
expect(text).toContain("[Read output capped at 50KB for this call. Use offset=");
expect(text).toContain("[Read output capped at 32KB for this call. Use offset=");
expect(text).not.toContain("line-8000");
} finally {
await fs.rm(tmpDir, { recursive: true, force: true });

View File

@@ -92,8 +92,8 @@ describe("buildContextReply", () => {
omitBootstrapLimits: true,
}),
);
expect(result.text).toContain("Bootstrap max/file: 20,000 chars");
expect(result.text).toContain("Bootstrap max/total: 150,000 chars");
expect(result.text).toContain("Bootstrap max/file: 12,000 chars");
expect(result.text).toContain("Bootstrap max/total: 60,000 chars");
expect(result.text).not.toContain("Bootstrap max/file: ? chars");
});

View File

@@ -486,6 +486,13 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
const transcriptDir = path.dirname(mockState.transcriptPath);
const audioPath = path.join(transcriptDir, "reply.mp3");
fs.writeFileSync(audioPath, Buffer.from([0xff, 0xfb, 0x90, 0x00]));
// Use the transcript directory (where reply.mp3 was just written) as the default agent
// workspace.
// NOTE(review): presumably required so the audio file counts as workspace-local — confirm.
mockState.config = {
agents: {
defaults: {
workspace: transcriptDir,
},
},
};
mockState.triggerAgentRunStart = true;
mockState.dispatchedReplies = [
{
@@ -506,32 +513,34 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
expectBroadcast: false,
});
const assistantUpdate = mockState.emittedTranscriptUpdates.find(
(update) =>
typeof update.message === "object" &&
update.message !== null &&
(update.message as { role?: unknown }).role === "assistant" &&
Array.isArray((update.message as { content?: unknown }).content) &&
((update.message as { content: Array<{ type?: string }> }).content.some(
(block) => block?.type === "audio",
) ??
false),
);
expect(assistantUpdate).toMatchObject({
message: {
role: "assistant",
idempotencyKey: "idem-agent-audio:assistant-audio",
content: [
{ type: "text", text: "Audio reply" },
{
type: "audio",
source: {
type: "base64",
media_type: "audio/mpeg",
await waitForAssertion(() => {
const assistantUpdate = mockState.emittedTranscriptUpdates.find(
(update) =>
typeof update.message === "object" &&
update.message !== null &&
(update.message as { role?: unknown }).role === "assistant" &&
Array.isArray((update.message as { content?: unknown }).content) &&
((update.message as { content: Array<{ type?: string }> }).content.some(
(block) => block?.type === "audio",
) ??
false),
);
expect(assistantUpdate).toMatchObject({
message: {
role: "assistant",
idempotencyKey: "idem-agent-audio:assistant-audio",
content: [
{ type: "text", text: "Audio reply" },
{
type: "audio",
source: {
type: "base64",
media_type: "audio/mpeg",
},
},
},
],
},
],
},
});
});
});