perf(test): dedupe memory host mirror tests

This commit is contained in:
Peter Steinberger
2026-04-20 20:02:16 +01:00
parent a2f158e5ed
commit 88de927a0c
5 changed files with 25 additions and 278 deletions

View File

@@ -1,82 +0,0 @@
import { describe, expect, it } from "vitest";
import { applyEmbeddingBatchOutputLine } from "./batch-output.js";
describe("applyEmbeddingBatchOutputLine", () => {
it("stores embedding for successful response", () => {
const remaining = new Set(["req-1"]);
const errors: string[] = [];
const byCustomId = new Map<string, number[]>();
applyEmbeddingBatchOutputLine({
line: {
custom_id: "req-1",
response: {
status_code: 200,
body: { data: [{ embedding: [0.1, 0.2] }] },
},
},
remaining,
errors,
byCustomId,
});
expect(remaining.has("req-1")).toBe(false);
expect(errors).toEqual([]);
expect(byCustomId.get("req-1")).toEqual([0.1, 0.2]);
});
it("records provider error from line.error", () => {
const remaining = new Set(["req-2"]);
const errors: string[] = [];
const byCustomId = new Map<string, number[]>();
applyEmbeddingBatchOutputLine({
line: {
custom_id: "req-2",
error: { message: "provider failed" },
},
remaining,
errors,
byCustomId,
});
expect(remaining.has("req-2")).toBe(false);
expect(errors).toEqual(["req-2: provider failed"]);
expect(byCustomId.size).toBe(0);
});
it("records non-2xx response errors and empty embedding errors", () => {
const remaining = new Set(["req-3", "req-4"]);
const errors: string[] = [];
const byCustomId = new Map<string, number[]>();
applyEmbeddingBatchOutputLine({
line: {
custom_id: "req-3",
response: {
status_code: 500,
body: { error: { message: "internal" } },
},
},
remaining,
errors,
byCustomId,
});
applyEmbeddingBatchOutputLine({
line: {
custom_id: "req-4",
response: {
status_code: 200,
body: { data: [] },
},
},
remaining,
errors,
byCustomId,
});
expect(errors).toEqual(["req-3: internal", "req-4: empty embedding"]);
expect(byCustomId.size).toBe(0);
});
});

View File

@@ -1,60 +0,0 @@
import { describe, expect, it } from "vitest";
import {
resolveBatchCompletionFromStatus,
resolveCompletedBatchResult,
throwIfBatchTerminalFailure,
} from "./batch-status.js";
describe("batch-status helpers", () => {
it("resolves completion payload from completed status", () => {
expect(
resolveBatchCompletionFromStatus({
provider: "openai",
batchId: "b1",
status: {
output_file_id: "out-1",
error_file_id: "err-1",
},
}),
).toEqual({
outputFileId: "out-1",
errorFileId: "err-1",
});
});
it("throws for terminal failure states", async () => {
await expect(
throwIfBatchTerminalFailure({
provider: "voyage",
status: { id: "b2", status: "failed", error_file_id: "err-file" },
readError: async () => "bad input",
}),
).rejects.toThrow("voyage batch b2 failed: bad input");
});
it("returns completed result directly without waiting", async () => {
const waitForBatch = async () => ({ outputFileId: "out-2" });
const result = await resolveCompletedBatchResult({
provider: "openai",
status: {
id: "b3",
status: "completed",
output_file_id: "out-3",
},
wait: false,
waitForBatch,
});
expect(result).toEqual({ outputFileId: "out-3", errorFileId: undefined });
});
it("throws when wait disabled and batch is not complete", async () => {
await expect(
resolveCompletedBatchResult({
provider: "openai",
status: { id: "b4", status: "pending" },
wait: false,
waitForBatch: async () => ({ outputFileId: "out" }),
}),
).rejects.toThrow("openai batch b4 submitted; enable remote.batch.wait to await completion");
});
});

View File

@@ -1,102 +0,0 @@
import { describe, expect, it } from "vitest";
import { enforceEmbeddingMaxInputTokens } from "./embedding-chunk-limits.js";
import { estimateUtf8Bytes } from "./embedding-input-limits.js";
import type { EmbeddingProvider } from "./embeddings.js";
/** Builds a stub EmbeddingProvider with the given input-token ceiling. */
function createProvider(maxInputTokens: number): EmbeddingProvider {
  // Embedding calls return fixed dummy vectors; tests only exercise limits.
  const stubQuery = async () => [0];
  const stubBatch = async () => [[0]];
  return {
    id: "mock",
    model: "mock-embed",
    maxInputTokens,
    embedQuery: stubQuery,
    embedBatch: stubBatch,
  };
}
/** Stub provider that declares no maxInputTokens at all. */
function createProviderWithoutMaxInputTokens(params: {
  id: string;
  model: string;
}): EmbeddingProvider {
  const { id, model } = params;
  // Note: maxInputTokens is intentionally absent from the returned object.
  return {
    id,
    model,
    embedQuery: async () => [0],
    embedBatch: async () => [[0]],
  };
}
describe("embedding chunk limits", () => {
it("splits oversized chunks so each embedding input stays <= maxInputTokens bytes", () => {
const provider = createProvider(8192);
const input = {
startLine: 1,
endLine: 1,
text: "x".repeat(9000),
hash: "ignored",
};
const out = enforceEmbeddingMaxInputTokens(provider, [input]);
expect(out.length).toBeGreaterThan(1);
expect(out.map((chunk) => chunk.text).join("")).toBe(input.text);
expect(out.every((chunk) => estimateUtf8Bytes(chunk.text) <= 8192)).toBe(true);
expect(out.every((chunk) => chunk.startLine === 1 && chunk.endLine === 1)).toBe(true);
expect(out.every((chunk) => typeof chunk.hash === "string" && chunk.hash.length > 0)).toBe(
true,
);
});
it("does not split inside surrogate pairs (emoji)", () => {
const provider = createProvider(8192);
const emoji = "😀";
const inputText = `${emoji.repeat(2100)}\n${emoji.repeat(2100)}`;
const out = enforceEmbeddingMaxInputTokens(provider, [
{ startLine: 1, endLine: 2, text: inputText, hash: "ignored" },
]);
expect(out.length).toBeGreaterThan(1);
expect(out.map((chunk) => chunk.text).join("")).toBe(inputText);
expect(out.every((chunk) => estimateUtf8Bytes(chunk.text) <= 8192)).toBe(true);
// If we split inside surrogate pairs we'd likely end up with replacement chars.
expect(out.map((chunk) => chunk.text).join("")).not.toContain("\uFFFD");
});
it("uses conservative fallback limits for local providers without declared maxInputTokens", () => {
const provider = createProviderWithoutMaxInputTokens({
id: "local",
model: "unknown-local-embedding",
});
const out = enforceEmbeddingMaxInputTokens(provider, [
{
startLine: 1,
endLine: 1,
text: "x".repeat(3000),
hash: "ignored",
},
]);
expect(out.length).toBeGreaterThan(1);
expect(out.every((chunk) => estimateUtf8Bytes(chunk.text) <= 2048)).toBe(true);
});
it("honors hard safety caps lower than provider maxInputTokens", () => {
const provider = createProvider(8192);
const out = enforceEmbeddingMaxInputTokens(
provider,
[
{
startLine: 1,
endLine: 1,
text: "x".repeat(8100),
hash: "ignored",
},
],
8000,
);
expect(out.length).toBeGreaterThan(1);
expect(out.every((chunk) => estimateUtf8Bytes(chunk.text) <= 8000)).toBe(true);
});
});

View File

@@ -1,34 +0,0 @@
import { describe, expect, it } from "vitest";
import { normalizeEmbeddingModelWithPrefixes } from "./embeddings-model-normalize.js";
describe("normalizeEmbeddingModelWithPrefixes", () => {
it("returns default model when input is blank", () => {
expect(
normalizeEmbeddingModelWithPrefixes({
model: " ",
defaultModel: "fallback-model",
prefixes: ["openai/"],
}),
).toBe("fallback-model");
});
it("strips the first matching prefix", () => {
expect(
normalizeEmbeddingModelWithPrefixes({
model: "openai/text-embedding-3-small",
defaultModel: "fallback-model",
prefixes: ["openai/"],
}),
).toBe("text-embedding-3-small");
});
it("keeps explicit model names when no prefix matches", () => {
expect(
normalizeEmbeddingModelWithPrefixes({
model: "voyage-4-large",
defaultModel: "fallback-model",
prefixes: ["voyage/"],
}),
).toBe("voyage-4-large");
});
});

View File

@@ -0,0 +1,25 @@
import fs from "node:fs";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { describe, expect, it } from "vitest";
// Directory containing this test file (resolved from the ESM module URL).
const HOST_DIR = path.dirname(fileURLToPath(import.meta.url));
// Repository root: three directory levels above this test file.
const REPO_ROOT = path.resolve(HOST_DIR, "../../..");
// Location of the package-side copies that must stay in sync with HOST_DIR.
const PACKAGE_HOST_DIR = path.join(REPO_ROOT, "packages/memory-host-sdk/src/host");
// Mirrored modules checked for byte-identity below; the name suggests their
// behavior is covered by the package's own tests — TODO confirm.
const PACKAGE_COVERED_MIRRORS = [
"batch-output.ts",
"batch-status.ts",
"embedding-chunk-limits.ts",
"embeddings-model-normalize.ts",
] as const;
describe("memory-host-sdk mirrored host modules", () => {
it("keeps package-covered source mirrors byte-identical", () => {
for (const fileName of PACKAGE_COVERED_MIRRORS) {
const srcSource = fs.readFileSync(path.join(HOST_DIR, fileName), "utf8");
const packageSource = fs.readFileSync(path.join(PACKAGE_HOST_DIR, fileName), "utf8");
expect(srcSource, fileName).toBe(packageSource);
}
});
});