mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-12 13:00:44 +00:00
test: gate qa character concurrency
This commit is contained in:
@@ -40,6 +40,55 @@ function makeRunSuite(transcriptForModel: (model: string) => string = defaultMod
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Builds a deterministic gate for asserting bounded concurrency in tests.
 *
 * Every task passed to `run` is counted as active immediately and then parked
 * until `releaseStartedTasks()` is called, so a test can observe the peak
 * number of simultaneously started tasks without relying on timers.
 *
 * @param expectedActive - Number of simultaneously active tasks that settles
 *   `waitForExpectedActive()`. A value of zero or less is treated as already
 *   reached, so waiting on it never hangs.
 * @returns An object exposing `maxActive`, `run`, `waitForExpectedActive`, and
 *   `releaseStartedTasks`.
 */
function createConcurrencyGate(expectedActive: number) {
  let active = 0;
  let maxActive = 0;
  let released = false;
  let resolveExpectedActive: () => void = () => {};
  const expectedActiveReached = new Promise<void>((resolve) => {
    resolveExpectedActive = resolve;
  });
  if (expectedActive <= 0) {
    // Nothing has to start for a non-positive threshold to be satisfied.
    resolveExpectedActive();
  }

  // Resolvers for tasks that have started but are still parked at the gate.
  const pendingReleases: Array<() => void> = [];

  /** Unparks every queued task, in start order, once the gate has been opened. */
  const flushPendingReleases = (): void => {
    if (!released) {
      return;
    }
    for (const release of pendingReleases.splice(0)) {
      release();
    }
  };

  return {
    /** Highest number of tasks that were active at the same time so far. */
    get maxActive() {
      return maxActive;
    },

    /**
     * Registers a task as active, parks it until the gate is opened, then runs
     * `work` and resolves with its result.
     *
     * @param work - Callback executed after the gate is opened.
     * @returns The value produced by `work`; rejects if `work` throws or rejects.
     */
    async run<T>(work: () => T | Promise<T>): Promise<T> {
      active += 1;
      maxActive = Math.max(maxActive, active);
      if (active >= expectedActive) {
        resolveExpectedActive();
      }
      await new Promise<void>((resolve) => {
        pendingReleases.push(resolve);
        // If the gate is already open, this lets late tasks through right away.
        flushPendingReleases();
      });
      try {
        return await work();
      } finally {
        // Decrement even when `work` fails so the active count stays accurate.
        active -= 1;
      }
    },

    /** Resolves once at least `expectedActive` tasks have been active at once. */
    async waitForExpectedActive(): Promise<void> {
      await expectedActiveReached;
    },

    /** Opens the gate: releases all parked tasks and stops parking later ones. */
    releaseStartedTasks(): void {
      released = true;
      flushPendingReleases();
    },
  };
}
|
||||
|
||||
function makeSuiteResult(params: { outputDir: string; model: string; transcript: string }) {
|
||||
return {
|
||||
outputDir: params.outputDir,
|
||||
@@ -265,22 +314,17 @@ describe("runQaCharacterEval", () => {
|
||||
});
|
||||
|
||||
it("runs candidate models with bounded concurrency while preserving result order", async () => {
|
||||
let activeRuns = 0;
|
||||
let maxActiveRuns = 0;
|
||||
const runSuite = vi.fn(async (params: CharacterRunSuiteParams) => {
|
||||
activeRuns += 1;
|
||||
maxActiveRuns = Math.max(maxActiveRuns, activeRuns);
|
||||
await new Promise((resolve) => setTimeout(resolve, 10));
|
||||
activeRuns -= 1;
|
||||
return makeReplySuiteResult(params);
|
||||
});
|
||||
const runGate = createConcurrencyGate(2);
|
||||
const runSuite = vi.fn(async (params: CharacterRunSuiteParams) =>
|
||||
runGate.run(() => makeReplySuiteResult(params)),
|
||||
);
|
||||
const runJudge = makeRunJudge([
|
||||
{ model: "openai/gpt-5.5", rank: 1, score: 8, summary: "ok" },
|
||||
{ model: "anthropic/claude-sonnet-4-6", rank: 2, score: 7, summary: "ok" },
|
||||
{ model: "moonshot/kimi-k2.5", rank: 3, score: 6, summary: "ok" },
|
||||
]);
|
||||
|
||||
const result = await runQaCharacterEval({
|
||||
const resultPromise = runQaCharacterEval({
|
||||
repoRoot: tempRoot,
|
||||
outputDir: path.join(tempRoot, "character"),
|
||||
models: ["openai/gpt-5.5", "anthropic/claude-sonnet-4-6", "moonshot/kimi-k2.5"],
|
||||
@@ -290,7 +334,10 @@ describe("runQaCharacterEval", () => {
|
||||
runJudge,
|
||||
});
|
||||
|
||||
expect(maxActiveRuns).toBe(2);
|
||||
await runGate.waitForExpectedActive();
|
||||
expect(runGate.maxActive).toBe(2);
|
||||
runGate.releaseStartedTasks();
|
||||
const result = await resultPromise;
|
||||
expect(result.runs.map((run) => run.model)).toEqual([
|
||||
"openai/gpt-5.5",
|
||||
"anthropic/claude-sonnet-4-6",
|
||||
@@ -299,33 +346,25 @@ describe("runQaCharacterEval", () => {
|
||||
});
|
||||
|
||||
it("defaults candidate and judge concurrency to sixteen", async () => {
|
||||
let activeRuns = 0;
|
||||
let maxActiveRuns = 0;
|
||||
const runSuite = vi.fn(async (params: CharacterRunSuiteParams) => {
|
||||
activeRuns += 1;
|
||||
maxActiveRuns = Math.max(maxActiveRuns, activeRuns);
|
||||
await new Promise((resolve) => setTimeout(resolve, 10));
|
||||
activeRuns -= 1;
|
||||
return makeReplySuiteResult(params);
|
||||
});
|
||||
let activeJudges = 0;
|
||||
let maxActiveJudges = 0;
|
||||
const runGate = createConcurrencyGate(16);
|
||||
const judgeGate = createConcurrencyGate(16);
|
||||
const runSuite = vi.fn(async (params: CharacterRunSuiteParams) =>
|
||||
runGate.run(() => makeReplySuiteResult(params)),
|
||||
);
|
||||
const runJudge = vi.fn(async (_params: CharacterRunJudgeParams) => {
|
||||
activeJudges += 1;
|
||||
maxActiveJudges = Math.max(maxActiveJudges, activeJudges);
|
||||
await new Promise((resolve) => setTimeout(resolve, 10));
|
||||
activeJudges -= 1;
|
||||
return makeJudgeReply(
|
||||
Array.from({ length: 20 }, (_, index) => ({
|
||||
model: `provider/model-${index + 1}`,
|
||||
rank: index + 1,
|
||||
score: 10 - index,
|
||||
summary: "ok",
|
||||
})),
|
||||
return await judgeGate.run(() =>
|
||||
makeJudgeReply(
|
||||
Array.from({ length: 20 }, (_, index) => ({
|
||||
model: `provider/model-${index + 1}`,
|
||||
rank: index + 1,
|
||||
score: 10 - index,
|
||||
summary: "ok",
|
||||
})),
|
||||
),
|
||||
);
|
||||
});
|
||||
|
||||
await runQaCharacterEval({
|
||||
const resultPromise = runQaCharacterEval({
|
||||
repoRoot: tempRoot,
|
||||
outputDir: path.join(tempRoot, "character"),
|
||||
models: Array.from({ length: 20 }, (_, index) => `provider/model-${index + 1}`),
|
||||
@@ -334,8 +373,13 @@ describe("runQaCharacterEval", () => {
|
||||
runJudge,
|
||||
});
|
||||
|
||||
expect(maxActiveRuns).toBe(16);
|
||||
expect(maxActiveJudges).toBe(16);
|
||||
await runGate.waitForExpectedActive();
|
||||
expect(runGate.maxActive).toBe(16);
|
||||
runGate.releaseStartedTasks();
|
||||
await judgeGate.waitForExpectedActive();
|
||||
expect(judgeGate.maxActive).toBe(16);
|
||||
judgeGate.releaseStartedTasks();
|
||||
await resultPromise;
|
||||
});
|
||||
|
||||
it("marks raw provider error transcripts as failed output", async () => {
|
||||
|
||||
Reference in New Issue
Block a user