mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 08:00:42 +00:00
test: speed up Docker live scheduling
This commit is contained in:
@@ -11,6 +11,7 @@ const DEFAULT_TAIL_PARALLELISM = 10;
|
||||
const DEFAULT_FAILURE_TAIL_LINES = 80;
|
||||
const DEFAULT_LANE_TIMEOUT_MS = 120 * 60 * 1000;
|
||||
const DEFAULT_LANE_START_STAGGER_MS = 2_000;
|
||||
const DEFAULT_LIVE_RETRIES = 1;
|
||||
const DEFAULT_STATUS_INTERVAL_MS = 30_000;
|
||||
const DEFAULT_PREFLIGHT_RUN_TIMEOUT_MS = 60_000;
|
||||
const DEFAULT_TIMINGS_FILE = path.join(ROOT_DIR, ".artifacts/docker-tests/lane-timings.json");
|
||||
@@ -21,29 +22,71 @@ const OPENWEBUI_TIMEOUT_MS = 20 * 60 * 1000;
|
||||
const BUNDLED_UPDATE_TIMEOUT_MS = 20 * 60 * 1000;
|
||||
const DEFAULT_RESOURCE_LIMITS = {
|
||||
docker: DEFAULT_PARALLELISM,
|
||||
live: 6,
|
||||
npm: 8,
|
||||
live: 9,
|
||||
"live:claude": 4,
|
||||
"live:codex": 4,
|
||||
"live:gemini": 4,
|
||||
npm: 10,
|
||||
service: 7,
|
||||
};
|
||||
const LIVE_RETRY_PATTERNS = [
|
||||
/529\b/i,
|
||||
/overloaded/i,
|
||||
/capacity/i,
|
||||
/rate.?limit/i,
|
||||
/gateway closed \(1000 normal closure\)/i,
|
||||
/ECONNRESET|ETIMEDOUT|ENOTFOUND/i,
|
||||
];
|
||||
|
||||
const bundledChannelLaneCommand =
|
||||
"OPENCLAW_SKIP_DOCKER_BUILD=1 OPENCLAW_BUNDLED_CHANNEL_UPDATE_SCENARIO=0 OPENCLAW_BUNDLED_CHANNEL_ROOT_OWNED_SCENARIO=0 OPENCLAW_BUNDLED_CHANNEL_SETUP_ENTRY_SCENARIO=0 OPENCLAW_BUNDLED_CHANNEL_LOAD_FAILURE_SCENARIO=0 OPENCLAW_BUNDLED_CHANNEL_DISABLED_CONFIG_SCENARIO=0 pnpm test:docker:bundled-channel-deps";
|
||||
|
||||
function lane(name, command, options = {}) {
|
||||
return {
|
||||
cacheKey: options.cacheKey,
|
||||
command,
|
||||
estimateSeconds: options.estimateSeconds,
|
||||
live: options.live === true,
|
||||
name,
|
||||
retryPatterns: options.retryPatterns ?? [],
|
||||
retries: options.retries ?? 0,
|
||||
resources: options.resources ?? [],
|
||||
timeoutMs: options.timeoutMs,
|
||||
weight: options.weight ?? 1,
|
||||
};
|
||||
}
|
||||
|
||||
function liveProviderResource(provider) {
|
||||
if (!provider) {
|
||||
return undefined;
|
||||
}
|
||||
if (provider === "claude-cli" || provider === "claude") {
|
||||
return "live:claude";
|
||||
}
|
||||
if (provider === "codex-cli" || provider === "codex") {
|
||||
return "live:codex";
|
||||
}
|
||||
if (provider === "google-gemini-cli" || provider === "gemini") {
|
||||
return "live:gemini";
|
||||
}
|
||||
if (provider === "openai") {
|
||||
return "live:openai";
|
||||
}
|
||||
return `live:${provider}`;
|
||||
}
|
||||
|
||||
function liveProviderResources(options) {
|
||||
const providers = options.providers ?? (options.provider ? [options.provider] : []);
|
||||
return providers.map(liveProviderResource).filter(Boolean);
|
||||
}
|
||||
|
||||
function liveLane(name, command, options = {}) {
|
||||
return lane(name, command, {
|
||||
...options,
|
||||
resources: ["live", ...(options.resources ?? [])],
|
||||
live: true,
|
||||
resources: ["live", ...liveProviderResources(options), ...(options.resources ?? [])],
|
||||
retryPatterns: options.retryPatterns ?? LIVE_RETRY_PATTERNS,
|
||||
retries: options.retries ?? DEFAULT_LIVE_RETRIES,
|
||||
weight: options.weight ?? 3,
|
||||
});
|
||||
}
|
||||
@@ -132,22 +175,36 @@ const bundledScenarioLanes = [
|
||||
|
||||
const lanes = [
|
||||
liveLane("live-models", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:live-models", {
|
||||
providers: ["claude-cli", "codex-cli", "google-gemini-cli"],
|
||||
timeoutMs: LIVE_PROFILE_TIMEOUT_MS,
|
||||
weight: 4,
|
||||
}),
|
||||
liveLane("live-gateway", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:live-gateway", {
|
||||
providers: ["claude-cli", "codex-cli", "google-gemini-cli"],
|
||||
timeoutMs: LIVE_PROFILE_TIMEOUT_MS,
|
||||
weight: 4,
|
||||
}),
|
||||
liveLane(
|
||||
"live-cli-backend-claude",
|
||||
"OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:live-cli-backend:claude",
|
||||
{ resources: ["npm"], timeoutMs: LIVE_CLI_TIMEOUT_MS, weight: 3 },
|
||||
{
|
||||
cacheKey: "cli-backend-claude",
|
||||
provider: "claude-cli",
|
||||
resources: ["npm"],
|
||||
timeoutMs: LIVE_CLI_TIMEOUT_MS,
|
||||
weight: 3,
|
||||
},
|
||||
),
|
||||
liveLane(
|
||||
"live-cli-backend-gemini",
|
||||
"OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:live-cli-backend:gemini",
|
||||
{ resources: ["npm"], timeoutMs: LIVE_CLI_TIMEOUT_MS, weight: 3 },
|
||||
{
|
||||
cacheKey: "cli-backend-gemini",
|
||||
provider: "google-gemini-cli",
|
||||
resources: ["npm"],
|
||||
timeoutMs: LIVE_CLI_TIMEOUT_MS,
|
||||
weight: 3,
|
||||
},
|
||||
),
|
||||
serviceLane("openwebui", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:openwebui", {
|
||||
timeoutMs: OPENWEBUI_TIMEOUT_MS,
|
||||
@@ -204,9 +261,17 @@ const exclusiveLanes = [
|
||||
liveLane(
|
||||
"live-codex-harness",
|
||||
"OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:live-codex-harness",
|
||||
{ resources: ["npm"], timeoutMs: LIVE_ACP_TIMEOUT_MS, weight: 3 },
|
||||
{
|
||||
cacheKey: "codex-harness",
|
||||
provider: "codex-cli",
|
||||
resources: ["npm"],
|
||||
timeoutMs: LIVE_ACP_TIMEOUT_MS,
|
||||
weight: 3,
|
||||
},
|
||||
),
|
||||
liveLane("live-codex-bind", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:live-codex-bind", {
|
||||
cacheKey: "codex-harness",
|
||||
provider: "codex-cli",
|
||||
resources: ["npm"],
|
||||
timeoutMs: LIVE_ACP_TIMEOUT_MS,
|
||||
weight: 3,
|
||||
@@ -214,22 +279,46 @@ const exclusiveLanes = [
|
||||
liveLane(
|
||||
"live-cli-backend-codex",
|
||||
"OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:live-cli-backend:codex",
|
||||
{ resources: ["npm"], timeoutMs: LIVE_CLI_TIMEOUT_MS, weight: 3 },
|
||||
{
|
||||
cacheKey: "cli-backend-codex",
|
||||
provider: "codex-cli",
|
||||
resources: ["npm"],
|
||||
timeoutMs: LIVE_CLI_TIMEOUT_MS,
|
||||
weight: 3,
|
||||
},
|
||||
),
|
||||
liveLane(
|
||||
"live-acp-bind-claude",
|
||||
"OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:live-acp-bind:claude",
|
||||
{ resources: ["npm"], timeoutMs: LIVE_ACP_TIMEOUT_MS, weight: 3 },
|
||||
{
|
||||
cacheKey: "acp-bind-claude",
|
||||
provider: "claude-cli",
|
||||
resources: ["npm"],
|
||||
timeoutMs: LIVE_ACP_TIMEOUT_MS,
|
||||
weight: 3,
|
||||
},
|
||||
),
|
||||
liveLane(
|
||||
"live-acp-bind-codex",
|
||||
"OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:live-acp-bind:codex",
|
||||
{ resources: ["npm"], timeoutMs: LIVE_ACP_TIMEOUT_MS, weight: 3 },
|
||||
{
|
||||
cacheKey: "acp-bind-codex",
|
||||
provider: "codex-cli",
|
||||
resources: ["npm"],
|
||||
timeoutMs: LIVE_ACP_TIMEOUT_MS,
|
||||
weight: 3,
|
||||
},
|
||||
),
|
||||
liveLane(
|
||||
"live-acp-bind-gemini",
|
||||
"OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:live-acp-bind:gemini",
|
||||
{ resources: ["npm"], timeoutMs: LIVE_ACP_TIMEOUT_MS, weight: 3 },
|
||||
{
|
||||
cacheKey: "acp-bind-gemini",
|
||||
provider: "google-gemini-cli",
|
||||
resources: ["npm"],
|
||||
timeoutMs: LIVE_ACP_TIMEOUT_MS,
|
||||
weight: 3,
|
||||
},
|
||||
),
|
||||
];
|
||||
|
||||
@@ -264,8 +353,39 @@ function parseBool(raw, fallback) {
|
||||
return !/^(?:0|false|no)$/i.test(raw);
|
||||
}
|
||||
|
||||
function parseLiveMode(raw) {
|
||||
const mode = raw || "all";
|
||||
if (mode === "all" || mode === "skip" || mode === "only") {
|
||||
return mode;
|
||||
}
|
||||
throw new Error(
|
||||
`OPENCLAW_DOCKER_ALL_LIVE_MODE must be one of: all, skip, only. Got: ${JSON.stringify(raw)}`,
|
||||
);
|
||||
}
|
||||
|
||||
function applyLiveMode(poolLanes, mode) {
|
||||
if (mode === "all") {
|
||||
return poolLanes;
|
||||
}
|
||||
return poolLanes.filter((poolLane) => (mode === "only" ? poolLane.live : !poolLane.live));
|
||||
}
|
||||
|
||||
function applyLiveRetries(poolLanes, retries) {
|
||||
return poolLanes.map((poolLane) => (poolLane.live ? { ...poolLane, retries } : poolLane));
|
||||
}
|
||||
|
||||
function resourceLimitsSummary(resourceLimits) {
|
||||
return Object.entries(resourceLimits)
|
||||
.map(([resource, limit]) => `${resource}=${String(limit)}`)
|
||||
.join(" ");
|
||||
}
|
||||
|
||||
function resourceLimitEnvName(resource) {
|
||||
return `OPENCLAW_DOCKER_ALL_${resource.toUpperCase().replace(/[^A-Z0-9]+/g, "_")}_LIMIT`;
|
||||
}
|
||||
|
||||
function parseResourceLimit(env, resource, parallelism, fallback) {
|
||||
const envName = `OPENCLAW_DOCKER_ALL_${resource.toUpperCase()}_LIMIT`;
|
||||
const envName = resourceLimitEnvName(resource);
|
||||
return parsePositiveInt(env[envName], Math.min(parallelism, fallback), envName);
|
||||
}
|
||||
|
||||
@@ -275,13 +395,12 @@ function parseSchedulerOptions(env, parallelism) {
|
||||
parallelism,
|
||||
"OPENCLAW_DOCKER_ALL_WEIGHT_LIMIT",
|
||||
);
|
||||
const resourceLimits = {};
|
||||
for (const [resource, fallback] of Object.entries(DEFAULT_RESOURCE_LIMITS)) {
|
||||
resourceLimits[resource] = parseResourceLimit(env, resource, parallelism, fallback);
|
||||
}
|
||||
return {
|
||||
resourceLimits: {
|
||||
docker: parseResourceLimit(env, "docker", parallelism, parallelism),
|
||||
live: parseResourceLimit(env, "live", parallelism, DEFAULT_RESOURCE_LIMITS.live),
|
||||
npm: parseResourceLimit(env, "npm", parallelism, DEFAULT_RESOURCE_LIMITS.npm),
|
||||
service: parseResourceLimit(env, "service", parallelism, DEFAULT_RESOURCE_LIMITS.service),
|
||||
},
|
||||
resourceLimits,
|
||||
weightLimit,
|
||||
};
|
||||
}
|
||||
@@ -297,7 +416,9 @@ function laneResources(poolLane) {
|
||||
function laneSummary(poolLane) {
|
||||
const resources = laneResources(poolLane).join(",");
|
||||
const timeout = poolLane.timeoutMs ? ` timeout=${Math.round(poolLane.timeoutMs / 1000)}s` : "";
|
||||
return `${poolLane.name}(w=${laneWeight(poolLane)} r=${resources}${timeout})`;
|
||||
const retries = poolLane.retries > 0 ? ` retries=${poolLane.retries}` : "";
|
||||
const cache = poolLane.cacheKey ? ` cache=${poolLane.cacheKey}` : "";
|
||||
return `${poolLane.name}(w=${laneWeight(poolLane)} r=${resources}${timeout}${retries}${cache})`;
|
||||
}
|
||||
|
||||
function sleep(ms) {
|
||||
@@ -638,15 +759,16 @@ async function prepareBundledChannelPackage(baseEnv, logDir) {
|
||||
console.log(`==> Bundled channel package: ${baseEnv.OPENCLAW_BUNDLED_CHANNEL_PACKAGE_TGZ}`);
|
||||
}
|
||||
|
||||
function laneEnv(name, baseEnv, logDir) {
|
||||
function laneEnv(name, baseEnv, logDir, cacheKey) {
|
||||
const env = {
|
||||
...baseEnv,
|
||||
};
|
||||
const cacheName = cacheKey || name;
|
||||
if (!process.env.OPENCLAW_DOCKER_CLI_TOOLS_DIR) {
|
||||
env.OPENCLAW_DOCKER_CLI_TOOLS_DIR = path.join(logDir, `${name}-cli-tools`);
|
||||
env.OPENCLAW_DOCKER_CLI_TOOLS_DIR = path.join(logDir, `${cacheName}-cli-tools`);
|
||||
}
|
||||
if (!process.env.OPENCLAW_DOCKER_CACHE_HOME_DIR) {
|
||||
env.OPENCLAW_DOCKER_CACHE_HOME_DIR = path.join(logDir, `${name}-cache`);
|
||||
env.OPENCLAW_DOCKER_CACHE_HOME_DIR = path.join(logDir, `${cacheName}-cache`);
|
||||
}
|
||||
return env;
|
||||
}
|
||||
@@ -655,7 +777,7 @@ async function runLane(lane, baseEnv, logDir, fallbackTimeoutMs) {
|
||||
const { command, name } = lane;
|
||||
const timeoutMs = lane.timeoutMs ?? fallbackTimeoutMs;
|
||||
const logFile = path.join(logDir, `${name}.log`);
|
||||
const env = laneEnv(name, baseEnv, logDir);
|
||||
const env = laneEnv(name, baseEnv, logDir, lane.cacheKey);
|
||||
await mkdir(env.OPENCLAW_DOCKER_CLI_TOOLS_DIR, { recursive: true });
|
||||
await mkdir(env.OPENCLAW_DOCKER_CACHE_HOME_DIR, { recursive: true });
|
||||
await fs.promises.writeFile(
|
||||
@@ -664,12 +786,29 @@ async function runLane(lane, baseEnv, logDir, fallbackTimeoutMs) {
|
||||
`==> [${name}] cli tools dir: ${env.OPENCLAW_DOCKER_CLI_TOOLS_DIR}`,
|
||||
`==> [${name}] cache dir: ${env.OPENCLAW_DOCKER_CACHE_HOME_DIR}`,
|
||||
`==> [${name}] timeout: ${timeoutMs}ms`,
|
||||
`==> [${name}] retries: ${lane.retries ?? 0}`,
|
||||
"",
|
||||
].join("\n"),
|
||||
);
|
||||
console.log(`==> [${name}] start`);
|
||||
const startedAt = Date.now();
|
||||
const result = await runShellCommand({ command, env, label: name, logFile, timeoutMs });
|
||||
let result;
|
||||
const maxAttempts = 1 + Math.max(0, lane.retries ?? 0);
|
||||
for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
|
||||
if (attempt > 1) {
|
||||
await fs.promises.appendFile(logFile, `\n==> [${name}] retry attempt ${attempt}\n`);
|
||||
console.log(`==> [${name}] retry ${attempt}/${maxAttempts}`);
|
||||
}
|
||||
result = await runShellCommand({ command, env, label: name, logFile, timeoutMs });
|
||||
if (result.status === 0 || attempt >= maxAttempts) {
|
||||
break;
|
||||
}
|
||||
const retryable =
|
||||
result.timedOut || (await laneLogMatchesRetryPattern(logFile, lane.retryPatterns));
|
||||
if (!retryable) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
const elapsedSeconds = Math.round((Date.now() - startedAt) / 1000);
|
||||
if (result.status === 0) {
|
||||
console.log(`==> [${name}] pass ${elapsedSeconds}s`);
|
||||
@@ -847,6 +986,14 @@ async function tailFile(file, lines) {
|
||||
return tail.trimEnd();
|
||||
}
|
||||
|
||||
async function laneLogMatchesRetryPattern(logFile, patterns) {
|
||||
if (!patterns || patterns.length === 0) {
|
||||
return false;
|
||||
}
|
||||
const tail = await tailFile(logFile, 160);
|
||||
return patterns.some((pattern) => pattern.test(tail));
|
||||
}
|
||||
|
||||
async function printFailureSummary(failures, tailLines) {
|
||||
console.error(`ERROR: ${failures.length} Docker lane(s) failed.`);
|
||||
for (const failure of failures) {
|
||||
@@ -927,6 +1074,12 @@ async function main() {
|
||||
const preflightEnabled = parseBool(process.env.OPENCLAW_DOCKER_ALL_PREFLIGHT, true);
|
||||
const preflightCleanup = parseBool(process.env.OPENCLAW_DOCKER_ALL_PREFLIGHT_CLEANUP, true);
|
||||
const timingsEnabled = parseBool(process.env.OPENCLAW_DOCKER_ALL_TIMINGS, true);
|
||||
const liveMode = parseLiveMode(process.env.OPENCLAW_DOCKER_ALL_LIVE_MODE);
|
||||
const liveRetries = parseNonNegativeInt(
|
||||
process.env.OPENCLAW_DOCKER_ALL_LIVE_RETRIES,
|
||||
DEFAULT_LIVE_RETRIES,
|
||||
"OPENCLAW_DOCKER_ALL_LIVE_RETRIES",
|
||||
);
|
||||
const timingsFile = path.resolve(
|
||||
process.env.OPENCLAW_DOCKER_ALL_TIMINGS_FILE || DEFAULT_TIMINGS_FILE,
|
||||
);
|
||||
@@ -945,13 +1098,22 @@ async function main() {
|
||||
appendExtension(baseEnv, "codex");
|
||||
|
||||
const timingStore = await loadTimingStore(timingsFile, timingsEnabled);
|
||||
const orderedLanes = orderLanes(lanes, timingStore);
|
||||
const orderedTailLanes = orderLanes(tailLanes, timingStore);
|
||||
const retriedMainLanes = applyLiveRetries(lanes, liveRetries);
|
||||
const retriedTailLanes = applyLiveRetries(tailLanes, liveRetries);
|
||||
const configuredLanes =
|
||||
liveMode === "only"
|
||||
? applyLiveMode([...retriedMainLanes, ...retriedTailLanes], liveMode)
|
||||
: applyLiveMode(retriedMainLanes, liveMode);
|
||||
const configuredTailLanes = liveMode === "only" ? [] : applyLiveMode(retriedTailLanes, liveMode);
|
||||
const orderedLanes = orderLanes(configuredLanes, timingStore);
|
||||
const orderedTailLanes = orderLanes(configuredTailLanes, timingStore);
|
||||
|
||||
console.log(`==> Docker test logs: ${logDir}`);
|
||||
console.log(`==> Parallelism: ${parallelism}`);
|
||||
console.log(`==> Tail parallelism: ${tailParallelism}`);
|
||||
console.log(`==> Lane timeout: ${laneTimeoutMs}ms`);
|
||||
console.log(`==> Live mode: ${liveMode}`);
|
||||
console.log(`==> Live retries: ${liveRetries}`);
|
||||
console.log(`==> Lane start stagger: ${laneStartStaggerMs}ms`);
|
||||
console.log(`==> Status interval: ${statusIntervalMs}ms`);
|
||||
console.log(`==> Fail fast: ${failFast ? "yes" : "no"}`);
|
||||
@@ -966,10 +1128,10 @@ async function main() {
|
||||
const schedulerOptions = parseSchedulerOptions(process.env, parallelism);
|
||||
const tailSchedulerOptions = parseSchedulerOptions(process.env, tailParallelism);
|
||||
console.log(
|
||||
`==> Scheduler: weight=${schedulerOptions.weightLimit} docker=${schedulerOptions.resourceLimits.docker} live=${schedulerOptions.resourceLimits.live} npm=${schedulerOptions.resourceLimits.npm} service=${schedulerOptions.resourceLimits.service}`,
|
||||
`==> Scheduler: weight=${schedulerOptions.weightLimit} ${resourceLimitsSummary(schedulerOptions.resourceLimits)}`,
|
||||
);
|
||||
console.log(
|
||||
`==> Tail scheduler: weight=${tailSchedulerOptions.weightLimit} docker=${tailSchedulerOptions.resourceLimits.docker} live=${tailSchedulerOptions.resourceLimits.live} npm=${tailSchedulerOptions.resourceLimits.npm} service=${tailSchedulerOptions.resourceLimits.service}`,
|
||||
`==> Tail scheduler: weight=${tailSchedulerOptions.weightLimit} ${resourceLimitsSummary(tailSchedulerOptions.resourceLimits)}`,
|
||||
);
|
||||
printLaneManifest("Main", orderedLanes, timingStore);
|
||||
printLaneManifest("Tail", orderedTailLanes, timingStore);
|
||||
|
||||
Reference in New Issue
Block a user