test(gateway): harden acp bind docker smoke

(cherry picked from commit e60cc50dff)
This commit is contained in:
Peter Steinberger
2026-04-26 19:14:58 +01:00
parent a8ba87ee90
commit cec1d46b30
4 changed files with 81 additions and 26 deletions

View File

@@ -227,10 +227,12 @@ Notes:
- `OPENCLAW_LIVE_ACP_BIND_CODEX_MODEL=gpt-5.2`
- `OPENCLAW_LIVE_ACP_BIND_OPENCODE_MODEL=opencode/kimi-k2.6`
- `OPENCLAW_LIVE_ACP_BIND_REQUIRE_TRANSCRIPT=1`
- `OPENCLAW_LIVE_ACP_BIND_REQUIRE_CRON=1`
- `OPENCLAW_LIVE_ACP_BIND_PARENT_MODEL=openai/gpt-5.2`
- Notes:
- This lane uses the gateway `chat.send` surface with admin-only synthetic originating-route fields so tests can attach message-channel context without pretending to deliver externally.
- When `OPENCLAW_LIVE_ACP_BIND_AGENT_COMMAND` is unset, the test uses the embedded `acpx` plugin's built-in agent registry for the selected ACP harness agent.
- Bound-session cron MCP creation is best-effort by default because external ACP harnesses can cancel MCP calls after the bind/image proof has passed; set `OPENCLAW_LIVE_ACP_BIND_REQUIRE_CRON=1` to make that post-bind cron probe strict.
Example:

View File

@@ -148,6 +148,7 @@ exec "\$script_dir/claude-real" "\$@"
WRAP
chmod +x "$NPM_CONFIG_PREFIX/bin/claude"
fi
export CLAUDE_CODE_EXECUTABLE="$NPM_CONFIG_PREFIX/bin/claude"
claude auth status || true
;;
codex)
@@ -162,8 +163,8 @@ WRAP
fi
droid --version
if [ -z "${FACTORY_API_KEY:-}" ]; then
echo "Droid Docker ACP bind requires FACTORY_API_KEY; Factory OAuth/keyring auth in ~/.factory is not portable into the container." >&2
exit 1
echo "SKIP: Droid Docker ACP bind requires FACTORY_API_KEY; Factory OAuth/keyring auth in ~/.factory is not portable into the container." >&2
exit 0
fi
;;
gemini)
@@ -262,6 +263,16 @@ for ACP_AGENT in "${ACP_AGENTS[@]}"; do
DOCKER_AUTH_PRESTAGED=1
fi
if [[ "$ACP_AGENT" == "droid" && -z "${FACTORY_API_KEY:-}" ]]; then
echo "==> Run ACP bind live test in Docker"
echo "==> Agent: $ACP_AGENT"
echo "==> Profile file: $PROFILE_STATUS"
echo "==> Auth dirs: ${AUTH_DIRS_CSV:-none}"
echo "==> Auth files: ${AUTH_FILES_CSV:-none}"
echo "SKIP: Droid Docker ACP bind requires FACTORY_API_KEY; Factory OAuth/keyring auth in ~/.factory is not portable into the container." >&2
continue
fi
EXTERNAL_AUTH_MOUNTS=()
if ((${#AUTH_DIRS[@]} > 0)); then
for auth_dir in "${AUTH_DIRS[@]}"; do

View File

@@ -36,6 +36,9 @@ const describeLive = LIVE && ACP_BIND_LIVE ? describe : describe.skip;
// Timeouts in milliseconds (90 s and 240 s).
// NOTE(review): presumably the gateway connect budget and the per-test budget — confirm at the use sites.
const CONNECT_TIMEOUT_MS = 90_000;
const LIVE_TIMEOUT_MS = 240_000;
// Upper bound on how many times the live test re-sends the cron MCP probe prompt.
const ACP_CRON_MCP_PROBE_MAX_ATTEMPTS = 2;
// Maximum number of cron CLI visibility checks performed by pollCronJobVisibleViaCli per call.
const ACP_CRON_MCP_PROBE_VERIFY_POLLS = 5;
// Delay in milliseconds between consecutive cron CLI visibility checks.
const ACP_CRON_MCP_PROBE_VERIFY_POLL_MS = 1_000;
// NOTE(review): presumably fallbacks used when the corresponding
// OPENCLAW_LIVE_ACP_BIND_*_MODEL environment overrides are unset — confirm.
const DEFAULT_LIVE_CODEX_MODEL = "gpt-5.5";
const DEFAULT_LIVE_PARENT_MODEL = "openai/gpt-5.4";
// Identifiers of the ACP harness agents this live test distinguishes.
type LiveAcpAgent = "claude" | "codex" | "droid" | "gemini" | "opencode";
@@ -150,6 +153,10 @@ function shouldRequireBoundAssistantTranscript(liveAgent: LiveAcpAgent): boolean
);
}
/**
 * Reports whether the post-bind cron MCP probe is strict.
 *
 * Reads `OPENCLAW_LIVE_ACP_BIND_REQUIRE_CRON` from the process environment and
 * delegates the truthiness decision to `isTruthyEnvValue`.
 *
 * @returns `true` when the environment flag is considered truthy.
 */
function shouldRequireCronMcpProbe(): boolean {
  const { OPENCLAW_LIVE_ACP_BIND_REQUIRE_CRON: requireCronFlag } = process.env;
  return isTruthyEnvValue(requireCronFlag);
}
function normalizeOpenAiModelRef(value: string): string {
const trimmed = value.trim();
if (!trimmed) {
@@ -287,24 +294,30 @@ async function bindConversationAndWait(params: {
doctor?: () => Promise<{ message?: string; details?: string[] }>;
}
| undefined;
if (runtime?.probeAvailability) {
await runtime.probeAvailability().catch(() => {});
}
if (!backend || (backend.healthy && !backend.healthy())) {
if (runtime?.doctor && (attempt === 1 || attempt % 6 === 0)) {
const report = await runtime.doctor().catch((error) => ({
message: error instanceof Error ? error.message : String(error),
details: [],
}));
logLiveStep(
`acpx doctor before bind attempt ${attempt}: ${report.message ?? "unknown"}${
report.details?.length ? ` (${report.details.join("; ")})` : ""
}`,
);
const backendUnavailable = !backend || (backend.healthy && !backend.healthy());
if (backendUnavailable) {
if (runtime?.probeAvailability) {
await runtime.probeAvailability().catch(() => {});
}
const backendReadyAfterProbe = backend && (!backend.healthy || backend.healthy());
if (backendReadyAfterProbe) {
logLiveStep(`acpx backend became healthy before bind attempt ${attempt}`);
} else {
if (runtime?.doctor && (attempt === 1 || attempt % 6 === 0)) {
const report = await runtime.doctor().catch((error) => ({
message: error instanceof Error ? error.message : String(error),
details: [],
}));
logLiveStep(
`acpx doctor before bind attempt ${attempt}: ${report.message ?? "unknown"}${
report.details?.length ? ` (${report.details.join("; ")})` : ""
}`,
);
}
logLiveStep(`acpx backend still unhealthy before bind attempt ${attempt}`);
await sleep(5_000);
continue;
}
logLiveStep(`acpx backend still unhealthy before bind attempt ${attempt}`);
await sleep(5_000);
continue;
}
await sendChatAndWait({
@@ -463,6 +476,25 @@ async function waitForAssistantTurn(params: {
);
}
/**
 * Repeatedly checks, via `assertCronJobVisibleViaCli`, whether the expected
 * cron job has become visible, giving up after
 * `ACP_CRON_MCP_PROBE_VERIFY_POLLS` checks.
 *
 * @param params Forwarded unchanged to `assertCronJobVisibleViaCli` on every check.
 * @returns The job from the first check that yields a truthy result, together
 *   with the number of checks performed; when no check succeeds, only
 *   `pollsUsed` (equal to the poll limit) is returned.
 */
async function pollCronJobVisibleViaCli(params: {
  port: number;
  token: string;
  env: NodeJS.ProcessEnv;
  expectedName: string;
  expectedMessage: string;
}): Promise<{ job?: Awaited<ReturnType<typeof assertCronJobVisibleViaCli>>; pollsUsed: number }> {
  const pollLimit = ACP_CRON_MCP_PROBE_VERIFY_POLLS;
  let pollsUsed = 0;
  while (pollsUsed < pollLimit) {
    const visibleJob = await assertCronJobVisibleViaCli(params);
    pollsUsed += 1;
    if (visibleJob) {
      return { job: visibleJob, pollsUsed };
    }
    // Wait only between checks; there is nothing to wait for after the last one.
    const checksRemain = pollsUsed < pollLimit;
    if (checksRemain) {
      await sleep(ACP_CRON_MCP_PROBE_VERIFY_POLL_MS);
    }
  }
  return { pollsUsed: pollLimit };
}
describeLive("gateway live (ACP bind)", () => {
it(
"binds a synthetic Slack DM conversation to a live ACP session and reroutes the next turn",
@@ -852,9 +884,10 @@ describeLive("gateway live (ACP bind)", () => {
agentId: liveAgent,
sessionKey: spawnedSessionKey,
});
const requireCronMcpProbe = shouldRequireCronMcpProbe();
let cronJobId: string | undefined;
let lastCronAssistantText = "";
for (let attempt = 0; attempt < 2; attempt += 1) {
for (let attempt = 0; attempt < ACP_CRON_MCP_PROBE_MAX_ATTEMPTS; attempt += 1) {
await sendChatAndWait({
client,
sessionKey: originalSessionKey,
@@ -876,7 +909,7 @@ describeLive("gateway live (ACP bind)", () => {
cronHistory = await waitForAssistantText({
client,
sessionKey: spawnedSessionKey,
timeoutMs: liveAgent === "claude" ? 90_000 : 45_000,
timeoutMs: 20_000,
contains: cronProbe.name,
});
} catch {
@@ -885,13 +918,14 @@ describeLive("gateway live (ACP bind)", () => {
if (cronHistory) {
lastCronAssistantText = cronHistory.lastAssistantText;
}
const createdJob = await assertCronJobVisibleViaCli({
const verifyResult = await pollCronJobVisibleViaCli({
port,
token,
env: process.env,
expectedName: cronProbe.name,
expectedMessage: cronProbe.message,
});
const createdJob = verifyResult.job;
if (createdJob) {
assertCronJobMatches({
job: createdJob,
@@ -906,10 +940,15 @@ describeLive("gateway live (ACP bind)", () => {
}
break;
}
if (attempt === 1) {
if (liveAgent !== "claude") {
logLiveStep(
`cron mcp job not observed after attempt ${String(
attempt + 1,
)}; polls=${String(verifyResult.pollsUsed)}`,
);
if (attempt === ACP_CRON_MCP_PROBE_MAX_ATTEMPTS - 1) {
if (!requireCronMcpProbe) {
logLiveStep(
`cron mcp job ${cronProbe.name} not observed for ${liveAgent}; continuing after bind/image verification`,
`cron mcp job ${cronProbe.name} not observed; continuing after bind/image verification`,
);
break;
}
@@ -921,7 +960,7 @@ describeLive("gateway live (ACP bind)", () => {
}
}
if (!cronJobId) {
if (liveAgent !== "claude") {
if (!requireCronMcpProbe) {
return;
}
throw new Error(`acp cron cli verify did not create job ${cronProbe.name}`);

View File

@@ -74,6 +74,7 @@ export function buildLiveCronProbeMessage(params: {
if (params.attempt === 0) {
return (
"Use the OpenClaw MCP tool `openclaw-tools/cron` (server `openclaw-tools`, tool `cron`). " +
"If the harness shows Claude-style MCP names, use `mcp__openclaw-tools__cron` or `mcp__openclaw_tools__cron`. " +
`Call it with JSON arguments ${params.argsJson}. ` +
"Preserve the JSON exactly, including job.sessionTarget and job.sessionKey; do not omit, rename, or flatten those fields. " +
"Do the actual tool call; I will verify externally with the OpenClaw cron CLI. " +
@@ -83,6 +84,7 @@ export function buildLiveCronProbeMessage(params: {
if (claudeLike) {
return (
"Retry the OpenClaw MCP tool `openclaw-tools/cron` now. " +
"If the harness shows Claude-style MCP names, use `mcp__openclaw-tools__cron` or `mcp__openclaw_tools__cron`. " +
`Use these exact JSON arguments: ${params.argsJson}. ` +
"Preserve job.sessionTarget and job.sessionKey exactly as provided. " +
`If the cron job is created, reply exactly: ${params.exactReply}. ` +
@@ -94,6 +96,7 @@ export function buildLiveCronProbeMessage(params: {
return (
"Your previous OpenClaw cron MCP tool call was cancelled before the job was created. " +
"Retry the OpenClaw MCP tool `openclaw-tools/cron` now. " +
"If the harness shows Claude-style MCP names, use `mcp__openclaw-tools__cron` or `mcp__openclaw_tools__cron`. " +
`Use these exact JSON arguments: ${params.argsJson}. ` +
"Preserve job.sessionTarget and job.sessionKey exactly as provided. " +
`If the cron job is created, reply exactly: ${params.exactReply}. ` +