fix(browser): preserve tabs across target swaps

This commit is contained in:
Peter Steinberger
2026-04-26 01:21:29 +01:00
parent f3accc753c
commit e6ee4d6e68
8 changed files with 201 additions and 13 deletions

View File

@@ -73,6 +73,10 @@ Docs: https://docs.openclaw.ai
- Plugins/QQ Bot: prefer an installed QQ Bot plugin that declares it replaces
the bundled `qqbot` channel, preventing duplicate `qqbot_channel_api` and
`qqbot_remind` tool registration noise. Fixes #63102.
- Browser automation: keep stable tab ids and labels attached when Chromium
replaces the raw target after form submissions or other action-triggered
navigations, and return the replacement `targetId` from `/act` when the match
is provable. Fixes #46137.
- QQ Bot: make `qqbot_remind` schedule, list, and remove Gateway cron jobs
directly for owner-authorized senders instead of returning `cronParams` and
relying on a follow-up generic `cron` tool call. Fixes #70865. (#70937)

View File

@@ -138,6 +138,10 @@ the optional label, and the raw `targetId`. Agents should pass
`suggestedTargetId` back into `focus`, `close`, snapshots, and actions. You can
assign a label with `open --label`, `tab new --label`, or `tab label`; labels,
tab ids, raw target ids, and unique target-id prefixes are all accepted.
When Chromium replaces the underlying raw target during a navigation or form
submit, OpenClaw keeps the stable `tabId`/label attached to the replacement tab
when it can prove the match. Raw target ids remain volatile; prefer
`suggestedTargetId`.
## Snapshot / screenshot / actions
@@ -185,6 +189,10 @@ openclaw browser wait --text "Done"
openclaw browser evaluate --fn '(el) => el.textContent' --ref <ref>
```
Action responses return the current raw `targetId` after action-triggered page
replacement when OpenClaw can prove the replacement tab. Scripts should still
store and pass `suggestedTargetId`/labels for long-lived workflows.
File + dialog helpers:
```bash

View File

@@ -221,6 +221,11 @@ Notes:
- Download, trace, and upload paths are constrained to OpenClaw temp roots: `/tmp/openclaw{,/downloads,/uploads}` (fallback: `${os.tmpdir()}/openclaw/...`).
- `upload` can also set file inputs directly via `--input-ref` or `--element`.
Stable tab ids and labels survive Chromium raw-target replacement when OpenClaw
can prove the replacement tab, such as same URL or a single old tab becoming a
single new tab after form submission. Raw target ids are still volatile; prefer
`suggestedTargetId` from `tabs` in scripts.
Snapshot flags at a glance:
- `--format ai` (default with Playwright): AI snapshot with numeric refs (`aria-ref="<n>"`).
@@ -258,6 +263,9 @@ OpenClaw supports two “snapshot” styles:
Ref behavior:
- Refs are **not stable across navigations**; if something fails, re-run `snapshot` and use a fresh ref.
- `/act` returns the current raw `targetId` after action-triggered replacement
when it can prove the replacement tab. Keep using stable tab ids/labels for
follow-up commands.
- If the role snapshot was taken with `--frame`, role refs are scoped to that iframe until the next role snapshot.
- Unknown or stale `axN` refs fail fast instead of falling through to
Playwright's `aria-ref` selector. Run a fresh snapshot on the same tab when

View File

@@ -34,9 +34,11 @@ import {
readBody,
requirePwAi,
resolveTargetIdFromBody,
resolveSafeRouteTabUrl,
withRouteTabContext,
SELECTOR_UNSUPPORTED_MESSAGE,
} from "./agent.shared.js";
import { resolveTargetIdAfterNavigate } from "./agent.snapshot-target.js";
import { EXISTING_SESSION_LIMITS } from "./existing-session-limits.js";
import type { BrowserRouteRegistrar } from "./types.js";
import { asyncBrowserRoute, jsonError, toNumber, toStringOrEmpty } from "./utils.js";
@@ -388,11 +390,35 @@ export function registerBrowserAgentActRoutes(
run: async ({ profileCtx, cdpUrl, tab, resolveTabUrl }) => {
const evaluateEnabled = ctx.state().resolved.evaluateEnabled;
const ssrfPolicy = ctx.state().resolved.ssrfPolicy;
const jsonOk = async (extra?: Record<string, unknown>) => {
const url = await resolveTabUrl(tab.url);
const isExistingSession = getBrowserProfileCapabilities(profileCtx.profile).usesChromeMcp;
const hasNavigationResultPolicy = Boolean(
withBrowserNavigationPolicy(ssrfPolicy).ssrfPolicy,
);
const jsonOk = async (
extra?: Record<string, unknown>,
options?: { resolveCurrentTarget?: boolean },
) => {
const shouldResolveCurrentTarget =
options?.resolveCurrentTarget && (!isExistingSession || hasNavigationResultPolicy);
const responseTargetId = shouldResolveCurrentTarget
? await resolveTargetIdAfterNavigate({
oldTargetId: tab.targetId,
navigatedUrl: tab.url,
listTabs: () => profileCtx.listTabs(),
})
: tab.targetId;
const url =
responseTargetId === tab.targetId
? await resolveTabUrl(tab.url)
: await resolveSafeRouteTabUrl({
ctx,
profileCtx,
targetId: responseTargetId,
fallbackUrl: tab.url,
});
return res.json({
ok: true,
targetId: tab.targetId,
targetId: responseTargetId,
...(url ? { url } : {}),
...extra,
});
@@ -405,10 +431,9 @@ export function registerBrowserAgentActRoutes(
"action targetId must match request targetId",
);
}
const isExistingSession = getBrowserProfileCapabilities(profileCtx.profile).usesChromeMcp;
const profileName = profileCtx.profile.name;
if (isExistingSession) {
const initialTabTargetIds = withBrowserNavigationPolicy(ssrfPolicy).ssrfPolicy
const initialTabTargetIds = hasNavigationResultPolicy
? new Set((await profileCtx.listTabs()).map((currentTab) => currentTab.targetId))
: new Set<string>();
const existingSessionNavigationGuard = {
@@ -443,7 +468,7 @@ export function registerBrowserAgentActRoutes(
}),
guard: existingSessionNavigationGuard,
});
return await jsonOk();
return await jsonOk(undefined, { resolveCurrentTarget: true });
case "clickCoords":
await runExistingSessionActionWithNavigationGuard({
execute: () =>
@@ -459,7 +484,7 @@ export function registerBrowserAgentActRoutes(
}),
guard: existingSessionNavigationGuard,
});
return await jsonOk();
return await jsonOk(undefined, { resolveCurrentTarget: true });
case "type":
await runExistingSessionActionWithNavigationGuard({
execute: async () => {
@@ -481,7 +506,7 @@ export function registerBrowserAgentActRoutes(
},
guard: existingSessionNavigationGuard,
});
return await jsonOk();
return await jsonOk(undefined, { resolveCurrentTarget: true });
case "press":
await runExistingSessionActionWithNavigationGuard({
execute: () =>
@@ -493,7 +518,7 @@ export function registerBrowserAgentActRoutes(
}),
guard: existingSessionNavigationGuard,
});
return await jsonOk();
return await jsonOk(undefined, { resolveCurrentTarget: true });
case "hover":
await runExistingSessionActionWithNavigationGuard({
execute: () =>
@@ -631,15 +656,19 @@ export function registerBrowserAgentActRoutes(
});
switch (action.kind) {
case "batch":
return await jsonOk({ results: result.results ?? [] });
return await jsonOk(
{ results: result.results ?? [] },
{ resolveCurrentTarget: true },
);
case "evaluate":
return await jsonOk({ result: result.result });
return await jsonOk({ result: result.result }, { resolveCurrentTarget: true });
case "click":
case "clickCoords":
return await jsonOk(undefined, { resolveCurrentTarget: true });
case "resize":
return await jsonOk();
default:
return await jsonOk();
return await jsonOk(undefined, { resolveCurrentTarget: true });
}
},
});

View File

@@ -94,6 +94,64 @@ describe("browser remote profile tab ops via Playwright", () => {
);
});
it("transfers stable aliases across a high-confidence target replacement", async () => {
let currentPages = [page("A", "https://app.example/form")];
const listPagesViaPlaywright = vi.fn(async () => currentPages);
vi.spyOn(deps.pwAiModule, "getPwAiModule").mockResolvedValue({
listPagesViaPlaywright,
} as unknown as Awaited<ReturnType<typeof deps.pwAiModule.getPwAiModule>>);
const { state, remote } = deps.createRemoteRouteHarness();
const first = await remote.listTabs();
expect(first).toMatchObject([{ targetId: "A", tabId: "t1", suggestedTargetId: "t1" }]);
const labeled = await remote.labelTab("t1", "form");
expect(labeled).toMatchObject({ targetId: "A", tabId: "t1", label: "form" });
state.profiles.get("remote")!.lastTargetId = "A";
currentPages = [page("B", "https://app.example/submitted")];
const afterSwap = await remote.listTabs();
expect(afterSwap).toMatchObject([
{ targetId: "B", tabId: "t1", suggestedTargetId: "form", label: "form" },
]);
expect(state.profiles.get("remote")?.lastTargetId).toBe("B");
await expect(remote.ensureTabAvailable("A")).rejects.toThrow(/tab not found/i);
await expect(remote.ensureTabAvailable("form")).resolves.toMatchObject({
targetId: "B",
tabId: "t1",
label: "form",
});
});
it("does not transfer aliases when target replacement is ambiguous", async () => {
let currentPages = [page("A", "https://a.example"), page("C", "https://c.example")];
const listPagesViaPlaywright = vi.fn(async () => currentPages);
vi.spyOn(deps.pwAiModule, "getPwAiModule").mockResolvedValue({
listPagesViaPlaywright,
} as unknown as Awaited<ReturnType<typeof deps.pwAiModule.getPwAiModule>>);
const { state, remote } = deps.createRemoteRouteHarness();
const first = await remote.listTabs();
expect(first.map((tab) => [tab.targetId, tab.tabId])).toEqual([
["A", "t1"],
["C", "t2"],
]);
state.profiles.get("remote")!.lastTargetId = "A";
currentPages = [page("B", "https://b.example"), page("D", "https://d.example")];
const afterSwap = await remote.listTabs();
expect(afterSwap.map((tab) => [tab.targetId, tab.tabId])).toEqual([
["B", "t3"],
["D", "t4"],
]);
expect(state.profiles.get("remote")?.lastTargetId).toBe("A");
});
it("prefers lastTargetId for remote profiles when targetId is omitted", async () => {
const responses = [
[

View File

@@ -106,6 +106,7 @@ function assignTabAlias(params: {
}
entry.label = label;
}
entry.url = params.tab.url;
const labelFields = entry.label ? { label: entry.label } : {};
return {
...params.tab,
@@ -115,9 +116,51 @@ function assignTabAlias(params: {
};
}
function isConfidentReplacement(params: {
staleEntry: { url?: string };
tab: BrowserTab;
staleCount: number;
newCandidateCount: number;
}): boolean {
const staleUrl = params.staleEntry.url?.trim();
const tabUrl = params.tab.url?.trim();
if (staleUrl && tabUrl && staleUrl === tabUrl) {
return true;
}
return params.staleCount === 1 && params.newCandidateCount === 1;
}
function assignTabAliases(profileState: ProfileRuntimeState, tabs: BrowserTab[]): BrowserTab[] {
const aliases = getTabAliasState(profileState);
const liveTargetIds = new Set(tabs.map((tab) => tab.targetId));
const staleEntries = Object.entries(aliases.byTargetId).filter(
([targetId]) => !liveTargetIds.has(targetId),
);
const newCandidates = tabs.filter((tab) => !aliases.byTargetId[tab.targetId]);
const claimedTargetIds = new Set<string>();
for (const [oldTargetId, staleEntry] of staleEntries) {
const candidate = newCandidates.find(
(tab) =>
!claimedTargetIds.has(tab.targetId) &&
isConfidentReplacement({
staleEntry,
tab,
staleCount: staleEntries.length,
newCandidateCount: newCandidates.length,
}),
);
if (!candidate) {
continue;
}
aliases.byTargetId[candidate.targetId] = staleEntry;
delete aliases.byTargetId[oldTargetId];
claimedTargetIds.add(candidate.targetId);
if (profileState.lastTargetId === oldTargetId) {
profileState.lastTargetId = candidate.targetId;
}
}
for (const targetId of Object.keys(aliases.byTargetId)) {
if (!liveTargetIds.has(targetId)) {
delete aliases.byTargetId[targetId];

View File

@@ -16,7 +16,7 @@ export type ProfileRuntimeState = {
/** Stable, user-facing tab aliases scoped to this profile runtime. */
tabAliases?: {
nextTabNumber: number;
byTargetId: Record<string, { tabId: string; label?: string }>;
byTargetId: Record<string, { tabId: string; label?: string; url?: string }>;
};
reconcile?: {
previousProfile: ResolvedBrowserProfile;

View File

@@ -157,6 +157,44 @@ describe("browser control server", () => {
slowTimeoutMs,
);
it(
"returns the replacement targetId after an action-triggered target swap",
async () => {
const base = await startServerAndBase();
pwMocks.clickViaPlaywright.mockImplementationOnce(async () => {
vi.stubGlobal(
"fetch",
vi.fn(async (url: string) => {
if (url.includes("/json/list")) {
return makeResponse([
{
id: "fresh5678",
title: "Submitted",
url: "https://submitted.example",
webSocketDebuggerUrl: "ws://127.0.0.1/devtools/page/fresh5678",
type: "page",
},
]);
}
throw new Error(`unexpected fetch: ${url}`);
}),
);
});
const response = await postJson<{ ok: boolean; targetId?: string }>(`${base}/act`, {
kind: "click",
ref: "5",
targetId: "abcd1234",
});
expect(response).toMatchObject({
ok: true,
targetId: "fresh5678",
});
},
slowTimeoutMs,
);
it(
"returns ACT_SELECTOR_UNSUPPORTED for selector on unsupported action kinds",
async () => {