diff --git a/CHANGELOG.md b/CHANGELOG.md index 04c857a1718..03ab92a0462 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -73,6 +73,10 @@ Docs: https://docs.openclaw.ai - Plugins/QQ Bot: prefer an installed QQ Bot plugin that declares it replaces the bundled `qqbot` channel, preventing duplicate `qqbot_channel_api` and `qqbot_remind` tool registration noise. Fixes #63102. +- Browser automation: keep stable tab ids and labels attached when Chromium + replaces the raw target after form submissions or other action-triggered + navigations, and return the replacement `targetId` from `/act` when the match + is provable. Fixes #46137. - QQ Bot: make `qqbot_remind` schedule, list, and remove Gateway cron jobs directly for owner-authorized senders instead of returning `cronParams` and relying on a follow-up generic `cron` tool call. Fixes #70865. (#70937) diff --git a/docs/cli/browser.md b/docs/cli/browser.md index 962d8c7fc6b..baf81622f59 100644 --- a/docs/cli/browser.md +++ b/docs/cli/browser.md @@ -138,6 +138,10 @@ the optional label, and the raw `targetId`. Agents should pass `suggestedTargetId` back into `focus`, `close`, snapshots, and actions. You can assign a label with `open --label`, `tab new --label`, or `tab label`; labels, tab ids, raw target ids, and unique target-id prefixes are all accepted. +When Chromium replaces the underlying raw target during a navigation or form +submit, OpenClaw keeps the stable `tabId`/label attached to the replacement tab +when it can prove the match. Raw target ids remain volatile; prefer +`suggestedTargetId`. ## Snapshot / screenshot / actions @@ -185,6 +189,10 @@ openclaw browser wait --text "Done" openclaw browser evaluate --fn '(el) => el.textContent' --ref ``` +Action responses return the current raw `targetId` after action-triggered page +replacement when OpenClaw can prove the replacement tab. Scripts should still +store and pass `suggestedTargetId`/labels for long-lived workflows. 
+ File + dialog helpers: ```bash diff --git a/docs/tools/browser-control.md b/docs/tools/browser-control.md index f88d8504c60..990e25ae1e9 100644 --- a/docs/tools/browser-control.md +++ b/docs/tools/browser-control.md @@ -221,6 +221,11 @@ Notes: - Download, trace, and upload paths are constrained to OpenClaw temp roots: `/tmp/openclaw{,/downloads,/uploads}` (fallback: `${os.tmpdir()}/openclaw/...`). - `upload` can also set file inputs directly via `--input-ref` or `--element`. +Stable tab ids and labels survive Chromium raw-target replacement when OpenClaw +can prove which tab is the replacement: for example, when the URL is unchanged, +or when exactly one old tab is replaced by exactly one new tab after a form +submission. Raw target ids are still volatile; prefer `suggestedTargetId` from `tabs` in scripts. + Snapshot flags at a glance: - `--format ai` (default with Playwright): AI snapshot with numeric refs (`aria-ref=""`). @@ -258,6 +263,9 @@ OpenClaw supports two “snapshot” styles: Ref behavior: - Refs are **not stable across navigations**; if something fails, re-run `snapshot` and use a fresh ref. +- `/act` returns the current raw `targetId` after action-triggered replacement + when it can prove the replacement tab. Keep using stable tab ids/labels for + follow-up commands. - If the role snapshot was taken with `--frame`, role refs are scoped to that iframe until the next role snapshot. - Unknown or stale `axN` refs fail fast instead of falling through to Playwright's `aria-ref` selector. 
Run a fresh snapshot on the same tab when diff --git a/extensions/browser/src/browser/routes/agent.act.ts b/extensions/browser/src/browser/routes/agent.act.ts index 2fd0cf85a75..ab33f4fb07f 100644 --- a/extensions/browser/src/browser/routes/agent.act.ts +++ b/extensions/browser/src/browser/routes/agent.act.ts @@ -34,9 +34,11 @@ import { readBody, requirePwAi, resolveTargetIdFromBody, + resolveSafeRouteTabUrl, withRouteTabContext, SELECTOR_UNSUPPORTED_MESSAGE, } from "./agent.shared.js"; +import { resolveTargetIdAfterNavigate } from "./agent.snapshot-target.js"; import { EXISTING_SESSION_LIMITS } from "./existing-session-limits.js"; import type { BrowserRouteRegistrar } from "./types.js"; import { asyncBrowserRoute, jsonError, toNumber, toStringOrEmpty } from "./utils.js"; @@ -388,11 +390,35 @@ export function registerBrowserAgentActRoutes( run: async ({ profileCtx, cdpUrl, tab, resolveTabUrl }) => { const evaluateEnabled = ctx.state().resolved.evaluateEnabled; const ssrfPolicy = ctx.state().resolved.ssrfPolicy; - const jsonOk = async (extra?: Record) => { - const url = await resolveTabUrl(tab.url); + const isExistingSession = getBrowserProfileCapabilities(profileCtx.profile).usesChromeMcp; + const hasNavigationResultPolicy = Boolean( + withBrowserNavigationPolicy(ssrfPolicy).ssrfPolicy, + ); + const jsonOk = async ( + extra?: Record, + options?: { resolveCurrentTarget?: boolean }, + ) => { + const shouldResolveCurrentTarget = + options?.resolveCurrentTarget && (!isExistingSession || hasNavigationResultPolicy); + const responseTargetId = shouldResolveCurrentTarget + ? await resolveTargetIdAfterNavigate({ + oldTargetId: tab.targetId, + navigatedUrl: tab.url, + listTabs: () => profileCtx.listTabs(), + }) + : tab.targetId; + const url = + responseTargetId === tab.targetId + ? 
await resolveTabUrl(tab.url) + : await resolveSafeRouteTabUrl({ + ctx, + profileCtx, + targetId: responseTargetId, + fallbackUrl: tab.url, + }); return res.json({ ok: true, - targetId: tab.targetId, + targetId: responseTargetId, ...(url ? { url } : {}), ...extra, }); @@ -405,10 +431,9 @@ export function registerBrowserAgentActRoutes( "action targetId must match request targetId", ); } - const isExistingSession = getBrowserProfileCapabilities(profileCtx.profile).usesChromeMcp; const profileName = profileCtx.profile.name; if (isExistingSession) { - const initialTabTargetIds = withBrowserNavigationPolicy(ssrfPolicy).ssrfPolicy + const initialTabTargetIds = hasNavigationResultPolicy ? new Set((await profileCtx.listTabs()).map((currentTab) => currentTab.targetId)) : new Set(); const existingSessionNavigationGuard = { @@ -443,7 +468,7 @@ export function registerBrowserAgentActRoutes( }), guard: existingSessionNavigationGuard, }); - return await jsonOk(); + return await jsonOk(undefined, { resolveCurrentTarget: true }); case "clickCoords": await runExistingSessionActionWithNavigationGuard({ execute: () => @@ -459,7 +484,7 @@ export function registerBrowserAgentActRoutes( }), guard: existingSessionNavigationGuard, }); - return await jsonOk(); + return await jsonOk(undefined, { resolveCurrentTarget: true }); case "type": await runExistingSessionActionWithNavigationGuard({ execute: async () => { @@ -481,7 +506,7 @@ export function registerBrowserAgentActRoutes( }, guard: existingSessionNavigationGuard, }); - return await jsonOk(); + return await jsonOk(undefined, { resolveCurrentTarget: true }); case "press": await runExistingSessionActionWithNavigationGuard({ execute: () => @@ -493,7 +518,7 @@ export function registerBrowserAgentActRoutes( }), guard: existingSessionNavigationGuard, }); - return await jsonOk(); + return await jsonOk(undefined, { resolveCurrentTarget: true }); case "hover": await runExistingSessionActionWithNavigationGuard({ execute: () => @@ -631,15 +656,19 
@@ export function registerBrowserAgentActRoutes( }); switch (action.kind) { case "batch": - return await jsonOk({ results: result.results ?? [] }); + return await jsonOk( + { results: result.results ?? [] }, + { resolveCurrentTarget: true }, + ); case "evaluate": - return await jsonOk({ result: result.result }); + return await jsonOk({ result: result.result }, { resolveCurrentTarget: true }); case "click": case "clickCoords": + return await jsonOk(undefined, { resolveCurrentTarget: true }); case "resize": return await jsonOk(); default: - return await jsonOk(); + return await jsonOk(undefined, { resolveCurrentTarget: true }); } }, }); diff --git a/extensions/browser/src/browser/server-context.remote-profile-tab-ops.playwright.test.ts b/extensions/browser/src/browser/server-context.remote-profile-tab-ops.playwright.test.ts index ba8082756e8..5ae4f3bbf9e 100644 --- a/extensions/browser/src/browser/server-context.remote-profile-tab-ops.playwright.test.ts +++ b/extensions/browser/src/browser/server-context.remote-profile-tab-ops.playwright.test.ts @@ -94,6 +94,64 @@ describe("browser remote profile tab ops via Playwright", () => { ); }); + it("transfers stable aliases across a high-confidence target replacement", async () => { + let currentPages = [page("A", "https://app.example/form")]; + const listPagesViaPlaywright = vi.fn(async () => currentPages); + + vi.spyOn(deps.pwAiModule, "getPwAiModule").mockResolvedValue({ + listPagesViaPlaywright, + } as unknown as Awaited>); + + const { state, remote } = deps.createRemoteRouteHarness(); + + const first = await remote.listTabs(); + expect(first).toMatchObject([{ targetId: "A", tabId: "t1", suggestedTargetId: "t1" }]); + const labeled = await remote.labelTab("t1", "form"); + expect(labeled).toMatchObject({ targetId: "A", tabId: "t1", label: "form" }); + state.profiles.get("remote")!.lastTargetId = "A"; + + currentPages = [page("B", "https://app.example/submitted")]; + + const afterSwap = await remote.listTabs(); + 
expect(afterSwap).toMatchObject([ + { targetId: "B", tabId: "t1", suggestedTargetId: "form", label: "form" }, + ]); + expect(state.profiles.get("remote")?.lastTargetId).toBe("B"); + await expect(remote.ensureTabAvailable("A")).rejects.toThrow(/tab not found/i); + await expect(remote.ensureTabAvailable("form")).resolves.toMatchObject({ + targetId: "B", + tabId: "t1", + label: "form", + }); + }); + + it("does not transfer aliases when target replacement is ambiguous", async () => { + let currentPages = [page("A", "https://a.example"), page("C", "https://c.example")]; + const listPagesViaPlaywright = vi.fn(async () => currentPages); + + vi.spyOn(deps.pwAiModule, "getPwAiModule").mockResolvedValue({ + listPagesViaPlaywright, + } as unknown as Awaited>); + + const { state, remote } = deps.createRemoteRouteHarness(); + + const first = await remote.listTabs(); + expect(first.map((tab) => [tab.targetId, tab.tabId])).toEqual([ + ["A", "t1"], + ["C", "t2"], + ]); + state.profiles.get("remote")!.lastTargetId = "A"; + + currentPages = [page("B", "https://b.example"), page("D", "https://d.example")]; + + const afterSwap = await remote.listTabs(); + expect(afterSwap.map((tab) => [tab.targetId, tab.tabId])).toEqual([ + ["B", "t3"], + ["D", "t4"], + ]); + expect(state.profiles.get("remote")?.lastTargetId).toBe("A"); + }); + it("prefers lastTargetId for remote profiles when targetId is omitted", async () => { const responses = [ [ diff --git a/extensions/browser/src/browser/server-context.tab-ops.ts b/extensions/browser/src/browser/server-context.tab-ops.ts index b315f306ac8..080a481bcdc 100644 --- a/extensions/browser/src/browser/server-context.tab-ops.ts +++ b/extensions/browser/src/browser/server-context.tab-ops.ts @@ -106,6 +106,7 @@ function assignTabAlias(params: { } entry.label = label; } + entry.url = params.tab.url; const labelFields = entry.label ? 
{ label: entry.label } : {}; return { ...params.tab, @@ -115,9 +116,51 @@ function assignTabAlias(params: { }; } /** Decides whether `tab` may inherit the alias entry of a target id that is no longer live. Returns true when both URLs are non-empty after trimming and strictly equal; otherwise returns true only when `staleCount` and `newCandidateCount` are both exactly 1. NOTE(review): the URL branch ignores both counts, so when several stale entries or new tabs share one URL the caller's `find` takes the first match in list order; confirm that is acceptable. */ +function isConfidentReplacement(params: { + staleEntry: { url?: string }; + tab: BrowserTab; + staleCount: number; + newCandidateCount: number; +}): boolean { + const staleUrl = params.staleEntry.url?.trim(); + const tabUrl = params.tab.url?.trim(); + if (staleUrl && tabUrl && staleUrl === tabUrl) { + return true; + } + return params.staleCount === 1 && params.newCandidateCount === 1; +} + function assignTabAliases(profileState: ProfileRuntimeState, tabs: BrowserTab[]): BrowserTab[] { const aliases = getTabAliasState(profileState); const liveTargetIds = new Set(tabs.map((tab) => tab.targetId)); + const staleEntries = Object.entries(aliases.byTargetId).filter( + ([targetId]) => !liveTargetIds.has(targetId), + ); + const newCandidates = tabs.filter((tab) => !aliases.byTargetId[tab.targetId]); + const claimedTargetIds = new Set(); + /* staleEntries are alias entries whose target id is absent from the live tab list; newCandidates are live tabs with no alias entry yet. The loop re-keys each stale entry onto the first unclaimed candidate that passes isConfidentReplacement and moves profileState.lastTargetId along when it pointed at the old id. Stale entries left unmatched are deleted by the cleanup loop that follows. */ + for (const [oldTargetId, staleEntry] of staleEntries) { + const candidate = newCandidates.find( + (tab) => + !claimedTargetIds.has(tab.targetId) && + isConfidentReplacement({ + staleEntry, + tab, + staleCount: staleEntries.length, + newCandidateCount: newCandidates.length, + }), + ); + if (!candidate) { + continue; + } + aliases.byTargetId[candidate.targetId] = staleEntry; + delete aliases.byTargetId[oldTargetId]; + claimedTargetIds.add(candidate.targetId); + if (profileState.lastTargetId === oldTargetId) { + profileState.lastTargetId = candidate.targetId; + } + } + for (const targetId of Object.keys(aliases.byTargetId)) { if (!liveTargetIds.has(targetId)) { delete aliases.byTargetId[targetId]; diff --git a/extensions/browser/src/browser/server-context.types.ts b/extensions/browser/src/browser/server-context.types.ts index bb4cfd16dfe..cf378fbdac7 100644 --- a/extensions/browser/src/browser/server-context.types.ts +++ b/extensions/browser/src/browser/server-context.types.ts @@ -16,7 +16,7 @@ export type ProfileRuntimeState = 
{ /** Stable, user-facing tab aliases scoped to this profile runtime. */ tabAliases?: { nextTabNumber: number; - byTargetId: Record; + byTargetId: Record; }; reconcile?: { previousProfile: ResolvedBrowserProfile; diff --git a/extensions/browser/src/browser/server.agent-contract-core.test.ts b/extensions/browser/src/browser/server.agent-contract-core.test.ts index 39e5c4a3534..638aff778b0 100644 --- a/extensions/browser/src/browser/server.agent-contract-core.test.ts +++ b/extensions/browser/src/browser/server.agent-contract-core.test.ts @@ -157,6 +157,44 @@ describe("browser control server", () => { slowTimeoutMs, ); + it( + "returns the replacement targetId after an action-triggered target swap", + async () => { + const base = await startServerAndBase(); + pwMocks.clickViaPlaywright.mockImplementationOnce(async () => { + vi.stubGlobal( + "fetch", + vi.fn(async (url: string) => { + if (url.includes("/json/list")) { + return makeResponse([ + { + id: "fresh5678", + title: "Submitted", + url: "https://submitted.example", + webSocketDebuggerUrl: "ws://127.0.0.1/devtools/page/fresh5678", + type: "page", + }, + ]); + } + throw new Error(`unexpected fetch: ${url}`); + }), + ); + }); + + const response = await postJson<{ ok: boolean; targetId?: string }>(`${base}/act`, { + kind: "click", + ref: "5", + targetId: "abcd1234", + }); + + expect(response).toMatchObject({ + ok: true, + targetId: "fresh5678", + }); + }, + slowTimeoutMs, + ); + it( "returns ACT_SELECTOR_UNSUPPORTED for selector on unsupported action kinds", async () => {