fix(status): keep task snapshots pure

This commit is contained in:
Vincent Koc
2026-04-01 16:35:30 +09:00
parent 5a95d65f1e
commit cfa307baed
6 changed files with 178 additions and 17 deletions

View File

@@ -515,6 +515,43 @@ describe("session_status tool", () => {
expect(text).toContain("permission denied");
});
// Verifies that session_status clips an over-long task prompt and progress
// summary, and that the tail of either string never reaches the tool output.
it("truncates long task titles and details in session_status output", async () => {
  const sessionKey = "agent:main:main";
  const verboseTask =
    "This is a deliberately long task prompt that should never be emitted in full by session_status because it can include internal instructions and file paths that are not appropriate for user-visible task summaries.";
  const verboseProgress =
    "This progress detail is also intentionally long so the session_status tool proves it truncates verbose task context instead of dumping a long internal update into the tool response.";

  // Seed the store with the one session the tool is asked about.
  resetSessionStore({
    [sessionKey]: {
      sessionId: "sess-main",
      updatedAt: Date.now(),
    },
  });

  // A single running task whose title and detail both exceed the status limits.
  listTasksForRelatedSessionKeyForOwnerMock.mockReturnValue([
    {
      taskId: "task-long",
      runtime: "subagent",
      requesterSessionKey: sessionKey,
      task: verboseTask,
      status: "running",
      deliveryStatus: "pending",
      notifyPolicy: "done_only",
      createdAt: Date.now() - 5_000,
      progressSummary: verboseProgress,
    },
  ]);

  const statusTool = createSessionStatusTool({ agentSessionKey: sessionKey });
  const outcome = await statusTool.execute("tc-truncated", { sessionKey });
  const leadingEntry = outcome.content?.[0] as { text: string } | undefined;
  const text = leadingEntry?.text ?? "";

  // The clipped prefix (ending in an ellipsis) must be present...
  expect(text).toContain(
    "This is a deliberately long task prompt that should never be emitted in full by…",
  );
  expect(text).toContain(
    "This progress detail is also intentionally long so the session_status tool proves it truncates verbose task context ins…",
  );
  // ...while the text beyond the cut-off must not be.
  expect(text).not.toContain("internal instructions and file paths");
  expect(text).not.toContain("dumping a long internal update");
});
it("prefers failure context over newer success context in session_status output", async () => {
resetSessionStore({
"agent:main:main": {

View File

@@ -23,6 +23,7 @@ import {
} from "../../routing/session-key.js";
import { applyModelOverrideToSessionEntry } from "../../sessions/model-overrides.js";
import { buildTaskStatusSnapshotForRelatedSessionKeyForOwner } from "../../tasks/task-owner-access.js";
import { formatTaskStatusDetail, formatTaskStatusTitle } from "../../tasks/task-status.js";
import { loadModelCatalog } from "../model-catalog.js";
import {
buildAllowedModelSet,
@@ -133,11 +134,8 @@ function formatSessionTaskLine(params: {
: snapshot.recentFailureCount > 0
? `${snapshot.recentFailureCount} recent failure${snapshot.recentFailureCount === 1 ? "" : "s"}`
: `latest ${task.status.replaceAll("_", " ")}`;
const title = task.label?.trim() || task.task.trim();
const detail =
task.status === "running" || task.status === "queued"
? task.progressSummary?.trim()
: task.error?.trim() || task.terminalSummary?.trim();
const title = formatTaskStatusTitle(task);
const detail = formatTaskStatusDetail(task);
const parts = [headline, task.runtime, title, detail].filter(Boolean);
return parts.length ? `📌 Tasks: ${parts.join(" · ")}` : undefined;
}

View File

@@ -262,6 +262,29 @@ describe("buildStatusReply subagent summary", () => {
expect(reply?.text).toContain("approval denied");
});
// Verifies that the status reply clips an over-long task prompt and progress
// summary, and that the text past the cut-off never appears in the reply.
it("truncates long task titles and details in the session task line", async () => {
  const verboseTask =
    "This is a deliberately long task prompt that should never be emitted in full by /status because it can include internal instructions and file paths that are not appropriate for the headline line shown to users.";
  const verboseProgress =
    "This progress detail is also intentionally long so the status surface proves it truncates verbose task context instead of dumping a multi-sentence internal update into the reply output.";

  // Register one running subagent task with the verbose title and detail.
  createRunningTaskRun({
    runtime: "subagent",
    requesterSessionKey: "agent:main:main",
    childSessionKey: "agent:main:subagent:status-task-truncated",
    runId: "run-status-task-truncated",
    task: verboseTask,
    progressSummary: verboseProgress,
  });

  const reply = await buildStatusReplyForTest({});
  const replyText = reply?.text;

  // The clipped prefixes (ending in an ellipsis) are shown...
  expect(replyText).toContain(
    "This is a deliberately long task prompt that should never be emitted in full by…",
  );
  expect(replyText).toContain(
    "This progress detail is also intentionally long so the status surface proves it truncates verbose task context instead…",
  );
  // ...and the remainder of each string is not.
  expect(replyText).not.toContain("internal instructions and file paths");
  expect(replyText).not.toContain("dumping a multi-sentence internal update");
});
it("prefers failure context over newer success context when showing recent failures", async () => {
createRunningTaskRun({
runtime: "acp",

View File

@@ -23,7 +23,11 @@ import {
} from "../../infra/provider-usage.js";
import type { MediaUnderstandingDecision } from "../../media-understanding/types.js";
import { listTasksForAgentId, listTasksForSessionKey } from "../../tasks/task-registry.js";
import { buildTaskStatusSnapshot } from "../../tasks/task-status.js";
import {
buildTaskStatusSnapshot,
formatTaskStatusDetail,
formatTaskStatusTitle,
} from "../../tasks/task-status.js";
import { normalizeGroupActivation } from "../group-activation.js";
import { resolveSelectedAndActiveModel } from "../model-runtime.js";
import { buildStatusMessage } from "../status.js";
@@ -68,11 +72,8 @@ function formatSessionTaskLine(sessionKey: string): string | undefined {
: snapshot.recentFailureCount > 0
? `${snapshot.recentFailureCount} recent failure${snapshot.recentFailureCount === 1 ? "" : "s"}`
: "recently finished";
const title = task.label?.trim() || task.task.trim();
const detail =
task.status === "running" || task.status === "queued"
? task.progressSummary?.trim()
: task.error?.trim() || task.terminalSummary?.trim();
const title = formatTaskStatusTitle(task);
const detail = formatTaskStatusDetail(task);
const parts = [headline, task.runtime, title, detail].filter(Boolean);
return parts.length ? `📌 Tasks: ${parts.join(" · ")}` : undefined;
}

View File

@@ -0,0 +1,76 @@
import { describe, expect, it } from "vitest";
import type { TaskRecord } from "./task-registry.types.js";
import {
buildTaskStatusSnapshot,
formatTaskStatusDetail,
formatTaskStatusTitle,
} from "./task-status.js";
// Fixed reference timestamp (epoch milliseconds) so the tests never depend on the wall clock.
const NOW = 1_000_000_000_000;

/**
 * Builds a `TaskRecord` fixture: a running subagent task created one second
 * before `NOW`, with any field replaceable through `overrides`.
 *
 * @param overrides - Fields that take precedence over the fixture defaults.
 * @returns A new task record combining the defaults and the overrides.
 */
function makeTask(overrides: Partial<TaskRecord>): TaskRecord {
  const baseline: TaskRecord = {
    taskId: "task-1",
    runId: "run-1",
    task: "default task",
    runtime: "subagent",
    status: "running",
    requesterSessionKey: "agent:main:main",
    ownerKey: "agent:main:main",
    scopeKind: "session",
    createdAt: NOW - 1_000,
    deliveryStatus: "pending",
    notifyPolicy: "done_only",
  };
  // Overrides are spread last so they win over the baseline values.
  return { ...baseline, ...overrides };
}
describe("task status snapshot", () => {
  // A task that has been running for ten minutes must still be counted as
  // active and chosen as the focus task.
  it("keeps old active tasks active without maintenance reconciliation", () => {
    const tenMinutesAgo = NOW - 10 * 60_000;
    const longRunning = makeTask({
      createdAt: tenMinutesAgo,
      startedAt: tenMinutesAgo,
      lastEventAt: tenMinutesAgo,
      progressSummary: "still running",
    });

    const { activeCount, recentFailureCount, focus } = buildTaskStatusSnapshot([longRunning], {
      now: NOW,
    });

    expect(activeCount).toBe(1);
    expect(recentFailureCount).toBe(0);
    expect(focus?.status).toBe("running");
    expect(focus?.taskId).toBe("task-1");
  });

  // A finished task whose cleanupAfter lies in the past is dropped entirely.
  it("filters tasks whose cleanupAfter has expired", () => {
    const pastCleanup = makeTask({
      status: "succeeded",
      endedAt: NOW - 60_000,
      cleanupAfter: NOW - 1,
    });

    const { totalCount, focus } = buildTaskStatusSnapshot([pastCleanup], { now: NOW });

    expect(totalCount).toBe(0);
    expect(focus).toBeUndefined();
  });
});
describe("task status formatting", () => {
  // Both formatters must keep the leading part of an over-long string and end
  // the result with an ellipsis.
  it("truncates long task titles and details", () => {
    const verbose = makeTask({
      task: "This is a deliberately long task prompt that should never be emitted in full because it may include internal instructions and file paths.",
      progressSummary:
        "This progress detail is also intentionally long so the status line proves it truncates verbose task context instead of dumping a wall of text.",
    });

    const title = formatTaskStatusTitle(verbose);
    expect(title).toContain(
      "This is a deliberately long task prompt that should never be emitted in full",
    );
    expect(title.endsWith("…")).toBe(true);

    const detail = formatTaskStatusDetail(verbose);
    expect(detail).toContain(
      "This progress detail is also intentionally long so the status line proves it truncates verbose task context",
    );
    expect(detail?.endsWith("…")).toBe(true);
  });
});

View File

@@ -1,9 +1,11 @@
import { reconcileTaskRecordForOperatorInspection } from "./task-registry.maintenance.js";
import { truncateUtf16Safe } from "../utils.js";
import type { TaskRecord } from "./task-registry.types.js";
// Statuses that mark a task as still in flight.
const ACTIVE_TASK_STATUSES = new Set(["queued", "running"]);
// Terminal statuses that count as failures.
const FAILURE_TASK_STATUSES = new Set(["failed", "timed_out", "lost"]);
/** Window (5 minutes, in milliseconds) within which a terminal task still counts as recent. */
export const TASK_STATUS_RECENT_WINDOW_MS = 5 * 60_000;
/** Length budget applied to task titles by `formatTaskStatusTitle`. */
export const TASK_STATUS_TITLE_MAX_CHARS = 80;
/** Length budget applied to task details by `formatTaskStatusDetail`. */
export const TASK_STATUS_DETAIL_MAX_CHARS = 120;
function isActiveTask(task: TaskRecord): boolean {
return ACTIVE_TASK_STATUSES.has(task.status);
@@ -31,6 +33,32 @@ function isRecentTerminalTask(task: TaskRecord, now: number): boolean {
return now - resolveTaskReferenceAt(task) <= TASK_STATUS_RECENT_WINDOW_MS;
}
/**
 * Trims `value` and shortens it for single-line status output.
 *
 * Text whose trimmed length is within `maxChars` is returned trimmed and
 * otherwise unchanged. Longer text is passed to `truncateUtf16Safe` with a
 * budget of `maxChars - 1`, stripped of trailing whitespace, and finished with
 * a single "…" so readers can tell the text was shortened.
 *
 * Note: when `maxChars` is 0 or negative and the text is non-empty, the result
 * is the ellipsis alone.
 *
 * @param value - Raw title or detail text.
 * @param maxChars - Length budget for the result, ellipsis included.
 * @returns The trimmed text, or a shortened copy ending in "…".
 */
function truncateTaskStatusText(value: string, maxChars: number): string {
  const trimmed = value.trim();
  if (trimmed.length <= maxChars) {
    return trimmed;
  }
  // One unit of the budget is reserved for the ellipsis appended below.
  // NOTE(review): presumably truncateUtf16Safe avoids splitting surrogate pairs — confirm in ../utils.js.
  const head = truncateUtf16Safe(trimmed, Math.max(0, maxChars - 1)).trimEnd();
  return `${head}…`;
}
/**
 * Produces the short title shown for a task in status output.
 *
 * The trimmed `label` is used when it is non-empty; otherwise the trimmed
 * `task` text is used. The chosen text is then limited to
 * `TASK_STATUS_TITLE_MAX_CHARS` by `truncateTaskStatusText`.
 *
 * @param task - Task record to describe.
 * @returns The title text after length limiting.
 */
export function formatTaskStatusTitle(task: TaskRecord): string {
  const label = task.label?.trim();
  const rawTitle = label ? label : task.task.trim();
  return truncateTaskStatusText(rawTitle, TASK_STATUS_TITLE_MAX_CHARS);
}
/**
 * Produces the optional detail text shown next to a task title.
 *
 * Running and queued tasks use their trimmed `progressSummary`. Every other
 * status uses the trimmed `error`, falling back to the trimmed
 * `terminalSummary` when the error is missing or empty. The chosen text is
 * limited to `TASK_STATUS_DETAIL_MAX_CHARS` by `truncateTaskStatusText`.
 *
 * @param task - Task record to describe.
 * @returns The detail text after length limiting, or `undefined` when there is none.
 */
export function formatTaskStatusDetail(task: TaskRecord): string | undefined {
  const inFlight = task.status === "running" || task.status === "queued";

  let detail: string | undefined;
  if (inFlight) {
    detail = task.progressSummary?.trim();
  } else {
    detail = task.error?.trim() || task.terminalSummary?.trim();
  }

  // An absent or empty detail yields no detail segment at all.
  return detail ? truncateTaskStatusText(detail, TASK_STATUS_DETAIL_MAX_CHARS) : undefined;
}
export type TaskStatusSnapshot = {
latest?: TaskRecord;
focus?: TaskRecord;
@@ -47,11 +75,9 @@ export function buildTaskStatusSnapshot(
opts?: { now?: number },
): TaskStatusSnapshot {
const now = opts?.now ?? Date.now();
const reconciled = tasks
.map((task) => reconcileTaskRecordForOperatorInspection(task))
.filter((task) => !isExpiredTask(task, now));
const active = reconciled.filter(isActiveTask);
const recentTerminal = reconciled.filter((task) => isRecentTerminalTask(task, now));
const visibleCandidates = tasks.filter((task) => !isExpiredTask(task, now));
const active = visibleCandidates.filter(isActiveTask);
const recentTerminal = visibleCandidates.filter((task) => isRecentTerminalTask(task, now));
const visible = active.length > 0 ? [...active, ...recentTerminal] : recentTerminal;
const focus =
active[0] ?? recentTerminal.find((task) => isFailureTask(task)) ?? recentTerminal[0];