From ecc5601b2a4cd4f3e32d862c2ae9aa7097f77bb8 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Sat, 30 May 2026 01:41:29 +0200 Subject: [PATCH] fix(github): bound proof comment API bodies --- scripts/github/real-behavior-proof-check.mjs | 138 ++++++++++++---- scripts/github/real-behavior-proof-policy.mjs | 27 +++- .../scripts/real-behavior-proof-check.test.ts | 147 +++++++++++++++++- .../real-behavior-proof-policy.test.ts | 102 ++++++++++-- 4 files changed, 370 insertions(+), 44 deletions(-) diff --git a/scripts/github/real-behavior-proof-check.mjs b/scripts/github/real-behavior-proof-check.mjs index 7d149f396e5..5e06ecb2d59 100644 --- a/scripts/github/real-behavior-proof-check.mjs +++ b/scripts/github/real-behavior-proof-check.mjs @@ -6,9 +6,13 @@ import { evaluateClawSweeperExactHeadProof, evaluateRealBehaviorProof, isMaintainerTeamMember, + readBoundedGitHubApiJson, withGitHubApiTimeout, } from "./real-behavior-proof-policy.mjs"; +const PROOF_COMMENTS_PER_PAGE = 100; +const MAX_PROOF_COMMENT_PAGES = 10; + function escapeCommandValue(value) { return String(value) .replace(/%/g, "%25") @@ -17,6 +21,100 @@ function escapeCommandValue(value) { .replace(/:/g, "%3A"); } +function isTooLargeBodyError(error) { + return error?.code === "ETOOBIG"; +} + +async function fetchProofCommentPage({ + owner, + repo, + issueNumber, + token, + fetchImpl, + timeoutMs, + page, + perPage, +}) { + const url = new URL( + `https://api.github.com/repos/${owner}/${repo}/issues/${issueNumber}/comments`, + ); + url.searchParams.set("per_page", String(perPage)); + url.searchParams.set("page", String(page)); + const response = await withGitHubApiTimeout( + `proof comment lookup page ${page}`, + timeoutMs, + (signal) => + fetchImpl(url, { + headers: { + Accept: "application/vnd.github+json", + Authorization: `Bearer ${token}`, + "X-GitHub-Api-Version": "2022-11-28", + }, + signal, + }), + ); + if (!response.ok) { + throw new Error(`comments API returned ${response.status}`); + } + return await withGitHubApiTimeout(`proof comment response page ${page}`, timeoutMs, (signal) => + readBoundedGitHubApiJson(response, `proof comment response page ${page}`, undefined, { + signal, + }), + ); +} + +async function fetchOversizedProofCommentPageIndividually({ + owner, + repo, + issueNumber, + token, + fetchImpl, + timeoutMs, + page, + perPage, +}) { + const comments = []; + const firstSinglePage = (page - 1) * perPage + 1; + for (let offset = 0; offset < perPage; offset += 1) { + try { + const pageComments = await fetchProofCommentPage({ + owner, + repo, + issueNumber, + token, + fetchImpl, + timeoutMs, + page: firstSinglePage + offset, + perPage: 1, + }); + comments.push(...pageComments); + if (pageComments.length === 0) { + return { comments, exhausted: true }; + } + } catch (error) { + if (!isTooLargeBodyError(error)) { + throw error; + } + } + } + return { comments, exhausted: false }; +} + +async function fetchProofCommentPageWithFallback(params) { + try { + const comments = await fetchProofCommentPage(params); + return { + comments, + exhausted: comments.length < params.perPage, + }; + } catch (error) { + if (!isTooLargeBodyError(error) || params.perPage === 1) { + throw error; + } + return await fetchOversizedProofCommentPageIndividually(params); + } +} + export async function fetchProofComments({ owner, repo, @@ -29,35 +127,19 @@ export async function fetchProofComments({ for (const token of tokens.filter(Boolean)) { const comments = []; try { - for (let page = 1; page <= 10; page += 1) { - const url = new URL( - `https://api.github.com/repos/${owner}/${repo}/issues/${issueNumber}/comments`, - ); - url.searchParams.set("per_page", "100"); - url.searchParams.set("page", String(page)); - const response = await withGitHubApiTimeout( - `proof comment lookup page ${page}`, + for (let page = 1; page <= MAX_PROOF_COMMENT_PAGES; page += 1) { + const result = await fetchProofCommentPageWithFallback({ + owner, + repo, + issueNumber, + token, + fetchImpl, timeoutMs, - (signal) => - fetchImpl(url, { - headers: { - Accept: "application/vnd.github+json", - Authorization: `Bearer ${token}`, - "X-GitHub-Api-Version": "2022-11-28", - }, - signal, - }), - ); - if (!response.ok) { - throw new Error(`comments API returned ${response.status}`); - } - const pageComments = await withGitHubApiTimeout( - `proof comment response page ${page}`, - timeoutMs, - () => response.json(), - ); - comments.push(...pageComments); - if (pageComments.length < 100) { + page, + perPage: PROOF_COMMENTS_PER_PAGE, + }); + comments.push(...result.comments); + if (result.exhausted) { break; } } diff --git a/scripts/github/real-behavior-proof-policy.mjs b/scripts/github/real-behavior-proof-policy.mjs index d5dab4be394..51912642634 100644 --- a/scripts/github/real-behavior-proof-policy.mjs +++ b/scripts/github/real-behavior-proof-policy.mjs @@ -1,3 +1,5 @@ +import { readBoundedResponseText } from "../lib/bounded-response.mjs"; + export const PROOF_OVERRIDE_LABEL = "proof: override"; export const PROOF_SUPPLIED_LABEL = "proof: supplied"; export const PROOF_SUFFICIENT_LABEL = "proof: sufficient"; @@ -5,6 +7,7 @@ export const NEEDS_REAL_BEHAVIOR_PROOF_LABEL = "triage: needs-real-behavior-proo export const MOCK_ONLY_PROOF_LABEL = "triage: mock-only-proof"; export const MAINTAINER_TEAM_SLUG = "maintainer"; export const DEFAULT_GITHUB_API_TIMEOUT_MS = 30_000; +export const GITHUB_API_RESPONSE_BODY_MAX_BYTES = 1024 * 1024; export const CLAWSWEEPER_PROOF_VERDICT_STATUS = "clawsweeper_exact_head_pass"; const CLAWSWEEPER_BOT_LOGINS = new Set(["clawsweeper[bot]", "openclaw-clawsweeper[bot]"]); @@ -88,6 +91,12 @@ function createTimeoutError(label, timeoutMs) { return error; } +function createTooLargeGitHubApiBodyError(label, maxBytes) { + const error = new Error(`${label} response body exceeded ${maxBytes} bytes`); + error.code = "ETOOBIG"; + return error; +} + export async function withGitHubApiTimeout(label, timeoutMs, run) { const boundedTimeoutMs = Math.max(1, timeoutMs); const controller = new AbortController(); @@ -110,6 +119,19 @@ export async function withGitHubApiTimeout(label, timeoutMs, run) { } } +export async function readBoundedGitHubApiJson( + response, + label, + maxBytes = GITHUB_API_RESPONSE_BODY_MAX_BYTES, + options = {}, +) { + const text = await readBoundedResponseText(response, label, maxBytes, { + ...options, + createTooLargeError: () => createTooLargeGitHubApiBodyError(label, maxBytes), + }); + return JSON.parse(text); +} + function normalizeLineEndings(text = "") { return text.replace(/\r\n?/g, "\n"); } @@ -178,7 +200,10 @@ export async function isMaintainerTeamMember({ const body = await withGitHubApiTimeout( `maintainer membership response for ${login}`, timeoutMs, - () => response.json(), + (signal) => + readBoundedGitHubApiJson(response, `maintainer membership response for ${login}`, undefined, { + signal, + }), ); return body?.state === "active"; } diff --git a/test/scripts/real-behavior-proof-check.test.ts b/test/scripts/real-behavior-proof-check.test.ts index 5db739d5ca2..193a6917bb4 100644 --- a/test/scripts/real-behavior-proof-check.test.ts +++ b/test/scripts/real-behavior-proof-check.test.ts @@ -1,6 +1,63 @@ import { describe, expect, it, vi } from "vitest"; import { fetchProofComments } from "../../scripts/github/real-behavior-proof-check.mjs"; +function stalledResponse() { + let keepAlive: ReturnType | undefined; + const reader = { + read: () => + new Promise>(() => { + keepAlive = setTimeout(() => {}, 10_000); + }), + cancel: vi.fn(() => { + if (keepAlive) { + clearTimeout(keepAlive); + } + return Promise.resolve(); + }), + releaseLock: vi.fn(), + }; + return { + ok: true, + status: 200, + headers: new Headers(), + body: { + getReader: () => reader, + }, + }; +} + +function contentLengthResponse(contentLength: number) { + const cancel = vi.fn(() => Promise.resolve()); + return { + ok: true, + status: 200, + headers: new Headers({ "content-length": String(contentLength) }), + body: { cancel }, + cancel, + }; +} + +function chunkedResponse(chunks: Uint8Array[]) { + const cancel = vi.fn(() => Promise.resolve()); + const read = vi.fn(); + for (const chunk of chunks) { + read.mockResolvedValueOnce({ done: false, value: chunk }); + } + read.mockResolvedValueOnce({ done: true, value: undefined }); + return { + ok: true, + status: 200, + headers: new Headers(), + body: { + getReader: () => ({ + read, + cancel, + releaseLock: vi.fn(), + }), + }, + }; +} + describe("real-behavior-proof-check GitHub lookups", () => { it("aborts stalled proof comment fetches", async () => { const fetch = vi.fn((_url: URL, init: RequestInit) => { @@ -22,11 +79,7 @@ describe("real-behavior-proof-check GitHub lookups", () => { }); it("times out stalled proof comment response bodies", async () => { - const fetch = vi.fn().mockResolvedValue({ - ok: true, - status: 200, - json: () => new Promise(() => {}), - }); + const fetch = vi.fn().mockResolvedValue(stalledResponse()); await expect( fetchProofComments({ @@ -39,4 +92,88 @@ describe("real-behavior-proof-check GitHub lookups", () => { }), ).rejects.toThrow(/proof comment response page 1 timed out after 5ms/); }); + + it("skips oversized proof comment bodies by content length after narrow fallback", async () => { + const response = contentLengthResponse(1024 * 1024 + 1); + const fetch = vi.fn((url: URL) => { + const perPage = url.searchParams.get("per_page"); + const page = url.searchParams.get("page"); + if ((perPage === "100" && page === "1") || (perPage === "1" && page === "1")) { + return Promise.resolve(response); + } + return Promise.resolve(new Response("[]", { status: 200 })); + }); + + await expect( + fetchProofComments({ + fetchImpl: fetch as typeof globalThis.fetch, + issueNumber: 123, + owner: "openclaw", + repo: "openclaw", + tokens: ["tok"], + }), + ).resolves.toEqual([]); + expect(response.cancel).toHaveBeenCalled(); + }); + + it("skips oversized streamed proof comment bodies after narrow fallback", async () => { + const encoder = new TextEncoder(); + const oversizedResponse = () => + chunkedResponse([ + encoder.encode("["), + encoder.encode(" ".repeat(1024 * 1024)), + encoder.encode("]"), + ]); + const fetch = vi.fn((url: URL) => { + const perPage = url.searchParams.get("per_page"); + const page = url.searchParams.get("page"); + if ((perPage === "100" && page === "1") || (perPage === "1" && page === "1")) { + return Promise.resolve(oversizedResponse()); + } + return Promise.resolve(new Response("[]", { status: 200 })); + }); + + await expect( + fetchProofComments({ + fetchImpl: fetch as typeof globalThis.fetch, + issueNumber: 123, + owner: "openclaw", + repo: "openclaw", + tokens: ["tok"], + }), + ).resolves.toEqual([]); + }); + + it("falls back to one-comment pages when a bulk comment page is oversized", async () => { + const fetch = vi.fn((url: URL) => { + const perPage = url.searchParams.get("per_page"); + const page = url.searchParams.get("page"); + if (perPage === "100" && page === "1") { + return Promise.resolve(contentLengthResponse(1024 * 1024 + 1)); + } + if (perPage === "1" && page === "1") { + return Promise.resolve(contentLengthResponse(1024 * 1024 + 1)); + } + if (perPage === "1" && page === "2") { + return Promise.resolve(new Response('[{"id":2,"body":"trusted proof"}]', { status: 200 })); + } + return Promise.resolve(new Response("[]", { status: 200 })); + }); + + await expect( + fetchProofComments({ + fetchImpl: fetch as typeof globalThis.fetch, + issueNumber: 123, + owner: "openclaw", + repo: "openclaw", + tokens: ["tok"], + }), + ).resolves.toEqual([{ id: 2, body: "trusted proof" }]); + + expect( + fetch.mock.calls.map( + ([url]) => `${url.searchParams.get("per_page")}:${url.searchParams.get("page")}`, + ), + ).toEqual(["100:1", "1:1", "1:2", "1:3"]); + }); }); diff --git a/test/scripts/real-behavior-proof-policy.test.ts b/test/scripts/real-behavior-proof-policy.test.ts index 4cb0112d352..94510ba0b7b 100644 --- a/test/scripts/real-behavior-proof-policy.test.ts +++ b/test/scripts/real-behavior-proof-policy.test.ts @@ -9,6 +9,7 @@ import { hasClawSweeperExactHeadProof, isMaintainerTeamMember, labelsForRealBehaviorProof, + readBoundedGitHubApiJson, } from "../../scripts/github/real-behavior-proof-policy.mjs"; function externalPr(body: string, overrides: Record = {}) { @@ -46,6 +47,59 @@ function proofBody(evidence: string, overrides: Record = {}) { ].join("\n"); } +function stalledResponse() { + let keepAlive: ReturnType | undefined; + const reader = { + read: () => + new Promise>(() => { + keepAlive = setTimeout(() => {}, 10_000); + }), + cancel: vi.fn(() => { + if (keepAlive) { + clearTimeout(keepAlive); + } + return Promise.resolve(); + }), + releaseLock: vi.fn(), + }; + return { + ok: true, + status: 200, + headers: new Headers(), + body: { + getReader: () => reader, + }, + }; +} + +function contentLengthResponse(contentLength: number) { + const cancel = vi.fn(() => Promise.resolve()); + return { + headers: new Headers({ "content-length": String(contentLength) }), + body: { cancel }, + cancel, + }; +} + +function chunkedResponse(chunks: Uint8Array[]) { + const cancel = vi.fn(() => Promise.resolve()); + const read = vi.fn(); + for (const chunk of chunks) { + read.mockResolvedValueOnce({ done: false, value: chunk }); + } + read.mockResolvedValueOnce({ done: true, value: undefined }); + return { + headers: new Headers(), + body: { + getReader: () => ({ + read, + cancel, + releaseLock: vi.fn(), + }), + }, + }; +} + describe("real-behavior-proof-policy", () => { it.each([ "![after](https://github.com/user-attachments/assets/abc123)", @@ -406,11 +460,7 @@ describe("real-behavior-proof-policy", () => { describe("isMaintainerTeamMember", () => { function jsonResponse(status: number, body: unknown = {}) { - return { - ok: status >= 200 && status < 300, - status, - json: () => Promise.resolve(body), - }; + return new Response(JSON.stringify(body), { status }); } it("returns true for active members", async () => { @@ -478,11 +528,7 @@ describe("isMaintainerTeamMember", () => { }); it("times out stalled membership response bodies", async () => { - const fetch = vi.fn().mockResolvedValue({ - ok: true, - status: 200, - json: () => new Promise(() => {}), - }); + const fetch = vi.fn().mockResolvedValue(stalledResponse()); await expect( isMaintainerTeamMember({ @@ -495,3 +541,39 @@ describe("isMaintainerTeamMember", () => { ).rejects.toThrow(/maintainer membership response for u timed out after 5ms/); }); }); + +describe("readBoundedGitHubApiJson", () => { + it("reads bounded JSON response bodies", async () => { + await expect( + readBoundedGitHubApiJson(new Response('{"state":"active"}'), "GitHub API", 1024), + ).resolves.toEqual({ state: "active" }); + }); + + it("rejects oversized JSON bodies by content length", async () => { + const response = contentLengthResponse(1025); + + await expect( + readBoundedGitHubApiJson(response as unknown as Response, "GitHub API", 1024), + ).rejects.toMatchObject({ + code: "ETOOBIG", + message: "GitHub API response body exceeded 1024 bytes", + }); + expect(response.cancel).toHaveBeenCalled(); + }); + + it("rejects oversized streamed JSON bodies", async () => { + const encoder = new TextEncoder(); + const response = chunkedResponse([ + encoder.encode('{"body":"'), + encoder.encode("x".repeat(1024)), + encoder.encode('"}'), + ]); + + await expect( + readBoundedGitHubApiJson(response as unknown as Response, "GitHub API", 1024), + ).rejects.toMatchObject({ + code: "ETOOBIG", + message: "GitHub API response body exceeded 1024 bytes", + }); + }); +});