mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 21:00:44 +00:00
* feat(security): add GHSA detector-review pipeline and OpenGrep CI workflows [AI-assisted]
Stand up an end-to-end pipeline that turns every published openclaw GitHub
Security Advisory into a reusable OpenGrep rule, and wire the compiled rules
into manual-dispatch GitHub Actions workflows that publish SARIF to GitHub
Code Scanning.
The pipeline is harness-agnostic: any coding-agent CLI (Rovo Dev, Claude
Code, Codex, OpenCode, or anything you can shell out to) can drive it via
the runner script's --harness flag. Built-in adapters cover the four common
harnesses; --harness-cmd '<template>' supports anything else with shell-style
{prompt}/{model}/{output_file} substitution.
Pipeline pieces:
- scripts/run-ghsa-detector-review-batch.mjs runs your chosen coding harness
in parallel against every advisory using the agent-agnostic detector-review
spec at security/detector-review/detector-review-spec.md. Each case
produces an opengrep general-rule.yml (precise) and broad-rule.yml
(review-aid), plus a coverage-validated report against the vulnerable
commit's changed files.
- scripts/compile-opengrep-rules.mjs walks a run directory, rewrites each
rule's id to ghsa-detector.<ghsa>.<orig-id>, injects ghsa/advisory-url/
detector-bucket/source-rule-id metadata, and uses opengrep itself to drop
rules with InvalidRuleSchemaError so the published super-configs load
cleanly.
Compiled outputs:
- security/opengrep/precise.yml (336 rules)
- security/opengrep/broad.yml (459 rules)
- security/opengrep/compile-manifest.json (per-rule provenance map)
CI workflows (manual workflow_dispatch only):
- .github/workflows/opengrep-precise.yml
- .github/workflows/opengrep-broad.yml
Both install a pinned opengrep, run opengrep scan against src/, upload SARIF
to Code Scanning under categories opengrep-precise / opengrep-broad, and use
continue-on-error: true so findings never block the workflow.
Detector-review spec and assets:
- security/detector-review/detector-review-spec.md the agent-agnostic spec
the runner injects into each per-case prompt
- security/detector-review/references/{detector-rubric,report-template}.md
- security/detector-review/scripts/init_case.py
- security/prompt-suffix-coverage-first.md mandatory prompt addendum that
enforces coverage-first validation (rule must catch the OG vuln, not just
pass synthetic fixtures)
Docs:
- security/README.md end-to-end flow, supported harnesses, regen recipe
- security/opengrep/README.md compiled-config details + recompile recipe
* security: tighten GHSA OpenGrep detector workflow
* chore: refine precise opengrep workflow
* chore: remove stale opengrep metadata
* fix: harden GHSA OpenGrep workflow
* ci: split OpenGrep diff and full scans
* chore: remove performance-only opengrep rule
* ci: use OpenGrep installer path
* chore: enforce opengrep rule metadata provenance
* chore: generalize opengrep rule compilation
* docs: align opengrep rulepack guidance
* chore: support generic opengrep rule sources
* fix: validate opengrep rulepack-only changes
---------
Co-authored-by: Jesse Merhi <security-engineering@atlassian.com>
139 lines
4.4 KiB
JavaScript
#!/usr/bin/env node
|
|
import { promises as fs } from "node:fs";
import * as path from "node:path";
import { pathToFileURL } from "node:url";

import { parseDocument } from "yaml";
|
const DEFAULT_RULEPACK = path.resolve("security", "opengrep", "precise.yml");
|
|
const GHSA_RE = /^GHSA-[0-9A-Z]{4}-[0-9A-Z]{4}-[0-9A-Z]{4}$/;
|
|
const RULE_ID_RE = /^([a-z0-9][a-z0-9_-]*)\..+$/;
|
|
|
|
function printHelp() {
|
|
console.log(`Usage: node security/opengrep/check-rule-metadata.mjs [rulepack.yml]
|
|
|
|
Checks that every compiled OpenGrep rule carries source/provenance metadata.
|
|
Default rulepack: ${DEFAULT_RULEPACK}
|
|
`);
|
|
}
|
|
|
|
export async function readRules(rulepackPath) {
|
|
const raw = await fs.readFile(rulepackPath, "utf8");
|
|
const doc = parseDocument(raw, { keepSourceTokens: false });
|
|
if (doc.errors.length > 0) {
|
|
throw new Error(
|
|
`Could not parse ${rulepackPath}: ${doc.errors.map((e) => e.message).join("; ")}`,
|
|
);
|
|
}
|
|
const data = doc.toJSON();
|
|
if (!data || !Array.isArray(data.rules)) {
|
|
throw new Error(`${rulepackPath} must contain a top-level rules array`);
|
|
}
|
|
return data.rules;
|
|
}
|
|
|
|
function hasNonEmptyString(value) {
|
|
return typeof value === "string" && value.trim().length > 0;
|
|
}
|
|
|
|
function sanitizeIdComponent(value) {
|
|
return (
|
|
String(value || "")
|
|
.replace(/[^a-zA-Z0-9._-]+/g, "-")
|
|
.replace(/^-+|-+$/g, "")
|
|
.toLowerCase() || "rule"
|
|
);
|
|
}
|
|
|
|
function sanitizeSourceIdComponent(value) {
|
|
return sanitizeIdComponent(value).replace(/[.]+/g, "-");
|
|
}
|
|
|
|
export function validateRuleMetadata(rules) {
|
|
const violations = [];
|
|
|
|
for (const [index, rule] of rules.entries()) {
|
|
const id = String(rule?.id ?? "");
|
|
const label = id || `rules[${index}]`;
|
|
const metadata = rule?.metadata;
|
|
if (!metadata || typeof metadata !== "object" || Array.isArray(metadata)) {
|
|
violations.push(`${label}: missing metadata object`);
|
|
continue;
|
|
}
|
|
|
|
const idMatch = id.match(RULE_ID_RE);
|
|
if (!idMatch) {
|
|
violations.push(`${label}: id must match <source-id>.<source-rule-id>`);
|
|
}
|
|
|
|
const ghsa = String(metadata.ghsa ?? "");
|
|
const advisoryId = String(metadata["advisory-id"] ?? metadata.ghsa ?? "")
|
|
.trim()
|
|
.toUpperCase();
|
|
if (!hasNonEmptyString(advisoryId)) {
|
|
violations.push(`${label}: missing metadata.advisory-id or metadata.ghsa`);
|
|
} else if (idMatch && idMatch[1] !== sanitizeSourceIdComponent(advisoryId)) {
|
|
violations.push(
|
|
`${label}: source id in metadata (${advisoryId}) must match source id in rule id (${idMatch[1]})`,
|
|
);
|
|
}
|
|
|
|
if (ghsa && !GHSA_RE.test(ghsa)) {
|
|
violations.push(`${label}: metadata.ghsa must match GHSA-XXXX-XXXX-XXXX when present`);
|
|
} else if (ghsa && advisoryId !== ghsa) {
|
|
violations.push(
|
|
`${label}: metadata.advisory-id must match metadata.ghsa when both are present`,
|
|
);
|
|
}
|
|
|
|
const advisoryUrl = String(metadata["advisory-url"] ?? "");
|
|
const expectedGhsaUrl = GHSA_RE.test(advisoryId)
|
|
? `https://github.com/openclaw/openclaw/security/advisories/${advisoryId}`
|
|
: "";
|
|
if (!hasNonEmptyString(advisoryUrl)) {
|
|
violations.push(`${label}: missing metadata.advisory-url`);
|
|
} else if (expectedGhsaUrl && advisoryUrl !== expectedGhsaUrl) {
|
|
violations.push(`${label}: metadata.advisory-url must be ${expectedGhsaUrl}`);
|
|
}
|
|
|
|
if (metadata["detector-bucket"] !== "precise") {
|
|
violations.push(`${label}: metadata.detector-bucket must be precise`);
|
|
}
|
|
if (!hasNonEmptyString(metadata["source-rule-id"])) {
|
|
violations.push(`${label}: missing metadata.source-rule-id`);
|
|
}
|
|
}
|
|
|
|
return violations;
|
|
}
|
|
|
|
export async function checkRulepack(rulepackPath = DEFAULT_RULEPACK) {
|
|
const rules = await readRules(rulepackPath);
|
|
return validateRuleMetadata(rules);
|
|
}
|
|
|
|
export async function main(argv = process.argv.slice(2)) {
|
|
if (argv.includes("--help") || argv.includes("-h")) {
|
|
printHelp();
|
|
return 0;
|
|
}
|
|
const rulepackPath = path.resolve(argv[0] ?? DEFAULT_RULEPACK);
|
|
const violations = await checkRulepack(rulepackPath);
|
|
if (violations.length > 0) {
|
|
console.error(
|
|
`check-opengrep-rule-metadata: ${violations.length} violation(s) in ${rulepackPath}`,
|
|
);
|
|
for (const violation of violations.slice(0, 50)) {
|
|
console.error(` - ${violation}`);
|
|
}
|
|
if (violations.length > 50) {
|
|
console.error(` ... ${violations.length - 50} more`);
|
|
}
|
|
return 1;
|
|
}
|
|
console.log(`check-opengrep-rule-metadata: ${rulepackPath} ok`);
|
|
return 0;
|
|
}
|
|
|
|
if (import.meta.main) {
|
|
process.exitCode = await main();
|
|
}
|