From 61726a2fbde146b4583f94d029b41266cd1b9c55 Mon Sep 17 00:00:00 2001 From: zisisp Date: Mon, 16 Feb 2026 22:39:09 +0200 Subject: [PATCH] skills: add video-quote-finder with timestamp links --- README.md | 8 ++ skills/video-quote-finder/SKILL.md | 35 +++++++ skills/video-quote-finder/references/usage.md | 15 +++ .../scripts/find_quote_timestamp.py | 95 +++++++++++++++++++ 4 files changed, 153 insertions(+) create mode 100644 skills/video-quote-finder/SKILL.md create mode 100644 skills/video-quote-finder/references/usage.md create mode 100755 skills/video-quote-finder/scripts/find_quote_timestamp.py diff --git a/README.md b/README.md index 40afade0f48..1684fca800f 100644 --- a/README.md +++ b/README.md @@ -262,6 +262,14 @@ ClawHub is a minimal skill registry. With ClawHub enabled, the agent can search [ClawHub](https://clawhub.com) +### Example skill: video-quote-finder + +Use `skills/video-quote-finder` to locate where a quote appears in a YouTube video and return timestamp links. + +Original prompt: + +> "OK I want to make a PR with this skill back to openclaw... make a new one that will search for a point on the video and give me the timestamp. For example I want to find the timestamp in this video where peter says 'I think vibe coding is a slur' https://youtu.be/YFjfBk8HI5o?si=DTT2nVt0HQ4dSIoV" + ## Chat commands Send these in WhatsApp/Telegram/Slack/Google Chat/Microsoft Teams/WebChat (group commands are owner-only): diff --git a/skills/video-quote-finder/SKILL.md b/skills/video-quote-finder/SKILL.md new file mode 100644 index 00000000000..405f5b2610a --- /dev/null +++ b/skills/video-quote-finder/SKILL.md @@ -0,0 +1,35 @@ +--- +name: video-quote-finder +description: Find where a quote appears in a YouTube video and return timestamped links. Use when users ask "where in this video does X say Y", "find the timestamp for this line", or "locate quote in this YouTube video". 
+--- + +# Video Quote Finder + +Find quote timestamps in YouTube videos using the `summarize` CLI transcript extraction with timestamps. + +## Quick start + +```bash +python3 skills/video-quote-finder/scripts/find_quote_timestamp.py \ + "https://youtu.be/YFjfBk8HI5o" \ + "I think vibe coding is a slur" +``` + +## Workflow + +1. Extract transcript with timestamps via `summarize --extract --timestamps`. +2. Score transcript lines against the requested quote. +3. Return best match + top alternatives. +4. Include direct YouTube links with `t=`. + +## Output format + +- `best_match` timestamp + line + score +- `best_link` with timestamp +- up to 5 candidate timestamps with links + +## Notes + +- Requires `summarize` CLI (`@steipete/summarize`) in PATH. +- Works best when YouTube captions are available. +- If no exact match is found, uses fuzzy matching and suggests alternatives. diff --git a/skills/video-quote-finder/references/usage.md b/skills/video-quote-finder/references/usage.md new file mode 100644 index 00000000000..3d40cfa4209 --- /dev/null +++ b/skills/video-quote-finder/references/usage.md @@ -0,0 +1,15 @@ +# Usage + +## Find timestamp for a quote + +```bash +python3 skills/video-quote-finder/scripts/find_quote_timestamp.py \ + "https://youtu.be/YFjfBk8HI5o?si=DTT2nVt0HQ4dSIoV" \ + "I think vibe coding is a slur" +``` + +## Tips + +- Start with exact quote text. +- If no match, use a distinctive 3-8 word fragment. +- Prefer phrase fragments unlikely to repeat frequently. 
#!/usr/bin/env python3
# Script body of skills/video-quote-finder/scripts/find_quote_timestamp.py
# (added as a new file with mode 100755).
"""Find where a quote appears in a YouTube video transcript.

The transcript is obtained by running ``summarize <url> --extract
--timestamps``.  Every transcript line of the form ``[MM:SS] text`` or
``[H:MM:SS] text`` is scored against the requested quote, and the best
candidates are printed together with links that jump to the timestamp.

Usage:
    find_quote_timestamp.py URL QUOTE

Exit codes: 0 on success, 1 when extraction fails, 2 when nothing matches.
"""
import argparse
import re
import subprocess
import sys
from difflib import SequenceMatcher
from typing import List, Tuple
from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

# A transcript line: "[M:SS] text", "[MM:SS] text" or "[H:MM:SS] text".
TS_LINE = re.compile(r"^\[(\d{1,2}:\d{2}(?::\d{2})?)\]\s*(.*)$")

# Straight and curly apostrophes are dropped so "don't" and "dont" compare equal.
_APOSTROPHES = re.compile(r"['\u2018\u2019]")
# Any remaining character that is neither a word character nor whitespace.
_PUNCTUATION = re.compile(r"[^\w\s]")
_WHITESPACE = re.compile(r"\s+")


def ts_to_seconds(ts: str) -> int:
    """Convert a ``MM:SS`` or ``H:MM:SS`` timestamp to a number of seconds.

    Raises:
        ValueError: if a component is not an integer or the timestamp does
            not have exactly two or three components.
    """
    parts = [int(x) for x in ts.split(':')]
    if len(parts) == 2:
        m, s = parts
        return m * 60 + s
    h, m, s = parts
    return h * 3600 + m * 60 + s


def with_timestamp_url(url: str, ts: str) -> str:
    """Return *url* with its ``t`` query parameter set to *ts* in seconds.

    The URL is parsed rather than concatenated so that an existing ``t=``
    parameter is replaced instead of duplicated, and so that the parameter
    lands in the query string even when the URL carries a ``#fragment``.
    Other query parameters are kept in their original order.
    """
    sec = ts_to_seconds(ts)
    parts = urlsplit(url)
    params = [
        (key, value)
        for key, value in parse_qsl(parts.query, keep_blank_values=True)
        if key != "t"
    ]
    params.append(("t", f"{sec}s"))
    return urlunsplit(parts._replace(query=urlencode(params)))


def run_extract(url: str) -> str:
    """Run ``summarize <url> --extract --timestamps`` and return its stdout.

    Raises:
        ValueError: if *url* starts with ``-`` and could therefore be read
            by the CLI as an option instead of a URL.
        RuntimeError: if the ``summarize`` executable cannot be found, or if
            it exits with a non-zero status (the message is its stderr).
    """
    if url.startswith("-"):
        raise ValueError(f"invalid URL (looks like a command-line option): {url!r}")
    cmd = ["summarize", url, "--extract", "--timestamps"]
    try:
        p = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
    except FileNotFoundError as err:
        raise RuntimeError(
            "summarize CLI not found in PATH (install @steipete/summarize)"
        ) from err
    if p.returncode != 0:
        raise RuntimeError(p.stderr.strip() or "summarize failed")
    return p.stdout


def normalize(s: str) -> str:
    """Return *s* case-folded, without punctuation, with single spaces.

    Apostrophes are removed outright; every other punctuation character is
    replaced by a space so that words on either side stay separated.
    """
    s = _APOSTROPHES.sub("", s.casefold())
    s = _PUNCTUATION.sub(" ", s)
    return _WHITESPACE.sub(" ", s).strip()


def score(quote: str, line: str) -> float:
    """Score how well *line* matches *quote*, from 0.0 to 1.0.

    Returns 1.0 when the normalized quote occurs in the normalized line as a
    run of whole words, and 0.0 when either side is empty after
    normalization.  Otherwise the score is a blend of the fraction of quote
    words present in the line (weight 0.6) and the ``SequenceMatcher``
    character similarity ratio (weight 0.4).
    """
    needle = normalize(quote)
    haystack = normalize(line)
    if not needle or not haystack:
        return 0.0
    # Pad with spaces so the containment test only succeeds on word
    # boundaries ("art" must not count as an exact hit inside "party").
    if f" {needle} " in f" {haystack} ":
        return 1.0

    quote_words = set(needle.split())
    line_words = set(haystack.split())
    # quote_words is never empty here: needle is non-empty and stripped.
    overlap = len(quote_words & line_words) / len(quote_words)
    ratio = SequenceMatcher(None, needle, haystack).ratio()
    return 0.6 * overlap + 0.4 * ratio


def find_matches(
    text: str,
    quote: str,
    limit: int = 5,
    threshold: float = 0.35,
) -> List[Tuple[float, str, str]]:
    """Return the best-scoring timestamped transcript lines for *quote*.

    Args:
        text: transcript text; lines that do not match ``TS_LINE`` are skipped.
        quote: the quote to look for.
        limit: maximum number of matches to return.
        threshold: minimum score a line needs to be kept.

    Returns:
        Up to *limit* ``(score, timestamp, line_text)`` tuples, highest score
        first.  Lines with equal scores keep their transcript order.
    """
    matches = []
    for line in text.splitlines():
        m = TS_LINE.match(line)
        if not m:
            continue
        ts, body = m.group(1), m.group(2)
        s = score(quote, body)
        if s >= threshold:
            matches.append((s, ts, body))
    # list.sort is stable even with reverse=True, so ties stay chronological.
    matches.sort(key=lambda item: item[0], reverse=True)
    return matches[:limit]


def main():
    """Parse the command line, search the transcript and print the results."""
    ap = argparse.ArgumentParser(description="Find quote timestamp in YouTube transcript")
    ap.add_argument("url")
    ap.add_argument("quote")
    args = ap.parse_args()

    # Top-level boundary: any failure is reported on stderr with exit code 1.
    try:
        text = run_extract(args.url)
        matches = find_matches(text, args.quote)
    except Exception as e:
        print(f"ERROR: {e}", file=sys.stderr)
        sys.exit(1)

    if not matches:
        print("No matches found. Try a shorter quote fragment.")
        sys.exit(2)

    best = matches[0]
    best_link = with_timestamp_url(args.url, best[1])
    print(f"best_match: [{best[1]}] score={best[0]:.2f} :: {best[2]}")
    print(f"best_link: {best_link}")
    print("candidates:")
    for s, ts, body in matches:
        print(f"- [{ts}] score={s:.2f} :: {body}")
        print(f" link: {with_timestamp_url(args.url, ts)}")


if __name__ == "__main__":
    main()