diff --git a/README.md b/README.md index 34b4ca6f6f8..6ec750692a1 100644 --- a/README.md +++ b/README.md @@ -262,19 +262,6 @@ ClawHub is a minimal skill registry. With ClawHub enabled, the agent can search [ClawHub](https://clawhub.com) -### Example skill: video-quote-finder - -Use `skills/video-quote-finder` to locate where a quote appears in a YouTube video and return timestamp links. - -Original prompt: - -> "OK I want to make a PR with this skill back to openclaw... make a new one that -> will search for a point on the video and give me the timestamp. For example I -> want to find the timestamp in this video where Peter says 'I think vibe coding -> is a slur'" -> -> Video: [https://youtu.be/YFjfBk8HI5o?si=DTT2nVt0HQ4dSIoV](https://youtu.be/YFjfBk8HI5o?si=DTT2nVt0HQ4dSIoV) - ## Chat commands Send these in WhatsApp/Telegram/Slack/Google Chat/Microsoft Teams/WebChat (group commands are owner-only): diff --git a/skills/video-quote-finder/SKILL.md b/skills/video-quote-finder/SKILL.md deleted file mode 100644 index 405f5b2610a..00000000000 --- a/skills/video-quote-finder/SKILL.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -name: video-quote-finder -description: Find where a quote appears in a YouTube video and return timestamped links. Use when users ask "where in this video does X say Y", "find the timestamp for this line", or "locate quote in this YouTube video". ---- - -# Video Quote Finder - -Find quote timestamps in YouTube videos using the `summarize` CLI transcript extraction with timestamps. - -## Quick start - -```bash -python3 skills/video-quote-finder/scripts/find_quote_timestamp.py \ - "https://youtu.be/YFjfBk8HI5o" \ - "I think vibe coding is a slur" -``` - -## Workflow - -1. Extract transcript with timestamps via `summarize --extract --timestamps`. -2. Score transcript lines against the requested quote. -3. Return best match + top alternatives. -4. Include direct YouTube links with `t=`. - -## Output format - -- `best_match` timestamp + line + score -- `best_link` with timestamp -- up to 5 candidate timestamps with links - -## Notes - -- Requires `summarize` CLI (`@steipete/summarize`) in PATH. -- Works best when YouTube captions are available. -- If no exact match is found, uses fuzzy matching and suggests alternatives. diff --git a/skills/video-quote-finder/references/usage.md b/skills/video-quote-finder/references/usage.md deleted file mode 100644 index 3d40cfa4209..00000000000 --- a/skills/video-quote-finder/references/usage.md +++ /dev/null @@ -1,15 +0,0 @@ -# Usage - -## Find timestamp for a quote - -```bash -python3 skills/video-quote-finder/scripts/find_quote_timestamp.py \ - "https://youtu.be/YFjfBk8HI5o?si=DTT2nVt0HQ4dSIoV" \ - "I think vibe coding is a slur" -``` - -## Tips - -- Start with exact quote text. -- If no match, use a distinctive 3-8 word fragment. -- Prefer phrase fragments unlikely to repeat frequently. diff --git a/skills/video-quote-finder/scripts/find_quote_timestamp.py b/skills/video-quote-finder/scripts/find_quote_timestamp.py deleted file mode 100755 index ce6da085728..00000000000 --- a/skills/video-quote-finder/scripts/find_quote_timestamp.py +++ /dev/null @@ -1,96 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import re -import subprocess -import sys -from difflib import SequenceMatcher - -TS_LINE = re.compile(r"^\[(\d{1,2}:\d{2}(?::\d{2})?)\]\s*(.*)$") - - -def ts_to_seconds(ts: str) -> int: - parts = [int(x) for x in ts.split(':')] - if len(parts) == 2: - m, s = parts - return m * 60 + s - h, m, s = parts - return h * 3600 + m * 60 + s - - -def with_timestamp_url(url: str, ts: str) -> str: - sec = ts_to_seconds(ts) - base_url = url.split('#', 1)[0] # drop fragment so query params are honored - joiner = '&' if '?' in base_url else '?' - return f"{base_url}{joiner}t={sec}s" - - -def run_extract(url: str) -> str: - cmd = ["summarize", url, "--extract", "--timestamps"] - p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) - if p.returncode != 0: - raise RuntimeError(p.stderr.strip() or "summarize failed") - return p.stdout - - -def normalize(s: str) -> str: - return re.sub(r"\s+", " ", s.lower()).strip() - - -def score(quote: str, line: str) -> float: - q = normalize(quote) - l = normalize(line) - if not q or not l: - return 0.0 - if q in l: - return 1.0 - - q_words = set(q.split()) - l_words = set(l.split()) - overlap = len(q_words & l_words) / max(1, len(q_words)) - ratio = SequenceMatcher(None, q, l).ratio() - return 0.6 * overlap + 0.4 * ratio - - -def find_matches(text: str, quote: str): - matches = [] - for line in text.splitlines(): - m = TS_LINE.match(line) - if not m: - continue - ts, body = m.group(1), m.group(2) - s = score(quote, body) - if s >= 0.35: - matches.append((s, ts, body)) - matches.sort(key=lambda x: x[0], reverse=True) - return matches[:5] - - -def main(): - ap = argparse.ArgumentParser(description="Find quote timestamp in YouTube transcript") - ap.add_argument("url") - ap.add_argument("quote") - args = ap.parse_args() - - try: - text = run_extract(args.url) - matches = find_matches(text, args.quote) - except Exception as e: - print(f"ERROR: {e}", file=sys.stderr) - sys.exit(1) - - if not matches: - print("No matches found. Try a shorter quote fragment.") - sys.exit(2) - - best = matches[0] - best_link = with_timestamp_url(args.url, best[1]) - print(f"best_match: [{best[1]}] score={best[0]:.2f} :: {best[2]}") - print(f"best_link: {best_link}") - print("candidates:") - for s, ts, body in matches: - print(f"- [{ts}] score={s:.2f} :: {body}") - print(f" link: {with_timestamp_url(args.url, ts)}") - - -if __name__ == "__main__": - main()