mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00
skills: add video-quote-finder with timestamp links
This commit is contained in:
committed by
Peter Steinberger
parent
89ce1460e1
commit
61726a2fbd
@@ -262,6 +262,14 @@ ClawHub is a minimal skill registry. With ClawHub enabled, the agent can search
|
|||||||
|
|
||||||
[ClawHub](https://clawhub.com)
|
[ClawHub](https://clawhub.com)
|
||||||
|
|
||||||
|
### Example skill: video-quote-finder
|
||||||
|
|
||||||
|
Use `skills/video-quote-finder` to locate where a quote appears in a YouTube video and return timestamp links.
|
||||||
|
|
||||||
|
Original prompt:
|
||||||
|
|
||||||
|
> "OK I want to make a PR with this skill back to openclaw... make a new one that will search for a point on the video and give me the timestamp. For example I want to find the timestamp in this video where peter says 'I think vibe coding is a slur' https://youtu.be/YFjfBk8HI5o?si=DTT2nVt0HQ4dSIoV"
|
||||||
|
|
||||||
## Chat commands
|
## Chat commands
|
||||||
|
|
||||||
Send these in WhatsApp/Telegram/Slack/Google Chat/Microsoft Teams/WebChat (group commands are owner-only):
|
Send these in WhatsApp/Telegram/Slack/Google Chat/Microsoft Teams/WebChat (group commands are owner-only):
|
||||||
|
|||||||
35
skills/video-quote-finder/SKILL.md
Normal file
35
skills/video-quote-finder/SKILL.md
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
---
|
||||||
|
name: video-quote-finder
|
||||||
|
description: Find where a quote appears in a YouTube video and return timestamped links. Use when users ask "where in this video does X say Y", "find the timestamp for this line", or "locate quote in this YouTube video".
|
||||||
|
---
|
||||||
|
|
||||||
|
# Video Quote Finder
|
||||||
|
|
||||||
|
Find quote timestamps in YouTube videos using the `summarize` CLI transcript extraction with timestamps.
|
||||||
|
|
||||||
|
## Quick start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 skills/video-quote-finder/scripts/find_quote_timestamp.py \
|
||||||
|
"https://youtu.be/YFjfBk8HI5o" \
|
||||||
|
"I think vibe coding is a slur"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Workflow
|
||||||
|
|
||||||
|
1. Extract transcript with timestamps via `summarize <url> --extract --timestamps`.
|
||||||
|
2. Score transcript lines against the requested quote.
|
||||||
|
3. Return best match + top alternatives.
|
||||||
|
4. Include direct YouTube links with `t=<seconds>s`.
|
||||||
|
|
||||||
|
## Output format
|
||||||
|
|
||||||
|
- `best_match` timestamp + line + score
|
||||||
|
- `best_link` with timestamp
|
||||||
|
- up to 5 candidate timestamps with links
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
|
||||||
|
- Requires `summarize` CLI (`@steipete/summarize`) in PATH.
|
||||||
|
- Works best when YouTube captions are available.
|
||||||
|
- If no exact match is found, uses fuzzy matching and suggests alternatives.
|
||||||
15
skills/video-quote-finder/references/usage.md
Normal file
15
skills/video-quote-finder/references/usage.md
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# Usage
|
||||||
|
|
||||||
|
## Find timestamp for a quote
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 skills/video-quote-finder/scripts/find_quote_timestamp.py \
|
||||||
|
"https://youtu.be/YFjfBk8HI5o?si=DTT2nVt0HQ4dSIoV" \
|
||||||
|
"I think vibe coding is a slur"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Tips
|
||||||
|
|
||||||
|
- Start with exact quote text.
|
||||||
|
- If no match, use a distinctive 3-8 word fragment.
|
||||||
|
- Prefer phrase fragments unlikely to repeat frequently.
|
||||||
95
skills/video-quote-finder/scripts/find_quote_timestamp.py
Executable file
95
skills/video-quote-finder/scripts/find_quote_timestamp.py
Executable file
@@ -0,0 +1,95 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import argparse
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from difflib import SequenceMatcher
|
||||||
|
|
||||||
|
# Matches one timestamped transcript line of the form "[M:SS] text",
# "[MM:SS] text" or "[H:MM:SS] text". Group 1 is the timestamp without the
# brackets; group 2 is the remainder of the line after any whitespace that
# follows the closing bracket.
# NOTE(review): assumes this is the line format `summarize --timestamps`
# emits -- confirm against the CLI's actual output.
TS_LINE = re.compile(r"^\[(\d{1,2}:\d{2}(?::\d{2})?)\]\s*(.*)$")
|
||||||
|
|
||||||
|
|
||||||
|
def ts_to_seconds(ts: str) -> int:
    """Convert a clock-style timestamp into a whole number of seconds.

    Accepts ``SS``, ``MM:SS`` and ``HH:MM:SS``. Fields are combined
    left-to-right in base 60, so ``"1:02:03"`` becomes ``3723``.

    Args:
        ts: Colon-separated timestamp with one to three integer fields.

    Returns:
        The offset in seconds.

    Raises:
        ValueError: If ``ts`` has more than three fields or any field is not
            an integer.
    """
    fields = ts.strip().split(":")
    if not 1 <= len(fields) <= 3:
        # Fail with a readable message instead of an opaque unpacking error.
        raise ValueError(f"unsupported timestamp format: {ts!r}")

    total = 0
    for field in fields:
        total = total * 60 + int(field)
    return total
|
||||||
|
|
||||||
|
|
||||||
|
def with_timestamp_url(url: str, ts: str) -> str:
    """Return ``url`` with a ``t=<seconds>s`` query parameter for ``ts``.

    The URL is parsed rather than concatenated so that:

    * an existing ``t`` parameter is replaced instead of duplicated,
    * other query parameters are preserved in their original order,
    * a ``#fragment`` stays at the end, after the query string.

    Args:
        url: The video URL to decorate.
        ts: Timestamp string understood by ``ts_to_seconds``.

    Returns:
        The URL with the timestamp parameter appended to its query string.
    """
    # Local import keeps this block self-contained; the module's top-level
    # imports do not include urllib.
    from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

    sec = ts_to_seconds(ts)
    parts = urlsplit(url)
    params = [
        (key, value)
        for key, value in parse_qsl(parts.query, keep_blank_values=True)
        if key != "t"
    ]
    params.append(("t", f"{sec}s"))
    return urlunsplit(parts._replace(query=urlencode(params)))
|
||||||
|
|
||||||
|
|
||||||
|
def run_extract(url: str) -> str:
    """Run the ``summarize`` CLI and return the timestamped transcript text.

    Invokes ``summarize <url> --extract --timestamps`` and captures its
    output as text.

    Args:
        url: Video URL passed to ``summarize`` as its first argument.

    Returns:
        Whatever ``summarize`` wrote to standard output.

    Raises:
        RuntimeError: If the ``summarize`` executable cannot be found, or if
            it exits with a non-zero status (the message is its stderr when
            available).
    """
    cmd = ["summarize", url, "--extract", "--timestamps"]
    try:
        p = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
    except FileNotFoundError as exc:
        # A bare "[Errno 2] No such file or directory" does not tell the user
        # what to install; name the missing dependency explicitly.
        raise RuntimeError(
            "summarize CLI not found in PATH (install @steipete/summarize)"
        ) from exc
    if p.returncode != 0:
        raise RuntimeError(p.stderr.strip() or "summarize failed")
    return p.stdout
|
||||||
|
|
||||||
|
|
||||||
|
# Typographic quotes frequently appear in pasted quotes but not in caption
# text; fold them to their ASCII equivalents so both sides compare equal.
_QUOTE_TRANSLATION = str.maketrans(
    {
        "\u2018": "'",
        "\u2019": "'",
        "\u201c": '"',
        "\u201d": '"',
    }
)


def normalize(s: str) -> str:
    """Return ``s`` in a canonical form for quote comparison.

    The text is lower-cased, curly quotes/apostrophes are replaced by their
    straight ASCII forms, every run of whitespace is collapsed to a single
    space, and leading/trailing whitespace is removed.

    Args:
        s: Raw quote or transcript text.

    Returns:
        The normalised string (possibly empty).
    """
    folded = s.translate(_QUOTE_TRANSLATION).lower()
    return re.sub(r"\s+", " ", folded).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def score(quote: str, line: str) -> float:
    """Rate how well a transcript line matches the requested quote.

    Both inputs are passed through ``normalize`` first. The result is:

    * ``0.0`` if either side is empty after normalisation,
    * ``1.0`` if the normalised quote is a substring of the normalised line,
    * otherwise ``0.6 * word_overlap + 0.4 * SequenceMatcher ratio``, where
      ``word_overlap`` is the fraction of distinct quote words that also
      occur in the line.

    Words are extracted without surrounding punctuation so that, for
    example, ``"slur,"`` in a transcript still counts as the word ``"slur"``.

    Args:
        quote: The text the user is looking for.
        line: One transcript line (without its timestamp).

    Returns:
        A similarity score between 0.0 and 1.0.
    """
    wanted = normalize(quote)
    candidate = normalize(line)
    if not wanted or not candidate:
        return 0.0
    if wanted in candidate:
        return 1.0

    # Word characters with optional inner apostrophes ("don't"); punctuation
    # attached to a word is dropped so it cannot spoil the overlap count.
    word_pattern = r"\w+(?:'\w+)*"
    wanted_words = set(re.findall(word_pattern, wanted))
    candidate_words = set(re.findall(word_pattern, candidate))

    # max(1, ...) guards against a quote that contains no word characters.
    overlap = len(wanted_words & candidate_words) / max(1, len(wanted_words))
    ratio = SequenceMatcher(None, wanted, candidate).ratio()
    return 0.6 * overlap + 0.4 * ratio
|
||||||
|
|
||||||
|
|
||||||
|
def find_matches(text: str, quote: str, *, min_score: float = 0.35, limit: int = 5) -> list:
    """Find the transcript lines that best match ``quote``.

    Each line of ``text`` that matches ``TS_LINE`` is scored against the
    quote with ``score``; lines without a recognised timestamp prefix are
    ignored. Surrounding whitespace on a line is stripped before matching so
    indented transcript output is still recognised.

    Args:
        text: Full transcript, one timestamped entry per line.
        quote: The text to search for.
        min_score: Minimum score a line needs to be reported.
        limit: Maximum number of matches to return.

    Returns:
        Up to ``limit`` tuples ``(score, timestamp, line_text)``, ordered by
        descending score. Lines with equal scores keep transcript order
        because the sort is stable.
    """
    matches = []
    for raw_line in text.splitlines():
        found = TS_LINE.match(raw_line.strip())
        if not found:
            continue
        ts, body = found.group(1), found.group(2)
        value = score(quote, body)
        if value >= min_score:
            matches.append((value, ts, body))

    matches.sort(key=lambda item: item[0], reverse=True)
    return matches[:limit]
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point: print the best timestamp(s) for a quote.

    Positional arguments are the video URL and the quote to locate. On
    success the best match, a link to it, and the list of candidates (each
    with its own link) are printed to standard output.

    Exit status:
        1 -- transcript extraction failed (message on stderr).
        2 -- no transcript line matched, or the arguments were invalid
             (argparse also uses status 2 for usage errors).
    """
    ap = argparse.ArgumentParser(description="Find quote timestamp in YouTube transcript")
    ap.add_argument("url")
    ap.add_argument("quote")
    args = ap.parse_args()

    # A blank quote can never match; reject it before doing the (slow)
    # transcript extraction instead of reporting "No matches found".
    if not args.quote.strip():
        ap.error("quote must not be empty")

    # Only the external extraction is expected to fail at runtime, so it is
    # the only call wrapped in the error boundary.
    try:
        text = run_extract(args.url)
    except Exception as e:
        print(f"ERROR: {e}", file=sys.stderr)
        sys.exit(1)

    matches = find_matches(text, args.quote)
    if not matches:
        print("No matches found. Try a shorter quote fragment.")
        sys.exit(2)

    best_score, best_ts, best_body = matches[0]
    best_link = with_timestamp_url(args.url, best_ts)
    print(f"best_match: [{best_ts}] score={best_score:.2f} :: {best_body}")
    print(f"best_link: {best_link}")
    print("candidates:")
    for s, ts, body in matches:
        print(f"- [{ts}] score={s:.2f} :: {body}")
        print(f" link: {with_timestamp_url(args.url, ts)}")


if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user