Files
openclaw/scripts/docs-i18n/main.go
Mason 9b0ea7c579 docs: relocalize stale locale links (#61796)
* docs: relocalize stale locale links

* docs: unify locale link postprocessing

* docs: preserve relocalized frontmatter

* docs: relocalize partial docs runs

* docs: scope locale link postprocessing

* docs: continue scoped relocalization

* docs: drain parallel i18n results

* docs: add i18n pipeline link regression tests

* docs: clarify i18n pipeline regression test intent

* docs: update provider references in i18n tests to use example-provider

* fix: note docs i18n link relocalization

* docs: rephrase gateway local ws sentence
2026-04-06 22:37:35 +08:00

340 lines
9.5 KiB
Go

package main
import (
"context"
"flag"
"fmt"
"log"
"os"
"path/filepath"
"sync"
"time"
)
type docJob struct {
index int
path string
rel string
}
type docResult struct {
index int
rel string
output string
duration time.Duration
skipped bool
err error
}
type runConfig struct {
targetLang string
sourceLang string
docsRoot string
tmPath string
mode string
thinking string
overwrite bool
maxFiles int
parallel int
}
func main() {
var (
targetLang = flag.String("lang", "zh-CN", "target language (e.g., zh-CN)")
sourceLang = flag.String("src", "en", "source language")
docsRoot = flag.String("docs", "docs", "docs root")
tmPath = flag.String("tm", "", "translation memory path")
mode = flag.String("mode", "segment", "translation mode (segment|doc)")
thinking = flag.String("thinking", "high", "thinking level (low|high)")
overwrite = flag.Bool("overwrite", false, "overwrite existing translations")
maxFiles = flag.Int("max", 0, "max files to process (0 = all)")
parallel = flag.Int("parallel", 1, "parallel workers for doc mode")
)
flag.Parse()
files := flag.Args()
if len(files) == 0 {
fatal(fmt.Errorf("no doc files provided"))
}
if err := runDocsI18N(context.Background(), runConfig{
targetLang: *targetLang,
sourceLang: *sourceLang,
docsRoot: *docsRoot,
tmPath: *tmPath,
mode: *mode,
thinking: *thinking,
overwrite: *overwrite,
maxFiles: *maxFiles,
parallel: *parallel,
}, files, func(srcLang, tgtLang string, glossary []GlossaryEntry, thinking string) (docsTranslator, error) {
return NewPiTranslator(srcLang, tgtLang, glossary, thinking)
}); err != nil {
fatal(err)
}
}
func runDocsI18N(ctx context.Context, cfg runConfig, files []string, newTranslator docsTranslatorFactory) error {
if len(files) == 0 {
return fmt.Errorf("no doc files provided")
}
resolvedDocsRoot, err := filepath.Abs(cfg.docsRoot)
if err != nil {
return err
}
tmPath := cfg.tmPath
if tmPath == "" {
tmPath = filepath.Join(resolvedDocsRoot, ".i18n", fmt.Sprintf("%s.tm.jsonl", cfg.targetLang))
}
glossaryPath := filepath.Join(resolvedDocsRoot, ".i18n", fmt.Sprintf("glossary.%s.json", cfg.targetLang))
glossary, err := LoadGlossary(glossaryPath)
if err != nil {
return err
}
tm, err := LoadTranslationMemory(tmPath)
if err != nil {
return err
}
ordered, err := orderFiles(resolvedDocsRoot, files)
if err != nil {
return err
}
totalFiles := len(ordered)
preSkipped := 0
if cfg.mode == "doc" && !cfg.overwrite {
filtered, skipped, err := filterDocQueue(resolvedDocsRoot, cfg.targetLang, ordered)
if err != nil {
return err
}
ordered = filtered
preSkipped = skipped
}
if cfg.maxFiles > 0 && cfg.maxFiles < len(ordered) {
ordered = ordered[:cfg.maxFiles]
}
parallel := cfg.parallel
if parallel < 1 {
parallel = 1
}
log.SetFlags(log.LstdFlags)
start := time.Now()
processed := 0
skipped := 0
localizedFiles := []string{}
var runErr error
log.Printf("docs-i18n: mode=%s total=%d pending=%d pre_skipped=%d overwrite=%t thinking=%s parallel=%d", cfg.mode, totalFiles, len(ordered), preSkipped, cfg.overwrite, cfg.thinking, parallel)
switch cfg.mode {
case "doc":
if parallel > 1 {
proc, skip, outputs, err := runDocParallel(ctx, ordered, resolvedDocsRoot, cfg.sourceLang, cfg.targetLang, cfg.overwrite, parallel, glossary, cfg.thinking, newTranslator)
processed += proc
skipped += skip
localizedFiles = append(localizedFiles, outputs...)
if err != nil {
runErr = err
}
} else {
translator, err := newTranslator(cfg.sourceLang, cfg.targetLang, glossary, cfg.thinking)
if err != nil {
return err
}
defer translator.Close()
proc, skip, outputs, err := runDocSequential(ctx, ordered, translator, resolvedDocsRoot, cfg.sourceLang, cfg.targetLang, cfg.overwrite)
processed += proc
skipped += skip
localizedFiles = append(localizedFiles, outputs...)
if err != nil {
runErr = err
}
}
case "segment":
if parallel > 1 {
return fmt.Errorf("parallel processing is only supported in doc mode")
}
translator, err := newTranslator(cfg.sourceLang, cfg.targetLang, glossary, cfg.thinking)
if err != nil {
return err
}
defer translator.Close()
proc, outputs, err := runSegmentSequential(ctx, ordered, translator, tm, resolvedDocsRoot, cfg.sourceLang, cfg.targetLang)
processed += proc
localizedFiles = append(localizedFiles, outputs...)
if err != nil {
runErr = err
}
default:
return fmt.Errorf("unknown mode: %s", cfg.mode)
}
if err := tm.Save(); err != nil && runErr == nil {
runErr = err
}
if err := postprocessLocalizedDocs(resolvedDocsRoot, cfg.targetLang, localizedFiles); err != nil && runErr == nil {
runErr = err
}
elapsed := time.Since(start).Round(time.Millisecond)
log.Printf("docs-i18n: completed processed=%d skipped=%d elapsed=%s", processed, skipped, elapsed)
return runErr
}
func runDocSequential(ctx context.Context, ordered []string, translator docsTranslator, docsRoot, srcLang, tgtLang string, overwrite bool) (int, int, []string, error) {
processed := 0
skipped := 0
outputs := []string{}
for index, file := range ordered {
relPath := resolveRelPath(docsRoot, file)
log.Printf("docs-i18n: [%d/%d] start %s", index+1, len(ordered), relPath)
start := time.Now()
skip, outputPath, err := processFileDoc(ctx, translator, docsRoot, file, srcLang, tgtLang, overwrite)
if err != nil {
return processed, skipped, outputs, err
}
if skip {
skipped++
log.Printf("docs-i18n: [%d/%d] skipped %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond))
} else {
processed++
outputs = append(outputs, outputPath)
log.Printf("docs-i18n: [%d/%d] done %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond))
}
}
return processed, skipped, outputs, nil
}
func runDocParallel(ctx context.Context, ordered []string, docsRoot, srcLang, tgtLang string, overwrite bool, parallel int, glossary []GlossaryEntry, thinking string, newTranslator docsTranslatorFactory) (int, int, []string, error) {
jobs := make(chan docJob)
results := make(chan docResult, len(ordered))
ctx, cancel := context.WithCancel(ctx)
defer cancel()
var wg sync.WaitGroup
for worker := 0; worker < parallel; worker++ {
wg.Add(1)
go func(workerID int) {
defer wg.Done()
translator, err := newTranslator(srcLang, tgtLang, glossary, thinking)
if err != nil {
results <- docResult{err: err}
return
}
defer translator.Close()
for job := range jobs {
if ctx.Err() != nil {
return
}
log.Printf("docs-i18n: [w%d %d/%d] start %s", workerID, job.index, len(ordered), job.rel)
start := time.Now()
skip, outputPath, err := processFileDoc(ctx, translator, docsRoot, job.path, srcLang, tgtLang, overwrite)
results <- docResult{
index: job.index,
rel: job.rel,
output: outputPath,
duration: time.Since(start),
skipped: skip,
err: err,
}
if err != nil {
cancel()
return
}
}
}(worker + 1)
}
go func() {
defer close(jobs)
for index, file := range ordered {
job := docJob{index: index + 1, path: file, rel: resolveRelPath(docsRoot, file)}
select {
case <-ctx.Done():
return
case jobs <- job:
}
}
}()
go func() {
wg.Wait()
close(results)
}()
processed := 0
skipped := 0
outputs := []string{}
var firstErr error
for result := range results {
if result.err != nil && firstErr == nil {
firstErr = result.err
}
if result.skipped {
skipped++
log.Printf("docs-i18n: [w* %d/%d] skipped %s (%s)", result.index, len(ordered), result.rel, result.duration.Round(time.Millisecond))
} else if result.err == nil {
processed++
outputs = append(outputs, result.output)
log.Printf("docs-i18n: [w* %d/%d] done %s (%s)", result.index, len(ordered), result.rel, result.duration.Round(time.Millisecond))
}
}
return processed, skipped, outputs, firstErr
}
func runSegmentSequential(ctx context.Context, ordered []string, translator docsTranslator, tm *TranslationMemory, docsRoot, srcLang, tgtLang string) (int, []string, error) {
processed := 0
outputs := []string{}
for index, file := range ordered {
relPath := resolveRelPath(docsRoot, file)
log.Printf("docs-i18n: [%d/%d] start %s", index+1, len(ordered), relPath)
start := time.Now()
_, outputPath, err := processFile(ctx, translator, tm, docsRoot, file, srcLang, tgtLang)
if err != nil {
return processed, outputs, err
}
processed++
outputs = append(outputs, outputPath)
log.Printf("docs-i18n: [%d/%d] done %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond))
}
return processed, outputs, nil
}
func resolveRelPath(docsRoot, file string) string {
relPath := file
if _, rel, err := resolveDocsPath(docsRoot, file); err == nil {
relPath = rel
}
return relPath
}
func filterDocQueue(docsRoot, targetLang string, ordered []string) ([]string, int, error) {
pending := make([]string, 0, len(ordered))
skipped := 0
for _, file := range ordered {
absPath, relPath, err := resolveDocsPath(docsRoot, file)
if err != nil {
return nil, skipped, err
}
content, err := os.ReadFile(absPath)
if err != nil {
return nil, skipped, err
}
sourceHash := hashBytes(content)
outputPath := filepath.Join(docsRoot, targetLang, relPath)
skip, err := shouldSkipDoc(outputPath, sourceHash)
if err != nil {
return nil, skipped, err
}
if skip {
skipped++
continue
}
pending = append(pending, file)
}
return pending, skipped, nil
}