mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-12 17:51:22 +00:00
* docs: relocalize stale locale links * docs: unify locale link postprocessing * docs: preserve relocalized frontmatter * docs: relocalize partial docs runs * docs: scope locale link postprocessing * docs: continue scoped relocalization * docs: drain parallel i18n results * docs: add i18n pipeline link regression tests * docs: clarify i18n pipeline regression test intent * docs: update provider references in i18n tests to use example-provider * fix: note docs i18n link relocalization * docs: rephrase gateway local ws sentence
340 lines
9.5 KiB
Go
340 lines
9.5 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"flag"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"path/filepath"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
type docJob struct {
|
|
index int
|
|
path string
|
|
rel string
|
|
}
|
|
|
|
type docResult struct {
|
|
index int
|
|
rel string
|
|
output string
|
|
duration time.Duration
|
|
skipped bool
|
|
err error
|
|
}
|
|
|
|
type runConfig struct {
|
|
targetLang string
|
|
sourceLang string
|
|
docsRoot string
|
|
tmPath string
|
|
mode string
|
|
thinking string
|
|
overwrite bool
|
|
maxFiles int
|
|
parallel int
|
|
}
|
|
|
|
func main() {
|
|
var (
|
|
targetLang = flag.String("lang", "zh-CN", "target language (e.g., zh-CN)")
|
|
sourceLang = flag.String("src", "en", "source language")
|
|
docsRoot = flag.String("docs", "docs", "docs root")
|
|
tmPath = flag.String("tm", "", "translation memory path")
|
|
mode = flag.String("mode", "segment", "translation mode (segment|doc)")
|
|
thinking = flag.String("thinking", "high", "thinking level (low|high)")
|
|
overwrite = flag.Bool("overwrite", false, "overwrite existing translations")
|
|
maxFiles = flag.Int("max", 0, "max files to process (0 = all)")
|
|
parallel = flag.Int("parallel", 1, "parallel workers for doc mode")
|
|
)
|
|
flag.Parse()
|
|
files := flag.Args()
|
|
if len(files) == 0 {
|
|
fatal(fmt.Errorf("no doc files provided"))
|
|
}
|
|
|
|
if err := runDocsI18N(context.Background(), runConfig{
|
|
targetLang: *targetLang,
|
|
sourceLang: *sourceLang,
|
|
docsRoot: *docsRoot,
|
|
tmPath: *tmPath,
|
|
mode: *mode,
|
|
thinking: *thinking,
|
|
overwrite: *overwrite,
|
|
maxFiles: *maxFiles,
|
|
parallel: *parallel,
|
|
}, files, func(srcLang, tgtLang string, glossary []GlossaryEntry, thinking string) (docsTranslator, error) {
|
|
return NewPiTranslator(srcLang, tgtLang, glossary, thinking)
|
|
}); err != nil {
|
|
fatal(err)
|
|
}
|
|
}
|
|
|
|
func runDocsI18N(ctx context.Context, cfg runConfig, files []string, newTranslator docsTranslatorFactory) error {
|
|
if len(files) == 0 {
|
|
return fmt.Errorf("no doc files provided")
|
|
}
|
|
|
|
resolvedDocsRoot, err := filepath.Abs(cfg.docsRoot)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
tmPath := cfg.tmPath
|
|
if tmPath == "" {
|
|
tmPath = filepath.Join(resolvedDocsRoot, ".i18n", fmt.Sprintf("%s.tm.jsonl", cfg.targetLang))
|
|
}
|
|
|
|
glossaryPath := filepath.Join(resolvedDocsRoot, ".i18n", fmt.Sprintf("glossary.%s.json", cfg.targetLang))
|
|
glossary, err := LoadGlossary(glossaryPath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
tm, err := LoadTranslationMemory(tmPath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
ordered, err := orderFiles(resolvedDocsRoot, files)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
totalFiles := len(ordered)
|
|
preSkipped := 0
|
|
if cfg.mode == "doc" && !cfg.overwrite {
|
|
filtered, skipped, err := filterDocQueue(resolvedDocsRoot, cfg.targetLang, ordered)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ordered = filtered
|
|
preSkipped = skipped
|
|
}
|
|
if cfg.maxFiles > 0 && cfg.maxFiles < len(ordered) {
|
|
ordered = ordered[:cfg.maxFiles]
|
|
}
|
|
|
|
parallel := cfg.parallel
|
|
if parallel < 1 {
|
|
parallel = 1
|
|
}
|
|
|
|
log.SetFlags(log.LstdFlags)
|
|
start := time.Now()
|
|
processed := 0
|
|
skipped := 0
|
|
localizedFiles := []string{}
|
|
var runErr error
|
|
|
|
log.Printf("docs-i18n: mode=%s total=%d pending=%d pre_skipped=%d overwrite=%t thinking=%s parallel=%d", cfg.mode, totalFiles, len(ordered), preSkipped, cfg.overwrite, cfg.thinking, parallel)
|
|
switch cfg.mode {
|
|
case "doc":
|
|
if parallel > 1 {
|
|
proc, skip, outputs, err := runDocParallel(ctx, ordered, resolvedDocsRoot, cfg.sourceLang, cfg.targetLang, cfg.overwrite, parallel, glossary, cfg.thinking, newTranslator)
|
|
processed += proc
|
|
skipped += skip
|
|
localizedFiles = append(localizedFiles, outputs...)
|
|
if err != nil {
|
|
runErr = err
|
|
}
|
|
} else {
|
|
translator, err := newTranslator(cfg.sourceLang, cfg.targetLang, glossary, cfg.thinking)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer translator.Close()
|
|
proc, skip, outputs, err := runDocSequential(ctx, ordered, translator, resolvedDocsRoot, cfg.sourceLang, cfg.targetLang, cfg.overwrite)
|
|
processed += proc
|
|
skipped += skip
|
|
localizedFiles = append(localizedFiles, outputs...)
|
|
if err != nil {
|
|
runErr = err
|
|
}
|
|
}
|
|
case "segment":
|
|
if parallel > 1 {
|
|
return fmt.Errorf("parallel processing is only supported in doc mode")
|
|
}
|
|
translator, err := newTranslator(cfg.sourceLang, cfg.targetLang, glossary, cfg.thinking)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer translator.Close()
|
|
proc, outputs, err := runSegmentSequential(ctx, ordered, translator, tm, resolvedDocsRoot, cfg.sourceLang, cfg.targetLang)
|
|
processed += proc
|
|
localizedFiles = append(localizedFiles, outputs...)
|
|
if err != nil {
|
|
runErr = err
|
|
}
|
|
default:
|
|
return fmt.Errorf("unknown mode: %s", cfg.mode)
|
|
}
|
|
|
|
if err := tm.Save(); err != nil && runErr == nil {
|
|
runErr = err
|
|
}
|
|
if err := postprocessLocalizedDocs(resolvedDocsRoot, cfg.targetLang, localizedFiles); err != nil && runErr == nil {
|
|
runErr = err
|
|
}
|
|
elapsed := time.Since(start).Round(time.Millisecond)
|
|
log.Printf("docs-i18n: completed processed=%d skipped=%d elapsed=%s", processed, skipped, elapsed)
|
|
return runErr
|
|
}
|
|
|
|
func runDocSequential(ctx context.Context, ordered []string, translator docsTranslator, docsRoot, srcLang, tgtLang string, overwrite bool) (int, int, []string, error) {
|
|
processed := 0
|
|
skipped := 0
|
|
outputs := []string{}
|
|
for index, file := range ordered {
|
|
relPath := resolveRelPath(docsRoot, file)
|
|
log.Printf("docs-i18n: [%d/%d] start %s", index+1, len(ordered), relPath)
|
|
start := time.Now()
|
|
skip, outputPath, err := processFileDoc(ctx, translator, docsRoot, file, srcLang, tgtLang, overwrite)
|
|
if err != nil {
|
|
return processed, skipped, outputs, err
|
|
}
|
|
if skip {
|
|
skipped++
|
|
log.Printf("docs-i18n: [%d/%d] skipped %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond))
|
|
} else {
|
|
processed++
|
|
outputs = append(outputs, outputPath)
|
|
log.Printf("docs-i18n: [%d/%d] done %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond))
|
|
}
|
|
}
|
|
return processed, skipped, outputs, nil
|
|
}
|
|
|
|
func runDocParallel(ctx context.Context, ordered []string, docsRoot, srcLang, tgtLang string, overwrite bool, parallel int, glossary []GlossaryEntry, thinking string, newTranslator docsTranslatorFactory) (int, int, []string, error) {
|
|
jobs := make(chan docJob)
|
|
results := make(chan docResult, len(ordered))
|
|
ctx, cancel := context.WithCancel(ctx)
|
|
defer cancel()
|
|
|
|
var wg sync.WaitGroup
|
|
for worker := 0; worker < parallel; worker++ {
|
|
wg.Add(1)
|
|
go func(workerID int) {
|
|
defer wg.Done()
|
|
translator, err := newTranslator(srcLang, tgtLang, glossary, thinking)
|
|
if err != nil {
|
|
results <- docResult{err: err}
|
|
return
|
|
}
|
|
defer translator.Close()
|
|
for job := range jobs {
|
|
if ctx.Err() != nil {
|
|
return
|
|
}
|
|
log.Printf("docs-i18n: [w%d %d/%d] start %s", workerID, job.index, len(ordered), job.rel)
|
|
start := time.Now()
|
|
skip, outputPath, err := processFileDoc(ctx, translator, docsRoot, job.path, srcLang, tgtLang, overwrite)
|
|
results <- docResult{
|
|
index: job.index,
|
|
rel: job.rel,
|
|
output: outputPath,
|
|
duration: time.Since(start),
|
|
skipped: skip,
|
|
err: err,
|
|
}
|
|
if err != nil {
|
|
cancel()
|
|
return
|
|
}
|
|
}
|
|
}(worker + 1)
|
|
}
|
|
|
|
go func() {
|
|
defer close(jobs)
|
|
for index, file := range ordered {
|
|
job := docJob{index: index + 1, path: file, rel: resolveRelPath(docsRoot, file)}
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case jobs <- job:
|
|
}
|
|
}
|
|
}()
|
|
|
|
go func() {
|
|
wg.Wait()
|
|
close(results)
|
|
}()
|
|
|
|
processed := 0
|
|
skipped := 0
|
|
outputs := []string{}
|
|
var firstErr error
|
|
for result := range results {
|
|
if result.err != nil && firstErr == nil {
|
|
firstErr = result.err
|
|
}
|
|
if result.skipped {
|
|
skipped++
|
|
log.Printf("docs-i18n: [w* %d/%d] skipped %s (%s)", result.index, len(ordered), result.rel, result.duration.Round(time.Millisecond))
|
|
} else if result.err == nil {
|
|
processed++
|
|
outputs = append(outputs, result.output)
|
|
log.Printf("docs-i18n: [w* %d/%d] done %s (%s)", result.index, len(ordered), result.rel, result.duration.Round(time.Millisecond))
|
|
}
|
|
}
|
|
return processed, skipped, outputs, firstErr
|
|
}
|
|
|
|
func runSegmentSequential(ctx context.Context, ordered []string, translator docsTranslator, tm *TranslationMemory, docsRoot, srcLang, tgtLang string) (int, []string, error) {
|
|
processed := 0
|
|
outputs := []string{}
|
|
for index, file := range ordered {
|
|
relPath := resolveRelPath(docsRoot, file)
|
|
log.Printf("docs-i18n: [%d/%d] start %s", index+1, len(ordered), relPath)
|
|
start := time.Now()
|
|
_, outputPath, err := processFile(ctx, translator, tm, docsRoot, file, srcLang, tgtLang)
|
|
if err != nil {
|
|
return processed, outputs, err
|
|
}
|
|
processed++
|
|
outputs = append(outputs, outputPath)
|
|
log.Printf("docs-i18n: [%d/%d] done %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond))
|
|
}
|
|
return processed, outputs, nil
|
|
}
|
|
|
|
func resolveRelPath(docsRoot, file string) string {
|
|
relPath := file
|
|
if _, rel, err := resolveDocsPath(docsRoot, file); err == nil {
|
|
relPath = rel
|
|
}
|
|
return relPath
|
|
}
|
|
|
|
func filterDocQueue(docsRoot, targetLang string, ordered []string) ([]string, int, error) {
|
|
pending := make([]string, 0, len(ordered))
|
|
skipped := 0
|
|
for _, file := range ordered {
|
|
absPath, relPath, err := resolveDocsPath(docsRoot, file)
|
|
if err != nil {
|
|
return nil, skipped, err
|
|
}
|
|
content, err := os.ReadFile(absPath)
|
|
if err != nil {
|
|
return nil, skipped, err
|
|
}
|
|
sourceHash := hashBytes(content)
|
|
outputPath := filepath.Join(docsRoot, targetLang, relPath)
|
|
skip, err := shouldSkipDoc(outputPath, sourceHash)
|
|
if err != nil {
|
|
return nil, skipped, err
|
|
}
|
|
if skip {
|
|
skipped++
|
|
continue
|
|
}
|
|
pending = append(pending, file)
|
|
}
|
|
return pending, skipped, nil
|
|
}
|