fix(docs): finalize i18n postprocess before skip (#92668)

Summary:
- Merged "fix(docs): finalize i18n postprocess before skip" after ClawSweeper review.

Automerge notes:
- No ClawSweeper repair was needed after automerge opt-in.

Validation:
- ClawSweeper review passed for head ad79445835.
- Required merge gates passed before the squash merge.

Prepared head SHA: ad79445835
Review: https://github.com/openclaw/openclaw/pull/92668#issuecomment-4698629026

Co-authored-by: Mason Huang <masonxhuang@tencent.com>
Co-authored-by: clawsweeper[bot] <274271284+clawsweeper[bot]@users.noreply.github.com>
Approved-by: hxy91819
Co-authored-by: hxy91819 <8814856+hxy91819@users.noreply.github.com>
This commit is contained in:
Mason Huang
2026-06-13 21:17:03 +08:00
committed by GitHub
parent dc493bc9a2
commit eaeedbf1f9
6 changed files with 378 additions and 32 deletions

View File

@@ -18,6 +18,14 @@ const (
bodyTagEnd = "</body>"
)
// docOutputStatus describes how much work an existing localized output file
// still needs, as reported by classifyDocOutput.
type docOutputStatus int

const (
	// docOutputNeedsTranslation is the zero value: the output is missing, has no
	// usable x-i18n source hash, or its stored hash differs from the source.
	docOutputNeedsTranslation = docOutputStatus(iota)
	// docOutputReady means the stored hash matches and nothing else is pending.
	docOutputReady
	// docOutputNeedsPostprocess means the stored hash matches but the
	// postprocess version marker is not the current one.
	docOutputNeedsPostprocess
)
func processFileDoc(ctx context.Context, translator docsTranslator, docsRoot, filePath, srcLang, tgtLang string, overwrite bool) (bool, string, error) {
absPath, relPath, err := resolveDocsPath(docsRoot, filePath)
if err != nil {
@@ -32,12 +40,15 @@ func processFileDoc(ctx context.Context, translator docsTranslator, docsRoot, fi
outputPath := filepath.Join(docsRoot, tgtLang, relPath)
if !overwrite {
skip, err := shouldSkipDoc(outputPath, currentHash)
status, err := classifyDocOutput(outputPath, currentHash, tgtLang)
if err != nil {
return false, "", err
}
if skip {
switch status {
case docOutputReady:
return true, "", nil
case docOutputNeedsPostprocess:
return true, outputPath, nil
}
}
@@ -138,31 +149,42 @@ func trimTagNewlines(value string) string {
return value
}
func shouldSkipDoc(outputPath string, sourceHash string) (bool, error) {
func classifyDocOutput(outputPath string, sourceHash string, targetLang string) (docOutputStatus, error) {
data, err := os.ReadFile(outputPath)
if err != nil {
if os.IsNotExist(err) {
return false, nil
return docOutputNeedsTranslation, nil
}
return false, err
return docOutputNeedsTranslation, err
}
frontMatter, _ := splitFrontMatter(string(data))
if frontMatter == "" {
return false, nil
return docOutputNeedsTranslation, nil
}
frontData := map[string]any{}
if err := yaml.Unmarshal([]byte(frontMatter), &frontData); err != nil {
return false, nil
return docOutputNeedsTranslation, nil
}
storedHash := extractSourceHash(frontData)
if storedHash == "" {
return false, nil
return docOutputNeedsTranslation, nil
}
return strings.EqualFold(storedHash, sourceHash), nil
if !strings.EqualFold(storedHash, sourceHash) {
return docOutputNeedsTranslation, nil
}
if strings.EqualFold(strings.TrimSpace(targetLang), "en") {
return docOutputReady, nil
}
postprocessVersion := extractPostprocessVersion(frontData)
if strings.EqualFold(postprocessVersion, localizedLinkPostprocessVersion) {
return docOutputReady, nil
}
return docOutputNeedsPostprocess, nil
}
func extractSourceHash(frontData map[string]any) string {
xi, ok := frontData["x-i18n"].(map[string]any)
xi, ok := extractXI18N(frontData)
if !ok {
return ""
}
@@ -173,6 +195,26 @@ func extractSourceHash(frontData map[string]any) string {
return strings.TrimSpace(value)
}
// extractPostprocessVersion returns the whitespace-trimmed string stored under
// x-i18n.postprocess_version in the decoded front matter. It returns "" when
// the x-i18n mapping is absent or the entry is missing or not a string.
func extractPostprocessVersion(frontData map[string]any) string {
	meta, found := extractXI18N(frontData)
	if !found {
		return ""
	}
	if version, isString := meta["postprocess_version"].(string); isString {
		return strings.TrimSpace(version)
	}
	return ""
}
// extractXI18N looks up the "x-i18n" entry of the decoded front matter and
// returns it when it is a map[string]any. The boolean reports whether such a
// mapping was found; on failure the returned map is nil.
func extractXI18N(frontData map[string]any) (map[string]any, bool) {
	section, isMap := frontData["x-i18n"].(map[string]any)
	if !isMap {
		return nil, false
	}
	return section, true
}
func logDocChunkPlan(relPath string, blocks []string, groups [][]string) {
totalBytes := 0
for _, block := range blocks {

View File

@@ -109,15 +109,17 @@ func runDocsI18N(ctx context.Context, cfg runConfig, files []string, newTranslat
}
totalFiles := len(ordered)
preSkipped := 0
prePostprocessFiles := []string{}
if cfg.mode == "doc" && !cfg.overwrite {
filtered, skipped, err := filterDocQueue(resolvedDocsRoot, cfg.targetLang, ordered)
filtered, skipped, existingOutputs, err := filterDocQueue(resolvedDocsRoot, cfg.targetLang, ordered, cfg.maxFiles)
if err != nil {
return err
}
ordered = filtered
preSkipped = skipped
prePostprocessFiles = append(prePostprocessFiles, existingOutputs...)
}
if cfg.maxFiles > 0 && cfg.maxFiles < len(ordered) {
if (cfg.mode != "doc" || cfg.overwrite) && cfg.maxFiles > 0 && cfg.maxFiles < len(ordered) {
ordered = ordered[:cfg.maxFiles]
}
@@ -130,7 +132,7 @@ func runDocsI18N(ctx context.Context, cfg runConfig, files []string, newTranslat
start := time.Now()
processed := 0
skipped := 0
localizedFiles := []string{}
localizedFiles := append([]string{}, prePostprocessFiles...)
var translationErr error
log.Printf("docs-i18n: mode=%s total=%d pending=%d pre_skipped=%d overwrite=%t thinking=%s parallel=%d", cfg.mode, totalFiles, len(ordered), preSkipped, cfg.overwrite, cfg.thinking, parallel)
@@ -217,6 +219,9 @@ func runDocSequential(ctx context.Context, ordered []string, translator docsTran
}
if skip {
skipped++
if outputPath != "" {
outputs = append(outputs, outputPath)
}
log.Printf("docs-i18n: [%d/%d] skipped %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond))
} else {
processed++
@@ -294,6 +299,9 @@ func runDocParallel(ctx context.Context, ordered []string, docsRoot, srcLang, tg
}
if result.skipped {
skipped++
if result.output != "" {
outputs = append(outputs, result.output)
}
log.Printf("docs-i18n: [w* %d/%d] skipped %s (%s)", result.index, len(ordered), result.rel, result.duration.Round(time.Millisecond))
} else if result.err != nil {
log.Printf("docs-i18n: [w* %d/%d] failed %s (%s): %v", result.index, len(ordered), result.rel, result.duration.Round(time.Millisecond), result.err)
@@ -339,29 +347,40 @@ func resolveRelPath(docsRoot, file string) string {
return relPath
}
func filterDocQueue(docsRoot, targetLang string, ordered []string) ([]string, int, error) {
func filterDocQueue(docsRoot, targetLang string, ordered []string, maxFiles int) ([]string, int, []string, error) {
pending := make([]string, 0, len(ordered))
existingOutputs := []string{}
skipped := 0
for _, file := range ordered {
absPath, relPath, err := resolveDocsPath(docsRoot, file)
if err != nil {
return nil, skipped, err
return nil, skipped, nil, err
}
content, err := os.ReadFile(absPath)
if err != nil {
return nil, skipped, err
return nil, skipped, nil, err
}
sourceHash := hashBytes(content)
outputPath := filepath.Join(docsRoot, targetLang, relPath)
skip, err := shouldSkipDoc(outputPath, sourceHash)
status, err := classifyDocOutput(outputPath, sourceHash, targetLang)
if err != nil {
return nil, skipped, err
return nil, skipped, nil, err
}
if skip {
switch status {
case docOutputReady:
skipped++
continue
case docOutputNeedsPostprocess:
if maxFiles > 0 && len(pending)+len(existingOutputs) >= maxFiles {
continue
}
skipped++
existingOutputs = append(existingOutputs, outputPath)
case docOutputNeedsTranslation:
if maxFiles > 0 && len(pending)+len(existingOutputs) >= maxFiles {
continue
}
pending = append(pending, file)
}
pending = append(pending, file)
}
return pending, skipped, nil
return pending, skipped, existingOutputs, nil
}

View File

@@ -219,6 +219,201 @@ func TestRunDocsI18NRewritesFinalLocalizedPageLinks(t *testing.T) {
}
}
// TestRunDocsI18NDoesNotSkipOutputAfterPostprocessFailure verifies that a page
// which was translated but whose postprocess step failed is classified as
// needing postprocess instead of being treated as ready.
func TestRunDocsI18NDoesNotSkipOutputAfterPostprocessFailure(t *testing.T) {
	t.Parallel()

	root := t.TempDir()
	source := filepath.Join(root, "gateway", "index.md")
	writeFile(t, source, stringsJoin(
		"---",
		"title: Gateway",
		"---",
		"",
		"See [Troubleshooting](/gateway/troubleshooting).",
	))

	// overwrite=true bypasses the skip classification so an output is produced.
	skipped, translated, err := processFileDoc(context.Background(), fakeDocsTranslator{}, root, source, "en", "zh-CN", true)
	switch {
	case err != nil:
		t.Fatalf("processFileDoc failed: %v", err)
	case skipped:
		t.Fatal("processFileDoc unexpectedly skipped translation")
	}

	// No docs.json was written under root; the test requires postprocess to fail.
	if postErr := postprocessLocalizedDocs(root, "zh-CN", []string{translated}); postErr == nil {
		t.Fatal("expected missing docs.json to fail postprocess")
	}

	raw, err := os.ReadFile(source)
	if err != nil {
		t.Fatalf("read source failed: %v", err)
	}
	got, err := classifyDocOutput(translated, hashBytes(raw), "zh-CN")
	if err != nil {
		t.Fatalf("classifyDocOutput failed: %v", err)
	}
	if want := docOutputNeedsPostprocess; got != want {
		t.Fatalf("expected failed-postprocess output to need postprocess, got %v", got)
	}
}
// TestRunDocsI18NOnlyBecomesSkippableAfterPostprocessSucceeds verifies the
// two-stage lifecycle of a translated page: it needs postprocess right after
// translation and is only classified as ready once postprocess has succeeded.
func TestRunDocsI18NOnlyBecomesSkippableAfterPostprocessSucceeds(t *testing.T) {
	t.Parallel()

	root := t.TempDir()
	writeFile(t, filepath.Join(root, ".i18n", "glossary.zh-CN.json"), "[]")
	writeFile(t, filepath.Join(root, "docs.json"), `{"redirects":[]}`)
	source := filepath.Join(root, "gateway", "index.md")
	writeFile(t, source, stringsJoin(
		"---",
		"title: Gateway",
		"---",
		"",
		"See [Troubleshooting](/gateway/troubleshooting).",
	))
	writeFile(t, filepath.Join(root, "gateway", "troubleshooting.md"), "# Troubleshooting\n")

	skipped, translated, err := processFileDoc(context.Background(), fakeDocsTranslator{}, root, source, "en", "zh-CN", true)
	switch {
	case err != nil:
		t.Fatalf("processFileDoc failed: %v", err)
	case skipped:
		t.Fatal("processFileDoc unexpectedly skipped translation")
	}

	raw, err := os.ReadFile(source)
	if err != nil {
		t.Fatalf("read source failed: %v", err)
	}

	// Stage one: translated, but postprocess has not run yet.
	before, err := classifyDocOutput(translated, hashBytes(raw), "zh-CN")
	if err != nil {
		t.Fatalf("classifyDocOutput before postprocess failed: %v", err)
	}
	if before != docOutputNeedsPostprocess {
		t.Fatalf("expected pending postprocess output to need postprocess, got %v", before)
	}

	if postErr := postprocessLocalizedDocs(root, "zh-CN", []string{translated}); postErr != nil {
		t.Fatalf("postprocessLocalizedDocs failed: %v", postErr)
	}

	// Stage two: postprocess succeeded, so the output may now be skipped.
	after, err := classifyDocOutput(translated, hashBytes(raw), "zh-CN")
	if err != nil {
		t.Fatalf("classifyDocOutput after postprocess failed: %v", err)
	}
	if after != docOutputReady {
		t.Fatalf("expected postprocessed output to be ready, got %v:\n%s", after, mustReadFile(t, translated))
	}
}
// TestClassifyDocOutputKeepsEnglishTargetsHashOnly verifies that for the "en"
// target a matching source hash alone makes the output ready, even when its
// postprocess_version marker is still the pending value.
func TestClassifyDocOutputKeepsEnglishTargetsHashOnly(t *testing.T) {
	t.Parallel()

	root := t.TempDir()
	source := filepath.Join(root, "gateway", "index.md")
	writeFile(t, source, stringsJoin(
		"---",
		"title: Gateway",
		"---",
		"",
		"See [Troubleshooting](/gateway/troubleshooting).",
	))
	sourceHash := hashBytes([]byte(mustReadFile(t, source)))

	output := filepath.Join(root, "en", "gateway", "index.md")
	writeFile(t, output, stringsJoin(
		"---",
		"title: Gateway",
		"x-i18n:",
		" source_hash: "+sourceHash,
		" postprocess_version: "+localizedLinkPostprocessPending,
		"---",
		"",
		"See [Troubleshooting](/gateway/troubleshooting).",
	))

	got, err := classifyDocOutput(output, sourceHash, "en")
	if err != nil {
		t.Fatalf("classifyDocOutput for English target failed: %v", err)
	}
	if want := docOutputReady; got != want {
		t.Fatalf("expected English target to remain ready with matching source hash, got %v", got)
	}
}
// TestFilterDocQueueSchedulesLegacyOutputsForPostprocessOnly verifies that an
// output with a matching source hash but no postprocess_version entry is not
// queued for translation; it is counted as skipped and returned for postprocess.
func TestFilterDocQueueSchedulesLegacyOutputsForPostprocessOnly(t *testing.T) {
	t.Parallel()

	root := t.TempDir()
	source := filepath.Join(root, "gateway", "index.md")
	writeFile(t, source, "# Gateway\n")

	// Legacy fixture: x-i18n carries only source_hash.
	output := filepath.Join(root, "zh-CN", "gateway", "index.md")
	writeFile(t, output, stringsJoin(
		"---",
		"title: 网关",
		"x-i18n:",
		" source_hash: "+hashBytes([]byte(mustReadFile(t, source))),
		"---",
		"",
		"See [Troubleshooting](/gateway/troubleshooting).",
	))

	// maxFiles=0 leaves the queue unbounded.
	queue, skipCount, postprocessOnly, err := filterDocQueue(root, "zh-CN", []string{source}, 0)
	if err != nil {
		t.Fatalf("filterDocQueue failed: %v", err)
	}
	if len(queue) != 0 {
		t.Fatalf("expected legacy matching output to skip translation, got pending=%v", queue)
	}
	if skipCount != 1 {
		t.Fatalf("expected one skipped translation, got %d", skipCount)
	}
	if !(len(postprocessOnly) == 1 && postprocessOnly[0] == output) {
		t.Fatalf("expected existing output to be queued for postprocess, got %v", postprocessOnly)
	}
}
// TestFilterDocQueueHonorsMaxAcrossPostprocessOutputs verifies that the
// maxFiles bound also limits postprocess-only outputs: with two eligible legacy
// outputs and maxFiles=1, only the first one is scheduled.
func TestFilterDocQueueHonorsMaxAcrossPostprocessOutputs(t *testing.T) {
	t.Parallel()

	root := t.TempDir()
	gatewaySource := filepath.Join(root, "gateway", "index.md")
	providerSource := filepath.Join(root, "providers", "example-provider.md")
	writeFile(t, gatewaySource, "# Gateway\n")
	writeFile(t, providerSource, "# Example provider\n")

	// Both outputs match their source hash but lack a postprocess_version entry.
	gatewayOutput := filepath.Join(root, "zh-CN", "gateway", "index.md")
	providerOutput := filepath.Join(root, "zh-CN", "providers", "example-provider.md")
	writeFile(t, gatewayOutput, stringsJoin(
		"---",
		"title: 网关",
		"x-i18n:",
		" source_hash: "+hashBytes([]byte(mustReadFile(t, gatewaySource))),
		"---",
		"",
		"# 网关",
	))
	writeFile(t, providerOutput, stringsJoin(
		"---",
		"title: 示例 provider",
		"x-i18n:",
		" source_hash: "+hashBytes([]byte(mustReadFile(t, providerSource))),
		"---",
		"",
		"# 示例 provider",
	))

	sources := []string{gatewaySource, providerSource}
	queue, skipCount, postprocessOnly, err := filterDocQueue(root, "zh-CN", sources, 1)
	if err != nil {
		t.Fatalf("filterDocQueue failed: %v", err)
	}
	if len(queue) != 0 {
		t.Fatalf("expected no translations to be queued, got %v", queue)
	}
	if skipCount != 1 {
		t.Fatalf("expected one bounded postprocess-only skip, got %d", skipCount)
	}
	if !(len(postprocessOnly) == 1 && postprocessOnly[0] == gatewayOutput) {
		t.Fatalf("expected only first output to be queued for postprocess, got %v", postprocessOnly)
	}
}
func TestRunDocsI18NAllowPartialKeepsEarlierSuccessfulDocOutputs(t *testing.T) {
t.Parallel()

View File

@@ -12,6 +12,11 @@ import (
"gopkg.in/yaml.v3"
)
// Values stored under x-i18n.postprocess_version in localized front matter.
const (
// localizedLinkPostprocessPending is the marker encodeFrontMatter writes for a
// freshly translated page, before link postprocessing has been recorded.
localizedLinkPostprocessPending = "pending"
// localizedLinkPostprocessVersion is the marker postprocessLocalizedDocs sets;
// classifyDocOutput treats a non-English output as ready only when it matches.
localizedLinkPostprocessVersion = "locale-links-v1"
)
func processFile(ctx context.Context, translator docsTranslator, tm *TranslationMemory, docsRoot, filePath, srcLang, tgtLang string) (bool, string, error) {
absPath, relPath, err := resolveDocsPath(docsRoot, filePath)
if err != nil {
@@ -120,12 +125,13 @@ func encodeFrontMatter(frontData map[string]any, relPath string, source []byte)
frontData = map[string]any{}
}
frontData["x-i18n"] = map[string]any{
"source_path": relPath,
"source_hash": hashBytes(source),
"provider": docsI18nProvider(),
"model": docsI18nModel(),
"workflow": workflowVersion,
"generated_at": time.Now().UTC().Format(time.RFC3339),
"source_path": relPath,
"source_hash": hashBytes(source),
"provider": docsI18nProvider(),
"model": docsI18nModel(),
"workflow": workflowVersion,
"generated_at": time.Now().UTC().Format(time.RFC3339),
"postprocess_version": localizedLinkPostprocessPending,
}
encoded, err := yaml.Marshal(frontData)
if err != nil {

View File

@@ -2,6 +2,7 @@ package main
import (
"os"
"strings"
)
func postprocessLocalizedDocs(docsRoot, targetLang string, localizedFiles []string) error {
@@ -22,13 +23,14 @@ func postprocessLocalizedDocs(docsRoot, targetLang string, localizedFiles []stri
frontMatter, body := splitFrontMatter(string(content))
rewrittenBody := routes.localizeBodyLinks(body)
if rewrittenBody == body {
updatedFrontMatter := setPostprocessVersion(frontMatter, localizedLinkPostprocessVersion)
if rewrittenBody == body && updatedFrontMatter == frontMatter {
continue
}
output := rewrittenBody
if frontMatter != "" {
output = "---\n" + frontMatter + "\n---\n\n" + rewrittenBody
if updatedFrontMatter != "" {
output = "---\n" + updatedFrontMatter + "\n---\n\n" + rewrittenBody
}
if err := os.WriteFile(path, []byte(output), 0o644); err != nil {
@@ -38,3 +40,55 @@ func postprocessLocalizedDocs(docsRoot, targetLang string, localizedFiles []stri
return nil
}
// setPostprocessVersion returns frontMatter with the postprocess_version entry
// of its x-i18n mapping set to version.
//
// frontMatter is the raw YAML text between the "---" fences. The edit is made
// line by line instead of re-marshalling, so every other line is returned
// untouched. An existing postprocess_version line that is a direct child of
// x-i18n is rewritten in place; otherwise a new entry is inserted at the end of
// the x-i18n block. Front matter that is blank, or that has no "x-i18n:" line,
// is returned unchanged.
func setPostprocessVersion(frontMatter, version string) string {
	if strings.TrimSpace(frontMatter) == "" {
		return frontMatter
	}

	const key = "postprocess_version:"

	lines := strings.Split(frontMatter, "\n")
	headerLine := -1
	headerIndent := 0
	insertAt := -1
	// Fallback indentation, used only when the x-i18n block has no children.
	childIndent := " "
	sawChild := false

	for i, line := range lines {
		trimmed := strings.TrimSpace(line)
		if trimmed == "x-i18n:" {
			headerLine = i
			headerIndent = len(leadingWhitespace(line))
			// Until a dedented line proves otherwise, the block runs to the end.
			insertAt = len(lines)
			sawChild = false
			continue
		}
		if headerLine == -1 || trimmed == "" {
			continue
		}

		indent := leadingWhitespace(line)
		if len(indent) <= headerIndent {
			// First line at or above the header's level: the block ends here.
			insertAt = i
			break
		}

		// Only the first child fixes the indentation for direct children.
		// Deeper lines belong to nested values and must neither change where
		// the new key is placed nor be mistaken for the key itself.
		if !sawChild {
			childIndent = indent
			sawChild = true
		}
		if indent == childIndent && strings.HasPrefix(trimmed, key) {
			lines[i] = indent + key + " " + version
			return strings.Join(lines, "\n")
		}
	}

	if headerLine == -1 {
		return frontMatter
	}

	updated := make([]string, 0, len(lines)+1)
	updated = append(updated, lines[:insertAt]...)
	updated = append(updated, childIndent+key+" "+version)
	updated = append(updated, lines[insertAt:]...)
	return strings.Join(updated, "\n")
}

// leadingWhitespace returns the run of spaces and tabs at the start of text.
func leadingWhitespace(text string) string {
	rest := strings.TrimLeft(text, " \t")
	return text[:len(text)-len(rest)]
}

View File

@@ -41,7 +41,7 @@ func TestPostprocessLocalizedDocsFixesStaleLinksAfterLaterPagesExist(t *testing.
}
got := mustReadFile(t, filepath.Join(docsRoot, "zh-CN", "gateway", "index.md"))
if !strings.Contains(got, "---\ntitle: 网关\nx-i18n:\n source_hash: test\n---\n\n") {
if !strings.Contains(got, "---\ntitle: 网关\nx-i18n:\n source_hash: test\n postprocess_version: "+localizedLinkPostprocessVersion+"\n---\n\n") {
t.Fatalf("front matter corrupted after rewrite:\n%s", got)
}
want := "See [Troubleshooting](/zh-CN/gateway/troubleshooting)."
@@ -231,6 +231,36 @@ func TestPostprocessLocalizedDocsContinuesAfterUnchangedFile(t *testing.T) {
}
}
// TestPostprocessLocalizedDocsFinalizesPostprocessVersionWithoutBodyRewrite
// verifies that postprocess still replaces a pending postprocess_version marker
// with the current version when the body link is already localized.
func TestPostprocessLocalizedDocsFinalizesPostprocessVersionWithoutBodyRewrite(t *testing.T) {
	t.Parallel()

	root := t.TempDir()
	page := filepath.Join(root, "zh-CN", "gateway", "index.md")
	writeFile(t, filepath.Join(root, "docs.json"), `{"redirects":[]}`)
	writeFile(t, page, stringsJoin(
		"---",
		"title: 网关",
		"x-i18n:",
		" source_hash: test",
		" postprocess_version: "+localizedLinkPostprocessPending,
		"---",
		"",
		"See [Troubleshooting](/zh-CN/gateway/troubleshooting).",
	))

	err := postprocessLocalizedDocs(root, "zh-CN", []string{page})
	if err != nil {
		t.Fatalf("postprocessLocalizedDocs failed: %v", err)
	}

	written := mustReadFile(t, page)
	markerFinalized := strings.Contains(written, " postprocess_version: "+localizedLinkPostprocessVersion)
	if !markerFinalized {
		t.Fatalf("expected postprocess version marker to be finalized:\n%s", written)
	}
	linkKept := containsLine(written, "See [Troubleshooting](/zh-CN/gateway/troubleshooting).")
	if !linkKept {
		t.Fatalf("expected localized link to remain unchanged, got:\n%s", written)
	}
}
func stringsJoin(lines ...string) string {
result := ""
for i, line := range lines {