From eaeedbf1f96bad726d708cb713e5916e42fddfa3 Mon Sep 17 00:00:00 2001 From: Mason Huang Date: Sat, 13 Jun 2026 21:17:03 +0800 Subject: [PATCH] fix(docs): finalize i18n postprocess before skip (#92668) Summary: - Merged fix(docs): finalize i18n postprocess before skip after ClawSweeper review. Automerge notes: - No ClawSweeper repair was needed after automerge opt-in. Validation: - ClawSweeper review passed for head ad79445835f69493d2bb89b9ae3094c62db455a3. - Required merge gates passed before the squash merge. Prepared head SHA: ad79445835f69493d2bb89b9ae3094c62db455a3 Review: https://github.com/openclaw/openclaw/pull/92668#issuecomment-4698629026 Co-authored-by: Mason Huang Co-authored-by: clawsweeper[bot] <274271284+clawsweeper[bot]@users.noreply.github.com> Approved-by: hxy91819 Co-authored-by: hxy91819 <8814856+hxy91819@users.noreply.github.com> --- scripts/docs-i18n/doc_mode.go | 62 +++++++-- scripts/docs-i18n/main.go | 43 ++++-- scripts/docs-i18n/main_test.go | 195 +++++++++++++++++++++++++++ scripts/docs-i18n/process.go | 18 ++- scripts/docs-i18n/relocalize.go | 60 ++++++++- scripts/docs-i18n/relocalize_test.go | 32 ++++- 6 files changed, 378 insertions(+), 32 deletions(-) diff --git a/scripts/docs-i18n/doc_mode.go b/scripts/docs-i18n/doc_mode.go index 6d62b4ab0fa..7da6eb149fe 100644 --- a/scripts/docs-i18n/doc_mode.go +++ b/scripts/docs-i18n/doc_mode.go @@ -18,6 +18,14 @@ const ( bodyTagEnd = "" ) +type docOutputStatus int + +const ( + docOutputNeedsTranslation docOutputStatus = iota + docOutputReady + docOutputNeedsPostprocess +) + func processFileDoc(ctx context.Context, translator docsTranslator, docsRoot, filePath, srcLang, tgtLang string, overwrite bool) (bool, string, error) { absPath, relPath, err := resolveDocsPath(docsRoot, filePath) if err != nil { @@ -32,12 +40,15 @@ func processFileDoc(ctx context.Context, translator docsTranslator, docsRoot, fi outputPath := filepath.Join(docsRoot, tgtLang, relPath) if !overwrite { - skip, err := shouldSkipDoc(outputPath, currentHash) + status, err := classifyDocOutput(outputPath, currentHash, tgtLang) if err != nil { return false, "", err } - if skip { + switch status { + case docOutputReady: return true, "", nil + case docOutputNeedsPostprocess: + return true, outputPath, nil } } @@ -138,31 +149,42 @@ func trimTagNewlines(value string) string { return value } -func shouldSkipDoc(outputPath string, sourceHash string) (bool, error) { +func classifyDocOutput(outputPath string, sourceHash string, targetLang string) (docOutputStatus, error) { data, err := os.ReadFile(outputPath) if err != nil { if os.IsNotExist(err) { - return false, nil + return docOutputNeedsTranslation, nil } - return false, err + return docOutputNeedsTranslation, err } frontMatter, _ := splitFrontMatter(string(data)) if frontMatter == "" { - return false, nil + return docOutputNeedsTranslation, nil } frontData := map[string]any{} if err := yaml.Unmarshal([]byte(frontMatter), &frontData); err != nil { - return false, nil + return docOutputNeedsTranslation, nil } storedHash := extractSourceHash(frontData) if storedHash == "" { - return false, nil + return docOutputNeedsTranslation, nil } - return strings.EqualFold(storedHash, sourceHash), nil + if !strings.EqualFold(storedHash, sourceHash) { + return docOutputNeedsTranslation, nil + } + if strings.EqualFold(strings.TrimSpace(targetLang), "en") { + return docOutputReady, nil + } + + postprocessVersion := extractPostprocessVersion(frontData) + if strings.EqualFold(postprocessVersion, localizedLinkPostprocessVersion) { + return docOutputReady, nil + } + return docOutputNeedsPostprocess, nil } func extractSourceHash(frontData map[string]any) string { - xi, ok := frontData["x-i18n"].(map[string]any) + xi, ok := extractXI18N(frontData) if !ok { return "" } @@ -173,6 +195,26 @@ func extractSourceHash(frontData map[string]any) string { return strings.TrimSpace(value) } +func extractPostprocessVersion(frontData map[string]any) string { + xi, ok := extractXI18N(frontData) + if !ok { + return "" + } + value, ok := xi["postprocess_version"].(string) + if !ok { + return "" + } + return strings.TrimSpace(value) +} + +func extractXI18N(frontData map[string]any) (map[string]any, bool) { + xi, ok := frontData["x-i18n"].(map[string]any) + if ok { + return xi, true + } + return nil, false +} + func logDocChunkPlan(relPath string, blocks []string, groups [][]string) { totalBytes := 0 for _, block := range blocks { diff --git a/scripts/docs-i18n/main.go b/scripts/docs-i18n/main.go index 087d763ac69..89dd24d281f 100644 --- a/scripts/docs-i18n/main.go +++ b/scripts/docs-i18n/main.go @@ -109,15 +109,17 @@ func runDocsI18N(ctx context.Context, cfg runConfig, files []string, newTranslat } totalFiles := len(ordered) preSkipped := 0 + prePostprocessFiles := []string{} if cfg.mode == "doc" && !cfg.overwrite { - filtered, skipped, err := filterDocQueue(resolvedDocsRoot, cfg.targetLang, ordered) + filtered, skipped, existingOutputs, err := filterDocQueue(resolvedDocsRoot, cfg.targetLang, ordered, cfg.maxFiles) if err != nil { return err } ordered = filtered preSkipped = skipped + prePostprocessFiles = append(prePostprocessFiles, existingOutputs...) } - if cfg.maxFiles > 0 && cfg.maxFiles < len(ordered) { + if (cfg.mode != "doc" || cfg.overwrite) && cfg.maxFiles > 0 && cfg.maxFiles < len(ordered) { ordered = ordered[:cfg.maxFiles] } @@ -130,7 +132,7 @@ func runDocsI18N(ctx context.Context, cfg runConfig, files []string, newTranslat start := time.Now() processed := 0 skipped := 0 - localizedFiles := []string{} + localizedFiles := append([]string{}, prePostprocessFiles...) var translationErr error log.Printf("docs-i18n: mode=%s total=%d pending=%d pre_skipped=%d overwrite=%t thinking=%s parallel=%d", cfg.mode, totalFiles, len(ordered), preSkipped, cfg.overwrite, cfg.thinking, parallel) @@ -217,6 +219,9 @@ func runDocSequential(ctx context.Context, ordered []string, translator docsTran } if skip { skipped++ + if outputPath != "" { + outputs = append(outputs, outputPath) + } log.Printf("docs-i18n: [%d/%d] skipped %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond)) } else { processed++ @@ -294,6 +299,9 @@ func runDocParallel(ctx context.Context, ordered []string, docsRoot, srcLang, tg } if result.skipped { skipped++ + if result.output != "" { + outputs = append(outputs, result.output) + } log.Printf("docs-i18n: [w* %d/%d] skipped %s (%s)", result.index, len(ordered), result.rel, result.duration.Round(time.Millisecond)) } else if result.err != nil { log.Printf("docs-i18n: [w* %d/%d] failed %s (%s): %v", result.index, len(ordered), result.rel, result.duration.Round(time.Millisecond), result.err) @@ -339,29 +347,40 @@ func resolveRelPath(docsRoot, file string) string { return relPath } -func filterDocQueue(docsRoot, targetLang string, ordered []string) ([]string, int, error) { +func filterDocQueue(docsRoot, targetLang string, ordered []string, maxFiles int) ([]string, int, []string, error) { pending := make([]string, 0, len(ordered)) + existingOutputs := []string{} skipped := 0 for _, file := range ordered { absPath, relPath, err := resolveDocsPath(docsRoot, file) if err != nil { - return nil, skipped, err + return nil, skipped, nil, err } content, err := os.ReadFile(absPath) if err != nil { - return nil, skipped, err + return nil, skipped, nil, err } sourceHash := hashBytes(content) outputPath := filepath.Join(docsRoot, targetLang, relPath) - skip, err := shouldSkipDoc(outputPath, sourceHash) + status, err := classifyDocOutput(outputPath, sourceHash, targetLang) if err != nil { - return nil, skipped, err + return nil, skipped, nil, err } - if skip { + switch status { + case docOutputReady: skipped++ - continue + case docOutputNeedsPostprocess: + if maxFiles > 0 && len(pending)+len(existingOutputs) >= maxFiles { + continue + } + skipped++ + existingOutputs = append(existingOutputs, outputPath) + case docOutputNeedsTranslation: + if maxFiles > 0 && len(pending)+len(existingOutputs) >= maxFiles { + continue + } + pending = append(pending, file) } - pending = append(pending, file) } - return pending, skipped, nil + return pending, skipped, existingOutputs, nil } diff --git a/scripts/docs-i18n/main_test.go b/scripts/docs-i18n/main_test.go index 5d08d33ab7e..57f91fd95e9 100644 --- a/scripts/docs-i18n/main_test.go +++ b/scripts/docs-i18n/main_test.go @@ -219,6 +219,201 @@ func TestRunDocsI18NRewritesFinalLocalizedPageLinks(t *testing.T) { } } +func TestRunDocsI18NDoesNotSkipOutputAfterPostprocessFailure(t *testing.T) { + t.Parallel() + + docsRoot := t.TempDir() + sourcePath := filepath.Join(docsRoot, "gateway", "index.md") + writeFile(t, sourcePath, stringsJoin( + "---", + "title: Gateway", + "---", + "", + "See [Troubleshooting](/gateway/troubleshooting).", + )) + + skip, outputPath, err := processFileDoc(context.Background(), fakeDocsTranslator{}, docsRoot, sourcePath, "en", "zh-CN", true) + if err != nil { + t.Fatalf("processFileDoc failed: %v", err) + } + if skip { + t.Fatal("processFileDoc unexpectedly skipped translation") + } + if err := postprocessLocalizedDocs(docsRoot, "zh-CN", []string{outputPath}); err == nil { + t.Fatal("expected missing docs.json to fail postprocess") + } + + sourceBytes, err := os.ReadFile(sourcePath) + if err != nil { + t.Fatalf("read source failed: %v", err) + } + status, err := classifyDocOutput(outputPath, hashBytes(sourceBytes), "zh-CN") + if err != nil { + t.Fatalf("classifyDocOutput failed: %v", err) + } + if status != docOutputNeedsPostprocess { + t.Fatalf("expected failed-postprocess output to need postprocess, got %v", status) + } +} + +func TestRunDocsI18NOnlyBecomesSkippableAfterPostprocessSucceeds(t *testing.T) { + t.Parallel() + + docsRoot := t.TempDir() + writeFile(t, filepath.Join(docsRoot, ".i18n", "glossary.zh-CN.json"), "[]") + writeFile(t, filepath.Join(docsRoot, "docs.json"), `{"redirects":[]}`) + sourcePath := filepath.Join(docsRoot, "gateway", "index.md") + writeFile(t, sourcePath, stringsJoin( + "---", + "title: Gateway", + "---", + "", + "See [Troubleshooting](/gateway/troubleshooting).", + )) + writeFile(t, filepath.Join(docsRoot, "gateway", "troubleshooting.md"), "# Troubleshooting\n") + + skip, outputPath, err := processFileDoc(context.Background(), fakeDocsTranslator{}, docsRoot, sourcePath, "en", "zh-CN", true) + if err != nil { + t.Fatalf("processFileDoc failed: %v", err) + } + if skip { + t.Fatal("processFileDoc unexpectedly skipped translation") + } + + sourceBytes, err := os.ReadFile(sourcePath) + if err != nil { + t.Fatalf("read source failed: %v", err) + } + status, err := classifyDocOutput(outputPath, hashBytes(sourceBytes), "zh-CN") + if err != nil { + t.Fatalf("classifyDocOutput before postprocess failed: %v", err) + } + if status != docOutputNeedsPostprocess { + t.Fatalf("expected pending postprocess output to need postprocess, got %v", status) + } + + if err := postprocessLocalizedDocs(docsRoot, "zh-CN", []string{outputPath}); err != nil { + t.Fatalf("postprocessLocalizedDocs failed: %v", err) + } + + status, err = classifyDocOutput(outputPath, hashBytes(sourceBytes), "zh-CN") + if err != nil { + t.Fatalf("classifyDocOutput after postprocess failed: %v", err) + } + if status != docOutputReady { + t.Fatalf("expected postprocessed output to be ready, got %v:\n%s", status, mustReadFile(t, outputPath)) + } +} + +func TestClassifyDocOutputKeepsEnglishTargetsHashOnly(t *testing.T) { + t.Parallel() + + docsRoot := t.TempDir() + sourcePath := filepath.Join(docsRoot, "gateway", "index.md") + writeFile(t, sourcePath, stringsJoin( + "---", + "title: Gateway", + "---", + "", + "See [Troubleshooting](/gateway/troubleshooting).", + )) + outputPath := filepath.Join(docsRoot, "en", "gateway", "index.md") + writeFile(t, outputPath, stringsJoin( + "---", + "title: Gateway", + "x-i18n:", + " source_hash: "+hashBytes([]byte(mustReadFile(t, sourcePath))), + " postprocess_version: "+localizedLinkPostprocessPending, + "---", + "", + "See [Troubleshooting](/gateway/troubleshooting).", + )) + + status, err := classifyDocOutput(outputPath, hashBytes([]byte(mustReadFile(t, sourcePath))), "en") + if err != nil { + t.Fatalf("classifyDocOutput for English target failed: %v", err) + } + if status != docOutputReady { + t.Fatalf("expected English target to remain ready with matching source hash, got %v", status) + } +} + +func TestFilterDocQueueSchedulesLegacyOutputsForPostprocessOnly(t *testing.T) { + t.Parallel() + + docsRoot := t.TempDir() + sourcePath := filepath.Join(docsRoot, "gateway", "index.md") + writeFile(t, sourcePath, "# Gateway\n") + outputPath := filepath.Join(docsRoot, "zh-CN", "gateway", "index.md") + writeFile(t, outputPath, stringsJoin( + "---", + "title: 网关", + "x-i18n:", + " source_hash: "+hashBytes([]byte(mustReadFile(t, sourcePath))), + "---", + "", + "See [Troubleshooting](/gateway/troubleshooting).", + )) + + pending, skipped, existingOutputs, err := filterDocQueue(docsRoot, "zh-CN", []string{sourcePath}, 0) + if err != nil { + t.Fatalf("filterDocQueue failed: %v", err) + } + if len(pending) != 0 { + t.Fatalf("expected legacy matching output to skip translation, got pending=%v", pending) + } + if skipped != 1 { + t.Fatalf("expected one skipped translation, got %d", skipped) + } + if len(existingOutputs) != 1 || existingOutputs[0] != outputPath { + t.Fatalf("expected existing output to be queued for postprocess, got %v", existingOutputs) + } +} + +func TestFilterDocQueueHonorsMaxAcrossPostprocessOutputs(t *testing.T) { + t.Parallel() + + docsRoot := t.TempDir() + firstSource := filepath.Join(docsRoot, "gateway", "index.md") + secondSource := filepath.Join(docsRoot, "providers", "example-provider.md") + writeFile(t, firstSource, "# Gateway\n") + writeFile(t, secondSource, "# Example provider\n") + firstOutput := filepath.Join(docsRoot, "zh-CN", "gateway", "index.md") + secondOutput := filepath.Join(docsRoot, "zh-CN", "providers", "example-provider.md") + writeFile(t, firstOutput, stringsJoin( + "---", + "title: 网关", + "x-i18n:", + " source_hash: "+hashBytes([]byte(mustReadFile(t, firstSource))), + "---", + "", + "# 网关", + )) + writeFile(t, secondOutput, stringsJoin( + "---", + "title: 示例 provider", + "x-i18n:", + " source_hash: "+hashBytes([]byte(mustReadFile(t, secondSource))), + "---", + "", + "# 示例 provider", + )) + + pending, skipped, existingOutputs, err := filterDocQueue(docsRoot, "zh-CN", []string{firstSource, secondSource}, 1) + if err != nil { + t.Fatalf("filterDocQueue failed: %v", err) + } + if len(pending) != 0 { + t.Fatalf("expected no translations to be queued, got %v", pending) + } + if skipped != 1 { + t.Fatalf("expected one bounded postprocess-only skip, got %d", skipped) + } + if len(existingOutputs) != 1 || existingOutputs[0] != firstOutput { + t.Fatalf("expected only first output to be queued for postprocess, got %v", existingOutputs) + } +} + func TestRunDocsI18NAllowPartialKeepsEarlierSuccessfulDocOutputs(t *testing.T) { t.Parallel() diff --git a/scripts/docs-i18n/process.go b/scripts/docs-i18n/process.go index af1389b4597..b598354e6c9 100644 --- a/scripts/docs-i18n/process.go +++ b/scripts/docs-i18n/process.go @@ -12,6 +12,11 @@ import ( "gopkg.in/yaml.v3" ) +const ( + localizedLinkPostprocessPending = "pending" + localizedLinkPostprocessVersion = "locale-links-v1" +) + func processFile(ctx context.Context, translator docsTranslator, tm *TranslationMemory, docsRoot, filePath, srcLang, tgtLang string) (bool, string, error) { absPath, relPath, err := resolveDocsPath(docsRoot, filePath) if err != nil { @@ -120,12 +125,13 @@ func encodeFrontMatter(frontData map[string]any, relPath string, source []byte) frontData = map[string]any{} } frontData["x-i18n"] = map[string]any{ - "source_path": relPath, - "source_hash": hashBytes(source), - "provider": docsI18nProvider(), - "model": docsI18nModel(), - "workflow": workflowVersion, - "generated_at": time.Now().UTC().Format(time.RFC3339), + "source_path": relPath, + "source_hash": hashBytes(source), + "provider": docsI18nProvider(), + "model": docsI18nModel(), + "workflow": workflowVersion, + "generated_at": time.Now().UTC().Format(time.RFC3339), + "postprocess_version": localizedLinkPostprocessPending, } encoded, err := yaml.Marshal(frontData) if err != nil { diff --git a/scripts/docs-i18n/relocalize.go b/scripts/docs-i18n/relocalize.go index 001088b2c3e..a539e3eb6a6 100644 --- a/scripts/docs-i18n/relocalize.go +++ b/scripts/docs-i18n/relocalize.go @@ -2,6 +2,7 @@ package main import ( "os" + "strings" ) func postprocessLocalizedDocs(docsRoot, targetLang string, localizedFiles []string) error { @@ -22,13 +23,14 @@ func postprocessLocalizedDocs(docsRoot, targetLang string, localizedFiles []stri frontMatter, body := splitFrontMatter(string(content)) rewrittenBody := routes.localizeBodyLinks(body) - if rewrittenBody == body { + updatedFrontMatter := setPostprocessVersion(frontMatter, localizedLinkPostprocessVersion) + if rewrittenBody == body && updatedFrontMatter == frontMatter { continue } output := rewrittenBody - if frontMatter != "" { - output = "---\n" + frontMatter + "\n---\n\n" + rewrittenBody + if updatedFrontMatter != "" { + output = "---\n" + updatedFrontMatter + "\n---\n\n" + rewrittenBody } if err := os.WriteFile(path, []byte(output), 0o644); err != nil { @@ -38,3 +40,55 @@ func postprocessLocalizedDocs(docsRoot, targetLang string, localizedFiles []stri return nil } + +func setPostprocessVersion(frontMatter, version string) string { + if strings.TrimSpace(frontMatter) == "" { + return frontMatter + } + + lines := strings.Split(frontMatter, "\n") + inXI18N := false + xi18nLine := -1 + insertAt := -1 + childIndent := " " + + for i, line := range lines { + trimmed := strings.TrimSpace(line) + if trimmed == "x-i18n:" { + inXI18N = true + xi18nLine = i + insertAt = len(lines) + continue + } + if !inXI18N { + continue + } + if trimmed == "" { + continue + } + indent := leadingWhitespace(line) + if len(indent) <= len(leadingWhitespace(lines[xi18nLine])) { + insertAt = i + break + } + childIndent = indent + if strings.HasPrefix(trimmed, "postprocess_version:") { + lines[i] = indent + "postprocess_version: " + version + return strings.Join(lines, "\n") + } + } + + if xi18nLine == -1 { + return frontMatter + } + if insertAt == -1 { + insertAt = len(lines) + } + + lines = append(lines[:insertAt], append([]string{childIndent + "postprocess_version: " + version}, lines[insertAt:]...)...) + return strings.Join(lines, "\n") +} + +func leadingWhitespace(text string) string { + return text[:len(text)-len(strings.TrimLeft(text, " \t"))] +} diff --git a/scripts/docs-i18n/relocalize_test.go b/scripts/docs-i18n/relocalize_test.go index be78e49ce53..9805aa92acc 100644 --- a/scripts/docs-i18n/relocalize_test.go +++ b/scripts/docs-i18n/relocalize_test.go @@ -41,7 +41,7 @@ func TestPostprocessLocalizedDocsFixesStaleLinksAfterLaterPagesExist(t *testing. } got := mustReadFile(t, filepath.Join(docsRoot, "zh-CN", "gateway", "index.md")) - if !strings.Contains(got, "---\ntitle: 网关\nx-i18n:\n source_hash: test\n---\n\n") { + if !strings.Contains(got, "---\ntitle: 网关\nx-i18n:\n source_hash: test\n postprocess_version: "+localizedLinkPostprocessVersion+"\n---\n\n") { t.Fatalf("front matter corrupted after rewrite:\n%s", got) } want := "See [Troubleshooting](/zh-CN/gateway/troubleshooting)." @@ -231,6 +231,36 @@ func TestPostprocessLocalizedDocsContinuesAfterUnchangedFile(t *testing.T) { } } +func TestPostprocessLocalizedDocsFinalizesPostprocessVersionWithoutBodyRewrite(t *testing.T) { + t.Parallel() + + docsRoot := t.TempDir() + path := filepath.Join(docsRoot, "zh-CN", "gateway", "index.md") + writeFile(t, filepath.Join(docsRoot, "docs.json"), `{"redirects":[]}`) + writeFile(t, path, stringsJoin( + "---", + "title: 网关", + "x-i18n:", + " source_hash: test", + " postprocess_version: "+localizedLinkPostprocessPending, + "---", + "", + "See [Troubleshooting](/zh-CN/gateway/troubleshooting).", + )) + + if err := postprocessLocalizedDocs(docsRoot, "zh-CN", []string{path}); err != nil { + t.Fatalf("postprocessLocalizedDocs failed: %v", err) + } + + got := mustReadFile(t, path) + if !strings.Contains(got, " postprocess_version: "+localizedLinkPostprocessVersion) { + t.Fatalf("expected postprocess version marker to be finalized:\n%s", got) + } + if !containsLine(got, "See [Troubleshooting](/zh-CN/gateway/troubleshooting).") { + t.Fatalf("expected localized link to remain unchanged, got:\n%s", got) + } +} + func stringsJoin(lines ...string) string { result := "" for i, line := range lines {