mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-23 10:58:09 +00:00
Summary: - Merged fix(docs): finalize i18n postprocess before skip after ClawSweeper review. Automerge notes: - No ClawSweeper repair was needed after automerge opt-in. Validation: - ClawSweeper review passed for head ad79445835. - Required merge gates passed before the squash merge. Prepared head SHA: ad79445835 Review: https://github.com/openclaw/openclaw/pull/92668#issuecomment-4698629026 Co-authored-by: Mason Huang <masonxhuang@tencent.com> Co-authored-by: clawsweeper[bot] <274271284+clawsweeper[bot]@users.noreply.github.com> Approved-by: hxy91819 Co-authored-by: hxy91819 <8814856+hxy91819@users.noreply.github.com>
243 lines
6.6 KiB
Go
243 lines
6.6 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"gopkg.in/yaml.v3"
|
|
)
|
|
|
|
// Section delimiters of the tagged document format produced by
// formatTaggedDocument and consumed by parseTaggedDocument.
const (
	// Front matter section.
	frontmatterTagStart = "<frontmatter>"
	frontmatterTagEnd   = "</frontmatter>"

	// Body section.
	bodyTagStart = "<body>"
	bodyTagEnd   = "</body>"
)
|
|
|
|
// docOutputStatus describes how an already-written translated output file
// relates to the current source document (see classifyDocOutput).
type docOutputStatus int

const (
	// docOutputNeedsTranslation means the output is missing, has no usable
	// front matter, or records a source hash that differs from the current one.
	docOutputNeedsTranslation docOutputStatus = iota

	// docOutputReady means the recorded source hash matches and no further
	// work is required.
	docOutputReady

	// docOutputNeedsPostprocess means the recorded source hash matches but the
	// recorded postprocess version is not the current one.
	docOutputNeedsPostprocess
)
|
|
|
|
func processFileDoc(ctx context.Context, translator docsTranslator, docsRoot, filePath, srcLang, tgtLang string, overwrite bool) (bool, string, error) {
|
|
absPath, relPath, err := resolveDocsPath(docsRoot, filePath)
|
|
if err != nil {
|
|
return false, "", err
|
|
}
|
|
|
|
content, err := os.ReadFile(absPath)
|
|
if err != nil {
|
|
return false, "", err
|
|
}
|
|
currentHash := hashBytes(content)
|
|
|
|
outputPath := filepath.Join(docsRoot, tgtLang, relPath)
|
|
if !overwrite {
|
|
status, err := classifyDocOutput(outputPath, currentHash, tgtLang)
|
|
if err != nil {
|
|
return false, "", err
|
|
}
|
|
switch status {
|
|
case docOutputReady:
|
|
return true, "", nil
|
|
case docOutputNeedsPostprocess:
|
|
return true, outputPath, nil
|
|
}
|
|
}
|
|
|
|
sourceFront, sourceBody := splitFrontMatter(string(content))
|
|
frontData := map[string]any{}
|
|
if strings.TrimSpace(sourceFront) != "" {
|
|
if err := yaml.Unmarshal([]byte(sourceFront), &frontData); err != nil {
|
|
return false, "", fmt.Errorf("frontmatter parse failed for %s: %w", relPath, err)
|
|
}
|
|
}
|
|
docTM := &TranslationMemory{entries: map[string]TMEntry{}}
|
|
if err := translateFrontMatter(ctx, translator, docTM, frontData, relPath, srcLang, tgtLang); err != nil {
|
|
return false, "", fmt.Errorf("frontmatter translation failed for %s: %w", relPath, err)
|
|
}
|
|
updatedFront, err := encodeFrontMatter(frontData, relPath, content)
|
|
if err != nil {
|
|
return false, "", err
|
|
}
|
|
translatedBody, err := translateDocBodyChunked(ctx, translator, relPath, sourceBody, srcLang, tgtLang)
|
|
if err != nil {
|
|
return false, "", fmt.Errorf("body translate failed for %s: %w", relPath, err)
|
|
}
|
|
|
|
if err := os.MkdirAll(filepath.Dir(outputPath), 0o755); err != nil {
|
|
return false, "", err
|
|
}
|
|
|
|
output := updatedFront + translatedBody
|
|
return false, outputPath, os.WriteFile(outputPath, []byte(output), 0o644)
|
|
}
|
|
|
|
func formatTaggedDocument(frontMatter, body string) string {
|
|
return fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", frontmatterTagStart, frontMatter, frontmatterTagEnd, bodyTagStart, body, bodyTagEnd)
|
|
}
|
|
|
|
func parseTaggedDocument(text string) (string, string, error) {
|
|
frontStart := strings.Index(text, frontmatterTagStart)
|
|
if frontStart == -1 {
|
|
return "", "", fmt.Errorf("missing %s", frontmatterTagStart)
|
|
}
|
|
frontStart += len(frontmatterTagStart)
|
|
frontEnd := strings.Index(text[frontStart:], frontmatterTagEnd)
|
|
if frontEnd == -1 {
|
|
return "", "", fmt.Errorf("missing %s", frontmatterTagEnd)
|
|
}
|
|
frontEnd += frontStart
|
|
|
|
bodyStart := strings.Index(text[frontEnd:], bodyTagStart)
|
|
if bodyStart == -1 {
|
|
return "", "", fmt.Errorf("missing %s", bodyTagStart)
|
|
}
|
|
bodyStart += frontEnd + len(bodyTagStart)
|
|
|
|
bodyEnd := findTaggedBodyEnd(text, bodyStart)
|
|
if bodyEnd == -1 {
|
|
return "", "", fmt.Errorf("missing %s", bodyTagEnd)
|
|
}
|
|
body := trimTagNewlines(text[bodyStart:bodyEnd])
|
|
suffix := strings.TrimSpace(text[bodyEnd+len(bodyTagEnd):])
|
|
|
|
prefix := strings.TrimSpace(text[:frontStart-len(frontmatterTagStart)])
|
|
if prefix != "" || suffix != "" {
|
|
return "", "", fmt.Errorf("unexpected text outside tagged sections")
|
|
}
|
|
|
|
frontMatter := trimTagNewlines(text[frontStart:frontEnd])
|
|
return frontMatter, body, nil
|
|
}
|
|
|
|
func findTaggedBodyEnd(text string, bodyStart int) int {
|
|
if bodyStart < 0 || bodyStart > len(text) {
|
|
return -1
|
|
}
|
|
search := text[bodyStart:]
|
|
candidate := -1
|
|
offset := 0
|
|
for {
|
|
index := strings.Index(search[offset:], bodyTagEnd)
|
|
if index == -1 {
|
|
return candidate
|
|
}
|
|
index += offset
|
|
absolute := bodyStart + index
|
|
suffix := strings.TrimSpace(text[absolute+len(bodyTagEnd):])
|
|
if suffix == "" {
|
|
candidate = absolute
|
|
}
|
|
offset = index + len(bodyTagEnd)
|
|
if offset >= len(search) {
|
|
return candidate
|
|
}
|
|
}
|
|
}
|
|
|
|
// trimTagNewlines removes at most one leading and at most one trailing "\n"
// from value. Any further newlines are kept.
func trimTagNewlines(value string) string {
	if strings.HasPrefix(value, "\n") {
		value = value[1:]
	}
	if strings.HasSuffix(value, "\n") {
		value = value[:len(value)-1]
	}
	return value
}
|
|
|
|
func classifyDocOutput(outputPath string, sourceHash string, targetLang string) (docOutputStatus, error) {
|
|
data, err := os.ReadFile(outputPath)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
return docOutputNeedsTranslation, nil
|
|
}
|
|
return docOutputNeedsTranslation, err
|
|
}
|
|
frontMatter, _ := splitFrontMatter(string(data))
|
|
if frontMatter == "" {
|
|
return docOutputNeedsTranslation, nil
|
|
}
|
|
frontData := map[string]any{}
|
|
if err := yaml.Unmarshal([]byte(frontMatter), &frontData); err != nil {
|
|
return docOutputNeedsTranslation, nil
|
|
}
|
|
storedHash := extractSourceHash(frontData)
|
|
if storedHash == "" {
|
|
return docOutputNeedsTranslation, nil
|
|
}
|
|
if !strings.EqualFold(storedHash, sourceHash) {
|
|
return docOutputNeedsTranslation, nil
|
|
}
|
|
if strings.EqualFold(strings.TrimSpace(targetLang), "en") {
|
|
return docOutputReady, nil
|
|
}
|
|
|
|
postprocessVersion := extractPostprocessVersion(frontData)
|
|
if strings.EqualFold(postprocessVersion, localizedLinkPostprocessVersion) {
|
|
return docOutputReady, nil
|
|
}
|
|
return docOutputNeedsPostprocess, nil
|
|
}
|
|
|
|
func extractSourceHash(frontData map[string]any) string {
|
|
xi, ok := extractXI18N(frontData)
|
|
if !ok {
|
|
return ""
|
|
}
|
|
value, ok := xi["source_hash"].(string)
|
|
if !ok {
|
|
return ""
|
|
}
|
|
return strings.TrimSpace(value)
|
|
}
|
|
|
|
func extractPostprocessVersion(frontData map[string]any) string {
|
|
xi, ok := extractXI18N(frontData)
|
|
if !ok {
|
|
return ""
|
|
}
|
|
value, ok := xi["postprocess_version"].(string)
|
|
if !ok {
|
|
return ""
|
|
}
|
|
return strings.TrimSpace(value)
|
|
}
|
|
|
|
// extractXI18N returns the "x-i18n" section of frontData when it is present
// and is a map[string]any. Otherwise it returns (nil, false).
func extractXI18N(frontData map[string]any) (map[string]any, bool) {
	section, isMap := frontData["x-i18n"].(map[string]any)
	if !isMap {
		return nil, false
	}
	return section, true
}
|
|
|
|
// logDocChunkPlan writes one log line summarising how a document body was
// split: the number of blocks, the number of groups, and the combined byte
// length of all blocks.
func logDocChunkPlan(relPath string, blocks []string, groups [][]string) {
	var byteCount int
	for i := range blocks {
		byteCount += len(blocks[i])
	}
	log.Printf("docs-i18n: body-chunks %s blocks=%d groups=%d bytes=%d", relPath, len(blocks), len(groups), byteCount)
}
|
|
|
|
// resolveDocsPath turns filePath into an absolute path and into a path
// relative to docsRoot, verifying that the file lies strictly inside docsRoot.
//
// Both docsRoot and filePath may be relative; each is resolved against the
// current working directory before they are compared. The comparison is purely
// lexical (no symlink resolution, the file need not exist).
//
// Returns (absPath, relPath, nil) on success. An error is returned when either
// path cannot be made absolute, when filePath resolves to docsRoot itself, or
// when it lies outside docsRoot.
func resolveDocsPath(docsRoot, filePath string) (string, string, error) {
	absPath, err := filepath.Abs(filePath)
	if err != nil {
		return "", "", err
	}

	// filepath.Rel cannot relate an absolute target to a relative base, so the
	// root must be anchored the same way as the file before comparing.
	absRoot, err := filepath.Abs(docsRoot)
	if err != nil {
		return "", "", err
	}

	relPath, err := filepath.Rel(absRoot, absPath)
	if err != nil {
		return "", "", err
	}

	if relPath == "." || relPath == "" {
		return "", "", fmt.Errorf("file %s resolves to docs root %s", absPath, docsRoot)
	}

	// A result of ".." or one starting with "../" means the file escapes the root.
	escapesRoot := relPath == ".." || strings.HasPrefix(relPath, ".."+string(filepath.Separator))
	if filepath.IsAbs(relPath) || escapesRoot {
		return "", "", fmt.Errorf("file %s not under docs root %s", absPath, docsRoot)
	}

	return absPath, relPath, nil
}
|