docs-i18n: chunk raw doc translation (#62969)

Merged via squash.

Prepared head SHA: 6a16d66486
Co-authored-by: hxy91819 <8814856+hxy91819@users.noreply.github.com>
Co-authored-by: hxy91819 <8814856+hxy91819@users.noreply.github.com>
Reviewed-by: @hxy91819
This commit is contained in:
Mason
2026-04-09 23:22:16 +08:00
committed by GitHub
parent 635bb35b68
commit 06dea262c4
12 changed files with 2319 additions and 149 deletions

View File

@@ -3,6 +3,7 @@ package main
import (
"context"
"fmt"
"log"
"os"
"path/filepath"
"strings"
@@ -138,17 +139,29 @@ func translateFrontMatter(ctx context.Context, translator docsTranslator, tm *Tr
return nil
}
if summary, ok := data["summary"].(string); ok {
if docsI18nVerboseLogs() {
log.Printf("docs-i18n: frontmatter start %s field=summary bytes=%d", relPath, len(summary))
}
translated, err := translateSnippet(ctx, translator, tm, relPath+":frontmatter:summary", summary, srcLang, tgtLang)
if err != nil {
return err
}
if docsI18nVerboseLogs() {
log.Printf("docs-i18n: frontmatter done %s field=summary out_bytes=%d", relPath, len(translated))
}
data["summary"] = translated
}
if title, ok := data["title"].(string); ok {
if docsI18nVerboseLogs() {
log.Printf("docs-i18n: frontmatter start %s field=title bytes=%d", relPath, len(title))
}
translated, err := translateSnippet(ctx, translator, tm, relPath+":frontmatter:title", title, srcLang, tgtLang)
if err != nil {
return err
}
if docsI18nVerboseLogs() {
log.Printf("docs-i18n: frontmatter done %s field=title out_bytes=%d", relPath, len(translated))
}
data["title"] = translated
}
if readWhen, ok := data["read_when"].([]any); ok {
@@ -159,10 +172,16 @@ func translateFrontMatter(ctx context.Context, translator docsTranslator, tm *Tr
translated = append(translated, item)
continue
}
if docsI18nVerboseLogs() {
log.Printf("docs-i18n: frontmatter start %s field=read_when[%d] bytes=%d", relPath, idx, len(textValue))
}
value, err := translateSnippet(ctx, translator, tm, fmt.Sprintf("%s:frontmatter:read_when:%d", relPath, idx), textValue, srcLang, tgtLang)
if err != nil {
return err
}
if docsI18nVerboseLogs() {
log.Printf("docs-i18n: frontmatter done %s field=read_when[%d] out_bytes=%d", relPath, idx, len(value))
}
translated = append(translated, value)
}
data["read_when"] = translated
@@ -170,6 +189,19 @@ func translateFrontMatter(ctx context.Context, translator docsTranslator, tm *Tr
return nil
}
// docsI18nVerboseLogs reports whether verbose docs-i18n logging has been
// requested through the OPENCLAW_DOCS_I18N_VERBOSE_LOGS environment variable.
//
// The value is trimmed of surrounding whitespace and compared
// case-insensitively against a fixed set of enabling words. An unset, empty,
// or unrecognised value yields false.
func docsI18nVerboseLogs() bool {
	setting := strings.ToLower(strings.TrimSpace(os.Getenv("OPENCLAW_DOCS_I18N_VERBOSE_LOGS")))
	// An empty setting matches none of the words below, so it needs no
	// dedicated guard.
	for _, enabled := range []string{"1", "true", "yes", "on", "debug", "verbose"} {
		if setting == enabled {
			return true
		}
	}
	return false
}
func translateSnippet(ctx context.Context, translator docsTranslator, tm *TranslationMemory, segmentID, textValue, srcLang, tgtLang string) (string, error) {
if strings.TrimSpace(textValue) == "" {
return textValue, nil
@@ -184,6 +216,12 @@ func translateSnippet(ctx context.Context, translator docsTranslator, tm *Transl
if err != nil {
return "", err
}
shouldCache := true
if validationErr := validateFrontmatterScalarTranslation(textValue, translated); validationErr != nil {
log.Printf("docs-i18n: frontmatter fallback %s reason=%v", segmentID, validationErr)
translated = textValue
shouldCache = false
}
entry := TMEntry{
CacheKey: ck,
SegmentID: segmentID,
@@ -197,6 +235,45 @@ func translateSnippet(ctx context.Context, translator docsTranslator, tm *Transl
TgtLang: tgtLang,
UpdatedAt: time.Now().UTC().Format(time.RFC3339),
}
tm.Put(entry)
if shouldCache {
tm.Put(entry)
}
return translated, nil
}
// validateFrontmatterScalarTranslation sanity-checks a translated front matter
// scalar against its source text and returns a descriptive error when the
// output looks like something other than a plain translated value.
//
// Checks run in a fixed order on the whitespace-trimmed translation, and the
// first failure wins:
//  1. the translation is empty;
//  2. it contains a <frontmatter> or <body> wrapper tag (case-insensitive);
//  3. it contains the "[[[FM_" marker prefix;
//  4. it starts with "---\n" or contains "\n---\n";
//  5. the source is single-line but the translation has three or more newlines;
//  6. its byte length exceeds max(512, 8*len(trimmed source)+256), which is
//     only enforced when the trimmed source is non-empty;
//  7. a line starts with "title:", "summary:" or "read_when:"
//     (case-insensitive).
//
// A nil result means none of the checks fired.
func validateFrontmatterScalarTranslation(source, translated string) error {
	candidate := strings.TrimSpace(translated)
	if candidate == "" {
		return fmt.Errorf("empty translation")
	}

	// Lower-cased copy used for the case-insensitive tag and key checks.
	folded := strings.ToLower(candidate)
	for _, tag := range []string{"<frontmatter>", "</frontmatter>", "<body>", "</body>"} {
		if strings.Contains(folded, tag) {
			return fmt.Errorf("tagged document wrapper detected")
		}
	}

	switch {
	case strings.Contains(candidate, "[[[FM_"):
		return fmt.Errorf("frontmatter marker leaked into scalar translation")
	case strings.HasPrefix(candidate, "---\n"), strings.Contains(candidate, "\n---\n"):
		return fmt.Errorf("yaml document boundary detected")
	case strings.Count(candidate, "\n") >= 3 && !strings.Contains(source, "\n"):
		return fmt.Errorf("unexpected multiline expansion")
	}

	// Sizes are byte counts, not rune counts.
	if srcBytes := len(strings.TrimSpace(source)); srcBytes > 0 {
		ceiling := srcBytes*8 + 256
		if ceiling < 512 {
			ceiling = 512
		}
		if outBytes := len(candidate); outBytes > ceiling {
			return fmt.Errorf("unexpected size expansion source=%d translated=%d", srcBytes, outBytes)
		}
	}

	// A front matter key at the start of any line is treated as a leak.
	for _, key := range [...]string{"title:", "summary:", "read_when:"} {
		if strings.HasPrefix(folded, key) || strings.Contains(folded, "\n"+key) {
			return fmt.Errorf("frontmatter key leaked into scalar translation")
		}
	}
	return nil
}