docs-i18n: avoid ambiguous body-only wrapper unwrap (#63808)

* docs-i18n: avoid ambiguous body-only wrapper unwrap

* docs: clarify targeted testing tip

* changelog: include docs-i18n follow-up thanks
This commit is contained in:
Mason
2026-04-10 00:01:17 +08:00
committed by GitHub
parent 2954c7235b
commit 164287f056
4 changed files with 29 additions and 9 deletions

View File

@@ -231,14 +231,21 @@ func sanitizeDocChunkProtocolWrappers(source, translated string) string {
return body
}
}
body, ok := stripBodyOnlyWrapper(trimmedTranslated)
body, ok := stripBodyOnlyWrapper(source, trimmedTranslated)
if !ok || strings.TrimSpace(body) == "" {
return translated
}
return body
}
func stripBodyOnlyWrapper(text string) (string, bool) {
func stripBodyOnlyWrapper(source, text string) (string, bool) {
sourceLower := strings.ToLower(source)
// When the source itself documents <body> tokens, a bare body-only payload is
// ambiguous: the trailing </body> can be literal translated content instead of
// a real wrapper close. Keep it for validation/retry instead of truncating.
if strings.Contains(sourceLower, strings.ToLower(bodyTagStart)) || strings.Contains(sourceLower, strings.ToLower(bodyTagEnd)) {
return "", false
}
lower := strings.ToLower(text)
bodyStartLower := strings.ToLower(bodyTagStart)
bodyEndLower := strings.ToLower(bodyTagEnd)