docs-i18n: avoid ambiguous body-only wrapper unwrap (#63808)

* docs-i18n: avoid ambiguous body-only wrapper unwrap

* docs: clarify targeted testing tip

* changelog: include docs-i18n follow-up thanks
This commit is contained in:
Mason
2026-04-10 00:01:17 +08:00
committed by GitHub
parent 2954c7235b
commit 164287f056
4 changed files with 29 additions and 9 deletions

View File

@@ -231,14 +231,21 @@ func sanitizeDocChunkProtocolWrappers(source, translated string) string {
return body
}
}
body, ok := stripBodyOnlyWrapper(trimmedTranslated)
body, ok := stripBodyOnlyWrapper(source, trimmedTranslated)
if !ok || strings.TrimSpace(body) == "" {
return translated
}
return body
}
func stripBodyOnlyWrapper(text string) (string, bool) {
func stripBodyOnlyWrapper(source, text string) (string, bool) {
sourceLower := strings.ToLower(source)
// When the source itself documents <body> tokens, a bare body-only payload is
// ambiguous: the trailing </body> can be literal translated content instead of
// a real wrapper close. Keep it for validation/retry instead of truncating.
if strings.Contains(sourceLower, strings.ToLower(bodyTagStart)) || strings.Contains(sourceLower, strings.ToLower(bodyTagEnd)) {
return "", false
}
lower := strings.ToLower(text)
bodyStartLower := strings.ToLower(bodyTagStart)
bodyEndLower := strings.ToLower(bodyTagEnd)

View File

@@ -512,18 +512,15 @@ func TestTranslateDocBodyChunkedStripsUppercaseBodyWrapper(t *testing.T) {
}
}
func TestSanitizeDocChunkProtocolWrappersStripsTopLevelWrapperEvenWhenSourceMentionsBodyTag(t *testing.T) {
func TestSanitizeDocChunkProtocolWrappersKeepsBodyOnlyWrapperWhenSourceMentionsBodyTag(t *testing.T) {
t.Parallel()
source := "Use `<body>` and `</body>` in examples, but keep the paragraph text plain.\n"
translated := "<body>\nTranslated paragraph.\n</body>\n"
got := sanitizeDocChunkProtocolWrappers(source, translated)
if strings.Contains(got, "<body>") || strings.Contains(got, "</body>") {
t.Fatalf("expected top-level wrapper to be stripped, got %q", got)
}
if strings.TrimSpace(got) != "Translated paragraph." {
t.Fatalf("unexpected sanitized body %q", got)
if got != translated {
t.Fatalf("expected ambiguous body-only wrapper to remain unchanged for retry\nwant:\n%s\ngot:\n%s", translated, got)
}
}
@@ -539,6 +536,21 @@ func TestSanitizeDocChunkProtocolWrappersKeepsLegitimateTopLevelBodyBlock(t *tes
}
}
// TestSanitizeDocChunkProtocolWrappersStripsBodyOnlyWrapperWhenSourceHasNoBodyTokens
// checks the unambiguous case: the source text never mentions <body> or
// </body>, so a translated payload wrapped only in a body tag pair must come
// back with the wrapper removed and just the inner paragraph left.
func TestSanitizeDocChunkProtocolWrappersStripsBodyOnlyWrapperWhenSourceHasNoBodyTokens(t *testing.T) {
	t.Parallel()

	const (
		plainSource   = "Regular paragraph.\n"
		wrappedOutput = "<body>\nTranslated paragraph.\n</body>\n"
	)

	sanitized := sanitizeDocChunkProtocolWrappers(plainSource, wrappedOutput)

	// Neither the opening nor the closing wrapper tag may survive sanitizing.
	for _, tag := range []string{"<body>", "</body>"} {
		if strings.Contains(sanitized, tag) {
			t.Fatalf("expected body-only wrapper to be stripped, got %q", sanitized)
		}
	}

	// Surrounding whitespace is not significant; only the payload text is.
	if trimmed := strings.TrimSpace(sanitized); trimmed != "Translated paragraph." {
		t.Fatalf("unexpected sanitized body %q", sanitized)
	}
}
func TestSanitizeDocChunkProtocolWrappersKeepsAmbiguousTaggedWrapperForRetry(t *testing.T) {
t.Parallel()