From f2dc241e9db774a6856b1a2a94c2dc9cb6c7dc60 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 5 Apr 2026 11:02:32 +0100 Subject: [PATCH] docs: harden zh-CN translation flow --- docs/.i18n/README.md | 50 +++++++++++++++++++++++++----- scripts/docs-i18n/doc_mode.go | 18 +++++++---- scripts/docs-i18n/doc_mode_test.go | 31 ++++++++++++++++++ 3 files changed, 86 insertions(+), 13 deletions(-) create mode 100644 scripts/docs-i18n/doc_mode_test.go diff --git a/docs/.i18n/README.md b/docs/.i18n/README.md index f50136c89f2..102a7b50e16 100644 --- a/docs/.i18n/README.md +++ b/docs/.i18n/README.md @@ -2,12 +2,38 @@ This folder stores translation config for the source docs repo. -Generated zh-CN pages and the live zh-CN translation memory now live in the publish repo (`openclaw/docs`, local sibling checkout `~/Projects/openclaw-docs`). +Generated zh-CN pages and the live zh-CN translation memory live in the publish repo: -## Files +- repo: `openclaw/docs` +- local checkout: `~/Projects/openclaw-docs` -- `glossary.<lang>.json` — preferred term mappings (used in prompt guidance). -- `<lang>.tm.jsonl` — translation memory (cache) keyed by workflow + model + text hash. In this repo, zh-CN TM is no longer committed. +## Source of truth + +- English docs are authored in `openclaw/openclaw`. +- The source docs tree lives under `docs/`. +- The source repo no longer keeps committed `docs/zh-CN/**`. + +## End-to-end flow + +1. Edit English docs in `openclaw/openclaw`. +2. Push to `main`. +3. `openclaw/openclaw/.github/workflows/docs-sync-publish.yml` mirrors the docs tree into `openclaw/docs`. +4. The sync script rewrites the publish `docs/docs.json` so `zh-Hans` navigation exists there even though it is no longer committed in the source repo. +5. `openclaw/docs/.github/workflows/translate-zh-cn.yml` refreshes `docs/zh-CN/**` on push and hourly. + +## Why the split exists + +- Keep generated zh-CN output out of the main product repo. +- Keep Mintlify on a single published docs tree. 
+- Preserve the built-in language switcher by letting the publish repo own `docs/zh-CN/**`. + +## Files in this folder + +- `glossary.<lang>.json` — preferred term mappings used as prompt guidance. +- `zh-Hans-navigation.json` — the `zh-Hans` Mintlify nav block reinserted into the publish repo during sync. +- `<lang>.tm.jsonl` — translation memory keyed by workflow + model + text hash. + +In this repo, `docs/.i18n/zh-CN.tm.jsonl` is intentionally no longer committed. ## Glossary format @@ -27,8 +53,18 @@ Fields: - `source`: English (or source) phrase to prefer. - `target`: preferred translation output. -## Notes +## Translation mechanics -- Glossary entries are passed to the model as **prompt guidance** (no deterministic rewrites). - `scripts/docs-i18n` still owns translation generation. -- The source repo syncs English docs into the publish repo; zh-CN generation runs there on push and hourly. +- Doc mode writes `x-i18n.source_hash` into each translated page. +- The publish workflow precomputes a pending file list by comparing the current English source hash to the stored zh-CN `x-i18n.source_hash`. +- If the pending count is `0`, the expensive translation step is skipped entirely. +- If there are pending files, the workflow translates only those files. +- The publish workflow retries transient model-format failures, but unchanged files stay skipped because the same hash check runs on each retry. + +## Operational notes + +- Sync metadata is written to `.openclaw-sync/source.json` in the publish repo. +- Source repo secret: `OPENCLAW_DOCS_SYNC_TOKEN` +- Publish repo secret: `OPENCLAW_DOCS_I18N_OPENAI_API_KEY` +- If zh-CN output looks stale, check the `Translate zh-CN` workflow in `openclaw/docs` first. 
diff --git a/scripts/docs-i18n/doc_mode.go b/scripts/docs-i18n/doc_mode.go index ad71bdb2b08..39c8718723c 100644 --- a/scripts/docs-i18n/doc_mode.go +++ b/scripts/docs-i18n/doc_mode.go @@ -101,20 +101,26 @@ func parseTaggedDocument(text string) (string, string, error) { return "", "", fmt.Errorf("missing %s", bodyTagStart) } bodyStart += frontEnd + len(bodyTagStart) - bodyEnd := strings.Index(text[bodyStart:], bodyTagEnd) - if bodyEnd == -1 { - return "", "", fmt.Errorf("missing %s", bodyTagEnd) + + body := "" + suffix := "" + if bodyEnd := strings.Index(text[bodyStart:], bodyTagEnd); bodyEnd != -1 { + bodyEnd += bodyStart + body = trimTagNewlines(text[bodyStart:bodyEnd]) + suffix = strings.TrimSpace(text[bodyEnd+len(bodyTagEnd):]) + } else { + // Some model replies omit the final closing tag but otherwise return a + // valid document. Treat EOF as the end of so doc retries do not + // burn through the whole workflow on a recoverable formatting slip. + body = trimTagNewlines(text[bodyStart:]) } - bodyEnd += bodyStart prefix := strings.TrimSpace(text[:frontStart-len(frontmatterTagStart)]) - suffix := strings.TrimSpace(text[bodyEnd+len(bodyTagEnd):]) if prefix != "" || suffix != "" { return "", "", fmt.Errorf("unexpected text outside tagged sections") } frontMatter := trimTagNewlines(text[frontStart:frontEnd]) - body := trimTagNewlines(text[bodyStart:bodyEnd]) return frontMatter, body, nil } diff --git a/scripts/docs-i18n/doc_mode_test.go b/scripts/docs-i18n/doc_mode_test.go new file mode 100644 index 00000000000..1744f43cfce --- /dev/null +++ b/scripts/docs-i18n/doc_mode_test.go @@ -0,0 +1,31 @@ +package main + +import "testing" + +func TestParseTaggedDocumentAcceptsMissingBodyCloseAtEOF(t *testing.T) { + t.Parallel() + + input := "\ntitle: Test\n\n\nTranslated body\n" + + front, body, err := parseTaggedDocument(input) + if err != nil { + t.Fatalf("parseTaggedDocument returned error: %v", err) + } + if front != "title: Test" { + t.Fatalf("unexpected frontmatter %q", 
front) + } + if body != "Translated body" { + t.Fatalf("unexpected body %q", body) + } +} + +func TestParseTaggedDocumentRejectsTrailingTextOutsideTags(t *testing.T) { + t.Parallel() + + input := "\ntitle: Test\n\n\nTranslated body\n\nextra" + + _, _, err := parseTaggedDocument(input) + if err == nil { + t.Fatal("expected error for trailing text") + } +}