Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 118 additions & 80 deletions .github/scripts/translate-docs.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
#!/usr/bin/env python3
"""
Translate English documentation files to Japanese.
Preserves code blocks, links, MDX components, and frontmatter structure.
Translates all text including inside MDX components, link text,
frontmatter descriptions, and callouts. Preserves code blocks,
URLs, and component tag structure.

Requires ANTHROPIC_API_KEY environment variable.

Usage:
Expand All @@ -14,72 +17,78 @@
import re
import anthropic

def extract_preservable_blocks(content):
    """Swap non-translatable spans of *content* for numbered placeholders.

    Every matched span is appended to a list and replaced in the text by
    ``__PRESERVE_<n>__``, where ``<n>`` is the span's index in that list.
    The patterns run one after another in a fixed order, so a span captured
    by a later pattern may itself contain placeholders produced by an
    earlier one (e.g. inline code inside a Markdown link).

    For MDX/JSX wrapper components only the tags are captured; the prose
    between an opening and a closing tag (e.g. inside <Update>, <Steps>)
    stays in the text. Markdown images and links are captured whole,
    including their alt/link text.

    Returns:
        A ``(text, placeholders)`` tuple: the text with placeholders
        substituted, and the list of original spans in index order.
    """
    preserved = []

    def stash(match):
        # The placeholder number is the index the span is about to occupy.
        token = f"__PRESERVE_{len(preserved)}__"
        preserved.append(match.group(0))
        return token

    # Applied strictly in this order; earlier patterns win over later ones.
    patterns = (
        # Fenced code blocks (``` ... ```)
        r'```[\s\S]*?```',
        # Inline code
        r'`[^`\n]+`',
        # MDX/JSX opening tags (e.g. <Update label="...">, <Step title="...">)
        r'<[A-Z][a-zA-Z]*(?:\s[^>]*)?\s*>',
        # MDX/JSX closing tags (e.g. </Update>, </Step>)
        r'</[A-Z][a-zA-Z]*>',
        # Self-closing MDX components
        r'<[A-Z][^/]*?/>',
        # HTML-like tags
        r'<(?:img|video|iframe|br|hr)[^>]*/?>',
        # Image references
        r'!\[([^\]]*)\]\([^)]+\)',
        # Markdown links (text and URL together)
        r'\[([^\]]*)\]\([^)]+\)',
    )
    for pattern in patterns:
        content = re.sub(pattern, stash, content)

    return content, preserved


def restore_preserved_blocks(content, placeholders):
    """Substitute each ``__PRESERVE_<n>__`` token with ``placeholders[n]``.

    Args:
        content: Text containing placeholder tokens.
        placeholders: Original spans, indexed by placeholder number.

    Returns:
        The text with every resolvable token replaced. If any token is
        still present afterwards, a warning listing up to five of them is
        printed and the text is returned as-is.
    """
    # Restore from the highest index down. A span stored at a higher index
    # can embed a lower-numbered token (e.g. inline code inside a link is
    # stored as "[__PRESERVE_0__](url)"). Going in ascending order would
    # re-introduce that inner token after its turn had already passed,
    # leaving it unresolved in the output.
    for idx in range(len(placeholders) - 1, -1, -1):
        content = content.replace(f"__PRESERVE_{idx}__", placeholders[idx])
    remaining = re.findall(r'__PRESERVE_\d+__', content)
    if remaining:
        print(f" WARNING: {len(remaining)} unresolved placeholders remain: {remaining[:5]}")
    return content


def split_frontmatter(content):
"""Split content into frontmatter and body."""
match = re.match(r'^(---\s*\n.*?\n---\s*\n)(.*)', content, re.DOTALL)
match = re.match(r'^(---\s*\n)(.*?\n)(---\s*\n)(.*)', content, re.DOTALL)
if match:
return match.group(1), match.group(2)
return "", content
return match.group(1), match.group(2), match.group(3), match.group(4)
return "", "", "", content


def translate_frontmatter(client, fm_body):
    """Translate the ``description`` and ``sidebarTitle`` values in frontmatter.

    The frontmatter text is processed line by line. A line that starts with
    one of the two field names and has a non-blank value gets that value
    (stripped) sent to ``_translate_chunk`` with a field-specific
    instruction; the field prefix is kept and the value replaced by the
    result. All other lines pass through untouched.

    Args:
        client: Passed through to ``_translate_chunk``.
        fm_body: Frontmatter text between the ``---`` delimiters.

    Returns:
        The frontmatter text with the two fields translated; whitespace-only
        input is returned unchanged.
    """
    if not fm_body.strip():
        return fm_body

    # (line pattern, translation instruction), tried in this order.
    field_rules = (
        (r'^(description:\s*)(.*)',
         "Translate this short description to Japanese. Keep it concise. Output ONLY the translation."),
        (r'^(sidebarTitle:\s*)(.*)',
         "Translate this short UI label to Japanese. Output ONLY the translation."),
    )

    def convert(line):
        for pattern, instruction in field_rules:
            found = re.match(pattern, line)
            # A field with an empty value has nothing to translate.
            if found and found.group(2).strip():
                value = _translate_chunk(client, found.group(2).strip(), instruction)
                return f"{found.group(1)}{value}"
        return line

    return '\n'.join(convert(line) for line in fm_body.split('\n'))


def translate_text(client, text):
"""Translate English text to Japanese using Claude.

Splits large texts into chunks to avoid output truncation.
"""
def translate_body(client, body):
    """Translate the document body while leaving fenced code blocks untouched.

    The body is split on fenced code blocks (``` ... ```). Code blocks and
    whitespace-only stretches are copied through verbatim; every other
    stretch is handed to ``translate_prose_segment``. The pieces are then
    concatenated in their original order.

    Args:
        client: Passed through to ``translate_prose_segment``.
        body: Document text following the frontmatter.

    Returns:
        The reassembled body; whitespace-only input is returned unchanged.
    """
    if not body.strip():
        return body

    # Splitting on a capturing group keeps the delimiters in the result, so
    # the fenced blocks land at the odd positions and prose at the even ones.
    segments = re.split(r'(```[\s\S]*?```)', body)

    def render(position, segment):
        is_code_block = position % 2 == 1
        if is_code_block or not segment.strip():
            return segment
        return translate_prose_segment(client, segment)

    return ''.join(render(pos, seg) for pos, seg in enumerate(segments))


def translate_prose_segment(client, text):
"""Translate a prose segment (no code blocks) to Japanese."""
if not text.strip():
return text

lines = text.split('\n')
# For small texts, translate in one shot
if len(lines) <= 200:
return _translate_chunk(client, text)
return _translate_prose(client, text)

# Split into chunks at paragraph boundaries
chunks = []
Expand All @@ -95,30 +104,61 @@ def translate_text(client, text):
translated = []
for i, chunk in enumerate(chunks):
print(f" Translating chunk {i+1}/{len(chunks)}...")
translated.append(_translate_chunk(client, chunk))
translated.append(_translate_prose(client, chunk))

return '\n'.join(translated)


def _translate_chunk(client, text):
"""Translate a single chunk of text."""
def _translate_prose(client, text):
    """Translate *text* via ``_translate_chunk`` using the full MDX/Markdown rule set.

    The instruction block below spells out what to translate, what to leave
    in English, which formatting to keep, and how to rewrite internal doc
    links; it is passed as the third argument to ``_translate_chunk``.
    """
    instructions = """Translate the following English technical documentation (MDX/Markdown) to Japanese.

CRITICAL RULES:
1. Translate ALL prose text to natural Japanese, including:
- Paragraph text
- Heading text (after #, ##, ### etc.)
- List item text
- Text inside MDX components like <Tip>, <Note>, <Warning>, <Info>, <Check>, <Card>, <Step>, <Accordion>
- Link display text: translate [Display Text](/path) to [翻訳テキスト](/path) — keep the URL unchanged
- Text in component attributes like title="..." and description="..." — translate the attribute values
- Table cell text (translate content, keep | separators)
- Bold and italic text content

2. Do NOT translate:
- Code inside backticks (`code`) — keep exactly as-is
- URLs and file paths
- Brand names: Factory, Droid, GitHub, GitLab, Linear, Slack, Discord, Sentry, PagerDuty, Jira, Notion
- Technical terms commonly kept in English in Japanese tech docs: API, CLI, SDK, MCP, SSO, SCIM, BYOK, IDE, JSON, YAML, MDX, PR, CI/CD, OAuth, OTEL, LLM
- Component tag names: <Card>, <Step>, <Tip>, etc.
- Property/attribute names: title=, description=, href=, icon=
- Import statements and JSX expressions in { }

3. Keep ALL markdown and MDX formatting exactly intact:
- Headers (#, ##, ###)
- Bold (**text**), italic (*text*)
- Lists (-, 1.)
- Links [text](url) — translate text, keep url
- Images ![alt](src) — translate alt text, keep src
- MDX component tags and structure
- Line breaks and paragraph structure

4. For links pointing to English docs paths, update them to Japanese paths:
- [text](/cli/overview) → [翻訳テキスト](/jp/cli/overview)
- [text](/guides/foo) → [翻訳テキスト](/jp/guides/foo)
- External URLs (https://...) stay unchanged

5. Output ONLY the translated text. No preamble, no explanation, no wrapping."""
    return _translate_chunk(client, text, instructions)


def _translate_chunk(client, text, system_prompt):
"""Translate a single chunk of text using Claude."""
message = client.messages.create(
model="claude-sonnet-4-20250514",
max_tokens=16384,
messages=[
{
"role": "user",
"content": f"""Translate the following English technical documentation text to Japanese.

Rules:
- Translate all prose text to natural Japanese
- CRITICAL: Do NOT translate, modify, or remove any placeholder tokens like __PRESERVE_0__, __PRESERVE_1__, etc. Keep them EXACTLY as-is in the EXACT same position
- Do NOT translate brand names (Factory, Droid, GitHub, etc.)
- Do NOT translate technical terms that are commonly kept in English in Japanese tech docs (API, CLI, SDK, MCP, SSO, SCIM, etc.)
- Keep markdown formatting (headers ##, bold **, italic *, lists -, etc.) intact
- Keep the same line structure and paragraph breaks
- Translate heading text after # symbols
- Output ONLY the translated text, no preamble or explanation
"content": f"""{system_prompt}

Text to translate:
{text}"""
Expand All @@ -130,24 +170,22 @@ def _translate_chunk(client, text):

def translate_file(client, filepath):
"""Translate a single .mdx file."""
with open(filepath, 'r') as f:
with open(filepath, 'r', encoding='utf-8') as f:
content = f.read()

frontmatter, body = split_frontmatter(content)

# Extract and preserve code blocks, components, links
body_with_placeholders, placeholders = extract_preservable_blocks(body)
fm_open, fm_body, fm_close, body = split_frontmatter(content)

# Translate the prose
translated_body = translate_text(client, body_with_placeholders)
# Translate frontmatter fields (description, sidebarTitle)
if fm_body:
fm_body = translate_frontmatter(client, fm_body)

# Restore preserved blocks
translated_body = restore_preserved_blocks(translated_body, placeholders)
# Translate body content
translated_body = translate_body(client, body)

# Reassemble
result = frontmatter + translated_body
result = fm_open + fm_body + fm_close + translated_body

with open(filepath, 'w') as f:
with open(filepath, 'w', encoding='utf-8') as f:
f.write(result)

print(f" Translated: {filepath}")
Expand Down
1 change: 1 addition & 0 deletions docs/cli/configuration/mcp.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ The easiest way to get started is using the built-in registry. Type `/mcp` in dr

| Server | Description |
| :----- | :---------- |
| figma | Design exploration and implementation |
| linear | Issue tracking and project management |
| sentry | Error tracking and performance monitoring |
| notion | Notes, docs, and project management |
Expand Down
1 change: 1 addition & 0 deletions docs/jp/cli/configuration/mcp.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Model Context Protocol (MCP) サーバーは、追加のツールとコンテキ

| サーバー | 説明 |
| :----- | :---------- |
| figma | デザインの探索と実装 |
| linear | 課題追跡とプロジェクト管理 |
| sentry | エラー追跡とパフォーマンス監視 |
| notion | ノート、ドキュメント、プロジェクト管理 |
Expand Down