diff --git a/.github/scripts/translate-docs.py b/.github/scripts/translate-docs.py index 2971d34..6e6a523 100644 --- a/.github/scripts/translate-docs.py +++ b/.github/scripts/translate-docs.py @@ -1,7 +1,10 @@ #!/usr/bin/env python3 """ Translate English documentation files to Japanese. -Preserves code blocks, links, MDX components, and frontmatter structure. +Translates all text including inside MDX components, link text, +frontmatter descriptions, and callouts. Preserves code blocks, +URLs, and component tag structure. + Requires ANTHROPIC_API_KEY environment variable. Usage: @@ -14,72 +17,78 @@ import re import anthropic -def extract_preservable_blocks(content): - """Replace code blocks, tags, and links with placeholders. - - Only the *tags themselves* are preserved for MDX/JSX components so that - translatable prose inside wrapper components (e.g. , ) - still gets sent to the translator. - """ - placeholders = [] - counter = [0] - - def replace_with_placeholder(match): - placeholders.append(match.group(0)) - idx = counter[0] - counter[0] += 1 - return f"__PRESERVE_{idx}__" - - # Preserve fenced code blocks (``` ... ```) - content = re.sub(r'```[\s\S]*?```', replace_with_placeholder, content) - # Preserve inline code - content = re.sub(r'`[^`\n]+`', replace_with_placeholder, content) - # Preserve MDX/JSX opening tags (e.g. , ) - content = re.sub(r'<[A-Z][a-zA-Z]*(?:\s[^>]*)?\s*>', replace_with_placeholder, content) - # Preserve MDX/JSX closing tags (e.g. 
# Frontmatter is an opening '---' line, a (possibly empty) YAML section and a
# closing '---' line. The closing fence must start a line and may be the very
# last line of the file, with or without a trailing newline.
_FRONTMATTER_RE = re.compile(
    r'^(---\s*\n)(.*?)^(---\s*(?:\n|\Z))(.*)',
    re.DOTALL | re.MULTILINE,
)

# Frontmatter keys whose values are translated, with the instruction that is
# passed to the translator for each of them.
_FRONTMATTER_PROMPTS = {
    "description": "Translate this short description to Japanese. Keep it concise. Output ONLY the translation.",
    "sidebarTitle": "Translate this short UI label to Japanese. Output ONLY the translation.",
}

# groups: 1 = "key: " prefix, 2 = key, 3 = value, 4 = trailing whitespace
# (kept so that a '\r' from CRLF files survives the rewrite).
_FRONTMATTER_FIELD_RE = re.compile(r'^((description|sidebarTitle):\s*)(.*?)(\s*)$')

# A YAML block scalar header such as '|', '>-' or '|2+': the real value lives
# on the following indented lines, so the header must never be translated.
_YAML_BLOCK_SCALAR_RE = re.compile(r'^[|>][0-9+-]{0,2}(\s+#.*)?$')

# Text that cannot be written as a plain (unquoted) YAML scalar: a leading
# indicator character, a ': ' / trailing ':' (reads as a mapping) or ' #'
# (reads as a comment).
_YAML_PLAIN_UNSAFE_RE = re.compile(r"""^[-?:,\[\]{}#&*!|>'"%@`]|:\s|:$|\s#""")

# Opening code fence: optional indentation / blockquote markers followed by a
# run of at least three backticks or tildes.
_FENCE_OPEN_RE = re.compile(r'^[ \t>]*(`{3,}|~{3,})')


def split_frontmatter(content):
    """Split an MDX document into frontmatter pieces and body.

    Returns a 4-tuple ``(fm_open, fm_body, fm_close, body)`` whose
    concatenation is exactly ``content``. ``fm_open`` and ``fm_close`` are the
    ``---`` fence lines and ``fm_body`` is the YAML between them (``""`` when
    the frontmatter is empty). When the document has no frontmatter the first
    three items are empty strings and ``body`` is the whole document.
    """
    match = _FRONTMATTER_RE.match(content)
    if match:
        return match.group(1), match.group(2), match.group(3), match.group(4)
    return "", "", "", content


def _unquote_yaml_scalar(value):
    """Return ``(text, was_quoted)`` for a single-line YAML scalar."""
    import json

    if len(value) >= 2 and value[0] == value[-1]:
        if value[0] == "'":
            # Inside single quotes the only escape is a doubled quote.
            return value[1:-1].replace("''", "'"), True
        if value[0] == '"':
            try:
                decoded = json.loads(value)
            except ValueError:
                # Escapes JSON does not know about: fall back to the raw text.
                decoded = value[1:-1]
            if isinstance(decoded, str):
                return decoded, True
    return value, False


def _format_yaml_scalar(text, force_quotes):
    """Render ``text`` as a one-line YAML scalar, quoting it when required."""
    import json

    if force_quotes or _YAML_PLAIN_UNSAFE_RE.search(text):
        # A JSON string literal is also a valid YAML double-quoted scalar.
        return json.dumps(text, ensure_ascii=False)
    return text


def translate_frontmatter(client, fm_body):
    """Translate the description and sidebarTitle fields of a frontmatter block.

    ``fm_body`` is the YAML text between the ``---`` fences. Only top-level
    ``description:`` and ``sidebarTitle:`` lines that carry a value on the same
    line are translated (through ``_translate_chunk``, defined elsewhere in
    this script); every other line is returned unchanged.

    The value is unquoted before it is sent to the translator and written back
    as a single-line scalar, double-quoted if the original was quoted or if the
    translation would not be a valid plain scalar. Block scalar headers
    (``description: >-``) are left alone, so such multi-line values stay
    untranslated rather than being corrupted.
    """
    if not fm_body.strip():
        return fm_body

    result = []
    for line in fm_body.split('\n'):
        field = _FRONTMATTER_FIELD_RE.match(line)
        if not field:
            result.append(line)
            continue

        prefix, key, value, trailing = field.groups()
        if not value or _YAML_BLOCK_SCALAR_RE.match(value):
            result.append(line)
            continue

        text, was_quoted = _unquote_yaml_scalar(value)
        if not text.strip():
            result.append(line)
            continue

        translated = _translate_chunk(client, text.strip(), _FRONTMATTER_PROMPTS[key])
        # The field occupies exactly one line: fold any line breaks or
        # surrounding whitespace in the reply into single spaces.
        translated = " ".join(str(translated).split())
        if not translated:
            # Never blank out a field because the translator returned nothing.
            result.append(line)
            continue

        result.append(prefix + _format_yaml_scalar(translated, was_quoted) + trailing)

    return '\n'.join(result)


def _split_fenced_code(body):
    """Split ``body`` into ``(is_code, text)`` segments, in document order.

    Works line by line: a code segment starts at a fence line (three or more
    backticks or tildes) and ends at the next line consisting only of the same
    fence character repeated at least as many times. An unclosed fence runs to
    the end of ``body``. Concatenating the segment texts reproduces ``body``.
    """
    segments = []
    buffer = []
    fence_char = None
    fence_len = 0

    for line in body.splitlines(keepends=True):
        if fence_char is None:
            opener = _FENCE_OPEN_RE.match(line)
            # A backtick fence cannot have backticks after it on the same
            # line; that form is inline code and belongs to the prose.
            if opener and not (opener.group(1)[0] == '`' and '`' in line[opener.end():]):
                if buffer:
                    segments.append((False, ''.join(buffer)))
                    buffer = []
                fence_char = opener.group(1)[0]
                fence_len = len(opener.group(1))
            buffer.append(line)
            continue

        buffer.append(line)
        marker = line.strip().lstrip('> \t')
        if marker and set(marker) == {fence_char} and len(marker) >= fence_len:
            segments.append((True, ''.join(buffer)))
            buffer = []
            fence_char = None

    if buffer:
        segments.append((fence_char is not None, ''.join(buffer)))
    return segments


def translate_body(client, body):
    """Translate the prose of an MDX body while leaving fenced code untouched.

    Fenced code blocks (backtick or tilde fences of any length, closed or not)
    are copied through verbatim. Every other non-blank segment is translated
    with ``translate_prose_segment`` (defined elsewhere in this script).

    The whitespace around each prose segment is taken from the source rather
    than from the translator's reply, because the reply is not guaranteed to
    echo leading/trailing newlines and a missing newline would fuse the
    translated text with the neighbouring fence line.
    """
    if not body.strip():
        return body

    translated_parts = []
    for is_code, part in _split_fenced_code(body):
        core = part.strip()
        if is_code or not core:
            translated_parts.append(part)
            continue

        lead = part[:len(part) - len(part.lstrip())]
        trail = part[len(part.rstrip()):]
        translated = translate_prose_segment(client, core)
        translated_parts.append(lead + translated.strip() + trail)

    return ''.join(translated_parts)
+def _translate_prose(client, text): + """Translate prose text with full context about what to translate.""" + return _translate_chunk(client, text, """Translate the following English technical documentation (MDX/Markdown) to Japanese. + +CRITICAL RULES: +1. Translate ALL prose text to natural Japanese, including: + - Paragraph text + - Heading text (after #, ##, ### etc.) + - List item text + - Text inside MDX components like , , , , , , , + - Link display text: translate [Display Text](/path) to [翻訳テキスト](/path) — keep the URL unchanged + - Text in component attributes like title="..." and description="..." — translate the attribute values + - Table cell text (translate content, keep | separators) + - Bold and italic text content + +2. Do NOT translate: + - Code inside backticks (`code`) — keep exactly as-is + - URLs and file paths + - Brand names: Factory, Droid, GitHub, GitLab, Linear, Slack, Discord, Sentry, PagerDuty, Jira, Notion + - Technical terms commonly kept in English in Japanese tech docs: API, CLI, SDK, MCP, SSO, SCIM, BYOK, IDE, JSON, YAML, MDX, PR, CI/CD, OAuth, OTEL, LLM + - Component tag names: , , , etc. + - Property/attribute names: title=, description=, href=, icon= + - Import statements and JSX expressions in { } + +3. Keep ALL markdown and MDX formatting exactly intact: + - Headers (#, ##, ###) + - Bold (**text**), italic (*text*) + - Lists (-, 1.) + - Links [text](url) — translate text, keep url + - Images ![alt](src) — translate alt text, keep src + - MDX component tags and structure + - Line breaks and paragraph structure + +4. For links pointing to English docs paths, update them to Japanese paths: + - [text](/cli/overview) → [翻訳テキスト](/jp/cli/overview) + - [text](/guides/foo) → [翻訳テキスト](/jp/guides/foo) + - External URLs (https://...) stay unchanged + +5. Output ONLY the translated text. 
No preamble, no explanation, no wrapping.""") + + +def _translate_chunk(client, text, system_prompt): + """Translate a single chunk of text using Claude.""" message = client.messages.create( model="claude-sonnet-4-20250514", max_tokens=16384, messages=[ { "role": "user", - "content": f"""Translate the following English technical documentation text to Japanese. - -Rules: -- Translate all prose text to natural Japanese -- CRITICAL: Do NOT translate, modify, or remove any placeholder tokens like __PRESERVE_0__, __PRESERVE_1__, etc. Keep them EXACTLY as-is in the EXACT same position -- Do NOT translate brand names (Factory, Droid, GitHub, etc.) -- Do NOT translate technical terms that are commonly kept in English in Japanese tech docs (API, CLI, SDK, MCP, SSO, SCIM, etc.) -- Keep markdown formatting (headers ##, bold **, italic *, lists -, etc.) intact -- Keep the same line structure and paragraph breaks -- Translate heading text after # symbols -- Output ONLY the translated text, no preamble or explanation + "content": f"""{system_prompt} Text to translate: {text}""" @@ -130,24 +170,22 @@ def _translate_chunk(client, text): def translate_file(client, filepath): """Translate a single .mdx file.""" - with open(filepath, 'r') as f: + with open(filepath, 'r', encoding='utf-8') as f: content = f.read() - frontmatter, body = split_frontmatter(content) - - # Extract and preserve code blocks, components, links - body_with_placeholders, placeholders = extract_preservable_blocks(body) + fm_open, fm_body, fm_close, body = split_frontmatter(content) - # Translate the prose - translated_body = translate_text(client, body_with_placeholders) + # Translate frontmatter fields (description, sidebarTitle) + if fm_body: + fm_body = translate_frontmatter(client, fm_body) - # Restore preserved blocks - translated_body = restore_preserved_blocks(translated_body, placeholders) + # Translate body content + translated_body = translate_body(client, body) # Reassemble - result = frontmatter + 
translated_body + result = fm_open + fm_body + fm_close + translated_body - with open(filepath, 'w') as f: + with open(filepath, 'w', encoding='utf-8') as f: f.write(result) print(f" Translated: {filepath}") diff --git a/docs/cli/configuration/mcp.mdx b/docs/cli/configuration/mcp.mdx index 2aba9dc..9f839ca 100644 --- a/docs/cli/configuration/mcp.mdx +++ b/docs/cli/configuration/mcp.mdx @@ -12,6 +12,7 @@ The easiest way to get started is using the built-in registry. Type `/mcp` in dr | Server | Description | | :----- | :---------- | +| figma | Design exploration and implementation | | linear | Issue tracking and project management | | sentry | Error tracking and performance monitoring | | notion | Notes, docs, and project management | diff --git a/docs/jp/cli/configuration/mcp.mdx b/docs/jp/cli/configuration/mcp.mdx index 8a916fc..3f30b14 100644 --- a/docs/jp/cli/configuration/mcp.mdx +++ b/docs/jp/cli/configuration/mcp.mdx @@ -12,6 +12,7 @@ Model Context Protocol (MCP) サーバーは、追加のツールとコンテキ | サーバー | 説明 | | :----- | :---------- | +| figma | デザインの探索と実装 | | linear | 課題追跡とプロジェクト管理 | | sentry | エラー追跡とパフォーマンス監視 | | notion | ノート、ドキュメント、プロジェクト管理 |