69gg · 69gg · Mar 8, 2026 · Mar 4, 2026 · Mar 4, 2026 · Mar 4, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -1,37 +1,28 @@
 # Repository Guidelines
 
 ## Project Structure & Module Organization
-Core Python code lives in `src/Undefined/`, organized by domain: `ai/`, `services/`, `cognitive/`, `skills/`, `webui/`, and `utils/`.  
-Tests are in `tests/` (pytest auto-discovers from this directory).  
-Runtime/config assets live in `res/`, `img/`, and `config/`.  
-Operational scripts are in `scripts/` (for example, `scripts/reembed_cognitive.py`).  
-Documentation is in `docs/`, and CI/release workflows are under `.github/workflows/`.
+Primary code lives in `src/Undefined/`. Keep changes in the matching domain package: `ai/` for model orchestration, `cognitive/` for memory pipelines, `services/` for runtime services, `skills/` for tools/agents/commands, `webui/` for the management UI, and `utils/` for shared helpers. Tests live in `tests/`. Packaged assets and defaults live in `res/`, `img/`, and `config/`. Scripts are in `scripts/`; docs are in `docs/`.
 
 ## Build, Test, and Development Commands
-- `uv sync --group dev -p 3.12`: install project + development dependencies.
-- `uv run playwright install`: install browser runtime used by rendering/web features.
-- `uv run Undefined` or `uv run Undefined-webui`: run bot or WebUI (choose one, do not run both).
-- `uv run ruff format .`: auto-format code.
-- `uv run ruff check .`: lint checks.
-- `uv run mypy .`: strict type checking (project is configured with `mypy` strict mode).
-- `uv run pytest tests/`: run full test suite.
-- `uv build --wheel`: build distributable wheel (CI also validates packaged resources).
+- `uv sync --group dev -p 3.12`: install the project with contributor tooling.
+- `uv run playwright install`: install the browser runtime used by rendering and web-driven features.
+- `cp config.toml.example config.toml`: create a local config before running the app.
+- `uv run Undefined`: start the bot process directly.
+- `uv run Undefined-webui`: start the WebUI manager. Do not run this alongside `Undefined`.
+- `uv run ruff format .`: apply formatting.
+- `uv run ruff check .`: run lint checks.
+- `uv run mypy .`: run strict type checking.
+- `uv run pytest tests/`: run the full test suite.
+- `uv build --wheel`: build the distributable wheel (CI also validates that packaged resources are included).
 
 ## Coding Style & Naming Conventions
-Use 4-space indentation, type annotations, and `async`/`await` for I/O paths when applicable.  
-Follow Ruff formatting output; do not hand-tune style against formatter/linter.  
-Use `snake_case` for modules/functions/variables, `PascalCase` for classes, and `UPPER_SNAKE_CASE` for constants.  
-Keep modules focused by capability (for example, add chat command logic under `skills/commands/`).
+Use 4-space indentation, Python type hints, and `async`/`await` for I/O paths. Let Ruff drive formatting instead of hand-formatting around it. Use `snake_case` for modules, functions, and variables; `PascalCase` for classes; `UPPER_SNAKE_CASE` for constants. Keep modules narrow in scope, and place new Skills content under the correct subtree such as `skills/tools/`, `skills/toolsets/`, or `skills/agents/`.
 
 ## Testing Guidelines
-Frameworks: `pytest`, `pytest-asyncio` (`asyncio_mode = auto`).  
-Name tests as `test_*.py` and test functions as `test_*`.  
-Prefer targeted runs during development, e.g. `uv run pytest tests/test_parse_command.py -q`.  
-Before opening a PR, run format, lint, type check, and full tests locally.  
-No fixed coverage gate is enforced, but add tests for behavior changes and regressions.
+The project uses `pytest` with `pytest-asyncio` (`asyncio_mode = auto`). Name files `tests/test_*.py` and test functions `test_*`. Prefer focused runs while iterating, for example `uv run pytest tests/test_parse_command.py -q`, then finish with the full suite. Add regression coverage for behavior changes in handlers, config loading, Skills discovery, and WebUI routes.
 
 ## Commit & Pull Request Guidelines
-Use Conventional Commit style seen in history: `feat: ...`, `fix(scope): ...`, `chore(version): ...`, `refactor: ...`.  
-Release tooling groups commits by `feat`/`fix`, so use these prefixes accurately.  
-PRs should include: concise summary, linked issue (if any), test evidence (commands/results), and screenshots for WebUI changes.  
-Ensure CI passes (`ruff`, `mypy`, `pytest`, build checks) before requesting review.
+Follow the commit style already used in history: `feat: ...`, `fix(scope): ...`, `chore(version): ...`. Keep subjects short and imperative. PRs should include a clear summary, linked issue when applicable, the commands you ran (`ruff`, `mypy`, `pytest`), and screenshots for WebUI changes. If you modify `res/`, `img/`, or `config.toml.example`, note that wheel packaging was checked with `uv build --wheel`.
+
+## Security & Configuration Tips
+Treat `config.toml` as runtime state and avoid committing secrets. Prefer `config.toml.example` for documented defaults. Outputs under `data/` and `logs/` should stay out of feature commits unless the change explicitly targets fixtures or diagnostics.
diff --git a/config.toml.example b/config.toml.example
@@ -89,6 +89,15 @@ max_tokens = 8192
 # zh: 队列发车间隔（秒）。
 # en: Queue interval (seconds).
 queue_interval_seconds = 1.0
+# zh: API 模式：传统 chat.completions 或新版 responses。
+# en: API mode: classic chat.completions or the newer responses API.
+api_mode = "chat_completions"
+# zh: 是否启用 reasoning.effort。
+# en: Enable reasoning.effort.
+reasoning_enabled = false
+# zh: reasoning.effort 档位：none / minimal / low / medium / high / xhigh。
+# en: reasoning.effort level: none / minimal / low / medium / high / xhigh.
+reasoning_effort = "medium"
 # zh: 是否启用 thinking（思维链）。
 # en: Enable thinking (reasoning).
 thinking_enabled = false
@@ -100,7 +109,17 @@ thinking_budget_tokens = 20000
 thinking_include_budget = true
 # zh: 思维链工具调用兼容：启用后在多轮工具调用中回传 reasoning_content，避免部分模型返回 400。
 # en: Thinking tool-call compatibility: pass back reasoning_content in multi-turn tool calls to avoid 400 errors from some models.
-thinking_tool_call_compat = false
+thinking_tool_call_compat = true
+# zh: Responses API 的 tool_choice 兼容模式：仅在关闭时请求仍返回 500、怀疑上游不兼容对象型 tool_choice 时再尝试开启；开启后上报为 "required" 并只保留目标工具。当前已在 new-api v0.11.4-alpha.3 发现该问题。默认关闭。
+# en: Responses API tool_choice compatibility mode: only try enabling this when requests still return 500 with the default setting and you suspect the upstream does not support object-style tool_choice; it sends "required" and keeps only the selected tool. This issue is currently observed on new-api v0.11.4-alpha.3. Disabled by default.
+responses_tool_choice_compat = false
+# zh: Responses API 续轮强制降级：启用后，多轮工具调用将始终跳过 previous_response_id，直接使用完整消息重放（stateless replay）。仅在上游不兼容 responses 状态续轮时使用。默认关闭。
+# en: Responses API force stateless replay: when enabled, multi-turn tool follow-ups always skip previous_response_id and replay the full message history instead. Use only when the upstream does not handle stateful responses follow-ups correctly. Disabled by default.
+responses_force_stateless_replay = false
+
+# zh: 额外请求体参数（可选），可用于 temperature 或供应商私有参数。
+# en: Extra request-body params (optional), e.g. temperature or vendor-specific fields.
+[models.chat.request_params]
 
 # zh: 模型池配置（可选，支持多模型轮询/随机/用户指定）。
 # en: Model pool configuration (optional, supports round-robin/random/user-specified).
@@ -130,6 +149,15 @@ model_name = ""
 # zh: 队列发车间隔（秒）。
 # en: Queue interval (seconds).
 queue_interval_seconds = 1.0
+# zh: API 模式：传统 chat.completions 或新版 responses。
+# en: API mode: classic chat.completions or the newer responses API.
+api_mode = "chat_completions"
+# zh: 是否启用 reasoning.effort。
+# en: Enable reasoning.effort.
+reasoning_enabled = false
+# zh: reasoning.effort 档位：none / minimal / low / medium / high / xhigh。
+# en: reasoning.effort level: none / minimal / low / medium / high / xhigh.
+reasoning_effort = "medium"
 # zh: 是否启用 thinking（思维链）。
 # en: Enable thinking (reasoning).
 thinking_enabled = false
@@ -141,7 +169,17 @@ thinking_budget_tokens = 20000
 thinking_include_budget = true
 # zh: 思维链工具调用兼容：启用后在多轮工具调用中回传 reasoning_content，避免部分模型返回 400。
 # en: Thinking tool-call compatibility: pass back reasoning_content in multi-turn tool calls to avoid 400 errors from some models.
-thinking_tool_call_compat = false
+thinking_tool_call_compat = true
+# zh: Responses API 的 tool_choice 兼容模式：仅在关闭时请求仍返回 500、怀疑上游不兼容对象型 tool_choice 时再尝试开启；开启后上报为 "required" 并只保留目标工具。当前已在 new-api v0.11.4-alpha.3 发现该问题。默认关闭。
+# en: Responses API tool_choice compatibility mode: only try enabling this when requests still return 500 with the default setting and you suspect the upstream does not support object-style tool_choice; it sends "required" and keeps only the selected tool. This issue is currently observed on new-api v0.11.4-alpha.3. Disabled by default.
+responses_tool_choice_compat = false
+# zh: Responses API 续轮强制降级：启用后，多轮工具调用将始终跳过 previous_response_id，直接使用完整消息重放（stateless replay）。仅在上游不兼容 responses 状态续轮时使用。默认关闭。
+# en: Responses API force stateless replay: when enabled, multi-turn tool follow-ups always skip previous_response_id and replay the full message history instead. Use only when the upstream does not handle stateful responses follow-ups correctly. Disabled by default.
+responses_force_stateless_replay = false
+
+# zh: 额外请求体参数（可选），可用于 temperature 或供应商私有参数。
+# en: Extra request-body params (optional), e.g. temperature or vendor-specific fields.
+[models.vision.request_params]
 
 # zh: 安全模型配置（用于防注入检测和注入后回复生成）。
 # en: Security model config (injection detection and post-injection responses).
@@ -164,6 +202,15 @@ max_tokens = 100
 # zh: 队列发车间隔（秒）。
 # en: Queue interval (seconds).
 queue_interval_seconds = 1.0
+# zh: API 模式：传统 chat.completions 或新版 responses。
+# en: API mode: classic chat.completions or the newer responses API.
+api_mode = "chat_completions"
+# zh: 是否启用 reasoning.effort。
+# en: Enable reasoning.effort.
+reasoning_enabled = false
+# zh: reasoning.effort 档位：none / minimal / low / medium / high / xhigh。
+# en: reasoning.effort level: none / minimal / low / medium / high / xhigh.
+reasoning_effort = "medium"
 # zh: 是否启用 thinking（思维链）。
 # en: Enable thinking (reasoning).
 thinking_enabled = false
@@ -175,7 +222,17 @@ thinking_budget_tokens = 0
 thinking_include_budget = true
 # zh: 思维链工具调用兼容：启用后在多轮工具调用中回传 reasoning_content，避免部分模型返回 400。
 # en: Thinking tool-call compatibility: pass back reasoning_content in multi-turn tool calls to avoid 400 errors from some models.
-thinking_tool_call_compat = false
+thinking_tool_call_compat = true
+# zh: Responses API 的 tool_choice 兼容模式：仅在关闭时请求仍返回 500、怀疑上游不兼容对象型 tool_choice 时再尝试开启；开启后上报为 "required" 并只保留目标工具。当前已在 new-api v0.11.4-alpha.3 发现该问题。默认关闭。
+# en: Responses API tool_choice compatibility mode: only try enabling this when requests still return 500 with the default setting and you suspect the upstream does not support object-style tool_choice; it sends "required" and keeps only the selected tool. This issue is currently observed on new-api v0.11.4-alpha.3. Disabled by default.
+responses_tool_choice_compat = false
+# zh: Responses API 续轮强制降级：启用后，多轮工具调用将始终跳过 previous_response_id，直接使用完整消息重放（stateless replay）。仅在上游不兼容 responses 状态续轮时使用。默认关闭。
+# en: Responses API force stateless replay: when enabled, multi-turn tool follow-ups always skip previous_response_id and replay the full message history instead. Use only when the upstream does not handle stateful responses follow-ups correctly. Disabled by default.
+responses_force_stateless_replay = false
+
+# zh: 额外请求体参数（可选），可用于 temperature 或供应商私有参数。
+# en: Extra request-body params (optional), e.g. temperature or vendor-specific fields.
+[models.security.request_params]
 
 # zh: Agent 模型配置（用于执行 agents）。
 # en: Agent model config (used to run agents).
@@ -195,6 +252,15 @@ max_tokens = 4096
 # zh: 队列发车间隔（秒）。
 # en: Queue interval (seconds).
 queue_interval_seconds = 1.0
+# zh: API 模式：传统 chat.completions 或新版 responses。
+# en: API mode: classic chat.completions or the newer responses API.
+api_mode = "chat_completions"
+# zh: 是否启用 reasoning.effort。
+# en: Enable reasoning.effort.
+reasoning_enabled = false
+# zh: reasoning.effort 档位：none / minimal / low / medium / high / xhigh。
+# en: reasoning.effort level: none / minimal / low / medium / high / xhigh.
+reasoning_effort = "medium"
 # zh: 是否启用 thinking（思维链）。
 # en: Enable thinking (reasoning).
 thinking_enabled = false
@@ -206,7 +272,17 @@ thinking_budget_tokens = 0
 thinking_include_budget = true
 # zh: 思维链工具调用兼容：启用后在多轮工具调用中回传 reasoning_content，避免部分模型返回 400。
 # en: Thinking tool-call compatibility: pass back reasoning_content in multi-turn tool calls to avoid 400 errors from some models.
-thinking_tool_call_compat = false
+thinking_tool_call_compat = true
+# zh: Responses API 的 tool_choice 兼容模式：仅在关闭时请求仍返回 500、怀疑上游不兼容对象型 tool_choice 时再尝试开启；开启后上报为 "required" 并只保留目标工具。当前已在 new-api v0.11.4-alpha.3 发现该问题。默认关闭。
+# en: Responses API tool_choice compatibility mode: only try enabling this when requests still return 500 with the default setting and you suspect the upstream does not support object-style tool_choice; it sends "required" and keeps only the selected tool. This issue is currently observed on new-api v0.11.4-alpha.3. Disabled by default.
+responses_tool_choice_compat = false
+# zh: Responses API 续轮强制降级：启用后，多轮工具调用将始终跳过 previous_response_id，直接使用完整消息重放（stateless replay）。仅在上游不兼容 responses 状态续轮时使用。默认关闭。
+# en: Responses API force stateless replay: when enabled, multi-turn tool follow-ups always skip previous_response_id and replay the full message history instead. Use only when the upstream does not handle stateful responses follow-ups correctly. Disabled by default.
+responses_force_stateless_replay = false
+
+# zh: 额外请求体参数（可选），可用于 temperature 或供应商私有参数。
+# en: Extra request-body params (optional), e.g. temperature or vendor-specific fields.
+[models.agent.request_params]
 
 # zh: Agent 模型池配置（可选，支持多模型轮询/随机/用户指定）。
 # en: Agent model pool configuration (optional, supports round-robin/random/user-specified).
@@ -239,6 +315,15 @@ max_tokens = 4096
 # zh: 队列发车间隔（秒）。
 # en: Queue interval (seconds).
 queue_interval_seconds = 1.0
+# zh: API 模式：传统 chat.completions 或新版 responses。
+# en: API mode: classic chat.completions or the newer responses API.
+api_mode = "chat_completions"
+# zh: 是否启用 reasoning.effort。
+# en: Enable reasoning.effort.
+reasoning_enabled = false
+# zh: reasoning.effort 档位：none / minimal / low / medium / high / xhigh。
+# en: reasoning.effort level: none / minimal / low / medium / high / xhigh.
+reasoning_effort = "medium"
 # zh: 是否启用 thinking（思维链）。
 # en: Enable thinking (reasoning).
 thinking_enabled = false
@@ -250,7 +335,17 @@ thinking_budget_tokens = 0
 thinking_include_budget = true
 # zh: 思维链工具调用兼容：启用后在多轮工具调用中回传 reasoning_content，避免部分模型返回 400。
 # en: Thinking tool-call compatibility: pass back reasoning_content in multi-turn tool calls to avoid 400 errors from some models.
-thinking_tool_call_compat = false
+thinking_tool_call_compat = true
+# zh: Responses API 的 tool_choice 兼容模式：仅在关闭时请求仍返回 500、怀疑上游不兼容对象型 tool_choice 时再尝试开启；开启后上报为 "required" 并只保留目标工具。当前已在 new-api v0.11.4-alpha.3 发现该问题。默认关闭。
+# en: Responses API tool_choice compatibility mode: only try enabling this when requests still return 500 with the default setting and you suspect the upstream does not support object-style tool_choice; it sends "required" and keeps only the selected tool. This issue is currently observed on new-api v0.11.4-alpha.3. Disabled by default.
+responses_tool_choice_compat = false
+# zh: Responses API 续轮强制降级：启用后，多轮工具调用将始终跳过 previous_response_id，直接使用完整消息重放（stateless replay）。仅在上游不兼容 responses 状态续轮时使用。默认关闭。
+# en: Responses API force stateless replay: when enabled, multi-turn tool follow-ups always skip previous_response_id and replay the full message history instead. Use only when the upstream does not handle stateful responses follow-ups correctly. Disabled by default.
+responses_force_stateless_replay = false
+
+# zh: 额外请求体参数（可选），可用于 temperature 或供应商私有参数。
+# en: Extra request-body params (optional), e.g. temperature or vendor-specific fields.
+[models.historian.request_params]
 
 # zh: 嵌入模型配置（知识库语义检索使用）。
 # en: Embedding model config (used by knowledge semantic retrieval).
@@ -277,6 +372,10 @@ query_instruction = ""
 # en: Document instruction prefix (optional, common for E5-style models, e.g. "passage: ").
 document_instruction = ""
 
+# zh: 额外请求体参数（可选），用于 embedding 供应商的扩展字段。
+# en: Extra request-body params (optional) for embedding-provider-specific fields.
+[models.embedding.request_params]
+
 # zh: 重排模型配置（知识库二阶段检索使用）。
 # en: Rerank model config (used in second-stage knowledge retrieval).
 [models.rerank]
@@ -296,6 +395,10 @@ queue_interval_seconds = 1.0
 # en: Query instruction prefix (optional, required by some rerank models, e.g. "Instruct: ...\\nQuery: ").
 query_instruction = ""
 
+# zh: 额外请求体参数（可选），用于 rerank 供应商的扩展字段。
+# en: Extra request-body params (optional) for rerank-provider-specific fields.
+[models.rerank.request_params]
+
 # zh: 本地知识库配置。
 # en: Local knowledge base settings.
 [knowledge]