fix(channel): prevent false timeout during multi-turn tool loops (#1037)

2026-02-20 12:28:05 +08:00 · 2026-02-20 12:28:05 +08:00 · f274fd5757
commit f274fd5757
parent 178bb108da
3 changed files with 45 additions and 6 deletions
--- a/docs/config-reference.md
+++ b/docs/config-reference.md
@ -332,7 +332,7 @@ Top-level channel options are configured under `channels_config`.
 | Key | Default | Purpose |
 |---|---|---|
-| `message_timeout_secs` | `300` | Timeout in seconds for processing a single channel message (LLM + tools) |
+| `message_timeout_secs` | `300` | Base timeout in seconds for channel message processing; runtime scales this with tool-loop depth (up to 4x) |
 Examples:
@ -344,6 +344,8 @@ Examples:
 Notes:
 - Default `300s` is optimized for on-device LLMs (Ollama) which are slower than cloud APIs.
 - Runtime timeout budget is `message_timeout_secs * scale`, where `scale` is `max_tool_iterations` clamped to the range `1`–`4`.
 - This scaling avoids false timeouts when the first LLM turn is slow/retried but later tool-loop turns still need to complete.
 - If using cloud APIs (OpenAI, Anthropic, etc.), you can reduce this to `60` or lower.
 - Values below `30` are clamped to `30` to avoid immediate timeout churn.
 - When a timeout occurs, users receive: `⚠️ Request timed out while waiting for the model. Please try again.`
--- a/src/channels/mod.rs
+++ b/src/channels/mod.rs
@ -95,6 +95,8 @@ const MIN_CHANNEL_MESSAGE_TIMEOUT_SECS: u64 = 30;
 /// Default timeout for processing a single channel message (LLM + tools).
 /// Used as fallback when not configured in channels_config.message_timeout_secs.
 const CHANNEL_MESSAGE_TIMEOUT_SECS: u64 = 300;
 /// Cap timeout scaling so large max_tool_iterations values do not create unbounded waits.
 const CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP: u64 = 4;
 const CHANNEL_PARALLELISM_PER_CHANNEL: usize = 4;
 const CHANNEL_MIN_IN_FLIGHT_MESSAGES: usize = 8;
 const CHANNEL_MAX_IN_FLIGHT_MESSAGES: usize = 64;
@ -114,6 +116,15 @@ fn effective_channel_message_timeout_secs(configured: u64) -> u64 {
    configured.max(MIN_CHANNEL_MESSAGE_TIMEOUT_SECS)
 }
 /// Computes the total timeout budget, in seconds, for one channel message.
 ///
 /// The base `message_timeout_secs` is multiplied by `max_tool_iterations`,
 /// with the multiplier raised to at least 1 and limited to
 /// `CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP`. The multiplication saturates instead
 /// of overflowing.
 fn channel_message_timeout_budget_secs(
    message_timeout_secs: u64,
    max_tool_iterations: usize,
 ) -> u64 {
    // A zero iteration limit still gets one full base timeout.
    let requested_scale = match max_tool_iterations {
        0 => 1,
        n => n as u64,
    };
    let multiplier = CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP.min(requested_scale);
    message_timeout_secs.saturating_mul(multiplier)
 }
 #[derive(Debug, Clone, PartialEq, Eq)]
 struct ChannelRouteSelection {
    provider: String,
@ -1223,10 +1234,12 @@ async fn process_channel_message(
        Cancelled,
    }
    let timeout_budget_secs =
        channel_message_timeout_budget_secs(ctx.message_timeout_secs, ctx.max_tool_iterations);
    let llm_result = tokio::select! {
        () = cancellation_token.cancelled() => LlmExecutionResult::Cancelled,
        result = tokio::time::timeout(
-            Duration::from_secs(ctx.message_timeout_secs),
+            Duration::from_secs(timeout_budget_secs),
            run_tool_call_loop(
                active_provider.as_ref(),
                &mut history,
@ -1385,7 +1398,10 @@ async fn process_channel_message(
            }
        }
        LlmExecutionResult::Completed(Err(_)) => {
-            let timeout_msg = format!("LLM response timed out after {}s", ctx.message_timeout_secs);
+            let timeout_msg = format!(
                "LLM response timed out after {}s (base={}s, max_tool_iterations={})",
                timeout_budget_secs, ctx.message_timeout_secs, ctx.max_tool_iterations
            );
            eprintln!(
                "  ❌ {} (elapsed: {}ms)",
                timeout_msg,
@ -2641,6 +2657,24 @@ mod tests {
        assert_eq!(effective_channel_message_timeout_secs(300), 300);
    }
    #[test]
    fn channel_message_timeout_budget_scales_with_tool_iterations() {
        // Each case is (max_tool_iterations, expected budget) for a 300s base.
        for &(iterations, expected_secs) in &[(1_usize, 300_u64), (2, 600), (3, 900)] {
            assert_eq!(
                channel_message_timeout_budget_secs(300, iterations),
                expected_secs
            );
        }
    }
    #[test]
    fn channel_message_timeout_budget_uses_safe_defaults_and_cap() {
        let base_secs = 300;

        // A zero iteration limit is treated like a single turn: 1x the base.
        let zero_iteration_budget = channel_message_timeout_budget_secs(base_secs, 0);
        assert_eq!(zero_iteration_budget, base_secs);

        // Iteration limits above the cap stop scaling at the cap.
        let capped_budget = channel_message_timeout_budget_secs(base_secs, 10);
        assert_eq!(capped_budget, base_secs * CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP);
    }
    #[test]
    fn context_window_overflow_error_detector_matches_known_messages() {
        let overflow_err = anyhow::anyhow!(
--- a/src/config/schema.rs
+++ b/src/config/schema.rs
@ -7,9 +7,9 @@ use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
 use std::path::{Path, PathBuf};
 use std::sync::{OnceLock, RwLock};
 use tokio::fs::{self, OpenOptions};
 #[cfg(unix)]
 use tokio::fs::File;
 use tokio::fs::{self, OpenOptions};
 use tokio::io::AsyncWriteExt;
 const SUPPORTED_PROXY_SERVICE_KEYS: &[&str] = &[
@ -2197,7 +2197,10 @@ pub struct ChannelsConfig {
    pub dingtalk: Option<DingTalkConfig>,
    /// QQ Official Bot channel configuration.
    pub qq: Option<QQConfig>,
-    /// Timeout in seconds for processing a single channel message (LLM + tools).
+    /// Base timeout in seconds for processing a single channel message (LLM + tools).
    /// Runtime multiplies this by `max_tool_iterations` (clamped to 1..=4) to get
    /// the total budget for the whole tool loop, so one slow/retried model call
    /// is less likely to exhaust the time that later turns still need.
    /// Default: 300s for on-device LLMs (Ollama) which are slower than cloud APIs.
    #[serde(default = "default_channel_message_timeout_secs")]
    pub message_timeout_secs: u64,
@ -3544,9 +3547,9 @@ async fn sync_directory(_path: &Path) -> Result<()> {
 #[cfg(test)]
 mod tests {
    use super::*;
    use std::path::PathBuf;
    #[cfg(unix)]
    use std::{fs::Permissions, os::unix::fs::PermissionsExt};
    use std::path::PathBuf;
    use tokio::sync::{Mutex, MutexGuard};
    use tokio::test;
    use tokio_stream::wrappers::ReadDirStream;