From f274fd575791af756ef0be60b7f8c71d1dd2097c Mon Sep 17 00:00:00 2001 From: Chummy Date: Fri, 20 Feb 2026 12:28:05 +0800 Subject: [PATCH] fix(channel): prevent false timeout during multi-turn tool loops (#1037) --- docs/config-reference.md | 4 +++- src/channels/mod.rs | 38 ++++++++++++++++++++++++++++++++++++-- src/config/schema.rs | 9 ++++++--- 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/docs/config-reference.md b/docs/config-reference.md index 0d0da02..4a182f5 100644 --- a/docs/config-reference.md +++ b/docs/config-reference.md @@ -332,7 +332,7 @@ Top-level channel options are configured under `channels_config`. | Key | Default | Purpose | |---|---|---| -| `message_timeout_secs` | `300` | Timeout in seconds for processing a single channel message (LLM + tools) | +| `message_timeout_secs` | `300` | Base timeout in seconds for channel message processing; runtime scales this with tool-loop depth (up to 4x) | Examples: @@ -344,6 +344,8 @@ Examples: Notes: - Default `300s` is optimized for on-device LLMs (Ollama) which are slower than cloud APIs. +- Runtime timeout budget is `message_timeout_secs * scale`, where `scale = min(max_tool_iterations, 4)` and a minimum of `1`. +- This scaling avoids false timeouts when the first LLM turn is slow/retried but later tool-loop turns still need to complete. - If using cloud APIs (OpenAI, Anthropic, etc.), you can reduce this to `60` or lower. - Values below `30` are clamped to `30` to avoid immediate timeout churn. - When a timeout occurs, users receive: `⚠️ Request timed out while waiting for the model. Please try again.` diff --git a/src/channels/mod.rs b/src/channels/mod.rs index 0379bea..b9ec121 100644 --- a/src/channels/mod.rs +++ b/src/channels/mod.rs @@ -95,6 +95,8 @@ const MIN_CHANNEL_MESSAGE_TIMEOUT_SECS: u64 = 30; /// Default timeout for processing a single channel message (LLM + tools). /// Used as fallback when not configured in channels_config.message_timeout_secs. const CHANNEL_MESSAGE_TIMEOUT_SECS: u64 = 300; +/// Cap timeout scaling so large max_tool_iterations values do not create unbounded waits. +const CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP: u64 = 4; const CHANNEL_PARALLELISM_PER_CHANNEL: usize = 4; const CHANNEL_MIN_IN_FLIGHT_MESSAGES: usize = 8; const CHANNEL_MAX_IN_FLIGHT_MESSAGES: usize = 64; @@ -114,6 +116,15 @@ fn effective_channel_message_timeout_secs(configured: u64) -> u64 { configured.max(MIN_CHANNEL_MESSAGE_TIMEOUT_SECS) } +fn channel_message_timeout_budget_secs( + message_timeout_secs: u64, + max_tool_iterations: usize, +) -> u64 { + let iterations = max_tool_iterations.max(1) as u64; + let scale = iterations.min(CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP); + message_timeout_secs.saturating_mul(scale) +} + #[derive(Debug, Clone, PartialEq, Eq)] struct ChannelRouteSelection { provider: String, @@ -1223,10 +1234,12 @@ async fn process_channel_message( Cancelled, } + let timeout_budget_secs = + channel_message_timeout_budget_secs(ctx.message_timeout_secs, ctx.max_tool_iterations); let llm_result = tokio::select! { () = cancellation_token.cancelled() => LlmExecutionResult::Cancelled, result = tokio::time::timeout( - Duration::from_secs(ctx.message_timeout_secs), + Duration::from_secs(timeout_budget_secs), run_tool_call_loop( active_provider.as_ref(), &mut history, @@ -1385,7 +1398,10 @@ async fn process_channel_message( } } LlmExecutionResult::Completed(Err(_)) => { - let timeout_msg = format!("LLM response timed out after {}s", ctx.message_timeout_secs); + let timeout_msg = format!( + "LLM response timed out after {}s (base={}s, max_tool_iterations={})", + timeout_budget_secs, ctx.message_timeout_secs, ctx.max_tool_iterations + ); eprintln!( " ❌ {} (elapsed: {}ms)", timeout_msg, @@ -2641,6 +2657,24 @@ mod tests { assert_eq!(effective_channel_message_timeout_secs(300), 300); } + #[test] + fn channel_message_timeout_budget_scales_with_tool_iterations() { + assert_eq!(channel_message_timeout_budget_secs(300, 1), 300); + assert_eq!(channel_message_timeout_budget_secs(300, 2), 600); + assert_eq!(channel_message_timeout_budget_secs(300, 3), 900); + } + + #[test] + fn channel_message_timeout_budget_uses_safe_defaults_and_cap() { + // 0 iterations falls back to 1x timeout budget. + assert_eq!(channel_message_timeout_budget_secs(300, 0), 300); + // Large iteration counts are capped to avoid runaway waits. + assert_eq!( + channel_message_timeout_budget_secs(300, 10), + 300 * CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP + ); + } + #[test] fn context_window_overflow_error_detector_matches_known_messages() { let overflow_err = anyhow::anyhow!( diff --git a/src/config/schema.rs b/src/config/schema.rs index a31c811..7167ffb 100644 --- a/src/config/schema.rs +++ b/src/config/schema.rs @@ -7,9 +7,9 @@ use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::path::{Path, PathBuf}; use std::sync::{OnceLock, RwLock}; -use tokio::fs::{self, OpenOptions}; #[cfg(unix)] use tokio::fs::File; +use tokio::fs::{self, OpenOptions}; use tokio::io::AsyncWriteExt; const SUPPORTED_PROXY_SERVICE_KEYS: &[&str] = &[ @@ -2197,7 +2197,10 @@ pub struct ChannelsConfig { pub dingtalk: Option, /// QQ Official Bot channel configuration. pub qq: Option, - /// Timeout in seconds for processing a single channel message (LLM + tools). + /// Base timeout in seconds for processing a single channel message (LLM + tools). + /// Runtime uses this as a per-turn budget that scales with tool-loop depth + /// (up to 4x, capped) so one slow/retried model call does not consume the + /// entire conversation budget. /// Default: 300s for on-device LLMs (Ollama) which are slower than cloud APIs. #[serde(default = "default_channel_message_timeout_secs")] pub message_timeout_secs: u64, @@ -3544,9 +3547,9 @@ async fn sync_directory(_path: &Path) -> Result<()> { #[cfg(test)] mod tests { use super::*; + use std::path::PathBuf; #[cfg(unix)] use std::{fs::Permissions, os::unix::fs::PermissionsExt}; - use std::path::PathBuf; use tokio::sync::{Mutex, MutexGuard}; use tokio::test; use tokio_stream::wrappers::ReadDirStream;