fix(channel): prevent false timeout during multi-turn tool loops (#1037)

This commit is contained in:
Chummy 2026-02-20 12:28:05 +08:00 committed by GitHub
parent 178bb108da
commit f274fd5757
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 45 additions and 6 deletions

View file

@ -95,6 +95,8 @@ const MIN_CHANNEL_MESSAGE_TIMEOUT_SECS: u64 = 30;
/// Default timeout for processing a single channel message (LLM + tools).
/// Used as fallback when not configured in channels_config.message_timeout_secs.
const CHANNEL_MESSAGE_TIMEOUT_SECS: u64 = 300;
/// Cap timeout scaling so large max_tool_iterations values do not create unbounded waits.
///
/// This is the largest multiplier `channel_message_timeout_budget_secs` will apply to
/// the base message timeout, i.e. the total budget is at most this many times the base.
const CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP: u64 = 4;
// NOTE(review): the consumer of this constant is outside this excerpt; going by the
// name it is a per-channel concurrency factor — confirm against the call site.
const CHANNEL_PARALLELISM_PER_CHANNEL: usize = 4;
// NOTE(review): presumably the lower bound on concurrently processed messages;
// usage is not visible here — confirm.
const CHANNEL_MIN_IN_FLIGHT_MESSAGES: usize = 8;
// NOTE(review): presumably the upper bound on concurrently processed messages;
// usage is not visible here — confirm.
const CHANNEL_MAX_IN_FLIGHT_MESSAGES: usize = 64;
@ -114,6 +116,15 @@ fn effective_channel_message_timeout_secs(configured: u64) -> u64 {
configured.max(MIN_CHANNEL_MESSAGE_TIMEOUT_SECS)
}
fn channel_message_timeout_budget_secs(
message_timeout_secs: u64,
max_tool_iterations: usize,
) -> u64 {
let iterations = max_tool_iterations.max(1) as u64;
let scale = iterations.min(CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP);
message_timeout_secs.saturating_mul(scale)
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct ChannelRouteSelection {
provider: String,
@ -1223,10 +1234,12 @@ async fn process_channel_message(
Cancelled,
}
let timeout_budget_secs =
channel_message_timeout_budget_secs(ctx.message_timeout_secs, ctx.max_tool_iterations);
let llm_result = tokio::select! {
() = cancellation_token.cancelled() => LlmExecutionResult::Cancelled,
result = tokio::time::timeout(
Duration::from_secs(ctx.message_timeout_secs),
Duration::from_secs(timeout_budget_secs),
run_tool_call_loop(
active_provider.as_ref(),
&mut history,
@ -1385,7 +1398,10 @@ async fn process_channel_message(
}
}
LlmExecutionResult::Completed(Err(_)) => {
let timeout_msg = format!("LLM response timed out after {}s", ctx.message_timeout_secs);
let timeout_msg = format!(
"LLM response timed out after {}s (base={}s, max_tool_iterations={})",
timeout_budget_secs, ctx.message_timeout_secs, ctx.max_tool_iterations
);
eprintln!(
" ❌ {} (elapsed: {}ms)",
timeout_msg,
@ -2641,6 +2657,24 @@ mod tests {
assert_eq!(effective_channel_message_timeout_secs(300), 300);
}
#[test]
fn channel_message_timeout_budget_scales_with_tool_iterations() {
    // (max_tool_iterations, expected budget in seconds) for a 300s base timeout.
    let expectations: [(usize, u64); 3] = [(1, 300), (2, 600), (3, 900)];

    for &(iterations, expected_budget) in &expectations {
        assert_eq!(
            channel_message_timeout_budget_secs(300, iterations),
            expected_budget
        );
    }
}
#[test]
fn channel_message_timeout_budget_uses_safe_defaults_and_cap() {
    let base_secs: u64 = 300;

    // An iteration limit of zero is treated as a single turn (1x budget).
    let zero_iteration_budget = channel_message_timeout_budget_secs(base_secs, 0);
    assert_eq!(zero_iteration_budget, base_secs);

    // Iteration limits beyond the cap stop scaling, avoiding runaway waits.
    let capped_budget = channel_message_timeout_budget_secs(base_secs, 10);
    assert_eq!(capped_budget, base_secs * CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP);
}
#[test]
fn context_window_overflow_error_detector_matches_known_messages() {
let overflow_err = anyhow::anyhow!(

View file

@ -7,9 +7,9 @@ use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::{OnceLock, RwLock};
use tokio::fs::{self, OpenOptions};
#[cfg(unix)]
use tokio::fs::File;
use tokio::fs::{self, OpenOptions};
use tokio::io::AsyncWriteExt;
const SUPPORTED_PROXY_SERVICE_KEYS: &[&str] = &[
@ -2197,7 +2197,10 @@ pub struct ChannelsConfig {
pub dingtalk: Option<DingTalkConfig>,
/// QQ Official Bot channel configuration.
pub qq: Option<QQConfig>,
/// Timeout in seconds for processing a single channel message (LLM + tools).
/// Base timeout in seconds for processing a single channel message (LLM + tools).
/// Runtime uses this as a per-turn budget that scales with tool-loop depth
/// (up to 4x, capped) so one slow/retried model call does not consume the
/// entire conversation budget.
/// Default: 300s for on-device LLMs (Ollama) which are slower than cloud APIs.
#[serde(default = "default_channel_message_timeout_secs")]
pub message_timeout_secs: u64,
@ -3544,9 +3547,9 @@ async fn sync_directory(_path: &Path) -> Result<()> {
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
#[cfg(unix)]
use std::{fs::Permissions, os::unix::fs::PermissionsExt};
use std::path::PathBuf;
use tokio::sync::{Mutex, MutexGuard};
use tokio::test;
use tokio_stream::wrappers::ReadDirStream;