fix(channel): prevent false timeout during multi-turn tool loops (#1037)
This commit is contained in:
parent
178bb108da
commit
f274fd5757
3 changed files with 45 additions and 6 deletions
|
|
@ -95,6 +95,8 @@ const MIN_CHANNEL_MESSAGE_TIMEOUT_SECS: u64 = 30;
|
|||
/// Default timeout for processing a single channel message (LLM + tools).
|
||||
/// Used as fallback when not configured in channels_config.message_timeout_secs.
|
||||
const CHANNEL_MESSAGE_TIMEOUT_SECS: u64 = 300;
|
||||
/// Cap timeout scaling so large max_tool_iterations values do not create unbounded waits.
|
||||
const CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP: u64 = 4;
|
||||
const CHANNEL_PARALLELISM_PER_CHANNEL: usize = 4;
|
||||
const CHANNEL_MIN_IN_FLIGHT_MESSAGES: usize = 8;
|
||||
const CHANNEL_MAX_IN_FLIGHT_MESSAGES: usize = 64;
|
||||
|
|
@ -114,6 +116,15 @@ fn effective_channel_message_timeout_secs(configured: u64) -> u64 {
|
|||
configured.max(MIN_CHANNEL_MESSAGE_TIMEOUT_SECS)
|
||||
}
|
||||
|
||||
/// Computes the total timeout budget, in seconds, for processing one channel message.
///
/// The base `message_timeout_secs` is multiplied by `max_tool_iterations`, with the
/// multiplier clamped to the range `1..=CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP`.
///
/// * `message_timeout_secs` - base timeout in seconds.
/// * `max_tool_iterations` - tool-loop iteration limit; `0` is treated as `1`.
///
/// Returns the scaled budget in seconds; the multiplication saturates at `u64::MAX`
/// instead of overflowing.
fn channel_message_timeout_budget_secs(
|
||||
message_timeout_secs: u64,
|
||||
max_tool_iterations: usize,
|
||||
) -> u64 {
|
||||
// Treat an iteration limit of 0 as 1 so the budget never drops below the base timeout.
let iterations = max_tool_iterations.max(1) as u64;
|
||||
// Cap the multiplier so large iteration limits do not create unbounded waits.
let scale = iterations.min(CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP);
|
||||
// Saturating multiply: a very large base timeout clamps to u64::MAX rather than wrapping.
message_timeout_secs.saturating_mul(scale)
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
struct ChannelRouteSelection {
|
||||
provider: String,
|
||||
|
|
@ -1223,10 +1234,12 @@ async fn process_channel_message(
|
|||
Cancelled,
|
||||
}
|
||||
|
||||
let timeout_budget_secs =
|
||||
channel_message_timeout_budget_secs(ctx.message_timeout_secs, ctx.max_tool_iterations);
|
||||
let llm_result = tokio::select! {
|
||||
() = cancellation_token.cancelled() => LlmExecutionResult::Cancelled,
|
||||
result = tokio::time::timeout(
|
||||
Duration::from_secs(ctx.message_timeout_secs),
|
||||
Duration::from_secs(timeout_budget_secs),
|
||||
run_tool_call_loop(
|
||||
active_provider.as_ref(),
|
||||
&mut history,
|
||||
|
|
@ -1385,7 +1398,10 @@ async fn process_channel_message(
|
|||
}
|
||||
}
|
||||
LlmExecutionResult::Completed(Err(_)) => {
|
||||
let timeout_msg = format!("LLM response timed out after {}s", ctx.message_timeout_secs);
|
||||
let timeout_msg = format!(
|
||||
"LLM response timed out after {}s (base={}s, max_tool_iterations={})",
|
||||
timeout_budget_secs, ctx.message_timeout_secs, ctx.max_tool_iterations
|
||||
);
|
||||
eprintln!(
|
||||
" ❌ {} (elapsed: {}ms)",
|
||||
timeout_msg,
|
||||
|
|
@ -2641,6 +2657,24 @@ mod tests {
|
|||
assert_eq!(effective_channel_message_timeout_secs(300), 300);
|
||||
}
|
||||
|
||||
// Verifies that, for the iteration counts exercised here (1 to 3), the budget is the
// base timeout multiplied by the iteration count.
#[test]
|
||||
fn channel_message_timeout_budget_scales_with_tool_iterations() {
|
||||
assert_eq!(channel_message_timeout_budget_secs(300, 1), 300);
|
||||
assert_eq!(channel_message_timeout_budget_secs(300, 2), 600);
|
||||
assert_eq!(channel_message_timeout_budget_secs(300, 3), 900);
|
||||
}
|
||||
|
||||
// Verifies the two clamping edges of the budget computation: a zero iteration count
// and an iteration count above CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP.
#[test]
|
||||
fn channel_message_timeout_budget_uses_safe_defaults_and_cap() {
|
||||
// 0 iterations falls back to 1x timeout budget.
|
||||
assert_eq!(channel_message_timeout_budget_secs(300, 0), 300);
|
||||
// Large iteration counts are capped to avoid runaway waits.
|
||||
assert_eq!(
|
||||
channel_message_timeout_budget_secs(300, 10),
|
||||
300 * CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn context_window_overflow_error_detector_matches_known_messages() {
|
||||
let overflow_err = anyhow::anyhow!(
|
||||
|
|
|
|||
|
|
@ -7,9 +7,9 @@ use serde::{Deserialize, Serialize};
|
|||
use std::collections::HashMap;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::{OnceLock, RwLock};
|
||||
use tokio::fs::{self, OpenOptions};
|
||||
#[cfg(unix)]
|
||||
use tokio::fs::File;
|
||||
use tokio::fs::{self, OpenOptions};
|
||||
use tokio::io::AsyncWriteExt;
|
||||
|
||||
const SUPPORTED_PROXY_SERVICE_KEYS: &[&str] = &[
|
||||
|
|
@ -2197,7 +2197,10 @@ pub struct ChannelsConfig {
|
|||
pub dingtalk: Option<DingTalkConfig>,
|
||||
/// QQ Official Bot channel configuration.
|
||||
pub qq: Option<QQConfig>,
|
||||
/// Timeout in seconds for processing a single channel message (LLM + tools).
|
||||
/// Base timeout in seconds for processing a single channel message (LLM + tools).
|
||||
/// Runtime multiplies this by the tool-loop iteration limit
|
||||
/// (`max_tool_iterations`, capped at 4x) to get the total budget for the whole
|
||||
/// tool loop, so one slow/retried model call does not consume the entire
/// conversation budget.
|
||||
/// Default: 300s for on-device LLMs (Ollama) which are slower than cloud APIs.
|
||||
#[serde(default = "default_channel_message_timeout_secs")]
|
||||
pub message_timeout_secs: u64,
|
||||
|
|
@ -3544,9 +3547,9 @@ async fn sync_directory(_path: &Path) -> Result<()> {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::path::PathBuf;
|
||||
#[cfg(unix)]
|
||||
use std::{fs::Permissions, os::unix::fs::PermissionsExt};
|
||||
use std::path::PathBuf;
|
||||
use tokio::sync::{Mutex, MutexGuard};
|
||||
use tokio::test;
|
||||
use tokio_stream::wrappers::ReadDirStream;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue