fix(channel): prevent false timeout during multi-turn tool loops (#1037)
This commit is contained in:
parent
178bb108da
commit
f274fd5757
3 changed files with 45 additions and 6 deletions
|
|
@ -332,7 +332,7 @@ Top-level channel options are configured under `channels_config`.
|
||||||
|
|
||||||
| Key | Default | Purpose |
|
| Key | Default | Purpose |
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
| `message_timeout_secs` | `300` | Timeout in seconds for processing a single channel message (LLM + tools) |
|
| `message_timeout_secs` | `300` | Base timeout in seconds for channel message processing; the runtime multiplies this by `max_tool_iterations` (capped at 4x) to get the total budget |
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
|
|
||||||
|
|
@ -344,6 +344,8 @@ Examples:
|
||||||
Notes:
|
Notes:
|
||||||
|
|
||||||
- Default `300s` is optimized for on-device LLMs (Ollama) which are slower than cloud APIs.
|
- Default `300s` is optimized for on-device LLMs (Ollama) which are slower than cloud APIs.
|
||||||
|
- Runtime timeout budget is `message_timeout_secs * scale`, where `scale` is `max_tool_iterations` clamped to the range `1..=4` (so `0` is treated as `1`, and anything above `4` as `4`).
|
||||||
|
- This scaling avoids false timeouts when the first LLM turn is slow/retried but later tool-loop turns still need to complete.
|
||||||
- If using cloud APIs (OpenAI, Anthropic, etc.), you can reduce this to `60` or lower.
|
- If using cloud APIs (OpenAI, Anthropic, etc.), you can reduce this to `60` or lower.
|
||||||
- Values below `30` are clamped to `30` to avoid immediate timeout churn.
|
- Values below `30` are clamped to `30` to avoid immediate timeout churn.
|
||||||
- When a timeout occurs, users receive: `⚠️ Request timed out while waiting for the model. Please try again.`
|
- When a timeout occurs, users receive: `⚠️ Request timed out while waiting for the model. Please try again.`
|
||||||
|
|
|
||||||
|
|
@ -95,6 +95,8 @@ const MIN_CHANNEL_MESSAGE_TIMEOUT_SECS: u64 = 30;
|
||||||
/// Default timeout for processing a single channel message (LLM + tools).
|
/// Default timeout for processing a single channel message (LLM + tools).
|
||||||
/// Used as fallback when not configured in channels_config.message_timeout_secs.
|
/// Used as fallback when not configured in channels_config.message_timeout_secs.
|
||||||
const CHANNEL_MESSAGE_TIMEOUT_SECS: u64 = 300;
|
const CHANNEL_MESSAGE_TIMEOUT_SECS: u64 = 300;
|
||||||
|
/// Cap timeout scaling so large max_tool_iterations values do not create unbounded waits.
|
||||||
|
const CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP: u64 = 4;
|
||||||
const CHANNEL_PARALLELISM_PER_CHANNEL: usize = 4;
|
const CHANNEL_PARALLELISM_PER_CHANNEL: usize = 4;
|
||||||
const CHANNEL_MIN_IN_FLIGHT_MESSAGES: usize = 8;
|
const CHANNEL_MIN_IN_FLIGHT_MESSAGES: usize = 8;
|
||||||
const CHANNEL_MAX_IN_FLIGHT_MESSAGES: usize = 64;
|
const CHANNEL_MAX_IN_FLIGHT_MESSAGES: usize = 64;
|
||||||
|
|
@ -114,6 +116,15 @@ fn effective_channel_message_timeout_secs(configured: u64) -> u64 {
|
||||||
configured.max(MIN_CHANNEL_MESSAGE_TIMEOUT_SECS)
|
configured.max(MIN_CHANNEL_MESSAGE_TIMEOUT_SECS)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn channel_message_timeout_budget_secs(
|
||||||
|
message_timeout_secs: u64,
|
||||||
|
max_tool_iterations: usize,
|
||||||
|
) -> u64 {
|
||||||
|
let iterations = max_tool_iterations.max(1) as u64;
|
||||||
|
let scale = iterations.min(CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP);
|
||||||
|
message_timeout_secs.saturating_mul(scale)
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
struct ChannelRouteSelection {
|
struct ChannelRouteSelection {
|
||||||
provider: String,
|
provider: String,
|
||||||
|
|
@ -1223,10 +1234,12 @@ async fn process_channel_message(
|
||||||
Cancelled,
|
Cancelled,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let timeout_budget_secs =
|
||||||
|
channel_message_timeout_budget_secs(ctx.message_timeout_secs, ctx.max_tool_iterations);
|
||||||
let llm_result = tokio::select! {
|
let llm_result = tokio::select! {
|
||||||
() = cancellation_token.cancelled() => LlmExecutionResult::Cancelled,
|
() = cancellation_token.cancelled() => LlmExecutionResult::Cancelled,
|
||||||
result = tokio::time::timeout(
|
result = tokio::time::timeout(
|
||||||
Duration::from_secs(ctx.message_timeout_secs),
|
Duration::from_secs(timeout_budget_secs),
|
||||||
run_tool_call_loop(
|
run_tool_call_loop(
|
||||||
active_provider.as_ref(),
|
active_provider.as_ref(),
|
||||||
&mut history,
|
&mut history,
|
||||||
|
|
@ -1385,7 +1398,10 @@ async fn process_channel_message(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
LlmExecutionResult::Completed(Err(_)) => {
|
LlmExecutionResult::Completed(Err(_)) => {
|
||||||
let timeout_msg = format!("LLM response timed out after {}s", ctx.message_timeout_secs);
|
let timeout_msg = format!(
|
||||||
|
"LLM response timed out after {}s (base={}s, max_tool_iterations={})",
|
||||||
|
timeout_budget_secs, ctx.message_timeout_secs, ctx.max_tool_iterations
|
||||||
|
);
|
||||||
eprintln!(
|
eprintln!(
|
||||||
" ❌ {} (elapsed: {}ms)",
|
" ❌ {} (elapsed: {}ms)",
|
||||||
timeout_msg,
|
timeout_msg,
|
||||||
|
|
@ -2641,6 +2657,24 @@ mod tests {
|
||||||
assert_eq!(effective_channel_message_timeout_secs(300), 300);
|
assert_eq!(effective_channel_message_timeout_secs(300), 300);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Below the scale cap, the budget is the base timeout times the iteration limit.
#[test]
fn channel_message_timeout_budget_scales_with_tool_iterations() {
    let base_secs = 300;
    // (max_tool_iterations, expected budget in seconds)
    for (max_tool_iterations, expected_secs) in [(1, 300), (2, 600), (3, 900)] {
        assert_eq!(
            channel_message_timeout_budget_secs(base_secs, max_tool_iterations),
            expected_secs
        );
    }
}
|
||||||
|
|
||||||
|
/// Edge cases for the timeout budget: zero iterations, the scale cap boundary,
/// and multiplication overflow.
#[test]
fn channel_message_timeout_budget_uses_safe_defaults_and_cap() {
    // 0 iterations falls back to 1x timeout budget.
    assert_eq!(channel_message_timeout_budget_secs(300, 0), 300);
    // Large iteration counts are capped to avoid runaway waits.
    assert_eq!(
        channel_message_timeout_budget_secs(300, 10),
        300 * CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP
    );
    // The cap applies exactly at the boundary and just past it.
    assert_eq!(
        channel_message_timeout_budget_secs(300, 4),
        channel_message_timeout_budget_secs(300, 5)
    );
    // The largest possible iteration count is still capped.
    assert_eq!(
        channel_message_timeout_budget_secs(300, usize::MAX),
        300 * CHANNEL_MESSAGE_TIMEOUT_SCALE_CAP
    );
    // Huge base timeouts saturate instead of overflowing.
    assert_eq!(channel_message_timeout_budget_secs(u64::MAX, 2), u64::MAX);
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn context_window_overflow_error_detector_matches_known_messages() {
|
fn context_window_overflow_error_detector_matches_known_messages() {
|
||||||
let overflow_err = anyhow::anyhow!(
|
let overflow_err = anyhow::anyhow!(
|
||||||
|
|
|
||||||
|
|
@ -7,9 +7,9 @@ use serde::{Deserialize, Serialize};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::{OnceLock, RwLock};
|
use std::sync::{OnceLock, RwLock};
|
||||||
use tokio::fs::{self, OpenOptions};
|
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
use tokio::fs::File;
|
use tokio::fs::File;
|
||||||
|
use tokio::fs::{self, OpenOptions};
|
||||||
use tokio::io::AsyncWriteExt;
|
use tokio::io::AsyncWriteExt;
|
||||||
|
|
||||||
const SUPPORTED_PROXY_SERVICE_KEYS: &[&str] = &[
|
const SUPPORTED_PROXY_SERVICE_KEYS: &[&str] = &[
|
||||||
|
|
@ -2197,7 +2197,10 @@ pub struct ChannelsConfig {
|
||||||
pub dingtalk: Option<DingTalkConfig>,
|
pub dingtalk: Option<DingTalkConfig>,
|
||||||
/// QQ Official Bot channel configuration.
|
/// QQ Official Bot channel configuration.
|
||||||
pub qq: Option<QQConfig>,
|
pub qq: Option<QQConfig>,
|
||||||
/// Timeout in seconds for processing a single channel message (LLM + tools).
|
/// Base timeout in seconds for processing a single channel message (LLM + tools).
|
||||||
|
/// Runtime uses this as a per-turn budget that scales with tool-loop depth
|
||||||
|
/// (up to 4x, capped) so one slow/retried model call does not consume the
|
||||||
|
/// entire conversation budget.
|
||||||
/// Default: 300s for on-device LLMs (Ollama) which are slower than cloud APIs.
|
/// Default: 300s for on-device LLMs (Ollama) which are slower than cloud APIs.
|
||||||
#[serde(default = "default_channel_message_timeout_secs")]
|
#[serde(default = "default_channel_message_timeout_secs")]
|
||||||
pub message_timeout_secs: u64,
|
pub message_timeout_secs: u64,
|
||||||
|
|
@ -3544,9 +3547,9 @@ async fn sync_directory(_path: &Path) -> Result<()> {
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use std::path::PathBuf;
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
use std::{fs::Permissions, os::unix::fs::PermissionsExt};
|
use std::{fs::Permissions, os::unix::fs::PermissionsExt};
|
||||||
use std::path::PathBuf;
|
|
||||||
use tokio::sync::{Mutex, MutexGuard};
|
use tokio::sync::{Mutex, MutexGuard};
|
||||||
use tokio::test;
|
use tokio::test;
|
||||||
use tokio_stream::wrappers::ReadDirStream;
|
use tokio_stream::wrappers::ReadDirStream;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue