fix: fail fast on context-window overflow and reset channel history
This commit is contained in:
parent
aa176ef881
commit
2c07fb1792
2 changed files with 157 additions and 9 deletions
|
|
@ -254,6 +254,22 @@ fn clear_sender_history(ctx: &ChannelRuntimeContext, sender_key: &str) {
|
||||||
.remove(sender_key);
|
.remove(sender_key);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn is_context_window_overflow_error(err: &anyhow::Error) -> bool {
|
||||||
|
let lower = err.to_string().to_lowercase();
|
||||||
|
[
|
||||||
|
"exceeds the context window",
|
||||||
|
"context window of this model",
|
||||||
|
"maximum context length",
|
||||||
|
"context length exceeded",
|
||||||
|
"too many tokens",
|
||||||
|
"token limit exceeded",
|
||||||
|
"prompt is too long",
|
||||||
|
"input is too long",
|
||||||
|
]
|
||||||
|
.iter()
|
||||||
|
.any(|hint| lower.contains(hint))
|
||||||
|
}
|
||||||
|
|
||||||
fn load_cached_model_preview(workspace_dir: &Path, provider_name: &str) -> Vec<String> {
|
fn load_cached_model_preview(workspace_dir: &Path, provider_name: &str) -> Vec<String> {
|
||||||
let cache_path = workspace_dir.join("state").join(MODEL_CACHE_FILE);
|
let cache_path = workspace_dir.join("state").join(MODEL_CACHE_FILE);
|
||||||
let Ok(raw) = std::fs::read_to_string(cache_path) else {
|
let Ok(raw) = std::fs::read_to_string(cache_path) else {
|
||||||
|
|
@ -592,7 +608,10 @@ async fn process_channel_message(ctx: Arc<ChannelRuntimeContext>, msg: traits::C
|
||||||
);
|
);
|
||||||
if let Some(channel) = target_channel.as_ref() {
|
if let Some(channel) = target_channel.as_ref() {
|
||||||
let _ = channel
|
let _ = channel
|
||||||
.send(&SendMessage::new(message, &msg.reply_target).in_thread(msg.thread_ts.clone()))
|
.send(
|
||||||
|
&SendMessage::new(message, &msg.reply_target)
|
||||||
|
.in_thread(msg.thread_ts.clone()),
|
||||||
|
)
|
||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
|
|
@ -658,7 +677,9 @@ async fn process_channel_message(ctx: Arc<ChannelRuntimeContext>, msg: traits::C
|
||||||
let draft_message_id = if use_streaming {
|
let draft_message_id = if use_streaming {
|
||||||
if let Some(channel) = target_channel.as_ref() {
|
if let Some(channel) = target_channel.as_ref() {
|
||||||
match channel
|
match channel
|
||||||
.send_draft(&SendMessage::new("...", &msg.reply_target).in_thread(msg.thread_ts.clone()))
|
.send_draft(
|
||||||
|
&SendMessage::new("...", &msg.reply_target).in_thread(msg.thread_ts.clone()),
|
||||||
|
)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
Ok(id) => id,
|
Ok(id) => id,
|
||||||
|
|
@ -769,11 +790,17 @@ async fn process_channel_message(ctx: Arc<ChannelRuntimeContext>, msg: traits::C
|
||||||
{
|
{
|
||||||
tracing::warn!("Failed to finalize draft: {e}; sending as new message");
|
tracing::warn!("Failed to finalize draft: {e}; sending as new message");
|
||||||
let _ = channel
|
let _ = channel
|
||||||
.send(&SendMessage::new(&response, &msg.reply_target).in_thread(msg.thread_ts.clone()))
|
.send(
|
||||||
|
&SendMessage::new(&response, &msg.reply_target)
|
||||||
|
.in_thread(msg.thread_ts.clone()),
|
||||||
|
)
|
||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
} else if let Err(e) = channel
|
} else if let Err(e) = channel
|
||||||
.send(&SendMessage::new(response, &msg.reply_target).in_thread(msg.thread_ts.clone()))
|
.send(
|
||||||
|
&SendMessage::new(response, &msg.reply_target)
|
||||||
|
.in_thread(msg.thread_ts.clone()),
|
||||||
|
)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
eprintln!(" ❌ Failed to reply on {}: {e}", channel.name());
|
eprintln!(" ❌ Failed to reply on {}: {e}", channel.name());
|
||||||
|
|
@ -781,6 +808,30 @@ async fn process_channel_message(ctx: Arc<ChannelRuntimeContext>, msg: traits::C
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(Err(e)) => {
|
Ok(Err(e)) => {
|
||||||
|
if is_context_window_overflow_error(&e) {
|
||||||
|
clear_sender_history(ctx.as_ref(), &history_key);
|
||||||
|
let error_text = "⚠️ Context window exceeded for this conversation. I cleared this sender history. Please resend your last message.";
|
||||||
|
eprintln!(
|
||||||
|
" ⚠️ Context window exceeded after {}ms; sender history cleared",
|
||||||
|
started_at.elapsed().as_millis()
|
||||||
|
);
|
||||||
|
if let Some(channel) = target_channel.as_ref() {
|
||||||
|
if let Some(ref draft_id) = draft_message_id {
|
||||||
|
let _ = channel
|
||||||
|
.finalize_draft(&msg.reply_target, draft_id, error_text)
|
||||||
|
.await;
|
||||||
|
} else {
|
||||||
|
let _ = channel
|
||||||
|
.send(
|
||||||
|
&SendMessage::new(error_text, &msg.reply_target)
|
||||||
|
.in_thread(msg.thread_ts.clone()),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
eprintln!(
|
eprintln!(
|
||||||
" ❌ LLM error after {}ms: {e}",
|
" ❌ LLM error after {}ms: {e}",
|
||||||
started_at.elapsed().as_millis()
|
started_at.elapsed().as_millis()
|
||||||
|
|
@ -792,10 +843,10 @@ async fn process_channel_message(ctx: Arc<ChannelRuntimeContext>, msg: traits::C
|
||||||
.await;
|
.await;
|
||||||
} else {
|
} else {
|
||||||
let _ = channel
|
let _ = channel
|
||||||
.send(&SendMessage::new(
|
.send(
|
||||||
format!("⚠️ Error: {e}"),
|
&SendMessage::new(format!("⚠️ Error: {e}"), &msg.reply_target)
|
||||||
&msg.reply_target,
|
.in_thread(msg.thread_ts.clone()),
|
||||||
).in_thread(msg.thread_ts.clone()))
|
)
|
||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -816,7 +867,10 @@ async fn process_channel_message(ctx: Arc<ChannelRuntimeContext>, msg: traits::C
|
||||||
.await;
|
.await;
|
||||||
} else {
|
} else {
|
||||||
let _ = channel
|
let _ = channel
|
||||||
.send(&SendMessage::new(error_text, &msg.reply_target).in_thread(msg.thread_ts.clone()))
|
.send(
|
||||||
|
&SendMessage::new(error_text, &msg.reply_target)
|
||||||
|
.in_thread(msg.thread_ts.clone()),
|
||||||
|
)
|
||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1993,6 +2047,18 @@ mod tests {
|
||||||
assert_eq!(effective_channel_message_timeout_secs(300), 300);
|
assert_eq!(effective_channel_message_timeout_secs(300), 300);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn context_window_overflow_error_detector_matches_known_messages() {
    // A provider message that reports an overflow must be recognised.
    let overflow = anyhow::anyhow!(
        "OpenAI Codex stream error: Your input exceeds the context window of this model."
    );
    let flagged_as_overflow = is_context_window_overflow_error(&overflow);
    assert!(flagged_as_overflow);

    // An unrelated gateway failure must not be classified as an overflow.
    let bad_gateway = anyhow::anyhow!("OpenAI Codex API error (502 Bad Gateway): error code: 502");
    let flagged_as_overflow = is_context_window_overflow_error(&bad_gateway);
    assert!(!flagged_as_overflow);
}
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
struct RecordingChannel {
|
struct RecordingChannel {
|
||||||
sent_messages: tokio::sync::Mutex<Vec<String>>,
|
sent_messages: tokio::sync::Mutex<Vec<String>>,
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,10 @@ use std::time::Duration;
|
||||||
|
|
||||||
/// Check if an error is non-retryable (client errors that won't resolve with retries).
|
/// Check if an error is non-retryable (client errors that won't resolve with retries).
|
||||||
fn is_non_retryable(err: &anyhow::Error) -> bool {
|
fn is_non_retryable(err: &anyhow::Error) -> bool {
|
||||||
|
if is_context_window_exceeded(err) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
if let Some(reqwest_err) = err.downcast_ref::<reqwest::Error>() {
|
if let Some(reqwest_err) = err.downcast_ref::<reqwest::Error>() {
|
||||||
if let Some(status) = reqwest_err.status() {
|
if let Some(status) = reqwest_err.status() {
|
||||||
let code = status.as_u16();
|
let code = status.as_u16();
|
||||||
|
|
@ -53,6 +57,22 @@ fn is_non_retryable(err: &anyhow::Error) -> bool {
|
||||||
|| msg_lower.contains("invalid"))
|
|| msg_lower.contains("invalid"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn is_context_window_exceeded(err: &anyhow::Error) -> bool {
|
||||||
|
let lower = err.to_string().to_lowercase();
|
||||||
|
let hints = [
|
||||||
|
"exceeds the context window",
|
||||||
|
"context window of this model",
|
||||||
|
"maximum context length",
|
||||||
|
"context length exceeded",
|
||||||
|
"too many tokens",
|
||||||
|
"token limit exceeded",
|
||||||
|
"prompt is too long",
|
||||||
|
"input is too long",
|
||||||
|
];
|
||||||
|
|
||||||
|
hints.iter().any(|hint| lower.contains(hint))
|
||||||
|
}
|
||||||
|
|
||||||
/// Check if an error is a rate-limit (429) error.
|
/// Check if an error is a rate-limit (429) error.
|
||||||
fn is_rate_limited(err: &anyhow::Error) -> bool {
|
fn is_rate_limited(err: &anyhow::Error) -> bool {
|
||||||
if let Some(reqwest_err) = err.downcast_ref::<reqwest::Error>() {
|
if let Some(reqwest_err) = err.downcast_ref::<reqwest::Error>() {
|
||||||
|
|
@ -325,6 +345,14 @@ impl Provider for ReliableProvider {
|
||||||
error = %error_detail,
|
error = %error_detail,
|
||||||
"Non-retryable error, moving on"
|
"Non-retryable error, moving on"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
if is_context_window_exceeded(&e) {
|
||||||
|
anyhow::bail!(
|
||||||
|
"Request exceeds model context window; retries and fallbacks were skipped. Attempts:\n{}",
|
||||||
|
failures.join("\n")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -433,6 +461,14 @@ impl Provider for ReliableProvider {
|
||||||
error = %error_detail,
|
error = %error_detail,
|
||||||
"Non-retryable error, moving on"
|
"Non-retryable error, moving on"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
if is_context_window_exceeded(&e) {
|
||||||
|
anyhow::bail!(
|
||||||
|
"Request exceeds model context window; retries and fallbacks were skipped. Attempts:\n{}",
|
||||||
|
failures.join("\n")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -541,6 +577,14 @@ impl Provider for ReliableProvider {
|
||||||
error = %error_detail,
|
error = %error_detail,
|
||||||
"Non-retryable error, moving on"
|
"Non-retryable error, moving on"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
if is_context_window_exceeded(&e) {
|
||||||
|
anyhow::bail!(
|
||||||
|
"Request exceeds model context window; retries and fallbacks were skipped. Attempts:\n{}",
|
||||||
|
failures.join("\n")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -867,6 +911,44 @@ mod tests {
|
||||||
assert!(!is_non_retryable(&anyhow::anyhow!(
|
assert!(!is_non_retryable(&anyhow::anyhow!(
|
||||||
"model overloaded, try again later"
|
"model overloaded, try again later"
|
||||||
)));
|
)));
|
||||||
|
assert!(is_non_retryable(&anyhow::anyhow!(
|
||||||
|
"OpenAI Codex stream error: Your input exceeds the context window of this model."
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn context_window_error_aborts_retries_and_model_fallbacks() {
|
||||||
|
let calls = Arc::new(AtomicUsize::new(0));
|
||||||
|
let mut model_fallbacks = std::collections::HashMap::new();
|
||||||
|
model_fallbacks.insert(
|
||||||
|
"gpt-5.3-codex".to_string(),
|
||||||
|
vec!["gpt-5.2-codex".to_string()],
|
||||||
|
);
|
||||||
|
|
||||||
|
let provider = ReliableProvider::new(
|
||||||
|
vec![(
|
||||||
|
"openai-codex".into(),
|
||||||
|
Box::new(MockProvider {
|
||||||
|
calls: Arc::clone(&calls),
|
||||||
|
fail_until_attempt: usize::MAX,
|
||||||
|
response: "never",
|
||||||
|
error: "OpenAI Codex stream error: Your input exceeds the context window of this model. Please adjust your input and try again.",
|
||||||
|
}),
|
||||||
|
)],
|
||||||
|
4,
|
||||||
|
1,
|
||||||
|
)
|
||||||
|
.with_model_fallbacks(model_fallbacks);
|
||||||
|
|
||||||
|
let err = provider
|
||||||
|
.simple_chat("hello", "gpt-5.3-codex", 0.0)
|
||||||
|
.await
|
||||||
|
.expect_err("context window overflow should fail fast");
|
||||||
|
let msg = err.to_string();
|
||||||
|
|
||||||
|
assert!(msg.contains("context window"));
|
||||||
|
assert!(msg.contains("skipped"));
|
||||||
|
assert_eq!(calls.load(Ordering::SeqCst), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue