From 2c07fb17926d3e35fca15d4a6c844e109cd96d05 Mon Sep 17 00:00:00 2001
From: Chummy
Date: Thu, 19 Feb 2026 19:34:26 +0800
Subject: [PATCH] fix: fail fast on context-window overflow and reset channel history

---
 src/channels/mod.rs       | 84 ++++++++++++++++++++++++++++++++++-----
 src/providers/reliable.rs | 82 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 157 insertions(+), 9 deletions(-)

diff --git a/src/channels/mod.rs b/src/channels/mod.rs
index 374da9f..2f77512 100644
--- a/src/channels/mod.rs
+++ b/src/channels/mod.rs
@@ -254,6 +254,22 @@ fn clear_sender_history(ctx: &ChannelRuntimeContext, sender_key: &str) {
         .remove(sender_key);
 }
 
+fn is_context_window_overflow_error(err: &anyhow::Error) -> bool {
+    let lower = err.to_string().to_lowercase();
+    [
+        "exceeds the context window",
+        "context window of this model",
+        "maximum context length",
+        "context length exceeded",
+        "too many tokens",
+        "token limit exceeded",
+        "prompt is too long",
+        "input is too long",
+    ]
+    .iter()
+    .any(|hint| lower.contains(hint))
+}
+
 fn load_cached_model_preview(workspace_dir: &Path, provider_name: &str) -> Vec {
     let cache_path = workspace_dir.join("state").join(MODEL_CACHE_FILE);
     let Ok(raw) = std::fs::read_to_string(cache_path) else {
@@ -592,7 +608,10 @@ async fn process_channel_message(ctx: Arc<ChannelRuntimeContext>, msg: traits::C
         );
         if let Some(channel) = target_channel.as_ref() {
             let _ = channel
-                .send(&SendMessage::new(message, &msg.reply_target).in_thread(msg.thread_ts.clone()))
+                .send(
+                    &SendMessage::new(message, &msg.reply_target)
+                        .in_thread(msg.thread_ts.clone()),
+                )
                 .await;
         }
         return;
@@ -658,7 +677,9 @@ async fn process_channel_message(ctx: Arc<ChannelRuntimeContext>, msg: traits::C
     let draft_message_id = if use_streaming {
         if let Some(channel) = target_channel.as_ref() {
             match channel
-                .send_draft(&SendMessage::new("...", &msg.reply_target).in_thread(msg.thread_ts.clone()))
+                .send_draft(
+                    &SendMessage::new("...", &msg.reply_target).in_thread(msg.thread_ts.clone()),
+                )
                 .await
             {
                 Ok(id) => id,
@@ -769,11 +790,17 @@ async fn process_channel_message(ctx: Arc<ChannelRuntimeContext>, msg: traits::C
                 {
                     tracing::warn!("Failed to finalize draft: {e}; sending as new message");
                     let _ = channel
-                        .send(&SendMessage::new(&response, &msg.reply_target).in_thread(msg.thread_ts.clone()))
+                        .send(
+                            &SendMessage::new(&response, &msg.reply_target)
+                                .in_thread(msg.thread_ts.clone()),
+                        )
                         .await;
                 }
             } else if let Err(e) = channel
-                .send(&SendMessage::new(response, &msg.reply_target).in_thread(msg.thread_ts.clone()))
+                .send(
+                    &SendMessage::new(response, &msg.reply_target)
+                        .in_thread(msg.thread_ts.clone()),
+                )
                 .await
             {
                 eprintln!(" ❌ Failed to reply on {}: {e}", channel.name());
@@ -781,6 +808,30 @@ async fn process_channel_message(ctx: Arc<ChannelRuntimeContext>, msg: traits::C
             }
         }
         Ok(Err(e)) => {
+            if is_context_window_overflow_error(&e) {
+                clear_sender_history(ctx.as_ref(), &history_key);
+                let error_text = "⚠️ Context window exceeded for this conversation. I cleared this sender history. Please resend your last message.";
+                eprintln!(
+                    " ⚠️ Context window exceeded after {}ms; sender history cleared",
+                    started_at.elapsed().as_millis()
+                );
+                if let Some(channel) = target_channel.as_ref() {
+                    if let Some(ref draft_id) = draft_message_id {
+                        let _ = channel
+                            .finalize_draft(&msg.reply_target, draft_id, error_text)
+                            .await;
+                    } else {
+                        let _ = channel
+                            .send(
+                                &SendMessage::new(error_text, &msg.reply_target)
+                                    .in_thread(msg.thread_ts.clone()),
+                            )
+                            .await;
+                    }
+                }
+                return;
+            }
+
             eprintln!(
                 " ❌ LLM error after {}ms: {e}",
                 started_at.elapsed().as_millis()
             );
@@ -792,10 +843,10 @@ async fn process_channel_message(ctx: Arc<ChannelRuntimeContext>, msg: traits::C
                         .await;
                 } else {
                     let _ = channel
-                        .send(&SendMessage::new(
-                            format!("⚠️ Error: {e}"),
-                            &msg.reply_target,
-                        ).in_thread(msg.thread_ts.clone()))
+                        .send(
+                            &SendMessage::new(format!("⚠️ Error: {e}"), &msg.reply_target)
+                                .in_thread(msg.thread_ts.clone()),
+                        )
                         .await;
                 }
             }
@@ -816,7 +867,10 @@ async fn process_channel_message(ctx: Arc<ChannelRuntimeContext>, msg: traits::C
                         .await;
                 } else {
                     let _ = channel
-                        .send(&SendMessage::new(error_text, &msg.reply_target).in_thread(msg.thread_ts.clone()))
+                        .send(
+                            &SendMessage::new(error_text, &msg.reply_target)
+                                .in_thread(msg.thread_ts.clone()),
+                        )
                         .await;
                 }
             }
@@ -1993,6 +2047,18 @@ mod tests {
         assert_eq!(effective_channel_message_timeout_secs(300), 300);
     }
 
+    #[test]
+    fn context_window_overflow_error_detector_matches_known_messages() {
+        let overflow_err = anyhow::anyhow!(
+            "OpenAI Codex stream error: Your input exceeds the context window of this model."
+        );
+        assert!(is_context_window_overflow_error(&overflow_err));
+
+        let other_err =
+            anyhow::anyhow!("OpenAI Codex API error (502 Bad Gateway): error code: 502");
+        assert!(!is_context_window_overflow_error(&other_err));
+    }
+
     #[derive(Default)]
     struct RecordingChannel {
         sent_messages: tokio::sync::Mutex>,
diff --git a/src/providers/reliable.rs b/src/providers/reliable.rs
index 82b7d83..bafe1bc 100644
--- a/src/providers/reliable.rs
+++ b/src/providers/reliable.rs
@@ -8,6 +8,10 @@ use std::time::Duration;
 
 /// Check if an error is non-retryable (client errors that won't resolve with retries).
 fn is_non_retryable(err: &anyhow::Error) -> bool {
+    if is_context_window_exceeded(err) {
+        return true;
+    }
+
     if let Some(reqwest_err) = err.downcast_ref::<reqwest::Error>() {
         if let Some(status) = reqwest_err.status() {
             let code = status.as_u16();
@@ -53,6 +57,22 @@ fn is_non_retryable(err: &anyhow::Error) -> bool {
         || msg_lower.contains("invalid"))
 }
 
+fn is_context_window_exceeded(err: &anyhow::Error) -> bool {
+    let lower = err.to_string().to_lowercase();
+    let hints = [
+        "exceeds the context window",
+        "context window of this model",
+        "maximum context length",
+        "context length exceeded",
+        "too many tokens",
+        "token limit exceeded",
+        "prompt is too long",
+        "input is too long",
+    ];
+
+    hints.iter().any(|hint| lower.contains(hint))
+}
+
 /// Check if an error is a rate-limit (429) error.
 fn is_rate_limited(err: &anyhow::Error) -> bool {
     if let Some(reqwest_err) = err.downcast_ref::<reqwest::Error>() {
@@ -325,6 +345,14 @@ impl Provider for ReliableProvider {
                         error = %error_detail,
                         "Non-retryable error, moving on"
                     );
+
+                    if is_context_window_exceeded(&e) {
+                        anyhow::bail!(
+                            "Request exceeds model context window; retries and fallbacks were skipped. Attempts:\n{}",
+                            failures.join("\n")
+                        );
+                    }
+
                     break;
                 }
 
@@ -433,6 +461,14 @@ impl Provider for ReliableProvider {
                         error = %error_detail,
                         "Non-retryable error, moving on"
                     );
+
+                    if is_context_window_exceeded(&e) {
+                        anyhow::bail!(
+                            "Request exceeds model context window; retries and fallbacks were skipped. Attempts:\n{}",
+                            failures.join("\n")
+                        );
+                    }
+
                     break;
                 }
 
@@ -541,6 +577,14 @@ impl Provider for ReliableProvider {
                         error = %error_detail,
                         "Non-retryable error, moving on"
                     );
+
+                    if is_context_window_exceeded(&e) {
+                        anyhow::bail!(
+                            "Request exceeds model context window; retries and fallbacks were skipped. Attempts:\n{}",
+                            failures.join("\n")
+                        );
+                    }
+
                     break;
                 }
 
@@ -867,6 +911,44 @@ mod tests {
         assert!(!is_non_retryable(&anyhow::anyhow!(
            "model overloaded, try again later"
         )));
+        assert!(is_non_retryable(&anyhow::anyhow!(
+            "OpenAI Codex stream error: Your input exceeds the context window of this model."
+        )));
+    }
+
+    #[tokio::test]
+    async fn context_window_error_aborts_retries_and_model_fallbacks() {
+        let calls = Arc::new(AtomicUsize::new(0));
+        let mut model_fallbacks = std::collections::HashMap::new();
+        model_fallbacks.insert(
+            "gpt-5.3-codex".to_string(),
+            vec!["gpt-5.2-codex".to_string()],
+        );
+
+        let provider = ReliableProvider::new(
+            vec![(
+                "openai-codex".into(),
+                Box::new(MockProvider {
+                    calls: Arc::clone(&calls),
+                    fail_until_attempt: usize::MAX,
+                    response: "never",
+                    error: "OpenAI Codex stream error: Your input exceeds the context window of this model. Please adjust your input and try again.",
+                }),
+            )],
+            4,
+            1,
+        )
+        .with_model_fallbacks(model_fallbacks);
+
+        let err = provider
+            .simple_chat("hello", "gpt-5.3-codex", 0.0)
+            .await
+            .expect_err("context window overflow should fail fast");
+        let msg = err.to_string();
+
+        assert!(msg.contains("context window"));
+        assert!(msg.contains("skipped"));
+        assert_eq!(calls.load(Ordering::SeqCst), 1);
     }
 
     #[tokio::test]
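
Note: is_context_window_overflow_error in src/channels/mod.rs and is_context_window_exceeded in
src/providers/reliable.rs carry the same eight substring hints. A possible follow-up, sketched
below and not part of this patch, would be to keep the hint list in one shared helper that both
call sites delegate to. The module file src/context_window.rs and the function name
looks_like_context_window_overflow are hypothetical; the hints are copied verbatim from the two
detectors above, and only the anyhow crate already used by this patch is assumed.

    // Hypothetical shared helper, e.g. src/context_window.rs (not part of this patch).
    // Both detectors added above could delegate to it so the hint list lives in one place.
    pub fn looks_like_context_window_overflow(err: &anyhow::Error) -> bool {
        // Substring hints copied from the patch; matching is case-insensitive.
        const HINTS: [&str; 8] = [
            "exceeds the context window",
            "context window of this model",
            "maximum context length",
            "context length exceeded",
            "too many tokens",
            "token limit exceeded",
            "prompt is too long",
            "input is too long",
        ];
        let lower = err.to_string().to_lowercase();
        HINTS.iter().any(|hint| lower.contains(hint))
    }

With a single helper, a new provider phrasing would only need to be added once to cover both the
channel history-reset path and the retry/fallback short-circuit in ReliableProvider.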