fix(providers): harden tool fallback and refresh model catalogs

2026-02-18 22:36:39 +08:00 · 2026-02-18 22:36:39 +08:00 · b4b379e3e7
commit b4b379e3e7
parent 43494f8331
9 changed files with 1111 additions and 367 deletions
--- a/src/providers/reliable.rs
+++ b/src/providers/reliable.rs
@ -67,6 +67,52 @@ fn is_rate_limited(err: &anyhow::Error) -> bool {
        && (msg.contains("Too Many") || msg.contains("rate") || msg.contains("limit"))
 }

+/// Check if a 429 is a business/quota-plan error that retries cannot fix.
+///
+/// Returns `true` only when `err` passes `is_rate_limited` and its lowercased
+/// message either contains one of the plan/quota phrases below or has a `code`
+/// key directly followed by a known business code (e.g. Z.AI: 1311, 1113).
+/// Bare numbers elsewhere in the message (delays, ids) are deliberately ignored.
+fn is_non_retryable_rate_limit(err: &anyhow::Error) -> bool {
+    if !is_rate_limited(err) {
+        return false;
+    }
+
+    let msg = err.to_string();
+    let lower = msg.to_lowercase();
+
+    let business_hints = [
+        "plan does not include",
+        "doesn't include",
+        "not include",
+        "insufficient balance",
+        "insufficient_balance",
+        "insufficient quota",
+        "insufficient_quota",
+        "quota exhausted",
+        "out of credits",
+        "no available package",
+        "package not active",
+        "purchase package",
+        "model not available for your plan",
+    ];
+
+    if business_hints.iter().any(|hint| lower.contains(hint)) {
+        return true;
+    }
+
+    // Known provider business codes observed for 429 where retry is futile.
+    // Anchor on a `code` key (`"code":1311`, `"code":"1311"`, `code=1311`) so an
+    // unrelated number that happens to equal a code does not suppress retries.
+    lower.match_indices("code").any(|(at, key)| {
+        // Skip quote/separator characters between the key and its value.
+        let rest = lower[at + key.len()..]
+            .trim_start_matches(|c: char| matches!(c, '"' | '\'' | ':' | '=' | ' '));
+        let digits = rest.bytes().take_while(u8::is_ascii_digit).count();
+        matches!(&rest[..digits], "1113" | "1311")
+    })
+}
+
 /// Try to extract a Retry-After value (in milliseconds) from an error message.
 /// Looks for patterns like `Retry-After: 5` or `retry_after: 2.5` in the error string.
 fn parse_retry_after_ms(err: &anyhow::Error) -> Option<u64> {
@ -101,7 +147,9 @@ fn parse_retry_after_ms(err: &anyhow::Error) -> Option<u64> {
 }

 fn failure_reason(rate_limited: bool, non_retryable: bool) -> &'static str {
-    if rate_limited {
+    if rate_limited && non_retryable {
+        "rate_limited_non_retryable"
+    } else if rate_limited {
        "rate_limited"
    } else if non_retryable {
        "non_retryable"
@ -244,7 +292,8 @@ impl Provider for ReliableProvider {
                            return Ok(resp);
                        }
                        Err(e) => {
-                            let non_retryable = is_non_retryable(&e);
+                            let non_retryable_rate_limit = is_non_retryable_rate_limit(&e);
+                            let non_retryable = is_non_retryable(&e) || non_retryable_rate_limit;
                            let rate_limited = is_rate_limited(&e);
                            let failure_reason = failure_reason(rate_limited, non_retryable);
                            let error_detail = compact_error_detail(&e);
@ -260,7 +309,7 @@ impl Provider for ReliableProvider {
                            );

                            // On rate-limit, try rotating API key
-                            if rate_limited {
+                            if rate_limited && !non_retryable_rate_limit {
                                if let Some(new_key) = self.rotate_key() {
                                    tracing::info!(
                                        provider = provider_name,
@ -352,7 +401,8 @@ impl Provider for ReliableProvider {
                            return Ok(resp);
                        }
                        Err(e) => {
-                            let non_retryable = is_non_retryable(&e);
+                            let non_retryable_rate_limit = is_non_retryable_rate_limit(&e);
+                            let non_retryable = is_non_retryable(&e) || non_retryable_rate_limit;
                            let rate_limited = is_rate_limited(&e);
                            let failure_reason = failure_reason(rate_limited, non_retryable);
                            let error_detail = compact_error_detail(&e);
@ -367,7 +417,7 @@ impl Provider for ReliableProvider {
                                &error_detail,
                            );

-                            if rate_limited {
+                            if rate_limited && !non_retryable_rate_limit {
                                if let Some(new_key) = self.rotate_key() {
                                    tracing::info!(
                                        provider = provider_name,
@ -459,7 +509,8 @@ impl Provider for ReliableProvider {
                            return Ok(resp);
                        }
                        Err(e) => {
-                            let non_retryable = is_non_retryable(&e);
+                            let non_retryable_rate_limit = is_non_retryable_rate_limit(&e);
+                            let non_retryable = is_non_retryable(&e) || non_retryable_rate_limit;
                            let rate_limited = is_rate_limited(&e);
                            let failure_reason = failure_reason(rate_limited, non_retryable);
                            let error_detail = compact_error_detail(&e);
@ -474,7 +525,7 @@ impl Provider for ReliableProvider {
                                &error_detail,
                            );

-                            if rate_limited {
+                            if rate_limited && !non_retryable_rate_limit {
                                if let Some(new_key) = self.rotate_key() {
                                    tracing::info!(
                                        provider = provider_name,
@ -1106,6 +1157,39 @@ mod tests {
        )));
    }

+    #[test]
+    fn non_retryable_rate_limit_detects_plan_restricted_model() {
+        // 429 payload carrying business code 1311 and a plan-exclusion message.
+        let payload = r#"API error (429 Too Many Requests): {"code":1311,"message":"the current account plan does not include glm-5"}"#;
+        let plan_error = anyhow::anyhow!("{}", payload);
+
+        let flagged = is_non_retryable_rate_limit(&plan_error);
+
+        // The classifier must report this 429 as non-retryable.
+        assert!(flagged, "plan-restricted 429 should skip retries");
+    }
+
+    #[test]
+    fn non_retryable_rate_limit_detects_insufficient_balance() {
+        // 429 payload carrying business code 1113 and an insufficient-balance message.
+        let payload = r#"API error (429 Too Many Requests): {"code":1113,"message":"insufficient balance"}"#;
+        let balance_error = anyhow::anyhow!("{}", payload);
+
+        let flagged = is_non_retryable_rate_limit(&balance_error);
+
+        // The classifier must report this 429 as non-retryable.
+        assert!(flagged, "insufficient-balance 429 should skip retries");
+    }
+
+    #[test]
+    fn non_retryable_rate_limit_does_not_flag_generic_429() {
+        // Plain throttling message: no plan/quota phrase and no business code.
+        let throttled = anyhow::anyhow!("429 Too Many Requests: rate limit exceeded");
+        let flagged = is_non_retryable_rate_limit(&throttled);
+
+        assert!(!flagged, "generic rate-limit 429 should remain retryable");
+    }
+
    #[test]
    fn compute_backoff_uses_retry_after() {
        let provider = ReliableProvider::new(vec![], 0, 500);
@ -1261,6 +1345,35 @@ mod tests {
        );
    }

+    #[tokio::test]
+    async fn non_retryable_rate_limit_skips_retries_for_plan_errors() {
+        // Error text the mock reports: a plan-restricted 429 with business code 1311.
+        let plan_error = r#"API error (429 Too Many Requests): {"code":1311,"message":"plan does not include glm-5"}"#;
+        let attempts = Arc::new(AtomicUsize::new(0));
+        let always_failing = MockProvider {
+            calls: Arc::clone(&attempts),
+            fail_until_attempt: usize::MAX,
+            response: "never",
+            error: plan_error,
+        };
+
+        // NOTE(review): 5 and 1 are presumably the retry budget and base backoff (ms) — confirm.
+        let provider =
+            ReliableProvider::new(vec![("primary".into(), Box::new(always_failing))], 5, 1);
+
+        let outcome = provider.simple_chat("hello", "test", 0.0).await;
+        let observed_calls = attempts.load(Ordering::SeqCst);
+
+        assert!(
+            outcome.is_err(),
+            "plan-restricted 429 should fail quickly without retrying"
+        );
+        assert_eq!(
+            observed_calls, 1,
+            "must not retry non-retryable 429 business errors"
+        );
+    }
+
    // ── Arc<ModelAwareMock> Provider impl for test ──

    #[async_trait]