feat(providers): support reasoning_content fallback for thinking models
Reasoning/thinking models (Qwen3, GLM-4, DeepSeek, etc.) may return their output in `reasoning_content` instead of `content`. Add an automatic fallback for both the OpenAI and the OpenAI-compatible providers, including streaming (SSE) support.

Changes:
- Add a `reasoning_content` field to the response structs in both providers
- Add an `effective_content()` helper that prefers `content` but falls back to `reasoning_content` when `content` is empty, null, or missing
- Update all extraction sites to use `effective_content()`
- Add a streaming SSE fallback for `reasoning_content` chunks
- Add 16 focused unit tests covering all edge cases

Tested end-to-end against GLM-4.7-flash via a local LLM server.
This commit is contained in:
parent
219764d4d8
commit
dd4f5271d1
2 changed files with 201 additions and 16 deletions
|
|
@ -171,10 +171,26 @@ struct Choice {
|
||||||
struct ResponseMessage {
|
struct ResponseMessage {
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
content: Option<String>,
|
content: Option<String>,
|
||||||
|
/// Reasoning/thinking models (e.g. Qwen3, GLM-4) may return their output
|
||||||
|
/// in `reasoning_content` instead of `content`. Used as automatic fallback.
|
||||||
|
#[serde(default)]
|
||||||
|
reasoning_content: Option<String>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
tool_calls: Option<Vec<ToolCall>>,
|
tool_calls: Option<Vec<ToolCall>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl ResponseMessage {
|
||||||
|
/// Extract text content, falling back to `reasoning_content` when `content`
|
||||||
|
/// is missing or empty. Reasoning/thinking models (Qwen3, GLM-4, etc.)
|
||||||
|
/// often return their output solely in `reasoning_content`.
|
||||||
|
fn effective_content(&self) -> String {
|
||||||
|
match &self.content {
|
||||||
|
Some(c) if !c.is_empty() => c.clone(),
|
||||||
|
_ => self.reasoning_content.clone().unwrap_or_default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize, Serialize)]
|
#[derive(Debug, Deserialize, Serialize)]
|
||||||
struct ToolCall {
|
struct ToolCall {
|
||||||
#[serde(rename = "type")]
|
#[serde(rename = "type")]
|
||||||
|
|
@ -245,6 +261,9 @@ struct StreamChoice {
|
||||||
struct StreamDelta {
|
struct StreamDelta {
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
content: Option<String>,
|
content: Option<String>,
|
||||||
|
/// Reasoning/thinking models may stream output via `reasoning_content`.
|
||||||
|
#[serde(default)]
|
||||||
|
reasoning_content: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse SSE (Server-Sent Events) stream from OpenAI-compatible providers.
|
/// Parse SSE (Server-Sent Events) stream from OpenAI-compatible providers.
|
||||||
|
|
@ -274,6 +293,10 @@ fn parse_sse_line(line: &str) -> StreamResult<Option<String>> {
|
||||||
if let Some(content) = &choice.delta.content {
|
if let Some(content) = &choice.delta.content {
|
||||||
return Ok(Some(content.clone()));
|
return Ok(Some(content.clone()));
|
||||||
}
|
}
|
||||||
|
// Fallback to reasoning_content for thinking models
|
||||||
|
if let Some(reasoning) = &choice.delta.reasoning_content {
|
||||||
|
return Ok(Some(reasoning.clone()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -529,10 +552,10 @@ impl Provider for OpenAiCompatibleProvider {
|
||||||
.map_or(false, |t| !t.is_empty())
|
.map_or(false, |t| !t.is_empty())
|
||||||
{
|
{
|
||||||
serde_json::to_string(&c.message)
|
serde_json::to_string(&c.message)
|
||||||
.unwrap_or_else(|_| c.message.content.unwrap_or_default())
|
.unwrap_or_else(|_| c.message.effective_content())
|
||||||
} else {
|
} else {
|
||||||
// No tool calls, return content as-is
|
// No tool calls, return content (with reasoning_content fallback)
|
||||||
c.message.content.unwrap_or_default()
|
c.message.effective_content()
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.ok_or_else(|| anyhow::anyhow!("No response from {}", self.name))
|
.ok_or_else(|| anyhow::anyhow!("No response from {}", self.name))
|
||||||
|
|
@ -617,10 +640,10 @@ impl Provider for OpenAiCompatibleProvider {
|
||||||
.map_or(false, |t| !t.is_empty())
|
.map_or(false, |t| !t.is_empty())
|
||||||
{
|
{
|
||||||
serde_json::to_string(&c.message)
|
serde_json::to_string(&c.message)
|
||||||
.unwrap_or_else(|_| c.message.content.unwrap_or_default())
|
.unwrap_or_else(|_| c.message.effective_content())
|
||||||
} else {
|
} else {
|
||||||
// No tool calls, return content as-is
|
// No tool calls, return content (with reasoning_content fallback)
|
||||||
c.message.content.unwrap_or_default()
|
c.message.effective_content()
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.ok_or_else(|| anyhow::anyhow!("No response from {}", self.name))
|
.ok_or_else(|| anyhow::anyhow!("No response from {}", self.name))
|
||||||
|
|
@ -1150,4 +1173,96 @@ mod tests {
|
||||||
let result = provider.warmup().await;
|
let result = provider.warmup().await;
|
||||||
assert!(result.is_ok());
|
assert!(result.is_ok());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ══════════════════════════════════════════════════════════
|
||||||
|
// Reasoning model fallback tests (reasoning_content)
|
||||||
|
// ══════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
#[test]
fn reasoning_content_fallback_when_content_empty() {
    // `content` is the empty string while `reasoning_content` is populated,
    // the shape reported for reasoning models such as Qwen3 and GLM-4.
    let payload = r#"{"choices":[{"message":{"content":"","reasoning_content":"Thinking output here"}}]}"#;
    let parsed: ApiChatResponse = serde_json::from_str(payload).unwrap();
    assert_eq!(
        parsed.choices[0].message.effective_content(),
        "Thinking output here"
    );
}
|
||||||
|
|
||||||
|
#[test]
fn reasoning_content_fallback_when_content_null() {
    // An explicit JSON `null` for `content` must also trigger the fallback.
    let payload =
        r#"{"choices":[{"message":{"content":null,"reasoning_content":"Fallback text"}}]}"#;
    let parsed: ApiChatResponse = serde_json::from_str(payload).unwrap();
    assert_eq!(
        parsed.choices[0].message.effective_content(),
        "Fallback text"
    );
}
|
||||||
|
|
||||||
|
#[test]
fn reasoning_content_fallback_when_content_missing() {
    // The `content` key is absent altogether; only `reasoning_content` is sent.
    let payload = r#"{"choices":[{"message":{"reasoning_content":"Only reasoning"}}]}"#;
    let parsed: ApiChatResponse = serde_json::from_str(payload).unwrap();
    assert_eq!(
        parsed.choices[0].message.effective_content(),
        "Only reasoning"
    );
}
|
||||||
|
|
||||||
|
#[test]
fn reasoning_content_not_used_when_content_present() {
    // With a non-empty `content`, `reasoning_content` must not leak into the result.
    let payload = r#"{"choices":[{"message":{"content":"Normal response","reasoning_content":"Should be ignored"}}]}"#;
    let parsed: ApiChatResponse = serde_json::from_str(payload).unwrap();
    assert_eq!(
        parsed.choices[0].message.effective_content(),
        "Normal response"
    );
}
|
||||||
|
|
||||||
|
#[test]
fn reasoning_content_both_absent_returns_empty() {
    // An empty message object carries neither field, so the helper yields "".
    let payload = r#"{"choices":[{"message":{}}]}"#;
    let parsed: ApiChatResponse = serde_json::from_str(payload).unwrap();
    assert_eq!(parsed.choices[0].message.effective_content(), "");
}
|
||||||
|
|
||||||
|
#[test]
fn reasoning_content_ignored_by_normal_models() {
    // A conventional response that never mentions `reasoning_content`
    // deserializes with that field unset and returns `content` unchanged.
    let payload = r#"{"choices":[{"message":{"content":"Hello from Venice!"}}]}"#;
    let parsed: ApiChatResponse = serde_json::from_str(payload).unwrap();
    let message = &parsed.choices[0].message;
    assert!(message.reasoning_content.is_none());
    assert_eq!(message.effective_content(), "Hello from Venice!");
}
|
||||||
|
|
||||||
|
// ══════════════════════════════════════════════════════════
|
||||||
|
// SSE streaming reasoning_content fallback tests
|
||||||
|
// ══════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
#[test]
fn parse_sse_line_with_content() {
    // A delta carrying only `content` is passed through verbatim.
    let event = r#"data: {"choices":[{"delta":{"content":"hello"}}]}"#;
    let chunk = parse_sse_line(event).unwrap();
    assert_eq!(chunk.as_deref(), Some("hello"));
}
|
||||||
|
|
||||||
|
#[test]
fn parse_sse_line_with_reasoning_content() {
    // A delta carrying only `reasoning_content` is surfaced as the chunk text.
    let event = r#"data: {"choices":[{"delta":{"reasoning_content":"thinking..."}}]}"#;
    let chunk = parse_sse_line(event).unwrap();
    assert_eq!(chunk.as_deref(), Some("thinking..."));
}
|
||||||
|
|
||||||
|
#[test]
fn parse_sse_line_with_both_prefers_content() {
    // When a delta carries both fields, `content` takes precedence.
    let event = r#"data: {"choices":[{"delta":{"content":"real answer","reasoning_content":"thinking..."}}]}"#;
    let chunk = parse_sse_line(event).unwrap();
    assert_eq!(chunk.as_deref(), Some("real answer"));
}
|
||||||
|
|
||||||
|
#[test]
fn parse_sse_line_done_sentinel() {
    // The `[DONE]` terminator parses successfully but produces no chunk.
    assert_eq!(parse_sse_line("data: [DONE]").unwrap(), None);
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,20 @@ struct Choice {
|
||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
struct ResponseMessage {
|
struct ResponseMessage {
|
||||||
content: String,
|
#[serde(default)]
|
||||||
|
content: Option<String>,
|
||||||
|
/// Reasoning/thinking models may return output in `reasoning_content`.
|
||||||
|
#[serde(default)]
|
||||||
|
reasoning_content: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ResponseMessage {
|
||||||
|
fn effective_content(&self) -> String {
|
||||||
|
match &self.content {
|
||||||
|
Some(c) if !c.is_empty() => c.clone(),
|
||||||
|
_ => self.reasoning_content.clone().unwrap_or_default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize)]
|
||||||
|
|
@ -105,10 +118,22 @@ struct NativeChoice {
|
||||||
struct NativeResponseMessage {
|
struct NativeResponseMessage {
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
content: Option<String>,
|
content: Option<String>,
|
||||||
|
/// Reasoning/thinking models may return output in `reasoning_content`.
|
||||||
|
#[serde(default)]
|
||||||
|
reasoning_content: Option<String>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
tool_calls: Option<Vec<NativeToolCall>>,
|
tool_calls: Option<Vec<NativeToolCall>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl NativeResponseMessage {
|
||||||
|
fn effective_content(&self) -> Option<String> {
|
||||||
|
match &self.content {
|
||||||
|
Some(c) if !c.is_empty() => Some(c.clone()),
|
||||||
|
_ => self.reasoning_content.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl OpenAiProvider {
|
impl OpenAiProvider {
|
||||||
pub fn new(credential: Option<&str>) -> Self {
|
pub fn new(credential: Option<&str>) -> Self {
|
||||||
Self {
|
Self {
|
||||||
|
|
@ -205,6 +230,7 @@ impl OpenAiProvider {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_native_response(message: NativeResponseMessage) -> ProviderChatResponse {
|
fn parse_native_response(message: NativeResponseMessage) -> ProviderChatResponse {
|
||||||
|
let text = message.effective_content();
|
||||||
let tool_calls = message
|
let tool_calls = message
|
||||||
.tool_calls
|
.tool_calls
|
||||||
.unwrap_or_default()
|
.unwrap_or_default()
|
||||||
|
|
@ -216,10 +242,7 @@ impl OpenAiProvider {
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
ProviderChatResponse {
|
ProviderChatResponse { text, tool_calls }
|
||||||
text: message.content,
|
|
||||||
tool_calls,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -274,7 +297,7 @@ impl Provider for OpenAiProvider {
|
||||||
.choices
|
.choices
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.next()
|
.next()
|
||||||
.map(|c| c.message.content)
|
.map(|c| c.message.effective_content())
|
||||||
.ok_or_else(|| anyhow::anyhow!("No response from OpenAI"))
|
.ok_or_else(|| anyhow::anyhow!("No response from OpenAI"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -417,7 +440,7 @@ mod tests {
|
||||||
let json = r#"{"choices":[{"message":{"content":"Hi!"}}]}"#;
|
let json = r#"{"choices":[{"message":{"content":"Hi!"}}]}"#;
|
||||||
let resp: ChatResponse = serde_json::from_str(json).unwrap();
|
let resp: ChatResponse = serde_json::from_str(json).unwrap();
|
||||||
assert_eq!(resp.choices.len(), 1);
|
assert_eq!(resp.choices.len(), 1);
|
||||||
assert_eq!(resp.choices[0].message.content, "Hi!");
|
assert_eq!(resp.choices[0].message.effective_content(), "Hi!");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
@ -432,14 +455,14 @@ mod tests {
|
||||||
let json = r#"{"choices":[{"message":{"content":"A"}},{"message":{"content":"B"}}]}"#;
|
let json = r#"{"choices":[{"message":{"content":"A"}},{"message":{"content":"B"}}]}"#;
|
||||||
let resp: ChatResponse = serde_json::from_str(json).unwrap();
|
let resp: ChatResponse = serde_json::from_str(json).unwrap();
|
||||||
assert_eq!(resp.choices.len(), 2);
|
assert_eq!(resp.choices.len(), 2);
|
||||||
assert_eq!(resp.choices[0].message.content, "A");
|
assert_eq!(resp.choices[0].message.effective_content(), "A");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn response_with_unicode() {
|
fn response_with_unicode() {
|
||||||
let json = r#"{"choices":[{"message":{"content":"こんにちは 🦀"}}]}"#;
|
let json = r#"{"choices":[{"message":{"content":"こんにちは 🦀"}}]}"#;
|
||||||
let resp: ChatResponse = serde_json::from_str(json).unwrap();
|
let resp: ChatResponse = serde_json::from_str(json).unwrap();
|
||||||
assert_eq!(resp.choices[0].message.content, "こんにちは 🦀");
|
assert_eq!(resp.choices[0].message.effective_content(), "こんにちは 🦀");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
@ -447,7 +470,10 @@ mod tests {
|
||||||
let long = "x".repeat(100_000);
|
let long = "x".repeat(100_000);
|
||||||
let json = format!(r#"{{"choices":[{{"message":{{"content":"{long}"}}}}]}}"#);
|
let json = format!(r#"{{"choices":[{{"message":{{"content":"{long}"}}}}]}}"#);
|
||||||
let resp: ChatResponse = serde_json::from_str(&json).unwrap();
|
let resp: ChatResponse = serde_json::from_str(&json).unwrap();
|
||||||
assert_eq!(resp.choices[0].message.content.len(), 100_000);
|
assert_eq!(
|
||||||
|
resp.choices[0].message.content.as_ref().unwrap().len(),
|
||||||
|
100_000
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
|
|
@ -456,4 +482,48 @@ mod tests {
|
||||||
let result = provider.warmup().await;
|
let result = provider.warmup().await;
|
||||||
assert!(result.is_ok());
|
assert!(result.is_ok());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ══════════════════════════════════════════════════════════
|
||||||
|
// Reasoning model fallback tests (reasoning_content)
|
||||||
|
// ══════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
#[test]
fn reasoning_content_fallback_empty_content() {
    // Empty-string `content` must fall back to `reasoning_content`.
    let payload = r#"{"choices":[{"message":{"content":"","reasoning_content":"Thinking..."}}]}"#;
    let parsed: ChatResponse = serde_json::from_str(payload).unwrap();
    let message = &parsed.choices[0].message;
    assert_eq!(message.effective_content(), "Thinking...");
}
|
||||||
|
|
||||||
|
#[test]
fn reasoning_content_fallback_null_content() {
    // JSON `null` for `content` must fall back to `reasoning_content`.
    let payload =
        r#"{"choices":[{"message":{"content":null,"reasoning_content":"Thinking..."}}]}"#;
    let parsed: ChatResponse = serde_json::from_str(payload).unwrap();
    let message = &parsed.choices[0].message;
    assert_eq!(message.effective_content(), "Thinking...");
}
|
||||||
|
|
||||||
|
#[test]
fn reasoning_content_not_used_when_content_present() {
    // A non-empty `content` is returned even if `reasoning_content` is also set.
    let payload = r#"{"choices":[{"message":{"content":"Hello","reasoning_content":"Ignored"}}]}"#;
    let parsed: ChatResponse = serde_json::from_str(payload).unwrap();
    let message = &parsed.choices[0].message;
    assert_eq!(message.effective_content(), "Hello");
}
|
||||||
|
|
||||||
|
#[test]
fn native_response_reasoning_content_fallback() {
    // Native (tool-calling) response shape: empty `content` falls back to
    // `reasoning_content`.
    let payload =
        r#"{"choices":[{"message":{"content":"","reasoning_content":"Native thinking"}}]}"#;
    let parsed: NativeChatResponse = serde_json::from_str(payload).unwrap();
    let text = parsed.choices[0].message.effective_content();
    assert_eq!(text.as_deref(), Some("Native thinking"));
}
|
||||||
|
|
||||||
|
#[test]
fn native_response_reasoning_content_ignored_when_content_present() {
    // Native (tool-calling) response shape: a non-empty `content` wins over
    // `reasoning_content`.
    let payload =
        r#"{"choices":[{"message":{"content":"Real answer","reasoning_content":"Ignored"}}]}"#;
    let parsed: NativeChatResponse = serde_json::from_str(payload).unwrap();
    let text = parsed.choices[0].message.effective_content();
    assert_eq!(text.as_deref(), Some("Real answer"));
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue