feat(streaming): add streaming support for LLM responses (fixes #211)
Implement Server-Sent Events (SSE) streaming for OpenAI-compatible providers:
- Add StreamChunk, StreamOptions, and StreamError types to traits module
- Add supports_streaming() and stream_chat_with_system() to Provider trait
- Implement SSE parser for OpenAI streaming responses (data: {...} format)
- Add streaming support to OpenAiCompatibleProvider
- Add streaming support to ReliableProvider with error propagation
- Add futures dependency for async stream support
Features:
- Token-by-token streaming for real-time feedback
- Token counting option (estimated ~4 chars per token)
- Graceful error handling and logging
- Channel-based stream bridging for async compatibility
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
ccc48824cf
commit
d94e78c621
3 changed files with 325 additions and 3 deletions
|
|
@@ -1,6 +1,7 @@
|
|||
use super::traits::ChatMessage;
|
||||
use super::traits::{ChatMessage, StreamChunk, StreamOptions, StreamResult};
|
||||
use super::Provider;
|
||||
use async_trait::async_trait;
|
||||
use futures_util::{stream, StreamExt};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::time::Duration;
|
||||
|
|
@@ -337,6 +338,80 @@ impl Provider for ReliableProvider {
|
|||
failures.join("\n")
|
||||
)
|
||||
}
|
||||
|
||||
fn supports_streaming(&self) -> bool {
|
||||
self.providers.iter().any(|(_, p)| p.supports_streaming())
|
||||
}
|
||||
|
||||
fn stream_chat_with_system(
|
||||
&self,
|
||||
system_prompt: Option<&str>,
|
||||
message: &str,
|
||||
model: &str,
|
||||
temperature: f64,
|
||||
options: StreamOptions,
|
||||
) -> stream::BoxStream<'static, StreamResult<StreamChunk>> {
|
||||
// Try each provider/model combination for streaming
|
||||
// For streaming, we use the first provider that supports it and has streaming enabled
|
||||
for (provider_name, provider) in &self.providers {
|
||||
if !provider.supports_streaming() || !options.enabled {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Clone provider data for the stream
|
||||
let provider_clone = provider_name.clone();
|
||||
|
||||
// Try the first model in the chain for streaming
|
||||
let current_model = match self.model_chain(model).first() {
|
||||
Some(m) => m.to_string(),
|
||||
None => model.to_string(),
|
||||
};
|
||||
|
||||
// For streaming, we attempt once and propagate errors
|
||||
// The caller can retry the entire request if needed
|
||||
let stream = provider.stream_chat_with_system(
|
||||
system_prompt,
|
||||
message,
|
||||
¤t_model,
|
||||
temperature,
|
||||
options,
|
||||
);
|
||||
|
||||
// Use a channel to bridge the stream with logging
|
||||
let (tx, rx) = tokio::sync::mpsc::channel::<StreamResult<StreamChunk>>(100);
|
||||
|
||||
tokio::spawn(async move {
|
||||
let mut stream = stream;
|
||||
while let Some(chunk) = stream.next().await {
|
||||
if let Err(ref e) = chunk {
|
||||
tracing::warn!(
|
||||
provider = provider_clone,
|
||||
model = current_model,
|
||||
"Streaming error: {e}"
|
||||
);
|
||||
}
|
||||
if tx.send(chunk).await.is_err() {
|
||||
break; // Receiver dropped
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Convert channel receiver to stream
|
||||
return stream::unfold(rx, |mut rx| async move {
|
||||
match rx.recv().await {
|
||||
Some(chunk) => Some((chunk, rx)),
|
||||
None => None,
|
||||
}
|
||||
}).boxed();
|
||||
}
|
||||
|
||||
// No streaming support available
|
||||
stream::once(async move {
|
||||
Err(super::traits::StreamError::Provider(
|
||||
"No provider supports streaming".to_string()
|
||||
))
|
||||
}).boxed()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue