Merge PR #500: streaming support and security fixes

- feat(streaming): add streaming support for LLM responses (fixes #211)
- security(deps): remove vulnerable xmas-elf dependency via embuild (fixes #399)
- fix: resolve merge conflicts and integrate chat_with_tools from main

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 05:05:57 -05:00 · 2026-02-17 05:05:57 -05:00 · 69a9adde33
commit 69a9adde33
parent f75f73a50d
7 changed files with 484 additions and 20 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -4862,6 +4862,7 @@ dependencies = [
 "dialoguer",
 "directories",
 "fantoccini",
+ "futures",
 "futures-util",
 "glob",
 "hex",
--- a/Cargo.toml
+++ b/Cargo.toml
@ -3,7 +3,7 @@ name = "zeroclaw"
 version = "0.1.0"
 edition = "2021"
 authors = ["theonlyhennygod"]
-license = "MIT"
+license = "Apache-2.0"
 description = "Zero overhead. Zero compromise. 100% Rust. The fastest, smallest AI assistant."
 repository = "https://github.com/zeroclaw-labs/zeroclaw"
 readme = "README.md"
@ -85,6 +85,7 @@ glob = "0.3"
 # Discord WebSocket gateway
 tokio-tungstenite = { version = "0.24", features = ["rustls-tls-webpki-roots"] }
 futures-util = { version = "0.3", default-features = false, features = ["sink"] }
+futures = "0.3"
 hostname = "0.4.2"
 lettre = { version = "0.11.19", default-features = false, features = ["builder", "smtp-transport", "rustls-tls"] }
 mail-parser = "0.11.2"
--- a/firmware/zeroclaw-esp32/Cargo.lock
+++ b/firmware/zeroclaw-esp32/Cargo.lock
@ -483,7 +483,6 @@ dependencies = [
 "tempfile",
 "thiserror 1.0.69",
 "which",
- "xmas-elf",
 ]

 [[package]]
@ -1806,21 +1805,6 @@ dependencies = [
 "wasmparser",
 ]

-[[package]]
-name = "xmas-elf"
-version = "0.9.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "42c49817e78342f7f30a181573d82ff55b88a35f86ccaf07fc64b3008f56d1c6"
-dependencies = [
- "zero",
-]
-
-[[package]]
-name = "zero"
-version = "0.1.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2fe21bcc34ca7fe6dd56cc2cb1261ea59d6b93620215aefb5ea6032265527784"
-
 [[package]]
 name = "zeroclaw-esp32"
 version = "0.1.0"
--- a/firmware/zeroclaw-esp32/Cargo.toml
+++ b/firmware/zeroclaw-esp32/Cargo.toml
@ -22,7 +22,7 @@ serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"

 [build-dependencies]
-embuild = { version = "0.31", features = ["elf"] }
+embuild = "0.31"

 [profile.release]
 opt-level = "s"
--- a/src/providers/compatible.rs
+++ b/src/providers/compatible.rs
@ -4,9 +4,10 @@

 use crate::providers::traits::{
    ChatMessage, ChatRequest as ProviderChatRequest, ChatResponse as ProviderChatResponse,
-    Provider, ToolCall as ProviderToolCall,
+    Provider, StreamChunk, StreamError, StreamOptions, StreamResult, ToolCall as ProviderToolCall,
 };
 use async_trait::async_trait;
+use futures_util::{stream, StreamExt};
 use reqwest::Client;
 use serde::{Deserialize, Serialize};

@ -219,6 +220,154 @@ struct ResponsesContent {
    text: Option<String>,
 }

+// ═══════════════════════════════════════════════════════════════
+// Streaming support (SSE parser)
+// ═══════════════════════════════════════════════════════════════
+
+/// One decoded `data:` payload of an OpenAI-compatible streaming response.
+///
+/// Only the `choices` array is deserialized here.
+#[derive(Debug, Deserialize)]
+struct StreamChunkResponse {
+    /// Choices carried by this event; `parse_sse_line` reads only the first one.
+    choices: Vec<StreamChoice>,
+}
+
+/// A single entry of `choices` in a streaming event.
+#[derive(Debug, Deserialize)]
+struct StreamChoice {
+    /// Incremental payload for this choice.
+    delta: StreamDelta,
+    /// Deserialized from the event but not read by the code in this section.
+    /// NOTE(review): presumably the provider's stop reason on the last event — confirm.
+    finish_reason: Option<String>,
+}
+
+/// The `delta` object of a streaming choice.
+#[derive(Debug, Deserialize)]
+struct StreamDelta {
+    /// Text fragment of this event, if it carries one.
+    #[serde(default)]
+    content: Option<String>,
+}
+
+/// Extract the text delta carried by a single SSE line.
+///
+/// The line is trimmed first. Returns `Ok(None)` for blank lines, comment
+/// lines (leading `:`), lines without a `data:` prefix, the `[DONE]`
+/// sentinel, and events whose first choice has no `content`. Returns
+/// `Ok(Some(text))` when the first choice carries `content`.
+///
+/// # Errors
+///
+/// Returns `StreamError::Json` when a `data:` payload other than `[DONE]`
+/// cannot be deserialized as a `StreamChunkResponse`.
+fn parse_sse_line(line: &str) -> StreamResult<Option<String>> {
+    let trimmed = line.trim();
+
+    // Blank separators and `:` comments carry no data.
+    if trimmed.is_empty() || trimmed.starts_with(':') {
+        return Ok(None);
+    }
+
+    // Any field other than `data:` is ignored.
+    let payload = match trimmed.strip_prefix("data:") {
+        Some(rest) => rest.trim(),
+        None => return Ok(None),
+    };
+
+    // End-of-stream marker.
+    if payload == "[DONE]" {
+        return Ok(None);
+    }
+
+    let event: StreamChunkResponse = serde_json::from_str(payload).map_err(StreamError::Json)?;
+
+    // Only the first choice is considered; a missing `content` yields `None`.
+    Ok(event
+        .choices
+        .into_iter()
+        .next()
+        .and_then(|choice| choice.delta.content))
+}
+
+/// Convert an SSE response body into a stream of text chunks.
+///
+/// A spawned task reads the body, splits it into newline-terminated lines,
+/// passes each line to `parse_sse_line`, and forwards every extracted delta
+/// through a bounded channel (capacity 100). The returned stream yields what
+/// the task sends and ends once the task finishes.
+///
+/// * `response` - HTTP response whose body is an SSE stream.
+/// * `count_tokens` - when `true`, each delta chunk gets a token estimate.
+///
+/// Stream contents:
+/// * a non-success status yields a single `StreamError::Http`;
+/// * a transport error or a non-UTF-8 line yields the error followed by the
+///   final chunk;
+/// * a line that fails to parse yields the error and ends the stream without
+///   a final chunk;
+/// * otherwise the deltas are followed by `StreamChunk::final_chunk()`.
+async fn sse_bytes_to_chunks(
+    response: reqwest::Response,
+    count_tokens: bool,
+) -> stream::BoxStream<'static, StreamResult<StreamChunk>> {
+    let (tx, rx) = tokio::sync::mpsc::channel::<StreamResult<StreamChunk>>(100);
+
+    tokio::spawn(async move {
+        // Reject non-success statuses before touching the body. `map` drops
+        // the borrowed response so no reference is held across the `.await`.
+        if let Err(e) = response.error_for_status_ref().map(|_| ()) {
+            let _ = tx.send(Err(StreamError::Http(e))).await;
+            return;
+        }
+
+        // Raw bytes of the line currently being assembled. Buffering bytes
+        // (not text) lets a multi-byte UTF-8 character span two network
+        // chunks; decoding happens per complete line, and a `\n` byte can
+        // never occur inside a multi-byte sequence.
+        let mut pending: Vec<u8> = Vec::new();
+        let mut bytes_stream = response.bytes_stream();
+
+        'body: while let Some(item) = bytes_stream.next().await {
+            let bytes = match item {
+                Ok(bytes) => bytes,
+                Err(e) => {
+                    let _ = tx.send(Err(StreamError::Http(e))).await;
+                    break 'body;
+                }
+            };
+            pending.extend_from_slice(&bytes);
+
+            // Process every complete line. `drain` removes the line from the
+            // buffer, so the remainder must not be sliced a second time.
+            while let Some(pos) = pending.iter().position(|&b| b == b'\n') {
+                let raw: Vec<u8> = pending.drain(..=pos).collect();
+                let line = match String::from_utf8(raw) {
+                    Ok(line) => line,
+                    Err(e) => {
+                        let _ = tx
+                            .send(Err(StreamError::InvalidSse(format!(
+                                "Invalid UTF-8: {}",
+                                e
+                            ))))
+                            .await;
+                        break 'body;
+                    }
+                };
+
+                match parse_sse_line(&line) {
+                    Ok(Some(content)) => {
+                        let mut chunk = StreamChunk::delta(content);
+                        if count_tokens {
+                            chunk = chunk.with_token_estimate();
+                        }
+                        if tx.send(Ok(chunk)).await.is_err() {
+                            return; // Receiver dropped
+                        }
+                    }
+                    // Blank line, comment, non-data field or [DONE] sentinel.
+                    Ok(None) => {}
+                    Err(e) => {
+                        let _ = tx.send(Err(e)).await;
+                        return;
+                    }
+                }
+            }
+        }
+
+        // Send final chunk
+        let _ = tx.send(Ok(StreamChunk::final_chunk())).await;
+    });
+
+    // Convert channel receiver to stream
+    stream::unfold(rx, |mut rx| async move {
+        rx.recv().await.map(|chunk| (chunk, rx))
+    })
+    .boxed()
+}
+
 fn first_nonempty(text: Option<&str>) -> Option<String> {
    text.and_then(|value| {
        let trimmed = value.trim();
@ -525,6 +674,115 @@ impl Provider for OpenAiCompatibleProvider {
    fn supports_native_tools(&self) -> bool {
        true
    }
+
+    /// Always reports streaming as supported for this provider.
+    fn supports_streaming(&self) -> bool {
+        true
+    }
+
+    /// Stream a single-turn chat completion from the chat-completions endpoint.
+    ///
+    /// Builds a request with an optional system message followed by the user
+    /// message, sends it from a spawned task, and forwards the chunks produced
+    /// by `sse_bytes_to_chunks` through a bounded channel (capacity 100).
+    ///
+    /// The returned stream yields a single `StreamError::Provider` when no API
+    /// key is configured or when the server answers with a non-success status,
+    /// and a `StreamError::Http` when the request cannot be sent.
+    ///
+    /// NOTE(review): the request's `stream` flag is taken from
+    /// `options.enabled`, but the response is parsed as SSE either way —
+    /// confirm what should happen when `enabled` is `false`.
+    fn stream_chat_with_system(
+        &self,
+        system_prompt: Option<&str>,
+        message: &str,
+        model: &str,
+        temperature: f64,
+        options: StreamOptions,
+    ) -> stream::BoxStream<'static, StreamResult<StreamChunk>> {
+        // Without a key no request is made; the error is delivered as the
+        // only item of the stream.
+        let api_key = match self.api_key.as_ref() {
+            Some(key) => key.clone(),
+            None => {
+                let provider_name = self.name.clone();
+                return stream::once(async move {
+                    Err(StreamError::Provider(format!(
+                        "{} API key not set",
+                        provider_name
+                    )))
+                })
+                .boxed();
+            }
+        };
+
+        // Optional system message first, then the user message.
+        let mut messages = Vec::new();
+        if let Some(sys) = system_prompt {
+            messages.push(Message {
+                role: "system".to_string(),
+                content: sys.to_string(),
+            });
+        }
+        messages.push(Message {
+            role: "user".to_string(),
+            content: message.to_string(),
+        });
+
+        let request = ChatRequest {
+            model: model.to_string(),
+            messages,
+            temperature,
+            stream: Some(options.enabled),
+        };
+
+        // Owned copies so the spawned task does not borrow from `self`.
+        let url = self.chat_completions_url();
+        let client = self.client.clone();
+        let auth_header = self.auth_header.clone();
+
+        // Use a channel to bridge the async HTTP response to the stream
+        let (tx, rx) = tokio::sync::mpsc::channel::<StreamResult<StreamChunk>>(100);
+
+        tokio::spawn(async move {
+            // Build request with auth
+            let mut req_builder = client.post(&url).json(&request);
+
+            // Apply auth header
+            req_builder = match &auth_header {
+                AuthStyle::Bearer => {
+                    req_builder.header("Authorization", format!("Bearer {}", api_key))
+                }
+                AuthStyle::XApiKey => req_builder.header("x-api-key", &api_key),
+                AuthStyle::Custom(header) => req_builder.header(header, &api_key),
+            };
+
+            // Set accept header for streaming
+            req_builder = req_builder.header("Accept", "text/event-stream");
+
+            // Send request
+            let response = match req_builder.send().await {
+                Ok(r) => r,
+                Err(e) => {
+                    let _ = tx.send(Err(StreamError::Http(e))).await;
+                    return;
+                }
+            };
+
+            // Check status; the response body (or a placeholder if it cannot
+            // be read) is included in the reported error.
+            if !response.status().is_success() {
+                let status = response.status();
+                let error = match response.text().await {
+                    Ok(e) => e,
+                    Err(_) => format!("HTTP error: {}", status),
+                };
+                let _ = tx
+                    .send(Err(StreamError::Provider(format!("{}: {}", status, error))))
+                    .await;
+                return;
+            }
+
+            // Convert to chunk stream and forward to channel
+            let mut chunk_stream = sse_bytes_to_chunks(response, options.count_tokens).await;
+            while let Some(chunk) = chunk_stream.next().await {
+                if tx.send(chunk).await.is_err() {
+                    break; // Receiver dropped
+                }
+            }
+        });
+
+        // Convert channel receiver to stream
+        stream::unfold(rx, |mut rx| async move {
+            match rx.recv().await {
+                Some(chunk) => Some((chunk, rx)),
+                None => None,
+            }
+        })
+        .boxed()
+    }
 }

 #[cfg(test)]
--- a/src/providers/reliable.rs
+++ b/src/providers/reliable.rs
@ -1,6 +1,7 @@
-use super::traits::ChatMessage;
+use super::traits::{ChatMessage, StreamChunk, StreamOptions, StreamResult};
 use super::Provider;
 use async_trait::async_trait;
+use futures_util::{stream, StreamExt};
 use std::collections::HashMap;
 use std::sync::atomic::{AtomicUsize, Ordering};
 use std::time::Duration;
@ -337,6 +338,82 @@ impl Provider for ReliableProvider {
            failures.join("\n")
        )
    }
+
+    /// Reports whether at least one wrapped provider supports streaming.
+    fn supports_streaming(&self) -> bool {
+        for (_, candidate) in &self.providers {
+            if candidate.supports_streaming() {
+                return true;
+            }
+        }
+        false
+    }
+
+    /// Stream a chat completion through the first wrapped provider that
+    /// supports streaming.
+    ///
+    /// The request is attempted once, with the first model of the chain for
+    /// `model` (or `model` itself when the chain is empty). Items are relayed
+    /// through a bounded channel (capacity 100) and every error item is logged
+    /// with `tracing::warn!` before being passed on; no retry or failover is
+    /// performed here.
+    ///
+    /// When `options.enabled` is `false` or no provider supports streaming,
+    /// the returned stream yields a single `StreamError::Provider`.
+    fn stream_chat_with_system(
+        &self,
+        system_prompt: Option<&str>,
+        message: &str,
+        model: &str,
+        temperature: f64,
+        options: StreamOptions,
+    ) -> stream::BoxStream<'static, StreamResult<StreamChunk>> {
+        // First provider that can stream; nothing qualifies while streaming
+        // is disabled in `options`.
+        let selected = self
+            .providers
+            .iter()
+            .find(|(_, candidate)| candidate.supports_streaming() && options.enabled);
+
+        let (provider_name, provider) = match selected {
+            Some((name, provider)) => (name, provider),
+            None => {
+                // No streaming support available
+                return stream::once(async move {
+                    Err(super::traits::StreamError::Provider(
+                        "No provider supports streaming".to_string(),
+                    ))
+                })
+                .boxed();
+            }
+        };
+
+        // Owned copies for the relay task.
+        let owned_name = provider_name.clone();
+        let chosen_model = self
+            .model_chain(model)
+            .first()
+            .map(|m| m.to_string())
+            .unwrap_or_else(|| model.to_string());
+
+        // Single attempt; the caller can retry the whole request if needed.
+        let mut inner = provider.stream_chat_with_system(
+            system_prompt,
+            message,
+            &chosen_model,
+            temperature,
+            options,
+        );
+
+        let (tx, rx) = tokio::sync::mpsc::channel::<StreamResult<StreamChunk>>(100);
+
+        // Relay items, logging errors on the way through.
+        tokio::spawn(async move {
+            while let Some(item) = inner.next().await {
+                if let Err(ref e) = item {
+                    tracing::warn!(
+                        provider = owned_name,
+                        model = chosen_model,
+                        "Streaming error: {e}"
+                    );
+                }
+                if tx.send(item).await.is_err() {
+                    break; // Receiver dropped
+                }
+            }
+        });
+
+        // Expose the receiving half as a stream.
+        stream::unfold(rx, |mut rx| async move {
+            rx.recv().await.map(|item| (item, rx))
+        })
+        .boxed()
+    }
 }

 #[cfg(test)]
--- a/src/providers/traits.rs
+++ b/src/providers/traits.rs
@ -1,5 +1,6 @@
 use crate::tools::ToolSpec;
 use async_trait::async_trait;
+use futures_util::{stream, StreamExt};
 use serde::{Deserialize, Serialize};

 /// A single message in a conversation.
@ -97,6 +98,99 @@ pub enum ConversationMessage {
    ToolResults(Vec<ToolResultMessage>),
 }

+/// One piece of a streamed response.
+#[derive(Debug, Clone)]
+pub struct StreamChunk {
+    /// Text carried by this chunk.
+    pub delta: String,
+    /// `true` when no further chunks follow.
+    pub is_final: bool,
+    /// Estimated token count; stays `0` until `with_token_estimate` is called.
+    pub token_count: usize,
+}
+
+impl StreamChunk {
+    /// Shared constructor: every chunk starts without a token estimate.
+    fn build(delta: String, is_final: bool) -> Self {
+        Self {
+            delta,
+            is_final,
+            token_count: 0,
+        }
+    }
+
+    /// Build a non-final chunk carrying `text`.
+    pub fn delta(text: impl Into<String>) -> Self {
+        Self::build(text.into(), false)
+    }
+
+    /// Build the empty chunk that marks the end of a stream.
+    pub fn final_chunk() -> Self {
+        Self::build(String::new(), true)
+    }
+
+    /// Build a final chunk whose text is `message`.
+    pub fn error(message: impl Into<String>) -> Self {
+        Self::build(message.into(), true)
+    }
+
+    /// Set `token_count` to a rough estimate: the byte length of `delta`
+    /// divided by four, rounded up.
+    pub fn with_token_estimate(self) -> Self {
+        let estimate = (self.delta.len() + 3) / 4;
+        Self {
+            token_count: estimate,
+            ..self
+        }
+    }
+}
+
+/// Settings for a streaming chat request.
+#[derive(Debug, Clone, Copy, Default)]
+pub struct StreamOptions {
+    /// Whether streaming is enabled. The derived `Default` leaves this `false`.
+    pub enabled: bool,
+    /// Whether chunks should carry a token estimate.
+    pub count_tokens: bool,
+}
+
+impl StreamOptions {
+    /// Build options with the given `enabled` flag and token counting off.
+    pub fn new(enabled: bool) -> Self {
+        Self {
+            enabled,
+            ..Self::default()
+        }
+    }
+
+    /// Return a copy of these options with token counting switched on.
+    pub fn with_token_count(self) -> Self {
+        Self {
+            count_tokens: true,
+            ..self
+        }
+    }
+}
+
+/// Result type for streaming operations; the error is always a `StreamError`.
+pub type StreamResult<T> = std::result::Result<T, StreamError>;
+
+/// Errors that can occur during streaming.
+#[derive(Debug, thiserror::Error)]
+pub enum StreamError {
+    /// The HTTP request or the transfer of the response body failed.
+    #[error("HTTP error: {0}")]
+    Http(reqwest::Error),
+
+    /// A streamed payload could not be deserialized as JSON.
+    #[error("JSON parse error: {0}")]
+    Json(serde_json::Error),
+
+    /// The byte stream was not valid SSE text (for example invalid UTF-8).
+    #[error("Invalid SSE format: {0}")]
+    InvalidSse(String),
+
+    /// A provider-level failure described by a message, such as a missing
+    /// API key or a non-success HTTP status.
+    #[error("Provider error: {0}")]
+    Provider(String),
+
+    /// An I/O failure; converts from `std::io::Error` via `From`.
+    #[error("IO error: {0}")]
+    Io(#[from] std::io::Error),
+}
+
 #[async_trait]
 pub trait Provider: Send + Sync {
    /// Simple one-shot chat (single user message, no explicit system prompt).
@ -187,6 +281,55 @@ pub trait Provider: Send + Sync {
            tool_calls: Vec::new(),
        })
    }
+
+    /// Whether this provider supports streaming responses.
+    /// The default implementation returns `false`; providers that implement
+    /// streaming override it.
+    fn supports_streaming(&self) -> bool {
+        false
+    }
+
+    /// Streaming chat with optional system prompt.
+    /// Returns an async stream of text chunks.
+    /// The default implementation ignores every argument and returns an
+    /// empty stream; it does not fall back to non-streaming chat.
+    fn stream_chat_with_system(
+        &self,
+        _system_prompt: Option<&str>,
+        _message: &str,
+        _model: &str,
+        _temperature: f64,
+        _options: StreamOptions,
+    ) -> stream::BoxStream<'static, StreamResult<StreamChunk>> {
+        // Default: return an empty stream (not supported)
+        stream::empty().boxed()
+    }
+
+    /// Streaming chat with history.
+    ///
+    /// Default implementation: when `supports_streaming()` is `true`, the
+    /// history is reduced to the first `system` message (if any) and the most
+    /// recent `user` message (empty when there is none), and the call is
+    /// forwarded to `stream_chat_with_system`. Other messages are not sent.
+    ///
+    /// When streaming is not supported, the returned stream yields a single
+    /// final chunk whose text states that streaming is unavailable.
+    fn stream_chat_with_history(
+        &self,
+        messages: &[ChatMessage],
+        model: &str,
+        temperature: f64,
+        options: StreamOptions,
+    ) -> stream::BoxStream<'static, StreamResult<StreamChunk>> {
+        if !self.supports_streaming() {
+            // The provider name is not available to the default implementation.
+            let provider_name = "unknown".to_string();
+            let chunk =
+                StreamChunk::error(format!("{} does not support streaming", provider_name));
+            return stream::once(async move { Ok(chunk) }).boxed();
+        }
+
+        let system = messages
+            .iter()
+            .find(|m| m.role == "system")
+            .map(|m| m.content.clone());
+        let last_user = messages
+            .iter()
+            .rfind(|m| m.role == "user")
+            .map(|m| m.content.clone())
+            .unwrap_or_default();
+
+        self.stream_chat_with_system(system.as_deref(), &last_user, model, temperature, options)
+    }
 }

 #[cfg(test)]