Merge pull request #872 from agorevski/perf/eliminate-unnecessary-heap-allocations

perf: eliminate unnecessary heap allocations across agent loop, memory and channels
This commit is contained in:
Alex Gorevski 2026-02-19 07:11:55 -08:00 committed by GitHub
commit 3a19d6cd98
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 77 additions and 63 deletions

View file

@ -154,7 +154,14 @@ impl EmailChannel {
_ => {}
}
}
result.split_whitespace().collect::<Vec<_>>().join(" ")
let mut normalized = String::with_capacity(result.len());
for word in result.split_whitespace() {
if !normalized.is_empty() {
normalized.push(' ');
}
normalized.push_str(word);
}
normalized
}
/// Extract the sender address from a parsed email

View file

@ -163,12 +163,17 @@ fn split_message(message: &str, max_bytes: usize) -> Vec<String> {
// Guard against max_bytes == 0 to prevent infinite loop
if max_bytes == 0 {
let full: String = message
let mut full = String::new();
for l in message
.lines()
.map(|l| l.trim_end_matches('\r'))
.filter(|l| !l.is_empty())
.collect::<Vec<_>>()
.join(" ");
{
if !full.is_empty() {
full.push(' ');
}
full.push_str(l);
}
if full.is_empty() {
chunks.push(String::new());
} else {

View file

@ -3,12 +3,14 @@
// Splits on markdown headings and paragraph boundaries, respecting
// a max token limit per chunk. Preserves heading context.
use std::rc::Rc;
/// A single chunk of text with metadata.
#[derive(Debug, Clone)]
pub struct Chunk {
pub index: usize,
pub content: String,
pub heading: Option<String>,
pub heading: Option<Rc<str>>,
}
/// Split markdown text into chunks, each under approximately `max_tokens` tokens (budgeted as `max_tokens * 4` characters).
@ -26,9 +28,10 @@ pub fn chunk_markdown(text: &str, max_tokens: usize) -> Vec<Chunk> {
let max_chars = max_tokens * 4;
let sections = split_on_headings(text);
let mut chunks = Vec::new();
let mut chunks = Vec::with_capacity(sections.len());
for (heading, body) in sections {
let heading: Option<Rc<str>> = heading.map(Rc::from);
let full = if let Some(ref h) = heading {
format!("{h}\n{body}")
} else {
@ -45,7 +48,7 @@ pub fn chunk_markdown(text: &str, max_tokens: usize) -> Vec<Chunk> {
// Split on paragraphs (blank lines)
let paragraphs = split_on_blank_lines(&body);
let mut current = heading
.as_ref()
.as_deref()
.map_or_else(String::new, |h| format!("{h}\n"));
for para in paragraphs {
@ -56,7 +59,7 @@ pub fn chunk_markdown(text: &str, max_tokens: usize) -> Vec<Chunk> {
heading: heading.clone(),
});
current = heading
.as_ref()
.as_deref()
.map_or_else(String::new, |h| format!("{h}\n"));
}
@ -69,7 +72,7 @@ pub fn chunk_markdown(text: &str, max_tokens: usize) -> Vec<Chunk> {
heading: heading.clone(),
});
current = heading
.as_ref()
.as_deref()
.map_or_else(String::new, |h| format!("{h}\n"));
}
for line_chunk in split_on_lines(&para, max_chars) {
@ -115,8 +118,7 @@ fn split_on_headings(text: &str) -> Vec<(Option<String>, String)> {
for line in text.lines() {
if line.starts_with("# ") || line.starts_with("## ") || line.starts_with("### ") {
if !current_body.trim().is_empty() || current_heading.is_some() {
sections.push((current_heading.take(), current_body.clone()));
current_body.clear();
sections.push((current_heading.take(), std::mem::take(&mut current_body)));
}
current_heading = Some(line.to_string());
} else {
@ -140,8 +142,7 @@ fn split_on_blank_lines(text: &str) -> Vec<String> {
for line in text.lines() {
if line.trim().is_empty() {
if !current.trim().is_empty() {
paragraphs.push(current.clone());
current.clear();
paragraphs.push(std::mem::take(&mut current));
}
} else {
current.push_str(line);
@ -158,13 +159,12 @@ fn split_on_blank_lines(text: &str) -> Vec<String> {
/// Split text on line boundaries to fit within `max_chars`, measured in bytes (`String::len`).
fn split_on_lines(text: &str, max_chars: usize) -> Vec<String> {
let mut chunks = Vec::new();
let mut chunks = Vec::with_capacity(text.len() / max_chars.max(1) + 1);
let mut current = String::new();
for line in text.lines() {
if current.len() + line.len() + 1 > max_chars && !current.is_empty() {
chunks.push(current.clone());
current.clear();
chunks.push(std::mem::take(&mut current));
}
current.push_str(line);
current.push('\n');

View file

@ -42,7 +42,7 @@ struct ContentBlock {
}
#[derive(Debug, Serialize)]
struct NativeChatRequest {
struct NativeChatRequest<'a> {
model: String,
max_tokens: u32,
#[serde(skip_serializing_if = "Option::is_none")]
@ -50,7 +50,7 @@ struct NativeChatRequest {
messages: Vec<NativeMessage>,
temperature: f64,
#[serde(skip_serializing_if = "Option::is_none")]
tools: Option<Vec<NativeToolSpec>>,
tools: Option<Vec<NativeToolSpec<'a>>>,
}
#[derive(Debug, Serialize)]
@ -86,10 +86,10 @@ enum NativeContentOut {
}
#[derive(Debug, Serialize)]
struct NativeToolSpec {
name: String,
description: String,
input_schema: serde_json::Value,
struct NativeToolSpec<'a> {
name: &'a str,
description: &'a str,
input_schema: &'a serde_json::Value,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
}
@ -206,17 +206,17 @@ impl AnthropicProvider {
}
}
fn convert_tools(tools: Option<&[ToolSpec]>) -> Option<Vec<NativeToolSpec>> {
fn convert_tools<'a>(tools: Option<&'a [ToolSpec]>) -> Option<Vec<NativeToolSpec<'a>>> {
let items = tools?;
if items.is_empty() {
return None;
}
let mut native_tools: Vec<NativeToolSpec> = items
let mut native_tools: Vec<NativeToolSpec<'a>> = items
.iter()
.map(|tool| NativeToolSpec {
name: tool.name.clone(),
description: tool.description.clone(),
input_schema: tool.parameters.clone(),
name: &tool.name,
description: &tool.description,
input_schema: &tool.parameters,
cache_control: None,
})
.collect();
@ -828,10 +828,11 @@ mod tests {
#[test]
fn native_tool_spec_without_cache_control() {
let schema = serde_json::json!({"type": "object"});
let tool = NativeToolSpec {
name: "get_weather".to_string(),
description: "Get weather info".to_string(),
input_schema: serde_json::json!({"type": "object"}),
name: "get_weather",
description: "Get weather info",
input_schema: &schema,
cache_control: None,
};
let json = serde_json::to_string(&tool).unwrap();
@ -841,10 +842,11 @@ mod tests {
#[test]
fn native_tool_spec_with_cache_control() {
let schema = serde_json::json!({"type": "object"});
let tool = NativeToolSpec {
name: "get_weather".to_string(),
description: "Get weather info".to_string(),
input_schema: serde_json::json!({"type": "object"}),
name: "get_weather",
description: "Get weather info",
input_schema: &schema,
cache_control: Some(CacheControl::ephemeral()),
};
let json = serde_json::to_string(&tool).unwrap();

View file

@ -81,12 +81,12 @@ struct CachedApiKey {
// ── Chat completions types ───────────────────────────────────────
#[derive(Debug, Serialize)]
struct ApiChatRequest {
struct ApiChatRequest<'a> {
model: String,
messages: Vec<ApiMessage>,
temperature: f64,
#[serde(skip_serializing_if = "Option::is_none")]
tools: Option<Vec<NativeToolSpec>>,
tools: Option<Vec<NativeToolSpec<'a>>>,
#[serde(skip_serializing_if = "Option::is_none")]
tool_choice: Option<String>,
}
@ -103,17 +103,17 @@ struct ApiMessage {
}
#[derive(Debug, Serialize)]
struct NativeToolSpec {
struct NativeToolSpec<'a> {
#[serde(rename = "type")]
kind: String,
function: NativeToolFunctionSpec,
kind: &'static str,
function: NativeToolFunctionSpec<'a>,
}
#[derive(Debug, Serialize)]
struct NativeToolFunctionSpec {
name: String,
description: String,
parameters: serde_json::Value,
struct NativeToolFunctionSpec<'a> {
name: &'a str,
description: &'a str,
parameters: &'a serde_json::Value,
}
#[derive(Debug, Serialize, Deserialize)]
@ -219,16 +219,16 @@ impl CopilotProvider {
("Accept", "application/json"),
];
fn convert_tools(tools: Option<&[ToolSpec]>) -> Option<Vec<NativeToolSpec>> {
fn convert_tools<'a>(tools: Option<&'a [ToolSpec]>) -> Option<Vec<NativeToolSpec<'a>>> {
tools.map(|items| {
items
.iter()
.map(|tool| NativeToolSpec {
kind: "function".to_string(),
kind: "function",
function: NativeToolFunctionSpec {
name: tool.name.clone(),
description: tool.description.clone(),
parameters: tool.parameters.clone(),
name: &tool.name,
description: &tool.description,
parameters: &tool.parameters,
},
})
.collect()

View file

@ -54,12 +54,12 @@ impl ResponseMessage {
}
#[derive(Debug, Serialize)]
struct NativeChatRequest {
struct NativeChatRequest<'a> {
model: String,
messages: Vec<NativeMessage>,
temperature: f64,
#[serde(skip_serializing_if = "Option::is_none")]
tools: Option<Vec<NativeToolSpec>>,
tools: Option<Vec<NativeToolSpec<'a>>>,
#[serde(skip_serializing_if = "Option::is_none")]
tool_choice: Option<String>,
}
@ -75,18 +75,18 @@ struct NativeMessage {
tool_calls: Option<Vec<NativeToolCall>>,
}
#[derive(Debug, Serialize, Deserialize)]
struct NativeToolSpec {
#[derive(Debug, Serialize)]
struct NativeToolSpec<'a> {
#[serde(rename = "type")]
kind: String,
function: NativeToolFunctionSpec,
kind: &'static str,
function: NativeToolFunctionSpec<'a>,
}
#[derive(Debug, Serialize, Deserialize)]
struct NativeToolFunctionSpec {
name: String,
description: String,
parameters: serde_json::Value,
#[derive(Debug, Serialize)]
struct NativeToolFunctionSpec<'a> {
name: &'a str,
description: &'a str,
parameters: &'a serde_json::Value,
}
#[derive(Debug, Serialize, Deserialize)]
@ -150,16 +150,16 @@ impl OpenAiProvider {
}
}
fn convert_tools(tools: Option<&[ToolSpec]>) -> Option<Vec<NativeToolSpec>> {
fn convert_tools<'a>(tools: Option<&'a [ToolSpec]>) -> Option<Vec<NativeToolSpec<'a>>> {
tools.map(|items| {
items
.iter()
.map(|tool| NativeToolSpec {
kind: "function".to_string(),
kind: "function",
function: NativeToolFunctionSpec {
name: tool.name.clone(),
description: tool.description.clone(),
parameters: tool.parameters.clone(),
name: &tool.name,
description: &tool.description,
parameters: &tool.parameters,
},
})
.collect()