perf: eliminate unnecessary heap allocations across agent loop, memory, and channels
- Replace clone()+clear() with std::mem::take() in chunker (items 1, 6)
- Add Vec::with_capacity() hints in chunker split functions (item 2)
- Replace collect::<Vec<_>>().join() with direct iteration in IRC and email channels (item 3)
- Share heading strings via Rc<str> instead of cloning per chunk (item 5)
- Use borrowed references in provider tool spec types to avoid cloning name/description/parameters per tool per request (item 7)

Closes #712

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
dce7280812
commit
a4b27d2afe
6 changed files with 77 additions and 63 deletions
|
|
@ -154,7 +154,14 @@ impl EmailChannel {
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
result.split_whitespace().collect::<Vec<_>>().join(" ")
|
let mut normalized = String::with_capacity(result.len());
|
||||||
|
for word in result.split_whitespace() {
|
||||||
|
if !normalized.is_empty() {
|
||||||
|
normalized.push(' ');
|
||||||
|
}
|
||||||
|
normalized.push_str(word);
|
||||||
|
}
|
||||||
|
normalized
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extract the sender address from a parsed email
|
/// Extract the sender address from a parsed email
|
||||||
|
|
|
||||||
|
|
@ -163,12 +163,17 @@ fn split_message(message: &str, max_bytes: usize) -> Vec<String> {
|
||||||
|
|
||||||
// Guard against max_bytes == 0 to prevent infinite loop
|
// Guard against max_bytes == 0 to prevent infinite loop
|
||||||
if max_bytes == 0 {
|
if max_bytes == 0 {
|
||||||
let full: String = message
|
let mut full = String::new();
|
||||||
|
for l in message
|
||||||
.lines()
|
.lines()
|
||||||
.map(|l| l.trim_end_matches('\r'))
|
.map(|l| l.trim_end_matches('\r'))
|
||||||
.filter(|l| !l.is_empty())
|
.filter(|l| !l.is_empty())
|
||||||
.collect::<Vec<_>>()
|
{
|
||||||
.join(" ");
|
if !full.is_empty() {
|
||||||
|
full.push(' ');
|
||||||
|
}
|
||||||
|
full.push_str(l);
|
||||||
|
}
|
||||||
if full.is_empty() {
|
if full.is_empty() {
|
||||||
chunks.push(String::new());
|
chunks.push(String::new());
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -3,12 +3,14 @@
|
||||||
// Splits on markdown headings and paragraph boundaries, respecting
|
// Splits on markdown headings and paragraph boundaries, respecting
|
||||||
// a max token limit per chunk. Preserves heading context.
|
// a max token limit per chunk. Preserves heading context.
|
||||||
|
|
||||||
|
use std::rc::Rc;
|
||||||
|
|
||||||
/// A single chunk of text with metadata.
|
/// A single chunk of text with metadata.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Chunk {
|
pub struct Chunk {
|
||||||
pub index: usize,
|
pub index: usize,
|
||||||
pub content: String,
|
pub content: String,
|
||||||
pub heading: Option<String>,
|
pub heading: Option<Rc<str>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Split markdown text into chunks, each under `max_tokens` approximate tokens.
|
/// Split markdown text into chunks, each under `max_tokens` approximate tokens.
|
||||||
|
|
@ -26,9 +28,10 @@ pub fn chunk_markdown(text: &str, max_tokens: usize) -> Vec<Chunk> {
|
||||||
|
|
||||||
let max_chars = max_tokens * 4;
|
let max_chars = max_tokens * 4;
|
||||||
let sections = split_on_headings(text);
|
let sections = split_on_headings(text);
|
||||||
let mut chunks = Vec::new();
|
let mut chunks = Vec::with_capacity(sections.len());
|
||||||
|
|
||||||
for (heading, body) in sections {
|
for (heading, body) in sections {
|
||||||
|
let heading: Option<Rc<str>> = heading.map(Rc::from);
|
||||||
let full = if let Some(ref h) = heading {
|
let full = if let Some(ref h) = heading {
|
||||||
format!("{h}\n{body}")
|
format!("{h}\n{body}")
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -45,7 +48,7 @@ pub fn chunk_markdown(text: &str, max_tokens: usize) -> Vec<Chunk> {
|
||||||
// Split on paragraphs (blank lines)
|
// Split on paragraphs (blank lines)
|
||||||
let paragraphs = split_on_blank_lines(&body);
|
let paragraphs = split_on_blank_lines(&body);
|
||||||
let mut current = heading
|
let mut current = heading
|
||||||
.as_ref()
|
.as_deref()
|
||||||
.map_or_else(String::new, |h| format!("{h}\n"));
|
.map_or_else(String::new, |h| format!("{h}\n"));
|
||||||
|
|
||||||
for para in paragraphs {
|
for para in paragraphs {
|
||||||
|
|
@ -56,7 +59,7 @@ pub fn chunk_markdown(text: &str, max_tokens: usize) -> Vec<Chunk> {
|
||||||
heading: heading.clone(),
|
heading: heading.clone(),
|
||||||
});
|
});
|
||||||
current = heading
|
current = heading
|
||||||
.as_ref()
|
.as_deref()
|
||||||
.map_or_else(String::new, |h| format!("{h}\n"));
|
.map_or_else(String::new, |h| format!("{h}\n"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -69,7 +72,7 @@ pub fn chunk_markdown(text: &str, max_tokens: usize) -> Vec<Chunk> {
|
||||||
heading: heading.clone(),
|
heading: heading.clone(),
|
||||||
});
|
});
|
||||||
current = heading
|
current = heading
|
||||||
.as_ref()
|
.as_deref()
|
||||||
.map_or_else(String::new, |h| format!("{h}\n"));
|
.map_or_else(String::new, |h| format!("{h}\n"));
|
||||||
}
|
}
|
||||||
for line_chunk in split_on_lines(&para, max_chars) {
|
for line_chunk in split_on_lines(&para, max_chars) {
|
||||||
|
|
@ -115,8 +118,7 @@ fn split_on_headings(text: &str) -> Vec<(Option<String>, String)> {
|
||||||
for line in text.lines() {
|
for line in text.lines() {
|
||||||
if line.starts_with("# ") || line.starts_with("## ") || line.starts_with("### ") {
|
if line.starts_with("# ") || line.starts_with("## ") || line.starts_with("### ") {
|
||||||
if !current_body.trim().is_empty() || current_heading.is_some() {
|
if !current_body.trim().is_empty() || current_heading.is_some() {
|
||||||
sections.push((current_heading.take(), current_body.clone()));
|
sections.push((current_heading.take(), std::mem::take(&mut current_body)));
|
||||||
current_body.clear();
|
|
||||||
}
|
}
|
||||||
current_heading = Some(line.to_string());
|
current_heading = Some(line.to_string());
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -140,8 +142,7 @@ fn split_on_blank_lines(text: &str) -> Vec<String> {
|
||||||
for line in text.lines() {
|
for line in text.lines() {
|
||||||
if line.trim().is_empty() {
|
if line.trim().is_empty() {
|
||||||
if !current.trim().is_empty() {
|
if !current.trim().is_empty() {
|
||||||
paragraphs.push(current.clone());
|
paragraphs.push(std::mem::take(&mut current));
|
||||||
current.clear();
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
current.push_str(line);
|
current.push_str(line);
|
||||||
|
|
@ -158,13 +159,12 @@ fn split_on_blank_lines(text: &str) -> Vec<String> {
|
||||||
|
|
||||||
/// Split text on line boundaries to fit within `max_chars`
|
/// Split text on line boundaries to fit within `max_chars`
|
||||||
fn split_on_lines(text: &str, max_chars: usize) -> Vec<String> {
|
fn split_on_lines(text: &str, max_chars: usize) -> Vec<String> {
|
||||||
let mut chunks = Vec::new();
|
let mut chunks = Vec::with_capacity(text.len() / max_chars.max(1) + 1);
|
||||||
let mut current = String::new();
|
let mut current = String::new();
|
||||||
|
|
||||||
for line in text.lines() {
|
for line in text.lines() {
|
||||||
if current.len() + line.len() + 1 > max_chars && !current.is_empty() {
|
if current.len() + line.len() + 1 > max_chars && !current.is_empty() {
|
||||||
chunks.push(current.clone());
|
chunks.push(std::mem::take(&mut current));
|
||||||
current.clear();
|
|
||||||
}
|
}
|
||||||
current.push_str(line);
|
current.push_str(line);
|
||||||
current.push('\n');
|
current.push('\n');
|
||||||
|
|
|
||||||
|
|
@ -42,7 +42,7 @@ struct ContentBlock {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize)]
|
||||||
struct NativeChatRequest {
|
struct NativeChatRequest<'a> {
|
||||||
model: String,
|
model: String,
|
||||||
max_tokens: u32,
|
max_tokens: u32,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
|
@ -50,7 +50,7 @@ struct NativeChatRequest {
|
||||||
messages: Vec<NativeMessage>,
|
messages: Vec<NativeMessage>,
|
||||||
temperature: f64,
|
temperature: f64,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
tools: Option<Vec<NativeToolSpec>>,
|
tools: Option<Vec<NativeToolSpec<'a>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize)]
|
||||||
|
|
@ -86,10 +86,10 @@ enum NativeContentOut {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize)]
|
||||||
struct NativeToolSpec {
|
struct NativeToolSpec<'a> {
|
||||||
name: String,
|
name: &'a str,
|
||||||
description: String,
|
description: &'a str,
|
||||||
input_schema: serde_json::Value,
|
input_schema: &'a serde_json::Value,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
cache_control: Option<CacheControl>,
|
cache_control: Option<CacheControl>,
|
||||||
}
|
}
|
||||||
|
|
@ -206,17 +206,17 @@ impl AnthropicProvider {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn convert_tools(tools: Option<&[ToolSpec]>) -> Option<Vec<NativeToolSpec>> {
|
fn convert_tools<'a>(tools: Option<&'a [ToolSpec]>) -> Option<Vec<NativeToolSpec<'a>>> {
|
||||||
let items = tools?;
|
let items = tools?;
|
||||||
if items.is_empty() {
|
if items.is_empty() {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
let mut native_tools: Vec<NativeToolSpec> = items
|
let mut native_tools: Vec<NativeToolSpec<'a>> = items
|
||||||
.iter()
|
.iter()
|
||||||
.map(|tool| NativeToolSpec {
|
.map(|tool| NativeToolSpec {
|
||||||
name: tool.name.clone(),
|
name: &tool.name,
|
||||||
description: tool.description.clone(),
|
description: &tool.description,
|
||||||
input_schema: tool.parameters.clone(),
|
input_schema: &tool.parameters,
|
||||||
cache_control: None,
|
cache_control: None,
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
@ -828,10 +828,11 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn native_tool_spec_without_cache_control() {
|
fn native_tool_spec_without_cache_control() {
|
||||||
|
let schema = serde_json::json!({"type": "object"});
|
||||||
let tool = NativeToolSpec {
|
let tool = NativeToolSpec {
|
||||||
name: "get_weather".to_string(),
|
name: "get_weather",
|
||||||
description: "Get weather info".to_string(),
|
description: "Get weather info",
|
||||||
input_schema: serde_json::json!({"type": "object"}),
|
input_schema: &schema,
|
||||||
cache_control: None,
|
cache_control: None,
|
||||||
};
|
};
|
||||||
let json = serde_json::to_string(&tool).unwrap();
|
let json = serde_json::to_string(&tool).unwrap();
|
||||||
|
|
@ -841,10 +842,11 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn native_tool_spec_with_cache_control() {
|
fn native_tool_spec_with_cache_control() {
|
||||||
|
let schema = serde_json::json!({"type": "object"});
|
||||||
let tool = NativeToolSpec {
|
let tool = NativeToolSpec {
|
||||||
name: "get_weather".to_string(),
|
name: "get_weather",
|
||||||
description: "Get weather info".to_string(),
|
description: "Get weather info",
|
||||||
input_schema: serde_json::json!({"type": "object"}),
|
input_schema: &schema,
|
||||||
cache_control: Some(CacheControl::ephemeral()),
|
cache_control: Some(CacheControl::ephemeral()),
|
||||||
};
|
};
|
||||||
let json = serde_json::to_string(&tool).unwrap();
|
let json = serde_json::to_string(&tool).unwrap();
|
||||||
|
|
|
||||||
|
|
@ -81,12 +81,12 @@ struct CachedApiKey {
|
||||||
// ── Chat completions types ───────────────────────────────────────
|
// ── Chat completions types ───────────────────────────────────────
|
||||||
|
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize)]
|
||||||
struct ApiChatRequest {
|
struct ApiChatRequest<'a> {
|
||||||
model: String,
|
model: String,
|
||||||
messages: Vec<ApiMessage>,
|
messages: Vec<ApiMessage>,
|
||||||
temperature: f64,
|
temperature: f64,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
tools: Option<Vec<NativeToolSpec>>,
|
tools: Option<Vec<NativeToolSpec<'a>>>,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
tool_choice: Option<String>,
|
tool_choice: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
@ -103,17 +103,17 @@ struct ApiMessage {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize)]
|
||||||
struct NativeToolSpec {
|
struct NativeToolSpec<'a> {
|
||||||
#[serde(rename = "type")]
|
#[serde(rename = "type")]
|
||||||
kind: String,
|
kind: &'static str,
|
||||||
function: NativeToolFunctionSpec,
|
function: NativeToolFunctionSpec<'a>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize)]
|
||||||
struct NativeToolFunctionSpec {
|
struct NativeToolFunctionSpec<'a> {
|
||||||
name: String,
|
name: &'a str,
|
||||||
description: String,
|
description: &'a str,
|
||||||
parameters: serde_json::Value,
|
parameters: &'a serde_json::Value,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
|
|
@ -219,16 +219,16 @@ impl CopilotProvider {
|
||||||
("Accept", "application/json"),
|
("Accept", "application/json"),
|
||||||
];
|
];
|
||||||
|
|
||||||
fn convert_tools(tools: Option<&[ToolSpec]>) -> Option<Vec<NativeToolSpec>> {
|
fn convert_tools<'a>(tools: Option<&'a [ToolSpec]>) -> Option<Vec<NativeToolSpec<'a>>> {
|
||||||
tools.map(|items| {
|
tools.map(|items| {
|
||||||
items
|
items
|
||||||
.iter()
|
.iter()
|
||||||
.map(|tool| NativeToolSpec {
|
.map(|tool| NativeToolSpec {
|
||||||
kind: "function".to_string(),
|
kind: "function",
|
||||||
function: NativeToolFunctionSpec {
|
function: NativeToolFunctionSpec {
|
||||||
name: tool.name.clone(),
|
name: &tool.name,
|
||||||
description: tool.description.clone(),
|
description: &tool.description,
|
||||||
parameters: tool.parameters.clone(),
|
parameters: &tool.parameters,
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
.collect()
|
.collect()
|
||||||
|
|
|
||||||
|
|
@ -54,12 +54,12 @@ impl ResponseMessage {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize)]
|
||||||
struct NativeChatRequest {
|
struct NativeChatRequest<'a> {
|
||||||
model: String,
|
model: String,
|
||||||
messages: Vec<NativeMessage>,
|
messages: Vec<NativeMessage>,
|
||||||
temperature: f64,
|
temperature: f64,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
tools: Option<Vec<NativeToolSpec>>,
|
tools: Option<Vec<NativeToolSpec<'a>>>,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
tool_choice: Option<String>,
|
tool_choice: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
@ -75,18 +75,18 @@ struct NativeMessage {
|
||||||
tool_calls: Option<Vec<NativeToolCall>>,
|
tool_calls: Option<Vec<NativeToolCall>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize)]
|
#[derive(Debug, Serialize)]
|
||||||
struct NativeToolSpec {
|
struct NativeToolSpec<'a> {
|
||||||
#[serde(rename = "type")]
|
#[serde(rename = "type")]
|
||||||
kind: String,
|
kind: &'static str,
|
||||||
function: NativeToolFunctionSpec,
|
function: NativeToolFunctionSpec<'a>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize)]
|
#[derive(Debug, Serialize)]
|
||||||
struct NativeToolFunctionSpec {
|
struct NativeToolFunctionSpec<'a> {
|
||||||
name: String,
|
name: &'a str,
|
||||||
description: String,
|
description: &'a str,
|
||||||
parameters: serde_json::Value,
|
parameters: &'a serde_json::Value,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
|
|
@ -150,16 +150,16 @@ impl OpenAiProvider {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn convert_tools(tools: Option<&[ToolSpec]>) -> Option<Vec<NativeToolSpec>> {
|
fn convert_tools<'a>(tools: Option<&'a [ToolSpec]>) -> Option<Vec<NativeToolSpec<'a>>> {
|
||||||
tools.map(|items| {
|
tools.map(|items| {
|
||||||
items
|
items
|
||||||
.iter()
|
.iter()
|
||||||
.map(|tool| NativeToolSpec {
|
.map(|tool| NativeToolSpec {
|
||||||
kind: "function".to_string(),
|
kind: "function",
|
||||||
function: NativeToolFunctionSpec {
|
function: NativeToolFunctionSpec {
|
||||||
name: tool.name.clone(),
|
name: &tool.name,
|
||||||
description: tool.description.clone(),
|
description: &tool.description,
|
||||||
parameters: tool.parameters.clone(),
|
parameters: &tool.parameters,
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
.collect()
|
.collect()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue