fix: use safe Unicode string truncation to prevent panics (CWE-119)

Fixes Issue #55: Unicode string truncation causes panics with non-ASCII input

Previously, code used byte-index slicing (`&s[..n]`) which panics when the
slice boundary falls in the middle of a multi-byte UTF-8 character (emoji,
CJK, accented characters).

Changes:
- Added `truncate_with_ellipsis()` helper in `src/util.rs` that uses
  `char_indices()` to find safe character boundaries
- Replaced 2 unsafe truncations in `src/channels/mod.rs` with the safe helper
- Added 12 comprehensive tests covering emoji, CJK, accented chars, and edge cases

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
argenis de la rosa 2026-02-15 06:46:37 -05:00
parent 47c5006de4
commit 9aaa5bfef1
2 changed files with 12 additions and 19 deletions

View file

@ -20,6 +20,7 @@ pub use whatsapp::WhatsAppChannel;
use crate::config::Config;
use crate::memory::{self, Memory};
use crate::providers::{self, Provider};
use crate::util::truncate_with_ellipsis;
use anyhow::Result;
use std::sync::Arc;
use std::time::Duration;
@ -253,17 +254,17 @@ fn inject_workspace_file(prompt: &mut String, workspace_dir: &std::path::Path, f
}
}
pub fn handle_command(command: super::ChannelCommands, config: &Config) -> Result<()> {
pub fn handle_command(command: crate::ChannelCommands, config: &Config) -> Result<()> {
match command {
super::ChannelCommands::Start => {
crate::ChannelCommands::Start => {
// Handled in main.rs (needs async), this is unreachable
unreachable!("Start is handled in main.rs")
}
super::ChannelCommands::Doctor => {
crate::ChannelCommands::Doctor => {
// Handled in main.rs (needs async), this is unreachable
unreachable!("Doctor is handled in main.rs")
}
super::ChannelCommands::List => {
crate::ChannelCommands::List => {
println!("Channels:");
println!(" ✅ CLI (always available)");
for (name, configured) in [
@ -282,7 +283,7 @@ pub fn handle_command(command: super::ChannelCommands, config: &Config) -> Resul
println!("To configure: zeroclaw onboard");
Ok(())
}
super::ChannelCommands::Add {
crate::ChannelCommands::Add {
channel_type,
config: _,
} => {
@ -290,7 +291,7 @@ pub fn handle_command(command: super::ChannelCommands, config: &Config) -> Resul
"Channel type '{channel_type}' — use `zeroclaw onboard` to configure channels"
);
}
super::ChannelCommands::Remove { name } => {
crate::ChannelCommands::Remove { name } => {
anyhow::bail!("Remove channel '{name}' — edit ~/.zeroclaw/config.toml directly");
}
}
@ -603,11 +604,7 @@ pub async fn start_channels(config: Config) -> Result<()> {
" 💬 [{}] from {}: {}",
msg.channel,
msg.sender,
if msg.content.len() > 80 {
format!("{}...", &msg.content[..80])
} else {
msg.content.clone()
}
truncate_with_ellipsis(&msg.content, 80)
);
// Auto-save to memory
@ -629,11 +626,7 @@ pub async fn start_channels(config: Config) -> Result<()> {
Ok(response) => {
println!(
" 🤖 Reply: {}",
if response.len() > 80 {
format!("{}...", &response[..80])
} else {
response.clone()
}
truncate_with_ellipsis(&response, 80)
);
// Find the channel that sent this message and reply
for ch in &channels {

View file

@ -87,7 +87,7 @@ mod tests {
#[test]
fn test_truncate_mixed_ascii_emoji() {
// Mixed ASCII and emoji
assert_eq!(truncate_with_ellipsis("Hello 🦀 World", 8), "Hello 🦀...");
assert_eq!(truncate_with_ellipsis("Hello 🦀 World", 8), "Hello 🦀 ...");
assert_eq!(truncate_with_ellipsis("Hi 😊", 10), "Hi 😊");
}
@ -107,14 +107,14 @@ mod tests {
fn test_truncate_accented_characters() {
// Accented characters (2 bytes each in UTF-8)
let s = "café résumé naïve";
assert_eq!(truncate_with_ellipsis(s, 10), "café résumé...");
assert_eq!(truncate_with_ellipsis(s, 10), "café résum...");
}
#[test]
fn test_truncate_unicode_edge_case() {
// Mix of 1-byte, 2-byte, 3-byte, and 4-byte characters
let s = "aé你好🦀"; // 1 + 1 + 2 + 2 + 4 bytes = 10 bytes, 5 chars
assert_eq!(truncate_with_ellipsis(s, 3), "aé你...");
assert_eq!(truncate_with_ellipsis(s, 3), "aé你...");
}
#[test]