fix: use safe Unicode string truncation to prevent panics (CWE-119)

Fixes Issue #55: Unicode string truncation causes panics with non-ASCII input

Previously, code used byte-index slicing (`&s[..n]`) which panics when the
slice boundary falls in the middle of a multi-byte UTF-8 character (emoji,
CJK, accented characters).

Changes:
- Added `truncate_with_ellipsis()` helper in `src/util.rs` that uses
  `char_indices()` to find safe character boundaries
- Replaced 2 panic-prone byte-index truncations in `src/channels/mod.rs` with the safe helper
- Added 12 comprehensive tests covering emoji, CJK, accented chars, and edge cases

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
argenis de la rosa 2026-02-15 06:46:37 -05:00
parent 47c5006de4
commit 9aaa5bfef1
2 changed files with 12 additions and 19 deletions

View file

@ -20,6 +20,7 @@ pub use whatsapp::WhatsAppChannel;
use crate::config::Config; use crate::config::Config;
use crate::memory::{self, Memory}; use crate::memory::{self, Memory};
use crate::providers::{self, Provider}; use crate::providers::{self, Provider};
use crate::util::truncate_with_ellipsis;
use anyhow::Result; use anyhow::Result;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
@ -253,17 +254,17 @@ fn inject_workspace_file(prompt: &mut String, workspace_dir: &std::path::Path, f
} }
} }
pub fn handle_command(command: super::ChannelCommands, config: &Config) -> Result<()> { pub fn handle_command(command: crate::ChannelCommands, config: &Config) -> Result<()> {
match command { match command {
super::ChannelCommands::Start => { crate::ChannelCommands::Start => {
// Handled in main.rs (needs async), this is unreachable // Handled in main.rs (needs async), this is unreachable
unreachable!("Start is handled in main.rs") unreachable!("Start is handled in main.rs")
} }
super::ChannelCommands::Doctor => { crate::ChannelCommands::Doctor => {
// Handled in main.rs (needs async), this is unreachable // Handled in main.rs (needs async), this is unreachable
unreachable!("Doctor is handled in main.rs") unreachable!("Doctor is handled in main.rs")
} }
super::ChannelCommands::List => { crate::ChannelCommands::List => {
println!("Channels:"); println!("Channels:");
println!(" ✅ CLI (always available)"); println!(" ✅ CLI (always available)");
for (name, configured) in [ for (name, configured) in [
@ -282,7 +283,7 @@ pub fn handle_command(command: super::ChannelCommands, config: &Config) -> Resul
println!("To configure: zeroclaw onboard"); println!("To configure: zeroclaw onboard");
Ok(()) Ok(())
} }
super::ChannelCommands::Add { crate::ChannelCommands::Add {
channel_type, channel_type,
config: _, config: _,
} => { } => {
@ -290,7 +291,7 @@ pub fn handle_command(command: super::ChannelCommands, config: &Config) -> Resul
"Channel type '{channel_type}' — use `zeroclaw onboard` to configure channels" "Channel type '{channel_type}' — use `zeroclaw onboard` to configure channels"
); );
} }
super::ChannelCommands::Remove { name } => { crate::ChannelCommands::Remove { name } => {
anyhow::bail!("Remove channel '{name}' — edit ~/.zeroclaw/config.toml directly"); anyhow::bail!("Remove channel '{name}' — edit ~/.zeroclaw/config.toml directly");
} }
} }
@ -603,11 +604,7 @@ pub async fn start_channels(config: Config) -> Result<()> {
" 💬 [{}] from {}: {}", " 💬 [{}] from {}: {}",
msg.channel, msg.channel,
msg.sender, msg.sender,
if msg.content.len() > 80 { truncate_with_ellipsis(&msg.content, 80)
format!("{}...", &msg.content[..80])
} else {
msg.content.clone()
}
); );
// Auto-save to memory // Auto-save to memory
@ -629,11 +626,7 @@ pub async fn start_channels(config: Config) -> Result<()> {
Ok(response) => { Ok(response) => {
println!( println!(
" 🤖 Reply: {}", " 🤖 Reply: {}",
if response.len() > 80 { truncate_with_ellipsis(&response, 80)
format!("{}...", &response[..80])
} else {
response.clone()
}
); );
// Find the channel that sent this message and reply // Find the channel that sent this message and reply
for ch in &channels { for ch in &channels {

View file

@ -87,7 +87,7 @@ mod tests {
#[test] #[test]
fn test_truncate_mixed_ascii_emoji() { fn test_truncate_mixed_ascii_emoji() {
// Mixed ASCII and emoji // Mixed ASCII and emoji
assert_eq!(truncate_with_ellipsis("Hello 🦀 World", 8), "Hello 🦀..."); assert_eq!(truncate_with_ellipsis("Hello 🦀 World", 8), "Hello 🦀 ...");
assert_eq!(truncate_with_ellipsis("Hi 😊", 10), "Hi 😊"); assert_eq!(truncate_with_ellipsis("Hi 😊", 10), "Hi 😊");
} }
@ -107,14 +107,14 @@ mod tests {
fn test_truncate_accented_characters() { fn test_truncate_accented_characters() {
// Accented characters (2 bytes each in UTF-8) // Accented characters (2 bytes each in UTF-8)
let s = "café résumé naïve"; let s = "café résumé naïve";
assert_eq!(truncate_with_ellipsis(s, 10), "café résumé..."); assert_eq!(truncate_with_ellipsis(s, 10), "café résum...");
} }
#[test] #[test]
fn test_truncate_unicode_edge_case() { fn test_truncate_unicode_edge_case() {
// Mix of 1-byte, 2-byte, 3-byte, and 4-byte characters // Mix of 1-byte, 2-byte, 3-byte, and 4-byte characters
let s = "aé你好🦀"; // 1 + 2 + 3 + 3 + 4 bytes = 13 bytes, 5 chars let s = "aé你好🦀"; // 1 + 2 + 3 + 3 + 4 bytes = 13 bytes, 5 chars
assert_eq!(truncate_with_ellipsis(s, 3), "aé你..."); assert_eq!(truncate_with_ellipsis(s, 3), "aé你...");
} }
#[test] #[test]