fix: use safe Unicode string truncation to prevent panics (CWE-119)

Fixes Issue #55: Unicode string truncation causes panics with non-ASCII input

Previously, the code used byte-index slicing (`&s[..n]`), which panics when
the slice boundary falls in the middle of a multi-byte UTF-8 character (emoji,
CJK, accented characters).

Changes:
- Added `truncate_with_ellipsis()` helper in `src/util.rs` that uses
  `char_indices()` to find safe character boundaries
- Replaced 2 panic-prone byte-index truncations in `src/channels/mod.rs` with the safe helper
- Added 12 comprehensive tests covering emoji, CJK, accented chars, and edge cases

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
argenis de la rosa 2026-02-15 06:46:37 -05:00
parent 47c5006de4
commit 9aaa5bfef1
2 changed files with 12 additions and 19 deletions

View file

@ -87,7 +87,7 @@ mod tests {
#[test]
fn test_truncate_mixed_ascii_emoji() {
// Mixed ASCII and emoji
assert_eq!(truncate_with_ellipsis("Hello 🦀 World", 8), "Hello 🦀...");
assert_eq!(truncate_with_ellipsis("Hello 🦀 World", 8), "Hello 🦀 ...");
assert_eq!(truncate_with_ellipsis("Hi 😊", 10), "Hi 😊");
}
@ -107,14 +107,14 @@ mod tests {
fn test_truncate_accented_characters() {
// Accented characters (2 bytes each in UTF-8)
let s = "café résumé naïve";
assert_eq!(truncate_with_ellipsis(s, 10), "café résumé...");
assert_eq!(truncate_with_ellipsis(s, 10), "café résum...");
}
#[test]
fn test_truncate_unicode_edge_case() {
// Mix of 1-byte, 2-byte, 3-byte, and 4-byte characters
let s = "aé你好🦀"; // 1 + 2 + 3 + 3 + 4 bytes = 13 bytes, 5 chars
assert_eq!(truncate_with_ellipsis(s, 3), "aé你...");
assert_eq!(truncate_with_ellipsis(s, 3), "aé你...");
}
#[test]