//! Utility functions for ZeroClaw.
//!
//! This module contains reusable helper functions used across the codebase.

/// Truncate a string to at most `max_chars` characters, appending "..." if truncated.
///
/// This function safely handles multi-byte UTF-8 characters (emoji, CJK, accented characters)
/// by using character boundaries instead of byte indices. A "character" here is a Unicode
/// scalar value (`char`), not a grapheme cluster.
///
/// The kept prefix is not trimmed: if it ends in whitespace, that whitespace is preserved
/// in front of the "...".
///
/// # Arguments
/// * `s` - The string to truncate
/// * `max_chars` - Maximum number of characters to keep (excluding "...")
///
/// # Returns
/// * Original string if length <= `max_chars`
/// * Truncated string with "..." appended if length > `max_chars`
///
/// # Examples
/// ```
/// use zeroclaw::util::truncate_with_ellipsis;
///
/// // ASCII string - no truncation needed
/// assert_eq!(truncate_with_ellipsis("hello", 10), "hello");
///
/// // ASCII string - truncation needed
/// assert_eq!(truncate_with_ellipsis("hello world", 5), "hello...");
///
/// // Multi-byte UTF-8 (emoji) - safe truncation
/// // The 8th character is the space after the crab, so it is kept.
/// assert_eq!(truncate_with_ellipsis("Hello 🦀 World", 8), "Hello 🦀 ...");
/// assert_eq!(truncate_with_ellipsis("😀😀😀😀", 2), "😀😀...");
///
/// // Empty string
/// assert_eq!(truncate_with_ellipsis("", 10), "");
/// ```
pub fn truncate_with_ellipsis(s: &str, max_chars: usize) -> String {
    // `nth(max_chars)` is the first character *past* the limit. Its byte offset is always
    // a valid char boundary, so slicing there cannot panic. `None` means the string has at
    // most `max_chars` characters and is returned unchanged.
    match s.char_indices().nth(max_chars) {
        Some((idx, _)) => format!("{}...", &s[..idx]),
        None => s.to_string(),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_truncate_ascii_no_truncation() {
        // ASCII string shorter than limit - no change
        assert_eq!(truncate_with_ellipsis("hello", 10), "hello");
        assert_eq!(truncate_with_ellipsis("hello world", 50), "hello world");
    }

    #[test]
    fn test_truncate_ascii_with_truncation() {
        // ASCII string longer than limit - truncates
        assert_eq!(truncate_with_ellipsis("hello world", 5), "hello...");
        // Trailing whitespace in the kept prefix is preserved.
        assert_eq!(truncate_with_ellipsis("This is a long message", 10), "This is a ...");
    }

    #[test]
    fn test_truncate_empty_string() {
        assert_eq!(truncate_with_ellipsis("", 10), "");
    }

    #[test]
    fn test_truncate_at_exact_boundary() {
        // String exactly at boundary - no truncation
        assert_eq!(truncate_with_ellipsis("hello", 5), "hello");
    }

    #[test]
    fn test_truncate_emoji_single() {
        // Single emoji (4 bytes) - should not panic
        let s = "🦀";
        assert_eq!(truncate_with_ellipsis(s, 10), s);
        assert_eq!(truncate_with_ellipsis(s, 1), s);
    }

    #[test]
    fn test_truncate_emoji_multiple() {
        // Multiple emoji - safe truncation at character boundary
        let s = "😀😀😀😀"; // 4 emoji, each 4 bytes = 16 bytes total
        assert_eq!(truncate_with_ellipsis(s, 2), "😀😀...");
        assert_eq!(truncate_with_ellipsis(s, 3), "😀😀😀...");
    }

    #[test]
    fn test_truncate_mixed_ascii_emoji() {
        // Mixed ASCII and emoji
        assert_eq!(truncate_with_ellipsis("Hello 🦀 World", 8), "Hello 🦀 ...");
        assert_eq!(truncate_with_ellipsis("Hi 😊", 10), "Hi 😊");
    }

    #[test]
    fn test_truncate_cjk_characters() {
        // CJK characters (Chinese - each is 3 bytes)
        // This would panic with byte slicing: &s[..50] where s has 17 chars (51 bytes)
        let s = "这是一个测试消息用来触发崩溃的中文"; // 17 characters, 51 bytes
        // Each character is 3 bytes, so 50 bytes is ~16 characters
        let result = truncate_with_ellipsis(s, 16);
        assert!(result.ends_with("..."));
        // Should not panic and should keep exactly 16 whole characters plus "..."
        assert_eq!(result.chars().count(), 16 + 3);
        assert_eq!(result, "这是一个测试消息用来触发崩溃的中...");
    }

    #[test]
    fn test_truncate_accented_characters() {
        // Accented characters (2 bytes each in UTF-8)
        let s = "café résumé naïve";
        assert_eq!(truncate_with_ellipsis(s, 10), "café résum...");
    }

    #[test]
    fn test_truncate_unicode_edge_case() {
        // Mix of 1-byte, 2-byte, 3-byte, and 4-byte characters
        let s = "aé你好🦀"; // 1 + 2 + 3 + 3 + 4 bytes = 13 bytes, 5 chars
        assert_eq!(truncate_with_ellipsis(s, 3), "aé你...");
    }

    #[test]
    fn test_truncate_long_string() {
        // Long ASCII string
        let s = "a".repeat(200);
        let result = truncate_with_ellipsis(&s, 50);
        assert_eq!(result.len(), 53); // 50 + "..."
        assert!(result.ends_with("..."));
    }

    #[test]
    fn test_truncate_zero_max_chars() {
        // Edge case: max_chars = 0
        assert_eq!(truncate_with_ellipsis("hello", 0), "...");
    }
}