Merge remote-tracking branch 'origin/feat/whatsapp-email-channels'

# Conflicts: # Cargo.lock # src/config/schema.rs # src/cron/mod.rs # src/security/secrets.rs # src/service/mod.rs
2026-02-15 06:37:51 -05:00 · 2026-02-15 06:37:51 -05:00 · 47c5006de4
commit 47c5006de4
parent 5cc02c5813 a310e178db
12 changed files with 1689 additions and 143 deletions
--- a/src/util.rs
+++ b/src/util.rs
@@ -0,0 +1,134 @@
+//! Utility functions for ZeroClaw.
+//!
+//! This module contains reusable helper functions used across the codebase.
+
+/// Truncate a string to at most `max_chars` characters, appending "..." if truncated.
+///
+/// Counts Unicode scalar values (`char`s), not bytes, so the cut always lands on a
+/// valid UTF-8 boundary and never panics on emoji, CJK, or accented text.
+///
+/// # Arguments
+/// * `s` - The string to truncate
+/// * `max_chars` - Maximum number of characters to keep (excluding "...")
+///
+/// # Returns
+/// * Original string, unchanged, if it has at most `max_chars` characters
+/// * First `max_chars` characters (whitespace included) plus "..." otherwise
+///
+/// # Examples
+/// ```
+/// use zeroclaw::util::truncate_with_ellipsis;
+///
+/// // ASCII string - no truncation needed
+/// assert_eq!(truncate_with_ellipsis("hello", 10), "hello");
+///
+/// // ASCII string - truncation needed
+/// assert_eq!(truncate_with_ellipsis("hello world", 5), "hello...");
+///
+/// // Multi-byte UTF-8 (emoji) - the 8th kept character is the space after the crab
+/// assert_eq!(truncate_with_ellipsis("Hello 🦀 World", 8), "Hello 🦀 ...");
+/// assert_eq!(truncate_with_ellipsis("😀😀😀😀", 2), "😀😀...");
+///
+/// // Empty string
+/// assert_eq!(truncate_with_ellipsis("", 10), "");
+/// ```
+pub fn truncate_with_ellipsis(s: &str, max_chars: usize) -> String {
+    match s.char_indices().nth(max_chars) {
+        Some((idx, _)) => format!("{}...", &s[..idx]), // idx: byte offset of first dropped char
+        None => s.to_string(), // max_chars or fewer characters: nothing to cut
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_truncate_ascii_no_truncation() {
+        // ASCII string shorter than limit - no change
+        assert_eq!(truncate_with_ellipsis("hello", 10), "hello");
+        assert_eq!(truncate_with_ellipsis("hello world", 50), "hello world");
+    }
+
+    #[test]
+    fn test_truncate_ascii_with_truncation() {
+        // ASCII string longer than limit - truncates (trailing whitespace is kept)
+        assert_eq!(truncate_with_ellipsis("hello world", 5), "hello...");
+        assert_eq!(truncate_with_ellipsis("This is a long message", 10), "This is a ...");
+    }
+
+    #[test]
+    fn test_truncate_empty_string() {
+        assert_eq!(truncate_with_ellipsis("", 10), "");
+    }
+
+    #[test]
+    fn test_truncate_at_exact_boundary() {
+        // String exactly at boundary - no truncation
+        assert_eq!(truncate_with_ellipsis("hello", 5), "hello");
+    }
+
+    #[test]
+    fn test_truncate_emoji_single() {
+        // Single emoji (4 bytes, 1 char) - should not panic
+        let s = "🦀";
+        assert_eq!(truncate_with_ellipsis(s, 10), s);
+        assert_eq!(truncate_with_ellipsis(s, 1), s);
+    }
+
+    #[test]
+    fn test_truncate_emoji_multiple() {
+        // Multiple emoji - safe truncation at character boundary
+        let s = "😀😀😀😀"; // 4 emoji, each 4 bytes = 16 bytes total
+        assert_eq!(truncate_with_ellipsis(s, 2), "😀😀...");
+        assert_eq!(truncate_with_ellipsis(s, 3), "😀😀😀...");
+    }
+
+    #[test]
+    fn test_truncate_mixed_ascii_emoji() {
+        // Mixed ASCII and emoji: the 8 kept chars are "Hello ", the crab, and one space
+        assert_eq!(truncate_with_ellipsis("Hello 🦀 World", 8), "Hello 🦀 ...");
+        assert_eq!(truncate_with_ellipsis("Hi 😊", 10), "Hi 😊");
+    }
+
+    #[test]
+    fn test_truncate_cjk_characters() {
+        // CJK characters (Chinese - each is 3 bytes)
+        // This would panic with byte slicing: &s[..50] where s has 17 chars (51 bytes)
+        let s = "这是一个测试消息用来触发崩溃的中文"; // 17 characters
+        // Each character is 3 bytes, so 50 bytes is ~16 characters
+        let result = truncate_with_ellipsis(s, 16);
+        assert!(result.ends_with("..."));
+        // Exactly 16 kept characters plus the three-character "..." suffix
+        assert_eq!(result.chars().count(), 19);
+    }
+
+    #[test]
+    fn test_truncate_accented_characters() {
+        // Accented characters (é and ï are 2 bytes each in UTF-8)
+        let s = "café résumé naïve";
+        assert_eq!(truncate_with_ellipsis(s, 10), "café résum...");
+    }
+
+    #[test]
+    fn test_truncate_unicode_edge_case() {
+        // Mix of 1-byte, 2-byte, 3-byte, and 4-byte characters
+        let s = "aé你好🦀"; // 1 + 2 + 3 + 3 + 4 bytes = 13 bytes, 5 chars
+        assert_eq!(truncate_with_ellipsis(s, 3), "aé你...");
+    }
+
+    #[test]
+    fn test_truncate_long_string() {
+        // Long ASCII string
+        let s = "a".repeat(200);
+        let result = truncate_with_ellipsis(&s, 50);
+        assert_eq!(result.len(), 53); // 50 + "..."
+        assert!(result.ends_with("..."));
+    }
+
+    #[test]
+    fn test_truncate_zero_max_chars() {
+        // Edge case: max_chars = 0 keeps nothing, leaving only the ellipsis
+        assert_eq!(truncate_with_ellipsis("hello", 0), "...");
+    }
+}