Merge remote-tracking branch 'origin/feat/whatsapp-email-channels'
# Conflicts: # Cargo.lock # src/config/schema.rs # src/cron/mod.rs # src/security/secrets.rs # src/service/mod.rs
This commit is contained in:
commit
47c5006de4
12 changed files with 1689 additions and 143 deletions
134
src/util.rs
Normal file
134
src/util.rs
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
//! Utility functions for ZeroClaw.
|
||||
//!
|
||||
//! This module contains reusable helper functions used across the codebase.
|
||||
|
||||
/// Truncate a string to at most `max_chars` characters, appending "..." if truncated.
///
/// Characters are counted as Rust `char`s (Unicode scalar values), and the cut is always made
/// on a character boundary, so multi-byte UTF-8 text (emoji, CJK, accented characters) never
/// causes a slicing panic. The kept prefix is returned verbatim: trailing whitespace at the
/// cut point is *not* trimmed before the ellipsis is appended.
///
/// # Arguments
/// * `s` - The string to truncate
/// * `max_chars` - Maximum number of characters to keep (excluding "...")
///
/// # Returns
/// * Original string if its character count is <= `max_chars`
/// * The first `max_chars` characters with "..." appended otherwise
///
/// # Examples
/// ```
/// use zeroclaw::util::truncate_with_ellipsis;
///
/// // ASCII string - no truncation needed
/// assert_eq!(truncate_with_ellipsis("hello", 10), "hello");
///
/// // ASCII string - truncation needed
/// assert_eq!(truncate_with_ellipsis("hello world", 5), "hello...");
///
/// // Multi-byte UTF-8 (emoji) - safe truncation
/// assert_eq!(truncate_with_ellipsis("Hello 🦀 World", 7), "Hello 🦀...");
/// assert_eq!(truncate_with_ellipsis("😀😀😀😀", 2), "😀😀...");
///
/// // Whitespace at the cut point is kept as-is
/// assert_eq!(truncate_with_ellipsis("Hello 🦀 World", 8), "Hello 🦀 ...");
///
/// // Empty string
/// assert_eq!(truncate_with_ellipsis("", 10), "");
/// ```
pub fn truncate_with_ellipsis(s: &str, max_chars: usize) -> String {
    // `char_indices().nth(max_chars)` yields the byte offset of the first character *past*
    // the limit, which is always a valid slice boundary. `None` means `s` has at most
    // `max_chars` characters and is returned unchanged.
    match s.char_indices().nth(max_chars) {
        Some((cut, _)) => format!("{}...", &s[..cut]),
        None => s.to_owned(),
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    //! Unit tests for `truncate_with_ellipsis`.
    //!
    //! Expected values assume the function keeps exactly the first `max_chars` characters
    //! verbatim (including any trailing whitespace) and then appends "...".

    use super::*;

    #[test]
    fn test_truncate_ascii_no_truncation() {
        // ASCII string shorter than limit - no change
        assert_eq!(truncate_with_ellipsis("hello", 10), "hello");
        assert_eq!(truncate_with_ellipsis("hello world", 50), "hello world");
    }

    #[test]
    fn test_truncate_ascii_with_truncation() {
        // ASCII string longer than limit - truncates
        assert_eq!(truncate_with_ellipsis("hello world", 5), "hello...");
        // The 10th character is a space; it is kept, not trimmed.
        assert_eq!(truncate_with_ellipsis("This is a long message", 10), "This is a ...");
    }

    #[test]
    fn test_truncate_empty_string() {
        assert_eq!(truncate_with_ellipsis("", 10), "");
    }

    #[test]
    fn test_truncate_at_exact_boundary() {
        // String exactly at boundary - no truncation
        assert_eq!(truncate_with_ellipsis("hello", 5), "hello");
    }

    #[test]
    fn test_truncate_emoji_single() {
        // Single emoji (4 bytes) - should not panic
        let s = "🦀";
        assert_eq!(truncate_with_ellipsis(s, 10), s);
        assert_eq!(truncate_with_ellipsis(s, 1), s);
    }

    #[test]
    fn test_truncate_emoji_multiple() {
        // Multiple emoji - safe truncation at character boundary
        let s = "😀😀😀😀"; // 4 emoji, each 4 bytes = 16 bytes total
        assert_eq!(truncate_with_ellipsis(s, 2), "😀😀...");
        assert_eq!(truncate_with_ellipsis(s, 3), "😀😀😀...");
    }

    #[test]
    fn test_truncate_mixed_ascii_emoji() {
        // Mixed ASCII and emoji: 7 characters ends right after the emoji...
        assert_eq!(truncate_with_ellipsis("Hello 🦀 World", 7), "Hello 🦀...");
        // ...while 8 characters also keeps the space that follows it.
        assert_eq!(truncate_with_ellipsis("Hello 🦀 World", 8), "Hello 🦀 ...");
        assert_eq!(truncate_with_ellipsis("Hi 😊", 10), "Hi 😊");
    }

    #[test]
    fn test_truncate_cjk_characters() {
        // CJK characters (Chinese - each is 3 bytes)
        // This would panic with byte slicing: &s[..50] where s has 17 chars (51 bytes)
        let s = "这是一个测试消息用来触发崩溃的中文"; // 17 characters
        // Each character is 3 bytes, so byte 50 falls inside the 17th character
        let result = truncate_with_ellipsis(s, 16);
        assert!(result.ends_with("..."));
        // Should not panic and should be valid UTF-8
        assert!(result.is_char_boundary(result.len() - 1));
        // Exactly 16 characters are kept, followed by the three-character ellipsis
        assert_eq!(result.chars().count(), 16 + 3);
    }

    #[test]
    fn test_truncate_accented_characters() {
        // Accented characters (2 bytes each in UTF-8)
        let s = "café résumé naïve";
        // First 10 characters: c a f é ␠ r é s u m
        assert_eq!(truncate_with_ellipsis(s, 10), "café résum...");
    }

    #[test]
    fn test_truncate_unicode_edge_case() {
        // Mix of 1-byte, 2-byte, 3-byte, and 4-byte characters
        let s = "aé你好🦀"; // 1 + 2 + 3 + 3 + 4 bytes = 13 bytes, 5 chars
        assert_eq!(truncate_with_ellipsis(s, 3), "aé你...");
    }

    #[test]
    fn test_truncate_long_string() {
        // Long ASCII string
        let s = "a".repeat(200);
        let result = truncate_with_ellipsis(&s, 50);
        assert_eq!(result.len(), 53); // 50 + "..."
        assert!(result.ends_with("..."));
    }

    #[test]
    fn test_truncate_zero_max_chars() {
        // Edge case: max_chars = 0
        assert_eq!(truncate_with_ellipsis("hello", 0), "...");
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue