1use tokio_util::sync::CancellationToken;
2use unicode_normalization::UnicodeNormalization;
3
/// Returns `true` when `c` lies in the inclusive code point range
/// U+E0000..=U+E007F (the Unicode "Tags" block).
fn is_in_unicode_tag_range(c: char) -> bool {
    ('\u{E0000}'..='\u{E007F}').contains(&c)
}
9
10pub fn contains_unicode_tags(text: &str) -> bool {
11 text.chars().any(is_in_unicode_tag_range)
12}
13
14pub fn sanitize_unicode_tags(text: &str) -> String {
16 let normalized: String = text.nfc().collect();
17
18 normalized
19 .chars()
20 .filter(|&c| !is_in_unicode_tag_range(c))
21 .collect()
22}
23
/// Truncates `s` to at most `max_chars` Unicode scalar values (`char`s),
/// never splitting a multi-byte character.
///
/// * If `s` already fits, it is returned unchanged.
/// * Otherwise the result is the first `max_chars - 3` characters followed by
///   `"..."`, so the total length is exactly `max_chars`.
/// * If `max_chars` is smaller than the ellipsis itself (less than 3), the
///   ellipsis is omitted and the first `max_chars` characters are returned,
///   so the result never exceeds the requested limit.
pub fn safe_truncate(s: &str, max_chars: usize) -> String {
    const ELLIPSIS: &str = "...";

    // `nth(max_chars)` yields a character only when the string holds more
    // than `max_chars` characters; it stops early instead of counting them all.
    if s.chars().nth(max_chars).is_none() {
        return s.to_string();
    }

    // `ELLIPSIS` is ASCII, so its byte length equals its character count.
    let Some(keep) = max_chars.checked_sub(ELLIPSIS.len()) else {
        // No room for the ellipsis: hard-cut to the limit instead.
        return s.chars().take(max_chars).collect();
    };

    let mut truncated: String = s.chars().take(keep).collect();
    truncated.push_str(ELLIPSIS);
    truncated
}
43
44pub fn is_token_cancelled(cancellation_token: &Option<CancellationToken>) -> bool {
45 cancellation_token
46 .as_ref()
47 .is_some_and(|t| t.is_cancelled())
48}
49
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_contains_unicode_tags() {
        // A tag character embedded in ordinary text is detected.
        assert!(contains_unicode_tags("Hello\u{E0041}world"));
        // Both ends of the inclusive range count as tag characters.
        assert!(contains_unicode_tags("\u{E0000}"));
        assert!(contains_unicode_tags("\u{E007F}"));
        // Plain ASCII, ordinary non-ASCII text and the empty string are clean.
        assert!(!contains_unicode_tags("Hello world"));
        assert!(!contains_unicode_tags("Hello 世界 🌍"));
        assert!(!contains_unicode_tags(""));
    }

    #[test]
    fn test_sanitize_unicode_tags() {
        let malicious = "Hello\u{E0041}\u{E0042}\u{E0043}world";
        let cleaned = sanitize_unicode_tags(malicious);
        assert_eq!(cleaned, "Helloworld");
    }

    #[test]
    fn test_sanitize_unicode_tags_preserves_legitimate_unicode() {
        let clean_text = "Hello world 世界 🌍";
        let cleaned = sanitize_unicode_tags(clean_text);
        assert_eq!(cleaned, clean_text);
    }

    #[test]
    fn test_sanitize_unicode_tags_empty_string() {
        let empty = "";
        let cleaned = sanitize_unicode_tags(empty);
        assert_eq!(cleaned, "");
    }

    #[test]
    fn test_sanitize_unicode_tags_only_malicious() {
        let only_malicious = "\u{E0041}\u{E0042}\u{E0043}";
        let cleaned = sanitize_unicode_tags(only_malicious);
        assert_eq!(cleaned, "");
    }

    #[test]
    fn test_sanitize_unicode_tags_mixed_content() {
        let mixed = "Hello\u{E0041} 世界\u{E0042} 🌍\u{E0043}!";
        let cleaned = sanitize_unicode_tags(mixed);
        assert_eq!(cleaned, "Hello 世界 🌍!");
    }

    #[test]
    fn test_safe_truncate_ascii() {
        assert_eq!(safe_truncate("hello world", 20), "hello world");
        assert_eq!(safe_truncate("hello world", 8), "hello...");
        assert_eq!(safe_truncate("hello", 5), "hello");
        assert_eq!(safe_truncate("hello", 3), "...");
    }

    #[test]
    fn test_safe_truncate_japanese() {
        // Seven characters, each three bytes in UTF-8: truncation must count
        // characters, not bytes.
        let japanese = "こんにちは世界";
        assert_eq!(safe_truncate(japanese, 10), japanese);
        assert_eq!(safe_truncate(japanese, 5), "こん...");
        assert_eq!(safe_truncate(japanese, 7), japanese);
    }

    #[test]
    fn test_safe_truncate_mixed() {
        let mixed = "Hello こんにちは";
        assert_eq!(safe_truncate(mixed, 20), mixed);
        assert_eq!(safe_truncate(mixed, 8), "Hello...");
    }
}