offline_intelligence/utils/
text_utils.rs1use std::borrow::Cow;
4use regex::Regex;
5use lazy_static::lazy_static;
6
lazy_static! {
    // Matches a run of one or more whitespace characters (`\s+`).
    // The pattern is a fixed literal, so the `unwrap` can only fail if that
    // literal itself is not a valid regular expression.
    static ref WHITESPACE_REGEX: Regex = Regex::new(r"\s+").unwrap();
}
10
/// Namespace type for string helper functions.
///
/// The struct carries no data; its helpers are associated functions that are
/// called as `TextUtils::function_name(...)`.
pub struct TextUtils;
13
14impl TextUtils {
/// Reports whether `text` contains `pattern`, ignoring letter case.
///
/// Both strings are folded the same way before the search: every character is
/// lowercased individually with [`char::to_lowercase`], and the Greek final
/// sigma (`ς`) is folded to `σ`. Folding per character (rather than with
/// `str::to_lowercase`, whose result for `Σ` depends on its position in a
/// word) keeps the outcome independent of where the pattern is cut.
///
/// An empty `pattern` always matches.
pub fn contains_ignore_case(text: &str, pattern: &str) -> bool {
    /// Lowercases `s` one character at a time and unifies both lowercase sigmas.
    fn fold_case(s: &str) -> String {
        s.chars()
            .flat_map(char::to_lowercase)
            .map(|c| if c == 'ς' { 'σ' } else { c })
            .collect()
    }

    if pattern.is_empty() {
        return true;
    }

    // No byte-length pre-check on the raw inputs: lowercasing can change the
    // encoded length (e.g. `İ` becomes `i` + U+0307), so comparing lengths
    // before folding could reject a genuine match.
    fold_case(text).contains(&fold_case(pattern))
}
32
/// Collapses every run of whitespace in `text` into a single ASCII space and
/// strips leading and trailing whitespace.
///
/// Returns `Cow::Borrowed(text)` when the input is already in that form, so
/// no allocation happens for clean input; otherwise returns a newly built
/// `Cow::Owned` string.
pub fn normalize_whitespace(text: &str) -> Cow<'_, str> {
    // The text is already normalized when the only whitespace it contains is
    // single ASCII spaces strictly between words.
    let only_plain_spaces = !text.chars().any(|c| c.is_whitespace() && c != ' ');
    let no_edge_spaces = !text.starts_with(' ') && !text.ends_with(' ');
    let no_repeated_spaces = !text.contains("  ");

    if only_plain_spaces && no_edge_spaces && no_repeated_spaces {
        return Cow::Borrowed(text);
    }

    // `split_whitespace` skips leading/trailing whitespace and treats any run
    // of whitespace as one separator, so re-joining with ' ' normalizes.
    let mut normalized = String::with_capacity(text.len());
    for word in text.split_whitespace() {
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    Cow::Owned(normalized)
}
41
/// Returns the leading part of `text` that holds its first `n` words.
///
/// Words are maximal runs of non-whitespace characters. The returned slice
/// starts at the first word and ends right after the `n`-th word; the
/// whitespace between those words is kept exactly as written. When `text`
/// has fewer than `n` words, all of its words are returned (without
/// surrounding whitespace). Returns an empty string when `n` is zero or
/// `text` contains no words.
///
/// The result always borrows from `text`; nothing is allocated.
pub fn first_words(text: &str, n: usize) -> Cow<'_, str> {
    if n == 0 {
        return Cow::Borrowed("");
    }

    // Skip leading whitespace so a leading separator is never mistaken for
    // the end of a word.
    let body = text.trim_start();

    let mut words_seen = 0;
    let mut inside_word = false;
    let mut cut = body.len();

    for (idx, ch) in body.char_indices() {
        if !ch.is_whitespace() {
            inside_word = true;
        } else if inside_word {
            // First whitespace after a word: that word is now complete.
            // Further whitespace in the same run is ignored.
            inside_word = false;
            words_seen += 1;
            if words_seen == n {
                cut = idx;
                break;
            }
        }
    }

    // When fewer than `n` words exist, `cut` is the end of `body`; dropping
    // trailing whitespace keeps the result ending on a word.
    Cow::Borrowed(body[..cut].trim_end())
}
65
/// Counts the whitespace-separated words in `text`.
///
/// Empty and whitespace-only input yields zero.
pub fn count_words(text: &str) -> usize {
    let mut total = 0;
    for _word in text.split_whitespace() {
        total += 1;
    }
    total
}
74
/// Shortens `text` to at most `max_len` bytes, marking the cut with `"..."`.
///
/// * If `text` already fits in `max_len` bytes it is returned unchanged
///   (borrowed).
/// * If `max_len` is 3 or less and `text` does not fit, the result is just
///   `"..."` (which is longer than `max_len` when `max_len < 3`).
/// * Otherwise the result is a prefix of `text` followed by `"..."`. The
///   prefix is at most `max_len - 3` bytes and is shortened further, if
///   needed, so that it never ends in the middle of a multi-byte character.
pub fn truncate_with_ellipsis(text: &str, max_len: usize) -> Cow<'_, str> {
    const ELLIPSIS: &str = "...";

    if text.len() <= max_len {
        return Cow::Borrowed(text);
    }
    if max_len <= ELLIPSIS.len() {
        return Cow::Borrowed(ELLIPSIS);
    }

    // Slicing a `str` at a non-boundary byte offset panics, so back off to
    // the nearest character boundary. Offset 0 is always a boundary, which
    // guarantees the loop terminates.
    let mut cut = max_len - ELLIPSIS.len();
    while !text.is_char_boundary(cut) {
        cut -= 1;
    }

    let mut shortened = String::with_capacity(cut + ELLIPSIS.len());
    shortened.push_str(&text[..cut]);
    shortened.push_str(ELLIPSIS);
    Cow::Owned(shortened)
}
88
/// Decides whether `word` is worth keeping as a meaningful term.
///
/// A word is rejected when its byte length is below `min_len`, or when its
/// lowercase form is one of a fixed set of common English function words
/// (articles, conjunctions, prepositions and auxiliary verbs).
pub fn is_significant_word(word: &str, min_len: usize) -> bool {
    const STOP_WORDS: &[&str] = &[
        "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
        "of", "with", "by", "is", "am", "are", "was", "were", "be", "been",
        "being", "have", "has", "had", "do", "does", "did",
    ];

    // The length test comes first so short words are rejected without
    // building the lowercase copy.
    word.len() >= min_len && {
        let lowered = word.to_lowercase();
        !STOP_WORDS.contains(&lowered.as_str())
    }
}
100}