sensitive_rs/engine/
wumanber.rs

1use hashbrown::HashMap;
2use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
3use smallvec::SmallVec;
4use std::collections::HashSet;
5use std::hash::{DefaultHasher, Hash, Hasher};
6use std::sync::Arc;
7
8/// Space handling policy
9#[derive(Debug, Clone, Copy, PartialEq, Eq)]
10pub enum SpaceHandling {
11    /// Strict match (default): Spaces must match exactly
12    Strict,
13    /// Ignore spaces: Ignore all whitespace characters when matching
14    IgnoreSpaces,
15    /// Normalized spaces: Treat multiple consecutive spaces as a single space
16    NormalizeSpaces,
17}
18
19/// High-performance Wu-Manber multi-pattern matching algorithm
20/// Optimized for Chinese text processing with parallel table building and memory efficiency
21#[derive(Debug, Clone)]
22pub struct WuManber {
23    patterns: Vec<Arc<String>>,
24    original_patterns: Vec<String>,
25    pattern_set: HashSet<Arc<String>>, // For quick lookup
26    min_len: usize,
27    block_size: usize,
28    shift_table: HashMap<u64, usize>,
29    hash_table: HashMap<u64, SmallVec<[usize; 4]>>,
30    space_handling: SpaceHandling,
31}
32
33impl WuManber {
34    /// Create new instance optimized for Chinese characters
35    pub fn new_chinese(patterns: Vec<String>) -> Self {
36        Self::new_with_space_handling(patterns, SpaceHandling::Strict)
37    }
38
39    /// Create new instance with custom block size
40    pub fn new(patterns: Vec<String>, block_size: usize) -> Self {
41        Self::new_with_block_size_and_space_handling(patterns, block_size, SpaceHandling::Strict)
42    }
43
44    /// Create new instance with parallel table building
45    pub fn new_parallel(patterns: Vec<String>, block_size: usize) -> Self {
46        Self::new_parallel_with_space_handling(patterns, block_size, SpaceHandling::Strict)
47    }
48
49    /// Create new instance with space handling strategy
50    pub fn new_with_space_handling(patterns: Vec<String>, space_handling: SpaceHandling) -> Self {
51        let block_size = Self::calculate_optimal_block_size(&patterns, space_handling);
52        Self::new_with_block_size_and_space_handling(patterns, block_size, space_handling)
53    }
54
55    /// Create new instance with custom block size and space handling
56    pub fn new_with_block_size_and_space_handling(
57        patterns: Vec<String>,
58        block_size: usize,
59        space_handling: SpaceHandling,
60    ) -> Self {
61        Self::build_instance(patterns, block_size, false, space_handling)
62    }
63
64    /// Create new instance with parallel table building and space handling
65    pub fn new_parallel_with_space_handling(
66        patterns: Vec<String>,
67        block_size: usize,
68        space_handling: SpaceHandling,
69    ) -> Self {
70        Self::build_instance(patterns, block_size, true, space_handling)
71    }
72
73    /// Calculate optimal block size based on patterns and space handling
74    fn calculate_optimal_block_size(patterns: &[String], space_handling: SpaceHandling) -> usize {
75        if patterns.is_empty() {
76            return 2;
77        }
78
79        let processed_patterns: Vec<String> =
80            patterns.iter().map(|p| Self::preprocess_pattern(p, space_handling)).filter(|p| !p.is_empty()).collect();
81
82        let min_len = processed_patterns.iter().map(|p| p.chars().count()).min().unwrap_or(2);
83
84        // Dynamically adjust block_size to ensure it doesn't exceed the minimum mode length
85        match min_len {
86            1 => 1,
87            2..=3 => 2,
88            4..=10 => 3,
89            _ => (min_len / 2).clamp(2, 4),
90        }
91    }
92
93    /// Preprocess pattern based on space handling strategy
94    fn preprocess_pattern(pattern: &str, space_handling: SpaceHandling) -> String {
95        match space_handling {
96            SpaceHandling::Strict => pattern.to_string(),
97            SpaceHandling::IgnoreSpaces => pattern.chars().filter(|c| !c.is_whitespace()).collect(),
98            SpaceHandling::NormalizeSpaces => {
99                let mut result = String::new();
100                let mut prev_was_space = false;
101
102                for ch in pattern.chars() {
103                    if ch.is_whitespace() {
104                        if !prev_was_space {
105                            result.push(' ');
106                            prev_was_space = true;
107                        }
108                    } else {
109                        result.push(ch);
110                        prev_was_space = false;
111                    }
112                }
113
114                result
115            }
116        }
117    }
118
119    /// Preprocess text based on space handling strategy
120    fn preprocess_text(&self, text: &str) -> String {
121        Self::preprocess_pattern(text, self.space_handling)
122    }
123
124    /// Create empty instance
125    fn empty() -> Self {
126        WuManber {
127            patterns: Vec::new(),
128            original_patterns: Vec::new(),
129            pattern_set: HashSet::new(),
130            min_len: 0,
131            block_size: 2,
132            shift_table: HashMap::new(),
133            hash_table: HashMap::new(),
134            space_handling: SpaceHandling::Strict,
135        }
136    }
137
138    /// Internal instance builder with Arc optimization and space handling
139    fn build_instance(patterns: Vec<String>, block_size: usize, parallel: bool, space_handling: SpaceHandling) -> Self {
140        if patterns.is_empty() {
141            return Self::empty();
142        }
143
144        // Save the original mode
145        let original_patterns = patterns.clone();
146
147        // Pretreatment mode
148        let processed_patterns: Vec<String> =
149            patterns.iter().map(|p| Self::preprocess_pattern(p, space_handling)).filter(|p| !p.is_empty()).collect();
150
151        if processed_patterns.is_empty() {
152            return Self::empty();
153        }
154
155        let min_len = processed_patterns.iter().map(|p| p.chars().count()).min().unwrap_or(1);
156
157        // Make sure that the block_size does not exceed the minimum mode length
158        let safe_block_size = block_size.min(min_len);
159
160        let patterns_arc: Vec<Arc<String>> = processed_patterns.into_iter().map(Arc::new).collect();
161        // Build pattern_set for quick lookups
162        let pattern_set: HashSet<Arc<String>> = patterns_arc.iter().cloned().collect();
163        let mut instance = WuManber {
164            patterns: patterns_arc,
165            original_patterns,
166            pattern_set,
167            min_len,
168            block_size: safe_block_size,
169            shift_table: HashMap::new(),
170            hash_table: HashMap::new(),
171            space_handling,
172        };
173
174        if parallel {
175            instance.build_tables_parallel();
176        } else {
177            instance.build_tables();
178        }
179
180        instance
181    }
182
183    /// Build shift and hash tables sequentially with memory optimization
184    fn build_tables(&mut self) {
185        self.build_shift_table();
186        self.build_hash_table();
187    }
188
189    /// Build shift and hash tables in parallel with memory optimization
190    fn build_tables_parallel(&mut self) {
191        self.build_shift_table_parallel();
192        self.build_hash_table_parallel();
193    }
194
195    /// Build shift table sequentially with optimized character handling
196    fn build_shift_table(&mut self) {
197        for pattern in self.patterns.iter() {
198            let chars: Vec<char> = pattern.chars().collect();
199            let char_count = chars.len();
200
201            // Prevent spillage: Ensure that the pattern length is greater than or equal to block_size
202            if char_count < self.block_size {
203                continue;
204            }
205
206            for i in 0..=(char_count - self.block_size) {
207                let block = self.extract_block_optimized(&chars, i);
208                let hash = Self::calculate_hash_fast(&block);
209                let shift = char_count - i - self.block_size;
210
211                self.shift_table.entry(hash).and_modify(|v| *v = (*v).min(shift)).or_insert(shift);
212            }
213        }
214    }
215
216    /// Build hash table sequentially with memory optimization
217    fn build_hash_table(&mut self) {
218        for (pattern_idx, pattern) in self.patterns.iter().enumerate() {
219            let chars: Vec<char> = pattern.chars().collect();
220            let char_count = chars.len();
221
222            if char_count >= self.block_size {
223                let start_pos = char_count - self.block_size;
224                let block = self.extract_block_optimized(&chars, start_pos);
225                let hash = Self::calculate_hash_fast(&block);
226
227                self.hash_table.entry(hash).or_default().push(pattern_idx);
228            }
229        }
230    }
231
232    /// Build shift table in parallel with proper iterator handling
233    fn build_shift_table_parallel(&mut self) {
234        let block_size = self.block_size;
235
236        let shift_entries: Vec<(u64, usize)> = self
237            .patterns
238            .par_iter()
239            .flat_map(|pattern| {
240                let chars: Vec<char> = pattern.chars().collect();
241                let char_count = chars.len();
242
243                if char_count < block_size {
244                    return Vec::new();
245                }
246
247                (0..=(char_count - block_size))
248                    .map(move |i| {
249                        let block = chars[i..i + block_size].iter().collect::<String>();
250                        let hash = Self::calculate_hash_fast(&block);
251                        let shift = char_count - i - block_size;
252                        (hash, shift)
253                    })
254                    .collect::<Vec<_>>()
255            })
256            .collect();
257
258        for (hash, shift) in shift_entries {
259            self.shift_table.entry(hash).and_modify(|v| *v = (*v).min(shift)).or_insert(shift);
260        }
261    }
262
263    /// Build hash table in parallel with memory optimization
264    fn build_hash_table_parallel(&mut self) {
265        let block_size = self.block_size;
266
267        let hash_entries: Vec<(u64, usize)> = self
268            .patterns
269            .par_iter()
270            .enumerate()
271            .filter_map(|(pattern_idx, pattern)| {
272                let chars: Vec<char> = pattern.chars().collect();
273                let char_count = chars.len();
274
275                if char_count >= block_size {
276                    let start_pos = char_count - block_size;
277                    let block = chars[start_pos..start_pos + block_size].iter().collect::<String>();
278                    let hash = Self::calculate_hash_fast(&block);
279                    Some((hash, pattern_idx))
280                } else {
281                    None
282                }
283            })
284            .collect();
285
286        for (hash, pattern_idx) in hash_entries {
287            self.hash_table.entry(hash).or_insert_with(SmallVec::new).push(pattern_idx);
288        }
289    }
290
291    /// Extract block from character array with reduced allocations
292    #[inline]
293    fn extract_block_optimized(&self, chars: &[char], start: usize) -> String {
294        chars[start..start + self.block_size].iter().collect()
295    }
296
297    /// Optimized hash calculation with better performance
298    #[inline]
299    fn calculate_hash_fast(s: &str) -> u64 {
300        let mut hasher = DefaultHasher::new();
301        s.hash(&mut hasher);
302        hasher.finish()
303    }
304
305    /// Search for first match using Wu-Manber algorithm with space handling
306    pub fn search(&self, text: &str) -> Option<Arc<String>> {
307        if self.patterns.is_empty() || text.is_empty() {
308            return None;
309        }
310
311        if self.space_handling != SpaceHandling::Strict {
312            return self.search_with_preprocessing(text);
313        }
314
315        let chars: Vec<char> = text.chars().collect();
316        let text_len = chars.len();
317
318        if text_len < self.min_len {
319            return None;
320        }
321
322        let mut pos = self.min_len - 1;
323
324        while pos < text_len {
325            if pos + 1 < self.block_size {
326                pos += 1;
327                continue;
328            }
329
330            let block_start = pos + 1 - self.block_size;
331            let block = self.extract_block_optimized(&chars, block_start);
332            let hash = Self::calculate_hash_fast(&block);
333
334            if let Some(&shift) = self.shift_table.get(&hash) {
335                if shift == 0 {
336                    if let Some(pattern_indices) = self.hash_table.get(&hash) {
337                        if let Some(result) = self.verify_matches_arc(&chars, pos, pattern_indices) {
338                            return Some(result);
339                        }
340                    }
341
342                    let prev_block_start = block_start.saturating_sub(1);
343                    if prev_block_start < block_start {
344                        let prev_block = self.extract_block_optimized(&chars, prev_block_start);
345                        let prev_hash = Self::calculate_hash_fast(&prev_block);
346                        if let Some(prev_pattern_indices) = self.hash_table.get(&prev_hash) {
347                            // Verify the previous position
348                            if let Some(found) = self.verify_matches_arc(&chars, pos - 1, prev_pattern_indices) {
349                                return Some(found);
350                            }
351                        }
352                    }
353
354                    pos += 1;
355                } else {
356                    pos += shift;
357                }
358            } else {
359                // pos += self.min_len;
360                pos += self.min_len.saturating_sub(self.block_size).saturating_add(1);
361            }
362        }
363
364        None
365    }
366
367    /// Search with preprocessing for non-strict space handling
368    fn search_with_preprocessing(&self, text: &str) -> Option<Arc<String>> {
369        for (i, original_pattern) in self.original_patterns.iter().enumerate() {
370            let processed_text = self.preprocess_text(text);
371            let processed_pattern = Self::preprocess_pattern(original_pattern, self.space_handling);
372
373            if processed_text.contains(&processed_pattern) {
374                return self.patterns.get(i).cloned();
375            }
376        }
377        None
378    }
379
380    /// Verify potential matches and return Arc pattern
381    fn verify_matches_arc(&self, chars: &[char], pos: usize, pattern_indices: &[usize]) -> Option<Arc<String>> {
382        for &pattern_idx in pattern_indices {
383            if let Some(pattern) = self.patterns.get(pattern_idx) {
384                let pattern_chars: Vec<char> = pattern.chars().collect();
385                let pattern_len = pattern_chars.len();
386
387                if pos + 1 >= pattern_len {
388                    let start = pos + 1 - pattern_len;
389                    if chars[start..start + pattern_len] == pattern_chars[..] {
390                        return Some(pattern.clone());
391                    }
392                }
393            }
394        }
395        None
396    }
397
398    /// Legacy search method for string compatibility
399    pub fn search_string(&self, text: &str) -> Option<String> {
400        if self.space_handling != SpaceHandling::Strict {
401            // For non-strict matches, use simplified logic to ensure correctness
402            for original_pattern in &self.original_patterns {
403                let processed_text = self.preprocess_text(text);
404                let processed_pattern = Self::preprocess_pattern(original_pattern, self.space_handling);
405                if processed_text.contains(&processed_pattern) {
406                    return Some(original_pattern.clone());
407                }
408            }
409            return None;
410        }
411
412        self.search(text).map(|arc| (*arc).clone())
413    }
414
415    /// Find all matches in text with space handling
416    pub fn search_all(&self, text: &str) -> Vec<Arc<String>> {
417        if self.patterns.is_empty() || text.is_empty() {
418            return Vec::new();
419        }
420
421        let mut results = Vec::new();
422
423        // Use simplified matching logic to ensure correctness
424        for (i, original_pattern) in self.original_patterns.iter().enumerate() {
425            let matches = match self.space_handling {
426                SpaceHandling::Strict => text.contains(original_pattern),
427                _ => {
428                    let processed_text = self.preprocess_text(text);
429                    let processed_pattern = Self::preprocess_pattern(original_pattern, self.space_handling);
430                    processed_text.contains(&processed_pattern)
431                }
432            };
433
434            if matches {
435                if let Some(pattern) = self.patterns.get(i) {
436                    results.push(pattern.clone());
437                }
438            }
439        }
440
441        results
442    }
443
444    /// Find all matches returning strings for compatibility
445    pub fn search_all_strings(&self, text: &str) -> Vec<String> {
446        if self.patterns.is_empty() || text.is_empty() {
447            return Vec::new();
448        }
449
450        let mut results = Vec::new();
451
452        for original_pattern in &self.original_patterns {
453            let matches = match self.space_handling {
454                SpaceHandling::Strict => text.contains(original_pattern),
455                _ => {
456                    let processed_text = self.preprocess_text(text);
457                    let processed_pattern = Self::preprocess_pattern(original_pattern, self.space_handling);
458                    processed_text.contains(&processed_pattern)
459                }
460            };
461
462            if matches {
463                results.push(original_pattern.clone());
464            }
465        }
466
467        results
468    }
469
470    /// Replace all matches with replacement character
471    pub fn replace_all(&self, text: &str, replacement: char) -> String {
472        let mut result = text.to_string();
473
474        for pattern in &self.original_patterns {
475            let replacement_str = replacement.to_string().repeat(pattern.chars().count());
476            result = result.replace(pattern, &replacement_str);
477        }
478
479        result
480    }
481
482    /// Remove all matches from text
483    pub fn remove_all(&self, text: &str) -> String {
484        let mut result = text.to_string();
485
486        for pattern in &self.original_patterns {
487            result = result.replace(pattern, "");
488        }
489
490        result
491    }
492
493    /// Find all match positions with byte offsets
494    pub fn find_matches(&self, text: &str) -> Vec<Match> {
495        let mut matches = Vec::new();
496
497        for pattern in &self.original_patterns {
498            let mut start = 0;
499            while let Some(pos) = text[start..].find(pattern) {
500                let absolute_start = start + pos;
501                let absolute_end = absolute_start + pattern.len();
502                matches.push(Match { start: absolute_start, end: absolute_end });
503                start = absolute_start + 1;
504            }
505        }
506
507        matches.sort_by_key(|m| m.start);
508        matches
509    }
510
511    /// Get patterns reference (returns Arc slice)
512    pub fn patterns(&self) -> &[Arc<String>] {
513        &self.patterns
514    }
515
516    /// Get original patterns as strings
517    pub fn patterns_strings(&self) -> Vec<String> {
518        self.original_patterns.clone()
519    }
520
521    /// Check if pattern exists (使用 pattern_set 提供 O(1) 查找)
522    pub fn contains_pattern(&self, pattern: &str) -> bool {
523        let target = Arc::new(pattern.to_string());
524        self.pattern_set.contains(&target)
525    }
526
527    /// Get space handling strategy
528    pub fn space_handling(&self) -> SpaceHandling {
529        self.space_handling
530    }
531
532    /// Get memory usage statistics
533    pub fn memory_stats(&self) -> WuManberMemoryStats {
534        let patterns_memory = self.patterns.iter().map(|p| size_of::<Arc<String>>() + p.len()).sum::<usize>();
535
536        let shift_table_memory = self.shift_table.len() * (size_of::<u64>() + size_of::<usize>());
537        let hash_table_memory = self
538            .hash_table
539            .iter()
540            .map(|(_k, v)| size_of::<u64>() + size_of::<SmallVec<[usize; 4]>>() + v.len() * size_of::<usize>())
541            .sum::<usize>();
542
543        let pattern_set_memory = self.pattern_set.len() * size_of::<Arc<String>>();
544
545        let total_memory = patterns_memory + shift_table_memory + hash_table_memory + pattern_set_memory;
546
547        WuManberMemoryStats {
548            total_patterns: self.patterns.len(),
549            patterns_memory,
550            shift_table_memory,
551            hash_table_memory,
552            total_memory,
553        }
554    }
555
556    /// Get statistics
557    pub fn stats(&self) -> WuManberStats {
558        WuManberStats {
559            pattern_count: self.patterns.len(),
560            min_length: self.min_len,
561            block_size: self.block_size,
562            shift_table_size: self.shift_table.len(),
563            hash_table_size: self.hash_table.len(),
564        }
565    }
566}
567
568/// Match result with byte positions
569#[derive(Debug, Clone, Copy, PartialEq, Eq)]
570pub struct Match {
571    pub start: usize,
572    pub end: usize,
573}
574
575/// Wu-Manber statistics
576#[derive(Debug, Clone)]
577pub struct WuManberStats {
578    pub pattern_count: usize,
579    pub min_length: usize,
580    pub block_size: usize,
581    pub shift_table_size: usize,
582    pub hash_table_size: usize,
583}
584
585/// Wu-Manber memory usage statistics
586#[derive(Debug, Clone)]
587pub struct WuManberMemoryStats {
588    pub total_patterns: usize,
589    pub patterns_memory: usize,
590    pub shift_table_memory: usize,
591    pub hash_table_memory: usize,
592    pub total_memory: usize,
593}
594
595#[cfg(test)]
596mod tests {
597    use super::*;
598
599    #[test]
600    fn test_basic_matching() {
601        let wm =
602            WuManber::new_chinese(vec!["色情".to_string(), "赌博".to_string(), "诈骗".to_string(), "扯蛋".to_string()]);
603
604        assert_eq!(wm.search_string("正常内容"), None);
605        assert_eq!(wm.search_string("测试含有赌博内容"), Some("赌博".to_string()));
606        assert_eq!(wm.search_string("测试还有色情赌博图片"), Some("色情".to_string()));
607        assert_eq!(wm.search_string("测试有色情赌博图片"), Some("色情".to_string()));
608        assert_eq!(wm.search_string("还有诈骗色情测试"), Some("诈骗".to_string()));
609    }
610
611    #[test]
612    fn test_varied_length() {
613        let wm = WuManber::new(vec!["赌".to_string(), "赌博".to_string(), "赌博机".to_string()], 1);
614
615        assert_eq!(wm.search_string("赌"), Some("赌".to_string()));
616        assert_eq!(wm.search_string("赌博"), Some("赌".to_string())); // 会匹配最先找到的
617        assert_eq!(wm.search_string("赌博机"), Some("赌".to_string())); // 会匹配最先找到的
618    }
619
620    #[test]
621    fn test_space_handling_strategies() {
622        let patterns = vec!["hello world".to_string(), "test  pattern".to_string()];
623
624        // 严格匹配
625        let wm_strict = WuManber::new_with_space_handling(patterns.clone(), SpaceHandling::Strict);
626        assert_eq!(wm_strict.search_string("hello world"), Some("hello world".to_string()));
627        assert_eq!(wm_strict.search_string("helloworld"), None);
628
629        // 忽略空格
630        let wm_ignore = WuManber::new_with_space_handling(patterns.clone(), SpaceHandling::IgnoreSpaces);
631        assert_eq!(wm_ignore.search_string("hello world"), Some("hello world".to_string()));
632        assert_eq!(wm_ignore.search_string("helloworld"), Some("hello world".to_string()));
633
634        // 标准化空格
635        let wm_normalize = WuManber::new_with_space_handling(patterns.clone(), SpaceHandling::NormalizeSpaces);
636        assert_eq!(wm_normalize.search_string("test  pattern"), Some("test  pattern".to_string()));
637        assert_eq!(wm_normalize.search_string("test   pattern"), Some("test  pattern".to_string()));
638    }
639
640    #[test]
641    fn test_mixed_patterns() {
642        let mixed_patterns =
643            vec!["关键词2500".to_string(), "关键词 3000".to_string(), "test".to_string(), "hello world".to_string()];
644
645        let wm = WuManber::new_chinese(mixed_patterns);
646
647        assert_eq!(wm.search_string("包含关键词2500的文本"), Some("关键词2500".to_string()));
648        assert_eq!(wm.search_string("包含关键词 3000 的文本"), Some("关键词 3000".to_string()));
649        assert_eq!(wm.search_string("this is test"), Some("test".to_string()));
650        assert_eq!(wm.search_string("say hello world"), Some("hello world".to_string()));
651    }
652
653    #[test]
654    fn test_complex_patterns_with_spaces() {
655        let complex_patterns = vec![
656            "Hello World".to_string(),
657            "你好 世界".to_string(),
658            "multiple   spaces".to_string(),
659            "tab\there".to_string(),
660            "new\nline".to_string(),
661        ];
662
663        let wm = WuManber::new_chinese(complex_patterns);
664
665        assert_eq!(wm.search_string("Say Hello World"), Some("Hello World".to_string()));
666        assert_eq!(wm.search_string("说你好 世界"), Some("你好 世界".to_string()));
667        assert_eq!(wm.search_string("has multiple   spaces here"), Some("multiple   spaces".to_string()));
668        assert_eq!(wm.search_string("tab\there you go"), Some("tab\there".to_string()));
669        assert_eq!(wm.search_string("new\nline break"), Some("new\nline".to_string()));
670    }
671
672    #[test]
673    fn test_ignore_spaces_functionality() {
674        let patterns = vec!["关键词 2500".to_string(), "hello world".to_string()];
675        let wm = WuManber::new_with_space_handling(patterns, SpaceHandling::IgnoreSpaces);
676
677        // 这些都应该匹配
678        assert_eq!(wm.search_string("关键词2500"), Some("关键词 2500".to_string()));
679        assert_eq!(wm.search_string("关键词 2500"), Some("关键词 2500".to_string()));
680        assert_eq!(wm.search_string("关键词  2500"), Some("关键词 2500".to_string()));
681        assert_eq!(wm.search_string("helloworld"), Some("hello world".to_string()));
682        assert_eq!(wm.search_string("hello world"), Some("hello world".to_string()));
683    }
684
685    #[test]
686    fn test_parallel_performance() {
687        let patterns: Vec<String> = (0..5000).map(|i| format!("关键词{i}")).collect();
688
689        let wm_seq = WuManber::new(patterns.clone(), 2);
690        let wm_par = WuManber::new_parallel(patterns, 2);
691
692        let text = "这里包含关键词2500和关键词3000";
693
694        let result_seq = wm_seq.search_all_strings(text);
695        let result_par = wm_par.search_all_strings(text);
696
697        assert_eq!(result_seq.len(), result_par.len());
698        assert!(result_seq.contains(&"关键词2500".to_string()));
699        assert!(result_seq.contains(&"关键词3000".to_string()));
700
701        for item in &result_seq {
702            assert!(result_par.contains(item));
703        }
704    }
705
706    #[test]
707    fn test_search_all_functionality() {
708        let wm = WuManber::new_chinese(vec!["苹果".to_string(), "香蕉".to_string(), "橙子".to_string()]);
709
710        let text = "我喜欢吃苹果、香蕉和橙子";
711        let results = wm.search_all_strings(text);
712
713        assert_eq!(results.len(), 3);
714        assert!(results.contains(&"苹果".to_string()));
715        assert!(results.contains(&"香蕉".to_string()));
716        assert!(results.contains(&"橙子".to_string()));
717    }
718}