1#![forbid(unsafe_code)]
2
3use unicode_width::UnicodeWidthChar;
26
/// Strategy for measuring the terminal display width of characters.
///
/// East Asian "ambiguous"-width characters render one cell wide in most
/// Western terminals but two cells wide in many CJK terminal setups; the
/// variants select which convention `char_width`/`str_width` follow.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum WidthMode {
    /// Unicode default widths: ambiguous-width characters count as 1 column.
    #[default]
    Standard,

    /// CJK convention: ambiguous-width characters count as 2 columns.
    CjkAmbiguousWide,
}
41
42impl WidthMode {
43 #[inline]
45 #[must_use]
46 pub fn char_width(self, ch: char) -> usize {
47 let w = match self {
48 Self::Standard => UnicodeWidthChar::width(ch).unwrap_or(0),
49 Self::CjkAmbiguousWide => UnicodeWidthChar::width_cjk(ch).unwrap_or(0),
50 };
51 w.min(2)
52 }
53
54 #[must_use]
56 pub fn str_width(self, s: &str) -> usize {
57 s.chars().map(|ch| self.char_width(ch)).sum()
58 }
59}
60
61#[must_use]
69pub fn display_col_at(s: &str, byte_offset: usize, mode: WidthMode) -> usize {
70 debug_assert!(s.is_char_boundary(byte_offset));
71 s[..byte_offset].chars().map(|ch| mode.char_width(ch)).sum()
72}
73
/// A single match produced by the plain (non-policy) search functions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SearchResult {
    // Byte range of the match within the searched haystack; always falls on
    // char boundaries of the original string.
    pub range: std::ops::Range<usize>,
}
80
81impl SearchResult {
82 #[must_use]
84 pub fn new(start: usize, end: usize) -> Self {
85 Self { range: start..end }
86 }
87
88 #[must_use]
90 pub fn text<'a>(&self, source: &'a str) -> &'a str {
91 &source[self.range.clone()]
92 }
93}
94
95#[must_use]
99pub fn search_exact(haystack: &str, needle: &str) -> Vec<SearchResult> {
100 if needle.is_empty() {
101 return Vec::new();
102 }
103 let mut results = Vec::new();
104 let mut start = 0;
105 while let Some(pos) = haystack[start..].find(needle) {
106 let abs_pos = start + pos;
107 results.push(SearchResult::new(abs_pos, abs_pos + needle.len()));
108 start = abs_pos + needle.len();
109 }
110 results
111}
112
113#[must_use]
117pub fn search_exact_overlapping(haystack: &str, needle: &str) -> Vec<SearchResult> {
118 if needle.is_empty() {
119 return Vec::new();
120 }
121 let mut results = Vec::new();
122 let mut start = 0;
123 while let Some(pos) = haystack[start..].find(needle) {
124 let abs_pos = start + pos;
125 results.push(SearchResult::new(abs_pos, abs_pos + needle.len()));
126 start = abs_pos + 1;
128 while start < haystack.len() && !haystack.is_char_boundary(start) {
130 start += 1;
131 }
132 }
133 results
134}
135
136#[must_use]
141pub fn search_ascii_case_insensitive(haystack: &str, needle: &str) -> Vec<SearchResult> {
142 if needle.is_empty() {
143 return Vec::new();
144 }
145 let haystack_lower = haystack.to_ascii_lowercase();
146 let needle_lower = needle.to_ascii_lowercase();
147 let mut results = Vec::new();
148 let mut start = 0;
149 while let Some(pos) = haystack_lower[start..].find(&needle_lower) {
150 let abs_pos = start + pos;
151 results.push(SearchResult::new(abs_pos, abs_pos + needle.len()));
152 start = abs_pos + needle.len();
153 }
154 results
155}
156
/// Unicode-aware case-insensitive search.
///
/// Both sides are run through `crate::normalization::normalize_for_search`.
/// The haystack is folded grapheme-by-grapheme so that every byte of the
/// folded text can be mapped back to the byte range of the original grapheme
/// that produced it; returned ranges therefore always lie on char boundaries
/// of `haystack`.
///
/// An empty needle (or one that normalizes to nothing) yields no matches.
#[cfg(feature = "normalization")]
#[must_use]
pub fn search_case_insensitive(haystack: &str, needle: &str) -> Vec<SearchResult> {
    if needle.is_empty() {
        return Vec::new();
    }
    // NOTE(review): the needle is normalized as one string while the
    // haystack is normalized per grapheme below; this assumes
    // `normalize_for_search` is context-free (no cross-grapheme mappings) —
    // confirm against its implementation.
    let needle_norm = crate::normalization::normalize_for_search(needle);
    if needle_norm.is_empty() {
        return Vec::new();
    }

    use unicode_segmentation::UnicodeSegmentation;

    // For each byte of `normalized`: the start/end byte offsets of the
    // original grapheme that produced it.
    let mut norm_start_map: Vec<usize> = Vec::new();
    let mut norm_end_map: Vec<usize> = Vec::new();
    let mut normalized = String::new();

    for (orig_byte, grapheme) in haystack.grapheme_indices(true) {
        let chunk = crate::normalization::normalize_for_search(grapheme);
        if chunk.is_empty() {
            continue;
        }
        let orig_end = orig_byte + grapheme.len();
        // One map entry per folded byte, all pointing at the source grapheme.
        for _ in chunk.bytes() {
            norm_start_map.push(orig_byte);
            norm_end_map.push(orig_end);
        }
        normalized.push_str(&chunk);
    }
    if normalized.is_empty() {
        return Vec::new();
    }

    let mut results = Vec::new();
    let mut start = 0;
    while let Some(pos) = normalized[start..].find(&needle_norm) {
        let norm_start = start + pos;
        let norm_end = norm_start + needle_norm.len();

        // Map normalized offsets back to original byte offsets;
        // out-of-range indices clamp to the end of the haystack.
        let orig_start = norm_start_map
            .get(norm_start)
            .copied()
            .unwrap_or(haystack.len());
        // `needle_norm` is non-empty here, so `norm_end == 0` appears
        // unreachable; the branch is kept as defensive clamping.
        let orig_end = if norm_end == 0 {
            orig_start
        } else {
            norm_end_map
                .get(norm_end - 1)
                .copied()
                .unwrap_or(haystack.len())
        };

        // One grapheme may expand to several normalized bytes, so distinct
        // normalized matches can collapse to the same original range;
        // report each original range only once.
        if results
            .last()
            .is_some_and(|r: &SearchResult| r.range.start == orig_start && r.range.end == orig_end)
        {
            start = norm_end;
            continue;
        }

        results.push(SearchResult::new(orig_start, orig_end));
        start = norm_end;
    }
    results
}
232
/// Case-sensitive search under the given Unicode normalization `form`,
/// so canonically/compatibly equivalent spellings match.
///
/// The haystack is normalized grapheme-by-grapheme so each byte of the
/// normalized text maps back to its source grapheme's byte range; returned
/// ranges always lie on char boundaries of `haystack`.
///
/// An empty needle (or one that normalizes to nothing) yields no matches.
#[cfg(feature = "normalization")]
#[must_use]
pub fn search_normalized(
    haystack: &str,
    needle: &str,
    form: crate::normalization::NormForm,
) -> Vec<SearchResult> {
    use crate::normalization::normalize;
    use unicode_segmentation::UnicodeSegmentation;

    if needle.is_empty() {
        return Vec::new();
    }
    // NOTE(review): whole-string normalization of the needle vs per-grapheme
    // normalization of the haystack assumes `normalize` never composes
    // across grapheme boundaries — confirm for the supported forms.
    let needle_norm = normalize(needle, form);
    if needle_norm.is_empty() {
        return Vec::new();
    }

    // For each byte of `normalized`: the start/end byte offsets of the
    // original grapheme that produced it.
    let mut norm_start_map: Vec<usize> = Vec::new();
    let mut norm_end_map: Vec<usize> = Vec::new();
    let mut normalized = String::new();

    for (orig_byte, grapheme) in haystack.grapheme_indices(true) {
        let chunk = normalize(grapheme, form);
        if chunk.is_empty() {
            continue;
        }
        let orig_end = orig_byte + grapheme.len();
        // One map entry per normalized byte.
        for _ in chunk.bytes() {
            norm_start_map.push(orig_byte);
            norm_end_map.push(orig_end);
        }
        normalized.push_str(&chunk);
    }
    if normalized.is_empty() {
        return Vec::new();
    }

    let mut results = Vec::new();
    let mut start = 0;
    while let Some(pos) = normalized[start..].find(&needle_norm) {
        let norm_start = start + pos;
        let norm_end = norm_start + needle_norm.len();

        // Map normalized offsets back to original byte offsets;
        // out-of-range indices clamp to the end of the haystack.
        let orig_start = norm_start_map
            .get(norm_start)
            .copied()
            .unwrap_or(haystack.len());
        // `needle_norm` is non-empty here, so `norm_end == 0` appears
        // unreachable; the branch is kept as defensive clamping.
        let orig_end = if norm_end == 0 {
            orig_start
        } else {
            norm_end_map
                .get(norm_end - 1)
                .copied()
                .unwrap_or(haystack.len())
        };

        // Suppress consecutive normalized matches that collapse onto the
        // same original grapheme range.
        if results
            .last()
            .is_some_and(|r: &SearchResult| r.range.start == orig_start && r.range.end == orig_end)
        {
            start = norm_end;
            continue;
        }

        results.push(SearchResult::new(orig_start, orig_end));
        start = norm_end;
    }
    results
}
310
/// Declarative description of how a search should behave: which Unicode
/// normalization form to apply, whether to ignore case, and which width
/// convention to use when reporting display columns.
#[cfg(feature = "normalization")]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SearchPolicy {
    // Normalization form applied to both needle and haystack before matching.
    pub norm_form: crate::normalization::NormForm,
    // When true, text is additionally lowercased after normalization.
    pub case_insensitive: bool,
    // Width convention used for `col_start`/`col_end` in results.
    pub width_mode: WidthMode,
}
339
#[cfg(feature = "normalization")]
impl SearchPolicy {
    /// General-purpose preset: NFKC + case-insensitive, standard widths.
    pub const STANDARD: Self = Self {
        norm_form: crate::normalization::NormForm::Nfkc,
        case_insensitive: true,
        width_mode: WidthMode::Standard,
    };

    /// Like `STANDARD` but ambiguous-width characters count as 2 columns.
    pub const CJK: Self = Self {
        norm_form: crate::normalization::NormForm::Nfkc,
        case_insensitive: true,
        width_mode: WidthMode::CjkAmbiguousWide,
    };

    /// Case-sensitive NFC matching: canonical equivalence only, no
    /// compatibility decomposition.
    pub const EXACT_NFC: Self = Self {
        norm_form: crate::normalization::NormForm::Nfc,
        case_insensitive: false,
        width_mode: WidthMode::Standard,
    };
}
363
/// A match produced by [`search_with_policy`], carrying both the byte range
/// and the display-column span of the match.
#[cfg(feature = "normalization")]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PolicySearchResult {
    // Byte range of the match within the original haystack.
    pub range: std::ops::Range<usize>,
    // Display column where the match starts (per the policy's width mode).
    pub col_start: usize,
    // Display column just past the end of the match.
    pub col_end: usize,
}
378
#[cfg(feature = "normalization")]
impl PolicySearchResult {
    /// Borrows the matched substring out of `source`, which must be the same
    /// string the search ran against.
    #[must_use]
    pub fn text<'a>(&self, source: &'a str) -> &'a str {
        let (begin, finish) = (self.range.start, self.range.end);
        &source[begin..finish]
    }

    /// Number of display columns the match occupies (`col_end - col_start`).
    #[must_use]
    pub fn display_width(&self) -> usize {
        self.col_end - self.col_start
    }
}
393
/// Policy-driven search: finds `needle` in `haystack` under the policy's
/// normalization form and case sensitivity, reporting byte ranges plus
/// display-column spans computed with the policy's width mode.
///
/// The haystack is folded grapheme-by-grapheme so each byte of the folded
/// text maps back to its source grapheme's byte range; returned ranges
/// always lie on char boundaries of `haystack`.
///
/// An empty needle (or one that folds to nothing) yields no matches.
#[cfg(feature = "normalization")]
#[must_use]
pub fn search_with_policy(
    haystack: &str,
    needle: &str,
    policy: &SearchPolicy,
) -> Vec<PolicySearchResult> {
    use crate::normalization::normalize;
    use unicode_segmentation::UnicodeSegmentation;

    if needle.is_empty() {
        return Vec::new();
    }

    // Single fold pipeline applied identically to needle and haystack.
    //
    // Fix: the needle was previously folded with a whole-string
    // `.to_lowercase()` while the haystack was folded grapheme-by-grapheme.
    // `str::to_lowercase` is context-sensitive (a word-final Greek capital
    // sigma lowercases to U+03C2 whole-string but to U+03C3 per grapheme),
    // so such needles could never match the per-grapheme-folded haystack.
    // Folding both sides per grapheme makes the two representations agree.
    let fold = |g: &str| -> String {
        if policy.case_insensitive {
            normalize(g, policy.norm_form).to_lowercase()
        } else {
            normalize(g, policy.norm_form)
        }
    };

    let needle_norm: String = needle.graphemes(true).map(|g| fold(g)).collect();
    if needle_norm.is_empty() {
        return Vec::new();
    }

    // For each byte of `normalized`: the start/end byte offsets of the
    // original grapheme that produced it.
    let mut norm_start_map: Vec<usize> = Vec::new();
    let mut norm_end_map: Vec<usize> = Vec::new();
    let mut normalized = String::new();

    for (orig_byte, grapheme) in haystack.grapheme_indices(true) {
        let chunk = fold(grapheme);
        if chunk.is_empty() {
            continue;
        }
        let orig_end = orig_byte + grapheme.len();
        // One map entry per folded byte, all pointing at the source grapheme.
        for _ in chunk.bytes() {
            norm_start_map.push(orig_byte);
            norm_end_map.push(orig_end);
        }
        normalized.push_str(&chunk);
    }
    if normalized.is_empty() {
        return Vec::new();
    }

    let mut results = Vec::new();
    let mut start = 0;
    while let Some(pos) = normalized[start..].find(&needle_norm) {
        let norm_start = start + pos;
        let norm_end = norm_start + needle_norm.len();

        // Map normalized offsets back to original byte offsets;
        // out-of-range indices clamp to the end of the haystack.
        let orig_start = norm_start_map
            .get(norm_start)
            .copied()
            .unwrap_or(haystack.len());
        // `needle_norm` is non-empty (checked above), so `norm_end >= 1`:
        // the match ends at the end of the grapheme that produced its last
        // normalized byte.
        let orig_end = norm_end_map
            .get(norm_end - 1)
            .copied()
            .unwrap_or(haystack.len());

        // One grapheme may expand to several normalized bytes, so distinct
        // normalized matches can collapse to the same original range;
        // report each original range only once.
        if results.last().is_some_and(|r: &PolicySearchResult| {
            r.range.start == orig_start && r.range.end == orig_end
        }) {
            start = norm_end;
            continue;
        }

        let col_start = display_col_at(haystack, orig_start, policy.width_mode);
        let col_end = display_col_at(haystack, orig_end, policy.width_mode);

        results.push(PolicySearchResult {
            range: orig_start..orig_end,
            col_start,
            col_end,
        });
        start = norm_end;
    }
    results
}
501
#[cfg(test)]
mod tests {
    use super::*;

    // --- search_exact ---

    #[test]
    fn exact_basic() {
        let results = search_exact("hello world hello", "hello");
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].range, 0..5);
        assert_eq!(results[1].range, 12..17);
    }

    #[test]
    fn exact_no_match() {
        let results = search_exact("hello world", "xyz");
        assert!(results.is_empty());
    }

    #[test]
    fn exact_empty_needle() {
        let results = search_exact("hello", "");
        assert!(results.is_empty());
    }

    #[test]
    fn exact_empty_haystack() {
        let results = search_exact("", "hello");
        assert!(results.is_empty());
    }

    #[test]
    fn exact_needle_equals_haystack() {
        let results = search_exact("hello", "hello");
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].range, 0..5);
    }

    #[test]
    fn exact_needle_longer() {
        let results = search_exact("hi", "hello");
        assert!(results.is_empty());
    }

    #[test]
    fn exact_adjacent_matches() {
        let results = search_exact("aaa", "a");
        assert_eq!(results.len(), 3);
    }

    #[test]
    fn exact_text_extraction() {
        let haystack = "foo bar baz";
        let results = search_exact(haystack, "bar");
        assert_eq!(results[0].text(haystack), "bar");
    }

    #[test]
    fn exact_unicode() {
        let results = search_exact("cafรฉ rรฉsumรฉ cafรฉ", "cafรฉ");
        assert_eq!(results.len(), 2);
    }

    #[test]
    fn exact_cjk() {
        let results = search_exact("ไฝ ๅฅฝไธ็ไฝ ๅฅฝ", "ไฝ ๅฅฝ");
        assert_eq!(results.len(), 2);
    }

    // --- search_exact_overlapping ---

    #[test]
    fn overlapping_basic() {
        let results = search_exact_overlapping("aaa", "aa");
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].range, 0..2);
        assert_eq!(results[1].range, 1..3);
    }

    #[test]
    fn overlapping_no_overlap() {
        let results = search_exact_overlapping("abcabc", "abc");
        assert_eq!(results.len(), 2);
    }

    #[test]
    fn overlapping_empty_needle() {
        let results = search_exact_overlapping("abc", "");
        assert!(results.is_empty());
    }

    // --- search_ascii_case_insensitive ---

    #[test]
    fn ascii_ci_basic() {
        let results = search_ascii_case_insensitive("Hello World HELLO", "hello");
        assert_eq!(results.len(), 2);
    }

    #[test]
    fn ascii_ci_mixed_case() {
        let results = search_ascii_case_insensitive("FoO BaR fOo", "foo");
        assert_eq!(results.len(), 2);
    }

    #[test]
    fn ascii_ci_no_match() {
        let results = search_ascii_case_insensitive("hello", "xyz");
        assert!(results.is_empty());
    }

    // --- range invariants ---

    // All returned ranges must be well-ordered, in bounds, and on char
    // boundaries of the haystack.
    #[test]
    fn results_have_valid_ranges() {
        let test_cases = [
            ("hello world", "o"),
            ("aaaa", "aa"),
            ("", "x"),
            ("x", ""),
            ("cafรฉ", "รฉ"),
            ("๐ world ๐", "๐"),
        ];
        for (haystack, needle) in test_cases {
            let results = search_exact(haystack, needle);
            for r in &results {
                assert!(
                    r.range.start <= r.range.end,
                    "Invalid range for '{needle}' in '{haystack}'"
                );
                assert!(
                    r.range.end <= haystack.len(),
                    "Out of bounds for '{needle}' in '{haystack}'"
                );
                assert!(
                    haystack.is_char_boundary(r.range.start),
                    "Not char boundary at start"
                );
                assert!(
                    haystack.is_char_boundary(r.range.end),
                    "Not char boundary at end"
                );
            }
        }
    }

    #[test]
    fn emoji_search() {
        let results = search_exact("hello ๐ world ๐ end", "๐");
        assert_eq!(results.len(), 2);
        for r in &results {
            assert_eq!(&"hello ๐ world ๐ end"[r.range.clone()], "๐");
        }
    }
}
666
#[cfg(all(test, feature = "normalization"))]
mod normalization_tests {
    use super::*;

    #[test]
    fn case_insensitive_unicode() {
        let results = search_case_insensitive("Straรe Strasse", "strasse");
        assert!(
            !results.is_empty(),
            "Should find literal case-insensitive match"
        );
    }

    // Uppercase sharp-s expands under folding; the reported range must still
    // cover whole graphemes of the original text.
    #[test]
    fn case_insensitive_expansion_range_maps_to_grapheme() {
        let haystack = "STRAรE";
        let results = search_case_insensitive(haystack, "straรe");
        assert_eq!(results.len(), 1);
        let result = &results[0];
        assert_eq!(result.text(haystack), "STRAรE");
        assert!(haystack.is_char_boundary(result.range.start));
        assert!(haystack.is_char_boundary(result.range.end));
    }

    #[test]
    fn case_insensitive_accented() {
        let results = search_case_insensitive("CAFร cafรฉ Cafรฉ", "cafรฉ");
        assert_eq!(results.len(), 3);
    }

    #[test]
    fn case_insensitive_empty() {
        let results = search_case_insensitive("hello", "");
        assert!(results.is_empty());
    }

    #[test]
    fn case_insensitive_fullwidth() {
        // Fullwidth HELLO (U+FF28..U+FF2F) should match ASCII via NFKC.
        let results = search_case_insensitive("\u{FF28}\u{FF25}\u{FF2C}\u{FF2C}\u{FF2F}", "hello");
        assert!(!results.is_empty(), "Fullwidth should match via NFKC");
    }

    #[test]
    fn normalized_composed_vs_decomposed() {
        use crate::normalization::NormForm;
        // Decomposed e + combining acute ...
        let haystack = "caf\u{0065}\u{0301}";
        // ... vs precomposed U+00E9.
        let needle = "caf\u{00E9}";
        let results = search_normalized(haystack, needle, NormForm::Nfc);
        assert_eq!(results.len(), 1, "Should find NFC-equivalent match");
    }

    #[test]
    fn normalized_no_false_positive() {
        use crate::normalization::NormForm;
        let results = search_normalized("hello", "world", NormForm::Nfc);
        assert!(results.is_empty());
    }

    #[test]
    fn normalized_result_ranges_valid() {
        use crate::normalization::NormForm;
        let haystack = "cafรฉ rรฉsumรฉ cafรฉ";
        let needle = "cafรฉ";
        let results = search_normalized(haystack, needle, NormForm::Nfc);
        for r in &results {
            assert!(r.range.start <= r.range.end);
            assert!(r.range.end <= haystack.len());
            assert!(haystack.is_char_boundary(r.range.start));
            assert!(haystack.is_char_boundary(r.range.end));
        }
    }

    #[test]
    fn case_insensitive_result_ranges_valid() {
        let haystack = "Hello WORLD hello";
        let results = search_case_insensitive(haystack, "hello");
        for r in &results {
            assert!(r.range.start <= r.range.end);
            assert!(r.range.end <= haystack.len());
            assert!(haystack.is_char_boundary(r.range.start));
            assert!(haystack.is_char_boundary(r.range.end));
        }
    }
}
758
#[cfg(all(test, feature = "normalization"))]
mod policy_tests {
    use super::*;
    use crate::normalization::NormForm;

    // --- WidthMode ---

    #[test]
    fn width_mode_ascii_is_one() {
        for ch in ['a', 'Z', '0', ' ', '~'] {
            assert_eq!(WidthMode::Standard.char_width(ch), 1);
            assert_eq!(WidthMode::CjkAmbiguousWide.char_width(ch), 1);
        }
    }

    #[test]
    fn width_mode_cjk_ideograph_is_two() {
        for ch in ['ไธญ', 'ๅฝ', 'ๅญ'] {
            assert_eq!(WidthMode::Standard.char_width(ch), 2);
            assert_eq!(WidthMode::CjkAmbiguousWide.char_width(ch), 2);
        }
    }

    // East-Asian-ambiguous characters are the only ones where the two
    // modes disagree: 1 column under Standard, 2 under CjkAmbiguousWide.
    #[test]
    fn width_mode_ea_ambiguous_differs() {
        for ch in ['โ', 'โ', 'โ'] {
            assert_eq!(WidthMode::Standard.char_width(ch), 1, "Standard: {ch:?}");
            assert_eq!(
                WidthMode::CjkAmbiguousWide.char_width(ch),
                2,
                "CjkWide: {ch:?}"
            );
        }
        for ch in ['โ', 'โ', 'โ', 'โ'] {
            assert_eq!(WidthMode::Standard.char_width(ch), 1);
            assert_eq!(WidthMode::CjkAmbiguousWide.char_width(ch), 2);
        }
        for ch in ['ยฐ', 'ร', 'ยฎ'] {
            assert_eq!(WidthMode::Standard.char_width(ch), 1);
            assert_eq!(WidthMode::CjkAmbiguousWide.char_width(ch), 2);
        }
    }

    #[test]
    fn width_mode_combining_marks_zero() {
        for ch in ['\u{0300}', '\u{0301}', '\u{0302}'] {
            assert_eq!(WidthMode::Standard.char_width(ch), 0);
            assert_eq!(WidthMode::CjkAmbiguousWide.char_width(ch), 0);
        }
    }

    #[test]
    fn width_mode_str_width() {
        assert_eq!(WidthMode::Standard.str_width("hello"), 5);
        assert_eq!(WidthMode::Standard.str_width("ไธญๅฝ"), 4);
        assert_eq!(WidthMode::CjkAmbiguousWide.str_width("hello"), 5);
        assert_eq!(WidthMode::CjkAmbiguousWide.str_width("โ ไธญ"), 5);
        // Same string, but the ambiguous char is 1 column under Standard.
        assert_eq!(WidthMode::Standard.str_width("โ ไธญ"), 4);
    }

    #[test]
    fn width_mode_default_is_standard() {
        assert_eq!(WidthMode::default(), WidthMode::Standard);
    }

    // --- display_col_at ---

    #[test]
    fn display_col_at_ascii() {
        let s = "hello world";
        assert_eq!(display_col_at(s, 0, WidthMode::Standard), 0);
        assert_eq!(display_col_at(s, 5, WidthMode::Standard), 5);
        assert_eq!(display_col_at(s, 11, WidthMode::Standard), 11);
    }

    #[test]
    fn display_col_at_cjk() {
        let s = "ไฝ ๅฅฝworld";
        assert_eq!(display_col_at(s, 0, WidthMode::Standard), 0);
        // After the first ideograph (3 bytes): 2 columns.
        assert_eq!(display_col_at(s, 3, WidthMode::Standard), 2);
        // After both ideographs: 4 columns.
        assert_eq!(display_col_at(s, 6, WidthMode::Standard), 4);
        // End of string: 4 wide columns + 5 ASCII columns.
        assert_eq!(display_col_at(s, 11, WidthMode::Standard), 9);
    }

    #[test]
    fn display_col_at_ea_ambiguous_differs() {
        let s = "โโtext";
        // Byte offsets just past the first and second ambiguous-width chars.
        let after_box = 3;
        let after_arrow = 6;
        assert_eq!(display_col_at(s, after_box, WidthMode::Standard), 1);
        assert_eq!(display_col_at(s, after_box, WidthMode::CjkAmbiguousWide), 2);
        assert_eq!(display_col_at(s, after_arrow, WidthMode::Standard), 2);
        assert_eq!(
            display_col_at(s, after_arrow, WidthMode::CjkAmbiguousWide),
            4
        );
    }

    #[test]
    fn display_col_at_combining_marks() {
        let s = "e\u{0301}x";
        assert_eq!(display_col_at(s, 0, WidthMode::Standard), 0);
        // After the base 'e': 1 column.
        assert_eq!(display_col_at(s, 1, WidthMode::Standard), 1);
        // After the zero-width combining acute: still 1 column.
        assert_eq!(display_col_at(s, 3, WidthMode::Standard), 1);
        // After the trailing 'x'.
        assert_eq!(display_col_at(s, 4, WidthMode::Standard), 2);
    }

    // --- SearchPolicy presets ---

    #[test]
    fn policy_standard_preset() {
        let p = SearchPolicy::STANDARD;
        assert_eq!(p.norm_form, NormForm::Nfkc);
        assert!(p.case_insensitive);
        assert_eq!(p.width_mode, WidthMode::Standard);
    }

    #[test]
    fn policy_cjk_preset() {
        let p = SearchPolicy::CJK;
        assert_eq!(p.norm_form, NormForm::Nfkc);
        assert!(p.case_insensitive);
        assert_eq!(p.width_mode, WidthMode::CjkAmbiguousWide);
    }

    #[test]
    fn policy_exact_nfc_preset() {
        let p = SearchPolicy::EXACT_NFC;
        assert_eq!(p.norm_form, NormForm::Nfc);
        assert!(!p.case_insensitive);
        assert_eq!(p.width_mode, WidthMode::Standard);
    }

    // --- search_with_policy basics ---

    #[test]
    fn policy_search_basic_ascii() {
        let results = search_with_policy("hello world", "hello", &SearchPolicy::STANDARD);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].range, 0..5);
        assert_eq!(results[0].col_start, 0);
        assert_eq!(results[0].col_end, 5);
    }

    #[test]
    fn policy_search_case_insensitive() {
        let results = search_with_policy("Hello WORLD hello", "hello", &SearchPolicy::STANDARD);
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].text("Hello WORLD hello"), "Hello");
        assert_eq!(results[1].text("Hello WORLD hello"), "hello");
    }

    #[test]
    fn policy_search_case_sensitive() {
        let results = search_with_policy("Hello hello", "hello", &SearchPolicy::EXACT_NFC);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].range.start, 6);
    }

    #[test]
    fn policy_search_empty_needle() {
        let results = search_with_policy("hello", "", &SearchPolicy::STANDARD);
        assert!(results.is_empty());
    }

    #[test]
    fn policy_search_empty_haystack() {
        let results = search_with_policy("", "hello", &SearchPolicy::STANDARD);
        assert!(results.is_empty());
    }

    #[test]
    fn policy_search_no_match() {
        let results = search_with_policy("hello", "world", &SearchPolicy::STANDARD);
        assert!(results.is_empty());
    }

    // --- normalization-form behavior ---

    #[test]
    fn policy_search_composed_vs_decomposed() {
        let haystack = "caf\u{00E9}";
        let needle = "caf\u{0065}\u{0301}";
        let results = search_with_policy(haystack, needle, &SearchPolicy::EXACT_NFC);
        assert_eq!(
            results.len(),
            1,
            "NFC should equate composed and decomposed"
        );
    }

    #[test]
    fn policy_search_fullwidth_nfkc() {
        // Fullwidth HELLO (U+FF28..U+FF2F).
        let haystack = "\u{FF28}\u{FF25}\u{FF2C}\u{FF2C}\u{FF2F}";
        let results = search_with_policy(haystack, "hello", &SearchPolicy::STANDARD);
        assert!(!results.is_empty(), "Fullwidth should match via NFKC");
    }

    #[test]
    fn policy_search_nfc_does_not_match_compatibility() {
        // U+FB01 is the 'fi' ligature, a compatibility character.
        let haystack = "\u{FB01}le";
        let results = search_with_policy(haystack, "file", &SearchPolicy::EXACT_NFC);
        assert!(
            results.is_empty(),
            "NFC should not decompose compatibility chars"
        );
    }

    #[test]
    fn policy_search_nfkc_matches_compatibility() {
        let haystack = "\u{FB01}le";
        let results = search_with_policy(haystack, "file", &SearchPolicy::STANDARD);
        assert!(!results.is_empty(), "NFKC should decompose fi ligature");
    }

    // --- column offsets ---

    #[test]
    fn policy_search_cjk_column_offsets() {
        let haystack = "ไฝ ๅฅฝworldไฝ ๅฅฝ";
        let results = search_with_policy(haystack, "world", &SearchPolicy::STANDARD);
        assert_eq!(results.len(), 1);
        // Two double-width ideographs precede the match.
        assert_eq!(results[0].col_start, 4);
        assert_eq!(results[0].col_end, 9);
    }

    #[test]
    fn policy_search_cjk_in_cjk() {
        let haystack = "ไฝ ๅฅฝไธ็ไฝ ๅฅฝ";
        let results = search_with_policy(haystack, "ไธ็", &SearchPolicy::STANDARD);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].col_start, 4);
        assert_eq!(results[0].col_end, 8);
    }

    // Same byte range under both policies, but the columns diverge because
    // of the ambiguous-width character before the match.
    #[test]
    fn policy_search_ea_ambiguous_column_divergence() {
        let haystack = "โhello";
        let standard = search_with_policy(haystack, "hello", &SearchPolicy::STANDARD);
        let cjk = search_with_policy(haystack, "hello", &SearchPolicy::CJK);

        assert_eq!(standard.len(), 1);
        assert_eq!(cjk.len(), 1);
        assert_eq!(standard[0].range, cjk[0].range);
        assert_eq!(standard[0].col_start, 1);
        assert_eq!(cjk[0].col_start, 2);
        assert_eq!(standard[0].col_end, 6);
        assert_eq!(cjk[0].col_end, 7);
    }

    #[test]
    fn policy_search_box_drawing_column_divergence() {
        let haystack = "โโtext";
        let standard = search_with_policy(haystack, "text", &SearchPolicy::STANDARD);
        let cjk = search_with_policy(haystack, "text", &SearchPolicy::CJK);

        assert_eq!(standard[0].col_start, 2);
        assert_eq!(cjk[0].col_start, 4);
    }

    // --- combining marks ---

    #[test]
    fn policy_search_combining_mark_offsets() {
        let haystack = "cafรฉ";
        let results = search_with_policy(haystack, "fรฉ", &SearchPolicy::STANDARD);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].col_start, 2);
        assert_eq!(results[0].col_end, 4);
    }

    #[test]
    fn policy_search_decomposed_combining_offsets() {
        let haystack = "cafe\u{0301}";
        // Precomposed needle vs decomposed haystack, equated by NFC.
        let needle = "f\u{00E9}";
        let results = search_with_policy(haystack, needle, &SearchPolicy::EXACT_NFC);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].col_start, 2);
        assert_eq!(results[0].col_end, 4);
    }

    // --- PolicySearchResult helpers ---

    #[test]
    fn policy_result_display_width() {
        let haystack = "ไฝ ๅฅฝhello";
        let results = search_with_policy(haystack, "hello", &SearchPolicy::STANDARD);
        assert_eq!(results[0].display_width(), 5);
    }

    #[test]
    fn policy_result_display_width_cjk_match() {
        let haystack = "abcไฝ ๅฅฝdef";
        let results = search_with_policy(haystack, "ไฝ ๅฅฝ", &SearchPolicy::STANDARD);
        // Two double-width ideographs -> 4 columns.
        assert_eq!(results[0].display_width(), 4);
    }

    #[test]
    fn policy_result_text_extraction() {
        let haystack = "Hello World";
        let results = search_with_policy(haystack, "world", &SearchPolicy::STANDARD);
        assert_eq!(results[0].text(haystack), "World");
    }

    // --- multiple matches & invariants ---

    #[test]
    fn policy_search_multiple_matches() {
        let haystack = "foo bar foo baz foo";
        let results = search_with_policy(haystack, "foo", &SearchPolicy::STANDARD);
        assert_eq!(results.len(), 3);
        assert_eq!(results[0].col_start, 0);
        assert_eq!(results[1].col_start, 8);
        assert_eq!(results[2].col_start, 16);
    }

    #[test]
    fn policy_search_ranges_always_valid() {
        let test_cases = [
            ("hello world", "o", SearchPolicy::STANDARD),
            ("CAFร cafรฉ", "cafรฉ", SearchPolicy::STANDARD),
            ("ไฝ ๅฅฝไธ็", "ไธ", SearchPolicy::CJK),
            ("โโtext", "text", SearchPolicy::CJK),
            ("\u{FB01}le", "file", SearchPolicy::STANDARD),
            ("e\u{0301}", "\u{00E9}", SearchPolicy::EXACT_NFC),
        ];
        for (haystack, needle, policy) in &test_cases {
            let results = search_with_policy(haystack, needle, policy);
            for r in &results {
                assert!(
                    r.range.start <= r.range.end,
                    "Invalid range for '{needle}' in '{haystack}'"
                );
                assert!(
                    r.range.end <= haystack.len(),
                    "Out of bounds for '{needle}' in '{haystack}'"
                );
                assert!(
                    haystack.is_char_boundary(r.range.start),
                    "Not char boundary at start"
                );
                assert!(
                    haystack.is_char_boundary(r.range.end),
                    "Not char boundary at end"
                );
                assert!(
                    r.col_start <= r.col_end,
                    "col_start > col_end for '{needle}' in '{haystack}'"
                );
            }
        }
    }

    #[test]
    fn policy_search_columns_monotonically_increasing() {
        let haystack = "aa bb aa cc aa";
        let results = search_with_policy(haystack, "aa", &SearchPolicy::STANDARD);
        assert_eq!(results.len(), 3);
        for w in results.windows(2) {
            assert!(
                w[0].col_end <= w[1].col_start,
                "Non-overlapping matches should have monotonically increasing columns"
            );
        }
    }

    // --- custom policies ---

    #[test]
    fn policy_custom_nfd_case_sensitive() {
        let policy = SearchPolicy {
            norm_form: NormForm::Nfd,
            case_insensitive: false,
            width_mode: WidthMode::Standard,
        };
        // Precomposed haystack vs decomposed needle, equated by NFD.
        let haystack = "\u{00E9}";
        let needle = "e\u{0301}";
        let results = search_with_policy(haystack, needle, &policy);
        assert_eq!(results.len(), 1, "NFD should match decomposed forms");
    }

    #[test]
    fn policy_custom_nfkd_case_insensitive() {
        let policy = SearchPolicy {
            norm_form: NormForm::Nfkd,
            case_insensitive: true,
            width_mode: WidthMode::CjkAmbiguousWide,
        };
        let haystack = "\u{FB01}";
        let results = search_with_policy(haystack, "FI", &policy);
        assert!(!results.is_empty(), "NFKD + CI should match fi ligature");
    }

    // --- agreement with the simpler search entry points ---

    #[test]
    fn policy_search_agrees_with_search_case_insensitive() {
        let test_cases = [
            ("Hello World HELLO", "hello"),
            ("CAFร cafรฉ Cafรฉ", "cafรฉ"),
            ("\u{FF28}\u{FF25}\u{FF2C}\u{FF2C}\u{FF2F}", "hello"),
        ];
        for (haystack, needle) in &test_cases {
            let old = search_case_insensitive(haystack, needle);
            let new = search_with_policy(haystack, needle, &SearchPolicy::STANDARD);
            assert_eq!(
                old.len(),
                new.len(),
                "Match count mismatch for '{needle}' in '{haystack}'"
            );
            for (o, n) in old.iter().zip(new.iter()) {
                assert_eq!(
                    o.range, n.range,
                    "Byte range mismatch for '{needle}' in '{haystack}'"
                );
            }
        }
    }

    #[test]
    fn policy_search_agrees_with_search_normalized() {
        let haystack = "caf\u{0065}\u{0301} rรฉsumรฉ";
        let needle = "caf\u{00E9}";
        let old = search_normalized(haystack, needle, NormForm::Nfc);
        let new = search_with_policy(haystack, needle, &SearchPolicy::EXACT_NFC);
        assert_eq!(old.len(), new.len());
        for (o, n) in old.iter().zip(new.iter()) {
            assert_eq!(o.range, n.range);
        }
    }
}