#![forbid(unsafe_code)]
use unicode_width::UnicodeWidthChar;
/// Selects how display widths are measured, in particular how East Asian
/// Ambiguous characters (box drawing, arrows, some symbols) are counted.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum WidthMode {
/// Widths from `UnicodeWidthChar::width`: ambiguous characters are narrow.
#[default]
Standard,
/// Widths from `UnicodeWidthChar::width_cjk`: ambiguous characters are wide.
CjkAmbiguousWide,
}
impl WidthMode {
    /// Display width of `ch` in terminal columns under this mode, clamped
    /// to at most 2. Characters with no defined width (e.g. control
    /// characters, for which `unicode_width` returns `None`) count as 0.
    #[inline]
    #[must_use]
    pub fn char_width(self, ch: char) -> usize {
        let measured = if matches!(self, Self::CjkAmbiguousWide) {
            UnicodeWidthChar::width_cjk(ch)
        } else {
            UnicodeWidthChar::width(ch)
        };
        measured.map_or(0, |w| w.min(2))
    }
    /// Total display width of `s`: the sum of `char_width` over its chars.
    #[must_use]
    pub fn str_width(self, s: &str) -> usize {
        s.chars().fold(0, |cols, ch| cols + self.char_width(ch))
    }
}
/// Display column (0-based) at which the character starting at
/// `byte_offset` in `s` would be drawn, i.e. the total width of the
/// prefix before it under `mode`.
///
/// # Panics
/// Debug builds assert `byte_offset` is a char boundary; in all builds an
/// out-of-range or mid-character offset makes the slice below panic.
#[must_use]
pub fn display_col_at(s: &str, byte_offset: usize, mode: WidthMode) -> usize {
    debug_assert!(s.is_char_boundary(byte_offset));
    mode.str_width(&s[..byte_offset])
}
/// A single match produced by the `search_*` functions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SearchResult {
// Byte offsets (start..end) of the match within the searched haystack;
// always aligned to char boundaries of that haystack.
pub range: std::ops::Range<usize>,
}
impl SearchResult {
#[must_use]
pub fn new(start: usize, end: usize) -> Self {
Self { range: start..end }
}
#[must_use]
pub fn text<'a>(&self, source: &'a str) -> &'a str {
&source[self.range.clone()]
}
}
/// Finds every non-overlapping occurrence of `needle` in `haystack`,
/// left to right. An empty needle yields no matches (rather than a match
/// at every position).
#[must_use]
pub fn search_exact(haystack: &str, needle: &str) -> Vec<SearchResult> {
    if needle.is_empty() {
        return Vec::new();
    }
    // `match_indices` yields non-overlapping matches in order, resuming
    // past each match — exactly the semantics of a manual `find` loop that
    // advances by `needle.len()`, without the offset bookkeeping.
    haystack
        .match_indices(needle)
        .map(|(pos, m)| SearchResult::new(pos, pos + m.len()))
        .collect()
}
/// Finds every occurrence of `needle` in `haystack`, including matches
/// that overlap earlier ones: after each hit the scan resumes one
/// *character* past the match start (not past the whole match).
#[must_use]
pub fn search_exact_overlapping(haystack: &str, needle: &str) -> Vec<SearchResult> {
    if needle.is_empty() {
        return Vec::new();
    }
    let mut results = Vec::new();
    let mut from = 0;
    while let Some(rel) = haystack[from..].find(needle) {
        let hit = from + rel;
        results.push(SearchResult::new(hit, hit + needle.len()));
        // `hit` is a char boundary (a match starts there), so stepping over
        // that one character lands exactly on the next char boundary —
        // equivalent to advancing a byte and scanning for a boundary.
        let step = haystack[hit..].chars().next().map_or(1, char::len_utf8);
        from = hit + step;
    }
    results
}
/// Case-insensitive search that folds only ASCII letters: both strings
/// are ASCII-lowercased and then matched exactly. ASCII lowercasing never
/// changes byte lengths, so offsets into the folded haystack are valid
/// offsets into the original `haystack` as well.
#[must_use]
pub fn search_ascii_case_insensitive(haystack: &str, needle: &str) -> Vec<SearchResult> {
    if needle.is_empty() {
        return Vec::new();
    }
    let folded_haystack = haystack.to_ascii_lowercase();
    let folded_needle = needle.to_ascii_lowercase();
    // Non-overlapping scan over the folded text; each match is as long as
    // the (length-preserved) needle.
    folded_haystack
        .match_indices(folded_needle.as_str())
        .map(|(pos, m)| SearchResult::new(pos, pos + m.len()))
        .collect()
}
#[cfg(feature = "normalization")]
#[must_use]
pub fn search_case_insensitive(haystack: &str, needle: &str) -> Vec<SearchResult> {
if needle.is_empty() {
return Vec::new();
}
let needle_norm = crate::normalization::normalize_for_search(needle);
if needle_norm.is_empty() {
return Vec::new();
}
use unicode_segmentation::UnicodeSegmentation;
let mut norm_start_map: Vec<usize> = Vec::new();
let mut norm_end_map: Vec<usize> = Vec::new();
let mut normalized = String::new();
for (orig_byte, grapheme) in haystack.grapheme_indices(true) {
let chunk = crate::normalization::normalize_for_search(grapheme);
if chunk.is_empty() {
continue;
}
let orig_end = orig_byte + grapheme.len();
for _ in chunk.bytes() {
norm_start_map.push(orig_byte);
norm_end_map.push(orig_end);
}
normalized.push_str(&chunk);
}
if normalized.is_empty() {
return Vec::new();
}
let mut results = Vec::new();
let mut start = 0;
while let Some(pos) = normalized[start..].find(&needle_norm) {
let norm_start = start + pos;
let norm_end = norm_start + needle_norm.len();
let orig_start = norm_start_map
.get(norm_start)
.copied()
.unwrap_or(haystack.len());
let orig_end = if norm_end == 0 {
orig_start
} else {
norm_end_map
.get(norm_end - 1)
.copied()
.unwrap_or(haystack.len())
};
if results
.last()
.is_some_and(|r: &SearchResult| r.range.start == orig_start && r.range.end == orig_end)
{
start = norm_end;
continue;
}
results.push(SearchResult::new(orig_start, orig_end));
start = norm_end;
}
results
}
#[cfg(feature = "normalization")]
#[must_use]
pub fn search_normalized(
haystack: &str,
needle: &str,
form: crate::normalization::NormForm,
) -> Vec<SearchResult> {
use crate::normalization::normalize;
use unicode_segmentation::UnicodeSegmentation;
if needle.is_empty() {
return Vec::new();
}
let needle_norm = normalize(needle, form);
if needle_norm.is_empty() {
return Vec::new();
}
let mut norm_start_map: Vec<usize> = Vec::new();
let mut norm_end_map: Vec<usize> = Vec::new();
let mut normalized = String::new();
for (orig_byte, grapheme) in haystack.grapheme_indices(true) {
let chunk = normalize(grapheme, form);
if chunk.is_empty() {
continue;
}
let orig_end = orig_byte + grapheme.len();
for _ in chunk.bytes() {
norm_start_map.push(orig_byte);
norm_end_map.push(orig_end);
}
normalized.push_str(&chunk);
}
if normalized.is_empty() {
return Vec::new();
}
let mut results = Vec::new();
let mut start = 0;
while let Some(pos) = normalized[start..].find(&needle_norm) {
let norm_start = start + pos;
let norm_end = norm_start + needle_norm.len();
let orig_start = norm_start_map
.get(norm_start)
.copied()
.unwrap_or(haystack.len());
let orig_end = if norm_end == 0 {
orig_start
} else {
norm_end_map
.get(norm_end - 1)
.copied()
.unwrap_or(haystack.len())
};
if results
.last()
.is_some_and(|r: &SearchResult| r.range.start == orig_start && r.range.end == orig_end)
{
start = norm_end;
continue;
}
results.push(SearchResult::new(orig_start, orig_end));
start = norm_end;
}
results
}
#[cfg(feature = "normalization")]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SearchPolicy {
pub norm_form: crate::normalization::NormForm,
pub case_insensitive: bool,
pub width_mode: WidthMode,
}
#[cfg(feature = "normalization")]
impl SearchPolicy {
pub const STANDARD: Self = Self {
norm_form: crate::normalization::NormForm::Nfkc,
case_insensitive: true,
width_mode: WidthMode::Standard,
};
pub const CJK: Self = Self {
norm_form: crate::normalization::NormForm::Nfkc,
case_insensitive: true,
width_mode: WidthMode::CjkAmbiguousWide,
};
pub const EXACT_NFC: Self = Self {
norm_form: crate::normalization::NormForm::Nfc,
case_insensitive: false,
width_mode: WidthMode::Standard,
};
}
#[cfg(feature = "normalization")]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PolicySearchResult {
pub range: std::ops::Range<usize>,
pub col_start: usize,
pub col_end: usize,
}
#[cfg(feature = "normalization")]
impl PolicySearchResult {
#[must_use]
pub fn text<'a>(&self, source: &'a str) -> &'a str {
&source[self.range.clone()]
}
#[must_use]
pub fn display_width(&self) -> usize {
self.col_end - self.col_start
}
}
#[cfg(feature = "normalization")]
#[must_use]
pub fn search_with_policy(
haystack: &str,
needle: &str,
policy: &SearchPolicy,
) -> Vec<PolicySearchResult> {
use crate::normalization::normalize;
use unicode_segmentation::UnicodeSegmentation;
if needle.is_empty() {
return Vec::new();
}
let needle_norm = if policy.case_insensitive {
normalize(needle, policy.norm_form).to_lowercase()
} else {
normalize(needle, policy.norm_form)
};
if needle_norm.is_empty() {
return Vec::new();
}
let mut norm_start_map: Vec<usize> = Vec::new();
let mut norm_end_map: Vec<usize> = Vec::new();
let mut normalized = String::new();
for (orig_byte, grapheme) in haystack.grapheme_indices(true) {
let chunk = if policy.case_insensitive {
normalize(grapheme, policy.norm_form).to_lowercase()
} else {
normalize(grapheme, policy.norm_form)
};
if chunk.is_empty() {
continue;
}
let orig_end = orig_byte + grapheme.len();
for _ in chunk.bytes() {
norm_start_map.push(orig_byte);
norm_end_map.push(orig_end);
}
normalized.push_str(&chunk);
}
if normalized.is_empty() {
return Vec::new();
}
let mut results = Vec::new();
let mut start = 0;
while let Some(pos) = normalized[start..].find(&needle_norm) {
let norm_start = start + pos;
let norm_end = norm_start + needle_norm.len();
let orig_start = norm_start_map
.get(norm_start)
.copied()
.unwrap_or(haystack.len());
let orig_end = if norm_end == 0 {
orig_start
} else {
norm_end_map
.get(norm_end - 1)
.copied()
.unwrap_or(haystack.len())
};
if results.last().is_some_and(|r: &PolicySearchResult| {
r.range.start == orig_start && r.range.end == orig_end
}) {
start = norm_end;
continue;
}
let col_start = display_col_at(haystack, orig_start, policy.width_mode);
let col_end = display_col_at(haystack, orig_end, policy.width_mode);
results.push(PolicySearchResult {
range: orig_start..orig_end,
col_start,
col_end,
});
start = norm_end;
}
results
}
// Unit tests for the feature-independent search functions (exact,
// overlapping, and ASCII case-insensitive) and their range invariants.
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn exact_basic() {
let results = search_exact("hello world hello", "hello");
assert_eq!(results.len(), 2);
assert_eq!(results[0].range, 0..5);
assert_eq!(results[1].range, 12..17);
}
#[test]
fn exact_no_match() {
let results = search_exact("hello world", "xyz");
assert!(results.is_empty());
}
// Empty needle is defined to match nothing, not everywhere.
#[test]
fn exact_empty_needle() {
let results = search_exact("hello", "");
assert!(results.is_empty());
}
#[test]
fn exact_empty_haystack() {
let results = search_exact("", "hello");
assert!(results.is_empty());
}
#[test]
fn exact_needle_equals_haystack() {
let results = search_exact("hello", "hello");
assert_eq!(results.len(), 1);
assert_eq!(results[0].range, 0..5);
}
#[test]
fn exact_needle_longer() {
let results = search_exact("hi", "hello");
assert!(results.is_empty());
}
#[test]
fn exact_adjacent_matches() {
let results = search_exact("aaa", "a");
assert_eq!(results.len(), 3);
}
#[test]
fn exact_text_extraction() {
let haystack = "foo bar baz";
let results = search_exact(haystack, "bar");
assert_eq!(results[0].text(haystack), "bar");
}
#[test]
fn exact_unicode() {
let results = search_exact("café résumé café", "café");
assert_eq!(results.len(), 2);
}
#[test]
fn exact_cjk() {
let results = search_exact("你好世界你好", "你好");
assert_eq!(results.len(), 2);
}
// "aaa"/"aa" distinguishes overlapping from non-overlapping search:
// overlapping finds 0..2 and 1..3, non-overlapping only 0..2.
#[test]
fn overlapping_basic() {
let results = search_exact_overlapping("aaa", "aa");
assert_eq!(results.len(), 2);
assert_eq!(results[0].range, 0..2);
assert_eq!(results[1].range, 1..3);
}
#[test]
fn overlapping_no_overlap() {
let results = search_exact_overlapping("abcabc", "abc");
assert_eq!(results.len(), 2);
}
#[test]
fn overlapping_empty_needle() {
let results = search_exact_overlapping("abc", "");
assert!(results.is_empty());
}
#[test]
fn ascii_ci_basic() {
let results = search_ascii_case_insensitive("Hello World HELLO", "hello");
assert_eq!(results.len(), 2);
}
#[test]
fn ascii_ci_mixed_case() {
let results = search_ascii_case_insensitive("FoO BaR fOo", "foo");
assert_eq!(results.len(), 2);
}
#[test]
fn ascii_ci_no_match() {
let results = search_ascii_case_insensitive("hello", "xyz");
assert!(results.is_empty());
}
// Structural invariants every result must satisfy: ordered, in-bounds,
// char-boundary-aligned ranges.
#[test]
fn results_have_valid_ranges() {
let test_cases = [
("hello world", "o"),
("aaaa", "aa"),
("", "x"),
("x", ""),
("café", "é"),
("🌍 world 🌍", "🌍"),
];
for (haystack, needle) in test_cases {
let results = search_exact(haystack, needle);
for r in &results {
assert!(
r.range.start <= r.range.end,
"Invalid range for '{needle}' in '{haystack}'"
);
assert!(
r.range.end <= haystack.len(),
"Out of bounds for '{needle}' in '{haystack}'"
);
assert!(
haystack.is_char_boundary(r.range.start),
"Not char boundary at start"
);
assert!(
haystack.is_char_boundary(r.range.end),
"Not char boundary at end"
);
}
}
}
#[test]
fn emoji_search() {
let results = search_exact("hello 🌍 world 🌍 end", "🌍");
assert_eq!(results.len(), 2);
for r in &results {
assert_eq!(&"hello 🌍 world 🌍 end"[r.range.clone()], "🌍");
}
}
}
// Tests for the normalization-backed searches (`search_case_insensitive`,
// `search_normalized`), only built with the `normalization` feature.
#[cfg(all(test, feature = "normalization"))]
mod normalization_tests {
use super::*;
#[test]
fn case_insensitive_unicode() {
let results = search_case_insensitive("Straße Strasse", "strasse");
assert!(
!results.is_empty(),
"Should find literal case-insensitive match"
);
}
// "ß" folds to "ss" (a length-changing fold); the returned range must
// still align to the original grapheme boundaries.
#[test]
fn case_insensitive_expansion_range_maps_to_grapheme() {
let haystack = "STRAßE";
let results = search_case_insensitive(haystack, "straße");
assert_eq!(results.len(), 1);
let result = &results[0];
assert_eq!(result.text(haystack), "STRAßE");
assert!(haystack.is_char_boundary(result.range.start));
assert!(haystack.is_char_boundary(result.range.end));
}
#[test]
fn case_insensitive_accented() {
let results = search_case_insensitive("CAFÉ café Café", "café");
assert_eq!(results.len(), 3);
}
#[test]
fn case_insensitive_empty() {
let results = search_case_insensitive("hello", "");
assert!(results.is_empty());
}
// Fullwidth Latin letters (U+FF28 etc.) should NFKC-fold to ASCII.
#[test]
fn case_insensitive_fullwidth() {
let results = search_case_insensitive("\u{FF28}\u{FF25}\u{FF2C}\u{FF2C}\u{FF2F}", "hello");
assert!(!results.is_empty(), "Fullwidth should match via NFKC");
}
// Decomposed "e" + combining acute vs precomposed "é" under NFC.
#[test]
fn normalized_composed_vs_decomposed() {
use crate::normalization::NormForm;
let haystack = "caf\u{0065}\u{0301}"; let needle = "caf\u{00E9}"; let results = search_normalized(haystack, needle, NormForm::Nfc);
assert_eq!(results.len(), 1, "Should find NFC-equivalent match");
}
#[test]
fn normalized_no_false_positive() {
use crate::normalization::NormForm;
let results = search_normalized("hello", "world", NormForm::Nfc);
assert!(results.is_empty());
}
#[test]
fn normalized_result_ranges_valid() {
use crate::normalization::NormForm;
let haystack = "café résumé café";
let needle = "café";
let results = search_normalized(haystack, needle, NormForm::Nfc);
for r in &results {
assert!(r.range.start <= r.range.end);
assert!(r.range.end <= haystack.len());
assert!(haystack.is_char_boundary(r.range.start));
assert!(haystack.is_char_boundary(r.range.end));
}
}
#[test]
fn case_insensitive_result_ranges_valid() {
let haystack = "Hello WORLD hello";
let results = search_case_insensitive(haystack, "hello");
for r in &results {
assert!(r.range.start <= r.range.end);
assert!(r.range.end <= haystack.len());
assert!(haystack.is_char_boundary(r.range.start));
assert!(haystack.is_char_boundary(r.range.end));
}
}
}
// Tests for `WidthMode`, `display_col_at`, `SearchPolicy` presets, and
// `search_with_policy` (built only with the `normalization` feature).
#[cfg(all(test, feature = "normalization"))]
mod policy_tests {
use super::*;
use crate::normalization::NormForm;
#[test]
fn width_mode_ascii_is_one() {
for ch in ['a', 'Z', '0', ' ', '~'] {
assert_eq!(WidthMode::Standard.char_width(ch), 1);
assert_eq!(WidthMode::CjkAmbiguousWide.char_width(ch), 1);
}
}
#[test]
fn width_mode_cjk_ideograph_is_two() {
for ch in ['中', '国', '字'] {
assert_eq!(WidthMode::Standard.char_width(ch), 2);
assert_eq!(WidthMode::CjkAmbiguousWide.char_width(ch), 2);
}
}
// East Asian Ambiguous characters (box drawing, arrows, °×®) are the
// only place the two modes should disagree: 1 vs 2 columns.
#[test]
fn width_mode_ea_ambiguous_differs() {
for ch in ['─', '│', '┌'] {
assert_eq!(WidthMode::Standard.char_width(ch), 1, "Standard: {ch:?}");
assert_eq!(
WidthMode::CjkAmbiguousWide.char_width(ch),
2,
"CjkWide: {ch:?}"
);
}
for ch in ['→', '←', '↑', '↓'] {
assert_eq!(WidthMode::Standard.char_width(ch), 1);
assert_eq!(WidthMode::CjkAmbiguousWide.char_width(ch), 2);
}
for ch in ['°', '×', '®'] {
assert_eq!(WidthMode::Standard.char_width(ch), 1);
assert_eq!(WidthMode::CjkAmbiguousWide.char_width(ch), 2);
}
}
#[test]
fn width_mode_combining_marks_zero() {
for ch in ['\u{0300}', '\u{0301}', '\u{0302}'] {
assert_eq!(WidthMode::Standard.char_width(ch), 0);
assert_eq!(WidthMode::CjkAmbiguousWide.char_width(ch), 0);
}
}
#[test]
fn width_mode_str_width() {
assert_eq!(WidthMode::Standard.str_width("hello"), 5);
assert_eq!(WidthMode::Standard.str_width("中国"), 4);
assert_eq!(WidthMode::CjkAmbiguousWide.str_width("hello"), 5);
assert_eq!(WidthMode::CjkAmbiguousWide.str_width("→ 中"), 5);
assert_eq!(WidthMode::Standard.str_width("→ 中"), 4); }
#[test]
fn width_mode_default_is_standard() {
assert_eq!(WidthMode::default(), WidthMode::Standard);
}
#[test]
fn display_col_at_ascii() {
let s = "hello world";
assert_eq!(display_col_at(s, 0, WidthMode::Standard), 0);
assert_eq!(display_col_at(s, 5, WidthMode::Standard), 5);
assert_eq!(display_col_at(s, 11, WidthMode::Standard), 11);
}
// Each ideograph is 3 bytes but 2 columns, so byte offsets 3/6 map to
// columns 2/4.
#[test]
fn display_col_at_cjk() {
let s = "你好world";
assert_eq!(display_col_at(s, 0, WidthMode::Standard), 0);
assert_eq!(display_col_at(s, 3, WidthMode::Standard), 2); assert_eq!(display_col_at(s, 6, WidthMode::Standard), 4); assert_eq!(display_col_at(s, 11, WidthMode::Standard), 9); }
#[test]
fn display_col_at_ea_ambiguous_differs() {
let s = "─→text";
let after_box = 3; let after_arrow = 6; assert_eq!(display_col_at(s, after_box, WidthMode::Standard), 1);
assert_eq!(display_col_at(s, after_box, WidthMode::CjkAmbiguousWide), 2);
assert_eq!(display_col_at(s, after_arrow, WidthMode::Standard), 2);
assert_eq!(
display_col_at(s, after_arrow, WidthMode::CjkAmbiguousWide),
4
);
}
// Combining marks occupy bytes but zero columns.
#[test]
fn display_col_at_combining_marks() {
let s = "e\u{0301}x";
assert_eq!(display_col_at(s, 0, WidthMode::Standard), 0);
assert_eq!(display_col_at(s, 1, WidthMode::Standard), 1); assert_eq!(display_col_at(s, 3, WidthMode::Standard), 1); assert_eq!(display_col_at(s, 4, WidthMode::Standard), 2); }
#[test]
fn policy_standard_preset() {
let p = SearchPolicy::STANDARD;
assert_eq!(p.norm_form, NormForm::Nfkc);
assert!(p.case_insensitive);
assert_eq!(p.width_mode, WidthMode::Standard);
}
#[test]
fn policy_cjk_preset() {
let p = SearchPolicy::CJK;
assert_eq!(p.norm_form, NormForm::Nfkc);
assert!(p.case_insensitive);
assert_eq!(p.width_mode, WidthMode::CjkAmbiguousWide);
}
#[test]
fn policy_exact_nfc_preset() {
let p = SearchPolicy::EXACT_NFC;
assert_eq!(p.norm_form, NormForm::Nfc);
assert!(!p.case_insensitive);
assert_eq!(p.width_mode, WidthMode::Standard);
}
#[test]
fn policy_search_basic_ascii() {
let results = search_with_policy("hello world", "hello", &SearchPolicy::STANDARD);
assert_eq!(results.len(), 1);
assert_eq!(results[0].range, 0..5);
assert_eq!(results[0].col_start, 0);
assert_eq!(results[0].col_end, 5);
}
#[test]
fn policy_search_case_insensitive() {
let results = search_with_policy("Hello WORLD hello", "hello", &SearchPolicy::STANDARD);
assert_eq!(results.len(), 2);
assert_eq!(results[0].text("Hello WORLD hello"), "Hello");
assert_eq!(results[1].text("Hello WORLD hello"), "hello");
}
#[test]
fn policy_search_case_sensitive() {
let results = search_with_policy("Hello hello", "hello", &SearchPolicy::EXACT_NFC);
assert_eq!(results.len(), 1);
assert_eq!(results[0].range.start, 6);
}
#[test]
fn policy_search_empty_needle() {
let results = search_with_policy("hello", "", &SearchPolicy::STANDARD);
assert!(results.is_empty());
}
#[test]
fn policy_search_empty_haystack() {
let results = search_with_policy("", "hello", &SearchPolicy::STANDARD);
assert!(results.is_empty());
}
#[test]
fn policy_search_no_match() {
let results = search_with_policy("hello", "world", &SearchPolicy::STANDARD);
assert!(results.is_empty());
}
#[test]
fn policy_search_composed_vs_decomposed() {
let haystack = "caf\u{00E9}";
let needle = "caf\u{0065}\u{0301}";
let results = search_with_policy(haystack, needle, &SearchPolicy::EXACT_NFC);
assert_eq!(
results.len(),
1,
"NFC should equate composed and decomposed"
);
}
#[test]
fn policy_search_fullwidth_nfkc() {
let haystack = "\u{FF28}\u{FF25}\u{FF2C}\u{FF2C}\u{FF2F}";
let results = search_with_policy(haystack, "hello", &SearchPolicy::STANDARD);
assert!(!results.is_empty(), "Fullwidth should match via NFKC");
}
// U+FB01 is the "fi" ligature: NFC keeps it, NFKC decomposes it.
#[test]
fn policy_search_nfc_does_not_match_compatibility() {
let haystack = "\u{FB01}le";
let results = search_with_policy(haystack, "file", &SearchPolicy::EXACT_NFC);
assert!(
results.is_empty(),
"NFC should not decompose compatibility chars"
);
}
#[test]
fn policy_search_nfkc_matches_compatibility() {
let haystack = "\u{FB01}le";
let results = search_with_policy(haystack, "file", &SearchPolicy::STANDARD);
assert!(!results.is_empty(), "NFKC should decompose fi ligature");
}
#[test]
fn policy_search_cjk_column_offsets() {
let haystack = "你好world你好";
let results = search_with_policy(haystack, "world", &SearchPolicy::STANDARD);
assert_eq!(results.len(), 1);
assert_eq!(results[0].col_start, 4);
assert_eq!(results[0].col_end, 9); }
#[test]
fn policy_search_cjk_in_cjk() {
let haystack = "你好世界你好";
let results = search_with_policy(haystack, "世界", &SearchPolicy::STANDARD);
assert_eq!(results.len(), 1);
assert_eq!(results[0].col_start, 4); assert_eq!(results[0].col_end, 8); }
// Same byte range, different columns depending on ambiguous-width mode.
#[test]
fn policy_search_ea_ambiguous_column_divergence() {
let haystack = "→hello";
let standard = search_with_policy(haystack, "hello", &SearchPolicy::STANDARD);
let cjk = search_with_policy(haystack, "hello", &SearchPolicy::CJK);
assert_eq!(standard.len(), 1);
assert_eq!(cjk.len(), 1);
assert_eq!(standard[0].range, cjk[0].range);
assert_eq!(standard[0].col_start, 1); assert_eq!(cjk[0].col_start, 2); assert_eq!(standard[0].col_end, 6);
assert_eq!(cjk[0].col_end, 7);
}
#[test]
fn policy_search_box_drawing_column_divergence() {
let haystack = "──text";
let standard = search_with_policy(haystack, "text", &SearchPolicy::STANDARD);
let cjk = search_with_policy(haystack, "text", &SearchPolicy::CJK);
assert_eq!(standard[0].col_start, 2); assert_eq!(cjk[0].col_start, 4); }
#[test]
fn policy_search_combining_mark_offsets() {
let haystack = "café";
let results = search_with_policy(haystack, "fé", &SearchPolicy::STANDARD);
assert_eq!(results.len(), 1);
assert_eq!(results[0].col_start, 2);
assert_eq!(results[0].col_end, 4);
}
#[test]
fn policy_search_decomposed_combining_offsets() {
let haystack = "cafe\u{0301}";
let needle = "f\u{00E9}"; let results = search_with_policy(haystack, needle, &SearchPolicy::EXACT_NFC);
assert_eq!(results.len(), 1);
assert_eq!(results[0].col_start, 2);
assert_eq!(results[0].col_end, 4);
}
#[test]
fn policy_result_display_width() {
let haystack = "你好hello";
let results = search_with_policy(haystack, "hello", &SearchPolicy::STANDARD);
assert_eq!(results[0].display_width(), 5);
}
#[test]
fn policy_result_display_width_cjk_match() {
let haystack = "abc你好def";
let results = search_with_policy(haystack, "你好", &SearchPolicy::STANDARD);
assert_eq!(results[0].display_width(), 4); }
#[test]
fn policy_result_text_extraction() {
let haystack = "Hello World";
let results = search_with_policy(haystack, "world", &SearchPolicy::STANDARD);
assert_eq!(results[0].text(haystack), "World");
}
#[test]
fn policy_search_multiple_matches() {
let haystack = "foo bar foo baz foo";
let results = search_with_policy(haystack, "foo", &SearchPolicy::STANDARD);
assert_eq!(results.len(), 3);
assert_eq!(results[0].col_start, 0);
assert_eq!(results[1].col_start, 8);
assert_eq!(results[2].col_start, 16);
}
#[test]
fn policy_search_ranges_always_valid() {
let test_cases = [
("hello world", "o", SearchPolicy::STANDARD),
("CAFÉ café", "café", SearchPolicy::STANDARD),
("你好世界", "世", SearchPolicy::CJK),
("─→text", "text", SearchPolicy::CJK),
("\u{FB01}le", "file", SearchPolicy::STANDARD),
("e\u{0301}", "\u{00E9}", SearchPolicy::EXACT_NFC),
];
for (haystack, needle, policy) in &test_cases {
let results = search_with_policy(haystack, needle, policy);
for r in &results {
assert!(
r.range.start <= r.range.end,
"Invalid range for '{needle}' in '{haystack}'"
);
assert!(
r.range.end <= haystack.len(),
"Out of bounds for '{needle}' in '{haystack}'"
);
assert!(
haystack.is_char_boundary(r.range.start),
"Not char boundary at start"
);
assert!(
haystack.is_char_boundary(r.range.end),
"Not char boundary at end"
);
assert!(
r.col_start <= r.col_end,
"col_start > col_end for '{needle}' in '{haystack}'"
);
}
}
}
#[test]
fn policy_search_columns_monotonically_increasing() {
let haystack = "aa bb aa cc aa";
let results = search_with_policy(haystack, "aa", &SearchPolicy::STANDARD);
assert_eq!(results.len(), 3);
for w in results.windows(2) {
assert!(
w[0].col_end <= w[1].col_start,
"Non-overlapping matches should have monotonically increasing columns"
);
}
}
#[test]
fn policy_custom_nfd_case_sensitive() {
let policy = SearchPolicy {
norm_form: NormForm::Nfd,
case_insensitive: false,
width_mode: WidthMode::Standard,
};
let haystack = "\u{00E9}"; let needle = "e\u{0301}"; let results = search_with_policy(haystack, needle, &policy);
assert_eq!(results.len(), 1, "NFD should match decomposed forms");
}
#[test]
fn policy_custom_nfkd_case_insensitive() {
let policy = SearchPolicy {
norm_form: NormForm::Nfkd,
case_insensitive: true,
width_mode: WidthMode::CjkAmbiguousWide,
};
let haystack = "\u{FB01}";
let results = search_with_policy(haystack, "FI", &policy);
assert!(!results.is_empty(), "NFKD + CI should match fi ligature");
}
// Cross-checks: the policy-based search must agree with the older
// dedicated functions on byte ranges.
#[test]
fn policy_search_agrees_with_search_case_insensitive() {
let test_cases = [
("Hello World HELLO", "hello"),
("CAFÉ café Café", "café"),
("\u{FF28}\u{FF25}\u{FF2C}\u{FF2C}\u{FF2F}", "hello"),
];
for (haystack, needle) in &test_cases {
let old = search_case_insensitive(haystack, needle);
let new = search_with_policy(haystack, needle, &SearchPolicy::STANDARD);
assert_eq!(
old.len(),
new.len(),
"Match count mismatch for '{needle}' in '{haystack}'"
);
for (o, n) in old.iter().zip(new.iter()) {
assert_eq!(
o.range, n.range,
"Byte range mismatch for '{needle}' in '{haystack}'"
);
}
}
}
#[test]
fn policy_search_agrees_with_search_normalized() {
let haystack = "caf\u{0065}\u{0301} résumé";
let needle = "caf\u{00E9}";
let old = search_normalized(haystack, needle, NormForm::Nfc);
let new = search_with_policy(haystack, needle, &SearchPolicy::EXACT_NFC);
assert_eq!(old.len(), new.len());
for (o, n) in old.iter().zip(new.iter()) {
assert_eq!(o.range, n.range);
}
}
}