use crate::fuzzy_matcher::MatchIndices;
use regex::Regex;
use unicode_normalization::UnicodeNormalization;
/// Strips combining marks from `s` and records where each kept character came from.
///
/// Every character of `s` is canonically decomposed (NFD); decomposed characters that
/// are combining marks are dropped and the rest are appended to the output string.
///
/// Returns `(normalized, mapping)`, where `mapping[i]` is the character index (not the
/// byte offset) in `s` of the character that produced the `i`-th character of
/// `normalized`.
pub fn normalize_with_char_mapping(s: &str) -> (String, Vec<usize>) {
    let mut folded = String::new();
    let mut origin_of = Vec::new();

    for (char_idx, ch) in s.chars().enumerate() {
        let base_chars = ch
            .nfd()
            .filter(|&part| !unicode_normalization::char::is_combining_mark(part));
        for part in base_chars {
            folded.push(part);
            // One mapping entry per emitted character, all pointing at the source char.
            origin_of.push(char_idx);
        }
    }

    (folded, origin_of)
}
/// Translates character indices in a normalized string back to indices in the original.
///
/// Each entry of `normalized_indices` is looked up in `char_mapping`; indices that fall
/// outside `char_mapping` are skipped rather than treated as an error. The order of the
/// surviving indices is preserved.
pub fn map_char_indices_to_original(
    normalized_indices: &[usize],
    char_mapping: &[usize],
) -> MatchIndices {
    normalized_indices
        .iter()
        .filter_map(|idx| char_mapping.get(*idx))
        .copied()
        .collect()
}
/// Strips combining marks from `s` and records, per output byte, its source byte offset.
///
/// Every character of `s` is canonically decomposed (NFD); decomposed characters that
/// are combining marks are dropped and the rest are appended to the output string.
///
/// Returns `(normalized, byte_mapping)`, where `byte_mapping` has one entry per byte of
/// `normalized` and `byte_mapping[i]` is the byte offset in `s` at which the character
/// that produced normalized byte `i` starts.
pub fn normalize_with_byte_mapping(s: &str) -> (String, Vec<usize>) {
    let mut folded = String::new();
    let mut origin_of_byte = Vec::new();

    for (src_offset, ch) in s.char_indices() {
        let base_chars = ch
            .nfd()
            .filter(|&part| !unicode_normalization::char::is_combining_mark(part));
        for part in base_chars {
            folded.push(part);
            // Both outputs grow in lockstep, so padding the mapping up to the new string
            // length adds exactly one entry for each byte that was just pushed.
            origin_of_byte.resize(folded.len(), src_offset);
        }
    }

    (folded, origin_of_byte)
}
/// Maps a half-open byte range `normalized_start..normalized_end` of a normalized string
/// back to a byte range in `original_str`.
///
/// `byte_mapping[i]` must hold the byte offset in `original_str` at which the character
/// that produced normalized byte `i` starts (the layout built by
/// `normalize_with_byte_mapping`).
///
/// # Returns
///
/// `(orig_start, orig_end)` with `orig_start <= orig_end`:
///
/// * `(0, 0)` if `byte_mapping` is empty or `normalized_start` is out of range.
/// * `orig_start` is the mapped offset of `normalized_start`.
/// * `orig_end` is the end of the original character that produced the last byte of the
///   range. It falls back to `original_str.len()` when `normalized_end` exceeds the
///   mapping, or when the mapped offset is not a valid character boundary in
///   `original_str`.
/// * An empty or reversed normalized range yields the empty range
///   `(orig_start, orig_start)`.
pub fn map_byte_range_to_original(
    normalized_start: usize,
    normalized_end: usize,
    byte_mapping: &[usize],
    original_str: &str,
) -> (usize, usize) {
    // A checked lookup covers both the empty mapping and an out-of-range start.
    let orig_start = match byte_mapping.get(normalized_start) {
        Some(&pos) => pos,
        None => return (0, 0),
    };

    let orig_end = match normalized_end {
        0 => orig_start,
        end if end > byte_mapping.len() => original_str.len(),
        end => {
            // `1 <= end <= byte_mapping.len()` here, so this index is in bounds.
            let last_char_start = byte_mapping[end - 1];
            // Checked slicing: a mapping that does not belong to `original_str` (offset
            // past the end or inside a multi-byte character) must not panic.
            original_str
                .get(last_char_start..)
                .and_then(|rest| rest.chars().next())
                .map_or(original_str.len(), |c| last_char_start + c.len_utf8())
        }
    };

    // When combining marks were stripped between two kept characters, the end of the
    // previous original character lies before the start of the next one. An empty (or
    // reversed) normalized range would then map to an inverted range, so clamp it.
    (orig_start, orig_end.max(orig_start))
}
/// Finds the first match of `pattern` in `choice`.
///
/// Returns the match as `(start, end)`, taken from the match's `start()` and `end()`.
/// Returns `None` when no pattern is supplied or the pattern does not match.
pub fn regex_match(choice: &str, pattern: &Option<Regex>) -> Option<(usize, usize)> {
    let found = pattern.as_ref()?.find(choice)?;
    Some((found.start(), found.end()))
}
/// Reports whether `string` contains at least one uppercase character.
///
/// Uses `char::is_uppercase`, so non-ASCII uppercase letters count as well. An empty
/// string yields `false`.
pub fn contains_upper(string: &str) -> bool {
    string.chars().any(char::is_uppercase)
}