1use alloc::string::String;
10use alloc::vec::Vec;
11
12use crate::casefold::{self, CaseFoldMode};
13use crate::confusable;
14
15#[derive(Clone, Copy, Debug, PartialEq, Eq)]
17pub struct MatchingOptions {
18 pub case_fold: CaseFoldMode,
20}
21
22impl Default for MatchingOptions {
23 fn default() -> Self {
24 MatchingOptions {
25 case_fold: CaseFoldMode::Standard,
26 }
27 }
28}
29
30pub fn normalize_for_matching(input: &str, opts: &MatchingOptions) -> String {
60 if input.is_empty() {
61 return String::new();
62 }
63
64 let mut current = one_pass(input, opts);
68 for _ in 0..3 {
69 let next = one_pass(¤t, opts);
70 if next == current {
71 return current;
72 }
73 current = next;
74 }
75 current
76}
77
78fn one_pass(input: &str, opts: &MatchingOptions) -> String {
89 let nfkc = crate::nfkc().normalize(input);
90 let folded = casefold::casefold(&nfkc, opts.case_fold);
91 let skel = confusable::skeleton(&folded);
92 let final_folded = casefold::casefold(&skel, opts.case_fold);
93 final_folded.into_owned()
94}
95
/// Legacy variant of the matching normalization, compiled only for tests or
/// when the `internal-test-api` feature is enabled.
///
/// Applies `one_pass_legacy` once and then up to three more times, returning
/// early when a pass leaves the text unchanged. If no fixed point is reached
/// within that budget, the output of the last pass is returned.
///
/// An empty `input` yields an empty `String` without running the pipeline.
#[cfg(any(test, feature = "internal-test-api"))]
pub fn normalize_for_matching_legacy(input: &str, opts: &MatchingOptions) -> String {
    // Upper bound on the re-normalization passes that follow the initial one.
    const MAX_EXTRA_PASSES: usize = 3;

    if input.is_empty() {
        return String::new();
    }

    let mut current = one_pass_legacy(input, opts);
    // Re-run the pipeline (bounded) until the output stops changing.
    for _ in 0..MAX_EXTRA_PASSES {
        let next = one_pass_legacy(&current, opts);
        if next == current {
            return current;
        }
        current = next;
    }
    current
}
113
/// Runs the legacy normalization pipeline exactly once over `input`.
///
/// Steps, in order: the crate's NFKC normalizer (`crate::nfkc()`), case
/// folding with `opts.case_fold`, the confusable skeleton mapping, and a
/// second case fold with the same mode.
#[cfg(any(test, feature = "internal-test-api"))]
fn one_pass_legacy(input: &str, opts: &MatchingOptions) -> String {
    let mode = opts.case_fold;

    let compat = crate::nfkc().normalize(input);
    let lowered = casefold::casefold(&compat, mode);
    let skeleton = confusable::skeleton(&lowered);
    // Second fold runs on the skeleton output, mirroring the first fold's mode.
    let refolded = casefold::casefold(&skeleton, mode);
    refolded.into_owned()
}
123
124pub fn normalize_for_matching_utf16(input: &str, opts: &MatchingOptions) -> Vec<u16> {
128 normalize_for_matching(input, opts).encode_utf16().collect()
129}
130
131pub fn matches_normalized(a: &str, b: &str, opts: &MatchingOptions) -> bool {
150 if a == b {
152 return true;
153 }
154 normalize_for_matching(a, opts) == normalize_for_matching(b, opts)
155}
156
#[cfg(test)]
mod tests {
    use super::*;

    /// Options using the default case-folding mode.
    fn default_opts() -> MatchingOptions {
        MatchingOptions::default()
    }

    /// Options using Turkish case folding.
    fn turkish_opts() -> MatchingOptions {
        MatchingOptions {
            case_fold: CaseFoldMode::Turkish,
        }
    }

    // --- Basic behavior ---

    #[test]
    fn empty_input() {
        let normalized = normalize_for_matching("", &default_opts());
        assert_eq!(normalized, "");
    }

    #[test]
    fn ascii_lowercase_unchanged() {
        // NOTE(review): despite the name, only non-emptiness is asserted here.
        assert!(!normalize_for_matching("hello", &default_opts()).is_empty());
    }

    #[test]
    fn identical_strings_match() {
        let opts = default_opts();
        assert!(matches_normalized("test", "test", &opts));
    }

    #[test]
    fn different_strings_dont_match() {
        let opts = default_opts();
        assert!(!matches_normalized("hello", "world", &opts));
    }

    // --- Case insensitivity ---

    #[test]
    fn case_insensitive_ascii() {
        let opts = default_opts();
        for variant in ["File", "FILE", "FiLe"].iter().copied() {
            assert!(matches_normalized(variant, "file", &opts));
        }
    }

    #[test]
    fn case_insensitive_extended() {
        let opts = default_opts();
        let (upper, lower) = ("Ströme", "ströme");
        assert!(matches_normalized(upper, lower, &opts));
    }

    // --- Confusables ---

    #[test]
    fn confusable_latin_cyrillic_a() {
        let opts = default_opts();
        // U+0430 is CYRILLIC SMALL LETTER A.
        let cyrillic_a = "\u{0430}";
        assert!(matches_normalized("a", cyrillic_a, &opts));
    }

    #[test]
    fn confusable_latin_cyrillic_word() {
        let opts = default_opts();
        // "apple" spelled with Cyrillic а, р, р, a Latin l, and Cyrillic е.
        let lookalike = "\u{0430}\u{0440}\u{0440}l\u{0435}";
        assert!(matches_normalized("apple", lookalike, &opts));
    }

    // --- "file" variants ---

    #[test]
    fn file_variants_all_match() {
        let opts = default_opts();
        let canonical = normalize_for_matching("file", &opts);

        for cased in ["File", "FILE"].iter().copied() {
            assert_eq!(normalize_for_matching(cased, &opts), canonical);
        }

        // U+0131 is LATIN SMALL LETTER DOTLESS I.
        let dotless_variant = "f\u{0131}le";
        assert!(
            matches_normalized("file", dotless_variant, &opts),
            "'file' and 'fıle' should match: file={:?}, fıle={:?}",
            normalize_for_matching("file", &opts),
            normalize_for_matching(dotless_variant, &opts),
        );
    }

    #[test]
    fn file_mixed_case_and_confusable() {
        let opts = default_opts();
        let mixed = "F\u{0131}LE";
        assert!(
            matches_normalized("file", mixed, &opts),
            "'file' and 'FıLE' should match: file={:?}, FıLE={:?}",
            normalize_for_matching("file", &opts),
            normalize_for_matching(mixed, &opts),
        );
    }

    // --- NFKC compatibility forms ---

    #[test]
    fn nfkc_fullwidth() {
        let opts = default_opts();
        // U+FF21 is FULLWIDTH LATIN CAPITAL LETTER A.
        assert!(matches_normalized("\u{FF21}", "a", &opts));
    }

    #[test]
    fn nfkc_superscript() {
        let opts = default_opts();
        // U+00B2 is SUPERSCRIPT TWO.
        let superscript = normalize_for_matching("\u{00B2}", &opts);
        let plain = normalize_for_matching("2", &opts);
        assert_eq!(superscript, plain);
    }

    // --- Turkish mode ---

    #[test]
    fn turkish_mode_dotless_i() {
        let opts = turkish_opts();
        let capital_i = normalize_for_matching("Istanbul", &opts);
        let dotless_i = normalize_for_matching("\u{0131}stanbul", &opts);
        assert_eq!(capital_i, dotless_i);
    }

    #[test]
    fn turkish_mode_dotted_i() {
        let opts = turkish_opts();
        // U+0130 is LATIN CAPITAL LETTER I WITH DOT ABOVE.
        let dotted_capital = "\u{0130}stanbul";
        assert!(matches_normalized(dotted_capital, "istanbul", &opts));
    }

    // --- UTF-16 output ---

    #[test]
    fn utf16_encoding() {
        let opts = default_opts();
        let units = normalize_for_matching_utf16("hello", &opts);
        assert!(!units.is_empty());

        let round_trip = String::from_utf16(&units).expect("valid UTF-16");
        assert_eq!(round_trip, normalize_for_matching("hello", &opts));
    }

    #[test]
    fn utf16_supplementary() {
        let opts = default_opts();
        // U+1F600 lies outside the BMP, so its UTF-16 form needs a surrogate pair.
        let emoji = "\u{1F600}";
        let units = normalize_for_matching_utf16(emoji, &opts);
        assert!(!units.is_empty());

        let round_trip = String::from_utf16(&units).expect("valid UTF-16");
        assert_eq!(round_trip, normalize_for_matching(emoji, &opts));
    }

    // --- Idempotence and negative cases ---

    #[test]
    fn matching_idempotent() {
        let opts = default_opts();
        let samples = [
            "hello",
            "File",
            "\u{0430}\u{0440}\u{0440}l\u{0435}",
            "\u{00C0}",
        ];
        for input in samples.iter() {
            let first = normalize_for_matching(input, &opts);
            let second = normalize_for_matching(&first, &opts);
            assert_eq!(
                first, second,
                "normalize_for_matching should be idempotent for {:?}",
                input
            );
        }
    }

    #[test]
    fn matching_not_confusable_different_words() {
        let opts = default_opts();
        for (left, right) in [("hello", "world"), ("file", "pile")].iter().copied() {
            assert!(!matches_normalized(left, right, &opts));
        }
    }
}