1#![doc = include_str!("../README.md")]
2#![no_std]
3mod rfc3454;
26
27#[cfg(feature = "alloc")]
28extern crate alloc;
29#[cfg(feature = "alloc")]
30use alloc::string::String;
31
32use core::iter::{Filter, FlatMap, FusedIterator, Iterator, Map};
33use core::slice::Iter;
34use core::str::Chars;
35use unicode_normalization::{Recompositions, UnicodeNormalization};
36
/// U+FFFD REPLACEMENT CHARACTER, substituted for input code units that are not
/// valid Unicode scalar values.
const REPLACEMENT_CHARACTER: char = char::REPLACEMENT_CHARACTER;
38
/// Returns `true` when `c` lies in U+30000..=U+DFFFF, the range this crate
/// treats as unassigned code points.
#[inline]
const fn is_unassigned(c: char) -> bool {
    matches!(c, '\u{30000}'..='\u{DFFFF}')
}
53
54#[inline]
56const fn is_private_use(c: char) -> bool {
57 matches!(c, '\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}')
58}
59
/// Returns `true` when `c` is a Unicode noncharacter: a code point whose low
/// 16 bits are `0xFFFE` or `0xFFFF` (in any plane), or one in
/// U+FDD0..=U+FDEF.
#[inline]
const fn is_non_char(c: char) -> bool {
    // Masking with 0xFFFF strips the plane number, leaving the in-plane offset.
    let offset_in_plane = (c as u32) & 0xFFFF;
    matches!(offset_in_plane, 0xFFFE | 0xFFFF) || matches!(c, '\u{FDD0}'..='\u{FDEF}')
}
69
/// Filter predicate for the "map" step: returns `false` for every character
/// that is mapped to nothing (i.e. dropped) and `true` for every character
/// that survives, either unchanged or mapped to SPACE by a later stage.
///
/// Takes `&char` so it can be used directly as an `Iterator::filter`
/// predicate.
#[inline]
fn x520_mapped_to_something(c: &char) -> bool {
    match *c {
        // Soft hyphens, combining grapheme joiner, variation selectors,
        // object replacement character and zero width space: dropped.
        '\u{00AD}'
        | '\u{1806}'
        | '\u{034F}'
        | '\u{180B}'..='\u{180D}'
        | '\u{FE00}'..='\u{FE0F}'
        | '\u{FFFC}'
        | '\u{200B}' => false,
        // Whitespace-like controls are kept here; they become SPACE later.
        // This arm must precede the catch-all, which would otherwise drop
        // them as control characters.
        '\u{09}' | '\u{0A}'..='\u{0D}' | '\u{85}' => true,
        // Code points with a control function that `char::is_control` (Cc
        // only) does not cover. RFC 4518 section 2.2 enumerates these as
        // mapped to nothing.
        '\u{06DD}'
        | '\u{070F}'
        | '\u{180E}'
        | '\u{200C}'..='\u{200F}'
        | '\u{202A}'..='\u{202E}'
        | '\u{2060}'..='\u{2063}'
        | '\u{206A}'..='\u{206F}'
        | '\u{FEFF}'
        | '\u{FFF9}'..='\u{FFFB}'
        | '\u{1D173}'..='\u{1D17A}'
        | '\u{E0001}'
        | '\u{E0020}'..='\u{E007F}' => false,
        // Every remaining control character (C0, DEL, C1) is dropped;
        // everything else is kept.
        _ => !c.is_control(),
    }
}
87
/// Returns `true` when `c` is one of the space, line or paragraph separator
/// code points listed in the pattern below.
#[inline]
fn is_separator(c: char) -> bool {
    matches!(
        c,
        '\u{20}'
            | '\u{a0}'
            | '\u{1680}'
            | '\u{2000}'..='\u{200a}'
            | '\u{2028}'
            | '\u{2029}'
            | '\u{202f}'
            | '\u{205f}'
            | '\u{3000}'
    )
}
104
105#[inline]
106fn x520_map(c: char) -> char {
107 match c {
108 '\u{09}' | '\u{0A}'..='\u{0D}' | '\u{85}' => ' ',
109 c => {
110 if is_separator(c) {
111 ' '
112 } else {
113 c
114 }
115 }
116 }
117}
118
119#[inline]
121fn case_fold_for_nfkc(c: char) -> CaseFoldForNfkc {
122 let inner = match rfc3454::B_2.binary_search_by_key(&c, |e| e.0) {
123 Ok(idx) => FoldInner::Chars(rfc3454::B_2[idx].1.chars()),
124 Err(_) => FoldInner::Char(Some(c)),
125 };
126 CaseFoldForNfkc(inner)
127}
128
/// Backing state for [`CaseFoldForNfkc`].
enum FoldInner {
    /// The remaining characters of a replacement string.
    Chars(Chars<'static>),
    /// A single character that has not been yielded yet (`None` once taken).
    Char(Option<char>),
}

/// Iterator over the characters produced by folding one input character.
struct CaseFoldForNfkc(FoldInner);

impl Iterator for CaseFoldForNfkc {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        match &mut self.0 {
            FoldInner::Chars(remaining) => remaining.next(),
            FoldInner::Char(pending) => pending.take(),
        }
    }
}

// Both variants keep returning `None` once exhausted.
impl FusedIterator for CaseFoldForNfkc {}
149
150pub struct X520CaseExactStringPrepChars<I>
151 where I: Iterator<Item = char> {
152 s: Recompositions<Map<Filter<I, fn(&char) -> bool>, fn(char) -> char>>,
153 previous_was_space: bool,
154}
155
156impl<I> X520CaseExactStringPrepChars<I>
157 where I: Iterator<Item = char> {
158 pub fn new(s: I) -> Self {
159 X520CaseExactStringPrepChars {
160 previous_was_space: false,
161 s: s
162 .filter(x520_mapped_to_something as fn(&char) -> bool)
163 .map(x520_map as fn(_) -> _)
164 .nfkc(),
165 }
166 }
167}
168
169impl<I> Iterator for X520CaseExactStringPrepChars<I>
170 where I: Iterator<Item = char> {
171 type Item = Result<char, char>;
173
174 fn next(&mut self) -> Option<Self::Item> {
175 while let Some(c) = self.s.next() {
176 if c == ' ' {
177 if self.previous_was_space == true {
178 continue;
179 } else {
180 self.previous_was_space = true;
181 return Some(Ok(' '));
182 }
183 }
184 self.previous_was_space = false;
185 if is_unassigned(c) || is_private_use(c) || is_non_char(c) || c == '\u{FFFD}' {
188 return Some(Err(c));
190 }
191 return Some(Ok(c));
192 }
193 None
194 }
195}
196
197impl<I> FusedIterator for X520CaseExactStringPrepChars<I>
198 where I: Iterator<Item = char> {
199}
200
201pub struct X520CaseIgnoreStringPrepChars<I>
202 where I: Iterator<Item = char> {
203 s: Recompositions<
204 FlatMap<
205 Map<Filter<I, fn(&char) -> bool>, fn(char) -> char>,
206 CaseFoldForNfkc,
207 fn(char) -> CaseFoldForNfkc,
208 >,
209 >,
210 previous_was_space: bool,
211}
212
213impl<I> FusedIterator for X520CaseIgnoreStringPrepChars<I>
214 where I: Iterator<Item = char> {
215}
216
217impl<I> X520CaseIgnoreStringPrepChars<I>
218 where I: Iterator<Item = char> {
219 pub fn new(s: I) -> Self {
220 X520CaseIgnoreStringPrepChars {
221 previous_was_space: false,
222 s: s
223 .filter(x520_mapped_to_something as fn(&char) -> bool)
224 .map(x520_map as fn(_) -> _)
225 .flat_map(case_fold_for_nfkc as fn(_) -> _)
226 .nfkc(),
227 }
228 }
229}
230
231impl<I> Iterator for X520CaseIgnoreStringPrepChars<I>
232 where I: Iterator<Item = char> {
233 type Item = Result<char, char>;
235
236 fn next(&mut self) -> Option<Self::Item> {
237 while let Some(c) = self.s.next() {
238 if c == ' ' {
239 if self.previous_was_space == true {
240 continue;
241 } else {
242 self.previous_was_space = true;
243 return Some(Ok(' '));
244 }
245 }
246 self.previous_was_space = false;
247 if is_unassigned(c) || is_private_use(c) || is_non_char(c) || c == '\u{FFFD}' {
250 return Some(Err(c));
252 }
253 return Some(Ok(c));
254 }
255 None
256 }
257}
258
259#[inline]
263pub fn x520_stringprep_case_exact_str<'a>(s: &'a str) -> X520CaseExactStringPrepChars<Chars<'a>> {
264 X520CaseExactStringPrepChars::new(s.chars())
265}
266
267#[inline]
271pub fn x520_stringprep_case_ignore_str<'a>(s: &'a str) -> X520CaseIgnoreStringPrepChars<Chars<'a>> {
272 X520CaseIgnoreStringPrepChars::new(s.chars())
273}
274
275#[inline]
279pub fn x520_stringprep_case_exact_bmp<'a>(s: &'a [u16]) -> X520CaseExactStringPrepChars<Map<Iter<'a, u16>, fn(&u16) -> char>> {
280 let it: Map<Iter<'a, u16>, fn(&u16) -> char> = s
281 .iter()
282 .map(|c| char::from_u32(*c as u32).unwrap_or(REPLACEMENT_CHARACTER));
283 X520CaseExactStringPrepChars::new(it)
284}
285
286#[inline]
290pub fn x520_stringprep_case_ignore_bmp<'a>(s: &'a [u16]) -> X520CaseIgnoreStringPrepChars<Map<Iter<'a, u16>, fn(&u16) -> char>> {
291 let it: Map<Iter<'a, u16>, fn(&u16) -> char> = s
292 .iter()
293 .map(|c| char::from_u32(*c as u32).unwrap_or(REPLACEMENT_CHARACTER));
294 X520CaseIgnoreStringPrepChars::new(it)
295}
296
297#[inline]
301pub fn x520_stringprep_case_exact_univ_str<'a>(s: &'a [u32]) -> X520CaseExactStringPrepChars<Map<Iter<'a, u32>, fn(&u32) -> char>> {
302 let it: Map<Iter<'a, u32>, fn(&u32) -> char> = s
303 .iter()
304 .map(|c| char::from_u32(*c as u32).unwrap_or(REPLACEMENT_CHARACTER));
305 X520CaseExactStringPrepChars::new(it)
306}
307
308#[inline]
312pub fn x520_stringprep_case_ignore_univ_str<'a>(s: &'a [u32]) -> X520CaseIgnoreStringPrepChars<Map<Iter<'a, u32>, fn(&u32) -> char>> {
313 let it: Map<Iter<'a, u32>, fn(&u32) -> char> = s
314 .iter()
315 .map(|c| char::from_u32(*c).unwrap_or(REPLACEMENT_CHARACTER));
316 X520CaseIgnoreStringPrepChars::new(it)
317}
318
319pub fn is_x520_stringprepped_case_exact_str(s: &str) -> bool {
323 let mut chars = s.chars();
324 let mut it = x520_stringprep_case_exact_str(s);
325 while let Some(c) = it.next() {
326 if c.is_err() {
327 return false;
328 }
329 if chars.next() != Some(c.unwrap()) {
330 return false;
331 }
332 }
333 true
334}
335
336pub fn is_x520_stringprepped_case_ignore_str(s: &str) -> bool {
340 let mut chars = s.chars();
341 let mut it = x520_stringprep_case_ignore_str(s);
342 while let Some(c) = it.next() {
343 if c.is_err() {
344 return false;
345 }
346 if chars.next() != Some(c.unwrap()) {
347 return false;
348 }
349 }
350 true
351}
352
/// Runs case-exact string preparation over `s` and collects the result into
/// an owned `String`.
///
/// # Errors
///
/// Returns `Err(c)` with the first prohibited character encountered.
#[cfg(feature = "alloc")]
#[inline]
pub fn x520_stringprep_to_case_exact_string(s: &str) -> Result<String, char> {
    let prepared = x520_stringprep_case_exact_str(s);
    prepared.collect::<Result<String, char>>()
}
362
/// Runs case-ignore string preparation over `s` and collects the result into
/// an owned `String`.
///
/// # Errors
///
/// Returns `Err(c)` with the first prohibited character encountered.
#[cfg(feature = "alloc")]
#[inline]
pub fn x520_stringprep_to_case_ignore_string(s: &str) -> Result<String, char> {
    let prepared = x520_stringprep_case_ignore_str(s);
    prepared.collect::<Result<String, char>>()
}
372
373#[inline]
377pub fn x520_stringprep_case_exact_compare(s1: &str, s2: &str) -> bool {
378 x520_stringprep_case_exact_str(s1).eq(x520_stringprep_case_exact_str(s2))
379}
380
381#[inline]
385pub fn x520_stringprep_case_ignore_compare(s1: &str, s2: &str) -> bool {
386 x520_stringprep_case_ignore_str(s1).eq(x520_stringprep_case_ignore_str(s2))
387}
388
#[cfg(test)]
mod tests {
    use super::{
        x520_stringprep_case_exact_bmp, x520_stringprep_case_exact_str,
        x520_stringprep_case_exact_univ_str, x520_stringprep_case_ignore_str,
    };
    extern crate alloc;
    use alloc::string::String;
    use alloc::vec::Vec;

    /// Case-exact preparation of `input`, panicking on any prohibited character.
    fn exact(input: &str) -> String {
        x520_stringprep_case_exact_str(input)
            .map(|item| item.unwrap())
            .collect()
    }

    /// Case-ignore preparation of `input`, panicking on any prohibited character.
    fn ignore(input: &str) -> String {
        x520_stringprep_case_ignore_str(input)
            .map(|item| item.unwrap())
            .collect()
    }

    /// Case-exact preparation of `input`, surfacing the first prohibited character.
    fn try_exact(input: &str) -> Result<String, char> {
        x520_stringprep_case_exact_str(input).collect()
    }

    #[test]
    fn test_case_exact_stringprep_1() {
        assert_eq!(exact("Jonathan Wilbur").as_str(), "Jonathan Wilbur");
    }

    #[test]
    fn test_nfkc_normalization() {
        // A base letter plus one combining mark composes.
        assert_eq!(exact("e\u{0301}"), "é");
        // A second combining mark with no precomposed form stays separate.
        assert_eq!(exact("e\u{0301}\u{0300}"), "é\u{0300}");
    }

    #[test]
    fn test_whitespace_mapping() {
        // ASCII whitespace controls.
        assert_eq!(exact("Hello\tWorld\nTest\r\nSpace"), "Hello World Test Space");
        // Unicode space separators.
        assert_eq!(
            exact("Hello\u{2000}World\u{2001}Test\u{2002}Space"),
            "Hello World Test Space"
        );
        // A mix of both.
        assert_eq!(
            exact("Hello\t\u{2000}World\n\u{2001}Test\r\u{2002}Space"),
            "Hello World Test Space"
        );
    }

    #[test]
    fn test_space_consolidation() {
        assert_eq!(exact("Hello World"), "Hello World");
        assert_eq!(exact("Hello\t\t\n\n\r\rWorld"), "Hello World");
        assert_eq!(exact("Hello\u{2000}\u{2001}\u{2002}World"), "Hello World");
        assert_eq!(exact("Hello \t\u{2000}\nWorld"), "Hello World");
    }

    #[test]
    fn test_leading_trailing_spaces() {
        assert_eq!(exact(" Hello World"), " Hello World");
        assert_eq!(exact("Hello World "), "Hello World ");
        assert_eq!(exact(" Hello World "), " Hello World ");
    }

    #[test]
    fn test_prohibited_characters() {
        // Private use.
        let result = try_exact("Hello\u{E000}World");
        assert!(result.is_err());
        assert_eq!(result.unwrap_err(), '\u{E000}');

        // Noncharacter.
        let result = try_exact("Hello\u{FDD0}World");
        assert!(result.is_err());
        assert_eq!(result.unwrap_err(), '\u{FDD0}');

        // Replacement character.
        let result = try_exact("Hello\u{FFFD}World");
        assert!(result.is_err());
        assert_eq!(result.unwrap_err(), '\u{FFFD}');
    }

    #[test]
    fn test_control_characters() {
        assert_eq!(exact("Hello\u{0009}World"), "Hello World");
        assert_eq!(exact("Hello\u{000A}World"), "Hello World");
        assert_eq!(exact("Hello\u{000D}World"), "Hello World");
        assert_eq!(exact("Hello\u{0085}World"), "Hello World");
    }

    #[test]
    fn test_filtered_characters() {
        assert_eq!(exact("Hello\u{00AD}World"), "HelloWorld");
        assert_eq!(exact("Hello\u{200B}World"), "HelloWorld");
        assert_eq!(exact("Hello\u{FFFC}World"), "HelloWorld");
    }

    #[test]
    fn test_complex_normalization() {
        let input = " Hello\te\u{0301}\u{2000}Ä\u{FB03}n ";
        assert_eq!(exact(input), " Hello é Äffin ");
        assert_eq!(ignore(input), " hello é äffin ");
    }

    #[test]
    fn test_empty_string() {
        let input = "";
        assert_eq!(exact(input), "");
        assert_eq!(ignore(input), "");
    }

    #[test]
    fn test_only_spaces() {
        let input = " \t\n\r ";
        assert_eq!(exact(input), " ");
        assert_eq!(ignore(input), " ");
    }

    #[test]
    fn test_case_ignore_stringprep_1() {
        assert_eq!(ignore("Jonathan Wilbur").as_str(), "jonathan wilbur");
    }

    #[test]
    fn test_bmp_string_1() {
        let input: Vec<u16> = "Jonathan Wilbur".encode_utf16().collect();
        let output = x520_stringprep_case_exact_bmp(input.as_slice())
            .collect::<Result<String, char>>()
            .unwrap();
        assert_eq!(output.as_str(), "Jonathan Wilbur");
    }

    #[test]
    fn test_univ_string_1() {
        let input: Vec<u32> = "Jonathan Wilbur".chars().map(|c| c as u32).collect();
        let output = x520_stringprep_case_exact_univ_str(input.as_slice())
            .collect::<Result<String, char>>()
            .unwrap();
        assert_eq!(output.as_str(), "Jonathan Wilbur");
    }
}