feruca/ascii.rs
1use std::cmp::Ordering;
2
3pub fn fill_and_check(
4 a_iter: &mut impl Iterator<Item = u32>,
5 b_iter: &mut impl Iterator<Item = u32>,
6 a_chars: &mut Vec<u32>,
7 b_chars: &mut Vec<u32>,
8) -> Option<Ordering> {
9 let mut backup: Option<Ordering> = None;
10 let mut bad = false;
11
12 #[allow(clippy::while_let_loop)]
13 loop {
14 let Some(a) = a_iter.next() else { break }; // Break if iterator exhausted
15 a_chars.push(a);
16
17 if !ascii_alphanumeric(a) {
18 bad = true;
19 break; // Break and set `bad` if non-ASCII character found
20 }
21
22 let Some(b) = b_iter.next() else { break }; // Break if iterator exhausted
23 b_chars.push(b);
24
25 if !ascii_alphanumeric(b) {
26 bad = true;
27 break; // Break and set `bad` if non-ASCII character found
28 }
29
30 if a == b {
31 continue; // Continue if we found identical ASCII characters
32 }
33
34 let a_folded = if a > 0x5A { a - 0x20 } else { a };
35 let b_folded = if b > 0x5A { b - 0x20 } else { b };
36
37 // This means the characters differ only in case (since they weren't equal before folding)
38 if a_folded == b_folded {
39 if backup.is_none() {
40 // The backup value will be set only once, i.e., at the first case difference. We
41 // compare the characters in reverse order here because ASCII has uppercase letters
42 // before lowercase, but we need the opposite for Unicode collation.
43 backup = Some(b.cmp(&a));
44 }
45
46 continue;
47 }
48
49 // We found a difference between ASCII characters; return it
50 return Some(a_folded.cmp(&b_folded));
51 }
52
53 // Finish filling code point Vecs
54 a_chars.extend(a_iter);
55 b_chars.extend(b_iter);
56
57 if bad {
58 return None;
59 }
60
61 // If we found no non-ASCII characters, and one string is a prefix of the other, the longer
62 // string wins.
63 if a_chars.len() != b_chars.len() {
64 return Some(a_chars.len().cmp(&b_chars.len()));
65 }
66
67 // If we found an ASCII case difference, return it; otherwise this will be None
68 backup
69}
70
/// Reports whether `c` is the code point of an ASCII digit (`0x30..=0x39`), an ASCII uppercase
/// letter (`0x41..=0x5A`), or an ASCII lowercase letter (`0x61..=0x7A`).
fn ascii_alphanumeric(c: u32) -> bool {
    matches!(c, 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A)
}