feruca/ascii.rs
1use std::cmp::Ordering;
2
3pub fn fill_and_check(
4 a_iter: &mut impl Iterator<Item = u32>,
5 b_iter: &mut impl Iterator<Item = u32>,
6 a_chars: &mut Vec<u32>,
7 b_chars: &mut Vec<u32>,
8) -> Option<Ordering> {
9 let mut backup: Option<Ordering> = None;
10 let mut bad = false;
11
12 loop {
13 let Some(a) = a_iter.next() else { break }; // Break if iterator exhausted
14 a_chars.push(a);
15
16 if !ascii_alphanumeric(a) {
17 bad = true;
18 break; // Break and set `bad` if non-ASCII character found
19 }
20
21 let Some(b) = b_iter.next() else { break }; // Break if iterator exhausted
22 b_chars.push(b);
23
24 if !ascii_alphanumeric(b) {
25 bad = true;
26 break; // Break and set `bad` if non-ASCII character found
27 }
28
29 if a == b {
30 continue; // Continue if we found identical ASCII characters
31 }
32
33 let a_folded = if a > 0x5A { a - 0x20 } else { a };
34 let b_folded = if b > 0x5A { b - 0x20 } else { b };
35
36 // This means the characters differ only in case (since they weren't equal before folding)
37 if a_folded == b_folded {
38 if backup.is_none() {
39 // The backup value will be set only once, i.e., at the first case difference. We
40 // compare the characters in reverse order here because ASCII has uppercase letters
41 // before lowercase, but we need the opposite for Unicode collation.
42 backup = Some(b.cmp(&a));
43 }
44
45 continue;
46 }
47
48 // We found a difference between ASCII characters; return it
49 return Some(a_folded.cmp(&b_folded));
50 }
51
52 // Finish filling code point Vecs
53 a_chars.extend(a_iter);
54 b_chars.extend(b_iter);
55
56 if bad {
57 return None;
58 }
59
60 // If we found no non-ASCII characters, and one string is a prefix of the other, the longer
61 // string wins.
62 if a_chars.len() != b_chars.len() {
63 return Some(a_chars.len().cmp(&b_chars.len()));
64 }
65
66 // If we found an ASCII case difference, return it; otherwise this will be None
67 backup
68}
69
/// Reports whether `c` is the code point of an ASCII digit (`0`-`9`), an uppercase ASCII letter
/// (`A`-`Z`), or a lowercase ASCII letter (`a`-`z`).
///
/// Everything else yields `false`, including the punctuation and symbols that sit between
/// those three ranges and every value above `0x7A`.
fn ascii_alphanumeric(c: u32) -> bool {
    matches!(
        c,
        0x30..=0x39     // '0'..='9'
        | 0x41..=0x5A   // 'A'..='Z'
        | 0x61..=0x7A   // 'a'..='z'
    )
}