feruca/
ascii.rs

1use std::cmp::Ordering;
2
3pub fn fill_and_check(
4    a_iter: &mut impl Iterator<Item = u32>,
5    b_iter: &mut impl Iterator<Item = u32>,
6    a_chars: &mut Vec<u32>,
7    b_chars: &mut Vec<u32>,
8) -> Option<Ordering> {
9    let mut backup: Option<Ordering> = None;
10    let mut bad = false;
11
12    loop {
13        let Some(a) = a_iter.next() else { break }; // Break if iterator exhausted
14        a_chars.push(a);
15
16        if !ascii_alphanumeric(a) {
17            bad = true;
18            break; // Break and set `bad` if non-ASCII character found
19        }
20
21        let Some(b) = b_iter.next() else { break }; // Break if iterator exhausted
22        b_chars.push(b);
23
24        if !ascii_alphanumeric(b) {
25            bad = true;
26            break; // Break and set `bad` if non-ASCII character found
27        }
28
29        if a == b {
30            continue; // Continue if we found identical ASCII characters
31        }
32
33        let a_folded = if a > 0x5A { a - 0x20 } else { a };
34        let b_folded = if b > 0x5A { b - 0x20 } else { b };
35
36        // This means the characters differ only in case (since they weren't equal before folding)
37        if a_folded == b_folded {
38            if backup.is_none() {
39                // The backup value will be set only once, i.e., at the first case difference. We
40                // compare the characters in reverse order here because ASCII has uppercase letters
41                // before lowercase, but we need the opposite for Unicode collation.
42                backup = Some(b.cmp(&a));
43            }
44
45            continue;
46        }
47
48        // We found a difference between ASCII characters; return it
49        return Some(a_folded.cmp(&b_folded));
50    }
51
52    // Finish filling code point Vecs
53    a_chars.extend(a_iter);
54    b_chars.extend(b_iter);
55
56    if bad {
57        return None;
58    }
59
60    // If we found no non-ASCII characters, and one string is a prefix of the other, the longer
61    // string wins.
62    if a_chars.len() != b_chars.len() {
63        return Some(a_chars.len().cmp(&b_chars.len()));
64    }
65
66    // If we found an ASCII case difference, return it; otherwise this will be None
67    backup
68}
69
/// Reports whether `c` is the code point of an ASCII digit (`0`-`9`), an ASCII uppercase letter
/// (`A`-`Z`), or an ASCII lowercase letter (`a`-`z`). Every other value yields `false`.
fn ascii_alphanumeric(c: u32) -> bool {
    matches!(
        c,
        0x30..=0x39 // Digits
        | 0x41..=0x5A // Uppercase letters
        | 0x61..=0x7A // Lowercase letters
    )
}
74}