feruca/
ascii.rs

1use std::cmp::Ordering;
2
3pub fn fill_and_check(
4    a_iter: &mut impl Iterator<Item = u32>,
5    b_iter: &mut impl Iterator<Item = u32>,
6    a_chars: &mut Vec<u32>,
7    b_chars: &mut Vec<u32>,
8) -> Option<Ordering> {
9    let mut backup: Option<Ordering> = None;
10    let mut bad = false;
11
12    #[allow(clippy::while_let_loop)]
13    loop {
14        let Some(a) = a_iter.next() else { break }; // Break if iterator exhausted
15        a_chars.push(a);
16
17        if !ascii_alphanumeric(a) {
18            bad = true;
19            break; // Break and set `bad` if non-ASCII character found
20        }
21
22        let Some(b) = b_iter.next() else { break }; // Break if iterator exhausted
23        b_chars.push(b);
24
25        if !ascii_alphanumeric(b) {
26            bad = true;
27            break; // Break and set `bad` if non-ASCII character found
28        }
29
30        if a == b {
31            continue; // Continue if we found identical ASCII characters
32        }
33
34        let a_folded = if a > 0x5A { a - 0x20 } else { a };
35        let b_folded = if b > 0x5A { b - 0x20 } else { b };
36
37        // This means the characters differ only in case (since they weren't equal before folding)
38        if a_folded == b_folded {
39            if backup.is_none() {
40                // The backup value will be set only once, i.e., at the first case difference. We
41                // compare the characters in reverse order here because ASCII has uppercase letters
42                // before lowercase, but we need the opposite for Unicode collation.
43                backup = Some(b.cmp(&a));
44            }
45
46            continue;
47        }
48
49        // We found a difference between ASCII characters; return it
50        return Some(a_folded.cmp(&b_folded));
51    }
52
53    // Finish filling code point Vecs
54    a_chars.extend(a_iter);
55    b_chars.extend(b_iter);
56
57    if bad {
58        return None;
59    }
60
61    // If we found no non-ASCII characters, and one string is a prefix of the other, the longer
62    // string wins.
63    if a_chars.len() != b_chars.len() {
64        return Some(a_chars.len().cmp(&b_chars.len()));
65    }
66
67    // If we found an ASCII case difference, return it; otherwise this will be None
68    backup
69}
70
/// Reports whether `c` is the code point of an ASCII digit (`0`–`9`), an uppercase letter
/// (`A`–`Z`), or a lowercase letter (`a`–`z`). Every other value, including ASCII punctuation
/// and anything outside the ASCII range, yields `false`.
fn ascii_alphanumeric(c: u32) -> bool {
    matches!(
        c,
        0x30..=0x39   // Digits
        | 0x41..=0x5A // Uppercase letters
        | 0x61..=0x7A // Lowercase letters
    )
}
75}