Skip to main content

coreutils_rs/sort/
compare.rs

1/// Comparison functions for different sort modes.
2/// All comparison functions are allocation-free for maximum sort performance.
3use std::cmp::Ordering;
4
5use super::key::KeyOpts;
6
7/// Strip leading blanks (space and tab).
8#[inline]
9pub fn skip_leading_blanks(s: &[u8]) -> &[u8] {
10    let mut i = 0;
11    while i < s.len() && (s[i] == b' ' || s[i] == b'\t') {
12        i += 1;
13    }
14    &s[i..]
15}
16
17/// Compare two byte slices lexicographically (default sort).
18#[inline]
19pub fn compare_lexical(a: &[u8], b: &[u8]) -> Ordering {
20    a.cmp(b)
21}
22
23/// Numeric sort (-n): compare leading numeric strings.
24/// Handles optional leading whitespace, sign, and decimal point.
25pub fn compare_numeric(a: &[u8], b: &[u8]) -> Ordering {
26    let va = parse_numeric_value(a);
27    let vb = parse_numeric_value(b);
28    va.partial_cmp(&vb).unwrap_or(Ordering::Equal)
29}
30
31pub fn parse_numeric_value(s: &[u8]) -> f64 {
32    let s = skip_leading_blanks(s);
33    if s.is_empty() {
34        return 0.0;
35    }
36    let end = find_numeric_end(s);
37    if end == 0 {
38        return 0.0;
39    }
40    match std::str::from_utf8(&s[..end]) {
41        Ok(num_str) => num_str.parse::<f64>().unwrap_or(0.0),
42        Err(_) => 0.0,
43    }
44}
45
46fn find_numeric_end(s: &[u8]) -> usize {
47    let mut i = 0;
48    if i < s.len() && (s[i] == b'+' || s[i] == b'-') {
49        i += 1;
50    }
51    let mut has_digits = false;
52    while i < s.len() && s[i].is_ascii_digit() {
53        i += 1;
54        has_digits = true;
55    }
56    if i < s.len() && s[i] == b'.' {
57        i += 1;
58        while i < s.len() && s[i].is_ascii_digit() {
59            i += 1;
60            has_digits = true;
61        }
62    }
63    if has_digits { i } else { 0 }
64}
65
66/// General numeric sort (-g): handles scientific notation, infinity, NaN.
67/// O(n) parser.
68pub fn compare_general_numeric(a: &[u8], b: &[u8]) -> Ordering {
69    let va = parse_general_numeric(a);
70    let vb = parse_general_numeric(b);
71    match (va.is_nan(), vb.is_nan()) {
72        (true, true) => Ordering::Equal,
73        (true, false) => Ordering::Less,
74        (false, true) => Ordering::Greater,
75        (false, false) => va.partial_cmp(&vb).unwrap_or(Ordering::Equal),
76    }
77}
78
79pub fn parse_general_numeric(s: &[u8]) -> f64 {
80    let s = skip_leading_blanks(s);
81    let s_str = match std::str::from_utf8(s) {
82        Ok(s) => s.trim(),
83        Err(_) => return f64::NAN,
84    };
85    if s_str.is_empty() {
86        return f64::NAN;
87    }
88    // Try parsing the whole string first (handles "inf", "-inf", "nan", etc.)
89    if let Ok(v) = s_str.parse::<f64>() {
90        return v;
91    }
92
93    // Find the longest valid float prefix in O(n)
94    let bytes = s_str.as_bytes();
95    let mut i = 0;
96
97    // Sign
98    if i < bytes.len() && (bytes[i] == b'+' || bytes[i] == b'-') {
99        i += 1;
100    }
101    // Digits before decimal
102    let mut has_digits = false;
103    while i < bytes.len() && bytes[i].is_ascii_digit() {
104        i += 1;
105        has_digits = true;
106    }
107    // Decimal point
108    if i < bytes.len() && bytes[i] == b'.' {
109        i += 1;
110        while i < bytes.len() && bytes[i].is_ascii_digit() {
111            i += 1;
112            has_digits = true;
113        }
114    }
115    if !has_digits {
116        return f64::NAN;
117    }
118    // Exponent
119    if i < bytes.len() && (bytes[i] == b'e' || bytes[i] == b'E') {
120        let save = i;
121        i += 1;
122        if i < bytes.len() && (bytes[i] == b'+' || bytes[i] == b'-') {
123            i += 1;
124        }
125        if i < bytes.len() && bytes[i].is_ascii_digit() {
126            while i < bytes.len() && bytes[i].is_ascii_digit() {
127                i += 1;
128            }
129        } else {
130            i = save;
131        }
132    }
133
134    s_str[..i].parse::<f64>().unwrap_or(f64::NAN)
135}
136
137/// Human numeric sort (-h): handles suffixes K, M, G, T, P, E, Z, Y.
138pub fn compare_human_numeric(a: &[u8], b: &[u8]) -> Ordering {
139    let va = parse_human_numeric(a);
140    let vb = parse_human_numeric(b);
141    va.partial_cmp(&vb).unwrap_or(Ordering::Equal)
142}
143
144pub fn parse_human_numeric(s: &[u8]) -> f64 {
145    let s = skip_leading_blanks(s);
146    if s.is_empty() {
147        return 0.0;
148    }
149    let end = find_numeric_end(s);
150    let base = if end == 0 {
151        0.0
152    } else {
153        match std::str::from_utf8(&s[..end]) {
154            Ok(num_str) => num_str.parse::<f64>().unwrap_or(0.0),
155            Err(_) => 0.0,
156        }
157    };
158    if end < s.len() {
159        let multiplier = match s[end] {
160            b'K' | b'k' => 1e3,
161            b'M' => 1e6,
162            b'G' => 1e9,
163            b'T' => 1e12,
164            b'P' => 1e15,
165            b'E' => 1e18,
166            b'Z' => 1e21,
167            b'Y' => 1e24,
168            _ => 1.0,
169        };
170        base * multiplier
171    } else {
172        base
173    }
174}
175
176/// Month sort (-M).
177pub fn compare_month(a: &[u8], b: &[u8]) -> Ordering {
178    let ma = parse_month(a);
179    let mb = parse_month(b);
180    ma.cmp(&mb)
181}
182
183fn parse_month(s: &[u8]) -> u8 {
184    let s = skip_leading_blanks(s);
185    if s.len() < 3 {
186        return 0;
187    }
188    let m = [
189        s[0].to_ascii_uppercase(),
190        s[1].to_ascii_uppercase(),
191        s[2].to_ascii_uppercase(),
192    ];
193    match &m {
194        b"JAN" => 1,
195        b"FEB" => 2,
196        b"MAR" => 3,
197        b"APR" => 4,
198        b"MAY" => 5,
199        b"JUN" => 6,
200        b"JUL" => 7,
201        b"AUG" => 8,
202        b"SEP" => 9,
203        b"OCT" => 10,
204        b"NOV" => 11,
205        b"DEC" => 12,
206        _ => 0,
207    }
208}
209
210/// Version sort (-V): natural sort of version numbers.
211pub fn compare_version(a: &[u8], b: &[u8]) -> Ordering {
212    let a_str = std::str::from_utf8(a).unwrap_or("");
213    let b_str = std::str::from_utf8(b).unwrap_or("");
214    compare_version_str(a_str, b_str)
215}
216
217fn compare_version_str(a: &str, b: &str) -> Ordering {
218    let mut ai = a.chars().peekable();
219    let mut bi = b.chars().peekable();
220
221    loop {
222        match (ai.peek(), bi.peek()) {
223            (None, None) => return Ordering::Equal,
224            (None, Some(_)) => return Ordering::Less,
225            (Some(_), None) => return Ordering::Greater,
226            (Some(&ac), Some(&bc)) => {
227                if ac.is_ascii_digit() && bc.is_ascii_digit() {
228                    let anum = consume_number(&mut ai);
229                    let bnum = consume_number(&mut bi);
230                    match anum.cmp(&bnum) {
231                        Ordering::Equal => continue,
232                        other => return other,
233                    }
234                } else {
235                    match ac.cmp(&bc) {
236                        Ordering::Equal => {
237                            ai.next();
238                            bi.next();
239                        }
240                        other => return other,
241                    }
242                }
243            }
244        }
245    }
246}
247
248fn consume_number(iter: &mut std::iter::Peekable<std::str::Chars>) -> u64 {
249    let mut n: u64 = 0;
250    while let Some(&c) = iter.peek() {
251        if c.is_ascii_digit() {
252            n = n.saturating_mul(10).saturating_add(c as u64 - '0' as u64);
253            iter.next();
254        } else {
255            break;
256        }
257    }
258    n
259}
260
261/// Random sort (-R): hash-based shuffle that groups identical keys.
262pub fn compare_random(a: &[u8], b: &[u8], seed: u64) -> Ordering {
263    let ha = fnv1a_hash(a, seed);
264    let hb = fnv1a_hash(b, seed);
265    ha.cmp(&hb)
266}
267
268/// FNV-1a hash with seed mixing.
269#[inline]
270fn fnv1a_hash(data: &[u8], seed: u64) -> u64 {
271    let mut hash = 0xcbf29ce484222325u64 ^ seed;
272    for &b in data {
273        hash ^= b as u64;
274        hash = hash.wrapping_mul(0x100000001b3);
275    }
276    hash
277}
278
279/// Compare with text filtering (-d, -i, -f flags in any combination).
280/// Allocation-free: uses iterator filtering.
281#[inline]
282fn is_dict_char(b: u8) -> bool {
283    b.is_ascii_alphanumeric() || b == b' ' || b == b'\t'
284}
285
286#[inline]
287fn is_printable(b: u8) -> bool {
288    b >= 0x20 && b < 0x7f
289}
290
291fn compare_text_filtered(
292    a: &[u8],
293    b: &[u8],
294    dict: bool,
295    no_print: bool,
296    fold_case: bool,
297) -> Ordering {
298    if !dict && !no_print && !fold_case {
299        return a.cmp(b);
300    }
301
302    let mut ai = a.iter().copied();
303    let mut bi = b.iter().copied();
304
305    loop {
306        let na = next_valid(&mut ai, dict, no_print);
307        let nb = next_valid(&mut bi, dict, no_print);
308        match (na, nb) {
309            (Some(ab), Some(bb)) => {
310                let ca = if fold_case {
311                    ab.to_ascii_uppercase()
312                } else {
313                    ab
314                };
315                let cb = if fold_case {
316                    bb.to_ascii_uppercase()
317                } else {
318                    bb
319                };
320                match ca.cmp(&cb) {
321                    Ordering::Equal => continue,
322                    other => return other,
323                }
324            }
325            (Some(_), None) => return Ordering::Greater,
326            (None, Some(_)) => return Ordering::Less,
327            (None, None) => return Ordering::Equal,
328        }
329    }
330}
331
332#[inline]
333fn next_valid(iter: &mut impl Iterator<Item = u8>, dict: bool, no_print: bool) -> Option<u8> {
334    loop {
335        match iter.next() {
336            None => return None,
337            Some(b) => {
338                if dict && !is_dict_char(b) {
339                    continue;
340                }
341                if no_print && !is_printable(b) {
342                    continue;
343                }
344                return Some(b);
345            }
346        }
347    }
348}
349
350// Public wrappers for backward compatibility with tests
351pub fn compare_ignore_case(a: &[u8], b: &[u8]) -> Ordering {
352    compare_text_filtered(a, b, false, false, true)
353}
354
355pub fn compare_dictionary(a: &[u8], b: &[u8], ignore_case: bool) -> Ordering {
356    compare_text_filtered(a, b, true, false, ignore_case)
357}
358
359pub fn compare_ignore_nonprinting(a: &[u8], b: &[u8], ignore_case: bool) -> Ordering {
360    compare_text_filtered(a, b, false, true, ignore_case)
361}
362
363/// Master comparison function that dispatches based on KeyOpts.
364pub fn compare_with_opts(a: &[u8], b: &[u8], opts: &KeyOpts, random_seed: u64) -> Ordering {
365    let a = if opts.ignore_leading_blanks {
366        skip_leading_blanks(a)
367    } else {
368        a
369    };
370    let b = if opts.ignore_leading_blanks {
371        skip_leading_blanks(b)
372    } else {
373        b
374    };
375
376    let result = if opts.numeric {
377        compare_numeric(a, b)
378    } else if opts.general_numeric {
379        compare_general_numeric(a, b)
380    } else if opts.human_numeric {
381        compare_human_numeric(a, b)
382    } else if opts.month {
383        compare_month(a, b)
384    } else if opts.version {
385        compare_version(a, b)
386    } else if opts.random {
387        compare_random(a, b, random_seed)
388    } else {
389        compare_text_filtered(
390            a,
391            b,
392            opts.dictionary_order,
393            opts.ignore_nonprinting,
394            opts.ignore_case,
395        )
396    };
397
398    if opts.reverse {
399        result.reverse()
400    } else {
401        result
402    }
403}