Skip to main content

coreutils_rs/sort/
compare.rs

1/// Comparison functions for different sort modes.
2use std::cmp::Ordering;
3
4use super::key::KeyOpts;
5
6/// Strip leading blanks (space and tab).
7#[inline(always)]
8pub fn skip_leading_blanks(s: &[u8]) -> &[u8] {
9    let mut i = 0;
10    while i < s.len()
11        && (unsafe { *s.get_unchecked(i) } == b' ' || unsafe { *s.get_unchecked(i) } == b'\t')
12    {
13        i += 1;
14    }
15    &s[i..]
16}
17
18/// Compare two byte slices using locale-aware collation (strcoll).
19/// Uses stack buffers (up to 256 bytes) to avoid heap allocation in the hot path.
20/// Falls back to heap-allocated CString for longer strings, and to byte comparison
21/// if the input contains interior null bytes.
22#[inline]
23pub fn compare_locale(a: &[u8], b: &[u8]) -> Ordering {
24    // Stack buffer size for null-terminated copies. Most sort keys are < 256 bytes.
25    const STACK_BUF: usize = 256;
26
27    // Fast path: if either contains a null byte, fall back to byte comparison
28    // (CString can't represent interior nulls)
29    if memchr::memchr(0, a).is_some() || memchr::memchr(0, b).is_some() {
30        return a.cmp(b);
31    }
32
33    if a.len() < STACK_BUF && b.len() < STACK_BUF {
34        // Stack-only path: no heap allocation.
35        // Zero-init provides the null terminator after copy_nonoverlapping.
36        let mut buf_a = [0u8; STACK_BUF];
37        let mut buf_b = [0u8; STACK_BUF];
38        // SAFETY: a.len() < STACK_BUF (checked above), no interior NULs (checked above).
39        unsafe {
40            std::ptr::copy_nonoverlapping(a.as_ptr(), buf_a.as_mut_ptr(), a.len());
41            std::ptr::copy_nonoverlapping(b.as_ptr(), buf_b.as_mut_ptr(), b.len());
42            let result = libc::strcoll(buf_a.as_ptr() as *const _, buf_b.as_ptr() as *const _);
43            return result.cmp(&0);
44        }
45    }
46
47    // Fallback for long strings: heap allocate.
48    // Null bytes were already filtered above, so CString::new always succeeds.
49    use std::ffi::CString;
50    let ca = CString::new(a).expect("null bytes already filtered above");
51    let cb = CString::new(b).expect("null bytes already filtered above");
52    let result = unsafe { libc::strcoll(ca.as_ptr(), cb.as_ptr()) };
53    result.cmp(&0)
54}
55
56/// Compare two byte slices lexicographically (default sort).
57#[inline]
58pub fn compare_lexical(a: &[u8], b: &[u8]) -> Ordering {
59    a.cmp(b)
60}
61
62/// Numeric sort (-n): compare leading numeric strings.
63/// Handles optional leading whitespace, sign, and decimal point.
64#[inline]
65pub fn compare_numeric(a: &[u8], b: &[u8]) -> Ordering {
66    let va = parse_numeric_value(a);
67    let vb = parse_numeric_value(b);
68    va.partial_cmp(&vb).unwrap_or(Ordering::Equal)
69}
70
71/// Fast custom numeric parser: parses sign + digits + optional decimal directly from bytes.
72/// Avoids UTF-8 validation and str::parse::<f64>() overhead entirely.
73/// Uses batch digit processing for the integer part (4 digits at a time) to reduce
74/// loop iterations and branch mispredictions.
75#[inline]
76pub fn parse_numeric_value(s: &[u8]) -> f64 {
77    let s = skip_leading_blanks(s);
78    if s.is_empty() {
79        return 0.0;
80    }
81
82    let mut i = 0;
83    let negative = if unsafe { *s.get_unchecked(i) } == b'-' {
84        i += 1;
85        true
86    } else {
87        if i < s.len() && unsafe { *s.get_unchecked(i) } == b'+' {
88            i += 1;
89        }
90        false
91    };
92
93    if i >= s.len() {
94        return 0.0;
95    }
96
97    // Parse integer part — batch 4 digits at a time for reduced loop overhead
98    let mut integer: u64 = 0;
99    let mut has_digits = false;
100
101    // Fast batch path: process 4 digits at a time
102    while i + 4 <= s.len() {
103        let d0 = unsafe { *s.get_unchecked(i) }.wrapping_sub(b'0');
104        if d0 > 9 {
105            break;
106        }
107        let d1 = unsafe { *s.get_unchecked(i + 1) }.wrapping_sub(b'0');
108        if d1 > 9 {
109            integer = integer.wrapping_mul(10).wrapping_add(d0 as u64);
110            i += 1;
111            has_digits = true;
112            break;
113        }
114        let d2 = unsafe { *s.get_unchecked(i + 2) }.wrapping_sub(b'0');
115        if d2 > 9 {
116            integer = integer
117                .wrapping_mul(100)
118                .wrapping_add(d0 as u64 * 10 + d1 as u64);
119            i += 2;
120            has_digits = true;
121            break;
122        }
123        let d3 = unsafe { *s.get_unchecked(i + 3) }.wrapping_sub(b'0');
124        if d3 > 9 {
125            integer = integer
126                .wrapping_mul(1000)
127                .wrapping_add(d0 as u64 * 100 + d1 as u64 * 10 + d2 as u64);
128            i += 3;
129            has_digits = true;
130            break;
131        }
132        integer = integer
133            .wrapping_mul(10000)
134            .wrapping_add(d0 as u64 * 1000 + d1 as u64 * 100 + d2 as u64 * 10 + d3 as u64);
135        i += 4;
136        has_digits = true;
137    }
138
139    // Tail: process remaining digits one at a time
140    while i < s.len() {
141        let d = unsafe { *s.get_unchecked(i) }.wrapping_sub(b'0');
142        if d > 9 {
143            break;
144        }
145        integer = integer.wrapping_mul(10).wrapping_add(d as u64);
146        has_digits = true;
147        i += 1;
148    }
149
150    // Parse fractional part
151    if i < s.len() && unsafe { *s.get_unchecked(i) } == b'.' {
152        i += 1;
153        let frac_start = i;
154        let mut frac_val: u64 = 0;
155        while i < s.len() {
156            let d = unsafe { *s.get_unchecked(i) }.wrapping_sub(b'0');
157            if d > 9 {
158                break;
159            }
160            frac_val = frac_val.wrapping_mul(10).wrapping_add(d as u64);
161            has_digits = true;
162            i += 1;
163        }
164        if !has_digits {
165            return 0.0;
166        }
167        let frac_digits = i - frac_start;
168        let result = if frac_digits > 0 {
169            // Use pre-computed powers of 10 for common cases
170            let divisor = POW10[frac_digits.min(POW10.len() - 1)];
171            integer as f64 + frac_val as f64 / divisor
172        } else {
173            integer as f64
174        };
175        return if negative { -result } else { result };
176    }
177
178    if !has_digits {
179        return 0.0;
180    }
181
182    let result = integer as f64;
183    if negative { -result } else { result }
184}
185
186/// Pre-computed powers of 10 for fast decimal conversion.
187const POW10: [f64; 20] = [
188    1.0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16,
189    1e17, 1e18, 1e19,
190];
191
192/// Fast integer-only parser for numeric sort (-n).
193/// Returns Some(i64) if the value is a pure integer (no decimal point, no exponent).
194/// Returns None if the value has a decimal point or is not a valid integer.
195/// This avoids the f64 conversion path for integer-only data.
196/// Uses wrapping arithmetic (no overflow checks) for speed — values >18 digits
197/// may wrap but sort order is still consistent since all values wrap the same way.
198#[inline]
199pub fn try_parse_integer(s: &[u8]) -> Option<i64> {
200    let s = skip_leading_blanks(s);
201    if s.is_empty() {
202        return Some(0);
203    }
204
205    let mut i = 0;
206    let negative = if unsafe { *s.get_unchecked(i) } == b'-' {
207        i += 1;
208        true
209    } else {
210        if i < s.len() && unsafe { *s.get_unchecked(i) } == b'+' {
211            i += 1;
212        }
213        false
214    };
215
216    // Parse digits — wrapping arithmetic, batch 4 digits at a time
217    let mut value: i64 = 0;
218    let mut has_digits = false;
219
220    // Fast batch: 4 digits at a time
221    while i + 4 <= s.len() {
222        let d0 = unsafe { *s.get_unchecked(i) }.wrapping_sub(b'0');
223        if d0 > 9 {
224            break;
225        }
226        let d1 = unsafe { *s.get_unchecked(i + 1) }.wrapping_sub(b'0');
227        if d1 > 9 {
228            value = value.wrapping_mul(10).wrapping_add(d0 as i64);
229            i += 1;
230            has_digits = true;
231            break;
232        }
233        let d2 = unsafe { *s.get_unchecked(i + 2) }.wrapping_sub(b'0');
234        if d2 > 9 {
235            value = value
236                .wrapping_mul(100)
237                .wrapping_add(d0 as i64 * 10 + d1 as i64);
238            i += 2;
239            has_digits = true;
240            break;
241        }
242        let d3 = unsafe { *s.get_unchecked(i + 3) }.wrapping_sub(b'0');
243        if d3 > 9 {
244            value = value
245                .wrapping_mul(1000)
246                .wrapping_add(d0 as i64 * 100 + d1 as i64 * 10 + d2 as i64);
247            i += 3;
248            has_digits = true;
249            break;
250        }
251        value = value
252            .wrapping_mul(10000)
253            .wrapping_add(d0 as i64 * 1000 + d1 as i64 * 100 + d2 as i64 * 10 + d3 as i64);
254        i += 4;
255        has_digits = true;
256    }
257
258    // Tail: remaining digits
259    while i < s.len() {
260        let d = unsafe { *s.get_unchecked(i) }.wrapping_sub(b'0');
261        if d > 9 {
262            break;
263        }
264        value = value.wrapping_mul(10).wrapping_add(d as i64);
265        has_digits = true;
266        i += 1;
267    }
268
269    // If there's a decimal point, this is not a pure integer
270    if i < s.len() && unsafe { *s.get_unchecked(i) } == b'.' {
271        return None;
272    }
273
274    if !has_digits {
275        return Some(0);
276    }
277
278    Some(if negative {
279        value.wrapping_neg()
280    } else {
281        value
282    })
283}
284
285/// Convert an i64 to a sortable u64 whose natural ordering matches signed integer ordering.
286/// Adds i64::MAX + 1 (0x8000000000000000) to shift the range to unsigned.
287#[inline]
288pub fn int_to_sortable_u64(v: i64) -> u64 {
289    (v as u64).wrapping_add(0x8000000000000000)
290}
291
292fn find_numeric_end(s: &[u8]) -> usize {
293    let mut i = 0;
294    if i < s.len() && (s[i] == b'+' || s[i] == b'-') {
295        i += 1;
296    }
297    let mut has_digits = false;
298    while i < s.len() && s[i].is_ascii_digit() {
299        i += 1;
300        has_digits = true;
301    }
302    if i < s.len() && s[i] == b'.' {
303        i += 1;
304        while i < s.len() && s[i].is_ascii_digit() {
305            i += 1;
306            has_digits = true;
307        }
308    }
309    if has_digits { i } else { 0 }
310}
311
312/// General numeric sort (-g): handles scientific notation, infinity, NaN.
313/// O(n) parser.
314pub fn compare_general_numeric(a: &[u8], b: &[u8]) -> Ordering {
315    let va = parse_general_numeric(a);
316    let vb = parse_general_numeric(b);
317    match (va.is_nan(), vb.is_nan()) {
318        (true, true) => Ordering::Equal,
319        (true, false) => Ordering::Less,
320        (false, true) => Ordering::Greater,
321        (false, false) => va.partial_cmp(&vb).unwrap_or(Ordering::Equal),
322    }
323}
324
325pub fn parse_general_numeric(s: &[u8]) -> f64 {
326    let s = skip_leading_blanks(s);
327    if s.is_empty() {
328        return f64::NAN;
329    }
330
331    // Find the longest valid float prefix
332    let mut i = 0;
333
334    // Handle "inf", "-inf", "+inf", "nan" etc.
335    let start = if i < s.len() && (s[i] == b'+' || s[i] == b'-') {
336        i += 1;
337        i - 1
338    } else {
339        i
340    };
341
342    // Check for "inf"/"infinity"/"nan" prefix (case-insensitive)
343    if i + 2 < s.len() {
344        let c0 = s[i].to_ascii_lowercase();
345        let c1 = s[i + 1].to_ascii_lowercase();
346        let c2 = s[i + 2].to_ascii_lowercase();
347        if (c0 == b'i' && c1 == b'n' && c2 == b'f') || (c0 == b'n' && c1 == b'a' && c2 == b'n') {
348            // Try parsing the prefix as a special float
349            let end = s.len().min(i + 8); // "infinity" is 8 chars
350            for e in (i + 3..=end).rev() {
351                if let Ok(text) = std::str::from_utf8(&s[start..e]) {
352                    if let Ok(v) = text.parse::<f64>() {
353                        return v;
354                    }
355                }
356            }
357            return f64::NAN;
358        }
359    }
360
361    // Reset i for numeric parsing
362    i = start;
363    if i < s.len() && (s[i] == b'+' || s[i] == b'-') {
364        i += 1;
365    }
366
367    // Digits before decimal
368    let mut has_digits = false;
369    while i < s.len() && s[i].is_ascii_digit() {
370        i += 1;
371        has_digits = true;
372    }
373    // Decimal point
374    if i < s.len() && s[i] == b'.' {
375        i += 1;
376        while i < s.len() && s[i].is_ascii_digit() {
377            i += 1;
378            has_digits = true;
379        }
380    }
381    if !has_digits {
382        return f64::NAN;
383    }
384    // Exponent
385    if i < s.len() && (s[i] == b'e' || s[i] == b'E') {
386        let save = i;
387        i += 1;
388        if i < s.len() && (s[i] == b'+' || s[i] == b'-') {
389            i += 1;
390        }
391        if i < s.len() && s[i].is_ascii_digit() {
392            while i < s.len() && s[i].is_ascii_digit() {
393                i += 1;
394            }
395        } else {
396            i = save;
397        }
398    }
399
400    // Parse the numeric prefix using standard library
401    std::str::from_utf8(&s[start..i])
402        .ok()
403        .and_then(|s| s.parse::<f64>().ok())
404        .unwrap_or(f64::NAN)
405}
406
407/// Human numeric sort (-h): handles suffixes K, M, G, T, P, E, Z, Y.
408pub fn compare_human_numeric(a: &[u8], b: &[u8]) -> Ordering {
409    let va = parse_human_numeric(a);
410    let vb = parse_human_numeric(b);
411    va.partial_cmp(&vb).unwrap_or(Ordering::Equal)
412}
413
414pub fn parse_human_numeric(s: &[u8]) -> f64 {
415    let s = skip_leading_blanks(s);
416    if s.is_empty() {
417        return 0.0;
418    }
419
420    // Use the fast custom parser for the numeric part
421    let base = parse_numeric_value(s);
422    let end = find_numeric_end(s);
423
424    if end < s.len() {
425        let multiplier = match s[end] {
426            b'K' | b'k' => 1e3,
427            b'M' => 1e6,
428            b'G' => 1e9,
429            b'T' => 1e12,
430            b'P' => 1e15,
431            b'E' => 1e18,
432            b'Z' => 1e21,
433            b'Y' => 1e24,
434            _ => 1.0,
435        };
436        base * multiplier
437    } else {
438        base
439    }
440}
441
442/// Month sort (-M).
443pub fn compare_month(a: &[u8], b: &[u8]) -> Ordering {
444    let ma = parse_month(a);
445    let mb = parse_month(b);
446    ma.cmp(&mb)
447}
448
449fn parse_month(s: &[u8]) -> u8 {
450    let s = skip_leading_blanks(s);
451    if s.len() < 3 {
452        return 0;
453    }
454    let m = [
455        s[0].to_ascii_uppercase(),
456        s[1].to_ascii_uppercase(),
457        s[2].to_ascii_uppercase(),
458    ];
459    match &m {
460        b"JAN" => 1,
461        b"FEB" => 2,
462        b"MAR" => 3,
463        b"APR" => 4,
464        b"MAY" => 5,
465        b"JUN" => 6,
466        b"JUL" => 7,
467        b"AUG" => 8,
468        b"SEP" => 9,
469        b"OCT" => 10,
470        b"NOV" => 11,
471        b"DEC" => 12,
472        _ => 0,
473    }
474}
475
476/// Version sort (-V): natural sort of version numbers.
477/// Uses byte slices directly instead of char iterators for maximum performance.
478pub fn compare_version(a: &[u8], b: &[u8]) -> Ordering {
479    let mut ai = 0usize;
480    let mut bi = 0usize;
481
482    loop {
483        if ai >= a.len() && bi >= b.len() {
484            return Ordering::Equal;
485        }
486        if ai >= a.len() {
487            return Ordering::Less;
488        }
489        if bi >= b.len() {
490            return Ordering::Greater;
491        }
492
493        let ac = a[ai];
494        let bc = b[bi];
495
496        if ac.is_ascii_digit() && bc.is_ascii_digit() {
497            let anum = consume_number_bytes(a, &mut ai);
498            let bnum = consume_number_bytes(b, &mut bi);
499            match anum.cmp(&bnum) {
500                Ordering::Equal => continue,
501                other => return other,
502            }
503        } else {
504            match ac.cmp(&bc) {
505                Ordering::Equal => {
506                    ai += 1;
507                    bi += 1;
508                }
509                other => return other,
510            }
511        }
512    }
513}
514
515#[inline]
516fn consume_number_bytes(data: &[u8], pos: &mut usize) -> u64 {
517    let mut n: u64 = 0;
518    while *pos < data.len() && data[*pos].is_ascii_digit() {
519        n = n
520            .saturating_mul(10)
521            .saturating_add((data[*pos] - b'0') as u64);
522        *pos += 1;
523    }
524    n
525}
526
527/// Random sort (-R): hash-based shuffle that groups identical keys.
528pub fn compare_random(a: &[u8], b: &[u8], seed: u64) -> Ordering {
529    let ha = fnv1a_hash(a, seed);
530    let hb = fnv1a_hash(b, seed);
531    ha.cmp(&hb)
532}
533
534/// FNV-1a hash with seed mixing.
535#[inline]
536fn fnv1a_hash(data: &[u8], seed: u64) -> u64 {
537    let mut hash = 0xcbf29ce484222325u64 ^ seed;
538    for &b in data {
539        hash ^= b as u64;
540        hash = hash.wrapping_mul(0x100000001b3);
541    }
542    hash
543}
544
545/// Compare with text filtering (-d, -i, -f flags in any combination).
546/// Allocation-free: uses iterator filtering.
547#[inline]
548fn is_dict_char(b: u8) -> bool {
549    b.is_ascii_alphanumeric() || b == b' ' || b == b'\t'
550}
551
552#[inline]
553fn is_printable(b: u8) -> bool {
554    b >= 0x20 && b < 0x7f
555}
556
557fn compare_text_filtered(
558    a: &[u8],
559    b: &[u8],
560    dict: bool,
561    no_print: bool,
562    fold_case: bool,
563) -> Ordering {
564    if !dict && !no_print && !fold_case {
565        return a.cmp(b);
566    }
567
568    let mut ai = a.iter().copied();
569    let mut bi = b.iter().copied();
570
571    loop {
572        let na = next_valid(&mut ai, dict, no_print);
573        let nb = next_valid(&mut bi, dict, no_print);
574        match (na, nb) {
575            (Some(ab), Some(bb)) => {
576                let ca = if fold_case {
577                    ab.to_ascii_uppercase()
578                } else {
579                    ab
580                };
581                let cb = if fold_case {
582                    bb.to_ascii_uppercase()
583                } else {
584                    bb
585                };
586                match ca.cmp(&cb) {
587                    Ordering::Equal => continue,
588                    other => return other,
589                }
590            }
591            (Some(_), None) => return Ordering::Greater,
592            (None, Some(_)) => return Ordering::Less,
593            (None, None) => return Ordering::Equal,
594        }
595    }
596}
597
598#[inline]
599fn next_valid(iter: &mut impl Iterator<Item = u8>, dict: bool, no_print: bool) -> Option<u8> {
600    loop {
601        match iter.next() {
602            None => return None,
603            Some(b) => {
604                if dict && !is_dict_char(b) {
605                    continue;
606                }
607                if no_print && !is_printable(b) {
608                    continue;
609                }
610                return Some(b);
611            }
612        }
613    }
614}
615
616/// Case-insensitive lexicographic comparison.
617/// Tight loop specialized for fold-case only (no dict/nonprinting filtering),
618/// avoiding the overhead of the general compare_text_filtered path.
619pub fn compare_ignore_case(a: &[u8], b: &[u8]) -> Ordering {
620    let alen = a.len();
621    let blen = b.len();
622    let min_len = alen.min(blen);
623    let ap = a.as_ptr();
624    let bp = b.as_ptr();
625    // Compare uppercase bytes directly with raw pointers for zero bounds-check overhead
626    let mut i = 0usize;
627    while i < min_len {
628        let ca = unsafe { (*ap.add(i)).to_ascii_uppercase() };
629        let cb = unsafe { (*bp.add(i)).to_ascii_uppercase() };
630        if ca != cb {
631            return ca.cmp(&cb);
632        }
633        i += 1;
634    }
635    alen.cmp(&blen)
636}
637
638pub fn compare_dictionary(a: &[u8], b: &[u8], ignore_case: bool) -> Ordering {
639    compare_text_filtered(a, b, true, false, ignore_case)
640}
641
642pub fn compare_ignore_nonprinting(a: &[u8], b: &[u8], ignore_case: bool) -> Ordering {
643    compare_text_filtered(a, b, false, true, ignore_case)
644}
645
646/// Master comparison function that dispatches based on KeyOpts.
647pub fn compare_with_opts(a: &[u8], b: &[u8], opts: &KeyOpts, random_seed: u64) -> Ordering {
648    let a = if opts.ignore_leading_blanks {
649        skip_leading_blanks(a)
650    } else {
651        a
652    };
653    let b = if opts.ignore_leading_blanks {
654        skip_leading_blanks(b)
655    } else {
656        b
657    };
658
659    let result = if opts.numeric {
660        compare_numeric(a, b)
661    } else if opts.general_numeric {
662        compare_general_numeric(a, b)
663    } else if opts.human_numeric {
664        compare_human_numeric(a, b)
665    } else if opts.month {
666        compare_month(a, b)
667    } else if opts.version {
668        compare_version(a, b)
669    } else if opts.random {
670        compare_random(a, b, random_seed)
671    } else if opts.dictionary_order || opts.ignore_nonprinting || opts.ignore_case {
672        compare_text_filtered(
673            a,
674            b,
675            opts.dictionary_order,
676            opts.ignore_nonprinting,
677            opts.ignore_case,
678        )
679    } else if super::core::is_c_locale() {
680        // C/POSIX locale: strcoll == byte comparison, skip CString overhead
681        a.cmp(b)
682    } else {
683        // Default: locale-aware comparison matching GNU sort's LC_COLLATE behavior
684        compare_locale(a, b)
685    };
686
687    if opts.reverse {
688        result.reverse()
689    } else {
690        result
691    }
692}
693
694/// Concrete comparison function type. Selected once at setup time to avoid
695/// per-comparison flag checking in hot sort loops.
696pub type CompareFn = fn(&[u8], &[u8]) -> Ordering;
697
698/// Select a concrete comparison function based on KeyOpts.
699/// Returns (compare_fn, needs_leading_blank_strip, needs_reverse).
700/// The caller applies blank-stripping and reversal outside the function pointer,
701/// eliminating all per-comparison branching.
702pub fn select_comparator(opts: &KeyOpts, random_seed: u64) -> (CompareFn, bool, bool) {
703    let needs_blank = opts.ignore_leading_blanks;
704    let needs_reverse = opts.reverse;
705
706    let cmp: CompareFn = if opts.numeric {
707        compare_numeric
708    } else if opts.general_numeric {
709        compare_general_numeric
710    } else if opts.human_numeric {
711        compare_human_numeric
712    } else if opts.month {
713        compare_month
714    } else if opts.version {
715        compare_version
716    } else if opts.random {
717        // Random needs seed — wrap in a closure-like pattern
718        // Since we need random_seed, we use a special case
719        return (
720            make_random_comparator(random_seed),
721            needs_blank,
722            needs_reverse,
723        );
724    } else if opts.dictionary_order || opts.ignore_nonprinting || opts.ignore_case {
725        // Text filtering: select specialized variant
726        match (
727            opts.dictionary_order,
728            opts.ignore_nonprinting,
729            opts.ignore_case,
730        ) {
731            (false, false, true) => compare_ignore_case,
732            (true, false, false) => |a: &[u8], b: &[u8]| compare_dictionary(a, b, false),
733            (true, false, true) => |a: &[u8], b: &[u8]| compare_dictionary(a, b, true),
734            (false, true, false) => |a: &[u8], b: &[u8]| compare_ignore_nonprinting(a, b, false),
735            (false, true, true) => |a: &[u8], b: &[u8]| compare_ignore_nonprinting(a, b, true),
736            (true, true, false) => {
737                |a: &[u8], b: &[u8]| compare_text_filtered(a, b, true, true, false)
738            }
739            (true, true, true) => {
740                |a: &[u8], b: &[u8]| compare_text_filtered(a, b, true, true, true)
741            }
742            _ => |a: &[u8], b: &[u8]| a.cmp(b),
743        }
744    } else if super::core::is_c_locale() {
745        // C/POSIX locale: strcoll == byte comparison, skip CString overhead
746        compare_lexical
747    } else {
748        // Default: locale-aware comparison matching GNU sort's LC_COLLATE behavior
749        compare_locale
750    };
751
752    (cmp, needs_blank, needs_reverse)
753}
754
755fn make_random_comparator(seed: u64) -> CompareFn {
756    // We can't capture the seed in a function pointer, so we use a static.
757    // This is safe because sort is single-process and seed doesn't change during a sort.
758    RANDOM_SEED.store(seed, std::sync::atomic::Ordering::Relaxed);
759    random_compare_with_static_seed
760}
761
762static RANDOM_SEED: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);
763
764fn random_compare_with_static_seed(a: &[u8], b: &[u8]) -> Ordering {
765    let seed = RANDOM_SEED.load(std::sync::atomic::Ordering::Relaxed);
766    compare_random(a, b, seed)
767}