1use std::cmp::Ordering;
3
4use super::key::KeyOpts;
5
6#[inline(always)]
8pub fn skip_leading_blanks(s: &[u8]) -> &[u8] {
9 let mut i = 0;
10 while i < s.len()
11 && (unsafe { *s.get_unchecked(i) } == b' ' || unsafe { *s.get_unchecked(i) } == b'\t')
12 {
13 i += 1;
14 }
15 &s[i..]
16}
17
18#[inline]
23pub fn compare_locale(a: &[u8], b: &[u8]) -> Ordering {
24 const STACK_BUF: usize = 256;
26
27 if memchr::memchr(0, a).is_some() || memchr::memchr(0, b).is_some() {
30 return a.cmp(b);
31 }
32
33 if a.len() < STACK_BUF && b.len() < STACK_BUF {
34 let mut buf_a = [0u8; STACK_BUF];
37 let mut buf_b = [0u8; STACK_BUF];
38 unsafe {
40 std::ptr::copy_nonoverlapping(a.as_ptr(), buf_a.as_mut_ptr(), a.len());
41 std::ptr::copy_nonoverlapping(b.as_ptr(), buf_b.as_mut_ptr(), b.len());
42 let result = libc::strcoll(buf_a.as_ptr() as *const _, buf_b.as_ptr() as *const _);
43 return result.cmp(&0);
44 }
45 }
46
47 use std::ffi::CString;
50 let ca = CString::new(a).expect("null bytes already filtered above");
51 let cb = CString::new(b).expect("null bytes already filtered above");
52 let result = unsafe { libc::strcoll(ca.as_ptr(), cb.as_ptr()) };
53 result.cmp(&0)
54}
55
56#[inline]
58pub fn compare_lexical(a: &[u8], b: &[u8]) -> Ordering {
59 a.cmp(b)
60}
61
62#[inline]
65pub fn compare_numeric(a: &[u8], b: &[u8]) -> Ordering {
66 let va = parse_numeric_value(a);
67 let vb = parse_numeric_value(b);
68 va.partial_cmp(&vb).unwrap_or(Ordering::Equal)
69}
70
71#[inline]
76pub fn parse_numeric_value(s: &[u8]) -> f64 {
77 let s = skip_leading_blanks(s);
78 if s.is_empty() {
79 return 0.0;
80 }
81
82 let mut i = 0;
83 let negative = if unsafe { *s.get_unchecked(i) } == b'-' {
84 i += 1;
85 true
86 } else {
87 if i < s.len() && unsafe { *s.get_unchecked(i) } == b'+' {
88 i += 1;
89 }
90 false
91 };
92
93 if i >= s.len() {
94 return 0.0;
95 }
96
97 let mut integer: u64 = 0;
99 let mut has_digits = false;
100
101 while i + 4 <= s.len() {
103 let d0 = unsafe { *s.get_unchecked(i) }.wrapping_sub(b'0');
104 if d0 > 9 {
105 break;
106 }
107 let d1 = unsafe { *s.get_unchecked(i + 1) }.wrapping_sub(b'0');
108 if d1 > 9 {
109 integer = integer.wrapping_mul(10).wrapping_add(d0 as u64);
110 i += 1;
111 has_digits = true;
112 break;
113 }
114 let d2 = unsafe { *s.get_unchecked(i + 2) }.wrapping_sub(b'0');
115 if d2 > 9 {
116 integer = integer
117 .wrapping_mul(100)
118 .wrapping_add(d0 as u64 * 10 + d1 as u64);
119 i += 2;
120 has_digits = true;
121 break;
122 }
123 let d3 = unsafe { *s.get_unchecked(i + 3) }.wrapping_sub(b'0');
124 if d3 > 9 {
125 integer = integer
126 .wrapping_mul(1000)
127 .wrapping_add(d0 as u64 * 100 + d1 as u64 * 10 + d2 as u64);
128 i += 3;
129 has_digits = true;
130 break;
131 }
132 integer = integer
133 .wrapping_mul(10000)
134 .wrapping_add(d0 as u64 * 1000 + d1 as u64 * 100 + d2 as u64 * 10 + d3 as u64);
135 i += 4;
136 has_digits = true;
137 }
138
139 while i < s.len() {
141 let d = unsafe { *s.get_unchecked(i) }.wrapping_sub(b'0');
142 if d > 9 {
143 break;
144 }
145 integer = integer.wrapping_mul(10).wrapping_add(d as u64);
146 has_digits = true;
147 i += 1;
148 }
149
150 if i < s.len() && unsafe { *s.get_unchecked(i) } == b'.' {
152 i += 1;
153 let frac_start = i;
154 let mut frac_val: u64 = 0;
155 while i < s.len() {
156 let d = unsafe { *s.get_unchecked(i) }.wrapping_sub(b'0');
157 if d > 9 {
158 break;
159 }
160 frac_val = frac_val.wrapping_mul(10).wrapping_add(d as u64);
161 has_digits = true;
162 i += 1;
163 }
164 if !has_digits {
165 return 0.0;
166 }
167 let frac_digits = i - frac_start;
168 let result = if frac_digits > 0 {
169 let divisor = POW10[frac_digits.min(POW10.len() - 1)];
171 integer as f64 + frac_val as f64 / divisor
172 } else {
173 integer as f64
174 };
175 return if negative { -result } else { result };
176 }
177
178 if !has_digits {
179 return 0.0;
180 }
181
182 let result = integer as f64;
183 if negative { -result } else { result }
184}
185
186const POW10: [f64; 20] = [
188 1.0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16,
189 1e17, 1e18, 1e19,
190];
191
192#[inline]
199pub fn try_parse_integer(s: &[u8]) -> Option<i64> {
200 let s = skip_leading_blanks(s);
201 if s.is_empty() {
202 return Some(0);
203 }
204
205 let mut i = 0;
206 let negative = if unsafe { *s.get_unchecked(i) } == b'-' {
207 i += 1;
208 true
209 } else {
210 if i < s.len() && unsafe { *s.get_unchecked(i) } == b'+' {
211 i += 1;
212 }
213 false
214 };
215
216 let mut value: i64 = 0;
218 let mut has_digits = false;
219
220 while i + 4 <= s.len() {
222 let d0 = unsafe { *s.get_unchecked(i) }.wrapping_sub(b'0');
223 if d0 > 9 {
224 break;
225 }
226 let d1 = unsafe { *s.get_unchecked(i + 1) }.wrapping_sub(b'0');
227 if d1 > 9 {
228 value = value.wrapping_mul(10).wrapping_add(d0 as i64);
229 i += 1;
230 has_digits = true;
231 break;
232 }
233 let d2 = unsafe { *s.get_unchecked(i + 2) }.wrapping_sub(b'0');
234 if d2 > 9 {
235 value = value
236 .wrapping_mul(100)
237 .wrapping_add(d0 as i64 * 10 + d1 as i64);
238 i += 2;
239 has_digits = true;
240 break;
241 }
242 let d3 = unsafe { *s.get_unchecked(i + 3) }.wrapping_sub(b'0');
243 if d3 > 9 {
244 value = value
245 .wrapping_mul(1000)
246 .wrapping_add(d0 as i64 * 100 + d1 as i64 * 10 + d2 as i64);
247 i += 3;
248 has_digits = true;
249 break;
250 }
251 value = value
252 .wrapping_mul(10000)
253 .wrapping_add(d0 as i64 * 1000 + d1 as i64 * 100 + d2 as i64 * 10 + d3 as i64);
254 i += 4;
255 has_digits = true;
256 }
257
258 while i < s.len() {
260 let d = unsafe { *s.get_unchecked(i) }.wrapping_sub(b'0');
261 if d > 9 {
262 break;
263 }
264 value = value.wrapping_mul(10).wrapping_add(d as i64);
265 has_digits = true;
266 i += 1;
267 }
268
269 if i < s.len() && unsafe { *s.get_unchecked(i) } == b'.' {
271 return None;
272 }
273
274 if !has_digits {
275 return Some(0);
276 }
277
278 Some(if negative {
279 value.wrapping_neg()
280 } else {
281 value
282 })
283}
284
285#[inline]
288pub fn int_to_sortable_u64(v: i64) -> u64 {
289 (v as u64).wrapping_add(0x8000000000000000)
290}
291
292fn find_numeric_end(s: &[u8]) -> usize {
293 let mut i = 0;
294 if i < s.len() && (s[i] == b'+' || s[i] == b'-') {
295 i += 1;
296 }
297 let mut has_digits = false;
298 while i < s.len() && s[i].is_ascii_digit() {
299 i += 1;
300 has_digits = true;
301 }
302 if i < s.len() && s[i] == b'.' {
303 i += 1;
304 while i < s.len() && s[i].is_ascii_digit() {
305 i += 1;
306 has_digits = true;
307 }
308 }
309 if has_digits { i } else { 0 }
310}
311
312pub fn compare_general_numeric(a: &[u8], b: &[u8]) -> Ordering {
315 let va = parse_general_numeric(a);
316 let vb = parse_general_numeric(b);
317 match (va.is_nan(), vb.is_nan()) {
318 (true, true) => Ordering::Equal,
319 (true, false) => Ordering::Less,
320 (false, true) => Ordering::Greater,
321 (false, false) => va.partial_cmp(&vb).unwrap_or(Ordering::Equal),
322 }
323}
324
325pub fn parse_general_numeric(s: &[u8]) -> f64 {
326 let s = skip_leading_blanks(s);
327 if s.is_empty() {
328 return f64::NAN;
329 }
330
331 let mut i = 0;
333
334 let start = if i < s.len() && (s[i] == b'+' || s[i] == b'-') {
336 i += 1;
337 i - 1
338 } else {
339 i
340 };
341
342 if i + 2 < s.len() {
344 let c0 = s[i].to_ascii_lowercase();
345 let c1 = s[i + 1].to_ascii_lowercase();
346 let c2 = s[i + 2].to_ascii_lowercase();
347 if (c0 == b'i' && c1 == b'n' && c2 == b'f') || (c0 == b'n' && c1 == b'a' && c2 == b'n') {
348 let end = s.len().min(i + 8); for e in (i + 3..=end).rev() {
351 if let Ok(text) = std::str::from_utf8(&s[start..e]) {
352 if let Ok(v) = text.parse::<f64>() {
353 return v;
354 }
355 }
356 }
357 return f64::NAN;
358 }
359 }
360
361 i = start;
363 if i < s.len() && (s[i] == b'+' || s[i] == b'-') {
364 i += 1;
365 }
366
367 let mut has_digits = false;
369 while i < s.len() && s[i].is_ascii_digit() {
370 i += 1;
371 has_digits = true;
372 }
373 if i < s.len() && s[i] == b'.' {
375 i += 1;
376 while i < s.len() && s[i].is_ascii_digit() {
377 i += 1;
378 has_digits = true;
379 }
380 }
381 if !has_digits {
382 return f64::NAN;
383 }
384 if i < s.len() && (s[i] == b'e' || s[i] == b'E') {
386 let save = i;
387 i += 1;
388 if i < s.len() && (s[i] == b'+' || s[i] == b'-') {
389 i += 1;
390 }
391 if i < s.len() && s[i].is_ascii_digit() {
392 while i < s.len() && s[i].is_ascii_digit() {
393 i += 1;
394 }
395 } else {
396 i = save;
397 }
398 }
399
400 std::str::from_utf8(&s[start..i])
402 .ok()
403 .and_then(|s| s.parse::<f64>().ok())
404 .unwrap_or(f64::NAN)
405}
406
407pub fn compare_human_numeric(a: &[u8], b: &[u8]) -> Ordering {
409 let va = parse_human_numeric(a);
410 let vb = parse_human_numeric(b);
411 va.partial_cmp(&vb).unwrap_or(Ordering::Equal)
412}
413
414pub fn parse_human_numeric(s: &[u8]) -> f64 {
415 let s = skip_leading_blanks(s);
416 if s.is_empty() {
417 return 0.0;
418 }
419
420 let base = parse_numeric_value(s);
422 let end = find_numeric_end(s);
423
424 if end < s.len() {
425 let multiplier = match s[end] {
426 b'K' | b'k' => 1e3,
427 b'M' => 1e6,
428 b'G' => 1e9,
429 b'T' => 1e12,
430 b'P' => 1e15,
431 b'E' => 1e18,
432 b'Z' => 1e21,
433 b'Y' => 1e24,
434 _ => 1.0,
435 };
436 base * multiplier
437 } else {
438 base
439 }
440}
441
442pub fn compare_month(a: &[u8], b: &[u8]) -> Ordering {
444 let ma = parse_month(a);
445 let mb = parse_month(b);
446 ma.cmp(&mb)
447}
448
449fn parse_month(s: &[u8]) -> u8 {
450 let s = skip_leading_blanks(s);
451 if s.len() < 3 {
452 return 0;
453 }
454 let m = [
455 s[0].to_ascii_uppercase(),
456 s[1].to_ascii_uppercase(),
457 s[2].to_ascii_uppercase(),
458 ];
459 match &m {
460 b"JAN" => 1,
461 b"FEB" => 2,
462 b"MAR" => 3,
463 b"APR" => 4,
464 b"MAY" => 5,
465 b"JUN" => 6,
466 b"JUL" => 7,
467 b"AUG" => 8,
468 b"SEP" => 9,
469 b"OCT" => 10,
470 b"NOV" => 11,
471 b"DEC" => 12,
472 _ => 0,
473 }
474}
475
476pub fn compare_version(a: &[u8], b: &[u8]) -> Ordering {
479 let mut ai = 0usize;
480 let mut bi = 0usize;
481
482 loop {
483 if ai >= a.len() && bi >= b.len() {
484 return Ordering::Equal;
485 }
486 if ai >= a.len() {
487 return Ordering::Less;
488 }
489 if bi >= b.len() {
490 return Ordering::Greater;
491 }
492
493 let ac = a[ai];
494 let bc = b[bi];
495
496 if ac.is_ascii_digit() && bc.is_ascii_digit() {
497 let anum = consume_number_bytes(a, &mut ai);
498 let bnum = consume_number_bytes(b, &mut bi);
499 match anum.cmp(&bnum) {
500 Ordering::Equal => continue,
501 other => return other,
502 }
503 } else {
504 match ac.cmp(&bc) {
505 Ordering::Equal => {
506 ai += 1;
507 bi += 1;
508 }
509 other => return other,
510 }
511 }
512 }
513}
514
515#[inline]
516fn consume_number_bytes(data: &[u8], pos: &mut usize) -> u64 {
517 let mut n: u64 = 0;
518 while *pos < data.len() && data[*pos].is_ascii_digit() {
519 n = n
520 .saturating_mul(10)
521 .saturating_add((data[*pos] - b'0') as u64);
522 *pos += 1;
523 }
524 n
525}
526
527pub fn compare_random(a: &[u8], b: &[u8], seed: u64) -> Ordering {
529 let ha = fnv1a_hash(a, seed);
530 let hb = fnv1a_hash(b, seed);
531 ha.cmp(&hb)
532}
533
534#[inline]
536fn fnv1a_hash(data: &[u8], seed: u64) -> u64 {
537 let mut hash = 0xcbf29ce484222325u64 ^ seed;
538 for &b in data {
539 hash ^= b as u64;
540 hash = hash.wrapping_mul(0x100000001b3);
541 }
542 hash
543}
544
545#[inline]
548fn is_dict_char(b: u8) -> bool {
549 b.is_ascii_alphanumeric() || b == b' ' || b == b'\t'
550}
551
552#[inline]
553fn is_printable(b: u8) -> bool {
554 b >= 0x20 && b < 0x7f
555}
556
557fn compare_text_filtered(
558 a: &[u8],
559 b: &[u8],
560 dict: bool,
561 no_print: bool,
562 fold_case: bool,
563) -> Ordering {
564 if !dict && !no_print && !fold_case {
565 return a.cmp(b);
566 }
567
568 let mut ai = a.iter().copied();
569 let mut bi = b.iter().copied();
570
571 loop {
572 let na = next_valid(&mut ai, dict, no_print);
573 let nb = next_valid(&mut bi, dict, no_print);
574 match (na, nb) {
575 (Some(ab), Some(bb)) => {
576 let ca = if fold_case {
577 ab.to_ascii_uppercase()
578 } else {
579 ab
580 };
581 let cb = if fold_case {
582 bb.to_ascii_uppercase()
583 } else {
584 bb
585 };
586 match ca.cmp(&cb) {
587 Ordering::Equal => continue,
588 other => return other,
589 }
590 }
591 (Some(_), None) => return Ordering::Greater,
592 (None, Some(_)) => return Ordering::Less,
593 (None, None) => return Ordering::Equal,
594 }
595 }
596}
597
598#[inline]
599fn next_valid(iter: &mut impl Iterator<Item = u8>, dict: bool, no_print: bool) -> Option<u8> {
600 loop {
601 match iter.next() {
602 None => return None,
603 Some(b) => {
604 if dict && !is_dict_char(b) {
605 continue;
606 }
607 if no_print && !is_printable(b) {
608 continue;
609 }
610 return Some(b);
611 }
612 }
613 }
614}
615
616pub fn compare_ignore_case(a: &[u8], b: &[u8]) -> Ordering {
620 let alen = a.len();
621 let blen = b.len();
622 let min_len = alen.min(blen);
623 let ap = a.as_ptr();
624 let bp = b.as_ptr();
625 let mut i = 0usize;
627 while i < min_len {
628 let ca = unsafe { (*ap.add(i)).to_ascii_uppercase() };
629 let cb = unsafe { (*bp.add(i)).to_ascii_uppercase() };
630 if ca != cb {
631 return ca.cmp(&cb);
632 }
633 i += 1;
634 }
635 alen.cmp(&blen)
636}
637
638pub fn compare_dictionary(a: &[u8], b: &[u8], ignore_case: bool) -> Ordering {
639 compare_text_filtered(a, b, true, false, ignore_case)
640}
641
642pub fn compare_ignore_nonprinting(a: &[u8], b: &[u8], ignore_case: bool) -> Ordering {
643 compare_text_filtered(a, b, false, true, ignore_case)
644}
645
646pub fn compare_with_opts(a: &[u8], b: &[u8], opts: &KeyOpts, random_seed: u64) -> Ordering {
648 let a = if opts.ignore_leading_blanks {
649 skip_leading_blanks(a)
650 } else {
651 a
652 };
653 let b = if opts.ignore_leading_blanks {
654 skip_leading_blanks(b)
655 } else {
656 b
657 };
658
659 let result = if opts.numeric {
660 compare_numeric(a, b)
661 } else if opts.general_numeric {
662 compare_general_numeric(a, b)
663 } else if opts.human_numeric {
664 compare_human_numeric(a, b)
665 } else if opts.month {
666 compare_month(a, b)
667 } else if opts.version {
668 compare_version(a, b)
669 } else if opts.random {
670 compare_random(a, b, random_seed)
671 } else if opts.dictionary_order || opts.ignore_nonprinting || opts.ignore_case {
672 compare_text_filtered(
673 a,
674 b,
675 opts.dictionary_order,
676 opts.ignore_nonprinting,
677 opts.ignore_case,
678 )
679 } else if super::core::is_c_locale() {
680 a.cmp(b)
682 } else {
683 compare_locale(a, b)
685 };
686
687 if opts.reverse {
688 result.reverse()
689 } else {
690 result
691 }
692}
693
694pub type CompareFn = fn(&[u8], &[u8]) -> Ordering;
697
698pub fn select_comparator(opts: &KeyOpts, random_seed: u64) -> (CompareFn, bool, bool) {
703 let needs_blank = opts.ignore_leading_blanks;
704 let needs_reverse = opts.reverse;
705
706 let cmp: CompareFn = if opts.numeric {
707 compare_numeric
708 } else if opts.general_numeric {
709 compare_general_numeric
710 } else if opts.human_numeric {
711 compare_human_numeric
712 } else if opts.month {
713 compare_month
714 } else if opts.version {
715 compare_version
716 } else if opts.random {
717 return (
720 make_random_comparator(random_seed),
721 needs_blank,
722 needs_reverse,
723 );
724 } else if opts.dictionary_order || opts.ignore_nonprinting || opts.ignore_case {
725 match (
727 opts.dictionary_order,
728 opts.ignore_nonprinting,
729 opts.ignore_case,
730 ) {
731 (false, false, true) => compare_ignore_case,
732 (true, false, false) => |a: &[u8], b: &[u8]| compare_dictionary(a, b, false),
733 (true, false, true) => |a: &[u8], b: &[u8]| compare_dictionary(a, b, true),
734 (false, true, false) => |a: &[u8], b: &[u8]| compare_ignore_nonprinting(a, b, false),
735 (false, true, true) => |a: &[u8], b: &[u8]| compare_ignore_nonprinting(a, b, true),
736 (true, true, false) => {
737 |a: &[u8], b: &[u8]| compare_text_filtered(a, b, true, true, false)
738 }
739 (true, true, true) => {
740 |a: &[u8], b: &[u8]| compare_text_filtered(a, b, true, true, true)
741 }
742 _ => |a: &[u8], b: &[u8]| a.cmp(b),
743 }
744 } else if super::core::is_c_locale() {
745 compare_lexical
747 } else {
748 compare_locale
750 };
751
752 (cmp, needs_blank, needs_reverse)
753}
754
755fn make_random_comparator(seed: u64) -> CompareFn {
756 RANDOM_SEED.store(seed, std::sync::atomic::Ordering::Relaxed);
759 random_compare_with_static_seed
760}
761
762static RANDOM_SEED: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);
763
764fn random_compare_with_static_seed(a: &[u8], b: &[u8]) -> Ordering {
765 let seed = RANDOM_SEED.load(std::sync::atomic::Ordering::Relaxed);
766 compare_random(a, b, seed)
767}