1use std::cmp::Ordering;
3
4use super::key::KeyOpts;
5
6#[inline(always)]
8pub fn skip_leading_blanks(s: &[u8]) -> &[u8] {
9 let mut i = 0;
10 while i < s.len()
11 && (unsafe { *s.get_unchecked(i) } == b' ' || unsafe { *s.get_unchecked(i) } == b'\t')
12 {
13 i += 1;
14 }
15 &s[i..]
16}
17
18#[inline]
21pub fn compare_locale(a: &[u8], b: &[u8]) -> Ordering {
22 use std::ffi::CString;
23 if let (Ok(ca), Ok(cb)) = (CString::new(a), CString::new(b)) {
26 let result = unsafe { libc::strcoll(ca.as_ptr(), cb.as_ptr()) };
27 if result < 0 {
28 return Ordering::Less;
29 } else if result > 0 {
30 return Ordering::Greater;
31 } else {
32 return Ordering::Equal;
33 }
34 }
35 a.cmp(b)
37}
38
39#[inline]
41pub fn compare_lexical(a: &[u8], b: &[u8]) -> Ordering {
42 a.cmp(b)
43}
44
45#[inline]
48pub fn compare_numeric(a: &[u8], b: &[u8]) -> Ordering {
49 let va = parse_numeric_value(a);
50 let vb = parse_numeric_value(b);
51 va.partial_cmp(&vb).unwrap_or(Ordering::Equal)
52}
53
54#[inline]
59pub fn parse_numeric_value(s: &[u8]) -> f64 {
60 let s = skip_leading_blanks(s);
61 if s.is_empty() {
62 return 0.0;
63 }
64
65 let mut i = 0;
66 let negative = if unsafe { *s.get_unchecked(i) } == b'-' {
67 i += 1;
68 true
69 } else {
70 if i < s.len() && unsafe { *s.get_unchecked(i) } == b'+' {
71 i += 1;
72 }
73 false
74 };
75
76 if i >= s.len() {
77 return 0.0;
78 }
79
80 let mut integer: u64 = 0;
82 let mut has_digits = false;
83
84 while i + 4 <= s.len() {
86 let d0 = unsafe { *s.get_unchecked(i) }.wrapping_sub(b'0');
87 if d0 > 9 {
88 break;
89 }
90 let d1 = unsafe { *s.get_unchecked(i + 1) }.wrapping_sub(b'0');
91 if d1 > 9 {
92 integer = integer.wrapping_mul(10).wrapping_add(d0 as u64);
93 i += 1;
94 has_digits = true;
95 break;
96 }
97 let d2 = unsafe { *s.get_unchecked(i + 2) }.wrapping_sub(b'0');
98 if d2 > 9 {
99 integer = integer
100 .wrapping_mul(100)
101 .wrapping_add(d0 as u64 * 10 + d1 as u64);
102 i += 2;
103 has_digits = true;
104 break;
105 }
106 let d3 = unsafe { *s.get_unchecked(i + 3) }.wrapping_sub(b'0');
107 if d3 > 9 {
108 integer = integer
109 .wrapping_mul(1000)
110 .wrapping_add(d0 as u64 * 100 + d1 as u64 * 10 + d2 as u64);
111 i += 3;
112 has_digits = true;
113 break;
114 }
115 integer = integer
116 .wrapping_mul(10000)
117 .wrapping_add(d0 as u64 * 1000 + d1 as u64 * 100 + d2 as u64 * 10 + d3 as u64);
118 i += 4;
119 has_digits = true;
120 }
121
122 while i < s.len() {
124 let d = unsafe { *s.get_unchecked(i) }.wrapping_sub(b'0');
125 if d > 9 {
126 break;
127 }
128 integer = integer.wrapping_mul(10).wrapping_add(d as u64);
129 has_digits = true;
130 i += 1;
131 }
132
133 if i < s.len() && unsafe { *s.get_unchecked(i) } == b'.' {
135 i += 1;
136 let frac_start = i;
137 let mut frac_val: u64 = 0;
138 while i < s.len() {
139 let d = unsafe { *s.get_unchecked(i) }.wrapping_sub(b'0');
140 if d > 9 {
141 break;
142 }
143 frac_val = frac_val.wrapping_mul(10).wrapping_add(d as u64);
144 has_digits = true;
145 i += 1;
146 }
147 if !has_digits {
148 return 0.0;
149 }
150 let frac_digits = i - frac_start;
151 let result = if frac_digits > 0 {
152 let divisor = POW10[frac_digits.min(POW10.len() - 1)];
154 integer as f64 + frac_val as f64 / divisor
155 } else {
156 integer as f64
157 };
158 return if negative { -result } else { result };
159 }
160
161 if !has_digits {
162 return 0.0;
163 }
164
165 let result = integer as f64;
166 if negative { -result } else { result }
167}
168
169const POW10: [f64; 20] = [
171 1.0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16,
172 1e17, 1e18, 1e19,
173];
174
175#[inline]
182pub fn try_parse_integer(s: &[u8]) -> Option<i64> {
183 let s = skip_leading_blanks(s);
184 if s.is_empty() {
185 return Some(0);
186 }
187
188 let mut i = 0;
189 let negative = if unsafe { *s.get_unchecked(i) } == b'-' {
190 i += 1;
191 true
192 } else {
193 if i < s.len() && unsafe { *s.get_unchecked(i) } == b'+' {
194 i += 1;
195 }
196 false
197 };
198
199 let mut value: i64 = 0;
201 let mut has_digits = false;
202
203 while i + 4 <= s.len() {
205 let d0 = unsafe { *s.get_unchecked(i) }.wrapping_sub(b'0');
206 if d0 > 9 {
207 break;
208 }
209 let d1 = unsafe { *s.get_unchecked(i + 1) }.wrapping_sub(b'0');
210 if d1 > 9 {
211 value = value.wrapping_mul(10).wrapping_add(d0 as i64);
212 i += 1;
213 has_digits = true;
214 break;
215 }
216 let d2 = unsafe { *s.get_unchecked(i + 2) }.wrapping_sub(b'0');
217 if d2 > 9 {
218 value = value
219 .wrapping_mul(100)
220 .wrapping_add(d0 as i64 * 10 + d1 as i64);
221 i += 2;
222 has_digits = true;
223 break;
224 }
225 let d3 = unsafe { *s.get_unchecked(i + 3) }.wrapping_sub(b'0');
226 if d3 > 9 {
227 value = value
228 .wrapping_mul(1000)
229 .wrapping_add(d0 as i64 * 100 + d1 as i64 * 10 + d2 as i64);
230 i += 3;
231 has_digits = true;
232 break;
233 }
234 value = value
235 .wrapping_mul(10000)
236 .wrapping_add(d0 as i64 * 1000 + d1 as i64 * 100 + d2 as i64 * 10 + d3 as i64);
237 i += 4;
238 has_digits = true;
239 }
240
241 while i < s.len() {
243 let d = unsafe { *s.get_unchecked(i) }.wrapping_sub(b'0');
244 if d > 9 {
245 break;
246 }
247 value = value.wrapping_mul(10).wrapping_add(d as i64);
248 has_digits = true;
249 i += 1;
250 }
251
252 if i < s.len() && unsafe { *s.get_unchecked(i) } == b'.' {
254 return None;
255 }
256
257 if !has_digits {
258 return Some(0);
259 }
260
261 Some(if negative {
262 value.wrapping_neg()
263 } else {
264 value
265 })
266}
267
268#[inline]
271pub fn int_to_sortable_u64(v: i64) -> u64 {
272 (v as u64).wrapping_add(0x8000000000000000)
273}
274
275fn find_numeric_end(s: &[u8]) -> usize {
276 let mut i = 0;
277 if i < s.len() && (s[i] == b'+' || s[i] == b'-') {
278 i += 1;
279 }
280 let mut has_digits = false;
281 while i < s.len() && s[i].is_ascii_digit() {
282 i += 1;
283 has_digits = true;
284 }
285 if i < s.len() && s[i] == b'.' {
286 i += 1;
287 while i < s.len() && s[i].is_ascii_digit() {
288 i += 1;
289 has_digits = true;
290 }
291 }
292 if has_digits { i } else { 0 }
293}
294
295pub fn compare_general_numeric(a: &[u8], b: &[u8]) -> Ordering {
298 let va = parse_general_numeric(a);
299 let vb = parse_general_numeric(b);
300 match (va.is_nan(), vb.is_nan()) {
301 (true, true) => Ordering::Equal,
302 (true, false) => Ordering::Less,
303 (false, true) => Ordering::Greater,
304 (false, false) => va.partial_cmp(&vb).unwrap_or(Ordering::Equal),
305 }
306}
307
308pub fn parse_general_numeric(s: &[u8]) -> f64 {
309 let s = skip_leading_blanks(s);
310 if s.is_empty() {
311 return f64::NAN;
312 }
313
314 let mut i = 0;
316
317 let start = if i < s.len() && (s[i] == b'+' || s[i] == b'-') {
319 i += 1;
320 i - 1
321 } else {
322 i
323 };
324
325 if i + 2 < s.len() {
327 let c0 = s[i].to_ascii_lowercase();
328 let c1 = s[i + 1].to_ascii_lowercase();
329 let c2 = s[i + 2].to_ascii_lowercase();
330 if (c0 == b'i' && c1 == b'n' && c2 == b'f') || (c0 == b'n' && c1 == b'a' && c2 == b'n') {
331 let end = s.len().min(i + 8); for e in (i + 3..=end).rev() {
334 if let Ok(text) = std::str::from_utf8(&s[start..e]) {
335 if let Ok(v) = text.parse::<f64>() {
336 return v;
337 }
338 }
339 }
340 return f64::NAN;
341 }
342 }
343
344 i = start;
346 if i < s.len() && (s[i] == b'+' || s[i] == b'-') {
347 i += 1;
348 }
349
350 let mut has_digits = false;
352 while i < s.len() && s[i].is_ascii_digit() {
353 i += 1;
354 has_digits = true;
355 }
356 if i < s.len() && s[i] == b'.' {
358 i += 1;
359 while i < s.len() && s[i].is_ascii_digit() {
360 i += 1;
361 has_digits = true;
362 }
363 }
364 if !has_digits {
365 return f64::NAN;
366 }
367 if i < s.len() && (s[i] == b'e' || s[i] == b'E') {
369 let save = i;
370 i += 1;
371 if i < s.len() && (s[i] == b'+' || s[i] == b'-') {
372 i += 1;
373 }
374 if i < s.len() && s[i].is_ascii_digit() {
375 while i < s.len() && s[i].is_ascii_digit() {
376 i += 1;
377 }
378 } else {
379 i = save;
380 }
381 }
382
383 std::str::from_utf8(&s[start..i])
385 .ok()
386 .and_then(|s| s.parse::<f64>().ok())
387 .unwrap_or(f64::NAN)
388}
389
390pub fn compare_human_numeric(a: &[u8], b: &[u8]) -> Ordering {
392 let va = parse_human_numeric(a);
393 let vb = parse_human_numeric(b);
394 va.partial_cmp(&vb).unwrap_or(Ordering::Equal)
395}
396
397pub fn parse_human_numeric(s: &[u8]) -> f64 {
398 let s = skip_leading_blanks(s);
399 if s.is_empty() {
400 return 0.0;
401 }
402
403 let base = parse_numeric_value(s);
405 let end = find_numeric_end(s);
406
407 if end < s.len() {
408 let multiplier = match s[end] {
409 b'K' | b'k' => 1e3,
410 b'M' => 1e6,
411 b'G' => 1e9,
412 b'T' => 1e12,
413 b'P' => 1e15,
414 b'E' => 1e18,
415 b'Z' => 1e21,
416 b'Y' => 1e24,
417 _ => 1.0,
418 };
419 base * multiplier
420 } else {
421 base
422 }
423}
424
425pub fn compare_month(a: &[u8], b: &[u8]) -> Ordering {
427 let ma = parse_month(a);
428 let mb = parse_month(b);
429 ma.cmp(&mb)
430}
431
432fn parse_month(s: &[u8]) -> u8 {
433 let s = skip_leading_blanks(s);
434 if s.len() < 3 {
435 return 0;
436 }
437 let m = [
438 s[0].to_ascii_uppercase(),
439 s[1].to_ascii_uppercase(),
440 s[2].to_ascii_uppercase(),
441 ];
442 match &m {
443 b"JAN" => 1,
444 b"FEB" => 2,
445 b"MAR" => 3,
446 b"APR" => 4,
447 b"MAY" => 5,
448 b"JUN" => 6,
449 b"JUL" => 7,
450 b"AUG" => 8,
451 b"SEP" => 9,
452 b"OCT" => 10,
453 b"NOV" => 11,
454 b"DEC" => 12,
455 _ => 0,
456 }
457}
458
459pub fn compare_version(a: &[u8], b: &[u8]) -> Ordering {
462 let mut ai = 0usize;
463 let mut bi = 0usize;
464
465 loop {
466 if ai >= a.len() && bi >= b.len() {
467 return Ordering::Equal;
468 }
469 if ai >= a.len() {
470 return Ordering::Less;
471 }
472 if bi >= b.len() {
473 return Ordering::Greater;
474 }
475
476 let ac = a[ai];
477 let bc = b[bi];
478
479 if ac.is_ascii_digit() && bc.is_ascii_digit() {
480 let anum = consume_number_bytes(a, &mut ai);
481 let bnum = consume_number_bytes(b, &mut bi);
482 match anum.cmp(&bnum) {
483 Ordering::Equal => continue,
484 other => return other,
485 }
486 } else {
487 match ac.cmp(&bc) {
488 Ordering::Equal => {
489 ai += 1;
490 bi += 1;
491 }
492 other => return other,
493 }
494 }
495 }
496}
497
498#[inline]
499fn consume_number_bytes(data: &[u8], pos: &mut usize) -> u64 {
500 let mut n: u64 = 0;
501 while *pos < data.len() && data[*pos].is_ascii_digit() {
502 n = n
503 .saturating_mul(10)
504 .saturating_add((data[*pos] - b'0') as u64);
505 *pos += 1;
506 }
507 n
508}
509
510pub fn compare_random(a: &[u8], b: &[u8], seed: u64) -> Ordering {
512 let ha = fnv1a_hash(a, seed);
513 let hb = fnv1a_hash(b, seed);
514 ha.cmp(&hb)
515}
516
517#[inline]
519fn fnv1a_hash(data: &[u8], seed: u64) -> u64 {
520 let mut hash = 0xcbf29ce484222325u64 ^ seed;
521 for &b in data {
522 hash ^= b as u64;
523 hash = hash.wrapping_mul(0x100000001b3);
524 }
525 hash
526}
527
528#[inline]
531fn is_dict_char(b: u8) -> bool {
532 b.is_ascii_alphanumeric() || b == b' ' || b == b'\t'
533}
534
535#[inline]
536fn is_printable(b: u8) -> bool {
537 b >= 0x20 && b < 0x7f
538}
539
540fn compare_text_filtered(
541 a: &[u8],
542 b: &[u8],
543 dict: bool,
544 no_print: bool,
545 fold_case: bool,
546) -> Ordering {
547 if !dict && !no_print && !fold_case {
548 return a.cmp(b);
549 }
550
551 let mut ai = a.iter().copied();
552 let mut bi = b.iter().copied();
553
554 loop {
555 let na = next_valid(&mut ai, dict, no_print);
556 let nb = next_valid(&mut bi, dict, no_print);
557 match (na, nb) {
558 (Some(ab), Some(bb)) => {
559 let ca = if fold_case {
560 ab.to_ascii_uppercase()
561 } else {
562 ab
563 };
564 let cb = if fold_case {
565 bb.to_ascii_uppercase()
566 } else {
567 bb
568 };
569 match ca.cmp(&cb) {
570 Ordering::Equal => continue,
571 other => return other,
572 }
573 }
574 (Some(_), None) => return Ordering::Greater,
575 (None, Some(_)) => return Ordering::Less,
576 (None, None) => return Ordering::Equal,
577 }
578 }
579}
580
581#[inline]
582fn next_valid(iter: &mut impl Iterator<Item = u8>, dict: bool, no_print: bool) -> Option<u8> {
583 loop {
584 match iter.next() {
585 None => return None,
586 Some(b) => {
587 if dict && !is_dict_char(b) {
588 continue;
589 }
590 if no_print && !is_printable(b) {
591 continue;
592 }
593 return Some(b);
594 }
595 }
596 }
597}
598
599pub fn compare_ignore_case(a: &[u8], b: &[u8]) -> Ordering {
603 let alen = a.len();
604 let blen = b.len();
605 let min_len = alen.min(blen);
606 let ap = a.as_ptr();
607 let bp = b.as_ptr();
608 let mut i = 0usize;
610 while i < min_len {
611 let ca = unsafe { (*ap.add(i)).to_ascii_uppercase() };
612 let cb = unsafe { (*bp.add(i)).to_ascii_uppercase() };
613 if ca != cb {
614 return ca.cmp(&cb);
615 }
616 i += 1;
617 }
618 alen.cmp(&blen)
619}
620
621pub fn compare_dictionary(a: &[u8], b: &[u8], ignore_case: bool) -> Ordering {
622 compare_text_filtered(a, b, true, false, ignore_case)
623}
624
625pub fn compare_ignore_nonprinting(a: &[u8], b: &[u8], ignore_case: bool) -> Ordering {
626 compare_text_filtered(a, b, false, true, ignore_case)
627}
628
629pub fn compare_with_opts(a: &[u8], b: &[u8], opts: &KeyOpts, random_seed: u64) -> Ordering {
631 let a = if opts.ignore_leading_blanks {
632 skip_leading_blanks(a)
633 } else {
634 a
635 };
636 let b = if opts.ignore_leading_blanks {
637 skip_leading_blanks(b)
638 } else {
639 b
640 };
641
642 let result = if opts.numeric {
643 compare_numeric(a, b)
644 } else if opts.general_numeric {
645 compare_general_numeric(a, b)
646 } else if opts.human_numeric {
647 compare_human_numeric(a, b)
648 } else if opts.month {
649 compare_month(a, b)
650 } else if opts.version {
651 compare_version(a, b)
652 } else if opts.random {
653 compare_random(a, b, random_seed)
654 } else if opts.dictionary_order || opts.ignore_nonprinting || opts.ignore_case {
655 compare_text_filtered(
656 a,
657 b,
658 opts.dictionary_order,
659 opts.ignore_nonprinting,
660 opts.ignore_case,
661 )
662 } else {
663 compare_locale(a, b)
665 };
666
667 if opts.reverse {
668 result.reverse()
669 } else {
670 result
671 }
672}
673
674pub type CompareFn = fn(&[u8], &[u8]) -> Ordering;
677
678pub fn select_comparator(opts: &KeyOpts, random_seed: u64) -> (CompareFn, bool, bool) {
683 let needs_blank = opts.ignore_leading_blanks;
684 let needs_reverse = opts.reverse;
685
686 let cmp: CompareFn = if opts.numeric {
687 compare_numeric
688 } else if opts.general_numeric {
689 compare_general_numeric
690 } else if opts.human_numeric {
691 compare_human_numeric
692 } else if opts.month {
693 compare_month
694 } else if opts.version {
695 compare_version
696 } else if opts.random {
697 return (
700 make_random_comparator(random_seed),
701 needs_blank,
702 needs_reverse,
703 );
704 } else if opts.dictionary_order || opts.ignore_nonprinting || opts.ignore_case {
705 match (
707 opts.dictionary_order,
708 opts.ignore_nonprinting,
709 opts.ignore_case,
710 ) {
711 (false, false, true) => compare_ignore_case,
712 (true, false, false) => |a: &[u8], b: &[u8]| compare_dictionary(a, b, false),
713 (true, false, true) => |a: &[u8], b: &[u8]| compare_dictionary(a, b, true),
714 (false, true, false) => |a: &[u8], b: &[u8]| compare_ignore_nonprinting(a, b, false),
715 (false, true, true) => |a: &[u8], b: &[u8]| compare_ignore_nonprinting(a, b, true),
716 (true, true, false) => {
717 |a: &[u8], b: &[u8]| compare_text_filtered(a, b, true, true, false)
718 }
719 (true, true, true) => {
720 |a: &[u8], b: &[u8]| compare_text_filtered(a, b, true, true, true)
721 }
722 _ => |a: &[u8], b: &[u8]| a.cmp(b),
723 }
724 } else {
725 compare_locale
727 };
728
729 (cmp, needs_blank, needs_reverse)
730}
731
732fn make_random_comparator(seed: u64) -> CompareFn {
733 RANDOM_SEED.store(seed, std::sync::atomic::Ordering::Relaxed);
736 random_compare_with_static_seed
737}
738
739static RANDOM_SEED: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);
740
741fn random_compare_with_static_seed(a: &[u8], b: &[u8]) -> Ordering {
742 let seed = RANDOM_SEED.load(std::sync::atomic::Ordering::Relaxed);
743 compare_random(a, b, seed)
744}