1use std::cmp::Ordering;
4
5use super::key::KeyOpts;
6
7#[inline]
9pub fn skip_leading_blanks(s: &[u8]) -> &[u8] {
10 let mut i = 0;
11 while i < s.len() && (s[i] == b' ' || s[i] == b'\t') {
12 i += 1;
13 }
14 &s[i..]
15}
16
17#[inline]
19pub fn compare_lexical(a: &[u8], b: &[u8]) -> Ordering {
20 a.cmp(b)
21}
22
23pub fn compare_numeric(a: &[u8], b: &[u8]) -> Ordering {
26 let va = parse_numeric_value(a);
27 let vb = parse_numeric_value(b);
28 va.partial_cmp(&vb).unwrap_or(Ordering::Equal)
29}
30
31pub fn parse_numeric_value(s: &[u8]) -> f64 {
34 let s = skip_leading_blanks(s);
35 if s.is_empty() {
36 return 0.0;
37 }
38
39 let mut i = 0;
40 let negative = if s[i] == b'-' {
41 i += 1;
42 true
43 } else {
44 if s[i] == b'+' {
45 i += 1;
46 }
47 false
48 };
49
50 let mut integer: u64 = 0;
52 let mut has_digits = false;
53 while i < s.len() && s[i].is_ascii_digit() {
54 integer = integer.wrapping_mul(10).wrapping_add((s[i] - b'0') as u64);
55 has_digits = true;
56 i += 1;
57 }
58
59 if i < s.len() && s[i] == b'.' {
61 i += 1;
62 let frac_start = i;
63 let mut frac_val: u64 = 0;
64 while i < s.len() && s[i].is_ascii_digit() {
65 frac_val = frac_val.wrapping_mul(10).wrapping_add((s[i] - b'0') as u64);
66 has_digits = true;
67 i += 1;
68 }
69 if !has_digits {
70 return 0.0;
71 }
72 let frac_digits = i - frac_start;
73 let result = if frac_digits > 0 {
74 let divisor = POW10[frac_digits.min(POW10.len() - 1)];
76 integer as f64 + frac_val as f64 / divisor
77 } else {
78 integer as f64
79 };
80 return if negative { -result } else { result };
81 }
82
83 if !has_digits {
84 return 0.0;
85 }
86
87 let result = integer as f64;
88 if negative { -result } else { result }
89}
90
91const POW10: [f64; 20] = [
93 1.0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16,
94 1e17, 1e18, 1e19,
95];
96
97#[inline]
102pub fn try_parse_integer(s: &[u8]) -> Option<i64> {
103 let s = skip_leading_blanks(s);
104 if s.is_empty() {
105 return Some(0);
106 }
107
108 let mut i = 0;
109 let negative = if s[i] == b'-' {
110 i += 1;
111 true
112 } else {
113 if s[i] == b'+' {
114 i += 1;
115 }
116 false
117 };
118
119 let mut value: i64 = 0;
121 let mut has_digits = false;
122 while i < s.len() && s[i].is_ascii_digit() {
123 value = value.checked_mul(10)?.checked_add((s[i] - b'0') as i64)?;
125 has_digits = true;
126 i += 1;
127 }
128
129 if i < s.len() && s[i] == b'.' {
131 return None;
132 }
133
134 if !has_digits {
135 return Some(0);
136 }
137
138 Some(if negative { -value } else { value })
139}
140
141#[inline]
144pub fn int_to_sortable_u64(v: i64) -> u64 {
145 (v as u64).wrapping_add(0x8000000000000000)
146}
147
148fn find_numeric_end(s: &[u8]) -> usize {
149 let mut i = 0;
150 if i < s.len() && (s[i] == b'+' || s[i] == b'-') {
151 i += 1;
152 }
153 let mut has_digits = false;
154 while i < s.len() && s[i].is_ascii_digit() {
155 i += 1;
156 has_digits = true;
157 }
158 if i < s.len() && s[i] == b'.' {
159 i += 1;
160 while i < s.len() && s[i].is_ascii_digit() {
161 i += 1;
162 has_digits = true;
163 }
164 }
165 if has_digits { i } else { 0 }
166}
167
168pub fn compare_general_numeric(a: &[u8], b: &[u8]) -> Ordering {
171 let va = parse_general_numeric(a);
172 let vb = parse_general_numeric(b);
173 match (va.is_nan(), vb.is_nan()) {
174 (true, true) => Ordering::Equal,
175 (true, false) => Ordering::Less,
176 (false, true) => Ordering::Greater,
177 (false, false) => va.partial_cmp(&vb).unwrap_or(Ordering::Equal),
178 }
179}
180
181pub fn parse_general_numeric(s: &[u8]) -> f64 {
182 let s = skip_leading_blanks(s);
183 if s.is_empty() {
184 return f64::NAN;
185 }
186
187 let mut i = 0;
189
190 let start = if i < s.len() && (s[i] == b'+' || s[i] == b'-') {
192 i += 1;
193 i - 1
194 } else {
195 i
196 };
197
198 if i + 2 < s.len() {
200 let c0 = s[i].to_ascii_lowercase();
201 let c1 = s[i + 1].to_ascii_lowercase();
202 let c2 = s[i + 2].to_ascii_lowercase();
203 if (c0 == b'i' && c1 == b'n' && c2 == b'f') || (c0 == b'n' && c1 == b'a' && c2 == b'n') {
204 let end = s.len().min(i + 8); for e in (i + 3..=end).rev() {
207 if let Ok(text) = std::str::from_utf8(&s[start..e]) {
208 if let Ok(v) = text.parse::<f64>() {
209 return v;
210 }
211 }
212 }
213 return f64::NAN;
214 }
215 }
216
217 i = start;
219 if i < s.len() && (s[i] == b'+' || s[i] == b'-') {
220 i += 1;
221 }
222
223 let mut has_digits = false;
225 while i < s.len() && s[i].is_ascii_digit() {
226 i += 1;
227 has_digits = true;
228 }
229 if i < s.len() && s[i] == b'.' {
231 i += 1;
232 while i < s.len() && s[i].is_ascii_digit() {
233 i += 1;
234 has_digits = true;
235 }
236 }
237 if !has_digits {
238 return f64::NAN;
239 }
240 if i < s.len() && (s[i] == b'e' || s[i] == b'E') {
242 let save = i;
243 i += 1;
244 if i < s.len() && (s[i] == b'+' || s[i] == b'-') {
245 i += 1;
246 }
247 if i < s.len() && s[i].is_ascii_digit() {
248 while i < s.len() && s[i].is_ascii_digit() {
249 i += 1;
250 }
251 } else {
252 i = save;
253 }
254 }
255
256 std::str::from_utf8(&s[start..i])
258 .ok()
259 .and_then(|s| s.parse::<f64>().ok())
260 .unwrap_or(f64::NAN)
261}
262
263pub fn compare_human_numeric(a: &[u8], b: &[u8]) -> Ordering {
265 let va = parse_human_numeric(a);
266 let vb = parse_human_numeric(b);
267 va.partial_cmp(&vb).unwrap_or(Ordering::Equal)
268}
269
270pub fn parse_human_numeric(s: &[u8]) -> f64 {
271 let s = skip_leading_blanks(s);
272 if s.is_empty() {
273 return 0.0;
274 }
275
276 let base = parse_numeric_value(s);
278 let end = find_numeric_end(s);
279
280 if end < s.len() {
281 let multiplier = match s[end] {
282 b'K' | b'k' => 1e3,
283 b'M' => 1e6,
284 b'G' => 1e9,
285 b'T' => 1e12,
286 b'P' => 1e15,
287 b'E' => 1e18,
288 b'Z' => 1e21,
289 b'Y' => 1e24,
290 _ => 1.0,
291 };
292 base * multiplier
293 } else {
294 base
295 }
296}
297
298pub fn compare_month(a: &[u8], b: &[u8]) -> Ordering {
300 let ma = parse_month(a);
301 let mb = parse_month(b);
302 ma.cmp(&mb)
303}
304
305fn parse_month(s: &[u8]) -> u8 {
306 let s = skip_leading_blanks(s);
307 if s.len() < 3 {
308 return 0;
309 }
310 let m = [
311 s[0].to_ascii_uppercase(),
312 s[1].to_ascii_uppercase(),
313 s[2].to_ascii_uppercase(),
314 ];
315 match &m {
316 b"JAN" => 1,
317 b"FEB" => 2,
318 b"MAR" => 3,
319 b"APR" => 4,
320 b"MAY" => 5,
321 b"JUN" => 6,
322 b"JUL" => 7,
323 b"AUG" => 8,
324 b"SEP" => 9,
325 b"OCT" => 10,
326 b"NOV" => 11,
327 b"DEC" => 12,
328 _ => 0,
329 }
330}
331
332pub fn compare_version(a: &[u8], b: &[u8]) -> Ordering {
335 let mut ai = 0usize;
336 let mut bi = 0usize;
337
338 loop {
339 if ai >= a.len() && bi >= b.len() {
340 return Ordering::Equal;
341 }
342 if ai >= a.len() {
343 return Ordering::Less;
344 }
345 if bi >= b.len() {
346 return Ordering::Greater;
347 }
348
349 let ac = a[ai];
350 let bc = b[bi];
351
352 if ac.is_ascii_digit() && bc.is_ascii_digit() {
353 let anum = consume_number_bytes(a, &mut ai);
354 let bnum = consume_number_bytes(b, &mut bi);
355 match anum.cmp(&bnum) {
356 Ordering::Equal => continue,
357 other => return other,
358 }
359 } else {
360 match ac.cmp(&bc) {
361 Ordering::Equal => {
362 ai += 1;
363 bi += 1;
364 }
365 other => return other,
366 }
367 }
368 }
369}
370
371#[inline]
372fn consume_number_bytes(data: &[u8], pos: &mut usize) -> u64 {
373 let mut n: u64 = 0;
374 while *pos < data.len() && data[*pos].is_ascii_digit() {
375 n = n
376 .saturating_mul(10)
377 .saturating_add((data[*pos] - b'0') as u64);
378 *pos += 1;
379 }
380 n
381}
382
383pub fn compare_random(a: &[u8], b: &[u8], seed: u64) -> Ordering {
385 let ha = fnv1a_hash(a, seed);
386 let hb = fnv1a_hash(b, seed);
387 ha.cmp(&hb)
388}
389
390#[inline]
392fn fnv1a_hash(data: &[u8], seed: u64) -> u64 {
393 let mut hash = 0xcbf29ce484222325u64 ^ seed;
394 for &b in data {
395 hash ^= b as u64;
396 hash = hash.wrapping_mul(0x100000001b3);
397 }
398 hash
399}
400
401#[inline]
404fn is_dict_char(b: u8) -> bool {
405 b.is_ascii_alphanumeric() || b == b' ' || b == b'\t'
406}
407
408#[inline]
409fn is_printable(b: u8) -> bool {
410 b >= 0x20 && b < 0x7f
411}
412
413fn compare_text_filtered(
414 a: &[u8],
415 b: &[u8],
416 dict: bool,
417 no_print: bool,
418 fold_case: bool,
419) -> Ordering {
420 if !dict && !no_print && !fold_case {
421 return a.cmp(b);
422 }
423
424 let mut ai = a.iter().copied();
425 let mut bi = b.iter().copied();
426
427 loop {
428 let na = next_valid(&mut ai, dict, no_print);
429 let nb = next_valid(&mut bi, dict, no_print);
430 match (na, nb) {
431 (Some(ab), Some(bb)) => {
432 let ca = if fold_case {
433 ab.to_ascii_uppercase()
434 } else {
435 ab
436 };
437 let cb = if fold_case {
438 bb.to_ascii_uppercase()
439 } else {
440 bb
441 };
442 match ca.cmp(&cb) {
443 Ordering::Equal => continue,
444 other => return other,
445 }
446 }
447 (Some(_), None) => return Ordering::Greater,
448 (None, Some(_)) => return Ordering::Less,
449 (None, None) => return Ordering::Equal,
450 }
451 }
452}
453
454#[inline]
455fn next_valid(iter: &mut impl Iterator<Item = u8>, dict: bool, no_print: bool) -> Option<u8> {
456 loop {
457 match iter.next() {
458 None => return None,
459 Some(b) => {
460 if dict && !is_dict_char(b) {
461 continue;
462 }
463 if no_print && !is_printable(b) {
464 continue;
465 }
466 return Some(b);
467 }
468 }
469 }
470}
471
472pub fn compare_ignore_case(a: &[u8], b: &[u8]) -> Ordering {
474 compare_text_filtered(a, b, false, false, true)
475}
476
477pub fn compare_dictionary(a: &[u8], b: &[u8], ignore_case: bool) -> Ordering {
478 compare_text_filtered(a, b, true, false, ignore_case)
479}
480
481pub fn compare_ignore_nonprinting(a: &[u8], b: &[u8], ignore_case: bool) -> Ordering {
482 compare_text_filtered(a, b, false, true, ignore_case)
483}
484
485pub fn compare_with_opts(a: &[u8], b: &[u8], opts: &KeyOpts, random_seed: u64) -> Ordering {
487 let a = if opts.ignore_leading_blanks {
488 skip_leading_blanks(a)
489 } else {
490 a
491 };
492 let b = if opts.ignore_leading_blanks {
493 skip_leading_blanks(b)
494 } else {
495 b
496 };
497
498 let result = if opts.numeric {
499 compare_numeric(a, b)
500 } else if opts.general_numeric {
501 compare_general_numeric(a, b)
502 } else if opts.human_numeric {
503 compare_human_numeric(a, b)
504 } else if opts.month {
505 compare_month(a, b)
506 } else if opts.version {
507 compare_version(a, b)
508 } else if opts.random {
509 compare_random(a, b, random_seed)
510 } else {
511 compare_text_filtered(
512 a,
513 b,
514 opts.dictionary_order,
515 opts.ignore_nonprinting,
516 opts.ignore_case,
517 )
518 };
519
520 if opts.reverse {
521 result.reverse()
522 } else {
523 result
524 }
525}
526
527pub type CompareFn = fn(&[u8], &[u8]) -> Ordering;
530
531pub fn select_comparator(opts: &KeyOpts, random_seed: u64) -> (CompareFn, bool, bool) {
536 let needs_blank = opts.ignore_leading_blanks;
537 let needs_reverse = opts.reverse;
538
539 let cmp: CompareFn = if opts.numeric {
540 compare_numeric
541 } else if opts.general_numeric {
542 compare_general_numeric
543 } else if opts.human_numeric {
544 compare_human_numeric
545 } else if opts.month {
546 compare_month
547 } else if opts.version {
548 compare_version
549 } else if opts.random {
550 return (
553 make_random_comparator(random_seed),
554 needs_blank,
555 needs_reverse,
556 );
557 } else if opts.dictionary_order || opts.ignore_nonprinting || opts.ignore_case {
558 match (
560 opts.dictionary_order,
561 opts.ignore_nonprinting,
562 opts.ignore_case,
563 ) {
564 (false, false, true) => compare_ignore_case,
565 (true, false, false) => |a: &[u8], b: &[u8]| compare_dictionary(a, b, false),
566 (true, false, true) => |a: &[u8], b: &[u8]| compare_dictionary(a, b, true),
567 (false, true, false) => |a: &[u8], b: &[u8]| compare_ignore_nonprinting(a, b, false),
568 (false, true, true) => |a: &[u8], b: &[u8]| compare_ignore_nonprinting(a, b, true),
569 (true, true, false) => {
570 |a: &[u8], b: &[u8]| compare_text_filtered(a, b, true, true, false)
571 }
572 (true, true, true) => {
573 |a: &[u8], b: &[u8]| compare_text_filtered(a, b, true, true, true)
574 }
575 _ => |a: &[u8], b: &[u8]| a.cmp(b),
576 }
577 } else {
578 |a: &[u8], b: &[u8]| a.cmp(b)
579 };
580
581 (cmp, needs_blank, needs_reverse)
582}
583
584fn make_random_comparator(seed: u64) -> CompareFn {
585 RANDOM_SEED.store(seed, std::sync::atomic::Ordering::Relaxed);
588 random_compare_with_static_seed
589}
590
591static RANDOM_SEED: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);
592
593fn random_compare_with_static_seed(a: &[u8], b: &[u8]) -> Ordering {
594 let seed = RANDOM_SEED.load(std::sync::atomic::Ordering::Relaxed);
595 compare_random(a, b, seed)
596}