1use std::cmp::Ordering;
4
5use super::key::KeyOpts;
6
7#[inline]
9pub fn skip_leading_blanks(s: &[u8]) -> &[u8] {
10 let mut i = 0;
11 while i < s.len() && (s[i] == b' ' || s[i] == b'\t') {
12 i += 1;
13 }
14 &s[i..]
15}
16
17#[inline]
19pub fn compare_lexical(a: &[u8], b: &[u8]) -> Ordering {
20 a.cmp(b)
21}
22
23pub fn compare_numeric(a: &[u8], b: &[u8]) -> Ordering {
26 let va = parse_numeric_value(a);
27 let vb = parse_numeric_value(b);
28 va.partial_cmp(&vb).unwrap_or(Ordering::Equal)
29}
30
31pub fn parse_numeric_value(s: &[u8]) -> f64 {
32 let s = skip_leading_blanks(s);
33 if s.is_empty() {
34 return 0.0;
35 }
36 let end = find_numeric_end(s);
37 if end == 0 {
38 return 0.0;
39 }
40 match std::str::from_utf8(&s[..end]) {
41 Ok(num_str) => num_str.parse::<f64>().unwrap_or(0.0),
42 Err(_) => 0.0,
43 }
44}
45
46fn find_numeric_end(s: &[u8]) -> usize {
47 let mut i = 0;
48 if i < s.len() && (s[i] == b'+' || s[i] == b'-') {
49 i += 1;
50 }
51 let mut has_digits = false;
52 while i < s.len() && s[i].is_ascii_digit() {
53 i += 1;
54 has_digits = true;
55 }
56 if i < s.len() && s[i] == b'.' {
57 i += 1;
58 while i < s.len() && s[i].is_ascii_digit() {
59 i += 1;
60 has_digits = true;
61 }
62 }
63 if has_digits { i } else { 0 }
64}
65
66pub fn compare_general_numeric(a: &[u8], b: &[u8]) -> Ordering {
69 let va = parse_general_numeric(a);
70 let vb = parse_general_numeric(b);
71 match (va.is_nan(), vb.is_nan()) {
72 (true, true) => Ordering::Equal,
73 (true, false) => Ordering::Less,
74 (false, true) => Ordering::Greater,
75 (false, false) => va.partial_cmp(&vb).unwrap_or(Ordering::Equal),
76 }
77}
78
79pub fn parse_general_numeric(s: &[u8]) -> f64 {
80 let s = skip_leading_blanks(s);
81 let s_str = match std::str::from_utf8(s) {
82 Ok(s) => s.trim(),
83 Err(_) => return f64::NAN,
84 };
85 if s_str.is_empty() {
86 return f64::NAN;
87 }
88 if let Ok(v) = s_str.parse::<f64>() {
90 return v;
91 }
92
93 let bytes = s_str.as_bytes();
95 let mut i = 0;
96
97 if i < bytes.len() && (bytes[i] == b'+' || bytes[i] == b'-') {
99 i += 1;
100 }
101 let mut has_digits = false;
103 while i < bytes.len() && bytes[i].is_ascii_digit() {
104 i += 1;
105 has_digits = true;
106 }
107 if i < bytes.len() && bytes[i] == b'.' {
109 i += 1;
110 while i < bytes.len() && bytes[i].is_ascii_digit() {
111 i += 1;
112 has_digits = true;
113 }
114 }
115 if !has_digits {
116 return f64::NAN;
117 }
118 if i < bytes.len() && (bytes[i] == b'e' || bytes[i] == b'E') {
120 let save = i;
121 i += 1;
122 if i < bytes.len() && (bytes[i] == b'+' || bytes[i] == b'-') {
123 i += 1;
124 }
125 if i < bytes.len() && bytes[i].is_ascii_digit() {
126 while i < bytes.len() && bytes[i].is_ascii_digit() {
127 i += 1;
128 }
129 } else {
130 i = save;
131 }
132 }
133
134 s_str[..i].parse::<f64>().unwrap_or(f64::NAN)
135}
136
137pub fn compare_human_numeric(a: &[u8], b: &[u8]) -> Ordering {
139 let va = parse_human_numeric(a);
140 let vb = parse_human_numeric(b);
141 va.partial_cmp(&vb).unwrap_or(Ordering::Equal)
142}
143
144pub fn parse_human_numeric(s: &[u8]) -> f64 {
145 let s = skip_leading_blanks(s);
146 if s.is_empty() {
147 return 0.0;
148 }
149 let end = find_numeric_end(s);
150 let base = if end == 0 {
151 0.0
152 } else {
153 match std::str::from_utf8(&s[..end]) {
154 Ok(num_str) => num_str.parse::<f64>().unwrap_or(0.0),
155 Err(_) => 0.0,
156 }
157 };
158 if end < s.len() {
159 let multiplier = match s[end] {
160 b'K' | b'k' => 1e3,
161 b'M' => 1e6,
162 b'G' => 1e9,
163 b'T' => 1e12,
164 b'P' => 1e15,
165 b'E' => 1e18,
166 b'Z' => 1e21,
167 b'Y' => 1e24,
168 _ => 1.0,
169 };
170 base * multiplier
171 } else {
172 base
173 }
174}
175
176pub fn compare_month(a: &[u8], b: &[u8]) -> Ordering {
178 let ma = parse_month(a);
179 let mb = parse_month(b);
180 ma.cmp(&mb)
181}
182
183fn parse_month(s: &[u8]) -> u8 {
184 let s = skip_leading_blanks(s);
185 if s.len() < 3 {
186 return 0;
187 }
188 let m = [
189 s[0].to_ascii_uppercase(),
190 s[1].to_ascii_uppercase(),
191 s[2].to_ascii_uppercase(),
192 ];
193 match &m {
194 b"JAN" => 1,
195 b"FEB" => 2,
196 b"MAR" => 3,
197 b"APR" => 4,
198 b"MAY" => 5,
199 b"JUN" => 6,
200 b"JUL" => 7,
201 b"AUG" => 8,
202 b"SEP" => 9,
203 b"OCT" => 10,
204 b"NOV" => 11,
205 b"DEC" => 12,
206 _ => 0,
207 }
208}
209
210pub fn compare_version(a: &[u8], b: &[u8]) -> Ordering {
212 let a_str = std::str::from_utf8(a).unwrap_or("");
213 let b_str = std::str::from_utf8(b).unwrap_or("");
214 compare_version_str(a_str, b_str)
215}
216
217fn compare_version_str(a: &str, b: &str) -> Ordering {
218 let mut ai = a.chars().peekable();
219 let mut bi = b.chars().peekable();
220
221 loop {
222 match (ai.peek(), bi.peek()) {
223 (None, None) => return Ordering::Equal,
224 (None, Some(_)) => return Ordering::Less,
225 (Some(_), None) => return Ordering::Greater,
226 (Some(&ac), Some(&bc)) => {
227 if ac.is_ascii_digit() && bc.is_ascii_digit() {
228 let anum = consume_number(&mut ai);
229 let bnum = consume_number(&mut bi);
230 match anum.cmp(&bnum) {
231 Ordering::Equal => continue,
232 other => return other,
233 }
234 } else {
235 match ac.cmp(&bc) {
236 Ordering::Equal => {
237 ai.next();
238 bi.next();
239 }
240 other => return other,
241 }
242 }
243 }
244 }
245 }
246}
247
248fn consume_number(iter: &mut std::iter::Peekable<std::str::Chars>) -> u64 {
249 let mut n: u64 = 0;
250 while let Some(&c) = iter.peek() {
251 if c.is_ascii_digit() {
252 n = n.saturating_mul(10).saturating_add(c as u64 - '0' as u64);
253 iter.next();
254 } else {
255 break;
256 }
257 }
258 n
259}
260
261pub fn compare_random(a: &[u8], b: &[u8], seed: u64) -> Ordering {
263 let ha = fnv1a_hash(a, seed);
264 let hb = fnv1a_hash(b, seed);
265 ha.cmp(&hb)
266}
267
268#[inline]
270fn fnv1a_hash(data: &[u8], seed: u64) -> u64 {
271 let mut hash = 0xcbf29ce484222325u64 ^ seed;
272 for &b in data {
273 hash ^= b as u64;
274 hash = hash.wrapping_mul(0x100000001b3);
275 }
276 hash
277}
278
279#[inline]
282fn is_dict_char(b: u8) -> bool {
283 b.is_ascii_alphanumeric() || b == b' ' || b == b'\t'
284}
285
286#[inline]
287fn is_printable(b: u8) -> bool {
288 b >= 0x20 && b < 0x7f
289}
290
291fn compare_text_filtered(
292 a: &[u8],
293 b: &[u8],
294 dict: bool,
295 no_print: bool,
296 fold_case: bool,
297) -> Ordering {
298 if !dict && !no_print && !fold_case {
299 return a.cmp(b);
300 }
301
302 let mut ai = a.iter().copied();
303 let mut bi = b.iter().copied();
304
305 loop {
306 let na = next_valid(&mut ai, dict, no_print);
307 let nb = next_valid(&mut bi, dict, no_print);
308 match (na, nb) {
309 (Some(ab), Some(bb)) => {
310 let ca = if fold_case {
311 ab.to_ascii_uppercase()
312 } else {
313 ab
314 };
315 let cb = if fold_case {
316 bb.to_ascii_uppercase()
317 } else {
318 bb
319 };
320 match ca.cmp(&cb) {
321 Ordering::Equal => continue,
322 other => return other,
323 }
324 }
325 (Some(_), None) => return Ordering::Greater,
326 (None, Some(_)) => return Ordering::Less,
327 (None, None) => return Ordering::Equal,
328 }
329 }
330}
331
332#[inline]
333fn next_valid(iter: &mut impl Iterator<Item = u8>, dict: bool, no_print: bool) -> Option<u8> {
334 loop {
335 match iter.next() {
336 None => return None,
337 Some(b) => {
338 if dict && !is_dict_char(b) {
339 continue;
340 }
341 if no_print && !is_printable(b) {
342 continue;
343 }
344 return Some(b);
345 }
346 }
347 }
348}
349
350pub fn compare_ignore_case(a: &[u8], b: &[u8]) -> Ordering {
352 compare_text_filtered(a, b, false, false, true)
353}
354
355pub fn compare_dictionary(a: &[u8], b: &[u8], ignore_case: bool) -> Ordering {
356 compare_text_filtered(a, b, true, false, ignore_case)
357}
358
359pub fn compare_ignore_nonprinting(a: &[u8], b: &[u8], ignore_case: bool) -> Ordering {
360 compare_text_filtered(a, b, false, true, ignore_case)
361}
362
363pub fn compare_with_opts(a: &[u8], b: &[u8], opts: &KeyOpts, random_seed: u64) -> Ordering {
365 let a = if opts.ignore_leading_blanks {
366 skip_leading_blanks(a)
367 } else {
368 a
369 };
370 let b = if opts.ignore_leading_blanks {
371 skip_leading_blanks(b)
372 } else {
373 b
374 };
375
376 let result = if opts.numeric {
377 compare_numeric(a, b)
378 } else if opts.general_numeric {
379 compare_general_numeric(a, b)
380 } else if opts.human_numeric {
381 compare_human_numeric(a, b)
382 } else if opts.month {
383 compare_month(a, b)
384 } else if opts.version {
385 compare_version(a, b)
386 } else if opts.random {
387 compare_random(a, b, random_seed)
388 } else {
389 compare_text_filtered(
390 a,
391 b,
392 opts.dictionary_order,
393 opts.ignore_nonprinting,
394 opts.ignore_case,
395 )
396 };
397
398 if opts.reverse {
399 result.reverse()
400 } else {
401 result
402 }
403}