1mod check;
13mod chunks;
14mod custom_str_cmp;
15mod ext_sort;
16mod merge;
17mod numeric_str_cmp;
18mod tmp_dir;
19
20use bigdecimal::BigDecimal;
21use chunks::LineData;
22use clap::builder::ValueParser;
23use clap::{Arg, ArgAction, Command};
24use custom_str_cmp::custom_str_cmp;
25use ext_sort::ext_sort;
26use fnv::FnvHasher;
27#[cfg(target_os = "linux")]
28use nix::libc::{RLIMIT_NOFILE, getrlimit, rlimit};
29use numeric_str_cmp::{NumInfo, NumInfoParseSettings, human_numeric_str_cmp, numeric_str_cmp};
30use rand::{Rng, rng};
31use rayon::prelude::*;
32use std::cmp::Ordering;
33use std::env;
34use std::ffi::{OsStr, OsString};
35use std::fs::{File, OpenOptions};
36use std::hash::{Hash, Hasher};
37use std::io::{BufRead, BufReader, BufWriter, Read, Write, stdin, stdout};
38use std::num::IntErrorKind;
39use std::ops::Range;
40use std::path::Path;
41use std::path::PathBuf;
42use std::str::Utf8Error;
43use thiserror::Error;
44use uucore::display::Quotable;
45use uucore::error::{FromIo, strip_errno};
46use uucore::error::{UError, UResult, USimpleError, UUsageError};
47use uucore::extendedbigdecimal::ExtendedBigDecimal;
48use uucore::format_usage;
49use uucore::line_ending::LineEnding;
50use uucore::parser::num_parser::{ExtendedParser, ExtendedParserError};
51use uucore::parser::parse_size::{ParseSizeError, Parser};
52use uucore::parser::shortcut_value_parser::ShortcutValueParser;
53use uucore::show_error;
54use uucore::translate;
55use uucore::version_cmp::version_cmp;
56
57use crate::tmp_dir::TmpDirWrapper;
58
/// Identifiers of the command-line arguments understood by this utility.
mod options {
    /// Arguments that choose how keys are compared.
    pub mod modes {
        /// Argument whose value names a mode (the caller compares it against
        /// words such as "numeric" or "month").
        pub const SORT: &str = "sort";

        pub const HUMAN_NUMERIC: &str = "human-numeric-sort";
        pub const MONTH: &str = "month-sort";
        pub const NUMERIC: &str = "numeric-sort";
        pub const GENERAL_NUMERIC: &str = "general-numeric-sort";
        pub const VERSION: &str = "version-sort";
        pub const RANDOM: &str = "random-sort";

        /// Every individual mode flag; `make_sort_mode_arg` makes each one
        /// conflict with all the others in this list.
        pub const ALL_SORT_MODES: [&str; 6] = [
            GENERAL_NUMERIC,
            HUMAN_NUMERIC,
            MONTH,
            NUMERIC,
            VERSION,
            RANDOM,
        ];
    }

    /// Arguments and argument values related to checking for sorted input.
    pub mod check {
        pub const CHECK: &str = "check";
        pub const CHECK_SILENT: &str = "check-silent";
        // `SILENT` and `QUIET` are matched against the *value* of `CHECK`.
        pub const SILENT: &str = "silent";
        pub const QUIET: &str = "quiet";
        // NOTE(review): presumably another accepted value of `CHECK` — confirm in `uu_app`.
        pub const DIAGNOSE_FIRST: &str = "diagnose-first";
    }

    pub const HELP: &str = "help";
    pub const VERSION: &str = "version";
    pub const DICTIONARY_ORDER: &str = "dictionary-order";
    pub const MERGE: &str = "merge";
    pub const DEBUG: &str = "debug";
    pub const IGNORE_CASE: &str = "ignore-case";
    pub const IGNORE_LEADING_BLANKS: &str = "ignore-leading-blanks";
    pub const IGNORE_NONPRINTING: &str = "ignore-nonprinting";
    pub const OUTPUT: &str = "output";
    pub const REVERSE: &str = "reverse";
    pub const STABLE: &str = "stable";
    pub const UNIQUE: &str = "unique";
    pub const KEY: &str = "key";
    pub const SEPARATOR: &str = "field-separator";
    pub const ZERO_TERMINATED: &str = "zero-terminated";
    pub const PARALLEL: &str = "parallel";
    pub const FILES0_FROM: &str = "files0-from";
    pub const BUF_SIZE: &str = "buffer-size";
    pub const TMP_DIR: &str = "temporary-directory";
    pub const COMPRESS_PROG: &str = "compress-program";
    pub const BATCH_SIZE: &str = "batch-size";

    /// Positional file operands.
    pub const FILES: &str = "files";
}
112
/// ASCII byte of the decimal point.
const DECIMAL_PT: u8 = b'.';

/// ASCII minus sign, held by reference.
// NOTE(review): presumably a reference so it can be compared with `&u8` iterator items — confirm at use sites.
const NEGATIVE: &u8 = &b'-';
/// ASCII plus sign, held by reference (see `NEGATIVE`).
const POSITIVE: &u8 = &b'+';
/// Default value of `GlobalSettings::buffer_size`; also used when no
/// buffer-size argument is supplied.
const DEFAULT_BUF_SIZE: usize = 1_000_000_000;

/// Errors reported by this utility.
///
/// The display text of every variant except `Uft8Error` comes from a localized
/// message looked up with `translate!`.
#[derive(Debug, Error)]
pub enum SortError {
    /// The input was found to be out of order; displays as an empty string
    /// when `silent` is set (see `format_disorder`).
    #[error("{}", format_disorder(.file, .line_number, .line, .silent))]
    Disorder {
        file: OsString,
        line_number: usize,
        line: String,
        silent: bool,
    },

    /// A file could not be opened.
    #[error("{}", translate!("sort-open-failed", "path" => format!("{}", .path.maybe_quote()), "error" => strip_errno(.error)))]
    OpenFailed {
        path: PathBuf,
        error: std::io::Error,
    },

    /// A key specification could not be parsed; `msg` carries the reason.
    #[error("{}", translate!("sort-parse-key-error", "key" => .key.quote(), "msg" => .msg.clone()))]
    ParseKeyError { key: String, msg: String },

    /// A file could not be read.
    #[error("{}", translate!("sort-cannot-read", "path" => format!("{}", .path.maybe_quote()), "error" => strip_errno(.error)))]
    ReadFailed {
        path: PathBuf,
        error: std::io::Error,
    },

    /// A temporary file could not be opened.
    #[error("{}", translate!("sort-open-tmp-file-failed", "error" => strip_errno(.error)))]
    OpenTmpFileFailed { error: std::io::Error },

    /// The compression program ended with the given exit code.
    #[error("{}", translate!("sort-compress-prog-execution-failed", "code" => .code))]
    CompressProgExecutionFailed { code: i32 },

    /// The compression program terminated abnormally.
    #[error("{}", translate!("sort-compress-prog-terminated-abnormally", "prog" => .prog.quote()))]
    CompressProgTerminatedAbnormally { prog: String },

    /// A temporary file could not be created at `path`.
    #[error("{}", translate!("sort-cannot-create-tmp-file", "path" => format!("{}", .path.display())))]
    TmpFileCreationFailed { path: PathBuf },

    /// File operands were given together with the files0-from argument.
    #[error("{}", translate!("sort-file-operands-combined", "file" => format!("{}", .file.display()), "help" => uucore::execution_phrase()))]
    FileOperandsCombined { file: PathBuf },

    /// Wraps a UTF-8 decoding error and displays it unchanged.
    // NOTE(review): the variant name misspells "Utf8"; it is left as-is because
    // renaming it would change the public enum.
    #[error("{error}")]
    Uft8Error { error: Utf8Error },

    /// The output argument was given more than once.
    #[error("{}", translate!("sort-multiple-output-files"))]
    MultipleOutputFiles,

    /// The files0-from input named `-` as a file.
    #[error("{}", translate!("sort-minus-in-stdin"))]
    MinusInStdIn,

    /// The files0-from input did not name any file.
    #[error("{}", translate!("sort-no-input-from", "file" => format!("{}", .file.display())))]
    EmptyInputFile { file: PathBuf },

    /// The files0-from input contained an empty file name; `line_num` is the
    /// 1-based position of that entry.
    #[error("{}", translate!("sort-invalid-zero-length-filename", "file" => format!("{}", .file.display()), "line_num" => .line_num))]
    ZeroLengthFileName { file: PathBuf, line_num: usize },
}
178
179impl UError for SortError {
180 fn code(&self) -> i32 {
181 match self {
182 Self::Disorder { .. } => 1,
183 _ => 2,
184 }
185 }
186}
187
188fn format_disorder(file: &OsString, line_number: &usize, line: &String, silent: &bool) -> String {
189 if *silent {
190 String::new()
191 } else {
192 translate!("sort-error-disorder", "file" => file.maybe_quote(), "line_number" => line_number, "line" => line.to_owned())
193 }
194}
195
/// How a key is compared.
///
/// The derived ordering follows the declaration order of the variants.
#[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy, Debug)]
enum SortMode {
    Numeric,
    HumanNumeric,
    GeneralNumeric,
    Month,
    Version,
    Random,
    Default,
}

impl SortMode {
    /// Single-letter option for this mode, or `None` for `Default`,
    /// which has no letter of its own.
    fn get_short_name(&self) -> Option<char> {
        let letter = match *self {
            Self::Default => return None,
            Self::Numeric => 'n',
            Self::HumanNumeric => 'h',
            Self::GeneralNumeric => 'g',
            Self::Month => 'M',
            Self::Version => 'V',
            Self::Random => 'R',
        };
        Some(letter)
    }
}
220
221pub struct Output {
222 file: Option<(OsString, File)>,
223}
224
225impl Output {
226 fn new(name: Option<impl AsRef<OsStr>>) -> UResult<Self> {
227 let file = if let Some(name) = name {
228 let path = Path::new(name.as_ref());
229 #[allow(clippy::suspicious_open_options)]
232 let file = OpenOptions::new()
233 .write(true)
234 .create(true)
235 .open(path)
236 .map_err(|e| SortError::OpenFailed {
237 path: path.to_owned(),
238 error: e,
239 })?;
240 Some((name.as_ref().to_owned(), file))
241 } else {
242 None
243 };
244 Ok(Self { file })
245 }
246
247 fn into_write(self) -> BufWriter<Box<dyn Write>> {
248 BufWriter::new(match self.file {
249 Some((_name, file)) => {
250 let _ = file.set_len(0);
252 Box::new(file)
253 }
254 None => Box::new(stdout()),
255 })
256 }
257
258 fn as_output_name(&self) -> Option<&OsStr> {
259 match &self.file {
260 Some((name, _file)) => Some(name.as_os_str()),
261 None => None,
262 }
263 }
264}
265
/// Settings that apply to the whole run, filled in from the command line.
#[derive(Clone)]
pub struct GlobalSettings {
    /// Comparison mode used where a key does not set its own.
    mode: SortMode,
    debug: bool,
    ignore_leading_blanks: bool,
    ignore_case: bool,
    dictionary_order: bool,
    ignore_non_printing: bool,
    merge: bool,
    reverse: bool,
    stable: bool,
    unique: bool,
    check: bool,
    /// Implies `check`; the caller sets both together.
    check_silent: bool,
    /// Set when the random mode is selected globally or by a key.
    salt: Option<[u8; 16]>,
    /// One entry per key argument, in command-line order.
    selectors: Vec<FieldSelector>,
    /// Field separator byte; `None` selects whitespace-based tokenization.
    separator: Option<u8>,
    /// Raw value of the parallel argument (exported as `RAYON_NUM_THREADS`).
    threads: String,
    line_ending: LineEnding,
    buffer_size: usize,
    compress_prog: Option<String>,
    merge_batch_size: usize,
    /// Values derived from `selectors` by `init_precomputed`.
    precomputed: Precomputed,
}
290
/// Per-line counts and flags derived from the selectors by
/// `GlobalSettings::init_precomputed`.
#[derive(Clone, Debug, Default)]
struct Precomputed {
    /// True if any selector needs the line split into fields.
    needs_tokens: bool,
    /// Number of selectors in `Numeric` or `HumanNumeric` mode.
    num_infos_per_line: usize,
    /// Number of selectors in `GeneralNumeric` mode.
    floats_per_line: usize,
    /// Number of selectors with `needs_selection` set.
    selections_per_line: usize,
}
300
301impl GlobalSettings {
302 fn parse_byte_count(input: &str) -> Result<usize, ParseSizeError> {
307 let size = Parser::default()
310 .with_allow_list(&[
311 "b", "k", "K", "m", "M", "g", "G", "t", "T", "P", "E", "Z", "Y", "R", "Q", "%",
312 ])
313 .with_default_unit("K")
314 .with_b_byte_count(true)
315 .parse(input.trim())?;
316
317 usize::try_from(size).map_err(|_| {
318 ParseSizeError::SizeTooBig(translate!("sort-error-buffer-size-too-big", "size" => size))
319 })
320 }
321
322 fn init_precomputed(&mut self) {
326 self.precomputed.needs_tokens = self.selectors.iter().any(|s| s.needs_tokens);
327 self.precomputed.selections_per_line =
328 self.selectors.iter().filter(|s| s.needs_selection).count();
329 self.precomputed.num_infos_per_line = self
330 .selectors
331 .iter()
332 .filter(|s| matches!(s.settings.mode, SortMode::Numeric | SortMode::HumanNumeric))
333 .count();
334 self.precomputed.floats_per_line = self
335 .selectors
336 .iter()
337 .filter(|s| matches!(s.settings.mode, SortMode::GeneralNumeric))
338 .count();
339 }
340}
341
342impl Default for GlobalSettings {
343 fn default() -> Self {
344 Self {
345 mode: SortMode::Default,
346 debug: false,
347 ignore_leading_blanks: false,
348 ignore_case: false,
349 dictionary_order: false,
350 ignore_non_printing: false,
351 merge: false,
352 reverse: false,
353 stable: false,
354 unique: false,
355 check: false,
356 check_silent: false,
357 salt: None,
358 selectors: vec![],
359 separator: None,
360 threads: String::new(),
361 line_ending: LineEnding::Newline,
362 buffer_size: DEFAULT_BUF_SIZE,
363 compress_prog: None,
364 merge_batch_size: 32,
365 precomputed: Precomputed::default(),
366 }
367 }
368}
369
/// Comparison settings of a single key.
#[derive(Clone, PartialEq, Debug)]
struct KeySettings {
    mode: SortMode,
    ignore_blanks: bool,
    ignore_case: bool,
    dictionary_order: bool,
    ignore_non_printing: bool,
    reverse: bool,
}
379
380impl KeySettings {
381 fn check_compatibility(
383 mode: SortMode,
384 ignore_non_printing: bool,
385 dictionary_order: bool,
386 ) -> Result<(), String> {
387 if matches!(
388 mode,
389 SortMode::Numeric | SortMode::HumanNumeric | SortMode::GeneralNumeric | SortMode::Month
390 ) {
391 if dictionary_order {
392 return Err(
393 translate!("sort-options-incompatible", "opt1" => "d", "opt2" => mode.get_short_name().unwrap()),
394 );
395 } else if ignore_non_printing {
396 return Err(
397 translate!("sort-options-incompatible", "opt1" => "i", "opt2" => mode.get_short_name().unwrap()),
398 );
399 }
400 }
401 Ok(())
402 }
403
404 fn set_sort_mode(&mut self, mode: SortMode) -> Result<(), String> {
405 if self.mode != SortMode::Default && self.mode != mode {
406 return Err(
407 translate!("sort-options-incompatible", "opt1" => self.mode.get_short_name().unwrap(), "opt2" => mode.get_short_name().unwrap()),
408 );
409 }
410 Self::check_compatibility(mode, self.ignore_non_printing, self.dictionary_order)?;
411 self.mode = mode;
412 Ok(())
413 }
414
415 fn set_dictionary_order(&mut self) -> Result<(), String> {
416 Self::check_compatibility(self.mode, self.ignore_non_printing, true)?;
417 self.dictionary_order = true;
418 Ok(())
419 }
420
421 fn set_ignore_non_printing(&mut self) -> Result<(), String> {
422 Self::check_compatibility(self.mode, true, self.dictionary_order)?;
423 self.ignore_non_printing = true;
424 Ok(())
425 }
426}
427
428impl From<&GlobalSettings> for KeySettings {
429 fn from(settings: &GlobalSettings) -> Self {
430 Self {
431 mode: settings.mode,
432 ignore_blanks: settings.ignore_leading_blanks,
433 ignore_case: settings.ignore_case,
434 ignore_non_printing: settings.ignore_non_printing,
435 reverse: settings.reverse,
436 dictionary_order: settings.dictionary_order,
437 }
438 }
439}
440
441impl Default for KeySettings {
442 fn default() -> Self {
443 Self::from(&GlobalSettings::default())
444 }
445}
/// What a selector extracted from a line (see `FieldSelector::get_selection`).
enum Selection<'a> {
    /// Produced for `GeneralNumeric` keys: the already parsed value.
    AsBigDecimal(GeneralBigDecimalParseResult),
    /// Produced for `Numeric` / `HumanNumeric` keys: the number's bytes plus
    /// the `NumInfo` returned by the parser.
    WithNumInfo(&'a [u8], NumInfo),
    /// Produced for every other mode: the selected bytes.
    Str(&'a [u8]),
}
451
/// Byte range of one field within a line, as produced by `tokenize`.
type Field = Range<usize>;
453
/// A borrowed input line, without its line terminator (`print` appends one).
#[derive(Clone, Debug)]
pub struct Line<'a> {
    /// Raw bytes of the line.
    line: &'a [u8],
    // NOTE(review): presumably the line's position within its chunk, used to
    // look up the matching `LineData` entries — confirm against `chunks`.
    index: usize,
}
459
460impl<'a> Line<'a> {
461 fn create(
466 line: &'a [u8],
467 index: usize,
468 line_data: &mut LineData<'a>,
469 token_buffer: &mut Vec<Field>,
470 settings: &GlobalSettings,
471 ) -> Self {
472 token_buffer.clear();
473 if settings.precomputed.needs_tokens {
474 tokenize(line, settings.separator, token_buffer);
475 }
476 if settings.mode == SortMode::Numeric {
477 let line_num_float = (!line.iter().any(u8::is_ascii_alphabetic))
479 .then(|| std::str::from_utf8(line).ok())
480 .flatten()
481 .and_then(|s| s.parse::<f64>().ok());
482 line_data.line_num_floats.push(line_num_float);
483 }
484 for (selector, selection) in settings
485 .selectors
486 .iter()
487 .map(|selector| (selector, selector.get_selection(line, token_buffer)))
488 {
489 match selection {
490 Selection::AsBigDecimal(parsed_float) => line_data.parsed_floats.push(parsed_float),
491 Selection::WithNumInfo(str, num_info) => {
492 line_data.num_infos.push(num_info);
493 line_data.selections.push(str);
494 }
495 Selection::Str(str) => {
496 if selector.needs_selection {
497 line_data.selections.push(str);
498 }
499 }
500 }
501 }
502 Self { line, index }
503 }
504
505 fn print(&self, writer: &mut impl Write, settings: &GlobalSettings) -> std::io::Result<()> {
506 if settings.debug {
507 self.print_debug(settings, writer)?;
508 } else {
509 writer.write_all(self.line)?;
510 writer.write_all(&[settings.line_ending.into()])?;
511 }
512 Ok(())
513 }
514
515 fn print_debug(
518 &self,
519 settings: &GlobalSettings,
520 writer: &mut impl Write,
521 ) -> std::io::Result<()> {
522 let line = self
527 .line
528 .iter()
529 .copied()
530 .map(|c| if c == b'\t' { b'>' } else { c })
531 .collect::<Vec<_>>();
532
533 writer.write_all(&line)?;
534 writeln!(writer)?;
535
536 let mut fields = vec![];
537 tokenize(self.line, settings.separator, &mut fields);
538 for selector in &settings.selectors {
539 let mut selection = selector.get_range(self.line, Some(&fields));
540 match selector.settings.mode {
541 SortMode::Numeric | SortMode::HumanNumeric => {
542 let (_, num_range) = NumInfo::parse(
544 &self.line[selection.clone()],
545 &NumInfoParseSettings {
546 accept_si_units: selector.settings.mode == SortMode::HumanNumeric,
547 ..Default::default()
548 },
549 );
550 let initial_selection = selection.clone();
551
552 selection.start += num_range.start;
554 selection.end = selection.start + num_range.len();
555
556 if num_range == (0..0) {
557 let leading_whitespace = self.line[selection.clone()]
560 .iter()
561 .position(|c| !c.is_ascii_whitespace())
562 .unwrap_or(0);
563 selection.start += leading_whitespace;
564 selection.end += leading_whitespace;
565 } else {
566 if selector.settings.mode == SortMode::HumanNumeric {
568 if let Some(
569 b'k' | b'K' | b'M' | b'G' | b'T' | b'P' | b'E' | b'Z' | b'Y' | b'R'
570 | b'Q',
571 ) = self.line[selection.end..initial_selection.end].first()
572 {
573 selection.end += 1;
574 }
575 }
576
577 while let Some(b'-' | b'0' | b'.') =
579 self.line[initial_selection.start..selection.start].last()
580 {
581 selection.start -= 1;
582 }
583 }
584 }
585 SortMode::GeneralNumeric => {
586 let initial_selection = &self.line[selection.clone()];
587
588 let leading = get_leading_gen(initial_selection);
589
590 selection.start += leading.start;
592 selection.end = selection.start + leading.len();
593 }
594 SortMode::Month => {
595 let initial_selection = &self.line[selection.clone()];
596
597 let mut month_chars = initial_selection
598 .iter()
599 .enumerate()
600 .skip_while(|(_, c)| c.is_ascii_whitespace());
601
602 let month = if month_parse(initial_selection) == Month::Unknown {
603 let first_non_whitespace = month_chars.next();
606 first_non_whitespace.map_or(
607 initial_selection.len()..initial_selection.len(),
608 |(idx, _)| idx..idx,
609 )
610 } else {
611 month_chars.next().unwrap().0
613 ..month_chars
614 .nth(2)
615 .map_or(initial_selection.len(), |(idx, _)| idx)
616 };
617
618 selection.start += month.start;
620 selection.end = selection.start + month.len();
621 }
622 _ => {}
623 }
624
625 let select = &line[..selection.start];
626 write!(writer, "{}", " ".repeat(select.len()))?;
627
628 if selection.is_empty() {
629 writeln!(writer, "{}", translate!("sort-error-no-match-for-key"))?;
630 } else {
631 let select = &line[selection];
632 writeln!(writer, "{}", "_".repeat(select.len()))?;
633 }
634 }
635
636 if settings.mode != SortMode::Random
637 && !settings.stable
638 && !settings.unique
639 && (settings.dictionary_order
640 || settings.ignore_leading_blanks
641 || settings.ignore_case
642 || settings.ignore_non_printing
643 || settings.mode != SortMode::Default
644 || settings
645 .selectors
646 .last()
647 .is_none_or(|selector| selector != &FieldSelector::default()))
648 {
649 if self.line.is_empty() {
651 writeln!(writer, "{}", translate!("sort-error-no-match-for-key"))?;
652 } else {
653 writeln!(writer, "{}", "_".repeat(self.line.len()))?;
654 }
655 }
656 Ok(())
657 }
658}
659
660fn tokenize(line: &[u8], separator: Option<u8>, token_buffer: &mut Vec<Field>) {
662 assert!(token_buffer.is_empty());
663 if let Some(separator) = separator {
664 tokenize_with_separator(line, separator, token_buffer);
665 } else {
666 tokenize_default(line, token_buffer);
667 }
668}
669
670fn tokenize_default(line: &[u8], token_buffer: &mut Vec<Field>) {
674 token_buffer.push(0..0);
675 let mut previous_was_whitespace = true;
677 for (idx, char) in line.iter().enumerate() {
678 if char.is_ascii_whitespace() {
679 if !previous_was_whitespace {
680 token_buffer.last_mut().unwrap().end = idx;
681 token_buffer.push(idx..0);
682 }
683 previous_was_whitespace = true;
684 } else {
685 previous_was_whitespace = false;
686 }
687 }
688 token_buffer.last_mut().unwrap().end = line.len();
689}
690
691fn tokenize_with_separator(line: &[u8], separator: u8, token_buffer: &mut Vec<Field>) {
694 let separator_indices = line
695 .iter()
696 .enumerate()
697 .filter_map(|(i, &c)| if c == separator { Some(i) } else { None });
698 let mut start = 0;
699 for sep_idx in separator_indices {
700 token_buffer.push(start..sep_idx);
701 start = sep_idx + 1;
702 }
703 if start < line.len() {
704 token_buffer.push(start..line.len());
705 }
706}
707
/// One end (start or end) of a key, given as a field number and a character
/// index within that field.
#[derive(Clone, PartialEq, Debug)]
struct KeyPosition {
    /// 1-based field number; `KeyPosition::new` rejects 0.
    field: usize,
    /// 1-based character index within the field. 0 is only meaningful for an
    /// end position, where it resolves to the end of the field.
    char: usize,
    /// Skip leading whitespace of the field before applying `char`.
    ignore_blanks: bool,
}
716
717impl KeyPosition {
718 fn new(key: &str, default_char_index: usize, ignore_blanks: bool) -> Result<Self, String> {
719 let mut field_and_char = key.split('.');
720
721 let field = field_and_char
722 .next()
723 .ok_or_else(|| translate!("sort-invalid-key", "key" => key.quote()))?;
724 let char = field_and_char.next();
725
726 let field = match field.parse::<usize>() {
727 Ok(f) => f,
728 Err(e) if *e.kind() == IntErrorKind::PosOverflow => usize::MAX,
729 Err(e) => {
730 return Err(
731 translate!("sort-failed-parse-field-index", "field" => field.quote(), "error" => e),
732 );
733 }
734 };
735 if field == 0 {
736 return Err(translate!("sort-field-index-cannot-be-zero"));
737 }
738
739 let char = char.map_or(Ok(default_char_index), |char| {
740 char.parse().map_err(|e: std::num::ParseIntError| {
741 translate!("sort-failed-parse-char-index", "char" => char.quote(), "error" => e)
742 })
743 })?;
744
745 Ok(Self {
746 field,
747 char,
748 ignore_blanks,
749 })
750 }
751}
752
753impl Default for KeyPosition {
754 fn default() -> Self {
755 Self {
756 field: 1,
757 char: 1,
758 ignore_blanks: false,
759 }
760 }
761}
762
/// A parsed key: which part of a line to compare, and how to compare it.
#[derive(Clone, PartialEq, Debug, Default)]
struct FieldSelector {
    /// Start of the key.
    from: KeyPosition,
    /// End of the key; `None` extends the key to the end of the line.
    to: Option<KeyPosition>,
    settings: KeySettings,
    /// Whether resolving this key requires the line split into fields.
    needs_tokens: bool,
    /// Whether the selected bytes are stored per line (see `Line::create`).
    needs_selection: bool,
}
774
775impl FieldSelector {
776 fn split_key_options(position: &str) -> (&str, &str) {
778 if let Some((options_start, _)) = position.char_indices().find(|(_, c)| c.is_alphabetic()) {
779 position.split_at(options_start)
780 } else {
781 (position, "")
782 }
783 }
784
785 fn parse(key: &str, global_settings: &GlobalSettings) -> UResult<Self> {
786 let mut from_to = key.split(',');
787 let (from, from_options) = Self::split_key_options(from_to.next().unwrap());
788 let to = from_to.next().map(Self::split_key_options);
789 let options_are_empty = from_options.is_empty() && matches!(to, None | Some((_, "")));
790
791 if options_are_empty {
792 (|| {
794 Self::new(
797 KeyPosition::new(from, 1, global_settings.ignore_leading_blanks)?,
798 to.map(|(to, _)| {
799 KeyPosition::new(to, 0, global_settings.ignore_leading_blanks)
800 })
801 .transpose()?,
802 KeySettings::from(global_settings),
803 )
804 })()
805 } else {
806 Self::parse_with_options((from, from_options), to)
808 }
809 .map_err(|msg| {
810 SortError::ParseKeyError {
811 key: key.to_owned(),
812 msg,
813 }
814 .into()
815 })
816 }
817
818 fn parse_with_options(
819 (from, from_options): (&str, &str),
820 to: Option<(&str, &str)>,
821 ) -> Result<Self, String> {
822 fn parse_key_settings(
824 options: &str,
825 key_settings: &mut KeySettings,
826 ) -> Result<bool, String> {
827 let mut ignore_blanks = false;
828 for option in options.chars() {
829 match option {
830 'M' => key_settings.set_sort_mode(SortMode::Month)?,
831 'b' => ignore_blanks = true,
832 'd' => key_settings.set_dictionary_order()?,
833 'f' => key_settings.ignore_case = true,
834 'g' => key_settings.set_sort_mode(SortMode::GeneralNumeric)?,
835 'h' => key_settings.set_sort_mode(SortMode::HumanNumeric)?,
836 'i' => key_settings.set_ignore_non_printing()?,
837 'n' => key_settings.set_sort_mode(SortMode::Numeric)?,
838 'R' => key_settings.set_sort_mode(SortMode::Random)?,
839 'r' => key_settings.reverse = true,
840 'V' => key_settings.set_sort_mode(SortMode::Version)?,
841 c => {
842 return Err(translate!("sort-invalid-option", "option" => c));
843 }
844 }
845 }
846 Ok(ignore_blanks)
847 }
848
849 let mut key_settings = KeySettings::default();
850 let from = parse_key_settings(from_options, &mut key_settings)
851 .map(|ignore_blanks| KeyPosition::new(from, 1, ignore_blanks))??;
852 let to = if let Some((to, to_options)) = to {
853 Some(
854 parse_key_settings(to_options, &mut key_settings)
855 .map(|ignore_blanks| KeyPosition::new(to, 0, ignore_blanks))??,
856 )
857 } else {
858 None
859 };
860 Self::new(from, to, key_settings)
861 }
862
863 fn new(
864 from: KeyPosition,
865 to: Option<KeyPosition>,
866 settings: KeySettings,
867 ) -> Result<Self, String> {
868 if from.char == 0 {
869 Err(translate!("sort-invalid-char-index-zero-start"))
870 } else {
871 Ok(Self {
872 needs_selection: (from.field != 1
873 || from.char != 1
874 || to.is_some()
875 || matches!(settings.mode, SortMode::Numeric | SortMode::HumanNumeric)
876 || from.ignore_blanks)
877 && !matches!(settings.mode, SortMode::GeneralNumeric),
878 needs_tokens: from.field != 1 || from.char == 0 || to.is_some(),
879 from,
880 to,
881 settings,
882 })
883 }
884 }
885
886 fn get_selection<'a>(&self, line: &'a [u8], tokens: &[Field]) -> Selection<'a> {
889 let tokens = if self.needs_tokens {
891 Some(tokens)
892 } else {
893 None
894 };
895 let mut range_str = &line[self.get_range(line, tokens)];
896 if self.settings.mode == SortMode::Numeric || self.settings.mode == SortMode::HumanNumeric {
897 let (info, num_range) = NumInfo::parse(
899 range_str,
900 &NumInfoParseSettings {
901 accept_si_units: self.settings.mode == SortMode::HumanNumeric,
902 ..Default::default()
903 },
904 );
905 range_str = &range_str[num_range];
907 Selection::WithNumInfo(range_str, info)
908 } else if self.settings.mode == SortMode::GeneralNumeric {
909 Selection::AsBigDecimal(general_bd_parse(&range_str[get_leading_gen(range_str)]))
911 } else {
912 Selection::Str(range_str)
914 }
915 }
916
    /// Resolves this key to a byte range within `line`.
    ///
    /// `tokens` may be `None` only when the key can be resolved without field
    /// boundaries; the `tokens.unwrap()` calls below depend on that. The
    /// returned range never has `start > end`. A start position past the end
    /// of the line yields the empty range at the end of the line.
    fn get_range(&self, line: &[u8], tokens: Option<&[Field]>) -> Range<usize> {
        /// Outcome of resolving one `KeyPosition` against a line.
        enum Resolution {
            /// Index of the byte the position refers to.
            StartOfChar(usize),
            /// Exclusive end index of a field (position with `char == 0`).
            EndOfChar(usize),
            /// The position lies before the start of the line.
            TooLow,
            /// The position lies past the available fields or bytes.
            TooHigh,
        }

        /// Resolves `position` to an index into `line`.
        fn resolve_index(
            line: &[u8],
            tokens: Option<&[Field]>,
            position: &KeyPosition,
        ) -> Resolution {
            if matches!(tokens, Some(tokens) if tokens.len() < position.field) {
                // The line has fewer fields than the position asks for.
                Resolution::TooHigh
            } else if position.char == 0 {
                // `char == 0` means "end of the field".
                let end = tokens.unwrap()[position.field - 1].end;
                if end == 0 {
                    Resolution::TooLow
                } else {
                    Resolution::EndOfChar(end)
                }
            } else {
                let mut idx = if position.field == 1 {
                    // The first field starts at index 0; no tokens are needed.
                    0
                } else {
                    tokens.unwrap()[position.field - 1].start
                };
                if position.ignore_blanks {
                    // Skip leading whitespace; lands on the end of the line
                    // if only whitespace remains.
                    idx += line[idx..]
                        .iter()
                        .enumerate()
                        .find(|(_, c)| !c.is_ascii_whitespace())
                        .map_or(line[idx..].len(), |(idx, _)| idx);
                }
                // Advance by the 1-based character index, clamped to the end of the line.
                idx += line[idx..]
                    .iter()
                    .enumerate()
                    .nth(position.char - 1)
                    .map_or(line[idx..].len(), |(idx, _)| idx);
                if idx >= line.len() {
                    Resolution::TooHigh
                } else {
                    Resolution::StartOfChar(idx)
                }
            }
        }

        match resolve_index(line, tokens, &self.from) {
            Resolution::StartOfChar(from) => {
                let to = self.to.as_ref().map(|to| resolve_index(line, tokens, to));

                let mut range = match to {
                    Some(Resolution::StartOfChar(mut to)) => {
                        // The end position is inclusive, so cover the byte at `to`.
                        to += 1;
                        from..to
                    }
                    Some(Resolution::EndOfChar(to)) => from..to,
                    // No end position, or one past the line: go to the end of the line.
                    None | Some(Resolution::TooHigh) => from..line.len(),
                    // The end position lies before the line start: select nothing.
                    Some(Resolution::TooLow) => 0..0,
                };
                // An end before the start becomes an empty range at the start.
                if range.start > range.end {
                    range.end = range.start;
                }
                range
            }
            Resolution::TooLow | Resolution::EndOfChar(_) => {
                unreachable!(
                    "This should only happen if the field start index is 0, but that should already have caused an error."
                )
            }
            // The start position is past the line: empty range at its end.
            Resolution::TooHigh => line.len()..line.len(),
        }
    }
1010}
1011
1012fn make_sort_mode_arg(mode: &'static str, short: char, help: String) -> Arg {
1014 Arg::new(mode)
1015 .short(short)
1016 .long(mode)
1017 .help(help)
1018 .action(ArgAction::SetTrue)
1019 .conflicts_with_all(
1020 options::modes::ALL_SORT_MODES
1021 .iter()
1022 .filter(|&&m| m != mode),
1023 )
1024}
1025
1026#[cfg(target_os = "linux")]
1027fn get_rlimit() -> UResult<usize> {
1028 let mut limit = rlimit {
1029 rlim_cur: 0,
1030 rlim_max: 0,
1031 };
1032 match unsafe { getrlimit(RLIMIT_NOFILE, &raw mut limit) } {
1033 0 => Ok(limit.rlim_cur as usize),
1034 _ => Err(UUsageError::new(2, translate!("sort-failed-fetch-rlimit"))),
1035 }
1036}
1037
/// File operand that stands for standard input; it is what `uumain` uses
/// when no input files are given.
const STDIN_FILE: &str = "-";
1039
1040#[uucore::main]
1041#[allow(clippy::cognitive_complexity)]
1042pub fn uumain(args: impl uucore::Args) -> UResult<()> {
1043 let mut settings = GlobalSettings::default();
1044
1045 let matches = uucore::clap_localization::handle_clap_result_with_exit_code(uu_app(), args, 2)?;
1046
1047 if matches
1049 .get_occurrences::<OsString>(options::OUTPUT)
1050 .is_some_and(|out| out.len() > 1)
1051 {
1052 return Err(SortError::MultipleOutputFiles.into());
1053 }
1054
1055 settings.debug = matches.get_flag(options::DEBUG);
1056
1057 let mut files: Vec<OsString> = if matches.contains_id(options::FILES0_FROM) {
1059 let files0_from: PathBuf = matches
1060 .get_one::<OsString>(options::FILES0_FROM)
1061 .map(|v| v.into())
1062 .unwrap_or_default();
1063
1064 if let Some(s) = matches.get_one::<OsString>(options::FILES) {
1066 return Err(SortError::FileOperandsCombined { file: s.into() }.into());
1067 }
1068
1069 let mut files = Vec::new();
1070
1071 let reader = open_with_open_failed_error(&files0_from)?;
1073 let buf_reader = BufReader::new(reader);
1074 for (line_num, line) in buf_reader.split(b'\0').flatten().enumerate() {
1075 let f = std::str::from_utf8(&line)
1076 .expect("Could not parse string from zero terminated input.");
1077 match f {
1078 STDIN_FILE => {
1079 return Err(SortError::MinusInStdIn.into());
1080 }
1081 "" => {
1082 return Err(SortError::ZeroLengthFileName {
1083 file: files0_from,
1084 line_num: line_num + 1,
1085 }
1086 .into());
1087 }
1088 _ => {}
1089 }
1090
1091 files.push(OsString::from(
1092 std::str::from_utf8(&line)
1093 .expect("Could not parse string from zero terminated input."),
1094 ));
1095 }
1096 if files.is_empty() {
1097 return Err(SortError::EmptyInputFile { file: files0_from }.into());
1098 }
1099 files
1100 } else {
1101 matches
1102 .get_many::<OsString>(options::FILES)
1103 .map(|v| v.map(ToOwned::to_owned).collect())
1104 .unwrap_or_default()
1105 };
1106
1107 settings.mode = if matches.get_flag(options::modes::HUMAN_NUMERIC)
1108 || matches
1109 .get_one::<String>(options::modes::SORT)
1110 .is_some_and(|s| s == "human-numeric")
1111 {
1112 SortMode::HumanNumeric
1113 } else if matches.get_flag(options::modes::MONTH)
1114 || matches
1115 .get_one::<String>(options::modes::SORT)
1116 .is_some_and(|s| s == "month")
1117 {
1118 SortMode::Month
1119 } else if matches.get_flag(options::modes::GENERAL_NUMERIC)
1120 || matches
1121 .get_one::<String>(options::modes::SORT)
1122 .is_some_and(|s| s == "general-numeric")
1123 {
1124 SortMode::GeneralNumeric
1125 } else if matches.get_flag(options::modes::NUMERIC)
1126 || matches
1127 .get_one::<String>(options::modes::SORT)
1128 .is_some_and(|s| s == "numeric")
1129 {
1130 SortMode::Numeric
1131 } else if matches.get_flag(options::modes::VERSION)
1132 || matches
1133 .get_one::<String>(options::modes::SORT)
1134 .is_some_and(|s| s == "version")
1135 {
1136 SortMode::Version
1137 } else if matches.get_flag(options::modes::RANDOM)
1138 || matches
1139 .get_one::<String>(options::modes::SORT)
1140 .is_some_and(|s| s == "random")
1141 {
1142 settings.salt = Some(get_rand_string());
1143 SortMode::Random
1144 } else {
1145 SortMode::Default
1146 };
1147
1148 settings.dictionary_order = matches.get_flag(options::DICTIONARY_ORDER);
1149 settings.ignore_non_printing = matches.get_flag(options::IGNORE_NONPRINTING);
1150 if matches.contains_id(options::PARALLEL) {
1151 settings.threads = matches
1153 .get_one::<String>(options::PARALLEL)
1154 .map_or_else(|| "0".to_string(), String::from);
1155 unsafe {
1156 env::set_var("RAYON_NUM_THREADS", &settings.threads);
1157 }
1158 }
1159
1160 settings.buffer_size =
1161 matches
1162 .get_one::<String>(options::BUF_SIZE)
1163 .map_or(Ok(DEFAULT_BUF_SIZE), |s| {
1164 GlobalSettings::parse_byte_count(s).map_err(|e| {
1165 USimpleError::new(2, format_error_message(&e, s, options::BUF_SIZE))
1166 })
1167 })?;
1168
1169 let mut tmp_dir = TmpDirWrapper::new(
1170 matches
1171 .get_one::<String>(options::TMP_DIR)
1172 .map_or_else(env::temp_dir, PathBuf::from),
1173 );
1174
1175 settings.compress_prog = matches
1176 .get_one::<String>(options::COMPRESS_PROG)
1177 .map(String::from);
1178
1179 if let Some(n_merge) = matches.get_one::<String>(options::BATCH_SIZE) {
1180 match n_merge.parse::<usize>() {
1181 Ok(parsed_value) => {
1182 if parsed_value < 2 {
1183 show_error!(
1184 "{}",
1185 translate!("sort-invalid-batch-size-arg", "arg" => n_merge)
1186 );
1187 return Err(UUsageError::new(
1188 2,
1189 translate!("sort-minimum-batch-size-two"),
1190 ));
1191 }
1192 settings.merge_batch_size = parsed_value;
1193 }
1194 Err(e) => {
1195 let error_message = if *e.kind() == IntErrorKind::PosOverflow {
1196 let batch_too_large = translate!(
1197 "sort-batch-size-too-large",
1198 "arg" => n_merge.quote()
1199 );
1200
1201 #[cfg(target_os = "linux")]
1202 {
1203 show_error!("{}", batch_too_large);
1204
1205 translate!(
1206 "sort-maximum-batch-size-rlimit",
1207 "rlimit" => get_rlimit()?
1208 )
1209 }
1210 #[cfg(not(target_os = "linux"))]
1211 {
1212 batch_too_large
1213 }
1214 } else {
1215 translate!(
1216 "sort-invalid-batch-size-arg",
1217 "arg" => n_merge,
1218 )
1219 };
1220
1221 return Err(UUsageError::new(2, error_message));
1222 }
1223 }
1224 }
1225
1226 settings.line_ending = LineEnding::from_zero_flag(matches.get_flag(options::ZERO_TERMINATED));
1227 settings.merge = matches.get_flag(options::MERGE);
1228
1229 settings.check = matches.contains_id(options::check::CHECK);
1230 if matches.get_flag(options::check::CHECK_SILENT)
1231 || matches!(
1232 matches
1233 .get_one::<String>(options::check::CHECK)
1234 .map(|s| s.as_str()),
1235 Some(options::check::SILENT | options::check::QUIET)
1236 )
1237 {
1238 settings.check_silent = true;
1239 settings.check = true;
1240 }
1241
1242 settings.ignore_case = matches.get_flag(options::IGNORE_CASE);
1243
1244 settings.ignore_leading_blanks = matches.get_flag(options::IGNORE_LEADING_BLANKS);
1245
1246 settings.reverse = matches.get_flag(options::REVERSE);
1247 settings.stable = matches.get_flag(options::STABLE);
1248 settings.unique = matches.get_flag(options::UNIQUE);
1249
1250 if files.is_empty() {
1251 files.push(OsString::from(STDIN_FILE));
1253 } else if settings.check && files.len() != 1 {
1254 return Err(UUsageError::new(
1255 2,
1256 translate!("sort-extra-operand-not-allowed-with-c", "operand" => files[1].quote()),
1257 ));
1258 }
1259
1260 if let Some(arg) = matches.get_one::<OsString>(options::SEPARATOR) {
1261 let mut separator = arg.to_str().ok_or_else(|| {
1262 UUsageError::new(
1263 2,
1264 translate!("sort-separator-not-valid-unicode", "arg" => arg.quote()),
1265 )
1266 })?;
1267 if separator == "\\0" {
1268 separator = "\0";
1269 }
1270 let &[sep_char] = separator.as_bytes() else {
1274 return Err(UUsageError::new(
1275 2,
1276 translate!("sort-separator-must-be-one-char", "separator" => separator.quote()),
1277 ));
1278 };
1279 settings.separator = Some(sep_char);
1280 }
1281
1282 if let Some(values) = matches.get_many::<String>(options::KEY) {
1283 for value in values {
1284 let selector = FieldSelector::parse(value, &settings)?;
1285 if selector.settings.mode == SortMode::Random && settings.salt.is_none() {
1286 settings.salt = Some(get_rand_string());
1287 }
1288 settings.selectors.push(selector);
1289 }
1290 }
1291
1292 if !matches.contains_id(options::KEY) {
1293 let key_settings = KeySettings::from(&settings);
1295 settings.selectors.push(
1296 FieldSelector::new(
1297 KeyPosition {
1298 field: 1,
1299 char: 1,
1300 ignore_blanks: key_settings.ignore_blanks,
1301 },
1302 None,
1303 key_settings,
1304 )
1305 .unwrap(),
1306 );
1307 }
1308
1309 for file in &files {
1314 open(file)?;
1315 }
1316
1317 let output = Output::new(matches.get_one::<OsString>(options::OUTPUT))?;
1318
1319 settings.init_precomputed();
1320
1321 let result = exec(&mut files, &settings, output, &mut tmp_dir);
1322 tmp_dir.wait_if_signal();
1325 result
1326}
1327
/// Builds the `clap` [`Command`] that describes every option and operand
/// accepted by `sort`.
///
/// The command is created through `configure_localized_command`, and all help
/// texts are looked up with `translate!`. Arguments are registered in the
/// order they appear below.
pub fn uu_app() -> Command {
    uucore::clap_localization::configure_localized_command(
        Command::new(uucore::util_name())
            .version(uucore::crate_version!())
            .about(translate!("sort-about"))
            .after_help(translate!("sort-after-help"))
            .override_usage(format_usage(&translate!("sort-usage"))),
    )
    .infer_long_args(true)
    // clap's built-in help/version flags are disabled because both are
    // declared explicitly below (long form only, so `-h` and `-V` stay free
    // for the sort modes).
    .disable_help_flag(true)
    .disable_version_flag(true)
    .args_override_self(true)
    .arg(
        Arg::new(options::HELP)
            .long(options::HELP)
            .help(translate!("sort-help-help"))
            .action(ArgAction::Help),
    )
    .arg(
        Arg::new(options::VERSION)
            .long(options::VERSION)
            .help(translate!("sort-help-version"))
            .action(ArgAction::Version),
    )
    // `--sort=WORD` conflicts with every individual mode flag listed in
    // `ALL_SORT_MODES`.
    .arg(
        Arg::new(options::modes::SORT)
            .long(options::modes::SORT)
            .value_parser(ShortcutValueParser::new([
                "general-numeric",
                "human-numeric",
                "month",
                "numeric",
                "version",
                "random",
            ]))
            .conflicts_with_all(options::modes::ALL_SORT_MODES),
    )
    // One flag per sort mode, all built by `make_sort_mode_arg`.
    .arg(make_sort_mode_arg(
        options::modes::HUMAN_NUMERIC,
        'h',
        translate!("sort-help-human-numeric"),
    ))
    .arg(make_sort_mode_arg(
        options::modes::MONTH,
        'M',
        translate!("sort-help-month"),
    ))
    .arg(make_sort_mode_arg(
        options::modes::NUMERIC,
        'n',
        translate!("sort-help-numeric"),
    ))
    .arg(make_sort_mode_arg(
        options::modes::GENERAL_NUMERIC,
        'g',
        translate!("sort-help-general-numeric"),
    ))
    .arg(make_sort_mode_arg(
        options::modes::VERSION,
        'V',
        translate!("sort-help-version-sort"),
    ))
    .arg(make_sort_mode_arg(
        options::modes::RANDOM,
        'R',
        translate!("sort-help-random"),
    ))
    // Dictionary order cannot be combined with the numeric-like modes or
    // month sort.
    .arg(
        Arg::new(options::DICTIONARY_ORDER)
            .short('d')
            .long(options::DICTIONARY_ORDER)
            .help(translate!("sort-help-dictionary-order"))
            .conflicts_with_all([
                options::modes::NUMERIC,
                options::modes::GENERAL_NUMERIC,
                options::modes::HUMAN_NUMERIC,
                options::modes::MONTH,
            ])
            .action(ArgAction::SetTrue),
    )
    .arg(
        Arg::new(options::MERGE)
            .short('m')
            .long(options::MERGE)
            .help(translate!("sort-help-merge"))
            .action(ArgAction::SetTrue),
    )
    // `--check` takes an optional value (`num_args(0..)`) that must be
    // attached with `=` (`require_equals`). It conflicts with `--output` and
    // with the dedicated silent-check flag.
    .arg(
        Arg::new(options::check::CHECK)
            .short('c')
            .long(options::check::CHECK)
            .require_equals(true)
            .num_args(0..)
            .value_parser(ShortcutValueParser::new([
                options::check::SILENT,
                options::check::QUIET,
                options::check::DIAGNOSE_FIRST,
            ]))
            .conflicts_with_all([options::OUTPUT, options::check::CHECK_SILENT])
            .help(translate!("sort-help-check")),
    )
    .arg(
        Arg::new(options::check::CHECK_SILENT)
            .short('C')
            .long(options::check::CHECK_SILENT)
            .conflicts_with_all([options::OUTPUT, options::check::CHECK])
            .help(translate!("sort-help-check-silent"))
            .action(ArgAction::SetTrue),
    )
    .arg(
        Arg::new(options::IGNORE_CASE)
            .short('f')
            .long(options::IGNORE_CASE)
            .help(translate!("sort-help-ignore-case"))
            .action(ArgAction::SetTrue),
    )
    // Same conflict set as dictionary order above.
    .arg(
        Arg::new(options::IGNORE_NONPRINTING)
            .short('i')
            .long(options::IGNORE_NONPRINTING)
            .help(translate!("sort-help-ignore-nonprinting"))
            .conflicts_with_all([
                options::modes::NUMERIC,
                options::modes::GENERAL_NUMERIC,
                options::modes::HUMAN_NUMERIC,
                options::modes::MONTH,
            ])
            .action(ArgAction::SetTrue),
    )
    .arg(
        Arg::new(options::IGNORE_LEADING_BLANKS)
            .short('b')
            .long(options::IGNORE_LEADING_BLANKS)
            .help(translate!("sort-help-ignore-leading-blanks"))
            .action(ArgAction::SetTrue),
    )
    // The output path is kept as an `OsString` and may start with a hyphen.
    // NOTE(review): `Append` records every occurrence of `-o` instead of
    // letting a later one override an earlier one; how repeated occurrences
    // are treated is decided by the code consuming this argument - confirm
    // there.
    .arg(
        Arg::new(options::OUTPUT)
            .short('o')
            .long(options::OUTPUT)
            .help(translate!("sort-help-output"))
            .value_parser(ValueParser::os_string())
            .value_name("FILENAME")
            .value_hint(clap::ValueHint::FilePath)
            .num_args(1)
            .allow_hyphen_values(true)
            .action(ArgAction::Append),
    )
    .arg(
        Arg::new(options::REVERSE)
            .short('r')
            .long(options::REVERSE)
            .help(translate!("sort-help-reverse"))
            .action(ArgAction::SetTrue),
    )
    .arg(
        Arg::new(options::STABLE)
            .short('s')
            .long(options::STABLE)
            .help(translate!("sort-help-stable"))
            .action(ArgAction::SetTrue),
    )
    .arg(
        Arg::new(options::UNIQUE)
            .short('u')
            .long(options::UNIQUE)
            .help(translate!("sort-help-unique"))
            .action(ArgAction::SetTrue),
    )
    // `-k` may be given several times; each occurrence takes exactly one
    // value and all of them are collected.
    .arg(
        Arg::new(options::KEY)
            .short('k')
            .long(options::KEY)
            .help(translate!("sort-help-key"))
            .action(ArgAction::Append)
            .num_args(1),
    )
    // Parsed as an `OsString`, so validation of the separator is left to the
    // caller.
    .arg(
        Arg::new(options::SEPARATOR)
            .short('t')
            .long(options::SEPARATOR)
            .help(translate!("sort-help-separator"))
            .value_parser(ValueParser::os_string()),
    )
    .arg(
        Arg::new(options::ZERO_TERMINATED)
            .short('z')
            .long(options::ZERO_TERMINATED)
            .help(translate!("sort-help-zero-terminated"))
            .action(ArgAction::SetTrue),
    )
    // The remaining value options are accepted as plain strings here; any
    // numeric or size validation happens after parsing.
    .arg(
        Arg::new(options::PARALLEL)
            .long(options::PARALLEL)
            .help(translate!("sort-help-parallel"))
            .value_name("NUM_THREADS"),
    )
    .arg(
        Arg::new(options::BUF_SIZE)
            .short('S')
            .long(options::BUF_SIZE)
            .help(translate!("sort-help-buf-size"))
            .value_name("SIZE"),
    )
    .arg(
        Arg::new(options::TMP_DIR)
            .short('T')
            .long(options::TMP_DIR)
            .help(translate!("sort-help-tmp-dir"))
            .value_name("DIR")
            .value_hint(clap::ValueHint::DirPath),
    )
    .arg(
        Arg::new(options::COMPRESS_PROG)
            .long(options::COMPRESS_PROG)
            .help(translate!("sort-help-compress-prog"))
            .value_name("PROG")
            .value_hint(clap::ValueHint::CommandName),
    )
    .arg(
        Arg::new(options::BATCH_SIZE)
            .long(options::BATCH_SIZE)
            .help(translate!("sort-help-batch-size"))
            .value_name("N_MERGE"),
    )
    .arg(
        Arg::new(options::FILES0_FROM)
            .long(options::FILES0_FROM)
            .help(translate!("sort-help-files0-from"))
            .value_name("NUL_FILE")
            .value_parser(ValueParser::os_string())
            .value_hint(clap::ValueHint::FilePath),
    )
    .arg(
        Arg::new(options::DEBUG)
            .long(options::DEBUG)
            .help(translate!("sort-help-debug"))
            .action(ArgAction::SetTrue),
    )
    // Positional operands (no short or long name): collected as `OsString`s.
    .arg(
        Arg::new(options::FILES)
            .action(ArgAction::Append)
            .value_parser(ValueParser::os_string())
            .value_hint(clap::ValueHint::FilePath),
    )
}
1575
1576fn exec(
1577 files: &mut [OsString],
1578 settings: &GlobalSettings,
1579 output: Output,
1580 tmp_dir: &mut TmpDirWrapper,
1581) -> UResult<()> {
1582 if settings.merge {
1583 merge::merge(files, settings, output, tmp_dir)
1584 } else if settings.check {
1585 if files.len() > 1 {
1586 Err(UUsageError::new(
1587 2,
1588 translate!("sort-only-one-file-allowed-with-c"),
1589 ))
1590 } else {
1591 check::check(files.first().unwrap(), settings)
1592 }
1593 } else {
1594 let mut lines = files.iter().map(open);
1595 ext_sort(&mut lines, settings, output, tmp_dir)
1596 }
1597}
1598
1599fn sort_by<'a>(unsorted: &mut Vec<Line<'a>>, settings: &GlobalSettings, line_data: &LineData<'a>) {
1600 if settings.stable || settings.unique {
1601 unsorted.par_sort_by(|a, b| compare_by(a, b, settings, line_data, line_data));
1602 } else {
1603 unsorted.par_sort_unstable_by(|a, b| compare_by(a, b, settings, line_data, line_data));
1604 }
1605}
1606
1607fn compare_by<'a>(
1608 a: &Line<'a>,
1609 b: &Line<'a>,
1610 global_settings: &GlobalSettings,
1611 a_line_data: &LineData<'a>,
1612 b_line_data: &LineData<'a>,
1613) -> Ordering {
1614 let mut selection_index = 0;
1615 let mut num_info_index = 0;
1616 let mut parsed_float_index = 0;
1617
1618 if let (Some(Some(a_f64)), Some(Some(b_f64))) = (
1619 a_line_data.line_num_floats.get(a.index),
1620 b_line_data.line_num_floats.get(b.index),
1621 ) {
1622 if let Some(cmp) = a_f64.partial_cmp(b_f64) {
1624 if cmp != Ordering::Equal || a.line == b.line {
1626 return if global_settings.reverse {
1627 cmp.reverse()
1628 } else {
1629 cmp
1630 };
1631 }
1632 }
1633 }
1634
1635 for selector in &global_settings.selectors {
1636 let (a_str, b_str) = if selector.needs_selection {
1637 let selections = (
1638 a_line_data.selections
1639 [a.index * global_settings.precomputed.selections_per_line + selection_index],
1640 b_line_data.selections
1641 [b.index * global_settings.precomputed.selections_per_line + selection_index],
1642 );
1643 selection_index += 1;
1644 selections
1645 } else {
1646 (a.line, b.line)
1648 };
1649
1650 let settings = &selector.settings;
1651
1652 let cmp: Ordering = match settings.mode {
1653 SortMode::Random => {
1654 if custom_str_cmp(
1656 a_str,
1657 b_str,
1658 settings.ignore_non_printing,
1659 settings.dictionary_order,
1660 settings.ignore_case,
1661 ) == Ordering::Equal
1662 {
1663 Ordering::Equal
1664 } else {
1665 random_shuffle(a_str, b_str, &global_settings.salt.unwrap())
1667 }
1668 }
1669 SortMode::Numeric => {
1670 let a_num_info = &a_line_data.num_infos
1671 [a.index * global_settings.precomputed.num_infos_per_line + num_info_index];
1672 let b_num_info = &b_line_data.num_infos
1673 [b.index * global_settings.precomputed.num_infos_per_line + num_info_index];
1674 num_info_index += 1;
1675 numeric_str_cmp((a_str, a_num_info), (b_str, b_num_info))
1676 }
1677 SortMode::HumanNumeric => {
1678 let a_num_info = &a_line_data.num_infos
1679 [a.index * global_settings.precomputed.num_infos_per_line + num_info_index];
1680 let b_num_info = &b_line_data.num_infos
1681 [b.index * global_settings.precomputed.num_infos_per_line + num_info_index];
1682 num_info_index += 1;
1683 human_numeric_str_cmp((a_str, a_num_info), (b_str, b_num_info))
1684 }
1685 SortMode::GeneralNumeric => {
1686 let a_float = &a_line_data.parsed_floats
1687 [a.index * global_settings.precomputed.floats_per_line + parsed_float_index];
1688 let b_float = &b_line_data.parsed_floats
1689 [b.index * global_settings.precomputed.floats_per_line + parsed_float_index];
1690 parsed_float_index += 1;
1691 general_numeric_compare(a_float, b_float)
1692 }
1693 SortMode::Month => month_compare(a_str, b_str),
1694 SortMode::Version => version_cmp(a_str, b_str),
1695 SortMode::Default => custom_str_cmp(
1696 a_str,
1697 b_str,
1698 settings.ignore_non_printing,
1699 settings.dictionary_order,
1700 settings.ignore_case,
1701 ),
1702 };
1703 if cmp != Ordering::Equal {
1704 return if settings.reverse { cmp.reverse() } else { cmp };
1705 }
1706 }
1707
1708 let cmp = if global_settings.mode == SortMode::Random
1710 || global_settings.stable
1711 || global_settings.unique
1712 {
1713 Ordering::Equal
1714 } else {
1715 a.line.cmp(b.line)
1716 };
1717
1718 if global_settings.reverse {
1719 cmp.reverse()
1720 } else {
1721 cmp
1722 }
1723}
1724
1725#[allow(clippy::cognitive_complexity)]
1730fn get_leading_gen(inp: &[u8]) -> Range<usize> {
1731 let trimmed = inp.trim_ascii_start();
1732 let leading_whitespace_len = inp.len() - trimmed.len();
1733
1734 const ALLOWED_PREFIXES: &[&[u8]] = &[b"inf", b"-inf", b"nan"];
1736 for &allowed_prefix in ALLOWED_PREFIXES {
1737 if trimmed.len() >= allowed_prefix.len()
1738 && trimmed[..allowed_prefix.len()].eq_ignore_ascii_case(allowed_prefix)
1739 {
1740 return leading_whitespace_len..(leading_whitespace_len + allowed_prefix.len());
1741 }
1742 }
1743 let mut char_indices = itertools::peek_nth(trimmed.iter().enumerate());
1745
1746 let first = char_indices.peek();
1747
1748 if matches!(first, Some((_, NEGATIVE | POSITIVE))) {
1749 char_indices.next();
1750 }
1751
1752 let mut had_e_notation = false;
1753 let mut had_decimal_pt = false;
1754 let mut had_hex_notation: bool = false;
1755 while let Some((idx, &c)) = char_indices.next() {
1756 if had_hex_notation && c.is_ascii_hexdigit() {
1757 continue;
1758 }
1759
1760 if c.is_ascii_digit() {
1761 if c == b'0' && matches!(char_indices.peek(), Some((_, b'x' | b'X'))) {
1762 had_hex_notation = true;
1763 char_indices.next();
1764 }
1765 continue;
1766 }
1767
1768 if c == DECIMAL_PT && !had_decimal_pt && !had_e_notation {
1769 had_decimal_pt = true;
1770 continue;
1771 }
1772 let is_decimal_e = (c == b'e' || c == b'E') && !had_hex_notation;
1773 let is_hex_e = (c == b'p' || c == b'P') && had_hex_notation;
1774 if (is_decimal_e || is_hex_e) && !had_e_notation {
1775 if let Some(&(_, &next_char)) = char_indices.peek() {
1777 if (next_char == b'+' || next_char == b'-')
1778 && matches!(
1779 char_indices.peek_nth(2),
1780 Some((_, c)) if c.is_ascii_digit()
1781 )
1782 {
1783 char_indices.next();
1785 had_e_notation = true;
1786 continue;
1787 }
1788 if next_char.is_ascii_digit() {
1789 had_e_notation = true;
1790 continue;
1791 }
1792 }
1793 }
1794 return leading_whitespace_len..(leading_whitespace_len + idx);
1795 }
1796 leading_whitespace_len..inp.len()
1797}
1798
/// Result of parsing a key for general-numeric comparison.
///
/// `PartialOrd` is derived, so values compare by variant in declaration
/// order - `Invalid` < `Nan` < `MinusInfinity` < `Number(_)` < `Infinity` -
/// and two `Number`s compare by their `BigDecimal` payloads. Reordering the
/// variants therefore changes the sort order.
#[derive(Clone, PartialEq, PartialOrd, Debug)]
pub enum GeneralBigDecimalParseResult {
    /// The input could not be interpreted as a number at all.
    Invalid,
    /// The input parsed as a NaN.
    Nan,
    /// The input parsed as negative infinity.
    MinusInfinity,
    /// The input parsed as a finite number.
    Number(BigDecimal),
    /// The input parsed as positive infinity.
    Infinity,
}
1807
1808#[inline(always)]
1811fn general_bd_parse(a: &[u8]) -> GeneralBigDecimalParseResult {
1812 let Ok(a) = std::str::from_utf8(a) else {
1814 return GeneralBigDecimalParseResult::Invalid;
1815 };
1816
1817 let ebd = match ExtendedBigDecimal::extended_parse(a) {
1819 Err(ExtendedParserError::NotNumeric) => return GeneralBigDecimalParseResult::Invalid,
1820 Err(
1821 ExtendedParserError::PartialMatch(ebd, _)
1822 | ExtendedParserError::Overflow(ebd)
1823 | ExtendedParserError::Underflow(ebd),
1824 )
1825 | Ok(ebd) => ebd,
1826 };
1827
1828 match ebd {
1829 ExtendedBigDecimal::BigDecimal(bd) => GeneralBigDecimalParseResult::Number(bd),
1830 ExtendedBigDecimal::Infinity => GeneralBigDecimalParseResult::Infinity,
1831 ExtendedBigDecimal::MinusInfinity => GeneralBigDecimalParseResult::MinusInfinity,
1832 ExtendedBigDecimal::MinusZero => GeneralBigDecimalParseResult::Number(0.into()),
1834 ExtendedBigDecimal::Nan | ExtendedBigDecimal::MinusNan => GeneralBigDecimalParseResult::Nan,
1835 }
1836}
1837
1838fn general_numeric_compare(
1842 a: &GeneralBigDecimalParseResult,
1843 b: &GeneralBigDecimalParseResult,
1844) -> Ordering {
1845 a.partial_cmp(b).unwrap()
1846}
1847
1848fn get_rand_string() -> [u8; 16] {
1849 rng().sample(rand::distr::StandardUniform)
1850}
1851
1852fn get_hash<T: Hash>(t: &T) -> u64 {
1853 let mut s = FnvHasher::default();
1854 t.hash(&mut s);
1855 s.finish()
1856}
1857
1858fn random_shuffle(a: &[u8], b: &[u8], salt: &[u8]) -> Ordering {
1859 let da = get_hash(&(a, salt));
1860 let db = get_hash(&(b, salt));
1861 da.cmp(&db)
1862}
1863
/// Month recognised by [`month_parse`].
///
/// `Ord` is derived, so values compare in declaration order: `Unknown` sorts
/// before `January`, and `December` sorts last.
#[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy)]
enum Month {
    Unknown,
    January,
    February,
    March,
    April,
    May,
    June,
    July,
    August,
    September,
    October,
    November,
    December,
}

/// Parses the month abbreviation at the start of `line`.
///
/// Leading ASCII whitespace is skipped, then the next three bytes are matched
/// ASCII case-insensitively against the English three-letter abbreviations
/// (`JAN` to `DEC`). Anything after those three bytes is ignored. Returns
/// [`Month::Unknown`] when fewer than three bytes remain or nothing matches.
fn month_parse(line: &[u8]) -> Month {
    let Some(&[first, second, third]) = line.trim_ascii_start().get(..3) else {
        return Month::Unknown;
    };

    // Upper-case into a fixed-size array: this runs for every comparison, so
    // it must not allocate.
    let abbreviation = [
        first.to_ascii_uppercase(),
        second.to_ascii_uppercase(),
        third.to_ascii_uppercase(),
    ];

    match &abbreviation {
        b"JAN" => Month::January,
        b"FEB" => Month::February,
        b"MAR" => Month::March,
        b"APR" => Month::April,
        b"MAY" => Month::May,
        b"JUN" => Month::June,
        b"JUL" => Month::July,
        b"AUG" => Month::August,
        b"SEP" => Month::September,
        b"OCT" => Month::October,
        b"NOV" => Month::November,
        b"DEC" => Month::December,
        _ => Month::Unknown,
    }
}

/// Compares two keys by the month each one starts with (see [`month_parse`]).
fn month_compare(a: &[u8], b: &[u8]) -> Ordering {
    let ma = month_parse(a);
    let mb = month_parse(b);

    ma.cmp(&mb)
}
1908
1909fn print_sorted<'a, T: Iterator<Item = &'a Line<'a>>>(
1910 iter: T,
1911 settings: &GlobalSettings,
1912 output: Output,
1913) -> UResult<()> {
1914 let output_name = output
1915 .as_output_name()
1916 .unwrap_or(OsStr::new("standard output"))
1917 .to_owned();
1918 let ctx = || translate!("sort-error-write-failed", "output" => output_name.maybe_quote());
1919
1920 let mut writer = output.into_write();
1921 for line in iter {
1922 line.print(&mut writer, settings).map_err_context(ctx)?;
1923 }
1924 writer.flush().map_err_context(ctx)?;
1925 Ok(())
1926}
1927
1928fn open(path: impl AsRef<OsStr>) -> UResult<Box<dyn Read + Send>> {
1929 let path = path.as_ref();
1930 if path == STDIN_FILE {
1931 let stdin = stdin();
1932 return Ok(Box::new(stdin) as Box<dyn Read + Send>);
1933 }
1934
1935 let path = Path::new(path);
1936 match File::open(path) {
1937 Ok(f) => Ok(Box::new(f) as Box<dyn Read + Send>),
1938 Err(error) => Err(SortError::ReadFailed {
1939 path: path.to_owned(),
1940 error,
1941 }
1942 .into()),
1943 }
1944}
1945
1946fn open_with_open_failed_error(path: impl AsRef<OsStr>) -> UResult<Box<dyn Read + Send>> {
1947 let path = path.as_ref();
1949 if path == STDIN_FILE {
1950 let stdin = stdin();
1951 return Ok(Box::new(stdin) as Box<dyn Read + Send>);
1952 }
1953
1954 let path = Path::new(path);
1955 match File::open(path) {
1956 Ok(f) => Ok(Box::new(f) as Box<dyn Read + Send>),
1957 Err(error) => Err(SortError::OpenFailed {
1958 path: path.to_owned(),
1959 error,
1960 }
1961 .into()),
1962 }
1963}
1964
1965fn format_error_message(error: &ParseSizeError, s: &str, option: &str) -> String {
1966 match error {
1969 ParseSizeError::InvalidSuffix(_) => {
1970 translate!("sort-invalid-suffix-in-option-arg", "option" => option, "arg" => s.quote())
1971 }
1972 ParseSizeError::ParseFailure(_) | ParseSizeError::PhysicalMem(_) => {
1973 translate!("sort-invalid-option-arg", "option" => option, "arg" => s.quote())
1974 }
1975 ParseSizeError::SizeTooBig(_) => {
1976 translate!("sort-option-arg-too-large", "option" => option, "arg" => s.quote())
1977 }
1978 }
1979}
1980
#[cfg(test)]
mod tests {

    use super::*;

    /// Runs `tokenize` on `line` and returns the fields it produced.
    fn tokenize_helper(line: &[u8], separator: Option<u8>) -> Vec<Field> {
        let mut buffer = vec![];
        tokenize(line, separator, &mut buffer);
        buffer
    }

    #[test]
    fn test_get_hash() {
        let a = "Ted".to_string();

        assert_eq!(2_646_829_031_758_483_623, get_hash(&a));
    }

    #[test]
    fn test_random_shuffle() {
        let a = b"Ted";
        let b = b"Ted";
        let c = get_rand_string();

        // Identical keys must compare equal whatever the salt is.
        assert_eq!(Ordering::Equal, random_shuffle(a, b, &c));
    }

    #[test]
    fn test_month_compare() {
        let a = b"JaN";
        let b = b"OCt";

        assert_eq!(Ordering::Less, month_compare(a, b));
    }

    #[test]
    fn test_version_compare() {
        let a = b"1.2.3-alpha2";
        let b = b"1.4.0";

        assert_eq!(Ordering::Less, version_cmp(a, b));
    }

    #[test]
    fn test_random_compare() {
        let a = b"9";
        let b = b"9";
        let c = get_rand_string();

        assert_eq!(Ordering::Equal, random_shuffle(a, b, &c));
    }

    // The fixtures below rely on runs of several blanks: the expected ranges
    // cover 14 and 18 bytes respectively, so the literals must keep the
    // four-blank runs exactly as written.

    #[test]
    fn test_tokenize_fields() {
        let line = b"foo bar b    x";
        assert_eq!(tokenize_helper(line, None), vec![0..3, 3..7, 7..9, 9..14]);
    }

    #[test]
    fn test_tokenize_fields_leading_whitespace() {
        let line = b"    foo bar b    x";
        assert_eq!(
            tokenize_helper(line, None),
            vec![0..7, 7..11, 11..13, 13..18]
        );
    }

    #[test]
    fn test_tokenize_fields_custom_separator() {
        let line = b"aaa foo bar b    x";
        assert_eq!(
            tokenize_helper(line, Some(b'a')),
            vec![0..0, 1..1, 2..2, 3..9, 10..18]
        );
    }

    #[test]
    fn test_tokenize_fields_trailing_custom_separator() {
        let line = b"a";
        assert_eq!(tokenize_helper(line, Some(b'a')), vec![0..0]);
        let line = b"aa";
        assert_eq!(tokenize_helper(line, Some(b'a')), vec![0..0, 1..1]);
        let line = b"..a..a";
        assert_eq!(tokenize_helper(line, Some(b'a')), vec![0..2, 3..5]);
    }

    #[test]
    #[cfg(target_pointer_width = "64")]
    fn test_line_size() {
        // Guards against `Line` growing by accident.
        assert_eq!(size_of::<Line>(), 24);
    }

    #[test]
    fn test_parse_byte_count() {
        let valid_input = [
            ("0", 0),
            ("50K", 50 * 1024),
            ("50k", 50 * 1024),
            ("1M", 1024 * 1024),
            ("100M", 100 * 1024 * 1024),
            #[cfg(not(target_pointer_width = "32"))]
            ("1000G", 1000 * 1024 * 1024 * 1024),
            #[cfg(not(target_pointer_width = "32"))]
            ("10T", 10 * 1024 * 1024 * 1024 * 1024),
            ("1b", 1),
            ("1024b", 1024),
            ("1024Mb", 1024 * 1024 * 1024),
            // A bare number is counted in units of 1024 bytes.
            ("1", 1024),
            ("50", 50 * 1024),
            // A bare suffix counts as one unit of that size.
            ("K", 1024),
            ("k", 1024),
            ("m", 1024 * 1024),
            #[cfg(not(target_pointer_width = "32"))]
            ("E", 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
        ];
        for &(input, expected_output) in &valid_input {
            assert_eq!(
                GlobalSettings::parse_byte_count(input),
                Ok(expected_output)
            );
        }

        // Every one of these must be rejected.
        let invalid_input = [
            "500E", "1Y", "nonsense", "1B", "B", "b", "p", "e", "z", "y",
        ];
        for input in invalid_input {
            assert!(GlobalSettings::parse_byte_count(input).is_err());
        }
    }
}