mod check;
mod chunks;
mod custom_str_cmp;
mod ext_sort;
mod merge;
mod numeric_str_cmp;
mod tmp_dir;
use chunks::LineData;
use clap::builder::ValueParser;
use clap::{crate_version, Arg, ArgAction, Command};
use custom_str_cmp::custom_str_cmp;
use ext_sort::ext_sort;
use fnv::FnvHasher;
use numeric_str_cmp::{human_numeric_str_cmp, numeric_str_cmp, NumInfo, NumInfoParseSettings};
use rand::{thread_rng, Rng};
use rayon::prelude::*;
use std::cmp::Ordering;
use std::env;
use std::error::Error;
use std::ffi::{OsStr, OsString};
use std::fmt::Display;
use std::fs::{File, OpenOptions};
use std::hash::{Hash, Hasher};
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
use std::ops::Range;
use std::path::Path;
use std::path::PathBuf;
use std::str::Utf8Error;
use unicode_width::UnicodeWidthStr;
use uucore::display::Quotable;
use uucore::error::{set_exit_code, strip_errno, UError, UResult, USimpleError, UUsageError};
use uucore::line_ending::LineEnding;
use uucore::parse_size::{ParseSizeError, Parser};
use uucore::shortcut_value_parser::ShortcutValueParser;
use uucore::version_cmp::version_cmp;
use uucore::{format_usage, help_about, help_section, help_usage};
use crate::tmp_dir::TmpDirWrapper;
const ABOUT: &str = help_about!("sort.md");
const USAGE: &str = help_usage!("sort.md");
const AFTER_HELP: &str = help_section!("after help", "sort.md");
mod options {
pub mod modes {
pub const SORT: &str = "sort";
pub const HUMAN_NUMERIC: &str = "human-numeric-sort";
pub const MONTH: &str = "month-sort";
pub const NUMERIC: &str = "numeric-sort";
pub const GENERAL_NUMERIC: &str = "general-numeric-sort";
pub const VERSION: &str = "version-sort";
pub const RANDOM: &str = "random-sort";
pub const ALL_SORT_MODES: [&str; 6] = [
GENERAL_NUMERIC,
HUMAN_NUMERIC,
MONTH,
NUMERIC,
VERSION,
RANDOM,
];
}
pub mod check {
pub const CHECK: &str = "check";
pub const CHECK_SILENT: &str = "check-silent";
pub const SILENT: &str = "silent";
pub const QUIET: &str = "quiet";
pub const DIAGNOSE_FIRST: &str = "diagnose-first";
}
pub const HELP: &str = "help";
pub const VERSION: &str = "version";
pub const DICTIONARY_ORDER: &str = "dictionary-order";
pub const MERGE: &str = "merge";
pub const DEBUG: &str = "debug";
pub const IGNORE_CASE: &str = "ignore-case";
pub const IGNORE_LEADING_BLANKS: &str = "ignore-leading-blanks";
pub const IGNORE_NONPRINTING: &str = "ignore-nonprinting";
pub const OUTPUT: &str = "output";
pub const REVERSE: &str = "reverse";
pub const STABLE: &str = "stable";
pub const UNIQUE: &str = "unique";
pub const KEY: &str = "key";
pub const SEPARATOR: &str = "field-separator";
pub const ZERO_TERMINATED: &str = "zero-terminated";
pub const PARALLEL: &str = "parallel";
pub const FILES0_FROM: &str = "files0-from";
pub const BUF_SIZE: &str = "buffer-size";
pub const TMP_DIR: &str = "temporary-directory";
pub const COMPRESS_PROG: &str = "compress-program";
pub const BATCH_SIZE: &str = "batch-size";
pub const FILES: &str = "files";
}
const DECIMAL_PT: char = '.';
const NEGATIVE: char = '-';
const POSITIVE: char = '+';
const DEFAULT_BUF_SIZE: usize = 1_000_000_000; #[derive(Debug)]
enum SortError {
Disorder {
file: OsString,
line_number: usize,
line: String,
silent: bool,
},
OpenFailed {
path: String,
error: std::io::Error,
},
ReadFailed {
path: PathBuf,
error: std::io::Error,
},
ParseKeyError {
key: String,
msg: String,
},
OpenTmpFileFailed {
error: std::io::Error,
},
CompressProgExecutionFailed {
code: i32,
},
CompressProgTerminatedAbnormally {
prog: String,
},
TmpDirCreationFailed,
Uft8Error {
error: Utf8Error,
},
}
impl Error for SortError {}
impl UError for SortError {
fn code(&self) -> i32 {
match self {
Self::Disorder { .. } => 1,
_ => 2,
}
}
}
impl Display for SortError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Disorder {
file,
line_number,
line,
silent,
} => {
if *silent {
Ok(())
} else {
write!(
f,
"{}:{}: disorder: {}",
file.maybe_quote(),
line_number,
line
)
}
}
Self::OpenFailed { path, error } => {
write!(
f,
"open failed: {}: {}",
path.maybe_quote(),
strip_errno(error)
)
}
Self::ParseKeyError { key, msg } => {
write!(f, "failed to parse key {}: {}", key.quote(), msg)
}
Self::ReadFailed { path, error } => {
write!(
f,
"cannot read: {}: {}",
path.maybe_quote(),
strip_errno(error)
)
}
Self::OpenTmpFileFailed { error } => {
write!(f, "failed to open temporary file: {}", strip_errno(error))
}
Self::CompressProgExecutionFailed { code } => {
write!(f, "couldn't execute compress program: errno {code}")
}
Self::CompressProgTerminatedAbnormally { prog } => {
write!(f, "{} terminated abnormally", prog.quote())
}
Self::TmpDirCreationFailed => write!(f, "could not create temporary directory"),
Self::Uft8Error { error } => write!(f, "{error}"),
}
}
}
#[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy, Debug)]
enum SortMode {
Numeric,
HumanNumeric,
GeneralNumeric,
Month,
Version,
Random,
Default,
}
impl SortMode {
fn get_short_name(&self) -> Option<char> {
match self {
Self::Numeric => Some('n'),
Self::HumanNumeric => Some('h'),
Self::GeneralNumeric => Some('g'),
Self::Month => Some('M'),
Self::Version => Some('V'),
Self::Random => Some('R'),
Self::Default => None,
}
}
}
pub struct Output {
file: Option<(String, File)>,
}
impl Output {
fn new(name: Option<&str>) -> UResult<Self> {
let file = if let Some(name) = name {
#[allow(clippy::suspicious_open_options)]
let file = OpenOptions::new()
.write(true)
.create(true)
.open(name)
.map_err(|e| SortError::OpenFailed {
path: name.to_owned(),
error: e,
})?;
Some((name.to_owned(), file))
} else {
None
};
Ok(Self { file })
}
fn into_write(self) -> BufWriter<Box<dyn Write>> {
BufWriter::new(match self.file {
Some((_name, file)) => {
let _ = file.set_len(0);
Box::new(file)
}
None => Box::new(stdout()),
})
}
fn as_output_name(&self) -> Option<&str> {
match &self.file {
Some((name, _file)) => Some(name),
None => None,
}
}
}
#[derive(Clone)]
pub struct GlobalSettings {
mode: SortMode,
debug: bool,
ignore_leading_blanks: bool,
ignore_case: bool,
dictionary_order: bool,
ignore_non_printing: bool,
merge: bool,
reverse: bool,
stable: bool,
unique: bool,
check: bool,
check_silent: bool,
salt: Option<[u8; 16]>,
selectors: Vec<FieldSelector>,
separator: Option<char>,
threads: String,
line_ending: LineEnding,
buffer_size: usize,
compress_prog: Option<String>,
merge_batch_size: usize,
precomputed: Precomputed,
}
#[derive(Clone, Debug, Default)]
struct Precomputed {
needs_tokens: bool,
num_infos_per_line: usize,
floats_per_line: usize,
selections_per_line: usize,
}
impl GlobalSettings {
fn parse_byte_count(input: &str) -> Result<usize, ParseSizeError> {
let size = Parser::default()
.with_allow_list(&[
"b", "k", "K", "m", "M", "g", "G", "t", "T", "P", "E", "Z", "Y",
])
.with_default_unit("K")
.with_b_byte_count(true)
.parse(input.trim())?;
usize::try_from(size).map_err(|_| {
ParseSizeError::SizeTooBig(format!("Buffer size {size} does not fit in address space"))
})
}
fn init_precomputed(&mut self) {
self.precomputed.needs_tokens = self.selectors.iter().any(|s| s.needs_tokens);
self.precomputed.selections_per_line =
self.selectors.iter().filter(|s| s.needs_selection).count();
self.precomputed.num_infos_per_line = self
.selectors
.iter()
.filter(|s| matches!(s.settings.mode, SortMode::Numeric | SortMode::HumanNumeric))
.count();
self.precomputed.floats_per_line = self
.selectors
.iter()
.filter(|s| matches!(s.settings.mode, SortMode::GeneralNumeric))
.count();
}
}
impl Default for GlobalSettings {
fn default() -> Self {
Self {
mode: SortMode::Default,
debug: false,
ignore_leading_blanks: false,
ignore_case: false,
dictionary_order: false,
ignore_non_printing: false,
merge: false,
reverse: false,
stable: false,
unique: false,
check: false,
check_silent: false,
salt: None,
selectors: vec![],
separator: None,
threads: String::new(),
line_ending: LineEnding::Newline,
buffer_size: DEFAULT_BUF_SIZE,
compress_prog: None,
merge_batch_size: 32,
precomputed: Precomputed::default(),
}
}
}
#[derive(Clone, PartialEq, Debug)]
struct KeySettings {
mode: SortMode,
ignore_blanks: bool,
ignore_case: bool,
dictionary_order: bool,
ignore_non_printing: bool,
reverse: bool,
}
impl KeySettings {
fn check_compatibility(
mode: SortMode,
ignore_non_printing: bool,
dictionary_order: bool,
) -> Result<(), String> {
if matches!(
mode,
SortMode::Numeric | SortMode::HumanNumeric | SortMode::GeneralNumeric | SortMode::Month
) {
if dictionary_order {
return Err(format!(
"options '-{}{}' are incompatible",
'd',
mode.get_short_name().unwrap()
));
} else if ignore_non_printing {
return Err(format!(
"options '-{}{}' are incompatible",
'i',
mode.get_short_name().unwrap()
));
}
}
Ok(())
}
fn set_sort_mode(&mut self, mode: SortMode) -> Result<(), String> {
if self.mode != SortMode::Default && self.mode != mode {
return Err(format!(
"options '-{}{}' are incompatible",
self.mode.get_short_name().unwrap(),
mode.get_short_name().unwrap()
));
}
Self::check_compatibility(mode, self.ignore_non_printing, self.dictionary_order)?;
self.mode = mode;
Ok(())
}
fn set_dictionary_order(&mut self) -> Result<(), String> {
Self::check_compatibility(self.mode, self.ignore_non_printing, true)?;
self.dictionary_order = true;
Ok(())
}
fn set_ignore_non_printing(&mut self) -> Result<(), String> {
Self::check_compatibility(self.mode, true, self.dictionary_order)?;
self.ignore_non_printing = true;
Ok(())
}
}
impl From<&GlobalSettings> for KeySettings {
fn from(settings: &GlobalSettings) -> Self {
Self {
mode: settings.mode,
ignore_blanks: settings.ignore_leading_blanks,
ignore_case: settings.ignore_case,
ignore_non_printing: settings.ignore_non_printing,
reverse: settings.reverse,
dictionary_order: settings.dictionary_order,
}
}
}
impl Default for KeySettings {
fn default() -> Self {
Self::from(&GlobalSettings::default())
}
}
enum Selection<'a> {
AsF64(GeneralF64ParseResult),
WithNumInfo(&'a str, NumInfo),
Str(&'a str),
}
type Field = Range<usize>;
#[derive(Clone, Debug)]
pub struct Line<'a> {
line: &'a str,
index: usize,
}
impl<'a> Line<'a> {
fn create(
line: &'a str,
index: usize,
line_data: &mut LineData<'a>,
token_buffer: &mut Vec<Field>,
settings: &GlobalSettings,
) -> Self {
token_buffer.clear();
if settings.precomputed.needs_tokens {
tokenize(line, settings.separator, token_buffer);
}
for (selector, selection) in settings
.selectors
.iter()
.map(|selector| (selector, selector.get_selection(line, token_buffer)))
{
match selection {
Selection::AsF64(parsed_float) => line_data.parsed_floats.push(parsed_float),
Selection::WithNumInfo(str, num_info) => {
line_data.num_infos.push(num_info);
line_data.selections.push(str);
}
Selection::Str(str) => {
if selector.needs_selection {
line_data.selections.push(str);
}
}
}
}
Self { line, index }
}
fn print(&self, writer: &mut impl Write, settings: &GlobalSettings) {
if settings.debug {
self.print_debug(settings, writer).unwrap();
} else {
writer.write_all(self.line.as_bytes()).unwrap();
writer.write_all(&[settings.line_ending.into()]).unwrap();
}
}
fn print_debug(
&self,
settings: &GlobalSettings,
writer: &mut impl Write,
) -> std::io::Result<()> {
let line = self.line.replace('\t', ">");
writeln!(writer, "{line}")?;
let mut fields = vec![];
tokenize(self.line, settings.separator, &mut fields);
for selector in &settings.selectors {
let mut selection = selector.get_range(self.line, Some(&fields));
match selector.settings.mode {
SortMode::Numeric | SortMode::HumanNumeric => {
let (_, num_range) = NumInfo::parse(
&self.line[selection.clone()],
&NumInfoParseSettings {
accept_si_units: selector.settings.mode == SortMode::HumanNumeric,
..Default::default()
},
);
let initial_selection = selection.clone();
selection.start += num_range.start;
selection.end = selection.start + num_range.len();
if num_range == (0..0) {
let leading_whitespace = self.line[selection.clone()]
.find(|c: char| !c.is_whitespace())
.unwrap_or(0);
selection.start += leading_whitespace;
selection.end += leading_whitespace;
} else {
if selector.settings.mode == SortMode::HumanNumeric
&& self.line[selection.end..initial_selection.end]
.starts_with(&['k', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'][..])
{
selection.end += 1;
}
while self.line[initial_selection.start..selection.start]
.ends_with(&['-', '0', '.'][..])
{
selection.start -= 1;
}
}
}
SortMode::GeneralNumeric => {
let initial_selection = &self.line[selection.clone()];
let leading = get_leading_gen(initial_selection);
selection.start += leading.start;
selection.end = selection.start + leading.len();
}
SortMode::Month => {
let initial_selection = &self.line[selection.clone()];
let mut month_chars = initial_selection
.char_indices()
.skip_while(|(_, c)| c.is_whitespace());
let month = if month_parse(initial_selection) == Month::Unknown {
let first_non_whitespace = month_chars.next();
first_non_whitespace.map_or(
initial_selection.len()..initial_selection.len(),
|(idx, _)| idx..idx,
)
} else {
month_chars.next().unwrap().0
..month_chars
.nth(2)
.map_or(initial_selection.len(), |(idx, _)| idx)
};
selection.start += month.start;
selection.end = selection.start + month.len();
}
_ => {}
}
write!(
writer,
"{}",
" ".repeat(UnicodeWidthStr::width(&line[..selection.start]))
)?;
if selection.is_empty() {
writeln!(writer, "^ no match for key")?;
} else {
writeln!(
writer,
"{}",
"_".repeat(UnicodeWidthStr::width(&line[selection]))
)?;
}
}
if settings.mode != SortMode::Random
&& !settings.stable
&& !settings.unique
&& (settings.dictionary_order
|| settings.ignore_leading_blanks
|| settings.ignore_case
|| settings.ignore_non_printing
|| settings.mode != SortMode::Default
|| settings
.selectors
.last()
.map_or(true, |selector| selector != &FieldSelector::default()))
{
if self.line.is_empty() {
writeln!(writer, "^ no match for key")?;
} else {
writeln!(
writer,
"{}",
"_".repeat(UnicodeWidthStr::width(line.as_str()))
)?;
}
}
Ok(())
}
}
fn tokenize(line: &str, separator: Option<char>, token_buffer: &mut Vec<Field>) {
assert!(token_buffer.is_empty());
if let Some(separator) = separator {
tokenize_with_separator(line, separator, token_buffer);
} else {
tokenize_default(line, token_buffer);
}
}
fn tokenize_default(line: &str, token_buffer: &mut Vec<Field>) {
token_buffer.push(0..0);
let mut previous_was_whitespace = true;
for (idx, char) in line.char_indices() {
if char.is_whitespace() {
if !previous_was_whitespace {
token_buffer.last_mut().unwrap().end = idx;
token_buffer.push(idx..0);
}
previous_was_whitespace = true;
} else {
previous_was_whitespace = false;
}
}
token_buffer.last_mut().unwrap().end = line.len();
}
fn tokenize_with_separator(line: &str, separator: char, token_buffer: &mut Vec<Field>) {
let separator_indices =
line.char_indices()
.filter_map(|(i, c)| if c == separator { Some(i) } else { None });
let mut start = 0;
for sep_idx in separator_indices {
token_buffer.push(start..sep_idx);
start = sep_idx + 1;
}
if start < line.len() {
token_buffer.push(start..line.len());
}
}
#[derive(Clone, PartialEq, Debug)]
struct KeyPosition {
field: usize,
char: usize,
ignore_blanks: bool,
}
impl KeyPosition {
fn new(key: &str, default_char_index: usize, ignore_blanks: bool) -> Result<Self, String> {
let mut field_and_char = key.split('.');
let field = field_and_char
.next()
.ok_or_else(|| format!("invalid key {}", key.quote()))?;
let char = field_and_char.next();
let field = field
.parse()
.map_err(|e| format!("failed to parse field index {}: {}", field.quote(), e))?;
if field == 0 {
return Err("field index can not be 0".to_string());
}
let char = char.map_or(Ok(default_char_index), |char| {
char.parse()
.map_err(|e| format!("failed to parse character index {}: {}", char.quote(), e))
})?;
Ok(Self {
field,
char,
ignore_blanks,
})
}
}
impl Default for KeyPosition {
fn default() -> Self {
Self {
field: 1,
char: 1,
ignore_blanks: false,
}
}
}
#[derive(Clone, PartialEq, Debug, Default)]
struct FieldSelector {
from: KeyPosition,
to: Option<KeyPosition>,
settings: KeySettings,
needs_tokens: bool,
needs_selection: bool,
}
impl FieldSelector {
fn split_key_options(position: &str) -> (&str, &str) {
if let Some((options_start, _)) = position.char_indices().find(|(_, c)| c.is_alphabetic()) {
position.split_at(options_start)
} else {
(position, "")
}
}
fn parse(key: &str, global_settings: &GlobalSettings) -> UResult<Self> {
let mut from_to = key.split(',');
let (from, from_options) = Self::split_key_options(from_to.next().unwrap());
let to = from_to.next().map(Self::split_key_options);
let options_are_empty = from_options.is_empty() && matches!(to, None | Some((_, "")));
if options_are_empty {
(|| {
Self::new(
KeyPosition::new(from, 1, global_settings.ignore_leading_blanks)?,
to.map(|(to, _)| {
KeyPosition::new(to, 0, global_settings.ignore_leading_blanks)
})
.transpose()?,
KeySettings::from(global_settings),
)
})()
} else {
Self::parse_with_options((from, from_options), to)
}
.map_err(|msg| {
SortError::ParseKeyError {
key: key.to_owned(),
msg,
}
.into()
})
}
fn parse_with_options(
(from, from_options): (&str, &str),
to: Option<(&str, &str)>,
) -> Result<Self, String> {
fn parse_key_settings(
options: &str,
key_settings: &mut KeySettings,
) -> Result<bool, String> {
let mut ignore_blanks = false;
for option in options.chars() {
match option {
'M' => key_settings.set_sort_mode(SortMode::Month)?,
'b' => ignore_blanks = true,
'd' => key_settings.set_dictionary_order()?,
'f' => key_settings.ignore_case = true,
'g' => key_settings.set_sort_mode(SortMode::GeneralNumeric)?,
'h' => key_settings.set_sort_mode(SortMode::HumanNumeric)?,
'i' => key_settings.set_ignore_non_printing()?,
'n' => key_settings.set_sort_mode(SortMode::Numeric)?,
'R' => key_settings.set_sort_mode(SortMode::Random)?,
'r' => key_settings.reverse = true,
'V' => key_settings.set_sort_mode(SortMode::Version)?,
c => return Err(format!("invalid option: '{c}'")),
}
}
Ok(ignore_blanks)
}
let mut key_settings = KeySettings::default();
let from = parse_key_settings(from_options, &mut key_settings)
.map(|ignore_blanks| KeyPosition::new(from, 1, ignore_blanks))??;
let to = if let Some((to, to_options)) = to {
Some(
parse_key_settings(to_options, &mut key_settings)
.map(|ignore_blanks| KeyPosition::new(to, 0, ignore_blanks))??,
)
} else {
None
};
Self::new(from, to, key_settings)
}
fn new(
from: KeyPosition,
to: Option<KeyPosition>,
settings: KeySettings,
) -> Result<Self, String> {
if from.char == 0 {
Err("invalid character index 0 for the start position of a field".to_string())
} else {
Ok(Self {
needs_selection: (from.field != 1
|| from.char != 1
|| to.is_some()
|| matches!(settings.mode, SortMode::Numeric | SortMode::HumanNumeric)
|| from.ignore_blanks)
&& !matches!(settings.mode, SortMode::GeneralNumeric),
needs_tokens: from.field != 1 || from.char == 0 || to.is_some(),
from,
to,
settings,
})
}
}
fn get_selection<'a>(&self, line: &'a str, tokens: &[Field]) -> Selection<'a> {
let tokens = if self.needs_tokens {
Some(tokens)
} else {
None
};
let mut range = &line[self.get_range(line, tokens)];
if self.settings.mode == SortMode::Numeric || self.settings.mode == SortMode::HumanNumeric {
let (info, num_range) = NumInfo::parse(
range,
&NumInfoParseSettings {
accept_si_units: self.settings.mode == SortMode::HumanNumeric,
..Default::default()
},
);
range = &range[num_range];
Selection::WithNumInfo(range, info)
} else if self.settings.mode == SortMode::GeneralNumeric {
Selection::AsF64(general_f64_parse(&range[get_leading_gen(range)]))
} else {
Selection::Str(range)
}
}
fn get_range(&self, line: &str, tokens: Option<&[Field]>) -> Range<usize> {
enum Resolution {
StartOfChar(usize),
EndOfChar(usize),
TooLow,
TooHigh,
}
fn resolve_index(
line: &str,
tokens: Option<&[Field]>,
position: &KeyPosition,
) -> Resolution {
if matches!(tokens, Some(tokens) if tokens.len() < position.field) {
Resolution::TooHigh
} else if position.char == 0 {
let end = tokens.unwrap()[position.field - 1].end;
if end == 0 {
Resolution::TooLow
} else {
Resolution::EndOfChar(end)
}
} else {
let mut idx = if position.field == 1 {
0
} else {
tokens.unwrap()[position.field - 1].start
};
if position.ignore_blanks {
idx += line[idx..]
.char_indices()
.find(|(_, c)| !c.is_whitespace())
.map_or(line[idx..].len(), |(idx, _)| idx);
}
idx += line[idx..]
.char_indices()
.nth(position.char - 1)
.map_or(line[idx..].len(), |(idx, _)| idx);
if idx >= line.len() {
Resolution::TooHigh
} else {
Resolution::StartOfChar(idx)
}
}
}
match resolve_index(line, tokens, &self.from) {
Resolution::StartOfChar(from) => {
let to = self.to.as_ref().map(|to| resolve_index(line, tokens, to));
let mut range = match to {
Some(Resolution::StartOfChar(mut to)) => {
to += line[to..].chars().next().map_or(1, char::len_utf8);
from..to
}
Some(Resolution::EndOfChar(to)) => from..to,
None | Some(Resolution::TooHigh) => from..line.len(),
Some(Resolution::TooLow) => 0..0,
};
if range.start > range.end {
range.end = range.start;
}
range
}
Resolution::TooLow | Resolution::EndOfChar(_) => {
unreachable!("This should only happen if the field start index is 0, but that should already have caused an error.")
}
Resolution::TooHigh => line.len()..line.len(),
}
}
}
fn make_sort_mode_arg(mode: &'static str, short: char, help: &'static str) -> Arg {
let mut arg = Arg::new(mode)
.short(short)
.long(mode)
.help(help)
.action(ArgAction::SetTrue);
for possible_mode in &options::modes::ALL_SORT_MODES {
if *possible_mode != mode {
arg = arg.conflicts_with(possible_mode);
}
}
arg
}
#[uucore::main]
#[allow(clippy::cognitive_complexity)]
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let mut settings = GlobalSettings::default();
let matches = match uu_app().try_get_matches_from(args) {
Ok(t) => t,
Err(e) => {
e.print().unwrap();
if e.use_stderr() {
set_exit_code(2);
}
return Ok(());
}
};
settings.debug = matches.get_flag(options::DEBUG);
let mut files: Vec<OsString> = if matches.contains_id(options::FILES0_FROM) {
let files0_from: Vec<OsString> = matches
.get_many::<OsString>(options::FILES0_FROM)
.map(|v| v.map(ToOwned::to_owned).collect())
.unwrap_or_default();
let mut files = Vec::new();
for path in &files0_from {
let reader = open(path)?;
let buf_reader = BufReader::new(reader);
for line in buf_reader.split(b'\0').flatten() {
files.push(OsString::from(
std::str::from_utf8(&line)
.expect("Could not parse string from zero terminated input."),
));
}
}
files
} else {
matches
.get_many::<OsString>(options::FILES)
.map(|v| v.map(ToOwned::to_owned).collect())
.unwrap_or_default()
};
settings.mode = if matches.get_flag(options::modes::HUMAN_NUMERIC)
|| matches
.get_one::<String>(options::modes::SORT)
.map(|s| s.as_str())
== Some("human-numeric")
{
SortMode::HumanNumeric
} else if matches.get_flag(options::modes::MONTH)
|| matches
.get_one::<String>(options::modes::SORT)
.map(|s| s.as_str())
== Some("month")
{
SortMode::Month
} else if matches.get_flag(options::modes::GENERAL_NUMERIC)
|| matches
.get_one::<String>(options::modes::SORT)
.map(|s| s.as_str())
== Some("general-numeric")
{
SortMode::GeneralNumeric
} else if matches.get_flag(options::modes::NUMERIC)
|| matches
.get_one::<String>(options::modes::SORT)
.map(|s| s.as_str())
== Some("numeric")
{
SortMode::Numeric
} else if matches.get_flag(options::modes::VERSION)
|| matches
.get_one::<String>(options::modes::SORT)
.map(|s| s.as_str())
== Some("version")
{
SortMode::Version
} else if matches.get_flag(options::modes::RANDOM)
|| matches
.get_one::<String>(options::modes::SORT)
.map(|s| s.as_str())
== Some("random")
{
settings.salt = Some(get_rand_string());
SortMode::Random
} else {
SortMode::Default
};
settings.dictionary_order = matches.get_flag(options::DICTIONARY_ORDER);
settings.ignore_non_printing = matches.get_flag(options::IGNORE_NONPRINTING);
if matches.contains_id(options::PARALLEL) {
settings.threads = matches
.get_one::<String>(options::PARALLEL)
.map(String::from)
.unwrap_or_else(|| "0".to_string());
env::set_var("RAYON_NUM_THREADS", &settings.threads);
}
settings.buffer_size =
matches
.get_one::<String>(options::BUF_SIZE)
.map_or(Ok(DEFAULT_BUF_SIZE), |s| {
GlobalSettings::parse_byte_count(s).map_err(|e| {
USimpleError::new(2, format_error_message(&e, s, options::BUF_SIZE))
})
})?;
let mut tmp_dir = TmpDirWrapper::new(
matches
.get_one::<String>(options::TMP_DIR)
.map(PathBuf::from)
.unwrap_or_else(env::temp_dir),
);
settings.compress_prog = matches
.get_one::<String>(options::COMPRESS_PROG)
.map(String::from);
if let Some(n_merge) = matches.get_one::<String>(options::BATCH_SIZE) {
settings.merge_batch_size = n_merge.parse().map_err(|_| {
UUsageError::new(
2,
format!("invalid --batch-size argument {}", n_merge.quote()),
)
})?;
}
settings.line_ending = LineEnding::from_zero_flag(matches.get_flag(options::ZERO_TERMINATED));
settings.merge = matches.get_flag(options::MERGE);
settings.check = matches.contains_id(options::check::CHECK);
if matches.get_flag(options::check::CHECK_SILENT)
|| matches!(
matches
.get_one::<String>(options::check::CHECK)
.map(|s| s.as_str()),
Some(options::check::SILENT | options::check::QUIET)
)
{
settings.check_silent = true;
settings.check = true;
};
settings.ignore_case = matches.get_flag(options::IGNORE_CASE);
settings.ignore_leading_blanks = matches.get_flag(options::IGNORE_LEADING_BLANKS);
settings.reverse = matches.get_flag(options::REVERSE);
settings.stable = matches.get_flag(options::STABLE);
settings.unique = matches.get_flag(options::UNIQUE);
if files.is_empty() {
files.push("-".to_string().into());
} else if settings.check && files.len() != 1 {
return Err(UUsageError::new(
2,
format!("extra operand {} not allowed with -c", files[1].quote()),
));
}
if let Some(arg) = matches.get_one::<OsString>(options::SEPARATOR) {
let mut separator = arg.to_str().ok_or_else(|| {
UUsageError::new(
2,
format!("separator is not valid unicode: {}", arg.quote()),
)
})?;
if separator == "\\0" {
separator = "\0";
}
if separator.len() != 1 {
return Err(UUsageError::new(
2,
format!(
"separator must be exactly one character long: {}",
separator.quote()
),
));
}
settings.separator = Some(separator.chars().next().unwrap());
}
if let Some(values) = matches.get_many::<String>(options::KEY) {
for value in values {
let selector = FieldSelector::parse(value, &settings)?;
if selector.settings.mode == SortMode::Random && settings.salt.is_none() {
settings.salt = Some(get_rand_string());
}
settings.selectors.push(selector);
}
}
if !matches.contains_id(options::KEY) {
let key_settings = KeySettings::from(&settings);
settings.selectors.push(
FieldSelector::new(
KeyPosition {
field: 1,
char: 1,
ignore_blanks: key_settings.ignore_blanks,
},
None,
key_settings,
)
.unwrap(),
);
}
for file in &files {
open(file)?;
}
let output = Output::new(
matches
.get_one::<String>(options::OUTPUT)
.map(|s| s.as_str()),
)?;
settings.init_precomputed();
let result = exec(&mut files, &settings, output, &mut tmp_dir);
tmp_dir.wait_if_signal();
result
}
pub fn uu_app() -> Command {
Command::new(uucore::util_name())
.version(crate_version!())
.about(ABOUT)
.after_help(AFTER_HELP)
.override_usage(format_usage(USAGE))
.infer_long_args(true)
.disable_help_flag(true)
.disable_version_flag(true)
.args_override_self(true)
.arg(
Arg::new(options::HELP)
.long(options::HELP)
.help("Print help information.")
.action(ArgAction::Help),
)
.arg(
Arg::new(options::VERSION)
.long(options::VERSION)
.help("Print version information.")
.action(ArgAction::Version),
)
.arg(
Arg::new(options::modes::SORT)
.long(options::modes::SORT)
.value_parser(ShortcutValueParser::new([
"general-numeric",
"human-numeric",
"month",
"numeric",
"version",
"random",
]))
.conflicts_with_all(options::modes::ALL_SORT_MODES),
)
.arg(make_sort_mode_arg(
options::modes::HUMAN_NUMERIC,
'h',
"compare according to human readable sizes, eg 1M > 100k",
))
.arg(make_sort_mode_arg(
options::modes::MONTH,
'M',
"compare according to month name abbreviation",
))
.arg(make_sort_mode_arg(
options::modes::NUMERIC,
'n',
"compare according to string numerical value",
))
.arg(make_sort_mode_arg(
options::modes::GENERAL_NUMERIC,
'g',
"compare according to string general numerical value",
))
.arg(make_sort_mode_arg(
options::modes::VERSION,
'V',
"Sort by SemVer version number, eg 1.12.2 > 1.1.2",
))
.arg(make_sort_mode_arg(
options::modes::RANDOM,
'R',
"shuffle in random order",
))
.arg(
Arg::new(options::DICTIONARY_ORDER)
.short('d')
.long(options::DICTIONARY_ORDER)
.help("consider only blanks and alphanumeric characters")
.conflicts_with_all([
options::modes::NUMERIC,
options::modes::GENERAL_NUMERIC,
options::modes::HUMAN_NUMERIC,
options::modes::MONTH,
])
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::MERGE)
.short('m')
.long(options::MERGE)
.help("merge already sorted files; do not sort")
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::check::CHECK)
.short('c')
.long(options::check::CHECK)
.require_equals(true)
.num_args(0..)
.value_parser(ShortcutValueParser::new([
options::check::SILENT,
options::check::QUIET,
options::check::DIAGNOSE_FIRST,
]))
.conflicts_with(options::OUTPUT)
.help("check for sorted input; do not sort"),
)
.arg(
Arg::new(options::check::CHECK_SILENT)
.short('C')
.long(options::check::CHECK_SILENT)
.conflicts_with(options::OUTPUT)
.help(
"exit successfully if the given file is already sorted, \
and exit with status 1 otherwise.",
)
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::IGNORE_CASE)
.short('f')
.long(options::IGNORE_CASE)
.help("fold lower case to upper case characters")
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::IGNORE_NONPRINTING)
.short('i')
.long(options::IGNORE_NONPRINTING)
.help("ignore nonprinting characters")
.conflicts_with_all([
options::modes::NUMERIC,
options::modes::GENERAL_NUMERIC,
options::modes::HUMAN_NUMERIC,
options::modes::MONTH,
])
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::IGNORE_LEADING_BLANKS)
.short('b')
.long(options::IGNORE_LEADING_BLANKS)
.help("ignore leading blanks when finding sort keys in each line")
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::OUTPUT)
.short('o')
.long(options::OUTPUT)
.help("write output to FILENAME instead of stdout")
.value_name("FILENAME")
.value_hint(clap::ValueHint::FilePath),
)
.arg(
Arg::new(options::REVERSE)
.short('r')
.long(options::REVERSE)
.help("reverse the output")
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::STABLE)
.short('s')
.long(options::STABLE)
.help("stabilize sort by disabling last-resort comparison")
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::UNIQUE)
.short('u')
.long(options::UNIQUE)
.help("output only the first of an equal run")
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::KEY)
.short('k')
.long(options::KEY)
.help("sort by a key")
.action(ArgAction::Append)
.num_args(1),
)
.arg(
Arg::new(options::SEPARATOR)
.short('t')
.long(options::SEPARATOR)
.help("custom separator for -k")
.value_parser(ValueParser::os_string()),
)
.arg(
Arg::new(options::ZERO_TERMINATED)
.short('z')
.long(options::ZERO_TERMINATED)
.help("line delimiter is NUL, not newline")
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::PARALLEL)
.long(options::PARALLEL)
.help("change the number of threads running concurrently to NUM_THREADS")
.value_name("NUM_THREADS"),
)
.arg(
Arg::new(options::BUF_SIZE)
.short('S')
.long(options::BUF_SIZE)
.help("sets the maximum SIZE of each segment in number of sorted items")
.value_name("SIZE"),
)
.arg(
Arg::new(options::TMP_DIR)
.short('T')
.long(options::TMP_DIR)
.help("use DIR for temporaries, not $TMPDIR or /tmp")
.value_name("DIR")
.value_hint(clap::ValueHint::DirPath),
)
.arg(
Arg::new(options::COMPRESS_PROG)
.long(options::COMPRESS_PROG)
.help("compress temporary files with PROG, decompress with PROG -d; PROG has to take input from stdin and output to stdout")
.value_name("PROG")
.value_hint(clap::ValueHint::CommandName),
)
.arg(
Arg::new(options::BATCH_SIZE)
.long(options::BATCH_SIZE)
.help("Merge at most N_MERGE inputs at once.")
.value_name("N_MERGE"),
)
.arg(
Arg::new(options::FILES0_FROM)
.long(options::FILES0_FROM)
.help("read input from the files specified by NUL-terminated NUL_FILES")
.value_name("NUL_FILES")
.action(ArgAction::Append)
.value_parser(ValueParser::os_string())
.value_hint(clap::ValueHint::FilePath),
)
.arg(
Arg::new(options::DEBUG)
.long(options::DEBUG)
.help("underline the parts of the line that are actually used for sorting")
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::FILES)
.action(ArgAction::Append)
.value_parser(ValueParser::os_string())
.value_hint(clap::ValueHint::FilePath),
)
}
fn exec(
files: &mut [OsString],
settings: &GlobalSettings,
output: Output,
tmp_dir: &mut TmpDirWrapper,
) -> UResult<()> {
if settings.merge {
let file_merger = merge::merge(files, settings, output.as_output_name(), tmp_dir)?;
file_merger.write_all(settings, output)
} else if settings.check {
if files.len() > 1 {
Err(UUsageError::new(2, "only one file allowed with -c"))
} else {
check::check(files.first().unwrap(), settings)
}
} else {
let mut lines = files.iter().map(open);
ext_sort(&mut lines, settings, output, tmp_dir)
}
}
fn sort_by<'a>(unsorted: &mut Vec<Line<'a>>, settings: &GlobalSettings, line_data: &LineData<'a>) {
if settings.stable || settings.unique {
unsorted.par_sort_by(|a, b| compare_by(a, b, settings, line_data, line_data));
} else {
unsorted.par_sort_unstable_by(|a, b| compare_by(a, b, settings, line_data, line_data));
}
}
fn compare_by<'a>(
a: &Line<'a>,
b: &Line<'a>,
global_settings: &GlobalSettings,
a_line_data: &LineData<'a>,
b_line_data: &LineData<'a>,
) -> Ordering {
let mut selection_index = 0;
let mut num_info_index = 0;
let mut parsed_float_index = 0;
for selector in &global_settings.selectors {
let (a_str, b_str) = if selector.needs_selection {
let selections = (
a_line_data.selections
[a.index * global_settings.precomputed.selections_per_line + selection_index],
b_line_data.selections
[b.index * global_settings.precomputed.selections_per_line + selection_index],
);
selection_index += 1;
selections
} else {
(a.line, b.line)
};
let settings = &selector.settings;
let cmp: Ordering = match settings.mode {
SortMode::Random => {
if custom_str_cmp(
a_str,
b_str,
settings.ignore_non_printing,
settings.dictionary_order,
settings.ignore_case,
) == Ordering::Equal
{
Ordering::Equal
} else {
random_shuffle(a_str, b_str, &global_settings.salt.unwrap())
}
}
SortMode::Numeric => {
let a_num_info = &a_line_data.num_infos
[a.index * global_settings.precomputed.num_infos_per_line + num_info_index];
let b_num_info = &b_line_data.num_infos
[b.index * global_settings.precomputed.num_infos_per_line + num_info_index];
num_info_index += 1;
numeric_str_cmp((a_str, a_num_info), (b_str, b_num_info))
}
SortMode::HumanNumeric => {
let a_num_info = &a_line_data.num_infos
[a.index * global_settings.precomputed.num_infos_per_line + num_info_index];
let b_num_info = &b_line_data.num_infos
[b.index * global_settings.precomputed.num_infos_per_line + num_info_index];
num_info_index += 1;
human_numeric_str_cmp((a_str, a_num_info), (b_str, b_num_info))
}
SortMode::GeneralNumeric => {
let a_float = &a_line_data.parsed_floats
[a.index * global_settings.precomputed.floats_per_line + parsed_float_index];
let b_float = &b_line_data.parsed_floats
[b.index * global_settings.precomputed.floats_per_line + parsed_float_index];
parsed_float_index += 1;
general_numeric_compare(a_float, b_float)
}
SortMode::Month => month_compare(a_str, b_str),
SortMode::Version => version_cmp(a_str, b_str),
SortMode::Default => custom_str_cmp(
a_str,
b_str,
settings.ignore_non_printing,
settings.dictionary_order,
settings.ignore_case,
),
};
if cmp != Ordering::Equal {
return if settings.reverse { cmp.reverse() } else { cmp };
}
}
let cmp = if global_settings.mode == SortMode::Random
|| global_settings.stable
|| global_settings.unique
{
Ordering::Equal
} else {
a.line.cmp(b.line)
};
if global_settings.reverse {
cmp.reverse()
} else {
cmp
}
}
#[allow(clippy::cognitive_complexity)]
fn get_leading_gen(input: &str) -> Range<usize> {
let trimmed = input.trim_start();
let leading_whitespace_len = input.len() - trimmed.len();
for allowed_prefix in ["inf", "-inf", "nan"] {
if trimmed.is_char_boundary(allowed_prefix.len())
&& trimmed[..allowed_prefix.len()].eq_ignore_ascii_case(allowed_prefix)
{
return leading_whitespace_len..(leading_whitespace_len + allowed_prefix.len());
}
}
let mut char_indices = itertools::peek_nth(trimmed.char_indices());
let first = char_indices.peek();
if matches!(first, Some((_, NEGATIVE) | (_, POSITIVE))) {
char_indices.next();
}
let mut had_e_notation = false;
let mut had_decimal_pt = false;
while let Some((idx, c)) = char_indices.next() {
if c.is_ascii_digit() {
continue;
}
if c == DECIMAL_PT && !had_decimal_pt && !had_e_notation {
had_decimal_pt = true;
continue;
}
if (c == 'e' || c == 'E') && !had_e_notation {
if let Some(&(_, next_char)) = char_indices.peek() {
if (next_char == '+' || next_char == '-')
&& matches!(
char_indices.peek_nth(2),
Some((_, c)) if c.is_ascii_digit()
)
{
char_indices.next();
had_e_notation = true;
continue;
}
if next_char.is_ascii_digit() {
had_e_notation = true;
continue;
}
}
}
return leading_whitespace_len..(leading_whitespace_len + idx);
}
leading_whitespace_len..input.len()
}
#[derive(Copy, Clone, PartialEq, PartialOrd, Debug)]
pub enum GeneralF64ParseResult {
Invalid,
NaN,
NegInfinity,
Number(f64),
Infinity,
}
#[inline(always)]
fn general_f64_parse(a: &str) -> GeneralF64ParseResult {
match a.parse::<f64>() {
Ok(a) if a.is_nan() => GeneralF64ParseResult::NaN,
Ok(a) if a == std::f64::NEG_INFINITY => GeneralF64ParseResult::NegInfinity,
Ok(a) if a == std::f64::INFINITY => GeneralF64ParseResult::Infinity,
Ok(a) => GeneralF64ParseResult::Number(a),
Err(_) => GeneralF64ParseResult::Invalid,
}
}
fn general_numeric_compare(a: &GeneralF64ParseResult, b: &GeneralF64ParseResult) -> Ordering {
a.partial_cmp(b).unwrap()
}
fn get_rand_string() -> [u8; 16] {
thread_rng().sample(rand::distributions::Standard)
}
fn get_hash<T: Hash>(t: &T) -> u64 {
let mut s = FnvHasher::default();
t.hash(&mut s);
s.finish()
}
fn random_shuffle(a: &str, b: &str, salt: &[u8]) -> Ordering {
let da = get_hash(&(a, salt));
let db = get_hash(&(b, salt));
da.cmp(&db)
}
#[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy)]
enum Month {
Unknown,
January,
February,
March,
April,
May,
June,
July,
August,
September,
October,
November,
December,
}
fn month_parse(line: &str) -> Month {
let line = line.trim();
const MONTHS: [(&str, Month); 12] = [
("JAN", Month::January),
("FEB", Month::February),
("MAR", Month::March),
("APR", Month::April),
("MAY", Month::May),
("JUN", Month::June),
("JUL", Month::July),
("AUG", Month::August),
("SEP", Month::September),
("OCT", Month::October),
("NOV", Month::November),
("DEC", Month::December),
];
for (month_str, month) in &MONTHS {
if line.is_char_boundary(month_str.len())
&& line[..month_str.len()].eq_ignore_ascii_case(month_str)
{
return *month;
}
}
Month::Unknown
}
fn month_compare(a: &str, b: &str) -> Ordering {
#![allow(clippy::comparison_chain)]
let ma = month_parse(a);
let mb = month_parse(b);
if ma > mb {
Ordering::Greater
} else if ma < mb {
Ordering::Less
} else {
Ordering::Equal
}
}
fn print_sorted<'a, T: Iterator<Item = &'a Line<'a>>>(
iter: T,
settings: &GlobalSettings,
output: Output,
) {
let mut writer = output.into_write();
for line in iter {
line.print(&mut writer, settings);
}
}
fn open(path: impl AsRef<OsStr>) -> UResult<Box<dyn Read + Send>> {
let path = path.as_ref();
if path == "-" {
let stdin = stdin();
return Ok(Box::new(stdin) as Box<dyn Read + Send>);
}
let path = Path::new(path);
match File::open(path) {
Ok(f) => Ok(Box::new(f) as Box<dyn Read + Send>),
Err(error) => Err(SortError::ReadFailed {
path: path.to_owned(),
error,
}
.into()),
}
}
fn format_error_message(error: &ParseSizeError, s: &str, option: &str) -> String {
match error {
ParseSizeError::InvalidSuffix(_) => {
format!("invalid suffix in --{} argument {}", option, s.quote())
}
ParseSizeError::ParseFailure(_) => format!("invalid --{} argument {}", option, s.quote()),
ParseSizeError::SizeTooBig(_) => format!("--{} argument {} too large", option, s.quote()),
}
}
#[cfg(test)]
mod tests {
use super::*;
fn tokenize_helper(line: &str, separator: Option<char>) -> Vec<Field> {
let mut buffer = vec![];
tokenize(line, separator, &mut buffer);
buffer
}
#[test]
fn test_get_hash() {
let a = "Ted".to_string();
assert_eq!(2_646_829_031_758_483_623, get_hash(&a));
}
#[test]
fn test_random_shuffle() {
let a = "Ted";
let b = "Ted";
let c = get_rand_string();
assert_eq!(Ordering::Equal, random_shuffle(a, b, &c));
}
#[test]
fn test_month_compare() {
let a = "JaN";
let b = "OCt";
assert_eq!(Ordering::Less, month_compare(a, b));
}
#[test]
fn test_version_compare() {
let a = "1.2.3-alpha2";
let b = "1.4.0";
assert_eq!(Ordering::Less, version_cmp(a, b));
}
#[test]
fn test_random_compare() {
let a = "9";
let b = "9";
let c = get_rand_string();
assert_eq!(Ordering::Equal, random_shuffle(a, b, &c));
}
#[test]
fn test_tokenize_fields() {
let line = "foo bar b x";
assert_eq!(tokenize_helper(line, None), vec![0..3, 3..7, 7..9, 9..14,],);
}
#[test]
fn test_tokenize_fields_leading_whitespace() {
let line = " foo bar b x";
assert_eq!(
tokenize_helper(line, None),
vec![0..7, 7..11, 11..13, 13..18,]
);
}
#[test]
fn test_tokenize_fields_custom_separator() {
let line = "aaa foo bar b x";
assert_eq!(
tokenize_helper(line, Some('a')),
vec![0..0, 1..1, 2..2, 3..9, 10..18,]
);
}
#[test]
fn test_tokenize_fields_trailing_custom_separator() {
let line = "a";
assert_eq!(tokenize_helper(line, Some('a')), vec![0..0]);
let line = "aa";
assert_eq!(tokenize_helper(line, Some('a')), vec![0..0, 1..1]);
let line = "..a..a";
assert_eq!(tokenize_helper(line, Some('a')), vec![0..2, 3..5]);
}
#[test]
#[cfg(target_pointer_width = "64")]
fn test_line_size() {
assert_eq!(std::mem::size_of::<Line>(), 24);
}
#[test]
fn test_parse_byte_count() {
let valid_input = [
("0", 0),
("50K", 50 * 1024),
("50k", 50 * 1024),
("1M", 1024 * 1024),
("100M", 100 * 1024 * 1024),
#[cfg(not(target_pointer_width = "32"))]
("1000G", 1000 * 1024 * 1024 * 1024),
#[cfg(not(target_pointer_width = "32"))]
("10T", 10 * 1024 * 1024 * 1024 * 1024),
("1b", 1),
("1024b", 1024),
("1024Mb", 1024 * 1024 * 1024), ("1", 1024), ("50", 50 * 1024),
("K", 1024),
("k", 1024),
("m", 1024 * 1024),
#[cfg(not(target_pointer_width = "32"))]
("E", 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
];
for (input, expected_output) in &valid_input {
assert_eq!(
GlobalSettings::parse_byte_count(input),
Ok(*expected_output)
);
}
let invalid_input = ["500E", "1Y"];
for input in &invalid_input {
#[cfg(not(target_pointer_width = "128"))]
assert!(GlobalSettings::parse_byte_count(input).is_err());
}
let invalid_input = ["nonsense", "1B", "B", "b", "p", "e", "z", "y"];
for input in &invalid_input {
assert!(GlobalSettings::parse_byte_count(input).is_err());
}
}
}