use clap::{
builder::ValueParser, crate_version, error::ContextKind, error::Error, error::ErrorKind, Arg,
ArgAction, ArgMatches, Command,
};
use std::ffi::{OsStr, OsString};
use std::fs::File;
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Write};
use std::num::IntErrorKind;
use uucore::display::Quotable;
use uucore::error::{FromIo, UError, UResult, USimpleError};
use uucore::posix::{posix_version, OBSOLETE};
use uucore::{format_usage, help_about, help_section, help_usage};
/// Text produced by `help_about!` for `uniq.md`; passed to `Command::about` in `uu_app`.
const ABOUT: &str = help_about!("uniq.md");
/// Text produced by `help_usage!` for `uniq.md`; fed through `format_usage` in `uu_app`.
const USAGE: &str = help_usage!("uniq.md");
/// The "after help" section produced by `help_section!` for `uniq.md`; shown via
/// `Command::after_help` in `uu_app`.
const AFTER_HELP: &str = help_section!("after help", "uniq.md");
/// Identifiers of the command-line options.
///
/// Each value is used in `uu_app` both as the clap argument id and as the long
/// option name (for example `COUNT` is `--count`).
pub mod options {
/// `-D` / `--all-repeated[=delimit-method]`
pub static ALL_REPEATED: &str = "all-repeated";
/// `-w` / `--check-chars=N`
pub static CHECK_CHARS: &str = "check-chars";
/// `-c` / `--count`
pub static COUNT: &str = "count";
/// `-i` / `--ignore-case`
pub static IGNORE_CASE: &str = "ignore-case";
/// `-d` / `--repeated`
pub static REPEATED: &str = "repeated";
/// `-f` / `--skip-fields=N`
pub static SKIP_FIELDS: &str = "skip-fields";
/// `-s` / `--skip-chars=N`
pub static SKIP_CHARS: &str = "skip-chars";
/// `-u` / `--unique`
pub static UNIQUE: &str = "unique";
/// `-z` / `--zero-terminated`
pub static ZERO_TERMINATED: &str = "zero-terminated";
/// `--group[=group-method]`
pub static GROUP: &str = "group";
}
/// Argument id of the hidden positional operands (at most two; `uumain` treats the
/// first as the input file and the second as the output file).
static ARG_FILES: &str = "files";
/// Placement of the empty delimiter records written around groups of lines.
///
/// A delimiter is a bare line terminator. See `Uniq::should_print_delimiter` and the
/// end of `Uniq::print_uniq` for where each variant takes effect.
#[derive(PartialEq, Clone, Copy)]
enum Delimiters {
/// Delimiter between groups and one more after the final group.
Append,
/// Delimiter before every group, including the first one.
Prepend,
/// Delimiter only between groups.
Separate,
/// Delimiter before every group and one more after the final group.
Both,
/// No delimiters at all.
None,
}
/// Resolved settings for one `uniq` run; built in `uumain` from the parsed arguments.
struct Uniq {
// Suppress groups that consist of a single line (set by --repeated or --all-repeated).
repeats_only: bool,
// Suppress groups that consist of more than one line (set by --unique).
uniques_only: bool,
// Print every member of a group instead of one representative
// (set by --all-repeated or --group).
all_repeated: bool,
// Where empty delimiter records go around groups.
delimiters: Delimiters,
// Prefix each printed line with its group count (--count).
show_counts: bool,
// Number of leading whitespace-separated fields excluded from the comparison key.
skip_fields: Option<usize>,
// Number of leading bytes (after field skipping) excluded from the comparison key.
slice_start: Option<usize>,
// Maximum number of bytes compared after skipping (--check-chars); despite the
// name it is used as a length, not as an end index (see `cmp_key`).
slice_stop: Option<usize>,
// Compare keys after ASCII lower-casing.
ignore_case: bool,
// Records are terminated by a NUL byte instead of a newline.
zero_terminated: bool,
}
/// Writes the single byte `$line_terminator` to `$writer`.
///
/// Expands to an expression whose I/O error has been given the context
/// "Could not write line terminator" through `map_err_context`, so callers
/// can apply `?` or return it directly.
macro_rules! write_line_terminator {
($writer:expr, $line_terminator:expr) => {
$writer
.write_all(&[$line_terminator])
.map_err_context(|| "Could not write line terminator".to_string())
};
}
impl Uniq {
pub fn print_uniq(&self, reader: impl BufRead, mut writer: impl Write) -> UResult<()> {
let mut first_line_printed = false;
let mut group_count = 1;
let line_terminator = self.get_line_terminator();
let mut lines = reader.split(line_terminator);
let mut line = match lines.next() {
Some(l) => l?,
None => return Ok(()),
};
let writer = &mut writer;
for next_line in lines {
let next_line = next_line?;
if self.cmp_keys(&line, &next_line) {
if (group_count == 1 && !self.repeats_only)
|| (group_count > 1 && !self.uniques_only)
{
self.print_line(writer, &line, group_count, first_line_printed)?;
first_line_printed = true;
}
line = next_line;
group_count = 1;
} else {
if self.all_repeated {
self.print_line(writer, &line, group_count, first_line_printed)?;
first_line_printed = true;
line = next_line;
}
group_count += 1;
}
}
if (group_count == 1 && !self.repeats_only) || (group_count > 1 && !self.uniques_only) {
self.print_line(writer, &line, group_count, first_line_printed)?;
first_line_printed = true;
}
if (self.delimiters == Delimiters::Append || self.delimiters == Delimiters::Both)
&& first_line_printed
{
write_line_terminator!(writer, line_terminator)?;
}
Ok(())
}
/// Returns the part of `line` that remains after dropping the configured number of
/// leading fields.
///
/// A field is an optional run of ASCII whitespace followed by a run of
/// non-whitespace bytes. The result starts at the whitespace that follows the last
/// skipped field. An empty vector is returned when the line runs out before all
/// fields have been skipped. Without a field count the whole line is copied.
fn skip_fields(&self, line: &[u8]) -> Vec<u8> {
    let Some(field_count) = self.skip_fields else {
        return line.to_vec();
    };

    let mut rest = line;
    for _ in 0..field_count {
        // Step over the blanks in front of the field; only blanks left means
        // there is no field to skip.
        let Some(field_start) = rest.iter().position(|byte| !byte.is_ascii_whitespace())
        else {
            return Vec::new();
        };
        let field = &rest[field_start..];
        // The field extends up to (not including) the next whitespace byte.
        let field_len = field
            .iter()
            .position(|byte| byte.is_ascii_whitespace())
            .unwrap_or(field.len());
        rest = &field[field_len..];
        if rest.is_empty() {
            return Vec::new();
        }
    }
    rest.to_vec()
}
/// Returns the byte that ends a record: NUL in zero-terminated mode, newline otherwise.
fn get_line_terminator(&self) -> u8 {
    match self.zero_terminated {
        true => 0,
        false => b'\n',
    }
}
/// Compares the comparison keys of two records.
///
/// Returns `true` when the keys DIFFER and `false` when they are equal.
fn cmp_keys(&self, first: &[u8], second: &[u8]) -> bool {
    self.cmp_key(first, |lhs_key| {
        // Build the second key lazily and compare both byte streams in lockstep.
        self.cmp_key(second, |rhs_key| lhs_key.ne(rhs_key))
    })
}
/// Builds the comparison key of `line` as a byte iterator and hands it to `closure`.
///
/// The key is the line after field skipping, minus the first `slice_start` bytes,
/// limited to at most `slice_stop` bytes, and ASCII lower-cased when `ignore_case`
/// is set. Returns whatever `closure` returns.
fn cmp_key<F>(&self, line: &[u8], mut closure: F) -> bool
where
    F: FnMut(&mut dyn Iterator<Item = u8>) -> bool,
{
    let key = self.skip_fields(line);
    let skip_bytes = self.slice_start.unwrap_or(0);
    // `slice_stop` is a length: without it the whole remaining key is compared.
    let max_bytes = self.slice_stop.unwrap_or(key.len());

    let mut window = key.iter().copied().skip(skip_bytes).take(max_bytes);
    if self.ignore_case {
        closure(&mut window.map(|byte| byte.to_ascii_lowercase()))
    } else {
        closure(&mut window)
    }
}
/// Decides whether an empty delimiter record must precede the line about to be
/// printed.
///
/// Only the first line of a group (`group_count == 1`) can be preceded by a
/// delimiter, and only if something was printed before or the delimiter mode also
/// puts one in front of the very first group.
fn should_print_delimiter(&self, group_count: usize, first_line_printed: bool) -> bool {
    let starts_group = group_count == 1;
    let leads_output = matches!(self.delimiters, Delimiters::Prepend | Delimiters::Both);
    match self.delimiters {
        Delimiters::None => false,
        _ => starts_group && (first_line_printed || leads_output),
    }
}
/// Writes one record, preceded by a delimiter record when required and followed by
/// the line terminator.
///
/// With `show_counts` the record is prefixed by `count`, right-aligned in seven
/// columns and followed by a space.
///
/// # Errors
///
/// Returns an error when any of the writes fails.
fn print_line(
    &self,
    writer: &mut impl Write,
    line: &[u8],
    count: usize,
    first_line_printed: bool,
) -> UResult<()> {
    let line_terminator = self.get_line_terminator();
    if self.should_print_delimiter(count, first_line_printed) {
        write_line_terminator!(writer, line_terminator)?;
    }

    let written = if self.show_counts {
        // Assemble prefix and payload first so they go out in a single write.
        let mut record = format!("{count:7} ").into_bytes();
        record.extend_from_slice(line);
        writer.write_all(&record)
    } else {
        writer.write_all(line)
    };
    written.map_err_context(|| "Failed to write line".to_string())?;

    write_line_terminator!(writer, line_terminator)
}
}
/// Reads the numeric value of option `opt_name` from `matches`.
///
/// Returns `Ok(None)` when the option was not given. A value too large for `usize`
/// saturates to `usize::MAX`.
///
/// # Errors
///
/// Returns an error with exit code 1 when the value is not a valid non-negative
/// number.
fn opt_parsed(opt_name: &str, matches: &ArgMatches) -> UResult<Option<usize>> {
    let Some(raw) = matches.get_one::<String>(opt_name) else {
        return Ok(None);
    };

    match raw.parse::<usize>() {
        Ok(value) => Ok(Some(value)),
        Err(err) if *err.kind() == IntErrorKind::PosOverflow => Ok(Some(usize::MAX)),
        Err(_) => {
            let message = format!(
                "Invalid argument for {}: {}",
                opt_name,
                raw.maybe_quote()
            );
            Err(USimpleError::new(1, message))
        }
    }
}
fn handle_obsolete(args: impl uucore::Args) -> (Vec<OsString>, Option<usize>, Option<usize>) {
let mut skip_fields_old = None;
let mut skip_chars_old = None;
let mut preceding_long_opt_req_value = false;
let mut preceding_short_opt_req_value = false;
let filtered_args = args
.filter_map(|os_slice| {
filter_args(
os_slice,
&mut skip_fields_old,
&mut skip_chars_old,
&mut preceding_long_opt_req_value,
&mut preceding_short_opt_req_value,
)
})
.collect();
let skip_fields_old: Option<usize> = skip_fields_old.and_then(|v| v.parse::<usize>().ok());
let skip_chars_old: Option<usize> = skip_chars_old.and_then(|v| v.parse::<usize>().ok());
(filtered_args, skip_fields_old, skip_chars_old)
}
/// Processes one command-line argument while scanning for obsolete syntax.
///
/// Returns the argument to forward to the regular parser, possibly with obsolete
/// digits stripped, or `None` when the whole argument was consumed. Arguments that
/// are not valid UTF-8 are forwarded untouched. The two `skip_*_old` accumulators
/// and the two `preceding_*` flags are updated as a side effect.
fn filter_args(
    os_slice: OsString,
    skip_fields_old: &mut Option<String>,
    skip_chars_old: &mut Option<String>,
    preceding_long_opt_req_value: &mut bool,
    preceding_short_opt_req_value: &mut bool,
) -> Option<OsString> {
    match os_slice.to_str() {
        None => Some(os_slice),
        Some(slice) => {
            let forwarded = if should_extract_obs_skip_fields(
                slice,
                preceding_long_opt_req_value,
                preceding_short_opt_req_value,
            ) {
                handle_extract_obs_skip_fields(slice, skip_fields_old)
            } else if should_extract_obs_skip_chars(
                slice,
                preceding_long_opt_req_value,
                preceding_short_opt_req_value,
            ) {
                handle_extract_obs_skip_chars(slice, skip_chars_old)
            } else {
                // A modern -f / -s seen later replaces any obsolete value collected so far.
                if slice.starts_with("-f") {
                    *skip_fields_old = None;
                }
                if slice.starts_with("-s") {
                    *skip_chars_old = None;
                }
                Some(OsString::from(slice))
            };

            // Remember whether this argument expects its value in the next one.
            handle_preceding_options(
                slice,
                preceding_long_opt_req_value,
                preceding_short_opt_req_value,
            );
            forwarded
        }
    }
}
/// Tells whether `slice` is a short-option cluster that may carry obsolete `-N`
/// skip-fields digits.
///
/// That is the case when it starts with a single `-`, is not the value of a
/// preceding option, and does not start with `-s`, `-f` or `-w` (whose digits are
/// their own values).
fn should_extract_obs_skip_fields(
    slice: &str,
    preceding_long_opt_req_value: &bool,
    preceding_short_opt_req_value: &bool,
) -> bool {
    if *preceding_long_opt_req_value || *preceding_short_opt_req_value {
        return false;
    }
    let Some(after_dash) = slice.strip_prefix('-') else {
        return false;
    };
    // A second dash means a long option; s/f/w take a numeric value themselves.
    !after_dash.starts_with(['-', 's', 'f', 'w'])
}
/// Tells whether `slice` should be treated as the obsolete `+N` skip-chars syntax.
///
/// Requires a leading `+` directly followed by an ASCII digit, a POSIX version (as
/// reported by `posix_version`) no newer than `OBSOLETE`, and that the argument is
/// not the value of a preceding option.
fn should_extract_obs_skip_chars(
    slice: &str,
    preceding_long_opt_req_value: &bool,
    preceding_short_opt_req_value: &bool,
) -> bool {
    if !slice.starts_with('+') {
        return false;
    }
    if !posix_version().is_some_and(|v| v <= OBSOLETE) {
        return false;
    }
    if *preceding_long_opt_req_value || *preceding_short_opt_req_value {
        return false;
    }
    slice.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
}
fn handle_preceding_options(
slice: &str,
preceding_long_opt_req_value: &mut bool,
preceding_short_opt_req_value: &mut bool,
) {
if slice.starts_with("--") {
use options as O;
*preceding_long_opt_req_value = &slice[2..] == O::SKIP_CHARS
|| &slice[2..] == O::SKIP_FIELDS
|| &slice[2..] == O::CHECK_CHARS
|| &slice[2..] == O::GROUP
|| &slice[2..] == O::ALL_REPEATED;
}
*preceding_short_opt_req_value = slice == "-s" || slice == "-f" || slice == "-w";
if !slice.starts_with('-') {
*preceding_short_opt_req_value = false;
*preceding_long_opt_req_value = false;
}
}
/// Pulls the first run of ASCII digits out of a short-option cluster and records it
/// as an obsolete skip-fields value.
///
/// * No digits: `slice` is returned unchanged and `skip_fields_old` is untouched.
/// * Digits found and the cluster contains `f`: `skip_fields_old` is cleared.
/// * Digits found otherwise: the digits, followed by any previously stored value,
///   become the new `skip_fields_old`.
///
/// When digits were found, the return value is the cluster without them, or `None`
/// if fewer than two characters remain.
fn handle_extract_obs_skip_fields(
    slice: &str,
    skip_fields_old: &mut Option<String>,
) -> Option<OsString> {
    let mut digits = String::new();
    let mut remainder = String::new();
    let mut digit_run_finished = false;
    let mut contains_f = false;

    for ch in slice.chars() {
        contains_f |= ch == 'f';
        if ch.is_ascii_digit() && !digit_run_finished {
            digits.push(ch);
        } else {
            // The first non-digit after at least one digit closes the run; later
            // digits stay in the forwarded argument.
            digit_run_finished |= !digits.is_empty();
            remainder.push(ch);
        }
    }

    if digits.is_empty() {
        return Some(OsString::from(slice));
    }

    let previous = skip_fields_old.take();
    *skip_fields_old = if contains_f {
        None
    } else {
        // NOTE(review): the new digits are placed in front of the previously
        // collected ones, so `-1 -2` yields "21" — confirm this order is intended.
        if let Some(earlier) = previous {
            digits.push_str(&earlier);
        }
        Some(digits)
    };

    // Forward what is left only if more than the leading character remains.
    (remainder.chars().count() > 1).then(|| OsString::from(remainder))
}
/// Interprets `slice` as the obsolete `+N` skip-chars syntax.
///
/// The first character is ignored. If everything after it is ASCII digits (at least
/// one), they are stored in `skip_chars_old` and the argument is consumed (`None`).
/// If a non-digit follows, `skip_chars_old` is cleared and `slice` is returned
/// unchanged. If nothing follows the first character, `slice` is returned unchanged
/// and `skip_chars_old` is left alone.
fn handle_extract_obs_skip_chars(
    slice: &str,
    skip_chars_old: &mut Option<String>,
) -> Option<OsString> {
    let mut chars = slice.chars();
    chars.next(); // drop the leading '+'
    let tail = chars.as_str();

    if tail.chars().any(|c| !c.is_ascii_digit()) {
        *skip_chars_old = None;
        return Some(OsString::from(slice));
    }
    if tail.is_empty() {
        return Some(OsString::from(slice));
    }
    *skip_chars_old = Some(tail.to_owned());
    None
}
/// Converts a clap parsing error into a `UError` with exit code 1.
///
/// Three situations get fixed replacement messages: any argument conflict, and the
/// invalid value `badoption` given to `--group` or to `--all-repeated`. Every other
/// error keeps clap's own text.
fn map_clap_errors(clap_error: &Error) -> Box<dyn UError> {
    let footer = "Try 'uniq --help' for more information.";
    let with_footer = |body: &str| [body, footer].concat();

    // True when the error reports the literal value "badoption" for `option`.
    let is_badoption_for = |option: &str| {
        clap_error
            .get(ContextKind::InvalidValue)
            .is_some_and(|v| v.to_string() == "badoption")
            && clap_error
                .get(ContextKind::InvalidArg)
                .is_some_and(|v| v.to_string().starts_with(option))
    };

    let error_message = match clap_error.kind() {
        ErrorKind::ArgumentConflict => {
            with_footer("--group is mutually exclusive with -c/-d/-D/-u\n")
        }
        ErrorKind::InvalidValue if is_badoption_for("--group") => with_footer(
            "invalid argument 'badoption' for '--group'\nValid arguments are:\n - 'prepend'\n - 'append'\n - 'separate'\n - 'both'\n",
        ),
        ErrorKind::InvalidValue if is_badoption_for("--all-repeated") => with_footer(
            "invalid argument 'badoption' for '--all-repeated'\nValid arguments are:\n - 'none'\n - 'prepend'\n - 'separate'\n",
        ),
        _ => clap_error.to_string(),
    };
    USimpleError::new(1, error_message)
}
/// Entry point: parses the arguments, builds the `Uniq` configuration and runs it.
///
/// Obsolete `-N` / `+N` syntax is extracted first; a modern `-f` / `-s` value takes
/// precedence over an obsolete one. The first file operand is the input and the
/// second the output.
///
/// # Errors
///
/// Returns an error for invalid arguments, for `--count` combined with printing all
/// repeated lines, when a file cannot be opened, or when processing fails.
#[uucore::main]
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
    let (args, skip_fields_old, skip_chars_old) = handle_obsolete(args);
    let matches = match uu_app().try_get_matches_from(args) {
        Ok(parsed) => parsed,
        Err(clap_error) => return Err(map_clap_errors(&clap_error)),
    };

    // Up to two operands: input file, then output file.
    let mut operands = matches
        .get_many::<OsString>(ARG_FILES)
        .into_iter()
        .flatten()
        .map(OsString::as_os_str);
    let in_file_name = operands.next();
    let out_file_name = operands.next();

    let skip_fields_modern: Option<usize> = opt_parsed(options::SKIP_FIELDS, &matches)?;
    let skip_chars_modern: Option<usize> = opt_parsed(options::SKIP_CHARS, &matches)?;
    let all_repeated_given = matches.contains_id(options::ALL_REPEATED);

    let uniq = Uniq {
        repeats_only: matches.get_flag(options::REPEATED) || all_repeated_given,
        uniques_only: matches.get_flag(options::UNIQUE),
        all_repeated: all_repeated_given || matches.contains_id(options::GROUP),
        delimiters: get_delimiter(&matches),
        show_counts: matches.get_flag(options::COUNT),
        skip_fields: skip_fields_modern.or(skip_fields_old),
        slice_start: skip_chars_modern.or(skip_chars_old),
        slice_stop: opt_parsed(options::CHECK_CHARS, &matches)?,
        ignore_case: matches.get_flag(options::IGNORE_CASE),
        zero_terminated: matches.get_flag(options::ZERO_TERMINATED),
    };

    if uniq.show_counts && uniq.all_repeated {
        return Err(USimpleError::new(
            1,
            "printing all duplicated lines and repeat counts is meaningless\nTry 'uniq --help' for more information.",
        ));
    }

    let input = open_input_file(in_file_name)?;
    let output = open_output_file(out_file_name)?;
    uniq.print_uniq(input, output)
}
/// Builds the clap `Command` describing the `uniq` command line.
///
/// Long options may be abbreviated (`infer_long_args`). `--all-repeated` and
/// `--group` take an optional value that must be attached with `=`. The numeric
/// options are kept as strings here and converted later by `opt_parsed`.
pub fn uu_app() -> Command {
Command::new(uucore::util_name())
.version(crate_version!())
.about(ABOUT)
.override_usage(format_usage(USAGE))
.infer_long_args(true)
.after_help(AFTER_HELP)
// -D / --all-repeated[=METHOD]: a bare flag is treated as "none".
.arg(
Arg::new(options::ALL_REPEATED)
.short('D')
.long(options::ALL_REPEATED)
.value_parser([
"none",
"prepend",
"separate"
])
.help("print all duplicate lines. Delimiting is done with blank lines. [default: none]")
.value_name("delimit-method")
.num_args(0..=1)
.default_missing_value("none")
.require_equals(true),
)
// --group[=METHOD]: a bare flag is treated as "separate"; it cannot be combined
// with -d, -D, -u or -c.
.arg(
Arg::new(options::GROUP)
.long(options::GROUP)
.value_parser([
"separate",
"prepend",
"append",
"both",
])
.help("show all items, separating groups with an empty line. [default: separate]")
.value_name("group-method")
.num_args(0..=1)
.default_missing_value("separate")
.require_equals(true)
.conflicts_with_all([
options::REPEATED,
options::ALL_REPEATED,
options::UNIQUE,
options::COUNT
]),
)
// -w / --check-chars N
.arg(
Arg::new(options::CHECK_CHARS)
.short('w')
.long(options::CHECK_CHARS)
.help("compare no more than N characters in lines")
.value_name("N"),
)
// -c / --count
.arg(
Arg::new(options::COUNT)
.short('c')
.long(options::COUNT)
.help("prefix lines by the number of occurrences")
.action(ArgAction::SetTrue),
)
// -i / --ignore-case
.arg(
Arg::new(options::IGNORE_CASE)
.short('i')
.long(options::IGNORE_CASE)
.help("ignore differences in case when comparing")
.action(ArgAction::SetTrue),
)
// -d / --repeated
.arg(
Arg::new(options::REPEATED)
.short('d')
.long(options::REPEATED)
.help("only print duplicate lines")
.action(ArgAction::SetTrue),
)
// -s / --skip-chars N
.arg(
Arg::new(options::SKIP_CHARS)
.short('s')
.long(options::SKIP_CHARS)
.help("avoid comparing the first N characters")
.value_name("N"),
)
// -f / --skip-fields N
.arg(
Arg::new(options::SKIP_FIELDS)
.short('f')
.long(options::SKIP_FIELDS)
.help("avoid comparing the first N fields")
.value_name("N"),
)
// -u / --unique
.arg(
Arg::new(options::UNIQUE)
.short('u')
.long(options::UNIQUE)
.help("only print unique lines")
.action(ArgAction::SetTrue),
)
// -z / --zero-terminated
.arg(
Arg::new(options::ZERO_TERMINATED)
.short('z')
.long(options::ZERO_TERMINATED)
.help("end lines with 0 byte, not newline")
.action(ArgAction::SetTrue),
)
// Hidden positional operands, kept as `OsString` so non-UTF-8 paths are accepted;
// at most two (read by `uumain` as input and output file).
.arg(
Arg::new(ARG_FILES)
.action(ArgAction::Append)
.value_parser(ValueParser::os_string())
.num_args(0..=2)
.hide(true)
.value_hint(clap::ValueHint::FilePath),
)
}
/// Determines the delimiter placement from the parsed arguments.
///
/// The value of `--all-repeated` is consulted first, then that of `--group`. When
/// neither carries a value, `--group` being present selects `Separate`; otherwise
/// no delimiters are used.
fn get_delimiter(matches: &ArgMatches) -> Delimiters {
    let method = matches
        .get_one::<String>(options::ALL_REPEATED)
        .or_else(|| matches.get_one::<String>(options::GROUP))
        .map(String::as_str);

    match method {
        Some("append") => Delimiters::Append,
        Some("prepend") => Delimiters::Prepend,
        Some("separate") => Delimiters::Separate,
        Some("both") => Delimiters::Both,
        Some("none") => Delimiters::None,
        Some(_) => unreachable!("Should have been caught by possible values in clap"),
        None if matches.contains_id(options::GROUP) => Delimiters::Separate,
        None => Delimiters::None,
    }
}
fn open_input_file(in_file_name: Option<&OsStr>) -> UResult<Box<dyn BufRead>> {
Ok(match in_file_name {
Some(path) if path != "-" => {
let in_file = File::open(path)
.map_err_context(|| format!("Could not open {}", path.maybe_quote()))?;
Box::new(BufReader::new(in_file))
}
_ => Box::new(stdin().lock()),
})
}
/// Opens the output sink: the named file (created or truncated) wrapped in a
/// `BufWriter`, or locked standard output when no name is given or the name is `-`.
///
/// # Errors
///
/// Returns an error when the file cannot be created.
fn open_output_file(out_file_name: Option<&OsStr>) -> UResult<Box<dyn Write>> {
    let path = match out_file_name {
        Some(name) if name != "-" => name,
        _ => return Ok(Box::new(stdout().lock())),
    };
    let out_file = File::create(path)
        .map_err_context(|| format!("Could not open {}", path.maybe_quote()))?;
    Ok(Box::new(BufWriter::new(out_file)))
}