mod platform;
use crate::platform::is_unsafe_overwrite;
use clap::{Arg, ArgAction, Command};
use memchr::memchr2;
use std::ffi::OsString;
use std::fs::{File, metadata};
use std::io::{self, BufWriter, ErrorKind, IsTerminal, Read, Write};
#[cfg(any(unix, target_os = "wasi"))]
use std::os::fd::AsFd;
#[cfg(unix)]
use std::os::unix::fs::FileTypeExt;
use thiserror::Error;
use uucore::display::Quotable;
use uucore::error::{UResult, strip_errno};
use uucore::translate;
use uucore::{fast_inc::fast_inc_one, format_usage};
/// Size in bytes of the fixed buffer in which `LineNumber` keeps its rendered text.
const LINE_NUMBER_BUF_SIZE: usize = 32;
/// Line counter stored as ASCII text inside a fixed-size byte buffer.
///
/// The text that gets printed is the tail of `buf`, starting at `print_start`.
struct LineNumber {
/// Backing storage; the rendered text occupies the end of the array.
buf: [u8; LINE_NUMBER_BUF_SIZE],
/// Index of the first byte that is printed (leading padding included).
print_start: usize,
/// Index of the first digit of the number.
num_start: usize,
/// Index just past the last digit, i.e. where the trailing tab is stored.
num_end: usize,
}
impl LineNumber {
    /// Creates a counter showing `1`, rendered the way `cat -n` prints it:
    /// right-aligned in a six-column field and followed by a tab.
    fn new() -> Self {
        // Six columns plus the tab separator. Numbers shorter than six digits
        // keep the space padding; longer ones grow to the left (see `increment`).
        const INITIAL: &[u8] = b"     1\t";

        // Bytes in front of the initial text start out as ASCII zeros.
        let mut buf = [b'0'; LINE_NUMBER_BUF_SIZE];
        let print_start = buf.len() - INITIAL.len();
        // The last byte holds the tab; the single digit sits right before it.
        let num_end = buf.len() - 1;
        let num_start = num_end - 1;
        buf[print_start..].copy_from_slice(INITIAL);

        Self {
            buf,
            print_start,
            num_start,
            num_end,
        }
    }

    /// Advances the counter by one.
    ///
    /// `fast_inc_one` updates the digits in `buf[num_start..num_end]` in place and
    /// may move `num_start` to the left (presumably when the number gains a digit —
    /// confirm against `uucore::fast_inc`). The printed window is widened to
    /// follow it once the number no longer fits in the padded field.
    fn increment(&mut self) {
        fast_inc_one(&mut self.buf, &mut self.num_start, self.num_end);
        self.print_start = self.print_start.min(self.num_start);
    }

    /// Returns the bytes to print: padding, digits and the trailing tab.
    #[inline]
    fn to_str(&self) -> &[u8] {
        &self.buf[self.print_start..]
    }

    /// Writes the rendered line number to `writer`.
    ///
    /// # Errors
    /// Propagates any error reported by `writer`.
    fn write(&self, writer: &mut impl Write) -> io::Result<()> {
        writer.write_all(self.to_str())
    }
}
/// Errors that can occur while processing a single input of `cat`.
#[derive(Error, Debug)]
enum CatError {
/// An I/O error; displayed through `strip_errno`.
#[error("{}", strip_errno(.0))]
Io(#[from] io::Error),
/// The file type matched none of the kinds checked in `get_input_type`.
#[error("{}", translate!("cat-error-unknown-filetype", "ft_debug" => .ft_debug))]
UnknownFiletype {
/// Debug rendering of the unrecognised file type.
ft_debug: String,
},
/// The input path refers to a directory.
#[error("{}", translate!("cat-error-is-directory"))]
IsDirectory,
/// Reported by `cat_path` when the input is a socket (Unix only).
#[cfg(unix)]
#[error("{}", translate!("cat-error-no-such-device-or-address"))]
NoSuchDeviceOrAddress,
/// Reported when `is_unsafe_overwrite` says the input is also the output.
#[error("{}", translate!("cat-error-input-file-is-output-file"))]
OutputIsInput,
/// Reading the file metadata failed with the "too many symbolic links" OS error.
#[error("{}", translate!("cat-error-too-many-symbolic-links"))]
TooManySymlinks,
}
/// Result alias used by the internal helpers; the error type is always `CatError`.
type CatResult<T> = Result<T, CatError>;
#[cfg(any(unix, target_os = "wasi"))]
impl From<rustix::io::Errno> for CatError {
fn from(value: rustix::io::Errno) -> Self {
Self::Io(value.into())
}
}
/// Which output lines receive a line number.
#[derive(PartialEq)]
enum NumberingMode {
/// No line numbers are printed.
None,
/// Only non-empty lines are numbered (selected by `--number-nonblank`).
NonEmpty,
/// Every line is numbered (selected by `--number`).
All,
}
/// Output transformations selected on the command line.
struct OutputOptions {
/// Line-numbering mode.
number: NumberingMode,
/// Collapse runs of empty lines into a single empty line.
squeeze_blank: bool,
/// Print TAB bytes as `^I`.
show_tabs: bool,
/// Print `$` before each newline.
show_ends: bool,
/// Print non-printing bytes in `^` / `M-` notation.
show_nonprint: bool,
}
impl OutputOptions {
    /// Text to emit for a TAB byte: `^I` when tabs are made visible, otherwise a
    /// literal tab.
    fn tab(&self) -> &'static str {
        if !self.show_tabs {
            return "\t";
        }
        "^I"
    }

    /// Text to emit at the end of a line: `$` plus newline when line ends are
    /// made visible, otherwise just the newline.
    fn end_of_line(&self) -> &'static str {
        if !self.show_ends {
            return "\n";
        }
        "$\n"
    }

    /// Returns `true` when no option alters the data, so input bytes can be
    /// copied to the output verbatim.
    fn can_print_fast(&self) -> bool {
        let rewrites_bytes = self.show_tabs || self.show_nonprint || self.show_ends;
        let tracks_lines = self.squeeze_blank || !matches!(self.number, NumberingMode::None);
        !rewrites_bytes && !tracks_lines
    }
}
/// Line-position state that is carried from one input to the next.
struct OutputState {
/// The next line number to print.
line_number: LineNumber,
/// Whether the next byte begins a new output line.
at_line_start: bool,
/// A `\r` was consumed from the input but has not been written yet.
skipped_carriage_return: bool,
/// An empty line has already been emitted since the last non-empty content
/// (consulted when squeezing blank lines).
one_blank_kept: bool,
}
/// Reader bound used by the copy routines; on Unix and WASI the reader must also
/// expose its file descriptor through `AsFd`.
#[cfg(any(unix, target_os = "wasi"))]
trait FdReadable: Read + AsFd {}
/// Reader bound used by the copy routines; on other targets plain `Read` suffices.
#[cfg(not(any(unix, target_os = "wasi")))]
trait FdReadable: Read {}
// Blanket impls: every type meeting the platform's bounds is an `FdReadable`.
#[cfg(any(unix, target_os = "wasi"))]
impl<T> FdReadable for T where T: Read + AsFd {}
#[cfg(not(any(unix, target_os = "wasi")))]
impl<T> FdReadable for T where T: Read {}
/// An opened input together with the information needed to decide how eagerly to flush.
struct InputHandle<R: FdReadable> {
/// Source the bytes are read from.
reader: R,
/// `true` when the input is a terminal; output is then flushed after every line.
is_interactive: bool,
}
/// Kind of input an operand refers to, as classified by `get_input_type`.
enum InputType {
/// A directory (rejected by `cat_path`).
Directory,
/// A regular file.
File,
/// Standard input, selected by the operand `-`.
StdIn,
/// A symbolic link.
SymLink,
/// A block device (Unix only).
#[cfg(unix)]
BlockDevice,
/// A character device (Unix only).
#[cfg(unix)]
CharacterDevice,
/// A named pipe (Unix only).
#[cfg(unix)]
Fifo,
/// A socket (Unix only; rejected by `cat_path`).
#[cfg(unix)]
Socket,
}
/// Identifiers of the command-line arguments registered in `uu_app`.
///
/// Most identifiers double as the long option name; the ones without a long
/// form are noted below.
mod options {
/// Positional file operands.
pub static FILE: &str = "file";
/// `-A`, `--show-all`.
pub static SHOW_ALL: &str = "show-all";
/// `-b`, `--number-nonblank`.
pub static NUMBER_NONBLANK: &str = "number-nonblank";
/// `-e` (short form only): non-printing notation plus visible line ends.
pub static SHOW_NONPRINTING_ENDS: &str = "e";
/// `-E`, `--show-ends`.
pub static SHOW_ENDS: &str = "show-ends";
/// `-n`, `--number`.
pub static NUMBER: &str = "number";
/// `-s`, `--squeeze-blank`.
pub static SQUEEZE_BLANK: &str = "squeeze-blank";
/// `-t` (short form only): non-printing notation plus visible tabs.
pub static SHOW_NONPRINTING_TABS: &str = "t";
/// `-T`, `--show-tabs`.
pub static SHOW_TABS: &str = "show-tabs";
/// `-v`, `--show-nonprinting`.
pub static SHOW_NONPRINTING: &str = "show-nonprinting";
/// `-u` (short form only): accepted, but its value is never read in `uumain`.
pub static IGNORED_U: &str = "ignored-u";
}
/// Entry point: parses the command line, derives the output options from the
/// given flags and concatenates every file operand to stdout.
#[uucore::main]
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
    let matches = uucore::clap_localization::handle_clap_result(uu_app(), args)?;

    // True when at least one of the listed flags was given.
    let any_flag = |ids: &[&str]| ids.iter().any(|id| matches.get_flag(id));

    // `--number-nonblank` takes precedence over `--number`.
    let number = match (
        matches.get_flag(options::NUMBER_NONBLANK),
        matches.get_flag(options::NUMBER),
    ) {
        (true, _) => NumberingMode::NonEmpty,
        (false, true) => NumberingMode::All,
        (false, false) => NumberingMode::None,
    };

    // The combined flags (`-A`, `-e`, `-t`) each switch on several behaviours.
    let show_nonprint = any_flag(&[
        options::SHOW_ALL,
        options::SHOW_NONPRINTING_ENDS,
        options::SHOW_NONPRINTING_TABS,
        options::SHOW_NONPRINTING,
    ]);
    let show_ends = any_flag(&[
        options::SHOW_ENDS,
        options::SHOW_ALL,
        options::SHOW_NONPRINTING_ENDS,
    ]);
    let show_tabs = any_flag(&[
        options::SHOW_ALL,
        options::SHOW_TABS,
        options::SHOW_NONPRINTING_TABS,
    ]);
    let squeeze_blank = matches.get_flag(options::SQUEEZE_BLANK);

    #[allow(clippy::unwrap_used, reason = "clap provides '-' by default")]
    let files = matches.get_many::<OsString>(options::FILE).unwrap();

    let options = OutputOptions {
        number,
        squeeze_blank,
        show_tabs,
        show_ends,
        show_nonprint,
    };
    cat_files(files, &options)
}
pub fn uu_app() -> Command {
Command::new("cat")
.version(uucore::crate_version!())
.override_usage(format_usage(&translate!("cat-usage")))
.about(translate!("cat-about"))
.help_template(uucore::localized_help_template("cat"))
.infer_long_args(true)
.args_override_self(true)
.arg(
Arg::new(options::FILE)
.hide(true)
.action(ArgAction::Append)
.value_parser(clap::value_parser!(OsString))
.default_value("-")
.value_hint(clap::ValueHint::FilePath),
)
.arg(
Arg::new(options::SHOW_ALL)
.short('A')
.long(options::SHOW_ALL)
.help(translate!("cat-help-show-all"))
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::NUMBER_NONBLANK)
.short('b')
.long(options::NUMBER_NONBLANK)
.help(translate!("cat-help-number-nonblank"))
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::SHOW_NONPRINTING_ENDS)
.short('e')
.help(translate!("cat-help-show-nonprinting-ends"))
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::SHOW_ENDS)
.short('E')
.long(options::SHOW_ENDS)
.help(translate!("cat-help-show-ends"))
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::NUMBER)
.short('n')
.long(options::NUMBER)
.help(translate!("cat-help-number"))
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::SQUEEZE_BLANK)
.short('s')
.long(options::SQUEEZE_BLANK)
.help(translate!("cat-help-squeeze-blank"))
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::SHOW_NONPRINTING_TABS)
.short('t')
.help(translate!("cat-help-show-nonprinting-tabs"))
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::SHOW_TABS)
.short('T')
.long(options::SHOW_TABS)
.help(translate!("cat-help-show-tabs"))
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::SHOW_NONPRINTING)
.short('v')
.long(options::SHOW_NONPRINTING)
.help(translate!("cat-help-show-nonprinting"))
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::IGNORED_U)
.short('u')
.help(translate!("cat-help-ignored-u"))
.action(ArgAction::SetTrue),
)
}
/// Copies one opened input to stdout, choosing the verbatim fast path when no
/// option requires looking at the data and the line-aware path otherwise.
fn cat_handle<R: FdReadable>(
    handle: &mut InputHandle<R>,
    options: &OutputOptions,
    state: &mut OutputState,
) -> CatResult<()> {
    if !options.can_print_fast() {
        return print_lines(handle, options, state);
    }
    print_fast(handle)
}
/// Opens the input named by `path` (`-` means stdin) and copies it to stdout.
///
/// # Errors
/// Fails for directories, for sockets (Unix), when the input is also the
/// output, and on any error from classifying, opening, reading or writing.
fn cat_path(path: &OsString, options: &OutputOptions, state: &mut OutputState) -> CatResult<()> {
    let input_type = get_input_type(path)?;
    match input_type {
        // Kinds that can never be concatenated.
        InputType::Directory => Err(CatError::IsDirectory),
        #[cfg(unix)]
        InputType::Socket => Err(CatError::NoSuchDeviceOrAddress),
        InputType::StdIn => {
            let stdin = io::stdin();
            if is_unsafe_overwrite(&stdin, &io::stdout()) {
                return Err(CatError::OutputIsInput);
            }
            let is_interactive = io::stdin().is_terminal();
            let mut source = InputHandle {
                reader: stdin,
                is_interactive,
            };
            cat_handle(&mut source, options, state)
        }
        // Everything else is opened as a file; it is never treated as interactive.
        _ => {
            let opened = File::open(path)?;
            if is_unsafe_overwrite(&opened, &io::stdout()) {
                return Err(CatError::OutputIsInput);
            }
            let mut source = InputHandle {
                reader: opened,
                is_interactive: false,
            };
            cat_handle(&mut source, options, state)
        }
    }
}
/// Concatenates every path in `files` to stdout, continuing past inputs that fail.
///
/// The line state is shared by all inputs, so numbering and blank-line
/// squeezing carry over from one file to the next.
///
/// # Errors
/// If any input failed, returns one error whose message lists every failure
/// (one per line, each prefixed with `cat: ` after the first) and whose exit
/// code is the number of failures, saturated at 255.
fn cat_files<'a, I>(files: I, options: &OutputOptions) -> UResult<()>
where
    I: IntoIterator<Item = &'a OsString>,
{
    let mut state = OutputState {
        line_number: LineNumber::new(),
        at_line_start: true,
        skipped_carriage_return: false,
        one_blank_kept: false,
    };
    let mut error_messages: Vec<String> = Vec::new();

    for path in files {
        if let Err(err) = cat_path(path, options, &mut state) {
            error_messages.push(format!("{}: {err}", path.maybe_quote()));
        }
    }

    // A carriage return held back at the very end of the input still has to be emitted.
    if state.skipped_carriage_return {
        print!("\r");
    }

    if error_messages.is_empty() {
        return Ok(());
    }

    // Only the low 8 bits of an exit status reach the parent on Unix, so an
    // unclamped count of 256 failures would be reported as success.
    let exit_code = u8::try_from(error_messages.len()).map_or(i32::from(u8::MAX), i32::from);

    // Each following line is expected to start with "cat: " as well.
    let line_joiner = "\ncat: ";
    Err(uucore::error::USimpleError::new(
        exit_code,
        error_messages.join(line_joiner),
    ))
}
/// Classifies the operand `path`: `-` is stdin, anything else is looked up
/// through its file metadata.
///
/// # Errors
/// Returns `CatError::TooManySymlinks` when the metadata lookup fails with the
/// platform's "too many levels of symbolic links" code, the underlying I/O
/// error for any other lookup failure, and `CatError::UnknownFiletype` when
/// the file type matches none of the known kinds.
fn get_input_type(path: &OsString) -> CatResult<InputType> {
    // Raw OS error code for a symlink loop: 62 on macOS and FreeBSD, 40 elsewhere.
    const TOO_MANY_SYMLINKS_CODE: i32 = if cfg!(any(target_os = "macos", target_os = "freebsd")) {
        62
    } else {
        40
    };

    if path == "-" {
        return Ok(InputType::StdIn);
    }

    let ft = metadata(path)
        .map_err(|err| match err.raw_os_error() {
            Some(TOO_MANY_SYMLINKS_CODE) => CatError::TooManySymlinks,
            _ => CatError::Io(err),
        })?
        .file_type();

    // The Unix-specific kinds are tested first, in the same order as before.
    #[cfg(unix)]
    {
        if ft.is_block_device() {
            return Ok(InputType::BlockDevice);
        }
        if ft.is_char_device() {
            return Ok(InputType::CharacterDevice);
        }
        if ft.is_fifo() {
            return Ok(InputType::Fifo);
        }
        if ft.is_socket() {
            return Ok(InputType::Socket);
        }
    }

    if ft.is_dir() {
        Ok(InputType::Directory)
    } else if ft.is_file() {
        Ok(InputType::File)
    } else if ft.is_symlink() {
        Ok(InputType::SymLink)
    } else {
        Err(CatError::UnknownFiletype {
            ft_debug: format!("{ft:?}"),
        })
    }
}
/// Copies the input to stdout without inspecting its bytes.
///
/// On Linux and Android a splice-based transfer from `uucore::pipes` is tried
/// first; every other case is handled by `print_unbuffered`.
///
/// # Errors
/// Propagates errors from the splice helper and from `print_unbuffered`.
fn print_fast<R: FdReadable>(handle: &mut InputHandle<R>) -> CatResult<()> {
let stdout = io::stdout();
// The splice helper takes `&mut stdout`, so it is rebound mutably on those targets only.
#[cfg(any(target_os = "linux", target_os = "android"))]
let mut stdout = stdout;
// Finish here only when the splice call reports success and `might_fuse` is
// false for the reader; the `?` propagates the outer error of the call.
// NOTE(review): the return contract of `splice_unbounded_auto` is not visible
// here — confirm that falling through after a successful splice cannot
// duplicate output.
#[cfg(any(target_os = "linux", target_os = "android"))]
if uucore::pipes::splice_unbounded_auto(&handle.reader, &mut stdout)?.is_ok()
&& !uucore::pipes::might_fuse(&handle.reader)
{
return Ok(());
}
// Portable read/write loop.
print_unbuffered(handle, stdout)
}
/// Copies the input to `stdout` in 64 KiB chunks without an intermediate
/// buffered writer.
///
/// On Unix and WASI the bytes go through `uucore::io::RawWriter`; elsewhere the
/// locked stdout handle is used and flushed after every chunk.
///
/// # Errors
/// Returns the first read or write error; an interrupted read is retried.
#[cfg_attr(any(target_os = "linux", target_os = "android"), inline(never))]
fn print_unbuffered<R: FdReadable>(
    handle: &mut InputHandle<R>,
    stdout: io::Stdout,
) -> CatResult<()> {
    #[cfg(any(unix, target_os = "wasi"))]
    let mut sink = uucore::io::RawWriter(stdout);
    #[cfg(not(any(unix, target_os = "wasi")))]
    let mut sink = stdout.lock();

    let mut chunk = [0; 1024 * 64];
    loop {
        let filled = match handle.reader.read(&mut chunk) {
            // End of input.
            Ok(0) => return Ok(()),
            Ok(len) => len,
            Err(err) if err.kind() == ErrorKind::Interrupted => continue,
            Err(err) => return Err(err.into()),
        };
        sink.write_all(&chunk[..filled])
            .inspect_err(handle_broken_pipe)?;
        #[cfg(not(any(unix, target_os = "wasi")))]
        sink.flush().inspect_err(handle_broken_pipe)?;
    }
}
/// Copies `handle.reader` to stdout line by line, applying the transformations
/// requested in `options` (numbering, blank-line squeezing, visible tabs,
/// visible line ends and non-printing notation).
///
/// `state` carries the line position across calls, so processing continues
/// where the previous input left off.
///
/// # Errors
/// Returns the first read or write error; an interrupted read is retried.
///
/// # Panics
/// Panics if `write_end` stops on a byte that is neither `\n` nor `\r`.
fn print_lines<R: FdReadable>(
handle: &mut InputHandle<R>,
options: &OutputOptions,
state: &mut OutputState,
) -> CatResult<()> {
let mut in_buf = [0; 1024 * 31];
let stdout = io::stdout();
let stdout = stdout.lock();
// Output is buffered and flushed once per chunk read (and per line for interactive input).
let mut writer = BufWriter::with_capacity(32 * 1024, stdout);
loop {
let n = match handle.reader.read(&mut in_buf) {
// End of input.
Ok(0) => break,
Ok(n) => n,
Err(e) if e.kind() == ErrorKind::Interrupted => continue,
Err(e) => return Err(e.into()),
};
// Only the bytes filled by this read are examined.
let in_buf = &in_buf[..n];
let mut pos = 0;
while pos < n {
// A newline right here: an empty line, or the `\n` following a held-back `\r`.
if in_buf[pos] == b'\n' {
write_new_line(&mut writer, options, state, handle.is_interactive)?;
state.at_line_start = true;
pos += 1;
continue;
}
// The held-back `\r` was not followed by `\n`, so it is ordinary line content.
if state.skipped_carriage_return {
writer.write_all(b"\r")?;
state.skipped_carriage_return = false;
state.at_line_start = false;
}
// Non-empty content resets the blank-line squeezing.
state.one_blank_kept = false;
// Both numbering modes number a non-empty line.
if state.at_line_start && options.number != NumberingMode::None {
state.line_number.write(&mut writer)?;
state.line_number.increment();
}
// Write up to the next line terminator or the end of the chunk.
let offset = write_end(&mut writer, &in_buf[pos..], options)?;
// The chunk ended in the middle of a line.
if offset + pos == in_buf.len() {
state.at_line_start = false;
break;
}
if in_buf[pos + offset] == b'\r' {
// Hold the `\r` back until the next byte is known.
state.skipped_carriage_return = true;
} else {
assert_eq!(in_buf[pos + offset], b'\n');
write_end_of_line(
&mut writer,
options.end_of_line().as_bytes(),
handle.is_interactive,
)?;
state.at_line_start = true;
}
// Skip the written bytes and the terminator itself.
pos += offset + 1;
}
// Flush before the next read so that output already produced is not kept in the buffer.
writer.flush().inspect_err(handle_broken_pipe)?;
}
Ok(())
}
/// Handles a `\n` found at the current position: either the end of a line whose
/// `\r` was held back, or an empty line.
///
/// A held-back `\r` is written as `^M` when line ends are visible and as a
/// literal `\r` otherwise, followed by the end-of-line text. An empty line is
/// dropped when blank lines are squeezed and one has already been kept;
/// otherwise it is written, with a number in front when every line is numbered.
///
/// # Errors
/// Propagates any write error.
fn write_new_line<W: Write>(
    writer: &mut W,
    options: &OutputOptions,
    state: &mut OutputState,
    is_interactive: bool,
) -> CatResult<()> {
    let line_end = options.end_of_line().as_bytes();

    if state.skipped_carriage_return {
        let carriage_return: &[u8] = if options.show_ends { b"^M" } else { b"\r" };
        writer.write_all(carriage_return)?;
        state.skipped_carriage_return = false;
        return write_end_of_line(writer, line_end, is_interactive);
    }

    // A repeated empty line is swallowed when squeezing.
    let squeezed_away = state.at_line_start && options.squeeze_blank && state.one_blank_kept;
    if squeezed_away {
        return Ok(());
    }

    state.one_blank_kept = true;
    if state.at_line_start && options.number == NumberingMode::All {
        state.line_number.write(writer)?;
        state.line_number.increment();
    }
    write_end_of_line(writer, line_end, is_interactive)
}
/// Writes `in_buf` up to the end of the current line using the renderer that
/// matches `options`, and returns how many input bytes were consumed.
///
/// Non-printing notation takes precedence over visible tabs; without either,
/// the bytes are copied unchanged.
///
/// # Errors
/// Propagates any write error.
fn write_end<W: Write>(
    writer: &mut W,
    in_buf: &[u8],
    options: &OutputOptions,
) -> io::Result<usize> {
    match (options.show_nonprint, options.show_tabs) {
        (true, _) => write_nonprint_to_end(in_buf, writer, options.tab().as_bytes()),
        (false, true) => write_tab_to_end(in_buf, writer),
        (false, false) => write_to_end(in_buf, writer),
    }
}
/// Copies `in_buf` unchanged up to, but not including, the first `\n` or `\r`
/// (or all of it if there is none) and returns the number of bytes written.
///
/// # Errors
/// Propagates any write error.
fn write_to_end<W: Write>(in_buf: &[u8], writer: &mut W) -> io::Result<usize> {
    let stop = memchr2(b'\n', b'\r', in_buf).unwrap_or(in_buf.len());
    writer.write_all(&in_buf[..stop])?;
    Ok(stop)
}
/// Copies `in_buf` up to, but not including, the first `\n` or `\r`, writing
/// every TAB as `^I`, and returns the number of input bytes consumed.
///
/// # Errors
/// Propagates any write error.
fn write_tab_to_end<W: Write>(mut in_buf: &[u8], writer: &mut W) -> io::Result<usize> {
    let mut consumed = 0;
    while let Some(idx) = in_buf
        .iter()
        .position(|&b| matches!(b, b'\n' | b'\t' | b'\r'))
    {
        let (plain, rest) = in_buf.split_at(idx);
        writer.write_all(plain)?;
        if rest[0] != b'\t' {
            // Line terminator: leave it for the caller.
            return Ok(consumed + idx);
        }
        writer.write_all(b"^I")?;
        consumed += idx + 1;
        in_buf = &rest[1..];
    }
    // No further special byte: the remainder is plain text.
    writer.write_all(in_buf)?;
    Ok(consumed + in_buf.len())
}
/// Writes `in_buf` up to, but not including, the first `\n`, rendering
/// non-printing bytes in `^` / `M-` notation, and returns the number of input
/// bytes consumed.
///
/// A TAB is written as `tab`; printable ASCII is copied as is.
///
/// # Errors
/// Propagates any write error.
fn write_nonprint_to_end<W: Write>(in_buf: &[u8], writer: &mut W, tab: &[u8]) -> io::Result<usize> {
    let line_len = in_buf
        .iter()
        .position(|&b| b == b'\n')
        .unwrap_or(in_buf.len());

    for &byte in &in_buf[..line_len] {
        match byte {
            b'\t' => writer.write_all(tab)?,
            // Remaining control characters: caret notation.
            0..=31 => writer.write_all(&[b'^', byte + 64])?,
            32..=126 => writer.write_all(&[byte])?,
            127 => writer.write_all(b"^?")?,
            // High-bit bytes: `M-` followed by the notation of the low seven bits.
            128..=159 => writer.write_all(&[b'M', b'-', b'^', byte - 64])?,
            160..=254 => writer.write_all(&[b'M', b'-', byte - 128])?,
            255 => writer.write_all(b"M-^?")?,
        }
    }
    Ok(line_len)
}
/// Writes the end-of-line text and, for interactive input, flushes right away
/// so the line becomes visible immediately.
///
/// # Errors
/// Propagates any write or flush error.
fn write_end_of_line<W: Write>(
    writer: &mut W,
    end_of_line: &[u8],
    is_interactive: bool,
) -> CatResult<()> {
    writer.write_all(end_of_line)?;
    if !is_interactive {
        return Ok(());
    }
    writer.flush().inspect_err(handle_broken_pipe)?;
    Ok(())
}
/// On Windows, terminates the process with exit code 13 when `error` is a
/// broken pipe; on every other target, and for every other error, it does nothing.
fn handle_broken_pipe(error: &io::Error) {
    let on_windows = cfg!(target_os = "windows");
    if on_windows && matches!(error.kind(), ErrorKind::BrokenPipe) {
        std::process::exit(13);
    }
}
#[cfg(test)]
mod tests {
    // The writers wrap `sink()` rather than the real stdout: dropping a
    // `BufWriter` flushes it, which would otherwise print raw control bytes
    // into the test runner's output.
    use std::io::{BufWriter, sink};

    #[test]
    fn test_write_tab_to_end_with_newline() {
        let mut writer = BufWriter::with_capacity(1024 * 64, sink());
        let in_buf = b"a\tb\tc\n";
        assert_eq!(super::write_tab_to_end(in_buf, &mut writer).unwrap(), 5);
        assert_eq!(writer.buffer(), b"a^Ib^Ic");
    }

    #[test]
    fn test_write_tab_to_end_no_newline() {
        let mut writer = BufWriter::with_capacity(1024 * 64, sink());
        let in_buf = b"a\tb\tc";
        assert_eq!(super::write_tab_to_end(in_buf, &mut writer).unwrap(), 5);
        assert_eq!(writer.buffer(), b"a^Ib^Ic");
    }

    #[test]
    fn test_write_nonprint_to_end_new_line() {
        let mut writer = BufWriter::with_capacity(1024 * 64, sink());
        let in_buf = b"\n";
        let tab = b"";
        super::write_nonprint_to_end(in_buf, &mut writer, tab).unwrap();
        assert_eq!(writer.buffer().len(), 0);
    }

    #[test]
    fn test_write_nonprint_to_end_9() {
        let mut writer = BufWriter::with_capacity(1024 * 64, sink());
        let in_buf = &[9u8];
        let tab = b"tab";
        super::write_nonprint_to_end(in_buf, &mut writer, tab).unwrap();
        assert_eq!(writer.buffer(), tab);
    }

    #[test]
    fn test_write_nonprint_to_end_0_to_8() {
        for byte in 0u8..=8u8 {
            let mut writer = BufWriter::with_capacity(1024 * 64, sink());
            let in_buf = &[byte];
            let tab = b"";
            super::write_nonprint_to_end(in_buf, &mut writer, tab).unwrap();
            assert_eq!(writer.buffer(), [b'^', byte + 64]);
        }
    }

    #[test]
    fn test_write_nonprint_to_end_10_to_31() {
        // Byte 10 is the newline, which ends the line instead of being
        // rendered, so the loop starts at 11.
        for byte in 11u8..=31u8 {
            let mut writer = BufWriter::with_capacity(1024 * 64, sink());
            let in_buf = &[byte];
            let tab = b"";
            super::write_nonprint_to_end(in_buf, &mut writer, tab).unwrap();
            assert_eq!(writer.buffer(), [b'^', byte + 64]);
        }
    }

    #[test]
    fn test_incrementing_string() {
        // `cat -n` right-aligns the number in a six-column field followed by a
        // tab; numbers wider than six digits simply take the room they need.
        let mut incrementing_string = super::LineNumber::new();
        assert_eq!(b"     1\t", incrementing_string.to_str());
        incrementing_string.increment();
        assert_eq!(b"     2\t", incrementing_string.to_str());
        // Run through to 100
        for _ in 3..=100 {
            incrementing_string.increment();
        }
        assert_eq!(b"   100\t", incrementing_string.to_str());
        // Run through until we overflow the original six-column field.
        for _ in 101..=1_000_000 {
            incrementing_string.increment();
        }
        // Confirm that the field grows when the number no longer fits.
        assert_eq!(b"1000000\t", incrementing_string.to_str());
        incrementing_string.increment();
        assert_eq!(b"1000001\t", incrementing_string.to_str());
    }
}