#[cfg(not(target_os = "linux"))]
use std::io::BufWriter;
use std::io::{self, BufReader, Write};
#[cfg(unix)]
use std::mem::ManuallyDrop;
#[cfg(unix)]
use std::os::unix::io::FromRawFd;
use std::path::Path;
use std::process;
#[cfg(unix)]
use coreutils_rs::common::io::try_mmap_stdin;
use coreutils_rs::common::io::{MmapHints, read_file_with_hints};
use coreutils_rs::common::{enlarge_stdout_pipe, io_error_msg};
use coreutils_rs::cut::{self, CutMode};
/// Unbuffered stdout writer for Linux that can pass buffers to the kernel
/// with `vmsplice(2)` when stdout is a pipe, and otherwise writes through a
/// plain file handle for fd 1.
#[cfg(target_os = "linux")]
struct VmspliceWriter {
/// File handle built from raw fd 1. Wrapped in `ManuallyDrop` so its
/// destructor never runs and the descriptor is not closed by this wrapper.
raw: ManuallyDrop<std::fs::File>,
/// True when `fstat` on fd 1 reported a FIFO at construction time.
stdout_is_pipe: bool,
/// Whether writes should currently attempt `vmsplice`. Starts out false,
/// is toggled through `set_vmsplice_enabled`, and is cleared by the `Write`
/// impl when a `vmsplice` call fails with a non-`EINTR` error.
vmsplice_enabled: bool,
}
#[cfg(target_os = "linux")]
impl VmspliceWriter {
    /// Builds a writer over fd 1 with `vmsplice` initially switched off.
    ///
    /// The pipe check is done once, here; a failing `fstat` is treated the
    /// same as "stdout is not a pipe".
    fn new() -> Self {
        // SAFETY: `libc::stat` is a plain C struct, so an all-zero bit pattern
        // is a valid (if meaningless) value for `fstat` to overwrite.
        let mut info: libc::stat = unsafe { std::mem::zeroed() };
        // SAFETY: `info` is a live, writable `stat` buffer for the whole call.
        let fstat_ok = unsafe { libc::fstat(1, &mut info) } == 0;
        let stdout_is_pipe = fstat_ok && (info.st_mode & libc::S_IFMT) == libc::S_IFIFO;

        // SAFETY: assumes fd 1 stays open for the lifetime of the process.
        // `ManuallyDrop` keeps the `File` destructor from ever closing it.
        let raw = ManuallyDrop::new(unsafe { std::fs::File::from_raw_fd(1) });

        Self {
            raw,
            stdout_is_pipe,
            vmsplice_enabled: false,
        }
    }

    /// Turns the `vmsplice` fast path on or off for subsequent writes.
    #[inline]
    fn set_vmsplice_enabled(&mut self, enabled: bool) {
        self.vmsplice_enabled = enabled;
    }
}
#[cfg(target_os = "linux")]
impl VmspliceWriter {
    /// Reports whether the next write should go through `vmsplice`: the fast
    /// path must be switched on and stdout must have been detected as a pipe.
    #[inline(always)]
    fn use_vmsplice(&self) -> bool {
        let enabled = self.vmsplice_enabled;
        let is_pipe = self.stdout_is_pipe;
        is_pipe && enabled
    }
}
#[cfg(target_os = "linux")]
impl Write for VmspliceWriter {
    /// Writes `buf` once, via `vmsplice` when the fast path is active.
    ///
    /// `EINTR` is retried. Any other `vmsplice` failure switches the fast path
    /// off and the same buffer is written through the regular file handle.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        if buf.is_empty() || !self.use_vmsplice() {
            return (&*self.raw).write(buf);
        }
        let iov = libc::iovec {
            iov_base: buf.as_ptr() as *mut libc::c_void,
            iov_len: buf.len(),
        };
        loop {
            // SAFETY: `iov` describes exactly the bytes of `buf`, which is
            // borrowed for the duration of the call.
            let spliced = unsafe { libc::vmsplice(1, &iov, 1, 0) };
            if spliced >= 0 {
                return Ok(spliced as usize);
            }
            if io::Error::last_os_error().kind() != io::ErrorKind::Interrupted {
                break;
            }
        }
        // vmsplice is unusable here: disable it and fall back to write(2).
        self.vmsplice_enabled = false;
        (&*self.raw).write(buf)
    }

    /// Writes all of `buf`, looping over partial `vmsplice` transfers.
    ///
    /// A zero-length transfer is reported as `WriteZero`. On a non-`EINTR`
    /// failure the fast path is disabled and the unwritten remainder is sent
    /// through the regular file handle.
    fn write_all(&mut self, mut buf: &[u8]) -> io::Result<()> {
        if buf.is_empty() || !self.use_vmsplice() {
            return (&*self.raw).write_all(buf);
        }
        while !buf.is_empty() {
            let iov = libc::iovec {
                iov_base: buf.as_ptr() as *mut libc::c_void,
                iov_len: buf.len(),
            };
            // SAFETY: `iov` describes exactly the remaining bytes of `buf`.
            match unsafe { libc::vmsplice(1, &iov, 1, 0) } {
                0 => {
                    return Err(io::Error::new(io::ErrorKind::WriteZero, "vmsplice wrote 0"));
                }
                moved if moved > 0 => buf = &buf[moved as usize..],
                _ => {
                    if io::Error::last_os_error().kind() == io::ErrorKind::Interrupted {
                        continue;
                    }
                    self.vmsplice_enabled = false;
                    return (&*self.raw).write_all(buf);
                }
            }
        }
        Ok(())
    }

    /// Vectored write; at most 1024 slices are passed to a single `vmsplice`.
    ///
    /// Error handling mirrors `write`: retry on `EINTR`, otherwise disable the
    /// fast path and fall back to the regular file handle.
    fn write_vectored(&mut self, bufs: &[io::IoSlice<'_>]) -> io::Result<usize> {
        if bufs.is_empty() || !self.use_vmsplice() {
            return (&*self.raw).write_vectored(bufs);
        }
        // The slice array is reinterpreted as an `iovec` array for the syscall.
        let iov_ptr = bufs.as_ptr() as *const libc::iovec;
        let iov_count = bufs.len().min(1024);
        loop {
            // SAFETY: relies on `IoSlice` having the same layout as `iovec` on
            // this platform; `iov_count` never exceeds `bufs.len()`.
            let spliced = unsafe { libc::vmsplice(1, iov_ptr, iov_count, 0) };
            if spliced >= 0 {
                return Ok(spliced as usize);
            }
            if io::Error::last_os_error().kind() != io::ErrorKind::Interrupted {
                break;
            }
        }
        self.vmsplice_enabled = false;
        (&*self.raw).write_vectored(bufs)
    }

    /// Nothing is buffered in this writer, so flushing is a no-op.
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}
/// Raw command-line options for `cut`, as collected by `parse_args`.
///
/// Values are stored unvalidated; `main` decides which mode is active and
/// rejects inconsistent combinations.
#[derive(Debug, Default)]
struct Cli {
    /// LIST given with `-b` / `--bytes`.
    bytes: Option<String>,
    /// LIST given with `-c` / `--characters`.
    characters: Option<String>,
    /// LIST given with `-f` / `--fields`.
    fields: Option<String>,
    /// DELIM given with `-d` / `--delimiter`.
    delimiter: Option<String>,
    /// Set by `--complement`.
    complement: bool,
    /// Set by `-s` / `--only-delimited`.
    only_delimited: bool,
    /// STRING given with `--output-delimiter`.
    output_delimiter: Option<String>,
    /// Set by `-z` / `--zero-terminated`.
    zero_terminated: bool,
    /// Positional operands (input files), in command-line order.
    files: Vec<String>,
}
/// Decodes an option value that is attached to its flag (`--opt=VALUE` or
/// `-oVALUE`).
///
/// Invalid UTF-8 is replaced with U+FFFD, the same way detached values are
/// decoded via `to_string_lossy`, rather than collapsing the whole value to
/// an empty string.
fn decode_attached_value(raw: &[u8]) -> String {
    String::from_utf8_lossy(raw).into_owned()
}

/// Maps the name of a value-taking long option to its display label and the
/// `Cli` field that stores its value; returns `None` for any other name.
fn long_value_slot<'a>(
    cli: &'a mut Cli,
    name: &[u8],
) -> Option<(&'static str, &'a mut Option<String>)> {
    match name {
        b"--bytes" => Some(("--bytes", &mut cli.bytes)),
        b"--characters" => Some(("--characters", &mut cli.characters)),
        b"--fields" => Some(("--fields", &mut cli.fields)),
        b"--delimiter" => Some(("--delimiter", &mut cli.delimiter)),
        b"--output-delimiter" => Some(("--output-delimiter", &mut cli.output_delimiter)),
        _ => None,
    }
}

/// Handles one argument that starts with `--` (other than the bare `--`).
///
/// Value-taking options accept either `--opt=VALUE` or `--opt VALUE`; the
/// latter pulls the next item from `args`. Exits the process for `--help`,
/// `--version`, a missing value, or an unrecognized option.
fn parse_long_option(
    cli: &mut Cli,
    arg: &std::ffi::OsStr,
    args: &mut impl Iterator<Item = std::ffi::OsString>,
) {
    let bytes = arg.as_encoded_bytes();
    // Split `--name=value` at the first '='; everything after it is the value.
    let (name, attached) = match bytes.iter().position(|&b| b == b'=') {
        Some(eq) => (&bytes[..eq], Some(&bytes[eq + 1..])),
        None => (bytes, None),
    };

    if let Some((label, slot)) = long_value_slot(cli, name) {
        let value = if let Some(raw) = attached {
            decode_attached_value(raw)
        } else if let Some(next) = args.next() {
            next.to_string_lossy().into_owned()
        } else {
            eprintln!("cut: option '{}' requires an argument", label);
            process::exit(1);
        };
        *slot = Some(value);
        return;
    }

    // Flags are matched against the full argument, so `--complement=x` is
    // reported as unrecognized rather than silently accepted.
    match bytes {
        b"--complement" => cli.complement = true,
        b"--only-delimited" => cli.only_delimited = true,
        b"--zero-terminated" => cli.zero_terminated = true,
        b"--help" => {
            print!(
                "Usage: cut OPTION... [FILE]...\n\
                Print selected parts of lines from each FILE to standard output.\n\n\
                With no FILE, or when FILE is -, read standard input.\n\n\
                Mandatory arguments to long options are mandatory for short options too.\n\
                \x20 -b, --bytes=LIST select only these bytes\n\
                \x20 -c, --characters=LIST select only these characters\n\
                \x20 -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\
                \x20 -f, --fields=LIST select only these fields; also print any line\n\
                \x20 that contains no delimiter character, unless\n\
                \x20 the -s option is specified\n\
                \x20 -n (ignored)\n\
                \x20 --complement complement the set of selected bytes, characters\n\
                \x20 or fields\n\
                \x20 -s, --only-delimited do not print lines not containing delimiters\n\
                \x20 --output-delimiter=STRING use STRING as the output delimiter\n\
                \x20 the default is to use the input delimiter\n\
                \x20 -z, --zero-terminated line delimiter is NUL, not newline\n\
                \x20 --help display this help and exit\n\
                \x20 --version output version information and exit\n"
            );
            process::exit(0);
        }
        b"--version" => {
            println!("cut (fcoreutils) {}", env!("CARGO_PKG_VERSION"));
            process::exit(0);
        }
        _ => {
            eprintln!("cut: unrecognized option '{}'", arg.to_string_lossy());
            eprintln!("Try 'cut --help' for more information.");
            process::exit(1);
        }
    }
}

/// Handles a cluster of short options such as `-s`, `-sz` or `-sf1,3`.
///
/// `bytes` is the whole argument including the leading `-`. The first
/// value-taking flag (`b`, `c`, `d`, `f`) consumes the rest of the cluster as
/// its value, or the next item from `args` when nothing follows it.
fn parse_short_cluster(
    cli: &mut Cli,
    bytes: &[u8],
    args: &mut impl Iterator<Item = std::ffi::OsString>,
) {
    for (idx, &flag) in bytes.iter().enumerate().skip(1) {
        match flag {
            // `-n` is accepted and ignored.
            b'n' => {}
            b's' => cli.only_delimited = true,
            b'z' => cli.zero_terminated = true,
            b'b' | b'c' | b'd' | b'f' => {
                let rest = &bytes[idx + 1..];
                let value = if !rest.is_empty() {
                    decode_attached_value(rest)
                } else if let Some(next) = args.next() {
                    next.to_string_lossy().into_owned()
                } else {
                    eprintln!("cut: option requires an argument -- '{}'", flag as char);
                    process::exit(1);
                };
                let slot = match flag {
                    b'b' => &mut cli.bytes,
                    b'c' => &mut cli.characters,
                    b'd' => &mut cli.delimiter,
                    b'f' => &mut cli.fields,
                    _ => unreachable!(),
                };
                *slot = Some(value);
                // The value swallowed the remainder of this argument.
                return;
            }
            other => {
                eprintln!("cut: invalid option -- '{}'", other as char);
                eprintln!("Try 'cut --help' for more information.");
                process::exit(1);
            }
        }
    }
}

/// Parses the process arguments into a [`Cli`].
///
/// Everything after a bare `--` is treated as a file operand. A lone `-` and
/// any argument not starting with `-` are file operands as well. Usage errors
/// print a diagnostic to stderr and exit with status 1; `--help` and
/// `--version` print to stdout and exit with status 0.
fn parse_args() -> Cli {
    let mut cli = Cli {
        bytes: None,
        characters: None,
        fields: None,
        delimiter: None,
        complement: false,
        only_delimited: false,
        output_delimiter: None,
        zero_terminated: false,
        files: Vec::new(),
    };
    let mut args = std::env::args_os().skip(1);
    // A `for` loop cannot be used: the option helpers pull extra items from
    // `args` while the loop is running.
    #[allow(clippy::while_let_on_iterator)]
    while let Some(arg) = args.next() {
        let bytes = arg.as_encoded_bytes();
        if bytes == b"--" {
            cli.files
                .extend(args.by_ref().map(|a| a.to_string_lossy().into_owned()));
            break;
        }
        if bytes.starts_with(b"--") {
            parse_long_option(&mut cli, &arg, &mut args);
        } else if bytes.len() > 1 && bytes[0] == b'-' {
            parse_short_cluster(&mut cli, bytes, &mut args);
        } else {
            cli.files.push(arg.to_string_lossy().into_owned());
        }
    }
    cli
}
/// Entry point for the `cut` binary.
///
/// Parses and validates the command line, loads stdin up front when `-` is
/// among the operands (or no operand is given), then processes every operand
/// in order and writes the result to stdout.
///
/// Exit status: 1 for usage errors, unreadable inputs or write failures;
/// 0 otherwise, including when stdout reports a broken pipe.
///
/// Standard input is emitted at most once: a repeated `-` operand is skipped,
/// because the data was already consumed by the first one.
fn main() {
    coreutils_rs::common::reset_sigpipe();
    enlarge_stdout_pipe();

    // Global rayon pool: one thread fewer than the available cores, minimum 1.
    let cpus = std::thread::available_parallelism()
        .map(|v| v.get().max(1))
        .unwrap_or(1);
    // Failure to install the global pool is deliberately ignored.
    let _ = rayon::ThreadPoolBuilder::new()
        .num_threads(cpus.saturating_sub(1).max(1))
        .build_global();

    let cli = parse_args();

    // Exactly one of -b / -c / -f must be given.
    let mode_count =
        cli.bytes.is_some() as u8 + cli.characters.is_some() as u8 + cli.fields.is_some() as u8;
    if mode_count == 0 {
        eprintln!("cut: you must specify a list of bytes, characters, or fields");
        eprintln!("Try 'cut --help' for more information.");
        process::exit(1);
    }
    if mode_count > 1 {
        eprintln!("cut: only one type of list may be specified");
        eprintln!("Try 'cut --help' for more information.");
        process::exit(1);
    }
    let (mode, spec) = if let Some(ref s) = cli.bytes {
        (CutMode::Bytes, s.as_str())
    } else if let Some(ref s) = cli.characters {
        (CutMode::Characters, s.as_str())
    } else {
        // mode_count == 1 and neither bytes nor characters is set.
        (CutMode::Fields, cli.fields.as_ref().unwrap().as_str())
    };

    // -s and -d only make sense in field mode.
    if cli.only_delimited && mode != CutMode::Fields {
        eprintln!(
            "cut: suppressing non-delimited lines makes sense\n\tonly when operating on fields"
        );
        eprintln!("Try 'cut --help' for more information.");
        process::exit(1);
    }
    if cli.delimiter.is_some() && mode != CutMode::Fields {
        eprintln!("cut: an input delimiter may be specified only when operating on fields");
        eprintln!("Try 'cut --help' for more information.");
        process::exit(1);
    }

    // With an explicit output delimiter in byte/char mode, adjacent ranges are
    // kept separate when parsing the list.
    let has_output_delim = cli.output_delimiter.is_some();
    let no_merge_adjacent = has_output_delim && mode != CutMode::Fields;
    let ranges = match cut::parse_ranges(spec, no_merge_adjacent) {
        Ok(r) => r,
        Err(e) => {
            eprintln!("cut: {}", e);
            eprintln!("Try 'cut --help' for more information.");
            process::exit(1);
        }
    };

    // Input delimiter: exactly one byte, TAB by default.
    let delim = if let Some(ref d) = cli.delimiter {
        if d.len() != 1 {
            eprintln!("cut: the delimiter must be a single character");
            eprintln!("Try 'cut --help' for more information.");
            process::exit(1);
        }
        d.as_bytes()[0]
    } else {
        b'\t'
    };
    // Output delimiter: explicit value, else the input delimiter in field
    // mode, else empty.
    let output_delim = if let Some(ref od) = cli.output_delimiter {
        od.as_bytes().to_vec()
    } else if mode == CutMode::Fields {
        vec![delim]
    } else {
        vec![]
    };
    let line_delim = if cli.zero_terminated { b'\0' } else { b'\n' };

    // No operands means "read stdin".
    let files = if cli.files.is_empty() {
        vec!["-".to_string()]
    } else {
        cli.files.clone()
    };

    // Output sink, chosen per platform.
    #[cfg(target_os = "linux")]
    let mut out = VmspliceWriter::new();
    // SAFETY (non-Linux unix): assumes fd 1 stays open for the life of the
    // process; `ManuallyDrop` keeps the `File` from closing it on drop.
    #[cfg(all(unix, not(target_os = "linux")))]
    let mut raw = unsafe { ManuallyDrop::new(std::fs::File::from_raw_fd(1)) };
    #[cfg(all(unix, not(target_os = "linux")))]
    let mut out = BufWriter::with_capacity(16 * 1024 * 1024, &mut *raw);
    #[cfg(not(unix))]
    let stdout = io::stdout();
    #[cfg(not(unix))]
    let mut out = BufWriter::with_capacity(16 * 1024 * 1024, stdout.lock());

    let mut had_error = false;
    let cfg = cut::CutConfig {
        mode,
        ranges: &ranges,
        complement: cli.complement,
        delim,
        output_delim: &output_delim,
        suppress_no_delim: cli.only_delimited,
        line_delim,
    };

    // Acquire stdin data up front, trying the sources in order:
    // mmap of stdin, then (Linux) splice into an mmap, then a plain read.
    #[cfg(unix)]
    let stdin_mmap = {
        if files.iter().any(|f| f == "-") {
            try_mmap_stdin(0)
        } else {
            None
        }
    };
    #[cfg(target_os = "linux")]
    let splice_mmap: Option<memmap2::MmapMut> =
        if stdin_mmap.is_none() && files.iter().any(|f| f == "-") {
            coreutils_rs::common::io::splice_stdin_to_mmap().unwrap_or(None)
        } else {
            None
        };
    #[cfg(not(target_os = "linux"))]
    let splice_mmap: Option<memmap2::MmapMut> = None;
    // Only read on unix targets, hence the allow.
    #[allow(unused_variables)]
    let has_splice = splice_mmap.is_some();
    #[cfg(unix)]
    let mut stdin_buf: Option<Vec<u8>> =
        if stdin_mmap.is_none() && !has_splice && files.iter().any(|f| f == "-") {
            match coreutils_rs::common::io::read_stdin() {
                Ok(buf) => Some(buf),
                Err(e) => {
                    // A broken pipe while reading is treated as empty input.
                    if e.kind() != io::ErrorKind::BrokenPipe {
                        eprintln!("cut: {}", io_error_msg(&e));
                        process::exit(1);
                    }
                    Some(Vec::new())
                }
            }
        } else {
            None
        };
    #[cfg(not(unix))]
    let mut stdin_buf: Option<Vec<u8>> = if files.iter().any(|f| f == "-") {
        match coreutils_rs::common::io::read_stdin() {
            Ok(buf) => Some(buf),
            Err(e) => {
                // A broken pipe while reading is treated as empty input.
                if e.kind() != io::ErrorKind::BrokenPipe {
                    eprintln!("cut: {}", io_error_msg(&e));
                    process::exit(1);
                }
                Some(Vec::new())
            }
        }
    } else {
        None
    };

    // Try to cut the stdin data in place. When `process_cut_data_mut` returns
    // `Some(len)`, the first `len` bytes of the buffer are written out as the
    // finished result further below.
    let mut stdin_inplace_done = false;
    #[cfg(target_os = "linux")]
    let mut splice_mmap = splice_mmap;
    #[cfg(target_os = "linux")]
    let mut splice_inplace_len: usize = 0;
    #[cfg(target_os = "linux")]
    if let Some(ref mut mmap_data) = splice_mmap
        && !mmap_data.is_empty()
        && let Some(new_len) = cut::process_cut_data_mut(mmap_data, &cfg)
    {
        splice_inplace_len = new_len;
        stdin_inplace_done = true;
    }
    if !stdin_inplace_done
        && let Some(ref mut data) = stdin_buf
        && !data.is_empty()
        && let Some(new_len) = cut::process_cut_data_mut(data, &cfg)
    {
        data.truncate(new_len);
        stdin_inplace_done = true;
    }

    // Stdin was slurped exactly once above; replaying that data for every
    // additional `-` operand would duplicate output that a second read of
    // stdin could never produce.
    let mut stdin_consumed = false;

    for filename in &files {
        if filename == "-" {
            if stdin_consumed {
                continue;
            }
            stdin_consumed = true;
        }
        let result: io::Result<()> = if filename == "-" {
            #[cfg(unix)]
            {
                if stdin_inplace_done {
                    // The buffer already holds finished output: write it as-is.
                    #[cfg(target_os = "linux")]
                    {
                        // vmsplice is enabled only for this one bulk write.
                        out.set_vmsplice_enabled(true);
                        let res = if splice_inplace_len > 0 {
                            if let Some(ref mmap_data) = splice_mmap {
                                out.write_all(&mmap_data[..splice_inplace_len])
                            } else {
                                Ok(())
                            }
                        } else if let Some(ref data) = stdin_buf {
                            out.write_all(data)
                        } else {
                            Ok(())
                        };
                        out.set_vmsplice_enabled(false);
                        res
                    }
                    #[cfg(not(target_os = "linux"))]
                    {
                        if let Some(ref data) = stdin_buf {
                            // Flush pending buffered output first, then bypass
                            // the BufWriter for the large write.
                            out.flush().and_then(|()| out.get_mut().write_all(data))
                        } else {
                            Ok(())
                        }
                    }
                } else if let Some(ref data) = stdin_mmap {
                    cut::process_cut_data(data, &cfg, &mut out)
                } else {
                    #[cfg(target_os = "linux")]
                    if let Some(ref data) = splice_mmap {
                        cut::process_cut_data(data.as_ref(), &cfg, &mut out)
                    } else if let Some(ref data) = stdin_buf {
                        cut::process_cut_data(data, &cfg, &mut out)
                    } else {
                        let reader = BufReader::new(io::stdin().lock());
                        cut::process_cut_reader(reader, &cfg, &mut out)
                    }
                    #[cfg(not(target_os = "linux"))]
                    if let Some(ref data) = stdin_buf {
                        cut::process_cut_data(data, &cfg, &mut out)
                    } else {
                        let reader = BufReader::new(io::stdin().lock());
                        cut::process_cut_reader(reader, &cfg, &mut out)
                    }
                }
            }
            #[cfg(not(unix))]
            {
                if stdin_inplace_done {
                    if let Some(ref data) = stdin_buf {
                        out.write_all(data)
                    } else {
                        Ok(())
                    }
                } else if let Some(ref data) = stdin_buf {
                    cut::process_cut_data(data, &cfg, &mut out)
                } else {
                    let reader = BufReader::new(io::stdin().lock());
                    cut::process_cut_reader(reader, &cfg, &mut out)
                }
            }
        } else {
            match read_file_with_hints(Path::new(filename), MmapHints::Lazy) {
                Ok(data) => cut::process_cut_data(&data, &cfg, &mut out),
                Err(e) => {
                    // Report the unreadable file and carry on with the rest.
                    eprintln!("cut: {}: {}", filename, io_error_msg(&e));
                    had_error = true;
                    continue;
                }
            }
        };
        if let Err(e) = result {
            // The reader went away: stop quietly.
            if e.kind() == io::ErrorKind::BrokenPipe {
                process::exit(0);
            }
            eprintln!("cut: write error: {}", io_error_msg(&e));
            had_error = true;
        }
    }

    if let Err(e) = out.flush() {
        if e.kind() == io::ErrorKind::BrokenPipe {
            process::exit(0);
        }
        eprintln!("cut: write error: {}", io_error_msg(&e));
        had_error = true;
    }
    if had_error {
        process::exit(1);
    }
}
#[cfg(test)]
mod tests {
    use std::io::Write;
    use std::process::Command;
    use std::process::Output;
    use std::process::Stdio;

    /// Command for the `fcut` binary that sits two directory levels above the
    /// running test executable.
    fn cmd() -> Command {
        let mut bin = std::env::current_exe().unwrap();
        for _ in 0..2 {
            bin.pop();
        }
        bin.push("fcut");
        Command::new(bin)
    }

    /// Runs `fcut` with `opts`, feeds `input` on a piped stdin and collects
    /// the piped stdout.
    fn run_piped(opts: &[&str], input: &[u8]) -> Output {
        let mut child = cmd()
            .args(opts)
            .stdin(Stdio::piped())
            .stdout(Stdio::piped())
            .spawn()
            .unwrap();
        // The stdin handle is a temporary, so it is closed right after the write.
        child.stdin.take().unwrap().write_all(input).unwrap();
        child.wait_with_output().unwrap()
    }

    /// Captured stdout, lossily decoded and trimmed.
    fn trimmed_stdout(output: &Output) -> String {
        String::from_utf8_lossy(&output.stdout).trim().to_string()
    }

    /// Writes `contents` to `input_name` in a temp dir, runs `fcut` on it with
    /// stdout redirected to a regular file, asserts success and returns what
    /// was written to that file.
    fn run_on_file(input_name: &str, contents: &str, opts: &[&str]) -> String {
        let dir = tempfile::tempdir().unwrap();
        let infile = dir.path().join(input_name);
        let outfile = dir.path().join("out.txt");
        std::fs::write(&infile, contents).unwrap();
        let output = cmd()
            .args(opts)
            .arg(infile.to_str().unwrap())
            .stdout(std::fs::File::create(&outfile).unwrap())
            .output()
            .unwrap();
        assert!(output.status.success());
        std::fs::read_to_string(&outfile).unwrap()
    }

    #[test]
    fn test_cut_fields() {
        let output = run_piped(&["-d:", "-f1"], b"a:b:c\n");
        assert!(output.status.success());
        assert_eq!(trimmed_stdout(&output), "a");
    }

    #[test]
    fn test_cut_bytes() {
        let output = run_piped(&["-b1-3"], b"hello\n");
        assert!(output.status.success());
        assert_eq!(trimmed_stdout(&output), "hel");
    }

    #[test]
    fn test_cut_multiple_fields() {
        let output = run_piped(&["-d:", "-f1,3"], b"a:b:c:d\n");
        assert!(output.status.success());
        assert_eq!(trimmed_stdout(&output), "a:c");
    }

    #[test]
    fn test_cut_field_range() {
        let output = run_piped(&["-d:", "-f2-4"], b"a:b:c:d:e\n");
        assert!(output.status.success());
        assert_eq!(trimmed_stdout(&output), "b:c:d");
    }

    #[test]
    fn test_cut_complement() {
        let written = run_on_file("in.txt", "a:b:c\n", &["-d:", "-f2", "--complement"]);
        assert_eq!(written.trim(), "a:c");
    }

    #[test]
    fn test_cut_no_delimiter_in_line() {
        let output = run_piped(&["-d:", "-f1"], b"no delimiter here\n");
        assert!(output.status.success());
        assert_eq!(trimmed_stdout(&output), "no delimiter here");
    }

    #[test]
    fn test_cut_only_delimited() {
        let output = run_piped(&["-d:", "-f1", "-s"], b"no delimiter\nhas:delimiter\n");
        assert!(output.status.success());
        assert_eq!(trimmed_stdout(&output), "has");
    }

    #[test]
    fn test_cut_characters() {
        let output = run_piped(&["-c1-5"], b"hello world\n");
        assert!(output.status.success());
        assert_eq!(trimmed_stdout(&output), "hello");
    }

    #[test]
    fn test_cut_empty_input() {
        let output = run_piped(&["-f1"], b"");
        assert!(output.status.success());
        assert!(output.stdout.is_empty());
    }

    #[test]
    fn test_cut_output_delimiter() {
        let written = run_on_file(
            "in.txt",
            "a:b:c:d\n",
            &["-d:", "-f1,3", "--output-delimiter=|"],
        );
        assert_eq!(written.trim(), "a|c");
    }

    #[test]
    fn test_cut_no_option() {
        let output = cmd().output().unwrap();
        assert!(!output.status.success());
    }

    #[test]
    fn test_cut_file() {
        let written = run_on_file("data.txt", "a:b:c\nx:y:z\n", &["-d:", "-f2"]);
        let lines: Vec<&str> = written.lines().collect();
        assert_eq!(lines, vec!["b", "y"]);
    }

    #[test]
    fn test_cut_tab_delimiter() {
        let output = run_piped(&["-f2"], b"first\tsecond\tthird\n");
        assert!(output.status.success());
        assert_eq!(trimmed_stdout(&output), "second");
    }
}