use std::io::{self, BufWriter, Read, Write};
#[cfg(unix)]
use std::mem::ManuallyDrop;
#[cfg(unix)]
use std::os::unix::io::FromRawFd;
use std::path::Path;
use std::process;
use memchr::memchr_iter;
use rayon::prelude::*;
#[cfg(unix)]
use coreutils_rs::common::io::try_mmap_stdin;
use coreutils_rs::common::io::{FileData, MmapHints, file_size, read_file_with_hints, read_stdin};
use coreutils_rs::common::io_error_msg;
use coreutils_rs::wc;
use memmap2::MmapOptions;
/// Command-line options collected by `parse_args`.
struct Cli {
/// Set by `-c` / `--bytes`.
bytes: bool,
/// Set by `-m` / `--chars`.
chars: bool,
/// Set by `-l` / `--lines`.
lines: bool,
/// Set by `-L` / `--max-line-length`.
max_line_length: bool,
/// Set by `-w` / `--words`.
words: bool,
/// Value of `--files0-from`, if given (`-` is treated as standard input by `read_files0_from`).
files0_from: Option<String>,
/// Value of `--total`; initialised to `"auto"` and validated in `main`, not here.
total: String,
/// Positional operands, lossily converted to UTF-8.
files: Vec<String>,
}
/// Parses the process arguments (skipping the program name) into a [`Cli`].
///
/// Recognises the long options, clusters of short flags (`-lwc`), `--` as the
/// end-of-options marker, and treats everything else (including a lone `-`)
/// as a file operand. `--help` and `--version` print and exit with status 0;
/// an unknown option or a missing option value prints a diagnostic and exits
/// with status 1. The `--total` value is stored as given, without validation.
fn parse_args() -> Cli {
    /// Unwraps the value that must follow a two-token long option, or prints
    /// `missing_msg` to stderr and exits with status 1.
    fn value_or_exit(next: Option<std::ffi::OsString>, missing_msg: &str) -> String {
        match next {
            Some(value) => value.to_string_lossy().into_owned(),
            None => {
                eprintln!("{}", missing_msg);
                process::exit(1);
            }
        }
    }

    let mut cli = Cli {
        bytes: false,
        chars: false,
        lines: false,
        max_line_length: false,
        words: false,
        files0_from: None,
        total: "auto".to_string(),
        files: Vec::new(),
    };

    let mut args = std::env::args_os().skip(1);
    loop {
        let Some(arg) = args.next() else { break };
        // Lossy text view of the argument; every option spelling is pure
        // ASCII, so matching on it is equivalent to matching the raw bytes.
        let text = arg.to_string_lossy();

        if text == "--" {
            // Everything after the marker is an operand, even if it starts with '-'.
            cli.files
                .extend(args.by_ref().map(|rest| rest.to_string_lossy().into_owned()));
            break;
        }

        if text.starts_with("--") {
            if let Some(value) = text.strip_prefix("--files0-from=") {
                cli.files0_from = Some(value.to_string());
                continue;
            }
            if let Some(value) = text.strip_prefix("--total=") {
                cli.total = value.to_string();
                continue;
            }
            match &*text {
                "--bytes" => cli.bytes = true,
                "--chars" => cli.chars = true,
                "--lines" => cli.lines = true,
                "--max-line-length" => cli.max_line_length = true,
                "--words" => cli.words = true,
                "--files0-from" => {
                    cli.files0_from = Some(value_or_exit(
                        args.next(),
                        "wc: option '--files0-from' requires an argument",
                    ));
                }
                "--total" => {
                    cli.total = value_or_exit(
                        args.next(),
                        "wc: option '--total' requires an argument",
                    );
                }
                "--help" => {
                    print!(
                        "Usage: wc [OPTION]... [FILE]...\n\
                         \x20 or: wc [OPTION]... --files0-from=F\n\
                         Print newline, word, and byte counts for each FILE, and a total line if\n\
                         more than one FILE is specified. A word is a non-zero-length sequence of\n\
                         printable characters delimited by white space.\n\n\
                         With no FILE, or when FILE is -, read standard input.\n\n\
                         The options below may be used to select which counts are printed, always in\n\
                         the following order: newline, word, character, byte, maximum line length.\n\
                         \x20 -c, --bytes print the byte counts\n\
                         \x20 -m, --chars print the character counts\n\
                         \x20 -l, --lines print the newline counts\n\
                         \x20 -L, --max-line-length print the maximum display width\n\
                         \x20 -w, --words print the word counts\n\
                         \x20 --files0-from=F read input from the files specified by\n\
                         \x20 NUL-terminated names in file F;\n\
                         \x20 If F is - then read names from standard input\n\
                         \x20 --total=WHEN when to print a line with total counts;\n\
                         \x20 WHEN can be: auto, always, only, never\n\
                         \x20 --help display this help and exit\n\
                         \x20 --version output version information and exit\n"
                    );
                    process::exit(0);
                }
                "--version" => {
                    println!("wc (fcoreutils) {}", env!("CARGO_PKG_VERSION"));
                    process::exit(0);
                }
                _ => {
                    eprintln!("wc: unrecognized option '{}'", text);
                    eprintln!("Try 'wc --help' for more information.");
                    process::exit(1);
                }
            }
            continue;
        }

        let raw = arg.as_encoded_bytes();
        if raw.len() > 1 && raw[0] == b'-' {
            // Cluster of single-letter flags, e.g. `-lw`.
            for &flag in &raw[1..] {
                match flag {
                    b'c' => cli.bytes = true,
                    b'm' => cli.chars = true,
                    b'l' => cli.lines = true,
                    b'L' => cli.max_line_length = true,
                    b'w' => cli.words = true,
                    _ => {
                        eprintln!("wc: invalid option -- '{}'", flag as char);
                        eprintln!("Try 'wc --help' for more information.");
                        process::exit(1);
                    }
                }
            }
        } else {
            // Plain operand; a lone "-" lands here too.
            cli.files.push(text.into_owned());
        }
    }
    cli
}
/// Which count columns are printed for each row.
struct ShowFlags {
    /// Print the newline count.
    lines: bool,
    /// Print the word count.
    words: bool,
    /// Print the byte count.
    bytes: bool,
    /// Print the character count.
    chars: bool,
    /// Print the maximum line length.
    max_line_length: bool,
}

impl ShowFlags {
    /// Snapshot of all five flags in declaration order, for pattern matching.
    fn as_tuple(&self) -> (bool, bool, bool, bool, bool) {
        (
            self.lines,
            self.words,
            self.bytes,
            self.chars,
            self.max_line_length,
        )
    }

    /// True when the byte count is the one and only selected column.
    fn bytes_only(&self) -> bool {
        matches!(self.as_tuple(), (false, false, true, false, false))
    }

    /// True when the newline count is the one and only selected column.
    fn lines_only(&self) -> bool {
        matches!(self.as_tuple(), (true, false, false, false, false))
    }
}
/// Minimum input size in bytes (1 MiB) at which `main` switches to the `*_parallel` counting functions.
const WC_PARALLEL_THRESHOLD: usize = 1024 * 1024;
/// Minimum mapping size in bytes (32 MiB) at which `count_lines_streaming` splits newline counting across rayon threads.
const LINE_PARALLEL_THRESHOLD: usize = 32 * 1024 * 1024;
/// Counts newline bytes in the file at `path`, returning `(lines, bytes)`.
///
/// Non-empty regular files are memory-mapped and scanned with `memchr`
/// (split across rayon threads once the mapping reaches
/// `LINE_PARALLEL_THRESHOLD`); the byte count returned on that path is the
/// length reported by the file metadata.
///
/// Everything else is read sequentially to end-of-file, and the byte count is
/// the number of bytes actually read. That covers FIFOs, devices, files whose
/// metadata reports a zero length even though they produce data when read
/// (procfs entries behave this way), and regular files that could not be
/// mapped. Previously such inputs were reported as 0 lines without being read.
///
/// # Errors
/// Returns any error from opening the file, querying its metadata, or
/// reading it. Interrupted reads are retried rather than reported.
fn count_lines_streaming(path: &Path) -> io::Result<(u64, u64)> {
    let mut file = std::fs::File::open(path)?;
    let meta = file.metadata()?;
    let file_bytes = meta.len();

    // Only a regular file with a known non-zero length is worth mapping.
    if meta.file_type().is_file() && file_bytes > 0 {
        // SAFETY: the mapping is read-only and dropped before this function
        // returns. As with any file mapping, the result is only meaningful
        // while no other process truncates the file during the scan.
        if let Ok(mmap) = unsafe { MmapOptions::new().map(&file) } {
            #[cfg(target_os = "linux")]
            {
                // SAFETY: pointer and length describe the live mapping above.
                // These calls are hints only, so their results are ignored.
                unsafe {
                    libc::madvise(
                        mmap.as_ptr() as *mut libc::c_void,
                        mmap.len(),
                        libc::MADV_SEQUENTIAL,
                    );
                    if mmap.len() >= 2 * 1024 * 1024 {
                        libc::madvise(
                            mmap.as_ptr() as *mut libc::c_void,
                            mmap.len(),
                            libc::MADV_HUGEPAGE,
                        );
                    }
                }
            }
            let lines: u64 = if mmap.len() >= LINE_PARALLEL_THRESHOLD {
                let num_threads = rayon::current_num_threads().max(1);
                // Never hand a thread less than 1 MiB of work.
                let chunk_size = (mmap.len() / num_threads).max(1024 * 1024);
                mmap.par_chunks(chunk_size)
                    .map(|chunk| memchr_iter(b'\n', chunk).count() as u64)
                    .sum::<u64>()
            } else {
                memchr_iter(b'\n', &mmap).count() as u64
            };
            return Ok((lines, file_bytes));
        }
    }

    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;
        // SAFETY: the descriptor belongs to `file`, which is still open.
        // A length of 0 means "through end of file"; the call is advisory and
        // its result is ignored (it fails harmlessly on pipes).
        unsafe {
            libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
        }
    }

    let mut lines = 0u64;
    let mut bytes_read = 0u64;
    let mut buf = vec![0u8; 2 * 1024 * 1024];
    loop {
        let n = match file.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            // A signal arrived before any data was transferred; try again.
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        lines += memchr_iter(b'\n', &buf[..n]).count() as u64;
        bytes_read += n as u64;
    }
    Ok((lines, bytes_read))
}
/// Returns how many decimal digits are needed to print `n` (`0` needs one).
fn num_width(n: u64) -> usize {
    // Walk n, n/10, n/100, ... while at least one more digit remains; the
    // number of values visited is the digit count.
    std::iter::successors(Some(n), |&rest| (rest >= 10).then(|| rest / 10)).count()
}
/// Entry point: parses the command line, counts every requested input and
/// prints one row per input followed by an optional total row.
///
/// Behaviour worth knowing:
/// * With no selection flag, lines, words and bytes are shown.
/// * `-c` alone on a named file uses `file_size` and `-l` alone uses
///   `count_lines_streaming`, so the file contents are not loaded through
///   `read_file_with_hints`.
/// * In `--total=auto` mode the total row is printed whenever more than one
///   input was named, even if some of them failed (as GNU `wc` does).
/// * Standard input is shown without a name only when it was selected
///   implicitly (no operands); an explicit `-` operand is printed as `-`.
/// * Per-file failures are reported on stderr and processing continues; a
///   failure to flush stdout is reported as a write error. Either makes the
///   process exit with status 1.
fn main() {
    coreutils_rs::common::reset_sigpipe();
    let cli = parse_args();
    let utf8_locale = wc::is_utf8_locale();

    // No explicit selection means the default trio: lines, words, bytes.
    let no_explicit = !cli.bytes && !cli.chars && !cli.words && !cli.lines && !cli.max_line_length;
    let show = ShowFlags {
        lines: cli.lines || no_explicit,
        words: cli.words || no_explicit,
        bytes: cli.bytes || no_explicit,
        chars: cli.chars,
        max_line_length: cli.max_line_length,
    };

    let total_mode = cli.total.as_str();
    match total_mode {
        "auto" | "always" | "only" | "never" => {}
        _ => {
            eprintln!("wc: invalid argument '{}' for '--total'", cli.total);
            eprintln!("Valid arguments are:");
            eprintln!(" - 'auto'");
            eprintln!(" - 'always'");
            eprintln!(" - 'only'");
            eprintln!(" - 'never'");
            eprintln!("Try 'wc --help' for more information.");
            process::exit(1);
        }
    }

    // Stdin chosen because nothing was named: its row carries no file name.
    let implicit_stdin = cli.files0_from.is_none() && cli.files.is_empty();

    let files: Vec<String> = if let Some(ref f0f) = cli.files0_from {
        if !cli.files.is_empty() {
            eprintln!("wc: extra operand '{}'", cli.files[0]);
            eprintln!("file operands cannot be combined with --files0-from");
            eprintln!("Try 'wc --help' for more information.");
            process::exit(1);
        }
        read_files0_from(f0f)
    } else if cli.files.is_empty() {
        vec!["-".to_string()]
    } else {
        cli.files.clone()
    };

    let mut results: Vec<(wc::WcCounts, String)> = Vec::new();
    let mut total = wc::WcCounts::default();
    let mut had_error = false;
    let mut has_stdin = false;

    for filename in &files {
        if filename == "-" {
            has_stdin = true;
        }

        // Fast path: `-c` alone needs only the file size.
        if show.bytes_only() && filename != "-" {
            match file_size(Path::new(filename)) {
                Ok(size) => {
                    let counts = wc::WcCounts {
                        bytes: size,
                        ..Default::default()
                    };
                    total.bytes += size;
                    results.push((counts, filename.clone()));
                    continue;
                }
                Err(e) => {
                    eprintln!("wc: {}: {}", filename, io_error_msg(&e));
                    had_error = true;
                    continue;
                }
            }
        }

        // Fast path: `-l` alone only has to count newline bytes.
        if show.lines_only() && filename != "-" {
            match count_lines_streaming(Path::new(filename)) {
                Ok((lines, bytes)) => {
                    let counts = wc::WcCounts {
                        lines,
                        bytes,
                        ..Default::default()
                    };
                    total.lines += lines;
                    total.bytes += bytes;
                    results.push((counts, filename.clone()));
                    continue;
                }
                Err(e) => {
                    eprintln!("wc: {}: {}", filename, io_error_msg(&e));
                    had_error = true;
                    continue;
                }
            }
        }

        let data: FileData = if filename == "-" {
            #[cfg(unix)]
            {
                // Prefer mapping stdin; fall back to reading it into memory.
                match try_mmap_stdin(0) {
                    Some(mmap) => FileData::Mmap(mmap),
                    None => match read_stdin() {
                        Ok(d) => FileData::Owned(d),
                        Err(e) => {
                            eprintln!("wc: standard input: {}", io_error_msg(&e));
                            had_error = true;
                            continue;
                        }
                    },
                }
            }
            #[cfg(not(unix))]
            match read_stdin() {
                Ok(d) => FileData::Owned(d),
                Err(e) => {
                    eprintln!("wc: standard input: {}", io_error_msg(&e));
                    had_error = true;
                    continue;
                }
            }
        } else {
            match read_file_with_hints(Path::new(filename), MmapHints::Lazy) {
                Ok(d) => d,
                Err(e) => {
                    eprintln!("wc: {}: {}", filename, io_error_msg(&e));
                    had_error = true;
                    continue;
                }
            }
        };

        let use_parallel = data.len() >= WC_PARALLEL_THRESHOLD;
        let counts = if show.max_line_length && (show.lines || show.words) {
            let mut c = if use_parallel {
                wc::count_all_parallel(&data, utf8_locale)
            } else {
                wc::count_all(&data, utf8_locale)
            };
            // Clear counters that were not requested, on both paths, so the
            // totals do not depend on which implementation handled the file.
            if !show.lines {
                c.lines = 0;
            }
            if !show.words {
                c.words = 0;
            }
            if !show.chars {
                c.chars = 0;
            }
            c
        } else if show.lines && show.words && show.chars && !show.max_line_length {
            let (lines, words, chars) = if use_parallel {
                wc::count_lwc_parallel(&data, utf8_locale)
            } else {
                wc::count_lines_words_chars(&data, utf8_locale)
            };
            wc::WcCounts {
                lines,
                words,
                bytes: data.len() as u64,
                chars,
                max_line_length: 0,
            }
        } else if show.lines && show.words && !show.chars && !show.max_line_length {
            let (lines, words, bytes) = if use_parallel {
                wc::count_lwb_parallel(&data, utf8_locale)
            } else {
                wc::count_lwb(&data, utf8_locale)
            };
            wc::WcCounts {
                lines,
                words,
                bytes,
                chars: 0,
                max_line_length: 0,
            }
        } else {
            // Any other combination: compute each requested counter on its own.
            wc::WcCounts {
                lines: if show.lines {
                    if use_parallel {
                        wc::count_lines_parallel(&data)
                    } else {
                        wc::count_lines(&data)
                    }
                } else {
                    0
                },
                words: if show.words {
                    if use_parallel {
                        wc::count_words_parallel(&data, utf8_locale)
                    } else {
                        wc::count_words_locale(&data, utf8_locale)
                    }
                } else {
                    0
                },
                bytes: data.len() as u64,
                chars: if show.chars {
                    if use_parallel {
                        wc::count_chars_parallel(&data, utf8_locale)
                    } else {
                        wc::count_chars(&data, utf8_locale)
                    }
                } else {
                    0
                },
                max_line_length: if show.max_line_length {
                    if use_parallel {
                        wc::max_line_length_parallel(&data, utf8_locale)
                    } else {
                        wc::max_line_length(&data, utf8_locale)
                    }
                } else {
                    0
                },
            }
        };

        total.lines += counts.lines;
        total.words += counts.words;
        total.bytes += counts.bytes;
        total.chars += counts.chars;
        // The total's maximum line length is the largest seen, not a sum.
        if counts.max_line_length > total.max_line_length {
            total.max_line_length = counts.max_line_length;
        }

        // Only implicitly selected stdin is nameless; an explicit "-" operand
        // keeps its name in the output.
        let display_name = if implicit_stdin {
            String::new()
        } else {
            filename.clone()
        };
        results.push((counts, display_name));
    }

    let show_total = match total_mode {
        "always" => true,
        "never" => false,
        "only" => true,
        // auto: decided by how many inputs were named, so a failed input
        // does not suppress the total row.
        _ => files.len() > 1,
    };

    let num_columns = show.lines as usize
        + show.words as usize
        + show.bytes as usize
        + show.chars as usize
        + show.max_line_length as usize;
    let num_output_rows = if total_mode == "only" {
        if show_total { 1 } else { 0 }
    } else {
        results.len() + if show_total { 1 } else { 0 }
    };

    // A lone stdin row is padded to at least 7 columns when several counts
    // are printed.
    let min_width = if has_stdin && results.len() == 1 {
        7
    } else {
        1
    };
    let width = if total_mode == "only" {
        1
    } else if num_columns <= 1 && num_output_rows <= 1 {
        // A single number on a single row: print it without padding.
        let single_val = if show.lines {
            total.lines
        } else if show.words {
            total.words
        } else if show.chars {
            total.chars
        } else if show.bytes {
            total.bytes
        } else if show.max_line_length {
            total.max_line_length
        } else {
            0
        };
        num_width(single_val)
    } else {
        // Every column is as wide as the largest total needs.
        let max_val = [
            total.lines,
            total.words,
            total.bytes,
            total.chars,
            total.max_line_length,
        ]
        .into_iter()
        .max()
        .unwrap_or(0);
        num_width(max_val).max(min_width)
    };

    // SAFETY: fd 1 is this process's standard output. Wrapping the `File` in
    // `ManuallyDrop` means dropping it never closes the descriptor.
    #[cfg(unix)]
    let mut raw = unsafe { ManuallyDrop::new(std::fs::File::from_raw_fd(1)) };
    #[cfg(unix)]
    let mut out = BufWriter::with_capacity(64 * 1024, &mut *raw);
    #[cfg(not(unix))]
    let mut out = BufWriter::with_capacity(64 * 1024, io::stdout().lock());

    if total_mode != "only" {
        for (counts, name) in &results {
            print_counts_fmt(&mut out, counts, name, width, &show);
        }
    }
    if show_total {
        let label = if total_mode == "only" { "" } else { "total" };
        print_counts_fmt(&mut out, &total, label, width, &show);
    }

    // Output is buffered, so this is where a full disk or closed descriptor
    // surfaces. A vanished reader (broken pipe) is not worth a diagnostic.
    match out.flush() {
        Err(e) if e.kind() != io::ErrorKind::BrokenPipe => {
            eprintln!("wc: write error: {}", io_error_msg(&e));
            had_error = true;
        }
        _ => {}
    }

    if had_error {
        process::exit(1);
    }
}
/// Writes `val` in decimal into the front of `buf`, right-aligned with
/// leading spaces to at least `width` bytes, and returns how many bytes were
/// written (`max(width, digit count)`).
///
/// # Panics
/// Panics if `buf` is shorter than the number of bytes to be written.
#[inline]
fn fmt_u64(val: u64, width: usize, buf: &mut [u8]) -> usize {
    // u64::MAX has 20 decimal digits.
    const CAP: usize = 20;

    // Fill the scratch array from the back; the loop body runs at least once
    // so that zero is rendered as a single '0'.
    let mut scratch = [b'0'; CAP];
    let mut start = CAP;
    let mut rest = val;
    loop {
        start -= 1;
        scratch[start] = b'0' + (rest % 10) as u8;
        rest /= 10;
        if rest == 0 {
            break;
        }
    }

    let digits = &scratch[start..];
    let pad = width.saturating_sub(digits.len());
    let total = pad + digits.len();
    buf[..pad].fill(b' ');
    buf[pad..total].copy_from_slice(digits);
    total
}
/// Writes one output row to `out`: the selected counters in the fixed order
/// lines, words, chars, bytes, max line length, each right-aligned to `width`
/// and separated by single spaces, then (when `filename` is non-empty) a
/// space and the name, then a newline.
///
/// The row is assembled in a growable buffer, so names of any length are
/// handled; the earlier fixed 256-byte stack buffer panicked once the row
/// exceeded it (for example with a long path).
///
/// Write errors are not reported: the function returns nothing, so a failed
/// `write_all` is dropped here.
fn print_counts_fmt(
    out: &mut impl Write,
    counts: &wc::WcCounts,
    filename: &str,
    width: usize,
    show: &ShowFlags,
) {
    // A u64 never needs more than 20 decimal digits.
    const MAX_U64_DIGITS: usize = 20;

    let fields = [
        (show.lines, counts.lines),
        (show.words, counts.words),
        (show.chars, counts.chars),
        (show.bytes, counts.bytes),
        (show.max_line_length, counts.max_line_length),
    ];

    // Space `fmt_u64` may need for one field: the padded width, or the full
    // digit count when the value is wider than `width`.
    let field_room = width.max(MAX_U64_DIGITS);
    let shown = fields.iter().filter(|(enabled, _)| *enabled).count();
    let mut line: Vec<u8> = Vec::with_capacity(shown * (field_room + 1) + filename.len() + 2);

    for (_, value) in fields.into_iter().filter(|(enabled, _)| *enabled) {
        // Every field writes at least one byte, so a non-empty buffer means
        // this is not the first field and needs a separator.
        if !line.is_empty() {
            line.push(b' ');
        }
        let start = line.len();
        line.resize(start + field_room, b' ');
        let written = fmt_u64(value, width, &mut line[start..]);
        line.truncate(start + written);
    }

    if !filename.is_empty() {
        line.push(b' ');
        line.extend_from_slice(filename.as_bytes());
    }
    line.push(b'\n');

    let _ = out.write_all(&line);
}
fn read_files0_from(path: &str) -> Vec<String> {
let data = if path == "-" {
read_stdin().unwrap_or_default()
} else {
std::fs::read(path).unwrap_or_else(|e| {
eprintln!(
"wc: cannot open '{}' for reading: {}",
path,
io_error_msg(&e)
);
process::exit(1);
})
};
data.split(|&b| b == 0)
.filter(|s| !s.is_empty())
.map(|s| String::from_utf8_lossy(s).into_owned())
.collect()
}
#[cfg(test)]
mod tests {
    use std::io::Write;
    use std::process::{Command, Output, Stdio};

    /// Builds a `Command` for the `fwc` binary, located two directory levels
    /// above the running test executable.
    fn cmd() -> Command {
        let mut path = std::env::current_exe().unwrap();
        path.pop();
        path.pop();
        path.push("fwc");
        Command::new(path)
    }

    /// Spawns `command` with piped stdin/stdout, feeds it `input`, closes
    /// stdin and returns the collected output.
    fn run_with_stdin(mut command: Command, input: &[u8]) -> Output {
        let mut child = command
            .stdin(Stdio::piped())
            .stdout(Stdio::piped())
            .spawn()
            .unwrap();
        {
            // Dropping the handle at the end of this scope signals EOF.
            let mut pipe = child.stdin.take().unwrap();
            pipe.write_all(input).unwrap();
        }
        child.wait_with_output().unwrap()
    }

    /// Runs `fwc` with `args`, feeding `input` on stdin.
    fn wc_stdin(args: &[&str], input: &[u8]) -> Output {
        let mut command = cmd();
        command.args(args);
        run_with_stdin(command, input)
    }

    /// Lossy UTF-8 view of the captured stdout.
    fn stdout_text(output: &Output) -> String {
        String::from_utf8_lossy(&output.stdout).into_owned()
    }

    #[test]
    fn test_wc_basic() {
        let output = wc_stdin(&[], b"hello world\n");
        assert!(output.status.success());
        let stdout = stdout_text(&output);
        assert!(stdout.contains("1") && stdout.contains("2") && stdout.contains("12"));
    }

    #[test]
    fn test_wc_lines() {
        let output = wc_stdin(&["-l"], b"a\nb\nc\n");
        assert!(output.status.success());
        let stdout = stdout_text(&output);
        assert!(stdout.contains("3"));
    }

    #[test]
    fn test_wc_words() {
        let output = wc_stdin(&["-w"], b"hello world foo\n");
        assert!(output.status.success());
        let stdout = stdout_text(&output);
        assert!(stdout.contains("3"));
    }

    #[test]
    fn test_wc_empty_input() {
        let output = wc_stdin(&[], b"");
        assert!(output.status.success());
        let stdout = stdout_text(&output);
        assert!(stdout.contains("0"));
    }

    #[test]
    fn test_wc_bytes() {
        let output = wc_stdin(&["-c"], b"hello\n");
        assert!(output.status.success());
        let stdout = stdout_text(&output);
        assert!(stdout.trim().starts_with("6") || stdout.contains(" 6"));
    }

    #[test]
    fn test_wc_max_line_length() {
        let output = wc_stdin(&["-L"], b"abc\nabcdef\nab\n");
        assert!(output.status.success());
        let stdout = stdout_text(&output);
        assert!(stdout.contains("6"));
    }

    #[test]
    fn test_wc_file() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("test.txt");
        std::fs::write(&file, "one two\nthree\n").unwrap();
        let output = cmd().arg(file.to_str().unwrap()).output().unwrap();
        assert!(output.status.success());
        let stdout = stdout_text(&output);
        assert!(stdout.contains("2"));
        assert!(stdout.contains("3"));
        assert!(stdout.contains("14"));
    }

    #[test]
    fn test_wc_multiple_files() {
        let dir = tempfile::tempdir().unwrap();
        let f1 = dir.path().join("a.txt");
        let f2 = dir.path().join("b.txt");
        std::fs::write(&f1, "hello\n").unwrap();
        std::fs::write(&f2, "world\n").unwrap();
        let output = cmd()
            .args([f1.to_str().unwrap(), f2.to_str().unwrap()])
            .output()
            .unwrap();
        assert!(output.status.success());
        let stdout = stdout_text(&output);
        assert!(stdout.contains("total"));
    }

    #[test]
    fn test_wc_nonexistent_file() {
        let output = cmd().arg("/nonexistent_xyz_wc").output().unwrap();
        assert!(!output.status.success());
    }

    #[test]
    fn test_wc_no_newline() {
        let output = wc_stdin(&["-l"], b"hello");
        assert!(output.status.success());
        let stdout = stdout_text(&output);
        assert!(stdout.trim().starts_with("0") || stdout.contains(" 0"));
    }

    #[test]
    fn test_wc_only_newlines() {
        let output = wc_stdin(&[], b"\n\n\n");
        assert!(output.status.success());
        let stdout = stdout_text(&output);
        assert!(stdout.contains("3"));
        assert!(stdout.contains("0"));
    }

    #[cfg(unix)]
    #[test]
    fn test_wc_c_locale_3state_transparent() {
        let mut command = cmd();
        command.arg("-w").env("LC_ALL", "C");
        let output = run_with_stdin(command, b"\xe4\xbd\xa0\xe5\xa5\xbd\n");
        assert!(output.status.success());
        let stdout = stdout_text(&output);
        assert!(
            stdout.trim().starts_with("0") || stdout.contains(" 0"),
            "C locale 3-state: high bytes are transparent, expected 0 words, got: {}",
            stdout.trim()
        );
    }

    #[cfg(unix)]
    #[test]
    fn test_wc_chars_vs_bytes_utf8() {
        let output = wc_stdin(&["-m"], "é\n".as_bytes());
        assert!(output.status.success());
        let stdout = stdout_text(&output);
        assert!(stdout.contains("2"));
    }

    #[test]
    fn test_wc_combined_flags() {
        let output = wc_stdin(&["-l", "-w"], b"one two\nthree\n");
        assert!(output.status.success());
        let stdout = stdout_text(&output);
        assert!(stdout.contains("2") && stdout.contains("3"));
    }

    #[cfg(unix)]
    #[test]
    fn test_wc_c_locale_default_cjk() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("cjk.txt");
        std::fs::write(&file, "Hello, 世界!\n你好世界\nこんにちは\n").unwrap();
        let output = cmd()
            .env("LC_ALL", "C")
            .arg(file.to_str().unwrap())
            .output()
            .unwrap();
        assert!(output.status.success());
        let stdout = stdout_text(&output);
        let parts: Vec<&str> = stdout.split_whitespace().collect();
        assert!(
            parts.len() >= 3,
            "Expected at least 3 fields (lines words bytes), got: {}",
            stdout.trim()
        );
        assert_eq!(
            parts[1],
            "2",
            "Expected 2 words in C locale CJK text, got: {}",
            stdout.trim()
        );
    }
}