fcoreutils 0.22.0

High-performance GNU coreutils replacement with SIMD and parallelism
Documentation
use std::io::{self, BufWriter, Write};
#[cfg(unix)]
use std::mem::ManuallyDrop;
#[cfg(unix)]
use std::os::unix::io::FromRawFd;
use std::path::Path;
use std::process;

use coreutils_rs::common::io::{MmapHints, read_file_with_hints, read_stdin};
use coreutils_rs::common::{enlarge_stdout_pipe, io_error_msg};
use coreutils_rs::expand::{TabStops, expand_bytes, parse_tab_stops};

/// Command-line configuration for `expand`, as produced by `parse_args`.
struct Cli {
    /// Set by `-i` / `--initial` ("do not convert tabs after non blanks").
    initial: bool,
    /// Tab stop layout; `TabStops::Regular(8)` unless a tab specification
    /// was given via `-t`, `--tabs` or the `-N` shorthand.
    tabs: TabStops,
    /// Input operands in command-line order. May be empty, in which case
    /// `main` substitutes `-` (standard input).
    files: Vec<String>,
}

/// Reports a command-line usage error and terminates the process.
///
/// Writes `expand: <message>` followed by the `--help` hint to standard
/// error, then exits with status 1.
fn usage_error(message: &str) -> ! {
    eprintln!("expand: {message}");
    eprintln!("Try 'expand --help' for more information.");
    process::exit(1);
}

/// Writes the `--help` text to standard output and exits with status 0.
fn print_help() -> ! {
    print!(
        "Usage: expand [OPTION]... [FILE]...\n\
         Convert tabs in each FILE to spaces, writing to standard output.\n\n\
         With no FILE, or when FILE is -, read standard input.\n\n\
         Mandatory arguments to long options are mandatory for short options too.\n\
         \x20 -i, --initial             do not convert tabs after non blanks\n\
         \x20 -t, --tabs=N              have tabs N characters apart, not 8\n\
         \x20 -t, --tabs=LIST           use comma separated list of tab positions.\n\
         \x20                           The last specified position can be prefixed\n\
         \x20                           with '/' to specify a tab size to use after\n\
         \x20                           the last explicitly specified tab stop.\n\
         \x20     --help                display this help and exit\n\
         \x20     --version             output version information and exit\n"
    );
    process::exit(0);
}

/// Parses the process arguments into a [`Cli`].
///
/// Recognised options:
/// * `-i`, `--initial`
/// * `-t SPEC`, `-tSPEC`, `--tabs SPEC`, `--tabs=SPEC`
/// * `-N...` (an argument whose option letters reach a digit: the rest of
///   the argument, starting at that digit, is the tab specification)
/// * `--help`, `--version`
///
/// A bare `--` ends option processing; everything after it is a file
/// operand. A lone `-` is a file operand. When several tab specifications
/// are given, the last one wins.
///
/// This function never returns an error: every usage error (unknown
/// option, missing option argument) prints a diagnostic plus the `--help`
/// hint and exits with status 1, and an invalid tab specification prints
/// the parser's message and exits with status 1.
fn parse_args() -> Cli {
    let mut initial = false;
    let mut files = Vec::new();
    let mut tab_spec: Option<String> = None;

    let mut args = std::env::args_os().skip(1);

    // `args.next()` is also called inside the loop body to fetch option
    // values, so this cannot be written as a `for` loop over `args`.
    #[allow(clippy::while_let_on_iterator)]
    while let Some(arg) = args.next() {
        let bytes = arg.as_encoded_bytes();

        if bytes == b"--" {
            // End of options: the remaining arguments are all operands.
            files.extend(args.by_ref().map(|rest| rest.to_string_lossy().into_owned()));
            break;
        }

        if bytes.starts_with(b"--") {
            let text = arg.to_string_lossy();
            if let Some(spec) = text.strip_prefix("--tabs=") {
                tab_spec = Some(spec.to_owned());
                continue;
            }
            match bytes {
                b"--initial" => initial = true,
                b"--tabs" => match args.next() {
                    Some(value) => tab_spec = Some(value.to_string_lossy().into_owned()),
                    None => usage_error("option '--tabs' requires an argument"),
                },
                b"--help" => print_help(),
                b"--version" => {
                    println!("expand (fcoreutils) {}", env!("CARGO_PKG_VERSION"));
                    process::exit(0);
                }
                _ => usage_error(&format!("unrecognized option '{text}'")),
            }
            continue;
        }

        if bytes.len() > 1 && bytes[0] == b'-' {
            // Cluster of short options. Every byte before `idx` is ASCII
            // ('-' followed by 'i's, anything else leaves the loop), so
            // `idx` is also a valid char boundary in the lossy string.
            for (idx, &flag) in bytes.iter().enumerate().skip(1) {
                match flag {
                    b'i' => initial = true,
                    b't' => {
                        // -t takes a value: the rest of this argument if
                        // there is any, otherwise the next argument.
                        tab_spec = Some(if idx + 1 < bytes.len() {
                            arg.to_string_lossy()[idx + 1..].to_owned()
                        } else {
                            match args.next() {
                                Some(value) => value.to_string_lossy().into_owned(),
                                None => usage_error("option requires an argument -- 't'"),
                            }
                        });
                        break;
                    }
                    b'0'..=b'9' => {
                        // GNU expand supports -N as shorthand for -t N.
                        tab_spec = Some(arg.to_string_lossy()[idx..].to_owned());
                        break;
                    }
                    other => usage_error(&format!("invalid option -- '{}'", other as char)),
                }
            }
            continue;
        }

        files.push(arg.to_string_lossy().into_owned());
    }

    let tabs = match tab_spec {
        None => TabStops::Regular(8),
        Some(spec) => match parse_tab_stops(&spec) {
            Ok(stops) => stops,
            Err(e) => {
                eprintln!("expand: {}", e);
                process::exit(1);
            }
        },
    };

    Cli {
        initial,
        tabs,
        files,
    }
}

/// Entry point: expands every input named on the command line to standard
/// output.
///
/// With no file operands, standard input (`-`) is read. An input that
/// cannot be read is reported on standard error and skipped; processing
/// continues with the next input. A broken pipe while writing ends the
/// program with status 0. Any other read or write error makes the final
/// exit status 1.
fn main() {
    const OUT_BUF_BYTES: usize = 1024 * 1024;

    coreutils_rs::common::reset_sigpipe();
    enlarge_stdout_pipe();

    let cli = parse_args();

    // No operands means "read standard input".
    let mut inputs: Vec<String> = cli.files;
    if inputs.is_empty() {
        inputs.push("-".to_string());
    }

    // SAFETY: descriptor 1 is the process's standard output. The `File` is
    // wrapped in `ManuallyDrop`, so it never closes the descriptor it
    // borrows here.
    #[cfg(unix)]
    let raw_stdout = unsafe { ManuallyDrop::new(std::fs::File::from_raw_fd(1)) };
    #[cfg(unix)]
    let mut out = BufWriter::with_capacity(OUT_BUF_BYTES, &*raw_stdout);
    #[cfg(not(unix))]
    let std_stdout = io::stdout();
    #[cfg(not(unix))]
    let mut out = BufWriter::with_capacity(OUT_BUF_BYTES, std_stdout.lock());

    let mut failed = false;

    for name in &inputs {
        // Load the whole input, turning a failure into its final diagnostic.
        let loaded = if name == "-" {
            match read_stdin() {
                Ok(bytes) => Ok(coreutils_rs::common::io::FileData::Owned(bytes)),
                Err(e) => Err(format!("expand: standard input: {}", io_error_msg(&e))),
            }
        } else {
            read_file_with_hints(Path::new(name), MmapHints::Lazy)
                .map_err(|e| format!("expand: {}: {}", name, io_error_msg(&e)))
        };

        let data = match loaded {
            Ok(data) => data,
            Err(message) => {
                eprintln!("{}", message);
                failed = true;
                continue;
            }
        };

        match expand_bytes(&data, &cli.tabs, cli.initial, &mut out) {
            Ok(_) => {}
            // The reader went away; nothing more can usefully be written.
            Err(e) if e.kind() == io::ErrorKind::BrokenPipe => process::exit(0),
            Err(e) => {
                eprintln!("expand: write error: {}", io_error_msg(&e));
                failed = true;
            }
        }
    }

    match out.flush() {
        Err(e) if e.kind() != io::ErrorKind::BrokenPipe => {
            eprintln!("expand: write error: {}", io_error_msg(&e));
            failed = true;
        }
        _ => {}
    }

    if failed {
        process::exit(1);
    }
}

#[cfg(test)]
mod tests {
    use std::io::Write;
    use std::process::{Command, Output, Stdio};

    /// Builds a `Command` for the `fexpand` binary, located two directory
    /// levels above the running test executable.
    // NOTE(review): presumably this resolves to `target/<profile>/fexpand`
    // when run under cargo — confirm against the build layout.
    fn cmd() -> Command {
        let mut exe = std::env::current_exe().unwrap();
        for _ in 0..2 {
            exe.pop();
        }
        exe.push("fexpand");
        Command::new(exe)
    }

    /// Runs `fexpand` with `args`, writes `input` to its stdin, closes
    /// stdin, and returns the collected output once the process exits.
    fn run_with_stdin(args: &[&str], input: &[u8]) -> Output {
        let mut child = cmd()
            .args(args)
            .stdin(Stdio::piped())
            .stdout(Stdio::piped())
            .spawn()
            .unwrap();
        // The `ChildStdin` temporary is dropped at the end of this
        // statement, which closes the pipe so the child sees EOF.
        child.stdin.take().unwrap().write_all(input).unwrap();
        child.wait_with_output().unwrap()
    }

    #[test]
    fn test_expand_basic_tab() {
        let output = run_with_stdin(&[], b"a\tb\n");
        assert!(output.status.success());
        let text = String::from_utf8_lossy(&output.stdout);
        // Default tab stop is 8, so 'a' + 7 spaces + 'b'
        assert!(!text.contains('\t'), "tabs should be expanded to spaces");
        assert_eq!(text.trim_end(), "a       b");
    }

    #[test]
    fn test_expand_custom_tabstop() {
        let output = run_with_stdin(&["-t", "4"], b"\thello\n");
        assert!(output.status.success());
        let text = String::from_utf8_lossy(&output.stdout);
        assert_eq!(text, "    hello\n");
    }

    #[test]
    fn test_expand_no_tabs() {
        let output = run_with_stdin(&[], b"no tabs here\n");
        assert!(output.status.success());
        assert_eq!(output.stdout, b"no tabs here\n");
    }

    #[test]
    fn test_expand_empty_input() {
        let output = run_with_stdin(&[], b"");
        assert!(output.status.success());
        assert!(output.stdout.is_empty());
    }

    #[test]
    fn test_expand_multiple_tabs() {
        let output = run_with_stdin(&["-t", "4"], b"\t\t\n");
        assert!(output.status.success());
        assert_eq!(output.stdout, b"        \n");
    }

    #[test]
    fn test_expand_file() {
        let scratch = tempfile::tempdir().unwrap();
        let input_path = scratch.path().join("tabs.txt");
        std::fs::write(&input_path, "a\tb\n").unwrap();
        let output = cmd().arg(input_path.to_str().unwrap()).output().unwrap();
        assert!(output.status.success());
        let text = String::from_utf8_lossy(&output.stdout);
        assert!(!text.contains('\t'));
    }

    #[test]
    fn test_expand_initial_only() {
        let output = run_with_stdin(&["-i"], b"\ta\tb\n");
        assert!(output.status.success());
        let text = String::from_utf8_lossy(&output.stdout);
        // First tab should be expanded, but the one between a and b should remain
        assert!(text.contains('\t'), "non-initial tab should remain");
        assert!(text.starts_with("        ") || text.starts_with("    "));
    }

    #[test]
    fn test_expand_nonexistent_file() {
        let output = cmd().arg("/nonexistent/file.txt").output().unwrap();
        assert!(!output.status.success());
    }
}