bashkit 0.5.0 - Docs.rs

//! Cut and tr builtins - extract fields and translate characters

use async_trait::async_trait;

use super::{Builtin, Context, read_text_file};
use crate::error::Result;
use crate::interpreter::ExecResult;

/// The cut builtin - remove sections from each line.
///
/// Usage: cut -d DELIM -f FIELDS [FILE...]
///        cut -b BYTES [FILE...]
///        cut -c CHARS [FILE...]
///
/// Options:
///   -d DELIM            Use DELIM instead of TAB for field delimiter
///   -f FIELDS           Select only these fields (1-indexed)
///   -b BYTES            Select only these bytes (1-indexed, same as -c for ASCII)
///   -c CHARS            Select only these characters (1-indexed)
///   -s                  Only print lines containing delimiter (with -f)
///   --complement        Complement the selection
///   --output-delimiter  Use STRING as output delimiter
pub struct Cut;

#[derive(PartialEq)]
enum CutMode {
    Fields,
    Chars,
}

#[async_trait]
impl Builtin for Cut {
    async fn execute(&self, ctx: Context<'_>) -> Result<ExecResult> {
        if let Some(r) = super::check_help_version(
            ctx.args,
            "Usage: cut OPTION... [FILE]...\nPrint selected parts of lines from each FILE to standard output.\n\n  -d DELIM\t\tuse DELIM instead of TAB for field delimiter\n  -f FIELDS\t\tselect only these fields\n  -b BYTES\t\tselect only these bytes\n  -c CHARS\t\tselect only these characters\n  -s\t\t\tonly print lines containing delimiter\n  -z\t\t\tline delimiter is NUL, not newline\n  --complement\t\tcomplement the selection\n  --output-delimiter=STRING\tuse STRING as output delimiter\n  --help\t\tdisplay this help and exit\n  --version\t\toutput version information and exit\n",
            Some("cut (bashkit) 0.1"),
        ) {
            return Ok(r);
        }
        let mut delimiter = '\t';
        let mut spec = String::new();
        let mut mode = CutMode::Fields;
        let mut complement = false;
        let mut only_delimited = false;
        let mut zero_terminated = false;
        let mut output_delimiter: Option<String> = None;
        let mut files = Vec::new();

        // Parse arguments
        let mut p = super::arg_parser::ArgParser::new(ctx.args);
        while !p.is_done() {
            if let Some(val) = p.flag_value_opt("-d") {
                delimiter = val.chars().next().unwrap_or('\t');
            } else if let Some(val) = p.flag_value_opt("-f") {
                spec = val.to_string();
                mode = CutMode::Fields;
            } else if let Some(val) = p.flag_value_opt("-c") {
                spec = val.to_string();
                mode = CutMode::Chars;
            } else if let Some(val) = p.flag_value_opt("-b") {
                spec = val.to_string();
                mode = CutMode::Chars;
            } else if p.flag("-s") {
                only_delimited = true;
            } else if p.flag("-z") {
                zero_terminated = true;
            } else if p.flag("--complement") {
                complement = true;
            } else if let Some(val) = p
                .current()
                .and_then(|s| s.strip_prefix("--output-delimiter="))
            {
                output_delimiter = Some(val.to_string());
                p.advance();
            } else if p.flag("--output-delimiter") {
                if let Some(val) = p.positional() {
                    output_delimiter = Some(val.to_string());
                }
            } else if let Some(arg) = p.current().filter(|s| !s.starts_with('-')) {
                files.push(arg.to_string());
                p.advance();
            } else {
                p.advance();
            }
        }

        if spec.is_empty() {
            return Ok(ExecResult::err(
                "cut: you must specify a list of fields\n".to_string(),
                1,
            ));
        }

        // Parse position specification (supports open-ended ranges like "3-" and "-3")
        let positions = parse_position_spec(&spec);
        let out_delim = output_delimiter.unwrap_or_else(|| delimiter.to_string());

        let process_line = |line: &str| -> Option<String> {
            match mode {
                CutMode::Chars => {
                    let chars: Vec<char> = line.chars().collect();
                    let total = chars.len();
                    let resolved = resolve_positions(&positions, total);
                    let selected: Vec<char> = if complement {
                        chars
                            .iter()
                            .enumerate()
                            .filter(|(i, _)| !resolved.contains(&(i + 1)))
                            .map(|(_, c)| *c)
                            .collect()
                    } else {
                        resolved
                            .iter()
                            .filter_map(|&p| chars.get(p - 1).copied())
                            .collect()
                    };
                    Some(selected.into_iter().collect())
                }
                CutMode::Fields => {
                    // -s: skip lines without delimiter
                    if only_delimited && !line.contains(delimiter) {
                        return None;
                    }
                    let parts: Vec<&str> = line.split(delimiter).collect();
                    let total = parts.len();
                    let resolved = resolve_positions(&positions, total);
                    let selected: Vec<&str> = if complement {
                        parts
                            .iter()
                            .enumerate()
                            .filter(|(i, _)| !resolved.contains(&(i + 1)))
                            .map(|(_, s)| *s)
                            .collect()
                    } else {
                        resolved
                            .iter()
                            .filter_map(|&f| parts.get(f - 1).copied())
                            .collect()
                    };
                    Some(selected.join(&out_delim))
                }
            }
        };

        let mut output = String::new();
        let line_sep = if zero_terminated { '\0' } else { '\n' };
        let out_sep = if zero_terminated { "\0" } else { "\n" };

        let process_input = |text: &str, output: &mut String| {
            for line in text.split(line_sep) {
                if line.is_empty() {
                    continue;
                }
                if let Some(result) = process_line(line) {
                    output.push_str(&result);
                    output.push_str(out_sep);
                }
            }
        };

        if files.is_empty() || files.iter().all(|f| f.as_str() == "-") {
            if let Some(stdin) = ctx.stdin {
                process_input(stdin, &mut output);
            }
        } else {
            for file in &files {
                if file.as_str() == "-" {
                    if let Some(stdin) = ctx.stdin {
                        process_input(stdin, &mut output);
                    }
                    continue;
                }

                let path = if file.starts_with('/') {
                    std::path::PathBuf::from(file)
                } else {
                    ctx.cwd.join(file)
                };

                let text = match read_text_file(&*ctx.fs, &path, "cut").await {
                    Ok(t) => t,
                    Err(e) => return Ok(e),
                };
                process_input(&text, &mut output);
            }
        }

        Ok(ExecResult::ok(output))
    }
}

/// Position in a cut specification — can be open-ended
#[derive(Debug, Clone)]
enum Position {
    Single(usize),
    Range(usize, usize),
    FromStart(usize), // -N (1 to N)
    ToEnd(usize),     // N- (N to end)
}

/// Parse a position specification like "1", "1,3", "1-3", "3-", "-3"
fn parse_position_spec(spec: &str) -> Vec<Position> {
    let mut positions = Vec::new();

    for part in spec.split(',') {
        if let Some((start, end)) = part.split_once('-') {
            if start.is_empty() {
                // -N
                if let Ok(n) = end.parse::<usize>() {
                    positions.push(Position::FromStart(n));
                }
            } else if end.is_empty() {
                // N-
                if let Ok(n) = start.parse::<usize>() {
                    positions.push(Position::ToEnd(n));
                }
            } else {
                // N-M
                let s: usize = start.parse().unwrap_or(1);
                let e: usize = end.parse().unwrap_or(s);
                positions.push(Position::Range(s, e));
            }
        } else if let Ok(f) = part.parse::<usize>()
            && f > 0
        {
            positions.push(Position::Single(f));
        }
    }

    positions
}

/// Resolve position specifications into concrete 1-indexed positions
fn resolve_positions(positions: &[Position], total: usize) -> Vec<usize> {
    let mut result = Vec::new();
    for pos in positions {
        match pos {
            Position::Single(n) => {
                if *n > 0 && *n <= total {
                    result.push(*n);
                }
            }
            Position::Range(s, e) => {
                let start = (*s).max(1);
                let end = (*e).min(total);
                for i in start..=end {
                    result.push(i);
                }
            }
            Position::FromStart(n) => {
                for i in 1..=(*n).min(total) {
                    result.push(i);
                }
            }
            Position::ToEnd(n) => {
                let start = (*n).max(1);
                for i in start..=total {
                    result.push(i);
                }
            }
        }
    }
    result.sort();
    result.dedup();
    result
}

/// The tr builtin - translate or delete characters.
///
/// Usage: tr [-d] [-s] [-c/-C] SET1 [SET2]
///
/// Options:
///   -d     Delete characters in SET1
///   -s     Squeeze repeated output characters in SET2 (or SET1 if no SET2)
///   -c/-C  Complement SET1 (use all chars NOT in SET1)
///
/// SET1 and SET2 can contain character ranges like a-z, A-Z, 0-9
/// and POSIX classes like [:lower:], [:upper:], [:digit:], [:alpha:],
/// [:alnum:], [:space:], [:blank:], [:punct:], [:xdigit:], [:print:], [:graph:]
pub struct Tr;

#[async_trait]
impl Builtin for Tr {
    async fn execute(&self, ctx: Context<'_>) -> Result<ExecResult> {
        if let Some(r) = super::check_help_version(
            ctx.args,
            "Usage: tr [OPTION]... SET1 [SET2]\nTranslate, squeeze, and/or delete characters from standard input.\n\n  -d\t\tdelete characters in SET1\n  -s\t\tsqueeze repeated output characters\n  -c, -C\tcomplement SET1\n  --help\tdisplay this help and exit\n  --version\toutput version information and exit\n",
            Some("tr (bashkit) 0.1"),
        ) {
            return Ok(r);
        }
        let mut delete = false;
        let mut squeeze = false;
        let mut complement = false;

        // Parse flags (can be combined like -ds, -cd)
        let mut non_flag_args: Vec<String> = Vec::new();
        let mut p = super::arg_parser::ArgParser::new(ctx.args);
        while !p.is_done() {
            let flags = p.bool_flags("dscC");
            if !flags.is_empty() {
                for ch in flags {
                    match ch {
                        'd' => delete = true,
                        's' => squeeze = true,
                        'c' | 'C' => complement = true,
                        _ => {}
                    }
                }
            } else if let Some(arg) = p.positional() {
                non_flag_args.push(arg.to_string());
            }
        }

        if non_flag_args.is_empty() {
            return Ok(ExecResult::err("tr: missing operand\n".to_string(), 1));
        }

        let mut set1 = expand_char_set(&non_flag_args[0]);
        if complement {
            // Complement: use all byte-range chars (0-255) NOT in set1.
            // Covers full Latin-1 range so binary data from /dev/urandom
            // (where each byte maps to one char) is handled correctly.
            let original = set1.clone();
            set1 = (0u16..=255)
                .map(|b| b as u8 as char)
                .filter(|c| !original.contains(c))
                .collect();
        }

        let stdin = ctx.stdin.unwrap_or("");

        let result = if delete && squeeze {
            // -ds: delete SET1 chars, then squeeze SET2 chars
            let set2 = if non_flag_args.len() >= 2 {
                expand_char_set(&non_flag_args[1])
            } else {
                set1.clone()
            };
            let after_delete: String = stdin.chars().filter(|c| !set1.contains(c)).collect();
            squeeze_chars(&after_delete, &set2)
        } else if delete {
            stdin
                .chars()
                .filter(|c| !set1.contains(c))
                .collect::<String>()
        } else if squeeze && non_flag_args.len() < 2 {
            // -s with only SET1: squeeze characters in SET1
            squeeze_chars(stdin, &set1)
        } else {
            if non_flag_args.len() < 2 {
                return Ok(ExecResult::err(
                    "tr: missing operand after SET1\n".to_string(),
                    1,
                ));
            }

            let set2 = expand_char_set(&non_flag_args[1]);

            let translated: String = stdin
                .chars()
                .map(|c| {
                    if let Some(pos) = set1.iter().position(|&x| x == c) {
                        *set2.get(pos).or(set2.last()).unwrap_or(&c)
                    } else {
                        c
                    }
                })
                .collect();

            if squeeze {
                squeeze_chars(&translated, &set2)
            } else {
                translated
            }
        };

        Ok(ExecResult::ok(result))
    }
}

/// Squeeze repeated consecutive characters that are in the given set
fn squeeze_chars(s: &str, set: &[char]) -> String {
    let mut result = String::with_capacity(s.len());
    let mut last_char: Option<char> = None;

    for c in s.chars() {
        if set.contains(&c) && last_char == Some(c) {
            continue; // skip repeated char in squeeze set
        }
        result.push(c);
        last_char = Some(c);
    }
    result
}

/// Expand a character set specification like "a-z" into a list of characters.
/// Supports POSIX character classes: [:lower:], [:upper:], [:digit:], [:alpha:], [:alnum:], [:space:]
fn expand_char_set(spec: &str) -> Vec<char> {
    let mut chars = Vec::new();
    let char_vec: Vec<char> = spec.chars().collect();
    let len = char_vec.len();
    let mut i = 0;

    while i < len {
        // Check for POSIX character class [:class:]
        if char_vec[i] == '['
            && i + 1 < len
            && char_vec[i + 1] == ':'
            && let Some(end) = spec[spec
                .char_indices()
                .nth(i + 2)
                .map_or(spec.len(), |(pos, _)| pos)..]
                .find(":]")
        {
            let class_start = spec
                .char_indices()
                .nth(i + 2)
                .map_or(spec.len(), |(pos, _)| pos);
            let class_name = &spec[class_start..class_start + end];
            match class_name {
                "lower" => chars.extend('a'..='z'),
                "upper" => chars.extend('A'..='Z'),
                "digit" => chars.extend('0'..='9'),
                "alpha" => {
                    chars.extend('a'..='z');
                    chars.extend('A'..='Z');
                }
                "alnum" => {
                    chars.extend('a'..='z');
                    chars.extend('A'..='Z');
                    chars.extend('0'..='9');
                }
                "space" => chars.extend([' ', '\t', '\n', '\r', '\x0b', '\x0c']),
                "blank" => chars.extend([' ', '\t']),
                "punct" => {
                    for code in 0x21u8..=0x7e {
                        let c = code as char;
                        if !c.is_ascii_alphanumeric() {
                            chars.push(c);
                        }
                    }
                }
                "xdigit" => {
                    chars.extend('0'..='9');
                    chars.extend('A'..='F');
                    chars.extend('a'..='f');
                }
                "print" => {
                    for code in 0x20u8..=0x7e {
                        chars.push(code as char);
                    }
                }
                "graph" => {
                    for code in 0x21u8..=0x7e {
                        chars.push(code as char);
                    }
                }
                "cntrl" => {
                    for code in 0u8..=0x1f {
                        chars.push(code as char);
                    }
                    chars.push(0x7f as char);
                }
                _ => {
                    chars.push('[');
                    i += 1;
                    continue;
                }
            }
            // Count chars in the class spec to advance properly
            let class_char_count = class_name.chars().count();
            i += 2 + class_char_count + 2; // skip past [: + class + :]
            continue;
        }

        let c = char_vec[i];
        // Check for range like a-z
        if i + 2 < len && char_vec[i + 1] == '-' {
            let end_char = char_vec[i + 2];
            let start = c as u32;
            let end = end_char as u32;
            for code in start..=end {
                if let Some(ch) = char::from_u32(code) {
                    chars.push(ch);
                }
            }
            i += 3;
        } else if i + 1 == len - 1 && char_vec[i + 1] == '-' {
            // Trailing dash
            chars.push(c);
            chars.push('-');
            i += 2;
        } else {
            // Handle escape sequences
            if c == '\\' && i + 1 < len {
                match char_vec[i + 1] {
                    'n' => {
                        chars.push('\n');
                        i += 2;
                        continue;
                    }
                    't' => {
                        chars.push('\t');
                        i += 2;
                        continue;
                    }
                    '0' => {
                        chars.push('\0');
                        i += 2;
                        continue;
                    }
                    '\\' => {
                        chars.push('\\');
                        i += 2;
                        continue;
                    }
                    _ => {}
                }
            }
            chars.push(c);
            i += 1;
        }
    }

    chars
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;
    use std::path::PathBuf;
    use std::sync::Arc;

    use crate::fs::InMemoryFs;

    async fn run_cut(args: &[&str], stdin: Option<&str>) -> ExecResult {
        let fs = Arc::new(InMemoryFs::new());
        let mut variables = HashMap::new();
        let env = HashMap::new();
        let mut cwd = PathBuf::from("/");

        let args: Vec<String> = args.iter().map(|s| s.to_string()).collect();
        let ctx = Context {
            args: &args,
            env: &env,
            variables: &mut variables,
            cwd: &mut cwd,
            fs,
            stdin,
            #[cfg(feature = "http_client")]
            http_client: None,
            #[cfg(feature = "git")]
            git_client: None,
            #[cfg(feature = "ssh")]
            ssh_client: None,
            shell: None,
        };

        Cut.execute(ctx).await.unwrap()
    }

    async fn run_tr(args: &[&str], stdin: Option<&str>) -> ExecResult {
        let fs = Arc::new(InMemoryFs::new());
        let mut variables = HashMap::new();
        let env = HashMap::new();
        let mut cwd = PathBuf::from("/");

        let args: Vec<String> = args.iter().map(|s| s.to_string()).collect();
        let ctx = Context {
            args: &args,
            env: &env,
            variables: &mut variables,
            cwd: &mut cwd,
            fs,
            stdin,
            #[cfg(feature = "http_client")]
            http_client: None,
            #[cfg(feature = "git")]
            git_client: None,
            #[cfg(feature = "ssh")]
            ssh_client: None,
            shell: None,
        };

        Tr.execute(ctx).await.unwrap()
    }

    #[tokio::test]
    async fn test_cut_single_field() {
        let result = run_cut(&["-d", ",", "-f", "2"], Some("a,b,c\n1,2,3\n")).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "b\n2\n");
    }

    #[tokio::test]
    async fn test_cut_multiple_fields() {
        let result = run_cut(&["-d", ",", "-f", "1,3"], Some("a,b,c\n1,2,3\n")).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "a,c\n1,3\n");
    }

    #[tokio::test]
    async fn test_cut_field_range() {
        let result = run_cut(&["-d", ",", "-f", "1-2"], Some("a,b,c,d\n")).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "a,b\n");
    }

    #[tokio::test]
    async fn test_tr_lowercase_to_uppercase() {
        let result = run_tr(&["a-z", "A-Z"], Some("hello world")).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "HELLO WORLD");
    }

    #[tokio::test]
    async fn test_tr_delete() {
        let result = run_tr(&["-d", "aeiou"], Some("hello world")).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "hll wrld");
    }

    #[tokio::test]
    async fn test_tr_single_char() {
        let result = run_tr(&[":", "-"], Some("a:b:c")).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "a-b-c");
    }

    #[test]
    fn test_expand_char_set() {
        assert_eq!(expand_char_set("abc"), vec!['a', 'b', 'c']);
        assert_eq!(expand_char_set("a-c"), vec!['a', 'b', 'c']);
        assert_eq!(expand_char_set("0-2"), vec!['0', '1', '2']);
    }

    #[test]
    fn test_expand_char_class_lower() {
        let lower = expand_char_set("[:lower:]");
        assert_eq!(lower.len(), 26);
        assert_eq!(lower[0], 'a');
        assert_eq!(lower[25], 'z');
    }

    #[test]
    fn test_expand_char_class_upper() {
        let upper = expand_char_set("[:upper:]");
        assert_eq!(upper.len(), 26);
        assert_eq!(upper[0], 'A');
        assert_eq!(upper[25], 'Z');
    }

    #[tokio::test]
    async fn test_tr_char_class_lower_to_upper() {
        let result = run_tr(&["[:lower:]", "[:upper:]"], Some("hello world\n")).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "HELLO WORLD\n");
    }

    #[test]
    fn test_parse_position_spec() {
        // Resolved against 10 total positions
        let resolve = |spec: &str| resolve_positions(&parse_position_spec(spec), 10);
        assert_eq!(resolve("1"), vec![1]);
        assert_eq!(resolve("1,3"), vec![1, 3]);
        assert_eq!(resolve("1-3"), vec![1, 2, 3]);
        assert_eq!(resolve("1,3-5"), vec![1, 3, 4, 5]);
        assert_eq!(resolve("3-"), vec![3, 4, 5, 6, 7, 8, 9, 10]);
        assert_eq!(resolve("-3"), vec![1, 2, 3]);
    }

    #[tokio::test]
    async fn test_cut_char_mode() {
        let result = run_cut(&["-c", "1-5"], Some("hello world\n")).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "hello\n");
    }

    #[tokio::test]
    async fn test_cut_complement() {
        let result = run_cut(&["-d", ",", "--complement", "-f", "2"], Some("a,b,c,d\n")).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "a,c,d\n");
    }

    #[tokio::test]
    async fn test_cut_only_delimited() {
        let result = run_cut(
            &["-d", ",", "-f", "1", "-s"],
            Some("a,b,c\nno delim\nx,y\n"),
        )
        .await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "a\nx\n");
    }

    #[tokio::test]
    async fn test_tr_squeeze() {
        let result = run_tr(&["-s", "eol "], Some("heeelllo   wooorld\n")).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "helo world\n");
    }

    #[tokio::test]
    async fn test_tr_complement_delete() {
        let result = run_tr(&["-cd", "0-9\n"], Some("hello123\n")).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "123\n");
    }

    #[tokio::test]
    async fn test_tr_complement_uppercase_c() {
        // -C is POSIX alias for -c (complement)
        let result = run_tr(&["-Cd", "0-9\n"], Some("hello123\n")).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "123\n");
    }

    #[tokio::test]
    async fn test_tr_combined_flags_ds() {
        // -ds: delete SET1 chars, then squeeze SET2 chars
        let result = run_tr(&["-ds", "aeiou", " "], Some("the  quick  fox\n")).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "th qck fx\n");
    }

    #[test]
    fn test_expand_char_class_punct() {
        let punct = expand_char_set("[:punct:]");
        assert!(punct.contains(&'!'));
        assert!(punct.contains(&'.'));
        assert!(punct.contains(&','));
        assert!(punct.contains(&'@'));
        assert!(punct.contains(&'#'));
        assert!(!punct.contains(&'a'));
        assert!(!punct.contains(&'0'));
        assert!(!punct.contains(&' '));
    }

    #[test]
    fn test_expand_char_class_xdigit() {
        let xdigit = expand_char_set("[:xdigit:]");
        assert_eq!(xdigit.len(), 22); // 0-9 + A-F + a-f
        assert!(xdigit.contains(&'0'));
        assert!(xdigit.contains(&'9'));
        assert!(xdigit.contains(&'A'));
        assert!(xdigit.contains(&'F'));
        assert!(xdigit.contains(&'a'));
        assert!(xdigit.contains(&'f'));
        assert!(!xdigit.contains(&'G'));
        assert!(!xdigit.contains(&'g'));
    }

    #[test]
    fn test_expand_char_class_digit() {
        let digit = expand_char_set("[:digit:]");
        assert_eq!(digit.len(), 10);
        assert_eq!(digit[0], '0');
        assert_eq!(digit[9], '9');
    }

    #[test]
    fn test_expand_char_class_alpha() {
        let alpha = expand_char_set("[:alpha:]");
        assert_eq!(alpha.len(), 52);
        assert!(alpha.contains(&'a'));
        assert!(alpha.contains(&'z'));
        assert!(alpha.contains(&'A'));
        assert!(alpha.contains(&'Z'));
    }

    #[test]
    fn test_expand_char_class_alnum() {
        let alnum = expand_char_set("[:alnum:]");
        assert_eq!(alnum.len(), 62);
        assert!(alnum.contains(&'a'));
        assert!(alnum.contains(&'0'));
        assert!(alnum.contains(&'Z'));
    }

    #[test]
    fn test_expand_char_class_space() {
        let space = expand_char_set("[:space:]");
        assert!(space.contains(&' '));
        assert!(space.contains(&'\t'));
        assert!(space.contains(&'\n'));
        assert!(space.contains(&'\r'));
    }

    #[test]
    fn test_expand_char_class_blank() {
        let blank = expand_char_set("[:blank:]");
        assert_eq!(blank.len(), 2);
        assert!(blank.contains(&' '));
        assert!(blank.contains(&'\t'));
    }

    #[test]
    fn test_expand_char_class_cntrl() {
        let cntrl = expand_char_set("[:cntrl:]");
        assert!(cntrl.contains(&'\0'));
        assert!(cntrl.contains(&'\x1f'));
        assert!(cntrl.contains(&'\x7f'));
        assert!(!cntrl.contains(&' '));
    }

    #[tokio::test]
    async fn test_tr_delete_punct() {
        let result = run_tr(&["-d", "[:punct:]"], Some("hello, world!\n")).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "hello world\n");
    }

    #[tokio::test]
    async fn test_tr_squeeze_spaces() {
        let result = run_tr(&["-s", "[:space:]"], Some("hello   world\n\n")).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "hello world\n");
    }

    #[tokio::test]
    async fn test_tr_translate_with_squeeze() {
        let result = run_tr(&["-s", "a-z", "A-Z"], Some("aabbcc\n")).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "ABC\n");
    }

    #[tokio::test]
    async fn test_cut_byte_mode() {
        // -b is alias for -c
        let result = run_cut(&["-b", "1-5"], Some("hello world\n")).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "hello\n");
    }

    #[tokio::test]
    async fn test_cut_byte_mode_inline() {
        let result = run_cut(&["-b1,3,5"], Some("hello\n")).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "hlo\n");
    }

    #[tokio::test]
    async fn test_tr_complement_squeeze() {
        // -cs: complement SET1, then squeeze result chars in SET2
        let result = run_tr(&["-cs", "[:alpha:]", "\n"], Some("hello 123 world\n")).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "hello\nworld\n");
    }

    #[tokio::test]
    async fn test_tr_multibyte_utf8() {
        // Translate multi-byte chars: ä -> x
        let result = run_tr(&["ä", "x"], Some("hällo\n")).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "hxllo\n");
    }

    #[tokio::test]
    async fn test_tr_multibyte_utf8_range() {
        // Multi-byte char in set preserved (not corrupted)
        let result = run_tr(&["über", "UBER"], Some("über\n")).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "UBER\n");
    }

    #[tokio::test]
    async fn test_cut_multibyte_utf8_chars() {
        // cut -c with multi-byte input selects chars not bytes
        let result = run_cut(&["-c", "1-3"], Some("äöü\n")).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "äöü\n");
    }

    #[test]
    fn test_expand_char_set_multibyte() {
        let chars = expand_char_set("äöü");
        assert_eq!(chars, vec!['ä', 'ö', 'ü']);
    }
}