mxsh 0.2.0

Embeddable POSIX-style shell parser and runtime
Documentation
#![cfg(all(
    feature = "cli",
    feature = "embed",
    feature = "test-support",
    feature = "unix-runtime"
))]

mod support;

use std::collections::HashSet;
use std::fmt::Write as _;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::OnceLock;
use std::time::Duration;

use support::{
    append_read_command, append_read_results, read_var_names, run_shell_with_timeout,
    semantic_output, shell_quote,
};

// Token set used for the width-1 cases in `build_cases`: empty input, plain data, space, tab,
// comma, backslash, and a handful of two-character combinations of those.
const TOKENS: &[&str] = &[
    "", "a", " ", "\t", ",", "\\", "\\ ", "\\,", "a ", " a", "a,", ",a",
];
// Smaller token set used for the width-2 and width-3 Cartesian products in `build_cases`.
const MATRIX_TOKENS: &[&str] = &["", "a", " ", "\t", ",", "\\"];
// Timeout handed to `run_shell_with_timeout` for each shell invocation.
const SHELL_TIMEOUT: Duration = Duration::from_secs(180);

/// Builds the human-readable label for a case.
///
/// Each token is rendered with its `Debug` formatting (so whitespace and backslashes are
/// visible), tokens are separated by `" x "`, and the label ends with
/// ` newline=<terminated_by_newline>`.
fn token_label(tokens: &[&str], terminated_by_newline: bool) -> String {
    let rendered: Vec<String> = tokens.iter().map(|token| format!("{token:?}")).collect();
    let mut label = rendered.join(" x ");
    write!(label, " newline={terminated_by_newline}").unwrap();
    label
}

/// One generated stdin payload. The payload itself lives in a file under `case_dir()` named
/// after the case's index; only the descriptive label is kept in memory.
#[derive(Clone)]
struct Case {
    /// Description produced by `token_label`, included in assertion failure messages.
    label: String,
}

/// Returns the directory that holds the pre-materialized stdin payloads.
///
/// The directory is `<temp dir>/mxsh-read-table-<pid>` and is created on the first call; later
/// calls return the same cached path.
///
/// # Panics
///
/// Panics if the directory cannot be created.
fn case_dir() -> &'static Path {
    static CASE_DIR: OnceLock<PathBuf> = OnceLock::new();
    let cached: &'static PathBuf = CASE_DIR.get_or_init(|| {
        let name = format!("mxsh-read-table-{}", std::process::id());
        let path = std::env::temp_dir().join(name);
        if let Err(err) = fs::create_dir_all(&path) {
            panic!("failed to create read_table case dir: {err}");
        }
        path
    });
    cached.as_path()
}

/// Registers one case built from `tokens`, unless an identical stdin payload was already seen.
///
/// The payload is the concatenation of `tokens`, followed by a newline when `terminated` is
/// true. New payloads are written to `case_dir()/<next_idx>`, a matching `Case` is appended to
/// `cases`, and `next_idx` is incremented; duplicates leave everything untouched.
///
/// # Panics
///
/// Panics if the payload file cannot be written.
fn push_case(
    cases: &mut Vec<Case>,
    seen: &mut HashSet<String>,
    next_idx: &mut usize,
    tokens: &[&str],
    terminated: bool,
) {
    let mut payload = tokens.concat();
    if terminated {
        payload.push('\n');
    }

    let is_new = seen.insert(payload.clone());
    if is_new {
        let idx = *next_idx;
        // Store the payload in a file so the shell script can redirect from it (`< file`)
        // rather than feeding it through a pipeline.
        let target = case_dir().join(idx.to_string());
        if let Err(err) = fs::write(&target, payload.as_bytes()) {
            panic!("failed to write read_table case {}: {err}", idx);
        }
        let label = token_label(tokens, terminated);
        cases.push(Case { label });
        *next_idx = idx + 1;
    }
}

/// Registers every length-`width` sequence drawn (with repetition) from `tokens`.
///
/// Sequences are visited in lexicographic order of token indices, with the last position
/// varying fastest, and each one is handed to `push_case`. A `width` of zero yields exactly one
/// empty sequence; an empty `tokens` slice with a non-zero `width` yields nothing.
fn push_case_products(
    cases: &mut Vec<Case>,
    seen: &mut HashSet<String>,
    next_idx: &mut usize,
    tokens: &[&'static str],
    width: usize,
    terminated: bool,
) {
    if width > 0 && tokens.is_empty() {
        return;
    }

    // `digits[i]` is the index into `tokens` chosen for position `i`; the vector is advanced
    // like an odometer in base `tokens.len()`.
    let mut digits = vec![0usize; width];
    let mut combo: Vec<&'static str> = Vec::with_capacity(width);
    loop {
        combo.clear();
        combo.extend(digits.iter().map(|&digit| tokens[digit]));
        push_case(cases, seen, next_idx, &combo, terminated);

        // Advance from the rightmost position, carrying leftwards on overflow. Running off
        // the left edge means every combination has been visited.
        let mut pos = width;
        loop {
            if pos == 0 {
                return;
            }
            pos -= 1;
            digits[pos] += 1;
            if digits[pos] < tokens.len() {
                break;
            }
            digits[pos] = 0;
        }
    }
}

/// Generates the full, de-duplicated case table and writes each payload to disk.
///
/// For both the unterminated and the newline-terminated variant, this takes every single token
/// from `TOKENS` and every 2- and 3-token product over `MATRIX_TOKENS`.
fn build_cases() -> Vec<Case> {
    // The composite fragments in `TOKENS` (such as "a " or "\\,") can also be formed by
    // concatenating the atoms in `MATRIX_TOKENS`, so the wider products only need the smaller
    // set to cover empty, data, IFS whitespace, non-whitespace IFS, and escape interactions.
    let plans: [(&[&'static str], usize); 3] =
        [(TOKENS, 1), (MATRIX_TOKENS, 2), (MATRIX_TOKENS, 3)];

    let mut table = Vec::new();
    let mut seen = HashSet::new();
    let mut next_idx = 0usize;
    for terminated in [false, true] {
        for &(tokens, width) in plans.iter() {
            push_case_products(&mut table, &mut seen, &mut next_idx, tokens, width, terminated);
        }
    }
    table
}

/// Returns the shared case table, building it with `build_cases` on first use.
fn cases() -> &'static [Case] {
    static CASES: OnceLock<Vec<Case>> = OnceLock::new();
    let table: &'static Vec<Case> = CASES.get_or_init(build_cases);
    &table[..]
}

fn read_script(cases: &[Case], ifs: &str, raw_mode: bool, var_count: usize) -> String {
    let var_names = read_var_names(var_count);
    let mut script = format!("CASE_DIR={}\n", shell_quote(&case_dir().to_string_lossy()));
    script.push_str("run_case() { idx=$1; printf '__CASE__%s\\n' \"$idx\"; unset X Y Z; ");
    append_read_command(&mut script, ifs, raw_mode, var_names);
    script.push_str(" < \"$CASE_DIR/$idx\"");
    append_read_results(&mut script, var_names);
    script.push_str("; }\n");
    for idx in 0..cases.len() {
        script.push_str(&format!("run_case {idx}\n"));
    }
    script
}

/// Splits combined shell output into one chunk per `__CASE__` marker line.
///
/// Every line that starts with `__CASE__` opens a new chunk; the lines that follow (each
/// re-terminated with `\n`) belong to that chunk until the next marker or the end of input.
/// A marker followed by no lines produces an empty chunk, so the chunk at position `i` always
/// corresponds to the `i`-th marker. Any non-empty output that precedes the first marker is
/// returned as an extra leading chunk, which lets callers detect it through the chunk count.
fn parse_cases(output: &str) -> Vec<String> {
    let mut cases = Vec::new();
    let mut current = String::new();
    // True once a marker has opened a chunk that has not been pushed yet. Without this flag an
    // empty chunk would be indistinguishable from "nothing pending" and would be dropped,
    // shifting every later chunk by one position.
    let mut in_case = false;
    for line in output.lines() {
        if line.starts_with("__CASE__") {
            if in_case || !current.is_empty() {
                cases.push(std::mem::take(&mut current));
            }
            in_case = true;
            continue;
        }
        current.push_str(line);
        current.push('\n');
    }
    if in_case || !current.is_empty() {
        cases.push(current);
    }
    cases
}

/// Runs every case under one `read` configuration in both `mxsh` and `/bin/sh` and asserts
/// that the two shells agree.
///
/// The same script (see `read_script`) is fed to both shells via `-s`. The status and stdout
/// values returned by `semantic_output` are compared: first the overall status, then the
/// number of per-case chunks, then each chunk individually.
///
/// # Panics
///
/// Panics (failing the test) on any mismatch. Every failure message includes the
/// configuration and both shells' stderr.
fn check_config(cases: &[Case], ifs: &str, raw_mode: bool, var_count: usize) {
    let script = read_script(cases, ifs, raw_mode, var_count);
    let mxsh = run_shell_with_timeout("mxsh", &["-s"], &script, SHELL_TIMEOUT);
    let sh = run_shell_with_timeout("/bin/sh", &["-s"], &script, SHELL_TIMEOUT);
    let (mxsh_status, mxsh_stdout) = semantic_output(&mxsh);
    let (sh_status, sh_stdout) = semantic_output(&sh);
    let config_context = format!(
        "ifs={ifs:?} raw_mode={raw_mode} var_count={var_count} stderr(mxsh)={:?} stderr(sh)={:?}",
        String::from_utf8_lossy(&mxsh.stderr),
        String::from_utf8_lossy(&sh.stderr),
    );
    assert_eq!(mxsh_status, sh_status, "config {config_context}");

    let mxsh_cases = parse_cases(&mxsh_stdout);
    let sh_cases = parse_cases(&sh_stdout);
    // A count mismatch usually means a shell aborted part-way or printed stray output, so the
    // configuration and stderr are needed to diagnose it.
    assert_eq!(
        mxsh_cases.len(),
        cases.len(),
        "mxsh produced an unexpected number of case results; config {config_context}",
    );
    assert_eq!(
        sh_cases.len(),
        cases.len(),
        "/bin/sh produced an unexpected number of case results; config {config_context}",
    );
    // All three sequences have the same length here, so zipping visits every case.
    for ((case, mxsh_case), sh_case) in cases.iter().zip(&mxsh_cases).zip(&sh_cases) {
        assert_eq!(
            mxsh_case, sh_case,
            "tokens={} {}",
            case.label, config_context,
        );
    }
}

/// Checks one `read` configuration against the shared, lazily built case table.
fn run_config(ifs: &str, raw_mode: bool, var_count: usize) {
    let table = cases();
    check_config(table, ifs, raw_mode, var_count);
}

// Expands to three `#[test]` functions, `vars1`, `vars2` and `vars3`, each calling `run_config`
// with the given IFS value and raw-mode flag and a `var_count` of 1, 2 and 3 respectively.
macro_rules! read_table_var_tests {
    ($ifs:expr, $raw:expr) => {
        #[test]
        fn vars1() {
            run_config($ifs, $raw, 1);
        }

        #[test]
        fn vars2() {
            run_config($ifs, $raw, 2);
        }

        #[test]
        fn vars3() {
            run_config($ifs, $raw, 3);
        }
    };
}

// Expands to a module named `$module` containing the `read_table_var_tests!` tests for `$ifs`
// with the raw-mode flag set to `false`, plus a nested `raw` module containing the same tests
// with the flag set to `true`.
macro_rules! read_table_ifs_tests {
    ($module:ident, $ifs:expr) => {
        mod $module {
            use super::*;

            read_table_var_tests!($ifs, false);

            mod raw {
                use super::*;

                read_table_var_tests!($ifs, true);
            }
        }
    };
}

// Instantiate the test modules for each IFS value under test: space/tab/newline, a single
// space, a single comma, and the empty string.
read_table_ifs_tests!(default_ifs, " \t\n");
read_table_ifs_tests!(space_ifs, " ");
read_table_ifs_tests!(comma_ifs, ",");
read_table_ifs_tests!(empty_ifs, "");