mxsh 0.2.0

Embeddable POSIX-style shell parser and runtime
Documentation
#![cfg(all(feature = "embed", feature = "test-support"))]

use std::fs;
use std::panic::{AssertUnwindSafe, catch_unwind, resume_unwind};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{LazyLock, Mutex};

use mxsh::ShellBuilder;
use mxsh::ast::{
    AndOrList, Assignment, Command, CommandList, CompoundCommand, IoRedirect, IoRedirectOp,
    ParameterOp, Pipeline, Position, Program, Range, SimpleCommand, Word,
};
use mxsh::embed::StdioConfig;
use mxsh::runtime::testing::{InMemoryRuntime, StringStdioOut};
use proptest::prelude::*;

/// Fragments that `exec_hostile_literal` concatenates into literal arguments.
///
/// The table mixes a few plain tokens (`alpha`, `beta`, `0`, ...) with
/// whitespace and with characters that carry syntactic meaning in POSIX-style
/// shells: separators, redirection and grouping operators, quotes, the
/// backslash, and expansion sigils.
const EXEC_QUOTING_ATOMS: &[&str] = &[
    "alpha", "beta", "0", "-", "_", ".", "/", "+", " ", "  ", "\t", "\n", "#", ";", "&", "|", "(",
    ")", "{", "}", "<", ">", "$", "`", "\\", "\"", "'", "~", "=", ":",
];
/// Literal arguments containing glob metacharacters (`*`, `?`, `[...]`),
/// sampled by `exec_glob_literal`.
///
/// Under ordinary glob semantics every entry has at least one match among the
/// files created by `with_glob_fixture_cwd`, so an unquoted occurrence would
/// visibly expand.
const GLOB_LITERALS: &[&str] = &[
    "*",
    "?.txt",
    "*.txt",
    "alpha?",
    "[ab]eta",
    "dir/*",
    "file[0-9]",
];

/// Strategy yielding strings built from one to six `EXEC_QUOTING_ATOMS`
/// fragments concatenated without any separator.
fn exec_hostile_literal() -> impl Strategy<Value = String> {
    prop::collection::vec(prop::sample::select(EXEC_QUOTING_ATOMS), 1..=6)
        // `parts` is already a `Vec<&str>`, so concatenate it directly instead
        // of re-collecting it into a second vector before joining.
        .prop_map(|parts| parts.concat())
}

/// Strategy yielding one entry of `GLOB_LITERALS` as an owned `String`.
fn exec_glob_literal() -> impl Strategy<Value = String> {
    let patterns = prop::sample::select(GLOB_LITERALS);
    patterns.prop_map(String::from)
}

/// Builds a string `Word` for `value`, forwarding the `single_quoted` and
/// `split_fields` flags to `Word::string`.
///
/// The remaining `Word::string` arguments are fixed to `None` and a default
/// `Range`.
fn string_word(value: &str, single_quoted: bool, split_fields: bool) -> Word {
    let range = Range::default();
    Word::string(value, single_quoted, split_fields, None, range)
}

/// Builds a string word for `value` that is not single-quoted and has field
/// splitting enabled.
fn bare_word(value: &str) -> Word {
    let (single_quoted, split_fields) = (false, true);
    string_word(value, single_quoted, split_fields)
}

/// Builds a string word for `value` that is single-quoted and has field
/// splitting disabled.
fn single_quoted_word(value: &str) -> Word {
    let (single_quoted, split_fields) = (true, false);
    string_word(value, single_quoted, split_fields)
}

/// Wraps `value` in a one-element `Word::list` whose flag argument is `true`.
///
/// NOTE(review): judging by this helper's name, that flag presumably marks the
/// list as double-quoted; confirm against `Word::list`.
fn double_quoted_word(value: &str) -> Word {
    let segments = vec![string_word(value, false, true)];
    Word::list(segments, true, Range::default())
}

/// Builds a parameter word referring to `name` with `ParameterOp::None`.
///
/// Every other `Word::parameter` argument is passed as `false`, `None`, or a
/// default-constructed value.
fn parameter_word(name: &str) -> Word {
    let position = Position::default();
    let range = Range::default();
    Word::parameter(name, ParameterOp::None, false, None, position, None, range)
}

/// Builds a `Command::Simple` whose command word is the bare word `name` and
/// whose arguments are `arguments`.
///
/// The two trailing `SimpleCommand::new` collections are left empty.
fn simple_command(name: &str, arguments: Vec<Word>) -> Command {
    let command_name = Some(bare_word(name));
    let simple = SimpleCommand::new(command_name, arguments, Vec::new(), Vec::new());
    Command::Simple(simple)
}

/// Wraps `command` in a single-command `Pipeline`, then in a `CommandList`.
///
/// Both constructors receive `false` for their flag argument and a default
/// value for their last argument.
fn command_list(command: Command) -> CommandList {
    let pipeline = Pipeline::new(vec![command], false, Default::default());
    CommandList::new(AndOrList::Pipeline(pipeline), false, Default::default())
}

/// Builds a one-command program: `printf` invoked with the single-quoted
/// format `[%s]\n` followed by `arguments`.
fn printf_program(arguments: Vec<Word>) -> Program {
    let format = single_quoted_word("[%s]\\n");
    // Format word first, then the caller's arguments in their original order.
    let argv: Vec<Word> = std::iter::once(format).chain(arguments).collect();
    let printf = simple_command("printf", argv);
    Program::new(vec![command_list(printf)])
}

/// Renders the output expected from the `[%s]\n` format: each argument wrapped
/// in square brackets and followed by a newline, in input order.
///
/// An empty slice produces an empty string.
fn expected_printf_output(arguments: &[String]) -> String {
    let mut rendered = String::new();
    for argument in arguments {
        rendered.push('[');
        rendered.push_str(argument);
        rendered.push_str("]\n");
    }
    rendered
}

/// Renders `program` with `to_canonical` and runs the resulting text in a new
/// shell session backed by an `InMemoryRuntime`.
///
/// Returns `(canonical text, exit status, captured stdout, captured stderr)`.
///
/// # Panics
///
/// Panics if the shell session cannot be built.
fn eval_canonical(program: &Program) -> (String, i32, String, String) {
    let script = program.to_canonical();

    // Route the session's stdout and stderr into in-memory capture buffers.
    let out_capture = StringStdioOut::new();
    let err_capture = StringStdioOut::new();
    let stdio = StdioConfig {
        stdout: out_capture.fd(),
        stderr: err_capture.fd(),
        ..StdioConfig::default()
    };

    let mut shell = ShellBuilder::new()
        .stdio(stdio)
        .new_session()
        .expect("session should build");
    let mut runtime = InMemoryRuntime::new();
    let outcome = shell.run(&mut runtime, &script);

    (
        script,
        outcome.status,
        out_capture.collect(),
        err_capture.collect(),
    )
}

/// Returns a path inside the system temporary directory named
/// `mxsh-{label}-{pid}-{n}`, where `n` is a per-process counter starting at 1.
///
/// Nothing is created on disk; the function only computes the path.
fn temp_path(label: &str) -> std::path::PathBuf {
    static NEXT_ID: AtomicUsize = AtomicUsize::new(1);

    let mut path = std::env::temp_dir();
    let pid = std::process::id();
    let sequence = NEXT_ID.fetch_add(1, Ordering::Relaxed);
    path.push(format!("mxsh-{label}-{pid}-{sequence}"));
    path
}

/// Runs `f` with the process working directory switched to a freshly created
/// fixture directory, then restores the previous directory and removes the
/// fixture (removal is best-effort).
///
/// The fixture holds the empty files `a.txt`, `notes.txt`, `alpha1`, `aeta`,
/// `beta`, `file7` and `dir/one`.
///
/// The working directory is process-wide, so calls are serialized through a
/// static mutex. A panic raised by `f` is caught, cleanup runs, and the panic
/// is then re-raised to the caller.
///
/// # Panics
///
/// Panics if the fixture cannot be created, or if the working directory cannot
/// be read, entered, or restored.
fn with_glob_fixture_cwd<T>(f: impl FnOnce() -> T) -> T {
    static CWD_LOCK: LazyLock<Mutex<()>> = LazyLock::new(|| Mutex::new(()));

    // Re-raising a panic below unwinds while this guard is held, which poisons
    // the mutex. The lock protects no data, so recover the guard from a
    // poisoned lock; otherwise every later caller would fail on the lock and
    // hide the original failure.
    let _guard = CWD_LOCK
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    let dir = temp_path("canonical-glob");
    fs::create_dir_all(dir.join("dir")).expect("glob fixture directory should exist");
    for path in [
        dir.join("a.txt"),
        dir.join("notes.txt"),
        dir.join("alpha1"),
        dir.join("aeta"),
        dir.join("beta"),
        dir.join("file7"),
        dir.join("dir").join("one"),
    ] {
        fs::write(path, "").expect("glob fixture file should be writable");
    }

    let cwd = std::env::current_dir().expect("current directory should be readable");
    std::env::set_current_dir(&dir).expect("glob fixture cwd should be enterable");
    // Catch a panic from `f` so the working directory is always restored and
    // the fixture removed before the panic continues.
    let result = catch_unwind(AssertUnwindSafe(f));
    std::env::set_current_dir(cwd).expect("current directory should be restorable");
    let _ = fs::remove_dir_all(&dir);

    match result {
        Ok(value) => value,
        Err(payload) => resume_unwind(payload),
    }
}

/// A brace group with a `DLessDash` here-document whose body is `"\tkeep"`
/// must canonicalize to a `<< 'MXSH_HEREDOC_0'` redirect (not `<<-`), and
/// running the canonical text must still deliver the leading tab to `read`.
#[test]
fn canonical_strip_tabs_heredoc_preserves_literal_tab_payload() {
    // `read -r line` carrying an `IFS` assignment with an empty value.
    let ifs_assignment = Assignment::new(
        "IFS",
        string_word("", false, false),
        Default::default(),
    );
    let read_line = Command::Simple(SimpleCommand::new(
        Some(bare_word("read")),
        vec![bare_word("-r"), bare_word("line")],
        Vec::new(),
        vec![ifs_assignment],
    ));

    // `printf` of the `line` parameter, wrapped in a list with the flag set.
    let line_reference = Word::list(vec![parameter_word("line")], true, Range::default());
    let print_line = simple_command(
        "printf",
        vec![single_quoted_word("[%s]\\n"), line_reference],
    );

    // Here-document redirect whose payload starts with a literal tab.
    let heredoc = IoRedirect::new(
        None,
        IoRedirectOp::DLessDash,
        bare_word("EOF"),
        vec![string_word("\tkeep", false, false)],
        false,
    );
    let group = CompoundCommand::new(
        vec![command_list(read_line), command_list(print_line)],
        vec![heredoc],
    );
    let program = Program::new(vec![command_list(Command::BraceGroup(group))]);

    let (canonical, status, stdout, stderr) = eval_canonical(&program);

    assert!(canonical.contains("<< 'MXSH_HEREDOC_0'"));
    assert!(!canonical.contains("<<- 'MXSH_HEREDOC_0'"));
    assert_eq!(status, 0, "canonical program failed.\nstderr:\n{stderr}");
    assert_eq!(stdout, "[\tkeep]\n");
}

proptest! {
    #![proptest_config(ProptestConfig {
        cases: 24,
        failure_persistence: Some(Box::new(
            proptest::test_runner::FileFailurePersistence::WithSource("proptest-regressions"),
        )),
        ..ProptestConfig::default()
    })]

    // Each generated literal is passed as a bare word; after canonicalization
    // and execution, `printf` must print exactly one bracketed line per literal.
    #[test]
    fn canonical_hostile_literals_execute_as_single_arguments(
        arguments in prop::collection::vec(exec_hostile_literal(), 1..=4),
    ) {
        let words: Vec<Word> = arguments.iter().map(String::as_str).map(bare_word).collect();
        let program = printf_program(words);
        let (canonical, status, stdout, stderr) = eval_canonical(&program);
        let expected = expected_printf_output(&arguments);

        prop_assert_eq!(
            status,
            0,
            "canonical program failed.\ncanonical:\n{}\nstderr:\n{}",
            canonical,
            stderr
        );
        prop_assert_eq!(
            stdout,
            expected,
            "canonical quoting did not preserve literal argv boundaries.\ncanonical:\n{}\nstderr:\n{}",
            canonical,
            stderr
        );
    }

    // A composite word `pre` + double_quoted_word(value) + `post` must print as
    // one argument with the generated value intact in the middle.
    #[test]
    fn canonical_composite_words_preserve_embedded_double_quoted_segments(
        value in exec_hostile_literal(),
    ) {
        let segments = vec![
            bare_word("pre"),
            double_quoted_word(&value),
            bare_word("post"),
        ];
        let composite = Word::list(segments, false, Range::default());
        let program = printf_program(vec![composite]);
        let (canonical, status, stdout, stderr) = eval_canonical(&program);
        let expected = format!("[pre{value}post]\n");

        prop_assert_eq!(
            status,
            0,
            "canonical composite word failed.\ncanonical:\n{}\nstderr:\n{}",
            canonical,
            stderr
        );
        prop_assert_eq!(
            stdout,
            expected,
            "canonical composite word lost an embedded quoted segment.\ncanonical:\n{}\nstderr:\n{}",
            canonical,
            stderr
        );
    }

    // Runs inside the glob fixture directory, so a pattern that leaked through
    // unquoted would print file names instead of the pattern itself.
    #[test]
    fn canonical_glob_literals_do_not_expand_when_matches_exist(
        arguments in prop::collection::vec(exec_glob_literal(), 1..=4),
    ) {
        with_glob_fixture_cwd(|| -> Result<(), proptest::test_runner::TestCaseError> {
            let words: Vec<Word> = arguments.iter().map(String::as_str).map(bare_word).collect();
            let program = printf_program(words);
            let (canonical, status, stdout, stderr) = eval_canonical(&program);
            let expected = expected_printf_output(&arguments);

            prop_assert_eq!(
                status,
                0,
                "canonical glob program failed.\ncanonical:\n{}\nstderr:\n{}",
                canonical,
                stderr
            );
            prop_assert_eq!(
                stdout,
                expected,
                "canonical quoting allowed glob expansion to leak through.\ncanonical:\n{}\nstderr:\n{}",
                canonical,
                stderr
            );
            Ok(())
        })?;
    }
}