// mxsh 0.2.0
//
// Embeddable POSIX-style shell parser and runtime
// Documentation
#![cfg(feature = "parser")]

use mxsh::ast::{
    AndOrList, Command, CommandList, IoRedirect, IoRedirectOp, ParameterOp, Pipeline, Position,
    Program, Range, SimpleCommand, Word,
};
use mxsh::parser::Parser;
use proptest::prelude::*;

/// Pool of literal text fragments that `piece_strategy` samples from to build
/// `Piece::Literal` segments.
///
/// The pool mixes the empty string, plain text with leading or trailing
/// spaces, and single characters that a shell normally treats specially
/// (`$`, `\`, a backtick, `"` and `'`).
const LITERAL_ATOMS: &[&str] = &[
    "", "a", " ", "hello ", " world", "-", ".", "/tmp/", " = ", "$", "\\", "`", "\"", "'",
];

/// Pool of variable names that `piece_strategy` samples from to build
/// `Piece::Parameter` segments; `literal_body_text` renders each one as `$NAME`.
const PARAM_NAMES: &[&str] = &["X", "HOME", "PATH", "VALUE", "TMPDIR"];

/// One generated segment of a heredoc body.
#[derive(Clone, Debug)]
enum Piece {
    /// Plain text: becomes a string word in `heredoc_body_word` and is copied
    /// verbatim by `literal_body_text`.
    Literal(String),
    /// A variable name: becomes a parameter word in `heredoc_body_word` and is
    /// rendered as `$` followed by the name by `literal_body_text`.
    Parameter(String),
}

/// Parses `script` with a [`Parser`] built from the string.
///
/// Returns the resulting [`Program`], or the parser's error unchanged.
fn parse_program(script: &str) -> Result<Program, mxsh::parser::ParseError> {
    Parser::from_string(script).parse_program()
}

/// Builds a string [`Word`] holding `value`.
///
/// Only the text and the `split_fields` flag vary between callers; the other
/// constructor arguments are fixed (`false`, `None`, and a default [`Range`]).
fn string_word(value: &str, split_fields: bool) -> Word {
    let range = Range::default();
    Word::string(value, false, split_fields, None, range)
}

/// Builds a parameter [`Word`] that refers to the variable `name`.
///
/// The word carries no parameter operation (`ParameterOp::None`); every other
/// constructor argument is a fixed `false`, `None`, or default value.
fn parameter_word(name: &str) -> Word {
    let position = Position::default();
    let range = Range::default();
    Word::parameter(name, ParameterOp::None, false, None, position, None, range)
}

fn piece_strategy() -> impl Strategy<Value = Piece> {
    prop_oneof![
        prop::sample::select(LITERAL_ATOMS).prop_map(|atom| Piece::Literal((*atom).to_string())),
        prop::sample::select(PARAM_NAMES).prop_map(|name| Piece::Parameter((*name).to_string())),
    ]
}

/// Strategy yielding between one and six pieces, keeping only sequences that
/// contain at least one `Piece::Parameter`.
fn literal_heredoc_pieces() -> impl Strategy<Value = Vec<Piece>> {
    // Sequences made only of literals are rejected by the filter below.
    let has_parameter = |candidate: &Vec<Piece>| {
        candidate
            .iter()
            .any(|segment| matches!(segment, Piece::Parameter(_)))
    };

    prop::collection::vec(piece_strategy(), 1..=6).prop_filter(
        "need at least one parameter-shaped segment to exercise the regression",
        has_parameter,
    )
}

/// Converts `pieces` into the single [`Word`] used as a heredoc body.
///
/// Each literal becomes a string word and each parameter a parameter word.
/// Two or more words are wrapped in a list word, a single word is returned
/// as is, and an empty input yields an empty string word.
fn heredoc_body_word(pieces: &[Piece]) -> Word {
    let mut words: Vec<Word> = pieces
        .iter()
        .map(|piece| match piece {
            Piece::Literal(text) => string_word(text, false),
            Piece::Parameter(name) => parameter_word(name),
        })
        .collect();

    if words.len() > 1 {
        return Word::list(words, false, Range::default());
    }

    // At most one word remains: hand it back, or fall back to an empty string word.
    words.pop().unwrap_or_else(|| string_word("", false))
}

/// Renders `pieces` as the raw text they stand for.
///
/// Literals are appended verbatim; a parameter is appended as `$` followed by
/// its name.
fn literal_body_text(pieces: &[Piece]) -> String {
    pieces.iter().fold(String::new(), |mut rendered, piece| {
        match piece {
            Piece::Literal(literal) => rendered.push_str(literal),
            Piece::Parameter(name) => {
                rendered.push('$');
                rendered.push_str(name);
            }
        }
        rendered
    })
}

/// Builds a one-command [`Program`]: `cat` with a single `IoRedirectOp::DLess`
/// redirect whose delimiter word is `EOF` and whose heredoc body is the word
/// produced by `heredoc_body_word(pieces)`.
fn literal_heredoc_program(pieces: &[Piece]) -> Program {
    let command_name = Some(string_word("cat", true));
    let heredoc = IoRedirect::new(
        None,
        IoRedirectOp::DLess,
        string_word("EOF", true),
        vec![heredoc_body_word(pieces)],
        false,
    );
    let cat = SimpleCommand::new(command_name, Vec::new(), vec![heredoc], Vec::new());

    let pipeline = Pipeline::new(vec![Command::Simple(cat)], false, Default::default());
    let command_list =
        CommandList::new(AndOrList::Pipeline(pipeline), false, Default::default());

    Program::new(vec![command_list])
}

/// Returns the first I/O redirect of the first simple command in `program`.
///
/// # Panics
///
/// Panics if the first command list is not a pipeline, if the pipeline's
/// first command is not a simple command, or if any of the indexed
/// collections (body, commands, redirects) is empty.
fn first_redirect(program: &Program) -> &IoRedirect {
    let pipeline = match program.body()[0].and_or_list() {
        AndOrList::Pipeline(pipeline) => pipeline,
        _ => panic!("expected a pipeline"),
    };

    match &pipeline.commands()[0] {
        Command::Simple(simple) => &simple.io_redirects()[0],
        _ => panic!("expected a simple command"),
    }
}

/// A `DLessDash` heredoc whose body starts with a tab must canonicalize to a
/// quoted-delimiter `<<` heredoc and reparse with the tab-prefixed body intact.
#[test]
fn canonical_strip_tabs_heredoc_preserves_tab_prefixed_lines() {
    // `cat` with one `DLessDash` redirect whose body is the literal "\tkeep".
    let command_name = Some(string_word("cat", true));
    let heredoc = IoRedirect::new(
        None,
        IoRedirectOp::DLessDash,
        string_word("EOF", true),
        vec![string_word("\tkeep", false)],
        false,
    );
    let cat = SimpleCommand::new(command_name, Vec::new(), vec![heredoc], Vec::new());
    let pipeline = Pipeline::new(vec![Command::Simple(cat)], false, Default::default());
    let program = Program::new(vec![CommandList::new(
        AndOrList::Pipeline(pipeline),
        false,
        Default::default(),
    )]);

    let canonical = program.to_canonical();
    assert_eq!(
        canonical,
        "cat << 'MXSH_HEREDOC_0'\n\tkeep\nMXSH_HEREDOC_0\n"
    );

    // Round-trip the canonical text through the parser and inspect the redirect.
    let reparsed = match parse_program(&canonical) {
        Ok(parsed) => parsed,
        Err(err) => panic!("canonical parse failed: {err:?}"),
    };
    let redirect = first_redirect(&reparsed);
    assert_eq!(redirect.op(), IoRedirectOp::DLess);
    assert!(!redirect.here_document_expand());

    let body_words = redirect.here_document();
    assert_eq!(body_words.len(), 1);
    assert_eq!(body_words[0].as_str().as_deref(), Some("\tkeep"));
}

proptest! {
    #![proptest_config(ProptestConfig {
        cases: 32,
        failure_persistence: None,
        .. ProptestConfig::default()
    })]

    // Property: for any generated body containing at least one parameter-shaped
    // segment, the canonical form quotes its synthetic delimiter, reparses as a
    // non-expanding heredoc, and keeps the body text unchanged.
    #[test]
    fn canonical_literal_heredoc_reparse_preserves_literal_mode(pieces in literal_heredoc_pieces()) {
        let expected_body = literal_body_text(&pieces);
        let program = literal_heredoc_program(&pieces);
        let canonical = program.to_canonical();

        prop_assert!(
            canonical.contains("<< 'MXSH_HEREDOC_"),
            "canonical literal heredoc must quote its synthetic delimiter.\ncanonical:\n{}",
            canonical
        );

        let reparsed = match parse_program(&canonical) {
            Ok(parsed) => parsed,
            Err(err) => panic!("canonical parse failed: {err:?}\ncanonical:\n{canonical}"),
        };
        let redirect = first_redirect(&reparsed);

        prop_assert!(
            !redirect.here_document_expand(),
            "literal heredoc mode was lost during canonicalization.\ncanonical:\n{}",
            canonical
        );

        let body_words = redirect.here_document();
        prop_assert_eq!(body_words.len(), 1);

        // Take an owned copy so the comparison does not borrow from the word.
        let reparsed_body = body_words[0].as_str().map(|body| body.into_owned());
        prop_assert_eq!(
            reparsed_body.as_deref(),
            Some(expected_body.as_str()),
            "literal heredoc body changed during canonicalization.\ncanonical:\n{}",
            canonical
        );
    }
}