#![cfg(feature = "parser")]
use mxsh::ast::{
AndOrBinary, AndOrList, ArithmExpr, Command, CommandList, Pipeline, Program, Range,
SimpleCommand, Word,
};
use mxsh::parser::Parser;
use proptest::prelude::*;
/// Atoms that `bare_chunk` emits unquoted and unescaped: the generated
/// source text and the expected literal value are the same string.
const BARE_PLAIN_ATOMS: &[&str] = &[
"alpha", "beta", "gamma", "0", "9", "_", ".", "/", "-", "+", "~", "path", "file",
];
/// Payloads concatenated by `single_quoted_segment`. `Segment::render` wraps
/// the payload in `'…'`; the expected literal value is the payload unchanged.
/// None of the entries contains a single quote.
const SINGLE_QUOTED_ATOMS: &[&str] = &[
"",
"alpha",
"beta gamma",
"$HOME",
"`cmd`",
"\\path",
"# ; [] {}",
"*?[]",
"\"double\"",
];
/// Atoms that `double_quoted_chunk` places between double quotes verbatim
/// (source text and expected value are identical).
const DOUBLE_QUOTED_PLAIN_ATOMS: &[&str] = &[
"",
"alpha",
"beta gamma",
"'",
"# ; [] {}",
"*?[]",
"(",
")",
"tabs\tinside",
];
/// Characters generated in bare context as `\c`, with expected literal
/// value `c` (the backslash is not part of the value).
const RAW_ESCAPABLE_CHARS: &[char] = &[
' ', '\t', '#', ';', '&', '|', '(', ')', '<', '>', '\'', '"', '$', '`', '\\', '*', '?', '[',
']', '~', '=', ':',
];
/// Characters generated inside double quotes as `\c` with expected value `c`.
const DOUBLE_QUOTED_ESCAPABLE_CHARS: &[char] = &['$', '`', '"', '\\'];
/// Characters generated inside double quotes as `\c` with expected value
/// `\c`, i.e. the backslash is expected to survive in the value.
const DOUBLE_QUOTED_PRESERVED_BACKSLASH_CHARS: &[char] = &['a', ' ', ';', '#', '[', '*', '?', '~'];
/// One generated piece of an unquoted (bare) word.
#[derive(Clone, Debug)]
struct BareChunk {
/// Shell source text of the chunk.
source: String,
/// Literal value the chunk is expected to contribute.
value: String,
/// True when the chunk is a backslash-escaped whitespace character.
escaped_whitespace: bool,
}
/// One generated piece of the text between a pair of double quotes.
#[derive(Clone, Debug)]
struct DoubleQuotedChunk {
/// Shell source text of the chunk (without the surrounding quotes).
source: String,
/// Literal value the chunk is expected to contribute.
value: String,
}
/// A run of bare chunks joined into a single unquoted segment.
#[derive(Clone, Debug)]
struct BareSegment {
/// Concatenated source text of all chunks.
source: String,
/// Concatenated expected literal value of all chunks.
value: String,
/// Passed as the `split_fields` flag of the expected `Word::string`;
/// `from_chunks` clears it when any chunk carries escaped whitespace.
split_fields: bool,
}
/// The contents of one double-quoted segment (quotes not included).
#[derive(Clone, Debug)]
struct DoubleQuotedSegment {
/// Concatenated source text of all chunks; `Segment::render` adds the quotes.
source: String,
/// Concatenated expected literal value of all chunks.
value: String,
}
/// One quoting region of a generated word.
#[derive(Clone, Debug)]
enum Segment {
/// Unquoted text, possibly containing backslash escapes.
Bare(BareSegment),
/// Payload of a single-quoted region; rendered as `'payload'`.
SingleQuoted(String),
/// Contents of a double-quoted region; rendered as `"source"`.
DoubleQuoted(DoubleQuotedSegment),
}
/// A generated shell word made of adjacent segments.
#[derive(Clone, Debug)]
struct QuotationWord {
/// Segments in source order; `render` concatenates them without separators.
segments: Vec<Segment>,
}
impl BareSegment {
    /// Joins `chunks`, in order, into one bare segment.
    ///
    /// The segment's source and value are the concatenations of the chunk
    /// sources and values. `split_fields` stays `true` unless at least one
    /// chunk is flagged as escaped whitespace (so an empty input yields `true`).
    fn from_chunks(chunks: Vec<BareChunk>) -> Self {
        let split_fields = chunks.iter().all(|piece| !piece.escaped_whitespace);
        let source: String = chunks.iter().map(|piece| piece.source.as_str()).collect();
        let value: String = chunks.iter().map(|piece| piece.value.as_str()).collect();
        Self {
            source,
            value,
            split_fields,
        }
    }
}
impl DoubleQuotedSegment {
    /// Joins `chunks`, in order, into the contents of one double-quoted segment.
    ///
    /// Source and value are the concatenations of the chunk sources and values.
    fn from_chunks(chunks: Vec<DoubleQuotedChunk>) -> Self {
        let source: String = chunks.iter().map(|piece| piece.source.as_str()).collect();
        let value: String = chunks.iter().map(|piece| piece.value.as_str()).collect();
        Self { source, value }
    }
}
impl Segment {
    /// Returns the shell source text of this segment, including the
    /// surrounding quote characters for the quoted variants.
    fn render(&self) -> String {
        match self {
            Segment::Bare(bare) => bare.source.clone(),
            Segment::SingleQuoted(payload) => ["'", payload.as_str(), "'"].concat(),
            Segment::DoubleQuoted(quoted) => ["\"", quoted.source.as_str(), "\""].concat(),
        }
    }

    /// Returns the literal value this segment is expected to contribute.
    fn literal_value(&self) -> &str {
        match self {
            Segment::Bare(bare) => &bare.value,
            Segment::SingleQuoted(payload) => payload,
            Segment::DoubleQuoted(quoted) => &quoted.value,
        }
    }

    /// Builds the `Word` expected for this segment on its own.
    ///
    /// * Bare: a non-single-quoted string carrying the segment's
    ///   `split_fields` flag and its source text.
    /// * Single-quoted: a single-quoted string with `split_fields` off and
    ///   the quoted rendering as source.
    /// * Double-quoted: a double-quoted list that is empty when the segment
    ///   has no source text, and otherwise holds one string child.
    fn parsed_word(&self) -> Word {
        match self {
            Segment::Bare(bare) => {
                let source = Some(bare.source.clone());
                Word::string(
                    &bare.value,
                    false,
                    bare.split_fields,
                    source,
                    Range::default(),
                )
            }
            Segment::SingleQuoted(payload) => {
                // Same text `render` produces for this variant.
                let source = Some(self.render());
                Word::string(payload, true, false, source, Range::default())
            }
            Segment::DoubleQuoted(quoted) => {
                let mut children = Vec::new();
                if !quoted.source.is_empty() {
                    children.push(Word::string(
                        &quoted.value,
                        false,
                        true,
                        Some(quoted.source.clone()),
                        Range::default(),
                    ));
                }
                Word::list(children, true, Range::default())
            }
        }
    }
}
impl QuotationWord {
    /// Renders the word as shell source by concatenating every segment's
    /// rendering with no separator.
    fn render(&self) -> String {
        let mut surface = String::new();
        for segment in &self.segments {
            surface.push_str(&segment.render());
        }
        surface
    }

    /// Returns the literal value expected for the whole word: the
    /// concatenation of every segment's literal value.
    fn literal_value(&self) -> String {
        self.segments
            .iter()
            .map(|segment| segment.literal_value())
            .collect()
    }

    /// Builds the `Word` tree expected for this word.
    ///
    /// Quoted segments always become their own child. A bare segment is
    /// merged into the immediately preceding child when that child is a
    /// non-single-quoted string (value and source are appended,
    /// `split_fields` is the AND of both). A word with exactly one resulting
    /// child is that child itself; anything else is wrapped in a
    /// non-double-quoted list.
    fn parsed_word(&self) -> Word {
        let mut children = Vec::new();
        for segment in &self.segments {
            let Segment::Bare(bare) = segment else {
                children.push(segment.parsed_word());
                continue;
            };
            match children.pop() {
                Some(Word::String(previous)) if !previous.single_quoted() => {
                    let merged_source = match previous.source() {
                        Some(prefix) => {
                            let mut joined = prefix.to_string();
                            joined.push_str(&bare.source);
                            Some(joined)
                        }
                        None => Some(bare.source.clone()),
                    };
                    let merged_value = format!("{}{}", previous.value(), bare.value);
                    let split_fields = previous.split_fields() && bare.split_fields;
                    children.push(Word::string(
                        merged_value,
                        false,
                        split_fields,
                        merged_source,
                        Range::default(),
                    ));
                }
                Some(previous) => {
                    // Not mergeable: put it back and append the bare word after it.
                    children.push(previous);
                    children.push(segment.parsed_word());
                }
                None => children.push(segment.parsed_word()),
            }
        }
        if let [only] = children.as_slice() {
            return only.clone();
        }
        Word::list(children, false, Range::default())
    }

    /// Returns `parsed_word()` passed through `strip_word_metadata`.
    fn ast_word(&self) -> Word {
        let parsed = self.parsed_word();
        strip_word_metadata(&parsed)
    }
}
/// Strategy for one bare chunk: either a plain atom from `BARE_PLAIN_ATOMS`
/// (source and value identical, no escaped whitespace) or a backslash-escaped
/// character as produced by `escaped_bare_chunk`.
fn bare_chunk() -> BoxedStrategy<BareChunk> {
    let verbatim = prop::sample::select(BARE_PLAIN_ATOMS).prop_map(|atom| {
        let text = atom.to_string();
        BareChunk {
            source: text.clone(),
            value: text,
            escaped_whitespace: false,
        }
    });
    prop_oneof![verbatim, escaped_bare_chunk()].boxed()
}
fn bare_leading_chunk() -> BoxedStrategy<BareChunk> {
let plain = prop::sample::select(BARE_PLAIN_ATOMS).prop_map(|atom| BareChunk {
source: atom.to_string(),
value: atom.to_string(),
escaped_whitespace: false,
});
let escaped = prop::sample::select(RAW_ESCAPABLE_CHARS).prop_map(|ch| BareChunk {
source: format!("\\{ch}"),
value: ch.to_string(),
escaped_whitespace: ch.is_whitespace(),
});
prop_oneof![plain, escaped].boxed()
}
fn bare_segment() -> BoxedStrategy<Segment> {
(
bare_leading_chunk(),
prop::collection::vec(bare_chunk(), 0..=3),
)
.prop_map(|(leading, mut rest)| {
let mut chunks = vec![leading];
chunks.append(&mut rest);
BareSegment::from_chunks(chunks)
})
.prop_map(Segment::Bare)
.boxed()
}
/// Strategy for a bare chunk that is a single backslash-escaped character
/// drawn from `RAW_ESCAPABLE_CHARS`.
fn escaped_bare_chunk() -> BoxedStrategy<BareChunk> {
    /// Builds the chunk `\ch` whose expected value is `ch` alone.
    fn escape(ch: char) -> BareChunk {
        let mut source = String::from('\\');
        source.push(ch);
        BareChunk {
            source,
            value: String::from(ch),
            escaped_whitespace: ch.is_whitespace(),
        }
    }

    prop::sample::select(RAW_ESCAPABLE_CHARS)
        .prop_map(escape)
        .boxed()
}
/// Strategy for a bare segment that is guaranteed to contain at least one
/// backslash-escaped character, surrounded by zero to two arbitrary bare
/// chunks on each side.
fn interesting_bare_segment() -> BoxedStrategy<Segment> {
    let padding = || prop::collection::vec(bare_chunk(), 0..=2);
    (padding(), escaped_bare_chunk(), padding())
        .prop_map(|(before, escaped, after)| {
            let chunks = before
                .into_iter()
                .chain(std::iter::once(escaped))
                .chain(after)
                .collect();
            Segment::Bare(BareSegment::from_chunks(chunks))
        })
        .boxed()
}
/// Strategy for a single-quoted segment whose payload is the concatenation
/// of zero to three atoms from `SINGLE_QUOTED_ATOMS`.
fn single_quoted_segment() -> BoxedStrategy<Segment> {
    prop::collection::vec(prop::sample::select(SINGLE_QUOTED_ATOMS), 0..=3)
        // `parts` is already a `Vec<&str>`; concatenate it directly instead of
        // re-collecting into a second vector and joining on "".
        .prop_map(|parts| Segment::SingleQuoted(parts.concat()))
        .boxed()
}
/// Strategy for one chunk of double-quoted content. Three shapes are mixed:
///
/// * a plain atom from `DOUBLE_QUOTED_PLAIN_ATOMS` (source == value);
/// * `\c` for `c` in `DOUBLE_QUOTED_ESCAPABLE_CHARS`, expected value `c`;
/// * `\c` for `c` in `DOUBLE_QUOTED_PRESERVED_BACKSLASH_CHARS`, expected
///   value `\c` (backslash kept).
fn double_quoted_chunk() -> BoxedStrategy<DoubleQuotedChunk> {
    let verbatim = prop::sample::select(DOUBLE_QUOTED_PLAIN_ATOMS).prop_map(|atom| {
        let text = atom.to_string();
        DoubleQuotedChunk {
            source: text.clone(),
            value: text,
        }
    });
    let backslash_removed = prop::sample::select(DOUBLE_QUOTED_ESCAPABLE_CHARS).prop_map(|ch| {
        DoubleQuotedChunk {
            source: format!("\\{ch}"),
            value: ch.to_string(),
        }
    });
    let backslash_kept =
        prop::sample::select(DOUBLE_QUOTED_PRESERVED_BACKSLASH_CHARS).prop_map(|ch| {
            let text = format!("\\{ch}");
            DoubleQuotedChunk {
                source: text.clone(),
                value: text,
            }
        });
    prop_oneof![verbatim, backslash_removed, backslash_kept].boxed()
}
/// Strategy for a double-quoted segment made of zero to four chunks.
fn double_quoted_segment() -> BoxedStrategy<Segment> {
    let chunks = prop::collection::vec(double_quoted_chunk(), 0..=4);
    chunks
        .prop_map(|pieces| Segment::DoubleQuoted(DoubleQuotedSegment::from_chunks(pieces)))
        .boxed()
}
fn any_segment() -> BoxedStrategy<Segment> {
prop_oneof![
bare_segment(),
single_quoted_segment(),
double_quoted_segment()
]
.boxed()
}
fn interesting_segment() -> BoxedStrategy<Segment> {
prop_oneof![
interesting_bare_segment(),
single_quoted_segment(),
double_quoted_segment()
]
.boxed()
}
/// Strategy for a whole generated word: one "interesting" segment preceded
/// by `prefix_len` arbitrary segments (0 to 3) and followed by up to
/// `3 - prefix_len` more, so a word has between one and four segments.
fn quotation_word() -> BoxedStrategy<QuotationWord> {
    const MAX_EXTRA_SEGMENTS: usize = 3;
    (0usize..=MAX_EXTRA_SEGMENTS)
        .prop_flat_map(|prefix_len| {
            let suffix_max = MAX_EXTRA_SEGMENTS - prefix_len;
            (
                prop::collection::vec(any_segment(), prefix_len),
                interesting_segment(),
                prop::collection::vec(any_segment(), 0..=suffix_max),
            )
        })
        .prop_map(|(before, interesting, after)| {
            let segments = before
                .into_iter()
                .chain(std::iter::once(interesting))
                .chain(after)
                .collect();
            QuotationWord { segments }
        })
        .boxed()
}
/// Parses `script` with a fresh `Parser` built from the string and returns
/// the resulting program, or the parser's error.
fn parse_program(script: &str) -> Result<Program, mxsh::parser::ParseError> {
Parser::from_string(script).parse_program()
}
/// Builds an unquoted string word for `value`: not single-quoted, field
/// splitting enabled, no recorded source text, default range.
fn bare_word(value: &str) -> Word {
    let single_quoted = false;
    let split_fields = true;
    Word::string(value, single_quoted, split_fields, None, Range::default())
}
/// Builds the program `echo <argument>`: a single command list holding a
/// one-command pipeline (both boolean flags `false`) whose simple command is
/// named `echo` and has `argument` as its only argument.
fn program_with_argument(argument: Word) -> Program {
    let echo = SimpleCommand::new(
        Some(bare_word("echo")),
        vec![argument],
        Vec::new(),
        Vec::new(),
    );
    let pipeline = Pipeline::new(vec![Command::Simple(echo)], false, Default::default());
    let list = CommandList::new(AndOrList::Pipeline(pipeline), false, Default::default());
    Program::new(vec![list])
}
/// Returns the first argument of the first simple command of the first
/// pipeline in `program`.
///
/// # Panics
///
/// Panics when the program has no command list, when the first list is not a
/// plain pipeline, when the pipeline's first command is not a simple command,
/// or when that command has no arguments.
fn first_argument(program: &Program) -> &Word {
    let command_list = program
        .body()
        .first()
        .expect("expected one command list");
    let pipeline = match command_list.and_or_list() {
        AndOrList::Pipeline(pipeline) => pipeline,
        _ => panic!("expected pipeline"),
    };
    let simple = match pipeline.commands().first() {
        Some(Command::Simple(simple)) => simple,
        _ => panic!("expected simple command"),
    };
    simple.arguments().first().expect("expected one argument")
}
/// Rebuilds `program` with every command list normalized and a default range.
fn normalize_program(program: &Program) -> Program {
    let body = program.body().iter().map(normalize_command_list).collect();
    Program::with_range(body, Range::default())
}
/// Rebuilds `command_list` with a normalized and-or list, the original
/// ampersand flag, and a default range.
fn normalize_command_list(command_list: &CommandList) -> CommandList {
    let and_or_list = normalize_and_or_list(command_list.and_or_list());
    let ampersand = command_list.ampersand();
    CommandList::new(and_or_list, ampersand, Range::default())
}
/// Recursively rebuilds an and-or list with default ranges.
///
/// Pipelines keep their bang flag and get normalized commands; binary nodes
/// keep their operator and get both sides normalized.
fn normalize_and_or_list(and_or_list: &AndOrList) -> AndOrList {
    match and_or_list {
        AndOrList::Pipeline(pipeline) => {
            let commands = pipeline.commands().iter().map(normalize_command).collect();
            let bang = pipeline.bang();
            AndOrList::Pipeline(Pipeline::new(commands, bang, Range::default()))
        }
        AndOrList::BinOp(binary) => {
            let op = binary.op();
            let left = normalize_and_or_list(binary.left());
            let right = normalize_and_or_list(binary.right());
            AndOrList::BinOp(AndOrBinary::with_range(op, left, right, Range::default()))
        }
    }
}
/// Normalizes a command.
///
/// Simple commands are rebuilt from their normalized name and arguments,
/// with empty vectors for the two remaining list parameters of
/// `SimpleCommand::with_range` and a default range. Every other command kind
/// is returned as an unchanged clone.
fn normalize_command(command: &Command) -> Command {
    match command {
        Command::Simple(simple) => {
            let name = simple.name().map(normalize_word);
            let arguments = simple.arguments().iter().map(normalize_word).collect();
            Command::Simple(SimpleCommand::with_range(
                name,
                arguments,
                Vec::new(),
                Vec::new(),
                Range::default(),
            ))
        }
        other => other.clone(),
    }
}
/// Recursively rebuilds `word` with default ranges and without source text.
///
/// * Strings keep value, single-quoted flag and split-fields flag; the
///   source is dropped (`None`).
/// * Parameters keep name, operator, colon flag and a normalized argument;
///   the two parameters after the argument are set to their defaults.
/// * Command substitutions keep a normalized program; the second and third
///   constructor arguments are fixed to `None` and `false`.
/// * Arithmetic words keep a normalized expression body.
/// * Lists keep their double-quoted flag and normalized children.
fn normalize_word(word: &Word) -> Word {
    match word {
        Word::String(text) => {
            let value = text.value();
            let single_quoted = text.single_quoted();
            let split_fields = text.split_fields();
            Word::string(value, single_quoted, split_fields, None, Range::default())
        }
        Word::Parameter(param) => {
            let name = param.name();
            let op = param.op();
            let colon = param.colon();
            let arg = param.arg().map(|inner| Box::new(normalize_word(inner)));
            Word::parameter(
                name,
                op,
                colon,
                arg,
                Default::default(),
                Default::default(),
                Range::default(),
            )
        }
        Word::Command(substitution) => {
            let program = normalize_program(substitution.program());
            Word::command(program, None, false, Range::default())
        }
        Word::Arithmetic(arithm) => {
            let body = normalize_arithm(arithm.body());
            Word::arithmetic(body, Range::default())
        }
        Word::List(list) => {
            let children = list.children().iter().map(normalize_word).collect();
            let double_quoted = list.double_quoted();
            Word::list(children, double_quoted, Range::default())
        }
    }
}
/// Recursively rebuilds an arithmetic expression with default ranges,
/// keeping every operator, name and literal and normalizing all
/// sub-expressions.
fn normalize_arithm(expr: &ArithmExpr) -> ArithmExpr {
    match expr {
        ArithmExpr::Literal(node) => ArithmExpr::literal(node.value(), Range::default()),
        ArithmExpr::Variable(node) => ArithmExpr::variable(node.name(), Range::default()),
        ArithmExpr::Raw(node) => ArithmExpr::raw(node.expr(), Range::default()),
        ArithmExpr::BinOp(node) => {
            let op = node.op();
            let lhs = normalize_arithm(node.left());
            let rhs = normalize_arithm(node.right());
            ArithmExpr::bin_op(op, lhs, rhs, Range::default())
        }
        ArithmExpr::UnOp(node) => {
            let op = node.op();
            let operand = normalize_arithm(node.operand());
            ArithmExpr::un_op(op, operand, Range::default())
        }
        ArithmExpr::Cond(node) => {
            let test = normalize_arithm(node.cond());
            let on_true = normalize_arithm(node.then_branch());
            let on_false = normalize_arithm(node.else_branch());
            ArithmExpr::cond(test, on_true, on_false, Range::default())
        }
        ArithmExpr::Assign(node) => {
            let name = node.name();
            let op = node.op();
            let value = normalize_arithm(node.value());
            ArithmExpr::assign(name, op, value, Range::default())
        }
    }
}
/// Returns `word` with ranges reset and source text removed; this is
/// exactly `normalize_word` under a more descriptive name.
fn strip_word_metadata(word: &Word) -> Word {
normalize_word(word)
}
// Property tests over generated quotation-heavy words.
proptest! {
// Each property runs 64 cases; failure persistence is file-based
// (`WithSource("proptest-regressions")`), all other settings are defaults.
#![proptest_config(ProptestConfig {
cases: 64,
failure_persistence: Some(Box::new(proptest::test_runner::FileFailurePersistence::WithSource("proptest-regressions"))),
.. ProptestConfig::default()
})]
// The metadata-stripped expected word must still yield, via `as_str()`,
// exactly the literal value computed from the generator's segments.
// This checks the generator model itself; no parsing is involved.
#[test]
fn quotation_words_remain_literal_for_ast(word in quotation_word()) {
let literal = word.literal_value();
let ast_word = word.ast_word();
let ast_literal = ast_word.as_str();
prop_assert_eq!(
ast_literal.as_deref(),
Some(literal.as_str()),
"generated literal quotation word stopped being recoverable as a literal.\nword:\n{:?}",
word
);
}
// Parsing `echo <rendered word>` must produce a first argument that, after
// normalization, equals the normalized expected word, and whose `as_str()`
// equals the expected literal value.
#[test]
fn parser_preserves_generated_quotation_words(word in quotation_word()) {
let surface = word.render();
let script = format!("echo {surface}\n");
// A parse failure panics with the offending script instead of being
// reported through `prop_assert!`.
let parsed = parse_program(&script)
.unwrap_or_else(|err| panic!("generated quotation should parse: {err:?}\nscript:\n{script}"));
let expected = normalize_word(&word.parsed_word());
let actual = normalize_word(first_argument(&parsed));
let actual_literal = actual.as_str();
let expected_literal = word.literal_value();
prop_assert_eq!(
&actual,
&expected,
"parser lost quotation structure for generated literal word.\nscript:\n{}\nword:\n{:?}",
script,
word
);
prop_assert_eq!(
actual_literal.as_deref(),
Some(expected_literal.as_str()),
"parser changed the quote-removed literal value.\nscript:\n{}",
script
);
}
// Canonicalizing `echo <word>` and parsing the canonical text back must
// keep the argument's literal value, and canonicalizing the reparsed
// program must reproduce the same canonical text.
#[test]
fn canonical_generated_quotation_words_are_idempotent(word in quotation_word()) {
let literal = word.literal_value();
let original = program_with_argument(word.ast_word());
let canonical = original.to_canonical();
let reparsed = parse_program(&canonical)
.unwrap_or_else(|err| panic!("canonical parse failed: {err:?}\ncanonical:\n{canonical}\nword:\n{word:?}"));
let reparsed_literal = first_argument(&reparsed).as_str();
prop_assert_eq!(
reparsed_literal.as_deref(),
Some(literal.as_str()),
"canonicalization changed the literal value of a quotation-heavy word.\ncanonical:\n{}",
canonical
);
prop_assert_eq!(
reparsed.to_canonical(),
canonical.clone(),
"canonicalization for a generated quotation word was not idempotent.\ncanonical:\n{}",
canonical
);
}
}