pub(crate) mod arithm;
mod program;
pub(crate) mod word;
use std::io::Read;
use crate::ast::{Position, Program, Range, Word};
#[cfg(feature = "embed")]
use crate::policy::ShellLanguage;
/// Lexical category of the upcoming input, as reported by the parser's
/// symbol classification.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Symbol {
    /// No input remains.
    Eof,
    /// Anything that is neither a newline nor one of the multi-character
    /// operators below (a lone `<`, `>`, `&`, `|` or `;` is also a token).
    Token,
    /// A line feed.
    Newline,
    /// `&&`
    AndIf,
    /// `||`
    OrIf,
    /// `;;`
    DSemi,
    /// `<<`
    DLess,
    /// `>>`
    DGreat,
    /// `<&`
    LessAnd,
    /// `>&`
    GreatAnd,
    /// `<>`
    LessGreat,
    /// `<<-`
    DLessDash,
    /// `>|`
    Clobber,
}
/// Multi-character operators and the symbol each one maps to.
///
/// Lookup returns the first entry whose text prefixes the input, so an
/// operator that extends another one (`<<-` vs `<<`) must be listed first.
const OPERATORS: &[(Symbol, &str)] = &[
    // Three-character operators.
    (Symbol::DLessDash, "<<-"),
    // Two-character operators.
    (Symbol::AndIf, "&&"),
    (Symbol::OrIf, "||"),
    (Symbol::DSemi, ";;"),
    (Symbol::DLess, "<<"),
    (Symbol::DGreat, ">>"),
    (Symbol::LessAnd, "<&"),
    (Symbol::GreatAnd, ">&"),
    (Symbol::LessGreat, "<>"),
    (Symbol::Clobber, ">|"),
];
/// Words that `Parser::is_keyword` reports as reserved (exact, case-sensitive match).
const KEYWORDS: &[&str] = &[
    "if",
    "then",
    "else",
    "elif",
    "fi",
    "do",
    "done",
    "case",
    "esac",
    "while",
    "until",
    "for",
    "{",
    "}",
    "!",
    "in",
];
/// Error describing why parsing failed and where.
#[derive(Debug, Clone)]
pub struct ParseError {
    /// Machine-readable category; this file produces `"parse.syntax"` and
    /// `"parse.io"`.
    pub code: &'static str,
    /// Human-readable description of the failure.
    pub message: String,
    /// Name of the input being parsed, when one is known; `Display` uses it
    /// as a prefix.
    pub source_name: Option<String>,
    /// Location printed by `Display`; the constructors set it to `range.begin`.
    pub pos: Position,
    /// Span of input the error refers to.
    pub range: Range,
}
impl ParseError {
    /// Builds a `"parse.syntax"` error located at `pos`, with no source name.
    pub fn new(message: impl Into<String>, pos: Position) -> Self {
        Self::with_code("parse.syntax", message, pos, None)
    }

    /// Builds an error with an explicit code covering `range`.
    ///
    /// `pos` is taken from the start of `range`.
    pub fn with_range(
        code: &'static str,
        message: impl Into<String>,
        range: Range,
        source_name: Option<String>,
    ) -> Self {
        let pos = range.begin;
        let message = message.into();
        Self {
            code,
            message,
            source_name,
            pos,
            range,
        }
    }

    /// Builds an error with an explicit code at a single position.
    ///
    /// The resulting range is empty: it begins and ends at `pos`.
    pub fn with_code(
        code: &'static str,
        message: impl Into<String>,
        pos: Position,
        source_name: Option<String>,
    ) -> Self {
        let point = Range {
            begin: pos,
            end: pos,
        };
        Self::with_range(code, message, point, source_name)
    }
}
impl std::fmt::Display for ParseError {
    /// Writes `line:column: message`, prefixed with `source_name:` when a
    /// source name is present.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            message,
            pos,
            source_name,
            ..
        } = self;
        match source_name.as_deref() {
            Some(name) => write!(f, "{name}:{}:{}: {message}", pos.line, pos.column),
            None => write!(f, "{}:{}: {message}", pos.line, pos.column),
        }
    }
}
// Marker impl: `ParseError` relies on the trait's default methods, so it can be
// used wherever a `std::error::Error` is expected.
impl std::error::Error for ParseError {}
/// Severity of a [`ParseDiagnostic`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseDiagnosticKind {
    /// A non-fatal remark; the only kind currently defined.
    Warning,
}
/// A non-fatal message collected while parsing.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseDiagnostic {
    /// Severity of the diagnostic.
    pub kind: ParseDiagnosticKind,
    /// Machine-readable category, e.g. `"parse.warning"`.
    pub code: &'static str,
    /// Human-readable text.
    pub message: String,
    /// Name of the input the diagnostic refers to, if known.
    pub source_name: Option<String>,
    /// Span of input the diagnostic refers to, if one was supplied.
    pub range: Option<Range>,
}
impl ParseDiagnostic {
    /// Builds a warning diagnostic with the code `"parse.warning"`.
    pub(crate) fn warning(
        message: String,
        source_name: Option<String>,
        range: Option<Range>,
    ) -> Self {
        let kind = ParseDiagnosticKind::Warning;
        let code = "parse.warning";
        Self {
            kind,
            code,
            message,
            source_name,
            range,
        }
    }
}
/// Settings applied to a [`Parser`] before it runs.
///
/// Values are set through the consuming builder methods and read back through
/// the `*_flag` / `*_ref` accessors.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseOptions {
    /// Forwarded to `Parser::set_alias_expansion_enabled`.
    alias_expansion_enabled: bool,
    /// Forwarded to `Parser::set_function_definitions_enabled`.
    function_definitions_enabled: bool,
    /// Forwarded to `Parser::set_source_name`.
    source_name: Option<String>,
}
impl ParseOptions {
    /// Creates options with the default settings (see the `Default` impl).
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the options with alias expansion switched on or off.
    pub fn alias_expansion_enabled(self, enabled: bool) -> Self {
        Self {
            alias_expansion_enabled: enabled,
            ..self
        }
    }

    /// Returns the options with function definitions switched on or off.
    pub fn function_definitions_enabled(self, enabled: bool) -> Self {
        Self {
            function_definitions_enabled: enabled,
            ..self
        }
    }

    /// Returns the options with the given source name attached.
    pub fn source_name(self, source_name: impl Into<String>) -> Self {
        Self {
            source_name: Some(source_name.into()),
            ..self
        }
    }

    /// Returns the options with any source name removed.
    pub fn clear_source_name(self) -> Self {
        Self {
            source_name: None,
            ..self
        }
    }

    /// Reports whether alias expansion is enabled.
    pub fn alias_expansion_enabled_flag(&self) -> bool {
        self.alias_expansion_enabled
    }

    /// Reports whether function definitions are enabled.
    pub fn function_definitions_enabled_flag(&self) -> bool {
        self.function_definitions_enabled
    }

    /// Returns the configured source name, if any.
    pub fn source_name_ref(&self) -> Option<&str> {
        self.source_name.as_deref()
    }

    /// Copies every setting onto `parser` through its setters.
    fn configure_parser(&self, parser: &mut Parser) {
        let Self {
            alias_expansion_enabled,
            function_definitions_enabled,
            source_name,
        } = self;
        parser.set_alias_expansion_enabled(*alias_expansion_enabled);
        parser.set_function_definitions_enabled(*function_definitions_enabled);
        parser.set_source_name(source_name.clone());
    }
}
impl Default for ParseOptions {
    /// Alias expansion and function definitions enabled, no source name.
    fn default() -> Self {
        Self {
            source_name: None,
            function_definitions_enabled: true,
            alias_expansion_enabled: true,
        }
    }
}
#[cfg(feature = "embed")]
impl From<&ShellLanguage> for ParseOptions {
    /// Derives the two feature toggles from `language`; every other setting
    /// keeps its default value.
    fn from(language: &ShellLanguage) -> Self {
        Self {
            alias_expansion_enabled: language.alias_expansion_enabled(),
            function_definitions_enabled: language.function_definitions_enabled(),
            ..Self::default()
        }
    }
}
#[cfg(feature = "embed")]
impl From<ShellLanguage> for ParseOptions {
    /// By-value convenience that delegates to the `From<&ShellLanguage>` impl.
    fn from(language: ShellLanguage) -> Self {
        let borrowed: &ShellLanguage = &language;
        borrowed.into()
    }
}
/// Applies the parse options derived from `language` to `parser`.
#[cfg(feature = "embed")]
pub(crate) fn configure_parser_for_language(parser: &mut Parser, language: &ShellLanguage) {
    let options: ParseOptions = language.into();
    options.configure_parser(parser);
}
/// Result of a successful parse: the program plus any diagnostics collected
/// along the way.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedProgram {
    /// The parsed program.
    program: Program,
    /// Diagnostics handed over together with the program.
    diagnostics: Vec<ParseDiagnostic>,
}
impl ParsedProgram {
    /// Bundles a program with its diagnostics.
    pub fn new(program: Program, diagnostics: Vec<ParseDiagnostic>) -> Self {
        Self {
            diagnostics,
            program,
        }
    }

    /// Borrows the parsed program.
    pub fn program(&self) -> &Program {
        let Self { program, .. } = self;
        program
    }

    /// Borrows the diagnostics as a slice.
    pub fn diagnostics(&self) -> &[ParseDiagnostic] {
        self.diagnostics.as_slice()
    }

    /// Consumes the value and returns the program, dropping the diagnostics.
    pub fn into_program(self) -> Program {
        let Self { program, .. } = self;
        program
    }
}
pub fn parse(text: &str) -> Result<Program, ParseError> {
Ok(parse_with(text, &ParseOptions::default())?.into_program())
}
/// Parses `text` with the given options, returning the program together with
/// the diagnostics the parser collected.
///
/// # Errors
///
/// Returns the [`ParseError`] produced by `Parser::parse_program`; diagnostics
/// are not returned in that case.
pub fn parse_with(text: &str, options: &ParseOptions) -> Result<ParsedProgram, ParseError> {
    let mut parser = Parser::from_string(text);
    options.configure_parser(&mut parser);
    match parser.parse_program() {
        Ok(program) => {
            let diagnostics = parser.take_diagnostics();
            Ok(ParsedProgram::new(program, diagnostics))
        }
        Err(err) => Err(err),
    }
}
// Callback signature that `AliasFn` stores behind a `Box`: given a name, it
// returns an optional replacement string. `Send` is part of the trait object.
type AliasResolver = dyn FnMut(&str) -> Option<String> + Send;
/// Owned alias-lookup callback that can be installed on a [`Parser`] via
/// `Parser::set_alias_func`.
pub struct AliasFn {
    /// The boxed callback invoked by `resolve`.
    inner: Box<AliasResolver>,
}
impl AliasFn {
pub fn new<F>(f: F) -> Self
where
F: FnMut(&str) -> Option<String> + Send + 'static,
{
Self { inner: Box::new(f) }
}
pub fn resolve(&mut self, name: &str) -> Option<String> {
(self.inner)(name)
}
}
/// Bookkeeping for a here-document whose body has not been read yet.
///
/// NOTE(review): the field meanings below are inferred from their names; the
/// code that fills and consumes them lives outside this file — confirm there.
pub(crate) struct HereDocPending {
    /// Delimiter text — presumably the line that ends the body.
    pub delimiter: String,
    /// Presumably set for the tab-stripping `<<-` form.
    pub strip_tabs: bool,
    /// Presumably whether the body undergoes expansion.
    pub expand: bool,
}
/// Reports whether `w` contains any quoting.
///
/// A string word counts when it is single-quoted. A list counts when it is
/// double-quoted itself or when any of its children (recursively) counts.
/// Every other kind of word is unquoted.
fn is_word_quoted(w: &Word) -> bool {
    match w {
        Word::String(text) => text.single_quoted(),
        Word::List(items) => {
            items.double_quoted() || items.children().iter().any(is_word_quoted)
        }
        _ => false,
    }
}
// Size in bytes of the scratch buffer handed to each `read` call when the
// parser refills its input buffer; the tests also use it to place operators
// at a read boundary.
const READ_SIZE: usize = 4096;
/// Streaming parser state: a byte source, the buffered bytes read from it, the
/// current position, and the options and results of the parse.
pub struct Parser {
    /// Byte source the parser pulls from on demand.
    input: Box<dyn Read + Send>,
    /// Bytes obtained so far; `buf_pos` indexes into it.
    buf: Vec<u8>,
    /// One flag per byte of `buf`. Bytes read from `input` are flagged `false`.
    /// NOTE(review): presumably `true` marks text spliced in by alias
    /// expansion elsewhere in the crate — confirm.
    buf_alias_inserted: Vec<bool>,
    /// Index in `buf` of the next unread byte.
    buf_pos: usize,
    /// Set once `input` reported end of input or a read error.
    input_eof: bool,
    /// Position (byte offset, line, column) of the next unread character.
    pos: Position,
    /// First error recorded; later `set_error` calls do not replace it.
    error: Option<ParseError>,
    /// Here-documents awaiting their bodies (managed outside this file).
    pub(crate) here_doc_pending: Vec<HereDocPending>,
    /// Here-document bodies (managed outside this file).
    pub(crate) here_doc_bodies: Vec<Vec<crate::ast::Word>>,
    /// Set to `true` whenever `read_continuation_line` consumes a
    /// backslash-newline pair.
    pub(crate) continuation_line: bool,
    /// Optional alias resolver installed via `set_alias_func`.
    pub(crate) alias: Option<AliasFn>,
    /// Name attached to errors and warnings created by this parser.
    pub(crate) source_name: Option<String>,
    /// Warnings collected so far; drained by `take_diagnostics`.
    pub(crate) diagnostics: Vec<ParseDiagnostic>,
    /// Alias-expansion switch set through `set_alias_expansion_enabled`.
    pub(crate) alias_expansion_enabled: bool,
    /// Alias-related flag consulted outside this file; starts `false`.
    pub(crate) suppress_alias_once: bool,
    /// Alias-related flag consulted outside this file; starts `false`.
    pub(crate) disable_aliases: bool,
    /// When set, a `)` whose `buf_alias_inserted` flag is `true` is not
    /// treated as syntax by `current_rparen_is_syntax`.
    alias_inserted_rparen_is_data: bool,
    /// Function-definition switch set through `set_function_definitions_enabled`.
    pub(crate) function_definitions_enabled: bool,
}
impl Parser {
    /// Decodes the character at the cursor without consuming it.
    ///
    /// Returns the character and its encoded width in bytes, or `None` when no
    /// buffered bytes remain. A byte that does not begin a valid UTF-8
    /// sequence within the buffered data is returned as the code point with
    /// the same numeric value and a width of one, so scanning always advances.
    fn decode_next_char(&self) -> Option<(char, usize)> {
        if self.buf_pos >= self.buf.len() {
            return None;
        }
        let first = self.buf[self.buf_pos];
        if first.is_ascii() {
            return Some((first as char, 1));
        }
        // Try progressively longer prefixes until one is a complete sequence.
        for len in 2..=4 {
            let end = self.buf_pos + len;
            if end > self.buf.len() {
                break;
            }
            if let Ok(s) = std::str::from_utf8(&self.buf[self.buf_pos..end])
                && let Some(ch) = s.chars().next()
            {
                return Some((ch, len));
            }
        }
        Some((first as char, 1))
    }

    /// Creates a parser that pulls bytes from `input` on demand.
    ///
    /// Alias expansion and function definitions start enabled; no source name
    /// or alias resolver is set.
    pub fn new(input: Box<dyn Read + Send>) -> Self {
        Self {
            input,
            buf: Vec::new(),
            buf_alias_inserted: Vec::new(),
            buf_pos: 0,
            input_eof: false,
            pos: Position {
                offset: 0,
                line: 1,
                column: 1,
            },
            error: None,
            here_doc_pending: Vec::new(),
            here_doc_bodies: Vec::new(),
            continuation_line: false,
            alias: None,
            source_name: None,
            diagnostics: Vec::new(),
            alias_expansion_enabled: true,
            suppress_alias_once: false,
            disable_aliases: false,
            alias_inserted_rparen_is_data: false,
            function_definitions_enabled: true,
        }
    }

    /// Creates a parser over an in-memory copy of `s`.
    pub fn from_string(s: &str) -> Self {
        let data = s.as_bytes().to_vec();
        Self::new(Box::new(std::io::Cursor::new(data)))
    }

    /// Installs the alias resolver.
    pub fn set_alias_func(&mut self, f: AliasFn) {
        self.alias = Some(f);
    }

    /// Enables or disables alias expansion.
    pub fn set_alias_expansion_enabled(&mut self, enabled: bool) {
        self.alias_expansion_enabled = enabled;
    }

    /// Enables or disables function definitions.
    pub fn set_function_definitions_enabled(&mut self, enabled: bool) {
        self.function_definitions_enabled = enabled;
    }

    /// Controls whether an alias-inserted `)` is treated as data rather than
    /// syntax by [`Parser::current_rparen_is_syntax`].
    #[cfg(any(feature = "embed", test))]
    pub(crate) fn set_alias_inserted_rparen_is_data(&mut self, enabled: bool) {
        self.alias_inserted_rparen_is_data = enabled;
    }

    /// Sets (or clears) the source name used in errors and warnings.
    ///
    /// An already-recorded error that has no source name adopts the new one.
    pub fn set_source_name(&mut self, source_name: Option<String>) {
        self.source_name = source_name;
        if let Some(error) = self.error.as_mut()
            && error.source_name.is_none()
        {
            error.source_name = self.source_name.clone();
        }
    }

    /// Returns the configured source name, if any.
    pub fn source_name(&self) -> Option<&str> {
        self.source_name.as_deref()
    }

    /// Records a warning tagged with the current source name.
    pub(crate) fn push_warning(&mut self, warning: String, range: Option<Range>) {
        self.diagnostics.push(ParseDiagnostic::warning(
            warning,
            self.source_name.clone(),
            range,
        ));
    }

    /// Removes and returns all diagnostics collected so far.
    pub(crate) fn take_diagnostics(&mut self) -> Vec<ParseDiagnostic> {
        std::mem::take(&mut self.diagnostics)
    }

    /// Returns the recorded error, if any.
    pub fn error(&self) -> Option<&ParseError> {
        self.error.as_ref()
    }

    /// Parses a whole program; delegates to `program::parse_program`.
    pub fn parse_program(&mut self) -> Result<Program, ParseError> {
        program::parse_program(self)
    }

    /// Parses one line; delegates to `program::parse_line`.
    pub fn parse_line(&mut self) -> Result<Option<Program>, ParseError> {
        program::parse_line(self)
    }

    /// Parses a subshell body, consumes the command-substitution closer and
    /// requires that nothing but end of input follows.
    ///
    /// # Errors
    ///
    /// Returns (and clears) the recorded error if any step recorded one.
    #[cfg(any(feature = "embed", test))]
    pub(crate) fn parse_command_substitution_reparse(&mut self) -> Result<Program, ParseError> {
        let program = program::parse_subshell_body(self);
        program::consume_command_substitution_closer(self);
        if self.peek_symbol() != Symbol::Eof {
            program::set_unexpected_token_error(self);
        }
        if let Some(err) = self.error.take() {
            return Err(err);
        }
        Ok(program)
    }

    /// Returns `pos` advanced past `ch`, which occupied `width` bytes.
    ///
    /// The offset grows by `width`; a newline starts the next line at column
    /// one, any other character (tabs included) advances the column by one.
    fn advance_position(mut pos: Position, ch: char, width: usize) -> Position {
        pos.offset += width;
        if ch == '\n' {
            pos.line += 1;
            pos.column = 1;
        } else {
            pos.column += 1;
        }
        pos
    }

    /// Range covering the buffered character at the cursor, or `None` when
    /// nothing is buffered there.
    fn error_range_from_buffer(&self) -> Option<Range> {
        let (ch, width) = self.decode_next_char()?;
        Some(Range {
            begin: self.pos,
            end: Self::advance_position(self.pos, ch, width),
        })
    }

    /// Like [`Parser::error_range`] but never reads from the input; falls back
    /// to an empty range at the current position.
    fn error_range_without_fill(&self) -> Range {
        self.error_range_from_buffer().unwrap_or(Range {
            begin: self.pos,
            end: self.pos,
        })
    }

    /// Range of the character at the cursor, refilling the buffer first.
    fn error_range(&mut self) -> Range {
        self.fill_buf();
        self.error_range_without_fill()
    }

    /// Skips blanks and comments, then reports whether no character remains.
    pub fn eof(&mut self) -> bool {
        self.skip_blanks_and_comments();
        self.peek_char().is_none()
    }

    /// Reports whether the unread bytes at the cursor are a proper prefix of
    /// a multi-byte UTF-8 sequence, i.e. a lead byte followed only by
    /// continuation bytes and fewer of them than the lead byte announces.
    fn pending_char_is_truncated(&self) -> bool {
        let pending = self.buf.get(self.buf_pos..).unwrap_or_default();
        let Some((&lead, tail)) = pending.split_first() else {
            return false;
        };
        let expected = match lead {
            0xC2..=0xDF => 2,
            0xE0..=0xEF => 3,
            0xF0..=0xF4 => 4,
            // ASCII, stray continuation bytes and invalid lead bytes are
            // never "incomplete"; the decoder handles them one byte at a time.
            _ => return false,
        };
        pending.len() < expected && tail.iter().all(|byte| (0x80..=0xBF).contains(byte))
    }

    /// Tops up the buffer when fewer than `LOOKAHEAD` unread bytes remain.
    ///
    /// Normally performs at most one successful read per call. Further reads
    /// happen only when
    ///
    /// * the reader reports `ErrorKind::Interrupted`, which is retried rather
    ///   than treated as a failure, or
    /// * the read ended in the middle of a multi-byte character at the cursor,
    ///   so a reader that yields short chunks cannot make the decoder see a
    ///   split character.
    ///
    /// A zero-length read marks end of input. Any other read error also marks
    /// end of input and is recorded as a `"parse.io"` error unless an error is
    /// already recorded.
    fn fill_buf(&mut self) {
        // Minimum number of unread bytes to keep available.
        const LOOKAHEAD: usize = 16;
        // Defensive clamp so the subtraction and slicing below cannot underflow.
        if self.buf_pos > self.buf.len() {
            self.buf_pos = self.buf.len();
        }
        let mut refilled = false;
        loop {
            if self.input_eof {
                return;
            }
            let remaining = self.buf.len() - self.buf_pos;
            if remaining >= LOOKAHEAD {
                return;
            }
            if refilled && !self.pending_char_is_truncated() {
                return;
            }
            let mut chunk = [0u8; READ_SIZE];
            match self.input.read(&mut chunk) {
                Ok(0) => self.input_eof = true,
                Ok(n) => {
                    // Guard against a reader that reports more than it was given.
                    let n = n.min(chunk.len());
                    self.buf.extend_from_slice(&chunk[..n]);
                    self.buf_alias_inserted
                        .extend(std::iter::repeat_n(false, n));
                    refilled = true;
                }
                // Interrupted reads are transient; try again.
                Err(err) if err.kind() == std::io::ErrorKind::Interrupted => {}
                Err(err) => {
                    self.input_eof = true;
                    if self.error.is_none() {
                        self.error = Some(ParseError::with_range(
                            "parse.io",
                            format!("failed to read input: {err}"),
                            self.error_range_without_fill(),
                            self.source_name.clone(),
                        ));
                    }
                }
            }
        }
    }

    /// Returns the next character without consuming it.
    pub(crate) fn peek_char(&mut self) -> Option<char> {
        self.fill_buf();
        self.decode_next_char().map(|(ch, _)| ch)
    }

    /// Reports whether the byte at the cursor is a `)` that counts as syntax.
    ///
    /// Returns `false` when the next byte is not `)`. A `)` is data, not
    /// syntax, only when `alias_inserted_rparen_is_data` is set and that byte
    /// is flagged as alias-inserted.
    pub(crate) fn current_rparen_is_syntax(&mut self) -> bool {
        self.fill_buf();
        if self.buf_pos >= self.buf.len() || self.buf[self.buf_pos] != b')' {
            return false;
        }
        !(self.alias_inserted_rparen_is_data
            && self
                .buf_alias_inserted
                .get(self.buf_pos)
                .copied()
                .unwrap_or(false))
    }

    /// Consumes and returns the next character, updating the position.
    pub(crate) fn read_char(&mut self) -> Option<char> {
        self.fill_buf();
        let (ch, width) = self.decode_next_char()?;
        self.buf_pos += width;
        self.pos = Self::advance_position(self.pos, ch, width);
        Some(ch)
    }

    /// Returns the position of the next unread character.
    pub(crate) fn current_pos(&self) -> Position {
        self.pos
    }

    /// Records a `"parse.syntax"` error at the cursor unless an error is
    /// already recorded.
    pub(crate) fn set_error(&mut self, message: String) {
        if self.error.is_none() {
            self.error = Some(ParseError::with_range(
                "parse.syntax",
                message,
                self.error_range(),
                self.source_name.clone(),
            ));
        }
    }

    /// Consumes spaces and tabs; newlines are left in place.
    pub(crate) fn skip_blanks(&mut self) {
        while matches!(self.peek_char(), Some(' ' | '\t')) {
            self.read_char();
        }
    }

    /// Consumes blanks and `#` comments. A comment runs up to, but not
    /// including, the next newline (or to end of input).
    fn skip_blanks_and_comments(&mut self) {
        self.skip_blanks();
        while let Some('#') = self.peek_char() {
            loop {
                match self.peek_char() {
                    Some('\n') | None => break,
                    _ => {
                        self.read_char();
                    }
                }
            }
            self.skip_blanks();
        }
    }

    /// Reports whether `ch` can begin an operator (`<`, `>`, `&`, `|`, `;`).
    pub(crate) fn is_operator_start(ch: char) -> bool {
        matches!(ch, '<' | '>' | '&' | '|' | ';')
    }

    /// Consumes a backslash-newline pair at the cursor, if present.
    ///
    /// Returns `true` and sets `continuation_line` when the pair was
    /// consumed; otherwise consumes nothing and returns `false`.
    pub(crate) fn read_continuation_line(&mut self) -> bool {
        self.fill_buf();
        if self.buf[self.buf_pos..].starts_with(b"\\\n") {
            self.read_char();
            self.read_char();
            self.continuation_line = true;
            true
        } else {
            false
        }
    }

    /// Returns the first entry of `OPERATORS` that prefixes the unread input,
    /// together with its length in bytes.
    fn peek_operator(&mut self) -> Option<(Symbol, usize)> {
        self.fill_buf();
        let remaining = &self.buf[self.buf_pos..];
        for &(sym, text) in OPERATORS {
            if remaining.starts_with(text.as_bytes()) {
                return Some((sym, text.len()));
            }
        }
        None
    }

    /// Skips blanks and comments, then classifies what follows.
    ///
    /// The second element is the number of bytes `consume_symbol` should
    /// read for that symbol; it is zero for `Eof` and `Token`.
    fn classify_symbol(&mut self) -> (Symbol, usize) {
        self.skip_blanks_and_comments();
        match self.peek_char() {
            None => (Symbol::Eof, 0),
            Some('\n') => (Symbol::Newline, 1),
            Some(ch) if Self::is_operator_start(ch) => {
                if let Some((sym, len)) = self.peek_operator() {
                    (sym, len)
                } else {
                    (Symbol::Token, 0)
                }
            }
            Some(_) => (Symbol::Token, 0),
        }
    }

    /// Returns the upcoming symbol. Leading blanks and comments are consumed;
    /// the symbol itself is not.
    pub(crate) fn peek_symbol(&mut self) -> Symbol {
        let (sym, _) = self.classify_symbol();
        sym
    }

    /// Consumes the upcoming symbol if it equals `expected`.
    ///
    /// Returns whether it matched; on a mismatch the symbol is left unread.
    pub(crate) fn consume_symbol(&mut self, expected: Symbol) -> bool {
        let (sym, len) = self.classify_symbol();
        if sym == expected {
            // Operators and the newline are ASCII, so bytes equal characters.
            for _ in 0..len {
                self.read_char();
            }
            true
        } else {
            false
        }
    }

    /// Consumes a single newline symbol, reporting whether one was present.
    pub(crate) fn newline(&mut self) -> bool {
        self.consume_symbol(Symbol::Newline)
    }

    /// Consumes zero or more newlines.
    pub(crate) fn linebreak(&mut self) {
        while self.newline() {}
    }

    /// Consumes one or more newlines; returns `false` (consuming none) when
    /// the upcoming symbol is not a newline.
    pub(crate) fn newline_list(&mut self) -> bool {
        if !self.newline() {
            return false;
        }
        self.linebreak();
        true
    }

    /// Reports whether `s` is exactly one of the entries in `KEYWORDS`.
    pub(crate) fn is_keyword(s: &str) -> bool {
        KEYWORDS.contains(&s)
    }
}
#[cfg(test)]
mod tests {
use super::*;
use std::io;
use std::io::Read;
struct ChunkedReader {
bytes: Vec<u8>,
offset: usize,
chunk_size: usize,
}
impl ChunkedReader {
fn new(input: String, chunk_size: usize) -> Self {
Self {
bytes: input.into_bytes(),
offset: 0,
chunk_size,
}
}
}
impl Read for ChunkedReader {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
if self.offset >= self.bytes.len() {
return Ok(0);
}
let n = (self.bytes.len() - self.offset)
.min(self.chunk_size)
.min(buf.len());
buf[..n].copy_from_slice(&self.bytes[self.offset..self.offset + n]);
self.offset += n;
Ok(n)
}
}
struct FailingReader;
impl Read for FailingReader {
fn read(&mut self, _buf: &mut [u8]) -> io::Result<usize> {
Err(io::Error::other("simulated read failure"))
}
}
#[test]
fn peek_and_read() {
let mut p = Parser::from_string("abc");
assert_eq!(p.peek_char(), Some('a'));
assert_eq!(p.read_char(), Some('a'));
assert_eq!(p.read_char(), Some('b'));
assert_eq!(p.read_char(), Some('c'));
assert_eq!(p.read_char(), None);
}
#[test]
fn position_tracking() {
let mut p = Parser::from_string("ab\ncd");
assert_eq!(p.current_pos().line, 1);
p.read_char(); p.read_char(); assert_eq!(p.current_pos().column, 3);
p.read_char(); assert_eq!(p.current_pos().line, 2);
assert_eq!(p.current_pos().column, 1);
p.read_char(); assert_eq!(p.current_pos().column, 2);
}
#[test]
fn symbol_recognition() {
let mut p = Parser::from_string("&&||;;");
assert_eq!(p.peek_symbol(), Symbol::AndIf);
assert!(p.consume_symbol(Symbol::AndIf));
assert_eq!(p.peek_symbol(), Symbol::OrIf);
assert!(p.consume_symbol(Symbol::OrIf));
assert_eq!(p.peek_symbol(), Symbol::DSemi);
assert!(p.consume_symbol(Symbol::DSemi));
}
#[test]
fn newline_and_eof() {
let mut p = Parser::from_string("\n\n");
assert!(p.newline_list());
assert_eq!(p.peek_symbol(), Symbol::Eof);
}
#[test]
fn skip_comments() {
let mut p = Parser::from_string("# comment\necho");
p.skip_blanks_and_comments();
assert_eq!(p.peek_symbol(), Symbol::Newline);
}
#[test]
fn continuation_line() {
let mut p = Parser::from_string("\\\nhello");
assert!(p.read_continuation_line());
assert_eq!(p.peek_char(), Some('h'));
}
#[test]
fn redirection_operators() {
let mut p = Parser::from_string(">> >| <& >&");
assert_eq!(p.peek_symbol(), Symbol::DGreat);
assert!(p.consume_symbol(Symbol::DGreat));
assert_eq!(p.peek_symbol(), Symbol::Clobber);
assert!(p.consume_symbol(Symbol::Clobber));
assert_eq!(p.peek_symbol(), Symbol::LessAnd);
assert!(p.consume_symbol(Symbol::LessAnd));
assert_eq!(p.peek_symbol(), Symbol::GreatAnd);
assert!(p.consume_symbol(Symbol::GreatAnd));
}
#[test]
fn operator_at_buffer_boundary() {
let padding = READ_SIZE - 1;
let mut input = "a".repeat(padding);
input.push_str("&&b\n");
let mut p = Parser::from_string(&input);
let prog = p
.parse_program()
.expect("should parse across buffer boundary");
println!("operator_at_buffer_boundary: {prog:#?}");
assert_eq!(
prog.body.len(),
1,
"should be one command list with && joining two pipelines"
);
assert!(
!prog.body[0].ampersand,
"&& must not be split into & + & at buffer boundary"
);
}
#[test]
fn operator_split_across_buffer_boundary() {
let padding = READ_SIZE;
let mut input = "a".repeat(padding);
input.push_str("||b\n");
let mut p = Parser::from_string(&input);
let prog = p
.parse_program()
.expect("should parse || across buffer boundary");
println!("operator_split_across_buffer_boundary: {prog:#?}");
assert_eq!(
prog.body.len(),
1,
"should be one command list with || joining two pipelines"
);
assert!(
!prog.body[0].ampersand,
"|| must not be misparsed at buffer boundary"
);
}
#[test]
fn parser_streams_input_in_chunks() {
let mut input = "a".repeat(READ_SIZE - 1);
input.push_str("&&b\n");
let mut p = Parser::new(Box::new(ChunkedReader::new(input, 1)));
let prog = p
.parse_program()
.expect("should parse streaming input across reader chunk boundaries");
assert_eq!(prog.body.len(), 1);
assert!(
!prog.body[0].ampersand,
"&& must stay intact when the reader yields one byte at a time"
);
}
#[test]
fn is_operator_start_true_cases() {
assert!(Parser::is_operator_start('<'));
assert!(Parser::is_operator_start('>'));
assert!(Parser::is_operator_start('&'));
assert!(Parser::is_operator_start('|'));
assert!(Parser::is_operator_start(';'));
}
#[test]
fn is_operator_start_false_cases() {
assert!(!Parser::is_operator_start('a'));
assert!(!Parser::is_operator_start(' '));
assert!(!Parser::is_operator_start('\n'));
assert!(!Parser::is_operator_start('$'));
assert!(!Parser::is_operator_start('('));
assert!(!Parser::is_operator_start(')'));
assert!(!Parser::is_operator_start('#'));
}
#[test]
fn is_keyword_recognized() {
for kw in &[
"if", "then", "else", "elif", "fi", "do", "done", "case", "esac", "while", "until",
"for", "{", "}", "!", "in",
] {
assert!(
Parser::is_keyword(kw),
"{kw:?} should be recognized as a keyword"
);
}
}
#[test]
fn is_keyword_non_keywords() {
assert!(!Parser::is_keyword("echo"));
assert!(!Parser::is_keyword("cat"));
assert!(!Parser::is_keyword("IF"));
assert!(!Parser::is_keyword(""));
assert!(!Parser::is_keyword("iffy"));
assert!(!Parser::is_keyword("done!"));
}
#[test]
fn is_word_quoted_single_quoted() {
let w = Word::string("hello", true, false, None, crate::ast::Range::default());
assert!(is_word_quoted(&w));
}
#[test]
fn is_word_quoted_double_quoted() {
let w = Word::list(vec![], true, crate::ast::Range::default());
assert!(is_word_quoted(&w));
}
#[test]
fn is_word_quoted_unquoted_string() {
let w = Word::string("hello", false, true, None, crate::ast::Range::default());
assert!(!is_word_quoted(&w));
}
#[test]
fn is_word_quoted_nested_in_list() {
let inner = Word::string("inner", true, false, None, crate::ast::Range::default());
let w = Word::list(vec![inner], false, crate::ast::Range::default());
assert!(is_word_quoted(&w));
}
#[test]
fn is_word_quoted_parameter_not_quoted() {
let w = Word::parameter(
"x",
crate::ast::ParameterOp::None,
false,
None,
Position::default(),
None,
crate::ast::Range::default(),
);
assert!(!is_word_quoted(&w));
}
#[test]
fn parse_error_display() {
let err = ParseError::new(
"unexpected token",
Position {
offset: 10,
line: 3,
column: 5,
},
);
let displayed = format!("{err}");
assert_eq!(displayed, "3:5: unexpected token");
}
#[test]
fn parse_error_is_std_error() {
let err = ParseError::new("test error", Position::default());
let std_err: &dyn std::error::Error = &err;
println!("parse_error_is_std_error: {std_err}");
assert!(std_err.to_string().contains("test error"));
}
#[test]
fn source_name_default_none() {
let p = Parser::from_string("echo");
assert!(p.source_name().is_none());
}
#[test]
fn source_name_set_and_get() {
let mut p = Parser::from_string("echo");
p.set_source_name(Some("test.sh".to_string()));
assert_eq!(p.source_name(), Some("test.sh"));
}
#[test]
fn source_name_set_to_none() {
let mut p = Parser::from_string("echo");
p.set_source_name(Some("test.sh".to_string()));
p.set_source_name(None);
assert!(p.source_name().is_none());
}
#[test]
fn warnings_empty_by_default() {
let mut p = Parser::from_string("echo");
let diagnostics = p.take_diagnostics();
assert!(diagnostics.is_empty());
}
#[test]
fn push_and_take_warnings() {
let mut p = Parser::from_string("echo");
p.push_warning("warning one".to_string(), None);
p.push_warning("warning two".to_string(), None);
let diagnostics = p.take_diagnostics();
assert_eq!(diagnostics.len(), 2);
assert_eq!(diagnostics[0].message, "warning one");
assert_eq!(diagnostics[1].message, "warning two");
let after = p.take_diagnostics();
assert!(after.is_empty(), "take_diagnostics should clear the list");
}
#[test]
fn error_none_by_default() {
let p = Parser::from_string("echo");
assert!(p.error().is_none());
}
#[test]
fn set_error_latches_first() {
let mut p = Parser::from_string("echo");
p.set_error("first error".to_string());
p.set_error("second error".to_string());
let err = p.error().expect("should have an error");
assert_eq!(err.message, "first error");
}
#[test]
fn skip_blanks_spaces_and_tabs() {
let mut p = Parser::from_string(" \t\t hello");
p.skip_blanks();
assert_eq!(p.peek_char(), Some('h'));
}
#[test]
fn skip_blanks_preserves_newlines() {
let mut p = Parser::from_string(" \nhello");
p.skip_blanks();
assert_eq!(p.peek_char(), Some('\n'));
}
#[test]
fn skip_blanks_at_eof() {
let mut p = Parser::from_string(" ");
p.skip_blanks();
assert_eq!(p.peek_char(), None);
}
#[test]
fn skip_blanks_and_comments_multiple() {
let mut p = Parser::from_string(" # first\n # second\n echo");
p.skip_blanks_and_comments();
assert_eq!(
p.peek_char(),
Some('\n'),
"comment stops before the newline"
);
}
#[test]
fn skip_blanks_and_comments_no_comment() {
let mut p = Parser::from_string(" echo");
p.skip_blanks_and_comments();
assert_eq!(p.peek_char(), Some('e'));
}
#[test]
fn skip_blanks_and_comments_empty() {
let mut p = Parser::from_string("");
p.skip_blanks_and_comments();
assert_eq!(p.peek_char(), None);
}
#[test]
fn peek_symbol_less_great() {
let mut p = Parser::from_string("<>");
assert_eq!(p.peek_symbol(), Symbol::LessGreat);
}
#[test]
fn peek_symbol_dless_dash() {
let mut p = Parser::from_string("<<-");
assert_eq!(p.peek_symbol(), Symbol::DLessDash);
}
#[test]
fn peek_symbol_dless() {
let mut p = Parser::from_string("<<x");
assert_eq!(p.peek_symbol(), Symbol::DLess);
}
#[test]
fn peek_symbol_single_less_is_token() {
let mut p = Parser::from_string("< file");
assert_eq!(p.peek_symbol(), Symbol::Token);
}
#[test]
fn peek_symbol_single_greater_is_token() {
let mut p = Parser::from_string("> file");
assert_eq!(p.peek_symbol(), Symbol::Token);
}
#[test]
fn consume_symbol_returns_false_on_mismatch() {
let mut p = Parser::from_string("&&");
assert!(!p.consume_symbol(Symbol::OrIf));
assert_eq!(
p.peek_symbol(),
Symbol::AndIf,
"mismatch should not consume"
);
}
#[test]
fn consume_symbol_newline() {
let mut p = Parser::from_string("\nhello");
assert!(p.consume_symbol(Symbol::Newline));
assert_eq!(p.peek_char(), Some('h'));
}
#[test]
fn linebreak_consumes_zero_newlines() {
let mut p = Parser::from_string("hello");
p.linebreak();
assert_eq!(p.peek_char(), Some('h'));
}
#[test]
fn linebreak_consumes_multiple_newlines() {
let mut p = Parser::from_string("\n\n\nhello");
p.linebreak();
assert_eq!(p.peek_char(), Some('h'));
}
#[test]
fn newline_list_requires_one() {
let mut p = Parser::from_string("hello");
assert!(!p.newline_list());
}
#[test]
fn newline_list_consumes_all() {
let mut p = Parser::from_string("\n\n\nhello");
assert!(p.newline_list());
assert_eq!(p.peek_char(), Some('h'));
}
#[test]
fn eof_on_empty_input() {
let mut p = Parser::from_string("");
assert!(p.eof());
}
#[test]
fn eof_on_blanks_only() {
let mut p = Parser::from_string(" \t ");
assert!(p.eof());
}
#[test]
fn eof_on_comment_only() {
let mut p = Parser::from_string(" # comment");
assert!(
p.eof(),
"comment-only input with no trailing newline is eof"
);
}
#[test]
fn eof_on_comment_with_trailing_newline() {
let mut p = Parser::from_string(" # comment\n");
assert!(
!p.eof(),
"comment with trailing newline leaves newline unconsumed"
);
}
#[test]
fn eof_false_when_content_remains() {
let mut p = Parser::from_string("echo");
assert!(!p.eof());
}
#[test]
fn continuation_line_false_on_non_continuation() {
let mut p = Parser::from_string("hello");
assert!(!p.read_continuation_line());
assert_eq!(p.peek_char(), Some('h'));
}
#[test]
fn continuation_line_false_on_backslash_without_newline() {
let mut p = Parser::from_string("\\a");
assert!(!p.read_continuation_line());
assert_eq!(p.peek_char(), Some('\\'));
}
#[test]
fn continuation_line_at_end_of_input() {
let mut p = Parser::from_string("\\");
assert!(!p.read_continuation_line());
}
#[test]
fn decode_next_char_multibyte_utf8() {
let mut p = Parser::from_string("é");
assert_eq!(p.peek_char(), Some('é'));
assert_eq!(p.read_char(), Some('é'));
assert_eq!(p.read_char(), None);
}
// Three-byte UTF-8 sequences must be returned one scalar value at a time.
// The first expected character had been corrupted into mojibake (the UTF-8
// bytes of U+65E5 shown as three separate characters), which is not even a
// valid `char` literal; it is restored here.
#[test]
fn decode_next_char_cjk() {
    let mut p = Parser::from_string("日本語");
    assert_eq!(p.read_char(), Some('日'));
    assert_eq!(p.read_char(), Some('本'));
    assert_eq!(p.read_char(), Some('語'));
    assert_eq!(p.read_char(), None);
}
#[test]
fn decode_next_char_emoji() {
let mut p = Parser::from_string("🦀x");
assert_eq!(p.read_char(), Some('🦀'));
assert_eq!(p.read_char(), Some('x'));
assert_eq!(p.read_char(), None);
}
#[test]
fn position_offset_tracks_bytes() {
let mut p = Parser::from_string("aé");
p.read_char(); assert_eq!(p.current_pos().offset, 1);
p.read_char(); assert_eq!(p.current_pos().offset, 3);
}
#[test]
fn parse_line_returns_none_at_eof() {
let mut p = Parser::from_string("");
let result = p.parse_line().expect("should not error");
assert!(result.is_none());
}
#[test]
fn parse_line_returns_empty_on_bare_newline() {
let mut p = Parser::from_string("\n");
let result = p.parse_line().expect("should not error");
let prog = result.expect("should return Some");
assert!(prog.body.is_empty());
}
#[test]
fn parse_line_single_command() {
let mut p = Parser::from_string("echo hello\n");
let result = p.parse_line().expect("should not error");
let prog = result.expect("should return Some");
assert_eq!(prog.body.len(), 1);
}
#[test]
fn parser_reports_input_read_failure() {
let mut parser = Parser::new(Box::new(FailingReader));
let err = parser.parse_program().expect_err("expected read failure");
assert!(err.message.contains("failed to read input"));
}
#[test]
fn parse_line_successive_calls() {
let mut p = Parser::from_string("echo a\necho b\n");
let first = p.parse_line().expect("no error").expect("should parse");
assert_eq!(first.body.len(), 1);
let second = p.parse_line().expect("no error").expect("should parse");
assert_eq!(second.body.len(), 1);
let third = p.parse_line().expect("no error");
assert!(third.is_none(), "should be None at eof");
}
#[test]
fn parse_program_error_propagated() {
let mut p = Parser::from_string("if echo; echo");
let result = p.parse_program();
assert!(result.is_err(), "missing 'then' should cause parse error");
}
#[test]
fn alias_func_expansion() {
let mut p = Parser::from_string("ll\n");
p.set_alias_func(AliasFn::new(|name: &str| {
if name == "ll" {
Some("ls -la".to_string())
} else {
None
}
}));
let prog = p.parse_program().expect("should parse with alias");
println!("alias_func_expansion: {prog:#?}");
assert_eq!(prog.body.len(), 1);
}
#[test]
fn all_operator_symbols_round_trip() {
for &(sym, text) in OPERATORS {
let mut p = Parser::from_string(text);
let peeked = p.peek_symbol();
assert_eq!(
peeked, sym,
"peek_symbol for {text:?} should be {sym:?}, got {peeked:?}"
);
assert!(
p.consume_symbol(sym),
"consume_symbol({sym:?}) should succeed for {text:?}"
);
}
}
#[test]
fn peek_does_not_consume() {
let mut p = Parser::from_string("&&||");
let s1 = p.peek_symbol();
let s2 = p.peek_symbol();
assert_eq!(s1, s2, "peeking twice should return the same symbol");
assert_eq!(s1, Symbol::AndIf);
}
#[test]
fn position_column_after_tab() {
let mut p = Parser::from_string("\thello");
p.read_char(); assert_eq!(p.current_pos().column, 2, "tab advances column by 1");
}
#[test]
fn read_char_returns_none_at_eof() {
let mut p = Parser::from_string("");
assert_eq!(p.read_char(), None);
assert_eq!(p.read_char(), None);
}
#[test]
fn peek_char_returns_none_at_eof() {
let mut p = Parser::from_string("");
assert_eq!(p.peek_char(), None);
}
#[test]
fn multiple_continuation_lines() {
let mut p = Parser::from_string("\\\n\\\nhello");
assert!(p.read_continuation_line());
assert!(p.read_continuation_line());
assert_eq!(p.peek_char(), Some('h'));
}
#[test]
fn newline_resets_position() {
let mut p = Parser::from_string("abc\ndef");
p.read_char(); p.read_char(); p.read_char(); p.read_char(); assert_eq!(p.current_pos().line, 2);
assert_eq!(p.current_pos().column, 1);
p.read_char(); assert_eq!(p.current_pos().line, 2);
assert_eq!(p.current_pos().column, 2);
}
}