use super::core::Lexer;
use super::core::WordContext;
use super::core::WordLexer;
use crate::parser::core::Result;
use crate::parser::error::Error;
use crate::parser::error::SyntaxError;
use crate::syntax::Backslashed;
use crate::syntax::Literal;
use crate::syntax::Text;
use crate::syntax::TextUnit;
impl WordLexer<'_, '_> {
    /// Parses one [`TextUnit`] at the current position.
    ///
    /// `is_escapable` is asked about the character that directly follows a
    /// backslash; `is_delimiter` is asked about an ordinary character, and
    /// only after the backslash, `$` and backquote cases did not apply.
    ///
    /// Returns `Ok(None)` when no unit starts here, e.g. when the next
    /// character is a delimiter (that character is left in the input).
    pub async fn text_unit<F, G>(
        &mut self,
        mut is_delimiter: F,
        mut is_escapable: G,
    ) -> Result<Option<TextUnit>>
    where
        F: FnMut(char) -> bool,
        G: FnMut(char) -> bool,
    {
        // Erase the closure types so the real work is done by one
        // non-generic function.
        let delimiter: &mut dyn FnMut(char) -> bool = &mut is_delimiter;
        let escapable: &mut dyn FnMut(char) -> bool = &mut is_escapable;
        self.text_unit_dyn(delimiter, escapable).await
    }

    /// Non-generic implementation behind [`text_unit`](Self::text_unit).
    async fn text_unit_dyn(
        &mut self,
        is_delimiter: &mut dyn FnMut(char) -> bool,
        is_escapable: &mut dyn FnMut(char) -> bool,
    ) -> Result<Option<TextUnit>> {
        // A backslash either quotes the next (escapable) character or, if
        // that character is missing or not escapable, stands for itself.
        if self.skip_if(|c| c == '\\').await? {
            let unit = match self.consume_raw_char_if_dyn(is_escapable).await? {
                Some(escaped) => Backslashed(escaped),
                None => Literal('\\'),
            };
            return Ok(Some(unit));
        }

        // Units introduced by `$` (such as `$()`).
        let dollar = self.dollar_unit().await?;
        if dollar.is_some() {
            return Ok(dollar);
        }

        // Backquoted units.
        let backquoted = self.backquote().await?;
        if backquoted.is_some() {
            return Ok(backquoted);
        }

        // Anything else is a literal unless the caller calls it a delimiter.
        let plain = self.consume_char_if(|c| !is_delimiter(c)).await?;
        Ok(plain.map(|sc| Literal(sc.value)))
    }

    /// Consumes the next character if `is_escapable` accepts it, with line
    /// continuation disabled for the duration of the lookup.
    ///
    /// Disabling line continuation keeps a backslash directly after another
    /// backslash from being joined with a following newline.
    async fn consume_raw_char_if_dyn(
        &mut self,
        is_escapable: &mut dyn FnMut(char) -> bool,
    ) -> Result<Option<char>> {
        let consumed = self
            .disable_line_continuation()
            .consume_char_if_dyn(is_escapable)
            .await?
            .map(|sc| sc.value);
        Ok(consumed)
    }
}
impl Lexer<'_> {
pub async fn text<F, G>(&mut self, mut is_delimiter: F, mut is_escapable: G) -> Result<Text>
where
F: FnMut(char) -> bool,
G: FnMut(char) -> bool,
{
self.text_dyn(&mut is_delimiter, &mut is_escapable).await
}
async fn text_dyn(
&mut self,
is_delimiter: &mut dyn FnMut(char) -> bool,
is_escapable: &mut dyn FnMut(char) -> bool,
) -> Result<Text> {
let mut units = vec![];
let mut word_lexer = WordLexer {
lexer: self,
context: WordContext::Text,
};
while let Some(unit) = word_lexer.text_unit_dyn(is_delimiter, is_escapable).await? {
units.push(unit);
}
Ok(Text(units))
}
pub async fn text_with_parentheses<F, G>(
&mut self,
mut is_delimiter: F,
mut is_escapable: G,
) -> Result<Text>
where
F: FnMut(char) -> bool,
G: FnMut(char) -> bool,
{
self.text_with_parentheses_dyn(&mut is_delimiter, &mut is_escapable)
.await
}
async fn text_with_parentheses_dyn(
&mut self,
is_delimiter: &mut dyn FnMut(char) -> bool,
is_escapable: &mut dyn FnMut(char) -> bool,
) -> Result<Text> {
let mut units = Vec::new();
let mut open_paren_locations = Vec::new();
loop {
let mut is_delimiter_or_paren = |c| {
if c == '(' {
return true;
}
if open_paren_locations.is_empty() {
is_delimiter(c)
} else {
c == ')'
}
};
let next_units = self
.text_dyn(&mut is_delimiter_or_paren, is_escapable)
.await?
.0;
units.extend(next_units);
if let Some(sc) = self.consume_char_if(|c| c == '(').await? {
units.push(Literal('('));
open_paren_locations.push(sc.location.clone());
} else if let Some(opening_location) = open_paren_locations.pop() {
if self.skip_if(|c| c == ')').await? {
units.push(Literal(')'));
} else {
let cause = SyntaxError::UnclosedParen { opening_location }.into();
let location = self.location().await?.clone();
return Err(Error { cause, location });
}
} else {
break;
}
}
Ok(Text(units))
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::error::ErrorCause;
    use crate::source::Source;
    use crate::syntax::Backquote;
    use crate::syntax::BackquoteUnit;
    use crate::syntax::CommandSubst;
    use assert_matches::assert_matches;
    use futures_executor::block_on;

    /// Delimiter predicate for tests in which it must never be consulted.
    fn unexpected_delimiter_check(c: char) -> bool {
        unreachable!("unexpected call to is_delimiter({:?})", c)
    }

    /// Escapability predicate for tests in which it must never be consulted.
    fn unexpected_escapable_check(c: char) -> bool {
        unreachable!("unexpected call to is_escapable({:?})", c)
    }

    /// Builds the expected units for a text made only of literal characters.
    fn literals(chars: &str) -> Vec<TextUnit> {
        chars.chars().map(Literal).collect()
    }

    #[test]
    fn lexer_text_unit_literal_accepted() {
        let mut base = Lexer::from_memory("X", Source::Unknown);
        let mut word_lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let mut delimiter_checked = false;
        let outcome = block_on(word_lexer.text_unit(
            |c| {
                delimiter_checked = true;
                assert_eq!(c, 'X');
                false
            },
            unexpected_escapable_check,
        ));
        let unit = outcome.unwrap().unwrap();

        assert!(delimiter_checked);
        assert_matches!(unit, Literal('X'));
        assert_eq!(block_on(word_lexer.peek_char()), Ok(None));
    }

    #[test]
    fn lexer_text_unit_literal_rejected() {
        let mut base = Lexer::from_memory(";", Source::Unknown);
        let mut word_lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let mut delimiter_checked = false;
        let outcome = block_on(word_lexer.text_unit(
            |c| {
                delimiter_checked = true;
                assert_eq!(c, ';');
                true
            },
            unexpected_escapable_check,
        ));
        let unit = outcome.unwrap();

        assert!(delimiter_checked);
        assert_eq!(unit, None);
        // The delimiter itself must stay in the input.
        assert_eq!(block_on(word_lexer.peek_char()), Ok(Some(';')));
    }

    #[test]
    fn lexer_text_unit_backslash_accepted() {
        let mut base = Lexer::from_memory(r"\#", Source::Unknown);
        let mut word_lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let mut escapable_checked = false;
        let outcome = block_on(word_lexer.text_unit(unexpected_delimiter_check, |c| {
            escapable_checked = true;
            assert_eq!(c, '#');
            true
        }));
        let unit = outcome.unwrap().unwrap();

        assert!(escapable_checked);
        assert_eq!(unit, Backslashed('#'));
        assert_eq!(block_on(word_lexer.peek_char()), Ok(None));
    }

    #[test]
    fn lexer_text_unit_backslash_eof() {
        let mut base = Lexer::from_memory(r"\", Source::Unknown);
        let mut word_lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let outcome = block_on(
            word_lexer.text_unit(unexpected_delimiter_check, unexpected_escapable_check),
        );
        let unit = outcome.unwrap().unwrap();

        // A backslash with nothing after it stands for itself.
        assert_eq!(unit, Literal('\\'));
        assert_eq!(block_on(word_lexer.peek_char()), Ok(None));
    }

    #[test]
    fn lexer_text_unit_backslash_line_continuation_not_recognized() {
        let mut base = Lexer::from_memory("\\\\\n", Source::Unknown);
        let mut word_lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let mut escapable_checked = false;
        let outcome = block_on(word_lexer.text_unit(unexpected_delimiter_check, |c| {
            escapable_checked = true;
            assert_eq!(c, '\\');
            true
        }));
        let unit = outcome.unwrap().unwrap();

        assert!(escapable_checked);
        assert_eq!(unit, Backslashed('\\'));
        // The second backslash was quoted, so the newline is still there.
        assert_eq!(block_on(word_lexer.peek_char()), Ok(Some('\n')));
    }

    #[test]
    fn lexer_text_unit_dollar() {
        let mut base = Lexer::from_memory("$()", Source::Unknown);
        let mut word_lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let outcome = block_on(
            word_lexer.text_unit(unexpected_delimiter_check, unexpected_escapable_check),
        );
        let unit = outcome.unwrap().unwrap();

        assert_matches!(unit, CommandSubst { content, location } => {
            assert_eq!(&*content, "");
            assert_eq!(location.range, 0..3);
        });
        assert_eq!(block_on(word_lexer.peek_char()), Ok(None));
    }

    #[test]
    fn lexer_text_unit_backquote_double_quote_escapable() {
        let mut base = Lexer::from_memory(r#"`\"`"#, Source::Unknown);
        let mut word_lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Text,
        };
        let outcome = block_on(
            word_lexer.text_unit(unexpected_delimiter_check, unexpected_escapable_check),
        );
        let unit = outcome.unwrap().unwrap();

        assert_matches!(unit, Backquote { content, location } => {
            assert_eq!(content, [BackquoteUnit::Backslashed('"')]);
            assert_eq!(location.range, 0..4);
        });
        assert_eq!(block_on(word_lexer.peek_char()), Ok(None));
    }

    #[test]
    fn lexer_text_unit_backquote_double_quote_not_escapable() {
        let mut base = Lexer::from_memory(r#"`\"`"#, Source::Unknown);
        let mut word_lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let outcome = block_on(
            word_lexer.text_unit(unexpected_delimiter_check, unexpected_escapable_check),
        );
        let unit = outcome.unwrap().unwrap();

        assert_matches!(unit, Backquote { content, location } => {
            assert_eq!(
                content,
                [BackquoteUnit::Literal('\\'), BackquoteUnit::Literal('"')]
            );
            assert_eq!(location.range, 0..4);
        });
        assert_eq!(block_on(word_lexer.peek_char()), Ok(None));
    }

    #[test]
    fn lexer_text_unit_line_continuations() {
        let mut base = Lexer::from_memory("\\\n\\\nX", Source::Unknown);
        let mut word_lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let outcome = block_on(word_lexer.text_unit(|_| false, unexpected_escapable_check));
        let unit = outcome.unwrap().unwrap();

        assert_eq!(unit, Literal('X'));
        assert_eq!(block_on(word_lexer.peek_char()), Ok(None));
    }

    #[test]
    fn lexer_text_empty() {
        let mut lexer = Lexer::from_memory("", Source::Unknown);
        let text = block_on(lexer.text(unexpected_delimiter_check, unexpected_escapable_check))
            .unwrap();
        assert_eq!(text.0, &[]);
    }

    #[test]
    fn lexer_text_nonempty() {
        let mut lexer = Lexer::from_memory("abc", Source::Unknown);
        let mut delimiter_calls = 0;
        let text = block_on(lexer.text(
            |c| {
                assert!(
                    matches!(c, 'a' | 'b' | 'c'),
                    "unexpected call to is_delimiter({:?}), called={}",
                    c,
                    delimiter_calls
                );
                delimiter_calls += 1;
                false
            },
            unexpected_escapable_check,
        ))
        .unwrap();

        assert_eq!(text.0, literals("abc"));
        assert_eq!(delimiter_calls, 3);
        assert_eq!(block_on(lexer.peek_char()), Ok(None));
    }

    #[test]
    fn lexer_text_delimiter() {
        let mut lexer = Lexer::from_memory("abc", Source::Unknown);
        let mut delimiter_calls = 0;
        let text = block_on(lexer.text(
            |c| {
                assert!(
                    matches!(c, 'a' | 'b' | 'c'),
                    "unexpected call to is_delimiter({:?}), called={}",
                    c,
                    delimiter_calls
                );
                delimiter_calls += 1;
                c == 'c'
            },
            unexpected_escapable_check,
        ))
        .unwrap();

        assert_eq!(text.0, literals("ab"));
        assert_eq!(delimiter_calls, 3);
        assert_eq!(block_on(lexer.peek_char()), Ok(Some('c')));
    }

    #[test]
    fn lexer_text_escaping() {
        let mut lexer = Lexer::from_memory(r"a\b\c", Source::Unknown);
        let mut escapable_queries = String::new();
        let text = block_on(lexer.text(
            |_| false,
            |c| {
                escapable_queries.push(c);
                c == 'b'
            },
        ))
        .unwrap();

        assert_eq!(
            text.0,
            &[Literal('a'), Backslashed('b'), Literal('\\'), Literal('c')]
        );
        assert_eq!(escapable_queries, "bc");
        assert_eq!(block_on(lexer.peek_char()), Ok(None));
    }

    #[test]
    fn lexer_text_with_parentheses_no_parentheses() {
        let mut lexer = Lexer::from_memory("abc", Source::Unknown);
        let text = block_on(lexer.text_with_parentheses(|_| false, |_| false)).unwrap();

        assert_eq!(text.0, literals("abc"));
        assert_eq!(block_on(lexer.peek_char()), Ok(None));
    }

    #[test]
    fn lexer_text_with_parentheses_nest_1() {
        let mut lexer = Lexer::from_memory("a(b)c)", Source::Unknown);
        let text =
            block_on(lexer.text_with_parentheses(|c| matches!(c, 'b' | ')'), |_| false)).unwrap();

        // Inside the parentheses `b` is not treated as a delimiter.
        assert_eq!(text.0, literals("a(b)c"));
        assert_eq!(block_on(lexer.peek_char()), Ok(Some(')')));
    }

    #[test]
    fn lexer_text_with_parentheses_nest_1_1() {
        let mut lexer = Lexer::from_memory("ab(CD)ef(GH)ij;", Source::Unknown);
        let text = block_on(
            lexer.text_with_parentheses(|c| c == ';' || c.is_ascii_uppercase(), |_| false),
        )
        .unwrap();

        assert_eq!(text.0, literals("ab(CD)ef(GH)ij"));
        assert_eq!(block_on(lexer.peek_char()), Ok(Some(';')));
    }

    #[test]
    fn lexer_text_with_parentheses_nest_3() {
        let mut lexer = Lexer::from_memory("a(B((C)D))e;", Source::Unknown);
        let text = block_on(
            lexer.text_with_parentheses(|c| c == ';' || c.is_ascii_uppercase(), |_| false),
        )
        .unwrap();

        assert_eq!(text.0, literals("a(B((C)D))e"));
        assert_eq!(block_on(lexer.peek_char()), Ok(Some(';')));
    }

    #[test]
    fn lexer_text_with_parentheses_unclosed() {
        let mut lexer = Lexer::from_memory("x(()", Source::Unknown);
        let error = block_on(lexer.text_with_parentheses(|_| false, |_| false)).unwrap_err();

        // The error is reported at the end of the input...
        assert_eq!(*error.location.code.value.borrow(), "x(()");
        assert_eq!(error.location.code.start_line_number.get(), 1);
        assert_eq!(error.location.code.source, Source::Unknown);
        assert_eq!(error.location.range, 4..4);

        // ...and names the parenthesis that was left open.
        assert_matches!(error.cause,
            ErrorCause::Syntax(SyntaxError::UnclosedParen { opening_location }) => {
            assert_eq!(*opening_location.code.value.borrow(), "x(()");
            assert_eq!(opening_location.code.start_line_number.get(), 1);
            assert_eq!(opening_location.code.source, Source::Unknown);
            assert_eq!(opening_location.range, 1..2);
        });
    }
}