use super::core::Lexer;
use super::core::Token;
use super::core::TokenId;
use crate::parser::core::Result;
use crate::syntax::Literal;
use crate::syntax::Unquoted;
use crate::syntax::Word;
use std::fmt;
use std::future::Future;
use std::pin::Pin;
use thiserror::Error;
/// Operator token produced by the lexer.
///
/// The source text of every operator is available from [`Operator::as_str`]
/// and is what the `Display` implementation writes.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Operator {
    /// Newline character
    Newline,
    /// `&`
    And,
    /// `&&`
    AndAnd,
    /// `(`
    OpenParen,
    /// `)`
    CloseParen,
    /// `;`
    Semicolon,
    /// `;;`
    SemicolonSemicolon,
    /// `<`
    Less,
    /// `<&`
    LessAnd,
    /// `<(`
    LessOpenParen,
    /// `<<`
    LessLess,
    /// `<<-`
    LessLessDash,
    /// `<<<`
    LessLessLess,
    /// `<>`
    LessGreater,
    /// `>`
    Greater,
    /// `>&`
    GreaterAnd,
    /// `>(`
    GreaterOpenParen,
    /// `>>`
    GreaterGreater,
    /// `>>|`
    GreaterGreaterBar,
    /// `>|`
    GreaterBar,
    /// `|`
    Bar,
    /// `||`
    BarBar,
}

impl Operator {
    /// Returns the exact source text of the operator, e.g. `"&&"` for
    /// [`Operator::AndAnd`] and `"\n"` for [`Operator::Newline`].
    #[must_use]
    pub const fn as_str(&self) -> &'static str {
        match *self {
            Self::Newline => "\n",
            Self::And => "&",
            Self::AndAnd => "&&",
            Self::OpenParen => "(",
            Self::CloseParen => ")",
            Self::Semicolon => ";",
            Self::SemicolonSemicolon => ";;",
            Self::Less => "<",
            Self::LessAnd => "<&",
            Self::LessOpenParen => "<(",
            Self::LessLess => "<<",
            Self::LessLessDash => "<<-",
            Self::LessLessLess => "<<<",
            Self::LessGreater => "<>",
            Self::Greater => ">",
            Self::GreaterAnd => ">&",
            Self::GreaterOpenParen => ">(",
            Self::GreaterGreater => ">>",
            Self::GreaterGreaterBar => ">>|",
            Self::GreaterBar => ">|",
            Self::Bar => "|",
            Self::BarBar => "||",
        }
    }

    /// Tells whether the operator is a clause delimiter.
    ///
    /// Only `)` and `;;` are; every other operator yields `false`.
    #[must_use]
    pub const fn is_clause_delimiter(self) -> bool {
        match self {
            Self::CloseParen | Self::SemicolonSemicolon => true,

            // Spelled out without a wildcard so that adding a variant forces
            // an explicit decision here.
            Self::Newline
            | Self::And
            | Self::AndAnd
            | Self::OpenParen
            | Self::Semicolon
            | Self::Less
            | Self::LessAnd
            | Self::LessOpenParen
            | Self::LessLess
            | Self::LessLessDash
            | Self::LessLessLess
            | Self::LessGreater
            | Self::Greater
            | Self::GreaterAnd
            | Self::GreaterOpenParen
            | Self::GreaterGreater
            | Self::GreaterGreaterBar
            | Self::GreaterBar
            | Self::Bar
            | Self::BarBar => false,
        }
    }
}

/// Writes the operator exactly as [`Operator::as_str`] spells it.
impl fmt::Display for Operator {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let symbol = self.as_str();
        // `write_str` emits the text verbatim; width/fill flags are not applied.
        formatter.write_str(symbol)
    }
}
/// Trie node for operator lookup: a static slice of outgoing [`Edge`]s.
///
/// The edges must be sorted by `key` in strictly ascending order because
/// `Trie::edge` finds them with a binary search.
#[derive(Copy, Clone, Debug)]
pub struct Trie(&'static [Edge]);
/// Edge of a [`Trie`], labeled with one character.
#[derive(Copy, Clone, Debug)]
pub struct Edge {
/// Character that selects this edge.
pub key: char,
/// Operator spelled by the characters leading up to and including `key`,
/// or `None` if that character sequence is not an operator by itself.
pub value: Option<Operator>,
/// Sub-trie of the characters that may follow `key` to form a longer operator.
pub next: Trie,
}
impl Trie {
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
pub fn edge(&self, key: char) -> Option<&Edge> {
self.0
.binary_search_by_key(&key, |edge| edge.key)
.ok()
.map(|i| &self.0[i])
}
}
/// Root of the operator trie: one edge for each character that can start an
/// operator.
///
/// Edges are listed in ascending `key` order, as required by the binary
/// search in `Trie::edge`.
pub const OPERATORS: Trie = Trie(&[
Edge {
key: '\n',
value: Some(Operator::Newline),
next: NONE,
},
Edge {
key: '&',
value: Some(Operator::And),
next: AND,
},
Edge {
key: '(',
value: Some(Operator::OpenParen),
next: NONE,
},
Edge {
key: ')',
value: Some(Operator::CloseParen),
next: NONE,
},
Edge {
key: ';',
value: Some(Operator::Semicolon),
next: SEMICOLON,
},
Edge {
key: '<',
value: Some(Operator::Less),
next: LESS,
},
Edge {
key: '>',
value: Some(Operator::Greater),
next: GREATER,
},
Edge {
key: '|',
value: Some(Operator::Bar),
next: BAR,
},
]);
/// Continuations after `&`: a second `&` forms `&&`.
const AND: Trie = Trie(&[Edge {
key: '&',
value: Some(Operator::AndAnd),
next: NONE,
}]);
/// Continuations after `;`: a second `;` forms `;;`.
const SEMICOLON: Trie = Trie(&[Edge {
key: ';',
value: Some(Operator::SemicolonSemicolon),
next: NONE,
}]);
/// Continuations after `<`, forming `<&`, `<(`, `<<` (which may continue
/// further) and `<>`. Kept in ascending `key` order.
const LESS: Trie = Trie(&[
Edge {
key: '&',
value: Some(Operator::LessAnd),
next: NONE,
},
Edge {
key: '(',
value: Some(Operator::LessOpenParen),
next: NONE,
},
Edge {
key: '<',
value: Some(Operator::LessLess),
next: LESS_LESS,
},
Edge {
key: '>',
value: Some(Operator::LessGreater),
next: NONE,
},
]);
/// Continuations after `<<`, forming `<<-` and `<<<`. Kept in ascending `key`
/// order.
const LESS_LESS: Trie = Trie(&[
Edge {
key: '-',
value: Some(Operator::LessLessDash),
next: NONE,
},
Edge {
key: '<',
value: Some(Operator::LessLessLess),
next: NONE,
},
]);
/// Continuations after `>`, forming `>&`, `>(`, `>>` (which may continue
/// further) and `>|`. Kept in ascending `key` order.
const GREATER: Trie = Trie(&[
Edge {
key: '&',
value: Some(Operator::GreaterAnd),
next: NONE,
},
Edge {
key: '(',
value: Some(Operator::GreaterOpenParen),
next: NONE,
},
Edge {
key: '>',
value: Some(Operator::GreaterGreater),
next: GREATER_GREATER,
},
Edge {
key: '|',
value: Some(Operator::GreaterBar),
next: NONE,
},
]);
/// Continuations after `>>`: a `|` forms `>>|`.
const GREATER_GREATER: Trie = Trie(&[Edge {
key: '|',
value: Some(Operator::GreaterGreaterBar),
next: NONE,
}]);
/// Continuations after `|`: a second `|` forms `||`.
const BAR: Trie = Trie(&[Edge {
key: '|',
value: Some(Operator::BarBar),
next: NONE,
}]);
/// Empty trie, used as `next` for operators that cannot be extended.
const NONE: Trie = Trie(&[]);
/// Tests whether `c` can be the first character of an operator, i.e. whether
/// the root trie [`OPERATORS`] has an edge labeled `c`.
pub fn is_operator_char(c: char) -> bool {
    let root = OPERATORS;
    root.edge(c).is_some()
}
/// Intermediate result of `Lexer::operator_tail`.
struct OperatorTail {
/// Operator that was matched.
pub operator: Operator,
/// Characters consumed for the operator, stored last-to-first: each
/// recursion level pushes its character after the deeper levels have
/// pushed theirs, so the caller must reverse the vector.
pub reversed_key: Vec<char>,
}
impl Lexer<'_> {
fn operator_tail(
&mut self,
trie: Trie,
) -> Pin<Box<dyn Future<Output = Result<Option<OperatorTail>>> + '_>> {
Box::pin(async move {
if trie.is_empty() {
return Ok(None);
}
let c = match self.peek_char().await? {
None => return Ok(None),
Some(c) => c,
};
let edge = match trie.edge(c) {
None => return Ok(None),
Some(edge) => edge,
};
let old_index = self.index();
self.consume_char();
if let Some(mut operator_tail) = self.operator_tail(edge.next).await? {
operator_tail.reversed_key.push(c);
return Ok(Some(operator_tail));
}
match edge.value {
None => {
self.rewind(old_index);
Ok(None)
}
Some(operator) => Ok(Some(OperatorTail {
operator,
reversed_key: vec![c],
})),
}
})
}
pub async fn operator(&mut self) -> Result<Option<Token>> {
let index = self.index();
self.operator_tail(OPERATORS).await.map(|o| {
o.map(|ot| {
let OperatorTail {
operator,
reversed_key,
} = ot;
let units = reversed_key
.into_iter()
.rev()
.map(|c| Unquoted(Literal(c)))
.collect::<Vec<_>>();
let location = self.location_range(index..self.index());
let word = Word { units, location };
let id = TokenId::Operator(operator);
Token { word, id, index }
})
})
}
}
#[derive(Clone, Debug, Eq, Error, PartialEq)]
pub struct ParseOperatorError;
impl std::fmt::Display for ParseOperatorError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str("not a valid operator")
}
}
/// Error value displayed as `inconvertible operator`.
///
/// NOTE(review): presumably returned by a fallible conversion involving
/// `Operator`; the code producing it is not in this section — confirm.
#[derive(Clone, Debug, Eq, Error, PartialEq)]
pub struct TryFromOperatorError;

impl fmt::Display for TryFromOperatorError {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        formatter.write_str("inconvertible operator")
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::input::Context;
use crate::input::Input;
use crate::source::Source;
use crate::syntax::TextUnit;
use crate::syntax::WordUnit;
use futures_util::FutureExt;
use std::num::NonZeroU64;
use std::rc::Rc;
/// Asserts that the edges of `trie`, and of every sub-trie reachable from it,
/// are in strictly ascending `key` order.
fn ensure_sorted(trie: &Trie) {
    let edges = trie.0;
    let strictly_ascending = edges
        .iter()
        .zip(edges.iter().skip(1))
        .all(|(left, right)| left.key < right.key);
    assert!(strictly_ascending, "trie should be sorted: {trie:?}");

    edges.iter().for_each(|edge| ensure_sorted(&edge.next));
}
// The binary search in `Trie::edge` is only correct if every trie reachable
// from `OPERATORS` is sorted by key.
#[test]
fn tries_are_sorted() {
ensure_sorted(&OPERATORS);
}
// `<<-` must be lexed as the single operator `<<-` rather than stopping at
// `<` or `<<`, and nothing must remain in the input afterwards.
#[test]
fn lexer_operator_longest_match() {
let mut lexer = Lexer::from_memory("<<-", Source::Unknown);
let t = lexer.operator().now_or_never().unwrap().unwrap().unwrap();
assert_eq!(t.word.units.len(), 3);
assert_eq!(t.word.units[0], WordUnit::Unquoted(TextUnit::Literal('<')));
assert_eq!(t.word.units[1], WordUnit::Unquoted(TextUnit::Literal('<')));
assert_eq!(t.word.units[2], WordUnit::Unquoted(TextUnit::Literal('-')));
assert_eq!(*t.word.location.code.value.borrow(), "<<-");
assert_eq!(t.word.location.code.start_line_number.get(), 1);
assert_eq!(*t.word.location.code.source, Source::Unknown);
assert_eq!(t.word.location.range, 0..3);
assert_eq!(t.id, TokenId::Operator(Operator::LessLessDash));
// The whole input has been consumed.
assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
}
// In `<<>` the `>` cannot extend `<<`, so the operator ends after two
// characters and the `>` is left unconsumed.
#[test]
fn lexer_operator_delimited_by_another_operator() {
let mut lexer = Lexer::from_memory("<<>", Source::Unknown);
let t = lexer.operator().now_or_never().unwrap().unwrap().unwrap();
assert_eq!(t.word.units.len(), 2);
assert_eq!(t.word.units[0], WordUnit::Unquoted(TextUnit::Literal('<')));
assert_eq!(t.word.units[1], WordUnit::Unquoted(TextUnit::Literal('<')));
assert_eq!(*t.word.location.code.value.borrow(), "<<>");
assert_eq!(t.word.location.code.start_line_number.get(), 1);
assert_eq!(*t.word.location.code.source, Source::Unknown);
assert_eq!(t.word.location.range, 0..2);
assert_eq!(t.id, TokenId::Operator(Operator::LessLess));
// The lexer is now positioned at the remaining `>`.
assert_eq!(
lexer.location().now_or_never().unwrap().unwrap().range,
2..3
);
}
// Reaching the end of input while a longer operator is still possible must
// yield the operator matched so far (`<<`).
#[test]
fn lexer_operator_delimited_by_eof() {
let mut lexer = Lexer::from_memory("<<", Source::Unknown);
let t = lexer.operator().now_or_never().unwrap().unwrap().unwrap();
assert_eq!(t.word.units.len(), 2);
assert_eq!(t.word.units[0], WordUnit::Unquoted(TextUnit::Literal('<')));
assert_eq!(t.word.units[1], WordUnit::Unquoted(TextUnit::Literal('<')));
assert_eq!(*t.word.location.code.value.borrow(), "<<");
assert_eq!(t.word.location.code.start_line_number.get(), 1);
assert_eq!(*t.word.location.code.source, Source::Unknown);
assert_eq!(t.word.location.range, 0..2);
assert_eq!(t.id, TokenId::Operator(Operator::LessLess));
assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
}
// Backslash-newline pairs before and between the operator characters do not
// become word units, but they are part of the token's location range.
#[test]
fn lexer_operator_containing_line_continuations() {
let mut lexer = Lexer::from_memory("\\\n\\\n<\\\n<\\\n>", Source::Unknown);
let t = lexer.operator().now_or_never().unwrap().unwrap().unwrap();
assert_eq!(t.word.units.len(), 2);
assert_eq!(t.word.units[0], WordUnit::Unquoted(TextUnit::Literal('<')));
assert_eq!(t.word.units[1], WordUnit::Unquoted(TextUnit::Literal('<')));
assert_eq!(*t.word.location.code.value.borrow(), "\\\n\\\n<\\\n<\\\n>");
assert_eq!(t.word.location.code.start_line_number.get(), 1);
assert_eq!(*t.word.location.code.source, Source::Unknown);
// The range covers everything up to, but not including, the final `>`.
assert_eq!(t.word.location.range, 0..10);
assert_eq!(t.id, TokenId::Operator(Operator::LessLess));
assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('>')));
}
// A backslash-newline pair followed by a space starts no operator, so the
// result is `None`.
#[test]
fn lexer_operator_none() {
let mut lexer = Lexer::from_memory("\\\n ", Source::Unknown);
let r = lexer.operator().now_or_never().unwrap().unwrap();
assert!(r.is_none(), "unexpected success: {r:?}");
}
// After a newline operator the lexer must not request more input: the
// newline edge has an empty `next` trie, so no further character is peeked.
#[test]
fn lexer_operator_should_not_peek_beyond_newline() {
// Input that yields its single line once and panics on any further read.
struct OneLineInput(Option<String>);
impl Input for OneLineInput {
async fn next_line(&mut self, _: &Context) -> crate::input::Result {
if let Some(line) = self.0.take() {
Ok(line)
} else {
panic!("The second line should not be read")
}
}
}
let line_number = NonZeroU64::new(1).unwrap();
let mut lexer = Lexer::new(
Box::new(OneLineInput(Some("\n".to_owned()))),
line_number,
Rc::new(Source::Unknown),
);
let t = lexer.operator().now_or_never().unwrap().unwrap().unwrap();
assert_eq!(t.word.units, [WordUnit::Unquoted(TextUnit::Literal('\n'))]);
assert_eq!(*t.word.location.code.value.borrow(), "\n");
assert_eq!(t.word.location.code.start_line_number.get(), 1);
assert_eq!(*t.word.location.code.source, Source::Unknown);
assert_eq!(t.word.location.range, 0..1);
assert_eq!(t.id, TokenId::Operator(Operator::Newline));
}
}