use super::core::is_blank;
use super::core::Lexer;
use super::core::Token;
use super::core::TokenId;
use super::core::WordContext;
use super::core::WordLexer;
use super::keyword::Keyword;
use super::op::is_operator_char;
use crate::parser::core::Result;
use crate::syntax::MaybeLiteral;
use crate::syntax::Word;
/// Tests whether the character ends an unquoted token.
///
/// A character is a token delimiter if [`is_operator_char`] or [`is_blank`]
/// accepts it. [`Lexer::token`] passes this function to the word lexer as the
/// predicate that decides where a word stops.
pub fn is_token_delimiter_char(c: char) -> bool {
    let starts_operator = is_operator_char(c);
    starts_operator || is_blank(c)
}
impl Lexer<'_> {
    /// Determines the token ID for the word that has just been parsed.
    ///
    /// The result is decided as follows, in this order:
    ///
    /// 1. A word with no units is `TokenId::EndOfInput`.
    /// 2. A fully literal word that converts to a [`Keyword`] is
    ///    `TokenId::Token(Some(keyword))`.
    /// 3. A fully literal word made only of ASCII digits that is immediately
    ///    followed by `<` or `>` is `TokenId::IoNumber`.
    /// 4. Anything else is `TokenId::Token(None)`.
    ///
    /// The following character is only peeked, never consumed, and only when
    /// the word consists solely of ASCII digits. An error from that peek is
    /// returned to the caller.
    async fn token_id(&mut self, word: &Word) -> Result<TokenId> {
        if word.units.is_empty() {
            return Ok(TokenId::EndOfInput);
        }

        // Only a word that is literal in its entirety can be a keyword or an
        // IO number.
        let literal = match word.to_string_if_literal() {
            Some(literal) => literal,
            None => return Ok(TokenId::Token(None)),
        };

        if let Ok(keyword) = Keyword::try_from(literal.as_str()) {
            return Ok(TokenId::Token(Some(keyword)));
        }

        let digits_only = literal.chars().all(|c| c.is_ascii_digit());
        if digits_only {
            // A number is an IO number only if a redirection operator
            // character follows it directly.
            match self.peek_char().await? {
                Some('<') | Some('>') => return Ok(TokenId::IoNumber),
                _ => {}
            }
        }

        Ok(TokenId::Token(None))
    }

    /// Parses a token.
    ///
    /// If `operator()` yields a token at the current position, that token is
    /// returned as is. Otherwise, a word is parsed up to the next character
    /// accepted by [`is_token_delimiter_char`], tilde expansion at the front
    /// of the word is parsed, and the word is classified with `token_id`.
    ///
    /// The `index` of the returned word token is the lexer's index taken
    /// before the word is parsed. At the end of input the result is an empty
    /// word whose ID is `TokenId::EndOfInput`.
    ///
    /// # Errors
    ///
    /// Any error from the operator parser, the word parser, or the peek
    /// performed during classification is returned unchanged.
    pub async fn token(&mut self) -> Result<Token> {
        let operator = self.operator().await?;
        if let Some(token) = operator {
            return Ok(token);
        }

        // Remember where the word starts before any of it is consumed.
        let index = self.index();

        let mut word = WordLexer {
            lexer: self,
            context: WordContext::Word,
        }
        .word(is_token_delimiter_char)
        .await?;
        word.parse_tilde_front();

        let id = self.token_id(&word).await?;
        Ok(Token { word, id, index })
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::source::Source;
    use crate::syntax::TextUnit;
    use crate::syntax::WordUnit;
    use futures_executor::block_on;

    /// Builds the word unit for a single unquoted literal character.
    fn literal(c: char) -> WordUnit {
        WordUnit::Unquoted(TextUnit::Literal(c))
    }

    /// Asserts the position-independent part of the token's location: the
    /// full source text, a starting line number of 1, and an unknown source.
    #[track_caller]
    fn assert_code(token: &Token, expected_code: &str) {
        let code = &token.word.location.code;
        assert_eq!(*code.value.borrow(), expected_code);
        assert_eq!(code.start_line_number.get(), 1);
        assert_eq!(code.source, Source::Unknown);
    }

    #[test]
    fn lexer_token_empty() {
        let mut lexer = Lexer::from_memory("", Source::Unknown);
        let t = block_on(lexer.token()).unwrap();
        assert_code(&t, "");
        assert_eq!(t.word.location.range, 0..0);
        assert_eq!(t.id, TokenId::EndOfInput);
        assert_eq!(t.index, 0);
    }

    #[test]
    fn lexer_token_non_empty() {
        let mut lexer = Lexer::from_memory("abc ", Source::Unknown);
        let t = block_on(lexer.token()).unwrap();
        assert_eq!(t.word.units, [literal('a'), literal('b'), literal('c')]);
        assert_code(&t, "abc ");
        assert_eq!(t.word.location.range, 0..3);
        assert_eq!(t.id, TokenId::Token(None));
        assert_eq!(t.index, 0);

        // The delimiting blank must be left for the next read.
        assert_eq!(block_on(lexer.peek_char()), Ok(Some(' ')));
    }

    #[test]
    fn lexer_token_tilde() {
        let mut lexer = Lexer::from_memory("~a:~", Source::Unknown);
        let t = block_on(lexer.token()).unwrap();
        // Only the tilde at the front of the word becomes a tilde unit; the
        // one after the colon stays a literal.
        assert_eq!(
            t.word.units,
            [
                WordUnit::Tilde("a".to_string()),
                literal(':'),
                literal('~')
            ]
        );
    }

    #[test]
    fn lexer_token_io_number_delimited_by_less() {
        let mut lexer = Lexer::from_memory("12<", Source::Unknown);
        let t = block_on(lexer.token()).unwrap();
        assert_eq!(t.word.units, [literal('1'), literal('2')]);
        assert_code(&t, "12<");
        assert_eq!(t.word.location.range, 0..2);
        assert_eq!(t.id, TokenId::IoNumber);
        assert_eq!(t.index, 0);

        // The operator character is peeked, not consumed.
        assert_eq!(block_on(lexer.peek_char()), Ok(Some('<')));
    }

    #[test]
    fn lexer_token_io_number_delimited_by_greater() {
        let mut lexer = Lexer::from_memory("0>>", Source::Unknown);
        let t = block_on(lexer.token()).unwrap();
        assert_eq!(t.word.units, [literal('0')]);
        assert_code(&t, "0>>");
        assert_eq!(t.word.location.range, 0..1);
        assert_eq!(t.id, TokenId::IoNumber);
        assert_eq!(t.index, 0);

        // The lexer still points at the first `>` after the IO number.
        assert_eq!(block_on(lexer.location()).unwrap().range, 1..2);
    }

    #[test]
    fn lexer_token_after_blank() {
        block_on(async {
            let mut lexer = Lexer::from_memory(" a ", Source::Unknown);

            lexer.skip_blanks().await.unwrap();
            let t = lexer.token().await.unwrap();
            assert_code(&t, " a ");
            assert_eq!(t.word.location.range, 1..2);
            assert_eq!(t.id, TokenId::Token(None));
            assert_eq!(t.index, 1);

            // The input is three characters long, so after the trailing blank
            // is skipped the end of input is at index 3.
            lexer.skip_blanks().await.unwrap();
            let t = lexer.token().await.unwrap();
            assert_code(&t, " a ");
            assert_eq!(t.word.location.range, 3..3);
            assert_eq!(t.id, TokenId::EndOfInput);
            assert_eq!(t.index, 3);
        });
    }
}