use super::keyword::Keyword;
use super::op::Operator;
use crate::alias::Alias;
use crate::input::Context;
use crate::input::Input;
use crate::input::Memory;
use crate::parser::core::Result;
use crate::parser::error::Error;
use crate::source::source_chars;
use crate::source::Code;
use crate::source::Location;
use crate::source::Source;
use crate::source::SourceChar;
use crate::syntax::Word;
use std::cell::RefCell;
use std::fmt;
use std::future::Future;
use std::num::NonZeroU64;
use std::ops::Deref;
use std::ops::DerefMut;
use std::ops::Range;
use std::pin::Pin;
use std::rc::Rc;
use std::slice::SliceIndex;
/// Returns `true` if `c` is a whitespace character other than a newline.
///
/// Whitespace is decided by [`char::is_whitespace`]; `'\n'` is the only
/// whitespace character that is rejected.
pub fn is_blank(c: char) -> bool {
    match c {
        '\n' => false,
        other => other.is_whitespace(),
    }
}
/// Result of peeking at the current position of a `LexerCore`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PeekChar<'a> {
/// A character is available at the current position.
Char(&'a SourceChar),
/// The input has ended; carries the location recorded for the end of input.
EndOfInput(&'a Location),
}
impl<'a> PeekChar<'a> {
    /// Returns the location associated with this peek result.
    ///
    /// For `Char`, this is the location stored in the source character; for
    /// `EndOfInput`, it is the carried location itself.
    #[must_use]
    fn location<'b>(self: &'b PeekChar<'a>) -> &'a Location {
        // `PeekChar` is `Copy`, so matching on `*self` copies the inner
        // references out with their full `'a` lifetime.
        match *self {
            PeekChar::Char(source_char) => &source_char.location,
            PeekChar::EndOfInput(location) => location,
        }
    }
}
/// Classification attached to a [`Token`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TokenId {
/// Token carrying an optional [`Keyword`]; `None` when no keyword applies.
Token(Option<Keyword>),
/// Token identified as the given [`Operator`].
Operator(Operator),
/// IO number token.
// NOTE(review): presumably the digits preceding a redirection operator — confirm against the tokenizer.
IoNumber,
/// Marker for the end of input.
EndOfInput,
}
impl TokenId {
pub fn is_clause_delimiter(self) -> bool {
use TokenId::*;
match self {
Token(Some(keyword)) => keyword.is_clause_delimiter(),
Token(None) => false,
Operator(operator) => operator.is_clause_delimiter(),
IoNumber => false,
EndOfInput => true,
}
}
}
/// A token: a word together with its classification and position.
#[derive(Debug)]
pub struct Token {
/// Content of the token; this is what the `Display` implementation prints.
pub word: Word,
/// Classification of the token.
pub id: TokenId,
/// Lexer index associated with the token.
///
/// `Lexer::inner_program` rewinds the lexer to this value, so it is a
/// position in the lexer's character buffer.
// NOTE(review): presumably the index of the token's first character — confirm against the tokenizer.
pub index: usize,
}
impl fmt::Display for Token {
    /// Formats the token by printing its word with the default `{}` format.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!("{}", self.word))
    }
}
/// What a `LexerCore` currently knows about its input.
#[derive(Clone, Debug)]
enum InputState {
/// Neither the end of input nor an error has been recorded yet.
Alive,
/// The input returned an empty line; holds the location of the end of input.
EndOfInput(Location),
/// Reading from the input failed; holds the error returned to callers.
Error(Error),
}
/// Character-level state shared by the lexer: input, buffer and position.
struct LexerCore<'a> {
/// Input from which lines are read on demand.
input: Box<dyn Input + 'a>,
/// Whether the input is still readable, has ended, or has failed.
state: InputState,
/// Code object that accumulates the lines read from `input` and is shared
/// by the locations of the characters created from those lines.
raw_code: Rc<Code>,
/// Characters read so far, with any alias substitutions spliced in.
source: Vec<SourceChar>,
/// Index into `source` of the next character to be peeked or consumed.
index: usize,
}
impl<'a> LexerCore<'a> {
#[must_use]
fn new(
input: Box<dyn Input + 'a>,
start_line_number: NonZeroU64,
source: Source,
) -> LexerCore<'a> {
LexerCore {
input,
raw_code: Rc::new(Code {
value: RefCell::new(String::new()),
start_line_number,
source,
}),
state: InputState::Alive,
source: Vec::new(),
index: 0,
}
}
/// Peeks the character at the current index, reading more input if needed.
///
/// Returns `PeekChar::Char` when a character is buffered at `self.index`.
/// Otherwise lines are requested from the input until one arrives. An empty
/// line from the input is recorded as the end of input, and an I/O error is
/// recorded as an error state. Once either has been recorded, every later
/// call that finds no buffered character returns that same outcome without
/// reading from the input again.
async fn peek_char(&mut self) -> Result<PeekChar<'_>> {
loop {
// Fast path: the character has already been read.
if self.index < self.source.len() {
return Ok(PeekChar::Char(&self.source[self.index]));
}
// Nothing buffered: report a previously recorded terminal state, if any.
match self.state {
InputState::Alive => (),
InputState::EndOfInput(ref location) => return Ok(PeekChar::EndOfInput(location)),
InputState::Error(ref error) => return Err(error.clone()),
}
// Read one more line, then loop so the checks above produce the result.
match self.input.next_line(&Context).await {
Ok(line) => {
if line.is_empty() {
// An empty line signals the end of input.
self.state = InputState::EndOfInput(Location {
code: Rc::clone(&self.raw_code),
range: self.index..self.index,
});
} else {
// Append the line to the shared code and buffer its characters.
self.raw_code.value.borrow_mut().push_str(&line);
self.source
.extend(source_chars(&line, &self.raw_code, self.index));
}
}
Err(io_error) => {
// Remember the failure together with the position where it occurred.
self.state = InputState::Error(Error {
cause: io_error.into(),
location: Location {
code: Rc::clone(&self.raw_code),
range: self.index..self.index,
},
});
}
}
}
}
/// Advances the current index past the character that was last peeked.
///
/// # Panics
///
/// Panics if no buffered character exists at the current index, i.e. the
/// character has not been peeked yet.
fn consume_char(&mut self) {
    let current = self.index;
    assert!(
        current < self.source.len(),
        "A character must have been peeked before being consumed: index={}",
        current
    );
    self.index = current + 1;
}
/// Returns the buffered character at `index`.
///
/// # Panics
///
/// Panics if `index` is larger than the current index, or if no character
/// is buffered at `index`.
#[must_use]
fn peek_char_at(&self, index: usize) -> &SourceChar {
    let current = self.index;
    assert!(
        index <= current,
        "The index {} must not be larger than the current index {}",
        index,
        current
    );
    &self.source[index]
}
/// Returns the current index, i.e. the position of the next character to be
/// peeked or consumed.
#[must_use]
fn index(&self) -> usize {
self.index
}
/// Moves the current index back to `index`.
///
/// # Panics
///
/// Panics if `index` is larger than the current index.
fn rewind(&mut self, index: usize) {
    let current = self.index;
    assert!(
        index <= current,
        "The new index {} must not be larger than the current index {}",
        index,
        current
    );
    self.index = index;
}
/// Returns `true` if at least one buffered character has not been consumed yet.
#[must_use]
fn pending(&self) -> bool {
self.index < self.source.len()
}
/// Discards all buffered characters and starts a fresh code object.
///
/// The new code starts at the old start line number plus the number of
/// newlines accumulated in the old code (saturating at `u64::MAX`), and
/// keeps the same source. The current index is reset to 0. The input state
/// is left untouched.
fn flush(&mut self) {
    let newline_count = self.raw_code.value.borrow().matches('\n').count();
    let newline_count: u64 = newline_count.try_into().unwrap_or(u64::MAX);
    let next_line = self
        .raw_code
        .start_line_number
        .get()
        .saturating_add(newline_count);
    let fresh_code = Code {
        value: RefCell::new(String::new()),
        // Never zero: the old line number is non-zero and the addition saturates.
        start_line_number: NonZeroU64::new(next_line).unwrap(),
        source: self.raw_code.source.clone(),
    };
    self.raw_code = Rc::new(fresh_code);
    self.index = 0;
    self.source.clear();
}
/// Forgets any recorded end-of-input or error state and flushes the buffer,
/// so that the next peek reads from the input again.
fn reset(&mut self) {
self.state = InputState::Alive;
self.flush();
}
/// Collects the values of the buffered characters selected by `i` into a string.
///
/// # Panics
///
/// Panics if `i` is out of bounds for the character buffer.
fn source_string<I>(&self, i: I) -> String
where
    I: SliceIndex<[SourceChar], Output = [SourceChar]>,
{
    let selected = &self.source[i];
    let mut text = String::with_capacity(selected.len());
    for source_char in selected {
        text.push(source_char.value);
    }
    text
}
#[must_use]
fn location_range(&self, range: Range<usize>) -> Location {
if range.start == self.source.len() {
if let InputState::EndOfInput(ref location) = self.state {
return location.clone();
}
}
let start = &self.peek_char_at(range.start).location;
let code = start.code.clone();
let end = range
.map(|index| &self.peek_char_at(index).location)
.take_while(|location| location.code == code)
.last()
.map(|location| location.range.end)
.unwrap_or(start.range.start);
let range = start.range.start..end;
Location { code, range }
}
/// Replaces the buffered characters from `begin` up to the current index
/// with the replacement string of `alias`.
///
/// The inserted characters belong to a new code whose source records both
/// the original location of the replaced characters and the alias. After
/// the call the current index is `begin`, so the replacement is read next.
///
/// # Panics
///
/// Panics if `begin` is not less than the current index.
fn substitute_alias(&mut self, begin: usize, alias: &Rc<Alias>) {
    let end = self.index;
    assert!(
        begin < end,
        "begin index {} should be less than end index {}",
        begin,
        end
    );
    let original = self.location_range(begin..end);
    let code = Rc::new(Code {
        value: RefCell::new(alias.replacement.clone()),
        start_line_number: NonZeroU64::new(1).unwrap(),
        source: Source::Alias {
            original,
            alias: Rc::clone(alias),
        },
    });
    let replacement = source_chars(&alias.replacement, &code, 0);
    self.source.splice(begin..end, replacement);
    self.index = begin;
}
fn is_after_blank_ending_alias(&self, index: usize) -> bool {
fn ends_with_blank(s: &str) -> bool {
s.chars().rev().next().map_or(false, is_blank)
}
fn is_same_alias(alias: &Alias, sc: Option<&SourceChar>) -> bool {
match sc {
None => false,
Some(sc) => sc.location.code.source.is_alias_for(&alias.name),
}
}
for index in (0..index).rev() {
let sc = &self.source[index];
if !is_blank(sc.value) {
return false;
}
if let Source::Alias { ref alias, .. } = sc.location.code.source {
#[allow(clippy::collapsible_if)]
if ends_with_blank(&alias.replacement) {
if !is_same_alias(alias, self.source.get(index + 1)) {
return true;
}
}
}
}
false
}
}
impl fmt::Debug for LexerCore<'_> {
    /// Formats the state, buffer and index; the `input` and `raw_code`
    /// fields are left out.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut builder = f.debug_struct("LexerCore");
        builder.field("state", &self.state);
        builder.field("source", &self.source);
        builder.field("index", &self.index);
        builder.finish()
    }
}
/// Lexer that wraps a `LexerCore` and optionally skips line continuations.
#[derive(Debug)]
pub struct Lexer<'a> {
/// Character-level state: input, buffer and current index.
core: LexerCore<'a>,
/// When `true`, `peek_char` skips backslash-newline pairs before peeking.
line_continuation_enabled: bool,
}
impl<'a> Lexer<'a> {
#[must_use]
pub fn new(
input: Box<dyn Input + 'a>,
start_line_number: NonZeroU64,
source: Source,
) -> Lexer<'a> {
Lexer {
core: LexerCore::new(input, start_line_number, source),
line_continuation_enabled: true,
}
}
#[must_use]
pub fn from_memory(code: &'a str, source: Source) -> Lexer<'a> {
let line = NonZeroU64::new(1).unwrap();
Lexer::new(Box::new(Memory::new(code)), line, source)
}
/// Disables line continuation and returns a guard that dereferences to
/// this lexer.
///
/// Line continuation is enabled again when the returned `PlainLexer` is
/// dropped.
///
/// # Panics
///
/// Panics if line continuation is already disabled.
pub fn disable_line_continuation<'b>(&'b mut self) -> PlainLexer<'b, 'a> {
    let was_enabled = std::mem::replace(&mut self.line_continuation_enabled, false);
    assert!(was_enabled, "line continuation already disabled");
    PlainLexer { lexer: self }
}
/// Re-enables line continuation by taking ownership of the `PlainLexer` guard.
///
/// The body is intentionally empty: the argument is dropped on return, and
/// the guard's `Drop` implementation sets the flag back to enabled.
pub fn enable_line_continuation<'b>(_: PlainLexer<'a, 'b>) {}
/// Consumes one line continuation (a backslash followed by a newline) at
/// the current position, if there is one.
///
/// Returns `Ok(true)` if a continuation was consumed. Returns `Ok(false)`,
/// with the position unchanged, if line continuation is disabled or the
/// next two characters are not a backslash followed by a newline.
async fn line_continuation(&mut self) -> Result<bool> {
    if !self.line_continuation_enabled {
        return Ok(false);
    }
    let start = self.core.index();

    let at_backslash =
        matches!(self.core.peek_char().await?, PeekChar::Char(c) if c.value == '\\');
    if !at_backslash {
        return Ok(false);
    }
    self.core.consume_char();

    let at_newline =
        matches!(self.core.peek_char().await?, PeekChar::Char(c) if c.value == '\n');
    if !at_newline {
        // A lone backslash is not a continuation: give it back.
        self.core.rewind(start);
        return Ok(false);
    }
    self.core.consume_char();

    Ok(true)
}
/// Skips any line continuations, then peeks the next character.
///
/// Returns `Ok(Some(c))` for an available character and `Ok(None)` at the
/// end of input. Errors from the core lexer are propagated.
pub async fn peek_char(&mut self) -> Result<Option<char>> {
    loop {
        if !self.line_continuation().await? {
            break;
        }
    }
    let peeked = self.core.peek_char().await?;
    Ok(match peeked {
        PeekChar::Char(source_char) => Some(source_char.value),
        PeekChar::EndOfInput(_) => None,
    })
}
/// Returns the location of the character at the current position, or the
/// end-of-input location if there is no more input.
///
/// This peeks through the core lexer directly, so line continuations are
/// not skipped here.
pub async fn location(&mut self) -> Result<&Location> {
    let peeked = self.core.peek_char().await?;
    Ok(peeked.location())
}
/// Consumes the current character by delegating to the core lexer.
///
/// # Panics
///
/// Panics if no buffered character exists at the current index (see the
/// assertion in `LexerCore::consume_char`).
pub fn consume_char(&mut self) {
self.core.consume_char()
}
/// Returns the current index of the core lexer.
#[must_use]
pub fn index(&self) -> usize {
self.core.index()
}
/// Moves the current index back to `index` by delegating to the core lexer.
///
/// # Panics
///
/// Panics if `index` is larger than the current index.
pub fn rewind(&mut self, index: usize) {
self.core.rewind(index)
}
/// Returns `true` if the core lexer has buffered characters that have not
/// been consumed yet.
#[must_use]
pub fn pending(&self) -> bool {
self.core.pending()
}
/// Discards the characters buffered in the core lexer (see `LexerCore::flush`).
pub fn flush(&mut self) {
self.core.flush()
}
/// Clears the recorded input state and flushes the buffer of the core lexer
/// (see `LexerCore::reset`).
pub fn reset(&mut self) {
self.core.reset()
}
/// Peeks the next character and consumes it if `f` returns `true` for it.
///
/// This is a generic convenience wrapper that forwards to
/// `consume_char_if_dyn`. Returns the consumed character, or `Ok(None)` if
/// the character was rejected or no character is available.
pub async fn consume_char_if<F>(&mut self, mut f: F) -> Result<Option<&SourceChar>>
where
F: FnMut(char) -> bool,
{
self.consume_char_if_dyn(&mut f).await
}
/// Peeks the next character and consumes it if `f` returns `true` for it.
///
/// Returns the consumed character on success. Returns `Ok(None)`, with
/// nothing consumed, if `f` rejects the character or the end of input has
/// been reached; in the latter case `f` is not called.
pub(crate) async fn consume_char_if_dyn(
    &mut self,
    f: &mut dyn FnMut(char) -> bool,
) -> Result<Option<&SourceChar>> {
    if let Some(c) = self.peek_char().await? {
        if f(c) {
            let consumed_at = self.index();
            self.consume_char();
            return Ok(Some(self.core.peek_char_at(consumed_at)));
        }
    }
    Ok(None)
}
/// Returns the values of the buffered characters selected by `i` as a
/// string, by delegating to the core lexer.
///
/// # Panics
///
/// Panics if `i` is out of bounds for the character buffer.
pub fn source_string<I>(&self, i: I) -> String
where
I: SliceIndex<[SourceChar], Output = [SourceChar]>,
{
self.core.source_string(i)
}
/// Returns a location covering the buffered characters in `range` (see
/// `LexerCore::location_range` for the exact rules and panics).
#[must_use]
pub fn location_range(&self, range: Range<usize>) -> Location {
self.core.location_range(range)
}
/// Replaces the characters from `begin` up to the current index with the
/// replacement of `alias` and moves the current index back to `begin`.
///
/// # Panics
///
/// Panics if `begin` is not less than the current index.
pub fn substitute_alias(&mut self, begin: usize, alias: &Rc<Alias>) {
self.core.substitute_alias(begin, alias)
}
/// Tests whether `index` follows an alias substitution whose replacement
/// ends with a blank (see `LexerCore::is_after_blank_ending_alias`).
pub fn is_after_blank_ending_alias(&self, index: usize) -> bool {
self.core.is_after_blank_ending_alias(index)
}
/// Parses a compound list starting at the current position and returns the
/// source text it spans.
///
/// A temporary parser with a default-constructed alias collection parses
/// the list. The lexer is then rewound to the index of the token the parser
/// peeks next, and the characters from the starting index up to that index
/// are returned. Parser errors are propagated.
pub async fn inner_program(&mut self) -> Result<String> {
// Remember where the program starts so its text can be extracted later.
let begin = self.index();
let aliases = Default::default();
let mut parser = super::super::Parser::new(self, &aliases);
parser.maybe_compound_list().await?;
// The first token after the list marks the end of the program text.
let end = parser.peek_token().await?.index;
self.rewind(end);
Ok(self.core.source_string(begin..end))
}
/// Same as `inner_program`, but returns the future boxed and pinned as a
/// trait object.
// NOTE(review): presumably exists so recursive async callers get a sized future — confirm against callers.
pub fn inner_program_boxed(&mut self) -> Pin<Box<dyn Future<Output = Result<String>> + '_>> {
Box::pin(self.inner_program())
}
}
/// Guard returned by `Lexer::disable_line_continuation`.
///
/// It dereferences to the wrapped lexer, and dropping it sets the lexer's
/// line continuation flag back to enabled.
#[derive(Debug)]
#[must_use = "You must retain the PlainLexer to keep line continuation disabled"]
pub struct PlainLexer<'a, 'b> {
/// The lexer whose line continuation is currently disabled.
lexer: &'a mut Lexer<'b>,
}
impl<'a, 'b> Deref for PlainLexer<'a, 'b> {
    type Target = Lexer<'b>;
    /// Gives shared access to the wrapped lexer.
    fn deref(&self) -> &Lexer<'b> {
        &*self.lexer
    }
}
impl<'a, 'b> DerefMut for PlainLexer<'a, 'b> {
    /// Gives exclusive access to the wrapped lexer.
    fn deref_mut(&mut self) -> &mut Lexer<'b> {
        &mut *self.lexer
    }
}
impl Drop for PlainLexer<'_, '_> {
/// Re-enables line continuation on the wrapped lexer.
fn drop(&mut self) {
self.lexer.line_continuation_enabled = true;
}
}
/// Context selector stored in a [`WordLexer`].
// NOTE(review): the code that distinguishes `Text` from `Word` is outside this view — confirm the exact semantics there.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum WordContext {
/// Text context.
Text,
/// Word context.
Word,
}
/// A lexer reference paired with a [`WordContext`].
///
/// It dereferences to the wrapped [`Lexer`].
#[derive(Debug)]
pub struct WordLexer<'a, 'b> {
/// The wrapped lexer.
pub lexer: &'a mut Lexer<'b>,
/// The context associated with this lexer.
pub context: WordContext,
}
impl<'a, 'b> Deref for WordLexer<'a, 'b> {
    type Target = Lexer<'b>;
    /// Gives shared access to the wrapped lexer.
    fn deref(&self) -> &Lexer<'b> {
        &*self.lexer
    }
}
impl<'a, 'b> DerefMut for WordLexer<'a, 'b> {
    /// Gives exclusive access to the wrapped lexer.
    fn deref_mut(&mut self) -> &mut Lexer<'b> {
        &mut *self.lexer
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::parser::error::ErrorCause;
use crate::parser::error::SyntaxError;
use assert_matches::assert_matches;
use futures_executor::block_on;
// Peeking an empty input yields the end of input at range 0..0, with the
// start line number and source given at construction.
#[test]
fn lexer_core_peek_char_empty_source() {
let input = Memory::new("");
let line = NonZeroU64::new(32).unwrap();
let mut lexer = LexerCore::new(Box::new(input), line, Source::Unknown);
let result = block_on(lexer.peek_char());
assert_matches!(result, Ok(PeekChar::EndOfInput(location)) => {
assert_eq!(*location.code.value.borrow(), "");
assert_eq!(location.code.start_line_number, line);
assert_eq!(location.code.source, Source::Unknown);
assert_eq!(location.range, 0..0);
});
}
// An I/O error from the input is reported as an `ErrorCause::Io` error
// located at the current (empty) position.
#[test]
fn lexer_core_peek_char_io_error() {
// Input stub whose `next_line` always fails.
#[derive(Debug)]
struct Failing;
impl fmt::Display for Failing {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "Failing")
}
}
impl std::error::Error for Failing {}
#[async_trait::async_trait(?Send)]
impl Input for Failing {
async fn next_line(&mut self, _: &Context) -> crate::input::Result {
Err(std::io::Error::new(std::io::ErrorKind::Other, Failing))
}
}
let line = NonZeroU64::new(42).unwrap();
let mut lexer = LexerCore::new(Box::new(Failing), line, Source::Unknown);
let e = block_on(lexer.peek_char()).unwrap_err();
assert_matches!(e.cause, ErrorCause::Io(io_error) => {
assert_eq!(io_error.kind(), std::io::ErrorKind::Other);
});
assert_eq!(*e.location.code.value.borrow(), "");
assert_eq!(e.location.code.start_line_number, line);
assert_eq!(e.location.code.source, Source::Unknown);
assert_eq!(e.location.range, 0..0);
}
// Characters are returned one by one as they are consumed, lines are read
// lazily, and peeking twice without consuming returns the same character.
#[test]
fn lexer_core_consume_char_success() {
    let input = Memory::new("a\nb");
    let line = NonZeroU64::new(1).unwrap();
    let mut lexer = LexerCore::new(Box::new(input), line, Source::Unknown);

    let result = block_on(lexer.peek_char());
    assert_matches!(result, Ok(PeekChar::Char(c)) => {
        assert_eq!(c.value, 'a');
        assert_eq!(*c.location.code.value.borrow(), "a\n");
        assert_eq!(c.location.code.start_line_number, line);
        assert_eq!(c.location.code.source, Source::Unknown);
        assert_eq!(c.location.range, 0..1);
    });

    // Peek again without consuming: the same character must be returned.
    // (Previously the stale `result` was asserted a second time, which
    // verified nothing about the second peek.)
    let result = block_on(lexer.peek_char());
    assert_matches!(result, Ok(PeekChar::Char(c)) => {
        assert_eq!(c.value, 'a');
        assert_eq!(*c.location.code.value.borrow(), "a\n");
        assert_eq!(c.location.code.start_line_number, line);
        assert_eq!(c.location.code.source, Source::Unknown);
        assert_eq!(c.location.range, 0..1);
    });
    lexer.consume_char();

    let result = block_on(lexer.peek_char());
    assert_matches!(result, Ok(PeekChar::Char(c)) => {
        assert_eq!(c.value, '\n');
        assert_eq!(*c.location.code.value.borrow(), "a\n");
        assert_eq!(c.location.code.start_line_number, line);
        assert_eq!(c.location.code.source, Source::Unknown);
        assert_eq!(c.location.range, 1..2);
    });
    lexer.consume_char();

    // The second line is only read now, so the code value has grown.
    let result = block_on(lexer.peek_char());
    assert_matches!(result, Ok(PeekChar::Char(c)) => {
        assert_eq!(c.value, 'b');
        assert_eq!(*c.location.code.value.borrow(), "a\nb");
        assert_eq!(c.location.code.start_line_number.get(), 1);
        assert_eq!(c.location.code.source, Source::Unknown);
        assert_eq!(c.location.range, 2..3);
    });
    lexer.consume_char();

    let result = block_on(lexer.peek_char());
    assert_matches!(result, Ok(PeekChar::EndOfInput(location)) => {
        assert_eq!(*location.code.value.borrow(), "a\nb");
        assert_eq!(location.code.start_line_number.get(), 1);
        assert_eq!(location.code.source, Source::Unknown);
        assert_eq!(location.range, 3..3);
    });
}
// Consuming before any character has been peeked must panic.
#[test]
#[should_panic(expected = "A character must have been peeked before being consumed: index=0")]
fn lexer_core_consume_char_panic() {
    let first_line = NonZeroU64::new(1).unwrap();
    let input = Box::new(Memory::new("a"));
    let mut lexer = LexerCore::new(input, first_line, Source::Unknown);
    lexer.consume_char();
}
// `peek_char_at` returns the same characters that were peeked earlier at
// the corresponding indices.
#[test]
fn lexer_core_peek_char_at() {
let input = Memory::new("a\nb");
let line = NonZeroU64::new(1).unwrap();
let mut lexer = LexerCore::new(Box::new(input), line, Source::Unknown);
let c0 = assert_matches!(block_on(lexer.peek_char()), Ok(PeekChar::Char(c)) => c.clone());
lexer.consume_char();
let c1 = assert_matches!(block_on(lexer.peek_char()), Ok(PeekChar::Char(c)) => c.clone());
lexer.consume_char();
let c2 = assert_matches!(block_on(lexer.peek_char()), Ok(PeekChar::Char(c)) => c.clone());
assert_eq!(lexer.peek_char_at(0), &c0);
assert_eq!(lexer.peek_char_at(1), &c1);
assert_eq!(lexer.peek_char_at(2), &c2);
}
// The index starts at 0, is unchanged by peeking, and grows by one per
// consumed character.
#[test]
fn lexer_core_index() {
let input = Memory::new("a\nb");
let line = NonZeroU64::new(1).unwrap();
let mut lexer = LexerCore::new(Box::new(input), line, Source::Unknown);
assert_eq!(lexer.index(), 0);
block_on(lexer.peek_char()).unwrap();
assert_eq!(lexer.index(), 0);
lexer.consume_char();
assert_eq!(lexer.index(), 1);
block_on(lexer.peek_char()).unwrap();
lexer.consume_char();
assert_eq!(lexer.index(), 2);
block_on(lexer.peek_char()).unwrap();
lexer.consume_char();
assert_eq!(lexer.index(), 3);
}
// Rewinding to 0 is allowed at any time, and after rewinding the first
// character is peeked again.
#[test]
fn lexer_core_rewind_success() {
let input = Memory::new("abc");
let line = NonZeroU64::new(1).unwrap();
let mut lexer = LexerCore::new(Box::new(input), line, Source::Unknown);
// Rewinding to the current index is a no-op.
lexer.rewind(0);
assert_eq!(lexer.index(), 0);
block_on(async {
let _ = lexer.peek_char().await;
lexer.consume_char();
let _ = lexer.peek_char().await;
lexer.consume_char();
lexer.rewind(0);
let result = lexer.peek_char().await;
assert_matches!(result, Ok(PeekChar::Char(c)) => {
assert_eq!(c.value, 'a');
assert_eq!(*c.location.code.value.borrow(), "abc");
assert_eq!(c.location.code.start_line_number, line);
assert_eq!(c.location.code.source, Source::Unknown);
assert_eq!(c.location.range, 0..1);
});
});
}
// Rewinding to an index beyond the current index must panic.
#[test]
#[should_panic(expected = "The new index 1 must not be larger than the current index 0")]
fn lexer_core_rewind_invalid_index() {
    let first_line = NonZeroU64::new(1).unwrap();
    let input = Box::new(Memory::new("abc"));
    let mut lexer = LexerCore::new(input, first_line, Source::Unknown);
    lexer.rewind(1);
}
// `source_string` returns the characters in the requested index range,
// including across a line boundary.
#[test]
fn lexer_core_source_string() {
let input = Memory::new("ab\ncd");
let line = NonZeroU64::new(1).unwrap();
let mut lexer = LexerCore::new(Box::new(input), line, Source::Unknown);
block_on(async {
for _ in 0..4 {
let _ = lexer.peek_char().await;
lexer.consume_char();
}
});
let result = lexer.source_string(1..4);
assert_eq!(result, "b\nc");
}
// Substituting an alias over an empty range (begin == current index) must panic.
#[test]
#[should_panic(expected = "begin index 0 should be less than end index 0")]
fn lexer_core_substitute_alias_with_invalid_index() {
let input = Memory::new("a b");
let line = NonZeroU64::new(1).unwrap();
let mut lexer = LexerCore::new(Box::new(input), line, Source::Unknown);
let alias = Rc::new(Alias {
name: "a".to_string(),
replacement: "".to_string(),
global: false,
origin: Location::dummy("dummy"),
});
lexer.substitute_alias(0, &alias);
}
// After substituting "a" with "lex", the replacement characters are read
// first, each located in the alias code, followed by the rest of the input.
#[test]
fn lexer_core_substitute_alias_single_line_replacement() {
let input = Memory::new("a b");
let line = NonZeroU64::new(1).unwrap();
let mut lexer = LexerCore::new(Box::new(input), line, Source::Unknown);
let alias = Rc::new(Alias {
name: "a".to_string(),
replacement: "lex".to_string(),
global: false,
origin: Location::dummy("dummy"),
});
block_on(async {
let _ = lexer.peek_char().await;
lexer.consume_char();
lexer.substitute_alias(0, &alias);
assert_matches!(lexer.peek_char().await, Ok(PeekChar::Char(c)) => {
assert_eq!(c.value, 'l');
assert_eq!(*c.location.code.value.borrow(), "lex");
assert_eq!(c.location.code.start_line_number.get(), 1);
assert_matches!(&c.location.code.source,
Source::Alias { original, alias: alias2 } => {
assert_eq!(*original.code.value.borrow(), "a b");
assert_eq!(original.code.start_line_number, line);
assert_eq!(original.code.source, Source::Unknown);
assert_eq!(original.range, 0..1);
assert_eq!(alias2, &alias);
});
assert_eq!(c.location.range, 0..1);
});
lexer.consume_char();
assert_matches!(lexer.peek_char().await, Ok(PeekChar::Char(c)) => {
assert_eq!(c.value, 'e');
assert_eq!(*c.location.code.value.borrow(), "lex");
assert_eq!(c.location.code.start_line_number, line);
assert_matches!(&c.location.code.source,
Source::Alias { original, alias: alias2 } => {
assert_eq!(*original.code.value.borrow(), "a b");
assert_eq!(original.code.start_line_number, line);
assert_eq!(original.code.source, Source::Unknown);
assert_eq!(original.range, 0..1);
assert_eq!(alias2, &alias);
});
assert_eq!(c.location.range, 1..2);
});
lexer.consume_char();
assert_matches!(lexer.peek_char().await, Ok(PeekChar::Char(c)) => {
assert_eq!(c.value, 'x');
assert_eq!(*c.location.code.value.borrow(), "lex");
assert_eq!(c.location.code.start_line_number, line);
assert_matches!(&c.location.code.source,
Source::Alias { original, alias: alias2 } => {
assert_eq!(*original.code.value.borrow(), "a b");
assert_eq!(original.code.start_line_number, line);
assert_eq!(original.code.source, Source::Unknown);
assert_eq!(original.range, 0..1);
assert_eq!(alias2, &alias);
});
assert_eq!(c.location.range, 2..3);
});
lexer.consume_char();
// The character after the replacement comes from the original code.
assert_matches!(lexer.peek_char().await, Ok(PeekChar::Char(c)) => {
assert_eq!(c.value, ' ');
assert_eq!(*c.location.code.value.borrow(), "a b");
assert_eq!(c.location.code.start_line_number, line);
assert_eq!(c.location.code.source, Source::Unknown);
assert_eq!(c.location.range, 1..2);
});
lexer.consume_char();
});
}
// A replacement containing a newline is inserted as is; the original
// location covers the replaced word "foo" (range 1..4).
#[test]
fn lexer_core_substitute_alias_multi_line_replacement() {
let input = Memory::new(" foo b");
let line = NonZeroU64::new(1).unwrap();
let mut lexer = LexerCore::new(Box::new(input), line, Source::Unknown);
let alias = Rc::new(Alias {
name: "foo".to_string(),
replacement: "x\ny".to_string(),
global: true,
origin: Location::dummy("loc"),
});
block_on(async {
for _ in 0..4 {
let _ = lexer.peek_char().await;
lexer.consume_char();
}
lexer.substitute_alias(1, &alias);
assert_matches!(lexer.peek_char().await, Ok(PeekChar::Char(c)) => {
assert_eq!(c.value, 'x');
assert_eq!(*c.location.code.value.borrow(), "x\ny");
assert_eq!(c.location.code.start_line_number, line);
assert_matches!(&c.location.code.source,
Source::Alias { original, alias: alias2 } => {
assert_eq!(*original.code.value.borrow(), " foo b");
assert_eq!(original.code.start_line_number, line);
assert_eq!(original.code.source, Source::Unknown);
assert_eq!(original.range, 1..4);
assert_eq!(alias2, &alias);
});
assert_eq!(c.location.range, 0..1);
});
lexer.consume_char();
assert_matches!(lexer.peek_char().await, Ok(PeekChar::Char(c)) => {
assert_eq!(c.value, '\n');
assert_eq!(*c.location.code.value.borrow(), "x\ny");
assert_eq!(c.location.code.start_line_number, line);
assert_matches!(&c.location.code.source,
Source::Alias { original, alias: alias2 } => {
assert_eq!(*original.code.value.borrow(), " foo b");
assert_eq!(original.code.start_line_number, line);
assert_eq!(original.code.source, Source::Unknown);
assert_eq!(original.range, 1..4);
assert_eq!(alias2, &alias);
});
assert_eq!(c.location.range, 1..2);
});
lexer.consume_char();
assert_matches!(lexer.peek_char().await, Ok(PeekChar::Char(c)) => {
assert_eq!(c.value, 'y');
assert_eq!(*c.location.code.value.borrow(), "x\ny");
assert_eq!(c.location.code.start_line_number, line);
assert_matches!(&c.location.code.source, Source::Alias { original, alias: alias2 } => {
assert_eq!(*original.code.value.borrow(), " foo b");
assert_eq!(original.code.start_line_number, line);
assert_eq!(original.code.source, Source::Unknown);
assert_eq!(original.range, 1..4);
assert_eq!(alias2, &alias);
});
assert_eq!(c.location.range, 2..3);
});
lexer.consume_char();
// The character after the replacement comes from the original code.
assert_matches!(lexer.peek_char().await, Ok(PeekChar::Char(c)) => {
assert_eq!(c.value, ' ');
assert_eq!(*c.location.code.value.borrow(), " foo b");
assert_eq!(c.location.code.start_line_number, line);
assert_eq!(c.location.code.source, Source::Unknown);
assert_eq!(c.location.range, 4..5);
});
lexer.consume_char();
});
}
// An empty replacement removes the alias name; the next character peeked
// is the one that followed it in the original code.
#[test]
fn lexer_core_substitute_alias_empty_replacement() {
block_on(async {
let input = Memory::new("x ");
let line = NonZeroU64::new(1).unwrap();
let mut lexer = LexerCore::new(Box::new(input), line, Source::Unknown);
let alias = Rc::new(Alias {
name: "x".to_string(),
replacement: "".to_string(),
global: false,
origin: Location::dummy("dummy"),
});
let _ = lexer.peek_char().await;
lexer.consume_char();
lexer.substitute_alias(0, &alias);
assert_matches!(lexer.peek_char().await, Ok(PeekChar::Char(c)) => {
assert_eq!(c.value, ' ');
assert_eq!(*c.location.code.value.borrow(), "x ");
assert_eq!(c.location.code.start_line_number, line);
assert_eq!(c.location.code.source, Source::Unknown);
assert_eq!(c.location.range, 1..2);
});
});
}
// Index 0 has no preceding character, so the result is false even when the
// lexer's own source is a blank-ending alias.
#[test]
fn lexer_core_is_after_blank_ending_alias_index_0() {
let original = Location::dummy("original");
let alias = Rc::new(Alias {
name: "a".to_string(),
replacement: " ".to_string(),
global: false,
origin: Location::dummy("origin"),
});
let source = Source::Alias { original, alias };
let input = Memory::new("a");
let line = NonZeroU64::new(1).unwrap();
let lexer = LexerCore::new(Box::new(input), line, source);
assert!(!lexer.is_after_blank_ending_alias(0));
}
// With a replacement that does not end with a blank (" b"), no index is
// considered to follow a blank-ending alias.
#[test]
fn lexer_core_is_after_blank_ending_alias_not_blank_ending() {
block_on(async {
let input = Memory::new("a x");
let line = NonZeroU64::new(1).unwrap();
let mut lexer = LexerCore::new(Box::new(input), line, Source::Unknown);
let alias = Rc::new(Alias {
name: "a".to_string(),
replacement: " b".to_string(),
global: false,
origin: Location::dummy("dummy"),
});
lexer.peek_char().await.unwrap();
lexer.consume_char();
lexer.substitute_alias(0, &alias);
assert!(!lexer.is_after_blank_ending_alias(0));
assert!(!lexer.is_after_blank_ending_alias(1));
assert!(!lexer.is_after_blank_ending_alias(2));
assert!(!lexer.is_after_blank_ending_alias(3));
});
}
// With the replacement " b ", the buffer becomes " b  x": only indices 3
// and 4 follow the trailing blank of the substitution.
#[test]
fn lexer_core_is_after_blank_ending_alias_blank_ending() {
block_on(async {
let input = Memory::new("a x");
let line = NonZeroU64::new(1).unwrap();
let mut lexer = LexerCore::new(Box::new(input), line, Source::Unknown);
let alias = Rc::new(Alias {
name: "a".to_string(),
replacement: " b ".to_string(),
global: false,
origin: Location::dummy("dummy"),
});
lexer.peek_char().await.unwrap();
lexer.consume_char();
lexer.substitute_alias(0, &alias);
assert!(!lexer.is_after_blank_ending_alias(0));
assert!(!lexer.is_after_blank_ending_alias(1));
assert!(!lexer.is_after_blank_ending_alias(2));
assert!(lexer.is_after_blank_ending_alias(3));
assert!(lexer.is_after_blank_ending_alias(4));
});
}
// Peeking an empty input reports the end of input as `None`.
#[test]
fn lexer_with_empty_source() {
    let mut lexer = Lexer::from_memory("", Source::Unknown);
    let peeked = block_on(lexer.peek_char());
    assert_eq!(peeked, Ok(None));
}
// One line continuation is skipped; peeking then stops at the newline that
// is not preceded by a backslash.
#[test]
fn lexer_peek_char_with_line_continuation_enabled_stopping_on_non_backslash() {
    let mut lexer = Lexer::from_memory("\\\n\n\\", Source::Unknown);
    let peeked = block_on(lexer.peek_char());
    assert_eq!(peeked, Ok(Some('\n')));
    assert_eq!(lexer.index(), 2);
}
// Three line continuations are skipped; peeking then stops at a backslash
// that is not followed by a newline.
#[test]
fn lexer_peek_char_with_line_continuation_enabled_stopping_on_non_newline() {
    let mut lexer = Lexer::from_memory("\\\n\\\n\\\n\\\\", Source::Unknown);
    let peeked = block_on(lexer.peek_char());
    assert_eq!(peeked, Ok(Some('\\')));
    assert_eq!(lexer.index(), 6);
}
// With line continuation disabled, the leading backslash is returned as is
// and nothing is consumed.
#[test]
fn lexer_peek_char_with_line_continuation_disabled() {
    let mut base = Lexer::from_memory("\\\n\\\n\\\\", Source::Unknown);
    let mut plain = base.disable_line_continuation();
    let peeked = block_on(plain.peek_char());
    assert_eq!(peeked, Ok(Some('\\')));
    assert_eq!(plain.index(), 0);
}
// Flushing starts a new code object: locations obtained before the flush
// keep the old code, and later locations use a code starting at line 3.
#[test]
fn lexer_flush() {
block_on(async {
let mut lexer = Lexer::from_memory(" \n\n\t\n", Source::Unknown);
let location_1 = lexer.location().await.unwrap().clone();
assert_eq!(*location_1.code.value.borrow(), " \n");
lexer.consume_char();
lexer.peek_char().await.unwrap();
lexer.consume_char();
lexer.peek_char().await.unwrap();
lexer.consume_char();
lexer.flush();
lexer.peek_char().await.unwrap();
lexer.consume_char();
let location_2 = lexer.location().await.unwrap().clone();
// The old code kept growing until the flush; it is shared by `location_1`.
assert_eq!(*location_1.code.value.borrow(), " \n\n");
assert_eq!(location_1.code.start_line_number.get(), 1);
assert_eq!(location_1.code.source, Source::Unknown);
assert_eq!(location_1.range, 0..1);
assert_eq!(*location_2.code.value.borrow(), "\t\n");
assert_eq!(location_2.code.start_line_number.get(), 3);
assert_eq!(location_2.code.source, Source::Unknown);
assert_eq!(location_2.range, 1..2);
});
}
// `consume_char_if` calls the decider exactly once per available character,
// consumes only when it returns true, and does not call it at end of input.
#[test]
fn lexer_consume_char_if() {
let mut lexer = Lexer::from_memory("word\n", Source::Unknown);
let mut called = 0;
let c = block_on(lexer.consume_char_if(|c| {
assert_eq!(c, 'w');
called += 1;
true
}))
.unwrap()
.unwrap();
assert_eq!(called, 1);
assert_eq!(c.value, 'w');
assert_eq!(*c.location.code.value.borrow(), "word\n");
assert_eq!(c.location.code.start_line_number.get(), 1);
assert_eq!(c.location.code.source, Source::Unknown);
assert_eq!(c.location.range, 0..1);
// A rejected character is not consumed...
let mut called = 0;
let r = block_on(lexer.consume_char_if(|c| {
assert_eq!(c, 'o');
called += 1;
false
}));
assert_eq!(called, 1);
assert_eq!(r, Ok(None));
// ...so the same character is offered again on the next call.
let mut called = 0;
let r = block_on(lexer.consume_char_if(|c| {
assert_eq!(c, 'o');
called += 1;
false
}));
assert_eq!(called, 1);
assert_eq!(r, Ok(None));
let mut called = 0;
let c = block_on(lexer.consume_char_if(|c| {
assert_eq!(c, 'o');
called += 1;
true
}))
.unwrap()
.unwrap();
assert_eq!(called, 1);
assert_eq!(c.value, 'o');
assert_eq!(*c.location.code.value.borrow(), "word\n");
assert_eq!(c.location.code.start_line_number.get(), 1);
assert_eq!(c.location.code.source, Source::Unknown);
assert_eq!(c.location.range, 1..2);
block_on(lexer.consume_char_if(|c| {
assert_eq!(c, 'r');
true
}))
.unwrap()
.unwrap();
block_on(lexer.consume_char_if(|c| {
assert_eq!(c, 'd');
true
}))
.unwrap()
.unwrap();
block_on(lexer.consume_char_if(|c| {
assert_eq!(c, '\n');
true
}))
.unwrap()
.unwrap();
// At the end of input the decider must not be called at all.
let r = block_on(lexer.consume_char_if(|c| {
unreachable!("unexpected call to the decider function: argument={}", c)
}));
assert_eq!(r, Ok(None));
}
// On an empty input whose end has been peeked, the range 0..0 yields the
// end-of-input location.
#[test]
fn lexer_location_range_with_empty_range() {
let mut lexer = Lexer::from_memory("", Source::Unknown);
block_on(lexer.peek_char()).unwrap();
let location = lexer.location_range(0..0);
assert_eq!(*location.code.value.borrow(), "");
assert_eq!(location.code.start_line_number.get(), 1);
assert_eq!(location.code.source, Source::Unknown);
assert_eq!(location.range, 0..0);
}
// A range within a single code maps to the same range in that code.
#[test]
fn lexer_location_range_with_nonempty_range() {
block_on(async {
let mut lexer = Lexer::from_memory("cat foo", Source::Stdin);
for _ in 0..4 {
lexer.peek_char().await.unwrap();
lexer.consume_char();
}
lexer.peek_char().await.unwrap();
let location = lexer.location_range(1..4);
assert_eq!(*location.code.value.borrow(), "cat foo");
assert_eq!(location.code.start_line_number.get(), 1);
assert_eq!(location.code.source, Source::Stdin);
assert_eq!(location.range, 1..4);
})
}
// A range starting at the end of the buffer, after the end of input has
// been peeked, yields the end-of-input location.
#[test]
fn lexer_location_range_with_range_starting_at_end() {
block_on(async {
let mut lexer = Lexer::from_memory("cat", Source::Stdin);
for _ in 0..3 {
lexer.peek_char().await.unwrap();
lexer.consume_char();
}
lexer.peek_char().await.unwrap();
let location = lexer.location_range(3..3);
assert_eq!(*location.code.value.borrow(), "cat");
assert_eq!(location.code.start_line_number.get(), 1);
assert_eq!(location.code.source, Source::Stdin);
assert_eq!(location.range, 3..3);
})
}
// Asking for a location before anything has been read must panic.
#[test]
#[should_panic]
fn lexer_location_range_with_unconsumed_code() {
    let lexer = Lexer::from_memory("echo ok", Source::Unknown);
    let empty_range_at_start = 0..0;
    let _ = lexer.location_range(empty_range_at_start);
}
// A range that starts beyond the current index must panic.
#[test]
#[should_panic(expected = "The index 1 must not be larger than the current index 0")]
fn lexer_location_range_with_range_out_of_bounds() {
    let lexer = Lexer::from_memory("", Source::Unknown);
    let beyond_current_index = 1..2;
    let _ = lexer.location_range(beyond_current_index);
}
// A range that starts inside an alias replacement and runs into the
// original code is cut off at the end of the replacement.
#[test]
fn lexer_location_range_with_alias_substitution() {
block_on(async {
let mut lexer = Lexer::from_memory(" a;", Source::Unknown);
let alias_def = Rc::new(Alias {
name: "a".to_string(),
replacement: "abc".to_string(),
global: false,
origin: Location::dummy("dummy"),
});
for _ in 0..2 {
lexer.peek_char().await.unwrap();
lexer.consume_char();
}
lexer.substitute_alias(1, &alias_def);
// Consume the replacement "abc" and the following ";".
for _ in 1..5 {
lexer.peek_char().await.unwrap();
lexer.consume_char();
}
// Indices 2..5 are 'b', 'c' and ';'; only 'b' and 'c' share a code.
let location = lexer.location_range(2..5);
assert_eq!(*location.code.value.borrow(), "abc");
assert_eq!(location.code.start_line_number.get(), 1);
assert_matches!(&location.code.source, Source::Alias { original, alias } => {
assert_eq!(*original.code.value.borrow(), " a;");
assert_eq!(original.code.start_line_number.get(), 1);
assert_eq!(original.code.source, Source::Unknown);
assert_eq!(original.range, 1..2);
assert_eq!(alias, &alias_def);
});
assert_eq!(location.range, 1..3);
})
}
// The inner program text ends right before the closing parenthesis.
#[test]
fn lexer_inner_program_success() {
    let mut lexer = Lexer::from_memory("x y )", Source::Unknown);
    let program = block_on(lexer.inner_program());
    assert_eq!(program.unwrap(), "x y ");
}
// A syntax error inside the inner program is propagated together with its
// location.
#[test]
fn lexer_inner_program_failure() {
let mut lexer = Lexer::from_memory("<< )", Source::Unknown);
let e = block_on(lexer.inner_program()).unwrap_err();
assert_eq!(
e.cause,
ErrorCause::Syntax(SyntaxError::MissingHereDocDelimiter)
);
assert_eq!(*e.location.code.value.borrow(), "<< )");
assert_eq!(e.location.code.start_line_number.get(), 1);
assert_eq!(e.location.code.source, Source::Unknown);
assert_eq!(e.location.range, 3..4);
}
}