use crate::lexer::coords::{Coords, Span};
use crate::lexer::errors::{ParserError, ParserErrorDetails, ParserErrorSource, ParserResult};
use crate::lexer_input_error;
/// A single character paired with the input coordinates recorded for it.
pub struct CharWithCoords {
/// The character itself.
pub ch: char,
/// Coordinates stored alongside the character.
pub coords: Coords,
}
/// An owned string together with the span of input associated with it.
pub struct StringWithSpan {
/// The string contents.
pub str: String,
/// Start and end coordinates associated with the string.
pub span: Span,
}
/// Builds a fresh `CharWithCoords` from an existing one by copying its character and
/// cloning its coordinates.
///
/// The argument expression is expanded twice, so it should be free of side effects.
macro_rules! clone_char_with_coords {
    ($original:expr) => {
        CharWithCoords {
            ch: $original.ch,
            coords: $original.coords.clone(),
        }
    };
}
/// Character-level input for the lexer.
///
/// Wraps a `char` iterator and keeps track of the current coordinates, a buffer of
/// consumed characters and a stack of characters that have been pushed back.
pub struct LexerInput<'a> {
    /// A character read ahead from `chars` that has not been handed out yet.
    lookahead: Option<char>,
    /// Underlying source of characters.
    chars: &'a mut dyn Iterator<Item = char>,
    /// Current coordinates within the input.
    position: Coords,
    /// Characters accumulated by `advance`, oldest first.
    buffer: Vec<CharWithCoords>,
    /// Characters waiting to be re-read; the last element is handed out first.
    pushbacks: Vec<CharWithCoords>,
}
impl<'a> LexerInput<'a> {
pub fn new(chars: &'a mut dyn Iterator<Item = char>) -> Self {
LexerInput {
lookahead: None,
chars,
position: Coords::default(),
buffer: vec![],
pushbacks: vec![],
}
}
/// Discards every character currently held in the buffer.
///
/// Pushbacks, lookahead and the current position are left untouched.
pub fn clear(&mut self) {
    self.buffer.clear();
}
/// Moves the most recently buffered character onto the pushback stack so that it is
/// handed out again by a later read. Does nothing when the buffer is empty.
pub fn pushback(&mut self) {
    if let Some(newest) = self.buffer.pop() {
        self.pushbacks.push(newest);
    }
}
/// Returns a copy of the current input coordinates.
pub fn position(&self) -> Coords {
    self.position
}
/// Returns a copy of the most recently buffered character (the last buffer element),
/// or `None` when the buffer is empty.
pub fn front(&self) -> Option<CharWithCoords> {
    self.buffer
        .last()
        .map(|newest| clone_char_with_coords!(newest))
}
/// Returns a copy of the oldest buffered character (the first buffer element),
/// or `None` when the buffer is empty.
pub fn back(&self) -> Option<CharWithCoords> {
    self.buffer
        .first()
        .map(|oldest| clone_char_with_coords!(oldest))
}
/// Consumes one character and appends it to the buffer.
///
/// When `skip_whitespace` is set, whitespace is skipped first. A pending pushback is
/// re-read in preference to the underlying stream, in which case the current position is
/// reset to the coordinates stored with that character.
///
/// # Errors
///
/// Returns an `EndOfInput` error when no further character is available, and propagates
/// any error raised while skipping whitespace.
pub fn advance(&mut self, skip_whitespace: bool) -> ParserResult<()> {
    if skip_whitespace {
        self.skip_whitespace()?;
    }

    // A pushed-back character already carries its coordinates: restore them.
    if let Some(restored) = self.pushbacks.pop() {
        self.position = restored.coords;
        self.buffer.push(restored);
        return Ok(());
    }

    match self.next_char() {
        None => lexer_input_error!(ParserErrorDetails::EndOfInput, self.position),
        Some((ch, known_coords)) => {
            // The position is bumped before deciding which coordinates to record.
            self.inc_position(false);
            let coords = match known_coords {
                Some(recorded) => recorded,
                None => self.position,
            };
            self.buffer.push(CharWithCoords { ch, coords });
            Ok(())
        }
    }
}
/// Pulls the next character from the underlying iterator, stores it in the lookahead
/// slot (replacing whatever was there) and returns it.
fn try_lookahead(&mut self) -> Option<char> {
    let peeked = self.chars.next();
    self.lookahead = peeked;
    peeked
}
/// Empties the lookahead slot, dropping any character held there.
fn clear_lookahead(&mut self) {
    let _ = self.lookahead.take();
}
/// Consumes whitespace until a non-whitespace character is found, updating the position
/// as it goes.
///
/// The first non-whitespace character is not buffered: it is pushed onto the pushback
/// stack, tagged with the position it was found at, so the caller can pick it up next.
///
/// A `\r` followed by `\n` is treated as a single line break. A lone `\r` also counts as
/// a line break; the character after it stays in the lookahead slot and is processed by
/// the next loop iteration, so it is delivered and counted exactly once.
///
/// # Errors
///
/// Returns an `EndOfInput` error when the input runs out before a non-whitespace
/// character is found (including directly after a `\r`).
fn skip_whitespace(&mut self) -> ParserResult<()> {
    loop {
        let ch = match self.next_char() {
            Some((ch, _)) => ch,
            None => {
                return lexer_input_error!(ParserErrorDetails::EndOfInput, self.position);
            }
        };

        if !ch.is_whitespace() {
            self.inc_position(false);
            self.pushbacks.push(CharWithCoords {
                ch,
                coords: self.position,
            });
            return Ok(());
        }

        match ch {
            '\r' => {
                self.inc_position(true);
                // Reuse a character already parked in the lookahead slot instead of
                // overwriting (and thereby losing) it.
                let peeked = match self.lookahead {
                    Some(pending) => Some(pending),
                    None => self.try_lookahead(),
                };
                match peeked {
                    Some('\n') => {
                        // NOTE(review): a CRLF pair leaves the column at 1 whereas a bare
                        // LF leaves it at 0 - confirm this is intended.
                        self.inc_position(false);
                        self.clear_lookahead();
                    }
                    Some(_) => {
                        // Lone CR: the peeked character stays in `self.lookahead` only.
                        // Pushing it onto `pushbacks` as well would hand it out twice and
                        // advance the position twice for a single character.
                    }
                    None => {
                        return lexer_input_error!(
                            ParserErrorDetails::EndOfInput,
                            self.position
                        );
                    }
                }
            }
            '\n' => self.inc_position(true),
            _ => self.inc_position(false),
        }
    }
}
/// Produces the next raw character, preferring (in order) the pushback stack, the
/// lookahead slot and finally the underlying iterator.
///
/// Characters taken from the pushback stack come with the coordinates stored alongside
/// them; characters from the other two sources come with `None`.
fn next_char(&mut self) -> Option<(char, Option<Coords>)> {
    if let Some(replayed) = self.pushbacks.pop() {
        return Some((replayed.ch, Some(replayed.coords)));
    }
    self.lookahead
        .take()
        .or_else(|| self.chars.next())
        .map(|ch| (ch, None))
}
/// Calls `advance` `n` times with the given `skip_whitespace` flag.
///
/// # Errors
///
/// Stops at, and returns, the first error produced by `advance`.
pub fn advance_n(&mut self, n: usize, skip_whitespace: bool) -> ParserResult<()> {
    (0..n).try_for_each(|_| self.advance(skip_whitespace))
}
/// Advances the current position by one character.
///
/// A line number of 0 is first bumped to 1. The absolute offset always grows by one.
/// With `newline` set the line number is incremented and the column reset to 0;
/// otherwise the column is incremented.
fn inc_position(&mut self, newline: bool) {
    let pos = &mut self.position;
    if pos.line == 0 {
        pos.line = 1;
    }
    pos.absolute += 1;
    if newline {
        pos.line += 1;
        pos.column = 0;
    } else {
        pos.column += 1;
    }
}
/// Collects the buffered characters into a string together with their span.
///
/// The span runs from the coordinates of the oldest buffered character to those of the
/// newest. For an empty buffer the string is empty and both ends of the span are the
/// current position.
pub fn buffer_as_string_with_span(&mut self) -> StringWithSpan {
    match (self.buffer.first(), self.buffer.last()) {
        (Some(oldest), Some(newest)) => StringWithSpan {
            str: self.buffer.iter().map(|cwc| cwc.ch).collect(),
            span: Span {
                start: oldest.coords,
                end: newest.coords,
            },
        },
        _ => StringWithSpan {
            str: String::new(),
            span: Span {
                start: self.position,
                end: self.position,
            },
        },
    }
}
/// Returns the buffered characters, oldest first, as a vector (empty for an empty buffer).
pub fn buffer_as_char_array(&mut self) -> Vec<char> {
    self.buffer.iter().map(|cwc| cwc.ch).collect()
}
/// Returns the buffered characters, oldest first, as UTF-8 encoded bytes.
///
/// ASCII characters yield exactly one byte each. Non-ASCII characters are emitted as
/// their multi-byte UTF-8 sequences; a plain `as u8` cast would silently truncate them.
/// An empty buffer yields an empty vector.
pub fn buffer_as_byte_array(&self) -> Vec<u8> {
    // One byte per character is the minimum; the vector grows if non-ASCII is present.
    let mut bytes = Vec::with_capacity(self.buffer.len());
    let mut scratch = [0u8; 4];
    for cwc in &self.buffer {
        bytes.extend_from_slice(cwc.ch.encode_utf8(&mut scratch).as_bytes());
    }
    bytes
}
}
#[cfg(test)]
mod test {
    use std::io::BufReader;
    use chisel_decoders::utf8::Utf8Decoder;
    use crate::lexer::lexer_input::LexerInput;
    use crate::reader_from_bytes;

    /// Constructing an input over a decoder must succeed without panicking.
    #[test]
    fn should_create_new() {
        let mut reader = reader_from_bytes!("{}[],:");
        let mut decoder = Utf8Decoder::new(&mut reader);
        let _ = LexerInput::new(&mut decoder);
    }

    /// Walks a single line, first skipping whitespace and then not, checking the
    /// character and column reported at each checkpoint.
    #[test]
    fn should_consume_single_lines_correctly() {
        let mut reader = reader_from_bytes!("this is a test line");
        let mut decoder = Utf8Decoder::new(&mut reader);
        let mut input = LexerInput::new(&mut decoder);

        assert!(input.advance(true).is_ok());
        assert_eq!(input.front().unwrap().ch, 't');

        // Four more whitespace-skipping steps land on the 'i' of "is".
        for _ in 0..4 {
            assert!(input.advance(true).is_ok());
        }
        let current = input.front().unwrap();
        assert_eq!(current.ch, 'i');
        assert_eq!(current.coords.column, 6);

        input.clear();

        // Four raw steps (whitespace included) end on the space after "a".
        for _ in 0..4 {
            assert!(input.advance(false).is_ok());
        }
        let current = input.front().unwrap();
        assert_eq!(current.ch, ' ');
        assert_eq!(current.coords.column, 10);
    }
}