use std::fmt;
use std::str;
/// Error reported when the input cannot be parsed.
///
/// Carries the line the problem is attributed to together with a
/// human-readable description. All fields are plain owned data, so the
/// type is cheaply comparable (`Eq`) and can be duplicated (`Clone`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    /// Line number the error is attributed to.
    pub line_number: usize,
    /// Description of what went wrong.
    pub message: String,
}
impl fmt::Display for ParseError {
    /// Renders the error as `parse error (L<line>): <message>`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let line = self.line_number;
        let description = &self.message;
        write!(f, "parse error (L{}): {}", line, description)
    }
}
// Lets `ParseError` be used wherever a `std::error::Error` is expected.
// The body is empty, so every trait method keeps its default implementation;
// the required `Debug` and `Display` come from the derive and impl above.
impl std::error::Error for ParseError {}
/// Returns `true` for the horizontal whitespace bytes: space, tab and
/// carriage return. A line feed (`\n`) is deliberately *not* included.
#[inline]
fn is_whitespace_except_newline(c: u8) -> bool {
    matches!(c, b' ' | b'\t' | b'\r')
}
/// Returns `true` when `c` is a line feed or any byte accepted by
/// `is_whitespace_except_newline` (space, tab, carriage return).
#[inline]
fn is_whitespace(c: u8) -> bool {
    c == b'\n' || is_whitespace_except_newline(c)
}
/// Byte-level cursor over an input string that splits it into words.
///
/// Cloning is cheap (a slice reference plus two counters); a clone can be
/// kept as a checkpoint and later compared with `bytes_consumed`.
#[derive(Clone)]
pub(crate) struct Lexer<'a> {
/// The input being scanned, viewed as raw bytes.
bytes: &'a [u8],
/// Index into `bytes` of the next byte to be read.
read_pos: usize,
/// Line counter: starts at 1 and is incremented each time a `\n` byte
/// is stepped over.
current_line_number: usize,
}
impl<'a> Lexer<'a> {
    /// Creates a lexer positioned at the first byte of `input`, on line 1.
    pub(crate) fn new(input: &'a str) -> Lexer<'a> {
        let bytes = input.as_bytes();
        Lexer {
            bytes,
            read_pos: 0,
            current_line_number: 1,
        }
    }

    /// Steps over the current byte. If that byte is a line feed, the line
    /// counter is incremented first.
    fn advance(&mut self) {
        let at_newline = self.peek() == Some(&b'\n');
        if at_newline {
            self.current_line_number += 1;
        }
        self.read_pos += 1;
    }

    /// Returns the byte at the read position without consuming it, or
    /// `None` once the read position is past the end of the input.
    fn peek(&self) -> Option<&u8> {
        self.bytes.get(self.read_pos)
    }

    /// Number of bytes this lexer has read beyond `checkpoint`.
    ///
    /// Returns `None` when this lexer's read position lies *before* the
    /// checkpoint's.
    pub(crate) fn bytes_consumed(&self, checkpoint: &Self) -> Option<usize> {
        self.read_pos.checked_sub(checkpoint.read_pos)
    }

    /// Consumes bytes for as long as `is_true` accepts them.
    ///
    /// Returns `true` if at least one byte was consumed.
    fn skip_while<F: Fn(u8) -> bool>(&mut self, is_true: F) -> bool {
        let origin = self.read_pos;
        while let Some(&byte) = self.peek() {
            if !is_true(byte) {
                break;
            }
            self.advance();
        }
        // Post-condition: we stopped at end of input or on a rejected byte.
        debug_assert!(self.peek().map_or(true, |&c| !is_true(c)));
        // Every accepted byte moves the read position by exactly one.
        self.read_pos != origin
    }

    /// Consumes bytes until one is accepted by `is_false` (that byte is left
    /// unconsumed) or the input ends.
    ///
    /// Returns `true` if at least one byte was consumed.
    fn skip_unless<F: Fn(u8) -> bool>(&mut self, is_false: F) -> bool {
        let keeps_going = |c: u8| !is_false(c);
        self.skip_while(keeps_going)
    }

    /// If the read position is on `#`, consumes everything up to (but not
    /// including) the next line feed and returns `true`; otherwise consumes
    /// nothing and returns `false`.
    fn skip_comment(&mut self) -> bool {
        if self.peek() != Some(&b'#') {
            return false;
        }
        self.skip_unless(|c| c == b'\n');
        true
    }

    /// Consumes a run of spaces, tabs and carriage returns.
    ///
    /// Returns `true` if at least one byte was consumed.
    fn skip_whitespace_except_newline(&mut self) -> bool {
        self.skip_while(is_whitespace_except_newline)
    }

    /// Produces the next word of the input.
    ///
    /// Leading horizontal whitespace and a `#` comment are skipped first.
    /// A line feed is returned as its own one-byte word; any other word is
    /// the run of bytes up to the next whitespace byte or `#`. Returns
    /// `None` when the input is exhausted.
    fn next_word(&mut self) -> Option<&'a [u8]> {
        self.skip_whitespace_except_newline();
        self.skip_comment();

        let word_start = self.read_pos;
        let first = *self.peek()?;
        if first == b'\n' {
            // The newline itself is the word.
            self.advance();
        } else if !self.skip_unless(|c| is_whitespace(c) || c == b'#') {
            // Nothing could be consumed, so there is no word to report.
            return None;
        }
        self.bytes.get(word_start..self.read_pos)
    }
}
/// Wrapper around `Lexer` that yields words as `&str` and can hold one
/// already-lexed word in reserve, so the upcoming word can be inspected
/// without being consumed.
#[derive(Clone)]
pub(crate) struct PeekableLexer<'a> {
/// The underlying lexer that words are pulled from.
inner: Lexer<'a>,
/// Lookahead slot. The outer `None` means nothing has been peeked; the
/// outer `Some` holds the result of the last peek, where an inner `None`
/// records that the inner lexer had no further word.
peeked: Option<Option<&'a str>>,
}
impl<'a> PeekableLexer<'a> {
pub(crate) fn new(lexer: Lexer<'a>) -> Self {
Self {
inner: lexer,
peeked: None,
}
}
pub(crate) fn next_str(&mut self) -> Option<&'a str> {
match self.peeked.take() {
Some(v) => v,
None => self
.inner
.next_word()
.map(|buf| unsafe { str::from_utf8_unchecked(buf) }),
}
}
pub(crate) fn peek_str(&mut self) -> Option<&'a str> {
match self.peeked {
Some(v) => v,
None => {
let peek = self
.inner
.next_word()
.map(|buf| unsafe { str::from_utf8_unchecked(buf) });
self.peeked.replace(peek);
peek
}
}
}
pub(crate) fn bytes_consumed(&self, checkpoint: &Self) -> Option<usize> {
self.inner.bytes_consumed(&checkpoint.inner)
}
}
// Walks a mixed input (spaces, tab, bare CR, CRLF, a `#` comment) and checks
// every word returned by `next_word` together with the line counter observed
// immediately after it.
#[test]
fn test_next_word() {
    let mut l = Lexer::new("hello wor\rld\n this# is\r\na \t test\n");

    // (expected word, expected line number after reading it)
    let expectations: [(&[u8], usize); 9] = [
        (&b"hello"[..], 1),
        (&b"wor"[..], 1),
        (&b"ld"[..], 1),
        (&b"\n"[..], 2),
        (&b"this"[..], 2),
        (&b"\n"[..], 3),
        (&b"a"[..], 3),
        (&b"test"[..], 3),
        (&b"\n"[..], 4),
    ];

    for &(word, line) in expectations.iter() {
        assert_eq!(l.next_word(), Some(word));
        assert_eq!(l.current_line_number, line);
    }

    // Input exhausted.
    assert_eq!(l.next_word(), None);
}