use crate::decoder::Decode;
use crate::decoder::Decoder;
use crate::errors::syntax_error;
use crate::errors::ParseResult;
use crate::loc::Loc;
use crate::numbers::NumberParser;
use crate::parsers::comments::CommentParser;
use crate::parsers::keyword::KeywordParser;
use crate::strings::StringParser;
use std::cmp::Ordering;
use std::io::Read;
use std::io::Result;
use std::path::PathBuf;
/// Capacity, in characters, of the lookahead buffer held by `ParserCore`.
/// `ParserCore::peek_offset` returns `'\0'` for any offset at or beyond this value.
pub const MAX_LOOKAHEAD: usize = 64 * 1024;
/// Number of peek operations allowed without an intervening consume before the
/// stall detector (`check_peek_stall!`) panics.
pub const PEEK_LIMIT: usize = 1000;
/// Number of consume attempts allowed with nothing buffered (end of input) before
/// the stall detector (`check_eof_stall!`) panics.
pub const EOF_LIMIT: usize = 1000;
/// Advance the `line` / `column` fields of `$self` past the character `$ch`.
///
/// A newline moves to column 1 of the following line; every other character
/// advances the column by one.
#[cfg(not(feature = "no_tracking"))]
macro_rules! rc_update {
    ($self: ident, $ch: expr) => {
        match $ch {
            '\n' => {
                $self.line += 1;
                $self.column = 1;
            }
            _ => {
                $self.column += 1;
            }
        }
    };
}
// No-op variant of `rc_update!`, selected when the `no_tracking` feature is
// enabled: the invocation expands to nothing, so line and column are not updated.
#[cfg(feature = "no_tracking")]
macro_rules! rc_update {
($self: ident, $ch: expr) => {};
}
/// Apply `rc_update!` to every character of `$self.unwind_buffer` whose index
/// lies in the half-open range `$start..$end`, in ascending order.
#[cfg(not(feature = "no_tracking"))]
macro_rules! rc_chunk {
    ($self: ident, $start: expr, $end: expr) => {{
        // Evaluate both bounds once, start first, exactly as a range expression would.
        let mut cursor = $start;
        let limit = $end;
        while cursor < limit {
            rc_update!($self, $self.unwind_buffer[cursor]);
            cursor += 1;
        }
    }};
}
// No-op variant of `rc_chunk!`, selected when the `no_tracking` feature is
// enabled: the invocation expands to nothing, so no position bookkeeping is done.
#[cfg(feature = "no_tracking")]
macro_rules! rc_chunk {
($self: ident, $start: expr, $end: expr) => {};
}
/// Count one more peek on `$self` and panic once the running total exceeds
/// `PEEK_LIMIT`, which indicates a parser that keeps looking without consuming.
#[cfg(not(feature = "no_stall_detection"))]
macro_rules! check_peek_stall {
    ($self: ident) => {
        $self.peek_count += 1;
        assert!(
            $self.peek_count <= PEEK_LIMIT,
            "Exceeded peek limit; parsing has stalled. This is almost certainly an internal error."
        );
    };
}
// No-op variant of `check_peek_stall!`, selected when the `no_stall_detection`
// feature is enabled: peeks are neither counted nor limited.
#[cfg(feature = "no_stall_detection")]
macro_rules! check_peek_stall {
($self: ident) => {};
}
/// Count one more consume attempt made with nothing left to consume and panic
/// once the running total exceeds `EOF_LIMIT`.
#[cfg(not(feature = "no_stall_detection"))]
macro_rules! check_eof_stall {
    ($self: ident) => {
        $self._eof_peek_count += 1;
        assert!(
            $self._eof_peek_count <= EOF_LIMIT,
            "Exceeded EOF limit; parsing has stalled. This is almost certainly an internal error."
        );
    };
}
// No-op variant of `check_eof_stall!`, selected when the `no_stall_detection`
// feature is enabled: consume attempts at end of input are neither counted nor limited.
#[cfg(feature = "no_stall_detection")]
macro_rules! check_eof_stall {
($self: ident) => {};
}
/// Build a [`Parser`] over a copy of the given bytes.
///
/// The source is reported under the name `<bytes>` in locations.
pub fn parse_from_bytes(source: &[u8]) -> Parser {
    let owned = source.to_vec();
    Parser::new("<bytes>", Decode::new(owned))
}
/// Build a [`Parser`] over a copy of the bytes of the given string.
///
/// The source is reported under the name `<string>` in locations.
pub fn parse_from_string(source: &str) -> Parser {
    let owned = source.as_bytes().to_vec();
    Parser::new("<string>", Decode::new(owned))
}
/// Build a [`Parser`] over everything that can be read from standard input.
///
/// Reading is best-effort: a read error is ignored and whatever bytes were
/// collected before it are parsed. The parser is given an empty source name.
#[cfg(not(tarpaulin_include))]
pub fn parse_from_stdin() -> Parser {
    let mut input = Vec::new();
    // Deliberately discard the outcome; a failed read still leaves the bytes read so far.
    let _ = std::io::stdin().lock().read_to_end(&mut input);
    Parser::new("", Decode::new(input))
}
/// Read the whole file at `path` and build a [`Parser`] over its bytes.
///
/// The path is used as the source name in reported locations. A path that is
/// not valid UTF-8 is converted lossily (invalid sequences become U+FFFD)
/// instead of causing a panic.
///
/// # Errors
///
/// Returns the I/O error produced by `std::fs::read` if the file cannot be read.
pub fn parse_from_path(path: &PathBuf) -> Result<Parser> {
    let bytes = std::fs::read(path)?;
    let decoder = Decode::new(bytes);
    // The name is only used for diagnostics, so a lossy conversion is acceptable.
    let name = path.to_string_lossy();
    Ok(Parser::new(&name, decoder))
}
/// Character-level state of the parser: the decoder supplying characters, a
/// lookahead buffer, the current line/column, and stall-detection counters.
pub struct ParserCore {
// Source name used by `loc`; an empty name produces `Loc::Console` locations.
name: String,
// Current line number; starts at 1.
line: usize,
// Current column number; starts at 1 and returns to 1 after a newline is consumed.
column: usize,
// Source of decoded characters.
decoder: Box<dyn Decoder>,
// Set once the decoder has reported that no further characters are available.
at_eof: bool,
// Peek operations since the last `consume`; checked against `PEEK_LIMIT`.
peek_count: usize,
// Consume attempts made with nothing buffered; checked against `EOF_LIMIT`.
_eof_peek_count: usize,
// Storage for characters read from the decoder but not yet consumed.
unwind_buffer: Box<[char; MAX_LOOKAHEAD]>,
// Index in `unwind_buffer` of the next character to consume.
next: usize,
// Number of buffered, unconsumed characters starting at `next`.
len: usize,
// Copy of the next character to consume; `'\0'` when nothing is buffered.
next_char: char,
// Predicate deciding which characters `consume_ws_only` treats as whitespace.
whitespace: Box<dyn Fn(char) -> bool>,
}
impl ParserCore {
pub fn new<D: Decoder + 'static>(name: &str, decoder: D) -> Self {
let mut dec = Box::new(decoder);
let value = dec.next();
match value {
None => {
ParserCore {
name: name.to_string(),
line: 1,
column: 1,
decoder: dec,
at_eof: true,
peek_count: 0,
_eof_peek_count: 0,
unwind_buffer: Box::new(['\0'; MAX_LOOKAHEAD]),
next: 0,
len: 0,
next_char: '\0',
whitespace: Box::new(char::is_whitespace),
}
}
Some(ch) => {
let mut buffer = ['\0'; MAX_LOOKAHEAD];
buffer[0] = ch;
ParserCore {
name: name.to_string(),
line: 1,
column: 1,
decoder: dec,
at_eof: false,
peek_count: 0,
_eof_peek_count: 0,
unwind_buffer: Box::new(buffer),
next: 0,
len: 1,
next_char: ch,
whitespace: Box::new(char::is_whitespace),
}
}
}
}
pub fn loc(&self) -> Loc {
if self.name.is_empty() {
Loc::Console {
line: self.line,
column: self.column,
}
} else {
Loc::File {
name: self.name.clone(),
column: self.column,
line: self.line,
}
}
}
pub fn get_column_number(&self) -> usize {
self.column
}
pub fn get_line_number(&self) -> usize {
self.line
}
pub fn replace_whitespace_test(
&mut self,
test: Box<dyn Fn(char) -> bool>,
) -> Box<dyn Fn(char) -> bool> {
std::mem::replace(&mut self.whitespace, test)
}
fn reset(&mut self) {
match self.decoder.next() {
None => {
self.at_eof = true;
self.next_char = '\0';
self.len = 0;
self.next = 0;
self.at_eof = true;
}
Some(ch) => {
self.next_char = ch;
self.len = 1;
self.next = 0;
self.unwind_buffer[0] = ch;
}
}
}
#[inline(always)]
pub fn is_at_eof(&self) -> bool {
self.at_eof
}
#[inline]
pub fn peek(&mut self) -> char {
check_peek_stall!(self);
self.next_char
}
pub fn consume(&mut self) {
self.peek_count = 0;
if self.len > 0 {
rc_update!(self, self.unwind_buffer[self.next]);
if self.len == 1 {
self.reset();
} else {
self.len -= 1;
self.next += 1;
self.next_char = self.unwind_buffer[self.next];
}
} else {
check_eof_stall!(self);
}
}
pub fn peek_offset(&mut self, n: usize) -> char {
check_peek_stall!(self);
if n == 0 {
return self.next_char;
}
if n >= MAX_LOOKAHEAD {
'\0'
} else {
if self.len <= n {
let count = self.decoder.fill_n(
n + 1 - self.len,
&mut self.unwind_buffer[self.next + self.len..self.next + n + 1],
);
self.len += count;
}
if n < self.len {
self.unwind_buffer[self.next + n]
} else {
'\0'
}
}
}
pub fn peek_n_vec(&mut self, n: usize) -> Vec<char> {
check_peek_stall!(self);
if self.len >= n {
return self.unwind_buffer[self.next..(self.next + n)].to_vec();
}
let count = self.decoder.fill_n(
n - self.len,
&mut self.unwind_buffer[self.next + self.len..self.next + n],
);
self.len += count;
self.unwind_buffer[self.next..(self.next + self.len)].to_vec()
}
pub fn peek_n(&mut self, n: usize) -> String {
check_peek_stall!(self);
if self.len >= n {
return self.unwind_buffer[self.next..(self.next + n)]
.iter()
.collect();
}
let count = self.decoder.fill_n(
n - self.len,
&mut self.unwind_buffer[self.next + self.len..self.next + n],
);
self.len += count;
self.unwind_buffer[self.next..(self.next + self.len)]
.iter()
.collect()
}
pub fn consume_n(&mut self, n: usize) {
if self.len > 0 {
self.peek_count = 0;
match self.len.cmp(&n) {
Ordering::Equal => {
rc_chunk!(self, self.next, self.next + n);
self.reset();
}
Ordering::Less => {
rc_chunk!(self, self.next, self.next + n);
for _ in self.len..n {
if let Some(_ch) = self.decoder.next() {
rc_update!(self, _ch);
};
}
self.reset();
}
Ordering::Greater => {
rc_chunk!(self, self.next, self.next + n);
self.next += n;
self.len -= n;
self.next_char = self.unwind_buffer[self.next];
}
}
} else {
check_eof_stall!(self);
}
}
pub fn peek_chars(&mut self, chars: &[char]) -> bool {
check_peek_stall!(self);
let n = chars.len();
if n == 0 {
return true;
}
if self.len == 0 {
return false;
}
if self.len < n {
let count = self.decoder.fill_n(
n - self.len,
&mut self.unwind_buffer[self.next + self.len..self.next + n],
);
self.len += count;
if self.len < n {
return false;
}
}
#[allow(clippy::needless_range_loop)]
for index in 0..n {
if self.unwind_buffer[self.next + index] != chars[index] {
return false;
}
}
true
}
#[inline]
pub fn peek_and_consume(&mut self, ch: char) -> bool {
if self.peek() == ch {
self.consume();
true
} else {
false
}
}
pub fn peek_and_consume_chars(&mut self, chars: &[char]) -> bool {
if self.peek_chars(chars) {
self.consume_n(chars.len());
true
} else {
false
}
}
pub fn consume_ws_only(&mut self) -> bool {
let mut result = false;
while !self.at_eof {
if (self.whitespace)(self.next_char) {
self.consume();
result = true;
} else {
break;
}
}
result
}
pub fn take_until(&mut self, token: &str) -> String {
let chars = token.chars().collect::<Vec<char>>();
let mut value = String::new();
while !self.at_eof && !self.peek_and_consume_chars(&chars) {
value.push(self.unwind_buffer[self.next]);
self.consume();
}
value
}
pub fn take_while<T: Fn(char) -> bool>(&mut self, include: T) -> String {
let mut value = String::new();
while !self.at_eof && include(self.next_char) {
value.push(self.next_char);
self.consume();
}
value
}
pub fn take_while_unless<T: Fn(char) -> bool, U: Fn(char) -> bool>(
&mut self,
include: T,
exclude: U,
) -> String {
let mut value = String::new();
while !self.at_eof {
if exclude(self.next_char) {
self.consume();
} else if include(self.next_char) {
value.push(self.next_char);
self.consume();
} else {
break;
}
}
value
}
pub fn take<S, K>(&mut self, skip: S, stop: K) -> (Vec<char>, Option<char>)
where
S: Fn(char) -> bool,
K: Fn(char) -> bool,
{
let mut kept = vec![];
while !self.at_eof {
let ch = self.next_char;
if skip(ch) {
self.consume();
} else if stop(ch) {
return (kept, Some(ch));
} else {
self.consume();
kept.push(ch);
}
}
(kept, None)
}
pub fn consume_while<T: Fn(char) -> bool>(&mut self, include: T) -> bool {
let mut retval = false;
while !self.at_eof && include(self.next_char) {
self.consume();
retval = true;
}
retval
}
pub fn consume_until(&mut self, token: &str) -> bool {
let chars = token.chars().collect::<Vec<char>>();
let mut retval = false;
while !self.at_eof && !self.peek_and_consume_chars(&chars) {
self.consume();
retval = true;
}
retval
}
}
/// Front end that pairs a `ParserCore` with replaceable sub-parsers for
/// comments, strings, numbers and keywords.
pub struct Parser {
// Character-level state shared with every sub-parser.
core: Box<ParserCore>,
// Used by `consume_ws` when `parse_comments` is true.
comment_parser: CommentParser,
// Used by the `parse_string*` methods.
string_parser: StringParser,
// Used by the numeric `parse_*` methods.
number_parser: NumberParser,
// Used by `parse_keyword` and `parse_keyword_ws`.
keyword_parser: KeywordParser,
/// When true, `consume_ws` runs the comment parser; otherwise it only skips whitespace.
pub parse_comments: bool,
}
impl Parser {
pub fn new(name: &str, decoder: Decode) -> Self {
Parser {
core: Box::new(ParserCore::new(name, decoder)),
comment_parser: CommentParser::new(),
string_parser: StringParser::new(),
number_parser: NumberParser::new(),
keyword_parser: KeywordParser::new(),
parse_comments: true,
}
}
pub fn borrow_core(&mut self) -> &mut ParserCore {
&mut self.core
}
pub fn borrow_comment_parser(&mut self) -> &mut CommentParser {
&mut self.comment_parser
}
pub fn replace_comment_parser(&mut self, compar: CommentParser) -> CommentParser {
std::mem::replace(&mut self.comment_parser, compar)
}
pub fn borrow_number_parser(&mut self) -> &mut NumberParser {
&mut self.number_parser
}
pub fn replace_number_parser(&mut self, numpar: NumberParser) -> NumberParser {
std::mem::replace(&mut self.number_parser, numpar)
}
pub fn borrow_string_parser(&mut self) -> &mut StringParser {
&mut self.string_parser
}
pub fn replace_string_parser(&mut self, strpar: StringParser) -> StringParser {
std::mem::replace(&mut self.string_parser, strpar)
}
pub fn borrow_keyword_parser(&mut self) -> &mut KeywordParser {
&mut self.keyword_parser
}
pub fn replace_keyword_parser(&mut self, strpar: KeywordParser) -> KeywordParser {
std::mem::replace(&mut self.keyword_parser, strpar)
}
pub fn parse_string_match_delimiter(&mut self) -> ParseResult<String> {
if self.is_at_eof() {
return Ok("".to_string());
}
let delimiter = self.peek();
self.consume();
self.string_parser.process(&mut self.core, Some(delimiter))
}
pub fn parse_string_until_delimiter(&mut self, delimiter: char) -> ParseResult<String> {
self.string_parser.process(&mut self.core, Some(delimiter))
}
pub fn parse_string_match_delimiter_ws(&mut self) -> ParseResult<String> {
let result = self.parse_string_match_delimiter();
self.consume_ws();
result
}
pub fn parse_string(&self, string: &str) -> ParseResult<String> {
self.string_parser.parse_string(string)
}
pub fn parse_string_until_delimiter_ws(&mut self, terminator: char) -> ParseResult<String> {
let string = self.string_parser.process(&mut self.core, Some(terminator));
self.consume_ws();
string
}
pub fn parse_u128(&mut self) -> ParseResult<u128> {
self.number_parser.parse_u128(&mut self.core)
}
pub fn parse_i128(&mut self) -> ParseResult<i128> {
self.number_parser.parse_i128(&mut self.core)
}
pub fn parse_u64(&mut self) -> ParseResult<u64> {
self.number_parser.parse_u64(&mut self.core)
}
pub fn parse_i64(&mut self) -> ParseResult<i64> {
self.number_parser.parse_i64(&mut self.core)
}
pub fn parse_f64(&mut self) -> ParseResult<f64> {
self.number_parser.parse_f64(&mut self.core)
}
pub fn parse_f64_decimal(&mut self) -> ParseResult<f64> {
self.number_parser.parse_f64_decimal(&mut self.core)
}
pub fn parse_u128_ws(&mut self) -> ParseResult<u128> {
let result = self.number_parser.parse_u128(&mut self.core);
self.consume_ws();
result
}
pub fn parse_i128_ws(&mut self) -> ParseResult<i128> {
let result = self.number_parser.parse_i128(&mut self.core);
self.consume_ws();
result
}
pub fn parse_u64_ws(&mut self) -> ParseResult<u64> {
let result = self.number_parser.parse_u64(&mut self.core);
self.consume_ws();
result
}
pub fn parse_i64_ws(&mut self) -> ParseResult<i64> {
let result = self.number_parser.parse_i64(&mut self.core);
self.consume_ws();
result
}
pub fn parse_f64_ws(&mut self) -> ParseResult<f64> {
let result = self.number_parser.parse_f64(&mut self.core);
self.consume_ws();
result
}
pub fn parse_f64_decimal_ws(&mut self) -> ParseResult<f64> {
let result = self.number_parser.parse_f64_decimal(&mut self.core);
self.consume_ws();
result
}
pub fn parse_keyword(&mut self) -> ParseResult<String> {
self.keyword_parser.parse(&mut self.core)
}
pub fn parse_keyword_ws(&mut self) -> ParseResult<String> {
let result = self.keyword_parser.parse(&mut self.core);
self.consume_ws();
result
}
#[inline(always)]
pub fn loc(&self) -> Loc {
self.core.loc()
}
#[inline(always)]
pub fn is_at_eof(&self) -> bool {
self.core.is_at_eof()
}
#[inline(always)]
pub fn peek(&mut self) -> char {
self.core.peek()
}
#[inline(always)]
pub fn consume(&mut self) {
self.core.consume()
}
#[inline(always)]
pub fn peek_offset(&mut self, n: usize) -> char {
self.core.peek_offset(n)
}
#[inline(always)]
pub fn peek_n(&mut self, n: usize) -> String {
self.core.peek_n(n)
}
#[inline(always)]
pub fn peek_n_vec(&mut self, n: usize) -> Vec<char> {
self.core.peek_n_vec(n)
}
#[inline(always)]
pub fn consume_n(&mut self, n: usize) {
self.core.consume_n(n)
}
#[inline(always)]
pub fn peek_chars(&mut self, chars: &[char]) -> bool {
self.core.peek_chars(chars)
}
#[inline(always)]
pub fn peek_and_consume_chars(&mut self, chars: &[char]) -> bool {
self.core.peek_and_consume_chars(chars)
}
#[inline(always)]
pub fn take_until(&mut self, token: &str) -> String {
self.core.take_until(token)
}
#[inline(always)]
pub fn take_while<T: Fn(char) -> bool>(&mut self, include: T) -> String {
self.core.take_while(include)
}
#[inline(always)]
pub fn take_while_unless<T: Fn(char) -> bool, U: Fn(char) -> bool>(
&mut self,
include: T,
exclude: U,
) -> String {
self.core.take_while_unless(include, exclude)
}
#[inline]
pub fn take<S, K>(&mut self, skip: S, stop: K) -> (Vec<char>, Option<char>)
where
S: Fn(char) -> bool,
K: Fn(char) -> bool,
{
self.core.take(skip, stop)
}
#[inline(always)]
pub fn consume_while<T: Fn(char) -> bool>(&mut self, include: T) -> bool {
self.core.consume_while(include)
}
#[inline(always)]
pub fn consume_until(&mut self, token: &str) -> bool {
self.core.consume_until(token)
}
pub fn consume_ws_only(&mut self) -> bool {
self.core.consume_ws_only()
}
#[inline(always)]
pub fn peek_and_consume(&mut self, ch: char) -> bool {
self.core.peek_and_consume(ch)
}
pub fn expect(&mut self, ch: char) -> ParseResult<()> {
if self.core.peek_and_consume(ch) {
Ok(())
} else {
Err(syntax_error(
self.loc(),
&format!(
"Expected to find {:?}, but instead found {:?}",
ch,
self.peek()
),
))
}
}
pub fn expect_chars(&mut self, ch: &[char]) -> ParseResult<()> {
if self.core.peek_and_consume_chars(ch) {
Ok(())
} else {
let expect = String::from_iter(ch);
let found = self.core.peek_n(ch.len());
Err(syntax_error(
self.loc(),
&format!(
"Expected to find {:?}, but instead found {:?}",
expect, found
),
))
}
}
pub fn peek_str(&mut self, value: &str) -> bool {
self.core.peek_chars(&value.chars().collect::<Vec<char>>())
}
pub fn peek_chars_greedy(&mut self, chars: &[char]) -> bool {
if chars.is_empty() {
return true;
}
let len = chars.len();
let peek = self.core.peek_n_vec(len + 1);
match len.cmp(&peek.len()) {
Ordering::Greater => false,
Ordering::Equal => chars == peek,
Ordering::Less => chars == &peek[0..len] && chars != &peek[1..len + 1],
}
}
pub fn peek_str_greedy(&mut self, value: &str) -> bool {
let here = self.peek_n(value.len());
let next = self.peek_n(value.len() + 1);
if here == value {
here.len() == next.len() || !next.ends_with(value)
} else {
false
}
}
pub fn peek_and_consume_ws(&mut self, ch: char) -> bool {
if self.core.peek() == ch {
self.core.consume();
self.consume_ws();
true
} else {
false
}
}
pub fn peek_and_consume_chars_ws(&mut self, chars: &[char]) -> bool {
let retval = self.core.peek_and_consume_chars(chars);
if retval {
self.consume_ws();
}
retval
}
pub fn peek_and_consume_str(&mut self, value: &str) -> bool {
self.core
.peek_and_consume_chars(&value.chars().collect::<Vec<char>>())
}
pub fn peek_and_consume_str_ws(&mut self, value: &str) -> bool {
let result = self
.core
.peek_and_consume_chars(&value.chars().collect::<Vec<char>>());
if result {
self.consume_ws();
}
result
}
pub fn consume_ws(&mut self) -> bool {
if self.parse_comments {
self.comment_parser.process(&mut self.core)
} else {
self.core.consume_ws_only()
}
}
pub fn take_until_greedy(&mut self, chars: &[char], must_match: bool) -> ParseResult<String> {
let mut value = String::new();
let loc = self.loc();
while !self.is_at_eof() {
if self.peek_chars_greedy(chars) {
self.consume_n(chars.len());
return Ok(value);
}
value.push(self.peek());
self.consume();
}
if must_match {
Err(syntax_error(
loc,
&format!(
"Expected to find terminating {:?}, but did not.",
chars.iter().collect::<String>()
),
))
} else {
Ok(value)
}
}
}