use crate::Error;
use crate::lexer::{Float, Integer, Lexer, Token, TokenKind};
use crate::token::Span;
use bumpalo::Bump;
use std::borrow::Cow;
use std::cell::{Cell, RefCell};
use std::collections::HashMap;
use std::fmt;
use std::usize;
/// Upper bound on parenthesis nesting: `Parser::depth_check` reports an error
/// once `Parser::parens_depth` is strictly greater than this value.
#[cfg(feature = "wasm-module")]
pub(crate) const MAX_PARENS_DEPTH: usize = 100;
/// Parses a `T` from `buf` and requires that no significant token is left over.
///
/// # Errors
///
/// Propagates any error produced while parsing `T` or while fetching the next
/// token, and fails with "extra tokens remaining after parse" when a token is
/// still available once `T` has been parsed.
pub fn parse<'a, T: Parse<'a>>(buf: &'a ParseBuffer<'a>) -> Result<T> {
    let parser = buf.parser();
    let parsed = parser.parse()?;
    match parser.cursor().token()? {
        None => Ok(parsed),
        Some(_) => Err(parser.error("extra tokens remaining after parse")),
    }
}
/// A type that can be produced from a [`Parser`].
///
/// `Parser::parse::<T>()` simply forwards to this trait's `parse`.
pub trait Parse<'a>: Sized {
/// Attempts to build `Self` from the tokens available through `parser`,
/// returning an error if they do not form a valid `Self`.
fn parse(parser: Parser<'a>) -> Result<Self>;
}
/// Boxed values parse exactly like the value they wrap; the result is then
/// moved onto the heap.
impl<'a, T: Parse<'a>> Parse<'a> for Box<T> {
    fn parse(parser: Parser<'a>) -> Result<Self> {
        parser.parse::<T>().map(Box::new)
    }
}
/// A type whose presence can be tested for without consuming any input.
pub trait Peek {
    /// Reports whether the token at `cursor` starts a value of this type.
    fn peek(cursor: Cursor<'_>) -> Result<bool>;

    /// Like [`Peek::peek`], but applied to the token *after* the one at
    /// `cursor`. Yields `Ok(false)` when there is no token at `cursor`.
    fn peek2(mut cursor: Cursor<'_>) -> Result<bool> {
        match cursor.token()? {
            None => Ok(false),
            Some(first) => {
                // Step over the first token on our private copy of the cursor.
                cursor.advance_past(&first);
                Self::peek(cursor)
            }
        }
    }

    /// Human-readable description of this type; `Lookahead1::error` uses it
    /// to list what was expected.
    fn display() -> &'static str;
}
/// `std::result::Result` with the error type defaulting to this crate's
/// [`Error`].
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// Owns the lexer plus all mutable parsing state that [`Parser`] and
/// [`Cursor`] handles share by reference.
pub struct ParseBuffer<'a> {
/// Source of raw tokens, lexed on demand from a byte offset.
lexer: Lexer<'a>,
/// The position that `Parser` handles currently read from and write to.
cur: Cell<Position>,
/// Annotation name -> number of live `register_annotation` guards.
/// Annotations whose count is zero or missing are skipped while lexing.
known_annotations: RefCell<HashMap<String, usize>>,
/// Flag set through `ParseBuffer::track_instr_spans` and read back through
/// `Parser::track_instr_spans`.
track_instr_spans: bool,
/// Number of `Parser::parens` calls currently in progress.
depth: Cell<usize>,
/// Arena holding owned byte strings so that slices into them can be handed
/// out for as long as the buffer is borrowed.
strings: Bump,
}
/// A location in the input together with a cache of the token found there.
#[derive(Copy, Clone)]
struct Position {
/// Byte offset handed to the lexer when the next token has to be lexed.
offset: usize,
/// Cached next significant token. When this is `None`, `Cursor::token`
/// lexes from `offset` on demand (which may again find nothing).
token: Option<Token>,
}
/// A cheap, copyable handle onto a [`ParseBuffer`].
///
/// The handle itself carries no state: the current position, nesting depth and
/// registered annotations all live in the referenced buffer.
#[derive(Copy, Clone)]
pub struct Parser<'a> {
buf: &'a ParseBuffer<'a>,
}
/// Helper for one-token lookahead that remembers every alternative that was
/// tried and did not match, so a descriptive error can be built afterwards.
pub struct Lookahead1<'a> {
/// The parser whose next token is being inspected.
parser: Parser<'a>,
/// `Peek::display()` strings of the alternatives that failed to match.
attempts: Vec<&'static str>,
}
/// A copyable snapshot of a parse position.
///
/// Advancing a cursor only changes its own `pos`; the shared position in the
/// buffer is updated when a cursor is handed back through `Parser::step`.
#[derive(Copy, Clone)]
pub struct Cursor<'a> {
/// The parser (and through it the buffer) this cursor reads from.
parser: Parser<'a>,
/// This cursor's private position.
pos: Position,
}
impl ParseBuffer<'_> {
/// Creates a buffer over `input` by wrapping it in a fresh [`Lexer`] and
/// delegating to [`ParseBuffer::new_with_lexer`].
pub fn new(input: &str) -> Result<ParseBuffer<'_>> {
    let lexer = Lexer::new(input);
    ParseBuffer::new_with_lexer(lexer)
}
pub fn new_with_lexer(lexer: Lexer<'_>) -> Result<ParseBuffer<'_>> {
Ok(ParseBuffer {
lexer,
depth: Cell::new(0),
cur: Cell::new(Position {
offset: 0,
token: None,
}),
known_annotations: Default::default(),
strings: Default::default(),
track_instr_spans: false,
})
}
/// Stores `track` in the buffer's `track_instr_spans` flag and returns `self`
/// so calls can be chained.
///
/// NOTE(review): the flag is only exposed here via `Parser::track_instr_spans`;
/// presumably instruction parsing consults it to decide whether to record
/// spans — confirm against the users of that accessor.
pub fn track_instr_spans(&mut self, track: bool) -> &mut Self {
self.track_instr_spans = track;
self
}
/// Returns a [`Parser`] handle that borrows this buffer.
fn parser(&self) -> Parser<'_> {
Parser { buf: self }
}
/// Copies the bytes of `s` into the buffer's arena and returns the arena copy,
/// which stays valid for as long as the buffer is borrowed. `s` itself is
/// dropped on return.
fn push_str(&self, s: Vec<u8>) -> &[u8] {
self.strings.alloc_slice_copy(&s)
}
/// Lexes forward from byte offset `pos` and returns the first significant
/// token, or `Ok(None)` when the lexer has nothing more to yield.
///
/// Whitespace and comments are always skipped. A `(` that opens an annotation
/// is skipped together with the whole annotation unless that annotation's
/// name currently has a non-zero count in `known_annotations`.
fn advance_token(&self, mut pos: usize) -> Result<Option<Token>> {
    loop {
        let token = match self.lexer.parse(&mut pos)? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::Whitespace | TokenKind::LineComment | TokenKind::BlockComment => {}
            TokenKind::LParen => {
                if let Some(annotation) = self.lexer.annotation(pos)? {
                    let text = annotation.annotation(self.lexer.input())?;
                    let registered = self
                        .known_annotations
                        .borrow()
                        .get(&text[..])
                        .copied()
                        .unwrap_or(0);
                    if registered == 0 {
                        // Nobody asked to parse this annotation: drop it
                        // entirely and keep looking for a real token.
                        self.skip_annotation(&mut pos)?;
                        continue;
                    }
                }
                return Ok(Some(token));
            }
            _ => return Ok(Some(token)),
        }
    }
}
fn skip_annotation(&self, pos: &mut usize) -> Result<()> {
let mut depth = 1;
let span = Span { offset: *pos };
loop {
let token = match self.lexer.parse(pos)? {
Some(token) => token,
None => {
break Err(Error::new(span, "unclosed annotation".to_string()));
}
};
match token.kind {
TokenKind::LParen => depth += 1,
TokenKind::RParen => {
depth -= 1;
if depth == 0 {
break Ok(());
}
}
_ => {}
}
}
}
}
impl<'a> Parser<'a> {
pub fn is_empty(self) -> bool {
match self.cursor().token() {
Ok(Some(token)) => matches!(token.kind, TokenKind::RParen),
Ok(None) => true,
Err(_) => false,
}
}
/// Scans the raw token stream from offset 0 and reports whether it contains
/// anything besides whitespace and comments. A lexer error counts as
/// meaningful.
#[cfg(feature = "wasm-module")]
pub(crate) fn has_meaningful_tokens(self) -> bool {
    for lexed in self.buf.lexer.iter(0) {
        match lexed {
            Ok(token)
                if matches!(
                    token.kind,
                    TokenKind::Whitespace | TokenKind::LineComment | TokenKind::BlockComment
                ) =>
            {
                continue
            }
            _ => return true,
        }
    }
    false
}
/// Parses a `T` at the current position by delegating to `T::parse`.
pub fn parse<T: Parse<'a>>(self) -> Result<T> {
T::parse(self)
}
/// Asks `T` whether it matches the next token, using a cursor snapshot of the
/// current position.
pub fn peek<T: Peek>(self) -> Result<bool> {
T::peek(self.cursor())
}
/// Asks `T` whether it matches the token after the next one, by delegating to
/// `T::peek2` with a cursor snapshot of the current position.
pub fn peek2<T: Peek>(self) -> Result<bool> {
T::peek2(self.cursor())
}
/// Asks `T` whether it matches the third upcoming token. Returns `Ok(false)`
/// when fewer than two tokens precede it.
pub fn peek3<T: Peek>(self) -> Result<bool> {
    let mut cursor = self.cursor();
    // Step over the first two tokens on a private cursor.
    for _ in 0..2 {
        match cursor.token()? {
            Some(token) => cursor.advance_past(&token),
            None => return Ok(false),
        }
    }
    T::peek(cursor)
}
/// Starts a one-token lookahead at the current position with an empty list of
/// failed attempts.
pub fn lookahead1(self) -> Lookahead1<'a> {
Lookahead1 {
attempts: Vec::new(),
parser: self,
}
}
/// Parses `(`, then runs `f` on the contents, then parses `)`.
///
/// The nesting depth is incremented for the duration of the call. If any part
/// fails, the buffer's position is restored to where it was on entry and the
/// error is returned.
pub fn parens<T>(self, f: impl FnOnce(Parser<'a>) -> Result<T>) -> Result<T> {
self.buf.depth.set(self.buf.depth.get() + 1);
// Remember where we started so a failure can rewind.
let before = self.buf.cur.get();
let res = self.step(|cursor| {
let mut cursor = match cursor.lparen()? {
Some(rest) => rest,
None => return Err(cursor.error("expected `(`")),
};
// Publish the position just after `(` so `f`, which reads the shared
// buffer position, starts inside the parentheses.
cursor.parser.buf.cur.set(cursor.pos);
let result = f(cursor.parser)?;
// `f` advanced the shared position, not our local cursor; resync.
cursor.pos = cursor.parser.buf.cur.get();
match cursor.rparen()? {
Some(rest) => Ok((result, rest)),
None => Err(cursor.error("expected `)`")),
}
});
self.buf.depth.set(self.buf.depth.get() - 1);
if res.is_err() {
// Undo the intermediate position written above.
self.buf.cur.set(before);
}
res
}
/// Returns how many `parens` calls are currently in progress on this buffer.
pub fn parens_depth(&self) -> usize {
self.buf.depth.get()
}
/// Fails with "item nesting too deep" once the current parenthesis depth
/// exceeds `MAX_PARENS_DEPTH`.
#[cfg(feature = "wasm-module")]
pub(crate) fn depth_check(&self) -> Result<()> {
    if self.parens_depth() <= MAX_PARENS_DEPTH {
        return Ok(());
    }
    Err(self.error("item nesting too deep"))
}
/// Returns a cursor that snapshots the buffer's current position.
fn cursor(self) -> Cursor<'a> {
Cursor {
parser: self,
pos: self.buf.cur.get(),
}
}
/// Runs `f` on a cursor snapshot of the current position.
///
/// When `f` succeeds, the buffer's position is moved to the cursor that `f`
/// returned and the accompanying value is handed back. When `f` fails, the
/// position is left as it was and the error is returned.
pub fn step<F, T>(self, f: F) -> Result<T>
where
    F: FnOnce(Cursor<'a>) -> Result<(T, Cursor<'a>)>,
{
    f(self.cursor()).map(|(value, next)| {
        self.buf.cur.set(next.pos);
        value
    })
}
/// Builds an error carrying `msg`, located at the span of the current token
/// (see `Cursor::cur_span`).
pub fn error(self, msg: impl fmt::Display) -> Error {
self.error_at(self.cursor().cur_span(), msg)
}
/// Builds an error carrying `msg` at `span`, passing the lexer's input text to
/// `Error::parse` alongside it.
pub fn error_at(self, span: Span, msg: impl fmt::Display) -> Error {
Error::parse(span, self.buf.lexer.input(), msg.to_string())
}
/// Returns the span of the current token, as computed by `Cursor::cur_span`.
pub fn cur_span(&self) -> Span {
self.cursor().cur_span()
}
/// Returns whatever `Cursor::prev_span` reports for the current position,
/// falling back to a span at offset 0 when it reports nothing.
pub fn prev_span(&self) -> Span {
    match self.cursor().prev_span() {
        Some(span) => span,
        None => Span::from_offset(0),
    }
}
/// Marks `annotation` as one that somebody intends to parse, so the tokenizer
/// stops skipping `(@annotation ...)` forms with that name.
///
/// Registrations are counted: every call increments the name's counter and
/// the returned guard decrements it again when dropped. The annotation is
/// skipped again once its counter is back at zero, so the guard must be kept
/// alive for as long as the annotation should be visible.
pub fn register_annotation<'b>(self, annotation: &'b str) -> impl Drop + 'b
where
    'a: 'b,
{
    {
        let mut guard = self.buf.known_annotations.borrow_mut();
        let counts = &mut *guard;
        // One lookup on the common (already present) path; the name is only
        // copied into an owned key the first time it is seen.
        match counts.get_mut(annotation) {
            Some(count) => *count += 1,
            None => {
                counts.insert(annotation.to_string(), 1);
            }
        }
    }
    return RemoveOnDrop(self, annotation);

    /// Guard that undoes one registration of the named annotation on drop.
    struct RemoveOnDrop<'a>(Parser<'a>, &'a str);

    impl Drop for RemoveOnDrop<'_> {
        fn drop(&mut self) {
            let mut annotations = self.0.buf.known_annotations.borrow_mut();
            let slot = annotations
                .get_mut(self.1)
                .expect("annotation is registered before its guard is created");
            *slot -= 1;
        }
    }
}
/// Returns the buffer's `track_instr_spans` flag, as set through
/// `ParseBuffer::track_instr_spans`.
#[cfg(feature = "wasm-module")]
pub(crate) fn track_instr_spans(&self) -> bool {
self.buf.track_instr_spans
}
/// Runs `f` while the `custom`, `producers`, `name`, `dylink.0` and
/// `metadata.code.branch_hint` annotations are registered, and releases all
/// five registrations once `f` has returned.
#[cfg(feature = "wasm-module")]
pub(crate) fn with_standard_annotations_registered<R>(
    self,
    f: impl FnOnce(Self) -> Result<R>,
) -> Result<R> {
    // The guards must outlive the call to `f`, hence the named binding.
    let _guards = [
        "custom",
        "producers",
        "name",
        "dylink.0",
        "metadata.code.branch_hint",
    ]
    .map(|name| self.register_annotation(name));
    f(self)
}
}
impl<'a> Cursor<'a> {
/// Returns the span of the token at this cursor.
///
/// With no token left, the span points at the end of the input. If fetching
/// the token fails, the cursor's own offset is used instead.
pub fn cur_span(&self) -> Span {
    match self.token() {
        Ok(Some(token)) => Span {
            offset: token.offset,
        },
        Ok(None) => Span {
            offset: self.parser.buf.lexer.input().len(),
        },
        Err(_) => Span {
            offset: self.pos.offset,
        },
    }
}
/// Returns a span at this cursor's current offset; the result is always
/// `Some`.
///
/// NOTE(review): despite the name, this reports the cursor's own offset
/// rather than the start of an earlier token — confirm that callers only
/// need an approximate location.
pub(crate) fn prev_span(&self) -> Option<Span> {
Some(Span {
offset: self.pos.offset,
})
}
/// Builds an error carrying `msg`, located at this cursor's current span.
pub fn error(&self, msg: impl fmt::Display) -> Error {
self.parser.error_at(self.cur_span(), msg)
}
/// Reports whether the next token is `(`; `false` when there is no token.
pub fn peek_lparen(self) -> Result<bool> {
    let next = self.token()?;
    Ok(next.map_or(false, |token| matches!(token.kind, TokenKind::LParen)))
}
/// Reports whether the next token is `)`; `false` when there is no token.
pub fn peek_rparen(self) -> Result<bool> {
    let next = self.token()?;
    Ok(next.map_or(false, |token| matches!(token.kind, TokenKind::RParen)))
}
/// Reports whether the next token is an identifier; `false` when there is no
/// token.
pub fn peek_id(self) -> Result<bool> {
    let next = self.token()?;
    Ok(next.map_or(false, |token| matches!(token.kind, TokenKind::Id)))
}
/// Reports whether the next token is a reserved token; `false` when there is
/// no token.
pub fn peek_reserved(self) -> Result<bool> {
    let next = self.token()?;
    Ok(next.map_or(false, |token| matches!(token.kind, TokenKind::Reserved)))
}
/// Reports whether the next token is a keyword; `false` when there is no
/// token.
pub fn peek_keyword(self) -> Result<bool> {
    let next = self.token()?;
    Ok(next.map_or(false, |token| matches!(token.kind, TokenKind::Keyword)))
}
/// Reports whether the next token is an integer; `false` when there is no
/// token.
pub fn peek_integer(self) -> Result<bool> {
    let next = self.token()?;
    Ok(next.map_or(false, |token| matches!(token.kind, TokenKind::Integer(_))))
}
/// Reports whether the next token is a float; `false` when there is no token.
pub fn peek_float(self) -> Result<bool> {
    let next = self.token()?;
    Ok(next.map_or(false, |token| matches!(token.kind, TokenKind::Float(_))))
}
/// Reports whether the next token is a string; `false` when there is no
/// token.
pub fn peek_string(self) -> Result<bool> {
    let next = self.token()?;
    Ok(next.map_or(false, |token| matches!(token.kind, TokenKind::String)))
}
/// Consumes a `(` if it is the next token and returns the cursor positioned
/// after it; otherwise returns `Ok(None)`.
pub fn lparen(mut self) -> Result<Option<Self>> {
    match self.token()? {
        Some(token) if matches!(token.kind, TokenKind::LParen) => {
            self.advance_past(&token);
            Ok(Some(self))
        }
        _ => Ok(None),
    }
}
/// Consumes a `)` if it is the next token and returns the cursor positioned
/// after it; otherwise returns `Ok(None)`.
pub fn rparen(mut self) -> Result<Option<Self>> {
    match self.token()? {
        Some(token) if matches!(token.kind, TokenKind::RParen) => {
            self.advance_past(&token);
            Ok(Some(self))
        }
        _ => Ok(None),
    }
}
/// Consumes an identifier token if one is next, returning its text and the
/// cursor positioned after it; otherwise returns `Ok(None)`.
///
/// When the token yields an owned string rather than a borrow of the input,
/// that string is copied into the buffer's arena so a `&'a str` can still be
/// returned.
pub fn id(mut self) -> Result<Option<(&'a str, Self)>> {
    let token = match self.token()? {
        Some(token) if matches!(token.kind, TokenKind::Id) => token,
        _ => return Ok(None),
    };
    self.advance_past(&token);
    let name = match token.id(self.parser.buf.lexer.input())? {
        Cow::Borrowed(borrowed) => borrowed,
        Cow::Owned(owned) => {
            let stored = self.parser.buf.push_str(owned.into_bytes());
            // The bytes came straight out of a `String`, so they are UTF-8.
            std::str::from_utf8(stored).unwrap()
        }
    };
    Ok(Some((name, self)))
}
/// Consumes a keyword token if one is next, returning its text and the cursor
/// positioned after it; otherwise returns `Ok(None)`.
pub fn keyword(mut self) -> Result<Option<(&'a str, Self)>> {
    let token = match self.token()? {
        Some(token) if matches!(token.kind, TokenKind::Keyword) => token,
        _ => return Ok(None),
    };
    self.advance_past(&token);
    let text = token.keyword(self.parser.buf.lexer.input());
    Ok(Some((text, self)))
}
/// Consumes an annotation token if one is next, returning its name and the
/// cursor positioned after it; otherwise returns `Ok(None)`.
///
/// When the token yields an owned string rather than a borrow of the input,
/// that string is copied into the buffer's arena so a `&'a str` can still be
/// returned.
pub fn annotation(mut self) -> Result<Option<(&'a str, Self)>> {
    let token = match self.token()? {
        Some(token) if matches!(token.kind, TokenKind::Annotation) => token,
        _ => return Ok(None),
    };
    self.advance_past(&token);
    let name = match token.annotation(self.parser.buf.lexer.input())? {
        Cow::Borrowed(borrowed) => borrowed,
        Cow::Owned(owned) => {
            let stored = self.parser.buf.push_str(owned.into_bytes());
            // The bytes came straight out of a `String`, so they are UTF-8.
            std::str::from_utf8(stored).unwrap()
        }
    };
    Ok(Some((name, self)))
}
/// Consumes a reserved token if one is next, returning its text and the
/// cursor positioned after it; otherwise returns `Ok(None)`.
pub fn reserved(mut self) -> Result<Option<(&'a str, Self)>> {
    let token = match self.token()? {
        Some(token) if matches!(token.kind, TokenKind::Reserved) => token,
        _ => return Ok(None),
    };
    self.advance_past(&token);
    let text = token.reserved(self.parser.buf.lexer.input());
    Ok(Some((text, self)))
}
/// Consumes an integer token if one is next, returning the lexed integer and
/// the cursor positioned after it; otherwise returns `Ok(None)`.
pub fn integer(mut self) -> Result<Option<(Integer<'a>, Self)>> {
    let (token, kind) = match self.token()? {
        Some(token) => match token.kind {
            TokenKind::Integer(kind) => (token, kind),
            _ => return Ok(None),
        },
        None => return Ok(None),
    };
    self.advance_past(&token);
    let value = token.integer(self.parser.buf.lexer.input(), kind);
    Ok(Some((value, self)))
}
/// Consumes a float token if one is next, returning the lexed float and the
/// cursor positioned after it; otherwise returns `Ok(None)`.
pub fn float(mut self) -> Result<Option<(Float<'a>, Self)>> {
    let (token, kind) = match self.token()? {
        Some(token) => match token.kind {
            TokenKind::Float(kind) => (token, kind),
            _ => return Ok(None),
        },
        None => return Ok(None),
    };
    self.advance_past(&token);
    let value = token.float(self.parser.buf.lexer.input(), kind);
    Ok(Some((value, self)))
}
/// Consumes a string token if one is next, returning its bytes and the cursor
/// positioned after it; otherwise returns `Ok(None)`.
///
/// When the token yields owned bytes rather than a borrow of the input, they
/// are copied into the buffer's arena so a `&'a [u8]` can still be returned.
pub fn string(mut self) -> Result<Option<(&'a [u8], Self)>> {
    let token = match self.token()? {
        Some(token) if matches!(token.kind, TokenKind::String) => token,
        _ => return Ok(None),
    };
    let bytes = match token.string(self.parser.buf.lexer.input()) {
        Cow::Borrowed(borrowed) => borrowed,
        Cow::Owned(owned) => self.parser.buf.push_str(owned),
    };
    self.advance_past(&token);
    Ok(Some((bytes, self)))
}
/// Returns the source text of the next line or block comment together with the
/// cursor positioned after it.
///
/// Only whitespace may precede the comment. `Ok(None)` is returned when the
/// input ends first or when any other kind of token is encountered first.
pub fn comment(mut self) -> Result<Option<(&'a str, Self)>> {
let start = self.pos.offset;
// The offset is about to move, so any cached token would be stale.
self.pos.token = None;
let comment = loop {
// Drive the lexer directly: `advance_token` would skip comments.
let token = match self.parser.buf.lexer.parse(&mut self.pos.offset)? {
Some(token) => token,
None => return Ok(None),
};
match token.kind {
TokenKind::LineComment | TokenKind::BlockComment => {
break token.src(self.parser.buf.lexer.input());
}
TokenKind::Whitespace => {}
_ => {
self.pos.offset = start;
return Ok(None);
}
}
};
Ok(Some((comment, self)))
}
/// Returns the next significant token at this cursor, using the cached token
/// when there is one and lexing from the cursor's offset otherwise.
fn token(&self) -> Result<Option<Token>> {
    if let Some(cached) = self.pos.token {
        return Ok(Some(cached));
    }
    self.parser.buf.advance_token(self.pos.offset)
}
fn advance_past(&mut self, token: &Token) {
self.pos.offset = token.offset + (token.len as usize);
self.pos.token = self
.parser
.buf
.advance_token(self.pos.offset)
.unwrap_or(None);
}
}
impl<'a> Lookahead1<'a> {
/// Reports whether `T` matches the next token. When it does not,
/// `T::display()` is recorded for use in a later error message.
pub fn peek<T: Peek>(&mut self) -> Result<bool> {
    let matched = self.parser.peek::<T>()?;
    if !matched {
        self.attempts.push(T::display());
    }
    Ok(matched)
}
/// Returns the parser this lookahead was created from.
pub fn parser(&self) -> Parser<'a> {
self.parser
}
/// Builds an "unexpected token" error at the parser's current position,
/// listing every alternative that was peeked for and did not match.
///
/// With no recorded alternatives the message only distinguishes running out
/// of input from seeing an unexpected token.
pub fn error(self) -> Error {
    match self.attempts.as_slice() {
        [] => {
            if self.parser.is_empty() {
                self.parser.error("unexpected end of input")
            } else {
                self.parser.error("unexpected token")
            }
        }
        [only] => {
            let message = format!("unexpected token, expected {}", only);
            self.parser.error(message)
        }
        [first, second] => {
            let message = format!("unexpected token, expected {} or {}", first, second);
            self.parser.error(message)
        }
        many => {
            let join = many.join(", ");
            let message = format!("unexpected token, expected one of: {join}");
            self.parser.error(message)
        }
    }
}
}
/// An optional value parses to `Some` when `T` matches the upcoming token and
/// to `None`, without touching the input, when it does not.
impl<'a, T: Peek + Parse<'a>> Parse<'a> for Option<T> {
    fn parse(parser: Parser<'a>) -> Result<Option<T>> {
        if !parser.peek::<T>()? {
            return Ok(None);
        }
        parser.parse::<T>().map(Some)
    }
}