use crate::reader::{
ast::{
Atom, FunctionNode, IriRef, LiteralSyntax, PrefixedIriRef, Span, SyntaxDocument,
SyntaxNode, SyntaxNodeKind,
},
error::ParseError,
lexer::{Lexer, Token, TokenKind},
};
use core::mem;
#[cfg(not(feature = "std"))]
use alloc::{format, string::String, vec::Vec};
pub(crate) struct Parser {
lexer: Lexer,
current: Token,
}
impl Parser {
pub(super) fn new(input: &str) -> Result<Self, ParseError> {
let mut lexer = Lexer::new(input);
let current = lexer.next_token()?;
Ok(Self { lexer, current })
}
fn advance(&mut self) -> Result<Token, ParseError> {
let next = self.lexer.next_token()?;
Ok(mem::replace(&mut self.current, next))
}
fn peek_kind(&self) -> &TokenKind {
&self.current.kind
}
fn current_span(&self) -> Span {
self.current.span
}
fn expect_rparen(&mut self, open_span: Span) -> Result<(), ParseError> {
match &self.current.kind {
TokenKind::RParen => {
self.advance()?;
Ok(())
}
TokenKind::Eof => Err(ParseError::UnexpectedEof { expected: ")" }),
_ => Err(ParseError::UnexpectedToken {
got: format!("{:?}", self.current.kind),
expected: ")",
span: open_span,
}),
}
}
pub(super) fn parse_document(&mut self) -> Result<SyntaxDocument, ParseError> {
let mut nodes = Vec::new();
loop {
match self.peek_kind() {
TokenKind::Eof => break,
_ => nodes.push(self.parse_node()?),
}
}
Ok(SyntaxDocument { nodes })
}
fn parse_node(&mut self) -> Result<SyntaxNode, ParseError> {
let span = self.current_span();
match self.peek_kind().clone() {
TokenKind::Comment(text) => {
self.advance()?;
Ok(SyntaxNode {
span,
kind: SyntaxNodeKind::Comment(text),
})
}
TokenKind::Name(_) => self.parse_name_or_function(),
TokenKind::FullIri(_)
| TokenKind::PrefixedName(_)
| TokenKind::Namespace(_)
| TokenKind::NodeId(_) => self.parse_iri_or_node_id(),
TokenKind::QuotedString(_) => self.parse_literal(),
TokenKind::LParen => {
self.advance()?; let mut args = Vec::new();
loop {
match self.peek_kind() {
TokenKind::RParen | TokenKind::Eof => break,
_ => args.push(self.parse_node()?),
}
}
self.expect_rparen(span)?;
let end_span = self.current_span();
Ok(SyntaxNode {
span: span.extend_to(end_span),
kind: SyntaxNodeKind::Function(FunctionNode {
name: String::new(),
args,
}),
})
}
TokenKind::Integer(n) => {
let n = n;
self.advance()?;
Ok(SyntaxNode {
span,
kind: SyntaxNodeKind::Atom(Atom::Integer(n)),
})
}
TokenKind::Equals => {
self.advance()?;
Ok(SyntaxNode {
span,
kind: SyntaxNodeKind::Atom(Atom::Equals),
})
}
_ => {
let got = format!("{:?}", self.current.kind);
Err(ParseError::UnexpectedToken {
got,
expected: "node",
span,
})
}
}
}
fn parse_name_or_function(&mut self) -> Result<SyntaxNode, ParseError> {
let span = self.current_span();
let name = match self.advance()?.kind {
TokenKind::Name(n) => n,
_ => unreachable!(),
};
if matches!(self.peek_kind(), TokenKind::LParen) {
self.advance()?; let mut args = Vec::new();
loop {
match self.peek_kind() {
TokenKind::RParen | TokenKind::Eof => break,
_ => args.push(self.parse_node()?),
}
}
self.expect_rparen(span)?;
let end_span = self.current_span();
Ok(SyntaxNode {
span: span.extend_to(end_span),
kind: SyntaxNodeKind::Function(FunctionNode { name, args }),
})
} else {
Ok(SyntaxNode {
span,
kind: SyntaxNodeKind::Atom(Atom::Iri(IriRef::Prefixed(PrefixedIriRef {
prefix: None,
local: name,
}))),
})
}
}
fn parse_iri_or_node_id(&mut self) -> Result<SyntaxNode, ParseError> {
let span = self.current_span();
let tok = self.advance()?;
let kind = match tok.kind {
TokenKind::FullIri(s) => SyntaxNodeKind::Atom(Atom::Iri(IriRef::Full(s))),
TokenKind::PrefixedName(p) => SyntaxNodeKind::Atom(Atom::Iri(IriRef::Prefixed(p))),
TokenKind::Namespace(ns) => SyntaxNodeKind::Atom(Atom::Iri(IriRef::Namespace(ns))),
TokenKind::NodeId(id) => SyntaxNodeKind::Atom(Atom::NodeId(id)),
_ => unreachable!(),
};
Ok(SyntaxNode { span, kind })
}
fn parse_literal(&mut self) -> Result<SyntaxNode, ParseError> {
let start_span = self.current_span();
let lexical_form = match self.advance()?.kind {
TokenKind::QuotedString(s) => s,
_ => unreachable!(),
};
let (datatype, lang_tag) = match self.peek_kind() {
TokenKind::DataTypeSep => {
self.advance()?; let dt = self.parse_iri_ref()?;
(Some(dt), None)
}
TokenKind::LangTag(_) => {
let tag = match self.advance()?.kind {
TokenKind::LangTag(t) => t,
_ => unreachable!(),
};
(None, Some(tag))
}
_ => (None, None),
};
let end_span = self.current_span();
let span = start_span.extend_to(end_span);
Ok(SyntaxNode {
span,
kind: SyntaxNodeKind::Atom(Atom::Literal(LiteralSyntax {
lexical_form,
datatype,
lang_tag,
span,
})),
})
}
fn parse_iri_ref(&mut self) -> Result<IriRef, ParseError> {
let span = self.current_span();
match self.advance()?.kind {
TokenKind::FullIri(s) => Ok(IriRef::Full(s)),
TokenKind::PrefixedName(p) => Ok(IriRef::Prefixed(p)),
TokenKind::Namespace(ns) => Ok(IriRef::Namespace(ns)),
TokenKind::Name(n) => Ok(IriRef::Prefixed(PrefixedIriRef {
prefix: None,
local: n,
})),
_ => Err(ParseError::UnexpectedToken {
got: format!("{:?}", self.current.kind),
expected: "IRI",
span,
}),
}
}
}