use std::{error::Error, fmt, str::FromStr};
use unicode_width::UnicodeWidthStr;
use descape::UnescapeExt;
use super::{lexer::{LexError, Lexer}, tokens::{Kind, Site, Token}};
/// An atomic parse-tree node: its textual value plus where it came from.
///
/// Equality and hashing for this type (implemented by hand in this file)
/// consider only `value`.
#[derive(Debug, Clone)]
pub struct Node<'a> {
/// Text of the node as stored by the parser (after escape processing).
pub value: String,
/// Source site attached to this node, copied from the originating token.
pub site: Site<'a>,
/// Whitespace recorded ahead of this node; written back out before the
/// value when the node is displayed.
pub leading_whitespace: String,
}
impl<'a> PartialEq for Node<'a> {
    /// Nodes compare equal when their `value` strings match; `site` and
    /// `leading_whitespace` take no part in the comparison.
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (&self.value, &other.value);
        lhs == rhs
    }
}
// Marker impl: the hand-written `PartialEq` compares `String` values only,
// which is a total equivalence relation.
impl<'a> Eq for Node<'a> { }
impl<'a> std::hash::Hash for Node<'a> {
    /// Feeds only `value` into the hasher, mirroring the fields compared by
    /// `PartialEq` so that equal nodes hash identically.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        std::hash::Hash::hash(&self.value, state);
    }
}
impl<'a> Node<'a> {
pub fn new(value: &str, site: &Site<'a>, leading_whitespace: &str) -> Self {
Self {
site: site.to_owned(),
value: value.to_owned(),
leading_whitespace: leading_whitespace.to_owned(),
}
}
}
/// A node of the parse tree.
///
/// The four atomic variants (`Symbol`, `Number`, `String`, `Raw`) each wrap a
/// [`Node`]. `List` holds an ordered sequence of child nodes together with
/// the site of the list and its closing token. `Attribute` pairs a keyword
/// with the single expression that follows it.
///
/// Equality and hashing (implemented by hand in this file) ignore sites,
/// whitespace and the list end token.
#[derive(Debug, Clone)]
pub enum ParseNode<'a> {
/// A symbol atom.
Symbol(Node<'a>),
/// A number atom; its text is kept as a string and parsed on demand.
Number(Node<'a>),
/// A string atom; the parser stores the unescaped contents.
String(Node<'a>),
// `Raw` is reported as a "raw-content string" by `node_type`. The parser in
// this file never constructs it. `List` (fields below) is a parenthesised
// sequence of nodes.
Raw(Node<'a>), List {
/// Child nodes, in source order.
nodes: Box<[ParseNode<'a>]>,
/// Site of the list; the parser uses the opening parenthesis token's site.
site: Site<'a>,
/// The closing-parenthesis token; its leading whitespace is written
/// before `)` when the list is displayed.
end_token: Token<'a>,
/// Whitespace recorded ahead of the opening parenthesis.
leading_whitespace: String,
},
/// A keyword followed by the expression it applies to.
Attribute {
/// Keyword text (displayed after a `:`).
keyword: String,
/// The expression that follows the keyword.
node: Box<ParseNode<'a>>,
/// Site of the keyword token.
site: Site<'a>,
/// Whitespace recorded ahead of the keyword.
leading_whitespace: String,
},
}
impl<'a> PartialEq for ParseNode<'a> {
    /// Structural equality: both sides must be the same variant and their
    /// payloads must be equal. Sites, leading whitespace and the list end
    /// token are not compared.
    fn eq(&self, other: &Self) -> bool {
        match (self, other) {
            (Self::Symbol(lhs), Self::Symbol(rhs))
            | (Self::Number(lhs), Self::Number(rhs))
            | (Self::String(lhs), Self::String(rhs))
            | (Self::Raw(lhs), Self::Raw(rhs)) => lhs == rhs,
            (Self::List { nodes: lhs, .. }, Self::List { nodes: rhs, .. }) => lhs == rhs,
            (
                Self::Attribute { keyword: lhs_key, node: lhs_node, .. },
                Self::Attribute { keyword: rhs_key, node: rhs_node, .. },
            ) => lhs_key == rhs_key && lhs_node == rhs_node,
            // Any pairing of different variants is unequal.
            _ => false,
        }
    }
}
// Marker impl: the hand-written `PartialEq` above only compares strings and
// nested nodes, so equality is reflexive, symmetric and transitive.
impl<'a> Eq for ParseNode<'a> { }
impl<'a> std::hash::Hash for ParseNode<'a> {
    /// Hashes a one-byte variant tag followed by the same payload that
    /// `PartialEq` compares, so equal nodes always hash identically.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        // The tag keeps e.g. the symbol `1` and the number `1` apart.
        let tag: u8 = match self {
            Self::Symbol(_) => 0,
            Self::Number(_) => 1,
            Self::String(_) => 2,
            Self::Raw(_) => 3,
            Self::List { .. } => 4,
            Self::Attribute { .. } => 5,
        };
        state.write_u8(tag);

        match self {
            Self::Symbol(atom)
            | Self::Number(atom)
            | Self::String(atom)
            | Self::Raw(atom) => std::hash::Hash::hash(atom, state),
            Self::List { nodes, .. } => std::hash::Hash::hash(nodes, state),
            Self::Attribute { keyword, node, .. } => {
                std::hash::Hash::hash(keyword, state);
                std::hash::Hash::hash(node, state);
            }
        }
    }
}
impl<'a> ParseNode<'a> {
    /// Returns `true` only for a list that has no elements.
    pub fn null(&self) -> bool {
        matches!(self, Self::List { nodes, .. } if nodes.is_empty())
    }

    /// Borrows the inner node of a symbol or number; `None` for anything else.
    pub fn symbolic(&self) -> Option<&Node<'a>> {
        if let Self::Symbol(atom) | Self::Number(atom) = self {
            Some(atom)
        } else {
            None
        }
    }

    /// Borrows the inner node of a string or raw string; `None` otherwise.
    pub fn string(&self) -> Option<&Node<'a>> {
        if let Self::String(atom) | Self::Raw(atom) = self {
            Some(atom)
        } else {
            None
        }
    }

    /// Borrows the inner node of a number; `None` otherwise.
    pub fn number(&self) -> Option<&Node<'a>> {
        if let Self::Number(atom) = self {
            Some(atom)
        } else {
            None
        }
    }

    /// Borrows the inner node of a symbol; `None` otherwise.
    pub fn symbol(&self) -> Option<&Node<'a>> {
        if let Self::Symbol(atom) = self {
            Some(atom)
        } else {
            None
        }
    }

    /// Borrows the inner node of any atomic variant (symbol, number, string
    /// or raw string); `None` for lists and attributes.
    pub fn atomic(&self) -> Option<&Node<'a>> {
        match self {
            Self::List { .. } | Self::Attribute { .. } => None,
            Self::Symbol(atom)
            | Self::Number(atom)
            | Self::String(atom)
            | Self::Raw(atom) => Some(atom),
        }
    }

    /// Borrows the children of a list; `None` for every other variant.
    pub fn list(&self) -> Option<&ParseTree<'a>> {
        if let Self::List { nodes, .. } = self {
            Some(nodes)
        } else {
            None
        }
    }

    /// Borrows the keyword and value of an attribute; `None` otherwise.
    pub fn attribute(&self) -> Option<(&str, &Box<ParseNode<'a>>)> {
        if let Self::Attribute { keyword, node, .. } = self {
            Some((keyword.as_str(), node))
        } else {
            None
        }
    }

    /// Consumes the node and returns the inner [`Node`] of an atomic
    /// variant; lists and attributes yield `None`.
    pub fn into_atomic(self) -> Option<Node<'a>> {
        match self {
            Self::List { .. } | Self::Attribute { .. } => None,
            Self::Symbol(atom)
            | Self::Number(atom)
            | Self::String(atom)
            | Self::Raw(atom) => Some(atom),
        }
    }

    /// Borrows the site stored on this node, whatever its variant.
    pub fn site(&self) -> &Site<'a> {
        match self {
            Self::List { site, .. } | Self::Attribute { site, .. } => site,
            Self::Symbol(atom)
            | Self::Number(atom)
            | Self::String(atom)
            | Self::Raw(atom) => &atom.site,
        }
    }

    /// Returns the stored site by value (a copy of what [`Self::site`] borrows).
    pub fn owned_site(&self) -> Site<'a> {
        *self.site()
    }

    /// Borrows the whitespace recorded ahead of this node.
    pub fn leading_whitespace(&self) -> &str {
        let whitespace: &String = match self {
            Self::List { leading_whitespace, .. }
            | Self::Attribute { leading_whitespace, .. } => leading_whitespace,
            Self::Symbol(atom)
            | Self::Number(atom)
            | Self::String(atom)
            | Self::Raw(atom) => &atom.leading_whitespace,
        };
        whitespace.as_str()
    }

    /// Replaces the whitespace recorded ahead of this node with `whitespace`.
    pub fn set_leading_whitespace(&mut self, whitespace: String) {
        let slot: &mut String = match self {
            Self::List { leading_whitespace, .. }
            | Self::Attribute { leading_whitespace, .. } => leading_whitespace,
            Self::Symbol(atom)
            | Self::Number(atom)
            | Self::String(atom)
            | Self::Raw(atom) => &mut atom.leading_whitespace,
        };
        *slot = whitespace;
    }

    /// Human-readable name of the variant, suitable for diagnostics.
    pub fn node_type(&self) -> &'static str {
        match self {
            Self::List { .. } => "list",
            Self::Attribute { .. } => "attribute",
            Self::Raw(..) => "raw-content string",
            Self::String(..) => "string",
            Self::Number(..) => "number",
            Self::Symbol(..) => "symbol",
        }
    }

    /// `true` for symbols and numbers.
    pub fn is_symbolic(&self) -> bool {
        matches!(self, Self::Symbol(_) | Self::Number(_))
    }

    /// `true` for symbols, numbers, strings and raw strings.
    pub fn is_atomic(&self) -> bool {
        !matches!(self, Self::List { .. } | Self::Attribute { .. })
    }

    /// `true` for symbols only.
    pub fn is_symbol(&self) -> bool {
        matches!(self, Self::Symbol(_))
    }

    /// `true` for numbers only.
    pub fn is_number(&self) -> bool {
        matches!(self, Self::Number(_))
    }

    /// `true` for strings and raw strings.
    pub fn is_string(&self) -> bool {
        matches!(self, Self::String(_) | Self::Raw(_))
    }

    /// `true` for lists only.
    pub fn is_list(&self) -> bool {
        matches!(self, Self::List { .. })
    }

    /// `true` for attributes only.
    pub fn is_attribute(&self) -> bool {
        matches!(self, Self::Attribute { .. })
    }
}
impl<'a> TryFrom<ParseNode<'a>> for Node<'a> {
    type Error = ();

    /// Extracts the inner [`Node`] of an atomic parse node.
    ///
    /// # Errors
    /// Returns `Err(())` when `value` is a list or an attribute.
    fn try_from(value: ParseNode<'a>) -> Result<Self, Self::Error> {
        value.into_atomic().ok_or(())
    }
}
impl<'a> TryFrom<ParseNode<'a>> for Box<[ParseNode<'a>]> {
    type Error = ();

    /// Takes ownership of the children of a list node.
    ///
    /// # Errors
    /// Returns `Err(())` when `value` is not a list.
    fn try_from(value: ParseNode<'a>) -> Result<Self, Self::Error> {
        if let ParseNode::List { nodes, .. } = value {
            Ok(nodes)
        } else {
            Err(())
        }
    }
}
impl<'a> TryFrom<ParseNode<'a>> for Vec<ParseNode<'a>> {
    type Error = ();

    /// Takes ownership of the children of a list node as a `Vec`.
    ///
    /// # Errors
    /// Returns `Err(())` when `value` is not a list.
    fn try_from(value: ParseNode<'a>) -> Result<Self, Self::Error> {
        // The boxed slice is already owned, so turn it into a `Vec` in place
        // rather than deep-cloning every child the way `to_vec()` would.
        Box::<[ParseNode<'a>]>::try_from(value).map(|children| children.into_vec())
    }
}
/// Conversion from a borrowed parse node into a plain value of type `T`.
///
/// The borrow is tied to `'a` so an implementation can hand back data that
/// borrows from `self` (such as `&'a str`).
pub trait IntoValue<'a, T>: Sized {
/// Attempts the conversion. The default implementation always yields `None`.
fn into_value(&'a self) -> Option<T> { None }
}
/// Private marker for primitive numeric types: anything supporting the five
/// basic arithmetic operators (`+ - * / %`) with the given right-hand side and
/// output types. It selects which types number nodes may be parsed into.
trait Num<Rhs = Self, Output = Self>:
    std::ops::Add<Rhs, Output = Output>
    + std::ops::Sub<Rhs, Output = Output>
    + std::ops::Mul<Rhs, Output = Output>
    + std::ops::Div<Rhs, Output = Output>
    + std::ops::Rem<Rhs, Output = Output>
{
}

/// Implements the (empty) `Num` marker for each listed primitive type.
macro_rules! impl_num {
    ($($primitive:ty),* $(,)?) => {
        $(impl Num for $primitive {})*
    };
}

impl_num!(usize, isize, u32, i32, u64, i64, f32, f64);
impl<'a, T: Num + FromStr> IntoValue<'a, T> for ParseNode<'a> {
    /// Parses the text of a number node into the numeric type `T`.
    ///
    /// Yields `None` when the node is not a number or when its text does not
    /// parse as a `T`.
    fn into_value(&self) -> Option<T> {
        let number = self.number()?;
        number.value.parse().ok()
    }
}
impl<'a> IntoValue<'a, &'a str> for ParseNode<'a> {
    /// Borrows the text of a symbol, string or raw-string node.
    ///
    /// Numbers, lists and attributes yield `None`.
    fn into_value(&'a self) -> Option<&'a str> {
        self.symbol()
            .or_else(|| self.string())
            .map(|atom| atom.value.as_str())
    }
}
/// An owned, fixed-length sequence of parse nodes. Used both for the
/// top-level result of parsing and for the children of a list node.
pub type ParseTree<'a> = Box<[ParseNode<'a>]>;
/// A parse failure: a human-readable message (field `0`) and the site the
/// message refers to (field `1`).
#[derive(Debug, Clone)]
pub struct ParseError<'a>(pub String, pub Site<'a>);
impl<'a> fmt::Display for ParseError<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let ParseError(msg, site) = self;
let line_prefix = format!(" {} |", site.line);
let line_view = site.line_slice();
writeln!(f, "{} {}", line_prefix, line_view)?;
writeln!(f, "{:>prefix_offset$} {:~>text_offset$}{:^>length$}", "|", "", "",
prefix_offset=UnicodeWidthStr::width(line_prefix.as_str()),
text_offset=site.line_column() - 1,
length=site.width())?;
write!(f, "[**] Parse Error ({}:{}:{}): {}",
site.source, site.line, site.line_column(), msg)
}
}
// Lets `ParseError` be boxed as `Box<dyn Error>` and propagated with `?`.
impl<'a> Error for ParseError<'a> { }
/// Parser that builds a parse tree from the tokens of a [`Lexer`].
#[derive(Debug, Clone)]
pub struct Parser {
// Token source; the parser only ever calls it through a shared reference.
lexer: Lexer, }
impl<'a> Parser {
pub fn new(lexer: Lexer) -> Self {
Self { lexer }
}
pub fn get_source(&self) -> &str {
self.lexer.get_source()
}
pub fn parse(&'a self) -> Result<ParseTree<'a>, Box<dyn Error + 'a>> {
let mut root: Vec<ParseNode> = Vec::new();
while !self.lexer.eof() {
let expr = self.parse_expr()?;
root.push(expr);
}
return Ok(root.into_boxed_slice());
}
pub fn parse_expr(&'a self) -> Result<ParseNode<'a>, Box<dyn Error + 'a>> {
let token = self.lexer.peek()?;
match token.kind {
Kind::LParen => self.parse_list(),
Kind::RParen => Err(ParseError(
"Unexpected `)' closing parenthesis.".to_owned(),
token.site.to_owned()))?,
Kind::Keyword => self.parse_keyword(),
Kind::Symbol => Ok(ParseNode::Symbol(self.parse_atomic()?)),
Kind::String => Ok(ParseNode::String(self.parse_atomic()?)),
Kind::Number => Ok(ParseNode::Number(self.parse_atomic()?)),
}
}
fn parse_keyword(&'a self) -> Result<ParseNode<'a>, Box<dyn Error + 'a>> {
let token = self.lexer.consume()?;
assert_eq!(token.kind, Kind::Keyword);
{
let no_expr_error = ParseError(
format!("Keyword `:{}' expects an expression following it.", token.value),
token.site.to_owned());
if self.lexer.eof() { Err(no_expr_error.clone())? ;}
match self.lexer.peek()? {
Token { kind: Kind::RParen, .. } => Err(no_expr_error)?,
_ => ()
}
}
let value = self.parse_expr()?;
Ok(ParseNode::Attribute {
keyword: token.value.to_owned(),
node: Box::new(value),
site: token.site.to_owned(),
leading_whitespace: token.leading_whitespace.to_owned(),
})
}
fn parse_atomic(&'a self) -> Result<Node<'a>, LexError<'a>> {
let token = self.lexer.consume()?;
let value = match token.kind {
Kind::Symbol | Kind::Number | Kind::Keyword => escape_sanitize(token.value),
Kind::String => escape_string(token.value, &token.site)?,
_ => unreachable!("called `parse_atomic` on non-atomic token."),
};
Ok(Node {
value,
site: token.site.clone(),
leading_whitespace: token.leading_whitespace.to_string(),
})
}
fn parse_list(&'a self) -> Result<ParseNode<'a>, Box<dyn Error + 'a>> {
let lparen = self.lexer.consume()?;
assert_eq!(lparen.kind, Kind::LParen);
let mut elements = Vec::new();
let mut rparen: Option<Token> = None;
while !self.lexer.eof() {
let token = self.lexer.peek()?;
if token.kind == Kind::RParen {
rparen = Some(self.lexer.consume()?); break;
}
let expr = self.parse_expr()?;
elements.push(expr);
}
let Some(rparen) = rparen else {
return Err(ParseError(
"Expected `)' closing parenthesis.".to_owned(),
lparen.site.to_owned()))?;
};
Ok(ParseNode::List {
nodes: elements.into_boxed_slice(),
site: lparen.site.to_owned(),
end_token: rparen.to_owned(),
leading_whitespace: lparen.leading_whitespace.to_owned(),
})
}
}
/// Returns a copy of `string` with every backslash character removed.
///
/// All other characters are kept unchanged and in order; the character that
/// follows a backslash is not interpreted in any way.
fn escape_sanitize(string: &str) -> String {
    string.chars().filter(|&c| c != '\\').collect()
}
/// Interprets the escape sequences in `string` and returns the result as an
/// owned `String`.
///
/// * `string` - contents of a string token.
/// * `site` - site to attach to the error if unescaping fails.
///
/// # Errors
/// Returns a [`LexError`] naming the offending character and its byte index
/// when `string` contains an escape sequence that cannot be interpreted.
fn escape_string<'a>(string: &'a str, site: &Site<'a>) -> Result<String, LexError<'a>> {
    string.to_unescaped()
        .map(|unescaped| unescaped.to_string())
        .map_err(|invalid| {
            // The message reports `invalid.index` as a byte offset, so look
            // the character up by byte offset too. Counting characters with
            // `chars().nth(..)` picks the wrong one whenever multi-byte text
            // precedes the escape. `get` yields `None` (shown as `?`) if the
            // offset is out of range or not on a character boundary.
            // NOTE(review): the index may point at the backslash itself, not
            // the character after it; confirm against descape's docs.
            let offending = string
                .get(invalid.index..)
                .and_then(|rest| rest.chars().next())
                .unwrap_or('?');
            LexError(
                format!("Invalid escape `\\{}' at byte-index {}.",
                    offending, invalid.index),
                site.clone())
        })
}
/// Depth-limited search through a parse tree for a node matching a value.
pub trait SearchTree<'a> {
/// Looks for the first node of the requested `kind` whose text equals
/// `value`, returning a reference to it.
///
/// * `kind` - which sort of node may match; `SearchType::Any` matches all.
/// * `value` - text to compare against.
/// * `case_insensitive` - when `true`, both sides are lower-cased before
///   being compared.
/// * `level` - remaining depth budget. The implementations in this file
///   return `None` once it reaches zero and pass `level - 1` when
///   descending into a list's children or an attribute's value.
fn search_node(&'a self, kind: SearchType,
value: &str,
case_insensitive: bool,
level: usize) -> Option<&'a ParseNode<'a>>;
}
/// The sort of node a [`SearchTree::search_node`] call is looking for.
///
/// `Debug`, `Eq` and `Hash` are derived alongside the original traits so the
/// type can be printed in diagnostics and used in assertions and keyed
/// collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SearchType {
    /// A list whose first element is an atom with the searched value.
    ListHead,
    /// Declared here, but not consulted by the search implementations in
    /// this file.
    ListMember,
    /// A symbol node.
    Symbol,
    /// A number node.
    Number,
    /// A string or raw-string node.
    String,
    /// An attribute whose keyword has the searched value.
    Attribute,
    /// Wildcard: satisfies every `is_a` test.
    Any,
}

impl SearchType {
    /// Returns `true` when a search for `self` should consider nodes of
    /// `kind`, i.e. when `self` is [`SearchType::Any`] or equals `kind`.
    ///
    /// The wildcard only works on the receiver: `Symbol.is_a(Any)` is `false`.
    pub fn is_a(self, kind: SearchType) -> bool {
        matches!(self, SearchType::Any) || self == kind
    }
}
impl<'a> SearchTree<'a> for ParseNode<'a> {
    /// Searches this node and, depth permitting, its descendants.
    ///
    /// * A list matches as `ListHead` when its first element is an atom with
    ///   the searched value (the list itself is returned); otherwise its
    ///   children are searched with one less level.
    /// * Symbols, numbers and strings (including raw strings) match on
    ///   their own text.
    /// * An attribute matches on its keyword, in which case the attribute's
    ///   *value* node is returned; otherwise that value is searched with one
    ///   less level.
    ///
    /// Returns `None` when `level` is zero or nothing matches.
    fn search_node(&'a self, kind: SearchType, value: &str,
                   insensitive: bool, level: usize) -> Option<&'a ParseNode<'a>> {
        if level == 0 {
            return None;
        }

        let matches_value = |candidate: &str| -> bool {
            if insensitive {
                candidate.to_lowercase() == value.to_lowercase()
            } else {
                candidate == value
            }
        };

        match self {
            ParseNode::Symbol(atom) => {
                (kind.is_a(SearchType::Symbol) && matches_value(&atom.value)).then_some(self)
            }
            ParseNode::Number(atom) => {
                (kind.is_a(SearchType::Number) && matches_value(&atom.value)).then_some(self)
            }
            ParseNode::String(atom) | ParseNode::Raw(atom) => {
                (kind.is_a(SearchType::String) && matches_value(&atom.value)).then_some(self)
            }
            ParseNode::List { nodes, .. } => {
                let head_matches = kind.is_a(SearchType::ListHead)
                    && nodes
                        .first()
                        .and_then(ParseNode::atomic)
                        .map_or(false, |head| matches_value(&head.value));
                if head_matches {
                    Some(self)
                } else {
                    nodes.search_node(kind, value, insensitive, level - 1)
                }
            }
            ParseNode::Attribute { node, keyword, .. } => {
                if kind.is_a(SearchType::Attribute) && matches_value(keyword) {
                    // A keyword hit yields the attribute's value node.
                    Some(&**node)
                } else {
                    node.search_node(kind, value, insensitive, level - 1)
                }
            }
        }
    }
}
impl<'a> SearchTree<'a> for ParseTree<'a> {
    /// Searches each node of the tree in order and returns the first hit.
    ///
    /// Every node is searched with the same `level` this call received.
    /// Returns `None` when `level` is zero or no node matches.
    fn search_node(&'a self, kind: SearchType, value: &str,
                   insensitive: bool, level: usize) -> Option<&'a ParseNode<'a>> {
        if level == 0 {
            return None;
        }
        self.iter()
            .find_map(|candidate| candidate.search_node(kind, value, insensitive, level))
    }
}
impl<'a> fmt::Display for ParseNode<'a> {
    /// Writes the node back out as source-like text, each piece preceded by
    /// its recorded leading whitespace.
    ///
    /// Symbols and numbers are written verbatim. Strings and raw strings are
    /// written using the `Debug` form of their value (quoted and escaped).
    /// Attributes are written as `:keyword` followed by their value. Lists
    /// are written as `(`, their children, the end token's leading
    /// whitespace, then `)`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            ParseNode::List { nodes, leading_whitespace, end_token, .. } => {
                write!(f, "{}(", leading_whitespace)?;
                nodes.iter().try_for_each(|child| write!(f, "{}", child))?;
                write!(f, "{})", end_token.leading_whitespace)
            }
            ParseNode::Attribute { keyword, node, leading_whitespace, .. } => {
                write!(f, "{}:{}{}", leading_whitespace, keyword, node)
            }
            ParseNode::String(atom) | ParseNode::Raw(atom) => {
                write!(f, "{}{:?}", atom.leading_whitespace, atom.value)
            }
            ParseNode::Symbol(atom) | ParseNode::Number(atom) => {
                write!(f, "{}{}", atom.leading_whitespace, atom.value)
            }
        }
    }
}