use std::collections::VecDeque;
use cstree::Syntax;
/// All token and node kinds that can appear in the tree.
///
/// Declaration order matters: `lex` maps `m_lexer` token-kind ids back to
/// these variants by raw `u32` discriminant, so the token variants must stay
/// first and in this exact order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Syntax)]
#[repr(u32)]
pub enum SyntaxKind {
    /// `(` token; its text is static.
    #[static_text("(")]
    LParen,
    /// `)` token; its text is static.
    #[static_text(")")]
    RParen,
    /// A run of characters that is neither whitespace nor a parenthesis.
    Word,
    /// A run of whitespace characters.
    Whitespace,
    /// Lexer error token, or parser error node.
    Error,
    /// A parenthesized list node.
    List,
    /// A single-word node.
    Atom,
    /// The root node of every parse.
    Root,
}
// `cstree` is generic over the syntax definition; this example uses the
// `SyntaxKind` enum above as the syntax type directly.
type SExprSyntax = SyntaxKind;
use SyntaxKind::*;
use cstree::{green::GreenNode, interning::Resolver};
use cstree::build::GreenNodeBuilder;
/// The result of a parse: the immutable green tree, the interner (`resolver`)
/// needed to look up token text, and any syntax errors collected on the way.
struct Parse<I> {
green_node: GreenNode,
resolver: I,
// Collected error messages; kept for inspection but not read in this example.
#[allow(unused)]
errors: Vec<String>,
}
/// Parses `text` into a green tree, collecting syntax errors instead of
/// failing: a stray `)` becomes an `Error` node, and a missing `)` at end of
/// input is recorded as an error while the open list is still closed.
fn parse(text: &str) -> Parse<impl Resolver + use<>> {
// Recursive-descent parser state: the remaining tokens (consumed
// front-to-back), the green-tree builder that receives start/finish/token
// events, and the errors collected so far.
struct Parser<'input> {
tokens: VecDeque<(SyntaxKind, &'input str)>,
builder: GreenNodeBuilder<'static, 'static, SExprSyntax>,
errors: Vec<String>,
}
// Outcome of `sexp`: parsed one expression, hit end of input, or stopped at
// a `)` — the caller decides how to handle the latter two.
enum SExprResult {
Ok,
Eof,
RParen,
}
impl Parser<'_> {
// Top-level driver: wraps the whole input in a `Root` node and parses
// s-expressions until the token stream is exhausted.
fn parse(mut self) -> Parse<impl Resolver + use<>> {
self.builder.start_node(Root);
loop {
match self.sexp() {
SExprResult::Eof => break,
// A `)` with no matching `(`: wrap the stray token in an
// `Error` node and keep parsing.
SExprResult::RParen => {
self.builder.start_node(Error);
self.errors.push("unmatched `)`".to_string());
self.bump(); self.builder.finish_node();
}
SExprResult::Ok => {}
}
}
// Attach any trailing whitespace inside the root before closing it.
self.skip_ws();
self.builder.finish_node();
let (tree, cache) = self.builder.finish();
Parse {
green_node: tree,
// The builder was created with `GreenNodeBuilder::new()`, so it owns
// its cache and interner — both unwraps hold by construction.
resolver: cache.unwrap().into_interner().unwrap(),
errors: self.errors,
}
}
// Parses a parenthesized list. Precondition: the current token is `(`.
fn list(&mut self) {
assert_eq!(self.current(), Some(LParen));
self.builder.start_node(List);
self.bump(); loop {
match self.sexp() {
// Input ended before the closing `)`: record the error but still
// finish the node so the tree stays well-formed.
SExprResult::Eof => {
self.errors.push("expected `)`".to_string());
break;
}
SExprResult::RParen => {
self.bump();
break;
}
SExprResult::Ok => {}
}
}
self.builder.finish_node();
}
// Parses a single s-expression (atom or list), skipping leading
// whitespace. Does not consume `)` or end-of-input; those are reported
// back to the caller through the result.
fn sexp(&mut self) -> SExprResult {
self.skip_ws();
let t = match self.current() {
None => return SExprResult::Eof,
Some(RParen) => return SExprResult::RParen,
Some(t) => t,
};
match t {
LParen => self.list(),
Word => {
self.builder.start_node(Atom);
self.bump();
self.builder.finish_node();
}
// Lexer error tokens are passed through into the tree unchanged.
Error => self.bump(),
// Whitespace was skipped above, and node kinds (`List`, `Atom`,
// `Root`) never appear in the token stream.
_ => unreachable!(),
}
SExprResult::Ok
}
// Moves the next token from the input into the tree. Panics if the
// stream is empty; callers check `current()` first.
fn bump(&mut self) {
let (kind, text) = self.tokens.pop_front().unwrap();
self.builder.token(kind, text);
}
// Peeks at the kind of the next token without consuming it.
fn current(&self) -> Option<SyntaxKind> {
self.tokens.front().map(|(kind, _)| *kind)
}
// Consumes consecutive whitespace tokens into the current node.
fn skip_ws(&mut self) {
while self.current() == Some(Whitespace) {
self.bump()
}
}
}
Parser {
tokens: lex(text),
builder: GreenNodeBuilder::new(),
errors: Vec::new(),
}
.parse()
}
// Red-tree (syntax) type aliases specialized to this example's syntax kind.
type SyntaxNode = cstree::syntax::SyntaxNode<SExprSyntax>;
#[allow(unused)]
type SyntaxToken = cstree::syntax::SyntaxToken<SExprSyntax>;
#[allow(unused)]
type SyntaxElement = cstree::syntax::SyntaxElement<SExprSyntax>;
impl<I> Parse<I> {
    /// Builds a fresh red (syntax) tree over the parsed green tree.
    fn syntax(&self) -> SyntaxNode {
        let green = self.green_node.clone();
        SyntaxNode::new_root(green)
    }
}
#[test]
fn test_parser() {
    let parse = parse("(+ (* 15 2) 62)");
    let root = parse.syntax();
    let resolver = &parse.resolver;
    // The root spans the whole input and holds exactly one child: the list.
    assert_eq!(root.debug(resolver, false), "Root@0..15");
    assert_eq!(root.children().count(), 1);
    let list = root.children().next().unwrap();
    // Render each child element as `Kind@start..end` for comparison.
    let rendered: Vec<String> = list
        .children_with_tokens()
        .map(|element| format!("{:?}@{:?}", element.kind(), element.text_range()))
        .collect();
    let expected = [
        "LParen@0..1",
        "Atom@1..2",
        "Whitespace@2..3",
        "List@3..11",
        "Whitespace@11..12",
        "Atom@12..14",
        "RParen@14..15",
    ];
    assert_eq!(rendered, expected);
}
mod ast {
    use super::*;

    /// Defines a typed AST wrapper `$name` around a `SyntaxNode` whose kind
    /// is `SyntaxKind::$kind`.
    macro_rules! ast_node {
        ($name:ident, $kind:ident) => {
            #[derive(PartialEq, Eq, Hash)]
            #[repr(transparent)]
            pub struct $name(pub(crate) SyntaxNode);

            impl $name {
                /// Wraps `node` if it has the matching kind.
                #[allow(unused)]
                pub fn cast(node: SyntaxNode) -> Option<Self> {
                    (node.kind() == SyntaxKind::$kind).then(|| Self(node))
                }
            }
        };
    }

    ast_node!(Root, Root);
    ast_node!(Atom, Atom);
    ast_node!(List, List);
}
/// A dynamically-typed s-expression node: either an atom or a list.
#[derive(PartialEq, Eq, Hash)]
#[repr(transparent)]
struct SExpr(SyntaxNode);
/// The typed view of an `SExpr`, produced by `SExpr::kind`.
enum SexpKind {
Atom(ast::Atom),
List(ast::List),
}
impl SExpr {
fn cast(node: SyntaxNode) -> Option<Self> {
use ast::*;
if Atom::cast(node.clone()).is_some() || List::cast(node.clone()).is_some() {
Some(SExpr(node))
} else {
None
}
}
fn kind(&self) -> SexpKind {
use ast::*;
Atom::cast(self.0.clone())
.map(SexpKind::Atom)
.or_else(|| List::cast(self.0.clone()).map(SexpKind::List))
.unwrap()
}
}
impl ast::Root {
    /// Iterates over the top-level s-expressions in the tree.
    fn sexps(&self) -> impl Iterator<Item = SExpr> + '_ {
        self.0
            .children()
            .filter_map(|child| SExpr::cast(child.clone()))
    }
}
/// The four arithmetic operators an atom can denote (see `Atom::as_op`).
enum Op {
Add,
Sub,
Div,
Mul,
}
impl ast::Atom {
    /// Interprets the atom as an integer literal, if it parses as one.
    fn eval(&self, resolver: &impl Resolver) -> Option<i64> {
        let text = self.text(resolver);
        text.parse::<i64>().ok()
    }

    /// Interprets the atom as an arithmetic operator, if it is one.
    fn as_op(&self, resolver: &impl Resolver) -> Option<Op> {
        match self.text(resolver) {
            "+" => Some(Op::Add),
            "-" => Some(Op::Sub),
            "*" => Some(Op::Mul),
            "/" => Some(Op::Div),
            _ => None,
        }
    }

    /// Returns the source text of the atom's single token.
    ///
    /// Tokens with static text (the parentheses) resolve without the
    /// interner; every other token looks its text up through `resolver`.
    fn text<'r>(&self, resolver: &'r impl Resolver) -> &'r str {
        use cstree::util::NodeOrToken;
        let first_child = self.0.green().children().next();
        match first_child {
            Some(NodeOrToken::Token(token)) => {
                let kind = SExprSyntax::from_raw(token.kind());
                SExprSyntax::static_text(kind)
                    .or_else(|| token.text(resolver))
                    .unwrap()
            }
            // An `Atom` node is built around exactly one token by the parser.
            _ => unreachable!(),
        }
    }
}
impl ast::List {
    /// Iterates over the s-expressions contained in this list.
    fn sexps(&self) -> impl Iterator<Item = SExpr> + '_ {
        self.0.children().cloned().filter_map(SExpr::cast)
    }

    /// Evaluates the list as `(op arg1 arg2)`.
    ///
    /// Returns `None` when the head is not an operator atom, when either
    /// argument is missing or fails to evaluate, or on division by zero.
    ///
    /// A single iterator is advanced through the children, instead of the
    /// previous three separate `self.sexps()` calls that re-walked the child
    /// list from the start for every operand (`nth(1)`, `nth(2)`).
    fn eval(&self, resolver: &impl Resolver) -> Option<i64> {
        let mut sexps = self.sexps();
        let op = match sexps.next()?.kind() {
            SexpKind::Atom(atom) => atom.as_op(resolver)?,
            _ => return None,
        };
        let arg1 = sexps.next()?.eval(resolver)?;
        let arg2 = sexps.next()?.eval(resolver)?;
        let res = match op {
            Op::Add => arg1 + arg2,
            Op::Sub => arg1 - arg2,
            Op::Mul => arg1 * arg2,
            Op::Div if arg2 == 0 => return None,
            Op::Div => arg1 / arg2,
        };
        Some(res)
    }
}
impl SExpr {
    /// Evaluates this s-expression: lists as operator applications, atoms as
    /// integer literals.
    fn eval(&self, resolver: &impl Resolver) -> Option<i64> {
        match self.kind() {
            SexpKind::List(list) => list.eval(resolver),
            SexpKind::Atom(atom) => atom.eval(resolver),
        }
    }
}
impl<I> Parse<I> {
    /// Returns the typed root of the parse tree.
    fn root(&self) -> ast::Root {
        let node = self.syntax();
        // The parser always emits a `Root` node at the top, so this holds.
        ast::Root::cast(node).unwrap()
    }
}
fn main() {
    // Each top-level s-expression evaluates independently; anything that
    // cannot be evaluated (a non-numeric atom, division by zero) is `None`.
    let input = "
92
(+ 62 30)
(/ 92 0)
nan
(+ (* 15 2) 62)
";
    let parse = parse(input);
    let resolver = &parse.resolver;
    let results: Vec<Option<i64>> = parse
        .root()
        .sexps()
        .map(|sexp| sexp.eval(resolver))
        .collect();
    eprintln!("{results:?}");
    assert_eq!(results, vec![Some(92), Some(92), None, None, Some(92)])
}
/// Tokenizes `text` into `(kind, text-slice)` pairs using `m_lexer`.
fn lex(text: &str) -> VecDeque<(SyntaxKind, &str)> {
    /// Converts a `SyntaxKind` into the `m_lexer` token-kind id.
    fn tok(t: SyntaxKind) -> m_lexer::TokenKind {
        m_lexer::TokenKind(t.into_raw().0 as u16)
    }
    /// Maps an `m_lexer` token-kind id back to a `SyntaxKind`.
    ///
    /// NOTE: this table must mirror the declaration order of the token
    /// variants in `SyntaxKind` (their `#[repr(u32)]` discriminants).
    fn kind(t: m_lexer::TokenKind) -> SyntaxKind {
        match t.0 {
            0 => LParen,
            1 => RParen,
            2 => Word,
            3 => Whitespace,
            4 => Error,
            _ => unreachable!(),
        }
    }

    let lexer = m_lexer::LexerBuilder::new()
        .error_token(tok(Error))
        .tokens(&[
            (tok(LParen), r"\("),
            (tok(RParen), r"\)"),
            (tok(Word), r"[^\s()]+"),
            (tok(Whitespace), r"\s+"),
        ])
        .build();

    // `m_lexer` reports only token lengths, so re-slice the input by
    // accumulating an offset as we walk the token list.
    let mut tokens = VecDeque::new();
    let mut offset = 0;
    for token in lexer.tokenize(text) {
        let end = offset + token.len;
        tokens.push_back((kind(token.kind), &text[offset..end]));
        offset = end;
    }
    tokens
}