extern crate m_lexer;
extern crate rowan;
use rowan::SmolStr;
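
// Kinds of tokens and composite nodes that can appear in the tree.
// `lex` below maps m_lexer's numeric token kinds back to these variants
// by discriminant, so the first five variants must stay in this order.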
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[allow(non_camel_case_types)]
enum SyntaxKind {
    L_PAREN,
    R_PAREN,
    ATOM,
    WHITESPACE,
    ERROR,
    LIST,
    ROOT,
}
use SyntaxKind::*;
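
// Tree configuration for rowan: which kind enum to use and what data lives
// at the root (here, the list of parse errors).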
enum STypes {}
impl rowan::Types for STypes {
    type Kind = SyntaxKind;
    type RootData = Vec<String>;
}
type GreenNode = rowan::GreenNode<STypes>;
type GreenNodeBuilder = rowan::GreenNodeBuilder<STypes>;
#[allow(type_alias_bounds)]
type SyntaxNode = rowan::SyntaxNode<STypes>;
type TreeArc<T> = rowan::TreeArc<STypes, T>;
use rowan::TransparentNewType;
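
/// Parses `text` into a syntax tree and returns its typed `Root` node.
/// Parsing never fails: stray `)` and unterminated lists are recorded as
/// errors, and the resulting tree always covers the entire input.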
fn parse(text: &str) -> TreeArc<Root> {
    struct Parser {
        // Remaining tokens, in reverse order so that `pop` yields the next one.
        tokens: Vec<(SyntaxKind, SmolStr)>,
        // Incrementally builds the green tree.
        builder: GreenNodeBuilder,
        // Errors collected while parsing; stored at the root of the tree.
        errors: Vec<String>,
    }

    // Outcome of parsing a single S-expression.
    enum SexpRes {
        Eof,    // ran out of tokens
        RParen, // hit a `)` that does not belong to the current expression
        Ok,     // parsed one S-expression
    }

    impl Parser {
        fn parse(mut self) -> TreeArc<Root> {
            // The whole input is wrapped in a single ROOT node.
            self.builder.start_internal(ROOT);
            loop {
                match self.sexp() {
                    SexpRes::Eof => break,
                    SexpRes::RParen => {
                        // A stray `)` becomes an ERROR node, so the tree
                        // still covers the full input text.
                        self.builder.start_internal(ERROR);
                        self.errors.push("unmatched `)`".to_string());
                        self.bump();
                        self.builder.finish_internal();
                    }
                    SexpRes::Ok => (),
                }
            }
            self.skip_ws();
            self.builder.finish_internal();
            let green: GreenNode = self.builder.finish();
            // Attach the collected errors as the root data of the tree.
            let node = SyntaxNode::new(green, self.errors);
            Root::cast(&node).unwrap().to_owned()
        }
        fn list(&mut self) {
            self.builder.start_internal(LIST);
            self.bump(); // `(`
            loop {
                match self.sexp() {
                    SexpRes::Eof => {
                        self.errors.push("expected `)`".to_string());
                        break;
                    }
                    SexpRes::RParen => {
                        self.bump();
                        break;
                    }
                    SexpRes::Ok => (),
                }
            }
            self.builder.finish_internal();
        }
        fn sexp(&mut self) -> SexpRes {
            self.skip_ws();
            let t = match self.current() {
                None => return SexpRes::Eof,
                Some(R_PAREN) => return SexpRes::RParen,
                Some(t) => t,
            };
            match t {
                L_PAREN => self.list(),
                ATOM | ERROR => self.bump(),
                _ => unreachable!(),
            }
            SexpRes::Ok
        }
        // Consume the current token and add it to the tree as a leaf.
        fn bump(&mut self) {
            let (kind, text) = self.tokens.pop().unwrap();
            self.builder.leaf(kind, text);
        }
        // Peek at the kind of the next token, if any.
        fn current(&self) -> Option<SyntaxKind> {
            self.tokens.last().map(|(kind, _)| *kind)
        }
        fn skip_ws(&mut self) {
            while self.current() == Some(WHITESPACE) {
                self.bump()
            }
        }
    }

    let mut tokens = lex(text);
    // Reverse so that the parser can `pop` the next token off the back.
    tokens.reverse();
    Parser {
        tokens,
        builder: GreenNodeBuilder::new(),
        errors: Vec::new(),
    }
    .parse()
}
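
// Smoke test: the Debug output of a node shows its kind and text range.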
#[test]
fn test_parser() {
    let text = "(+ (* 15 2) 62)";
    let node = parse(text);
    assert_eq!(format!("{:?}", node), "ROOT@[0; 15)");
    assert_eq!(node.children().count(), 1);
    let list = node.children().next().unwrap();
    let children = list
        .children()
        .map(|child| format!("{:?}", child))
        .collect::<Vec<_>>();
    assert_eq!(
        children,
        vec![
            "L_PAREN@[0; 1)".to_string(),
            "ATOM@[1; 2)".to_string(),
            "WHITESPACE@[2; 3)".to_string(),
            "LIST@[3; 11)".to_string(),
            "WHITESPACE@[11; 12)".to_string(),
            "ATOM@[12; 14)".to_string(),
            "R_PAREN@[14; 15)".to_string(),
        ]
    );
}
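
// Defines a typed wrapper (an "AST node") around an untyped SyntaxNode.
// `#[repr(transparent)]` plus TransparentNewType make the zero-cost
// `&SyntaxNode` -> `&$ast` conversion in `cast` sound.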
macro_rules! ast_node {
    ($ast:ident, $kind:ident) => {
        #[derive(PartialEq, Eq, Hash)]
        #[repr(transparent)]
        struct $ast(SyntaxNode);
        unsafe impl TransparentNewType for $ast {
            type Repr = SyntaxNode;
        }
        impl $ast {
            #[allow(unused)]
            fn cast(node: &SyntaxNode) -> Option<&Self> {
                if node.kind() == $kind {
                    Some(Self::from_repr(node))
                } else {
                    None
                }
            }
            #[allow(unused)]
            fn to_owned(&self) -> TreeArc<Self> {
                let owned = self.0.to_owned();
                TreeArc::cast(owned)
            }
        }
    };
}
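
// Typed nodes for the root of the file, atoms, and lists.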
ast_node!(Root, ROOT);
ast_node!(Atom, ATOM);
ast_node!(List, LIST);
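
// An S-expression is either an Atom or a List; `Sexp` wraps a node of
// either kind and exposes the distinction through `kind()`.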
#[derive(PartialEq, Eq, Hash)]
#[repr(transparent)]
struct Sexp(SyntaxNode);
enum SexpKind<'a> {
    Atom(&'a Atom),
    List(&'a List),
}
impl Sexp {
    fn cast(node: &SyntaxNode) -> Option<&Self> {
        if Atom::cast(node).is_some() || List::cast(node).is_some() {
            // Sound because Sexp is #[repr(transparent)] over SyntaxNode.
            Some(unsafe { std::mem::transmute(node) })
        } else {
            None
        }
    }
    fn kind(&self) -> SexpKind {
        Atom::cast(&self.0)
            .map(SexpKind::Atom)
            .or_else(|| List::cast(&self.0).map(SexpKind::List))
            .unwrap()
    }
}
impl Root {
    fn sexps(&self) -> impl Iterator<Item = &Sexp> {
        self.0.children().filter_map(Sexp::cast)
    }
}
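
// A minimal evaluator for the arithmetic subset: atoms are integers or
// operators, and lists are `(op arg1 arg2)` applications.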
enum Op {
    Add,
    Sub,
    Div,
    Mul,
}
impl Atom {
    /// Tries to interpret this atom as an integer literal.
    fn eval(&self) -> Option<i64> {
        self.0.leaf_text().unwrap().parse().ok()
    }
    /// Tries to interpret this atom as an arithmetic operator.
    fn as_op(&self) -> Option<Op> {
        let text = self.0.leaf_text().unwrap();
        let op = match text.as_str() {
            "+" => Op::Add,
            "-" => Op::Sub,
            "*" => Op::Mul,
            "/" => Op::Div,
            _ => return None,
        };
        Some(op)
    }
}
impl List {
    fn sexps(&self) -> impl Iterator<Item = &Sexp> {
        self.0.children().filter_map(Sexp::cast)
    }
    fn eval(&self) -> Option<i64> {
        // A list evaluates as `(op arg1 arg2)`; anything else yields None.
        let op = match self.sexps().nth(0)?.kind() {
            SexpKind::Atom(atom) => atom.as_op()?,
            _ => return None,
        };
        let arg1 = self.sexps().nth(1)?.eval()?;
        let arg2 = self.sexps().nth(2)?.eval()?;
        let res = match op {
            Op::Add => arg1 + arg2,
            Op::Sub => arg1 - arg2,
            Op::Mul => arg1 * arg2,
            Op::Div if arg2 == 0 => return None,
            Op::Div => arg1 / arg2,
        };
        Some(res)
    }
}
impl Sexp {
    fn eval(&self) -> Option<i64> {
        match self.kind() {
            SexpKind::Atom(atom) => atom.eval(),
            SexpKind::List(list) => list.eval(),
        }
    }
}
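
// Parse a small program and evaluate each top-level expression; malformed
// or non-numeric expressions evaluate to `None`.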
fn main() {
let sexps = "
92
(+ 62 30)
(/ 92 0)
nan
(+ (* 15 2) 62)
";
    let root = parse(sexps);
    let res = root.sexps().map(|it| it.eval()).collect::<Vec<_>>();
    eprintln!("{:?}", res);
    assert_eq!(res, vec![Some(92), Some(92), None, None, Some(92)]);
}
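
// Splits the input into (kind, text) pairs using m_lexer. m_lexer identifies
// tokens by opaque numeric ids, so we convert to and from SyntaxKind via the
// enum discriminants.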
fn lex(text: &str) -> Vec<(SyntaxKind, SmolStr)> {
    use SyntaxKind::*;
    fn tok(t: SyntaxKind) -> m_lexer::TokenKind {
        m_lexer::TokenKind(t as u16)
    }
    fn kind(t: m_lexer::TokenKind) -> SyntaxKind {
        match t.0 {
            0 => L_PAREN,
            1 => R_PAREN,
            2 => ATOM,
            3 => WHITESPACE,
            4 => ERROR,
            _ => unreachable!(),
        }
    }
    let lexer = m_lexer::LexerBuilder::new()
        .error_token(tok(ERROR))
        .tokens(&[
            (tok(L_PAREN), r"\("),
            (tok(R_PAREN), r"\)"),
            (tok(ATOM), r"[^\s()]+"),
            (tok(WHITESPACE), r"\s+"),
        ])
        .build();
    // m_lexer reports only kinds and lengths; recover each token's text by
    // tracking the current offset into the input.
    lexer
        .tokenize(text)
        .into_iter()
        .map(|t| (t.len, kind(t.kind)))
        .scan(0usize, |start_offset, (len, kind)| {
            let s: SmolStr = text[*start_offset..*start_offset + len].into();
            *start_offset += len;
            Some((kind, s))
        })
        .collect()
}