use crate::tree::Tree;
use ::core::iter;
use ::core::ops::Range;
use ::id_arena::{Arena, Id};
use ::lasso::{Rodeo, Spur};
use ::logos::{Lexer, Logos};
fn string_literal(lex: &mut Lexer<Token>) -> bool {
let remainder = lex.remainder().as_bytes();
let mut cursor = remainder;
while let [b, rest @ ..] = cursor {
if *b == b'"' {
lex.bump((rest.as_ptr() as usize) - (remainder.as_ptr() as usize));
return true;
}
cursor = rest;
}
false
}
fn line_comment(lex: &mut Lexer<Token>) -> bool {
let remainder = lex.remainder().as_bytes();
let mut cursor = remainder;
while let [0..=9 | 11..=255, rest @ ..] = cursor {
cursor = rest;
}
lex.bump(cursor.as_ptr() as usize - remainder.as_ptr() as usize);
true
}
#[derive(Logos, Debug, ::derive_more::IsVariant, Clone)]
pub enum Token {
#[token("(")]
OpenParenthesis,
#[token(")")]
CloseParenthesis,
#[token("\n")]
NewLine,
#[token("::")]
PathSeparator,
#[token(";", line_comment)]
LineComment,
#[token("\"", string_literal)]
StringLiteral,
#[regex("[0-9]+")]
Integer,
#[token("&")]
RefOp,
#[regex(r##"[-_+=a-zA-Z*`.,>][-_+=a-zA-Z*`.,>0-9]*"##)]
Ident,
#[regex("[\t ]+")]
Whitespace,
#[error]
Error,
}
pub fn lex_expression(input: &str) -> Result<Vec<(Token, Range<usize>)>, ()> {
Ok(Token::lexer(input).spanned().collect())
}
#[derive(Debug, ::derive_more::Deref)]
pub struct AST {
pub names: Rodeo,
pub strings: Rodeo,
#[deref]
tree: Tree<Term>,
}
impl AST {
fn fmt_rec(ast: &AST, id: Id<Term>, out: &mut String) {
match &ast.arena[id] {
Term::CreateRef(operand) => {
out.push('&');
Self::fmt_rec(ast, *operand, out);
}
Term::List(elements) => {
out.push('(');
match &**elements {
[first, rest @ ..] => {
Self::fmt_rec(ast, *first, out);
for elem in rest {
out.push(' ');
Self::fmt_rec(ast, *elem, out);
}
}
[] => (),
}
out.push(')');
}
Term::Variable(path) => path.fmt(ast, id, out),
Term::IntegerLiteral(val) => {
let mut itoa_buf = itoa::Buffer::new();
out.push_str(itoa_buf.format(*val));
}
Term::StringLiteral(val) => {
out.push('"');
out.push_str(ast.strings.resolve(&val.0));
out.push('"');
}
Term::Comment(_span) => todo!("formatting AST glued comments"),
}
}
pub fn fmt_sexpr(&self) -> String {
let mut output = String::new();
Self::fmt_rec(self, self.top, &mut output);
output
}
pub fn fmt_term(&self, id: Id<Term>) -> String {
let mut output = String::new();
Self::fmt_rec(self, id, &mut output);
output
}
}
#[derive(Debug, Clone)]
pub struct Span(Range<usize>);
#[derive(Debug, Hash, PartialEq, Eq, Clone)]
pub struct Name(Spur);
impl Name {
fn new(interner: &mut Rodeo, name: &str) -> Self {
Self(interner.get_or_intern(name))
}
pub fn lookup(interner: &Rodeo, name: &str) -> Option<Self> {
interner.get(name).map(|x| Self(x))
}
pub fn key(&self) -> &Spur {
&self.0
}
}
#[derive(Debug, Clone)]
pub struct StringLiteral(Spur);
impl StringLiteral {
fn new(interner: &mut Rodeo, literal: &str) -> Self {
Self(interner.get_or_intern(literal))
}
}
#[derive(Debug, Clone, PartialEq)]
pub enum PathSegment {
Ident(Name),
Value(Id<Term>),
Hidden(&'static str),
}
#[derive(Debug, Clone, PartialEq)]
pub struct Path {
parts: Vec<PathSegment>,
}
impl Path {
pub fn ident(name: Name) -> Self {
Self {
parts: vec![PathSegment::Ident(name)],
}
}
fn new(parts: Vec<Name>) -> Self {
Self {
parts: parts
.into_iter()
.map(|name| PathSegment::Ident(name))
.collect(),
}
}
pub fn hidden_magic(names: &mut Rodeo, parts: Vec<&'static str>) -> Self {
Self::from_parts(
iter::once(PathSegment::Ident(Name::new(names, "#magic#")))
.chain(parts.into_iter().map(|name| PathSegment::Hidden(name)))
.collect(),
)
}
fn from_parts(parts: Vec<PathSegment>) -> Self {
Self { parts }
}
fn fmt(&self, ast: &AST, id: Id<Term>, out: &mut String) {
if self
.parts
.iter()
.all(|a| matches!(a, PathSegment::Ident(_) | PathSegment::Hidden(_)))
{
match &*self.parts {
[PathSegment::Ident(name), rest @ ..] => {
out.push_str(ast.names.resolve(&name.0));
for name in rest {
match name {
PathSegment::Ident(name) => {
out.push_str("::");
out.push_str(ast.names.resolve(&name.0));
}
PathSegment::Hidden(name) => {
out.push_str("::");
out.push_str(name);
}
_ => todo!("full path expression formatting"),
}
}
}
_ => todo!("full path expression formatting"),
}
} else {
todo!("full path expression formatting")
}
}
pub fn into_ident(self) -> Option<Name> {
let mut iter = self.parts.into_iter();
let res = iter.next().and_then(|segment| match segment {
PathSegment::Ident(ident) => Some(ident),
_ => None,
});
for _ in iter {
return None;
}
res
}
}
#[derive(Debug, ::derive_more::Unwrap, Clone)]
pub enum Term {
CreateRef(Id<Term>),
List(Vec<Id<Term>>),
Variable(Path),
IntegerLiteral(u64),
StringLiteral(StringLiteral),
Comment(Span),
}
use crate::tree::TreeIndex;
impl crate::tree::IndexNode for Term {
fn index(&self, index: TreeIndex) -> Option<Id<Self>> {
match self {
Term::CreateRef(operand) if index.val() == 0 => Some(*operand),
Term::CreateRef(_) => None,
Term::List(elements) => elements.get(index.val() as usize).copied(),
Term::Variable(_name) => None,
Term::IntegerLiteral(_val) => None,
Term::StringLiteral(_val) => None,
Term::Comment(_span) => None,
}
}
}
#[derive(Debug)]
pub enum ParseError {
IntegerTooLarge,
ExpectedIdent,
UnexpectedEof,
Eof,
}
fn parse_list(
names: &mut Rodeo,
strings: &mut Rodeo,
terms: &mut Arena<Term>,
lexer: &mut Lexer<Token>,
) -> Result<Id<Term>, ParseError> {
let mut expressions = Vec::new();
loop {
let mut peeker = lexer.clone();
if peeker
.next()
.ok_or(ParseError::UnexpectedEof)?
.is_close_parenthesis()
{
lexer.next();
break;
} else {
expressions.push(parse_expression(names, strings, terms, lexer).map_err(
|e| match e {
ParseError::Eof => ParseError::UnexpectedEof,
e => e,
},
)?);
}
}
Ok(terms.alloc(Term::List(expressions)))
}
fn parse_path(
names: &mut Rodeo,
_strings: &mut Rodeo,
terms: &mut Arena<Term>,
lexer: &mut Lexer<Token>,
front: &str,
) -> Result<Id<Term>, ParseError> {
let mut parts = vec![PathSegment::Ident(Name::new(names, front))];
#[derive(Copy, Clone, Debug)]
enum State {
ExpectingSeparator,
ExpectingSegment,
}
let mut state = State::ExpectingSeparator;
loop {
let mut peeker = lexer.clone();
match (state, peeker.next()) {
(State::ExpectingSegment, Some(Token::Ident)) => {
parts.push(PathSegment::Ident(Name::new(names, {
lexer.next();
lexer.slice()
})));
state = State::ExpectingSeparator;
}
(State::ExpectingSegment, None) => return Err(ParseError::UnexpectedEof),
(State::ExpectingSeparator, Some(Token::PathSeparator)) => {
lexer.next();
state = State::ExpectingSegment;
}
(State::ExpectingSeparator, _) => break,
x => todo!("unexpected tokens in path parser: {:?}", x),
}
}
Ok(terms.alloc(Term::Variable(Path { parts })))
}
fn parse_expression(
names: &mut Rodeo,
strings: &mut Rodeo,
terms: &mut Arena<Term>,
lexer: &mut Lexer<Token>,
) -> Result<Id<Term>, ParseError> {
::stacker::maybe_grow(16 * 2048, 2048 * 1024, || {
while let Some((token, span)) = lexer.next().map(|token| (token, lexer.span())) {
match token {
Token::OpenParenthesis => return parse_list(names, strings, terms, lexer),
Token::CloseParenthesis => todo!("no matching open parenthesis"),
Token::NewLine => (),
Token::PathSeparator => todo!("unexpected `::`?"),
Token::LineComment => (),
Token::StringLiteral => {
let lit = &lexer.slice()[1..];
let lit = &lit[..lit.len() - 1];
return Ok(terms.alloc(Term::StringLiteral(StringLiteral::new(strings, lit))));
}
Token::Integer => {
let int = lexer
.slice()
.as_bytes()
.iter()
.try_fold(0u64, |a, b| {
(a * 10).checked_add((b - b'0') as u64).ok_or(())
})
.map_err(|()| ParseError::IntegerTooLarge)?;
return Ok(terms.alloc(Term::IntegerLiteral(int)));
}
Token::RefOp => {
let operand = parse_expression(names, strings, terms, lexer)?;
return Ok(terms.alloc(Term::CreateRef(operand)));
}
Token::Ident => return parse_path(names, strings, terms, lexer, lexer.slice()),
Token::Whitespace => (),
Token::Error => todo!("hm... {:?}", lexer.slice()),
}
}
Err(ParseError::Eof)
})
}
pub fn parse_program(input: &str) -> Result<AST, ParseError> {
let mut names = Rodeo::<Spur>::new();
let mut strings = Rodeo::<Spur>::new();
let mut terms = Arena::<Term>::new();
let mut lexer = Token::lexer(input);
let mut implicit_progn = vec![terms.alloc(Term::Variable(Path::from_parts(vec![
PathSegment::Ident(Name::new(&mut names, "#magic#")),
PathSegment::Hidden("progn"),
])))];
loop {
match parse_expression(&mut names, &mut strings, &mut terms, &mut lexer) {
Ok(top) => implicit_progn.push(top),
Err(ParseError::Eof) => break,
Err(e) => return Err(e),
}
}
let top = terms.alloc(Term::List(implicit_progn));
Ok(AST {
names,
strings,
tree: Tree { arena: terms, top },
})
}