mod positions;
mod token;
use crate::no_path;
use crate::parsing::token::Token;
use crate::prelude::*;
pub use positions::{Position, Span};
pub(crate) use token::{TokenData, tokenize};
/// Produces an `Err` holding a `"Syntax"` exception with message `msg`,
/// attached to `span`.
///
/// Generic over `T` so it can be returned directly from any function whose
/// return type is `Result<T>`.
fn syntax_error<T>(msg: impl Into<String>, span: &Span) -> Result<T> {
    let exception = Exception::spanned("Syntax", msg, span);
    Err(exception)
}
/// Parses a complete token stream into a single [`Argument`].
///
/// # Errors
///
/// Propagates any error from parsing the top-level expression, and returns a
/// syntax error pointing at the first non-comment token left over once that
/// expression has been parsed.
pub fn build_program(tokens: Vec<Token>) -> Result<Argument> {
    let mut remaining: &[Token] = &tokens;
    let program = build_subprogram(&mut remaining)?;
    // Comments after the expression are fine; anything else is unparsed input.
    match next_non_comment(remaining) {
        Some(leftover) => syntax_error("trailing unparsed tokens detected", &leftover.span),
        None => Ok(program),
    }
}
/// Peeks at the first token in `tokens` that is not a comment.
///
/// Returns `None` when the slice is empty or contains only comments. The
/// slice is not advanced.
fn next_non_comment(tokens: &[Token]) -> Option<&Token> {
    let index = tokens.iter().position(|token| !token.is_comment())?;
    tokens.get(index)
}
/// Consumes the next non-comment token together with the comments before it.
///
/// On success returns `(comments, token)`, where `comments` is the run of
/// comment tokens immediately preceding `token`, and advances `*tokens` to
/// just past `token`.
///
/// # Errors
///
/// Returns a syntax error (with a fixed span at `Position::ONE` and no path)
/// when `tokens` holds no non-comment token. `*tokens` is left untouched in
/// that case.
fn eat_commented_token<'a>(tokens: &mut &'a [Token]) -> Result<(&'a [Token], &'a Token)> {
    let all: &'a [Token] = *tokens;
    let Some(index) = all.iter().position(|token| !token.is_comment()) else {
        return Err(Exception::spanned(
            "Syntax",
            "program contains no non-comment tokens",
            &Span::single(Position::ONE, no_path()),
        ));
    };
    // `index` came from `position`, so `tail` is non-empty and starts with
    // the non-comment token.
    let (leading_comments, tail) = all.split_at(index);
    *tokens = &tail[1..];
    Ok((leading_comments, &tail[0]))
}
/// Narrows `*tokens` to the contents of a parenthesised group.
///
/// The first non-comment token of `*tokens` is taken to be the opening `(`;
/// the caller is expected to have checked this (the token's kind is not
/// re-verified here). On success `*tokens` is replaced by the tokens strictly
/// between that `(` and its matching `)`, and the function returns the span
/// of the matching `)` together with the tokens that follow it.
///
/// Leading comment tokens are skipped when locating the opening paren. The
/// caller finds the `(` with `next_non_comment`, which also skips comments,
/// so treating `tokens[0]` as the opener would mis-count nesting whenever a
/// comment sits between a name and its `(`.
///
/// # Errors
///
/// Returns a syntax error located at the opening token when no matching `)`
/// is found.
///
/// # Panics
///
/// Panics if `*tokens` is empty.
fn extract_within_parens<'a>(tokens: &mut &'a [Token]) -> Result<(Span, &'a [Token])> {
    let all: &'a [Token] = *tokens;
    // Index of the opening paren: the first token that is not a comment.
    let open = all
        .iter()
        .position(|token| !token.is_comment())
        .unwrap_or(0);
    // The opener itself accounts for the initial nesting depth of 1.
    let mut depth = 1u32;
    for (i, token) in all.iter().enumerate().skip(open + 1) {
        match &token.data {
            TokenData::LeftParen => depth += 1,
            TokenData::RightParen => {
                // Cannot underflow: we return as soon as `depth` reaches 0.
                depth -= 1;
                if depth == 0 {
                    *tokens = &all[open + 1..i];
                    return Ok((token.span.clone(), &all[i + 1..]));
                }
            }
            _ => (),
        }
    }
    syntax_error("unclosed `(` parenthesis", &all[open].span)
}
/// Joins the text of consecutive comment tokens into one string, one comment
/// per line.
///
/// A single leading space is stripped from each comment's text when present.
/// Lines are separated by `'\n'` with no trailing newline; an empty slice
/// yields an empty string.
///
/// # Panics
///
/// Panics if any token in `tokens` is not a `TokenData::Comment`.
fn concat_doc_comments(tokens: &[Token]) -> String {
    let lines: Vec<&str> = tokens
        .iter()
        .map(|token| match &token.data {
            TokenData::Comment(doc) => doc.strip_prefix(' ').unwrap_or(doc),
            _ => unreachable!(),
        })
        .collect();
    lines.join("\n")
}
/// Parses one expression from the front of `*tokens`, advancing past it.
///
/// The first non-comment token decides the result:
/// * if it converts to an atom, that atom is returned;
/// * otherwise it must be a name. If the next non-comment token is not `(`,
///   the result is an `Argument::Variable` spanning the name token;
/// * otherwise the parenthesised, comma-separated arguments are parsed
///   recursively into an `Argument::FunctionCall`. Its `doc_comment` is built
///   from the comment tokens preceding the name, and its span runs from the
///   start of the `(` to the end of the matching `)`.
///
/// The argument list may end either after an argument or after a comma.
///
/// # Errors
///
/// Propagates errors from token consumption, name conversion, paren matching
/// and nested arguments, and reports a syntax error when two arguments are not
/// separated by a comma.
fn build_subprogram(tokens: &mut &[Token]) -> Result<Argument> {
    let (leading_comments, head) = eat_commented_token(tokens)?;
    if let Some(atom) = head.to_atom() {
        return Ok(atom);
    }
    let name = head.to_name()?;

    // A name not followed by `(` is a plain variable reference.
    let open_span = match next_non_comment(tokens) {
        Some(Token {
            data: TokenData::LeftParen,
            span,
        }) => span,
        _ => return Ok(Argument::Variable(name, head.span.clone())),
    };

    // From here on `*tokens` covers only the inside of the parentheses;
    // `after_call` is what follows the closing paren.
    let (close_span, after_call) = extract_within_parens(tokens)?;

    let mut args = Vec::new();
    while next_non_comment(tokens).is_some() {
        let arg = build_subprogram(tokens)?;
        args.push(arg);
        match eat_commented_token(tokens) {
            Ok((_, separator)) if separator.is_comma() => {}
            Ok((_, separator)) => {
                return syntax_error("missing comma in argument list", &separator.span);
            }
            // Nothing but comments (or nothing at all) left: the list is done.
            Err(_) => break,
        }
    }
    // Resume after the closing paren, dropping any leftover inner comments.
    *tokens = after_call;

    let call = FunctionCall {
        args,
        name,
        doc_comment: concat_doc_comments(leading_comments),
    };
    let span = Span {
        start: open_span.start,
        ..close_span
    };
    Ok(Argument::FunctionCall(call, span))
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::no_path;

    /// Tokenizes `code` (with no source path) and runs the parser on it.
    /// Panics if tokenization itself fails.
    fn make_program(code: &str) -> Result<Argument> {
        let tokens = tokenize(code, no_path()).expect("");
        build_program(tokens)
    }

    /// Parses `code`, which is expected to fail, and renders the error.
    fn parse_error_text(code: &str) -> String {
        make_program(code).unwrap_err().to_string()
    }

    #[test]
    fn extra_parens() {
        assert_eq!(
            parse_error_text("_((2))"),
            "SyntaxError: expected atom or ident\nat <file>:0:3"
        );
        assert_eq!(
            parse_error_text("(print(2)), print(3)"),
            "SyntaxError: expected atom or ident\nat <file>:0:1"
        );
    }

    #[test]
    fn atom_fn() {
        let parsed = make_program("2(4)").unwrap();
        assert_eq!(parsed.stringify(), "2(4)");
    }

    #[test]
    fn two_commas() {
        assert_eq!(
            parse_error_text("_(4,,4)"),
            "SyntaxError: expected atom or ident\nat <file>:0:5"
        );
    }

    #[test]
    fn empty_program() {
        assert_eq!(
            parse_error_text(""),
            "SyntaxError: program contains no non-comment tokens\nat <file>:1:1"
        );
    }
}