use oxyl_diagnostics::{DiagSpan, Diagnostic};
use oxyl_lexer::{Span, Token, TokenKind};
/// Converts a lexer [`Span`] into the [`DiagSpan`] attached to diagnostics,
/// carrying over its `start` and `end` values unchanged.
fn diag_span(s: Span) -> DiagSpan {
    let (start, end) = (s.start, s.end);
    DiagSpan::new(start, end)
}
/// Root of the parsed syntax tree.
#[derive(Debug, Clone)]
pub struct Document {
    /// Top-level nodes, in source order.
    pub body: Vec<Node>,
}
/// A single node of the parsed syntax tree.
///
/// Every variant carries the [`Span`] recorded for it by the parser.
#[derive(Debug, Clone)]
pub enum Node {
    /// A run of literal text.
    Text(String, Span),
    /// A paragraph separator.
    ParagraphBreak(Span),
    /// A control sequence together with the arguments attached to it.
    Command {
        /// Name carried by the control-sequence token (e.g. `textbf`).
        name: String,
        /// Arguments in source order.
        args: Vec<Arg>,
        /// Source span recorded for the command.
        span: Span,
    },
    /// Nodes enclosed in a `{` ... `}` group.
    Group(Vec<Node>, Span),
    /// Nodes enclosed in `$` ... `$` math delimiters.
    Math(Vec<Node>, Span),
}
impl Node {
    /// Returns the source span stored in this node, whatever its variant.
    pub fn span(&self) -> Span {
        match self {
            // Tuple variants all keep their span as the last field.
            Node::Text(_, s)
            | Node::ParagraphBreak(s)
            | Node::Group(_, s)
            | Node::Math(_, s) => *s,
            Node::Command { span, .. } => *span,
        }
    }
}
/// An argument attached to a [`Node::Command`].
#[derive(Debug, Clone)]
pub enum Arg {
    /// Contents of a brace-delimited argument: `{...}`.
    Mandatory(Vec<Node>),
    /// Contents of a bracket-delimited argument: `[...]`.
    Optional(Vec<Node>),
}
/// Everything produced by one run of [`Parser::parse`].
#[derive(Debug)]
pub struct ParseResult {
    /// The syntax tree that was built.
    pub document: Document,
    /// Diagnostics recorded while parsing, in the order they were pushed.
    pub errors: Vec<Diagnostic>,
}
/// Parser that turns a token vector into a [`Document`].
pub struct Parser {
    /// The full token stream being parsed.
    tokens: Vec<Token>,
    /// Index into `tokens` of the next token to consume.
    pos: usize,
    /// Diagnostics collected so far.
    errors: Vec<Diagnostic>,
}
impl Parser {
pub fn new(tokens: Vec<Token>) -> Self {
Self { tokens, pos: 0, errors: Vec::new() }
}
pub fn parse(mut self) -> ParseResult {
let body = self.parse_nodes(None);
ParseResult { document: Document { body }, errors: self.errors }
}
fn peek(&self) -> Option<&Token> {
self.tokens.get(self.pos)
}
fn peek_kind(&self) -> Option<&TokenKind> {
self.peek().map(|t| &t.kind)
}
fn bump(&mut self) -> Option<Token> {
if self.pos < self.tokens.len() {
let tok = self.tokens[self.pos].clone();
self.pos += 1;
Some(tok)
} else {
None
}
}
fn parse_nodes(&mut self, stop: Option<&TokenKind>) -> Vec<Node> {
let mut nodes: Vec<Node> = Vec::new();
loop {
match self.peek() {
None => break,
Some(tok) if stop.map_or(false, |s| &tok.kind == s) => break,
_ => {}
}
let tok = self.bump().unwrap();
match tok.kind {
TokenKind::Char(c) => self.push_char(&mut nodes, c, tok.span),
TokenKind::Space => self.push_char(&mut nodes, ' ', tok.span),
TokenKind::ParagraphBreak => nodes.push(Node::ParagraphBreak(tok.span)),
TokenKind::ControlSeq(name) => {
let cmd_span = tok.span;
let args = self.parse_args();
let full_span = args.last()
.and_then(|a| match a {
Arg::Mandatory(children) => children.last().map(|n| n.span()),
Arg::Optional(children) => children.last().map(|n| n.span()),
})
.map(|s| cmd_span.merge(s))
.unwrap_or(cmd_span);
nodes.push(Node::Command { name, args, span: full_span });
}
TokenKind::BeginGroup => {
let open_span = tok.span;
let children = self.parse_nodes(Some(&TokenKind::EndGroup));
if self.peek_kind() == Some(&TokenKind::EndGroup) {
let close = self.bump().unwrap();
nodes.push(Node::Group(children, open_span.merge(close.span)));
} else {
self.errors.push(
Diagnostic::error("E020", "unclosed '{'")
.with_span(diag_span(open_span)),
);
nodes.push(Node::Group(children, open_span));
}
}
TokenKind::MathShift => {
let open_span = tok.span;
let children = self.parse_nodes(Some(&TokenKind::MathShift));
if self.peek_kind() == Some(&TokenKind::MathShift) {
let close = self.bump().unwrap();
nodes.push(Node::Math(children, open_span.merge(close.span)));
} else {
self.errors.push(
Diagnostic::error("E030", "unclosed '$' (math mode)")
.with_span(diag_span(open_span)),
);
}
}
_ => {}
}
}
nodes
}
fn parse_args(&mut self) -> Vec<Arg> {
let mut args = Vec::new();
loop {
if self.peek_kind() == Some(&TokenKind::Space) {
self.bump();
}
match self.peek_kind() {
Some(&TokenKind::BeginGroup) => args.push(self.parse_mandatory_arg()),
Some(&TokenKind::Char('[')) => args.push(self.parse_optional_arg()),
_ => break,
}
}
args
}
fn parse_mandatory_arg(&mut self) -> Arg {
let open_span = self.bump().unwrap().span;
let children = self.parse_nodes(Some(&TokenKind::EndGroup));
if self.peek_kind() == Some(&TokenKind::EndGroup) {
self.bump();
} else {
self.errors.push(
Diagnostic::error("E021","unclosed mandatory argument")
.with_span(diag_span(open_span)),
);
}
Arg::Mandatory(children)
}
fn parse_optional_arg(&mut self) -> Arg {
let open_span = self.bump().unwrap().span;
let children = self.parse_nodes(Some(&TokenKind::Char(']')));
if self.peek_kind() == Some(&TokenKind::Char(']')) {
self.bump();
} else {
self.errors.push(
Diagnostic::error("E022","unclosed optional argument")
.with_span(diag_span(open_span)),
);
}
Arg::Optional(children)
}
fn push_char(&self, nodes: &mut Vec<Node>, c: char, span: Span) {
match nodes.last_mut() {
Some(Node::Text(s, existing)) => {
s.push(c);
*existing = existing.merge(span);
}
_ => nodes.push(Node::Text(c.to_string(), span)),
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use oxyl_lexer::Lexer;

    /// Lexes and parses `src` in one step.
    fn parse(src: &str) -> ParseResult {
        let lexed = Lexer::new(src).tokenise();
        Parser::new(lexed.tokens).parse()
    }

    /// Returns the name and arguments of the first top-level command in `src`,
    /// panicking when there is none.
    fn first_command(src: &str) -> (String, Vec<Arg>) {
        parse(src)
            .document
            .body
            .into_iter()
            .find_map(|node| match node {
                Node::Command { name, args, .. } => Some((name, args)),
                _ => None,
            })
            .unwrap_or_else(|| panic!("no command found in: {src}"))
    }

    /// True when `node` is a text node whose content equals `expected`.
    fn is_text(node: &Node, expected: &str) -> bool {
        matches!(node, Node::Text(text, _) if text == expected)
    }

    #[test]
    fn command_no_args() {
        let (name, args) = first_command("\\LaTeX");
        assert_eq!(name, "LaTeX");
        assert!(args.is_empty());
    }

    #[test]
    fn command_one_mandatory_arg() {
        let (name, args) = first_command("\\textbf{hello}");
        assert_eq!(name, "textbf");
        assert_eq!(args.len(), 1);
        assert!(matches!(&args[0], Arg::Mandatory(kids) if is_text(&kids[0], "hello")));
    }

    #[test]
    fn command_two_mandatory_args() {
        let (name, args) = first_command("\\frac{a}{b}");
        assert_eq!(name, "frac");
        assert_eq!(args.len(), 2);
    }

    #[test]
    fn unclosed_arg_produces_error() {
        let result = parse("\\cmd{oops");
        assert!(!result.errors.is_empty());
    }

    #[test]
    fn paragraph_break_still_works() {
        let result = parse("line one\n\nline two");
        let has_par = result
            .document
            .body
            .iter()
            .any(|node| matches!(node, Node::ParagraphBreak(_)));
        assert!(has_par);
    }

    #[test]
    fn nested_command_in_arg() {
        let result = parse("\\outer{\\inner{x}}");
        assert!(result.errors.is_empty());
        let args = match &result.document.body[0] {
            Node::Command { args, .. } => args,
            _ => panic!("expected command"),
        };
        let inner = match &args[0] {
            Arg::Mandatory(inner) => inner,
            _ => panic!("expected mandatory arg"),
        };
        assert!(matches!(&inner[0], Node::Command { name, .. } if name == "inner"));
    }

    #[test]
    fn command_with_optional_arg() {
        let (name, args) = first_command("\\sqrt[3]{27}");
        assert_eq!(name, "sqrt");
        assert_eq!(args.len(), 2);
        assert!(matches!(&args[0], Arg::Optional(kids) if is_text(&kids[0], "3")));
        assert!(matches!(&args[1], Arg::Mandatory(kids) if is_text(&kids[0], "27")));
    }

    #[test]
    fn command_with_only_optional_arg() {
        let (name, args) = first_command("\\foo[opt]");
        assert_eq!(name, "foo");
        assert_eq!(args.len(), 1);
        assert!(matches!(&args[0], Arg::Optional(_)));
    }

    #[test]
    fn optional_then_two_mandatory() {
        let (_, args) = first_command("\\section[short]{long}{extra}");
        assert_eq!(args.len(), 3);
        assert!(matches!(&args[0], Arg::Optional(_)));
        assert!(matches!(&args[1], Arg::Mandatory(_)));
        assert!(matches!(&args[2], Arg::Mandatory(_)));
    }

    #[test]
    fn unclosed_optional_arg_produces_error() {
        let result = parse("\\cmd[oops");
        assert!(!result.errors.is_empty());
    }

    #[test]
    fn bracket_outside_command_is_text() {
        let result = parse("hello [world]");
        assert!(result.errors.is_empty());
        assert!(is_text(&result.document.body[0], "hello [world]"));
    }

    #[test]
    fn inline_math_simple() {
        let result = parse("$x+1$");
        assert!(result.errors.is_empty());
        assert_eq!(result.document.body.len(), 1);
        assert!(matches!(&result.document.body[0], Node::Math(kids, _) if is_text(&kids[0], "x+1")));
    }

    #[test]
    fn inline_math_with_command() {
        let result = parse("$\\alpha + \\beta$");
        assert!(result.errors.is_empty());
        let children = match &result.document.body[0] {
            Node::Math(children, _) => children,
            _ => panic!("expected math node"),
        };
        let names: Vec<&str> = children
            .iter()
            .filter_map(|node| match node {
                Node::Command { name, .. } => Some(name.as_str()),
                _ => None,
            })
            .collect();
        assert_eq!(names, vec!["alpha", "beta"]);
    }

    #[test]
    fn unclosed_math_produces_error() {
        let result = parse("text $oops");
        assert!(!result.errors.is_empty());
    }

    #[test]
    fn parser_errors_carry_spans() {
        // One input per "unclosed ..." diagnostic the parser can emit.
        let cases = ["\\cmd{oops", "\\cmd[oops", "{", "$oops"];
        for src in cases {
            let result = parse(src);
            assert!(!result.errors.is_empty(), "expected error for {src:?}");
            for e in &result.errors {
                assert!(e.span.is_some(), "error for {src:?} has no span: {e:?}");
            }
        }
    }

    #[test]
    fn math_after_text() {
        let result = parse("hello $x$");
        assert!(result.errors.is_empty());
        let body = &result.document.body;
        assert_eq!(body.len(), 2);
        assert!(is_text(&body[0], "hello "));
        assert!(matches!(&body[1], Node::Math(_, _)));
    }
}