use pulldown_latex::event::{Content, DelimiterType, Event, Grouping, ScriptType, Visual};
use pulldown_latex::{Parser, Storage};
use ttf_parser::GlyphId;
use crate::font;
use crate::ir::{AtomClass, Node, Style};
/// Error produced while converting LaTeX source into the math IR.
///
/// The wrapped string is a human-readable description of the failure. It is
/// public so callers can inspect or re-wrap the message directly.
#[derive(Debug)]
pub struct ParseError(pub String);

impl std::fmt::Display for ParseError {
    /// Writes the wrapped message verbatim, without any prefix or decoration.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.0)
    }
}

// Implementing `Error` lets callers use `?` into `Box<dyn Error>` and plug this
// type into error-reporting code that expects the standard trait.
impl std::error::Error for ParseError {}
/// Converts a LaTeX math string into the intermediate representation.
///
/// The source is run through the `pulldown_latex` parser, the resulting events
/// are buffered into a vector, and that vector is folded into a tree. On
/// success the result is always a [`Node::Row`] holding the top-level nodes in
/// source order.
///
/// # Errors
///
/// Returns [`ParseError`] when the upstream parser yields an error (its `Debug`
/// rendering becomes the message) or when the buffered events cannot be turned
/// into nodes.
pub fn to_ir(src: &str, font_size: f32, style: Style) -> Result<Node, ParseError> {
    let storage = Storage::new();

    // Buffer every event first so the tree builder can index into the stream;
    // the first upstream error aborts the whole conversion.
    let events = Parser::new(src, &storage)
        .map(|event| event.map_err(|err| ParseError(format!("{err:?}"))))
        .collect::<Result<Vec<_>, _>>()?;

    let mut position = 0;
    let top_level = parse_until_end(&events, &mut position, font_size, style, false)?;
    Ok(Node::Row(top_level))
}
/// Parses consecutive elements starting at `*cursor` and returns their nodes.
///
/// With `in_group == true` parsing stops at the first `Event::End` found at
/// this nesting level; that `End` is consumed. With `in_group == false`
/// parsing runs until the event slice is exhausted.
///
/// Elements that produce no node are consumed and left out of the result.
///
/// # Errors
///
/// Returns [`ParseError`] if the slice runs out while `in_group` is set, or if
/// parsing any individual element fails.
fn parse_until_end(
    events: &[Event],
    cursor: &mut usize,
    font_size: f32,
    style: Style,
    in_group: bool,
) -> Result<Vec<Node>, ParseError> {
    let mut nodes = Vec::new();

    while let Some(next) = events.get(*cursor) {
        if in_group && matches!(next, Event::End) {
            // Step over the terminator so the caller resumes after the group.
            *cursor += 1;
            return Ok(nodes);
        }
        // `extend` with an `Option` pushes the node when there is one.
        nodes.extend(parse_element(events, cursor, font_size, style)?);
    }

    if in_group {
        Err(ParseError("unterminated group (missing End)".into()))
    } else {
        Ok(nodes)
    }
}
fn parse_element(
events: &[Event],
cursor: &mut usize,
font_size: f32,
style: Style,
) -> Result<Option<Node>, ParseError> {
if *cursor >= events.len() {
return Err(ParseError("expected element, got end of stream".into()));
}
let ev = events[*cursor].clone();
*cursor += 1;
match ev {
Event::Content(c) => Ok(Some(content_to_node(c, font_size, style)?)),
Event::Begin(Grouping::Normal) => {
let inner =
parse_until_end(events, cursor, font_size, style, true)?;
Ok(Some(Node::Row(inner)))
}
Event::Begin(Grouping::LeftRight(open_opt, close_opt)) => {
let inner =
parse_until_end(events, cursor, font_size, style, true)?;
let open = delim_glyph(open_opt);
let close = delim_glyph(close_opt);
Ok(Some(Node::Fenced {
open,
close,
body: Box::new(Node::Row(inner)),
}))
}
Event::Begin(_) => {
let inner =
parse_until_end(events, cursor, font_size, style, true)?;
Ok(Some(Node::Row(inner)))
}
Event::End => Err(ParseError("unexpected End outside group".into())),
Event::Script { ty, .. } => {
let base = parse_element(events, cursor, font_size, style)?
.ok_or_else(|| ParseError("script base produced no node".into()))?;
let (sub, sup) = match ty {
ScriptType::Subscript => {
let s = parse_element(events, cursor, font_size, style)?
.ok_or_else(|| ParseError("subscript produced no node".into()))?;
(Some(Box::new(s)), None)
}
ScriptType::Superscript => {
let s = parse_element(events, cursor, font_size, style)?
.ok_or_else(|| ParseError("superscript produced no node".into()))?;
(None, Some(Box::new(s)))
}
ScriptType::SubSuperscript => {
let sb = parse_element(events, cursor, font_size, style)?
.ok_or_else(|| ParseError("subscript produced no node".into()))?;
let sp = parse_element(events, cursor, font_size, style)?
.ok_or_else(|| ParseError("superscript produced no node".into()))?;
(Some(Box::new(sb)), Some(Box::new(sp)))
}
};
Ok(Some(Node::Subsup {
base: Box::new(base),
sub,
sup,
}))
}
Event::Visual(v) => match v {
Visual::Fraction(_) => {
let num = parse_element(events, cursor, font_size, style)?
.ok_or_else(|| ParseError("fraction numerator produced no node".into()))?;
let den = parse_element(events, cursor, font_size, style)?
.ok_or_else(|| ParseError("fraction denominator produced no node".into()))?;
Ok(Some(Node::Frac {
num: Box::new(num),
den: Box::new(den),
}))
}
Visual::SquareRoot => {
let body = parse_element(events, cursor, font_size, style)?
.ok_or_else(|| ParseError("sqrt body produced no node".into()))?;
Ok(Some(Node::Radical {
degree: None,
body: Box::new(body),
}))
}
Visual::Root => {
let body = parse_element(events, cursor, font_size, style)?
.ok_or_else(|| ParseError("root radicand produced no node".into()))?;
let degree = parse_element(events, cursor, font_size, style)?
.ok_or_else(|| ParseError("root index produced no node".into()))?;
Ok(Some(Node::Radical {
degree: Some(Box::new(degree)),
body: Box::new(body),
}))
}
Visual::Negation => Ok(None),
},
Event::Space { .. } | Event::StateChange(_) | Event::EnvironmentFlow(_) => Ok(None),
}
}
fn content_to_node(c: Content, font_size: f32, style: Style) -> Result<Node, ParseError> {
let size = font_size;
match c {
Content::Ordinary { content, .. } => atom_node(content, AtomClass::Ord, size),
Content::Number(s) => chars_to_node(s.chars(), AtomClass::Ord, size),
Content::Text(s) => chars_to_node(s.chars(), AtomClass::Ord, size),
Content::Function(s) => chars_to_node(s.chars(), AtomClass::Op, size),
Content::BinaryOp { content, .. } => atom_node(content, AtomClass::Bin, size),
Content::Relation { content, .. } => {
let mut buf = [0u8; 8];
let bytes = content.encode_utf8_to_buf(&mut buf);
let s = std::str::from_utf8(bytes)
.map_err(|e| ParseError(format!("relation utf8: {e}")))?;
chars_to_node(s.chars(), AtomClass::Rel, size)
}
Content::Delimiter { content, ty, .. } => {
let class = match ty {
DelimiterType::Open => AtomClass::Open,
DelimiterType::Close => AtomClass::Close,
DelimiterType::Fence => AtomClass::Inner,
};
atom_node(content, class, size)
}
Content::Punctuation(ch) => atom_node(ch, AtomClass::Punct, size),
Content::LargeOp { content, small } => large_op_node(content, small, font_size, style),
}
}
/// Builds a [`Node::Op`] for a large operator character.
///
/// The operator is "big" only when `style.is_display()` holds and the event
/// did not flag it as `small`. A big operator asks
/// `font::math_variant_vertical` for a variant glyph and falls back to the
/// base glyph when none is returned. `limits` always mirrors `big`.
///
/// # Errors
///
/// Returns [`ParseError`] when the font has no glyph for `ch`.
fn large_op_node(ch: char, small: bool, font_size: f32, style: Style) -> Result<Node, ParseError> {
    let Some(base_glyph) = font::glyph_id(ch) else {
        return Err(ParseError(format!(
            "no glyph for {ch:?} (U+{:04X})",
            ch as u32
        )));
    };

    let big = style.is_display() && !small;

    let glyph = if big {
        // NOTE(review): 1500.0 is presumably the requested variant size;
        // confirm its units against `font::math_variant_vertical`.
        font::math_variant_vertical(base_glyph, 1500.0).map_or(base_glyph, |(variant, _)| variant)
    } else {
        base_glyph
    };

    Ok(Node::Op {
        glyph,
        limits: big,
        big,
        font_size,
    })
}
/// Resolves an optional delimiter character to a glyph id.
///
/// Yields `GlyphId(0)` both when no delimiter is given and when the font has
/// no glyph for the given character.
fn delim_glyph(ch: Option<char>) -> GlyphId {
    match ch {
        Some(delimiter) => font::glyph_id(delimiter).unwrap_or(GlyphId(0)),
        None => GlyphId(0),
    }
}
/// Builds a [`Node::Atom`] of the given class for a single character.
///
/// # Errors
///
/// Returns [`ParseError`] naming the character and its code point when
/// `font::glyph_id` has no glyph for it.
fn atom_node(ch: char, class: AtomClass, font_size: f32) -> Result<Node, ParseError> {
    match font::glyph_id(ch) {
        Some(glyph) => Ok(Node::Atom {
            class,
            glyph,
            font_size,
        }),
        None => Err(ParseError(format!(
            "no glyph for {ch:?} (U+{:04X})",
            ch as u32
        ))),
    }
}
/// Converts a sequence of characters into atoms that all share one class.
///
/// Exactly one character yields that atom directly; any other count
/// (including zero) yields a [`Node::Row`] of the atoms in order.
///
/// # Errors
///
/// Returns the first [`ParseError`] reported by `atom_node`; later characters
/// are not visited.
fn chars_to_node<I: Iterator<Item = char>>(
    chars: I,
    class: AtomClass,
    font_size: f32,
) -> Result<Node, ParseError> {
    let mut atoms = chars
        .map(|ch| atom_node(ch, class, font_size))
        .collect::<Result<Vec<_>, _>>()?;

    match atoms.len() {
        // A lone atom is returned unwrapped; index 0 is valid because len == 1.
        1 => Ok(atoms.swap_remove(0)),
        _ => Ok(Node::Row(atoms)),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ir::{AtomClass, Node, Style};

    /// Unwraps the children of a top-level row, panicking on any other node.
    fn children(ir: Node) -> Vec<Node> {
        match ir {
            Node::Row(items) => items,
            _ => panic!(),
        }
    }

    /// A single letter becomes one ordinary atom inside the top-level row.
    #[test]
    fn parses_single_letter() {
        let items = match to_ir("x", 16.0, Style::Text).unwrap() {
            Node::Row(items) => items,
            other => panic!("expected Row, got {:?}", other),
        };
        assert_eq!(items.len(), 1);
        match &items[0] {
            Node::Atom { class, .. } => assert_eq!(*class, AtomClass::Ord),
            _ => panic!(),
        }
    }

    /// Adjacent letters stay separate entries in the row.
    #[test]
    fn parses_two_letters_as_row() {
        let items = children(to_ir("xy", 16.0, Style::Text).unwrap());
        assert_eq!(items.len(), 2);
    }

    /// `+` between two letters is classified as a binary operator.
    #[test]
    fn classifies_plus_as_bin() {
        let items = children(to_ir("a+b", 16.0, Style::Text).unwrap());
        match &items[1] {
            Node::Atom { class: middle, .. } => assert_eq!(*middle, AtomClass::Bin),
            _ => panic!(),
        }
    }

    /// `\left( … \right)` produces one fenced node whose body is a row.
    #[test]
    fn parses_left_right_paren() {
        let items = children(to_ir(r"\left( x \right)", 16.0, Style::Text).unwrap());
        assert_eq!(items.len(), 1);
        match &items[0] {
            Node::Fenced { body, .. } => assert!(matches!(body.as_ref(), Node::Row(_))),
            _ => panic!("expected Fenced"),
        }
    }

    /// Square brackets around a fraction also produce a fenced node.
    #[test]
    fn parses_left_right_brackets() {
        let items = children(to_ir(r"\left[ \frac{a}{b} \right]", 16.0, Style::Display).unwrap());
        match &items[0] {
            Node::Fenced { .. } => {}
            _ => panic!("expected Fenced"),
        }
    }

    /// The null delimiter `\left.` maps to glyph id 0.
    #[test]
    fn parses_left_dot_null_delim() {
        let items = children(to_ir(r"\left. x \right)", 16.0, Style::Text).unwrap());
        match &items[0] {
            Node::Fenced { open, .. } => assert_eq!(open.0, 0),
            _ => panic!("expected Fenced"),
        }
    }

    /// `\frac{1}{2}` yields a fraction whose parts are both rows.
    #[test]
    fn parses_frac() {
        let items = children(to_ir(r"\frac{1}{2}", 16.0, Style::Text).unwrap());
        assert_eq!(items.len(), 1);
        match &items[0] {
            Node::Frac { num, den } => {
                assert!(matches!(num.as_ref(), Node::Row(_)));
                assert!(matches!(den.as_ref(), Node::Row(_)));
            }
            _ => panic!("expected Frac"),
        }
    }

    /// `\sqrt{x}` yields a radical without a degree.
    #[test]
    fn parses_sqrt() {
        let items = children(to_ir(r"\sqrt{x}", 16.0, Style::Text).unwrap());
        match &items[0] {
            Node::Radical { degree, body } => {
                assert!(degree.is_none());
                assert!(matches!(body.as_ref(), Node::Row(_)));
            }
            _ => panic!("expected Radical"),
        }
    }

    /// `\sqrt[3]{x}` yields a radical that carries a degree.
    #[test]
    fn parses_sqrt_with_degree() {
        let items = children(to_ir(r"\sqrt[3]{x}", 16.0, Style::Text).unwrap());
        match &items[0] {
            Node::Radical {
                degree: Some(_), ..
            } => {}
            _ => panic!(),
        }
    }

    /// `\alpha` is a single ordinary atom.
    #[test]
    fn parses_alpha() {
        let items = children(to_ir(r"\alpha", 16.0, Style::Text).unwrap());
        assert_eq!(items.len(), 1);
        match &items[0] {
            Node::Atom { class, .. } => assert_eq!(*class, AtomClass::Ord),
            _ => panic!(),
        }
    }

    /// `\Gamma` produces exactly one node.
    #[test]
    fn parses_capital_gamma() {
        let items = children(to_ir(r"\Gamma", 16.0, Style::Text).unwrap());
        assert_eq!(items.len(), 1);
    }

    /// In display style `\sum` with both scripts is a big operator with limits.
    #[test]
    fn parses_sum_with_limits_in_display() {
        let items = children(to_ir(r"\sum_{i=1}^{n}", 16.0, Style::Display).unwrap());
        let base = match &items[0] {
            Node::Subsup {
                base,
                sub: Some(_),
                sup: Some(_),
            } => base,
            _ => panic!("expected Subsup wrapping Op"),
        };
        match base.as_ref() {
            Node::Op { limits, big, .. } => {
                assert!(*limits, "\\sum in display mode must have limits=true");
                assert!(*big, "\\sum should pick big variant in display");
            }
            _ => panic!("expected Op base"),
        }
    }

    /// In text style `\sum` keeps its base glyph and uses ordinary scripts.
    #[test]
    fn parses_sum_inline_uses_scripts_not_limits() {
        let items = children(to_ir(r"\sum_{i=1}^{n}", 16.0, Style::Text).unwrap());
        let base = match &items[0] {
            Node::Subsup { base, .. } => base,
            _ => panic!(),
        };
        match base.as_ref() {
            Node::Op { limits, big, .. } => {
                assert!(
                    !*limits,
                    "\\sum in text mode must have limits=false (scripts)"
                );
                assert!(!*big, "\\sum should NOT pick big variant in text");
            }
            _ => panic!(),
        }
    }

    /// `x^2` sets only the superscript slot.
    #[test]
    fn parses_superscript() {
        let items = children(to_ir("x^2", 16.0, Style::Text).unwrap());
        assert_eq!(items.len(), 1);
        match &items[0] {
            Node::Subsup { sub, sup, .. } => {
                assert!(sub.is_none());
                assert!(sup.is_some());
            }
            _ => panic!("expected Subsup, got {:?}", items[0]),
        }
    }

    /// `a_i` sets only the subscript slot.
    #[test]
    fn parses_subscript() {
        let items = children(to_ir("a_i", 16.0, Style::Text).unwrap());
        match &items[0] {
            Node::Subsup { sub, sup, .. } => {
                assert!(sub.is_some());
                assert!(sup.is_none());
            }
            _ => panic!(),
        }
    }

    /// `a_i^j` sets both script slots.
    #[test]
    fn parses_both_sub_and_sup() {
        let items = children(to_ir("a_i^j", 16.0, Style::Text).unwrap());
        match &items[0] {
            Node::Subsup { sub, sup, .. } => assert!(sub.is_some() && sup.is_some()),
            _ => panic!(),
        }
    }

    /// A braced exponent is kept as a row holding each inner atom.
    #[test]
    fn parses_braced_exponent() {
        let items = children(to_ir("x^{n+1}", 16.0, Style::Text).unwrap());
        let sup = match &items[0] {
            Node::Subsup { sup: Some(sup), .. } => sup,
            _ => panic!(),
        };
        let inner = match sup.as_ref() {
            Node::Row(inner) => inner,
            _ => panic!("expected Row inside exponent, got {:?}", sup),
        };
        assert_eq!(inner.len(), 3, "n + 1 = 3 atoms");
    }
}