Skip to main content

texform_interface/
syntax_node.rs

1//! Lossless syntax tree snapshots used for serde and transport.
2//!
3//! `SyntaxNode` is the parser's immutable, lossless output shape. It is useful
4//! for JSON snapshots, Python dictionaries, JavaScript objects, and tests that
5//! need to inspect the parsed structure.
6//!
7//! Editing is intentionally handled by `texform::Document`, not by
8//! `SyntaxNode`. Convert a syntax snapshot with `Document::from_syntax` when
9//! you need a live DOM-style tree, and call `Document::to_syntax` when you need
10//! to serialize or transport the current tree.
11//!
12//! `SyntaxNode::Error` represents a parser recovery placeholder. It can appear
13//! in partial parse trees and preserves the original source snippet.
14
15use serde::{Deserialize, Deserializer, Serialize};
16
/// Command or environment argument.
///
/// Each argument contains an `ArgumentKind` + `ArgumentValue`: the kind
/// records the syntactic form of the slot, the value carries the parsed
/// payload.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "tsify", derive(tsify_next::Tsify))]
pub struct Argument {
    /// Syntactic form of the argument (mandatory, optional, star, ...).
    pub kind: ArgumentKind,
    /// Parsed payload of the argument.
    pub value: ArgumentValue,
}
26
/// Optional slot for argument lists.
///
/// `None` is a slot that holds no argument; the pretty-printer in this file
/// renders it as `Arg(None)`.
pub type ArgumentSlot = Option<Argument>;
29
/// Argument type: the syntactic form an argument slot takes.
///
/// The two delimiter-carrying variants store the opening and closing
/// delimiter that were matched for the argument.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[cfg_attr(feature = "tsify", derive(tsify_next::Tsify))]
pub enum ArgumentKind {
    /// Standard mandatory argument (`m`).
    Mandatory,
    /// Standard optional bracket argument (`o`).
    Optional,
    /// Star argument (`s`).
    Star,
    /// Braced group argument (`g` or `m{}`).
    ///
    /// Requiredness is enforced by the spec/parser rather than this enum.
    Group,
    /// Single delimited argument (`r` / `d`) with matched delimiters.
    Delimited { open: Delimiter, close: Delimiter },
    /// Paired-candidate argument (`r` / `d` with `<l,r>` pair list) with matched delimiters.
    Paired { open: Delimiter, close: Delimiter },
}
49
50impl ArgumentKind {
51    /// Create an ArgumentKind for standard forms from requiredness.
52    #[inline]
53    pub const fn from_required(required: bool) -> Self {
54        if required {
55            ArgumentKind::Mandatory
56        } else {
57            ArgumentKind::Optional
58        }
59    }
60}
61
/// Parsed argument value.
///
/// Only `MathContent` and `TextContent` carry a nested `SyntaxNode` subtree;
/// every other variant holds a scalar or raw string payload.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "tsify", derive(tsify_next::Tsify))]
pub enum ArgumentValue {
    /// Parsed math-mode content subtree.
    MathContent(SyntaxNode),
    /// Parsed text-mode content subtree.
    TextContent(SyntaxNode),
    /// Delimiter argument value.
    Delimiter(Delimiter),
    /// Control-sequence name string with no escape/control sequences.
    CSName(String),
    /// Dimension argument value (raw string).
    Dimension(String),
    /// Integer argument value (raw string).
    Integer(String),
    /// Key-value list argument value (raw string).
    KeyVal(String),
    /// Parsed column template string.
    Column(String),
    /// Boolean argument value, used by star slots.
    Boolean(bool),
}
85
/// Content mode: math or text.
///
/// Determines how content is parsed and interpreted. The lowercase names
/// `"math"` and `"text"` are available through `ContentMode::as_str` and the
/// `Display` implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[cfg_attr(feature = "tsify", derive(tsify_next::Tsify))]
pub enum ContentMode {
    /// Math mode: default mode, supports formulas, scripts, infix commands
    Math,
    /// Text mode: consecutive chars merged, no scripts, inline math via $...$
    Text,
}
97
/// Delimiter type for delimited groups.
///
/// `Serialize` is derived, while `Deserialize` is implemented by hand for
/// this type (the `Control` variant stores a `&'static str`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[cfg_attr(feature = "tsify", derive(tsify_next::Tsify))]
pub enum Delimiter {
    /// No delimiter (corresponds to '.' in LaTeX)
    None,
    /// Single character delimiter: '(', ')', '[', ']', '|', etc.
    Char(char),
    /// Control sequence delimiter: "\langle", "\rangle", "\{", "\}", etc.
    Control(&'static str),
}
109
110impl<'de> Deserialize<'de> for Delimiter {
111    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
112    where
113        D: Deserializer<'de>,
114    {
115        #[derive(Deserialize)]
116        enum DelimiterInput {
117            None,
118            Char(char),
119            Control(String),
120        }
121
122        match DelimiterInput::deserialize(deserializer)? {
123            DelimiterInput::None => Ok(Delimiter::None),
124            DelimiterInput::Char(ch) => Ok(Delimiter::Char(ch)),
125            DelimiterInput::Control(name) => {
126                Ok(Delimiter::Control(Box::leak(name.into_boxed_str())))
127            }
128        }
129    }
130}
131
/// Group type for different grouping constructs.
///
/// Stored in the `kind` field of `SyntaxNode::Group`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[cfg_attr(feature = "tsify", derive(tsify_next::Tsify))]
pub enum GroupKind {
    /// Explicit group: {...}
    Explicit,

    /// Implicit group: wrapper for sequences that need to be treated as a single node
    ///
    /// Used when folding multiple items into one (e.g., infix operands).
    /// This is also the kind produced by `SyntaxNode::implicit_group` and
    /// `SyntaxNode::empty_group`.
    Implicit,

    /// Delimited group: \left delim ... \right delim
    ///
    /// Examples: \left( ... \right), \left\{ ... \right\}
    Delimited { left: Delimiter, right: Delimiter },

    /// Inline math in text mode: $...$
    ///
    /// Note: Display math \[...\] is not currently supported (future extension).
    InlineMath,
}
154
/// Immutable syntax tree node
///
/// Represents the structure of parsed LaTeX source code.
/// Each variant corresponds to a different syntactic construct.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "tsify", derive(tsify_next::Tsify))]
pub enum SyntaxNode {
    /// Parse-tree root node produced by the top-level parser.
    ///
    /// A `Root` never nests inside another `SyntaxNode`; it marks the entry
    /// point of a parsed formula and carries the top-level content mode.
    Root {
        mode: ContentMode,
        children: Vec<SyntaxNode>,
    },

    /// Group: explicit {...}, implicit, delimited \left...\right, or inline math $...$
    Group {
        mode: ContentMode,
        kind: GroupKind, // TODO: Move boundary info into Group, remove kind.
        children: Vec<SyntaxNode>,
    },

    /// Prefix command: `\frac{a}{b}`, `\sqrt[n]{x}`.
    ///
    /// This is the most common command type where arguments follow the command name.
    Command {
        name: String,
        args: Vec<ArgumentSlot>,
        // NOTE(review): presumably whether the parser recognized the command
        // name — confirm against the parser.
        known: bool,
    },

    /// Infix command: a \over b, {n \choose k}
    ///
    /// Only ONE infix command is allowed per group at the top level.
    /// The left and right operands are collected during parsing.
    Infix {
        name: String,
        args: Vec<ArgumentSlot>, // Command's own arguments (usually empty)
        left: Box<SyntaxNode>,
        right: Box<SyntaxNode>,
    },

    /// Declarative command: \color{red}, \bfseries
    Declarative {
        name: String,
        args: Vec<ArgumentSlot>,
    },

    /// Environment: \begin{env}...\end{env}
    ///
    /// Examples: \begin{matrix}...\end{matrix}, \begin{align*}...\end{align*}
    Environment {
        name: String,
        args: Vec<ArgumentSlot>,
        // NOTE(review): presumably whether the parser recognized the
        // environment name — confirm against the parser.
        known: bool,
        body: Box<SyntaxNode>, // Environment body (always a Group node)
    },

    /// Scripted expression: x^2_i, a_{n-1}
    ///
    /// Subscripts and superscripts are normalized:
    /// - Order of ^ and _ is ignored (x^2_i == x_i^2)
    /// - Duplicates take the last occurrence (x^a^b -> superscript = b)
    Scripted {
        base: Box<SyntaxNode>,
        subscript: Option<Box<SyntaxNode>>,
        superscript: Option<Box<SyntaxNode>>,
    },

    /// Parser-produced error placeholder.
    ///
    /// Recovery inserts this node where the parser could not interpret a source
    /// fragment. AST and document-style conversions preserve it so callers can
    /// inspect partial trees or serialize the captured snippet. Callers that
    /// require semantically complete trees should inspect parser diagnostics and
    /// check for `Error` nodes before continuing.
    Error { message: String, snippet: String },

    /// Math prime shorthand represented by one or more consecutive prime marks.
    ///
    /// `count` must be greater than zero. The type itself does not enforce
    /// this; it is an invariant for whoever constructs the node.
    Prime { count: usize },

    /// Text string (Text mode only)
    ///
    /// Produced in Text mode or as content of Text-mode arguments/environments.
    /// Consecutive characters and whitespace are merged into a single Text node.
    /// Multiple whitespace characters collapse into a single space.
    /// Note: In Math mode, characters remain as individual Char nodes, not Text.
    Text(String),

    /// Single character (primarily in math mode)
    ///
    /// Examples: letters (a-z, A-Z), digits (0-9), symbols (+, -, =)
    Char(char),

    /// Active character ~ (non-breaking space)
    ///
    /// In LaTeX, ~ produces a non-breaking space.
    /// This node is produced in both Math and Text modes.
    /// In Text mode, ~ is NOT merged into a `Text` node; it remains as a separate node.
    ///
    /// TODO: Decide whether this needs to remain a distinct node type.
    ActiveSpace,
}
261
262// ============ Helper Methods ============
263
264impl SyntaxNode {
265    /// Check if this node is a content container (`Group` or parse-tree `Root`).
266    pub fn is_group(&self) -> bool {
267        matches!(self, SyntaxNode::Root { .. } | SyntaxNode::Group { .. })
268    }
269
270    /// Check if this node is a leaf (has no children)
271    pub fn is_leaf(&self) -> bool {
272        matches!(
273            self,
274            SyntaxNode::Char(_)
275                | SyntaxNode::Text(_)
276                | SyntaxNode::Prime { .. }
277                | SyntaxNode::ActiveSpace
278                | SyntaxNode::Error { .. }
279        ) || matches!(self, SyntaxNode::Command { args, .. } if args.iter().all(|slot| {
280            slot.as_ref().is_none_or(|arg| {
281                !matches!(
282                    arg.value,
283                    ArgumentValue::MathContent(_) | ArgumentValue::TextContent(_)
284                )
285            })
286        })) || matches!(self, SyntaxNode::Declarative { args, .. } if args.iter().all(|slot| {
287            slot.as_ref().is_none_or(|arg| {
288                !matches!(
289                    arg.value,
290                    ArgumentValue::MathContent(_) | ArgumentValue::TextContent(_)
291                )
292            })
293        }))
294    }
295
296    /// Get the content mode if this is a content container (`Group` or `Root`).
297    pub fn group_mode(&self) -> Option<ContentMode> {
298        match self {
299            SyntaxNode::Root { mode, .. } | SyntaxNode::Group { mode, .. } => Some(*mode),
300            _ => None,
301        }
302    }
303
304    /// Create a parse-tree root node wrapping a sequence of top-level children.
305    pub fn root(mode: ContentMode, children: Vec<SyntaxNode>) -> Self {
306        SyntaxNode::Root { mode, children }
307    }
308
309    /// Create an implicit group wrapping a sequence of nodes
310    pub fn implicit_group(mode: ContentMode, children: Vec<SyntaxNode>) -> Self {
311        SyntaxNode::Group {
312            mode,
313            kind: GroupKind::Implicit,
314            children,
315        }
316    }
317
318    /// Create an empty implicit group
319    pub fn empty_group(mode: ContentMode) -> Self {
320        SyntaxNode::Group {
321            mode,
322            kind: GroupKind::Implicit,
323            children: Vec::new(),
324        }
325    }
326
327    /// Create a math prime shorthand node.
328    pub fn prime(count: usize) -> Self {
329        SyntaxNode::Prime { count }
330    }
331}
332
333impl Argument {
334    /// Create an argument from a kind and value.
335    pub fn from_value(kind: ArgumentKind, value: ArgumentValue) -> Self {
336        Argument { kind, value }
337    }
338}
339
340impl ContentMode {
341    pub const fn as_str(self) -> &'static str {
342        match self {
343            ContentMode::Math => "math",
344            ContentMode::Text => "text",
345        }
346    }
347}
348
349impl std::fmt::Display for ContentMode {
350    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
351        f.write_str((*self).as_str())
352    }
353}
354
355// ============ Display Implementations ============
356
357impl std::fmt::Display for SyntaxNode {
358    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
359        self.fmt_with_indent(f, 0)
360    }
361}
362
363impl SyntaxNode {
364    /// Format with indentation for pretty-printing
365    fn fmt_with_indent(&self, f: &mut std::fmt::Formatter<'_>, indent: usize) -> std::fmt::Result {
366        let prefix = "  ".repeat(indent);
367        match self {
368            SyntaxNode::Root { mode, children } => {
369                writeln!(f, "{}Root({:?}) [", prefix, mode)?;
370                Self::fmt_group_children_with_indent(f, children, indent + 1)?;
371                writeln!(f, "{}]", prefix)
372            }
373            SyntaxNode::Group {
374                mode,
375                kind,
376                children,
377            } => {
378                writeln!(f, "{}Group({:?}, {:?}) [", prefix, mode, kind)?;
379                Self::fmt_group_children_with_indent(f, children, indent + 1)?;
380                writeln!(f, "{}]", prefix)
381            }
382            SyntaxNode::Command { name, args, known } => {
383                writeln!(f, "{}Command(\\{}, known={}) [", prefix, name, known)?;
384                for arg in args {
385                    fmt_argument_slot(f, arg, indent + 1)?;
386                }
387                writeln!(f, "{}]", prefix)
388            }
389            SyntaxNode::Infix {
390                name,
391                args,
392                left,
393                right,
394            } => {
395                writeln!(f, "{}Infix(\\{}) [", prefix, name)?;
396                writeln!(f, "{}  left:", prefix)?;
397                left.fmt_with_indent(f, indent + 2)?;
398                writeln!(f, "{}  right:", prefix)?;
399                right.fmt_with_indent(f, indent + 2)?;
400                if !args.is_empty() {
401                    writeln!(f, "{}  args:", prefix)?;
402                    for arg in args {
403                        fmt_argument_slot(f, arg, indent + 2)?;
404                    }
405                }
406                writeln!(f, "{}]", prefix)
407            }
408            SyntaxNode::Declarative { name, args } => {
409                writeln!(f, "{}Declarative(\\{}) [", prefix, name)?;
410                if !args.is_empty() {
411                    writeln!(f, "{}  args:", prefix)?;
412                    for arg in args {
413                        fmt_argument_slot(f, arg, indent + 2)?;
414                    }
415                }
416                writeln!(f, "{}]", prefix)
417            }
418            SyntaxNode::Environment {
419                name,
420                args,
421                known,
422                body,
423            } => {
424                writeln!(f, "{}Environment({}, known={}) [", prefix, name, known)?;
425                if !args.is_empty() {
426                    writeln!(f, "{}  args:", prefix)?;
427                    for arg in args {
428                        fmt_argument_slot(f, arg, indent + 2)?;
429                    }
430                }
431                writeln!(f, "{}  body:", prefix)?;
432                body.fmt_with_indent(f, indent + 2)?;
433                writeln!(f, "{}]", prefix)
434            }
435            SyntaxNode::Scripted {
436                base,
437                subscript,
438                superscript,
439            } => {
440                writeln!(f, "{}Scripted [", prefix)?;
441                writeln!(f, "{}  base:", prefix)?;
442                base.fmt_with_indent(f, indent + 2)?;
443                if let Some(sub) = subscript {
444                    writeln!(f, "{}  subscript:", prefix)?;
445                    sub.fmt_with_indent(f, indent + 2)?;
446                }
447                if let Some(sup) = superscript {
448                    writeln!(f, "{}  superscript:", prefix)?;
449                    sup.fmt_with_indent(f, indent + 2)?;
450                }
451                writeln!(f, "{}]", prefix)
452            }
453            SyntaxNode::Error { message, snippet } => {
454                writeln!(
455                    f,
456                    "{}Error(message: {}, snippet: {})",
457                    prefix, message, snippet
458                )
459            }
460            SyntaxNode::Prime { count } => writeln!(f, "{}Prime({})", prefix, count),
461            SyntaxNode::Text(s) => writeln!(f, "{}Text(\"{}\")", prefix, s),
462            SyntaxNode::Char(c) => writeln!(f, "{}Char('{}')", prefix, c),
463            SyntaxNode::ActiveSpace => writeln!(f, "{}ActiveSpace", prefix),
464        }
465    }
466
467    fn fmt_group_children_with_indent(
468        f: &mut std::fmt::Formatter<'_>,
469        children: &[SyntaxNode],
470        indent: usize,
471    ) -> std::fmt::Result {
472        let prefix = "  ".repeat(indent);
473        let mut i = 0;
474
475        while i < children.len() {
476            if let SyntaxNode::Char(_) = children[i] {
477                let mut merged = String::new();
478                while i < children.len() {
479                    match &children[i] {
480                        SyntaxNode::Char(c) => {
481                            merged.push(*c);
482                            i += 1;
483                        }
484                        _ => break,
485                    }
486                }
487
488                if merged.chars().count() == 1 {
489                    writeln!(f, "{}Char('{}')", prefix, merged.chars().next().unwrap())?;
490                } else {
491                    writeln!(f, "{}Chars({:?})", prefix, merged)?;
492                }
493                continue;
494            }
495
496            children[i].fmt_with_indent(f, indent)?;
497            i += 1;
498        }
499
500        Ok(())
501    }
502}
503
504impl Argument {
505    fn fmt_with_indent(&self, f: &mut std::fmt::Formatter<'_>, indent: usize) -> std::fmt::Result {
506        let prefix = "  ".repeat(indent);
507        writeln!(f, "{}Arg({:?}):", prefix, self.kind)?;
508        self.value.fmt_with_indent(f, indent + 1)
509    }
510}
511
512impl ArgumentValue {
513    fn fmt_with_indent(&self, f: &mut std::fmt::Formatter<'_>, indent: usize) -> std::fmt::Result {
514        let prefix = "  ".repeat(indent);
515        match self {
516            ArgumentValue::MathContent(node) | ArgumentValue::TextContent(node) => {
517                node.fmt_with_indent(f, indent)
518            }
519            ArgumentValue::Delimiter(delim) => writeln!(f, "{}Delimiter({:?})", prefix, delim),
520            ArgumentValue::CSName(value) => writeln!(f, "{}CSName(\"{}\")", prefix, value),
521            ArgumentValue::Dimension(value) => writeln!(f, "{}Dimension(\"{}\")", prefix, value),
522            ArgumentValue::Integer(value) => writeln!(f, "{}Integer(\"{}\")", prefix, value),
523            ArgumentValue::KeyVal(value) => writeln!(f, "{}KeyVal(\"{}\")", prefix, value),
524            ArgumentValue::Column(value) => writeln!(f, "{}Column(\"{}\")", prefix, value),
525            ArgumentValue::Boolean(value) => writeln!(f, "{}Boolean({})", prefix, value),
526        }
527    }
528}
529
530fn fmt_argument_slot(
531    f: &mut std::fmt::Formatter<'_>,
532    slot: &ArgumentSlot,
533    indent: usize,
534) -> std::fmt::Result {
535    let prefix = "  ".repeat(indent);
536    match slot {
537        Some(argument) => argument.fmt_with_indent(f, indent),
538        None => writeln!(f, "{}Arg(None)", prefix),
539    }
540}
541
542// Tests in tests/syntax_node.rs