Skip to main content

texform_core/
serialize.rs

1//! Canonical AST serializer — converts [`Ast`] back to LaTeX text.
2//!
3//! The serializer is independent of the transform stage: it covers the full AST
4//! node vocabulary and makes no assumptions about whether the input has been
5//! normalized. Its default style targets the `corpus` / `equiv` use cases with
6//! strong disambiguation and explicit token boundaries in math mode, while text
7//! mode content is preserved verbatim.
8//!
9//! # Architecture
10//!
11//! ```text
12//! Serializer (recursive AST walk)
13//!   -> emit atom with kind + mode
14//!   -> AtomWriter decides inter-atom boundary
15//!   -> String
16//! ```
17//!
18//! Most spacing rules are concentrated in the atom writer's boundary decision,
19//! which inspects the previous atom, the next atom, the current content mode,
20//! and the active [`SerializeOptions`]. A few wrapper/scalar helpers still emit
21//! preformatted spaces directly for cases that cannot be expressed as a simple
22//! previous/next atom decision (for example empty padded groups). This keeps
23//! the boundary logic local and avoids post-hoc string cleanup — important
24//! because TeX whitespace carries both lexical and semantic weight.
25
26use serde::{Deserialize, Serialize};
27
28use crate::ast::{
29    Argument, ArgumentKind, ArgumentSlot, ArgumentValue, Ast, ContentMode, Delimiter, GroupKind,
30    Node, NodeId,
31};
32
33/// Serialize an AST to LaTeX using the default canonical style.
34pub fn serialize(ast: &Ast) -> String {
35    serialize_with(ast, &SerializeOptions::default())
36}
37
38/// Serialize an AST to LaTeX with explicit style options.
39pub fn serialize_with(ast: &Ast, options: &SerializeOptions) -> String {
40    let mut serializer = Serializer::new(ast, options);
41    serializer.serialize_root();
42    serializer.finish()
43}
44
/// Failure modes of LaTeX serialization.
///
/// Nothing in the canonical serializer fails today. The type is kept so the
/// public `Document::to_latex*` API does not have to change shape if
/// serialization later gains validation or other IO-free failure modes.
/// It is `#[non_exhaustive]`, so new variants can be added without breaking
/// downstream `match` expressions.
#[derive(Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum SerializeError {
    /// Placeholder for serialization paths that may become fallible later.
    Unsupported,
}

impl std::fmt::Display for SerializeError {
    /// Write the human-readable message for this error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::Unsupported => "unsupported serialization operation",
        };
        f.write_str(message)
    }
}

impl std::error::Error for SerializeError {}
66
/// Top-level serialization options, grouped by scope.
///
/// `math.*` controls math-mode-specific behavior; `syntax.*` controls
/// structural LaTeX syntax that is mode-independent.
///
/// The container-level `#[serde(default)]` means a field missing from
/// deserialized input falls back to the value from this struct's `Default`.
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct SerializeOptions {
    /// Math-mode options: spacing, script formatting, and infix grouping.
    pub math: MathSerializeOptions,
    /// Mode-independent structural options (currently environment headers).
    pub syntax: SyntaxSerializeOptions,
}
77
/// Math-mode serialization options.
///
/// Missing fields deserialize to their defaults (`#[serde(default)]`).
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct MathSerializeOptions {
    /// Token spacing: command boundaries, brace-group padding, adjacent characters.
    pub spacing: MathSpacingOptions,
    /// Formatting of `_` / `^` scripts: marker spacing and output order.
    pub scripts: MathScriptOptions,
    /// How operands of infix commands are braced.
    pub infix: MathInfixOptions,
}
86
/// Infix serialization options for math mode.
///
/// Missing fields deserialize to their defaults (`#[serde(default)]`).
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct MathInfixOptions {
    /// Whether infix operands are always wrapped in braces or only when required.
    pub grouping: InfixGrouping,
}
93
/// Spacing controls within math mode.
///
/// Missing fields deserialize to their defaults (`#[serde(default)]`).
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct MathSpacingOptions {
    /// Space between a control sequence and a following brace or delimiter token.
    pub commands: CommandSpacing,
    /// Padding just inside math brace groups (`{ a }` vs `{a}`).
    pub group_inner_spacing: MathGroupInnerSpacing,
    /// Space between two adjacent math character atoms (`a b` vs `ab`).
    pub adjacent_chars: AdjacentCharSpacing,
}
102
/// Sub/superscript formatting controls.
///
/// Missing fields deserialize to their defaults (`#[serde(default)]`).
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct MathScriptOptions {
    /// Whether spaces are placed around the `_` and `^` markers.
    pub spacing: ScriptSpacing,
    /// Which script is written first when a base carries both.
    pub order: ScriptOrder,
}
110
/// Structural syntax options (mode-independent).
///
/// Missing fields deserialize to their defaults (`#[serde(default)]`).
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct SyntaxSerializeOptions {
    /// Formatting of `\begin` / `\end` environment headers.
    pub environments: EnvironmentSerializeOptions,
}
117
/// Environment header formatting options.
///
/// Missing fields deserialize to their defaults (`#[serde(default)]`).
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct EnvironmentSerializeOptions {
    /// Whether a space separates `\begin` / `\end` from the `{name}` brace.
    pub name_spacing: EnvironmentNameSpacing,
}
124
/// Whether to insert a space between a command and the following structural
/// token in math mode.
///
/// `Spaced`: `\frac { a }` — `Minimal`: `\frac{ a }`.
/// `Minimal` only removes the command-to-structure boundary itself; it still
/// preserves lexical separation when omitting a space would merge a following
/// letter-like token into the control sequence name (e.g. `\alpha x`).
///
/// Serialized in `snake_case` (`"spaced"`, `"minimal"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CommandSpacing {
    /// Put a space before a brace or delimiter token that follows a command
    /// (the default).
    #[default]
    Spaced,
    /// Attach a following brace or delimiter token directly to the command.
    Minimal,
}
139
/// Controls the inside spacing of math brace groups.
///
/// `Padded`: `{ a }`, `{ }`, `x ^ { 2 }`.
/// `Compact`: `{a}`, `{}`, `x ^ {2}`.
///
/// This applies both to explicit/implicit `Group` nodes and to wrapper-owned
/// braces emitted for command/script arguments. Text-mode content and scalar
/// fragments (environment names, dimensions, etc.) are never padded.
///
/// Serialized in `snake_case` (`"padded"`, `"compact"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum MathGroupInnerSpacing {
    /// Separate the braces from their content with a space; an empty group
    /// is written `{ }` (the default).
    #[default]
    Padded,
    /// Write the content directly against the braces; an empty group is `{}`.
    Compact,
}
155
/// Whether adjacent math character atoms get explicit space separation.
///
/// `Spaced`: `a b c + d` — `Compact`: `abc+d`.
/// All `Char` nodes in math mode are treated uniformly; the serializer does
/// not classify characters as operators vs. letters.
///
/// Serialized in `snake_case` (`"spaced"`, `"compact"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AdjacentCharSpacing {
    /// Insert a space between two consecutive math characters (the default).
    #[default]
    Spaced,
    /// Write consecutive math characters with no separator.
    Compact,
}
168
/// Whether to insert spaces immediately around `_` and `^` markers.
///
/// `Spaced`: `x _ { i }` — `Compact`: `x_{ i }`.
/// This only controls the marker boundary itself; inner brace spacing still
/// follows the normal math group rules.
///
/// Serialized in `snake_case` (`"spaced"`, `"compact"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ScriptSpacing {
    /// Put a space on both sides of the script marker (the default).
    #[default]
    Spaced,
    /// Attach the marker to the base and to the opening brace of its argument.
    Compact,
}
181
/// Fixed output order for subscript and superscript.
///
/// `SubFirst`: `x _ { i } ^ { 2 }` — `SupFirst`: `x ^ { 2 } _ { i }`.
///
/// Serialized in `snake_case` (`"sub_first"`, `"sup_first"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ScriptOrder {
    /// Write the subscript before the superscript (the default).
    #[default]
    SubFirst,
    /// Write the superscript before the subscript.
    SupFirst,
}
192
/// Whether math infix operands are always braced or only when needed.
///
/// In both settings an empty implicit math-group operand is omitted entirely.
///
/// Serialized in `snake_case` (`"always_explicit"`, `"when_required"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum InfixGrouping {
    /// Wrap every non-empty operand in `{` ... `}`.
    AlwaysExplicit,
    /// Wrap an operand only when it is an infix node, an explicit group, or
    /// an implicit group that directly contains an infix node (the default).
    #[default]
    WhenRequired,
}
201
/// Whether `\begin` / `\end` get a space before the name brace.
///
/// `Spaced` -> `\begin {matrix}`, `Compact` -> `\begin{matrix}`.
/// The environment name inside `{}` is always compact, and this setting is
/// independent from [`CommandSpacing`].
///
/// Serialized in `snake_case` (`"spaced"`, `"compact"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EnvironmentNameSpacing {
    /// Write a space between `\begin` / `\end` and `{name}` (the default).
    #[default]
    Spaced,
    /// Write `{name}` directly after `\begin` / `\end`.
    Compact,
}
214
/// Private atom classification used solely by [`AtomWriter`] to decide
/// inter-atom boundaries. This does not appear in the AST; the serializer
/// assigns a kind to each piece of text it emits.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum AtomKind {
    /// `\frac`, `\alpha`, `\\`, `\,` — any control sequence body. A space is
    /// always inserted before a following text chunk, math character, or raw
    /// fragment.
    ControlSequence,
    /// Verbatim text-mode chunk (never split or spaced internally).
    TextChunk,
    /// Single math-mode character atom.
    MathChar,
    /// Prime shorthand mark(s).
    Prime,
    /// `{`, `}`, `[`, `]` — structural delimiters.
    Brace,
    /// Delimiter token after `\left` / `\right` or in argument pairs.
    DelimiterToken,
    /// `_` or `^`.
    ScriptMark,
    /// `$` for inline math boundaries.
    Dollar,
    /// `~` (active character space).
    ActiveChar,
    /// Raw fragment (dimension, column spec, environment name, etc.) that must
    /// not be token-spaced. Error-node snippets are also emitted with this kind.
    RawFragment,
}
242
/// Accumulates output text and decides where to insert inter-atom spaces.
///
/// Most boundary rules live in the atom writer's central decision function,
/// making them testable in isolation without constructing a full AST. A few
/// helpers still bypass it for preformatted cases such as empty padded
/// groups. The writer tracks only the *previous* atom kind — no look-ahead —
/// so the serializer must emit atoms in final output order.
#[derive(Default)]
struct AtomWriter {
    /// Text emitted so far. Some serializer helpers push to it directly.
    output: String,
    /// Kind of the most recently emitted atom. `None` at the start of output,
    /// and also when a helper resets it to suppress the next boundary space.
    previous: Option<AtomKind>,
}
255
256impl AtomWriter {
257    fn emit(&mut self, mode: ContentMode, kind: AtomKind, text: &str, options: &SerializeOptions) {
258        if self.should_insert_space(mode, kind, options) {
259            self.output.push(' ');
260        }
261        self.output.push_str(text);
262        self.previous = Some(kind);
263    }
264
265    /// Append `*` directly — star must glue to the preceding control sequence
266    /// without any boundary space (`\operatorname*`, not `\operatorname *`).
267    fn emit_star_suffix(&mut self) {
268        self.output.push('*');
269    }
270
271    /// Central boundary-decision function.
272    ///
273    /// Returns `true` when a space should be inserted between the previous atom
274    /// and the upcoming `next` atom. Rules are checked top-down; the first
275    /// matching branch wins.
276    fn should_insert_space(
277        &self,
278        mode: ContentMode,
279        next: AtomKind,
280        options: &SerializeOptions,
281    ) -> bool {
282        let Some(prev) = self.previous else {
283            return false;
284        };
285
286        // A control sequence followed by a letter-like atom always needs a
287        // boundary; without it the letter would be absorbed into the command
288        // name during re-lexing (e.g. `\alphax` vs `\alpha x`).
289        if matches!(prev, AtomKind::ControlSequence)
290            && matches!(
291                next,
292                AtomKind::TextChunk | AtomKind::MathChar | AtomKind::RawFragment
293            )
294        {
295            return true;
296        }
297
298        // Text mode never injects extra spaces. Some callers also reuse
299        // `ContentMode::Text` as a synthetic "compact boundary" mode.
300        if matches!(mode, ContentMode::Text) {
301            return false;
302        }
303
304        // --- Below this point, we are in math mode ---
305
306        if matches!(prev, AtomKind::ControlSequence) {
307            return match next {
308                AtomKind::Brace | AtomKind::DelimiterToken => {
309                    matches!(options.math.spacing.commands, CommandSpacing::Spaced)
310                }
311                _ => true,
312            };
313        }
314
315        if matches!(prev, AtomKind::MathChar) && matches!(next, AtomKind::MathChar) {
316            return matches!(
317                options.math.spacing.adjacent_chars,
318                AdjacentCharSpacing::Spaced
319            );
320        }
321
322        // Prime marks attach tightly to the preceding atom. A following atom
323        // still gets separated so a leading prime stays readable as its own
324        // item in canonical output.
325        if matches!(next, AtomKind::Prime) {
326            return !matches!(
327                prev,
328                AtomKind::ControlSequence | AtomKind::MathChar | AtomKind::Prime
329            );
330        }
331        if matches!(prev, AtomKind::Prime) && matches!(next, AtomKind::ScriptMark) {
332            return matches!(options.math.scripts.spacing, ScriptSpacing::Spaced);
333        }
334        if matches!(prev, AtomKind::Prime) {
335            return true;
336        }
337
338        // `$` delimiters bind tightly to their content (`$x$`, not `$ x $`).
339        if matches!(prev, AtomKind::Dollar) || matches!(next, AtomKind::Dollar) {
340            return false;
341        }
342
343        if matches!(prev, AtomKind::ScriptMark) || matches!(next, AtomKind::ScriptMark) {
344            return matches!(options.math.scripts.spacing, ScriptSpacing::Spaced);
345        }
346
347        true
348    }
349
350    fn finish(self) -> String {
351        self.output
352    }
353}
354
/// Recursive AST walker that emits atoms into an [`AtomWriter`].
///
/// Mode is tracked through the recursion stack — each `visit` call receives
/// the content mode of its parent context, so no separate mutable mode stack
/// is needed.
struct Serializer<'a> {
    /// Tree being serialized; nodes are looked up by id through it.
    ast: &'a Ast,
    /// Style options consulted for every boundary and wrapper decision.
    options: &'a SerializeOptions,
    /// Output accumulator that also remembers the previously emitted atom kind.
    writer: AtomWriter,
}
365
366impl<'a> Serializer<'a> {
367    fn new(ast: &'a Ast, options: &'a SerializeOptions) -> Self {
368        Self {
369            ast,
370            options,
371            writer: AtomWriter::default(),
372        }
373    }
374
375    /// Emit the formula content without root-level braces.
376    ///
377    /// The top-level API serializes "formula content", not "a group node".
378    /// Root braces are intentionally suppressed regardless of whether the
379    /// root is Explicit or Implicit.
380    fn serialize_root(&mut self) {
381        let root = self.ast.root();
382        let Node::Root { children, mode } = self.ast.node(root) else {
383            unreachable!("root must be a root node")
384        };
385
386        for &child in children {
387            self.visit(child, *mode);
388        }
389    }
390
391    fn visit(&mut self, id: NodeId, mode: ContentMode) {
392        match self.ast.node(id).clone() {
393            Node::Root { .. } => unreachable!("root node must be handled by serialize_root"),
394            Node::Environment {
395                name, args, body, ..
396            } => self.visit_environment(&name, &args, body, mode),
397            Node::Infix {
398                name,
399                args,
400                left,
401                right,
402            } => self.visit_infix(&name, &args, left, right),
403            Node::Declarative { name, args } => self.visit_declarative(&name, &args, mode),
404            Node::Group {
405                children,
406                kind,
407                mode: child_mode,
408            } => self.visit_group(kind, child_mode, &children),
409            Node::Scripted {
410                base,
411                subscript,
412                superscript,
413            } => self.visit_scripted(base, subscript, superscript),
414            Node::Command { name, args, .. } => self.visit_command(&name, &args, mode),
415            Node::Prime { count } => self.visit_prime(count, mode),
416            Node::Char(ch) => self.visit_char(ch, mode),
417            Node::Text(text) => self
418                .writer
419                .emit(mode, AtomKind::TextChunk, &text, self.options),
420            Node::ActiveSpace => self
421                .writer
422                .emit(mode, AtomKind::ActiveChar, "~", self.options),
423            Node::Error { snippet, .. } => {
424                self.writer
425                    .emit(mode, AtomKind::RawFragment, &snippet, self.options)
426            }
427        }
428    }
429
430    /// Emit a group node.
431    ///
432    /// `Explicit` and `Implicit` are treated identically as brace groups — the
433    /// distinction is parser/transform history and must not leak into the text.
434    fn visit_group(&mut self, kind: GroupKind, child_mode: ContentMode, children: &[NodeId]) {
435        match kind {
436            GroupKind::Explicit | GroupKind::Implicit => {
437                if matches!(child_mode, ContentMode::Math)
438                    && matches!(
439                        self.options.math.spacing.group_inner_spacing,
440                        MathGroupInnerSpacing::Compact
441                    )
442                {
443                    self.emit_compact_math_brace_group(children);
444                } else {
445                    self.emit_wrapped(child_mode, AtomKind::Brace, "{", "}", children);
446                }
447            }
448            GroupKind::Delimited { left, right } => {
449                self.writer.emit(
450                    ContentMode::Math,
451                    AtomKind::ControlSequence,
452                    r"\left",
453                    self.options,
454                );
455                self.emit_delimiter(&left, ContentMode::Math);
456                for &child in children {
457                    self.visit(child, ContentMode::Math);
458                }
459                self.writer.emit(
460                    ContentMode::Math,
461                    AtomKind::ControlSequence,
462                    r"\right",
463                    self.options,
464                );
465                self.emit_delimiter(&right, ContentMode::Math);
466            }
467            GroupKind::InlineMath => self.visit_inline_math(children),
468        }
469    }
470
471    fn visit_command(&mut self, name: &str, args: &[Option<Argument>], mode: ContentMode) {
472        self.writer.emit(
473            mode,
474            AtomKind::ControlSequence,
475            &format!(r"\{}", name),
476            self.options,
477        );
478
479        for slot in args {
480            self.visit_argument_slot(slot, mode);
481        }
482    }
483
484    /// Emit an infix command in its original syntactic form.
485    ///
486    /// The serializer does not assume the infix has been desugared by a
487    /// transform rule; an un-rewritten `\over` still round-trips correctly.
488    fn visit_infix(&mut self, name: &str, args: &[ArgumentSlot], left: NodeId, right: NodeId) {
489        self.emit_infix_operand(left);
490        self.writer.emit(
491            ContentMode::Math,
492            AtomKind::ControlSequence,
493            &format!(r"\{}", name),
494            self.options,
495        );
496        for slot in args {
497            self.visit_argument_slot(slot, ContentMode::Math);
498        }
499        self.emit_infix_operand(right);
500    }
501
502    /// Emit a declarative command with its explicit arguments.
503    fn visit_declarative(&mut self, name: &str, args: &[ArgumentSlot], mode: ContentMode) {
504        self.writer.emit(
505            mode,
506            AtomKind::ControlSequence,
507            &format!(r"\{}", name),
508            self.options,
509        );
510        for slot in args {
511            self.visit_argument_slot(slot, mode);
512        }
513    }
514
515    fn visit_environment(
516        &mut self,
517        name: &str,
518        args: &[ArgumentSlot],
519        body: NodeId,
520        mode: ContentMode,
521    ) {
522        self.emit_environment_head(mode, r"\begin", name);
523        for slot in args {
524            self.visit_argument_slot(slot, mode);
525        }
526
527        match self.ast.node(body).clone() {
528            Node::Group {
529                children,
530                mode: body_mode,
531                kind: GroupKind::Implicit,
532            } => {
533                for child in children {
534                    self.visit(child, body_mode);
535                }
536            }
537            Node::Group {
538                mode: body_mode, ..
539            } => self.visit(body, body_mode),
540            other => unreachable!("environment body must remain a group, got {:?}", other),
541        }
542
543        self.emit_environment_head(mode, r"\end", name);
544    }
545
546    /// Emit `\begin {name}` or `\end {name}` (or compact `\begin{name}`).
547    ///
548    /// Environment header spacing is intentionally controlled here instead of
549    /// piggybacking on the generic command-to-brace rule, so it stays
550    /// independent from `CommandSpacing`.
551    fn emit_environment_head(&mut self, outer_mode: ContentMode, head: &str, name: &str) {
552        self.writer
553            .emit(outer_mode, AtomKind::ControlSequence, head, self.options);
554
555        if matches!(
556            self.options.syntax.environments.name_spacing,
557            EnvironmentNameSpacing::Spaced
558        ) {
559            self.writer.output.push(' ');
560        }
561
562        self.writer.output.push('{');
563        self.writer.output.push_str(name);
564        self.writer.output.push('}');
565        self.writer.previous = Some(AtomKind::Brace);
566    }
567
568    /// Dispatch a single argument slot to the appropriate emitter.
569    ///
570    /// Content arguments (`MathContent` / `TextContent`) recurse into the
571    /// serializer; scalar arguments are emitted as opaque fragments that
572    /// bypass math-mode token spacing.
573    fn visit_argument_slot(&mut self, slot: &Option<Argument>, mode: ContentMode) {
574        let Some(arg) = slot else {
575            return;
576        };
577
578        match (&arg.kind, &arg.value) {
579            (ArgumentKind::Star, ArgumentValue::Boolean(true)) => self.writer.emit_star_suffix(),
580            (ArgumentKind::Star, ArgumentValue::Boolean(false)) => {}
581            (ArgumentKind::Star, _) => {
582                unreachable!("star slots must carry boolean values")
583            }
584            (ArgumentKind::Mandatory | ArgumentKind::Group, ArgumentValue::MathContent(child)) => {
585                self.emit_argument_content(*child, ContentMode::Math, "{", "}", mode);
586            }
587            (ArgumentKind::Mandatory | ArgumentKind::Group, ArgumentValue::TextContent(child)) => {
588                self.emit_argument_content(*child, ContentMode::Text, "{", "}", mode);
589            }
590            (ArgumentKind::Optional, ArgumentValue::MathContent(child)) => {
591                self.emit_argument_content(*child, ContentMode::Math, "[", "]", mode);
592            }
593            (ArgumentKind::Optional, ArgumentValue::TextContent(child)) => {
594                self.emit_argument_content(*child, ContentMode::Text, "[", "]", mode);
595            }
596            (ArgumentKind::Mandatory | ArgumentKind::Group, value) => {
597                self.emit_scalar_wrapped(value, "{", "}", mode)
598            }
599            (ArgumentKind::Optional, value) => self.emit_scalar_wrapped(value, "[", "]", mode),
600            (ArgumentKind::Delimited { open, close }, ArgumentValue::MathContent(node))
601            | (ArgumentKind::Paired { open, close }, ArgumentValue::MathContent(node)) => {
602                self.emit_recorded_delimiters(open, close, *node, ContentMode::Math)
603            }
604            (ArgumentKind::Delimited { open, close }, ArgumentValue::TextContent(node))
605            | (ArgumentKind::Paired { open, close }, ArgumentValue::TextContent(node)) => {
606                self.emit_recorded_delimiters(open, close, *node, ContentMode::Text)
607            }
608            (ArgumentKind::Delimited { open, close }, value)
609            | (ArgumentKind::Paired { open, close }, value) => {
610                self.emit_scalar_between_delimiters(open, close, value, mode)
611            }
612        }
613    }
614
615    /// Emit a content argument wrapped in its matching delimiters.
616    ///
617    /// `content_mode` is the mode the argument was parsed in (from the
618    /// `MathContent` / `TextContent` variant), while `wrapper_mode` controls
619    /// boundary spacing around the outer delimiters.
620    fn emit_argument_content(
621        &mut self,
622        child: NodeId,
623        content_mode: ContentMode,
624        open: &str,
625        close: &str,
626        wrapper_mode: ContentMode,
627    ) {
628        self.emit_wrapped_content(child, wrapper_mode, content_mode, open, close);
629    }
630
631    fn visit_scripted(
632        &mut self,
633        base: NodeId,
634        subscript: Option<NodeId>,
635        superscript: Option<NodeId>,
636    ) {
637        self.visit(base, ContentMode::Math);
638
639        match self.options.math.scripts.order {
640            ScriptOrder::SubFirst => {
641                if let Some(node) = subscript {
642                    self.emit_script('_', node);
643                }
644                if let Some(node) = superscript {
645                    self.emit_superscript(node);
646                }
647            }
648            ScriptOrder::SupFirst => {
649                if let Some(node) = superscript {
650                    self.emit_superscript(node);
651                }
652                if let Some(node) = subscript {
653                    self.emit_script('_', node);
654                }
655            }
656        }
657    }
658
659    fn visit_inline_math(&mut self, children: &[NodeId]) {
660        self.writer
661            .emit(ContentMode::Text, AtomKind::Dollar, "$", self.options);
662        for &child in children {
663            self.visit(child, ContentMode::Math);
664        }
665        self.writer
666            .emit(ContentMode::Text, AtomKind::Dollar, "$", self.options);
667    }
668
669    /// Emit a single `_` or `^` followed by its braced argument.
670    ///
671    /// Script spacing is controlled by emitting the marker in a synthetic
672    /// mode: `Math` triggers boundary insertion while `Text` suppresses it,
673    /// reusing the existing boundary logic without a dedicated
674    /// script-mark branch in every caller.
675    fn emit_script(&mut self, marker: char, node: NodeId) {
676        let mode = match self.options.math.scripts.spacing {
677            ScriptSpacing::Spaced => ContentMode::Math,
678            ScriptSpacing::Compact => ContentMode::Text,
679        };
680        self.writer.emit(
681            mode,
682            AtomKind::ScriptMark,
683            &marker.to_string(),
684            self.options,
685        );
686        self.emit_wrapped_content(node, ContentMode::Math, ContentMode::Math, "{", "}");
687    }
688
689    fn emit_superscript(&mut self, node: NodeId) {
690        if let Node::Prime { count } = self.ast.node(node) {
691            self.emit_prime_marks(*count);
692        } else {
693            self.emit_script('^', node);
694        }
695    }
696
697    /// Emit children surrounded by open/close delimiters.
698    fn emit_wrapped(
699        &mut self,
700        mode: ContentMode,
701        kind: AtomKind,
702        open: &str,
703        close: &str,
704        children: &[NodeId],
705    ) {
706        // Empty math brace groups need special handling to produce `{ }`
707        // instead of `{}` under Padded mode — the normal visitor path would
708        // emit `{` then immediately `}` with no content in between.
709        if children.is_empty()
710            && matches!(mode, ContentMode::Math)
711            && matches!(kind, AtomKind::Brace)
712            && matches!(
713                self.options.math.spacing.group_inner_spacing,
714                MathGroupInnerSpacing::Padded
715            )
716        {
717            self.emit_padded_empty_group(mode, kind, open, close);
718            return;
719        }
720
721        self.writer.emit(mode, kind, open, self.options);
722        for &child in children {
723            self.visit(child, mode);
724        }
725        self.writer.emit(mode, kind, close, self.options);
726    }
727
728    fn emit_compact_math_brace_group(&mut self, children: &[NodeId]) {
729        self.writer
730            .emit(ContentMode::Math, AtomKind::Brace, "{", self.options);
731
732        self.writer.previous = None;
733        for &child in children {
734            self.visit(child, ContentMode::Math);
735        }
736
737        self.writer
738            .emit(ContentMode::Text, AtomKind::Brace, "}", self.options);
739    }
740
741    /// Emit `{ }` as a single pre-formatted unit.
742    ///
743    /// Bypasses the normal atom pipeline because there is no interior content
744    /// to visit, yet the padding space must still appear between the braces.
745    fn emit_padded_empty_group(
746        &mut self,
747        mode: ContentMode,
748        kind: AtomKind,
749        open: &str,
750        close: &str,
751    ) {
752        if self.writer.should_insert_space(mode, kind, self.options) {
753            self.writer.output.push(' ');
754        }
755        self.writer.output.push_str(open);
756        self.writer.output.push(' ');
757        self.writer.output.push_str(close);
758        self.writer.previous = Some(kind);
759    }
760
761    /// Emit a child node inside wrapper-owned delimiters (e.g. `{ ... }`).
762    ///
763    /// When the child is itself a brace group, its children are inlined
764    /// directly to avoid double-bracing (`{ { a } }` → `{ a }`). This is
765    /// safe because the wrapper already provides the grouping delimiter.
766    fn emit_wrapped_content(
767        &mut self,
768        child: NodeId,
769        wrapper_mode: ContentMode,
770        content_mode: ContentMode,
771        open: &str,
772        close: &str,
773    ) {
774        self.writer
775            .emit(wrapper_mode, AtomKind::Brace, open, self.options);
776
777        let compact_math_inner = matches!(content_mode, ContentMode::Math)
778            && matches!(
779                self.options.math.spacing.group_inner_spacing,
780                MathGroupInnerSpacing::Compact
781            );
782
783        if compact_math_inner {
784            self.writer.previous = None;
785        }
786
787        match self.ast.node(child) {
788            Node::Group {
789                children,
790                kind: GroupKind::Explicit | GroupKind::Implicit,
791                mode: child_mode,
792            } => {
793                if children.is_empty()
794                    && matches!(*child_mode, ContentMode::Math)
795                    && matches!(
796                        self.options.math.spacing.group_inner_spacing,
797                        MathGroupInnerSpacing::Padded
798                    )
799                {
800                    self.writer.output.push(' ');
801                    self.writer.output.push_str(close);
802                    self.writer.previous = Some(AtomKind::Brace);
803                    return;
804                }
805                for &grandchild in children {
806                    self.visit(grandchild, *child_mode);
807                }
808            }
809            _ => self.visit(child, content_mode),
810        }
811
812        let close_mode = if compact_math_inner {
813            ContentMode::Text
814        } else {
815            content_mode
816        };
817        self.writer
818            .emit(close_mode, AtomKind::Brace, close, self.options);
819    }
820
821    fn emit_infix_operand(&mut self, node: NodeId) {
822        if self.is_empty_infix_operand(node) {
823            return;
824        }
825
826        match self.options.math.infix.grouping {
827            InfixGrouping::AlwaysExplicit => {
828                self.emit_wrapped_content(node, ContentMode::Math, ContentMode::Math, "{", "}")
829            }
830            InfixGrouping::WhenRequired => {
831                if self.infix_operand_requires_braces(node) {
832                    self.emit_wrapped_content(node, ContentMode::Math, ContentMode::Math, "{", "}");
833                } else {
834                    self.emit_unwrapped_infix_operand(node);
835                }
836            }
837        }
838    }
839
840    fn emit_unwrapped_infix_operand(&mut self, node: NodeId) {
841        match self.ast.node(node) {
842            Node::Group {
843                children,
844                kind: GroupKind::Explicit | GroupKind::Implicit,
845                mode,
846            } => {
847                for &child in children {
848                    self.visit(child, *mode);
849                }
850            }
851            _ => self.visit(node, ContentMode::Math),
852        }
853    }
854
855    fn is_empty_infix_operand(&self, node: NodeId) -> bool {
856        matches!(
857            self.ast.node(node),
858            Node::Group {
859                children,
860                kind: GroupKind::Implicit,
861                mode: ContentMode::Math,
862            } if children.is_empty()
863        )
864    }
865
866    fn infix_operand_requires_braces(&self, node: NodeId) -> bool {
867        match self.ast.node(node) {
868            Node::Infix { .. } => true,
869            Node::Group {
870                kind: GroupKind::Explicit,
871                ..
872            } => true,
873            Node::Group {
874                children,
875                kind: GroupKind::Implicit,
876                ..
877            } => children
878                .iter()
879                .any(|&child| matches!(self.ast.node(child), Node::Infix { .. })),
880            _ => false,
881        }
882    }
883
884    /// Emit a scalar argument value inside delimiters as a single opaque chunk.
885    ///
886    /// Scalars (dimensions, column specs, etc.) are written directly into the
887    /// output buffer to prevent math-mode token spacing from corrupting them
888    /// (e.g. `1pt` must not become `1 p t`).
889    fn emit_scalar_wrapped(
890        &mut self,
891        value: &ArgumentValue,
892        open: &str,
893        close: &str,
894        mode: ContentMode,
895    ) {
896        if self
897            .writer
898            .should_insert_space(mode, AtomKind::Brace, self.options)
899        {
900            self.writer.output.push(' ');
901        }
902        self.writer.output.push_str(open);
903        self.writer
904            .output
905            .push_str(&self.scalar_argument_text(value));
906        self.writer.output.push_str(close);
907        self.writer.previous = Some(AtomKind::Brace);
908    }
909
910    fn emit_recorded_delimiters(
911        &mut self,
912        open: &Delimiter,
913        close: &Delimiter,
914        node: NodeId,
915        mode: ContentMode,
916    ) {
917        self.emit_delimiter(open, mode);
918        self.visit_argument_content_node(node, mode);
919        self.emit_delimiter(close, mode);
920    }
921
922    fn emit_scalar_between_delimiters(
923        &mut self,
924        open: &Delimiter,
925        close: &Delimiter,
926        value: &ArgumentValue,
927        mode: ContentMode,
928    ) {
929        self.emit_delimiter(open, mode);
930        let text = self.scalar_argument_text(value);
931        self.writer
932            .emit(mode, AtomKind::RawFragment, &text, self.options);
933        self.emit_delimiter(close, mode);
934    }
935
936    fn scalar_argument_text(&self, value: &ArgumentValue) -> String {
937        match value {
938            ArgumentValue::Delimiter(delimiter) => self.delimiter_text(delimiter),
939            ArgumentValue::CSName(name)
940            | ArgumentValue::Dimension(name)
941            | ArgumentValue::Integer(name)
942            | ArgumentValue::KeyVal(name)
943            | ArgumentValue::Column(name) => name.clone(),
944            ArgumentValue::Boolean(_) => {
945                unreachable!("boolean values are only valid in star slots")
946            }
947            ArgumentValue::MathContent(_) | ArgumentValue::TextContent(_) => {
948                unreachable!("content variants must be serialized as child nodes")
949            }
950        }
951    }
952
953    fn delimiter_text(&self, delimiter: &Delimiter) -> String {
954        match delimiter {
955            Delimiter::None => ".".to_string(),
956            Delimiter::Char(ch) => ch.to_string(),
957            Delimiter::Control(name) => format!(r"\{}", name),
958        }
959    }
960
961    fn emit_delimiter(&mut self, delimiter: &Delimiter, mode: ContentMode) {
962        match delimiter {
963            Delimiter::None => self
964                .writer
965                .emit(mode, AtomKind::DelimiterToken, ".", self.options),
966            Delimiter::Char(ch) => self.writer.emit(
967                mode,
968                AtomKind::DelimiterToken,
969                &ch.to_string(),
970                self.options,
971            ),
972            Delimiter::Control(name) => self.writer.emit(
973                mode,
974                AtomKind::DelimiterToken,
975                &format!(r"\{}", name),
976                self.options,
977            ),
978        }
979    }
980
981    /// Visit content inside a `Delimited` / `Paired` argument, unwrapping
982    /// any top-level brace group to avoid redundant nesting.
983    fn visit_argument_content_node(&mut self, node: NodeId, mode: ContentMode) {
984        match self.ast.node(node) {
985            Node::Group {
986                children,
987                kind: GroupKind::Explicit | GroupKind::Implicit,
988                mode: child_mode,
989            } => {
990                for &child in children {
991                    self.visit(child, *child_mode);
992                }
993            }
994            _ => self.visit(node, mode),
995        }
996    }
997
998    /// Emit a `Char` node — classified as `MathChar` or `TextChunk`
999    /// depending on the surrounding mode so boundary rules apply correctly.
1000    fn visit_char(&mut self, ch: char, mode: ContentMode) {
1001        let kind = if matches!(mode, ContentMode::Text) {
1002            AtomKind::TextChunk
1003        } else {
1004            AtomKind::MathChar
1005        };
1006        let text = serialized_char(ch, mode);
1007        self.writer.emit(mode, kind, &text, self.options);
1008    }
1009
1010    fn visit_prime(&mut self, count: usize, mode: ContentMode) {
1011        if matches!(mode, ContentMode::Math) {
1012            self.writer
1013                .emit(mode, AtomKind::Prime, &"'".repeat(count), self.options);
1014        } else {
1015            self.writer
1016                .emit(mode, AtomKind::TextChunk, &"'".repeat(count), self.options);
1017        }
1018    }
1019
1020    fn emit_prime_marks(&mut self, count: usize) {
1021        self.writer.output.push_str(&"'".repeat(count));
1022        self.writer.previous = Some(AtomKind::Prime);
1023    }
1024
1025    fn finish(self) -> String {
1026        self.writer.finish()
1027    }
1028}
1029
1030fn serialized_char(ch: char, mode: ContentMode) -> String {
1031    let needs_escape = match mode {
1032        ContentMode::Math => matches!(ch, '%' | '$' | '#' | '_' | '{' | '}'),
1033        ContentMode::Text => matches!(ch, '%' | '$' | '&' | '#' | '_' | '{' | '}'),
1034    };
1035
1036    if needs_escape {
1037        format!(r"\{ch}")
1038    } else {
1039        ch.to_string()
1040    }
1041}
1042
#[cfg(test)]
mod tests {
    use super::*;

    /// An `Error` node is serialized as its recorded snippet.
    #[test]
    fn serializes_error_node_as_snippet() {
        use crate::ast::{Ast, Node};

        let mut ast = Ast::new();
        let broken = ast.new_node(Node::Error {
            message: String::from("unexpected"),
            snippet: String::from(r"\bad{"),
        });
        let root = ast.root();
        ast.append_child(root, broken);

        let rendered = serialize(&ast);
        assert_eq!(rendered, r"\bad{");
    }

    /// A star suffix attaches to the preceding control sequence with no space.
    #[test]
    fn test_atom_writer_glues_star_to_control_sequence() {
        let options = SerializeOptions::default();
        let mut writer = AtomWriter::default();

        writer.emit(
            ContentMode::Math,
            AtomKind::ControlSequence,
            r"\operatorname",
            &options,
        );
        writer.emit_star_suffix();

        let rendered = writer.finish();
        assert_eq!(rendered, r"\operatorname*");
    }

    /// Consecutive text chunks are concatenated without extra spacing.
    #[test]
    fn test_atom_writer_keeps_text_chunk_compact() {
        let options = SerializeOptions::default();
        let mut writer = AtomWriter::default();

        for chunk in ["abc", " def"].iter() {
            writer.emit(ContentMode::Text, AtomKind::TextChunk, chunk, &options);
        }

        let rendered = writer.finish();
        assert_eq!(rendered, "abc def");
    }
}