lex_core/lex/inlines/
parser.rs

1//! Inline parser implementation
2//!
3//!     Inline parsing is done by a declarative engine that will process each element declaration.
4//!     For some, this is a flat transformation (i.e. it only wraps up the text into a node, as
5//!     in bold or italic). Others are more involved, as in references, in which the engine will
6//!     execute a callback with the text content and return a node.
7//!
//!     This elegantly handles the fact that most inlines are simple and share the same
//!     structure, while still allowing more complex ones to handle their specific needs.
10//!
11//!     The parser processes inline elements in order, matching start tokens and finding
12//!     corresponding end tokens. Simple elements like bold and italic are flat transformations,
13//!     while complex elements like references use post-processing callbacks.
14//!
15//! Simple (Flat) Inline Elements
16//!
17//!     Most inline elements are simple transformations that just wrap text content:
18//!
19//!     - **Strong** (*text*): Wraps content in `InlineNode::Strong(children)`
20//!     - **Emphasis** (_text_): Wraps content in `InlineNode::Emphasis(children)`
21//!     - **Code** (`text`): Wraps literal text in `InlineNode::Code(string)` - no nested parsing
22//!     - **Math** (#formula#): Wraps literal text in `InlineNode::Math(string)` - no nested parsing
23//!
24//!     These are defined in the `default_specs()` function with just start/end tokens and whether
25//!     they're literal (no nested inline parsing inside).
26//!
27//! Complex Inline Elements (with Post-Processing)
28//!
29//!     Some inline elements need additional logic after parsing:
30//!
31//!     - **References** ([target]): After wrapping content, the `classify_reference_node` callback
32//!       analyzes the target text to determine the reference type (URL, citation, footnote, etc.)
33//!       and creates the appropriate `ReferenceType` variant.
34//!
35//!     Example: `[https://example.com]` is classified as a URL reference, while `[@doe2024]` becomes
36//!     a citation reference.
37//!
38//! Adding New Inline Types
39//!
40//!     To add a new inline element type:
41//!
42//!     1. Add a variant to `InlineKind` enum in [crate::lex::token::inline]
43//!     2. Add a variant to `InlineNode` in the ast module
//!     3. Add an `InlineSpec` to `default_specs()` with start/end tokens, and a matching arm
//!        to `InlineFrame::into_node` (its match on `InlineKind` is exhaustive)
45//!     4. If complex logic is needed, implement a post-processor callback:
46//!        ```
47//!        fn my_post_processor(node: InlineNode) -> InlineNode {
48//!            // Transform the node based on its content
49//!            node
50//!        }
51//!        ```
52//!     5. Attach the callback via `.with_post_processor(InlineKind::MyType, my_post_processor)`
53//!
54//! Extension Pattern
55//!
56//!     The parser can be customized by creating an `InlineParser` instance and attaching
57//!     post-processors for specific inline types:
58//!     ```
59//!     let parser = InlineParser::new()
60//!         .with_post_processor(InlineKind::Strong, my_custom_processor);
61//!     let result = parser.parse("*text*");
62//!     ```
63
64use super::references::classify_reference_node;
65use crate::lex::ast::elements::inlines::{InlineContent, InlineNode, ReferenceInline};
66use crate::lex::token::InlineKind;
67use once_cell::sync::Lazy;
68use std::collections::HashMap;
69
70static DEFAULT_INLINE_PARSER: Lazy<InlineParser> = Lazy::new(InlineParser::new);
71
72/// Parse inline nodes from a raw string using the default inline parser configuration.
73pub fn parse_inlines(text: &str) -> InlineContent {
74    DEFAULT_INLINE_PARSER.parse(text)
75}
76
77/// Parse inline nodes using a custom parser configuration.
78pub fn parse_inlines_with_parser(text: &str, parser: &InlineParser) -> InlineContent {
79    parser.parse(text)
80}
81
/// Optional transformation applied to a parsed inline node.
///
/// A plain function pointer: it takes ownership of the node that was just built for a closed
/// inline element and returns the node that is stored in its place.
pub type InlinePostProcessor = fn(InlineNode) -> InlineNode;
84
85/// Specification for an inline element type
86///
87/// Defines how to parse and process a specific inline element. Each spec includes:
88/// - The kind of inline element (from [InlineKind])
89/// - Start and end tokens (single characters)
90/// - Whether content is literal (no nested inline parsing)
91/// - Optional post-processing callback for complex transformations
92#[derive(Clone)]
93pub struct InlineSpec {
94    pub kind: InlineKind,
95    pub start_token: char,
96    pub end_token: char,
97    pub literal: bool,
98    pub post_process: Option<InlinePostProcessor>,
99}
100
101impl InlineSpec {
102    fn apply_post_process(&self, node: InlineNode) -> InlineNode {
103        if let Some(callback) = self.post_process {
104            callback(node)
105        } else {
106            node
107        }
108    }
109}
110
111#[derive(Clone)]
112pub struct InlineParser {
113    specs: Vec<InlineSpec>,
114    token_map: HashMap<char, usize>,
115}
116
117impl InlineParser {
118    pub fn new() -> Self {
119        Self::from_specs(default_specs())
120    }
121
122    /// Attach a post-processing callback to a specific inline kind.
123    pub fn with_post_processor(mut self, kind: InlineKind, processor: InlinePostProcessor) -> Self {
124        if let Some(spec) = self.specs.iter_mut().find(|spec| spec.kind == kind) {
125            spec.post_process = Some(processor);
126        }
127        self
128    }
129
130    pub fn parse(&self, text: &str) -> InlineContent {
131        parse_with(self, text)
132    }
133
134    fn from_specs(specs: Vec<InlineSpec>) -> Self {
135        let mut token_map = HashMap::new();
136        for (index, spec) in specs.iter().enumerate() {
137            token_map.insert(spec.start_token, index);
138        }
139        Self { specs, token_map }
140    }
141
142    fn spec(&self, index: usize) -> &InlineSpec {
143        &self.specs[index]
144    }
145
146    fn spec_index_for_start(&self, ch: char) -> Option<usize> {
147        self.token_map.get(&ch).copied()
148    }
149
150    fn spec_count(&self) -> usize {
151        self.specs.len()
152    }
153}
154
155impl Default for InlineParser {
156    fn default() -> Self {
157        InlineParser::new()
158    }
159}
160
161fn default_specs() -> Vec<InlineSpec> {
162    vec![
163        InlineSpec {
164            kind: InlineKind::Strong,
165            start_token: '*',
166            end_token: '*',
167            literal: false,
168            post_process: None,
169        },
170        InlineSpec {
171            kind: InlineKind::Emphasis,
172            start_token: '_',
173            end_token: '_',
174            literal: false,
175            post_process: None,
176        },
177        InlineSpec {
178            kind: InlineKind::Code,
179            start_token: '`',
180            end_token: '`',
181            literal: true,
182            post_process: None,
183        },
184        InlineSpec {
185            kind: InlineKind::Math,
186            start_token: '#',
187            end_token: '#',
188            literal: true,
189            post_process: None,
190        },
191        InlineSpec {
192            kind: InlineKind::Reference,
193            start_token: '[',
194            end_token: ']',
195            literal: true,
196            post_process: Some(classify_reference_node),
197        },
198    ]
199}
200
201fn parse_with(parser: &InlineParser, text: &str) -> InlineContent {
202    let chars: Vec<char> = text.chars().collect();
203    if chars.is_empty() {
204        return Vec::new();
205    }
206
207    let mut stack = vec![InlineFrame::root()];
208    let mut blocked = BlockedClosings::new(parser.spec_count());
209
210    let mut i = 0;
211    while i < chars.len() {
212        let ch = chars[i];
213        let prev = if i == 0 { None } else { Some(chars[i - 1]) };
214        let next = if i + 1 < chars.len() {
215            Some(chars[i + 1])
216        } else {
217            None
218        };
219
220        if ch == '\\' {
221            if let Some(next_char) = next {
222                if !next_char.is_alphanumeric() {
223                    // Escape non-alphanumeric characters
224                    stack.last_mut().unwrap().push_char(next_char);
225                    i += 2;
226                    continue;
227                } else {
228                    // Preserve backslash before alphanumeric
229                    stack.last_mut().unwrap().push_char('\\');
230                    i += 1;
231                    continue;
232                }
233            } else {
234                stack.last_mut().unwrap().push_char('\\');
235                break;
236            }
237        }
238
239        let mut consumed = false;
240        if let Some(spec_index) = stack.last().unwrap().spec_index {
241            let spec = parser.spec(spec_index);
242            if ch == spec.end_token {
243                if blocked.consume(spec_index) {
244                    // Literal closing paired to a disallowed nested start.
245                } else if is_valid_end(prev, next, spec) {
246                    let mut frame = stack.pop().unwrap();
247                    frame.flush_buffer();
248                    let had_content = frame.has_content();
249                    if !had_content {
250                        let parent = stack.last_mut().unwrap();
251                        parent.push_char(spec.start_token);
252                        parent.push_char(spec.end_token);
253                    } else {
254                        let node = frame.into_node(spec);
255                        let node = spec.apply_post_process(node);
256                        stack.last_mut().unwrap().push_node(node);
257                    }
258                    consumed = true;
259                }
260            }
261        }
262
263        if !consumed && !stack.last().unwrap().is_literal(parser) {
264            if let Some(spec_index) = parser.spec_index_for_start(ch) {
265                let spec = parser.spec(spec_index);
266                if is_valid_start(prev, next, spec) {
267                    if stack
268                        .iter()
269                        .any(|frame| frame.spec_index == Some(spec_index))
270                    {
271                        blocked.increment(spec_index);
272                    } else {
273                        stack.last_mut().unwrap().flush_buffer();
274                        stack.push(InlineFrame::new(spec_index));
275                        consumed = true;
276                    }
277                }
278            }
279        }
280
281        if !consumed {
282            stack.last_mut().unwrap().push_char(ch);
283        }
284
285        i += 1;
286    }
287
288    if let Some(frame) = stack.last_mut() {
289        frame.flush_buffer();
290    }
291
292    while stack.len() > 1 {
293        let mut frame = stack.pop().unwrap();
294        frame.flush_buffer();
295        let spec_index = frame
296            .spec_index
297            .expect("non-root stack frame must have a spec");
298        let spec = parser.spec(spec_index);
299        let parent = stack.last_mut().unwrap();
300        parent.push_char(spec.start_token);
301        for child in frame.children {
302            parent.push_node(child);
303        }
304    }
305
306    let mut root = stack.pop().unwrap();
307    root.flush_buffer();
308    root.children
309}
310
311struct InlineFrame {
312    spec_index: Option<usize>,
313    buffer: String,
314    children: InlineContent,
315}
316
317impl InlineFrame {
318    fn root() -> Self {
319        Self {
320            spec_index: None,
321            buffer: String::new(),
322            children: Vec::new(),
323        }
324    }
325
326    fn new(spec_index: usize) -> Self {
327        Self {
328            spec_index: Some(spec_index),
329            buffer: String::new(),
330            children: Vec::new(),
331        }
332    }
333
334    fn has_content(&self) -> bool {
335        !self.buffer.is_empty() || !self.children.is_empty()
336    }
337
338    fn push_char(&mut self, ch: char) {
339        self.buffer.push(ch);
340    }
341
342    fn flush_buffer(&mut self) {
343        if self.buffer.is_empty() {
344            return;
345        }
346        let text = std::mem::take(&mut self.buffer);
347        if let Some(InlineNode::Plain { text: existing, .. }) = self.children.last_mut() {
348            existing.push_str(&text);
349        } else {
350            self.children.push(InlineNode::Plain {
351                text,
352                annotations: Vec::new(),
353            });
354        }
355    }
356
357    fn push_node(&mut self, node: InlineNode) {
358        self.flush_buffer();
359        match node {
360            InlineNode::Plain { text, annotations } => {
361                if text.is_empty() {
362                    return;
363                }
364                if let Some(InlineNode::Plain { text: existing, .. }) = self.children.last_mut() {
365                    existing.push_str(&text);
366                    // Note: annotations from the merged node are discarded
367                    // This is intentional as Plain nodes are typically created without annotations
368                } else {
369                    self.children.push(InlineNode::Plain { text, annotations });
370                }
371            }
372            other => self.children.push(other),
373        }
374    }
375
376    fn into_node(self, spec: &InlineSpec) -> InlineNode {
377        match spec.kind {
378            InlineKind::Strong => InlineNode::Strong {
379                content: self.children,
380                annotations: Vec::new(),
381            },
382            InlineKind::Emphasis => InlineNode::Emphasis {
383                content: self.children,
384                annotations: Vec::new(),
385            },
386            InlineKind::Code => InlineNode::Code {
387                text: flatten_literal(self.children),
388                annotations: Vec::new(),
389            },
390            InlineKind::Math => InlineNode::Math {
391                text: flatten_literal(self.children),
392                annotations: Vec::new(),
393            },
394            InlineKind::Reference => InlineNode::Reference {
395                data: ReferenceInline::new(flatten_literal(self.children)),
396                annotations: Vec::new(),
397            },
398        }
399    }
400
401    fn is_literal(&self, parser: &InlineParser) -> bool {
402        self.spec_index
403            .map(|index| parser.spec(index).literal)
404            .unwrap_or(false)
405    }
406}
407
408fn flatten_literal(children: InlineContent) -> String {
409    let mut text = String::new();
410    for node in children {
411        match node {
412            InlineNode::Plain { text: segment, .. } => text.push_str(&segment),
413            _ => fatal_literal_content(),
414        }
415    }
416    text
417}
418
/// Aborts parsing when a literal inline element turns out to contain a nested node.
///
/// Never returns; it always panics.
fn fatal_literal_content() -> ! {
    panic!("Literal inline nodes must not contain nested nodes");
}
422
/// Per-spec counters of end tokens that must be treated as plain text.
///
/// A counter is bumped with `increment` and later paid back, one end token at a time, with
/// `consume`.
struct BlockedClosings {
    /// One counter per spec, indexed by spec index.
    counts: Vec<usize>,
}

impl BlockedClosings {
    /// Creates `spec_len` counters, all starting at zero.
    fn new(spec_len: usize) -> Self {
        let counts = vec![0; spec_len];
        Self { counts }
    }

    /// Records one more blocked closing for `spec_index`; out-of-range indices are ignored.
    fn increment(&mut self, spec_index: usize) {
        match self.counts.get_mut(spec_index) {
            Some(slot) => *slot += 1,
            None => {}
        }
    }

    /// Uses up one blocked closing for `spec_index`.
    ///
    /// Returns `true` when a pending blocked closing existed (and was decremented), `false`
    /// when the counter was already zero or the index is out of range.
    fn consume(&mut self, spec_index: usize) -> bool {
        match self.counts.get_mut(spec_index) {
            Some(slot) if *slot > 0 => {
                *slot -= 1;
                true
            }
            _ => false,
        }
    }
}
450
451fn is_valid_start(prev: Option<char>, next: Option<char>, spec: &InlineSpec) -> bool {
452    if spec.kind == InlineKind::Reference {
453        !is_word(prev) && next.is_some()
454    } else {
455        !is_word(prev) && is_word(next)
456    }
457}
458
459fn is_valid_end(prev: Option<char>, next: Option<char>, spec: &InlineSpec) -> bool {
460    let inside_valid = if spec.literal {
461        prev.is_some()
462    } else {
463        matches!(prev, Some(ch) if !ch.is_whitespace())
464    };
465
466    inside_valid && !is_word(next)
467}
468
/// `true` when `ch` is present and alphanumeric; `None` counts as a non-word boundary.
fn is_word(ch: Option<char>) -> bool {
    matches!(ch, Some(c) if c.is_alphanumeric())
}
472
473#[cfg(test)]
474mod tests {
475    use super::*;
476    use crate::lex::inlines::{InlineNode, PageFormat, ReferenceType};
477
478    #[test]
479    fn parses_plain_text() {
480        let nodes = parse_inlines("hello world");
481        assert_eq!(
482            nodes,
483            vec![InlineNode::Plain {
484                text: "hello world".into(),
485                annotations: Vec::new()
486            }]
487        );
488    }
489
490    #[test]
491    fn parses_strong_and_emphasis() {
492        let nodes = parse_inlines("*strong _inner_* text");
493        assert_eq!(nodes.len(), 2);
494        match &nodes[0] {
495            InlineNode::Strong { content, .. } => {
496                assert_eq!(content.len(), 2);
497                assert_eq!(
498                    content[0],
499                    InlineNode::Plain {
500                        text: "strong ".into(),
501                        annotations: Vec::new()
502                    }
503                );
504                match &content[1] {
505                    InlineNode::Emphasis { content: inner, .. } => {
506                        assert_eq!(
507                            inner,
508                            &vec![InlineNode::Plain {
509                                text: "inner".into(),
510                                annotations: Vec::new()
511                            }]
512                        );
513                    }
514                    other => panic!("Unexpected child: {other:?}"),
515                }
516            }
517            other => panic!("Unexpected node: {other:?}"),
518        }
519        assert_eq!(
520            nodes[1],
521            InlineNode::Plain {
522                text: " text".into(),
523                annotations: Vec::new()
524            }
525        );
526    }
527
528    #[test]
529    fn nested_emphasis_inside_strong() {
530        let nodes = parse_inlines("*strong and _emphasis_* text");
531        assert_eq!(nodes.len(), 2);
532        match &nodes[0] {
533            InlineNode::Strong { content, .. } => {
534                assert_eq!(content.len(), 2);
535                assert_eq!(content[0], InlineNode::plain("strong and ".into()));
536                match &content[1] {
537                    InlineNode::Emphasis { content: inner, .. } => {
538                        assert_eq!(inner, &vec![InlineNode::plain("emphasis".into())]);
539                    }
540                    other => panic!("Unexpected child: {other:?}"),
541                }
542            }
543            _ => panic!("Expected strong node"),
544        }
545    }
546
547    #[test]
548    fn code_is_literal() {
549        let nodes = parse_inlines("`a * literal _` text");
550        assert_eq!(nodes.len(), 2);
551        assert_eq!(nodes[0], InlineNode::code("a * literal _".into()));
552        assert_eq!(nodes[1], InlineNode::plain(" text".into()));
553    }
554
555    #[test]
556    fn math_is_literal() {
557        let nodes = parse_inlines("#x + y#");
558        assert_eq!(nodes, vec![InlineNode::math("x + y".into())]);
559    }
560
561    #[test]
562    fn unmatched_start_is_literal() {
563        let nodes = parse_inlines("prefix *text");
564        assert_eq!(nodes, vec![InlineNode::plain("prefix *text".into())]);
565    }
566
567    #[test]
568    fn unmatched_nested_preserves_children() {
569        let nodes = parse_inlines("*a _b_ c");
570        assert_eq!(nodes.len(), 3);
571        assert_eq!(nodes[0], InlineNode::plain("*a ".into()));
572        match &nodes[1] {
573            InlineNode::Emphasis { content, .. } => {
574                assert_eq!(content, &vec![InlineNode::plain("b".into())]);
575            }
576            other => panic!("Unexpected node: {other:?}"),
577        }
578        assert_eq!(nodes[2], InlineNode::plain(" c".into()));
579    }
580
581    #[test]
582    fn same_type_nesting_skips_inner_pair() {
583        let nodes = parse_inlines("*outer *inner* text*");
584        assert_eq!(nodes.len(), 1);
585        match &nodes[0] {
586            InlineNode::Strong { content, .. } => {
587                assert_eq!(
588                    content,
589                    &vec![InlineNode::plain("outer *inner* text".into())]
590                );
591            }
592            other => panic!("Unexpected node: {other:?}"),
593        }
594    }
595
596    #[test]
597    fn reference_detects_url() {
598        let nodes = parse_inlines("[https://example.com]");
599        match &nodes[0] {
600            InlineNode::Reference { data, .. } => match &data.reference_type {
601                ReferenceType::Url { target } => assert_eq!(target, "https://example.com"),
602                other => panic!("Expected URL reference, got {other:?}"),
603            },
604            other => panic!("Unexpected node: {other:?}"),
605        }
606    }
607
608    #[test]
609    fn reference_detects_tk_identifier() {
610        let nodes = parse_inlines("[TK-feature]");
611        match &nodes[0] {
612            InlineNode::Reference { data, .. } => match &data.reference_type {
613                ReferenceType::ToCome { identifier } => {
614                    assert_eq!(identifier.as_deref(), Some("feature"));
615                }
616                other => panic!("Expected TK reference, got {other:?}"),
617            },
618            other => panic!("Unexpected node: {other:?}"),
619        }
620    }
621
622    #[test]
623    fn reference_detects_citation_and_footnotes() {
624        let citation = parse_inlines("[@doe2024]");
625        let labeled = parse_inlines("[^note1]");
626        let numbered = parse_inlines("[42]");
627
628        match &citation[0] {
629            InlineNode::Reference { data, .. } => match &data.reference_type {
630                ReferenceType::Citation(citation_data) => {
631                    assert_eq!(citation_data.keys, vec!["doe2024".to_string()]);
632                    assert!(citation_data.locator.is_none());
633                }
634                other => panic!("Expected citation, got {other:?}"),
635            },
636            _ => panic!("Expected reference"),
637        }
638        match &labeled[0] {
639            InlineNode::Reference { data, .. } => match &data.reference_type {
640                ReferenceType::FootnoteLabeled { label } => assert_eq!(label, "note1"),
641                other => panic!("Expected labeled footnote, got {other:?}"),
642            },
643            _ => panic!("Expected reference"),
644        }
645        match &numbered[0] {
646            InlineNode::Reference { data, .. } => match &data.reference_type {
647                ReferenceType::FootnoteNumber { number } => assert_eq!(*number, 42),
648                other => panic!("Expected numeric footnote, got {other:?}"),
649            },
650            _ => panic!("Expected reference"),
651        }
652    }
653
654    #[test]
655    fn reference_parses_citation_locator() {
656        let nodes = parse_inlines("[@doe2024; @smith2023, pp. 45-46,47]");
657        match &nodes[0] {
658            InlineNode::Reference { data, .. } => match &data.reference_type {
659                ReferenceType::Citation(citation_data) => {
660                    assert_eq!(
661                        citation_data.keys,
662                        vec!["doe2024".to_string(), "smith2023".to_string()]
663                    );
664                    let locator = citation_data.locator.as_ref().expect("expected locator");
665                    assert!(matches!(locator.format, PageFormat::Pp));
666                    assert_eq!(locator.ranges.len(), 2);
667                    assert_eq!(locator.ranges[0].start, 45);
668                    assert_eq!(locator.ranges[0].end, Some(46));
669                    assert_eq!(locator.ranges[1].start, 47);
670                    assert!(locator.ranges[1].end.is_none());
671                }
672                other => panic!("Expected citation, got {other:?}"),
673            },
674            _ => panic!("Expected reference"),
675        }
676    }
677
678    #[test]
679    fn reference_detects_general_and_not_sure() {
680        let general = parse_inlines("[Section Title]");
681        let unsure = parse_inlines("[!!!]");
682        match &general[0] {
683            InlineNode::Reference { data, .. } => match &data.reference_type {
684                ReferenceType::General { target } => assert_eq!(target, "Section Title"),
685                other => panic!("Expected general reference, got {other:?}"),
686            },
687            _ => panic!("Expected reference"),
688        }
689        match &unsure[0] {
690            InlineNode::Reference { data, .. } => {
691                assert!(matches!(data.reference_type, ReferenceType::NotSure));
692            }
693            _ => panic!("Expected reference"),
694        }
695    }
696
697    fn annotate_strong(node: InlineNode) -> InlineNode {
698        match node {
699            InlineNode::Strong {
700                mut content,
701                annotations,
702            } => {
703                let mut annotated = vec![InlineNode::plain("[strong]".into())];
704                annotated.append(&mut content);
705                InlineNode::Strong {
706                    content: annotated,
707                    annotations,
708                }
709            }
710            other => other,
711        }
712    }
713
714    #[test]
715    fn post_process_callback_transforms_node() {
716        let parser = InlineParser::new().with_post_processor(InlineKind::Strong, annotate_strong);
717        let nodes = parser.parse("*bold*");
718        assert_eq!(nodes.len(), 1);
719        match &nodes[0] {
720            InlineNode::Strong { content, .. } => {
721                assert_eq!(content[0], InlineNode::plain("[strong]".into()));
722                assert_eq!(content[1], InlineNode::plain("bold".into()));
723            }
724            other => panic!("Unexpected inline node: {other:?}"),
725        }
726    }
727
728    #[test]
729    fn escaped_tokens_are_literal() {
730        let nodes = parse_inlines("\\*literal\\*");
731        assert_eq!(nodes, vec![InlineNode::plain("*literal*".into())]);
732    }
733
734    #[test]
735    fn backslash_before_alphanumeric_preserved() {
736        let nodes = parse_inlines("C:\\Users\\name");
737        assert_eq!(nodes, vec![InlineNode::plain("C:\\Users\\name".into())]);
738    }
739
740    #[test]
741    fn escape_works_in_paths() {
742        let nodes = parse_inlines("Path: C:\\\\Users\\\\name");
743        assert_eq!(
744            nodes,
745            vec![InlineNode::plain("Path: C:\\Users\\name".into())]
746        );
747    }
748
749    #[test]
750    fn arithmetic_not_parsed_as_inline() {
751        let nodes = parse_inlines("7 * 8");
752        assert_eq!(nodes, vec![InlineNode::plain("7 * 8".into())]);
753    }
754
755    #[test]
756    fn word_boundary_start_invalid() {
757        let nodes = parse_inlines("word*s*");
758        assert_eq!(nodes, vec![InlineNode::plain("word*s*".into())]);
759    }
760
761    #[test]
762    fn multiple_arithmetic_expressions() {
763        let nodes = parse_inlines("Calculate 7 * 8 + 3 * 4");
764        assert_eq!(
765            nodes,
766            vec![InlineNode::plain("Calculate 7 * 8 + 3 * 4".into())]
767        );
768    }
769
770    #[test]
771    fn inline_node_annotations_empty_by_default() {
772        let nodes = parse_inlines("*bold* text");
773        assert_eq!(nodes.len(), 2);
774        assert!(nodes[0].annotations().is_empty());
775        assert!(nodes[1].annotations().is_empty());
776    }
777
778    #[test]
779    fn with_annotation_adds_annotation_to_node() {
780        use crate::lex::ast::elements::{Annotation, Label};
781
782        let annotation = Annotation::marker(Label::new("test".to_string()));
783        let node = InlineNode::plain("text".into()).with_annotation(annotation.clone());
784
785        assert_eq!(node.annotations().len(), 1);
786        assert_eq!(node.annotations()[0].data.label.value, "test");
787    }
788
789    #[test]
790    fn with_annotations_adds_multiple_annotations() {
791        use crate::lex::ast::elements::{Annotation, Label, Parameter};
792
793        let anno1 = Annotation::marker(Label::new("doc.data".to_string()));
794        let anno2 = Annotation::with_parameters(
795            Label::new("test".to_string()),
796            vec![Parameter::new("key".to_string(), "value".to_string())],
797        );
798
799        let node = InlineNode::math("x + y".into()).with_annotations(vec![anno1, anno2]);
800
801        assert_eq!(node.annotations().len(), 2);
802        assert_eq!(node.annotations()[0].data.label.value, "doc.data");
803        assert_eq!(node.annotations()[1].data.label.value, "test");
804    }
805
806    #[test]
807    fn annotations_mut_allows_modification() {
808        use crate::lex::ast::elements::{Annotation, Label};
809
810        let mut node = InlineNode::code("code".into());
811        assert!(node.annotations().is_empty());
812
813        let annotation = Annotation::marker(Label::new("highlighted".to_string()));
814        node.annotations_mut().push(annotation);
815
816        assert_eq!(node.annotations().len(), 1);
817        assert_eq!(node.annotations()[0].data.label.value, "highlighted");
818    }
819
820    #[test]
821    fn post_processor_can_add_annotations() {
822        use crate::lex::ast::elements::{Annotation, Label, Parameter};
823
824        fn add_mathml_annotation(node: InlineNode) -> InlineNode {
825            match node {
826                InlineNode::Math {
827                    text,
828                    mut annotations,
829                } => {
830                    let anno = Annotation::with_parameters(
831                        Label::new("doc.data".to_string()),
832                        vec![Parameter::new("type".to_string(), "mathml".to_string())],
833                    );
834                    annotations.push(anno);
835                    InlineNode::Math { text, annotations }
836                }
837                other => other,
838            }
839        }
840
841        let parser =
842            InlineParser::new().with_post_processor(InlineKind::Math, add_mathml_annotation);
843        let nodes = parser.parse("#x + y#");
844
845        assert_eq!(nodes.len(), 1);
846        match &nodes[0] {
847            InlineNode::Math { text, annotations } => {
848                assert_eq!(text, "x + y");
849                assert_eq!(annotations.len(), 1);
850                assert_eq!(annotations[0].data.label.value, "doc.data");
851                assert_eq!(annotations[0].data.parameters.len(), 1);
852                assert_eq!(annotations[0].data.parameters[0].key, "type");
853                assert_eq!(annotations[0].data.parameters[0].value, "mathml");
854            }
855            other => panic!("Expected math node, got {other:?}"),
856        }
857    }
858}