Skip to main content

lex_core/lex/inlines/
parser.rs

//! Inline parser implementation
//!
//!     Inline parsing is done by a declarative engine that will process each element declaration.
//!     For some, this is a flat transformation (i.e. it only wraps up the text into a node, as
//!     in bold or italic). Others are more involved, as in references, in which the engine will
//!     execute a callback with the parsed node and return the node to keep.
//!
//!     This elegantly solves the fact that most inlines are simple and share very much the same
//!     structure, while allowing the more complex ones to handle their specific needs.
//!
//!     The parser processes inline elements in order, matching start tokens and finding the
//!     corresponding end tokens. Simple elements like bold and italic are flat transformations,
//!     while complex elements like references use post-processing callbacks.
//!
//! Simple (Flat) Inline Elements
//!
//!     Most inline elements are simple transformations that just wrap text content:
//!
//!     - **Strong** (*text*): Wraps content in `InlineNode::Strong { content, .. }`
//!     - **Emphasis** (_text_): Wraps content in `InlineNode::Emphasis { content, .. }`
//!     - **Code** (`text`): Wraps literal text in `InlineNode::Code { text, .. }` - no nested parsing
//!     - **Math** (#formula#): Wraps literal text in `InlineNode::Math { text, .. }` - no nested parsing
//!
//!     These are defined in the `default_specs()` function with just start/end tokens and whether
//!     they're literal (no nested inline parsing inside).
//!
//! Complex Inline Elements (with Post-Processing)
//!
//!     Some inline elements need additional logic after parsing:
//!
//!     - **References** ([target]): After wrapping the literal content, the `classify_reference_node`
//!       callback analyzes the target text to determine the reference type (URL, citation,
//!       footnote, etc.) and creates the appropriate `ReferenceType` variant.
//!
//!     Example: `[https://example.com]` is classified as a URL reference, while `[@doe2024]` becomes
//!     a citation reference.
//!
//! Adding New Inline Types
//!
//!     To add a new inline element type:
//!
//!     1. Add a variant to the `InlineKind` enum in [crate::lex::token::inline]
//!     2. Add a variant to `InlineNode` in the ast module
//!     3. Add an `InlineSpec` to `default_specs()` with start/end tokens
//!     4. If complex logic is needed, implement a post-processor callback:
//!        ```
//!        fn my_post_processor(node: InlineNode) -> InlineNode {
//!            // Transform the node based on its content
//!            node
//!        }
//!        ```
//!     5. Attach the callback via `.with_post_processor(InlineKind::MyType, my_post_processor)`
//!
//! Extension Pattern
//!
//!     The parser can be customized by creating an `InlineParser` instance and attaching
//!     post-processors for specific inline types:
//!     ```
//!     let parser = InlineParser::new()
//!         .with_post_processor(InlineKind::Strong, my_custom_processor);
//!     let result = parser.parse("*text*");
//!     ```
63
64use super::references::classify_reference_node;
65use crate::lex::ast::elements::inlines::{InlineContent, InlineNode, ReferenceInline};
66use crate::lex::escape::unescape_inline_char;
67use crate::lex::token::InlineKind;
68use once_cell::sync::Lazy;
69use std::collections::HashMap;
70
71static DEFAULT_INLINE_PARSER: Lazy<InlineParser> = Lazy::new(InlineParser::new);
72
73/// Parse inline nodes from a raw string using the default inline parser configuration.
74pub fn parse_inlines(text: &str) -> InlineContent {
75    DEFAULT_INLINE_PARSER.parse(text)
76}
77
78/// Parse inline nodes using a custom parser configuration.
79pub fn parse_inlines_with_parser(text: &str, parser: &InlineParser) -> InlineContent {
80    parser.parse(text)
81}
82
83/// Optional transformation applied to a parsed inline node.
84pub type InlinePostProcessor = fn(InlineNode) -> InlineNode;
85
86/// Specification for an inline element type
87///
88/// Defines how to parse and process a specific inline element. Each spec includes:
89/// - The kind of inline element (from [InlineKind])
90/// - Start and end tokens (single characters)
91/// - Whether content is literal (no nested inline parsing)
92/// - Optional post-processing callback for complex transformations
93#[derive(Clone)]
94pub struct InlineSpec {
95    pub kind: InlineKind,
96    pub start_token: char,
97    pub end_token: char,
98    pub literal: bool,
99    pub post_process: Option<InlinePostProcessor>,
100}
101
102impl InlineSpec {
103    fn apply_post_process(&self, node: InlineNode) -> InlineNode {
104        if let Some(callback) = self.post_process {
105            callback(node)
106        } else {
107            node
108        }
109    }
110}
111
112#[derive(Clone)]
113pub struct InlineParser {
114    specs: Vec<InlineSpec>,
115    token_map: HashMap<char, usize>,
116}
117
118impl InlineParser {
119    pub fn new() -> Self {
120        Self::from_specs(default_specs())
121    }
122
123    /// Attach a post-processing callback to a specific inline kind.
124    pub fn with_post_processor(mut self, kind: InlineKind, processor: InlinePostProcessor) -> Self {
125        if let Some(spec) = self.specs.iter_mut().find(|spec| spec.kind == kind) {
126            spec.post_process = Some(processor);
127        }
128        self
129    }
130
131    pub fn parse(&self, text: &str) -> InlineContent {
132        parse_with(self, text)
133    }
134
135    fn from_specs(specs: Vec<InlineSpec>) -> Self {
136        let mut token_map = HashMap::new();
137        for (index, spec) in specs.iter().enumerate() {
138            token_map.insert(spec.start_token, index);
139        }
140        Self { specs, token_map }
141    }
142
143    fn spec(&self, index: usize) -> &InlineSpec {
144        &self.specs[index]
145    }
146
147    fn spec_index_for_start(&self, ch: char) -> Option<usize> {
148        self.token_map.get(&ch).copied()
149    }
150
151    fn spec_count(&self) -> usize {
152        self.specs.len()
153    }
154}
155
156impl Default for InlineParser {
157    fn default() -> Self {
158        InlineParser::new()
159    }
160}
161
162fn default_specs() -> Vec<InlineSpec> {
163    vec![
164        InlineSpec {
165            kind: InlineKind::Strong,
166            start_token: '*',
167            end_token: '*',
168            literal: false,
169            post_process: None,
170        },
171        InlineSpec {
172            kind: InlineKind::Emphasis,
173            start_token: '_',
174            end_token: '_',
175            literal: false,
176            post_process: None,
177        },
178        InlineSpec {
179            kind: InlineKind::Code,
180            start_token: '`',
181            end_token: '`',
182            literal: true,
183            post_process: None,
184        },
185        InlineSpec {
186            kind: InlineKind::Math,
187            start_token: '#',
188            end_token: '#',
189            literal: true,
190            post_process: None,
191        },
192        InlineSpec {
193            kind: InlineKind::Reference,
194            start_token: '[',
195            end_token: ']',
196            literal: true,
197            post_process: Some(classify_reference_node),
198        },
199    ]
200}
201
202fn parse_with(parser: &InlineParser, text: &str) -> InlineContent {
203    let chars: Vec<char> = text.chars().collect();
204    if chars.is_empty() {
205        return Vec::new();
206    }
207
208    let mut stack = vec![InlineFrame::root()];
209    let mut blocked = BlockedClosings::new(parser.spec_count());
210
211    let mut i = 0;
212    while i < chars.len() {
213        let ch = chars[i];
214        let prev = if i == 0 { None } else { Some(chars[i - 1]) };
215        let next = if i + 1 < chars.len() {
216            Some(chars[i + 1])
217        } else {
218            None
219        };
220
221        // Escape processing only applies outside literal contexts (code/math).
222        // Inside literal elements, backslash is just a regular character.
223        if ch == '\\' && !stack.last().unwrap().is_literal(parser) {
224            match unescape_inline_char(next) {
225                crate::lex::escape::EscapeAction::Escape(escaped) => {
226                    stack.last_mut().unwrap().push_char(escaped);
227                    i += 2;
228                    continue;
229                }
230                crate::lex::escape::EscapeAction::Literal => {
231                    stack.last_mut().unwrap().push_char('\\');
232                    if next.is_none() {
233                        break;
234                    }
235                    i += 1;
236                    continue;
237                }
238            }
239        }
240
241        let mut consumed = false;
242        if let Some(spec_index) = stack.last().unwrap().spec_index {
243            let spec = parser.spec(spec_index);
244            if ch == spec.end_token {
245                if blocked.consume(spec_index) {
246                    // Literal closing paired to a disallowed nested start.
247                } else if is_valid_end(prev, next, spec) {
248                    let mut frame = stack.pop().unwrap();
249                    frame.flush_buffer();
250                    let had_content = frame.has_content();
251                    if !had_content {
252                        let parent = stack.last_mut().unwrap();
253                        parent.push_char(spec.start_token);
254                        parent.push_char(spec.end_token);
255                    } else {
256                        let node = frame.into_node(spec);
257                        let node = spec.apply_post_process(node);
258                        stack.last_mut().unwrap().push_node(node);
259                    }
260                    consumed = true;
261                }
262            }
263        }
264
265        if !consumed && !stack.last().unwrap().is_literal(parser) {
266            if let Some(spec_index) = parser.spec_index_for_start(ch) {
267                let spec = parser.spec(spec_index);
268                if is_valid_start(prev, next, spec) {
269                    if stack
270                        .iter()
271                        .any(|frame| frame.spec_index == Some(spec_index))
272                    {
273                        blocked.increment(spec_index);
274                    } else {
275                        stack.last_mut().unwrap().flush_buffer();
276                        stack.push(InlineFrame::new(spec_index));
277                        consumed = true;
278                    }
279                }
280            }
281        }
282
283        if !consumed {
284            stack.last_mut().unwrap().push_char(ch);
285        }
286
287        i += 1;
288    }
289
290    if let Some(frame) = stack.last_mut() {
291        frame.flush_buffer();
292    }
293
294    while stack.len() > 1 {
295        let mut frame = stack.pop().unwrap();
296        frame.flush_buffer();
297        let spec_index = frame
298            .spec_index
299            .expect("non-root stack frame must have a spec");
300        let spec = parser.spec(spec_index);
301        let parent = stack.last_mut().unwrap();
302        parent.push_char(spec.start_token);
303        for child in frame.children {
304            parent.push_node(child);
305        }
306    }
307
308    let mut root = stack.pop().unwrap();
309    root.flush_buffer();
310    root.children
311}
312
313struct InlineFrame {
314    spec_index: Option<usize>,
315    buffer: String,
316    children: InlineContent,
317}
318
319impl InlineFrame {
320    fn root() -> Self {
321        Self {
322            spec_index: None,
323            buffer: String::new(),
324            children: Vec::new(),
325        }
326    }
327
328    fn new(spec_index: usize) -> Self {
329        Self {
330            spec_index: Some(spec_index),
331            buffer: String::new(),
332            children: Vec::new(),
333        }
334    }
335
336    fn has_content(&self) -> bool {
337        !self.buffer.is_empty() || !self.children.is_empty()
338    }
339
340    fn push_char(&mut self, ch: char) {
341        self.buffer.push(ch);
342    }
343
344    fn flush_buffer(&mut self) {
345        if self.buffer.is_empty() {
346            return;
347        }
348        let text = std::mem::take(&mut self.buffer);
349        if let Some(InlineNode::Plain { text: existing, .. }) = self.children.last_mut() {
350            existing.push_str(&text);
351        } else {
352            self.children.push(InlineNode::Plain {
353                text,
354                annotations: Vec::new(),
355            });
356        }
357    }
358
359    fn push_node(&mut self, node: InlineNode) {
360        self.flush_buffer();
361        match node {
362            InlineNode::Plain { text, annotations } => {
363                if text.is_empty() {
364                    return;
365                }
366                if let Some(InlineNode::Plain { text: existing, .. }) = self.children.last_mut() {
367                    existing.push_str(&text);
368                    // Note: annotations from the merged node are discarded
369                    // This is intentional as Plain nodes are typically created without annotations
370                } else {
371                    self.children.push(InlineNode::Plain { text, annotations });
372                }
373            }
374            other => self.children.push(other),
375        }
376    }
377
378    fn into_node(self, spec: &InlineSpec) -> InlineNode {
379        match spec.kind {
380            InlineKind::Strong => InlineNode::Strong {
381                content: self.children,
382                annotations: Vec::new(),
383            },
384            InlineKind::Emphasis => InlineNode::Emphasis {
385                content: self.children,
386                annotations: Vec::new(),
387            },
388            InlineKind::Code => InlineNode::Code {
389                text: flatten_literal(self.children),
390                annotations: Vec::new(),
391            },
392            InlineKind::Math => InlineNode::Math {
393                text: flatten_literal(self.children),
394                annotations: Vec::new(),
395            },
396            InlineKind::Reference => InlineNode::Reference {
397                data: ReferenceInline::new(flatten_literal(self.children)),
398                annotations: Vec::new(),
399            },
400        }
401    }
402
403    fn is_literal(&self, parser: &InlineParser) -> bool {
404        self.spec_index
405            .map(|index| parser.spec(index).literal)
406            .unwrap_or(false)
407    }
408}
409
410fn flatten_literal(children: InlineContent) -> String {
411    let mut text = String::new();
412    for node in children {
413        match node {
414            InlineNode::Plain { text: segment, .. } => text.push_str(&segment),
415            _ => fatal_literal_content(),
416        }
417    }
418    text
419}
420
/// Aborts with a panic: a literal inline element was found to contain
/// something other than plain text, which the parser must never produce.
#[cold]
fn fatal_literal_content() -> ! {
    panic!("Literal inline nodes must not contain nested nodes");
}
424
/// Per-spec counters of closing tokens that must be treated as plain text
/// because their matching start token was rejected as same-kind nesting.
struct BlockedClosings {
    counts: Vec<usize>,
}

impl BlockedClosings {
    /// Creates one zeroed counter per spec.
    fn new(spec_len: usize) -> Self {
        Self {
            counts: vec![0; spec_len],
        }
    }

    /// Records one more blocked closing for `spec_index`; indices outside the
    /// table are ignored.
    fn increment(&mut self, spec_index: usize) {
        if spec_index < self.counts.len() {
            self.counts[spec_index] += 1;
        }
    }

    /// Uses up one blocked closing for `spec_index`. Returns `true` when one
    /// was pending, `false` when the counter is zero or the index is unknown.
    fn consume(&mut self, spec_index: usize) -> bool {
        match self.counts.get_mut(spec_index) {
            Some(pending) if *pending > 0 => {
                *pending -= 1;
                true
            }
            _ => false,
        }
    }
}
452
453fn is_valid_start(prev: Option<char>, next: Option<char>, spec: &InlineSpec) -> bool {
454    if spec.literal {
455        // Literal elements (code, math, reference) accept any non-whitespace content.
456        // This allows code/math to start with \, {, (, *, etc.
457        !is_word(prev) && next.is_some_and(|c| !c.is_whitespace())
458    } else {
459        // Non-literal elements (strong, emphasis) require word char after marker.
460        !is_word(prev) && is_word(next)
461    }
462}
463
464fn is_valid_end(prev: Option<char>, next: Option<char>, spec: &InlineSpec) -> bool {
465    let inside_valid = if spec.literal {
466        prev.is_some()
467    } else {
468        matches!(prev, Some(ch) if !ch.is_whitespace())
469    };
470
471    inside_valid && !is_word(next)
472}
473
/// True when `ch` is present and alphanumeric; `None` (start or end of
/// input) counts as a non-word position.
fn is_word(ch: Option<char>) -> bool {
    ch.is_some_and(char::is_alphanumeric)
}
477
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lex::inlines::{InlineNode, PageFormat, ReferenceType};

    /// Builds the annotation-free plain-text node the parser emits for
    /// unformatted runs of text.
    fn plain(text: &str) -> InlineNode {
        InlineNode::Plain {
            text: text.into(),
            annotations: Vec::new(),
        }
    }

    #[test]
    fn parses_plain_text() {
        assert_eq!(parse_inlines("hello world"), vec![plain("hello world")]);
    }

    #[test]
    fn parses_strong_and_emphasis() {
        let nodes = parse_inlines("*strong _inner_* text");
        assert_eq!(nodes.len(), 2);

        let first = &nodes[0];
        let InlineNode::Strong { content, .. } = first else {
            panic!("Unexpected node: {first:?}");
        };
        assert_eq!(content.len(), 2);
        assert_eq!(content[0], plain("strong "));

        let child = &content[1];
        let InlineNode::Emphasis { content: inner, .. } = child else {
            panic!("Unexpected child: {child:?}");
        };
        assert_eq!(inner, &vec![plain("inner")]);

        assert_eq!(nodes[1], plain(" text"));
    }

    #[test]
    fn nested_emphasis_inside_strong() {
        let nodes = parse_inlines("*strong and _emphasis_* text");
        assert_eq!(nodes.len(), 2);

        let InlineNode::Strong { content, .. } = &nodes[0] else {
            panic!("Expected strong node");
        };
        assert_eq!(content.len(), 2);
        assert_eq!(content[0], plain("strong and "));

        let child = &content[1];
        let InlineNode::Emphasis { content: inner, .. } = child else {
            panic!("Unexpected child: {child:?}");
        };
        assert_eq!(inner, &vec![plain("emphasis")]);
    }

    #[test]
    fn code_is_literal() {
        let nodes = parse_inlines("`a * literal _` text");
        assert_eq!(nodes.len(), 2);
        assert_eq!(nodes[0], InlineNode::code("a * literal _".into()));
        assert_eq!(nodes[1], plain(" text"));
    }

    #[test]
    fn math_is_literal() {
        assert_eq!(
            parse_inlines("#x + y#"),
            vec![InlineNode::math("x + y".into())]
        );
    }

    #[test]
    fn code_preserves_backslashes() {
        // Backslashes inside literal contexts (code) are preserved verbatim
        assert_eq!(
            parse_inlines("`\\*text\\*`"),
            vec![InlineNode::code("\\*text\\*".into())]
        );
    }

    #[test]
    fn math_preserves_backslashes() {
        // Backslashes inside literal contexts (math) are preserved verbatim
        assert_eq!(
            parse_inlines("#\\alpha#"),
            vec![InlineNode::math("\\alpha".into())]
        );
    }

    #[test]
    fn unmatched_start_is_literal() {
        assert_eq!(parse_inlines("prefix *text"), vec![plain("prefix *text")]);
    }

    #[test]
    fn unmatched_nested_preserves_children() {
        let nodes = parse_inlines("*a _b_ c");
        assert_eq!(nodes.len(), 3);
        assert_eq!(nodes[0], plain("*a "));

        let middle = &nodes[1];
        let InlineNode::Emphasis { content, .. } = middle else {
            panic!("Unexpected node: {middle:?}");
        };
        assert_eq!(content, &vec![plain("b")]);

        assert_eq!(nodes[2], plain(" c"));
    }

    #[test]
    fn same_type_nesting_skips_inner_pair() {
        let nodes = parse_inlines("*outer *inner* text*");
        assert_eq!(nodes.len(), 1);

        let only = &nodes[0];
        let InlineNode::Strong { content, .. } = only else {
            panic!("Unexpected node: {only:?}");
        };
        assert_eq!(content, &vec![plain("outer *inner* text")]);
    }

    #[test]
    fn reference_detects_url() {
        let nodes = parse_inlines("[https://example.com]");

        let first = &nodes[0];
        let InlineNode::Reference { data, .. } = first else {
            panic!("Unexpected node: {first:?}");
        };
        let kind = &data.reference_type;
        let ReferenceType::Url { target } = kind else {
            panic!("Expected URL reference, got {kind:?}");
        };
        assert_eq!(target, "https://example.com");
    }

    #[test]
    fn reference_detects_tk_identifier() {
        let nodes = parse_inlines("[TK-feature]");

        let first = &nodes[0];
        let InlineNode::Reference { data, .. } = first else {
            panic!("Unexpected node: {first:?}");
        };
        let kind = &data.reference_type;
        let ReferenceType::ToCome { identifier } = kind else {
            panic!("Expected TK reference, got {kind:?}");
        };
        assert_eq!(identifier.as_deref(), Some("feature"));
    }

    #[test]
    fn reference_detects_citation_and_footnotes() {
        let citation = parse_inlines("[@doe2024]");
        let labeled = parse_inlines("[^note1]");
        let numbered = parse_inlines("[42]");

        let InlineNode::Reference {
            data: citation_ref, ..
        } = &citation[0]
        else {
            panic!("Expected reference");
        };
        let citation_kind = &citation_ref.reference_type;
        let ReferenceType::Citation(citation_data) = citation_kind else {
            panic!("Expected citation, got {citation_kind:?}");
        };
        assert_eq!(citation_data.keys, vec!["doe2024".to_string()]);
        assert!(citation_data.locator.is_none());

        let InlineNode::Reference {
            data: labeled_ref, ..
        } = &labeled[0]
        else {
            panic!("Expected reference");
        };
        let labeled_kind = &labeled_ref.reference_type;
        let ReferenceType::FootnoteLabeled { label } = labeled_kind else {
            panic!("Expected labeled footnote, got {labeled_kind:?}");
        };
        assert_eq!(label, "note1");

        let InlineNode::Reference {
            data: numbered_ref, ..
        } = &numbered[0]
        else {
            panic!("Expected reference");
        };
        let numbered_kind = &numbered_ref.reference_type;
        let ReferenceType::FootnoteNumber { number } = numbered_kind else {
            panic!("Expected numeric footnote, got {numbered_kind:?}");
        };
        assert_eq!(*number, 42);
    }

    #[test]
    fn reference_parses_citation_locator() {
        let nodes = parse_inlines("[@doe2024; @smith2023, pp. 45-46,47]");

        let InlineNode::Reference { data, .. } = &nodes[0] else {
            panic!("Expected reference");
        };
        let kind = &data.reference_type;
        let ReferenceType::Citation(citation_data) = kind else {
            panic!("Expected citation, got {kind:?}");
        };
        assert_eq!(
            citation_data.keys,
            vec!["doe2024".to_string(), "smith2023".to_string()]
        );

        let locator = citation_data.locator.as_ref().expect("expected locator");
        assert!(matches!(locator.format, PageFormat::Pp));
        assert_eq!(locator.ranges.len(), 2);
        assert_eq!(locator.ranges[0].start, 45);
        assert_eq!(locator.ranges[0].end, Some(46));
        assert_eq!(locator.ranges[1].start, 47);
        assert!(locator.ranges[1].end.is_none());
    }

    #[test]
    fn reference_detects_general_and_not_sure() {
        let general = parse_inlines("[Section Title]");
        let unsure = parse_inlines("[!!!]");

        let InlineNode::Reference {
            data: general_ref, ..
        } = &general[0]
        else {
            panic!("Expected reference");
        };
        let general_kind = &general_ref.reference_type;
        let ReferenceType::General { target } = general_kind else {
            panic!("Expected general reference, got {general_kind:?}");
        };
        assert_eq!(target, "Section Title");

        let InlineNode::Reference {
            data: unsure_ref, ..
        } = &unsure[0]
        else {
            panic!("Expected reference");
        };
        assert!(matches!(unsure_ref.reference_type, ReferenceType::NotSure));
    }

    /// Post-processor used below: prefixes the content of strong nodes with a
    /// `[strong]` plain node and leaves every other node untouched.
    fn annotate_strong(node: InlineNode) -> InlineNode {
        match node {
            InlineNode::Strong {
                content,
                annotations,
            } => {
                let marker = InlineNode::plain("[strong]".into());
                let content = std::iter::once(marker).chain(content).collect();
                InlineNode::Strong {
                    content,
                    annotations,
                }
            }
            other => other,
        }
    }

    #[test]
    fn post_process_callback_transforms_node() {
        let parser = InlineParser::new().with_post_processor(InlineKind::Strong, annotate_strong);
        let nodes = parser.parse("*bold*");
        assert_eq!(nodes.len(), 1);

        let only = &nodes[0];
        let InlineNode::Strong { content, .. } = only else {
            panic!("Unexpected inline node: {only:?}");
        };
        assert_eq!(content[0], plain("[strong]"));
        assert_eq!(content[1], plain("bold"));
    }

    #[test]
    fn escaped_tokens_are_literal() {
        assert_eq!(parse_inlines("\\*literal\\*"), vec![plain("*literal*")]);
    }

    #[test]
    fn backslash_before_alphanumeric_preserved() {
        assert_eq!(
            parse_inlines("C:\\Users\\name"),
            vec![plain("C:\\Users\\name")]
        );
    }

    #[test]
    fn escape_works_in_paths() {
        assert_eq!(
            parse_inlines("Path: C:\\\\Users\\\\name"),
            vec![plain("Path: C:\\Users\\name")]
        );
    }

    #[test]
    fn arithmetic_not_parsed_as_inline() {
        assert_eq!(parse_inlines("7 * 8"), vec![plain("7 * 8")]);
    }

    #[test]
    fn word_boundary_start_invalid() {
        assert_eq!(parse_inlines("word*s*"), vec![plain("word*s*")]);
    }

    #[test]
    fn multiple_arithmetic_expressions() {
        assert_eq!(
            parse_inlines("Calculate 7 * 8 + 3 * 4"),
            vec![plain("Calculate 7 * 8 + 3 * 4")]
        );
    }

    #[test]
    fn inline_node_annotations_empty_by_default() {
        let nodes = parse_inlines("*bold* text");
        assert_eq!(nodes.len(), 2);
        for node in &nodes[..2] {
            assert!(node.annotations().is_empty());
        }
    }

    #[test]
    fn with_annotation_adds_annotation_to_node() {
        use crate::lex::ast::elements::{Annotation, Label};

        let annotation = Annotation::marker(Label::new("test".to_string()));
        let node = InlineNode::plain("text".into()).with_annotation(annotation);

        let attached = node.annotations();
        assert_eq!(attached.len(), 1);
        assert_eq!(attached[0].data.label.value, "test");
    }

    #[test]
    fn with_annotations_adds_multiple_annotations() {
        use crate::lex::ast::elements::{Annotation, Label, Parameter};

        let marker = Annotation::marker(Label::new("doc.data".to_string()));
        let parameterized = Annotation::with_parameters(
            Label::new("test".to_string()),
            vec![Parameter::new("key".to_string(), "value".to_string())],
        );

        let node = InlineNode::math("x + y".into()).with_annotations(vec![marker, parameterized]);

        let attached = node.annotations();
        assert_eq!(attached.len(), 2);
        assert_eq!(attached[0].data.label.value, "doc.data");
        assert_eq!(attached[1].data.label.value, "test");
    }

    #[test]
    fn annotations_mut_allows_modification() {
        use crate::lex::ast::elements::{Annotation, Label};

        let mut node = InlineNode::code("code".into());
        assert!(node.annotations().is_empty());

        node.annotations_mut()
            .push(Annotation::marker(Label::new("highlighted".to_string())));

        assert_eq!(node.annotations().len(), 1);
        assert_eq!(node.annotations()[0].data.label.value, "highlighted");
    }

    #[test]
    fn post_processor_can_add_annotations() {
        use crate::lex::ast::elements::{Annotation, Label, Parameter};

        // Tags math nodes with a `doc.data` annotation carrying `type=mathml`.
        fn add_mathml_annotation(node: InlineNode) -> InlineNode {
            match node {
                InlineNode::Math {
                    text,
                    mut annotations,
                } => {
                    annotations.push(Annotation::with_parameters(
                        Label::new("doc.data".to_string()),
                        vec![Parameter::new("type".to_string(), "mathml".to_string())],
                    ));
                    InlineNode::Math { text, annotations }
                }
                other => other,
            }
        }

        let parser =
            InlineParser::new().with_post_processor(InlineKind::Math, add_mathml_annotation);
        let nodes = parser.parse("#x + y#");
        assert_eq!(nodes.len(), 1);

        let only = &nodes[0];
        let InlineNode::Math { text, annotations } = only else {
            panic!("Expected math node, got {only:?}");
        };
        assert_eq!(text, "x + y");
        assert_eq!(annotations.len(), 1);

        let annotation = &annotations[0].data;
        assert_eq!(annotation.label.value, "doc.data");
        assert_eq!(annotation.parameters.len(), 1);
        assert_eq!(annotation.parameters[0].key, "type");
        assert_eq!(annotation.parameters[0].value, "mathml");
    }
}
877}