Skip to main content

lex_core/lex/inlines/
parser.rs

1//! Inline parser implementation
2//!
3//!     Inline parsing is done by a declarative engine that will process each element declaration.
4//!     For some, this is a flat transformation (i.e. it only wraps up the text into a node, as
5//!     in bold or italic). Others are more involved, as in references, in which the engine will
6//!     execute a callback with the text content and return a node.
7//!
//!     This elegantly handles the fact that most inlines are simple and share the same
//!     structure, while still allowing more complex ones to handle their specific needs.
10//!
11//!     The parser processes inline elements in order, matching start tokens and finding
12//!     corresponding end tokens. Simple elements like bold and italic are flat transformations,
13//!     while complex elements like references use post-processing callbacks.
14//!
15//! Simple (Flat) Inline Elements
16//!
17//!     Most inline elements are simple transformations that just wrap text content:
18//!
19//!     - **Strong** (*text*): Wraps content in `InlineNode::Strong(children)`
20//!     - **Emphasis** (_text_): Wraps content in `InlineNode::Emphasis(children)`
21//!     - **Code** (`text`): Wraps literal text in `InlineNode::Code(string)` - no nested parsing
22//!     - **Math** (#formula#): Wraps literal text in `InlineNode::Math(string)` - no nested parsing
23//!
24//!     These are defined in the `default_specs()` function with just start/end tokens and whether
25//!     they're literal (no nested inline parsing inside).
26//!
27//! Complex Inline Elements (with Post-Processing)
28//!
29//!     Some inline elements need additional logic after parsing:
30//!
31//!     - **References** ([target]): After wrapping content, the `classify_reference_node` callback
32//!       analyzes the target text to determine the reference type (URL, citation, footnote, etc.)
33//!       and creates the appropriate `ReferenceType` variant.
34//!
35//!     Example: `[https://example.com]` is classified as a URL reference, while `[@doe2024]` becomes
36//!     a citation reference.
37//!
38//! Adding New Inline Types
39//!
40//!     To add a new inline element type:
41//!
42//!     1. Add a variant to `InlineKind` enum in [crate::lex::token::inline]
43//!     2. Add a variant to `InlineNode` in the ast module
44//!     3. Add an `InlineSpec` to `default_specs()` with start/end tokens
45//!     4. If complex logic is needed, implement a post-processor callback:
46//!        ```
47//!        fn my_post_processor(node: InlineNode) -> InlineNode {
48//!            // Transform the node based on its content
49//!            node
50//!        }
51//!        ```
52//!     5. Attach the callback via `.with_post_processor(InlineKind::MyType, my_post_processor)`
53//!
54//! Extension Pattern
55//!
56//!     The parser can be customized by creating an `InlineParser` instance and attaching
57//!     post-processors for specific inline types:
58//!     ```
59//!     let parser = InlineParser::new()
60//!         .with_post_processor(InlineKind::Strong, my_custom_processor);
61//!     let result = parser.parse("*text*");
62//!     ```
63
64use super::references::classify_reference_node;
65use crate::lex::ast::elements::inlines::{InlineContent, InlineNode, ReferenceInline};
66use crate::lex::escape::unescape_inline_char;
67use crate::lex::token::InlineKind;
68use once_cell::sync::Lazy;
69use std::collections::HashMap;
70
71static DEFAULT_INLINE_PARSER: Lazy<InlineParser> = Lazy::new(InlineParser::new);
72
73/// Parse inline nodes from a raw string using the default inline parser configuration.
74pub fn parse_inlines(text: &str) -> InlineContent {
75    DEFAULT_INLINE_PARSER.parse(text)
76}
77
78/// Parse inline nodes using a custom parser configuration.
79pub fn parse_inlines_with_parser(text: &str, parser: &InlineParser) -> InlineContent {
80    parser.parse(text)
81}
82
83/// Optional transformation applied to a parsed inline node.
84pub type InlinePostProcessor = fn(InlineNode) -> InlineNode;
85
86/// Specification for an inline element type
87///
88/// Defines how to parse and process a specific inline element. Each spec includes:
89/// - The kind of inline element (from [InlineKind])
90/// - Start and end tokens (single characters)
91/// - Whether content is literal (no nested inline parsing)
92/// - Optional post-processing callback for complex transformations
93#[derive(Clone)]
94pub struct InlineSpec {
95    pub kind: InlineKind,
96    pub start_token: char,
97    pub end_token: char,
98    pub literal: bool,
99    pub post_process: Option<InlinePostProcessor>,
100}
101
102impl InlineSpec {
103    fn apply_post_process(&self, node: InlineNode) -> InlineNode {
104        if let Some(callback) = self.post_process {
105            callback(node)
106        } else {
107            node
108        }
109    }
110}
111
112#[derive(Clone)]
113pub struct InlineParser {
114    specs: Vec<InlineSpec>,
115    token_map: HashMap<char, usize>,
116}
117
118impl InlineParser {
119    pub fn new() -> Self {
120        Self::from_specs(default_specs())
121    }
122
123    /// Attach a post-processing callback to a specific inline kind.
124    pub fn with_post_processor(mut self, kind: InlineKind, processor: InlinePostProcessor) -> Self {
125        if let Some(spec) = self.specs.iter_mut().find(|spec| spec.kind == kind) {
126            spec.post_process = Some(processor);
127        }
128        self
129    }
130
131    pub fn parse(&self, text: &str) -> InlineContent {
132        parse_with(self, text)
133    }
134
135    fn from_specs(specs: Vec<InlineSpec>) -> Self {
136        let mut token_map = HashMap::new();
137        for (index, spec) in specs.iter().enumerate() {
138            token_map.insert(spec.start_token, index);
139        }
140        Self { specs, token_map }
141    }
142
143    fn spec(&self, index: usize) -> &InlineSpec {
144        &self.specs[index]
145    }
146
147    fn spec_index_for_start(&self, ch: char) -> Option<usize> {
148        self.token_map.get(&ch).copied()
149    }
150
151    fn spec_count(&self) -> usize {
152        self.specs.len()
153    }
154}
155
156impl Default for InlineParser {
157    fn default() -> Self {
158        InlineParser::new()
159    }
160}
161
162fn default_specs() -> Vec<InlineSpec> {
163    vec![
164        InlineSpec {
165            kind: InlineKind::Strong,
166            start_token: '*',
167            end_token: '*',
168            literal: false,
169            post_process: None,
170        },
171        InlineSpec {
172            kind: InlineKind::Emphasis,
173            start_token: '_',
174            end_token: '_',
175            literal: false,
176            post_process: None,
177        },
178        InlineSpec {
179            kind: InlineKind::Code,
180            start_token: '`',
181            end_token: '`',
182            literal: true,
183            post_process: None,
184        },
185        InlineSpec {
186            kind: InlineKind::Math,
187            start_token: '#',
188            end_token: '#',
189            literal: true,
190            post_process: None,
191        },
192        InlineSpec {
193            kind: InlineKind::Reference,
194            start_token: '[',
195            end_token: ']',
196            literal: true,
197            post_process: Some(classify_reference_node),
198        },
199    ]
200}
201
202fn parse_with(parser: &InlineParser, text: &str) -> InlineContent {
203    let chars: Vec<char> = text.chars().collect();
204    if chars.is_empty() {
205        return Vec::new();
206    }
207
208    let mut stack = vec![InlineFrame::root()];
209    let mut blocked = BlockedClosings::new(parser.spec_count());
210
211    let mut i = 0;
212    while i < chars.len() {
213        let ch = chars[i];
214        let prev = if i == 0 { None } else { Some(chars[i - 1]) };
215        let next = if i + 1 < chars.len() {
216            Some(chars[i + 1])
217        } else {
218            None
219        };
220
221        // Escape processing only applies outside literal contexts (code/math).
222        // Inside literal elements, backslash is just a regular character.
223        if ch == '\\' && !stack.last().unwrap().is_literal(parser) {
224            match unescape_inline_char(next) {
225                crate::lex::escape::EscapeAction::Escape(escaped) => {
226                    stack.last_mut().unwrap().push_char(escaped);
227                    i += 2;
228                    continue;
229                }
230                crate::lex::escape::EscapeAction::Literal => {
231                    stack.last_mut().unwrap().push_char('\\');
232                    if next.is_none() {
233                        break;
234                    }
235                    i += 1;
236                    continue;
237                }
238            }
239        }
240
241        let mut consumed = false;
242        if let Some(spec_index) = stack.last().unwrap().spec_index {
243            let spec = parser.spec(spec_index);
244            if ch == spec.end_token {
245                if blocked.consume(spec_index) {
246                    // Literal closing paired to a disallowed nested start.
247                } else if is_valid_end(prev, next, spec) {
248                    let mut frame = stack.pop().unwrap();
249                    frame.flush_buffer();
250                    let had_content = frame.has_content();
251                    if !had_content {
252                        let parent = stack.last_mut().unwrap();
253                        parent.push_char(spec.start_token);
254                        parent.push_char(spec.end_token);
255                    } else {
256                        let node = frame.into_node(spec);
257                        let node = spec.apply_post_process(node);
258                        stack.last_mut().unwrap().push_node(node);
259                    }
260                    consumed = true;
261                }
262            }
263        }
264
265        if !consumed && !stack.last().unwrap().is_literal(parser) {
266            if let Some(spec_index) = parser.spec_index_for_start(ch) {
267                let spec = parser.spec(spec_index);
268                if is_valid_start(prev, next, spec, parser) {
269                    if stack
270                        .iter()
271                        .any(|frame| frame.spec_index == Some(spec_index))
272                    {
273                        blocked.increment(spec_index);
274                    } else {
275                        stack.last_mut().unwrap().flush_buffer();
276                        stack.push(InlineFrame::new(spec_index));
277                        consumed = true;
278                    }
279                }
280            }
281        }
282
283        if !consumed {
284            stack.last_mut().unwrap().push_char(ch);
285        }
286
287        i += 1;
288    }
289
290    if let Some(frame) = stack.last_mut() {
291        frame.flush_buffer();
292    }
293
294    while stack.len() > 1 {
295        let mut frame = stack.pop().unwrap();
296        frame.flush_buffer();
297        let spec_index = frame
298            .spec_index
299            .expect("non-root stack frame must have a spec");
300        let spec = parser.spec(spec_index);
301        let parent = stack.last_mut().unwrap();
302        parent.push_char(spec.start_token);
303        for child in frame.children {
304            parent.push_node(child);
305        }
306    }
307
308    let mut root = stack.pop().unwrap();
309    root.flush_buffer();
310    root.children
311}
312
313struct InlineFrame {
314    spec_index: Option<usize>,
315    buffer: String,
316    children: InlineContent,
317}
318
319impl InlineFrame {
320    fn root() -> Self {
321        Self {
322            spec_index: None,
323            buffer: String::new(),
324            children: Vec::new(),
325        }
326    }
327
328    fn new(spec_index: usize) -> Self {
329        Self {
330            spec_index: Some(spec_index),
331            buffer: String::new(),
332            children: Vec::new(),
333        }
334    }
335
336    fn has_content(&self) -> bool {
337        !self.buffer.is_empty() || !self.children.is_empty()
338    }
339
340    fn push_char(&mut self, ch: char) {
341        self.buffer.push(ch);
342    }
343
344    fn flush_buffer(&mut self) {
345        if self.buffer.is_empty() {
346            return;
347        }
348        let text = std::mem::take(&mut self.buffer);
349        if let Some(InlineNode::Plain { text: existing, .. }) = self.children.last_mut() {
350            existing.push_str(&text);
351        } else {
352            self.children.push(InlineNode::Plain {
353                text,
354                annotations: Vec::new(),
355            });
356        }
357    }
358
359    fn push_node(&mut self, node: InlineNode) {
360        self.flush_buffer();
361        match node {
362            InlineNode::Plain { text, annotations } => {
363                if text.is_empty() {
364                    return;
365                }
366                if let Some(InlineNode::Plain { text: existing, .. }) = self.children.last_mut() {
367                    existing.push_str(&text);
368                    // Note: annotations from the merged node are discarded
369                    // This is intentional as Plain nodes are typically created without annotations
370                } else {
371                    self.children.push(InlineNode::Plain { text, annotations });
372                }
373            }
374            other => self.children.push(other),
375        }
376    }
377
378    fn into_node(self, spec: &InlineSpec) -> InlineNode {
379        match spec.kind {
380            InlineKind::Strong => InlineNode::Strong {
381                content: self.children,
382                annotations: Vec::new(),
383            },
384            InlineKind::Emphasis => InlineNode::Emphasis {
385                content: self.children,
386                annotations: Vec::new(),
387            },
388            InlineKind::Code => InlineNode::Code {
389                text: flatten_literal(self.children),
390                annotations: Vec::new(),
391            },
392            InlineKind::Math => InlineNode::Math {
393                text: flatten_literal(self.children),
394                annotations: Vec::new(),
395            },
396            InlineKind::Reference => InlineNode::Reference {
397                data: ReferenceInline::new(flatten_literal(self.children)),
398                annotations: Vec::new(),
399            },
400        }
401    }
402
403    fn is_literal(&self, parser: &InlineParser) -> bool {
404        self.spec_index
405            .map(|index| parser.spec(index).literal)
406            .unwrap_or(false)
407    }
408}
409
410fn flatten_literal(children: InlineContent) -> String {
411    let mut text = String::new();
412    for node in children {
413        match node {
414            InlineNode::Plain { text: segment, .. } => text.push_str(&segment),
415            _ => fatal_literal_content(),
416        }
417    }
418    text
419}
420
/// Abort on a broken invariant: a literal inline element ended up with a
/// non-text child. Never returns.
fn fatal_literal_content() -> ! {
    panic!("Literal inline nodes must not contain nested nodes");
}
424
/// Per-spec counters of end tokens that must be treated as literal text.
///
/// A counter is bumped when a start token is rejected because an element of
/// the same kind is already open; the matching end token later consumes it.
struct BlockedClosings {
    /// One counter per spec, indexed by spec index.
    counts: Vec<usize>,
}

impl BlockedClosings {
    /// Create zeroed counters for `spec_len` specs.
    fn new(spec_len: usize) -> Self {
        Self {
            counts: vec![0; spec_len],
        }
    }

    /// Record one more blocked closing for `spec_index`.
    /// Out-of-range indices are ignored.
    fn increment(&mut self, spec_index: usize) {
        let Some(count) = self.counts.get_mut(spec_index) else {
            return;
        };
        *count += 1;
    }

    /// Use up one blocked closing for `spec_index`.
    ///
    /// Returns `true` if a closing was pending (and decrements the counter),
    /// `false` if none was pending or the index is out of range.
    fn consume(&mut self, spec_index: usize) -> bool {
        match self.counts.get_mut(spec_index) {
            Some(count) if *count > 0 => {
                *count -= 1;
                true
            }
            _ => false,
        }
    }
}
452
453fn is_valid_start(
454    prev: Option<char>,
455    next: Option<char>,
456    spec: &InlineSpec,
457    parser: &InlineParser,
458) -> bool {
459    if matches!(spec.kind, InlineKind::Reference) {
460        // References may abut a preceding word: `Hello[./file.txt]` anchors the
461        // word "Hello" (references-general.lex §2.3.1, "no surrounding space is
462        // required"). So unlike code/math, a reference start is allowed even
463        // when the previous char is a word char. The opening still requires
464        // non-whitespace content immediately after `[`.
465        next.is_some_and(|c| !c.is_whitespace())
466    } else if spec.literal {
467        // Literal elements (code, math) accept any non-whitespace content.
468        // This allows code/math to start with \, {, (, *, etc.
469        !is_word(prev) && next.is_some_and(|c| !c.is_whitespace())
470    } else {
471        // Non-literal elements (strong, emphasis) require a word char OR another
472        // inline start marker after the opening token. Allowing an adjacent marker
473        // enables directly-nested formatting such as _*foo*_ and *_foo_*.
474        !is_word(prev)
475            && next.is_some_and(|c| is_word(Some(c)) || parser.spec_index_for_start(c).is_some())
476    }
477}
478
479fn is_valid_end(prev: Option<char>, next: Option<char>, spec: &InlineSpec) -> bool {
480    let inside_valid = if spec.literal {
481        prev.is_some()
482    } else {
483        matches!(prev, Some(ch) if !ch.is_whitespace())
484    };
485
486    inside_valid && !is_word(next)
487}
488
/// Returns `true` when `ch` is present and alphanumeric (per
/// `char::is_alphanumeric`); `None` and every other character, including
/// `_`, count as non-word.
fn is_word(ch: Option<char>) -> bool {
    ch.is_some_and(char::is_alphanumeric)
}
492
493#[cfg(test)]
494mod tests {
495    use super::*;
496    use crate::lex::inlines::{InlineNode, PageFormat, ReferenceType};
497
498    #[test]
499    fn parses_plain_text() {
500        let nodes = parse_inlines("hello world");
501        assert_eq!(
502            nodes,
503            vec![InlineNode::Plain {
504                text: "hello world".into(),
505                annotations: Vec::new()
506            }]
507        );
508    }
509
510    #[test]
511    fn parses_strong_and_emphasis() {
512        let nodes = parse_inlines("*strong _inner_* text");
513        assert_eq!(nodes.len(), 2);
514        match &nodes[0] {
515            InlineNode::Strong { content, .. } => {
516                assert_eq!(content.len(), 2);
517                assert_eq!(
518                    content[0],
519                    InlineNode::Plain {
520                        text: "strong ".into(),
521                        annotations: Vec::new()
522                    }
523                );
524                match &content[1] {
525                    InlineNode::Emphasis { content: inner, .. } => {
526                        assert_eq!(
527                            inner,
528                            &vec![InlineNode::Plain {
529                                text: "inner".into(),
530                                annotations: Vec::new()
531                            }]
532                        );
533                    }
534                    other => panic!("Unexpected child: {other:?}"),
535                }
536            }
537            other => panic!("Unexpected node: {other:?}"),
538        }
539        assert_eq!(
540            nodes[1],
541            InlineNode::Plain {
542                text: " text".into(),
543                annotations: Vec::new()
544            }
545        );
546    }
547
548    #[test]
549    fn nested_emphasis_inside_strong() {
550        let nodes = parse_inlines("*strong and _emphasis_* text");
551        assert_eq!(nodes.len(), 2);
552        match &nodes[0] {
553            InlineNode::Strong { content, .. } => {
554                assert_eq!(content.len(), 2);
555                assert_eq!(content[0], InlineNode::plain("strong and ".into()));
556                match &content[1] {
557                    InlineNode::Emphasis { content: inner, .. } => {
558                        assert_eq!(inner, &vec![InlineNode::plain("emphasis".into())]);
559                    }
560                    other => panic!("Unexpected child: {other:?}"),
561                }
562            }
563            _ => panic!("Expected strong node"),
564        }
565    }
566
567    #[test]
568    fn directly_nested_emphasis_wraps_strong() {
569        // _*foo*_ — emphasis directly wrapping strong, no intervening content
570        let nodes = parse_inlines("_*both*_");
571        assert_eq!(nodes.len(), 1);
572        match &nodes[0] {
573            InlineNode::Emphasis { content, .. } => {
574                assert_eq!(content.len(), 1);
575                match &content[0] {
576                    InlineNode::Strong { content: inner, .. } => {
577                        assert_eq!(inner, &vec![InlineNode::plain("both".into())]);
578                    }
579                    other => panic!("Expected Strong inside Emphasis, got: {other:?}"),
580                }
581            }
582            other => panic!("Expected Emphasis, got: {other:?}"),
583        }
584    }
585
586    #[test]
587    fn directly_nested_strong_wraps_emphasis() {
588        // *_foo_* — strong directly wrapping emphasis
589        let nodes = parse_inlines("*_inverted_*");
590        assert_eq!(nodes.len(), 1);
591        match &nodes[0] {
592            InlineNode::Strong { content, .. } => {
593                assert_eq!(content.len(), 1);
594                match &content[0] {
595                    InlineNode::Emphasis { content: inner, .. } => {
596                        assert_eq!(inner, &vec![InlineNode::plain("inverted".into())]);
597                    }
598                    other => panic!("Expected Emphasis inside Strong, got: {other:?}"),
599                }
600            }
601            other => panic!("Expected Strong, got: {other:?}"),
602        }
603    }
604
605    #[test]
606    fn directly_nested_multi_word() {
607        let nodes = parse_inlines("_*bold multiple words*_");
608        assert_eq!(nodes.len(), 1);
609        match &nodes[0] {
610            InlineNode::Emphasis { content, .. } => match &content[0] {
611                InlineNode::Strong { content: inner, .. } => {
612                    assert_eq!(
613                        inner,
614                        &vec![InlineNode::plain("bold multiple words".into())]
615                    );
616                }
617                other => panic!("Expected Strong, got: {other:?}"),
618            },
619            other => panic!("Expected Emphasis, got: {other:?}"),
620        }
621    }
622
623    #[test]
624    fn arithmetic_still_not_parsed_as_inline() {
625        // 7 * 8 — * surrounded by spaces should NOT start a strong.
626        // This guards against the adjacent-marker rule accidentally matching.
627        let nodes = parse_inlines("7 * 8");
628        assert_eq!(nodes, vec![InlineNode::plain("7 * 8".into())]);
629    }
630
631    #[test]
632    fn empty_markers_stay_literal() {
633        // Same-delimiter adjacency (**, __, ``) should stay literal, not
634        // produce empty inline elements — the adjacent-marker rule must not
635        // swallow the closing delimiter as "content".
636        let nodes = parse_inlines("a ** b __ c");
637        assert_eq!(nodes, vec![InlineNode::plain("a ** b __ c".into())]);
638    }
639
640    #[test]
641    fn code_is_literal() {
642        let nodes = parse_inlines("`a * literal _` text");
643        assert_eq!(nodes.len(), 2);
644        assert_eq!(nodes[0], InlineNode::code("a * literal _".into()));
645        assert_eq!(nodes[1], InlineNode::plain(" text".into()));
646    }
647
648    #[test]
649    fn math_is_literal() {
650        let nodes = parse_inlines("#x + y#");
651        assert_eq!(nodes, vec![InlineNode::math("x + y".into())]);
652    }
653
654    #[test]
655    fn code_preserves_backslashes() {
656        // Backslashes inside literal contexts (code) are preserved verbatim
657        let nodes = parse_inlines("`\\*text\\*`");
658        assert_eq!(nodes, vec![InlineNode::code("\\*text\\*".into())]);
659    }
660
661    #[test]
662    fn math_preserves_backslashes() {
663        // Backslashes inside literal contexts (math) are preserved verbatim
664        let nodes = parse_inlines("#\\alpha#");
665        assert_eq!(nodes, vec![InlineNode::math("\\alpha".into())]);
666    }
667
668    #[test]
669    fn unmatched_start_is_literal() {
670        let nodes = parse_inlines("prefix *text");
671        assert_eq!(nodes, vec![InlineNode::plain("prefix *text".into())]);
672    }
673
674    #[test]
675    fn unmatched_nested_preserves_children() {
676        let nodes = parse_inlines("*a _b_ c");
677        assert_eq!(nodes.len(), 3);
678        assert_eq!(nodes[0], InlineNode::plain("*a ".into()));
679        match &nodes[1] {
680            InlineNode::Emphasis { content, .. } => {
681                assert_eq!(content, &vec![InlineNode::plain("b".into())]);
682            }
683            other => panic!("Unexpected node: {other:?}"),
684        }
685        assert_eq!(nodes[2], InlineNode::plain(" c".into()));
686    }
687
688    #[test]
689    fn same_type_nesting_skips_inner_pair() {
690        let nodes = parse_inlines("*outer *inner* text*");
691        assert_eq!(nodes.len(), 1);
692        match &nodes[0] {
693            InlineNode::Strong { content, .. } => {
694                assert_eq!(
695                    content,
696                    &vec![InlineNode::plain("outer *inner* text".into())]
697                );
698            }
699            other => panic!("Unexpected node: {other:?}"),
700        }
701    }
702
703    #[test]
704    fn reference_detects_url() {
705        let nodes = parse_inlines("[https://example.com]");
706        match &nodes[0] {
707            InlineNode::Reference { data, .. } => match &data.reference_type {
708                ReferenceType::Url { target } => assert_eq!(target, "https://example.com"),
709                other => panic!("Expected URL reference, got {other:?}"),
710            },
711            other => panic!("Unexpected node: {other:?}"),
712        }
713    }
714
715    #[test]
716    fn reference_detects_tk_identifier() {
717        let nodes = parse_inlines("[TK-feature]");
718        match &nodes[0] {
719            InlineNode::Reference { data, .. } => match &data.reference_type {
720                ReferenceType::ToCome { identifier } => {
721                    assert_eq!(identifier.as_deref(), Some("feature"));
722                }
723                other => panic!("Expected TK reference, got {other:?}"),
724            },
725            other => panic!("Unexpected node: {other:?}"),
726        }
727    }
728
729    #[test]
730    fn reference_detects_citation_and_footnotes() {
731        let citation = parse_inlines("[@doe2024]");
732        let labeled = parse_inlines("[::note1]");
733        let numbered = parse_inlines("[42]");
734
735        match &citation[0] {
736            InlineNode::Reference { data, .. } => match &data.reference_type {
737                ReferenceType::Citation(citation_data) => {
738                    assert_eq!(citation_data.keys, vec!["doe2024".to_string()]);
739                    assert!(citation_data.locator.is_none());
740                }
741                other => panic!("Expected citation, got {other:?}"),
742            },
743            _ => panic!("Expected reference"),
744        }
745        match &labeled[0] {
746            InlineNode::Reference { data, .. } => match &data.reference_type {
747                ReferenceType::AnnotationReference { label } => assert_eq!(label, "note1"),
748                other => panic!("Expected annotation reference, got {other:?}"),
749            },
750            _ => panic!("Expected reference"),
751        }
752        match &numbered[0] {
753            InlineNode::Reference { data, .. } => match &data.reference_type {
754                ReferenceType::FootnoteNumber { number } => assert_eq!(*number, 42),
755                other => panic!("Expected numeric footnote, got {other:?}"),
756            },
757            _ => panic!("Expected reference"),
758        }
759    }
760
761    #[test]
762    fn reference_parses_citation_locator() {
763        let nodes = parse_inlines("[@doe2024; @smith2023, pp. 45-46,47]");
764        match &nodes[0] {
765            InlineNode::Reference { data, .. } => match &data.reference_type {
766                ReferenceType::Citation(citation_data) => {
767                    assert_eq!(
768                        citation_data.keys,
769                        vec!["doe2024".to_string(), "smith2023".to_string()]
770                    );
771                    let locator = citation_data.locator.as_ref().expect("expected locator");
772                    assert!(matches!(locator.format, PageFormat::Pp));
773                    assert_eq!(locator.ranges.len(), 2);
774                    assert_eq!(locator.ranges[0].start, 45);
775                    assert_eq!(locator.ranges[0].end, Some(46));
776                    assert_eq!(locator.ranges[1].start, 47);
777                    assert!(locator.ranges[1].end.is_none());
778                }
779                other => panic!("Expected citation, got {other:?}"),
780            },
781            _ => panic!("Expected reference"),
782        }
783    }
784
785    #[test]
786    fn reference_detects_general_and_not_sure() {
787        let general = parse_inlines("[Section Title]");
788        let unsure = parse_inlines("[!!!]");
789        match &general[0] {
790            InlineNode::Reference { data, .. } => match &data.reference_type {
791                ReferenceType::General { target } => assert_eq!(target, "Section Title"),
792                other => panic!("Expected general reference, got {other:?}"),
793            },
794            _ => panic!("Expected reference"),
795        }
796        match &unsure[0] {
797            InlineNode::Reference { data, .. } => {
798                assert!(matches!(data.reference_type, ReferenceType::NotSure));
799            }
800            _ => panic!("Expected reference"),
801        }
802    }
803
804    fn annotate_strong(node: InlineNode) -> InlineNode {
805        match node {
806            InlineNode::Strong {
807                mut content,
808                annotations,
809            } => {
810                let mut annotated = vec![InlineNode::plain("[strong]".into())];
811                annotated.append(&mut content);
812                InlineNode::Strong {
813                    content: annotated,
814                    annotations,
815                }
816            }
817            other => other,
818        }
819    }
820
821    #[test]
822    fn post_process_callback_transforms_node() {
823        let parser = InlineParser::new().with_post_processor(InlineKind::Strong, annotate_strong);
824        let nodes = parser.parse("*bold*");
825        assert_eq!(nodes.len(), 1);
826        match &nodes[0] {
827            InlineNode::Strong { content, .. } => {
828                assert_eq!(content[0], InlineNode::plain("[strong]".into()));
829                assert_eq!(content[1], InlineNode::plain("bold".into()));
830            }
831            other => panic!("Unexpected inline node: {other:?}"),
832        }
833    }
834
835    #[test]
836    fn escaped_tokens_are_literal() {
837        let nodes = parse_inlines("\\*literal\\*");
838        assert_eq!(nodes, vec![InlineNode::plain("*literal*".into())]);
839    }
840
841    #[test]
842    fn backslash_before_alphanumeric_preserved() {
843        let nodes = parse_inlines("C:\\Users\\name");
844        assert_eq!(nodes, vec![InlineNode::plain("C:\\Users\\name".into())]);
845    }
846
847    #[test]
848    fn escape_works_in_paths() {
849        let nodes = parse_inlines("Path: C:\\\\Users\\\\name");
850        assert_eq!(
851            nodes,
852            vec![InlineNode::plain("Path: C:\\Users\\name".into())]
853        );
854    }
855
856    #[test]
857    fn arithmetic_not_parsed_as_inline() {
858        let nodes = parse_inlines("7 * 8");
859        assert_eq!(nodes, vec![InlineNode::plain("7 * 8".into())]);
860    }
861
862    #[test]
863    fn word_boundary_start_invalid() {
864        let nodes = parse_inlines("word*s*");
865        assert_eq!(nodes, vec![InlineNode::plain("word*s*".into())]);
866    }
867
868    #[test]
869    fn multiple_arithmetic_expressions() {
870        let nodes = parse_inlines("Calculate 7 * 8 + 3 * 4");
871        assert_eq!(
872            nodes,
873            vec![InlineNode::plain("Calculate 7 * 8 + 3 * 4".into())]
874        );
875    }
876
877    #[test]
878    fn inline_node_annotations_empty_by_default() {
879        let nodes = parse_inlines("*bold* text");
880        assert_eq!(nodes.len(), 2);
881        assert!(nodes[0].annotations().is_empty());
882        assert!(nodes[1].annotations().is_empty());
883    }
884
885    #[test]
886    fn with_annotation_adds_annotation_to_node() {
887        use crate::lex::ast::elements::{Annotation, Label};
888
889        let annotation = Annotation::marker(Label::new("test".to_string()));
890        let node = InlineNode::plain("text".into()).with_annotation(annotation.clone());
891
892        assert_eq!(node.annotations().len(), 1);
893        assert_eq!(node.annotations()[0].data.label.value, "test");
894    }
895
896    #[test]
897    fn with_annotations_adds_multiple_annotations() {
898        use crate::lex::ast::elements::{Annotation, Label, Parameter};
899
900        let anno1 = Annotation::marker(Label::new("doc.data".to_string()));
901        let anno2 = Annotation::with_parameters(
902            Label::new("test".to_string()),
903            vec![Parameter::new("key".to_string(), "value".to_string())],
904        );
905
906        let node = InlineNode::math("x + y".into()).with_annotations(vec![anno1, anno2]);
907
908        assert_eq!(node.annotations().len(), 2);
909        assert_eq!(node.annotations()[0].data.label.value, "doc.data");
910        assert_eq!(node.annotations()[1].data.label.value, "test");
911    }
912
913    #[test]
914    fn annotations_mut_allows_modification() {
915        use crate::lex::ast::elements::{Annotation, Label};
916
917        let mut node = InlineNode::code("code".into());
918        assert!(node.annotations().is_empty());
919
920        let annotation = Annotation::marker(Label::new("highlighted".to_string()));
921        node.annotations_mut().push(annotation);
922
923        assert_eq!(node.annotations().len(), 1);
924        assert_eq!(node.annotations()[0].data.label.value, "highlighted");
925    }
926
927    #[test]
928    fn post_processor_can_add_annotations() {
929        use crate::lex::ast::elements::{Annotation, Label, Parameter};
930
931        fn add_mathml_annotation(node: InlineNode) -> InlineNode {
932            match node {
933                InlineNode::Math {
934                    text,
935                    mut annotations,
936                } => {
937                    let anno = Annotation::with_parameters(
938                        Label::new("doc.data".to_string()),
939                        vec![Parameter::new("type".to_string(), "mathml".to_string())],
940                    );
941                    annotations.push(anno);
942                    InlineNode::Math { text, annotations }
943                }
944                other => other,
945            }
946        }
947
948        let parser =
949            InlineParser::new().with_post_processor(InlineKind::Math, add_mathml_annotation);
950        let nodes = parser.parse("#x + y#");
951
952        assert_eq!(nodes.len(), 1);
953        match &nodes[0] {
954            InlineNode::Math { text, annotations } => {
955                assert_eq!(text, "x + y");
956                assert_eq!(annotations.len(), 1);
957                assert_eq!(annotations[0].data.label.value, "doc.data");
958                assert_eq!(annotations[0].data.parameters.len(), 1);
959                assert_eq!(annotations[0].data.parameters[0].key, "type");
960                assert_eq!(annotations[0].data.parameters[0].value, "mathml");
961            }
962            other => panic!("Expected math node, got {other:?}"),
963        }
964    }
965}