Skip to main content

lex_core/lex/inlines/
parser.rs

1//! Inline parser implementation
2//!
3//!     Inline parsing is done by a declarative engine that will process each element declaration.
4//!     For some, this is a flat transformation (i.e. it only wraps up the text into a node, as
5//!     in bold or italic). Others are more involved, as in references, in which the engine will
6//!     execute a callback with the text content and return a node.
7//!
//!     This elegantly accommodates the fact that most inlines are simple and share the same
//!     structure, while still allowing more complex ones to handle their specific needs.
10//!
11//!     The parser processes inline elements in order, matching start tokens and finding
12//!     corresponding end tokens. Simple elements like bold and italic are flat transformations,
13//!     while complex elements like references use post-processing callbacks.
14//!
15//! Simple (Flat) Inline Elements
16//!
17//!     Most inline elements are simple transformations that just wrap text content:
18//!
//!     - **Strong** (*text*): Wraps content in `InlineNode::Strong { content, .. }`
//!     - **Emphasis** (_text_): Wraps content in `InlineNode::Emphasis { content, .. }`
//!     - **Code** (`text`): Wraps literal text in `InlineNode::Code { text, .. }` - no nested parsing
//!     - **Math** (#formula#): Wraps literal text in `InlineNode::Math { text, .. }` - no nested parsing
23//!
24//!     These are defined in the `default_specs()` function with just start/end tokens and whether
25//!     they're literal (no nested inline parsing inside).
26//!
27//! Complex Inline Elements (with Post-Processing)
28//!
29//!     Some inline elements need additional logic after parsing:
30//!
31//!     - **References** ([target]): After wrapping content, the `classify_reference_node` callback
32//!       analyzes the target text to determine the reference type (URL, citation, footnote, etc.)
33//!       and creates the appropriate `ReferenceType` variant.
34//!
35//!     Example: `[https://example.com]` is classified as a URL reference, while `[@doe2024]` becomes
36//!     a citation reference.
37//!
38//! Adding New Inline Types
39//!
40//!     To add a new inline element type:
41//!
42//!     1. Add a variant to `InlineKind` enum in [crate::lex::token::inline]
43//!     2. Add a variant to `InlineNode` in the ast module
44//!     3. Add an `InlineSpec` to `default_specs()` with start/end tokens
45//!     4. If complex logic is needed, implement a post-processor callback:
46//!        ```
47//!        fn my_post_processor(node: InlineNode) -> InlineNode {
48//!            // Transform the node based on its content
49//!            node
50//!        }
51//!        ```
52//!     5. Attach the callback via `.with_post_processor(InlineKind::MyType, my_post_processor)`
53//!
54//! Extension Pattern
55//!
56//!     The parser can be customized by creating an `InlineParser` instance and attaching
57//!     post-processors for specific inline types:
58//!     ```
59//!     let parser = InlineParser::new()
60//!         .with_post_processor(InlineKind::Strong, my_custom_processor);
61//!     let result = parser.parse("*text*");
62//!     ```
63
64use super::references::classify_reference_node;
65use crate::lex::ast::elements::inlines::{InlineContent, InlineNode, ReferenceInline};
66use crate::lex::escape::unescape_inline_char;
67use crate::lex::token::InlineKind;
68use once_cell::sync::Lazy;
69use std::collections::HashMap;
70
/// Shared parser instance, lazily initialised via `InlineParser::new`, that backs
/// `parse_inlines`.
static DEFAULT_INLINE_PARSER: Lazy<InlineParser> = Lazy::new(InlineParser::new);
72
73/// Parse inline nodes from a raw string using the default inline parser configuration.
74pub fn parse_inlines(text: &str) -> InlineContent {
75    DEFAULT_INLINE_PARSER.parse(text)
76}
77
78/// Parse inline nodes using a custom parser configuration.
79pub fn parse_inlines_with_parser(text: &str, parser: &InlineParser) -> InlineContent {
80    parser.parse(text)
81}
82
/// Optional transformation applied to a parsed inline node.
///
/// A plain function pointer: it takes ownership of the node built for an inline
/// element and returns the node that is stored in its place.
pub type InlinePostProcessor = fn(InlineNode) -> InlineNode;
85
/// Specification for an inline element type
///
/// Defines how to parse and process a specific inline element. Each spec includes:
/// - The kind of inline element (from [InlineKind])
/// - Start and end tokens (single characters)
/// - Whether content is literal (no nested inline parsing)
/// - Optional post-processing callback for complex transformations
#[derive(Clone)]
pub struct InlineSpec {
    /// Which inline element this spec produces.
    pub kind: InlineKind,
    /// Character that opens the element.
    pub start_token: char,
    /// Character that closes the element (may be the same as `start_token`).
    pub end_token: char,
    /// When `true`, the content is kept verbatim: no nested inline elements are
    /// opened inside it and backslash escapes are not processed.
    pub literal: bool,
    /// Callback applied to the node built for this element, if any.
    pub post_process: Option<InlinePostProcessor>,
}
101
102impl InlineSpec {
103    fn apply_post_process(&self, node: InlineNode) -> InlineNode {
104        if let Some(callback) = self.post_process {
105            callback(node)
106        } else {
107            node
108        }
109    }
110}
111
/// Inline parser configuration: the registered element specs plus a lookup
/// table from start token to spec position.
#[derive(Clone)]
pub struct InlineParser {
    /// Registered inline element specs; other parts of the parser refer to them by index.
    specs: Vec<InlineSpec>,
    /// Maps a start-token character to the index of its spec in `specs`.
    token_map: HashMap<char, usize>,
}
117
118impl InlineParser {
119    pub fn new() -> Self {
120        Self::from_specs(default_specs())
121    }
122
123    /// Attach a post-processing callback to a specific inline kind.
124    pub fn with_post_processor(mut self, kind: InlineKind, processor: InlinePostProcessor) -> Self {
125        if let Some(spec) = self.specs.iter_mut().find(|spec| spec.kind == kind) {
126            spec.post_process = Some(processor);
127        }
128        self
129    }
130
131    pub fn parse(&self, text: &str) -> InlineContent {
132        parse_with(self, text)
133    }
134
135    fn from_specs(specs: Vec<InlineSpec>) -> Self {
136        let mut token_map = HashMap::new();
137        for (index, spec) in specs.iter().enumerate() {
138            token_map.insert(spec.start_token, index);
139        }
140        Self { specs, token_map }
141    }
142
143    fn spec(&self, index: usize) -> &InlineSpec {
144        &self.specs[index]
145    }
146
147    fn spec_index_for_start(&self, ch: char) -> Option<usize> {
148        self.token_map.get(&ch).copied()
149    }
150
151    fn spec_count(&self) -> usize {
152        self.specs.len()
153    }
154}
155
156impl Default for InlineParser {
157    fn default() -> Self {
158        InlineParser::new()
159    }
160}
161
162fn default_specs() -> Vec<InlineSpec> {
163    vec![
164        InlineSpec {
165            kind: InlineKind::Strong,
166            start_token: '*',
167            end_token: '*',
168            literal: false,
169            post_process: None,
170        },
171        InlineSpec {
172            kind: InlineKind::Emphasis,
173            start_token: '_',
174            end_token: '_',
175            literal: false,
176            post_process: None,
177        },
178        InlineSpec {
179            kind: InlineKind::Code,
180            start_token: '`',
181            end_token: '`',
182            literal: true,
183            post_process: None,
184        },
185        InlineSpec {
186            kind: InlineKind::Math,
187            start_token: '#',
188            end_token: '#',
189            literal: true,
190            post_process: None,
191        },
192        InlineSpec {
193            kind: InlineKind::Reference,
194            start_token: '[',
195            end_token: ']',
196            literal: true,
197            post_process: Some(classify_reference_node),
198        },
199    ]
200}
201
202fn parse_with(parser: &InlineParser, text: &str) -> InlineContent {
203    let chars: Vec<char> = text.chars().collect();
204    if chars.is_empty() {
205        return Vec::new();
206    }
207
208    let mut stack = vec![InlineFrame::root()];
209    let mut blocked = BlockedClosings::new(parser.spec_count());
210
211    let mut i = 0;
212    while i < chars.len() {
213        let ch = chars[i];
214        let prev = if i == 0 { None } else { Some(chars[i - 1]) };
215        let next = if i + 1 < chars.len() {
216            Some(chars[i + 1])
217        } else {
218            None
219        };
220
221        // Escape processing only applies outside literal contexts (code/math).
222        // Inside literal elements, backslash is just a regular character.
223        if ch == '\\' && !stack.last().unwrap().is_literal(parser) {
224            match unescape_inline_char(next) {
225                crate::lex::escape::EscapeAction::Escape(escaped) => {
226                    stack.last_mut().unwrap().push_char(escaped);
227                    i += 2;
228                    continue;
229                }
230                crate::lex::escape::EscapeAction::Literal => {
231                    stack.last_mut().unwrap().push_char('\\');
232                    if next.is_none() {
233                        break;
234                    }
235                    i += 1;
236                    continue;
237                }
238            }
239        }
240
241        let mut consumed = false;
242        if let Some(spec_index) = stack.last().unwrap().spec_index {
243            let spec = parser.spec(spec_index);
244            if ch == spec.end_token {
245                if blocked.consume(spec_index) {
246                    // Literal closing paired to a disallowed nested start.
247                } else if is_valid_end(prev, next, spec) {
248                    let mut frame = stack.pop().unwrap();
249                    frame.flush_buffer();
250                    let had_content = frame.has_content();
251                    if !had_content {
252                        let parent = stack.last_mut().unwrap();
253                        parent.push_char(spec.start_token);
254                        parent.push_char(spec.end_token);
255                    } else {
256                        let node = frame.into_node(spec);
257                        let node = spec.apply_post_process(node);
258                        stack.last_mut().unwrap().push_node(node);
259                    }
260                    consumed = true;
261                }
262            }
263        }
264
265        if !consumed && !stack.last().unwrap().is_literal(parser) {
266            if let Some(spec_index) = parser.spec_index_for_start(ch) {
267                let spec = parser.spec(spec_index);
268                if is_valid_start(prev, next, spec, parser) {
269                    if stack
270                        .iter()
271                        .any(|frame| frame.spec_index == Some(spec_index))
272                    {
273                        blocked.increment(spec_index);
274                    } else {
275                        stack.last_mut().unwrap().flush_buffer();
276                        stack.push(InlineFrame::new(spec_index));
277                        consumed = true;
278                    }
279                }
280            }
281        }
282
283        if !consumed {
284            stack.last_mut().unwrap().push_char(ch);
285        }
286
287        i += 1;
288    }
289
290    if let Some(frame) = stack.last_mut() {
291        frame.flush_buffer();
292    }
293
294    while stack.len() > 1 {
295        let mut frame = stack.pop().unwrap();
296        frame.flush_buffer();
297        let spec_index = frame
298            .spec_index
299            .expect("non-root stack frame must have a spec");
300        let spec = parser.spec(spec_index);
301        let parent = stack.last_mut().unwrap();
302        parent.push_char(spec.start_token);
303        for child in frame.children {
304            parent.push_node(child);
305        }
306    }
307
308    let mut root = stack.pop().unwrap();
309    root.flush_buffer();
310    root.children
311}
312
/// One level of the parser's stack: either the root or an inline element that
/// has been opened but not yet closed.
struct InlineFrame {
    /// Index of the spec that opened this frame; `None` for the root frame.
    spec_index: Option<usize>,
    /// Plain characters collected since the last flush.
    buffer: String,
    /// Nodes completed so far inside this frame.
    children: InlineContent,
}
318
319impl InlineFrame {
320    fn root() -> Self {
321        Self {
322            spec_index: None,
323            buffer: String::new(),
324            children: Vec::new(),
325        }
326    }
327
328    fn new(spec_index: usize) -> Self {
329        Self {
330            spec_index: Some(spec_index),
331            buffer: String::new(),
332            children: Vec::new(),
333        }
334    }
335
336    fn has_content(&self) -> bool {
337        !self.buffer.is_empty() || !self.children.is_empty()
338    }
339
340    fn push_char(&mut self, ch: char) {
341        self.buffer.push(ch);
342    }
343
344    fn flush_buffer(&mut self) {
345        if self.buffer.is_empty() {
346            return;
347        }
348        let text = std::mem::take(&mut self.buffer);
349        if let Some(InlineNode::Plain { text: existing, .. }) = self.children.last_mut() {
350            existing.push_str(&text);
351        } else {
352            self.children.push(InlineNode::Plain {
353                text,
354                annotations: Vec::new(),
355            });
356        }
357    }
358
359    fn push_node(&mut self, node: InlineNode) {
360        self.flush_buffer();
361        match node {
362            InlineNode::Plain { text, annotations } => {
363                if text.is_empty() {
364                    return;
365                }
366                if let Some(InlineNode::Plain { text: existing, .. }) = self.children.last_mut() {
367                    existing.push_str(&text);
368                    // Note: annotations from the merged node are discarded
369                    // This is intentional as Plain nodes are typically created without annotations
370                } else {
371                    self.children.push(InlineNode::Plain { text, annotations });
372                }
373            }
374            other => self.children.push(other),
375        }
376    }
377
378    fn into_node(self, spec: &InlineSpec) -> InlineNode {
379        match spec.kind {
380            InlineKind::Strong => InlineNode::Strong {
381                content: self.children,
382                annotations: Vec::new(),
383            },
384            InlineKind::Emphasis => InlineNode::Emphasis {
385                content: self.children,
386                annotations: Vec::new(),
387            },
388            InlineKind::Code => InlineNode::Code {
389                text: flatten_literal(self.children),
390                annotations: Vec::new(),
391            },
392            InlineKind::Math => InlineNode::Math {
393                text: flatten_literal(self.children),
394                annotations: Vec::new(),
395            },
396            InlineKind::Reference => InlineNode::Reference {
397                data: ReferenceInline::new(flatten_literal(self.children)),
398                annotations: Vec::new(),
399            },
400        }
401    }
402
403    fn is_literal(&self, parser: &InlineParser) -> bool {
404        self.spec_index
405            .map(|index| parser.spec(index).literal)
406            .unwrap_or(false)
407    }
408}
409
410fn flatten_literal(children: InlineContent) -> String {
411    let mut text = String::new();
412    for node in children {
413        match node {
414            InlineNode::Plain { text: segment, .. } => text.push_str(&segment),
415            _ => fatal_literal_content(),
416        }
417    }
418    text
419}
420
/// Abort with a panic: a literal inline element was found holding a nested
/// (non-plain) node. Never returns.
fn fatal_literal_content() -> ! {
    panic!("Literal inline nodes must not contain nested nodes");
}
424
/// Per-spec counters of closing tokens that must be treated as literal text.
struct BlockedClosings {
    /// One counter per spec, indexed by spec position.
    counts: Vec<usize>,
}

impl BlockedClosings {
    /// Create `spec_len` counters, all starting at zero.
    fn new(spec_len: usize) -> Self {
        let counts = std::iter::repeat(0).take(spec_len).collect();
        Self { counts }
    }

    /// Record one more pending literal closing for `spec_index`.
    /// Out-of-range indices are ignored.
    fn increment(&mut self, spec_index: usize) {
        if spec_index < self.counts.len() {
            self.counts[spec_index] += 1;
        }
    }

    /// Use up one pending literal closing for `spec_index`.
    ///
    /// Returns `true` if one was pending; `false` when the counter is zero or
    /// the index is out of range.
    fn consume(&mut self, spec_index: usize) -> bool {
        match self.counts.get_mut(spec_index) {
            Some(pending) if *pending > 0 => {
                *pending -= 1;
                true
            }
            _ => false,
        }
    }
}
452
453fn is_valid_start(
454    prev: Option<char>,
455    next: Option<char>,
456    spec: &InlineSpec,
457    parser: &InlineParser,
458) -> bool {
459    if spec.literal {
460        // Literal elements (code, math, reference) accept any non-whitespace content.
461        // This allows code/math to start with \, {, (, *, etc.
462        !is_word(prev) && next.is_some_and(|c| !c.is_whitespace())
463    } else {
464        // Non-literal elements (strong, emphasis) require a word char OR another
465        // inline start marker after the opening token. Allowing an adjacent marker
466        // enables directly-nested formatting such as _*foo*_ and *_foo_*.
467        !is_word(prev)
468            && next.is_some_and(|c| is_word(Some(c)) || parser.spec_index_for_start(c).is_some())
469    }
470}
471
472fn is_valid_end(prev: Option<char>, next: Option<char>, spec: &InlineSpec) -> bool {
473    let inside_valid = if spec.literal {
474        prev.is_some()
475    } else {
476        matches!(prev, Some(ch) if !ch.is_whitespace())
477    };
478
479    inside_valid && !is_word(next)
480}
481
/// `true` when `ch` is present and alphanumeric; `None` counts as non-word.
fn is_word(ch: Option<char>) -> bool {
    matches!(ch, Some(c) if c.is_alphanumeric())
}
485
// Unit tests for the inline parser: plain text, nesting rules, literal
// elements, reference classification, escapes, and annotation helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lex::inlines::{InlineNode, PageFormat, ReferenceType};

    // Text without any markers comes back as a single Plain node.
    #[test]
    fn parses_plain_text() {
        let nodes = parse_inlines("hello world");
        assert_eq!(
            nodes,
            vec![InlineNode::Plain {
                text: "hello world".into(),
                annotations: Vec::new()
            }]
        );
    }

    // Emphasis nested inside strong, followed by trailing plain text.
    #[test]
    fn parses_strong_and_emphasis() {
        let nodes = parse_inlines("*strong _inner_* text");
        assert_eq!(nodes.len(), 2);
        match &nodes[0] {
            InlineNode::Strong { content, .. } => {
                assert_eq!(content.len(), 2);
                assert_eq!(
                    content[0],
                    InlineNode::Plain {
                        text: "strong ".into(),
                        annotations: Vec::new()
                    }
                );
                match &content[1] {
                    InlineNode::Emphasis { content: inner, .. } => {
                        assert_eq!(
                            inner,
                            &vec![InlineNode::Plain {
                                text: "inner".into(),
                                annotations: Vec::new()
                            }]
                        );
                    }
                    other => panic!("Unexpected child: {other:?}"),
                }
            }
            other => panic!("Unexpected node: {other:?}"),
        }
        assert_eq!(
            nodes[1],
            InlineNode::Plain {
                text: " text".into(),
                annotations: Vec::new()
            }
        );
    }

    // Same shape as above, checked through the `InlineNode::plain` constructor.
    #[test]
    fn nested_emphasis_inside_strong() {
        let nodes = parse_inlines("*strong and _emphasis_* text");
        assert_eq!(nodes.len(), 2);
        match &nodes[0] {
            InlineNode::Strong { content, .. } => {
                assert_eq!(content.len(), 2);
                assert_eq!(content[0], InlineNode::plain("strong and ".into()));
                match &content[1] {
                    InlineNode::Emphasis { content: inner, .. } => {
                        assert_eq!(inner, &vec![InlineNode::plain("emphasis".into())]);
                    }
                    other => panic!("Unexpected child: {other:?}"),
                }
            }
            _ => panic!("Expected strong node"),
        }
    }

    #[test]
    fn directly_nested_emphasis_wraps_strong() {
        // _*foo*_ — emphasis directly wrapping strong, no intervening content
        let nodes = parse_inlines("_*both*_");
        assert_eq!(nodes.len(), 1);
        match &nodes[0] {
            InlineNode::Emphasis { content, .. } => {
                assert_eq!(content.len(), 1);
                match &content[0] {
                    InlineNode::Strong { content: inner, .. } => {
                        assert_eq!(inner, &vec![InlineNode::plain("both".into())]);
                    }
                    other => panic!("Expected Strong inside Emphasis, got: {other:?}"),
                }
            }
            other => panic!("Expected Emphasis, got: {other:?}"),
        }
    }

    #[test]
    fn directly_nested_strong_wraps_emphasis() {
        // *_foo_* — strong directly wrapping emphasis
        let nodes = parse_inlines("*_inverted_*");
        assert_eq!(nodes.len(), 1);
        match &nodes[0] {
            InlineNode::Strong { content, .. } => {
                assert_eq!(content.len(), 1);
                match &content[0] {
                    InlineNode::Emphasis { content: inner, .. } => {
                        assert_eq!(inner, &vec![InlineNode::plain("inverted".into())]);
                    }
                    other => panic!("Expected Emphasis inside Strong, got: {other:?}"),
                }
            }
            other => panic!("Expected Strong, got: {other:?}"),
        }
    }

    // Directly nested markers still work when the inner content has spaces.
    #[test]
    fn directly_nested_multi_word() {
        let nodes = parse_inlines("_*bold multiple words*_");
        assert_eq!(nodes.len(), 1);
        match &nodes[0] {
            InlineNode::Emphasis { content, .. } => match &content[0] {
                InlineNode::Strong { content: inner, .. } => {
                    assert_eq!(
                        inner,
                        &vec![InlineNode::plain("bold multiple words".into())]
                    );
                }
                other => panic!("Expected Strong, got: {other:?}"),
            },
            other => panic!("Expected Emphasis, got: {other:?}"),
        }
    }

    #[test]
    fn arithmetic_still_not_parsed_as_inline() {
        // 7 * 8 — * surrounded by spaces should NOT start a strong.
        // This guards against the adjacent-marker rule accidentally matching.
        let nodes = parse_inlines("7 * 8");
        assert_eq!(nodes, vec![InlineNode::plain("7 * 8".into())]);
    }

    #[test]
    fn empty_markers_stay_literal() {
        // Same-delimiter adjacency (**, __, ``) should stay literal, not
        // produce empty inline elements — the adjacent-marker rule must not
        // swallow the closing delimiter as "content".
        let nodes = parse_inlines("a ** b __ c");
        assert_eq!(nodes, vec![InlineNode::plain("a ** b __ c".into())]);
    }

    // Markers inside a code span are kept verbatim.
    #[test]
    fn code_is_literal() {
        let nodes = parse_inlines("`a * literal _` text");
        assert_eq!(nodes.len(), 2);
        assert_eq!(nodes[0], InlineNode::code("a * literal _".into()));
        assert_eq!(nodes[1], InlineNode::plain(" text".into()));
    }

    // Math content is kept verbatim, including spaces and operators.
    #[test]
    fn math_is_literal() {
        let nodes = parse_inlines("#x + y#");
        assert_eq!(nodes, vec![InlineNode::math("x + y".into())]);
    }

    #[test]
    fn code_preserves_backslashes() {
        // Backslashes inside literal contexts (code) are preserved verbatim
        let nodes = parse_inlines("`\\*text\\*`");
        assert_eq!(nodes, vec![InlineNode::code("\\*text\\*".into())]);
    }

    #[test]
    fn math_preserves_backslashes() {
        // Backslashes inside literal contexts (math) are preserved verbatim
        let nodes = parse_inlines("#\\alpha#");
        assert_eq!(nodes, vec![InlineNode::math("\\alpha".into())]);
    }

    // A start marker with no matching end is emitted as plain text.
    #[test]
    fn unmatched_start_is_literal() {
        let nodes = parse_inlines("prefix *text");
        assert_eq!(nodes, vec![InlineNode::plain("prefix *text".into())]);
    }

    // An unclosed outer element keeps its already-parsed children.
    #[test]
    fn unmatched_nested_preserves_children() {
        let nodes = parse_inlines("*a _b_ c");
        assert_eq!(nodes.len(), 3);
        assert_eq!(nodes[0], InlineNode::plain("*a ".into()));
        match &nodes[1] {
            InlineNode::Emphasis { content, .. } => {
                assert_eq!(content, &vec![InlineNode::plain("b".into())]);
            }
            other => panic!("Unexpected node: {other:?}"),
        }
        assert_eq!(nodes[2], InlineNode::plain(" c".into()));
    }

    // A strong pair nested directly inside strong stays literal text.
    #[test]
    fn same_type_nesting_skips_inner_pair() {
        let nodes = parse_inlines("*outer *inner* text*");
        assert_eq!(nodes.len(), 1);
        match &nodes[0] {
            InlineNode::Strong { content, .. } => {
                assert_eq!(
                    content,
                    &vec![InlineNode::plain("outer *inner* text".into())]
                );
            }
            other => panic!("Unexpected node: {other:?}"),
        }
    }

    // Reference classification: URL target.
    #[test]
    fn reference_detects_url() {
        let nodes = parse_inlines("[https://example.com]");
        match &nodes[0] {
            InlineNode::Reference { data, .. } => match &data.reference_type {
                ReferenceType::Url { target } => assert_eq!(target, "https://example.com"),
                other => panic!("Expected URL reference, got {other:?}"),
            },
            other => panic!("Unexpected node: {other:?}"),
        }
    }

    // Reference classification: `TK-` prefixed placeholder.
    #[test]
    fn reference_detects_tk_identifier() {
        let nodes = parse_inlines("[TK-feature]");
        match &nodes[0] {
            InlineNode::Reference { data, .. } => match &data.reference_type {
                ReferenceType::ToCome { identifier } => {
                    assert_eq!(identifier.as_deref(), Some("feature"));
                }
                other => panic!("Expected TK reference, got {other:?}"),
            },
            other => panic!("Unexpected node: {other:?}"),
        }
    }

    // Reference classification: citation, labeled annotation, numeric footnote.
    #[test]
    fn reference_detects_citation_and_footnotes() {
        let citation = parse_inlines("[@doe2024]");
        let labeled = parse_inlines("[::note1]");
        let numbered = parse_inlines("[42]");

        match &citation[0] {
            InlineNode::Reference { data, .. } => match &data.reference_type {
                ReferenceType::Citation(citation_data) => {
                    assert_eq!(citation_data.keys, vec!["doe2024".to_string()]);
                    assert!(citation_data.locator.is_none());
                }
                other => panic!("Expected citation, got {other:?}"),
            },
            _ => panic!("Expected reference"),
        }
        match &labeled[0] {
            InlineNode::Reference { data, .. } => match &data.reference_type {
                ReferenceType::AnnotationReference { label } => assert_eq!(label, "note1"),
                other => panic!("Expected annotation reference, got {other:?}"),
            },
            _ => panic!("Expected reference"),
        }
        match &numbered[0] {
            InlineNode::Reference { data, .. } => match &data.reference_type {
                ReferenceType::FootnoteNumber { number } => assert_eq!(*number, 42),
                other => panic!("Expected numeric footnote, got {other:?}"),
            },
            _ => panic!("Expected reference"),
        }
    }

    // Multi-key citation with a page locator made of a range and a single page.
    #[test]
    fn reference_parses_citation_locator() {
        let nodes = parse_inlines("[@doe2024; @smith2023, pp. 45-46,47]");
        match &nodes[0] {
            InlineNode::Reference { data, .. } => match &data.reference_type {
                ReferenceType::Citation(citation_data) => {
                    assert_eq!(
                        citation_data.keys,
                        vec!["doe2024".to_string(), "smith2023".to_string()]
                    );
                    let locator = citation_data.locator.as_ref().expect("expected locator");
                    assert!(matches!(locator.format, PageFormat::Pp));
                    assert_eq!(locator.ranges.len(), 2);
                    assert_eq!(locator.ranges[0].start, 45);
                    assert_eq!(locator.ranges[0].end, Some(46));
                    assert_eq!(locator.ranges[1].start, 47);
                    assert!(locator.ranges[1].end.is_none());
                }
                other => panic!("Expected citation, got {other:?}"),
            },
            _ => panic!("Expected reference"),
        }
    }

    // Reference classification: free-form target and the `!!!` marker.
    #[test]
    fn reference_detects_general_and_not_sure() {
        let general = parse_inlines("[Section Title]");
        let unsure = parse_inlines("[!!!]");
        match &general[0] {
            InlineNode::Reference { data, .. } => match &data.reference_type {
                ReferenceType::General { target } => assert_eq!(target, "Section Title"),
                other => panic!("Expected general reference, got {other:?}"),
            },
            _ => panic!("Expected reference"),
        }
        match &unsure[0] {
            InlineNode::Reference { data, .. } => {
                assert!(matches!(data.reference_type, ReferenceType::NotSure));
            }
            _ => panic!("Expected reference"),
        }
    }

    // Test post-processor: prepends a "[strong]" plain node to Strong content
    // and leaves every other node untouched.
    fn annotate_strong(node: InlineNode) -> InlineNode {
        match node {
            InlineNode::Strong {
                mut content,
                annotations,
            } => {
                let mut annotated = vec![InlineNode::plain("[strong]".into())];
                annotated.append(&mut content);
                InlineNode::Strong {
                    content: annotated,
                    annotations,
                }
            }
            other => other,
        }
    }

    // A callback registered with `with_post_processor` runs on the closed node.
    #[test]
    fn post_process_callback_transforms_node() {
        let parser = InlineParser::new().with_post_processor(InlineKind::Strong, annotate_strong);
        let nodes = parser.parse("*bold*");
        assert_eq!(nodes.len(), 1);
        match &nodes[0] {
            InlineNode::Strong { content, .. } => {
                assert_eq!(content[0], InlineNode::plain("[strong]".into()));
                assert_eq!(content[1], InlineNode::plain("bold".into()));
            }
            other => panic!("Unexpected inline node: {other:?}"),
        }
    }

    // Backslash-escaped markers lose the backslash and stay plain text.
    #[test]
    fn escaped_tokens_are_literal() {
        let nodes = parse_inlines("\\*literal\\*");
        assert_eq!(nodes, vec![InlineNode::plain("*literal*".into())]);
    }

    // A backslash followed by a letter is kept as-is.
    #[test]
    fn backslash_before_alphanumeric_preserved() {
        let nodes = parse_inlines("C:\\Users\\name");
        assert_eq!(nodes, vec![InlineNode::plain("C:\\Users\\name".into())]);
    }

    // A doubled backslash collapses to a single one.
    #[test]
    fn escape_works_in_paths() {
        let nodes = parse_inlines("Path: C:\\\\Users\\\\name");
        assert_eq!(
            nodes,
            vec![InlineNode::plain("Path: C:\\Users\\name".into())]
        );
    }

    // `*` surrounded by spaces does not open a strong element.
    #[test]
    fn arithmetic_not_parsed_as_inline() {
        let nodes = parse_inlines("7 * 8");
        assert_eq!(nodes, vec![InlineNode::plain("7 * 8".into())]);
    }

    // A marker directly after a word character does not open an element.
    #[test]
    fn word_boundary_start_invalid() {
        let nodes = parse_inlines("word*s*");
        assert_eq!(nodes, vec![InlineNode::plain("word*s*".into())]);
    }

    // Several spaced `*` operators in one line all stay plain text.
    #[test]
    fn multiple_arithmetic_expressions() {
        let nodes = parse_inlines("Calculate 7 * 8 + 3 * 4");
        assert_eq!(
            nodes,
            vec![InlineNode::plain("Calculate 7 * 8 + 3 * 4".into())]
        );
    }

    // Nodes produced by the parser start without annotations.
    #[test]
    fn inline_node_annotations_empty_by_default() {
        let nodes = parse_inlines("*bold* text");
        assert_eq!(nodes.len(), 2);
        assert!(nodes[0].annotations().is_empty());
        assert!(nodes[1].annotations().is_empty());
    }

    // `with_annotation` attaches a single annotation to a node.
    #[test]
    fn with_annotation_adds_annotation_to_node() {
        use crate::lex::ast::elements::{Annotation, Label};

        let annotation = Annotation::marker(Label::new("test".to_string()));
        let node = InlineNode::plain("text".into()).with_annotation(annotation.clone());

        assert_eq!(node.annotations().len(), 1);
        assert_eq!(node.annotations()[0].data.label.value, "test");
    }

    // `with_annotations` attaches several annotations, preserving their order.
    #[test]
    fn with_annotations_adds_multiple_annotations() {
        use crate::lex::ast::elements::{Annotation, Label, Parameter};

        let anno1 = Annotation::marker(Label::new("doc.data".to_string()));
        let anno2 = Annotation::with_parameters(
            Label::new("test".to_string()),
            vec![Parameter::new("key".to_string(), "value".to_string())],
        );

        let node = InlineNode::math("x + y".into()).with_annotations(vec![anno1, anno2]);

        assert_eq!(node.annotations().len(), 2);
        assert_eq!(node.annotations()[0].data.label.value, "doc.data");
        assert_eq!(node.annotations()[1].data.label.value, "test");
    }

    // `annotations_mut` exposes the annotation list for in-place edits.
    #[test]
    fn annotations_mut_allows_modification() {
        use crate::lex::ast::elements::{Annotation, Label};

        let mut node = InlineNode::code("code".into());
        assert!(node.annotations().is_empty());

        let annotation = Annotation::marker(Label::new("highlighted".to_string()));
        node.annotations_mut().push(annotation);

        assert_eq!(node.annotations().len(), 1);
        assert_eq!(node.annotations()[0].data.label.value, "highlighted");
    }

    // A post-processor may attach annotations to the node it receives.
    #[test]
    fn post_processor_can_add_annotations() {
        use crate::lex::ast::elements::{Annotation, Label, Parameter};

        // Adds a `doc.data` annotation with `type=mathml` to Math nodes only.
        fn add_mathml_annotation(node: InlineNode) -> InlineNode {
            match node {
                InlineNode::Math {
                    text,
                    mut annotations,
                } => {
                    let anno = Annotation::with_parameters(
                        Label::new("doc.data".to_string()),
                        vec![Parameter::new("type".to_string(), "mathml".to_string())],
                    );
                    annotations.push(anno);
                    InlineNode::Math { text, annotations }
                }
                other => other,
            }
        }

        let parser =
            InlineParser::new().with_post_processor(InlineKind::Math, add_mathml_annotation);
        let nodes = parser.parse("#x + y#");

        assert_eq!(nodes.len(), 1);
        match &nodes[0] {
            InlineNode::Math { text, annotations } => {
                assert_eq!(text, "x + y");
                assert_eq!(annotations.len(), 1);
                assert_eq!(annotations[0].data.label.value, "doc.data");
                assert_eq!(annotations[0].data.parameters.len(), 1);
                assert_eq!(annotations[0].data.parameters[0].key, "type");
                assert_eq!(annotations[0].data.parameters[0].value, "mathml");
            }
            other => panic!("Expected math node, got {other:?}"),
        }
    }
}
958}