panache_parser/parser/inlines/
core.rs

1//! Recursive emphasis parsing using Pandoc's algorithm.
2//!
3//! This module implements emphasis/strong emphasis parsing using a recursive
4//! descent approach based on Pandoc's Haskell implementation in
5//! `Readers/Markdown.hs:L1662-L1722`.
6//!
7//! **Key algorithm**: Left-to-right, greedy, first-match wins
8//! 1. Parse text left-to-right
9//! 2. When we see delimiters, try to parse emphasis (look for matching closer)
10//! 3. If successful, emit emphasis node and continue from after closer
11//! 4. If failed (no closer found), emit delimiter as literal and continue
12//! 5. Nested emphasis is handled naturally by recursive parsing of content
13//!
14//! **Example**: `*foo **bar* baz**`
15//! - See `*`, try to parse EMPH
16//! - Parse content: see `**`, try to parse STRONG
17//! - STRONG finds closer `**` at end → succeeds, emits STRONG[bar* baz]
18//! - Outer `*` can't find closer (all delimiters consumed) → fails, emits `*foo` as literal
19//! - Result: `*foo` + STRONG[bar* baz]
20//!
21//! This matches Pandoc's behavior exactly.
22
23use crate::config::Config;
24use crate::syntax::SyntaxKind;
25use rowan::GreenNodeBuilder;
26
27// Import inline element parsers from sibling modules
28use super::bookdown::{
29    try_parse_bookdown_definition, try_parse_bookdown_reference, try_parse_bookdown_text_reference,
30};
31use super::bracketed_spans::{emit_bracketed_span, try_parse_bracketed_span};
32use super::citations::{
33    emit_bare_citation, emit_bracketed_citation, try_parse_bare_citation,
34    try_parse_bracketed_citation,
35};
36use super::code_spans::{emit_code_span, try_parse_code_span};
37use super::emoji::{emit_emoji, try_parse_emoji};
38use super::escapes::{EscapeType, emit_escape, try_parse_escape};
39use super::inline_executable::{emit_inline_executable, try_parse_inline_executable};
40use super::inline_footnotes::{
41    emit_footnote_reference, emit_inline_footnote, try_parse_footnote_reference,
42    try_parse_inline_footnote,
43};
44use super::latex::{parse_latex_command, try_parse_latex_command};
45use super::links::{
46    emit_autolink, emit_bare_uri_link, emit_inline_image, emit_inline_link, emit_reference_image,
47    emit_reference_link, try_parse_autolink, try_parse_bare_uri, try_parse_inline_image,
48    try_parse_inline_link, try_parse_reference_image, try_parse_reference_link,
49};
50use super::math::{
51    emit_display_math, emit_display_math_environment, emit_double_backslash_display_math,
52    emit_double_backslash_inline_math, emit_gfm_inline_math, emit_inline_math,
53    emit_single_backslash_display_math, emit_single_backslash_inline_math, try_parse_display_math,
54    try_parse_double_backslash_display_math, try_parse_double_backslash_inline_math,
55    try_parse_gfm_inline_math, try_parse_inline_math, try_parse_math_environment,
56    try_parse_single_backslash_display_math, try_parse_single_backslash_inline_math,
57};
58use super::native_spans::{emit_native_span, try_parse_native_span};
59use super::raw_inline::is_raw_inline;
60use super::shortcodes::{emit_shortcode, try_parse_shortcode};
61use super::strikeout::{emit_strikeout, try_parse_strikeout};
62use super::subscript::{emit_subscript, try_parse_subscript};
63use super::superscript::{emit_superscript, try_parse_superscript};
64
65/// Parse inline text using the recursive emphasis algorithm.
66///
67/// This is the main entry point for parsing inline content with Pandoc-style
68/// recursive emphasis handling. It uses a greedy left-to-right, first-match-wins
69/// approach that matches Pandoc's behavior exactly.
70///
71/// **Algorithm**:
72/// 1. Parse text left-to-right trying each inline element type in precedence order
73/// 2. When we see `*` or `_`, try to parse emphasis recursively
74/// 3. Nested emphasis naturally consumes delimiters before outer matches
75/// 4. All inline elements (code, links, math, etc.) are parsed on-the-fly
76///
77/// # Arguments
78/// * `text` - The inline text to parse
79/// * `config` - Configuration for extensions and formatting
80/// * `builder` - The CST builder to emit nodes to
81pub fn parse_inline_text_recursive(builder: &mut GreenNodeBuilder, text: &str, config: &Config) {
82    log::debug!(
83        "Recursive inline parsing: {:?} ({} bytes)",
84        &text[..text.len().min(40)],
85        text.len()
86    );
87
88    parse_inline_range(text, 0, text.len(), config, builder);
89
90    log::debug!("Recursive inline parsing complete");
91}
92
93/// Parse inline elements from text content.
94/// This is a standalone function used for recursive inline parsing within blocks.
95///
96/// The `allow_reference_links` parameter is accepted for compatibility but not currently used.
97/// Set to `false` in nested contexts (inside link text, image alt, spans) to prevent recursive parsing.
98pub fn parse_inline_text(
99    builder: &mut GreenNodeBuilder,
100    text: &str,
101    config: &Config,
102    _allow_reference_links: bool,
103) {
104    log::trace!(
105        "Parsing inline text (recursive): {:?} ({} bytes)",
106        &text[..text.len().min(40)],
107        text.len()
108    );
109
110    // Use recursive parsing with Pandoc's algorithm for emphasis
111    parse_inline_text_recursive(builder, text, config);
112}
113
114/// Try to parse emphasis starting at the given position.
115///
116/// This is the entry point for recursive emphasis parsing, equivalent to
117/// Pandoc's `enclosure` function.
118///
119/// Returns Some((bytes_consumed, delim_count)) if emphasis was successfully parsed,
120/// or None if the delimiter should be treated as literal text.
121/// When returning None, the delim_count tells the caller how many delimiter
122/// characters to skip (to avoid re-parsing parts of a failed delimiter run).
123///
124/// # Arguments
125/// * `text` - The full text being parsed
126/// * `pos` - Current position in text (where the delimiter starts)
127/// * `end` - End boundary (don't search for closers beyond this)
128/// * `config` - Configuration
129/// * `builder` - CST builder
130///
131/// **Algorithm**:
132/// 1. Count opening delimiters
133/// 2. Check if followed by whitespace (if so, return None)
134/// 3. Dispatch to parse_one/two/three based on count
135/// 4. Those functions parse content and look for matching closer (within bounds)
136/// 5. If closer found, emit node and return bytes consumed
137/// 6. If not found, return None with delimiter count (caller skips entire run)
138pub fn try_parse_emphasis(
139    text: &str,
140    pos: usize,
141    end: usize,
142    config: &Config,
143    builder: &mut GreenNodeBuilder,
144) -> Option<(usize, usize)> {
145    let bytes = text.as_bytes();
146
147    if pos >= bytes.len() {
148        return None;
149    }
150
151    let delim_char = bytes[pos] as char;
152    if delim_char != '*' && delim_char != '_' {
153        return None;
154    }
155
156    // Count consecutive delimiters
157    let mut count = 0;
158    while pos + count < bytes.len() && bytes[pos + count] == bytes[pos] {
159        count += 1;
160    }
161
162    let after_pos = pos + count;
163
164    log::debug!(
165        "try_parse_emphasis: '{}' x {} at pos {}",
166        delim_char,
167        count,
168        pos
169    );
170
171    // Check if followed by whitespace (Pandoc rule: treat as literal)
172    if after_pos < text.len()
173        && let Some(next_char) = text[after_pos..].chars().next()
174        && next_char.is_whitespace()
175    {
176        log::trace!("Delimiter followed by whitespace, treating as literal");
177        return None;
178    }
179
180    // For underscores: check intraword_underscores extension (Pandoc lines 1668-1672)
181    // Can't open if preceded by alphanumeric (prevents foo_bar from parsing)
182    if delim_char == '_'
183        && pos > 0
184        && let Some(prev_char) = text[..pos].chars().last()
185        && prev_char.is_alphanumeric()
186    {
187        log::trace!("Underscore preceded by alphanumeric, can't open (intraword)");
188        return None;
189    }
190
191    // Dispatch based on delimiter count
192    let result = match count {
193        1 => try_parse_one(text, pos, delim_char, end, config, builder),
194        2 => try_parse_two(text, pos, delim_char, end, config, builder),
195        3 => try_parse_three(text, pos, delim_char, end, config, builder),
196        _ => {
197            // 4+ delimiters: treat as literal (Pandoc behavior)
198            log::trace!("{} delimiters (4+), treating as literal", count);
199            None
200        }
201    };
202
203    // If parsing succeeded, return (bytes_consumed, delim_count)
204    // If failed, return None but the caller will know to skip `count` delimiters
205    result.map(|consumed| (consumed, count))
206}
207
208/// Try to parse emphasis in a nested context (bypassing opener validity checks).
209///
210/// This mirrors Pandoc's behavior where `one` can call `two c mempty` directly,
211/// bypassing the `enclosure` opener validity checks. This is needed because
212/// patterns like `***foo **bar** baz***` require `**` followed by space to be
213/// parsed as a nested strong opener.
214///
215/// Returns Some((bytes_consumed, delim_count)) if successful, None otherwise.
216fn try_parse_emphasis_nested(
217    text: &str,
218    pos: usize,
219    end: usize,
220    config: &Config,
221    builder: &mut GreenNodeBuilder,
222) -> Option<(usize, usize)> {
223    let bytes = text.as_bytes();
224
225    if pos >= bytes.len() {
226        return None;
227    }
228
229    let delim_char = bytes[pos] as char;
230    if delim_char != '*' && delim_char != '_' {
231        return None;
232    }
233
234    // Count consecutive delimiters
235    let mut count = 0;
236    while pos + count < bytes.len() && bytes[pos + count] == bytes[pos] {
237        count += 1;
238    }
239
240    log::debug!(
241        "try_parse_emphasis_nested: '{}' x {} at pos {}",
242        delim_char,
243        count,
244        pos
245    );
246
247    // For underscores: still check intraword_underscores (prevents foo_bar parsing)
248    // This check applies even in nested contexts
249    if delim_char == '_'
250        && pos > 0
251        && let Some(prev_char) = text[..pos].chars().last()
252        && prev_char.is_alphanumeric()
253    {
254        log::trace!("Underscore preceded by alphanumeric, can't open (intraword)");
255        return None;
256    }
257
258    // NOTE: We intentionally skip the "delimiter followed by whitespace" check here.
259    // In nested contexts (inside `one` calling `two`), Pandoc allows openers
260    // followed by whitespace because the opener has already been matched.
261
262    // Dispatch based on delimiter count
263    let result = match count {
264        1 => try_parse_one(text, pos, delim_char, end, config, builder),
265        2 => try_parse_two(text, pos, delim_char, end, config, builder),
266        3 => try_parse_three(text, pos, delim_char, end, config, builder),
267        _ => {
268            // 4+ delimiters: treat as literal (Pandoc behavior)
269            log::trace!("{} delimiters (4+), treating as literal", count);
270            None
271        }
272    };
273
274    result.map(|consumed| (consumed, count))
275}
276
277/// Try to parse emphasis with *** opening delimiter.
278///
279/// Tries to match closers in order: *** → ** → *
280/// Returns Some(bytes_consumed) if successful, None otherwise.
281fn try_parse_three(
282    text: &str,
283    pos: usize,
284    delim_char: char,
285    end: usize,
286    config: &Config,
287    builder: &mut GreenNodeBuilder,
288) -> Option<usize> {
289    let content_start = pos + 3;
290    let one = delim_char.to_string();
291    let two = one.repeat(2);
292
293    log::debug!("try_parse_three: '{}' x 3 at pos {}", delim_char, pos);
294
295    // Pandoc algorithm (line 1695): Parse content UNTIL we see a VALID ender
296    // We loop through potential enders, checking if each is valid.
297    // Invalid enders (like `**` preceded by whitespace) are skipped.
298    let mut search_pos = content_start;
299
300    loop {
301        // Find next potential ender
302        let closer_start = match find_first_potential_ender(text, search_pos, delim_char, end) {
303            Some(p) => p,
304            None => {
305                log::trace!("No potential ender found for ***");
306                return None;
307            }
308        };
309
310        log::debug!("Potential ender at pos {}", closer_start);
311
312        // Count how many delimiters we have at closer_start
313        let bytes = text.as_bytes();
314        let mut closer_count = 0;
315        let mut check_pos = closer_start;
316        while check_pos < bytes.len() && bytes[check_pos] == delim_char as u8 {
317            closer_count += 1;
318            check_pos += 1;
319        }
320
321        log::debug!(
322            "Found {} x {} at pos {}",
323            delim_char,
324            closer_count,
325            closer_start
326        );
327
328        // Try to match closers in order: ***, **, * (Pandoc lines 1696-1698)
329
330        // Try *** (line 1696)
331        if closer_count >= 3 && is_valid_ender(text, closer_start, delim_char, 3) {
332            log::debug!("Matched *** closer, emitting Strong[Emph[content]]");
333
334            builder.start_node(SyntaxKind::STRONG.into());
335            builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
336
337            builder.start_node(SyntaxKind::EMPHASIS.into());
338            builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
339            parse_inline_range_nested(text, content_start, closer_start, config, builder);
340            builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
341            builder.finish_node(); // EMPHASIS
342
343            builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
344            builder.finish_node(); // STRONG
345
346            return Some(closer_start + 3 - pos);
347        }
348
349        // Try ** (line 1697)
350        if closer_count >= 2 && is_valid_ender(text, closer_start, delim_char, 2) {
351            log::debug!("Matched ** closer, wrapping as Strong and continuing with one");
352
353            let continue_pos = closer_start + 2;
354
355            if let Some(final_closer_pos) =
356                parse_until_closer_with_nested_two(text, continue_pos, delim_char, 1, end, config)
357            {
358                log::debug!(
359                    "Found * closer at pos {}, emitting Emph[Strong[...], ...]",
360                    final_closer_pos
361                );
362
363                builder.start_node(SyntaxKind::EMPHASIS.into());
364                builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
365
366                builder.start_node(SyntaxKind::STRONG.into());
367                builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
368                parse_inline_range_nested(text, content_start, closer_start, config, builder);
369                builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
370                builder.finish_node(); // STRONG
371
372                // Parse additional content between ** and * (up to but not including the closer)
373                parse_inline_range_nested(text, continue_pos, final_closer_pos, config, builder);
374
375                builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
376                builder.finish_node(); // EMPHASIS
377
378                return Some(final_closer_pos + 1 - pos);
379            }
380
381            // Fallback: emit * + STRONG
382            log::debug!("No * closer found after **, emitting * + STRONG");
383            builder.token(SyntaxKind::TEXT.into(), &one);
384
385            builder.start_node(SyntaxKind::STRONG.into());
386            builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
387            parse_inline_range_nested(text, content_start, closer_start, config, builder);
388            builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
389            builder.finish_node(); // STRONG
390
391            return Some(closer_start + 2 - pos);
392        }
393
394        // Try * (line 1698)
395        if closer_count >= 1 && is_valid_ender(text, closer_start, delim_char, 1) {
396            log::debug!("Matched * closer, wrapping as Emph and continuing with two");
397
398            let continue_pos = closer_start + 1;
399
400            if let Some(final_closer_pos) =
401                parse_until_closer_with_nested_one(text, continue_pos, delim_char, 2, end, config)
402            {
403                log::debug!(
404                    "Found ** closer at pos {}, emitting Strong[Emph[...], ...]",
405                    final_closer_pos
406                );
407
408                builder.start_node(SyntaxKind::STRONG.into());
409                builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
410
411                builder.start_node(SyntaxKind::EMPHASIS.into());
412                builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
413                parse_inline_range_nested(text, content_start, closer_start, config, builder);
414                builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
415                builder.finish_node(); // EMPHASIS
416
417                parse_inline_range_nested(text, continue_pos, final_closer_pos, config, builder);
418
419                builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
420                builder.finish_node(); // STRONG
421
422                return Some(final_closer_pos + 2 - pos);
423            }
424
425            // Fallback: emit ** + EMPH
426            log::debug!("No ** closer found after *, emitting ** + EMPH");
427            builder.token(SyntaxKind::TEXT.into(), &two);
428
429            builder.start_node(SyntaxKind::EMPHASIS.into());
430            builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
431            parse_inline_range_nested(text, content_start, closer_start, config, builder);
432            builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
433            builder.finish_node(); // EMPHASIS
434
435            return Some(closer_start + 1 - pos);
436        }
437
438        // No valid ender at this position - continue searching after this delimiter run
439        log::debug!(
440            "No valid ender at pos {}, continuing search from {}",
441            closer_start,
442            closer_start + closer_count
443        );
444        search_pos = closer_start + closer_count;
445    }
446}
447
/// Locate the first unescaped occurrence of `delim_char` at or after `start`.
///
/// The scan covers byte offsets `start..min(end, text.len())`. A delimiter
/// counts as escaped when it is directly preceded by an odd number of
/// backslashes; the backslash look-behind is not limited by `start`.
///
/// Returns the byte offset of the delimiter, or `None` if there is none in
/// range.
fn find_first_potential_ender(
    text: &str,
    start: usize,
    delim_char: char,
    end: usize,
) -> Option<usize> {
    let bytes = text.as_bytes();
    let delim = delim_char as u8;
    let limit = end.min(bytes.len());

    (start..limit).find(|&idx| {
        if bytes[idx] != delim {
            return false;
        }
        // An even number of preceding backslashes means they escape each
        // other, leaving the delimiter itself unescaped.
        let preceding_backslashes = bytes[..idx]
            .iter()
            .rev()
            .take_while(|&&b| b == b'\\')
            .count();
        preceding_backslashes % 2 == 0
    })
}
485
/// Decide whether the `delim_count` bytes starting at `pos` form a valid
/// closing delimiter run (the original notes map this to Pandoc's `ender c n`).
///
/// All of the following must hold:
/// * the run fits inside `text` and consists only of `delim_char`;
/// * the byte before and the byte after the run are not `delim_char`, so the
///   run is exactly `delim_count` long;
/// * for `_` only: the run is not preceded by whitespace and not followed by
///   an alphanumeric character. Asterisks have no such flanking requirement.
fn is_valid_ender(text: &str, pos: usize, delim_char: char, delim_count: usize) -> bool {
    let bytes = text.as_bytes();
    let delim = delim_char as u8;
    let run_end = pos + delim_count;

    // The candidate run must lie within the text and contain only delimiters.
    let Some(run) = bytes.get(pos..run_end) else {
        return false;
    };
    if run.iter().any(|&b| b != delim) {
        return false;
    }

    // A neighbouring delimiter on either side means the run is longer than
    // requested.
    let delimiter_before = pos > 0 && bytes[pos - 1] == delim;
    let delimiter_after = bytes.get(run_end) == Some(&delim);
    if delimiter_before || delimiter_after {
        return false;
    }

    // Underscore-only flanking rules.
    if delim_char == '_'
        && (text[..pos]
            .chars()
            .next_back()
            .is_some_and(char::is_whitespace)
            || text[run_end..]
                .chars()
                .next()
                .is_some_and(char::is_alphanumeric))
    {
        return false;
    }

    true
}
534
535/// Try to parse emphasis with ** opening delimiter.
536///
537/// Tries to match ** closer only. No fallback.
538/// Returns Some(bytes_consumed) if successful, None otherwise.
539fn try_parse_two(
540    text: &str,
541    pos: usize,
542    delim_char: char,
543    end: usize,
544    config: &Config,
545    builder: &mut GreenNodeBuilder,
546) -> Option<usize> {
547    let content_start = pos + 2;
548
549    log::debug!("try_parse_two: '{}' x 2 at pos {}", delim_char, pos);
550
551    // Try to find ** closer, checking for nested * emphasis along the way
552    if let Some(closer_pos) =
553        parse_until_closer_with_nested_one(text, content_start, delim_char, 2, end, config)
554    {
555        log::debug!("Found ** closer at pos {}", closer_pos);
556
557        // Emit STRONG(content)
558        builder.start_node(SyntaxKind::STRONG.into());
559        builder.token(SyntaxKind::STRONG_MARKER.into(), &text[pos..pos + 2]);
560        parse_inline_range_nested(text, content_start, closer_pos, config, builder);
561        builder.token(
562            SyntaxKind::STRONG_MARKER.into(),
563            &text[closer_pos..closer_pos + 2],
564        );
565        builder.finish_node(); // STRONG
566
567        return Some(closer_pos + 2 - pos);
568    }
569
570    // No closer found
571    log::trace!("No closer found for **");
572    None
573}
574
575/// Try to parse emphasis with * opening delimiter.
576///
577/// Tries to match * closer.
578/// Returns Some(bytes_consumed) if successful, None otherwise.
579///
580/// **Pandoc algorithm**: While parsing content, if we encounter **,
581/// try to parse it as `two` (strong) recursively. If `two` succeeds,
582/// it consumes the ** delimiters, potentially preventing us from finding
583/// a closer for the outer *. This creates priority where ** can "steal"
584/// matches from *.
585fn try_parse_one(
586    text: &str,
587    pos: usize,
588    delim_char: char,
589    end: usize,
590    config: &Config,
591    builder: &mut GreenNodeBuilder,
592) -> Option<usize> {
593    let content_start = pos + 1;
594
595    log::debug!("try_parse_one: '{}' x 1 at pos {}", delim_char, pos);
596
597    // Try to find * closer using Pandoc's algorithm with nested two attempts
598    if let Some(closer_pos) =
599        parse_until_closer_with_nested_two(text, content_start, delim_char, 1, end, config)
600    {
601        log::debug!("Found * closer at pos {}", closer_pos);
602
603        // Emit EMPH(content)
604        builder.start_node(SyntaxKind::EMPHASIS.into());
605        builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &text[pos..pos + 1]);
606        parse_inline_range_nested(text, content_start, closer_pos, config, builder);
607        builder.token(
608            SyntaxKind::EMPHASIS_MARKER.into(),
609            &text[closer_pos..closer_pos + 1],
610        );
611        builder.finish_node(); // EMPHASIS
612
613        return Some(closer_pos + 1 - pos);
614    }
615
616    // No closer found
617    log::trace!("No closer found for *");
618    None
619}
620
621/// Parse inline content and look for a matching closer, with nested two attempts.
622///
623/// This implements Pandoc's algorithm from Markdown.hs lines 1712-1717:
624/// When parsing `*...*`, if we encounter `**` (and it's not followed by
625/// another `*` that would close the outer emphasis), try to parse it as
626/// `two c mempty` (strong). If `two` succeeds, those `**` delimiters are
627/// consumed, and we continue searching for the `*` closer.
628///
629/// This creates a priority system where `**` can "steal" matches from `*`.
630///
631/// Example: `*foo **bar* baz**`
632/// - When parsing the outer `*...*`, we encounter `**` at position 5
633/// - We try `two` which succeeds with `**bar* baz**`
634/// - Now there's no `*` closer for the outer `*`, so it fails
635/// - Result: literal `*foo ` + STRONG("bar* baz")
636///
637/// # Arguments
638/// * `end` - Don't search beyond this position (respects nesting boundaries)
639fn parse_until_closer_with_nested_two(
640    text: &str,
641    start: usize,
642    delim_char: char,
643    delim_count: usize,
644    end: usize,
645    config: &Config,
646) -> Option<usize> {
647    let bytes = text.as_bytes();
648    let mut pos = start;
649
650    while pos < end.min(text.len()) {
651        if bytes[pos] == b'`'
652            && let Some(m) = try_parse_inline_executable(
653                &text[pos..],
654                config.extensions.rmarkdown_inline_code,
655                config.extensions.quarto_inline_code,
656            )
657        {
658            log::trace!(
659                "Skipping inline executable span of {} bytes at pos {}",
660                m.total_len,
661                pos
662            );
663            pos += m.total_len;
664            continue;
665        }
666
667        // Skip over code spans - their content is protected from delimiter matching
668        if bytes[pos] == b'`'
669            && let Some((len, _, _, _)) = try_parse_code_span(&text[pos..])
670        {
671            log::trace!("Skipping code span of {} bytes at pos {}", len, pos);
672            pos += len;
673            continue;
674        }
675
676        // Skip over inline math - their content is protected from delimiter matching
677        if bytes[pos] == b'$'
678            && let Some((len, _)) = try_parse_inline_math(&text[pos..])
679        {
680            log::trace!("Skipping inline math of {} bytes at pos {}", len, pos);
681            pos += len;
682            continue;
683        }
684
685        // Skip over links - their content is protected from delimiter matching
686        if bytes[pos] == b'['
687            && let Some((len, _, _, _)) = try_parse_inline_link(&text[pos..])
688        {
689            log::trace!("Skipping inline link of {} bytes at pos {}", len, pos);
690            pos += len;
691            continue;
692        }
693
694        // Pandoc algorithm: If we're looking for a single delimiter (*) and
695        // encounter a double delimiter (**), try to parse it as `two` (strong).
696        // This happens BEFORE checking if pos is a closer for our current emphasis.
697        if delim_count == 1
698            && pos + 2 <= text.len()
699            && bytes[pos] == delim_char as u8
700            && bytes[pos + 1] == delim_char as u8
701        {
702            // First check if the first delimiter is escaped
703            let first_is_escaped = {
704                let mut backslash_count = 0;
705                let mut check_pos = pos;
706                while check_pos > 0 && bytes[check_pos - 1] == b'\\' {
707                    backslash_count += 1;
708                    check_pos -= 1;
709                }
710                backslash_count % 2 == 1
711            };
712
713            if first_is_escaped {
714                // First * is escaped, skip it and continue
715                // The second * might be a closer or start of emphasis
716                log::trace!(
717                    "First * at pos {} is escaped, skipping to check second *",
718                    pos
719                );
720                pos += 1;
721                continue;
722            }
723
724            // Check that there's NOT a third delimiter (which would make this
725            // part of a longer run that we shouldn't treat as `two`)
726            let no_third_delim = pos + 2 >= bytes.len() || bytes[pos + 2] != delim_char as u8;
727
728            if no_third_delim {
729                log::trace!(
730                    "try_parse_one: found ** at pos {}, attempting nested two",
731                    pos
732                );
733
734                // Try to parse as `two` (strong emphasis)
735                // We create a temporary builder to test if `two` succeeds
736                let mut temp_builder = GreenNodeBuilder::new();
737                if let Some(two_consumed) =
738                    try_parse_two(text, pos, delim_char, end, config, &mut temp_builder)
739                {
740                    // `two` succeeded! Those ** delimiters are consumed.
741                    // We skip past the `two` and continue searching for our `*` closer.
742                    log::debug!(
743                        "Nested two succeeded, consumed {} bytes, continuing search",
744                        two_consumed
745                    );
746                    pos += two_consumed;
747                    continue;
748                }
749                // `two` failed - this means the entire `one` parse should fail!
750                // In Pandoc, the `try (string [c,c] >> notFollowedBy (ender c 1) >> two c mempty)`
751                // alternative fails, and the first alternative `notFollowedBy (ender c 1) >> inline`
752                // also fails because we ARE followed by an ender (the first * of **).
753                // So the entire content parsing fails, and `one` returns failure.
754                log::trace!("Nested two failed at pos {}, entire one() should fail", pos);
755                return None;
756            }
757        }
758
759        // Check if we have a potential closer here
760        if pos + delim_count <= text.len() {
761            let mut matches = true;
762            for i in 0..delim_count {
763                if bytes[pos + i] != delim_char as u8 {
764                    matches = false;
765                    break;
766                }
767            }
768
769            if matches {
770                // IMPORTANT: Check that there are EXACTLY delim_count delimiters,
771                // not more. E.g., when looking for `*`, we shouldn't match
772                // `*` that's part of a longer run.
773
774                // Check: not escaped (preceded by odd number of backslashes)
775                let is_escaped = {
776                    let mut backslash_count = 0;
777                    let mut check_pos = pos;
778                    while check_pos > 0 && bytes[check_pos - 1] == b'\\' {
779                        backslash_count += 1;
780                        check_pos -= 1;
781                    }
782                    backslash_count % 2 == 1 // Odd number = escaped
783                };
784
785                // Allow matching at the start OR end of a delimiter run.
786                // This lets `**` close at the end of `***` (after a nested `*` closes),
787                // while still avoiding matches in the middle of longer runs.
788                let at_run_start = pos == 0 || bytes[pos - 1] != delim_char as u8;
789                let after_pos = pos + delim_count;
790                let at_run_end = after_pos >= bytes.len() || bytes[after_pos] != delim_char as u8;
791
792                if (at_run_start || at_run_end) && !is_escaped {
793                    // Found a potential closer!
794                    // For underscores, check right-flanking: closer must be preceded by non-whitespace
795                    // For asterisks, Pandoc doesn't require right-flanking (see ender function in Markdown.hs)
796                    if delim_char == '_'
797                        && pos > start
798                        && let Some(prev_char) = text[..pos].chars().last()
799                        && prev_char.is_whitespace()
800                    {
801                        log::trace!(
802                            "Underscore closer preceded by whitespace at pos {}, not right-flanking",
803                            pos
804                        );
805                        // Not a valid closer, continue searching
806                        pos += 1;
807                        continue;
808                    }
809
810                    log::trace!(
811                        "Found exact {} x {} closer at pos {}",
812                        delim_char,
813                        delim_count,
814                        pos
815                    );
816                    return Some(pos);
817                }
818            }
819        }
820
821        // Not a closer, move to next position
822        // TODO: Should skip entire characters (UTF-8), not just bytes
823        pos += 1;
824    }
825
826    None
827}
828
829/// Parse inline content and look for a matching closer, with nested one attempts.
830///
831/// This implements the symmetric case to `parse_until_closer_with_nested_two`:
832/// When parsing `**...**`, if we encounter `*` (and it's not followed by
833/// another `*` that would be part of our `**` closer), try to parse it as
834/// `one c mempty` (emphasis). If `one` succeeds, those `*` delimiters are
835/// consumed, and we continue searching for the `**` closer.
836///
837/// This ensures nested emphasis closes before the outer strong emphasis.
838///
839/// Example: `**bold with *italic***`
840/// - When parsing the outer `**...**, we scan for `**` closer
841/// - At position 12, we encounter a single `*` (start of `*italic`)
842/// - We try `one` which succeeds with `*italic*` (consuming the first `*` from `***`)
843/// - We continue scanning and find `**` at position 20 (the remaining `**` from `***`)
844/// - Result: STRONG["bold with " EMPHASIS["italic"]]
845///
846/// # Arguments
847/// * `end` - Don't search beyond this position (respects nesting boundaries)
848fn parse_until_closer_with_nested_one(
849    text: &str,
850    start: usize,
851    delim_char: char,
852    delim_count: usize,
853    end: usize,
854    config: &Config,
855) -> Option<usize> {
856    let bytes = text.as_bytes();
857    let mut pos = start;
858
859    while pos < end.min(text.len()) {
860        if bytes[pos] == b'`'
861            && let Some(m) = try_parse_inline_executable(
862                &text[pos..],
863                config.extensions.rmarkdown_inline_code,
864                config.extensions.quarto_inline_code,
865            )
866        {
867            log::trace!(
868                "Skipping inline executable span of {} bytes at pos {}",
869                m.total_len,
870                pos
871            );
872            pos += m.total_len;
873            continue;
874        }
875
876        // Skip over code spans - their content is protected from delimiter matching
877        if bytes[pos] == b'`'
878            && let Some((len, _, _, _)) = try_parse_code_span(&text[pos..])
879        {
880            log::trace!("Skipping code span of {} bytes at pos {}", len, pos);
881            pos += len;
882            continue;
883        }
884
885        // Skip over inline math - their content is protected from delimiter matching
886        if bytes[pos] == b'$'
887            && let Some((len, _)) = try_parse_inline_math(&text[pos..])
888        {
889            log::trace!("Skipping inline math of {} bytes at pos {}", len, pos);
890            pos += len;
891            continue;
892        }
893
894        // Skip over links - their content is protected from delimiter matching
895        if bytes[pos] == b'['
896            && let Some((len, _, _, _)) = try_parse_inline_link(&text[pos..])
897        {
898            log::trace!("Skipping inline link of {} bytes at pos {}", len, pos);
899            pos += len;
900            continue;
901        }
902
903        // Pandoc algorithm: If we're looking for a double delimiter (**) and
904        // encounter a single delimiter (*), check if it's a valid emphasis opener.
905        // If it is, try to parse it as `one` (emphasis). If `one` succeeds, skip
906        // over it. If `one` fails, the outer `two` also fails (delimiter poisoning).
907        // If the `*` is NOT a valid opener (e.g., followed by whitespace or escaped),
908        // skip it and continue looking for the `**` closer.
909        if delim_count == 2 && pos < text.len() && bytes[pos] == delim_char as u8 {
910            // Check that there's NOT a second delimiter immediately after
911            // (which would make this part of our `**` closer or another `**` opener)
912            let no_second_delim = pos + 1 >= bytes.len() || bytes[pos + 1] != delim_char as u8;
913
914            if no_second_delim {
915                // Check if this * is escaped (preceded by odd number of backslashes)
916                let is_escaped = {
917                    let mut backslash_count = 0;
918                    let mut check_pos = pos;
919                    while check_pos > 0 && bytes[check_pos - 1] == b'\\' {
920                        backslash_count += 1;
921                        check_pos -= 1;
922                    }
923                    backslash_count % 2 == 1
924                };
925
926                if is_escaped {
927                    // Escaped delimiter - just literal text, skip it
928                    log::trace!("* at pos {} is escaped, skipping", pos);
929                    pos += 1;
930                    continue;
931                }
932
933                // Check if this * is a valid emphasis opener (Pandoc's enclosure rule).
934                // A delimiter followed by whitespace is NOT an opener - it's literal text.
935                let after_delim = pos + 1;
936                let followed_by_whitespace = after_delim < text.len()
937                    && text[after_delim..]
938                        .chars()
939                        .next()
940                        .is_some_and(|c| c.is_whitespace());
941
942                if followed_by_whitespace {
943                    // Not a valid opener - just literal text, skip it
944                    log::trace!(
945                        "* at pos {} followed by whitespace, not an opener, skipping",
946                        pos
947                    );
948                    pos += 1;
949                    continue;
950                }
951
952                log::trace!(
953                    "try_parse_two: found * at pos {}, attempting nested one",
954                    pos
955                );
956
957                // Try to parse as `one` (emphasis)
958                // We create a temporary builder to test if `one` succeeds
959                let mut temp_builder = GreenNodeBuilder::new();
960                if let Some(one_consumed) =
961                    try_parse_one(text, pos, delim_char, end, config, &mut temp_builder)
962                {
963                    // `one` succeeded! Those * delimiters are consumed.
964                    // We skip past the `one` and continue searching for our `**` closer.
965                    log::debug!(
966                        "Nested one succeeded, consumed {} bytes, continuing search",
967                        one_consumed
968                    );
969                    pos += one_consumed;
970                    continue;
971                }
972
973                // `one` failed to find a closer. According to Pandoc's algorithm,
974                // this means the outer `two` should also fail. An unmatched inner
975                // delimiter "poisons" the outer emphasis.
976                // Example: `**foo *bar**` - the `*` can't find a closer, so the
977                // outer `**` should fail and the whole thing becomes literal.
978                log::debug!(
979                    "Nested one failed at pos {}, poisoning outer two (no closer found)",
980                    pos
981                );
982                return None;
983            }
984        }
985
986        // Check if we have a potential closer here
987        if pos + delim_count <= text.len() {
988            let mut matches = true;
989            for i in 0..delim_count {
990                if bytes[pos + i] != delim_char as u8 {
991                    matches = false;
992                    break;
993                }
994            }
995
996            if matches {
997                // Check: not escaped (preceded by odd number of backslashes)
998                let is_escaped = {
999                    let mut backslash_count = 0;
1000                    let mut check_pos = pos;
1001                    while check_pos > 0 && bytes[check_pos - 1] == b'\\' {
1002                        backslash_count += 1;
1003                        check_pos -= 1;
1004                    }
1005                    backslash_count % 2 == 1 // Odd number = escaped
1006                };
1007
1008                // Allow matching at the start OR end of a delimiter run.
1009                // This lets `**` close at the end of `***` (after a nested `*` closes),
1010                // while still avoiding matches in the middle of longer runs.
1011                let at_run_start = pos == 0 || bytes[pos - 1] != delim_char as u8;
1012                let after_pos = pos + delim_count;
1013                let at_run_end = after_pos >= bytes.len() || bytes[after_pos] != delim_char as u8;
1014
1015                if (at_run_start || at_run_end) && !is_escaped {
1016                    // Found a potential closer!
1017                    // For underscores, check right-flanking: closer must be preceded by non-whitespace
1018                    // For asterisks, Pandoc doesn't require right-flanking (see ender function in Markdown.hs)
1019                    if delim_char == '_'
1020                        && pos > start
1021                        && let Some(prev_char) = text[..pos].chars().last()
1022                        && prev_char.is_whitespace()
1023                    {
1024                        log::trace!(
1025                            "Underscore closer preceded by whitespace at pos {}, not right-flanking",
1026                            pos
1027                        );
1028                        // Not a valid closer, continue searching
1029                        pos += 1;
1030                        continue;
1031                    }
1032
1033                    log::trace!(
1034                        "Found exact {} x {} closer at pos {}",
1035                        delim_char,
1036                        delim_count,
1037                        pos
1038                    );
1039                    return Some(pos);
1040                }
1041            }
1042        }
1043
1044        // Not a closer, move to next position
1045        // TODO: Should skip entire characters (UTF-8), not just bytes
1046        pos += 1;
1047    }
1048
1049    None
1050}
1051
1052///
1053/// This is the recursive inline parser that handles all inline elements:
1054/// - Text
1055/// - Escapes (highest priority)
1056/// - Code spans
1057/// - Math (inline and display)
1058/// - Emphasis/strong (via try_parse_emphasis)
1059/// - Other inline elements (TODO: links, images, citations, etc.)
1060///
1061/// **Important**: This is where the greedy left-to-right parsing happens.
1062/// When we see `**`, we try to parse it as STRONG. If it succeeds, those
1063/// delimiters are consumed and won't be available for outer emphasis.
1064///
1065/// # Arguments
1066/// * `nested_emphasis` - If true, bypass opener validity checks for emphasis.
1067///   Set to true when called from within emphasis parsing (e.g., from try_parse_one/two/three).
1068fn parse_inline_range(
1069    text: &str,
1070    start: usize,
1071    end: usize,
1072    config: &Config,
1073    builder: &mut GreenNodeBuilder,
1074) {
1075    parse_inline_range_impl(text, start, end, config, builder, false)
1076}
1077
1078/// Same as `parse_inline_range` but bypasses opener validity checks for emphasis.
1079/// Used within emphasis parsing contexts (e.g., from try_parse_one/two/three).
1080fn parse_inline_range_nested(
1081    text: &str,
1082    start: usize,
1083    end: usize,
1084    config: &Config,
1085    builder: &mut GreenNodeBuilder,
1086) {
1087    parse_inline_range_impl(text, start, end, config, builder, true)
1088}
1089
/// Report whether byte offset `pos` in `text` is an acceptable place for an
/// emoji alias (`:name:`) to begin.
///
/// The position qualifies when it is the very start of `text`, or when the
/// byte immediately before it is neither an ASCII letter/digit nor `_`.
/// Only that single preceding byte is inspected, so a preceding non-ASCII
/// character always counts as a boundary.
///
/// # Panics
/// Panics if `pos` is greater than `text.len()`.
fn is_emoji_boundary(text: &str, pos: usize) -> bool {
    match pos.checked_sub(1) {
        // Nothing precedes the start of the text.
        None => true,
        Some(prev_idx) => {
            let prev = text.as_bytes()[prev_idx];
            !(prev.is_ascii_alphanumeric() || prev == b'_')
        }
    }
}
1099
1100fn parse_inline_range_impl(
1101    text: &str,
1102    start: usize,
1103    end: usize,
1104    config: &Config,
1105    builder: &mut GreenNodeBuilder,
1106    nested_emphasis: bool,
1107) {
1108    log::debug!(
1109        "parse_inline_range: start={}, end={}, text={:?}",
1110        start,
1111        end,
1112        &text[start..end]
1113    );
1114    let mut pos = start;
1115    let mut text_start = start;
1116
1117    while pos < end {
1118        let byte = text.as_bytes()[pos];
1119
1120        // Backslash math (highest priority if enabled)
1121        if byte == b'\\' {
1122            // Try double backslash display math first: \\[...\\]
1123            if config.extensions.tex_math_double_backslash {
1124                if let Some((len, content)) = try_parse_double_backslash_display_math(&text[pos..])
1125                {
1126                    if pos > text_start {
1127                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1128                    }
1129                    log::debug!("Matched double backslash display math at pos {}", pos);
1130                    emit_double_backslash_display_math(builder, content);
1131                    pos += len;
1132                    text_start = pos;
1133                    continue;
1134                }
1135
1136                // Try double backslash inline math: \\(...\\)
1137                if let Some((len, content)) = try_parse_double_backslash_inline_math(&text[pos..]) {
1138                    if pos > text_start {
1139                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1140                    }
1141                    log::debug!("Matched double backslash inline math at pos {}", pos);
1142                    emit_double_backslash_inline_math(builder, content);
1143                    pos += len;
1144                    text_start = pos;
1145                    continue;
1146                }
1147            }
1148
1149            // Try single backslash display math: \[...\]
1150            if config.extensions.tex_math_single_backslash {
1151                if let Some((len, content)) = try_parse_single_backslash_display_math(&text[pos..])
1152                {
1153                    if pos > text_start {
1154                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1155                    }
1156                    log::debug!("Matched single backslash display math at pos {}", pos);
1157                    emit_single_backslash_display_math(builder, content);
1158                    pos += len;
1159                    text_start = pos;
1160                    continue;
1161                }
1162
1163                // Try single backslash inline math: \(...\)
1164                if let Some((len, content)) = try_parse_single_backslash_inline_math(&text[pos..]) {
1165                    if pos > text_start {
1166                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1167                    }
1168                    log::debug!("Matched single backslash inline math at pos {}", pos);
1169                    emit_single_backslash_inline_math(builder, content);
1170                    pos += len;
1171                    text_start = pos;
1172                    continue;
1173                }
1174            }
1175
1176            // Try math environments \begin{equation}...\end{equation}
1177            if config.extensions.raw_tex
1178                && let Some((len, begin_marker, content, end_marker)) =
1179                    try_parse_math_environment(&text[pos..])
1180            {
1181                if pos > text_start {
1182                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1183                }
1184                log::debug!("Matched math environment at pos {}", pos);
1185                emit_display_math_environment(builder, begin_marker, content, end_marker);
1186                pos += len;
1187                text_start = pos;
1188                continue;
1189            }
1190
1191            // Try bookdown reference: \@ref(label)
1192            if config.extensions.bookdown_references
1193                && let Some((len, label)) = try_parse_bookdown_reference(&text[pos..])
1194            {
1195                if pos > text_start {
1196                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1197                }
1198                log::debug!("Matched bookdown reference at pos {}: {}", pos, label);
1199                super::citations::emit_bookdown_crossref(builder, label);
1200                pos += len;
1201                text_start = pos;
1202                continue;
1203            }
1204
1205            // Try escapes (after bookdown refs and backslash math)
1206            if let Some((len, ch, escape_type)) = try_parse_escape(&text[pos..]) {
1207                let escape_enabled = match escape_type {
1208                    EscapeType::HardLineBreak => config.extensions.escaped_line_breaks,
1209                    EscapeType::NonbreakingSpace => config.extensions.all_symbols_escapable,
1210                    EscapeType::Literal => {
1211                        const BASE_ESCAPABLE: &str = "\\`*_{}[]()>#+-.!";
1212                        BASE_ESCAPABLE.contains(ch) || config.extensions.all_symbols_escapable
1213                    }
1214                };
1215                if !escape_enabled {
1216                    // Don't treat as hard line break - skip the escape and continue
1217                    // The backslash will be included in the next TEXT token
1218                    pos += 1;
1219                    continue;
1220                }
1221
1222                // Emit accumulated text
1223                if pos > text_start {
1224                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1225                }
1226
1227                log::debug!("Matched escape at pos {}: \\{}", pos, ch);
1228                emit_escape(builder, ch, escape_type);
1229                pos += len;
1230                text_start = pos;
1231                continue;
1232            }
1233
1234            // Try LaTeX commands (after escapes, before shortcodes)
1235            if config.extensions.raw_tex
1236                && let Some(len) = try_parse_latex_command(&text[pos..])
1237            {
1238                if pos > text_start {
1239                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1240                }
1241                log::debug!("Matched LaTeX command at pos {}", pos);
1242                parse_latex_command(builder, &text[pos..], len);
1243                pos += len;
1244                text_start = pos;
1245                continue;
1246            }
1247        }
1248
1249        // Try Quarto shortcodes: {{< shortcode >}}
1250        if byte == b'{'
1251            && pos + 1 < text.len()
1252            && text.as_bytes()[pos + 1] == b'{'
1253            && let Some((len, name, attrs)) = try_parse_shortcode(&text[pos..])
1254        {
1255            if pos > text_start {
1256                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1257            }
1258            log::debug!("Matched shortcode at pos {}: {}", pos, &name);
1259            emit_shortcode(builder, &name, attrs);
1260            pos += len;
1261            text_start = pos;
1262            continue;
1263        }
1264
1265        // Try inline executable code spans (`... `r expr`` and `... `{r} expr``)
1266        if byte == b'`'
1267            && let Some(m) = try_parse_inline_executable(
1268                &text[pos..],
1269                config.extensions.rmarkdown_inline_code,
1270                config.extensions.quarto_inline_code,
1271            )
1272        {
1273            if pos > text_start {
1274                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1275            }
1276            log::debug!("Matched inline executable code at pos {}", pos);
1277            emit_inline_executable(builder, &m);
1278            pos += m.total_len;
1279            text_start = pos;
1280            continue;
1281        }
1282
1283        // Try code spans
1284        if byte == b'`'
1285            && let Some((len, content, backtick_count, attributes)) =
1286                try_parse_code_span(&text[pos..])
1287        {
1288            // Emit accumulated text
1289            if pos > text_start {
1290                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1291            }
1292
1293            log::debug!(
1294                "Matched code span at pos {}: {} backticks",
1295                pos,
1296                backtick_count
1297            );
1298
1299            // Check for raw inline
1300            if let Some(ref attrs) = attributes
1301                && config.extensions.raw_attribute
1302                && let Some(format) = is_raw_inline(attrs)
1303            {
1304                use super::raw_inline::emit_raw_inline;
1305                log::debug!("Matched raw inline span at pos {}: format={}", pos, format);
1306                emit_raw_inline(builder, content, backtick_count, format);
1307            } else if !config.extensions.inline_code_attributes && attributes.is_some() {
1308                let code_span_len = backtick_count * 2 + content.len();
1309                emit_code_span(builder, content, backtick_count, None);
1310                pos += code_span_len;
1311                text_start = pos;
1312                continue;
1313            } else {
1314                emit_code_span(builder, content, backtick_count, attributes);
1315            }
1316
1317            pos += len;
1318            text_start = pos;
1319            continue;
1320        }
1321
1322        // Try textual emoji aliases: :smile:
1323        if byte == b':'
1324            && config.extensions.emoji
1325            && is_emoji_boundary(text, pos)
1326            && let Some((len, _alias)) = try_parse_emoji(&text[pos..])
1327        {
1328            if pos > text_start {
1329                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1330            }
1331            log::debug!("Matched emoji at pos {}", pos);
1332            emit_emoji(builder, &text[pos..pos + len]);
1333            pos += len;
1334            text_start = pos;
1335            continue;
1336        }
1337
1338        // Try inline footnotes: ^[note]
1339        if byte == b'^'
1340            && pos + 1 < text.len()
1341            && text.as_bytes()[pos + 1] == b'['
1342            && config.extensions.inline_footnotes
1343            && let Some((len, content)) = try_parse_inline_footnote(&text[pos..])
1344        {
1345            if pos > text_start {
1346                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1347            }
1348            log::debug!("Matched inline footnote at pos {}", pos);
1349            emit_inline_footnote(builder, content, config);
1350            pos += len;
1351            text_start = pos;
1352            continue;
1353        }
1354
1355        // Try superscript: ^text^
1356        if byte == b'^'
1357            && config.extensions.superscript
1358            && let Some((len, content)) = try_parse_superscript(&text[pos..])
1359        {
1360            if pos > text_start {
1361                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1362            }
1363            log::debug!("Matched superscript at pos {}", pos);
1364            emit_superscript(builder, content, config);
1365            pos += len;
1366            text_start = pos;
1367            continue;
1368        }
1369
1370        // Try bookdown definition: (\#label) or (ref:label)
1371        if byte == b'(' && config.extensions.bookdown_references {
1372            if let Some((len, label)) = try_parse_bookdown_definition(&text[pos..]) {
1373                if pos > text_start {
1374                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1375                }
1376                log::debug!("Matched bookdown definition at pos {}: {}", pos, label);
1377                builder.token(SyntaxKind::TEXT.into(), &text[pos..pos + len]);
1378                pos += len;
1379                text_start = pos;
1380                continue;
1381            }
1382            if let Some((len, label)) = try_parse_bookdown_text_reference(&text[pos..]) {
1383                if pos > text_start {
1384                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1385                }
1386                log::debug!("Matched bookdown text reference at pos {}: {}", pos, label);
1387                builder.token(SyntaxKind::TEXT.into(), &text[pos..pos + len]);
1388                pos += len;
1389                text_start = pos;
1390                continue;
1391            }
1392        }
1393
1394        // Try subscript: ~text~
1395        if byte == b'~'
1396            && config.extensions.subscript
1397            && let Some((len, content)) = try_parse_subscript(&text[pos..])
1398        {
1399            if pos > text_start {
1400                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1401            }
1402            log::debug!("Matched subscript at pos {}", pos);
1403            emit_subscript(builder, content, config);
1404            pos += len;
1405            text_start = pos;
1406            continue;
1407        }
1408
1409        // Try strikeout: ~~text~~
1410        if byte == b'~'
1411            && config.extensions.strikeout
1412            && let Some((len, content)) = try_parse_strikeout(&text[pos..])
1413        {
1414            if pos > text_start {
1415                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1416            }
1417            log::debug!("Matched strikeout at pos {}", pos);
1418            emit_strikeout(builder, content, config);
1419            pos += len;
1420            text_start = pos;
1421            continue;
1422        }
1423
1424        // Try GFM inline math: $`...`$
1425        if byte == b'$'
1426            && config.extensions.tex_math_gfm
1427            && let Some((len, content)) = try_parse_gfm_inline_math(&text[pos..])
1428        {
1429            if pos > text_start {
1430                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1431            }
1432            log::debug!("Matched GFM inline math at pos {}", pos);
1433            emit_gfm_inline_math(builder, content);
1434            pos += len;
1435            text_start = pos;
1436            continue;
1437        }
1438
1439        // Try math ($...$, $$...$$)
1440        if byte == b'$' && config.extensions.tex_math_dollars {
1441            // Try display math first ($$...$$)
1442            if let Some((len, content)) = try_parse_display_math(&text[pos..]) {
1443                // Emit accumulated text
1444                if pos > text_start {
1445                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1446                }
1447
1448                let dollar_count = text[pos..].chars().take_while(|&c| c == '$').count();
1449                log::debug!(
1450                    "Matched display math at pos {}: {} dollars",
1451                    pos,
1452                    dollar_count
1453                );
1454
1455                // Check for trailing attributes (Quarto cross-reference support)
1456                let after_math = &text[pos + len..];
1457                let attr_len = if config.extensions.quarto_crossrefs {
1458                    use crate::parser::utils::attributes::try_parse_trailing_attributes;
1459                    if let Some((_attr_block, _)) = try_parse_trailing_attributes(after_math) {
1460                        let trimmed_after = after_math.trim_start();
1461                        if let Some(open_brace_pos) = trimmed_after.find('{') {
1462                            let ws_before_brace = after_math.len() - trimmed_after.len();
1463                            let attr_text_len = trimmed_after[open_brace_pos..]
1464                                .find('}')
1465                                .map(|close| close + 1)
1466                                .unwrap_or(0);
1467                            ws_before_brace + open_brace_pos + attr_text_len
1468                        } else {
1469                            0
1470                        }
1471                    } else {
1472                        0
1473                    }
1474                } else {
1475                    0
1476                };
1477
1478                let total_len = len + attr_len;
1479                emit_display_math(builder, content, dollar_count);
1480
1481                // Emit attributes if present
1482                if attr_len > 0 {
1483                    use crate::parser::utils::attributes::{
1484                        emit_attributes, try_parse_trailing_attributes,
1485                    };
1486                    let attr_text = &text[pos + len..pos + total_len];
1487                    if let Some((attr_block, _text_before)) =
1488                        try_parse_trailing_attributes(attr_text)
1489                    {
1490                        let trimmed_after = attr_text.trim_start();
1491                        let ws_len = attr_text.len() - trimmed_after.len();
1492                        if ws_len > 0 {
1493                            builder.token(SyntaxKind::WHITESPACE.into(), &attr_text[..ws_len]);
1494                        }
1495                        emit_attributes(builder, &attr_block);
1496                    }
1497                }
1498
1499                pos += total_len;
1500                text_start = pos;
1501                continue;
1502            }
1503
1504            // Try inline math ($...$)
1505            if let Some((len, content)) = try_parse_inline_math(&text[pos..]) {
1506                // Emit accumulated text
1507                if pos > text_start {
1508                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1509                }
1510
1511                log::debug!("Matched inline math at pos {}", pos);
1512                emit_inline_math(builder, content);
1513                pos += len;
1514                text_start = pos;
1515                continue;
1516            }
1517
1518            // Neither display nor inline math matched - emit the $ as literal text
1519            // This ensures each $ gets its own TEXT token for CST compatibility
1520            if pos > text_start {
1521                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1522            }
1523            builder.token(SyntaxKind::TEXT.into(), "$");
1524            pos += 1;
1525            text_start = pos;
1526            continue;
1527        }
1528
1529        // Try autolinks: <url> or <email>
1530        if byte == b'<'
1531            && config.extensions.autolinks
1532            && let Some((len, url)) = try_parse_autolink(&text[pos..])
1533        {
1534            if pos > text_start {
1535                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1536            }
1537            log::debug!("Matched autolink at pos {}", pos);
1538            emit_autolink(builder, &text[pos..pos + len], url);
1539            pos += len;
1540            text_start = pos;
1541            continue;
1542        }
1543
1544        if config.extensions.autolink_bare_uris
1545            && let Some((len, url)) = try_parse_bare_uri(&text[pos..])
1546        {
1547            if pos > text_start {
1548                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1549            }
1550            log::debug!("Matched bare URI at pos {}", pos);
1551            emit_bare_uri_link(builder, url, config);
1552            pos += len;
1553            text_start = pos;
1554            continue;
1555        }
1556
1557        // Try native spans: <span>text</span> (after autolink since both start with <)
1558        if byte == b'<'
1559            && config.extensions.native_spans
1560            && let Some((len, content, attributes)) = try_parse_native_span(&text[pos..])
1561        {
1562            if pos > text_start {
1563                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1564            }
1565            log::debug!("Matched native span at pos {}", pos);
1566            emit_native_span(builder, content, &attributes, config);
1567            pos += len;
1568            text_start = pos;
1569            continue;
1570        }
1571
1572        // Images and links - process in order: inline image, reference image, footnote ref, inline link, reference link
1573        if byte == b'!' && pos + 1 < text.len() && text.as_bytes()[pos + 1] == b'[' {
1574            // Try inline image: ![alt](url)
1575            if let Some((len, alt_text, dest, attributes)) = try_parse_inline_image(&text[pos..]) {
1576                if pos > text_start {
1577                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1578                }
1579                log::debug!("Matched inline image at pos {}", pos);
1580                emit_inline_image(
1581                    builder,
1582                    &text[pos..pos + len],
1583                    alt_text,
1584                    dest,
1585                    attributes,
1586                    config,
1587                );
1588                pos += len;
1589                text_start = pos;
1590                continue;
1591            }
1592
1593            // Try reference image: ![alt][ref] or ![alt]
1594            if config.extensions.reference_links {
1595                let allow_shortcut = config.extensions.shortcut_reference_links;
1596                if let Some((len, alt_text, reference, is_implicit)) =
1597                    try_parse_reference_image(&text[pos..], allow_shortcut)
1598                {
1599                    if pos > text_start {
1600                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1601                    }
1602                    log::debug!("Matched reference image at pos {}", pos);
1603                    emit_reference_image(builder, alt_text, &reference, is_implicit, config);
1604                    pos += len;
1605                    text_start = pos;
1606                    continue;
1607                }
1608            }
1609        }
1610
1611        // Process bracket-starting elements
1612        if byte == b'[' {
1613            // Try footnote reference: [^id]
1614            if config.extensions.footnotes
1615                && let Some((len, id)) = try_parse_footnote_reference(&text[pos..])
1616            {
1617                if pos > text_start {
1618                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1619                }
1620                log::debug!("Matched footnote reference at pos {}", pos);
1621                emit_footnote_reference(builder, &id);
1622                pos += len;
1623                text_start = pos;
1624                continue;
1625            }
1626
1627            // Try inline link: [text](url)
1628            if config.extensions.inline_links
1629                && let Some((len, link_text, dest, attributes)) =
1630                    try_parse_inline_link(&text[pos..])
1631            {
1632                if pos > text_start {
1633                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1634                }
1635                log::debug!("Matched inline link at pos {}", pos);
1636                emit_inline_link(
1637                    builder,
1638                    &text[pos..pos + len],
1639                    link_text,
1640                    dest,
1641                    attributes,
1642                    config,
1643                );
1644                pos += len;
1645                text_start = pos;
1646                continue;
1647            }
1648
1649            // Try reference link: [text][ref] or [text]
1650            if config.extensions.reference_links {
1651                let allow_shortcut = config.extensions.shortcut_reference_links;
1652                if let Some((len, link_text, reference, is_implicit)) =
1653                    try_parse_reference_link(&text[pos..], allow_shortcut)
1654                {
1655                    if pos > text_start {
1656                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1657                    }
1658                    log::debug!("Matched reference link at pos {}", pos);
1659                    emit_reference_link(builder, link_text, &reference, is_implicit, config);
1660                    pos += len;
1661                    text_start = pos;
1662                    continue;
1663                }
1664            }
1665
1666            // Try bracketed citation: [@cite]
1667            if config.extensions.citations
1668                && let Some((len, content)) = try_parse_bracketed_citation(&text[pos..])
1669            {
1670                if pos > text_start {
1671                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1672                }
1673                log::debug!("Matched bracketed citation at pos {}", pos);
1674                emit_bracketed_citation(builder, content);
1675                pos += len;
1676                text_start = pos;
1677                continue;
1678            }
1679        }
1680
1681        // Try bracketed spans: [text]{.class}
1682        // Must come after links/citations
1683        if byte == b'['
1684            && config.extensions.bracketed_spans
1685            && let Some((len, text_content, attrs)) = try_parse_bracketed_span(&text[pos..])
1686        {
1687            if pos > text_start {
1688                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1689            }
1690            log::debug!("Matched bracketed span at pos {}", pos);
1691            emit_bracketed_span(builder, &text_content, &attrs, config);
1692            pos += len;
1693            text_start = pos;
1694            continue;
1695        }
1696
1697        // Try bare citation: @cite (must come after bracketed elements)
1698        if byte == b'@'
1699            && (config.extensions.citations || config.extensions.quarto_crossrefs)
1700            && let Some((len, key, has_suppress)) = try_parse_bare_citation(&text[pos..])
1701        {
1702            let is_crossref =
1703                config.extensions.quarto_crossrefs && super::citations::is_quarto_crossref_key(key);
1704            if is_crossref || config.extensions.citations {
1705                if pos > text_start {
1706                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1707                }
1708                if is_crossref {
1709                    log::debug!("Matched Quarto crossref at pos {}: {}", pos, &key);
1710                    super::citations::emit_crossref(builder, key, has_suppress);
1711                } else {
1712                    log::debug!("Matched bare citation at pos {}: {}", pos, &key);
1713                    emit_bare_citation(builder, key, has_suppress);
1714                }
1715                pos += len;
1716                text_start = pos;
1717                continue;
1718            }
1719        }
1720
1721        // Try suppress-author citation: -@cite
1722        if byte == b'-'
1723            && pos + 1 < text.len()
1724            && text.as_bytes()[pos + 1] == b'@'
1725            && (config.extensions.citations || config.extensions.quarto_crossrefs)
1726            && let Some((len, key, has_suppress)) = try_parse_bare_citation(&text[pos..])
1727        {
1728            let is_crossref =
1729                config.extensions.quarto_crossrefs && super::citations::is_quarto_crossref_key(key);
1730            if is_crossref || config.extensions.citations {
1731                if pos > text_start {
1732                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1733                }
1734                if is_crossref {
1735                    log::debug!("Matched Quarto crossref at pos {}: {}", pos, &key);
1736                    super::citations::emit_crossref(builder, key, has_suppress);
1737                } else {
1738                    log::debug!("Matched suppress-author citation at pos {}: {}", pos, &key);
1739                    emit_bare_citation(builder, key, has_suppress);
1740                }
1741                pos += len;
1742                text_start = pos;
1743                continue;
1744            }
1745        }
1746
1747        // Try to parse emphasis at this position
1748        if byte == b'*' || byte == b'_' {
1749            // Count the delimiter run to avoid re-parsing
1750            let bytes = text.as_bytes();
1751            let mut delim_count = 0;
1752            while pos + delim_count < bytes.len() && bytes[pos + delim_count] == byte {
1753                delim_count += 1;
1754            }
1755
1756            // Emit any accumulated text before the delimiter
1757            if pos > text_start {
1758                log::debug!(
1759                    "Emitting TEXT before delimiter: {:?}",
1760                    &text[text_start..pos]
1761                );
1762                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1763                text_start = pos; // Update text_start after emission
1764            }
1765
1766            // Try to parse emphasis
1767            // Use nested variant (bypass opener validity) when in nested context
1768            let emphasis_result = if nested_emphasis {
1769                try_parse_emphasis_nested(text, pos, end, config, builder)
1770            } else {
1771                try_parse_emphasis(text, pos, end, config, builder)
1772            };
1773
1774            if let Some((consumed, _)) = emphasis_result {
1775                // Successfully parsed emphasis
1776                log::debug!(
1777                    "Parsed emphasis, consumed {} bytes from pos {}",
1778                    consumed,
1779                    pos
1780                );
1781                pos += consumed;
1782                text_start = pos;
1783            } else {
1784                // Failed to parse, delimiter run will be treated as regular text
1785                // Skip the ENTIRE delimiter run to avoid re-parsing parts of it
1786                log::debug!(
1787                    "Failed to parse emphasis at pos {}, skipping {} delimiters as literal",
1788                    pos,
1789                    delim_count
1790                );
1791                pos += delim_count;
1792                // DON'T update text_start - let the delimiters accumulate
1793            }
1794            continue;
1795        }
1796
1797        // Check for newlines - may need to emit as hard line break
1798        if byte == b'\r' && pos + 1 < end && text.as_bytes()[pos + 1] == b'\n' {
1799            let text_before = &text[text_start..pos];
1800
1801            // Check for trailing spaces hard line break (always enabled in Pandoc)
1802            let trailing_spaces = text_before.chars().rev().take_while(|&c| c == ' ').count();
1803            if trailing_spaces >= 2 {
1804                // Emit text before the trailing spaces
1805                let text_content = &text_before[..text_before.len() - trailing_spaces];
1806                if !text_content.is_empty() {
1807                    builder.token(SyntaxKind::TEXT.into(), text_content);
1808                }
1809                let spaces = " ".repeat(trailing_spaces);
1810                builder.token(
1811                    SyntaxKind::HARD_LINE_BREAK.into(),
1812                    &format!("{}\r\n", spaces),
1813                );
1814                pos += 2;
1815                text_start = pos;
1816                continue;
1817            }
1818
1819            // hard_line_breaks: treat all single newlines as hard line breaks
1820            if config.extensions.hard_line_breaks {
1821                if !text_before.is_empty() {
1822                    builder.token(SyntaxKind::TEXT.into(), text_before);
1823                }
1824                builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\r\n");
1825                pos += 2;
1826                text_start = pos;
1827                continue;
1828            }
1829
1830            // Regular newline
1831            if !text_before.is_empty() {
1832                builder.token(SyntaxKind::TEXT.into(), text_before);
1833            }
1834            builder.token(SyntaxKind::NEWLINE.into(), "\r\n");
1835            pos += 2;
1836            text_start = pos;
1837            continue;
1838        }
1839
1840        if byte == b'\n' {
1841            let text_before = &text[text_start..pos];
1842
1843            // Check for trailing spaces hard line break (always enabled in Pandoc)
1844            let trailing_spaces = text_before.chars().rev().take_while(|&c| c == ' ').count();
1845            if trailing_spaces >= 2 {
1846                // Emit text before the trailing spaces
1847                let text_content = &text_before[..text_before.len() - trailing_spaces];
1848                if !text_content.is_empty() {
1849                    builder.token(SyntaxKind::TEXT.into(), text_content);
1850                }
1851                let spaces = " ".repeat(trailing_spaces);
1852                builder.token(SyntaxKind::HARD_LINE_BREAK.into(), &format!("{}\n", spaces));
1853                pos += 1;
1854                text_start = pos;
1855                continue;
1856            }
1857
1858            // hard_line_breaks: treat all single newlines as hard line breaks
1859            if config.extensions.hard_line_breaks {
1860                if !text_before.is_empty() {
1861                    builder.token(SyntaxKind::TEXT.into(), text_before);
1862                }
1863                builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\n");
1864                pos += 1;
1865                text_start = pos;
1866                continue;
1867            }
1868
1869            // Regular newline
1870            if !text_before.is_empty() {
1871                builder.token(SyntaxKind::TEXT.into(), text_before);
1872            }
1873            builder.token(SyntaxKind::NEWLINE.into(), "\n");
1874            pos += 1;
1875            text_start = pos;
1876            continue;
1877        }
1878
1879        // Regular character, keep accumulating
1880        pos += 1;
1881    }
1882
1883    // Emit any remaining text
1884    if pos > text_start && text_start < end {
1885        log::debug!("Emitting remaining TEXT: {:?}", &text[text_start..end]);
1886        builder.token(SyntaxKind::TEXT.into(), &text[text_start..end]);
1887    }
1888
1889    log::debug!("parse_inline_range complete: start={}, end={}", start, end);
1890}
1891
#[cfg(test)]
mod tests {
    use super::*;
    use crate::syntax::{SyntaxKind, SyntaxNode};
    use rowan::GreenNode;

    /// Finishes `builder` and wraps the resulting green tree in a typed root node.
    fn finish_tree(builder: GreenNodeBuilder<'_>) -> SyntaxNode {
        let green: GreenNode = builder.finish();
        SyntaxNode::new_root(green)
    }

    /// Returns `true` when at least one node of kind `inner` has a strict
    /// ancestor of kind `outer` somewhere below (or at) `root`.
    ///
    /// This is a genuine containment check: two sibling nodes that merely
    /// appear in the "right" order during traversal do not satisfy it.
    fn is_nested_within(root: &SyntaxNode, outer: SyntaxKind, inner: SyntaxKind) -> bool {
        root.descendants()
            .filter(|candidate| candidate.kind() == inner)
            // `ancestors()` starts at the node itself, so skip it.
            .any(|candidate| candidate.ancestors().skip(1).any(|a| a.kind() == outer))
    }

    /// `*test*` parsed through the public entry point yields a lossless tree
    /// containing an EMPHASIS node.
    #[test]
    fn test_recursive_simple_emphasis() {
        let text = "*test*";
        let config = Config::default();
        let mut builder = GreenNodeBuilder::new();

        parse_inline_text_recursive(&mut builder, text, &config);

        let node = finish_tree(builder);

        // Should be lossless
        assert_eq!(node.text().to_string(), text);

        // Should have EMPHASIS node
        let has_emph = node.descendants().any(|n| n.kind() == SyntaxKind::EMPHASIS);
        assert!(has_emph, "Should have EMPHASIS node");
    }

    /// STRONG nested inside EMPHASIS is recognised when parsing through
    /// `parse_inline_text_recursive`.
    #[test]
    fn test_recursive_nested() {
        let text = "*foo **bar** baz*";
        let config = Config::default();
        let mut builder = GreenNodeBuilder::new();

        // Wrap in a PARAGRAPH node (inline content needs a parent)
        builder.start_node(SyntaxKind::PARAGRAPH.into());
        parse_inline_text_recursive(&mut builder, text, &config);
        builder.finish_node();

        let node = finish_tree(builder);

        // Should be lossless
        assert_eq!(node.text().to_string(), text);

        // Should have both EMPHASIS and STRONG
        let has_emph = node.descendants().any(|n| n.kind() == SyntaxKind::EMPHASIS);
        let has_strong = node.descendants().any(|n| n.kind() == SyntaxKind::STRONG);

        assert!(has_emph, "Should have EMPHASIS node");
        assert!(has_strong, "Should have STRONG node");
    }

    /// Test that we can parse a simple emphasis case
    #[test]
    fn test_parse_simple_emphasis() {
        let text = "*test*";
        let config = Config::default();
        let mut builder = GreenNodeBuilder::new();

        // Try to parse emphasis at position 0
        let result = try_parse_emphasis(text, 0, text.len(), &config, &mut builder);

        // Should successfully parse
        assert_eq!(result, Some((6, 1))); // Consumed all 6 bytes, delimiter count 1

        // Check the generated CST
        let node = finish_tree(builder);

        // The root IS the EMPHASIS node
        assert_eq!(node.kind(), SyntaxKind::EMPHASIS);

        // Verify losslessness: CST text should match input
        assert_eq!(node.text().to_string(), text);
    }

    /// Test parsing nested emphasis/strong
    #[test]
    fn test_parse_nested_emphasis_strong() {
        let text = "*foo **bar** baz*";
        let config = Config::default();
        let mut builder = GreenNodeBuilder::new();

        // Parse the whole range
        parse_inline_range(text, 0, text.len(), &config, &mut builder);

        let node = finish_tree(builder);

        // Verify losslessness
        assert_eq!(node.text().to_string(), text);

        // Should have EMPHASIS and STRONG nodes
        let has_emph = node.descendants().any(|n| n.kind() == SyntaxKind::EMPHASIS);
        let has_strong = node.descendants().any(|n| n.kind() == SyntaxKind::STRONG);

        assert!(has_emph, "Should have EMPHASIS node");
        assert!(has_strong, "Should have STRONG node");
    }

    /// Test Pandoc's "three" algorithm: ***foo* bar**
    /// Expected: Strong[Emph[foo], bar]
    ///
    /// NOTE(review): this test was originally annotated with
    /// "Current bug: Parses as *Strong[foo* bar]". Confirm whether that bug is
    /// fixed; if it is not, this test is expected to fail.
    #[test]
    fn test_triple_emphasis_star_then_double_star() {
        let text = "***foo* bar**";
        let config = Config::default();
        let mut builder = GreenNodeBuilder::new();

        builder.start_node(SyntaxKind::DOCUMENT.into());
        parse_inline_range(text, 0, text.len(), &config, &mut builder);
        builder.finish_node();

        let node = finish_tree(builder);

        // Verify losslessness
        assert_eq!(node.text().to_string(), text);

        // Expected structure: STRONG > EMPH > "foo"
        // The STRONG should contain EMPH, not the other way around
        let structure = format!("{:#?}", node);

        // Should have both STRONG and EMPH
        assert!(structure.contains("STRONG"), "Should have STRONG node");
        assert!(structure.contains("EMPHASIS"), "Should have EMPHASIS node");

        // STRONG should be outer, EMPH should be inner. Check real ancestry:
        // traversal order alone would also accept STRONG and EMPH as siblings.
        let found_emph_after_strong =
            is_nested_within(&node, SyntaxKind::STRONG, SyntaxKind::EMPHASIS);

        assert!(
            found_emph_after_strong,
            "EMPH should be inside STRONG, not before it. Current structure:\n{}",
            structure
        );
    }

    /// Test Pandoc's "three" algorithm: ***foo** bar*
    /// Expected: Emph[Strong[foo], bar]
    #[test]
    fn test_triple_emphasis_double_star_then_star() {
        let text = "***foo** bar*";
        let config = Config::default();
        let mut builder = GreenNodeBuilder::new();

        builder.start_node(SyntaxKind::DOCUMENT.into());
        parse_inline_range(text, 0, text.len(), &config, &mut builder);
        builder.finish_node();

        let node = finish_tree(builder);

        // Verify losslessness
        assert_eq!(node.text().to_string(), text);

        // Expected structure: EMPH > STRONG > "foo"
        let structure = format!("{:#?}", node);

        // Should have both EMPH and STRONG
        assert!(structure.contains("EMPHASIS"), "Should have EMPHASIS node");
        assert!(structure.contains("STRONG"), "Should have STRONG node");

        // EMPH should be outer, STRONG should be inner. Check real ancestry:
        // traversal order alone would also accept EMPH and STRONG as siblings.
        let found_strong_after_emph =
            is_nested_within(&node, SyntaxKind::EMPHASIS, SyntaxKind::STRONG);

        assert!(
            found_strong_after_emph,
            "STRONG should be inside EMPH. Current structure:\n{}",
            structure
        );
    }

    /// Test that display math with attributes parses correctly
    /// Regression test for equation_attributes_single_line golden test
    #[test]
    fn test_display_math_with_attributes() {
        let text = "$$ E = mc^2 $$ {#eq-einstein}";
        let mut config = Config::default();
        config.extensions.quarto_crossrefs = true; // Enable Quarto cross-references

        let mut builder = GreenNodeBuilder::new();
        builder.start_node(SyntaxKind::DOCUMENT.into()); // Need a root node

        // Parse the whole text
        parse_inline_text_recursive(&mut builder, text, &config);

        builder.finish_node(); // Finish ROOT
        let node = finish_tree(builder);

        // Verify losslessness
        assert_eq!(node.text().to_string(), text);

        // Should have DISPLAY_MATH node
        let has_display_math = node
            .descendants()
            .any(|n| n.kind() == SyntaxKind::DISPLAY_MATH);
        assert!(has_display_math, "Should have DISPLAY_MATH node");

        // Should have ATTRIBUTE node
        let has_attributes = node
            .descendants()
            .any(|n| n.kind() == SyntaxKind::ATTRIBUTE);
        assert!(
            has_attributes,
            "Should have ATTRIBUTE node for {{#eq-einstein}}"
        );

        // Attributes should not be TEXT.
        // TEXT is emitted as a *token*, so the element after the math node has
        // to be inspected with `next_sibling_or_token`; `next_sibling` only
        // yields nodes and would make this check pass unconditionally.
        let math_followed_by_text = node.descendants().any(|n| {
            n.kind() == SyntaxKind::DISPLAY_MATH
                && n.next_sibling_or_token()
                    .and_then(|element| element.into_token())
                    .is_some_and(|token| {
                        token.kind() == SyntaxKind::TEXT
                            && token.text().contains("{#eq-einstein}")
                    })
        });
        assert!(
            !math_followed_by_text,
            "Attributes should not be parsed as TEXT"
        );
    }
}
2153
#[test]
fn test_two_with_nested_one_and_triple_closer() {
    // Input:    **bold with *italic***
    // Expected: Strong["bold with ", Emph["italic"]]
    // The trailing *** is split into * (ends the Emph) and ** (ends the Strong).

    use crate::config::Config;
    use crate::syntax::SyntaxNode;

    const INPUT: &str = "**bold with *italic***";

    // parse_inline_range adds no wrapper of its own, so the single node it
    // emits becomes the root of the finished tree.
    let mut builder = GreenNodeBuilder::new();
    parse_inline_range(INPUT, 0, INPUT.len(), &Config::default(), &mut builder);
    let root = SyntaxNode::new_root(builder.finish());

    // Round-trip: the tree must reproduce the input exactly.
    assert_eq!(root.text().to_string(), INPUT, "Should be lossless");

    // Outermost construct is the strong emphasis.
    assert_eq!(
        root.kind(),
        SyntaxKind::STRONG,
        "Root should be STRONG, got: {:?}",
        root.kind()
    );

    // The emphasis must be a direct child of that strong node.
    let mut child_kinds = root.children().map(|child| child.kind());
    assert!(
        child_kinds.any(|kind| kind == SyntaxKind::EMPHASIS),
        "STRONG should contain EMPHASIS node"
    );
}
2189
#[test]
fn test_emphasis_with_trailing_space_before_closer() {
    // Pandoc treats `*foo *` as emphasis: with asterisks the closing
    // delimiter is not required to be right-flanking.

    use crate::config::Config;
    use crate::syntax::SyntaxNode;

    const INPUT: &str = "*foo *";

    let mut builder = GreenNodeBuilder::new();

    // Attempt an emphasis parse starting at the first byte.
    let outcome = try_parse_emphasis(INPUT, 0, INPUT.len(), &Config::default(), &mut builder);

    // All 6 bytes are consumed by a single-delimiter emphasis.
    assert_eq!(
        outcome,
        Some((6, 1)),
        "Should parse as emphasis, result: {:?}",
        outcome
    );

    // Inspect the tree that was built.
    let root = SyntaxNode::new_root(builder.finish());

    // The emphasis node itself is the root.
    assert_eq!(root.kind(), SyntaxKind::EMPHASIS);

    // Round-trip must be lossless.
    assert_eq!(root.text().to_string(), INPUT);
}
2224
#[test]
fn test_triple_emphasis_all_strong_nested() {
    // `***foo** bar **baz***` => Emph[Strong[foo], " bar ", Strong[baz]]
    // (matches Pandoc's output)

    use crate::config::Config;
    use crate::syntax::SyntaxNode;

    const INPUT: &str = "***foo** bar **baz***";

    let mut builder = GreenNodeBuilder::new();
    parse_inline_range(INPUT, 0, INPUT.len(), &Config::default(), &mut builder);
    let root = SyntaxNode::new_root(builder.finish());

    // Exactly one EMPHASIS node in the whole tree.
    let emphases: Vec<SyntaxNode> = root
        .descendants()
        .filter(|candidate| candidate.kind() == SyntaxKind::EMPHASIS)
        .collect();
    assert_eq!(
        emphases.len(),
        1,
        "Should have exactly one EMPHASIS node, found: {}",
        emphases.len()
    );

    // That EMPHASIS node has two STRONG nodes as direct children.
    let strong_children = emphases[0]
        .children()
        .filter(|child| child.kind() == SyntaxKind::STRONG)
        .count();
    assert_eq!(
        strong_children, 2,
        "EMPHASIS should contain two STRONG nodes, found: {}",
        strong_children
    );

    // Round-trip must be lossless.
    assert_eq!(root.text().to_string(), INPUT);
}
2271
#[test]
fn test_triple_emphasis_all_emph_nested() {
    // `***foo* bar *baz***` => Strong[Emph[foo], " bar ", Emph[baz]]
    // (matches Pandoc's output)

    use crate::config::Config;
    use crate::syntax::SyntaxNode;

    const INPUT: &str = "***foo* bar *baz***";

    let mut builder = GreenNodeBuilder::new();
    parse_inline_range(INPUT, 0, INPUT.len(), &Config::default(), &mut builder);
    let root = SyntaxNode::new_root(builder.finish());

    // Exactly one STRONG node in the whole tree.
    let strongs: Vec<SyntaxNode> = root
        .descendants()
        .filter(|candidate| candidate.kind() == SyntaxKind::STRONG)
        .collect();
    assert_eq!(
        strongs.len(),
        1,
        "Should have exactly one STRONG node, found: {}",
        strongs.len()
    );

    // That STRONG node has two EMPHASIS nodes as direct children.
    let emph_children = strongs[0]
        .children()
        .filter(|child| child.kind() == SyntaxKind::EMPHASIS)
        .count();
    assert_eq!(
        emph_children, 2,
        "STRONG should contain two EMPHASIS nodes, found: {}",
        emph_children
    );

    // Round-trip must be lossless.
    assert_eq!(root.text().to_string(), INPUT);
}
2318
2319// Multiline emphasis tests
#[test]
fn test_parse_emphasis_multiline() {
    // Per Pandoc spec, emphasis CAN contain newlines (soft breaks),
    // so a `*...*` span broken across two lines must still parse.
    use crate::config::Config;
    use crate::syntax::SyntaxNode;

    let text = "*text on\nline two*";
    let config = Config::default();
    let mut builder = GreenNodeBuilder::new();

    // Expect the whole input to be consumed with a delimiter count of 1.
    let expected = Some((text.len(), 1));
    let result = try_parse_emphasis(text, 0, text.len(), &config, &mut builder);
    assert_eq!(result, expected, "Emphasis should parse multiline content");

    // Inspect the CST that was built: its root must be an EMPHASIS node.
    let node = SyntaxNode::new_root(builder.finish());
    assert_eq!(node.kind(), SyntaxKind::EMPHASIS);

    // Losslessness: the text round-trips, newline included.
    let rendered = node.text().to_string();
    assert_eq!(rendered, text);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in emphasis content"
    );
}
2354
#[test]
fn test_parse_strong_multiline() {
    // Per Pandoc spec, strong emphasis CAN contain newlines,
    // so a `**...**` span broken across two lines must still parse.
    use crate::config::Config;
    use crate::syntax::SyntaxNode;

    let text = "**strong on\nline two**";
    let config = Config::default();
    let mut builder = GreenNodeBuilder::new();

    // Expect the whole input to be consumed with a delimiter count of 2.
    let expected = Some((text.len(), 2));
    let result = try_parse_emphasis(text, 0, text.len(), &config, &mut builder);
    assert_eq!(
        result, expected,
        "Strong emphasis should parse multiline content"
    );

    // Inspect the CST that was built: its root must be a STRONG node.
    let node = SyntaxNode::new_root(builder.finish());
    assert_eq!(node.kind(), SyntaxKind::STRONG);

    // Losslessness: the text round-trips, newline included.
    let rendered = node.text().to_string();
    assert_eq!(rendered, text);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in strong content"
    );
}
2389
#[test]
fn test_parse_triple_emphasis_multiline() {
    // A `***...***` span broken across two lines must still parse.
    use crate::config::Config;
    use crate::syntax::SyntaxNode;

    let text = "***both on\nline two***";
    let config = Config::default();
    let mut builder = GreenNodeBuilder::new();

    // Expect the whole input to be consumed with a delimiter count of 3.
    let expected = Some((text.len(), 3));
    let result = try_parse_emphasis(text, 0, text.len(), &config, &mut builder);
    assert_eq!(
        result, expected,
        "Triple emphasis should parse multiline content"
    );

    // Inspect the CST that was built.
    let node = SyntaxNode::new_root(builder.finish());

    // A STRONG node must appear in the tree (triple = strong + emph).
    assert!(
        node.descendants()
            .any(|candidate| candidate.kind() == SyntaxKind::STRONG),
        "Should have STRONG node"
    );

    // Losslessness: the text round-trips, newline included.
    let rendered = node.text().to_string();
    assert_eq!(rendered, text);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in triple emphasis content"
    );
}
panache_parser/parser/inlines/core.rs

panache_parser/parser/inlines/
core.rs