Skip to main content

panache_parser/parser/inlines/
core.rs

1//! Inline emission walk.
2//!
3//! Consumes the IR plans built by [`super::inline_ir::build_full_plans`]
4//! (emphasis pairings, bracket resolutions, standalone Pandoc constructs)
5//! and emits the inline CST tokens / nodes in source order. Resolution
6//! decisions for emphasis, brackets, and standalone Pandoc constructs
7//! are entirely IR-driven for both dialects; the dispatcher's
8//! `try_parse_*` recognizers are still called to *parse* a matched byte
9//! range into a CST subtree, but "what is this byte range?" is answered
10//! exclusively by the IR.
11
12use super::sink::InlineSink;
13use crate::options::{Dialect, ParserOptions};
14use crate::syntax::SyntaxKind;
15#[cfg(test)]
16use rowan::GreenNodeBuilder;
17
18use super::inline_ir::{
19    BracketPlan, ConstructDispo, ConstructPlan, DelimChar, EmphasisKind, EmphasisPlan,
20};
21
22// Import inline element parsers from sibling modules
23use super::bookdown::{
24    try_parse_bookdown_definition, try_parse_bookdown_reference, try_parse_bookdown_text_reference,
25};
26use super::bracketed_spans::{emit_bracketed_span, try_parse_bracketed_span};
27use super::citations::{
28    emit_bare_citation, emit_bracketed_citation, try_parse_bare_citation,
29    try_parse_bracketed_citation,
30};
31use super::code_spans::{emit_code_span, try_parse_code_span};
32use super::emoji::{emit_emoji, try_parse_emoji};
33use super::escapes::{EscapeType, emit_escape, try_parse_escape};
34use super::inline_executable::{emit_inline_executable, try_parse_inline_executable};
35use super::inline_footnotes::{
36    emit_footnote_reference, emit_inline_footnote, try_parse_footnote_reference,
37    try_parse_inline_footnote,
38};
39use super::inline_html::{emit_inline_html, try_parse_inline_html};
40use super::latex::{parse_latex_command, try_parse_latex_command};
41use super::links::{
42    LinkScanContext, emit_autolink, emit_bare_uri_link, emit_inline_image, emit_inline_link,
43    emit_reference_image, emit_reference_link, emit_unresolved_reference, try_parse_autolink,
44    try_parse_bare_uri, try_parse_inline_image, try_parse_inline_link, try_parse_reference_image,
45    try_parse_reference_link,
46};
47use super::mark::{emit_mark, try_parse_mark};
48use super::math::{
49    emit_display_math, emit_display_math_environment, emit_double_backslash_display_math,
50    emit_double_backslash_inline_math, emit_gfm_inline_math, emit_inline_math,
51    emit_single_backslash_display_math, emit_single_backslash_inline_math, math_opts,
52    try_parse_display_math, try_parse_double_backslash_display_math,
53    try_parse_double_backslash_inline_math, try_parse_gfm_inline_math, try_parse_inline_math,
54    try_parse_math_environment, try_parse_single_backslash_display_math,
55    try_parse_single_backslash_inline_math,
56};
57use super::native_spans::{emit_native_span, try_parse_native_span};
58use super::raw_inline::is_raw_inline;
59use super::shortcodes::{emit_shortcode, try_parse_shortcode};
60use super::strikeout::{emit_strikeout, try_parse_strikeout};
61use super::subscript::{emit_subscript, try_parse_subscript};
62use super::superscript::{emit_superscript, try_parse_superscript};
63
64/// Parse inline text into the CST builder.
65///
66/// Top-level entry point for inline parsing. Builds the IR plans
67/// (emphasis pairings, bracket resolutions, standalone Pandoc constructs)
68/// once via [`super::inline_ir::build_full_plans`], then walks the byte
69/// range left-to-right consulting those plans plus the dispatcher's
70/// ordered-try chain for non-IR-resolved constructs (autolinks, code
71/// spans, escapes, math, etc.). Dialect-specific behavior is selected
72/// inside `build_full_plans`.
73///
74/// # Arguments
75/// * `text` - The inline text to parse
76/// * `config` - Configuration for extensions and formatting
77/// * `builder` - The CST builder to emit nodes to
78/// * `suppress_footnote_refs` - When `true`, `[^id]` bytes are emitted as
79///   literal TEXT instead of `FOOTNOTE_REFERENCE`. Set by block parsers when
80///   the inline content lives inside a reference-style footnote definition
81///   body, where pandoc silently drops nested footnote references.
82pub fn parse_inline_text_recursive(
83    builder: &mut impl InlineSink,
84    text: &str,
85    config: &ParserOptions,
86    suppress_footnote_refs: bool,
87) {
88    log::trace!(
89        "Recursive inline parsing: {:?} ({} bytes)",
90        &text[..text.len().min(40)],
91        text.len()
92    );
93
94    let mask = structural_byte_mask(config);
95    if try_emit_plain_text_fast_path_with_mask(builder, text, &mask) {
96        log::trace!("Recursive inline parsing complete (plain-text fast path)");
97        return;
98    }
99
100    let plans = super::inline_ir::build_full_plans(text, 0, text.len(), config);
101    parse_inline_range_impl(
102        text,
103        0,
104        text.len(),
105        config,
106        builder,
107        false,
108        &plans.emphasis,
109        &plans.brackets,
110        &plans.constructs,
111        false,
112        suppress_footnote_refs,
113        &mask,
114    );
115
116    log::trace!("Recursive inline parsing complete");
117}
118
119/// Parse inline elements from text content nested inside a link/image/span.
120///
121/// Used for recursive inline parsing of link text, image alt, span content, etc.
122/// Suppresses constructs that would create nested links (CommonMark §6.3 forbids
123/// links inside links), notably extended bare-URI autolinks under GFM.
124///
125/// `suppress_inner_links` should be `true` when the recursion is for a
126/// LINK or REFERENCE-LINK's text, where inner link / reference-link
127/// brackets must emit as literal text (pandoc-native:
128/// `[link [inner](u2)](u1)` → outer `Link` with `Str "[inner](u2)"`).
129/// Image alt text and all non-link contexts pass `false`:
130/// pandoc-native verifies `![alt with [inner](u)](u2)` keeps the inner
131/// `Link`, and bracketed spans / native spans / inline footnotes /
132/// emphasis all allow nested links.
133pub fn parse_inline_text(
134    builder: &mut impl InlineSink,
135    text: &str,
136    config: &ParserOptions,
137    suppress_inner_links: bool,
138    suppress_footnote_refs: bool,
139) {
140    log::trace!(
141        "Parsing inline text (nested in link): {:?} ({} bytes)",
142        &text[..text.len().min(40)],
143        text.len()
144    );
145
146    let mask = structural_byte_mask(config);
147    if try_emit_plain_text_fast_path_with_mask(builder, text, &mask) {
148        return;
149    }
150
151    let plans = super::inline_ir::build_full_plans(text, 0, text.len(), config);
152    parse_inline_range_impl(
153        text,
154        0,
155        text.len(),
156        config,
157        builder,
158        true,
159        &plans.emphasis,
160        &plans.brackets,
161        &plans.constructs,
162        suppress_inner_links,
163        suppress_footnote_refs,
164        &mask,
165    );
166}
167
168/// Plain-text fast path for inline ranges with no structural bytes.
169///
170/// Returns `true` if the range was emitted as a single `TEXT` token and
171/// the caller should skip the IR + dispatcher pipeline. Returns `false`
172/// if any structural byte appears (or the range is empty), letting the
173/// caller proceed normally. Empty input returns `false` so the caller's
174/// existing "no events → no output" path is preserved exactly.
175///
176/// The structural byte set is computed from `config.dialect` and
177/// `config.extensions` so prose containing dialect-irrelevant punctuation
178/// (e.g. `-` outside a citation flavor) doesn't unnecessarily disable the
179/// fast path. `\n` and `\r` are always structural — multi-line inline
180/// content must still split into TEXT + NEWLINE tokens like the slow path.
181fn try_emit_plain_text_fast_path_with_mask(
182    builder: &mut impl InlineSink,
183    text: &str,
184    mask: &[bool; 256],
185) -> bool {
186    if text.is_empty() {
187        return false;
188    }
189    for &b in text.as_bytes() {
190        if mask[b as usize] {
191            return false;
192        }
193    }
194    builder.token(SyntaxKind::TEXT.into(), text);
195    true
196}
197
198/// Build a 256-entry byte mask: `mask[b]` is `true` iff byte `b` could
199/// trigger any IR-recognised construct or dispatcher branch under the
200/// current dialect/extensions. Used by the plain-text fast path to scan
201/// inline ranges in a single pass.
202fn structural_byte_mask(config: &ParserOptions) -> [bool; 256] {
203    let mut mask = [false; 256];
204    let exts = &config.extensions;
205    let pandoc = config.dialect == Dialect::Pandoc;
206
207    // Always structural: line breaks (CST splits TEXT/NEWLINE), backslash
208    // (escape / hard break / backslash-math / latex / bookdown ref),
209    // backtick (code span / inline executable), `*`/`_` (emphasis is a
210    // core CommonMark construct, not extension-gated), and `[`/`]` if
211    // any bracket-shaped construct is reachable.
212    mask[b'\n' as usize] = true;
213    mask[b'\r' as usize] = true;
214    mask[b'\\' as usize] = true;
215    mask[b'`' as usize] = true;
216    mask[b'*' as usize] = true;
217    mask[b'_' as usize] = true;
218
219    // Brackets: the IR/dispatcher only acts on `[`/`]` if some
220    // bracket-shaped feature is reachable. `!` is the leading byte of
221    // `![alt]` image brackets — the IR's `BracketPlan` keys image
222    // openers at the `!` position, so the dispatcher must stop here
223    // to consult the plan.
224    if exts.inline_links
225        || exts.reference_links
226        || exts.inline_images
227        || exts.bracketed_spans
228        || exts.footnotes
229        || exts.citations
230    {
231        mask[b'[' as usize] = true;
232        mask[b']' as usize] = true;
233    }
234    if exts.inline_images || exts.reference_links {
235        mask[b'!' as usize] = true;
236    }
237
238    // `<` covers autolinks, raw HTML, and Pandoc native spans.
239    if exts.autolinks || exts.raw_html || exts.native_spans {
240        mask[b'<' as usize] = true;
241    }
242
243    // `^` covers Pandoc inline footnotes (`^[...]`), CM inline footnotes
244    // (when explicitly enabled), and superscript (`^text^`).
245    if exts.inline_footnotes || exts.superscript {
246        mask[b'^' as usize] = true;
247    }
248
249    // `@` and `-` cover Pandoc citation forms (`@cite`, `-@cite`,
250    // `[@cite]`). Under Pandoc dialect, the IR's `ConstructPlan` keys
251    // bare citations at the `@` or `-` position, so the dispatcher
252    // must stop at either to consult the plan. Including `-` is
253    // pessimistic — most prose hyphens won't form `-@` — but missing
254    // it would skip past valid suppress-author citations.
255    if exts.citations || exts.quarto_crossrefs {
256        mask[b'@' as usize] = true;
257        if pandoc {
258            mask[b'-' as usize] = true;
259        }
260    }
261
262    // `$` covers dollar-math and GFM math.
263    if exts.tex_math_dollars || exts.tex_math_gfm {
264        mask[b'$' as usize] = true;
265    }
266
267    // `~` covers subscript and strikeout (both `~text~` and `~~text~~`).
268    if exts.subscript || exts.strikeout {
269        mask[b'~' as usize] = true;
270    }
271
272    if exts.mark {
273        mask[b'=' as usize] = true;
274    }
275    if exts.emoji {
276        mask[b':' as usize] = true;
277    }
278    if exts.bookdown_references {
279        mask[b'(' as usize] = true;
280    }
281    // `{{< ... >}}` shortcodes: the dispatcher tries them on any
282    // `{` regardless of the `quarto_shortcodes` extension flag, so
283    // `{` must always be flagged here.
284    mask[b'{' as usize] = true;
285
286    // Bare-URI autolinks (`http://...` without `<>`) have no
287    // leading-byte gate in the dispatcher — `try_parse_bare_uri`
288    // probes for a URI scheme starting at every byte. Flag all
289    // ASCII alphabetic bytes so the bulk-skip stops on every
290    // potential scheme starter. This effectively disables the
291    // bulk-skip benefit for prose under GFM-style flavors but
292    // preserves correctness; ASCII digits / punctuation / non-ASCII
293    // bytes still skip cleanly.
294    if exts.autolink_bare_uris {
295        for b in b'a'..=b'z' {
296            mask[b as usize] = true;
297        }
298        for b in b'A'..=b'Z' {
299            mask[b as usize] = true;
300        }
301    }
302
303    mask
304}
305
/// Returns `true` when `pos` is at the start of `text` or the byte just
/// before `pos` is neither an ASCII letter, an ASCII digit, nor `_`.
///
/// Non-ASCII bytes (including UTF-8 continuation bytes) count as a
/// boundary. Presumably used to keep emoji shortcodes from starting in
/// the middle of a word — confirm against the caller.
///
/// # Panics
/// Panics if `pos > text.len()`.
fn is_emoji_boundary(text: &str, pos: usize) -> bool {
    let Some(prev_index) = pos.checked_sub(1) else {
        // Nothing precedes the start of the text.
        return true;
    };
    let prev = text.as_bytes()[prev_index];
    !(prev.is_ascii_alphanumeric() || prev == b'_')
}
315
/// Step from `pos` past the single character that starts there, clamped
/// to `end`.
///
/// Returns `pos` unchanged when it is already at or beyond `end` or the
/// end of `text`. Otherwise returns `pos` plus the UTF-8 width of the
/// character at `pos`, capped at `end`.
///
/// # Panics
/// Panics if `pos` is inside `text` but not on a char boundary.
#[inline]
fn advance_char_boundary(text: &str, pos: usize, end: usize) -> usize {
    if pos < end && pos < text.len() {
        let width = match text[pos..].chars().next() {
            Some(ch) => ch.len_utf8(),
            None => 1,
        };
        std::cmp::min(pos + width, end)
    } else {
        pos
    }
}
327
328#[allow(clippy::too_many_arguments)]
329fn parse_inline_range_impl(
330    text: &str,
331    start: usize,
332    end: usize,
333    config: &ParserOptions,
334    builder: &mut impl InlineSink,
335    nested_in_link: bool,
336    plan: &EmphasisPlan,
337    bracket_plan: &BracketPlan,
338    construct_plan: &ConstructPlan,
339    suppress_inner_links: bool,
340    suppress_footnote_refs: bool,
341    mask: &[bool; 256],
342) {
343    log::trace!(
344        "parse_inline_range: start={}, end={}, text={:?}",
345        start,
346        end,
347        &text[start..end]
348    );
349    let mut pos = start;
350    let mut text_start = start;
351    let bytes = text.as_bytes();
352
353    while pos < end {
354        // Bulk-skip plain bytes between structural bytes. Plans
355        // (`construct_plan`, `bracket_plan`, emphasis `plan`) only
356        // resolve at structural byte positions, so skipping here
357        // never elides a real match. `text_start` is preserved
358        // across the skip; the next emitted construct flushes the
359        // accumulated TEXT span.
360        if !mask[bytes[pos] as usize] {
361            let mut next = pos + 1;
362            while next < end && !mask[bytes[next] as usize] {
363                next += 1;
364            }
365            pos = next;
366            if pos >= end {
367                break;
368            }
369        }
370        // IR-driven dispatch: if the IR identified a Pandoc standalone
371        // construct starting here, emit it directly. Bypasses the
372        // dispatcher's ordered-try chain for inline footnotes, native
373        // spans, footnote references, citations, and bracketed spans
374        // under `Dialect::Pandoc`. The IR scan gates these on
375        // `!is_commonmark` and the relevant extension flag, so this
376        // branch is empty under CommonMark dialect (where the legacy
377        // dispatcher branches still run when the extension is enabled).
378        if let Some(dispo) = construct_plan.lookup(pos) {
379            match *dispo {
380                ConstructDispo::InlineFootnote { end: dispo_end } => {
381                    if dispo_end <= end
382                        && let Some((len, content)) = try_parse_inline_footnote(&text[pos..])
383                        && pos + len == dispo_end
384                    {
385                        if pos > text_start {
386                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
387                        }
388                        log::trace!("IR: matched inline footnote at pos {}", pos);
389                        emit_inline_footnote(builder, content, config, suppress_footnote_refs);
390                        pos += len;
391                        text_start = pos;
392                        continue;
393                    }
394                }
395                ConstructDispo::NativeSpan { end: dispo_end } => {
396                    if dispo_end <= end
397                        && let Some((len, content, _attributes)) =
398                            try_parse_native_span(&text[pos..])
399                        && pos + len == dispo_end
400                    {
401                        if pos > text_start {
402                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
403                        }
404                        log::trace!("IR: matched native span at pos {}", pos);
405                        emit_native_span(
406                            builder,
407                            &text[pos..pos + len],
408                            content,
409                            config,
410                            suppress_footnote_refs,
411                        );
412                        pos += len;
413                        text_start = pos;
414                        continue;
415                    }
416                }
417                ConstructDispo::FootnoteReference { end: dispo_end } => {
418                    if !suppress_footnote_refs
419                        && dispo_end <= end
420                        && let Some((len, id)) = try_parse_footnote_reference(&text[pos..])
421                        && pos + len == dispo_end
422                    {
423                        if pos > text_start {
424                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
425                        }
426                        log::trace!("IR: matched footnote reference at pos {}", pos);
427                        emit_footnote_reference(builder, &id);
428                        pos += len;
429                        text_start = pos;
430                        continue;
431                    }
432                }
433                ConstructDispo::BracketedCitation { end: dispo_end } => {
434                    if dispo_end <= end
435                        && let Some((len, content)) = try_parse_bracketed_citation(&text[pos..])
436                        && pos + len == dispo_end
437                    {
438                        if pos > text_start {
439                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
440                        }
441                        log::trace!("IR: matched bracketed citation at pos {}", pos);
442                        emit_bracketed_citation(builder, content);
443                        pos += len;
444                        text_start = pos;
445                        continue;
446                    }
447                }
448                ConstructDispo::BareCitation { end: dispo_end } => {
449                    if dispo_end <= end
450                        && let Some((len, key, has_suppress)) =
451                            try_parse_bare_citation(&text[pos..])
452                        && pos + len == dispo_end
453                    {
454                        let is_crossref = config.extensions.quarto_crossrefs
455                            && super::citations::is_quarto_crossref_key(key);
456                        if is_crossref || config.extensions.citations {
457                            if pos > text_start {
458                                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
459                            }
460                            if is_crossref {
461                                log::trace!("IR: matched Quarto crossref at pos {}: {}", pos, key);
462                                super::citations::emit_crossref(builder, key, has_suppress);
463                            } else {
464                                log::trace!("IR: matched bare citation at pos {}: {}", pos, key);
465                                emit_bare_citation(builder, key, has_suppress);
466                            }
467                            pos += len;
468                            text_start = pos;
469                            continue;
470                        }
471                    }
472                }
473                ConstructDispo::BracketedSpan { end: dispo_end } => {
474                    if dispo_end <= end
475                        && let Some((len, content, attrs)) = try_parse_bracketed_span(&text[pos..])
476                        && pos + len == dispo_end
477                    {
478                        if pos > text_start {
479                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
480                        }
481                        log::trace!("IR: matched bracketed span at pos {}", pos);
482                        emit_bracketed_span(
483                            builder,
484                            &content,
485                            &attrs,
486                            config,
487                            suppress_footnote_refs,
488                        );
489                        pos += len;
490                        text_start = pos;
491                        continue;
492                    }
493                }
494                ConstructDispo::WikiLink { end: dispo_end } => {
495                    if dispo_end <= end
496                        && let Some(span) = super::wikilinks::try_parse_wikilink(text, pos, config)
497                        && span.end == dispo_end
498                    {
499                        if pos > text_start {
500                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
501                        }
502                        log::trace!("IR: matched wikilink at pos {}", pos);
503                        super::wikilinks::emit_wikilink(builder, text, span, config);
504                        pos = span.end;
505                        text_start = pos;
506                        continue;
507                    }
508                }
509            }
510        }
511
512        // IR-driven bracket dispatch: if the IR's `process_brackets`
513        // resolved a bracket pair starting at this position, emit it
514        // directly via the appropriate helper. The
515        // dispatcher's `try_parse_*` recognizers compute the actual
516        // byte length and extract content / attributes; the IR's
517        // `suffix_end` is used to constrain the dispatcher's match
518        // shape so the two pipelines agree on which link variant
519        // resolved (e.g. `[foo][bar]` with `bar` undefined and `foo`
520        // defined: IR resolves `[foo]` as shortcut, but the
521        // dispatcher's `try_parse_reference_link` would otherwise
522        // greedily return the full-ref shape). Suppression of inner
523        // LINK / REFERENCE LINK during LINK-text recursion is applied
524        // here (pandoc-native: outer-wins for nested links).
525        //
526        // Pandoc-extended `{.attrs}` after a link can extend the
527        // dispatcher's match length past the IR's `suffix_end`. The
528        // dispatcher's len is therefore constrained to
529        // `[suffix_end, end]` rather than required to equal
530        // `suffix_end` exactly.
531        // IR-driven dispatch: Pandoc unresolved bracket-shape pattern.
532        // Before emitting the `UNRESOLVED_REFERENCE` wrapper, give the
533        // dispatcher's lenient inline-link / inline-image parsers a
534        // chance to override. The IR's `try_inline_suffix` is stricter
535        // than pandoc-markdown for some destination shapes (URLs with
536        // spaces, titles with embedded quotes, shortcode-style braces);
537        // the dispatcher accepts those and produces a real LINK / IMAGE
538        // node — pandoc-native agrees. Without this override, valid
539        // pandoc links would degrade to `UNRESOLVED_REFERENCE` here.
540        if let Some(super::inline_ir::BracketDispo::UnresolvedReference {
541            is_image,
542            text_start: ref_text_start,
543            text_end: ref_text_end,
544            end: ref_end,
545        }) = bracket_plan.lookup(pos)
546        {
547            let is_image = *is_image;
548            let dispo_suffix_end = *ref_end;
549            let suppress = suppress_inner_links && !is_image;
550            if !suppress {
551                let ctx = LinkScanContext::from_options(config);
552                let is_commonmark = config.dialect == Dialect::CommonMark;
553                if is_image {
554                    if config.extensions.inline_images
555                        && let Some((len, alt_text, dest, attributes)) =
556                            try_parse_inline_image(&text[pos..], ctx)
557                        && pos + len >= dispo_suffix_end
558                        && pos + len <= end
559                    {
560                        if pos > text_start {
561                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
562                        }
563                        log::trace!(
564                            "IR: dispatcher overrode UnresolvedReference with inline image at pos {}",
565                            pos
566                        );
567                        emit_inline_image(
568                            builder,
569                            &text[pos..pos + len],
570                            alt_text,
571                            dest,
572                            attributes,
573                            config,
574                            suppress_footnote_refs,
575                        );
576                        pos += len;
577                        text_start = pos;
578                        continue;
579                    }
580                } else if config.extensions.inline_links
581                    && let Some((len, link_text, dest, attributes)) =
582                        try_parse_inline_link(&text[pos..], is_commonmark, ctx)
583                    && pos + len >= dispo_suffix_end
584                    && pos + len <= end
585                {
586                    if pos > text_start {
587                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
588                    }
589                    log::trace!(
590                        "IR: dispatcher overrode UnresolvedReference with inline link at pos {}",
591                        pos
592                    );
593                    emit_inline_link(
594                        builder,
595                        &text[pos..pos + len],
596                        link_text,
597                        dest,
598                        attributes,
599                        config,
600                        suppress_footnote_refs,
601                    );
602                    pos += len;
603                    text_start = pos;
604                    continue;
605                }
606            }
607
608            // Dispatcher didn't override; emit the wrapper.
609            let inner_text = &text[*ref_text_start..*ref_text_end];
610            let suffix_start = *ref_text_end + 1;
611            let label_suffix = if suffix_start < *ref_end {
612                Some(&text[suffix_start..*ref_end])
613            } else {
614                None
615            };
616            if pos > text_start {
617                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
618            }
619            log::trace!(
620                "IR: unresolved Pandoc reference shape at pos {}..{}",
621                pos,
622                ref_end
623            );
624            emit_unresolved_reference(
625                builder,
626                is_image,
627                inner_text,
628                label_suffix,
629                config,
630                suppress_footnote_refs,
631            );
632            pos = *ref_end;
633            text_start = pos;
634            continue;
635        }
636
637        if let Some(super::inline_ir::BracketDispo::Open {
638            is_image,
639            suffix_end,
640            ..
641        }) = bracket_plan.lookup(pos)
642        {
643            let is_image = *is_image;
644            let dispo_suffix_end = *suffix_end;
645            let suppress = suppress_inner_links && !is_image;
646            if !suppress {
647                let ctx = LinkScanContext::from_options(config);
648                let allow_shortcut = config.extensions.shortcut_reference_links;
649                let is_commonmark = config.dialect == Dialect::CommonMark;
650                if is_image {
651                    if config.extensions.inline_images
652                        && let Some((len, alt_text, dest, attributes)) =
653                            try_parse_inline_image(&text[pos..], ctx)
654                        && pos + len >= dispo_suffix_end
655                        && pos + len <= end
656                    {
657                        if pos > text_start {
658                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
659                        }
660                        log::trace!("IR: matched inline image at pos {}", pos);
661                        emit_inline_image(
662                            builder,
663                            &text[pos..pos + len],
664                            alt_text,
665                            dest,
666                            attributes,
667                            config,
668                            suppress_footnote_refs,
669                        );
670                        pos += len;
671                        text_start = pos;
672                        continue;
673                    }
674                    if config.extensions.reference_links
675                        && let Some((len, alt_text, reference, gap, is_shortcut)) =
676                            try_parse_reference_image(
677                                &text[pos..],
678                                allow_shortcut,
679                                config.extensions.spaced_reference_links,
680                            )
681                        && pos + len == dispo_suffix_end
682                        && pos + len <= end
683                    {
684                        if pos > text_start {
685                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
686                        }
687                        log::trace!("IR: matched reference image at pos {}", pos);
688                        emit_reference_image(
689                            builder,
690                            alt_text,
691                            &reference,
692                            gap,
693                            is_shortcut,
694                            config,
695                            suppress_footnote_refs,
696                        );
697                        pos += len;
698                        text_start = pos;
699                        continue;
700                    }
701                } else {
702                    if config.extensions.inline_links
703                        && let Some((len, link_text, dest, attributes)) =
704                            try_parse_inline_link(&text[pos..], is_commonmark, ctx)
705                        && pos + len >= dispo_suffix_end
706                        && pos + len <= end
707                    {
708                        if pos > text_start {
709                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
710                        }
711                        log::trace!("IR: matched inline link at pos {}", pos);
712                        emit_inline_link(
713                            builder,
714                            &text[pos..pos + len],
715                            link_text,
716                            dest,
717                            attributes,
718                            config,
719                            suppress_footnote_refs,
720                        );
721                        pos += len;
722                        text_start = pos;
723                        continue;
724                    }
725                    if config.extensions.reference_links
726                        && let Some((len, link_text, reference, gap, is_shortcut)) =
727                            try_parse_reference_link(
728                                &text[pos..],
729                                allow_shortcut,
730                                config.extensions.inline_links,
731                                config.extensions.spaced_reference_links,
732                                ctx,
733                            )
734                        && pos + len == dispo_suffix_end
735                        && pos + len <= end
736                    {
737                        if pos > text_start {
738                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
739                        }
740                        log::trace!("IR: matched reference link at pos {}", pos);
741                        emit_reference_link(
742                            builder,
743                            link_text,
744                            &reference,
745                            gap,
746                            is_shortcut,
747                            config,
748                            suppress_footnote_refs,
749                        );
750                        pos += len;
751                        text_start = pos;
752                        continue;
753                    }
754                }
755            }
756        }
757
758        let byte = text.as_bytes()[pos];
759
760        // Backslash math (highest priority if enabled)
761        if byte == b'\\' {
762            // Try double backslash display math first: \\[...\\]
763            if config.extensions.tex_math_double_backslash {
764                if let Some((len, content)) = try_parse_double_backslash_display_math(&text[pos..])
765                {
766                    if pos > text_start {
767                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
768                    }
769                    log::trace!("Matched double backslash display math at pos {}", pos);
770                    emit_double_backslash_display_math(builder, content, math_opts(config));
771                    pos += len;
772                    text_start = pos;
773                    continue;
774                }
775
776                // Try double backslash inline math: \\(...\\)
777                if let Some((len, content)) = try_parse_double_backslash_inline_math(&text[pos..]) {
778                    if pos > text_start {
779                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
780                    }
781                    log::trace!("Matched double backslash inline math at pos {}", pos);
782                    emit_double_backslash_inline_math(builder, content, math_opts(config));
783                    pos += len;
784                    text_start = pos;
785                    continue;
786                }
787            }
788
789            // Try single backslash display math: \[...\]
790            if config.extensions.tex_math_single_backslash {
791                if let Some((len, content)) = try_parse_single_backslash_display_math(&text[pos..])
792                {
793                    if pos > text_start {
794                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
795                    }
796                    log::trace!("Matched single backslash display math at pos {}", pos);
797                    emit_single_backslash_display_math(builder, content, math_opts(config));
798                    pos += len;
799                    text_start = pos;
800                    continue;
801                }
802
803                // Try single backslash inline math: \(...\)
804                if let Some((len, content)) = try_parse_single_backslash_inline_math(&text[pos..]) {
805                    if pos > text_start {
806                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
807                    }
808                    log::trace!("Matched single backslash inline math at pos {}", pos);
809                    emit_single_backslash_inline_math(builder, content, math_opts(config));
810                    pos += len;
811                    text_start = pos;
812                    continue;
813                }
814            }
815
816            // Try math environments \begin{equation}...\end{equation}
817            if config.extensions.raw_tex
818                && let Some((len, begin_marker, content, end_marker)) =
819                    try_parse_math_environment(&text[pos..])
820            {
821                if pos > text_start {
822                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
823                }
824                log::trace!("Matched math environment at pos {}", pos);
825                emit_display_math_environment(
826                    builder,
827                    begin_marker,
828                    content,
829                    end_marker,
830                    math_opts(config),
831                );
832                pos += len;
833                text_start = pos;
834                continue;
835            }
836
837            // Try bookdown reference: \@ref(label)
838            if config.extensions.bookdown_references
839                && let Some((len, label)) = try_parse_bookdown_reference(&text[pos..])
840            {
841                if pos > text_start {
842                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
843                }
844                log::trace!("Matched bookdown reference at pos {}: {}", pos, label);
845                super::citations::emit_bookdown_crossref(builder, label);
846                pos += len;
847                text_start = pos;
848                continue;
849            }
850
851            // Try escapes (after bookdown refs and backslash math)
852            if let Some((len, ch, escape_type)) = try_parse_escape(&text[pos..]) {
853                let escape_enabled = match escape_type {
854                    EscapeType::HardLineBreak => config.extensions.escaped_line_breaks,
855                    EscapeType::NonbreakingSpace => config.extensions.all_symbols_escapable,
856                    EscapeType::Literal => {
857                        // BASE_ESCAPABLE matches Pandoc's markdown_strict /
858                        // original Markdown set, plus `|` and `~` which the
859                        // formatter emits as escapes for pipe-table separators
860                        // and strikethrough delimiters. Recognising those here
861                        // keeps round-trips idempotent in flavors that don't
862                        // enable all_symbols_escapable.
863                        //
864                        // Under CommonMark dialect, the spec (§2.4) explicitly
865                        // allows ANY ASCII punctuation to be backslash-escaped,
866                        // independent of the all_symbols_escapable extension
867                        // (which also widens to whitespace, a Pandoc-only
868                        // construct).
869                        const BASE_ESCAPABLE: &str = "\\`*_{}[]()>#+-.!|~";
870                        BASE_ESCAPABLE.contains(ch)
871                            || config.extensions.all_symbols_escapable
872                            || (config.dialect == crate::Dialect::CommonMark
873                                && ch.is_ascii_punctuation())
874                    }
875                };
876                if !escape_enabled {
877                    // This escape kind is not enabled for the current configuration:
878                    // advance past the backslash without emitting anything, so it
878                    // stays literal and is included in the next TEXT token.
879                    pos = advance_char_boundary(text, pos, end);
880                    continue;
881                }
882
883                // Emit accumulated text
884                if pos > text_start {
885                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
886                }
887
888                log::trace!("Matched escape at pos {}: \\{}", pos, ch);
889                emit_escape(builder, ch, escape_type);
890                pos += len;
891                text_start = pos;
892                continue;
893            }
894
895            // Try LaTeX commands (after escapes, before shortcodes)
896            if config.extensions.raw_tex
897                && let Some(len) = try_parse_latex_command(&text[pos..])
898            {
899                if pos > text_start {
900                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
901                }
902                log::trace!("Matched LaTeX command at pos {}", pos);
903                parse_latex_command(builder, &text[pos..], len);
904                pos += len;
905                text_start = pos;
906                continue;
907            }
908        }
909
910        // Try Quarto shortcodes: {{< shortcode >}}
911        if byte == b'{'
912            && pos + 1 < text.len()
913            && text.as_bytes()[pos + 1] == b'{'
914            && let Some((len, name, attrs)) = try_parse_shortcode(&text[pos..])
915        {
916            if pos > text_start {
917                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
918            }
919            log::trace!("Matched shortcode at pos {}: {}", pos, &name);
920            emit_shortcode(builder, &name, attrs);
921            pos += len;
922            text_start = pos;
923            continue;
924        }
925
926        // Try inline executable code spans (`` `r expr` `` and `` `{r} expr` ``)
927        if byte == b'`'
928            && let Some(m) = try_parse_inline_executable(
929                &text[pos..],
930                config.extensions.rmarkdown_inline_code,
931                config.extensions.quarto_inline_code,
932            )
933        {
934            if pos > text_start {
935                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
936            }
937            log::trace!("Matched inline executable code at pos {}", pos);
938            emit_inline_executable(builder, &m);
939            pos += m.total_len;
940            text_start = pos;
941            continue;
942        }
943
944        // Try code spans
945        if byte == b'`' {
946            if let Some((len, content, backtick_count, attributes)) =
947                try_parse_code_span(&text[pos..])
948            {
949                // Emit accumulated text
950                if pos > text_start {
951                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
952                }
953
954                log::trace!(
955                    "Matched code span at pos {}: {} backticks",
956                    pos,
957                    backtick_count
958                );
959
960                // Check for raw inline
961                if let Some((ref attrs, raw_attr)) = attributes
962                    && config.extensions.raw_attribute
963                    && let Some(format) = is_raw_inline(attrs)
964                {
965                    use super::raw_inline::emit_raw_inline;
966                    log::trace!("Matched raw inline span at pos {}: format={}", pos, format);
967                    emit_raw_inline(builder, content, backtick_count, raw_attr);
968                } else if !config.extensions.inline_code_attributes && attributes.is_some() {
969                    let code_span_len = backtick_count * 2 + content.len();
970                    emit_code_span(builder, content, backtick_count, None);
971                    pos += code_span_len;
972                    text_start = pos;
973                    continue;
974                } else {
975                    emit_code_span(
976                        builder,
977                        content,
978                        backtick_count,
979                        attributes.as_ref().map(|(_, raw)| *raw),
980                    );
981                }
982
983                pos += len;
984                text_start = pos;
985                continue;
986            }
987
988            // Unmatched backtick run.
989            //
990            // CommonMark (and GFM) treat the whole run as literal text — the
991            // run cannot be re-entered as a shorter opener. Pandoc-markdown
992            // instead lets a longer run shadow a shorter one (e.g.
993            // `` ```foo`` `` parses as `` ` `` + ``<code>foo</code>``), so
994            // for the Pandoc dialect we fall through and advance one byte at
995            // a time, allowing the inner run to be tried on a later iteration.
996            if config.dialect == Dialect::CommonMark {
997                let run_len = text[pos..].bytes().take_while(|&b| b == b'`').count();
998                pos += run_len;
999                continue;
1000            }
1001        }
1002
1003        // Try textual emoji aliases: :smile:
1004        if byte == b':'
1005            && config.extensions.emoji
1006            && is_emoji_boundary(text, pos)
1007            && let Some((len, _alias)) = try_parse_emoji(&text[pos..])
1008        {
1009            if pos > text_start {
1010                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1011            }
1012            log::trace!("Matched emoji at pos {}", pos);
1013            emit_emoji(builder, &text[pos..pos + len]);
1014            pos += len;
1015            text_start = pos;
1016            continue;
1017        }
1018
1019        // Try inline footnotes: ^[note]. Under Pandoc dialect this is
1020        // consumed via the IR's `ConstructPlan` at the top of the loop;
1021        // this dispatcher branch only fires for CommonMark dialect with
1022        // the extension explicitly enabled.
1023        if byte == b'^'
1024            && pos + 1 < text.len()
1025            && text.as_bytes()[pos + 1] == b'['
1026            && config.dialect == Dialect::CommonMark
1027            && config.extensions.inline_footnotes
1028            && let Some((len, content)) = try_parse_inline_footnote(&text[pos..])
1029        {
1030            if pos > text_start {
1031                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1032            }
1033            log::trace!("Matched inline footnote at pos {}", pos);
1034            emit_inline_footnote(builder, content, config, suppress_footnote_refs);
1035            pos += len;
1036            text_start = pos;
1037            continue;
1038        }
1039
1040        // Try superscript: ^text^
1041        if byte == b'^'
1042            && config.extensions.superscript
1043            && let Some((len, content)) = try_parse_superscript(&text[pos..])
1044        {
1045            if pos > text_start {
1046                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1047            }
1048            log::trace!("Matched superscript at pos {}", pos);
1049            emit_superscript(builder, content, config, suppress_footnote_refs);
1050            pos += len;
1051            text_start = pos;
1052            continue;
1053        }
1054
1055        // Try bookdown definition: (\#label) or (ref:label)
1056        if byte == b'(' && config.extensions.bookdown_references {
1057            if let Some((len, label)) = try_parse_bookdown_definition(&text[pos..]) {
1058                if pos > text_start {
1059                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1060                }
1061                log::trace!("Matched bookdown definition at pos {}: {}", pos, label);
1062                builder.token(SyntaxKind::TEXT.into(), &text[pos..pos + len]);
1063                pos += len;
1064                text_start = pos;
1065                continue;
1066            }
1067            if let Some((len, label)) = try_parse_bookdown_text_reference(&text[pos..]) {
1068                if pos > text_start {
1069                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1070                }
1071                log::trace!("Matched bookdown text reference at pos {}: {}", pos, label);
1072                builder.token(SyntaxKind::TEXT.into(), &text[pos..pos + len]);
1073                pos += len;
1074                text_start = pos;
1075                continue;
1076            }
1077        }
1078
1079        // Try strikeout: ~~text~~
1080        // Must run before subscript so `~~text~~` is matched as a single
1081        // Strikeout rather than two empty Subscripts. Subscript falls back
1082        // to consuming `~~` as an empty subscript only when strikeout
1083        // didn't match (e.g. `~~unclosed`).
1084        if byte == b'~'
1085            && config.extensions.strikeout
1086            && let Some((len, content)) = try_parse_strikeout(&text[pos..])
1087        {
1088            if pos > text_start {
1089                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1090            }
1091            log::trace!("Matched strikeout at pos {}", pos);
1092            emit_strikeout(builder, content, config, suppress_footnote_refs);
1093            pos += len;
1094            text_start = pos;
1095            continue;
1096        }
1097
1098        // Try subscript: ~text~ or `~~` as empty subscript when strikeout
1099        // didn't match (matches pandoc: `~~unclosed` → `Subscript [] + text`).
1100        if byte == b'~'
1101            && config.extensions.subscript
1102            && let Some((len, content)) = try_parse_subscript(&text[pos..])
1103        {
1104            if pos > text_start {
1105                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1106            }
1107            log::trace!("Matched subscript at pos {}", pos);
1108            emit_subscript(builder, content, config, suppress_footnote_refs);
1109            pos += len;
1110            text_start = pos;
1111            continue;
1112        }
1113
1114        // Try mark/highlight: ==text==
1115        if byte == b'='
1116            && config.extensions.mark
1117            && let Some((len, content)) = try_parse_mark(&text[pos..])
1118        {
1119            if pos > text_start {
1120                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1121            }
1122            log::trace!("Matched mark at pos {}", pos);
1123            emit_mark(builder, content, config, suppress_footnote_refs);
1124            pos += len;
1125            text_start = pos;
1126            continue;
1127        }
1128
1129        // Try GFM inline math: $`...`$
1130        if byte == b'$'
1131            && config.extensions.tex_math_gfm
1132            && let Some((len, content)) = try_parse_gfm_inline_math(&text[pos..])
1133        {
1134            if pos > text_start {
1135                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1136            }
1137            log::trace!("Matched GFM inline math at pos {}", pos);
1138            emit_gfm_inline_math(builder, content, math_opts(config));
1139            pos += len;
1140            text_start = pos;
1141            continue;
1142        }
1143
1144        // Try math ($...$, $$...$$)
1145        if byte == b'$' && config.extensions.tex_math_dollars {
1146            // Try display math first ($$...$$)
1147            if let Some((len, content)) = try_parse_display_math(&text[pos..]) {
1148                // Emit accumulated text
1149                if pos > text_start {
1150                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1151                }
1152
1153                let dollar_count = text[pos..].chars().take_while(|&c| c == '$').count();
1154                log::trace!(
1155                    "Matched display math at pos {}: {} dollars",
1156                    pos,
1157                    dollar_count
1158                );
1159
1160                // Check for trailing attributes (Quarto cross-reference support).
1161                // The Quarto attribute block sits on the same line as the closing
1162                // `$$`, so scope the lookup to the current line — otherwise
1163                // anything on later lines (e.g. a following `@eq-id` reference)
1164                // makes the segment not end with `}` and the lift no-ops.
1165                let after_math = &text[pos + len..];
1166                let line_end = after_math.find('\n').unwrap_or(after_math.len());
1167                let line_segment = &after_math[..line_end];
1168                let attr_len = if config.extensions.quarto_crossrefs {
1169                    use crate::parser::utils::attributes::try_parse_trailing_attributes;
1170                    if let Some((_attr_block, _)) = try_parse_trailing_attributes(line_segment) {
1171                        let trimmed_after = line_segment.trim_start();
1172                        if let Some(open_brace_pos) = trimmed_after.find('{') {
1173                            let ws_before_brace = line_segment.len() - trimmed_after.len();
1174                            let attr_text_len = trimmed_after[open_brace_pos..]
1175                                .find('}')
1176                                .map(|close| close + 1)
1177                                .unwrap_or(0);
1178                            ws_before_brace + open_brace_pos + attr_text_len
1179                        } else {
1180                            0
1181                        }
1182                    } else {
1183                        0
1184                    }
1185                } else {
1186                    0
1187                };
1188
1189                let total_len = len + attr_len;
1190                emit_display_math(builder, content, dollar_count, math_opts(config));
1191
1192                // Emit attributes if present, structured over the raw source
1193                // bytes (leading whitespace split out as its own token).
1194                if attr_len > 0 {
1195                    use crate::parser::utils::attributes::emit_attribute_node;
1196                    let attr_text = &text[pos + len..pos + total_len];
1197                    let trimmed_after = attr_text.trim_start();
1198                    let ws_len = attr_text.len() - trimmed_after.len();
1199                    if ws_len > 0 {
1200                        builder.token(SyntaxKind::WHITESPACE.into(), &attr_text[..ws_len]);
1201                    }
1202                    emit_attribute_node(builder, trimmed_after);
1203                }
1204
1205                pos += total_len;
1206                text_start = pos;
1207                continue;
1208            }
1209
1210            // Try inline math ($...$)
1211            if let Some((len, content)) = try_parse_inline_math(&text[pos..]) {
1212                // Emit accumulated text
1213                if pos > text_start {
1214                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1215                }
1216
1217                log::trace!("Matched inline math at pos {}", pos);
1218                emit_inline_math(builder, content, math_opts(config));
1219                pos += len;
1220                text_start = pos;
1221                continue;
1222            }
1223
1224            // Neither display nor inline math matched - emit the $ as literal text
1225            // This ensures each $ gets its own TEXT token for CST compatibility
1226            if pos > text_start {
1227                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1228            }
1229            builder.token(SyntaxKind::TEXT.into(), "$");
1230            pos = advance_char_boundary(text, pos, end);
1231            text_start = pos;
1232            continue;
1233        }
1234
1235        // Try autolinks: <url> or <email>
1236        if byte == b'<'
1237            && config.extensions.autolinks
1238            && let Some((len, url)) = try_parse_autolink(
1239                &text[pos..],
1240                config.dialect == crate::options::Dialect::CommonMark,
1241            )
1242        {
1243            if pos > text_start {
1244                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1245            }
1246            log::trace!("Matched autolink at pos {}", pos);
1247            emit_autolink(builder, &text[pos..pos + len], url);
1248            pos += len;
1249            text_start = pos;
1250            continue;
1251        }
1252
1253        if !nested_in_link
1254            && config.extensions.autolink_bare_uris
1255            && let Some((len, url)) = try_parse_bare_uri(&text[pos..])
1256        {
1257            if pos > text_start {
1258                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1259            }
1260            log::trace!("Matched bare URI at pos {}", pos);
1261            emit_bare_uri_link(builder, url, config);
1262            pos += len;
1263            text_start = pos;
1264            continue;
1265        }
1266
1267        // Try native spans: <span>text</span> (after autolink since both
1268        // start with <). Under Pandoc dialect this is consumed via the
1269        // IR's `ConstructPlan` at the top of the loop; this dispatcher
1270        // branch only fires for CommonMark dialect with the extension
1271        // explicitly enabled.
1272        if byte == b'<'
1273            && config.dialect == Dialect::CommonMark
1274            && config.extensions.native_spans
1275            && let Some((len, content, _attributes)) = try_parse_native_span(&text[pos..])
1276        {
1277            if pos > text_start {
1278                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1279            }
1280            log::trace!("Matched native span at pos {}", pos);
1281            emit_native_span(
1282                builder,
1283                &text[pos..pos + len],
1284                content,
1285                config,
1286                suppress_footnote_refs,
1287            );
1288            pos += len;
1289            text_start = pos;
1290            continue;
1291        }
1292
1293        // Try inline raw HTML (CommonMark §6.6 / Pandoc raw_html). Must run
1294        // after autolinks (more specific) and native spans (Pandoc
1295        // <span>…</span> wrapper) since all three start with `<`.
1296        if byte == b'<'
1297            && config.extensions.raw_html
1298            && let Some(len) = try_parse_inline_html(&text[pos..], config.dialect)
1299        {
1300            if pos > text_start {
1301                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1302            }
1303            log::trace!("Matched inline raw HTML at pos {}", pos);
1304            emit_inline_html(builder, &text[pos..pos + len]);
1305            pos += len;
1306            text_start = pos;
1307            continue;
1308        }
1309
1310        // Bracket-starting elements: inline / reference links and
1311        // images are dispatched via the IR-driven arm at the top of
1312        // the loop, gated by the IR's `BracketPlan`. Only dialect-CM-
1313        // specific Pandoc-extension constructs that share the `[...]`
1314        // shape (footnote refs, bracketed citations) need a CM-gated
1315        // dispatcher branch — under Pandoc dialect they're consumed
1316        // via the IR's `ConstructPlan` instead.
1317        if byte == b'['
1318            && config.dialect == Dialect::CommonMark
1319            && config.extensions.footnotes
1320            && !suppress_footnote_refs
1321            && let Some((len, id)) = try_parse_footnote_reference(&text[pos..])
1322        {
1323            if pos > text_start {
1324                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1325            }
1326            log::trace!("Matched footnote reference at pos {}", pos);
1327            emit_footnote_reference(builder, &id);
1328            pos += len;
1329            text_start = pos;
1330            continue;
1331        }
1332        if byte == b'['
1333            && config.dialect == Dialect::CommonMark
1334            && config.extensions.citations
1335            && let Some((len, content)) = try_parse_bracketed_citation(&text[pos..])
1336        {
1337            if pos > text_start {
1338                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1339            }
1340            log::trace!("Matched bracketed citation at pos {}", pos);
1341            emit_bracketed_citation(builder, content);
1342            pos += len;
1343            text_start = pos;
1344            continue;
1345        }
1346
1347        // Try bracketed spans: [text]{.class}. Must come after
1348        // links/citations. Under Pandoc dialect this is consumed via
1349        // the IR's `ConstructPlan` at the top of the loop; this
1350        // dispatcher branch only fires for CommonMark dialect with the
1351        // extension explicitly enabled.
1352        if config.dialect == Dialect::CommonMark
1353            && byte == b'['
1354            && config.extensions.bracketed_spans
1355            && let Some((len, text_content, attrs)) = try_parse_bracketed_span(&text[pos..])
1356        {
1357            if pos > text_start {
1358                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1359            }
1360            log::trace!("Matched bracketed span at pos {}", pos);
1361            emit_bracketed_span(
1362                builder,
1363                &text_content,
1364                &attrs,
1365                config,
1366                suppress_footnote_refs,
1367            );
1368            pos += len;
1369            text_start = pos;
1370            continue;
1371        }
1372
1373        // Try bare citation: @cite (must come after bracketed elements).
1374        // Under Pandoc dialect this is consumed via the IR's
1375        // `ConstructPlan` at the top of the loop; this dispatcher branch
1376        // only fires for CommonMark dialect with the extension
1377        // explicitly enabled.
1378        if config.dialect == Dialect::CommonMark
1379            && byte == b'@'
1380            && (config.extensions.citations || config.extensions.quarto_crossrefs)
1381            && let Some((len, key, has_suppress)) = try_parse_bare_citation(&text[pos..])
1382        {
1383            let is_crossref =
1384                config.extensions.quarto_crossrefs && super::citations::is_quarto_crossref_key(key);
1385            if is_crossref || config.extensions.citations {
1386                if pos > text_start {
1387                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1388                }
1389                if is_crossref {
1390                    log::trace!("Matched Quarto crossref at pos {}: {}", pos, &key);
1391                    super::citations::emit_crossref(builder, key, has_suppress);
1392                } else {
1393                    log::trace!("Matched bare citation at pos {}: {}", pos, &key);
1394                    emit_bare_citation(builder, key, has_suppress);
1395                }
1396                pos += len;
1397                text_start = pos;
1398                continue;
1399            }
1400        }
1401
1402        // Try suppress-author citation: -@cite. Under Pandoc dialect
1403        // this is consumed via the IR's `ConstructPlan` at the top of
1404        // the loop; this dispatcher branch only fires for CommonMark
1405        // dialect with the extension explicitly enabled.
1406        if config.dialect == Dialect::CommonMark
1407            && byte == b'-'
1408            && pos + 1 < text.len()
1409            && text.as_bytes()[pos + 1] == b'@'
1410            && (config.extensions.citations || config.extensions.quarto_crossrefs)
1411            && let Some((len, key, has_suppress)) = try_parse_bare_citation(&text[pos..])
1412        {
1413            let is_crossref =
1414                config.extensions.quarto_crossrefs && super::citations::is_quarto_crossref_key(key);
1415            if is_crossref || config.extensions.citations {
1416                if pos > text_start {
1417                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1418                }
1419                if is_crossref {
1420                    log::trace!("Matched Quarto crossref at pos {}: {}", pos, &key);
1421                    super::citations::emit_crossref(builder, key, has_suppress);
1422                } else {
1423                    log::trace!("Matched suppress-author citation at pos {}: {}", pos, &key);
1424                    emit_bare_citation(builder, key, has_suppress);
1425                }
1426                pos += len;
1427                text_start = pos;
1428                continue;
1429            }
1430        }
1431
1432        // Emphasis emission, plan-driven. The IR's emphasis pass has
1433        // already decided every delimiter byte's disposition (open
1434        // marker, close marker, or unmatched literal); consult the
1435        // plan here instead of re-scanning.
1436        if byte == b'*' || byte == b'_' {
1437            match plan.lookup(pos) {
1438                Some(DelimChar::Open {
1439                    len,
1440                    partner,
1441                    partner_len,
1442                    kind,
1443                }) => {
1444                    if pos > text_start {
1445                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1446                    }
1447                    let len = len as usize;
1448                    let partner_len = partner_len as usize;
1449                    let (wrapper_kind, marker_kind) = match kind {
1450                        EmphasisKind::Strong => (SyntaxKind::STRONG, SyntaxKind::STRONG_MARKER),
1451                        EmphasisKind::Emph => (SyntaxKind::EMPHASIS, SyntaxKind::EMPHASIS_MARKER),
1452                    };
1453                    builder.start_node(wrapper_kind.into());
1454                    builder.token(marker_kind.into(), &text[pos..pos + len]);
1455                    parse_inline_range_impl(
1456                        text,
1457                        pos + len,
1458                        partner,
1459                        config,
1460                        builder,
1461                        nested_in_link,
1462                        plan,
1463                        bracket_plan,
1464                        construct_plan,
1465                        suppress_inner_links,
1466                        suppress_footnote_refs,
1467                        mask,
1468                    );
1469                    builder.token(marker_kind.into(), &text[partner..partner + partner_len]);
1470                    builder.finish_node();
1471                    pos = partner + partner_len;
1472                    text_start = pos;
1473                    continue;
1474                }
1475                Some(DelimChar::Close) => {
1476                    // Defensive: a close should be jumped past by its
1477                    // matching open. If we hit one anyway (e.g. when the
1478                    // outer caller's range starts mid-pair), let it be
1479                    // emitted as part of the surrounding text by simply
1480                    // advancing. text_start stays put so the byte folds
1481                    // into the next TEXT flush.
1482                    pos += 1;
1483                    continue;
1484                }
1485                Some(DelimChar::Literal) | None => {
1486                    // Unmatched delim chars at this position behave as
1487                    // literal text. Don't emit yet — let them coalesce
1488                    // with surrounding plain bytes via the existing
1489                    // text_start flushing so the CST keeps the same TEXT
1490                    // token granularity Pandoc fixtures expect.
1491                    let bytes = text.as_bytes();
1492                    let mut end_pos = pos + 1;
1493                    while end_pos < end && bytes[end_pos] == byte {
1494                        match plan.lookup(end_pos) {
1495                            Some(DelimChar::Literal) | None => end_pos += 1,
1496                            _ => break,
1497                        }
1498                    }
1499                    pos = end_pos;
1500                    continue;
1501                }
1502            }
1503        }
1504
1505        // Check for newlines - may need to emit as hard line break
1506        if byte == b'\r' && pos + 1 < end && text.as_bytes()[pos + 1] == b'\n' {
1507            let text_before = &text[text_start..pos];
1508
1509            // Check for trailing spaces hard line break (always enabled in Pandoc)
1510            let trailing_spaces = text_before.chars().rev().take_while(|&c| c == ' ').count();
1511            if trailing_spaces >= 2 {
1512                // Emit text before the trailing spaces
1513                let text_content = &text_before[..text_before.len() - trailing_spaces];
1514                if !text_content.is_empty() {
1515                    builder.token(SyntaxKind::TEXT.into(), text_content);
1516                }
1517                let spaces = " ".repeat(trailing_spaces);
1518                builder.token(
1519                    SyntaxKind::HARD_LINE_BREAK.into(),
1520                    &format!("{}\r\n", spaces),
1521                );
1522                pos += 2;
1523                text_start = pos;
1524                continue;
1525            }
1526
1527            // hard_line_breaks: treat all single newlines as hard line breaks
1528            if config.extensions.hard_line_breaks {
1529                if !text_before.is_empty() {
1530                    builder.token(SyntaxKind::TEXT.into(), text_before);
1531                }
1532                builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\r\n");
1533                pos += 2;
1534                text_start = pos;
1535                continue;
1536            }
1537
1538            // Regular newline
1539            if !text_before.is_empty() {
1540                builder.token(SyntaxKind::TEXT.into(), text_before);
1541            }
1542            builder.token(SyntaxKind::NEWLINE.into(), "\r\n");
1543            pos += 2;
1544            text_start = pos;
1545            continue;
1546        }
1547
1548        if byte == b'\n' {
1549            let text_before = &text[text_start..pos];
1550
1551            // Check for trailing spaces hard line break (always enabled in Pandoc)
1552            let trailing_spaces = text_before.chars().rev().take_while(|&c| c == ' ').count();
1553            if trailing_spaces >= 2 {
1554                // Emit text before the trailing spaces
1555                let text_content = &text_before[..text_before.len() - trailing_spaces];
1556                if !text_content.is_empty() {
1557                    builder.token(SyntaxKind::TEXT.into(), text_content);
1558                }
1559                let spaces = " ".repeat(trailing_spaces);
1560                builder.token(SyntaxKind::HARD_LINE_BREAK.into(), &format!("{}\n", spaces));
1561                pos += 1;
1562                text_start = pos;
1563                continue;
1564            }
1565
1566            // hard_line_breaks: treat all single newlines as hard line breaks
1567            if config.extensions.hard_line_breaks {
1568                if !text_before.is_empty() {
1569                    builder.token(SyntaxKind::TEXT.into(), text_before);
1570                }
1571                builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\n");
1572                pos += 1;
1573                text_start = pos;
1574                continue;
1575            }
1576
1577            // Regular newline
1578            if !text_before.is_empty() {
1579                builder.token(SyntaxKind::TEXT.into(), text_before);
1580            }
1581            builder.token(SyntaxKind::NEWLINE.into(), "\n");
1582            pos += 1;
1583            text_start = pos;
1584            continue;
1585        }
1586
1587        // Regular character, keep accumulating
1588        pos = advance_char_boundary(text, pos, end);
1589    }
1590
1591    // Emit any remaining text
1592    if pos > text_start && text_start < end {
1593        log::trace!("Emitting remaining TEXT: {:?}", &text[text_start..end]);
1594        builder.token(SyntaxKind::TEXT.into(), &text[text_start..end]);
1595    }
1596
1597    log::trace!("parse_inline_range complete: start={}, end={}", start, end);
1598}
1599
#[cfg(test)]
mod tests {
    // Unit tests for the inline emission walk. Each test parses a short
    // snippet through `parse_inline_text_recursive`, checks that the CST is
    // lossless, and then asserts on the shape of the resulting tree.
    use super::*;
    use crate::syntax::{SyntaxKind, SyntaxNode};

    /// Parses `text` as inline content beneath a single `wrapper` node and
    /// returns the root of the resulting CST.
    ///
    /// The wrapper guarantees the builder finishes with exactly one root
    /// node regardless of how many inline nodes/tokens the parser emits.
    fn parse_under(wrapper: SyntaxKind, text: &str, config: &ParserOptions) -> SyntaxNode {
        let mut builder = GreenNodeBuilder::new();
        builder.start_node(wrapper.into());
        parse_inline_text_recursive(&mut builder, text, config, false);
        builder.finish_node();
        SyntaxNode::new_root(builder.finish())
    }

    /// Returns `true` when `root` or any node below it has the given kind.
    fn has_kind(root: &SyntaxNode, kind: SyntaxKind) -> bool {
        root.descendants().any(|n| n.kind() == kind)
    }

    /// Returns `true` when some `outer` node strictly contains an `inner`
    /// node. Unlike a "seen earlier in pre-order" check, this rejects
    /// trees where the two kinds are merely siblings.
    fn nests(root: &SyntaxNode, outer: SyntaxKind, inner: SyntaxKind) -> bool {
        root.descendants()
            .filter(|n| n.kind() == outer)
            // `descendants()` yields the node itself first; skip it.
            .any(|n| n.descendants().skip(1).any(|d| d.kind() == inner))
    }

    #[test]
    fn test_recursive_simple_emphasis() {
        let text = "*test*";
        let config = ParserOptions::default();

        // Wrap in a PARAGRAPH node (inline content needs a parent)
        let node = parse_under(SyntaxKind::PARAGRAPH, text, &config);

        // Should be lossless
        assert_eq!(node.text().to_string(), text);

        // Should have EMPHASIS node
        assert!(
            has_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
    }

    #[test]
    fn test_recursive_nested() {
        let text = "*foo **bar** baz*";
        let config = ParserOptions::default();

        let node = parse_under(SyntaxKind::PARAGRAPH, text, &config);

        // Should be lossless
        assert_eq!(node.text().to_string(), text);

        // Should have both EMPHASIS and STRONG
        assert!(
            has_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert!(
            has_kind(&node, SyntaxKind::STRONG),
            "Should have STRONG node"
        );
    }

    /// Test Pandoc's "three" algorithm: ***foo* bar**
    /// Expected: Strong[Emph[foo], bar]
    #[test]
    fn test_triple_emphasis_star_then_double_star() {
        let text = "***foo* bar**";
        let config = ParserOptions::default();

        let node = parse_under(SyntaxKind::DOCUMENT, text, &config);

        // Verify losslessness
        assert_eq!(node.text().to_string(), text);

        // Rendered only for the failure message below.
        let structure = format!("{:#?}", node);

        // Should have both STRONG and EMPH
        assert!(
            has_kind(&node, SyntaxKind::STRONG),
            "Should have STRONG node"
        );
        assert!(
            has_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );

        // STRONG should be outer, EMPH should be inner
        assert!(
            nests(&node, SyntaxKind::STRONG, SyntaxKind::EMPHASIS),
            "EMPH should be inside STRONG, not before it. Current structure:\n{}",
            structure
        );
    }

    /// Test Pandoc's "three" algorithm: ***foo** bar*
    /// Expected: Emph[Strong[foo], bar]
    #[test]
    fn test_triple_emphasis_double_star_then_star() {
        let text = "***foo** bar*";
        let config = ParserOptions::default();

        let node = parse_under(SyntaxKind::DOCUMENT, text, &config);

        // Verify losslessness
        assert_eq!(node.text().to_string(), text);

        // Rendered only for the failure message below.
        let structure = format!("{:#?}", node);

        // Should have both EMPH and STRONG
        assert!(
            has_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert!(
            has_kind(&node, SyntaxKind::STRONG),
            "Should have STRONG node"
        );

        // EMPH should be outer, STRONG should be inner
        assert!(
            nests(&node, SyntaxKind::EMPHASIS, SyntaxKind::STRONG),
            "STRONG should be inside EMPH. Current structure:\n{}",
            structure
        );
    }

    /// Test that display math with attributes parses correctly
    /// Regression test for equation_attributes_single_line golden test
    #[test]
    fn test_display_math_with_attributes() {
        let text = "$$ E = mc^2 $$ {#eq-einstein}";
        let mut config = ParserOptions::default();
        config.extensions.quarto_crossrefs = true; // Enable Quarto cross-references

        let node = parse_under(SyntaxKind::DOCUMENT, text, &config);

        // Verify losslessness
        assert_eq!(node.text().to_string(), text);

        // Should have DISPLAY_MATH node
        assert!(
            has_kind(&node, SyntaxKind::DISPLAY_MATH),
            "Should have DISPLAY_MATH node"
        );

        // Should have ATTRIBUTE node
        assert!(
            has_kind(&node, SyntaxKind::ATTRIBUTE),
            "Should have ATTRIBUTE node for {{#eq-einstein}}"
        );

        // Attributes should not be TEXT. TEXT is emitted as a token, and
        // `next_sibling()` only walks sibling *nodes*, so the element-level
        // `next_sibling_or_token()` is required for this check to be able
        // to observe a TEXT token at all.
        let math_followed_by_text = node
            .descendants()
            .filter(|n| n.kind() == SyntaxKind::DISPLAY_MATH)
            .filter_map(|math| math.next_sibling_or_token())
            .any(|sibling| {
                sibling.kind() == SyntaxKind::TEXT
                    && sibling.to_string().contains("{#eq-einstein}")
            });
        assert!(
            !math_followed_by_text,
            "Attributes should not be parsed as TEXT"
        );
    }

    #[test]
    fn test_parse_inline_text_gfm_inline_link_destination_not_autolinked() {
        use crate::options::{Dialect, Extensions, Flavor};

        let config = ParserOptions {
            flavor: Flavor::Gfm,
            dialect: Dialect::for_flavor(Flavor::Gfm),
            extensions: Extensions::for_flavor(Flavor::Gfm),
            ..ParserOptions::default()
        };

        let root = parse_under(
            SyntaxKind::PARAGRAPH,
            "Second Link [link_text](https://link.com)",
            &config,
        );

        let links: Vec<_> = root
            .descendants()
            .filter(|n| n.kind() == SyntaxKind::LINK)
            .collect();
        assert_eq!(
            links.len(),
            1,
            "Expected exactly one LINK node for inline link, not nested bare URI autolink"
        );

        // Pull the text/destination children out of the single LINK node.
        let mut link_text = None::<String>;
        let mut link_dest = None::<String>;
        for child in links[0].children() {
            match child.kind() {
                SyntaxKind::LINK_TEXT => link_text = Some(child.text().to_string()),
                SyntaxKind::LINK_DEST => link_dest = Some(child.text().to_string()),
                _ => {}
            }
        }

        assert_eq!(link_text.as_deref(), Some("link_text"));
        assert_eq!(link_dest.as_deref(), Some("https://link.com"));
    }

    #[test]
    fn test_autolink_bare_uri_utf8_boundary_safe() {
        // A lone multi-byte character must not trip byte-offset slicing
        // when bare-URI autolinking is enabled.
        let text = "§";
        let mut config = ParserOptions::default();
        config.extensions.autolink_bare_uris = true;

        let node = parse_under(SyntaxKind::DOCUMENT, text, &config);

        assert_eq!(node.text().to_string(), text);
    }

    #[test]
    fn test_parse_emphasis_unicode_content_no_panic() {
        let text = "*§*";
        let config = ParserOptions::default();

        let node = parse_under(SyntaxKind::PARAGRAPH, text, &config);

        assert!(
            has_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert_eq!(node.text().to_string(), text);
    }
}
1888
#[test]
fn test_two_with_nested_one_and_triple_closer() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    // Input: **bold with *italic***
    // Expected tree: Strong["bold with ", Emph["italic"]]
    // The trailing *** splits into * (closing the Emph) followed by **
    // (closing the Strong).
    let source = "**bold with *italic***";
    let options = ParserOptions::default();

    let mut cst = GreenNodeBuilder::new();
    cst.start_node(SyntaxKind::PARAGRAPH.into());
    parse_inline_text_recursive(&mut cst, source, &options, false);
    cst.finish_node();
    let tree: GreenNode = cst.finish();
    let root = SyntaxNode::new_root(tree);

    assert_eq!(root.text().to_string(), source, "Should be lossless");

    let strongs: Vec<_> = root
        .descendants()
        .filter(|candidate| candidate.kind() == SyntaxKind::STRONG)
        .collect();
    assert_eq!(strongs.len(), 1, "Should have exactly one STRONG node");

    // Exactly one STRONG exists at this point, so inspecting every
    // collected node is the same as inspecting the first.
    let emphasis_nested = strongs.iter().any(|strong| {
        strong
            .descendants()
            .any(|inner| inner.kind() == SyntaxKind::EMPHASIS)
    });
    assert!(emphasis_nested, "STRONG should contain EMPHASIS node");
}
1925
#[test]
fn test_emphasis_with_trailing_space_before_closer() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    // Pandoc behavior: `*foo *` is emphasis, because asterisk closers are
    // not required to be right-flanking.
    let source = "*foo *";
    let options = ParserOptions::default();

    let mut cst = GreenNodeBuilder::new();
    cst.start_node(SyntaxKind::PARAGRAPH.into());
    parse_inline_text_recursive(&mut cst, source, &options, false);
    cst.finish_node();
    let tree: GreenNode = cst.finish();
    let root = SyntaxNode::new_root(tree);

    let emphasis_found = root
        .descendants()
        .any(|candidate| candidate.kind() == SyntaxKind::EMPHASIS);
    assert!(emphasis_found, "Should have EMPHASIS node");

    // The CST must round-trip the input byte-for-byte.
    assert_eq!(root.text().to_string(), source);
}
1950
#[test]
fn test_triple_emphasis_all_strong_nested() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    // Per Pandoc's output, ***foo** bar **baz*** is
    // Emph[Strong[foo], " bar ", Strong[baz]].
    let source = "***foo** bar **baz***";
    let options = ParserOptions::default();

    let mut cst = GreenNodeBuilder::new();
    cst.start_node(SyntaxKind::DOCUMENT.into());
    parse_inline_text_recursive(&mut cst, source, &options, false);
    cst.finish_node();
    let tree: GreenNode = cst.finish();
    let root = SyntaxNode::new_root(tree);

    // A single EMPHASIS node is expected in the whole tree.
    let emphases: Vec<_> = root
        .descendants()
        .filter(|candidate| candidate.kind() == SyntaxKind::EMPHASIS)
        .collect();
    assert_eq!(
        emphases.len(),
        1,
        "Should have exactly one EMPHASIS node, found: {}",
        emphases.len()
    );

    // Its direct children must include exactly two STRONG nodes.
    let strong_children = emphases[0]
        .children()
        .filter(|child| child.kind() == SyntaxKind::STRONG)
        .count();
    assert_eq!(
        strong_children, 2,
        "EMPHASIS should contain two STRONG nodes, found: {}",
        strong_children
    );

    // The CST must round-trip the input byte-for-byte.
    assert_eq!(root.text().to_string(), source);
}
1999
#[test]
fn test_triple_emphasis_all_emph_nested() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    // Per Pandoc's output, ***foo* bar *baz*** is
    // Strong[Emph[foo], " bar ", Emph[baz]].
    let source = "***foo* bar *baz***";
    let options = ParserOptions::default();

    let mut cst = GreenNodeBuilder::new();
    cst.start_node(SyntaxKind::DOCUMENT.into());
    parse_inline_text_recursive(&mut cst, source, &options, false);
    cst.finish_node();
    let tree: GreenNode = cst.finish();
    let root = SyntaxNode::new_root(tree);

    // A single STRONG node is expected in the whole tree.
    let strongs: Vec<_> = root
        .descendants()
        .filter(|candidate| candidate.kind() == SyntaxKind::STRONG)
        .collect();
    assert_eq!(
        strongs.len(),
        1,
        "Should have exactly one STRONG node, found: {}",
        strongs.len()
    );

    // Its direct children must include exactly two EMPHASIS nodes.
    let emphasis_children = strongs[0]
        .children()
        .filter(|child| child.kind() == SyntaxKind::EMPHASIS)
        .count();
    assert_eq!(
        emphasis_children, 2,
        "STRONG should contain two EMPHASIS nodes, found: {}",
        emphasis_children
    );

    // The CST must round-trip the input byte-for-byte.
    assert_eq!(root.text().to_string(), source);
}
2048
2049// Multiline emphasis tests
#[test]
fn test_parse_emphasis_multiline() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    // The Pandoc spec allows emphasis to span a soft line break.
    let source = "*text on\nline two*";
    let options = ParserOptions::default();

    let mut cst = GreenNodeBuilder::new();
    cst.start_node(SyntaxKind::PARAGRAPH.into());
    parse_inline_text_recursive(&mut cst, source, &options, false);
    cst.finish_node();
    let tree: GreenNode = cst.finish();
    let root = SyntaxNode::new_root(tree);

    let emphasis_found = root
        .descendants()
        .any(|candidate| candidate.kind() == SyntaxKind::EMPHASIS);
    assert!(emphasis_found, "Should have EMPHASIS node");

    // Round-trip check, then confirm the line break survived in the CST.
    let rendered = root.text().to_string();
    assert_eq!(rendered, source);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in emphasis content"
    );
}
2077
#[test]
fn test_parse_strong_multiline() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    // The Pandoc spec allows strong emphasis to span a line break.
    let source = "**strong on\nline two**";
    let options = ParserOptions::default();

    let mut cst = GreenNodeBuilder::new();
    cst.start_node(SyntaxKind::PARAGRAPH.into());
    parse_inline_text_recursive(&mut cst, source, &options, false);
    cst.finish_node();
    let tree: GreenNode = cst.finish();
    let root = SyntaxNode::new_root(tree);

    let strong_found = root
        .descendants()
        .any(|candidate| candidate.kind() == SyntaxKind::STRONG);
    assert!(strong_found, "Should have STRONG node");

    // Round-trip check, then confirm the line break survived in the CST.
    let rendered = root.text().to_string();
    assert_eq!(rendered, source);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in strong content"
    );
}
2105
#[test]
fn test_parse_triple_emphasis_multiline() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    // Triple-delimiter emphasis whose content spans a line break.
    let source = "***both on\nline two***";
    let options = ParserOptions::default();

    let mut cst = GreenNodeBuilder::new();
    cst.start_node(SyntaxKind::PARAGRAPH.into());
    parse_inline_text_recursive(&mut cst, source, &options, false);
    cst.finish_node();
    let tree: GreenNode = cst.finish();
    let root = SyntaxNode::new_root(tree);

    // Only the STRONG half of the strong + emph combination is asserted.
    let strong_found = root
        .descendants()
        .any(|candidate| candidate.kind() == SyntaxKind::STRONG);
    assert!(strong_found, "Should have STRONG node");

    // Round-trip check, then confirm the line break survived in the CST.
    let rendered = root.text().to_string();
    assert_eq!(rendered, source);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in triple emphasis content"
    );
}