Skip to main content

panache_parser/parser/inlines/
core.rs

1//! Inline emission walk.
2//!
3//! Consumes the IR plans built by [`super::inline_ir::build_full_plans`]
4//! (emphasis pairings, bracket resolutions, standalone Pandoc constructs)
5//! and emits the inline CST tokens / nodes in source order. Resolution
6//! decisions for emphasis, brackets, and standalone Pandoc constructs
7//! are entirely IR-driven for both dialects; the dispatcher's
8//! `try_parse_*` recognizers are still called to *parse* a matched byte
9//! range into a CST subtree, but "what is this byte range?" is answered
10//! exclusively by the IR.
11
12use crate::options::{Dialect, ParserOptions};
13use crate::syntax::SyntaxKind;
14use rowan::GreenNodeBuilder;
15
16use super::inline_ir::{
17    BracketPlan, ConstructDispo, ConstructPlan, DelimChar, EmphasisKind, EmphasisPlan,
18};
19
20// Import inline element parsers from sibling modules
21use super::bookdown::{
22    try_parse_bookdown_definition, try_parse_bookdown_reference, try_parse_bookdown_text_reference,
23};
24use super::bracketed_spans::{emit_bracketed_span, try_parse_bracketed_span};
25use super::citations::{
26    emit_bare_citation, emit_bracketed_citation, try_parse_bare_citation,
27    try_parse_bracketed_citation,
28};
29use super::code_spans::{emit_code_span, try_parse_code_span};
30use super::emoji::{emit_emoji, try_parse_emoji};
31use super::escapes::{EscapeType, emit_escape, try_parse_escape};
32use super::inline_executable::{emit_inline_executable, try_parse_inline_executable};
33use super::inline_footnotes::{
34    emit_footnote_reference, emit_inline_footnote, try_parse_footnote_reference,
35    try_parse_inline_footnote,
36};
37use super::inline_html::{emit_inline_html, try_parse_inline_html};
38use super::latex::{parse_latex_command, try_parse_latex_command};
39use super::links::{
40    LinkScanContext, emit_autolink, emit_bare_uri_link, emit_inline_image, emit_inline_link,
41    emit_reference_image, emit_reference_link, emit_unresolved_reference, try_parse_autolink,
42    try_parse_bare_uri, try_parse_inline_image, try_parse_inline_link, try_parse_reference_image,
43    try_parse_reference_link,
44};
45use super::mark::{emit_mark, try_parse_mark};
46use super::math::{
47    emit_display_math, emit_display_math_environment, emit_double_backslash_display_math,
48    emit_double_backslash_inline_math, emit_gfm_inline_math, emit_inline_math,
49    emit_single_backslash_display_math, emit_single_backslash_inline_math, math_opts,
50    try_parse_display_math, try_parse_double_backslash_display_math,
51    try_parse_double_backslash_inline_math, try_parse_gfm_inline_math, try_parse_inline_math,
52    try_parse_math_environment, try_parse_single_backslash_display_math,
53    try_parse_single_backslash_inline_math,
54};
55use super::native_spans::{emit_native_span, try_parse_native_span};
56use super::raw_inline::is_raw_inline;
57use super::shortcodes::{emit_shortcode, try_parse_shortcode};
58use super::strikeout::{emit_strikeout, try_parse_strikeout};
59use super::subscript::{emit_subscript, try_parse_subscript};
60use super::superscript::{emit_superscript, try_parse_superscript};
61
62/// Parse inline text into the CST builder.
63///
64/// Top-level entry point for inline parsing. Builds the IR plans
65/// (emphasis pairings, bracket resolutions, standalone Pandoc constructs)
66/// once via [`super::inline_ir::build_full_plans`], then walks the byte
67/// range left-to-right consulting those plans plus the dispatcher's
68/// ordered-try chain for non-IR-resolved constructs (autolinks, code
69/// spans, escapes, math, etc.). Dialect-specific behavior is selected
70/// inside `build_full_plans`.
71///
72/// # Arguments
73/// * `text` - The inline text to parse
74/// * `config` - Configuration for extensions and formatting
75/// * `builder` - The CST builder to emit nodes to
76/// * `suppress_footnote_refs` - When `true`, `[^id]` bytes are emitted as
77///   literal TEXT instead of `FOOTNOTE_REFERENCE`. Set by block parsers when
78///   the inline content lives inside a reference-style footnote definition
79///   body, where pandoc silently drops nested footnote references.
80pub fn parse_inline_text_recursive(
81    builder: &mut GreenNodeBuilder,
82    text: &str,
83    config: &ParserOptions,
84    suppress_footnote_refs: bool,
85) {
86    log::trace!(
87        "Recursive inline parsing: {:?} ({} bytes)",
88        &text[..text.len().min(40)],
89        text.len()
90    );
91
92    let mask = structural_byte_mask(config);
93    if try_emit_plain_text_fast_path_with_mask(builder, text, &mask) {
94        log::trace!("Recursive inline parsing complete (plain-text fast path)");
95        return;
96    }
97
98    let plans = super::inline_ir::build_full_plans(text, 0, text.len(), config);
99    parse_inline_range_impl(
100        text,
101        0,
102        text.len(),
103        config,
104        builder,
105        false,
106        &plans.emphasis,
107        &plans.brackets,
108        &plans.constructs,
109        false,
110        suppress_footnote_refs,
111        &mask,
112    );
113
114    log::trace!("Recursive inline parsing complete");
115}
116
117/// Parse inline elements from text content nested inside a link/image/span.
118///
119/// Used for recursive inline parsing of link text, image alt, span content, etc.
120/// Suppresses constructs that would create nested links (CommonMark §6.3 forbids
121/// links inside links), notably extended bare-URI autolinks under GFM.
122///
123/// `suppress_inner_links` should be `true` when the recursion is for a
124/// LINK or REFERENCE-LINK's text, where inner link / reference-link
125/// brackets must emit as literal text (pandoc-native:
126/// `[link [inner](u2)](u1)` → outer `Link` with `Str "[inner](u2)"`).
127/// Image alt text and all non-link contexts pass `false`:
128/// pandoc-native verifies `![alt with [inner](u)](u2)` keeps the inner
129/// `Link`, and bracketed spans / native spans / inline footnotes /
130/// emphasis all allow nested links.
131pub fn parse_inline_text(
132    builder: &mut GreenNodeBuilder,
133    text: &str,
134    config: &ParserOptions,
135    suppress_inner_links: bool,
136    suppress_footnote_refs: bool,
137) {
138    log::trace!(
139        "Parsing inline text (nested in link): {:?} ({} bytes)",
140        &text[..text.len().min(40)],
141        text.len()
142    );
143
144    let mask = structural_byte_mask(config);
145    if try_emit_plain_text_fast_path_with_mask(builder, text, &mask) {
146        return;
147    }
148
149    let plans = super::inline_ir::build_full_plans(text, 0, text.len(), config);
150    parse_inline_range_impl(
151        text,
152        0,
153        text.len(),
154        config,
155        builder,
156        true,
157        &plans.emphasis,
158        &plans.brackets,
159        &plans.constructs,
160        suppress_inner_links,
161        suppress_footnote_refs,
162        &mask,
163    );
164}
165
166/// Plain-text fast path for inline ranges with no structural bytes.
167///
168/// Returns `true` if the range was emitted as a single `TEXT` token and
169/// the caller should skip the IR + dispatcher pipeline. Returns `false`
170/// if any structural byte appears (or the range is empty), letting the
171/// caller proceed normally. Empty input returns `false` so the caller's
172/// existing "no events → no output" path is preserved exactly.
173///
174/// The structural byte set is computed from `config.dialect` and
175/// `config.extensions` so prose containing dialect-irrelevant punctuation
176/// (e.g. `-` outside a citation flavor) doesn't unnecessarily disable the
177/// fast path. `\n` and `\r` are always structural — multi-line inline
178/// content must still split into TEXT + NEWLINE tokens like the slow path.
179fn try_emit_plain_text_fast_path_with_mask(
180    builder: &mut GreenNodeBuilder,
181    text: &str,
182    mask: &[bool; 256],
183) -> bool {
184    if text.is_empty() {
185        return false;
186    }
187    for &b in text.as_bytes() {
188        if mask[b as usize] {
189            return false;
190        }
191    }
192    builder.token(SyntaxKind::TEXT.into(), text);
193    true
194}
195
196/// Build a 256-entry byte mask: `mask[b]` is `true` iff byte `b` could
197/// trigger any IR-recognised construct or dispatcher branch under the
198/// current dialect/extensions. Used by the plain-text fast path to scan
199/// inline ranges in a single pass.
200fn structural_byte_mask(config: &ParserOptions) -> [bool; 256] {
201    let mut mask = [false; 256];
202    let exts = &config.extensions;
203    let pandoc = config.dialect == Dialect::Pandoc;
204
205    // Always structural: line breaks (CST splits TEXT/NEWLINE), backslash
206    // (escape / hard break / backslash-math / latex / bookdown ref),
207    // backtick (code span / inline executable), `*`/`_` (emphasis is a
208    // core CommonMark construct, not extension-gated), and `[`/`]` if
209    // any bracket-shaped construct is reachable.
210    mask[b'\n' as usize] = true;
211    mask[b'\r' as usize] = true;
212    mask[b'\\' as usize] = true;
213    mask[b'`' as usize] = true;
214    mask[b'*' as usize] = true;
215    mask[b'_' as usize] = true;
216
217    // Brackets: the IR/dispatcher only acts on `[`/`]` if some
218    // bracket-shaped feature is reachable. `!` is the leading byte of
219    // `![alt]` image brackets — the IR's `BracketPlan` keys image
220    // openers at the `!` position, so the dispatcher must stop here
221    // to consult the plan.
222    if exts.inline_links
223        || exts.reference_links
224        || exts.inline_images
225        || exts.bracketed_spans
226        || exts.footnotes
227        || exts.citations
228    {
229        mask[b'[' as usize] = true;
230        mask[b']' as usize] = true;
231    }
232    if exts.inline_images || exts.reference_links {
233        mask[b'!' as usize] = true;
234    }
235
236    // `<` covers autolinks, raw HTML, and Pandoc native spans.
237    if exts.autolinks || exts.raw_html || exts.native_spans {
238        mask[b'<' as usize] = true;
239    }
240
241    // `^` covers Pandoc inline footnotes (`^[...]`), CM inline footnotes
242    // (when explicitly enabled), and superscript (`^text^`).
243    if exts.inline_footnotes || exts.superscript {
244        mask[b'^' as usize] = true;
245    }
246
247    // `@` and `-` cover Pandoc citation forms (`@cite`, `-@cite`,
248    // `[@cite]`). Under Pandoc dialect, the IR's `ConstructPlan` keys
249    // bare citations at the `@` or `-` position, so the dispatcher
250    // must stop at either to consult the plan. Including `-` is
251    // pessimistic — most prose hyphens won't form `-@` — but missing
252    // it would skip past valid suppress-author citations.
253    if exts.citations || exts.quarto_crossrefs {
254        mask[b'@' as usize] = true;
255        if pandoc {
256            mask[b'-' as usize] = true;
257        }
258    }
259
260    // `$` covers dollar-math and GFM math.
261    if exts.tex_math_dollars || exts.tex_math_gfm {
262        mask[b'$' as usize] = true;
263    }
264
265    // `~` covers subscript and strikeout (both `~text~` and `~~text~~`).
266    if exts.subscript || exts.strikeout {
267        mask[b'~' as usize] = true;
268    }
269
270    if exts.mark {
271        mask[b'=' as usize] = true;
272    }
273    if exts.emoji {
274        mask[b':' as usize] = true;
275    }
276    if exts.bookdown_references {
277        mask[b'(' as usize] = true;
278    }
279    // `{{< ... >}}` shortcodes: the dispatcher tries them on any
280    // `{` regardless of the `quarto_shortcodes` extension flag, so
281    // `{` must always be flagged here.
282    mask[b'{' as usize] = true;
283
284    // Bare-URI autolinks (`http://...` without `<>`) have no
285    // leading-byte gate in the dispatcher — `try_parse_bare_uri`
286    // probes for a URI scheme starting at every byte. Flag all
287    // ASCII alphabetic bytes so the bulk-skip stops on every
288    // potential scheme starter. This effectively disables the
289    // bulk-skip benefit for prose under GFM-style flavors but
290    // preserves correctness; ASCII digits / punctuation / non-ASCII
291    // bytes still skip cleanly.
292    if exts.autolink_bare_uris {
293        for b in b'a'..=b'z' {
294            mask[b as usize] = true;
295        }
296        for b in b'A'..=b'Z' {
297            mask[b as usize] = true;
298        }
299    }
300
301    mask
302}
303
/// Report whether byte offset `pos` in `text` is a valid place for an
/// emoji shortcode to begin.
///
/// The start of the text (`pos == 0`) is always a boundary. Otherwise
/// the position is a boundary unless the byte immediately before it is
/// an ASCII letter, ASCII digit, or `_`.
///
/// Panics if `pos` is greater than `text.len()`.
fn is_emoji_boundary(text: &str, pos: usize) -> bool {
    match pos.checked_sub(1) {
        // Nothing precedes the start of the text.
        None => true,
        Some(prev_idx) => {
            let prev = text.as_bytes()[prev_idx];
            !(prev.is_ascii_alphanumeric() || prev == b'_')
        }
    }
}
313
/// Step `pos` forward past the single character that starts there,
/// without going beyond `end`.
///
/// Returns `pos` unchanged when it is already at or past `end` or at or
/// past the end of `text`. Otherwise returns `pos` plus the UTF-8 width
/// of the character at `pos`, clamped to `end`.
///
/// Panics if `pos` is inside `text` but not on a char boundary.
#[inline]
fn advance_char_boundary(text: &str, pos: usize, end: usize) -> usize {
    let in_range = pos < end && pos < text.len();
    if !in_range {
        return pos;
    }
    // `pos < text.len()` means the tail is non-empty; the fallback width
    // of 1 only covers the case where no character is found.
    let width = text[pos..].chars().next().map_or(1, |ch| ch.len_utf8());
    end.min(pos + width)
}
325
326#[allow(clippy::too_many_arguments)]
327fn parse_inline_range_impl(
328    text: &str,
329    start: usize,
330    end: usize,
331    config: &ParserOptions,
332    builder: &mut GreenNodeBuilder,
333    nested_in_link: bool,
334    plan: &EmphasisPlan,
335    bracket_plan: &BracketPlan,
336    construct_plan: &ConstructPlan,
337    suppress_inner_links: bool,
338    suppress_footnote_refs: bool,
339    mask: &[bool; 256],
340) {
341    log::trace!(
342        "parse_inline_range: start={}, end={}, text={:?}",
343        start,
344        end,
345        &text[start..end]
346    );
347    let mut pos = start;
348    let mut text_start = start;
349    let bytes = text.as_bytes();
350
351    while pos < end {
352        // Bulk-skip plain bytes between structural bytes. Plans
353        // (`construct_plan`, `bracket_plan`, emphasis `plan`) only
354        // resolve at structural byte positions, so skipping here
355        // never elides a real match. `text_start` is preserved
356        // across the skip; the next emitted construct flushes the
357        // accumulated TEXT span.
358        if !mask[bytes[pos] as usize] {
359            let mut next = pos + 1;
360            while next < end && !mask[bytes[next] as usize] {
361                next += 1;
362            }
363            pos = next;
364            if pos >= end {
365                break;
366            }
367        }
368        // IR-driven dispatch: if the IR identified a Pandoc standalone
369        // construct starting here, emit it directly. Bypasses the
370        // dispatcher's ordered-try chain for inline footnotes, native
371        // spans, footnote references, citations, and bracketed spans
372        // under `Dialect::Pandoc`. The IR scan gates these on
373        // `!is_commonmark` and the relevant extension flag, so this
374        // branch is empty under CommonMark dialect (where the legacy
375        // dispatcher branches still run when the extension is enabled).
376        if let Some(dispo) = construct_plan.lookup(pos) {
377            match *dispo {
378                ConstructDispo::InlineFootnote { end: dispo_end } => {
379                    if dispo_end <= end
380                        && let Some((len, content)) = try_parse_inline_footnote(&text[pos..])
381                        && pos + len == dispo_end
382                    {
383                        if pos > text_start {
384                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
385                        }
386                        log::trace!("IR: matched inline footnote at pos {}", pos);
387                        emit_inline_footnote(builder, content, config, suppress_footnote_refs);
388                        pos += len;
389                        text_start = pos;
390                        continue;
391                    }
392                }
393                ConstructDispo::NativeSpan { end: dispo_end } => {
394                    if dispo_end <= end
395                        && let Some((len, content, _attributes)) =
396                            try_parse_native_span(&text[pos..])
397                        && pos + len == dispo_end
398                    {
399                        if pos > text_start {
400                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
401                        }
402                        log::trace!("IR: matched native span at pos {}", pos);
403                        emit_native_span(
404                            builder,
405                            &text[pos..pos + len],
406                            content,
407                            config,
408                            suppress_footnote_refs,
409                        );
410                        pos += len;
411                        text_start = pos;
412                        continue;
413                    }
414                }
415                ConstructDispo::FootnoteReference { end: dispo_end } => {
416                    if !suppress_footnote_refs
417                        && dispo_end <= end
418                        && let Some((len, id)) = try_parse_footnote_reference(&text[pos..])
419                        && pos + len == dispo_end
420                    {
421                        if pos > text_start {
422                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
423                        }
424                        log::trace!("IR: matched footnote reference at pos {}", pos);
425                        emit_footnote_reference(builder, &id);
426                        pos += len;
427                        text_start = pos;
428                        continue;
429                    }
430                }
431                ConstructDispo::BracketedCitation { end: dispo_end } => {
432                    if dispo_end <= end
433                        && let Some((len, content)) = try_parse_bracketed_citation(&text[pos..])
434                        && pos + len == dispo_end
435                    {
436                        if pos > text_start {
437                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
438                        }
439                        log::trace!("IR: matched bracketed citation at pos {}", pos);
440                        emit_bracketed_citation(builder, content);
441                        pos += len;
442                        text_start = pos;
443                        continue;
444                    }
445                }
446                ConstructDispo::BareCitation { end: dispo_end } => {
447                    if dispo_end <= end
448                        && let Some((len, key, has_suppress)) =
449                            try_parse_bare_citation(&text[pos..])
450                        && pos + len == dispo_end
451                    {
452                        let is_crossref = config.extensions.quarto_crossrefs
453                            && super::citations::is_quarto_crossref_key(key);
454                        if is_crossref || config.extensions.citations {
455                            if pos > text_start {
456                                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
457                            }
458                            if is_crossref {
459                                log::trace!("IR: matched Quarto crossref at pos {}: {}", pos, key);
460                                super::citations::emit_crossref(builder, key, has_suppress);
461                            } else {
462                                log::trace!("IR: matched bare citation at pos {}: {}", pos, key);
463                                emit_bare_citation(builder, key, has_suppress);
464                            }
465                            pos += len;
466                            text_start = pos;
467                            continue;
468                        }
469                    }
470                }
471                ConstructDispo::BracketedSpan { end: dispo_end } => {
472                    if dispo_end <= end
473                        && let Some((len, content, attrs)) = try_parse_bracketed_span(&text[pos..])
474                        && pos + len == dispo_end
475                    {
476                        if pos > text_start {
477                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
478                        }
479                        log::trace!("IR: matched bracketed span at pos {}", pos);
480                        emit_bracketed_span(
481                            builder,
482                            &content,
483                            &attrs,
484                            config,
485                            suppress_footnote_refs,
486                        );
487                        pos += len;
488                        text_start = pos;
489                        continue;
490                    }
491                }
492                ConstructDispo::WikiLink { end: dispo_end } => {
493                    if dispo_end <= end
494                        && let Some(span) = super::wikilinks::try_parse_wikilink(text, pos, config)
495                        && span.end == dispo_end
496                    {
497                        if pos > text_start {
498                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
499                        }
500                        log::trace!("IR: matched wikilink at pos {}", pos);
501                        super::wikilinks::emit_wikilink(builder, text, span, config);
502                        pos = span.end;
503                        text_start = pos;
504                        continue;
505                    }
506                }
507            }
508        }
509
510        // IR-driven bracket dispatch: if the IR's `process_brackets`
511        // resolved a bracket pair starting at this position, emit it
512        // directly via the appropriate helper. The
513        // dispatcher's `try_parse_*` recognizers compute the actual
514        // byte length and extract content / attributes; the IR's
515        // `suffix_end` is used to constrain the dispatcher's match
516        // shape so the two pipelines agree on which link variant
517        // resolved (e.g. `[foo][bar]` with `bar` undefined and `foo`
518        // defined: IR resolves `[foo]` as shortcut, but the
519        // dispatcher's `try_parse_reference_link` would otherwise
520        // greedily return the full-ref shape). Suppression of inner
521        // LINK / REFERENCE LINK during LINK-text recursion is applied
522        // here (pandoc-native: outer-wins for nested links).
523        //
524        // Pandoc-extended `{.attrs}` after a link can extend the
525        // dispatcher's match length past the IR's `suffix_end`. The
526        // dispatcher's len is therefore constrained to
527        // `[suffix_end, end]` rather than required to equal
528        // `suffix_end` exactly.
529        // IR-driven dispatch: Pandoc unresolved bracket-shape pattern.
530        // Before emitting the `UNRESOLVED_REFERENCE` wrapper, give the
531        // dispatcher's lenient inline-link / inline-image parsers a
532        // chance to override. The IR's `try_inline_suffix` is stricter
533        // than pandoc-markdown for some destination shapes (URLs with
534        // spaces, titles with embedded quotes, shortcode-style braces);
535        // the dispatcher accepts those and produces a real LINK / IMAGE
536        // node — pandoc-native agrees. Without this override, valid
537        // pandoc links would degrade to `UNRESOLVED_REFERENCE` here.
538        if let Some(super::inline_ir::BracketDispo::UnresolvedReference {
539            is_image,
540            text_start: ref_text_start,
541            text_end: ref_text_end,
542            end: ref_end,
543        }) = bracket_plan.lookup(pos)
544        {
545            let is_image = *is_image;
546            let dispo_suffix_end = *ref_end;
547            let suppress = suppress_inner_links && !is_image;
548            if !suppress {
549                let ctx = LinkScanContext::from_options(config);
550                let is_commonmark = config.dialect == Dialect::CommonMark;
551                if is_image {
552                    if config.extensions.inline_images
553                        && let Some((len, alt_text, dest, attributes)) =
554                            try_parse_inline_image(&text[pos..], ctx)
555                        && pos + len >= dispo_suffix_end
556                        && pos + len <= end
557                    {
558                        if pos > text_start {
559                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
560                        }
561                        log::trace!(
562                            "IR: dispatcher overrode UnresolvedReference with inline image at pos {}",
563                            pos
564                        );
565                        emit_inline_image(
566                            builder,
567                            &text[pos..pos + len],
568                            alt_text,
569                            dest,
570                            attributes,
571                            config,
572                            suppress_footnote_refs,
573                        );
574                        pos += len;
575                        text_start = pos;
576                        continue;
577                    }
578                } else if config.extensions.inline_links
579                    && let Some((len, link_text, dest, attributes)) =
580                        try_parse_inline_link(&text[pos..], is_commonmark, ctx)
581                    && pos + len >= dispo_suffix_end
582                    && pos + len <= end
583                {
584                    if pos > text_start {
585                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
586                    }
587                    log::trace!(
588                        "IR: dispatcher overrode UnresolvedReference with inline link at pos {}",
589                        pos
590                    );
591                    emit_inline_link(
592                        builder,
593                        &text[pos..pos + len],
594                        link_text,
595                        dest,
596                        attributes,
597                        config,
598                        suppress_footnote_refs,
599                    );
600                    pos += len;
601                    text_start = pos;
602                    continue;
603                }
604            }
605
606            // Dispatcher didn't override; emit the wrapper.
607            let inner_text = &text[*ref_text_start..*ref_text_end];
608            let suffix_start = *ref_text_end + 1;
609            let label_suffix = if suffix_start < *ref_end {
610                Some(&text[suffix_start..*ref_end])
611            } else {
612                None
613            };
614            if pos > text_start {
615                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
616            }
617            log::trace!(
618                "IR: unresolved Pandoc reference shape at pos {}..{}",
619                pos,
620                ref_end
621            );
622            emit_unresolved_reference(
623                builder,
624                is_image,
625                inner_text,
626                label_suffix,
627                config,
628                suppress_footnote_refs,
629            );
630            pos = *ref_end;
631            text_start = pos;
632            continue;
633        }
634
635        if let Some(super::inline_ir::BracketDispo::Open {
636            is_image,
637            suffix_end,
638            ..
639        }) = bracket_plan.lookup(pos)
640        {
641            let is_image = *is_image;
642            let dispo_suffix_end = *suffix_end;
643            let suppress = suppress_inner_links && !is_image;
644            if !suppress {
645                let ctx = LinkScanContext::from_options(config);
646                let allow_shortcut = config.extensions.shortcut_reference_links;
647                let is_commonmark = config.dialect == Dialect::CommonMark;
648                if is_image {
649                    if config.extensions.inline_images
650                        && let Some((len, alt_text, dest, attributes)) =
651                            try_parse_inline_image(&text[pos..], ctx)
652                        && pos + len >= dispo_suffix_end
653                        && pos + len <= end
654                    {
655                        if pos > text_start {
656                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
657                        }
658                        log::trace!("IR: matched inline image at pos {}", pos);
659                        emit_inline_image(
660                            builder,
661                            &text[pos..pos + len],
662                            alt_text,
663                            dest,
664                            attributes,
665                            config,
666                            suppress_footnote_refs,
667                        );
668                        pos += len;
669                        text_start = pos;
670                        continue;
671                    }
672                    if config.extensions.reference_links
673                        && let Some((len, alt_text, reference, gap, is_shortcut)) =
674                            try_parse_reference_image(
675                                &text[pos..],
676                                allow_shortcut,
677                                config.extensions.spaced_reference_links,
678                            )
679                        && pos + len == dispo_suffix_end
680                        && pos + len <= end
681                    {
682                        if pos > text_start {
683                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
684                        }
685                        log::trace!("IR: matched reference image at pos {}", pos);
686                        emit_reference_image(
687                            builder,
688                            alt_text,
689                            &reference,
690                            gap,
691                            is_shortcut,
692                            config,
693                            suppress_footnote_refs,
694                        );
695                        pos += len;
696                        text_start = pos;
697                        continue;
698                    }
699                } else {
700                    if config.extensions.inline_links
701                        && let Some((len, link_text, dest, attributes)) =
702                            try_parse_inline_link(&text[pos..], is_commonmark, ctx)
703                        && pos + len >= dispo_suffix_end
704                        && pos + len <= end
705                    {
706                        if pos > text_start {
707                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
708                        }
709                        log::trace!("IR: matched inline link at pos {}", pos);
710                        emit_inline_link(
711                            builder,
712                            &text[pos..pos + len],
713                            link_text,
714                            dest,
715                            attributes,
716                            config,
717                            suppress_footnote_refs,
718                        );
719                        pos += len;
720                        text_start = pos;
721                        continue;
722                    }
723                    if config.extensions.reference_links
724                        && let Some((len, link_text, reference, gap, is_shortcut)) =
725                            try_parse_reference_link(
726                                &text[pos..],
727                                allow_shortcut,
728                                config.extensions.inline_links,
729                                config.extensions.spaced_reference_links,
730                                ctx,
731                            )
732                        && pos + len == dispo_suffix_end
733                        && pos + len <= end
734                    {
735                        if pos > text_start {
736                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
737                        }
738                        log::trace!("IR: matched reference link at pos {}", pos);
739                        emit_reference_link(
740                            builder,
741                            link_text,
742                            &reference,
743                            gap,
744                            is_shortcut,
745                            config,
746                            suppress_footnote_refs,
747                        );
748                        pos += len;
749                        text_start = pos;
750                        continue;
751                    }
752                }
753            }
754        }
755
756        let byte = text.as_bytes()[pos];
757
758        // Backslash math (highest priority if enabled)
759        if byte == b'\\' {
760            // Try double backslash display math first: \\[...\\]
761            if config.extensions.tex_math_double_backslash {
762                if let Some((len, content)) = try_parse_double_backslash_display_math(&text[pos..])
763                {
764                    if pos > text_start {
765                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
766                    }
767                    log::trace!("Matched double backslash display math at pos {}", pos);
768                    emit_double_backslash_display_math(builder, content, math_opts(config));
769                    pos += len;
770                    text_start = pos;
771                    continue;
772                }
773
774                // Try double backslash inline math: \\(...\\)
775                if let Some((len, content)) = try_parse_double_backslash_inline_math(&text[pos..]) {
776                    if pos > text_start {
777                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
778                    }
779                    log::trace!("Matched double backslash inline math at pos {}", pos);
780                    emit_double_backslash_inline_math(builder, content, math_opts(config));
781                    pos += len;
782                    text_start = pos;
783                    continue;
784                }
785            }
786
787            // Try single backslash display math: \[...\]
788            if config.extensions.tex_math_single_backslash {
789                if let Some((len, content)) = try_parse_single_backslash_display_math(&text[pos..])
790                {
791                    if pos > text_start {
792                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
793                    }
794                    log::trace!("Matched single backslash display math at pos {}", pos);
795                    emit_single_backslash_display_math(builder, content, math_opts(config));
796                    pos += len;
797                    text_start = pos;
798                    continue;
799                }
800
801                // Try single backslash inline math: \(...\)
802                if let Some((len, content)) = try_parse_single_backslash_inline_math(&text[pos..]) {
803                    if pos > text_start {
804                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
805                    }
806                    log::trace!("Matched single backslash inline math at pos {}", pos);
807                    emit_single_backslash_inline_math(builder, content, math_opts(config));
808                    pos += len;
809                    text_start = pos;
810                    continue;
811                }
812            }
813
814            // Try math environments \begin{equation}...\end{equation}
815            if config.extensions.raw_tex
816                && let Some((len, begin_marker, content, end_marker)) =
817                    try_parse_math_environment(&text[pos..])
818            {
819                if pos > text_start {
820                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
821                }
822                log::trace!("Matched math environment at pos {}", pos);
823                emit_display_math_environment(
824                    builder,
825                    begin_marker,
826                    content,
827                    end_marker,
828                    math_opts(config),
829                );
830                pos += len;
831                text_start = pos;
832                continue;
833            }
834
835            // Try bookdown reference: \@ref(label)
836            if config.extensions.bookdown_references
837                && let Some((len, label)) = try_parse_bookdown_reference(&text[pos..])
838            {
839                if pos > text_start {
840                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
841                }
842                log::trace!("Matched bookdown reference at pos {}: {}", pos, label);
843                super::citations::emit_bookdown_crossref(builder, label);
844                pos += len;
845                text_start = pos;
846                continue;
847            }
848
849            // Try escapes (after bookdown refs and backslash math)
850            if let Some((len, ch, escape_type)) = try_parse_escape(&text[pos..]) {
851                let escape_enabled = match escape_type {
852                    EscapeType::HardLineBreak => config.extensions.escaped_line_breaks,
853                    EscapeType::NonbreakingSpace => config.extensions.all_symbols_escapable,
854                    EscapeType::Literal => {
855                        // BASE_ESCAPABLE matches Pandoc's markdown_strict /
856                        // original Markdown set, plus `|` and `~` which the
857                        // formatter emits as escapes for pipe-table separators
858                        // and strikethrough delimiters. Recognising those here
859                        // keeps round-trips idempotent in flavors that don't
860                        // enable all_symbols_escapable.
861                        //
862                        // Under CommonMark dialect, the spec (§2.4) explicitly
863                        // allows ANY ASCII punctuation to be backslash-escaped,
864                        // independent of the all_symbols_escapable extension
865                        // (which also widens to whitespace, a Pandoc-only
866                        // construct).
867                        const BASE_ESCAPABLE: &str = "\\`*_{}[]()>#+-.!|~";
868                        BASE_ESCAPABLE.contains(ch)
869                            || config.extensions.all_symbols_escapable
870                            || (config.dialect == crate::Dialect::CommonMark
871                                && ch.is_ascii_punctuation())
872                    }
873                };
874                if !escape_enabled {
875                    // This escape type is disabled: don't emit an escape node, just step past
876                    // the backslash. It stays in the pending run and goes out with the next TEXT token.
877                    pos = advance_char_boundary(text, pos, end);
878                    continue;
879                }
880
881                // Emit accumulated text
882                if pos > text_start {
883                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
884                }
885
886                log::trace!("Matched escape at pos {}: \\{}", pos, ch);
887                emit_escape(builder, ch, escape_type);
888                pos += len;
889                text_start = pos;
890                continue;
891            }
892
893            // Try LaTeX commands (after escapes, before shortcodes)
894            if config.extensions.raw_tex
895                && let Some(len) = try_parse_latex_command(&text[pos..])
896            {
897                if pos > text_start {
898                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
899                }
900                log::trace!("Matched LaTeX command at pos {}", pos);
901                parse_latex_command(builder, &text[pos..], len);
902                pos += len;
903                text_start = pos;
904                continue;
905            }
906        }
907
908        // Try Quarto shortcodes: {{< shortcode >}}
909        if byte == b'{'
910            && pos + 1 < text.len()
911            && text.as_bytes()[pos + 1] == b'{'
912            && let Some((len, name, attrs)) = try_parse_shortcode(&text[pos..])
913        {
914            if pos > text_start {
915                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
916            }
917            log::trace!("Matched shortcode at pos {}: {}", pos, &name);
918            emit_shortcode(builder, &name, attrs);
919            pos += len;
920            text_start = pos;
921            continue;
922        }
923
924        // Try inline executable code spans (`... `r expr`` and `... `{r} expr``)
925        if byte == b'`'
926            && let Some(m) = try_parse_inline_executable(
927                &text[pos..],
928                config.extensions.rmarkdown_inline_code,
929                config.extensions.quarto_inline_code,
930            )
931        {
932            if pos > text_start {
933                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
934            }
935            log::trace!("Matched inline executable code at pos {}", pos);
936            emit_inline_executable(builder, &m);
937            pos += m.total_len;
938            text_start = pos;
939            continue;
940        }
941
942        // Try code spans
943        if byte == b'`' {
944            if let Some((len, content, backtick_count, attributes)) =
945                try_parse_code_span(&text[pos..])
946            {
947                // Emit accumulated text
948                if pos > text_start {
949                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
950                }
951
952                log::trace!(
953                    "Matched code span at pos {}: {} backticks",
954                    pos,
955                    backtick_count
956                );
957
958                // Check for raw inline
959                if let Some((ref attrs, raw_attr)) = attributes
960                    && config.extensions.raw_attribute
961                    && let Some(format) = is_raw_inline(attrs)
962                {
963                    use super::raw_inline::emit_raw_inline;
964                    log::trace!("Matched raw inline span at pos {}: format={}", pos, format);
965                    emit_raw_inline(builder, content, backtick_count, raw_attr);
966                } else if !config.extensions.inline_code_attributes && attributes.is_some() {
967                    let code_span_len = backtick_count * 2 + content.len();
968                    emit_code_span(builder, content, backtick_count, None);
969                    pos += code_span_len;
970                    text_start = pos;
971                    continue;
972                } else {
973                    emit_code_span(
974                        builder,
975                        content,
976                        backtick_count,
977                        attributes.as_ref().map(|(_, raw)| *raw),
978                    );
979                }
980
981                pos += len;
982                text_start = pos;
983                continue;
984            }
985
986            // Unmatched backtick run.
987            //
988            // CommonMark (and GFM) treat the whole run as literal text — the
989            // run cannot be re-entered as a shorter opener. Pandoc-markdown
990            // instead lets a longer run shadow a shorter one (e.g.
991            // `` ```foo`` `` parses as `` ` `` + ``<code>foo</code>``), so
992            // for the Pandoc dialect we fall through and advance one byte at
993            // a time, allowing the inner run to be tried on a later iteration.
994            if config.dialect == Dialect::CommonMark {
995                let run_len = text[pos..].bytes().take_while(|&b| b == b'`').count();
996                pos += run_len;
997                continue;
998            }
999        }
1000
1001        // Try textual emoji aliases: :smile:
1002        if byte == b':'
1003            && config.extensions.emoji
1004            && is_emoji_boundary(text, pos)
1005            && let Some((len, _alias)) = try_parse_emoji(&text[pos..])
1006        {
1007            if pos > text_start {
1008                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1009            }
1010            log::trace!("Matched emoji at pos {}", pos);
1011            emit_emoji(builder, &text[pos..pos + len]);
1012            pos += len;
1013            text_start = pos;
1014            continue;
1015        }
1016
1017        // Try inline footnotes: ^[note]. Under Pandoc dialect this is
1018        // consumed via the IR's `ConstructPlan` at the top of the loop;
1019        // this dispatcher branch only fires for CommonMark dialect with
1020        // the extension explicitly enabled.
1021        if byte == b'^'
1022            && pos + 1 < text.len()
1023            && text.as_bytes()[pos + 1] == b'['
1024            && config.dialect == Dialect::CommonMark
1025            && config.extensions.inline_footnotes
1026            && let Some((len, content)) = try_parse_inline_footnote(&text[pos..])
1027        {
1028            if pos > text_start {
1029                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1030            }
1031            log::trace!("Matched inline footnote at pos {}", pos);
1032            emit_inline_footnote(builder, content, config, suppress_footnote_refs);
1033            pos += len;
1034            text_start = pos;
1035            continue;
1036        }
1037
1038        // Try superscript: ^text^
1039        if byte == b'^'
1040            && config.extensions.superscript
1041            && let Some((len, content)) = try_parse_superscript(&text[pos..])
1042        {
1043            if pos > text_start {
1044                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1045            }
1046            log::trace!("Matched superscript at pos {}", pos);
1047            emit_superscript(builder, content, config, suppress_footnote_refs);
1048            pos += len;
1049            text_start = pos;
1050            continue;
1051        }
1052
1053        // Try bookdown definition: (\#label) or (ref:label)
1054        if byte == b'(' && config.extensions.bookdown_references {
1055            if let Some((len, label)) = try_parse_bookdown_definition(&text[pos..]) {
1056                if pos > text_start {
1057                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1058                }
1059                log::trace!("Matched bookdown definition at pos {}: {}", pos, label);
1060                builder.token(SyntaxKind::TEXT.into(), &text[pos..pos + len]);
1061                pos += len;
1062                text_start = pos;
1063                continue;
1064            }
1065            if let Some((len, label)) = try_parse_bookdown_text_reference(&text[pos..]) {
1066                if pos > text_start {
1067                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1068                }
1069                log::trace!("Matched bookdown text reference at pos {}: {}", pos, label);
1070                builder.token(SyntaxKind::TEXT.into(), &text[pos..pos + len]);
1071                pos += len;
1072                text_start = pos;
1073                continue;
1074            }
1075        }
1076
1077        // Try strikeout: ~~text~~
1078        // Must run before subscript so `~~text~~` is matched as a single
1079        // Strikeout rather than two empty Subscripts. Subscript falls back
1080        // to consuming `~~` as an empty subscript only when strikeout
1081        // didn't match (e.g. `~~unclosed`).
1082        if byte == b'~'
1083            && config.extensions.strikeout
1084            && let Some((len, content)) = try_parse_strikeout(&text[pos..])
1085        {
1086            if pos > text_start {
1087                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1088            }
1089            log::trace!("Matched strikeout at pos {}", pos);
1090            emit_strikeout(builder, content, config, suppress_footnote_refs);
1091            pos += len;
1092            text_start = pos;
1093            continue;
1094        }
1095
1096        // Try subscript: ~text~ or `~~` as empty subscript when strikeout
1097        // didn't match (matches pandoc: `~~unclosed` → `Subscript [] + text`).
1098        if byte == b'~'
1099            && config.extensions.subscript
1100            && let Some((len, content)) = try_parse_subscript(&text[pos..])
1101        {
1102            if pos > text_start {
1103                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1104            }
1105            log::trace!("Matched subscript at pos {}", pos);
1106            emit_subscript(builder, content, config, suppress_footnote_refs);
1107            pos += len;
1108            text_start = pos;
1109            continue;
1110        }
1111
1112        // Try mark/highlight: ==text==
1113        if byte == b'='
1114            && config.extensions.mark
1115            && let Some((len, content)) = try_parse_mark(&text[pos..])
1116        {
1117            if pos > text_start {
1118                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1119            }
1120            log::trace!("Matched mark at pos {}", pos);
1121            emit_mark(builder, content, config, suppress_footnote_refs);
1122            pos += len;
1123            text_start = pos;
1124            continue;
1125        }
1126
1127        // Try GFM inline math: $`...`$
1128        if byte == b'$'
1129            && config.extensions.tex_math_gfm
1130            && let Some((len, content)) = try_parse_gfm_inline_math(&text[pos..])
1131        {
1132            if pos > text_start {
1133                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1134            }
1135            log::trace!("Matched GFM inline math at pos {}", pos);
1136            emit_gfm_inline_math(builder, content, math_opts(config));
1137            pos += len;
1138            text_start = pos;
1139            continue;
1140        }
1141
1142        // Try math ($...$, $$...$$)
1143        if byte == b'$' && config.extensions.tex_math_dollars {
1144            // Try display math first ($$...$$)
1145            if let Some((len, content)) = try_parse_display_math(&text[pos..]) {
1146                // Emit accumulated text
1147                if pos > text_start {
1148                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1149                }
1150
1151                let dollar_count = text[pos..].chars().take_while(|&c| c == '$').count();
1152                log::trace!(
1153                    "Matched display math at pos {}: {} dollars",
1154                    pos,
1155                    dollar_count
1156                );
1157
1158                // Check for trailing attributes (Quarto cross-reference support).
1159                // The Quarto attribute block sits on the same line as the closing
1160                // `$$`, so scope the lookup to the current line — otherwise
1161                // anything on later lines (e.g. a following `@eq-id` reference)
1162                // makes the segment not end with `}` and the lift no-ops.
1163                let after_math = &text[pos + len..];
1164                let line_end = after_math.find('\n').unwrap_or(after_math.len());
1165                let line_segment = &after_math[..line_end];
1166                let attr_len = if config.extensions.quarto_crossrefs {
1167                    use crate::parser::utils::attributes::try_parse_trailing_attributes;
1168                    if let Some((_attr_block, _)) = try_parse_trailing_attributes(line_segment) {
1169                        let trimmed_after = line_segment.trim_start();
1170                        if let Some(open_brace_pos) = trimmed_after.find('{') {
1171                            let ws_before_brace = line_segment.len() - trimmed_after.len();
1172                            let attr_text_len = trimmed_after[open_brace_pos..]
1173                                .find('}')
1174                                .map(|close| close + 1)
1175                                .unwrap_or(0);
1176                            ws_before_brace + open_brace_pos + attr_text_len
1177                        } else {
1178                            0
1179                        }
1180                    } else {
1181                        0
1182                    }
1183                } else {
1184                    0
1185                };
1186
1187                let total_len = len + attr_len;
1188                emit_display_math(builder, content, dollar_count, math_opts(config));
1189
1190                // Emit attributes if present, structured over the raw source
1191                // bytes (leading whitespace split out as its own token).
1192                if attr_len > 0 {
1193                    use crate::parser::utils::attributes::emit_attribute_node;
1194                    let attr_text = &text[pos + len..pos + total_len];
1195                    let trimmed_after = attr_text.trim_start();
1196                    let ws_len = attr_text.len() - trimmed_after.len();
1197                    if ws_len > 0 {
1198                        builder.token(SyntaxKind::WHITESPACE.into(), &attr_text[..ws_len]);
1199                    }
1200                    emit_attribute_node(builder, trimmed_after);
1201                }
1202
1203                pos += total_len;
1204                text_start = pos;
1205                continue;
1206            }
1207
1208            // Try inline math ($...$)
1209            if let Some((len, content)) = try_parse_inline_math(&text[pos..]) {
1210                // Emit accumulated text
1211                if pos > text_start {
1212                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1213                }
1214
1215                log::trace!("Matched inline math at pos {}", pos);
1216                emit_inline_math(builder, content, math_opts(config));
1217                pos += len;
1218                text_start = pos;
1219                continue;
1220            }
1221
1222            // Neither display nor inline math matched - emit the $ as literal text
1223            // This ensures each $ gets its own TEXT token for CST compatibility
1224            if pos > text_start {
1225                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1226            }
1227            builder.token(SyntaxKind::TEXT.into(), "$");
1228            pos = advance_char_boundary(text, pos, end);
1229            text_start = pos;
1230            continue;
1231        }
1232
1233        // Try autolinks: <url> or <email>
1234        if byte == b'<'
1235            && config.extensions.autolinks
1236            && let Some((len, url)) = try_parse_autolink(
1237                &text[pos..],
1238                config.dialect == crate::options::Dialect::CommonMark,
1239            )
1240        {
1241            if pos > text_start {
1242                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1243            }
1244            log::trace!("Matched autolink at pos {}", pos);
1245            emit_autolink(builder, &text[pos..pos + len], url);
1246            pos += len;
1247            text_start = pos;
1248            continue;
1249        }
1250
1251        if !nested_in_link
1252            && config.extensions.autolink_bare_uris
1253            && let Some((len, url)) = try_parse_bare_uri(&text[pos..])
1254        {
1255            if pos > text_start {
1256                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1257            }
1258            log::trace!("Matched bare URI at pos {}", pos);
1259            emit_bare_uri_link(builder, url, config);
1260            pos += len;
1261            text_start = pos;
1262            continue;
1263        }
1264
1265        // Try native spans: <span>text</span> (after autolink since both
1266        // start with <). Under Pandoc dialect this is consumed via the
1267        // IR's `ConstructPlan` at the top of the loop; this dispatcher
1268        // branch only fires for CommonMark dialect with the extension
1269        // explicitly enabled.
1270        if byte == b'<'
1271            && config.dialect == Dialect::CommonMark
1272            && config.extensions.native_spans
1273            && let Some((len, content, _attributes)) = try_parse_native_span(&text[pos..])
1274        {
1275            if pos > text_start {
1276                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1277            }
1278            log::trace!("Matched native span at pos {}", pos);
1279            emit_native_span(
1280                builder,
1281                &text[pos..pos + len],
1282                content,
1283                config,
1284                suppress_footnote_refs,
1285            );
1286            pos += len;
1287            text_start = pos;
1288            continue;
1289        }
1290
1291        // Try inline raw HTML (CommonMark §6.6 / Pandoc raw_html). Must run
1292        // after autolinks (more specific) and native spans (Pandoc
1293        // <span>…</span> wrapper) since all three start with `<`.
1294        if byte == b'<'
1295            && config.extensions.raw_html
1296            && let Some(len) = try_parse_inline_html(&text[pos..], config.dialect)
1297        {
1298            if pos > text_start {
1299                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1300            }
1301            log::trace!("Matched inline raw HTML at pos {}", pos);
1302            emit_inline_html(builder, &text[pos..pos + len]);
1303            pos += len;
1304            text_start = pos;
1305            continue;
1306        }
1307
1308        // Bracket-starting elements: inline / reference links and
1309        // images are dispatched via the IR-driven arm at the top of
1310        // the loop, gated by the IR's `BracketPlan`. Only dialect-CM-
1311        // specific Pandoc-extension constructs that share the `[...]`
1312        // shape (footnote refs, bracketed citations) need a CM-gated
1313        // dispatcher branch — under Pandoc dialect they're consumed
1314        // via the IR's `ConstructPlan` instead.
1315        if byte == b'['
1316            && config.dialect == Dialect::CommonMark
1317            && config.extensions.footnotes
1318            && !suppress_footnote_refs
1319            && let Some((len, id)) = try_parse_footnote_reference(&text[pos..])
1320        {
1321            if pos > text_start {
1322                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1323            }
1324            log::trace!("Matched footnote reference at pos {}", pos);
1325            emit_footnote_reference(builder, &id);
1326            pos += len;
1327            text_start = pos;
1328            continue;
1329        }
1330        if byte == b'['
1331            && config.dialect == Dialect::CommonMark
1332            && config.extensions.citations
1333            && let Some((len, content)) = try_parse_bracketed_citation(&text[pos..])
1334        {
1335            if pos > text_start {
1336                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1337            }
1338            log::trace!("Matched bracketed citation at pos {}", pos);
1339            emit_bracketed_citation(builder, content);
1340            pos += len;
1341            text_start = pos;
1342            continue;
1343        }
1344
1345        // Try bracketed spans: [text]{.class}. Must come after
1346        // links/citations. Under Pandoc dialect this is consumed via
1347        // the IR's `ConstructPlan` at the top of the loop; this
1348        // dispatcher branch only fires for CommonMark dialect with the
1349        // extension explicitly enabled.
1350        if config.dialect == Dialect::CommonMark
1351            && byte == b'['
1352            && config.extensions.bracketed_spans
1353            && let Some((len, text_content, attrs)) = try_parse_bracketed_span(&text[pos..])
1354        {
1355            if pos > text_start {
1356                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1357            }
1358            log::trace!("Matched bracketed span at pos {}", pos);
1359            emit_bracketed_span(
1360                builder,
1361                &text_content,
1362                &attrs,
1363                config,
1364                suppress_footnote_refs,
1365            );
1366            pos += len;
1367            text_start = pos;
1368            continue;
1369        }
1370
1371        // Try bare citation: @cite (must come after bracketed elements).
1372        // Under Pandoc dialect this is consumed via the IR's
1373        // `ConstructPlan` at the top of the loop; this dispatcher branch
1374        // only fires for CommonMark dialect with the extension
1375        // explicitly enabled.
1376        if config.dialect == Dialect::CommonMark
1377            && byte == b'@'
1378            && (config.extensions.citations || config.extensions.quarto_crossrefs)
1379            && let Some((len, key, has_suppress)) = try_parse_bare_citation(&text[pos..])
1380        {
1381            let is_crossref =
1382                config.extensions.quarto_crossrefs && super::citations::is_quarto_crossref_key(key);
1383            if is_crossref || config.extensions.citations {
1384                if pos > text_start {
1385                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1386                }
1387                if is_crossref {
1388                    log::trace!("Matched Quarto crossref at pos {}: {}", pos, &key);
1389                    super::citations::emit_crossref(builder, key, has_suppress);
1390                } else {
1391                    log::trace!("Matched bare citation at pos {}: {}", pos, &key);
1392                    emit_bare_citation(builder, key, has_suppress);
1393                }
1394                pos += len;
1395                text_start = pos;
1396                continue;
1397            }
1398        }
1399
1400        // Try suppress-author citation: -@cite. Under Pandoc dialect
1401        // this is consumed via the IR's `ConstructPlan` at the top of
1402        // the loop; this dispatcher branch only fires for CommonMark
1403        // dialect with the extension explicitly enabled.
1404        if config.dialect == Dialect::CommonMark
1405            && byte == b'-'
1406            && pos + 1 < text.len()
1407            && text.as_bytes()[pos + 1] == b'@'
1408            && (config.extensions.citations || config.extensions.quarto_crossrefs)
1409            && let Some((len, key, has_suppress)) = try_parse_bare_citation(&text[pos..])
1410        {
1411            let is_crossref =
1412                config.extensions.quarto_crossrefs && super::citations::is_quarto_crossref_key(key);
1413            if is_crossref || config.extensions.citations {
1414                if pos > text_start {
1415                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1416                }
1417                if is_crossref {
1418                    log::trace!("Matched Quarto crossref at pos {}: {}", pos, &key);
1419                    super::citations::emit_crossref(builder, key, has_suppress);
1420                } else {
1421                    log::trace!("Matched suppress-author citation at pos {}: {}", pos, &key);
1422                    emit_bare_citation(builder, key, has_suppress);
1423                }
1424                pos += len;
1425                text_start = pos;
1426                continue;
1427            }
1428        }
1429
1430        // Emphasis emission, plan-driven. The IR's emphasis pass has
1431        // already decided every delimiter byte's disposition (open
1432        // marker, close marker, or unmatched literal); consult the
1433        // plan here instead of re-scanning.
1434        if byte == b'*' || byte == b'_' {
1435            match plan.lookup(pos) {
1436                Some(DelimChar::Open {
1437                    len,
1438                    partner,
1439                    partner_len,
1440                    kind,
1441                }) => {
1442                    if pos > text_start {
1443                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1444                    }
1445                    let len = len as usize;
1446                    let partner_len = partner_len as usize;
1447                    let (wrapper_kind, marker_kind) = match kind {
1448                        EmphasisKind::Strong => (SyntaxKind::STRONG, SyntaxKind::STRONG_MARKER),
1449                        EmphasisKind::Emph => (SyntaxKind::EMPHASIS, SyntaxKind::EMPHASIS_MARKER),
1450                    };
1451                    builder.start_node(wrapper_kind.into());
1452                    builder.token(marker_kind.into(), &text[pos..pos + len]);
1453                    parse_inline_range_impl(
1454                        text,
1455                        pos + len,
1456                        partner,
1457                        config,
1458                        builder,
1459                        nested_in_link,
1460                        plan,
1461                        bracket_plan,
1462                        construct_plan,
1463                        suppress_inner_links,
1464                        suppress_footnote_refs,
1465                        mask,
1466                    );
1467                    builder.token(marker_kind.into(), &text[partner..partner + partner_len]);
1468                    builder.finish_node();
1469                    pos = partner + partner_len;
1470                    text_start = pos;
1471                    continue;
1472                }
1473                Some(DelimChar::Close) => {
1474                    // Defensive: a close should be jumped past by its
1475                    // matching open. If we hit one anyway (e.g. when the
1476                    // outer caller's range starts mid-pair), let it be
1477                    // emitted as part of the surrounding text by simply
1478                    // advancing. text_start stays put so the byte folds
1479                    // into the next TEXT flush.
1480                    pos += 1;
1481                    continue;
1482                }
1483                Some(DelimChar::Literal) | None => {
1484                    // Unmatched delim chars at this position behave as
1485                    // literal text. Don't emit yet — let them coalesce
1486                    // with surrounding plain bytes via the existing
1487                    // text_start flushing so the CST keeps the same TEXT
1488                    // token granularity Pandoc fixtures expect.
1489                    let bytes = text.as_bytes();
1490                    let mut end_pos = pos + 1;
1491                    while end_pos < end && bytes[end_pos] == byte {
1492                        match plan.lookup(end_pos) {
1493                            Some(DelimChar::Literal) | None => end_pos += 1,
1494                            _ => break,
1495                        }
1496                    }
1497                    pos = end_pos;
1498                    continue;
1499                }
1500            }
1501        }
1502
1503        // Check for newlines - may need to emit as hard line break
1504        if byte == b'\r' && pos + 1 < end && text.as_bytes()[pos + 1] == b'\n' {
1505            let text_before = &text[text_start..pos];
1506
1507            // Check for trailing spaces hard line break (always enabled in Pandoc)
1508            let trailing_spaces = text_before.chars().rev().take_while(|&c| c == ' ').count();
1509            if trailing_spaces >= 2 {
1510                // Emit text before the trailing spaces
1511                let text_content = &text_before[..text_before.len() - trailing_spaces];
1512                if !text_content.is_empty() {
1513                    builder.token(SyntaxKind::TEXT.into(), text_content);
1514                }
1515                let spaces = " ".repeat(trailing_spaces);
1516                builder.token(
1517                    SyntaxKind::HARD_LINE_BREAK.into(),
1518                    &format!("{}\r\n", spaces),
1519                );
1520                pos += 2;
1521                text_start = pos;
1522                continue;
1523            }
1524
1525            // hard_line_breaks: treat all single newlines as hard line breaks
1526            if config.extensions.hard_line_breaks {
1527                if !text_before.is_empty() {
1528                    builder.token(SyntaxKind::TEXT.into(), text_before);
1529                }
1530                builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\r\n");
1531                pos += 2;
1532                text_start = pos;
1533                continue;
1534            }
1535
1536            // Regular newline
1537            if !text_before.is_empty() {
1538                builder.token(SyntaxKind::TEXT.into(), text_before);
1539            }
1540            builder.token(SyntaxKind::NEWLINE.into(), "\r\n");
1541            pos += 2;
1542            text_start = pos;
1543            continue;
1544        }
1545
1546        if byte == b'\n' {
1547            let text_before = &text[text_start..pos];
1548
1549            // Check for trailing spaces hard line break (always enabled in Pandoc)
1550            let trailing_spaces = text_before.chars().rev().take_while(|&c| c == ' ').count();
1551            if trailing_spaces >= 2 {
1552                // Emit text before the trailing spaces
1553                let text_content = &text_before[..text_before.len() - trailing_spaces];
1554                if !text_content.is_empty() {
1555                    builder.token(SyntaxKind::TEXT.into(), text_content);
1556                }
1557                let spaces = " ".repeat(trailing_spaces);
1558                builder.token(SyntaxKind::HARD_LINE_BREAK.into(), &format!("{}\n", spaces));
1559                pos += 1;
1560                text_start = pos;
1561                continue;
1562            }
1563
1564            // hard_line_breaks: treat all single newlines as hard line breaks
1565            if config.extensions.hard_line_breaks {
1566                if !text_before.is_empty() {
1567                    builder.token(SyntaxKind::TEXT.into(), text_before);
1568                }
1569                builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\n");
1570                pos += 1;
1571                text_start = pos;
1572                continue;
1573            }
1574
1575            // Regular newline
1576            if !text_before.is_empty() {
1577                builder.token(SyntaxKind::TEXT.into(), text_before);
1578            }
1579            builder.token(SyntaxKind::NEWLINE.into(), "\n");
1580            pos += 1;
1581            text_start = pos;
1582            continue;
1583        }
1584
1585        // Regular character, keep accumulating
1586        pos = advance_char_boundary(text, pos, end);
1587    }
1588
1589    // Emit any remaining text
1590    if pos > text_start && text_start < end {
1591        log::trace!("Emitting remaining TEXT: {:?}", &text[text_start..end]);
1592        builder.token(SyntaxKind::TEXT.into(), &text[text_start..end]);
1593    }
1594
1595    log::trace!("parse_inline_range complete: start={}, end={}", start, end);
1596}
1597
#[cfg(test)]
mod tests {
    use super::*;
    use crate::syntax::{SyntaxKind, SyntaxNode};

    /// Runs the inline parser over `text` and returns the resulting tree.
    ///
    /// The parser output is wrapped in a single `root` node so the builder
    /// always finishes with exactly one top-level element, regardless of how
    /// many tokens / nodes the inline walk emits.
    fn parse_inline(text: &str, config: &ParserOptions, root: SyntaxKind) -> SyntaxNode {
        let mut builder = GreenNodeBuilder::new();
        builder.start_node(root.into());
        parse_inline_text_recursive(&mut builder, text, config, false);
        builder.finish_node();
        SyntaxNode::new_root(builder.finish())
    }

    /// Returns `true` when `node` or any node below it has the given kind.
    fn has_kind(node: &SyntaxNode, kind: SyntaxKind) -> bool {
        node.descendants().any(|n| n.kind() == kind)
    }

    /// Returns `true` when some `outer` node in the tree contains an `inner`
    /// node. This checks real containment, not just pre-order visit order
    /// (a sibling that merely follows `outer` does not count).
    fn has_nested(node: &SyntaxNode, outer: SyntaxKind, inner: SyntaxKind) -> bool {
        node.descendants()
            .filter(|n| n.kind() == outer)
            .any(|n| n.descendants().any(|d| d.kind() == inner))
    }

    #[test]
    fn test_recursive_simple_emphasis() {
        let text = "*test*";
        let node = parse_inline(text, &ParserOptions::default(), SyntaxKind::PARAGRAPH);

        // Should be lossless
        assert_eq!(node.text().to_string(), text);

        // Should have EMPHASIS node
        assert!(
            has_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
    }

    #[test]
    fn test_recursive_nested() {
        let text = "*foo **bar** baz*";
        // Wrap in a PARAGRAPH node (inline content needs a parent)
        let node = parse_inline(text, &ParserOptions::default(), SyntaxKind::PARAGRAPH);

        // Should be lossless
        assert_eq!(node.text().to_string(), text);

        // Should have both EMPHASIS and STRONG
        assert!(
            has_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert!(has_kind(&node, SyntaxKind::STRONG), "Should have STRONG node");
    }

    /// Test Pandoc's "three" algorithm: ***foo* bar**
    /// Expected: Strong[Emph[foo], bar]
    #[test]
    fn test_triple_emphasis_star_then_double_star() {
        let text = "***foo* bar**";
        let node = parse_inline(text, &ParserOptions::default(), SyntaxKind::DOCUMENT);

        // Verify losslessness
        assert_eq!(node.text().to_string(), text);

        // Should have both STRONG and EMPH. Checked on node kinds rather than
        // on the debug dump, where "STRONG" would also match STRONG_MARKER.
        assert!(has_kind(&node, SyntaxKind::STRONG), "Should have STRONG node");
        assert!(
            has_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );

        // STRONG should be outer, EMPH should be inner
        assert!(
            has_nested(&node, SyntaxKind::STRONG, SyntaxKind::EMPHASIS),
            "EMPH should be inside STRONG, not before it. Current structure:\n{:#?}",
            node
        );
    }

    /// Test Pandoc's "three" algorithm: ***foo** bar*
    /// Expected: Emph[Strong[foo], bar]
    #[test]
    fn test_triple_emphasis_double_star_then_star() {
        let text = "***foo** bar*";
        let node = parse_inline(text, &ParserOptions::default(), SyntaxKind::DOCUMENT);

        // Verify losslessness
        assert_eq!(node.text().to_string(), text);

        // Should have both EMPH and STRONG
        assert!(
            has_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert!(has_kind(&node, SyntaxKind::STRONG), "Should have STRONG node");

        // EMPH should be outer, STRONG should be inner
        assert!(
            has_nested(&node, SyntaxKind::EMPHASIS, SyntaxKind::STRONG),
            "STRONG should be inside EMPH. Current structure:\n{:#?}",
            node
        );
    }

    /// Test that display math with attributes parses correctly
    /// Regression test for equation_attributes_single_line golden test
    #[test]
    fn test_display_math_with_attributes() {
        let text = "$$ E = mc^2 $$ {#eq-einstein}";
        let mut config = ParserOptions::default();
        config.extensions.quarto_crossrefs = true; // Enable Quarto cross-references

        let node = parse_inline(text, &config, SyntaxKind::DOCUMENT);

        // Verify losslessness
        assert_eq!(node.text().to_string(), text);

        // Should have DISPLAY_MATH node
        assert!(
            has_kind(&node, SyntaxKind::DISPLAY_MATH),
            "Should have DISPLAY_MATH node"
        );

        // Should have ATTRIBUTE node
        assert!(
            has_kind(&node, SyntaxKind::ATTRIBUTE),
            "Should have ATTRIBUTE node for {{#eq-einstein}}"
        );

        // Attributes should not be TEXT. TEXT is emitted as a token, so the
        // sibling lookup must include tokens: `next_sibling()` only yields
        // nodes and would make this check vacuous.
        let math_followed_by_text = node
            .descendants()
            .filter(|n| n.kind() == SyntaxKind::DISPLAY_MATH)
            .any(|n| {
                n.next_sibling_or_token()
                    .and_then(|element| element.into_token())
                    .is_some_and(|token| {
                        token.kind() == SyntaxKind::TEXT
                            && token.text().contains("{#eq-einstein}")
                    })
            });
        assert!(
            !math_followed_by_text,
            "Attributes should not be parsed as TEXT"
        );
    }

    #[test]
    fn test_parse_inline_text_gfm_inline_link_destination_not_autolinked() {
        use crate::options::{Dialect, Extensions, Flavor};

        let config = ParserOptions {
            flavor: Flavor::Gfm,
            dialect: Dialect::for_flavor(Flavor::Gfm),
            extensions: Extensions::for_flavor(Flavor::Gfm),
            ..ParserOptions::default()
        };

        let root = parse_inline(
            "Second Link [link_text](https://link.com)",
            &config,
            SyntaxKind::PARAGRAPH,
        );

        let links: Vec<_> = root
            .descendants()
            .filter(|n| n.kind() == SyntaxKind::LINK)
            .collect();
        assert_eq!(
            links.len(),
            1,
            "Expected exactly one LINK node for inline link, not nested bare URI autolink"
        );

        // Pull the text and destination out of the single LINK node.
        let mut link_text = None::<String>;
        let mut link_dest = None::<String>;
        for child in links[0].children() {
            match child.kind() {
                SyntaxKind::LINK_TEXT => link_text = Some(child.text().to_string()),
                SyntaxKind::LINK_DEST => link_dest = Some(child.text().to_string()),
                _ => {}
            }
        }

        assert_eq!(link_text.as_deref(), Some("link_text"));
        assert_eq!(link_dest.as_deref(), Some("https://link.com"));
    }

    #[test]
    fn test_autolink_bare_uri_utf8_boundary_safe() {
        // A lone multi-byte character must not trip a byte-offset slice
        // while the bare-URI autolink extension is enabled.
        let text = "§";
        let mut config = ParserOptions::default();
        config.extensions.autolink_bare_uris = true;

        let node = parse_inline(text, &config, SyntaxKind::DOCUMENT);
        assert_eq!(node.text().to_string(), text);
    }

    #[test]
    fn test_parse_emphasis_unicode_content_no_panic() {
        let text = "*§*";
        let node = parse_inline(text, &ParserOptions::default(), SyntaxKind::PARAGRAPH);

        assert!(
            has_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert_eq!(node.text().to_string(), text);
    }
}
1886
#[test]
fn test_two_with_nested_one_and_triple_closer() {
    use crate::{options::ParserOptions, syntax::SyntaxNode};

    // Input: **bold with *italic***
    // Expected shape: Strong["bold with ", Emph["italic"]]. The trailing
    // `***` is split into `*` (closes the Emph) and `**` (closes the Strong).
    let source = "**bold with *italic***";
    let options = ParserOptions::default();

    let mut tree = GreenNodeBuilder::new();
    tree.start_node(SyntaxKind::PARAGRAPH.into());
    parse_inline_text_recursive(&mut tree, source, &options, false);
    tree.finish_node();
    let root = SyntaxNode::new_root(tree.finish());

    assert_eq!(root.text().to_string(), source, "Should be lossless");

    // Exactly one STRONG wrapper is expected for the whole input.
    let strongs: Vec<SyntaxNode> = root
        .descendants()
        .filter(|candidate| candidate.kind() == SyntaxKind::STRONG)
        .collect();
    assert_eq!(strongs.len(), 1, "Should have exactly one STRONG node");

    // ...and the EMPHASIS must live somewhere inside that wrapper.
    let emphasis_inside_strong = strongs[0]
        .descendants()
        .any(|inner| inner.kind() == SyntaxKind::EMPHASIS);
    assert!(
        emphasis_inside_strong,
        "STRONG should contain EMPHASIS node"
    );
}
1923
#[test]
fn test_emphasis_with_trailing_space_before_closer() {
    use crate::{options::ParserOptions, syntax::SyntaxNode};

    // Pandoc behavior: `*foo *` is still emphasis, because asterisk closers
    // are not required to be right-flanking.
    let source = "*foo *";
    let options = ParserOptions::default();

    let mut tree = GreenNodeBuilder::new();
    tree.start_node(SyntaxKind::PARAGRAPH.into());
    parse_inline_text_recursive(&mut tree, source, &options, false);
    tree.finish_node();
    let root = SyntaxNode::new_root(tree.finish());

    let emphasis_found = root
        .descendants()
        .any(|candidate| candidate.kind() == SyntaxKind::EMPHASIS);
    assert!(emphasis_found, "Should have EMPHASIS node");

    // The tree must reproduce the input exactly.
    assert_eq!(root.text().to_string(), source);
}
1948
#[test]
fn test_triple_emphasis_all_strong_nested() {
    use crate::{options::ParserOptions, syntax::SyntaxNode};

    // `***foo** bar **baz***` is expected to come out as
    // Emph[Strong[foo], " bar ", Strong[baz]], matching Pandoc's output.
    let source = "***foo** bar **baz***";
    let options = ParserOptions::default();

    let mut tree = GreenNodeBuilder::new();
    tree.start_node(SyntaxKind::DOCUMENT.into());
    parse_inline_text_recursive(&mut tree, source, &options, false);
    tree.finish_node();
    let root = SyntaxNode::new_root(tree.finish());

    // A single EMPHASIS node wraps the whole construct.
    let emphases: Vec<SyntaxNode> = root
        .descendants()
        .filter(|candidate| candidate.kind() == SyntaxKind::EMPHASIS)
        .collect();
    let emphasis_count = emphases.len();
    assert_eq!(
        emphasis_count, 1,
        "Should have exactly one EMPHASIS node, found: {}",
        emphasis_count
    );

    // Its direct children include exactly two STRONG nodes.
    let strong_child_count = emphases[0]
        .children()
        .filter(|child| child.kind() == SyntaxKind::STRONG)
        .count();
    assert_eq!(
        strong_child_count, 2,
        "EMPHASIS should contain two STRONG nodes, found: {}",
        strong_child_count
    );

    // The tree must reproduce the input exactly.
    assert_eq!(root.text().to_string(), source);
}
1997
#[test]
fn test_triple_emphasis_all_emph_nested() {
    use crate::{options::ParserOptions, syntax::SyntaxNode};

    // `***foo* bar *baz***` is expected to come out as
    // Strong[Emph[foo], " bar ", Emph[baz]], matching Pandoc's output.
    let source = "***foo* bar *baz***";
    let options = ParserOptions::default();

    let mut tree = GreenNodeBuilder::new();
    tree.start_node(SyntaxKind::DOCUMENT.into());
    parse_inline_text_recursive(&mut tree, source, &options, false);
    tree.finish_node();
    let root = SyntaxNode::new_root(tree.finish());

    // A single STRONG node wraps the whole construct.
    let strongs: Vec<SyntaxNode> = root
        .descendants()
        .filter(|candidate| candidate.kind() == SyntaxKind::STRONG)
        .collect();
    let strong_count = strongs.len();
    assert_eq!(
        strong_count, 1,
        "Should have exactly one STRONG node, found: {}",
        strong_count
    );

    // Its direct children include exactly two EMPHASIS nodes.
    let emphasis_child_count = strongs[0]
        .children()
        .filter(|child| child.kind() == SyntaxKind::EMPHASIS)
        .count();
    assert_eq!(
        emphasis_child_count, 2,
        "STRONG should contain two EMPHASIS nodes, found: {}",
        emphasis_child_count
    );

    // The tree must reproduce the input exactly.
    assert_eq!(root.text().to_string(), source);
}
2046
2047// Multiline emphasis tests
#[test]
fn test_parse_emphasis_multiline() {
    use crate::{options::ParserOptions, syntax::SyntaxNode};

    // Emphasis is allowed to span a soft line break per the Pandoc spec.
    let source = "*text on\nline two*";
    let options = ParserOptions::default();

    let mut tree = GreenNodeBuilder::new();
    tree.start_node(SyntaxKind::PARAGRAPH.into());
    parse_inline_text_recursive(&mut tree, source, &options, false);
    tree.finish_node();
    let root = SyntaxNode::new_root(tree.finish());

    let emphasis_found = root
        .descendants()
        .any(|candidate| candidate.kind() == SyntaxKind::EMPHASIS);
    assert!(emphasis_found, "Should have EMPHASIS node");

    // Lossless round-trip, including the embedded newline.
    let rendered = root.text().to_string();
    assert_eq!(rendered, source);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in emphasis content"
    );
}
2075
#[test]
fn test_parse_strong_multiline() {
    use crate::{options::ParserOptions, syntax::SyntaxNode};

    // Strong emphasis is allowed to span a line break per the Pandoc spec.
    let source = "**strong on\nline two**";
    let options = ParserOptions::default();

    let mut tree = GreenNodeBuilder::new();
    tree.start_node(SyntaxKind::PARAGRAPH.into());
    parse_inline_text_recursive(&mut tree, source, &options, false);
    tree.finish_node();
    let root = SyntaxNode::new_root(tree.finish());

    let strong_found = root
        .descendants()
        .any(|candidate| candidate.kind() == SyntaxKind::STRONG);
    assert!(strong_found, "Should have STRONG node");

    // Lossless round-trip, including the embedded newline.
    let rendered = root.text().to_string();
    assert_eq!(rendered, source);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in strong content"
    );
}
2103
#[test]
fn test_parse_triple_emphasis_multiline() {
    use crate::{options::ParserOptions, syntax::SyntaxNode};

    // Triple-delimiter emphasis whose content spans a line break.
    let source = "***both on\nline two***";
    let options = ParserOptions::default();

    let mut tree = GreenNodeBuilder::new();
    tree.start_node(SyntaxKind::PARAGRAPH.into());
    parse_inline_text_recursive(&mut tree, source, &options, false);
    tree.finish_node();
    let root = SyntaxNode::new_root(tree.finish());

    // Triple delimiters mean strong + emph; only the STRONG half is
    // asserted here.
    let strong_found = root
        .descendants()
        .any(|candidate| candidate.kind() == SyntaxKind::STRONG);
    assert!(strong_found, "Should have STRONG node");

    // Lossless round-trip, including the embedded newline.
    let rendered = root.text().to_string();
    assert_eq!(rendered, source);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in triple emphasis content"
    );
}