Skip to main content

panache_parser/parser/inlines/
core.rs

1//! Inline emission walk.
2//!
3//! Consumes the IR plans built by [`super::inline_ir::build_full_plans`]
4//! (emphasis pairings, bracket resolutions, standalone Pandoc constructs)
5//! and emits the inline CST tokens / nodes in source order. Resolution
6//! decisions for emphasis, brackets, and standalone Pandoc constructs
7//! are entirely IR-driven for both dialects; the dispatcher's
8//! `try_parse_*` recognizers are still called to *parse* a matched byte
9//! range into a CST subtree, but "what is this byte range?" is answered
10//! exclusively by the IR.
11
12use crate::options::{Dialect, ParserOptions};
13use crate::syntax::SyntaxKind;
14use rowan::GreenNodeBuilder;
15
16use super::inline_ir::{
17    BracketPlan, ConstructDispo, ConstructPlan, DelimChar, EmphasisKind, EmphasisPlan,
18};
19
20// Import inline element parsers from sibling modules
21use super::bookdown::{
22    try_parse_bookdown_definition, try_parse_bookdown_reference, try_parse_bookdown_text_reference,
23};
24use super::bracketed_spans::{emit_bracketed_span, try_parse_bracketed_span};
25use super::citations::{
26    emit_bare_citation, emit_bracketed_citation, try_parse_bare_citation,
27    try_parse_bracketed_citation,
28};
29use super::code_spans::{emit_code_span, try_parse_code_span};
30use super::emoji::{emit_emoji, try_parse_emoji};
31use super::escapes::{EscapeType, emit_escape, try_parse_escape};
32use super::inline_executable::{emit_inline_executable, try_parse_inline_executable};
33use super::inline_footnotes::{
34    emit_footnote_reference, emit_inline_footnote, try_parse_footnote_reference,
35    try_parse_inline_footnote,
36};
37use super::inline_html::{emit_inline_html, try_parse_inline_html};
38use super::latex::{parse_latex_command, try_parse_latex_command};
39use super::links::{
40    LinkScanContext, emit_autolink, emit_bare_uri_link, emit_inline_image, emit_inline_link,
41    emit_reference_image, emit_reference_link, emit_unresolved_reference, try_parse_autolink,
42    try_parse_bare_uri, try_parse_inline_image, try_parse_inline_link, try_parse_reference_image,
43    try_parse_reference_link,
44};
45use super::mark::{emit_mark, try_parse_mark};
46use super::math::{
47    emit_display_math, emit_display_math_environment, emit_double_backslash_display_math,
48    emit_double_backslash_inline_math, emit_gfm_inline_math, emit_inline_math,
49    emit_single_backslash_display_math, emit_single_backslash_inline_math, try_parse_display_math,
50    try_parse_double_backslash_display_math, try_parse_double_backslash_inline_math,
51    try_parse_gfm_inline_math, try_parse_inline_math, try_parse_math_environment,
52    try_parse_single_backslash_display_math, try_parse_single_backslash_inline_math,
53};
54use super::native_spans::{emit_native_span, try_parse_native_span};
55use super::raw_inline::is_raw_inline;
56use super::shortcodes::{emit_shortcode, try_parse_shortcode};
57use super::strikeout::{emit_strikeout, try_parse_strikeout};
58use super::subscript::{emit_subscript, try_parse_subscript};
59use super::superscript::{emit_superscript, try_parse_superscript};
60
61/// Parse inline text into the CST builder.
62///
63/// Top-level entry point for inline parsing. Builds the IR plans
64/// (emphasis pairings, bracket resolutions, standalone Pandoc constructs)
65/// once via [`super::inline_ir::build_full_plans`], then walks the byte
66/// range left-to-right consulting those plans plus the dispatcher's
67/// ordered-try chain for non-IR-resolved constructs (autolinks, code
68/// spans, escapes, math, etc.). Dialect-specific behavior is selected
69/// inside `build_full_plans`.
70///
71/// # Arguments
72/// * `text` - The inline text to parse
73/// * `config` - Configuration for extensions and formatting
74/// * `builder` - The CST builder to emit nodes to
75/// * `suppress_footnote_refs` - When `true`, `[^id]` bytes are emitted as
76///   literal TEXT instead of `FOOTNOTE_REFERENCE`. Set by block parsers when
77///   the inline content lives inside a reference-style footnote definition
78///   body, where pandoc silently drops nested footnote references.
79pub fn parse_inline_text_recursive(
80    builder: &mut GreenNodeBuilder,
81    text: &str,
82    config: &ParserOptions,
83    suppress_footnote_refs: bool,
84) {
85    log::trace!(
86        "Recursive inline parsing: {:?} ({} bytes)",
87        &text[..text.len().min(40)],
88        text.len()
89    );
90
91    let mask = structural_byte_mask(config);
92    if try_emit_plain_text_fast_path_with_mask(builder, text, &mask) {
93        log::trace!("Recursive inline parsing complete (plain-text fast path)");
94        return;
95    }
96
97    let plans = super::inline_ir::build_full_plans(text, 0, text.len(), config);
98    parse_inline_range_impl(
99        text,
100        0,
101        text.len(),
102        config,
103        builder,
104        false,
105        &plans.emphasis,
106        &plans.brackets,
107        &plans.constructs,
108        false,
109        suppress_footnote_refs,
110        &mask,
111    );
112
113    log::trace!("Recursive inline parsing complete");
114}
115
116/// Parse inline elements from text content nested inside a link/image/span.
117///
118/// Used for recursive inline parsing of link text, image alt, span content, etc.
119/// Suppresses constructs that would create nested links (CommonMark §6.3 forbids
120/// links inside links), notably extended bare-URI autolinks under GFM.
121///
122/// `suppress_inner_links` should be `true` when the recursion is for a
123/// LINK or REFERENCE-LINK's text, where inner link / reference-link
124/// brackets must emit as literal text (pandoc-native:
125/// `[link [inner](u2)](u1)` → outer `Link` with `Str "[inner](u2)"`).
126/// Image alt text and all non-link contexts pass `false`:
127/// pandoc-native verifies `![alt with [inner](u)](u2)` keeps the inner
128/// `Link`, and bracketed spans / native spans / inline footnotes /
129/// emphasis all allow nested links.
130pub fn parse_inline_text(
131    builder: &mut GreenNodeBuilder,
132    text: &str,
133    config: &ParserOptions,
134    suppress_inner_links: bool,
135    suppress_footnote_refs: bool,
136) {
137    log::trace!(
138        "Parsing inline text (nested in link): {:?} ({} bytes)",
139        &text[..text.len().min(40)],
140        text.len()
141    );
142
143    let mask = structural_byte_mask(config);
144    if try_emit_plain_text_fast_path_with_mask(builder, text, &mask) {
145        return;
146    }
147
148    let plans = super::inline_ir::build_full_plans(text, 0, text.len(), config);
149    parse_inline_range_impl(
150        text,
151        0,
152        text.len(),
153        config,
154        builder,
155        true,
156        &plans.emphasis,
157        &plans.brackets,
158        &plans.constructs,
159        suppress_inner_links,
160        suppress_footnote_refs,
161        &mask,
162    );
163}
164
165/// Plain-text fast path for inline ranges with no structural bytes.
166///
167/// Returns `true` if the range was emitted as a single `TEXT` token and
168/// the caller should skip the IR + dispatcher pipeline. Returns `false`
169/// if any structural byte appears (or the range is empty), letting the
170/// caller proceed normally. Empty input returns `false` so the caller's
171/// existing "no events → no output" path is preserved exactly.
172///
173/// The structural byte set is computed from `config.dialect` and
174/// `config.extensions` so prose containing dialect-irrelevant punctuation
175/// (e.g. `-` outside a citation flavor) doesn't unnecessarily disable the
176/// fast path. `\n` and `\r` are always structural — multi-line inline
177/// content must still split into TEXT + NEWLINE tokens like the slow path.
178fn try_emit_plain_text_fast_path_with_mask(
179    builder: &mut GreenNodeBuilder,
180    text: &str,
181    mask: &[bool; 256],
182) -> bool {
183    if text.is_empty() {
184        return false;
185    }
186    for &b in text.as_bytes() {
187        if mask[b as usize] {
188            return false;
189        }
190    }
191    builder.token(SyntaxKind::TEXT.into(), text);
192    true
193}
194
195/// Build a 256-entry byte mask: `mask[b]` is `true` iff byte `b` could
196/// trigger any IR-recognised construct or dispatcher branch under the
197/// current dialect/extensions. Used by the plain-text fast path to scan
198/// inline ranges in a single pass.
199fn structural_byte_mask(config: &ParserOptions) -> [bool; 256] {
200    let mut mask = [false; 256];
201    let exts = &config.extensions;
202    let pandoc = config.dialect == Dialect::Pandoc;
203
204    // Always structural: line breaks (CST splits TEXT/NEWLINE), backslash
205    // (escape / hard break / backslash-math / latex / bookdown ref),
206    // backtick (code span / inline executable), `*`/`_` (emphasis is a
207    // core CommonMark construct, not extension-gated), and `[`/`]` if
208    // any bracket-shaped construct is reachable.
209    mask[b'\n' as usize] = true;
210    mask[b'\r' as usize] = true;
211    mask[b'\\' as usize] = true;
212    mask[b'`' as usize] = true;
213    mask[b'*' as usize] = true;
214    mask[b'_' as usize] = true;
215
216    // Brackets: the IR/dispatcher only acts on `[`/`]` if some
217    // bracket-shaped feature is reachable. `!` is the leading byte of
218    // `![alt]` image brackets — the IR's `BracketPlan` keys image
219    // openers at the `!` position, so the dispatcher must stop here
220    // to consult the plan.
221    if exts.inline_links
222        || exts.reference_links
223        || exts.inline_images
224        || exts.bracketed_spans
225        || exts.footnotes
226        || exts.citations
227    {
228        mask[b'[' as usize] = true;
229        mask[b']' as usize] = true;
230    }
231    if exts.inline_images || exts.reference_links {
232        mask[b'!' as usize] = true;
233    }
234
235    // `<` covers autolinks, raw HTML, and Pandoc native spans.
236    if exts.autolinks || exts.raw_html || exts.native_spans {
237        mask[b'<' as usize] = true;
238    }
239
240    // `^` covers Pandoc inline footnotes (`^[...]`), CM inline footnotes
241    // (when explicitly enabled), and superscript (`^text^`).
242    if exts.inline_footnotes || exts.superscript {
243        mask[b'^' as usize] = true;
244    }
245
246    // `@` and `-` cover Pandoc citation forms (`@cite`, `-@cite`,
247    // `[@cite]`). Under Pandoc dialect, the IR's `ConstructPlan` keys
248    // bare citations at the `@` or `-` position, so the dispatcher
249    // must stop at either to consult the plan. Including `-` is
250    // pessimistic — most prose hyphens won't form `-@` — but missing
251    // it would skip past valid suppress-author citations.
252    if exts.citations || exts.quarto_crossrefs {
253        mask[b'@' as usize] = true;
254        if pandoc {
255            mask[b'-' as usize] = true;
256        }
257    }
258
259    // `$` covers dollar-math and GFM math.
260    if exts.tex_math_dollars || exts.tex_math_gfm {
261        mask[b'$' as usize] = true;
262    }
263
264    // `~` covers subscript and strikeout (both `~text~` and `~~text~~`).
265    if exts.subscript || exts.strikeout {
266        mask[b'~' as usize] = true;
267    }
268
269    if exts.mark {
270        mask[b'=' as usize] = true;
271    }
272    if exts.emoji {
273        mask[b':' as usize] = true;
274    }
275    if exts.bookdown_references {
276        mask[b'(' as usize] = true;
277    }
278    // `{{< ... >}}` shortcodes: the dispatcher tries them on any
279    // `{` regardless of the `quarto_shortcodes` extension flag, so
280    // `{` must always be flagged here.
281    mask[b'{' as usize] = true;
282
283    // Bare-URI autolinks (`http://...` without `<>`) have no
284    // leading-byte gate in the dispatcher — `try_parse_bare_uri`
285    // probes for a URI scheme starting at every byte. Flag all
286    // ASCII alphabetic bytes so the bulk-skip stops on every
287    // potential scheme starter. This effectively disables the
288    // bulk-skip benefit for prose under GFM-style flavors but
289    // preserves correctness; ASCII digits / punctuation / non-ASCII
290    // bytes still skip cleanly.
291    if exts.autolink_bare_uris {
292        for b in b'a'..=b'z' {
293            mask[b as usize] = true;
294        }
295        for b in b'A'..=b'Z' {
296            mask[b as usize] = true;
297        }
298    }
299
300    mask
301}
302
/// Report whether `pos` is preceded by a non-word byte (or nothing at all).
///
/// Returns `true` when `pos` is 0 or when the byte immediately before
/// `pos` is neither an ASCII letter/digit nor `_`; returns `false`
/// otherwise. Non-ASCII bytes never count as word bytes.
///
/// # Panics
/// Panics if `pos` is greater than `text.len()`.
fn is_emoji_boundary(text: &str, pos: usize) -> bool {
    // Nothing precedes offset 0, so it is always a boundary.
    let Some(prev_index) = pos.checked_sub(1) else {
        return true;
    };

    let prev_byte = text.as_bytes()[prev_index];
    !(prev_byte.is_ascii_alphanumeric() || prev_byte == b'_')
}
312
/// Step from `pos` past one UTF-8 character, never going beyond `end`.
///
/// Returns `pos` unchanged when `pos >= end` or `pos >= text.len()`.
/// Otherwise returns `pos` plus the UTF-8 length of the character that
/// starts at `pos`, clamped to `end`.
///
/// # Panics
/// Panics if `pos` is inside `text` but not on a char boundary, because
/// the tail `text[pos..]` is sliced to read the character.
#[inline]
fn advance_char_boundary(text: &str, pos: usize, end: usize) -> usize {
    if pos < end && pos < text.len() {
        let step = match text[pos..].chars().next() {
            Some(ch) => ch.len_utf8(),
            // Fallback width if no character could be read.
            None => 1,
        };
        end.min(pos + step)
    } else {
        pos
    }
}
324
325#[allow(clippy::too_many_arguments)]
326fn parse_inline_range_impl(
327    text: &str,
328    start: usize,
329    end: usize,
330    config: &ParserOptions,
331    builder: &mut GreenNodeBuilder,
332    nested_in_link: bool,
333    plan: &EmphasisPlan,
334    bracket_plan: &BracketPlan,
335    construct_plan: &ConstructPlan,
336    suppress_inner_links: bool,
337    suppress_footnote_refs: bool,
338    mask: &[bool; 256],
339) {
340    log::trace!(
341        "parse_inline_range: start={}, end={}, text={:?}",
342        start,
343        end,
344        &text[start..end]
345    );
346    let mut pos = start;
347    let mut text_start = start;
348    let bytes = text.as_bytes();
349
350    while pos < end {
351        // Bulk-skip plain bytes between structural bytes. Plans
352        // (`construct_plan`, `bracket_plan`, emphasis `plan`) only
353        // resolve at structural byte positions, so skipping here
354        // never elides a real match. `text_start` is preserved
355        // across the skip; the next emitted construct flushes the
356        // accumulated TEXT span.
357        if !mask[bytes[pos] as usize] {
358            let mut next = pos + 1;
359            while next < end && !mask[bytes[next] as usize] {
360                next += 1;
361            }
362            pos = next;
363            if pos >= end {
364                break;
365            }
366        }
367        // IR-driven dispatch: if the IR identified a Pandoc standalone
368        // construct starting here, emit it directly. Bypasses the
369        // dispatcher's ordered-try chain for inline footnotes, native
370        // spans, footnote references, citations, and bracketed spans
371        // under `Dialect::Pandoc`. The IR scan gates these on
372        // `!is_commonmark` and the relevant extension flag, so this
373        // branch is empty under CommonMark dialect (where the legacy
374        // dispatcher branches still run when the extension is enabled).
375        if let Some(dispo) = construct_plan.lookup(pos) {
376            match *dispo {
377                ConstructDispo::InlineFootnote { end: dispo_end } => {
378                    if dispo_end <= end
379                        && let Some((len, content)) = try_parse_inline_footnote(&text[pos..])
380                        && pos + len == dispo_end
381                    {
382                        if pos > text_start {
383                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
384                        }
385                        log::trace!("IR: matched inline footnote at pos {}", pos);
386                        emit_inline_footnote(builder, content, config, suppress_footnote_refs);
387                        pos += len;
388                        text_start = pos;
389                        continue;
390                    }
391                }
392                ConstructDispo::NativeSpan { end: dispo_end } => {
393                    if dispo_end <= end
394                        && let Some((len, content, _attributes)) =
395                            try_parse_native_span(&text[pos..])
396                        && pos + len == dispo_end
397                    {
398                        if pos > text_start {
399                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
400                        }
401                        log::trace!("IR: matched native span at pos {}", pos);
402                        emit_native_span(
403                            builder,
404                            &text[pos..pos + len],
405                            content,
406                            config,
407                            suppress_footnote_refs,
408                        );
409                        pos += len;
410                        text_start = pos;
411                        continue;
412                    }
413                }
414                ConstructDispo::FootnoteReference { end: dispo_end } => {
415                    if !suppress_footnote_refs
416                        && dispo_end <= end
417                        && let Some((len, id)) = try_parse_footnote_reference(&text[pos..])
418                        && pos + len == dispo_end
419                    {
420                        if pos > text_start {
421                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
422                        }
423                        log::trace!("IR: matched footnote reference at pos {}", pos);
424                        emit_footnote_reference(builder, &id);
425                        pos += len;
426                        text_start = pos;
427                        continue;
428                    }
429                }
430                ConstructDispo::BracketedCitation { end: dispo_end } => {
431                    if dispo_end <= end
432                        && let Some((len, content)) = try_parse_bracketed_citation(&text[pos..])
433                        && pos + len == dispo_end
434                    {
435                        if pos > text_start {
436                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
437                        }
438                        log::trace!("IR: matched bracketed citation at pos {}", pos);
439                        emit_bracketed_citation(builder, content);
440                        pos += len;
441                        text_start = pos;
442                        continue;
443                    }
444                }
445                ConstructDispo::BareCitation { end: dispo_end } => {
446                    if dispo_end <= end
447                        && let Some((len, key, has_suppress)) =
448                            try_parse_bare_citation(&text[pos..])
449                        && pos + len == dispo_end
450                    {
451                        let is_crossref = config.extensions.quarto_crossrefs
452                            && super::citations::is_quarto_crossref_key(key);
453                        if is_crossref || config.extensions.citations {
454                            if pos > text_start {
455                                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
456                            }
457                            if is_crossref {
458                                log::trace!("IR: matched Quarto crossref at pos {}: {}", pos, key);
459                                super::citations::emit_crossref(builder, key, has_suppress);
460                            } else {
461                                log::trace!("IR: matched bare citation at pos {}: {}", pos, key);
462                                emit_bare_citation(builder, key, has_suppress);
463                            }
464                            pos += len;
465                            text_start = pos;
466                            continue;
467                        }
468                    }
469                }
470                ConstructDispo::BracketedSpan { end: dispo_end } => {
471                    if dispo_end <= end
472                        && let Some((len, content, attrs)) = try_parse_bracketed_span(&text[pos..])
473                        && pos + len == dispo_end
474                    {
475                        if pos > text_start {
476                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
477                        }
478                        log::trace!("IR: matched bracketed span at pos {}", pos);
479                        emit_bracketed_span(
480                            builder,
481                            &content,
482                            &attrs,
483                            config,
484                            suppress_footnote_refs,
485                        );
486                        pos += len;
487                        text_start = pos;
488                        continue;
489                    }
490                }
491            }
492        }
493
494        // IR-driven bracket dispatch: if the IR's `process_brackets`
495        // resolved a bracket pair starting at this position, emit it
496        // directly via the appropriate helper. The
497        // dispatcher's `try_parse_*` recognizers compute the actual
498        // byte length and extract content / attributes; the IR's
499        // `suffix_end` is used to constrain the dispatcher's match
500        // shape so the two pipelines agree on which link variant
501        // resolved (e.g. `[foo][bar]` with `bar` undefined and `foo`
502        // defined: IR resolves `[foo]` as shortcut, but the
503        // dispatcher's `try_parse_reference_link` would otherwise
504        // greedily return the full-ref shape). Suppression of inner
505        // LINK / REFERENCE LINK during LINK-text recursion is applied
506        // here (pandoc-native: outer-wins for nested links).
507        //
508        // Pandoc-extended `{.attrs}` after a link can extend the
509        // dispatcher's match length past the IR's `suffix_end`. The
510        // dispatcher's len is therefore constrained to
511        // `[suffix_end, end]` rather than required to equal
512        // `suffix_end` exactly.
513        // IR-driven dispatch: Pandoc unresolved bracket-shape pattern.
514        // Before emitting the `UNRESOLVED_REFERENCE` wrapper, give the
515        // dispatcher's lenient inline-link / inline-image parsers a
516        // chance to override. The IR's `try_inline_suffix` is stricter
517        // than pandoc-markdown for some destination shapes (URLs with
518        // spaces, titles with embedded quotes, shortcode-style braces);
519        // the dispatcher accepts those and produces a real LINK / IMAGE
520        // node — pandoc-native agrees. Without this override, valid
521        // pandoc links would degrade to `UNRESOLVED_REFERENCE` here.
522        if let Some(super::inline_ir::BracketDispo::UnresolvedReference {
523            is_image,
524            text_start: ref_text_start,
525            text_end: ref_text_end,
526            end: ref_end,
527        }) = bracket_plan.lookup(pos)
528        {
529            let is_image = *is_image;
530            let dispo_suffix_end = *ref_end;
531            let suppress = suppress_inner_links && !is_image;
532            if !suppress {
533                let ctx = LinkScanContext::from_options(config);
534                let is_commonmark = config.dialect == Dialect::CommonMark;
535                if is_image {
536                    if config.extensions.inline_images
537                        && let Some((len, alt_text, dest, attributes)) =
538                            try_parse_inline_image(&text[pos..], ctx)
539                        && pos + len >= dispo_suffix_end
540                        && pos + len <= end
541                    {
542                        if pos > text_start {
543                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
544                        }
545                        log::trace!(
546                            "IR: dispatcher overrode UnresolvedReference with inline image at pos {}",
547                            pos
548                        );
549                        emit_inline_image(
550                            builder,
551                            &text[pos..pos + len],
552                            alt_text,
553                            dest,
554                            attributes,
555                            config,
556                            suppress_footnote_refs,
557                        );
558                        pos += len;
559                        text_start = pos;
560                        continue;
561                    }
562                } else if config.extensions.inline_links
563                    && let Some((len, link_text, dest, attributes)) =
564                        try_parse_inline_link(&text[pos..], is_commonmark, ctx)
565                    && pos + len >= dispo_suffix_end
566                    && pos + len <= end
567                {
568                    if pos > text_start {
569                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
570                    }
571                    log::trace!(
572                        "IR: dispatcher overrode UnresolvedReference with inline link at pos {}",
573                        pos
574                    );
575                    emit_inline_link(
576                        builder,
577                        &text[pos..pos + len],
578                        link_text,
579                        dest,
580                        attributes,
581                        config,
582                        suppress_footnote_refs,
583                    );
584                    pos += len;
585                    text_start = pos;
586                    continue;
587                }
588            }
589
590            // Dispatcher didn't override; emit the wrapper.
591            let inner_text = &text[*ref_text_start..*ref_text_end];
592            let suffix_start = *ref_text_end + 1;
593            let label_suffix = if suffix_start < *ref_end {
594                Some(&text[suffix_start..*ref_end])
595            } else {
596                None
597            };
598            if pos > text_start {
599                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
600            }
601            log::trace!(
602                "IR: unresolved Pandoc reference shape at pos {}..{}",
603                pos,
604                ref_end
605            );
606            emit_unresolved_reference(
607                builder,
608                is_image,
609                inner_text,
610                label_suffix,
611                config,
612                suppress_footnote_refs,
613            );
614            pos = *ref_end;
615            text_start = pos;
616            continue;
617        }
618
619        if let Some(super::inline_ir::BracketDispo::Open {
620            is_image,
621            suffix_end,
622            ..
623        }) = bracket_plan.lookup(pos)
624        {
625            let is_image = *is_image;
626            let dispo_suffix_end = *suffix_end;
627            let suppress = suppress_inner_links && !is_image;
628            if !suppress {
629                let ctx = LinkScanContext::from_options(config);
630                let allow_shortcut = config.extensions.shortcut_reference_links;
631                let is_commonmark = config.dialect == Dialect::CommonMark;
632                if is_image {
633                    if config.extensions.inline_images
634                        && let Some((len, alt_text, dest, attributes)) =
635                            try_parse_inline_image(&text[pos..], ctx)
636                        && pos + len >= dispo_suffix_end
637                        && pos + len <= end
638                    {
639                        if pos > text_start {
640                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
641                        }
642                        log::trace!("IR: matched inline image at pos {}", pos);
643                        emit_inline_image(
644                            builder,
645                            &text[pos..pos + len],
646                            alt_text,
647                            dest,
648                            attributes,
649                            config,
650                            suppress_footnote_refs,
651                        );
652                        pos += len;
653                        text_start = pos;
654                        continue;
655                    }
656                    if config.extensions.reference_links
657                        && let Some((len, alt_text, reference, is_shortcut)) =
658                            try_parse_reference_image(&text[pos..], allow_shortcut)
659                        && pos + len == dispo_suffix_end
660                        && pos + len <= end
661                    {
662                        if pos > text_start {
663                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
664                        }
665                        log::trace!("IR: matched reference image at pos {}", pos);
666                        emit_reference_image(
667                            builder,
668                            alt_text,
669                            &reference,
670                            is_shortcut,
671                            config,
672                            suppress_footnote_refs,
673                        );
674                        pos += len;
675                        text_start = pos;
676                        continue;
677                    }
678                } else {
679                    if config.extensions.inline_links
680                        && let Some((len, link_text, dest, attributes)) =
681                            try_parse_inline_link(&text[pos..], is_commonmark, ctx)
682                        && pos + len >= dispo_suffix_end
683                        && pos + len <= end
684                    {
685                        if pos > text_start {
686                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
687                        }
688                        log::trace!("IR: matched inline link at pos {}", pos);
689                        emit_inline_link(
690                            builder,
691                            &text[pos..pos + len],
692                            link_text,
693                            dest,
694                            attributes,
695                            config,
696                            suppress_footnote_refs,
697                        );
698                        pos += len;
699                        text_start = pos;
700                        continue;
701                    }
702                    if config.extensions.reference_links
703                        && let Some((len, link_text, reference, is_shortcut)) =
704                            try_parse_reference_link(
705                                &text[pos..],
706                                allow_shortcut,
707                                config.extensions.inline_links,
708                                ctx,
709                            )
710                        && pos + len == dispo_suffix_end
711                        && pos + len <= end
712                    {
713                        if pos > text_start {
714                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
715                        }
716                        log::trace!("IR: matched reference link at pos {}", pos);
717                        emit_reference_link(
718                            builder,
719                            link_text,
720                            &reference,
721                            is_shortcut,
722                            config,
723                            suppress_footnote_refs,
724                        );
725                        pos += len;
726                        text_start = pos;
727                        continue;
728                    }
729                }
730            }
731        }
732
733        let byte = text.as_bytes()[pos];
734
735        // Backslash math (highest priority if enabled)
736        if byte == b'\\' {
737            // Try double backslash display math first: \\[...\\]
738            if config.extensions.tex_math_double_backslash {
739                if let Some((len, content)) = try_parse_double_backslash_display_math(&text[pos..])
740                {
741                    if pos > text_start {
742                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
743                    }
744                    log::trace!("Matched double backslash display math at pos {}", pos);
745                    emit_double_backslash_display_math(builder, content);
746                    pos += len;
747                    text_start = pos;
748                    continue;
749                }
750
751                // Try double backslash inline math: \\(...\\)
752                if let Some((len, content)) = try_parse_double_backslash_inline_math(&text[pos..]) {
753                    if pos > text_start {
754                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
755                    }
756                    log::trace!("Matched double backslash inline math at pos {}", pos);
757                    emit_double_backslash_inline_math(builder, content);
758                    pos += len;
759                    text_start = pos;
760                    continue;
761                }
762            }
763
764            // Try single backslash display math: \[...\]
765            if config.extensions.tex_math_single_backslash {
766                if let Some((len, content)) = try_parse_single_backslash_display_math(&text[pos..])
767                {
768                    if pos > text_start {
769                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
770                    }
771                    log::trace!("Matched single backslash display math at pos {}", pos);
772                    emit_single_backslash_display_math(builder, content);
773                    pos += len;
774                    text_start = pos;
775                    continue;
776                }
777
778                // Try single backslash inline math: \(...\)
779                if let Some((len, content)) = try_parse_single_backslash_inline_math(&text[pos..]) {
780                    if pos > text_start {
781                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
782                    }
783                    log::trace!("Matched single backslash inline math at pos {}", pos);
784                    emit_single_backslash_inline_math(builder, content);
785                    pos += len;
786                    text_start = pos;
787                    continue;
788                }
789            }
790
791            // Try math environments \begin{equation}...\end{equation}
792            if config.extensions.raw_tex
793                && let Some((len, begin_marker, content, end_marker)) =
794                    try_parse_math_environment(&text[pos..])
795            {
796                if pos > text_start {
797                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
798                }
799                log::trace!("Matched math environment at pos {}", pos);
800                emit_display_math_environment(builder, begin_marker, content, end_marker);
801                pos += len;
802                text_start = pos;
803                continue;
804            }
805
806            // Try bookdown reference: \@ref(label)
807            if config.extensions.bookdown_references
808                && let Some((len, label)) = try_parse_bookdown_reference(&text[pos..])
809            {
810                if pos > text_start {
811                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
812                }
813                log::trace!("Matched bookdown reference at pos {}: {}", pos, label);
814                super::citations::emit_bookdown_crossref(builder, label);
815                pos += len;
816                text_start = pos;
817                continue;
818            }
819
820            // Try escapes (after bookdown refs and backslash math)
821            if let Some((len, ch, escape_type)) = try_parse_escape(&text[pos..]) {
822                let escape_enabled = match escape_type {
823                    EscapeType::HardLineBreak => config.extensions.escaped_line_breaks,
824                    EscapeType::NonbreakingSpace => config.extensions.all_symbols_escapable,
825                    EscapeType::Literal => {
826                        // BASE_ESCAPABLE matches Pandoc's markdown_strict /
827                        // original Markdown set, plus `|` and `~` which the
828                        // formatter emits as escapes for pipe-table separators
829                        // and strikethrough delimiters. Recognising those here
830                        // keeps round-trips idempotent in flavors that don't
831                        // enable all_symbols_escapable.
832                        //
833                        // Under CommonMark dialect, the spec (§2.4) explicitly
834                        // allows ANY ASCII punctuation to be backslash-escaped,
835                        // independent of the all_symbols_escapable extension
836                        // (which also widens to whitespace, a Pandoc-only
837                        // construct).
838                        const BASE_ESCAPABLE: &str = "\\`*_{}[]()>#+-.!|~";
839                        BASE_ESCAPABLE.contains(ch)
840                            || config.extensions.all_symbols_escapable
841                            || (config.dialect == crate::Dialect::CommonMark
842                                && ch.is_ascii_punctuation())
843                    }
844                };
845                if !escape_enabled {
846                    // Escape kind disabled: emit no escape node. Step past the backslash
847                    // without flushing so it stays in the pending TEXT run.
848                    pos = advance_char_boundary(text, pos, end);
849                    continue;
850                }
851
852                // Emit accumulated text
853                if pos > text_start {
854                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
855                }
856
857                log::trace!("Matched escape at pos {}: \\{}", pos, ch);
858                emit_escape(builder, ch, escape_type);
859                pos += len;
860                text_start = pos;
861                continue;
862            }
863
864            // Try LaTeX commands (after escapes, before shortcodes)
865            if config.extensions.raw_tex
866                && let Some(len) = try_parse_latex_command(&text[pos..])
867            {
868                if pos > text_start {
869                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
870                }
871                log::trace!("Matched LaTeX command at pos {}", pos);
872                parse_latex_command(builder, &text[pos..], len);
873                pos += len;
874                text_start = pos;
875                continue;
876            }
877        }
878
879        // Try Quarto shortcodes: {{< shortcode >}}
880        if byte == b'{'
881            && pos + 1 < text.len()
882            && text.as_bytes()[pos + 1] == b'{'
883            && let Some((len, name, attrs)) = try_parse_shortcode(&text[pos..])
884        {
885            if pos > text_start {
886                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
887            }
888            log::trace!("Matched shortcode at pos {}: {}", pos, &name);
889            emit_shortcode(builder, &name, attrs);
890            pos += len;
891            text_start = pos;
892            continue;
893        }
894
895        // Try inline executable code spans (`` `r expr` `` and `` `{r} expr` ``)
896        if byte == b'`'
897            && let Some(m) = try_parse_inline_executable(
898                &text[pos..],
899                config.extensions.rmarkdown_inline_code,
900                config.extensions.quarto_inline_code,
901            )
902        {
903            if pos > text_start {
904                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
905            }
906            log::trace!("Matched inline executable code at pos {}", pos);
907            emit_inline_executable(builder, &m);
908            pos += m.total_len;
909            text_start = pos;
910            continue;
911        }
912
913        // Try code spans
914        if byte == b'`' {
915            if let Some((len, content, backtick_count, attributes)) =
916                try_parse_code_span(&text[pos..])
917            {
918                // Emit accumulated text
919                if pos > text_start {
920                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
921                }
922
923                log::trace!(
924                    "Matched code span at pos {}: {} backticks",
925                    pos,
926                    backtick_count
927                );
928
929                // Check for raw inline
930                if let Some(ref attrs) = attributes
931                    && config.extensions.raw_attribute
932                    && let Some(format) = is_raw_inline(attrs)
933                {
934                    use super::raw_inline::emit_raw_inline;
935                    log::trace!("Matched raw inline span at pos {}: format={}", pos, format);
936                    emit_raw_inline(builder, content, backtick_count, format);
937                } else if !config.extensions.inline_code_attributes && attributes.is_some() {
938                    let code_span_len = backtick_count * 2 + content.len();
939                    emit_code_span(builder, content, backtick_count, None);
940                    pos += code_span_len;
941                    text_start = pos;
942                    continue;
943                } else {
944                    emit_code_span(builder, content, backtick_count, attributes);
945                }
946
947                pos += len;
948                text_start = pos;
949                continue;
950            }
951
952            // Unmatched backtick run.
953            //
954            // CommonMark (and GFM) treat the whole run as literal text — the
955            // run cannot be re-entered as a shorter opener. Pandoc-markdown
956            // instead lets a longer run shadow a shorter one (e.g.
957            // `` ```foo`` `` parses as `` ` `` + ``<code>foo</code>``), so
958            // for the Pandoc dialect we fall through and advance one byte at
959            // a time, allowing the inner run to be tried on a later iteration.
960            if config.dialect == Dialect::CommonMark {
961                let run_len = text[pos..].bytes().take_while(|&b| b == b'`').count();
962                pos += run_len;
963                continue;
964            }
965        }
966
967        // Try textual emoji aliases: :smile:
968        if byte == b':'
969            && config.extensions.emoji
970            && is_emoji_boundary(text, pos)
971            && let Some((len, _alias)) = try_parse_emoji(&text[pos..])
972        {
973            if pos > text_start {
974                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
975            }
976            log::trace!("Matched emoji at pos {}", pos);
977            emit_emoji(builder, &text[pos..pos + len]);
978            pos += len;
979            text_start = pos;
980            continue;
981        }
982
983        // Try inline footnotes: ^[note]. Under Pandoc dialect this is
984        // consumed via the IR's `ConstructPlan` at the top of the loop;
985        // this dispatcher branch only fires for CommonMark dialect with
986        // the extension explicitly enabled.
987        if byte == b'^'
988            && pos + 1 < text.len()
989            && text.as_bytes()[pos + 1] == b'['
990            && config.dialect == Dialect::CommonMark
991            && config.extensions.inline_footnotes
992            && let Some((len, content)) = try_parse_inline_footnote(&text[pos..])
993        {
994            if pos > text_start {
995                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
996            }
997            log::trace!("Matched inline footnote at pos {}", pos);
998            emit_inline_footnote(builder, content, config, suppress_footnote_refs);
999            pos += len;
1000            text_start = pos;
1001            continue;
1002        }
1003
1004        // Try superscript: ^text^
1005        if byte == b'^'
1006            && config.extensions.superscript
1007            && let Some((len, content)) = try_parse_superscript(&text[pos..])
1008        {
1009            if pos > text_start {
1010                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1011            }
1012            log::trace!("Matched superscript at pos {}", pos);
1013            emit_superscript(builder, content, config, suppress_footnote_refs);
1014            pos += len;
1015            text_start = pos;
1016            continue;
1017        }
1018
1019        // Try bookdown definition: (\#label) or (ref:label)
1020        if byte == b'(' && config.extensions.bookdown_references {
1021            if let Some((len, label)) = try_parse_bookdown_definition(&text[pos..]) {
1022                if pos > text_start {
1023                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1024                }
1025                log::trace!("Matched bookdown definition at pos {}: {}", pos, label);
1026                builder.token(SyntaxKind::TEXT.into(), &text[pos..pos + len]);
1027                pos += len;
1028                text_start = pos;
1029                continue;
1030            }
1031            if let Some((len, label)) = try_parse_bookdown_text_reference(&text[pos..]) {
1032                if pos > text_start {
1033                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1034                }
1035                log::trace!("Matched bookdown text reference at pos {}: {}", pos, label);
1036                builder.token(SyntaxKind::TEXT.into(), &text[pos..pos + len]);
1037                pos += len;
1038                text_start = pos;
1039                continue;
1040            }
1041        }
1042
1043        // Try strikeout: ~~text~~
1044        // Must run before subscript so `~~text~~` is matched as a single
1045        // Strikeout rather than two empty Subscripts. Subscript falls back
1046        // to consuming `~~` as an empty subscript only when strikeout
1047        // didn't match (e.g. `~~unclosed`).
1048        if byte == b'~'
1049            && config.extensions.strikeout
1050            && let Some((len, content)) = try_parse_strikeout(&text[pos..])
1051        {
1052            if pos > text_start {
1053                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1054            }
1055            log::trace!("Matched strikeout at pos {}", pos);
1056            emit_strikeout(builder, content, config, suppress_footnote_refs);
1057            pos += len;
1058            text_start = pos;
1059            continue;
1060        }
1061
1062        // Try subscript: ~text~ or `~~` as empty subscript when strikeout
1063        // didn't match (matches pandoc: `~~unclosed` → `Subscript [] + text`).
1064        if byte == b'~'
1065            && config.extensions.subscript
1066            && let Some((len, content)) = try_parse_subscript(&text[pos..])
1067        {
1068            if pos > text_start {
1069                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1070            }
1071            log::trace!("Matched subscript at pos {}", pos);
1072            emit_subscript(builder, content, config, suppress_footnote_refs);
1073            pos += len;
1074            text_start = pos;
1075            continue;
1076        }
1077
1078        // Try mark/highlight: ==text==
1079        if byte == b'='
1080            && config.extensions.mark
1081            && let Some((len, content)) = try_parse_mark(&text[pos..])
1082        {
1083            if pos > text_start {
1084                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1085            }
1086            log::trace!("Matched mark at pos {}", pos);
1087            emit_mark(builder, content, config, suppress_footnote_refs);
1088            pos += len;
1089            text_start = pos;
1090            continue;
1091        }
1092
1093        // Try GFM inline math: $`...`$
1094        if byte == b'$'
1095            && config.extensions.tex_math_gfm
1096            && let Some((len, content)) = try_parse_gfm_inline_math(&text[pos..])
1097        {
1098            if pos > text_start {
1099                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1100            }
1101            log::trace!("Matched GFM inline math at pos {}", pos);
1102            emit_gfm_inline_math(builder, content);
1103            pos += len;
1104            text_start = pos;
1105            continue;
1106        }
1107
1108        // Try math ($...$, $$...$$)
1109        if byte == b'$' && config.extensions.tex_math_dollars {
1110            // Try display math first ($$...$$)
1111            if let Some((len, content)) = try_parse_display_math(&text[pos..]) {
1112                // Emit accumulated text
1113                if pos > text_start {
1114                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1115                }
1116
1117                let dollar_count = text[pos..].chars().take_while(|&c| c == '$').count();
1118                log::trace!(
1119                    "Matched display math at pos {}: {} dollars",
1120                    pos,
1121                    dollar_count
1122                );
1123
1124                // Check for trailing attributes (Quarto cross-reference support).
1125                // The Quarto attribute block sits on the same line as the closing
1126                // `$$`, so scope the lookup to the current line — otherwise
1127                // anything on later lines (e.g. a following `@eq-id` reference)
1128                // makes the segment not end with `}` and the lift no-ops.
1129                let after_math = &text[pos + len..];
1130                let line_end = after_math.find('\n').unwrap_or(after_math.len());
1131                let line_segment = &after_math[..line_end];
1132                let attr_len = if config.extensions.quarto_crossrefs {
1133                    use crate::parser::utils::attributes::try_parse_trailing_attributes;
1134                    if let Some((_attr_block, _)) = try_parse_trailing_attributes(line_segment) {
1135                        let trimmed_after = line_segment.trim_start();
1136                        if let Some(open_brace_pos) = trimmed_after.find('{') {
1137                            let ws_before_brace = line_segment.len() - trimmed_after.len();
1138                            let attr_text_len = trimmed_after[open_brace_pos..]
1139                                .find('}')
1140                                .map(|close| close + 1)
1141                                .unwrap_or(0);
1142                            ws_before_brace + open_brace_pos + attr_text_len
1143                        } else {
1144                            0
1145                        }
1146                    } else {
1147                        0
1148                    }
1149                } else {
1150                    0
1151                };
1152
1153                let total_len = len + attr_len;
1154                emit_display_math(builder, content, dollar_count);
1155
1156                // Emit attributes if present
1157                if attr_len > 0 {
1158                    use crate::parser::utils::attributes::{
1159                        emit_attributes, try_parse_trailing_attributes,
1160                    };
1161                    let attr_text = &text[pos + len..pos + total_len];
1162                    if let Some((attr_block, _text_before)) =
1163                        try_parse_trailing_attributes(attr_text)
1164                    {
1165                        let trimmed_after = attr_text.trim_start();
1166                        let ws_len = attr_text.len() - trimmed_after.len();
1167                        if ws_len > 0 {
1168                            builder.token(SyntaxKind::WHITESPACE.into(), &attr_text[..ws_len]);
1169                        }
1170                        emit_attributes(builder, &attr_block);
1171                    }
1172                }
1173
1174                pos += total_len;
1175                text_start = pos;
1176                continue;
1177            }
1178
1179            // Try inline math ($...$)
1180            if let Some((len, content)) = try_parse_inline_math(&text[pos..]) {
1181                // Emit accumulated text
1182                if pos > text_start {
1183                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1184                }
1185
1186                log::trace!("Matched inline math at pos {}", pos);
1187                emit_inline_math(builder, content);
1188                pos += len;
1189                text_start = pos;
1190                continue;
1191            }
1192
1193            // Neither display nor inline math matched - emit the $ as literal text
1194            // This ensures each $ gets its own TEXT token for CST compatibility
1195            if pos > text_start {
1196                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1197            }
1198            builder.token(SyntaxKind::TEXT.into(), "$");
1199            pos = advance_char_boundary(text, pos, end);
1200            text_start = pos;
1201            continue;
1202        }
1203
1204        // Try autolinks: <url> or <email>
1205        if byte == b'<'
1206            && config.extensions.autolinks
1207            && let Some((len, url)) = try_parse_autolink(
1208                &text[pos..],
1209                config.dialect == crate::options::Dialect::CommonMark,
1210            )
1211        {
1212            if pos > text_start {
1213                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1214            }
1215            log::trace!("Matched autolink at pos {}", pos);
1216            emit_autolink(builder, &text[pos..pos + len], url);
1217            pos += len;
1218            text_start = pos;
1219            continue;
1220        }
1221
1222        if !nested_in_link
1223            && config.extensions.autolink_bare_uris
1224            && let Some((len, url)) = try_parse_bare_uri(&text[pos..])
1225        {
1226            if pos > text_start {
1227                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1228            }
1229            log::trace!("Matched bare URI at pos {}", pos);
1230            emit_bare_uri_link(builder, url, config);
1231            pos += len;
1232            text_start = pos;
1233            continue;
1234        }
1235
1236        // Try native spans: <span>text</span> (after autolink since both
1237        // start with <). Under Pandoc dialect this is consumed via the
1238        // IR's `ConstructPlan` at the top of the loop; this dispatcher
1239        // branch only fires for CommonMark dialect with the extension
1240        // explicitly enabled.
1241        if byte == b'<'
1242            && config.dialect == Dialect::CommonMark
1243            && config.extensions.native_spans
1244            && let Some((len, content, _attributes)) = try_parse_native_span(&text[pos..])
1245        {
1246            if pos > text_start {
1247                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1248            }
1249            log::trace!("Matched native span at pos {}", pos);
1250            emit_native_span(
1251                builder,
1252                &text[pos..pos + len],
1253                content,
1254                config,
1255                suppress_footnote_refs,
1256            );
1257            pos += len;
1258            text_start = pos;
1259            continue;
1260        }
1261
1262        // Try inline raw HTML (CommonMark §6.6 / Pandoc raw_html). Must run
1263        // after autolinks (more specific) and native spans (Pandoc
1264        // <span>…</span> wrapper) since all three start with `<`.
1265        if byte == b'<'
1266            && config.extensions.raw_html
1267            && let Some(len) = try_parse_inline_html(&text[pos..], config.dialect)
1268        {
1269            if pos > text_start {
1270                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1271            }
1272            log::trace!("Matched inline raw HTML at pos {}", pos);
1273            emit_inline_html(builder, &text[pos..pos + len]);
1274            pos += len;
1275            text_start = pos;
1276            continue;
1277        }
1278
1279        // Bracket-starting elements: inline / reference links and
1280        // images are dispatched via the IR-driven arm at the top of
1281        // the loop, gated by the IR's `BracketPlan`. Only dialect-CM-
1282        // specific Pandoc-extension constructs that share the `[...]`
1283        // shape (footnote refs, bracketed citations) need a CM-gated
1284        // dispatcher branch — under Pandoc dialect they're consumed
1285        // via the IR's `ConstructPlan` instead.
1286        if byte == b'['
1287            && config.dialect == Dialect::CommonMark
1288            && config.extensions.footnotes
1289            && !suppress_footnote_refs
1290            && let Some((len, id)) = try_parse_footnote_reference(&text[pos..])
1291        {
1292            if pos > text_start {
1293                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1294            }
1295            log::trace!("Matched footnote reference at pos {}", pos);
1296            emit_footnote_reference(builder, &id);
1297            pos += len;
1298            text_start = pos;
1299            continue;
1300        }
1301        if byte == b'['
1302            && config.dialect == Dialect::CommonMark
1303            && config.extensions.citations
1304            && let Some((len, content)) = try_parse_bracketed_citation(&text[pos..])
1305        {
1306            if pos > text_start {
1307                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1308            }
1309            log::trace!("Matched bracketed citation at pos {}", pos);
1310            emit_bracketed_citation(builder, content);
1311            pos += len;
1312            text_start = pos;
1313            continue;
1314        }
1315
1316        // Try bracketed spans: [text]{.class}. Must come after
1317        // links/citations. Under Pandoc dialect this is consumed via
1318        // the IR's `ConstructPlan` at the top of the loop; this
1319        // dispatcher branch only fires for CommonMark dialect with the
1320        // extension explicitly enabled.
1321        if config.dialect == Dialect::CommonMark
1322            && byte == b'['
1323            && config.extensions.bracketed_spans
1324            && let Some((len, text_content, attrs)) = try_parse_bracketed_span(&text[pos..])
1325        {
1326            if pos > text_start {
1327                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1328            }
1329            log::trace!("Matched bracketed span at pos {}", pos);
1330            emit_bracketed_span(
1331                builder,
1332                &text_content,
1333                &attrs,
1334                config,
1335                suppress_footnote_refs,
1336            );
1337            pos += len;
1338            text_start = pos;
1339            continue;
1340        }
1341
1342        // Try bare citation: @cite (must come after bracketed elements).
1343        // Under Pandoc dialect this is consumed via the IR's
1344        // `ConstructPlan` at the top of the loop; this dispatcher branch
1345        // only fires for CommonMark dialect with the extension
1346        // explicitly enabled.
1347        if config.dialect == Dialect::CommonMark
1348            && byte == b'@'
1349            && (config.extensions.citations || config.extensions.quarto_crossrefs)
1350            && let Some((len, key, has_suppress)) = try_parse_bare_citation(&text[pos..])
1351        {
1352            let is_crossref =
1353                config.extensions.quarto_crossrefs && super::citations::is_quarto_crossref_key(key);
1354            if is_crossref || config.extensions.citations {
1355                if pos > text_start {
1356                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1357                }
1358                if is_crossref {
1359                    log::trace!("Matched Quarto crossref at pos {}: {}", pos, &key);
1360                    super::citations::emit_crossref(builder, key, has_suppress);
1361                } else {
1362                    log::trace!("Matched bare citation at pos {}: {}", pos, &key);
1363                    emit_bare_citation(builder, key, has_suppress);
1364                }
1365                pos += len;
1366                text_start = pos;
1367                continue;
1368            }
1369        }
1370
1371        // Try suppress-author citation: -@cite. Under Pandoc dialect
1372        // this is consumed via the IR's `ConstructPlan` at the top of
1373        // the loop; this dispatcher branch only fires for CommonMark
1374        // dialect with the extension explicitly enabled.
1375        if config.dialect == Dialect::CommonMark
1376            && byte == b'-'
1377            && pos + 1 < text.len()
1378            && text.as_bytes()[pos + 1] == b'@'
1379            && (config.extensions.citations || config.extensions.quarto_crossrefs)
1380            && let Some((len, key, has_suppress)) = try_parse_bare_citation(&text[pos..])
1381        {
1382            let is_crossref =
1383                config.extensions.quarto_crossrefs && super::citations::is_quarto_crossref_key(key);
1384            if is_crossref || config.extensions.citations {
1385                if pos > text_start {
1386                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1387                }
1388                if is_crossref {
1389                    log::trace!("Matched Quarto crossref at pos {}: {}", pos, &key);
1390                    super::citations::emit_crossref(builder, key, has_suppress);
1391                } else {
1392                    log::trace!("Matched suppress-author citation at pos {}: {}", pos, &key);
1393                    emit_bare_citation(builder, key, has_suppress);
1394                }
1395                pos += len;
1396                text_start = pos;
1397                continue;
1398            }
1399        }
1400
1401        // Emphasis emission, plan-driven. The IR's emphasis pass has
1402        // already decided every delimiter byte's disposition (open
1403        // marker, close marker, or unmatched literal); consult the
1404        // plan here instead of re-scanning.
1405        if byte == b'*' || byte == b'_' {
1406            match plan.lookup(pos) {
1407                Some(DelimChar::Open {
1408                    len,
1409                    partner,
1410                    partner_len,
1411                    kind,
1412                }) => {
1413                    if pos > text_start {
1414                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1415                    }
1416                    let len = len as usize;
1417                    let partner_len = partner_len as usize;
1418                    let (wrapper_kind, marker_kind) = match kind {
1419                        EmphasisKind::Strong => (SyntaxKind::STRONG, SyntaxKind::STRONG_MARKER),
1420                        EmphasisKind::Emph => (SyntaxKind::EMPHASIS, SyntaxKind::EMPHASIS_MARKER),
1421                    };
1422                    builder.start_node(wrapper_kind.into());
1423                    builder.token(marker_kind.into(), &text[pos..pos + len]);
1424                    parse_inline_range_impl(
1425                        text,
1426                        pos + len,
1427                        partner,
1428                        config,
1429                        builder,
1430                        nested_in_link,
1431                        plan,
1432                        bracket_plan,
1433                        construct_plan,
1434                        suppress_inner_links,
1435                        suppress_footnote_refs,
1436                        mask,
1437                    );
1438                    builder.token(marker_kind.into(), &text[partner..partner + partner_len]);
1439                    builder.finish_node();
1440                    pos = partner + partner_len;
1441                    text_start = pos;
1442                    continue;
1443                }
1444                Some(DelimChar::Close) => {
1445                    // Defensive: a close should be jumped past by its
1446                    // matching open. If we hit one anyway (e.g. when the
1447                    // outer caller's range starts mid-pair), let it be
1448                    // emitted as part of the surrounding text by simply
1449                    // advancing. text_start stays put so the byte folds
1450                    // into the next TEXT flush.
1451                    pos += 1;
1452                    continue;
1453                }
1454                Some(DelimChar::Literal) | None => {
1455                    // Unmatched delim chars at this position behave as
1456                    // literal text. Don't emit yet — let them coalesce
1457                    // with surrounding plain bytes via the existing
1458                    // text_start flushing so the CST keeps the same TEXT
1459                    // token granularity Pandoc fixtures expect.
1460                    let bytes = text.as_bytes();
1461                    let mut end_pos = pos + 1;
1462                    while end_pos < end && bytes[end_pos] == byte {
1463                        match plan.lookup(end_pos) {
1464                            Some(DelimChar::Literal) | None => end_pos += 1,
1465                            _ => break,
1466                        }
1467                    }
1468                    pos = end_pos;
1469                    continue;
1470                }
1471            }
1472        }
1473
1474        // Check for newlines - may need to emit as hard line break
1475        if byte == b'\r' && pos + 1 < end && text.as_bytes()[pos + 1] == b'\n' {
1476            let text_before = &text[text_start..pos];
1477
1478            // Check for trailing spaces hard line break (always enabled in Pandoc)
1479            let trailing_spaces = text_before.chars().rev().take_while(|&c| c == ' ').count();
1480            if trailing_spaces >= 2 {
1481                // Emit text before the trailing spaces
1482                let text_content = &text_before[..text_before.len() - trailing_spaces];
1483                if !text_content.is_empty() {
1484                    builder.token(SyntaxKind::TEXT.into(), text_content);
1485                }
1486                let spaces = " ".repeat(trailing_spaces);
1487                builder.token(
1488                    SyntaxKind::HARD_LINE_BREAK.into(),
1489                    &format!("{}\r\n", spaces),
1490                );
1491                pos += 2;
1492                text_start = pos;
1493                continue;
1494            }
1495
1496            // hard_line_breaks: treat all single newlines as hard line breaks
1497            if config.extensions.hard_line_breaks {
1498                if !text_before.is_empty() {
1499                    builder.token(SyntaxKind::TEXT.into(), text_before);
1500                }
1501                builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\r\n");
1502                pos += 2;
1503                text_start = pos;
1504                continue;
1505            }
1506
1507            // Regular newline
1508            if !text_before.is_empty() {
1509                builder.token(SyntaxKind::TEXT.into(), text_before);
1510            }
1511            builder.token(SyntaxKind::NEWLINE.into(), "\r\n");
1512            pos += 2;
1513            text_start = pos;
1514            continue;
1515        }
1516
1517        if byte == b'\n' {
1518            let text_before = &text[text_start..pos];
1519
1520            // Check for trailing spaces hard line break (always enabled in Pandoc)
1521            let trailing_spaces = text_before.chars().rev().take_while(|&c| c == ' ').count();
1522            if trailing_spaces >= 2 {
1523                // Emit text before the trailing spaces
1524                let text_content = &text_before[..text_before.len() - trailing_spaces];
1525                if !text_content.is_empty() {
1526                    builder.token(SyntaxKind::TEXT.into(), text_content);
1527                }
1528                let spaces = " ".repeat(trailing_spaces);
1529                builder.token(SyntaxKind::HARD_LINE_BREAK.into(), &format!("{}\n", spaces));
1530                pos += 1;
1531                text_start = pos;
1532                continue;
1533            }
1534
1535            // hard_line_breaks: treat all single newlines as hard line breaks
1536            if config.extensions.hard_line_breaks {
1537                if !text_before.is_empty() {
1538                    builder.token(SyntaxKind::TEXT.into(), text_before);
1539                }
1540                builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\n");
1541                pos += 1;
1542                text_start = pos;
1543                continue;
1544            }
1545
1546            // Regular newline
1547            if !text_before.is_empty() {
1548                builder.token(SyntaxKind::TEXT.into(), text_before);
1549            }
1550            builder.token(SyntaxKind::NEWLINE.into(), "\n");
1551            pos += 1;
1552            text_start = pos;
1553            continue;
1554        }
1555
1556        // Regular character, keep accumulating
1557        pos = advance_char_boundary(text, pos, end);
1558    }
1559
1560    // Emit any remaining text
1561    if pos > text_start && text_start < end {
1562        log::trace!("Emitting remaining TEXT: {:?}", &text[text_start..end]);
1563        builder.token(SyntaxKind::TEXT.into(), &text[text_start..end]);
1564    }
1565
1566    log::trace!("parse_inline_range complete: start={}, end={}", start, end);
1567}
1568
#[cfg(test)]
mod tests {
    use super::*;
    use crate::syntax::{SyntaxKind, SyntaxNode};

    /// Parses `text` as inline content beneath a fresh `wrapper` node and
    /// returns the root of the resulting syntax tree.
    ///
    /// Every test goes through a wrapper node so the builder always ends up
    /// with exactly one root element, regardless of how many inline nodes or
    /// tokens the input produces.
    fn parse_wrapped(wrapper: SyntaxKind, text: &str, config: &ParserOptions) -> SyntaxNode {
        let mut builder = GreenNodeBuilder::new();
        builder.start_node(wrapper.into());
        parse_inline_text_recursive(&mut builder, text, config, false);
        builder.finish_node();
        SyntaxNode::new_root(builder.finish())
    }

    /// Returns `true` when `node` itself or any of its descendants has kind `kind`.
    fn contains_kind(node: &SyntaxNode, kind: SyntaxKind) -> bool {
        node.descendants().any(|n| n.kind() == kind)
    }

    /// A single `*…*` pair yields an EMPHASIS node and round-trips losslessly.
    #[test]
    fn test_recursive_simple_emphasis() {
        let text = "*test*";
        let config = ParserOptions::default();

        // Wrapped in a PARAGRAPH like the other tests, so the test does not
        // depend on the input producing exactly one top-level node.
        let node = parse_wrapped(SyntaxKind::PARAGRAPH, text, &config);

        // Should be lossless
        assert_eq!(node.text().to_string(), text);

        // Should have EMPHASIS node
        assert!(
            contains_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
    }

    /// Strong emphasis nested inside emphasis produces both node kinds.
    #[test]
    fn test_recursive_nested() {
        let text = "*foo **bar** baz*";
        let config = ParserOptions::default();

        let node = parse_wrapped(SyntaxKind::PARAGRAPH, text, &config);

        // Should be lossless
        assert_eq!(node.text().to_string(), text);

        // Should have both EMPHASIS and STRONG
        assert!(
            contains_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert!(
            contains_kind(&node, SyntaxKind::STRONG),
            "Should have STRONG node"
        );
    }

    /// Test Pandoc's "three" algorithm: ***foo* bar**
    /// Expected: Strong[Emph[foo], bar]
    #[test]
    fn test_triple_emphasis_star_then_double_star() {
        let text = "***foo* bar**";
        let config = ParserOptions::default();

        let node = parse_wrapped(SyntaxKind::DOCUMENT, text, &config);

        // Verify losslessness
        assert_eq!(node.text().to_string(), text);

        let structure = format!("{:#?}", node);

        // Should have both STRONG and EMPH
        assert!(structure.contains("STRONG"), "Should have STRONG node");
        assert!(structure.contains("EMPHASIS"), "Should have EMPHASIS node");

        // STRONG should be outer, EMPH should be inner. Check real
        // containment: a preorder "STRONG seen before EMPHASIS" scan would
        // also accept two sibling nodes.
        let emph_inside_strong = node
            .descendants()
            .filter(|n| n.kind() == SyntaxKind::STRONG)
            .any(|strong| contains_kind(&strong, SyntaxKind::EMPHASIS));

        assert!(
            emph_inside_strong,
            "EMPH should be inside STRONG, not before it. Current structure:\n{}",
            structure
        );
    }

    /// Test Pandoc's "three" algorithm: ***foo** bar*
    /// Expected: Emph[Strong[foo], bar]
    #[test]
    fn test_triple_emphasis_double_star_then_star() {
        let text = "***foo** bar*";
        let config = ParserOptions::default();

        let node = parse_wrapped(SyntaxKind::DOCUMENT, text, &config);

        // Verify losslessness
        assert_eq!(node.text().to_string(), text);

        let structure = format!("{:#?}", node);

        // Should have both EMPH and STRONG
        assert!(structure.contains("EMPHASIS"), "Should have EMPHASIS node");
        assert!(structure.contains("STRONG"), "Should have STRONG node");

        // EMPH should be outer, STRONG should be inner. As above, assert
        // containment rather than traversal order.
        let strong_inside_emph = node
            .descendants()
            .filter(|n| n.kind() == SyntaxKind::EMPHASIS)
            .any(|emph| contains_kind(&emph, SyntaxKind::STRONG));

        assert!(
            strong_inside_emph,
            "STRONG should be inside EMPH. Current structure:\n{}",
            structure
        );
    }

    /// Test that display math with attributes parses correctly
    /// Regression test for equation_attributes_single_line golden test
    #[test]
    fn test_display_math_with_attributes() {
        let text = "$$ E = mc^2 $$ {#eq-einstein}";
        let mut config = ParserOptions::default();
        config.extensions.quarto_crossrefs = true; // Enable Quarto cross-references

        let node = parse_wrapped(SyntaxKind::DOCUMENT, text, &config);

        // Verify losslessness
        assert_eq!(node.text().to_string(), text);

        // Should have DISPLAY_MATH node
        assert!(
            contains_kind(&node, SyntaxKind::DISPLAY_MATH),
            "Should have DISPLAY_MATH node"
        );

        // Should have ATTRIBUTE node
        assert!(
            contains_kind(&node, SyntaxKind::ATTRIBUTE),
            "Should have ATTRIBUTE node for {{#eq-einstein}}"
        );

        // Attributes should not be TEXT. TEXT is emitted as a token, and
        // `next_sibling()` only walks node siblings, so the element directly
        // after DISPLAY_MATH has to be read via `next_sibling_or_token()`
        // for this check to be able to fail.
        let math_followed_by_text = node.descendants().any(|n| {
            n.kind() == SyntaxKind::DISPLAY_MATH
                && n.next_sibling_or_token()
                    .and_then(|element| element.into_token())
                    .is_some_and(|token| {
                        token.kind() == SyntaxKind::TEXT
                            && token.text().contains("{#eq-einstein}")
                    })
        });
        assert!(
            !math_followed_by_text,
            "Attributes should not be parsed as TEXT"
        );
    }

    /// Under the GFM flavor, the destination of an inline link must stay a
    /// LINK_DEST and not be turned into a second, nested LINK.
    #[test]
    fn test_parse_inline_text_gfm_inline_link_destination_not_autolinked() {
        use crate::options::{Dialect, Extensions, Flavor};

        let config = ParserOptions {
            flavor: Flavor::Gfm,
            dialect: Dialect::for_flavor(Flavor::Gfm),
            extensions: Extensions::for_flavor(Flavor::Gfm),
            ..ParserOptions::default()
        };

        let root = parse_wrapped(
            SyntaxKind::PARAGRAPH,
            "Second Link [link_text](https://link.com)",
            &config,
        );

        let links: Vec<_> = root
            .descendants()
            .filter(|n| n.kind() == SyntaxKind::LINK)
            .collect();
        assert_eq!(
            links.len(),
            1,
            "Expected exactly one LINK node for inline link, not nested bare URI autolink"
        );

        // Pull the text and destination out of the single LINK's children.
        let mut link_text = None::<String>;
        let mut link_dest = None::<String>;
        for child in links[0].children() {
            match child.kind() {
                SyntaxKind::LINK_TEXT => link_text = Some(child.text().to_string()),
                SyntaxKind::LINK_DEST => link_dest = Some(child.text().to_string()),
                _ => {}
            }
        }

        assert_eq!(link_text.as_deref(), Some("link_text"));
        assert_eq!(link_dest.as_deref(), Some("https://link.com"));
    }

    /// A lone multi-byte character must not trip byte-offset slicing when
    /// bare-URI autolinking is enabled.
    #[test]
    fn test_autolink_bare_uri_utf8_boundary_safe() {
        let text = "§";
        let mut config = ParserOptions::default();
        config.extensions.autolink_bare_uris = true;

        let node = parse_wrapped(SyntaxKind::DOCUMENT, text, &config);
        assert_eq!(node.text().to_string(), text);
    }

    /// Emphasis around a multi-byte character parses without panicking and
    /// stays lossless.
    #[test]
    fn test_parse_emphasis_unicode_content_no_panic() {
        let text = "*§*";
        let config = ParserOptions::default();

        let node = parse_wrapped(SyntaxKind::PARAGRAPH, text, &config);

        assert!(
            contains_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert_eq!(node.text().to_string(), text);
    }
}
1857
/// `**bold with *italic***` parses as Strong["bold with ", Emph["italic"]]:
/// the trailing `***` is split into `*` (closes Emph) followed by `**`
/// (closes Strong).
#[test]
fn test_two_with_nested_one_and_triple_closer() {
    use crate::syntax::SyntaxNode;

    let source = "**bold with *italic***";
    let options = ParserOptions::default();

    // Build the inline tree under a PARAGRAPH wrapper.
    let mut tree_builder = GreenNodeBuilder::new();
    tree_builder.start_node(SyntaxKind::PARAGRAPH.into());
    parse_inline_text_recursive(&mut tree_builder, source, &options, false);
    tree_builder.finish_node();
    let root = SyntaxNode::new_root(tree_builder.finish());

    assert_eq!(root.text().to_string(), source, "Should be lossless");

    let strongs: Vec<_> = root
        .descendants()
        .filter(|candidate| candidate.kind() == SyntaxKind::STRONG)
        .collect();
    assert_eq!(strongs.len(), 1, "Should have exactly one STRONG node");

    // Exactly one STRONG exists at this point, so `any` inspects just that node.
    let emphasis_nested = strongs.iter().any(|strong| {
        strong
            .descendants()
            .any(|inner| inner.kind() == SyntaxKind::EMPHASIS)
    });
    assert!(emphasis_nested, "STRONG should contain EMPHASIS node");
}
1894
/// `*foo *` parses as emphasis (Pandoc behavior): for asterisks, Pandoc
/// does not require the closer to be right-flanking.
#[test]
fn test_emphasis_with_trailing_space_before_closer() {
    use crate::syntax::SyntaxNode;

    let source = "*foo *";
    let options = ParserOptions::default();

    let mut tree_builder = GreenNodeBuilder::new();
    tree_builder.start_node(SyntaxKind::PARAGRAPH.into());
    parse_inline_text_recursive(&mut tree_builder, source, &options, false);
    tree_builder.finish_node();
    let root = SyntaxNode::new_root(tree_builder.finish());

    let emphasis = root
        .descendants()
        .find(|candidate| candidate.kind() == SyntaxKind::EMPHASIS);
    assert!(emphasis.is_some(), "Should have EMPHASIS node");
    assert_eq!(root.text().to_string(), source);
}
1919
/// `***foo** bar **baz***` parses as Emph[Strong[foo], " bar ", Strong[baz]],
/// matching Pandoc's output.
#[test]
fn test_triple_emphasis_all_strong_nested() {
    use crate::syntax::SyntaxNode;

    let source = "***foo** bar **baz***";
    let options = ParserOptions::default();

    let mut tree_builder = GreenNodeBuilder::new();
    tree_builder.start_node(SyntaxKind::DOCUMENT.into());
    parse_inline_text_recursive(&mut tree_builder, source, &options, false);
    tree_builder.finish_node();
    let root = SyntaxNode::new_root(tree_builder.finish());

    // Exactly one EMPHASIS node in the whole tree.
    let emphases: Vec<_> = root
        .descendants()
        .filter(|candidate| candidate.kind() == SyntaxKind::EMPHASIS)
        .collect();
    assert_eq!(
        emphases.len(),
        1,
        "Should have exactly one EMPHASIS node, found: {}",
        emphases.len()
    );

    // That EMPHASIS has two STRONG nodes as direct children.
    let nested_strong_count = emphases[0]
        .children()
        .filter(|child| child.kind() == SyntaxKind::STRONG)
        .count();
    assert_eq!(
        nested_strong_count, 2,
        "EMPHASIS should contain two STRONG nodes, found: {}",
        nested_strong_count
    );

    // The tree reproduces the input byte-for-byte.
    assert_eq!(root.text().to_string(), source);
}
1968
/// `***foo* bar *baz***` parses as Strong[Emph[foo], " bar ", Emph[baz]],
/// matching Pandoc's output.
#[test]
fn test_triple_emphasis_all_emph_nested() {
    use crate::syntax::SyntaxNode;

    let source = "***foo* bar *baz***";
    let options = ParserOptions::default();

    let mut tree_builder = GreenNodeBuilder::new();
    tree_builder.start_node(SyntaxKind::DOCUMENT.into());
    parse_inline_text_recursive(&mut tree_builder, source, &options, false);
    tree_builder.finish_node();
    let root = SyntaxNode::new_root(tree_builder.finish());

    // Exactly one STRONG node in the whole tree.
    let strongs: Vec<_> = root
        .descendants()
        .filter(|candidate| candidate.kind() == SyntaxKind::STRONG)
        .collect();
    assert_eq!(
        strongs.len(),
        1,
        "Should have exactly one STRONG node, found: {}",
        strongs.len()
    );

    // That STRONG has two EMPHASIS nodes as direct children.
    let nested_emph_count = strongs[0]
        .children()
        .filter(|child| child.kind() == SyntaxKind::EMPHASIS)
        .count();
    assert_eq!(
        nested_emph_count, 2,
        "STRONG should contain two EMPHASIS nodes, found: {}",
        nested_emph_count
    );

    // The tree reproduces the input byte-for-byte.
    assert_eq!(root.text().to_string(), source);
}
2017
2018// Multiline emphasis tests
/// Per the Pandoc spec, emphasis may span a soft line break.
#[test]
fn test_parse_emphasis_multiline() {
    use crate::syntax::SyntaxNode;

    let source = "*text on\nline two*";
    let options = ParserOptions::default();

    let mut tree_builder = GreenNodeBuilder::new();
    tree_builder.start_node(SyntaxKind::PARAGRAPH.into());
    parse_inline_text_recursive(&mut tree_builder, source, &options, false);
    tree_builder.finish_node();
    let root = SyntaxNode::new_root(tree_builder.finish());

    let emphasis = root
        .descendants()
        .find(|candidate| candidate.kind() == SyntaxKind::EMPHASIS);
    assert!(emphasis.is_some(), "Should have EMPHASIS node");

    let rendered = root.text().to_string();
    assert_eq!(rendered, source);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in emphasis content"
    );
}
2046
/// Per the Pandoc spec, strong emphasis may span a line break.
#[test]
fn test_parse_strong_multiline() {
    use crate::syntax::SyntaxNode;

    let source = "**strong on\nline two**";
    let options = ParserOptions::default();

    let mut tree_builder = GreenNodeBuilder::new();
    tree_builder.start_node(SyntaxKind::PARAGRAPH.into());
    parse_inline_text_recursive(&mut tree_builder, source, &options, false);
    tree_builder.finish_node();
    let root = SyntaxNode::new_root(tree_builder.finish());

    let strong = root
        .descendants()
        .find(|candidate| candidate.kind() == SyntaxKind::STRONG);
    assert!(strong.is_some(), "Should have STRONG node");

    let rendered = root.text().to_string();
    assert_eq!(rendered, source);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in strong content"
    );
}
2074
/// Triple-delimiter emphasis may span a line break as well.
#[test]
fn test_parse_triple_emphasis_multiline() {
    use crate::syntax::SyntaxNode;

    let source = "***both on\nline two***";
    let options = ParserOptions::default();

    let mut tree_builder = GreenNodeBuilder::new();
    tree_builder.start_node(SyntaxKind::PARAGRAPH.into());
    parse_inline_text_recursive(&mut tree_builder, source, &options, false);
    tree_builder.finish_node();
    let root = SyntaxNode::new_root(tree_builder.finish());

    // Triple delimiters combine strong and emphasis; this test only checks
    // for the STRONG node.
    let strong = root
        .descendants()
        .find(|candidate| candidate.kind() == SyntaxKind::STRONG);
    assert!(strong.is_some(), "Should have STRONG node");

    let rendered = root.text().to_string();
    assert_eq!(rendered, source);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in triple emphasis content"
    );
}