Skip to main content

panache_parser/parser/inlines/
core.rs

1//! Inline emission walk.
2//!
3//! Consumes the IR plans built by [`super::inline_ir::build_full_plans`]
4//! (emphasis pairings, bracket resolutions, standalone Pandoc constructs)
5//! and emits the inline CST tokens / nodes in source order. Resolution
6//! decisions for emphasis, brackets, and standalone Pandoc constructs
7//! are entirely IR-driven for both dialects; the dispatcher's
8//! `try_parse_*` recognizers are still called to *parse* a matched byte
9//! range into a CST subtree, but "what is this byte range?" is answered
10//! exclusively by the IR.
11
12use crate::options::{Dialect, ParserOptions};
13use crate::syntax::SyntaxKind;
14use rowan::GreenNodeBuilder;
15
16use super::inline_ir::{
17    BracketPlan, ConstructDispo, ConstructPlan, DelimChar, EmphasisKind, EmphasisPlan,
18};
19
20// Import inline element parsers from sibling modules
21use super::bookdown::{
22    try_parse_bookdown_definition, try_parse_bookdown_reference, try_parse_bookdown_text_reference,
23};
24use super::bracketed_spans::{emit_bracketed_span, try_parse_bracketed_span};
25use super::citations::{
26    emit_bare_citation, emit_bracketed_citation, try_parse_bare_citation,
27    try_parse_bracketed_citation,
28};
29use super::code_spans::{emit_code_span, try_parse_code_span};
30use super::emoji::{emit_emoji, try_parse_emoji};
31use super::escapes::{EscapeType, emit_escape, try_parse_escape};
32use super::inline_executable::{emit_inline_executable, try_parse_inline_executable};
33use super::inline_footnotes::{
34    emit_footnote_reference, emit_inline_footnote, try_parse_footnote_reference,
35    try_parse_inline_footnote,
36};
37use super::inline_html::{emit_inline_html, try_parse_inline_html};
38use super::latex::{parse_latex_command, try_parse_latex_command};
39use super::links::{
40    LinkScanContext, emit_autolink, emit_bare_uri_link, emit_inline_image, emit_inline_link,
41    emit_reference_image, emit_reference_link, emit_unresolved_reference, try_parse_autolink,
42    try_parse_bare_uri, try_parse_inline_image, try_parse_inline_link, try_parse_reference_image,
43    try_parse_reference_link,
44};
45use super::mark::{emit_mark, try_parse_mark};
46use super::math::{
47    emit_display_math, emit_display_math_environment, emit_double_backslash_display_math,
48    emit_double_backslash_inline_math, emit_gfm_inline_math, emit_inline_math,
49    emit_single_backslash_display_math, emit_single_backslash_inline_math, try_parse_display_math,
50    try_parse_double_backslash_display_math, try_parse_double_backslash_inline_math,
51    try_parse_gfm_inline_math, try_parse_inline_math, try_parse_math_environment,
52    try_parse_single_backslash_display_math, try_parse_single_backslash_inline_math,
53};
54use super::native_spans::{emit_native_span, try_parse_native_span};
55use super::raw_inline::is_raw_inline;
56use super::shortcodes::{emit_shortcode, try_parse_shortcode};
57use super::strikeout::{emit_strikeout, try_parse_strikeout};
58use super::subscript::{emit_subscript, try_parse_subscript};
59use super::superscript::{emit_superscript, try_parse_superscript};
60
61/// Parse inline text into the CST builder.
62///
63/// Top-level entry point for inline parsing. Builds the IR plans
64/// (emphasis pairings, bracket resolutions, standalone Pandoc constructs)
65/// once via [`super::inline_ir::build_full_plans`], then walks the byte
66/// range left-to-right consulting those plans plus the dispatcher's
67/// ordered-try chain for non-IR-resolved constructs (autolinks, code
68/// spans, escapes, math, etc.). Dialect-specific behavior is selected
69/// inside `build_full_plans`.
70///
71/// # Arguments
72/// * `text` - The inline text to parse
73/// * `config` - Configuration for extensions and formatting
74/// * `builder` - The CST builder to emit nodes to
75pub fn parse_inline_text_recursive(
76    builder: &mut GreenNodeBuilder,
77    text: &str,
78    config: &ParserOptions,
79) {
80    log::trace!(
81        "Recursive inline parsing: {:?} ({} bytes)",
82        &text[..text.len().min(40)],
83        text.len()
84    );
85
86    let mask = structural_byte_mask(config);
87    if try_emit_plain_text_fast_path_with_mask(builder, text, &mask) {
88        log::trace!("Recursive inline parsing complete (plain-text fast path)");
89        return;
90    }
91
92    let plans = super::inline_ir::build_full_plans(text, 0, text.len(), config);
93    parse_inline_range_impl(
94        text,
95        0,
96        text.len(),
97        config,
98        builder,
99        false,
100        &plans.emphasis,
101        &plans.brackets,
102        &plans.constructs,
103        false,
104        &mask,
105    );
106
107    log::trace!("Recursive inline parsing complete");
108}
109
110/// Parse inline elements from text content nested inside a link/image/span.
111///
112/// Used for recursive inline parsing of link text, image alt, span content, etc.
113/// Suppresses constructs that would create nested links (CommonMark §6.3 forbids
114/// links inside links), notably extended bare-URI autolinks under GFM.
115///
116/// `suppress_inner_links` should be `true` when the recursion is for a
117/// LINK or REFERENCE-LINK's text, where inner link / reference-link
118/// brackets must emit as literal text (pandoc-native:
119/// `[link [inner](u2)](u1)` → outer `Link` with `Str "[inner](u2)"`).
120/// Image alt text and all non-link contexts pass `false`:
121/// pandoc-native verifies `![alt with [inner](u)](u2)` keeps the inner
122/// `Link`, and bracketed spans / native spans / inline footnotes /
123/// emphasis all allow nested links.
124pub fn parse_inline_text(
125    builder: &mut GreenNodeBuilder,
126    text: &str,
127    config: &ParserOptions,
128    suppress_inner_links: bool,
129) {
130    log::trace!(
131        "Parsing inline text (nested in link): {:?} ({} bytes)",
132        &text[..text.len().min(40)],
133        text.len()
134    );
135
136    let mask = structural_byte_mask(config);
137    if try_emit_plain_text_fast_path_with_mask(builder, text, &mask) {
138        return;
139    }
140
141    let plans = super::inline_ir::build_full_plans(text, 0, text.len(), config);
142    parse_inline_range_impl(
143        text,
144        0,
145        text.len(),
146        config,
147        builder,
148        true,
149        &plans.emphasis,
150        &plans.brackets,
151        &plans.constructs,
152        suppress_inner_links,
153        &mask,
154    );
155}
156
157/// Plain-text fast path for inline ranges with no structural bytes.
158///
159/// Returns `true` if the range was emitted as a single `TEXT` token and
160/// the caller should skip the IR + dispatcher pipeline. Returns `false`
161/// if any structural byte appears (or the range is empty), letting the
162/// caller proceed normally. Empty input returns `false` so the caller's
163/// existing "no events → no output" path is preserved exactly.
164///
165/// The structural byte set is computed from `config.dialect` and
166/// `config.extensions` so prose containing dialect-irrelevant punctuation
167/// (e.g. `-` outside a citation flavor) doesn't unnecessarily disable the
168/// fast path. `\n` and `\r` are always structural — multi-line inline
169/// content must still split into TEXT + NEWLINE tokens like the slow path.
170fn try_emit_plain_text_fast_path_with_mask(
171    builder: &mut GreenNodeBuilder,
172    text: &str,
173    mask: &[bool; 256],
174) -> bool {
175    if text.is_empty() {
176        return false;
177    }
178    for &b in text.as_bytes() {
179        if mask[b as usize] {
180            return false;
181        }
182    }
183    builder.token(SyntaxKind::TEXT.into(), text);
184    true
185}
186
187/// Build a 256-entry byte mask: `mask[b]` is `true` iff byte `b` could
188/// trigger any IR-recognised construct or dispatcher branch under the
189/// current dialect/extensions. Used by the plain-text fast path to scan
190/// inline ranges in a single pass.
191fn structural_byte_mask(config: &ParserOptions) -> [bool; 256] {
192    let mut mask = [false; 256];
193    let exts = &config.extensions;
194    let pandoc = config.dialect == Dialect::Pandoc;
195
196    // Always structural: line breaks (CST splits TEXT/NEWLINE), backslash
197    // (escape / hard break / backslash-math / latex / bookdown ref),
198    // backtick (code span / inline executable), `*`/`_` (emphasis is a
199    // core CommonMark construct, not extension-gated), and `[`/`]` if
200    // any bracket-shaped construct is reachable.
201    mask[b'\n' as usize] = true;
202    mask[b'\r' as usize] = true;
203    mask[b'\\' as usize] = true;
204    mask[b'`' as usize] = true;
205    mask[b'*' as usize] = true;
206    mask[b'_' as usize] = true;
207
208    // Brackets: the IR/dispatcher only acts on `[`/`]` if some
209    // bracket-shaped feature is reachable. `!` is the leading byte of
210    // `![alt]` image brackets — the IR's `BracketPlan` keys image
211    // openers at the `!` position, so the dispatcher must stop here
212    // to consult the plan.
213    if exts.inline_links
214        || exts.reference_links
215        || exts.inline_images
216        || exts.bracketed_spans
217        || exts.footnotes
218        || exts.citations
219    {
220        mask[b'[' as usize] = true;
221        mask[b']' as usize] = true;
222    }
223    if exts.inline_images || exts.reference_links {
224        mask[b'!' as usize] = true;
225    }
226
227    // `<` covers autolinks, raw HTML, and Pandoc native spans.
228    if exts.autolinks || exts.raw_html || exts.native_spans {
229        mask[b'<' as usize] = true;
230    }
231
232    // `^` covers Pandoc inline footnotes (`^[...]`), CM inline footnotes
233    // (when explicitly enabled), and superscript (`^text^`).
234    if exts.inline_footnotes || exts.superscript {
235        mask[b'^' as usize] = true;
236    }
237
238    // `@` and `-` cover Pandoc citation forms (`@cite`, `-@cite`,
239    // `[@cite]`). Under Pandoc dialect, the IR's `ConstructPlan` keys
240    // bare citations at the `@` or `-` position, so the dispatcher
241    // must stop at either to consult the plan. Including `-` is
242    // pessimistic — most prose hyphens won't form `-@` — but missing
243    // it would skip past valid suppress-author citations.
244    if exts.citations || exts.quarto_crossrefs {
245        mask[b'@' as usize] = true;
246        if pandoc {
247            mask[b'-' as usize] = true;
248        }
249    }
250
251    // `$` covers dollar-math and GFM math.
252    if exts.tex_math_dollars || exts.tex_math_gfm {
253        mask[b'$' as usize] = true;
254    }
255
256    // `~` covers subscript and strikeout (both `~text~` and `~~text~~`).
257    if exts.subscript || exts.strikeout {
258        mask[b'~' as usize] = true;
259    }
260
261    if exts.mark {
262        mask[b'=' as usize] = true;
263    }
264    if exts.emoji {
265        mask[b':' as usize] = true;
266    }
267    if exts.bookdown_references {
268        mask[b'(' as usize] = true;
269    }
270    // `{{< ... >}}` shortcodes: the dispatcher tries them on any
271    // `{` regardless of the `quarto_shortcodes` extension flag, so
272    // `{` must always be flagged here.
273    mask[b'{' as usize] = true;
274
275    // Bare-URI autolinks (`http://...` without `<>`) have no
276    // leading-byte gate in the dispatcher — `try_parse_bare_uri`
277    // probes for a URI scheme starting at every byte. Flag all
278    // ASCII alphabetic bytes so the bulk-skip stops on every
279    // potential scheme starter. This effectively disables the
280    // bulk-skip benefit for prose under GFM-style flavors but
281    // preserves correctness; ASCII digits / punctuation / non-ASCII
282    // bytes still skip cleanly.
283    if exts.autolink_bare_uris {
284        for b in b'a'..=b'z' {
285            mask[b as usize] = true;
286        }
287        for b in b'A'..=b'Z' {
288            mask[b as usize] = true;
289        }
290    }
291
292    mask
293}
294
/// Report whether `pos` is preceded by something other than an ASCII
/// alphanumeric byte or `_`.
///
/// Returns `true` when `pos` is 0 (nothing precedes it) or when the byte
/// at `pos - 1` is neither an ASCII letter/digit nor an underscore, and
/// `false` otherwise. Only the single preceding byte is inspected, so a
/// trailing byte of a multi-byte character counts as a boundary.
///
/// # Panics
/// Panics if `pos - 1` is out of range for `text`.
fn is_emoji_boundary(text: &str, pos: usize) -> bool {
    let Some(prev_idx) = pos.checked_sub(1) else {
        // Start of the text: nothing precedes the position.
        return true;
    };
    let prev_byte = text.as_bytes()[prev_idx];
    !(prev_byte.is_ascii_alphanumeric() || prev_byte == b'_')
}
304
/// Step from `pos` past one character of `text`, never going beyond `end`.
///
/// Returns `pos` unchanged when it is already at or past `end`, or at or
/// past the end of `text`. Otherwise returns `pos` plus the UTF-8 length
/// of the character starting at `pos`, clamped to `end`.
///
/// # Panics
/// Panics if `pos` is inside `text` but not on a char boundary, because
/// the text is sliced at `pos`.
#[inline]
fn advance_char_boundary(text: &str, pos: usize, end: usize) -> usize {
    if pos < end && pos < text.len() {
        // `pos < text.len()` means a character normally exists here; the
        // one-byte fallback mirrors the defensive default for `None`.
        let step = match text[pos..].chars().next() {
            Some(ch) => ch.len_utf8(),
            None => 1,
        };
        end.min(pos + step)
    } else {
        pos
    }
}
316
317#[allow(clippy::too_many_arguments)]
318fn parse_inline_range_impl(
319    text: &str,
320    start: usize,
321    end: usize,
322    config: &ParserOptions,
323    builder: &mut GreenNodeBuilder,
324    nested_in_link: bool,
325    plan: &EmphasisPlan,
326    bracket_plan: &BracketPlan,
327    construct_plan: &ConstructPlan,
328    suppress_inner_links: bool,
329    mask: &[bool; 256],
330) {
331    log::trace!(
332        "parse_inline_range: start={}, end={}, text={:?}",
333        start,
334        end,
335        &text[start..end]
336    );
337    let mut pos = start;
338    let mut text_start = start;
339    let bytes = text.as_bytes();
340
341    while pos < end {
342        // Bulk-skip plain bytes between structural bytes. Plans
343        // (`construct_plan`, `bracket_plan`, emphasis `plan`) only
344        // resolve at structural byte positions, so skipping here
345        // never elides a real match. `text_start` is preserved
346        // across the skip; the next emitted construct flushes the
347        // accumulated TEXT span.
348        if !mask[bytes[pos] as usize] {
349            let mut next = pos + 1;
350            while next < end && !mask[bytes[next] as usize] {
351                next += 1;
352            }
353            pos = next;
354            if pos >= end {
355                break;
356            }
357        }
358        // IR-driven dispatch: if the IR identified a Pandoc standalone
359        // construct starting here, emit it directly. Bypasses the
360        // dispatcher's ordered-try chain for inline footnotes, native
361        // spans, footnote references, citations, and bracketed spans
362        // under `Dialect::Pandoc`. The IR scan gates these on
363        // `!is_commonmark` and the relevant extension flag, so this
364        // branch is empty under CommonMark dialect (where the legacy
365        // dispatcher branches still run when the extension is enabled).
366        if let Some(dispo) = construct_plan.lookup(pos) {
367            match *dispo {
368                ConstructDispo::InlineFootnote { end: dispo_end } => {
369                    if dispo_end <= end
370                        && let Some((len, content)) = try_parse_inline_footnote(&text[pos..])
371                        && pos + len == dispo_end
372                    {
373                        if pos > text_start {
374                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
375                        }
376                        log::trace!("IR: matched inline footnote at pos {}", pos);
377                        emit_inline_footnote(builder, content, config);
378                        pos += len;
379                        text_start = pos;
380                        continue;
381                    }
382                }
383                ConstructDispo::NativeSpan { end: dispo_end } => {
384                    if dispo_end <= end
385                        && let Some((len, content, _attributes)) =
386                            try_parse_native_span(&text[pos..])
387                        && pos + len == dispo_end
388                    {
389                        if pos > text_start {
390                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
391                        }
392                        log::trace!("IR: matched native span at pos {}", pos);
393                        emit_native_span(builder, &text[pos..pos + len], content, config);
394                        pos += len;
395                        text_start = pos;
396                        continue;
397                    }
398                }
399                ConstructDispo::FootnoteReference { end: dispo_end } => {
400                    if dispo_end <= end
401                        && let Some((len, id)) = try_parse_footnote_reference(&text[pos..])
402                        && pos + len == dispo_end
403                    {
404                        if pos > text_start {
405                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
406                        }
407                        log::trace!("IR: matched footnote reference at pos {}", pos);
408                        emit_footnote_reference(builder, &id);
409                        pos += len;
410                        text_start = pos;
411                        continue;
412                    }
413                }
414                ConstructDispo::BracketedCitation { end: dispo_end } => {
415                    if dispo_end <= end
416                        && let Some((len, content)) = try_parse_bracketed_citation(&text[pos..])
417                        && pos + len == dispo_end
418                    {
419                        if pos > text_start {
420                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
421                        }
422                        log::trace!("IR: matched bracketed citation at pos {}", pos);
423                        emit_bracketed_citation(builder, content);
424                        pos += len;
425                        text_start = pos;
426                        continue;
427                    }
428                }
429                ConstructDispo::BareCitation { end: dispo_end } => {
430                    if dispo_end <= end
431                        && let Some((len, key, has_suppress)) =
432                            try_parse_bare_citation(&text[pos..])
433                        && pos + len == dispo_end
434                    {
435                        let is_crossref = config.extensions.quarto_crossrefs
436                            && super::citations::is_quarto_crossref_key(key);
437                        if is_crossref || config.extensions.citations {
438                            if pos > text_start {
439                                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
440                            }
441                            if is_crossref {
442                                log::trace!("IR: matched Quarto crossref at pos {}: {}", pos, key);
443                                super::citations::emit_crossref(builder, key, has_suppress);
444                            } else {
445                                log::trace!("IR: matched bare citation at pos {}: {}", pos, key);
446                                emit_bare_citation(builder, key, has_suppress);
447                            }
448                            pos += len;
449                            text_start = pos;
450                            continue;
451                        }
452                    }
453                }
454                ConstructDispo::BracketedSpan { end: dispo_end } => {
455                    if dispo_end <= end
456                        && let Some((len, content, attrs)) = try_parse_bracketed_span(&text[pos..])
457                        && pos + len == dispo_end
458                    {
459                        if pos > text_start {
460                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
461                        }
462                        log::trace!("IR: matched bracketed span at pos {}", pos);
463                        emit_bracketed_span(builder, &content, &attrs, config);
464                        pos += len;
465                        text_start = pos;
466                        continue;
467                    }
468                }
469            }
470        }
471
472        // IR-driven bracket dispatch: if the IR's `process_brackets`
473        // resolved a bracket pair starting at this position, emit it
474        // directly via the appropriate helper. The
475        // dispatcher's `try_parse_*` recognizers compute the actual
476        // byte length and extract content / attributes; the IR's
477        // `suffix_end` is used to constrain the dispatcher's match
478        // shape so the two pipelines agree on which link variant
479        // resolved (e.g. `[foo][bar]` with `bar` undefined and `foo`
480        // defined: IR resolves `[foo]` as shortcut, but the
481        // dispatcher's `try_parse_reference_link` would otherwise
482        // greedily return the full-ref shape). Suppression of inner
483        // LINK / REFERENCE LINK during LINK-text recursion is applied
484        // here (pandoc-native: outer-wins for nested links).
485        //
486        // Pandoc-extended `{.attrs}` after a link can extend the
487        // dispatcher's match length past the IR's `suffix_end`. The
488        // dispatcher's len is therefore constrained to
489        // `[suffix_end, end]` rather than required to equal
490        // `suffix_end` exactly.
491        // IR-driven dispatch: Pandoc unresolved bracket-shape pattern.
492        // Before emitting the `UNRESOLVED_REFERENCE` wrapper, give the
493        // dispatcher's lenient inline-link / inline-image parsers a
494        // chance to override. The IR's `try_inline_suffix` is stricter
495        // than pandoc-markdown for some destination shapes (URLs with
496        // spaces, titles with embedded quotes, shortcode-style braces);
497        // the dispatcher accepts those and produces a real LINK / IMAGE
498        // node — pandoc-native agrees. Without this override, valid
499        // pandoc links would degrade to `UNRESOLVED_REFERENCE` here.
500        if let Some(super::inline_ir::BracketDispo::UnresolvedReference {
501            is_image,
502            text_start: ref_text_start,
503            text_end: ref_text_end,
504            end: ref_end,
505        }) = bracket_plan.lookup(pos)
506        {
507            let is_image = *is_image;
508            let dispo_suffix_end = *ref_end;
509            let suppress = suppress_inner_links && !is_image;
510            if !suppress {
511                let ctx = LinkScanContext::from_options(config);
512                let is_commonmark = config.dialect == Dialect::CommonMark;
513                if is_image {
514                    if config.extensions.inline_images
515                        && let Some((len, alt_text, dest, attributes)) =
516                            try_parse_inline_image(&text[pos..], ctx)
517                        && pos + len >= dispo_suffix_end
518                        && pos + len <= end
519                    {
520                        if pos > text_start {
521                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
522                        }
523                        log::trace!(
524                            "IR: dispatcher overrode UnresolvedReference with inline image at pos {}",
525                            pos
526                        );
527                        emit_inline_image(
528                            builder,
529                            &text[pos..pos + len],
530                            alt_text,
531                            dest,
532                            attributes,
533                            config,
534                        );
535                        pos += len;
536                        text_start = pos;
537                        continue;
538                    }
539                } else if config.extensions.inline_links
540                    && let Some((len, link_text, dest, attributes)) =
541                        try_parse_inline_link(&text[pos..], is_commonmark, ctx)
542                    && pos + len >= dispo_suffix_end
543                    && pos + len <= end
544                {
545                    if pos > text_start {
546                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
547                    }
548                    log::trace!(
549                        "IR: dispatcher overrode UnresolvedReference with inline link at pos {}",
550                        pos
551                    );
552                    emit_inline_link(
553                        builder,
554                        &text[pos..pos + len],
555                        link_text,
556                        dest,
557                        attributes,
558                        config,
559                    );
560                    pos += len;
561                    text_start = pos;
562                    continue;
563                }
564            }
565
566            // Dispatcher didn't override; emit the wrapper.
567            let inner_text = &text[*ref_text_start..*ref_text_end];
568            let suffix_start = *ref_text_end + 1;
569            let label_suffix = if suffix_start < *ref_end {
570                Some(&text[suffix_start..*ref_end])
571            } else {
572                None
573            };
574            if pos > text_start {
575                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
576            }
577            log::trace!(
578                "IR: unresolved Pandoc reference shape at pos {}..{}",
579                pos,
580                ref_end
581            );
582            emit_unresolved_reference(builder, is_image, inner_text, label_suffix, config);
583            pos = *ref_end;
584            text_start = pos;
585            continue;
586        }
587
588        if let Some(super::inline_ir::BracketDispo::Open {
589            is_image,
590            suffix_end,
591            ..
592        }) = bracket_plan.lookup(pos)
593        {
594            let is_image = *is_image;
595            let dispo_suffix_end = *suffix_end;
596            let suppress = suppress_inner_links && !is_image;
597            if !suppress {
598                let ctx = LinkScanContext::from_options(config);
599                let allow_shortcut = config.extensions.shortcut_reference_links;
600                let is_commonmark = config.dialect == Dialect::CommonMark;
601                if is_image {
602                    if config.extensions.inline_images
603                        && let Some((len, alt_text, dest, attributes)) =
604                            try_parse_inline_image(&text[pos..], ctx)
605                        && pos + len >= dispo_suffix_end
606                        && pos + len <= end
607                    {
608                        if pos > text_start {
609                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
610                        }
611                        log::trace!("IR: matched inline image at pos {}", pos);
612                        emit_inline_image(
613                            builder,
614                            &text[pos..pos + len],
615                            alt_text,
616                            dest,
617                            attributes,
618                            config,
619                        );
620                        pos += len;
621                        text_start = pos;
622                        continue;
623                    }
624                    if config.extensions.reference_links
625                        && let Some((len, alt_text, reference, is_shortcut)) =
626                            try_parse_reference_image(&text[pos..], allow_shortcut)
627                        && pos + len == dispo_suffix_end
628                        && pos + len <= end
629                    {
630                        if pos > text_start {
631                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
632                        }
633                        log::trace!("IR: matched reference image at pos {}", pos);
634                        emit_reference_image(builder, alt_text, &reference, is_shortcut, config);
635                        pos += len;
636                        text_start = pos;
637                        continue;
638                    }
639                } else {
640                    if config.extensions.inline_links
641                        && let Some((len, link_text, dest, attributes)) =
642                            try_parse_inline_link(&text[pos..], is_commonmark, ctx)
643                        && pos + len >= dispo_suffix_end
644                        && pos + len <= end
645                    {
646                        if pos > text_start {
647                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
648                        }
649                        log::trace!("IR: matched inline link at pos {}", pos);
650                        emit_inline_link(
651                            builder,
652                            &text[pos..pos + len],
653                            link_text,
654                            dest,
655                            attributes,
656                            config,
657                        );
658                        pos += len;
659                        text_start = pos;
660                        continue;
661                    }
662                    if config.extensions.reference_links
663                        && let Some((len, link_text, reference, is_shortcut)) =
664                            try_parse_reference_link(
665                                &text[pos..],
666                                allow_shortcut,
667                                config.extensions.inline_links,
668                                ctx,
669                            )
670                        && pos + len == dispo_suffix_end
671                        && pos + len <= end
672                    {
673                        if pos > text_start {
674                            builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
675                        }
676                        log::trace!("IR: matched reference link at pos {}", pos);
677                        emit_reference_link(builder, link_text, &reference, is_shortcut, config);
678                        pos += len;
679                        text_start = pos;
680                        continue;
681                    }
682                }
683            }
684        }
685
686        let byte = text.as_bytes()[pos];
687
688        // Backslash math (highest priority if enabled)
689        if byte == b'\\' {
690            // Try double backslash display math first: \\[...\\]
691            if config.extensions.tex_math_double_backslash {
692                if let Some((len, content)) = try_parse_double_backslash_display_math(&text[pos..])
693                {
694                    if pos > text_start {
695                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
696                    }
697                    log::trace!("Matched double backslash display math at pos {}", pos);
698                    emit_double_backslash_display_math(builder, content);
699                    pos += len;
700                    text_start = pos;
701                    continue;
702                }
703
704                // Try double backslash inline math: \\(...\\)
705                if let Some((len, content)) = try_parse_double_backslash_inline_math(&text[pos..]) {
706                    if pos > text_start {
707                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
708                    }
709                    log::trace!("Matched double backslash inline math at pos {}", pos);
710                    emit_double_backslash_inline_math(builder, content);
711                    pos += len;
712                    text_start = pos;
713                    continue;
714                }
715            }
716
717            // Try single backslash display math: \[...\]
718            if config.extensions.tex_math_single_backslash {
719                if let Some((len, content)) = try_parse_single_backslash_display_math(&text[pos..])
720                {
721                    if pos > text_start {
722                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
723                    }
724                    log::trace!("Matched single backslash display math at pos {}", pos);
725                    emit_single_backslash_display_math(builder, content);
726                    pos += len;
727                    text_start = pos;
728                    continue;
729                }
730
731                // Try single backslash inline math: \(...\)
732                if let Some((len, content)) = try_parse_single_backslash_inline_math(&text[pos..]) {
733                    if pos > text_start {
734                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
735                    }
736                    log::trace!("Matched single backslash inline math at pos {}", pos);
737                    emit_single_backslash_inline_math(builder, content);
738                    pos += len;
739                    text_start = pos;
740                    continue;
741                }
742            }
743
744            // Try math environments \begin{equation}...\end{equation}
745            if config.extensions.raw_tex
746                && let Some((len, begin_marker, content, end_marker)) =
747                    try_parse_math_environment(&text[pos..])
748            {
749                if pos > text_start {
750                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
751                }
752                log::trace!("Matched math environment at pos {}", pos);
753                emit_display_math_environment(builder, begin_marker, content, end_marker);
754                pos += len;
755                text_start = pos;
756                continue;
757            }
758
759            // Try bookdown reference: \@ref(label)
760            if config.extensions.bookdown_references
761                && let Some((len, label)) = try_parse_bookdown_reference(&text[pos..])
762            {
763                if pos > text_start {
764                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
765                }
766                log::trace!("Matched bookdown reference at pos {}: {}", pos, label);
767                super::citations::emit_bookdown_crossref(builder, label);
768                pos += len;
769                text_start = pos;
770                continue;
771            }
772
773            // Try escapes (after bookdown refs and backslash math)
774            if let Some((len, ch, escape_type)) = try_parse_escape(&text[pos..]) {
775                let escape_enabled = match escape_type {
776                    EscapeType::HardLineBreak => config.extensions.escaped_line_breaks,
777                    EscapeType::NonbreakingSpace => config.extensions.all_symbols_escapable,
778                    EscapeType::Literal => {
779                        // BASE_ESCAPABLE matches Pandoc's markdown_strict /
780                        // original Markdown set, plus `|` and `~` which the
781                        // formatter emits as escapes for pipe-table separators
782                        // and strikethrough delimiters. Recognising those here
783                        // keeps round-trips idempotent in flavors that don't
784                        // enable all_symbols_escapable.
785                        //
786                        // Under CommonMark dialect, the spec (§2.4) explicitly
787                        // allows ANY ASCII punctuation to be backslash-escaped,
788                        // independent of the all_symbols_escapable extension
789                        // (which also widens to whitespace, a Pandoc-only
790                        // construct).
791                        const BASE_ESCAPABLE: &str = "\\`*_{}[]()>#+-.!|~";
792                        BASE_ESCAPABLE.contains(ch)
793                            || config.extensions.all_symbols_escapable
794                            || (config.dialect == crate::Dialect::CommonMark
795                                && ch.is_ascii_punctuation())
796                    }
797                };
798                if !escape_enabled {
799                    // This escape kind is disabled by the active config: emit no escape
800                    // node; step past the backslash so it joins the next TEXT token
801                    pos = advance_char_boundary(text, pos, end);
802                    continue;
803                }
804
805                // Emit accumulated text
806                if pos > text_start {
807                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
808                }
809
810                log::trace!("Matched escape at pos {}: \\{}", pos, ch);
811                emit_escape(builder, ch, escape_type);
812                pos += len;
813                text_start = pos;
814                continue;
815            }
816
817            // Try LaTeX commands (after escapes, before shortcodes)
818            if config.extensions.raw_tex
819                && let Some(len) = try_parse_latex_command(&text[pos..])
820            {
821                if pos > text_start {
822                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
823                }
824                log::trace!("Matched LaTeX command at pos {}", pos);
825                parse_latex_command(builder, &text[pos..], len);
826                pos += len;
827                text_start = pos;
828                continue;
829            }
830        }
831
832        // Try Quarto shortcodes: {{< shortcode >}}
833        if byte == b'{'
834            && pos + 1 < text.len()
835            && text.as_bytes()[pos + 1] == b'{'
836            && let Some((len, name, attrs)) = try_parse_shortcode(&text[pos..])
837        {
838            if pos > text_start {
839                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
840            }
841            log::trace!("Matched shortcode at pos {}: {}", pos, &name);
842            emit_shortcode(builder, &name, attrs);
843            pos += len;
844            text_start = pos;
845            continue;
846        }
847
848        // Try inline executable code spans (`... `r expr`` and `... `{r} expr``)
849        if byte == b'`'
850            && let Some(m) = try_parse_inline_executable(
851                &text[pos..],
852                config.extensions.rmarkdown_inline_code,
853                config.extensions.quarto_inline_code,
854            )
855        {
856            if pos > text_start {
857                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
858            }
859            log::trace!("Matched inline executable code at pos {}", pos);
860            emit_inline_executable(builder, &m);
861            pos += m.total_len;
862            text_start = pos;
863            continue;
864        }
865
866        // Try code spans
867        if byte == b'`' {
868            if let Some((len, content, backtick_count, attributes)) =
869                try_parse_code_span(&text[pos..])
870            {
871                // Emit accumulated text
872                if pos > text_start {
873                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
874                }
875
876                log::trace!(
877                    "Matched code span at pos {}: {} backticks",
878                    pos,
879                    backtick_count
880                );
881
882                // Check for raw inline
883                if let Some(ref attrs) = attributes
884                    && config.extensions.raw_attribute
885                    && let Some(format) = is_raw_inline(attrs)
886                {
887                    use super::raw_inline::emit_raw_inline;
888                    log::trace!("Matched raw inline span at pos {}: format={}", pos, format);
889                    emit_raw_inline(builder, content, backtick_count, format);
890                } else if !config.extensions.inline_code_attributes && attributes.is_some() {
891                    let code_span_len = backtick_count * 2 + content.len();
892                    emit_code_span(builder, content, backtick_count, None);
893                    pos += code_span_len;
894                    text_start = pos;
895                    continue;
896                } else {
897                    emit_code_span(builder, content, backtick_count, attributes);
898                }
899
900                pos += len;
901                text_start = pos;
902                continue;
903            }
904
905            // Unmatched backtick run.
906            //
907            // CommonMark (and GFM) treat the whole run as literal text — the
908            // run cannot be re-entered as a shorter opener. Pandoc-markdown
909            // instead lets a longer run shadow a shorter one (e.g.
910            // `` ```foo`` `` parses as `` ` `` + ``<code>foo</code>``), so
911            // for the Pandoc dialect we fall through and advance one byte at
912            // a time, allowing the inner run to be tried on a later iteration.
913            if config.dialect == Dialect::CommonMark {
914                let run_len = text[pos..].bytes().take_while(|&b| b == b'`').count();
915                pos += run_len;
916                continue;
917            }
918        }
919
920        // Try textual emoji aliases: :smile:
921        if byte == b':'
922            && config.extensions.emoji
923            && is_emoji_boundary(text, pos)
924            && let Some((len, _alias)) = try_parse_emoji(&text[pos..])
925        {
926            if pos > text_start {
927                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
928            }
929            log::trace!("Matched emoji at pos {}", pos);
930            emit_emoji(builder, &text[pos..pos + len]);
931            pos += len;
932            text_start = pos;
933            continue;
934        }
935
936        // Try inline footnotes: ^[note]. Under Pandoc dialect this is
937        // consumed via the IR's `ConstructPlan` at the top of the loop;
938        // this dispatcher branch only fires for CommonMark dialect with
939        // the extension explicitly enabled.
940        if byte == b'^'
941            && pos + 1 < text.len()
942            && text.as_bytes()[pos + 1] == b'['
943            && config.dialect == Dialect::CommonMark
944            && config.extensions.inline_footnotes
945            && let Some((len, content)) = try_parse_inline_footnote(&text[pos..])
946        {
947            if pos > text_start {
948                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
949            }
950            log::trace!("Matched inline footnote at pos {}", pos);
951            emit_inline_footnote(builder, content, config);
952            pos += len;
953            text_start = pos;
954            continue;
955        }
956
957        // Try superscript: ^text^
958        if byte == b'^'
959            && config.extensions.superscript
960            && let Some((len, content)) = try_parse_superscript(&text[pos..])
961        {
962            if pos > text_start {
963                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
964            }
965            log::trace!("Matched superscript at pos {}", pos);
966            emit_superscript(builder, content, config);
967            pos += len;
968            text_start = pos;
969            continue;
970        }
971
972        // Try bookdown definition: (\#label) or (ref:label)
973        if byte == b'(' && config.extensions.bookdown_references {
974            if let Some((len, label)) = try_parse_bookdown_definition(&text[pos..]) {
975                if pos > text_start {
976                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
977                }
978                log::trace!("Matched bookdown definition at pos {}: {}", pos, label);
979                builder.token(SyntaxKind::TEXT.into(), &text[pos..pos + len]);
980                pos += len;
981                text_start = pos;
982                continue;
983            }
984            if let Some((len, label)) = try_parse_bookdown_text_reference(&text[pos..]) {
985                if pos > text_start {
986                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
987                }
988                log::trace!("Matched bookdown text reference at pos {}: {}", pos, label);
989                builder.token(SyntaxKind::TEXT.into(), &text[pos..pos + len]);
990                pos += len;
991                text_start = pos;
992                continue;
993            }
994        }
995
996        // Try strikeout: ~~text~~
997        // Must run before subscript so `~~text~~` is matched as a single
998        // Strikeout rather than two empty Subscripts. Subscript falls back
999        // to consuming `~~` as an empty subscript only when strikeout
1000        // didn't match (e.g. `~~unclosed`).
1001        if byte == b'~'
1002            && config.extensions.strikeout
1003            && let Some((len, content)) = try_parse_strikeout(&text[pos..])
1004        {
1005            if pos > text_start {
1006                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1007            }
1008            log::trace!("Matched strikeout at pos {}", pos);
1009            emit_strikeout(builder, content, config);
1010            pos += len;
1011            text_start = pos;
1012            continue;
1013        }
1014
1015        // Try subscript: ~text~ or `~~` as empty subscript when strikeout
1016        // didn't match (matches pandoc: `~~unclosed` → `Subscript [] + text`).
1017        if byte == b'~'
1018            && config.extensions.subscript
1019            && let Some((len, content)) = try_parse_subscript(&text[pos..])
1020        {
1021            if pos > text_start {
1022                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1023            }
1024            log::trace!("Matched subscript at pos {}", pos);
1025            emit_subscript(builder, content, config);
1026            pos += len;
1027            text_start = pos;
1028            continue;
1029        }
1030
1031        // Try mark/highlight: ==text==
1032        if byte == b'='
1033            && config.extensions.mark
1034            && let Some((len, content)) = try_parse_mark(&text[pos..])
1035        {
1036            if pos > text_start {
1037                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1038            }
1039            log::trace!("Matched mark at pos {}", pos);
1040            emit_mark(builder, content, config);
1041            pos += len;
1042            text_start = pos;
1043            continue;
1044        }
1045
1046        // Try GFM inline math: $`...`$
1047        if byte == b'$'
1048            && config.extensions.tex_math_gfm
1049            && let Some((len, content)) = try_parse_gfm_inline_math(&text[pos..])
1050        {
1051            if pos > text_start {
1052                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1053            }
1054            log::trace!("Matched GFM inline math at pos {}", pos);
1055            emit_gfm_inline_math(builder, content);
1056            pos += len;
1057            text_start = pos;
1058            continue;
1059        }
1060
1061        // Try math ($...$, $$...$$)
1062        if byte == b'$' && config.extensions.tex_math_dollars {
1063            // Try display math first ($$...$$)
1064            if let Some((len, content)) = try_parse_display_math(&text[pos..]) {
1065                // Emit accumulated text
1066                if pos > text_start {
1067                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1068                }
1069
1070                let dollar_count = text[pos..].chars().take_while(|&c| c == '$').count();
1071                log::trace!(
1072                    "Matched display math at pos {}: {} dollars",
1073                    pos,
1074                    dollar_count
1075                );
1076
1077                // Check for trailing attributes (Quarto cross-reference support).
1078                // The Quarto attribute block sits on the same line as the closing
1079                // `$$`, so scope the lookup to the current line — otherwise
1080                // anything on later lines (e.g. a following `@eq-id` reference)
1081                // makes the segment not end with `}` and the lift no-ops.
1082                let after_math = &text[pos + len..];
1083                let line_end = after_math.find('\n').unwrap_or(after_math.len());
1084                let line_segment = &after_math[..line_end];
1085                let attr_len = if config.extensions.quarto_crossrefs {
1086                    use crate::parser::utils::attributes::try_parse_trailing_attributes;
1087                    if let Some((_attr_block, _)) = try_parse_trailing_attributes(line_segment) {
1088                        let trimmed_after = line_segment.trim_start();
1089                        if let Some(open_brace_pos) = trimmed_after.find('{') {
1090                            let ws_before_brace = line_segment.len() - trimmed_after.len();
1091                            let attr_text_len = trimmed_after[open_brace_pos..]
1092                                .find('}')
1093                                .map(|close| close + 1)
1094                                .unwrap_or(0);
1095                            ws_before_brace + open_brace_pos + attr_text_len
1096                        } else {
1097                            0
1098                        }
1099                    } else {
1100                        0
1101                    }
1102                } else {
1103                    0
1104                };
1105
1106                let total_len = len + attr_len;
1107                emit_display_math(builder, content, dollar_count);
1108
1109                // Emit attributes if present
1110                if attr_len > 0 {
1111                    use crate::parser::utils::attributes::{
1112                        emit_attributes, try_parse_trailing_attributes,
1113                    };
1114                    let attr_text = &text[pos + len..pos + total_len];
1115                    if let Some((attr_block, _text_before)) =
1116                        try_parse_trailing_attributes(attr_text)
1117                    {
1118                        let trimmed_after = attr_text.trim_start();
1119                        let ws_len = attr_text.len() - trimmed_after.len();
1120                        if ws_len > 0 {
1121                            builder.token(SyntaxKind::WHITESPACE.into(), &attr_text[..ws_len]);
1122                        }
1123                        emit_attributes(builder, &attr_block);
1124                    }
1125                }
1126
1127                pos += total_len;
1128                text_start = pos;
1129                continue;
1130            }
1131
1132            // Try inline math ($...$)
1133            if let Some((len, content)) = try_parse_inline_math(&text[pos..]) {
1134                // Emit accumulated text
1135                if pos > text_start {
1136                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1137                }
1138
1139                log::trace!("Matched inline math at pos {}", pos);
1140                emit_inline_math(builder, content);
1141                pos += len;
1142                text_start = pos;
1143                continue;
1144            }
1145
1146            // Neither display nor inline math matched - emit the $ as literal text
1147            // This ensures each $ gets its own TEXT token for CST compatibility
1148            if pos > text_start {
1149                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1150            }
1151            builder.token(SyntaxKind::TEXT.into(), "$");
1152            pos = advance_char_boundary(text, pos, end);
1153            text_start = pos;
1154            continue;
1155        }
1156
1157        // Try autolinks: <url> or <email>
1158        if byte == b'<'
1159            && config.extensions.autolinks
1160            && let Some((len, url)) = try_parse_autolink(
1161                &text[pos..],
1162                config.dialect == crate::options::Dialect::CommonMark,
1163            )
1164        {
1165            if pos > text_start {
1166                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1167            }
1168            log::trace!("Matched autolink at pos {}", pos);
1169            emit_autolink(builder, &text[pos..pos + len], url);
1170            pos += len;
1171            text_start = pos;
1172            continue;
1173        }
1174
1175        if !nested_in_link
1176            && config.extensions.autolink_bare_uris
1177            && let Some((len, url)) = try_parse_bare_uri(&text[pos..])
1178        {
1179            if pos > text_start {
1180                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1181            }
1182            log::trace!("Matched bare URI at pos {}", pos);
1183            emit_bare_uri_link(builder, url, config);
1184            pos += len;
1185            text_start = pos;
1186            continue;
1187        }
1188
1189        // Try native spans: <span>text</span> (after autolink since both
1190        // start with <). Under Pandoc dialect this is consumed via the
1191        // IR's `ConstructPlan` at the top of the loop; this dispatcher
1192        // branch only fires for CommonMark dialect with the extension
1193        // explicitly enabled.
1194        if byte == b'<'
1195            && config.dialect == Dialect::CommonMark
1196            && config.extensions.native_spans
1197            && let Some((len, content, _attributes)) = try_parse_native_span(&text[pos..])
1198        {
1199            if pos > text_start {
1200                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1201            }
1202            log::trace!("Matched native span at pos {}", pos);
1203            emit_native_span(builder, &text[pos..pos + len], content, config);
1204            pos += len;
1205            text_start = pos;
1206            continue;
1207        }
1208
1209        // Try inline raw HTML (CommonMark §6.6 / Pandoc raw_html). Must run
1210        // after autolinks (more specific) and native spans (Pandoc
1211        // <span>…</span> wrapper) since all three start with `<`.
1212        if byte == b'<'
1213            && config.extensions.raw_html
1214            && let Some(len) = try_parse_inline_html(&text[pos..], config.dialect)
1215        {
1216            if pos > text_start {
1217                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1218            }
1219            log::trace!("Matched inline raw HTML at pos {}", pos);
1220            emit_inline_html(builder, &text[pos..pos + len]);
1221            pos += len;
1222            text_start = pos;
1223            continue;
1224        }
1225
1226        // Bracket-starting elements: inline / reference links and
1227        // images are dispatched via the IR-driven arm at the top of
1228        // the loop, gated by the IR's `BracketPlan`. Only dialect-CM-
1229        // specific Pandoc-extension constructs that share the `[...]`
1230        // shape (footnote refs, bracketed citations) need a CM-gated
1231        // dispatcher branch — under Pandoc dialect they're consumed
1232        // via the IR's `ConstructPlan` instead.
1233        if byte == b'['
1234            && config.dialect == Dialect::CommonMark
1235            && config.extensions.footnotes
1236            && let Some((len, id)) = try_parse_footnote_reference(&text[pos..])
1237        {
1238            if pos > text_start {
1239                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1240            }
1241            log::trace!("Matched footnote reference at pos {}", pos);
1242            emit_footnote_reference(builder, &id);
1243            pos += len;
1244            text_start = pos;
1245            continue;
1246        }
1247        if byte == b'['
1248            && config.dialect == Dialect::CommonMark
1249            && config.extensions.citations
1250            && let Some((len, content)) = try_parse_bracketed_citation(&text[pos..])
1251        {
1252            if pos > text_start {
1253                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1254            }
1255            log::trace!("Matched bracketed citation at pos {}", pos);
1256            emit_bracketed_citation(builder, content);
1257            pos += len;
1258            text_start = pos;
1259            continue;
1260        }
1261
1262        // Try bracketed spans: [text]{.class}. Must come after
1263        // links/citations. Under Pandoc dialect this is consumed via
1264        // the IR's `ConstructPlan` at the top of the loop; this
1265        // dispatcher branch only fires for CommonMark dialect with the
1266        // extension explicitly enabled.
1267        if config.dialect == Dialect::CommonMark
1268            && byte == b'['
1269            && config.extensions.bracketed_spans
1270            && let Some((len, text_content, attrs)) = try_parse_bracketed_span(&text[pos..])
1271        {
1272            if pos > text_start {
1273                builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1274            }
1275            log::trace!("Matched bracketed span at pos {}", pos);
1276            emit_bracketed_span(builder, &text_content, &attrs, config);
1277            pos += len;
1278            text_start = pos;
1279            continue;
1280        }
1281
1282        // Try bare citation: @cite (must come after bracketed elements).
1283        // Under Pandoc dialect this is consumed via the IR's
1284        // `ConstructPlan` at the top of the loop; this dispatcher branch
1285        // only fires for CommonMark dialect with the extension
1286        // explicitly enabled.
1287        if config.dialect == Dialect::CommonMark
1288            && byte == b'@'
1289            && (config.extensions.citations || config.extensions.quarto_crossrefs)
1290            && let Some((len, key, has_suppress)) = try_parse_bare_citation(&text[pos..])
1291        {
1292            let is_crossref =
1293                config.extensions.quarto_crossrefs && super::citations::is_quarto_crossref_key(key);
1294            if is_crossref || config.extensions.citations {
1295                if pos > text_start {
1296                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1297                }
1298                if is_crossref {
1299                    log::trace!("Matched Quarto crossref at pos {}: {}", pos, &key);
1300                    super::citations::emit_crossref(builder, key, has_suppress);
1301                } else {
1302                    log::trace!("Matched bare citation at pos {}: {}", pos, &key);
1303                    emit_bare_citation(builder, key, has_suppress);
1304                }
1305                pos += len;
1306                text_start = pos;
1307                continue;
1308            }
1309        }
1310
1311        // Try suppress-author citation: -@cite. Under Pandoc dialect
1312        // this is consumed via the IR's `ConstructPlan` at the top of
1313        // the loop; this dispatcher branch only fires for CommonMark
1314        // dialect with the extension explicitly enabled.
1315        if config.dialect == Dialect::CommonMark
1316            && byte == b'-'
1317            && pos + 1 < text.len()
1318            && text.as_bytes()[pos + 1] == b'@'
1319            && (config.extensions.citations || config.extensions.quarto_crossrefs)
1320            && let Some((len, key, has_suppress)) = try_parse_bare_citation(&text[pos..])
1321        {
1322            let is_crossref =
1323                config.extensions.quarto_crossrefs && super::citations::is_quarto_crossref_key(key);
1324            if is_crossref || config.extensions.citations {
1325                if pos > text_start {
1326                    builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1327                }
1328                if is_crossref {
1329                    log::trace!("Matched Quarto crossref at pos {}: {}", pos, &key);
1330                    super::citations::emit_crossref(builder, key, has_suppress);
1331                } else {
1332                    log::trace!("Matched suppress-author citation at pos {}: {}", pos, &key);
1333                    emit_bare_citation(builder, key, has_suppress);
1334                }
1335                pos += len;
1336                text_start = pos;
1337                continue;
1338            }
1339        }
1340
1341        // Emphasis emission, plan-driven. The IR's emphasis pass has
1342        // already decided every delimiter byte's disposition (open
1343        // marker, close marker, or unmatched literal); consult the
1344        // plan here instead of re-scanning.
1345        if byte == b'*' || byte == b'_' {
1346            match plan.lookup(pos) {
1347                Some(DelimChar::Open {
1348                    len,
1349                    partner,
1350                    partner_len,
1351                    kind,
1352                }) => {
1353                    if pos > text_start {
1354                        builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1355                    }
1356                    let len = len as usize;
1357                    let partner_len = partner_len as usize;
1358                    let (wrapper_kind, marker_kind) = match kind {
1359                        EmphasisKind::Strong => (SyntaxKind::STRONG, SyntaxKind::STRONG_MARKER),
1360                        EmphasisKind::Emph => (SyntaxKind::EMPHASIS, SyntaxKind::EMPHASIS_MARKER),
1361                    };
1362                    builder.start_node(wrapper_kind.into());
1363                    builder.token(marker_kind.into(), &text[pos..pos + len]);
1364                    parse_inline_range_impl(
1365                        text,
1366                        pos + len,
1367                        partner,
1368                        config,
1369                        builder,
1370                        nested_in_link,
1371                        plan,
1372                        bracket_plan,
1373                        construct_plan,
1374                        suppress_inner_links,
1375                        mask,
1376                    );
1377                    builder.token(marker_kind.into(), &text[partner..partner + partner_len]);
1378                    builder.finish_node();
1379                    pos = partner + partner_len;
1380                    text_start = pos;
1381                    continue;
1382                }
1383                Some(DelimChar::Close) => {
1384                    // Defensive: a close should be jumped past by its
1385                    // matching open. If we hit one anyway (e.g. when the
1386                    // outer caller's range starts mid-pair), let it be
1387                    // emitted as part of the surrounding text by simply
1388                    // advancing. text_start stays put so the byte folds
1389                    // into the next TEXT flush.
1390                    pos += 1;
1391                    continue;
1392                }
1393                Some(DelimChar::Literal) | None => {
1394                    // Unmatched delim chars at this position behave as
1395                    // literal text. Don't emit yet — let them coalesce
1396                    // with surrounding plain bytes via the existing
1397                    // text_start flushing so the CST keeps the same TEXT
1398                    // token granularity Pandoc fixtures expect.
1399                    let bytes = text.as_bytes();
1400                    let mut end_pos = pos + 1;
1401                    while end_pos < end && bytes[end_pos] == byte {
1402                        match plan.lookup(end_pos) {
1403                            Some(DelimChar::Literal) | None => end_pos += 1,
1404                            _ => break,
1405                        }
1406                    }
1407                    pos = end_pos;
1408                    continue;
1409                }
1410            }
1411        }
1412
1413        // Check for newlines - may need to emit as hard line break
1414        if byte == b'\r' && pos + 1 < end && text.as_bytes()[pos + 1] == b'\n' {
1415            let text_before = &text[text_start..pos];
1416
1417            // Check for trailing spaces hard line break (always enabled in Pandoc)
1418            let trailing_spaces = text_before.chars().rev().take_while(|&c| c == ' ').count();
1419            if trailing_spaces >= 2 {
1420                // Emit text before the trailing spaces
1421                let text_content = &text_before[..text_before.len() - trailing_spaces];
1422                if !text_content.is_empty() {
1423                    builder.token(SyntaxKind::TEXT.into(), text_content);
1424                }
1425                let spaces = " ".repeat(trailing_spaces);
1426                builder.token(
1427                    SyntaxKind::HARD_LINE_BREAK.into(),
1428                    &format!("{}\r\n", spaces),
1429                );
1430                pos += 2;
1431                text_start = pos;
1432                continue;
1433            }
1434
1435            // hard_line_breaks: treat all single newlines as hard line breaks
1436            if config.extensions.hard_line_breaks {
1437                if !text_before.is_empty() {
1438                    builder.token(SyntaxKind::TEXT.into(), text_before);
1439                }
1440                builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\r\n");
1441                pos += 2;
1442                text_start = pos;
1443                continue;
1444            }
1445
1446            // Regular newline
1447            if !text_before.is_empty() {
1448                builder.token(SyntaxKind::TEXT.into(), text_before);
1449            }
1450            builder.token(SyntaxKind::NEWLINE.into(), "\r\n");
1451            pos += 2;
1452            text_start = pos;
1453            continue;
1454        }
1455
1456        if byte == b'\n' {
1457            let text_before = &text[text_start..pos];
1458
1459            // Check for trailing spaces hard line break (always enabled in Pandoc)
1460            let trailing_spaces = text_before.chars().rev().take_while(|&c| c == ' ').count();
1461            if trailing_spaces >= 2 {
1462                // Emit text before the trailing spaces
1463                let text_content = &text_before[..text_before.len() - trailing_spaces];
1464                if !text_content.is_empty() {
1465                    builder.token(SyntaxKind::TEXT.into(), text_content);
1466                }
1467                let spaces = " ".repeat(trailing_spaces);
1468                builder.token(SyntaxKind::HARD_LINE_BREAK.into(), &format!("{}\n", spaces));
1469                pos += 1;
1470                text_start = pos;
1471                continue;
1472            }
1473
1474            // hard_line_breaks: treat all single newlines as hard line breaks
1475            if config.extensions.hard_line_breaks {
1476                if !text_before.is_empty() {
1477                    builder.token(SyntaxKind::TEXT.into(), text_before);
1478                }
1479                builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\n");
1480                pos += 1;
1481                text_start = pos;
1482                continue;
1483            }
1484
1485            // Regular newline
1486            if !text_before.is_empty() {
1487                builder.token(SyntaxKind::TEXT.into(), text_before);
1488            }
1489            builder.token(SyntaxKind::NEWLINE.into(), "\n");
1490            pos += 1;
1491            text_start = pos;
1492            continue;
1493        }
1494
1495        // Regular character, keep accumulating
1496        pos = advance_char_boundary(text, pos, end);
1497    }
1498
1499    // Emit any remaining text
1500    if pos > text_start && text_start < end {
1501        log::trace!("Emitting remaining TEXT: {:?}", &text[text_start..end]);
1502        builder.token(SyntaxKind::TEXT.into(), &text[text_start..end]);
1503    }
1504
1505    log::trace!("parse_inline_range complete: start={}, end={}", start, end);
1506}
1507
#[cfg(test)]
mod tests {
    use super::*;
    use crate::syntax::{SyntaxKind, SyntaxNode};
    use rowan::GreenNode;

    /// Parses `text` as inline content nested under a fresh `wrapper` node
    /// and returns the root of the resulting syntax tree.
    fn parse_wrapped(wrapper: SyntaxKind, text: &str, config: &ParserOptions) -> SyntaxNode {
        let mut builder = GreenNodeBuilder::new();
        builder.start_node(wrapper.into());
        parse_inline_text_recursive(&mut builder, text, config);
        builder.finish_node();

        let green: GreenNode = builder.finish();
        SyntaxNode::new_root(green)
    }

    /// Returns `true` if the tree rooted at `root` contains a node of `kind`.
    fn has_kind(root: &SyntaxNode, kind: SyntaxKind) -> bool {
        root.descendants().any(|n| n.kind() == kind)
    }

    /// Returns `true` if some `outer` node in the tree has an `inner` node
    /// among its descendants, i.e. `inner` is structurally nested in `outer`.
    fn has_nested(root: &SyntaxNode, outer: SyntaxKind, inner: SyntaxKind) -> bool {
        root.descendants()
            .filter(|n| n.kind() == outer)
            .any(|o| o.descendants().any(|n| n.kind() == inner))
    }

    #[test]
    fn test_recursive_simple_emphasis() {
        let text = "*test*";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();

        // Deliberately no wrapper node here: whatever the parse emits for
        // `*test*` becomes the tree root directly.
        parse_inline_text_recursive(&mut builder, text, &config);

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);

        // Should be lossless
        assert_eq!(node.text().to_string(), text);

        // Should have EMPHASIS node
        assert!(
            has_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
    }

    #[test]
    fn test_recursive_nested() {
        let text = "*foo **bar** baz*";
        let config = ParserOptions::default();

        // Wrap in a PARAGRAPH node (inline content needs a parent)
        let node = parse_wrapped(SyntaxKind::PARAGRAPH, text, &config);

        // Should be lossless
        assert_eq!(node.text().to_string(), text);

        // Should have both EMPHASIS and STRONG
        assert!(
            has_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert!(
            has_kind(&node, SyntaxKind::STRONG),
            "Should have STRONG node"
        );
    }

    /// Test Pandoc's "three" algorithm: ***foo* bar**
    /// Expected: Strong[Emph[foo], bar]
    #[test]
    fn test_triple_emphasis_star_then_double_star() {
        let text = "***foo* bar**";
        let config = ParserOptions::default();
        let node = parse_wrapped(SyntaxKind::DOCUMENT, text, &config);

        // Verify losslessness
        assert_eq!(node.text().to_string(), text);

        // Debug dump of the tree, used only to make a failure readable.
        let structure = format!("{:#?}", node);

        // Should have both STRONG and EMPH. Checked on node kinds rather
        // than on the debug dump, where "STRONG" would also match the
        // STRONG_MARKER tokens.
        assert!(
            has_kind(&node, SyntaxKind::STRONG),
            "Should have STRONG node"
        );
        assert!(
            has_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );

        // STRONG should be outer, EMPH should be inner. This checks real
        // ancestry: a mere "STRONG appears before EMPHASIS" traversal-order
        // check would also accept two sibling nodes.
        assert!(
            has_nested(&node, SyntaxKind::STRONG, SyntaxKind::EMPHASIS),
            "EMPH should be inside STRONG, not before it. Current structure:\n{}",
            structure
        );
    }

    /// Test Pandoc's "three" algorithm: ***foo** bar*
    /// Expected: Emph[Strong[foo], bar]
    #[test]
    fn test_triple_emphasis_double_star_then_star() {
        let text = "***foo** bar*";
        let config = ParserOptions::default();
        let node = parse_wrapped(SyntaxKind::DOCUMENT, text, &config);

        // Verify losslessness
        assert_eq!(node.text().to_string(), text);

        // Debug dump of the tree, used only to make a failure readable.
        let structure = format!("{:#?}", node);

        // Should have both EMPH and STRONG (checked on node kinds, not on
        // substrings of the debug dump).
        assert!(
            has_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert!(
            has_kind(&node, SyntaxKind::STRONG),
            "Should have STRONG node"
        );

        // EMPH should be outer, STRONG should be inner (real ancestry, not
        // just traversal order).
        assert!(
            has_nested(&node, SyntaxKind::EMPHASIS, SyntaxKind::STRONG),
            "STRONG should be inside EMPH. Current structure:\n{}",
            structure
        );
    }

    /// Test that display math with attributes parses correctly
    /// Regression test for equation_attributes_single_line golden test
    #[test]
    fn test_display_math_with_attributes() {
        let text = "$$ E = mc^2 $$ {#eq-einstein}";
        let mut config = ParserOptions::default();
        config.extensions.quarto_crossrefs = true; // Enable Quarto cross-references

        // Parse the whole text under a DOCUMENT root
        let node = parse_wrapped(SyntaxKind::DOCUMENT, text, &config);

        // Verify losslessness
        assert_eq!(node.text().to_string(), text);

        // Should have DISPLAY_MATH node
        assert!(
            has_kind(&node, SyntaxKind::DISPLAY_MATH),
            "Should have DISPLAY_MATH node"
        );

        // Should have ATTRIBUTE node
        assert!(
            has_kind(&node, SyntaxKind::ATTRIBUTE),
            "Should have ATTRIBUTE node for {{#eq-einstein}}"
        );

        // Attributes should not be TEXT. TEXT is emitted as a token in this
        // file, so the element following DISPLAY_MATH has to be inspected
        // with `next_sibling_or_token`; `next_sibling` only yields nodes and
        // would never see a TEXT token.
        let math_followed_by_text = node
            .descendants()
            .filter(|n| n.kind() == SyntaxKind::DISPLAY_MATH)
            .filter_map(|n| n.next_sibling_or_token())
            .filter_map(|element| element.into_token())
            .any(|tok| tok.kind() == SyntaxKind::TEXT && tok.text().contains("{#eq-einstein}"));
        assert!(
            !math_followed_by_text,
            "Attributes should not be parsed as TEXT"
        );
    }

    #[test]
    fn test_parse_inline_text_gfm_inline_link_destination_not_autolinked() {
        use crate::options::{Dialect, Extensions, Flavor};

        let config = ParserOptions {
            flavor: Flavor::Gfm,
            dialect: Dialect::for_flavor(Flavor::Gfm),
            extensions: Extensions::for_flavor(Flavor::Gfm),
            ..ParserOptions::default()
        };

        let root = parse_wrapped(
            SyntaxKind::PARAGRAPH,
            "Second Link [link_text](https://link.com)",
            &config,
        );

        let links: Vec<_> = root
            .descendants()
            .filter(|n| n.kind() == SyntaxKind::LINK)
            .collect();
        assert_eq!(
            links.len(),
            1,
            "Expected exactly one LINK node for inline link, not nested bare URI autolink"
        );

        // Pull the text and destination out of the LINK node's children.
        let link = &links[0];
        let mut link_text = None::<String>;
        let mut link_dest = None::<String>;

        for child in link.children() {
            match child.kind() {
                SyntaxKind::LINK_TEXT => link_text = Some(child.text().to_string()),
                SyntaxKind::LINK_DEST => link_dest = Some(child.text().to_string()),
                _ => {}
            }
        }

        assert_eq!(link_text.as_deref(), Some("link_text"));
        assert_eq!(link_dest.as_deref(), Some("https://link.com"));
    }

    #[test]
    fn test_autolink_bare_uri_utf8_boundary_safe() {
        // A lone multi-byte character must survive the scan without slicing
        // inside the character while bare-URI autolinking is enabled.
        let text = "§";
        let mut config = ParserOptions::default();
        config.extensions.autolink_bare_uris = true;

        let node = parse_wrapped(SyntaxKind::DOCUMENT, text, &config);
        assert_eq!(node.text().to_string(), text);
    }

    #[test]
    fn test_parse_emphasis_unicode_content_no_panic() {
        let text = "*§*";
        let config = ParserOptions::default();

        let node = parse_wrapped(SyntaxKind::PARAGRAPH, text, &config);

        assert!(
            has_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert_eq!(node.text().to_string(), text);
    }
}
1795
#[test]
fn test_two_with_nested_one_and_triple_closer() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    // Input: **bold with *italic***
    // Expected tree: Strong["bold with ", Emph["italic"]].
    // The trailing *** is read as * (closing Emph) followed by ** (closing Strong).
    let source = "**bold with *italic***";
    let options = ParserOptions::default();

    let mut tree_builder = GreenNodeBuilder::new();
    tree_builder.start_node(SyntaxKind::PARAGRAPH.into());
    parse_inline_text_recursive(&mut tree_builder, source, &options);
    tree_builder.finish_node();

    let green: GreenNode = tree_builder.finish();
    let root = SyntaxNode::new_root(green);

    assert_eq!(root.text().to_string(), source, "Should be lossless");

    // Exactly one STRONG wrapper is expected ...
    let strongs: Vec<_> = root
        .descendants()
        .filter(|candidate| candidate.kind() == SyntaxKind::STRONG)
        .collect();
    assert_eq!(strongs.len(), 1, "Should have exactly one STRONG node");

    // ... and the EMPHASIS has to live somewhere inside it.
    let strong = &strongs[0];
    let emphasis_inside = strong
        .descendants()
        .map(|inner| inner.kind())
        .any(|kind| kind == SyntaxKind::EMPHASIS);
    assert!(emphasis_inside, "STRONG should contain EMPHASIS node");
}
1832
#[test]
fn test_emphasis_with_trailing_space_before_closer() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    // Pandoc does not require an asterisk closer to be right-flanking, so
    // `*foo *` still has to come out as emphasis.
    let source = "*foo *";
    let options = ParserOptions::default();

    let mut tree_builder = GreenNodeBuilder::new();
    tree_builder.start_node(SyntaxKind::PARAGRAPH.into());
    parse_inline_text_recursive(&mut tree_builder, source, &options);
    tree_builder.finish_node();

    let green: GreenNode = tree_builder.finish();
    let root = SyntaxNode::new_root(green);

    let emphasis_found = root
        .descendants()
        .map(|n| n.kind())
        .any(|kind| kind == SyntaxKind::EMPHASIS);
    assert!(emphasis_found, "Should have EMPHASIS node");
    assert_eq!(root.text().to_string(), source);
}
1857
#[test]
fn test_triple_emphasis_all_strong_nested() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    // Input: ***foo** bar **baz***
    // Expected (matches Pandoc's output): Emph[Strong[foo], " bar ", Strong[baz]]
    let source = "***foo** bar **baz***";
    let options = ParserOptions::default();

    let mut tree_builder = GreenNodeBuilder::new();
    tree_builder.start_node(SyntaxKind::DOCUMENT.into());
    parse_inline_text_recursive(&mut tree_builder, source, &options);
    tree_builder.finish_node();

    let green: GreenNode = tree_builder.finish();
    let root = SyntaxNode::new_root(green);

    // A single EMPHASIS node is expected in the whole tree.
    let emphases: Vec<_> = root
        .descendants()
        .filter(|candidate| candidate.kind() == SyntaxKind::EMPHASIS)
        .collect();
    let emphasis_count = emphases.len();
    assert_eq!(
        emphasis_count, 1,
        "Should have exactly one EMPHASIS node, found: {}",
        emphasis_count
    );

    // Its direct children must include exactly two STRONG nodes.
    let strong_children = emphases[0]
        .children()
        .filter(|child| child.kind() == SyntaxKind::STRONG)
        .count();
    assert_eq!(
        strong_children, 2,
        "EMPHASIS should contain two STRONG nodes, found: {}",
        strong_children
    );

    // The tree must reproduce the input exactly.
    assert_eq!(root.text().to_string(), source);
}
1906
#[test]
fn test_triple_emphasis_all_emph_nested() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    // Input: ***foo* bar *baz***
    // Expected (matches Pandoc's output): Strong[Emph[foo], " bar ", Emph[baz]]
    let source = "***foo* bar *baz***";
    let options = ParserOptions::default();

    let mut tree_builder = GreenNodeBuilder::new();
    tree_builder.start_node(SyntaxKind::DOCUMENT.into());
    parse_inline_text_recursive(&mut tree_builder, source, &options);
    tree_builder.finish_node();

    let green: GreenNode = tree_builder.finish();
    let root = SyntaxNode::new_root(green);

    // A single STRONG node is expected in the whole tree.
    let strongs: Vec<_> = root
        .descendants()
        .filter(|candidate| candidate.kind() == SyntaxKind::STRONG)
        .collect();
    let strong_count = strongs.len();
    assert_eq!(
        strong_count, 1,
        "Should have exactly one STRONG node, found: {}",
        strong_count
    );

    // Its direct children must include exactly two EMPHASIS nodes.
    let emphasis_children = strongs[0]
        .children()
        .filter(|child| child.kind() == SyntaxKind::EMPHASIS)
        .count();
    assert_eq!(
        emphasis_children, 2,
        "STRONG should contain two EMPHASIS nodes, found: {}",
        emphasis_children
    );

    // The tree must reproduce the input exactly.
    assert_eq!(root.text().to_string(), source);
}
1955
1956// Multiline emphasis tests
#[test]
fn test_parse_emphasis_multiline() {
    // Per Pandoc spec, emphasis CAN contain newlines (soft breaks)
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let text = "*text on\nline two*";
    let config = ParserOptions::default();
    let mut builder = GreenNodeBuilder::new();

    builder.start_node(SyntaxKind::PARAGRAPH.into());
    parse_inline_text_recursive(&mut builder, text, &config);
    builder.finish_node();

    let green: GreenNode = builder.finish();
    let node = SyntaxNode::new_root(green);

    let emphasis = node
        .descendants()
        .find(|n| n.kind() == SyntaxKind::EMPHASIS)
        .expect("Should have EMPHASIS node");

    // Losslessness of the whole tree.
    assert_eq!(node.text().to_string(), text);

    // Check the newline on the EMPHASIS node itself. Checking the root text
    // would be implied by the equality above and prove nothing about the
    // emphasis actually spanning both lines.
    assert!(
        emphasis.text().to_string().contains('\n'),
        "Should preserve newline in emphasis content"
    );
}
1984
#[test]
fn test_parse_strong_multiline() {
    // Per Pandoc spec, strong emphasis CAN contain newlines
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let text = "**strong on\nline two**";
    let config = ParserOptions::default();
    let mut builder = GreenNodeBuilder::new();

    builder.start_node(SyntaxKind::PARAGRAPH.into());
    parse_inline_text_recursive(&mut builder, text, &config);
    builder.finish_node();

    let green: GreenNode = builder.finish();
    let node = SyntaxNode::new_root(green);

    let strong = node
        .descendants()
        .find(|n| n.kind() == SyntaxKind::STRONG)
        .expect("Should have STRONG node");

    // Losslessness of the whole tree.
    assert_eq!(node.text().to_string(), text);

    // Check the newline on the STRONG node itself. Checking the root text
    // would be implied by the equality above and prove nothing about the
    // strong span actually covering both lines.
    assert!(
        strong.text().to_string().contains('\n'),
        "Should preserve newline in strong content"
    );
}
2012
#[test]
fn test_parse_triple_emphasis_multiline() {
    // Triple emphasis with newlines
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let text = "***both on\nline two***";
    let config = ParserOptions::default();
    let mut builder = GreenNodeBuilder::new();

    builder.start_node(SyntaxKind::PARAGRAPH.into());
    parse_inline_text_recursive(&mut builder, text, &config);
    builder.finish_node();

    let green: GreenNode = builder.finish();
    let node = SyntaxNode::new_root(green);

    // Should have STRONG node (triple = strong + emph)
    let strong = node
        .descendants()
        .find(|n| n.kind() == SyntaxKind::STRONG)
        .expect("Should have STRONG node");

    // Losslessness of the whole tree.
    assert_eq!(node.text().to_string(), text);

    // Check the newline on the STRONG node itself. Checking the root text
    // would be implied by the equality above and prove nothing about the
    // emphasis actually spanning both lines.
    assert!(
        strong.text().to_string().contains('\n'),
        "Should preserve newline in triple emphasis content"
    );
}